Add script to download a book from Project Gutenberg

main
Buddy Sandidge 12 years ago
parent 000f61c4c7
commit 57ffcd4f23

@ -0,0 +1,64 @@
#!/usr/bin/env python
"""Get books from project gutenberg by passing the book id"""
import os
import sys
import urllib2
from BeautifulSoup import BeautifulSoup
# Page that is scraped for a book's title and author; the single %d
# placeholder is filled with the integer book id.
_PAGE_URL = "http://www.gutenberg.org/etext/%d"
# Direct link to a book's .mobi file; both %d placeholders are filled with
# the same integer book id.
_DL_LINK = "http://www.gutenberg.org/cache/epub/%d/pg%d.mobi"
class Book(object):
    """Title and author of a book, scraped from its web page.

    The page is fetched once, at construction time.  The text at the start
    of the page's first <h1> element is expected to have the form
    "<title> by <author>".
    """

    # Author reported when the page heading has no " by " separator.
    _UNKNOWN_AUTHOR = "Unknown"

    def __init__(self, book_id):
        """Fetch the page for *book_id* and extract its title and author.

        book_id -- integer id of the book, substituted into _PAGE_URL.

        Errors raised while opening or reading the page propagate to the
        caller unchanged.
        """
        self._book = book_id
        response = urllib2.urlopen(_PAGE_URL % self._book)
        try:
            # Read eagerly so the connection can be released right away
            # instead of staying open for the lifetime of the object.
            self._html = response.read()
        finally:
            response.close()
        self._soup = BeautifulSoup(self._html)
        heading = self._soup.h1.contents[0]
        self._title, self._author = self._split_heading(heading)

    @staticmethod
    def _split_heading(heading):
        """Split a "<title> by <author>" heading into (title, author).

        Only the last " by " separates the author, so a title that itself
        contains " by " is kept whole.  A heading without any " by " is
        treated as a bare title with an unknown author, rather than as an
        author with an empty title.
        """
        title, separator, author = heading.rpartition(' by ')
        if not separator:
            return heading, Book._UNKNOWN_AUTHOR
        return title, author

    def get_author(self):
        """Return the author"""
        return self._author

    def get_title(self):
        """Return the title"""
        return self._title
def get_book_ids():
    """Collect the integer book ids given as command-line arguments.

    Every argument after the program name is converted with int().
    Arguments that cannot be converted are reported on standard output
    and skipped; the remaining ids are returned in their original order.
    """
    parsed = []
    for raw_arg in sys.argv[1:]:
        try:
            number = int(raw_arg)
        except ValueError:
            print("ERROR: Could not add id '%s', must be id" % raw_arg)
        else:
            parsed.append(number)
    return parsed
def main():
    """Download the .mobi file of every book id given on the command line.

    Each book is written to "<author>/<title>.mobi"; the author directory
    is created first when it does not exist yet.
    """
    for bid in get_book_ids():
        book = Book(bid)
        author_dir = book.get_author()
        if not os.path.exists(author_dir):
            os.makedirs(author_dir)
        target = '%s/%s.mobi' % (author_dir, book.get_title())
        mobi_file = urllib2.urlopen(_DL_LINK % (bid, bid))
        try:
            # The with-block closes the output file even if the download
            # or the write fails part-way through.
            with open(target, 'wb') as output:
                output.write(mobi_file.read())
        finally:
            # Always release the HTTP connection.
            mobi_file.close()


if __name__ == "__main__":
    main()
Loading…
Cancel
Save