#!/usr/bin/env python
"""Get books from project gutenberg by passing the book id

Usage: getbook ID [ID ...]

For every integer id given on the command line, the book's page is fetched
and its first <h1> heading is split into "<title> by <author>".  The file
served by the ``.mobi`` download link is then written to
``<author>/<title>.mobi`` relative to the current working directory.
"""

import contextlib
import errno
import os
import shutil
import sys

try:  # Python 2
    import urllib2
except ImportError:  # Python 3: urllib.request provides the same urlopen()
    import urllib.request as urllib2

_PAGE_URL = "http://www.gutenberg.org/etext/%d"
_DL_LINK = "http://www.gutenberg.org/cache/epub/%d/pg%d.mobi"

# Author name used when the page heading contains no " by " separator.
_UNKNOWN_AUTHOR = "Unknown"


class Book(object):
    """Book gets the author and title by scraping the page"""

    def __init__(self, book_id):
        """Fetch the page for *book_id* and extract its title and author.

        Raises ValueError when the page has no usable <h1> heading.  Errors
        raised by ``urlopen`` (network failure, HTTP error) propagate as-is.
        """
        self._book = book_id
        heading = self._fetch_heading(book_id)
        self._title, self._author = self._split_heading(heading)

    @staticmethod
    def _fetch_heading(book_id):
        """Return the first child of the <h1> element on the book's page."""
        # Imported here rather than at module level so that the pure helpers
        # (id parsing, heading splitting) can be imported and tested on a
        # machine that does not have BeautifulSoup installed.
        from BeautifulSoup import BeautifulSoup

        # closing() guarantees the HTTP response is released even when
        # reading fails; Python 2 responses are not context managers.
        with contextlib.closing(urllib2.urlopen(_PAGE_URL % book_id)) as page:
            html = page.read()

        heading = BeautifulSoup(html).h1
        if heading is None or not heading.contents:
            raise ValueError(
                "No title heading found for book id %s" % book_id)
        return heading.contents[0]

    @staticmethod
    def _split_heading(heading):
        """Split a "<title> by <author>" heading into ``(title, author)``.

        The split happens at the LAST " by " so that titles which themselves
        contain " by " keep it.  A heading with no " by " at all is treated
        as a bare title with an unknown author, instead of producing an
        empty title and using the whole heading as the author.
        """
        title, separator, author = heading.rpartition(' by ')
        if not separator:
            return heading, _UNKNOWN_AUTHOR
        return title, author

    def get_author(self):
        """Return the author"""
        return self._author

    def get_title(self):
        """Return the title"""
        return self._title


def _safe_name(name):
    """Return *name* made safe for use as a single path component.

    The value comes from a scraped web page, so path separators are replaced
    with "_" and the special names "", "." and ".." are mapped to "_" to keep
    the download inside the intended directory.  Surrounding whitespace is
    stripped.
    """
    cleaned = name.replace('/', '_').replace('\\', '_').strip()
    if cleaned in ('', '.', '..'):
        return '_'
    return cleaned


def _ensure_dir(path):
    """Create directory *path* (and parents) unless it already exists."""
    try:
        os.makedirs(path)
    except OSError as exc:
        # Only "already exists as a directory" is acceptable; anything else
        # (permissions, a regular file in the way, ...) is a real error.
        if exc.errno != errno.EEXIST or not os.path.isdir(path):
            raise


def get_book_ids():
    """Get the arguments from the command line - ignore bad arguments

    Returns the list of integer ids found in ``sys.argv[1:]``, in order.
    Arguments that are not integers are reported on stdout and skipped.
    """
    ids = []
    for book_id in sys.argv[1:]:
        try:
            ids.append(int(book_id))
        except ValueError:
            # Single-argument parenthesised form prints the same text under
            # both the Python 2 print statement and the Python 3 function.
            print("ERROR: Could not add id '%s', must be id" % book_id)
    return ids


def main():
    """Run the script"""
    for bid in get_book_ids():
        book = Book(bid)

        author_dir = _safe_name(book.get_author())
        _ensure_dir(author_dir)
        target = os.path.join(
            author_dir, _safe_name(book.get_title()) + '.mobi')

        # Open the download first so that an HTTP error does not leave an
        # empty output file behind, then stream it to disk in chunks.
        download = urllib2.urlopen(_DL_LINK % (bid, bid))
        with contextlib.closing(download) as mobi_file:
            with open(target, 'wb') as output:
                shutil.copyfileobj(mobi_file, output)


if __name__ == "__main__":
    main()