#!/usr/bin/env python
"""Get books from Project Gutenberg by passing the book id.

Pass one or more numeric book ids on the command line. Each book is saved
as ``<author>/<title>.mobi`` below the current working directory.
"""

import contextlib
import os
import shutil
import sys

try:
    import urllib2
except ImportError:
    # Python 3 provides urlopen in urllib.request instead of urllib2.
    import urllib.request as urllib2

try:
    from BeautifulSoup import BeautifulSoup
except ImportError:
    # Keep the module importable (e.g. to reuse get_book_ids) when the
    # scraping dependency is missing; Book() reports the problem on use.
    BeautifulSoup = None

_PAGE_URL = "http://www.gutenberg.org/etext/%d"
_DL_LINK = "http://www.gutenberg.org/cache/epub/%d/pg%d.mobi"
# Stand-in used when the page heading names no author, or when a scraped
# name is unusable as a path component.
_UNKNOWN = "Unknown"


class Book(object):
    """Title and author of a book, scraped from its Project Gutenberg page."""

    def __init__(self, book_id):
        """Fetch the page for *book_id* and parse its first ``<h1>`` heading.

        Args:
            book_id: integer book id, substituted into the page URL.

        Raises:
            ImportError: if BeautifulSoup is not installed.
            ValueError: if the page has no ``<h1>`` heading with content.
        """
        if BeautifulSoup is None:
            raise ImportError("BeautifulSoup is required to scrape book pages")
        self._book = book_id
        page_url = _PAGE_URL % self._book
        # closing() releases the connection even if reading fails; urllib2
        # responses are not context managers on Python 2.
        with contextlib.closing(urllib2.urlopen(page_url)) as page:
            self._html = page.read()
        self._soup = BeautifulSoup(self._html)
        heading = self._soup.h1
        if heading is None or not heading.contents:
            raise ValueError("No title heading found for book %s" % book_id)
        self._title, self._author = self._parse_heading(heading.contents[0])

    @staticmethod
    def _parse_heading(heading):
        """Split a ``"<title> by <author>"`` heading into ``(title, author)``.

        The split happens at the last ``" by "`` so a title that itself
        contains " by " stays intact. A heading without that separator is
        returned as the title, paired with a placeholder author, rather
        than producing an empty title.
        """
        heading = heading.strip()
        title, separator, author = heading.rpartition(' by ')
        if not separator:
            return heading, _UNKNOWN
        return title, author

    def get_author(self):
        """Return the author"""
        return self._author

    def get_title(self):
        """Return the title"""
        return self._title


def _safe_name(name):
    """Return *name* reduced to a single, safe path component.

    Path separators are replaced with underscores and names that are empty
    or refer to the current/parent directory are replaced by a placeholder,
    so scraped text cannot redirect the download outside its directory.
    """
    for separator in (os.sep, os.altsep):
        if separator:  # os.altsep is None on platforms with one separator
            name = name.replace(separator, '_')
    name = name.strip()
    if name in ('', '.', '..'):
        return _UNKNOWN
    return name


def get_book_ids(id_args=None):
    """Convert argument strings to integer book ids, ignoring bad ones.

    Args:
        id_args: optional sequence of argument strings; defaults to the
            command-line arguments (``sys.argv[1:]``).

    Returns:
        A list of ints in input order. Arguments that ``int()`` rejects
        are reported on standard output and skipped.
    """
    if id_args is None:
        id_args = sys.argv[1:]
    ids = []
    for book_id in id_args:
        try:
            ids.append(int(book_id))
        except ValueError:
            # Single-argument print(...) emits the same text on Python 2 and 3.
            print("ERROR: Could not add id '%s', must be id" % book_id)
    return ids


def main():
    """Download each requested book to ``<author>/<title>.mobi``."""
    for bid in get_book_ids():
        book = Book(bid)
        author_dir = _safe_name(book.get_author())
        if not os.path.isdir(author_dir):
            os.makedirs(author_dir)
        target = os.path.join(
            author_dir, '%s.mobi' % _safe_name(book.get_title()))
        download_url = _DL_LINK % (bid, bid)
        with contextlib.closing(urllib2.urlopen(download_url)) as mobi_file:
            with open(target, 'wb') as output:
                # Stream in chunks instead of holding the whole book in memory.
                shutil.copyfileobj(mobi_file, output)


if __name__ == "__main__":
    main()