dot-files/bin/getbook

#!/usr/bin/env python
"""Get books from  project gutenberg by passing the book id"""

import os
import sys
import urllib2

from BeautifulSoup import BeautifulSoup

# Page describing a single book; %d takes the book id. Book scrapes this
# page's first <h1> for the title and author.
_PAGE_URL = "http://www.gutenberg.org/etext/%d"
# Location of a book's .mobi file; both %d placeholders take the book id.
_DL_LINK = "http://www.gutenberg.org/cache/epub/%d/pg%d.mobi"

class Book(object):
    """Author and title of a Project Gutenberg book, scraped from its page.

    The book's page is fetched once, at construction time, and the text at
    the start of its first ``<h1>`` element is split into title and author.
    """

    # Author recorded when the page heading has no " by <author>" part.
    _UNKNOWN_AUTHOR = u'Unknown'

    def __init__(self, book_id):
        """Fetch the page for ``book_id`` and extract author and title.

        ``book_id`` is the integer substituted into ``_PAGE_URL``. Errors
        raised while fetching or parsing the page propagate to the caller.
        """
        self._book = book_id
        response = urllib2.urlopen(_PAGE_URL % self._book)
        try:
            # Read eagerly so the connection can be released right away
            # instead of staying open for the lifetime of the object.
            self._html = response.read()
        finally:
            response.close()
        self._soup = BeautifulSoup(self._html)
        self._title, self._author = self._split_heading(
            self._soup.h1.contents[0])

    @classmethod
    def _split_heading(cls, heading):
        """Split a page heading into a ``(title, author)`` pair.

        The heading is expected to read ``"<title> by <author>"``. It is
        split on the LAST " by " so that a title which itself contains
        " by " stays intact. A heading without any " by " is taken to be
        the title alone and the author becomes ``_UNKNOWN_AUTHOR``.

        NOTE: a heading such as "Stand by Me" with no author part cannot be
        told apart from "<title> by <author>" and is still split.
        """
        heading = heading.strip()
        title, separator, author = heading.rpartition(' by ')
        if not separator:
            return heading, cls._UNKNOWN_AUTHOR
        return title.strip(), author.strip()

    def get_author(self):
        """Return the author"""
        return self._author

    def get_title(self):
        """Return the title"""
        return self._title

def get_book_ids():
    """Return the book ids given on the command line as a list of ints.

    Every argument after the program name (``sys.argv[1:]``) is converted
    with ``int``. An argument that cannot be converted is reported on
    stderr and skipped, so one bad argument does not abort the whole run.
    """
    ids = []
    for book_id in sys.argv[1:]:
        try:
            ids.append(int(book_id))
        except ValueError:
            # Diagnostics go to stderr so they never mix with regular
            # output on stdout.
            sys.stderr.write(
                "ERROR: Could not add id '%s', must be an integer\n"
                % book_id)
    return ids

def _path_component(name):
    """Return ``name`` made safe to use as a single path component."""
    for separator in ('/', os.sep, os.altsep):
        if separator:
            name = name.replace(separator, '_')
    # An empty name or a directory reference would not name a new entry.
    if name in ('', '.', '..'):
        return '_'
    return name


def main():
    """Download every book whose id was given on the command line.

    Each book is saved as ``<author>/<title>.mobi`` relative to the current
    working directory; the author directory is created when it is missing.
    Download and file-system errors propagate to the caller.
    """
    for bid in get_book_ids():
        book = Book(bid)
        # Author and title are scraped from a web page, so path separators
        # in them must not be able to redirect where the file is written.
        author_dir = _path_component(book.get_author())
        file_name = '%s.mobi' % _path_component(book.get_title())
        if not os.path.exists(author_dir):
            os.makedirs(author_dir)

        mobi_file = urllib2.urlopen(_DL_LINK % (bid, bid))
        try:
            # "with" closes the output file even if the download fails
            # part-way through the write.
            with open(os.path.join(author_dir, file_name), 'wb') as output:
                output.write(mobi_file.read())
        finally:
            mobi_file.close()

# Run the downloader only when executed as a script, not when imported.
if __name__ == "__main__":
    main()