You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
	
	
		
			65 lines
		
	
	
		
			1.7 KiB
		
	
	
	
		
			Python
		
	
			
		
		
	
	
			65 lines
		
	
	
		
			1.7 KiB
		
	
	
	
		
			Python
		
	
#!/usr/bin/env python
 | 
						|
"""Get books from  project gutenberg by passing the book id"""
 | 
						|
 | 
						|
import os
 | 
						|
import sys
 | 
						|
import urllib2
 | 
						|
 | 
						|
from BeautifulSoup import BeautifulSoup
 | 
						|
 | 
						|
# Book page URL template; takes the integer book id. Book.__init__ fetches
# this page and reads the title and author from its first <h1>.
_PAGE_URL = "http://www.gutenberg.org/etext/%d"
 | 
						|
# Download URL template for a book's .mobi file; main() substitutes the
# book id into both placeholders.
_DL_LINK = "http://www.gutenberg.org/cache/epub/%d/pg%d.mobi"
 | 
						|
 | 
						|
class Book(object):
    """A book whose title and author are scraped from its web page.

    The page at ``_PAGE_URL % book_id`` is fetched once, when the object is
    constructed. The text of the page's first ``<h1>`` element is expected
    to look like ``"<title> by <author>"``.
    """

    # Author recorded when the heading carries no usable " by <author>" part.
    _UNKNOWN_AUTHOR = "Unknown"

    def __init__(self, book_id):
        """Fetch the page for *book_id* and extract the title and author.

        book_id -- integer id substituted into ``_PAGE_URL``.

        Errors raised while opening the URL or parsing the page propagate
        to the caller.
        """
        self._book = book_id
        self._html = urllib2.urlopen(_PAGE_URL % self._book)
        try:
            self._soup = BeautifulSoup(self._html)
        finally:
            # The parser has consumed the response (or failed); either way
            # the connection must not be left open.
            self._html.close()

        heading = self._soup.h1.contents[0]
        self._title, self._author = self._split_heading(heading)

    @staticmethod
    def _split_heading(heading):
        """Split a ``"<title> by <author>"`` heading into (title, author).

        The split is made at the LAST " by ", so a title that itself
        contains " by " stays intact. When the heading has no " by " (or
        nothing follows it), the author is ``_UNKNOWN_AUTHOR``; without a
        separator the whole heading is kept as the title instead of
        leaving the title empty.
        """
        title, separator, author = heading.rpartition(' by ')
        if not separator:
            return heading.strip(), Book._UNKNOWN_AUTHOR
        return title.strip(), author.strip() or Book._UNKNOWN_AUTHOR

    def get_author(self):
        """Return the author"""
        return self._author

    def get_title(self):
        """Return the title"""
        return self._title
 | 
						|
 | 
						|
def get_book_ids():
    """Return the book ids given on the command line as a list of ints.

    Every argument after the program name in ``sys.argv`` is converted with
    ``int()``. An argument that cannot be converted is reported on stderr
    and skipped, so one bad argument does not abort the whole run.
    """
    ids = []
    for book_id in sys.argv[1:]:
        try:
            ids.append(int(book_id))
        except ValueError:
            # Diagnostics go to stderr so they never mix with regular output.
            sys.stderr.write(
                "ERROR: Could not add id '%s', must be an integer\n" % book_id)
    return ids
 | 
						|
 | 
						|
def _safe_name(name, fallback):
    """Return *name* as a single safe path component, or *fallback*.

    Path separators are replaced with '_' because the name comes from a
    scraped web page and must not be able to point outside the intended
    directory. An empty name, '.' or '..' is replaced by *fallback*.
    """
    for separator in (os.sep, os.altsep):
        if separator:  # os.altsep is None on platforms with one separator
            name = name.replace(separator, '_')
    name = name.strip()
    if name in ('', os.curdir, os.pardir):
        return fallback
    return name


def main():
    """Download the .mobi file of every book id given on the command line.

    Each book is written to ``<author>/<title>.mobi`` relative to the
    current working directory; the author directory is created if it does
    not exist yet. Errors from fetching or writing a book propagate.
    """
    for bid in get_book_ids():
        book = Book(bid)
        author_dir = _safe_name(book.get_author(), 'Unknown')
        # Fall back to the book id so the file is never named just ".mobi".
        file_name = '%s.mobi' % _safe_name(book.get_title(), str(bid))

        # Create first and inspect the failure afterwards: this avoids the
        # window between an exists() check and the makedirs() call.
        try:
            os.makedirs(author_dir)
        except OSError:
            if not os.path.isdir(author_dir):
                raise

        mobi_file = urllib2.urlopen(_DL_LINK % (bid, bid))
        try:
            with open(os.path.join(author_dir, file_name), 'wb') as output:
                output.write(mobi_file.read())
        finally:
            mobi_file.close()
 | 
						|
 | 
						|
# Run the downloader only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
 | 
						|
 | 
						|
 |