Add script to download a book from Project Gutenberg
parent
000f61c4c7
commit
57ffcd4f23
@ -0,0 +1,64 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
"""Get books from project gutenberg by passing the book id"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import urllib2
|
||||||
|
|
||||||
|
from BeautifulSoup import BeautifulSoup
|
||||||
|
|
||||||
|
# Book page URL template; the integer book id is substituted for %d.
# Book fetches this page and reads the title and author from its <h1>.
_PAGE_URL = "http://www.gutenberg.org/etext/%d"
|
||||||
|
# Download URL template for the .mobi file; the integer book id is
# substituted twice (once for the directory, once for the file name).
_DL_LINK = "http://www.gutenberg.org/cache/epub/%d/pg%d.mobi"
|
||||||
|
|
||||||
|
class Book:
    """Author and title of a book, scraped from its Project Gutenberg page.

    The first <h1> of the page is treated as a "<title> by <author>"
    heading (assumed page layout -- confirm against the live site).
    """

    # Author recorded when the heading carries no " by <author>" part.
    _UNKNOWN_AUTHOR = 'Unknown'

    def __init__(self, book_id):
        """Fetch the page for *book_id* and extract the title and author.

        book_id -- integer book id, substituted into _PAGE_URL.

        Errors raised while opening or parsing the page propagate to the
        caller.
        """
        self._book = book_id
        response = urllib2.urlopen(_PAGE_URL % self._book)
        try:
            self._html = response.read()
        finally:
            # Release the connection as soon as the body has been read.
            response.close()
        self._soup = BeautifulSoup(self._html)
        heading = self._soup.h1.contents[0]
        self._title, self._author = self._split_heading(heading)

    @staticmethod
    def _split_heading(heading):
        """Split a "<title> by <author>" heading into (title, author).

        The split is made at the LAST " by " so a title that itself contains
        " by " is kept whole.  A heading without " by " is returned entirely
        as the title, paired with a placeholder author, instead of leaving
        the title empty.
        """
        title, separator, author = heading.rpartition(' by ')
        if not separator:
            return heading.strip(), Book._UNKNOWN_AUTHOR
        return title.strip(), author.strip()

    def get_author(self):
        """Return the author"""
        return self._author

    def get_title(self):
        """Return the title"""
        return self._title
|
||||||
|
|
||||||
|
def get_book_ids(args=None):
    """Return the book ids given as arguments, ignoring bad arguments.

    args -- optional list of argument strings to parse.  When omitted, the
            command-line arguments (sys.argv[1:]) are used.

    Returns a list of ints in the order given.  Every argument that is not
    a positive integer is reported on stderr and skipped.
    """
    if args is None:
        args = sys.argv[1:]

    ids = []
    for arg in args:
        try:
            book_id = int(arg)
        except ValueError:
            book_id = None

        # Ebook numbers start at 1, so zero and negative values are rejected
        # here rather than failing later as an HTTP error.
        if book_id is None or book_id <= 0:
            # Diagnostics go to stderr so they do not mix with normal output.
            sys.stderr.write(
                "ERROR: Could not add id '%s', must be a positive integer\n"
                % arg)
            continue
        ids.append(book_id)
    return ids
|
||||||
|
|
||||||
|
def _safe_name(name):
    """Return *name* made safe for use as a single path component.

    Path separators are replaced with '_' and the special names '.' and
    '..' are mapped to '_', so text scraped from a web page can never point
    outside the intended directory.
    """
    for separator in (os.sep, os.altsep):
        if separator:  # os.altsep is None on platforms with one separator
            name = name.replace(separator, '_')
    if name in ('.', '..'):
        return '_'
    return name


def main():
    """Download the .mobi file of every book id given on the command line.

    Each book is saved as "<author>/<title>.mobi" relative to the current
    working directory; the author directory is created when missing.
    Errors from the network or the file system propagate to the caller.
    """
    for bid in get_book_ids():
        book = Book(bid)

        # Fall back to fixed names so an empty author or title can never
        # produce an empty directory name or a bare ".mobi" file.
        author_dir = _safe_name(book.get_author()) or 'Unknown'
        file_stem = _safe_name(book.get_title()) or str(bid)

        # Download first: a failed request then leaves no empty file behind.
        mobi_file = urllib2.urlopen(_DL_LINK % (bid, bid))
        try:
            data = mobi_file.read()
        finally:
            mobi_file.close()

        if not os.path.isdir(author_dir):
            os.makedirs(author_dir)

        target = os.path.join(author_dir, '%s.mobi' % file_stem)
        with open(target, 'wb') as output:
            output.write(data)
|
||||||
|
|
||||||
|
# Run the downloader only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue