You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
65 lines
1.7 KiB
Python
65 lines
1.7 KiB
Python
#!/usr/bin/env python
|
|
"""Get books from project gutenberg by passing the book id"""
|
|
|
|
import os
|
|
import sys
|
|
import urllib2
|
|
|
|
from BeautifulSoup import BeautifulSoup
|
|
|
|
# URL template for a book's landing page; filled with the integer book id.
_PAGE_URL = "http://www.gutenberg.org/etext/%d"
# URL template for a book's .mobi download; filled with the book id twice
# (once for the cache directory, once for the file name).
_DL_LINK = "http://www.gutenberg.org/cache/epub/%d/pg%d.mobi"
|
|
|
|
class Book(object):
    """Author and title of a book, scraped from its web page.

    The first ``<h1>`` of the page is expected to read
    ``"<title> by <author>"``; it is split on the last ``" by "``.
    """

    # Author recorded when the heading contains no " by " separator.
    _UNKNOWN_AUTHOR = "Unknown"

    def __init__(self, book_id):
        """Fetch the page for *book_id* and extract its author and title.

        book_id -- integer id substituted into ``_PAGE_URL``.

        Errors raised by ``urllib2.urlopen`` propagate to the caller.
        """
        self._book = book_id
        response = urllib2.urlopen(_PAGE_URL % self._book)
        try:
            # Read eagerly so the connection can be closed right away
            # instead of being left open for the life of the object.
            self._html = response.read()
        finally:
            response.close()
        self._soup = BeautifulSoup(self._html)
        self._title, self._author = self._split_heading(
            self._soup.h1.contents[0])

    @staticmethod
    def _split_heading(heading):
        """Split ``"<title> by <author>"`` into a ``(title, author)`` pair.

        The split happens on the last ``" by "`` so a title that itself
        contains " by " stays intact. When the separator is absent the whole
        heading is the title and the author is ``_UNKNOWN_AUTHOR``.
        """
        title, separator, author = heading.rpartition(' by ')
        if not separator:
            # On a miss rpartition returns the whole string in the last slot.
            return author, Book._UNKNOWN_AUTHOR
        return title, author

    def get_author(self):
        """Return the author"""
        return self._author

    def get_title(self):
        """Return the title"""
        return self._title
|
|
|
|
def get_book_ids():
    """Return the integer book ids given on the command line.

    Every argument after the script name is converted with ``int``.
    Arguments that are not valid integers are reported on stdout and
    skipped, so one bad argument does not abort the run.
    """
    ids = []
    for book_id in sys.argv[1:]:
        try:
            ids.append(int(book_id))
        except ValueError:
            # The parenthesised single-argument form prints the same text
            # under the Python 2 print statement and the Python 3 function.
            print("ERROR: Could not add id '%s', must be id" % book_id)
    return ids
|
|
|
|
def _safe_path_component(name):
    """Return *name* with path separators replaced by underscores.

    Author and title come from scraped page text; a separator inside them
    would otherwise point the output path at a different directory.
    """
    for separator in (os.sep, os.altsep):
        # os.altsep is None on platforms with a single separator.
        if separator:
            name = name.replace(separator, '_')
    return name


def main():
    """Download the .mobi file for every book id given on the command line.

    Each book is written to ``<author>/<title>.mobi`` relative to the
    current working directory; the author directory is created if missing.
    """
    for bid in get_book_ids():
        book = Book(bid)
        author_dir = _safe_path_component(book.get_author())
        file_name = '%s.mobi' % _safe_path_component(book.get_title())

        # Download first so a failed request leaves nothing behind on disk.
        mobi_file = urllib2.urlopen(_DL_LINK % (bid, bid))
        try:
            data = mobi_file.read()
        finally:
            mobi_file.close()

        if not os.path.exists(author_dir):
            os.makedirs(author_dir)
        # The with-block closes the file even if the write fails.
        with open(os.path.join(author_dir, file_name), 'wb') as output:
            output.write(data)
|
|
|
|
# Run the downloader only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
|
|
|
|