#!/usr/bin/env python
"""Download album cover art for a music library.

The library is expected to be laid out as ``<base>/<artist>/<album>``.
For every album directory found, the Amazon digital-music search is
queried with "<artist> <album>" and the first cover image found is
saved as ``imgs/<artist>.<album>.<size>.jpg`` (relative to the current
working directory).

Usage: ``script.py <base-directory>``
"""

import os
import shutil
import sys
from contextlib import closing

try:  # Python 2
    from urllib import urlencode
    from urllib2 import urlopen
except ImportError:  # Python 3
    from urllib.parse import urlencode
    from urllib.request import urlopen

try:
    from BeautifulSoup import BeautifulSoup  # BeautifulSoup 3
except ImportError:
    try:
        from bs4 import BeautifulSoup  # BeautifulSoup 4
    except ImportError:
        # Keep the pure helpers importable; main() and the scraping
        # helpers report the missing dependency when it is needed.
        BeautifulSoup = None

# Size tag used in the file name of covers downloaded from Amazon.
AMAZON_COVER_SIZE = 500

# Size tags probed to decide whether an album already has a cover.
EXISTING_COVER_SIZES = (AMAZON_COVER_SIZE,)


def walklevel(some_dir, level=1):
    """Walk *some_dir* like ``os.walk`` but descend at most *level* levels.

    Yields the same ``(root, dirs, files)`` triples as ``os.walk``.
    With ``level=0`` only *some_dir* itself is yielded.

    Raises:
        ValueError: if *some_dir* is not an existing directory.  The
            check runs when the generator is first advanced.
    """
    some_dir = some_dir.rstrip(os.path.sep)
    # Explicit check instead of ``assert`` so it survives ``python -O``.
    if not os.path.isdir(some_dir):
        raise ValueError("not a directory: %r" % (some_dir,))
    base_depth = some_dir.count(os.path.sep)
    for root, dirs, files in os.walk(some_dir):
        yield root, dirs, files
        # Emptying ``dirs`` in place stops os.walk from going deeper.
        if base_depth + level <= root.count(os.path.sep):
            del dirs[:]


def get_search_dirs(base='.'):
    """Return every directory exactly two levels below *base*.

    Paths are built as ``<base>/<first level>/<second level>`` and are
    joined with '/', which is what get_term_from_path() splits on.
    """
    found = []
    for root, first_level, _files in walklevel(base, 0):
        for first_dir in first_level:
            first_path = "%s/%s" % (root, first_dir)
            for _root, second_level, _files2 in walklevel(first_path, 0):
                found.extend(
                    "%s/%s" % (first_path, second_dir)
                    for second_dir in second_level
                )
    return found


def get_term_from_path(the_dir):
    """Derive search terms from an ``.../<artist>/<album>`` path.

    Returns a dict with the keys ``'artist'`` (second-to-last path
    component) and ``'album'`` (last path component).

    Raises:
        IndexError: if *the_dir* has fewer than two '/'-separated parts.
    """
    parts = the_dir.split('/')
    return {'artist': parts[-2], 'album': parts[-1]}


def get_url_amazon(term):
    """Return the Amazon digital-music search URL for *term*."""
    params = {
        'url': 'search-alias=digital-music',
        'x': '0',
        'y': '0',
        'field-keywords': term,
    }
    return "http://www.amazon.com/s/ref=nb_sb_noss?%s" % urlencode(params)


def get_url(term):
    """Return the eMusic search URL for *term*."""
    params = {'mode': 'b', 'QT': term}
    return "http://www.emusic.com/search.html?%s" % urlencode(params)


def _fetch_soup(url):
    """Download *url* and return its parsed markup.

    The HTTP response is always closed, even when reading fails.
    """
    if BeautifulSoup is None:
        raise ImportError(
            "BeautifulSoup is required to parse search results")
    with closing(urlopen(url)) as response:
        markup = response.read()
    return BeautifulSoup(markup)


def get_album_img_src_amazon(url):
    """Return the cover image URL from the Amazon results page at *url*.

    The image is taken from the ``<a id="mp3StoreShovelerShvlLink0">``
    element.  Raises AttributeError, TypeError, KeyError or IndexError
    when the page does not have the expected structure.
    """
    soup = _fetch_soup(url)
    img_src = soup.find('a', {'id': 'mp3StoreShovelerShvlLink0'}).img['src']
    pieces = img_src.split('_')
    # Drop the second-to-last '_'-separated piece; presumably this is
    # the thumbnail size modifier, leaving the full-size image URL
    # (TODO confirm against a live page).
    pieces.pop(-2)
    return '_'.join(pieces)


def get_album_img_srcs_emusic(url):
    """Return cover image URLs from the eMusic results page at *url*.

    The first ``<li>`` of ``<ul class="resultList">`` supplies the image;
    the last path segment of its URL is replaced to build two variants.
    Returns a dict with the keys ``'large'`` (600x600) and ``'huge'``
    (1400x1400).  Raises AttributeError, TypeError, KeyError or
    IndexError when the page does not have the expected structure.
    """
    soup = _fetch_soup(url)
    result_list = soup.find('ul', {'class': 'resultList'})
    first_item = result_list.findAll('li')[0]
    img_parts = first_item.find('img')['src'].split('/')
    img_parts[-1] = '600x600.jpg'
    large_img = '/'.join(img_parts)
    img_parts[-1] = '1400x1400.jpg'
    huge_img = '/'.join(img_parts)
    return {'large': large_img, 'huge': huge_img}


def get_file_name(info, size):
    """Return the path a cover is stored under.

    *info* is a dict with ``'artist'`` and ``'album'`` keys (see
    get_term_from_path); *size* is the integer size tag.
    """
    return 'imgs/%s.%s.%d.jpg' % (info['artist'], info['album'], size)


def save_file(in_stream, size, info):
    """Write the readable binary stream *in_stream* to the cover file.

    The destination comes from get_file_name(info, size).  Its directory
    is created when missing, and the data is copied in chunks rather
    than being read into memory in one piece.
    """
    file_name = get_file_name(info, size)
    target_dir = os.path.dirname(file_name)
    if target_dir and not os.path.isdir(target_dir):
        os.makedirs(target_dir)
    with open(file_name, 'wb') as output:
        shutil.copyfileobj(in_stream, output)


def _existing_cover(search_terms):
    """Return the path of an already-saved cover, or None if none exists."""
    for size in EXISTING_COVER_SIZES:
        candidate = get_file_name(search_terms, size)
        if os.path.exists(candidate):
            return candidate
    return None


def main():
    """Fetch a cover for every ``<artist>/<album>`` directory.

    The base directory is read from ``sys.argv[1]``.  Exits with status 1
    and a message on stderr when the argument is missing, is not a
    directory, or BeautifulSoup is not installed.
    """
    if len(sys.argv) < 2:
        sys.exit(
            "Must enter path of directory to search. Example: %s ./foo"
            % sys.argv[0])
    base_dir = sys.argv[1]

    if not os.path.isdir(base_dir):
        sys.exit("Could not find path: '%s'" % base_dir)

    # Fail once up front instead of reporting every album as "skipped".
    if BeautifulSoup is None:
        sys.exit("BeautifulSoup is required to parse search results")

    # Remove trailing /
    if base_dir.endswith("/"):
        base_dir = base_dir[:-1]

    # NOTE: only the Amazon lookup is active.  The eMusic helpers
    # (get_url, get_album_img_srcs_emusic) are kept but not called here.
    for the_dir in get_search_dirs(base_dir):
        search_terms = get_term_from_path(the_dir)

        existing = _existing_cover(search_terms)
        if existing is not None:
            print("Skipping check, found '%s'" % existing)
            continue

        url = get_url_amazon(
            "%s %s" % (search_terms['artist'], search_terms['album']))
        try:
            img_src = get_album_img_src_amazon(url)
            img_file = urlopen(img_src)
        except Exception:
            # Best effort per album: network and page-structure errors
            # skip this album.  Unlike a bare ``except``, this still
            # lets KeyboardInterrupt and SystemExit stop the run.
            print("Skipped, could not find cover art for %s"
                  % search_terms['album'])
            continue

        with closing(img_file):
            save_file(img_file, AMAZON_COVER_SIZE, search_terms)
        print("Saved large amazon cover for %s" % search_terms['album'])


if __name__ == "__main__":
    main()