You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
	
	
		
			172 lines
		
	
	
		
			5.0 KiB
		
	
	
	
		
			Python
		
	
			
		
		
	
	
			172 lines
		
	
	
		
			5.0 KiB
		
	
	
	
		
			Python
		
	
#!/usr/bin/env python
 | 
						|
"""
 | 
						|
This is a program that attempts to download as much album art from Amazon
as possible.
 | 
						|
"""
 | 
						|
 | 
						|
import os
 | 
						|
import sys
 | 
						|
import urllib
 | 
						|
import urllib2
 | 
						|
 | 
						|
from BeautifulSoup import BeautifulSoup
 | 
						|
 | 
						|
def walklevel(some_dir, level=1):
    """Walk a directory tree like ``os.walk``, but only ``level`` levels deep.

    Args:
        some_dir: Directory to start from. Trailing path separators are
            ignored; a path consisting only of separators is treated as the
            filesystem root.
        level: How many levels below ``some_dir`` to descend into. ``0``
            yields only ``some_dir`` itself.

    Yields:
        The same ``(root, dirs, files)`` tuples that ``os.walk`` produces.

    Raises:
        AssertionError: If ``some_dir`` is not an existing directory. Because
            this is a generator, the error surfaces on the first iteration.
    """
    sep = os.path.sep
    stripped = some_dir.rstrip(sep)
    # A path made only of separators (the filesystem root) strips down to "";
    # keep a single separator so the root can still be walked.
    top = stripped or some_dir[:1]
    if not os.path.isdir(top):
        # Raised explicitly instead of via ``assert`` so the check survives
        # ``python -O``; the exception type is kept for existing callers.
        raise AssertionError("not a directory: %r" % (some_dir,))
    base_depth = stripped.count(sep)
    for root, dirs, files in os.walk(top):
        yield root, dirs, files
        # Strip before counting so the root directory itself has depth 0.
        if base_depth + level <= root.rstrip(sep).count(sep):
            # Emptying ``dirs`` in place stops os.walk from descending further.
            del dirs[:]
 | 
						|
 | 
						|
def get_search_dirs(base = '.'):
    """Return the paths of every directory exactly two levels below ``base``.

    Each immediate sub-directory of ``base`` is listed in turn, and the
    directories found inside it are returned as ``base/child/grandchild``
    strings joined with ``/``.
    """
    found = []
    for top, first_level, _ in walklevel(base, 0):
        for child in first_level:
            child_path = "%s/%s" % (top, child)
            for _, second_level, _ in walklevel(child_path, 0):
                found.extend(
                    "%s/%s" % (child_path, grandchild)
                    for grandchild in second_level
                )
    return found
 | 
						|
 | 
						|
def get_term_from_path(the_dir):
    """Derive artist and album search terms from a directory path.

    The last two ``/``-separated components of ``the_dir`` are taken to be
    the artist and the album respectively. Trailing slashes are ignored so
    that ``"music/Artist/Album/"`` yields the same result as
    ``"music/Artist/Album"``.

    Args:
        the_dir: A ``/``-separated path with at least two components.

    Returns:
        A dict with the keys ``'artist'`` and ``'album'``.

    Raises:
        IndexError: If the path has fewer than two components.
    """
    # Without the rstrip a trailing "/" would produce an empty album name and
    # shift the real album into the artist slot.
    parts = the_dir.rstrip('/').split('/')
    return {
        'artist': parts[-2],
        'album': parts[-1],
    }
 | 
						|
 | 
						|
def get_url_amazon(term):
    """Build the Amazon digital-music search URL for ``term``.

    Args:
        term: Free-text search keywords (for example ``"Artist Album"``).

    Returns:
        The search URL with ``term`` URL-encoded into the query string.
    """
    # ``urlencode`` lives in different modules on Python 2 and Python 3.
    try:
        from urllib import urlencode
    except ImportError:
        from urllib.parse import urlencode

    # A sequence of pairs (rather than a dict) keeps the query-string order
    # stable across interpreter versions.
    params = [
        ('url', 'search-alias=digital-music'),
        ('x', '0'),
        ('y', '0'),
        ('field-keywords', term),
    ]
    return "http://www.amazon.com/s/ref=nb_sb_noss?%s" % urlencode(params)
 | 
						|
 | 
						|
def get_url(term):
    """Build the emusic.com search URL for ``term``.

    Args:
        term: Free-text search keywords (for example an album title).

    Returns:
        The search URL with ``term`` URL-encoded into the query string.
    """
    # ``urlencode`` lives in different modules on Python 2 and Python 3.
    try:
        from urllib import urlencode
    except ImportError:
        from urllib.parse import urlencode

    # A sequence of pairs (rather than a dict) keeps the query-string order
    # stable across interpreter versions.
    params = [
        ('mode', 'b'),
        ('QT', term),
    ]
    return "http://www.emusic.com/search.html?%s" % urlencode(params)
 | 
						|
 | 
						|
def get_album_img_src_amazon(url):
    """Fetch an Amazon search page and return the first result's cover URL.

    The page at ``url`` is parsed and the ``<img>`` inside the anchor with
    id ``mp3StoreShovelerShvlLink0`` is located. The second-to-last
    ``_``-separated segment of its ``src`` is dropped before the URL is
    returned (presumably the thumbnail size modifier -- TODO confirm).

    Args:
        url: Search URL, e.g. as produced by ``get_url_amazon``.

    Returns:
        The image URL with the second-to-last ``_`` segment removed, or the
        unmodified ``src`` when it contains no ``_`` at all.

    Raises:
        ValueError: If the expected anchor or its image is not on the page.
    """
    response = urllib2.urlopen(url)
    try:
        soup = BeautifulSoup(response)
    finally:
        # Always release the connection, even if parsing fails.
        response.close()

    link = soup.find('a', {'id': 'mp3StoreShovelerShvlLink0'})
    if link is None or link.img is None:
        raise ValueError("no album image found at %s" % url)

    img_parts = link.img['src'].split('_')
    # Nothing to drop when the source contains no "_" separator.
    if len(img_parts) >= 2:
        img_parts.pop(-2)
    return '_'.join(img_parts)
 | 
						|
 | 
						|
def get_album_img_srcs_emusic(url):
    """Fetch an emusic search page and derive cover URLs from the first hit.

    The first ``<li>`` of the ``<ul class="resultList">`` element is located
    and the last path segment of its image ``src`` is swapped for
    ``600x600.jpg`` and ``1400x1400.jpg``.

    Args:
        url: Search URL, e.g. as produced by ``get_url``.

    Returns:
        A dict with the keys ``'large'`` (600x600) and ``'huge'``
        (1400x1400), each holding an image URL.

    Raises:
        ValueError: If the result list, its first item or the item's image
            is not on the page.
    """
    response = urllib2.urlopen(url)
    try:
        soup = BeautifulSoup(response)
    finally:
        # Always release the connection, even if parsing fails.
        response.close()

    result_list = soup.find('ul', {'class': 'resultList'})
    first_item = result_list.find('li') if result_list is not None else None
    img = first_item.find('img') if first_item is not None else None
    if img is None:
        raise ValueError("no album image found at %s" % url)

    img_parts = img['src'].split('/')

    # Only the file name differs between the available renditions.
    img_parts[-1] = '600x600.jpg'
    large_img = '/'.join(img_parts)
    img_parts[-1] = '1400x1400.jpg'
    huge_img = '/'.join(img_parts)

    return {
        'large': large_img,
        'huge': huge_img,
    }
 | 
						|
 | 
						|
 | 
						|
def get_file_name(info, size):
    """Return the path under ``imgs/`` used to store a cover image.

    ``info`` must provide the keys ``'artist'`` and ``'album'``; ``size`` is
    formatted as an integer. The result has the form
    ``imgs/<artist>.<album>.<size>.jpg``.
    """
    fields = (info['artist'], info['album'], size)
    return 'imgs/%s.%s.%d.jpg' % fields
 | 
						|
 | 
						|
def save_file(in_stream, size, info):
    """Write the contents of ``in_stream`` to the cover file for ``info``.

    Args:
        in_stream: Readable binary stream (anything with ``read()``) holding
            the image data.
        size: Image size used in the target file name.
        info: Dict with ``'artist'`` and ``'album'`` keys, as accepted by
            ``get_file_name``.
    """
    file_name = get_file_name(info, size)

    # Create the output directory on first use instead of failing on open().
    target_dir = os.path.dirname(file_name)
    if target_dir and not os.path.isdir(target_dir):
        os.makedirs(target_dir)

    # ``with`` guarantees the file is closed even if the read or write fails.
    with open(file_name, 'wb') as output:
        output.write(in_stream.read())
 | 
						|
 | 
						|
def main():
    """Download a cover image for every ``<base>/<artist>/<album>`` directory.

    The base directory is read from the first command-line argument. For
    each album directory found two levels below it, the cover is looked up
    on Amazon and stored via ``save_file`` unless a cover file for that
    album already exists. Albums whose cover cannot be fetched are skipped.

    Exits with status 1 when the argument is missing or the path does not
    exist.
    """

    # Get the path for the directory
    if len(sys.argv) < 2:
        print("Must enter path of directory to search. Example: %s ./foo" %
              sys.argv[0])
        # A usage error is a normal failure exit, not an abort()/core dump.
        sys.exit(1)
    base_dir = sys.argv[1]

    if not os.path.exists(base_dir):
        print("Could not find path: '%s'" % base_dir)
        sys.exit(1)

    # Remove trailing / (but keep a bare "/" intact)
    base_dir = base_dir.rstrip("/") or "/"

    for the_dir in get_search_dirs(base_dir):

        search_terms = get_term_from_path(the_dir)

        # Only the 500px Amazon cover is produced at the moment; the sizes
        # 1400 and 600 belonged to the disabled emusic lookup.
        existing = None
        for tmp_size in (500, ):
            tmp_file_name = get_file_name(search_terms, tmp_size)
            if os.path.exists(tmp_file_name):
                existing = tmp_file_name
                break
        if existing is not None:
            print("Skipping check, found '%s'" % existing)
            continue

        # NOTE: an emusic.com lookup (1400px first, then 600px) used to run
        # here before falling back to Amazon. It is currently disabled; the
        # helpers get_url and get_album_img_srcs_emusic are kept for it.

        url = get_url_amazon(
            "%s %s" % (search_terms['artist'], search_terms['album']))
        try:
            img_src = get_album_img_src_amazon(url)
            img_file = urllib2.urlopen(img_src)
        except Exception:
            # Best effort: any lookup/download failure skips this album, but
            # Ctrl-C and SystemExit are no longer swallowed.
            print("Skipped, could not find cover art for %s" %
                  search_terms['album'])
            continue

        try:
            save_file(img_file, 500, search_terms)
        finally:
            # Release the HTTP connection whether or not the save succeeded.
            img_file.close()
        print("Saved large amazon cover for %s" % search_terms['album'])
 | 
						|
 | 
						|
# Run the downloader only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
 | 
						|
 |