#!/usr/bin/perl -w
#This little script will search AMAZON.COM for a CD
#It gets the info from ID Tags in an MP3
#I chose not to simply search on album tags as a lot of the
#mp3's that float around do not have those tags in them (In my experience)

use strict;
use HTTP::Request::Common;
use LWP::UserAgent;
use Image::Grab;
use MP3::Info;

my %songs;    # product IDs returned by the title search (filled in further down)

# Two arguments are required: the MP3 to read and the image file to write.
# Errors are reported with die (STDERR, non-zero exit status) like every
# later failure in this script, so a calling shell can detect them.
if (@ARGV < 2 || !length($ARGV[1])) {
    die 'Usage is "getimage mp3file outputfile"' . "\n";
}
my $file    = $ARGV[0];    # mp3 file
my $outfile = $ARGV[1];    # out (to .jpg)

# Read the ID tag.  Guard against get_mp3tag handing back nothing before
# looking inside it, instead of relying on autovivification of $tag.
my $tag    = get_mp3tag($file);
my $artist = $tag ? $tag->{ARTIST} : undef;    # the artist name
my $title  = $tag ? $tag->{TITLE}  : undef;    # and the title

# Quit unless both exist.  Test for a non-empty string rather than truth so
# that an artist or title of "0" is still accepted.
unless (defined($artist) && length($artist) && defined($title) && length($title)) {
    die "Could not get song info, please try a different song\n";
}

print "Using $artist and $title\n";
my $ua = LWP::UserAgent->new();    # our web browser

# Amazon's search engine
my $search_url = 'http://www.amazon.com/exec/obidos/search-handle-form/';

# Submit one search form and return the product IDs parse() finds in the
# response.
#   $what - short label used only in the error message
#   @form - key/value pairs to POST
# Dies when the request fails with a client/server error status.
my $search = sub {
    my ($what, @form) = @_;

    my $req = POST($search_url, \@form);
    my $res = $ua->request($req);

    # Only error statuses are treated as failure: parse() looks for a
    # "Location:" header line and fetches that page itself, so a redirect
    # response must be passed through rather than rejected.
    die "Could not access $search_url ($what search): " . $res->status_line . "\n"
        if $res->is_error;

    # Headers and body are scanned together, one line at a time.
    return parse(split /\n/, $res->as_string);
};

# parse the page - get just the cd's (vs ads etc)
my @artists = $search->(
    'artist',
    "size"         => "1000",     # set the size big for lots of hits
    "index"        => "music",    # we don't want to search books :-)
    "field-artist" => $artist,
);

# amazon has import CD's - so reverse it to get most common hits to the top
@artists = reverse @artists;

my @songs = $search->(
    'title',
    "size"           => "1000",            # see artists above
    "index"          => "music-tracks",
    "field-keywords" => $title,
);

# make a hash from the parsed page - quick searching
$songs{$_} = 1 for @songs;

# get the image:

# Check the artist results against the song results.  The loop does not stop
# at the first hit, so when several IDs appear in both result sets the last
# matching entry of @artists is the one used.
my $song;
for my $id (@artists) {
    $song = $id if $songs{$id};
}

# no match was found so..
die "Sorry, no match found, please try a different CD\n" unless $song;

# match was found - continue on
# this is how amazon names their jpgs
$song .= '.01.LZZZZZZZ.jpg';

# I couldn't make this work with LWP - so using Image::Grab
my $image = Image::Grab->new;
$image->url("http://images.amazon.com/images/P/$song");
$image->grab;

# if there is no large image just die (we don't care about small ones)
die "Could not access the image - probably don't have a large one" unless $image->image;

# Three-argument open with a lexical handle: the output name comes straight
# from the command line and must never be interpreted as part of the mode.
open(my $out, '>', $outfile) or die "Could not create $outfile: $!";

# The image is binary data; binmode is required on dos (windows) and is
# harmless everywhere else, so apply it unconditionally.
binmode $out;

# Buffered write errors may only surface at close, so check both.
print {$out} $image->image or die "Could not write to $outfile: $!";
close $out or die "Could not finish writing $outfile: $!";

#subs from here on down
# Pull the product ID out of one line of an Amazon page.
#
# A line qualifies when it contains "/ASIN/" or, failing that,
# "/detail/-/music/".  The ID is whatever follows that marker up to the next
# "/" (or the end of the line).
#
# Returns the ID (possibly an empty string when nothing follows the marker),
# or undef / the empty list when the line contains neither marker.
sub _product_id {
    my ($line) = @_;

    return unless $line =~ m{/ASIN/(.*)} || $line =~ m{/detail/-/music/(.*)};

    my $id = $1;
    $id =~ s{/.*}{};    # keep only the path segment right after the marker
    return $id;
}

# Collect product IDs from the lines of an HTTP response.
#
# Takes the response (headers and body) as a list of lines and returns the
# list of IDs found, in page order.  Two kinds of line are recognised:
#   * a line carrying a product link - its ID is taken directly;
#   * a "Location: URL" header line - that URL is fetched with a GET and the
#     lines of the resulting page are scanned for product links instead.
# Links with nothing after the marker are skipped, so the result never
# contains an empty ID.
sub parse {
    my @lines = @_;
    my @matches;
    my $ua;    # created on first use and shared by every redirect we follow

    for my $line (@lines) {
        my $id = _product_id($line);

        # first possibility is easy: the line itself holds a product link
        if (defined $id) {
            push @matches, $id if length $id;
        }

        # second possibility requires more work, have to get another page
        elsif ($line =~ /^Location: (.*)/) {
            my $target = $1;
            $ua ||= LWP::UserAgent->new();
            my $page = $ua->request(HTTP::Request->new('GET', $target))->as_string;

            # then repeat the above on the page we were redirected to
            for my $page_line (split /\n/, $page) {
                my $page_id = _product_id($page_line);
                push @matches, $page_id
                    if defined($page_id) && length($page_id);
            }
        }
    }
    return @matches;
}