#!/usr/bin/perl
#
# cddb_get_tracklist.pl - search the gracenote.com (CDDB) website for albums
# matching the keywords given on the command line, let the user pick one of
# the hits interactively, and dump that album's track listing into the file
# "<album> - <artist>.txt" in the current directory.
#
# The full manual is in the POD after __END__.

use strict;
use warnings;

use Getopt::Long;
use HTTP::Request;
use LWP::UserAgent;

my $progname = $0;
$progname =~ s{.*/}{};    # use basename only
my $version = "0.1";

# These are configurable - they may change from time to time according to
# the CDDB.COM website file system structure:
my $base_url   = "http://www.gracenote.com";
my $search_uri = "/php/search-adv.php3?q=";

# Markup patterns used to scrape the result pages.
# NOTE(review): the original patterns for these three were lost from the
# copy of the script this version was restored from; the values below are
# best guesses based on the surviving comments (results and tracks appear
# as bulleted list items).  Verify them against the live pages.
my $list_start_re = qr{<ul\b}i;               # line that opens the album list
my $list_end_re   = qr{</ul>}i;               # line that closes the album list
my $track_re      = qr{<li>(.*?)</li>}i;      # one track title per list item

# Maximum number of hits per page allowed by cddb.com:
my $max_page_count = 50;

# Default items to show per page:
my $page_count = 10;

# Index of the first result on the current page (the "s" URL parameter):
my $page_curr = 1;

# For debugging:
my $debug;
my $help;

# Parse options with Getopt::Long rather than pattern matching on the joined
# argument list, so that a keyword such as "hip-hop" is not mistaken for -h.
GetOptions(
    'h'   => \$help,
    'd'   => \$debug,
    'n=i' => \$page_count,
) or usage();

# Whatever is left are search keywords; an argument that was quoted on the
# shell ("david holmes") is split into its individual words.
my @keywords = map { split ' ', $_ } @ARGV;

# If the -h flag is set or there is nothing to search for, show usage:
usage() if $help || !@keywords;

# The site serves between 1 and $max_page_count hits per page:
usage() if $page_count < 1 || $page_count > $max_page_count;

# Build the query list; every keyword is percent-encoded so that characters
# such as '&' or '#' cannot corrupt the query string.
my $query_list = join '+', map { _url_escape($_) } @keywords;

my $query_url = _build_query_url();
print "Query URL: $query_url\n" if $debug;

# Start off with the first page of results:
main($query_url);

# main($url)
#
# Fetch one page of search results from $url, parse it and hand the parsed
# list to choose_album().  It is called once for each 'page' of results:
# choose_album() calls back into main() when the user asks for more.
# Dies if the page cannot be retrieved.
sub main {
    my ($url) = @_;

    my $result = get_url($url);
    if (!$result->is_success) {
        # Report the URL that actually failed (not just the first page's
        # URL) together with the HTTP status.
        die "Error retrieving $url (" . $result->status_line . ").\n"
          . "Check and compare the base search URL, \$base_url (=$base_url), "
          . "and the search URI, \$search_uri (=$search_uri), in the code "
          . "against the currently working url/uri at gracenote.com\n\n";
    }

    # We got a result: strip the paging info and the album/artist entries
    # out of the page, then let the user choose an album.
    my @result_lines = split /\n/, $result->content;
    my @album_url    = get_album_url(@result_lines);
    choose_album(@album_url);
    return;
}

# get_url($url)
#
# Issue an HTTP GET for $url and return the response object; callers check
# is_success on it themselves.
sub get_url {
    my ($url) = @_;

    my $cddb_ua = LWP::UserAgent->new;

    # The trailing space is deliberate and kept from the original code.
    # NOTE(review): LWP documents that an agent string ending in a space
    # gets the default libwww-perl identifier appended - confirm.
    $cddb_ua->agent("$progname/$version ");

    my $cddb_req = HTTP::Request->new(GET => $url);
    return $cddb_ua->request($cddb_req);
}

# get_album_url(@lines)
#
# Scan the lines of a search result page and return a list whose first
# element is the paging banner, e.g.
#     Displaying disc 1-10 of 2542 matching CDs
# followed by one "uri##artist / album" string for every album link found
# between the list start and list end markers.
# Dies if no paging banner is found on the page.
sub get_album_url {
    my @lines = @_;

    my ($page_info, $list_started, $list_ended, @entries);

    for my $line (@lines) {
        # Does this line tell us which page we are looking at?
        if (!defined $page_info
            && $line =~ /(Displaying disc .*? of .*? matching CDs)/)
        {
            $page_info = $1;
        }

        # Is this the start of the list?
        if (!$list_started && $line =~ $list_start_re) {
            $list_started = 1;
            next;
        }

        # Is this the end of the list?  (The flag is really set here; it
        # used to be evaluated without being assigned.)
        $list_ended = 1 if $list_started && $line =~ $list_end_re;

        next unless $list_started && !$list_ended;

        # A list item holds a link of the form
        #     A HREF="<uri>" ><artist> / <album></A>
        # Only lines that really contain such a link are recorded, so stale
        # captures from an earlier match can never leak into the result.
        if ($line =~ /A HREF="([^"]*)" >(.*?)<\/A>/) {
            push @entries, "$1##$2";
        }
    }

    defined $page_info
        or die "Unable to retrieve paging info\n";

    return ($page_info, @entries);
}

# choose_album($page_info, @album_url)
#
# Print the paging banner and the numbered albums of the current page, then
# read commands from STDIN:
#   <number>        fetch that album's track listing and exit
#   q               exit
#   anything else   fetch and show the next page of results
# @album_url holds "uri##artist / album" strings as built by get_album_url().
sub choose_album {
    my ($page_info, @album_url) = @_;

    # Never list (or accept) more entries than the page actually returned.
    my $shown     = @album_url < $page_count ? scalar(@album_url) : $page_count;
    my $last_page = @album_url < $page_count;

    # Print paging info:
    print $page_info, "\n";

    if (!$shown) {
        print "No albums listed on this page.\n";
        return;
    }

    for my $i (0 .. $shown - 1) {
        my (undef, $album) = split /##/, $album_url[$i], 2;
        printf "%2s. %s\n", $i + 1, $album;
    }

    print "Select album (1, ..., $shown)\n";
    print "'q' to quit\n";
    print "Any other key for more...\n" unless $last_page;

    while (defined(my $input = <STDIN>)) {
        chomp $input;

        if ($input =~ /^\s*(\d+)\s*$/) {
            my $choice = $1;

            # Reject 0 and out-of-range numbers instead of silently
            # indexing from the end of (or beyond) the list.
            if ($choice < 1 || $choice > $shown) {
                print "Please enter a number between 1 and $shown\n";
                next;
            }
            get_track_listing($album_url[$choice - 1]);
            exit;
        }
        elsif ($input =~ /^\s*[qQ]/) {
            exit;
        }
        elsif ($last_page) {
            print "No more results.\n";
        }
        else {
            # Move on by one page; $page_curr is global so that every page
            # URL is built from the same state.
            $page_curr += $page_count;
            main(_build_query_url());
            return;
        }
    }
    return;
}

# get_track_listing($entry)
#
# $entry is one "uri##artist / album" string from get_album_url().  Fetch
# the album page, extract the track titles and write them, one per line,
# to "<album> - <artist>.txt" in the current directory.  In debug mode the
# titles are echoed to STDOUT as well.
# Dies if the page cannot be retrieved or the file cannot be written.
sub get_track_listing {
    my ($entry) = @_;

    my ($uri, $album_artist) = split /##/, $entry, 2;
    $album_artist = '' unless defined $album_artist;
    my $url = $base_url . $uri;

    my ($artist, $album) = split m{ / }, $album_artist, 2;
    $artist = 'Unknown Artist' unless defined $artist && length $artist;
    $album  = 'Unknown Album'  unless defined $album  && length $album;

    # The names come from a remote web page: neutralise path separators and
    # control characters so the file always lands in the current directory.
    my $outfile = "$album - $artist.txt";
    $outfile =~ s{[/\\\x00-\x1f]}{_}g;

    # Fetch the page containing the track list *before* creating the output
    # file, so a failed download does not leave an empty file behind.
    my $result = get_url($url);
    $result->is_success
        or die "Unable to retrieve $url\n";

    # Parse out the track listing; a line may carry several items.
    my @tracks;
    for my $line (split /\n/, $result->content) {
        push @tracks, $line =~ /$track_re/g;
    }
    warn "No tracks found at $url - check \$track_re in $progname\n"
        unless @tracks;

    open my $out, '>', $outfile
        or die "Unable to open $outfile for writing: $!\n";
    for my $track (@tracks) {
        print {$out} $track, "\n";
        print $track, "\n" if $debug;
    }
    close $out
        or die "Unable to finish writing $outfile: $!\n";
    return;
}

# usage()
#
# Print the command-line help on STDERR and terminate with a failure status.
sub usage {
    die <<"EOT";
Usage: $progname [-h] [-d] [-n x] keyword1 ... keywordn

Search/query the cddb.com website for CD-ROM listings including the search
keywords keyword1 to keywordn.

Invoked with argument '-h' prints this help.
Invoked with argument '-d' prints debug info.
Invoked with argument '-n x' prints x number of results per page.
Max x == $max_page_count (max number of 'hits' per page allowed by cddb.com).
EOT
}

# _build_query_url()
#
# Return the search URL for the page of results starting at $page_curr.
sub _build_query_url {
    return $base_url . $search_uri . $query_list
         . "&f=all&s=$page_curr&n=$page_count";
}

# _url_escape($text)
#
# Return $text with every character outside the URL "unreserved" set
# replaced by its %XX escape.
sub _url_escape {
    my ($text) = @_;
    $text =~ s/([^A-Za-z0-9_.~-])/sprintf('%%%02X', ord($1))/ge;
    return $text;
}

1;

__END__

=head1 NAME

cddb_get_tracklist.pl - search for CD discs matching keywords entered on
command-line.

=head1 SYNOPSIS

    cddb_get_tracklist.pl david holmes

Fetch a list of all albums listed on cddb.com containing the words
'david holmes'.  Note this searches for occurrences of 'david holmes' in any
of album name, artist or track titles.

=head1 DESCRIPTION

Fetches a list of albums from the CDDB website matching the search string
entered on the command line.  An individual album can then be selected from
this list so that the track listing for that album can be 'dumped' into a
file in the current directory.

With additional arguments, the script will also vary the number of album
titles per page to display.

=head1 README

    Author:        Jez Hancock
    Date:          20020622113210
    Modules used:  LWP::UserAgent, Getopt::Long

Notes:

You may want to change the output file name format, I use
'album_title - artist.txt', which is good for me, but a lot of ppl don't like
spaces in filenames... up to you...

The code isn't that hot, and no doubt there are untold bugs... feel free to
modify the code as you like, please just mail me if you do make any
considerable changes - nice to hear about offspring making it in the world ;)

The code is liable to 'break' at such time that the fine folk at
http://gracenote.com decide to change the search URL/URI format.
This shouldn't be too hard to fix and should just be a matter of finding out
the new format and editing the strings $base_url and $search_uri near the top
of the script accordingly.

Wish list:

To have the numbering fixed when a user 'pages' from one screen of results to
the next.  Presently, the first page will show result items numbered
'1 ... 10', and the second page will then show items numbered '1 ... 10' also.
This works ok, just an aesthetic thing ;)

This script is totally raw!  I only hacked it up because I couldn't find it
anywhere else (to my surprise).  Hope others find it useful... if you do,
let me know!

Jez

=head1 USAGE

C<cddb_get_tracklist.pl [-h] [-d] [-n x] keyword1 ... keywordn>

Search/query the cddb.com website for CD-ROM listings including the search
keywords keyword1 to keywordn.

Invoked with argument '-h' prints this help.

Invoked with argument '-d' prints debug info.

Invoked with argument '-n x' prints x number of results per page.
Max x == 50 (max number of 'hits' per page allowed by cddb.com).

=head1 PREREQUISITES

This script requires the C<LWP::UserAgent> module.

=head1 AUTHOR

Copyright 1998-2000, Jez Hancock

All rights reserved.

This library is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

Address bug reports and comments to: jez.hancock@munkboxen.mine.nu

=head1 BUGS

HTML Character Entity References aren't translated into their ASCII
equivalents (e.g. '&amp;' isn't translated into '&').

Minimal paging, could be tweaked.

=head1 SEE ALSO

(The module name that originally appeared here has been lost from this copy
of the documentation.)

Interesting looking PM I found only after authoring this hack.

=head1 OSNAMES

any

=head1 SCRIPT CATEGORIES

Audio/MP3

=cut