#!/usr/bin/perl
# Search the CDDB/Gracenote website for albums matching the command-line
# keywords and let the user dump one album's track list to a text file.
use strict;
# NOTE: `use warnings` is deliberately not switched on here; enabling it is
# worthwhile but should be done together with a pass over the whole script.
use Getopt::Long;
use LWP::UserAgent;

my $progname = $0;
$progname =~ s,.*/,,; # use basename only
my $version = "0.1";

# these are configurable - they may change from time to time according to
# CDDB.COM website file system structure:
my $base_url   = "http://www.gracenote.com";
my $search_uri = "/php/search-adv.php3?q=";
# default number of items to show per page (the `n=` URL parameter):
my $page_count = 10;
# current result/album+artist item (the `s=` URL parameter):
my $page_curr = 1;
# set by -d; enables debug output:
my $debug;

# Parse options with Getopt::Long rather than pattern-matching the joined
# argument string.  The old approach treated any keyword containing "-h"
# (e.g. "a-ha") as a request for help, deleted "-d" from inside keywords,
# and glued neighbouring keywords together when it removed "-n x" or "-d".
# Keywords that start with '-' can now be passed after a literal `--`.
my $help;
my $per_page;
GetOptions(
    'h|help'  => \$help,
    'd|debug' => \$debug,
    'n=i'     => \$per_page,
) or usage();

# -h flag set or no search keywords left: show usage.
usage() if $help || !@ARGV;

if (defined $per_page) {
    # max per page from cddb.com is 50; anything below 1 makes no sense
    usage() if $per_page < 1 || $per_page > 50;
    $page_count = $per_page;
}

# Build the query string: percent-encode each keyword so characters such as
# '&', '#' or spaces cannot corrupt the URL, then join the keywords with '+'.
my $query_list = join "+", map {
    my $term = $_;
    # encode to bytes first so every escaped character is a two-digit %XX
    utf8::encode($term) if utf8::is_utf8($term);
    $term =~ s/([^A-Za-z0-9_.~-])/sprintf("%%%02X", ord($1))/ge;
    $term;
} @ARGV;

# build query url:
my $query_url = $base_url.$search_uri.$query_list."&f=all&s=$page_curr&n=$page_count";
print "Query URL: $query_url\n" if $debug;

# start off with first url:
main($query_url);
# Fetch one page of search results and let the user choose from it.
# Called once for the first page and then again, from choose_album(), for
# every further page the user asks for.
#
# Argument: the complete search URL of the page to fetch.
# Dies if the page cannot be retrieved.
sub main {
    my ($url) = @_;

    # run the query on the query url:
    my $result = get_url($url);
    unless ($result->is_success) {
        # Report the URL that actually failed (not the first-page URL held in
        # the file-level $query_url) together with the HTTP status.
        my $status = $result->status_line;
        die("
Error retrieving $url ($status).
Check and compare the base search URL, \$base_url (=$base_url),
and the search URI, \$search_uri (=$search_uri),
in the code against the currently working url/uri at gracenote.com\n\n");
    }

    # we got a result, parse it line by line:
    my @result_lines = split /\n/, $result->content;
    # first element is the paging banner, the rest are "uri##title" entries:
    my @album_url = get_album_url(@result_lines);
    # display results for user to choose an album:
    choose_album(@album_url);
    return;
}
# Perform an HTTP GET on the given URL.
#
# Argument: the URL to fetch.
# Returns:  the HTTP::Response object; callers check ->is_success themselves.
sub get_url {
    my ($url) = @_;

    # create user agent object (direct method call, not `new Class` syntax):
    my $cddb_ua = LWP::UserAgent->new;
    # identify ourselves using the script's $version instead of a second
    # hard-coded copy of the number; the trailing space is kept as in the
    # original agent string
    $cddb_ua->agent("$progname/$version ");

    # make the request:
    return $cddb_ua->get($url);
}
# Extract the paging banner and the album entries from a results page.
#
# Arguments: the lines of the HTML results page.
# Returns:   a list whose first element is the "Displaying disc x-y of n
#            matching CDs" banner, followed by one "uri##title" string per
#            album link that was found.
# Dies with "Unable to retrieve paging info" if no banner was found.
#
# NOTE(review): this file appears to have been run through an HTML tag
# stripper.  The start-of-list and end-of-list patterns below are empty
# (`//`) in the file as received; in Perl an empty pattern re-uses the last
# successfully matched pattern, so until the real markers are restored from
# a pristine copy of the script the list detection is effectively undefined.
# They are left exactly as found rather than guessed at.
sub get_album_url {
    my ($list_started, $list_ended, @result_list);
    foreach (@_) {
        # does this line tell us what page we're looking at, e.g.:
        #   Displaying disc 1-10 of 2542 matching CDs
        (/.*?(Displaying disc .*? of .*? matching CDs).*?<\/p>/) && (push @result_list, $1);

        # is this start of list? (marker pattern lost -- see NOTE above)
        (//) && ($list_started = 1) && (next);
        # is this end of list? (marker pattern lost -- see NOTE above)
        # The flag is now actually set; previously the bare `($list_ended)`
        # was a no-op, so the end of the list was never recorded.
        (//) && ($list_ended = 1);

        next unless $list_started && !$list_ended;

        # A list item looked like this (the surrounding markup of the sample
        # was stripped, and the sample lines had lost their comment markers,
        # which made them a syntax error):
        #   The Beastie Boys / Hello Nasty
        #   Just A Test
        # Strip out url and album title/artist.  Skip lines without a link
        # instead of pushing stale captures left over from an earlier match.
        next unless /A HREF="(.*)" >(.*)<\/A>/;
        push @result_list, "$1##$2";
    }

    # make sure the first item in @result_list is the
    # 'Displaying disc x of n matching CDs' banner:
    (defined $result_list[0] && $result_list[0] =~ /^Displaying disc/)
        || die("Unable to retrieve paging info\n");
    return @result_list;
}
# Show one page of results and act on the user's choice.
#
# Arguments: the paging banner followed by the "uri##title" album entries
#            (the list produced by get_album_url()).
# Reads choices from STDIN, one per line:
#   a listed number -> write that album's track list to a file and exit
#   q / quit        -> exit
#   anything else   -> fetch and show the next page (unless this page came
#                      back shorter than $page_count, i.e. it is the last)
# Returns when STDIN reaches end-of-file.
sub choose_album {
    my $page_info = shift;
    my @album_url = @_;

    # Only list the entries we actually received; a short page is the last
    # one.  (Previously $page_count rows were always printed, giving blank
    # rows on a short page, and the global $page_count was overwritten.)
    my $last_page = @album_url < $page_count;
    my $shown     = $last_page ? scalar(@album_url) : $page_count;

    # print paging info:
    print $page_info, "\n";
    for my $i (0 .. $shown - 1) {
        my (undef, $album) = split /##/, $album_url[$i], 2;
        printf("%2s. %s\n", $i + 1, $album);
    }

    print "Select album (1, ..., $shown)\n" if $shown > 0;
    print "'q' to quit\n";
    print "Any other key for more...\n" unless $last_page;

    # NOTE: in the file as received the loop condition was empty (`while()`),
    # which loops forever without reading anything; the chomp and the
    # patterns in the body show it was meant to read the user's input.
    while (defined(my $choice = <STDIN>)) {
        chomp $choice;
        if ($choice =~ /\A(\d+)\z/) {
            my $number = $1;
            # reject 0 and out-of-range numbers instead of indexing the list
            # with -1 or past its end
            if ($number < 1 || $number > $shown) {
                print "Please enter a number between 1 and $shown\n";
                next;
            }
            get_track_listing($album_url[$number - 1]);
            exit;
        }
        elsif ($choice =~ /\A\s*q(?:uit)?\s*\z/i) {
            exit;
        }
        elsif ($last_page) {
            print "No more results.\n";
        }
        else {
            # advance the current item by one page and fetch that page:
            $page_curr += $page_count;
            my $next_url = $base_url.$search_uri.$query_list."&f=all&s=$page_curr&n=$page_count";
            # the & form makes the call work regardless of any prototype
            # declared on main()
            &main($next_url);
        }
    }
    return;
}
# Fetch the page for one album and write its track list to a text file named
# "<album> - <artist>.txt" in the current directory.
#
# Argument: one "uri##artist / album" entry as produced by get_album_url().
# Dies if no entry is given, if the page cannot be retrieved, or if the
# output file cannot be written.
sub get_track_listing {
    my ($entry) = @_;
    die("No album entry given\n") unless defined $entry;

    my ($uri, $album_artist) = split /##/, $entry, 2;
    $album_artist = '' unless defined $album_artist;
    my $url = $base_url.$uri;

    my ($artist, $album) = split m{ / }, $album_artist, 2;
    $artist = '' unless defined $artist;
    # titles without an " / " separator have no album part; use what we have
    my $outfile = defined $album ? "$album - $artist.txt" : "$artist.txt";
    # The name comes from a remote web page: neutralise path separators and
    # control characters so it cannot escape the current directory.
    $outfile =~ s{[/\\\x00-\x1f]}{_}g;
    $outfile = "tracklist.txt" if $outfile eq ".txt";

    # Fetch the page containing the track list *before* touching the disk, so
    # a failed download no longer leaves an empty file behind.
    # (& form: works regardless of any prototype declared on get_url().)
    my $result = &get_url($url);
    unless ($result->is_success) {
        die("Unable to retrieve $url\n");
    }

    # open the output file for printing track list to (three-argument open
    # with a lexical handle, so the file name is never parsed as a mode):
    open(my $out_fh, '>', $outfile)
        or die("Unable to open $outfile for writing: $!\n");

    # we got the html page containing the track list ok,
    # parse out the track listing now.
    # track items look like this:
    #   Super Disco Breakin'
    #
    # NOTE(review): this pattern has lost its HTML markup -- in the file as
    # received it was just `(.*?)` followed by a literal line break.  Because
    # the content is split on newlines first, a pattern that requires a
    # newline can never match, so nothing is written until the real pattern
    # is restored from a pristine copy of the script.  It is kept
    # semantically identical here rather than guessed at.
    foreach my $line (split /\n/, $result->content) {
        if ($line =~ m#(.*?)\n#) {
            print {$out_fh} $1, "\n";
            print $1, "\n" if $debug;
        }
    }

    # buffered write errors only surface when the handle is closed
    close($out_fh) or die("Unable to finish writing $outfile: $!\n");
    return;
}
# Emit the command-line help text and terminate the script.
# The text goes out via die(), i.e. on STDERR with a failure exit status;
# it ends in a newline so no "at ... line N" suffix is appended.
sub usage {
    my $help_text = <<"EOT";
Usage: $progname [-h] [-d] [-n x] keyword1 ... keywordn
Search/query the cddb.com website for CD-ROM listings including
the search keywords keyword1 to keywordn.
Invoked with argument '-h' prints this help.
Invoked with argument '-d' prints debug info.
Invoked with argument '-n x' prints x number of results per page.
Max x == 50 (max number of 'hits' per page allowed by cddb.com).
EOT
    die $help_text;
}
1;
__END__
=head1 NAME
cddb_get_tracklist.pl - search for CD discs matching keywords entered on command-line.
=head1 SYNOPSIS
cddb_get_tracklist.pl david holmes
Fetch a list of all albums listed on cddb.com that contain the words
'david holmes'.
Note that this searches for occurrences of 'david holmes' in any of album name, artist or track titles.
=head1 DESCRIPTION
Fetches a list of albums from the CDDB website matching the search string
entered on the command line.
An individual album can then be selected from this list so that the
track listing for that album can be 'dumped' into a file in the current directory.
With additional arguments, the script will also vary the number of album titles
per page to display.
=head1 README
Author:
Jez Hancock
Date:
20020622113210
Modules used:
LWP::UserAgent
Notes:
You may want to change the output file name format, I use
'album_title - artist.txt', which is good for me, but a lot of ppl
don't like spaces in filenames... up to you...
The code isn't that hot, and no doubt there are untold bugs... feel free
to modify the code as you like, please just mail me if you do make any considerable
changes - nice to hear about offspring making it in the world ;)
The code is liable to 'break' at such time that the fine folk at http://gracenote.com
decide to change the search URL/URI format. This shouldn't be too hard to fix and should
just be a matter of finding out the new format and editing the strings $base_url and $search_uri
accordingly (both are set near the top of the script).
Wish list:
To have the numbering fixed when a user 'pages' from one screen of results to the next.
Presently, first page will show result items numbered: '1 ... 10', second page will then
show items numbered: '1 ... 10' also. This works ok, just an aesthetic thing ;)
This script is totally raw! I only hacked it up because I couldn't find it anywhere else
(to my surprise). Hope others find it useful... if you do let me know!
Jez
=head1 USAGE
C<cddb_get_tracklist.pl [-h] [-d] [-n x] keyword1 ... keywordn>
Search/query the cddb.com website for CD-ROM listings including
the search keywords keyword1 to keywordn.
Invoked with argument '-h' prints this help.
Invoked with argument '-d' prints debug info.
Invoked with argument '-n x' prints x number of results per page.
max x == 50 (max number of 'hits' per page allowed by cddb.com).
=head1 PREREQUISITES
This script requires the C<LWP::UserAgent> module.
=head1 AUTHOR
Copyright 1998-2000, Jez Hancock All rights reserved.
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
Address bug reports and comments to: jez.hancock@munkboxen.mine.nu
=head1 BUGS
HTML character entity references aren't translated into their ASCII equivalents
(e.g. '&amp;' isn't translated into '&').
Minimal paging, could be tweaked.
=head1 SEE ALSO
C
Interesting looking PM I found only after authoring this hack.
=head1 OSNAMES
any
=head1 SCRIPT CATEGORIES
Audio/MP3
=cut