# game_of_day($outfile)
#
# Fetch the chessgames.com front page, locate the "Game of the Day" entry,
# download that game as PGN and write it to the file named by $outfile.
#
# Parameters:
#   $outfile - path of the file to (over)write with the PGN text.
#
# Returns: 1 on success.
#
# Dies: when $outfile is missing, when the home page or the PGN cannot be
# retrieved, when the expected page structure is not found, or when the
# output file cannot be opened, written or closed.
#
# Relies on names defined elsewhere in this file: $home (front-page URL),
# $tb (the tree object the page is parsed into), get() and _get().
# NOTE(review): presumably $tb is an HTML::TreeBuilder and get() is
# LWP::Simple::get - confirm against the file's header.
sub game_of_day {
    my $outfile = shift;
    die "must supply a file to dump game to"
        unless defined $outfile && length $outfile;

    # Retrieve http://www.chessgames.com
    my $html = get($home);
    die "could not retrieve $home" unless defined $html;

    # Parse the page.
    # NOTE(review): the original never signalled end-of-document to the
    # parser after parse(); left unchanged here - confirm whether the
    # caller handles that.
    $tb->parse($html);

    # Turn text nodes into pseudo-elements tagged '~text' so that they can
    # be searched with look_down like any other node.
    $tb->objectify_text;

    # Sanity check: the page must still carry a "Game of the Day" section,
    # otherwise the absolute addresses below are meaningless.
    my $marker = $tb->look_down(
        '_tag' => '~text',
        text   => 'Game of the Day',
    );
    die "could not find 'Game of the Day' on $home" unless $marker;

    # Absolute addresses (from the root of the tree) of the interesting
    # nodes, found by inspecting the output of $tree->dump.  They are tied
    # to the page layout and will break when the site changes its markup.
    my %address = (
        'date'         => '0.1.2.0.0.0.0.0.0.0.0.0.0.0.2.0',
        'game_url'     => '0.1.2.0.0.0.0.0.0.0.0.1.0.0.0.1',
        'white_player' => '0.1.2.0.0.0.0.0.0.0.0.1.0.0.0.1.0',
        'black_player' => '0.1.2.0.0.0.0.0.0.0.0.1.0.0.0.1.4',
        'game_title'   => '0.1.2.0.0.0.0.0.0.0.0.1.0.0.0.3.0',
    );

    # Debugging output.  A missing node is reported instead of crashing,
    # because only the game URL is actually required below.
    for my $k (sort keys %address) {
        my $node = $tb->address($address{$k});
        warn " ** $k ** ", $/,
            ($node ? $node->as_HTML : '(no node at this address)'), $/;
    }

    # Get the URL of the game and pull the numeric game id out of it.
    my $link = $tb->address($address{game_url})
        or die "could not locate the game link on $home";
    my $game_url = $link->attr('href');
    die "game link on $home has no href attribute"
        unless defined $game_url;
    my ($game_id) = $game_url =~ m/(\d+)/
        or die "could not extract a game id from '$game_url'";

    # Get the game itself, faking out the web spider filter in the process.
    my $pgn = _get(
        "http://www.chessgames.com/perl/nph-chesspgndownload?gid=$game_id");
    die "could not download PGN for game $game_id" unless defined $pgn;

    # Save it to disk.  Three-argument open with a lexical handle keeps
    # characters in $outfile from being interpreted as part of the mode.
    open my $fh, '>', $outfile
        or die "error opening $outfile for writing: $!";
    print {$fh} $pgn
        or die "error writing to $outfile: $!";
    close $fh
        or die "error closing $outfile: $!";

    return 1;
}