#!/usr/bin/perl
# Fetch the O'Reilly catalog index page, extract one record per book row
# (title, ISBN, price and related links), then print the Perl titles ordered
# by price followed by a Perl-vs-Java title count.
use strict;
use warnings;

use LWP::Simple;
use HTML::TreeBuilder;

my $url = 'http://www.oreilly.com/catalog/prindex.html';

# LWP::Simple::get returns undef on failure and does not set $!, so report
# the URL rather than an unrelated errno string.
my $page = get($url) or die "Could not fetch $url\n";

my $p = HTML::TreeBuilder->new_from_content($page);

# Every anchor whose href is a catalog entry URL.
my @links = $p->look_down(
    _tag => 'a',
    href => qr{^ \Qhttp://www.oreilly.com/catalog/\E \w+$}x
);

# The row is taken to be the grandparent of each link (presumably
# <a> inside <td> inside <tr> -- confirm against the live markup).
# Links without a grandparent are dropped.
my @rows = grep { defined }
           map  { my $cell = $_->parent; $cell ? $cell->parent : undef }
           @links;

my @books;
for my $row (@rows) {
    my @cells = $row->look_down( _tag => 'td' );

    # Title, ISBN and price are read from the first three cells; a row
    # with fewer cells cannot be a complete book record.
    next if @cells < 3;

    my %book;
    $book{title} = $cells[0]->as_trimmed_text;
    $book{isbn}  = $cells[1]->as_trimmed_text;
    $book{price} = $cells[2]->as_trimmed_text;
    $book{price} =~ s/^\$//;    # drop the leading dollar sign

    # get_url copes with missing cells, so short rows simply get undef here.
    $book{url}      = get_url( $cells[0] );
    $book{safari}   = get_url( $cells[3] );
    $book{examples} = get_url( $cells[4] );

    push @books, \%book;
}

# get_url( $node )
#   Returns the href of the first <a> element found at or below $node, with
#   trailing whitespace removed. Returns nothing (undef in scalar context)
#   when $node is undefined, contains no anchor, or the anchor has no href.
sub get_url {
    my $node = shift;
    return unless defined $node;

    my @hrefs = $node->look_down( _tag => 'a' );
    return unless @hrefs;

    my $url = $hrefs[0]->attr('href');
    return unless defined $url;

    $url =~ s/\s+$//;
    return $url;
}

# The book records hold only plain strings, so the parse tree can be
# released before reporting.
$p = $p->delete;

# Perl titles are needed by both reports; compute them once.
my @perlbooks = grep { $_->{title} =~ /perl/i } @books;

{
    # Numbered listing of Perl titles, cheapest first.
    my $count = 1;
    my @by_price = sort { $a->{price} <=> $b->{price} } @perlbooks;
    print $count++, "\t", $_->{price}, "\t", $_->{title}, "\n" for @by_price;
}

{
    my @javabooks = grep { $_->{title} =~ /java/i } @books;

    # Arrays in numeric context yield their element counts.
    my $diff = @javabooks - @perlbooks;
    print "There are " . @perlbooks . " Perl books and " . @javabooks
        . " Java books. $diff more java than Perl.\n";
}