I picked up Spidering Hacks and I've had a problem with hack #19.
I keep getting the errors:
"my" variable @perlbooks masks earlier declaration in same scope at treebuilder.pl line 50.
Bareword "parent" not allowed while "strict subs" in use at treebuilder.pl line 15.
syntax error at treebuilder.pl line 34, near ")
Here is the script:
#!/usr/bin/perl use strict; use LWP::Simple; use HTML::TreeBuilder; my $url = 'http:www.oreilly.com/catalog/prindex.html'; my $page = get( $url ) or die $!; my $p = HTML::TreeBuilder->new_from_content( $page ); my @links = $p->look_down( _tag => 'a', href => qr{^ \Qhttp://www.oreilly.com/catalog/\E \w+$}x ); my @rows = map { $_->parent-parent } @links; my @books; for my $row (@rows) { my %book; my @cells = $row->look_down( _tag => 'td' ); $book{title} = $cells[0]->as_trimmed_text; $book{isbn} = $cells[1]->as_trimmed_text; $book{price} = $cells[2]->as_trimmed_text; $book{price} =~ s/^\$//; $book{url} = get_url( $cells[0] ); $book{safari} = get_url( $cells[3] ); $book{examples} = get_url( $cells[4] ); push @books, \%book; } sub get_url { my $node = shift; my @hrefs = $node->look_down( _tag => 'a' ) return unless @hrefs; my $url = $hrefs[0]->attr('href'); $url =~ s/\s+$//; return $url; } $p = $p->delete; { my $count = 1; my @perlbooks = sort { $a->{price} <=> $b->{price} } grep { $_->{title} =~ /perl/i } @books; print $count++, "\t", $_->{price}, "\t", $_->{title} for @perl +books; } { my @perlbooks = grep { $_->{title} =~ /perl/i } @books; my @javabooks = grep { $_->{title} =~ /java/i } @books; my $diff = @javabooks - @perlbooks; print "There are " .@perlbooks." Perl books and ".@javabooks. " Java books. $diff more java than Perl." }
In reply to Scraping with Treebuilder by lv211
| For: | Use: | ||
| & | & | ||
| < | < | ||
| > | > | ||
| [ | [ | ||
| ] | ] |