in reply to Re: Extracting span and meta content with HTML::TreeBuilder
in thread Extracting span and meta content with HTML::TreeBuilder
# For every review page: fetch it, transliterate non-ASCII text, parse the
# HTML and print the itemprop/content pair of each <meta> element found
# beneath elements whose class attribute is exactly 'review-content'.
#
# $review_pages is defined outside this snippet; each entry is used as an
# array ref whose element 0 is a label and element 1 is the URL to fetch.
for my $page (@$review_pages) {
    my $html = get( $page->[1] );

    # get() yields undef when the fetch fails; stop with a clear message
    # instead of parsing nothing and failing later with "no items".
    die("could not fetch $page->[1]\n") unless defined $html;

    # Replace each run of non-ASCII characters with its ASCII approximation.
    $html =~ s/([^[:ascii:]]+)/unidecode($1)/ge;

    my $tree = HTML::TreeBuilder->new;    # empty tree
    $tree->parse($html);
    $tree->eof;    # signal end of input so the parser finalizes the tree

    print "Review for $page->[0]\n";

    # look_down sets no OS error, so $! is deliberately not reported here.
    my @items = $tree->look_down( 'class', 'review-content' )
        or die("no items found for $page->[0]\n");

    for my $item (@items) {

        # NOTE(review): this is where the original died. HTML::TreeBuilder
        # may move or drop <meta> elements that appear inside <body>, since
        # it treats them as head-only -- confirm against the parsed tree
        # (e.g. $tree->dump) before relying on this lookup.
        my @meta = $item->look_down( '_tag', 'meta' )
            or die("no meta elements found for $page->[0]\n");

        for my $meta_item (@meta) {
            my $itemprop = $meta_item->attr('itemprop');
            my $content  = $meta_item->attr('content');

            # A missing attribute comes back as undef; print it as an
            # empty string rather than emitting warnings.
            $itemprop = '' unless defined $itemprop;
            $content  = '' unless defined $content;

            print $itemprop;
            print ' = ';
            print $content . "\n";
        }
    }

    # Release the tree explicitly; older HTML::TreeBuilder versions keep
    # circular references that are not freed automatically.
    $tree->delete;
}
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re^3: Extracting span and meta content with HTML::TreeBuilder
by poj (Abbot) on Jul 16, 2014 at 21:35 UTC | |
by wrinkles (Pilgrim) on Jul 16, 2014 at 22:17 UTC | |
by tangent (Parson) on Jul 17, 2014 at 01:54 UTC | |
by poj (Abbot) on Jul 17, 2014 at 12:20 UTC | |
by wrinkles (Pilgrim) on Jul 18, 2014 at 01:41 UTC | |
by Anonymous Monk on Jul 18, 2014 at 02:44 UTC |