in reply to Re^4: Extracting span and meta content with HTML::TreeBuilder
in thread Extracting span and meta content with HTML::TreeBuilder
#!perl
# Posted by poj.
#
# Print one numbered line per record found in an HTML file, built from the
# <meta itemprop="..." content="..."> tags named author, ratingValue and
# datePublished. Output format:  "<n> <author> ; <ratingValue> ; <datePublished>"
#
# A record is treated as complete when its datePublished tag is reached, so
# the script assumes datePublished is the last of the three tags in each
# record -- TODO confirm against the real page layout.
use strict;
use warnings;
use HTML::TreeBuilder::XPath;

my $file = 'test.htm';

my $tree = HTML::TreeBuilder::XPath->new;

# parse_file (inherited from HTML::Parser) returns undef and sets $! when the
# file cannot be opened, so this is the one place where $! is meaningful.
# For an in-memory string use ->parse($html) followed by ->eof instead.
$tree->parse_file($file)
    or die "cannot parse '$file': $!\n";

# The =~ /regex/ predicate is an extension provided by the XPath engine.
# The pattern is unanchored, so it matches any itemprop *containing* one of
# these names.
my $xpath = '//meta[@itemprop=~/author|datePublished|ratingValue/]';

my @items = $tree->findnodes($xpath);

# findnodes does not set $!, so report the real situation rather than errno.
die "no items matched $xpath in '$file'\n" unless @items;

my @fields = qw(author ratingValue datePublished);
my %rec;
my $count = 0;

for my $item (@items) {
    my $prop = $item->attr('itemprop');
    $rec{$prop} = $item->attr('content');

    # Keep collecting until the tag that closes a record shows up.
    next unless $prop eq 'datePublished';

    # A missing tag or missing content attribute prints as an empty field
    # instead of triggering an "uninitialized value" warning.
    my @values = map { defined $rec{$_} ? $rec{$_} : '' } @fields;
    print ++$count, ' ', join( ' ; ', @values ), "\n";

    # Start the next record clean so an absent author/ratingValue is not
    # silently filled in with the previous record's value.
    %rec = ();
}

# Release the tree explicitly; older HTML::TreeBuilder versions do not free
# their circular parent/child references on their own.
$tree->delete;
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re^6: Extracting span and meta content with HTML::TreeBuilder
by wrinkles (Pilgrim) on Jul 18, 2014 at 01:41 UTC | |
by Anonymous Monk on Jul 18, 2014 at 02:44 UTC |