in reply to Re: Extracting data-structure from HTML using Web::Scraper
in thread Extracting data-structure from HTML using Web::Scraper

And here is the same thing with XML::Twig, since the logic is the same:

#!/usr/bin/perl --
# Group the <p> entries of an HTML fragment under the <h4> heading that
# precedes them, producing a list of single-key hashes:
#   [ { heading => [ entry, ... ] }, ... ]
# The result (and a trace of each handled element's path) is printed
# with Data::Dump's dd().
use strict;
use warnings;

use Data::Dump;
use XML::Twig;

my $sample = q{
<html><body>
  <h4 class="bla">July 12</h4>
  <p>Tim</p>
  <p>Jon</p>
  <h4 class="bla">July 13</h4>
  <p>James</p>
  <p>Eric</p>
  <p>Jerry</p>
  <p>Susie</p>
  <h4 class="date">July 14</h4>
  <p>Kami</p>
  <p>Darryl</p>
</body></html>
};

my @root;           # one hash per <h4>, in document order
my $current_key;    # text of the most recent <h4>; undef until one is seen

my $xml = XML::Twig->new(
    twig_handlers => {

        # A heading opens a new group.  The hash starts out empty; its key
        # is only created when the first <p> under this heading arrives.
        '//body/h4' => sub {
            my ( $twig, $elt ) = @_;
            dd $elt->path;
            push @root, {};
            $current_key = $elt->text;
            return;
        },

        # A paragraph is appended to the group opened by the latest heading.
        '//body/p' => sub {
            my ( $twig, $elt ) = @_;
            dd $elt->path;

            # A <p> that comes before any <h4> has no group to join;
            # report it and move on instead of indexing an empty @root.
            if ( !defined $current_key ) {
                warn 'Skipping <p> with no preceding <h4>: ', $elt->text, "\n";
                return;
            }

            push @{ $root[-1]{$current_key} }, $elt->text;
            return;
        },
    },
);

$xml->xparse($sample);

dd \@root;

__END__
"/html/body/h4"
"/html/body/p"
"/html/body/p"
"/html/body/h4"
"/html/body/p"
"/html/body/p"
"/html/body/p"
"/html/body/p"
"/html/body/h4"
"/html/body/p"
"/html/body/p"
[
  { "July 12" => ["Tim", "Jon"] },
  { "July 13" => ["James", "Eric", "Jerry", "Susie"] },
  { "July 14" => ["Kami", "Darryl"] },
]