Re^2: Extracting data-structure from HTML using Web::Scraper

And here is an XML::Twig version, since the logic is the same:

#!/usr/bin/perl --
use strict; use warnings;
use Data::Dump;
use XML::Twig;

# Sample document: every <h4> date heading is followed by sibling <p>
# elements holding names.  The empty first line of the heredoc is kept on
# purpose so the text still begins with a newline.
my $sample = <<'END_HTML';

<html><body>
    <h4 class="bla">July 12</h4>
    <p>Tim</p>
    <p>Jon</p>
    <h4 class="bla">July 13</h4>
    <p>James</p>
    <p>Eric</p>
    <p>Jerry</p>
    <p>Susie</p>
    <h4 class="date">July 14</h4>
    <p>Kami</p>
    <p>Darryl</p>
</body></html>
END_HTML


# Result: one single-key hash per date heading, in document order, e.g.
#   ( { 'July 12' => [ 'Tim', 'Jon' ] }, { 'July 13' => [ ... ] }, ... )
my @root;

# Name list belonging to the most recently seen <h4>; undef until the
# first heading has been handled.
my $current_names;

my $xml = XML::Twig->new(
    twig_handlers => {
        '//body/h4' => sub {
            dd $_->path;    # debug trace of the matched element

            # Create the group with an empty list straight away, so a
            # heading that is not followed by any <p> still shows up
            # under its date instead of collapsing to an empty hash.
            my $date = $_->text;
            $current_names = [];
            push @root, { $date => $current_names };
        },
        '//body/p' => sub {
            dd $_->path;    # debug trace of the matched element

            # A <p> ahead of the first <h4> has no date to be filed
            # under; report it and move on rather than indexing into an
            # empty @root, which is a fatal error.
            if ( not defined $current_names ) {
                warn 'Skipping <p> before any <h4>: ', $_->text, "\n";
                return;
            }

            push @{$current_names}, $_->text;
        },
    },
);

$xml->xparse( $sample );

dd \@root;

__END__

"/html/body/h4"
"/html/body/p"
"/html/body/p"
"/html/body/h4"
"/html/body/p"
"/html/body/p"
"/html/body/p"
"/html/body/p"
"/html/body/h4"
"/html/body/p"
"/html/body/p"
[
  { "July 12" => ["Tim", "Jon"] },
  { "July 13" => ["James", "Eric", "Jerry", "Susie"] },
  { "July 14" => ["Kami", "Darryl"] },
]
[download]

Comment on Re^2: Extracting data-structure from HTML using Web::Scraper Download Code