#! perl -w
use strict;
use warnings;
use HTML::TokeParser::Simple;
use Data::Dumper;

# Scrape the first <table> of an HTML file into a hash of hashes and dump it.
#
# Each <th> cell starts a new section; its whitespace-normalised text is the
# section key.  The <td> cells that follow are collected as a flat list and
# turned into a hash (so consecutive cells are expected to alternate between
# attribute name and attribute value).  A <td> carrying a colspan attribute
# gets a fake attribute name pushed in front of it ('*' for colspan 2, '='
# otherwise) so that the list stays pairable.
#
# Usage: script.pl [file.html]     (defaults to Scroll_of_colors.html)

my $file = @ARGV ? $ARGV[0] : 'Scroll_of_colors.html';

# The constructor returns undef (with $! set) when the file cannot be opened.
my $p = HTML::TokeParser::Simple->new($file)
    or die "Cannot open '$file': $!\n";

# Skip ahead to the first <table>.  get_tag() accepts a tag name to seek to;
# get_token() takes no arguments and would merely discard one token.
$p->get_tag('table')
    or die "No <table> found in '$file'\n";

my %hash;               # section key => { attribute => value, ... }
my ($key, @table);      # current section: these persist across loop iterations
my $text = '';          # text accumulated inside the current <th>/<td>

# Collapse every run of whitespace to one space and drop a leading space.
sub squeeze {
    my ($s) = @_;
    $s =~ s/\s+/ /g;
    $s =~ s/^ //;
    return $s;
}

while (my $t = $p->get_token) {
    # Scalar-context ".." is the flip-flop operator: it yields 1 on the token
    # that turns it on, increasing numbers while it stays on, and a number
    # with "E0" appended on the token that turns it off.
    if (my $f = $t->is_start_tag('th') .. $t->is_end_tag('th')) {
        if ($f == 1) {
            # <th> opens: the previous section (if any) is complete.
            # Note: stored even when it collected no cells.
            if ($key) {
                $hash{$key} = { @table };
            }
            $text  = '';
            @table = ();
        }
        elsif ($t->is_text) {
            $text .= $t->as_is;
        }
        elsif ($f =~ /E/) {
            # </th>: the cleaned-up header text becomes the section key.
            $key = squeeze($text);
            $key =~ s/ $//;
        }
    }
    # A cell ends at </td>, or at </tr> if the </td> was omitted.
    # ("||" binds tighter than "..", so both end tags form the right operand.)
    elsif ($f = $t->is_start_tag('td') .. $t->is_end_tag('td') || $t->is_end_tag('tr')) {
        if ($f == 1) {
            $text = '';
            my $colspan = $t->get_attr('colspan');
            if ($colspan) {
                push @table, $colspan == 2 ? '*' : '=';   # fake attribute names
            }
        }
        elsif ($f =~ /E/) {
            # End of cell: strip trailing spaces/colons and store as the next
            # key or value of the current section.
            my $cell = squeeze($text);
            $cell =~ s/[ :]+$//;
            push @table, $cell;
        }
        elsif ($t->is_text) {
            $text .= $t->as_is;
        }
    }
    elsif ($t->is_end_tag('table')) {
        last;
    }
}

# Store the last section.  Done after the loop so it is not lost when the
# document ends without a closing </table>.  Unlike the mid-document case,
# a final section without any cells is dropped.
if ($key && @table) {
    $hash{$key} = { @table };
}

print Dumper \%hash;