#! perl -w
#
# Extract the sections of the first HTML table in a file into a hash of
# hashes and dump it with Data::Dumper.
#
# Usage: perl script.pl [file.html]      (default: Scroll_of_colors.html)
#
# How the table is interpreted:
#   * The text of every <th> cell starts a new section and becomes its key.
#   * The text of the <td> cells that follow is collected into a flat list
#     that is finally turned into a hash, i.e. cells are taken alternately
#     as key and value.
#   * A <td> carrying a "colspan" attribute has no key cell of its own, so
#     a fake key is inserted in front of it: '*' for colspan=2, '=' for any
#     other (true) colspan value.
use strict;
use warnings;

use HTML::TokeParser::Simple;
use Data::Dumper;

# Input file: first command-line argument, falling back to the original
# hard-coded name so existing invocations keep working.
my $file = @ARGV ? $ARGV[0] : 'Scroll_of_colors.html';

# new() returns undef when the file cannot be opened; fail with a clear
# message instead of a "can't call method on undefined value" error later.
my $p = HTML::TokeParser::Simple->new($file)
    or die "Cannot open '$file': $!\n";

# Skip ahead to the first <table> start tag.  (get_token() takes no tag
# argument and would merely discard a single token; get_tag() is the
# method that skips to a named tag.)  If there is no table, the parser is
# left at end of input and the loop below simply does not run.
$p->get_tag('table');

my %hash;     # section key => { cell key => cell value, ... }
my $key;      # heading text of the section currently being collected
my @table;    # flat key/value list of the current section
my $text;     # text accumulated inside the current <th> or <td>

while (my $t = $p->get_token) {
    # The scalar range ("flip-flop") operator is true from the start tag up
    # to and including the end tag.  Its value is a sequence number: 1 on
    # the start tag, and a value with "E0" appended on the closing token.
    if (my $f = $t->is_start_tag('th') .. $t->is_end_tag('th')) {
        if ($f == 1) {
            # A new heading opens: the previous section (if any) is done.
            if (defined $key && length $key) {
                $hash{$key} = { @table };
            }
            $text  = '';
            @table = ();
        }
        elsif ($t->is_text) {
            $text .= $t->as_is;
        }
        elsif ($f =~ /E/) {
            # </th>: normalise whitespace and remember the section key.
            $text =~ s/\s+/ /g;
            $text =~ s/^ //;
            $text =~ s/ $//;
            $key = $text;
        }
    }
    # A cell ends at </td>, or at </tr> when the </td> was omitted
    # ("||" binds tighter than "..").
    elsif ($f = $t->is_start_tag('td')
               .. $t->is_end_tag('td') || $t->is_end_tag('tr')) {
        if ($f == 1) {
            $text = '';
            my $colspan = $t->get_attr('colspan');
            if ($colspan) {
                # Fake attribute names for cells that span the key column.
                push @table, $colspan == 2 ? '*' : '=';
            }
        }
        elsif ($f =~ /E/) {
            # End of cell: normalise whitespace, drop trailing colons.
            $text =~ s/\s+/ /g;
            $text =~ s/^ //;
            $text =~ s/[ :]+$//;
            push @table, $text;    # key or value
        }
        elsif ($t->is_text) {
            $text .= $t->as_is;
        }
    }
    elsif ($t->is_end_tag('table')) {
        last;
    }
}

# End of the last section.  Done after the loop so the section is kept
# both when </table> was seen and when the input ended without it.
if (defined $key && length $key && @table) {
    $hash{$key} = { @table };
}

print Dumper \%hash;