package Table;

# Table - collect the text found inside HTML tables into a nested array.
#
# Usage:
#     my $t = Table->new;
#     $t->parse_it('page.html')  or die "cannot parse";   # file name
#     $t->parse_it(\$html_text)  or die "cannot parse";   # reference to a string
#     print $t->[1][1][1];       # first table, first row, first <td>
#
# The object returned by new() is an array reference laid out as
#     $t->[table][row][column]
# where
#   * table  counts <table> start tags in document order, starting at 1
#            (nested tables get their own number),
#   * row    counts <tr> start tags inside that table, starting at 1,
#   * column counts <td> start tags inside that row, starting at 1.
# Text that appears inside a table before the first <tr>/<td> is stored
# under row 0 / column 0.
#
# NOTE(review): only <td> advances the column counter; <th> does not, so
# header-cell text is appended to whatever column is current.  Kept as in
# the original implementation - confirm whether that is intended.

use strict;
use warnings;

use HTML::Parser;

## PRIVATE
#
# NOTE(review): all state below is file-scoped, and new() blesses the one
# shared $table array.  Every Table object is therefore the same reference
# and results accumulate across objects and across parse_it() calls.  This
# is preserved for backward compatibility.
my $table = [];          # $table->[table][row][column] = accumulated text
my $tb_count;            # number of <table> start tags seen so far (ever)
my $tb_idx;              # index of the table currently being filled
my $row;                 # current row number inside the current table
my $column;              # current column number inside the current row
my $table_status = 0;    # nesting depth: > 0 while inside at least one table
my @save;                # stack of [tb_idx, row, column] of enclosing tables

# new() - class method; returns the (shared) result array blessed into the
# given class.
sub new {
    my $type = shift;
    return bless $table, $type;
}

# parse_it($src) - parse one HTML document and add its table text to the
# result array.
#
#   $src  either a file name (plain scalar, handed to
#         HTML::Parser::parse_file) or a reference to a scalar holding the
#         HTML text.
#
# Returns 1 on success and a false value (empty list / undef) when $src is
# undefined or the parser reports failure, e.g. parse_file() cannot open
# the file.
sub parse_it {
    my $self = shift;
    my $src  = shift;

    return unless defined $src;

    # Start every document with clean nesting state so that an unclosed
    # <table> in a previous document cannot leak into this one.  The
    # collected data and the running table counter are deliberately kept.
    ( $tb_idx, $row, $column ) = ();
    $table_status = 0;
    @save         = ();

    my $p = HTML::Parser->new(
        api_version => 3,
        handlers    => [
            start => [ \&_start, "tagname" ],
            end   => [ \&_end,   "tagname" ],
            text  => [ \&_text,  "dtext" ],
        ],
        marked_sections => 1,
    );

    if ( ref($src) ) {
        $p->parse($$src) or return;

        # parse() may hold back trailing text while waiting for more input;
        # eof() flushes it to the text handler.  parse_file() signals the
        # end of the document itself.
        $p->eof;
    }
    else {
        $p->parse_file($src) or return;
    }

    return 1;
}

# _start($tagname) - start-tag handler.
#   <table>: remember the enclosing table's position, then begin a new table.
#   <tr>   : advance to the next row and restart column numbering.
#   <td>   : advance to the next column.
sub _start {
    my $tag = shift;

    if ( $tag eq 'table' ) {
        push @save, [ $tb_idx, $row, $column ];
        $row    = $column = 0;
        $tb_idx = ++$tb_count;
        ++$table_status;
    }
    elsif ( $tag eq 'tr' ) {
        $row++;

        # </tr> is optional in HTML, so the column counter must also be
        # reset here; otherwise columns keep growing across rows.
        $column = 0;
    }
    elsif ( $tag eq 'td' ) {
        $column++;
    }

    return;
}

# _end($tagname) - end-tag handler.
#   </table>: return to the enclosing table's saved position (if any).
#   </tr>   : restart column numbering.
sub _end {
    my $tag = shift;

    if ( $tag eq 'table' ) {

        # A stray </table> without a matching <table> has nothing to
        # restore; ignore it instead of dereferencing an empty stack.
        return unless @save;

        ( $tb_idx, $row, $column ) = @{ pop @save };
        --$table_status;
    }
    elsif ( $tag eq 'tr' ) {
        $column = 0;
    }

    return;
}

# _text($decoded_text) - text handler.  Appends the text to the current
# cell when inside a table.  Every non-breaking space (\xa0) is removed,
# and chunks that are empty or consist only of whitespace are skipped.
sub _text {
    my $text = shift;

    return unless $table_status;
    return unless defined $text;

    $text =~ s/\xa0//g;

    # Test for a non-whitespace character rather than plain truthiness so
    # that a cell containing just "0" is kept.
    return unless $text =~ /\S/;

    $table->[$tb_idx][$row][$column] .= $text;

    return;
}

1;