#!/usr/local/bin/perl -w
use strict;
use HTML::Parser;
# Parser state shared with the handler subs (_start, _end, _text).
my @table;           # $table[$count][$row][$column] => accumulated cell text
my @save;            # stack of [$row, $column] pairs for enclosing tables
my $count    = 0;    # number of <table> start tags seen so far
my $row      = 0;    # current row within the table being parsed
my $column   = 0;    # current column within the row being parsed
my $in_table = 0;    # nesting depth of currently open <table> elements

# Event-driven parser: each handler receives the arguments named in its
# argspec string.
my $p = HTML::Parser->new(
    api_version => 3,
    handlers    => [
        start => [ \&_start, "tagname, attr" ],
        end   => [ \&_end,   "tagname" ],
        text  => [ \&_text,  "dtext" ],
    ],
    marked_sections => 1,
);

# parse_file returns undef (with $! set) when the file cannot be opened.
# Fail loudly here instead of carrying on and printing empty fields.
$p->parse_file('test.html')
    or die "Cannot parse 'test.html': $!\n";
# Start-tag handler: tracks table nesting and the current row/column.
#
# Arguments (from the "tagname, attr" argspec):
#   $tag - name of the tag that was opened
# The attribute hash reference is also passed by the parser but is not
# needed here, so it is left in @_.
#
# Side effects: updates @save, $count, $in_table, $row and $column.
sub _start {
    my ($tag) = @_;

    if ($tag eq 'table') {
        # Remember where we were in the enclosing table (if any) so that
        # _end can restore the position once this table is closed.
        push @save, [ $row, $column ];
        $row = $column = 0;
        ++$count;
        $in_table++;
    }
    elsif ($tag eq 'tr') {
        $row++;
    }
    elsif ($tag eq 'td') {
        # Only <td> advances the column; <th> cells do not.
        $column++;
    }
    return;
}
# End-tag handler: restores the enclosing table's position when a table
# closes and resets the column counter when a row closes.
#
# Arguments (from the "tagname" argspec):
#   $tag - name of the tag that was closed
#
# Side effects: updates @save, $in_table, $row and $column.
sub _end {
    my ($tag) = @_;

    if ($tag eq 'table') {
        # A stray </table> with no matching <table> leaves @save empty.
        # Ignore it: dereferencing the undef returned by pop is fatal under
        # strict refs, and $in_table would otherwise go negative.
        return unless @save;

        # NOTE(review): $count is not restored here, so text that follows a
        # nested table inside the outer table is stored under the nested
        # table's index. Left as is because existing indices depend on it.
        ( $row, $column ) = @{ pop @save };
        --$in_table;
    }
    elsif ($tag eq 'tr') {
        $column = 0;
    }
    return;
}
# Text handler: appends text found inside a table to the cell at
# $table[$count][$row][$column].
#
# Arguments (from the "dtext" argspec):
#   $text - text content of the current text event
#
# Text outside any table, and text that is empty or whitespace-only after
# clean-up, is ignored.
sub _text {
    my ($text) = @_;

    chomp $text;

    # Strip every \xA0 (non-breaking space), not just the first one.
    # Presumably these are decoded &nbsp; entities - confirm against the data.
    $text =~ s/\xa0//g;

    return unless $in_table;

    # Test for a non-whitespace character rather than plain truthiness so
    # that a cell containing just "0" is kept.
    return unless $text =~ /\S/;

    $table[$count][$row][$column] .= $text;
    return;
}
## print data
# Each label is paired with the column of table 12, row 1 that holds its
# value. Rows and columns are counted from 1 because the handlers increment
# the counters on each <tr>/<td> before any cell text is stored.
for my $field ( [ 'ACCOUNT: ', 2 ], [ 'BALANCE: ', 3 ], [ 'AVAILABLE: ', 4 ] ) {
    my ( $label, $col ) = @{$field};
    print $label, $table[12][1][$col], "\n";
}