#!/usr/local/bin/perl -w
#
# Extract the text of every table cell in test.html and print three cells
# (account, balance, available) from one specific table.
#
# Cell text is collected into @table, indexed as
#     $table[$table_no][$row][$column]
# where all three indices are 1-based: tables are numbered in the order
# their <table> start tags appear (nested tables included), rows count
# <tr> tags within a table, and columns count <td> tags within a row.

use strict;
use warnings;

use HTML::Parser;

my @table;           # collected cell text, see header comment
my @save;            # stack of [table_no, row, column] for enclosing tables
my $count    = 0;    # number of <table> start tags seen so far
my $current  = 0;    # number of the table currently being filled
my $row      = 0;    # current row within the current table
my $column   = 0;    # current column within the current row
my $in_table = 0;    # nesting depth; 0 means we are outside any table

my $p = HTML::Parser->new(
    api_version => 3,
    handlers    => [
        start => [ \&_start, "tagname" ],
        end   => [ \&_end,   "tagname" ],
        text  => [ \&_text,  "dtext" ],
    ],
    marked_sections => 1,
);

# parse_file returns undef (with $! set) when the file cannot be opened.
$p->parse_file('test.html')
    or die "Cannot parse test.html: $!\n";

# Start-tag handler: tracks the position (table, row, column) that any
# following text belongs to.
#   $tag - lower-cased tag name supplied by HTML::Parser
sub _start {
    my ($tag) = @_;

    if ( $tag eq 'table' ) {
        # Remember where we were in the enclosing table (if any) so that
        # _end can resume there once this table is closed.
        push @save, [ $current, $row, $column ];
        $row = $column = 0;
        $current = ++$count;
        $in_table++;
    }
    elsif ( $tag eq 'tr' ) {
        $row++;
        # Reset here as well as on </tr>: the end tag may be omitted.
        $column = 0;
    }
    elsif ( $tag eq 'td' ) {
        $column++;
    }
    return;
}

# End-tag handler: restores the enclosing table's position on </table>
# and resets the column counter on </tr>.
#   $tag - lower-cased tag name supplied by HTML::Parser
sub _end {
    my ($tag) = @_;

    if ( $tag eq 'table' ) {
        # Ignore a stray </table> with no matching start tag instead of
        # dying on an empty stack.
        return unless @save;
        ( $current, $row, $column ) = @{ pop @save };
        --$in_table;
    }
    elsif ( $tag eq 'tr' ) {
        $column = 0;
    }
    return;
}

# Text handler: appends entity-decoded text to the current cell.
# Text outside any table, and text that is empty or whitespace-only
# after cleanup, is ignored.
#   $text - decoded text chunk supplied by HTML::Parser ("dtext")
sub _text {
    my ($text) = @_;

    return unless $in_table;

    chomp $text;

    # The data contains many \xA0 characters (presumably decoded &nbsp;
    # entities used as padding); strip every one, not just the first.
    $text =~ s/\xa0//g;

    # Test for a non-space character rather than truthiness, so that a
    # cell containing just "0" is kept.
    return unless $text =~ /\S/;

    $table[$current][$row][$column] .= $text;
    return;
}

## print data
# The values of interest live in row 1 of table 12, columns 2..4.
my @fields = (
    [ 'ACCOUNT: ',   2 ],
    [ 'BALANCE: ',   3 ],
    [ 'AVAILABLE: ', 4 ],
);

for my $field (@fields) {
    my ( $label, $col ) = @{$field};
    my $value = $table[12][1][$col];

    if ( !defined $value ) {
        warn "No data found at table 12, row 1, column $col\n";
        $value = '';
    }
    print $label, $value, "\n";
}