#!/usr/bin/perl
#
# Download an earnings listing page (cached in a local file), extract the
# table whose header row contains Company / Symbol / Estimate / Actual, and
# print every row of it as one comma-separated line.
#
# Why the cells are cleaned before printing
# -----------------------------------------
# Dumping the raw rows with
#
#     use Data::Dumper;
#     $Data::Dumper::Useqq = 1;
#     foreach my $ts ($te->table_states) {
#         foreach my $row ($ts->rows) {
#             print Dumper $row;
#         }
#     }
#
# shows that each cell is padded with newlines, tabs and \240 (non-breaking
# space) characters:
#
#     $VAR1 = [
#       "\n\t\tACME Communications, Inc.\n\t\t\n\t",
#       "\n\t\t\n\t\tACME\n\t",
#       "\n\t\t-\$0.43\n\t",
#       "\n\n\t\n\t\t\240\n\t\t-\$0.67\n\t\t\n\t\n\n\t"
#     ];
#     ...
#     $VAR1 = [
#       "\n\t\tJP Realty Inc.\n\t\t\n\t\t\n\t\t*\n\t\t\n\t\t\n\t",
#       "\n\t\t\n\t\tJPR\n\t",
#       "\n\t\t\$0.64\n\t",
#       "\n\n\t\n\t\t\240\n\t\t\n\t\t\n\t\n\n\t"
#     ];
#     ...
#
# Squeezing every run of those characters into a single space and then
# trimming both ends fixes that:
#
#     foreach (@$row) { tr{ \t\n\xA0}{ }s; s{^\s+}{}; s{\s+$}{}; }

use warnings;
use strict;

use LWP::Simple;
use HTML::TableExtract;

my $file = 'earnings.dat';
my $url  = "http://www.earnings.com/fin/earnListing.jsp?date=2003-05-04";

# Be kind to the website, at least during testing!
# Get the $url only when $file is missing or more than 1 day old.
# The existence test comes first so that -M is never evaluated on a
# missing file (it would return undef and trigger a warning).
if (!-e $file or -M $file > 1) {
    my $status = mirror($url, $file);

    # Best effort: a failed refresh is reported, but a previously cached
    # copy (if any) is still used. With no cached copy the open below dies.
    warn "Could not mirror $url (HTTP status $status)\n" if is_error($status);
}

# Slurp $file into $content.
open my $fh, '<', $file or die "Cannot open '$file' for reading: $!";
my $content = do { local $/; <$fh> };    # undef $/ => read the whole file
close $fh;

my $te = HTML::TableExtract->new(
    headers => [qw(Company Symbol Estimate Actual)],
);
$te->parse($content);

#use Data::Dumper;
#$Data::Dumper::Useqq = 1;

# Examine all matching tables
foreach my $ts ($te->table_states) {
    print "Table (", join(',', $ts->coords), "):\n";
    foreach my $row ($ts->rows) {
        foreach my $cell (@$row) {
            # An undefined cell is printed as an empty field; normalising
            # it here avoids "uninitialized value" warnings below.
            $cell = '' unless defined $cell;

            # Squeeze runs of space, tab, newline and non-breaking space
            # (\xA0) into one space, then trim leading/trailing whitespace.
            $cell =~ tr{ \t\n\xA0}{ }s;
            $cell =~ s/^\s+//;
            $cell =~ s/\s+$//;
        }
#       print Dumper $row;
        print join(',', @$row), "\n";
    }
}