use Data::Dumper;

# Have Dumper emit double-quoted strings so that tabs, newlines and other
# non-printable bytes in the cells show up as visible escapes.
$Data::Dumper::Useqq = 1;

# Dump each row of each extracted table to inspect the raw cell contents.
for my $table ($te->table_states) {
    for my $cells ($table->rows) {
        print Dumper($cells);
    }
}
####
$VAR1 = [
"\n\t\tACME Communications, Inc.\n\t\t\n\t",
"\n\t\t\n\t\tACME\n\t",
"\n\t\t-\$0.43\n\t",
"\n\n\t\n\t\t\240\n\t\t-\$0.67\n\t\t\n\t\n\n\t"
];
...
$VAR1 = [
"\n\t\tJP Realty Inc.\n\t\t\n\t\t\n\t\t*\n\t\t\n\t\t\n\t",
"\n\t\t\n\t\tJPR\n\t",
"\n\t\t\$0.64\n\t",
"\n\n\t\n\t\t\240\n\t\t\n\t\t\n\t\n\n\t"
];
...
####
# Normalise every cell of the row in place (the loop variable aliases the
# array elements, so the changes are written back into @$row).
for my $cell (@$row) {
    # Map space, tab, newline and \xA0 to a plain space and squeeze each
    # run of them down to a single space.
    $cell =~ tr{ \t\n\xA0}{ }s;

    # Trim what is left at either end.
    $cell =~ s{^\s+}{};
    $cell =~ s{\s+$}{};
}
####
#!/usr/bin/perl
use warnings;
use strict;
use LWP::Simple;
use HTML::TableExtract;
my $file = 'earnings.dat';
my $url =
    "http://www.earnings.com/fin/earnListing.jsp?date=2003-05-04";

# Be kind to the website, at least during testing!
# Fetch $url only when $file is missing or more than 1 day old.
# (-M yields the file's age in days; it is only evaluated when the file
# exists, so it never sees a missing file.)
if (!-e $file or -M $file > 1) {
    my $status = mirror($url, $file);

    # mirror() returns the HTTP status code. Only warn on failure so that
    # an existing (stale) local copy can still be used; if there is no
    # local copy at all, reading the file afterwards will fail anyway.
    warn "mirror of $url failed with HTTP status $status\n"
        if is_error($status);
}
# Slurp $file into $content.
my $content = do {
    # With the input record separator undefined, a single read returns
    # the whole file. local() restores it when the block exits.
    local $/;
    open my $fh, '<', $file or die "Cannot open $file: $!";

    # The read is the last statement, so its result is the value of the
    # do-block. The lexical handle is closed when $fh goes out of scope.
    <$fh>;
};
# Column headers passed to HTML::TableExtract to select the tables of
# interest.
my @wanted_headers = qw(Company Symbol Estimate Actual);

# Build the extractor and feed it the downloaded HTML.
my $te = HTML::TableExtract->new(headers => \@wanted_headers);
$te->parse($content);
# Debugging aid: to inspect the raw cells, enable
#   use Data::Dumper; $Data::Dumper::Useqq = 1;
# and add "print Dumper $cells;" inside the row loop below.

# Examine all matching tables: print a heading with the table's
# coordinates, then each row as a comma-separated line.
for my $table ($te->table_states) {
    print "Table (", join(',', $table->coords), "):\n";

    for my $cells ($table->rows) {
        # Tidy each cell in place: collapse runs of space, tab, newline
        # and \xA0 to one space, then trim both ends.
        for my $cell (@$cells) {
            $cell =~ tr{ \t\n\xA0}{ }s;
            $cell =~ s{^\s+}{};
            $cell =~ s{\s+$}{};
        }

        print join(',', @$cells), "\n";
    }
}