#!/usr/bin/perl
# Fetch an earnings listing page (cached on disk), extract every HTML table
# whose header row contains Company / Symbol / Estimate / Actual, and print
# each matching table as comma-separated rows with whitespace normalised.
use warnings;
use strict;

use LWP::Simple;
use HTML::TableExtract;

my $file = 'earnings.dat';
my $url  = "http://www.earnings.com/fin/earnListing.jsp?date=2003-05-04";

# Be kind to the website, at least during testing!
# Get the $url only when $file is missing or more than 1 day old.
# (-M yields the file's age in days, measured from script start time.)
if (!-e $file or -M $file > 1) {
    my $status = mirror($url, $file);

    # is_error() is re-exported by LWP::Simple from HTTP::Status; it is true
    # for 4xx/5xx codes, so a 304 "not modified" reply is not treated as a
    # failure.
    if (is_error($status)) {
        die "Cannot fetch $url (HTTP status $status) and no cached $file\n"
            unless -e $file;
        warn "Cannot refresh $file from $url (HTTP status $status); "
           . "using the stale cached copy\n";
    }
}

# Slurp $file into $content.
my $content = do {
    open my $fh, '<', $file or die "Cannot open $file: $!\n";
    local $/;    # undef input record separator => read the whole file at once
    <$fh>;
};

my $te = HTML::TableExtract->new(
    headers => [qw(Company Symbol Estimate Actual)],
);
$te->parse($content);

#use Data::Dumper;
#$Data::Dumper::Useqq = 1;

# Examine all matching tables
foreach my $ts ($te->table_states) {
    print "Table (", join(',', $ts->coords), "):\n";
    foreach my $row ($ts->rows) {
        foreach (@$row) {
            # Treat an undefined cell as empty so the clean-up below and the
            # join() do not emit "uninitialized value" warnings.
            $_ = '' unless defined;

            # Map tab, newline and non-breaking space (\xA0) to a plain space
            # and squeeze runs of them, then trim both ends.
            tr{ \t\n\xA0}{ }s;
            s{^\s+}{};
            s{\s+$}{};
        }
        # print Dumper $row;
        print join(',', @$row), "\n";
    }
}