#!/usr/bin/perl
use lib qw( ..);
use HTML::TableExtract;
use LWP::Simple;
use Data::Dumper;
# Dump every table row of the MDJONLINE foreclosure listings to STDOUT.
# The listing is fetched page by page; the Count= query parameter is the
# number of the first record on the requested page, stepped by 10.
my $base_url =
    "http://www.mdjonline.com/classifieds/legals/index.inn?loc=detail&main=Legals&sub=Foreclosures&Count=";

# Get the total number of records according to MDJONLINE
my $content = get($base_url . 0)
    or die "Couldn't get URL\n";
$content =~ m{Total\s?ads: (\d+)} or die "No dice\n";
my $total = $1;

# Set some initial values for record number and iterations
my $iter = $total - ($total % 10);    # round the total down to a multiple of 10
# NOTE(review): when $total is an exact multiple of 10 the loop below also
# requests Count=$total; whether that page is empty depends on how the site
# numbers records -- confirm before tightening the bound.

# $recnum is the number of the start record for the next dump.
for (my $recnum = 0; $recnum <= $iter; $recnum += 10) {
    my $mdjURL = $base_url . $recnum;
    my $page   = get($mdjURL);
    unless (defined $page) {
        # A failed fetch used to hand undef to the parser; skip the page instead.
        warn "Couldn't get URL $mdjURL\n";
        next;
    }

    # Build a fresh extractor for every page.  Repeated parse() calls on one
    # HTML::Parser-derived object are treated as further chunks of the same
    # document, so a shared extractor would not start counting tables from
    # scratch on each page.
    my $te = HTML::TableExtract->new(depth => 3, count => 0);    # which table do we want
    $te->parse($page);

    for my $ts ($te->table_states) {
        for my $row ($ts->rows) {
            print Dumper $row;
        }
    }
}
####
#!/usr/bin/perl
use lib qw( ..);
use HTML::TableExtract;
use LWP::Simple;
use Data::Dumper;
# Dump every table row of the MDJONLINE foreclosure listings to STDOUT,
# using a new HTML::TableExtract object for each page that is fetched.
my $listing_url =
    "http://www.mdjonline.com/classifieds/legals/index.inn?loc=detail&main=Legals&sub=Foreclosures&Count=";

# Get the total number of records according to MDJONLINE
my $content = get($listing_url . 0)
    or die "Couldn't get URL\n";
$content =~ m{Total\s?ads: (\d+)} or die "No dice\n";
my $total = $1;

# Set some initial values for record number and iterations
my $iter   = $total - ($total % 10);    # round the total down to a multiple of 10
my $recnum = 0;                         # number of start record for next dump
# NOTE(review): if $total is an exact multiple of 10 the final request is
# Count=$total; confirm against the site whether that page can hold records.

while ($recnum <= $iter) {
    my $mdjURL = $listing_url . $recnum;
    my $page   = get($mdjURL);

    if (defined $page) {
        # One extractor per page, selecting the table at depth 3, count 0.
        my $te = HTML::TableExtract->new(depth => 3, count => 0);
        $te->parse($page);

        for my $ts ($te->table_states) {
            for my $row ($ts->rows) {
                print Dumper $row;
            }
        }
    }
    else {
        # get() returns undef on failure; report it rather than parsing undef.
        warn "Couldn't get URL $mdjURL\n";
    }

    $recnum += 10;
}