Perl_Necklace has asked for the wisdom of the Perl Monks concerning the following question:
use strict; use warnings; use diagnostics; use HTML::TableExtract; use WWW::Mechanize; use Time::HiRes; my $random = rand(10); my $huffdata = "C:/huff_data.txt"; open (MYFILE, "+>>", "$huffdata") or die "unable to open $huffdata $!" +; my $url = "http://fundrace.huffingtonpost.com/neighbors.php?type=name& +lname=SMITH"; sub parse_and_save{ sleep($random); my $mech = WWW::Mechanize->new; $mech->get($url); my $text = $mech->content; my $te = HTML::TableExtract->new( headers => [qw(Donor Contribution Ad +dress)] );; $te->parse($text); my $row; foreach $row ($te->rows) { print MYFILE join(",", @$row); } my @links = $mech->find_link( text_regex => qr/more/i ) or die "no lin +ks found"; for (@links){ $url = $_->url_abs($/); parse_and_save(); } } parse_and_save();
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re: web scraping script help
by pemungkah (Priest) on Sep 18, 2007 at 21:25 UTC | |
by Perl_Necklace (Initiate) on Sep 19, 2007 at 16:10 UTC | |
|
Re: web scraping script help
by n8g (Sexton) on Sep 18, 2007 at 20:28 UTC | |
by Perl_Necklace (Initiate) on Sep 18, 2007 at 20:44 UTC | |
by n8g (Sexton) on Sep 18, 2007 at 20:57 UTC |