# Process every URL in @urlset, one document at a time.
for my $url (@urlset) {
    parsedocument($url);
}
# Fetch the document at $url, collect the link attribute values of every <a>
# tag in it, resolve them against the response's base URL, and print each
# resulting absolute URL that ends in "jpg" (one per line). The URL being
# processed is printed first.
#
# Parameters:
#   $url - address of the document to fetch.
#
# Returns nothing meaningful. Matching links go to STDOUT; a failed fetch is
# reported on STDERR and produces no link output.
sub parsedocument {
    my ($url) = @_;
    print "$url\n";

    my $ua = LWP::UserAgent->new;
    $ua->env_proxy();    # honour proxy settings from the environment

    # Link-extractor callback: keep the attribute values of <a> tags only.
    my @links;
    my $collect_anchor = sub {
        my ($tag, %attr) = @_;
        return if $tag ne 'a';
        push @links, values %attr;
        return;
    };
    my $parser = HTML::LinkExtor->new($collect_anchor);

    # Request the document and feed each chunk to the parser as it arrives.
    my $res = $ua->request(
        HTTP::Request->new(GET => $url),
        sub { $parser->parse($_[0]) },
    );

    # Report a failed fetch instead of silently printing nothing.
    unless ($res->is_success) {
        warn "Could not fetch $url: ", $res->status_line, "\n";
        return;
    }

    # Expand each collected link to an absolute URL; the base is only known
    # once the response is available.
    my $base = $res->base;
    for my $link (@links) {
        my $absolute = url($link, $base)->abs;

        # Plain suffix match: any URL whose text ends in "jpg" is printed.
        print "$absolute\n" if $absolute =~ /jpg$/;
    }
    return;
}