# Fetch one page and feed its HTML to a link extractor.
my $url;    # address of the page to fetch; NOTE(review): no value is assigned in this excerpt
my %stat;   # accumulator hash: frame URLs go under 'pages', content URLs under 'cache'
my $ua     = LWP::UserAgent->new;
my $parser = HTML::LinkExtor->new;

my $resp = $ua->get($url);

# A failed request still yields a response object (usually an error page).
# Report it instead of parsing it silently; processing continues as before.
unless ( $resp->is_success ) {
    warn sprintf "request for %s failed: %s\n",
        ( defined $url ? $url : '(undefined URL)' ), $resp->status_line;
}

$parser->parse( $resp->content );
$parser->eof;    # signal end of document so the parser flushes anything buffered

# Base URI used to turn the relative links found in the page into absolute ones.
my $base = $resp->base;
####
# Sort every extracted link that carries a 'src' attribute into %stat:
# frames and iframes are queued as further pages, anything else is recorded
# as page content.
foreach my $link_found ( $parser->links ) {

    # Each record is [ $tag, $attr => $url, $attr2 => $url2, ... ].
    # 'src' is not necessarily the first attribute listed, so look it up
    # among all attribute/URL pairs instead of testing position 1 only.
    my ( $tag, %url_for ) = @{$link_found};
    next unless exists $url_for{src};

    my $absurl = URI->new( $url_for{src} )->abs($base);

    # A frame is a page of its own: queue it for another pass.
    if ( $tag eq 'frame' || $tag eq 'iframe' ) {
        push @{ $stat{pages} }, "$absurl";    # store the plain string, not the URI object
        next;
    }

    # Anything else is content; the (stringified) URL becomes the cache key.
    $stat{cache}{$absurl} = [];    # will hold length and time later on
}
####
# When rendering is requested, write a copy of the fetched page to
# %TEMP%/_temp.html with every src="..." attribute redirected to the local
# ./_temp_files/ directory.
if ($render) {
    my $files_dir = "$ENV{TEMP}\\_temp_files";

    # 'or', not '||': with '||' the die was attached to the path string and
    # could never fire. An already existing directory is not an error.
    unless ( -d $files_dir ) {
        mkdir $files_dir
            or die "unable to create %TEMP%\\_temp_files: $!";
    }

    open my $render_fh, '>', "$ENV{TEMP}/_temp.html"
        or die "unable to write to %TEMP%\\_temp.html: $!";

    # Map a src value to its local file name: drop everything up to the last
    # slash, then make the remainder filesystem safe using the same character
    # set that is applied when the resources themselves are saved.
    # NOTE(review): HTML entities in the attribute (e.g. &amp;) are not
    # decoded here, so such names may differ from the saved file names.
    my $local_name = sub {
        my ($src) = @_;
        $src =~ s{^.*/}{}s;
        $src =~ tr/?=&,;:/_/;
        return $src;
    };

    # Localize src. Values without any slash are redirected as well, since
    # their files are stored under _temp_files too.
    ( my $localcont = $resp->content )
        =~ s{src="([^"]+)"}{'src="./_temp_files/' . $local_name->($1) . '"'}ge;

    print {$render_fh} $localcont;
    close $render_fh
        or die "unable to finish writing %TEMP%\\_temp.html: $!";
}
####
# For each link's $url: when rendering is requested, store the fetched
# content under %TEMP%\_temp_files\ using a filesystem-safe file name.
if ($render) {

    # File name = part of the URL after the last slash ...
    ( my $ele = $url ) =~ s{^.*/}{};

    # ... with characters that are unsafe in file names replaced by '_'.
    $ele =~ tr/?=&,;:/_/;

    if ( length $ele ) {

        # 'or', not '||': with '||' the die was attached to the path string
        # and an open failure went unnoticed.
        open my $out, '>', "$ENV{TEMP}\\_temp_files\\$ele"
            or die "unable to write to %TEMP%\\_temp_files\\$ele: $!";
        binmode $out;    # content may be an image or other binary data
        print {$out} $resp->content;
        close $out
            or die "unable to finish writing %TEMP%\\_temp_files\\$ele: $!";
    }
    else {

        # A URL ending in '/' leaves no file name; the path would then be the
        # directory itself, so nothing can be written for it.
        warn "no file name can be derived from $url, content not saved\n";
    }
}