# Fetch one page, record every 'src' link it contains in %stat and, when
# $render is true, write a localised copy of the page (and of the fetched
# content) under %TEMP% so the result can be viewed from disk.

my $url;
my %stat;    # the cache hash where pages and links are accumulated in their keys
my $ua     = LWP::UserAgent->new;
my $parser = HTML::LinkExtor->new;

# Directory, relative to the saved page, that holds the downloaded content.
my $local_dir = './_temp_files/';

# Map a URL (or a raw src value) to the file name used for its local copy:
# keep only what follows the last '/', then replace characters that are not
# filesystem safe with '_'.  The page rewrite and the file save both go
# through this helper so the names they produce always agree.
my $local_name = sub {
    # Copy first: the argument may alias $1 of an enclosing substitution.
    my $name = "$_[0]";
    $name =~ s{^.*/}{};
    $name =~ s/[?=&,;:]/_/g;
    return $name;
};

my $resp = $ua->get($url);
$parser->parse( $resp->content );
my $base = $resp->base;

####
# Each element returned by links() is [ $tag, $attr => $link, ... ].
# Unpacking the pairs into a hash finds 'src' even when another link
# attribute precedes it in the tag.
foreach my $link_found ( $parser->links ) {
    my ( $tag, %attr ) = @{$link_found};
    next unless defined $attr{src};

    my $absurl = URI->new( $attr{src} )->abs($base);

    # A frame is a page of its own: queue it for another iteration.
    if ( $tag eq 'frame' || $tag eq 'iframe' ) {
        push @{ $stat{pages} }, "$absurl";    # stringify the URI object
        next;
    }

    # Anything else is content: add it to the cache hash.
    $stat{cache}{$absurl} = [];    # will store length and time later on
}

####
if ($render) {
    my $temp_dir = "$ENV{TEMP}\\_temp_files";

    # mkdir fails when the directory already exists, so only create it once.
    unless ( -d $temp_dir ) {
        mkdir $temp_dir
            or die "unable to create %TEMP%\\_temp_files: $!";
    }

    # 'or' (not '||') so that the check applies to open() itself rather
    # than to the always-true file name string.
    open my $page_fh, '>', "$ENV{TEMP}/_temp.html"
        or die "unable to write to %TEMP%\\_temp.html: $!";

    # Localise every double-quoted src: point it into _temp_files and give
    # it the same filesystem-safe name the content is saved under.
    ( my $localcont = $resp->content )
        =~ s{src="([^"]+)"}{ 'src="' . $local_dir . $local_name->($1) . '"' }ge;

    print {$page_fh} $localcont;
    close $page_fh
        or die "unable to write to %TEMP%\\_temp.html: $!";
}

####
# foreach link's $url
# NOTE(review): presumably this part runs once per fetched link, with $url
# and $resp referring to that link -- confirm against the full program.
if ($render) {
    my $ele = $local_name->($url);

    open my $ele_fh, '>', "$ENV{TEMP}\\_temp_files\\$ele"
        or die "unable to write to %TEMP%\\_temp_files\\$ele: $!";
    binmode $ele_fh;    # content may be binary (images etc.)
    print {$ele_fh} $resp->content;
    close $ele_fh
        or die "unable to write to %TEMP%\\_temp_files\\$ele: $!";
}