#!/usr/bin/perl
# webtimeload.pl — measure how long a web page (plus any frames and the
# content it links to) takes to download, and report size / time /
# throughput statistics, with broken-link detection.
#
# Usage example:  webtimeload.pl -u www.perlmonks.org
use strict;
use warnings;

use LWP::UserAgent;
use Time::HiRes qw(gettimeofday tv_interval);
use Getopt::Long;
use HTML::Parse;
use URI;    # used explicitly below; previously loaded only indirectly via LWP

$|++;       # unbuffer STDOUT so progress appears immediately

# command-line options
my ( @url, $count, $sleep, $timeout, $showhead, $protocol, $verbosity );

# other globals shared by the subs below
my ( $ssep, $bsep, %stat );
$bsep = ( '=' x 68 ) . "\n";    # big separator
$ssep = ( '-' x 68 ) . "\n";    # small separator

my $ua = LWP::UserAgent->new;

# Modulino guard: run the driver only when executed directly, so the file
# can be require-d (e.g. by tests) without fetching anything.
run() unless caller;

# Parse options, apply defaults, and fetch/report every requested URL
# $count times.  This is the original top-level driver code, unchanged in
# behavior, moved into a sub.
sub run {

    # NOTE: the original option spec used the invalid Getopt::Long type
    # '=n' for sleep/timeout/verbosity, which makes GetOptions croak with
    # "Error in option spec"; '=i' (integer) is what was intended.
    helpme()
        if (
        !GetOptions(
            'count=s'     => \$count,
            'url=s'       => \@url,
            'sleep=i'     => \$sleep,
            'timeout=i'   => \$timeout,
            'header'      => \$showhead,
            'protocol=s'  => \$protocol,
            'verbosity=i' => \$verbosity,
        )
        or !$url[0]
        );

    # default values
    $count   ||= 1;
    $sleep   ||= 1;
    $timeout ||= 15;
    $verbosity = defined $verbosity ? $verbosity : 1;
    $protocol ||= 'http';
    $protocol =~ s![:/]+!!g;    # accept '-protocol http://' style values too

    $ua->agent("libwww-perl/5.10.1");
    $ua->timeout($timeout);

    foreach my $repetition ( 1 .. $count ) {
        foreach my $u (@url) {

            # \Q protects against regex metacharacters in the protocol
            $u = $protocol . '://' . $u unless $u =~ m/^\Q$protocol\E/;
            $u =~ s{/\z}{};     # remove an eventual trailing slash
            $u = lc $u;

            # per-URL statistics, reset for every page measured
            %stat = (
                sbody      => 0,       # bytes: body skeleton of each frame/page
                sintlink   => 0,       # bytes: internally linked content
                sextlink   => 0,       # bytes: externally linked content
                tbody      => 0,       # seconds: frames/pages
                tintlink   => 0,       # seconds: internal content
                textlink   => 0,       # seconds: external content
                cintlink   => 0,       # count of internal links
                cextlink   => 0,       # count of external links
                brokenlink => [],      # URLs that failed to load
                mainurl    => $u,      # URL given on the command line
                pages      => [$u],    # main page plus any frames found
                cache      => {},      # per-resource [bytes, seconds]
                respcode   => undef,   # response metadata (main page only)
                respmess   => undef,
                resptitle  => undef,
                respserver => undef,
                resplength => undef,
                resphead   => undef,
            );

            # get_page() pushes frame URLs onto @{$stat{pages}} while we
            # iterate, so this foreach deliberately walks a growing list
            foreach my $page ( @{ $stat{pages} } ) { get_page($page) }

            foreach my $content_link ( keys %{ $stat{cache} } ) {
                time_link($content_link);
            }
            report();
        }
        sleep($sleep) unless $repetition == $count;
    }
}

################################################################################
# Fetch one page (the main URL or a frame), accumulate its timing/size into
# %stat, and collect the links it contains: frames are queued as further
# pages to fetch, everything else goes into the content cache for
# time_link() to measure.
sub get_page {
    my $urltoget = shift;

    my $t0   = [gettimeofday];
    my $resp = $ua->get($urltoget);
    $stat{tbody} += tv_interval( $t0, [gettimeofday] );

    # declared Content-Length, summed over EACH frame/page loaded
    $stat{resplength} += ( $resp->content_length || 0 );

    # real bytes obtained, summed over EACH frame/page loaded
    # ('|| ""' guards length() against an undef body on empty responses)
    $stat{sbody} += length( $resp->content || '' );

    # keep only the first response's metadata, i.e. the main page's
    # (not overwritten by any frame loaded afterwards)
    $stat{respcode}   = $stat{respcode}   || $resp->code;
    $stat{respmess}   = $stat{respmess}   || $resp->message;
    $stat{resptitle}  = $stat{resptitle}  || $resp->title;
    $stat{respserver} = $stat{respserver} || $resp->server;
    $stat{resphead}   = $stat{resphead}   || $resp->headers_as_string;

    # now parse the HTML
    my $parsed_html = parse_html( $resp->content );
    foreach my $link_found (
        @{ $parsed_html->extract_links(qw(body img src frame)) } )
    {
        next if $$link_found[0] =~ /#/;    # brutally skip anchors

        my $uriobj = URI->new( $$link_found[0] );
        my $absurl = $uriobj->abs($urltoget);    # resolve relative links

        if ( $$link_found[3] eq 'frame' ) {
            # a frame: queue it as another page to fetch; "$absurl"
            # stringifies the URI object
            push @{ $stat{pages} }, "$absurl";
            next;
        }

        # any other linked content: remember it, time_link() will store
        # [bytes, seconds] there later on
        $stat{cache}{$absurl} = [];
    }

    # HTML::Parse trees are self-referential and leak unless freed
    $parsed_html->delete;
}

################################################################################
# Download one linked resource, record its [bytes, seconds] in the cache,
# and classify it as internal or external to the main URL's host.  Failed
# downloads are recorded as broken links.
sub time_link {
    my $url = shift;

    my $t0       = [gettimeofday];
    my $resp     = $ua->get($url);
    my $ttaken   = tv_interval( $t0, [gettimeofday] );
    my $bytesrec = length( $resp->content || '' );

    if ( $resp->is_success() ) {
        @{ $stat{cache}{$url} } = ( $bytesrec, $ttaken );

        # official regex from http://search.cpan.org/~gaas/URI-1.58/URI.pm
        my ( $scheme, $domain, $path, $query, $fragment ) = $url =~
            m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|;

        # tell internal from external content.  \Q...\E quotes regex
        # metacharacters in the host; an undef host (e.g. relative or
        # schemeless URL) counts as external — the original's bare
        # /$domain/ would silently reuse the previous pattern here.
        if ( defined $domain and $stat{mainurl} =~ /\Q$domain\E/ ) {
            $stat{sintlink} += $bytesrec;
            $stat{tintlink} += $ttaken;
            $stat{cintlink}++;
        }
        else {
            $stat{sextlink} += $bytesrec;
            $stat{textlink} += $ttaken;
            $stat{cextlink}++;
        }
    }
    else {
        push @{ $stat{brokenlink} }, $url;
    }
}

################################################################################
# Format a byte count as a human-readable string, e.g. "1.5000 Kb".
# ("Arrotonda" is Italian for "rounds off".)  Undef or non-positive sizes
# yield "0 bytes".
sub Arrotonda_Mega {
    my ( $size, $n ) = ( shift, 0 );
    return "0 bytes" unless defined $size;
    return "0 bytes" unless $size > 0;
    ++$n and $size /= 1024 until $size < 1024;
    return sprintf "%.4f %s", $size, (qw[ bytes Kb Mb Gb ])[$n];
}

################################################################################
# Throughput helper: bytes/seconds rendered via Arrotonda_Mega.  Guards
# against "Illegal division by zero" when the elapsed time is 0 (e.g. an
# instantaneous or fully cached response), which crashed the original.
sub rate_of {
    my ( $bytes, $seconds ) = @_;
    return Arrotonda_Mega( $seconds > 0 ? $bytes / $seconds : 0 );
}

################################################################################
# Print the per-URL report at the configured verbosity:
#   0  — one tab-separated line (timestamp, url, code, bytes, seconds, rate)
#   1  — short boxed summary (default)
#   >1 — full breakdown (internal/external content, broken links, …)
# Any other value falls back to 1.  With -header, dumps the raw response
# headers afterwards.
sub report {
    my $totsize = $stat{'sbody'} + $stat{'sintlink'} + $stat{'sextlink'};
    my $tottime = $stat{'tbody'} + $stat{'tintlink'} + $stat{'textlink'};

    ####################
    if ( $verbosity == 0 ) {
        print scalar localtime(time), "\t", $stat{mainurl}, "\t",
            $stat{respcode}, "\t", $totsize, "\t", $tottime, "\t",
            rate_of( $totsize, $tottime ), "/sec", "\n";
    }
    ####################
    elsif ( $verbosity == 1 ) {
        print $bsep, $stat{mainurl}, "\t", scalar localtime(time), "\n",
            $ssep,
            $stat{respcode}, "\t", $stat{respmess} || 'UNDEF', "\t",
            $stat{resptitle} || 'UNDEF', "\n", $ssep,
            "downloaded ", Arrotonda_Mega($totsize),
            " ($totsize bytes) in ", $tottime, " seconds (",
            rate_of( $totsize, $tottime ), "/s)\n", $bsep;
    }
    ####################
    elsif ( $verbosity > 1 ) {
        print $bsep, $stat{mainurl}, "\t", scalar localtime(time), "\n",
            $ssep,
            "Response code: ",            $stat{respcode}   || 'UNDEF', "\n",
            "Response message: ",         $stat{respmess}   || 'UNDEF', "\n",
            "Response server: ",          $stat{respserver} || 'UNDEF', "\n",
            "Response declared length: ", $stat{resplength} || 'UNDEF', "\n",
            "Response title: ",           $stat{resptitle}  || 'UNDEF', "\n",
            $ssep,
            "main page content (", scalar @{ $stat{pages} }, "):\t",
            Arrotonda_Mega( $stat{sbody} ), " in ", $stat{tbody},
            " seconds \@ ", rate_of( $stat{sbody}, $stat{tbody} ), "/s\n";

        if ( $verbosity > 2 ) {
            print $ssep, "\tdetail of loaded pages (url):\n", $ssep,
                "\t", join( "\n\t", @{ $stat{pages} } ), "\n", $ssep;
        }

        ### report about extra downloaded content local to $url
        if ( $stat{cintlink} > 0 ) {
            print "included content ($stat{cintlink}):\t",
                Arrotonda_Mega( $stat{sintlink} ), " in ", $stat{tintlink},
                " seconds \@ ",
                rate_of( $stat{sintlink}, $stat{tintlink} ), "/s\n";
        }
        else { print "no included content found.\n" }

        ### report about extra downloaded content external to $url
        if ( $stat{cextlink} > 0 ) {
            print "external content ($stat{cextlink}):\t",
                Arrotonda_Mega( $stat{sextlink} ), " in ", $stat{textlink},
                " seconds \@ ",
                rate_of( $stat{sextlink}, $stat{textlink} ), "/s\n";
        }
        else { print "no external content found.\n" }

        ### report about broken links
        if ( scalar @{ $stat{brokenlink} } > 0 ) {
            print "broken links found:\t", scalar @{ $stat{brokenlink} },
                "\n", $ssep;
            if ( $verbosity > 2 ) {
                # original read "detail of broken kins (url:" — typo fixed
                print "\tdetail of broken links (url):\n", $ssep,
                    ( join( "\n", @{ $stat{brokenlink} } ) ), $ssep;
            }
        }
        else { print "no broken links found.\n", $ssep }

        if ( ( $verbosity > 2 ) && keys %{ $stat{cache} } > 0 ) {
            print "\tdetail of loaded content (url bytes seconds):\n", $ssep,
                ( map { "\t$_ @{$stat{cache}{$_}}\n" }
                    sort keys %{ $stat{cache} } ),
                $ssep;
        }

        ## total
        print "downloaded ", Arrotonda_Mega($totsize),
            " ($totsize bytes) in ", $tottime, " seconds (",
            rate_of( $totsize, $tottime ), "/s)\n", $bsep;
    }
    # verbosity set to something strange: switch to the default and re-run
    else { $verbosity = 1; report() }

    # eventually print headers
    if ($showhead) {
        print "headers received:\n", $ssep, $stat{resphead}, "\n", $bsep;
    }
}

################################################################################
# Print usage and exit.
# NOTE(review): the original heredoc was truncated in this copy of the file
# ("print <"); the text below is reconstructed from the documented options
# and defaults — verify against the original script before relying on it.
sub helpme {
    print <<"END_HELP";
Usage: $0 -url URL [-url URL ...] [options]

Options:
  -url URL         page to load (may be given more than once)
  -count N         number of repetitions (default 1)
  -sleep N         seconds to sleep between repetitions (default 1)
  -timeout N       LWP timeout in seconds (default 15)
  -header          also print the response headers received
  -protocol PROTO  protocol prepended to bare URLs (default http)
  -verbosity N     0 = one line, 1 = short report, >1 = full report
END_HELP
    exit 1;
}