#!/usr/bin/perl
#
# Page-weight checker.
#
# For every URL given on the command line: download the page, extract the
# links carried by its <body> and <img> elements, download each of those too,
# and print a per-resource table (HTTP status code, size in bytes, URL)
# followed by the total number of bytes transferred.
#
# Usage: script.pl www.example.com [more urls ...]
#
use strict;
use warnings;
use LWP::UserAgent;
use HTML::Parse;
use Data::Dumper;

$| = 1;    # unbuffered STDOUT so progress shows up while downloads run

foreach my $url (@ARGV) {
    my $totsize = 0;
    my ( @intlink, @extlink );

    print "PROCESSING:\t$url\n";

    $url =~ s/\s+//g;    # delete spaces
    # Only supply a scheme when the caller did not give one; blindly
    # prepending 'http://' would corrupt an argument such as
    # 'https://example.com'.
    $url = 'http://' . $url unless $url =~ m{\A[a-z][a-z0-9+.\-]*://}i;
    $url =~ s{/\z}{};    # remove a trailing '/', links are appended below

    my $ua = LWP::UserAgent->new;
    $ua->agent("libwww-perl/5.10.1");

    my $response = $ua->get($url);
    unless ( $response->is_success ) {
        # The body of a failed response is an error page, not the document
        # we were asked to measure: report it and move on to the next URL.
        warn "cannot fetch $url: " . $response->status_line . "\n";
        next;
    }

    my $body = $response->content;
    print "body size:\t", length($body), "\n";
    $totsize += length($body);

    my $parsed_html = parse_html($body);
    for my $entry ( @{ $parsed_html->extract_links(qw(body img src)) } ) {
        my ($link) = @$entry;

        if ( $link =~ m{\A//} ) {
            # Protocol-relative link ("//host/path"): it names another
            # host, so it must not be glued onto $url.
            push @extlink, 'http:' . $link;
        }
        elsif ( $link =~ m{\A/} ) {
            # Root-relative internal content.
            push @intlink, $url . $link;
        }
        elsif ( $link =~ m{\A\Q$url\E} ) {
            # Absolute link pointing back at the page's own URL. \Q..\E
            # keeps '.', '?' etc. in the URL from acting as regex
            # metacharacters.
            push @intlink, $link;
        }
        elsif ( $link =~ m{\Ahttps?://}i ) {
            # External included content. Anchored so that a relative link
            # merely containing "http://" in its query string is not
            # mistaken for an absolute one.
            push @extlink, $link;
        }
        else {
            # Anything else (typically a relative path) is treated as
            # internal; get_links() resolves it against the base URL.
            push @intlink, $link;
        }
    }

    # Release the parse tree explicitly before processing the next URL.
    $parsed_html->delete;

    print "-" x 34, "\n", "code\tbytes\tlink\n", "-" x 34, "\n";
    $totsize += get_links( $url, @intlink );
    $totsize += get_links( $url, @extlink );

    print "\n\nTOTSIZE: " . Arrotonda_Mega($totsize) . " ($totsize bytes)\n";
}

################################################################################
# get_links( $urlbase, @links )
#
# Issues a GET for every entry of @links, printing one
# "code<TAB>bytes<TAB>url" row per request.
#
#   $urlbase - base URL (no trailing '/') used to turn links that carry no
#              scheme into absolute ones.
#              NOTE(review): resolution is plain concatenation, so it is only
#              right when $urlbase is a site root or directory - confirm that
#              matches how the script is used.
#   @links   - URLs to download; entries beginning with '#' (fragment-only
#              links) are skipped.
#
# Returns the summed length of all response bodies (0 for an empty list).
################################################################################
sub get_links {
    my ( $urlbase, @links ) = @_;
    my $totsize = 0;

    my $ua = LWP::UserAgent->new;
    $ua->agent("libwww-perl/5.10.1");

    foreach my $url (@links) {
        next if $url =~ /^#/;

        # A link without a scheme cannot be requested as-is (the user agent
        # needs an absolute URL), so hang it off the base.
        $url = "$urlbase/$url" unless $url =~ m{\A[a-z][a-z0-9+.\-]*:}i;

        my $request = HTTP::Request->new( 'GET', $url );
        my $response = $ua->request($request);
        my $bytes    = length( $response->content );

        print $response->code . "\t" . $bytes . "\t$url\n";
        $totsize += $bytes;
    }

    return $totsize;
}

################################################################################
# Arrotonda_Mega( $size )
#
# Formats a byte count for display, dividing by 1024 until the value drops
# below 1024 or the largest known unit is reached.
#
#   $size - number of bytes; undef, zero and negative values yield "0 bytes".
#
# Returns a string such as "1.5000 Kb". Values of 1024 Gb or more stay
# expressed in Gb rather than running past the end of the unit list.
################################################################################
sub Arrotonda_Mega {
    my ($size) = @_;

    return "0 bytes" unless defined($size) && $size > 0;

    my @units = qw( bytes Kb Mb Gb );
    my $n     = 0;
    while ( $size >= 1024 && $n < $#units ) {
        $size /= 1024;
        $n++;
    }

    return sprintf "%.4f %s", $size, $units[$n];
}
################################################################################