# Estimate the total download size of a web page: the length of the HTML
# itself plus the reported size of every resource it links to (excluding
# the tags listed in @HTML::LinkExtractor::TAGS_IN_NEED).
#
# Usage: perl script.pl [URL]      (URL defaults to http://www.google.com)
use strict;
use warnings;
use LWP::Simple;
use HTML::LinkExtractor;

# Page to measure: first command-line argument, or the default.
my $url = shift || 'http://www.google.com';

# LWP::Simple::get() returns undef when the request fails; stop here rather
# than measuring and parsing an undefined document.
my $html = get($url);
die "Could not fetch $url\n" unless defined $html;

my $Total = length $html;
print "initial size $Total\n";

# Constructor arguments, per the HTML::LinkExtractor documentation:
# a callback invoked for each link found, the base URL, and the strip flag.
my $LX = HTML::LinkExtractor->new(
    sub {
        # $tag is a hash ref holding the tag name under the key 'tag' plus
        # the tag's attributes; $X (the first argument) is not used here.
        my ( $X, $tag ) = @_;

        # Tags listed in @TAGS_IN_NEED are not counted.
        return if grep { $_ eq $tag->{tag} } @HTML::LinkExtractor::TAGS_IN_NEED;

        print "$$tag{tag}\n";

        # %TAGS maps a tag name to the attributes that may carry a URL.
        for my $urlAttr ( @{ $HTML::LinkExtractor::TAGS{ $$tag{tag} } } ) {
            next unless exists $$tag{$urlAttr};

            # In list context head() returns (content type, document length,
            # ...); element 1 is the length. It is undef when the HEAD
            # request fails or the server reports no length, so such
            # resources are silently skipped.
            my $size = ( head( $$tag{$urlAttr} ) )[1];
            next unless $size;

            $Total += $size;
            print "adding $size\n";
        }
        return;
    },
    $url,
    0
);

$LX->parse( \$html );
print "The total size of \n$url\n is $Total bytes\n";
__END__
# Nothing below the __END__ marker is compiled or run as part of the script
# above. What follows is a separate debugging snippet kept for reference:
# run as its own program, it dumps three HTML::LinkExtractor package
# variables (@VALID_URL_ATTRIBUTES, %TAGS and @TAGS_IN_NEED), two of which
# (%TAGS and @TAGS_IN_NEED) the script above reads.
use Data::Dumper;
use HTML::LinkExtractor;
print Dumper \@HTML::LinkExtractor::VALID_URL_ATTRIBUTES;
print Dumper \%HTML::LinkExtractor::TAGS;
print Dumper \@HTML::LinkExtractor::TAGS_IN_NEED;