#!/usr/bin/perl
package Metabot;

use warnings;
use strict;

use WWW::SimpleRobot;
use HTML::Entities;
require HTML::Parser;
use Lingua::EN::Summarize;
use HTML::Summary;
use HTML::TreeBuilder;
use Lingua::EN::Keywords;
use HTML::Tree;
use HTML::Element;
use LWP::Simple;

@Metabot::ISA = qw(HTML::Parser);

# Crawl the site rooted at the URL given as the first command-line argument.
# For every page visited, report its title (or suggest one from the first
# <h1>), a summary-based meta description and meta keywords, and add the
# missing elements to the parsed tree's <head>.
my $url = $ARGV[0];
defined $url && length $url
    or die "Usage: $0 <start-url>\n";

my $parser = Metabot->new;

my $robot = WWW::SimpleRobot->new(
    URLS         => [ $url ],
    # \Q...\E: the start URL is literal text, so its '.', '?' and similar
    # characters must not be interpreted as regex metacharacters.
    FOLLOW_REGEX => "^\Q$url\E",
    DEPTH        => 2,
    TRAVERSAL    => 'depth',
    VISIT_CALLBACK => sub {
        my ( $url, $depth, $html, $links ) = @_;
        print "\nURL: $url - depth $depth\n";

        # Keep the untouched page source for Second(), which performs the
        # same clean-up itself; handing it the cleaned text would decode
        # entities twice.
        my $raw_html = $html;

        # NOTE(review): decoding entities before parsing turns escaped
        # markup such as &lt;b&gt; into real tags. Kept as in the original;
        # confirm this is intended.
        $html = decode_entities($html);
        $html =~ s/document\.write\(.+?\)\;//g;
        $html =~ s/\&\#.+?\;//g;

        # Parse once. eof() tells the builder the document is complete so
        # that buffered content is flushed into the tree.
        my $tree = HTML::TreeBuilder->new;
        $tree->parse($html);
        $tree->eof;

        # New elements must be attached to a tree node. The original called
        # push_content on the $html string, which is what raised
        # "Can't call method "push_content" without a package or object
        # reference".
        my $head = $tree->look_down( '_tag', 'head' );
        unless ($head) {
            $head = HTML::Element->new('head');
            $tree->unshift_content($head);
        }

        # look_down returns nothing when the page has no <title>, so guard
        # before calling as_text on it.
        my $title       = $tree->look_down( '_tag', 'title' );
        my $titleastext = $title ? $title->as_text : '';

        if ( length $titleastext ) {
            print "\nTitle: $titleastext\n\n";
        }
        else {
            # No usable title: fall back to the text of the first <h1>.
            my $newtitleh1       = $tree->look_down( '_tag', 'h1' );
            my $newtitleastexth1 = $newtitleh1 ? $newtitleh1->as_text : '';

            if ( length $newtitleastexth1 ) {
                my $newtitleastexth1clipped = substr( $newtitleastexth1, 0, 65 );

                # Reuse an existing (empty) <title> or create one in <head>.
                unless ($title) {
                    $title = HTML::Element->new('title');
                    $head->push_content($title);
                }
                $title->push_content($newtitleastexth1clipped);
                $titleastext = $newtitleastexth1clipped;

                # The message now states 65, matching the substr above.
                print "\n$url does not have a title. We created one from\n"
                    . " the first 65 characters your first headline tag \n"
                    . ":\n\n"
                    . "$newtitleastexth1clipped.\n\n"
                    . "Please change if desired.\n\n";
            }
            else {
                print "$url does not have a title and we are unable to suggest any.";
            }
        }

        # Meta description: first 155 characters of the generated summary.
        my $summary = summarize( $html, filter => 'html', maxlength => 500 );
        $summary = '' unless defined $summary;
        $summary =~ s/\s+/ /g;
        my $var = substr( $summary, 0, 155 );
        print "Using Lingua::EN::Summarize Summary: $var\n\n";

        # The element stays in the tree: it is neither given an href holding
        # the whole page nor deleted right after insertion.
        $head->push_content(
            HTML::Element->new(
                'meta',
                name    => 'description',
                content => $var,
            )
        );

        # Keywords come from the title *text* plus the summary; the original
        # concatenated the HTML::Element object itself.
        my @keywords = keywords( join ' ', $titleastext, $summary );
        print "Keywords: " . join( ", ", @keywords ) . "\n\n";

        # Comma-separated, the conventional form for a keywords meta tag.
        $head->push_content(
            HTML::Element->new(
                'meta',
                name    => 'keywords',
                content => join( ', ', @keywords ),
            )
        );

        # Missing alt attributes are reported and filled in by Second() and
        # MAlt(), declared further down inside this callback's braces.
        # Second() receives this page's own arguments rather than @ARGV, and
        # the crawl is no longer cut short by exit(0) after the first page.
        Second( $url, $depth, $raw_html, $links );

        # Release the parse tree explicitly.
        $tree->delete;
# Second($url, $depth, $html, $links)
#
# Reports <img> elements lacking alt text in $html and fills them in.
# Output, one item per line:
#   1. every <img> with no alt attribute at all,
#   2. a '---' separator,
#   3. every <img> whose alt is missing or empty,
#   4. a '---' separator,
#   5. every <img> after the missing/empty alts were set via MAlt().
#
# $url and $depth are only echoed in the header line; $links is accepted so
# the argument list matches the VISIT_CALLBACK signature but is not used.
# Returns nothing.
sub Second {
    my ( $url, $depth, $html, $links ) = @_;

    # Tolerate callers that pass fewer arguments (e.g. only a URL).
    $url   = '' unless defined $url;
    $depth = '' unless defined $depth;
    $html  = '' unless defined $html;

    print "\nURL: $url - depth $depth\n";

    $html = decode_entities($html);
    $html =~ s/document\.write\(.+?\)\;//g;
    $html =~ s/\&\#.+?\;//g;

    my $tree = HTML::TreeBuilder->new();
    $tree->parse($html);
    # Signal end of input so that buffered content is flushed into the tree.
    $tree->eof;

    # From here on every print ends with a newline.
    local $\ = $/;

    # Walk the tree once; the filters below work on this list.
    my @images = $tree->look_down( '_tag', 'img' );

    my @without_alt = grep { not defined $_->attr('alt') } @images;
    my @blank_alt   = grep {
        my $alt = $_->attr('alt');
        not( defined $alt and length $alt );
    } @images;

    print $_->as_HTML for @without_alt;
    print '---';
    print $_->as_HTML for @blank_alt;
    print '---';

    $_->attr( alt => MAlt($_) ) for @blank_alt;
    print $_->as_HTML for @images;

    # Release the parse tree explicitly.
    $tree->delete;
    return;
} ## end sub Second
# MAlt($img) -> string
#
# Builds replacement alt text for an image element from its src attribute:
# the file name without directory and without a known image/page extension,
# prefixed with '!'.
#
#   $img  any object whose attr('src') returns the image location.
#
# Returns '' when there is no src or no file name can be derived from it.
sub MAlt {
    my $imgscalar = $_[0];
    my $imgsrc    = $imgscalar->attr('src');

    # fileparse() croaks on an undefined path, so bail out early.
    return '' unless defined $imgsrc;

    # A query string or fragment is not part of the file name.
    $imgsrc =~ s/[?#].*\z//s;
    return '' unless length $imgsrc;

    use File::Basename;

    # fileparse() treats suffixes as patterns matched at the end of the
    # name, so the dot has to be part of the pattern; a bare 'gif' would
    # leave "photo." behind. /i also covers mixed-case extensions.
    my $suffix = qr/\.(?:gif|jpe?g|png|bmp|php|ico)/i;

    # Scalar context: fileparse() yields just the base name.
    my $imgfilenopathnoext = fileparse( $imgsrc, $suffix );
    return '' unless length $imgfilenopathnoext;

    return '!' . $imgfilenopathnoext;
}
},
# Handler registered under BROKEN_LINK_CALLBACK: writes the offending URL,
# the page that linked to it and the depth to STDERR.
BROKEN_LINK_CALLBACK => sub {
    my $url         = shift;
    my $linked_from = shift;
    my $depth       = shift;

    print {*STDERR} "$url looks like a broken link on $linked_from\n";
    print {*STDERR} "Depth = $depth\n";
},
);
# Run the crawl, then collect what the robot recorded.
$robot->traverse;

my @urls  = @{ $robot->urls };
my @pages = @{ $robot->pages };

# Unpack each page record; the values are not used any further here.
foreach my $page (@pages) {
    my ( $url, $depth, $modification_time )
        = @{$page}{qw(url depth modification_time)};
}
# text($self, $text)
#
# Appends $text to the text accumulated in $self->{TEXT} and returns the
# accumulated string.
sub text {
    my $self  = shift;
    my $chunk = shift;

    return $self->{TEXT} .= $chunk;
}
####
# Can't call method "push_content" without a package or object reference at seo_fxer.pl line 126.