use strict;
use warnings;
use LWP::Simple;
use HTML::TokeParser;
use HTML::Entities;

# Collect the anchor text of every link found on a fixed list of pages and
# write the result to news.txt in the current directory.
#
# For each page the output contains one heading line (the second
# dot-separated field of the page URL) followed by the trimmed text of every
# <a> element whose href is not a mailto: or javascript: link, each followed
# by a blank line.
#
# Pages that cannot be fetched are reported on STDERR and skipped; failure to
# open or close the output file is fatal.

my @newspages = qw(
    http://osis.nima.mil
    http://osis.nima.mil/myhot.html
    http://osis.nima.mil/myoffices.html
    http://osis.nima.mil/mytraining.html
    http://osis.nima.mil/mygeospatial.html
);

my $outfile = 'news.txt';

# Accumulates the text for ALL pages.  It must be declared once, outside the
# loop: a "my" inside the loop body creates a fresh variable per iteration,
# and a second "my" after the loop shadows it with an empty one, which left
# the output file empty.
my $body = '';

PAGE:
for my $page (@newspages) {

    # Heading for this page: the second dot-separated field of the URL
    # (e.g. "nima" for http://osis.nima.mil).  Fall back to the whole URL
    # if it contains no dot, so the heading is never undefined.
    my (undef, $short) = split /\./, $page;
    $short = $page unless defined $short;

    # get() returns undef when the request fails.  Handing that undef to
    # HTML::TokeParser produces "Use of uninitialized value" warnings from
    # inside HTML::PullParser, so detect the failure here and skip the page.
    my $html = get($page);
    unless (defined $html) {
        warn "Could not fetch $page; skipping\n";
        next PAGE;
    }

    my $parser = HTML::TokeParser->new(\$html);
    unless ($parser) {
        warn "Could not create a parser for $page; skipping\n";
        next PAGE;
    }

    $body .= "$short\n";

    while (my $token = $parser->get_tag("a")) {
        # Anchors without an href get the placeholder "-" so the scheme
        # test below always sees a defined string.
        my $url  = $token->[1]{href} || "-";
        my $text = $parser->get_trimmed_text("/a");

        # Don't grab javascript: or mailto: links.  URL schemes are
        # case-insensitive, and requiring the ":" keeps relative links such
        # as "mailtolist.html" from being dropped by accident.
        next if $url =~ /^(?:mailto|javascript):/i;

        $body .= "$text\n\n";
    }
}

# Three-argument open with a lexical handle; both open and close are checked
# because buffered write errors only surface at close.
open(my $out, '>', $outfile) or die "Cannot open $outfile for writing: $!\n";
print {$out} $body;
close($out) or die "Cannot close $outfile: $!\n";

# Warnings printed by the earlier version of this script (kept for reference).
# They are consistent with get() having returned undef for a page and that
# undefined value being passed to HTML::TokeParser:
#
#   Use of uninitialized value in substr at C:/Perl/site/lib/HTML/PullParser.pm line 82.
#   Use of uninitialized value in length at C:/Perl/site/lib/HTML/PullParser.pm line 85.
#   Use of uninitialized value in substr at C:/Perl/site/lib/HTML/PullParser.pm line 82.
#   Use of uninitialized value in length at C:/Perl/site/lib/HTML/PullParser.pm line 85.