use strict;
use LWP::Simple;
use HTML::TokeParser;
use HTML::Entities;
use warnings;

# Fetch each page listed in @newspages, collect the visible text of every
# <a> link on it (skipping mailto: and javascript: links), and write the
# combined result to news.txt in the current directory.
#
# Output layout, one section per page:
#   "\n<label> " followed by "<link text> \n" per link, closed by " | "
# where <label> is the second dot-separated piece of the page URL.

# Pages to scan for links.
my @newspages = qw(
    http://osis.nima.mil
    http://osis.nima.mil/myhot.html
    http://osis.nima.mil/myoffices.html
    http://osis.nima.mil/mytraining.html
    http://osis.nima.mil/mygeospatial.html
);

# Name of the plain-text file that receives the collected link text.
my $outfile = 'news.txt';

# Accumulates the output for ALL pages. It must be declared outside the
# loop: a `my` inside the loop body would create a fresh variable on every
# iteration and nothing would survive to be written out.
my $body = '';

for my $page_url (@newspages) {

    # Section label: the second dot-separated piece of the URL
    # (e.g. "nima" for http://osis.nima.mil/...).
    my $short = (split /\./, $page_url)[1];
    $body .= "\n$short ";

    # LWP::Simple::get returns undef when the page cannot be retrieved.
    my $content = get($page_url);
    if (defined $content) {
        my $parser = HTML::TokeParser->new(\$content);

        while (my $tag = $parser->get_tag("a")) {
            # Anchors without an href get a placeholder so the filter
            # below always has a defined string to match against.
            my $href = $tag->[1]{href} || "-";

            # Read the link text before filtering so the parser position
            # advances the same way for kept and skipped links.
            my $text = $parser->get_trimmed_text("/a");

            # Don't grab javascript: or mailto: links.
            next if $href =~ /^mailto|^javascript/;

            $body .= "$text \n";
        }
    }
    else {
        # Keep the (empty) section so the output still has one entry per
        # page, but tell the operator that the fetch failed.
        warn "Could not fetch $page_url; no links recorded for it\n";
    }

    # Section terminator.
    $body .= " | ";
}

# Write everything to the output file; failures to open, write, or flush
# are fatal rather than silently producing an empty or truncated file.
open(my $out, '>', $outfile) or die "Cannot open $outfile for writing: $!";
print {$out} $body           or die "Cannot write to $outfile: $!";
close($out)                  or die "Cannot close $outfile: $!";