I know you can teach a man to fish, but when you have fish just lying around, I feel like I've got to give some away.
This works for me; do with it what you like.
#!/usr/bin/perl -w
use strict;
use LWP::Simple;
use HTML::TokeParser;
use HTML::Entities;
# @newspages are pages I don't really want to read in full; I'd rather
# just have their links. Add or remove URLs here, one per line.
my @newspages = qw(
    http://www.surfstation.lu/00_news.asp
    http://www.cubadust.com/news.htm
    http://www.caffemocha.com/cgi-bin/index.htm
    http://www.halfproject.com/news.php
    http://www.reinvent.co.nz/v2/skins/news2002.asp
);
# Static page header: document head, inline stylesheet, and the opening of
# the single-row table that receives one <td> column per news page.
# The heredoc is single-quoted because nothing in it is meant to be
# interpolated. CSS values that had been split by line wrapping are rejoined,
# and the background colour carries the leading '#' a hex colour requires.
my $body = <<'END_HTML';
<html>
<head>
<title>Silent11 helps out</title>
<style>
body {margin:0; background-color:#e25805;font-family: arial; color:black;font-size:10px;}
a {font-family: arial; color: yellow; text-decoration: none; font-size: 10px;}
a:hover {text-decoration: underline overline; background-color:orange}
td {font-size:10px; color: darkred;}
</style>
</head>
<body>
<table>
<tr>
END_HTML
# Build one table column per news page: a heading with the site's short
# name, followed by every usable link found on that page.
for my $page_url (@newspages) {

    # Short label for the column: the first host-name component after an
    # optional "www." (e.g. "surfstation"). Fall back to the whole URL if
    # the address does not look like scheme://host.
    my ($short) = $page_url =~ m{\A \w+ :// (?:www\.)? ([^./:]+) }x;
    $short = $page_url unless defined $short;
    $body .= '<td valign=top>' . encode_entities($short, q{<>&"}) . '<br>';

    # LWP::Simple::get returns undef on failure; say so in the column and
    # move on rather than handing the parser nothing.
    my $html = get($page_url);
    if (!defined $html) {
        warn "Could not fetch $page_url\n";
        $body .= '(fetch failed)</td>';
        next;
    }

    my $p = HTML::TokeParser->new(\$html);
    while (my $token = $p->get_tag("a")) {

        # Named anchors (<a name=...>) have no href and nothing to link to.
        my $url = $token->[1]{href};
        next unless defined $url && length $url;

        my $text = $p->get_trimmed_text("/a");

        # Don't grab javascript: or mailto: links (not perfect).
        next if $url =~ m{\A \s* (?:mailto|javascript) : }xi;

        # The parser hands back decoded text and attribute values, so they
        # must be re-escaped before being written into new markup.
        # NOTE: href values are copied as found, so relative links from the
        # source page will not resolve from the generated file.
        $body .= sprintf qq{<a href="%s" target="new">%s</a><br>\n},
            encode_entities($url,  q{<>&"}),
            encode_entities($text, q{<>&"});
    }
    $body .= '</td>';
}
# Close the table row, the table and the document itself.
$body .= "</tr></table>\n</body>\n</html>\n";

# Send the page to an HTML file. Three-argument open with a lexical handle,
# and every step is checked so an unwritable directory or a full disk is
# reported instead of silently producing a missing or truncated file.
my $out_file = 'news.file.html';
open(my $out, '>', $out_file)
    or die "Cannot open $out_file for writing: $!\n";
print {$out} $body
    or die "Cannot write to $out_file: $!\n";
close($out)
    or die "Cannot close $out_file: $!\n";
-Silent11