#!/usr/bin/perl
# Originally posted by Silent11.
#
# Fetches a fixed list of news pages, pulls every <a> link out of each one,
# and writes a single HTML page (news.file.html) containing one table column
# of links per site.
use strict;
use warnings;
use LWP::Simple;
use HTML::TokeParser;
use HTML::Entities;

# @newspages are pages I don't really wanna read, but I'd rather just
# have the links
my @newspages = qw(
    http://www.surfstation.lu/00_news.asp
    http://www.cubadust.com/news.htm
    http://www.caffemocha.com/cgi-bin/index.htm
    http://www.halfproject.com/news.php
    http://www.reinvent.co.nz/v2/skins/news2002.asp
);

# The generated page is written here, relative to the current directory.
my $outfile = 'news.file.html';

# Static page header; non-interpolating heredoc because it contains no
# variables.
my $body = <<'END_HTML';
<html>
<head>
<title>Silent11 helps out</title>
<style>
body {margin:0; background-color:#e25805;font-family: arial; color:black;font-size:10px;}
a {font-family: arial; color: yellow; text-decoration: none; font-size: 10px;}
a:hover {text-decoration: underline overline; background-color:orange}
td {font-size:10px; color: darkred;}
</style>
</head>
<body>
<table>
<tr>
END_HTML

# site_label($url)
#   Returns a short column heading for $url: the first label of the host
#   name with any leading "www." removed (e.g. "surfstation" for
#   http://www.surfstation.lu/...). Falls back to the URL itself when no
#   host can be recognised.
sub site_label {
    my ($url) = @_;

    my ($host) = $url =~ m{\A[a-z][a-z0-9+.-]*://(?:[^/@]*@)?([^/:?#]+)}i;
    return $url unless defined $host;

    $host =~ s/\Awww\.//i;
    my ($label) = split /\./, $host;

    return ( defined $label && length $label ) ? $label : $host;
}

# links_html($html)
#   Parses $html and returns a string with one "<a ...>text</a><br>" line
#   per anchor found. Anchors without an href and mailto:/javascript: links
#   are skipped (not perfect). hrefs are emitted as found, so relative
#   links are NOT resolved against the source page.
sub links_html {
    my ($html) = @_;

    my $parser = HTML::TokeParser->new( \$html );
    my $links  = '';

    while ( my $tag = $parser->get_tag('a') ) {
        my $href = $tag->[1]{href};

        # Always consume the anchor text so the parser advances the same
        # way whether or not the link is kept.
        my $text = $parser->get_trimmed_text('/a');

        # Named anchors (<a name=...>) have nothing to link to.
        next unless defined $href && length $href;

        # URL schemes are case-insensitive; require the colon so that a
        # relative path such as "mailtools.html" is not dropped.
        next if $href =~ m{\A\s*(?:mailto|javascript):}i;

        # Show the URL itself when the anchor has no visible text.
        $text = $href unless defined $text && length $text;

        # Both values come from a remote page: escape them before
        # embedding them in our own markup.
        $links .= sprintf qq{<a href="%s" target="new">%s</a><br>\n},
            encode_entities($href), encode_entities($text);
    }

    return $links;
}

for my $page (@newspages) {
    $body .= '<td valign=top>' . encode_entities( site_label($page) ) . '<br>';

    my $html = get($page);
    if ( defined $html ) {
        $body .= links_html($html);
    }
    else {
        # get() returns undef on any failure; keep going with the other sites.
        warn "Could not fetch $page; skipping\n";
        $body .= "(unavailable)<br>\n";
    }

    $body .= '</td>';
}

$body .= "</tr></table>\n</body>\n</html>\n";

# send to an html file
open my $out_fh, '>', $outfile
    or die "Cannot open $outfile for writing: $!\n";
print {$out_fh} $body
    or die "Cannot write to $outfile: $!\n";
close $out_fh
    or die "Cannot close $outfile: $!\n";
In reply to Re: Links
by silent11
in thread How to check links on a web page?
by Anonymous Monk
| For: | Use: | ||
| & | &amp; | ||
| < | &lt; | ||
| > | &gt; | ||
| [ | &#91; | ||
| ] | &#93; |