#!/usr/bin/perl
use strict;
use warnings;
use HTML::TokeParser::Simple; # friendlier tokens
use LWP::Simple;
# Download the server-list page. LWP::Simple::get returns undef on any
# failure (DNS, timeout, HTTP error), so stop here with a clear message
# instead of handing undef to the parser further down.
my $url  = "http://pvpgnservers.ath.cx";
my $html = get($url);
die "could not fetch $url\n" unless defined $html;
=head1 MY Test HTML
The first "TH" is the landmark we care about, so we have to "seek" to it.
Next comes a check to make sure there is a link to index_address.html.
If that passes, the HTML has not changed significantly, so we loop:

    while there are TRs {
        eat a TD and call get_trimmed_text, 8 times in a row
    }

my $html = q{
Address |
Description/URL |
Location |
Uptime |
Contact |
Software |
Users |
Games |
211.62.58.113 |
unknown |
unknown |
0d 03:26 |
a PvPGN user |
PvPGN BnetD Mod 1.1.6 Linux |
1158 |
320 |
};
=cut
# Parse the fetched page and print the text of every table cell, one per line.
my $p = HTML::TokeParser::Simple->new(\$html);

# The first <th> marks the start of the server table; seek to it.
$p->get_tag('th') or die "crap";

# Sanity check: the next link is expected to point at the address index page.
# If it is missing or points elsewhere, the page layout has changed and the
# fixed 8-cells-per-row assumption below can no longer be trusted.
# Both spellings ("adress" / "address") are accepted.
my $link = $p->get_tag('a');
my $href = $link ? $link->get_attr('href') : undef;
die "change code, stuff changed"
    unless defined $href && $href =~ /index_add?ress\.html/i;

# Every remaining row is read as 8 cells.
ROW:
while ( $p->get_tag('tr') ) {
    for ( 1 .. 8 ) {
        # get_tag skips ahead to the next <td> (the token right after <tr>
        # is not text); if there is none, the document has run out.
        $p->get_tag('td') or last ROW;
        print $p->get_trimmed_text('/td') . "\n";
    }
}