#!/usr/bin/perl -w
use strict;
use LWP;
use Getopt::Std;
use XML::Twig;
use Getopt::Std;
# Command-line switches:
#   -k        keep the temporary file instead of deleting it at the end
#   -f FILE   read the page from FILE instead of fetching it over HTTP
my %opt;
getopts( 'kf:' => \%opt)
  or die "usage: $0 [-k] [-f file]\n";   # getopts has already warned about the bad switch

# Form fields sent in the POST request when no -f file is given.
# Fields marked '??' have no known meaning; they are kept as found.
my $params= { node_id => 3989,
              n0      => 220215,
              BIT     => 'use XML::Twig;', # text to search for
              BIS     => '-',              # string separator
              BH      => 1,                # match in title too
              HIT     => "",
              HIR     => 0,                # ??
              a       => 'mirod',          # just one author
              xa      => 0,                # exclude author
              xs      => 0,                # exclude section
              BES     => '-',
              HER     => 0,                # ??
              xr      => 0,                # exclude root nodes
              re      => 'S',              # ??
              go      => 'Search',
              #displaytype => 'xml',
            };
my $URL= "http://perlmonks.org/index.pl";

# Load the page into $doc: from the file named by -f when one was given,
# otherwise by POSTing $params to $URL.
my $doc;
if( defined $opt{f} && length $opt{f})      # length test so a file named "0" is accepted
  { my $file= $opt{f};
    # 3-arg open: the file name can never be interpreted as an open mode
    open( my $in, '<', $file) or die "cannot open $file: $!\n";
    local $/= undef;                        # slurp the whole file in one read
    $doc= <$in>;
    close $in;
  }
else
  { my $browser= LWP::UserAgent->new();
    my $resp= $browser->post( $URL, $params);
    # do not go on to parse an error page as if it were search results
    $resp->is_success
      or die "POST $URL failed: ", $resp->status_line, "\n";
    $doc= $resp->content;
  }
# Write the page to a temporary file next to the script, let tidy rewrite it
# in place as XHTML, then parse the result with XML::Twig.
my $TMP= "$0.tmp";
open( my $tmp_fh, '>', $TMP) or die "cannot create $TMP: $!\n";
print {$tmp_fh} $doc      or die "cannot write to $TMP: $!\n";
close $tmp_fh             or die "cannot close $TMP: $!\n";   # buffered write errors surface here

# List form of system: no shell is involved, so spaces or metacharacters in
# the script path (and hence in $TMP) are passed through untouched.
my $status= system( 'tidy', '-asxhtml', '-numeric', '-modify', $TMP);
die "cannot run tidy: $!\n" if $status == -1;
# NOTE(review): tidy is documented to exit with 1 for warnings and 2 for
# errors; only the latter is reported, and parsing is still attempted.
warn "tidy reported errors (exit status ", $status >> 8, ")\n"
  if ($status >> 8) > 1;

my $t= XML::Twig->new( keep_encoding => 1);
$t->parsefile( $TMP);
# Locate the first table inside the element whose class is "main_content".
my $content= $t->first_elt( '[@class="main_content"]')
  or die "no element with class \"main_content\" in the page\n";
my $table= $content->first_child( 'table')
  or die "no table found in the main content\n";

# For every row: drop the 2nd and 4th cells and make the link in the 3rd
# cell absolute.
foreach my $tr ($table->children( 'tr'))
  { my @td= $tr->children( 'td');
    next if @td < 4;                 # header or otherwise incomplete row: leave as is

    $td[1]->cut;
    $td[3]->cut;

    my $link= $td[2]->first_child( 'a') or next;
    my $href= $link->att( 'href');
    next unless defined $href;

    # leave URLs that already carry a scheme (or are protocol-relative) alone
    next if $href =~ m{\A(?:[a-z][a-z0-9+.-]*:|//)}i;

    $href =~ s{\A/}{};               # avoid a double slash for root-relative links
    $link->set_att( href => "http://perlmonks.org/$href");
  }

# Order the rows on the text of their first td child, in reverse order,
# then write the table to STDOUT.
$table->sort_children_on_field( 'td', order => 'reverse');
$table->print;
# Report completion on STDERR, then either keep the temporary file (-k) and
# say where it is, or remove it.
warn "done\n";
if( $opt{k})
  { warn "raw data is in $TMP\n"; }
else
  { unlink $TMP or warn "could not remove $TMP: $!\n"; }