#!/usr/bin/perl -w # Small CGI program that does a normal perlmonks.org search # and weighs the result depending on number of matched words # and if it is a root node or a reply. # TODO: remove common words like 'in' and 'from'? # BUG: If search results in just one node, this will report 'none found'. use strict; use CGI; use LWP::Simple; use HTML::TokeParser; # Buffering off $| = 1; my $q = CGI->new; # Print out the html print $q->header; print $q->start_html(-title => 'Perlmonks weighted search'); print $q->h1('Perlmonks weighted search'); print '
Search perlmonks.org:
'; print $q->start_form(-method => 'get', -action => $q->self_url); print $q->textfield(-name => 'query'); print $q->br, $q->br; print $q->submit(-name => 'search', -value => 'Search'); print $q->end_form; # Do we have a search? if($q->param('search') eq 'Search') { print $q->hr; my $search = $q->param('query'); # Do a search on perlmonks and weigh it: my @list = &get_weighted( $search ); # Did we get any results? if(@list) { print "Weighted results for $search:
"; # Print the list print 'No results were found for $search.
"; } } print $q->end_html; # The do-it-all sub sub get_weighted { # The search query: my $search = shift; # Split it up my @query = split ' ', $search; # ...and regroup (prob not necessary) my $search_words = join '%20', @query; # Fetch from the real thing: my $html = get "http://www.perlmonks.org/index.pl?node=$search_words" or die "$!"; my $parser = HTML::TokeParser->new(\$html); # Place to store our node titles my @results = (); # We only want links within a list: while($parser->get_tag('li')) { # The first tag after the