Category: Web/USENET
Author/Contact Info: iamjafi@gmail.com
Description: This is a short program that I wrote to help me keep track of what .NZB files are available on various USENET search websites. The program grabs the RSS feeds from various sites and compares their contents to a list of keywords. Any matching entries are written to a new RSS file stored locally on my computer.

Update: 9/24/06 - I found an RSS feed that requires gzip compression. Since LWP::Simple doesn't support gzip, I have changed the code to use URI::Fetch instead. Thanks to wolv's comment here.
#!/usr/bin/perl
# Binbot.pl: Reads a list of RSS feeds from a file and searches for keywords,
# writing matching <title> and <link> elements to a new RSS file.
# I use it for tracking USENET binaries, but it could be used for any purpose.
# Files:
#    urls.txt - A text file consisting of URLs to RSS feeds, one per line.
#        e.g. http://www.whatever.com/some-rss.rss
#    keywords.txt - A text file consisting of keywords to search for, one per line.
# 9/24/06 - switched to URI::Fetch (from LWP::Simple) to allow reading gzip compressed feeds.

use strict;
use warnings;

use XML::RSS::Parser::Lite;
use XML::RSS::SimpleGen;
use URI::Fetch;


# Configuration: input/output file names, and the working lists that the
# loader loops below will fill in.
my $url_file     = "urls.txt";     # one feed URL per line
my $keyword_file = "keywords.txt"; # one keyword per line
my $rss_file     = "binbot.rss";   # local output RSS file
my @keywordlist  = ();             # keywords loaded from $keyword_file
my @rsslist      = ();             # feed URLs loaded from $url_file

# Load the search keywords, one per line, into @keywordlist.
# Uses three-arg open with a lexical filehandle (the old two-arg bareword
# form is unsafe and leaks a global handle). Blank lines are skipped:
# an empty keyword would become an empty regex and match every title.
open my $keyword_fh, '<', $keyword_file
    or die "No Keyword file ($keyword_file) found, aborting.\n";
while (my $line = <$keyword_fh>) {
    chomp $line;
    next unless length $line;    # ignore blank lines
    push @keywordlist, $line;
}
close $keyword_fh;

# Load the list of RSS feed URLs, one per line, into @rsslist.
# Three-arg open with a lexical filehandle, matching the keyword loader.
# Blank lines are skipped so we never try to fetch an empty URL.
open my $url_fh, '<', $url_file
    or die "No URL file ($url_file) found, aborting.\n";
while (my $line = <$url_fh>) {
    chomp $line;
    next unless length $line;    # ignore blank lines
    push @rsslist, $line;
}
close $url_fh;

# Create the in-memory output RSS image (XML::RSS::SimpleGen).
# The channel link is a placeholder — substitute your own site URL.
# Fixed the malformed scheme: the original read 'htp://'.
rss_new('http://www.whatever_you_want.com', "BinBot Results");

# Loop through the RSS feeds, searching each item's title for keywords.
# Matching items are added to the output RSS image via rss_item().
foreach my $rss (@rsslist) {

    print "Reading $rss\n";

    # Fetch the feed. URI::Fetch->fetch returns undef on failure (with the
    # reason in URI::Fetch->errstr), so warn and skip this feed rather than
    # aborting the whole run — one dead site should not stop the others.
    # (The original's $res->is_success / status_line branch was leftover
    # LWP::Simple-era code and could never run after "or die".)
    my $res = URI::Fetch->fetch($rss);
    unless ($res) {
        warn "Could not fetch $rss: " . URI::Fetch->errstr . "\n";
        next;
    }

    my $rp = new XML::RSS::Parser::Lite;
    $rp->parse($res->content);

    # Walk every item in the feed.
    for (my $i = 0; $i < $rp->count(); $i++) {
        my $item = $rp->get($i);

        # These are per-item, not per-keyword — fetch them once, outside
        # the keyword loop (the original re-read them for every keyword).
        my $title = $item->get('title');
        my $url   = $item->get('url');
        my $desc  = $item->get('description');

        foreach my $kw (@keywordlist) {
            # Keywords are treated as case-insensitive regex patterns,
            # as in the original (so keywords.txt may contain regexes).
            if ($title =~ /$kw/i) {
                print "Matched keyword $kw\n";
                rss_item($url, $title, $desc);
                last;    # stop after the first match so an item matching
                         # several keywords isn't written more than once
            }
        }
    }
}

# Write the accumulated RSS image out to the local results file.
rss_save($rss_file);