#!/usr/bin/perl
# Binbot.pl: Reads a list of RSS feeds from a file and searches for keywords, writing matching <title> and
# <link> elements to a new RSS file.
# I use it for tracking USENET binaries, but it could be used for any purpose.
# Files:
#   urls.txt - A text file consisting of URLs to RSS feeds, one per line.
#       e.g. http://www.whatever.com/some-rss.rss
#   keywords.txt - A text file consisting of keywords to search for, one per line.
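#       e.g. (illustrative examples only) ubuntu, linux.*iso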
# 9/24/06 - switched to URI::Fetch (from LWP::Simple) to allow reading gzip-compressed feeds.
use strict;
use warnings;
use XML::RSS::Parser::Lite;
use XML::RSS::SimpleGen;
use URI::Fetch;
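# Note: XML::RSS::Parser::Lite, XML::RSS::SimpleGen and URI::Fetch all come from CPAN
# and may need to be installed before running this.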
#variables
my $url_file = "urls.txt";
my $rss_file = "binbot.rss";
my $keyword_file = "keywords.txt";
my @keywordlist;
my @rsslist;
#load keywords
open (my $keyword_fh, '<', $keyword_file) || die "No keyword file ($keyword_file) found, aborting.\n";
while (<$keyword_fh>) {
    chomp;
    push (@keywordlist, $_);
}
close $keyword_fh;
#load list of rss files
open (my $url_fh, '<', $url_file) || die "No URL file ($url_file) found, aborting.\n";
while (<$url_fh>) {
    chomp;
    push (@rsslist, $_);
}
close $url_fh;
#create the output rss image
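# rss_new takes the channel link and title for the output feed; the URL below is
# just a placeholder, so point it at whatever site you want the feed to reference.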
rss_new('http://www.whatever_you_want.com', "BinBot Results");
#loop through the rss files searching for keywords
foreach my $rss (@rsslist) {
    print "Reading $rss\n";
    # Fetch the feed. If successful, search for keywords.
    my $res = URI::Fetch->fetch($rss);    # returns undef on failure
    if ($res) {
        my $xml = $res->content;
        my $rp  = XML::RSS::Parser::Lite->new;
        $rp->parse($xml);
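        # Note: each keyword is applied as a case-insensitive regular expression
        # against the item title, so regex metacharacters in keywords.txt are significant.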
        # Search for keywords in each item of the feed.
        for (my $i = 0; $i < $rp->count(); $i++) {
            my $item  = $rp->get($i);
            my $title = $item->get('title');
            my $url   = $item->get('url');
            my $desc  = $item->get('description');
            foreach my $kw (@keywordlist) {
                # If the keyword exists in the title, add the entry to the rss image.
                if ( $title =~ /$kw/i ) {
                    print "Matched keyword $kw\n";
                    rss_item($url, $title, $desc);
                    last;    # stop after the first match so an item is only added once
                }
            }
        }
    } else {
        print "Failed to fetch $rss: ", URI::Fetch->errstr, "\n";
    }
}
# Save the rss image to a file.
rss_save($rss_file);
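# Usage: put urls.txt and keywords.txt in the current directory and run "perl Binbot.pl";
# the matching items are written to binbot.rss.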