# Example word table: each key is a word and each value is the numeric
# weight recorded for it.
my $positive = {
    word1 => 2,
    word2 => 4,
    word3 => 1,
};
####
# invert_string($string, $weight, $hash)
#
# Adds $weight to $hash->{$word} for every word of $string that is not a
# stop word. The string is lower-cased, split with words() (which hands
# back an array reference), and any word with a true entry in %StopWords
# is skipped. $hash is modified in place.
#
#   $string - text to index; undef or the empty string is a no-op
#   $weight - amount added for each occurrence of a word
#   $hash   - hash reference accumulating word => total weight
#
# Returns nothing.
sub invert_string {
    my ($string, $weight, $hash) = @_;

    # A caller may pass a field that is absent from its record, which
    # arrives here as undef. Bail out rather than feed undef to lc().
    return unless defined $string && length $string;

    for my $word (@{ words(lc($string)) }) {
        next if $StopWords{$word};
        $hash->{$word} += $weight;
    }

    return;
}
####
# Example word table for a single sentence: each key is a word and each
# value is the numeric weight recorded for it.
my $sentence1 = {
    wordA => 2,
    wordB => 1,
};
####
# invert_item($item)
#
# Builds a word => weight table for one item. The item's "title" field is
# indexed with weight 2 and its "description" field with weight 1, both
# through invert_string(), accumulating into a single hash.
#
#   $item - hash reference with "title" and "description" entries
#
# Returns a reference to the accumulated hash.
sub invert_item {
    my ($item) = @_;

    my $weight_of = {};

    # Field name paired with the weight applied to each of its words;
    # the title is processed first, then the description.
    my @weighted_fields = ( [ title => 2 ], [ description => 1 ] );

    for my $pair (@weighted_fields) {
        my ($field, $weight) = @{$pair};
        invert_string($item->{$field}, $weight, $weight_of);
    }

    return $weight_of;
}
####
#!/usr/bin/perl
use XML::RSS;
use Algorithm::NaiveBayes;
use Lingua::EN::Splitter qw(words);
use Lingua::EN::StopWords qw(%StopWords);
# Train a naive Bayes classifier from two RSS files, "interesting.rdf" and
# "boring.rdf". Every item of a file becomes one training instance whose
# attributes come from invert_item() and whose label is the category name.
my $nb = Algorithm::NaiveBayes->new;

for my $category (qw(interesting boring)) {
    # Direct method call; the indirect form "new XML::RSS" depends on how
    # perl guesses at parse time and is discouraged by perlobj.
    my $rss = XML::RSS->new;
    $rss->parsefile("$category.rdf");

    for my $item (@{ $rss->{'items'} }) {
        $nb->add_instance(
            attributes => invert_item($item),
            label      => $category,
        );
    }
}

$nb->train;    # Work out all the probabilities
####
# Excerpt: parse "$category.rdf" and register each of its items with the
# classifier under that category. $category and $nb are not declared in
# this excerpt; presumably they come from the enclosing loop whose closing
# brace follows.
my $rss = XML::RSS->new;    # direct call instead of indirect "new XML::RSS"
$rss->parsefile("$category.rdf");

for my $item (@{ $rss->{'items'} }) {
    $nb->add_instance(
        attributes => invert_item($item),
        label      => $category,
    );
}
}