# Example of an inverted ("bag of words") hash: each key is a word and each
# value is the weight accumulated for that word.
my $positive = {
    word1 => 2,
    word2 => 4,
    word3 => 1,
};

####

# invert_string($string, $weight, $hash)
#
# Adds $weight to $hash->{$word} for every word of $string that is not a
# stop word. The string is lower-cased before being split.
#
#   $string - text to split; undef is accepted and contributes nothing
#   $weight - amount added to the entry of each word that is kept
#   $hash   - hash reference that is updated in place
#
# words() comes from Lingua::EN::Splitter and is dereferenced here as an
# array reference; %StopWords comes from Lingua::EN::StopWords. Returns
# nothing useful.
sub invert_string {
    my ($string, $weight, $hash) = @_;

    # invert_item passes hash fields straight through, so a missing field
    # arrives here as undef; there is nothing to count in that case.
    return unless defined $string;

    for my $word (@{ words(lc $string) }) {
        next if $StopWords{$word};
        $hash->{$word} += $weight;
    }

    return;
}

####

# A second example of the same word => weight shape.
my $sentence1 = {
    wordA => 2,
    wordB => 1,
};

####

# invert_item($item)
#
# Builds a word => weight hash for one item. Words found in $item->{title}
# are added with weight 2 and words found in $item->{description} with
# weight 1, so a word appearing in both accumulates both weights.
#
# Returns a reference to the newly built hash.
sub invert_item {
    my $item = shift;

    my %weight_of;
    invert_string($item->{title},       2, \%weight_of);
    invert_string($item->{description}, 1, \%weight_of);

    return \%weight_of;
}

####

#!/usr/bin/perl
use strict;
use warnings;

use XML::RSS;
use Algorithm::NaiveBayes;
use Lingua::EN::Splitter qw(words);
use Lingua::EN::StopWords qw(%StopWords);

my $nb = Algorithm::NaiveBayes->new();

# Each category is read from "<category>.rdf" in the current directory, and
# every item in that file becomes one training instance labelled with the
# category name.
for my $category (qw(interesting boring)) {
    my $rss = XML::RSS->new;
    $rss->parsefile("$category.rdf");

    for my $item (@{ $rss->{'items'} }) {
        $nb->add_instance(
            attributes => invert_item($item),
            label      => $category,
        );
    }
}

$nb->train;    # Work out all the probabilities

####

# Excerpt of the per-category loop body from the script above. It is kept as
# a comment because it relies on $category and $nb from the enclosing loop
# and ends with that loop's closing brace, so it is not valid code by itself.
#
#     my $rss = XML::RSS->new;
#     $rss->parsefile("$category.rdf");
#     $nb->add_instance(attributes => invert_item($_), label => $category)
#         for @{$rss->{'items'}};
# }