my %good;     # count of token occurrences in "good" email
my %bad;      # count of token occurrences in "bad" email
my $ngood;    # number of "good" messages
my $nbad;     # number of "bad" messages

# findProb($word)
#
# Estimate how strongly $word indicates a "bad" message, from the token
# counts in %good / %bad and the corpus sizes $ngood / $nbad.
#
# Returns:
#   undef        - the word is too rare to judge (doubled good count plus
#                  bad count is not greater than 5), or neither corpus
#                  yields a usable ratio.
#   0.01 .. 0.99 - bad_ratio / (good_ratio + bad_ratio), clamped so a
#                  single word is never treated as absolute proof.
#
# NOTE(review): min() is expected to be in scope (e.g. via
# "use List::Util qw(min)"); the import is not visible in this chunk.
sub findProb {
    my $word = shift;

    # Good occurrences are counted double.  The parentheses matter:
    # "2 * $good{$word} || 0" multiplies undef first and warns for
    # words that have never been seen.
    my $good_count = 2 * ( $good{$word} || 0 );
    my $bad_count  = $bad{$word} || 0;

    # Explicit undef (not a bare return) so list-context callers still
    # receive exactly one value per word, as before.
    return undef unless ( $good_count + $bad_count ) > 5;

    # An empty or unset corpus contributes a ratio of 0 instead of
    # dying with "Illegal division by zero".
    my $good_ratio = $ngood ? min( 1.0, $good_count / $ngood ) : 0;
    my $bad_ratio  = $nbad  ? min( 1.0, $bad_count / $nbad )   : 0;

    my $denom = $good_ratio + $bad_ratio;
    return undef unless $denom > 0;    # no usable evidence either way

    my $prob = $bad_ratio / $denom;

    return 0.99 if $prob > 0.99;
    return 0.01 if $prob < 0.01;
    return $prob;
}