# find_hits $file_name, @keywords;
#
# Returns a list of "score: line" strinsg.  Example:
#
# If file "foo.txt" contains:
#
#   Larry Wall, Programming Perl, Reference, x, y, perl
#   Peter Scott, Perl Medic, Legacy herding, z, a, perl
#
# A call to this function might look like this:
#
#   my @hits = find_hits "foo.txt", "medic";
#
# And it would return a one-element list:
#
#  '32: Peter Scot...a, perl'

sub find_hits( $ @ )
{
    my ( $file, @keywords ) = @_;

    # compose a regex for quick rejection of non-matching lines:
    my $any_keyword_re = join '|', map quotemeta $_, @keywords;
    $any_keyword_re = qr/$any_keyword_re/i;

    # and a keyword for the whole phrase
    my $phrase_re = join '\W+', @keywords;
    $phrase_re = qr/$phrase_re/i;

    # open input file for read
    open my $in, $file or croak "opening $file for read: $!";

    my @rv; # for accumulating return values

    while ( <$in> )
    {
        # reject lines with no matches out of hand
        next unless m/$any_keyword_re/;

        # any match at all is one point.
        my $score = 1;

        # split into fields for further scoring.
        my ( undef, $title, $desc, undef, undef, $keys ) =
            split /\t/;

        # title matches are worth 5 points each
        while ( $title =~ m/$any_keyword_re/g ) { $score += 5 }

        # description matches are only 1 point
        while ( $desc =~ m/$any_keyword_re/g ) { $score += 1 }

        # keyword matches are 4 points
        while ( $keys =~ m/$any_keyword_re/g ) { $score += 4 }

        # phrase matches (against entire line) are 10 points
        while ( m/$phrase_re/g ) { $score += 10 }

        # multiple matches are worth 10x the number
        # of keywords that matched.
        my $n_matches = () = m/$any_keyword_re/g; # see perlfaq4
        if ( $n_matches > 1 ) { $score += 10*$n_matches }

        # finally, format $score and save for returning
        # to the caller
        push @rv, sprintf "%03d: %s", $score, $_;
    }

    return @rv;
}