#   Step through each term found in the parsed content and proceed with
#   term indexing

for my $word ( split /\s+/, $self->{'_content'} )
{
    #   Clean up the raw whitespace-delimited word before indexing it.  The
    #   _normalise method is defined elsewhere; it is expected to keep only the
    #   characters a-z, A-Z, digits and underscore and to lowercase the result
    #   (verify against its definition).  Words which normalise to nothing are
    #   skipped.

    my $term = $self->_normalise( $word );
    length( $term ) or next;

    #   Lingua::Stem::stem returns an array reference - Only a single term was
    #   passed in, so only the first element is of interest.

    my $stem = Lingua::Stem::stem( $term )->[0];

    #   Update the counters for the stemmed term:
    #
    #     _index_stem       marks the stemmed terms already seen, so that a
    #                       stemmed term which appears more than once is only
    #                       counted a single time in _index_count
    #     _index_count      the number of documents in which the stemmed term
    #                       appears (not the total number of appearances of the
    #                       stemmed term across all documents)
    #     _index_frequency  the number of occurrences of the stemmed term in the
    #                       current document, indexed by $url
    #
    #   NOTE(review): for _index_count to be a per-document count, _index_stem
    #   presumably has to be reset for each document - That reset is not visible
    #   in this section and should be confirmed.

    my $seen = $self->{'_index_stem'}->{$stem}++;
    ++$self->{'_index_count'}->{$stem} unless $seen;

    $self->{'_index_frequency'}->{$stem}->{$url}++;
}

##</code><code>##

#   weights
#
#   Calculate a ranking weight for every (stemmed term, document) pair held in
#   the index and store the results in the _tied_weight hash, keyed by stemmed
#   term and then by document URL.
#
#   The weight of a stemmed term within a document is
#
#       frequency * log( documents / count )
#
#   where frequency is the number of occurrences of the stemmed term in that
#   document (_index_frequency), documents is the number of keys held in
#   _crawl_visited and count is the number of documents in which the stemmed
#   term appears (_index_count).  Each weight is stored as a string formatted
#   to two decimal places.
#
#   Takes no arguments other than the object itself.  As log() is applied
#   directly, Perl raises a fatal error if _crawl_visited is empty while
#   stemmed terms are present in the index.

sub weights
{
    my ( $self ) = @_;

    #   The number of documents is the same for every stemmed term and every
    #   document, so it is counted once here rather than for each weight
    #   calculated.  (If _crawl_visited is a tied hash, counting its keys may
    #   be costly - TODO confirm how it is stored.)

    my $documents = scalar keys %{ $self->{'_crawl_visited'} };

    #   Step through each stemmed term indexed

    foreach my $stem ( keys %{ $self->{'_index_count'} } )
    {
        #   The logarithmic factor depends upon the stemmed term alone and is
        #   therefore calculated once per stemmed term.  Likewise, the hash of
        #   per-document frequencies is looked up once and reused.

        my $factor    = log( $documents / $self->{'_index_count'}->{$stem} );
        my $frequency = $self->{'_index_frequency'}->{$stem};

        #   Step through each document in which the stemmed term $stem appears,
        #   calculate its weight and store this ranking in the %weights hash.

        my %weights;
        foreach my $url ( keys %{ $frequency } )
        {
            $weights{$url} = sprintf "%.2f", $frequency->{$url} * $factor;
        }

        #   Store ranking score in tied hash - Note the fashion by which the hash
        #   reference is built first and then assigned to the MLDBM-tied hash.  This is
        #   required due to the limitations of the Perl TIEHASH interface which has no
        #   support for multi-dimensional ties.

        $self->{'_tied_weight'}->{$stem} = \%weights;
    }
}