%Time ExclSec CumulS #Calls sec/call Csec/c Name
66.7 1.270 1.659 30 0.0423 0.0553 PhraseHighlight::highlight_text
11.5 0.220 0.220 30 0.0073 0.0073 PhraseHighlight::split_by_wordchars
8.93 0.170 0.170 30 0.0057 0.0057 PhraseHighlight::build_highlighted_text
2.63 0.050 1.719 1 0.0499 1.7188 SwishQuery::run_swish
2.10 0.040 0.040 11 0.0036 0.0036 CGI::_compile
2.10 0.040 0.090 3 0.0133 0.0299 SwishSearch::BEGIN
0.53 0.010 0.010 1 0.0100 0.0100 Exporter::export
0.53 0.010 0.010 1 0.0100 0.0100 Config::BEGIN
0.53 0.010 0.010 95 0.0001 0.0001 SwishQuery::config
0.53 0.010 0.010 1 0.0100 0.0100 vars::BEGIN
0.53 0.010 0.010 1 0.0100 0.0100 Fh::BEGIN
0.53 0.010 0.030 3 0.0033 0.0100 CGI::BEGIN
0.53 0.010 1.777 1 0.0100 1.7770 SwishSearch::process_request
0.53 0.010 1.728 1 0.0099 1.7285 SwishQuery::run_query
####
# Break the referenced text into the @words list using the $wc_regexp pattern.
# NOTE(review): the matching loop reads words from the even indexes of @words,
# so $wc_regexp presumably captures its separators - confirm where it is built.
@words = split( /$wc_regexp/, ${$text_ref} );
####
# Scan the text word by word, trying every search phrase at each position.
# A matched phrase is wrapped in $on_flag/$off_flag and the tokens around it
# are counted in @flags.  @words is indexed with "* 2" throughout, i.e. the
# words being compared sit at the even indexes of @words.
WORD:
while ( $word_pos * 2 < @words ) {

    PHRASE:
    foreach my $phrase ( @phrases ) {

        # An empty phrase can never match; without this guard the code below
        # would end up with a negative end offset and flag the wrong word.
        next PHRASE unless @$phrase;

        # Is the phrase longer than what's left?  The last phrase word would
        # sit at this index, which has to stay inside @words (hence ">=").
        next PHRASE if ( $word_pos + @$phrase - 1 ) * 2 >= @words;

        my $end_pos = 0;    # end offset (in words) of the current phrase

        # Now compare all the words in the phrase
        my ( $begin, $word, $end );

        for my $match_word ( @$phrase ) {

            # Stopwords skipped in the text can push a later phrase word
            # past the last element, so check the bounds on every word.
            my $cur_index = ( $word_pos + $end_pos ) * 2;
            next PHRASE if $cur_index > $#words;

            my $cur_word = $words[$cur_index];

            # Split word into ( begin_chars, swish_word, end_chars )
            unless ( $cur_word =~ /$extract_regexp/ ) {
                warn "Failed to parse IgnoreFirst/Last from word '$cur_word'";
                next PHRASE;
            }

            ( $begin, $word, $end ) = ( $1, $2, $3 );

            # swish works only with lowercase words
            my $check_word = lc $word;

            # A stopword in the text after the first phrase word is skipped:
            # advance in the text but retry the same phrase word.
            if ( $end_pos && exists $self->{stopwords}{$check_word} ) {
                $end_pos++;
                print STDERR " Found stopword '$check_word' in the middle of phrase - * MATCH *\n" if DEBUG_HIGHLIGHT;

                # Go on to check this match word with the next word.
                redo if ( $word_pos + $end_pos ) * 2 < @words;

                # No more words to match with, so go on to next phrase.
                next PHRASE;
            }

            # We may be using a fuzzy search such as stemming or soundex
            if ( $stemmer_function ) {
                my $w = $stemmer_function->($check_word);
                $check_word = $w if $w;
            }

            # Now we are ready to compare the "swish word".
            # Note that swish allows wildcard (truncation) in a query
            # e.g. you can search for run* to get run, runs, runner, running...
            if ( substr( $match_word, -1 ) eq '*' ) {
                next PHRASE if index( $check_word, substr( $match_word, 0, length( $match_word ) - 1 ) ) != 0;
            } else {
                next PHRASE if $check_word ne $match_word;
            }

            print STDERR " *** Word Matched '$check_word' *** \n" if DEBUG_HIGHLIGHT;
            $end_pos++;
        }

        print STDERR " *** PHRASE MATCHED (word:$word_pos offset:$end_pos) *** \n" if DEBUG_HIGHLIGHT;

        # We are currently at the end word, so it's easy to set highlight
        # for the *last* word ($begin/$word/$end still hold its parts).
        $end_pos--;

        my $first_index = $word_pos * 2;

        if ( !$end_pos ) {    # only one word in the "phrase"
            $words[$first_index] = "$begin$on_flag$word$off_flag$end";
        } else {
            $words[ ( $word_pos + $end_pos ) * 2 ] = "$begin$word$off_flag$end";

            # Now, reload first word of match
            $words[$first_index] =~ /$extract_regexp/
                or die "2 Why didn't '$words[$first_index]' =~ /$extract_regexp/?";

            # Strip ignorefirst and ignorelast
            ( $begin, $word, $end ) = ( $1, $2, $3 );    # probably should cache this!
            $words[$first_index] = "$begin$on_flag$word$end";
        }

        # Now, flag the words around to be shown
        my $start = ( $word_pos - $Show_Words + 1 ) * 2;
        my $stop  = ( $word_pos + $end_pos + $Show_Words - 2 ) * 2;

        # Clamp the window to the array; when it starts before the first
        # token, shift the whole window right by the same amount.
        if ( $start < 0 ) {
            $stop  = $stop - $start;
            $start = 0;
        }
        $stop = $#words if $stop > $#words;

        $flags[$_]++ for $start .. $stop;

        # Now, we only bother with marking the first $occurrences of matches.
        # All done, and mark where to stop looking.
        if ( $occurrences-- <= 0 ) {
            # NOTE(review): $end holds the trailing characters of the last
            # parsed word, not an index - confirm this is what $last expects.
            $last = $end;
            last WORD;
        }

        # Now reset $word_pos to the last matched word; the continue block
        # still runs on "next" and steps to the word following it.
        $word_pos += $end_pos;
        next WORD;
    }
} continue {
    $word_pos++;
}