# Profile of one search request (presumably dprofpp output -- confirm).
# PhraseHighlight::highlight_text accounts for most of the run time.
#
# %Time ExclSec CumulS #Calls sec/call Csec/c  Name
#  66.7   1.270  1.659     30   0.0423 0.0553  PhraseHighlight::highlight_text
#  11.5   0.220  0.220     30   0.0073 0.0073  PhraseHighlight::split_by_wordchars
#  8.93   0.170  0.170     30   0.0057 0.0057  PhraseHighlight::build_highlighted_text
#  2.63   0.050  1.719      1   0.0499 1.7188  SwishQuery::run_swish
#  2.10   0.040  0.040     11   0.0036 0.0036  CGI::_compile
#  2.10   0.040  0.090      3   0.0133 0.0299  SwishSearch::BEGIN
#  0.53   0.010  0.010      1   0.0100 0.0100  Exporter::export
#  0.53   0.010  0.010      1   0.0100 0.0100  Config::BEGIN
#  0.53   0.010  0.010     95   0.0001 0.0001  SwishQuery::config
#  0.53   0.010  0.010      1   0.0100 0.0100  vars::BEGIN
#  0.53   0.010  0.010      1   0.0100 0.0100  Fh::BEGIN
#  0.53   0.010  0.030      3   0.0033 0.0100  CGI::BEGIN
#  0.53   0.010  1.777      1   0.0100 1.7770  SwishSearch::process_request
#  0.53   0.010  1.728      1   0.0099 1.7285  SwishQuery::run_query

####
# NOTE(review): the #### markers look like excerpt delimiters, so this statement
# may be quoted from another sub (split_by_wordchars?) -- confirm before running.
@words = split /$wc_regexp/, $$text_ref;
####

# Phrase matcher.
#
# Word N of the text is addressed as $words[ N * 2 ] throughout; presumably the
# odd slots hold the separators captured by $wc_regexp (TODO confirm).
#
# For each word position every phrase in @phrases is tried.  On a match the
# $on_flag / $off_flag markers are spliced around the matched words, the
# surrounding $Show_Words words are counted in @flags, and scanning resumes
# after the match.  Scanning stops once $occurrences is used up.

WORD:
while ( $word_pos * 2 < @words ) {

    PHRASE:
    foreach my $phrase ( @phrases ) {

        # An empty phrase would "match" everywhere and leave $end_pos at -1,
        # which writes to the wrong (possibly negative) index below.
        next PHRASE unless @$phrase;

        # Is the phrase longer than what's left?  The last phrase word would sit
        # at index ($word_pos + @$phrase - 1) * 2, which must be a valid index,
        # i.e. strictly less than @words (hence >=, not >).
        next PHRASE if ( $word_pos + @$phrase - 1 ) * 2 >= @words;

        my $end_pos = 0;    # end offset of the current phrase

        # Now compare all the words in the phrase
        my ( $begin, $word, $end );

        for my $match_word ( @$phrase ) {

            my $cur_word = $words[ ( $word_pos + $end_pos ) * 2 ];

            # Split word into ( begin_chars, swish_word, end_chars )
            unless ( defined $cur_word && $cur_word =~ /$extract_regexp/ ) {
                my $shown = defined $cur_word ? $cur_word : '*undef*';
                warn "Failed to parse IgnoreFirst/Last from word '$shown'";
                next PHRASE;
            }

            ( $begin, $word, $end ) = ( $1, $2, $3 );

            # swish works only with lowercase words
            my $check_word = lc $word;

            # A stopword in the middle of a phrase is skipped over: it counts
            # toward the match length but is not compared with $match_word.
            if ( $end_pos && exists $self->{stopwords}{$check_word} ) {
                $end_pos++;
                print STDERR " Found stopword '$check_word' in the middle of phrase - * MATCH *\n"
                    if DEBUG_HIGHLIGHT;

                # Go on to check this match word with the next text word.
                # (redo restarts this iteration of the match-word loop.)
                redo if ( $word_pos + $end_pos ) * 2 < @words;

                # No more words to match with, so go on to next phrase.
                next PHRASE;
            }

            # We may be using a fuzzy search such as stemming or soundex
            if ( $stemmer_function ) {
                my $w = $stemmer_function->($check_word);
                $check_word = $w if $w;
            }

            # Now we are ready to compare the "swish word".
            # Note that swish allows wildcard (truncation) in a query,
            # e.g. you can search for run* to get run, runs, runner, running...
            if ( substr( $match_word, -1 ) eq '*' ) {
                # The text word must start with everything before the '*'.
                next PHRASE
                    if index( $check_word, substr( $match_word, 0, length( $match_word ) - 1 ) ) != 0;
            } else {
                next PHRASE if $check_word ne $match_word;
            }

            print STDERR " *** Word Matched '$check_word' *** \n" if DEBUG_HIGHLIGHT;

            $end_pos++;
        }

        print STDERR " *** PHRASE MATCHED (word:$word_pos offset:$end_pos) *** \n" if DEBUG_HIGHLIGHT;

        # We are currently at the end word, so it's easy to set the highlight
        # for the *last* word.
        $end_pos--;

        if ( !$end_pos ) {
            # Only one word in the "phrase"
            $words[ $word_pos * 2 ] = "$begin$on_flag$word$off_flag$end";

        } else {
            $words[ ( $word_pos + $end_pos ) * 2 ] = "$begin$word$off_flag$end";

            # Now, reload the first word of the match
            my $first_word = $words[ $word_pos * 2 ];
            $first_word =~ /$extract_regexp/
                or die "2 Why didn't '$first_word' =~ /$extract_regexp/?";

            # Strip ignorefirst and ignorelast
            ( $begin, $word, $end ) = ( $1, $2, $3 );    # probably should cache this!
            $words[ $word_pos * 2 ] = "$begin$on_flag$word$end";
        }

        # Now, flag the words around the match to be shown
        my $start = ( $word_pos - $Show_Words + 1 ) * 2;
        my $stop  = ( $word_pos + $end_pos + $Show_Words - 2 ) * 2;

        if ( $start < 0 ) {
            # Window ran off the front: shift it right by the overshoot.
            $stop  = $stop - $start;
            $start = 0;
        }

        $stop = $#words if $stop > $#words;

        $flags[$_]++ for $start .. $stop;

        # We only bother with marking the first $occurrences matches.
        # All done, and mark where to stop looking.
        if ( $occurrences-- <= 0 ) {
            # NOTE(review): $end holds the trailing characters of a word here,
            # not a position -- confirm that $last is really meant to get $end.
            $last = $end;
            last WORD;
        }

        # Now reset $word_pos to the word following the last matched word
        $word_pos += $end_pos;    # continue will still be executed
        next WORD;
    }

} continue {
    $word_pos++;
}