You're not comparing regex vs index; you're comparing a buggy and inefficient implementation of regex with a non-equivalent implementation using index.
Fixed:
use strict;
use warnings;

use Benchmark qw(cmpthese);

# Haystacks searched by every benchmarked implementation.
my @strings = (
    'Some string about this long or so, maybe this long',
    'I like pizza this long or so, maybe this long',
    'this long or so, maybe this French Fries long',
    'This Sugar Rush Rocks. maybe this do not stop the clock.',
);

# Needles are the keys; the values are not consulted by the benchmark.
my %hash = (
    'Sugar Rush Rocks' => 'whatever',
    'long'             => 'itsgood',
    'this long'        => 'ilikeit',
    'maybe this'       => 'itsokay',
    'Some String'      => 'loooveit',
);

# Longest key first, so the first hit for a string is its longest match.
my @keys_sorted_by_length_desc =
    sort { length($b) <=> length($a) } keys %hash;

# A negative count asks Benchmark to run each sub for at least 2 CPU seconds.
cmpthese(-2, {
    Regex  => \&use_regex,
    Index  => \&use_index,
    Index2 => \&use_index2,
});

# Case-insensitive whole-word search using one precompiled regex per key.
# Both the key (\L) and the string (lc) are lowercased; \Q...\E quotes any
# regex metacharacters in the key and \b anchors it at word boundaries.
# Stops at the first (i.e. longest) key found in each string.
sub use_regex {
    my %re_keys = map { $_ => qr/\b\L\Q$_\E\b/ } @keys_sorted_by_length_desc;

    for my $string (@strings) {
        my $s = lc($string);
        for my $key (@keys_sorted_by_length_desc) {
            if ( $s =~ $re_keys{$key} ) {
                #print "Found '$key' in '$string'\n";
                last;
            }
        }
    }
}

# Same search using index() to locate the lowercased key, then checking by
# hand that the characters on either side of the hit (if any) are non-word
# characters. Only the first occurrence of a key in a string is examined.
sub use_index {
    # NOTE(review): @lc_keys is built but never read; the inner loop
    # lowercases each key again instead. Left untouched so the work being
    # timed stays the same as in the published results.
    my @lc_keys = map lc, @keys_sorted_by_length_desc;

    for my $string (@strings) {
        my $s = lc($string);
        for my $key (@keys_sorted_by_length_desc) {
            my $lc_key = lc($key);
            my $spos   = index($s, $lc_key);

            # Left edge: start of string, or preceded by a non-word char.
            if ($spos >= 0 && ( $spos == 0 || substr($s, $spos-1, 1) =~ /\W/ )) {
                my $epos = $spos + length($lc_key);

                # Right edge: end of string, or followed by a non-word char.
                if ($epos == length($s) || substr($s, $epos, 1) =~ /\W/) {
                    #print "Found '$key' in '$string'\n";
                    last;
                }
            }
        }
    }
}

# Variant that normalises the string once: lowercase it, pad it with a
# space on each side and turn every non-word character into a space. A key
# wrapped in spaces can then be located with a single index() call, with
# the surrounding spaces standing in for the word boundaries.
sub use_index2 {
    for my $string (@strings) {
        ( my $s = " \L$string " ) =~ s/\W/ /g;
        for my $key (@keys_sorted_by_length_desc) {
            my $lc_key = " \L$key ";
            if (index($s, $lc_key) >= 0) {
                #print "Found '$key' in '$string'\n";
                last;
            }
        }
    }
}
Results:
          Rate  Regex Index2  Index
Regex  23206/s     --   -33%   -56%
Index2 34824/s    50%     --   -35%
Index  53339/s   130%    53%     --
Update: Fixed many many problems.
In reply to Re^2: Matching Many Strings against a Large List of Hash Keys (case insensitively, longest key first)
by ikegami
in thread Matching Many Strings against a Large List of Hash Keys (case insensitively, longest key first)
by Anonymous Monk
| For: | Use: |
| & | &amp; |
| < | &lt; |
| > | &gt; |
| [ | &#91; |
| ] | &#93; |