#!/usr/bin/perl use strict; use warnings; use Test::More 'no_plan'; use Benchmark qw/:all :hireswallclock/; my $str='aabcdabcabcecdecd'; sub blazar { my ($str, $min_len, $min_rep)=@_; my $l=length($str); my %count; for my $off (0..$l-1) { for my $len ($min_len .. $l-$off) { my $s = substr $str, $off, $len; $count{ $s } ||= ()= $str =~ /$s/g; } $count{$_} < $min_rep and delete $count{$_} for keys %count; } \%count; } sub kramba { my ($str, $min_len, $min_rep)=@_; my %count; local *count = sub { my( $string) = @_; my $length = length( $string ); if ($length < $min_len) { for (keys %count) { delete $count{$_} if $count{$_} < $min_rep; } return \%count; } for my $l ($min_len..$length) { my $s = substr( $string, 0, $l ); $count{ $s } += 1; } count( substr( $string, 1 ) ); }; count($str); \%count; } sub ikegami { my ($str, $min_len, $min_rep)=@_; local our %counts; use re 'eval'; $str =~ / (.{$min_len,}) # or (.+) (?(?{ !$counts{$1} })(?=.*\1)) (?{ ++$counts{$1} }) (?!) /x; for (keys %counts) { delete $counts{$_} if $counts{$_} < $min_rep; } \%counts; } sub lodin { my ($str, $min_len, $min_rep)=@_; local our %count; use re 'eval'; $str =~ / (.{$min_len,}) (?(?{ $count{$1} }) (?!) ) (?> .*? \1 ){@{[ $min_rep - 2 ]}} .* \1 (?{ ($count{$1} ||= $min_rep-1)++ }) (?!) /x; \%count; } { my %cache; sub _reference { $cache{$_[0]} ||= blazar @_ } } for my $len (1..3) { for my $rep (2..3) { my $tag="len=$len, rep=$rep"; is_deeply kramba($str,$len,$rep), _reference($str,$len,$rep), "kramba $tag"; is_deeply ikegami($str,$len,$rep), _reference($str,$len,$rep), "ikegami $tag"; is_deeply lodin($str,$len,$rep), _reference($str,$len,$rep), "lodin $tag"; } } __END__ #### #!/usr/bin/perl use strict; use warnings; use Test::More 'no_plan'; use Benchmark qw/:all :hireswallclock/; my $str='aabcdabcabcecdecd'; sub blazar { my ($str, $min_len, $min_rep)=@_; my $l=length($str); my %count; for my $off (0..$l-1) { for my $len ($min_len .. $l-$off) { my $s = substr $str, $off, $len; $count{ $s } ||= ()= $str =~ /$s/g; } $count{$_} < $min_rep and delete $count{$_} for keys %count; } \%count; } sub kramba { my ($str, $min_len, $min_rep)=@_; my %count; no warnings 'recursion'; local *count = sub { my( $string) = @_; my $length = length( $string ); if ($length < $min_len) { for (keys %count) { delete $count{$_} if $count{$_} < $min_rep; } return \%count; } for my $l ($min_len..$length) { my $s = substr( $string, 0, $l ); $count{ $s } += 1; } count( substr( $string, 1 ) ); }; count($str); \%count; } sub ikegami { my ($str, $min_len, $min_rep)=@_; local our %counts; use re 'eval'; $str =~ / (.{$min_len,}) # or (.+) (?(?{ !$counts{$1} })(?=.*\1)) (?{ ++$counts{$1} }) (?!) /x; for (keys %counts) { delete $counts{$_} if $counts{$_} < $min_rep; } \%counts; } sub lodin { my ($str, $min_len, $min_rep)=@_; local our %count; use re 'eval'; $str =~ / (.{$min_len,}) (?(?{ $count{$1} }) (?!) ) (?> .*? \1 ){@{[ $min_rep - 2 ]}} .* \1 (?{ ($count{$1} ||= $min_rep-1)++ }) (?!) /x; \%count; } { my %cache; sub _reference { $cache{"@_"} ||= blazar @_ } } for my $s ( map {$str x $_} 1,3,42) { for my $len (1..2) { for my $rep (2..3) { my $strlen=length $s; print "\nstring length=$strlen, len=$len, rep=$rep\n\n"; cmpthese +($strlen < 100 ? 10_000 : -60) => { kramba => sub { kramba($s,$len,$rep) }, ikegami => sub { ikegami($s,$len,$rep) }, lodin => sub { lodin($s,$len,$rep) }, }; } } } print "\n"; for my $len (1..3) { for my $rep (2..3) { my $tag="len=$len, rep=$rep"; is_deeply kramba($str,$len,$rep), _reference($str,$len,$rep), "kramba $tag"; is_deeply ikegami($str,$len,$rep), _reference($str,$len,$rep), "ikegami $tag"; is_deeply lodin($str,$len,$rep), _reference($str,$len,$rep), "lodin $tag"; } } __END__ #### string length=17, len=1, rep=2 Rate kramba ikegami lodin kramba 1860/s -- -59% -60% ikegami 4570/s 146% -- -1% lodin 4636/s 149% 1% -- string length=17, len=1, rep=3 Rate kramba lodin ikegami kramba 1866/s -- -58% -60% lodin 4413/s 137% -- -5% ikegami 4638/s 149% 5% -- string length=17, len=2, rep=2 Rate kramba lodin ikegami kramba 2000/s -- -63% -64% lodin 5470/s 174% -- -3% ikegami 5618/s 181% 3% -- string length=17, len=2, rep=3 Rate kramba lodin ikegami kramba 1905/s -- -59% -66% lodin 4604/s 142% -- -18% ikegami 5612/s 195% 22% -- string length=51, len=1, rep=2 Rate kramba lodin ikegami kramba 212/s -- -0% -18% lodin 212/s 0% -- -18% ikegami 259/s 22% 22% -- string length=51, len=1, rep=3 Rate kramba ikegami lodin kramba 201/s -- -5% -6% ikegami 212/s 6% -- -1% lodin 215/s 7% 1% -- string length=51, len=2, rep=2 Rate kramba lodin ikegami kramba 202/s -- -5% -7% lodin 213/s 5% -- -2% ikegami 217/s 7% 2% -- string length=51, len=2, rep=3 Rate kramba ikegami lodin kramba 200/s -- -5% -10% ikegami 210/s 5% -- -5% lodin 221/s 11% 5% -- string length=714, len=1, rep=2 s/iter lodin ikegami kramba lodin 2.50 -- -5% -50% ikegami 2.37 6% -- -48% kramba 1.24 102% 91% -- string length=714, len=1, rep=3 s/iter lodin ikegami kramba lodin 3.48 -- -30% -64% ikegami 2.45 42% -- -49% kramba 1.25 180% 96% -- string length=714, len=2, rep=2 s/iter lodin ikegami kramba lodin 2.50 -- -1% -51% ikegami 2.49 1% -- -50% kramba 1.24 103% 101% -- string length=714, len=2, rep=3 s/iter lodin ikegami kramba lodin 3.40 -- -28% -63% ikegami 2.44 39% -- -49% kramba 1.25 173% 96% -- ok 1 - kramba len=1, rep=2 ok 2 - ikegami len=1, rep=2 ok 3 - lodin len=1, rep=2 ok 4 - kramba len=1, rep=3 ok 5 - ikegami len=1, rep=3 ok 6 - lodin len=1, rep=3 ok 7 - kramba len=2, rep=2 ok 8 - ikegami len=2, rep=2 ok 9 - lodin len=2, rep=2 ok 10 - kramba len=2, rep=3 ok 11 - ikegami len=2, rep=3 ok 12 - lodin len=2, rep=3 ok 13 - kramba len=3, rep=2 ok 14 - ikegami len=3, rep=2 ok 15 - lodin len=3, rep=2 ok 16 - kramba len=3, rep=3 ok 17 - ikegami len=3, rep=3 ok 18 - lodin len=3, rep=3 1..18