#!/usr/bin/perl
use strict;
use warnings;
use Test::More 'no_plan';
use Benchmark qw/:all :hireswallclock/;
my $str='aabcdabcabcecdecd';
sub blazar {
my ($str, $min_len, $min_rep)=@_;
my $l=length($str);
my %count;
for my $off (0..$l-1) {
for my $len ($min_len .. $l-$off) {
my $s = substr $str, $off, $len;
$count{ $s } ||= ()= $str =~ /$s/g;
}
$count{$_} < $min_rep and
delete $count{$_} for keys %count;
}
\%count;
}
sub kramba {
my ($str, $min_len, $min_rep)=@_;
my %count;
local *count = sub {
my( $string) = @_;
my $length = length( $string );
if ($length < $min_len) {
for (keys %count) {
delete $count{$_}
if $count{$_} < $min_rep;
}
return \%count;
}
for my $l ($min_len..$length) {
my $s = substr( $string, 0, $l );
$count{ $s } += 1;
}
count( substr( $string, 1 ) );
};
count($str);
\%count;
}
sub ikegami {
my ($str, $min_len, $min_rep)=@_;
local our %counts;
use re 'eval';
$str =~ /
(.{$min_len,}) # or (.+)
(?(?{ !$counts{$1} })(?=.*\1))
(?{ ++$counts{$1} })
(?!)
/x;
for (keys %counts) {
delete $counts{$_} if $counts{$_} < $min_rep;
}
\%counts;
}
sub lodin {
my ($str, $min_len, $min_rep)=@_;
local our %count;
use re 'eval';
$str =~ /
(.{$min_len,})
(?(?{ $count{$1} })
(?!)
)
(?>
.*?
\1
){@{[ $min_rep - 2 ]}}
.*
\1
(?{ ($count{$1} ||= $min_rep-1)++ })
(?!)
/x;
\%count;
}
{
my %cache;
sub _reference { $cache{$_[0]} ||= blazar @_ }
}
for my $len (1..3) {
for my $rep (2..3) {
my $tag="len=$len, rep=$rep";
is_deeply kramba($str,$len,$rep), _reference($str,$len,$rep), "kramba $tag";
is_deeply ikegami($str,$len,$rep), _reference($str,$len,$rep), "ikegami $tag";
is_deeply lodin($str,$len,$rep), _reference($str,$len,$rep), "lodin $tag";
}
}
__END__
####
#!/usr/bin/perl
use strict;
use warnings;
use Test::More 'no_plan';
use Benchmark qw/:all :hireswallclock/;
my $str='aabcdabcabcecdecd';
sub blazar {
my ($str, $min_len, $min_rep)=@_;
my $l=length($str);
my %count;
for my $off (0..$l-1) {
for my $len ($min_len .. $l-$off) {
my $s = substr $str, $off, $len;
$count{ $s } ||= ()= $str =~ /$s/g;
}
$count{$_} < $min_rep and
delete $count{$_} for keys %count;
}
\%count;
}
sub kramba {
my ($str, $min_len, $min_rep)=@_;
my %count;
no warnings 'recursion';
local *count = sub {
my( $string) = @_;
my $length = length( $string );
if ($length < $min_len) {
for (keys %count) {
delete $count{$_}
if $count{$_} < $min_rep;
}
return \%count;
}
for my $l ($min_len..$length) {
my $s = substr( $string, 0, $l );
$count{ $s } += 1;
}
count( substr( $string, 1 ) );
};
count($str);
\%count;
}
sub ikegami {
my ($str, $min_len, $min_rep)=@_;
local our %counts;
use re 'eval';
$str =~ /
(.{$min_len,}) # or (.+)
(?(?{ !$counts{$1} })(?=.*\1))
(?{ ++$counts{$1} })
(?!)
/x;
for (keys %counts) {
delete $counts{$_} if $counts{$_} < $min_rep;
}
\%counts;
}
sub lodin {
my ($str, $min_len, $min_rep)=@_;
local our %count;
use re 'eval';
$str =~ /
(.{$min_len,})
(?(?{ $count{$1} })
(?!)
)
(?>
.*?
\1
){@{[ $min_rep - 2 ]}}
.*
\1
(?{ ($count{$1} ||= $min_rep-1)++ })
(?!)
/x;
\%count;
}
{
my %cache;
sub _reference { $cache{"@_"} ||= blazar @_ }
}
for my $s ( map {$str x $_} 1,3,42) {
for my $len (1..2) {
for my $rep (2..3) {
my $strlen=length $s;
print "\nstring length=$strlen, len=$len, rep=$rep\n\n";
cmpthese +($strlen < 100 ? 10_000 : -60) => {
kramba => sub { kramba($s,$len,$rep) },
ikegami => sub { ikegami($s,$len,$rep) },
lodin => sub { lodin($s,$len,$rep) },
};
}
}
}
print "\n";
for my $len (1..3) {
for my $rep (2..3) {
my $tag="len=$len, rep=$rep";
is_deeply kramba($str,$len,$rep), _reference($str,$len,$rep), "kramba $tag";
is_deeply ikegami($str,$len,$rep), _reference($str,$len,$rep), "ikegami $tag";
is_deeply lodin($str,$len,$rep), _reference($str,$len,$rep), "lodin $tag";
}
}
__END__
####
string length=17, len=1, rep=2
Rate kramba ikegami lodin
kramba 1860/s -- -59% -60%
ikegami 4570/s 146% -- -1%
lodin 4636/s 149% 1% --
string length=17, len=1, rep=3
Rate kramba lodin ikegami
kramba 1866/s -- -58% -60%
lodin 4413/s 137% -- -5%
ikegami 4638/s 149% 5% --
string length=17, len=2, rep=2
Rate kramba lodin ikegami
kramba 2000/s -- -63% -64%
lodin 5470/s 174% -- -3%
ikegami 5618/s 181% 3% --
string length=17, len=2, rep=3
Rate kramba lodin ikegami
kramba 1905/s -- -59% -66%
lodin 4604/s 142% -- -18%
ikegami 5612/s 195% 22% --
string length=51, len=1, rep=2
Rate kramba lodin ikegami
kramba 212/s -- -0% -18%
lodin 212/s 0% -- -18%
ikegami 259/s 22% 22% --
string length=51, len=1, rep=3
Rate kramba ikegami lodin
kramba 201/s -- -5% -6%
ikegami 212/s 6% -- -1%
lodin 215/s 7% 1% --
string length=51, len=2, rep=2
Rate kramba lodin ikegami
kramba 202/s -- -5% -7%
lodin 213/s 5% -- -2%
ikegami 217/s 7% 2% --
string length=51, len=2, rep=3
Rate kramba ikegami lodin
kramba 200/s -- -5% -10%
ikegami 210/s 5% -- -5%
lodin 221/s 11% 5% --
string length=714, len=1, rep=2
s/iter lodin ikegami kramba
lodin 2.50 -- -5% -50%
ikegami 2.37 6% -- -48%
kramba 1.24 102% 91% --
string length=714, len=1, rep=3
s/iter lodin ikegami kramba
lodin 3.48 -- -30% -64%
ikegami 2.45 42% -- -49%
kramba 1.25 180% 96% --
string length=714, len=2, rep=2
s/iter lodin ikegami kramba
lodin 2.50 -- -1% -51%
ikegami 2.49 1% -- -50%
kramba 1.24 103% 101% --
string length=714, len=2, rep=3
s/iter lodin ikegami kramba
lodin 3.40 -- -28% -63%
ikegami 2.44 39% -- -49%
kramba 1.25 173% 96% --
ok 1 - kramba len=1, rep=2
ok 2 - ikegami len=1, rep=2
ok 3 - lodin len=1, rep=2
ok 4 - kramba len=1, rep=3
ok 5 - ikegami len=1, rep=3
ok 6 - lodin len=1, rep=3
ok 7 - kramba len=2, rep=2
ok 8 - ikegami len=2, rep=2
ok 9 - lodin len=2, rep=2
ok 10 - kramba len=2, rep=3
ok 11 - ikegami len=2, rep=3
ok 12 - lodin len=2, rep=3
ok 13 - kramba len=3, rep=2
ok 14 - ikegami len=3, rep=2
ok 15 - lodin len=3, rep=2
ok 16 - kramba len=3, rep=3
ok 17 - ikegami len=3, rep=3
ok 18 - lodin len=3, rep=3
1..18