package Eertree;
use strict;
use warnings;
use Exporter 'import';
our @EXPORT_OK = qw/ ee /;
sub ee {
my $str = shift;
my $FULL = shift; # if true: return "nodes" arrayref "as is", then DIY:
# extract and expand to palindrome complete list
# (it's trivial, isn't it?), work with each
# palindrome's position, do statistics, etc.
# if false: just return unique palindromes as a list
# (note: it's faster because there's less info
# collected)
my @nodes = (
{ len => -1, lnk => 0 },
{ len => 0, lnk => 0 }
);
my $lpspi = 0; # "longest proper suffix-palindrome index"
# (of substring processed so far)
for my $i ( 0 .. length( $str ) - 1 ) {
my $c = substr $str, $i, 1;
my $current = $lpspi;
my $n = $nodes[ $current ];
while ( $c ne substr $str, $i - $n-> { len } - 1, 1 ) {
$current = $n-> { lnk };
$n = $nodes[ $current ]
}
my $suffix; # trailing palindrome index
# after adding current character
if ( $current == 0 ) { # lone character "palindrome"
# (note: this and following branches have some code in common i.e.
# aren't dry, I didn't test whether consolidation would impair
# speed, but I think a couple of vague conditionals, instead,
# could impair clarity
if ( exists $n-> { edges }{ $c }) { # already seen?
$suffix = $n-> { edges }{ $c };
push @{ $nodes[ $suffix ]{ offsets }}, $i if $FULL
}
else {
my $p = { len => 1, lnk => 1, offsets => [ $i ]};
push @nodes, $p;
$n-> { edges }{ $c } = $suffix = $#nodes
}
}
else {
if ( exists $n-> { edges }{ $c }) { # already seen?
$suffix = $n-> { edges }{ $c };
_descend( $suffix, $i, \@nodes ) if $FULL
}
else {
my $L = 2 + $n-> { len };
my $p = {
len => $L,
offsets => [ $i - $L + 1 ],
};
push @nodes, $p;
$n-> { edges }{ $c } = $suffix = $#nodes;
do {
$current = $n-> { lnk };
$n = $nodes[ $current ];
} while ( $c ne substr $str, $i - $n-> { len } - 1, 1 );
$p-> { lnk } = $n-> { edges }{ $c };
_descend( $p-> { lnk }, $i, \@nodes ) if $FULL
}
}
# make sure it's "proper"
$lpspi = ( $i + 1 ) == $nodes[ $suffix ]{ len }
? $nodes[ $suffix ]{ lnk }
: $suffix;
}
return \@nodes if $FULL;
splice @nodes, 0, 2;
return map substr( $str, $_-> { offsets }[ 0 ], $_-> { len }), @nodes
}
sub _descend {
my ( $x, $i, $nodes ) = @_;
while ( $x > 1 ) {
my $n_ = $nodes-> [ $x ];
push @{ $n_-> { offsets }}, $i - $n_-> { len } + 1;
$x = $n_-> { lnk }
}
}
1;
####
my $s = join "", map chr(ord('a') + int rand 26), 1 .. 200;
cmpthese(-3, {
eertree => sub { 'String::Eertree'->new(string => $s)->uniq_palindromes },
rosetta_new => sub { rosetta_fixed($s) },
ee => sub { ee($s) },
});
Rate rosetta_new eertree ee
rosetta_new 271/s -- -58% -93%
eertree 651/s 140% -- -84%
ee 3964/s 1362% 509% --
##
##
cmpthese(-3, {
eertree => sub { 'String::Eertree'->new(string => $s) },
ee => sub { ee($s, 1) },
});
Rate eertree ee
eertree 648/s -- -80%
ee 3199/s 394% --
##
##
+
2.5 +-------------------------------------------------+
| + + + + + + + + + |
| |
| |
| B |
| |
| B |
2 |-+ +-|
| B |
| B |
| |
| B |
| |
| B |
1.5 |-+ B +-|
| |
| B |
| |
| B |
| |
| B |
| B |
1 |-+ +-|
| B |
| |
| B |
| B A |
| A A |
| B A A |
0.5 |-+ A A +-|
| B A A |
| B A |
| A A |
| B A A |
| B A A |
| A A + + + + + + + + |
0 +-------------------------------------------------+
0 2 4 6 8 10 12 14 16 18 20
use strict;
use warnings;
use feature 'say';
use Data::Dump 'dd';
use Time::HiRes 'time';
use String::Eertree;
use lib '.';
use Eertree 'ee';
use Benchmark qw/ timeit :hireswallclock /;
use Chart::Gnuplot;
use File::Spec::Functions 'catfile';
use File::Basename 'dirname';
my $GP = catfile dirname( $^X ), '../../c/bin/gnuplot.exe';
die unless -x $GP; # assume Strawberry Perl
use constant REPEATS => 5;
STDOUT-> autoflush( 1 );
sub get_sample {
my $n = shift;
my $str = 'amanaplanacanalpanama';
my $s = '';
for ( 1 .. $n * 1000 ) {
substr $s, rand( length $s ), 0, $str
}
return $s
}
my ( @xdata, @ydata1, @ydata2, @datasets );
for my $n ( 1 .. 20) {
print "$n ";
my $s = get_sample( $n );
push @xdata, $n;
{
my $t = timeit REPEATS, sub { ee( $s, 1 )};
my $y = $t-> [ 1 ] / $t-> [ -1 ];
push @ydata1, $y;
}
{
my $t = timeit REPEATS, sub { String::Eertree-> new( string => $s )};
my $y = $t-> [ 1 ] / $t-> [ -1 ];
push @ydata2, $y;
}
}
print "\n\n";
push @datasets, Chart::Gnuplot::DataSet-> new(
xdata => \@xdata,
ydata => \@ydata1,
style => 'points',
),
Chart::Gnuplot::DataSet-> new(
xdata => \@xdata,
ydata => \@ydata2,
style => 'points',
);
my $chart = Chart::Gnuplot-> new(
gnuplot => $GP,
terminal => 'dumb size 60, 40',
);
$chart-> plot2d( @datasets );