package Eertree; use strict; use warnings; use Exporter 'import'; our @EXPORT_OK = qw/ ee /; sub ee { my $str = shift; my $FULL = shift; # if true: return "nodes" arrayref "as is", then DIY: # extract and expand to palindrome complete list # (it's trivial, isn't it?), work with each # palindrome's position, do statistics, etc. # if false: just return unique palindromes as a list # (note: it's faster because there's less info # collected) my @nodes = ( { len => -1, lnk => 0 }, { len => 0, lnk => 0 } ); my $lpspi = 0; # "longest proper suffix-palindrome index" # (of substring processed so far) for my $i ( 0 .. length( $str ) - 1 ) { my $c = substr $str, $i, 1; my $current = $lpspi; my $n = $nodes[ $current ]; while ( $c ne substr $str, $i - $n-> { len } - 1, 1 ) { $current = $n-> { lnk }; $n = $nodes[ $current ] } my $suffix; # trailing palindrome index # after adding current character if ( $current == 0 ) { # lone character "palindrome" # (note: this and following branches have some code in common i.e. # aren't dry, I didn't test whether consolidation would impair # speed, but I think a couple of vague conditionals, instead, # could impair clarity if ( exists $n-> { edges }{ $c }) { # already seen? $suffix = $n-> { edges }{ $c }; push @{ $nodes[ $suffix ]{ offsets }}, $i if $FULL } else { my $p = { len => 1, lnk => 1, offsets => [ $i ]}; push @nodes, $p; $n-> { edges }{ $c } = $suffix = $#nodes } } else { if ( exists $n-> { edges }{ $c }) { # already seen? $suffix = $n-> { edges }{ $c }; _descend( $suffix, $i, \@nodes ) if $FULL } else { my $L = 2 + $n-> { len }; my $p = { len => $L, offsets => [ $i - $L + 1 ], }; push @nodes, $p; $n-> { edges }{ $c } = $suffix = $#nodes; do { $current = $n-> { lnk }; $n = $nodes[ $current ]; } while ( $c ne substr $str, $i - $n-> { len } - 1, 1 ); $p-> { lnk } = $n-> { edges }{ $c }; _descend( $p-> { lnk }, $i, \@nodes ) if $FULL } } # make sure it's "proper" $lpspi = ( $i + 1 ) == $nodes[ $suffix ]{ len } ? $nodes[ $suffix ]{ lnk } : $suffix; } return \@nodes if $FULL; splice @nodes, 0, 2; return map substr( $str, $_-> { offsets }[ 0 ], $_-> { len }), @nodes } sub _descend { my ( $x, $i, $nodes ) = @_; while ( $x > 1 ) { my $n_ = $nodes-> [ $x ]; push @{ $n_-> { offsets }}, $i - $n_-> { len } + 1; $x = $n_-> { lnk } } } 1; #### my $s = join "", map chr(ord('a') + int rand 26), 1 .. 200; cmpthese(-3, { eertree => sub { 'String::Eertree'->new(string => $s)->uniq_palindromes }, rosetta_new => sub { rosetta_fixed($s) }, ee => sub { ee($s) }, }); Rate rosetta_new eertree ee rosetta_new 271/s -- -58% -93% eertree 651/s 140% -- -84% ee 3964/s 1362% 509% -- #### cmpthese(-3, { eertree => sub { 'String::Eertree'->new(string => $s) }, ee => sub { ee($s, 1) }, }); Rate eertree ee eertree 648/s -- -80% ee 3199/s 394% -- #### + 2.5 +-------------------------------------------------+ | + + + + + + + + + | | | | | | B | | | | B | 2 |-+ +-| | B | | B | | | | B | | | | B | 1.5 |-+ B +-| | | | B | | | | B | | | | B | | B | 1 |-+ +-| | B | | | | B | | B A | | A A | | B A A | 0.5 |-+ A A +-| | B A A | | B A | | A A | | B A A | | B A A | | A A + + + + + + + + | 0 +-------------------------------------------------+ 0 2 4 6 8 10 12 14 16 18 20 use strict; use warnings; use feature 'say'; use Data::Dump 'dd'; use Time::HiRes 'time'; use String::Eertree; use lib '.'; use Eertree 'ee'; use Benchmark qw/ timeit :hireswallclock /; use Chart::Gnuplot; use File::Spec::Functions 'catfile'; use File::Basename 'dirname'; my $GP = catfile dirname( $^X ), '../../c/bin/gnuplot.exe'; die unless -x $GP; # assume Strawberry Perl use constant REPEATS => 5; STDOUT-> autoflush( 1 ); sub get_sample { my $n = shift; my $str = 'amanaplanacanalpanama'; my $s = ''; for ( 1 .. $n * 1000 ) { substr $s, rand( length $s ), 0, $str } return $s } my ( @xdata, @ydata1, @ydata2, @datasets ); for my $n ( 1 .. 20) { print "$n "; my $s = get_sample( $n ); push @xdata, $n; { my $t = timeit REPEATS, sub { ee( $s, 1 )}; my $y = $t-> [ 1 ] / $t-> [ -1 ]; push @ydata1, $y; } { my $t = timeit REPEATS, sub { String::Eertree-> new( string => $s )}; my $y = $t-> [ 1 ] / $t-> [ -1 ]; push @ydata2, $y; } } print "\n\n"; push @datasets, Chart::Gnuplot::DataSet-> new( xdata => \@xdata, ydata => \@ydata1, style => 'points', ), Chart::Gnuplot::DataSet-> new( xdata => \@xdata, ydata => \@ydata2, style => 'points', ); my $chart = Chart::Gnuplot-> new( gnuplot => $GP, terminal => 'dumb size 60, 40', ); $chart-> plot2d( @datasets );