use warnings;
use strict;
use Benchmark qw(cmpthese);
my $str;
my $len = 0;
while ($len < 1000000) {
my $runLen = int rand (50);
$str .= chr (ord ('a') + int rand (26)) x $runLen;
$len += $runLen;
}
my ($runlen, $start) = Index ();
print "Index: Run from $start for $runlen\n";
($runlen, $start) = Linear ();
print "Linear: Run from $start for $runlen\n";
($runlen, $start) = RegexSort ();
print "RegexSort: Run from $start for $runlen\n";
cmpthese (-5,
{
Index => \&Index,
Linear => \&Linear,
RegexSort => \&RegexSort,
}
);
sub Index {
my $sstr = substr ($str, 0, 1) . (substr ($str, 1) ^ $str);
my @bestRuns;
my $match = "\0";
my $bestRunLen = 1;
my $scan = 0;
while (-1 != ($scan = index $sstr, $match, $scan)) {
my $runLen = length ((substr ($sstr, $scan) =~ /(\0+)/)[0]);
if ($runLen > $bestRunLen) {
# new best match
@bestRuns = ();
$bestRunLen = $runLen;
$match = "\0" x ($bestRunLen);
}
push @bestRuns, $scan - 1;
$scan += $bestRunLen;
}
return ($bestRunLen + 1, $bestRuns[0]);
}
sub Linear {
my ($c, $maxn, $n, $maxc) = ('', 0);
my $bestEnd = 0;
for my $index (0..(length($str)-1)) {
$_ = substr($str, $index, 1);
if ($_ ne $c) {
$n = 1;
$c = $_;
}
else {
$n++;
if ($n > $maxn) {
$maxn = $n;
$maxc = $c;
$bestEnd = $index
}
}
}
return ($maxn, $bestEnd - $maxn + 1);
}
sub RegexSort {
return (length ((sort {length $b <=> length $a} $str =~ m[((.)\2+)
+]g)[0]), -1);
}
Note that the first three lines of each group are the check results. RegexSort doesn't generate a start index for the match so -1 is shown. However the same length is generated in each case so it is presumed that the same longest match is being found.