#! perl -slw
use strict;
our $L ||= 200;
our $SEED ||= 1;
our $RANGE ||= "'a'..'c'";
#srand $SEED;
my @chars = eval $RANGE;
my $s = join'', map{ ( @chars )[ rand @chars ] } 1 .. $L;
my @rls; use constant { BASESEQ => 0, SAVED => 1, OFFSET => 2, LENGTH => 3, REPEATS => 4 };
## Find runs an record them
push @rls, [
$1,
length $2,
pos($s) - length( $1 ),
length( $1 ) + length( $2 ),
length( $2 ) / length( $1 ),
] while $s =~ m[(.+)(?=(\1{1,254}))]g;
## Eliminate overlaps deleting the
## lesser saving in each case.
for my $p ( 0 .. $#rls ) {
next unless defined $rls[ $p ];
for my $q ( $p+1 .. $#rls ) {
next unless defined $rls[ $p ]
and defined $rls[ $q ];
last if $rls[ $q ][ OFFSET ]
> $rls[ $p ][ OFFSET ] + $rls[ $p ][ LENGTH ];
$rls[ $q ][ OFFSET ] >= $rls[ $p ][ OFFSET ]
&& $rls[ $q ][ OFFSET ] < $rls[ $p ][ OFFSET ] + $rls[ $p ][ LENGTH ]
and delete $rls[
$rls[ $p ][ SAVED ] < $rls[ $q ][ SAVED ]
? $p
: $q
];
}
}
## Remove the overlaps
@rls = grep defined, @rls;
## Display the results
print $s;
print ' ' x $_->[ OFFSET ], uc( $_->[ BASESEQ ] ), $_->[ BASESEQ ] x $_->[ REPEATS ] for @rls;
__END__
####
[ 2:44:41.87] P:\test>390333 -L=70 -RANGE='a'..'b'
abaabbaaaabbaaaabaabaabaaaababaababbbbbaababbaabbaaaaabbbbaabbaababbba
AABBAAaabbaa AABaabaab AABABaabab Bbb ABBAabba Aaa BBAAbbaa Bbb
[ 2:44:42.69] P:\test>390333 -L=70 -RANGE='a'..'b'
abbbaaabbaabbbbbbbbbaaabbaaaaabbbabaaaabaaababaababbabbaaaaaabbbbbabbb
Bbb AABBaabb BBAAAbbaaa Aa Bbb AABABaabab Bb AAAaaa BBbb Bbb
[ 2:44:43.55] P:\test>390333 -L=70 -RANGE='a'..'b'
aabbbaaaabaabbbbbbbabaaabaaaaaabaabbbabaabbaabbaababbaaabaabaaabbababb
Aa Bbb AABaab BBbb BAAAbaaa AABaab BAABbaabbaab Bb AABaab Aaa BAba Bb
[ 2:44:44.37] P:\test>390333 -L=70 -RANGE='a'..'b'
aabbbbaaabbbabababbbabbbbbababababbaabaabaaaabaababbaabbabbabbbbaabbbb
Aa BBbb Aaa BAbaba BBbb BABAbaba BAAbaabaa AABaab ABBabbabb Bb Aa BBbb
####
#! perl -slw
use strict;
use constant {
BASESEQ => 0, SAVED => 1, OFFSET => 2,
LENGTH => 3, REPEATS => 4
};
our $L ||= 200;
our $RANGE ||= "'a'..'c'";
#srand $SEED if $SEED;
sub findRuns {
my $s = shift;
my @rls;
push @rls, [
$1,
length $2,
pos($s) - length( $1 ),
length( $1 ) + length( $2 ),
length( $2 ) / length( $1 ) + 1,
] while $s =~ m[
.*?
( [a-z]+ )
(?= ( \1{1,254} )
[^)]*
(?: \( | $ ) )
]gx;
for my $p ( 0 .. $#rls ) {
next unless defined $rls[ $p ];
for my $q ( $p+1 .. $#rls ) {
next unless defined $rls[ $p ] and defined $rls[ $q ];
last if $rls[ $q ][ OFFSET ]
> $rls[ $p ][ OFFSET ] + $rls[ $p ][ LENGTH ];
while(
$rls[ $q ][ OFFSET ] >= $rls[ $p ][ OFFSET ]
&& $rls[ $q ][ OFFSET ]
< $rls[ $p ][ OFFSET ] + $rls[ $p ][ LENGTH ]
) {
if( $rls[ $p ][ SAVED ] < $rls[ $q ][ SAVED ] ) {
last unless $rls[ $p ][ REPEATS ]-- > 2;
$rls[ $p ][ REPEATS ] -= length $rls[ $p ][ BASESEQ ];
}
else {
last unless $rls[ $q ][ REPEATS ]-- > 2;
$rls[ $q ][ REPEATS ] -= length $rls[ $q ][ BASESEQ ];
$rls[ $q ][ OFFSET ] += length $rls[ $q ][ BASESEQ ];
}
}
$rls[ $q ][ OFFSET ] >= $rls[ $p ][ OFFSET ]
&& $rls[ $q ][ OFFSET ] < $rls[ $p ][ OFFSET ]
+ $rls[ $p ][ LENGTH ]
and delete $rls[
$rls[ $p ][ SAVED ] < $rls[ $q ][ SAVED ]
? $p
: $q
];
}
}
@rls = grep defined, @rls;
for ( @rls ) {
my $re = qr[(?:$_->[ BASESEQ ]){$_->[ &REPEATS ]}];
$s =~ s[ ( (?: ^ | \) ) [^(]*? ) $re]
{ ($1||'') . '(' . $_->[ REPEATS ] . ":$_->[ BASESEQ ])" }ex;
}
$s = findRuns( $s ) if $s =~ m[
(?: ^ | \) ) [^(]*? ( [a-z]+ ) \1 [^)]*? (?: \( | $ )
]x;
return $s;
}
my @chars = eval $RANGE;
my $s = join'', map{ ( @chars )[ rand @chars ] } 1 .. $L;
print scalar localtime, ' : ', $s, $/;
my $encoded = findRuns $s;
print scalar localtime, ' : ', $encoded, $/;
$encoded =~ s[\((\d+):([^)]+)\)][$2 x $1]eg;
print scalar localtime, ' : ', $encoded, $/;
####
[ 6:10:08.93] P:\test>390333 -L=70 -RANGE='a'..'b'
Sun Sep 12 06:11:42 2004 :
aabbbabababaaabaaaabababbabaaabaaabaabbabbbabababaabbaaabbaabbbabaaaab
Sun Sep 12 06:11:42 2004 :
(2:a)(2:b)(2:baba)(2:a)b(3:a)(3:ab)ba(2:baaa)ba(2:abb)(3:ba)b(2:aabba)a(3:b)ab(2:aa)b
Sun Sep 12 06:11:42 2004 :
aabbbabababaaabaaaabababbabaaabaaabaabbabbbabababaabbaaabbaabbbabaaaab
[ 6:11:42.57] P:\test>390333 -L=70 -RANGE='a'..'b'
Sun Sep 12 06:11:46 2004 :
bbaaaabbabbbbababaabbbaaababbababbbbbbbabbbbabaaaabaababaaaabbaabbbaaa
Sun Sep 12 06:11:46 2004 :
(2:b)(3:a)(2:abb)b(3:ba)a(3:b)(2:a)(2:ababb)b(2:bbbba)b(2:a)(2:aab)ab(2:a)(2:aabb)b(3:a)
Sun Sep 12 06:11:46 2004 :
bbaaaabbabbbbababaabbbaaababbababbbbbbbabbbbabaaaabaababaaaabbaabbbaaa