#! perl -slw use strict; use constant { BASESEQ => 0, SAVED => 1, OFFSET => 2, LENGTH => 3, REPEATS => 4 }; our $L ||= 200; our $RANGE ||= "'a'..'c'"; #srand $SEED if $SEED; sub findRuns { my $s = shift; my @rls; push @rls, [ $1, length $2, pos($s) - length( $1 ), length( $1 ) + length( $2 ), length( $2 ) / length( $1 ) + 1, ] while $s =~ m[ .*? ( [a-z]+ ) (?= ( \1{1,254} ) [^)]* (?: \( | $ ) ) ]gx; for my $p ( 0 .. $#rls ) { next unless defined $rls[ $p ]; for my $q ( $p+1 .. $#rls ) { next unless defined $rls[ $p ] and defined $rls[ $q ]; last if $rls[ $q ][ OFFSET ] > $rls[ $p ][ OFFSET ] + $rls[ $p ][ LENGTH ]; while( $rls[ $q ][ OFFSET ] >= $rls[ $p ][ OFFSET ] && $rls[ $q ][ OFFSET ] < $rls[ $p ][ OFFSET ] + $rls[ $p ][ LENGTH ] ) { if( $rls[ $p ][ SAVED ] < $rls[ $q ][ SAVED ] ) { last unless $rls[ $p ][ REPEATS ]-- > 2; $rls[ $p ][ REPEATS ] -= length $rls[ $p ][ BASESEQ ]; } else { last unless $rls[ $q ][ REPEATS ]-- > 2; $rls[ $q ][ REPEATS ] -= length $rls[ $q ][ BASESEQ ]; $rls[ $q ][ OFFSET ] += length $rls[ $q ][ BASESEQ ]; } } $rls[ $q ][ OFFSET ] >= $rls[ $p ][ OFFSET ] && $rls[ $q ][ OFFSET ] < $rls[ $p ][ OFFSET ] + $rls[ $p ][ LENGTH ] and delete $rls[ $rls[ $p ][ SAVED ] < $rls[ $q ][ SAVED ] ? $p : $q ]; } } @rls = grep defined, @rls; for ( @rls ) { my $re = qr[(?:$_->[ BASESEQ ]){$_->[ &REPEATS ]}]; $s =~ s[ ( (?: ^ | \) ) [^(]*? ) $re] { ($1||'') . '(' . $_->[ REPEATS ] . ":$_->[ BASESEQ ])" }ex; } $s = findRuns( $s ) if $s =~ m[ (?: ^ | \) ) [^(]*? ( [a-z]+ ) \1 [^)]*? (?: \( | $ ) ]x; return $s; } my @chars = eval $RANGE; my $s = join'', map{ ( @chars )[ rand @chars ] } 1 .. $L; print scalar localtime, ' : ', $s, $/; my $encoded = findRuns $s; print scalar localtime, ' : ', $encoded, $/; $encoded =~ s[\((\d+):([^)]+)\)][$2 x $1]eg; print scalar localtime, ' : ', $encoded, $/;