# Find tandem repeats (a short sequence repeated back-to-back) in each
# line of the DATA section and report them. Favours the shortest
# repeating sequence, because the unit is matched non-greedily.
use strict;
use warnings;

my $thresh = 1;    # Match *more than* $thresh times.

# find_repeats($string, $thresh, $max_len)
#
# Scans $string left to right for runs in which a unit of 1..$max_len
# characters is immediately followed by at least $thresh further copies
# of itself (i.e. the unit occurs more than $thresh times in a row).
# $thresh defaults to 1 and $max_len to 6.
#
# Returns a list of hash references, one per run, in order of position:
#   count       => number of consecutive occurrences of the unit
#   unit        => the repeated sequence
#   unit_length => length of the unit
#   total       => length of the whole run
#   pos         => 0-based start offset of the run in $string
sub find_repeats {
    my ($string, $thresh, $max_len) = @_;
    $thresh  = 1 unless defined $thresh;
    $max_len = 6 unless defined $max_len;

    # Group 1 is the whole run, group 2 the repeating unit. The lazy
    # quantifier on the unit makes the shortest possible unit win.
    my $repeat_re = qr/((.{1,$max_len}?)\2{$thresh,})/;

    my @runs;
    while ($string =~ /$repeat_re/g) {
        my ($run, $unit) = ($1, $2);
        push @runs, {
            count       => length($run) / length($unit),
            unit        => $unit,
            unit_length => length($unit),
            total       => length($run),
            pos         => $-[0],    # Start offset of the last match.
        };
    }
    return @runs;
}

# Read the sample strings from the DATA section and print every run found.
sub main {
    while (my $line = <DATA>) {
        chomp $line;
        for my $run (find_repeats($line, $thresh)) {
            printf(
                "Found %d %-6s (length=%d, total=%2d) at pos %2d in %s\n",
                $run->{count},          # Number of matches.
                $run->{unit},           # Sequence.
                $run->{unit_length},    # Length of sequence.
                $run->{total},          # Length of match.
                $run->{pos},            # Start position.
                $line                   # String we're searching.
            );
        }
    }
    return;
}

# Run only when executed as a script, so the file can also be loaded
# with require without producing output.
main() unless caller;

# With the sample data below this gives:
#
#   Found 5 CAT    (length=3, total=15) at pos  0 in CATCATCATCATCAT
#   Found 3 A      (length=1, total= 3) at pos  0 in AAAGTCAAAGTCAAAGTC
#   Found 2 GTCAAA (length=6, total=12) at pos  3 in AAAGTCAAAGTCAAAGTC

1;

__DATA__
CATCATCATCATCAT
AAAGTCAAAGTCAAAGTC