use strict;
use warnings;
use Data::Dumper;

# Compare two strings and mask their overlap.
#
# The overlap is the longest run of word characters that ends $left and
# also starts $right.
#
# Parameters:
#   $left  - the earlier string (its tail is examined)
#   $right - the later string (its head is examined)
#
# Returns ($overlap, $masked_left, $masked_right), where the overlapping
# characters are replaced by "-" in both masked copies, or an empty list
# when the two strings do not overlap. The caller's strings are not
# modified, because the arguments are copied out of @_ first.
sub mask_overlap {
    my ($left, $right) = @_;

    # The "#" is used as a separator. Put a different character (or
    # sequence of characters) here if you believe that "#" could also
    # appear in your strings. The regex engine tries start positions from
    # left to right, so the first successful match is the longest overlap.
    return if "$left#$right" !~ /(\w+)#\1/;
    my $overlap = $1;

    # The match guarantees $overlap is a suffix of $left and a prefix of
    # $right, so it can be masked by position without re-matching it.
    my $width = length $overlap;
    my $mask  = '-' x $width;
    substr($left,  -$width, $width, $mask);
    substr($right, 0,       $width, $mask);

    return ($overlap, $left, $right);
}

# Presumably the full sequence the fragments were cut from; it is not read
# by the code below.
my $fseq = 'CCCCGCGC';
my @nsub = ('CCCCG', 'CCCGC', 'CGCGC');

# Collected [ index, masked fragment ] pairs; two entries per overlapping
# pair of neighbours.
my @results;

# Walk over each fragment together with the one just before it.
for my $idx (1 .. $#nsub) {
    my $previous = $nsub[ $idx - 1 ];
    my $current  = $nsub[$idx];

    my ($found, $masked_previous, $masked_current)
        = mask_overlap($previous, $current);

    if (defined $found) {
        printf "%d -> %s (%s) %d -> %s \n",
            $idx - 1, $previous, $found, $idx, $current;
        push @results, [ $idx - 1, $masked_previous ];
        push @results, [ $idx,     $masked_current ];
    }
    else {
        printf "%d -> no overlap\n", $idx;
    }
}

print Data::Dumper->Dump([ \@results ], ['result']);

# Expected output:
#
# 0 -> CCCCG (CCCG) 1 -> CCCGC
# 1 -> CCCGC (CGC) 2 -> CGCGC
# $result = [
#             [
#               0,
#               'C----'
#             ],
#             [
#               1,
#               '----C'
#             ],
#             [
#               1,
#               'CC---'
#             ],
#             [
#               2,
#               '---GC'
#             ]
#           ];