in reply to Mustering Reads

The following does seem to do the trick. It should work for any number of "repo"s. It does assume all are of the same length, but it wouldn't be complicated to adjust for varying lengths:
use 5.010; use strict; use warnings; my $k = 25; my %readrepo =( readA => "GCTGAGGCAGGAGAATTGCTTGAACCTGGGAGGCA", readB => "TACTCAGGAGGCTGAGGCAGGAGAATTGCTTGAAC", readC => "GCTGAGGCAGGAGAATTGCTTGAACTTAGGGGATG", readD => "TACTCGGGAGGCTGAGGCAGGAGAATTGCTTGAAC", ); my @readstoconcate = ( "readA_1", "readB_2", "readC_1", "readD_2", ); my @prefixes; my @postfixes; my $fixed; foreach my $read (@readstoconcate) { my ($entry, $tag) = split '_', $read; if ($tag == 1) { push @postfixes, substr $readrepo{$entry}, $k; $fixed ||= substr $readrepo{$entry}, 0, $k; } else { my $k1 = length($readrepo{$entry}) - $k; push @prefixes, substr $readrepo{$entry}, 0, $k1; $fixed ||= substr $readrepo{$entry}, $k1; } } # # Assume all entries in %readrepo are the same length. # foreach my $set (\@prefixes, \@postfixes) { next unless @$set; for (my $i = 0; $i < length $$set[0]; $i++) { my $l = "\x0"; my @c = grep {my $r = ($_ ne $l); $l = $_; $r} map {substr $_, $i, 1} @$set; local $" = ","; print @c == 1 ? $c[0] : "(@c)"; } } continue { state $flag = 0; print $fixed unless $flag++; } print "\n"; __END__ TACTC(A,G)GGAGGCTGAGGCAGGAGAATTGCTTGAAC(C,T)T(G,A)GG(A,G)G(G,A)(C,T)(A +,G)