use 5.010; use strict; use warnings; my $k = 25; my %readrepo =( readA => "GCTGAGGCAGGAGAATTGCTTGAACCTGGGAGGCA", readB => "TACTCAGGAGGCTGAGGCAGGAGAATTGCTTGAAC", readC => "GCTGAGGCAGGAGAATTGCTTGAACTTAGGGGATG", readD => "TACTCGGGAGGCTGAGGCAGGAGAATTGCTTGAAC", ); my @readstoconcate = ( "readA_1", "readB_2", "readC_1", "readD_2", ); my @prefixes; my @postfixes; my $fixed; foreach my $read (@readstoconcate) { my ($entry, $tag) = split '_', $read; if ($tag == 1) { push @postfixes, substr $readrepo{$entry}, $k; $fixed ||= substr $readrepo{$entry}, 0, $k; } else { my $k1 = length($readrepo{$entry}) - $k; push @prefixes, substr $readrepo{$entry}, 0, $k1; $fixed ||= substr $readrepo{$entry}, $k1; } } # # Assume all entries in %readrepo are the same length. # foreach my $set (\@prefixes, \@postfixes) { next unless @$set; for (my $i = 0; $i < length $$set[0]; $i++) { my $l = "\x0"; my @c = grep {my $r = ($_ ne $l); $l = $_; $r} map {substr $_, $i, 1} @$set; local $" = ","; print @c == 1 ? $c[0] : "(@c)"; } } continue { state $flag = 0; print $fixed unless $flag++; } print "\n"; __END__ TACTC(A,G)GGAGGCTGAGGCAGGAGAATTGCTTGAAC(C,T)T(G,A)GG(A,G)G(G,A)(C,T)(A,G)