This produces a slightly different output for your sample than you provided--but I think mine is right :)
(You'll have to explain to me why it isn't, if it isn't.)
This could probably be made quite a bit more efficient with further thought, but I wanted to check if it is correct first.
#! perl -slw use strict; my $k = 25; my %repo = ( "readA" => "GCTGAGGCAGGAGAATTGCTTGAACCTGGGAGGCA", "readB" => "TACTCAGGAGGCTGAGGCAGGAGAATTGCTTGAAC", "readC" => "GCTGAGGCAGGAGAATTGCTTGAACTTAGGGGATG", "readD" => "TACTCGGGAGGCTGAGGCAGGAGAATTGCTTGAAC", ); my @order = ( "readA_1", "readB_2", "readC_1", "readD_2"); my( @heads, @tails, $common ); while( @order ) { my( $s1, $p1, $s2, $p2 ) = map split( '_', shift @order ), 1 .. 2; ( $s1, $s2 ) = ( $s2, $s1 ) if $p1 > $p2; push @heads, substr $repo{ $s2 }, 0, length( $repo{ $s2 } ) - $k; push @tails, substr $repo{ $s1 }, $k; $common = substr $repo{ $s1 }, -$k unless $common; } my $head = ''; for my $p ( 0 .. length( $heads[0] )-1 ) { my %uniq; ++$uniq{ substr $heads[ $_ ], $p, 1 } for 0 .. $#heads; if( keys %uniq > 1 ) { $head .= '(' . join( ',', keys %uniq ) . ')'; } else { $head .= each %uniq; } } my $tail = ''; for my $p ( 0 .. length( $tails[0] )-1 ) { my %uniq; ++$uniq{ substr $tails[ $_ ], $p, 1 } for 0 .. $#tails; if( keys %uniq > 1 ) { $tail .= '(' . join( ',', keys %uniq ) . ')'; } else { $tail .= each %uniq; } } print $head, $common, $tail; __END__ c:\test>868716 TACTC(A,G)GGAGGAGAATTGCTTGAACCTGGGAGGCA(T,C)T(A,G)GG(A,G)G(A,G)(T,C)(A +,G)
In reply to Re: Mustering Reads
by BrowserUk
in thread Mustering Reads
by neversaint
| For: | Use: | ||
| & | & | ||
| < | < | ||
| > | > | ||
| [ | [ | ||
| ] | ] |