use strict; use List::MoreUtils qw(uniq); # initialize sequence data and emtpy match set my $seq = [ [ qw(A C G C A T T C A) ], [ qw(A C T G G A T A C) ], [ qw(T C A G C C A T C) ] ]; my $n = scalar(@$seq); # num of sequences my $m = scalar(@{$seq->[0]}); # num of positions my $match = {}; # first iterate over all the possible patterns for the sequence data for my $pat (1 .. 2**$m-1) { # now create the pattern by converting the pattern number to # binary and then to a slice with which to compare sequences. # pretty slick, if you ask me. my @pattern = reverse split //, binary($pat); my @slice; for (0 .. $m-1) { push @slice, $_ if $pattern[$_] } # now iterate over every sequence. check to see if it matches any # of the others based on the given pattern. for my $i (0 .. $n-2) { for my $j ( $i+1 .. $n-1 ) { if ( join("", @{$seq->[$i]}[@slice]) eq join("", @{$seq->[$j]}[@slice]) ) { # construct a hash key and store the matches my $key = join "", map { $pattern[$_] ? $seq->[$i][$_] : "." } (0 .. $m-1); push @{$match->{$key}}, $i,$j ; } } } } # now strip out the duplicate sequence numbers for each match (an # unfortunate step I had trouble avoiding), then display the matches for my $key (keys %$match) { @{$match->{$key}} = uniq @{$match->{$key}}; print $key , " occurs in " , join(",", @{$match->{$key}}) , "\n"; } # a short decimal to binary converter I borrowed from Mark Dominus sub binary { my ($n) = @_; return $n if $n == 0 || $n == 1; my $k = int($n/2); my $b = $n % 2; my $E = binary($k); return $E . $b; } #### ...G....C occurs in 1,2 .C......C occurs in 1,2 A.....T.. occurs in 0,1 .C.G....C occurs in 1,2 ........C occurs in 1,2 ...G..... occurs in 1,2 ......T.. occurs in 0,1 .C.G..... occurs in 1,2 A........ occurs in 0,1 AC....... occurs in 0,1 .C....T.. occurs in 0,1 AC....T.. occurs in 0,1 .C....... occurs in 0,1,2