use strict; use List::MoreUtils qw(uniq); # initialize sequence data and emtpy match set my $seq = [ [ qw(A C G C A T T C A) ], [ qw(A C T G G A T A C) ], [ qw(T C A G C C A T C) ] ]; my $n = scalar(@$seq); # num of sequences my $m = scalar(@{$seq->[0]}); # num of positions my $match = {}; # first iterate over all the possible patterns for the sequence data for my $pat (1 .. 2**$m-1) { # now create the pattern by converting the pattern number to # binary and then to a slice with which to compare sequences. # pretty slick, if you ask me. my @pattern = reverse split //, binary($pat); my @slice; for (0 .. $m-1) { push @slice, $_ if $pattern[$_] } # now iterate over every sequence. check to see if it matches any # of the others based on the given pattern. for my $i (0 .. $n-2) { for my $j ( $i+1 .. $n-1 ) { if ( join("", @{$seq->[$i]}[@slice]) eq join("", @{$seq->[$j]}[@slice]) ) { # construct a hash key and store the matches my $key = join "", map { $pattern[$_] ? $seq->[$i][$_] : "." } (0 .. $m-1); push @{$match->{$key}}, $i,$j ; } } } } # now strip out the duplicate sequence numbers for each match (an # unfortunate step I had trouble avoiding), then display the matches for my $key (keys %$match) { @{$match->{$key}} = uniq @{$match->{$key}}; print $key , " occurs in " , join(",", @{$match->{$key}}) , "\n"; } # a short decimal to binary converter I borrowed from Mark Dominus sub binary { my ($n) = @_; return $n if $n == 0 || $n == 1; my $k = int($n/2); my $b = $n % 2; my $E = binary($k); return $E . $b; }