S4 ABIC S1 AB-C S2 A--C #### S4 ABICID S2 A--C-D S1 AB-C-D #### # With major improvement by "canonizable" Adrade :-) sub align_v3 { my ($hashref,$seq) = @_; my @in = split(/\s/,$seq); my $test = join('(.*?)',@in); my @hyph_padded_seq; my @sequences; my $reduce; for (keys %$hashref) { $reduce = length(${$hashref->{$_}}[0]); last; } for my $key( sort {$a cmp $b} keys(%$hashref)) { my $string = join('',@{$hashref->{$key}}); if($string =~ $test) { push(@sequences, $hashref->{$key}); } } my $regex = '.*'.join('(.*)',@in).'.*'; my @in_the_running; my $biglen = 0; for (@sequences) { my @letters = @$_; # split(//,$_); s/.*($in[0].*$in[$#in]).*/$1/; my ($first, $last) = (0,0); for (@letters) { last if $_ eq $in[0]; $first++; } for (reverse @letters) { last if $_ eq $in[$#in]; $last++; } $last = $#letters - $last; @letters = @letters[$first..$last]; my $asstr = join('',@letters); if ((my @gaps) = ($asstr =~ m/$regex/o)) { my $len = length(join('',@gaps)) / $reduce; if ($len > $biglen) { unshift(@in_the_running, [ @letters ]); $biglen = length(join('',@gaps)); } elsif ($len == $biglen) { unshift(@in_the_running, [ @letters ]); } else { push(@in_the_running, [ @letters ]); } } } #print @{$in_the_running[0]}, "\n"; push @hyph_padded_seq, join('',@{$in_the_running[0]}); my @base = @{$in_the_running[0]}; for my $seqno (1..$#in_the_running) { # my @seq = split(//,$in_the_running[$seqno]); my @seq = @{$in_the_running[$seqno]}; my $count = $#base; my @disp; for my $q (reverse @base) { if (($seq[$#seq] eq $q) || ($count == $#seq)) { push(@disp, pop(@seq)); } else { push(@disp, '-' x $reduce); } $count--; } push @hyph_padded_seq, join('', reverse(@disp)); #print reverse(@disp), "\n"; } return @hyph_padded_seq; } # A hash version can be found here: http://paste.phpfi.com/61624