S4 ABIC
S1 AB-C
S2 A--C
####
S4 ABICID
S2 A--C-D
S1 AB-C-D
####
# With major improvement by "canonizable" Adrade :-)
sub align_v3
{
my ($hashref,$seq) = @_;
my @in = split(/\s/,$seq);
my $test = join('(.*?)',@in);
my @hyph_padded_seq;
my @sequences;
my $reduce;
for (keys %$hashref) { $reduce = length(${$hashref->{$_}}[0]);
last; }
for my $key( sort {$a cmp $b} keys(%$hashref))
{
my $string = join('',@{$hashref->{$key}});
if($string =~ $test)
{
push(@sequences, $hashref->{$key});
}
}
my $regex = '.*'.join('(.*)',@in).'.*';
my @in_the_running;
my $biglen = 0;
for (@sequences)
{
my @letters = @$_; # split(//,$_);
s/.*($in[0].*$in[$#in]).*/$1/;
my ($first, $last) = (0,0);
for (@letters) { last if $_ eq $in[0]; $first++; }
for (reverse @letters) { last if $_ eq $in[$#in];
$last++;
}
$last = $#letters - $last;
@letters = @letters[$first..$last];
my $asstr = join('',@letters);
if ((my @gaps) = ($asstr =~ m/$regex/o))
{
my $len = length(join('',@gaps)) / $reduce;
if ($len > $biglen)
{
unshift(@in_the_running, [ @letters ]);
$biglen = length(join('',@gaps));
}
elsif ($len == $biglen)
{
unshift(@in_the_running, [ @letters ]);
}
else
{
push(@in_the_running, [ @letters ]);
}
}
}
#print @{$in_the_running[0]}, "\n";
push @hyph_padded_seq, join('',@{$in_the_running[0]});
my @base = @{$in_the_running[0]};
for my $seqno (1..$#in_the_running)
{
# my @seq = split(//,$in_the_running[$seqno]);
my @seq = @{$in_the_running[$seqno]};
my $count = $#base;
my @disp;
for my $q (reverse @base)
{
if (($seq[$#seq] eq $q) || ($count == $#seq))
{
push(@disp, pop(@seq));
}
else
{
push(@disp, '-' x $reduce);
}
$count--;
}
push @hyph_padded_seq, join('', reverse(@disp));
#print reverse(@disp), "\n";
}
return @hyph_padded_seq;
}
# A hash version can be found here: http://paste.phpfi.com/61624