#!/usr/bin/perl -w use strict; use Data::Dumper; my @freq_set = ( [ '3 A B', '2 A H', '3 A C', '4 A D', '2 B C', '2 B D', '2 C H', '4 C D', '2 D H' ], # There are some other array set # So yes, this is meant to be an AoA ); my $hash = { 'S1' => [ 'A', 'B', 'C','D','H','A' ], 'S2' => [ 'A', 'C', 'D','B','G','J' ], 'S3' => [ 'C', 'A', 'D','H','M','K' ], 'S4' => [ 'A', 'B', 'I','C','I','D' ] }; # These 3 lines have strange behaviour # If you move these three lines after foreach(@test) block # it will print nothing, but here it prints the correct answer my $seq = 'C D'; my %alignment = align($hash,$seq); print Dumper \%alignment; #-------Begin Loop--------------- my @test; foreach my $f ( 0 .. $#freq_set ) { foreach my $m ( 0 .. $#{$freq_set[$f]} ) { my ($sp,$mt) = $freq_set[$f][$m] =~/(\d+)\s+(.*)/; push @test, $mt; } } my %aln; foreach ( @test ) { $aln{$_} = { align($hash,$_) }; } #---------BEGIN Strange Behavior-------------- # If you move above three lines here then, # it print *empty* hashes for both of Dumper print. Why? print "From LOOP\n"; print Dumper $aln{'C D'}; #------------ Subroutine --------------- sub align { my ($hashref,$seq) = @_; my @in = split(/\s/,$seq); my $test = join('(.*?)',@in); my %hyph_padded_seq; my %sequences; my $reduce; for (keys %$hashref) { $reduce = length(${$hashref->{$_}}[0]); last; } for my $key( sort {$a cmp $b} keys(%$hashref)) { my $string = join('_',@{$hashref->{$key}}); if($string =~ $test) { $sequences{$key} = $hashref->{$key}; } } my $regex = '.*'.join('(.*)',@in).'.*'; my %in_the_running; my $biglen = 0; foreach my $seq (keys %sequences) { my @dum_in_the_running; my @letters = @{$sequences{$seq}}; #s/.*($in[0].*$in[$#in]).*/$1/; my ($first, $last) = (0,0); for (@letters) { last if $_ eq $in[0]; $first++; } for (reverse @letters) { last if $_ eq $in[$#in]; $last++; } $last = $#letters - $last; @letters = @letters[$first..$last]; my $asstr = join('',@letters); if ((my @gaps) = ($asstr =~ m/$regex/o)) { my $len = length(join('',@gaps)) / $reduce; if ($len > $biglen) { unshift(@dum_in_the_running, @letters ); $in_the_running{$seq} = [@dum_in_the_running]; $biglen = length(join('',@gaps)); } elsif ($len == $biglen) { unshift(@dum_in_the_running, @letters ); $in_the_running{$seq} = [@dum_in_the_running]; } else { push(@dum_in_the_running, @letters ); $in_the_running{$seq} = [@dum_in_the_running]; } } } #print Dumper \%in_the_running; if ( %in_the_running ) { my @keys_from_bigap = ( sort { @{$in_the_running{$b}} <=> @{$in_the_running{$a}} } keys %in_the_running ); $hyph_padded_seq{$keys_from_bigap[0]} = join('',@{$in_the_running{$keys_from_bigap[0]}}); my @base = @{$in_the_running{$keys_from_bigap[0]}}; for my $seqno (1..$#keys_from_bigap) { # my @seq = split(//,$in_the_running[$seqno]); my @seq = @{$in_the_running{$keys_from_bigap[$seqno]}}; my $count = $#base; my @disp; for my $q (reverse @base) { # Add this condition, when query is only 1 push(@seq,'-' x $reduce) if (!$seq[$#seq]); if (($seq[$#seq] eq $q) || ($count == $#seq)) { push(@disp, pop(@seq)); } else { push(@disp, '-' x $reduce); } $count--; } $hyph_padded_seq{$keys_from_bigap[$seqno]} = join('', reverse(@disp)); } } return %hyph_padded_seq; }