#!/usr/bin/perl -w
use strict;
use warnings;
use Data::Dumper;

# Sample data: each key names a sequence, each value lists its elements in order.
my $hash = {
    'S1' => [ 'A', 'B', 'C', 'D', 'H', 'A' ],
    'S2' => [ 'A', 'C', 'D', 'B', 'G', 'J' ],
    'S3' => [ 'C', 'A', 'D', 'H', 'M', 'K' ],
    'S4' => [ 'A', 'B', 'I', 'C', 'I', 'D' ],
};

# There is always a space between the query chars.
my $seq1 = 'A C D';

# These two are for testing.
my $seq2 = 'A B C';
my $seq3 = 'A B';

my @alignment = align($hash, $seq1);
print Dumper \@alignment;

# align($hashref, $seq)
#
# For every entry of %$hashref (visited in sorted key order) whose elements,
# joined into one string, contain the query tokens of $seq in order, build a
# copy of the query in which the characters skipped between two consecutive
# tokens are replaced by the same number of '-' characters.
#
# Parameters:
#   $hashref - reference to a hash of array references (name => elements).
#   $seq     - query string; tokens are separated by whitespace.
#
# Returns:
#   A list with one hyphen-padded string per matching entry, in sorted key
#   order. Entries that do not contain the query are skipped. An empty or
#   undefined query yields an empty list.
#
# Side effects:
#   Prints "Stars: N" to STDOUT, where N is the number of gaps between query
#   tokens (kept from the original diagnostic output).
#
# NOTE(review): the original draft mentioned using the "highest gap sum";
# no normalization of the padded strings to a common width is done here --
# confirm whether that is wanted.
sub align {
    my ($hashref, $seq) = @_;

    # split ' ' skips leading whitespace and collapses runs of whitespace,
    # so stray extra spaces do not produce empty tokens.
    my @seq_ar = split ' ', ( defined $seq ? $seq : '' );

    # One gap sits between each pair of consecutive query tokens. Derive the
    # count from the token list instead of counting '*' in the regex text.
    my $count_stars = @seq_ar ? $#seq_ar : 0;
    print "Stars: $count_stars\n";

    my @hyph_padded_seq;
    return @hyph_padded_seq if !@seq_ar;

    # quotemeta keeps tokens such as '.' or '*' literal; each gap is captured
    # non-greedily so the tightest leftmost alignment is found.
    my $pattern = join '(.*?)', map { quotemeta } @seq_ar;
    my $regex   = qr/$pattern/;

    for my $key ( sort keys %{$hashref} ) {
        my $string = join '', @{ $hashref->{$key} };

        # In list context a successful match returns the captured gaps
        # (or the single value 1 when the pattern has no groups); a failed
        # match returns the empty list.
        my @captures = $string =~ $regex;
        next if !@captures;

        # With a single query token there are no groups, so take no gaps.
        my @gaps = @captures[ 0 .. $count_stars - 1 ];

        my $padded = $seq_ar[0];
        for my $i ( 0 .. $#gaps ) {
            $padded .= ( '-' x length $gaps[$i] ) . $seq_ar[ $i + 1 ];
        }
        push @hyph_padded_seq, $padded;
    }

    return @hyph_padded_seq;
}