#!/usr/bin/perl -w
use strict;
use Data::Dumper;

# Sample data: sequence name => array ref of one-character symbols.
my $hash = {
             'S1' => [ 'A','B','C','D','H','A' ],
             'S2' => [ 'A','C','D','B','G','J' ],
             'S3' => [ 'C', 'A', 'D', 'H','M','K' ],
             'S4' => [ 'A', 'B', 'I', 'C','I','D' ]
           };

my $seq1 = 'A C D'; # there is always a space between the query chars

# These two are for testing
my $seq2 = 'A B C';
my $seq3 = 'A B';

# Align every sequence that contains the query and dump the padded strings.
my @alignment = align($hash,$seq1);
print Dumper \@alignment;

# align($hashref, $seq)
#
# Aligns every sequence that contains the query symbols in order.
#
#   $hashref - hash ref mapping a sequence name to an array ref of symbols;
#              the symbols of one sequence are joined into a single string.
#   $seq     - query symbols separated by whitespace, e.g. 'A C D'.
#
# A sequence matches when the query symbols occur in it in order, with any
# (possibly empty) run of characters between consecutive symbols; the
# leftmost match with non-greedy gaps is used.  For each gap position the
# widest gap over all matching sequences is determined, and every matching
# sequence is rendered as
#     symbol, gap text, '-' padding up to the widest gap, symbol, ...
# so that the query symbols line up in the same columns.
#
# Returns the list of padded strings, ordered by sequence name (string
# sort).  Returns an empty list when the query is empty or nothing matches.
sub align
{
    my ($hashref, $seq) = @_;

    # split ' ' ignores leading and repeated whitespace, so no empty symbols.
    my @symbols = split ' ', $seq;
    return if !@symbols;

    # quotemeta keeps query symbols such as '*' or '.' literal in the regex.
    my $pattern   = join '(.*?)', map { quotemeta($_) } @symbols;
    my $regex     = qr/$pattern/s;
    my $gap_count = $#symbols;    # one gap between each pair of symbols

    my @gaps_of_match;                    # one array ref of gap texts per hit
    my @widest = (0) x $gap_count;        # widest gap seen at each position

    for my $key (sort keys %$hashref)
    {
        my $string = join '', @{ $hashref->{$key} };
        next unless $string =~ $regex;

        # Read the captures through @-/@+ so the code does not depend on
        # numbered variables and also works when there are no groups at all.
        my @gaps = map { substr($string, $-[$_], $+[$_] - $-[$_]) }
                   1 .. $gap_count;

        for my $i (0 .. $#gaps)
        {
            my $width = length $gaps[$i];
            $widest[$i] = $width if $width > $widest[$i];
        }
        push @gaps_of_match, \@gaps;
    }

    my @hyph_padded_seq;
    for my $gaps (@gaps_of_match)
    {
        my $aligned = $symbols[0];
        for my $i (0 .. $gap_count - 1)
        {
            my $padding = '-' x ($widest[$i] - length $gaps->[$i]);
            $aligned .= $gaps->[$i] . $padding . $symbols[$i + 1];
        }
        push @hyph_padded_seq, $aligned;
    }

    return @hyph_padded_seq;
}

##</code><code>##

Query: "A C D"     Query: "A B C"    Query: "A B"   Query: "C D"
Answers:           Answers:          Answers:       Answers:
[                  [                 [              [
  'AB-C-D',         'ABIC',           'A--B',        'C-D',
  'A--C-D',         'AB-C'            'ACDB',        'C-D',
  'ABICID',        ]                  'A--B'         'CAD',
]                                    ]               'CID'
                                                    ]

##</code><code>##

Take Query "A C D" as an example:

             'S1' => [ 'A', 'B', 'C', 'D','H','A' ],
             'S2' => [ 'A', 'C', 'D', 'B','G','J' ],
             'S3' => [ 'C', 'A', 'D', 'H','M','K' ],
             'S4' => [ 'A', 'B', 'I', 'C','I','D' ]  

1. The query "A C D" (symbols in order, gaps allowed) is found only in S1, S2 and S4.
   Thus, only the arrays from S1, S2 and S4 are taken for alignment.

2. S4->"ABICID" has the biggest total gap compared to S1 and S2,
   as shown here:
   S4->"ABICID" gives A[BI]C[I]D, i.e. 2 + 1 = 3 gap characters:
                                  2 for 'BI', 1 for 'I'
   S1->"ABCDHA" gives A[B]CD, only 1 gap character, for [B]
   S2->"ACDBGJ" gives ACD,    without any gaps in between.

3. Then S1 and S2 must be aligned *based* on S4. That means
   S1 and S2 are each used only up to the point where the matched D ends.
   S1->"ABCD" 
   S2->"ACD"
   These are the two strings we align with S4->"ABICID"

4. Now, let's align S1->"ABCD" with S4->"ABICID".
   In S1, there is 1 gap in ABC in comparison to S4,
   like this:
          S1->  AB-C
          S4->  ABIC
   
   There is another 1 gap in CD in comparison to S4
   like this:

          S1-> C-D
          S4-> CID

    Thus the full alignment between S1 and S4 is:

        S1-> AB-C-D
        S4-> ABICID

5. Then align S2->"ACD" with S4->"ABICID"
   In S2, there are 2 gaps in AC in comparison to S4
   like this:
          S2->  A--C
          S4->  ABIC
   
   There is another 1 gap in CD in comparison to S4
   like this:

          S2-> C-D
          S4-> CID

    Thus the full alignment between S2 and S4 is:

        S2-> A--C-D
        S4-> ABICID

6. The final alignment gives:
        S1-> AB-C-D
        S2-> A--C-D
        S4-> ABICID