AAA ATG TTT GTC Note that the length of these strings in the set maybe greater than 3. #### $VAR1 = { 'A' => [2,1,1], 'T' => [1,3,1], 'C' => [0,0,1], 'G' => [1,0,1] }; So the size of the array is the same with the length of the string. In my case I need the variation of it, namely the probability of the each base occur in the particular position: $VAR = { 'A' => ['0.5','0.25','0.25'], 'T' => ['0.25','0.75','0.25'], 'C' => ['0','0','0.25'], 'G' => ['0.25','0','0.25'] } #### #!/usr/bin/perl -w use strict; use Data::Dumper; use Carp; my @Array = ('AAA', 'ATG', 'TTT', 'GTC'); my ($PWM) = compute_pwm(@Array); print Dumper $PWM; #The code that does the job sub compute_pwm { my @mi = @_; my $motif_count; my $L; #-------Beginning of PWM processing ---------------- foreach my $mi (@mi) { chomp($mi); $mi =~ s/\s//g; $L = $mi; my @words = split( /\W+/, $mi ); $motif_count += @words; } $motif_count = 0; my $w = length($L); my @A = (); my @T = (); my @C = (); my @G = (); for ( my $j = 0; $j < $w; $j++ ) { # Initialize the base counts. my $a = 0; my $c = 0; my $g = 0; my $t = 0; foreach my $mi (@mi) { chomp($mi); my $L = $mi; my $sb = substr( $L, $j, 1 ); while ( $sb =~ /a/ig ) { $a++ } while ( $sb =~ /t/ig ) { $t++ } while ( $sb =~ /c/ig ) { $c++ } while ( $sb =~ /g/ig ) { $g++ } } push( @A, $a ); push( @T, $t ); push( @C, $c ); push( @G, $g ); } my $sumA = sumArray(@A); my $sumT = sumArray(@T); my $sumC = sumArray(@C); my $sumG = sumArray(@G); my @m = (); my @Anm1 = (); my @Tnm1 = (); my @Cnm1 = (); my @Gnm1 = (); my @sPos = (); #summing up A,T,C,G for all position for ( my $b = 0; $b < $w; $b++ ) { my $sumPos = $A[$b] + $T[$b] + $C[$b] + $G[$b]; push( @sPos, $sumPos ); my $nm1A = $A[$b]/$sumPos; my $nm1T = $T[$b]/$sumPos; my $nm1C = $C[$b]/$sumPos; my $nm1G = $G[$b]/$sumPos; push( @Anm1, $nm1A ); push( @Tnm1, $nm1T ); push( @Cnm1, $nm1C ); push( @Gnm1, $nm1G ); } my @PWM = pwm( \@Anm1, \@Tnm1, \@Cnm1, \@Gnm1 ); return \@PWM; } #--------------- Subs of the subroutines ---------------------------- sub pwm { #PWM in forms of AoH my ($A,$T,$C,$G) = @_; #input are array references my (%Ah,%Th,%Ch,%Gh); $Ah{'A'} = [@$A]; $Th{'T'} = [@$T]; $Ch{'C'} = [@$C]; $Gh{'G'} = [@$G]; my @PWM; #AoH push @PWM, {%Ah,%Th,%Ch,%Gh}; return @PWM; } sub sumArray { my @arr = @_; my $sum = 0; my $count = $#arr + 1; for(my $i=0;$i<$count;$i++){ $sum += $arr[$i]; } return $sum; }