AAA
ATG
TTT
GTC
Note that the length of these strings in the set
maybe greater than 3.
####
$VAR1 = {
'A' => [2,1,1],
'T' => [1,3,1],
'C' => [0,0,1],
'G' => [1,0,1]
};
So the size of the array is the same with the length of the string.
In my case I need the variation of it, namely the probability of the
each base occur in the particular position:
$VAR = {
'A' => ['0.5','0.25','0.25'],
'T' => ['0.25','0.75','0.25'],
'C' => ['0','0','0.25'],
'G' => ['0.25','0','0.25']
}
####
#!/usr/bin/perl -w
use strict;
use Data::Dumper;
use Carp;
my @Array = ('AAA',
'ATG',
'TTT',
'GTC');
my ($PWM) = compute_pwm(@Array);
print Dumper $PWM;
#The code that does the job
sub compute_pwm {
my @mi = @_;
my $motif_count;
my $L;
#-------Beginning of PWM processing ----------------
foreach my $mi (@mi) {
chomp($mi);
$mi =~ s/\s//g;
$L = $mi;
my @words = split( /\W+/, $mi );
$motif_count += @words;
}
$motif_count = 0;
my $w = length($L);
my @A = ();
my @T = ();
my @C = ();
my @G = ();
for ( my $j = 0; $j < $w; $j++ ) {
# Initialize the base counts.
my $a = 0;
my $c = 0;
my $g = 0;
my $t = 0;
foreach my $mi (@mi) {
chomp($mi);
my $L = $mi;
my $sb = substr( $L, $j, 1 );
while ( $sb =~ /a/ig ) { $a++ }
while ( $sb =~ /t/ig ) { $t++ }
while ( $sb =~ /c/ig ) { $c++ }
while ( $sb =~ /g/ig ) { $g++ }
}
push( @A, $a );
push( @T, $t );
push( @C, $c );
push( @G, $g );
}
my $sumA = sumArray(@A);
my $sumT = sumArray(@T);
my $sumC = sumArray(@C);
my $sumG = sumArray(@G);
my @m = ();
my @Anm1 = ();
my @Tnm1 = ();
my @Cnm1 = ();
my @Gnm1 = ();
my @sPos = ();
#summing up A,T,C,G for all position
for ( my $b = 0; $b < $w; $b++ ) {
my $sumPos = $A[$b] + $T[$b] + $C[$b] + $G[$b];
push( @sPos, $sumPos );
my $nm1A = $A[$b]/$sumPos;
my $nm1T = $T[$b]/$sumPos;
my $nm1C = $C[$b]/$sumPos;
my $nm1G = $G[$b]/$sumPos;
push( @Anm1, $nm1A );
push( @Tnm1, $nm1T );
push( @Cnm1, $nm1C );
push( @Gnm1, $nm1G );
}
my @PWM = pwm( \@Anm1, \@Tnm1, \@Cnm1, \@Gnm1 );
return \@PWM;
}
#--------------- Subs of the subroutines ----------------------------
sub pwm {
#PWM in forms of AoH
my ($A,$T,$C,$G) = @_; #input are array references
my (%Ah,%Th,%Ch,%Gh);
$Ah{'A'} = [@$A];
$Th{'T'} = [@$T];
$Ch{'C'} = [@$C];
$Gh{'G'} = [@$G];
my @PWM; #AoH
push @PWM, {%Ah,%Th,%Ch,%Gh};
return @PWM;
}
sub sumArray
{
my @arr = @_;
my $sum = 0;
my $count = $#arr + 1;
for(my $i=0;$i<$count;$i++){
$sum += $arr[$i];
}
return $sum;
}