Would give the following result:AAA ATG TTT GTC Note that the length of these strings in the set maybe greater than 3.
Below is my incredibly naive, inefficient and ugly code. Can any body suggest a better and faster solution than this:$VAR1 = { 'A' => [2,1,1], 'T' => [1,3,1], 'C' => [0,0,1], 'G' => [1,0,1] }; So the size of the array is the same with the length of the string. In my case I need the variation of it, namely the probability of the each base occur in the particular position: $VAR = { 'A' => ['0.5','0.25','0.25'], 'T' => ['0.25','0.75','0.25'], 'C' => ['0','0','0.25'], 'G' => ['0.25','0','0.25'] }
#!/usr/bin/perl -w use strict; use Data::Dumper; use Carp; my @Array = ('AAA', 'ATG', 'TTT', 'GTC'); my ($PWM) = compute_pwm(@Array); print Dumper $PWM; #The code that does the job sub compute_pwm { my @mi = @_; my $motif_count; my $L; #-------Beginning of PWM processing ---------------- foreach my $mi (@mi) { chomp($mi); $mi =~ s/\s//g; $L = $mi; my @words = split( /\W+/, $mi ); $motif_count += @words; } $motif_count = 0; my $w = length($L); my @A = (); my @T = (); my @C = (); my @G = (); for ( my $j = 0; $j < $w; $j++ ) { # Initialize the base counts. my $a = 0; my $c = 0; my $g = 0; my $t = 0; foreach my $mi (@mi) { chomp($mi); my $L = $mi; my $sb = substr( $L, $j, 1 ); while ( $sb =~ /a/ig ) { $a++ } while ( $sb =~ /t/ig ) { $t++ } while ( $sb =~ /c/ig ) { $c++ } while ( $sb =~ /g/ig ) { $g++ } } push( @A, $a ); push( @T, $t ); push( @C, $c ); push( @G, $g ); } my $sumA = sumArray(@A); my $sumT = sumArray(@T); my $sumC = sumArray(@C); my $sumG = sumArray(@G); my @m = (); my @Anm1 = (); my @Tnm1 = (); my @Cnm1 = (); my @Gnm1 = (); my @sPos = (); #summing up A,T,C,G for all position for ( my $b = 0; $b < $w; $b++ ) { my $sumPos = $A[$b] + $T[$b] + $C[$b] + $G[$b]; push( @sPos, $sumPos ); my $nm1A = $A[$b]/$sumPos; my $nm1T = $T[$b]/$sumPos; my $nm1C = $C[$b]/$sumPos; my $nm1G = $G[$b]/$sumPos; push( @Anm1, $nm1A ); push( @Tnm1, $nm1T ); push( @Cnm1, $nm1C ); push( @Gnm1, $nm1G ); } my @PWM = pwm( \@Anm1, \@Tnm1, \@Cnm1, \@Gnm1 ); return \@PWM; } #--------------- Subs of the subroutines ---------------------------- sub pwm { #PWM in forms of AoH my ($A,$T,$C,$G) = @_; #input are array references my (%Ah,%Th,%Ch,%Gh); $Ah{'A'} = [@$A]; $Th{'T'} = [@$T]; $Ch{'C'} = [@$C]; $Gh{'G'} = [@$G]; my @PWM; #AoH push @PWM, {%Ah,%Th,%Ch,%Gh}; return @PWM; } sub sumArray { my @arr = @_; my $sum = 0; my $count = $#arr + 1; for(my $i=0;$i<$count;$i++){ $sum += $arr[$i]; } return $sum; }
In reply to Position Weight Matrix of Set of Strings by monkfan
| For: | Use: | ||
| & | & | ||
| < | < | ||
| > | > | ||
| [ | [ | ||
| ] | ] |