monkfan has asked for the wisdom of the Perl Monks concerning the following question:
Would give the following result:AAA ATG TTT GTC Note that the length of these strings in the set maybe greater than 3.
Below is my incredibly naive, inefficient and ugly code. Can any body suggest a better and faster solution than this:$VAR1 = { 'A' => [2,1,1], 'T' => [1,3,1], 'C' => [0,0,1], 'G' => [1,0,1] }; So the size of the array is the same with the length of the string. In my case I need the variation of it, namely the probability of the each base occur in the particular position: $VAR = { 'A' => ['0.5','0.25','0.25'], 'T' => ['0.25','0.75','0.25'], 'C' => ['0','0','0.25'], 'G' => ['0.25','0','0.25'] }
#!/usr/bin/perl -w use strict; use Data::Dumper; use Carp; my @Array = ('AAA', 'ATG', 'TTT', 'GTC'); my ($PWM) = compute_pwm(@Array); print Dumper $PWM; #The code that does the job sub compute_pwm { my @mi = @_; my $motif_count; my $L; #-------Beginning of PWM processing ---------------- foreach my $mi (@mi) { chomp($mi); $mi =~ s/\s//g; $L = $mi; my @words = split( /\W+/, $mi ); $motif_count += @words; } $motif_count = 0; my $w = length($L); my @A = (); my @T = (); my @C = (); my @G = (); for ( my $j = 0; $j < $w; $j++ ) { # Initialize the base counts. my $a = 0; my $c = 0; my $g = 0; my $t = 0; foreach my $mi (@mi) { chomp($mi); my $L = $mi; my $sb = substr( $L, $j, 1 ); while ( $sb =~ /a/ig ) { $a++ } while ( $sb =~ /t/ig ) { $t++ } while ( $sb =~ /c/ig ) { $c++ } while ( $sb =~ /g/ig ) { $g++ } } push( @A, $a ); push( @T, $t ); push( @C, $c ); push( @G, $g ); } my $sumA = sumArray(@A); my $sumT = sumArray(@T); my $sumC = sumArray(@C); my $sumG = sumArray(@G); my @m = (); my @Anm1 = (); my @Tnm1 = (); my @Cnm1 = (); my @Gnm1 = (); my @sPos = (); #summing up A,T,C,G for all position for ( my $b = 0; $b < $w; $b++ ) { my $sumPos = $A[$b] + $T[$b] + $C[$b] + $G[$b]; push( @sPos, $sumPos ); my $nm1A = $A[$b]/$sumPos; my $nm1T = $T[$b]/$sumPos; my $nm1C = $C[$b]/$sumPos; my $nm1G = $G[$b]/$sumPos; push( @Anm1, $nm1A ); push( @Tnm1, $nm1T ); push( @Cnm1, $nm1C ); push( @Gnm1, $nm1G ); } my @PWM = pwm( \@Anm1, \@Tnm1, \@Cnm1, \@Gnm1 ); return \@PWM; } #--------------- Subs of the subroutines ---------------------------- sub pwm { #PWM in forms of AoH my ($A,$T,$C,$G) = @_; #input are array references my (%Ah,%Th,%Ch,%Gh); $Ah{'A'} = [@$A]; $Th{'T'} = [@$T]; $Ch{'C'} = [@$C]; $Gh{'G'} = [@$G]; my @PWM; #AoH push @PWM, {%Ah,%Th,%Ch,%Gh}; return @PWM; } sub sumArray { my @arr = @_; my $sum = 0; my $count = $#arr + 1; for(my $i=0;$i<$count;$i++){ $sum += $arr[$i]; } return $sum; }
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re: Position Weight Matrix of Set of Strings
by BrowserUk (Patriarch) on Feb 16, 2006 at 10:31 UTC | |
by monkfan (Curate) on Sep 06, 2006 at 08:49 UTC | |
by BrowserUk (Patriarch) on Sep 06, 2006 at 09:41 UTC | |
by neversaint (Deacon) on Sep 07, 2006 at 02:31 UTC | |
by BrowserUk (Patriarch) on Sep 07, 2006 at 03:55 UTC | |
| |
|
Re: Position Weight Matrix of Set of Strings
by TedPride (Priest) on Feb 16, 2006 at 13:09 UTC | |
|
Re: Position Weight Matrix of Set of Strings
by tweetiepooh (Hermit) on Feb 16, 2006 at 10:11 UTC |