in reply to Finding Consensus String
Update: more efficient way to set %H would be:use strict; use warnings; my @instances = qw ( AAAAA ATCGA ATAAA ); my @instances2 = qw ( AAAAA AACGA ATAAA ATAAA ); print consensus(@instances),"\n"; # ATAAA print consensus(@instances2),"\n"; # ATAAA exit; sub consensus{ my @mi = @_; chomp(@mi); my $motif_count=0; my @words =(); my %H = ( A=>[], T=>[], C=>[], G=>[] ); s/\s//g for @mi; my ($w) = sort {$b <=> $a} map {length} @mi; # set w to the lengt +h of the longest element foreach my $j ( 0 .. $w-1 ){ # Initialize the base counts. my %h = ( a=>0, t=>0, c=>0, g=>0 ); foreach my $mi ( @mi ) { $h{ lc substr($mi,$j,1) }++; # example: $h{g}++ +; } push @{$H{ uc $_ }}, $h{$_} for keys %h; # example: push @{ +$H{G}}, $g; } my @cons = (); my %prefOrder = ( A=>1, T=>2, C=>3, G=>4 ); foreach my $B ( 0 .. $w-1 ){ push @cons, [ sort { $H{$b}->[$B] <=> $H{$a}->[$B] || $prefOrder +{$b} <=> $prefOrder{$a} } qw/A T G C/ ]->[0]; } return @cons; }
my %H = ( A=>[], T=>[], C=>[], G=>[] ); my @mi_letters = map { [split '', uc $_] } @mi; foreach my $j ( 0 .. $w-1 ){ $H{ $_->[$j] }->[$j]++ for @mi_letters; } # note: to avoid warnings, change the sort further down to include t +his (the '||0'): # ($H{$b}->[$B]||0) <=> ($H{$a}->[$B]||0)
|
|---|