SOMEN has asked for the wisdom of the Perl Monks concerning the following question:
this code fails to count the dipeptides globally. gives count less than the actual value. Unable to figure it out...anyone plese help......thanks in advance
#!/usr/bin/perl use strict; use warnings; use List::Util qw(sum); print "Please type the filename of the DNA sequence MMta: "; my $file = <STDIN>; chomp $file; # Use three arg open and test for errors open my $fh_in, '<', $file or die "Unable to open '$file': $!"; my %count; my $seq; # loop on every lines of input file while(my $line = <$fh_in>) { chomp $line; # Find the sequence name $seq = $1 if $line =~ /(>.*?cds)/; # Count while($line =~ /(AA|AL|DA|DE|DV|VD|DW|QD|SD|HD|ED|DY|VE|EN|EI|KE|N +V|VP|FV|SS|WK|KK)/ig) { $count{$seq}{$1}++; } } open my $fh_out, '>', 'countbase.txt' or die "Unable to open 'countbas +e.txt': $!"; # Display the result separately for each sequence foreach(keys %count) { my $sum = sum(values %{$count{$_}}); my $abs = 22 - scalar keys %{$count{$_}}; print $fh_out $_,"\nsum = ",$sum,"\nabs = ", $abs,"\n"; }
|
|---|