#!/usr/bin/perl
# Report the amino-acid composition of every record in a FASTA-style file.
#
# The script prompts for a filename, reads the file one blank-line-separated
# paragraph at a time, and for each paragraph counts how many times each of
# the 20 standard one-letter amino-acid codes occurs.  A per-record summary,
# the records with the smallest amino-acid alphabet, and a ranking of all
# records are written to "<filename>_report".  A tally of how many records
# lack each amino acid is printed to STDOUT.
use strict;
use warnings;

# One-letter codes that are counted; anything else in a sequence is ignored
# for the composition counts (but still contributes to the sequence length).
my @AMINO_ACIDS = qw(A C D E F G H I K L M N P Q R S T V W Y);
my $aa_class    = join '', @AMINO_ACIDS;
my $AA_RE       = qr/[$aa_class]/;

print 'Please enter protein sequence filename: ';
my $prot_filename = <STDIN>;
die "No filename given\n" unless defined $prot_filename;
chomp $prot_filename;

open my $PROTFILE, '<', $prot_filename
    or die "Cannot open '$prot_filename' because: $!";

my $report_name = $prot_filename . '_report';
open my $out_file, '>', $report_name
    or die "Cannot open '$report_name' because: $!";

my @count;     # one [ alphabet_size, record_name ] pair per record
my %absent;    # amino acid => number of records in which it never occurs

# A paragraph without its own '>' header keeps the most recent name; this
# placeholder is only seen if the very first paragraph has no header.
my $name = '(unnamed)';

{
    # Paragraph mode: each read returns one blank-line-delimited record.
    # Localised so the rest of the program keeps normal line semantics.
    local $/ = '';

    while ( my $para = <$PROTFILE> ) {

        # Remove the FASTA header line, remembering its text as the name.
        if ( $para =~ s/^>(.*)//m ) {
            $name = $1;
        }

        # Remove comment line(s).
        $para =~ s/^\s*#.*//mg;

        # Strip all whitespace so the length reflects sequence characters
        # only, and measure it before any counting takes place.
        ( my $sequence = $para ) =~ s/\s+//g;
        my $len = length $sequence;

        # Count residues without modifying the sequence text.
        my %prot;
        ++$prot{$_} for $sequence =~ /$AA_RE/g;

        my $num = scalar keys %prot;
        push @count, [ $num, $name ];

        printf "Counted %d for %s ..\n", $num, substr( $name, 0, 50 );

        print  {$out_file} "$name\n";
        print  {$out_file} join( ' ', map {"$_=$prot{$_}"} sort keys %prot ), "\n";
        printf {$out_file} "Amino acid alphabet = %d\n", $num;
        printf {$out_file} "Sequence length = %d\n\n",   $len;

        # Tally which of the counted amino acids this record lacks.
        for my $aa (@AMINO_ACIDS) {
            ++$absent{$aa} unless exists $prot{$aa};
        }
    }
}

close $PROTFILE;

# Sort records by alphabet size, ascending, so the smallest come first.
my @sorted = sort { $a->[0] <=> $b->[0] } @count;

if (@sorted) {
    my $lowest = $sorted[0][0];

    # More than one record may share the lowest count.
    printf {$out_file} "Least number of amino acids is %d in these entries\n",
        $lowest;
    print {$out_file} "$_->[1]\n" for grep { $_->[0] == $lowest } @sorted;

    # Show all results.
    print {$out_file} "\nAll results in ascending count\n";
    printf {$out_file} "%d %s\n", @{$_} for @sorted;
}
else {
    print {$out_file} "No sequences found in $prot_filename\n";
}

# Buffered write errors only surface at close, so check it.
close $out_file
    or die "Cannot close '$report_name' because: $!";

print "Results are printed in $report_name\n";

# Print absent counts.
print "\nExclusion of various amino acids in $prot_filename is as follows\n";
for my $aa ( sort keys %absent ) {
    printf "%s=%d\n", $aa, $absent{$aa};
}