# Data table that preceded this script in the original text. It is kept here
# for reference only; the script below does not read it.
#
#    1.800 A  Ala
#   -3.500 BB Asx
#    2.500 C  Cys    Note: Columns 1-8 must contain 1 numeric value only
#   -3.500 D  Asp
#   -3.500 E  Glu    Note: This file is required for amphipathic helic... (note is cut off here)
#    2.800 F  Phe
#   -0.400 G  Gly
#   -3.200 H  His
#    4.500 I  Ile
#   -3.900 K  Lys
#    3.800 L  Leu
#    1.900 M  Met
#   -3.500 N  Asn
#   -1.600 P  Pro
#   -3.500 Q  Gln
#   -4.500 R  Arg
#   -0.800 S  Ser
#   -0.700 T  Thr
#    4.200 V  Val
#   -0.900 W  Trp
#   -0.490 X- Unk
#   -1.300 Y  Tyr
#   -3.500 ZZ Glx
#   -0.490 ** ***
####

# Count the occurrences of each of the 20 standard amino acids in every
# record of a FASTA file and print the result as a tab-separated table.
#
# Usage: perl script.pl [fasta_file]
# The input file defaults to 'all.fasta.txt' when no argument is given.

use strict;
use warnings;

# One-letter amino-acid codes, in the order of the output columns.
use constant AMINO_ACIDS => qw(A R N D C Q E G H I L K M F P S T W Y V);

# Run only when executed as a script, so the subs can be loaded on their own.
main(@ARGV) unless caller;

# Entry point: read the FASTA file and print one count row per record.
#   $filename - path of the FASTA file (optional, default 'all.fasta.txt')
# Dies if the file cannot be opened or closed.
sub main {
    my ($filename) = @_;
    $filename = 'all.fasta.txt' unless defined $filename;

    my ($names, $sequence_of) = read_fasta($filename);

    print "Sequence\t", join("\t", AMINO_ACIDS), "\n";

    for my $name (@{$names}) {
        my @row = ($name, count_residues($sequence_of->{$name}));

        # The leading newline (a blank line before every row) is kept so the
        # output layout stays identical to earlier versions of this script.
        print "\n", join("\t", @row), "\n";
    }

    return;
}

# Read a FASTA file.
#   $filename - path of the file to read
# Returns two references:
#   - an array of record names (the header line without its leading '>'),
#     in the order in which they first appear in the file;
#   - a hash mapping each record name to its concatenated sequence lines.
# Records that share a header are merged into one sequence. A header with no
# sequence lines maps to the empty string. Dies if the file cannot be opened.
sub read_fasta {
    my ($filename) = @_;

    open my $fh, '<', $filename
        or die "Cannot open '$filename': $!\n";

    my @names;
    my %sequence_of;
    my $current_name;

    LINE:
    while (my $line = <$fh>) {
        chomp $line;
        $line =~ s/\r\z//;          # tolerate CRLF line endings
        next LINE if $line eq '';

        # A header starts with '>' in the first column; everything after the
        # '>' is used as the record name.
        if ($line =~ s/\A>//) {
            $current_name = $line;
            if (!exists $sequence_of{$current_name}) {
                push @names, $current_name;
                $sequence_of{$current_name} = '';
            }
            next LINE;
        }

        # Sequence data with no preceding header has no record to belong to.
        if (!defined $current_name) {
            warn "$filename:$.: sequence data before the first '>' header; line skipped\n";
            next LINE;
        }

        $sequence_of{$current_name} .= $line;
    }

    close $fh
        or die "Cannot close '$filename': $!\n";

    return (\@names, \%sequence_of);
}

# Count residues in one sequence.
#   $sequence - string of one-letter residue codes
# Returns a list of counts, one per entry of AMINO_ACIDS and in that order
# (0 for residues that do not occur). Matching is case-sensitive; characters
# that are not in AMINO_ACIDS are ignored.
sub count_residues {
    my ($sequence) = @_;

    my %count_of;
    $count_of{$_}++ for split //, $sequence;

    my @counts;
    for my $residue (AMINO_ACIDS) {
        push @counts, $count_of{$residue} || 0;
    }

    return @counts;
}

__END__
####
>gi|6103257|emb|CAB07737.2| glycoprotein [Viral hemorrhagic septicemia virus]
MEWNTFFLVILIIIIKSTTPQITQRPPVENISTYHADWDTPLYTHPSNCREDSFVPIRPAQLRCPHEFED
INKGLVSVPTQIIHLPLSVTSVSAVASGHYLHRVTYRVTCSTSFFGGQTIEKTILEAKLSRQEATNEASK
DHEYPFFPEPSCIWMKNNVHKDITHYYKTPKTVSVDLYSRKFLNPDFIEGVCTTSPCQTHWQGVYWVGAT
.....
...and then the next one...