1.800 A Ala
-3.500 BB Asx
2.500 C Cys Note: Columns 1-8 must contain 1 numeric value only
-3.500 D Asp
-3.500 E Glu Note: This file is required for amphipathic helix
2.800 F Phe
-0.400 G Gly
-3.200 H His
4.500 I Ile
-3.900 K Lys
3.800 L Leu
1.900 M Met
-3.500 N Asn
-1.600 P Pro
-3.500 Q Gln
-4.500 R Arg
-0.800 S Ser
-0.700 T Thr
4.200 V Val
-0.900 W Trp
-0.490 X- Unk
-1.300 Y Tyr
-3.500 ZZ Glx
-0.490 ** ***
####
use strict;
use warnings;

# Count the occurrences of each of the 20 standard amino acids in every
# record of a FASTA file and print one tab-separated row per record.
#
# Input : 'all.fasta.txt' in the current directory. Lines starting with
#         '>' are record headers; all other lines are sequence data
#         belonging to the most recent header.
# Output: a header row ("Sequence" followed by the residue letters), then
#         one row per record, each preceded by a blank line.
# Dies  : if the input file cannot be opened.

my $filename = 'all.fasta.txt';
open(my $fh, '<', $filename) or die "Cannot open '$filename': $!";

my %seq;      # header text (without the leading '>') => concatenated sequence
my @order;    # headers in order of first appearance, for stable output order
my $key;      # header of the record currently being read

while (my $line = <$fh>) {
    chomp $line;
    $line =~ s/\r\z//;    # tolerate CRLF line endings

    # Only a '>' at the start of the line marks a header.
    if ($line =~ /^>/) {
        ($key = $line) =~ s/^>//;
        next;
    }

    # Sequence data before the first header has no record to belong to.
    next unless defined $key;

    # Records sharing the same header text are concatenated into one entry.
    push @order, $key unless exists $seq{$key};
    $seq{$key} .= $line;
}
close $fh;

my @aa = qw(A R N D C Q E G H I L K M F P S T W Y V);
print "Sequence\t", join("\t", @aa), "\n";

foreach my $id (@order) {
    # Tally every character of the sequence; only the residues listed in
    # @aa are reported, anything else (X, *, gaps, ...) is ignored.
    my %count;
    $count{$_}++ for split //, $seq{$id};

    # Raw counts are printed. For percentages, format each value with
    # sprintf("%0.1f", 100 * $n / length $seq{$id}) instead (guarding
    # against empty sequences).
    my @row = ($id, map { $count{$_} || 0 } @aa);
    print "\n", join("\t", @row), "\n";
}
####
>gi|6103257|emb|CAB07737.2| glycoprotein [Viral hemorrhagic septicemia virus] MEWNTFFLVILIIIIKSTTPQITQRPPVENISTYHADWDTPLYTHPSNCREDSFVPIRPAQLRCPHEFED INKGLVSVPTQIIHLPLSVTSVSAVASGHYLHRVTYRVTCSTSFFGGQTIEKTILEAKLSRQEATNEASK DHEYPFFPEPSCIWMKNNVHKDITHYYKTPKTVSVDLYSRKFLNPDFIEGVCTTSPCQTHWQGVYWVGAT ..... ...and then the next one...