# ---------------------------------------------------------------------------
# Helpers (private to this section)
# ---------------------------------------------------------------------------

# Reduce an amino-acid change label to the key stored in %site.
#
# Applies two substitutions, each at most once:
#   1. "<non-digit>.<non-digit><digits><non-digit>" is replaced by the digits,
#      so a label shaped like 'p.A123T' becomes '123';
#   2. the first '*', '?' or 's<digits>' that remains is removed.
# Labels that match neither pattern are returned unchanged, so the result is
# NOT guaranteed to be purely numeric (e.g. 'p.K27fs' yields '27s').
#
# Param:   $aa_change - label string (a key of %genes{gene}{sample}).
# Returns: the reduced key as a string.
sub _aa_site_key {
    my ($aa_change) = @_;
    my $site_key = $aa_change;
    $site_key =~ s/\D\.\D([0-9]+)\D/$1/;
    $site_key =~ s/(\*|\?|s\d+)//;
    return $site_key;
}

# Turn a %site key into a usable array index.
#
# Param:   $site_key - string produced by _aa_site_key().
# Returns: the leading run of ASCII digits as a number ('27s' -> 27,
#          '0045' -> 45), or undef (empty list in list context) when the key
#          does not start with a digit and therefore names no position.
sub _site_index {
    my ($site_key) = @_;
    return unless defined($site_key) && $site_key =~ /\A([0-9]+)/;
    return $1 + 0;    # "+ 0" normalises '0045' and '45' to the same index
}

# Count the entries recorded at each numeric position of one gene.
#
# Param:   $entries_for - hash ref: site key => array ref of entries
#                         (the shape of $site{$gene}).
# Returns: hash ref: numeric index => total number of entries whose key
#          resolves to that index. Keys with no leading digits are ignored;
#          keys that resolve to the same index are summed rather than
#          overwriting each other.
sub _tally_site_counts {
    my ($entries_for) = @_;
    my %count_at;
    for my $site_key (keys %{$entries_for}) {
        my $index = _site_index($site_key);
        next unless defined $index;
        $count_at{$index} += scalar @{ $entries_for->{$site_key} };
    }
    return \%count_at;
}

# ---------------------------------------------------------------------------
# 1. Build %genes{gene}{sample}{aa_change} from the tab-separated rows of
#    @file. Fields 0, 3 and 13 are used; per the original author's note these
#    are gene name, sample identifier and amino-acid site.
#    The loop variable aliases the array element, so @file itself ends up
#    chomped, exactly as with the implicit-$_ form.
# ---------------------------------------------------------------------------
foreach my $line (@file) {
    chomp $line;
    next unless length $line;    # a blank line would create an '' gene entry
    my @r = split /\t/, $line;
    push @{ $genes{ $r[0] }{ $r[3] }{ $r[13] } }, $r[13];
}

# ---------------------------------------------------------------------------
# 2. For every row of @file2 (field 0 = gene, field 1 = length, per the
#    original author's note) give the gene an array of that many zeros.
# ---------------------------------------------------------------------------
foreach my $line (@file2) {
    chomp $line;
    next unless length $line;    # nothing to initialise for a blank line
    my @r = split /\t/, $line;
    @{ $s_l{ $r[0] } } = (0) x $r[1];
}

# ---------------------------------------------------------------------------
# 3. Record every amino-acid change twice: under its full label in %AA and
#    under its reduced key (see _aa_site_key) in %site. One entry is pushed
#    per (gene, sample, label) combination.
# ---------------------------------------------------------------------------
foreach my $gene (sort keys %genes) {
    foreach my $sample (sort keys %{ $genes{$gene} }) {
        foreach my $aa_change (sort keys %{ $genes{$gene}{$sample} }) {
            my $site_key = _aa_site_key($aa_change);
            push @{ $AA{$gene}{$aa_change} },  $aa_change;
            push @{ $site{$gene}{$site_key} }, $site_key;
        }
    }
}

# ---------------------------------------------------------------------------
# 4. Store, per gene, the number of %site entries at each position:
#    $site_length_catch{gene}[position] = count.
#    The raw %site key is not used as the subscript directly: a key without
#    digits would numify to 0 and plant a bogus count in slot 0, and a key
#    such as '27s' would numify to 27 and overwrite the count already stored
#    for '27'. Positions with no entries are left untouched (undef unless
#    they were set elsewhere).
# ---------------------------------------------------------------------------
foreach my $gene (sort keys %AA) {
    my $count_at = _tally_site_counts( $site{$gene} || {} );
    foreach my $index (keys %{$count_at}) {
        $site_length_catch{$gene}[$index] = $count_at->{$index};
    }
}