158256496-stool1_revised_C989779_1_gene1513 strand:+ 203907.Bfl015 63.04 46 17 0 1 46 1 46 1e-07 59.3 158256496-stool1_revised_C989779_1_gene1513 strand:+ 326427.Cagg_1194 65.22 46 16 0 2 47 3 48 1e-07 59.3 158256496-stool1_revised_C989779_1_gene1513 strand:+ 334413.FMG_1629 70.45 44 13 0 1 44 4 47 1e-07 59.3 158256496-stool1_revised_C989779_1_gene1513 strand:+ 224915.bbp013 63.04 46 17 0 1 46 1 46 1e-07 59.3 158256496-stool1_revised_C989779_1_gene1513 strand:+ 543302.AaLAA1DRAFT_2702 72.09 43 12 0 1 43 29 71 1e-07 59.3 158256496-stool1_revised_C989779_1_gene1517 strand:+ 484018.BACPLE_00501 77.43 505 114 0 1 505 1 505 0.0 848 158256496-stool1_revised_C989779_1_gene1517 strand:+ 547042.BACCOPRO_00289 77.43 505 114 0 1 505 5 509 0.0 848 158256496-stool1_revised_C989779_1_gene1517 strand:+ 449673.BACSTE_02782 77.56 508 111 1 1 505 1 508 0.0 847 158256496-stool1_revised_C989779_1_gene1517 strand:+ 665953.HMPREF1016_02618 77.76 508 110 1 1 505 1 508 0.0 845 158256496-stool1_revised_C989779_1_gene1517 strand:+ 762984.HMPREF9445_02115 77.17 508 113 1 1 505 1 508 0.0 845 158256496-stool1_revised_C989779_1_gene1517 strand:+ 702446.CUU_1632 77.03 505 116 0 1 505 1 505 0.0 845 158256496-stool1_revised_C989779_1_gene1517 strand:+ 469593.HMPREF9011_00038 77.03 505 116 0 1 505 1 505 0.0 845 158256496-stool1_revised_C989779_1_gene1517 strand:+ 457394.BSFG_02153 77.03 505 116 0 1 505 1 505 0.0 845 158256496-stool1_revised_C989779_1_gene1517 strand:+ 435590.BVU_1475 77.03 505 116 0 1 505 1 505 0.0 845 #### ############### read in blast table and parse ####### my $in_blast_tab=$ARGV[0]; open(IN,$in_blast_tab) or die "cannot open $in_blast_tab\n"; my %HoFlg1; my %HoFlg2; #my $Flag1; my $Flag2; while (my $line=) { chomp $line; my ($Query_id,$strand,$Subj_id,$Perc_iden,$align_len,$num_mm,$gap,$q_start,$q_end,$s_start,$s_end,$e_value,$bit_score)=split("\t",$line); # extra field of strand next if ($bit_score <60); my ($Flag1, $Flag2 ) = &Flag( $Subj_id, \%proph_prots, \%euk_prots, \%vir_prots ); $HoFlg1{$Query_id}{$Subj_id}{$bit_score}=$Flag1; $HoFlg2{$Query_id}{$Subj_id}{$bit_score}=$Flag2; print join("\t",$Query_id,$Subj_id,$bit_score,$Flag1,$Flag2)."\n"; } sub Flag { my $Subj_id=$_[0]; my $proph_prots=$_[1]; my $euk_prots=$_[2]; my $vir_prots=$_[3]; my $Flag1; my $Flag2; if (exists $proph_prots{$Subj_id}){ $Flag1="Proph"; $Flag2="Phage"; } elsif (exists $euk_prots{$Subj_id}){ $Flag1="Euk"; $Flag2="Euk" } elsif(exists $vir_prots{$Subj_id}){ $Flag1="Vir"; $Flag2="Phage"; } else { $Flag1="Bact"; $Flag2="Bact"; } return($Flag1,$Flag2); } ##### For each Query_id find top bit score my %top_bit; foreach my $key1 (sort keys %HoFlg2){ foreach my $key2 (keys %{$HoFlg2{$key1}}){ print "key1\t$key1\tkey2$key2\tbitscore\t $HoFlg2{$key1}{$key2}\n" $top_bit{$key1}=$HoFlg2{$key1}{$key2}; #??probably wrong } print "for query\t$key1 top bit score is\t$top_bit{$key1}\n"; }