#!/usr/local/bin/perl
#
# Parse an NCBI BLASTX report and write the top N hits of every result to a
# tab-delimited file that can be imported into a spreadsheet program.
#
# Columns written per row:
#   query name, query length, accession number, hit length, description,
#   E value, bit score, query frame, query start, query end, hit start,
#   hit end, positives (%), identical (%)
#
# Command-line arguments (exactly three, in this order):
#   1. path of the BLAST report to read
#   2. number of hits to report per query
#   3. path of the tab-delimited file to write (overwritten if it exists)
#
# Continuation rows (further hits of the same query, further HSPs of the
# same hit) leave the leading columns empty so the values stay aligned.

use strict;
use warnings;

use Bio::SearchIO;

# Usage information
die "Usage: $0 <blast_report> <num_hits> <outfile>\n" if @ARGV != 3;

my ($infile, $numHits, $outfile) = @ARGV;

print "Parsing the blast resut.......";

my $in = Bio::SearchIO->new(-format => 'blast', -file => $infile);

# Three-argument open with a lexical handle: the output name is taken
# verbatim from the command line and must never be parsed as a mode.
open(my $out, '>', $outfile) or die "Cannot open $outfile :$!";

# Header row for the tab-delimited columns (left disabled, as before):
#print {$out} "query_name\tquery_length\taccession_number\tlength\tdescription\tE Value\tbit score\tframe\tquery_start\t";
#print {$out} "query_end\thit_start\thit_end\tpositives\tidentical\n";

# Walk through every result (one per query sequence) in the report.
while (my $result = $in->next_result) {
    print {$out} $result->query_name . "\t";

    # the length of the query sequence
    print {$out} $result->query_length;

    # output "No hits found" if the query produced no hits at all
    if ($result->num_hits == 0) {
        print {$out} "\tNo hits found\n";
        next;
    }

    my $count = 0;

    # Process the hits in the order the report lists them.
    while (my $hit = $result->next_hit) {
        # Second and later hits: skip the query-name column. Together with
        # the leading tab below this also leaves query-length empty.
        print {$out} "\t" if $count > 0;

        # Accessors are called in scalar context (via concatenation), one
        # value per column.
        print {$out} "\t" . $hit->accession . "\t";    # accession number
        print {$out} $hit->length . "\t";              # hit sequence length
        print {$out} $hit->description . "\t";         # hit description
        print {$out} $hit->significance . "\t";        # E value
        print {$out} $hit->bits . "\t";                # bit score

        my $hspcount = 0;

        # One output row per HSP of this hit.
        while (my $hsp = $hit->next_hsp) {
            # Second and later HSPs: leave the seven query/hit columns
            # empty so the HSP values line up under the first HSP.
            print {$out} "\t\t\t\t\t\t\t" if $hspcount > 0;

            # reading frame reported for the HSP
            my $frame = $hsp->frame;

            # start and end of the aligned region on the query and the hit
            my $query_start = $hsp->start('query');
            my $query_end   = $hsp->end('query');
            my $hit_start   = $hsp->start('hit');
            my $hit_end     = $hsp->end('hit');

            print {$out} $frame . "\t";
            print {$out} $query_start . "\t" . $query_end . "\t";
            print {$out} $hit_start . "\t" . $hit_end . "\t";

            # Similarity and identity as percentages with one decimal.
            # printf (not print) is required for the format to be applied.
            printf {$out} "%.1f%%\t", $hsp->frac_conserved * 100;
            printf {$out} "%.1f%%\n", $hsp->frac_identical * 100;

            $hspcount++;
        }

        $count++;

        # Stop once the requested number of hits has been written. $count is
        # at least 1 here, so a request of 0 never matches and every hit of
        # the result is written.
        last if $count == $numHits;
    }
}

# Buffered write errors only surface at close, so check it.
close($out) or die "Cannot close $outfile :$!";

print "DONE !!! \n";