blastx
BLASTX 2.2.27+
Stephen F. Altschul, Thomas L. Madden, Alejandro A. Schäffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402.
/Applications/blast-2.2.27+/db/COG_Nov2012/protein.sequences.v9.0
Query_1
HKUN3Y301D9XQX
508
BLOSUM62
10
11
1
L;
1
Query_1
HKUN3Y301D9XQX
508
1
gnl|BL_ORD_ID|1515029
43989.cce_0262 (Cyanothece ATCC 51142)
1515029
65
1
40.0466
92
0.00664016
155
253
12
44
-1
0
17
27
0
33
LRGAICSMEHIEEALGKLKDWARKLIELLLGPR
ITGAVCLMDYLEKVLEKLRELAQKLIETLLGPQ
+ GA+C M+++E+ L KL++ A+KLIE LLGP+
####
#!/usr/local/bin/perl
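# Usage information
# Usage: $0 -i <blast_xml_report> -o <good_hits_table> -n <max_hits_per_query> -b <min_bit_score>
#           -t <no_hit_or_below_score_table>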
use strict;
use warnings;
use Bio::SearchIO;
use Getopt::Std;#needed for flagging parameters
# main: parse a BLAST XML report and split its queries into two
# tab-delimited tables.
#
# Command-line switches (read from @ARGV):
#   -i  BLAST XML report to read (required)
#   -o  table for hits whose bit score is >= -b (required); a companion
#       file "<-o>.header" receives one query description per such hit
#   -t  table for queries with no hits and for hits below -b (required)
#   -b  bit-score threshold; treated as 0 when omitted
#   -n  maximum number of hits examined per query; no limit when omitted
#
# Dies with a message if a required switch is missing or a file cannot be
# opened or closed.
sub main {
    my %opt;

    # getopts (not getopt) is the Getopt::Std function that understands the
    # "letter followed by a colon expects an argument" specification.
    my $usage = "Usage: $0 -i <blast.xml> -o <good.tsv> -t <bad.tsv> "
              . "[-b <min_bit_score>] [-n <max_hits>]\n";
    getopts('i:o:n:b:t:', \%opt) or die $usage;
    for my $required (qw(i o t)) {
        die "Missing required option -$required\n$usage"
            unless defined $opt{$required} && length $opt{$required};
    }

    # An absent -b/-n used to be compared as undef (numerically 0, with a
    # warning); make those defaults explicit instead.
    my $min_bits = defined $opt{b} ? $opt{b} : 0;
    my $max_hits = defined $opt{n} ? $opt{n} : 0;    # 0 means "no limit"

    print "Parsing the BLAST result ...\n";
    my $in = Bio::SearchIO->new(-format => 'blastxml', -file => $opt{i});

    # Three-argument open with lexical handles: the file names come from the
    # command line and must never be interpreted as part of the open mode.
    open(my $good_fh, '>', $opt{o})
        or die "Cannot open $opt{o}: $!";
    open(my $bad_fh, '>', $opt{t})
        or die "Cannot open $opt{t}: $!";
    open(my $header_fh, '>', "$opt{o}.header")
        or die "Cannot open $opt{o}.header: $!";

    # print the header info for tab-delimited columns (same in both tables)
    my $column_header =
          "query_name\tquery_length\taccession_number\tsubject_length\t"
        . "subject_description\tE value\tbit score\tframe\tquery_start\t"
        . "query_end\thit_start\thit_end\t%_conserved\t%_identical\n";
    print {$good_fh} $column_header;
    print {$bad_fh}  $column_header;

    # walk every query result in the report
    while ( my $result = $in->next_result ) {
        my $query_name   = $result->query_description;
        my $query_length = $result->query_length;

        # queries WITHOUT hits go to the -t ("bad") file
        if ( $result->num_hits == 0 ) {
            print {$bad_fh} $query_name . "\t" . $query_length . "\t"
                          . "No hits found\n";
            next;
        }

        my $count = 0;
        while ( my $hit = $result->next_hit ) {
            my $bits = $hit->bits;

            if ( $bits < $min_bits ) {
                # hits BELOW the bit-score threshold go to the -t file
                print {$bad_fh} $query_name . "\t" . $query_length . "\t"
                              . "below bit score\n";
            }
            else {
                # hits AT or ABOVE the threshold go to the -o ("good") file
                print {$good_fh} $query_name . "\t";
                print {$header_fh} $query_name . "\n";
                print {$good_fh} $query_length . "\t"
                               . $hit->accession . "\t"
                               . $hit->length . "\t"
                               . $hit->description . "\t"
                               . $hit->significance . "\t"
                               . $bits . "\t";

                # Every HSP of the hit is written; the second and later HSPs
                # are padded with seven empty cells so their columns line up
                # under the first HSP's.
                my $hspcount = 0;
                while ( my $hsp = $hit->next_hsp ) {
                    print {$good_fh} "\t\t\t\t\t\t\t" if $hspcount > 0;

                    # NOTE(review): the sample run prints frame 0 for an HSP
                    # whose input frame is -1; BioPerl appears to report the
                    # frame GFF-style (0..2) with the sign held by the
                    # strand -- confirm before relying on this column.
                    print {$good_fh} $hsp->query->frame . "\t";
                    print {$good_fh} $hsp->start('query') . "\t"
                                   . $hsp->end('query') . "\t";
                    print {$good_fh} $hsp->start('hit') . "\t"
                                   . $hsp->end('hit') . "\t";
                    printf {$good_fh} "%.1f%%\t%.1f%%\n",
                        $hsp->frac_conserved * 100,
                        $hsp->frac_identical * 100;
                    $hspcount++;
                }

                # A hit without any HSP would otherwise leave its row
                # unterminated and glue the next row onto it.
                print {$good_fh} "\n" if $hspcount == 0;
            }

            $count++;

            # flow control for the number of hits needed
            last if $max_hits > 0 && $count >= $max_hits;
        }
    }

    # Buffered write errors only surface at close, so check every handle
    # (the ".header" handle was previously never closed at all).
    close($good_fh)   or die "Cannot close $opt{o}: $!";
    close($bad_fh)    or die "Cannot close $opt{t}: $!";
    close($header_fh) or die "Cannot close $opt{o}.header: $!";

    return;
}
# Script entry point: run the parser, then report completion on STDOUT.
main();
print " DONE!!!\n";
####
query_name query_length accession_number subject_length subject_description E value bit score frame query_start query_end hit_start hit_end %_conserved %_identical
HKUN3Y301D9XQX length=508 xy=1636_1159 region=1 run=R_2012_03_16_06_53_48_ 508 1515029 65 (Cyanothece ATCC 51142) 0.00664016 40.0466 0 155 253 12 44 81.8% 51.5%