QURY: Contig103
SIZE: 264
GINO: 116055070
STID: unnamed protein product [Ostreococc...
SCOR: 40
EVAL: 0.003
QURY: Contig228
SIZE: 116
GINO: 162464429
NPNO: NP_001104917.1
STID: eucaryotic initiation factor7 [...
SCOR: 34
EVAL: 0.004
QURY: Contig1032
SIZE: 217
GINO: 168018663
NPNO: XP_001761865.1
STID: predicted protein [Physcomitrel...
SCOR: 34
EVAL: 2e-004
####
QURY: Heritiera_allseq.fna.Contig1032
SIZE: 217
GINO: 157349006
STID: unnamed protein product [Vitis vini...
SCOR: 59
EVAL: 6e-015
####
#!/usr/bin/perl
# the infile "query.txt" has multiple blast hits per each query
# the outfile needs to have the first blast hit with the Ref number or the best hit
# the second outfile "reflist.txt" is to get the ref numbers to do batch entrez
$infile = "query.txt"; #output of blast
open (IN, $infile) or die "can't open file: $!";
$outfile = "besthit.txt";
open (OUT, ">$outfile") or die "can't open file: $!";
$ofile = "reflist.txt";
open (NP, ">$ofile") or die "can't open file: $!";
while () {
if ($_ =~ /^Query/) {
chomp;
$query = $_;
$query =~ s/^Query=\s//;
}
elsif ($_ =~ s/^\s+\((\d+)\sletters\)/$1/) {
chomp;
$length = $_;
}
elsif ($_ =~ /\|ref\|/) {
chomp;
$np = $_;
until (/^\s+/) {
$_ = ;
$line = join (" ", $line, $_);
}
# print OUT "$np\n";
$id = substr $np, 0, 70;
$score = substr $np, 70, 6;
$score =~ s/^\s//;
$evalue = substr $np, 76, 6;
@id = split(/\|/, $id);
$gino = $id[1];
$npno = $id[3];
$stid = $id[4];
$stid =~ s/^\s//;
print NP "$npno\n";
print OUT "\nQURY: $query\n";
print OUT "SIZE: $length\n";
print OUT "GINO: $gino\n";
print OUT "NPNO: $npno\n";
print OUT "STID: $stid\n";
print OUT "SCOR: $score\n";
print OUT "EVAL: $evalue\n";
} #elsif end
elsif ($_ =~ /^gi/) {
chomp;
$np = $_;
until (/^\s+/) {
$_ = ;
$line = join (" ", $line, $_);
}
# print OUT "$np\n";
$id = substr $np, 0, 70;
$score = substr $np, 70, 6;
$score =~ s/^\s//;
$evalue = substr $np, 76, 6;
@id = split(/\|/, $id);
$gino = $id[1];
$stid = $id[4];
$stid =~ s/^\s//;
print OUT "\nQURY: $query\n";
print OUT "SIZE: $length\n";
print OUT "GINO: $gino\n";
print OUT "STID: $stid\n";
print OUT "SCOR: $score\n";
print OUT "EVAL: $evalue\n";
} #elsif end
} #while end
close IN;
close OUT;
close NP;
####
Query= Contig103
(264 letters)
Score E
Sequences producing significant alignments: (bits) Value
gi|116055070|emb|CAL57466.1| unnamed protein product [Ostreococc... 40 0.003
Database: All non-redundant GenBank CDS
translations+PDB+SwissProt+PIR+PRF excluding environmental samples
S1: 41 (20.4 bits)
S2: 89 (38.9 bits)
Query= Contig228
(116 letters)
Score E
Sequences producing significant alignments: (bits) Value
gi|162464429|ref|NP_001104917.1| eucaryotic initiation factor7 [... 34 0.004
gi|164449261|gb|ABY56090.1| eukaryotic initiation factor iso4E [... 40 0.006
gi|170753|gb|AAA34296.1| initiation factor (iso)4F p28 subunit 33 0.007
gi|547713|sp|Q03389|IF4E2_WHEAT Eukaryotic translation initiatio... 33 0.007
Database: All non-redundant GenBank CDS
translations+PDB+SwissProt+PIR+PRF excluding environmental samples
S1: 41 (20.4 bits)
S2: 89 (38.9 bits)
Query= Contig1032
(217 letters)
Score E
Sequences producing significant alignments: (bits) Value
gi|157349006|emb|CAO24084.1| unnamed protein product [Vitis vini... 59 6e-015
gi|168018663|ref|XP_001761865.1| predicted protein [Physcomitrel... 34 2e-004
gi|168018829|ref|XP_001761948.1| predicted protein [Physcomitrel... 34 2e-004
Database: All non-redundant GenBank CDS
translations+PDB+SwissProt+PIR+PRF excluding environmental samples