#!/usr/bin/perl
#
# The infile "query.txt" has multiple blast hits per each query.
# The outfile "besthit.txt" needs to have the first blast hit with the Ref
# number, or the best hit.
# The second outfile "reflist.txt" is to get the ref numbers to do batch
# entrez.

use strict;
use warnings;

# A hit line is cut at fixed columns: the first 70 characters are taken as
# the identifier/description, the next 6 as the score and the following 6 as
# the e-value.
my $ID_WIDTH     = 70;
my $SCORE_WIDTH  = 6;
my $EVALUE_WIDTH = 6;

# Return up to $width characters of $text starting at $offset, or '' when
# $text is too short to reach that column (a bare substr would return undef
# and warn in that case).
sub column {
    my ($text, $offset, $width) = @_;
    return '' if $offset >= length $text;
    return substr($text, $offset, $width);
}

# Cut a chomped hit line into its fields and return them as a hash with the
# keys gino, npno, stid, score and evalue. Fields missing from the line come
# back as empty strings.
sub parse_hit {
    my ($hit) = @_;

    my $id     = column($hit, 0, $ID_WIDTH);
    my $score  = column($hit, $ID_WIDTH, $SCORE_WIDTH);
    my $evalue = column($hit, $ID_WIDTH + $SCORE_WIDTH, $EVALUE_WIDTH);
    $score =~ s/^\s//;

    # The identifier is '|'-separated; fields 1, 3 and 4 are reported as
    # GINO, NPNO and STID respectively.
    my @parts = split /\|/, $id;
    my ($gino, $npno, $stid) = map { defined $_ ? $_ : '' } @parts[1, 3, 4];
    $stid =~ s/^\s//;

    return (
        gino   => $gino,
        npno   => $npno,
        stid   => $stid,
        score  => $score,
        evalue => $evalue,
    );
}

# Consume lines from $fh until one that starts with whitespace (a blank line
# counts) has been read, so the remaining hits of the current list are not
# reported. Nothing is read when $current itself starts with whitespace.
# Stops quietly at end of file instead of looping forever.
sub skip_rest_of_hit_list {
    my ($fh, $current) = @_;

    my $probe = $current;
    while ($probe !~ /^\s+/) {
        $probe = <$fh>;
        last if !defined $probe;    # end of file
    }
    return;
}

my $infile  = "query.txt";          # output of blast
my $outfile = "besthit.txt";
my $ofile   = "reflist.txt";

open(my $in,  '<', $infile)  or die "can't open file: $!";
open(my $out, '>', $outfile) or die "can't open file: $!";
open(my $np,  '>', $ofile)   or die "can't open file: $!";

my $query  = '';    # text of the most recent "Query" line
my $length = '';    # number taken from the most recent "(N letters)" line

while (my $line = <$in>) {
    chomp $line;

    if ($line =~ /^Query/) {
        $query = $line;
        $query =~ s/^Query=\s//;
    }
    elsif ($line =~ /^\s+\((\d+)\sletters\)/) {
        $length = $1;
    }
    elsif ($line =~ /\|ref\|/ or $line =~ /^gi/) {
        # The |ref| pattern is unanchored, so any line containing it starts
        # a record.
        # NOTE(review): that would include ">gi|...|ref|..." alignment
        # headers if the input has an alignment section -- confirm the
        # input only carries the hit summary list.
        my $has_ref = ($line =~ /\|ref\|/) ? 1 : 0;

        skip_rest_of_hit_list($in, $line);
        my %hit = parse_hit($line);

        # Only hits carrying a |ref| field contribute to the ref list and
        # get an NPNO line in the report.
        print {$np} "$hit{npno}\n" if $has_ref;

        print {$out} "\nQURY: $query\n";
        print {$out} "SIZE: $length\n";
        print {$out} "GINO: $hit{gino}\n";
        print {$out} "NPNO: $hit{npno}\n" if $has_ref;
        print {$out} "STID: $hit{stid}\n";
        print {$out} "SCOR: $hit{score}\n";
        print {$out} "EVAL: $hit{evalue}\n";
    }
}

close $in;
# Buffered write errors only surface at close, so check the output handles.
close $out or die "can't close file: $!";
close $np  or die "can't close file: $!";