>sp|P48255|ABCX_CYAPA Probable ATP-dependent transporter ycf16 OS=Cyanophora paradoxa GN=ycf16 PE=3 SV=1 MSTEKTKILEVKNLKAQVDGTEILKGVNLTINSGEIHAIMGPNGSGKSTFSKILAGHPAYQVTGGEILFKNKNLLELEPEERARAGVFLAFQYPIEIAGVSNIDFLRLAYNNRRKEEGLTELDPLTFYSIVKEKLNVVKMDPHFLNRNVNEGFSGGEKKRNEILQMALLNPSLAILDETDSGLDIDALRIVAEGVNQLSNKENSIILITHYQRLLDYIVPDYIHVMQNGRILKTGGAELAKELEIKGYDWLNELEMVKK CYAPA
####
Taxon Mnemonic Scientific name Common name Synonym Other Names Reviewed Rank Lineage Parent
43989 CYAA5 Cyanothece sp. (strain ATCC 51142) Cyanothece (strain ATCC 51142); Cyanothece 51142; Cyanothece ATCC51142; Cyanothece sp. ATCC 51142; Cyanothece sp. BH68; Cyanothece sp. BH68K reviewed Species Bacteria; Cyanobacteria; Chroococcales; Cyanothece 43988
####
# Path separator used to build the dataset paths: backslash on native
# Windows Perl, forward slash everywhere else.
my $slash = "/";
if ($^O eq "MSWin32") {
    $slash = "\\";
}

# restrict the search to a specific taxonomy
# The fourth command-line argument is an optional taxon filter; when it is
# absent or false, no taxonomy filtering is applied further down.
my $taxon = $ARGV[3];
$annotation .= "\t$taxon";

# Read the whole taxonomy table into memory, one element per line.
# Three-argument open with a lexical handle keeps the mode explicit and
# avoids a global bareword handle.
my $tax_path = ".." . $slash . "dataset" . $slash . "taxonomy.tab";
open(my $tax_fh, '<', $tax_path) or die "couldn't open taxonomy.tab: $!";
my @taxR = <$tax_fh>;
close $tax_fh;

if ($taxon) {
    # Keep only the rows that mention the taxon anywhere on the line
    # (case-insensitive).
    # NOTE(review): $taxon is interpolated as a regular expression, so a
    # name containing metacharacters such as "(" or "." is treated as a
    # pattern. Switch to /\Q$taxon\E/i if only literal matching is wanted.
    @taxR = grep { /$taxon/i } @taxR;

    # Reduce every surviving row to its second tab-separated column (the
    # "Mnemonic" column in the sample table). The loop variable aliases
    # the array element, so assigning to it rewrites @taxR in place.
    for my $tax_row (@taxR) {
        my @taxRR = split(/\t/, $tax_row);
        $tax_row = $taxRR[1];
    }
}

# Lookup set: mnemonic => 1, for constant-time membership tests later on.
my %taxR = map { $_ => 1 } @taxR;
print "cyaa5 = ".$taxR{"CYAA5"};# debug: shows "cyaa5 = 1" when CYAA5 is in the selected taxon
#skipping a bunch of unrelated stuff
# Stream the protein file line by line and hand every record that passes
# the optional taxonomy filter to checkSeq().
my $ps_path = ".." . $slash . "dataset" . $slash . $tempFile;
open(my $ps_fh, '<', $ps_path) or die "couldn't open $tempFile: $!";
# $_ is kept as the loop variable on purpose: checkSeq() is defined
# elsewhere and may read it.
while (<$ps_fh>) {
    # Strip the line terminator explicitly. A plain chomp only removes
    # "\n", so a file with CRLF line endings read on macOS/Linux would
    # leave a "\r" glued to the last tab-separated field. That makes the
    # %taxR lookup below miss and garbles terminal output. Native Windows
    # Perl hides this because its default :crlf layer already drops the "\r".
    s/\r?\n\z//;
    my @curLine = split(/\t/, $_);

    # Without a taxon filter every record is processed.
    my $filter = 1;
    if ($taxon) {
        # debug: third column and whether it is in the taxonomy set
        print "$curLine[2]\t$taxR{$curLine[2]}\n";
        # Keep the record only if its third column (the organism mnemonic
        # in the sample record) is a key of %taxR.
        $filter = $taxR{$curLine[2]};
    }
    if ($filter) {
        checkSeq(@curLine);
    }
}
close $ps_fh;
####
FRG3G
FRG3G
IIV3
IIV3
FRG3G
FRG3G
IIV3
FRG3G
IIV6
FRG3G
FRG3G
####
GLOVI 1
GRATL
PORPU
PORYE
PROM0 1
PROM2 1
PROM3 1
PROM4 1
PROM5 1
PROM9 1
####
ECOSM
ECOUT
ENT38
ERWCT
ESCF3
PECCP
CYAP4 1
DEIRA
DELAS
DESAP
DESHY