>sp|P48255|ABCX_CYAPA Probable ATP-dependent transporter ycf16 OS=Cyanophora paradoxa GN=ycf16 PE=3 SV=1 MSTEKTKILEVKNLKAQVDGTEILKGVNLTINSGEIHAIMGPNGSGKSTFSKILAGHPAYQVTGGEILFKNKNLLELEPEERARAGVFLAFQYPIEIAGVSNIDFLRLAYNNRRKEEGLTELDPLTFYSIVKEKLNVVKMDPHFLNRNVNEGFSGGEKKRNEILQMALLNPSLAILDETDSGLDIDALRIVAEGVNQLSNKENSIILITHYQRLLDYIVPDYIHVMQNGRILKTGGAELAKELEIKGYDWLNELEMVKK CYAPA
####
Taxon Mnemonic Scientific name Common name Synonym Other Names Reviewed Rank Lineage Parent 43989 CYAA5 Cyanothece sp. (strain ATCC 51142) Cyanothece (strain ATCC 51142); Cyanothece 51142; Cyanothece ATCC51142; Cyanothece sp. ATCC 51142; Cyanothece sp. BH68; Cyanothece sp. BH68K reviewed Species Bacteria; Cyanobacteria; Chroococcales; Cyanothece 43988
####
# Taxonomy filter.
#
# Reads ../dataset/taxonomy.tab, optionally narrows it to the rows that
# mention the taxon given as the 4th command-line argument, and then passes
# to checkSeq() only those records of the temp file whose third
# tab-separated field is one of the selected taxonomy mnemonics.  Without a
# taxon argument every record is passed to checkSeq().
#
# Relies on $annotation, $tempFile and checkSeq() being defined elsewhere in
# the script.

# Path separator for the current platform (backslash on native Windows).
my $slash = "/";
if ("$^O" eq "MSWin32") {
    $slash = "\\";
}

# restrict the search to a specific taxonomy
my $taxon = $ARGV[3];
# Always append the column, but avoid concatenating undef when no taxon
# argument was supplied.
$annotation .= "\t" . (defined $taxon ? $taxon : '');

my $tax_path = ".." . $slash . "dataset" . $slash . "taxonomy.tab";
open(my $tax_fh, '<', $tax_path) or die "couldn't open taxonomy.tab: $!";
my @taxR = <$tax_fh>;
close $tax_fh;

if ($taxon) {
    # Keep the rows that mention the taxon anywhere on the line (case
    # insensitive) and reduce each to its second column, the mnemonic.
    # \Q...\E makes the taxon match literally, so names containing regex
    # metacharacters such as "sp. (strain ATCC 51142)" match themselves.
    # Rows without a second column are dropped rather than becoming an
    # empty-string key.
    @taxR = grep { defined $_ }
            map  { (split /\t/, $_)[1] }
            grep { /\Q$taxon\E/i } @taxR;
}

# Lookup table: mnemonic => 1 for every selected taxonomy row.
my %taxR = map { $_ => 1 } @taxR;

# Debug output.
print "cyaa5 = " . (exists $taxR{"CYAA5"} ? $taxR{"CYAA5"} : ''); #prints cyaa5 = 1

#skipping a bunch of unrelated stuff

my $ps_path = ".." . $slash . "dataset" . $slash . $tempFile;
open(my $ps_fh, '<', $ps_path) or die "couldn't open $tempFile: $!";
while (<$ps_fh>) {
    # Like chomp, but also removes the CR of a CRLF line ending.  Perl only
    # translates CRLF automatically on Windows; elsewhere a leftover "\r"
    # would stay glued to the last field and defeat the hash lookup below.
    # NOTE(review): presumably the cause of the Windows/Mac difference -
    # confirm against the line endings of the actual input file.
    s/\r?\n\z//;
    my @curLine = split(/\t/, $_);

    my $filter = 1;
    if ($taxon) {
        # A hash lookup replaces the earlier smartmatch / grep / first()
        # attempts at testing membership in @taxR.
        my $mnemonic = defined $curLine[2] ? $curLine[2] : '';
        $filter = $taxR{$mnemonic};

        # Debug output: the mnemonic and whether it was selected.
        print "$mnemonic\t" . (defined $filter ? $filter : '') . "\n";
    }

    if ($filter) {
        checkSeq(@curLine);
    }
}
close $ps_fh;
####
FRG3G FRG3G IIV3 IIV3 FRG3G FRG3G IIV3 FRG3G IIV6 FRG3G FRG3G
####
GLOVI 1 GRATL PORPU PORYE 
PROM0 1 PROM2 1 PROM3 1 PROM4 1 PROM5 1 PROM9 1 #### ECOSM ECOUT ENT38 ERWCT ESCF3 PECCP CYAP4 1 DEIRA DELAS DESAP DESHY