LOC_Os01g01010.1 : PS00022 EGF_1 EGF-like domain signature 1. 20 - 31 CtCtaAgaGAaC L=(-1) 392 - 403 CtCccTtcGTtC L=(-1) 740 - 751 CaCtaTtcGAgC L=(-1) 905 - 916 CgCtgTtgGAtC L=(-1) 1034 - 1045 CcCcgGtgGTgC L=(-1) 2169 - 2180 CaCcgGgtGAaC L=(-1) LOC_Os01g01010.1 : PS00099 THIOLASE_3 Thiolases active site. 26 - 39 GAGAACGAgAgAaG L=(-1) 221 - 234 GACTACCGaAtAaG L=(-1) 2732 - 2745 GAAAACAAgAgAcG L=(-1) LOC_Os01g01010.1 : PS00197 2FE2S_FER_1 2Fe-2S ferredoxin-type iron-sulfur binding region signature. 98 - 106 CGAGACGAC L=(-1) 480 - 488 CAAGACAAC L=(-1) 771 - 779 CTTGGCTGC L=(-1) 976 - 984 CAAGTCAAC L=(-1) 2314 - 2322 CAAGACATC L=(-1) 2390 - 2398 CGTAGCAGC L=(-1) LOC_Os01g01010.1 : PS00227 TUBULIN Tubulin subunits alpha, beta, and gamma signature. 890 - 896 AGGTGAG L=(-1) LOC_Os01g01010.1 : PS01177 ANAPHYLATOXIN_1 Anaphylatoxin domain signature. 226 - 257 CCgaAtaagagaaGCAggc......AggCagacaaaCC L=(-1) 264 - 296 CCaaGgagtcctcGCTgagg.....AagCtttggatCC L=(-1) 362 - 396 CCtaGgtcgcat.GCAtcatcaga.TttCaatctc.CC L=(-1) LOC_Os01g01010.1 : PS01185 CTCK_1 C-terminal cystine knot signature. 536 - 572 CCgtgcgggcggcgcCatGgccaacctccagCgCgg..C L=(-1) LOC_Os01g01010.1 : PS01208 VWFC_1 VWFC domain signature. 557 - 614 CaacCTCcagcgcggcgttggCtcc.CtcgtccgtgaCattggcgacccctg..CCtcaaC L=(-1) 578 - 623 CtccCTCgtccgtgacattggCgaccCctgc......Ctcaacccat.......CCcc..C L=(-1) LOC_Os01g01010.1 : PS50842 EXPANSIN_EG45 Expansin, family-45 endoglucanase-like domain profile. 1624 - 1711 GGACACTGcaccgAATTGTGGTTGATGTGGTTAGAACGGATAGTCAtcttgATTTCTATg L=-1 LOC_Os01g01010.2 : PS00022 EGF_1 EGF-like domain signature 1. 298 - 309 CtCccTtcGTtC L=(-1) 646 - 657 CaCtaTtcGAgC L=(-1) 811 - 822 CgCtgTtgGAtC L=(-1) 940 - 951 CcCcgGtgGTgC L=(-1) LOC_Os01g01010.2 : PS00099 THIOLASE_3 Thiolases active site. 140 - 153 GACTACCGaAtAaG L=(-1) 2188 - 2201 GAAAACAAgAgAcG L=(-1) LOC_Os01g01010.2 : PS00197 2FE2S_FER_1 2Fe-2S ferredoxin-type iron-sulfur binding region signature. 
17 - 25 CGAGACGAC L=(-1) 386 - 394 CAAGACAAC L=(-1) 677 - 685 CTTGGCTGC L=(-1) 882 - 890 CAAGTCAAC L=(-1) LOC_Os01g01010.2 : PS00227 TUBULIN Tubulin subunits alpha, beta, and gamma signature. 796 - 802 AGGTGAG L=(-1) LOC_Os01g01010.2 : PS01177 ANAPHYLATOXIN_1 Anaphylatoxin domain signature. 145 - 176 CCgaAtaagagaaGCAggc......AggCagacaaaCC L=(-1) 183 - 215 CCaaGgagtcctcGCTgagg.....AagCtttggatCC L=(-1) LOC_Os01g01010.2 : PS01185 CTCK_1 C-terminal cystine knot signature. 442 - 478 CCgtgcgggcggcgcCatGgccaacctccagCgCgg..C L=(-1) LOC_Os01g01010.2 : PS01208 VWFC_1 VWFC domain signature. 463 - 520 CaacCTCcagcgcggcgttggCtcc.CtcgtccgtgaCattggcgacccctg..CCtcaaC L=(-1) 484 - 529 CtccCTCgtccgtgacattggCgaccCctgc......Ctcaacccat.......CCcc..C L=(-1)
####
#!/usr/local/bin/perl
# Split the lines of outputps_scan_chr1_.out into two files:
#   data.txt  - the first line seen for each locus ID (e.g. LOC_Os01g01010.1)
#   waste.txt - every later line that repeats an already-seen locus ID
# Relative order of lines is preserved within each output file.
use strict;
use warnings;

my $input_file = "outputps_scan_chr1_.out";
my $uniq_file  = "data.txt";
my $waste_file = "waste.txt";

my @uniq  = ();    # first occurrence of each key
my @waste = ();    # subsequent occurrences
my %seen  = ();    # key => number of lines seen with that key

open(my $in, "<:utf8", $input_file)
    or die "Cannot open $input_file for reading: $!";

while (my $line = <$in>) {
    # The match must be evaluated in list context so that the captured
    # locus ID is returned; in scalar context it only yields true/false.
    # The dot before the isoform number is escaped so that it matches a
    # literal '.' only.
    my ($id) = $line =~ m/^(LOC_Os0[1-7]g[0-9]*\.[0-9]+)\s/;

    # Lines that do not start with a locus ID all share the empty key, so
    # only the first such line is kept and the rest go to the waste list.
    # NOTE(review): confirm this is the desired handling for such lines.
    my $key = defined $id ? $id : '';

    if (!$seen{$key}++) {
        push(@uniq, $line);
    }
    else {
        push(@waste, $line);
    }
}

close($in) or die "Cannot close $input_file: $!";

# Write each output file exactly once, after all input has been classified,
# instead of truncating and rewriting both files on every input line.
open(my $uniq_out, ">:utf8", $uniq_file)
    or die "Cannot open $uniq_file for writing: $!";
print {$uniq_out} @uniq;
close($uniq_out) or die "Cannot close $uniq_file: $!";

open(my $waste_out, ">:utf8", $waste_file)
    or die "Cannot open $waste_file for writing: $!";
print {$waste_out} @waste;
close($waste_out) or die "Cannot close $waste_file: $!";