# Print the sequence of every FASTA record in $seqfile whose ID (the first
# word of its header line) is listed in $idsfile.  Each selected sequence is
# written to STDOUT on a single line, without its header or internal line
# breaks.
use strict;
use warnings;

my $idsfile = "sample_IDs.txt";
my $seqfile = "sample_reads.fasta";
my %ids     = ();

# Load the wanted IDs.  A line in the ID file may carry a leading ">" and
# trailing columns (e.g. ">comp10000_c0_seq1 15"); only the bare first word
# is used as the key so that it compares equal to the ID parsed from a FASTA
# header below.
open my $ids_fh, '<', $idsfile or die "Cannot open '$idsfile': $!";
while (my $line = <$ids_fh>) {
    chomp $line;
    my ($id) = $line =~ /\A\s*>*(\S+)/;
    next unless defined $id;    # blank line
    $ids{$id} += 1;
}
close $ids_fh;

{
    # Read by FASTA record: every read returns the text up to the next
    # "\n>".  The change is confined to this block so that no other read is
    # affected by the modified record separator.
    local $/ = "\n>";

    open my $fasta_fh, '<', $seqfile or die "Cannot open '$seqfile': $!";
    while (my $seq = <$fasta_fh>) {
        chomp $seq;             # strips the trailing "\n>" separator

        # Parse the ID as the first word of the FASTA header.  Only the
        # first record still starts with ">", hence the optional ">*".
        my ($id) = $seq =~ /\A>*(\S+)/;
        next unless defined $id && exists $ids{$id};

        $seq =~ s/\A>*[^\n]*\n?//;    # remove FASTA header (even if no sequence follows)
        $seq =~ s/\n//g;              # remove line breaks inside the sequence
        print "$seq\n";
    }
    close $fasta_fh;
}

# Everything after the __END__ marker is ignored by perl.  It holds the
# sample input files that accompanied this script, kept for reference.
__END__

#### sample_ID.txt
>comp10000_c0_seq1 15
>comp10001_c0_seq1 79
>comp10002_c0_seq1 7521
>comp10003_c0_seq1 41
>comp10004_c0_seq1 25
>comp10005_c0_seq1 96
>comp10006_c0_seq1 84
>comp10007_c0_seq1 63
>comp10008_c0_seq1 19
>comp1000_c0_seq1 35
>comp10010_c0_seq1 44
>comp10011_c0_seq1 32
>comp10012_c0_seq1 451
>comp10014_c0_seq1 6845
>comp10015_c0_seq1 521
>comp10016_c0_seq1 254
>comp10017_c0_seq1 51
>comp10018_c0_seq1 4512
>comp10019_c0_seq1 64

#### sample_reads.fasta
>comp10000_c0_seq0 len=159 path=[12:0-66 885:67-79 1106:80-158]
GGTTAATATTCCCGAGCCACGAGATTGGAGGGACGGCGTCCAGAGCACCTGCGGACTGAT
AGAATAGCCCGTTGGAGGTACTGAGTTGGAGAGGATTAAAAGACTCTCATAATACAAGGC
CCGTACTAAGCCCACTTACGATGGAATAGATGGTAGGAA
>comp10001_c0_seq2 len=100 path=[1:0-60 177:61-65 600:66-99]
ATCCCGCACGATTCCTGGAAACACTATCTCACCCCCAAAAGTGAAGAACCGTATAGAAAC
TCGAATACCTTGGTTTCGAGTACATCTTGTGCTCTTGAAT
>comp10002_c0_seq3 len=99 path=[2446:0-34 1163:35-98]
TTTTTTGTGATATATTAAATAATATATAAAAATACTATGGCAGGAAGTTTAAATAAAGTC
TTATTAATAGGCCGTTTAGGCGCAGACCCAGATATAAAA

>comp10003_c0_seq1 len=166 path=[748:0-22 1004:23-46 2527:47-165] AAGTAGCCTATGCGCTACAGTAAGAAAGACAGGTGAAAAAATGGAAGTAAAACAATTAGA TGACTACTTTGGATATACAGAAAAGGGCAGTTCCTTAGAGGGGGAATTACGAGCAGGACT AACGACATTCTTGACAATGGCGTACATTCTGTTTGTGAACCCAGAC >comp10004_c0_seq1 len=143 path=[2167:0-44 2322:45-68 2508:69-142] AATCTTTAATTTAAACTTAAAAAAAATTAACTTTTGAAAGGAATTAAAATGGAAAAAGAA ATGTTAGTAGTAGCTAAATTAAAAGAAGGTACATTTGAAAAATTTATGGGTTTCATGCAA TCGCCTGAAGGTTTAGCAGAAAG >comp10005_c0_seq1 len=135 path=[2666:0-71 4268:72-134] AATATTACCAGAAGTTACAGGTGATGTGACTTATTTACATTGCTTCGGTGAGTGTTCAGG TGATGGTACAGGTGAATGCCCAAGTGGCGCTGTAACATGGATGCTTACAATGACTGTAAA TACTGCTAATATCAC >comp10006_c0_seq1 len=116 path=[4850:0-56 4046:57-115] ATATAAGGTAGAAAATTACATTAACTATCTTTATCTATTTTATCACATTAGATATACAAA CTTTTCCTTACAGAGAGGTCAAAAAATATGGAAGGAACTTTACTCTTAGCTAAATT >comp10007_c0_seq1 CCGGGTTAGTCCGAATTTATTTTTTATATAAGGTAGATTTACAAAGTTATATGATTTTTT TAATAGCCGCAATAGCACTCTTCATCTGGGTTGCATACACCGCAATGGAATTAAGAAAAG CAACGCT >comp10008_c0_seq1 len=298 path=[4908:0-189 5729:190-297] ATTTTGATTTCAGAAAGATGTGTAACTGATGACGTGTTTACTTCAGTTCATATTGAAGAA TATGAAATAATGGCAAGAGACACTAAACTAGGTGAAGAAGAAATTACCAGAGATATTCCT AATGTTAATGAAGAAGCTTTAAAAAATCTTGATGAATCAGGGATAGTTTACATTGGAGCT GAAGTAAACGCTGGTGATATTTTAGTTGGGAAAGTGACACCCAAAGGCGAGTCGCCAATG ACCCCAGAAGAAAAATTGCTTCGAGCAATTTTTGGAGAGAAAGCTTCTGATGTAAGGG >comp1000_c0_seq1 len=120 path=[5871:0-53 6857:54-119] ATTATAACAAGAGGAGTGCACATGAGAAAAATGATATCATTGATTTCTGCGGTGCTAGTA ATGTTTGCTTTTTCAAGCTCTATTACTTCAATCGCGAAATCAGCAGAGTTCTTTACAATA >comp10010_c0_seq1 len=108 path=[7438:0-30 7277:31-107] ATTCAGACTTTCTAACCAGACAGATGATCTAGATTGGATCGTTGGTCTTTTCTACCAAGA AAGAGAAGAAGGTTGGGATTTCGAAACGGAAACCCACAACTACAGAAA >comp10011_c0_seq1 len=105 path=[7198:0-63 8366:64-81 10234:82-104] ATTGTTAAGTTTGTGTTTTTATGAATTGAAAATAAGAACTACAAACACGCTATTGAAAAT TGTAAATATGTTTTCGGCTTTCATCGATTAATCAGAAGGTGATTG >comp10012_c0_seq1 len=134 path=[8117:0-30 7737:31-54 7822:55-133] ATATGTTTTTTTTATTAACACTTAAATTAACTAAATGGTACGATTCTTGAATAGATGATC 
GAAATAGTTGGAGTTTTGTAATGGATTTAGCCTCCCTAATTGGACTTGTTGGTGCAGTCG GTATGATTATAGCA >comp10014_c0_seq1 len=175 path=[7957:0-85 8425:86-174] CTCGCGGTCTTTCAGAGGATGCTGAGAAATTCCTAGAGAAGGTTCGTGCGAATGGTGGCT CCAAGATGATTAAGACACACTGCCGAAGTATGTACGTATTACCAGAGATGGTCGGTACTA >comp10015_c0_seq1 len=122 path=[8708:0-78 9126:79-121] AAGTGAATGTTTTTTAATTAAAACTTAGGTGGGAACCTGTAAAAAGGCGCACCATCGACC GGTCATGGGCTTCGGTCCAAGATCTGAGTATGAGTTTTTATGCTGGGACCCGAAAGATGG TG >comp10016_c0_seq1 len=210 path=[9869:0-132 11973:133-209] TTGTAGTATTTATGAATAAAACTGACCAAGTTGATGATGAAGAACTTGTTGAGCTTGTTG AAATGGAGCTTCGCGACCTATTAAATGAATATGAATTCCCAGGTGATGATATTCCTATAG TTAAAGGTTCTGCACTTAAAGCTCTTGATAATGTTTCTGATGAAGCTGCTACTGCTTGTA TAGCTGAACTTATGGAAGCTTGTGATAGTT >comp10017_c0_seq0 len=103 path=[11024:0-19 11107:20-43

11572:44-48 11236:49-102] TTTTTTGGTCAACTAGGTGGCTGAGAATCATTATTCTATGGTTATAAGGCACCCTTATTG ACATTTAAACGTTGGAGGAAATCATAAATGCGTATTAAATCAA >comp10018_c0_seq2 len=77 path=[12879:0-24 13307:25-76] ATAAAACACAATACACCGCCCATCTTATGCGGTGTCTAAATATAACTTAACCCTATTAAG TTTCCTAATAAACTTAT >comp10019_c0_seq3 len=112 path=[14245:0-49 14086:50-111] ATTTAGGACAAGTCTCTGCTGCAATGGGAACAGGAGTTGATCCTGGAGCTGCAATTGGAA ATGTAGCAGGTGCTGCGATGGCAGCTGAGGGAGCAGGCGGATTAGCAAGTGC