use strict;
use warnings;
use autodie;

# Print the gene name and nucleotide sequence of every record found in the
# __DATA__ section below, one "name / blank line / sequence" block per record.

# Slurp the whole data section into one string: a record spans several lines,
# so the pattern has to see all of them at once.  Localising the input record
# separator (leaving it undef) makes a single <DATA> read return everything.
my $records = do { local $/; <DATA> };

while (
    $records =~ m{
        Name: \s+ (.*?) $               # 1st capture: gene name, lazily up to end of line
        .*?                             # fields between name and sequence (ignored)
        Nucleotide \s+ Sequence: \s+
        (.*?)                           # 2nd capture: the sequence, may span several lines
        (?: GeneID | \z )               # ... ended by the next record or by end of input
    }gmsx
  )
{
    # Copy the captures straight away, before anything can reset them.
    my ($name, $seq) = ($1, $2);

    # The sequence capture keeps the newline that precedes the next record.
    chomp($name, $seq);

    print "\n$name\n\n$seq\n";
}

# NOTE(review): the line breaks in the records below were reconstructed from a
# whitespace-collapsed copy of this file -- confirm against the original data.
__DATA__
GeneID: 1002
Name: cadherin 4, type 1, R-cadherin (retinal)
Chromo: 20
Cytoband: 20q13.3
Nucleotide Sequence: atgaccgcgggcgccggcgtgctccttctgctgctctcgctctccggc
acagcgagactggagatatcgtcacagtggcggctggcctggaccgagagaaagttcagcagtacacag
cagcttgcgcatcctgtacctggaggccgggatgtatgacgtccccatcatcgtcacagactctggaaa
GeneID: 10077
Name: tetraspanin 32
Chromo: 11
Cytoband: 11p15.5
Nucleotide Sequence: atggggccttggagtcgagtcagggttgccaaatgccagatgctggtc
GeneID: 10078
Name: tumor suppressing subtransferable candidate 4
Chromo: 11
Cytoband: 11p15.5
Nucleotide Sequence: atggctgaggcaggaacaggtgagccgtcccccagcgtggagggcgaa