use warnings;
use strict;

# Demonstration script: builds small qr// patterns for DNA bases, composes
# them into larger patterns, and reports which "words" of a text they match.

# One nucleotide letter, in either case.
my $base = qr{ [ATCGatcg] }xms;

# A whole word consisting solely of nucleotide letters.
my $sequence = qr{ \b $base+ \b }xms;

# Check the pattern against a single known sequence; $1 is read only
# inside the branch where the match has just succeeded.
if ('GATTACA' =~ m{ ($sequence) }xms) {
    print "sequence '$1' \n\n";
}

# Sample prose to scan. Ordinary words made up only of A/T/C/G letters
# also satisfy $sequence.
my $passage = q{ A DNA sequence consists in ATCG base pairs. Here's a AATCCGCTGATT sequence. Such sequences act to define protein structure after mediation by RNA transcription. };

# The pattern has no capture groups, so a list-context /g match yields
# every complete match.
my @found = $passage =~ m{ $sequence }xmsg;
my $count = @found;
printf "captured %d sequences: %s \n\n",
    $count, join(' ', map { qq{'$_'} } @found);

# A codon is exactly three bases: inside a pattern the {3} following the
# interpolated $base is a repetition count, as the sample run below shows.
my $codon = qr{ $base{3} }xms;

# A whole word consisting of one or more complete codons.
my $codons = qr{ \b $codon+ \b }xms;

my @codon_runs  = $passage =~ m{ $codons }xmsg;
my $codon_count = @codon_runs;
printf "captured %d codon sequences: %s \n\n",
    $codon_count, join(' ', map { qq{'$_'} } @codon_runs);

####
# Sample run:
#
#   c:\@Work\Perl\monks\undergradguy>perl extract_dna_seq_1.pl
#   sequence 'GATTACA'
#
#   captured 5 sequences: 'A' 'ATCG' 'a' 'AATCCGCTGATT' 'act'
#
#   captured 2 codon sequences: 'AATCCGCTGATT' 'act'