c:\@Work\Perl\monks>perl -wMstrict -le
"use Data::Dump qw(dd);
;;
my $seq = 'xABCxABCxxxWXYxWXZxxxABCxxWXYx';
;;
my $subseq = qr{ ABC \w* (?: WXY | WXZ) }xms;
;;
my @all = find_all($seq, $subseq);
dd \@all;
;;
;;
sub find_all {
my ($seq, $regex) = @_;
;;
local our @hits;
use re 'eval';
$seq =~ m{
($regex) (?{ push @hits, [ $^N, $-[1] ] }) (?!)
}xmsg;
;;
return @hits;
}
"
[
["ABCxABCxxxWXYxWXZxxxABCxxWXY", 1],
["ABCxABCxxxWXYxWXZ", 1],
["ABCxABCxxxWXY", 1],
["ABCxxxWXYxWXZxxxABCxxWXY", 5],
["ABCxxxWXYxWXZ", 5],
["ABCxxxWXY", 5],
["ABCxxWXY", 21],
]
(I'm just using ...ABCxxWXY... to make the permutations and overlaps clear.) (Update: The number that is the second item in each array reference returned is the base-0 offset of the start of the matching subsequence.)
Update: Using your original sequence:
c:\@Work\Perl\monks>perl -wMstrict -le
"use Data::Dump qw(dd);
;;
my $seq = 'AATGGTTTCTCCCATCTCTCCATCGGCATAAAAATACAGAATGATCTAA';
;;
my $subseq = qr{ ATG \w* (?: TAG | TAA | TGA) }xms;
;;
my @all = find_all($seq, $subseq);
dd \@all;
;;
;;
sub find_all {
my ($seq, $regex) = @_;
;;
local our @hits;
use re 'eval';
$seq =~ m{
($regex) (?{ push @hits, [ $^N, $-[1] ] }) (?!)
}xmsg;
;;
return @hits;
}
"
[
["ATGGTTTCTCCCATCTCTCCATCGGCATAAAAATACAGAATGATCTAA", 1],
["ATGGTTTCTCCCATCTCTCCATCGGCATAAAAATACAGAATGA", 1],
["ATGGTTTCTCCCATCTCTCCATCGGCATAA", 1],
["ATGATCTAA", 40],
]
This works with Perl 5.8+ regexes. What version of Perl are you using — it might make a difference in future?
Update 2: Remembering that DNA sequences may sometimes be loooong, it may be advantageous to pass the sequence by reference. Note that both the function and the function invocation must change.
c:\@Work\Perl\monks>perl -wMstrict -le
"use Data::Dump qw(dd);
;;
my $seq = 'AATGGTTTCTCCCATCTCTCCATCGGCATAAAAATACAGAATGATCTAA';
;;
my $subseq = qr{ ATG \w* (?: TAG | TAA | TGA) }xms;
;;
my @all = find_all(\$seq, $subseq);
dd \@all;
;;
;;
sub find_all {
my ($sr_seq, $regex) = @_;
;;
local our @hits;
use re 'eval';
$$sr_seq =~ m{
($regex) (?{ push @hits, [ $^N, $-[1] ] }) (?!)
}xmsg;
;;
return @hits;
}
"
[
["ATGGTTTCTCCCATCTCTCCATCGGCATAAAAATACAGAATGATCTAA", 1],
["ATGGTTTCTCCCATCTCTCCATCGGCATAAAAATACAGAATGA", 1],
["ATGGTTTCTCCCATCTCTCCATCGGCATAA", 1],
["ATGATCTAA", 40],
]
Still runs under Perl 5.8.
Give a man a fish: <%-{-{-{-<
|