#! perl -slw
# Locate the fragments of a concatenated "needle" string inside a "haystack"
# string and report each fragment together with the text that precedes it.
#
# Switches on the #! line:
#   -s  rudimentary command-line switch parsing; sets the globals $LEN, $N
#       and $MAX (e.g. -LEN=5000 -N=20 -MAX=30)
#   -l  print() appends a newline (printf() is not affected)
#   -w  enable warnings
use strict;
use warnings;

# Package variables, because -s populates globals rather than lexicals.
#   $LEN  length of the random haystack
#   $N    number of needle fragments cut from the random haystack
#   $MAX  upper bound used when choosing a fragment length
our ( $LEN, $N, $MAX );
( $LEN, $N, $MAX ) = ( $LEN || 1000, $N || 10, $MAX || 20 );

# rndStr( $length, @alphabet )
# Returns a string of exactly $length characters, each one picked at random
# from @alphabet.
sub rndStr {
    my ( $length, @alphabet ) = @_;
    return join '', map { $alphabet[ rand @alphabet ] } 1 .. $length;
}

# findStuff $haystack, $needle
#
# Splits $needle, working from its end, into the longest pieces that occur
# somewhere in $haystack, wraps every occurrence of those pieces in the
# haystack in braces, and returns (in list context) one string per marked
# piece of the form "<text since the previous piece>{<piece>}".
#
# The (\$\$) prototype makes perl pass references to the caller's two scalar
# variables, so BOTH are modified in place:
#   * the haystack is left holding the brace-marked text;
#   * the needle is consumed and is empty on return.
#
# Braces are used as markers, so a haystack or needle that itself contains
# "{" or "}" will give confusing results.
sub findStuff (\$\$) {
    my ( $href, $nref ) = @_;

    # Peel pieces off the end of the needle, longest matching suffix first.
    my @fragments;
    while ( length $$nref ) {
        my $found = 0;
        for my $start ( 0 .. length($$nref) - 1 ) {
            my $suffix = substr $$nref, $start;
            next if index( $$href, $suffix ) < 0;
            push @fragments, $suffix;
            $$nref = substr $$nref, 0, $start;
            $found = 1;
            last;
        }

        # Not even the final character occurs in the haystack: discard it,
        # otherwise the needle never shrinks and this loop never ends.
        chop $$nref unless $found;
    }

    # Mark the longest pieces first. The lookahead skips an occurrence that
    # already ends a marked piece; \Q..\E makes the piece match literally.
    for my $fragment ( sort { length($b) <=> length($a) } @fragments ) {
        $$href =~ s/(\Q$fragment\E)(?!\})/{$1}/g;
    }

    # A short piece found inside an already marked longer piece leaves nested
    # braces. Strip one inner pair per match and repeat until none remain
    # (every substitution removes a pair, so this terminates).
    1 while $$href =~ s/
        ( \{ [^{}]*? )    # group 1: outer opening brace plus text before the inner pair
        \{                # inner opening brace, dropped
        ( [^{}]*? )       # group 2: text of the inner piece
        \}                # inner closing brace, dropped
        ( [^}]*? \} )     # group 3: remainder up to the next closing brace
    /$1$2$3/gx;

    # Walk the marked text from the start; the prefix may be empty when a
    # piece sits at offset 0 or directly follows another piece.
    return $$href =~ /(\G[^{]*\{[^}]+\})/sg;
}

# --- Fixed sample data ------------------------------------------------------

print 'Results from sample data';

my $haystack = 'xxxxxxATGGAGyxxxTCGAzxxxxCGAATTTGAAxxwGAAT';
my $needle   = 'ATGGAGTCGACGAATTTGAAGAAT';

my @matches = findStuff $haystack, $needle;
for my $match (@matches) {
    next unless $match =~ m/(^.*?)(\{.*\}$)/;
    my ( $before, $fragment ) = ( $1, $2 );
    printf "%*s was preceeded by %s\n", $MAX + 4, $fragment, $before;
}

# --- Random test data -------------------------------------------------------

$haystack = rndStr $LEN, qw[A C G T];

# Cut up to $N fragments out of the haystack at increasing random offsets.
my $offset = 0;
my @needles;
for ( 1 .. $N ) {
    $offset += 4 + rand( $LEN / $N );

    # The random strides can add up to more than the haystack length;
    # stop instead of cutting beyond the end of the string.
    last if $offset >= length $haystack;

    my $room     = $LEN - $offset;
    my $span     = $room > $MAX ? $MAX - 4 : $room - 4;
    my $fragment = substr $haystack, $offset, 4 + rand($span);

    # print $fragment, ':', length $fragment;    # debugging aid
    push @needles, $fragment;
}
$needle = join '', @needles;

print <<"EOS";
Results from test data of $N needles; length (4-$MAX) within a haystack of $LEN chars
EOS

@matches = findStuff $haystack, $needle;
for my $match (@matches) {
    next unless $match =~ m/(^.*?)(\{.*\}$)/;
    my ( $before, $fragment ) = ( $1, $2 );

    # Long prefixes are cut to 60 columns; show their last 10 characters too.
    my $tail = length($before) > 60 ? '... ' . substr( $before, -10 ) : '';
    printf "%*s was preceeded by %-60.60s %s\n",
        $MAX + 2, $fragment, $before, $tail;
}