#!/usr/bin/perl use warnings; use strict; use feature qw{ say }; my $small_str = 'AAATTGGTGTATATGAAAGACCTCGACGCTATTTAGAAAGAGAGAGCAATATT +TCAAGAAT' . 'GCATGCGTCAATTTTACGCAGACTATCTTTCTAGGGTTAAATATACTGACAGTGTGCAGTGAC +TCACAAAA' . 'GATGATTA'; my $reference_str = 'ACAATGAGATCACATGGACACAGGAAGGGGAATATCACACTCTGGGGAC +TGTGGTGG' . 'GGTCGGGGGAGGGGGGAGGGATAGCATTGGGAGATATACCTAATGCTAGATGACGTCCATACT +GAGAATCA' . 'TGTTAACATTAGTGGGTGCAGCGCACAAGCATGGCACATGTATACATATGTAACTAACCTGCA +CAATGTGC' . 'ACATGTACCCTAAAACTTAGAGTATAATAAAAAAAAAAAAAAAAAAAAAAAAAAACACATTAA +AAAAAAAA' . 'AAAACAACAAAACAAAGCAAACATGGAAATGTTTGTTATTTTAATTGTTATGATGGTTTCATG +GCTGTTTG' . 'CATGTGTCAAAACTCATCAAATTTGTGTACGTTAAATATGTGAAACTTATTGTATGCTGGTTA +CACCTCAA' . 'TAAAGCTGTTAAATTTAAAAAAAAAAAAAAAAAAAAAAATCACCAATAGTTGCTGCTAGAAAT +CCAGTGTC' . 'ACAAAAGGCCAAAGTTTATTGACAAATTGGTGTATATGAAAGACCTCGACGCTATTTAGAAAG +AGAGAGCA' . 'ATATTTCAAGAATGCATGCGTCAATTTTACGCAGACTATCTTTCTAGGGTTAATCTAGCTGCA +TCAGGATC' . 'ATATCGTCGGGTCTTTTTTCCGGCTCAGTCATCGCCCAAGCTGGCGCTATCTGGGCATCGGGG +AGGAAGAA' . 'GCCCGTGCCTTTTCCCGCGAGGTTGAAGCGGCATGGAAAGAGTTTGCCGAGGATGACTGCTGC +TGCATTGA' . 'CGTTGAGCGAAAACGCACGTTTACCATGATGATTCGGGAAGGTGTGGCCATGCACGCCTTTAA +CGGTGAAC' . 'TGTTCGTTCAGGCCACCTGGGATACCAGTTCGTCGCGGCTTTTCCGGACACAGTTCCGGATGG +TCAGCCCG' . 'AAGCGCATCAGCAACCCGAACAATACCGGCGACAGCCGGAACTGCCGTGCCGGTGTGCAGATT +AATGACAG' . 'CGGTGCGGCGCTGGGATATTACGTCAGCGAGGACGGGTATCCTGGCTGGATGCCGCAGAAATG +GACATGGA' . 'TACCCCGTGAGTTACCCGGCGGGCGCGCTTGGCGTAATCATGGTCATAGCTGTTTCCTGTGTG +AAATTGTT' . 'ATCCGCTCACAATTCCACACAACATACGAGCCGGAAGCATAAAGTGTAAAGCCTGGGGTGCCT +AATGAGTG' . 'AGCTAACTCACATTAATTGCGTTGCGCTCACTGCCCGCTTTCCAGTCGGGAAACCTGTCGTGC +CAGCTGCA' . 'TTAATGAATCGGCCAACGCGCGGGGAGAGGCGGTTTGCGTATTGGGCGCTCTTCCGCTTCCTC +GCTCACTG' . 'ACTCGCTGCGCTCGGTCGTTCGGCTGCGGCGAGCGGTATCAGCTCACTCAAAGGCGGTAATAC +GGTTATCC' . 'ACAGAATCAGGGGATAACGCAGGAAAGAACATGTGAGCAAAAGGCCAGCAAAAGGCCAGGAAC +CGTAAAAA' . 'GGCCGCGTTGCTGGCGTTTTTCCATAGGCTCCGCCCCCCTGACGAGCATCACAAAAATCGACG +CTCAAGTC' . 'AGAGGTGGCGAAACCCGACAGGACTATAAAGATACCAGGCGTTTCCCCCTGGAAGCTCCCTCG +TGCGCTCT' . 'CCTGTTCCGACCCTGCCGCTTACCGGATACCTGTCCGCCTTTCTCCCTTCGGGAAGCGTGGCG +CTTTCTCA' . 'TAGCTCACGCTGTAGGTATCTCAGTTCGGTGTAGGTCGTTCGCTCCAAGCTGGGCTGTGTGCA +CGAACCCC' . 'CCGTTCAGCCCGACCGCTGCGCCTTATCCGGTAACTATCGTCTTGAGTCCAACCCGGTAAGAC +ACGACTTA' . 'TCGCCACTGGCAGCAGCCACTGGTAACAGGATTAGCAGAGCGAGGTATGTAGGCGGTGCTACA +GAGTTCTT' . 'GAAGTGGTGGCCTAACTACGGCTACACTAGAAGGACAGTATTTGGTATCTGCGCTCTGCTGAA +GCCAGTTA' . 'CCTTCGGAAAAAGAGTTGGTAGCTCTTGATCCGGCAAACAAACCACCGCTGGTAGCGGTGGTT +TTTTTGTT' . 'TGCAAGCAGCAGATTACGCGCAGAAAAAAAGGATCTCAAGAAGATCCTTTGATCTTTTCTACG +GGGTCTGA' . 'CGCTCAGTGGAACGAAAACTCACGTTAAGGGATTTTGGTCATGAGATTATCAAAAAGGATCTT +CACCTAGA' . 'TCCTTTTAAATTAAAAATGAAGTTTTAAATCAATCTAAAGTATATATGAGTAAACTTGGTCTG +ACAGTTAC' . 'CAATGCTTAATCAGTGAGGCACCTATCTCAGCGATCTGTCTATTTCGTTCATCCATAGTTGCC +TGACTCCC' . 'CGTCGTGTAGATAACTACGATACGGGAGGGCTTACCATCTGGCCCCAGTGCTGCAATGATACC +GCGAGACC' . 'CACGCTCACCGGCTCCAGATTTATCAGCAATAAACCAGCCAGCCGGAAGGGCCGAGCGCAGAA +GTGGTCCT' . 'GCAACTTTATCCGCCTCCATCCAGTCTATTAATTGTTGCCGGGAAGCTAGAGTAAGTAGTTCG +CCAGTTAA' . 'TAGTTTGCGCAACGTTGTTGCCATTGCTACAGGCATCGTGGTGTCACGCTCGTCGTTTGGTAT +GGCTTCAT' . 'TCAGCTCCGGTTCCCAACGATCAAGGCGAGTTACATGATCCCCCATGTTGTGCAAAAAAGCGG +TTAGCTCC' . 'TTCGGTCCTCCGATCGTTGTCAGAAGTAAGTTGGCCGCAGTGTTATCACTCATGGTTATGGCA +GCACTGCA' . 'TAATTCTCTTACTGTCATGCCATCCGTAAGATGCTTTTCTGTGACTGGTGAGTACTCAACCAA +GTCATTCT' . 'GAGAATAGTGTATGCGGCGACCGAGTTGCTCTTGCCCGGCGTCAATACGGGATAATACCGCGC +CACATAGC' . 'AGAACTTTAAAAGTGCTCATCATTGGAAAACGTTCTTCGGGGCGAAAACTCTCAAGGATCTTA +CCGCTGTT' . 'GAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTCAGCATCTTTTACTTTCAC +CAGCGTTT' . 'CTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAAAAGGGCGACACGGAAATG +TTGAATAC' . 'TCAT'; for my $start_pos (0 .. length($reference_str) - 11) { my $xor = $small_str ^ substr $reference_str, $start_pos, length($ +reference_str) - 1; for my $inner_pos (0 .. length($xor) - 10) { if (9 <= substr($xor, $inner_pos, 10) =~ tr/\0//) { say $start_pos + $inner_pos; say substr $small_str, $inner_pos, 10; say substr $reference_str, $start_pos + $inner_pos, 10; } } }
TODO: Handle matches where the two strings don't fully overlap.
Update Probably like this? It wraps the longer string in characters that can never match, so some simple math is needed to get the original position back:
my $stretch = 10; my $mismatches = 1; substr $reference_str, 0, 0, 'x' x (length($small_str) - 1); $reference_str .= 'x' x (length($small_str) - 1); for my $start_pos (0 .. length($reference_str) - 1 - $stretch) { my $substr = substr $reference_str, $start_pos, length($small_str) + - 1; my $xor = $small_str ^ $substr; for my $inner_pos (0 .. length($xor) - $stretch) { if ($stretch - $mismatches <= substr($xor, $inner_pos, $stretc +h) =~ tr/\0//) { my $match = substr $reference_str, $start_pos + $inner_pos +, $stretch; next unless -1 == index $match, 'x'; say $start_pos + $inner_pos + 1 - length $small_str; say substr $small_str, $inner_pos, $stretch; say $match; } } }
($q=q:Sq=~/;[c](.)(.)/;chr(-||-|5+lengthSq)`"S|oS2"`map{chr |+ord }map{substrSq`S_+|`|}3E|-|`7**2-3:)=~y+S|`+$1,++print+eval$q,q,a,
In reply to Re: How do you match a stretch of at least N characters
by choroba
in thread How do you match a stretch of at least N characters
by Anonymous Monk
| For: | Use: | ||
| & | & | ||
| < | < | ||
| > | > | ||
| [ | [ | ||
| ] | ] |