in reply to How do you match a stretch of at least N characters
#!/usr/bin/perl use warnings; use strict; use feature qw{ say }; my $small_str = 'AAATTGGTGTATATGAAAGACCTCGACGCTATTTAGAAAGAGAGAGCAATATT +TCAAGAAT' . 'GCATGCGTCAATTTTACGCAGACTATCTTTCTAGGGTTAAATATACTGACAGTGTGCAGTGAC +TCACAAAA' . 'GATGATTA'; my $reference_str = 'ACAATGAGATCACATGGACACAGGAAGGGGAATATCACACTCTGGGGAC +TGTGGTGG' . 'GGTCGGGGGAGGGGGGAGGGATAGCATTGGGAGATATACCTAATGCTAGATGACGTCCATACT +GAGAATCA' . 'TGTTAACATTAGTGGGTGCAGCGCACAAGCATGGCACATGTATACATATGTAACTAACCTGCA +CAATGTGC' . 'ACATGTACCCTAAAACTTAGAGTATAATAAAAAAAAAAAAAAAAAAAAAAAAAAACACATTAA +AAAAAAAA' . 'AAAACAACAAAACAAAGCAAACATGGAAATGTTTGTTATTTTAATTGTTATGATGGTTTCATG +GCTGTTTG' . 'CATGTGTCAAAACTCATCAAATTTGTGTACGTTAAATATGTGAAACTTATTGTATGCTGGTTA +CACCTCAA' . 'TAAAGCTGTTAAATTTAAAAAAAAAAAAAAAAAAAAAAATCACCAATAGTTGCTGCTAGAAAT +CCAGTGTC' . 'ACAAAAGGCCAAAGTTTATTGACAAATTGGTGTATATGAAAGACCTCGACGCTATTTAGAAAG +AGAGAGCA' . 'ATATTTCAAGAATGCATGCGTCAATTTTACGCAGACTATCTTTCTAGGGTTAATCTAGCTGCA +TCAGGATC' . 'ATATCGTCGGGTCTTTTTTCCGGCTCAGTCATCGCCCAAGCTGGCGCTATCTGGGCATCGGGG +AGGAAGAA' . 'GCCCGTGCCTTTTCCCGCGAGGTTGAAGCGGCATGGAAAGAGTTTGCCGAGGATGACTGCTGC +TGCATTGA' . 'CGTTGAGCGAAAACGCACGTTTACCATGATGATTCGGGAAGGTGTGGCCATGCACGCCTTTAA +CGGTGAAC' . 'TGTTCGTTCAGGCCACCTGGGATACCAGTTCGTCGCGGCTTTTCCGGACACAGTTCCGGATGG +TCAGCCCG' . 'AAGCGCATCAGCAACCCGAACAATACCGGCGACAGCCGGAACTGCCGTGCCGGTGTGCAGATT +AATGACAG' . 'CGGTGCGGCGCTGGGATATTACGTCAGCGAGGACGGGTATCCTGGCTGGATGCCGCAGAAATG +GACATGGA' . 'TACCCCGTGAGTTACCCGGCGGGCGCGCTTGGCGTAATCATGGTCATAGCTGTTTCCTGTGTG +AAATTGTT' . 'ATCCGCTCACAATTCCACACAACATACGAGCCGGAAGCATAAAGTGTAAAGCCTGGGGTGCCT +AATGAGTG' . 'AGCTAACTCACATTAATTGCGTTGCGCTCACTGCCCGCTTTCCAGTCGGGAAACCTGTCGTGC +CAGCTGCA' . 'TTAATGAATCGGCCAACGCGCGGGGAGAGGCGGTTTGCGTATTGGGCGCTCTTCCGCTTCCTC +GCTCACTG' . 'ACTCGCTGCGCTCGGTCGTTCGGCTGCGGCGAGCGGTATCAGCTCACTCAAAGGCGGTAATAC +GGTTATCC' . 'ACAGAATCAGGGGATAACGCAGGAAAGAACATGTGAGCAAAAGGCCAGCAAAAGGCCAGGAAC +CGTAAAAA' . 'GGCCGCGTTGCTGGCGTTTTTCCATAGGCTCCGCCCCCCTGACGAGCATCACAAAAATCGACG +CTCAAGTC' . 'AGAGGTGGCGAAACCCGACAGGACTATAAAGATACCAGGCGTTTCCCCCTGGAAGCTCCCTCG +TGCGCTCT' . 'CCTGTTCCGACCCTGCCGCTTACCGGATACCTGTCCGCCTTTCTCCCTTCGGGAAGCGTGGCG +CTTTCTCA' . 'TAGCTCACGCTGTAGGTATCTCAGTTCGGTGTAGGTCGTTCGCTCCAAGCTGGGCTGTGTGCA +CGAACCCC' . 'CCGTTCAGCCCGACCGCTGCGCCTTATCCGGTAACTATCGTCTTGAGTCCAACCCGGTAAGAC +ACGACTTA' . 'TCGCCACTGGCAGCAGCCACTGGTAACAGGATTAGCAGAGCGAGGTATGTAGGCGGTGCTACA +GAGTTCTT' . 'GAAGTGGTGGCCTAACTACGGCTACACTAGAAGGACAGTATTTGGTATCTGCGCTCTGCTGAA +GCCAGTTA' . 'CCTTCGGAAAAAGAGTTGGTAGCTCTTGATCCGGCAAACAAACCACCGCTGGTAGCGGTGGTT +TTTTTGTT' . 'TGCAAGCAGCAGATTACGCGCAGAAAAAAAGGATCTCAAGAAGATCCTTTGATCTTTTCTACG +GGGTCTGA' . 'CGCTCAGTGGAACGAAAACTCACGTTAAGGGATTTTGGTCATGAGATTATCAAAAAGGATCTT +CACCTAGA' . 'TCCTTTTAAATTAAAAATGAAGTTTTAAATCAATCTAAAGTATATATGAGTAAACTTGGTCTG +ACAGTTAC' . 'CAATGCTTAATCAGTGAGGCACCTATCTCAGCGATCTGTCTATTTCGTTCATCCATAGTTGCC +TGACTCCC' . 'CGTCGTGTAGATAACTACGATACGGGAGGGCTTACCATCTGGCCCCAGTGCTGCAATGATACC +GCGAGACC' . 'CACGCTCACCGGCTCCAGATTTATCAGCAATAAACCAGCCAGCCGGAAGGGCCGAGCGCAGAA +GTGGTCCT' . 'GCAACTTTATCCGCCTCCATCCAGTCTATTAATTGTTGCCGGGAAGCTAGAGTAAGTAGTTCG +CCAGTTAA' . 'TAGTTTGCGCAACGTTGTTGCCATTGCTACAGGCATCGTGGTGTCACGCTCGTCGTTTGGTAT +GGCTTCAT' . 'TCAGCTCCGGTTCCCAACGATCAAGGCGAGTTACATGATCCCCCATGTTGTGCAAAAAAGCGG +TTAGCTCC' . 'TTCGGTCCTCCGATCGTTGTCAGAAGTAAGTTGGCCGCAGTGTTATCACTCATGGTTATGGCA +GCACTGCA' . 'TAATTCTCTTACTGTCATGCCATCCGTAAGATGCTTTTCTGTGACTGGTGAGTACTCAACCAA +GTCATTCT' . 'GAGAATAGTGTATGCGGCGACCGAGTTGCTCTTGCCCGGCGTCAATACGGGATAATACCGCGC +CACATAGC' . 'AGAACTTTAAAAGTGCTCATCATTGGAAAACGTTCTTCGGGGCGAAAACTCTCAAGGATCTTA +CCGCTGTT' . 'GAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTCAGCATCTTTTACTTTCAC +CAGCGTTT' . 'CTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAAAAGGGCGACACGGAAATG +TTGAATAC' . 'TCAT'; for my $start_pos (0 .. length($reference_str) - 11) { my $xor = $small_str ^ substr $reference_str, $start_pos, length($ +reference_str) - 1; for my $inner_pos (0 .. length($xor) - 10) { if (9 <= substr($xor, $inner_pos, 10) =~ tr/\0//) { say $start_pos + $inner_pos; say substr $small_str, $inner_pos, 10; say substr $reference_str, $start_pos + $inner_pos, 10; } } }
TODO: Handle matches where the two strings don't fully overlap.
Update Probably like this? It wraps the longer string in characters that can never match, so some simple math is needed to get the original position back:
my $stretch = 10; my $mismatches = 1; substr $reference_str, 0, 0, 'x' x (length($small_str) - 1); $reference_str .= 'x' x (length($small_str) - 1); for my $start_pos (0 .. length($reference_str) - 1 - $stretch) { my $substr = substr $reference_str, $start_pos, length($small_str) + - 1; my $xor = $small_str ^ $substr; for my $inner_pos (0 .. length($xor) - $stretch) { if ($stretch - $mismatches <= substr($xor, $inner_pos, $stretc +h) =~ tr/\0//) { my $match = substr $reference_str, $start_pos + $inner_pos +, $stretch; next unless -1 == index $match, 'x'; say $start_pos + $inner_pos + 1 - length $small_str; say substr $small_str, $inner_pos, $stretch; say $match; } } }
($q=q:Sq=~/;[c](.)(.)/;chr(-||-|5+lengthSq)`"S|oS2"`map{chr |+ord }map{substrSq`S_+|`|}3E|-|`7**2-3:)=~y+S|`+$1,++print+eval$q,q,a,
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re^2: How do you match a stretch of at least N characters
by Anonymous Monk on Sep 11, 2017 at 14:39 UTC | |
by choroba (Cardinal) on Sep 11, 2017 at 14:46 UTC | |
by Anonymous Monk on Sep 11, 2017 at 14:52 UTC | |
by choroba (Cardinal) on Sep 11, 2017 at 17:04 UTC | |
by Anonymous Monk on Sep 11, 2017 at 17:31 UTC |