in reply to Re^2: Tips on how to perform this regex query
in thread Tips on how to perform this regex query

corrected and improved
#! perl -slw use strict; my $bigstring="MNRIYSLRYSAVARGFIAVSEFARKCVHKSVRRLCFPVLLLIPVLFSAGSLAGTV +NNELGYQLFRDFAENKGMFRPGATNIAIYNKQGEFVGTLDKAAMPDFSAVDSEIGVATLINPQYIASVK +HNGGYTNVSFGDGENRYNIVDRNNAPSLDFHAPRLDKLVTEVAPTAVTAQGAVAGAYLDKERYPVFYRL +GSGTQYIKDSNGQLTKMGGAYSWLTGGTVGSLSSYQNGEMISTSSGLVFDYKLNGAMPIYGEAGDSGSP +LFAFDTVQNKWVLVGVLTAGNGAGGRGNNWAVIPLDFIGQKFNEDNDAPVTFRTSEGGALEWSFNSSTG +AGALTQGTTTYAMHGQQGNDLNAGKNLIFQGQNGQINLKDSVSQGAGSLTFRDNYTVTTSNGSTWTGAG +IVVDNGVSVNWQVNGVKGDNLHKIGEGTLTVQGTGINEGGLKVGDGKVVLNQQADNKGQVQAFSSVNIA +SGRPTVVLTDERQVNPDTVSWGYRGGTLDVNGNSLTFHQLKAADYGAVLANNVDKRATITLDYALRADK +VALNGWSESGKGTAGNLYKYNNPYTNTTDYFILKQSTYGYFPTDQSSNATWEFVGHSQGDAQKLVADRF +NTAGYLFHGQLKGNLNVDNRLPEGVTGALVMDGAADISGTFTQENGRLTLQGHPVIHAYNTQSVADKLA +ASGDHSVLTQPTSFSQEDWENRSFTFDRLSLKNTDFGLGRNATLNTTIQADNSSVTLGDSRVFIDKNDG +QGTAFTLEEGTSVATKDADKSVFNGTVNLDNQSVLNINDIFNGGIQANNSTVNISSDSAVLGNSTLTST +ALNLNKGANALASQSFVSDGPVNISDATLSLNSRPDEVSHTLLPVYDYAGSWNLKGDDARLNVGPYSML +SGNINVQDKGTVTLGGEGELSPDLTLQNQMLYSLFNGYRNIWSGSLNAPDATVSMTDTQWSMNGNSTAG +NMKLNRTIVGFNGGTSPFTTLTTDNLDAVQSAFVMRTDLNKADKLVINKSATGHDNSIWVNFLKKPSNK +DTLDIPLVSAPEATADNLFRASTRVVGFSDVTPILSVRKEDGKKEWVLDGYQVARNDGQGKAAATFMHI +SYNNFITEVNNLNKRMGDLRDINGEAGTWVRLLNGSGSADGGFTDHYTLLQMGADRKHELGSMDLFTGV +MATYTDTDASADLYSGKTKSWGGGFYASGLFRSGAYFDVIAKYIHNENKYDLNFAGAGKQNFRSHSLYA +GAEVGYRYHLTDTTFVEPQAELVWGRLQGQTFNWNDSGMDVSMRRNSVNPLVGRTGVVSGKTFSGKDWS +LTARAGLHYEFDLTDSADVHLKDAAGEHQINGRKDSRMLYGVGLNARFGDNTRLGLEVERSAFGKYNTD +DAINANIRYSF"; my $smallstring="GTMARNDGQGKAAATFMHISYNNFITEVDNLNKRMGDLRDINGEAGTWVRLLN +GSGSADGGFTDHYTLLQMGADRKHELGSMDLFTGVMATYTDTDASADLYSGKTKSWGGGFYASGLFRSG +AYFDVIAKYIHNENKYDLNFAGAGKQNFRSHSLYAGAEVGYRYHLTDTTFVEPQAELVWGRLQGQTFNW +NDSGMDVSMRRNSVNPLVGRTGVVSGKTFSGKDWSLTARAGLHYEFDLTDSADVHLKDAAGEHQINGRK +DSRMLYGVGLNARFGDNTRLGLEVERSAFGKYNTDDAINANIRYSFLE"; my $lenBig = length $bigstring; my $lenSmall = length $smallstring; my $threshold = 0.9; for my $o ( 0 .. $lenBig - $threshold*$lenSmall ) { my $masked = substr( $bigstring, $o, $lenSmall ) ^ $smallstring; my $matched = $masked =~ tr[\0][]; if( ( $matched / $lenSmall ) > $threshold ) { $masked =~ tr[\1-\255][ ]; $masked =~ tr[\0][*]; printf "%.2f%% match at offset %u\n", $matched / $lenSmall * 1 +00, $o; print substr $bigstring, $o, $lenSmall; print $smallstring; print $masked; } }
98.05% match at offset 1071 YQVARNDGQGKAAATFMHISYNNFITEVNNLNKRMGDLRDINGEAGTWVRLLNGSGSADGGFTDHYTLLQ +MGADRKHELGSMDLFTGVMATYTDTDASADLYSGKTKSWGGGFYASGLFRSGAYFDVIAKYIHNENKYD +LNFAGAGKQNFRSHSLYAGAEVGYRYHLTDTTFVEPQAELVWGRLQGQTFNWNDSGMDVSMRRNSVNPL +VGRTGVVSGKTFSGKDWSLTARAGLHYEFDLTDSADVHLKDAAGEHQINGRKDSRMLYGVGLNARFGDN +TRLGLEVERSAFGKYNTDDAINANIRYSF GTMARNDGQGKAAATFMHISYNNFITEVDNLNKRMGDLRDINGEAGTWVRLLNGSGSADGGFTDHYTLLQ +MGADRKHELGSMDLFTGVMATYTDTDASADLYSGKTKSWGGGFYASGLFRSGAYFDVIAKYIHNENKYD +LNFAGAGKQNFRSHSLYAGAEVGYRYHLTDTTFVEPQAELVWGRLQGQTFNWNDSGMDVSMRRNSVNPL +VGRTGVVSGKTFSGKDWSLTARAGLHYEFDLTDSADVHLKDAAGEHQINGRKDSRMLYGVGLNARFGDN +TRLGLEVERSAFGKYNTDDAINANIRYSFLE ************************* ***************************************** +********************************************************************* +********************************************************************* +********************************************************************* +*****************************

Replies are listed 'Best First'.
Re^4: Tips on how to perform this regex query
by Anonymous Monk on Jan 12, 2014 at 00:34 UTC
    Better
    for my $o ( ($threshold-1)*$lenSmall .. $lenBig - $threshold*$lenSmall + ) {
    But substr needs to handle negative offsets then.

    Left as exercise!