#!/usr/bin/env python import pprint import random import sys # Set up your needle, haystack and alphabet size haystack = "00001110100111001011101101110001111010111100111110111111100000001000100100001010010101000001101000110010011000101101010110011011000000" hay_len = len(haystack) ruler = "" cases = [ { 'NDL': "100001010010101000001101", 'syms':3, 'sz':8 }, { 'NDL': "100001010010101000001101", 'syms':4, 'sz':6 }, { 'NDL': "100001010010101000001101", 'syms':6, 'sz':4 }, { 'NDL': "100001010010101000001101", 'syms':8, 'sz':3 }, { 'NDL': "000010100101010000011010", 'syms':3, 'sz':8 }, { 'NDL': "000010100101010000011010", 'syms':4, 'sz':6 }, { 'NDL': "000010100101010000011010", 'syms':6, 'sz':4 }, { 'NDL': "000010100101010000011010", 'syms':8, 'sz':3 }, { 'NDL': "1000010100101010000011010", 'syms':5, 'sz':5 } ] case = 0 try: case = int(sys.argv[1]) except: pass needle = cases[case]['NDL'] num_syms = cases[case]['syms'] symbol_size = cases[case]['sz'] def main(): global ruler print "\n{0}\nHaystack: {1}\n Needle: {2}\n".format(syms(70, '-'), haystack, needle) # Use brute force to find all matches positions = brute_force(needle, haystack) if len(positions)>0: ruler = "".join([ "V" if x in positions else " " for x in range(0, len(haystack)) ]) print "Brute force:\n{0}\n{1}".format(ruler, haystack) print "{0}{1}\n".format(spaces(positions[0]), needle) else: print "Brute force: not found" if not find_needle(haystack, needle, num_syms, symbol_size): print "**** Case failed! ****" def find_needle(haystack, needle, num_syms, symbol_size): sym_marker = syms(symbol_size, '^') needle_syms = [ needle[i*symbol_size:(i+1)*symbol_size] for i in range(0, num_syms)] max_shift = (num_syms-1)*symbol_size print "Modified BM search: {0} syms of {1} bits: {2}".format( num_syms, symbol_size, ", ".join(needle_syms)) # Compute delta table dd1 = { } for i in reversed(range(0, (1<0: bitofs = tmp[-1] shift = max_shift - bitofs ttt = ", ".join(map(str,tmp)) dd1[sym] = shift #bitofs # Uncomment to see the delta table #print pprint.pprint(dd1) # Search loop print "Searching for a needle in the haystack" needle_ptr = 0 while needle_ptr + len(needle) <= len(haystack): cmp_ptr = needle_ptr + max_shift found = haystack[cmp_ptr:cmp_ptr+symbol_size] print "\n{0}\n{1}".format(ruler, haystack) print "{0}{2} (pos={1})".format(spaces(cmp_ptr), cmp_ptr, sym_marker) print "{0}{1}".format(spaces(cmp_ptr), found) print "{0}{1}".format(spaces(needle_ptr), needle) if found in dd1: if dd1[found] == 0: fl_match = True for t in range(0, num_syms): if needle_syms[t] != haystack[needle_ptr + t*symbol_size:needle_ptr + (t+1)*symbol_size]: fl_match = False if fl_match: print "MATCH FOUND!" print "\n{0}\n{1}\n{2}{3}\n".format(haystack,ruler,spaces(needle_ptr),needle) return True else: print "Byte matched, string mismatch, trying next position" needle_ptr += 1 else: print "found {0}, delta=>{1}".format(found, dd1[found]) needle_ptr += dd1[found] else: print "not found, big shift" needle_ptr += len(needle)-symbol_size return False def to_binstr(n,len): bv = [ "1" if n & (1<## #!/usr/bin/env python import random import pprint def to_binstr(n,len): bv = [ "1" if n & (1<0: bitofs = tmp[-1] delta[i] = bitofs dd1[sym] = bitofs print "NEEDLE: ", needle needle_syms = [ needle[i*symbol_size:(i+1)*symbol_size] for i in range(0, num_syms)] # Search loop ptr = len(needle) - symbol_size while ptr < len(haystack): print haystack print ruler print "{0}^ (pos={1})".format(spaces(ptr-1), ptr) found = haystack[ptr - symbol_size:ptr] print "{0}{1}".format(spaces(ptr-symbol_size), found) print "{0}{1}".format(spaces(ptr-len(needle)), needle) if found in dd1: print "found {0}, delta=>{1}".format(found, dd1[found]) if dd1[found] == (num_syms-1)*symbol_size: print "MATCHED!" break elif dd1[found]>0: print "shifting" ptr += dd1[found] else: print "WTF?" break else: print "not found, big shift" ptr += len(needle)-symbol_size #### $ python boyer_moore_bitstr.py 00001110100111001011101101110001111010111100111110111111100000001000100100001010010101000001101000110010011000101101010110011011000000 ^ 100001010010101000001101 NEEDLE: 100001010010101000001101 00001110100111001011101101110001111010111100111110111111100000001000100100001010010101000001101000110010011000101101010110011011000000 ^ ^ (pos=16) 10011100 100001010010101000001101 not found, big shift 00001110100111001011101101110001111010111100111110111111100000001000100100001010010101000001101000110010011000101101010110011011000000 ^ ^ (pos=32) 01110001 100001010010101000001101 not found, big shift 00001110100111001011101101110001111010111100111110111111100000001000100100001010010101000001101000110010011000101101010110011011000000 ^ ^ (pos=48) 11001111 100001010010101000001101 not found, big shift 00001110100111001011101101110001111010111100111110111111100000001000100100001010010101000001101000110010011000101101010110011011000000 ^ ^ (pos=64) 10000000 100001010010101000001101 not found, big shift 00001110100111001011101101110001111010111100111110111111100000001000100100001010010101000001101000110010011000101101010110011011000000 ^ ^ (pos=80) 00001010 100001010010101000001101 found 00001010, delta=>1 shifting 00001110100111001011101101110001111010111100111110111111100000001000100100001010010101000001101000110010011000101101010110011011000000 ^ ^ (pos=81) 00010100 100001010010101000001101 found 00010100, delta=>2 shifting 00001110100111001011101101110001111010111100111110111111100000001000100100001010010101000001101000110010011000101101010110011011000000 ^ ^ (pos=83) 01010010 100001010010101000001101 found 01010010, delta=>4 shifting 00001110100111001011101101110001111010111100111110111111100000001000100100001010010101000001101000110010011000101101010110011011000000 ^ ^ (pos=87) 00101010 100001010010101000001101 found 00101010, delta=>8 shifting 00001110100111001011101101110001111010111100111110111111100000001000100100001010010101000001101000110010011000101101010110011011000000 ^ ^ (pos=95) 00001101 100001010010101000001101 found 00001101, delta=>16 MATCHED! Roboticus@Waubli ~ $