in reply to Re: [OT] The interesting problem of comparing (long) bit-strings.
in thread [OT] The interesting problem of comparing bit-strings.
Alright. Here's a demo of alpha skip search on bitstrings. The needle size is fixed, but it should be trivial to follow this with a longer compare. A possibility exists that some off-by-one errors have crept in. If that's the case, just deduct from my paycheck. :-)
Alpha skip is quadratic in the worst case and probably not the best choice when nasty matches are expected. Anyway, I'm curious how this might hold up against the optimized brute-force routines.
// Feed this with random needle and haystack: // dd if=/dev/urandom count=1 bs=77777 | ./a.out // // Fixed size needle. No optimizations attempted. #include <stdio.h> #include <stdint.h> #include <string.h> #include <unistd.h> enum { M = 8192, logM = 13, Mmask = M - 1, Mskip = M - logM + 1, Mnil }; typedef uint32_t U32; typedef uint16_t U16; typedef struct { U16 head[M], next[M]; // index chains } skiptab; // extract logM bits at bit offset i. assert(logM <= 16) static inline size_t fbits(char *v, size_t i) { U32 *x = (U32 *)&v[2 * (i >> 4)]; return Mmask & (*x >> (i & 15)); } // 'tis rotten. should make shifted copies of x and memcmp the middle. static size_t xcmp(char *y, size_t j, char *x) { size_t i, diff = fbits(y, j) - fbits(x, 0); for (i = M % logM; i < M && !diff; i += logM) { diff = fbits(y, i + j) - fbits(x, i); } return diff; } static void prep(char *x, skiptab *Z) { size_t i, f; for (i = 0; i < M; i++) { Z->head[i] = Mnil; } for (i = 0; i < Mskip; i++) { f = fbits(x, i); Z->next[i] = Z->head[f]; Z->head[f] = i; } } // y of length n is haystack; x is the needle size_t search(char *y, size_t n, char *x, size_t m) { skiptab Z[1]; size_t i, j; // assert(m == M); prep(x, Z); n -= logM; for (i = -1; (i += Mskip) <= n;) { j = fbits(y, i); for (j = Z->head[j]; j != Mnil; j = Z->next[j]) { if (!xcmp(y, i-j, x)) return i-j; } } return -1; } // one-bit rotate void rot1(char *v, size_t n) { size_t i, c; for (c = i = 0; i < n; c >>= 8, i++) { v[i] = c += *(unsigned char *)(v+i) << 1; } v[0] += c; } int main(int argc, char *argv[]) { static char x[M/8], y[10*M]; size_t r, i, n, m = sizeof x; if (read(0, x, sizeof x) != m) return printf("read x"), 1; n = read(0, y, sizeof y); if (n < m + 10) return printf("read y"), 1; r = y[n -= 1] & 7; i = *(U32*)&y[n -= 4]; i = i % (n - m - 1); printf("rot %zu at %zu; bitoff=%zu\n", r, i, r + i*8); memcpy(y + i, x, m); // stuff the needle for (; r; r--) rot1(y + i, m + 1); // and rotate printf("search=%ld\n", (long)search(y, n*8, x, m*8)); return 0; }
Note: to change the needle size, just edit the M and logM values. (Compile with -fno-strict-aliasing, or fix the type-punning).
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re^3: [OT] The interesting problem of comparing (long) bit-strings. (Doesn't work!)
by BrowserUk (Patriarch) on Mar 29, 2015 at 16:29 UTC | |
|
Re^3: [OT] The interesting problem of comparing (long) bit-strings.
by BrowserUk (Patriarch) on Mar 29, 2015 at 09:38 UTC | |
by oiskuu (Hermit) on Mar 29, 2015 at 10:32 UTC |