// Feed this with random needle and haystack: // dd if=/dev/urandom count=1 bs=77777 | ./a.out // // Fixed size needle. No optimizations attempted. #include #include #include #include enum { M = 8192, logM = 13, Mmask = M - 1, Mskip = M - logM + 1, Mnil }; typedef uint32_t U32; typedef uint16_t U16; typedef struct { U16 head[M], next[M]; // index chains } skiptab; // extract logM bits at bit offset i. assert(logM <= 16) static inline size_t fbits(char *v, size_t i) { U32 *x = (U32 *)&v[2 * (i >> 4)]; return Mmask & (*x >> (i & 15)); } // 'tis rotten. should make shifted copies of x and memcmp the middle. static size_t xcmp(char *y, size_t j, char *x) { size_t i, diff = fbits(y, j) - fbits(x, 0); for (i = M % logM; i < M && !diff; i += logM) { diff = fbits(y, i + j) - fbits(x, i); } return diff; } static void prep(char *x, skiptab *Z) { size_t i, f; for (i = 0; i < M; i++) { Z->head[i] = Mnil; } for (i = 0; i < Mskip; i++) { f = fbits(x, i); Z->next[i] = Z->head[f]; Z->head[f] = i; } } // y of length n is haystack; x is the needle size_t search(char *y, size_t n, char *x, size_t m) { skiptab Z[1]; size_t i, j; // assert(m == M); prep(x, Z); n -= logM; for (i = -1; (i += Mskip) <= n;) { j = fbits(y, i); for (j = Z->head[j]; j != Mnil; j = Z->next[j]) { if (!xcmp(y, i-j, x)) return i-j; } } return -1; } // one-bit rotate void rot1(char *v, size_t n) { size_t i, c; for (c = i = 0; i < n; c >>= 8, i++) { v[i] = c += *(unsigned char *)(v+i) << 1; } v[0] += c; } int main(int argc, char *argv[]) { static char x[M/8], y[10*M]; size_t r, i, n, m = sizeof x; if (read(0, x, sizeof x) != m) return printf("read x"), 1; n = read(0, y, sizeof y); if (n < m + 10) return printf("read y"), 1; r = y[n -= 1] & 7; i = *(U32*)&y[n -= 4]; i = i % (n - m - 1); printf("rot %zu at %zu; bitoff=%zu\n", r, i, r + i*8); memcpy(y + i, x, m); // stuff the needle for (; r; r--) rot1(y + i, m + 1); // and rotate printf("search=%ld\n", (long)search(y, n*8, x, m*8)); return 0; }