in reply to Re^9: Why Boyer-Moore, Horspool, alpha-skip et.al don't work for bit strings. (Thank you Anonymonk + others.)
in thread Why Boyer-Moore, Horspool, alpha-skip et.al don't work for bit strings. (And is there an alternative that does?)
They both have a loop dependency on the table element. If the table is not densely populated, it can be beneficial to split the loop increment to either side of the test-and-branch. A pos += max_shift on the branch-predicted path has no memory dependency, thus allowing the loop to run speculatively ahead several iterations.
That sounds interesting. I think I get what you mean by the third sentence; but the emboldened bit of the second sentence could do with a little fleshing out?
If it helps, here is the 16-bit table code. Perhaps you could indicate what you would modify? No need to test; just suggest :)
static U64 count = 0; // just for gathering statistics. #define SL( v, o ) __shiftleft128( *((v)+1), *((v)), o ) void scan16( I64 *table, const U64 *pin, U8 opin, U64 lpin ) { U64 i; for( i = 0; i < 65536; ++i ) table[ i ] = lpin - 16 + 1; for( i = 0; i < lpin - 16 +1; ++i ) { U64 q = SL( pin+(i/64), (opin+i)%64 ); U16 idx = (q & 0xffff000000000000) >> 48ull; table[ idx ] = lpin - 16 - i; if( table[ idx ] < 1 ) table[ idx ] = 1; } } __forceinline U8 cmpBits4( const register U64 *hay, const register U64 *pin, U8 regi +ster ohay, U8 register opin, U64 lpin ) { U64 register i; for( i=0; i < lpin/64; ++i, ++hay, ++pin ) { ++count; if( SL( hay, ohay ) != SL( pin, opin ) ) return 0; } { U8 lastbits = lpin % 64; U64 mask = ( ( 1ull << ( lastbits ) ) -1 ) << ( 64 - lastbits +); ++count; if( ( SL( hay, ohay ) & mask ) != ( SL( pin, opin ) & mask ) ) + return 0; } return 1; } U64 bitstrstr4( const register U64 *hay, U64 *pin, U8 register ohay, U +8 register opin, U64 lhay, U64 lpin ) { U64 register i; I64 table[65536]; scan16( table, pin, opin, lpin ); count = 0; for( i = ohay; i < lhay+ohay-lpin; ) { if( cmpBits4( hay+(i/64), pin, i%64, opin, lpin ) ) return i; else { U16 idx = ( SL( hay+((i+lpin-16)/64), (i+lpin-16)%64 ) & 0 +xffff000000000000 ) >> 48; i += table[ idx ]; } } return -1; }
|
|---|