use strict;
use warnings;
# $leading_0s[$n] is a regex matching any byte that begins with at lea
+st $n '0' bits
my @leading_0s = (qr//, map {
my $max = chr(0xFF >> $_);
qr/[\0-$max]/;
} 1..8);
# $trailing_0s[$n] is a regex matching any byte that ends with at leas
+t $n '0' bits
my @trailing_0s = (qr//, map {
my $step = 1 << $_;
my @ok = map { chr($step * $_) } 0..0xFF >> $_;
local $" = q[];
qr/[\Q@ok\E]/
} 1..8);
# Test if a string contains $n consecutive '0' bits.
# If yes, returns the bit offset of the match.
# Otherwise, returns -1.
sub match_0s {
my $n = shift; # number of consecutive '0' bits to be matched
local $_ = shift || $_; # string to match against
for my $trailing (0..8) {
my $full = $n - $trailing >> 3;
my $leading = $n - $trailing & 7;
if (/$trailing_0s[$trailing]\0{$full}$leading_0s[$leading]/) {
return $-[0] << 3 | -$trailing & 7;
}
}
return -1;
}
$_ = "ABC`\0abc";
print match_0s(14), $/; # 27
print match_0s(15), $/; # -1
print match_0s(24, 0.0.0), $/; # 0
Tested, but not benchmarked. It may or may not be faster that the straightforward approach using unpack. To change endianness, just switch @leading_0s and @trailing_0s.
EDIT: now works even when the string is made entirely of null bytes.
EDIT: doesn't work for $n < 8. This case is left as an exercise to the reader. (: