#! perl -slw
use strict;

## Count the occurrences (overlapping ones included) of a bit pattern in a
## binary file, at any bit alignment.
##
## Usage:  script [-BSIZE=bytes] file bitpattern
##
##   file        the file to scan; it is read in raw (binary) mode
##   bitpattern  a string of 0s and 1s, most significant bit first,
##               e.g. 1100000011
##   -BSIZE=n    number of bytes read per pass (default 1_000_000)

$| = 1;

## -s stores -BSIZE=n in the package variable $main::BSIZE, so this has to be
## `our`; a lexical `my $BSIZE` would shadow it and ignore the switch.
our $BSIZE ||= 1_000_000;
$BSIZE =~ /\A[0-9]+\z/ && $BSIZE > 0
    or die "BSIZE must be a positive integer";

my $file = $ARGV[ 0 ];
defined $file or die "no file supplied";

## The file is only ever read, so do not ask for write access.
open my $fh, '<:raw', $file or die "$file : $!";

## Test definedness/length rather than truth, so the pattern '0' is accepted.
my $pattern = $ARGV[ 1 ];
defined $pattern && length $pattern or die "no pattern supplied";
$pattern =~ /\A[01]+\z/ or die "pattern must consist only of 0s and 1s";

## A hit that straddles two reads has at most length( $pattern ) - 1 of its
## bits in the earlier read. Carrying exactly that many *bits* forward means
## no straddling hit is missed, and no hit can lie wholly inside the carried
## bits, so nothing is counted twice.
my $keep = length( $pattern ) - 1;

my $carry  = '';    ## trailing bits of what has already been searched
my $buffer = '';
my $buffs  = 0;
my $found  = 0;

while( 1 ) {
    my $read = sysread( $fh, $buffer, $BSIZE );

    ## undef means a read error, 0 means end of file.
    defined $read or die "$file : $!";
    last unless $read;

    ## Convert the buffer to asciiized bits, prefixed by the carried bits.
    my $bits = $carry . unpack 'B*', $buffer;

    printf "\r%d: [%d] ", $buffs, $found;

    ## Search for the pattern. index() returns -1 on failure, so the
    ## 1 + ... makes the condition false and ends the loop; on success $p
    ## is one past the hit, which lets overlapping hits be found too.
    my $p = 0;
    while( $p = 1 + index( $bits, $pattern, $p ) ) {
        ## And record the hits
        $found++;

        ## Calculate byte/bit offsets (disabled; uncomment to list each hit).
        ## $bits starts length( $carry ) bits before the current read; the
        ## arithmetic assumes every earlier read returned a full $BSIZE bytes.
#        my $at = $buffs * $BSIZE * 8 - length( $carry ) + $p - 1;
#        printf "\rFound it at byte: %d bit: %d '%s'",
#            int( $at / 8 ), $at % 8,
#            substr( $bits, $p - 1, length( $pattern ) );
    }

    ## Keep track of the number of buffers processed
    $buffs++;

    ## Keep only the trailing bits that could still begin a hit completed
    ## by the next read (none at all for a one-bit pattern).
    $carry = length( $bits ) > $keep
        ? substr( $bits, length( $bits ) - $keep )
        : $bits;
}

close $fh;

print "Found $found occurances of '$pattern'";

__END__
Sample runs. NOTE(review): recorded with the earlier byte-overlap version of
this script; counts from the current version may differ slightly.

[16:40:12.64] P:\test>429065 data\100millionbytes.dat 1111111111
100: [0] Found 0 occurances of '1111111111'

[16:40:46.04] P:\test>
[16:41:37.46] P:\test>429065 data\100millionbytes.dat 1100000011
100: [24999982] Found 24999990 occurances of '1100000011'

[16:41:56.28] P:\test>
[16:42:03.65] P:\test>429065 data\1000millionbytes.dat 1100000011
1000: [249999876] Found 249999900 occurances of '1100000011'

[16:45:24.09] P:\test>