in reply to Re^2: Fastest Search method for strings in large file
in thread Fastest Search method for strings in large file
Re: Fastest Search method for strings in large file, modified to print whole "\n"-delimited records to stdout:
#! perl -slw
# Print every "\n"-delimited record of a file that contains any of the
# strings in @needles. The file is read in large chunks with sysread
# rather than line by line.
#
# Usage: script [-BUFSIZE=bytes] file
#
# Switches on the #! line:
#   -s  lets -BUFSIZE=N on the command line set $main::BUFSIZE
#   -l  sets $\ so that every print is terminated with a newline
#   -w  enables warnings
use strict;
use warnings;
use List::Util qw[ max ];

# Bytes requested per sysread call; 2**16 unless overridden with -BUFSIZE=N.
our $BUFSIZE ||= 2**16;

# Literal strings to search for. quotemeta stops any character in a needle
# from being interpreted as regex syntax.
my @needles     = qw[ 12345 67890 ];
my $alternation = join '|', map quotemeta, @needles;
my $regex       = qr{(?:$alternation)};

# One whole record (a run of non-newline characters) containing a needle.
# ${regex} is written with braces so the following "[" cannot be parsed as
# an array subscript.
my $recordRe = qr{^([^\n]*${regex}[^\n]*)$}m;

@ARGV or die "usage: $0 [-BUFSIZE=bytes] file\n";
my $path = $ARGV[ 0 ];
open my $fh, '<', $path or die "$path: $!";

my $buf   = '';    # complete records plus, at the front, any carried tail
my $carry = 0;     # length of the unterminated record held at the front of $buf

while( 1 ) {
    # Append the next chunk after the carried-over partial record.
    my $read = sysread $fh, $buf, $BUFSIZE, $carry;
    defined $read or die "$path: $!";
    last unless $read;

    my $lastNl = rindex $buf, "\n";
    if( $lastNl < 0 ) {
        # No complete record yet (record longer than the chunk size):
        # keep everything and read more.
        $carry = length $buf;
        next;
    }

    # Split off the unterminated tail so that only complete records are
    # searched; otherwise a record straddling a chunk boundary would be
    # printed truncated now and again in full on the next pass.
    my $tail = substr $buf, $lastNl + 1;
    substr( $buf, $lastNl + 1 ) = '';

    # Cheap scan for any needle first; only run the per-record regex when
    # the chunk is known to contain at least one hit.
    if( $buf =~ $regex ) {
        print $1 while $buf =~ m{$recordRe}g;
    }

    $buf   = $tail;
    $carry = length $tail;
}

# A final record with no trailing newline is still sitting in $buf.
print $buf if $carry and $buf =~ $regex;

close $fh;
On my system, performance tails off sharply with BUFSIZEs above 2**16.
|
|---|