#!/usr/bin/env perl
#
# search a large file for lines containing a regex
#
# Pattern specs are read from the __DATA__ section at the end of this file,
# one "name:regex" pair per line (blank lines and lines starting with '#'
# are skipped).  Each pattern gets its own output file, FILESRCH.1,
# FILESRCH.2, ... in spec order.  Every line of the input file that matches
# a pattern is copied to that pattern's output file, a progress line is
# printed every $PROGRESS_EVERY input lines, and the per-name match counts
# are dumped to STDOUT at the end.
#
use strict;
use warnings;
use Data::Dump 'pp';

my $INPUT_FILE     = 'a_big_file';
my $PROGRESS_EVERY = 100000;

# Each entry is [ compiled regex, pattern name, output handle, output path ].
my @rexlist;
my $cnt = 0;
while (my $spec = <DATA>) {
    next if $spec =~ /^\s*($|#)/;       # skip blank lines and comments
    $spec =~ s/\s+$//;                  # drop trailing whitespace / newline

    # Limit of 2: only the first ':' separates the name from the regex, so
    # a regex may itself contain ':' without being truncated.
    my ($name, $rex) = split /:/, $spec, 2;

    # An absent or empty regex would compile to a pattern that matches every
    # line; treat it as a mistake in the spec instead.
    die "Malformed pattern spec (expected name:regex): $spec\n"
        unless defined $rex && length $rex;

    my $regex = qr/$rex/;
    ++$cnt;
    my $out_path = "FILESRCH.$cnt";
    open my $FH, '>', $out_path
        or die "Cannot open $out_path for writing: $!";
    push @rexlist, [ $regex, $name, $FH, $out_path ];
}

open my $IFH, '<', $INPUT_FILE or die "Cannot open $INPUT_FILE: $!";

my %cnts;               # pattern name => number of matching input lines
my $lines = 0;
my $start = time;
while (my $line = <$IFH>) {
    ++$lines;
    if ($lines % $PROGRESS_EVERY == 0) {
        my $secs = time - $start;
        print "$lines: $secs s\n";
    }

    # A line is tested against every pattern, so it may land in several
    # output files.
    for my $r (@rexlist) {
        my ($rex, $name, $OFH) = @$r;
        if ($line =~ $rex) {
            print {$OFH} $line;
            ++$cnts{$name};
        }
    }
}
close $IFH or die "Error closing $INPUT_FILE: $!";

# Buffered write errors only surface at close, so check every output handle.
for my $r (@rexlist) {
    my (undef, undef, $OFH, $out_path) = @$r;
    close $OFH or die "Error closing $out_path: $!";
}

print pp(\%cnts);

# NOTE(review): the quote characters below are part of the patterns exactly
# as written (some have only a leading quote), and newRec2's pattern starts
# with a space -- confirm this matches the quoting used in the input file.
__DATA__
aNumber:'\d+'
CorporateRecord:'CORPORATE'
null:NULL
oldRec:'200[0-3]-\d\d-\d\d
newRec:'20?[4-9]-\d\d-\d\d
newRec2: '201\d-\d\d-\d\d