Data set: 2 files x 65_000 lines x 60 fields x 8 chars per field = ~60MB of data.
Format: |-delimited ASCII.
The search query is matched against field 1.
The data contains no 'escaped' |'s (e.g. \| or "xx|xx").
####
[mk_ecl_index]
#!/usr/bin/perl
# mk_ecl_index: build a DB_File hash index for each |-delimited ASCII file
# named on the command line.
#
# Usage: mk_ecl_index FILE [FILE ...]
#
# For every input FILE an index FILE.db is written.  The key is the first
# two fields of a line, still joined by their '|'; the value is every input
# line carrying that key, concatenated, each terminated by "\n".  Lines
# that do not contain at least two '|' delimiters are skipped.
#
# Dies if an input file cannot be opened or an index cannot be tied.
use strict;
use warnings;
use DB_File;
use Fcntl;    # O_CREAT, O_RDWR, O_TRUNC

for my $filename (@ARGV) {

    # Open the input before touching the index, so an unreadable input
    # never wipes out an existing FILE.db.
    open my $ascii, '<', $filename
        or die "Can't open $filename: $!";

    # O_TRUNC: always rebuild from scratch.  With the default flags a
    # second run would append another copy of every line to the values
    # already stored in an existing index.
    my %ecl;
    tie %ecl, 'DB_File', "$filename.db", O_CREAT | O_RDWR | O_TRUNC, 0666,
        $DB_HASH
        or die "Can't tie $filename.db: $!";

    while ( my $line = <$ascii> ) {
        chomp $line;

        # Capture the first two fields (including the '|' between them).
        next unless $line =~ m{^ ( [^|]* \| [^|]* ) \|}x;
        my $key = $1;

        # Basic DB_File can't store references, but the data is guaranteed
        # not to contain "\n", so matching lines are simply concatenated
        # with a newline after each one.
        my $stored = $ecl{$key};
        $stored = '' unless defined $stored;
        $ecl{$key} = $stored . $line . "\n";
    }

    close $ascii;
    untie %ecl;
}
[grep_ecl]
#!/usr/bin/perl
# grep_ecl: print the indexed lines stored under each query key.
#
# Usage: grep_ecl FILENAME QUERY [QUERY ...]
#
# n.b. the argument order is the opposite of Unix grep: the filename comes
# first, followed by one or more queries.
#
# Looks each QUERY up as an exact key in the DB_File hash FILENAME.db and
# prints the stored value to STDOUT.  A query with no entry is reported on
# STDERR.  Dies if no filename is given or the index cannot be tied.
use strict;
use warnings;
use DB_File;    # required for the tie below
use Fcntl;      # O_RDONLY

my $filename = shift;
die "usage: $0 filename query [query ...]\n"
    unless defined $filename;

# Open read-only: a lookup must never create or modify the index.  With the
# default flags a missing FILENAME.db would be silently created empty.
my %index;
tie %index, 'DB_File', "$filename.db", O_RDONLY, 0666, $DB_HASH
    or die "Can't tie to $filename.db: $!";

for my $query (@ARGV) {
    if ( exists $index{$query} ) {
        print $index{$query};    # newlines already provided
    }
    else {
        print STDERR "$0: $filename: $query not found\n";
    }
}

untie %index;