#!/usr/bin/perl

# mkindex.pl - build an inverted index (word -> files containing that word)
# for every *.txt file in the index directory and store it in a Berkeley DB
# B-tree.  Each value is the list of matching file names joined with '~'.

use strict;
use warnings;

# Berkeley DBMs are my fav.
use DB_File;

my $dir        = '/home/maverick/tmp';
my $index_file = "$dir/index_dbm";

my %Index;

# Remove the old index and start fresh.  If the old file exists but cannot
# be removed we must stop: tie() would reopen it and this run would append
# to stale entries.
if (-e $index_file) {
    unlink($index_file) or die "Can't remove old index $index_file: $!";
}
tie(%Index, 'DB_File', $index_file, O_RDWR|O_CREAT, 0640, $DB_BTREE)
    or die "Tie Failed: $!";

foreach my $file (glob("$dir/*.txt")) {

    # '~' separates file names inside an index value, so a name that
    # contains it could not be split back out correctly by a reader.
    if (index($file, '~') >= 0) {
        warn "Skipping $file: name contains the '~' separator\n";
        next;
    }

    # Three-arg open with a lexical handle: the file name is never
    # interpreted as a mode or a pipe command.
    open(my $fh, '<', $file) or die "Can't open $file: $!";

    # Words already recorded for this file, so each file is listed at most
    # once per word.
    my %seen;

    # Stream the file line by line instead of slurping it.
    while (my $line = <$fh>) {

        # Match runs of word characters directly.  Splitting on /\W+/ would
        # produce an empty leading field for lines that begin with a
        # non-word character, and that empty string would end up as a key.
        foreach my $word ($line =~ /\w+/g) {
            next if $seen{$word}++;

            # One fetch and one store against the tied hash per new word.
            my $files = $Index{$word};
            $Index{$word} = defined $files ? "$files~$file" : $file;
        }
    }
    close($fh);
}
untie %Index;

##</code><code>##

#!/usr/bin/perl

# search.pl - interactive lookup against the '~'-separated word index.
# Reads one word per input line and prints every file recorded for that
# word, or "Not Found".

use strict;
use warnings;

use DB_File;

my $index_file = '/home/maverick/tmp/index_dbm';

# The prompt has no trailing newline; unbuffer STDOUT so it is shown before
# we block on input, even when output is not a terminal.
$| = 1;

# The index is only read here, so open it read-only: this works on an index
# without write permission and cannot modify it by accident.
my %Index;
tie(%Index, 'DB_File', $index_file, O_RDONLY, 0640, $DB_BTREE)
    or die "Tie Failed: $!";

print ">";
while (my $query = <>) {
    # Strip the line terminator (handles both \n and \r\n input).
    $query =~ tr/\r\n//d;

    # Single fetch from the tied hash; undef means the word is not indexed.
    my $files = $Index{$query};
    if (defined $files) {
        print "$query found in:\n";
        # Show one file per line (the stored index value is left untouched).
        print join("\n", split(/~/, $files)), "\n";
    }
    else {
        print "Not Found\n";
    }
    print ">";
}
untie(%Index);

##</code><code>##

here's a file that contains a bunch of random
keywords on many different lines that we can use for the sake of example.

##</code><code>##

here's another file that contains even more random text
for the sake of example.  I hope this helps solve the problem
presented by tenfourty.

##</code><code>##

darkstar:~/tmp>./mkindex.pl 
darkstar:~/tmp>./search.pl 
>tenfourty
tenfourty found in:
/home/maverick/tmp/file2.txt
>maverick
Not Found
>example
example found in:
/home/maverick/tmp/file1.txt
/home/maverick/tmp/file2.txt
>text
text found in:
/home/maverick/tmp/file2.txt
>