#!/usr/bin/perl
# mkindex.pl - build an inverted index (word => files containing it) for
# every *.txt file in /home/maverick/tmp and store it in a Berkeley DB
# B-tree file.  Each value is the list of file names joined with '~'.
use strict;
use warnings;

# Berkeley DBMs are my fav.
use DB_File;

my $index_path = "/home/maverick/tmp/index_dbm";
my %Index;

# Remove the old index and start fresh (a missing file is not an error).
unlink($index_path);

tie(%Index, 'DB_File', $index_path, O_RDWR|O_CREAT, 0640, $DB_BTREE)
    or die "Tie Failed: $!";

foreach my $file (glob("/home/maverick/tmp/*.txt")) {
    open(my $fh, '<', $file) or die "Can't open $file: $!";

    # Words already recorded for this file, so each file is listed at most
    # once per word.
    my %uniq;

    # Read line by line and break each line into words.
    while (my $line = <$fh>) {
        foreach my $word (split(/\W+/, $line)) {
            # split() yields a leading empty field when the line starts
            # with a non-word character; that is not a word.
            next if $word eq '';

            # Skip words we have already seen in this file.
            next if $uniq{$word}++;

            # I'm also assuming that ~ is safe to use as a separator.
            if (exists $Index{$word}) {
                $Index{$word} .= "~$file";
            }
            else {
                # It's the first addition of this word, so there is no
                # need to prepend a '~'.
                $Index{$word} = $file;
            }
        }
    }

    close($fh);
}

untie %Index;
####
#!/usr/bin/perl
# search.pl - interactive lookup against the index built by mkindex.pl.
# Reads one word per line and prints the files that contain it.
use strict;
use warnings;
use DB_File;

my %Index;

# The search tool never writes to the index, so open it read-only.
tie(%Index, 'DB_File', "/home/maverick/tmp/index_dbm", O_RDONLY, 0640, $DB_BTREE)
    or die "Tie Failed: $!";

# The prompt has no trailing newline, so flush output immediately or it may
# not appear before we block waiting for input.
$| = 1;

print ">";
while (my $query = <>) {
    # Chop off the newline (and any carriage return).
    $query =~ s/[\r\n]//g;

    my $files = $Index{$query};
    if (defined $files) {
        print "$query found in:\n";
        # Replace all the ~ with \n (without modifying the index).
        print join("\n", split(/~/, $files)), "\n";
    }
    else {
        print "Not Found\n";
    }
    print ">";
}

untie(%Index);
####
here's a file that contains
a bunch of random keywords
on many different lines
that we can use for the sake of
example.
####
here's another file
that contains even more
random text for the sake of example.
I hope this helps solve the problem
presented by tenfourty.
####
darkstar:~/tmp>./mkindex.pl
darkstar:~/tmp>./search.pl
>tenfourty
tenfourty found in:
/home/maverick/tmp/file2.txt
>maverick
Not Found
>example
example found in:
/home/maverick/tmp/file1.txt
/home/maverick/tmp/file2.txt
>text
text found in:
/home/maverick/tmp/file2.txt
>