Category: Text
Author/Contact Info Patrick van Venetien / patrick@fictional.net
Description: Perl module for indexing text. It does not read files itself, it just indexes text you feed it. Making it a true indexer for various usage.

It includes the indexing engine and a search engine. It uses Search::Dict

The search engine supports a simple query language and the code includes a query parser supporting nested queries like
(perl and monk (not china))

Installation: put it in (lib/)Search/
usage: use Search::Findex;

Gonna try to add it to CPAN (my first public Perl contribution to the community)

* Org post: Aug 23, 2002
* Update: Nov 14, 2002 - Delete method, Documentation updated, Sorting updated, several bugfixes.

License: GNU

#
# Document Index and search engine
#
# Todo
#    deletemarking ipv realtime delete
#    resultcache per zoekwoord
#    searchresult op date
package Findex;
use strict;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
use Search::Dict;

require Exporter;

$VERSION = '1.00';
@ISA = qw(Exporter AutoLoader);
@EXPORT = qw(
);


#
# Constructor.
#
sub new {
    #
    # $self is an anonymous hash.
    #
    my $self = {};

    #
    # Initialize data for this object.
    #
    $self->{INDEXDIR} = 'myindex';
    $self->{INDEX} = undef;
    $self->{INDEXSEPERATOR} = chr(149);
    $self->{CHANGEDINDEX} = undef;
    $self->{OPTIMIZETIME} = time; # see if index was changed
    $self->{SCORE}=undef;
    $self->{HITWORDS}=undef;
    
    # Words no use to index
    $self->{STOPWORDS}="\w|aan|bij|de|den|der|dit|een|en|haar|hem|hen|
+het|hij|ik|in|je|maar|of|op|over|van|voor";
    $self->{SPECIALCHARS}="üéâäàçêëèïîìÄÅÉæÆôöòûùÿÖÜáíóúñÑãõØøœŒÀÃÕ".c
+hr(192).chr(193)."ÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛܧßàßâãäåµçèéêëìíîïðòóôõûü
+";
    $self->{LINKCHARS}='\-\'\`';

    #
    # Let the object know its class.
    #
    bless($self);

    return $self;
}

#
# Destructor.
#
sub DESTROY {
#    print "Argh. Life was sweet.\n";
}

#
# Data-access methods.
# You can pass a parameter, in which case
# the attribute gets set to the new value.
# If you skip the parameter, the value is
# returned.
#
sub indexdir {
    #
    # Extract first parameter--the object ref.
    #
    my $self = shift;

    #
    # If there are any arguments left, then set value.
    #
    if (@_) {
   $self->{INDEXDIR} = shift;
    }
    mkdir $self->{INDEXDIR} if ! -e $self->{INDEXDIR};
    return $self->{INDEXDIR};
}

sub getnewfragname {
    my $self = shift;
    my $indexname = shift;    
    my $fragnr = "0";
    return if !$indexname;
    $indexname = $self->{INDEXDIR}."/$indexname";
    while(-e $indexname."-$fragnr.ndx"){
        $fragnr ++;
    }
    return $indexname."-$fragnr.ndx";
}

sub updateindex {
    my $self = shift;
    my ($fragdx,$key) = "";
    my $schars=$self->{SPECIALCHARS};
    my $seperator=$self->{INDEXSEPERATOR};
    $self->{CHANGEDINDEX}=undef;
    
#    print "Updating index\n";
    mkdir $self->{INDEXDIR} if ! -d $self->{INDEXDIR};

    foreach (keys %{$self->{INDEX}}){
        my $sortval = "";
        my %sortthis = undef;
        foreach(split(/\n/,$self->{INDEX}{$_})){
            $sortthis{"$1"}.=$2 if /^([^ \?]+)\s*\?(.*)$/s;
        }
        open(INDEX,">".$self->getnewfragname($_));
        foreach $sortval (sort keys %sortthis){
            next if ($sortval=~/^\s*$/ || $sortthis{$sortval} =~/^\s*$
+/);
            $sortval." " if $sortval=~/^\d+$/;
            print INDEX $sortval."\?".$sortthis{$sortval}."\n";
        }
        close(INDEX);
        $self->{CHANGEDINDEX}{"$_-$fragdx.ndx"}=1;
    }
#    print "Sorting id's and file\n";
    $self->sortindex("idindex-all.ndx");
    $self->sortindex("fileindex-all.ndx");
    $self->{INDEX}=undef;
    return;
}

sub updatedoc {
    my $self = shift;
    my $string = shift; # Unique identifier
    my $date = shift;
    my $data = shift;
    my $docdata = shift;

    $self->removedoc($string);

    my $docid = $self->adddoc($string,$date,$data);
    $self->indexdoc($docdata,$docid);
    return $docid;
}
#
# Index a document - indexdoc ( document, docid/docstring)
#
sub indexdoc {
    my $self = shift;
    my $document = lc(shift);
    my $docid = shift;
    my $indexname = "";
    my (@words,$word, %sword) = "";
    my $schars=$self->{SPECIALCHARS};
    my $stopwords = $self->{STOPWORDS};
    my $linkchars = $self->{LINKCHARS};
    $document=~s/([\w$schars]+)[$linkchars]([\w$schars]+)/$1 $2 $1$2/i
+gs; # create 3 words if linked words
    foreach $word (split(/[^\w$schars]+/s,$document)){
#        print $word."\n" if $word =~/[$schars]/igs;
#    foreach $word (split(/[\s\.\{\}\[\]\(\)\+\,\/\\\#\!\?\"\'\:\;\&\<
+\>\`\~\*\=]+/s,$document)){
        next if $sword{$word};
        next if $word=~/^($stopwords|[a-z0-9])$/i;
        next if ! ($indexname = $self->getindexbasename($word));

        $sword{$word}++;
        $self->{INDEX}{$indexname}.=$word."\?".$self->{INDEXSEPERATOR}
+.$docid."\n";
    }

    return $docid;
}

# Sort the given index
sub sortindex {
    my $self = shift;
    my $indexfile = shift;
    my $seperator = $self->{INDEXSEPERATOR};
    my ($sortval,@sortindex) = "";
    #print "$indexfile\n";
    return if $indexfile !~ /\.ndx$/i;

    #print "Optimize index $indexfile\n";
    open(INDEX,$self->{INDEXDIR}."/$indexfile") || return; # Read inde
+x
    @sortindex=<INDEX>;
    close(INDEX);
#    print "Sorting $indexfile\n";
    open(INDEX,">".$self->{INDEXDIR}."/$indexfile"); # Read index
    foreach (sort @sortindex){
        print INDEX $_;
    }
    close(INDEX);
}

sub optimizeall {
    my $self = shift;
    my ($baseindex,@indexes,@indexdata) = "";
    $self->{CHANGEDINDEX}=undef;
    # join fragmented indexes
    opendir(INDEXES,$self->{INDEXDIR});
    @indexes=grep(!/\-all/,readdir(INDEXES));
    closedir(INDEXES);
    foreach(@indexes){
        $baseindex = "";
        $baseindex=$1 if /^([\d]+)\-/;
        next if !$baseindex;
#        print "Joining $baseindex with $_\n";
        open(INDEX,$self->{INDEXDIR}."/$_") || next;
        open(TINDEX,">>".$self->{INDEXDIR}."/$baseindex\-all.ndx");
        @indexdata=<INDEX>;
        print TINDEX @indexdata;
        close(INDEX);
        unlink $self->{INDEXDIR}."/$_";
    }    

    $self->optimizeindex;
    $self->{OPTIMIZETIME}=time;
}
sub optimizeindex {
    my $self = shift;
    my $seperator = $self->{INDEXSEPERATOR};
    my ($indexfile,$indexfile,$sortval,@sortindex, @indexfiles) = "";

    @indexfiles=(keys %{$self->{CHANGEDINDEX}});
    $self->{CHANGEDINDEX}=undef;
    if(!@indexfiles){
        opendir(INDEXDIR,$self->{INDEXDIR});
        @indexfiles=readdir(INDEXDIR);
        closedir(INDEXDIR);
    }
    
    foreach $indexfile (@indexfiles){
        next if $indexfile !~ /\.ndx$/;
        next if $indexfile!~/^([\d]+|all)\-/;

        my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,$atime,$mtime
+,$ctime,$blksize,$blocks) = stat($self->{INDEXDIR}."/$indexfile");
#        print "$indexfile $mtime -> ".$self->{OPTIMIZETIME}."\n";
        next if $mtime < ($self->{OPTIMIZETIME}-5);
#        print "Optimize index $indexfile\n";
        open(INDEX,$self->{INDEXDIR}."/$indexfile"); # Read index
        @sortindex=<INDEX>;
        close(INDEX);
        chomp(@sortindex);
        
        my %sortthis = undef;
        foreach(@sortindex){
            $sortthis{"$1"}.=$2 if /^([^ \?]+)\s*\?(.+)$/s;
        }
        open(INDEX,">".$self->{INDEXDIR}."/$indexfile");
        foreach $sortval (sort keys %sortthis){
            next if ($sortval=~/^\s*$/ || $sortthis{$sortval} =~/^\s*$
+/);
            my %svalues = undef;
            foreach ($sortthis{$sortval}=~ /($seperator[^$seperator]*)
+/gs){
                    $svalues{$_}=1;
                }
            $sortthis{$sortval}="";
            foreach(keys %svalues){
                $sortthis{$sortval}.=$_ if $sortthis{$sortval}!~/$_/ ;
            }
            print INDEX $sortval."\?".$sortthis{$sortval}."\n" if $sor
+tthis{$sortval};
        }
        close(INDEX);
    }
    $self->{OPTIMIZETIME} = time; # see if index was changed
    $self->removedocs;
}

#
# remove a document from the index - removedoc ( docid )
#
sub removedoc {
    my $self = shift;
    my $docid = shift;
    open(REMOVEDOC,">>".$self->{INDEXDIR}."/.delete");
    print REMOVEDOC $self->getid($docid)."\n";
    close(REMOVEDOC);
}
sub removedocs {
    my $self = shift;
    my $sepchar = $self->{INDEXSEPERATOR};
    my ($found,@indexdata) = "";
    open(DELETEDOCS,$self->{INDEXDIR}."/.delete");
    my @Adeleteids= <DELETEDOCS>;
    close(DELETEDOCS);
    chomp(@Adeleteids);
    
    if(scalar(@Adeleteids)){
        my $deleteids = join('|',@Adeleteids);
        $deleteids=~s/\.delete//igs;

#    print "Removing $deleteids\n";
        
        opendir(INDEXES,$self->{INDEXDIR});
        foreach(readdir INDEXES){
            next if !/\.ndx/;
            open(INDEX,$self->{INDEXDIR}."/$_");
            @indexdata=<INDEX>;
            close(INDEX);
            $found=0;
            foreach(@indexdata){
                my $orgline=$_;
                if(/$sepchar($deleteids)[$sepchar\n\r]/){
                    $found=1;
                    $_=~s/$sepchar($deleteids)([$sepchar\n\r])/$2/g;
                } elsif(/^[^\?]+\?($deleteids)\n$/){
                    $found=1;
                    $_=~s/^[^\?]+\?($deleteids)\n$//g;
                } elsif(/^($deleteids)\s*\?.*$/){
                    $found=1;
                    $_=~s/^($deleteids)\s*\?.*$//g;
                }
                $_="" if /^[\s\n\r]*$/ || /[^\?]+\?[\s\n\r]*$/;
            }
            if($found){
                open(INDEX,">".$self->{INDEXDIR}."/$_");
                print INDEX @indexdata;
                close(INDEX);
            }
        }
        closedir(INDEXES);    
        unlink($self->{INDEXDIR}."/.delete");
    }
    return 1;
}

sub getiddata {
    my $self = shift;
    my $id = shift;
    my $foundid = undef;

    open(FILE,$self->{INDEXDIR}."/idindex-all.ndx"); # || print "Error
+ opening ID Index.\n";
    my $newpos=look *FILE, $id, 0, 0;
    $foundid = <FILE>;
    close(FILE);
    chomp($foundid);
    if($foundid =~s/^$id \?//){
        return split(/$self->{INDEXSEPERATOR}/,$foundid);
    } else {
        return 0;
    }
    
}
# idgenerator ( string,date, data);
sub getid {
    my $self = shift;
    my $string = shift;
    my $id = undef;
    my $foundid = undef;
    open(FILE,$self->{INDEXDIR}."/fileindex-all.ndx");# || print "Erro
+r opening ID Index.\n";
    my $newpos=look *FILE, $string, 0, 0;
    $foundid = <FILE>;
    close(FILE);
#    print "Searched $string : Found : $foundid\n";
    if($foundid =~/^$string\?(\d+)$/){
        return $1;
    }
    return;
}

sub adddoc {
    my $self = shift;
    my $string = shift;
    my $date = shift;
    my $data = shift;
    my $id = undef;

    my $id = $self->generateid;

    $data=~s/[\r]//igs if $data=~/\n/;
    $data=~s/[\n\r]/\\n/igs;
    open(FILE,">>".$self->{INDEXDIR}."/idindex-all.ndx");# || print "E
+rror writing new ID.\n";
    print FILE "$id \?$string".$self->{INDEXSEPERATOR}.$date.$self->{I
+NDEXSEPERATOR}.$data."\n";
    close(FILE);
    
    open(FILE,">>".$self->{INDEXDIR}."/fileindex-all.ndx") || print "E
+rror writing new ID.\n";
    print FILE "$string\?$id\n";
    close(FILE);
    $self->{CHANGEDINDEX}{"idindex-all.ndx"}++;
    $self->{CHANGEDINDEX}{"fileindex-all.ndx"}++;
    return $id;

}

# generate uniqueid
sub generateid {
    my $self = shift;
    my $id=0;
    open(LASTID,$self->{INDEXDIR}."/lastid") || print "Error generatin
+g id\n";
    $id=<LASTID> || 1;
    close(LASTID);

    open(LASTID,">".$self->{INDEXDIR}."/lastid");
    print LASTID ($id+1);
    close(LASTID);
#    $id=~s/[^\w\d]//igs;
    return $id;
}

# determine indexname from word
sub getindexbasename {
    my $self = shift;
    my $indexname = shift;
    my $schars=$self->{SPECIALCHARS};
#    if($indexname=~/^([\w$schars])([\w$schars])/){
#        $indexname = ord($1).ord($2);
    if($indexname=~/^([\w$schars])/){
        $indexname = ord($1);
        return $indexname;
    } else {
        return 0;
    }
}

# determine indexnames from word returns list
sub getindexnames {
    my $self = shift;
    my $indexname = shift;
    my $schars=$self->{SPECIALCHARS};
    my @indexes = "";
    $indexname=$self->getindexbasename($indexname);
    return 0 if !$indexname;
    my $fragnr = "0";
    while(-e $self->{INDEXDIR}."/$indexname-$fragnr.ndx"){
        push(@indexes,"$indexname-$fragnr.ndx");
        $fragnr++;
    }
    push(@indexes,"$indexname-all.ndx") if -e $self->{INDEXDIR}."/$ind
+exname-all.ndx";
    return @indexes;
}


sub search {
    my $self = shift;
    my $query = shift;
    my (@words, $seekword, $parsedseekword, $orgseekword, $foundword) 
+= "";
    my $wordsquery = $query;
    my $indexname = "";    
    my $seperator = $self->{INDEXSEPERATOR};
    my $schars=$self->{SPECIALCHARS};
    my (@results, $fragdx,%ids, %cids, %foundwords, $id, %hitwords) = 
+undef;

    $wordsquery=~s/\s(and|or|not)\s/ /igs;
    @words=split(/[\(\)\s]+/,$wordsquery);
    foreach $seekword (@words){
        %cids = undef;
        $orgseekword = $seekword;
        $parsedseekword= $seekword;
        $parsedseekword=~s/\*.*$/\.\*/;
        $seekword=~s/\*.*$//;
        foreach $indexname ($self->getindexnames(lc($seekword))){
            next if !$indexname;
            open(FILE,$self->{INDEXDIR}."/$indexname") || print "error
+ opening Index $indexname for $seekword\n";
            my $newpos=look *FILE, $seekword, 0, 0;
            $foundword=<FILE>;
            while(){
                chomp $foundword;
                last if $foundword !~/^$seekword/i;
                if($foundword =~/^$parsedseekword\s*\?/i){
                    my $ffoundword=$1 if $foundword=~/^([^\?\s]+)\?/;
                    foreach ($foundword=~ /[$seperator]([^$seperator\n
+\r\s]+)/gs){
                        $ids{$_}++;
                        $foundwords{$_}.=";$orgseekword " if !$cids{$_
+}; # maak gevonden woordenlijst voor dit artikel
                        $hitwords{$_}.=";$ffoundword";
                        $cids{$_}++;
                    }
                }
                last if eof(FILE);
                $foundword=<FILE>;
            }
            close(FILE);
        }
    }
    $self->{SCORE}=undef;
    $self->{HITWORDS}=undef;
    undef @results;
    for $id (reverse sort {$ids{$a} <=> $ids{$b}} keys %ids) {
        next if !$self->ParseQuery($query,$foundwords{$id});
        push(@results,$id);
        $self->{HITWORDS}{$id}=$hitwords{$id};
        $self->{SCORE}{$id}=$ids{$id};
    }
    return(@results);
}
# "(test and bertje) or (test or (quasi and blah) or blah)" , " Search
+ this text"
sub ParseQuery {
    my $self = shift;
    my $query = "(@_[0])";
    my $search = ";@_[1];";
    my $subq = "";
    my $result = 0;

    # despace
    $query=~s/^\s+//g;
    $query=~s/\s+$//g;
    $query=~s/\(\s*/\(/g;
    $query=~s/\s*\)/\)/g;
    $query=~s/\s+/ /g;
    
    # Add ()
    $query =~ s/(\W)([^\s\)\(]+)\s(and|or|not)\s([^\s\)\(]+)(\W)/$1($2
+ $3 $4)$5/gs;
    while($query=~/\(([^\(\)]*)\)/){
        $subq=$1;
        if($subq =~ /^([^\s]+)\s+(and|or|not)\s+([^\s]+)$/i){
            if($2 eq 'not'){
                $result="$1 and ".$self->Logics($2,"$3",$search);
            } else {
                $result=$self->Logics($2,"$1\;$3",$search);
            }
        } elsif($subq =~ /^([^\s]+)$/i){
            $result=$self->Logics("AND","$1",$search);
        } else {
            $result = $subq;
        }
        $query=~s/\([^\(\)]*\)/$result/;
        $query =~ s/(\W)([^\s\)\(]+)\s(and|or|not)\s([^\s\)\(]+)(\W)/$
+1($2 $3 $4)$5/gs;
    }
    return $query;
}


# ( "[AND/OR/NOT]" , "test;blah", "test;words;blah")
# return 1 if true, else 0
sub Logics {
    my $self = shift;
    my $term = uc(shift);
    my @words = split(/[\s\;]+/,shift);
    my $awords = ";".join(';',split(/[\s\;]+/,shift)).";";
    my ($lcount, $twords) ="";
#    print "Term: $term - Words: @words - Aword: $awords\n";
    my $schars = $self->{SPECIALCHARS};
    $awords=~s/[^\w$schars]/\;/igs;
    
    $lcount=0;
    if($term eq 'AND'){
        foreach(@words){
            $twords=$awords;
            if($_ eq '1'){
                $lcount++;
            } elsif($_ eq '0'){
            } elsif($twords =~/\;$_\;/igs){
                $lcount++;
            } else {
            #print "\;$_\; ! $awords\n";
            }
        }
        if($lcount eq scalar(@words)){
            return 1;
        } else {
            return 0;
        }
    } elsif($term eq 'OR'){
        $twords=$awords;
        foreach(@words){
            if($_ eq '1'){
                $lcount++;
            } elsif($_ eq '0'){
            } elsif($twords =~/\;$_\;/igs){
                $lcount++;
            }
        }
        if($lcount>0){
            return 1;
        } else {
            return 0;
        }
    } elsif($term eq 'NOT'){
        $twords=$awords;
        foreach(@words){
            if($_ eq '1'){
                $lcount++;
            } elsif($_ eq '0'){
            } elsif($twords =~/\;$_\;/igs){
                $lcount++;
            }
        }
        if($lcount>0){
            return 0;
        } else {
            return 1;
        }
    }
}

1;
__END__


=head1 NAME

Search::Findex Text indexer and searcher

=head1 SYNOPSIS

    use Search::Findex;

=head1 DESCRIPTION

    This module creates a searchable-index. It can index any kind of t
+extual data. It's 
    reasonable fast, can index tenthousands of documents a hour.
    Further, it searches very fast too, using Search:Dict, retrieving 
+document id's within 
    milliseconds.

    Tests with 4000 documents (avg 4kbyte size) were indexed within 10
+ minutes and multiple searches showed 
    results within a second, with performance of +/- 14 docs/sec on a 
+Cel500 (time incl. writing index).
    
    Multiple queries searches on this 4000 document index between 0.01
+ and 1.2 seconds depending on 
    the complexity of the query.
    
    # Object creation
    use Findex;
    my $myindex = new Findex;

    # create a directory ./myindex where the indexes will be stored
    my $myindex->indexdir("myindex");

    # Register a filename/url with date and description
    my $docid = $myindex->getid($filename,-M $filename ,"File descript
+ion");

    # Index filedata
    $myindex->indexdoc($filedata,$docid);

    # Register a second filename/url with date and description
    my $docid2 = $myindex->getid($filename2,-M $filename2 ,"File descr
+iption2");
    # Index the other filedata
    $myindex->indexdoc($filedata2,$docid2); 

    # Write the new index, always after finishing indexing, advisable 
+every x-hundred indexed document
    $myindex->updateindex;
    
    # Execute AND OR NOT query 
    my @results = $myindex->search("gates and (microsoft or (bill not 
+clinton))");
    # Print the score for result 0
    print $myindex->{SCORE}{@results[0]};

    # Print the words that were found for result 0
    print $myindex->{HITWORDS}{@results[0]};

    # Print all field data of each ID.
    # ($filename,-M $filename ,"File description2") = getiddata
    foreach $id (@results){
        foreach $field ($myindex->getiddata($id)){
            print "$field\n";
        }
    }
    # Removed document from indexes
    $myindex->removedoc($docid);

    # optimize all indexes (takes a few seconds)
    $myindex->optimizeall;

=head1 Methods

    $number = $indexobject->getid($identifierstring, $date, $data);
    
    Generate a unique documentID for this document. If identifierstrin
+g already is 
    registered it returns nothing.
    
    $indexobject->indexdoc( $documentstring, $documentID);

    Index a document and store it's results in a temporary memorybuffe
+r under documentid.
    
    $indexobject->updateindex
    
    Write the temporary memorybuffer with indexdata to the index files
    
    $indexobject->optimizeall
    
    Updateindex creates new indexfiles at each update. Optimizeall mer
+ges indexfiles to 
    single larger files that speed up the searching a bit and optimize
+ filesystem usage.
    
    $indexobject->removedoc( $number)
    
    Removes documentID from all indexes.
    
    @array = $indexobject->search( $stringquery)
    
    Execute a logical query on the index with possible uses of and or 
+not. Default the 
    results are returned sorted at score.


=head1 Variables

    - Usage variables
    $indexobject->{INDEXDIR}
    
    The directory where the index is or should be located. Needs to be
+ set before indexing
    and/or searching.
    
    $indexobject->{SCORE}
    
    A hash in the form $indexobject->{SCORE}{$id} that shows the score
+ for an document id
    after a search.

    $indexobject->{HITWORDS}

    A hash in the form $indexobject->{HITWORDS}{$id} that shows the wo
+rds that search found
    for an document id after a search.

    - Language dependend variables

    $indexobject->{STOPWORDS}

    A list of words to be ignored while indexing (default Dutch words)
    
    $indexobject->{SPECIALCHARS}
    
    A list of special characters that beside \w need to be indexed.

    $indexobject->{LINKCHARS}
    
    A list of characters that are used to link words. Linked words wil
+l be indexed as
    3 seperate words.

    Example: "foo-bar" will be indexed as foo, bar and foobar.
    
    - Internal variables
    
    $indexobject->{INDEX}

    An internal hash with index data that still needs to be written (d
+one with updateindex)
    
    $indexobject->{INDEXSEPERATOR}

    The seperator to use within index data files.


=head1 AUTHOR

Patrick van Venetien - www.zojoh.com / www.fictional.net

=head1 COPYRIGHT

Copyright 2002 Patrick van Venetien.  All rights reserved.

=head1 SEE ALSO

perl(1).

=cut
Replies are listed 'Best First'.
Re: Text indexer/search engine
by zentara (Cardinal) on Aug 23, 2002 at 15:36 UTC
     Hi, this looks like it could be a useful module.  I came across
    an odd problem with it on linux. When I download it, it's got the 
    windows line endings. When I do a dos2unix conversion on it, the
    file gets chopped off at a char(1) at the line below.
    The c dos2unix program I've been using has never 
    given me problems before. It translated OK with
     'tr -d '\015' < Findex.pm > Findex.pm.out
    
    $self->{SPECIALCHARS}="üéâäàçêëèïîìÄÅÉæÆôöòûùÿÖÜáíóúñÑãõØøœŒÀÃÕ"
      Thanx, sorry for the windows line posting. I'll adjust it in a next update. Just like more documentation

      I didn't test it with any x-system, but i tried to make sure it would be able to function on those real machines too.

      Thank you for taking the time to debug it a bit!

      I might take the ParseQuery out of this package because i think it might be interesting enough to be a package of it's own.

      The documentation on this function isn't very well, but basically you can release a query onto a textstring.

      &ParseQuery("(foo or fu) and (bar not kung)","foo test bar");

      Does anyone agree ?