#!/usr/bin/env perl
#
# Interactive full-text search demo built on Apache Lucy.
#
# 1. Builds an index in ./lucy.index from the records in the __DATA__
#    section.  Each record has the form
#        ID;TEXT        or        ID;ID2;TEXT
#    and is indexed once per ID, so the same text can be found under
#    either identifier.
# 2. Prompts for queries on STDIN and prints every returned hit as
#    "ID -> TEXT", followed by the total match count and the elapsed time.
#    Enter "q" or "quit" (any case), or send end-of-file, to leave.
use 5.014;
use strictures;
use Lucy;
use Time::HiRes qw(gettimeofday tv_interval);

# The prompts do not end in a newline; flush them immediately so they are
# visible even when STDOUT is not a terminal.
$| = 1;

my $index = "./lucy.index";

# --- Schema: 'id' is an exact-match string, 'content' is analysed text. ---
my $schema       = Lucy::Plan::Schema->new;
my $easyanalyzer = Lucy::Analysis::EasyAnalyzer->new( language => 'en' );
my $text_type    = Lucy::Plan::FullTextType->new( analyzer => $easyanalyzer );
my $string_type  = Lucy::Plan::StringType->new();

$schema->spec_field( name => 'id',      type => $string_type );
$schema->spec_field( name => 'content', type => $text_type );

# --- Indexing: the index is rebuilt from scratch on every run. ---
my $indexer = Lucy::Index::Indexer->new(
    schema   => $schema,
    index    => $index,
    create   => 1,
    truncate => 1,
);

RECORD:
while ( my $line = <DATA> ) {

    # Leading numeric id, optional second numeric id, then the text.
    # '.' does not match "\n", so $text never carries the line ending.
    my ( $id1, $id2maybe, $text ) =
        $line =~ m{ \A ([0-9]+) ; (?: ([0-9]+) ; )? (.+) }x
        or next RECORD;    # not a record (placeholder, blank line, ...)

    # $id2maybe is undef when the record carries a single id.
    for my $id ( grep { defined } $id1, $id2maybe ) {
        $indexer->add_doc( { id => $id, content => $text } );
    }
}

$indexer->commit;

# --- Interactive query loop. ---
my $searcher = Lucy::Search::IndexSearcher->new( index => $index );

print "Query (q to quit): ";

QUERY:
while ( my $q = <STDIN> ) {
    chomp $q;
    last QUERY if $q =~ /\Aq(?:uit)?\z/i;

    my $t0 = [ gettimeofday() ];

    # No num_wanted is passed, so only Lucy's default page of results is
    # listed, while total_hits still reports every match (the sample
    # session below lists 10 hits for 12 matches).
    my $hits = $searcher->hits( query => $q );

    while ( my $hit = $hits->next ) {
        printf "%12d -> %s\n", $hit->{id}, $hit->{content};
    }

    # The timing deliberately spans the search and the printing above.
    printf "\nMatched %s record%s in %1.1f milliseconds\n",
        $hits->total_hits,
        $hits->total_hits == 1 ? "" : "s",
        1_000 * tv_interval( $t0, [ gettimeofday() ] );

    print "\nQuery: ";
}

# Everything after the "####" marker in the data section is a sample
# session kept for reference; none of its lines match the record pattern,
# so the indexing loop skips them.
__DATA__
Your 200 lines of test data…
####
moo@cow[51]~>perl pm-1118102
Query (q to quit): archaea
   259697659 -> root;cellular organisms;Archaea;Euryarchaeota;Thermococci;Thermococcales;Thermococcaceae;Pyrococcus;Pyrococcus abyssi;Pyrococcus abyssi GE5;
      272844 -> root;cellular organisms;Archaea;Euryarchaeota;Thermococci;Thermococcales;Thermococcaceae;Pyrococcus;Pyrococcus abyssi;Pyrococcus abyssi GE5;
   289191770 -> root;cellular organisms;Archaea;Euryarchaeota;Methanococci;Methanococcales;Methanocaldococcaceae;Methanocaldococcus;Methanocaldococcus sp. FS406-22;
      644281 -> root;cellular organisms;Archaea;Euryarchaeota;Methanococci;Methanococcales;Methanocaldococcaceae;Methanocaldococcus;Methanocaldococcus sp. FS406-22;
   490653205 -> root;cellular organisms;Archaea;Euryarchaeota;Halobacteria;Halobacteriales;Halobacteriaceae;Haloarcula;Haloarcula vallismortis;
       28442 -> root;cellular organisms;Archaea;Euryarchaeota;Halobacteria;Halobacteriales;Halobacteriaceae;Haloarcula;Haloarcula vallismortis;
   493010542 -> root;cellular organisms;Archaea;Euryarchaeota;Halobacteria;Halobacteriales;Halobacteriaceae;Natronorubrum;Natronorubrum tibetense;
       63128 -> root;cellular organisms;Archaea;Euryarchaeota;Halobacteria;Halobacteriales;Halobacteriaceae;Natronorubrum;Natronorubrum tibetense;
   500681908 -> root;cellular organisms;Archaea;Euryarchaeota;Methanococci;Methanococcales;Methanococcaceae;Methanococcus;Methanococcus aeolicus;
       42879 -> root;cellular organisms;Archaea;Euryarchaeota;Methanococci;Methanococcales;Methanococcaceae;Methanococcus;Methanococcus aeolicus;

Matched 12 records in 0.4 milliseconds

Query: 283552125
   283552125 -> root;Viruses;ssRNA viruses;ssRNA negative-strand viruses;Orthomyxoviridae;Influenzavirus A;Influenza A virus;H5N1 subtype;Influenza A virus (A/chicken/Nigeria/08RS848-4/2006(H5N1));

Matched 1 record in 0.2 milliseconds