use 5.014;
use strictures;
use List::Util "shuffle";
# Build a large synthetic "database" file for indexing/search experiments.
# Each output row is "<id>;<word>;<word>;..." -- a numeric id followed by a
# shuffled slice of the (pre-shuffled) system dictionary.  Rows are written
# until the reported size of the output file exceeds the cap below.
my $dict_path = "/usr/share/dict/words";
my $db_path   = "/tmp/PM.db";

open my $dict_fh, "<", $dict_path or die "Cannot read $dict_path: $!";
chomp( my @words = <$dict_fh> );
close $dict_fh;

# Exclusive upper bound for the random end index of a row's word slice.
my $top = @words - 40;

# With 40 or fewer words rand($top) degenerates (zero or negative bound)
# and the rows would be empty or constant, so refuse to run.
die "$dict_path holds too few words (", scalar(@words), ") to build rows\n"
    unless $top > 0;

@words = shuffle @words;

open my $db, ">", $db_path or die "Cannot write $db_path: $!";
for my $id ( 999_999 .. 999_999_999 ) {

    # Integer arithmetic truncates the fractional rand() results used below.
    use integer;
    my $end   = rand($top);
    my $range = rand(35) + 5;    # 5 .. 39 after integer truncation
    my $start = $end - $range;
    $start = 0 if $start < 0;

    say {$db} join ";", $id, shuffle @words[ $start .. $end ]
        or die "Cannot write to $db_path: $!";

    # Stop once the file has grown past 32,000,000,000 bytes.
    last if -s $db > 32_000_000_000;
}

# Buffered write errors (e.g. a full disk) may only surface at close.
close $db or die "Cannot close $db_path: $!";
####
use 5.014;
use strictures;
use Lucy;
# Index every "<id>;<text>" row of /tmp/PM.db into the Lucy index at $index.
# Documents are committed in batches of 100,000; after the last row the
# index is optimized and committed one final time.
my $index  = "./lucy.index";
my $schema = Lucy::Plan::Schema->new;

# Field types: 'content' is full text analyzed with the English
# EasyAnalyzer, 'id' is a plain string field.
my $analyzer     = Lucy::Analysis::EasyAnalyzer->new( language => 'en' );
my $content_type = Lucy::Plan::FullTextType->new( analyzer => $analyzer );
my $id_type      = Lucy::Plan::StringType->new();

$schema->spec_field( name => 'id',      type => $id_type );
$schema->spec_field( name => 'content', type => $content_type );

open my $rows_fh, "<", "/tmp/PM.db" or die $!;

my $indexer    = get_indexer();
my $docs_added = 0;
while ( defined( my $row = <$rows_fh> ) ) {
    chomp $row;

    # Split on the first ';' only: the id, then the rest of the line.
    my ( $id, $text ) = split /;/, $row, 2;
    $indexer->add_doc( { id => $id, content => $text } );

    # Only every 100,000th document triggers a commit.
    next if ++$docs_added % 100_000;

    print "committing a batch...\n";
    $indexer->commit;

    # A fresh indexer is obtained after each commit.
    $indexer = get_indexer();
}

print "optimizing and committing...\n";
$indexer->optimize;
$indexer->commit;
# Build and return a new Lucy::Index::Indexer for the file-level $index
# path using the file-level $schema.  The 'create' flag is always set.
sub get_indexer {
    my %indexer_args = (
        schema => $schema,
        index  => $index,
        create => 1,
    );
    return Lucy::Index::Indexer->new(%indexer_args);
}
##
##
use 5.014;
use strictures;
use Lucy;
use Time::HiRes "gettimeofday", "tv_interval";
use Number::Format "format_number";
# Interactive search prompt over the Lucy index at $index.
# Reports the number of indexed records, then reads one query per line from
# standard input, printing the match count, the search time and up to three
# hits for each.  Entering "q" or "quit" (any case) exits; the loop also
# ends when standard input reaches end-of-file.
my $index    = "./lucy.index";
my $searcher = Lucy::Search::IndexSearcher->new( index => $index );

# A match-all query is run once, only to report the size of the index.
my $all = $searcher->hits( query => Lucy::Search::MatchAllQuery->new );
print "Searching ", format_number( $all->total_hits ), " records.\n";

print "Query (q to quit): ";
while ( my $q = <STDIN> ) {
    chomp $q;
    exit if $q =~ /\Aq(?:uit)?\z/i;

    # Time only the search call itself, not the report formatting.
    my $t0   = [ gettimeofday() ];
    my $hits = $searcher->hits(
        query      => $q,
        num_wanted => 3,
    );
    my $elapsed_ms = 1_000 * tv_interval( $t0, [ gettimeofday() ] );

    my $total = $hits->total_hits;
    printf "\nMatched %s record%s in %1.2f milliseconds\n",
        format_number($total),
        $total == 1 ? "" : "s",
        $elapsed_ms;

    # At most num_wanted (3) hits are returned for display.
    while ( my $hit = $hits->next ) {
        printf "%12d -> %s\n", $hit->{id}, $hit->{content};
    }

    print "\nQuery: ";
}
##
##
Searching 126,871,745 records.
Query (q to quit): ohai
Matched 0 records in 1.33 milliseconds
Query: taco
Matched 0 records in 0.30 milliseconds
Query: dingo
Matched 12,498 records in 17.69 milliseconds
79136688 -> incandescency;scratchiness;ungnarred;dingo;desmachymatous;verderer
78453332 -> dingo;verderer;incandescency;ungnarred;coinsurance;scratchiness;desmachymatous
78367042 -> verderer;ungnarred;incandescency;dingo;desmachymatous;scratchiness
Query: 78311109
Matched 1 record in 80.07 milliseconds
78311109 -> revealing;sulfocarbimide;Darwinize;reproclamation;intermedial;Cinclidae
Query: perl
Matched 12,511 records in 34.92 milliseconds
78437383 -> unnoticeableness;radiectomy;brogger;rumorer;oreillet;befan;perle
59450674 -> perle;Avery;autoxidizability;tidewaiter;radiectomy;filthily
59125043 -> oreillet;perle;Avery;autoxidizability;filthily;tidewaiter;radiectomy
Query: pollen OR bee
Matched 61,997 records in 27.14 milliseconds
127851379 -> sley;Phalaris;pollen;brasque;snuffle;excalate;operculigenous
79011524 -> rave;uliginose;gibel;pollened;uncomprised;salve;topognosia
78853424 -> topognosia;gibel;rave;uncomprised;pollened;uliginose;salve
Query: pollen
Matched 24,674 records in 1.58 milliseconds
127851379 -> sley;Phalaris;pollen;brasque;snuffle;excalate;operculigenous
79011524 -> rave;uliginose;gibel;pollened;uncomprised;salve;topognosia
78853424 -> topognosia;gibel;rave;uncomprised;pollened;uliginose;salve
Query: pollen AND bee
Matched 0 records in 21.61 milliseconds