use 5.014;
use strictures;
use Lucy;

# Build (or extend) a Lucy full-text index from a flat dump file.
#
# Each input line is expected to look like "<id>;<text>": everything before
# the first semicolon becomes the `id` field (StringType, not analyzed) and
# the remainder becomes the `content` field (FullTextType, analyzed with the
# English EasyAnalyzer).  Documents are committed every $batch_size records;
# a new Indexer is obtained after each commit, and the index is optimized
# once at the end.

my $index      = "./lucy.index";    # index directory
my $source     = "/tmp/PM.db";      # input dump, one record per line
my $batch_size = 100_000;           # documents added between commits

my $schema       = Lucy::Plan::Schema->new;
my $easyanalyzer = Lucy::Analysis::EasyAnalyzer->new( language => 'en' );
my $text_type    = Lucy::Plan::FullTextType->new( analyzer => $easyanalyzer );
my $string_type  = Lucy::Plan::StringType->new();

$schema->spec_field( name => 'id',      type => $string_type );
$schema->spec_field( name => 'content', type => $text_type );

open my $db, "<", $source
    or die "Cannot open $source for reading: $!";

my $indexer = get_indexer();
my $pending = 0;    # documents added since the last commit

RECORD:
while ( my $record = <$db> ) {
    chomp $record;

    # Limit of 2 so that semicolons inside the text are kept as content.
    my ( $id, $text ) = split /;/, $record, 2;

    # A blank line yields no fields at all and a line without ';' yields no
    # text; neither is a usable record, so report it and move on instead of
    # handing undefined values to the indexer.
    unless ( defined $id && defined $text ) {
        warn "Skipping malformed record at $source line $.\n";
        next RECORD;
    }

    $indexer->add_doc( { id => $id, content => $text } );

    if ( ++$pending == $batch_size ) {
        say "committing a batch...";
        $indexer->commit;

        # The script has always replaced the indexer after a commit rather
        # than reusing it; keep doing so.
        $indexer = get_indexer();
        $pending = 0;
    }
}

close $db
    or warn "Problem closing $source: $!\n";

say "optimizing and committing...";
$indexer->optimize;
$indexer->commit;

# Returns a new Lucy::Index::Indexer bound to the script's schema and index
# directory.  `create => 1` is passed on every call, including the calls made
# after a batch commit when the index already exists.
sub get_indexer {
    return Lucy::Index::Indexer->new(
        schema => $schema,
        index  => $index,
        create => 1,
    );
}