Lucy in cgi perl

chella2104@gmail.com has asked for the wisdom of the Perl Monks concerning the following question:

This is my Perl index.pl file

 #!/usr/local/bin/perl
use strict;
use warnings;
# (Change configuration variables as needed.)
my $path_to_index = '/path/to/indexdir';
my $source  = '/path/to/datadir';
use File::Spec::Functions qw( catfile );
use Lucy::Plan::Schema;
use Lucy::Plan::FullTextType;
use Lucy::Analysis::EasyAnalyzer;
use Lucy::Index::Indexer;

# Build the schema.  The two full-text fields share one English
# EasyAnalyzer; 'content' is additionally highlightable and carries a boost
# of 2.0.  'url' is a string field with indexed => 0 and 'category' is a
# string field with stored => 0.
my $schema       = Lucy::Plan::Schema->new;
my $easyanalyzer = Lucy::Analysis::EasyAnalyzer->new( language => 'en' );

# Field name / field type pairs, registered with the schema in this order.
my @field_specs = (
    [ title => Lucy::Plan::FullTextType->new( analyzer => $easyanalyzer ) ],
    [   content => Lucy::Plan::FullTextType->new(
            analyzer      => $easyanalyzer,
            highlightable => 1,
            boost         => 2.0,
        )
    ],
    [ url      => Lucy::Plan::StringType->new( indexed => 0 ) ],
    [ category => Lucy::Plan::StringType->new( stored  => 0 ) ],
);
for my $spec (@field_specs) {
    my ( $field_name, $field_type ) = @{$spec};
    $schema->spec_field( name => $field_name, type => $field_type );
}

# Create an Indexer object on $path_to_index with the 'create' and
# 'truncate' options switched on.
my $indexer = Lucy::Index::Indexer->new(
    schema   => $schema,
    index    => $path_to_index,
    truncate => 1,
    create   => 1,
);

# Collect names of source files.  The pattern is anchored with \z so that
# only names ENDING in ".txt" are indexed; an unanchored /\.txt/ would also
# pick up editor backups and leftovers such as "a.txt~" or "a.txt.bak".
opendir( my $dh, $source )
    or die "Couldn't opendir '$source': $!";
my @filenames = grep { /\.txt\z/ } readdir $dh;
closedir $dh;

# Iterate over list of source files, adding one document per file.
for my $filename (@filenames) {
    print "Indexing $filename\n";
    my $doc = parse_file($filename);
    $indexer->add_doc($doc);
}

# Finalize the index and print a confirmation message.
$indexer->commit;
print "Finished.\n";

# Read one source file and return a hashref describing a single document.
#
# Parameters:
#   $filename - name of a file inside $source (no directory part).
# Returns:
#   Hashref with the keys title, content, url, and category.
# Dies:
#   If the file cannot be opened or closed, or if no category can be
#   derived from the file name.
#
# NOTE: the whole file becomes the 'content' of ONE document, so a search
# produces at most one hit per file, not one hit per line of the file.
sub parse_file {
    my $filename = shift;
    my $filepath = catfile( $source, $filename );

    open( my $fh, '<', $filepath ) or die "Can't open '$filepath': $!";
    my $text = do { local $/; <$fh> };    # slurp file content
    close $fh or die "Can't close '$filepath': $!";

    # Guard against an undefined result from the read so that the
    # 'content' field is always a defined string.
    $text = '' unless defined $text;

    # Only files whose name contains "sample" are recognised at present.
    my $category
        = $filename =~ /sample/ ? 'Student'
        :                         die "Can't derive category for $filename";

    # No separate title is extracted from the file; the category doubles
    # as the title.
    return {
        title    => $category,
        content  => $text,
        url      => "/home/kbs/datadir/$filename",
        category => $category,
    };
}
[download]

This is my search.cgi file

#!/usr/bin/perl
use strict;
use warnings;
my $path_to_index = '/path/to/indexdir';
use CGI;
use List::Util qw( max min );
use POSIX qw( ceil );
use Encode qw( decode );
use Lucy::Search::IndexSearcher;
use Lucy::Highlight::Highlighter;
use Lucy::Search::QueryParser;
use Lucy::Search::TermQuery;
use Lucy::Search::ANDQuery;
# Read the request parameters.
my $cgi = CGI->new;

# 'q' is the user's query string.  Test definedness rather than truth so
# that a literal "0" survives as a query instead of being replaced by ''.
my $raw_q = $cgi->param('q');
my $q     = decode( "UTF-8", defined $raw_q ? $raw_q : '' );

# 'offset' is handed to the search as the result offset, so accept only a
# plain non-negative integer and fall back to 0 for anything else
# (missing, negative, or non-numeric input).
my $raw_offset = $cgi->param('offset');
my $offset
    = ( defined $raw_offset && $raw_offset =~ /\A[0-9]+\z/ )
    ? $raw_offset + 0
    : 0;

my $category  = decode( "UTF-8", $cgi->param('category') || '' );
my $page_size = 10;
# Open the index for searching; the query parser is built from the schema
# that the searcher reports for that index.
my $searcher = Lucy::Search::IndexSearcher->new( index => $path_to_index );
my $qparser
    = Lucy::Search::QueryParser->new( schema => $searcher->get_schema );

# Parse the user's query string.  When a category was supplied, AND the
# parsed query with a term query on the 'category' field.
my $query = $qparser->parse($q);
if ($category) {
    $query = Lucy::Search::ANDQuery->new(
        children => [
            $query,
            Lucy::Search::TermQuery->new(
                field => 'category',
                term  => $category,
            ),
        ],
    );
}

# Run the search, asking for one page of results starting at $offset.
my $hits = $searcher->hits(
    num_wanted => $page_size,
    offset     => $offset,
    query      => $query,
);
my $hit_count = $hits->total_hits;

# Highlighter that produces excerpts from the 'content' field; it is given
# the raw query string $q rather than the combined query object.
my $highlighter = Lucy::Highlight::Highlighter->new(
    field    => 'content',
    query    => $q,
    searcher => $searcher,
);

# Create result list: one <p> per hit holding the title, the score, every
# excerpt line that mentions the query, and the document URL.
my $report = '';

# The excerpt lines are filtered with a case-insensitive literal substring
# test.  The user's input is deliberately NOT used as a regex: it is
# untrusted, and a case-sensitive match would drop lines such as "chella"
# when the query was typed as "Chella".  The needle is HTML-escaped because
# the highlighter is documented to entity-encode the excerpt text.
my $needle = lc CGI::escapeHTML($q);

while ( my $hit = $hits->next ) {
    my $score   = sprintf( "%0.3f", $hit->get_score );
    my $excerpt = $highlighter->create_excerpt($hit);
    $excerpt = '' unless defined $excerpt;    # defensive: keep split() quiet

    # Stored field values are plain text, so escape them before they are
    # placed into the HTML page.
    my $title = CGI::escapeHTML( $hit->{title} );
    my $url   = CGI::escapeHTML( $hit->{url} );

    $report .= qq|
        <p>
          <strong>$title</strong>
          <em>$score</em>|;

    for my $line ( split /\n/, $excerpt ) {
        next unless index( lc $line, $needle ) >= 0;
        $report .= qq|
                  <br />
                      $line 
                  <br />|;
    }

    $report .= qq|
          <span class="excerptURL">$url</span>
        </p>
    |;
}
blast_out_content( $q, $report );
# Print the complete HTTP response (header plus HTML page) to STDOUT.
#
# Parameters:
#   $query_string    - the user's query; HTML-escaped here before it is
#                      placed in the page title and the search box.
#   $hit_list        - ready-made HTML for the result list (inserted as is).
#   $paging_info     - ready-made HTML for paging links (inserted as is).
#   $category_select - accepted for interface compatibility; not used in
#                      the page.
# Any argument that is omitted or undefined is treated as an empty string.
# Returns: nothing meaningful; the page is written to STDOUT.
sub blast_out_content {
    my ( $query_string, $hit_list, $paging_info, $category_select ) = @_;

    # Callers may leave out trailing arguments; default them so that the
    # page is printed without "uninitialized value" warnings.
    for my $fragment ( $query_string, $hit_list, $paging_info ) {
        $fragment = '' unless defined $fragment;
    }

    my $escaped_q = CGI::escapeHTML($query_string);
    binmode( STDOUT, ":encoding(UTF-8)" );
    print qq|Content-type: text/html; charset=UTF-8\n\n|;
    print qq|
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
    "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
  <meta http-equiv="Content-type" 
    content="text/html;charset=UTF-8">
  <link rel="stylesheet" type="text/css" 
    href="/us_constitution/uscon.css">
  <title>Lucy: $escaped_q</title>
</head>

<body>

  <div id="navigation">
    <form id="usconSearch" action="">
      <strong>
        Search the Name:
      </strong>
      <input type="text" name="q" id="q" value="$escaped_q">
      
      <input type="submit" value="=&gt;">
    </form>
  </div><!--navigation-->

  <div id="bodytext">

  $hit_list

  $paging_info

  </div><!--bodytext-->
    <p style="font-size: smaller; color: #666">
      <em>
       
      </em>
    </p>

</body>

</html>
|;
}
[download]

This is my text file

ID    Name    Dept    Addr
1    Chella    IT    cbe
2    Sara    CSE    cbe
3    aaaaa    EEE    CBE
4    chella    CSE    thi
5    sara    ECE    che
6    dddd    MECH    che
[download]

When I give a query like chella, it only prints

1 Chella IT cbe

But I want output like

1 Chella IT cbe

4 chella CSE thi

Can anyone help me?

Comment on Lucy in cgi perl Select or Download Code

Replies are listed 'Best First'.
Re: Lucy in cgi perl by Corion (Patriarch) on Apr 11, 2016 at 13:22 UTC
Can you reduce your two programs into one, shorter program that still exhibits the same problem? For example, I don't think the HTML generation is necessary to reproduce the problem. Have you verified that both documents get inserted into your search index?	[reply]
Re: Lucy in cgi perl by Your Mother (Archbishop) on Apr 11, 2016 at 14:26 UTC
What you describe is not actually possible with the code you have. Your `parse_file` sub is eating your entire data file as one record, not six. Have you edited the code since your unwanted results? Or is your text file really six different text files?	[reply] [d/l]
Re^2: Lucy in cgi perl by chella2104@gmail.com (Sexton) on Apr 12, 2016 at 05:34 UTC
Can you tell me how to get only the needed results?	[reply]
Re^3: Lucy in cgi perl by Your Mother (Archbishop) on Apr 12, 2016 at 13:50 UTC
Maybe if I knew what you needed. I started to rewrite the stuff for you into a smaller working example but by the time I got to the CGI it became less clear what you really want to index and search on and how you want to show it. Looking at your data I think a database would be a superior solution to a search engine like Lucy but maybe the data is just for testing and this is just a practice/toy application? The surface issue in the results you're having might be as simple to fix as discarding this stuff. It certainly doesn't belong and is probably muddying your chance at understanding what's really going on with searches and the indexing. `my @exi=split (/\n/,$excerpt); ... foreach my $v(@exi){ if ($v=~ /$q/){ $report .= qq\| <br /> $v <br />\|;` [download]	[reply] [d/l]