in reply to KinoSearch - is there a way to iterate over all documents in an index?
I know this thread is fairly old, but I actually had occasion to do precisely what the author is asking about here.
# In a nutshell...
#
# get_term_data
#
# Builds a hash ref mapping each term to its frequency in this document,
# where "frequency" is the number of positions recorded for the term.
#
# Reads  : $self->get_field_names(), $self->{field_strings}{$field}
# Writes : $self->{field_vectors}{$field} (decoded vectors are cached there)
# Returns: hash ref { term => count }, or nothing (bare return) as soon as a
#          field has neither a cached vector nor a field string.
#
# NOTE(review): a term that occurs in more than one field keeps only the
# count from the last field processed, because the assignment below
# overwrites rather than accumulates -- confirm this is the intent.
sub get_term_data {
    my ($self) = @_;

    my @fields   = $self->get_field_names();
    my $termdata = {};

    for my $field (@fields) {

        # Decode the field's stored string on first use and cache the result
        # on the object so later calls can reuse it.
        my $field_vector = $self->{field_vectors}{$field};
        if ( !defined $field_vector ) {
            my $field_string = $self->{field_strings}{$field};

            # NOTE(review): this leaves the whole sub, discarding anything
            # already collected from earlier fields; `next` may be what was
            # meant -- verify against callers before changing.
            return unless defined $field_string;

            $field_vector = $self->{field_vectors}{$field}
                = _extract_tv_cache($field_string);
        }

        my @terms_for_field = keys %{$field_vector};
        for my $term (@terms_for_field) {
            my ( $positions, $starts, $ends )
                = _extract_posdata( $field_vector->{$term} );

            my $termvector = KinoSearch::Index::TermVector->new(
                field         => $field,
                text          => $term,
                positions     => $positions,
                start_offsets => $starts,
                end_offsets   => $ends,
            );

            # ok.. we have the term vector. how do we get the
            # term frequency for this document?
            # Answer used here: count the positions recorded for the term.
            my $term_freq = scalar( @{$positions} );
            $termdata->{$term} = $term_freq;
        }
    }

    return $termdata;
}
|
|---|