#!/usr/bin/perl
use strict; # https://perlmonks.org/?node_id=11129602
use warnings;
use Algorithm::Diff qw(traverse_sequences);
use Tk;
use Tk::ROText;
my $file = 'KJV_fixed.csv'; # download from 11129602 (see above)
my $verses = do { local (@ARGV, $/) = $file; <> };
my $theverse = '19,14,1';
my $thebody = '';
my $font = 'times 12';
my $bestcount = 20;
my $mw = MainWindow->new;
$mw->geometry( '+0+200' );
$mw->title('String Comparison & Equivalence Challenge');
$mw->Button(-text => 'Exit', -command => sub {$mw->destroy},
)->pack(-side => 'bottom', -fill => 'x');
my $ro = $mw->Scrolled(ROText => -scrollbars => 'osoe',
-height => 20, -width => 100,
-font =>$font, -spacing3 => 5, -wrap => 'word',
)->pack(-side => 'bottom', -fill => 'both', -expand => 1);
my $button = $mw->Button(-text => 'Find Closest', -command => \&closes
+t,
)->pack(-side => 'bottom', -fill => 'x');
my $thetext = $mw->Scrolled(ROText => -scrollbars => 'osoe',
-height => 4, -wrap => 'word',
-font =>$font,
)->pack(-side => 'bottom', -fill => 'x', -expand => 0);
$mw->Label(-text => 'Verse', -fg => 'blue',
)->pack(-side => 'left', -fill => 'x');
my $entry = $mw->Entry( -textvariable => \$theverse,
)->pack(-side => 'left', -fill => 'x');
$entry->focus;
$mw->Button(-text => 'Find Verse', -command => \&findverse,
)->pack(-side => 'left', -fill => 'x');
$entry->bind('<Return>' => \&findverse);
$mw->Entry(-textvariable => \$bestcount, -width => 5,
)->pack(-side => 'right');
$mw->Label(-text => 'How Many', -fg => 'blue',
)->pack(-side => 'right');
MainLoop;
exit;
sub findverse
{
$thetext->delete('1.0' => 'end');
$thebody = '';
if( $verses =~ /^$theverse,(.*)/m )
{
$thebody = $1;
$thetext->insert(end => $thebody);
closest();
}
else { $thetext->insert(end => '*** Verse not found ***'); }
}
sub closest
{
$ro->delete('1.0' => 'end');
$ro->insert(end => "\nSearching ..." );
$mw->update;
my @matches;
while( $verses =~ /^((\d+,\d+,\d+),(.*))/gm )
{
$2 eq $theverse and next;
my $whole = $1;
my $text = $3;
my @sortfrom = sort my @from = $thebody =~ /\w+/g;
my @sortto = sort my @to = $text =~ /\w+/g;
my $total = my $good = 0;
traverse_sequences( \@from, \@to,
my $actions = {
MATCH => sub {$total += 10; $good += 10},
DISCARD_A => sub {$total++},
DISCARD_B => sub {$total++},
} );
traverse_sequences( \@sortfrom, \@sortto, $actions );
push @matches, sprintf "%06d %s", $good / ++$total * 1e6, $whole;
}
$ro->insert(end => ' Sorting ...' );
$mw->update;
@matches = reverse sort @matches;
$ro->delete('1.0' => 'end');
$ro->insert(end => join "", map s/^(\d\d)\d*/$1%/r . "\n",
@matches[0 .. $bestcount - 1] );
}
It takes about 2.6 seconds to go through the entire file searching for matches on my machine.