#!/usr/bin/perl
use strict; # https://perlmonks.org/?node_id=11129602
use warnings;
use Algorithm::Diff qw(traverse_sequences);
use Tk;
use Tk::ROText;
my $file = 'KJV_fixed.csv'; # download from 11129602 (see above)
my $verses = do { local (@ARGV, $/) = $file; <> };
my $theverse = '19,14,1';
my $thebody = '';
my $font = 'times 12';
my $bestcount = 20;
my $mw = MainWindow->new;
$mw->geometry( '+0+200' );
$mw->title('String Comparison & Equivalence Challenge');
$mw->Button(-text => 'Exit', -command => sub {$mw->destroy},
)->pack(-side => 'bottom', -fill => 'x');
my $ro = $mw->Scrolled(ROText => -scrollbars => 'osoe',
-height => 20, -width => 100,
-font =>$font, -spacing3 => 5, -wrap => 'word',
)->pack(-side => 'bottom', -fill => 'both', -expand => 1);
my $button = $mw->Button(-text => 'Find Closest', -command => \&closes
+t,
)->pack(-side => 'bottom', -fill => 'x');
my $thetext = $mw->Scrolled(ROText => -scrollbars => 'osoe',
-height => 4, -wrap => 'word',
-font =>$font,
)->pack(-side => 'bottom', -fill => 'x', -expand => 0);
$mw->Label(-text => 'Verse', -fg => 'blue',
)->pack(-side => 'left', -fill => 'x');
my $entry = $mw->Entry( -textvariable => \$theverse,
)->pack(-side => 'left', -fill => 'x');
$entry->focus;
$mw->Button(-text => 'Find Verse', -command => \&findverse,
)->pack(-side => 'left', -fill => 'x');
$entry->bind('<Return>' => \&findverse);
$mw->Entry(-textvariable => \$bestcount, -width => 5,
)->pack(-side => 'right');
$mw->Label(-text => 'How Many', -fg => 'blue',
)->pack(-side => 'right');
MainLoop;
exit;
sub findverse
{
$thetext->delete('1.0' => 'end');
$thebody = '';
if( $verses =~ /^$theverse,(.*)/m )
{
$thebody = $1;
$thetext->insert(end => $thebody);
closest();
}
else { $thetext->insert(end => '*** Verse not found ***'); }
}
sub closest
{
$ro->delete('1.0' => 'end');
$ro->insert(end => "\nSearching ..." );
$mw->update;
my @matches;
while( $verses =~ /^((\d+,\d+,\d+),(.*))/gm )
{
$2 eq $theverse and next;
my $whole = $1;
my $text = $3;
my @sortfrom = sort my @from = $thebody =~ /\w+/g;
my @sortto = sort my @to = $text =~ /\w+/g;
my $total = my $good = 0;
traverse_sequences( \@from, \@to,
my $actions = {
MATCH => sub {$total += 10; $good += 10},
DISCARD_A => sub {$total++},
DISCARD_B => sub {$total++},
} );
traverse_sequences( \@sortfrom, \@sortto, $actions );
push @matches, sprintf "%06d %s", $good / ++$total * 1e6, $whole;
}
$ro->insert(end => ' Sorting ...' );
$mw->update;
@matches = reverse sort @matches;
$ro->delete('1.0' => 'end');
$ro->insert(end => join "", map s/^(\d\d)\d*/$1%/r . "\n",
@matches[0 .. $bestcount - 1] );
}
It takes about 2.6 seconds to go through the entire file searching for matches on my machine.
-
Are you posting in the right place? Check out Where do I post X? to know for sure.
-
Posts may use any of the Perl Monks Approved HTML tags. Currently these include the following:
<code> <a> <b> <big>
<blockquote> <br /> <dd>
<dl> <dt> <em> <font>
<h1> <h2> <h3> <h4>
<h5> <h6> <hr /> <i>
<li> <nbsp> <ol> <p>
<small> <strike> <strong>
<sub> <sup> <table>
<td> <th> <tr> <tt>
<u> <ul>
-
Snippets of code should be wrapped in
<code> tags not
<pre> tags. In fact, <pre>
tags should generally be avoided. If they must
be used, extreme care should be
taken to ensure that their contents do not
have long lines (<70 chars), in order to prevent
horizontal scrolling (and possible janitor
intervention).
-
Want more info? How to link
or How to display code and escape characters
are good places to start.