# LeadingDistinct.pm 15jun15waw
# for each token in a set of tokens, find minimum leading
# substring that uniquely distinguishes the token
# from all other tokens in the set.
# works with Perl 5.8.9
package LeadingDistinct;
use warnings;
use strict;
use List::Util qw(max);
use List::MoreUtils qw(uniq);
sub extract {
my (@tokens,
) = @_;
@tokens = ('', uniq(sort @tokens), '');
return
map { $tokens[$_], distinguish(@tokens[ $_-1, $_, $_+1 ]) }
1 .. $#tokens-1
;
}
sub distinguish {
my ($before,
$word,
$after,
) = @_; # contiguous sequence of three alpha-sorted words
return substr $word, 0, 1 + max diff($word, $before), diff($word, $after);
}
sub diff {
my ($w1,
$w2,
) = @_;
my $x = $w1 ^ $w2;
$x =~ s{ [^\x00] .* \z }{}xms;
return length $x;
}
1;
####
# LeadingDistinct.t test LeadingDistinct.pm 15jun15waw
use warnings;
use strict;
use Test::More
# tests => ?? + 1 # Test::NoWarnings adds 1 test
'no_plan'
;
use Test::NoWarnings;
# normal tokens (alpha and underscore).
use constant TOKENS => qw/ab abcd abcdef ghi ghij ghijk
lnm lnmopq lnmopqrst uvw xyz dcba edcba xyzzy xyzzz
pqrst pqrtu
report_time report_day reset read
/;
use constant TOKENS_AND_UNIQUE => (
ab => 'ab',
abcd => 'abcd',
abcdef => 'abcde',
dcba => 'd',
edcba => 'e',
ghi => 'ghi',
ghij => 'ghij',
ghijk => 'ghijk',
lnm => 'lnm',
lnmopq => 'lnmopq',
lnmopqrst => 'lnmopqr',
pqrst => 'pqrs',
pqrtu => 'pqrt',
read => 'rea',
report_day => 'report_d',
report_time => 'report_t',
reset => 'res',
uvw => 'u',
xyz => 'xyz',
xyzzy => 'xyzzy',
xyzzz => 'xyzzz',
);
# some tokens to test handling of metacharacters.
use constant WEIRD_TOKENS => qw(foo? foo?* foo+++ foo?*+ foo???);
use constant WEIRD_TOKENS_AND_UNIQUE => (
'foo+++' => 'foo+',
'foo?' => 'foo?',
'foo?*' => 'foo?*',
'foo?*+' => 'foo?*+',
'foo???' => 'foo??',
);
BEGIN { use_ok 'LeadingDistinct'; }
is_deeply { LeadingDistinct::extract(TOKENS) }, { TOKENS_AND_UNIQUE },
"normal chars: alphas & underscores";
is_deeply { LeadingDistinct::extract(TOKENS, WEIRD_TOKENS) },
{ TOKENS_AND_UNIQUE, WEIRD_TOKENS_AND_UNIQUE },
"normal chars and some metacharacters";
####
c:\@Work\Perl\monks\gator456>perl LeadingDistinct.t
ok 1 - use LeadingDistinct;
ok 2 - normal chars: alphas & underscores
ok 3 - normal chars and some metacharacters
ok 4 - no warnings
1..4
####
# extract_min_uniq_2.pl 15jun15waw
use warnings;
use strict;
use LeadingDistinct;
use Data::Dump qw(dd);
use constant DEBUGGING => 0;
use constant {
PR_DB1 => 1 && DEBUGGING,
PR_DB2 => 1 && DEBUGGING,
PR_DB3 => 1 && DEBUGGING,
};
my @tokens = qw/report_time report_day reset read/;
my %min_distinct = LeadingDistinct::extract(@tokens);
dd \%min_distinct if PR_DB1;
for my $k (keys %min_distinct) {
$min_distinct{$k} = max_optional_rx($k, $min_distinct{$k});
}
dd \%min_distinct if PR_DB2;
my ($find) =
map qr{ (?) {
next unless m{ \A ($find) }xms;
print "$1 \n";
}
# subroutines ######################################################
sub max_optional_rx {
my ($whole, # full word
$unique, # distinctive characters at start of word
) = @_;
my @tail = split //, substr $whole, length $unique;
my $opt_tail = optional_tail(@tail);
return qr{ \Q$unique\E $opt_tail }xms;
}
sub optional_tail {
my $rx = '';
return $rx unless @_;
$rx = quotemeta shift @_;
for my $c (@_) {
$c = quotemeta $c;
$rx .= " (?: $c)?";
}
$rx = qr{ $rx }xms;
return "$rx?";
}
__DATA__
report_t
14:09:33 PDT
report_d
Fri Jun 12 2015
report (should not show up)
res
Resetting the time
report_time
00:00:00
rea foo.bar
Info: reading file foo.bar
####
c:\@Work\Perl\monks\gator456>perl extract_min_uniq_2.pl
report_t
report_d
res
report_time
rea
####
sub diff {
($_[0] ^ $_[1]) =~ m{ \A \x00* }xms;
return $+[0];
}