# LeadingDistinct.pm 15jun15waw # for each token in a set of tokens, find minimum leading # substring that uniquely distinguishes the token # from all other tokens in the set. # works with Perl 5.8.9 package LeadingDistinct; use warnings; use strict; use List::Util qw(max); use List::MoreUtils qw(uniq); sub extract { my (@tokens, ) = @_; @tokens = ('', uniq(sort @tokens), ''); return map { $tokens[$_], distinguish(@tokens[ $_-1, $_, $_+1 ]) } 1 .. $#tokens-1 ; } sub distinguish { my ($before, $word, $after, ) = @_; # contiguous sequence of three alpha-sorted words return substr $word, 0, 1 + max diff($word, $before), diff($word, $after); } sub diff { my ($w1, $w2, ) = @_; my $x = $w1 ^ $w2; $x =~ s{ [^\x00] .* \z }{}xms; return length $x; } 1;