# that outputs...
use strict;
use warnings;

# Slurp the whole sample text from the __DATA__ section (undefining $/
# locally turns the read into a single slurp) and lower-case it so that
# phrase matching is case-insensitive.
my $data = do { local $/; lc <DATA> };

# Break the text into words; a "phrase" is a run of consecutive words.
my @words = $data =~ /\w+/g;

# Sort comparator over word indexes (used as "sort cmpix LIST", so the two
# indexes arrive in the package variables $a and $b).
#
# Each index stands for the suffix of @words that starts there.  Suffixes
# are compared word by word with string comparison.  If one suffix runs out
# of words while they are still equal, the shorter suffix (the larger start
# index) sorts first.  Returns -1, 0 or 1; 0 only when $a == $b.
sub cmpix {
    my $last = $#words;

    # Only compare positions that exist in both suffixes, so we never read
    # past the end of @words and the loop is guaranteed to terminate.
    for (my $off = 0; $a + $off <= $last && $b + $off <= $last; $off++) {
        my $cmp = $words[$a + $off] cmp $words[$b + $off];
        return $cmp if $cmp;
    }

    # One suffix is a prefix of the other: the shorter one goes first.
    return $b <=> $a;
}

# Word indexes ordered by the suffix they start; repeated phrases end up as
# common prefixes of neighbouring entries.
my @ixs = sort cmpix 0 .. $#words;

# replace this code with duplication detector (it
# should be easy!):
for my $ix (@ixs) {
    # Show the first 50 characters of the suffix starting at $ix.
    print substr(join(" ", @words[$ix .. $#words]), 0, 50), "\n";
}

__DATA__
...
then finding duplicates is pretty obvious as they appear in consecutive entries in @ixs.

$ perl ./t.pl
1519
2nd may 1519
a character in the discworld series of novels is b
at clos luc france on 2nd may 1519
based largely on leonardo da vinci leonardo da vin
character in the discworld series of novels is bas
clos luc france on 2nd may 1519
da vinci died at clos luc france on 2nd may 1519
da vinci leonardo da vinci died at clos luc france
died at clos luc france on 2nd may 1519
discworld series of novels is based largely on leo
france on 2nd may 1519
in the discworld series of novels is based largely
is based largely on leonardo da vinci leonardo da
largely on leonardo da vinci leonardo da vinci die
leonard of quirm a character in the discworld seri
leonardo da vinci died at clos luc france on 2nd m
leonardo da vinci leonardo da vinci died at clos l
luc france on 2nd may 1519
may 1519
novels is based largely on leonardo da vinci leona
of novels is based largely on leonardo da vinci le
of quirm a character in the discworld series of no
on 2nd may 1519
on leonardo da vinci leonardo da vinci died at clo
quirm a character in the discworld series of novel
series of novels is based largely on leonardo da v
the discworld series of novels is based largely on
vinci died at clos luc france on 2nd may 1519
vinci leonardo da vinci died at clos luc france on
In reply to Re: Finding recurring phrases
by salva
in thread Finding recurring phrases
by Anonymous Monk
| For: | Use: |
| --- | --- |
| & | &amp;amp; |
| < | &amp;lt; |
| > | &amp;gt; |
| [ | &amp;#91; |
| ] | &amp;#93; |