use feature ':5.10';
use strict;
use warnings;

my $test_sentence = "Perl is a high-level, general-purpose, interpreted, dynamic programming language.";

# uniq_phrases( $sentence )
# uniq_phrases( $sentence, $len )
# uniq_phrases( $sentence, $min_len, $max_len )
#
# Collects the distinct phrases made of consecutive whitespace-separated
# words of $sentence, for every phrase length from $min_len to $max_len
# words inclusive.
#
#   - With no length arguments the lengths default to 2 .. 4.
#   - With a single length argument only phrases of exactly that many
#     words are produced.
#   - Any other number of length arguments falls back to 2 .. 4.
#
# Phrases are returned grouped by length (shortest first) and, within one
# length, in the order they first appear in the sentence.  Words are
# re-joined with a single space.
#
# Returns the list of phrases in list context and a reference to that
# list in scalar context.
sub uniq_phrases {
    my ( $sentence, @lengths ) = @_;

    my ( $min, $max )
        = @lengths == 2 ? @lengths
        : @lengths == 1 ? ( $lengths[0], $lengths[0] )
        :                 ( 2, 4 );

    # A phrase needs at least one word; smaller lengths are meaningless.
    $min = 1 if $min < 1;

    # split ' ' (unlike /\s+/) does not produce an empty leading field when
    # the sentence starts with whitespace.  An undefined sentence is
    # treated as an empty one.
    my @words = split( ' ', $sentence // '' );

    my @phrases;
    for my $size ( $min .. $max ) {

        # No phrase of this (or any larger) size fits in the sentence.
        last if $size > @words;

        my %seen;

        # Slide a window of $size words over the sentence.  The last valid
        # start index is @words - $size, so that the window ends exactly on
        # the final word.
        for my $start ( 0 .. @words - $size ) {
            my $phrase = join( " ", @words[ $start .. $start + $size - 1 ] );
            next if $seen{$phrase}++;
            push @phrases, $phrase;
        }
    }

    return wantarray ? @phrases : \@phrases;
}

say join( "\n", sort { $a cmp $b } uniq_phrases( $test_sentence, 4 ) );

__END__
####
(use '[clojure.contrib.str-utils :only (re-split str-join)])

(def test-sentence "Perl is a high-level, general-purpose, interpreted, dynamic programming language.")

(defn uniq-phrases
  "Find unique phrases of the given lengths (def 2-4 words; max may be omitted)"
  ([sentence] (uniq-phrases sentence 2 4))
  ([sentence len] (uniq-phrases sentence len len))
  ([sentence min-len max-len]
     (let [words (re-split #"\s+" sentence)
           pairs (set (mapcat #(partition %1 1 words)
                              (range min-len (inc max-len))))]
       (map #(str-join " " %) pairs))))

(defn print-phrases [p]
  (println (str-join "\n" (sort p))))

(print-phrases (uniq-phrases test-sentence 2 9))

; user=> (time (print-phrases (uniq-phrases test-sentence 4 4)))
; Perl is a high-level,
; a high-level, general-purpose, interpreted,
; general-purpose, interpreted, dynamic programming
; high-level, general-purpose, interpreted, dynamic
; interpreted, dynamic programming language.
; is a high-level, general-purpose,
; "Elapsed time: 1.227 msecs"