;; Only clojure.core is used in this file.  The string helpers that used to
;; come from clojure.contrib.str-utils (re-split / str-join) are expressed
;; with re-seq and interpose instead, because the monolithic clojure.contrib
;; library is not shipped with Clojure 1.3 and later.

(def test-sentence
  "Perl is a high-level, general-purpose, interpreted, dynamic programming language.")

(defn uniq-phrases
  "Return the unique phrases (runs of consecutive words) found in sentence.

  Arities:
    [sentence]                 phrases of 2 to 4 words
    [sentence len]             phrases of exactly len words
    [sentence min-len max-len] phrases of min-len to max-len words, inclusive

  Words are maximal runs of non-whitespace characters, so leading, trailing
  or repeated whitespace never produces an empty word.  A min-len below 1 is
  treated as 1.  The result is a lazy seq of strings whose words are joined
  by a single space, in order of first occurrence (shorter lengths first).
  It is empty when the sentence has fewer than min-len words."
  ([sentence] (uniq-phrases sentence 2 4))
  ([sentence len] (uniq-phrases sentence len len))
  ([sentence min-len max-len]
     (let [;; re-seq on \S+ yields the tokens themselves; splitting on \s+
           ;; would give an empty first token for input with leading blanks.
           words   (re-seq #"\S+" sentence)
           ;; A zero-length window would contribute an empty-string phrase.
           lengths (range (max 1 min-len) (inc max-len))
           ;; Sliding windows (step 1) of every requested length.
           windows (mapcat #(partition % 1 words) lengths)]
       (distinct (map #(apply str (interpose " " %)) windows)))))

(defn print-phrases
  "Print the phrases in p in sorted order, one per line.
  An empty p prints a single blank line."
  [p]
  (println (apply str (interpose "\n" (sort p)))))

(print-phrases (uniq-phrases test-sentence 2 9))

;; Sample REPL session:
;;
;; user=> (time (print-phrases (uniq-phrases test-sentence 4 4)))
;; Perl is a high-level,
;; a high-level, general-purpose, interpreted,
;; general-purpose, interpreted, dynamic programming
;; high-level, general-purpose, interpreted, dynamic
;; interpreted, dynamic programming language.
;; is a high-level, general-purpose,
;; "Elapsed time: 1.227 msecs"