use feature ':5.10';
use strict;
use warnings;

my $test_sentence =
    "Perl is a high-level, general-purpose, interpreted, dynamic programming language.";

# uniq_phrases( $sentence [, $min [, $max ]] )
#
# Returns every distinct run of consecutive words ("phrase") found in
# $sentence, for each phrase length from $min to $max words inclusive.
#
#   $sentence  string to scan; words are whitespace-separated, and
#              punctuation stays attached to its word
#   $min       shortest phrase length in words
#   $max       longest phrase length in words
#
# Length selection mirrors the argument count after $sentence:
#   two extra arguments  -> ( $min, $max )
#   one extra argument   -> only phrases of exactly that length
#   anything else        -> lengths 2 through 4
#
# Phrases are returned in sentence order, grouped by length (shortest
# length first), each phrase appearing once.  In list context the phrases
# are returned as a list; in scalar context as an array reference.
sub uniq_phrases {
    my $sentence = shift;

    my ( $min, $max );
    if ( @_ == 2 ) {
        ( $min, $max ) = @_;
    }
    elsif ( @_ == 1 ) {
        ( $min, $max ) = ( $_[0], $_[0] );
    }
    else {
        ( $min, $max ) = ( 2, 4 );
    }

    # split ' ' (awk mode) ignores leading whitespace, so a sentence that
    # starts with a blank does not produce an empty first "word".
    my @words = split ' ', $sentence;

    my @phrases;
    for my $size ( $min .. $max ) {
        next if $size < 1;    # a phrase needs at least one word

        # The last window that still holds $size words starts at
        # @words - $size; when the sentence is shorter than $size the
        # range below is empty and nothing is emitted for this length.
        my $last_start = @words - $size;

        my %seen;
        for my $start ( 0 .. $last_start ) {
            my $phrase = join ' ', @words[ $start .. $start + $size - 1 ];
            next if $seen{$phrase}++;
            push @phrases, $phrase;
        }
    }

    return wantarray ? @phrases : \@phrases;
}

# Demo: print every distinct 4-word phrase of the test sentence, sorted.
say join( "\n", sort { $a cmp $b } uniq_phrases( $test_sentence, 4 ) );
__END__
Or, the same thing in Clojure:
(require '[clojure.string :as str])

(def test-sentence
  "Perl is a high-level, general-purpose, interpreted, dynamic programming language.")

(defn uniq-phrases
  "Find unique phrases of the given lengths (default 2-4 words; max may be
  omitted).

  sentence  string to scan; words are runs of non-whitespace characters
  min-len   shortest phrase length in words
  max-len   longest phrase length in words (defaults to min-len when only
            one length is given)

  Returns a lazy seq of phrase strings, each appearing once. The phrases
  come out of a set, so their order is unspecified; sort them if order
  matters."
  ([sentence] (uniq-phrases sentence 2 4))
  ([sentence len] (uniq-phrases sentence len len))
  ([sentence min-len max-len]
   (let [;; re-seq on \S+ skips leading/trailing whitespace, so no empty
         ;; "word" is produced for a sentence that starts with a blank.
         words   (re-seq #"\S+" sentence)
         ;; (partition n 1 words) is the sliding window of n words; the
         ;; set removes repeated phrases.
         phrases (set (mapcat #(partition % 1 words)
                              (range min-len (inc max-len))))]
     (map #(str/join " " %) phrases))))

(defn print-phrases
  "Print the phrases in p, sorted, one per line."
  [p]
  (println (str/join "\n" (sort p))))

(print-phrases (uniq-phrases test-sentence 2 9))

; user=> (time (print-phrases (uniq-phrases test-sentence 4 4)))
; Perl is a high-level,
; a high-level, general-purpose, interpreted,
; general-purpose, interpreted, dynamic programming
; high-level, general-purpose, interpreted, dynamic
; interpreted, dynamic programming language.
; is a high-level, general-purpose,
; "Elapsed time: 1.227 msecs"

The cake is a lie.
The cake is a lie.
The cake is a lie.


In reply to Re: extract phrases of n-words length by Fletch
in thread extract phrases of n-words length by arun_kom

Title:
Use:  <p> text here (a paragraph) </p>
and:  <code> code here </code>
to format your post; this is "PerlMonks-approved HTML":



  • Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!
  • Titles consisting of a single word are discouraged, and in most cases are disallowed outright.
  • Read Where should I post X? if you're not absolutely sure you're posting in the right place.
  • Please read these before you post! —
  • Posts may use any of the Perl Monks Approved HTML tags:
    a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, details, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, summary, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr
  • You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)
            For:     Use:
    & &amp;
    < &lt;
    > &gt;
    [ &#91;
    ] &#93;
  • Link using PerlMonks shortcuts! What shortcuts can I use for linking?
  • See Writeup Formatting Tips and other pages linked from there for more info.