#!/usr/bin/perl
#
# Word n-gram extraction demo.
#
# Originally two cmd.exe one-liners (perl -wMstrict -le "..."); rewritten as
# a script so the shared extraction logic lives in a single sub and the code
# can be run from any shell.  The printed output of both demos is unchanged
# and is reproduced in the comments inside run_demos().

use strict;
use warnings;
use Carp qw(croak);

# word_ngrams($sentence, $window)
#
# Find every run of $window consecutive words in $sentence.
#
#   $sentence - string to scan (must be defined; may be empty).
#   $window   - number of words per n-gram; must be a positive integer.
#
# Returns a list of [ $ngram_text, $offset ] array refs in left-to-right
# order, where $offset is the 0-based character offset of the n-gram within
# $sentence.  The list is empty when $sentence holds fewer than $window
# words.  The n-gram text is the raw substring, so whatever whitespace
# separated the words is kept as-is.
#
# A "word" is a run of [[:alpha:]] characters bounded by \b on both sides,
# so a token that touches a digit or underscore is not matched.
#
# Croaks if $sentence is undefined or $window is not a positive integer.
sub word_ngrams {
    my ($sentence, $window) = @_;

    croak 'word_ngrams: sentence must be defined'
        if !defined $sentence;

    # A window below 1 would interpolate a nonsensical quantifier such as
    # {-1} into the pattern, so reject it up front.
    croak 'word_ngrams: window must be a positive integer'
        if !defined $window || $window !~ m{ \A [1-9] [0-9]* \z }xms;

    # One leading word plus ($window - 1) whitespace-separated followers.
    my $extra_words = $window - 1;
    my $ngram
        = qr{ \b [[:alpha:]]+ (?: \s+ [[:alpha:]]+ ){$extra_words} \b }xms;

    # The capture sits inside a zero-width lookahead: each match consumes
    # nothing, so the engine retries from the next position and overlapping
    # n-grams ("this is", "is the", ...) are all found.  $-[1] is the start
    # offset of capture group 1.
    my @hits;
    while ($sentence =~ m{ (?= ($ngram) ) }xmsg) {
        push @hits, [ $1, $-[1] ];
    }

    return @hits;
}

# run_demos()
#
# Print the two demonstrations from the original transcript, in order.
sub run_demos {
    my $sentence         = 'this is the text to play with';
    my $ngram_window_min = 1;
    my $ngram_window_max = 3;

    # Demo 1: every n-gram together with its offset in the sentence.
    #
    # Expected output:
    #   'this' at sentence offset 0
    #   'is' at sentence offset 5
    #   'the' at sentence offset 8
    #   'text' at sentence offset 12
    #   'to' at sentence offset 17
    #   'play' at sentence offset 20
    #   'with' at sentence offset 25
    #   'this is' at sentence offset 0
    #   'is the' at sentence offset 5
    #   'the text' at sentence offset 8
    #   'text to' at sentence offset 12
    #   'to play' at sentence offset 17
    #   'play with' at sentence offset 20
    #   'this is the' at sentence offset 0
    #   'is the text' at sentence offset 5
    #   'the text to' at sentence offset 8
    #   'text to play' at sentence offset 12
    #   'to play with' at sentence offset 17
    for my $window ($ngram_window_min .. $ngram_window_max) {
        for my $hit (word_ngrams($sentence, $window)) {
            my ($word_ngram, $sentence_offset) = @{$hit};
            print qq{'$word_ngram' at sentence offset $sentence_offset\n};
        }
    }

    # Demo 2: n-grams grouped under a heading per window size (text only).
    #
    # Expected output:
    #   1-word ngrams of 'this is the text to play with'
    #    'this'
    #    'is'
    #    'the'
    #    'text'
    #    'to'
    #    'play'
    #    'with'
    #   2-word ngrams of 'this is the text to play with'
    #    'this is'
    #    'is the'
    #    'the text'
    #    'text to'
    #    'to play'
    #    'play with'
    #   3-word ngrams of 'this is the text to play with'
    #    'this is the'
    #    'is the text'
    #    'the text to'
    #    'text to play'
    #    'to play with'
    for my $window ($ngram_window_min .. $ngram_window_max) {
        print qq{$window-word ngrams of '$sentence'\n};
        for my $word_ngram (map { $_->[0] } word_ngrams($sentence, $window)) {
            print qq{ '$word_ngram'\n};
        }
    }

    return;
}

# Run the demos only when executed as a script, not when loaded via require.
run_demos() unless caller;

1;