# Demonstration: enumerate every word n-gram of a sentence, for window sizes
# from $ngramWindow_MIN up to $ngramWindow_MAX, using a single regex.
#
# An embedded code block dumps the current captures each time the n-gram
# pattern matches, and (*F) then forces a failure so the engine backtracks
# and tries the next alternative / start position.
#
# NOTE: @{^CAPTURE} and %{^CAPTURE_ALL} are comparatively recent special
# variables; see perlvar for the minimum Perl version that provides them.

use strict;
use warnings;

use Data::Dump 'dd';

my $sentence = "this is the text to play with";

my $ngramWindow_MIN = 2;
my $ngramWindow_MAX = 3;

# One word, captured under the name "word", followed by a single whitespace
# character or the end of the string.  The group must be a *named* capture
# "(?<word>...)": the code block below reads $-{word}, and a bare "(?\b"
# is not a valid regex construct (it aborts compilation).
my $word = qr/(?<word>\b\S+)(?:\s|$)/;

# $ngramWindow_MIN mandatory words, then one optional word for each extra
# slot up to $ngramWindow_MAX.  The "x" operator repeats the stringified
# qr// objects; join glues the pieces into one pattern string.
my $ngram = join '',
    $word x $ngramWindow_MIN,
    qr/(?:$word)?/ x ( $ngramWindow_MAX - $ngramWindow_MIN );

# For every way $ngram can match, dump the captures three different ways
# and print a separating blank line, then fail on purpose with (*F).
my $re = qr/$ngram(?{
    dd \@{^CAPTURE};
    dd $-{word};
    dd ${^CAPTURE_ALL}{word};
    print "\n";
})(*F)/;

# The match is run purely for the side effects of the code block; because of
# (*F) it never succeeds as a whole.
$sentence =~ /$re/g;

__END__
["this", "is", "the"]
["this", "is", "the"]
"this"

["this", "is"]
["this", "is", undef]
"this"

["is", "the", "text"]
["is", "the", "text"]
"is"

["is", "the"]
["is", "the", undef]
"is"

["the", "text", "to"]
["the", "text", "to"]
"the"

["the", "text"]
["the", "text", undef]
"the"

["text", "to", "play"]
["text", "to", "play"]
"text"

["text", "to"]
["text", "to", undef]
"text"

["to", "play", "with"]
["to", "play", "with"]
"to"

["to", "play"]
["to", "play", undef]
"to"

["play", "with"]
["play", "with", undef]
"play"