use strict;
use warnings;
use feature 'say';

# ngrams($text, $min, $max)
#
# Returns every word n-gram of $text whose length is between $min and $max
# words (inclusive), each n-gram being its words joined by a single space.
#
# Ordering: n-grams are grouped by starting word, left to right; within one
# starting word the longest n-gram comes first and the shortest last.
#
#   $text - string to tokenise; words are runs of non-whitespace, so leading,
#           trailing and repeated whitespace is ignored.
#   $min  - smallest n-gram size in words (values below 1 are treated as 1).
#   $max  - largest n-gram size in words.
#
# Returns an empty list when $text has fewer than $min words or $max < $min.
sub ngrams {
    my ($text, $min, $max) = @_;

    # A zero-word or negative-size n-gram is meaningless; clamp rather than
    # produce empty strings or nonsensical slices.
    $min = 1 if $min < 1;

    # split ' ' is the awk-style split: skips leading whitespace and treats
    # any run of whitespace as one separator.
    my @words = split ' ', $text;

    my @grams;
    for my $start (0 .. $#words) {
        # Never reach past the last word of the sentence.
        my $available = @words - $start;
        my $longest   = $max < $available ? $max : $available;

        # Longest first; the range is empty when fewer than $min words remain.
        for my $len (reverse $min .. $longest) {
            push @grams, join ' ', @words[ $start .. $start + $len - 1 ];
        }
    }

    return @grams;
}

my $sentence = "this is the text to play with";

my $ngramWindow_MIN = 2;
my $ngramWindow_MAX = 3;

# Print one n-gram per line (or do anything else with the returned list).
say for ngrams($sentence, $ngramWindow_MIN, $ngramWindow_MAX);