# Slide a window of 2 * $num + 1 characters across every input line and keep
# only the windows in which the pattern held in $graph matches starting at
# offset $num (i.e. immediately after the first $num characters of the window).
# $num and $graph are expected to be set before this point.
$sublen = $num * 2 + 1;

# NOTE(review): the input source is assumed to be the diamond operator (files
# named in @ARGV, falling back to STDIN) -- confirm. An empty `while ()`
# condition would loop forever without ever reading a line.
while ( my $line = <> ) {
    chomp $line;

    # When the line is shorter than the window, the range below is empty and
    # the line contributes no n-grams.
    for my $ofs ( 0 .. length($line) - $sublen ) {
        my $ngram = substr( $line, $ofs, $sublen );

        # Skip windows where $graph does not match at offset $num.
        next unless $ngram =~ /^.{$num}$graph/;

        # store this ngram to your hash
    }
}