# Collect the context window around every occurrence of $graph in the input.
#
# Reads lines through <> (the files named in @ARGV, or STDIN). For each
# occurrence of $graph in a line, a window of up to $num characters on either
# side of the match is cut out (clamped to the line boundaries), and the whole
# line is appended to @{ $graph_contexts{$window} }.
#
# Uses $graph, $num and %graph_contexts, which must be set up before this
# point. Dies if $graph is empty or undefined.
{
    # index() always "finds" an empty needle, so an empty $graph would make
    # the inner loop spin forever while growing %graph_contexts.
    die 'graph to search for must be a non-empty string'
        unless defined $graph && length $graph;

    # Width of the match itself. The window's right edge is measured from the
    # end of the match, so a $graph longer than one character still gets $num
    # characters of trailing context (for a one-character $graph this is the
    # same as the former "$found + $num + 1").
    my $graph_len = length $graph;

    while ( my $line = <> ) {
        chomp $line;

        my $limit  = length $line;
        my $offset = 0;

        while ( ( my $found = index( $line, $graph, $offset ) ) >= 0 ) {

            # Clamp the window to the start and end of the line.
            my $bgn = ( $found - $num > 0 ) ? $found - $num : 0;
            my $end = ( $found + $graph_len + $num < $limit )
                    ? $found + $graph_len + $num
                    : $limit;

            push @{ $graph_contexts{ substr( $line, $bgn, $end - $bgn ) } },
                $line;

            # Advance by one character only, so overlapping matches are found.
            $offset = $found + 1;
        }
    }
}

# ---------------------------------------------------------------------------
# Alternative approach (reference only, not executed): slide a fixed-width
# window over each line and keep the windows that have $graph right after the
# first $num characters. Unlike the loop above, this only yields windows with
# full-width context on both sides; matches closer than $num characters to
# either end of a line are not reported.
#
#   my $sublen = $num * 2 + length $graph;
#   while (<>) {
#       chomp;
#       for my $ofs ( 0 .. length() - $sublen ) {
#           my $ngram = substr( $_, $ofs, $sublen );
#           next unless $ngram =~ /^.{$num}\Q$graph\E/;
#           # store this ngram to your hash
#       }
#   }
# ---------------------------------------------------------------------------