use strict;
use warnings;
use Data::Dumper;

# Characters that end a "word" for the purposes of this script.  Only these
# exact characters are treated as delimiters (space, newline, tab and a few
# punctuation marks); everything else, including apostrophes, is word content.
my $whitespace    = " \n\t";
my $punctuation   = ".,!?";
my $non_delimiter = qr/[^$whitespace$punctuation]/;

# words_around_going(@lines)
#
# Scans each line for occurrences of "<word> going <word>" and groups the
# word that follows "going" under the word that precedes it.
#
# Returns a hash reference: { preceding_word => [ following_word, ... ] },
# with following words in the order they were encountered.
sub words_around_going {
    my (@lines) = @_;    # copy, so the regex position state never touches the caller's data

    my %followers_of;
    for my $line (@lines) {
        # /g in scalar context resumes after the previous match, so one line
        # may contribute several pairs.
        while ( $line =~ m/($non_delimiter+)\s+going\s+($non_delimiter+)/g ) {
            push @{ $followers_of{$1} }, $2;
        }
    }

    return \%followers_of;
}

# Read the sample text from the __DATA__ section below.  (An empty
# `while ()` condition is always true in Perl, so the input must be read
# explicitly from the DATA handle.)
my $followers_of = words_around_going(<DATA>);

# Sort keys so the dump is stable from run to run.
local $Data::Dumper::Sortkeys = 1;
print Dumper $followers_of;

# Expected output (modulo whitespace):
#
#   $VAR1 = {
#             'What\'s' => [ 'on' ],
#             'am'      => [ 'home', 'to' ]
#           };

__DATA__
I am going home.
I am going to bed.
What's going on?