use strict;
use warnings;

use Data::Dumper;

# Number of consecutive words in one n-gram when the caller gives no size.
use constant NGRAM_SIZE => 5;

# Sample corpus: entry name => raw record text.  A record carries a
# "date:#<digits>#" field and one or more "comment:#<text>#" fields.
my %mycorpus = (
    a => "date:#20180101# comment:#d1 d2 d3 d4 d5 d6#",
    b => "date:#20180101# comment:#b1 b2 b3 b4 b5 b6 b7# comment:#c1 c2 c3 c4 c5 c6#",
    c => "date:#20180101# comment:#d1 d2 d3 d4 d5 d6#",
);

# Sort hash keys so the dump is identical from one run to the next.
$Data::Dumper::Sortkeys = 1;
print Dumper( count_ngrams( \%mycorpus ) );

# ngrams_of($text, $size)
#
# Returns the list of word n-grams found in $text, in order of appearance.
# A word is a run of \w characters, and neighbouring words must be separated
# by exactly one whitespace character.  A window is only recognised when its
# first word is followed by whitespace, and each n-gram is returned with its
# words joined by single spaces.
#
#   $text - string to scan
#   $size - words per n-gram; optional, defaults to NGRAM_SIZE
#
# Dies when $size is not a positive integer.
sub ngrams_of {
    my ( $text, $size ) = @_;
    $size = NGRAM_SIZE if !defined $size;
    die "n-gram size must be a positive integer, got '$size'\n"
        if $size !~ /\A[1-9][0-9]*\z/;

    # Consume only the first word and its separator; the remaining words sit
    # in a lookahead so that the next match starts at the second word and the
    # windows overlap.
    my $word      = '(\w+)';
    my $following = join '\s', ($word) x ( $size - 1 );
    my $window    = qr/$word\s(?=$following)/;

    my @ngrams;
    while ( $text =~ /$window/g ) {

        # The number of capture groups depends on $size, so read them through
        # the @- / @+ offset arrays instead of $1, $2, ...
        my @words =
            map { substr( $text, $-[$_], $+[$_] - $-[$_] ) } 1 .. $size;
        push @ngrams, join ' ', @words;
    }
    return @ngrams;
}

# count_ngrams(\%corpus, $size)
#
# Counts n-grams per date over every entry of the corpus.
#
#   \%corpus - hash ref: entry name => raw record text
#   $size    - words per n-gram; optional, defaults to NGRAM_SIZE
#
# Returns a hash ref of the form { date => { "w1 w2 ..." => count } }.
#
# All comments of one entry are concatenated before scanning, so an n-gram
# may span the boundary between two comments of the same entry.  Entries
# without a date field are skipped with a warning instead of being counted
# under an undefined key.
sub count_ngrams {
    my ( $corpus, $size ) = @_;

    my %counts;
    for my $filename ( sort keys %{$corpus} ) {
        my $record = $corpus->{$filename};

        # When a record holds several date fields, the last one wins.
        my @dates = $record =~ /date:#(\d+)#/g;
        if ( !@dates ) {
            warn "no date found in entry '$filename'; skipping\n";
            next;
        }
        my $date = $dates[-1];

        # Every comment is followed by one space, including the last one, so
        # the final word of the data set is also followed by whitespace.
        my $dataset = join '', map { "$_ " } $record =~ /comment:#(.*?)#/g;

        $counts{$date}{$_}++ for ngrams_of( $dataset, $size );
    }
    return \%counts;
}