sub getCorpus {				
my %corpus;					
my $text;					
 opendir (DR, "$_[0]") || die ("Cannot open directory");  
my @files = readdir(DR);
 for my $i (0 .. $#files) {			
 if ($files[$i] =~ /\.txt/ && $files[$i] !~ /\._/) {	
 {
local $/ = undef ;							
 open(FILE, "$_[0]/$files[$i]") or die ("file not found");
 $text = <FILE> ;	
}
 $files[$i] =~ s{\.txt}{};
 $corpus{$files[$i]} = $text; 
}
}
return %corpus;					#Returns a hash called corpus
}

my %mycorpus = (
    a => "date:#20180101# title:#cat dog# text:#sheep sheep sheep sheep#" ,

    b => "date:#20180101# title:#cow puppy# text:#pig pig pig#",
);

my %counts;
foreach my $filename (sort keys %mycorpus) {
        my $date;
        my $dataset = '';

				#get date
        while ($mycorpus{$filename} =~ /date:#(\d{8})#/g){ 
            $date = $1; 
        }
		
				#get part 1 of dataset
         while ($mycorpus{$filename} =~ /title:#(.*?)#/g){
            $dataset = $1;
					#Actions usually performed here which clean the titles
												}	
				#get part 2 of dataset																
		  while ($mycorpus{$filename} =~ /text:#(.*?)#/g){
            $dataset = $1;
			#Actions usually performed here which clean the text
												}
			my @words = split /\W+/, $dataset;
			
			foreach my $word (@words){
				
				if ($word =~ /(\w+)/gi){
					$word =~ tr/A-Z/a-z/;
					$counts{$date}{$word}++;
					$word_types{$word}++;
					$overallcounts{$date}++;
				}
			}   
		}

 use Data::Dumper;
 print Dumper \%counts;

##</code><code>##

sub getCorpus {				
my %corpus;					
my $text;					
 opendir (DR, "$_[0]") || die ("Cannot open directory");  
my @files = readdir(DR);
 for my $i (0 .. $#files) {			
 if ($files[$i] =~ /\.txt/ && $files[$i] !~ /\._/) {	
 {
local $/ = undef ;							
 open(FILE, "$_[0]/$files[$i]") or die ("file not found");
 $text = <FILE> ;	
}
 $files[$i] =~ s{\.txt}{};
 $corpus{$files[$i]} = $text; 
}
}
return %corpus;					#Returns a hash called corpus
}

my %mycorpus = (
    a => "date:#20180101# title:#cat dog# text:#sheep sheep sheep sheep#" ,

    b => "date:#20180101# comment:#woof woof#",

    c => "date:#20180101# title:#cow puppy# text:#pig pig pig#",
);



my %counts;
foreach my $filename (sort keys %mycorpus) {
        my $date;
        my $dataset = '';

				
        while ($mycorpus{$filename} =~ /date:#(\d{8})#/g){ 
            $date = $1; 
        }
            
         while ($mycorpus{$filename} =~ /title:#(.*?)#/g){
            $dataset = $1;
					#Actions usually performed here which clean the titles (i.e. substituting certain characters)
												}	
												
		 while ($mycorpus{$filename} =~ /text:#(.*?)#/g){
            $dataset = $1;
			#Actions usually performed here which clean the text
					
												}
																				
		  while ($mycorpus{$filename} =~ /comment:#(.*?)#/g){
				$dataset = $1;
					#Actions usually performed here which clean the comments	
												 }	
			
			my @words = split /\W+/, $dataset;
			
			foreach my $word (@words){
				
				if ($word =~ /(\w+)/gi){
					$word =~ tr/A-Z/a-z/;
					$counts{$date}{$word}++;
					$word_types{$word}++;
					$overallcounts{$date}++;
				}
			}   
		}

 use Data::Dumper;
 print Dumper \%counts;
 
##</code><code>##

$VAR1 = {
          '20180101' => {
                          'puppy' => 1
                          'dog' => 1
                          'cat' => 1
                          'cow' => 1
                          'sheep' => 4,
                          'woof' => 2,
                          'pig' => 3
                        }
        };

##</code><code>##

$VAR1 = {
          '20180101' => {
                          'sheep' => 4,
                          'woof' => 2,
                          'pig' => 3
                        }
        };