#!/usr/bin/perl
use strict;
use utf8;
use warnings;
use Data::Dumper;

# Co-occurrence counter for a sentence-aligned parallel corpus.
#
# Reads corpus.e (English) and corpus.f (foreign), one sentence per line,
# pairs the sentences by line number, and prints for every English word the
# number of foreign word tokens it shares a sentence pair with.
#
# Sample data the recorded output at the end of this file is consistent with
# (presumably the contents of corpus.e and corpus.f for that run):
#
# >>>FILE-EN>>>
#   The cat sees the dog
#   The rat is in the cat
#   The cat runs
#
# >>>>>FILE-RU>>>>>>
#   Koshka vidit sobaku
#   Krisa v koshke
#   Koshka bezhit

# Return the lines of $path, newline-stripped, in file order.
# Dies with the path and the OS error if the file cannot be opened.
sub read_sentences {
    my ($path) = @_;

    open my $fh, '<', $path or die "Cannot open $path: $!";

    my @sentences;
    while ( my $line = <$fh> ) {
        chomp $line;
        push @sentences, $line;
    }
    close $fh;

    return @sentences;
}

# Count, for each English word, the foreign word tokens it co-occurs with.
#
#   $sents_en - array ref of English sentences
#   $sents_f  - array ref of foreign sentences, aligned by index
#
# Returns a hash ref: English word => number of foreign tokens seen next to
# it, summed over every occurrence of the word in every sentence pair.
# Words are case-sensitive and split on single spaces.
#
# NOTE(review): repeated foreign words are counted every time. The original
# script carried a disabled attempt ("TRY TO COUNT UNIQUE WORDS") to count
# each distinct foreign word only once; that is not implemented here.
sub count_cooccurrences {
    my ( $sents_en, $sents_f ) = @_;

    my %uniform;
    for my $k ( 0 .. $#{$sents_en} ) {

        # An English sentence without an aligned foreign one adds nothing.
        next unless defined $sents_f->[$k];

        my @words_en = split / /, $sents_en->[$k];
        my @words_f  = split / /, $sents_f->[$k];

        # Skip so that no zero-valued keys appear for empty foreign lines.
        next unless @words_f;

        for my $word_en (@words_en) {
            # One increment per foreign token in the paired sentence.
            $uniform{$word_en} += scalar @words_f;
        }
    }

    return \%uniform;
}

# Entry point: read both corpora from the current directory and dump counts.
sub main {
    my @sents_en = read_sentences('corpus.e');
    my @sents_f  = read_sentences('corpus.f');

    print Dumper( count_cooccurrences( \@sents_en, \@sents_f ) );

    return;
}

# Run only when executed as a script, so the subs can be loaded for testing.
main() unless caller;

__END__

Output recorded for the sample data above (hash key order is not fixed):

$VAR1 = {
          'the' => 6,
          'rat' => 3,
          'is' => 3,
          'cat' => 8,
          'dog' => 3,
          'in' => 3,
          'runs' => 2,
          'sees' => 3,
          'The' => 8
        };