>>>FILE-EN>>>
The cat sees the dog
The rat is in the cat
The cat runs
>>>>>FILE-RU>>>>>>
Koshka vidit sobaku
Krisa v koshke
Koshka bezhit
####
#!/usr/bin/perl
use strict;
use utf8;
use warnings;
use Data::Dumper;
# Build %uniform: for every English token, the number of (English token,
# foreign token) pairings it takes part in across the line-aligned corpora
# corpus.e and corpus.f.  Each occurrence of an English word adds one count
# per token of the foreign sentence found on the same line number.

# Read a corpus file and return its lines with the trailing newline removed.
# Dies with the file name and the OS error if the file cannot be opened.
sub read_sentences {
    my ($path) = @_;

    open my $fh, '<', $path or die "Cannot open $path: $!";
    my @sentences;

    # The explicit readline is essential: a bare `while ()` has an empty
    # (always true) condition and would loop forever without reading anything.
    while ( my $line = <$fh> ) {
        chomp $line;
        push @sentences, $line;
    }
    close $fh;

    return @sentences;
}

my @sents_en = read_sentences('corpus.e');
my @sents_f  = read_sentences('corpus.f');

my %uniform;
for my $k ( 0 .. $#sents_en ) {    # $k indexes the aligned sentence pair

    # split ' ' skips leading and repeated whitespace, so stray spaces do
    # not produce empty-string "words" in the counts.
    my @words_en = split ' ', $sents_en[$k];

    # If corpus.f is shorter than corpus.e, treat the missing line as an
    # empty sentence: its English words simply receive no counts.
    my @words_f = split ' ', ( defined $sents_f[$k] ? $sents_f[$k] : '' );

    # TODO: open idea from the original author -- count each distinct
    # foreign word only once per sentence (e.g. by tracking a %seen hash).
    for my $word_en (@words_en) {
        for my $word_f (@words_f) {
            $uniform{$word_en}++;    # autovivifies to 1 on first sight
        }
    }
}
print Dumper \%uniform;
####
$VAR1 = {
'the' => 6,
'rat' => 3,
'is' => 3,
'cat' => 8,
'dog' => 3,
'in' => 3,
'runs' => 2,
'sees' => 3,
'The' => 8
};