#! perl -slw
use strict;
use Time::HiRes qw[ time ];
## MI( $string_es, $string_en, \%hash_es, \%hash_en [, $total ] )
##
## Scores how strongly two keys co-occur. Each hash maps a key to a
## whitespace-separated string of ids; the score is built from how many ids
## each key has and how many ids the two keys share.
##
##   $string_es, $string_en  keys to look up in the respective hashes
##   $hash_es,   $hash_en    hash references: key => "id id id ..."
##   $total                  optional denominator that turns counts into
##                           probabilities; defaults to 6939873, the value
##                           that used to be hard-coded
##
## Returns p(es,en) * log( p(es,en) / ( p(es) * p(en) ) ) using a smoothed
## joint probability, or 0 when either key has no ids at all.
sub MI {
    my( $string_es, $string_en, $hash_es, $hash_en, $total ) = @_;
    $total //= 6939873;

    ## A missing key is treated like an empty id list instead of feeding
    ## undef to split.
    my @array_es = split ' ', $hash_es->{ $string_es } // '';
    my @array_en = split ' ', $hash_en->{ $string_en } // '';

    ## With a zero marginal the ratio below would divide by zero; return 0
    ## (the usual 0 * log 0 convention) rather than dying.
    return 0 unless @array_es and @array_en;

    my $prob_es = @array_es / $total;
    my $prob_en = @array_en / $total;

    my $intersection = Intersection( \@array_es, \@array_en );
    my $prob_es_en   = $intersection / $total;

    ## Smoothing: blend the observed joint probability with one tenth of the
    ## independence estimate (weights 1 and 0.1, renormalised by 1.1). This
    ## keeps the value above zero so the log below is always defined.
    $prob_es_en = ( $prob_es_en + ( $prob_es * $prob_en * 0.1 ) ) / 1.1;

    return $prob_es_en * log( $prob_es_en / ( $prob_es * $prob_en ) );
}
## Intersection( \@A, \@B )
##
## Returns the number of distinct values that occur in both arrays.
## A value repeated inside only one of the arrays is not a match, and a
## value shared by both is counted once however often it repeats.
sub Intersection {
    my( $refA, $refB ) = @_;

    ## Every distinct value of A that has not yet been matched by B.
    my %unmatched;
    @unmatched{ @$refA } = ();

    my $intersects = 0;
    for my $id ( @$refB ) {
        next unless exists $unmatched{ $id };
        ## Remove on first match so later repeats in B do not count again.
        delete $unmatched{ $id };
        ++$intersects;
    }
    return $intersects;
}
## Benchmark driver. The list size may be overridden on the command line as
## -N=<count>; the -s switch on the shebang line turns that into $main::N.
our $N //= 1e4;
my( $hypo, $text ) = ( 'fred', 'bill' );
## Both hashes get the same ids, 1 .. $N, as one space-separated string.
my %hash_es = ( $hypo => join( ' ', 1 .. $N ) );
my %hash_en = ( $text => join( ' ', 1 .. $N ) );
## Time one call to MI() with the high-resolution time() from Time::HiRes.
my $start   = time;
my $MI_T    = MI( $hypo, $text, \%hash_es, \%hash_en );
my $elapsed = time() - $start;
printf "Took: %f seconds\n", $elapsed;
__END__
C:\test>888162
Took: 0.046187 seconds
C:\test>888162 -N=1e5
Took: 0.677000 seconds
C:\test>888162 -N=1e6
Took: 7.680000 seconds
C:\test>888162 -N=2e6
Took: 15.748000 seconds
C:\test>888162 -N=3e6
Took: 25.244000 seconds
####
## Excerpt of the original approach: the id string is split into a full
## array even though only the number of elements is needed afterwards.
my @array_es = split ' ', $hash_es->{ $string_es };
## The array in numeric context yields its element count. 6939873 is the
## fixed denominator; presumably the corpus size (confirm with the author).
my $prob_es = ( @array_es ) / 6939873;
####
## Count the ids in place instead of splitting them into an array: the
## count is one more than the number of separating spaces.
## Assumes ids are separated by exactly one space with none leading or
## trailing; TODO confirm this holds for the real data.
## A missing or empty entry holds no ids, so it must count as 0, not 1.
my $n_array_es = 0;
if( length( $hash_es->{ $string_es } // '' ) ) {
    ## tr maps each space to itself, so the string is left unchanged and
    ## the return value is simply the number of spaces found.
    $n_array_es = 1 + ( $hash_es->{ $string_es } =~ tr[ ][ ] );
}
####
## Intersection( \$stringA, \$stringB )
##
## Takes references to two strings of whitespace-separated ids and returns
## the number of distinct ids that occur in both. The strings are scanned
## in place through the references, so neither is copied or split into an
## array.
sub Intersection {
    my( $refA, $refB ) = @_;

    ## Record every id seen in A.
    my %counts;
    ++$counts{ $1 } while $$refA =~ m[(\S+)]g;

    ## delete returns the stored count (always >= 1, hence true) when the id
    ## was present in A, and removes it so that a repeat of the same id in B
    ## is not counted a second time.
    my $intersects = 0;
    delete $counts{ $1 } and ++$intersects while $$refB =~ m[(\S+)]g;

    return $intersects;
}
####
#! perl -slw
use strict;
use Time::HiRes qw[ time ];
## MI( $string_es, $string_en, \%hash_es, \%hash_en [, $total ] )
##
## Scores how strongly two keys co-occur. Each hash maps a key to a string
## of space-separated ids; the id strings are counted and compared in place
## rather than being split into arrays.
##
##   $string_es, $string_en  keys to look up in the respective hashes
##   $hash_es,   $hash_en    hash references: key => "id id id ..."
##   $total                  optional denominator that turns counts into
##                           probabilities; defaults to 6939873, the value
##                           that used to be hard-coded
##
## Returns p(es,en) * log( p(es,en) / ( p(es) * p(en) ) ) using a smoothed
## joint probability, or 0 when either key has no ids at all.
sub MI {
    my( $string_es, $string_en, $hash_es, $hash_en, $total ) = @_;
    $total //= 6939873;

    ## A missing or empty entry has no ids. Bail out before counting (which
    ## would report 1) and before taking references below (which would
    ## create the missing key in the caller's hash).
    return 0
        unless length( $hash_es->{ $string_es } // '' )
           and length( $hash_en->{ $string_en } // '' );

    ## Number of ids = number of separating spaces + 1. Assumes single-space
    ## separators with none leading or trailing, as join ' ' produces.
    my $n_array_es = 1 + ( $hash_es->{ $string_es } =~ tr[ ][ ] );
    my $n_array_en = 1 + ( $hash_en->{ $string_en } =~ tr[ ][ ] );

    my $prob_es = $n_array_es / $total;
    my $prob_en = $n_array_en / $total;

    ## Notice I am passing references to the hash values here!
    my $intersection = Intersection(
        \$hash_es->{ $string_es },
        \$hash_en->{ $string_en }
    );
    my $prob_es_en = $intersection / $total;

    ## Smoothing: blend the observed joint probability with one tenth of the
    ## independence estimate (weights 1 and 0.1, renormalised by 1.1). This
    ## keeps the value above zero so the log below is always defined.
    $prob_es_en = ( $prob_es_en + ( $prob_es * $prob_en * 0.1 ) ) / 1.1;

    return $prob_es_en * log( $prob_es_en / ( $prob_es * $prob_en ) );
}
## Intersection( \$stringA, \$stringB )
##
## Takes references to two strings of whitespace-separated ids and returns
## the number of distinct ids that occur in both.
sub Intersection {
    my( $refA, $refB ) = @_;

    ## And dereferencing them here!!
    ## Each string is scanned in place through its reference, so neither is
    ## copied or split into an array.
    my %counts;
    ++$counts{ $1 } while $$refA =~ m[(\S+)]g;

    ## delete returns the stored count (always >= 1, hence true) when the id
    ## was present in A, and removes it so that a repeat of the same id in B
    ## is not counted a second time.
    my $intersects = 0;
    delete $counts{ $1 } and ++$intersects while $$refB =~ m[(\S+)]g;

    return $intersects;
}
## Benchmark driver. -N=<count> on the command line (picked up through the
## shebang's -s switch) overrides the default list size.
our $N //= 1e4;
my $hypo = 'fred';
my $text = 'bill';
## One space-separated id string, 1 .. $N, stored under each key.
my( %hash_es, %hash_en );
$hash_es{ $hypo } = join ' ', 1 .. $N;
$hash_en{ $text } = join ' ', 1 .. $N;
## Wall-clock a single MI() call using Time::HiRes's time().
my $start   = time;
my $MI_T    = MI( $hypo, $text, \%hash_es, \%hash_en );
my $elapsed = time() - $start;
printf "Took: %f seconds\n", $elapsed;
__END__
C:\test>888162
Took: 0.103639 seconds
C:\test>888162 -N=1e6
Took: 4.962000 seconds
C:\test>888162 -N=3e6
Took: 15.616000 seconds