#! perl -slw
use strict;
use Time::HiRes qw[ time ];
# MI( $string_es, $string_en, $hash_es, $hash_en )
#
# Computes a smoothed pointwise mutual-information style score for one
# Spanish/English key pair.
#
#   $string_es, $string_en  keys into the two hashes
#   $hash_es,   $hash_en    hash refs whose values are space-separated lists
#                           (presumably occurrence ids; confirm with caller)
#
# Returns p(es,en) * log( p(es,en) / ( p(es) * p(en) ) ), where p(es,en) has
# been blended with the independence estimate p(es)*p(en) at weight 0.1.
sub MI {
    my( $string_es, $string_en, $hash_es, $hash_en ) = @_;

    # Denominator for every probability below.
    # NOTE(review): presumably the corpus size; confirm where it comes from.
    my $total = 6939873;

    # Items in a space-separated list = number of separators + 1.
    my $count_es = 1 + ( $hash_es->{ $string_es } =~ tr[ ][ ] );
    my $count_en = 1 + ( $hash_en->{ $string_en } =~ tr[ ][ ] );

    my $p_es = $count_es / $total;
    my $p_en = $count_en / $total;

    # The lists are handed over by reference so they are not copied.
    my $shared = Intersection(
        \$hash_es->{ $string_es }, \$hash_en->{ $string_en }
    );

    # Smooth the joint probability towards independence so that an empty
    # intersection never feeds a zero into log().
    my $p_joint = ( $shared / $total + ( $p_es * $p_en * 0.1 ) ) / 1.1;

    return $p_joint * log( $p_joint / ( $p_es * $p_en ) );
}
# Intersection( \$listA, \$listB )
#
# Both arguments are references to strings of whitespace-separated
# non-negative integers.  Every integer in the first string sets the bit at
# that index in a scratch bit vector; every token of the second string whose
# bit is set is then counted (a value repeated in the second string is
# counted once per occurrence).
#
# Returns that count.
sub Intersection {
    my( $refA, $refB ) = @_;

    # Mark each id from the first list.
    my $seen = '';
    while( $$refA =~ m[(\S+)]g ) {
        vec( $seen, $1, 1 ) = 1;
    }

    # Count the ids from the second list that were marked.
    my $common = 0;
    while( $$refB =~ m[(\S+)]g ) {
        ++$common if vec( $seen, $1, 1 );
    }

    return $common;
}
# Benchmark driver: build two identical space-separated lists 1 .. $N and
# time a single MI() call over them.

# $N can be supplied on the command line as -N=... (perl -s); default 10,000.
our $N //= 1e4;

my( $hypo, $text ) = ( 'fred', 'bill' );

my %hash_es = ( $hypo => join( ' ', 1 .. $N ) );
my %hash_en = ( $text => join( ' ', 1 .. $N ) );

my $start = time;
my $MI_T  = MI( $hypo, $text, \%hash_es, \%hash_en );
printf "Took: %f seconds\n", time() - $start;
__END__
C:\test>888162 -N=3e6
Took: 5.966000 seconds
####
# Count the set bits in each bit vector.  In an unpack template a leading
# '%32' asks for a 32-bit checksum of the unpacked items instead of the items
# themselves; with 'b*' the items are the individual bits, so the checksum is
# the number of bits that are set.
# NOTE(review): this assumes the hash values are vec()-built bit strings
# rather than space-separated lists -- confirm against how they are populated.
my $n_es = unpack '%32b*', $hash_es->{ $string_es };
my $n_en = unpack '%32b*', $hash_en->{ $string_en };
####
# Size of the intersection: string-wise bitwise AND of the two bit vectors
# keeps only the bits set in both, and the '%32b*' checksum then counts them.
# The right-hand operand must be the English vector; ANDing the Spanish
# vector with itself would merely return its own population count.
my $intersections = unpack '%32b*',
$hash_es->{ $string_es } & $hash_en->{ $string_en };
####
#! perl -slw
use strict;
use Time::HiRes qw[ time ];
# MI( $string_es, $string_en, $hash_es, $hash_en [, $total ] )
#
# Computes a smoothed pointwise mutual-information style score for one
# Spanish/English key pair whose occurrences are stored as bit vectors.
#
#   $string_es, $string_en  keys into the two hashes
#   $hash_es,   $hash_en    hash refs whose values are vec()-style bit
#                           strings (bit i set = item i present)
#   $total                  optional denominator for all probabilities;
#                           defaults to 6939873
#                           NOTE(review): presumably the corpus size; confirm.
#
# Returns p(es,en) * log( p(es,en) / ( p(es) * p(en) ) ), where p(es,en) has
# been blended with the independence estimate p(es)*p(en) at weight 0.1.
# Returns 0 when either vector is missing or has no bits set, since the
# ratio inside log() would otherwise divide by zero.
sub MI {
    my( $string_es, $string_en, $hash_es, $hash_en, $total ) = @_;
    $total //= 6939873;

    # A missing key is treated as an empty bit vector.
    my $bits_es = $hash_es->{ $string_es } // '';
    my $bits_en = $hash_en->{ $string_en } // '';

    # '%32b*' is unpack's checksum form: it sums the individual bits, i.e.
    # it returns the number of set bits.
    my $n_es = unpack '%32b*', $bits_es;
    my $n_en = unpack '%32b*', $bits_en;
    return 0 unless $n_es && $n_en;

    my $prob_es = $n_es / $total;
    my $prob_en = $n_en / $total;

    # Bits set in BOTH vectors: AND the Spanish vector with the English one
    # (not with itself), then count what survives.
    my $intersection = unpack '%32b*', $bits_es & $bits_en;

    # Smooth the joint probability towards independence so that an empty
    # intersection never feeds a zero into log().
    my $prob_es_en = $intersection / $total;
    $prob_es_en = ( $prob_es_en + ( $prob_es * $prob_en * 0.1 ) ) / 1.1;

    return $prob_es_en * log( $prob_es_en / ( $prob_es * $prob_en ) );
}
# Benchmark driver: build two identical bit vectors with bits 1 .. $N set and
# time a single MI() call over them.

# $N can be supplied on the command line as -N=... (perl -s); default 10,000.
our $N //= 1e4;

my( $hypo, $text ) = ( 'fred', 'bill' );
my( %hash_es, %hash_en );

# Fill both vectors the same way: start empty, then set bits 1 .. $N.
for my $vector ( \$hash_es{ $hypo }, \$hash_en{ $text } ) {
    $$vector = '';
    vec( $$vector, $_, 1 ) = 1 for 1 .. $N;
}

my $start = time;
my $MI_T  = MI( $hypo, $text, \%hash_es, \%hash_en );
printf "Took: %f seconds\n", time() - $start;
__END__
C:\test>888162 -N=3e6
Took: 0.001901 seconds