#! perl -slw
use strict;
use warnings;
use Time::HiRes qw[ time ];

# Default denominator used to turn occurrence counts into probabilities.
# NOTE(review): presumably the total number of units (sentences/documents)
# in the corpus the posting lists were built from -- confirm with the data.
use constant DEFAULT_TOTAL => 6_939_873;

# MI( $string_es, $string_en, \%hash_es, \%hash_en [, $total ] )
#
# Looks up the two keys in their respective hashes; each hash value is a
# whitespace-separated list of tokens (presumably ids of the places where
# the word occurs -- confirm against the caller). From the list sizes and
# the size of their intersection it computes
#
#     p_joint * log( p_joint / ( p_es * p_en ) )
#
# where p_joint is first blended with the independence estimate
# p_es * p_en using weights 1 : 0.1.
#
# $total is optional and defaults to DEFAULT_TOTAL, so existing
# four-argument callers behave as before.
#
# Returns 0 when either key is missing or has an empty list; without that
# guard the denominator is zero and Perl dies with
# "Illegal division by zero".
sub MI {
    my( $string_es, $string_en, $hash_es, $hash_en, $total ) = @_;
    $total //= DEFAULT_TOTAL;

    # '// q{}' keeps a missing key from raising an "uninitialized" warning.
    my @array_es = split ' ', $hash_es->{ $string_es } // '';
    my @array_en = split ' ', $hash_en->{ $string_en } // '';

    # Nothing observed for one side: no association can be measured.
    return 0 unless @array_es and @array_en;

    my $prob_es = @array_es / $total;
    my $prob_en = @array_en / $total;

    my $intersection = Intersection( \@array_es, \@array_en );
    my $prob_es_en   = $intersection / $total;

    # Smooth the joint probability towards the independence estimate so
    # that it is strictly positive and the log below is always defined.
    $prob_es_en = ( $prob_es_en + ( $prob_es * $prob_en * 0.1 ) ) / 1.1;

    my $mi = $prob_es_en * log( $prob_es_en / ( $prob_es * $prob_en ) );
    return $mi;
}

# Intersection( \@a, \@b )
#
# Returns the number of distinct values that appear in both lists.
# A value repeated inside a single list is not mistaken for a shared
# value, and a shared value is counted once however often it repeats.
sub Intersection {
    my( $refA, $refB ) = @_;

    # Set of the distinct values of the first list.
    my %in_a;
    @in_a{ @$refA } = ();

    my $intersects = 0;
    for my $item ( @$refB ) {
        next unless exists $in_a{ $item };
        delete $in_a{ $item };    # count each shared value only once
        ++$intersects;
    }
    return $intersects;
}

# Benchmark driver. The -s switch on the #! line lets the list length be
# overridden from the command line as -N=<count>.
our $N //= 1e4;

my $hypo = 'fred';
my $text = 'bill';

my %hash_es;
$hash_es{ $hypo } = join ' ', 1 .. $N;

my %hash_en;
$hash_en{ $text } = join ' ', 1 .. $N;

my $start = time;
my $MI_T  = MI( $hypo, $text, \%hash_es, \%hash_en );
printf "Took: %f seconds\n", time() - $start;

__END__
C:\test>888162
Took: 0.046187 seconds

C:\test>888162 -N=1e5
Took: 0.677000 seconds

C:\test>888162 -N=1e6
Took: 7.680000 seconds

C:\test>888162 -N=2e6
Took: 15.748000 seconds

C:\test>888162 -N=3e6
Took: 25.244000 seconds