#! perl -slw
use strict;
use warnings;
use Time::HiRes qw[ time ];

# Benchmark script: builds two large space-separated lists of ids, computes
# their (smoothed) mutual-information term once via MI(), and prints how long
# that single call took.  The list length can be overridden on the command
# line through the -s switch, e.g. "-N=1e6".

# Default denominator used to turn occurrence counts into probabilities.
use constant CORPUS_SIZE => 6939873;

# _count_items( \$list )
#
# Returns the number of items in the string referenced by $list.  Items are
# assumed to be separated by single spaces, so the count is "spaces + 1".
# An undefined or empty string holds no items and yields 0.
sub _count_items {
    my( $ref ) = @_;

    return 0 unless defined $$ref and length $$ref;

    # tr with an empty replacement list only counts; the string is untouched.
    return 1 + ( $$ref =~ tr[ ][] );
}

# MI( $string_es, $string_en, $hash_es, $hash_en [, $total ] )
#
# Computes the mutual-information term for one pair of keys.
#
#   $string_es, $string_en  keys into the respective hashes
#   $hash_es,   $hash_en    hash refs mapping a key to a space-separated
#                           string of ids
#   $total                  optional denominator for the probabilities;
#                           defaults to CORPUS_SIZE
#
# Returns p(es,en) * log( p(es,en) / ( p(es) * p(en) ) ), where the joint
# probability is smoothed with a 0.1 weight on the independence estimate
# p(es) * p(en).  Returns 0 when either key is absent or its list is empty,
# since no probability can be formed from it.
sub MI {
    my( $string_es, $string_en, $hash_es, $hash_en, $total ) = @_;
    $total //= CORPUS_SIZE;

    # Check before taking references: \$hash->{key} would autovivify a
    # missing key in the caller's hash.
    return 0 unless defined $hash_es->{ $string_es }
                and defined $hash_en->{ $string_en };

    # References to the hash values are used from here on, so the
    # (potentially very large) strings are never copied.
    my $list_es = \$hash_es->{ $string_es };
    my $list_en = \$hash_en->{ $string_en };

    my $n_array_es = _count_items( $list_es );
    my $n_array_en = _count_items( $list_en );
    return 0 unless $n_array_es and $n_array_en;

    my $prob_es = $n_array_es / $total;
    my $prob_en = $n_array_en / $total;

    my $intersection = Intersection( $list_es, $list_en );

    # Smooth the joint probability so it is never zero, which keeps the
    # logarithm below defined even when the lists share nothing.
    my $prob_es_en = $intersection / $total;
    $prob_es_en = ( $prob_es_en + ( $prob_es * $prob_en * 0.1 ) ) / 1.1;

    my $mi = $prob_es_en * log( $prob_es_en / ( $prob_es * $prob_en ) );

    return $mi;
}

# Intersection( \$listA, \$listB )
#
# Takes references to two whitespace-separated strings and returns how many
# tokens of the second string also occur in the first.  A token repeated in
# the second string is counted once per occurrence.  The strings are scanned
# in place through the references; an undefined string counts as empty.
sub Intersection {
    my( $refA, $refB ) = @_;

    return 0 unless defined $$refA and defined $$refB;

    # Dereference in the match itself so no copy of the string is made.
    my %seen;
    $seen{ $1 } = 1 while $$refA =~ m[(\S+)]g;

    my $intersects = 0;
    while( $$refB =~ m[(\S+)]g ) {
        ++$intersects if exists $seen{ $1 };
    }

    return $intersects;
}

# $N is a package variable so that "-N=..." (perl -s) can set it.
our $N //= 1e4;

my $hypo = 'fred';
my $text = 'bill';

my %hash_es;
$hash_es{ $hypo } = join ' ', 1 .. $N;

my %hash_en;
$hash_en{ $text } = join ' ', 1 .. $N;

my $start = time;
my $MI_T = MI( $hypo, $text, \%hash_es, \%hash_en );
printf "Took: %f seconds\n", time() - $start;

__END__
C:\test>888162
Took: 0.103639 seconds

C:\test>888162 -N=1e6
Took: 4.962000 seconds

C:\test>888162 -N=3e6
Took: 15.616000 seconds