use strict; use warnings; package Fishers_Exact; require Exporter; { no warnings 'once'; *import = \&Exporter::import; } our @EXPORT_OK; our %EXPORT_TAGS = ( all => \@EXPORT_OK ); our $VERSION = 0.03; my %CFG; BEGIN { %CFG = ( VERBOSE => 0, PRECISION => 0, ); } BEGIN { push @EXPORT_OK, 'fishers_exact_2' } sub fishers_exact_2 { my ($a, $r1, $c1, $n, $precision) = @_; my ($b, $c, $d) = ($r1 - $a, $c1 - $a, $n - $r1 - $c1 + $a); fishers_exact($a, $b, $c, $d, $precision); } BEGIN { push @EXPORT_OK, 'fishers_exact' } sub fishers_exact { my ($a, $b, $c, $d, $precision) = @_; $precision = precision() unless defined $precision; my $epsilon = $precision * log 10; # $precision is the maximum adjustment to the log10 of the p-value # that we will bother to make; if the sum of the remaining terms in # the summation will result in a smaller adjustment, we neglect # them. In other words, if $p_i is the i-th partial sum for # computing the p-value (which is performed by the loop below), and # if $p is the value of the entire sum, then after computing $p_i we # will check whether # # log10( $p ) - log10( $p_i ) < $precision # # or equivalently, # # $p - $p_i < $p_i*( 10**$precision - 1 ) # # To ensure this, we can use the criterion # # $p - $p_i < $p_i*log( 10 )*$precision < $p_i*( 10**$precision - 1 ) # # ...which, for a small positive $precision, is only slightly more # stringent than the original criterion. (The second inequality # above follows from Taylor's theorem applied to 10**$precision, for # $precision > 0.) Hence we precompute $epsilon as log( 10 ) times # the $precision. my ($r1, $c1, $N) = ($a + $b, $a + $c, $a + $b + $c + $d); my $test = $a*$N - $r1*$c1; if ($test < 0) { if ($d < $a) { ($a, $b, $c, $d) = ($d, $c, $b, $a); } } else { if ($b < $c) { ($a, $b, $c, $d) = ($b, $a, $d, $c); } else { ($a, $b, $c, $d) = ($c, $d, $a, $b); } } # NOTE: $r1 and $c1 can't be computed any sooner because of # the normalization step above. ($r1, $c1) = ($a + $b, $a + $c); my $numerator = ln_fact( $r1 ) + ln_fact( $c + $d ) + ln_fact( $c1 ) + ln_fact( $b + $d ) - ln_fact( $N ); my $p_val = 0; # Don't use 0. !!! The decimal point # will cause Math::Pari to discard # precision. my $first; { my $delta = exp($numerator - (ln_fact( $a ) + ln_fact( $b ) + ln_fact( $c ) + ln_fact( $d ))); $p_val += $delta; $first = $delta unless defined $first; last if $a < 1 or $epsilon and ($delta*$a < $epsilon*$p_val); # The RHS of the second inequality above is explained in the # earlier comment following the definition of $epsilon. The LHS # is an upper bound for the expression $p - $p_i, as defined in # said comment. This is because the *number* of elements *left* # in the sum, including the one corresponding to $a = 0, equals # the *current* value of $a. All these subsequent elements are <= # $delta (since we are summing strictly on one side of the # distribution's maximum; that's the purpose of the earlier # ( $test < 0 ) test); therefore $a * $delta is an upper bound for # the remainder of the sum. Hence, if the tested inequality # holds, then the criterion for stopping is met a fortiori: # # $p - $p_i <= $delta*$a < $epsilon*$p_val --$a; ++$b; ++$c; --$d; redo; } $p_val = 1 - $p_val + $first if $test < 0; return $p_val; } { my $max_arr; my @ln_fact; my $two_pi; my $pi_over_3; my $half; BEGIN { $max_arr = 10000; $#ln_fact = $max_arr - 1; use Math::Pari qw( PARI factorial ); $two_pi = PARI( 2 * atan2 0, -1 ); $pi_over_3 = PARI( atan2( 0, -1 )/3 ); $half = PARI( 0.5 ); } sub ln_fact { my $n = PARI( shift() ); die "Bad args to ln_fact: $n" if $n < 0; if ( $n < $max_arr ) { $ln_fact[ $n ] ||= log factorial( $n ); return $ln_fact[ $n ]; } else { # Gosper's approximation; from # http://mathworld.wolfram.com/StirlingsApproximation.html return $half * log( $two_pi*$n + $pi_over_3 ) + $n * log( $n ) - $n; } } } BEGIN { push @EXPORT_OK, map lc, keys %CFG } sub AUTOLOAD { our $AUTOLOAD; ( my $name = uc $AUTOLOAD ) =~ s/.*:://; if ( defined $CFG{ $name } ) { my $getset = sub { my $ret = $CFG{ $name }; $CFG{ $name } = shift if @_; return $ret; }; { no strict 'refs'; *$AUTOLOAD = $getset; } goto &$AUTOLOAD; } else { die "Unknown function: $AUTOLOAD"; } } BEGIN { for my $k ( keys %CFG ) { my $method = lc $k; push @EXPORT_OK, $method; my $getset = sub { my $ret = $CFG{ $k }; $CFG{ $k } = shift if @_; return $ret; }; { no strict 'refs'; *$method = $getset; } } } BEGIN { use Carp; $SIG{ __DIE__ } = sub { verbose() ? Carp::confess( @_ ) : die @_ }; }