use strict;
use warnings;

package Fishers_Exact;
require Exporter;
{
  no warnings 'once';
  *import = \&Exporter::import;
}
our @EXPORT_OK;
our %EXPORT_TAGS = ( all => \@EXPORT_OK );
our $VERSION = 0.03;

my %CFG;
BEGIN {
  %CFG = (
          VERBOSE   => 0,
          PRECISION => 0,
         );
}

BEGIN { push @EXPORT_OK, 'fishers_exact_2' }
sub fishers_exact_2 {
  my ($a, $r1, $c1, $n, $precision) = @_;
  my ($b, $c, $d) = ($r1 - $a, $c1 - $a, $n - $r1 - $c1 + $a);
  fishers_exact($a, $b, $c, $d, $precision);
}

BEGIN { push @EXPORT_OK, 'fishers_exact' }
sub fishers_exact {
  my ($a, $b, $c, $d, $precision) = @_;
  $precision = precision() unless defined $precision;
  my $epsilon = $precision * log 10;
  # $precision is the maximum adjustment to the log10 of the p-value
  # that we will bother to make; if the sum of the remaining terms in
  # the summation will result in a smaller adjustment, we neglect
  # them.  In other words, if $p_i is the i-th partial sum for
  # computing the p-value (which is performed by the loop below), and
  # if $p is the value of the entire sum, then after computing $p_i we
  # will check whether
  #
  # log10( $p ) - log10( $p_i ) < $precision
  #
  # or equivalently,
  #
  # $p - $p_i < $p_i*( 10**$precision - 1 )
  #
  # To ensure this, we can use the criterion
  #
  # $p - $p_i < $p_i*log( 10 )*$precision < $p_i*( 10**$precision - 1 )
  #
  # ...which, for a small positive $precision, is only slightly more
  # stringent than the original criterion.  (The second inequality
  # above follows from Taylor's theorem applied to 10**$precision, for
  # $precision > 0.)  Hence we precompute $epsilon as log( 10 ) times
  # the $precision.

  my ($r1, $c1, $N) =
    ($a + $b, $a + $c,
     $a + $b + $c + $d);

  my $test = $a*$N - $r1*$c1;
  if ($test < 0) {
    if ($d < $a) {
	($a, $b, $c, $d) = ($d, $c, $b, $a);
    }
  }
  else {
    if ($b < $c) {
      ($a, $b, $c, $d) = ($b, $a, $d, $c);
    }
    else {
      ($a, $b, $c, $d) = ($c, $d, $a, $b);
    }
  }

  # NOTE: $r1 and $c1 can't be computed any sooner because of
  # the normalization step above.
  ($r1, $c1) = ($a + $b, $a + $c);
  my $numerator = ln_fact( $r1 ) + ln_fact( $c + $d ) +
    ln_fact( $c1 ) + ln_fact( $b + $d ) - ln_fact( $N );

  my $p_val = 0;                # Don't use 0. !!!  The decimal point
                                # will cause Math::Pari to discard
                                # precision.
  my $first;

  {
    my $delta = exp($numerator - (ln_fact( $a ) + ln_fact( $b ) +
				  ln_fact( $c ) + ln_fact( $d )));

    $p_val += $delta;

    $first = $delta unless defined $first;
    last if $a < 1 or $epsilon and ($delta*$a < $epsilon*$p_val);
    # The RHS of the second inequality above is explained in the
    # earlier comment following the definition of $epsilon.  The LHS
    # is an upper bound for the expression $p - $p_i, as defined in
    # said comment.  This is because the *number* of elements *left*
    # in the sum, including the one corresponding to $a = 0, equals
    # the *current* value of $a.  All these subsequent elements are <=
    # $delta (since we are summing strictly on one side of the
    # distribution's maximum; that's the purpose of the earlier
    # ( $test < 0 ) test); therefore $a * $delta is an upper bound for
    # the remainder of the sum.  Hence, if the tested inequality
    # holds, then the criterion for stopping is met a fortiori:
    #
    # $p - $p_i <= $delta*$a < $epsilon*$p_val

    --$a; ++$b; ++$c; --$d;
    redo;
  }

  $p_val = 1 - $p_val + $first if $test < 0;
  return $p_val;
}

{
  my $max_arr;
  my @ln_fact;
  my $two_pi;
  my $pi_over_3;
  my $half;
  BEGIN {
    $max_arr   = 10000;
    $#ln_fact  = $max_arr - 1;
    use Math::Pari qw( PARI factorial );
    $two_pi    = PARI( 2 * atan2 0, -1 );
    $pi_over_3 = PARI( atan2( 0, -1 )/3 );
    $half      = PARI( 0.5 );
  }
  sub ln_fact {
    my $n = PARI( shift() );
    die "Bad args to ln_fact: $n" if $n < 0;
    if ( $n < $max_arr ) {
      $ln_fact[ $n ] ||= log factorial( $n );
      return $ln_fact[ $n ];
    }
    else {
      # Gosper's approximation; from
      # http://mathworld.wolfram.com/StirlingsApproximation.html
      return $half * log( $two_pi*$n + $pi_over_3 )
             +  $n * log( $n )
             -  $n;
    }
  }
}

BEGIN { push @EXPORT_OK, map lc, keys %CFG }

sub AUTOLOAD {
  our $AUTOLOAD;
  ( my $name = uc $AUTOLOAD ) =~ s/.*:://;
  if ( defined $CFG{ $name } ) {
    my $getset =
      sub {
        my $ret = $CFG{ $name };
        $CFG{ $name } = shift if @_;
        return $ret;
      };
    {
      no strict 'refs';
      *$AUTOLOAD = $getset;
    }
    goto &$AUTOLOAD;
  }
  else {
    die "Unknown function: $AUTOLOAD";
  }
}

BEGIN {
  for my $k ( keys %CFG ) {
    my $method = lc $k;
    push @EXPORT_OK, $method;
    my $getset =
      sub {
        my $ret = $CFG{ $k };
        $CFG{ $k } = shift if @_;
        return $ret;
      };
    {
      no strict 'refs';
      *$method = $getset;
    }
  }
}

BEGIN {
  use Carp;
  $SIG{ __DIE__ } = sub { verbose() ? Carp::confess( @_ )
                                    : die @_ };
}