note
BioLion
<p>I was really desperate for a perl implemetation of FET, [cpan://Text::NSP::Measures::2D::Fisher] just didn't stand up in any way - the 'basic usage' they give doesn't even work, even once you have fixed their syntax errors and got it to compile!<readmore>Taken from [http://search.cpan.org/~tpederse/Text-NSP-1.09/lib/Text/NSP/Measures/2D/Fisher2/twotailed.pm]<code>use Text::NSP::Measures::2D::Fisher2::twotailed;
my $npp = 60; my $n1p = 20; my $np1 = 20; my $n11 = 10;
$twotailed_value = calculateStatistic( n11=>$n11,
n1p=>$n1p,
np1=>$np1,
npp=>$npp);
if( ($errorCode = getErrorCode()))
{
print STDERR $errorCode." - ".getErrorMessage();
}
else
{
print getStatisticName."value for bigram is ".$twotailed_value;
}</code> So that was the end of that trail as far as i was concerned!</readmore></p>
<p>So obviously i end up looking in perlmonks for something better though through! So, following up from [resonator]'s comments, i made some test code for the posted code :<readmore><code>#! usr/bin/perl
use warnings;
use strict;
use Test;
# use a BEGIN block so we print our plan before MyModule is loaded
BEGIN { plan tests => 40, todo => [], }
use lib "$ENV{HOME}/Desktop/perl/lib";
use Statistics::FET;
use Statistics::FishersExactTest;
print "***888 - Original Statistics::FET - 888***\n";
## two tailed
print "\n***\ntwotailed a-b-c-d\n***\n";
ok( Statistics::FET::fishers_exact(100 , 345 , 150 , 450, 'true', ), qr/^0.378973935/ );
ok( Statistics::FET::fishers_exact(150 , 345 , 150 , 500, 'true', ), qr/^0.006634280/ );
ok( Statistics::FET::fishers_exact(110 , 345 , 250 , 500, 'true', ), qr/^0.000737131/ );
ok( Statistics::FET::fishers_exact(11 , 34 , 25 , 50, 'true', ), qr/^0.410784065/ );
ok( Statistics::FET::fishers_exact(11 , 34 , 25 , 60, 'true', ), qr/^0.680962824/ );
print "\n***\ntwotailed (b-a-d-c)\n***\n";
ok( Statistics::FET::fishers_exact(345 , 100 , 450 , 150, 'true', ), qr/^0.378973935/ );
ok( Statistics::FET::fishers_exact(345 , 150 , 500 , 150, 'true', ), qr/^0.006634280/ );
ok( Statistics::FET::fishers_exact(345 , 110 , 500 , 250, 'true', ), qr/^0.000737131/ );
ok( Statistics::FET::fishers_exact(34 , 11 , 50 , 25, 'true', ), qr/^0.410784065/ );
ok( Statistics::FET::fishers_exact(34 , 11 , 60 , 25, 'true', ), qr/^0.680962824/ );
## one tailed
## - return 'Right' tail
print "\n***\nright tailed (a-b-c-d)\n***\n";
ok( Statistics::FET::fishers_exact(100 , 345 , 150 , 450, ), qr/^0.846286197/ );
ok( Statistics::FET::fishers_exact(150 , 345 , 150 , 500, ), qr/^0.003674851/ );
ok( Statistics::FET::fishers_exact(110 , 345 , 250 , 500, ), qr/^0.999730774/ );
ok( Statistics::FET::fishers_exact(11 , 34 , 25 , 50, ), qr/^0.892322547/ );
ok( Statistics::FET::fishers_exact(11 , 34 , 25 , 60, ), qr/^0.789358818/ );
## one tailed
## - return 'left' tail
print "\n***\nleft tailed b-a-c-d\n***\n";
ok( Statistics::FET::fishers_exact(345 , 100 , 450 , 150, ), qr/^0.191219881/ );
ok( Statistics::FET::fishers_exact(345 , 150 , 500 , 150, ), qr/^0.997565862/ );
ok( Statistics::FET::fishers_exact(345 , 110 , 500 , 250, ), qr/^0.000436655/ );
ok( Statistics::FET::fishers_exact(34 , 11 , 50 , 25, ), qr/^0.206057717/ );
ok( Statistics::FET::fishers_exact(34 , 11 , 60 , 25, ), qr/^0.349218359/ );
print "***888 - bugfixed Statistics::FishersExactTest - 888***\n";
## two tailed
print "\n***\ntwotailed a-b-c-d\n***\n";
ok( Statistics::FishersExactTest::fishers_exact(100 , 345 , 150 , 450, 'true', ), qr/^0.378973935/ );
ok( Statistics::FishersExactTest::fishers_exact(150 , 345 , 150 , 500, 'true', ), qr/^0.006634280/ );
ok( Statistics::FishersExactTest::fishers_exact(110 , 345 , 250 , 500, 'true', ), qr/^0.000737131/ );
ok( Statistics::FishersExactTest::fishers_exact(11 , 34 , 25 , 50, 'true', ), qr/^0.410784065/ );
ok( Statistics::FishersExactTest::fishers_exact(11 , 34 , 25 , 60, 'true', ), qr/^0.680962824/ );
print "\n***\ntwotailed (b-a-d-c)\n***\n";
ok( Statistics::FishersExactTest::fishers_exact(345 , 100 , 450 , 150, 'true', ), qr/^0.378973935/ );
ok( Statistics::FishersExactTest::fishers_exact(345 , 150 , 500 , 150, 'true', ), qr/^0.006634280/ );
ok( Statistics::FishersExactTest::fishers_exact(345 , 110 , 500 , 250, 'true', ), qr/^0.000737131/ );
ok( Statistics::FishersExactTest::fishers_exact(34 , 11 , 50 , 25, 'true', ), qr/^0.410784065/ );
ok( Statistics::FishersExactTest::fishers_exact(34 , 11 , 60 , 25, 'true', ), qr/^0.680962824/ );
## one tailed
## - return 'Right' tail
print "\n***\nright tailed (a-b-c-d)\n***\n";
ok( Statistics::FishersExactTest::fishers_exact(100 , 345 , 150 , 450, ), qr/^0.846286197/ );
ok( Statistics::FishersExactTest::fishers_exact(150 , 345 , 150 , 500, ), qr/^0.003674851/ );
ok( Statistics::FishersExactTest::fishers_exact(110 , 345 , 250 , 500, ), qr/^0.999730774/ );
ok( Statistics::FishersExactTest::fishers_exact(11 , 34 , 25 , 50, ), qr/^0.892322547/ );
ok( Statistics::FishersExactTest::fishers_exact(11 , 34 , 25 , 60, ), qr/^0.789358818/ );
## one tailed
## - return 'left' tail
print "\n***\nleft tailed b-a-c-d\n***\n";
ok( Statistics::FishersExactTest::fishers_exact(345 , 100 , 450 , 150, ), qr/^0.191219881/ );
ok( Statistics::FishersExactTest::fishers_exact(345 , 150 , 500 , 150, ), qr/^0.997565862/ );
ok( Statistics::FishersExactTest::fishers_exact(345 , 110 , 500 , 250, ), qr/^0.000436655/ );
ok( Statistics::FishersExactTest::fishers_exact(34 , 11 , 50 , 25, ), qr/^0.206057717/ );
ok( Statistics::FishersExactTest::fishers_exact(34 , 11 , 60 , 25, ), qr/^0.349218359/ );
__DATA__
Fisher's Exact Test
http://www.langsrud.com/fisher.htm
# 1 + 6 + 11 + 16
------------------------------------------
TABLE = [ 100 , 345 , 150 , 450 ]
Left : p-value = 0.191219881514147
Right : p-value = 0.8462861978699979
2-Tail : p-value = 0.37897393517037725
# 2 + 7 + 12 + 17
------------------------------------------
TABLE = [ 150 , 345 , 150 , 500 ]
Left : p-value = 0.9975658620326552
Right : p-value = 0.0036748512483445626
2-Tail : p-value = 0.006634280583017804
# 3 + 8 + 13 + 18
------------------------------------------
TABLE = [ 110 , 345 , 250 , 500 ]
Left : p-value = 0.00043665577543502157
Right : p-value = 0.9997307748788292
2-Tail : p-value = 0.0007371317326469082
# 4 + 9 + 14 + 19
------------------------------------------
TABLE = [ 11 , 34 , 25 , 50 ]
Left : p-value = 0.20605771741014312
Right : p-value = 0.8923225471119328
2-Tail : p-value = 0.41078406593596795
# 5 + 10 + 15 + 20
------------------------------------------
TABLE = [ 11 , 34 , 25 , 60 ]
Left : p-value = 0.34921835927372824
Right : p-value = 0.7893588188856397
2-Tail : p-value = 0.6809628247605259
------------------------------------------
</code></readmore></p>
<p>And it seemed to backup what [resonator] said, but [tlm]'s code did get the answer right for *some* of the tests, so i was encouraged and made the following changes (extremely minor!). Mainly (erm... only) adding in : </p><code> TEST:
## simplified test equation
my $test = $a*$d - $b*$c;
## introduced switching around of input pairs
if ($test < 0 && $ts){
($a, $b, $c, $d, ) = ($b, $a, $d, $c,);
goto TEST;
}</code><p> right after the 'test' bit that had people confused!So, now it passes all tests!<readmore><code>perl Desktop/perl/lib/Statistics/FET_test_complete.pl
1..40
# Running under perl version 5.010000 for linux
# Current time local: Wed Jul 15 11:39:44 2009
# Current time GMT: Wed Jul 15 10:39:44 2009
# Using Test.pm version 1.25
***888 - Original Statistics::FET - 888***
***
twotailed a-b-c-d
***
not ok 1
# Test 1 got: "1" (Desktop/perl/lib/Statistics/FET_test_complete.pl at line 18)
# Expected: "(?-xism:^0.378973935)"
# Desktop/perl/lib/Statistics/FET_test_complete.pl line 18 is: ok( Statistics::FET::fishers_exact(100 , 345 , 150 , 450, 'true', ), qr/^0.378973935/ );
ok 2
not ok 3
# Test 3 got: "1" (Desktop/perl/lib/Statistics/FET_test_complete.pl at line 20)
# Expected: "(?-xism:^0.000737131)"
# Desktop/perl/lib/Statistics/FET_test_complete.pl line 20 is: ok( Statistics::FET::fishers_exact(110 , 345 , 250 , 500, 'true', ), qr/^0.000737131/ );
not ok 4
# Test 4 got: "1" (Desktop/perl/lib/Statistics/FET_test_complete.pl at line 21)
# Expected: "(?-xism:^0.410784065)"
# Desktop/perl/lib/Statistics/FET_test_complete.pl line 21 is: ok( Statistics::FET::fishers_exact(11 , 34 , 25 , 50, 'true', ), qr/^0.410784065/ );
not ok 5
# Test 5 got: "1" (Desktop/perl/lib/Statistics/FET_test_complete.pl at line 22)
# Expected: "(?-xism:^0.680962824)"
# Desktop/perl/lib/Statistics/FET_test_complete.pl line 22 is: ok( Statistics::FET::fishers_exact(11 , 34 , 25 , 60, 'true', ), qr/^0.680962824/ );
***
twotailed (b-a-d-c)
***
ok 6
not ok 7
# Test 7 got: "1" (Desktop/perl/lib/Statistics/FET_test_complete.pl at line 27)
# Expected: "(?-xism:^0.006634280)"
# Desktop/perl/lib/Statistics/FET_test_complete.pl line 27 is: ok( Statistics::FET::fishers_exact(345 , 150 , 500 , 150, 'true', ), qr/^0.006634280/ );
ok 8
ok 9
ok 10
***
right tailed (a-b-c-d)
***
ok 11
ok 12
ok 13
ok 14
ok 15
***
left tailed b-a-c-d
***
ok 16
ok 17
ok 18
ok 19
ok 20
***888 - bugfixed Statistics::FishersExactTest - 888***
***
twotailed a-b-c-d
***
ok 21
ok 22
ok 23
ok 24
ok 25
***
twotailed (b-a-d-c)
***
ok 26
ok 27
ok 28
ok 29
ok 30
***
right tailed (a-b-c-d)
***
ok 31
ok 32
ok 33
ok 34
ok 35
***
left tailed b-a-c-d
***
ok 36
ok 37
ok 38
ok 39
ok 40
</code></readmore></p><p>So here is some updated code for anyone who is interested!</p>
<readmore>
<code>=head1 NAME
Statistics::FET - Fisher's Exact Test statistic (2x2 case)
=head1 SYNOPSIS
use Statistics::FET 'fishers_exact';
=head1 DESCRIPTION
This module exports only one function, C<fishers_exact>, which
computes the one- or two-sided Fisher's Exact Test statistic for the 2
x 2 case. In the following documentation I will be referring to the
following family of 2 x 2 contingency tables
* | * | r1 = a+b
--------+-------+----------
* | * | r2 = c+d
--------+-------+----------
c1 | c2 | N
= a+c | = b+d | = a+b+c+d
The *'s mark the cells, N is the total number of points represented by
the table, and r1, r2, c1, c2 are the marginals. As suggested by the
equalities, the letters a, b, c, d refer to the various cells (reading
them left-to-right, top-to-bottom). Depending on context, the letter
c (for example) will refer *either* to the lower left cell of the
table *or* to a specific value in that cell. Same for a, b, and d.
In what follows, H(x) (or more precisely, H(x; r1, r2, c1)) refers
to the hypergeometric expression
r1!*r2!*c1!*c2!
-------------------------------------
(r1+r2)!*x!*(r1-x)!*(c1-x)!*(r2-c1+x)!
(I omit c2 from the parametrization of H because c2 = r1 + r2 - c1.)
=head1 FUNCTION
=over 4
=item fishers_exact( $a, $b, $c, $d, $two_sided )
The paramater C<$two_sided> is optional. If missing or false
C<fishers_exact> computes the one-sided FET p-value. More
specifically, it computes the sum of H(x; a+b, c+d, a+c) for x = a to
x = min(a+b, a+c) - "the right side". (If you want "the left side", i.e. the sum of H(x; a+b, c+d, a+c) for x
= max(0, a-d) to x = a, then compute C<fishers_exact( $b, $a, $d, $c )
+> or
C<fishers_exact( $c, $d, $a, $b )> (these two are equivalent).)
If C<$two_sided> is true, the returned p-value will be for the two-sid
+ed
FET.
=back
=cut
## let the code begin...
use strict;
use warnings;
package Statistics::FishersExactTest;
use Exporter 'import';
use Math::Pari ();
my $Tolerance = 1;
$Tolerance /= 2 while 1 + $Tolerance/2 > 1;
@Statistics::FET::EXPORT_OK = 'fishers_exact';
sub fishers_exact {
my ( $a, $b, $c, $d, $ts ) = @_;
TEST:
## simplified test equation
my $test = $a*$d - $b*$c;
## introduced switching around of input pairs
if ($test < 0 && $ts){
($a, $b, $c, $d, ) = ($b, $a, $d, $c,);
goto TEST;
}
if ($test < 0 and $ts){
die "Cannot compute two tailed FET for input values given \( ".(join ", ", @_)."\)\n";
}
# below here, $test < 0 implies !$ts;
my $p_val;
if ( $test < 0 ) {
if ( $d < $a ) {
$p_val = _fishers_exact( $d, $c, $b, $a, $ts, 1 );
}
else {
$p_val = _fishers_exact( $a, $b, $c, $d, $ts, 1 );
}
}
else {
if ( $b < $c ) {
$p_val = _fishers_exact( $b, $a, $d, $c, $ts, 0 );
}
else {
$p_val = _fishers_exact( $c, $d, $a, $b, $ts, 0 );
}
}
return $p_val;
}
sub _fishers_exact {
my ( $a, $b, $c, $d, $ts, $complement ) = @_;
die "Bad args\n" if $ts && $complement;
my ( $aa, $bb, $cc, $dd ) = ( $a, $b, $c, $d );
my $first = my $delta = _single_term( $aa, $bb, $cc, $dd );
my $p_val = 0;
{
$p_val += $delta;
last if $aa < 1;
$delta *= ( ( $aa-- * $dd-- )/( ++$bb * ++$cc ) );
redo;
}
if ( $ts ) {
my $m = $b < $c ? $b : $c;
($aa, $bb, $cc, $dd ) = ( $a + $m, $b - $m, $c - $m, $d + $m );
$delta = _single_term( $aa, $bb, $cc, $dd );
my $bound = -$Tolerance;
while ( $bound <= ( $first - $delta )/$first && $aa > $a ) {
$p_val += $delta;
$delta *= ( ( $aa-- * $dd-- )/( ++$bb * ++$cc ) );
}
}
elsif ( $complement ) {
$p_val = 1 - $p_val + $first;
}
return $p_val;
}
sub _single_term {
my ( $a, $b, $c, $d ) = @_;
my ( $r1, $r2 ) = ($a + $b, $c + $d);
my ( $c1, $c2 ) = ($a + $c, $b + $d);
my $N = $r1 + $r2;
return exp( _ln_fact( $r1 ) + _ln_fact( $r2 ) +
_ln_fact( $c1 ) + _ln_fact( $c2 ) -
_ln_fact( $N ) -
( _ln_fact( $a ) + _ln_fact( $b ) +
_ln_fact( $c ) + _ln_fact( $d ) ) );
}
{
my $two_pi;
my $pi_over_3;
my $half;
BEGIN {
$two_pi = Math::Pari::PARI( 2 * atan2 0, -1 );
$pi_over_3 = Math::Pari::PARI( atan2( 0, -1 )/3 );
$half = Math::Pari::PARI( 0.5 );
}
sub _ln_fact {
my $n = Math::Pari::PARI( shift() );
die "Bad args to _ln_fact: $n" if $n < 0;
my $ln_fact;
eval {
$ln_fact = log Math::Pari::factorial( $n );
};
if ( $@ ) {
die $@ unless $@ =~ /\QPARI: *** exponent overflow/;
# Gosper's approximation; from
# http://mathworld.wolfram.com/StirlingsApproximation.html
$ln_fact = $half * log( $two_pi*$n + $pi_over_3 )
+ $n * log( $n )
- $n;
}
return $ln_fact;
}
}
1;
__END__
</code></readmore><p>Further tests|opinions are always welcome! I was surprised there was not more FET mods there, at least that i could find!</p>
<div class="pmsig"><div class="pmsig-656648">
<i>Just a something something...</i>
</div></div>
482919
482919