swl has asked for the wisdom of the Perl Monks concerning the following question:
Does anyone know if there are perl bindings to MINPACK or some other curve fitting process?
So far I've been able to track down PDL::Fit::LM, PDL::Fit::Levmar and Starlink::AST. However, all have issues for my use case where I want to fit non-linear curves to a large number (thousands) of data sets, and for which I need to try a variety of functional shapes to identify which one fits best (four parameter logistic, power curve, etc).
PDL::Fit::Levmar does not index correctly on metacpan, so cpanm does not find it (nor does cpan). syphilis has windows PPMs available. However, the distro itself has several open issues for recent versions of PDL and it is unclear if it is still maintained. Until that's all sorted out I'm reluctant to use it in anything that will eventually need automated installation via cpanm.
lmfit from PDL::Fit::LM, but it is sensitive to the starting conditions. Specification of the formula is also not simple, as one needs to provide a subroutine for the derivatives (although one can use Math::Symbolic to generate one).
Starlink::AST includes cminpack, but appears to use a limited subset for standard polynomials. It also seems not to call it from pure perl code.
There is also Algorithm::CurveFit, but from memory is also sensitive to the starting conditions.
Code using PDL::Fit::Levmar and PDL::Fit::LM with a sample of my data is below. Running the same data through minpack via the nlsLM function in the minpack.lm package for R gives good results without being sensitive to the starting conditions (see second code block and results). minpack.lm is just a set of bindings around the minpack library, so I would expect the same results are possible for perl if bindings were to exist.
# perl code #use 5.024; use PDL; use PDL::NiceSlice; #use PDL::GSL::INTERP; use PDL::Fit::LM; use PDL::Stats; my @data = <DATA>; my $header = shift @data; my (@xarr, @yarr); foreach my $line (@data) { chomp $line; my @line_arr = split /\s+/, $line; push @xarr, $line_arr[0]; push @yarr, $line_arr[1]; } my $len = @xarr; my @target_iters = (0 .. $#xarr); my $x = pdl(@xarr[@target_iters]); my $y = pdl(@yarr[@target_iters]); my @init = (-1, 1, 2134, 2.27395324135227); # try levmar { my ($a, $b, $c, $d); use PDL::Fit::Levmar; my $func = sub { my ($p, $yy, $x) = @_; my ($a,$b, $c, $d) = $p->list; $yy = $d + ($a - $d) / (1 + ($x / $c)**$b); }; my $yy = pdl (@yarr[@target_iters]); my $params = pdl (@init); my $result_hash = levmar( $params, $yy, $x, FUNC => $func, ); say "------LEVMAR REPORT:----"; print levmar_report($result_hash); say "-----END LEVMAR REPORT----\n\n"; } # now try lmfit my ($ym, $finalp, $covar, $iters); say '----PDL::Fit::LM----'; while (1) { my $initp = pdl \@init; say "lmfit trying initp: $initp"; my $sigma = 1; ($ym, $finalp, $covar, $iters) = lmfit $x, $y, $sigma, \&symm_sig_func, $initp, {Maxiter => 300, Eps => 1e-10}; #say $finalp; #say $iters; #say $covar; #say $ym->sum; #my ($a, $b, $c, $d) = @{$finalp->unpdl}; #my $ppred = $d + ($a - $d) / (1 + ($x / $c)**$b); #say $ym; last if ($ym->sum ne 'NaN'); # try some smaller starting conditions $init[2] /= 2; $init[3] /= 2; } say "lmfit finalp: $finalp"; say "iters: $iters"; #say $ym->(0:10); #say 'RSQ: ' . (1 - ($y - $ym)->stdv / $y->stdv); my $meany = $y->average; say 'RSQ: ' . (1 - (($y - $ym)**2)->sum / (($y - $meany)**2)->sum); my ($a, $b, $c, $d) = @{$finalp->unpdl}; my $predx = pdl (@xarr); my $obsy = pdl (@yarr); my $pred = $d + ($a - $d) / (1 + ($predx / $c)**$b); my $pp = $pred->unpdl; #say join ' ', @$pp[0..10]; $meany = $obsy->average; my $fit_rsq = (1 - (($obsy - $pred)**2)->sum / (($obsy - $meany)**2)- +>sum); #my $aa = (1 - (($obsy - $pred)**2)->sum / (($obsy->stdv)*$obsy->nelem +)**2)->flat; say "lmfit RSQ: $fit_rsq"; say 'lmfit RMSE: ' . (($obsy - $pred)**2)->average; sub symm_sig_func { # leave this line as is my ($x, $par, $ym, $dyda) = @_; my ($a, $b, $c, $d) = @{$par->(0:3)->unpdl}; #$c = List::Util::max (0, $c); # avoid log of neg vals # Write function with dependent variable $ym, # independent variable $x, and fit parameters as specified above. # Use the .= (dot equals) assignment operator to express the equal +ity # (not just a plain equals) $ym .= $d + ($a - $d) / (1 + ($x / $c)**$b); my @dy = map {$dyda->(,($_))} (0..3); #my @dy = @{$dyda->unpdl}; # Partial derivative of the function with respect to parameters. # Again, note .= assignment operator (not just "equals") # # http://www.numberempire.com/derivativecalculator.php # but could use Math::Symbolic # formula: d+(a-d)/(1+(x/c)^b) # del(a) # c^b/(x^b+c^b) $dy[0] .= $c**$b / ($x**$b + $c**$b); # del(b) # ( (c^b*d-a*c^b)*x^b*log(x) # +(a*c^b*log(c) # -c^b*log(c)*d)*x^b) # / (x^(2*b)+2*c^b*x^b+c^(2*b)) $dy[1] .= eval { ( ($c**$b*$d-$a*$c**$b)*$x**$b*log($x) + ($a*$c**$b*log($c)-$c**$b*log($c)*$d)*$x**$b ) / ($x**(2*$b)+2*$c**$b*$x**$b+$c**(2*$b)) }; # del(c) # -((b*c^b*d-a*b*c^b)*x^b) # / (c*x^(2*b)+2*c^(b+1)*x^b+c^(2*b+1)) $dy[2] .= -1 * (($b * $c**$b * $d - $a * $b * $c**$b) * $x**$b) / ($c * $x**(2 * $b) + 2 * $c**($b + 1) * $x**$b + $c**(2 * $b + 1) ); #say 'DEL C: ' . $dy[2]; # del(d) # x^b/(x^b+c^b) $dy[3] .= $x**$b / ($x**$b + $c**$b); } __DATA__ x est 1 0.100593962 42 1.154242343 83 1.474328422 124 1.654247992 165 1.775551897 207 1.866286312 248 1.934367102 289 1.988797445 330 2.033521036 371 2.071092313 412 2.103249977 454 2.131859705 495 2.15646498 536 2.178448777 577 2.198304263 619 2.21682969 660 2.233437802 701 2.248813997 742 2.263134718 783 2.276540152 825 2.289441049 866 2.301316212 907 2.312557085 948 2.323226159 989 2.333376276 1031 2.343283185 1072 2.352515249 1113 2.361348508 1154 2.369813742 1195 2.377938641 1237 2.385935063 1278 2.393445607 1319 2.400685418 1360 2.407673856 1401 2.414428798 1443 2.421123656 1484 2.42745521 1525 2.433599534 1566 2.43956963 1607 2.445377464 1648 2.451034016 1649 2.451170177 1690 2.456681041 1731 2.462054198 1772 2.467293088 1813 2.472401069 1855 2.477501313 1896 2.482354207 1937 2.487085836 1978 2.491699229 2019 2.496197342 2061 2.500688643 2102 2.504962126 2143 2.509128821 2184 2.513191399 2225 2.517152459 2266 2.52101454 2308 2.524870771 2349 2.528539984 2390 2.53211751 2431 2.53560564 2472 2.539006607 2514 2.542402424 2555 2.545633551 2596 2.548783938 2637 2.551855602 2678 2.554850511 2720 2.557840884 2761 2.560686231 2802 2.563460478 2843 2.5661654 2884 2.568802732 2926 2.57143607 2967 2.573941696 3008 2.576384711 3049 2.57876668 3090 2.581089127 3132 2.583408057 3173 2.585614525 3214 2.587765856 3255 2.58986343
This results in:
Use of uninitialized value $ninf[6] in sprintf at c:/berrybrew/5.26.1_ +64_pdl/perl/site/lib/PDL/Fit/Levmar.pm line 1250, <DATA> line 82. ------LEVMAR REPORT:---- Estimated parameters: [-1 1 2134 2.2739532] Covariance: [ [-Inf -Inf NaN Inf] [ Inf NaN NaN NaN] [ Inf Inf Inf -Inf] [-Inf Inf NaN NaN] ] ||e||_2 at initial parameters = Inf Errors at estimated parameters: ||e||_2 = Inf ||J^T e||_inf = 0 ||Dp||_2 = 1.79769e+308 \mu/max[J^T J]_ii ] = 0 number of iterations = 0 reason for termination: = number of function evaluations = 1 number of jacobian evaluations = 0 number of linear systems solved = 0 -----END LEVMAR REPORT---- ----PDL::Fit::LM---- lmfit trying initp: [-1 1 2134 2.2739532] lmfit trying initp: [-1 1 1067 1.1369766] lmfit trying initp: [-1 1 533.5 0.56848831] lmfit trying initp: [-1 1 266.75 0.28424416] lmfit trying initp: [-1 1 133.375 0.14212208] lmfit trying initp: [-1 1 66.6875 0.071061039] lmfit finalp: [-0.2066481 0.54367392 60.393814 2.8999219] iters: 14 RSQ: 0.999354122952557 lmfit RSQ: 0.999354122952557 lmfit RMSE: 8.35455913578738e-005
minpack via R:
# R CODE my.data = ' x y 1 0.100593962 42 1.154242343 83 1.474328422 124 1.654247992 165 1.775551897 207 1.866286312 248 1.934367102 289 1.988797445 330 2.033521036 371 2.071092313 412 2.103249977 454 2.131859705 495 2.15646498 536 2.178448777 577 2.198304263 619 2.21682969 660 2.233437802 701 2.248813997 742 2.263134718 783 2.276540152 825 2.289441049 866 2.301316212 907 2.312557085 948 2.323226159 989 2.333376276 1031 2.343283185 1072 2.352515249 1113 2.361348508 1154 2.369813742 1195 2.377938641 1237 2.385935063 1278 2.393445607 1319 2.400685418 1360 2.407673856 1401 2.414428798 1443 2.421123656 1484 2.42745521 1525 2.433599534 1566 2.43956963 1607 2.445377464 1648 2.451034016 1649 2.451170177 1690 2.456681041 1731 2.462054198 1772 2.467293088 1813 2.472401069 1855 2.477501313 1896 2.482354207 1937 2.487085836 1978 2.491699229 2019 2.496197342 2061 2.500688643 2102 2.504962126 2143 2.509128821 2184 2.513191399 2225 2.517152459 2266 2.52101454 2308 2.524870771 2349 2.528539984 2390 2.53211751 2431 2.53560564 2472 2.539006607 2514 2.542402424 2555 2.545633551 2596 2.548783938 2637 2.551855602 2678 2.554850511 2720 2.557840884 2761 2.560686231 2802 2.563460478 2843 2.5661654 2884 2.568802732 2926 2.57143607 2967 2.573941696 3008 2.576384711 3049 2.57876668 3090 2.581089127 3132 2.583408057 3173 2.585614525 3214 2.587765856 3255 2.58986343 ' data = read.delim(textConnection(my.data),header=TRUE,sep="\t",strip.w +hite=TRUE) ##install.packages("minpack.lm") library(minpack.lm) init_cond = list(a = -1, b = 1, c = 2134, d = 2.27395324135227) formula = as.formula ("y ~ d + (a - d) / (1 + (x / c)**b)") nlsLM(formula, data=data, start=init_cond)
Results in:
> nlsLM(formula, data=data, start=init_cond) Nonlinear regression model model: y ~ d + (a - d)/(1 + (x/c)^b) data: data a b c d -0.2066 0.5437 60.3938 2.8999 residual sum-of-squares: 0.006767 Number of iterations to convergence: 16 Achieved convergence tolerance: 1.49e-08
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re: perl minpack bindings or other function fitting code?
by swl (Prior) on Nov 06, 2017 at 02:49 UTC | |
by swl (Prior) on Mar 25, 2018 at 22:42 UTC | |
by etj (Priest) on May 07, 2022 at 23:04 UTC |