Home | History | Annotate | Download | only in lib
      1 /*
      2  * Library:   lmfit (Levenberg-Marquardt least squares fitting)
      3  *
      4  * File:      lmmin.c
      5  *
      6  * Contents:  Levenberg-Marquardt minimization.
      7  *
      8  * Copyright: MINPACK authors, The University of Chikago (1980-1999)
      9  *            Joachim Wuttke, Forschungszentrum Juelich GmbH (2004-2013)
     10  *
     11  * License:   see ../COPYING (FreeBSD)
     12  *
     13  * Homepage:  apps.jcns.fz-juelich.de/lmfit
     14  */
     15 
     16 #include <assert.h>
     17 #include <stdlib.h>
     18 #include <stdio.h>
     19 #include <math.h>
     20 #include <float.h>
     21 #include "lmmin.h"
     22 
     23 #define MIN(a, b) (((a) <= (b)) ? (a) : (b))
     24 #define MAX(a, b) (((a) >= (b)) ? (a) : (b))
     25 #define SQR(x) (x) * (x)
     26 
     27 /* Declare functions that do the heavy numerics.
     28    Implementions are in this source file, below lmmin.
     29    Dependences: lmmin calls lmpar, which calls qrfac and qrsolv. */
     30 void lm_lmpar(const int n, double* r, const int ldr, const int* Pivot,
     31               const double* diag, const double* qtb, const double delta,
     32               double* par, double* x, double* Sdiag, double* aux, double* xdi);
     33 void lm_qrfac(const int m, const int n, double* A, int* Pivot, double* Rdiag,
     34               double* Acnorm, double* W);
     35 void lm_qrsolv(const int n, double* r, const int ldr, const int* Pivot,
     36                const double* diag, const double* qtb, double* x,
     37                double* Sdiag, double* W);
     38 
     39 /******************************************************************************/
     40 /*  Numeric constants                                                         */
     41 /******************************************************************************/
     42 
     43 /* Set machine-dependent constants to values from float.h. */
     44 #define LM_MACHEP DBL_EPSILON       /* resolution of arithmetic */
     45 #define LM_DWARF DBL_MIN            /* smallest nonzero number */
     46 #define LM_SQRT_DWARF sqrt(DBL_MIN) /* square should not underflow */
     47 #define LM_SQRT_GIANT sqrt(DBL_MAX) /* square should not overflow */
     48 #define LM_USERTOL 30 * LM_MACHEP   /* users are recommended to require this */
     49 
     50 /* If the above values do not work, the following seem good for an x86:
     51  LM_MACHEP     .555e-16
     52  LM_DWARF      9.9e-324
     53  LM_SQRT_DWARF 1.e-160
     54  LM_SQRT_GIANT 1.e150
     55  LM_USER_TOL   1.e-14
     56    The following values should work on any machine:
     57  LM_MACHEP     1.2e-16
     58  LM_DWARF      1.0e-38
     59  LM_SQRT_DWARF 3.834e-20
     60  LM_SQRT_GIANT 1.304e19
     61  LM_USER_TOL   1.e-14
     62 */
     63 
     64 /* Predefined control parameter sets (msgfile=NULL means stdout). */
     65 const lm_control_struct lm_control_double = {
     66     LM_USERTOL, LM_USERTOL, LM_USERTOL, LM_USERTOL,
     67     100., 100, 1, NULL, 0, -1, -1};
     68 const lm_control_struct lm_control_float = {
     69     1.e-7, 1.e-7, 1.e-7, 1.e-7,
     70     100., 100, 1, NULL, 0, -1, -1};
     71 
     72 /******************************************************************************/
     73 /*  Message texts (indexed by status.info)                                    */
     74 /******************************************************************************/
     75 
     76 const char* lm_infmsg[] = {
     77     "found zero (sum of squares below underflow limit)",
     78     "converged  (the relative error in the sum of squares is at most tol)",
     79     "converged  (the relative error of the parameter vector is at most tol)",
     80     "converged  (both errors are at most tol)",
     81     "trapped    (by degeneracy; increasing epsilon might help)",
     82     "exhausted  (number of function calls exceeding preset patience)",
     83     "failed     (ftol<tol: cannot reduce sum of squares any further)",
     84     "failed     (xtol<tol: cannot improve approximate solution any further)",
     85     "failed     (gtol<tol: cannot improve approximate solution any further)",
     86     "crashed    (not enough memory)",
     87     "exploded   (fatal coding error: improper input parameters)",
     88     "stopped    (break requested within function evaluation)",
     89     "found nan  (function value is not-a-number or infinite)"};
     90 
     91 const char* lm_shortmsg[] = {
     92     "found zero",
     93     "converged (f)",
     94     "converged (p)",
     95     "converged (2)",
     96     "degenerate",
     97     "call limit",
     98     "failed (f)",
     99     "failed (p)",
    100     "failed (o)",
    101     "no memory",
    102     "invalid input",
    103     "user break",
    104     "found nan"};
    105 
    106 /******************************************************************************/
    107 /*  Monitoring auxiliaries.                                                   */
    108 /******************************************************************************/
    109 
    110 void lm_print_pars(int nout, const double* par, FILE* fout)
    111 {
    112     int i;
    113     for (i = 0; i < nout; ++i)
    114         fprintf(fout, " %16.9g", par[i]);
    115     fprintf(fout, "\n");
    116 }
    117 
    118 /******************************************************************************/
    119 /*  lmmin (main minimization routine)                                         */
    120 /******************************************************************************/
    121 
    122 void lmmin(const int n, double* x, const int m, const void* data,
    123            void (*evaluate)(const double* par, const int m_dat,
    124                             const void* data, double* fvec, int* userbreak),
    125            const lm_control_struct* C, lm_status_struct* S)
    126 {
    127     int j, i;
    128     double actred, dirder, fnorm, fnorm1, gnorm, pnorm, prered, ratio, step,
    129         sum, temp, temp1, temp2, temp3;
    130 
    131     /***  Initialize internal variables.  ***/
    132 
    133     int maxfev = C->patience * (n+1);
    134 
    135     int inner_success; /* flag for loop control */
    136     double lmpar = 0;  /* Levenberg-Marquardt parameter */
    137     double delta = 0;
    138     double xnorm = 0;
    139     double eps = sqrt(MAX(C->epsilon, LM_MACHEP)); /* for forward differences */
    140 
    141     int nout = C->n_maxpri == -1 ? n : MIN(C->n_maxpri, n);
    142 
    143     /* Reinterpret C->msgfile=NULL as stdout (which is unavailable for
    144        compile-time initialization of lm_control_double and similar). */
    145     FILE* msgfile = C->msgfile ? C->msgfile : stdout;
    146 
    147     /***  Default status info; must be set before first return statement.  ***/
    148 
    149     S->outcome = 0; /* status code */
    150     S->userbreak = 0;
    151     S->nfev = 0; /* function evaluation counter */
    152 
    153     /***  Check input parameters for errors.  ***/
    154 
    155     if (n <= 0) {
    156         fprintf(stderr, "lmmin: invalid number of parameters %i\n", n);
    157         S->outcome = 10;
    158         return;
    159     }
    160     if (m < n) {
    161         fprintf(stderr, "lmmin: number of data points (%i) "
    162                         "smaller than number of parameters (%i)\n",
    163                 m, n);
    164         S->outcome = 10;
    165         return;
    166     }
    167     if (C->ftol < 0 || C->xtol < 0 || C->gtol < 0) {
    168         fprintf(stderr,
    169                 "lmmin: negative tolerance (at least one of %g %g %g)\n",
    170                 C->ftol, C->xtol, C->gtol);
    171         S->outcome = 10;
    172         return;
    173     }
    174     if (maxfev <= 0) {
    175         fprintf(stderr, "lmmin: nonpositive function evaluations limit %i\n",
    176                 maxfev);
    177         S->outcome = 10;
    178         return;
    179     }
    180     if (C->stepbound <= 0) {
    181         fprintf(stderr, "lmmin: nonpositive stepbound %g\n", C->stepbound);
    182         S->outcome = 10;
    183         return;
    184     }
    185     if (C->scale_diag != 0 && C->scale_diag != 1) {
    186         fprintf(stderr, "lmmin: logical variable scale_diag=%i, "
    187                         "should be 0 or 1\n",
    188                 C->scale_diag);
    189         S->outcome = 10;
    190         return;
    191     }
    192 
    193     /***  Allocate work space.  ***/
    194 
    195     /* Allocate total workspace with just one system call */
    196     char* ws;
    197     if ((ws = malloc((2*m + 5*n + m*n) * sizeof(double) +
    198                      n * sizeof(int))) == NULL) {
    199         S->outcome = 9;
    200         return;
    201     }
    202 
    203     /* Assign workspace segments. */
    204     char* pws = ws;
    205     double* fvec = (double*)pws;
    206     pws += m * sizeof(double) / sizeof(char);
    207     double* diag = (double*)pws;
    208     pws += n * sizeof(double) / sizeof(char);
    209     double* qtf = (double*)pws;
    210     pws += n * sizeof(double) / sizeof(char);
    211     double* fjac = (double*)pws;
    212     pws += n * m * sizeof(double) / sizeof(char);
    213     double* wa1 = (double*)pws;
    214     pws += n * sizeof(double) / sizeof(char);
    215     double* wa2 = (double*)pws;
    216     pws += n * sizeof(double) / sizeof(char);
    217     double* wa3 = (double*)pws;
    218     pws += n * sizeof(double) / sizeof(char);
    219     double* wf = (double*)pws;
    220     pws += m * sizeof(double) / sizeof(char);
    221     int* Pivot = (int*)pws;
    222     pws += n * sizeof(int) / sizeof(char);
    223 
    224     /* Initialize diag. */
    225     if (!C->scale_diag)
    226         for (j = 0; j < n; j++)
    227             diag[j] = 1;
    228 
    229     /***  Evaluate function at starting point and calculate norm.  ***/
    230 
    231     if (C->verbosity) {
    232         fprintf(msgfile, "lmmin start ");
    233         lm_print_pars(nout, x, msgfile);
    234     }
    235     (*evaluate)(x, m, data, fvec, &(S->userbreak));
    236     if (C->verbosity > 4)
    237         for (i = 0; i < m; ++i)
    238             fprintf(msgfile, "    fvec[%4i] = %18.8g\n", i, fvec[i]);
    239     S->nfev = 1;
    240     if (S->userbreak)
    241         goto terminate;
    242     fnorm = lm_enorm(m, fvec);
    243     if (C->verbosity)
    244         fprintf(msgfile, "  fnorm = %18.8g\n", fnorm);
    245 
    246     if (!isfinite(fnorm)) {
    247         S->outcome = 12; /* nan */
    248         goto terminate;
    249     } else if (fnorm <= LM_DWARF) {
    250         S->outcome = 0; /* sum of squares almost zero, nothing to do */
    251         goto terminate;
    252     }
    253 
    254     /***  The outer loop: compute gradient, then descend.  ***/
    255 
    256     for (int outer = 0;; ++outer) {
    257 
    258         /** Calculate the Jacobian. **/
    259         for (j = 0; j < n; j++) {
    260             temp = x[j];
    261             step = MAX(eps * eps, eps * fabs(temp));
    262             x[j] += step; /* replace temporarily */
    263             (*evaluate)(x, m, data, wf, &(S->userbreak));
    264             ++(S->nfev);
    265             if (S->userbreak)
    266                 goto terminate;
    267             for (i = 0; i < m; i++)
    268                 fjac[j*m+i] = (wf[i] - fvec[i]) / step;
    269             x[j] = temp; /* restore */
    270         }
    271         if (C->verbosity >= 10) {
    272             /* print the entire matrix */
    273             printf("\nlmmin Jacobian\n");
    274             for (i = 0; i < m; i++) {
    275                 printf("  ");
    276                 for (j = 0; j < n; j++)
    277                     printf("%.5e ", fjac[j*m+i]);
    278                 printf("\n");
    279             }
    280         }
    281 
    282         /** Compute the QR factorization of the Jacobian. **/
    283 
    284         /* fjac is an m by n array. The upper n by n submatrix of fjac is made
    285          *   to contain an upper triangular matrix R with diagonal elements of
    286          *   nonincreasing magnitude such that
    287          *
    288          *         P^T*(J^T*J)*P = R^T*R
    289          *
    290          *         (NOTE: ^T stands for matrix transposition),
    291          *
    292          *   where P is a permutation matrix and J is the final calculated
    293          *   Jacobian. Column j of P is column Pivot(j) of the identity matrix.
    294          *   The lower trapezoidal part of fjac contains information generated
    295          *   during the computation of R.
    296          *
    297          * Pivot is an integer array of length n. It defines a permutation
    298          *   matrix P such that jac*P = Q*R, where jac is the final calculated
    299          *   Jacobian, Q is orthogonal (not stored), and R is upper triangular
    300          *   with diagonal elements of nonincreasing magnitude. Column j of P
    301          *   is column Pivot(j) of the identity matrix.
    302          */
    303 
    304         lm_qrfac(m, n, fjac, Pivot, wa1, wa2, wa3);
    305         /* return values are Pivot, wa1=rdiag, wa2=acnorm */
    306 
    307         /** Form Q^T * fvec, and store first n components in qtf. **/
    308         for (i = 0; i < m; i++)
    309             wf[i] = fvec[i];
    310 
    311         for (j = 0; j < n; j++) {
    312             temp3 = fjac[j*m+j];
    313             if (temp3 != 0) {
    314                 sum = 0;
    315                 for (i = j; i < m; i++)
    316                     sum += fjac[j*m+i] * wf[i];
    317                 temp = -sum / temp3;
    318                 for (i = j; i < m; i++)
    319                     wf[i] += fjac[j*m+i] * temp;
    320             }
    321             fjac[j*m+j] = wa1[j];
    322             qtf[j] = wf[j];
    323         }
    324 
    325         /**  Compute norm of scaled gradient and detect degeneracy. **/
    326         gnorm = 0;
    327         for (j = 0; j < n; j++) {
    328             if (wa2[Pivot[j]] == 0)
    329                 continue;
    330             sum = 0;
    331             for (i = 0; i <= j; i++)
    332                 sum += fjac[j*m+i] * qtf[i];
    333             gnorm = MAX(gnorm, fabs(sum / wa2[Pivot[j]] / fnorm));
    334         }
    335 
    336         if (gnorm <= C->gtol) {
    337             S->outcome = 4;
    338             goto terminate;
    339         }
    340 
    341         /** Initialize or update diag and delta. **/
    342         if (!outer) { /* first iteration only */
    343             if (C->scale_diag) {
    344                 /* diag := norms of the columns of the initial Jacobian */
    345                 for (j = 0; j < n; j++)
    346                     diag[j] = wa2[j] ? wa2[j] : 1;
    347                 /* xnorm := || D x || */
    348                 for (j = 0; j < n; j++)
    349                     wa3[j] = diag[j] * x[j];
    350                 xnorm = lm_enorm(n, wa3);
    351                 if (C->verbosity >= 2) {
    352                     fprintf(msgfile, "lmmin diag  ");
    353                     lm_print_pars(nout, x, msgfile); // xnorm
    354                     fprintf(msgfile, "  xnorm = %18.8g\n", xnorm);
    355                 }
    356                 /* Only now print the header for the loop table. */
    357                 if (C->verbosity >= 3) {
    358                     fprintf(msgfile, "  o  i     lmpar    prered"
    359                                      "          ratio    dirder      delta"
    360                                      "      pnorm                 fnorm");
    361                     for (i = 0; i < nout; ++i)
    362                         fprintf(msgfile, "               p%i", i);
    363                     fprintf(msgfile, "\n");
    364                 }
    365             } else {
    366                 xnorm = lm_enorm(n, x);
    367             }
    368             if (!isfinite(xnorm)) {
    369                 S->outcome = 12; /* nan */
    370                 goto terminate;
    371             }
    372             /* Initialize the step bound delta. */
    373             if (xnorm)
    374                 delta = C->stepbound * xnorm;
    375             else
    376                 delta = C->stepbound;
    377         } else {
    378             if (C->scale_diag) {
    379                 for (j = 0; j < n; j++)
    380                     diag[j] = MAX(diag[j], wa2[j]);
    381             }
    382         }
    383 
    384         /** The inner loop. **/
    385         int inner = 0;
    386         do {
    387 
    388             /** Determine the Levenberg-Marquardt parameter. **/
    389             lm_lmpar(n, fjac, m, Pivot, diag, qtf, delta, &lmpar,
    390                      wa1, wa2, wf, wa3);
    391             /* used return values are fjac (partly), lmpar, wa1=x, wa3=diag*x */
    392 
    393             /* Predict scaled reduction. */
    394             pnorm = lm_enorm(n, wa3);
    395             if (!isfinite(pnorm)) {
    396                 S->outcome = 12; /* nan */
    397                 goto terminate;
    398             }
    399             temp2 = lmpar * SQR(pnorm / fnorm);
    400             for (j = 0; j < n; j++) {
    401                 wa3[j] = 0;
    402                 for (i = 0; i <= j; i++)
    403                     wa3[i] -= fjac[j*m+i] * wa1[Pivot[j]];
    404             }
    405             temp1 = SQR(lm_enorm(n, wa3) / fnorm);
    406             if (!isfinite(temp1)) {
    407                 S->outcome = 12; /* nan */
    408                 goto terminate;
    409             }
    410             prered = temp1 + 2*temp2;
    411             dirder = -temp1 + temp2; /* scaled directional derivative */
    412 
    413             /* At first call, adjust the initial step bound. */
    414             if (!outer && pnorm < delta)
    415                 delta = pnorm;
    416 
    417             /** Evaluate the function at x + p. **/
    418             for (j = 0; j < n; j++)
    419                 wa2[j] = x[j] - wa1[j];
    420             (*evaluate)(wa2, m, data, wf, &(S->userbreak));
    421             ++(S->nfev);
    422             if (S->userbreak)
    423                 goto terminate;
    424             fnorm1 = lm_enorm(m, wf);
    425             if (!isfinite(fnorm1)) {
    426                 S->outcome = 12; /* nan */
    427                 goto terminate;
    428             }
    429 
    430             /** Evaluate the scaled reduction. **/
    431 
    432             /* Actual scaled reduction. */
    433             actred = 1 - SQR(fnorm1 / fnorm);
    434 
    435             /* Ratio of actual to predicted reduction. */
    436             ratio = prered ? actred / prered : 0;
    437 
    438             if (C->verbosity == 2) {
    439                 fprintf(msgfile, "lmmin (%i:%i) ", outer, inner);
    440                 lm_print_pars(nout, wa2, msgfile); // fnorm1,
    441             } else if (C->verbosity >= 3) {
    442                 printf("%3i %2i %9.2g %9.2g %14.6g"
    443                        " %9.2g %10.3e %10.3e %21.15e",
    444                        outer, inner, lmpar, prered, ratio,
    445                        dirder, delta, pnorm, fnorm1);
    446                 for (i = 0; i < nout; ++i)
    447                     fprintf(msgfile, " %16.9g", wa2[i]);
    448                 fprintf(msgfile, "\n");
    449             }
    450 
    451             /* Update the step bound. */
    452             if (ratio <= 0.25) {
    453                 if (actred >= 0)
    454                     temp = 0.5;
    455                 else if (actred > -99) /* -99 = 1-1/0.1^2 */
    456                     temp = MAX(dirder / (2*dirder + actred), 0.1);
    457                 else
    458                     temp = 0.1;
    459                 delta = temp * MIN(delta, pnorm / 0.1);
    460                 lmpar /= temp;
    461             } else if (ratio >= 0.75) {
    462                 delta = 2 * pnorm;
    463                 lmpar *= 0.5;
    464             } else if (!lmpar) {
    465                 delta = 2 * pnorm;
    466             }
    467 
    468             /**  On success, update solution, and test for convergence. **/
    469 
    470             inner_success = ratio >= 1e-4;
    471             if (inner_success) {
    472 
    473                 /* Update x, fvec, and their norms. */
    474                 if (C->scale_diag) {
    475                     for (j = 0; j < n; j++) {
    476                         x[j] = wa2[j];
    477                         wa2[j] = diag[j] * x[j];
    478                     }
    479                 } else {
    480                     for (j = 0; j < n; j++)
    481                         x[j] = wa2[j];
    482                 }
    483                 for (i = 0; i < m; i++)
    484                     fvec[i] = wf[i];
    485                 xnorm = lm_enorm(n, wa2);
    486                 if (!isfinite(xnorm)) {
    487                     S->outcome = 12; /* nan */
    488                     goto terminate;
    489                 }
    490                 fnorm = fnorm1;
    491             }
    492 
    493             /* Convergence tests. */
    494             S->outcome = 0;
    495             if (fnorm <= LM_DWARF)
    496                 goto terminate; /* success: sum of squares almost zero */
    497             /* Test two criteria (both may be fulfilled). */
    498             if (fabs(actred) <= C->ftol && prered <= C->ftol && ratio <= 2)
    499                 S->outcome = 1; /* success: x almost stable */
    500             if (delta <= C->xtol * xnorm)
    501                 S->outcome += 2; /* success: sum of squares almost stable */
    502             if (S->outcome != 0) {
    503                 goto terminate;
    504             }
    505 
    506             /** Tests for termination and stringent tolerances. **/
    507             if (S->nfev >= maxfev) {
    508                 S->outcome = 5;
    509                 goto terminate;
    510             }
    511             if (fabs(actred) <= LM_MACHEP && prered <= LM_MACHEP &&
    512                 ratio <= 2) {
    513                 S->outcome = 6;
    514                 goto terminate;
    515             }
    516             if (delta <= LM_MACHEP * xnorm) {
    517                 S->outcome = 7;
    518                 goto terminate;
    519             }
    520             if (gnorm <= LM_MACHEP) {
    521                 S->outcome = 8;
    522                 goto terminate;
    523             }
    524 
    525             /** End of the inner loop. Repeat if iteration unsuccessful. **/
    526             ++inner;
    527         } while (!inner_success);
    528 
    529     }; /***  End of the outer loop.  ***/
    530 
    531 terminate:
    532     S->fnorm = lm_enorm(m, fvec);
    533     if (C->verbosity >= 2)
    534         printf("lmmin outcome (%i) xnorm %g ftol %g xtol %g\n", S->outcome,
    535                xnorm, C->ftol, C->xtol);
    536     if (C->verbosity & 1) {
    537         fprintf(msgfile, "lmmin final ");
    538         lm_print_pars(nout, x, msgfile); // S->fnorm,
    539         fprintf(msgfile, "  fnorm = %18.8g\n", S->fnorm);
    540     }
    541     if (S->userbreak) /* user-requested break */
    542         S->outcome = 11;
    543 
    544     /***  Deallocate the workspace.  ***/
    545     free(ws);
    546 
    547 } /*** lmmin. ***/
    548 
    549 /******************************************************************************/
    550 /*  lm_lmpar (determine Levenberg-Marquardt parameter)                        */
    551 /******************************************************************************/
    552 
    553 void lm_lmpar(const int n, double* r, const int ldr, const int* Pivot,
    554               const double* diag, const double* qtb, const double delta,
    555               double* par, double* x, double* Sdiag, double* aux, double* xdi)
    556 /*     Given an m by n matrix A, an n by n nonsingular diagonal matrix D,
    557  *     an m-vector b, and a positive number delta, the problem is to
    558  *     determine a parameter value par such that if x solves the system
    559  *
    560  *          A*x = b  and  sqrt(par)*D*x = 0
    561  *
    562  *     in the least squares sense, and dxnorm is the Euclidean norm of D*x,
    563  *     then either par=0 and (dxnorm-delta) < 0.1*delta, or par>0 and
    564  *     abs(dxnorm-delta) < 0.1*delta.
    565  *
    566  *     Using lm_qrsolv, this subroutine completes the solution of the
    567  *     problem if it is provided with the necessary information from the
    568  *     QR factorization, with column pivoting, of A. That is, if A*P = Q*R,
    569  *     where P is a permutation matrix, Q has orthogonal columns, and R is
    570  *     an upper triangular matrix with diagonal elements of nonincreasing
    571  *     magnitude, then lmpar expects the full upper triangle of R, the
    572  *     permutation matrix P, and the first n components of Q^T*b. On output
    573  *     lmpar also provides an upper triangular matrix S such that
    574  *
    575  *          P^T*(A^T*A + par*D*D)*P = S^T*S.
    576  *
    577  *     S is employed within lmpar and may be of separate interest.
    578  *
    579  *     Only a few iterations are generally needed for convergence of the
    580  *     algorithm. If, however, the limit of 10 iterations is reached, then
    581  *     the output par will contain the best value obtained so far.
    582  *
    583  *     Parameters:
    584  *
    585  *      n is a positive integer INPUT variable set to the order of r.
    586  *
    587  *      r is an n by n array. On INPUT the full upper triangle must contain
    588  *        the full upper triangle of the matrix R. On OUTPUT the full upper
    589  *        triangle is unaltered, and the strict lower triangle contains the
    590  *        strict upper triangle (transposed) of the upper triangular matrix S.
    591  *
    592  *      ldr is a positive integer INPUT variable not less than n which
    593  *        specifies the leading dimension of the array R.
    594  *
    595  *      Pivot is an integer INPUT array of length n which defines the
    596  *        permutation matrix P such that A*P = Q*R. Column j of P is column
    597  *        Pivot(j) of the identity matrix.
    598  *
    599  *      diag is an INPUT array of length n which must contain the diagonal
    600  *        elements of the matrix D.
    601  *
    602  *      qtb is an INPUT array of length n which must contain the first
    603  *        n elements of the vector Q^T*b.
    604  *
    605  *      delta is a positive INPUT variable which specifies an upper bound
    606  *        on the Euclidean norm of D*x.
    607  *
    608  *      par is a nonnegative variable. On INPUT par contains an initial
    609  *        estimate of the Levenberg-Marquardt parameter. On OUTPUT par
    610  *        contains the final estimate.
    611  *
    612  *      x is an OUTPUT array of length n which contains the least-squares
    613  *        solution of the system A*x = b, sqrt(par)*D*x = 0, for the output par.
    614  *
    615  *      Sdiag is an array of length n needed as workspace; on OUTPUT it
    616  *        contains the diagonal elements of the upper triangular matrix S.
    617  *
    618  *      aux is a multi-purpose work array of length n.
    619  *
    620  *      xdi is a work array of length n. On OUTPUT: diag[j] * x[j].
    621  *
    622  */
    623 {
    624     int i, iter, j, nsing;
    625     double dxnorm, fp, fp_old, gnorm, parc, parl, paru;
    626     double sum, temp;
    627     static double p1 = 0.1;
    628 
    629     /*** Compute and store in x the Gauss-Newton direction. If the Jacobian
    630          is rank-deficient, obtain a least-squares solution. ***/
    631 
    632     nsing = n;
    633     for (j = 0; j < n; j++) {
    634         aux[j] = qtb[j];
    635         if (r[j*ldr+j] == 0 && nsing == n)
    636             nsing = j;
    637         if (nsing < n)
    638             aux[j] = 0;
    639     }
    640     for (j = nsing-1; j >= 0; j--) {
    641         aux[j] = aux[j] / r[j+ldr*j];
    642         temp = aux[j];
    643         for (i = 0; i < j; i++)
    644             aux[i] -= r[j*ldr+i] * temp;
    645     }
    646 
    647     for (j = 0; j < n; j++)
    648         x[Pivot[j]] = aux[j];
    649 
    650     /*** Initialize the iteration counter, evaluate the function at the origin,
    651          and test for acceptance of the Gauss-Newton direction. ***/
    652 
    653     for (j = 0; j < n; j++)
    654         xdi[j] = diag[j] * x[j];
    655     dxnorm = lm_enorm(n, xdi);
    656     fp = dxnorm - delta;
    657     if (fp <= p1 * delta) {
    658 #ifdef LMFIT_DEBUG_MESSAGES
    659         printf("debug lmpar nsing=%d, n=%d, terminate[fp<=p1*del]\n", nsing, n);
    660 #endif
    661         *par = 0;
    662         return;
    663     }
    664 
    665     /*** If the Jacobian is not rank deficient, the Newton step provides a
    666          lower bound, parl, for the zero of the function. Otherwise set this
    667          bound to zero. ***/
    668 
    669     parl = 0;
    670     if (nsing >= n) {
    671         for (j = 0; j < n; j++)
    672             aux[j] = diag[Pivot[j]] * xdi[Pivot[j]] / dxnorm;
    673 
    674         for (j = 0; j < n; j++) {
    675             sum = 0;
    676             for (i = 0; i < j; i++)
    677                 sum += r[j*ldr+i] * aux[i];
    678             aux[j] = (aux[j] - sum) / r[j+ldr*j];
    679         }
    680         temp = lm_enorm(n, aux);
    681         parl = fp / delta / temp / temp;
    682     }
    683 
    684     /*** Calculate an upper bound, paru, for the zero of the function. ***/
    685 
    686     for (j = 0; j < n; j++) {
    687         sum = 0;
    688         for (i = 0; i <= j; i++)
    689             sum += r[j*ldr+i] * qtb[i];
    690         aux[j] = sum / diag[Pivot[j]];
    691     }
    692     gnorm = lm_enorm(n, aux);
    693     paru = gnorm / delta;
    694     if (paru == 0)
    695         paru = LM_DWARF / MIN(delta, p1);
    696 
    697     /*** If the input par lies outside of the interval (parl,paru),
    698          set par to the closer endpoint. ***/
    699 
    700     *par = MAX(*par, parl);
    701     *par = MIN(*par, paru);
    702     if (*par == 0)
    703         *par = gnorm / dxnorm;
    704 
    705     /*** Iterate. ***/
    706 
    707     for (iter = 0;; iter++) {
    708 
    709         /** Evaluate the function at the current value of par. **/
    710         if (*par == 0)
    711             *par = MAX(LM_DWARF, 0.001 * paru);
    712         temp = sqrt(*par);
    713         for (j = 0; j < n; j++)
    714             aux[j] = temp * diag[j];
    715 
    716         lm_qrsolv(n, r, ldr, Pivot, aux, qtb, x, Sdiag, xdi);
    717         /* return values are r, x, Sdiag */
    718 
    719         for (j = 0; j < n; j++)
    720             xdi[j] = diag[j] * x[j]; /* used as output */
    721         dxnorm = lm_enorm(n, xdi);
    722         fp_old = fp;
    723         fp = dxnorm - delta;
    724 
    725         /** If the function is small enough, accept the current value
    726             of par. Also test for the exceptional cases where parl
    727             is zero or the number of iterations has reached 10. **/
    728         if (fabs(fp) <= p1 * delta ||
    729             (parl == 0 && fp <= fp_old && fp_old < 0) || iter == 10) {
    730 #ifdef LMFIT_DEBUG_MESSAGES
    731             printf("debug lmpar nsing=%d, iter=%d, "
    732                    "par=%.4e [%.4e %.4e], delta=%.4e, fp=%.4e\n",
    733                    nsing, iter, *par, parl, paru, delta, fp);
    734 #endif
    735             break; /* the only exit from the iteration. */
    736         }
    737 
    738         /** Compute the Newton correction. **/
    739         for (j = 0; j < n; j++)
    740             aux[j] = diag[Pivot[j]] * xdi[Pivot[j]] / dxnorm;
    741 
    742         for (j = 0; j < n; j++) {
    743             aux[j] = aux[j] / Sdiag[j];
    744             for (i = j+1; i < n; i++)
    745                 aux[i] -= r[j*ldr+i] * aux[j];
    746         }
    747         temp = lm_enorm(n, aux);
    748         parc = fp / delta / temp / temp;
    749 
    750         /** Depending on the sign of the function, update parl or paru. **/
    751         if (fp > 0)
    752             parl = MAX(parl, *par);
    753         else /* fp < 0 [the case fp==0 is precluded by the break condition] */
    754             paru = MIN(paru, *par);
    755 
    756         /** Compute an improved estimate for par. **/
    757         *par = MAX(parl, *par + parc);
    758     }
    759 
    760 } /*** lm_lmpar. ***/
    761 
    762 /******************************************************************************/
    763 /*  lm_qrfac (QR factorization, from lapack)                                  */
    764 /******************************************************************************/
    765 
    766 void lm_qrfac(const int m, const int n, double* A, int* Pivot, double* Rdiag,
    767               double* Acnorm, double* W)
    768 /*
    769  *     This subroutine uses Householder transformations with column pivoting
    770  *     to compute a QR factorization of the m by n matrix A. That is, qrfac
    771  *     determines an orthogonal matrix Q, a permutation matrix P, and an
    772  *     upper trapezoidal matrix R with diagonal elements of nonincreasing
    773  *     magnitude, such that A*P = Q*R. The Householder transformation for
    774  *     column k, k = 1,2,...,n, is of the form
    775  *
    776  *          I - 2*w*wT/|w|^2
    777  *
    778  *     where w has zeroes in the first k-1 positions.
    779  *
    780  *     Parameters:
    781  *
    782  *      m is an INPUT parameter set to the number of rows of A.
    783  *
    784  *      n is an INPUT parameter set to the number of columns of A.
    785  *
    786  *      A is an m by n array. On INPUT, A contains the matrix for which the
    787  *        QR factorization is to be computed. On OUTPUT the strict upper
    788  *        trapezoidal part of A contains the strict upper trapezoidal part
    789  *        of R, and the lower trapezoidal part of A contains a factored form
    790  *        of Q (the non-trivial elements of the vectors w described above).
    791  *
    792  *      Pivot is an integer OUTPUT array of length n that describes the
    793  *        permutation matrix P. Column j of P is column Pivot(j) of the
    794  *        identity matrix.
    795  *
    796  *      Rdiag is an OUTPUT array of length n which contains the diagonal
    797  *        elements of R.
    798  *
    799  *      Acnorm is an OUTPUT array of length n which contains the norms of
    800  *        the corresponding columns of the input matrix A. If this information
    801  *        is not needed, then Acnorm can share storage with Rdiag.
    802  *
    803  *      W is a work array of length n.
    804  *
    805  */
    806 {
    807     int i, j, k, kmax;
    808     double ajnorm, sum, temp;
    809 
    810 #ifdef LMFIT_DEBUG_MESSAGES
    811     printf("debug qrfac\n");
    812 #endif
    813 
    814     /** Compute initial column norms;
    815         initialize Pivot with identity permutation. ***/
    816     for (j = 0; j < n; j++) {
    817         W[j] = Rdiag[j] = Acnorm[j] = lm_enorm(m, &A[j*m]);
    818         Pivot[j] = j;
    819     }
    820 
    821     /** Loop over columns of A. **/
    822     assert(n <= m);
    823     for (j = 0; j < n; j++) {
    824 
    825         /** Bring the column of largest norm into the pivot position. **/
    826         kmax = j;
    827         for (k = j+1; k < n; k++)
    828             if (Rdiag[k] > Rdiag[kmax])
    829                 kmax = k;
    830 
    831         if (kmax != j) {
    832             /* Swap columns j and kmax. */
    833             k = Pivot[j];
    834             Pivot[j] = Pivot[kmax];
    835             Pivot[kmax] = k;
    836             for (i = 0; i < m; i++) {
    837                 temp = A[j*m+i];
    838                 A[j*m+i] = A[kmax*m+i];
    839                 A[kmax*m+i] = temp;
    840             }
    841             /* Half-swap: Rdiag[j], W[j] won't be needed any further. */
    842             Rdiag[kmax] = Rdiag[j];
    843             W[kmax] = W[j];
    844         }
    845 
    846         /** Compute the Householder reflection vector w_j to reduce the
    847             j-th column of A to a multiple of the j-th unit vector. **/
    848         ajnorm = lm_enorm(m-j, &A[j*m+j]);
    849         if (ajnorm == 0) {
    850             Rdiag[j] = 0;
    851             continue;
    852         }
    853 
    854         /* Let the partial column vector A[j][j:] contain w_j := e_j+-a_j/|a_j|,
    855            where the sign +- is chosen to avoid cancellation in w_jj. */
    856         if (A[j*m+j] < 0)
    857             ajnorm = -ajnorm;
    858         for (i = j; i < m; i++)
    859             A[j*m+i] /= ajnorm;
    860         A[j*m+j] += 1;
    861 
    862         /** Apply the Householder transformation U_w := 1 - 2*w_j.w_j/|w_j|^2
    863             to the remaining columns, and update the norms. **/
    864         for (k = j+1; k < n; k++) {
    865             /* Compute scalar product w_j * a_j. */
    866             sum = 0;
    867             for (i = j; i < m; i++)
    868                 sum += A[j*m+i] * A[k*m+i];
    869 
    870             /* Normalization is simplified by the coincidence |w_j|^2=2w_jj. */
    871             temp = sum / A[j*m+j];
    872 
    873             /* Carry out transform U_w_j * a_k. */
    874             for (i = j; i < m; i++)
    875                 A[k*m+i] -= temp * A[j*m+i];
    876 
    877             /* No idea what happens here. */
    878             if (Rdiag[k] != 0) {
    879                 temp = A[m*k+j] / Rdiag[k];
    880                 if (fabs(temp) < 1) {
    881                     Rdiag[k] *= sqrt(1 - SQR(temp));
    882                     temp = Rdiag[k] / W[k];
    883                 } else
    884                     temp = 0;
    885                 if (temp == 0 || 0.05 * SQR(temp) <= LM_MACHEP) {
    886                     Rdiag[k] = lm_enorm(m-j-1, &A[m*k+j+1]);
    887                     W[k] = Rdiag[k];
    888                 }
    889             }
    890         }
    891 
    892         Rdiag[j] = -ajnorm;
    893     }
    894 } /*** lm_qrfac. ***/
    895 
    896 /******************************************************************************/
    897 /*  lm_qrsolv (linear least-squares)                                          */
    898 /******************************************************************************/
    899 
    900 void lm_qrsolv(const int n, double* r, const int ldr, const int* Pivot,
    901                const double* diag, const double* qtb, double* x,
    902                double* Sdiag, double* W)
    903 /*
    904  *     Given an m by n matrix A, an n by n diagonal matrix D, and an
    905  *     m-vector b, the problem is to determine an x which solves the
    906  *     system
    907  *
    908  *          A*x = b  and  D*x = 0
    909  *
    910  *     in the least squares sense.
    911  *
    912  *     This subroutine completes the solution of the problem if it is
    913  *     provided with the necessary information from the QR factorization,
    914  *     with column pivoting, of A. That is, if A*P = Q*R, where P is a
    915  *     permutation matrix, Q has orthogonal columns, and R is an upper
    916  *     triangular matrix with diagonal elements of nonincreasing magnitude,
    917  *     then qrsolv expects the full upper triangle of R, the permutation
    918  *     matrix P, and the first n components of Q^T*b. The system
    919  *     A*x = b, D*x = 0, is then equivalent to
    920  *
    921  *          R*z = Q^T*b,  P^T*D*P*z = 0,
    922  *
    923  *     where x = P*z. If this system does not have full rank, then a least
    924  *     squares solution is obtained. On output qrsolv also provides an upper
    925  *     triangular matrix S such that
    926  *
    927  *          P^T*(A^T*A + D*D)*P = S^T*S.
    928  *
    929  *     S is computed within qrsolv and may be of separate interest.
    930  *
    931  *     Parameters:
    932  *
    933  *      n is a positive integer INPUT variable set to the order of R.
    934  *
    935  *      r is an n by n array. On INPUT the full upper triangle must contain
    936  *        the full upper triangle of the matrix R. On OUTPUT the full upper
    937  *        triangle is unaltered, and the strict lower triangle contains the
    938  *        strict upper triangle (transposed) of the upper triangular matrix S.
    939  *
    940  *      ldr is a positive integer INPUT variable not less than n which
    941  *        specifies the leading dimension of the array R.
    942  *
    943  *      Pivot is an integer INPUT array of length n which defines the
    944  *        permutation matrix P such that A*P = Q*R. Column j of P is column
    945  *        Pivot(j) of the identity matrix.
    946  *
    947  *      diag is an INPUT array of length n which must contain the diagonal
    948  *        elements of the matrix D.
    949  *
    950  *      qtb is an INPUT array of length n which must contain the first
    951  *        n elements of the vector Q^T*b.
    952  *
    953  *      x is an OUTPUT array of length n which contains the least-squares
    954  *        solution of the system A*x = b, D*x = 0.
    955  *
    956  *      Sdiag is an OUTPUT array of length n which contains the diagonal
    957  *        elements of the upper triangular matrix S.
    958  *
    959  *      W is a work array of length n.
    960  *
    961  */
    962 {
    963     int i, kk, j, k, nsing;
    964     double qtbpj, sum, temp;
    965     double _sin, _cos, _tan, _cot; /* local variables, not functions */
    966 
    967     /*** Copy R and Q^T*b to preserve input and initialize S.
    968          In particular, save the diagonal elements of R in x. ***/
    969 
    970     for (j = 0; j < n; j++) {
    971         for (i = j; i < n; i++)
    972             r[j*ldr+i] = r[i*ldr+j];
    973         x[j] = r[j*ldr+j];
    974         W[j] = qtb[j];
    975     }
    976 
    977     /*** Eliminate the diagonal matrix D using a Givens rotation. ***/
    978 
    979     for (j = 0; j < n; j++) {
    980 
    981         /*** Prepare the row of D to be eliminated, locating the diagonal
    982              element using P from the QR factorization. ***/
    983 
    984         if (diag[Pivot[j]] != 0) {
    985             for (k = j; k < n; k++)
    986                 Sdiag[k] = 0;
    987             Sdiag[j] = diag[Pivot[j]];
    988 
    989             /*** The transformations to eliminate the row of D modify only
    990                  a single element of Q^T*b beyond the first n, which is
    991                  initially 0. ***/
    992 
    993             qtbpj = 0;
    994             for (k = j; k < n; k++) {
    995 
    996                 /** Determine a Givens rotation which eliminates the
    997                     appropriate element in the current row of D. **/
    998                 if (Sdiag[k] == 0)
    999                     continue;
   1000                 kk = k + ldr * k;
   1001                 if (fabs(r[kk]) < fabs(Sdiag[k])) {
   1002                     _cot = r[kk] / Sdiag[k];
   1003                     _sin = 1 / hypot(1, _cot);
   1004                     _cos = _sin * _cot;
   1005                 } else {
   1006                     _tan = Sdiag[k] / r[kk];
   1007                     _cos = 1 / hypot(1, _tan);
   1008                     _sin = _cos * _tan;
   1009                 }
   1010 
   1011                 /** Compute the modified diagonal element of R and
   1012                     the modified element of (Q^T*b,0). **/
   1013                 r[kk] = _cos * r[kk] + _sin * Sdiag[k];
   1014                 temp = _cos * W[k] + _sin * qtbpj;
   1015                 qtbpj = -_sin * W[k] + _cos * qtbpj;
   1016                 W[k] = temp;
   1017 
   1018                 /** Accumulate the tranformation in the row of S. **/
   1019                 for (i = k+1; i < n; i++) {
   1020                     temp = _cos * r[k*ldr+i] + _sin * Sdiag[i];
   1021                     Sdiag[i] = -_sin * r[k*ldr+i] + _cos * Sdiag[i];
   1022                     r[k*ldr+i] = temp;
   1023                 }
   1024             }
   1025         }
   1026 
   1027         /** Store the diagonal element of S and restore
   1028             the corresponding diagonal element of R. **/
   1029         Sdiag[j] = r[j*ldr+j];
   1030         r[j*ldr+j] = x[j];
   1031     }
   1032 
   1033     /*** Solve the triangular system for z. If the system is singular, then
   1034         obtain a least-squares solution. ***/
   1035 
   1036     nsing = n;
   1037     for (j = 0; j < n; j++) {
   1038         if (Sdiag[j] == 0 && nsing == n)
   1039             nsing = j;
   1040         if (nsing < n)
   1041             W[j] = 0;
   1042     }
   1043 
   1044     for (j = nsing-1; j >= 0; j--) {
   1045         sum = 0;
   1046         for (i = j+1; i < nsing; i++)
   1047             sum += r[j*ldr+i] * W[i];
   1048         W[j] = (W[j] - sum) / Sdiag[j];
   1049     }
   1050 
   1051     /*** Permute the components of z back to components of x. ***/
   1052 
   1053     for (j = 0; j < n; j++)
   1054         x[Pivot[j]] = W[j];
   1055 
   1056 } /*** lm_qrsolv. ***/
   1057 
   1058 /******************************************************************************/
   1059 /*  lm_enorm (Euclidean norm)                                                 */
   1060 /******************************************************************************/
   1061 
   1062 double lm_enorm(int n, const double* x)
   1063 /*     This function calculates the Euclidean norm of an n-vector x.
   1064  *
   1065  *     The Euclidean norm is computed by accumulating the sum of squares
   1066  *     in three different sums. The sums of squares for the small and large
   1067  *     components are scaled so that no overflows occur. Non-destructive
   1068  *     underflows are permitted. Underflows and overflows do not occur in
   1069  *     the computation of the unscaled sum of squares for the intermediate
   1070  *     components. The definitions of small, intermediate and large components
   1071  *     depend on two constants, LM_SQRT_DWARF and LM_SQRT_GIANT. The main
   1072  *     restrictions on these constants are that LM_SQRT_DWARF**2 not underflow
   1073  *     and LM_SQRT_GIANT**2 not overflow.
   1074  *
   1075  *     Parameters:
   1076  *
   1077  *      n is a positive integer INPUT variable.
   1078  *
   1079  *      x is an INPUT array of length n.
   1080  */
   1081 {
   1082     int i;
   1083     double agiant, s1, s2, s3, xabs, x1max, x3max;
   1084 
   1085     s1 = 0;
   1086     s2 = 0;
   1087     s3 = 0;
   1088     x1max = 0;
   1089     x3max = 0;
   1090     agiant = LM_SQRT_GIANT / n;
   1091 
   1092     /** Sum squares. **/
   1093     for (i = 0; i < n; i++) {
   1094         xabs = fabs(x[i]);
   1095         if (xabs > LM_SQRT_DWARF) {
   1096             if (xabs < agiant) {
   1097                 s2 += SQR(xabs);
   1098             } else if (xabs > x1max) {
   1099                 s1 = 1 + s1 * SQR(x1max / xabs);
   1100                 x1max = xabs;
   1101             } else {
   1102                 s1 += SQR(xabs / x1max);
   1103             }
   1104         } else if (xabs > x3max) {
   1105             s3 = 1 + s3 * SQR(x3max / xabs);
   1106             x3max = xabs;
   1107         } else if (xabs != 0) {
   1108             s3 += SQR(xabs / x3max);
   1109         }
   1110     }
   1111 
   1112     /** Calculate the norm. **/
   1113     if (s1 != 0)
   1114         return x1max * sqrt(s1 + (s2 / x1max) / x1max);
   1115     else if (s2 != 0)
   1116         if (s2 >= x3max)
   1117             return sqrt(s2 * (1 + (x3max / s2) * (x3max * s3)));
   1118         else
   1119             return sqrt(x3max * ((s2 / x3max) + (x3max * s3)));
   1120     else
   1121         return x3max * sqrt(s3);
   1122 
   1123 } /*** lm_enorm. ***/
   1124