1 /* 2 * Library: lmfit (Levenberg-Marquardt least squares fitting) 3 * 4 * File: lmmin.c 5 * 6 * Contents: Levenberg-Marquardt minimization. 7 * 8 * Copyright: MINPACK authors, The University of Chikago (1980-1999) 9 * Joachim Wuttke, Forschungszentrum Juelich GmbH (2004-2013) 10 * 11 * License: see ../COPYING (FreeBSD) 12 * 13 * Homepage: apps.jcns.fz-juelich.de/lmfit 14 */ 15 16 #include <assert.h> 17 #include <stdlib.h> 18 #include <stdio.h> 19 #include <math.h> 20 #include <float.h> 21 #include "lmmin.h" 22 23 #define MIN(a, b) (((a) <= (b)) ? (a) : (b)) 24 #define MAX(a, b) (((a) >= (b)) ? (a) : (b)) 25 #define SQR(x) (x) * (x) 26 27 /* Declare functions that do the heavy numerics. 28 Implementions are in this source file, below lmmin. 29 Dependences: lmmin calls lmpar, which calls qrfac and qrsolv. */ 30 void lm_lmpar(const int n, double* r, const int ldr, const int* Pivot, 31 const double* diag, const double* qtb, const double delta, 32 double* par, double* x, double* Sdiag, double* aux, double* xdi); 33 void lm_qrfac(const int m, const int n, double* A, int* Pivot, double* Rdiag, 34 double* Acnorm, double* W); 35 void lm_qrsolv(const int n, double* r, const int ldr, const int* Pivot, 36 const double* diag, const double* qtb, double* x, 37 double* Sdiag, double* W); 38 39 /******************************************************************************/ 40 /* Numeric constants */ 41 /******************************************************************************/ 42 43 /* Set machine-dependent constants to values from float.h. */ 44 #define LM_MACHEP DBL_EPSILON /* resolution of arithmetic */ 45 #define LM_DWARF DBL_MIN /* smallest nonzero number */ 46 #define LM_SQRT_DWARF sqrt(DBL_MIN) /* square should not underflow */ 47 #define LM_SQRT_GIANT sqrt(DBL_MAX) /* square should not overflow */ 48 #define LM_USERTOL 30 * LM_MACHEP /* users are recommended to require this */ 49 50 /* If the above values do not work, the following seem good for an x86: 51 LM_MACHEP .555e-16 52 LM_DWARF 9.9e-324 53 LM_SQRT_DWARF 1.e-160 54 LM_SQRT_GIANT 1.e150 55 LM_USER_TOL 1.e-14 56 The following values should work on any machine: 57 LM_MACHEP 1.2e-16 58 LM_DWARF 1.0e-38 59 LM_SQRT_DWARF 3.834e-20 60 LM_SQRT_GIANT 1.304e19 61 LM_USER_TOL 1.e-14 62 */ 63 64 /* Predefined control parameter sets (msgfile=NULL means stdout). */ 65 const lm_control_struct lm_control_double = { 66 LM_USERTOL, LM_USERTOL, LM_USERTOL, LM_USERTOL, 67 100., 100, 1, NULL, 0, -1, -1}; 68 const lm_control_struct lm_control_float = { 69 1.e-7, 1.e-7, 1.e-7, 1.e-7, 70 100., 100, 1, NULL, 0, -1, -1}; 71 72 /******************************************************************************/ 73 /* Message texts (indexed by status.info) */ 74 /******************************************************************************/ 75 76 const char* lm_infmsg[] = { 77 "found zero (sum of squares below underflow limit)", 78 "converged (the relative error in the sum of squares is at most tol)", 79 "converged (the relative error of the parameter vector is at most tol)", 80 "converged (both errors are at most tol)", 81 "trapped (by degeneracy; increasing epsilon might help)", 82 "exhausted (number of function calls exceeding preset patience)", 83 "failed (ftol<tol: cannot reduce sum of squares any further)", 84 "failed (xtol<tol: cannot improve approximate solution any further)", 85 "failed (gtol<tol: cannot improve approximate solution any further)", 86 "crashed (not enough memory)", 87 "exploded (fatal coding error: improper input parameters)", 88 "stopped (break requested within function evaluation)", 89 "found nan (function value is not-a-number or infinite)"}; 90 91 const char* lm_shortmsg[] = { 92 "found zero", 93 "converged (f)", 94 "converged (p)", 95 "converged (2)", 96 "degenerate", 97 "call limit", 98 "failed (f)", 99 "failed (p)", 100 "failed (o)", 101 "no memory", 102 "invalid input", 103 "user break", 104 "found nan"}; 105 106 /******************************************************************************/ 107 /* Monitoring auxiliaries. */ 108 /******************************************************************************/ 109 110 void lm_print_pars(int nout, const double* par, FILE* fout) 111 { 112 int i; 113 for (i = 0; i < nout; ++i) 114 fprintf(fout, " %16.9g", par[i]); 115 fprintf(fout, "\n"); 116 } 117 118 /******************************************************************************/ 119 /* lmmin (main minimization routine) */ 120 /******************************************************************************/ 121 122 void lmmin(const int n, double* x, const int m, const void* data, 123 void (*evaluate)(const double* par, const int m_dat, 124 const void* data, double* fvec, int* userbreak), 125 const lm_control_struct* C, lm_status_struct* S) 126 { 127 int j, i; 128 double actred, dirder, fnorm, fnorm1, gnorm, pnorm, prered, ratio, step, 129 sum, temp, temp1, temp2, temp3; 130 131 /*** Initialize internal variables. ***/ 132 133 int maxfev = C->patience * (n+1); 134 135 int inner_success; /* flag for loop control */ 136 double lmpar = 0; /* Levenberg-Marquardt parameter */ 137 double delta = 0; 138 double xnorm = 0; 139 double eps = sqrt(MAX(C->epsilon, LM_MACHEP)); /* for forward differences */ 140 141 int nout = C->n_maxpri == -1 ? n : MIN(C->n_maxpri, n); 142 143 /* Reinterpret C->msgfile=NULL as stdout (which is unavailable for 144 compile-time initialization of lm_control_double and similar). */ 145 FILE* msgfile = C->msgfile ? C->msgfile : stdout; 146 147 /*** Default status info; must be set before first return statement. ***/ 148 149 S->outcome = 0; /* status code */ 150 S->userbreak = 0; 151 S->nfev = 0; /* function evaluation counter */ 152 153 /*** Check input parameters for errors. ***/ 154 155 if (n <= 0) { 156 fprintf(stderr, "lmmin: invalid number of parameters %i\n", n); 157 S->outcome = 10; 158 return; 159 } 160 if (m < n) { 161 fprintf(stderr, "lmmin: number of data points (%i) " 162 "smaller than number of parameters (%i)\n", 163 m, n); 164 S->outcome = 10; 165 return; 166 } 167 if (C->ftol < 0 || C->xtol < 0 || C->gtol < 0) { 168 fprintf(stderr, 169 "lmmin: negative tolerance (at least one of %g %g %g)\n", 170 C->ftol, C->xtol, C->gtol); 171 S->outcome = 10; 172 return; 173 } 174 if (maxfev <= 0) { 175 fprintf(stderr, "lmmin: nonpositive function evaluations limit %i\n", 176 maxfev); 177 S->outcome = 10; 178 return; 179 } 180 if (C->stepbound <= 0) { 181 fprintf(stderr, "lmmin: nonpositive stepbound %g\n", C->stepbound); 182 S->outcome = 10; 183 return; 184 } 185 if (C->scale_diag != 0 && C->scale_diag != 1) { 186 fprintf(stderr, "lmmin: logical variable scale_diag=%i, " 187 "should be 0 or 1\n", 188 C->scale_diag); 189 S->outcome = 10; 190 return; 191 } 192 193 /*** Allocate work space. ***/ 194 195 /* Allocate total workspace with just one system call */ 196 char* ws; 197 if ((ws = malloc((2*m + 5*n + m*n) * sizeof(double) + 198 n * sizeof(int))) == NULL) { 199 S->outcome = 9; 200 return; 201 } 202 203 /* Assign workspace segments. */ 204 char* pws = ws; 205 double* fvec = (double*)pws; 206 pws += m * sizeof(double) / sizeof(char); 207 double* diag = (double*)pws; 208 pws += n * sizeof(double) / sizeof(char); 209 double* qtf = (double*)pws; 210 pws += n * sizeof(double) / sizeof(char); 211 double* fjac = (double*)pws; 212 pws += n * m * sizeof(double) / sizeof(char); 213 double* wa1 = (double*)pws; 214 pws += n * sizeof(double) / sizeof(char); 215 double* wa2 = (double*)pws; 216 pws += n * sizeof(double) / sizeof(char); 217 double* wa3 = (double*)pws; 218 pws += n * sizeof(double) / sizeof(char); 219 double* wf = (double*)pws; 220 pws += m * sizeof(double) / sizeof(char); 221 int* Pivot = (int*)pws; 222 pws += n * sizeof(int) / sizeof(char); 223 224 /* Initialize diag. */ 225 if (!C->scale_diag) 226 for (j = 0; j < n; j++) 227 diag[j] = 1; 228 229 /*** Evaluate function at starting point and calculate norm. ***/ 230 231 if (C->verbosity) { 232 fprintf(msgfile, "lmmin start "); 233 lm_print_pars(nout, x, msgfile); 234 } 235 (*evaluate)(x, m, data, fvec, &(S->userbreak)); 236 if (C->verbosity > 4) 237 for (i = 0; i < m; ++i) 238 fprintf(msgfile, " fvec[%4i] = %18.8g\n", i, fvec[i]); 239 S->nfev = 1; 240 if (S->userbreak) 241 goto terminate; 242 fnorm = lm_enorm(m, fvec); 243 if (C->verbosity) 244 fprintf(msgfile, " fnorm = %18.8g\n", fnorm); 245 246 if (!isfinite(fnorm)) { 247 S->outcome = 12; /* nan */ 248 goto terminate; 249 } else if (fnorm <= LM_DWARF) { 250 S->outcome = 0; /* sum of squares almost zero, nothing to do */ 251 goto terminate; 252 } 253 254 /*** The outer loop: compute gradient, then descend. ***/ 255 256 for (int outer = 0;; ++outer) { 257 258 /** Calculate the Jacobian. **/ 259 for (j = 0; j < n; j++) { 260 temp = x[j]; 261 step = MAX(eps * eps, eps * fabs(temp)); 262 x[j] += step; /* replace temporarily */ 263 (*evaluate)(x, m, data, wf, &(S->userbreak)); 264 ++(S->nfev); 265 if (S->userbreak) 266 goto terminate; 267 for (i = 0; i < m; i++) 268 fjac[j*m+i] = (wf[i] - fvec[i]) / step; 269 x[j] = temp; /* restore */ 270 } 271 if (C->verbosity >= 10) { 272 /* print the entire matrix */ 273 printf("\nlmmin Jacobian\n"); 274 for (i = 0; i < m; i++) { 275 printf(" "); 276 for (j = 0; j < n; j++) 277 printf("%.5e ", fjac[j*m+i]); 278 printf("\n"); 279 } 280 } 281 282 /** Compute the QR factorization of the Jacobian. **/ 283 284 /* fjac is an m by n array. The upper n by n submatrix of fjac is made 285 * to contain an upper triangular matrix R with diagonal elements of 286 * nonincreasing magnitude such that 287 * 288 * P^T*(J^T*J)*P = R^T*R 289 * 290 * (NOTE: ^T stands for matrix transposition), 291 * 292 * where P is a permutation matrix and J is the final calculated 293 * Jacobian. Column j of P is column Pivot(j) of the identity matrix. 294 * The lower trapezoidal part of fjac contains information generated 295 * during the computation of R. 296 * 297 * Pivot is an integer array of length n. It defines a permutation 298 * matrix P such that jac*P = Q*R, where jac is the final calculated 299 * Jacobian, Q is orthogonal (not stored), and R is upper triangular 300 * with diagonal elements of nonincreasing magnitude. Column j of P 301 * is column Pivot(j) of the identity matrix. 302 */ 303 304 lm_qrfac(m, n, fjac, Pivot, wa1, wa2, wa3); 305 /* return values are Pivot, wa1=rdiag, wa2=acnorm */ 306 307 /** Form Q^T * fvec, and store first n components in qtf. **/ 308 for (i = 0; i < m; i++) 309 wf[i] = fvec[i]; 310 311 for (j = 0; j < n; j++) { 312 temp3 = fjac[j*m+j]; 313 if (temp3 != 0) { 314 sum = 0; 315 for (i = j; i < m; i++) 316 sum += fjac[j*m+i] * wf[i]; 317 temp = -sum / temp3; 318 for (i = j; i < m; i++) 319 wf[i] += fjac[j*m+i] * temp; 320 } 321 fjac[j*m+j] = wa1[j]; 322 qtf[j] = wf[j]; 323 } 324 325 /** Compute norm of scaled gradient and detect degeneracy. **/ 326 gnorm = 0; 327 for (j = 0; j < n; j++) { 328 if (wa2[Pivot[j]] == 0) 329 continue; 330 sum = 0; 331 for (i = 0; i <= j; i++) 332 sum += fjac[j*m+i] * qtf[i]; 333 gnorm = MAX(gnorm, fabs(sum / wa2[Pivot[j]] / fnorm)); 334 } 335 336 if (gnorm <= C->gtol) { 337 S->outcome = 4; 338 goto terminate; 339 } 340 341 /** Initialize or update diag and delta. **/ 342 if (!outer) { /* first iteration only */ 343 if (C->scale_diag) { 344 /* diag := norms of the columns of the initial Jacobian */ 345 for (j = 0; j < n; j++) 346 diag[j] = wa2[j] ? wa2[j] : 1; 347 /* xnorm := || D x || */ 348 for (j = 0; j < n; j++) 349 wa3[j] = diag[j] * x[j]; 350 xnorm = lm_enorm(n, wa3); 351 if (C->verbosity >= 2) { 352 fprintf(msgfile, "lmmin diag "); 353 lm_print_pars(nout, x, msgfile); // xnorm 354 fprintf(msgfile, " xnorm = %18.8g\n", xnorm); 355 } 356 /* Only now print the header for the loop table. */ 357 if (C->verbosity >= 3) { 358 fprintf(msgfile, " o i lmpar prered" 359 " ratio dirder delta" 360 " pnorm fnorm"); 361 for (i = 0; i < nout; ++i) 362 fprintf(msgfile, " p%i", i); 363 fprintf(msgfile, "\n"); 364 } 365 } else { 366 xnorm = lm_enorm(n, x); 367 } 368 if (!isfinite(xnorm)) { 369 S->outcome = 12; /* nan */ 370 goto terminate; 371 } 372 /* Initialize the step bound delta. */ 373 if (xnorm) 374 delta = C->stepbound * xnorm; 375 else 376 delta = C->stepbound; 377 } else { 378 if (C->scale_diag) { 379 for (j = 0; j < n; j++) 380 diag[j] = MAX(diag[j], wa2[j]); 381 } 382 } 383 384 /** The inner loop. **/ 385 int inner = 0; 386 do { 387 388 /** Determine the Levenberg-Marquardt parameter. **/ 389 lm_lmpar(n, fjac, m, Pivot, diag, qtf, delta, &lmpar, 390 wa1, wa2, wf, wa3); 391 /* used return values are fjac (partly), lmpar, wa1=x, wa3=diag*x */ 392 393 /* Predict scaled reduction. */ 394 pnorm = lm_enorm(n, wa3); 395 if (!isfinite(pnorm)) { 396 S->outcome = 12; /* nan */ 397 goto terminate; 398 } 399 temp2 = lmpar * SQR(pnorm / fnorm); 400 for (j = 0; j < n; j++) { 401 wa3[j] = 0; 402 for (i = 0; i <= j; i++) 403 wa3[i] -= fjac[j*m+i] * wa1[Pivot[j]]; 404 } 405 temp1 = SQR(lm_enorm(n, wa3) / fnorm); 406 if (!isfinite(temp1)) { 407 S->outcome = 12; /* nan */ 408 goto terminate; 409 } 410 prered = temp1 + 2*temp2; 411 dirder = -temp1 + temp2; /* scaled directional derivative */ 412 413 /* At first call, adjust the initial step bound. */ 414 if (!outer && pnorm < delta) 415 delta = pnorm; 416 417 /** Evaluate the function at x + p. **/ 418 for (j = 0; j < n; j++) 419 wa2[j] = x[j] - wa1[j]; 420 (*evaluate)(wa2, m, data, wf, &(S->userbreak)); 421 ++(S->nfev); 422 if (S->userbreak) 423 goto terminate; 424 fnorm1 = lm_enorm(m, wf); 425 if (!isfinite(fnorm1)) { 426 S->outcome = 12; /* nan */ 427 goto terminate; 428 } 429 430 /** Evaluate the scaled reduction. **/ 431 432 /* Actual scaled reduction. */ 433 actred = 1 - SQR(fnorm1 / fnorm); 434 435 /* Ratio of actual to predicted reduction. */ 436 ratio = prered ? actred / prered : 0; 437 438 if (C->verbosity == 2) { 439 fprintf(msgfile, "lmmin (%i:%i) ", outer, inner); 440 lm_print_pars(nout, wa2, msgfile); // fnorm1, 441 } else if (C->verbosity >= 3) { 442 printf("%3i %2i %9.2g %9.2g %14.6g" 443 " %9.2g %10.3e %10.3e %21.15e", 444 outer, inner, lmpar, prered, ratio, 445 dirder, delta, pnorm, fnorm1); 446 for (i = 0; i < nout; ++i) 447 fprintf(msgfile, " %16.9g", wa2[i]); 448 fprintf(msgfile, "\n"); 449 } 450 451 /* Update the step bound. */ 452 if (ratio <= 0.25) { 453 if (actred >= 0) 454 temp = 0.5; 455 else if (actred > -99) /* -99 = 1-1/0.1^2 */ 456 temp = MAX(dirder / (2*dirder + actred), 0.1); 457 else 458 temp = 0.1; 459 delta = temp * MIN(delta, pnorm / 0.1); 460 lmpar /= temp; 461 } else if (ratio >= 0.75) { 462 delta = 2 * pnorm; 463 lmpar *= 0.5; 464 } else if (!lmpar) { 465 delta = 2 * pnorm; 466 } 467 468 /** On success, update solution, and test for convergence. **/ 469 470 inner_success = ratio >= 1e-4; 471 if (inner_success) { 472 473 /* Update x, fvec, and their norms. */ 474 if (C->scale_diag) { 475 for (j = 0; j < n; j++) { 476 x[j] = wa2[j]; 477 wa2[j] = diag[j] * x[j]; 478 } 479 } else { 480 for (j = 0; j < n; j++) 481 x[j] = wa2[j]; 482 } 483 for (i = 0; i < m; i++) 484 fvec[i] = wf[i]; 485 xnorm = lm_enorm(n, wa2); 486 if (!isfinite(xnorm)) { 487 S->outcome = 12; /* nan */ 488 goto terminate; 489 } 490 fnorm = fnorm1; 491 } 492 493 /* Convergence tests. */ 494 S->outcome = 0; 495 if (fnorm <= LM_DWARF) 496 goto terminate; /* success: sum of squares almost zero */ 497 /* Test two criteria (both may be fulfilled). */ 498 if (fabs(actred) <= C->ftol && prered <= C->ftol && ratio <= 2) 499 S->outcome = 1; /* success: x almost stable */ 500 if (delta <= C->xtol * xnorm) 501 S->outcome += 2; /* success: sum of squares almost stable */ 502 if (S->outcome != 0) { 503 goto terminate; 504 } 505 506 /** Tests for termination and stringent tolerances. **/ 507 if (S->nfev >= maxfev) { 508 S->outcome = 5; 509 goto terminate; 510 } 511 if (fabs(actred) <= LM_MACHEP && prered <= LM_MACHEP && 512 ratio <= 2) { 513 S->outcome = 6; 514 goto terminate; 515 } 516 if (delta <= LM_MACHEP * xnorm) { 517 S->outcome = 7; 518 goto terminate; 519 } 520 if (gnorm <= LM_MACHEP) { 521 S->outcome = 8; 522 goto terminate; 523 } 524 525 /** End of the inner loop. Repeat if iteration unsuccessful. **/ 526 ++inner; 527 } while (!inner_success); 528 529 }; /*** End of the outer loop. ***/ 530 531 terminate: 532 S->fnorm = lm_enorm(m, fvec); 533 if (C->verbosity >= 2) 534 printf("lmmin outcome (%i) xnorm %g ftol %g xtol %g\n", S->outcome, 535 xnorm, C->ftol, C->xtol); 536 if (C->verbosity & 1) { 537 fprintf(msgfile, "lmmin final "); 538 lm_print_pars(nout, x, msgfile); // S->fnorm, 539 fprintf(msgfile, " fnorm = %18.8g\n", S->fnorm); 540 } 541 if (S->userbreak) /* user-requested break */ 542 S->outcome = 11; 543 544 /*** Deallocate the workspace. ***/ 545 free(ws); 546 547 } /*** lmmin. ***/ 548 549 /******************************************************************************/ 550 /* lm_lmpar (determine Levenberg-Marquardt parameter) */ 551 /******************************************************************************/ 552 553 void lm_lmpar(const int n, double* r, const int ldr, const int* Pivot, 554 const double* diag, const double* qtb, const double delta, 555 double* par, double* x, double* Sdiag, double* aux, double* xdi) 556 /* Given an m by n matrix A, an n by n nonsingular diagonal matrix D, 557 * an m-vector b, and a positive number delta, the problem is to 558 * determine a parameter value par such that if x solves the system 559 * 560 * A*x = b and sqrt(par)*D*x = 0 561 * 562 * in the least squares sense, and dxnorm is the Euclidean norm of D*x, 563 * then either par=0 and (dxnorm-delta) < 0.1*delta, or par>0 and 564 * abs(dxnorm-delta) < 0.1*delta. 565 * 566 * Using lm_qrsolv, this subroutine completes the solution of the 567 * problem if it is provided with the necessary information from the 568 * QR factorization, with column pivoting, of A. That is, if A*P = Q*R, 569 * where P is a permutation matrix, Q has orthogonal columns, and R is 570 * an upper triangular matrix with diagonal elements of nonincreasing 571 * magnitude, then lmpar expects the full upper triangle of R, the 572 * permutation matrix P, and the first n components of Q^T*b. On output 573 * lmpar also provides an upper triangular matrix S such that 574 * 575 * P^T*(A^T*A + par*D*D)*P = S^T*S. 576 * 577 * S is employed within lmpar and may be of separate interest. 578 * 579 * Only a few iterations are generally needed for convergence of the 580 * algorithm. If, however, the limit of 10 iterations is reached, then 581 * the output par will contain the best value obtained so far. 582 * 583 * Parameters: 584 * 585 * n is a positive integer INPUT variable set to the order of r. 586 * 587 * r is an n by n array. On INPUT the full upper triangle must contain 588 * the full upper triangle of the matrix R. On OUTPUT the full upper 589 * triangle is unaltered, and the strict lower triangle contains the 590 * strict upper triangle (transposed) of the upper triangular matrix S. 591 * 592 * ldr is a positive integer INPUT variable not less than n which 593 * specifies the leading dimension of the array R. 594 * 595 * Pivot is an integer INPUT array of length n which defines the 596 * permutation matrix P such that A*P = Q*R. Column j of P is column 597 * Pivot(j) of the identity matrix. 598 * 599 * diag is an INPUT array of length n which must contain the diagonal 600 * elements of the matrix D. 601 * 602 * qtb is an INPUT array of length n which must contain the first 603 * n elements of the vector Q^T*b. 604 * 605 * delta is a positive INPUT variable which specifies an upper bound 606 * on the Euclidean norm of D*x. 607 * 608 * par is a nonnegative variable. On INPUT par contains an initial 609 * estimate of the Levenberg-Marquardt parameter. On OUTPUT par 610 * contains the final estimate. 611 * 612 * x is an OUTPUT array of length n which contains the least-squares 613 * solution of the system A*x = b, sqrt(par)*D*x = 0, for the output par. 614 * 615 * Sdiag is an array of length n needed as workspace; on OUTPUT it 616 * contains the diagonal elements of the upper triangular matrix S. 617 * 618 * aux is a multi-purpose work array of length n. 619 * 620 * xdi is a work array of length n. On OUTPUT: diag[j] * x[j]. 621 * 622 */ 623 { 624 int i, iter, j, nsing; 625 double dxnorm, fp, fp_old, gnorm, parc, parl, paru; 626 double sum, temp; 627 static double p1 = 0.1; 628 629 /*** Compute and store in x the Gauss-Newton direction. If the Jacobian 630 is rank-deficient, obtain a least-squares solution. ***/ 631 632 nsing = n; 633 for (j = 0; j < n; j++) { 634 aux[j] = qtb[j]; 635 if (r[j*ldr+j] == 0 && nsing == n) 636 nsing = j; 637 if (nsing < n) 638 aux[j] = 0; 639 } 640 for (j = nsing-1; j >= 0; j--) { 641 aux[j] = aux[j] / r[j+ldr*j]; 642 temp = aux[j]; 643 for (i = 0; i < j; i++) 644 aux[i] -= r[j*ldr+i] * temp; 645 } 646 647 for (j = 0; j < n; j++) 648 x[Pivot[j]] = aux[j]; 649 650 /*** Initialize the iteration counter, evaluate the function at the origin, 651 and test for acceptance of the Gauss-Newton direction. ***/ 652 653 for (j = 0; j < n; j++) 654 xdi[j] = diag[j] * x[j]; 655 dxnorm = lm_enorm(n, xdi); 656 fp = dxnorm - delta; 657 if (fp <= p1 * delta) { 658 #ifdef LMFIT_DEBUG_MESSAGES 659 printf("debug lmpar nsing=%d, n=%d, terminate[fp<=p1*del]\n", nsing, n); 660 #endif 661 *par = 0; 662 return; 663 } 664 665 /*** If the Jacobian is not rank deficient, the Newton step provides a 666 lower bound, parl, for the zero of the function. Otherwise set this 667 bound to zero. ***/ 668 669 parl = 0; 670 if (nsing >= n) { 671 for (j = 0; j < n; j++) 672 aux[j] = diag[Pivot[j]] * xdi[Pivot[j]] / dxnorm; 673 674 for (j = 0; j < n; j++) { 675 sum = 0; 676 for (i = 0; i < j; i++) 677 sum += r[j*ldr+i] * aux[i]; 678 aux[j] = (aux[j] - sum) / r[j+ldr*j]; 679 } 680 temp = lm_enorm(n, aux); 681 parl = fp / delta / temp / temp; 682 } 683 684 /*** Calculate an upper bound, paru, for the zero of the function. ***/ 685 686 for (j = 0; j < n; j++) { 687 sum = 0; 688 for (i = 0; i <= j; i++) 689 sum += r[j*ldr+i] * qtb[i]; 690 aux[j] = sum / diag[Pivot[j]]; 691 } 692 gnorm = lm_enorm(n, aux); 693 paru = gnorm / delta; 694 if (paru == 0) 695 paru = LM_DWARF / MIN(delta, p1); 696 697 /*** If the input par lies outside of the interval (parl,paru), 698 set par to the closer endpoint. ***/ 699 700 *par = MAX(*par, parl); 701 *par = MIN(*par, paru); 702 if (*par == 0) 703 *par = gnorm / dxnorm; 704 705 /*** Iterate. ***/ 706 707 for (iter = 0;; iter++) { 708 709 /** Evaluate the function at the current value of par. **/ 710 if (*par == 0) 711 *par = MAX(LM_DWARF, 0.001 * paru); 712 temp = sqrt(*par); 713 for (j = 0; j < n; j++) 714 aux[j] = temp * diag[j]; 715 716 lm_qrsolv(n, r, ldr, Pivot, aux, qtb, x, Sdiag, xdi); 717 /* return values are r, x, Sdiag */ 718 719 for (j = 0; j < n; j++) 720 xdi[j] = diag[j] * x[j]; /* used as output */ 721 dxnorm = lm_enorm(n, xdi); 722 fp_old = fp; 723 fp = dxnorm - delta; 724 725 /** If the function is small enough, accept the current value 726 of par. Also test for the exceptional cases where parl 727 is zero or the number of iterations has reached 10. **/ 728 if (fabs(fp) <= p1 * delta || 729 (parl == 0 && fp <= fp_old && fp_old < 0) || iter == 10) { 730 #ifdef LMFIT_DEBUG_MESSAGES 731 printf("debug lmpar nsing=%d, iter=%d, " 732 "par=%.4e [%.4e %.4e], delta=%.4e, fp=%.4e\n", 733 nsing, iter, *par, parl, paru, delta, fp); 734 #endif 735 break; /* the only exit from the iteration. */ 736 } 737 738 /** Compute the Newton correction. **/ 739 for (j = 0; j < n; j++) 740 aux[j] = diag[Pivot[j]] * xdi[Pivot[j]] / dxnorm; 741 742 for (j = 0; j < n; j++) { 743 aux[j] = aux[j] / Sdiag[j]; 744 for (i = j+1; i < n; i++) 745 aux[i] -= r[j*ldr+i] * aux[j]; 746 } 747 temp = lm_enorm(n, aux); 748 parc = fp / delta / temp / temp; 749 750 /** Depending on the sign of the function, update parl or paru. **/ 751 if (fp > 0) 752 parl = MAX(parl, *par); 753 else /* fp < 0 [the case fp==0 is precluded by the break condition] */ 754 paru = MIN(paru, *par); 755 756 /** Compute an improved estimate for par. **/ 757 *par = MAX(parl, *par + parc); 758 } 759 760 } /*** lm_lmpar. ***/ 761 762 /******************************************************************************/ 763 /* lm_qrfac (QR factorization, from lapack) */ 764 /******************************************************************************/ 765 766 void lm_qrfac(const int m, const int n, double* A, int* Pivot, double* Rdiag, 767 double* Acnorm, double* W) 768 /* 769 * This subroutine uses Householder transformations with column pivoting 770 * to compute a QR factorization of the m by n matrix A. That is, qrfac 771 * determines an orthogonal matrix Q, a permutation matrix P, and an 772 * upper trapezoidal matrix R with diagonal elements of nonincreasing 773 * magnitude, such that A*P = Q*R. The Householder transformation for 774 * column k, k = 1,2,...,n, is of the form 775 * 776 * I - 2*w*wT/|w|^2 777 * 778 * where w has zeroes in the first k-1 positions. 779 * 780 * Parameters: 781 * 782 * m is an INPUT parameter set to the number of rows of A. 783 * 784 * n is an INPUT parameter set to the number of columns of A. 785 * 786 * A is an m by n array. On INPUT, A contains the matrix for which the 787 * QR factorization is to be computed. On OUTPUT the strict upper 788 * trapezoidal part of A contains the strict upper trapezoidal part 789 * of R, and the lower trapezoidal part of A contains a factored form 790 * of Q (the non-trivial elements of the vectors w described above). 791 * 792 * Pivot is an integer OUTPUT array of length n that describes the 793 * permutation matrix P. Column j of P is column Pivot(j) of the 794 * identity matrix. 795 * 796 * Rdiag is an OUTPUT array of length n which contains the diagonal 797 * elements of R. 798 * 799 * Acnorm is an OUTPUT array of length n which contains the norms of 800 * the corresponding columns of the input matrix A. If this information 801 * is not needed, then Acnorm can share storage with Rdiag. 802 * 803 * W is a work array of length n. 804 * 805 */ 806 { 807 int i, j, k, kmax; 808 double ajnorm, sum, temp; 809 810 #ifdef LMFIT_DEBUG_MESSAGES 811 printf("debug qrfac\n"); 812 #endif 813 814 /** Compute initial column norms; 815 initialize Pivot with identity permutation. ***/ 816 for (j = 0; j < n; j++) { 817 W[j] = Rdiag[j] = Acnorm[j] = lm_enorm(m, &A[j*m]); 818 Pivot[j] = j; 819 } 820 821 /** Loop over columns of A. **/ 822 assert(n <= m); 823 for (j = 0; j < n; j++) { 824 825 /** Bring the column of largest norm into the pivot position. **/ 826 kmax = j; 827 for (k = j+1; k < n; k++) 828 if (Rdiag[k] > Rdiag[kmax]) 829 kmax = k; 830 831 if (kmax != j) { 832 /* Swap columns j and kmax. */ 833 k = Pivot[j]; 834 Pivot[j] = Pivot[kmax]; 835 Pivot[kmax] = k; 836 for (i = 0; i < m; i++) { 837 temp = A[j*m+i]; 838 A[j*m+i] = A[kmax*m+i]; 839 A[kmax*m+i] = temp; 840 } 841 /* Half-swap: Rdiag[j], W[j] won't be needed any further. */ 842 Rdiag[kmax] = Rdiag[j]; 843 W[kmax] = W[j]; 844 } 845 846 /** Compute the Householder reflection vector w_j to reduce the 847 j-th column of A to a multiple of the j-th unit vector. **/ 848 ajnorm = lm_enorm(m-j, &A[j*m+j]); 849 if (ajnorm == 0) { 850 Rdiag[j] = 0; 851 continue; 852 } 853 854 /* Let the partial column vector A[j][j:] contain w_j := e_j+-a_j/|a_j|, 855 where the sign +- is chosen to avoid cancellation in w_jj. */ 856 if (A[j*m+j] < 0) 857 ajnorm = -ajnorm; 858 for (i = j; i < m; i++) 859 A[j*m+i] /= ajnorm; 860 A[j*m+j] += 1; 861 862 /** Apply the Householder transformation U_w := 1 - 2*w_j.w_j/|w_j|^2 863 to the remaining columns, and update the norms. **/ 864 for (k = j+1; k < n; k++) { 865 /* Compute scalar product w_j * a_j. */ 866 sum = 0; 867 for (i = j; i < m; i++) 868 sum += A[j*m+i] * A[k*m+i]; 869 870 /* Normalization is simplified by the coincidence |w_j|^2=2w_jj. */ 871 temp = sum / A[j*m+j]; 872 873 /* Carry out transform U_w_j * a_k. */ 874 for (i = j; i < m; i++) 875 A[k*m+i] -= temp * A[j*m+i]; 876 877 /* No idea what happens here. */ 878 if (Rdiag[k] != 0) { 879 temp = A[m*k+j] / Rdiag[k]; 880 if (fabs(temp) < 1) { 881 Rdiag[k] *= sqrt(1 - SQR(temp)); 882 temp = Rdiag[k] / W[k]; 883 } else 884 temp = 0; 885 if (temp == 0 || 0.05 * SQR(temp) <= LM_MACHEP) { 886 Rdiag[k] = lm_enorm(m-j-1, &A[m*k+j+1]); 887 W[k] = Rdiag[k]; 888 } 889 } 890 } 891 892 Rdiag[j] = -ajnorm; 893 } 894 } /*** lm_qrfac. ***/ 895 896 /******************************************************************************/ 897 /* lm_qrsolv (linear least-squares) */ 898 /******************************************************************************/ 899 900 void lm_qrsolv(const int n, double* r, const int ldr, const int* Pivot, 901 const double* diag, const double* qtb, double* x, 902 double* Sdiag, double* W) 903 /* 904 * Given an m by n matrix A, an n by n diagonal matrix D, and an 905 * m-vector b, the problem is to determine an x which solves the 906 * system 907 * 908 * A*x = b and D*x = 0 909 * 910 * in the least squares sense. 911 * 912 * This subroutine completes the solution of the problem if it is 913 * provided with the necessary information from the QR factorization, 914 * with column pivoting, of A. That is, if A*P = Q*R, where P is a 915 * permutation matrix, Q has orthogonal columns, and R is an upper 916 * triangular matrix with diagonal elements of nonincreasing magnitude, 917 * then qrsolv expects the full upper triangle of R, the permutation 918 * matrix P, and the first n components of Q^T*b. The system 919 * A*x = b, D*x = 0, is then equivalent to 920 * 921 * R*z = Q^T*b, P^T*D*P*z = 0, 922 * 923 * where x = P*z. If this system does not have full rank, then a least 924 * squares solution is obtained. On output qrsolv also provides an upper 925 * triangular matrix S such that 926 * 927 * P^T*(A^T*A + D*D)*P = S^T*S. 928 * 929 * S is computed within qrsolv and may be of separate interest. 930 * 931 * Parameters: 932 * 933 * n is a positive integer INPUT variable set to the order of R. 934 * 935 * r is an n by n array. On INPUT the full upper triangle must contain 936 * the full upper triangle of the matrix R. On OUTPUT the full upper 937 * triangle is unaltered, and the strict lower triangle contains the 938 * strict upper triangle (transposed) of the upper triangular matrix S. 939 * 940 * ldr is a positive integer INPUT variable not less than n which 941 * specifies the leading dimension of the array R. 942 * 943 * Pivot is an integer INPUT array of length n which defines the 944 * permutation matrix P such that A*P = Q*R. Column j of P is column 945 * Pivot(j) of the identity matrix. 946 * 947 * diag is an INPUT array of length n which must contain the diagonal 948 * elements of the matrix D. 949 * 950 * qtb is an INPUT array of length n which must contain the first 951 * n elements of the vector Q^T*b. 952 * 953 * x is an OUTPUT array of length n which contains the least-squares 954 * solution of the system A*x = b, D*x = 0. 955 * 956 * Sdiag is an OUTPUT array of length n which contains the diagonal 957 * elements of the upper triangular matrix S. 958 * 959 * W is a work array of length n. 960 * 961 */ 962 { 963 int i, kk, j, k, nsing; 964 double qtbpj, sum, temp; 965 double _sin, _cos, _tan, _cot; /* local variables, not functions */ 966 967 /*** Copy R and Q^T*b to preserve input and initialize S. 968 In particular, save the diagonal elements of R in x. ***/ 969 970 for (j = 0; j < n; j++) { 971 for (i = j; i < n; i++) 972 r[j*ldr+i] = r[i*ldr+j]; 973 x[j] = r[j*ldr+j]; 974 W[j] = qtb[j]; 975 } 976 977 /*** Eliminate the diagonal matrix D using a Givens rotation. ***/ 978 979 for (j = 0; j < n; j++) { 980 981 /*** Prepare the row of D to be eliminated, locating the diagonal 982 element using P from the QR factorization. ***/ 983 984 if (diag[Pivot[j]] != 0) { 985 for (k = j; k < n; k++) 986 Sdiag[k] = 0; 987 Sdiag[j] = diag[Pivot[j]]; 988 989 /*** The transformations to eliminate the row of D modify only 990 a single element of Q^T*b beyond the first n, which is 991 initially 0. ***/ 992 993 qtbpj = 0; 994 for (k = j; k < n; k++) { 995 996 /** Determine a Givens rotation which eliminates the 997 appropriate element in the current row of D. **/ 998 if (Sdiag[k] == 0) 999 continue; 1000 kk = k + ldr * k; 1001 if (fabs(r[kk]) < fabs(Sdiag[k])) { 1002 _cot = r[kk] / Sdiag[k]; 1003 _sin = 1 / hypot(1, _cot); 1004 _cos = _sin * _cot; 1005 } else { 1006 _tan = Sdiag[k] / r[kk]; 1007 _cos = 1 / hypot(1, _tan); 1008 _sin = _cos * _tan; 1009 } 1010 1011 /** Compute the modified diagonal element of R and 1012 the modified element of (Q^T*b,0). **/ 1013 r[kk] = _cos * r[kk] + _sin * Sdiag[k]; 1014 temp = _cos * W[k] + _sin * qtbpj; 1015 qtbpj = -_sin * W[k] + _cos * qtbpj; 1016 W[k] = temp; 1017 1018 /** Accumulate the tranformation in the row of S. **/ 1019 for (i = k+1; i < n; i++) { 1020 temp = _cos * r[k*ldr+i] + _sin * Sdiag[i]; 1021 Sdiag[i] = -_sin * r[k*ldr+i] + _cos * Sdiag[i]; 1022 r[k*ldr+i] = temp; 1023 } 1024 } 1025 } 1026 1027 /** Store the diagonal element of S and restore 1028 the corresponding diagonal element of R. **/ 1029 Sdiag[j] = r[j*ldr+j]; 1030 r[j*ldr+j] = x[j]; 1031 } 1032 1033 /*** Solve the triangular system for z. If the system is singular, then 1034 obtain a least-squares solution. ***/ 1035 1036 nsing = n; 1037 for (j = 0; j < n; j++) { 1038 if (Sdiag[j] == 0 && nsing == n) 1039 nsing = j; 1040 if (nsing < n) 1041 W[j] = 0; 1042 } 1043 1044 for (j = nsing-1; j >= 0; j--) { 1045 sum = 0; 1046 for (i = j+1; i < nsing; i++) 1047 sum += r[j*ldr+i] * W[i]; 1048 W[j] = (W[j] - sum) / Sdiag[j]; 1049 } 1050 1051 /*** Permute the components of z back to components of x. ***/ 1052 1053 for (j = 0; j < n; j++) 1054 x[Pivot[j]] = W[j]; 1055 1056 } /*** lm_qrsolv. ***/ 1057 1058 /******************************************************************************/ 1059 /* lm_enorm (Euclidean norm) */ 1060 /******************************************************************************/ 1061 1062 double lm_enorm(int n, const double* x) 1063 /* This function calculates the Euclidean norm of an n-vector x. 1064 * 1065 * The Euclidean norm is computed by accumulating the sum of squares 1066 * in three different sums. The sums of squares for the small and large 1067 * components are scaled so that no overflows occur. Non-destructive 1068 * underflows are permitted. Underflows and overflows do not occur in 1069 * the computation of the unscaled sum of squares for the intermediate 1070 * components. The definitions of small, intermediate and large components 1071 * depend on two constants, LM_SQRT_DWARF and LM_SQRT_GIANT. The main 1072 * restrictions on these constants are that LM_SQRT_DWARF**2 not underflow 1073 * and LM_SQRT_GIANT**2 not overflow. 1074 * 1075 * Parameters: 1076 * 1077 * n is a positive integer INPUT variable. 1078 * 1079 * x is an INPUT array of length n. 1080 */ 1081 { 1082 int i; 1083 double agiant, s1, s2, s3, xabs, x1max, x3max; 1084 1085 s1 = 0; 1086 s2 = 0; 1087 s3 = 0; 1088 x1max = 0; 1089 x3max = 0; 1090 agiant = LM_SQRT_GIANT / n; 1091 1092 /** Sum squares. **/ 1093 for (i = 0; i < n; i++) { 1094 xabs = fabs(x[i]); 1095 if (xabs > LM_SQRT_DWARF) { 1096 if (xabs < agiant) { 1097 s2 += SQR(xabs); 1098 } else if (xabs > x1max) { 1099 s1 = 1 + s1 * SQR(x1max / xabs); 1100 x1max = xabs; 1101 } else { 1102 s1 += SQR(xabs / x1max); 1103 } 1104 } else if (xabs > x3max) { 1105 s3 = 1 + s3 * SQR(x3max / xabs); 1106 x3max = xabs; 1107 } else if (xabs != 0) { 1108 s3 += SQR(xabs / x3max); 1109 } 1110 } 1111 1112 /** Calculate the norm. **/ 1113 if (s1 != 0) 1114 return x1max * sqrt(s1 + (s2 / x1max) / x1max); 1115 else if (s2 != 0) 1116 if (s2 >= x3max) 1117 return sqrt(s2 * (1 + (x3max / s2) * (x3max * s3))); 1118 else 1119 return sqrt(x3max * ((s2 / x3max) + (x3max * s3))); 1120 else 1121 return x3max * sqrt(s3); 1122 1123 } /*** lm_enorm. ***/ 1124