// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2012 Google Inc. All rights reserved.
// http://code.google.com/p/ceres-solver/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
//   used to endorse or promote products derived from this software without
//   specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
28 // 29 // Author: sameeragarwal (at) google.com (Sameer Agarwal) 30 31 #include <iomanip> 32 #include <iostream> // NOLINT 33 34 #include "ceres/line_search.h" 35 36 #include "ceres/fpclassify.h" 37 #include "ceres/evaluator.h" 38 #include "ceres/internal/eigen.h" 39 #include "ceres/polynomial.h" 40 #include "ceres/stringprintf.h" 41 #include "glog/logging.h" 42 43 namespace ceres { 44 namespace internal { 45 namespace { 46 // Precision used for floating point values in error message output. 47 const int kErrorMessageNumericPrecision = 8; 48 49 FunctionSample ValueSample(const double x, const double value) { 50 FunctionSample sample; 51 sample.x = x; 52 sample.value = value; 53 sample.value_is_valid = true; 54 return sample; 55 }; 56 57 FunctionSample ValueAndGradientSample(const double x, 58 const double value, 59 const double gradient) { 60 FunctionSample sample; 61 sample.x = x; 62 sample.value = value; 63 sample.gradient = gradient; 64 sample.value_is_valid = true; 65 sample.gradient_is_valid = true; 66 return sample; 67 }; 68 69 } // namespace 70 71 72 std::ostream& operator<<(std::ostream &os, const FunctionSample& sample); 73 74 // Convenience stream operator for pushing FunctionSamples into log messages. 
75 std::ostream& operator<<(std::ostream &os, const FunctionSample& sample) { 76 os << sample.ToDebugString(); 77 return os; 78 } 79 80 LineSearch::LineSearch(const LineSearch::Options& options) 81 : options_(options) {} 82 83 LineSearch* LineSearch::Create(const LineSearchType line_search_type, 84 const LineSearch::Options& options, 85 string* error) { 86 LineSearch* line_search = NULL; 87 switch (line_search_type) { 88 case ceres::ARMIJO: 89 line_search = new ArmijoLineSearch(options); 90 break; 91 case ceres::WOLFE: 92 line_search = new WolfeLineSearch(options); 93 break; 94 default: 95 *error = string("Invalid line search algorithm type: ") + 96 LineSearchTypeToString(line_search_type) + 97 string(", unable to create line search."); 98 return NULL; 99 } 100 return line_search; 101 } 102 103 LineSearchFunction::LineSearchFunction(Evaluator* evaluator) 104 : evaluator_(evaluator), 105 position_(evaluator->NumParameters()), 106 direction_(evaluator->NumEffectiveParameters()), 107 evaluation_point_(evaluator->NumParameters()), 108 scaled_direction_(evaluator->NumEffectiveParameters()), 109 gradient_(evaluator->NumEffectiveParameters()) { 110 } 111 112 void LineSearchFunction::Init(const Vector& position, 113 const Vector& direction) { 114 position_ = position; 115 direction_ = direction; 116 } 117 118 bool LineSearchFunction::Evaluate(double x, double* f, double* g) { 119 scaled_direction_ = x * direction_; 120 if (!evaluator_->Plus(position_.data(), 121 scaled_direction_.data(), 122 evaluation_point_.data())) { 123 return false; 124 } 125 126 if (g == NULL) { 127 return (evaluator_->Evaluate(evaluation_point_.data(), 128 f, NULL, NULL, NULL) && 129 IsFinite(*f)); 130 } 131 132 if (!evaluator_->Evaluate(evaluation_point_.data(), 133 f, 134 NULL, 135 gradient_.data(), NULL)) { 136 return false; 137 } 138 139 *g = direction_.dot(gradient_); 140 return IsFinite(*f) && IsFinite(*g); 141 } 142 143 double LineSearchFunction::DirectionInfinityNorm() const { 144 return 
direction_.lpNorm<Eigen::Infinity>(); 145 } 146 147 // Returns step_size \in [min_step_size, max_step_size] which minimizes the 148 // polynomial of degree defined by interpolation_type which interpolates all 149 // of the provided samples with valid values. 150 double LineSearch::InterpolatingPolynomialMinimizingStepSize( 151 const LineSearchInterpolationType& interpolation_type, 152 const FunctionSample& lowerbound, 153 const FunctionSample& previous, 154 const FunctionSample& current, 155 const double min_step_size, 156 const double max_step_size) const { 157 if (!current.value_is_valid || 158 (interpolation_type == BISECTION && 159 max_step_size <= current.x)) { 160 // Either: sample is invalid; or we are using BISECTION and contracting 161 // the step size. 162 return min(max(current.x * 0.5, min_step_size), max_step_size); 163 } else if (interpolation_type == BISECTION) { 164 CHECK_GT(max_step_size, current.x); 165 // We are expanding the search (during a Wolfe bracketing phase) using 166 // BISECTION interpolation. Using BISECTION when trying to expand is 167 // strictly speaking an oxymoron, but we define this to mean always taking 168 // the maximum step size so that the Armijo & Wolfe implementations are 169 // agnostic to the interpolation type. 170 return max_step_size; 171 } 172 // Only check if lower-bound is valid here, where it is required 173 // to avoid replicating current.value_is_valid == false 174 // behaviour in WolfeLineSearch. 
175 CHECK(lowerbound.value_is_valid) 176 << std::scientific << std::setprecision(kErrorMessageNumericPrecision) 177 << "Ceres bug: lower-bound sample for interpolation is invalid, " 178 << "please contact the developers!, interpolation_type: " 179 << LineSearchInterpolationTypeToString(interpolation_type) 180 << ", lowerbound: " << lowerbound << ", previous: " << previous 181 << ", current: " << current; 182 183 // Select step size by interpolating the function and gradient values 184 // and minimizing the corresponding polynomial. 185 vector<FunctionSample> samples; 186 samples.push_back(lowerbound); 187 188 if (interpolation_type == QUADRATIC) { 189 // Two point interpolation using function values and the 190 // gradient at the lower bound. 191 samples.push_back(ValueSample(current.x, current.value)); 192 193 if (previous.value_is_valid) { 194 // Three point interpolation, using function values and the 195 // gradient at the lower bound. 196 samples.push_back(ValueSample(previous.x, previous.value)); 197 } 198 } else if (interpolation_type == CUBIC) { 199 // Two point interpolation using the function values and the gradients. 200 samples.push_back(current); 201 202 if (previous.value_is_valid) { 203 // Three point interpolation using the function values and 204 // the gradients. 
205 samples.push_back(previous); 206 } 207 } else { 208 LOG(FATAL) << "Ceres bug: No handler for interpolation_type: " 209 << LineSearchInterpolationTypeToString(interpolation_type) 210 << ", please contact the developers!"; 211 } 212 213 double step_size = 0.0, unused_min_value = 0.0; 214 MinimizeInterpolatingPolynomial(samples, min_step_size, max_step_size, 215 &step_size, &unused_min_value); 216 return step_size; 217 } 218 219 ArmijoLineSearch::ArmijoLineSearch(const LineSearch::Options& options) 220 : LineSearch(options) {} 221 222 void ArmijoLineSearch::Search(const double step_size_estimate, 223 const double initial_cost, 224 const double initial_gradient, 225 Summary* summary) { 226 *CHECK_NOTNULL(summary) = LineSearch::Summary(); 227 CHECK_GE(step_size_estimate, 0.0); 228 CHECK_GT(options().sufficient_decrease, 0.0); 229 CHECK_LT(options().sufficient_decrease, 1.0); 230 CHECK_GT(options().max_num_iterations, 0); 231 Function* function = options().function; 232 233 // Note initial_cost & initial_gradient are evaluated at step_size = 0, 234 // not step_size_estimate, which is our starting guess. 235 const FunctionSample initial_position = 236 ValueAndGradientSample(0.0, initial_cost, initial_gradient); 237 238 FunctionSample previous = ValueAndGradientSample(0.0, 0.0, 0.0); 239 previous.value_is_valid = false; 240 241 FunctionSample current = ValueAndGradientSample(step_size_estimate, 0.0, 0.0); 242 current.value_is_valid = false; 243 244 // As the Armijo line search algorithm always uses the initial point, for 245 // which both the function value and derivative are known, when fitting a 246 // minimizing polynomial, we can fit up to a quadratic without requiring the 247 // gradient at the current query point. 
248 const bool interpolation_uses_gradient_at_current_sample = 249 options().interpolation_type == CUBIC; 250 const double descent_direction_max_norm = 251 static_cast<const LineSearchFunction*>(function)->DirectionInfinityNorm(); 252 253 ++summary->num_function_evaluations; 254 if (interpolation_uses_gradient_at_current_sample) { 255 ++summary->num_gradient_evaluations; 256 } 257 current.value_is_valid = 258 function->Evaluate(current.x, 259 ¤t.value, 260 interpolation_uses_gradient_at_current_sample 261 ? ¤t.gradient : NULL); 262 current.gradient_is_valid = 263 interpolation_uses_gradient_at_current_sample && current.value_is_valid; 264 while (!current.value_is_valid || 265 current.value > (initial_cost 266 + options().sufficient_decrease 267 * initial_gradient 268 * current.x)) { 269 // If current.value_is_valid is false, we treat it as if the cost at that 270 // point is not large enough to satisfy the sufficient decrease condition. 271 ++summary->num_iterations; 272 if (summary->num_iterations >= options().max_num_iterations) { 273 summary->error = 274 StringPrintf("Line search failed: Armijo failed to find a point " 275 "satisfying the sufficient decrease condition within " 276 "specified max_num_iterations: %d.", 277 options().max_num_iterations); 278 LOG_IF(WARNING, !options().is_silent) << summary->error; 279 return; 280 } 281 282 const double step_size = 283 this->InterpolatingPolynomialMinimizingStepSize( 284 options().interpolation_type, 285 initial_position, 286 previous, 287 current, 288 (options().max_step_contraction * current.x), 289 (options().min_step_contraction * current.x)); 290 291 if (step_size * descent_direction_max_norm < options().min_step_size) { 292 summary->error = 293 StringPrintf("Line search failed: step_size too small: %.5e " 294 "with descent_direction_max_norm: %.5e.", step_size, 295 descent_direction_max_norm); 296 LOG_IF(WARNING, !options().is_silent) << summary->error; 297 return; 298 } 299 300 previous = current; 301 
current.x = step_size; 302 303 ++summary->num_function_evaluations; 304 if (interpolation_uses_gradient_at_current_sample) { 305 ++summary->num_gradient_evaluations; 306 } 307 current.value_is_valid = 308 function->Evaluate(current.x, 309 ¤t.value, 310 interpolation_uses_gradient_at_current_sample 311 ? ¤t.gradient : NULL); 312 current.gradient_is_valid = 313 interpolation_uses_gradient_at_current_sample && current.value_is_valid; 314 } 315 316 summary->optimal_step_size = current.x; 317 summary->success = true; 318 } 319 320 WolfeLineSearch::WolfeLineSearch(const LineSearch::Options& options) 321 : LineSearch(options) {} 322 323 void WolfeLineSearch::Search(const double step_size_estimate, 324 const double initial_cost, 325 const double initial_gradient, 326 Summary* summary) { 327 *CHECK_NOTNULL(summary) = LineSearch::Summary(); 328 // All parameters should have been validated by the Solver, but as 329 // invalid values would produce crazy nonsense, hard check them here. 330 CHECK_GE(step_size_estimate, 0.0); 331 CHECK_GT(options().sufficient_decrease, 0.0); 332 CHECK_GT(options().sufficient_curvature_decrease, 333 options().sufficient_decrease); 334 CHECK_LT(options().sufficient_curvature_decrease, 1.0); 335 CHECK_GT(options().max_step_expansion, 1.0); 336 337 // Note initial_cost & initial_gradient are evaluated at step_size = 0, 338 // not step_size_estimate, which is our starting guess. 339 const FunctionSample initial_position = 340 ValueAndGradientSample(0.0, initial_cost, initial_gradient); 341 342 bool do_zoom_search = false; 343 // Important: The high/low in bracket_high & bracket_low refer to their 344 // _function_ values, not their step sizes i.e. it is _not_ required that 345 // bracket_low.x < bracket_high.x. 
346 FunctionSample solution, bracket_low, bracket_high; 347 348 // Wolfe bracketing phase: Increases step_size until either it finds a point 349 // that satisfies the (strong) Wolfe conditions, or an interval that brackets 350 // step sizes which satisfy the conditions. From Nocedal & Wright [1] p61 the 351 // interval: (step_size_{k-1}, step_size_{k}) contains step lengths satisfying 352 // the strong Wolfe conditions if one of the following conditions are met: 353 // 354 // 1. step_size_{k} violates the sufficient decrease (Armijo) condition. 355 // 2. f(step_size_{k}) >= f(step_size_{k-1}). 356 // 3. f'(step_size_{k}) >= 0. 357 // 358 // Caveat: If f(step_size_{k}) is invalid, then step_size is reduced, ignoring 359 // this special case, step_size monotonically increases during bracketing. 360 if (!this->BracketingPhase(initial_position, 361 step_size_estimate, 362 &bracket_low, 363 &bracket_high, 364 &do_zoom_search, 365 summary)) { 366 // Failed to find either a valid point, a valid bracket satisfying the Wolfe 367 // conditions, or even a step size > minimum tolerance satisfying the Armijo 368 // condition. 369 return; 370 } 371 372 if (!do_zoom_search) { 373 // Either: Bracketing phase already found a point satisfying the strong 374 // Wolfe conditions, thus no Zoom required. 375 // 376 // Or: Bracketing failed to find a valid bracket or a point satisfying the 377 // strong Wolfe conditions within max_num_iterations, or whilst searching 378 // shrank the bracket width until it was below our minimum tolerance. 379 // As these are 'artificial' constraints, and we would otherwise fail to 380 // produce a valid point when ArmijoLineSearch would succeed, we return the 381 // point with the lowest cost found thus far which satsifies the Armijo 382 // condition (but not the Wolfe conditions). 
383 summary->optimal_step_size = bracket_low.x; 384 summary->success = true; 385 return; 386 } 387 388 VLOG(3) << std::scientific << std::setprecision(kErrorMessageNumericPrecision) 389 << "Starting line search zoom phase with bracket_low: " 390 << bracket_low << ", bracket_high: " << bracket_high 391 << ", bracket width: " << fabs(bracket_low.x - bracket_high.x) 392 << ", bracket abs delta cost: " 393 << fabs(bracket_low.value - bracket_high.value); 394 395 // Wolfe Zoom phase: Called when the Bracketing phase finds an interval of 396 // non-zero, finite width that should bracket step sizes which satisfy the 397 // (strong) Wolfe conditions (before finding a step size that satisfies the 398 // conditions). Zoom successively decreases the size of the interval until a 399 // step size which satisfies the Wolfe conditions is found. The interval is 400 // defined by bracket_low & bracket_high, which satisfy: 401 // 402 // 1. The interval bounded by step sizes: bracket_low.x & bracket_high.x 403 // contains step sizes that satsify the strong Wolfe conditions. 404 // 2. bracket_low.x is of all the step sizes evaluated *which satisifed the 405 // Armijo sufficient decrease condition*, the one which generated the 406 // smallest function value, i.e. bracket_low.value < 407 // f(all other steps satisfying Armijo). 408 // - Note that this does _not_ (necessarily) mean that initially 409 // bracket_low.value < bracket_high.value (although this is typical) 410 // e.g. when bracket_low = initial_position, and bracket_high is the 411 // first sample, and which does not satisfy the Armijo condition, 412 // but still has bracket_high.value < initial_position.value. 413 // 3. bracket_high is chosen after bracket_low, s.t. 414 // bracket_low.gradient * (bracket_high.x - bracket_low.x) < 0. 
415 if (!this->ZoomPhase(initial_position, 416 bracket_low, 417 bracket_high, 418 &solution, 419 summary) && !solution.value_is_valid) { 420 // Failed to find a valid point (given the specified decrease parameters) 421 // within the specified bracket. 422 return; 423 } 424 // Ensure that if we ran out of iterations whilst zooming the bracket, or 425 // shrank the bracket width to < tolerance and failed to find a point which 426 // satisfies the strong Wolfe curvature condition, that we return the point 427 // amongst those found thus far, which minimizes f() and satisfies the Armijo 428 // condition. 429 solution = 430 solution.value_is_valid && solution.value <= bracket_low.value 431 ? solution : bracket_low; 432 433 summary->optimal_step_size = solution.x; 434 summary->success = true; 435 } 436 437 // Returns true if either: 438 // 439 // A termination condition satisfying the (strong) Wolfe bracketing conditions 440 // is found: 441 // 442 // - A valid point, defined as a bracket of zero width [zoom not required]. 443 // - A valid bracket (of width > tolerance), [zoom required]. 444 // 445 // Or, searching was stopped due to an 'artificial' constraint, i.e. not 446 // a condition imposed / required by the underlying algorithm, but instead an 447 // engineering / implementation consideration. But a step which exceeds the 448 // minimum step size, and satsifies the Armijo condition was still found, 449 // and should thus be used [zoom not required]. 450 // 451 // Returns false if no step size > minimum step size was found which 452 // satisfies at least the Armijo condition. 
453 bool WolfeLineSearch::BracketingPhase( 454 const FunctionSample& initial_position, 455 const double step_size_estimate, 456 FunctionSample* bracket_low, 457 FunctionSample* bracket_high, 458 bool* do_zoom_search, 459 Summary* summary) { 460 Function* function = options().function; 461 462 FunctionSample previous = initial_position; 463 FunctionSample current = ValueAndGradientSample(step_size_estimate, 0.0, 0.0); 464 current.value_is_valid = false; 465 466 const double descent_direction_max_norm = 467 static_cast<const LineSearchFunction*>(function)->DirectionInfinityNorm(); 468 469 *do_zoom_search = false; 470 *bracket_low = initial_position; 471 472 // As we require the gradient to evaluate the Wolfe condition, we always 473 // calculate it together with the value, irrespective of the interpolation 474 // type. As opposed to only calculating the gradient after the Armijo 475 // condition is satisifed, as the computational saving from this approach 476 // would be slight (perhaps even negative due to the extra call). Also, 477 // always calculating the value & gradient together protects against us 478 // reporting invalid solutions if the cost function returns slightly different 479 // function values when evaluated with / without gradients (due to numerical 480 // issues). 
481 ++summary->num_function_evaluations; 482 ++summary->num_gradient_evaluations; 483 current.value_is_valid = 484 function->Evaluate(current.x, 485 ¤t.value, 486 ¤t.gradient); 487 current.gradient_is_valid = current.value_is_valid; 488 489 while (true) { 490 ++summary->num_iterations; 491 492 if (current.value_is_valid && 493 (current.value > (initial_position.value 494 + options().sufficient_decrease 495 * initial_position.gradient 496 * current.x) || 497 (previous.value_is_valid && current.value > previous.value))) { 498 // Bracket found: current step size violates Armijo sufficient decrease 499 // condition, or has stepped past an inflection point of f() relative to 500 // previous step size. 501 *do_zoom_search = true; 502 *bracket_low = previous; 503 *bracket_high = current; 504 VLOG(3) << std::scientific 505 << std::setprecision(kErrorMessageNumericPrecision) 506 << "Bracket found: current step (" << current.x 507 << ") violates Armijo sufficient condition, or has passed an " 508 << "inflection point of f() based on value."; 509 break; 510 } 511 512 if (current.value_is_valid && 513 fabs(current.gradient) <= 514 -options().sufficient_curvature_decrease * initial_position.gradient) { 515 // Current step size satisfies the strong Wolfe conditions, and is thus a 516 // valid termination point, therefore a Zoom not required. 517 *bracket_low = current; 518 *bracket_high = current; 519 VLOG(3) << std::scientific 520 << std::setprecision(kErrorMessageNumericPrecision) 521 << "Bracketing phase found step size: " << current.x 522 << ", satisfying strong Wolfe conditions, initial_position: " 523 << initial_position << ", current: " << current; 524 break; 525 526 } else if (current.value_is_valid && current.gradient >= 0) { 527 // Bracket found: current step size has stepped past an inflection point 528 // of f(), but Armijo sufficient decrease is still satisfied and 529 // f(current) is our best minimum thus far. 
Remember step size 530 // monotonically increases, thus previous_step_size < current_step_size 531 // even though f(previous) > f(current). 532 *do_zoom_search = true; 533 // Note inverse ordering from first bracket case. 534 *bracket_low = current; 535 *bracket_high = previous; 536 VLOG(3) << "Bracket found: current step (" << current.x 537 << ") satisfies Armijo, but has gradient >= 0, thus have passed " 538 << "an inflection point of f()."; 539 break; 540 541 } else if (current.value_is_valid && 542 fabs(current.x - previous.x) * descent_direction_max_norm 543 < options().min_step_size) { 544 // We have shrunk the search bracket to a width less than our tolerance, 545 // and still not found either a point satisfying the strong Wolfe 546 // conditions, or a valid bracket containing such a point. Stop searching 547 // and set bracket_low to the size size amongst all those tested which 548 // minimizes f() and satisfies the Armijo condition. 549 LOG_IF(WARNING, !options().is_silent) 550 << "Line search failed: Wolfe bracketing phase shrank " 551 << "bracket width: " << fabs(current.x - previous.x) 552 << ", to < tolerance: " << options().min_step_size 553 << ", with descent_direction_max_norm: " 554 << descent_direction_max_norm << ", and failed to find " 555 << "a point satisfying the strong Wolfe conditions or a " 556 << "bracketing containing such a point. Accepting " 557 << "point found satisfying Armijo condition only, to " 558 << "allow continuation."; 559 *bracket_low = current; 560 break; 561 562 } else if (summary->num_iterations >= options().max_num_iterations) { 563 // Check num iterations bound here so that we always evaluate the 564 // max_num_iterations-th iteration against all conditions, and 565 // then perform no additional (unused) evaluations. 
566 summary->error = 567 StringPrintf("Line search failed: Wolfe bracketing phase failed to " 568 "find a point satisfying strong Wolfe conditions, or a " 569 "bracket containing such a point within specified " 570 "max_num_iterations: %d", options().max_num_iterations); 571 LOG_IF(WARNING, !options().is_silent) << summary->error; 572 // Ensure that bracket_low is always set to the step size amongst all 573 // those tested which minimizes f() and satisfies the Armijo condition 574 // when we terminate due to the 'artificial' max_num_iterations condition. 575 *bracket_low = 576 current.value_is_valid && current.value < bracket_low->value 577 ? current : *bracket_low; 578 break; 579 } 580 // Either: f(current) is invalid; or, f(current) is valid, but does not 581 // satisfy the strong Wolfe conditions itself, or the conditions for 582 // being a boundary of a bracket. 583 584 // If f(current) is valid, (but meets no criteria) expand the search by 585 // increasing the step size. 586 const double max_step_size = 587 current.value_is_valid 588 ? (current.x * options().max_step_expansion) : current.x; 589 590 // We are performing 2-point interpolation only here, but the API of 591 // InterpolatingPolynomialMinimizingStepSize() allows for up to 592 // 3-point interpolation, so pad call with a sample with an invalid 593 // value that will therefore be ignored. 594 const FunctionSample unused_previous; 595 DCHECK(!unused_previous.value_is_valid); 596 // Contracts step size if f(current) is not valid. 
597 const double step_size = 598 this->InterpolatingPolynomialMinimizingStepSize( 599 options().interpolation_type, 600 previous, 601 unused_previous, 602 current, 603 previous.x, 604 max_step_size); 605 if (step_size * descent_direction_max_norm < options().min_step_size) { 606 summary->error = 607 StringPrintf("Line search failed: step_size too small: %.5e " 608 "with descent_direction_max_norm: %.5e", step_size, 609 descent_direction_max_norm); 610 LOG_IF(WARNING, !options().is_silent) << summary->error; 611 return false; 612 } 613 614 previous = current.value_is_valid ? current : previous; 615 current.x = step_size; 616 617 ++summary->num_function_evaluations; 618 ++summary->num_gradient_evaluations; 619 current.value_is_valid = 620 function->Evaluate(current.x, 621 ¤t.value, 622 ¤t.gradient); 623 current.gradient_is_valid = current.value_is_valid; 624 } 625 626 // Ensure that even if a valid bracket was found, we will only mark a zoom 627 // as required if the bracket's width is greater than our minimum tolerance. 628 if (*do_zoom_search && 629 fabs(bracket_high->x - bracket_low->x) * descent_direction_max_norm 630 < options().min_step_size) { 631 *do_zoom_search = false; 632 } 633 634 return true; 635 } 636 637 // Returns true iff solution satisfies the strong Wolfe conditions. Otherwise, 638 // on return false, if we stopped searching due to the 'artificial' condition of 639 // reaching max_num_iterations, solution is the step size amongst all those 640 // tested, which satisfied the Armijo decrease condition and minimized f(). 
641 bool WolfeLineSearch::ZoomPhase(const FunctionSample& initial_position, 642 FunctionSample bracket_low, 643 FunctionSample bracket_high, 644 FunctionSample* solution, 645 Summary* summary) { 646 Function* function = options().function; 647 648 CHECK(bracket_low.value_is_valid && bracket_low.gradient_is_valid) 649 << std::scientific << std::setprecision(kErrorMessageNumericPrecision) 650 << "Ceres bug: f_low input to Wolfe Zoom invalid, please contact " 651 << "the developers!, initial_position: " << initial_position 652 << ", bracket_low: " << bracket_low 653 << ", bracket_high: "<< bracket_high; 654 // We do not require bracket_high.gradient_is_valid as the gradient condition 655 // for a valid bracket is only dependent upon bracket_low.gradient, and 656 // in order to minimize jacobian evaluations, bracket_high.gradient may 657 // not have been calculated (if bracket_high.value does not satisfy the 658 // Armijo sufficient decrease condition and interpolation method does not 659 // require it). 660 // 661 // We also do not require that: bracket_low.value < bracket_high.value, 662 // although this is typical. This is to deal with the case when 663 // bracket_low = initial_position, bracket_high is the first sample, 664 // and bracket_high does not satisfy the Armijo condition, but still has 665 // bracket_high.value < initial_position.value. 666 CHECK(bracket_high.value_is_valid) 667 << std::scientific << std::setprecision(kErrorMessageNumericPrecision) 668 << "Ceres bug: f_high input to Wolfe Zoom invalid, please " 669 << "contact the developers!, initial_position: " << initial_position 670 << ", bracket_low: " << bracket_low 671 << ", bracket_high: "<< bracket_high; 672 673 if (bracket_low.gradient * (bracket_high.x - bracket_low.x) >= 0) { 674 // The third condition for a valid initial bracket: 675 // 676 // 3. bracket_high is chosen after bracket_low, s.t. 677 // bracket_low.gradient * (bracket_high.x - bracket_low.x) < 0. 678 // 679 // is not satisfied. 
As this can happen when the users' cost function 680 // returns inconsistent gradient values relative to the function values, 681 // we do not CHECK_LT(), but we do stop processing and return an invalid 682 // value. 683 summary->error = 684 StringPrintf("Line search failed: Wolfe zoom phase passed a bracket " 685 "which does not satisfy: bracket_low.gradient * " 686 "(bracket_high.x - bracket_low.x) < 0 [%.8e !< 0] " 687 "with initial_position: %s, bracket_low: %s, bracket_high:" 688 " %s, the most likely cause of which is the cost function " 689 "returning inconsistent gradient & function values.", 690 bracket_low.gradient * (bracket_high.x - bracket_low.x), 691 initial_position.ToDebugString().c_str(), 692 bracket_low.ToDebugString().c_str(), 693 bracket_high.ToDebugString().c_str()); 694 LOG_IF(WARNING, !options().is_silent) << summary->error; 695 solution->value_is_valid = false; 696 return false; 697 } 698 699 const int num_bracketing_iterations = summary->num_iterations; 700 const double descent_direction_max_norm = 701 static_cast<const LineSearchFunction*>(function)->DirectionInfinityNorm(); 702 703 while (true) { 704 // Set solution to bracket_low, as it is our best step size (smallest f()) 705 // found thus far and satisfies the Armijo condition, even though it does 706 // not satisfy the Wolfe condition. 
707 *solution = bracket_low; 708 if (summary->num_iterations >= options().max_num_iterations) { 709 summary->error = 710 StringPrintf("Line search failed: Wolfe zoom phase failed to " 711 "find a point satisfying strong Wolfe conditions " 712 "within specified max_num_iterations: %d, " 713 "(num iterations taken for bracketing: %d).", 714 options().max_num_iterations, num_bracketing_iterations); 715 LOG_IF(WARNING, !options().is_silent) << summary->error; 716 return false; 717 } 718 if (fabs(bracket_high.x - bracket_low.x) * descent_direction_max_norm 719 < options().min_step_size) { 720 // Bracket width has been reduced below tolerance, and no point satisfying 721 // the strong Wolfe conditions has been found. 722 summary->error = 723 StringPrintf("Line search failed: Wolfe zoom bracket width: %.5e " 724 "too small with descent_direction_max_norm: %.5e.", 725 fabs(bracket_high.x - bracket_low.x), 726 descent_direction_max_norm); 727 LOG_IF(WARNING, !options().is_silent) << summary->error; 728 return false; 729 } 730 731 ++summary->num_iterations; 732 // Polynomial interpolation requires inputs ordered according to step size, 733 // not f(step size). 734 const FunctionSample& lower_bound_step = 735 bracket_low.x < bracket_high.x ? bracket_low : bracket_high; 736 const FunctionSample& upper_bound_step = 737 bracket_low.x < bracket_high.x ? bracket_high : bracket_low; 738 // We are performing 2-point interpolation only here, but the API of 739 // InterpolatingPolynomialMinimizingStepSize() allows for up to 740 // 3-point interpolation, so pad call with a sample with an invalid 741 // value that will therefore be ignored. 
742 const FunctionSample unused_previous; 743 DCHECK(!unused_previous.value_is_valid); 744 solution->x = 745 this->InterpolatingPolynomialMinimizingStepSize( 746 options().interpolation_type, 747 lower_bound_step, 748 unused_previous, 749 upper_bound_step, 750 lower_bound_step.x, 751 upper_bound_step.x); 752 // No check on magnitude of step size being too small here as it is 753 // lower-bounded by the initial bracket start point, which was valid. 754 // 755 // As we require the gradient to evaluate the Wolfe condition, we always 756 // calculate it together with the value, irrespective of the interpolation 757 // type. As opposed to only calculating the gradient after the Armijo 758 // condition is satisifed, as the computational saving from this approach 759 // would be slight (perhaps even negative due to the extra call). Also, 760 // always calculating the value & gradient together protects against us 761 // reporting invalid solutions if the cost function returns slightly 762 // different function values when evaluated with / without gradients (due 763 // to numerical issues). 
764 ++summary->num_function_evaluations; 765 ++summary->num_gradient_evaluations; 766 solution->value_is_valid = 767 function->Evaluate(solution->x, 768 &solution->value, 769 &solution->gradient); 770 solution->gradient_is_valid = solution->value_is_valid; 771 if (!solution->value_is_valid) { 772 summary->error = 773 StringPrintf("Line search failed: Wolfe Zoom phase found " 774 "step_size: %.5e, for which function is invalid, " 775 "between low_step: %.5e and high_step: %.5e " 776 "at which function is valid.", 777 solution->x, bracket_low.x, bracket_high.x); 778 LOG_IF(WARNING, !options().is_silent) << summary->error; 779 return false; 780 } 781 782 VLOG(3) << "Zoom iteration: " 783 << summary->num_iterations - num_bracketing_iterations 784 << ", bracket_low: " << bracket_low 785 << ", bracket_high: " << bracket_high 786 << ", minimizing solution: " << *solution; 787 788 if ((solution->value > (initial_position.value 789 + options().sufficient_decrease 790 * initial_position.gradient 791 * solution->x)) || 792 (solution->value >= bracket_low.value)) { 793 // Armijo sufficient decrease not satisfied, or not better 794 // than current lowest sample, use as new upper bound. 795 bracket_high = *solution; 796 continue; 797 } 798 799 // Armijo sufficient decrease satisfied, check strong Wolfe condition. 800 if (fabs(solution->gradient) <= 801 -options().sufficient_curvature_decrease * initial_position.gradient) { 802 // Found a valid termination point satisfying strong Wolfe conditions. 803 VLOG(3) << std::scientific 804 << std::setprecision(kErrorMessageNumericPrecision) 805 << "Zoom phase found step size: " << solution->x 806 << ", satisfying strong Wolfe conditions."; 807 break; 808 809 } else if (solution->gradient * (bracket_high.x - bracket_low.x) >= 0) { 810 bracket_high = bracket_low; 811 } 812 813 bracket_low = *solution; 814 } 815 // Solution contains a valid point which satisfies the strong Wolfe 816 // conditions. 
817 return true; 818 } 819 820 } // namespace internal 821 } // namespace ceres 822