// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2012 Google Inc. All rights reserved.
// http://code.google.com/p/ceres-solver/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
//   used to endorse or promote products derived from this software without
//   specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: sameeragarwal@google.com (Sameer Agarwal)

#include "ceres/internal/eigen.h"
#include "ceres/low_rank_inverse_hessian.h"
#include "glog/logging.h"

namespace ceres {
namespace internal {

LowRankInverseHessian::LowRankInverseHessian(
    int num_parameters,
    int max_num_corrections,
    bool use_approximate_eigenvalue_scaling)
    : num_parameters_(num_parameters),
      max_num_corrections_(max_num_corrections),
      use_approximate_eigenvalue_scaling_(use_approximate_eigenvalue_scaling),
      num_corrections_(0),
      approximate_eigenvalue_scale_(1.0),
      delta_x_history_(num_parameters, max_num_corrections),
      delta_gradient_history_(num_parameters, max_num_corrections),
      delta_x_dot_delta_gradient_(max_num_corrections) {
}

bool LowRankInverseHessian::Update(const Vector& delta_x,
                                   const Vector& delta_gradient) {
  const double delta_x_dot_delta_gradient = delta_x.dot(delta_gradient);
  // The L-BFGS curvature condition requires delta_x' * delta_gradient > 0;
  // if a correction pair fails to (numerically) satisfy it, using it would
  // destroy the positive definiteness of the inverse Hessian approximation,
  // so the update is skipped.
  if (delta_x_dot_delta_gradient <= 1e-10) {
    VLOG(2) << "Skipping LBFGS Update, delta_x_dot_delta_gradient too small: "
            << delta_x_dot_delta_gradient;
    return false;
  }

  if (num_corrections_ == max_num_corrections_) {
    // Once the limit on the number of stored corrections is reached, the
    // oldest correction is dropped by shifting the history left by one
    // column to make room for the new pair.
    //
    // TODO(sameeragarwal): This can be done more efficiently using
    // a circular buffer/indexing scheme, but for simplicity we will
    // do the expensive copy for now.
    delta_x_history_.block(0, 0, num_parameters_, max_num_corrections_ - 1) =
        delta_x_history_
            .block(0, 1, num_parameters_, max_num_corrections_ - 1);

    delta_gradient_history_
        .block(0, 0, num_parameters_, max_num_corrections_ - 1) =
        delta_gradient_history_
            .block(0, 1, num_parameters_, max_num_corrections_ - 1);

    delta_x_dot_delta_gradient_.head(num_corrections_ - 1) =
        delta_x_dot_delta_gradient_.tail(num_corrections_ - 1);
  } else {
    ++num_corrections_;
  }

  delta_x_history_.col(num_corrections_ - 1) = delta_x;
  delta_gradient_history_.col(num_corrections_ - 1) = delta_gradient;
  delta_x_dot_delta_gradient_(num_corrections_ - 1) =
      delta_x_dot_delta_gradient;
  approximate_eigenvalue_scale_ =
      delta_x_dot_delta_gradient / delta_gradient.squaredNorm();
  return true;
}

void LowRankInverseHessian::RightMultiply(const double* x_ptr,
                                          double* y_ptr) const {
  ConstVectorRef gradient(x_ptr, num_parameters_);
  VectorRef search_direction(y_ptr, num_parameters_);

  search_direction = gradient;

  Vector alpha(num_corrections_);

  // First loop of the standard L-BFGS two-loop recursion: working backwards
  // from the most recent correction pair, project the stored delta_gradient
  // directions out of the search direction, saving the coefficients alpha
  // for use in the second loop.
  for (int i = num_corrections_ - 1; i >= 0; --i) {
    alpha(i) = delta_x_history_.col(i).dot(search_direction) /
        delta_x_dot_delta_gradient_(i);
    search_direction -= alpha(i) * delta_gradient_history_.col(i);
  }

  if (use_approximate_eigenvalue_scaling_) {
    // Rescale the initial inverse Hessian approximation (H_0) to be
    // iteratively updated so that it is of similar 'size' to the true
    // inverse Hessian along the most recent search direction. As shown
    // in [1]:
    //
    //   \gamma_k = (delta_gradient_{k-1}' * delta_x_{k-1}) /
    //              (delta_gradient_{k-1}' * delta_gradient_{k-1})
    //
    // satisfies:
    //
    //   (1 / \lambda_m) <= \gamma_k <= (1 / \lambda_1)
    //
    // where \lambda_1 and \lambda_m are respectively the smallest and
    // largest eigenvalues of the true Hessian (not the inverse) along the
    // most recent search direction. Thus \gamma is an approximate
    // eigenvalue of the true inverse Hessian, and choosing H_0 = I * \gamma
    // will yield a starting point that has a similar scale to the true
    // inverse Hessian. This technique is widely reported to often improve
    // convergence; however, this is not universally true, particularly if
    // there are errors in the initial Jacobians, or if there are significant
    // differences in the sensitivity of the problem to the parameters
    // (i.e. the range of the magnitudes of the components of the gradient
    // is large).
    //
    // The origin of this rescaling trick is somewhat unclear; the earliest
    // reference appears to be Oren [1], but it is widely discussed without
    // specific attribution in various texts, including [2] (p143/178).
    //
    // [1] Oren S. S., Self-scaling variable metric (SSVM) algorithms
    //     Part II: Implementation and experiments, Management Science,
    //     20(5), 863-874, 1974.
    // [2] Nocedal J., Wright S., Numerical Optimization, Springer, 1999.
    search_direction *= approximate_eigenvalue_scale_;
  }

  // Second loop of the two-loop recursion: working forwards from the oldest
  // correction pair, add back the delta_x directions, so that on exit
  // search_direction holds (approximate inverse Hessian) * gradient.
  for (int i = 0; i < num_corrections_; ++i) {
    const double beta = delta_gradient_history_.col(i).dot(search_direction) /
        delta_x_dot_delta_gradient_(i);
    search_direction += delta_x_history_.col(i) * (alpha(i) - beta);
  }
}

}  // namespace internal
}  // namespace ceres
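
// ---------------------------------------------------------------------------
// What follows is an illustrative sketch, not part of the library: a
// stand-alone reimplementation of the two-loop recursion above using plain
// Eigen types, exercised on a small quadratic f(x) = 1/2 x'Ax - b'x, for
// which delta_gradient = A * delta_x exactly. The function and variable
// names (TwoLoopRecursion, etc.) are hypothetical and do not exist in Ceres.
// The block is guarded by #if 0 so that this file compiles unchanged.
#if 0
#include <iostream>
#include <vector>

#include "Eigen/Dense"

// Computes H * g via the L-BFGS two-loop recursion, where H is the inverse
// Hessian approximation implied by the correction pairs
// (s_i = delta_x_i, y_i = delta_gradient_i) and H_0 = gamma * I, with gamma
// the same approximate eigenvalue scale used by LowRankInverseHessian.
Eigen::VectorXd TwoLoopRecursion(const std::vector<Eigen::VectorXd>& s,
                                 const std::vector<Eigen::VectorXd>& y,
                                 const Eigen::VectorXd& g) {
  Eigen::VectorXd q = g;
  const int m = static_cast<int>(s.size());
  std::vector<double> alpha(m);
  for (int i = m - 1; i >= 0; --i) {
    alpha[i] = s[i].dot(q) / s[i].dot(y[i]);
    q -= alpha[i] * y[i];
  }
  const double gamma = s[m - 1].dot(y[m - 1]) / y[m - 1].squaredNorm();
  q *= gamma;
  for (int i = 0; i < m; ++i) {
    const double beta = y[i].dot(q) / s[i].dot(y[i]);
    q += s[i] * (alpha[i] - beta);
  }
  return q;
}

int main() {
  Eigen::Matrix2d A;
  A << 4.0, 1.0,
       1.0, 3.0;  // Eigenvalues (7 +/- sqrt(5)) / 2: ~4.618 and ~2.382.
  const Eigen::Vector2d b(1.0, 2.0);

  // Collect correction pairs from two fixed-size gradient descent steps.
  std::vector<Eigen::VectorXd> s, y;
  Eigen::Vector2d x(0.0, 0.0);
  Eigen::Vector2d g = A * x - b;
  for (int k = 0; k < 2; ++k) {
    const Eigen::Vector2d x_next = x - 0.1 * g;
    const Eigen::Vector2d g_next = A * x_next - b;
    s.push_back(x_next - x);  // delta_x
    y.push_back(g_next - g);  // delta_gradient = A * delta_x
    x = x_next;
    g = g_next;
  }

  // The BFGS update enforces the secant condition H * y_k = s_k for the
  // most recent pair, so these two vectors agree to machine precision.
  std::cout << "H * y_last: " << TwoLoopRecursion(s, y, y.back()).transpose()
            << "\ns_last:     " << s.back().transpose() << "\n";

  // The bound quoted in RightMultiply(): gamma = (y's) / (y'y) lies between
  // the reciprocals of the extreme eigenvalues of A, here ~[0.217, 0.420].
  std::cout << "gamma: " << s.back().dot(y.back()) / y.back().squaredNorm()
            << "\n";
  return 0;
}
#endif
// The library version stores the s and y vectors as columns of
// fixed-capacity matrices plus a vector of cached s'y products, which is
// what the block copies in Update() above are maintaining.
// ---------------------------------------------------------------------------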