// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2012 Google Inc. All rights reserved.
// http://code.google.com/p/ceres-solver/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
//   used to endorse or promote products derived from this software without
//   specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: sameeragarwal@google.com (Sameer Agarwal)

#ifndef CERES_NO_LINE_SEARCH_MINIMIZER

#include "ceres/line_search_direction.h"
#include "ceres/line_search_minimizer.h"
#include "ceres/low_rank_inverse_hessian.h"
#include "ceres/internal/eigen.h"
#include "glog/logging.h"

namespace ceres {
namespace internal {

class SteepestDescent : public LineSearchDirection {
 public:
  virtual ~SteepestDescent() {}
  bool NextDirection(const LineSearchMinimizer::State& previous,
                     const LineSearchMinimizer::State& current,
                     Vector* search_direction) {
    *search_direction = -current.gradient;
    return true;
  }
};

class NonlinearConjugateGradient : public LineSearchDirection {
 public:
  NonlinearConjugateGradient(const NonlinearConjugateGradientType type,
                             const double function_tolerance)
      : type_(type),
        function_tolerance_(function_tolerance) {
  }

  bool NextDirection(const LineSearchMinimizer::State& previous,
                     const LineSearchMinimizer::State& current,
                     Vector* search_direction) {
    double beta = 0.0;
    Vector gradient_change;
    switch (type_) {
      case FLETCHER_REEVES:
        beta = current.gradient_squared_norm / previous.gradient_squared_norm;
        break;
      case POLAK_RIBIRERE:
        gradient_change = current.gradient - previous.gradient;
        beta = (current.gradient.dot(gradient_change) /
                previous.gradient_squared_norm);
        break;
      case HESTENES_STIEFEL:
        gradient_change = current.gradient - previous.gradient;
        beta = (current.gradient.dot(gradient_change) /
                previous.search_direction.dot(gradient_change));
        break;
      default:
        LOG(FATAL) << "Unknown nonlinear conjugate gradient type: " << type_;
    }

    *search_direction = -current.gradient + beta * previous.search_direction;
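
    // The conjugate direction computed above is only useful if it remains a
    // descent direction, i.e. if the directional derivative g' * d is
    // (sufficiently) negative. If it is not, e.g. due to an inexact line
    // search, the standard remedy is to restart the iteration from the
    // steepest descent direction, which is what the check below does.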
    const double directional_derivative =
        current.gradient.dot(*search_direction);
    if (directional_derivative > -function_tolerance_) {
      LOG(WARNING) << "Restarting non-linear conjugate gradients: "
                   << directional_derivative;
      *search_direction = -current.gradient;
    }

    return true;
  }

 private:
  const NonlinearConjugateGradientType type_;
  const double function_tolerance_;
};

class LBFGS : public LineSearchDirection {
 public:
  LBFGS(const int num_parameters,
        const int max_lbfgs_rank,
        const bool use_approximate_eigenvalue_bfgs_scaling)
      : low_rank_inverse_hessian_(num_parameters,
                                  max_lbfgs_rank,
                                  use_approximate_eigenvalue_bfgs_scaling),
        is_positive_definite_(true) {}

  virtual ~LBFGS() {}

  bool NextDirection(const LineSearchMinimizer::State& previous,
                     const LineSearchMinimizer::State& current,
                     Vector* search_direction) {
    CHECK(is_positive_definite_)
        << "Ceres bug: NextDirection() called on L-BFGS after inverse Hessian "
        << "approximation has become indefinite, please contact the "
        << "developers!";

    low_rank_inverse_hessian_.Update(
        previous.search_direction * previous.step_size,
        current.gradient - previous.gradient);
    search_direction->setZero();
    low_rank_inverse_hessian_.RightMultiply(current.gradient.data(),
                                            search_direction->data());
    *search_direction *= -1.0;

    if (search_direction->dot(current.gradient) >= 0.0) {
      LOG(WARNING) << "Numerical failure in L-BFGS update: inverse Hessian "
                   << "approximation is not positive definite, and thus "
                   << "initial gradient for search direction is positive: "
                   << search_direction->dot(current.gradient);
      is_positive_definite_ = false;
      return false;
    }

    return true;
  }

 private:
  LowRankInverseHessian low_rank_inverse_hessian_;
  bool is_positive_definite_;
};

class BFGS : public LineSearchDirection {
 public:
  BFGS(const int num_parameters,
       const bool use_approximate_eigenvalue_scaling)
      : num_parameters_(num_parameters),
        use_approximate_eigenvalue_scaling_(use_approximate_eigenvalue_scaling),
        initialized_(false),
        is_positive_definite_(true) {
    LOG_IF(WARNING, num_parameters_ >= 1e3)
        << "BFGS line search being created with: " << num_parameters_
        << " parameters, this will allocate a dense approximate inverse "
        << "Hessian of size: " << num_parameters_ << " x " << num_parameters_
        << ", consider using the L-BFGS memory-efficient line search "
        << "direction instead.";
    // Construct inverse_hessian_ after logging the size warning, so that if
    // the allocation crashes us, the log will highlight what the issue
    // likely was.
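    // The identity matrix is the conventional seed H_0 for the BFGS
    // recursion; it may be rescaled once on the first call to
    // NextDirection() if use_approximate_eigenvalue_scaling_ is true.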
    inverse_hessian_ = Matrix::Identity(num_parameters, num_parameters);
  }

  virtual ~BFGS() {}

  bool NextDirection(const LineSearchMinimizer::State& previous,
                     const LineSearchMinimizer::State& current,
                     Vector* search_direction) {
    CHECK(is_positive_definite_)
        << "Ceres bug: NextDirection() called on BFGS after inverse Hessian "
        << "approximation has become indefinite, please contact the "
        << "developers!";

    const Vector delta_x = previous.search_direction * previous.step_size;
    const Vector delta_gradient = current.gradient - previous.gradient;
    const double delta_x_dot_delta_gradient = delta_x.dot(delta_gradient);

    if (delta_x_dot_delta_gradient <= 1e-10) {
      VLOG(2) << "Skipping BFGS Update, delta_x_dot_delta_gradient too "
              << "small: " << delta_x_dot_delta_gradient;
    } else {
      // Update dense inverse Hessian approximation.

      if (!initialized_ && use_approximate_eigenvalue_scaling_) {
        // Rescale the initial inverse Hessian approximation (H_0) to be
        // iteratively updated so that it is of similar 'size' to the true
        // inverse Hessian at the start point. As shown in [1]:
        //
        //   \gamma = (delta_gradient_{0}' * delta_x_{0}) /
        //            (delta_gradient_{0}' * delta_gradient_{0})
        //
        // satisfies:
        //
        //   (1 / \lambda_m) <= \gamma <= (1 / \lambda_1)
        //
        // where \lambda_1 & \lambda_m are the smallest and largest
        // eigenvalues of the true initial Hessian (not the inverse)
        // respectively. Thus, \gamma is an approximate eigenvalue of the
        // true inverse Hessian, and choosing: H_0 = I * \gamma will yield a
        // starting point that has a similar scale to the true inverse
        // Hessian. This technique is widely reported to often improve
        // convergence; however, this is not universally true, particularly
        // if there are errors in the initial gradients, or if there are
        // significant differences in the sensitivity of the problem to the
        // parameters (i.e. the range of the magnitudes of the components of
        // the gradient is large).
        //
        // The origin of this rescaling trick is somewhat unclear; the
        // earliest reference appears to be Oren [1], but it is widely
        // discussed without specific attribution in various texts,
        // including [2] (p143).
        //
        // [1] Oren S.S., Self-scaling variable metric (SSVM) algorithms
        //     Part II: Implementation and experiments, Management Science,
        //     20(5), 863-874, 1974.
        // [2] Nocedal J., Wright S., Numerical Optimization, Springer, 1999.
        inverse_hessian_ *=
            delta_x_dot_delta_gradient / delta_gradient.dot(delta_gradient);
      }
      initialized_ = true;
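
      // For reference only, a direct Eigen transcription of the textbook
      // update expanded in the comment below (V_k = I - \rho_k * y_k * s_k')
      // would be:
      //
      //   const Matrix V =
      //       Matrix::Identity(num_parameters_, num_parameters_) -
      //       rho_k * delta_gradient * delta_x.transpose();
      //   inverse_hessian_ = V.transpose() * inverse_hessian_ * V +
      //       rho_k * delta_x * delta_x.transpose();
      //
      // This costs two dense matrix-matrix products, i.e. O(N^3); the code
      // below instead exploits the symmetry of H_k to perform the update
      // in O(N^2).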

      // Efficient O(num_parameters^2) BFGS update [2].
      //
      // Starting from the dense BFGS update detailed in Nocedal [2]
      // (p140/177), and using: y_k = delta_gradient, s_k = delta_x:
      //
      //   \rho_k = 1.0 / (s_k' * y_k)
      //   V_k = I - \rho_k * y_k * s_k'
      //   H_k = (V_k' * H_{k-1} * V_k) + (\rho_k * s_k * s_k')
      //
      // This update involves matrix-matrix products which are naively
      // O(N^3); however, we can exploit the fact that H_k is positive
      // definite, and thus by definition symmetric, to reduce the cost of
      // the update.
      //
      // Expanding the update above yields:
      //
      //   H_k = H_{k-1} +
      //         \rho_k * ( (1.0 + \rho_k * y_k' * H_{k-1} * y_k) * s_k * s_k' -
      //                    (s_k * y_k' * H_{k-1} + H_{k-1} * y_k * s_k') )
      //
      // Using: A = (s_k * y_k' * H_{k-1}), and the knowledge that
      // H_{k-1} = H_{k-1}', the last term simplifies to (A + A'). Note that
      // although A is not symmetric, (A + A') is symmetric. For ease of
      // construction we also define:
      // B = (1 + \rho_k * y_k' * H_{k-1} * y_k) * s_k * s_k', which is
      // symmetric by construction from s_k * s_k'.
      //
      // Now we can write the BFGS update as:
      //
      //   H_k = H_{k-1} + \rho_k * (B - (A + A'))

      // For efficiency, as H_k is by definition symmetric, we will only
      // maintain the *lower* triangle of H_k (and of all intermediary
      // terms).

      const double rho_k = 1.0 / delta_x_dot_delta_gradient;

      // Calculate: A = s_k * y_k' * H_{k-1}
      Matrix A = delta_x * (delta_gradient.transpose() *
                            inverse_hessian_.selfadjointView<Eigen::Lower>());

      // Calculate scalar: (1 + \rho_k * y_k' * H_{k-1} * y_k)
      const double delta_x_times_delta_x_transpose_scale_factor =
          (1.0 + (rho_k * delta_gradient.transpose() *
                  inverse_hessian_.selfadjointView<Eigen::Lower>() *
                  delta_gradient));
      // Calculate: B = (1 + \rho_k * y_k' * H_{k-1} * y_k) * s_k * s_k'
      Matrix B = Matrix::Zero(num_parameters_, num_parameters_);
      B.selfadjointView<Eigen::Lower>().
          rankUpdate(delta_x, delta_x_times_delta_x_transpose_scale_factor);

      // Finally, update the inverse Hessian approximation according to:
      // H_k = H_{k-1} + \rho_k * (B - (A + A')). Note that (A + A') is
      // symmetric, even though A is not.
      inverse_hessian_.triangularView<Eigen::Lower>() +=
          rho_k * (B - A - A.transpose());
    }

    *search_direction =
        inverse_hessian_.selfadjointView<Eigen::Lower>() *
        (-1.0 * current.gradient);

    if (search_direction->dot(current.gradient) >= 0.0) {
      LOG(WARNING) << "Numerical failure in BFGS update: inverse Hessian "
                   << "approximation is not positive definite, and thus "
                   << "initial gradient for search direction is positive: "
                   << search_direction->dot(current.gradient);
      is_positive_definite_ = false;
      return false;
    }

    return true;
  }

 private:
  const int num_parameters_;
  const bool use_approximate_eigenvalue_scaling_;
  Matrix inverse_hessian_;
  bool initialized_;
  bool is_positive_definite_;
};

LineSearchDirection*
LineSearchDirection::Create(const LineSearchDirection::Options& options) {
  if (options.type == STEEPEST_DESCENT) {
    return new SteepestDescent;
  }

  if (options.type == NONLINEAR_CONJUGATE_GRADIENT) {
    return new NonlinearConjugateGradient(
        options.nonlinear_conjugate_gradient_type,
        options.function_tolerance);
  }

  if (options.type == ceres::LBFGS) {
    return new ceres::internal::LBFGS(
        options.num_parameters,
        options.max_lbfgs_rank,
        options.use_approximate_eigenvalue_bfgs_scaling);
  }

  if (options.type == ceres::BFGS) {
    return new ceres::internal::BFGS(
        options.num_parameters,
        options.use_approximate_eigenvalue_bfgs_scaling);
  }

  LOG(ERROR) << "Unknown line search direction type: " << options.type;
  return NULL;
}

}  // namespace internal
}  // namespace ceres

#endif  // CERES_NO_LINE_SEARCH_MINIMIZER