1 // Ceres Solver - A fast non-linear least squares minimizer 2 // Copyright 2012 Google Inc. All rights reserved. 3 // http://code.google.com/p/ceres-solver/ 4 // 5 // Redistribution and use in source and binary forms, with or without 6 // modification, are permitted provided that the following conditions are met: 7 // 8 // * Redistributions of source code must retain the above copyright notice, 9 // this list of conditions and the following disclaimer. 10 // * Redistributions in binary form must reproduce the above copyright notice, 11 // this list of conditions and the following disclaimer in the documentation 12 // and/or other materials provided with the distribution. 13 // * Neither the name of Google Inc. nor the names of its contributors may be 14 // used to endorse or promote products derived from this software without 15 // specific prior written permission. 16 // 17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 21 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 // POSSIBILITY OF SUCH DAMAGE. 28 // 29 // Author: sameeragarwal (at) google.com (Sameer Agarwal) 30 // 31 // NIST non-linear regression problems solved using Ceres. 32 // 33 // The data was obtained from 34 // http://www.itl.nist.gov/div898/strd/nls/nls_main.shtml, where more 35 // background on these problems can also be found. 36 // 37 // Currently not all problems are solved successfully. Some of the 38 // failures are due to convergence to a local minimum, and some fail 39 // because of numerical issues. 40 // 41 // TODO(sameeragarwal): Fix numerical issues so that all the problems 42 // converge and then look at convergence to the wrong solution issues. 43 44 #include <iostream> 45 #include <fstream> 46 #include "ceres/ceres.h" 47 #include "ceres/split.h" 48 #include "gflags/gflags.h" 49 #include "glog/logging.h" 50 #include "Eigen/Core" 51 52 DEFINE_string(nist_data_dir, "", "Directory containing the NIST non-linear" 53 "regression examples"); 54 DEFINE_string(trust_region_strategy, "levenberg_marquardt", 55 "Options are: levenberg_marquardt, dogleg"); 56 DEFINE_string(dogleg, "traditional_dogleg", 57 "Options are: traditional_dogleg, subspace_dogleg"); 58 DEFINE_string(linear_solver, "dense_qr", "Options are: " 59 "sparse_cholesky, dense_qr, dense_normal_cholesky and" 60 "cgnr"); 61 DEFINE_string(preconditioner, "jacobi", "Options are: " 62 "identity, jacobi"); 63 DEFINE_int32(num_iterations, 10000, "Number of iterations"); 64 DEFINE_bool(nonmonotonic_steps, false, "Trust region algorithm can use" 65 " nonmonotic steps"); 66 DEFINE_double(initial_trust_region_radius, 1e4, "Initial trust region radius"); 67 68 using Eigen::Dynamic; 69 using Eigen::RowMajor; 70 typedef Eigen::Matrix<double, Dynamic, 1> Vector; 71 typedef Eigen::Matrix<double, Dynamic, Dynamic, RowMajor> Matrix; 72 73 bool GetAndSplitLine(std::ifstream& ifs, std::vector<std::string>* pieces) { 74 pieces->clear(); 75 char buf[256]; 76 ifs.getline(buf, 256); 77 ceres::SplitStringUsing(std::string(buf), " ", pieces); 78 return true; 79 } 80 81 void SkipLines(std::ifstream& ifs, int num_lines) { 82 char buf[256]; 83 for (int i = 0; i < num_lines; ++i) { 84 ifs.getline(buf, 256); 85 } 86 } 87 88 bool IsSuccessfulTermination(ceres::SolverTerminationType status) { 89 return 90 (status == ceres::FUNCTION_TOLERANCE) || 91 (status == ceres::GRADIENT_TOLERANCE) || 92 (status == ceres::PARAMETER_TOLERANCE) || 93 (status == ceres::USER_SUCCESS); 94 } 95 96 class NISTProblem { 97 public: 98 explicit NISTProblem(const std::string& filename) { 99 std::ifstream ifs(filename.c_str(), std::ifstream::in); 100 101 std::vector<std::string> pieces; 102 SkipLines(ifs, 24); 103 GetAndSplitLine(ifs, &pieces); 104 const int kNumResponses = std::atoi(pieces[1].c_str()); 105 106 GetAndSplitLine(ifs, &pieces); 107 const int kNumPredictors = std::atoi(pieces[0].c_str()); 108 109 GetAndSplitLine(ifs, &pieces); 110 const int kNumObservations = std::atoi(pieces[0].c_str()); 111 112 SkipLines(ifs, 4); 113 GetAndSplitLine(ifs, &pieces); 114 const int kNumParameters = std::atoi(pieces[0].c_str()); 115 SkipLines(ifs, 8); 116 117 // Get the first line of initial and final parameter values to 118 // determine the number of tries. 119 GetAndSplitLine(ifs, &pieces); 120 const int kNumTries = pieces.size() - 4; 121 122 predictor_.resize(kNumObservations, kNumPredictors); 123 response_.resize(kNumObservations, kNumResponses); 124 initial_parameters_.resize(kNumTries, kNumParameters); 125 final_parameters_.resize(1, kNumParameters); 126 127 // Parse the line for parameter b1. 128 int parameter_id = 0; 129 for (int i = 0; i < kNumTries; ++i) { 130 initial_parameters_(i, parameter_id) = std::atof(pieces[i + 2].c_str()); 131 } 132 final_parameters_(0, parameter_id) = std::atof(pieces[2 + kNumTries].c_str()); 133 134 // Parse the remaining parameter lines. 135 for (int parameter_id = 1; parameter_id < kNumParameters; ++parameter_id) { 136 GetAndSplitLine(ifs, &pieces); 137 // b2, b3, .... 138 for (int i = 0; i < kNumTries; ++i) { 139 initial_parameters_(i, parameter_id) = std::atof(pieces[i + 2].c_str()); 140 } 141 final_parameters_(0, parameter_id) = std::atof(pieces[2 + kNumTries].c_str()); 142 } 143 144 // Certfied cost 145 SkipLines(ifs, 1); 146 GetAndSplitLine(ifs, &pieces); 147 certified_cost_ = std::atof(pieces[4].c_str()) / 2.0; 148 149 // Read the observations. 150 SkipLines(ifs, 18 - kNumParameters); 151 for (int i = 0; i < kNumObservations; ++i) { 152 GetAndSplitLine(ifs, &pieces); 153 // Response. 154 for (int j = 0; j < kNumResponses; ++j) { 155 response_(i, j) = std::atof(pieces[j].c_str()); 156 } 157 158 // Predictor variables. 159 for (int j = 0; j < kNumPredictors; ++j) { 160 predictor_(i, j) = std::atof(pieces[j + kNumResponses].c_str()); 161 } 162 } 163 } 164 165 Matrix initial_parameters(int start) const { return initial_parameters_.row(start); } 166 Matrix final_parameters() const { return final_parameters_; } 167 Matrix predictor() const { return predictor_; } 168 Matrix response() const { return response_; } 169 int predictor_size() const { return predictor_.cols(); } 170 int num_observations() const { return predictor_.rows(); } 171 int response_size() const { return response_.cols(); } 172 int num_parameters() const { return initial_parameters_.cols(); } 173 int num_starts() const { return initial_parameters_.rows(); } 174 double certified_cost() const { return certified_cost_; } 175 176 private: 177 Matrix predictor_; 178 Matrix response_; 179 Matrix initial_parameters_; 180 Matrix final_parameters_; 181 double certified_cost_; 182 }; 183 184 #define NIST_BEGIN(CostFunctionName) \ 185 struct CostFunctionName { \ 186 CostFunctionName(const double* const x, \ 187 const double* const y) \ 188 : x_(*x), y_(*y) {} \ 189 double x_; \ 190 double y_; \ 191 template <typename T> \ 192 bool operator()(const T* const b, T* residual) const { \ 193 const T y(y_); \ 194 const T x(x_); \ 195 residual[0] = y - ( 196 197 #define NIST_END ); return true; }}; 198 199 // y = b1 * (b2+x)**(-1/b3) + e 200 NIST_BEGIN(Bennet5) 201 b[0] * pow(b[1] + x, T(-1.0) / b[2]) 202 NIST_END 203 204 // y = b1*(1-exp[-b2*x]) + e 205 NIST_BEGIN(BoxBOD) 206 b[0] * (T(1.0) - exp(-b[1] * x)) 207 NIST_END 208 209 // y = exp[-b1*x]/(b2+b3*x) + e 210 NIST_BEGIN(Chwirut) 211 exp(-b[0] * x) / (b[1] + b[2] * x) 212 NIST_END 213 214 // y = b1*x**b2 + e 215 NIST_BEGIN(DanWood) 216 b[0] * pow(x, b[1]) 217 NIST_END 218 219 // y = b1*exp( -b2*x ) + b3*exp( -(x-b4)**2 / b5**2 ) 220 // + b6*exp( -(x-b7)**2 / b8**2 ) + e 221 NIST_BEGIN(Gauss) 222 b[0] * exp(-b[1] * x) + 223 b[2] * exp(-pow((x - b[3])/b[4], 2)) + 224 b[5] * exp(-pow((x - b[6])/b[7],2)) 225 NIST_END 226 227 // y = b1*exp(-b2*x) + b3*exp(-b4*x) + b5*exp(-b6*x) + e 228 NIST_BEGIN(Lanczos) 229 b[0] * exp(-b[1] * x) + b[2] * exp(-b[3] * x) + b[4] * exp(-b[5] * x) 230 NIST_END 231 232 // y = (b1+b2*x+b3*x**2+b4*x**3) / 233 // (1+b5*x+b6*x**2+b7*x**3) + e 234 NIST_BEGIN(Hahn1) 235 (b[0] + b[1] * x + b[2] * x * x + b[3] * x * x * x) / 236 (T(1.0) + b[4] * x + b[5] * x * x + b[6] * x * x * x) 237 NIST_END 238 239 // y = (b1 + b2*x + b3*x**2) / 240 // (1 + b4*x + b5*x**2) + e 241 NIST_BEGIN(Kirby2) 242 (b[0] + b[1] * x + b[2] * x * x) / 243 (T(1.0) + b[3] * x + b[4] * x * x) 244 NIST_END 245 246 // y = b1*(x**2+x*b2) / (x**2+x*b3+b4) + e 247 NIST_BEGIN(MGH09) 248 b[0] * (x * x + x * b[1]) / (x * x + x * b[2] + b[3]) 249 NIST_END 250 251 // y = b1 * exp[b2/(x+b3)] + e 252 NIST_BEGIN(MGH10) 253 b[0] * exp(b[1] / (x + b[2])) 254 NIST_END 255 256 // y = b1 + b2*exp[-x*b4] + b3*exp[-x*b5] 257 NIST_BEGIN(MGH17) 258 b[0] + b[1] * exp(-x * b[3]) + b[2] * exp(-x * b[4]) 259 NIST_END 260 261 // y = b1*(1-exp[-b2*x]) + e 262 NIST_BEGIN(Misra1a) 263 b[0] * (T(1.0) - exp(-b[1] * x)) 264 NIST_END 265 266 // y = b1 * (1-(1+b2*x/2)**(-2)) + e 267 NIST_BEGIN(Misra1b) 268 b[0] * (T(1.0) - T(1.0)/ ((T(1.0) + b[1] * x / 2.0) * (T(1.0) + b[1] * x / 2.0))) 269 NIST_END 270 271 // y = b1 * (1-(1+2*b2*x)**(-.5)) + e 272 NIST_BEGIN(Misra1c) 273 b[0] * (T(1.0) - pow(T(1.0) + T(2.0) * b[1] * x, -0.5)) 274 NIST_END 275 276 // y = b1*b2*x*((1+b2*x)**(-1)) + e 277 NIST_BEGIN(Misra1d) 278 b[0] * b[1] * x / (T(1.0) + b[1] * x) 279 NIST_END 280 281 const double kPi = 3.141592653589793238462643383279; 282 // pi = 3.141592653589793238462643383279E0 283 // y = b1 - b2*x - arctan[b3/(x-b4)]/pi + e 284 NIST_BEGIN(Roszman1) 285 b[0] - b[1] * x - atan2(b[2], (x - b[3]))/T(kPi) 286 NIST_END 287 288 // y = b1 / (1+exp[b2-b3*x]) + e 289 NIST_BEGIN(Rat42) 290 b[0] / (T(1.0) + exp(b[1] - b[2] * x)) 291 NIST_END 292 293 // y = b1 / ((1+exp[b2-b3*x])**(1/b4)) + e 294 NIST_BEGIN(Rat43) 295 b[0] / pow(T(1.0) + exp(b[1] - b[2] * x), T(1.0) / b[3]) 296 NIST_END 297 298 // y = (b1 + b2*x + b3*x**2 + b4*x**3) / 299 // (1 + b5*x + b6*x**2 + b7*x**3) + e 300 NIST_BEGIN(Thurber) 301 (b[0] + b[1] * x + b[2] * x * x + b[3] * x * x * x) / 302 (T(1.0) + b[4] * x + b[5] * x * x + b[6] * x * x * x) 303 NIST_END 304 305 // y = b1 + b2*cos( 2*pi*x/12 ) + b3*sin( 2*pi*x/12 ) 306 // + b5*cos( 2*pi*x/b4 ) + b6*sin( 2*pi*x/b4 ) 307 // + b8*cos( 2*pi*x/b7 ) + b9*sin( 2*pi*x/b7 ) + e 308 NIST_BEGIN(ENSO) 309 b[0] + b[1] * cos(T(2.0 * kPi) * x / T(12.0)) + 310 b[2] * sin(T(2.0 * kPi) * x / T(12.0)) + 311 b[4] * cos(T(2.0 * kPi) * x / b[3]) + 312 b[5] * sin(T(2.0 * kPi) * x / b[3]) + 313 b[7] * cos(T(2.0 * kPi) * x / b[6]) + 314 b[8] * sin(T(2.0 * kPi) * x / b[6]) 315 NIST_END 316 317 // y = (b1/b2) * exp[-0.5*((x-b3)/b2)**2] + e 318 NIST_BEGIN(Eckerle4) 319 b[0] / b[1] * exp(T(-0.5) * pow((x - b[2])/b[1], 2)) 320 NIST_END 321 322 struct Nelson { 323 public: 324 Nelson(const double* const x, const double* const y) 325 : x1_(x[0]), x2_(x[1]), y_(y[0]) {} 326 327 template <typename T> 328 bool operator()(const T* const b, T* residual) const { 329 // log[y] = b1 - b2*x1 * exp[-b3*x2] + e 330 residual[0] = T(log(y_)) - (b[0] - b[1] * T(x1_) * exp(-b[2] * T(x2_))); 331 return true; 332 } 333 334 private: 335 double x1_; 336 double x2_; 337 double y_; 338 }; 339 340 template <typename Model, int num_residuals, int num_parameters> 341 int RegressionDriver(const std::string& filename, 342 const ceres::Solver::Options& options) { 343 NISTProblem nist_problem(FLAGS_nist_data_dir + filename); 344 CHECK_EQ(num_residuals, nist_problem.response_size()); 345 CHECK_EQ(num_parameters, nist_problem.num_parameters()); 346 347 Matrix predictor = nist_problem.predictor(); 348 Matrix response = nist_problem.response(); 349 Matrix final_parameters = nist_problem.final_parameters(); 350 std::vector<ceres::Solver::Summary> summaries(nist_problem.num_starts() + 1); 351 std::cerr << filename << std::endl; 352 353 // Each NIST problem comes with multiple starting points, so we 354 // construct the problem from scratch for each case and solve it. 355 for (int start = 0; start < nist_problem.num_starts(); ++start) { 356 Matrix initial_parameters = nist_problem.initial_parameters(start); 357 358 ceres::Problem problem; 359 for (int i = 0; i < nist_problem.num_observations(); ++i) { 360 problem.AddResidualBlock( 361 new ceres::AutoDiffCostFunction<Model, num_residuals, num_parameters>( 362 new Model(predictor.data() + nist_problem.predictor_size() * i, 363 response.data() + nist_problem.response_size() * i)), 364 NULL, 365 initial_parameters.data()); 366 } 367 368 Solve(options, &problem, &summaries[start]); 369 } 370 371 const double certified_cost = nist_problem.certified_cost(); 372 373 int num_success = 0; 374 const int kMinNumMatchingDigits = 4; 375 for (int start = 0; start < nist_problem.num_starts(); ++start) { 376 const ceres::Solver::Summary& summary = summaries[start]; 377 378 int num_matching_digits = 0; 379 if (IsSuccessfulTermination(summary.termination_type) 380 && summary.final_cost < certified_cost) { 381 num_matching_digits = kMinNumMatchingDigits + 1; 382 } else { 383 num_matching_digits = 384 -std::log10(fabs(summary.final_cost - certified_cost) / certified_cost); 385 } 386 387 std::cerr << "start " << start + 1 << " " ; 388 if (num_matching_digits <= kMinNumMatchingDigits) { 389 std::cerr << "FAILURE"; 390 } else { 391 std::cerr << "SUCCESS"; 392 ++num_success; 393 } 394 std::cerr << " summary: " 395 << summary.BriefReport() 396 << " Certified cost: " << certified_cost 397 << std::endl; 398 399 } 400 401 return num_success; 402 } 403 404 void SetMinimizerOptions(ceres::Solver::Options* options) { 405 CHECK(ceres::StringToLinearSolverType(FLAGS_linear_solver, 406 &options->linear_solver_type)); 407 CHECK(ceres::StringToPreconditionerType(FLAGS_preconditioner, 408 &options->preconditioner_type)); 409 CHECK(ceres::StringToTrustRegionStrategyType( 410 FLAGS_trust_region_strategy, 411 &options->trust_region_strategy_type)); 412 CHECK(ceres::StringToDoglegType(FLAGS_dogleg, &options->dogleg_type)); 413 414 options->max_num_iterations = FLAGS_num_iterations; 415 options->use_nonmonotonic_steps = FLAGS_nonmonotonic_steps; 416 options->initial_trust_region_radius = FLAGS_initial_trust_region_radius; 417 options->function_tolerance = 1e-18; 418 options->gradient_tolerance = 1e-18; 419 options->parameter_tolerance = 1e-18; 420 } 421 422 void SolveNISTProblems() { 423 if (FLAGS_nist_data_dir.empty()) { 424 LOG(FATAL) << "Must specify the directory containing the NIST problems"; 425 } 426 427 ceres::Solver::Options options; 428 SetMinimizerOptions(&options); 429 430 std::cerr << "Lower Difficulty\n"; 431 int easy_success = 0; 432 easy_success += RegressionDriver<Misra1a, 1, 2>("Misra1a.dat", options); 433 easy_success += RegressionDriver<Chwirut, 1, 3>("Chwirut1.dat", options); 434 easy_success += RegressionDriver<Chwirut, 1, 3>("Chwirut2.dat", options); 435 easy_success += RegressionDriver<Lanczos, 1, 6>("Lanczos3.dat", options); 436 easy_success += RegressionDriver<Gauss, 1, 8>("Gauss1.dat", options); 437 easy_success += RegressionDriver<Gauss, 1, 8>("Gauss2.dat", options); 438 easy_success += RegressionDriver<DanWood, 1, 2>("DanWood.dat", options); 439 easy_success += RegressionDriver<Misra1b, 1, 2>("Misra1b.dat", options); 440 441 std::cerr << "\nMedium Difficulty\n"; 442 int medium_success = 0; 443 medium_success += RegressionDriver<Kirby2, 1, 5>("Kirby2.dat", options); 444 medium_success += RegressionDriver<Hahn1, 1, 7>("Hahn1.dat", options); 445 medium_success += RegressionDriver<Nelson, 1, 3>("Nelson.dat", options); 446 medium_success += RegressionDriver<MGH17, 1, 5>("MGH17.dat", options); 447 medium_success += RegressionDriver<Lanczos, 1, 6>("Lanczos1.dat", options); 448 medium_success += RegressionDriver<Lanczos, 1, 6>("Lanczos2.dat", options); 449 medium_success += RegressionDriver<Gauss, 1, 8>("Gauss3.dat", options); 450 medium_success += RegressionDriver<Misra1c, 1, 2>("Misra1c.dat", options); 451 medium_success += RegressionDriver<Misra1d, 1, 2>("Misra1d.dat", options); 452 medium_success += RegressionDriver<Roszman1, 1, 4>("Roszman1.dat", options); 453 medium_success += RegressionDriver<ENSO, 1, 9>("ENSO.dat", options); 454 455 std::cerr << "\nHigher Difficulty\n"; 456 int hard_success = 0; 457 hard_success += RegressionDriver<MGH09, 1, 4>("MGH09.dat", options); 458 hard_success += RegressionDriver<Thurber, 1, 7>("Thurber.dat", options); 459 hard_success += RegressionDriver<BoxBOD, 1, 2>("BoxBOD.dat", options); 460 hard_success += RegressionDriver<Rat42, 1, 3>("Rat42.dat", options); 461 hard_success += RegressionDriver<MGH10, 1, 3>("MGH10.dat", options); 462 463 hard_success += RegressionDriver<Eckerle4, 1, 3>("Eckerle4.dat", options); 464 hard_success += RegressionDriver<Rat43, 1, 4>("Rat43.dat", options); 465 hard_success += RegressionDriver<Bennet5, 1, 3>("Bennett5.dat", options); 466 467 std::cerr << "\n"; 468 std::cerr << "Easy : " << easy_success << "/16\n"; 469 std::cerr << "Medium : " << medium_success << "/22\n"; 470 std::cerr << "Hard : " << hard_success << "/16\n"; 471 std::cerr << "Total : " << easy_success + medium_success + hard_success << "/54\n"; 472 } 473 474 int main(int argc, char** argv) { 475 google::ParseCommandLineFlags(&argc, &argv, true); 476 google::InitGoogleLogging(argv[0]); 477 SolveNISTProblems(); 478 return 0; 479 }; 480