1 #include "common/math/levenberg_marquardt.h" 2 3 #include <stdbool.h> 4 #include <stdio.h> 5 #include <string.h> 6 7 #include "common/math/mat.h" 8 #include "common/math/vec.h" 9 10 // FORWARD DECLARATIONS 11 //////////////////////////////////////////////////////////////////////// 12 static bool checkRelativeStepSize(const float *step, const float *state, 13 size_t dim, float relative_error_threshold); 14 15 static bool computeResidualAndGradients(ResidualAndJacobianFunction func, 16 const float *state, const void *f_data, 17 float *jacobian, 18 float gradient_threshold, 19 size_t state_dim, size_t meas_dim, 20 float *residual, float *gradient, 21 float *hessian); 22 23 static bool computeStep(const float *gradient, float *hessian, float *L, 24 float damping_factor, size_t dim, float *step); 25 26 const static float kEps = 1e-10f; 27 28 // FUNCTION IMPLEMENTATIONS 29 //////////////////////////////////////////////////////////////////////// 30 void lmSolverInit(struct LmSolver *solver, const struct LmParams *params, 31 ResidualAndJacobianFunction func) { 32 ASSERT_NOT_NULL(solver); 33 ASSERT_NOT_NULL(params); 34 ASSERT_NOT_NULL(func); 35 memset(solver, 0, sizeof(struct LmSolver)); 36 memcpy(&solver->params, params, sizeof(struct LmParams)); 37 solver->func = func; 38 solver->num_iter = 0; 39 } 40 41 void lmSolverDestroy(struct LmSolver *solver) { 42 (void)solver; 43 } 44 45 void lmSolverSetData(struct LmSolver *solver, struct LmData *data) { 46 ASSERT_NOT_NULL(solver); 47 ASSERT_NOT_NULL(data); 48 solver->data = data; 49 } 50 51 enum LmStatus lmSolverSolve(struct LmSolver *solver, const float *initial_state, 52 void *f_data, size_t state_dim, size_t meas_dim, 53 float *state) { 54 // Initialize parameters. 55 float damping_factor = 0.0f; 56 float v = 2.0f; 57 58 // Check dimensions. 59 if (meas_dim > MAX_LM_MEAS_DIMENSION || state_dim > MAX_LM_STATE_DIMENSION) { 60 return INVALID_DATA_DIMENSIONS; 61 } 62 63 // Check pointers (note that f_data can be null if no additional data is 64 // required by the error function). 65 ASSERT_NOT_NULL(solver); 66 ASSERT_NOT_NULL(initial_state); 67 ASSERT_NOT_NULL(state); 68 ASSERT_NOT_NULL(solver->data); 69 70 // Allocate memory for intermediate variables. 71 float state_new[MAX_LM_STATE_DIMENSION]; 72 struct LmData *data = solver->data; 73 74 // state = initial_state, num_iter = 0 75 memcpy(state, initial_state, sizeof(float) * state_dim); 76 solver->num_iter = 0; 77 78 // Compute initial cost function gradient and return if already sufficiently 79 // small to satisfy solution. 80 if (computeResidualAndGradients(solver->func, state, f_data, data->temp, 81 solver->params.gradient_threshold, state_dim, 82 meas_dim, data->residual, 83 data->gradient, 84 data->hessian)) { 85 return GRADIENT_SUFFICIENTLY_SMALL; 86 } 87 88 // Initialize damping parameter. 89 damping_factor = solver->params.initial_u_scale * 90 matMaxDiagonalElement(data->hessian, state_dim); 91 92 // Iterate solution. 93 for (solver->num_iter = 0; 94 solver->num_iter < solver->params.max_iterations; 95 ++solver->num_iter) { 96 97 // Compute new solver step. 98 if (!computeStep(data->gradient, data->hessian, data->temp, damping_factor, 99 state_dim, data->step)) { 100 return CHOLESKY_FAIL; 101 } 102 103 // If the new step is already sufficiently small, we have a solution. 104 if (checkRelativeStepSize(data->step, state, state_dim, 105 solver->params.relative_step_threshold)) { 106 return RELATIVE_STEP_SUFFICIENTLY_SMALL; 107 } 108 109 // state_new = state + step. 

enum LmStatus lmSolverSolve(struct LmSolver *solver, const float *initial_state,
                            void *f_data, size_t state_dim, size_t meas_dim,
                            float *state) {
  // Initialize parameters.
  float damping_factor = 0.0f;
  float v = 2.0f;

  // Check dimensions.
  if (meas_dim > MAX_LM_MEAS_DIMENSION || state_dim > MAX_LM_STATE_DIMENSION) {
    return INVALID_DATA_DIMENSIONS;
  }

  // Check pointers (note that f_data can be null if no additional data is
  // required by the error function).
  ASSERT_NOT_NULL(solver);
  ASSERT_NOT_NULL(initial_state);
  ASSERT_NOT_NULL(state);
  ASSERT_NOT_NULL(solver->data);

  // Allocate memory for intermediate variables.
  float state_new[MAX_LM_STATE_DIMENSION];
  struct LmData *data = solver->data;

  // state = initial_state, num_iter = 0.
  memcpy(state, initial_state, sizeof(float) * state_dim);
  solver->num_iter = 0;

  // Compute the initial cost function gradient and return early if it is
  // already sufficiently small to satisfy the solution criterion.
  if (computeResidualAndGradients(solver->func, state, f_data, data->temp,
                                  solver->params.gradient_threshold, state_dim,
                                  meas_dim, data->residual, data->gradient,
                                  data->hessian)) {
    return GRADIENT_SUFFICIENTLY_SMALL;
  }

  // Initialize damping parameter.
  damping_factor = solver->params.initial_u_scale *
                   matMaxDiagonalElement(data->hessian, state_dim);

  // Iterate solution.
  for (solver->num_iter = 0;
       solver->num_iter < solver->params.max_iterations;
       ++solver->num_iter) {
    // Compute new solver step.
    if (!computeStep(data->gradient, data->hessian, data->temp, damping_factor,
                     state_dim, data->step)) {
      return CHOLESKY_FAIL;
    }

    // If the new step is already sufficiently small, we have a solution.
    if (checkRelativeStepSize(data->step, state, state_dim,
                              solver->params.relative_step_threshold)) {
      return RELATIVE_STEP_SUFFICIENTLY_SMALL;
    }

    // state_new = state + step.
    vecAdd(state_new, state, data->step, state_dim);

    // Compute new cost function residual.
    solver->func(state_new, f_data, data->residual_new, NULL);

    // Compute the ratio of actual to predicted cost function gain for this
    // step.
    const float gain_ratio = computeGainRatio(data->residual,
                                              data->residual_new,
                                              data->step, data->gradient,
                                              damping_factor, state_dim,
                                              meas_dim);

    // If the gain ratio is positive, the step size is good; otherwise adjust
    // the damping factor and compute a new step.
    if (gain_ratio > 0.0f) {
      // Set state to the new state vector: state = state_new.
      memcpy(state, state_new, sizeof(float) * state_dim);

      // Check if the cost function gradient is now sufficiently small,
      // in which case we have a local solution.
      if (computeResidualAndGradients(solver->func, state, f_data, data->temp,
                                      solver->params.gradient_threshold,
                                      state_dim, meas_dim, data->residual,
                                      data->gradient, data->hessian)) {
        return GRADIENT_SUFFICIENTLY_SMALL;
      }

      // Update the damping factor based on the gain ratio.
      // Note, this update logic comes from Equation 2.21 in the following:
      // [Madsen, Kaj, Hans Bruun Nielsen, and Ole Tingleff.
      // "Methods for non-linear least squares problems." (2004)].
      const float tmp = 2.f * gain_ratio - 1.f;
      damping_factor *= NANO_MAX(0.33333f, 1.f - tmp * tmp * tmp);
      v = 2.f;
    } else {
      // Update the damping factor and try again.
      damping_factor *= v;
      v *= 2.f;
    }
  }

  return HIT_MAX_ITERATIONS;
}
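
/*
 * Computes the gain ratio used to accept or reject a candidate solver step:
 * the actual reduction of the squared residual divided by the reduction
 * predicted by the local linear model,
 *    gain_ratio = (||f(x)||^2 - ||f(x + step)||^2) /
 *                 (0.5 * step' * (u * step + gradient)),
 * where u is the damping factor and gradient = -J'f for the current state.
 * Returns 1 when the predicted reduction is too small to divide by safely,
 * so that the candidate step is accepted.
 */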
float computeGainRatio(const float *residual, const float *residual_new,
                       const float *step, const float *gradient,
                       float damping_factor, size_t state_dim,
                       size_t meas_dim) {
  // Compute true_gain = residual' residual - residual_new' residual_new.
  const float true_gain = vecDot(residual, residual, meas_dim) -
                          vecDot(residual_new, residual_new, meas_dim);

  // predicted_gain = 0.5 * step' * (damping_factor * step + gradient).
  float tmp[MAX_LM_STATE_DIMENSION];
  vecScalarMul(tmp, step, damping_factor, state_dim);
  vecAddInPlace(tmp, gradient, state_dim);
  const float predicted_gain = 0.5f * vecDot(step, tmp, state_dim);

  // Check that we don't divide by zero! If the denominator is too small,
  // set gain_ratio = 1 to use the current step.
  if (predicted_gain < kEps) {
    return 1.f;
  }

  return true_gain / predicted_gain;
}

/*
 * Tests if a solution is found based on the size of the step relative to the
 * current state magnitude. Returns true if a solution is found.
 *
 * TODO(dvitus): consider optimizing this function to use the squared norm
 * rather than the norm for the relative error computation to avoid a square
 * root.
 */
bool checkRelativeStepSize(const float *step, const float *state,
                           size_t dim, float relative_error_threshold) {
  // r = eps * (||x|| + eps), where eps is the relative error threshold.
  const float relative_error = relative_error_threshold *
      (vecNorm(state, dim) + relative_error_threshold);

  // Solved if ||step|| <= r.
  // Use the squared version of this comparison to avoid a square root.
  return (vecNormSquared(step, dim) <= relative_error * relative_error);
}

/*
 * Computes the residual, f(x), as well as the gradient and Hessian of the
 * cost function for the given state.
 *
 * Returns a boolean indicating whether the computed gradient is sufficiently
 * small to indicate that a solution has been found.
 *
 * INPUTS:
 * state: state estimate (x) for which to compute the gradient & Hessian.
 * f_data: pointer to parameter data needed for the residual or Jacobian.
 * jacobian: pointer to temporary memory for storing the Jacobian.
 *           Must be at least MAX_LM_STATE_DIMENSION * MAX_LM_MEAS_DIMENSION.
 * gradient_threshold: if the gradient is below this threshold, the function
 *                     returns true.
 *
 * OUTPUTS:
 * residual: f(x).
 * gradient: -J' f(x), where J = df(x)/dx.
 * hessian: cost function Hessian, approximated as J' J.
 */
bool computeResidualAndGradients(ResidualAndJacobianFunction func,
                                 const float *state, const void *f_data,
                                 float *jacobian, float gradient_threshold,
                                 size_t state_dim, size_t meas_dim,
                                 float *residual, float *gradient,
                                 float *hessian) {
  // Compute the residual and Jacobian.
  ASSERT_NOT_NULL(state);
  ASSERT_NOT_NULL(residual);
  ASSERT_NOT_NULL(gradient);
  ASSERT_NOT_NULL(hessian);
  func(state, f_data, residual, jacobian);

  // Compute the cost function hessian = jacobian' jacobian and
  // gradient = -jacobian' residual.
  matTransposeMultiplyMat(hessian, jacobian, meas_dim, state_dim);
  matTransposeMultiplyVec(gradient, jacobian, residual, meas_dim, state_dim);
  vecScalarMulInPlace(gradient, -1.f, state_dim);

  // Check if a solution has been found (i.e. the cost function gradient is
  // sufficiently small).
  return (vecMaxAbsoluteValue(gradient, state_dim) < gradient_threshold);
}

/*
 * Computes the Levenberg-Marquardt solver step that satisfies:
 *    (J'J + uI) * step = -J' f
 *
 * INPUTS:
 * gradient: -J'f.
 * hessian: J'J.
 * L: temporary memory of at least
 *    MAX_LM_STATE_DIMENSION * MAX_LM_STATE_DIMENSION.
 * damping_factor: u.
 * dim: state dimension.
 *
 * OUTPUTS:
 * step: solution to the above equation.
 * Returns false if the solve fails (due to a Cholesky decomposition failure),
 * otherwise returns true.
 *
 * Note that the hessian is modified in this function in order to reduce
 * local memory requirements.
 */
bool computeStep(const float *gradient, float *hessian, float *L,
                 float damping_factor, size_t dim, float *step) {
  // 1) A = hessian + damping_factor * Identity.
  matAddConstantDiagonal(hessian, damping_factor, dim);

  // 2) Solve A * step = gradient for step.
  //    a) Compute the Cholesky decomposition A = L L^T.
  if (!matCholeskyDecomposition(L, hessian, dim)) {
    return false;
  }

  //    b) Solve for step via back-substitution.
  return matLinearSolveCholesky(step, L, gradient, dim);
}
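
/*
 * A worked one-dimensional instance of the equation solved above (the numbers
 * are illustrative only): with hessian = J'J = 4, damping_factor u = 1, and
 * gradient = -J'f = 10, the step satisfies (4 + 1) * step = 10, so step = 2.
 * The Cholesky factor of the damped 1x1 system is L = sqrt(5), and the two
 * triangular solves give y = 10 / sqrt(5) followed by step = y / sqrt(5) = 2.
 */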