#include "common/math/levenberg_marquardt.h"

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#include "common/math/mat.h"
#include "common/math/vec.h"
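/*
 * Example usage (a minimal sketch; the exact definitions of struct LmParams,
 * struct LmData, and ResidualAndJacobianFunction live in
 * levenberg_marquardt.h, and the parameter values, variable names, and
 * callback name below are illustrative placeholders, not recommendations):
 *
 *   struct LmSolver solver;
 *   struct LmData data;
 *   const struct LmParams params = {
 *       .max_iterations = 30,
 *       .initial_u_scale = 1e-3f,
 *       .gradient_threshold = 1e-6f,
 *       .relative_step_threshold = 1e-6f,
 *   };
 *   lmSolverInit(&solver, &params, myResidualAndJacobianFunc);
 *   lmSolverSetData(&solver, &data);
 *
 *   float state[MAX_LM_STATE_DIMENSION];
 *   const enum LmStatus status = lmSolverSolve(
 *       &solver, initial_state, f_data, state_dim, meas_dim, state);
 */
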
// FORWARD DECLARATIONS
////////////////////////////////////////////////////////////////////////
static bool checkRelativeStepSize(const float *step, const float *state,
                                  size_t dim, float relative_error_threshold);

static bool computeResidualAndGradients(ResidualAndJacobianFunction func,
                                        const float *state, const void *f_data,
                                        float *jacobian,
                                        float gradient_threshold,
                                        size_t state_dim, size_t meas_dim,
                                        float *residual, float *gradient,
                                        float *hessian);

static bool computeStep(const float *gradient, float *hessian, float *L,
                        float damping_factor, size_t dim, float *step);

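// Threshold below which the predicted cost-function gain is treated as zero;
// see computeGainRatio().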
static const float kEps = 1e-10f;

// FUNCTION IMPLEMENTATIONS
////////////////////////////////////////////////////////////////////////
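/*
 * Initializes the solver with a copy of the given parameters and the
 * user-supplied residual/Jacobian callback. Must be called before
 * lmSolverSolve().
 */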
void lmSolverInit(struct LmSolver *solver, const struct LmParams *params,
                  ResidualAndJacobianFunction func) {
  ASSERT_NOT_NULL(solver);
  ASSERT_NOT_NULL(params);
  ASSERT_NOT_NULL(func);
  memset(solver, 0, sizeof(struct LmSolver));
  memcpy(&solver->params, params, sizeof(struct LmParams));
  solver->func = func;
  solver->num_iter = 0;
}

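/*
 * The solver holds no dynamically allocated resources, so destroy is a no-op.
 */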
void lmSolverDestroy(struct LmSolver *solver) {
  (void)solver;
}

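/*
 * Provides the solver with scratch memory (residual, gradient, Hessian, step,
 * etc.) used during lmSolverSolve(). The data must remain valid while the
 * solver is in use.
 */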
void lmSolverSetData(struct LmSolver *solver, struct LmData *data) {
  ASSERT_NOT_NULL(solver);
  ASSERT_NOT_NULL(data);
  solver->data = data;
}

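/*
 * Runs the Levenberg-Marquardt iteration from initial_state and writes the
 * resulting estimate to state.
 *
 * INPUTS:
 * initial_state: starting state estimate of length state_dim.
 * f_data: optional data for the residual/Jacobian function (may be NULL).
 * state_dim: state dimension, at most MAX_LM_STATE_DIMENSION.
 * meas_dim: measurement (residual) dimension, at most MAX_LM_MEAS_DIMENSION.
 *
 * OUTPUTS:
 * state: solution estimate of length state_dim.
 * Returns an LmStatus indicating how the iteration terminated.
 */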
enum LmStatus lmSolverSolve(struct LmSolver *solver, const float *initial_state,
                            void *f_data, size_t state_dim, size_t meas_dim,
                            float *state) {
  // Initialize parameters.
  float damping_factor = 0.0f;
  float v = 2.0f;

  // Check dimensions.
  if (meas_dim > MAX_LM_MEAS_DIMENSION || state_dim > MAX_LM_STATE_DIMENSION) {
    return INVALID_DATA_DIMENSIONS;
  }

  // Check pointers (note that f_data can be null if no additional data is
  // required by the error function).
  ASSERT_NOT_NULL(solver);
  ASSERT_NOT_NULL(initial_state);
  ASSERT_NOT_NULL(state);
  ASSERT_NOT_NULL(solver->data);

  // Allocate memory for intermediate variables.
  float state_new[MAX_LM_STATE_DIMENSION];
  struct LmData *data = solver->data;

  // state = initial_state, num_iter = 0
  memcpy(state, initial_state, sizeof(float) * state_dim);
  solver->num_iter = 0;

  // Compute the initial cost function gradient and return early if it is
  // already small enough to declare a solution.
  if (computeResidualAndGradients(solver->func, state, f_data, data->temp,
                                  solver->params.gradient_threshold, state_dim,
                                  meas_dim, data->residual,
                                  data->gradient,
                                  data->hessian)) {
    return GRADIENT_SUFFICIENTLY_SMALL;
  }

  // Initialize damping parameter.
  damping_factor = solver->params.initial_u_scale *
      matMaxDiagonalElement(data->hessian, state_dim);

  // Iterate solution.
  for (solver->num_iter = 0;
       solver->num_iter < solver->params.max_iterations;
       ++solver->num_iter) {
    // Compute new solver step.
    if (!computeStep(data->gradient, data->hessian, data->temp, damping_factor,
                     state_dim, data->step)) {
      return CHOLESKY_FAIL;
    }

    // If the new step is already sufficiently small, we have a solution.
    if (checkRelativeStepSize(data->step, state, state_dim,
                              solver->params.relative_step_threshold)) {
      return RELATIVE_STEP_SUFFICIENTLY_SMALL;
    }

    // state_new = state + step.
    vecAdd(state_new, state, data->step, state_dim);

    // Compute new cost function residual.
    solver->func(state_new, f_data, data->residual_new, NULL);

    // Compute the ratio of actual to predicted cost function gain for this
    // step.
    const float gain_ratio = computeGainRatio(data->residual,
                                              data->residual_new,
                                              data->step, data->gradient,
                                              damping_factor, state_dim,
                                              meas_dim);

    // If the gain ratio is positive, the step is accepted; otherwise, adjust
    // the damping factor and compute a new step.
    if (gain_ratio > 0.0f) {
      // Set state to new state vector: state = state_new.
      memcpy(state, state_new, sizeof(float) * state_dim);

      // Check if cost function gradient is now sufficiently small,
      // in which case we have a local solution.
      if (computeResidualAndGradients(solver->func, state, f_data, data->temp,
                                      solver->params.gradient_threshold,
                                      state_dim, meas_dim, data->residual,
                                      data->gradient, data->hessian)) {
        return GRADIENT_SUFFICIENTLY_SMALL;
      }

      // Update damping factor based on gain ratio.
      // Note, this update logic comes from Equation 2.21 in the following:
      // [Madsen, Kaj, Hans Bruun Nielsen, and Ole Tingleff.
      // "Methods for non-linear least squares problems." (2004)].
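      // i.e. damping_factor *= max(1/3, 1 - (2 * gain_ratio - 1)^3).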
      const float tmp = 2.f * gain_ratio - 1.f;
      damping_factor *= NANO_MAX(0.33333f, 1.f - tmp * tmp * tmp);
      v = 2.f;
    } else {
      // Update damping factor and try again.
      damping_factor *= v;
      v *= 2.f;
    }
  }

  return HIT_MAX_ITERATIONS;
}

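/*
 * Computes the gain ratio used to accept or reject a candidate step: the
 * actual reduction in the squared residual norm divided by the reduction
 * predicted by the local model, i.e.
 *
 *   gain_ratio = (||residual||^2 - ||residual_new||^2) /
 *                (0.5 * step' * (damping_factor * step + gradient)).
 *
 * If the predicted gain is smaller than kEps, returns 1 so that the current
 * step is used.
 */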
float computeGainRatio(const float *residual, const float *residual_new,
                       const float *step, const float *gradient,
                       float damping_factor, size_t state_dim,
                       size_t meas_dim) {
  // Compute true_gain = residual' residual - residual_new' residual_new.
  const float true_gain = vecDot(residual, residual, meas_dim)
      - vecDot(residual_new, residual_new, meas_dim);

  // predicted gain = 0.5 * step' * (damping_factor * step + gradient).
  float tmp[MAX_LM_STATE_DIMENSION];
  vecScalarMul(tmp, step, damping_factor, state_dim);
  vecAddInPlace(tmp, gradient, state_dim);
  const float predicted_gain = 0.5f * vecDot(step, tmp, state_dim);

  // Check that we don't divide by zero! If denominator is too small,
  // set gain_ratio = 1 to use the current step.
  if (predicted_gain < kEps) {
    return 1.f;
  }

  return true_gain / predicted_gain;
}

/*
 * Tests if a solution is found based on the size of the step relative to the
 * current state magnitude. Returns true if a solution is found.
 *
 * TODO(dvitus): consider optimization of this function to use squared norm
 * rather than norm for relative error computation to avoid square root.
 */
bool checkRelativeStepSize(const float *step, const float *state,
                           size_t dim, float relative_error_threshold) {
  // r = eps * (||x|| + eps)
  const float relative_error = relative_error_threshold *
      (vecNorm(state, dim) + relative_error_threshold);

  // solved if ||step|| <= r
  // use squared version of this compare to avoid square root.
  return (vecNormSquared(step, dim) <= relative_error * relative_error);
}

/*
 * Computes the residual, f(x), as well as the gradient and Hessian of the cost
 * function for the given state.
 *
 * Returns true if the computed gradient is sufficiently small to indicate
 * that a solution has been found.
 *
 * INPUTS:
 * state: state estimate (x) for which to compute the gradient & Hessian.
 * f_data: pointer to parameter data needed for the residual or Jacobian.
 * jacobian: pointer to temporary memory for storing the Jacobian.
 *           Must be at least MAX_LM_STATE_DIMENSION * MAX_LM_MEAS_DIMENSION.
 * gradient_threshold: if the gradient is below this threshold, the function
 *                     returns true.
 *
 * OUTPUTS:
 * residual: f(x).
 * gradient: - J' f(x), where J = df(x)/dx.
 * hessian: approximate Hessian of the cost function, J' J.
 */
bool computeResidualAndGradients(ResidualAndJacobianFunction func,
                                 const float *state, const void *f_data,
                                 float *jacobian, float gradient_threshold,
                                 size_t state_dim, size_t meas_dim,
                                 float *residual, float *gradient,
                                 float *hessian) {
  // Compute residual and Jacobian.
  ASSERT_NOT_NULL(state);
  ASSERT_NOT_NULL(residual);
  ASSERT_NOT_NULL(gradient);
  ASSERT_NOT_NULL(hessian);
  func(state, f_data, residual, jacobian);

  // Compute the cost function hessian = jacobian' jacobian and
  // gradient = -jacobian' residual
  matTransposeMultiplyMat(hessian, jacobian, meas_dim, state_dim);
  matTransposeMultiplyVec(gradient, jacobian, residual, meas_dim, state_dim);
  vecScalarMulInPlace(gradient, -1.f, state_dim);

  // Check if solution is found (cost function gradient is sufficiently small).
  return (vecMaxAbsoluteValue(gradient, state_dim) < gradient_threshold);
}

/*
 * Computes the Levenberg-Marquardt solver step to satisfy the following:
 *    (J'J + uI) * step = - J' f
 *
 * INPUTS:
 * gradient:  -J'f
 * hessian:  J'J
 * L: temp memory of at least MAX_LM_STATE_DIMENSION * MAX_LM_STATE_DIMENSION.
 * damping_factor: u
 * dim: state dimension
 *
 * OUTPUTS:
 * step: solution to the above equation.
 * Returns false if the Cholesky decomposition fails, otherwise returns true.
 *
 * Note that the Hessian is modified in this function in order to reduce
 * local memory requirements.
 */
bool computeStep(const float *gradient, float *hessian, float *L,
                 float damping_factor, size_t dim, float *step) {
  // 1) A = hessian + damping_factor * Identity.
  matAddConstantDiagonal(hessian, damping_factor, dim);

  // 2) Solve A * step = gradient for step.
  // a) compute Cholesky decomposition of A = L L^T.
  if (!matCholeskyDecomposition(L, hessian, dim)) {
    return false;
  }

  // b) solve for step via back-solve.
  return matLinearSolveCholesky(step, L, gradient, dim);
}
    271