// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2013 Google Inc. All rights reserved.
// http://code.google.com/p/ceres-solver/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
//   used to endorse or promote products derived from this software without
//   specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: sameeragarwal@google.com (Sameer Agarwal)

#ifndef CERES_PUBLIC_COVARIANCE_H_
#define CERES_PUBLIC_COVARIANCE_H_

#include <utility>
#include <vector>
#include "ceres/internal/port.h"
#include "ceres/internal/scoped_ptr.h"
#include "ceres/types.h"

namespace ceres {

class Problem;

namespace internal {
class CovarianceImpl;
}  // namespace internal

// WARNING
// =======
// It is very easy to use this class incorrectly without understanding
// the underlying mathematics. Please read and understand the
// documentation completely before attempting to use this class.
//
// This class allows the user to evaluate the covariance for a
// non-linear least squares problem and provides random access to its
// blocks.
//
// Background
// ==========
// One way to assess the quality of the solution returned by a
// non-linear least squares solver is to analyze the covariance of the
// solution.
//
// Let us consider the non-linear regression problem
//
//   y = f(x) + N(0, I)
//
// i.e., the observation y is a random non-linear function of the
// independent variable x with mean f(x) and identity covariance. Then
// the maximum likelihood estimate of x given observations y is the
// solution to the non-linear least squares problem:
//
//   x* = arg min_x |f(x)|^2
//
// And the covariance of x* is given by
//
//   C(x*) = inverse[J'(x*)J(x*)]
//
// Here J(x*) is the Jacobian of f at x*. The above formula assumes
// that J(x*) has full column rank.
//
// If J(x*) is rank deficient, then the covariance matrix C(x*) is
// also rank deficient and is given by
//
//   C(x*) = pseudoinverse[J'(x*)J(x*)]
//
// Note that in the above we assumed that the covariance matrix for y
// was identity. This is an important assumption. If this is not the
// case and we have
//
//   y = f(x) + N(0, S)
//
// where S is a positive semi-definite matrix denoting the covariance
// of y, then the maximum likelihood problem to be solved is
//
//   x* = arg min_x f'(x) inverse[S] f(x)
//
// and the corresponding covariance estimate of x* is given by
//
//   C(x*) = inverse[J'(x*) inverse[S] J(x*)]
//
// So, if the observations being fitted have a covariance matrix not
// equal to identity, then it is the user's responsibility to ensure
// that the corresponding cost functions are correctly scaled, e.g. in
// the above case the cost function for this problem should evaluate
// S^{-1/2} f(x) instead of just f(x), where S^{-1/2} is the inverse
// square root of the covariance matrix S.
//
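// For example, with a fixed 2x2 measurement covariance S, the
// residuals can be pre-whitened with S^{-1/2} inside the cost
// function. The following is a minimal sketch and not part of the
// Ceres API; it uses Eigen (already a Ceres dependency) and
// illustrative names:
//
//   Eigen::Matrix2d S;  // Measurement covariance, assumed to be SPD.
//   <Fill in S>
//   Eigen::SelfAdjointEigenSolver<Eigen::Matrix2d> solver(S);
//   // S^{-1/2} via the eigendecomposition of S.
//   const Eigen::Matrix2d S_inverse_sqrt = solver.operatorInverseSqrt();
//
//   // Inside CostFunction::Evaluate, after computing the raw
//   // residual vector f_of_x:
//   residuals = S_inverse_sqrt * f_of_x;
//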
// This class allows the user to evaluate the covariance for a
// non-linear least squares problem and provides random access to its
// blocks. The computation assumes that the CostFunctions compute
// residuals such that their covariance is identity.
//
// Since the computation of the covariance matrix requires computing
// the inverse of a potentially large matrix, this can involve a
// rather large amount of time and memory. However, it is usually the
// case that the user is only interested in a small part of the
// covariance matrix, quite often just the block diagonal. This class
// allows the user to specify the parts of the covariance matrix that
// she is interested in and then uses this information to only compute
// and store those parts of the covariance matrix.
//
// Rank of the Jacobian
// --------------------
// As we noted above, if the Jacobian is rank deficient, then the
// inverse of J'J is not defined and instead a pseudo inverse needs to
// be computed.
//
// The rank deficiency in J can be structural (columns which are
// always known to be zero) or numerical (depending on the exact
// values in the Jacobian).
//
// Structural rank deficiency occurs when the problem contains
// parameter blocks that are constant. This class correctly handles
// structural rank deficiency like that.
//
// Numerical rank deficiency, where the rank of the matrix cannot be
// predicted by its sparsity structure and requires looking at its
// numerical values, is more complicated. Here again there are two
// cases.
//
//   a. The rank deficiency arises from overparameterization, e.g., a
//   four dimensional quaternion used to parameterize SO(3), which is
//   a three dimensional manifold. In cases like this, the user should
//   use an appropriate LocalParameterization (see the sketch after
//   this list). Not only will this lead to better numerical behaviour
//   of the Solver, it will also expose the rank deficiency to the
//   Covariance object so that it can handle it correctly.
//
//   b. More general numerical rank deficiency in the Jacobian
//   requires the computation of the so called Singular Value
//   Decomposition (SVD) of J'J. We do not know how to do this for
//   large sparse matrices efficiently. For small and moderate sized
//   problems this is done using dense linear algebra.
//
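// A minimal sketch of case (a): associating a LocalParameterization
// with a quaternion parameter block, so that the rank deficiency is
// exposed in the three dimensional tangent space. The block name is
// illustrative:
//
//   double quaternion[4];
//   problem.AddParameterBlock(quaternion, 4,
//                             new ceres::QuaternionParameterization);
//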
// Gauge Invariance
// ----------------
// In structure from motion (3D reconstruction) problems, the
// reconstruction is ambiguous up to a similarity transform. This is
// known as a Gauge Ambiguity. Handling Gauges correctly requires the
// use of SVD or custom inversion algorithms. For small problems the
// user can use the dense algorithm. For more details see
//
// Ken-ichi Kanatani, Daniel D. Morris: Gauges and gauge
// transformations for uncertainty description of geometric structure
// with indeterminacy. IEEE Transactions on Information Theory 47(5):
// 2017-2028 (2001)
//
// Example Usage
// =============
//
//  double x[3];
//  double y[2];
//
//  Problem problem;
//  problem.AddParameterBlock(x, 3);
//  problem.AddParameterBlock(y, 2);
//  <Build Problem>
//  <Solve Problem>
//
//  Covariance::Options options;
//  Covariance covariance(options);
//
//  vector<pair<const double*, const double*> > covariance_blocks;
//  covariance_blocks.push_back(make_pair(x, x));
//  covariance_blocks.push_back(make_pair(y, y));
//  covariance_blocks.push_back(make_pair(x, y));
//
//  CHECK(covariance.Compute(covariance_blocks, &problem));
//
//  double covariance_xx[3 * 3];
//  double covariance_yy[2 * 2];
//  double covariance_xy[3 * 2];
//  covariance.GetCovarianceBlock(x, x, covariance_xx);
//  covariance.GetCovarianceBlock(y, y, covariance_yy);
//  covariance.GetCovarianceBlock(x, y, covariance_xy);
//
class Covariance {
 public:
  struct Options {
    Options()
#ifndef CERES_NO_SUITESPARSE
        : algorithm_type(SPARSE_QR),
#else
        : algorithm_type(DENSE_SVD),
#endif
          min_reciprocal_condition_number(1e-14),
          null_space_rank(0),
          num_threads(1),
          apply_loss_function(true) {
    }

    // Ceres supports three different algorithms for covariance
    // estimation, which represent different tradeoffs in speed,
    // accuracy and reliability.
    //
    // 1. DENSE_SVD uses Eigen's JacobiSVD to perform the
    //    computations. It computes the singular value decomposition
    //
    //      U * S * V' = J
    //
    //    and then uses it to compute the pseudo inverse of J'J as
    //
    //      pseudoinverse[J'J] = V * pseudoinverse[S^2] * V'
    //
    //    It is an accurate but slow method and should only be used
    //    for small to moderate sized problems. It can handle
    //    full-rank as well as rank deficient Jacobians.
    //
    // 2. SPARSE_CHOLESKY uses the CHOLMOD sparse Cholesky
    //    factorization library to compute the decomposition
    //
    //      R'R = J'J
    //
    //    and then
    //
    //      [J'J]^-1 = [R'R]^-1
    //
    //    It is a fast algorithm for sparse matrices that should be
    //    used when the Jacobian matrix J is well conditioned. For
    //    ill-conditioned matrices, this algorithm can fail
    //    unpredictably. This is because Cholesky factorization is
    //    not a rank-revealing factorization, i.e., it cannot reliably
    //    detect when the matrix being factorized is not of full
    //    rank. SuiteSparse/CHOLMOD supplies a heuristic for checking
    //    if the matrix is rank deficient (cholmod_rcond), but it is
    //    only a heuristic and can produce both false positives and
    //    false negatives.
    //
    //    Recent versions of SuiteSparse (>= 4.2.0) provide a much
    //    more efficient method for solving for rows of the covariance
    //    matrix. Therefore, if you are using SPARSE_CHOLESKY, we
    //    strongly recommend using a recent version of SuiteSparse.
    //
    // 3. SPARSE_QR uses the SuiteSparseQR sparse QR factorization
    //    library to compute the decomposition
    //
    //      Q * R = J
    //
    //    and then
    //
    //      [J'J]^-1 = [R'R]^-1
    //
    //    It is a moderately fast algorithm for sparse matrices which,
    //    at the price of more time and memory than SPARSE_CHOLESKY,
    //    is numerically better behaved and rank revealing, i.e., it
    //    can reliably detect when the Jacobian matrix is rank
    //    deficient.
    //
    // Neither SPARSE_CHOLESKY nor SPARSE_QR is capable of computing
    // the covariance if the Jacobian is rank deficient.

    CovarianceAlgorithmType algorithm_type;
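
    // For example, to force the dense rank-revealing path (a usage
    // sketch; the enum values are declared in ceres/types.h):
    //
    //   Covariance::Options options;
    //   options.algorithm_type = ceres::DENSE_SVD;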

    // If the Jacobian matrix is near singular, then inverting J'J
    // will result in unreliable results, e.g., if
    //
    //   J = [1.0 1.0      ]
    //       [1.0 1.0000001]
    //
    // which is essentially a rank deficient matrix, we have
    //
    //   inv(J'J) = [ 2.0471e+14  -2.0471e+14]
    //              [-2.0471e+14   2.0471e+14]
    //
    // This is not a useful result. Therefore, by default
    // Covariance::Compute will return false if a rank deficient
    // Jacobian is encountered. How rank deficiency is detected
    // depends on the algorithm being used.
    //
    // 1. DENSE_SVD
    //
    //      min_sigma / max_sigma < sqrt(min_reciprocal_condition_number)
    //
    //    where min_sigma and max_sigma are the minimum and maximum
    //    singular values of J respectively.
    //
    // 2. SPARSE_CHOLESKY
    //
    //      cholmod_rcond < min_reciprocal_condition_number
    //
    //    Here cholmod_rcond is a crude estimate of the reciprocal
    //    condition number of J'J by using the maximum and minimum
    //    diagonal entries of the Cholesky factor R. There are no
    //    theoretical guarantees associated with this test. It can
    //    give false positives and negatives. Use at your own
    //    risk. The default value of min_reciprocal_condition_number
    //    has been set to a conservative value, and sometimes
    //    Covariance::Compute may return false even if it is possible
    //    to estimate the covariance reliably. In such cases, the user
    //    should exercise their judgement before lowering the value of
    //    min_reciprocal_condition_number.
    //
    // 3. SPARSE_QR
    //
    //      rank(J) < num_col(J)
    //
    //    Here rank(J) is the estimate of the rank of J returned by the
    //    SuiteSparseQR algorithm. It is a fairly reliable indication
    //    of rank deficiency.
    //
    double min_reciprocal_condition_number;
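
    // For example, a sketch that checks the result of Compute instead
    // of asserting on it (names as in the usage example above):
    //
    //   Covariance covariance(options);
    //   if (!covariance.Compute(covariance_blocks, &problem)) {
    //     // The Jacobian was deemed rank deficient; consider
    //     // DENSE_SVD together with null_space_rank (see below).
    //   }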

    // When using DENSE_SVD, the user has more control in dealing with
    // singular and near singular covariance matrices.
    //
    // As mentioned above, when the covariance matrix is near
    // singular, instead of computing the inverse of J'J, the
    // Moore-Penrose pseudoinverse of J'J should be computed.
    //
    // If J'J has the eigendecomposition (lambda_i, e_i), where
    // lambda_i is the i^th eigenvalue and e_i is the corresponding
    // eigenvector, then the inverse of J'J is
    //
    //   inverse[J'J] = sum_i e_i e_i' / lambda_i
    //
    // and computing the pseudo inverse involves dropping terms from
    // this sum that correspond to small eigenvalues.
    //
    // How terms are dropped is controlled by
    // min_reciprocal_condition_number and null_space_rank.
    //
    // If null_space_rank is non-negative, then the smallest
    // null_space_rank eigenvalue/eigenvector pairs are dropped
    // irrespective of the magnitude of lambda_i. If the ratio of the
    // smallest non-zero eigenvalue to the largest eigenvalue in the
    // truncated matrix is still below
    // min_reciprocal_condition_number, then Covariance::Compute()
    // will fail and return false.
    //
    // Setting null_space_rank = -1 drops all terms for which
    //
    //   lambda_i / lambda_max < min_reciprocal_condition_number.
    //
    // This option has no effect on the SPARSE_CHOLESKY or SPARSE_QR
    // algorithms.
    int null_space_rank;
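
    // For example, a sketch of requesting a pseudo inverse that drops
    // every eigenvalue below the condition threshold:
    //
    //   Covariance::Options options;
    //   options.algorithm_type = ceres::DENSE_SVD;
    //   options.null_space_rank = -1;  // Drop all small eigenvalues.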

    // Number of threads to be used for evaluating the Jacobian and
    // estimating the covariance.
    int num_threads;

    // Even though the residual blocks in the problem may contain loss
    // functions, setting apply_loss_function to false will turn off
    // the application of the loss function to the output of the cost
    // function and in turn its effect on the covariance.
    //
    // TODO(sameeragarwal): Expand this based on Jim's experiments.
    bool apply_loss_function;
  };

  explicit Covariance(const Options& options);
  ~Covariance();

  // Compute a part of the covariance matrix.
  //
  // The vector covariance_blocks indexes into the covariance matrix
  // block-wise using pairs of parameter blocks. This allows the
  // covariance estimation algorithm to only compute and store these
  // blocks.
  //
  // Since the covariance matrix is symmetric, if the user passes
  // (block1, block2), then GetCovarianceBlock can be called with
  // block1, block2 as well as block2, block1.
  //
  // covariance_blocks cannot contain duplicates. Bad things will
  // happen if they do.
  //
  // Note that the list of covariance_blocks is only used to determine
  // which parts of the covariance matrix are computed. The full
  // Jacobian is always used to do the computation, i.e., the choice
  // of blocks has no impact on what part of the Jacobian is used.
  //
  // The return value indicates the success or failure of the
  // covariance computation. Please see the documentation for
  // Covariance::Options for more on the conditions under which this
  // function returns false.
  bool Compute(
      const vector<pair<const double*, const double*> >& covariance_blocks,
      Problem* problem);

  // Return the block of the covariance matrix corresponding to
  // parameter_block1 and parameter_block2.
  //
  // Compute must be called before the first call to
  // GetCovarianceBlock, and the pair <parameter_block1,
  // parameter_block2> OR the pair <parameter_block2,
  // parameter_block1> must have been present in the vector
  // covariance_blocks when Compute was called. Otherwise
  // GetCovarianceBlock will return false.
  //
  // covariance_block must point to a memory location that can store a
  // parameter_block1_size x parameter_block2_size matrix. The
  // returned covariance will be a row-major matrix.
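  //
  // For example, the returned row-major block can be wrapped without
  // copying using Eigen (a sketch reusing the 3 x 2 (x, y) block from
  // the usage example above):
  //
  //   double covariance_xy[3 * 2];
  //   covariance.GetCovarianceBlock(x, y, covariance_xy);
  //   Eigen::Map<const Eigen::Matrix<double, 3, 2, Eigen::RowMajor> >
  //       cov_xy(covariance_xy);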
  bool GetCovarianceBlock(const double* parameter_block1,
                          const double* parameter_block2,
                          double* covariance_block) const;

 private:
  internal::scoped_ptr<internal::CovarianceImpl> impl_;
};

}  // namespace ceres

#endif  // CERES_PUBLIC_COVARIANCE_H_