// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2010, 2011, 2012 Google Inc. All rights reserved.
// http://code.google.com/p/ceres-solver/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
//   used to endorse or promote products derived from this software without
//   specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: sameeragarwal@google.com (Sameer Agarwal)
//
// The LossFunction interface is the way users describe how residuals
// are converted to cost terms for the overall problem cost function.
// For the exact manner in which loss functions are converted to the
// overall cost for a problem, see problem.h.
//
// For least squares problems where there are no outliers and standard
// squared loss is expected, it is not necessary to create a loss
// function; instead, passing NULL to the problem when adding
// residuals implies a standard squared loss.
//
// For least squares problems where the minimization may encounter
// input terms that contain outliers, that is, completely bogus
// measurements, it is important to use a loss function that reduces
// their associated penalty.
//
// Consider a structure from motion problem. The unknowns are 3D
// points and camera parameters, and the measurements are image
// coordinates describing the expected reprojected position for a
// point in a camera. For example, we want to model the geometry of a
// street scene with fire hydrants and cars, observed by a moving
// camera with unknown parameters, and the only 3D points we care
// about are the pointy tippy-tops of the fire hydrants. Our magic
// image processing algorithm, which is responsible for producing the
// measurements that are input to Ceres, has found and matched all
// such tippy-tops in all image frames, except that in one of the
// frames it mistook a car's headlight for a hydrant. If we didn't do
// anything special (i.e. if we used a basic quadratic loss), the
// residual for the erroneous measurement would produce an extreme
// error due to the quadratic nature of squared loss. This results in
// the entire solution getting pulled away from the optimum to reduce
// the large error that would otherwise be attributed to the wrong
// measurement.
//
// Using a robust loss function, the cost for large residuals is
// reduced. In the example above, this leads to outlier terms getting
// downweighted so they do not overly influence the final solution.
//
// What loss function is best?
//
// In general, there isn't a principled way to select a robust loss
// function. The authors suggest starting with a non-robust cost, then
// only experimenting with robust loss functions if standard squared
// loss doesn't work.
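//
// For example, given a problem and cost function already set up (the
// names below are placeholders), attaching a robust loss is a one
// line change. A minimal sketch:
//
//   // Standard squared loss: pass NULL for the loss function.
//   problem.AddResidualBlock(cost_function, NULL, x);
//
//   // Robustified loss: residuals larger than ~1.0 are downweighted.
//   problem.AddResidualBlock(cost_function, new HuberLoss(1.0), x);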

#ifndef CERES_PUBLIC_LOSS_FUNCTION_H_
#define CERES_PUBLIC_LOSS_FUNCTION_H_

#include "glog/logging.h"
#include "ceres/internal/macros.h"
#include "ceres/internal/scoped_ptr.h"
#include "ceres/types.h"
#include "ceres/internal/disable_warnings.h"

namespace ceres {

class CERES_EXPORT LossFunction {
 public:
  virtual ~LossFunction() {}

  // For a residual vector with squared 2-norm 'sq_norm', this method
  // is required to fill in the value and derivatives of the loss
  // function (rho in this example):
  //
  //   out[0] = rho(sq_norm),
  //   out[1] = rho'(sq_norm),
  //   out[2] = rho''(sq_norm),
  //
  // Here the convention is that the contribution of a term to the
  // cost function is given by 1/2 rho(s), where
  //
  //   s = ||residuals||^2.
  //
  // Calling the method with a negative value of 's' is an error and
  // the implementations are not required to handle that case.
  //
  // Most sane choices of rho() satisfy:
  //
  //   rho(0) = 0,
  //   rho'(0) = 1,
  //   rho'(s) < 1 in outlier region,
  //   rho''(s) < 0 in outlier region,
  //
  // so that they mimic the least squares cost for small residuals.
  virtual void Evaluate(double sq_norm, double out[3]) const = 0;
};
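
// As an illustration of the contract above, here is a sketch of a
// hypothetical user-defined loss, rho(s) = 1 - exp(-s), which is not
// part of Ceres. It satisfies rho(0) = 0, rho'(0) = 1, and has
// rho'(s) < 1 and rho''(s) < 0 in the outlier region:
//
//   class ExpCappedLoss : public ceres::LossFunction {
//    public:
//     virtual void Evaluate(double sq_norm, double out[3]) const {
//       const double e = exp(-sq_norm);  // exp() from <cmath>.
//       out[0] = 1.0 - e;  // rho(s), capped at 1 for large s.
//       out[1] = e;        // rho'(s).
//       out[2] = -e;       // rho''(s).
//     }
//   };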

// Some common implementations follow below.
//
// Note: in the region of interest (i.e. s < 3) we have:
//   TrivialLoss >= HuberLoss >= SoftLOneLoss >= CauchyLoss


// This corresponds to no robustification.
//
//   rho(s) = s
//
// At s = 0: rho = [0, 1, 0].
//
// It is not normally necessary to use this, as passing NULL for the
// loss function when building the problem accomplishes the same
// thing.
class CERES_EXPORT TrivialLoss : public LossFunction {
 public:
  virtual void Evaluate(double, double*) const;
};

// Scaling
// -------
// Given one robustifier
//   s -> rho(s)
// one can change the length scale at which robustification takes
// place by adding a scale factor 'a' as follows:
//
//   s -> a^2 rho(s / a^2).
//
// The first and second derivatives are:
//
//   s -> rho'(s / a^2),
//   s -> (1 / a^2) rho''(s / a^2),
//
// but the behaviour near s = 0 is the same as the original function,
// i.e.
//
//   rho(s) = s + higher order terms,
//   a^2 rho(s / a^2) = s + higher order terms.
//
// The scalar 'a' should be positive.
//
// The reason for the appearance of squaring is that 'a' is in the
// units of the residual vector norm whereas 's' is a squared
// norm. For applications it is more convenient to specify 'a' than
// its square. The commonly used robustifiers below are described in
// un-scaled format (a = 1) but their implementations work for any
// non-zero value of 'a'.
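//
// The recipe above is mechanical, so it can be applied to any rho. A
// sketch (not part of the library) of evaluating a scaled version of
// an arbitrary loss function:
//
//   void EvaluateScaled(const LossFunction& rho, double a, double s,
//                       double out[3]) {
//     const double a2 = a * a;
//     double r[3];
//     rho.Evaluate(s / a2, r);
//     out[0] = a2 * r[0];  // a^2 rho(s / a^2)
//     out[1] = r[1];       // rho'(s / a^2)
//     out[2] = r[2] / a2;  // (1 / a^2) rho''(s / a^2)
//   }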

// Huber.
//
//   rho(s) = s               for s <= 1,
//   rho(s) = 2 sqrt(s) - 1   for s >= 1.
//
// At s = 0: rho = [0, 1, 0].
//
// The scaling parameter 'a' corresponds to 'delta' on this page:
//   http://en.wikipedia.org/wiki/Huber_Loss_Function
class CERES_EXPORT HuberLoss : public LossFunction {
 public:
  explicit HuberLoss(double a) : a_(a), b_(a * a) { }
  virtual void Evaluate(double, double*) const;

 private:
  const double a_;
  // b = a^2.
  const double b_;
};
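
// Folding the scale factor in as described in the Scaling section
// gives rho(s) = s for s <= a^2 and rho(s) = 2 a sqrt(s) - a^2
// otherwise. A sketch of an Evaluate consistent with those formulas
// (not necessarily the exact library implementation):
//
//   void HuberLoss::Evaluate(double s, double rho[3]) const {
//     if (s > b_) {
//       // Outlier region (recall b_ = a^2).
//       const double r = sqrt(s);
//       rho[0] = 2.0 * a_ * r - b_;
//       rho[1] = a_ / r;               // rho'(s).
//       rho[2] = -rho[1] / (2.0 * s);  // rho''(s).
//     } else {
//       // Inlier region: identical to squared loss.
//       rho[0] = s;
//       rho[1] = 1.0;
//       rho[2] = 0.0;
//     }
//   }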

// Soft L1, similar to Huber but smooth.
//
//   rho(s) = 2 (sqrt(1 + s) - 1).
//
// At s = 0: rho = [0, 1, -1/2].
class CERES_EXPORT SoftLOneLoss : public LossFunction {
 public:
  explicit SoftLOneLoss(double a) : b_(a * a), c_(1 / b_) { }
  virtual void Evaluate(double, double*) const;

 private:
  // b = a^2.
  const double b_;
  // c = 1 / a^2.
  const double c_;
};
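
// With the scale folded in (b = a^2, c = 1 / a^2), the scaled form is
// rho(s) = 2 b (sqrt(1 + s c) - 1). A sketch of an Evaluate consistent
// with that formula (not necessarily the exact implementation):
//
//   void SoftLOneLoss::Evaluate(double s, double rho[3]) const {
//     const double sum = 1.0 + s * c_;
//     const double tmp = sqrt(sum);
//     rho[0] = 2.0 * b_ * (tmp - 1.0);
//     rho[1] = 1.0 / tmp;                   // rho'(s).
//     rho[2] = -c_ * rho[1] / (2.0 * sum);  // rho''(s).
//   }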

// Inspired by the Cauchy distribution
//
//   rho(s) = log(1 + s).
//
// At s = 0: rho = [0, 1, -1].
class CERES_EXPORT CauchyLoss : public LossFunction {
 public:
  explicit CauchyLoss(double a) : b_(a * a), c_(1 / b_) { }
  virtual void Evaluate(double, double*) const;

 private:
  // b = a^2.
  const double b_;
  // c = 1 / a^2.
  const double c_;
};
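
// The scaled form is rho(s) = b log(1 + s c) with b = a^2 and
// c = 1 / a^2. A sketch of an Evaluate consistent with that formula:
//
//   void CauchyLoss::Evaluate(double s, double rho[3]) const {
//     const double sum = 1.0 + s * c_;
//     const double inv = 1.0 / sum;
//     rho[0] = b_ * log(sum);
//     rho[1] = inv;              // rho'(s).
//     rho[2] = -c_ * inv * inv;  // rho''(s).
//   }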

// Loss that is capped beyond a certain level using the arc-tangent function.
// The scaling parameter 'a' determines the level where falloff occurs.
// For costs much smaller than 'a', the loss function is linear and behaves like
// TrivialLoss, and for values much larger than 'a' the value asymptotically
// approaches the constant value of a * PI / 2.
//
//   rho(s) = a atan(s / a).
//
// At s = 0: rho = [0, 1, 0].
class CERES_EXPORT ArctanLoss : public LossFunction {
 public:
  explicit ArctanLoss(double a) : a_(a), b_(1 / (a * a)) { }
  virtual void Evaluate(double, double*) const;

 private:
  const double a_;
  // b = 1 / a^2.
  const double b_;
};
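
// A sketch of an Evaluate consistent with rho(s) = a atan(s / a) and
// b = 1 / a^2 (not necessarily the exact implementation):
//
//   void ArctanLoss::Evaluate(double s, double rho[3]) const {
//     const double sum = 1.0 + s * s * b_;  // 1 + (s / a)^2.
//     const double inv = 1.0 / sum;
//     rho[0] = a_ * atan(s / a_);
//     rho[1] = inv;                        // rho'(s).
//     rho[2] = -2.0 * s * b_ * inv * inv;  // rho''(s).
//   }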

// Loss function that maps to approximately zero cost in a range around the
// origin, and reverts to linear in error (quadratic in cost) beyond this range.
// The tolerance parameter 'a' sets the nominal point at which the
// transition occurs, and the transition size parameter 'b' sets the nominal
// distance over which most of the transition occurs. Both a and b must be
// greater than zero, and typically b will be set to a fraction of a.
// The slope rho'[s] varies smoothly from about 0 at s <= a - b to
// about 1 at s >= a + b.
//
// The term is computed as:
//
//   rho(s) = b log(1 + exp((s - a) / b)) - c0,
//
// where c0 is chosen so that rho(0) == 0:
//
//   c0 = b log(1 + exp(-a / b)).
//
// This has the following useful properties:
//
//   rho(s) == 0               for s = 0
//   rho'(s) ~= 0              for s << a - b
//   rho'(s) ~= 1              for s >> a + b
//   rho''(s) > 0              for all s
//
// In addition, all derivatives are continuous, and the curvature is
// concentrated in the range a - b to a + b.
//
// At s = 0: rho = [0, ~0, ~0].
class CERES_EXPORT TolerantLoss : public LossFunction {
 public:
  explicit TolerantLoss(double a, double b);
  virtual void Evaluate(double, double*) const;

 private:
  const double a_, b_, c_;
};
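
// A sketch of a constructor consistent with the definition of c0
// above; the argument checks mirror the requirement that both a and b
// be greater than zero:
//
//   TolerantLoss::TolerantLoss(double a, double b)
//       : a_(a), b_(b), c_(b * log(1.0 + exp(-a / b))) {
//     CHECK_GT(a, 0.0);
//     CHECK_GT(b, 0.0);
//   }
//
// As a numeric sanity check, a = 1 and b = 0.1 give
// c0 = 0.1 log(1 + exp(-10)) ~= 4.5e-6, i.e. the offset needed to
// make rho(0) zero is tiny when a >> b.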

// Composition of two loss functions.  The error is the result of first
// evaluating g followed by f to yield the composition f(g(s)).
// The loss functions must not be NULL.
class ComposedLoss : public LossFunction {
 public:
  explicit ComposedLoss(const LossFunction* f, Ownership ownership_f,
                        const LossFunction* g, Ownership ownership_g);
  virtual ~ComposedLoss();
  virtual void Evaluate(double, double*) const;

 private:
  internal::scoped_ptr<const LossFunction> f_, g_;
  const Ownership ownership_f_, ownership_g_;
};
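
// The derivatives of the composition follow from the chain rule:
//
//   (f o g)(s)   = f(g(s)),
//   (f o g)'(s)  = f'(g(s)) g'(s),
//   (f o g)''(s) = f''(g(s)) g'(s)^2 + f'(g(s)) g''(s).
//
// A sketch of an Evaluate implementing this (not necessarily the
// exact library implementation):
//
//   void ComposedLoss::Evaluate(double s, double rho[3]) const {
//     double rho_f[3], rho_g[3];
//     g_->Evaluate(s, rho_g);
//     f_->Evaluate(rho_g[0], rho_f);
//     rho[0] = rho_f[0];
//     rho[1] = rho_f[1] * rho_g[1];
//     rho[2] = rho_f[2] * rho_g[1] * rho_g[1] + rho_f[1] * rho_g[2];
//   }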

// The discussion above has to do with length scaling: it affects the space
// in which s is measured. Sometimes you want to simply scale the output
// value of the robustifier. For example, you might want to weight
// different error terms differently (e.g., weight pixel reprojection
// errors differently from terrain errors).
//
// If rho is the wrapped robustifier, then this simply outputs
//
//   s -> a * rho(s).
//
// The first and second derivatives are, not surprisingly,
//
//   s -> a * rho'(s),
//   s -> a * rho''(s).
//
// Since we treat a NULL loss function as the identity loss
// function, rho = NULL is a valid input and will result in the input
// being scaled by a. This provides a simple way of implementing a
// scaled ResidualBlock.
class CERES_EXPORT ScaledLoss : public LossFunction {
 public:
  // Constructs a ScaledLoss wrapping another loss function. Takes
  // ownership of the wrapped loss function or not depending on the
  // ownership parameter.
  ScaledLoss(const LossFunction* rho, double a, Ownership ownership) :
      rho_(rho), a_(a), ownership_(ownership) { }

  virtual ~ScaledLoss() {
    if (ownership_ == DO_NOT_TAKE_OWNERSHIP) {
      rho_.release();
    }
  }
  virtual void Evaluate(double, double*) const;

 private:
  internal::scoped_ptr<const LossFunction> rho_;
  const double a_;
  const Ownership ownership_;
  CERES_DISALLOW_COPY_AND_ASSIGN(ScaledLoss);
};
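
// A sketch of an Evaluate consistent with the description above,
// including the NULL-as-identity convention (not necessarily the
// exact library implementation):
//
//   void ScaledLoss::Evaluate(double s, double rho[3]) const {
//     if (rho_.get() == NULL) {
//       // Identity loss, scaled by a.
//       rho[0] = a_ * s;
//       rho[1] = a_;
//       rho[2] = 0.0;
//     } else {
//       rho_->Evaluate(s, rho);
//       rho[0] *= a_;
//       rho[1] *= a_;
//       rho[2] *= a_;
//     }
//   }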

// Sometimes after the optimization problem has been constructed, we
// wish to mutate the scale of the loss function. For example, when
// performing estimation from data which has substantial outliers,
// convergence can be improved by starting out with a large scale,
// optimizing the problem and then reducing the scale. This can have
// better convergence behaviour than just using a loss function with a
// small scale.
//
// This class allows the user to implement a loss function whose scale
// can be mutated after an optimization problem has been constructed.
//
// Example usage:
//
//  Problem problem;
//
//  // Add parameter blocks.
//
//  CostFunction* cost_function =
//      new AutoDiffCostFunction<UW_Camera_Mapper, 2, 9, 3>(
//          new UW_Camera_Mapper(feature_x, feature_y));
//
//  LossFunctionWrapper* loss_function =
//      new LossFunctionWrapper(new HuberLoss(1.0), TAKE_OWNERSHIP);
//
//  problem.AddResidualBlock(cost_function, loss_function, parameters);
//
//  Solver::Options options;
//  Solver::Summary summary;
//
//  Solve(options, &problem, &summary);
//
//  loss_function->Reset(new HuberLoss(1.0), TAKE_OWNERSHIP);
//
//  Solve(options, &problem, &summary);
//
class CERES_EXPORT LossFunctionWrapper : public LossFunction {
 public:
  LossFunctionWrapper(LossFunction* rho, Ownership ownership)
      : rho_(rho), ownership_(ownership) {
  }

  virtual ~LossFunctionWrapper() {
    if (ownership_ == DO_NOT_TAKE_OWNERSHIP) {
      rho_.release();
    }
  }

  virtual void Evaluate(double sq_norm, double out[3]) const {
    CHECK_NOTNULL(rho_.get());
    rho_->Evaluate(sq_norm, out);
  }

  void Reset(LossFunction* rho, Ownership ownership) {
    if (ownership_ == DO_NOT_TAKE_OWNERSHIP) {
      rho_.release();
    }
    rho_.reset(rho);
    ownership_ = ownership;
  }

 private:
  internal::scoped_ptr<const LossFunction> rho_;
  Ownership ownership_;
  CERES_DISALLOW_COPY_AND_ASSIGN(LossFunctionWrapper);
};

}  // namespace ceres

#include "ceres/internal/reenable_warnings.h"

#endif  // CERES_PUBLIC_LOSS_FUNCTION_H_