Home | History | Annotate | Download | only in ceres
      1 // Ceres Solver - A fast non-linear least squares minimizer
      2 // Copyright 2010, 2011, 2012 Google Inc. All rights reserved.
      3 // http://code.google.com/p/ceres-solver/
      4 //
      5 // Redistribution and use in source and binary forms, with or without
      6 // modification, are permitted provided that the following conditions are met:
      7 //
      8 // * Redistributions of source code must retain the above copyright notice,
      9 //   this list of conditions and the following disclaimer.
     10 // * Redistributions in binary form must reproduce the above copyright notice,
     11 //   this list of conditions and the following disclaimer in the documentation
     12 //   and/or other materials provided with the distribution.
     13 // * Neither the name of Google Inc. nor the names of its contributors may be
     14 //   used to endorse or promote products derived from this software without
     15 //   specific prior written permission.
     16 //
     17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     18 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     19 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     20 // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     21 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     22 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     23 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     24 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     25 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     26 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     27 // POSSIBILITY OF SUCH DAMAGE.
     28 //
     29 // Author: sameeragarwal (at) google.com (Sameer Agarwal)
     30 
     31 #ifndef CERES_INTERNAL_SCHUR_ELIMINATOR_H_
     32 #define CERES_INTERNAL_SCHUR_ELIMINATOR_H_
     33 
     34 #include <map>
     35 #include <vector>
     36 #include "ceres/mutex.h"
     37 #include "ceres/block_random_access_matrix.h"
     38 #include "ceres/block_sparse_matrix.h"
     39 #include "ceres/block_structure.h"
     40 #include "ceres/linear_solver.h"
     41 #include "ceres/internal/eigen.h"
     42 #include "ceres/internal/scoped_ptr.h"
     43 
     44 namespace ceres {
     45 namespace internal {
     46 
     47 // Classes implementing the SchurEliminatorBase interface implement
     48 // variable elimination for linear least squares problems. Assuming
     49 // that the input linear system Ax = b can be partitioned into
     50 //
     51 //  E y + F z = b
     52 //
     53 // Where x = [y;z] is a partition of the variables.  The paritioning
     54 // of the variables is such that, E'E is a block diagonal matrix. Or
     55 // in other words, the parameter blocks in E form an independent set
     56 // of the of the graph implied by the block matrix A'A. Then, this
     57 // class provides the functionality to compute the Schur complement
     58 // system
     59 //
     60 //   S z = r
     61 //
     62 // where
     63 //
     64 //   S = F'F - F'E (E'E)^{-1} E'F and r = F'b - F'E(E'E)^(-1) E'b
     65 //
     66 // This is the Eliminate operation, i.e., construct the linear system
     67 // obtained by eliminating the variables in E.
     68 //
     69 // The eliminator also provides the reverse functionality, i.e. given
     70 // values for z it can back substitute for the values of y, by solving the
     71 // linear system
     72 //
     73 //  Ey = b - F z
     74 //
     75 // which is done by observing that
     76 //
     77 //  y = (E'E)^(-1) [E'b - E'F z]
     78 //
     79 // The eliminator has a number of requirements.
     80 //
     81 // The rows of A are ordered so that for every variable block in y,
     82 // all the rows containing that variable block occur as a vertically
     83 // contiguous block. i.e the matrix A looks like
     84 //
     85 //              E                 F                   chunk
     86 //  A = [ y1   0   0   0 |  z1    0    0   0    z5]     1
     87 //      [ y1   0   0   0 |  z1   z2    0   0     0]     1
     88 //      [  0  y2   0   0 |   0    0   z3   0     0]     2
     89 //      [  0   0  y3   0 |  z1   z2   z3  z4    z5]     3
     90 //      [  0   0  y3   0 |  z1    0    0   0    z5]     3
     91 //      [  0   0   0  y4 |   0    0    0   0    z5]     4
     92 //      [  0   0   0  y4 |   0   z2    0   0     0]     4
     93 //      [  0   0   0  y4 |   0    0    0   0     0]     4
     94 //      [  0   0   0   0 |  z1    0    0   0     0] non chunk blocks
     95 //      [  0   0   0   0 |   0    0   z3  z4    z5] non chunk blocks
     96 //
     97 // This structure should be reflected in the corresponding
     98 // CompressedRowBlockStructure object associated with A. The linear
     99 // system Ax = b should either be well posed or the array D below
    100 // should be non-null and the diagonal matrix corresponding to it
    101 // should be non-singular. For simplicity of exposition only the case
    102 // with a null D is described.
    103 //
    104 // The usual way to do the elimination is as follows. Starting with
    105 //
    106 //  E y + F z = b
    107 //
    108 // we can form the normal equations,
    109 //
    110 //  E'E y + E'F z = E'b
    111 //  F'E y + F'F z = F'b
    112 //
    113 // multiplying both sides of the first equation by (E'E)^(-1) and then
    114 // by F'E we get
    115 //
    116 //  F'E y + F'E (E'E)^(-1) E'F z =  F'E (E'E)^(-1) E'b
    117 //  F'E y +                F'F z =  F'b
    118 //
    119 // now subtracting the two equations we get
    120 //
    121 // [FF' - F'E (E'E)^(-1) E'F] z = F'b - F'E(E'E)^(-1) E'b
    122 //
    123 // Instead of forming the normal equations and operating on them as
    124 // general sparse matrices, the algorithm here deals with one
    125 // parameter block in y at a time. The rows corresponding to a single
    126 // parameter block yi are known as a chunk, and the algorithm operates
    127 // on one chunk at a time. The mathematics remains the same since the
    128 // reduced linear system can be shown to be the sum of the reduced
    129 // linear systems for each chunk. This can be seen by observing two
    130 // things.
    131 //
    132 //  1. E'E is a block diagonal matrix.
    133 //
    134 //  2. When E'F is computed, only the terms within a single chunk
    135 //  interact, i.e for y1 column blocks when transposed and multiplied
    136 //  with F, the only non-zero contribution comes from the blocks in
    137 //  chunk1.
    138 //
    139 // Thus, the reduced linear system
    140 //
    141 //  FF' - F'E (E'E)^(-1) E'F
    142 //
    143 // can be re-written as
    144 //
    145 //  sum_k F_k F_k' - F_k'E_k (E_k'E_k)^(-1) E_k' F_k
    146 //
    147 // Where the sum is over chunks and E_k'E_k is dense matrix of size y1
    148 // x y1.
    149 //
    150 // Advanced usage. Uptil now it has been assumed that the user would
    151 // be interested in all of the Schur Complement S. However, it is also
    152 // possible to use this eliminator to obtain an arbitrary submatrix of
    153 // the full Schur complement. When the eliminator is generating the
    154 // blocks of S, it asks the RandomAccessBlockMatrix instance passed to
    155 // it if it has storage for that block. If it does, the eliminator
    156 // computes/updates it, if not it is skipped. This is useful when one
    157 // is interested in constructing a preconditioner based on the Schur
    158 // Complement, e.g., computing the block diagonal of S so that it can
    159 // be used as a preconditioner for an Iterative Substructuring based
    160 // solver [See Agarwal et al, Bundle Adjustment in the Large, ECCV
    161 // 2008 for an example of such use].
    162 //
    163 // Example usage: Please see schur_complement_solver.cc
    164 class SchurEliminatorBase {
    165  public:
    166   virtual ~SchurEliminatorBase() {}
    167 
    168   // Initialize the eliminator. It is the user's responsibilty to call
    169   // this function before calling Eliminate or BackSubstitute. It is
    170   // also the caller's responsibilty to ensure that the
    171   // CompressedRowBlockStructure object passed to this method is the
    172   // same one (or is equivalent to) the one associated with the
    173   // BlockSparseMatrix objects below.
    174   virtual void Init(int num_eliminate_blocks,
    175                     const CompressedRowBlockStructure* bs) = 0;
    176 
    177   // Compute the Schur complement system from the augmented linear
    178   // least squares problem [A;D] x = [b;0]. The left hand side and the
    179   // right hand side of the reduced linear system are returned in lhs
    180   // and rhs respectively.
    181   //
    182   // It is the caller's responsibility to construct and initialize
    183   // lhs. Depending upon the structure of the lhs object passed here,
    184   // the full or a submatrix of the Schur complement will be computed.
    185   //
    186   // Since the Schur complement is a symmetric matrix, only the upper
    187   // triangular part of the Schur complement is computed.
    188   virtual void Eliminate(const BlockSparseMatrix* A,
    189                          const double* b,
    190                          const double* D,
    191                          BlockRandomAccessMatrix* lhs,
    192                          double* rhs) = 0;
    193 
    194   // Given values for the variables z in the F block of A, solve for
    195   // the optimal values of the variables y corresponding to the E
    196   // block in A.
    197   virtual void BackSubstitute(const BlockSparseMatrix* A,
    198                               const double* b,
    199                               const double* D,
    200                               const double* z,
    201                               double* y) = 0;
    202   // Factory
    203   static SchurEliminatorBase* Create(const LinearSolver::Options& options);
    204 };
    205 
    206 // Templated implementation of the SchurEliminatorBase interface. The
    207 // templating is on the sizes of the row, e and f blocks sizes in the
    208 // input matrix. In many problems, the sizes of one or more of these
    209 // blocks are constant, in that case, its worth passing these
    210 // parameters as template arguments so that they are visible to the
    211 // compiler and can be used for compile time optimization of the low
    212 // level linear algebra routines.
    213 //
    214 // This implementation is mulithreaded using OpenMP. The level of
    215 // parallelism is controlled by LinearSolver::Options::num_threads.
    216 template <int kRowBlockSize = Eigen::Dynamic,
    217           int kEBlockSize = Eigen::Dynamic,
    218           int kFBlockSize = Eigen::Dynamic >
    219 class SchurEliminator : public SchurEliminatorBase {
    220  public:
    221   explicit SchurEliminator(const LinearSolver::Options& options)
    222       : num_threads_(options.num_threads) {
    223   }
    224 
    225   // SchurEliminatorBase Interface
    226   virtual ~SchurEliminator();
    227   virtual void Init(int num_eliminate_blocks,
    228                     const CompressedRowBlockStructure* bs);
    229   virtual void Eliminate(const BlockSparseMatrix* A,
    230                          const double* b,
    231                          const double* D,
    232                          BlockRandomAccessMatrix* lhs,
    233                          double* rhs);
    234   virtual void BackSubstitute(const BlockSparseMatrix* A,
    235                               const double* b,
    236                               const double* D,
    237                               const double* z,
    238                               double* y);
    239 
    240  private:
    241   // Chunk objects store combinatorial information needed to
    242   // efficiently eliminate a whole chunk out of the least squares
    243   // problem. Consider the first chunk in the example matrix above.
    244   //
    245   //      [ y1   0   0   0 |  z1    0    0   0    z5]
    246   //      [ y1   0   0   0 |  z1   z2    0   0     0]
    247   //
    248   // One of the intermediate quantities that needs to be calculated is
    249   // for each row the product of the y block transposed with the
    250   // non-zero z block, and the sum of these blocks across rows. A
    251   // temporary array "buffer_" is used for computing and storing them
    252   // and the buffer_layout maps the indices of the z-blocks to
    253   // position in the buffer_ array.  The size of the chunk is the
    254   // number of row blocks/residual blocks for the particular y block
    255   // being considered.
    256   //
    257   // For the example chunk shown above,
    258   //
    259   // size = 2
    260   //
    261   // The entries of buffer_layout will be filled in the following order.
    262   //
    263   // buffer_layout[z1] = 0
    264   // buffer_layout[z5] = y1 * z1
    265   // buffer_layout[z2] = y1 * z1 + y1 * z5
    266   typedef map<int, int> BufferLayoutType;
    267   struct Chunk {
    268     Chunk() : size(0) {}
    269     int size;
    270     int start;
    271     BufferLayoutType buffer_layout;
    272   };
    273 
    274   void ChunkDiagonalBlockAndGradient(
    275       const Chunk& chunk,
    276       const BlockSparseMatrix* A,
    277       const double* b,
    278       int row_block_counter,
    279       typename EigenTypes<kEBlockSize, kEBlockSize>::Matrix* eet,
    280       double* g,
    281       double* buffer,
    282       BlockRandomAccessMatrix* lhs);
    283 
    284   void UpdateRhs(const Chunk& chunk,
    285                  const BlockSparseMatrix* A,
    286                  const double* b,
    287                  int row_block_counter,
    288                  const double* inverse_ete_g,
    289                  double* rhs);
    290 
    291   void ChunkOuterProduct(const CompressedRowBlockStructure* bs,
    292                          const Matrix& inverse_eet,
    293                          const double* buffer,
    294                          const BufferLayoutType& buffer_layout,
    295                          BlockRandomAccessMatrix* lhs);
    296   void EBlockRowOuterProduct(const BlockSparseMatrix* A,
    297                              int row_block_index,
    298                              BlockRandomAccessMatrix* lhs);
    299 
    300 
    301   void NoEBlockRowsUpdate(const BlockSparseMatrix* A,
    302                              const double* b,
    303                              int row_block_counter,
    304                              BlockRandomAccessMatrix* lhs,
    305                              double* rhs);
    306 
    307   void NoEBlockRowOuterProduct(const BlockSparseMatrix* A,
    308                                int row_block_index,
    309                                BlockRandomAccessMatrix* lhs);
    310 
    311   int num_eliminate_blocks_;
    312 
    313   // Block layout of the columns of the reduced linear system. Since
    314   // the f blocks can be of varying size, this vector stores the
    315   // position of each f block in the row/col of the reduced linear
    316   // system. Thus lhs_row_layout_[i] is the row/col position of the
    317   // i^th f block.
    318   vector<int> lhs_row_layout_;
    319 
    320   // Combinatorial structure of the chunks in A. For more information
    321   // see the documentation of the Chunk object above.
    322   vector<Chunk> chunks_;
    323 
    324   // TODO(sameeragarwal): The following two arrays contain per-thread
    325   // storage. They should be refactored into a per thread struct.
    326 
    327   // Buffer to store the products of the y and z blocks generated
    328   // during the elimination phase. buffer_ is of size num_threads *
    329   // buffer_size_. Each thread accesses the chunk
    330   //
    331   //   [thread_id * buffer_size_ , (thread_id + 1) * buffer_size_]
    332   //
    333   scoped_array<double> buffer_;
    334 
    335   // Buffer to store per thread matrix matrix products used by
    336   // ChunkOuterProduct. Like buffer_ it is of size num_threads *
    337   // buffer_size_. Each thread accesses the chunk
    338   //
    339   //   [thread_id * buffer_size_ , (thread_id + 1) * buffer_size_ -1]
    340   //
    341   scoped_array<double> chunk_outer_product_buffer_;
    342 
    343   int buffer_size_;
    344   int num_threads_;
    345   int uneliminated_row_begins_;
    346 
    347   // Locks for the blocks in the right hand side of the reduced linear
    348   // system.
    349   vector<Mutex*> rhs_locks_;
    350 };
    351 
    352 }  // namespace internal
    353 }  // namespace ceres
    354 
    355 #endif  // CERES_INTERNAL_SCHUR_ELIMINATOR_H_
    356