// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2010, 2011, 2012 Google Inc. All rights reserved.
// http://code.google.com/p/ceres-solver/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
//   used to endorse or promote products derived from this software without
//   specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: sameeragarwal (at) google.com (Sameer Agarwal)
//
// TODO(sameeragarwal): row_block_counter can perhaps be replaced by
// Chunk::start ?

#ifndef CERES_INTERNAL_SCHUR_ELIMINATOR_IMPL_H_
#define CERES_INTERNAL_SCHUR_ELIMINATOR_IMPL_H_

#ifdef CERES_USE_OPENMP
#include <omp.h>
#endif

// Eigen has an internal threshold switching between different matrix
// multiplication algorithms. In particular for matrices larger than
// EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD it uses a cache friendly
// matrix matrix product algorithm that has a higher setup cost. For
// matrix sizes close to this threshold, especially when the matrices
// are thin and long, the default choice may not be optimal. This is
// the case for us, as the default choice caused a 30% performance
// regression when we moved from Eigen2 to Eigen3.
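//
// Note: for the override below to take effect, it must appear before
// any Eigen header is included, which is why it precedes the
// "Eigen/Dense" include further down.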
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 10

#include <algorithm>
#include <map>
#include <glog/logging.h>
#include "Eigen/Dense"
#include "ceres/block_random_access_matrix.h"
#include "ceres/block_sparse_matrix.h"
#include "ceres/block_structure.h"
#include "ceres/map_util.h"
#include "ceres/schur_eliminator.h"
#include "ceres/stl_util.h"
#include "ceres/internal/eigen.h"
#include "ceres/internal/scoped_ptr.h"

namespace ceres {
namespace internal {

template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
SchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::~SchurEliminator() {
  STLDeleteElements(&rhs_locks_);
}

template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void
SchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::
Init(int num_eliminate_blocks, const CompressedRowBlockStructure* bs) {
  CHECK_GT(num_eliminate_blocks, 0)
      << "SchurComplementSolver cannot be initialized with "
      << "num_eliminate_blocks = 0.";

  num_eliminate_blocks_ = num_eliminate_blocks;

  const int num_col_blocks = bs->cols.size();
  const int num_row_blocks = bs->rows.size();

  buffer_size_ = 1;
  chunks_.clear();
  lhs_row_layout_.clear();

  int lhs_num_rows = 0;
  // Add a map object for each block in the reduced linear system
  // and build the row/column block structure of the reduced linear
  // system.
  lhs_row_layout_.resize(num_col_blocks - num_eliminate_blocks_);
  for (int i = num_eliminate_blocks_; i < num_col_blocks; ++i) {
    lhs_row_layout_[i - num_eliminate_blocks_] = lhs_num_rows;
    lhs_num_rows += bs->cols[i].size;
  }

  int r = 0;
  // Iterate over the row blocks of A, and detect the chunks. The
  // matrix should already have been ordered so that all rows
  // containing the same y block are vertically contiguous. Along
  // the way also compute the amount of space each chunk will need
  // to perform the elimination.
  while (r < num_row_blocks) {
    const int chunk_block_id = bs->rows[r].cells.front().block_id;
    if (chunk_block_id >= num_eliminate_blocks_) {
      break;
    }

    chunks_.push_back(Chunk());
    Chunk& chunk = chunks_.back();
    chunk.size = 0;
    chunk.start = r;
    int buffer_size = 0;
    const int e_block_size = bs->cols[chunk_block_id].size;

    // Add to the chunk until the first block in the row is
    // different from the one in the first row for the chunk.
    while (r + chunk.size < num_row_blocks) {
      const CompressedRow& row = bs->rows[r + chunk.size];
      if (row.cells.front().block_id != chunk_block_id) {
        break;
      }

      // Iterate over the blocks in the row, ignoring the first
      // block since it is the one to be eliminated.
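      //
      // chunk.buffer_layout maps the column block id of each f_block
      // appearing in this chunk to its offset (in doubles) in the
      // per-thread scratch buffer where the corresponding E'F product
      // will be accumulated; buffer_size tracks the total scratch
      // space the chunk needs.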
      for (int c = 1; c < row.cells.size(); ++c) {
        const Cell& cell = row.cells[c];
        if (InsertIfNotPresent(
                &(chunk.buffer_layout), cell.block_id, buffer_size)) {
          buffer_size += e_block_size * bs->cols[cell.block_id].size;
        }
      }

      buffer_size_ = max(buffer_size, buffer_size_);
      ++chunk.size;
    }

    CHECK_GT(chunk.size, 0);
    r += chunk.size;
  }
  const Chunk& chunk = chunks_.back();

  uneliminated_row_begins_ = chunk.start + chunk.size;
  if (num_threads_ > 1) {
    random_shuffle(chunks_.begin(), chunks_.end());
  }

  buffer_.reset(new double[buffer_size_ * num_threads_]);

  STLDeleteElements(&rhs_locks_);
  rhs_locks_.resize(num_col_blocks - num_eliminate_blocks_);
  for (int i = 0; i < num_col_blocks - num_eliminate_blocks_; ++i) {
    rhs_locks_[i] = new Mutex;
  }

  VLOG(1) << "Eliminator threads: " << num_threads_;
}

template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void
SchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::
Eliminate(const BlockSparseMatrixBase* A,
          const double* b,
          const double* D,
          BlockRandomAccessMatrix* lhs,
          double* rhs) {
  if (lhs->num_rows() > 0) {
    lhs->SetZero();
    VectorRef(rhs, lhs->num_rows()).setZero();
  }

  const CompressedRowBlockStructure* bs = A->block_structure();
  const int num_col_blocks = bs->cols.size();

  // Add the diagonal to the Schur complement.
  if (D != NULL) {
#pragma omp parallel for num_threads(num_threads_) schedule(dynamic)
    for (int i = num_eliminate_blocks_; i < num_col_blocks; ++i) {
      const int block_id = i - num_eliminate_blocks_;
      int r, c, row_stride, col_stride;
      CellInfo* cell_info = lhs->GetCell(block_id, block_id,
                                         &r, &c,
                                         &row_stride, &col_stride);
      if (cell_info != NULL) {
        const int block_size = bs->cols[i].size;
        typename EigenTypes<kFBlockSize>::ConstVectorRef
            diag(D + bs->cols[i].position, block_size);

        CeresMutexLock l(&cell_info->m);
        MatrixRef m(cell_info->values, row_stride, col_stride);
        m.block(r, c, block_size, block_size).diagonal()
            += diag.array().square().matrix();
      }
    }
  }

  // Eliminate y blocks one chunk at a time. For each chunk,
  // compute the entries of the normal equations and the gradient
  // vector block corresponding to the y block and then apply
  // Gaussian elimination to them. The matrix ete stores the normal
  // matrix corresponding to the block being eliminated and array
  // buffer_ contains the non-zero blocks in the row corresponding
  // to this y block in the normal equations. This computation is
  // done in ChunkDiagonalBlockAndGradient. UpdateRhs then applies
  // Gaussian elimination to the rhs of the normal equations,
  // updating the rhs of the reduced linear system by modifying rhs
  // blocks for all the z blocks that share a row block/residual
  // term with the y block. ChunkOuterProduct does the
  // corresponding operation for the lhs of the reduced linear
  // system.
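  //
  // In terms of the normal equations restricted to a chunk,
  //
  //   [ E'E  E'F ] [y]   [E'b]
  //   [ F'E  F'F ] [z] = [F'b]
  //
  // eliminating y yields the reduced system
  //
  //   (F'F - F'E(E'E)^{-1}E'F) z = F'b - F'E(E'E)^{-1}E'b
  //
  // whose left and right hand side contributions are what the loop
  // below accumulates into lhs and rhs.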
#pragma omp parallel for num_threads(num_threads_) schedule(dynamic)
  for (int i = 0; i < chunks_.size(); ++i) {
#ifdef CERES_USE_OPENMP
    int thread_id = omp_get_thread_num();
#else
    int thread_id = 0;
#endif
    double* buffer = buffer_.get() + thread_id * buffer_size_;
    const Chunk& chunk = chunks_[i];
    const int e_block_id = bs->rows[chunk.start].cells.front().block_id;
    const int e_block_size = bs->cols[e_block_id].size;

    VectorRef(buffer, buffer_size_).setZero();

    typename EigenTypes<kEBlockSize, kEBlockSize>::Matrix
        ete(e_block_size, e_block_size);

    if (D != NULL) {
      const typename EigenTypes<kEBlockSize>::ConstVectorRef
          diag(D + bs->cols[e_block_id].position, e_block_size);
      ete = diag.array().square().matrix().asDiagonal();
    } else {
      ete.setZero();
    }

    typename EigenTypes<kEBlockSize>::Vector g(e_block_size);
    g.setZero();

    // We are going to be computing
    //
    //   S += F'F - F'E(E'E)^{-1}E'F
    //
    // for each Chunk. The computation is broken down into a number of
    // function calls as below.

    // Compute the outer product of the e_blocks with themselves (ete
    // = E'E). Compute the product of the e_blocks with the
    // corresponding f_blocks (buffer = E'F), the gradient of the terms
    // in this chunk (g) and add the outer product of the f_blocks to
    // the Schur complement (S += F'F).
    ChunkDiagonalBlockAndGradient(
        chunk, A, b, chunk.start, &ete, &g, buffer, lhs);

    // Normally one wouldn't compute the inverse explicitly, but
    // e_block_size will typically be a small number like 3, in
    // which case it's much faster to compute the inverse once and
    // use it to multiply other matrices/vectors instead of doing a
    // Solve call over and over again.
    typename EigenTypes<kEBlockSize, kEBlockSize>::Matrix inverse_ete =
        ete
        .template selfadjointView<Eigen::Upper>()
        .ldlt()
        .solve(Matrix::Identity(e_block_size, e_block_size));

    // For the current chunk compute and update the rhs of the reduced
    // linear system.
    //
    //   rhs = F'b - F'E(E'E)^(-1) E'b
    UpdateRhs(chunk, A, b, chunk.start, inverse_ete * g, rhs);

    // S -= F'E(E'E)^{-1}E'F
    ChunkOuterProduct(bs, inverse_ete, buffer, chunk.buffer_layout, lhs);
  }

  // For rows with no e_blocks, the Schur complement update reduces to
  // S += F'F.
  NoEBlockRowsUpdate(A, b, uneliminated_row_begins_, lhs, rhs);
}

template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void
SchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::
BackSubstitute(const BlockSparseMatrixBase* A,
               const double* b,
               const double* D,
               const double* z,
               double* y) {
  const CompressedRowBlockStructure* bs = A->block_structure();
#pragma omp parallel for num_threads(num_threads_) schedule(dynamic)
  for (int i = 0; i < chunks_.size(); ++i) {
    const Chunk& chunk = chunks_[i];
    const int e_block_id = bs->rows[chunk.start].cells.front().block_id;
    const int e_block_size = bs->cols[e_block_id].size;

    typename EigenTypes<kEBlockSize>::VectorRef y_block(
        y + bs->cols[e_block_id].position, e_block_size);

    typename EigenTypes<kEBlockSize, kEBlockSize>::Matrix
        ete(e_block_size, e_block_size);
    if (D != NULL) {
      const typename EigenTypes<kEBlockSize>::ConstVectorRef
          diag(D + bs->cols[e_block_id].position, e_block_size);
      ete = diag.array().square().matrix().asDiagonal();
    } else {
      ete.setZero();
    }

    for (int j = 0; j < chunk.size; ++j) {
      const CompressedRow& row = bs->rows[chunk.start + j];
      const double* row_values = A->RowBlockValues(chunk.start + j);
      const Cell& e_cell = row.cells.front();
      DCHECK_EQ(e_block_id, e_cell.block_id);
      const typename EigenTypes<kRowBlockSize, kEBlockSize>::ConstMatrixRef
          e_block(row_values + e_cell.position,
                  row.block.size,
                  e_block_size);

      typename EigenTypes<kRowBlockSize>::Vector
          sj =
          typename EigenTypes<kRowBlockSize>::ConstVectorRef
          (b + bs->rows[chunk.start + j].block.position,
           row.block.size);

      for (int c = 1; c < row.cells.size(); ++c) {
        const int f_block_id = row.cells[c].block_id;
        const int f_block_size = bs->cols[f_block_id].size;
        const typename EigenTypes<kRowBlockSize, kFBlockSize>::ConstMatrixRef
            f_block(row_values + row.cells[c].position,
                    row.block.size, f_block_size);
        const int r_block = f_block_id - num_eliminate_blocks_;

        sj -= f_block *
            typename EigenTypes<kFBlockSize>::ConstVectorRef
            (z + lhs_row_layout_[r_block], f_block_size);
      }

      y_block += e_block.transpose() * sj;
      ete.template selfadjointView<Eigen::Upper>()
         .rankUpdate(e_block.transpose(), 1.0);
    }

    y_block =
        ete
        .template selfadjointView<Eigen::Upper>()
        .ldlt()
        .solve(y_block);
  }
}

// Update the rhs of the reduced linear system. Compute
//
//   F'b - F'E(E'E)^(-1) E'b
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void
SchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::
UpdateRhs(const Chunk& chunk,
          const BlockSparseMatrixBase* A,
          const double* b,
          int row_block_counter,
          const Vector& inverse_ete_g,
          double* rhs) {
  const CompressedRowBlockStructure* bs = A->block_structure();
  const int e_block_size = inverse_ete_g.rows();
  int b_pos = bs->rows[row_block_counter].block.position;
  for (int j = 0; j < chunk.size; ++j) {
    const CompressedRow& row = bs->rows[row_block_counter + j];
    const double *row_values = A->RowBlockValues(row_block_counter + j);
    const Cell& e_cell = row.cells.front();

    const typename EigenTypes<kRowBlockSize, kEBlockSize>::ConstMatrixRef
        e_block(row_values + e_cell.position,
                row.block.size,
                e_block_size);

    const typename EigenTypes<kRowBlockSize>::Vector
        sj =
        typename EigenTypes<kRowBlockSize>::ConstVectorRef
        (b + b_pos, row.block.size) - e_block * (inverse_ete_g);

    for (int c = 1; c < row.cells.size(); ++c) {
      const int block_id = row.cells[c].block_id;
      const int block_size = bs->cols[block_id].size;
      const typename EigenTypes<kRowBlockSize, kFBlockSize>::ConstMatrixRef
          b(row_values + row.cells[c].position,
            row.block.size, block_size);

      const int block = block_id - num_eliminate_blocks_;
      CeresMutexLock l(rhs_locks_[block]);
      typename EigenTypes<kFBlockSize>::VectorRef
          (rhs + lhs_row_layout_[block], block_size).noalias()
          += b.transpose() * sj;
    }
    b_pos += row.block.size;
  }
}

// Given a Chunk - a set of rows with the same e_block, e.g. the
// following Chunk with two rows
//
//                 E                      F
//      [ y11  0  0  0 |  z11   0    0   0  z51 ]
//      [ y12  0  0  0 |  z12  z22   0   0   0  ]
//
// this function computes two matrices. The diagonal block matrix
//
//   ete = y11' * y11 + y12' * y12
//
// and the off diagonal blocks in the Gauss Newton Hessian
//
//   buffer = [y11' * z11 + y12' * z12, y12' * z22, y11' * z51]
//
// which are zero compressed versions of the block sparse matrices E'E
// and E'F.
//
// It also computes the gradient of the e_block, E'b.
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void
SchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::
ChunkDiagonalBlockAndGradient(
    const Chunk& chunk,
    const BlockSparseMatrixBase* A,
    const double* b,
    int row_block_counter,
    typename EigenTypes<kEBlockSize, kEBlockSize>::Matrix* ete,
    typename EigenTypes<kEBlockSize>::Vector* g,
    double* buffer,
    BlockRandomAccessMatrix* lhs) {
  const CompressedRowBlockStructure* bs = A->block_structure();

  int b_pos = bs->rows[row_block_counter].block.position;
  const int e_block_size = ete->rows();

  // Iterate over the rows in this chunk, for each row, compute the
  // contribution of its F blocks to the Schur complement, the
  // contribution of its E block to the matrix E'E (ete), and the
  // corresponding block in the gradient vector.
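  //
  // Note that only the upper triangular part of ete is updated here
  // (via rankUpdate); consumers therefore read it through
  // selfadjointView<Eigen::Upper>().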
  for (int j = 0; j < chunk.size; ++j) {
    const CompressedRow& row = bs->rows[row_block_counter + j];
    const double *row_values = A->RowBlockValues(row_block_counter + j);

    if (row.cells.size() > 1) {
      EBlockRowOuterProduct(A, row_block_counter + j, lhs);
    }

    // Extract the e_block, ETE += E_i' E_i
    const Cell& e_cell = row.cells.front();
    const typename EigenTypes<kRowBlockSize, kEBlockSize>::ConstMatrixRef
        e_block(row_values + e_cell.position,
                row.block.size,
                e_block_size);

    ete->template selfadjointView<Eigen::Upper>()
       .rankUpdate(e_block.transpose(), 1.0);

    // g += E_i' b_i
    g->noalias() += e_block.transpose() *
        typename EigenTypes<kRowBlockSize>::ConstVectorRef
        (b + b_pos, row.block.size);

    // buffer = E'F. This computation is done by iterating over the
    // f_blocks for each row in the chunk.
    for (int c = 1; c < row.cells.size(); ++c) {
      const int f_block_id = row.cells[c].block_id;
      const int f_block_size = bs->cols[f_block_id].size;
      const typename EigenTypes<kRowBlockSize, kFBlockSize>::ConstMatrixRef
          f_block(row_values + row.cells[c].position,
                  row.block.size, f_block_size);

      double* buffer_ptr =
          buffer + FindOrDie(chunk.buffer_layout, f_block_id);

      typename EigenTypes<kEBlockSize, kFBlockSize>::MatrixRef
          (buffer_ptr, e_block_size, f_block_size).noalias()
          += e_block.transpose() * f_block;
    }
    b_pos += row.block.size;
  }
}

// Compute the outer product F'E(E'E)^{-1}E'F and subtract it from the
// Schur complement matrix, i.e.
//
//   S -= F'E(E'E)^{-1}E'F.
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void
SchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::
ChunkOuterProduct(const CompressedRowBlockStructure* bs,
                  const Matrix& inverse_ete,
                  const double* buffer,
                  const BufferLayoutType& buffer_layout,
                  BlockRandomAccessMatrix* lhs) {
  // This is the most computationally expensive part of this
  // code. Profiling experiments reveal that the bottleneck is not the
  // computation of the right-hand matrix product, but memory
  // references to the left hand side.
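  //
  // buffer holds one E'F block (b_i) per f_block present in the
  // chunk. For every pair of f_blocks (block1, block2) with
  // block2 >= block1, the loops below compute b1' * (E'E)^{-1} * b2
  // and subtract it from the corresponding cell of the Schur
  // complement, so only the upper triangular blocks are written.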
  const int e_block_size = inverse_ete.rows();
  BufferLayoutType::const_iterator it1 = buffer_layout.begin();
  // S(i,j) -= b_i' * ete^{-1} * b_j
  for (; it1 != buffer_layout.end(); ++it1) {
    const int block1 = it1->first - num_eliminate_blocks_;
    const int block1_size = bs->cols[it1->first].size;

    const typename EigenTypes<kEBlockSize, kFBlockSize>::ConstMatrixRef
        b1(buffer + it1->second, e_block_size, block1_size);
    const typename EigenTypes<kFBlockSize, kEBlockSize>::Matrix
        b1_transpose_inverse_ete = b1.transpose() * inverse_ete;

    BufferLayoutType::const_iterator it2 = it1;
    for (; it2 != buffer_layout.end(); ++it2) {
      const int block2 = it2->first - num_eliminate_blocks_;

      int r, c, row_stride, col_stride;
      CellInfo* cell_info = lhs->GetCell(block1, block2,
                                         &r, &c,
                                         &row_stride, &col_stride);
      if (cell_info == NULL) {
        continue;
      }

      const int block2_size = bs->cols[it2->first].size;
      const typename EigenTypes<kEBlockSize, kFBlockSize>::ConstMatrixRef
          b2(buffer + it2->second, e_block_size, block2_size);

      CeresMutexLock l(&cell_info->m);
      MatrixRef m(cell_info->values, row_stride, col_stride);

      // We explicitly construct a block object here instead of using
      // m.block(), as the m.block() variant of the constructor does not
      // allow mixing of template sizing and runtime sizing parameters
      // like the Matrix class does.
      Eigen::Block<MatrixRef, kFBlockSize, kFBlockSize>
          block(m, r, c, block1_size, block2_size);
#ifdef CERES_WORK_AROUND_ANDROID_NDK_COMPILER_BUG
      // Removing the ".noalias()" annotation on the following statement is
      // necessary to produce a correct build with the Android NDK, including
      // versions 6, 7, 8, and 8b, when built with STLPort and the
      // non-standalone toolchain (i.e. ndk-build). This appears to be a
      // compiler bug; if the workaround is not in place, the line
      //
      //   block.noalias() -= b1_transpose_inverse_ete * b2;
      //
      // gets compiled to
      //
      //   block.noalias() += b1_transpose_inverse_ete * b2;
      //
      // which breaks Schur elimination. Introducing a temporary by removing
      // the .noalias() annotation causes the issue to disappear. Tracking
      // this issue down was tricky, since the test suite doesn't run when
      // built with the non-standalone toolchain.
      //
      // TODO(keir): Make a reproduction case for this and send it upstream.
      block -= b1_transpose_inverse_ete * b2;
#else
      block.noalias() -= b1_transpose_inverse_ete * b2;
#endif  // CERES_WORK_AROUND_ANDROID_NDK_COMPILER_BUG
    }
  }
}

// For rows with no e_blocks, the Schur complement update reduces to
// S += F'F. This function iterates over the rows of A with no e_block,
// and calls NoEBlockRowOuterProduct on each row.
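//
// In addition to the S += F'F term, each such row also contributes
// F'b to the right hand side of the reduced linear system; that
// contribution is accumulated directly in the loop below before
// NoEBlockRowOuterProduct is called to update the lhs.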
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void
SchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::
NoEBlockRowsUpdate(const BlockSparseMatrixBase* A,
                   const double* b,
                   int row_block_counter,
                   BlockRandomAccessMatrix* lhs,
                   double* rhs) {
  const CompressedRowBlockStructure* bs = A->block_structure();
  for (; row_block_counter < bs->rows.size(); ++row_block_counter) {
    const CompressedRow& row = bs->rows[row_block_counter];
    const double *row_values = A->RowBlockValues(row_block_counter);
    for (int c = 0; c < row.cells.size(); ++c) {
      const int block_id = row.cells[c].block_id;
      const int block_size = bs->cols[block_id].size;
      const int block = block_id - num_eliminate_blocks_;
      VectorRef(rhs + lhs_row_layout_[block], block_size).noalias()
          += (ConstMatrixRef(row_values + row.cells[c].position,
                             row.block.size, block_size).transpose() *
              ConstVectorRef(b + row.block.position, row.block.size));
    }
    NoEBlockRowOuterProduct(A, row_block_counter, lhs);
  }
}


// A row r of A, which has no e_blocks, gets added to the Schur
// complement as S += r'r. This function is responsible for computing
// the contribution of a single row r to the Schur complement. It is
// very similar in structure to EBlockRowOuterProduct except for
// one difference. It does not use any of the template
// parameters. This is because the algorithm used for detecting the
// static structure of the matrix A only pays attention to rows with
// e_blocks. This is because rows without e_blocks are rare and
// typically arise from regularization terms in the original
// optimization problem, and have a very different structure than the
// rows with e_blocks. Including them in the static structure
// detection would lead to most template parameters being set to
// dynamic. Since the number of rows without e_blocks is small, the
// lack of templating is not an issue.
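//
// As with the other Schur complement updates, only cells on or above
// the block diagonal are written: the diagonal block is updated
// through its upper triangle via rankUpdate, and the inner loop below
// only visits cells with block2 > block1.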
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void
SchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::
NoEBlockRowOuterProduct(const BlockSparseMatrixBase* A,
                        int row_block_index,
                        BlockRandomAccessMatrix* lhs) {
  const CompressedRowBlockStructure* bs = A->block_structure();
  const CompressedRow& row = bs->rows[row_block_index];
  const double *row_values = A->RowBlockValues(row_block_index);
  for (int i = 0; i < row.cells.size(); ++i) {
    const int block1 = row.cells[i].block_id - num_eliminate_blocks_;
    DCHECK_GE(block1, 0);

    const int block1_size = bs->cols[row.cells[i].block_id].size;
    const ConstMatrixRef b1(row_values + row.cells[i].position,
                            row.block.size, block1_size);
    int r, c, row_stride, col_stride;
    CellInfo* cell_info = lhs->GetCell(block1, block1,
                                       &r, &c,
                                       &row_stride, &col_stride);
    if (cell_info != NULL) {
      CeresMutexLock l(&cell_info->m);
      MatrixRef m(cell_info->values, row_stride, col_stride);
      m.block(r, c, block1_size, block1_size)
          .selfadjointView<Eigen::Upper>()
          .rankUpdate(b1.transpose(), 1.0);
    }

    for (int j = i + 1; j < row.cells.size(); ++j) {
      const int block2 = row.cells[j].block_id - num_eliminate_blocks_;
      DCHECK_GE(block2, 0);
      DCHECK_LT(block1, block2);
      int r, c, row_stride, col_stride;
      CellInfo* cell_info = lhs->GetCell(block1, block2,
                                         &r, &c,
                                         &row_stride, &col_stride);
      if (cell_info == NULL) {
        continue;
      }

      const int block2_size = bs->cols[row.cells[j].block_id].size;
      CeresMutexLock l(&cell_info->m);
      MatrixRef m(cell_info->values, row_stride, col_stride);
      m.block(r, c, block1_size, block2_size).noalias() +=
          b1.transpose() * ConstMatrixRef(row_values + row.cells[j].position,
                                          row.block.size,
                                          block2_size);
    }
  }
}

// For a row with an e_block, compute the contribution S += F'F. This
// function has the same structure as NoEBlockRowOuterProduct, except
// that this function uses the template parameters.
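//
// Note that the outer loop below starts at i = 1, since cell 0 of the
// row holds the e_block, which does not contribute to F'F.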
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void
SchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::
EBlockRowOuterProduct(const BlockSparseMatrixBase* A,
                      int row_block_index,
                      BlockRandomAccessMatrix* lhs) {
  const CompressedRowBlockStructure* bs = A->block_structure();
  const CompressedRow& row = bs->rows[row_block_index];
  const double *row_values = A->RowBlockValues(row_block_index);
  for (int i = 1; i < row.cells.size(); ++i) {
    const int block1 = row.cells[i].block_id - num_eliminate_blocks_;
    DCHECK_GE(block1, 0);

    const int block1_size = bs->cols[row.cells[i].block_id].size;
    const typename EigenTypes<kRowBlockSize, kFBlockSize>::ConstMatrixRef
        b1(row_values + row.cells[i].position,
           row.block.size, block1_size);
    {
      int r, c, row_stride, col_stride;
      CellInfo* cell_info = lhs->GetCell(block1, block1,
                                         &r, &c,
                                         &row_stride, &col_stride);
      if (cell_info == NULL) {
        continue;
      }

      CeresMutexLock l(&cell_info->m);
      MatrixRef m(cell_info->values, row_stride, col_stride);

      Eigen::Block<MatrixRef, kFBlockSize, kFBlockSize>
          block(m, r, c, block1_size, block1_size);
      block.template selfadjointView<Eigen::Upper>()
           .rankUpdate(b1.transpose(), 1.0);
    }

    for (int j = i + 1; j < row.cells.size(); ++j) {
      const int block2 = row.cells[j].block_id - num_eliminate_blocks_;
      DCHECK_GE(block2, 0);
      DCHECK_LT(block1, block2);
      const int block2_size = bs->cols[row.cells[j].block_id].size;
      int r, c, row_stride, col_stride;
      CellInfo* cell_info = lhs->GetCell(block1, block2,
                                         &r, &c,
                                         &row_stride, &col_stride);
      if (cell_info == NULL) {
        continue;
      }

      const typename EigenTypes<kRowBlockSize, kFBlockSize>::ConstMatrixRef
          b2(row_values + row.cells[j].position,
             row.block.size,
             block2_size);

      CeresMutexLock l(&cell_info->m);
      MatrixRef m(cell_info->values, row_stride, col_stride);
      Eigen::Block<MatrixRef, kFBlockSize, kFBlockSize>
          block(m, r, c, block1_size, block2_size);
      block.noalias() += b1.transpose() * b2;
    }
  }
}

}  // namespace internal
}  // namespace ceres

#endif  // CERES_INTERNAL_SCHUR_ELIMINATOR_IMPL_H_