1 // Ceres Solver - A fast non-linear least squares minimizer 2 // Copyright 2012 Google Inc. All rights reserved. 3 // http://code.google.com/p/ceres-solver/ 4 // 5 // Redistribution and use in source and binary forms, with or without 6 // modification, are permitted provided that the following conditions are met: 7 // 8 // * Redistributions of source code must retain the above copyright notice, 9 // this list of conditions and the following disclaimer. 10 // * Redistributions in binary form must reproduce the above copyright notice, 11 // this list of conditions and the following disclaimer in the documentation 12 // and/or other materials provided with the distribution. 13 // * Neither the name of Google Inc. nor the names of its contributors may be 14 // used to endorse or promote products derived from this software without 15 // specific prior written permission. 16 // 17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 21 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 // POSSIBILITY OF SUCH DAMAGE. 28 // 29 // Author: mierle (at) gmail.com (Keir Mierle) 30 // sameeragarwal (at) google.com (Sameer Agarwal) 31 // thadh (at) gmail.com (Thad Hughes) 32 // 33 // This autodiff implementation differs from the one found in 34 // autodiff_cost_function.h by supporting autodiff on cost functions with 35 // variable numbers of parameters with variable sizes. With the other 36 // implementation, all the sizes (both the number of parameter blocks and the 37 // size of each block) must be fixed at compile time. 38 // 39 // The functor API differs slightly from the API for fixed size autodiff; the 40 // expected interface for the cost functors is: 41 // 42 // struct MyCostFunctor { 43 // template<typename T> 44 // bool operator()(T const* const* parameters, T* residuals) const { 45 // // Use parameters[i] to access the i'th parameter block. 46 // } 47 // } 48 // 49 // Since the sizing of the parameters is done at runtime, you must also specify 50 // the sizes after creating the dynamic autodiff cost function. For example: 51 // 52 // DynamicAutoDiffCostFunction<MyCostFunctor, 3> cost_function( 53 // new MyCostFunctor()); 54 // cost_function.AddParameterBlock(5); 55 // cost_function.AddParameterBlock(10); 56 // cost_function.SetNumResiduals(21); 57 // 58 // Under the hood, the implementation evaluates the cost function multiple 59 // times, computing a small set of the derivatives (four by default, controlled 60 // by the Stride template parameter) with each pass. There is a tradeoff with 61 // the size of the passes; you may want to experiment with the stride. 62 63 #ifndef CERES_PUBLIC_DYNAMIC_AUTODIFF_COST_FUNCTION_H_ 64 #define CERES_PUBLIC_DYNAMIC_AUTODIFF_COST_FUNCTION_H_ 65 66 #include <cmath> 67 #include <numeric> 68 #include <vector> 69 70 #include "ceres/cost_function.h" 71 #include "ceres/internal/scoped_ptr.h" 72 #include "ceres/jet.h" 73 #include "glog/logging.h" 74 75 namespace ceres { 76 77 template <typename CostFunctor, int Stride = 4> 78 class DynamicAutoDiffCostFunction : public CostFunction { 79 public: 80 explicit DynamicAutoDiffCostFunction(CostFunctor* functor) 81 : functor_(functor) {} 82 83 virtual ~DynamicAutoDiffCostFunction() {} 84 85 void AddParameterBlock(int size) { 86 mutable_parameter_block_sizes()->push_back(size); 87 } 88 89 void SetNumResiduals(int num_residuals) { 90 set_num_residuals(num_residuals); 91 } 92 93 virtual bool Evaluate(double const* const* parameters, 94 double* residuals, 95 double** jacobians) const { 96 CHECK_GT(num_residuals(), 0) 97 << "You must call DynamicAutoDiffCostFunction::SetNumResiduals() " 98 << "before DynamicAutoDiffCostFunction::Evaluate()."; 99 100 if (jacobians == NULL) { 101 return (*functor_)(parameters, residuals); 102 } 103 104 // The difficulty with Jets, as implemented in Ceres, is that they were 105 // originally designed for strictly compile-sized use. At this point, there 106 // is a large body of code that assumes inside a cost functor it is 107 // acceptable to do e.g. T(1.5) and get an appropriately sized jet back. 108 // 109 // Unfortunately, it is impossible to communicate the expected size of a 110 // dynamically sized jet to the static instantiations that existing code 111 // depends on. 112 // 113 // To work around this issue, the solution here is to evaluate the 114 // jacobians in a series of passes, each one computing Stripe * 115 // num_residuals() derivatives. This is done with small, fixed-size jets. 116 const int num_parameter_blocks = parameter_block_sizes().size(); 117 const int num_parameters = std::accumulate(parameter_block_sizes().begin(), 118 parameter_block_sizes().end(), 119 0); 120 121 // Allocate scratch space for the strided evaluation. 122 vector<Jet<double, Stride> > input_jets(num_parameters); 123 vector<Jet<double, Stride> > output_jets(num_residuals()); 124 125 // Make the parameter pack that is sent to the functor (reused). 126 vector<Jet<double, Stride>* > jet_parameters(num_parameter_blocks, 127 static_cast<Jet<double, Stride>* >(NULL)); 128 int num_active_parameters = 0; 129 130 // To handle constant parameters between non-constant parameter blocks, the 131 // start position --- a raw parameter index --- of each contiguous block of 132 // non-constant parameters is recorded in start_derivative_section. 133 vector<int> start_derivative_section; 134 bool in_derivative_section = false; 135 int parameter_cursor = 0; 136 137 // Discover the derivative sections and set the parameter values. 138 for (int i = 0; i < num_parameter_blocks; ++i) { 139 jet_parameters[i] = &input_jets[parameter_cursor]; 140 141 const int parameter_block_size = parameter_block_sizes()[i]; 142 if (jacobians[i] != NULL) { 143 if (!in_derivative_section) { 144 start_derivative_section.push_back(parameter_cursor); 145 in_derivative_section = true; 146 } 147 148 num_active_parameters += parameter_block_size; 149 } else { 150 in_derivative_section = false; 151 } 152 153 for (int j = 0; j < parameter_block_size; ++j, parameter_cursor++) { 154 input_jets[parameter_cursor].a = parameters[i][j]; 155 } 156 } 157 158 // When `num_active_parameters % Stride != 0` then it can be the case 159 // that `active_parameter_count < Stride` while parameter_cursor is less 160 // than the total number of parameters and with no remaining non-constant 161 // parameter blocks. Pushing parameter_cursor (the total number of 162 // parameters) as a final entry to start_derivative_section is required 163 // because if a constant parameter block is encountered after the 164 // last non-constant block then current_derivative_section is incremented 165 // and would otherwise index an invalid position in 166 // start_derivative_section. Setting the final element to the total number 167 // of parameters means that this can only happen at most once in the loop 168 // below. 169 start_derivative_section.push_back(parameter_cursor); 170 171 // Evaluate all of the strides. Each stride is a chunk of the derivative to 172 // evaluate, typically some size proportional to the size of the SIMD 173 // registers of the CPU. 174 int num_strides = static_cast<int>(ceil(num_active_parameters / 175 static_cast<float>(Stride))); 176 177 int current_derivative_section = 0; 178 int current_derivative_section_cursor = 0; 179 180 for (int pass = 0; pass < num_strides; ++pass) { 181 // Set most of the jet components to zero, except for 182 // non-constant #Stride parameters. 183 const int initial_derivative_section = current_derivative_section; 184 const int initial_derivative_section_cursor = 185 current_derivative_section_cursor; 186 187 int active_parameter_count = 0; 188 parameter_cursor = 0; 189 190 for (int i = 0; i < num_parameter_blocks; ++i) { 191 for (int j = 0; j < parameter_block_sizes()[i]; 192 ++j, parameter_cursor++) { 193 input_jets[parameter_cursor].v.setZero(); 194 if (active_parameter_count < Stride && 195 parameter_cursor >= ( 196 start_derivative_section[current_derivative_section] + 197 current_derivative_section_cursor)) { 198 if (jacobians[i] != NULL) { 199 input_jets[parameter_cursor].v[active_parameter_count] = 1.0; 200 ++active_parameter_count; 201 ++current_derivative_section_cursor; 202 } else { 203 ++current_derivative_section; 204 current_derivative_section_cursor = 0; 205 } 206 } 207 } 208 } 209 210 if (!(*functor_)(&jet_parameters[0], &output_jets[0])) { 211 return false; 212 } 213 214 // Copy the pieces of the jacobians into their final place. 215 active_parameter_count = 0; 216 217 current_derivative_section = initial_derivative_section; 218 current_derivative_section_cursor = initial_derivative_section_cursor; 219 220 for (int i = 0, parameter_cursor = 0; i < num_parameter_blocks; ++i) { 221 for (int j = 0; j < parameter_block_sizes()[i]; 222 ++j, parameter_cursor++) { 223 if (active_parameter_count < Stride && 224 parameter_cursor >= ( 225 start_derivative_section[current_derivative_section] + 226 current_derivative_section_cursor)) { 227 if (jacobians[i] != NULL) { 228 for (int k = 0; k < num_residuals(); ++k) { 229 jacobians[i][k * parameter_block_sizes()[i] + j] = 230 output_jets[k].v[active_parameter_count]; 231 } 232 ++active_parameter_count; 233 ++current_derivative_section_cursor; 234 } else { 235 ++current_derivative_section; 236 current_derivative_section_cursor = 0; 237 } 238 } 239 } 240 } 241 242 // Only copy the residuals over once (even though we compute them on 243 // every loop). 244 if (pass == num_strides - 1) { 245 for (int k = 0; k < num_residuals(); ++k) { 246 residuals[k] = output_jets[k].a; 247 } 248 } 249 } 250 return true; 251 } 252 253 private: 254 internal::scoped_ptr<CostFunctor> functor_; 255 }; 256 257 } // namespace ceres 258 259 #endif // CERES_PUBLIC_DYNAMIC_AUTODIFF_COST_FUNCTION_H_ 260