1 /*M/////////////////////////////////////////////////////////////////////////////////////// 2 // 3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 4 // 5 // By downloading, copying, installing or using the software you agree to this license. 6 // If you do not agree to this license, do not download, install, 7 // copy or use the software. 8 // 9 // 10 // License Agreement 11 // For Open Source Computer Vision Library 12 // 13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. 14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved. 15 // Third party copyrights are property of their respective owners. 16 // 17 // Redistribution and use in source and binary forms, with or without modification, 18 // are permitted provided that the following conditions are met: 19 // 20 // * Redistribution's of source code must retain the above copyright notice, 21 // this list of conditions and the following disclaimer. 22 // 23 // * Redistribution's in binary form must reproduce the above copyright notice, 24 // this list of conditions and the following disclaimer in the documentation 25 // and/or other materials provided with the distribution. 26 // 27 // * The name of the copyright holders may not be used to endorse or promote products 28 // derived from this software without specific prior written permission. 29 // 30 // This software is provided by the copyright holders and contributors "as is" and 31 // any express or implied warranties, including, but not limited to, the implied 32 // warranties of merchantability and fitness for a particular purpose are disclaimed. 33 // In no event shall the Intel Corporation or contributors be liable for any direct, 34 // indirect, incidental, special, exemplary, or consequential damages 35 // (including, but not limited to, procurement of substitute goods or services; 36 // loss of use, data, or profits; or business interruption) however caused 37 // and on any theory of liability, whether in contract, strict liability, 38 // or tort (including negligence or otherwise) arising in any way out of 39 // the use of this software, even if advised of the possibility of such damage. 40 // 41 //M*/ 42 43 #include "precomp.hpp" 44 45 #if !defined HAVE_CUDA || defined(CUDA_DISABLER) 46 47 Ptr<OpticalFlowDual_TVL1> cv::cuda::OpticalFlowDual_TVL1::create(double, double, double, int, int, double, int, double, double, bool) { throw_no_cuda(); return Ptr<OpticalFlowDual_TVL1>(); } 48 49 #else 50 51 using namespace cv; 52 using namespace cv::cuda; 53 54 namespace tvl1flow 55 { 56 void centeredGradient(PtrStepSzf src, PtrStepSzf dx, PtrStepSzf dy, cudaStream_t stream); 57 void warpBackward(PtrStepSzf I0, PtrStepSzf I1, PtrStepSzf I1x, PtrStepSzf I1y, 58 PtrStepSzf u1, PtrStepSzf u2, 59 PtrStepSzf I1w, PtrStepSzf I1wx, PtrStepSzf I1wy, 60 PtrStepSzf grad, PtrStepSzf rho, 61 cudaStream_t stream); 62 void estimateU(PtrStepSzf I1wx, PtrStepSzf I1wy, 63 PtrStepSzf grad, PtrStepSzf rho_c, 64 PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, PtrStepSzf p31, PtrStepSzf p32, 65 PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf u3, PtrStepSzf error, 66 float l_t, float theta, float gamma, bool calcError, 67 cudaStream_t stream); 68 void estimateDualVariables(PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf u3, 69 PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, PtrStepSzf p31, PtrStepSzf p32, 70 float taut, float gamma, 71 cudaStream_t stream); 72 } 73 74 namespace 75 { 76 class OpticalFlowDual_TVL1_Impl : public OpticalFlowDual_TVL1 77 { 78 public: 79 OpticalFlowDual_TVL1_Impl(double tau, double lambda, double theta, int nscales, int warps, double epsilon, 80 int iterations, double scaleStep, double gamma, bool useInitialFlow) : 81 tau_(tau), lambda_(lambda), gamma_(gamma), theta_(theta), nscales_(nscales), warps_(warps), 82 epsilon_(epsilon), iterations_(iterations), scaleStep_(scaleStep), useInitialFlow_(useInitialFlow) 83 { 84 } 85 86 virtual double getTau() const { return tau_; } 87 virtual void setTau(double tau) { tau_ = tau; } 88 89 virtual double getLambda() const { return lambda_; } 90 virtual void setLambda(double lambda) { lambda_ = lambda; } 91 92 virtual double getGamma() const { return gamma_; } 93 virtual void setGamma(double gamma) { gamma_ = gamma; } 94 95 virtual double getTheta() const { return theta_; } 96 virtual void setTheta(double theta) { theta_ = theta; } 97 98 virtual int getNumScales() const { return nscales_; } 99 virtual void setNumScales(int nscales) { nscales_ = nscales; } 100 101 virtual int getNumWarps() const { return warps_; } 102 virtual void setNumWarps(int warps) { warps_ = warps; } 103 104 virtual double getEpsilon() const { return epsilon_; } 105 virtual void setEpsilon(double epsilon) { epsilon_ = epsilon; } 106 107 virtual int getNumIterations() const { return iterations_; } 108 virtual void setNumIterations(int iterations) { iterations_ = iterations; } 109 110 virtual double getScaleStep() const { return scaleStep_; } 111 virtual void setScaleStep(double scaleStep) { scaleStep_ = scaleStep; } 112 113 virtual bool getUseInitialFlow() const { return useInitialFlow_; } 114 virtual void setUseInitialFlow(bool useInitialFlow) { useInitialFlow_ = useInitialFlow; } 115 116 virtual void calc(InputArray I0, InputArray I1, InputOutputArray flow, Stream& stream); 117 118 private: 119 double tau_; 120 double lambda_; 121 double gamma_; 122 double theta_; 123 int nscales_; 124 int warps_; 125 double epsilon_; 126 int iterations_; 127 double scaleStep_; 128 bool useInitialFlow_; 129 130 private: 131 void calcImpl(const GpuMat& I0, const GpuMat& I1, GpuMat& flowx, GpuMat& flowy, Stream& stream); 132 void procOneScale(const GpuMat& I0, const GpuMat& I1, GpuMat& u1, GpuMat& u2, GpuMat& u3, Stream& stream); 133 134 std::vector<GpuMat> I0s; 135 std::vector<GpuMat> I1s; 136 std::vector<GpuMat> u1s; 137 std::vector<GpuMat> u2s; 138 std::vector<GpuMat> u3s; 139 140 GpuMat I1x_buf; 141 GpuMat I1y_buf; 142 143 GpuMat I1w_buf; 144 GpuMat I1wx_buf; 145 GpuMat I1wy_buf; 146 147 GpuMat grad_buf; 148 GpuMat rho_c_buf; 149 150 GpuMat p11_buf; 151 GpuMat p12_buf; 152 GpuMat p21_buf; 153 GpuMat p22_buf; 154 GpuMat p31_buf; 155 GpuMat p32_buf; 156 157 GpuMat diff_buf; 158 GpuMat norm_buf; 159 }; 160 161 void OpticalFlowDual_TVL1_Impl::calc(InputArray _frame0, InputArray _frame1, InputOutputArray _flow, Stream& stream) 162 { 163 const GpuMat frame0 = _frame0.getGpuMat(); 164 const GpuMat frame1 = _frame1.getGpuMat(); 165 166 BufferPool pool(stream); 167 GpuMat flowx = pool.getBuffer(frame0.size(), CV_32FC1); 168 GpuMat flowy = pool.getBuffer(frame0.size(), CV_32FC1); 169 170 calcImpl(frame0, frame1, flowx, flowy, stream); 171 172 GpuMat flows[] = {flowx, flowy}; 173 cuda::merge(flows, 2, _flow, stream); 174 } 175 176 void OpticalFlowDual_TVL1_Impl::calcImpl(const GpuMat& I0, const GpuMat& I1, GpuMat& flowx, GpuMat& flowy, Stream& stream) 177 { 178 CV_Assert( I0.type() == CV_8UC1 || I0.type() == CV_32FC1 ); 179 CV_Assert( I0.size() == I1.size() ); 180 CV_Assert( I0.type() == I1.type() ); 181 CV_Assert( !useInitialFlow_ || (flowx.size() == I0.size() && flowx.type() == CV_32FC1 && flowy.size() == flowx.size() && flowy.type() == flowx.type()) ); 182 CV_Assert( nscales_ > 0 ); 183 184 // allocate memory for the pyramid structure 185 I0s.resize(nscales_); 186 I1s.resize(nscales_); 187 u1s.resize(nscales_); 188 u2s.resize(nscales_); 189 u3s.resize(nscales_); 190 191 I0.convertTo(I0s[0], CV_32F, I0.depth() == CV_8U ? 1.0 : 255.0, stream); 192 I1.convertTo(I1s[0], CV_32F, I1.depth() == CV_8U ? 1.0 : 255.0, stream); 193 194 if (!useInitialFlow_) 195 { 196 flowx.create(I0.size(), CV_32FC1); 197 flowy.create(I0.size(), CV_32FC1); 198 } 199 200 u1s[0] = flowx; 201 u2s[0] = flowy; 202 if (gamma_) 203 { 204 u3s[0].create(I0.size(), CV_32FC1); 205 } 206 207 I1x_buf.create(I0.size(), CV_32FC1); 208 I1y_buf.create(I0.size(), CV_32FC1); 209 210 I1w_buf.create(I0.size(), CV_32FC1); 211 I1wx_buf.create(I0.size(), CV_32FC1); 212 I1wy_buf.create(I0.size(), CV_32FC1); 213 214 grad_buf.create(I0.size(), CV_32FC1); 215 rho_c_buf.create(I0.size(), CV_32FC1); 216 217 p11_buf.create(I0.size(), CV_32FC1); 218 p12_buf.create(I0.size(), CV_32FC1); 219 p21_buf.create(I0.size(), CV_32FC1); 220 p22_buf.create(I0.size(), CV_32FC1); 221 if (gamma_) 222 { 223 p31_buf.create(I0.size(), CV_32FC1); 224 p32_buf.create(I0.size(), CV_32FC1); 225 } 226 diff_buf.create(I0.size(), CV_32FC1); 227 228 // create the scales 229 for (int s = 1; s < nscales_; ++s) 230 { 231 cuda::resize(I0s[s-1], I0s[s], Size(), scaleStep_, scaleStep_, INTER_LINEAR, stream); 232 cuda::resize(I1s[s-1], I1s[s], Size(), scaleStep_, scaleStep_, INTER_LINEAR, stream); 233 234 if (I0s[s].cols < 16 || I0s[s].rows < 16) 235 { 236 nscales_ = s; 237 break; 238 } 239 240 if (useInitialFlow_) 241 { 242 cuda::resize(u1s[s-1], u1s[s], Size(), scaleStep_, scaleStep_, INTER_LINEAR, stream); 243 cuda::resize(u2s[s-1], u2s[s], Size(), scaleStep_, scaleStep_, INTER_LINEAR, stream); 244 245 cuda::multiply(u1s[s], Scalar::all(scaleStep_), u1s[s], 1, -1, stream); 246 cuda::multiply(u2s[s], Scalar::all(scaleStep_), u2s[s], 1, -1, stream); 247 } 248 else 249 { 250 u1s[s].create(I0s[s].size(), CV_32FC1); 251 u2s[s].create(I0s[s].size(), CV_32FC1); 252 } 253 if (gamma_) 254 { 255 u3s[s].create(I0s[s].size(), CV_32FC1); 256 } 257 } 258 259 if (!useInitialFlow_) 260 { 261 u1s[nscales_-1].setTo(Scalar::all(0), stream); 262 u2s[nscales_-1].setTo(Scalar::all(0), stream); 263 } 264 if (gamma_) 265 { 266 u3s[nscales_ - 1].setTo(Scalar::all(0), stream); 267 } 268 269 // pyramidal structure for computing the optical flow 270 for (int s = nscales_ - 1; s >= 0; --s) 271 { 272 // compute the optical flow at the current scale 273 procOneScale(I0s[s], I1s[s], u1s[s], u2s[s], u3s[s], stream); 274 275 // if this was the last scale, finish now 276 if (s == 0) 277 break; 278 279 // otherwise, upsample the optical flow 280 281 // zoom the optical flow for the next finer scale 282 cuda::resize(u1s[s], u1s[s - 1], I0s[s - 1].size(), 0, 0, INTER_LINEAR, stream); 283 cuda::resize(u2s[s], u2s[s - 1], I0s[s - 1].size(), 0, 0, INTER_LINEAR, stream); 284 if (gamma_) 285 { 286 cuda::resize(u3s[s], u3s[s - 1], I0s[s - 1].size(), 0, 0, INTER_LINEAR, stream); 287 } 288 289 // scale the optical flow with the appropriate zoom factor 290 cuda::multiply(u1s[s - 1], Scalar::all(1/scaleStep_), u1s[s - 1], 1, -1, stream); 291 cuda::multiply(u2s[s - 1], Scalar::all(1/scaleStep_), u2s[s - 1], 1, -1, stream); 292 } 293 } 294 295 void OpticalFlowDual_TVL1_Impl::procOneScale(const GpuMat& I0, const GpuMat& I1, GpuMat& u1, GpuMat& u2, GpuMat& u3, Stream& _stream) 296 { 297 using namespace tvl1flow; 298 299 cudaStream_t stream = StreamAccessor::getStream(_stream); 300 301 const double scaledEpsilon = epsilon_ * epsilon_ * I0.size().area(); 302 303 CV_DbgAssert( I1.size() == I0.size() ); 304 CV_DbgAssert( I1.type() == I0.type() ); 305 CV_DbgAssert( u1.size() == I0.size() ); 306 CV_DbgAssert( u2.size() == u1.size() ); 307 308 GpuMat I1x = I1x_buf(Rect(0, 0, I0.cols, I0.rows)); 309 GpuMat I1y = I1y_buf(Rect(0, 0, I0.cols, I0.rows)); 310 centeredGradient(I1, I1x, I1y, stream); 311 312 GpuMat I1w = I1w_buf(Rect(0, 0, I0.cols, I0.rows)); 313 GpuMat I1wx = I1wx_buf(Rect(0, 0, I0.cols, I0.rows)); 314 GpuMat I1wy = I1wy_buf(Rect(0, 0, I0.cols, I0.rows)); 315 316 GpuMat grad = grad_buf(Rect(0, 0, I0.cols, I0.rows)); 317 GpuMat rho_c = rho_c_buf(Rect(0, 0, I0.cols, I0.rows)); 318 319 GpuMat p11 = p11_buf(Rect(0, 0, I0.cols, I0.rows)); 320 GpuMat p12 = p12_buf(Rect(0, 0, I0.cols, I0.rows)); 321 GpuMat p21 = p21_buf(Rect(0, 0, I0.cols, I0.rows)); 322 GpuMat p22 = p22_buf(Rect(0, 0, I0.cols, I0.rows)); 323 GpuMat p31, p32; 324 if (gamma_) 325 { 326 p31 = p31_buf(Rect(0, 0, I0.cols, I0.rows)); 327 p32 = p32_buf(Rect(0, 0, I0.cols, I0.rows)); 328 } 329 p11.setTo(Scalar::all(0), _stream); 330 p12.setTo(Scalar::all(0), _stream); 331 p21.setTo(Scalar::all(0), _stream); 332 p22.setTo(Scalar::all(0), _stream); 333 if (gamma_) 334 { 335 p31.setTo(Scalar::all(0), _stream); 336 p32.setTo(Scalar::all(0), _stream); 337 } 338 339 GpuMat diff = diff_buf(Rect(0, 0, I0.cols, I0.rows)); 340 341 const float l_t = static_cast<float>(lambda_ * theta_); 342 const float taut = static_cast<float>(tau_ / theta_); 343 344 for (int warpings = 0; warpings < warps_; ++warpings) 345 { 346 warpBackward(I0, I1, I1x, I1y, u1, u2, I1w, I1wx, I1wy, grad, rho_c, stream); 347 348 double error = std::numeric_limits<double>::max(); 349 double prevError = 0.0; 350 for (int n = 0; error > scaledEpsilon && n < iterations_; ++n) 351 { 352 // some tweaks to make sum operation less frequently 353 bool calcError = (epsilon_ > 0) && (n & 0x1) && (prevError < scaledEpsilon); 354 estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, p31, p32, u1, u2, u3, diff, l_t, static_cast<float>(theta_), gamma_, calcError, stream); 355 if (calcError) 356 { 357 _stream.waitForCompletion(); 358 error = cuda::sum(diff, norm_buf)[0]; 359 prevError = error; 360 } 361 else 362 { 363 error = std::numeric_limits<double>::max(); 364 prevError -= scaledEpsilon; 365 } 366 367 estimateDualVariables(u1, u2, u3, p11, p12, p21, p22, p31, p32, taut, gamma_, stream); 368 } 369 } 370 } 371 } 372 373 Ptr<OpticalFlowDual_TVL1> cv::cuda::OpticalFlowDual_TVL1::create( 374 double tau, double lambda, double theta, int nscales, int warps, 375 double epsilon, int iterations, double scaleStep, double gamma, bool useInitialFlow) 376 { 377 return makePtr<OpticalFlowDual_TVL1_Impl>(tau, lambda, theta, nscales, warps, 378 epsilon, iterations, scaleStep, gamma, useInitialFlow); 379 } 380 381 #endif // !defined HAVE_CUDA || defined(CUDA_DISABLER) 382