Home | History | Annotate | Download | only in src
      1 /*M///////////////////////////////////////////////////////////////////////////////////////
      2 //
      3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
      4 //
      5 //  By downloading, copying, installing or using the software you agree to this license.
      6 //  If you do not agree to this license, do not download, install,
      7 //  copy or use the software.
      8 //
      9 //
     10 //                           License Agreement
     11 //                For Open Source Computer Vision Library
     12 //
     13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
     14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
     15 // Third party copyrights are property of their respective owners.
     16 //
     17 // Redistribution and use in source and binary forms, with or without modification,
     18 // are permitted provided that the following conditions are met:
     19 //
     20 //   * Redistribution's of source code must retain the above copyright notice,
     21 //     this list of conditions and the following disclaimer.
     22 //
     23 //   * Redistribution's in binary form must reproduce the above copyright notice,
     24 //     this list of conditions and the following disclaimer in the documentation
     25 //     and/or other materials provided with the distribution.
     26 //
     27 //   * The name of the copyright holders may not be used to endorse or promote products
     28 //     derived from this software without specific prior written permission.
     29 //
     30 // This software is provided by the copyright holders and contributors "as is" and
     31 // any express or implied warranties, including, but not limited to, the implied
     32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
     33 // In no event shall the Intel Corporation or contributors be liable for any direct,
     34 // indirect, incidental, special, exemplary, or consequential damages
     35 // (including, but not limited to, procurement of substitute goods or services;
     36 // loss of use, data, or profits; or business interruption) however caused
     37 // and on any theory of liability, whether in contract, strict liability,
     38 // or tort (including negligence or otherwise) arising in any way out of
     39 // the use of this software, even if advised of the possibility of such damage.
     40 //
     41 //M*/
     42 
     43 /*
     44 //
     45 // This implementation is based on Javier Snchez Prez <jsanchez (at) dis.ulpgc.es> implementation.
     46 // Original BSD license:
     47 //
     48 // Copyright (c) 2011, Javier Snchez Prez, Enric Meinhardt Llopis
     49 // All rights reserved.
     50 //
     51 // Redistribution and use in source and binary forms, with or without
     52 // modification, are permitted provided that the following conditions are met:
     53 //
     54 // * Redistributions of source code must retain the above copyright notice, this
     55 //   list of conditions and the following disclaimer.
     56 //
     57 // * Redistributions in binary form must reproduce the above copyright notice,
     58 //   this list of conditions and the following disclaimer in the documentation
     59 //   and/or other materials provided with the distribution.
     60 //
     61 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     62 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     63 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     64 // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
     65 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     66 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     67 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     68 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     69 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     70 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     71 // POSSIBILITY OF SUCH DAMAGE.
     72 //
     73 */
     74 
     75 #include "precomp.hpp"
     76 #include "opencl_kernels_video.hpp"
     77 
     78 #include <limits>
     79 #include <iomanip>
     80 #include <iostream>
     81 #include "opencv2/core/opencl/ocl_defs.hpp"
     82 
     83 
     84 
     85 using namespace cv;
     86 
     87 namespace {
     88 
     89 class OpticalFlowDual_TVL1 : public DualTVL1OpticalFlow
     90 {
     91 public:
     92     OpticalFlowDual_TVL1();
     93 
     94     void calc(InputArray I0, InputArray I1, InputOutputArray flow);
     95     void collectGarbage();
     96 
     97     CV_IMPL_PROPERTY(double, Tau, tau)
     98     CV_IMPL_PROPERTY(double, Lambda, lambda)
     99     CV_IMPL_PROPERTY(double, Theta, theta)
    100     CV_IMPL_PROPERTY(double, Gamma, gamma)
    101     CV_IMPL_PROPERTY(int, ScalesNumber, nscales)
    102     CV_IMPL_PROPERTY(int, WarpingsNumber, warps)
    103     CV_IMPL_PROPERTY(double, Epsilon, epsilon)
    104     CV_IMPL_PROPERTY(int, InnerIterations, innerIterations)
    105     CV_IMPL_PROPERTY(int, OuterIterations, outerIterations)
    106     CV_IMPL_PROPERTY(bool, UseInitialFlow, useInitialFlow)
    107     CV_IMPL_PROPERTY(double, ScaleStep, scaleStep)
    108     CV_IMPL_PROPERTY(int, MedianFiltering, medianFiltering)
    109 
    110 protected:
    111     double tau;
    112     double lambda;
    113     double theta;
    114     double gamma;
    115     int nscales;
    116     int warps;
    117     double epsilon;
    118     int innerIterations;
    119     int outerIterations;
    120     bool useInitialFlow;
    121     double scaleStep;
    122     int medianFiltering;
    123 
    124 private:
    125    void procOneScale(const Mat_<float>& I0, const Mat_<float>& I1, Mat_<float>& u1, Mat_<float>& u2, Mat_<float>& u3);
    126 
    127     bool procOneScale_ocl(const UMat& I0, const UMat& I1, UMat& u1, UMat& u2);
    128 
    129     bool calc_ocl(InputArray I0, InputArray I1, InputOutputArray flow);
    130     struct dataMat
    131     {
    132         std::vector<Mat_<float> > I0s;
    133         std::vector<Mat_<float> > I1s;
    134         std::vector<Mat_<float> > u1s;
    135         std::vector<Mat_<float> > u2s;
    136         std::vector<Mat_<float> > u3s;
    137 
    138         Mat_<float> I1x_buf;
    139         Mat_<float> I1y_buf;
    140 
    141         Mat_<float> flowMap1_buf;
    142         Mat_<float> flowMap2_buf;
    143 
    144         Mat_<float> I1w_buf;
    145         Mat_<float> I1wx_buf;
    146         Mat_<float> I1wy_buf;
    147 
    148         Mat_<float> grad_buf;
    149         Mat_<float> rho_c_buf;
    150 
    151         Mat_<float> v1_buf;
    152         Mat_<float> v2_buf;
    153         Mat_<float> v3_buf;
    154 
    155         Mat_<float> p11_buf;
    156         Mat_<float> p12_buf;
    157         Mat_<float> p21_buf;
    158         Mat_<float> p22_buf;
    159         Mat_<float> p31_buf;
    160         Mat_<float> p32_buf;
    161 
    162         Mat_<float> div_p1_buf;
    163         Mat_<float> div_p2_buf;
    164         Mat_<float> div_p3_buf;
    165 
    166         Mat_<float> u1x_buf;
    167         Mat_<float> u1y_buf;
    168         Mat_<float> u2x_buf;
    169         Mat_<float> u2y_buf;
    170         Mat_<float> u3x_buf;
    171         Mat_<float> u3y_buf;
    172     } dm;
    173     struct dataUMat
    174     {
    175         std::vector<UMat> I0s;
    176         std::vector<UMat> I1s;
    177         std::vector<UMat> u1s;
    178         std::vector<UMat> u2s;
    179 
    180         UMat I1x_buf;
    181         UMat I1y_buf;
    182 
    183         UMat I1w_buf;
    184         UMat I1wx_buf;
    185         UMat I1wy_buf;
    186 
    187         UMat grad_buf;
    188         UMat rho_c_buf;
    189 
    190         UMat p11_buf;
    191         UMat p12_buf;
    192         UMat p21_buf;
    193         UMat p22_buf;
    194 
    195         UMat diff_buf;
    196         UMat norm_buf;
    197     } dum;
    198 };
    199 
    200 namespace cv_ocl_tvl1flow
    201 {
    202     bool centeredGradient(const UMat &src, UMat &dx, UMat &dy);
    203 
    204     bool warpBackward(const UMat &I0, const UMat &I1, UMat &I1x, UMat &I1y,
    205         UMat &u1, UMat &u2, UMat &I1w, UMat &I1wx, UMat &I1wy,
    206         UMat &grad, UMat &rho);
    207 
    208     bool estimateU(UMat &I1wx, UMat &I1wy, UMat &grad,
    209         UMat &rho_c, UMat &p11, UMat &p12,
    210         UMat &p21, UMat &p22, UMat &u1,
    211         UMat &u2, UMat &error, float l_t, float theta, char calc_error);
    212 
    213     bool estimateDualVariables(UMat &u1, UMat &u2,
    214         UMat &p11, UMat &p12, UMat &p21, UMat &p22, float taut);
    215 }
    216 
    217 bool cv_ocl_tvl1flow::centeredGradient(const UMat &src, UMat &dx, UMat &dy)
    218 {
    219     size_t globalsize[2] = { src.cols, src.rows };
    220 
    221     ocl::Kernel kernel;
    222     if (!kernel.create("centeredGradientKernel", cv::ocl::video::optical_flow_tvl1_oclsrc, ""))
    223         return false;
    224 
    225     int idxArg = 0;
    226     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(src));//src mat
    227     idxArg = kernel.set(idxArg, (int)(src.cols));//src mat col
    228     idxArg = kernel.set(idxArg, (int)(src.rows));//src mat rows
    229     idxArg = kernel.set(idxArg, (int)(src.step / src.elemSize()));//src mat step
    230     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(dx));//res mat dx
    231     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(dy));//res mat dy
    232     idxArg = kernel.set(idxArg, (int)(dx.step/dx.elemSize()));//res mat step
    233     return kernel.run(2, globalsize, NULL, false);
    234 }
    235 
    236 bool cv_ocl_tvl1flow::warpBackward(const UMat &I0, const UMat &I1, UMat &I1x, UMat &I1y,
    237     UMat &u1, UMat &u2, UMat &I1w, UMat &I1wx, UMat &I1wy,
    238     UMat &grad, UMat &rho)
    239 {
    240     size_t globalsize[2] = { I0.cols, I0.rows };
    241 
    242     ocl::Kernel kernel;
    243     if (!kernel.create("warpBackwardKernel", cv::ocl::video::optical_flow_tvl1_oclsrc, ""))
    244         return false;
    245 
    246     int idxArg = 0;
    247     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(I0));//I0 mat
    248     int I0_step = (int)(I0.step / I0.elemSize());
    249     idxArg = kernel.set(idxArg, I0_step);//I0_step
    250     idxArg = kernel.set(idxArg, (int)(I0.cols));//I0_col
    251     idxArg = kernel.set(idxArg, (int)(I0.rows));//I0_row
    252     ocl::Image2D imageI1(I1);
    253     ocl::Image2D imageI1x(I1x);
    254     ocl::Image2D imageI1y(I1y);
    255     idxArg = kernel.set(idxArg, imageI1);//image2d_t tex_I1
    256     idxArg = kernel.set(idxArg, imageI1x);//image2d_t tex_I1x
    257     idxArg = kernel.set(idxArg, imageI1y);//image2d_t tex_I1y
    258     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(u1));//const float* u1
    259     idxArg = kernel.set(idxArg, (int)(u1.step / u1.elemSize()));//int u1_step
    260     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(u2));//const float* u2
    261     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(I1w));///float* I1w
    262     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(I1wx));//float* I1wx
    263     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(I1wy));//float* I1wy
    264     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(grad));//float* grad
    265     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(rho));//float* rho
    266     idxArg = kernel.set(idxArg, (int)(I1w.step / I1w.elemSize()));//I1w_step
    267     idxArg = kernel.set(idxArg, (int)(u2.step / u2.elemSize()));//u2_step
    268     int u1_offset_x = (int)((u1.offset) % (u1.step));
    269     u1_offset_x = (int)(u1_offset_x / u1.elemSize());
    270     idxArg = kernel.set(idxArg, (int)u1_offset_x );//u1_offset_x
    271     idxArg = kernel.set(idxArg, (int)(u1.offset/u1.step));//u1_offset_y
    272     int u2_offset_x = (int)((u2.offset) % (u2.step));
    273     u2_offset_x = (int) (u2_offset_x / u2.elemSize());
    274     idxArg = kernel.set(idxArg, (int)u2_offset_x);//u2_offset_x
    275     idxArg = kernel.set(idxArg, (int)(u2.offset / u2.step));//u2_offset_y
    276     return kernel.run(2, globalsize, NULL, false);
    277 }
    278 
    279 bool cv_ocl_tvl1flow::estimateU(UMat &I1wx, UMat &I1wy, UMat &grad,
    280     UMat &rho_c, UMat &p11, UMat &p12,
    281     UMat &p21, UMat &p22, UMat &u1,
    282     UMat &u2, UMat &error, float l_t, float theta, char calc_error)
    283 {
    284     size_t globalsize[2] = { I1wx.cols, I1wx.rows };
    285 
    286     ocl::Kernel kernel;
    287     if (!kernel.create("estimateUKernel", cv::ocl::video::optical_flow_tvl1_oclsrc, ""))
    288         return false;
    289 
    290     int idxArg = 0;
    291     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(I1wx)); //const float* I1wx
    292     idxArg = kernel.set(idxArg, (int)(I1wx.cols)); //int I1wx_col
    293     idxArg = kernel.set(idxArg, (int)(I1wx.rows)); //int I1wx_row
    294     idxArg = kernel.set(idxArg, (int)(I1wx.step/I1wx.elemSize())); //int I1wx_step
    295     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(I1wy)); //const float* I1wy
    296     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(grad)); //const float* grad
    297     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(rho_c)); //const float* rho_c
    298     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(p11)); //const float* p11
    299     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(p12)); //const float* p12
    300     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(p21)); //const float* p21
    301     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(p22)); //const float* p22
    302     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(u1)); //float* u1
    303     idxArg = kernel.set(idxArg, (int)(u1.step / u1.elemSize())); //int u1_step
    304     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(u2)); //float* u2
    305     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(error)); //float* error
    306     idxArg = kernel.set(idxArg, (float)l_t); //float l_t
    307     idxArg = kernel.set(idxArg, (float)theta); //float theta
    308     idxArg = kernel.set(idxArg, (int)(u2.step / u2.elemSize()));//int u2_step
    309     int u1_offset_x = (int)(u1.offset % u1.step);
    310     u1_offset_x = (int) (u1_offset_x  / u1.elemSize());
    311     idxArg = kernel.set(idxArg, (int)u1_offset_x); //int u1_offset_x
    312     idxArg = kernel.set(idxArg, (int)(u1.offset/u1.step)); //int u1_offset_y
    313     int u2_offset_x = (int)(u2.offset % u2.step);
    314     u2_offset_x = (int)(u2_offset_x / u2.elemSize());
    315     idxArg = kernel.set(idxArg, (int)u2_offset_x ); //int u2_offset_x
    316     idxArg = kernel.set(idxArg, (int)(u2.offset / u2.step)); //int u2_offset_y
    317     idxArg = kernel.set(idxArg, (char)calc_error);    //char calc_error
    318 
    319     return kernel.run(2, globalsize, NULL, false);
    320 }
    321 
    322 bool cv_ocl_tvl1flow::estimateDualVariables(UMat &u1, UMat &u2,
    323     UMat &p11, UMat &p12, UMat &p21, UMat &p22, float taut)
    324 {
    325     size_t globalsize[2] = { u1.cols, u1.rows };
    326 
    327     ocl::Kernel kernel;
    328     if (!kernel.create("estimateDualVariablesKernel", cv::ocl::video::optical_flow_tvl1_oclsrc, ""))
    329         return false;
    330 
    331     int idxArg = 0;
    332     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(u1));// const float* u1
    333     idxArg = kernel.set(idxArg, (int)(u1.cols)); //int u1_col
    334     idxArg = kernel.set(idxArg, (int)(u1.rows)); //int u1_row
    335     idxArg = kernel.set(idxArg, (int)(u1.step/u1.elemSize())); //int u1_step
    336     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(u2)); // const float* u2
    337     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(p11)); // float* p11
    338     idxArg = kernel.set(idxArg, (int)(p11.step/p11.elemSize())); //int p11_step
    339     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(p12)); // float* p12
    340     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(p21)); // float* p21
    341     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(p22)); // float* p22
    342     idxArg = kernel.set(idxArg, (float)(taut));    //float taut
    343     idxArg = kernel.set(idxArg, (int)(u2.step/u2.elemSize())); //int u2_step
    344     int u1_offset_x = (int)(u1.offset % u1.step);
    345     u1_offset_x = (int)(u1_offset_x / u1.elemSize());
    346     idxArg = kernel.set(idxArg, u1_offset_x); //int u1_offset_x
    347     idxArg = kernel.set(idxArg, (int)(u1.offset / u1.step)); //int u1_offset_y
    348     int u2_offset_x = (int)(u2.offset % u2.step);
    349     u2_offset_x = (int)(u2_offset_x / u2.elemSize());
    350     idxArg = kernel.set(idxArg, u2_offset_x); //int u2_offset_x
    351     idxArg = kernel.set(idxArg, (int)(u2.offset / u2.step)); //int u2_offset_y
    352 
    353     return kernel.run(2, globalsize, NULL, false);
    354 
    355 }
    356 
    357 OpticalFlowDual_TVL1::OpticalFlowDual_TVL1()
    358 {
    359     tau            = 0.25;
    360     lambda         = 0.15;
    361     theta          = 0.3;
    362     nscales        = 5;
    363     warps          = 5;
    364     epsilon        = 0.01;
    365     gamma          = 0.;
    366     innerIterations = 30;
    367     outerIterations = 10;
    368     useInitialFlow = false;
    369     medianFiltering = 5;
    370     scaleStep      = 0.8;
    371 }
    372 
    373 void OpticalFlowDual_TVL1::calc(InputArray _I0, InputArray _I1, InputOutputArray _flow)
    374 {
    375     CV_OCL_RUN(_flow.isUMat() &&
    376                ocl::Image2D::isFormatSupported(CV_32F, 1, false),
    377                calc_ocl(_I0, _I1, _flow))
    378 
    379     Mat I0 = _I0.getMat();
    380     Mat I1 = _I1.getMat();
    381 
    382     CV_Assert( I0.type() == CV_8UC1 || I0.type() == CV_32FC1 );
    383     CV_Assert( I0.size() == I1.size() );
    384     CV_Assert( I0.type() == I1.type() );
    385     CV_Assert( !useInitialFlow || (_flow.size() == I0.size() && _flow.type() == CV_32FC2) );
    386     CV_Assert( nscales > 0 );
    387     bool use_gamma = gamma != 0;
    388     // allocate memory for the pyramid structure
    389     dm.I0s.resize(nscales);
    390     dm.I1s.resize(nscales);
    391     dm.u1s.resize(nscales);
    392     dm.u2s.resize(nscales);
    393     dm.u3s.resize(nscales);
    394 
    395     I0.convertTo(dm.I0s[0], dm.I0s[0].depth(), I0.depth() == CV_8U ? 1.0 : 255.0);
    396     I1.convertTo(dm.I1s[0], dm.I1s[0].depth(), I1.depth() == CV_8U ? 1.0 : 255.0);
    397 
    398     dm.u1s[0].create(I0.size());
    399     dm.u2s[0].create(I0.size());
    400     if (use_gamma) dm.u3s[0].create(I0.size());
    401 
    402     if (useInitialFlow)
    403     {
    404         Mat_<float> mv[] = { dm.u1s[0], dm.u2s[0] };
    405         split(_flow.getMat(), mv);
    406     }
    407 
    408     dm.I1x_buf.create(I0.size());
    409     dm.I1y_buf.create(I0.size());
    410 
    411     dm.flowMap1_buf.create(I0.size());
    412     dm.flowMap2_buf.create(I0.size());
    413 
    414     dm.I1w_buf.create(I0.size());
    415     dm.I1wx_buf.create(I0.size());
    416     dm.I1wy_buf.create(I0.size());
    417 
    418     dm.grad_buf.create(I0.size());
    419     dm.rho_c_buf.create(I0.size());
    420 
    421     dm.v1_buf.create(I0.size());
    422     dm.v2_buf.create(I0.size());
    423     dm.v3_buf.create(I0.size());
    424 
    425     dm.p11_buf.create(I0.size());
    426     dm.p12_buf.create(I0.size());
    427     dm.p21_buf.create(I0.size());
    428     dm.p22_buf.create(I0.size());
    429     dm.p31_buf.create(I0.size());
    430     dm.p32_buf.create(I0.size());
    431 
    432     dm.div_p1_buf.create(I0.size());
    433     dm.div_p2_buf.create(I0.size());
    434     dm.div_p3_buf.create(I0.size());
    435 
    436     dm.u1x_buf.create(I0.size());
    437     dm.u1y_buf.create(I0.size());
    438     dm.u2x_buf.create(I0.size());
    439     dm.u2y_buf.create(I0.size());
    440     dm.u3x_buf.create(I0.size());
    441     dm.u3y_buf.create(I0.size());
    442 
    443     // create the scales
    444     for (int s = 1; s < nscales; ++s)
    445     {
    446         resize(dm.I0s[s - 1], dm.I0s[s], Size(), scaleStep, scaleStep);
    447         resize(dm.I1s[s - 1], dm.I1s[s], Size(), scaleStep, scaleStep);
    448 
    449         if (dm.I0s[s].cols < 16 || dm.I0s[s].rows < 16)
    450         {
    451             nscales = s;
    452             break;
    453         }
    454 
    455         if (useInitialFlow)
    456         {
    457             resize(dm.u1s[s - 1], dm.u1s[s], Size(), scaleStep, scaleStep);
    458             resize(dm.u2s[s - 1], dm.u2s[s], Size(), scaleStep, scaleStep);
    459 
    460             multiply(dm.u1s[s], Scalar::all(scaleStep), dm.u1s[s]);
    461             multiply(dm.u2s[s], Scalar::all(scaleStep), dm.u2s[s]);
    462         }
    463         else
    464         {
    465             dm.u1s[s].create(dm.I0s[s].size());
    466             dm.u2s[s].create(dm.I0s[s].size());
    467         }
    468         if (use_gamma) dm.u3s[s].create(dm.I0s[s].size());
    469     }
    470     if (!useInitialFlow)
    471     {
    472         dm.u1s[nscales - 1].setTo(Scalar::all(0));
    473         dm.u2s[nscales - 1].setTo(Scalar::all(0));
    474     }
    475     if (use_gamma) dm.u3s[nscales - 1].setTo(Scalar::all(0));
    476     // pyramidal structure for computing the optical flow
    477     for (int s = nscales - 1; s >= 0; --s)
    478     {
    479         // compute the optical flow at the current scale
    480         procOneScale(dm.I0s[s], dm.I1s[s], dm.u1s[s], dm.u2s[s], dm.u3s[s]);
    481 
    482         // if this was the last scale, finish now
    483         if (s == 0)
    484             break;
    485 
    486         // otherwise, upsample the optical flow
    487 
    488         // zoom the optical flow for the next finer scale
    489         resize(dm.u1s[s], dm.u1s[s - 1], dm.I0s[s - 1].size());
    490         resize(dm.u2s[s], dm.u2s[s - 1], dm.I0s[s - 1].size());
    491         if (use_gamma) resize(dm.u3s[s], dm.u3s[s - 1], dm.I0s[s - 1].size());
    492 
    493         // scale the optical flow with the appropriate zoom factor (don't scale u3!)
    494         multiply(dm.u1s[s - 1], Scalar::all(1 / scaleStep), dm.u1s[s - 1]);
    495         multiply(dm.u2s[s - 1], Scalar::all(1 / scaleStep), dm.u2s[s - 1]);
    496     }
    497 
    498     Mat uxy[] = { dm.u1s[0], dm.u2s[0] };
    499     merge(uxy, 2, _flow);
    500 }
    501 
    502 bool OpticalFlowDual_TVL1::calc_ocl(InputArray _I0, InputArray _I1, InputOutputArray _flow)
    503 {
    504     UMat I0 = _I0.getUMat();
    505     UMat I1 = _I1.getUMat();
    506 
    507     CV_Assert(I0.type() == CV_8UC1 || I0.type() == CV_32FC1);
    508     CV_Assert(I0.size() == I1.size());
    509     CV_Assert(I0.type() == I1.type());
    510     CV_Assert(!useInitialFlow || (_flow.size() == I0.size() && _flow.type() == CV_32FC2));
    511     CV_Assert(nscales > 0);
    512 
    513     // allocate memory for the pyramid structure
    514     dum.I0s.resize(nscales);
    515     dum.I1s.resize(nscales);
    516     dum.u1s.resize(nscales);
    517     dum.u2s.resize(nscales);
    518     //I0s_step == I1s_step
    519     double alpha = I0.depth() == CV_8U ? 1.0 : 255.0;
    520 
    521     I0.convertTo(dum.I0s[0], CV_32F, alpha);
    522     I1.convertTo(dum.I1s[0], CV_32F, I1.depth() == CV_8U ? 1.0 : 255.0);
    523 
    524     dum.u1s[0].create(I0.size(), CV_32FC1);
    525     dum.u2s[0].create(I0.size(), CV_32FC1);
    526 
    527     if (useInitialFlow)
    528     {
    529         std::vector<UMat> umv;
    530         umv.push_back(dum.u1s[0]);
    531         umv.push_back(dum.u2s[0]);
    532         cv::split(_flow,umv);
    533     }
    534 
    535     dum.I1x_buf.create(I0.size(), CV_32FC1);
    536     dum.I1y_buf.create(I0.size(), CV_32FC1);
    537 
    538     dum.I1w_buf.create(I0.size(), CV_32FC1);
    539     dum.I1wx_buf.create(I0.size(), CV_32FC1);
    540     dum.I1wy_buf.create(I0.size(), CV_32FC1);
    541 
    542     dum.grad_buf.create(I0.size(), CV_32FC1);
    543     dum.rho_c_buf.create(I0.size(), CV_32FC1);
    544 
    545     dum.p11_buf.create(I0.size(), CV_32FC1);
    546     dum.p12_buf.create(I0.size(), CV_32FC1);
    547     dum.p21_buf.create(I0.size(), CV_32FC1);
    548     dum.p22_buf.create(I0.size(), CV_32FC1);
    549 
    550     dum.diff_buf.create(I0.size(), CV_32FC1);
    551 
    552     // create the scales
    553     for (int s = 1; s < nscales; ++s)
    554     {
    555         resize(dum.I0s[s - 1], dum.I0s[s], Size(), scaleStep, scaleStep);
    556         resize(dum.I1s[s - 1], dum.I1s[s], Size(), scaleStep, scaleStep);
    557 
    558         if (dum.I0s[s].cols < 16 || dum.I0s[s].rows < 16)
    559         {
    560             nscales = s;
    561             break;
    562         }
    563 
    564         if (useInitialFlow)
    565         {
    566             resize(dum.u1s[s - 1], dum.u1s[s], Size(), scaleStep, scaleStep);
    567             resize(dum.u2s[s - 1], dum.u2s[s], Size(), scaleStep, scaleStep);
    568 
    569             //scale by scale factor
    570             multiply(dum.u1s[s], Scalar::all(scaleStep), dum.u1s[s]);
    571             multiply(dum.u2s[s], Scalar::all(scaleStep), dum.u2s[s]);
    572         }
    573     }
    574 
    575     // pyramidal structure for computing the optical flow
    576     for (int s = nscales - 1; s >= 0; --s)
    577     {
    578         // compute the optical flow at the current scale
    579         if (!OpticalFlowDual_TVL1::procOneScale_ocl(dum.I0s[s], dum.I1s[s], dum.u1s[s], dum.u2s[s]))
    580             return false;
    581 
    582         // if this was the last scale, finish now
    583         if (s == 0)
    584             break;
    585 
    586         // zoom the optical flow for the next finer scale
    587         resize(dum.u1s[s], dum.u1s[s - 1], dum.I0s[s - 1].size());
    588         resize(dum.u2s[s], dum.u2s[s - 1], dum.I0s[s - 1].size());
    589 
    590         // scale the optical flow with the appropriate zoom factor
    591         multiply(dum.u1s[s - 1], Scalar::all(1 / scaleStep), dum.u1s[s - 1]);
    592         multiply(dum.u2s[s - 1], Scalar::all(1 / scaleStep), dum.u2s[s - 1]);
    593     }
    594 
    595     std::vector<UMat> uxy;
    596     uxy.push_back(dum.u1s[0]);
    597     uxy.push_back(dum.u2s[0]);
    598     merge(uxy, _flow);
    599     return true;
    600 }
    601 
    602 ////////////////////////////////////////////////////////////
    603 // buildFlowMap
    604 
    605 struct BuildFlowMapBody : ParallelLoopBody
    606 {
    607     void operator() (const Range& range) const;
    608 
    609     Mat_<float> u1;
    610     Mat_<float> u2;
    611     mutable Mat_<float> map1;
    612     mutable Mat_<float> map2;
    613 };
    614 
    615 void BuildFlowMapBody::operator() (const Range& range) const
    616 {
    617     for (int y = range.start; y < range.end; ++y)
    618     {
    619         const float* u1Row = u1[y];
    620         const float* u2Row = u2[y];
    621 
    622         float* map1Row = map1[y];
    623         float* map2Row = map2[y];
    624 
    625         for (int x = 0; x < u1.cols; ++x)
    626         {
    627             map1Row[x] = x + u1Row[x];
    628             map2Row[x] = y + u2Row[x];
    629         }
    630     }
    631 }
    632 
    633 void buildFlowMap(const Mat_<float>& u1, const Mat_<float>& u2, Mat_<float>& map1, Mat_<float>& map2)
    634 {
    635     CV_DbgAssert( u2.size() == u1.size() );
    636     CV_DbgAssert( map1.size() == u1.size() );
    637     CV_DbgAssert( map2.size() == u1.size() );
    638 
    639     BuildFlowMapBody body;
    640 
    641     body.u1 = u1;
    642     body.u2 = u2;
    643     body.map1 = map1;
    644     body.map2 = map2;
    645 
    646     parallel_for_(Range(0, u1.rows), body);
    647 }
    648 
    649 ////////////////////////////////////////////////////////////
    650 // centeredGradient
    651 
    652 struct CenteredGradientBody : ParallelLoopBody
    653 {
    654     void operator() (const Range& range) const;
    655 
    656     Mat_<float> src;
    657     mutable Mat_<float> dx;
    658     mutable Mat_<float> dy;
    659 };
    660 
    661 void CenteredGradientBody::operator() (const Range& range) const
    662 {
    663     const int last_col = src.cols - 1;
    664 
    665     for (int y = range.start; y < range.end; ++y)
    666     {
    667         const float* srcPrevRow = src[y - 1];
    668         const float* srcCurRow = src[y];
    669         const float* srcNextRow = src[y + 1];
    670 
    671         float* dxRow = dx[y];
    672         float* dyRow = dy[y];
    673 
    674         for (int x = 1; x < last_col; ++x)
    675         {
    676             dxRow[x] = 0.5f * (srcCurRow[x + 1] - srcCurRow[x - 1]);
    677             dyRow[x] = 0.5f * (srcNextRow[x] - srcPrevRow[x]);
    678         }
    679     }
    680 }
    681 
    682 void centeredGradient(const Mat_<float>& src, Mat_<float>& dx, Mat_<float>& dy)
    683 {
    684     CV_DbgAssert( src.rows > 2 && src.cols > 2 );
    685     CV_DbgAssert( dx.size() == src.size() );
    686     CV_DbgAssert( dy.size() == src.size() );
    687 
    688     const int last_row = src.rows - 1;
    689     const int last_col = src.cols - 1;
    690 
    691     // compute the gradient on the center body of the image
    692     {
    693         CenteredGradientBody body;
    694 
    695         body.src = src;
    696         body.dx = dx;
    697         body.dy = dy;
    698 
    699         parallel_for_(Range(1, last_row), body);
    700     }
    701 
    702     // compute the gradient on the first and last rows
    703     for (int x = 1; x < last_col; ++x)
    704     {
    705         dx(0, x) = 0.5f * (src(0, x + 1) - src(0, x - 1));
    706         dy(0, x) = 0.5f * (src(1, x) - src(0, x));
    707 
    708         dx(last_row, x) = 0.5f * (src(last_row, x + 1) - src(last_row, x - 1));
    709         dy(last_row, x) = 0.5f * (src(last_row, x) - src(last_row - 1, x));
    710     }
    711 
    712     // compute the gradient on the first and last columns
    713     for (int y = 1; y < last_row; ++y)
    714     {
    715         dx(y, 0) = 0.5f * (src(y, 1) - src(y, 0));
    716         dy(y, 0) = 0.5f * (src(y + 1, 0) - src(y - 1, 0));
    717 
    718         dx(y, last_col) = 0.5f * (src(y, last_col) - src(y, last_col - 1));
    719         dy(y, last_col) = 0.5f * (src(y + 1, last_col) - src(y - 1, last_col));
    720     }
    721 
    722     // compute the gradient at the four corners
    723     dx(0, 0) = 0.5f * (src(0, 1) - src(0, 0));
    724     dy(0, 0) = 0.5f * (src(1, 0) - src(0, 0));
    725 
    726     dx(0, last_col) = 0.5f * (src(0, last_col) - src(0, last_col - 1));
    727     dy(0, last_col) = 0.5f * (src(1, last_col) - src(0, last_col));
    728 
    729     dx(last_row, 0) = 0.5f * (src(last_row, 1) - src(last_row, 0));
    730     dy(last_row, 0) = 0.5f * (src(last_row, 0) - src(last_row - 1, 0));
    731 
    732     dx(last_row, last_col) = 0.5f * (src(last_row, last_col) - src(last_row, last_col - 1));
    733     dy(last_row, last_col) = 0.5f * (src(last_row, last_col) - src(last_row - 1, last_col));
    734 }
    735 
    736 ////////////////////////////////////////////////////////////
    737 // forwardGradient
    738 
    739 struct ForwardGradientBody : ParallelLoopBody
    740 {
    741     void operator() (const Range& range) const;
    742 
    743     Mat_<float> src;
    744     mutable Mat_<float> dx;
    745     mutable Mat_<float> dy;
    746 };
    747 
    748 void ForwardGradientBody::operator() (const Range& range) const
    749 {
    750     const int last_col = src.cols - 1;
    751 
    752     for (int y = range.start; y < range.end; ++y)
    753     {
    754         const float* srcCurRow = src[y];
    755         const float* srcNextRow = src[y + 1];
    756 
    757         float* dxRow = dx[y];
    758         float* dyRow = dy[y];
    759 
    760         for (int x = 0; x < last_col; ++x)
    761         {
    762             dxRow[x] = srcCurRow[x + 1] - srcCurRow[x];
    763             dyRow[x] = srcNextRow[x] - srcCurRow[x];
    764         }
    765     }
    766 }
    767 
    768 void forwardGradient(const Mat_<float>& src, Mat_<float>& dx, Mat_<float>& dy)
    769 {
    770     CV_DbgAssert( src.rows > 2 && src.cols > 2 );
    771     CV_DbgAssert( dx.size() == src.size() );
    772     CV_DbgAssert( dy.size() == src.size() );
    773 
    774     const int last_row = src.rows - 1;
    775     const int last_col = src.cols - 1;
    776 
    777     // compute the gradient on the central body of the image
    778     {
    779         ForwardGradientBody body;
    780 
    781         body.src = src;
    782         body.dx = dx;
    783         body.dy = dy;
    784 
    785         parallel_for_(Range(0, last_row), body);
    786     }
    787 
    788     // compute the gradient on the last row
    789     for (int x = 0; x < last_col; ++x)
    790     {
    791         dx(last_row, x) = src(last_row, x + 1) - src(last_row, x);
    792         dy(last_row, x) = 0.0f;
    793     }
    794 
    795     // compute the gradient on the last column
    796     for (int y = 0; y < last_row; ++y)
    797     {
    798         dx(y, last_col) = 0.0f;
    799         dy(y, last_col) = src(y + 1, last_col) - src(y, last_col);
    800     }
    801 
    802     dx(last_row, last_col) = 0.0f;
    803     dy(last_row, last_col) = 0.0f;
    804 }
    805 
    806 ////////////////////////////////////////////////////////////
    807 // divergence
    808 
    809 struct DivergenceBody : ParallelLoopBody
    810 {
    811     void operator() (const Range& range) const;
    812 
    813     Mat_<float> v1;
    814     Mat_<float> v2;
    815     mutable Mat_<float> div;
    816 };
    817 
    818 void DivergenceBody::operator() (const Range& range) const
    819 {
    820     for (int y = range.start; y < range.end; ++y)
    821     {
    822         const float* v1Row = v1[y];
    823         const float* v2PrevRow = v2[y - 1];
    824         const float* v2CurRow = v2[y];
    825 
    826         float* divRow = div[y];
    827 
    828         for(int x = 1; x < v1.cols; ++x)
    829         {
    830             const float v1x = v1Row[x] - v1Row[x - 1];
    831             const float v2y = v2CurRow[x] - v2PrevRow[x];
    832 
    833             divRow[x] = v1x + v2y;
    834         }
    835     }
    836 }
    837 
    838 void divergence(const Mat_<float>& v1, const Mat_<float>& v2, Mat_<float>& div)
    839 {
    840     CV_DbgAssert( v1.rows > 2 && v1.cols > 2 );
    841     CV_DbgAssert( v2.size() == v1.size() );
    842     CV_DbgAssert( div.size() == v1.size() );
    843 
    844     {
    845         DivergenceBody body;
    846 
    847         body.v1 = v1;
    848         body.v2 = v2;
    849         body.div = div;
    850 
    851         parallel_for_(Range(1, v1.rows), body);
    852     }
    853 
    854     // compute the divergence on the first row
    855     for(int x = 1; x < v1.cols; ++x)
    856         div(0, x) = v1(0, x) - v1(0, x - 1) + v2(0, x);
    857 
    858     // compute the divergence on the first column
    859     for (int y = 1; y < v1.rows; ++y)
    860         div(y, 0) = v1(y, 0) + v2(y, 0) - v2(y - 1, 0);
    861 
    862     div(0, 0) = v1(0, 0) + v2(0, 0);
    863 }
    864 
    865 ////////////////////////////////////////////////////////////
    866 // calcGradRho
    867 
    868 struct CalcGradRhoBody : ParallelLoopBody
    869 {
    870     void operator() (const Range& range) const;
    871 
    872     Mat_<float> I0;
    873     Mat_<float> I1w;
    874     Mat_<float> I1wx;
    875     Mat_<float> I1wy;
    876     Mat_<float> u1;
    877     Mat_<float> u2;
    878     mutable Mat_<float> grad;
    879     mutable Mat_<float> rho_c;
    880 };
    881 
    882 void CalcGradRhoBody::operator() (const Range& range) const
    883 {
    884     for (int y = range.start; y < range.end; ++y)
    885     {
    886         const float* I0Row = I0[y];
    887         const float* I1wRow = I1w[y];
    888         const float* I1wxRow = I1wx[y];
    889         const float* I1wyRow = I1wy[y];
    890         const float* u1Row = u1[y];
    891         const float* u2Row = u2[y];
    892 
    893         float* gradRow = grad[y];
    894         float* rhoRow = rho_c[y];
    895 
    896         for (int x = 0; x < I0.cols; ++x)
    897         {
    898             const float Ix2 = I1wxRow[x] * I1wxRow[x];
    899             const float Iy2 = I1wyRow[x] * I1wyRow[x];
    900 
    901             // store the |Grad(I1)|^2
    902             gradRow[x] = Ix2 + Iy2;
    903 
    904             // compute the constant part of the rho function
    905             rhoRow[x] = (I1wRow[x] - I1wxRow[x] * u1Row[x] - I1wyRow[x] * u2Row[x] - I0Row[x]);
    906         }
    907     }
    908 }
    909 
    910 void calcGradRho(const Mat_<float>& I0, const Mat_<float>& I1w, const Mat_<float>& I1wx, const Mat_<float>& I1wy, const Mat_<float>& u1, const Mat_<float>& u2,
    911     Mat_<float>& grad, Mat_<float>& rho_c)
    912 {
    913     CV_DbgAssert( I1w.size() == I0.size() );
    914     CV_DbgAssert( I1wx.size() == I0.size() );
    915     CV_DbgAssert( I1wy.size() == I0.size() );
    916     CV_DbgAssert( u1.size() == I0.size() );
    917     CV_DbgAssert( u2.size() == I0.size() );
    918     CV_DbgAssert( grad.size() == I0.size() );
    919     CV_DbgAssert( rho_c.size() == I0.size() );
    920 
    921     CalcGradRhoBody body;
    922 
    923     body.I0 = I0;
    924     body.I1w = I1w;
    925     body.I1wx = I1wx;
    926     body.I1wy = I1wy;
    927     body.u1 = u1;
    928     body.u2 = u2;
    929     body.grad = grad;
    930     body.rho_c = rho_c;
    931 
    932     parallel_for_(Range(0, I0.rows), body);
    933 }
    934 
    935 ////////////////////////////////////////////////////////////
    936 // estimateV
    937 
    938 struct EstimateVBody : ParallelLoopBody
    939 {
    940     void operator() (const Range& range) const;
    941 
    942     Mat_<float> I1wx;
    943     Mat_<float> I1wy;
    944     Mat_<float> u1;
    945     Mat_<float> u2;
    946     Mat_<float> u3;
    947     Mat_<float> grad;
    948     Mat_<float> rho_c;
    949     mutable Mat_<float> v1;
    950     mutable Mat_<float> v2;
    951     mutable Mat_<float> v3;
    952     float l_t;
    953     float gamma;
    954 };
    955 
    956 void EstimateVBody::operator() (const Range& range) const
    957 {
    958     bool use_gamma = gamma != 0;
    959     for (int y = range.start; y < range.end; ++y)
    960     {
    961         const float* I1wxRow = I1wx[y];
    962         const float* I1wyRow = I1wy[y];
    963         const float* u1Row = u1[y];
    964         const float* u2Row = u2[y];
    965         const float* u3Row = use_gamma?u3[y]:NULL;
    966         const float* gradRow = grad[y];
    967         const float* rhoRow = rho_c[y];
    968 
    969         float* v1Row = v1[y];
    970         float* v2Row = v2[y];
    971         float* v3Row = use_gamma ? v3[y]:NULL;
    972 
    973         for (int x = 0; x < I1wx.cols; ++x)
    974         {
    975             const float rho = use_gamma ? rhoRow[x] + (I1wxRow[x] * u1Row[x] + I1wyRow[x] * u2Row[x]) + gamma * u3Row[x] :
    976                                           rhoRow[x] + (I1wxRow[x] * u1Row[x] + I1wyRow[x] * u2Row[x]);
    977             float d1 = 0.0f;
    978             float d2 = 0.0f;
    979             float d3 = 0.0f;
    980             if (rho < -l_t * gradRow[x])
    981             {
    982                 d1 = l_t * I1wxRow[x];
    983                 d2 = l_t * I1wyRow[x];
    984                 if (use_gamma) d3 = l_t * gamma;
    985             }
    986             else if (rho > l_t * gradRow[x])
    987             {
    988                 d1 = -l_t * I1wxRow[x];
    989                 d2 = -l_t * I1wyRow[x];
    990                 if (use_gamma) d3 = -l_t * gamma;
    991             }
    992             else if (gradRow[x] > std::numeric_limits<float>::epsilon())
    993             {
    994                 float fi = -rho / gradRow[x];
    995                 d1 = fi * I1wxRow[x];
    996                 d2 = fi * I1wyRow[x];
    997                 if (use_gamma) d3 = fi * gamma;
    998             }
    999 
   1000             v1Row[x] = u1Row[x] + d1;
   1001             v2Row[x] = u2Row[x] + d2;
   1002             if (use_gamma) v3Row[x] = u3Row[x] + d3;
   1003         }
   1004     }
   1005 }
   1006 
   1007 void estimateV(const Mat_<float>& I1wx, const Mat_<float>& I1wy, const Mat_<float>& u1, const Mat_<float>& u2, const Mat_<float>& u3, const Mat_<float>& grad, const Mat_<float>& rho_c,
   1008    Mat_<float>& v1, Mat_<float>& v2, Mat_<float>& v3, float l_t, float gamma)
   1009 {
   1010     CV_DbgAssert( I1wy.size() == I1wx.size() );
   1011     CV_DbgAssert( u1.size() == I1wx.size() );
   1012     CV_DbgAssert( u2.size() == I1wx.size() );
   1013     CV_DbgAssert( grad.size() == I1wx.size() );
   1014     CV_DbgAssert( rho_c.size() == I1wx.size() );
   1015     CV_DbgAssert( v1.size() == I1wx.size() );
   1016     CV_DbgAssert( v2.size() == I1wx.size() );
   1017 
   1018     EstimateVBody body;
   1019     bool use_gamma = gamma != 0;
   1020     body.I1wx = I1wx;
   1021     body.I1wy = I1wy;
   1022     body.u1 = u1;
   1023     body.u2 = u2;
   1024     if (use_gamma) body.u3 = u3;
   1025     body.grad = grad;
   1026     body.rho_c = rho_c;
   1027     body.v1 = v1;
   1028     body.v2 = v2;
   1029     if (use_gamma) body.v3 = v3;
   1030     body.l_t = l_t;
   1031     body.gamma = gamma;
   1032     parallel_for_(Range(0, I1wx.rows), body);
   1033 }
   1034 
   1035 ////////////////////////////////////////////////////////////
   1036 // estimateU
   1037 
   1038 float estimateU(const Mat_<float>& v1, const Mat_<float>& v2, const Mat_<float>& v3,
   1039             const Mat_<float>& div_p1, const Mat_<float>& div_p2, const Mat_<float>& div_p3,
   1040             Mat_<float>& u1, Mat_<float>& u2, Mat_<float>& u3,
   1041             float theta, float gamma)
   1042 {
   1043     CV_DbgAssert( v2.size() == v1.size() );
   1044     CV_DbgAssert( div_p1.size() == v1.size() );
   1045     CV_DbgAssert( div_p2.size() == v1.size() );
   1046     CV_DbgAssert( u1.size() == v1.size() );
   1047     CV_DbgAssert( u2.size() == v1.size() );
   1048 
   1049     float error = 0.0f;
   1050     bool use_gamma = gamma != 0;
   1051     for (int y = 0; y < v1.rows; ++y)
   1052     {
   1053         const float* v1Row = v1[y];
   1054         const float* v2Row = v2[y];
   1055         const float* v3Row = use_gamma?v3[y]:NULL;
   1056         const float* divP1Row = div_p1[y];
   1057         const float* divP2Row = div_p2[y];
   1058         const float* divP3Row = use_gamma?div_p3[y]:NULL;
   1059 
   1060         float* u1Row = u1[y];
   1061         float* u2Row = u2[y];
   1062         float* u3Row = use_gamma?u3[y]:NULL;
   1063 
   1064 
   1065         for (int x = 0; x < v1.cols; ++x)
   1066         {
   1067             const float u1k = u1Row[x];
   1068             const float u2k = u2Row[x];
   1069             const float u3k = use_gamma?u3Row[x]:0;
   1070 
   1071             u1Row[x] = v1Row[x] + theta * divP1Row[x];
   1072             u2Row[x] = v2Row[x] + theta * divP2Row[x];
   1073             if (use_gamma) u3Row[x] = v3Row[x] + theta * divP3Row[x];
   1074             error += use_gamma?(u1Row[x] - u1k) * (u1Row[x] - u1k) + (u2Row[x] - u2k) * (u2Row[x] - u2k) + (u3Row[x] - u3k) * (u3Row[x] - u3k):
   1075                                (u1Row[x] - u1k) * (u1Row[x] - u1k) + (u2Row[x] - u2k) * (u2Row[x] - u2k);
   1076         }
   1077     }
   1078 
   1079     return error;
   1080 }
   1081 
   1082 ////////////////////////////////////////////////////////////
   1083 // estimateDualVariables
   1084 
   1085 struct EstimateDualVariablesBody : ParallelLoopBody
   1086 {
   1087     void operator() (const Range& range) const;
   1088 
   1089     Mat_<float> u1x;
   1090     Mat_<float> u1y;
   1091     Mat_<float> u2x;
   1092     Mat_<float> u2y;
   1093     Mat_<float> u3x;
   1094     Mat_<float> u3y;
   1095     mutable Mat_<float> p11;
   1096     mutable Mat_<float> p12;
   1097     mutable Mat_<float> p21;
   1098     mutable Mat_<float> p22;
   1099     mutable Mat_<float> p31;
   1100     mutable Mat_<float> p32;
   1101     float taut;
   1102     bool use_gamma;
   1103 };
   1104 
   1105 void EstimateDualVariablesBody::operator() (const Range& range) const
   1106 {
   1107     for (int y = range.start; y < range.end; ++y)
   1108     {
   1109         const float* u1xRow = u1x[y];
   1110         const float* u1yRow = u1y[y];
   1111         const float* u2xRow = u2x[y];
   1112         const float* u2yRow = u2y[y];
   1113         const float* u3xRow = u3x[y];
   1114         const float* u3yRow = u3y[y];
   1115 
   1116         float* p11Row = p11[y];
   1117         float* p12Row = p12[y];
   1118         float* p21Row = p21[y];
   1119         float* p22Row = p22[y];
   1120         float* p31Row = p31[y];
   1121         float* p32Row = p32[y];
   1122 
   1123         for (int x = 0; x < u1x.cols; ++x)
   1124         {
   1125             const float g1 = static_cast<float>(hypot(u1xRow[x], u1yRow[x]));
   1126             const float g2 = static_cast<float>(hypot(u2xRow[x], u2yRow[x]));
   1127             const float g3 = static_cast<float>(hypot(u3xRow[x], u3yRow[x]));
   1128 
   1129             const float ng1  = 1.0f + taut * g1;
   1130             const float ng2 =  1.0f + taut * g2;
   1131             const float ng3 = 1.0f + taut * g3;
   1132 
   1133             p11Row[x] = (p11Row[x] + taut * u1xRow[x]) / ng1;
   1134             p12Row[x] = (p12Row[x] + taut * u1yRow[x]) / ng1;
   1135             p21Row[x] = (p21Row[x] + taut * u2xRow[x]) / ng2;
   1136             p22Row[x] = (p22Row[x] + taut * u2yRow[x]) / ng2;
   1137             if (use_gamma) p31Row[x] = (p31Row[x] + taut * u3xRow[x]) / ng3;
   1138             if (use_gamma) p32Row[x] = (p32Row[x] + taut * u3yRow[x]) / ng3;
   1139         }
   1140     }
   1141 }
   1142 
   1143 void estimateDualVariables(const Mat_<float>& u1x, const Mat_<float>& u1y,
   1144                      const Mat_<float>& u2x, const Mat_<float>& u2y,
   1145                      const Mat_<float>& u3x, const Mat_<float>& u3y,
   1146                            Mat_<float>& p11, Mat_<float>& p12,
   1147                      Mat_<float>& p21, Mat_<float>& p22,
   1148                      Mat_<float>& p31, Mat_<float>& p32,
   1149                      float taut, bool use_gamma)
   1150 {
   1151     CV_DbgAssert( u1y.size() == u1x.size() );
   1152     CV_DbgAssert( u2x.size() == u1x.size() );
   1153     CV_DbgAssert( u3x.size() == u1x.size() );
   1154     CV_DbgAssert( u2y.size() == u1x.size() );
   1155     CV_DbgAssert( u3y.size() == u1x.size() );
   1156     CV_DbgAssert( p11.size() == u1x.size() );
   1157     CV_DbgAssert( p12.size() == u1x.size() );
   1158     CV_DbgAssert( p21.size() == u1x.size() );
   1159     CV_DbgAssert( p22.size() == u1x.size() );
   1160     CV_DbgAssert( p31.size() == u1x.size() );
   1161     CV_DbgAssert( p32.size() == u1x.size() );
   1162 
   1163     EstimateDualVariablesBody body;
   1164 
   1165     body.u1x = u1x;
   1166     body.u1y = u1y;
   1167     body.u2x = u2x;
   1168     body.u2y = u2y;
   1169     body.u3x = u3x;
   1170     body.u3y = u3y;
   1171     body.p11 = p11;
   1172     body.p12 = p12;
   1173     body.p21 = p21;
   1174     body.p22 = p22;
   1175     body.p31 = p31;
   1176     body.p32 = p32;
   1177     body.taut = taut;
   1178     body.use_gamma = use_gamma;
   1179 
   1180     parallel_for_(Range(0, u1x.rows), body);
   1181 }
   1182 
   1183 bool OpticalFlowDual_TVL1::procOneScale_ocl(const UMat& I0, const UMat& I1, UMat& u1, UMat& u2)
   1184 {
   1185     using namespace cv_ocl_tvl1flow;
   1186 
   1187     const double scaledEpsilon = epsilon * epsilon * I0.size().area();
   1188 
   1189     CV_DbgAssert(I1.size() == I0.size());
   1190     CV_DbgAssert(I1.type() == I0.type());
   1191     CV_DbgAssert(u1.empty() || u1.size() == I0.size());
   1192     CV_DbgAssert(u2.size() == u1.size());
   1193 
   1194     if (u1.empty())
   1195     {
   1196         u1.create(I0.size(), CV_32FC1);
   1197         u1.setTo(Scalar::all(0));
   1198 
   1199         u2.create(I0.size(), CV_32FC1);
   1200         u2.setTo(Scalar::all(0));
   1201     }
   1202 
   1203     UMat I1x = dum.I1x_buf(Rect(0, 0, I0.cols, I0.rows));
   1204     UMat I1y = dum.I1y_buf(Rect(0, 0, I0.cols, I0.rows));
   1205 
   1206     if (!centeredGradient(I1, I1x, I1y))
   1207         return false;
   1208 
   1209     UMat I1w = dum.I1w_buf(Rect(0, 0, I0.cols, I0.rows));
   1210     UMat I1wx = dum.I1wx_buf(Rect(0, 0, I0.cols, I0.rows));
   1211     UMat I1wy = dum.I1wy_buf(Rect(0, 0, I0.cols, I0.rows));
   1212 
   1213     UMat grad = dum.grad_buf(Rect(0, 0, I0.cols, I0.rows));
   1214     UMat rho_c = dum.rho_c_buf(Rect(0, 0, I0.cols, I0.rows));
   1215 
   1216     UMat p11 = dum.p11_buf(Rect(0, 0, I0.cols, I0.rows));
   1217     UMat p12 = dum.p12_buf(Rect(0, 0, I0.cols, I0.rows));
   1218     UMat p21 = dum.p21_buf(Rect(0, 0, I0.cols, I0.rows));
   1219     UMat p22 = dum.p22_buf(Rect(0, 0, I0.cols, I0.rows));
   1220     p11.setTo(Scalar::all(0));
   1221     p12.setTo(Scalar::all(0));
   1222     p21.setTo(Scalar::all(0));
   1223     p22.setTo(Scalar::all(0));
   1224 
   1225     UMat diff = dum.diff_buf(Rect(0, 0, I0.cols, I0.rows));
   1226 
   1227     const float l_t = static_cast<float>(lambda * theta);
   1228     const float taut = static_cast<float>(tau / theta);
   1229     int n;
   1230 
   1231     for (int warpings = 0; warpings < warps; ++warpings)
   1232     {
   1233         if (!warpBackward(I0, I1, I1x, I1y, u1, u2, I1w, I1wx, I1wy, grad, rho_c))
   1234             return false;
   1235 
   1236         double error = std::numeric_limits<double>::max();
   1237         double prev_error = 0;
   1238 
   1239         for (int n_outer = 0; error > scaledEpsilon && n_outer < outerIterations; ++n_outer)
   1240         {
   1241             if (medianFiltering > 1) {
   1242                 cv::medianBlur(u1, u1, medianFiltering);
   1243                 cv::medianBlur(u2, u2, medianFiltering);
   1244             }
   1245             for (int n_inner = 0; error > scaledEpsilon && n_inner < innerIterations; ++n_inner)
   1246             {
   1247                 // some tweaks to make sum operation less frequently
   1248                 n = n_inner + n_outer*innerIterations;
   1249                 char calc_error = (n & 0x1) && (prev_error < scaledEpsilon);
   1250                 if (!estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22,
   1251                     u1, u2, diff, l_t, static_cast<float>(theta), calc_error))
   1252                     return false;
   1253                 if (calc_error)
   1254                 {
   1255                     error = cv::sum(diff)[0];
   1256                     prev_error = error;
   1257                 }
   1258                 else
   1259                 {
   1260                     error = std::numeric_limits<double>::max();
   1261                     prev_error -= scaledEpsilon;
   1262                 }
   1263                 if (!estimateDualVariables(u1, u2, p11, p12, p21, p22, taut))
   1264                     return false;
   1265             }
   1266         }
   1267     }
   1268     return true;
   1269 }
   1270 
   1271 void OpticalFlowDual_TVL1::procOneScale(const Mat_<float>& I0, const Mat_<float>& I1, Mat_<float>& u1, Mat_<float>& u2, Mat_<float>& u3)
   1272 {
   1273     const float scaledEpsilon = static_cast<float>(epsilon * epsilon * I0.size().area());
   1274 
   1275     CV_DbgAssert( I1.size() == I0.size() );
   1276     CV_DbgAssert( I1.type() == I0.type() );
   1277     CV_DbgAssert( u1.size() == I0.size() );
   1278     CV_DbgAssert( u2.size() == u1.size() );
   1279 
   1280     Mat_<float> I1x = dm.I1x_buf(Rect(0, 0, I0.cols, I0.rows));
   1281     Mat_<float> I1y = dm.I1y_buf(Rect(0, 0, I0.cols, I0.rows));
   1282     centeredGradient(I1, I1x, I1y);
   1283 
   1284     Mat_<float> flowMap1 = dm.flowMap1_buf(Rect(0, 0, I0.cols, I0.rows));
   1285     Mat_<float> flowMap2 = dm.flowMap2_buf(Rect(0, 0, I0.cols, I0.rows));
   1286 
   1287     Mat_<float> I1w = dm.I1w_buf(Rect(0, 0, I0.cols, I0.rows));
   1288     Mat_<float> I1wx = dm.I1wx_buf(Rect(0, 0, I0.cols, I0.rows));
   1289     Mat_<float> I1wy = dm.I1wy_buf(Rect(0, 0, I0.cols, I0.rows));
   1290 
   1291     Mat_<float> grad = dm.grad_buf(Rect(0, 0, I0.cols, I0.rows));
   1292     Mat_<float> rho_c = dm.rho_c_buf(Rect(0, 0, I0.cols, I0.rows));
   1293 
   1294     Mat_<float> v1 = dm.v1_buf(Rect(0, 0, I0.cols, I0.rows));
   1295     Mat_<float> v2 = dm.v2_buf(Rect(0, 0, I0.cols, I0.rows));
   1296     Mat_<float> v3 = dm.v3_buf(Rect(0, 0, I0.cols, I0.rows));
   1297 
   1298     Mat_<float> p11 = dm.p11_buf(Rect(0, 0, I0.cols, I0.rows));
   1299     Mat_<float> p12 = dm.p12_buf(Rect(0, 0, I0.cols, I0.rows));
   1300     Mat_<float> p21 = dm.p21_buf(Rect(0, 0, I0.cols, I0.rows));
   1301     Mat_<float> p22 = dm.p22_buf(Rect(0, 0, I0.cols, I0.rows));
   1302     Mat_<float> p31 = dm.p31_buf(Rect(0, 0, I0.cols, I0.rows));
   1303     Mat_<float> p32 = dm.p32_buf(Rect(0, 0, I0.cols, I0.rows));
   1304     p11.setTo(Scalar::all(0));
   1305     p12.setTo(Scalar::all(0));
   1306     p21.setTo(Scalar::all(0));
   1307     p22.setTo(Scalar::all(0));
   1308     bool use_gamma = gamma != 0.;
   1309     if (use_gamma) p31.setTo(Scalar::all(0));
   1310     if (use_gamma) p32.setTo(Scalar::all(0));
   1311 
   1312     Mat_<float> div_p1 = dm.div_p1_buf(Rect(0, 0, I0.cols, I0.rows));
   1313     Mat_<float> div_p2 = dm.div_p2_buf(Rect(0, 0, I0.cols, I0.rows));
   1314     Mat_<float> div_p3 = dm.div_p3_buf(Rect(0, 0, I0.cols, I0.rows));
   1315 
   1316     Mat_<float> u1x = dm.u1x_buf(Rect(0, 0, I0.cols, I0.rows));
   1317     Mat_<float> u1y = dm.u1y_buf(Rect(0, 0, I0.cols, I0.rows));
   1318     Mat_<float> u2x = dm.u2x_buf(Rect(0, 0, I0.cols, I0.rows));
   1319     Mat_<float> u2y = dm.u2y_buf(Rect(0, 0, I0.cols, I0.rows));
   1320     Mat_<float> u3x = dm.u3x_buf(Rect(0, 0, I0.cols, I0.rows));
   1321     Mat_<float> u3y = dm.u3y_buf(Rect(0, 0, I0.cols, I0.rows));
   1322 
   1323     const float l_t = static_cast<float>(lambda * theta);
   1324     const float taut = static_cast<float>(tau / theta);
   1325 
   1326     for (int warpings = 0; warpings < warps; ++warpings)
   1327     {
   1328         // compute the warping of the target image and its derivatives
   1329         buildFlowMap(u1, u2, flowMap1, flowMap2);
   1330         remap(I1, I1w, flowMap1, flowMap2, INTER_CUBIC);
   1331         remap(I1x, I1wx, flowMap1, flowMap2, INTER_CUBIC);
   1332         remap(I1y, I1wy, flowMap1, flowMap2, INTER_CUBIC);
   1333         //calculate I1(x+u0) and its gradient
   1334         calcGradRho(I0, I1w, I1wx, I1wy, u1, u2, grad, rho_c);
   1335 
   1336         float error = std::numeric_limits<float>::max();
   1337         for (int n_outer = 0; error > scaledEpsilon && n_outer < outerIterations; ++n_outer)
   1338         {
   1339             if (medianFiltering > 1) {
   1340                 cv::medianBlur(u1, u1, medianFiltering);
   1341                 cv::medianBlur(u2, u2, medianFiltering);
   1342             }
   1343             for (int n_inner = 0; error > scaledEpsilon && n_inner < innerIterations; ++n_inner)
   1344             {
   1345                 // estimate the values of the variable (v1, v2) (thresholding operator TH)
   1346                 estimateV(I1wx, I1wy, u1, u2, u3, grad, rho_c, v1, v2, v3, l_t, static_cast<float>(gamma));
   1347 
   1348                 // compute the divergence of the dual variable (p1, p2, p3)
   1349                 divergence(p11, p12, div_p1);
   1350                 divergence(p21, p22, div_p2);
   1351                 if (use_gamma) divergence(p31, p32, div_p3);
   1352 
   1353                 // estimate the values of the optical flow (u1, u2)
   1354                 error = estimateU(v1, v2, v3, div_p1, div_p2, div_p3, u1, u2, u3, static_cast<float>(theta), static_cast<float>(gamma));
   1355 
   1356                 // compute the gradient of the optical flow (Du1, Du2)
   1357                 forwardGradient(u1, u1x, u1y);
   1358                 forwardGradient(u2, u2x, u2y);
   1359                 if (use_gamma) forwardGradient(u3, u3x, u3y);
   1360 
   1361                 // estimate the values of the dual variable (p1, p2, p3)
   1362                 estimateDualVariables(u1x, u1y, u2x, u2y, u3x, u3y, p11, p12, p21, p22, p31, p32, taut, use_gamma);
   1363             }
   1364         }
   1365     }
   1366 }
   1367 
   1368 void OpticalFlowDual_TVL1::collectGarbage()
   1369 {
   1370     //dataMat structure dm
   1371     dm.I0s.clear();
   1372     dm.I1s.clear();
   1373     dm.u1s.clear();
   1374     dm.u2s.clear();
   1375 
   1376     dm.I1x_buf.release();
   1377     dm.I1y_buf.release();
   1378 
   1379     dm.flowMap1_buf.release();
   1380     dm.flowMap2_buf.release();
   1381 
   1382     dm.I1w_buf.release();
   1383     dm.I1wx_buf.release();
   1384     dm.I1wy_buf.release();
   1385 
   1386     dm.grad_buf.release();
   1387     dm.rho_c_buf.release();
   1388 
   1389     dm.v1_buf.release();
   1390     dm.v2_buf.release();
   1391 
   1392     dm.p11_buf.release();
   1393     dm.p12_buf.release();
   1394     dm.p21_buf.release();
   1395     dm.p22_buf.release();
   1396 
   1397     dm.div_p1_buf.release();
   1398     dm.div_p2_buf.release();
   1399 
   1400     dm.u1x_buf.release();
   1401     dm.u1y_buf.release();
   1402     dm.u2x_buf.release();
   1403     dm.u2y_buf.release();
   1404 
   1405     //dataUMat structure dum
   1406     dum.I0s.clear();
   1407     dum.I1s.clear();
   1408     dum.u1s.clear();
   1409     dum.u2s.clear();
   1410 
   1411     dum.I1x_buf.release();
   1412     dum.I1y_buf.release();
   1413 
   1414     dum.I1w_buf.release();
   1415     dum.I1wx_buf.release();
   1416     dum.I1wy_buf.release();
   1417 
   1418     dum.grad_buf.release();
   1419     dum.rho_c_buf.release();
   1420 
   1421     dum.p11_buf.release();
   1422     dum.p12_buf.release();
   1423     dum.p21_buf.release();
   1424     dum.p22_buf.release();
   1425 
   1426     dum.diff_buf.release();
   1427     dum.norm_buf.release();
   1428 }
   1429 
   1430 } // namespace
   1431 
   1432 Ptr<DualTVL1OpticalFlow> cv::createOptFlow_DualTVL1()
   1433 {
   1434     return makePtr<OpticalFlowDual_TVL1>();
   1435 }
   1436