Home | History | Annotate | Download | only in src
      1 /*M///////////////////////////////////////////////////////////////////////////////////////
      2 //
      3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
      4 //
      5 //  By downloading, copying, installing or using the software you agree to this license.
      6 //  If you do not agree to this license, do not download, install,
      7 //  copy or use the software.
      8 //
      9 //
     10 //                           License Agreement
     11 //                For Open Source Computer Vision Library
     12 //
     13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
     14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
     15 // Third party copyrights are property of their respective owners.
     16 //
     17 // Redistribution and use in source and binary forms, with or without modification,
     18 // are permitted provided that the following conditions are met:
     19 //
     20 //   * Redistribution's of source code must retain the above copyright notice,
     21 //     this list of conditions and the following disclaimer.
     22 //
     23 //   * Redistribution's in binary form must reproduce the above copyright notice,
     24 //     this list of conditions and the following disclaimer in the documentation
     25 //     and/or other materials provided with the distribution.
     26 //
     27 //   * The name of the copyright holders may not be used to endorse or promote products
     28 //     derived from this software without specific prior written permission.
     29 //
     30 // This software is provided by the copyright holders and contributors "as is" and
     31 // any express or implied warranties, including, but not limited to, the implied
     32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
     33 // In no event shall the Intel Corporation or contributors be liable for any direct,
     34 // indirect, incidental, special, exemplary, or consequential damages
     35 // (including, but not limited to, procurement of substitute goods or services;
     36 // loss of use, data, or profits; or business interruption) however caused
     37 // and on any theory of liability, whether in contract, strict liability,
     38 // or tort (including negligence or otherwise) arising in any way out of
     39 // the use of this software, even if advised of the possibility of such damage.
     40 //
     41 //M*/
     42 
     43 #include "precomp.hpp"
     44 #include "opencl_kernels_video.hpp"
     45 
     46 #if defined __APPLE__ || defined ANDROID
     47 #define SMALL_LOCALSIZE
     48 #endif
     49 
     50 //
     51 // 2D dense optical flow algorithm from the following paper:
     52 // Gunnar Farneback. "Two-Frame Motion Estimation Based on Polynomial Expansion".
     53 // Proceedings of the 13th Scandinavian Conference on Image Analysis, Gothenburg, Sweden
     54 //
     55 
     56 namespace cv
     57 {
     58 
     59 static void
     60 FarnebackPrepareGaussian(int n, double sigma, float *g, float *xg, float *xxg,
     61                          double &ig11, double &ig03, double &ig33, double &ig55)
     62 {
     63     if( sigma < FLT_EPSILON )
     64         sigma = n*0.3;
     65 
     66     double s = 0.;
     67     for (int x = -n; x <= n; x++)
     68     {
     69         g[x] = (float)std::exp(-x*x/(2*sigma*sigma));
     70         s += g[x];
     71     }
     72 
     73     s = 1./s;
     74     for (int x = -n; x <= n; x++)
     75     {
     76         g[x] = (float)(g[x]*s);
     77         xg[x] = (float)(x*g[x]);
     78         xxg[x] = (float)(x*x*g[x]);
     79     }
     80 
     81     Mat_<double> G(6, 6);
     82     G.setTo(0);
     83 
     84     for (int y = -n; y <= n; y++)
     85     {
     86         for (int x = -n; x <= n; x++)
     87         {
     88             G(0,0) += g[y]*g[x];
     89             G(1,1) += g[y]*g[x]*x*x;
     90             G(3,3) += g[y]*g[x]*x*x*x*x;
     91             G(5,5) += g[y]*g[x]*x*x*y*y;
     92         }
     93     }
     94 
     95     //G[0][0] = 1.;
     96     G(2,2) = G(0,3) = G(0,4) = G(3,0) = G(4,0) = G(1,1);
     97     G(4,4) = G(3,3);
     98     G(3,4) = G(4,3) = G(5,5);
     99 
    100     // invG:
    101     // [ x        e  e    ]
    102     // [    y             ]
    103     // [       y          ]
    104     // [ e        z       ]
    105     // [ e           z    ]
    106     // [                u ]
    107     Mat_<double> invG = G.inv(DECOMP_CHOLESKY);
    108 
    109     ig11 = invG(1,1);
    110     ig03 = invG(0,3);
    111     ig33 = invG(3,3);
    112     ig55 = invG(5,5);
    113 }
    114 
    115 static void
    116 FarnebackPolyExp( const Mat& src, Mat& dst, int n, double sigma )
    117 {
    118     int k, x, y;
    119 
    120     CV_Assert( src.type() == CV_32FC1 );
    121     int width = src.cols;
    122     int height = src.rows;
    123     AutoBuffer<float> kbuf(n*6 + 3), _row((width + n*2)*3);
    124     float* g = kbuf + n;
    125     float* xg = g + n*2 + 1;
    126     float* xxg = xg + n*2 + 1;
    127     float *row = (float*)_row + n*3;
    128     double ig11, ig03, ig33, ig55;
    129 
    130     FarnebackPrepareGaussian(n, sigma, g, xg, xxg, ig11, ig03, ig33, ig55);
    131 
    132     dst.create( height, width, CV_32FC(5));
    133 
    134     for( y = 0; y < height; y++ )
    135     {
    136         float g0 = g[0], g1, g2;
    137         const float *srow0 = src.ptr<float>(y), *srow1 = 0;
    138         float *drow = dst.ptr<float>(y);
    139 
    140         // vertical part of convolution
    141         for( x = 0; x < width; x++ )
    142         {
    143             row[x*3] = srow0[x]*g0;
    144             row[x*3+1] = row[x*3+2] = 0.f;
    145         }
    146 
    147         for( k = 1; k <= n; k++ )
    148         {
    149             g0 = g[k]; g1 = xg[k]; g2 = xxg[k];
    150             srow0 = src.ptr<float>(std::max(y-k,0));
    151             srow1 = src.ptr<float>(std::min(y+k,height-1));
    152 
    153             for( x = 0; x < width; x++ )
    154             {
    155                 float p = srow0[x] + srow1[x];
    156                 float t0 = row[x*3] + g0*p;
    157                 float t1 = row[x*3+1] + g1*(srow1[x] - srow0[x]);
    158                 float t2 = row[x*3+2] + g2*p;
    159 
    160                 row[x*3] = t0;
    161                 row[x*3+1] = t1;
    162                 row[x*3+2] = t2;
    163             }
    164         }
    165 
    166         // horizontal part of convolution
    167         for( x = 0; x < n*3; x++ )
    168         {
    169             row[-1-x] = row[2-x];
    170             row[width*3+x] = row[width*3+x-3];
    171         }
    172 
    173         for( x = 0; x < width; x++ )
    174         {
    175             g0 = g[0];
    176             // r1 ~ 1, r2 ~ x, r3 ~ y, r4 ~ x^2, r5 ~ y^2, r6 ~ xy
    177             double b1 = row[x*3]*g0, b2 = 0, b3 = row[x*3+1]*g0,
    178                 b4 = 0, b5 = row[x*3+2]*g0, b6 = 0;
    179 
    180             for( k = 1; k <= n; k++ )
    181             {
    182                 double tg = row[(x+k)*3] + row[(x-k)*3];
    183                 g0 = g[k];
    184                 b1 += tg*g0;
    185                 b4 += tg*xxg[k];
    186                 b2 += (row[(x+k)*3] - row[(x-k)*3])*xg[k];
    187                 b3 += (row[(x+k)*3+1] + row[(x-k)*3+1])*g0;
    188                 b6 += (row[(x+k)*3+1] - row[(x-k)*3+1])*xg[k];
    189                 b5 += (row[(x+k)*3+2] + row[(x-k)*3+2])*g0;
    190             }
    191 
    192             // do not store r1
    193             drow[x*5+1] = (float)(b2*ig11);
    194             drow[x*5] = (float)(b3*ig11);
    195             drow[x*5+3] = (float)(b1*ig03 + b4*ig33);
    196             drow[x*5+2] = (float)(b1*ig03 + b5*ig33);
    197             drow[x*5+4] = (float)(b6*ig55);
    198         }
    199     }
    200 
    201     row -= n*3;
    202 }
    203 
    204 
    205 /*static void
    206 FarnebackPolyExpPyr( const Mat& src0, Vector<Mat>& pyr, int maxlevel, int n, double sigma )
    207 {
    208     Vector<Mat> imgpyr;
    209     buildPyramid( src0, imgpyr, maxlevel );
    210 
    211     for( int i = 0; i <= maxlevel; i++ )
    212         FarnebackPolyExp( imgpyr[i], pyr[i], n, sigma );
    213 }*/
    214 
    215 
    216 static void
    217 FarnebackUpdateMatrices( const Mat& _R0, const Mat& _R1, const Mat& _flow, Mat& matM, int _y0, int _y1 )
    218 {
    219     const int BORDER = 5;
    220     static const float border[BORDER] = {0.14f, 0.14f, 0.4472f, 0.4472f, 0.4472f};
    221 
    222     int x, y, width = _flow.cols, height = _flow.rows;
    223     const float* R1 = _R1.ptr<float>();
    224     size_t step1 = _R1.step/sizeof(R1[0]);
    225 
    226     matM.create(height, width, CV_32FC(5));
    227 
    228     for( y = _y0; y < _y1; y++ )
    229     {
    230         const float* flow = _flow.ptr<float>(y);
    231         const float* R0 = _R0.ptr<float>(y);
    232         float* M = matM.ptr<float>(y);
    233 
    234         for( x = 0; x < width; x++ )
    235         {
    236             float dx = flow[x*2], dy = flow[x*2+1];
    237             float fx = x + dx, fy = y + dy;
    238 
    239 #if 1
    240             int x1 = cvFloor(fx), y1 = cvFloor(fy);
    241             const float* ptr = R1 + y1*step1 + x1*5;
    242             float r2, r3, r4, r5, r6;
    243 
    244             fx -= x1; fy -= y1;
    245 
    246             if( (unsigned)x1 < (unsigned)(width-1) &&
    247                 (unsigned)y1 < (unsigned)(height-1) )
    248             {
    249                 float a00 = (1.f-fx)*(1.f-fy), a01 = fx*(1.f-fy),
    250                       a10 = (1.f-fx)*fy, a11 = fx*fy;
    251 
    252                 r2 = a00*ptr[0] + a01*ptr[5] + a10*ptr[step1] + a11*ptr[step1+5];
    253                 r3 = a00*ptr[1] + a01*ptr[6] + a10*ptr[step1+1] + a11*ptr[step1+6];
    254                 r4 = a00*ptr[2] + a01*ptr[7] + a10*ptr[step1+2] + a11*ptr[step1+7];
    255                 r5 = a00*ptr[3] + a01*ptr[8] + a10*ptr[step1+3] + a11*ptr[step1+8];
    256                 r6 = a00*ptr[4] + a01*ptr[9] + a10*ptr[step1+4] + a11*ptr[step1+9];
    257 
    258                 r4 = (R0[x*5+2] + r4)*0.5f;
    259                 r5 = (R0[x*5+3] + r5)*0.5f;
    260                 r6 = (R0[x*5+4] + r6)*0.25f;
    261             }
    262 #else
    263             int x1 = cvRound(fx), y1 = cvRound(fy);
    264             const float* ptr = R1 + y1*step1 + x1*5;
    265             float r2, r3, r4, r5, r6;
    266 
    267             if( (unsigned)x1 < (unsigned)width &&
    268                 (unsigned)y1 < (unsigned)height )
    269             {
    270                 r2 = ptr[0];
    271                 r3 = ptr[1];
    272                 r4 = (R0[x*5+2] + ptr[2])*0.5f;
    273                 r5 = (R0[x*5+3] + ptr[3])*0.5f;
    274                 r6 = (R0[x*5+4] + ptr[4])*0.25f;
    275             }
    276 #endif
    277             else
    278             {
    279                 r2 = r3 = 0.f;
    280                 r4 = R0[x*5+2];
    281                 r5 = R0[x*5+3];
    282                 r6 = R0[x*5+4]*0.5f;
    283             }
    284 
    285             r2 = (R0[x*5] - r2)*0.5f;
    286             r3 = (R0[x*5+1] - r3)*0.5f;
    287 
    288             r2 += r4*dy + r6*dx;
    289             r3 += r6*dy + r5*dx;
    290 
    291             if( (unsigned)(x - BORDER) >= (unsigned)(width - BORDER*2) ||
    292                 (unsigned)(y - BORDER) >= (unsigned)(height - BORDER*2))
    293             {
    294                 float scale = (x < BORDER ? border[x] : 1.f)*
    295                     (x >= width - BORDER ? border[width - x - 1] : 1.f)*
    296                     (y < BORDER ? border[y] : 1.f)*
    297                     (y >= height - BORDER ? border[height - y - 1] : 1.f);
    298 
    299                 r2 *= scale; r3 *= scale; r4 *= scale;
    300                 r5 *= scale; r6 *= scale;
    301             }
    302 
    303             M[x*5]   = r4*r4 + r6*r6; // G(1,1)
    304             M[x*5+1] = (r4 + r5)*r6;  // G(1,2)=G(2,1)
    305             M[x*5+2] = r5*r5 + r6*r6; // G(2,2)
    306             M[x*5+3] = r4*r2 + r6*r3; // h(1)
    307             M[x*5+4] = r6*r2 + r5*r3; // h(2)
    308         }
    309     }
    310 }
    311 
    312 
    313 static void
    314 FarnebackUpdateFlow_Blur( const Mat& _R0, const Mat& _R1,
    315                           Mat& _flow, Mat& matM, int block_size,
    316                           bool update_matrices )
    317 {
    318     int x, y, width = _flow.cols, height = _flow.rows;
    319     int m = block_size/2;
    320     int y0 = 0, y1;
    321     int min_update_stripe = std::max((1 << 10)/width, block_size);
    322     double scale = 1./(block_size*block_size);
    323 
    324     AutoBuffer<double> _vsum((width+m*2+2)*5);
    325     double* vsum = _vsum + (m+1)*5;
    326 
    327     // init vsum
    328     const float* srow0 = matM.ptr<float>();
    329     for( x = 0; x < width*5; x++ )
    330         vsum[x] = srow0[x]*(m+2);
    331 
    332     for( y = 1; y < m; y++ )
    333     {
    334         srow0 = matM.ptr<float>(std::min(y,height-1));
    335         for( x = 0; x < width*5; x++ )
    336             vsum[x] += srow0[x];
    337     }
    338 
    339     // compute blur(G)*flow=blur(h)
    340     for( y = 0; y < height; y++ )
    341     {
    342         double g11, g12, g22, h1, h2;
    343         float* flow = _flow.ptr<float>(y);
    344 
    345         srow0 = matM.ptr<float>(std::max(y-m-1,0));
    346         const float* srow1 = matM.ptr<float>(std::min(y+m,height-1));
    347 
    348         // vertical blur
    349         for( x = 0; x < width*5; x++ )
    350             vsum[x] += srow1[x] - srow0[x];
    351 
    352         // update borders
    353         for( x = 0; x < (m+1)*5; x++ )
    354         {
    355             vsum[-1-x] = vsum[4-x];
    356             vsum[width*5+x] = vsum[width*5+x-5];
    357         }
    358 
    359         // init g** and h*
    360         g11 = vsum[0]*(m+2);
    361         g12 = vsum[1]*(m+2);
    362         g22 = vsum[2]*(m+2);
    363         h1 = vsum[3]*(m+2);
    364         h2 = vsum[4]*(m+2);
    365 
    366         for( x = 1; x < m; x++ )
    367         {
    368             g11 += vsum[x*5];
    369             g12 += vsum[x*5+1];
    370             g22 += vsum[x*5+2];
    371             h1 += vsum[x*5+3];
    372             h2 += vsum[x*5+4];
    373         }
    374 
    375         // horizontal blur
    376         for( x = 0; x < width; x++ )
    377         {
    378             g11 += vsum[(x+m)*5] - vsum[(x-m)*5 - 5];
    379             g12 += vsum[(x+m)*5 + 1] - vsum[(x-m)*5 - 4];
    380             g22 += vsum[(x+m)*5 + 2] - vsum[(x-m)*5 - 3];
    381             h1 += vsum[(x+m)*5 + 3] - vsum[(x-m)*5 - 2];
    382             h2 += vsum[(x+m)*5 + 4] - vsum[(x-m)*5 - 1];
    383 
    384             double g11_ = g11*scale;
    385             double g12_ = g12*scale;
    386             double g22_ = g22*scale;
    387             double h1_ = h1*scale;
    388             double h2_ = h2*scale;
    389 
    390             double idet = 1./(g11_*g22_ - g12_*g12_+1e-3);
    391 
    392             flow[x*2] = (float)((g11_*h2_-g12_*h1_)*idet);
    393             flow[x*2+1] = (float)((g22_*h1_-g12_*h2_)*idet);
    394         }
    395 
    396         y1 = y == height - 1 ? height : y - block_size;
    397         if( update_matrices && (y1 == height || y1 >= y0 + min_update_stripe) )
    398         {
    399             FarnebackUpdateMatrices( _R0, _R1, _flow, matM, y0, y1 );
    400             y0 = y1;
    401         }
    402     }
    403 }
    404 
    405 
    406 static void
    407 FarnebackUpdateFlow_GaussianBlur( const Mat& _R0, const Mat& _R1,
    408                                   Mat& _flow, Mat& matM, int block_size,
    409                                   bool update_matrices )
    410 {
    411     int x, y, i, width = _flow.cols, height = _flow.rows;
    412     int m = block_size/2;
    413     int y0 = 0, y1;
    414     int min_update_stripe = std::max((1 << 10)/width, block_size);
    415     double sigma = m*0.3, s = 1;
    416 
    417     AutoBuffer<float> _vsum((width+m*2+2)*5 + 16), _hsum(width*5 + 16);
    418     AutoBuffer<float> _kernel((m+1)*5 + 16);
    419     AutoBuffer<float*> _srow(m*2+1);
    420     float *vsum = alignPtr((float*)_vsum + (m+1)*5, 16), *hsum = alignPtr((float*)_hsum, 16);
    421     float* kernel = (float*)_kernel;
    422     const float** srow = (const float**)&_srow[0];
    423     kernel[0] = (float)s;
    424 
    425     for( i = 1; i <= m; i++ )
    426     {
    427         float t = (float)std::exp(-i*i/(2*sigma*sigma) );
    428         kernel[i] = t;
    429         s += t*2;
    430     }
    431 
    432     s = 1./s;
    433     for( i = 0; i <= m; i++ )
    434         kernel[i] = (float)(kernel[i]*s);
    435 
    436 #if CV_SSE2
    437     float* simd_kernel = alignPtr(kernel + m+1, 16);
    438     volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE);
    439     if( useSIMD )
    440     {
    441         for( i = 0; i <= m; i++ )
    442             _mm_store_ps(simd_kernel + i*4, _mm_set1_ps(kernel[i]));
    443     }
    444 #endif
    445 
    446     // compute blur(G)*flow=blur(h)
    447     for( y = 0; y < height; y++ )
    448     {
    449         double g11, g12, g22, h1, h2;
    450         float* flow = _flow.ptr<float>(y);
    451 
    452         // vertical blur
    453         for( i = 0; i <= m; i++ )
    454         {
    455             srow[m-i] = matM.ptr<float>(std::max(y-i,0));
    456             srow[m+i] = matM.ptr<float>(std::min(y+i,height-1));
    457         }
    458 
    459         x = 0;
    460 #if CV_SSE2
    461         if( useSIMD )
    462         {
    463             for( ; x <= width*5 - 16; x += 16 )
    464             {
    465                 const float *sptr0 = srow[m], *sptr1;
    466                 __m128 g4 = _mm_load_ps(simd_kernel);
    467                 __m128 s0, s1, s2, s3;
    468                 s0 = _mm_mul_ps(_mm_loadu_ps(sptr0 + x), g4);
    469                 s1 = _mm_mul_ps(_mm_loadu_ps(sptr0 + x + 4), g4);
    470                 s2 = _mm_mul_ps(_mm_loadu_ps(sptr0 + x + 8), g4);
    471                 s3 = _mm_mul_ps(_mm_loadu_ps(sptr0 + x + 12), g4);
    472 
    473                 for( i = 1; i <= m; i++ )
    474                 {
    475                     __m128 x0, x1;
    476                     sptr0 = srow[m+i], sptr1 = srow[m-i];
    477                     g4 = _mm_load_ps(simd_kernel + i*4);
    478                     x0 = _mm_add_ps(_mm_loadu_ps(sptr0 + x), _mm_loadu_ps(sptr1 + x));
    479                     x1 = _mm_add_ps(_mm_loadu_ps(sptr0 + x + 4), _mm_loadu_ps(sptr1 + x + 4));
    480                     s0 = _mm_add_ps(s0, _mm_mul_ps(x0, g4));
    481                     s1 = _mm_add_ps(s1, _mm_mul_ps(x1, g4));
    482                     x0 = _mm_add_ps(_mm_loadu_ps(sptr0 + x + 8), _mm_loadu_ps(sptr1 + x + 8));
    483                     x1 = _mm_add_ps(_mm_loadu_ps(sptr0 + x + 12), _mm_loadu_ps(sptr1 + x + 12));
    484                     s2 = _mm_add_ps(s2, _mm_mul_ps(x0, g4));
    485                     s3 = _mm_add_ps(s3, _mm_mul_ps(x1, g4));
    486                 }
    487 
    488                 _mm_store_ps(vsum + x, s0);
    489                 _mm_store_ps(vsum + x + 4, s1);
    490                 _mm_store_ps(vsum + x + 8, s2);
    491                 _mm_store_ps(vsum + x + 12, s3);
    492             }
    493 
    494             for( ; x <= width*5 - 4; x += 4 )
    495             {
    496                 const float *sptr0 = srow[m], *sptr1;
    497                 __m128 g4 = _mm_load_ps(simd_kernel);
    498                 __m128 s0 = _mm_mul_ps(_mm_loadu_ps(sptr0 + x), g4);
    499 
    500                 for( i = 1; i <= m; i++ )
    501                 {
    502                     sptr0 = srow[m+i], sptr1 = srow[m-i];
    503                     g4 = _mm_load_ps(simd_kernel + i*4);
    504                     __m128 x0 = _mm_add_ps(_mm_loadu_ps(sptr0 + x), _mm_loadu_ps(sptr1 + x));
    505                     s0 = _mm_add_ps(s0, _mm_mul_ps(x0, g4));
    506                 }
    507                 _mm_store_ps(vsum + x, s0);
    508             }
    509         }
    510 #endif
    511         for( ; x < width*5; x++ )
    512         {
    513             float s0 = srow[m][x]*kernel[0];
    514             for( i = 1; i <= m; i++ )
    515                 s0 += (srow[m+i][x] + srow[m-i][x])*kernel[i];
    516             vsum[x] = s0;
    517         }
    518 
    519         // update borders
    520         for( x = 0; x < m*5; x++ )
    521         {
    522             vsum[-1-x] = vsum[4-x];
    523             vsum[width*5+x] = vsum[width*5+x-5];
    524         }
    525 
    526         // horizontal blur
    527         x = 0;
    528 #if CV_SSE2
    529         if( useSIMD )
    530         {
    531             for( ; x <= width*5 - 8; x += 8 )
    532             {
    533                 __m128 g4 = _mm_load_ps(simd_kernel);
    534                 __m128 s0 = _mm_mul_ps(_mm_loadu_ps(vsum + x), g4);
    535                 __m128 s1 = _mm_mul_ps(_mm_loadu_ps(vsum + x + 4), g4);
    536 
    537                 for( i = 1; i <= m; i++ )
    538                 {
    539                     g4 = _mm_load_ps(simd_kernel + i*4);
    540                     __m128 x0 = _mm_add_ps(_mm_loadu_ps(vsum + x - i*5),
    541                                            _mm_loadu_ps(vsum + x + i*5));
    542                     __m128 x1 = _mm_add_ps(_mm_loadu_ps(vsum + x - i*5 + 4),
    543                                            _mm_loadu_ps(vsum + x + i*5 + 4));
    544                     s0 = _mm_add_ps(s0, _mm_mul_ps(x0, g4));
    545                     s1 = _mm_add_ps(s1, _mm_mul_ps(x1, g4));
    546                 }
    547 
    548                 _mm_store_ps(hsum + x, s0);
    549                 _mm_store_ps(hsum + x + 4, s1);
    550             }
    551         }
    552 #endif
    553         for( ; x < width*5; x++ )
    554         {
    555             float sum = vsum[x]*kernel[0];
    556             for( i = 1; i <= m; i++ )
    557                 sum += kernel[i]*(vsum[x - i*5] + vsum[x + i*5]);
    558             hsum[x] = sum;
    559         }
    560 
    561         for( x = 0; x < width; x++ )
    562         {
    563             g11 = hsum[x*5];
    564             g12 = hsum[x*5+1];
    565             g22 = hsum[x*5+2];
    566             h1 = hsum[x*5+3];
    567             h2 = hsum[x*5+4];
    568 
    569             double idet = 1./(g11*g22 - g12*g12 + 1e-3);
    570 
    571             flow[x*2] = (float)((g11*h2-g12*h1)*idet);
    572             flow[x*2+1] = (float)((g22*h1-g12*h2)*idet);
    573         }
    574 
    575         y1 = y == height - 1 ? height : y - block_size;
    576         if( update_matrices && (y1 == height || y1 >= y0 + min_update_stripe) )
    577         {
    578             FarnebackUpdateMatrices( _R0, _R1, _flow, matM, y0, y1 );
    579             y0 = y1;
    580         }
    581     }
    582 }
    583 
    584 }
    585 
    586 namespace cv
    587 {
    588 class FarnebackOpticalFlow
    589 {
    590 public:
    591     FarnebackOpticalFlow()
    592     {
    593         numLevels = 5;
    594         pyrScale = 0.5;
    595         fastPyramids = false;
    596         winSize = 13;
    597         numIters = 10;
    598         polyN = 5;
    599         polySigma = 1.1;
    600         flags = 0;
    601     }
    602 
    603     int numLevels;
    604     double pyrScale;
    605     bool fastPyramids;
    606     int winSize;
    607     int numIters;
    608     int polyN;
    609     double polySigma;
    610     int flags;
    611 
    612     bool operator ()(const UMat &frame0, const UMat &frame1, UMat &flowx, UMat &flowy)
    613     {
    614         CV_Assert(frame0.channels() == 1 && frame1.channels() == 1);
    615         CV_Assert(frame0.size() == frame1.size());
    616         CV_Assert(polyN == 5 || polyN == 7);
    617         CV_Assert(!fastPyramids || std::abs(pyrScale - 0.5) < 1e-6);
    618 
    619         const int min_size = 32;
    620 
    621         Size size = frame0.size();
    622         UMat prevFlowX, prevFlowY, curFlowX, curFlowY;
    623 
    624         flowx.create(size, CV_32F);
    625         flowy.create(size, CV_32F);
    626         UMat flowx0 = flowx;
    627         UMat flowy0 = flowy;
    628 
    629         // Crop unnecessary levels
    630         double scale = 1;
    631         int numLevelsCropped = 0;
    632         for (; numLevelsCropped < numLevels; numLevelsCropped++)
    633         {
    634             scale *= pyrScale;
    635             if (size.width*scale < min_size || size.height*scale < min_size)
    636                 break;
    637         }
    638 
    639         frame0.convertTo(frames_[0], CV_32F);
    640         frame1.convertTo(frames_[1], CV_32F);
    641 
    642         if (fastPyramids)
    643         {
    644             // Build Gaussian pyramids using pyrDown()
    645             pyramid0_.resize(numLevelsCropped + 1);
    646             pyramid1_.resize(numLevelsCropped + 1);
    647             pyramid0_[0] = frames_[0];
    648             pyramid1_[0] = frames_[1];
    649             for (int i = 1; i <= numLevelsCropped; ++i)
    650             {
    651                 pyrDown(pyramid0_[i - 1], pyramid0_[i]);
    652                 pyrDown(pyramid1_[i - 1], pyramid1_[i]);
    653             }
    654         }
    655 
    656         setPolynomialExpansionConsts(polyN, polySigma);
    657 
    658         for (int k = numLevelsCropped; k >= 0; k--)
    659         {
    660             scale = 1;
    661             for (int i = 0; i < k; i++)
    662                 scale *= pyrScale;
    663 
    664             double sigma = (1./scale - 1) * 0.5;
    665             int smoothSize = cvRound(sigma*5) | 1;
    666             smoothSize = std::max(smoothSize, 3);
    667 
    668             int width = cvRound(size.width*scale);
    669             int height = cvRound(size.height*scale);
    670 
    671             if (fastPyramids)
    672             {
    673                 width = pyramid0_[k].cols;
    674                 height = pyramid0_[k].rows;
    675             }
    676 
    677             if (k > 0)
    678             {
    679                 curFlowX.create(height, width, CV_32F);
    680                 curFlowY.create(height, width, CV_32F);
    681             }
    682             else
    683             {
    684                 curFlowX = flowx0;
    685                 curFlowY = flowy0;
    686             }
    687 
    688             if (prevFlowX.empty())
    689             {
    690                 if (flags & cv::OPTFLOW_USE_INITIAL_FLOW)
    691                 {
    692                     resize(flowx0, curFlowX, Size(width, height), 0, 0, INTER_LINEAR);
    693                     resize(flowy0, curFlowY, Size(width, height), 0, 0, INTER_LINEAR);
    694                     multiply(scale, curFlowX, curFlowX);
    695                     multiply(scale, curFlowY, curFlowY);
    696                 }
    697                 else
    698                 {
    699                     curFlowX.setTo(0);
    700                     curFlowY.setTo(0);
    701                 }
    702             }
    703             else
    704             {
    705                 resize(prevFlowX, curFlowX, Size(width, height), 0, 0, INTER_LINEAR);
    706                 resize(prevFlowY, curFlowY, Size(width, height), 0, 0, INTER_LINEAR);
    707                 multiply(1./pyrScale, curFlowX, curFlowX);
    708                 multiply(1./pyrScale, curFlowY, curFlowY);
    709             }
    710 
    711             UMat M = allocMatFromBuf(5*height, width, CV_32F, M_);
    712             UMat bufM = allocMatFromBuf(5*height, width, CV_32F, bufM_);
    713             UMat R[2] =
    714             {
    715                 allocMatFromBuf(5*height, width, CV_32F, R_[0]),
    716                 allocMatFromBuf(5*height, width, CV_32F, R_[1])
    717             };
    718 
    719             if (fastPyramids)
    720             {
    721                 if (!polynomialExpansionOcl(pyramid0_[k], R[0]))
    722                     return false;
    723                 if (!polynomialExpansionOcl(pyramid1_[k], R[1]))
    724                     return false;
    725             }
    726             else
    727             {
    728                 UMat blurredFrame[2] =
    729                 {
    730                     allocMatFromBuf(size.height, size.width, CV_32F, blurredFrame_[0]),
    731                     allocMatFromBuf(size.height, size.width, CV_32F, blurredFrame_[1])
    732                 };
    733                 UMat pyrLevel[2] =
    734                 {
    735                     allocMatFromBuf(height, width, CV_32F, pyrLevel_[0]),
    736                     allocMatFromBuf(height, width, CV_32F, pyrLevel_[1])
    737                 };
    738 
    739                 setGaussianBlurKernel(smoothSize, sigma);
    740 
    741                 for (int i = 0; i < 2; i++)
    742                 {
    743                     if (!gaussianBlurOcl(frames_[i], smoothSize/2, blurredFrame[i]))
    744                         return false;
    745                     resize(blurredFrame[i], pyrLevel[i], Size(width, height), INTER_LINEAR);
    746                     if (!polynomialExpansionOcl(pyrLevel[i], R[i]))
    747                         return false;
    748                 }
    749             }
    750 
    751             if (!updateMatricesOcl(curFlowX, curFlowY, R[0], R[1], M))
    752                 return false;
    753 
    754             if (flags & OPTFLOW_FARNEBACK_GAUSSIAN)
    755                 setGaussianBlurKernel(winSize, winSize/2*0.3f);
    756             for (int i = 0; i < numIters; i++)
    757             {
    758                 if (flags & OPTFLOW_FARNEBACK_GAUSSIAN)
    759                 {
    760                     if (!updateFlow_gaussianBlur(R[0], R[1], curFlowX, curFlowY, M, bufM, winSize, i < numIters-1))
    761                         return false;
    762                 }
    763                 else
    764                 {
    765                     if (!updateFlow_boxFilter(R[0], R[1], curFlowX, curFlowY, M, bufM, winSize, i < numIters-1))
    766                         return false;
    767                 }
    768             }
    769 
    770             prevFlowX = curFlowX;
    771             prevFlowY = curFlowY;
    772         }
    773 
    774         flowx = curFlowX;
    775         flowy = curFlowY;
    776         return true;
    777     }
    778 
    779     void releaseMemory()
    780     {
    781         frames_[0].release();
    782         frames_[1].release();
    783         pyrLevel_[0].release();
    784         pyrLevel_[1].release();
    785         M_.release();
    786         bufM_.release();
    787         R_[0].release();
    788         R_[1].release();
    789         blurredFrame_[0].release();
    790         blurredFrame_[1].release();
    791         pyramid0_.clear();
    792         pyramid1_.clear();
    793     }
    794 private:
    795     UMat m_g;
    796     UMat m_xg;
    797     UMat m_xxg;
    798 
    799     double m_igd[4];
    800     float  m_ig[4];
    801     void setPolynomialExpansionConsts(int n, double sigma)
    802     {
    803         std::vector<float> buf(n*6 + 3);
    804         float* g = &buf[0] + n;
    805         float* xg = g + n*2 + 1;
    806         float* xxg = xg + n*2 + 1;
    807 
    808         FarnebackPrepareGaussian(n, sigma, g, xg, xxg, m_igd[0], m_igd[1], m_igd[2], m_igd[3]);
    809 
    810         cv::Mat t_g(1, n + 1, CV_32FC1, g);     t_g.copyTo(m_g);
    811         cv::Mat t_xg(1, n + 1, CV_32FC1, xg);   t_xg.copyTo(m_xg);
    812         cv::Mat t_xxg(1, n + 1, CV_32FC1, xxg); t_xxg.copyTo(m_xxg);
    813 
    814         m_ig[0] = static_cast<float>(m_igd[0]);
    815         m_ig[1] = static_cast<float>(m_igd[1]);
    816         m_ig[2] = static_cast<float>(m_igd[2]);
    817         m_ig[3] = static_cast<float>(m_igd[3]);
    818     }
    819 private:
    820     UMat m_gKer;
    821     inline void setGaussianBlurKernel(int smoothSize, double sigma)
    822     {
    823         Mat g = getGaussianKernel(smoothSize, sigma, CV_32F);
    824         Mat gKer(1, smoothSize/2 + 1, CV_32FC1, g.ptr<float>(smoothSize/2));
    825         gKer.copyTo(m_gKer);
    826     }
    827 private:
    828     UMat frames_[2];
    829     UMat pyrLevel_[2], M_, bufM_, R_[2], blurredFrame_[2];
    830     std::vector<UMat> pyramid0_, pyramid1_;
    831 
    832     static UMat allocMatFromBuf(int rows, int cols, int type, UMat &mat)
    833     {
    834         if (!mat.empty() && mat.type() == type && mat.rows >= rows && mat.cols >= cols)
    835             return mat(Rect(0, 0, cols, rows));
    836         return mat = UMat(rows, cols, type);
    837     }
    838 private:
    839 #define DIVUP(total, grain) (((total) + (grain) - 1) / (grain))
    840 
    841     bool gaussianBlurOcl(const UMat &src, int ksizeHalf, UMat &dst)
    842     {
    843 #ifdef SMALL_LOCALSIZE
    844         size_t localsize[2] = { 128, 1};
    845 #else
    846         size_t localsize[2] = { 256, 1};
    847 #endif
    848         size_t globalsize[2] = { src.cols, src.rows};
    849         int smem_size = (int)((localsize[0] + 2*ksizeHalf) * sizeof(float));
    850         ocl::Kernel kernel;
    851         if (!kernel.create("gaussianBlur", cv::ocl::video::optical_flow_farneback_oclsrc, ""))
    852             return false;
    853 
    854         CV_Assert(dst.size() == src.size());
    855         int idxArg = 0;
    856         idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(src));
    857         idxArg = kernel.set(idxArg, (int)(src.step / src.elemSize()));
    858         idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(dst));
    859         idxArg = kernel.set(idxArg, (int)(dst.step / dst.elemSize()));
    860         idxArg = kernel.set(idxArg, dst.rows);
    861         idxArg = kernel.set(idxArg, dst.cols);
    862         idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(m_gKer));
    863         idxArg = kernel.set(idxArg, (int)ksizeHalf);
    864         kernel.set(idxArg, (void *)NULL, smem_size);
    865         return kernel.run(2, globalsize, localsize, false);
    866     }
    867     bool gaussianBlur5Ocl(const UMat &src, int ksizeHalf, UMat &dst)
    868     {
    869         int height = src.rows / 5;
    870 #ifdef SMALL_LOCALSIZE
    871         size_t localsize[2] = { 128, 1};
    872 #else
    873         size_t localsize[2] = { 256, 1};
    874 #endif
    875         size_t globalsize[2] = { src.cols, height};
    876         int smem_size = (int)((localsize[0] + 2*ksizeHalf) * 5 * sizeof(float));
    877         ocl::Kernel kernel;
    878         if (!kernel.create("gaussianBlur5", cv::ocl::video::optical_flow_farneback_oclsrc, ""))
    879             return false;
    880 
    881         int idxArg = 0;
    882         idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(src));
    883         idxArg = kernel.set(idxArg, (int)(src.step / src.elemSize()));
    884         idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(dst));
    885         idxArg = kernel.set(idxArg, (int)(dst.step / dst.elemSize()));
    886         idxArg = kernel.set(idxArg, height);
    887         idxArg = kernel.set(idxArg, src.cols);
    888         idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(m_gKer));
    889         idxArg = kernel.set(idxArg, (int)ksizeHalf);
    890         kernel.set(idxArg, (void *)NULL, smem_size);
    891         return kernel.run(2, globalsize, localsize, false);
    892     }
    893     bool polynomialExpansionOcl(const UMat &src, UMat &dst)
    894     {
    895 #ifdef SMALL_LOCALSIZE
    896         size_t localsize[2] = { 128, 1};
    897 #else
    898         size_t localsize[2] = { 256, 1};
    899 #endif
    900         size_t globalsize[2] = { DIVUP(src.cols, localsize[0] - 2*polyN) * localsize[0], src.rows};
    901 
    902 #if 0
    903         const cv::ocl::Device &device = cv::ocl::Device::getDefault();
    904         bool useDouble = (0 != device.doubleFPConfig());
    905 
    906         cv::String build_options = cv::format("-D polyN=%d -D USE_DOUBLE=%d", polyN, useDouble ? 1 : 0);
    907 #else
    908         cv::String build_options = cv::format("-D polyN=%d", polyN);
    909 #endif
    910         ocl::Kernel kernel;
    911         if (!kernel.create("polynomialExpansion", cv::ocl::video::optical_flow_farneback_oclsrc, build_options))
    912             return false;
    913 
    914         int smem_size = (int)(3 * localsize[0] * sizeof(float));
    915         int idxArg = 0;
    916         idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(src));
    917         idxArg = kernel.set(idxArg, (int)(src.step / src.elemSize()));
    918         idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(dst));
    919         idxArg = kernel.set(idxArg, (int)(dst.step / dst.elemSize()));
    920         idxArg = kernel.set(idxArg, src.rows);
    921         idxArg = kernel.set(idxArg, src.cols);
    922         idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(m_g));
    923         idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(m_xg));
    924         idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(m_xxg));
    925         idxArg = kernel.set(idxArg, (void *)NULL, smem_size);
    926         kernel.set(idxArg, (void *)m_ig, 4 * sizeof(float));
    927         return kernel.run(2, globalsize, localsize, false);
    928     }
    929     bool boxFilter5Ocl(const UMat &src, int ksizeHalf, UMat &dst)
    930     {
    931         int height = src.rows / 5;
    932 #ifdef SMALL_LOCALSIZE
    933         size_t localsize[2] = { 128, 1};
    934 #else
    935         size_t localsize[2] = { 256, 1};
    936 #endif
    937         size_t globalsize[2] = { src.cols, height};
    938 
    939         ocl::Kernel kernel;
    940         if (!kernel.create("boxFilter5", cv::ocl::video::optical_flow_farneback_oclsrc, ""))
    941             return false;
    942 
    943         int smem_size = (int)((localsize[0] + 2*ksizeHalf) * 5 * sizeof(float));
    944 
    945         int idxArg = 0;
    946         idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(src));
    947         idxArg = kernel.set(idxArg, (int)(src.step / src.elemSize()));
    948         idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(dst));
    949         idxArg = kernel.set(idxArg, (int)(dst.step / dst.elemSize()));
    950         idxArg = kernel.set(idxArg, height);
    951         idxArg = kernel.set(idxArg, src.cols);
    952         idxArg = kernel.set(idxArg, (int)ksizeHalf);
    953         kernel.set(idxArg, (void *)NULL, smem_size);
    954         return kernel.run(2, globalsize, localsize, false);
    955     }
    956 
    957     bool updateFlowOcl(const UMat &M, UMat &flowx, UMat &flowy)
    958     {
    959 #ifdef SMALL_LOCALSIZE
    960         size_t localsize[2] = { 32, 4};
    961 #else
    962         size_t localsize[2] = { 32, 8};
    963 #endif
    964         size_t globalsize[2] = { flowx.cols, flowx.rows};
    965 
    966         ocl::Kernel kernel;
    967         if (!kernel.create("updateFlow", cv::ocl::video::optical_flow_farneback_oclsrc, ""))
    968             return false;
    969 
    970         int idxArg = 0;
    971         idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(M));
    972         idxArg = kernel.set(idxArg, (int)(M.step / M.elemSize()));
    973         idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(flowx));
    974         idxArg = kernel.set(idxArg, (int)(flowx.step / flowx.elemSize()));
    975         idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(flowy));
    976         idxArg = kernel.set(idxArg, (int)(flowy.step / flowy.elemSize()));
    977         idxArg = kernel.set(idxArg, (int)flowy.rows);
    978         kernel.set(idxArg, (int)flowy.cols);
    979         return kernel.run(2, globalsize, localsize, false);
    980     }
    981     bool updateMatricesOcl(const UMat &flowx, const UMat &flowy, const UMat &R0, const UMat &R1, UMat &M)
    982     {
    983 #ifdef SMALL_LOCALSIZE
    984         size_t localsize[2] = { 32, 4};
    985 #else
    986         size_t localsize[2] = { 32, 8};
    987 #endif
    988         size_t globalsize[2] = { flowx.cols, flowx.rows};
    989 
    990         ocl::Kernel kernel;
    991         if (!kernel.create("updateMatrices", cv::ocl::video::optical_flow_farneback_oclsrc, ""))
    992             return false;
    993 
    994         int idxArg = 0;
    995         idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(flowx));
    996         idxArg = kernel.set(idxArg, (int)(flowx.step / flowx.elemSize()));
    997         idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(flowy));
    998         idxArg = kernel.set(idxArg, (int)(flowy.step / flowy.elemSize()));
    999         idxArg = kernel.set(idxArg, (int)flowx.rows);
   1000         idxArg = kernel.set(idxArg, (int)flowx.cols);
   1001         idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(R0));
   1002         idxArg = kernel.set(idxArg, (int)(R0.step / R0.elemSize()));
   1003         idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(R1));
   1004         idxArg = kernel.set(idxArg, (int)(R1.step / R1.elemSize()));
   1005         idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(M));
   1006         kernel.set(idxArg, (int)(M.step / M.elemSize()));
   1007         return kernel.run(2, globalsize, localsize, false);
   1008     }
   1009 
   1010     bool updateFlow_boxFilter(
   1011         const UMat& R0, const UMat& R1, UMat& flowx, UMat &flowy,
   1012         UMat& M, UMat &bufM, int blockSize, bool updateMatrices)
   1013     {
   1014         if (!boxFilter5Ocl(M, blockSize/2, bufM))
   1015             return false;
   1016         swap(M, bufM);
   1017         if (!updateFlowOcl(M, flowx, flowy))
   1018             return false;
   1019         if (updateMatrices)
   1020             if (!updateMatricesOcl(flowx, flowy, R0, R1, M))
   1021                 return false;
   1022         return true;
   1023     }
   1024     bool updateFlow_gaussianBlur(
   1025         const UMat& R0, const UMat& R1, UMat& flowx, UMat& flowy,
   1026         UMat& M, UMat &bufM, int blockSize, bool updateMatrices)
   1027     {
   1028         if (!gaussianBlur5Ocl(M, blockSize/2, bufM))
   1029             return false;
   1030         swap(M, bufM);
   1031         if (!updateFlowOcl(M, flowx, flowy))
   1032             return false;
   1033         if (updateMatrices)
   1034             if (!updateMatricesOcl(flowx, flowy, R0, R1, M))
   1035                 return false;
   1036         return true;
   1037     }
   1038 };
   1039 
   1040 static bool ocl_calcOpticalFlowFarneback( InputArray _prev0, InputArray _next0,
   1041                             InputOutputArray _flow0, double pyr_scale, int levels, int winsize,
   1042                             int iterations, int poly_n, double poly_sigma, int flags )
   1043 {
   1044     if ((5 != poly_n) && (7 != poly_n))
   1045         return false;
   1046     if (_next0.size() != _prev0.size())
   1047         return false;
   1048     int typePrev = _prev0.type();
   1049     int typeNext = _next0.type();
   1050     if ((1 != CV_MAT_CN(typePrev)) || (1 != CV_MAT_CN(typeNext)))
   1051         return false;
   1052 
   1053     FarnebackOpticalFlow opticalFlow;
   1054     opticalFlow.numLevels   = levels;
   1055     opticalFlow.pyrScale    = pyr_scale;
   1056     opticalFlow.fastPyramids= false;
   1057     opticalFlow.winSize     = winsize;
   1058     opticalFlow.numIters    = iterations;
   1059     opticalFlow.polyN       = poly_n;
   1060     opticalFlow.polySigma   = poly_sigma;
   1061     opticalFlow.flags       = flags;
   1062 
   1063     std::vector<UMat> flowar;
   1064     if (!_flow0.empty())
   1065         split(_flow0, flowar);
   1066     else
   1067     {
   1068         flowar.push_back(UMat());
   1069         flowar.push_back(UMat());
   1070     }
   1071     if (!opticalFlow(_prev0.getUMat(), _next0.getUMat(), flowar[0], flowar[1]))
   1072         return false;
   1073     merge(flowar, _flow0);
   1074     return true;
   1075 }
   1076 }
   1077 
   1078 void cv::calcOpticalFlowFarneback( InputArray _prev0, InputArray _next0,
   1079                                InputOutputArray _flow0, double pyr_scale, int levels, int winsize,
   1080                                int iterations, int poly_n, double poly_sigma, int flags )
   1081 {
   1082     bool use_opencl = ocl::useOpenCL() && _flow0.isUMat();
   1083     if( use_opencl && ocl_calcOpticalFlowFarneback(_prev0, _next0, _flow0, pyr_scale, levels, winsize, iterations, poly_n, poly_sigma, flags))
   1084     {
   1085         CV_IMPL_ADD(CV_IMPL_OCL);
   1086         return;
   1087     }
   1088 
   1089     Mat prev0 = _prev0.getMat(), next0 = _next0.getMat();
   1090     const int min_size = 32;
   1091     const Mat* img[2] = { &prev0, &next0 };
   1092 
   1093     int i, k;
   1094     double scale;
   1095     Mat prevFlow, flow, fimg;
   1096 
   1097     CV_Assert( prev0.size() == next0.size() && prev0.channels() == next0.channels() &&
   1098         prev0.channels() == 1 && pyr_scale < 1 );
   1099     _flow0.create( prev0.size(), CV_32FC2 );
   1100     Mat flow0 = _flow0.getMat();
   1101 
   1102     for( k = 0, scale = 1; k < levels; k++ )
   1103     {
   1104         scale *= pyr_scale;
   1105         if( prev0.cols*scale < min_size || prev0.rows*scale < min_size )
   1106             break;
   1107     }
   1108 
   1109     levels = k;
   1110 
   1111     for( k = levels; k >= 0; k-- )
   1112     {
   1113         for( i = 0, scale = 1; i < k; i++ )
   1114             scale *= pyr_scale;
   1115 
   1116         double sigma = (1./scale-1)*0.5;
   1117         int smooth_sz = cvRound(sigma*5)|1;
   1118         smooth_sz = std::max(smooth_sz, 3);
   1119 
   1120         int width = cvRound(prev0.cols*scale);
   1121         int height = cvRound(prev0.rows*scale);
   1122 
   1123         if( k > 0 )
   1124             flow.create( height, width, CV_32FC2 );
   1125         else
   1126             flow = flow0;
   1127 
   1128         if( prevFlow.empty() )
   1129         {
   1130             if( flags & OPTFLOW_USE_INITIAL_FLOW )
   1131             {
   1132                 resize( flow0, flow, Size(width, height), 0, 0, INTER_AREA );
   1133                 flow *= scale;
   1134             }
   1135             else
   1136                 flow = Mat::zeros( height, width, CV_32FC2 );
   1137         }
   1138         else
   1139         {
   1140             resize( prevFlow, flow, Size(width, height), 0, 0, INTER_LINEAR );
   1141             flow *= 1./pyr_scale;
   1142         }
   1143 
   1144         Mat R[2], I, M;
   1145         for( i = 0; i < 2; i++ )
   1146         {
   1147             img[i]->convertTo(fimg, CV_32F);
   1148             GaussianBlur(fimg, fimg, Size(smooth_sz, smooth_sz), sigma, sigma);
   1149             resize( fimg, I, Size(width, height), INTER_LINEAR );
   1150             FarnebackPolyExp( I, R[i], poly_n, poly_sigma );
   1151         }
   1152 
   1153         FarnebackUpdateMatrices( R[0], R[1], flow, M, 0, flow.rows );
   1154 
   1155         for( i = 0; i < iterations; i++ )
   1156         {
   1157             if( flags & OPTFLOW_FARNEBACK_GAUSSIAN )
   1158                 FarnebackUpdateFlow_GaussianBlur( R[0], R[1], flow, M, winsize, i < iterations - 1 );
   1159             else
   1160                 FarnebackUpdateFlow_Blur( R[0], R[1], flow, M, winsize, i < iterations - 1 );
   1161         }
   1162 
   1163         prevFlow = flow;
   1164     }
   1165 }
   1166