Home | History | Annotate | Download | only in src
      1 /*M///////////////////////////////////////////////////////////////////////////////////////
      2 //
      3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
      4 //
      5 //  By downloading, copying, installing or using the software you agree to this license.
      6 //  If you do not agree to this license, do not download, install,
      7 //  copy or use the software.
      8 //
      9 //
     10 //                           License Agreement
     11 //                For Open Source Computer Vision Library
     12 //
     13 // Copyright (C) 2000, Intel Corporation, all rights reserved.
     14 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
     15 // Third party copyrights are property of their respective owners.
     16 //
     17 // Redistribution and use in source and binary forms, with or without modification,
     18 // are permitted provided that the following conditions are met:
     19 //
     20 //   * Redistribution's of source code must retain the above copyright notice,
     21 //     this list of conditions and the following disclaimer.
     22 //
     23 //   * Redistribution's in binary form must reproduce the above copyright notice,
     24 //     this list of conditions and the following disclaimer in the documentation
     25 //     and/or other materials provided with the distribution.
     26 //
     27 //   * The name of the copyright holders may not be used to endorse or promote products
     28 //     derived from this software without specific prior written permission.
     29 //
     30 // This software is provided by the copyright holders and contributors "as is" and
     31 // any express or implied warranties, including, but not limited to, the implied
     32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
     33 // In no event shall the Intel Corporation or contributors be liable for any direct,
     34 // indirect, incidental, special, exemplary, or consequential damages
     35 // (including, but not limited to, procurement of substitute goods or services;
     36 // loss of use, data, or profits; or business interruption) however caused
     37 // and on any theory of liability, whether in contract, strict liability,
     38 // or tort (including negligence or otherwise) arising in any way out of
     39 // the use of this software, even if advised of the possibility of such damage.
     40 //
     41 //M*/
     42 #include "precomp.hpp"
     43 #include <float.h>
     44 #include <stdio.h>
     45 #include "lkpyramid.hpp"
     46 #include "opencl_kernels_video.hpp"
     47 
     48 #define  CV_DESCALE(x,n)     (((x) + (1 << ((n)-1))) >> (n))
     49 
     50 namespace
     51 {
     52 static void calcSharrDeriv(const cv::Mat& src, cv::Mat& dst)
     53 {
     54     using namespace cv;
     55     using cv::detail::deriv_type;
     56     int rows = src.rows, cols = src.cols, cn = src.channels(), colsn = cols*cn, depth = src.depth();
     57     CV_Assert(depth == CV_8U);
     58     dst.create(rows, cols, CV_MAKETYPE(DataType<deriv_type>::depth, cn*2));
     59 
     60 #ifdef HAVE_TEGRA_OPTIMIZATION
     61     if (tegra::useTegra() && tegra::calcSharrDeriv(src, dst))
     62         return;
     63 #endif
     64 
     65     int x, y, delta = (int)alignSize((cols + 2)*cn, 16);
     66     AutoBuffer<deriv_type> _tempBuf(delta*2 + 64);
     67     deriv_type *trow0 = alignPtr(_tempBuf + cn, 16), *trow1 = alignPtr(trow0 + delta, 16);
     68 
     69 #if CV_SSE2
     70     __m128i z = _mm_setzero_si128(), c3 = _mm_set1_epi16(3), c10 = _mm_set1_epi16(10);
     71 #endif
     72 
     73 #if CV_NEON
     74     const uint16x8_t q8 = vdupq_n_u16(3);
     75     const uint8x8_t d18 = vdup_n_u8(10);
     76 
     77     const int16x8_t q8i = vdupq_n_s16(3);
     78     const int16x8_t q9 = vdupq_n_s16(10);
     79 #endif
     80 
     81     for( y = 0; y < rows; y++ )
     82     {
     83         const uchar* srow0 = src.ptr<uchar>(y > 0 ? y-1 : rows > 1 ? 1 : 0);
     84         const uchar* srow1 = src.ptr<uchar>(y);
     85         const uchar* srow2 = src.ptr<uchar>(y < rows-1 ? y+1 : rows > 1 ? rows-2 : 0);
     86         deriv_type* drow = dst.ptr<deriv_type>(y);
     87 
     88         // do vertical convolution
     89         x = 0;
     90 #if CV_SSE2
     91         for( ; x <= colsn - 8; x += 8 )
     92         {
     93             __m128i s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(srow0 + x)), z);
     94             __m128i s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(srow1 + x)), z);
     95             __m128i s2 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(srow2 + x)), z);
     96             __m128i t0 = _mm_add_epi16(_mm_mullo_epi16(_mm_add_epi16(s0, s2), c3), _mm_mullo_epi16(s1, c10));
     97             __m128i t1 = _mm_sub_epi16(s2, s0);
     98             _mm_store_si128((__m128i*)(trow0 + x), t0);
     99             _mm_store_si128((__m128i*)(trow1 + x), t1);
    100         }
    101 #endif
    102 
    103 #if CV_NEON
    104         for( ; x <= colsn - 8; x += 8)
    105         {
    106             uint8x8_t d0 = vld1_u8((const uint8_t*)&srow0[x]);
    107             uint8x8_t d1 = vld1_u8((const uint8_t*)&srow1[x]);
    108             uint8x8_t d2 = vld1_u8((const uint8_t*)&srow2[x]);
    109             uint16x8_t q4 = vaddl_u8(d0, d2);
    110             uint16x8_t q11 = vsubl_u8(d2, d0);
    111             uint16x8_t q5 = vmulq_u16(q4, q8);
    112             uint16x8_t q6 = vmull_u8(d1, d18);
    113             uint16x8_t q10 = vaddq_u16(q6, q5);
    114             vst1q_u16((uint16_t*)&trow0[x], q10);
    115             vst1q_u16((uint16_t*)&trow1[x], q11);
    116 
    117         }
    118 #endif
    119 
    120         for( ; x < colsn; x++ )
    121         {
    122             int t0 = (srow0[x] + srow2[x])*3 + srow1[x]*10;
    123             int t1 = srow2[x] - srow0[x];
    124             trow0[x] = (deriv_type)t0;
    125             trow1[x] = (deriv_type)t1;
    126         }
    127 
    128         // make border
    129         int x0 = (cols > 1 ? 1 : 0)*cn, x1 = (cols > 1 ? cols-2 : 0)*cn;
    130         for( int k = 0; k < cn; k++ )
    131         {
    132             trow0[-cn + k] = trow0[x0 + k]; trow0[colsn + k] = trow0[x1 + k];
    133             trow1[-cn + k] = trow1[x0 + k]; trow1[colsn + k] = trow1[x1 + k];
    134         }
    135 
    136         // do horizontal convolution, interleave the results and store them to dst
    137         x = 0;
    138 #if CV_SSE2
    139         for( ; x <= colsn - 8; x += 8 )
    140         {
    141             __m128i s0 = _mm_loadu_si128((const __m128i*)(trow0 + x - cn));
    142             __m128i s1 = _mm_loadu_si128((const __m128i*)(trow0 + x + cn));
    143             __m128i s2 = _mm_loadu_si128((const __m128i*)(trow1 + x - cn));
    144             __m128i s3 = _mm_load_si128((const __m128i*)(trow1 + x));
    145             __m128i s4 = _mm_loadu_si128((const __m128i*)(trow1 + x + cn));
    146 
    147             __m128i t0 = _mm_sub_epi16(s1, s0);
    148             __m128i t1 = _mm_add_epi16(_mm_mullo_epi16(_mm_add_epi16(s2, s4), c3), _mm_mullo_epi16(s3, c10));
    149             __m128i t2 = _mm_unpacklo_epi16(t0, t1);
    150             t0 = _mm_unpackhi_epi16(t0, t1);
    151             // this can probably be replaced with aligned stores if we aligned dst properly.
    152             _mm_storeu_si128((__m128i*)(drow + x*2), t2);
    153             _mm_storeu_si128((__m128i*)(drow + x*2 + 8), t0);
    154         }
    155 #endif
    156 
    157 #if CV_NEON
    158         for( ; x <= colsn - 8; x += 8 )
    159         {
    160 
    161             int16x8_t q0 = vld1q_s16((const int16_t*)&trow0[x+cn]);
    162             int16x8_t q1 = vld1q_s16((const int16_t*)&trow0[x-cn]);
    163             int16x8_t q2 = vld1q_s16((const int16_t*)&trow1[x+cn]);
    164             int16x8_t q3 = vld1q_s16((const int16_t*)&trow1[x-cn]);
    165             int16x8_t q5 = vsubq_s16(q0, q1);
    166             int16x8_t q6 = vaddq_s16(q2, q3);
    167             int16x8_t q4 = vld1q_s16((const int16_t*)&trow1[x]);
    168             int16x8_t q7 = vmulq_s16(q6, q8i);
    169             int16x8_t q10 = vmulq_s16(q4, q9);
    170             int16x8_t q11 = vaddq_s16(q7, q10);
    171             int16x4_t d22 = vget_low_s16(q11);
    172             int16x4_t d23 = vget_high_s16(q11);
    173             int16x4_t d11 = vget_high_s16(q5);
    174             int16x4_t d10 = vget_low_s16(q5);
    175             int16x4x2_t q5x2, q11x2;
    176             q5x2.val[0] = d10; q5x2.val[1] = d22;
    177             q11x2.val[0] = d11; q11x2.val[1] = d23;
    178             vst2_s16((int16_t*)&drow[x*2], q5x2);
    179             vst2_s16((int16_t*)&drow[(x*2)+8], q11x2);
    180 
    181         }
    182 #endif
    183         for( ; x < colsn; x++ )
    184         {
    185             deriv_type t0 = (deriv_type)(trow0[x+cn] - trow0[x-cn]);
    186             deriv_type t1 = (deriv_type)((trow1[x+cn] + trow1[x-cn])*3 + trow1[x]*10);
    187             drow[x*2] = t0; drow[x*2+1] = t1;
    188         }
    189     }
    190 }
    191 
    192 }//namespace
    193 
    194 cv::detail::LKTrackerInvoker::LKTrackerInvoker(
    195                       const Mat& _prevImg, const Mat& _prevDeriv, const Mat& _nextImg,
    196                       const Point2f* _prevPts, Point2f* _nextPts,
    197                       uchar* _status, float* _err,
    198                       Size _winSize, TermCriteria _criteria,
    199                       int _level, int _maxLevel, int _flags, float _minEigThreshold )
    200 {
    201     prevImg = &_prevImg;
    202     prevDeriv = &_prevDeriv;
    203     nextImg = &_nextImg;
    204     prevPts = _prevPts;
    205     nextPts = _nextPts;
    206     status = _status;
    207     err = _err;
    208     winSize = _winSize;
    209     criteria = _criteria;
    210     level = _level;
    211     maxLevel = _maxLevel;
    212     flags = _flags;
    213     minEigThreshold = _minEigThreshold;
    214 }
    215 
    216 #if defined __arm__ && !CV_NEON
    217 typedef int64 acctype;
    218 typedef int itemtype;
    219 #else
    220 typedef float acctype;
    221 typedef float itemtype;
    222 #endif
    223 
    224 void cv::detail::LKTrackerInvoker::operator()(const Range& range) const
    225 {
    226     Point2f halfWin((winSize.width-1)*0.5f, (winSize.height-1)*0.5f);
    227     const Mat& I = *prevImg;
    228     const Mat& J = *nextImg;
    229     const Mat& derivI = *prevDeriv;
    230 
    231     int j, cn = I.channels(), cn2 = cn*2;
    232     cv::AutoBuffer<deriv_type> _buf(winSize.area()*(cn + cn2));
    233     int derivDepth = DataType<deriv_type>::depth;
    234 
    235     Mat IWinBuf(winSize, CV_MAKETYPE(derivDepth, cn), (deriv_type*)_buf);
    236     Mat derivIWinBuf(winSize, CV_MAKETYPE(derivDepth, cn2), (deriv_type*)_buf + winSize.area()*cn);
    237 
    238     for( int ptidx = range.start; ptidx < range.end; ptidx++ )
    239     {
    240         Point2f prevPt = prevPts[ptidx]*(float)(1./(1 << level));
    241         Point2f nextPt;
    242         if( level == maxLevel )
    243         {
    244             if( flags & OPTFLOW_USE_INITIAL_FLOW )
    245                 nextPt = nextPts[ptidx]*(float)(1./(1 << level));
    246             else
    247                 nextPt = prevPt;
    248         }
    249         else
    250             nextPt = nextPts[ptidx]*2.f;
    251         nextPts[ptidx] = nextPt;
    252 
    253         Point2i iprevPt, inextPt;
    254         prevPt -= halfWin;
    255         iprevPt.x = cvFloor(prevPt.x);
    256         iprevPt.y = cvFloor(prevPt.y);
    257 
    258         if( iprevPt.x < -winSize.width || iprevPt.x >= derivI.cols ||
    259             iprevPt.y < -winSize.height || iprevPt.y >= derivI.rows )
    260         {
    261             if( level == 0 )
    262             {
    263                 if( status )
    264                     status[ptidx] = false;
    265                 if( err )
    266                     err[ptidx] = 0;
    267             }
    268             continue;
    269         }
    270 
    271         float a = prevPt.x - iprevPt.x;
    272         float b = prevPt.y - iprevPt.y;
    273         const int W_BITS = 14, W_BITS1 = 14;
    274         const float FLT_SCALE = 1.f/(1 << 20);
    275         int iw00 = cvRound((1.f - a)*(1.f - b)*(1 << W_BITS));
    276         int iw01 = cvRound(a*(1.f - b)*(1 << W_BITS));
    277         int iw10 = cvRound((1.f - a)*b*(1 << W_BITS));
    278         int iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
    279 
    280         int dstep = (int)(derivI.step/derivI.elemSize1());
    281         int stepI = (int)(I.step/I.elemSize1());
    282         int stepJ = (int)(J.step/J.elemSize1());
    283         acctype iA11 = 0, iA12 = 0, iA22 = 0;
    284         float A11, A12, A22;
    285 
    286 #if CV_SSE2
    287         __m128i qw0 = _mm_set1_epi32(iw00 + (iw01 << 16));
    288         __m128i qw1 = _mm_set1_epi32(iw10 + (iw11 << 16));
    289         __m128i z = _mm_setzero_si128();
    290         __m128i qdelta_d = _mm_set1_epi32(1 << (W_BITS1-1));
    291         __m128i qdelta = _mm_set1_epi32(1 << (W_BITS1-5-1));
    292         __m128 qA11 = _mm_setzero_ps(), qA12 = _mm_setzero_ps(), qA22 = _mm_setzero_ps();
    293 #endif
    294 
    295 #if CV_NEON
    296 
    297         int CV_DECL_ALIGNED(16) nA11[] = {0, 0, 0, 0}, nA12[] = {0, 0, 0, 0}, nA22[] = {0, 0, 0, 0};
    298         const int shifter1 = -(W_BITS - 5); //negative so it shifts right
    299         const int shifter2 = -(W_BITS);
    300 
    301         const int16x4_t d26 = vdup_n_s16((int16_t)iw00);
    302         const int16x4_t d27 = vdup_n_s16((int16_t)iw01);
    303         const int16x4_t d28 = vdup_n_s16((int16_t)iw10);
    304         const int16x4_t d29 = vdup_n_s16((int16_t)iw11);
    305         const int32x4_t q11 = vdupq_n_s32((int32_t)shifter1);
    306         const int32x4_t q12 = vdupq_n_s32((int32_t)shifter2);
    307 
    308 #endif
    309 
    310         // extract the patch from the first image, compute covariation matrix of derivatives
    311         int x, y;
    312         for( y = 0; y < winSize.height; y++ )
    313         {
    314             const uchar* src = I.ptr() + (y + iprevPt.y)*stepI + iprevPt.x*cn;
    315             const deriv_type* dsrc = derivI.ptr<deriv_type>() + (y + iprevPt.y)*dstep + iprevPt.x*cn2;
    316 
    317             deriv_type* Iptr = IWinBuf.ptr<deriv_type>(y);
    318             deriv_type* dIptr = derivIWinBuf.ptr<deriv_type>(y);
    319 
    320             x = 0;
    321 
    322 #if CV_SSE2
    323             for( ; x <= winSize.width*cn - 4; x += 4, dsrc += 4*2, dIptr += 4*2 )
    324             {
    325                 __m128i v00, v01, v10, v11, t0, t1;
    326 
    327                 v00 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const int*)(src + x)), z);
    328                 v01 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const int*)(src + x + cn)), z);
    329                 v10 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const int*)(src + x + stepI)), z);
    330                 v11 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const int*)(src + x + stepI + cn)), z);
    331 
    332                 t0 = _mm_add_epi32(_mm_madd_epi16(_mm_unpacklo_epi16(v00, v01), qw0),
    333                                    _mm_madd_epi16(_mm_unpacklo_epi16(v10, v11), qw1));
    334                 t0 = _mm_srai_epi32(_mm_add_epi32(t0, qdelta), W_BITS1-5);
    335                 _mm_storel_epi64((__m128i*)(Iptr + x), _mm_packs_epi32(t0,t0));
    336 
    337                 v00 = _mm_loadu_si128((const __m128i*)(dsrc));
    338                 v01 = _mm_loadu_si128((const __m128i*)(dsrc + cn2));
    339                 v10 = _mm_loadu_si128((const __m128i*)(dsrc + dstep));
    340                 v11 = _mm_loadu_si128((const __m128i*)(dsrc + dstep + cn2));
    341 
    342                 t0 = _mm_add_epi32(_mm_madd_epi16(_mm_unpacklo_epi16(v00, v01), qw0),
    343                                    _mm_madd_epi16(_mm_unpacklo_epi16(v10, v11), qw1));
    344                 t1 = _mm_add_epi32(_mm_madd_epi16(_mm_unpackhi_epi16(v00, v01), qw0),
    345                                    _mm_madd_epi16(_mm_unpackhi_epi16(v10, v11), qw1));
    346                 t0 = _mm_srai_epi32(_mm_add_epi32(t0, qdelta_d), W_BITS1);
    347                 t1 = _mm_srai_epi32(_mm_add_epi32(t1, qdelta_d), W_BITS1);
    348                 v00 = _mm_packs_epi32(t0, t1); // Ix0 Iy0 Ix1 Iy1 ...
    349 
    350                 _mm_storeu_si128((__m128i*)dIptr, v00);
    351                 t0 = _mm_srai_epi32(v00, 16); // Iy0 Iy1 Iy2 Iy3
    352                 t1 = _mm_srai_epi32(_mm_slli_epi32(v00, 16), 16); // Ix0 Ix1 Ix2 Ix3
    353 
    354                 __m128 fy = _mm_cvtepi32_ps(t0);
    355                 __m128 fx = _mm_cvtepi32_ps(t1);
    356 
    357                 qA22 = _mm_add_ps(qA22, _mm_mul_ps(fy, fy));
    358                 qA12 = _mm_add_ps(qA12, _mm_mul_ps(fx, fy));
    359                 qA11 = _mm_add_ps(qA11, _mm_mul_ps(fx, fx));
    360             }
    361 #endif
    362 
    363 #if CV_NEON
    364             for( ; x <= winSize.width*cn - 4; x += 4, dsrc += 4*2, dIptr += 4*2 )
    365             {
    366 
    367                 uint8x8_t d0 = vld1_u8(&src[x]);
    368                 uint8x8_t d2 = vld1_u8(&src[x+cn]);
    369                 uint16x8_t q0 = vmovl_u8(d0);
    370                 uint16x8_t q1 = vmovl_u8(d2);
    371 
    372                 int32x4_t q5 = vmull_s16(vget_low_s16(vreinterpretq_s16_u16(q0)), d26);
    373                 int32x4_t q6 = vmull_s16(vget_low_s16(vreinterpretq_s16_u16(q1)), d27);
    374 
    375                 uint8x8_t d4 = vld1_u8(&src[x + stepI]);
    376                 uint8x8_t d6 = vld1_u8(&src[x + stepI + cn]);
    377                 uint16x8_t q2 = vmovl_u8(d4);
    378                 uint16x8_t q3 = vmovl_u8(d6);
    379 
    380                 int32x4_t q7 = vmull_s16(vget_low_s16(vreinterpretq_s16_u16(q2)), d28);
    381                 int32x4_t q8 = vmull_s16(vget_low_s16(vreinterpretq_s16_u16(q3)), d29);
    382 
    383                 q5 = vaddq_s32(q5, q6);
    384                 q7 = vaddq_s32(q7, q8);
    385                 q5 = vaddq_s32(q5, q7);
    386 
    387                 int16x4x2_t d0d1 = vld2_s16(dsrc);
    388                 int16x4x2_t d2d3 = vld2_s16(&dsrc[cn2]);
    389 
    390                 q5 = vqrshlq_s32(q5, q11);
    391 
    392                 int32x4_t q4 = vmull_s16(d0d1.val[0], d26);
    393                 q6 = vmull_s16(d0d1.val[1], d26);
    394 
    395                 int16x4_t nd0 = vmovn_s32(q5);
    396 
    397                 q7 = vmull_s16(d2d3.val[0], d27);
    398                 q8 = vmull_s16(d2d3.val[1], d27);
    399 
    400                 vst1_s16(&Iptr[x], nd0);
    401 
    402                 int16x4x2_t d4d5 = vld2_s16(&dsrc[dstep]);
    403                 int16x4x2_t d6d7 = vld2_s16(&dsrc[dstep+cn2]);
    404 
    405                 q4 = vaddq_s32(q4, q7);
    406                 q6 = vaddq_s32(q6, q8);
    407 
    408                 q7 = vmull_s16(d4d5.val[0], d28);
    409                 int32x4_t nq0 = vmull_s16(d4d5.val[1], d28);
    410                 q8 = vmull_s16(d6d7.val[0], d29);
    411                 int32x4_t q15 = vmull_s16(d6d7.val[1], d29);
    412 
    413                 q7 = vaddq_s32(q7, q8);
    414                 nq0 = vaddq_s32(nq0, q15);
    415 
    416                 q4 = vaddq_s32(q4, q7);
    417                 q6 = vaddq_s32(q6, nq0);
    418 
    419                 int32x4_t nq1 = vld1q_s32(nA12);
    420                 int32x4_t nq2 = vld1q_s32(nA22);
    421                 nq0 = vld1q_s32(nA11);
    422 
    423                 q4 = vqrshlq_s32(q4, q12);
    424                 q6 = vqrshlq_s32(q6, q12);
    425 
    426                 q7 = vmulq_s32(q4, q4);
    427                 q8 = vmulq_s32(q4, q6);
    428                 q15 = vmulq_s32(q6, q6);
    429 
    430                 nq0 = vaddq_s32(nq0, q7);
    431                 nq1 = vaddq_s32(nq1, q8);
    432                 nq2 = vaddq_s32(nq2, q15);
    433 
    434                 vst1q_s32(nA11, nq0);
    435                 vst1q_s32(nA12, nq1);
    436                 vst1q_s32(nA22, nq2);
    437 
    438                 int16x4_t d8 = vmovn_s32(q4);
    439                 int16x4_t d12 = vmovn_s32(q6);
    440 
    441                 int16x4x2_t d8d12;
    442                 d8d12.val[0] = d8; d8d12.val[1] = d12;
    443                 vst2_s16(dIptr, d8d12);
    444             }
    445 #endif
    446 
    447             for( ; x < winSize.width*cn; x++, dsrc += 2, dIptr += 2 )
    448             {
    449                 int ival = CV_DESCALE(src[x]*iw00 + src[x+cn]*iw01 +
    450                                       src[x+stepI]*iw10 + src[x+stepI+cn]*iw11, W_BITS1-5);
    451                 int ixval = CV_DESCALE(dsrc[0]*iw00 + dsrc[cn2]*iw01 +
    452                                        dsrc[dstep]*iw10 + dsrc[dstep+cn2]*iw11, W_BITS1);
    453                 int iyval = CV_DESCALE(dsrc[1]*iw00 + dsrc[cn2+1]*iw01 + dsrc[dstep+1]*iw10 +
    454                                        dsrc[dstep+cn2+1]*iw11, W_BITS1);
    455 
    456                 Iptr[x] = (short)ival;
    457                 dIptr[0] = (short)ixval;
    458                 dIptr[1] = (short)iyval;
    459 
    460                 iA11 += (itemtype)(ixval*ixval);
    461                 iA12 += (itemtype)(ixval*iyval);
    462                 iA22 += (itemtype)(iyval*iyval);
    463             }
    464         }
    465 
    466 #if CV_SSE2
    467         float CV_DECL_ALIGNED(16) A11buf[4], A12buf[4], A22buf[4];
    468         _mm_store_ps(A11buf, qA11);
    469         _mm_store_ps(A12buf, qA12);
    470         _mm_store_ps(A22buf, qA22);
    471         iA11 += A11buf[0] + A11buf[1] + A11buf[2] + A11buf[3];
    472         iA12 += A12buf[0] + A12buf[1] + A12buf[2] + A12buf[3];
    473         iA22 += A22buf[0] + A22buf[1] + A22buf[2] + A22buf[3];
    474 #endif
    475 
    476 #if CV_NEON
    477         iA11 += (float)(nA11[0] + nA11[1] + nA11[2] + nA11[3]);
    478         iA12 += (float)(nA12[0] + nA12[1] + nA12[2] + nA12[3]);
    479         iA22 += (float)(nA22[0] + nA22[1] + nA22[2] + nA22[3]);
    480 #endif
    481 
    482         A11 = iA11*FLT_SCALE;
    483         A12 = iA12*FLT_SCALE;
    484         A22 = iA22*FLT_SCALE;
    485 
    486         float D = A11*A22 - A12*A12;
    487         float minEig = (A22 + A11 - std::sqrt((A11-A22)*(A11-A22) +
    488                         4.f*A12*A12))/(2*winSize.width*winSize.height);
    489 
    490         if( err && (flags & OPTFLOW_LK_GET_MIN_EIGENVALS) != 0 )
    491             err[ptidx] = (float)minEig;
    492 
    493         if( minEig < minEigThreshold || D < FLT_EPSILON )
    494         {
    495             if( level == 0 && status )
    496                 status[ptidx] = false;
    497             continue;
    498         }
    499 
    500         D = 1.f/D;
    501 
    502         nextPt -= halfWin;
    503         Point2f prevDelta;
    504 
    505         for( j = 0; j < criteria.maxCount; j++ )
    506         {
    507             inextPt.x = cvFloor(nextPt.x);
    508             inextPt.y = cvFloor(nextPt.y);
    509 
    510             if( inextPt.x < -winSize.width || inextPt.x >= J.cols ||
    511                inextPt.y < -winSize.height || inextPt.y >= J.rows )
    512             {
    513                 if( level == 0 && status )
    514                     status[ptidx] = false;
    515                 break;
    516             }
    517 
    518             a = nextPt.x - inextPt.x;
    519             b = nextPt.y - inextPt.y;
    520             iw00 = cvRound((1.f - a)*(1.f - b)*(1 << W_BITS));
    521             iw01 = cvRound(a*(1.f - b)*(1 << W_BITS));
    522             iw10 = cvRound((1.f - a)*b*(1 << W_BITS));
    523             iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
    524             acctype ib1 = 0, ib2 = 0;
    525             float b1, b2;
    526 #if CV_SSE2
    527             qw0 = _mm_set1_epi32(iw00 + (iw01 << 16));
    528             qw1 = _mm_set1_epi32(iw10 + (iw11 << 16));
    529             __m128 qb0 = _mm_setzero_ps(), qb1 = _mm_setzero_ps();
    530 #endif
    531 
    532 #if CV_NEON
    533             int CV_DECL_ALIGNED(16) nB1[] = {0,0,0,0}, nB2[] = {0,0,0,0};
    534 
    535             const int16x4_t d26_2 = vdup_n_s16((int16_t)iw00);
    536             const int16x4_t d27_2 = vdup_n_s16((int16_t)iw01);
    537             const int16x4_t d28_2 = vdup_n_s16((int16_t)iw10);
    538             const int16x4_t d29_2 = vdup_n_s16((int16_t)iw11);
    539 
    540 #endif
    541 
    542             for( y = 0; y < winSize.height; y++ )
    543             {
    544                 const uchar* Jptr = J.ptr() + (y + inextPt.y)*stepJ + inextPt.x*cn;
    545                 const deriv_type* Iptr = IWinBuf.ptr<deriv_type>(y);
    546                 const deriv_type* dIptr = derivIWinBuf.ptr<deriv_type>(y);
    547 
    548                 x = 0;
    549 
    550 #if CV_SSE2
    551                 for( ; x <= winSize.width*cn - 8; x += 8, dIptr += 8*2 )
    552                 {
    553                     __m128i diff0 = _mm_loadu_si128((const __m128i*)(Iptr + x)), diff1;
    554                     __m128i v00 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(Jptr + x)), z);
    555                     __m128i v01 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(Jptr + x + cn)), z);
    556                     __m128i v10 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(Jptr + x + stepJ)), z);
    557                     __m128i v11 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(Jptr + x + stepJ + cn)), z);
    558 
    559                     __m128i t0 = _mm_add_epi32(_mm_madd_epi16(_mm_unpacklo_epi16(v00, v01), qw0),
    560                                                _mm_madd_epi16(_mm_unpacklo_epi16(v10, v11), qw1));
    561                     __m128i t1 = _mm_add_epi32(_mm_madd_epi16(_mm_unpackhi_epi16(v00, v01), qw0),
    562                                                _mm_madd_epi16(_mm_unpackhi_epi16(v10, v11), qw1));
    563                     t0 = _mm_srai_epi32(_mm_add_epi32(t0, qdelta), W_BITS1-5);
    564                     t1 = _mm_srai_epi32(_mm_add_epi32(t1, qdelta), W_BITS1-5);
    565                     diff0 = _mm_subs_epi16(_mm_packs_epi32(t0, t1), diff0);
    566                     diff1 = _mm_unpackhi_epi16(diff0, diff0);
    567                     diff0 = _mm_unpacklo_epi16(diff0, diff0); // It0 It0 It1 It1 ...
    568                     v00 = _mm_loadu_si128((const __m128i*)(dIptr)); // Ix0 Iy0 Ix1 Iy1 ...
    569                     v01 = _mm_loadu_si128((const __m128i*)(dIptr + 8));
    570                     v10 = _mm_mullo_epi16(v00, diff0);
    571                     v11 = _mm_mulhi_epi16(v00, diff0);
    572                     v00 = _mm_unpacklo_epi16(v10, v11);
    573                     v10 = _mm_unpackhi_epi16(v10, v11);
    574                     qb0 = _mm_add_ps(qb0, _mm_cvtepi32_ps(v00));
    575                     qb1 = _mm_add_ps(qb1, _mm_cvtepi32_ps(v10));
    576                     v10 = _mm_mullo_epi16(v01, diff1);
    577                     v11 = _mm_mulhi_epi16(v01, diff1);
    578                     v00 = _mm_unpacklo_epi16(v10, v11);
    579                     v10 = _mm_unpackhi_epi16(v10, v11);
    580                     qb0 = _mm_add_ps(qb0, _mm_cvtepi32_ps(v00));
    581                     qb1 = _mm_add_ps(qb1, _mm_cvtepi32_ps(v10));
    582                 }
    583 #endif
    584 
    585 #if CV_NEON
    586                 for( ; x <= winSize.width*cn - 8; x += 8, dIptr += 8*2 )
    587                 {
    588 
    589                     uint8x8_t d0 = vld1_u8(&Jptr[x]);
    590                     uint8x8_t d2 = vld1_u8(&Jptr[x+cn]);
    591                     uint8x8_t d4 = vld1_u8(&Jptr[x+stepJ]);
    592                     uint8x8_t d6 = vld1_u8(&Jptr[x+stepJ+cn]);
    593 
    594                     uint16x8_t q0 = vmovl_u8(d0);
    595                     uint16x8_t q1 = vmovl_u8(d2);
    596                     uint16x8_t q2 = vmovl_u8(d4);
    597                     uint16x8_t q3 = vmovl_u8(d6);
    598 
    599                     int32x4_t nq4 = vmull_s16(vget_low_s16(vreinterpretq_s16_u16(q0)), d26_2);
    600                     int32x4_t nq5 = vmull_s16(vget_high_s16(vreinterpretq_s16_u16(q0)), d26_2);
    601 
    602                     int32x4_t nq6 = vmull_s16(vget_low_s16(vreinterpretq_s16_u16(q1)), d27_2);
    603                     int32x4_t nq7 = vmull_s16(vget_high_s16(vreinterpretq_s16_u16(q1)), d27_2);
    604 
    605                     int32x4_t nq8 = vmull_s16(vget_low_s16(vreinterpretq_s16_u16(q2)), d28_2);
    606                     int32x4_t nq9 = vmull_s16(vget_high_s16(vreinterpretq_s16_u16(q2)), d28_2);
    607 
    608                     int32x4_t nq10 = vmull_s16(vget_low_s16(vreinterpretq_s16_u16(q3)), d29_2);
    609                     int32x4_t nq11 = vmull_s16(vget_high_s16(vreinterpretq_s16_u16(q3)), d29_2);
    610 
    611                     nq4 = vaddq_s32(nq4, nq6);
    612                     nq5 = vaddq_s32(nq5, nq7);
    613                     nq8 = vaddq_s32(nq8, nq10);
    614                     nq9 = vaddq_s32(nq9, nq11);
    615 
    616                     int16x8_t q6 = vld1q_s16(&Iptr[x]);
    617 
    618                     nq4 = vaddq_s32(nq4, nq8);
    619                     nq5 = vaddq_s32(nq5, nq9);
    620 
    621                     nq8 = vmovl_s16(vget_high_s16(q6));
    622                     nq6 = vmovl_s16(vget_low_s16(q6));
    623 
    624                     nq4 = vqrshlq_s32(nq4, q11);
    625                     nq5 = vqrshlq_s32(nq5, q11);
    626 
    627                     int16x8x2_t q0q1 = vld2q_s16(dIptr);
    628                     nq11 = vld1q_s32(nB1);
    629                     int32x4_t nq15 = vld1q_s32(nB2);
    630 
    631                     nq4 = vsubq_s32(nq4, nq6);
    632                     nq5 = vsubq_s32(nq5, nq8);
    633 
    634                     int32x4_t nq2 = vmovl_s16(vget_low_s16(q0q1.val[0]));
    635                     int32x4_t nq3 = vmovl_s16(vget_high_s16(q0q1.val[0]));
    636 
    637                     nq7 = vmovl_s16(vget_low_s16(q0q1.val[1]));
    638                     nq8 = vmovl_s16(vget_high_s16(q0q1.val[1]));
    639 
    640                     nq9 = vmulq_s32(nq4, nq2);
    641                     nq10 = vmulq_s32(nq5, nq3);
    642 
    643                     nq4 = vmulq_s32(nq4, nq7);
    644                     nq5 = vmulq_s32(nq5, nq8);
    645 
    646                     nq9 = vaddq_s32(nq9, nq10);
    647                     nq4 = vaddq_s32(nq4, nq5);
    648 
    649                     nq11 = vaddq_s32(nq11, nq9);
    650                     nq15 = vaddq_s32(nq15, nq4);
    651 
    652                     vst1q_s32(nB1, nq11);
    653                     vst1q_s32(nB2, nq15);
    654                 }
    655 #endif
    656 
    657                 for( ; x < winSize.width*cn; x++, dIptr += 2 )
    658                 {
    659                     int diff = CV_DESCALE(Jptr[x]*iw00 + Jptr[x+cn]*iw01 +
    660                                           Jptr[x+stepJ]*iw10 + Jptr[x+stepJ+cn]*iw11,
    661                                           W_BITS1-5) - Iptr[x];
    662                     ib1 += (itemtype)(diff*dIptr[0]);
    663                     ib2 += (itemtype)(diff*dIptr[1]);
    664                 }
    665             }
    666 
    667 #if CV_SSE2
    668             float CV_DECL_ALIGNED(16) bbuf[4];
    669             _mm_store_ps(bbuf, _mm_add_ps(qb0, qb1));
    670             ib1 += bbuf[0] + bbuf[2];
    671             ib2 += bbuf[1] + bbuf[3];
    672 #endif
    673 
    674 #if CV_NEON
    675 
    676             ib1 += (float)(nB1[0] + nB1[1] + nB1[2] + nB1[3]);
    677             ib2 += (float)(nB2[0] + nB2[1] + nB2[2] + nB2[3]);
    678 #endif
    679 
    680             b1 = ib1*FLT_SCALE;
    681             b2 = ib2*FLT_SCALE;
    682 
    683             Point2f delta( (float)((A12*b2 - A22*b1) * D),
    684                           (float)((A12*b1 - A11*b2) * D));
    685             //delta = -delta;
    686 
    687             nextPt += delta;
    688             nextPts[ptidx] = nextPt + halfWin;
    689 
    690             if( delta.ddot(delta) <= criteria.epsilon )
    691                 break;
    692 
    693             if( j > 0 && std::abs(delta.x + prevDelta.x) < 0.01 &&
    694                std::abs(delta.y + prevDelta.y) < 0.01 )
    695             {
    696                 nextPts[ptidx] -= delta*0.5f;
    697                 break;
    698             }
    699             prevDelta = delta;
    700         }
    701 
    702         if( status[ptidx] && err && level == 0 && (flags & OPTFLOW_LK_GET_MIN_EIGENVALS) == 0 )
    703         {
    704             Point2f nextPoint = nextPts[ptidx] - halfWin;
    705             Point inextPoint;
    706 
    707             inextPoint.x = cvFloor(nextPoint.x);
    708             inextPoint.y = cvFloor(nextPoint.y);
    709 
    710             if( inextPoint.x < -winSize.width || inextPoint.x >= J.cols ||
    711                 inextPoint.y < -winSize.height || inextPoint.y >= J.rows )
    712             {
    713                 if( status )
    714                     status[ptidx] = false;
    715                 continue;
    716             }
    717 
    718             float aa = nextPoint.x - inextPoint.x;
    719             float bb = nextPoint.y - inextPoint.y;
    720             iw00 = cvRound((1.f - aa)*(1.f - bb)*(1 << W_BITS));
    721             iw01 = cvRound(aa*(1.f - bb)*(1 << W_BITS));
    722             iw10 = cvRound((1.f - aa)*bb*(1 << W_BITS));
    723             iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
    724             float errval = 0.f;
    725 
    726             for( y = 0; y < winSize.height; y++ )
    727             {
    728                 const uchar* Jptr = J.ptr() + (y + inextPoint.y)*stepJ + inextPoint.x*cn;
    729                 const deriv_type* Iptr = IWinBuf.ptr<deriv_type>(y);
    730 
    731                 for( x = 0; x < winSize.width*cn; x++ )
    732                 {
    733                     int diff = CV_DESCALE(Jptr[x]*iw00 + Jptr[x+cn]*iw01 +
    734                                           Jptr[x+stepJ]*iw10 + Jptr[x+stepJ+cn]*iw11,
    735                                           W_BITS1-5) - Iptr[x];
    736                     errval += std::abs((float)diff);
    737                 }
    738             }
    739             err[ptidx] = errval * 1.f/(32*winSize.width*cn*winSize.height);
    740         }
    741     }
    742 }
    743 
    744 int cv::buildOpticalFlowPyramid(InputArray _img, OutputArrayOfArrays pyramid, Size winSize, int maxLevel, bool withDerivatives,
    745                                 int pyrBorder, int derivBorder, bool tryReuseInputImage)
    746 {
    747     Mat img = _img.getMat();
    748     CV_Assert(img.depth() == CV_8U && winSize.width > 2 && winSize.height > 2 );
    749     int pyrstep = withDerivatives ? 2 : 1;
    750 
    751     pyramid.create(1, (maxLevel + 1) * pyrstep, 0 /*type*/, -1, true, 0);
    752 
    753     int derivType = CV_MAKETYPE(DataType<cv::detail::deriv_type>::depth, img.channels() * 2);
    754 
    755     //level 0
    756     bool lvl0IsSet = false;
    757     if(tryReuseInputImage && img.isSubmatrix() && (pyrBorder & BORDER_ISOLATED) == 0)
    758     {
    759         Size wholeSize;
    760         Point ofs;
    761         img.locateROI(wholeSize, ofs);
    762         if (ofs.x >= winSize.width && ofs.y >= winSize.height
    763               && ofs.x + img.cols + winSize.width <= wholeSize.width
    764               && ofs.y + img.rows + winSize.height <= wholeSize.height)
    765         {
    766             pyramid.getMatRef(0) = img;
    767             lvl0IsSet = true;
    768         }
    769     }
    770 
    771     if(!lvl0IsSet)
    772     {
    773         Mat& temp = pyramid.getMatRef(0);
    774 
    775         if(!temp.empty())
    776             temp.adjustROI(winSize.height, winSize.height, winSize.width, winSize.width);
    777         if(temp.type() != img.type() || temp.cols != winSize.width*2 + img.cols || temp.rows != winSize.height * 2 + img.rows)
    778             temp.create(img.rows + winSize.height*2, img.cols + winSize.width*2, img.type());
    779 
    780         if(pyrBorder == BORDER_TRANSPARENT)
    781             img.copyTo(temp(Rect(winSize.width, winSize.height, img.cols, img.rows)));
    782         else
    783             copyMakeBorder(img, temp, winSize.height, winSize.height, winSize.width, winSize.width, pyrBorder);
    784         temp.adjustROI(-winSize.height, -winSize.height, -winSize.width, -winSize.width);
    785     }
    786 
    787     Size sz = img.size();
    788     Mat prevLevel = pyramid.getMatRef(0);
    789     Mat thisLevel = prevLevel;
    790 
    791     for(int level = 0; level <= maxLevel; ++level)
    792     {
    793         if (level != 0)
    794         {
    795             Mat& temp = pyramid.getMatRef(level * pyrstep);
    796 
    797             if(!temp.empty())
    798                 temp.adjustROI(winSize.height, winSize.height, winSize.width, winSize.width);
    799             if(temp.type() != img.type() || temp.cols != winSize.width*2 + sz.width || temp.rows != winSize.height * 2 + sz.height)
    800                 temp.create(sz.height + winSize.height*2, sz.width + winSize.width*2, img.type());
    801 
    802             thisLevel = temp(Rect(winSize.width, winSize.height, sz.width, sz.height));
    803             pyrDown(prevLevel, thisLevel, sz);
    804 
    805             if(pyrBorder != BORDER_TRANSPARENT)
    806                 copyMakeBorder(thisLevel, temp, winSize.height, winSize.height, winSize.width, winSize.width, pyrBorder|BORDER_ISOLATED);
    807             temp.adjustROI(-winSize.height, -winSize.height, -winSize.width, -winSize.width);
    808         }
    809 
    810         if(withDerivatives)
    811         {
    812             Mat& deriv = pyramid.getMatRef(level * pyrstep + 1);
    813 
    814             if(!deriv.empty())
    815                 deriv.adjustROI(winSize.height, winSize.height, winSize.width, winSize.width);
    816             if(deriv.type() != derivType || deriv.cols != winSize.width*2 + sz.width || deriv.rows != winSize.height * 2 + sz.height)
    817                 deriv.create(sz.height + winSize.height*2, sz.width + winSize.width*2, derivType);
    818 
    819             Mat derivI = deriv(Rect(winSize.width, winSize.height, sz.width, sz.height));
    820             calcSharrDeriv(thisLevel, derivI);
    821 
    822             if(derivBorder != BORDER_TRANSPARENT)
    823                 copyMakeBorder(derivI, deriv, winSize.height, winSize.height, winSize.width, winSize.width, derivBorder|BORDER_ISOLATED);
    824             deriv.adjustROI(-winSize.height, -winSize.height, -winSize.width, -winSize.width);
    825         }
    826 
    827         sz = Size((sz.width+1)/2, (sz.height+1)/2);
    828         if( sz.width <= winSize.width || sz.height <= winSize.height )
    829         {
    830             pyramid.create(1, (level + 1) * pyrstep, 0 /*type*/, -1, true, 0);//check this
    831             return level;
    832         }
    833 
    834         prevLevel = thisLevel;
    835     }
    836 
    837     return maxLevel;
    838 }
    839 
    840 namespace cv
    841 {
    842     class PyrLKOpticalFlow
    843     {
    844         struct dim3
    845         {
    846             unsigned int x, y, z;
    847             dim3() : x(0), y(0), z(0) { }
    848         };
    849     public:
    850         PyrLKOpticalFlow()
    851         {
    852             winSize = Size(21, 21);
    853             maxLevel = 3;
    854             iters = 30;
    855             derivLambda = 0.5;
    856             useInitialFlow = false;
    857 
    858             waveSize = 0;
    859         }
    860 
    861         bool checkParam()
    862         {
    863             iters = std::min(std::max(iters, 0), 100);
    864 
    865             derivLambda = std::min(std::max(derivLambda, 0.0), 1.0);
    866             if (derivLambda < 0)
    867                 return false;
    868             if (maxLevel < 0 || winSize.width <= 2 || winSize.height <= 2)
    869                 return false;
    870             calcPatchSize();
    871             if (patch.x <= 0 || patch.x >= 6 || patch.y <= 0 || patch.y >= 6)
    872                 return false;
    873             if (!initWaveSize())
    874                 return false;
    875             return true;
    876         }
    877 
    878         bool sparse(const UMat &prevImg, const UMat &nextImg, const UMat &prevPts, UMat &nextPts, UMat &status, UMat &err)
    879         {
    880             if (!checkParam())
    881                 return false;
    882 
    883             UMat temp1 = (useInitialFlow ? nextPts : prevPts).reshape(1);
    884             UMat temp2 = nextPts.reshape(1);
    885             multiply(1.0f / (1 << maxLevel) /2.0f, temp1, temp2);
    886 
    887             status.setTo(Scalar::all(1));
    888 
    889             // build the image pyramids.
    890             std::vector<UMat> prevPyr; prevPyr.resize(maxLevel + 1);
    891             std::vector<UMat> nextPyr; nextPyr.resize(maxLevel + 1);
    892 
    893             // allocate buffers with aligned pitch to be able to use cl_khr_image2d_from_buffer extention
    894             // This is the required pitch alignment in pixels
    895             int pitchAlign = (int)ocl::Device::getDefault().imagePitchAlignment();
    896             if (pitchAlign>0)
    897             {
    898                 prevPyr[0] = UMat(prevImg.rows,(prevImg.cols+pitchAlign-1)&(-pitchAlign),CV_32FC1).colRange(0,prevImg.cols);
    899                 nextPyr[0] = UMat(nextImg.rows,(nextImg.cols+pitchAlign-1)&(-pitchAlign),CV_32FC1).colRange(0,nextImg.cols);
    900                 for (int level = 1; level <= maxLevel; ++level)
    901                 {
    902                     int cols,rows;
    903                     // allocate buffers with aligned pitch to be able to use image on buffer extention
    904                     cols = (prevPyr[level - 1].cols+1)/2;
    905                     rows = (prevPyr[level - 1].rows+1)/2;
    906                     prevPyr[level] = UMat(rows,(cols+pitchAlign-1)&(-pitchAlign),prevPyr[level-1].type()).colRange(0,cols);
    907                     cols = (nextPyr[level - 1].cols+1)/2;
    908                     rows = (nextPyr[level - 1].rows+1)/2;
    909                     nextPyr[level] = UMat(rows,(cols+pitchAlign-1)&(-pitchAlign),nextPyr[level-1].type()).colRange(0,cols);
    910                 }
    911             }
    912 
    913             prevImg.convertTo(prevPyr[0], CV_32F);
    914             nextImg.convertTo(nextPyr[0], CV_32F);
    915 
    916             for (int level = 1; level <= maxLevel; ++level)
    917             {
    918                 pyrDown(prevPyr[level - 1], prevPyr[level]);
    919                 pyrDown(nextPyr[level - 1], nextPyr[level]);
    920             }
    921 
    922             // dI/dx ~ Ix, dI/dy ~ Iy
    923             for (int level = maxLevel; level >= 0; level--)
    924             {
    925                 if (!lkSparse_run(prevPyr[level], nextPyr[level], prevPts,
    926                                   nextPts, status, err,
    927                                   prevPts.cols, level))
    928                     return false;
    929             }
    930             return true;
    931         }
    932 
    933         Size winSize;
    934         int maxLevel;
    935         int iters;
    936         double derivLambda;
    937         bool useInitialFlow;
    938 
    939     private:
    940         int waveSize;
    941         bool initWaveSize()
    942         {
    943             waveSize = 1;
    944             if (isDeviceCPU())
    945                 return true;
    946 
    947             ocl::Kernel kernel;
    948             if (!kernel.create("lkSparse", cv::ocl::video::pyrlk_oclsrc, ""))
    949                 return false;
    950             waveSize = (int)kernel.preferedWorkGroupSizeMultiple();
    951             return true;
    952         }
    953         dim3 patch;
    954         void calcPatchSize()
    955         {
    956             dim3 block;
    957 
    958             if (winSize.width > 32 && winSize.width > 2 * winSize.height)
    959             {
    960                 block.x = 32;
    961                 block.y = 8;
    962             }
    963             else
    964             {
    965                 block.x = 16;
    966                 block.y = 16;
    967             }
    968 
    969             patch.x = (winSize.width  + block.x - 1) / block.x;
    970             patch.y = (winSize.height + block.y - 1) / block.y;
    971 
    972             block.z = patch.z = 1;
    973         }
    974 
    975         bool lkSparse_run(UMat &I, UMat &J, const UMat &prevPts, UMat &nextPts, UMat &status, UMat& err,
    976             int ptcount, int level)
    977         {
    978             size_t localThreads[3]  = { 8, 8};
    979             size_t globalThreads[3] = { 8 * ptcount, 8};
    980             char calcErr = (0 == level) ? 1 : 0;
    981 
    982             cv::String build_options;
    983             if (isDeviceCPU())
    984                 build_options = " -D CPU";
    985             else
    986                 build_options = cv::format("-D WAVE_SIZE=%d", waveSize);
    987 
    988             ocl::Kernel kernel;
    989             if (!kernel.create("lkSparse", cv::ocl::video::pyrlk_oclsrc, build_options))
    990                 return false;
    991 
    992             CV_Assert(I.depth() == CV_32F && J.depth() == CV_32F);
    993             ocl::Image2D imageI(I, false, ocl::Image2D::canCreateAlias(I));
    994             ocl::Image2D imageJ(J, false, ocl::Image2D::canCreateAlias(J));
    995 
    996             int idxArg = 0;
    997             idxArg = kernel.set(idxArg, imageI); //image2d_t I
    998             idxArg = kernel.set(idxArg, imageJ); //image2d_t J
    999             idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(prevPts)); // __global const float2* prevPts
   1000             idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(nextPts)); // __global const float2* nextPts
   1001             idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(status)); // __global uchar* status
   1002             idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(err)); // __global float* err
   1003             idxArg = kernel.set(idxArg, (int)level); // const int level
   1004             idxArg = kernel.set(idxArg, (int)I.rows); // const int rows
   1005             idxArg = kernel.set(idxArg, (int)I.cols); // const int cols
   1006             idxArg = kernel.set(idxArg, (int)patch.x); // int PATCH_X
   1007             idxArg = kernel.set(idxArg, (int)patch.y); // int PATCH_Y
   1008             idxArg = kernel.set(idxArg, (int)winSize.width); // int c_winSize_x
   1009             idxArg = kernel.set(idxArg, (int)winSize.height); // int c_winSize_y
   1010             idxArg = kernel.set(idxArg, (int)iters); // int c_iters
   1011             idxArg = kernel.set(idxArg, (char)calcErr); //char calcErr
   1012             return kernel.run(2, globalThreads, localThreads, false);
   1013         }
   1014     private:
   1015         inline static bool isDeviceCPU()
   1016         {
   1017             return (cv::ocl::Device::TYPE_CPU == cv::ocl::Device::getDefault().type());
   1018         }
   1019     };
   1020 
   1021 
   1022     static bool ocl_calcOpticalFlowPyrLK(InputArray _prevImg, InputArray _nextImg,
   1023                                   InputArray _prevPts, InputOutputArray _nextPts,
   1024                                   OutputArray _status, OutputArray _err,
   1025                                   Size winSize, int maxLevel,
   1026                                   TermCriteria criteria,
   1027                                   int flags/*, double minEigThreshold*/ )
   1028     {
   1029         if (0 != (OPTFLOW_LK_GET_MIN_EIGENVALS & flags))
   1030             return false;
   1031         if (!cv::ocl::Device::getDefault().imageSupport())
   1032             return false;
   1033         if (_nextImg.size() != _prevImg.size())
   1034             return false;
   1035         int typePrev = _prevImg.type();
   1036         int typeNext = _nextImg.type();
   1037         if ((1 != CV_MAT_CN(typePrev)) || (1 != CV_MAT_CN(typeNext)))
   1038             return false;
   1039         if ((0 != CV_MAT_DEPTH(typePrev)) || (0 != CV_MAT_DEPTH(typeNext)))
   1040             return false;
   1041 
   1042         if (_prevPts.empty() || _prevPts.type() != CV_32FC2 || (!_prevPts.isContinuous()))
   1043             return false;
   1044         if ((1 != _prevPts.size().height) && (1 != _prevPts.size().width))
   1045             return false;
   1046         size_t npoints = _prevPts.total();
   1047         bool useInitialFlow  = (0 != (flags & OPTFLOW_USE_INITIAL_FLOW));
   1048         if (useInitialFlow)
   1049         {
   1050             if (_nextPts.empty() || _nextPts.type() != CV_32FC2 || (!_prevPts.isContinuous()))
   1051                 return false;
   1052             if ((1 != _nextPts.size().height) && (1 != _nextPts.size().width))
   1053                 return false;
   1054             if (_nextPts.total() != npoints)
   1055                 return false;
   1056         }
   1057         else
   1058         {
   1059             _nextPts.create(_prevPts.size(), _prevPts.type());
   1060         }
   1061 
   1062         PyrLKOpticalFlow opticalFlow;
   1063         opticalFlow.winSize     = winSize;
   1064         opticalFlow.maxLevel    = maxLevel;
   1065         opticalFlow.iters       = criteria.maxCount;
   1066         opticalFlow.derivLambda = criteria.epsilon;
   1067         opticalFlow.useInitialFlow  = useInitialFlow;
   1068 
   1069         if (!opticalFlow.checkParam())
   1070             return false;
   1071 
   1072         UMat umatErr;
   1073         if (_err.needed())
   1074         {
   1075             _err.create((int)npoints, 1, CV_32FC1);
   1076             umatErr = _err.getUMat();
   1077         }
   1078         else
   1079             umatErr.create((int)npoints, 1, CV_32FC1);
   1080 
   1081         _status.create((int)npoints, 1, CV_8UC1);
   1082         UMat umatNextPts = _nextPts.getUMat();
   1083         UMat umatStatus = _status.getUMat();
   1084         return opticalFlow.sparse(_prevImg.getUMat(), _nextImg.getUMat(), _prevPts.getUMat(), umatNextPts, umatStatus, umatErr);
   1085     }
   1086 };
   1087 
   1088 void cv::calcOpticalFlowPyrLK( InputArray _prevImg, InputArray _nextImg,
   1089                            InputArray _prevPts, InputOutputArray _nextPts,
   1090                            OutputArray _status, OutputArray _err,
   1091                            Size winSize, int maxLevel,
   1092                            TermCriteria criteria,
   1093                            int flags, double minEigThreshold )
   1094 {
   1095     bool use_opencl = ocl::useOpenCL() &&
   1096                       (_prevImg.isUMat() || _nextImg.isUMat()) &&
   1097                       ocl::Image2D::isFormatSupported(CV_32F, 1, false);
   1098     if ( use_opencl && ocl_calcOpticalFlowPyrLK(_prevImg, _nextImg, _prevPts, _nextPts, _status, _err, winSize, maxLevel, criteria, flags/*, minEigThreshold*/))
   1099     {
   1100         CV_IMPL_ADD(CV_IMPL_OCL);
   1101         return;
   1102     }
   1103 
   1104     Mat prevPtsMat = _prevPts.getMat();
   1105     const int derivDepth = DataType<cv::detail::deriv_type>::depth;
   1106 
   1107     CV_Assert( maxLevel >= 0 && winSize.width > 2 && winSize.height > 2 );
   1108 
   1109     int level=0, i, npoints;
   1110     CV_Assert( (npoints = prevPtsMat.checkVector(2, CV_32F, true)) >= 0 );
   1111 
   1112     if( npoints == 0 )
   1113     {
   1114         _nextPts.release();
   1115         _status.release();
   1116         _err.release();
   1117         return;
   1118     }
   1119 
   1120     if( !(flags & OPTFLOW_USE_INITIAL_FLOW) )
   1121         _nextPts.create(prevPtsMat.size(), prevPtsMat.type(), -1, true);
   1122 
   1123     Mat nextPtsMat = _nextPts.getMat();
   1124     CV_Assert( nextPtsMat.checkVector(2, CV_32F, true) == npoints );
   1125 
   1126     const Point2f* prevPts = prevPtsMat.ptr<Point2f>();
   1127     Point2f* nextPts = nextPtsMat.ptr<Point2f>();
   1128 
   1129     _status.create((int)npoints, 1, CV_8U, -1, true);
   1130     Mat statusMat = _status.getMat(), errMat;
   1131     CV_Assert( statusMat.isContinuous() );
   1132     uchar* status = statusMat.ptr();
   1133     float* err = 0;
   1134 
   1135     for( i = 0; i < npoints; i++ )
   1136         status[i] = true;
   1137 
   1138     if( _err.needed() )
   1139     {
   1140         _err.create((int)npoints, 1, CV_32F, -1, true);
   1141         errMat = _err.getMat();
   1142         CV_Assert( errMat.isContinuous() );
   1143         err = errMat.ptr<float>();
   1144     }
   1145 
   1146     std::vector<Mat> prevPyr, nextPyr;
   1147     int levels1 = -1;
   1148     int lvlStep1 = 1;
   1149     int levels2 = -1;
   1150     int lvlStep2 = 1;
   1151 
   1152     if(_prevImg.kind() == _InputArray::STD_VECTOR_MAT)
   1153     {
   1154         _prevImg.getMatVector(prevPyr);
   1155 
   1156         levels1 = int(prevPyr.size()) - 1;
   1157         CV_Assert(levels1 >= 0);
   1158 
   1159         if (levels1 % 2 == 1 && prevPyr[0].channels() * 2 == prevPyr[1].channels() && prevPyr[1].depth() == derivDepth)
   1160         {
   1161             lvlStep1 = 2;
   1162             levels1 /= 2;
   1163         }
   1164 
   1165         // ensure that pyramid has reqired padding
   1166         if(levels1 > 0)
   1167         {
   1168             Size fullSize;
   1169             Point ofs;
   1170             prevPyr[lvlStep1].locateROI(fullSize, ofs);
   1171             CV_Assert(ofs.x >= winSize.width && ofs.y >= winSize.height
   1172                 && ofs.x + prevPyr[lvlStep1].cols + winSize.width <= fullSize.width
   1173                 && ofs.y + prevPyr[lvlStep1].rows + winSize.height <= fullSize.height);
   1174         }
   1175 
   1176         if(levels1 < maxLevel)
   1177             maxLevel = levels1;
   1178     }
   1179 
   1180     if(_nextImg.kind() == _InputArray::STD_VECTOR_MAT)
   1181     {
   1182         _nextImg.getMatVector(nextPyr);
   1183 
   1184         levels2 = int(nextPyr.size()) - 1;
   1185         CV_Assert(levels2 >= 0);
   1186 
   1187         if (levels2 % 2 == 1 && nextPyr[0].channels() * 2 == nextPyr[1].channels() && nextPyr[1].depth() == derivDepth)
   1188         {
   1189             lvlStep2 = 2;
   1190             levels2 /= 2;
   1191         }
   1192 
   1193         // ensure that pyramid has reqired padding
   1194         if(levels2 > 0)
   1195         {
   1196             Size fullSize;
   1197             Point ofs;
   1198             nextPyr[lvlStep2].locateROI(fullSize, ofs);
   1199             CV_Assert(ofs.x >= winSize.width && ofs.y >= winSize.height
   1200                 && ofs.x + nextPyr[lvlStep2].cols + winSize.width <= fullSize.width
   1201                 && ofs.y + nextPyr[lvlStep2].rows + winSize.height <= fullSize.height);
   1202         }
   1203 
   1204         if(levels2 < maxLevel)
   1205             maxLevel = levels2;
   1206     }
   1207 
   1208     if (levels1 < 0)
   1209         maxLevel = buildOpticalFlowPyramid(_prevImg, prevPyr, winSize, maxLevel, false);
   1210 
   1211     if (levels2 < 0)
   1212         maxLevel = buildOpticalFlowPyramid(_nextImg, nextPyr, winSize, maxLevel, false);
   1213 
   1214     if( (criteria.type & TermCriteria::COUNT) == 0 )
   1215         criteria.maxCount = 30;
   1216     else
   1217         criteria.maxCount = std::min(std::max(criteria.maxCount, 0), 100);
   1218     if( (criteria.type & TermCriteria::EPS) == 0 )
   1219         criteria.epsilon = 0.01;
   1220     else
   1221         criteria.epsilon = std::min(std::max(criteria.epsilon, 0.), 10.);
   1222     criteria.epsilon *= criteria.epsilon;
   1223 
   1224     // dI/dx ~ Ix, dI/dy ~ Iy
   1225     Mat derivIBuf;
   1226     if(lvlStep1 == 1)
   1227         derivIBuf.create(prevPyr[0].rows + winSize.height*2, prevPyr[0].cols + winSize.width*2, CV_MAKETYPE(derivDepth, prevPyr[0].channels() * 2));
   1228 
   1229     for( level = maxLevel; level >= 0; level-- )
   1230     {
   1231         Mat derivI;
   1232         if(lvlStep1 == 1)
   1233         {
   1234             Size imgSize = prevPyr[level * lvlStep1].size();
   1235             Mat _derivI( imgSize.height + winSize.height*2,
   1236                 imgSize.width + winSize.width*2, derivIBuf.type(), derivIBuf.ptr() );
   1237             derivI = _derivI(Rect(winSize.width, winSize.height, imgSize.width, imgSize.height));
   1238             calcSharrDeriv(prevPyr[level * lvlStep1], derivI);
   1239             copyMakeBorder(derivI, _derivI, winSize.height, winSize.height, winSize.width, winSize.width, BORDER_CONSTANT|BORDER_ISOLATED);
   1240         }
   1241         else
   1242             derivI = prevPyr[level * lvlStep1 + 1];
   1243 
   1244         CV_Assert(prevPyr[level * lvlStep1].size() == nextPyr[level * lvlStep2].size());
   1245         CV_Assert(prevPyr[level * lvlStep1].type() == nextPyr[level * lvlStep2].type());
   1246 
   1247 #ifdef HAVE_TEGRA_OPTIMIZATION
   1248         typedef tegra::LKTrackerInvoker<cv::detail::LKTrackerInvoker> LKTrackerInvoker;
   1249 #else
   1250         typedef cv::detail::LKTrackerInvoker LKTrackerInvoker;
   1251 #endif
   1252 
   1253         parallel_for_(Range(0, npoints), LKTrackerInvoker(prevPyr[level * lvlStep1], derivI,
   1254                                                           nextPyr[level * lvlStep2], prevPts, nextPts,
   1255                                                           status, err,
   1256                                                           winSize, criteria, level, maxLevel,
   1257                                                           flags, (float)minEigThreshold));
   1258     }
   1259 }
   1260 
   1261 namespace cv
   1262 {
   1263 
   1264 static void
   1265 getRTMatrix( const Point2f* a, const Point2f* b,
   1266              int count, Mat& M, bool fullAffine )
   1267 {
   1268     CV_Assert( M.isContinuous() );
   1269 
   1270     if( fullAffine )
   1271     {
   1272         double sa[6][6]={{0.}}, sb[6]={0.};
   1273         Mat A( 6, 6, CV_64F, &sa[0][0] ), B( 6, 1, CV_64F, sb );
   1274         Mat MM = M.reshape(1, 6);
   1275 
   1276         for( int i = 0; i < count; i++ )
   1277         {
   1278             sa[0][0] += a[i].x*a[i].x;
   1279             sa[0][1] += a[i].y*a[i].x;
   1280             sa[0][2] += a[i].x;
   1281 
   1282             sa[1][1] += a[i].y*a[i].y;
   1283             sa[1][2] += a[i].y;
   1284 
   1285             sa[2][2] += 1;
   1286 
   1287             sb[0] += a[i].x*b[i].x;
   1288             sb[1] += a[i].y*b[i].x;
   1289             sb[2] += b[i].x;
   1290             sb[3] += a[i].x*b[i].y;
   1291             sb[4] += a[i].y*b[i].y;
   1292             sb[5] += b[i].y;
   1293         }
   1294 
   1295         sa[3][4] = sa[4][3] = sa[1][0] = sa[0][1];
   1296         sa[3][5] = sa[5][3] = sa[2][0] = sa[0][2];
   1297         sa[4][5] = sa[5][4] = sa[2][1] = sa[1][2];
   1298 
   1299         sa[3][3] = sa[0][0];
   1300         sa[4][4] = sa[1][1];
   1301         sa[5][5] = sa[2][2];
   1302 
   1303         solve( A, B, MM, DECOMP_EIG );
   1304     }
   1305     else
   1306     {
   1307         double sa[4][4]={{0.}}, sb[4]={0.}, m[4];
   1308         Mat A( 4, 4, CV_64F, sa ), B( 4, 1, CV_64F, sb );
   1309         Mat MM( 4, 1, CV_64F, m );
   1310 
   1311         for( int i = 0; i < count; i++ )
   1312         {
   1313             sa[0][0] += a[i].x*a[i].x + a[i].y*a[i].y;
   1314             sa[0][2] += a[i].x;
   1315             sa[0][3] += a[i].y;
   1316 
   1317 
   1318             sa[2][1] += -a[i].y;
   1319             sa[2][2] += 1;
   1320 
   1321             sa[3][0] += a[i].y;
   1322             sa[3][1] += a[i].x;
   1323             sa[3][3] += 1;
   1324 
   1325             sb[0] += a[i].x*b[i].x + a[i].y*b[i].y;
   1326             sb[1] += a[i].x*b[i].y - a[i].y*b[i].x;
   1327             sb[2] += b[i].x;
   1328             sb[3] += b[i].y;
   1329         }
   1330 
   1331         sa[1][1] = sa[0][0];
   1332         sa[2][1] = sa[1][2] = -sa[0][3];
   1333         sa[3][1] = sa[1][3] = sa[2][0] = sa[0][2];
   1334         sa[2][2] = sa[3][3] = count;
   1335         sa[3][0] = sa[0][3];
   1336 
   1337         solve( A, B, MM, DECOMP_EIG );
   1338 
   1339         double* om = M.ptr<double>();
   1340         om[0] = om[4] = m[0];
   1341         om[1] = -m[1];
   1342         om[3] = m[1];
   1343         om[2] = m[2];
   1344         om[5] = m[3];
   1345     }
   1346 }
   1347 
   1348 }
   1349 
   1350 cv::Mat cv::estimateRigidTransform( InputArray src1, InputArray src2, bool fullAffine )
   1351 {
   1352     Mat M(2, 3, CV_64F), A = src1.getMat(), B = src2.getMat();
   1353 
   1354     const int COUNT = 15;
   1355     const int WIDTH = 160, HEIGHT = 120;
   1356     const int RANSAC_MAX_ITERS = 500;
   1357     const int RANSAC_SIZE0 = 3;
   1358     const double RANSAC_GOOD_RATIO = 0.5;
   1359 
   1360     std::vector<Point2f> pA, pB;
   1361     std::vector<int> good_idx;
   1362     std::vector<uchar> status;
   1363 
   1364     double scale = 1.;
   1365     int i, j, k, k1;
   1366 
   1367     RNG rng((uint64)-1);
   1368     int good_count = 0;
   1369 
   1370     if( A.size() != B.size() )
   1371         CV_Error( Error::StsUnmatchedSizes, "Both input images must have the same size" );
   1372 
   1373     if( A.type() != B.type() )
   1374         CV_Error( Error::StsUnmatchedFormats, "Both input images must have the same data type" );
   1375 
   1376     int count = A.checkVector(2);
   1377 
   1378     if( count > 0 )
   1379     {
   1380         A.reshape(2, count).convertTo(pA, CV_32F);
   1381         B.reshape(2, count).convertTo(pB, CV_32F);
   1382     }
   1383     else if( A.depth() == CV_8U )
   1384     {
   1385         int cn = A.channels();
   1386         CV_Assert( cn == 1 || cn == 3 || cn == 4 );
   1387         Size sz0 = A.size();
   1388         Size sz1(WIDTH, HEIGHT);
   1389 
   1390         scale = std::max(1., std::max( (double)sz1.width/sz0.width, (double)sz1.height/sz0.height ));
   1391 
   1392         sz1.width = cvRound( sz0.width * scale );
   1393         sz1.height = cvRound( sz0.height * scale );
   1394 
   1395         bool equalSizes = sz1.width == sz0.width && sz1.height == sz0.height;
   1396 
   1397         if( !equalSizes || cn != 1 )
   1398         {
   1399             Mat sA, sB;
   1400 
   1401             if( cn != 1 )
   1402             {
   1403                 Mat gray;
   1404                 cvtColor(A, gray, COLOR_BGR2GRAY);
   1405                 resize(gray, sA, sz1, 0., 0., INTER_AREA);
   1406                 cvtColor(B, gray, COLOR_BGR2GRAY);
   1407                 resize(gray, sB, sz1, 0., 0., INTER_AREA);
   1408             }
   1409             else
   1410             {
   1411                 resize(A, sA, sz1, 0., 0., INTER_AREA);
   1412                 resize(B, sB, sz1, 0., 0., INTER_AREA);
   1413             }
   1414 
   1415             A = sA;
   1416             B = sB;
   1417         }
   1418 
   1419         int count_y = COUNT;
   1420         int count_x = cvRound((double)COUNT*sz1.width/sz1.height);
   1421         count = count_x * count_y;
   1422 
   1423         pA.resize(count);
   1424         pB.resize(count);
   1425         status.resize(count);
   1426 
   1427         for( i = 0, k = 0; i < count_y; i++ )
   1428             for( j = 0; j < count_x; j++, k++ )
   1429             {
   1430                 pA[k].x = (j+0.5f)*sz1.width/count_x;
   1431                 pA[k].y = (i+0.5f)*sz1.height/count_y;
   1432             }
   1433 
   1434         // find the corresponding points in B
   1435         calcOpticalFlowPyrLK(A, B, pA, pB, status, noArray(), Size(21, 21), 3,
   1436                              TermCriteria(TermCriteria::MAX_ITER,40,0.1));
   1437 
   1438         // repack the remained points
   1439         for( i = 0, k = 0; i < count; i++ )
   1440             if( status[i] )
   1441             {
   1442                 if( i > k )
   1443                 {
   1444                     pA[k] = pA[i];
   1445                     pB[k] = pB[i];
   1446                 }
   1447                 k++;
   1448             }
   1449         count = k;
   1450         pA.resize(count);
   1451         pB.resize(count);
   1452     }
   1453     else
   1454         CV_Error( Error::StsUnsupportedFormat, "Both input images must have either 8uC1 or 8uC3 type" );
   1455 
   1456     good_idx.resize(count);
   1457 
   1458     if( count < RANSAC_SIZE0 )
   1459         return Mat();
   1460 
   1461     Rect brect = boundingRect(pB);
   1462 
   1463     // RANSAC stuff:
   1464     // 1. find the consensus
   1465     for( k = 0; k < RANSAC_MAX_ITERS; k++ )
   1466     {
   1467         int idx[RANSAC_SIZE0];
   1468         Point2f a[RANSAC_SIZE0];
   1469         Point2f b[RANSAC_SIZE0];
   1470 
   1471         // choose random 3 non-complanar points from A & B
   1472         for( i = 0; i < RANSAC_SIZE0; i++ )
   1473         {
   1474             for( k1 = 0; k1 < RANSAC_MAX_ITERS; k1++ )
   1475             {
   1476                 idx[i] = rng.uniform(0, count);
   1477 
   1478                 for( j = 0; j < i; j++ )
   1479                 {
   1480                     if( idx[j] == idx[i] )
   1481                         break;
   1482                     // check that the points are not very close one each other
   1483                     if( fabs(pA[idx[i]].x - pA[idx[j]].x) +
   1484                         fabs(pA[idx[i]].y - pA[idx[j]].y) < FLT_EPSILON )
   1485                         break;
   1486                     if( fabs(pB[idx[i]].x - pB[idx[j]].x) +
   1487                         fabs(pB[idx[i]].y - pB[idx[j]].y) < FLT_EPSILON )
   1488                         break;
   1489                 }
   1490 
   1491                 if( j < i )
   1492                     continue;
   1493 
   1494                 if( i+1 == RANSAC_SIZE0 )
   1495                 {
   1496                     // additional check for non-complanar vectors
   1497                     a[0] = pA[idx[0]];
   1498                     a[1] = pA[idx[1]];
   1499                     a[2] = pA[idx[2]];
   1500 
   1501                     b[0] = pB[idx[0]];
   1502                     b[1] = pB[idx[1]];
   1503                     b[2] = pB[idx[2]];
   1504 
   1505                     double dax1 = a[1].x - a[0].x, day1 = a[1].y - a[0].y;
   1506                     double dax2 = a[2].x - a[0].x, day2 = a[2].y - a[0].y;
   1507                     double dbx1 = b[1].x - b[0].x, dby1 = b[1].y - b[0].y;
   1508                     double dbx2 = b[2].x - b[0].x, dby2 = b[2].y - b[0].y;
   1509                     const double eps = 0.01;
   1510 
   1511                     if( fabs(dax1*day2 - day1*dax2) < eps*std::sqrt(dax1*dax1+day1*day1)*std::sqrt(dax2*dax2+day2*day2) ||
   1512                         fabs(dbx1*dby2 - dby1*dbx2) < eps*std::sqrt(dbx1*dbx1+dby1*dby1)*std::sqrt(dbx2*dbx2+dby2*dby2) )
   1513                         continue;
   1514                 }
   1515                 break;
   1516             }
   1517 
   1518             if( k1 >= RANSAC_MAX_ITERS )
   1519                 break;
   1520         }
   1521 
   1522         if( i < RANSAC_SIZE0 )
   1523             continue;
   1524 
   1525         // estimate the transformation using 3 points
   1526         getRTMatrix( a, b, 3, M, fullAffine );
   1527 
   1528         const double* m = M.ptr<double>();
   1529         for( i = 0, good_count = 0; i < count; i++ )
   1530         {
   1531             if( std::abs( m[0]*pA[i].x + m[1]*pA[i].y + m[2] - pB[i].x ) +
   1532                 std::abs( m[3]*pA[i].x + m[4]*pA[i].y + m[5] - pB[i].y ) < std::max(brect.width,brect.height)*0.05 )
   1533                 good_idx[good_count++] = i;
   1534         }
   1535 
   1536         if( good_count >= count*RANSAC_GOOD_RATIO )
   1537             break;
   1538     }
   1539 
   1540     if( k >= RANSAC_MAX_ITERS )
   1541         return Mat();
   1542 
   1543     if( good_count < count )
   1544     {
   1545         for( i = 0; i < good_count; i++ )
   1546         {
   1547             j = good_idx[i];
   1548             pA[i] = pA[j];
   1549             pB[i] = pB[j];
   1550         }
   1551     }
   1552 
   1553     getRTMatrix( &pA[0], &pB[0], good_count, M, fullAffine );
   1554     M.at<double>(0, 2) /= scale;
   1555     M.at<double>(1, 2) /= scale;
   1556 
   1557     return M;
   1558 }
   1559 
   1560 /* End of file. */
   1561