Home | History | Annotate | Download | only in src
      1 /*M///////////////////////////////////////////////////////////////////////////////////////
      2 //
      3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
      4 //
      5 //  By downloading, copying, installing or using the software you agree to this license.
      6 //  If you do not agree to this license, do not download, install,
      7 //  copy or use the software.
      8 //
      9 //
     10 //                           License Agreement
     11 //                For Open Source Computer Vision Library
     12 //
     13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
     14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
     15 // Copyright (C) 2014-2015, Itseez Inc., all rights reserved.
     16 // Third party copyrights are property of their respective owners.
     17 //
     18 // Redistribution and use in source and binary forms, with or without modification,
     19 // are permitted provided that the following conditions are met:
     20 //
     21 //   * Redistribution's of source code must retain the above copyright notice,
     22 //     this list of conditions and the following disclaimer.
     23 //
     24 //   * Redistribution's in binary form must reproduce the above copyright notice,
     25 //     this list of conditions and the following disclaimer in the documentation
     26 //     and/or other materials provided with the distribution.
     27 //
     28 //   * The name of the copyright holders may not be used to endorse or promote products
     29 //     derived from this software without specific prior written permission.
     30 //
     31 // This software is provided by the copyright holders and contributors "as is" and
     32 // any express or implied warranties, including, but not limited to, the implied
     33 // warranties of merchantability and fitness for a particular purpose are disclaimed.
     34 // In no event shall the Intel Corporation or contributors be liable for any direct,
     35 // indirect, incidental, special, exemplary, or consequential damages
     36 // (including, but not limited to, procurement of substitute goods or services;
     37 // loss of use, data, or profits; or business interruption) however caused
     38 // and on any theory of liability, whether in contract, strict liability,
     39 // or tort (including negligence or otherwise) arising in any way out of
     40 // the use of this software, even if advised of the possibility of such damage.
     41 //
     42 //M*/
     43 
     44 #include "precomp.hpp"
     45 #include "opencl_kernels_imgproc.hpp"
     46 
     47 namespace cv
     48 {
     49 
     50 static void calcMinEigenVal( const Mat& _cov, Mat& _dst )
     51 {
     52     int i, j;
     53     Size size = _cov.size();
     54 #if CV_SSE
     55     volatile bool simd = checkHardwareSupport(CV_CPU_SSE);
     56 #endif
     57 
     58     if( _cov.isContinuous() && _dst.isContinuous() )
     59     {
     60         size.width *= size.height;
     61         size.height = 1;
     62     }
     63 
     64     for( i = 0; i < size.height; i++ )
     65     {
     66         const float* cov = _cov.ptr<float>(i);
     67         float* dst = _dst.ptr<float>(i);
     68         j = 0;
     69     #if CV_SSE
     70         if( simd )
     71         {
     72             __m128 half = _mm_set1_ps(0.5f);
     73             for( ; j <= size.width - 4; j += 4 )
     74             {
     75                 __m128 t0 = _mm_loadu_ps(cov + j*3); // a0 b0 c0 x
     76                 __m128 t1 = _mm_loadu_ps(cov + j*3 + 3); // a1 b1 c1 x
     77                 __m128 t2 = _mm_loadu_ps(cov + j*3 + 6); // a2 b2 c2 x
     78                 __m128 t3 = _mm_loadu_ps(cov + j*3 + 9); // a3 b3 c3 x
     79                 __m128 a, b, c, t;
     80                 t = _mm_unpacklo_ps(t0, t1); // a0 a1 b0 b1
     81                 c = _mm_unpackhi_ps(t0, t1); // c0 c1 x x
     82                 b = _mm_unpacklo_ps(t2, t3); // a2 a3 b2 b3
     83                 c = _mm_movelh_ps(c, _mm_unpackhi_ps(t2, t3)); // c0 c1 c2 c3
     84                 a = _mm_movelh_ps(t, b);
     85                 b = _mm_movehl_ps(b, t);
     86                 a = _mm_mul_ps(a, half);
     87                 c = _mm_mul_ps(c, half);
     88                 t = _mm_sub_ps(a, c);
     89                 t = _mm_add_ps(_mm_mul_ps(t, t), _mm_mul_ps(b,b));
     90                 a = _mm_sub_ps(_mm_add_ps(a, c), _mm_sqrt_ps(t));
     91                 _mm_storeu_ps(dst + j, a);
     92             }
     93         }
     94     #elif CV_NEON
     95         float32x4_t v_half = vdupq_n_f32(0.5f);
     96         for( ; j <= size.width - 4; j += 4 )
     97         {
     98             float32x4x3_t v_src = vld3q_f32(cov + j * 3);
     99             float32x4_t v_a = vmulq_f32(v_src.val[0], v_half);
    100             float32x4_t v_b = v_src.val[1];
    101             float32x4_t v_c = vmulq_f32(v_src.val[2], v_half);
    102 
    103             float32x4_t v_t = vsubq_f32(v_a, v_c);
    104             v_t = vmlaq_f32(vmulq_f32(v_t, v_t), v_b, v_b);
    105             vst1q_f32(dst + j, vsubq_f32(vaddq_f32(v_a, v_c), cv_vsqrtq_f32(v_t)));
    106         }
    107     #endif
    108         for( ; j < size.width; j++ )
    109         {
    110             float a = cov[j*3]*0.5f;
    111             float b = cov[j*3+1];
    112             float c = cov[j*3+2]*0.5f;
    113             dst[j] = (float)((a + c) - std::sqrt((a - c)*(a - c) + b*b));
    114         }
    115     }
    116 }
    117 
    118 
    119 static void calcHarris( const Mat& _cov, Mat& _dst, double k )
    120 {
    121     int i, j;
    122     Size size = _cov.size();
    123 #if CV_SSE
    124     volatile bool simd = checkHardwareSupport(CV_CPU_SSE);
    125 #endif
    126 
    127     if( _cov.isContinuous() && _dst.isContinuous() )
    128     {
    129         size.width *= size.height;
    130         size.height = 1;
    131     }
    132 
    133     for( i = 0; i < size.height; i++ )
    134     {
    135         const float* cov = _cov.ptr<float>(i);
    136         float* dst = _dst.ptr<float>(i);
    137         j = 0;
    138 
    139     #if CV_SSE
    140         if( simd )
    141         {
    142             __m128 k4 = _mm_set1_ps((float)k);
    143             for( ; j <= size.width - 4; j += 4 )
    144             {
    145                 __m128 t0 = _mm_loadu_ps(cov + j*3); // a0 b0 c0 x
    146                 __m128 t1 = _mm_loadu_ps(cov + j*3 + 3); // a1 b1 c1 x
    147                 __m128 t2 = _mm_loadu_ps(cov + j*3 + 6); // a2 b2 c2 x
    148                 __m128 t3 = _mm_loadu_ps(cov + j*3 + 9); // a3 b3 c3 x
    149                 __m128 a, b, c, t;
    150                 t = _mm_unpacklo_ps(t0, t1); // a0 a1 b0 b1
    151                 c = _mm_unpackhi_ps(t0, t1); // c0 c1 x x
    152                 b = _mm_unpacklo_ps(t2, t3); // a2 a3 b2 b3
    153                 c = _mm_movelh_ps(c, _mm_unpackhi_ps(t2, t3)); // c0 c1 c2 c3
    154                 a = _mm_movelh_ps(t, b);
    155                 b = _mm_movehl_ps(b, t);
    156                 t = _mm_add_ps(a, c);
    157                 a = _mm_sub_ps(_mm_mul_ps(a, c), _mm_mul_ps(b, b));
    158                 t = _mm_mul_ps(_mm_mul_ps(k4, t), t);
    159                 a = _mm_sub_ps(a, t);
    160                 _mm_storeu_ps(dst + j, a);
    161             }
    162         }
    163     #elif CV_NEON
    164         float32x4_t v_k = vdupq_n_f32((float)k);
    165 
    166         for( ; j <= size.width - 4; j += 4 )
    167         {
    168             float32x4x3_t v_src = vld3q_f32(cov + j * 3);
    169             float32x4_t v_a = v_src.val[0], v_b = v_src.val[1], v_c = v_src.val[2];
    170             float32x4_t v_ac_bb = vmlsq_f32(vmulq_f32(v_a, v_c), v_b, v_b);
    171             float32x4_t v_ac = vaddq_f32(v_a, v_c);
    172             vst1q_f32(dst + j, vmlsq_f32(v_ac_bb, v_k, vmulq_f32(v_ac, v_ac)));
    173         }
    174     #endif
    175 
    176         for( ; j < size.width; j++ )
    177         {
    178             float a = cov[j*3];
    179             float b = cov[j*3+1];
    180             float c = cov[j*3+2];
    181             dst[j] = (float)(a*c - b*b - k*(a + c)*(a + c));
    182         }
    183     }
    184 }
    185 
    186 
    187 static void eigen2x2( const float* cov, float* dst, int n )
    188 {
    189     for( int j = 0; j < n; j++ )
    190     {
    191         double a = cov[j*3];
    192         double b = cov[j*3+1];
    193         double c = cov[j*3+2];
    194 
    195         double u = (a + c)*0.5;
    196         double v = std::sqrt((a - c)*(a - c)*0.25 + b*b);
    197         double l1 = u + v;
    198         double l2 = u - v;
    199 
    200         double x = b;
    201         double y = l1 - a;
    202         double e = fabs(x);
    203 
    204         if( e + fabs(y) < 1e-4 )
    205         {
    206             y = b;
    207             x = l1 - c;
    208             e = fabs(x);
    209             if( e + fabs(y) < 1e-4 )
    210             {
    211                 e = 1./(e + fabs(y) + FLT_EPSILON);
    212                 x *= e, y *= e;
    213             }
    214         }
    215 
    216         double d = 1./std::sqrt(x*x + y*y + DBL_EPSILON);
    217         dst[6*j] = (float)l1;
    218         dst[6*j + 2] = (float)(x*d);
    219         dst[6*j + 3] = (float)(y*d);
    220 
    221         x = b;
    222         y = l2 - a;
    223         e = fabs(x);
    224 
    225         if( e + fabs(y) < 1e-4 )
    226         {
    227             y = b;
    228             x = l2 - c;
    229             e = fabs(x);
    230             if( e + fabs(y) < 1e-4 )
    231             {
    232                 e = 1./(e + fabs(y) + FLT_EPSILON);
    233                 x *= e, y *= e;
    234             }
    235         }
    236 
    237         d = 1./std::sqrt(x*x + y*y + DBL_EPSILON);
    238         dst[6*j + 1] = (float)l2;
    239         dst[6*j + 4] = (float)(x*d);
    240         dst[6*j + 5] = (float)(y*d);
    241     }
    242 }
    243 
    244 static void calcEigenValsVecs( const Mat& _cov, Mat& _dst )
    245 {
    246     Size size = _cov.size();
    247     if( _cov.isContinuous() && _dst.isContinuous() )
    248     {
    249         size.width *= size.height;
    250         size.height = 1;
    251     }
    252 
    253     for( int i = 0; i < size.height; i++ )
    254     {
    255         const float* cov = _cov.ptr<float>(i);
    256         float* dst = _dst.ptr<float>(i);
    257 
    258         eigen2x2(cov, dst, size.width);
    259     }
    260 }
    261 
    262 
    263 enum { MINEIGENVAL=0, HARRIS=1, EIGENVALSVECS=2 };
    264 
    265 
    266 static void
    267 cornerEigenValsVecs( const Mat& src, Mat& eigenv, int block_size,
    268                      int aperture_size, int op_type, double k=0.,
    269                      int borderType=BORDER_DEFAULT )
    270 {
    271 #ifdef HAVE_TEGRA_OPTIMIZATION
    272     if (tegra::useTegra() && tegra::cornerEigenValsVecs(src, eigenv, block_size, aperture_size, op_type, k, borderType))
    273         return;
    274 #endif
    275 #if CV_SSE2
    276     bool haveSSE2 = checkHardwareSupport(CV_CPU_SSE2);
    277 #endif
    278 
    279     int depth = src.depth();
    280     double scale = (double)(1 << ((aperture_size > 0 ? aperture_size : 3) - 1)) * block_size;
    281     if( aperture_size < 0 )
    282         scale *= 2.0;
    283     if( depth == CV_8U )
    284         scale *= 255.0;
    285     scale = 1.0/scale;
    286 
    287     CV_Assert( src.type() == CV_8UC1 || src.type() == CV_32FC1 );
    288 
    289     Mat Dx, Dy;
    290     if( aperture_size > 0 )
    291     {
    292         Sobel( src, Dx, CV_32F, 1, 0, aperture_size, scale, 0, borderType );
    293         Sobel( src, Dy, CV_32F, 0, 1, aperture_size, scale, 0, borderType );
    294     }
    295     else
    296     {
    297         Scharr( src, Dx, CV_32F, 1, 0, scale, 0, borderType );
    298         Scharr( src, Dy, CV_32F, 0, 1, scale, 0, borderType );
    299     }
    300 
    301     Size size = src.size();
    302     Mat cov( size, CV_32FC3 );
    303     int i, j;
    304 
    305     for( i = 0; i < size.height; i++ )
    306     {
    307         float* cov_data = cov.ptr<float>(i);
    308         const float* dxdata = Dx.ptr<float>(i);
    309         const float* dydata = Dy.ptr<float>(i);
    310         j = 0;
    311 
    312         #if CV_NEON
    313         for( ; j <= size.width - 4; j += 4 )
    314         {
    315             float32x4_t v_dx = vld1q_f32(dxdata + j);
    316             float32x4_t v_dy = vld1q_f32(dydata + j);
    317 
    318             float32x4x3_t v_dst;
    319             v_dst.val[0] = vmulq_f32(v_dx, v_dx);
    320             v_dst.val[1] = vmulq_f32(v_dx, v_dy);
    321             v_dst.val[2] = vmulq_f32(v_dy, v_dy);
    322 
    323             vst3q_f32(cov_data + j * 3, v_dst);
    324         }
    325         #elif CV_SSE2
    326         if (haveSSE2)
    327         {
    328             for( ; j <= size.width - 8; j += 8 )
    329             {
    330                 __m128 v_dx_0 = _mm_loadu_ps(dxdata + j);
    331                 __m128 v_dx_1 = _mm_loadu_ps(dxdata + j + 4);
    332                 __m128 v_dy_0 = _mm_loadu_ps(dydata + j);
    333                 __m128 v_dy_1 = _mm_loadu_ps(dydata + j + 4);
    334 
    335                 __m128 v_dx2_0 = _mm_mul_ps(v_dx_0, v_dx_0);
    336                 __m128 v_dxy_0 = _mm_mul_ps(v_dx_0, v_dy_0);
    337                 __m128 v_dy2_0 = _mm_mul_ps(v_dy_0, v_dy_0);
    338                 __m128 v_dx2_1 = _mm_mul_ps(v_dx_1, v_dx_1);
    339                 __m128 v_dxy_1 = _mm_mul_ps(v_dx_1, v_dy_1);
    340                 __m128 v_dy2_1 = _mm_mul_ps(v_dy_1, v_dy_1);
    341 
    342                 _mm_interleave_ps(v_dx2_0, v_dx2_1, v_dxy_0, v_dxy_1, v_dy2_0, v_dy2_1);
    343 
    344                 _mm_storeu_ps(cov_data + j * 3, v_dx2_0);
    345                 _mm_storeu_ps(cov_data + j * 3 + 4, v_dx2_1);
    346                 _mm_storeu_ps(cov_data + j * 3 + 8, v_dxy_0);
    347                 _mm_storeu_ps(cov_data + j * 3 + 12, v_dxy_1);
    348                 _mm_storeu_ps(cov_data + j * 3 + 16, v_dy2_0);
    349                 _mm_storeu_ps(cov_data + j * 3 + 20, v_dy2_1);
    350             }
    351         }
    352         #endif
    353 
    354         for( ; j < size.width; j++ )
    355         {
    356             float dx = dxdata[j];
    357             float dy = dydata[j];
    358 
    359             cov_data[j*3] = dx*dx;
    360             cov_data[j*3+1] = dx*dy;
    361             cov_data[j*3+2] = dy*dy;
    362         }
    363     }
    364 
    365     boxFilter(cov, cov, cov.depth(), Size(block_size, block_size),
    366         Point(-1,-1), false, borderType );
    367 
    368     if( op_type == MINEIGENVAL )
    369         calcMinEigenVal( cov, eigenv );
    370     else if( op_type == HARRIS )
    371         calcHarris( cov, eigenv, k );
    372     else if( op_type == EIGENVALSVECS )
    373         calcEigenValsVecs( cov, eigenv );
    374 }
    375 
    376 #ifdef HAVE_OPENCL
    377 
    378 static bool extractCovData(InputArray _src, UMat & Dx, UMat & Dy, int depth,
    379                            float scale, int aperture_size, int borderType)
    380 {
    381     UMat src = _src.getUMat();
    382 
    383     Size wholeSize;
    384     Point ofs;
    385     src.locateROI(wholeSize, ofs);
    386 
    387     const int sobel_lsz = 16;
    388     if ((aperture_size == 3 || aperture_size == 5 || aperture_size == 7 || aperture_size == -1) &&
    389         wholeSize.height > sobel_lsz + (aperture_size >> 1) &&
    390         wholeSize.width > sobel_lsz + (aperture_size >> 1))
    391     {
    392         CV_Assert(depth == CV_8U || depth == CV_32F);
    393 
    394         Dx.create(src.size(), CV_32FC1);
    395         Dy.create(src.size(), CV_32FC1);
    396 
    397         size_t localsize[2] = { sobel_lsz, sobel_lsz };
    398         size_t globalsize[2] = { localsize[0] * (1 + (src.cols - 1) / localsize[0]),
    399                                  localsize[1] * (1 + (src.rows - 1) / localsize[1]) };
    400 
    401         int src_offset_x = (int)((src.offset % src.step) / src.elemSize());
    402         int src_offset_y = (int)(src.offset / src.step);
    403 
    404         const char * const borderTypes[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT",
    405                                              "BORDER_WRAP", "BORDER_REFLECT101" };
    406 
    407         ocl::Kernel k(format("sobel%d", aperture_size).c_str(), ocl::imgproc::covardata_oclsrc,
    408                       cv::format("-D BLK_X=%d -D BLK_Y=%d -D %s -D SRCTYPE=%s%s",
    409                                  (int)localsize[0], (int)localsize[1], borderTypes[borderType], ocl::typeToStr(depth),
    410                                  aperture_size < 0 ? " -D SCHARR" : ""));
    411         if (k.empty())
    412             return false;
    413 
    414         k.args(ocl::KernelArg::PtrReadOnly(src), (int)src.step, src_offset_x, src_offset_y,
    415                ocl::KernelArg::WriteOnlyNoSize(Dx), ocl::KernelArg::WriteOnly(Dy),
    416                wholeSize.height, wholeSize.width, scale);
    417 
    418         return k.run(2, globalsize, localsize, false);
    419     }
    420     else
    421     {
    422         if (aperture_size > 0)
    423         {
    424             Sobel(_src, Dx, CV_32F, 1, 0, aperture_size, scale, 0, borderType);
    425             Sobel(_src, Dy, CV_32F, 0, 1, aperture_size, scale, 0, borderType);
    426         }
    427         else
    428         {
    429             Scharr(_src, Dx, CV_32F, 1, 0, scale, 0, borderType);
    430             Scharr(_src, Dy, CV_32F, 0, 1, scale, 0, borderType);
    431         }
    432     }
    433 
    434     return true;
    435 }
    436 
    437 static bool ocl_cornerMinEigenValVecs(InputArray _src, OutputArray _dst, int block_size,
    438                                       int aperture_size, double k, int borderType, int op_type)
    439 {
    440     CV_Assert(op_type == HARRIS || op_type == MINEIGENVAL);
    441 
    442     if ( !(borderType == BORDER_CONSTANT || borderType == BORDER_REPLICATE ||
    443            borderType == BORDER_REFLECT || borderType == BORDER_REFLECT_101) )
    444         return false;
    445 
    446     int type = _src.type(), depth = CV_MAT_DEPTH(type);
    447     if ( !(type == CV_8UC1 || type == CV_32FC1) )
    448         return false;
    449 
    450     const char * const borderTypes[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT",
    451                                          "BORDER_WRAP", "BORDER_REFLECT101" };
    452     const char * const cornerType[] = { "CORNER_MINEIGENVAL", "CORNER_HARRIS", 0 };
    453 
    454 
    455     double scale = (double)(1 << ((aperture_size > 0 ? aperture_size : 3) - 1)) * block_size;
    456     if (aperture_size < 0)
    457         scale *= 2.0;
    458     if (depth == CV_8U)
    459         scale *= 255.0;
    460     scale = 1.0 / scale;
    461 
    462     UMat Dx, Dy;
    463     if (!extractCovData(_src, Dx, Dy, depth, (float)scale, aperture_size, borderType))
    464         return false;
    465 
    466     ocl::Kernel cornelKernel("corner", ocl::imgproc::corner_oclsrc,
    467                              format("-D anX=%d -D anY=%d -D ksX=%d -D ksY=%d -D %s -D %s",
    468                                     block_size / 2, block_size / 2, block_size, block_size,
    469                                     borderTypes[borderType], cornerType[op_type]));
    470     if (cornelKernel.empty())
    471         return false;
    472 
    473     _dst.createSameSize(_src, CV_32FC1);
    474     UMat dst = _dst.getUMat();
    475 
    476     cornelKernel.args(ocl::KernelArg::ReadOnly(Dx), ocl::KernelArg::ReadOnly(Dy),
    477                       ocl::KernelArg::WriteOnly(dst), (float)k);
    478 
    479     size_t blockSizeX = 256, blockSizeY = 1;
    480     size_t gSize = blockSizeX - block_size / 2 * 2;
    481     size_t globalSizeX = (Dx.cols) % gSize == 0 ? Dx.cols / gSize * blockSizeX : (Dx.cols / gSize + 1) * blockSizeX;
    482     size_t rows_per_thread = 2;
    483     size_t globalSizeY = ((Dx.rows + rows_per_thread - 1) / rows_per_thread) % blockSizeY == 0 ?
    484                          ((Dx.rows + rows_per_thread - 1) / rows_per_thread) :
    485                          (((Dx.rows + rows_per_thread - 1) / rows_per_thread) / blockSizeY + 1) * blockSizeY;
    486 
    487     size_t globalsize[2] = { globalSizeX, globalSizeY }, localsize[2] = { blockSizeX, blockSizeY };
    488     return cornelKernel.run(2, globalsize, localsize, false);
    489 }
    490 
    491 static bool ocl_preCornerDetect( InputArray _src, OutputArray _dst, int ksize, int borderType, int depth )
    492 {
    493     UMat Dx, Dy, D2x, D2y, Dxy;
    494 
    495     if (!extractCovData(_src, Dx, Dy, depth, 1, ksize, borderType))
    496         return false;
    497 
    498     Sobel( _src, D2x, CV_32F, 2, 0, ksize, 1, 0, borderType );
    499     Sobel( _src, D2y, CV_32F, 0, 2, ksize, 1, 0, borderType );
    500     Sobel( _src, Dxy, CV_32F, 1, 1, ksize, 1, 0, borderType );
    501 
    502     _dst.create( _src.size(), CV_32FC1 );
    503     UMat dst = _dst.getUMat();
    504 
    505     double factor = 1 << (ksize - 1);
    506     if( depth == CV_8U )
    507         factor *= 255;
    508     factor = 1./(factor * factor * factor);
    509 
    510     ocl::Kernel k("preCornerDetect", ocl::imgproc::precornerdetect_oclsrc);
    511     if (k.empty())
    512         return false;
    513 
    514     k.args(ocl::KernelArg::ReadOnlyNoSize(Dx), ocl::KernelArg::ReadOnlyNoSize(Dy),
    515            ocl::KernelArg::ReadOnlyNoSize(D2x), ocl::KernelArg::ReadOnlyNoSize(D2y),
    516            ocl::KernelArg::ReadOnlyNoSize(Dxy), ocl::KernelArg::WriteOnly(dst), (float)factor);
    517 
    518     size_t globalsize[2] = { dst.cols, dst.rows };
    519     return k.run(2, globalsize, NULL, false);
    520 }
    521 
    522 #endif
    523 
    524 }
    525 
    526 void cv::cornerMinEigenVal( InputArray _src, OutputArray _dst, int blockSize, int ksize, int borderType )
    527 {
    528     CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
    529                ocl_cornerMinEigenValVecs(_src, _dst, blockSize, ksize, 0.0, borderType, MINEIGENVAL))
    530 
    531     Mat src = _src.getMat();
    532     _dst.create( src.size(), CV_32FC1 );
    533     Mat dst = _dst.getMat();
    534 #if defined(HAVE_IPP) && (IPP_VERSION_MAJOR >= 8)
    535     CV_IPP_CHECK()
    536     {
    537         typedef IppStatus (CV_STDCALL * ippiMinEigenValGetBufferSize)(IppiSize, int, int, int*);
    538         typedef IppStatus (CV_STDCALL * ippiMinEigenVal)(const void*, int, Ipp32f*, int, IppiSize, IppiKernelType, int, int, Ipp8u*);
    539         IppiKernelType kerType;
    540         int kerSize = ksize;
    541         if (ksize < 0)
    542         {
    543             kerType = ippKernelScharr;
    544             kerSize = 3;
    545         } else
    546         {
    547             kerType = ippKernelSobel;
    548         }
    549         bool isolated = (borderType & BORDER_ISOLATED) != 0;
    550         int borderTypeNI = borderType & ~BORDER_ISOLATED;
    551         if ((borderTypeNI == BORDER_REPLICATE && (!src.isSubmatrix() || isolated)) &&
    552             (kerSize == 3 || kerSize == 5) && (blockSize == 3 || blockSize == 5))
    553         {
    554             ippiMinEigenValGetBufferSize getBufferSizeFunc = 0;
    555             ippiMinEigenVal minEigenValFunc = 0;
    556             float norm_coef = 0.f;
    557 
    558             if (src.type() == CV_8UC1)
    559             {
    560                 getBufferSizeFunc = (ippiMinEigenValGetBufferSize) ippiMinEigenValGetBufferSize_8u32f_C1R;
    561                 minEigenValFunc = (ippiMinEigenVal) ippiMinEigenVal_8u32f_C1R;
    562                 norm_coef = 1.f / 255.f;
    563             } else if (src.type() == CV_32FC1)
    564             {
    565                 getBufferSizeFunc = (ippiMinEigenValGetBufferSize) ippiMinEigenValGetBufferSize_32f_C1R;
    566                 minEigenValFunc = (ippiMinEigenVal) ippiMinEigenVal_32f_C1R;
    567                 norm_coef = 255.f;
    568             }
    569             norm_coef = kerType == ippKernelSobel ? norm_coef : norm_coef / 2.45f;
    570 
    571             if (getBufferSizeFunc && minEigenValFunc)
    572             {
    573                 int bufferSize;
    574                 IppiSize srcRoi = { src.cols, src.rows };
    575                 IppStatus ok = getBufferSizeFunc(srcRoi, kerSize, blockSize, &bufferSize);
    576                 if (ok >= 0)
    577                 {
    578                     AutoBuffer<uchar> buffer(bufferSize);
    579                     ok = minEigenValFunc(src.ptr(), (int) src.step, dst.ptr<Ipp32f>(), (int) dst.step, srcRoi, kerType, kerSize, blockSize, buffer);
    580                     CV_SUPPRESS_DEPRECATED_START
    581                     if (ok >= 0) ok = ippiMulC_32f_C1IR(norm_coef, dst.ptr<Ipp32f>(), (int) dst.step, srcRoi);
    582                     CV_SUPPRESS_DEPRECATED_END
    583                     if (ok >= 0)
    584                     {
    585                         CV_IMPL_ADD(CV_IMPL_IPP);
    586                         return;
    587                     }
    588                 }
    589                 setIppErrorStatus();
    590             }
    591         }
    592     }
    593 #endif
    594     cornerEigenValsVecs( src, dst, blockSize, ksize, MINEIGENVAL, 0, borderType );
    595 }
    596 
    597 void cv::cornerHarris( InputArray _src, OutputArray _dst, int blockSize, int ksize, double k, int borderType )
    598 {
    599     CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
    600                ocl_cornerMinEigenValVecs(_src, _dst, blockSize, ksize, k, borderType, HARRIS))
    601 
    602     Mat src = _src.getMat();
    603     _dst.create( src.size(), CV_32FC1 );
    604     Mat dst = _dst.getMat();
    605 
    606 #if IPP_VERSION_X100 >= 801 && 0
    607     CV_IPP_CHECK()
    608     {
    609         int type = src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
    610         int borderTypeNI = borderType & ~BORDER_ISOLATED;
    611         bool isolated = (borderType & BORDER_ISOLATED) != 0;
    612 
    613         if ( (ksize == 3 || ksize == 5) && (type == CV_8UC1 || type == CV_32FC1) &&
    614             (borderTypeNI == BORDER_CONSTANT || borderTypeNI == BORDER_REPLICATE) && cn == 1 && (!src.isSubmatrix() || isolated) )
    615         {
    616             IppiSize roisize = { src.cols, src.rows };
    617             IppiMaskSize masksize = ksize == 5 ? ippMskSize5x5 : ippMskSize3x3;
    618             IppDataType datatype = type == CV_8UC1 ? ipp8u : ipp32f;
    619             Ipp32s bufsize = 0;
    620 
    621             double scale = (double)(1 << ((ksize > 0 ? ksize : 3) - 1)) * blockSize;
    622             if (ksize < 0)
    623                 scale *= 2.0;
    624             if (depth == CV_8U)
    625                 scale *= 255.0;
    626             scale = std::pow(scale, -4.0);
    627 
    628             if (ippiHarrisCornerGetBufferSize(roisize, masksize, blockSize, datatype, cn, &bufsize) >= 0)
    629             {
    630                 Ipp8u * buffer = ippsMalloc_8u(bufsize);
    631                 IppiDifferentialKernel filterType = ksize > 0 ? ippFilterSobel : ippFilterScharr;
    632                 IppiBorderType borderTypeIpp = borderTypeNI == BORDER_CONSTANT ? ippBorderConst : ippBorderRepl;
    633                 IppStatus status = (IppStatus)-1;
    634 
    635                 if (depth == CV_8U)
    636                     status = ippiHarrisCorner_8u32f_C1R((const Ipp8u *)src.data, (int)src.step, (Ipp32f *)dst.data, (int)dst.step, roisize,
    637                                                         filterType, masksize, blockSize, (Ipp32f)k, (Ipp32f)scale, borderTypeIpp, 0, buffer);
    638                 else if (depth == CV_32F)
    639                     status = ippiHarrisCorner_32f_C1R((const Ipp32f *)src.data, (int)src.step, (Ipp32f *)dst.data, (int)dst.step, roisize,
    640                                                       filterType, masksize, blockSize, (Ipp32f)k, (Ipp32f)scale, borderTypeIpp, 0, buffer);
    641                 ippsFree(buffer);
    642 
    643                 if (status >= 0)
    644                 {
    645                     CV_IMPL_ADD(CV_IMPL_IPP);
    646                     return;
    647                 }
    648             }
    649             setIppErrorStatus();
    650         }
    651     }
    652 #endif
    653 
    654     cornerEigenValsVecs( src, dst, blockSize, ksize, HARRIS, k, borderType );
    655 }
    656 
    657 
    658 void cv::cornerEigenValsAndVecs( InputArray _src, OutputArray _dst, int blockSize, int ksize, int borderType )
    659 {
    660     Mat src = _src.getMat();
    661     Size dsz = _dst.size();
    662     int dtype = _dst.type();
    663 
    664     if( dsz.height != src.rows || dsz.width*CV_MAT_CN(dtype) != src.cols*6 || CV_MAT_DEPTH(dtype) != CV_32F )
    665         _dst.create( src.size(), CV_32FC(6) );
    666     Mat dst = _dst.getMat();
    667     cornerEigenValsVecs( src, dst, blockSize, ksize, EIGENVALSVECS, 0, borderType );
    668 }
    669 
    670 
    671 void cv::preCornerDetect( InputArray _src, OutputArray _dst, int ksize, int borderType )
    672 {
    673     int type = _src.type();
    674     CV_Assert( type == CV_8UC1 || type == CV_32FC1 );
    675 
    676     CV_OCL_RUN( _src.dims() <= 2 && _dst.isUMat(),
    677                 ocl_preCornerDetect(_src, _dst, ksize, borderType, CV_MAT_DEPTH(type)))
    678 
    679     Mat Dx, Dy, D2x, D2y, Dxy, src = _src.getMat();
    680     _dst.create( src.size(), CV_32FC1 );
    681     Mat dst = _dst.getMat();
    682 
    683     Sobel( src, Dx, CV_32F, 1, 0, ksize, 1, 0, borderType );
    684     Sobel( src, Dy, CV_32F, 0, 1, ksize, 1, 0, borderType );
    685     Sobel( src, D2x, CV_32F, 2, 0, ksize, 1, 0, borderType );
    686     Sobel( src, D2y, CV_32F, 0, 2, ksize, 1, 0, borderType );
    687     Sobel( src, Dxy, CV_32F, 1, 1, ksize, 1, 0, borderType );
    688 
    689     double factor = 1 << (ksize - 1);
    690     if( src.depth() == CV_8U )
    691         factor *= 255;
    692     factor = 1./(factor * factor * factor);
    693 #if CV_NEON || CV_SSE2
    694     float factor_f = (float)factor;
    695 #endif
    696 
    697 #if CV_SSE2
    698     volatile bool haveSSE2 = cv::checkHardwareSupport(CV_CPU_SSE2);
    699     __m128 v_factor = _mm_set1_ps(factor_f), v_m2 = _mm_set1_ps(-2.0f);
    700 #endif
    701 
    702     Size size = src.size();
    703     int i, j;
    704     for( i = 0; i < size.height; i++ )
    705     {
    706         float* dstdata = dst.ptr<float>(i);
    707         const float* dxdata = Dx.ptr<float>(i);
    708         const float* dydata = Dy.ptr<float>(i);
    709         const float* d2xdata = D2x.ptr<float>(i);
    710         const float* d2ydata = D2y.ptr<float>(i);
    711         const float* dxydata = Dxy.ptr<float>(i);
    712 
    713         j = 0;
    714 
    715 #if CV_SSE2
    716         if (haveSSE2)
    717         {
    718             for( ; j <= size.width - 4; j += 4 )
    719             {
    720                 __m128 v_dx = _mm_loadu_ps((const float *)(dxdata + j));
    721                 __m128 v_dy = _mm_loadu_ps((const float *)(dydata + j));
    722 
    723                 __m128 v_s1 = _mm_mul_ps(_mm_mul_ps(v_dx, v_dx), _mm_loadu_ps((const float *)(d2ydata + j)));
    724                 __m128 v_s2 = _mm_mul_ps(_mm_mul_ps(v_dy, v_dy), _mm_loadu_ps((const float *)(d2xdata + j)));
    725                 __m128 v_s3 = _mm_mul_ps(_mm_mul_ps(v_dx, v_dy), _mm_loadu_ps((const float *)(dxydata + j)));
    726                 v_s1 = _mm_mul_ps(v_factor, _mm_add_ps(v_s1, _mm_add_ps(v_s2, _mm_mul_ps(v_s3, v_m2))));
    727                 _mm_storeu_ps(dstdata + j, v_s1);
    728             }
    729         }
    730 #elif CV_NEON
    731         for( ; j <= size.width - 4; j += 4 )
    732         {
    733             float32x4_t v_dx = vld1q_f32(dxdata + j), v_dy = vld1q_f32(dydata + j);
    734             float32x4_t v_s = vmulq_f32(v_dx, vmulq_f32(v_dx, vld1q_f32(d2ydata + j)));
    735             v_s = vmlaq_f32(v_s, vld1q_f32(d2xdata + j), vmulq_f32(v_dy, v_dy));
    736             v_s = vmlaq_f32(v_s, vld1q_f32(dxydata + j), vmulq_n_f32(vmulq_f32(v_dy, v_dx), -2));
    737             vst1q_f32(dstdata + j, vmulq_n_f32(v_s, factor_f));
    738         }
    739 #endif
    740 
    741         for( ; j < size.width; j++ )
    742         {
    743             float dx = dxdata[j];
    744             float dy = dydata[j];
    745             dstdata[j] = (float)(factor*(dx*dx*d2ydata[j] + dy*dy*d2xdata[j] - 2*dx*dy*dxydata[j]));
    746         }
    747     }
    748 }
    749 
    750 CV_IMPL void
    751 cvCornerMinEigenVal( const CvArr* srcarr, CvArr* dstarr,
    752                      int block_size, int aperture_size )
    753 {
    754     cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
    755 
    756     CV_Assert( src.size() == dst.size() && dst.type() == CV_32FC1 );
    757     cv::cornerMinEigenVal( src, dst, block_size, aperture_size, cv::BORDER_REPLICATE );
    758 }
    759 
    760 CV_IMPL void
    761 cvCornerHarris( const CvArr* srcarr, CvArr* dstarr,
    762                 int block_size, int aperture_size, double k )
    763 {
    764     cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
    765 
    766     CV_Assert( src.size() == dst.size() && dst.type() == CV_32FC1 );
    767     cv::cornerHarris( src, dst, block_size, aperture_size, k, cv::BORDER_REPLICATE );
    768 }
    769 
    770 
    771 CV_IMPL void
    772 cvCornerEigenValsAndVecs( const void* srcarr, void* dstarr,
    773                           int block_size, int aperture_size )
    774 {
    775     cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
    776 
    777     CV_Assert( src.rows == dst.rows && src.cols*6 == dst.cols*dst.channels() && dst.depth() == CV_32F );
    778     cv::cornerEigenValsAndVecs( src, dst, block_size, aperture_size, cv::BORDER_REPLICATE );
    779 }
    780 
    781 
    782 CV_IMPL void
    783 cvPreCornerDetect( const void* srcarr, void* dstarr, int aperture_size )
    784 {
    785     cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
    786 
    787     CV_Assert( src.size() == dst.size() && dst.type() == CV_32FC1 );
    788     cv::preCornerDetect( src, dst, aperture_size, cv::BORDER_REPLICATE );
    789 }
    790 
    791 /* End of file */
    792