Home | History | Annotate | Download | only in src
      1 /*M///////////////////////////////////////////////////////////////////////////////////////
      2 //
      3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
      4 //
      5 //  By downloading, copying, installing or using the software you agree to this license.
      6 //  If you do not agree to this license, do not download, install,
      7 //  copy or use the software.
      8 //
      9 //
     10 //                        Intel License Agreement
     11 //                For Open Source Computer Vision Library
     12 //
     13 // Copyright (C) 2000, Intel Corporation, all rights reserved.
     14 // Copyright (C) 2014, Itseez, Inc, all rights reserved.
     15 // Third party copyrights are property of their respective owners.
     16 //
     17 // Redistribution and use in source and binary forms, with or without modification,
     18 // are permitted provided that the following conditions are met:
     19 //
     20 //   * Redistribution's of source code must retain the above copyright notice,
     21 //     this list of conditions and the following disclaimer.
     22 //
     23 //   * Redistribution's in binary form must reproduce the above copyright notice,
     24 //     this list of conditions and the following disclaimer in the documentation
     25 //     and/or other materials provided with the distribution.
     26 //
     27 //   * The name of Intel Corporation may not be used to endorse or promote products
     28 //     derived from this software without specific prior written permission.
     29 //
     30 // This software is provided by the copyright holders and contributors "as is" and
     31 // any express or implied warranties, including, but not limited to, the implied
     32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
     33 // In no event shall the Intel Corporation or contributors be liable for any direct,
     34 // indirect, incidental, special, exemplary, or consequential damages
     35 // (including, but not limited to, procurement of substitute goods or services;
     36 // loss of use, data, or profits; or business interruption) however caused
     37 // and on any theory of liability, whether in contract, strict liability,
     38 // or tort (including negligence or otherwise) arising in any way out of
     39 // the use of this software, even if advised of the possibility of such damage.
     40 //
     41 //M*/
     42 
     43 #include "precomp.hpp"
     44 #include "opencl_kernels_imgproc.hpp"
     45 
#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
// Calls ippInit() once while this translation unit's statics are initialized and
// keeps the returned status in a file-local variable.
// NOTE(review): presumably this is IPP's one-time library initialization; the stored
// status is not read anywhere in the visible part of the file -- confirm before removing.
static IppStatus sts = ippInit();
#endif
     49 
     50 /****************************************************************************************\
     51                              Sobel & Scharr Derivative Filters
     52 \****************************************************************************************/
     53 
     54 namespace cv
     55 {
     56 
     57 static void getScharrKernels( OutputArray _kx, OutputArray _ky,
     58                               int dx, int dy, bool normalize, int ktype )
     59 {
     60     const int ksize = 3;
     61 
     62     CV_Assert( ktype == CV_32F || ktype == CV_64F );
     63     _kx.create(ksize, 1, ktype, -1, true);
     64     _ky.create(ksize, 1, ktype, -1, true);
     65     Mat kx = _kx.getMat();
     66     Mat ky = _ky.getMat();
     67 
     68     CV_Assert( dx >= 0 && dy >= 0 && dx+dy == 1 );
     69 
     70     for( int k = 0; k < 2; k++ )
     71     {
     72         Mat* kernel = k == 0 ? &kx : &ky;
     73         int order = k == 0 ? dx : dy;
     74         int kerI[3];
     75 
     76         if( order == 0 )
     77             kerI[0] = 3, kerI[1] = 10, kerI[2] = 3;
     78         else if( order == 1 )
     79             kerI[0] = -1, kerI[1] = 0, kerI[2] = 1;
     80 
     81         Mat temp(kernel->rows, kernel->cols, CV_32S, &kerI[0]);
     82         double scale = !normalize || order == 1 ? 1. : 1./32;
     83         temp.convertTo(*kernel, ktype, scale);
     84     }
     85 }
     86 
     87 
     88 static void getSobelKernels( OutputArray _kx, OutputArray _ky,
     89                              int dx, int dy, int _ksize, bool normalize, int ktype )
     90 {
     91     int i, j, ksizeX = _ksize, ksizeY = _ksize;
     92     if( ksizeX == 1 && dx > 0 )
     93         ksizeX = 3;
     94     if( ksizeY == 1 && dy > 0 )
     95         ksizeY = 3;
     96 
     97     CV_Assert( ktype == CV_32F || ktype == CV_64F );
     98 
     99     _kx.create(ksizeX, 1, ktype, -1, true);
    100     _ky.create(ksizeY, 1, ktype, -1, true);
    101     Mat kx = _kx.getMat();
    102     Mat ky = _ky.getMat();
    103 
    104     if( _ksize % 2 == 0 || _ksize > 31 )
    105         CV_Error( CV_StsOutOfRange, "The kernel size must be odd and not larger than 31" );
    106     std::vector<int> kerI(std::max(ksizeX, ksizeY) + 1);
    107 
    108     CV_Assert( dx >= 0 && dy >= 0 && dx+dy > 0 );
    109 
    110     for( int k = 0; k < 2; k++ )
    111     {
    112         Mat* kernel = k == 0 ? &kx : &ky;
    113         int order = k == 0 ? dx : dy;
    114         int ksize = k == 0 ? ksizeX : ksizeY;
    115 
    116         CV_Assert( ksize > order );
    117 
    118         if( ksize == 1 )
    119             kerI[0] = 1;
    120         else if( ksize == 3 )
    121         {
    122             if( order == 0 )
    123                 kerI[0] = 1, kerI[1] = 2, kerI[2] = 1;
    124             else if( order == 1 )
    125                 kerI[0] = -1, kerI[1] = 0, kerI[2] = 1;
    126             else
    127                 kerI[0] = 1, kerI[1] = -2, kerI[2] = 1;
    128         }
    129         else
    130         {
    131             int oldval, newval;
    132             kerI[0] = 1;
    133             for( i = 0; i < ksize; i++ )
    134                 kerI[i+1] = 0;
    135 
    136             for( i = 0; i < ksize - order - 1; i++ )
    137             {
    138                 oldval = kerI[0];
    139                 for( j = 1; j <= ksize; j++ )
    140                 {
    141                     newval = kerI[j]+kerI[j-1];
    142                     kerI[j-1] = oldval;
    143                     oldval = newval;
    144                 }
    145             }
    146 
    147             for( i = 0; i < order; i++ )
    148             {
    149                 oldval = -kerI[0];
    150                 for( j = 1; j <= ksize; j++ )
    151                 {
    152                     newval = kerI[j-1] - kerI[j];
    153                     kerI[j-1] = oldval;
    154                     oldval = newval;
    155                 }
    156             }
    157         }
    158 
    159         Mat temp(kernel->rows, kernel->cols, CV_32S, &kerI[0]);
    160         double scale = !normalize ? 1. : 1./(1 << (ksize-order-1));
    161         temp.convertTo(*kernel, ktype, scale);
    162     }
    163 }
    164 
    165 }
    166 
    167 void cv::getDerivKernels( OutputArray kx, OutputArray ky, int dx, int dy,
    168                           int ksize, bool normalize, int ktype )
    169 {
    170     if( ksize <= 0 )
    171         getScharrKernels( kx, ky, dx, dy, normalize, ktype );
    172     else
    173         getSobelKernels( kx, ky, dx, dy, ksize, normalize, ktype );
    174 }
    175 
    176 
    177 cv::Ptr<cv::FilterEngine> cv::createDerivFilter(int srcType, int dstType,
    178                                                 int dx, int dy, int ksize, int borderType )
    179 {
    180     Mat kx, ky;
    181     getDerivKernels( kx, ky, dx, dy, ksize, false, CV_32F );
    182     return createSeparableLinearFilter(srcType, dstType,
    183         kx, ky, Point(-1,-1), 0, borderType );
    184 }
    185 
    186 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
    187 
// Failure exit for the IPP wrappers in this file: calls setIppErrorStatus() and makes the
// enclosing function return false. It expands to a complete braced block, so it can serve
// as the body of an `if` without a trailing semicolon.
#define IPP_RETURN_ERROR    {setIppErrorStatus(); return false;}
    189 
    190 namespace cv
    191 {
    192 #if IPP_VERSION_X100 >= 801
    193 static bool IPPDerivScharr(InputArray _src, OutputArray _dst, int ddepth, int dx, int dy, double scale, double delta, int borderType)
    194 {
    195     if ((0 > dx) || (0 > dy) || (1 != dx + dy))
    196         return false;
    197     if (fabs(delta) > FLT_EPSILON)
    198         return false;
    199 
    200     IppiBorderType ippiBorderType = ippiGetBorderType(borderType & (~BORDER_ISOLATED));
    201     if ((int)ippiBorderType < 0)
    202         return false;
    203 
    204     int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype);
    205     if (ddepth < 0)
    206         ddepth = sdepth;
    207     int dtype = CV_MAKETYPE(ddepth, cn);
    208 
    209     Mat src = _src.getMat();
    210     if (0 == (BORDER_ISOLATED & borderType))
    211     {
    212         Size size; Point offset;
    213         src.locateROI(size, offset);
    214         if (0 < offset.x)
    215             ippiBorderType = (IppiBorderType)(ippiBorderType | ippBorderInMemLeft);
    216         if (0 < offset.y)
    217             ippiBorderType = (IppiBorderType)(ippiBorderType | ippBorderInMemTop);
    218         if (offset.x + src.cols < size.width)
    219             ippiBorderType = (IppiBorderType)(ippiBorderType | ippBorderInMemRight);
    220         if (offset.y + src.rows < size.height)
    221             ippiBorderType = (IppiBorderType)(ippiBorderType | ippBorderInMemBottom);
    222     }
    223 
    224     bool horz = (0 == dx) && (1 == dy);
    225     IppiSize roiSize = {src.cols, src.rows};
    226 
    227     _dst.create( _src.size(), dtype);
    228     Mat dst = _dst.getMat();
    229     IppStatus sts = ippStsErr;
    230     if ((CV_8U == stype) && (CV_16S == dtype))
    231     {
    232         int bufferSize = 0; Ipp8u *pBuffer;
    233         if (horz)
    234         {
    235             if (0 > ippiFilterScharrHorizMaskBorderGetBufferSize(roiSize, ippMskSize3x3, ipp8u, ipp16s, 1, &bufferSize))
    236                 IPP_RETURN_ERROR
    237             pBuffer = ippsMalloc_8u(bufferSize);
    238             if (NULL == pBuffer)
    239                 IPP_RETURN_ERROR
    240             sts = ippiFilterScharrHorizMaskBorder_8u16s_C1R(src.ptr(), (int)src.step, dst.ptr<Ipp16s>(), (int)dst.step, roiSize, ippMskSize3x3, ippiBorderType, 0, pBuffer);
    241         }
    242         else
    243         {
    244             if (0 > ippiFilterScharrVertMaskBorderGetBufferSize(roiSize, ippMskSize3x3, ipp8u, ipp16s, 1, &bufferSize))
    245                 IPP_RETURN_ERROR
    246             pBuffer = ippsMalloc_8u(bufferSize);
    247             if (NULL == pBuffer)
    248                 IPP_RETURN_ERROR
    249             sts = ippiFilterScharrVertMaskBorder_8u16s_C1R(src.ptr(), (int)src.step, dst.ptr<Ipp16s>(), (int)dst.step, roiSize, ippMskSize3x3, ippiBorderType, 0, pBuffer);
    250         }
    251         ippsFree(pBuffer);
    252     }
    253     else if ((CV_16S == stype) && (CV_16S == dtype))
    254     {
    255         int bufferSize = 0; Ipp8u *pBuffer;
    256         if (horz)
    257         {
    258             if (0 > ippiFilterScharrHorizMaskBorderGetBufferSize(roiSize, ippMskSize3x3, ipp16s, ipp16s, 1, &bufferSize))
    259                 IPP_RETURN_ERROR
    260             pBuffer = ippsMalloc_8u(bufferSize);
    261             if (NULL == pBuffer)
    262                 IPP_RETURN_ERROR
    263             sts = ippiFilterScharrHorizMaskBorder_16s_C1R(src.ptr<Ipp16s>(), (int)src.step, dst.ptr<Ipp16s>(), (int)dst.step, roiSize, ippMskSize3x3, ippiBorderType, 0, pBuffer);
    264         }
    265         else
    266         {
    267             if (0 > ippiFilterScharrVertMaskBorderGetBufferSize(roiSize, ippMskSize3x3, ipp16s, ipp16s, 1, &bufferSize))
    268                 IPP_RETURN_ERROR
    269             pBuffer = ippsMalloc_8u(bufferSize);
    270             if (NULL == pBuffer)
    271                 IPP_RETURN_ERROR
    272             sts = ippiFilterScharrVertMaskBorder_16s_C1R(src.ptr<Ipp16s>(), (int)src.step, dst.ptr<Ipp16s>(), (int)dst.step, roiSize, ippMskSize3x3, ippiBorderType, 0, pBuffer);
    273         }
    274         ippsFree(pBuffer);
    275     }
    276     else if ((CV_32F == stype) && (CV_32F == dtype))
    277     {
    278         int bufferSize = 0; Ipp8u *pBuffer;
    279         if (horz)
    280         {
    281             if (0 > ippiFilterScharrHorizMaskBorderGetBufferSize(roiSize, ippMskSize3x3, ipp32f, ipp32f, 1, &bufferSize))
    282                 IPP_RETURN_ERROR
    283             pBuffer = ippsMalloc_8u(bufferSize);
    284             if (NULL == pBuffer)
    285                 IPP_RETURN_ERROR
    286             sts = ippiFilterScharrHorizMaskBorder_32f_C1R(src.ptr<Ipp32f>(), (int)src.step, dst.ptr<Ipp32f>(), (int)dst.step, roiSize, ippMskSize3x3, ippiBorderType, 0, pBuffer);
    287         }
    288         else
    289         {
    290             if (0 > ippiFilterScharrVertMaskBorderGetBufferSize(roiSize, ippMskSize3x3, ipp32f, ipp32f, 1, &bufferSize))
    291                 IPP_RETURN_ERROR
    292             pBuffer = ippsMalloc_8u(bufferSize);
    293             if (NULL == pBuffer)
    294                 IPP_RETURN_ERROR
    295             sts = ippiFilterScharrVertMaskBorder_32f_C1R(src.ptr<Ipp32f>(), (int)src.step, dst.ptr<Ipp32f>(), (int)dst.step, roiSize, ippMskSize3x3, ippiBorderType, 0, pBuffer);
    296         }
    297         ippsFree(pBuffer);
    298         if (sts < 0)
    299             IPP_RETURN_ERROR;
    300 
    301         if (FLT_EPSILON < fabs(scale - 1.0))
    302             sts = ippiMulC_32f_C1R(dst.ptr<Ipp32f>(), (int)dst.step, (Ipp32f)scale, dst.ptr<Ipp32f>(), (int)dst.step, roiSize);
    303     }
    304     return (0 <= sts);
    305 }
    306 #elif IPP_VERSION_X100 >= 700
    307 static bool IPPDerivScharr(InputArray _src, OutputArray _dst, int ddepth, int dx, int dy, double scale, double delta, int borderType)
    308 {
    309     if (BORDER_REPLICATE != borderType)
    310         return false;
    311     if ((0 > dx) || (0 > dy) || (1 != dx + dy))
    312         return false;
    313     if (fabs(delta) > FLT_EPSILON)
    314         return false;
    315 
    316     Mat src = _src.getMat(), dst = _dst.getMat();
    317 
    318     int bufSize = 0;
    319     cv::AutoBuffer<char> buffer;
    320     IppiSize roi = ippiSize(src.cols, src.rows);
    321 
    322     if( ddepth < 0 )
    323         ddepth = src.depth();
    324 
    325     dst.create( src.size(), CV_MAKETYPE(ddepth, src.channels()) );
    326 
    327     switch(src.type())
    328     {
    329     case CV_8UC1:
    330         {
    331             if(scale != 1)
    332                 return false;
    333 
    334             switch(dst.type())
    335             {
    336             case CV_16S:
    337                 {
    338                     if ((dx == 1) && (dy == 0))
    339                     {
    340                         if (0 > ippiFilterScharrVertGetBufferSize_8u16s_C1R(roi,&bufSize))
    341                             return false;
    342                         buffer.allocate(bufSize);
    343                         return (0 <= ippiFilterScharrVertBorder_8u16s_C1R(src.ptr<Ipp8u>(), (int)src.step,
    344                                         dst.ptr<Ipp16s>(), (int)dst.step, roi, ippBorderRepl, 0, (Ipp8u*)(char*)buffer));
    345                     }
    346                     if ((dx == 0) && (dy == 1))
    347                     {
    348                         if (0 > ippiFilterScharrHorizGetBufferSize_8u16s_C1R(roi,&bufSize))
    349                             return false;
    350                         buffer.allocate(bufSize);
    351                         return (0 <= ippiFilterScharrHorizBorder_8u16s_C1R(src.ptr<Ipp8u>(), (int)src.step,
    352                                             dst.ptr<Ipp16s>(), (int)dst.step, roi, ippBorderRepl, 0, (Ipp8u*)(char*)buffer));
    353                     }
    354                     return false;
    355                 }
    356             default:
    357                 return false;
    358             }
    359         }
    360     case CV_32FC1:
    361         {
    362             switch(dst.type())
    363             {
    364             case CV_32FC1:
    365                 {
    366                     if ((dx == 1) && (dy == 0))
    367                     {
    368                         if (0 > ippiFilterScharrVertGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows),&bufSize))
    369                             return false;
    370                         buffer.allocate(bufSize);
    371 
    372                         if (0 > ippiFilterScharrVertBorder_32f_C1R(src.ptr<Ipp32f>(), (int)src.step,
    373                                         dst.ptr<Ipp32f>(), (int)dst.step, ippiSize(src.cols, src.rows),
    374                                         ippBorderRepl, 0, (Ipp8u*)(char*)buffer))
    375                         {
    376                             return false;
    377                         }
    378 
    379                         if (scale != 1)
    380                             /* IPP is fast, so MulC produce very little perf degradation.*/
    381                             //ippiMulC_32f_C1IR((Ipp32f)scale, dst.ptr<Ipp32f>(), (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows));
    382                             ippiMulC_32f_C1R(dst.ptr<Ipp32f>(), (int)dst.step, (Ipp32f)scale, dst.ptr<Ipp32f>(), (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows));
    383                         return true;
    384                     }
    385                     if ((dx == 0) && (dy == 1))
    386                     {
    387                         if (0 > ippiFilterScharrHorizGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows),&bufSize))
    388                             return false;
    389                         buffer.allocate(bufSize);
    390 
    391                         if (0 > ippiFilterScharrHorizBorder_32f_C1R(src.ptr<Ipp32f>(), (int)src.step,
    392                                         dst.ptr<Ipp32f>(), (int)dst.step, ippiSize(src.cols, src.rows),
    393                                         ippBorderRepl, 0, (Ipp8u*)(char*)buffer))
    394                             return false;
    395 
    396                         if (scale != 1)
    397                             ippiMulC_32f_C1R(dst.ptr<Ipp32f>(), (int)dst.step, (Ipp32f)scale, dst.ptr<Ipp32f>(), (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows));
    398                         return true;
    399                     }
    400                 }
    401             default:
    402                 return false;
    403             }
    404         }
    405     default:
    406         return false;
    407     }
    408 }
    409 #endif
    410 
    411 static bool IPPDerivSobel(InputArray _src, OutputArray _dst, int ddepth, int dx, int dy, int ksize, double scale, double delta, int borderType)
    412 {
    413     if ((borderType != BORDER_REPLICATE) || ((3 != ksize) && (5 != ksize)))
    414         return false;
    415     if (fabs(delta) > FLT_EPSILON)
    416         return false;
    417     if (1 != _src.channels())
    418         return false;
    419 
    420     int bufSize = 0;
    421     cv::AutoBuffer<char> buffer;
    422     Mat src = _src.getMat(), dst = _dst.getMat();
    423     if ( ddepth < 0 )
    424         ddepth = src.depth();
    425 
    426     if (src.type() == CV_8U && dst.type() == CV_16S && scale == 1)
    427     {
    428         if ((dx == 1) && (dy == 0))
    429         {
    430             if (0 > ippiFilterSobelNegVertGetBufferSize_8u16s_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize))
    431                 IPP_RETURN_ERROR
    432             buffer.allocate(bufSize);
    433 
    434             if (0 > ippiFilterSobelNegVertBorder_8u16s_C1R(src.ptr<Ipp8u>(), (int)src.step,
    435                                 dst.ptr<Ipp16s>(), (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),
    436                                 ippBorderRepl, 0, (Ipp8u*)(char*)buffer))
    437                 IPP_RETURN_ERROR
    438             return true;
    439         }
    440 
    441         if ((dx == 0) && (dy == 1))
    442         {
    443             if (0 > ippiFilterSobelHorizGetBufferSize_8u16s_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize))
    444                 IPP_RETURN_ERROR
    445             buffer.allocate(bufSize);
    446 
    447             if (0 > ippiFilterSobelHorizBorder_8u16s_C1R(src.ptr<Ipp8u>(), (int)src.step,
    448                                 dst.ptr<Ipp16s>(), (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),
    449                                 ippBorderRepl, 0, (Ipp8u*)(char*)buffer))
    450                 IPP_RETURN_ERROR
    451             return true;
    452         }
    453 
    454 #if !defined(HAVE_IPP_ICV_ONLY)
    455         if ((dx == 2) && (dy == 0))
    456         {
    457             if (0 > ippiFilterSobelVertSecondGetBufferSize_8u16s_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize))
    458                 IPP_RETURN_ERROR
    459             buffer.allocate(bufSize);
    460 
    461             if (0 > ippiFilterSobelVertSecondBorder_8u16s_C1R(src.ptr<Ipp8u>(), (int)src.step,
    462                                 dst.ptr<Ipp16s>(), (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),
    463                                 ippBorderRepl, 0, (Ipp8u*)(char*)buffer))
    464                 IPP_RETURN_ERROR
    465             return true;
    466         }
    467 
    468         if ((dx == 0) && (dy == 2))
    469         {
    470             if (0 > ippiFilterSobelHorizSecondGetBufferSize_8u16s_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize))
    471                 IPP_RETURN_ERROR
    472             buffer.allocate(bufSize);
    473 
    474             if (0 > ippiFilterSobelHorizSecondBorder_8u16s_C1R(src.ptr<Ipp8u>(), (int)src.step,
    475                                 dst.ptr<Ipp16s>(), (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),
    476                                 ippBorderRepl, 0, (Ipp8u*)(char*)buffer))
    477                 IPP_RETURN_ERROR
    478             return true;
    479         }
    480 #endif
    481     }
    482 
    483     if (src.type() == CV_32F && dst.type() == CV_32F)
    484     {
    485 #if 0
    486         if ((dx == 1) && (dy == 0))
    487         {
    488             if (0 > ippiFilterSobelNegVertGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), &bufSize))
    489                 IPP_RETURN_ERROR
    490             buffer.allocate(bufSize);
    491 
    492             if (0 > ippiFilterSobelNegVertBorder_32f_C1R(src.ptr<Ipp32f>(), (int)src.step,
    493                             dst.ptr<Ipp32f>(), (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),
    494                             ippBorderRepl, 0, (Ipp8u*)(char*)buffer))
    495                 IPP_RETURN_ERROR
    496             if(scale != 1)
    497                 ippiMulC_32f_C1R(dst.ptr<Ipp32f>(), (int)dst.step, (Ipp32f)scale, dst.ptr<Ipp32f>(), (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows));
    498             return true;
    499         }
    500 
    501         if ((dx == 0) && (dy == 1))
    502         {
    503             if (0 > ippiFilterSobelHorizGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize))
    504                 IPP_RETURN_ERROR
    505             buffer.allocate(bufSize);
    506             if (0 > ippiFilterSobelHorizBorder_32f_C1R(src.ptr<Ipp32f>(), (int)src.step,
    507                             dst.ptr<Ipp32f>(), (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),
    508                             ippBorderRepl, 0, (Ipp8u*)(char*)buffer))
    509                 IPP_RETURN_ERROR
    510             if(scale != 1)
    511                 ippiMulC_32f_C1R(dst.ptr<Ipp32f>(), (int)dst.step, (Ipp32f)scale, dst.ptr<Ipp32f>(), (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows));
    512             return true;
    513         }
    514 #endif
    515 #if !defined(HAVE_IPP_ICV_ONLY)
    516         if((dx == 2) && (dy == 0))
    517         {
    518             if (0 > ippiFilterSobelVertSecondGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize))
    519                 IPP_RETURN_ERROR
    520             buffer.allocate(bufSize);
    521 
    522             if (0 > ippiFilterSobelVertSecondBorder_32f_C1R(src.ptr<Ipp32f>(), (int)src.step,
    523                             dst.ptr<Ipp32f>(), (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),
    524                             ippBorderRepl, 0, (Ipp8u*)(char*)buffer))
    525                 IPP_RETURN_ERROR
    526             if(scale != 1)
    527                 ippiMulC_32f_C1R(dst.ptr<Ipp32f>(), (int)dst.step, (Ipp32f)scale, dst.ptr<Ipp32f>(), (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows));
    528             return true;
    529         }
    530 
    531         if((dx == 0) && (dy == 2))
    532         {
    533             if (0 > ippiFilterSobelHorizSecondGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize))
    534                 IPP_RETURN_ERROR
    535             buffer.allocate(bufSize);
    536 
    537             if (0 > ippiFilterSobelHorizSecondBorder_32f_C1R(src.ptr<Ipp32f>(), (int)src.step,
    538                             dst.ptr<Ipp32f>(), (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),
    539                             ippBorderRepl, 0, (Ipp8u*)(char*)buffer))
    540                 IPP_RETURN_ERROR
    541 
    542             if(scale != 1)
    543                 ippiMulC_32f_C1R(dst.ptr<Ipp32f>(), (int)dst.step, (Ipp32f)scale, dst.ptr<Ipp32f>(), (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows));
    544             return true;
    545         }
    546 #endif
    547     }
    548     return false;
    549 }
    550 
    551 }
    552 
    553 #endif
    554 
    555 void cv::Sobel( InputArray _src, OutputArray _dst, int ddepth, int dx, int dy,
    556                 int ksize, double scale, double delta, int borderType )
    557 {
    558     int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype);
    559     if (ddepth < 0)
    560         ddepth = sdepth;
    561     int dtype = CV_MAKE_TYPE(ddepth, cn);
    562     _dst.create( _src.size(), dtype );
    563 
    564 #ifdef HAVE_TEGRA_OPTIMIZATION
    565     if (tegra::useTegra() && scale == 1.0 && delta == 0)
    566     {
    567         Mat src = _src.getMat(), dst = _dst.getMat();
    568         if (ksize == 3 && tegra::sobel3x3(src, dst, dx, dy, borderType))
    569             return;
    570         if (ksize == -1 && tegra::scharr(src, dst, dx, dy, borderType))
    571             return;
    572     }
    573 #endif
    574 
    575 #ifdef HAVE_IPP
    576     CV_IPP_CHECK()
    577     {
    578         if (ksize < 0)
    579         {
    580             if (IPPDerivScharr(_src, _dst, ddepth, dx, dy, scale, delta, borderType))
    581             {
    582                 CV_IMPL_ADD(CV_IMPL_IPP);
    583                 return;
    584             }
    585         }
    586         else if (0 < ksize)
    587         {
    588             if (IPPDerivSobel(_src, _dst, ddepth, dx, dy, ksize, scale, delta, borderType))
    589             {
    590                 CV_IMPL_ADD(CV_IMPL_IPP);
    591                 return;
    592             }
    593         }
    594     }
    595 #endif
    596     int ktype = std::max(CV_32F, std::max(ddepth, sdepth));
    597 
    598     Mat kx, ky;
    599     getDerivKernels( kx, ky, dx, dy, ksize, false, ktype );
    600     if( scale != 1 )
    601     {
    602         // usually the smoothing part is the slowest to compute,
    603         // so try to scale it instead of the faster differenciating part
    604         if( dx == 0 )
    605             kx *= scale;
    606         else
    607             ky *= scale;
    608     }
    609     sepFilter2D( _src, _dst, ddepth, kx, ky, Point(-1, -1), delta, borderType );
    610 }
    611 
    612 
    613 void cv::Scharr( InputArray _src, OutputArray _dst, int ddepth, int dx, int dy,
    614                  double scale, double delta, int borderType )
    615 {
    616     int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype);
    617     if (ddepth < 0)
    618         ddepth = sdepth;
    619     int dtype = CV_MAKETYPE(ddepth, cn);
    620     _dst.create( _src.size(), dtype );
    621 
    622 #ifdef HAVE_TEGRA_OPTIMIZATION
    623     if (tegra::useTegra() && scale == 1.0 && delta == 0)
    624     {
    625         Mat src = _src.getMat(), dst = _dst.getMat();
    626         if (tegra::scharr(src, dst, dx, dy, borderType))
    627             return;
    628     }
    629 #endif
    630 
    631 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
    632     CV_IPP_CHECK()
    633     {
    634         if (IPPDerivScharr(_src, _dst, ddepth, dx, dy, scale, delta, borderType))
    635         {
    636             CV_IMPL_ADD(CV_IMPL_IPP);
    637             return;
    638         }
    639     }
    640 #endif
    641     int ktype = std::max(CV_32F, std::max(ddepth, sdepth));
    642 
    643     Mat kx, ky;
    644     getScharrKernels( kx, ky, dx, dy, false, ktype );
    645     if( scale != 1 )
    646     {
    647         // usually the smoothing part is the slowest to compute,
    648         // so try to scale it instead of the faster differenciating part
    649         if( dx == 0 )
    650             kx *= scale;
    651         else
    652             ky *= scale;
    653     }
    654     sepFilter2D( _src, _dst, ddepth, kx, ky, Point(-1, -1), delta, borderType );
    655 }
    656 
    657 #ifdef HAVE_OPENCL
    658 
    659 namespace cv {
    660 
    661 #define LAPLACIAN_LOCAL_MEM(tileX, tileY, ksize, elsize) (((tileX) + 2 * (int)((ksize) / 2)) * (3 * (tileY) + 2 * (int)((ksize) / 2)) * elsize)
    662 
    663 static bool ocl_Laplacian5(InputArray _src, OutputArray _dst,
    664                            const Mat & kd, const Mat & ks, double scale, double delta,
    665                            int borderType, int depth, int ddepth)
    666 {
    667     const size_t tileSizeX = 16;
    668     const size_t tileSizeYmin = 8;
    669 
    670     const ocl::Device dev = ocl::Device::getDefault();
    671 
    672     int stype = _src.type();
    673     int sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype), esz = CV_ELEM_SIZE(stype);
    674 
    675     bool doubleSupport = dev.doubleFPConfig() > 0;
    676     if (!doubleSupport && (sdepth == CV_64F || ddepth == CV_64F))
    677         return false;
    678 
    679     Mat kernelX = kd.reshape(1, 1);
    680     if (kernelX.cols % 2 != 1)
    681         return false;
    682     Mat kernelY = ks.reshape(1, 1);
    683     if (kernelY.cols % 2 != 1)
    684         return false;
    685     CV_Assert(kernelX.cols == kernelY.cols);
    686 
    687     size_t wgs = dev.maxWorkGroupSize();
    688     size_t lmsz = dev.localMemSize();
    689     size_t src_step = _src.step(), src_offset = _src.offset();
    690     const size_t tileSizeYmax = wgs / tileSizeX;
    691 
    692     // workaround for Nvidia: 3 channel vector type takes 4*elem_size in local memory
    693     int loc_mem_cn = dev.vendorID() == ocl::Device::VENDOR_NVIDIA && cn == 3 ? 4 : cn;
    694 
    695     if (((src_offset % src_step) % esz == 0) &&
    696         (
    697          (borderType == BORDER_CONSTANT || borderType == BORDER_REPLICATE) ||
    698          ((borderType == BORDER_REFLECT || borderType == BORDER_WRAP || borderType == BORDER_REFLECT_101) &&
    699           (_src.cols() >= (int) (kernelX.cols + tileSizeX) && _src.rows() >= (int) (kernelY.cols + tileSizeYmax)))
    700         ) &&
    701         (tileSizeX * tileSizeYmin <= wgs) &&
    702         (LAPLACIAN_LOCAL_MEM(tileSizeX, tileSizeYmin, kernelX.cols, loc_mem_cn * 4) <= lmsz)
    703        )
    704     {
    705         Size size = _src.size(), wholeSize;
    706         Point origin;
    707         int dtype = CV_MAKE_TYPE(ddepth, cn);
    708         int wdepth = CV_32F;
    709 
    710         size_t tileSizeY = tileSizeYmax;
    711         while ((tileSizeX * tileSizeY > wgs) || (LAPLACIAN_LOCAL_MEM(tileSizeX, tileSizeY, kernelX.cols, loc_mem_cn * 4) > lmsz))
    712         {
    713             tileSizeY /= 2;
    714         }
    715         size_t lt2[2] = { tileSizeX, tileSizeY};
    716         size_t gt2[2] = { lt2[0] * (1 + (size.width - 1) / lt2[0]), lt2[1] };
    717 
    718         char cvt[2][40];
    719         const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP",
    720                                            "BORDER_REFLECT_101" };
    721 
    722         String opts = cv::format("-D BLK_X=%d -D BLK_Y=%d -D RADIUS=%d%s%s"
    723                                  " -D convertToWT=%s -D convertToDT=%s"
    724                                  " -D %s -D srcT1=%s -D dstT1=%s -D WT1=%s"
    725                                  " -D srcT=%s -D dstT=%s -D WT=%s"
    726                                  " -D CN=%d ",
    727                                  (int)lt2[0], (int)lt2[1], kernelX.cols / 2,
    728                                  ocl::kernelToStr(kernelX, wdepth, "KERNEL_MATRIX_X").c_str(),
    729                                  ocl::kernelToStr(kernelY, wdepth, "KERNEL_MATRIX_Y").c_str(),
    730                                  ocl::convertTypeStr(sdepth, wdepth, cn, cvt[0]),
    731                                  ocl::convertTypeStr(wdepth, ddepth, cn, cvt[1]),
    732                                  borderMap[borderType],
    733                                  ocl::typeToStr(sdepth), ocl::typeToStr(ddepth), ocl::typeToStr(wdepth),
    734                                  ocl::typeToStr(CV_MAKETYPE(sdepth, cn)),
    735                                  ocl::typeToStr(CV_MAKETYPE(ddepth, cn)),
    736                                  ocl::typeToStr(CV_MAKETYPE(wdepth, cn)),
    737                                  cn);
    738 
    739         ocl::Kernel k("laplacian", ocl::imgproc::laplacian5_oclsrc, opts);
    740         if (k.empty())
    741             return false;
    742         UMat src = _src.getUMat();
    743         _dst.create(size, dtype);
    744         UMat dst = _dst.getUMat();
    745 
    746         int src_offset_x = static_cast<int>((src_offset % src_step) / esz);
    747         int src_offset_y = static_cast<int>(src_offset / src_step);
    748 
    749         src.locateROI(wholeSize, origin);
    750 
    751         k.args(ocl::KernelArg::PtrReadOnly(src), (int)src_step, src_offset_x, src_offset_y,
    752                wholeSize.height, wholeSize.width, ocl::KernelArg::WriteOnly(dst),
    753                static_cast<float>(scale), static_cast<float>(delta));
    754 
    755         return k.run(2, gt2, lt2, false);
    756     }
    757     int iscale = cvRound(scale), idelta = cvRound(delta);
    758     bool floatCoeff = std::fabs(delta - idelta) > DBL_EPSILON || std::fabs(scale - iscale) > DBL_EPSILON;
    759     int wdepth = std::max(depth, floatCoeff ? CV_32F : CV_32S), kercn = 1;
    760 
    761     if (!doubleSupport && wdepth == CV_64F)
    762         return false;
    763 
    764     char cvt[2][40];
    765     ocl::Kernel k("sumConvert", ocl::imgproc::laplacian5_oclsrc,
    766                   format("-D ONLY_SUM_CONVERT "
    767                          "-D srcT=%s -D WT=%s -D dstT=%s -D coeffT=%s -D wdepth=%d "
    768                          "-D convertToWT=%s -D convertToDT=%s%s",
    769                          ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)),
    770                          ocl::typeToStr(CV_MAKE_TYPE(wdepth, kercn)),
    771                          ocl::typeToStr(CV_MAKE_TYPE(ddepth, kercn)),
    772                          ocl::typeToStr(wdepth), wdepth,
    773                          ocl::convertTypeStr(depth, wdepth, kercn, cvt[0]),
    774                          ocl::convertTypeStr(wdepth, ddepth, kercn, cvt[1]),
    775                          doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
    776     if (k.empty())
    777         return false;
    778 
    779     UMat d2x, d2y;
    780     sepFilter2D(_src, d2x, depth, kd, ks, Point(-1, -1), 0, borderType);
    781     sepFilter2D(_src, d2y, depth, ks, kd, Point(-1, -1), 0, borderType);
    782 
    783     UMat dst = _dst.getUMat();
    784 
    785     ocl::KernelArg d2xarg = ocl::KernelArg::ReadOnlyNoSize(d2x),
    786             d2yarg = ocl::KernelArg::ReadOnlyNoSize(d2y),
    787             dstarg = ocl::KernelArg::WriteOnly(dst, cn, kercn);
    788 
    789     if (wdepth >= CV_32F)
    790         k.args(d2xarg, d2yarg, dstarg, (float)scale, (float)delta);
    791     else
    792         k.args(d2xarg, d2yarg, dstarg, iscale, idelta);
    793 
    794     size_t globalsize[] = { dst.cols * cn / kercn, dst.rows };
    795     return k.run(2, globalsize, NULL, false);
    796 }
    797 
    798 }
    799 
    800 #endif
    801 
    802 void cv::Laplacian( InputArray _src, OutputArray _dst, int ddepth, int ksize,
    803                     double scale, double delta, int borderType )
    804 {
    805     int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype);
    806     if (ddepth < 0)
    807         ddepth = sdepth;
    808     _dst.create( _src.size(), CV_MAKETYPE(ddepth, cn) );
    809 
    810 #ifdef HAVE_IPP
    811     CV_IPP_CHECK()
    812     {
    813         if ((ksize == 3 || ksize == 5) && ((borderType & BORDER_ISOLATED) != 0 || !_src.isSubmatrix()) &&
    814             ((stype == CV_8UC1 && ddepth == CV_16S) || (ddepth == CV_32F && stype == CV_32FC1)) && !ocl::useOpenCL())
    815         {
    816             int iscale = saturate_cast<int>(scale), idelta = saturate_cast<int>(delta);
    817             bool floatScale = std::fabs(scale - iscale) > DBL_EPSILON, needScale = iscale != 1;
    818             bool floatDelta = std::fabs(delta - idelta) > DBL_EPSILON, needDelta = delta != 0;
    819             int borderTypeNI = borderType & ~BORDER_ISOLATED;
    820             Mat src = _src.getMat(), dst = _dst.getMat();
    821 
    822             if (src.data != dst.data)
    823             {
    824                 Ipp32s bufsize;
    825                 IppStatus status = (IppStatus)-1;
    826                 IppiSize roisize = { src.cols, src.rows };
    827                 IppiMaskSize masksize = ksize == 3 ? ippMskSize3x3 : ippMskSize5x5;
    828                 IppiBorderType borderTypeIpp = ippiGetBorderType(borderTypeNI);
    829 
    830 #define IPP_FILTER_LAPLACIAN(ippsrctype, ippdsttype, ippfavor) \
    831         do \
    832         { \
    833             if (borderTypeIpp >= 0 && ippiFilterLaplacianGetBufferSize_##ippfavor##_C1R(roisize, masksize, &bufsize) >= 0) \
    834             { \
    835                 Ipp8u * buffer = ippsMalloc_8u(bufsize); \
    836                 status = ippiFilterLaplacianBorder_##ippfavor##_C1R(src.ptr<ippsrctype>(), (int)src.step, dst.ptr<ippdsttype>(), \
    837                                                                     (int)dst.step, roisize, masksize, borderTypeIpp, 0, buffer); \
    838                 ippsFree(buffer); \
    839             } \
    840         } while ((void)0, 0)
    841 
    842                 CV_SUPPRESS_DEPRECATED_START
    843                 if (sdepth == CV_8U && ddepth == CV_16S && !floatScale && !floatDelta)
    844                 {
    845                     IPP_FILTER_LAPLACIAN(Ipp8u, Ipp16s, 8u16s);
    846 
    847                     if (needScale && status >= 0)
    848                         status = ippiMulC_16s_C1IRSfs((Ipp16s)iscale, dst.ptr<Ipp16s>(), (int)dst.step, roisize, 0);
    849                     if (needDelta && status >= 0)
    850                         status = ippiAddC_16s_C1IRSfs((Ipp16s)idelta, dst.ptr<Ipp16s>(), (int)dst.step, roisize, 0);
    851                 }
    852                 else if (sdepth == CV_32F && ddepth == CV_32F)
    853                 {
    854                     IPP_FILTER_LAPLACIAN(Ipp32f, Ipp32f, 32f);
    855 
    856                     if (needScale && status >= 0)
    857                         status = ippiMulC_32f_C1IR((Ipp32f)scale, dst.ptr<Ipp32f>(), (int)dst.step, roisize);
    858                     if (needDelta && status >= 0)
    859                         status = ippiAddC_32f_C1IR((Ipp32f)delta, dst.ptr<Ipp32f>(), (int)dst.step, roisize);
    860                 }
    861                 CV_SUPPRESS_DEPRECATED_END
    862 
    863                 if (status >= 0)
    864                 {
    865                     CV_IMPL_ADD(CV_IMPL_IPP);
    866                     return;
    867                 }
    868                 setIppErrorStatus();
    869             }
    870         }
    871 #undef IPP_FILTER_LAPLACIAN
    872     }
    873 #endif
    874 
    875 #ifdef HAVE_TEGRA_OPTIMIZATION
    876     if (tegra::useTegra() && scale == 1.0 && delta == 0)
    877     {
    878         Mat src = _src.getMat(), dst = _dst.getMat();
    879         if (ksize == 1 && tegra::laplace1(src, dst, borderType))
    880             return;
    881         if (ksize == 3 && tegra::laplace3(src, dst, borderType))
    882             return;
    883         if (ksize == 5 && tegra::laplace5(src, dst, borderType))
    884             return;
    885     }
    886 #endif
    887 
    888     if( ksize == 1 || ksize == 3 )
    889     {
    890         float K[2][9] =
    891         {
    892             { 0, 1, 0, 1, -4, 1, 0, 1, 0 },
    893             { 2, 0, 2, 0, -8, 0, 2, 0, 2 }
    894         };
    895         Mat kernel(3, 3, CV_32F, K[ksize == 3]);
    896         if( scale != 1 )
    897             kernel *= scale;
    898         filter2D( _src, _dst, ddepth, kernel, Point(-1, -1), delta, borderType );
    899     }
    900     else
    901     {
    902         int ktype = std::max(CV_32F, std::max(ddepth, sdepth));
    903         int wdepth = sdepth == CV_8U && ksize <= 5 ? CV_16S : sdepth <= CV_32F ? CV_32F : CV_64F;
    904         int wtype = CV_MAKETYPE(wdepth, cn);
    905         Mat kd, ks;
    906         getSobelKernels( kd, ks, 2, 0, ksize, false, ktype );
    907 
    908         CV_OCL_RUN(_dst.isUMat(),
    909                    ocl_Laplacian5(_src, _dst, kd, ks, scale,
    910                                   delta, borderType, wdepth, ddepth))
    911 
    912         const size_t STRIPE_SIZE = 1 << 14;
    913         Ptr<FilterEngine> fx = createSeparableLinearFilter(stype,
    914             wtype, kd, ks, Point(-1,-1), 0, borderType, borderType, Scalar() );
    915         Ptr<FilterEngine> fy = createSeparableLinearFilter(stype,
    916             wtype, ks, kd, Point(-1,-1), 0, borderType, borderType, Scalar() );
    917 
    918         Mat src = _src.getMat(), dst = _dst.getMat();
    919         int y = fx->start(src), dsty = 0, dy = 0;
    920         fy->start(src);
    921         const uchar* sptr = src.ptr(y);
    922 
    923         int dy0 = std::min(std::max((int)(STRIPE_SIZE/(CV_ELEM_SIZE(stype)*src.cols)), 1), src.rows);
    924         Mat d2x( dy0 + kd.rows - 1, src.cols, wtype );
    925         Mat d2y( dy0 + kd.rows - 1, src.cols, wtype );
    926 
    927         for( ; dsty < src.rows; sptr += dy0*src.step, dsty += dy )
    928         {
    929             fx->proceed( sptr, (int)src.step, dy0, d2x.ptr(), (int)d2x.step );
    930             dy = fy->proceed( sptr, (int)src.step, dy0, d2y.ptr(), (int)d2y.step );
    931             if( dy > 0 )
    932             {
    933                 Mat dstripe = dst.rowRange(dsty, dsty + dy);
    934                 d2x.rows = d2y.rows = dy; // modify the headers, which should work
    935                 d2x += d2y;
    936                 d2x.convertTo( dstripe, ddepth, scale, delta );
    937             }
    938         }
    939     }
    940 }
    941 
    942 /////////////////////////////////////////////////////////////////////////////////////////
    943 
    944 CV_IMPL void
    945 cvSobel( const void* srcarr, void* dstarr, int dx, int dy, int aperture_size )
    946 {
    947     cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
    948 
    949     CV_Assert( src.size() == dst.size() && src.channels() == dst.channels() );
    950 
    951     cv::Sobel( src, dst, dst.depth(), dx, dy, aperture_size, 1, 0, cv::BORDER_REPLICATE );
    952     if( CV_IS_IMAGE(srcarr) && ((IplImage*)srcarr)->origin && dy % 2 != 0 )
    953         dst *= -1;
    954 }
    955 
    956 
    957 CV_IMPL void
    958 cvLaplace( const void* srcarr, void* dstarr, int aperture_size )
    959 {
    960     cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
    961 
    962     CV_Assert( src.size() == dst.size() && src.channels() == dst.channels() );
    963 
    964     cv::Laplacian( src, dst, dst.depth(), aperture_size, 1, 0, cv::BORDER_REPLICATE );
    965 }
    966 
    967 /* End of file. */
    968