Home | History | Annotate | Download | only in src
      1 /*M///////////////////////////////////////////////////////////////////////////////////////
      2 //
      3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
      4 //
      5 //  By downloading, copying, installing or using the software you agree to this license.
      6 //  If you do not agree to this license, do not download, install,
      7 //  copy or use the software.
      8 //
      9 //
     10 //                           License Agreement
     11 //                For Open Source Computer Vision Library
     12 //
     13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
     14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
     15 // Third party copyrights are property of their respective owners.
     16 //
     17 // Redistribution and use in source and binary forms, with or without modification,
     18 // are permitted provided that the following conditions are met:
     19 //
     20 //   * Redistribution's of source code must retain the above copyright notice,
     21 //     this list of conditions and the following disclaimer.
     22 //
     23 //   * Redistribution's in binary form must reproduce the above copyright notice,
     24 //     this list of conditions and the following disclaimer in the documentation
     25 //     and/or other materials provided with the distribution.
     26 //
     27 //   * The name of the copyright holders may not be used to endorse or promote products
     28 //     derived from this software without specific prior written permission.
     29 //
     30 // This software is provided by the copyright holders and contributors "as is" and
     31 // any express or implied warranties, including, but not limited to, the implied
     32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
     33 // In no event shall the Intel Corporation or contributors be liable for any direct,
     34 // indirect, incidental, special, exemplary, or consequential damages
     35 // (including, but not limited to, procurement of substitute goods or services;
     36 // loss of use, data, or profits; or business interruption) however caused
     37 // and on any theory of liability, whether in contract, strict liability,
     38 // or tort (including negligence or otherwise) arising in any way out of
     39 // the use of this software, even if advised of the possibility of such damage.
     40 //
     41 //M*/
     42 
     43 #include "precomp.hpp"
     44 #include <limits.h>
     45 #include "opencl_kernels_imgproc.hpp"
     46 
     47 /****************************************************************************************\
     48                      Basic Morphological Operations: Erosion & Dilation
     49 \****************************************************************************************/
     50 
     51 namespace cv
     52 {
     53 
     54 template<typename T> struct MinOp
     55 {
     56     typedef T type1;
     57     typedef T type2;
     58     typedef T rtype;
     59     T operator ()(const T a, const T b) const { return std::min(a, b); }
     60 };
     61 
     62 template<typename T> struct MaxOp
     63 {
     64     typedef T type1;
     65     typedef T type2;
     66     typedef T rtype;
     67     T operator ()(const T a, const T b) const { return std::max(a, b); }
     68 };
     69 
     70 #undef CV_MIN_8U
     71 #undef CV_MAX_8U
     72 #define CV_MIN_8U(a,b)       ((a) - CV_FAST_CAST_8U((a) - (b)))
     73 #define CV_MAX_8U(a,b)       ((a) + CV_FAST_CAST_8U((b) - (a)))
     74 
     75 template<> inline uchar MinOp<uchar>::operator ()(const uchar a, const uchar b) const { return CV_MIN_8U(a, b); }
     76 template<> inline uchar MaxOp<uchar>::operator ()(const uchar a, const uchar b) const { return CV_MAX_8U(a, b); }
     77 
     78 struct MorphRowNoVec
     79 {
     80     MorphRowNoVec(int, int) {}
     81     int operator()(const uchar*, uchar*, int, int) const { return 0; }
     82 };
     83 
     84 struct MorphColumnNoVec
     85 {
     86     MorphColumnNoVec(int, int) {}
     87     int operator()(const uchar**, uchar*, int, int, int) const { return 0; }
     88 };
     89 
     90 struct MorphNoVec
     91 {
     92     int operator()(uchar**, int, uchar*, int) const { return 0; }
     93 };
     94 
     95 #if CV_SSE2
     96 
     97 template<class VecUpdate> struct MorphRowIVec
     98 {
     99     enum { ESZ = VecUpdate::ESZ };
    100 
    101     MorphRowIVec(int _ksize, int _anchor) : ksize(_ksize), anchor(_anchor) {}
    102     int operator()(const uchar* src, uchar* dst, int width, int cn) const
    103     {
    104         if( !checkHardwareSupport(CV_CPU_SSE2) )
    105             return 0;
    106 
    107         cn *= ESZ;
    108         int i, k, _ksize = ksize*cn;
    109         width = (width & -4)*cn;
    110         VecUpdate updateOp;
    111 
    112         for( i = 0; i <= width - 16; i += 16 )
    113         {
    114             __m128i s = _mm_loadu_si128((const __m128i*)(src + i));
    115             for( k = cn; k < _ksize; k += cn )
    116             {
    117                 __m128i x = _mm_loadu_si128((const __m128i*)(src + i + k));
    118                 s = updateOp(s, x);
    119             }
    120             _mm_storeu_si128((__m128i*)(dst + i), s);
    121         }
    122 
    123         for( ; i < width; i += 4 )
    124         {
    125             __m128i s = _mm_cvtsi32_si128(*(const int*)(src + i));
    126             for( k = cn; k < _ksize; k += cn )
    127             {
    128                 __m128i x = _mm_cvtsi32_si128(*(const int*)(src + i + k));
    129                 s = updateOp(s, x);
    130             }
    131             *(int*)(dst + i) = _mm_cvtsi128_si32(s);
    132         }
    133 
    134         return i/ESZ;
    135     }
    136 
    137     int ksize, anchor;
    138 };
    139 
    140 
    141 template<class VecUpdate> struct MorphRowFVec
    142 {
    143     MorphRowFVec(int _ksize, int _anchor) : ksize(_ksize), anchor(_anchor) {}
    144     int operator()(const uchar* src, uchar* dst, int width, int cn) const
    145     {
    146         if( !checkHardwareSupport(CV_CPU_SSE) )
    147             return 0;
    148 
    149         int i, k, _ksize = ksize*cn;
    150         width = (width & -4)*cn;
    151         VecUpdate updateOp;
    152 
    153         for( i = 0; i < width; i += 4 )
    154         {
    155             __m128 s = _mm_loadu_ps((const float*)src + i);
    156             for( k = cn; k < _ksize; k += cn )
    157             {
    158                 __m128 x = _mm_loadu_ps((const float*)src + i + k);
    159                 s = updateOp(s, x);
    160             }
    161             _mm_storeu_ps((float*)dst + i, s);
    162         }
    163 
    164         return i;
    165     }
    166 
    167     int ksize, anchor;
    168 };
    169 
    170 
    171 template<class VecUpdate> struct MorphColumnIVec
    172 {
    173     enum { ESZ = VecUpdate::ESZ };
    174 
    175     MorphColumnIVec(int _ksize, int _anchor) : ksize(_ksize), anchor(_anchor) {}
    176     int operator()(const uchar** src, uchar* dst, int dststep, int count, int width) const
    177     {
    178         if( !checkHardwareSupport(CV_CPU_SSE2) )
    179             return 0;
    180 
    181         int i = 0, k, _ksize = ksize;
    182         width *= ESZ;
    183         VecUpdate updateOp;
    184 
    185         for( i = 0; i < count + ksize - 1; i++ )
    186             CV_Assert( ((size_t)src[i] & 15) == 0 );
    187 
    188         for( ; _ksize > 1 && count > 1; count -= 2, dst += dststep*2, src += 2 )
    189         {
    190             for( i = 0; i <= width - 32; i += 32 )
    191             {
    192                 const uchar* sptr = src[1] + i;
    193                 __m128i s0 = _mm_load_si128((const __m128i*)sptr);
    194                 __m128i s1 = _mm_load_si128((const __m128i*)(sptr + 16));
    195                 __m128i x0, x1;
    196 
    197                 for( k = 2; k < _ksize; k++ )
    198                 {
    199                     sptr = src[k] + i;
    200                     x0 = _mm_load_si128((const __m128i*)sptr);
    201                     x1 = _mm_load_si128((const __m128i*)(sptr + 16));
    202                     s0 = updateOp(s0, x0);
    203                     s1 = updateOp(s1, x1);
    204                 }
    205 
    206                 sptr = src[0] + i;
    207                 x0 = _mm_load_si128((const __m128i*)sptr);
    208                 x1 = _mm_load_si128((const __m128i*)(sptr + 16));
    209                 _mm_storeu_si128((__m128i*)(dst + i), updateOp(s0, x0));
    210                 _mm_storeu_si128((__m128i*)(dst + i + 16), updateOp(s1, x1));
    211 
    212                 sptr = src[k] + i;
    213                 x0 = _mm_load_si128((const __m128i*)sptr);
    214                 x1 = _mm_load_si128((const __m128i*)(sptr + 16));
    215                 _mm_storeu_si128((__m128i*)(dst + dststep + i), updateOp(s0, x0));
    216                 _mm_storeu_si128((__m128i*)(dst + dststep + i + 16), updateOp(s1, x1));
    217             }
    218 
    219             for( ; i <= width - 8; i += 8 )
    220             {
    221                 __m128i s0 = _mm_loadl_epi64((const __m128i*)(src[1] + i)), x0;
    222 
    223                 for( k = 2; k < _ksize; k++ )
    224                 {
    225                     x0 = _mm_loadl_epi64((const __m128i*)(src[k] + i));
    226                     s0 = updateOp(s0, x0);
    227                 }
    228 
    229                 x0 = _mm_loadl_epi64((const __m128i*)(src[0] + i));
    230                 _mm_storel_epi64((__m128i*)(dst + i), updateOp(s0, x0));
    231                 x0 = _mm_loadl_epi64((const __m128i*)(src[k] + i));
    232                 _mm_storel_epi64((__m128i*)(dst + dststep + i), updateOp(s0, x0));
    233             }
    234         }
    235 
    236         for( ; count > 0; count--, dst += dststep, src++ )
    237         {
    238             for( i = 0; i <= width - 32; i += 32 )
    239             {
    240                 const uchar* sptr = src[0] + i;
    241                 __m128i s0 = _mm_load_si128((const __m128i*)sptr);
    242                 __m128i s1 = _mm_load_si128((const __m128i*)(sptr + 16));
    243                 __m128i x0, x1;
    244 
    245                 for( k = 1; k < _ksize; k++ )
    246                 {
    247                     sptr = src[k] + i;
    248                     x0 = _mm_load_si128((const __m128i*)sptr);
    249                     x1 = _mm_load_si128((const __m128i*)(sptr + 16));
    250                     s0 = updateOp(s0, x0);
    251                     s1 = updateOp(s1, x1);
    252                 }
    253                 _mm_storeu_si128((__m128i*)(dst + i), s0);
    254                 _mm_storeu_si128((__m128i*)(dst + i + 16), s1);
    255             }
    256 
    257             for( ; i <= width - 8; i += 8 )
    258             {
    259                 __m128i s0 = _mm_loadl_epi64((const __m128i*)(src[0] + i)), x0;
    260 
    261                 for( k = 1; k < _ksize; k++ )
    262                 {
    263                     x0 = _mm_loadl_epi64((const __m128i*)(src[k] + i));
    264                     s0 = updateOp(s0, x0);
    265                 }
    266                 _mm_storel_epi64((__m128i*)(dst + i), s0);
    267             }
    268         }
    269 
    270         return i/ESZ;
    271     }
    272 
    273     int ksize, anchor;
    274 };
    275 
    276 
    277 template<class VecUpdate> struct MorphColumnFVec
    278 {
    279     MorphColumnFVec(int _ksize, int _anchor) : ksize(_ksize), anchor(_anchor) {}
    280     int operator()(const uchar** _src, uchar* _dst, int dststep, int count, int width) const
    281     {
    282         if( !checkHardwareSupport(CV_CPU_SSE) )
    283             return 0;
    284 
    285         int i = 0, k, _ksize = ksize;
    286         VecUpdate updateOp;
    287 
    288         for( i = 0; i < count + ksize - 1; i++ )
    289             CV_Assert( ((size_t)_src[i] & 15) == 0 );
    290 
    291         const float** src = (const float**)_src;
    292         float* dst = (float*)_dst;
    293         dststep /= sizeof(dst[0]);
    294 
    295         for( ; _ksize > 1 && count > 1; count -= 2, dst += dststep*2, src += 2 )
    296         {
    297             for( i = 0; i <= width - 16; i += 16 )
    298             {
    299                 const float* sptr = src[1] + i;
    300                 __m128 s0 = _mm_load_ps(sptr);
    301                 __m128 s1 = _mm_load_ps(sptr + 4);
    302                 __m128 s2 = _mm_load_ps(sptr + 8);
    303                 __m128 s3 = _mm_load_ps(sptr + 12);
    304                 __m128 x0, x1, x2, x3;
    305 
    306                 for( k = 2; k < _ksize; k++ )
    307                 {
    308                     sptr = src[k] + i;
    309                     x0 = _mm_load_ps(sptr);
    310                     x1 = _mm_load_ps(sptr + 4);
    311                     s0 = updateOp(s0, x0);
    312                     s1 = updateOp(s1, x1);
    313                     x2 = _mm_load_ps(sptr + 8);
    314                     x3 = _mm_load_ps(sptr + 12);
    315                     s2 = updateOp(s2, x2);
    316                     s3 = updateOp(s3, x3);
    317                 }
    318 
    319                 sptr = src[0] + i;
    320                 x0 = _mm_load_ps(sptr);
    321                 x1 = _mm_load_ps(sptr + 4);
    322                 x2 = _mm_load_ps(sptr + 8);
    323                 x3 = _mm_load_ps(sptr + 12);
    324                 _mm_storeu_ps(dst + i, updateOp(s0, x0));
    325                 _mm_storeu_ps(dst + i + 4, updateOp(s1, x1));
    326                 _mm_storeu_ps(dst + i + 8, updateOp(s2, x2));
    327                 _mm_storeu_ps(dst + i + 12, updateOp(s3, x3));
    328 
    329                 sptr = src[k] + i;
    330                 x0 = _mm_load_ps(sptr);
    331                 x1 = _mm_load_ps(sptr + 4);
    332                 x2 = _mm_load_ps(sptr + 8);
    333                 x3 = _mm_load_ps(sptr + 12);
    334                 _mm_storeu_ps(dst + dststep + i, updateOp(s0, x0));
    335                 _mm_storeu_ps(dst + dststep + i + 4, updateOp(s1, x1));
    336                 _mm_storeu_ps(dst + dststep + i + 8, updateOp(s2, x2));
    337                 _mm_storeu_ps(dst + dststep + i + 12, updateOp(s3, x3));
    338             }
    339 
    340             for( ; i <= width - 4; i += 4 )
    341             {
    342                 __m128 s0 = _mm_load_ps(src[1] + i), x0;
    343 
    344                 for( k = 2; k < _ksize; k++ )
    345                 {
    346                     x0 = _mm_load_ps(src[k] + i);
    347                     s0 = updateOp(s0, x0);
    348                 }
    349 
    350                 x0 = _mm_load_ps(src[0] + i);
    351                 _mm_storeu_ps(dst + i, updateOp(s0, x0));
    352                 x0 = _mm_load_ps(src[k] + i);
    353                 _mm_storeu_ps(dst + dststep + i, updateOp(s0, x0));
    354             }
    355         }
    356 
    357         for( ; count > 0; count--, dst += dststep, src++ )
    358         {
    359             for( i = 0; i <= width - 16; i += 16 )
    360             {
    361                 const float* sptr = src[0] + i;
    362                 __m128 s0 = _mm_load_ps(sptr);
    363                 __m128 s1 = _mm_load_ps(sptr + 4);
    364                 __m128 s2 = _mm_load_ps(sptr + 8);
    365                 __m128 s3 = _mm_load_ps(sptr + 12);
    366                 __m128 x0, x1, x2, x3;
    367 
    368                 for( k = 1; k < _ksize; k++ )
    369                 {
    370                     sptr = src[k] + i;
    371                     x0 = _mm_load_ps(sptr);
    372                     x1 = _mm_load_ps(sptr + 4);
    373                     s0 = updateOp(s0, x0);
    374                     s1 = updateOp(s1, x1);
    375                     x2 = _mm_load_ps(sptr + 8);
    376                     x3 = _mm_load_ps(sptr + 12);
    377                     s2 = updateOp(s2, x2);
    378                     s3 = updateOp(s3, x3);
    379                 }
    380                 _mm_storeu_ps(dst + i, s0);
    381                 _mm_storeu_ps(dst + i + 4, s1);
    382                 _mm_storeu_ps(dst + i + 8, s2);
    383                 _mm_storeu_ps(dst + i + 12, s3);
    384             }
    385 
    386             for( i = 0; i <= width - 4; i += 4 )
    387             {
    388                 __m128 s0 = _mm_load_ps(src[0] + i), x0;
    389                 for( k = 1; k < _ksize; k++ )
    390                 {
    391                     x0 = _mm_load_ps(src[k] + i);
    392                     s0 = updateOp(s0, x0);
    393                 }
    394                 _mm_storeu_ps(dst + i, s0);
    395             }
    396         }
    397 
    398         return i;
    399     }
    400 
    401     int ksize, anchor;
    402 };
    403 
    404 
    405 template<class VecUpdate> struct MorphIVec
    406 {
    407     enum { ESZ = VecUpdate::ESZ };
    408 
    409     int operator()(uchar** src, int nz, uchar* dst, int width) const
    410     {
    411         if( !checkHardwareSupport(CV_CPU_SSE2) )
    412             return 0;
    413 
    414         int i, k;
    415         width *= ESZ;
    416         VecUpdate updateOp;
    417 
    418         for( i = 0; i <= width - 32; i += 32 )
    419         {
    420             const uchar* sptr = src[0] + i;
    421             __m128i s0 = _mm_loadu_si128((const __m128i*)sptr);
    422             __m128i s1 = _mm_loadu_si128((const __m128i*)(sptr + 16));
    423             __m128i x0, x1;
    424 
    425             for( k = 1; k < nz; k++ )
    426             {
    427                 sptr = src[k] + i;
    428                 x0 = _mm_loadu_si128((const __m128i*)sptr);
    429                 x1 = _mm_loadu_si128((const __m128i*)(sptr + 16));
    430                 s0 = updateOp(s0, x0);
    431                 s1 = updateOp(s1, x1);
    432             }
    433             _mm_storeu_si128((__m128i*)(dst + i), s0);
    434             _mm_storeu_si128((__m128i*)(dst + i + 16), s1);
    435         }
    436 
    437         for( ; i <= width - 8; i += 8 )
    438         {
    439             __m128i s0 = _mm_loadl_epi64((const __m128i*)(src[0] + i)), x0;
    440 
    441             for( k = 1; k < nz; k++ )
    442             {
    443                 x0 = _mm_loadl_epi64((const __m128i*)(src[k] + i));
    444                 s0 = updateOp(s0, x0);
    445             }
    446             _mm_storel_epi64((__m128i*)(dst + i), s0);
    447         }
    448 
    449         return i/ESZ;
    450     }
    451 };
    452 
    453 
    454 template<class VecUpdate> struct MorphFVec
    455 {
    456     int operator()(uchar** _src, int nz, uchar* _dst, int width) const
    457     {
    458         if( !checkHardwareSupport(CV_CPU_SSE) )
    459             return 0;
    460 
    461         const float** src = (const float**)_src;
    462         float* dst = (float*)_dst;
    463         int i, k;
    464         VecUpdate updateOp;
    465 
    466         for( i = 0; i <= width - 16; i += 16 )
    467         {
    468             const float* sptr = src[0] + i;
    469             __m128 s0 = _mm_loadu_ps(sptr);
    470             __m128 s1 = _mm_loadu_ps(sptr + 4);
    471             __m128 s2 = _mm_loadu_ps(sptr + 8);
    472             __m128 s3 = _mm_loadu_ps(sptr + 12);
    473             __m128 x0, x1, x2, x3;
    474 
    475             for( k = 1; k < nz; k++ )
    476             {
    477                 sptr = src[k] + i;
    478                 x0 = _mm_loadu_ps(sptr);
    479                 x1 = _mm_loadu_ps(sptr + 4);
    480                 x2 = _mm_loadu_ps(sptr + 8);
    481                 x3 = _mm_loadu_ps(sptr + 12);
    482                 s0 = updateOp(s0, x0);
    483                 s1 = updateOp(s1, x1);
    484                 s2 = updateOp(s2, x2);
    485                 s3 = updateOp(s3, x3);
    486             }
    487             _mm_storeu_ps(dst + i, s0);
    488             _mm_storeu_ps(dst + i + 4, s1);
    489             _mm_storeu_ps(dst + i + 8, s2);
    490             _mm_storeu_ps(dst + i + 12, s3);
    491         }
    492 
    493         for( ; i <= width - 4; i += 4 )
    494         {
    495             __m128 s0 = _mm_loadu_ps(src[0] + i), x0;
    496 
    497             for( k = 1; k < nz; k++ )
    498             {
    499                 x0 = _mm_loadu_ps(src[k] + i);
    500                 s0 = updateOp(s0, x0);
    501             }
    502             _mm_storeu_ps(dst + i, s0);
    503         }
    504 
    505         for( ; i < width; i++ )
    506         {
    507             __m128 s0 = _mm_load_ss(src[0] + i), x0;
    508 
    509             for( k = 1; k < nz; k++ )
    510             {
    511                 x0 = _mm_load_ss(src[k] + i);
    512                 s0 = updateOp(s0, x0);
    513             }
    514             _mm_store_ss(dst + i, s0);
    515         }
    516 
    517         return i;
    518     }
    519 };
    520 
    521 struct VMin8u
    522 {
    523     enum { ESZ = 1 };
    524     __m128i operator()(const __m128i& a, const __m128i& b) const { return _mm_min_epu8(a,b); }
    525 };
    526 struct VMax8u
    527 {
    528     enum { ESZ = 1 };
    529     __m128i operator()(const __m128i& a, const __m128i& b) const { return _mm_max_epu8(a,b); }
    530 };
    531 struct VMin16u
    532 {
    533     enum { ESZ = 2 };
    534     __m128i operator()(const __m128i& a, const __m128i& b) const
    535     { return _mm_subs_epu16(a,_mm_subs_epu16(a,b)); }
    536 };
    537 struct VMax16u
    538 {
    539     enum { ESZ = 2 };
    540     __m128i operator()(const __m128i& a, const __m128i& b) const
    541     { return _mm_adds_epu16(_mm_subs_epu16(a,b), b); }
    542 };
    543 struct VMin16s
    544 {
    545     enum { ESZ = 2 };
    546     __m128i operator()(const __m128i& a, const __m128i& b) const
    547     { return _mm_min_epi16(a, b); }
    548 };
    549 struct VMax16s
    550 {
    551     enum { ESZ = 2 };
    552     __m128i operator()(const __m128i& a, const __m128i& b) const
    553     { return _mm_max_epi16(a, b); }
    554 };
    555 struct VMin32f { __m128 operator()(const __m128& a, const __m128& b) const { return _mm_min_ps(a,b); }};
    556 struct VMax32f { __m128 operator()(const __m128& a, const __m128& b) const { return _mm_max_ps(a,b); }};
    557 
    558 typedef MorphRowIVec<VMin8u> ErodeRowVec8u;
    559 typedef MorphRowIVec<VMax8u> DilateRowVec8u;
    560 typedef MorphRowIVec<VMin16u> ErodeRowVec16u;
    561 typedef MorphRowIVec<VMax16u> DilateRowVec16u;
    562 typedef MorphRowIVec<VMin16s> ErodeRowVec16s;
    563 typedef MorphRowIVec<VMax16s> DilateRowVec16s;
    564 typedef MorphRowFVec<VMin32f> ErodeRowVec32f;
    565 typedef MorphRowFVec<VMax32f> DilateRowVec32f;
    566 
    567 typedef MorphColumnIVec<VMin8u> ErodeColumnVec8u;
    568 typedef MorphColumnIVec<VMax8u> DilateColumnVec8u;
    569 typedef MorphColumnIVec<VMin16u> ErodeColumnVec16u;
    570 typedef MorphColumnIVec<VMax16u> DilateColumnVec16u;
    571 typedef MorphColumnIVec<VMin16s> ErodeColumnVec16s;
    572 typedef MorphColumnIVec<VMax16s> DilateColumnVec16s;
    573 typedef MorphColumnFVec<VMin32f> ErodeColumnVec32f;
    574 typedef MorphColumnFVec<VMax32f> DilateColumnVec32f;
    575 
    576 typedef MorphIVec<VMin8u> ErodeVec8u;
    577 typedef MorphIVec<VMax8u> DilateVec8u;
    578 typedef MorphIVec<VMin16u> ErodeVec16u;
    579 typedef MorphIVec<VMax16u> DilateVec16u;
    580 typedef MorphIVec<VMin16s> ErodeVec16s;
    581 typedef MorphIVec<VMax16s> DilateVec16s;
    582 typedef MorphFVec<VMin32f> ErodeVec32f;
    583 typedef MorphFVec<VMax32f> DilateVec32f;
    584 
    585 #else
    586 
    587 #ifdef HAVE_TEGRA_OPTIMIZATION
    588 using tegra::ErodeRowVec8u;
    589 using tegra::DilateRowVec8u;
    590 
    591 using tegra::ErodeColumnVec8u;
    592 using tegra::DilateColumnVec8u;
    593 #else
    594 typedef MorphRowNoVec ErodeRowVec8u;
    595 typedef MorphRowNoVec DilateRowVec8u;
    596 
    597 typedef MorphColumnNoVec ErodeColumnVec8u;
    598 typedef MorphColumnNoVec DilateColumnVec8u;
    599 #endif
    600 
    601 typedef MorphRowNoVec ErodeRowVec16u;
    602 typedef MorphRowNoVec DilateRowVec16u;
    603 typedef MorphRowNoVec ErodeRowVec16s;
    604 typedef MorphRowNoVec DilateRowVec16s;
    605 typedef MorphRowNoVec ErodeRowVec32f;
    606 typedef MorphRowNoVec DilateRowVec32f;
    607 
    608 typedef MorphColumnNoVec ErodeColumnVec16u;
    609 typedef MorphColumnNoVec DilateColumnVec16u;
    610 typedef MorphColumnNoVec ErodeColumnVec16s;
    611 typedef MorphColumnNoVec DilateColumnVec16s;
    612 typedef MorphColumnNoVec ErodeColumnVec32f;
    613 typedef MorphColumnNoVec DilateColumnVec32f;
    614 
    615 typedef MorphNoVec ErodeVec8u;
    616 typedef MorphNoVec DilateVec8u;
    617 typedef MorphNoVec ErodeVec16u;
    618 typedef MorphNoVec DilateVec16u;
    619 typedef MorphNoVec ErodeVec16s;
    620 typedef MorphNoVec DilateVec16s;
    621 typedef MorphNoVec ErodeVec32f;
    622 typedef MorphNoVec DilateVec32f;
    623 
    624 #endif
    625 
    626 typedef MorphRowNoVec ErodeRowVec64f;
    627 typedef MorphRowNoVec DilateRowVec64f;
    628 typedef MorphColumnNoVec ErodeColumnVec64f;
    629 typedef MorphColumnNoVec DilateColumnVec64f;
    630 typedef MorphNoVec ErodeVec64f;
    631 typedef MorphNoVec DilateVec64f;
    632 
    633 
    634 template<class Op, class VecOp> struct MorphRowFilter : public BaseRowFilter
    635 {
    636     typedef typename Op::rtype T;
    637 
    638     MorphRowFilter( int _ksize, int _anchor ) : vecOp(_ksize, _anchor)
    639     {
    640         ksize = _ksize;
    641         anchor = _anchor;
    642     }
    643 
    644     void operator()(const uchar* src, uchar* dst, int width, int cn)
    645     {
    646         int i, j, k, _ksize = ksize*cn;
    647         const T* S = (const T*)src;
    648         Op op;
    649         T* D = (T*)dst;
    650 
    651         if( _ksize == cn )
    652         {
    653             for( i = 0; i < width*cn; i++ )
    654                 D[i] = S[i];
    655             return;
    656         }
    657 
    658         int i0 = vecOp(src, dst, width, cn);
    659         width *= cn;
    660 
    661         for( k = 0; k < cn; k++, S++, D++ )
    662         {
    663             for( i = i0; i <= width - cn*2; i += cn*2 )
    664             {
    665                 const T* s = S + i;
    666                 T m = s[cn];
    667                 for( j = cn*2; j < _ksize; j += cn )
    668                     m = op(m, s[j]);
    669                 D[i] = op(m, s[0]);
    670                 D[i+cn] = op(m, s[j]);
    671             }
    672 
    673             for( ; i < width; i += cn )
    674             {
    675                 const T* s = S + i;
    676                 T m = s[0];
    677                 for( j = cn; j < _ksize; j += cn )
    678                     m = op(m, s[j]);
    679                 D[i] = m;
    680             }
    681         }
    682     }
    683 
    684     VecOp vecOp;
    685 };
    686 
    687 
    688 template<class Op, class VecOp> struct MorphColumnFilter : public BaseColumnFilter
    689 {
    690     typedef typename Op::rtype T;
    691 
    692     MorphColumnFilter( int _ksize, int _anchor ) : vecOp(_ksize, _anchor)
    693     {
    694         ksize = _ksize;
    695         anchor = _anchor;
    696     }
    697 
    698     void operator()(const uchar** _src, uchar* dst, int dststep, int count, int width)
    699     {
    700         int i, k, _ksize = ksize;
    701         const T** src = (const T**)_src;
    702         T* D = (T*)dst;
    703         Op op;
    704 
    705         int i0 = vecOp(_src, dst, dststep, count, width);
    706         dststep /= sizeof(D[0]);
    707 
    708         for( ; _ksize > 1 && count > 1; count -= 2, D += dststep*2, src += 2 )
    709         {
    710             i = i0;
    711             #if CV_ENABLE_UNROLLED
    712             for( ; i <= width - 4; i += 4 )
    713             {
    714                 const T* sptr = src[1] + i;
    715                 T s0 = sptr[0], s1 = sptr[1], s2 = sptr[2], s3 = sptr[3];
    716 
    717                 for( k = 2; k < _ksize; k++ )
    718                 {
    719                     sptr = src[k] + i;
    720                     s0 = op(s0, sptr[0]); s1 = op(s1, sptr[1]);
    721                     s2 = op(s2, sptr[2]); s3 = op(s3, sptr[3]);
    722                 }
    723 
    724                 sptr = src[0] + i;
    725                 D[i] = op(s0, sptr[0]);
    726                 D[i+1] = op(s1, sptr[1]);
    727                 D[i+2] = op(s2, sptr[2]);
    728                 D[i+3] = op(s3, sptr[3]);
    729 
    730                 sptr = src[k] + i;
    731                 D[i+dststep] = op(s0, sptr[0]);
    732                 D[i+dststep+1] = op(s1, sptr[1]);
    733                 D[i+dststep+2] = op(s2, sptr[2]);
    734                 D[i+dststep+3] = op(s3, sptr[3]);
    735             }
    736             #endif
    737             for( ; i < width; i++ )
    738             {
    739                 T s0 = src[1][i];
    740 
    741                 for( k = 2; k < _ksize; k++ )
    742                     s0 = op(s0, src[k][i]);
    743 
    744                 D[i] = op(s0, src[0][i]);
    745                 D[i+dststep] = op(s0, src[k][i]);
    746             }
    747         }
    748 
    749         for( ; count > 0; count--, D += dststep, src++ )
    750         {
    751             i = i0;
    752             #if CV_ENABLE_UNROLLED
    753             for( ; i <= width - 4; i += 4 )
    754             {
    755                 const T* sptr = src[0] + i;
    756                 T s0 = sptr[0], s1 = sptr[1], s2 = sptr[2], s3 = sptr[3];
    757 
    758                 for( k = 1; k < _ksize; k++ )
    759                 {
    760                     sptr = src[k] + i;
    761                     s0 = op(s0, sptr[0]); s1 = op(s1, sptr[1]);
    762                     s2 = op(s2, sptr[2]); s3 = op(s3, sptr[3]);
    763                 }
    764 
    765                 D[i] = s0; D[i+1] = s1;
    766                 D[i+2] = s2; D[i+3] = s3;
    767             }
    768             #endif
    769             for( ; i < width; i++ )
    770             {
    771                 T s0 = src[0][i];
    772                 for( k = 1; k < _ksize; k++ )
    773                     s0 = op(s0, src[k][i]);
    774                 D[i] = s0;
    775             }
    776         }
    777     }
    778 
    779     VecOp vecOp;
    780 };
    781 
    782 
    783 template<class Op, class VecOp> struct MorphFilter : BaseFilter
    784 {
    785     typedef typename Op::rtype T;
    786 
    787     MorphFilter( const Mat& _kernel, Point _anchor )
    788     {
    789         anchor = _anchor;
    790         ksize = _kernel.size();
    791         CV_Assert( _kernel.type() == CV_8U );
    792 
    793         std::vector<uchar> coeffs; // we do not really the values of non-zero
    794         // kernel elements, just their locations
    795         preprocess2DKernel( _kernel, coords, coeffs );
    796         ptrs.resize( coords.size() );
    797     }
    798 
    799     void operator()(const uchar** src, uchar* dst, int dststep, int count, int width, int cn)
    800     {
    801         const Point* pt = &coords[0];
    802         const T** kp = (const T**)&ptrs[0];
    803         int i, k, nz = (int)coords.size();
    804         Op op;
    805 
    806         width *= cn;
    807         for( ; count > 0; count--, dst += dststep, src++ )
    808         {
    809             T* D = (T*)dst;
    810 
    811             for( k = 0; k < nz; k++ )
    812                 kp[k] = (const T*)src[pt[k].y] + pt[k].x*cn;
    813 
    814             i = vecOp(&ptrs[0], nz, dst, width);
    815             #if CV_ENABLE_UNROLLED
    816             for( ; i <= width - 4; i += 4 )
    817             {
    818                 const T* sptr = kp[0] + i;
    819                 T s0 = sptr[0], s1 = sptr[1], s2 = sptr[2], s3 = sptr[3];
    820 
    821                 for( k = 1; k < nz; k++ )
    822                 {
    823                     sptr = kp[k] + i;
    824                     s0 = op(s0, sptr[0]); s1 = op(s1, sptr[1]);
    825                     s2 = op(s2, sptr[2]); s3 = op(s3, sptr[3]);
    826                 }
    827 
    828                 D[i] = s0; D[i+1] = s1;
    829                 D[i+2] = s2; D[i+3] = s3;
    830             }
    831             #endif
    832             for( ; i < width; i++ )
    833             {
    834                 T s0 = kp[0][i];
    835                 for( k = 1; k < nz; k++ )
    836                     s0 = op(s0, kp[k][i]);
    837                 D[i] = s0;
    838             }
    839         }
    840     }
    841 
    842     std::vector<Point> coords;
    843     std::vector<uchar*> ptrs;
    844     VecOp vecOp;
    845 };
    846 
    847 }
    848 
    849 /////////////////////////////////// External Interface /////////////////////////////////////
    850 
    851 cv::Ptr<cv::BaseRowFilter> cv::getMorphologyRowFilter(int op, int type, int ksize, int anchor)
    852 {
    853     int depth = CV_MAT_DEPTH(type);
    854     if( anchor < 0 )
    855         anchor = ksize/2;
    856     CV_Assert( op == MORPH_ERODE || op == MORPH_DILATE );
    857     if( op == MORPH_ERODE )
    858     {
    859         if( depth == CV_8U )
    860             return makePtr<MorphRowFilter<MinOp<uchar>,
    861                                       ErodeRowVec8u> >(ksize, anchor);
    862         if( depth == CV_16U )
    863             return makePtr<MorphRowFilter<MinOp<ushort>,
    864                                       ErodeRowVec16u> >(ksize, anchor);
    865         if( depth == CV_16S )
    866             return makePtr<MorphRowFilter<MinOp<short>,
    867                                       ErodeRowVec16s> >(ksize, anchor);
    868         if( depth == CV_32F )
    869             return makePtr<MorphRowFilter<MinOp<float>,
    870                                       ErodeRowVec32f> >(ksize, anchor);
    871         if( depth == CV_64F )
    872             return makePtr<MorphRowFilter<MinOp<double>,
    873                                       ErodeRowVec64f> >(ksize, anchor);
    874     }
    875     else
    876     {
    877         if( depth == CV_8U )
    878             return makePtr<MorphRowFilter<MaxOp<uchar>,
    879                                       DilateRowVec8u> >(ksize, anchor);
    880         if( depth == CV_16U )
    881             return makePtr<MorphRowFilter<MaxOp<ushort>,
    882                                       DilateRowVec16u> >(ksize, anchor);
    883         if( depth == CV_16S )
    884             return makePtr<MorphRowFilter<MaxOp<short>,
    885                                       DilateRowVec16s> >(ksize, anchor);
    886         if( depth == CV_32F )
    887             return makePtr<MorphRowFilter<MaxOp<float>,
    888                                       DilateRowVec32f> >(ksize, anchor);
    889         if( depth == CV_64F )
    890             return makePtr<MorphRowFilter<MaxOp<double>,
    891                                       DilateRowVec64f> >(ksize, anchor);
    892     }
    893 
    894     CV_Error_( CV_StsNotImplemented, ("Unsupported data type (=%d)", type));
    895     return Ptr<BaseRowFilter>();
    896 }
    897 
    898 cv::Ptr<cv::BaseColumnFilter> cv::getMorphologyColumnFilter(int op, int type, int ksize, int anchor)
    899 {
    900     int depth = CV_MAT_DEPTH(type);
    901     if( anchor < 0 )
    902         anchor = ksize/2;
    903     CV_Assert( op == MORPH_ERODE || op == MORPH_DILATE );
    904     if( op == MORPH_ERODE )
    905     {
    906         if( depth == CV_8U )
    907             return makePtr<MorphColumnFilter<MinOp<uchar>,
    908                                          ErodeColumnVec8u> >(ksize, anchor);
    909         if( depth == CV_16U )
    910             return makePtr<MorphColumnFilter<MinOp<ushort>,
    911                                          ErodeColumnVec16u> >(ksize, anchor);
    912         if( depth == CV_16S )
    913             return makePtr<MorphColumnFilter<MinOp<short>,
    914                                          ErodeColumnVec16s> >(ksize, anchor);
    915         if( depth == CV_32F )
    916             return makePtr<MorphColumnFilter<MinOp<float>,
    917                                          ErodeColumnVec32f> >(ksize, anchor);
    918         if( depth == CV_64F )
    919             return makePtr<MorphColumnFilter<MinOp<double>,
    920                                          ErodeColumnVec64f> >(ksize, anchor);
    921     }
    922     else
    923     {
    924         if( depth == CV_8U )
    925             return makePtr<MorphColumnFilter<MaxOp<uchar>,
    926                                          DilateColumnVec8u> >(ksize, anchor);
    927         if( depth == CV_16U )
    928             return makePtr<MorphColumnFilter<MaxOp<ushort>,
    929                                          DilateColumnVec16u> >(ksize, anchor);
    930         if( depth == CV_16S )
    931             return makePtr<MorphColumnFilter<MaxOp<short>,
    932                                          DilateColumnVec16s> >(ksize, anchor);
    933         if( depth == CV_32F )
    934             return makePtr<MorphColumnFilter<MaxOp<float>,
    935                                          DilateColumnVec32f> >(ksize, anchor);
    936         if( depth == CV_64F )
    937             return makePtr<MorphColumnFilter<MaxOp<double>,
    938                                          DilateColumnVec64f> >(ksize, anchor);
    939     }
    940 
    941     CV_Error_( CV_StsNotImplemented, ("Unsupported data type (=%d)", type));
    942     return Ptr<BaseColumnFilter>();
    943 }
    944 
    945 
    946 cv::Ptr<cv::BaseFilter> cv::getMorphologyFilter(int op, int type, InputArray _kernel, Point anchor)
    947 {
    948     Mat kernel = _kernel.getMat();
    949     int depth = CV_MAT_DEPTH(type);
    950     anchor = normalizeAnchor(anchor, kernel.size());
    951     CV_Assert( op == MORPH_ERODE || op == MORPH_DILATE );
    952     if( op == MORPH_ERODE )
    953     {
    954         if( depth == CV_8U )
    955             return makePtr<MorphFilter<MinOp<uchar>, ErodeVec8u> >(kernel, anchor);
    956         if( depth == CV_16U )
    957             return makePtr<MorphFilter<MinOp<ushort>, ErodeVec16u> >(kernel, anchor);
    958         if( depth == CV_16S )
    959             return makePtr<MorphFilter<MinOp<short>, ErodeVec16s> >(kernel, anchor);
    960         if( depth == CV_32F )
    961             return makePtr<MorphFilter<MinOp<float>, ErodeVec32f> >(kernel, anchor);
    962         if( depth == CV_64F )
    963             return makePtr<MorphFilter<MinOp<double>, ErodeVec64f> >(kernel, anchor);
    964     }
    965     else
    966     {
    967         if( depth == CV_8U )
    968             return makePtr<MorphFilter<MaxOp<uchar>, DilateVec8u> >(kernel, anchor);
    969         if( depth == CV_16U )
    970             return makePtr<MorphFilter<MaxOp<ushort>, DilateVec16u> >(kernel, anchor);
    971         if( depth == CV_16S )
    972             return makePtr<MorphFilter<MaxOp<short>, DilateVec16s> >(kernel, anchor);
    973         if( depth == CV_32F )
    974             return makePtr<MorphFilter<MaxOp<float>, DilateVec32f> >(kernel, anchor);
    975         if( depth == CV_64F )
    976             return makePtr<MorphFilter<MaxOp<double>, DilateVec64f> >(kernel, anchor);
    977     }
    978 
    979     CV_Error_( CV_StsNotImplemented, ("Unsupported data type (=%d)", type));
    980     return Ptr<BaseFilter>();
    981 }
    982 
    983 
    984 cv::Ptr<cv::FilterEngine> cv::createMorphologyFilter( int op, int type, InputArray _kernel,
    985                                                       Point anchor, int _rowBorderType, int _columnBorderType,
    986                                                       const Scalar& _borderValue )
    987 {
    988     Mat kernel = _kernel.getMat();
    989     anchor = normalizeAnchor(anchor, kernel.size());
    990 
    991     Ptr<BaseRowFilter> rowFilter;
    992     Ptr<BaseColumnFilter> columnFilter;
    993     Ptr<BaseFilter> filter2D;
    994 
    995     if( countNonZero(kernel) == kernel.rows*kernel.cols )
    996     {
    997         // rectangular structuring element
    998         rowFilter = getMorphologyRowFilter(op, type, kernel.cols, anchor.x);
    999         columnFilter = getMorphologyColumnFilter(op, type, kernel.rows, anchor.y);
   1000     }
   1001     else
   1002         filter2D = getMorphologyFilter(op, type, kernel, anchor);
   1003 
   1004     Scalar borderValue = _borderValue;
   1005     if( (_rowBorderType == BORDER_CONSTANT || _columnBorderType == BORDER_CONSTANT) &&
   1006             borderValue == morphologyDefaultBorderValue() )
   1007     {
   1008         int depth = CV_MAT_DEPTH(type);
   1009         CV_Assert( depth == CV_8U || depth == CV_16U || depth == CV_16S ||
   1010                    depth == CV_32F || depth == CV_64F );
   1011         if( op == MORPH_ERODE )
   1012             borderValue = Scalar::all( depth == CV_8U ? (double)UCHAR_MAX :
   1013                                        depth == CV_16U ? (double)USHRT_MAX :
   1014                                        depth == CV_16S ? (double)SHRT_MAX :
   1015                                        depth == CV_32F ? (double)FLT_MAX : DBL_MAX);
   1016         else
   1017             borderValue = Scalar::all( depth == CV_8U || depth == CV_16U ?
   1018                                            0. :
   1019                                        depth == CV_16S ? (double)SHRT_MIN :
   1020                                        depth == CV_32F ? (double)-FLT_MAX : -DBL_MAX);
   1021     }
   1022 
   1023     return makePtr<FilterEngine>(filter2D, rowFilter, columnFilter,
   1024                                  type, type, type, _rowBorderType, _columnBorderType, borderValue );
   1025 }
   1026 
   1027 
   1028 cv::Mat cv::getStructuringElement(int shape, Size ksize, Point anchor)
   1029 {
   1030     int i, j;
   1031     int r = 0, c = 0;
   1032     double inv_r2 = 0;
   1033 
   1034     CV_Assert( shape == MORPH_RECT || shape == MORPH_CROSS || shape == MORPH_ELLIPSE );
   1035 
   1036     anchor = normalizeAnchor(anchor, ksize);
   1037 
   1038     if( ksize == Size(1,1) )
   1039         shape = MORPH_RECT;
   1040 
   1041     if( shape == MORPH_ELLIPSE )
   1042     {
   1043         r = ksize.height/2;
   1044         c = ksize.width/2;
   1045         inv_r2 = r ? 1./((double)r*r) : 0;
   1046     }
   1047 
   1048     Mat elem(ksize, CV_8U);
   1049 
   1050     for( i = 0; i < ksize.height; i++ )
   1051     {
   1052         uchar* ptr = elem.ptr(i);
   1053         int j1 = 0, j2 = 0;
   1054 
   1055         if( shape == MORPH_RECT || (shape == MORPH_CROSS && i == anchor.y) )
   1056             j2 = ksize.width;
   1057         else if( shape == MORPH_CROSS )
   1058             j1 = anchor.x, j2 = j1 + 1;
   1059         else
   1060         {
   1061             int dy = i - r;
   1062             if( std::abs(dy) <= r )
   1063             {
   1064                 int dx = saturate_cast<int>(c*std::sqrt((r*r - dy*dy)*inv_r2));
   1065                 j1 = std::max( c - dx, 0 );
   1066                 j2 = std::min( c + dx + 1, ksize.width );
   1067             }
   1068         }
   1069 
   1070         for( j = 0; j < j1; j++ )
   1071             ptr[j] = 0;
   1072         for( ; j < j2; j++ )
   1073             ptr[j] = 1;
   1074         for( ; j < ksize.width; j++ )
   1075             ptr[j] = 0;
   1076     }
   1077 
   1078     return elem;
   1079 }
   1080 
   1081 namespace cv
   1082 {
   1083 
   1084 class MorphologyRunner : public ParallelLoopBody
   1085 {
   1086 public:
   1087     MorphologyRunner(Mat _src, Mat _dst, int _nStripes, int _iterations,
   1088                      int _op, Mat _kernel, Point _anchor,
   1089                      int _rowBorderType, int _columnBorderType, const Scalar& _borderValue) :
   1090         borderValue(_borderValue)
   1091     {
   1092         src = _src;
   1093         dst = _dst;
   1094 
   1095         nStripes = _nStripes;
   1096         iterations = _iterations;
   1097 
   1098         op = _op;
   1099         kernel = _kernel;
   1100         anchor = _anchor;
   1101         rowBorderType = _rowBorderType;
   1102         columnBorderType = _columnBorderType;
   1103     }
   1104 
   1105     void operator () ( const Range& range ) const
   1106     {
   1107         int row0 = std::min(cvRound(range.start * src.rows / nStripes), src.rows);
   1108         int row1 = std::min(cvRound(range.end * src.rows / nStripes), src.rows);
   1109 
   1110         /*if(0)
   1111             printf("Size = (%d, %d), range[%d,%d), row0 = %d, row1 = %d\n",
   1112                    src.rows, src.cols, range.start, range.end, row0, row1);*/
   1113 
   1114         Mat srcStripe = src.rowRange(row0, row1);
   1115         Mat dstStripe = dst.rowRange(row0, row1);
   1116 
   1117         Ptr<FilterEngine> f = createMorphologyFilter(op, src.type(), kernel, anchor,
   1118                                                      rowBorderType, columnBorderType, borderValue );
   1119 
   1120         f->apply( srcStripe, dstStripe );
   1121         for( int i = 1; i < iterations; i++ )
   1122             f->apply( dstStripe, dstStripe );
   1123     }
   1124 
   1125 private:
   1126     Mat src;
   1127     Mat dst;
   1128     int nStripes;
   1129     int iterations;
   1130 
   1131     int op;
   1132     Mat kernel;
   1133     Point anchor;
   1134     int rowBorderType;
   1135     int columnBorderType;
   1136     Scalar borderValue;
   1137 };
   1138 
   1139 #if IPP_VERSION_X100 >= 801
   1140 static bool IPPMorphReplicate(int op, const Mat &src, Mat &dst, const Mat &kernel,
   1141                               const Size& ksize, const Point &anchor, bool rectKernel)
   1142 {
   1143     int type = src.type();
   1144     const Mat* _src = &src;
   1145     Mat temp;
   1146     if (src.data == dst.data)
   1147     {
   1148         src.copyTo(temp);
   1149         _src = &temp;
   1150     }
   1151 
   1152     IppiSize roiSize = {src.cols, src.rows};
   1153     IppiSize kernelSize = {ksize.width, ksize.height};
   1154 
   1155     if (!rectKernel)
   1156     {
   1157 #if 1
   1158         if (((kernel.cols - 1) / 2 != anchor.x) || ((kernel.rows - 1) / 2 != anchor.y))
   1159             return false;
   1160         #define IPP_MORPH_CASE(cvtype, flavor, data_type) \
   1161         case cvtype: \
   1162             {\
   1163                 int specSize = 0, bufferSize = 0;\
   1164                 if (0 > ippiMorphologyBorderGetSize_##flavor(roiSize.width, kernelSize, &specSize, &bufferSize))\
   1165                     return false;\
   1166                 IppiMorphState *pSpec = (IppiMorphState*)ippMalloc(specSize);\
   1167                 Ipp8u *pBuffer = (Ipp8u*)ippMalloc(bufferSize);\
   1168                 if (0 > ippiMorphologyBorderInit_##flavor(roiSize.width, kernel.ptr(), kernelSize, pSpec, pBuffer))\
   1169                 {\
   1170                     ippFree(pBuffer);\
   1171                     ippFree(pSpec);\
   1172                     return false;\
   1173                 }\
   1174                 bool ok = false;\
   1175                 if (op == MORPH_ERODE)\
   1176                     ok = (0 <= ippiErodeBorder_##flavor(_src->ptr<Ipp##data_type>(), (int)_src->step[0], dst.ptr<Ipp##data_type>(), (int)dst.step[0],\
   1177                                             roiSize, ippBorderRepl, 0, pSpec, pBuffer));\
   1178                 else\
   1179                     ok = (0 <= ippiDilateBorder_##flavor(_src->ptr<Ipp##data_type>(), (int)_src->step[0], dst.ptr<Ipp##data_type>(), (int)dst.step[0],\
   1180                                             roiSize, ippBorderRepl, 0, pSpec, pBuffer));\
   1181                 ippFree(pBuffer);\
   1182                 ippFree(pSpec);\
   1183                 return ok;\
   1184             }\
   1185             break;
   1186 #else
   1187         IppiPoint point = {anchor.x, anchor.y};
   1188         // this is case, which can be used with the anchor not in center of the kernel, but
   1189         // ippiMorphologyBorderGetSize_, ippiErodeBorderReplicate_ and ippiDilateBorderReplicate_ are deprecated.
   1190         #define IPP_MORPH_CASE(cvtype, flavor, data_type) \
   1191         case cvtype: \
   1192             {\
   1193                 int specSize = 0;\
   1194                 int bufferSize = 0;\
   1195                 if (0 > ippiMorphologyGetSize_##flavor( roiSize.width, kernel.ptr() kernelSize, &specSize))\
   1196                     return false;\
   1197                 bool ok = false;\
   1198                 IppiMorphState* pState = (IppiMorphState*)ippMalloc(specSize);\
   1199                 if (ippiMorphologyInit_##flavor(roiSize.width, kernel.ptr(), kernelSize, point, pState) >= 0)\
   1200                 {\
   1201                     if (op == MORPH_ERODE)\
   1202                         ok = ippiErodeBorderReplicate_##flavor(_src->ptr<Ipp##data_type>(), (int)_src->step[0],\
   1203                             dst.ptr<Ipp##data_type>(), (int)dst.step[0],\
   1204                             roiSize, ippBorderRepl, pState ) >= 0;\
   1205                     else\
   1206                         ok = ippiDilateBorderReplicate_##flavor(_src->ptr<Ipp##data_type>(), (int)_src->step[0],\
   1207                             dst.ptr<Ipp##data_type>(), (int)dst.step[0],\
   1208                             roiSize, ippBorderRepl, pState ) >= 0;\
   1209                 }\
   1210                 ippFree(pState);\
   1211                 return ok;\
   1212             }\
   1213             break;
   1214 #endif
   1215         switch (type)
   1216         {
   1217         IPP_MORPH_CASE(CV_8UC1, 8u_C1R, 8u);
   1218         IPP_MORPH_CASE(CV_8UC3, 8u_C3R, 8u);
   1219         IPP_MORPH_CASE(CV_8UC4, 8u_C4R, 8u);
   1220         IPP_MORPH_CASE(CV_32FC1, 32f_C1R, 32f);
   1221         IPP_MORPH_CASE(CV_32FC3, 32f_C3R, 32f);
   1222         IPP_MORPH_CASE(CV_32FC4, 32f_C4R, 32f);
   1223         default:
   1224             ;
   1225         }
   1226 
   1227         #undef IPP_MORPH_CASE
   1228     }
   1229     else
   1230     {
   1231         IppiPoint point = {anchor.x, anchor.y};
   1232 
   1233         #define IPP_MORPH_CASE(cvtype, flavor, data_type) \
   1234         case cvtype: \
   1235             {\
   1236                 int bufSize = 0;\
   1237                 if (0 > ippiFilterMinGetBufferSize_##flavor(src.cols, kernelSize, &bufSize))\
   1238                     return false;\
   1239                 AutoBuffer<uchar> buf(bufSize + 64);\
   1240                 uchar* buffer = alignPtr((uchar*)buf, 32);\
   1241                 if (op == MORPH_ERODE)\
   1242                     return (0 <= ippiFilterMinBorderReplicate_##flavor(_src->ptr<Ipp##data_type>(), (int)_src->step[0], dst.ptr<Ipp##data_type>(), (int)dst.step[0], roiSize, kernelSize, point, buffer));\
   1243                 return (0 <= ippiFilterMaxBorderReplicate_##flavor(_src->ptr<Ipp##data_type>(), (int)_src->step[0], dst.ptr<Ipp##data_type>(), (int)dst.step[0], roiSize, kernelSize, point, buffer));\
   1244             }\
   1245             break;
   1246 
   1247         switch (type)
   1248         {
   1249         IPP_MORPH_CASE(CV_8UC1, 8u_C1R, 8u);
   1250         IPP_MORPH_CASE(CV_8UC3, 8u_C3R, 8u);
   1251         IPP_MORPH_CASE(CV_8UC4, 8u_C4R, 8u);
   1252         IPP_MORPH_CASE(CV_32FC1, 32f_C1R, 32f);
   1253         IPP_MORPH_CASE(CV_32FC3, 32f_C3R, 32f);
   1254         IPP_MORPH_CASE(CV_32FC4, 32f_C4R, 32f);
   1255         default:
   1256             ;
   1257         }
   1258         #undef IPP_MORPH_CASE
   1259     }
   1260     return false;
   1261 }
   1262 
   1263 static bool IPPMorphOp(int op, InputArray _src, OutputArray _dst,
   1264     const Mat& _kernel, Point anchor, int iterations,
   1265     int borderType, const Scalar &borderValue)
   1266 {
   1267     Mat src = _src.getMat(), kernel = _kernel;
   1268     int type = src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
   1269 
   1270     if( !( depth == CV_8U || depth == CV_32F ) || !(cn == 1 || cn == 3 || cn == 4) ||
   1271         !( borderType == cv::BORDER_REPLICATE || (borderType == cv::BORDER_CONSTANT && borderValue == morphologyDefaultBorderValue() &&
   1272         kernel.size() == Size(3,3)) ) || !( op == MORPH_DILATE || op == MORPH_ERODE) || _src.isSubmatrix() )
   1273         return false;
   1274 
   1275     // In case BORDER_CONSTANT, IPPMorphReplicate works correct with kernels of size 3*3 only
   1276     if( borderType == cv::BORDER_CONSTANT && kernel.data )
   1277     {
   1278         int x, y;
   1279         for( y = 0; y < kernel.rows; y++ )
   1280         {
   1281             if( kernel.at<uchar>(y, anchor.x) != 0 )
   1282                 continue;
   1283             for( x = 0; x < kernel.cols; x++ )
   1284             {
   1285                 if( kernel.at<uchar>(y,x) != 0 )
   1286                     return false;
   1287             }
   1288         }
   1289         for( x = 0; x < kernel.cols; x++ )
   1290         {
   1291             if( kernel.at<uchar>(anchor.y, x) != 0 )
   1292                 continue;
   1293             for( y = 0; y < kernel.rows; y++ )
   1294             {
   1295                 if( kernel.at<uchar>(y,x) != 0 )
   1296                     return false;
   1297             }
   1298         }
   1299 
   1300     }
   1301     Size ksize = !kernel.empty() ? kernel.size() : Size(3,3);
   1302 
   1303     _dst.create( src.size(), src.type() );
   1304     Mat dst = _dst.getMat();
   1305 
   1306     if( iterations == 0 || kernel.rows*kernel.cols == 1 )
   1307     {
   1308         src.copyTo(dst);
   1309         return true;
   1310     }
   1311 
   1312     bool rectKernel = false;
   1313     if( kernel.empty() )
   1314     {
   1315         ksize = Size(1+iterations*2,1+iterations*2);
   1316         anchor = Point(iterations, iterations);
   1317         rectKernel = true;
   1318         iterations = 1;
   1319     }
   1320     else if( iterations >= 1 && countNonZero(kernel) == kernel.rows*kernel.cols )
   1321     {
   1322         ksize = Size(ksize.width + (iterations-1)*(ksize.width-1),
   1323              ksize.height + (iterations-1)*(ksize.height-1)),
   1324         anchor = Point(anchor.x*iterations, anchor.y*iterations);
   1325         kernel = Mat();
   1326         rectKernel = true;
   1327         iterations = 1;
   1328     }
   1329 
   1330     // TODO: implement the case of iterations > 1.
   1331     if( iterations > 1 )
   1332         return false;
   1333 
   1334     return IPPMorphReplicate( op, src, dst, kernel, ksize, anchor, rectKernel );
   1335 }
   1336 #endif
   1337 
   1338 #ifdef HAVE_OPENCL
   1339 
   1340 #define ROUNDUP(sz, n)      ((sz) + (n) - 1 - (((sz) + (n) - 1) % (n)))
   1341 
   1342 static bool ocl_morphSmall( InputArray _src, OutputArray _dst, InputArray _kernel, Point anchor, int borderType,
   1343                             int op, int actual_op = -1, InputArray _extraMat = noArray())
   1344 {
   1345     const ocl::Device & dev = ocl::Device::getDefault();
   1346     int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), esz = CV_ELEM_SIZE(type);
   1347     bool doubleSupport = dev.doubleFPConfig() > 0;
   1348 
   1349     if (cn > 4 || (!doubleSupport && depth == CV_64F) ||
   1350         _src.offset() % esz != 0 || _src.step() % esz != 0)
   1351         return false;
   1352 
   1353     bool haveExtraMat = !_extraMat.empty();
   1354     CV_Assert(actual_op <= 3 || haveExtraMat);
   1355 
   1356     Size ksize = _kernel.size();
   1357     if (anchor.x < 0)
   1358         anchor.x = ksize.width / 2;
   1359     if (anchor.y < 0)
   1360         anchor.y = ksize.height / 2;
   1361 
   1362     Size size = _src.size(), wholeSize;
   1363     bool isolated = (borderType & BORDER_ISOLATED) != 0;
   1364     borderType &= ~BORDER_ISOLATED;
   1365     int wdepth = depth, wtype = type;
   1366     if (depth == CV_8U)
   1367     {
   1368         wdepth = CV_32S;
   1369         wtype = CV_MAKETYPE(wdepth, cn);
   1370     }
   1371     char cvt[2][40];
   1372 
   1373     const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE",
   1374                                        "BORDER_REFLECT", 0, "BORDER_REFLECT_101" };
   1375     size_t globalsize[2] = { size.width, size.height };
   1376 
   1377     UMat src = _src.getUMat();
   1378     if (!isolated)
   1379     {
   1380         Point ofs;
   1381         src.locateROI(wholeSize, ofs);
   1382     }
   1383 
   1384     int h = isolated ? size.height : wholeSize.height;
   1385     int w = isolated ? size.width : wholeSize.width;
   1386     if (w < ksize.width || h < ksize.height)
   1387         return false;
   1388 
   1389     // Figure out what vector size to use for loading the pixels.
   1390     int pxLoadNumPixels = cn != 1 || size.width % 4 ? 1 : 4;
   1391     int pxLoadVecSize = cn * pxLoadNumPixels;
   1392 
   1393     // Figure out how many pixels per work item to compute in X and Y
   1394     // directions.  Too many and we run out of registers.
   1395     int pxPerWorkItemX = 1, pxPerWorkItemY = 1;
   1396     if (cn <= 2 && ksize.width <= 4 && ksize.height <= 4)
   1397     {
   1398         pxPerWorkItemX = size.width % 8 ? size.width % 4 ? size.width % 2 ? 1 : 2 : 4 : 8;
   1399         pxPerWorkItemY = size.height % 2 ? 1 : 2;
   1400     }
   1401     else if (cn < 4 || (ksize.width <= 4 && ksize.height <= 4))
   1402     {
   1403         pxPerWorkItemX = size.width % 2 ? 1 : 2;
   1404         pxPerWorkItemY = size.height % 2 ? 1 : 2;
   1405     }
   1406     globalsize[0] = size.width / pxPerWorkItemX;
   1407     globalsize[1] = size.height / pxPerWorkItemY;
   1408 
   1409     // Need some padding in the private array for pixels
   1410     int privDataWidth = ROUNDUP(pxPerWorkItemX + ksize.width - 1, pxLoadNumPixels);
   1411 
   1412     // Make the global size a nice round number so the runtime can pick
   1413     // from reasonable choices for the workgroup size
   1414     const int wgRound = 256;
   1415     globalsize[0] = ROUNDUP(globalsize[0], wgRound);
   1416 
   1417     if (actual_op < 0)
   1418         actual_op = op;
   1419 
   1420     // build processing
   1421     String processing;
   1422     Mat kernel8u;
   1423     _kernel.getMat().convertTo(kernel8u, CV_8U);
   1424     for (int y = 0; y < kernel8u.rows; ++y)
   1425         for (int x = 0; x < kernel8u.cols; ++x)
   1426             if (kernel8u.at<uchar>(y, x) != 0)
   1427                 processing += format("PROCESS(%d,%d)", y, x);
   1428 
   1429 
   1430     static const char * const op2str[] = { "OP_ERODE", "OP_DILATE", NULL, NULL, "OP_GRADIENT", "OP_TOPHAT", "OP_BLACKHAT" };
   1431     String opts = format("-D cn=%d "
   1432             "-D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d "
   1433             "-D PX_LOAD_VEC_SIZE=%d -D PX_LOAD_NUM_PX=%d -D DEPTH_%d "
   1434             "-D PX_PER_WI_X=%d -D PX_PER_WI_Y=%d -D PRIV_DATA_WIDTH=%d -D %s -D %s "
   1435             "-D PX_LOAD_X_ITERATIONS=%d -D PX_LOAD_Y_ITERATIONS=%d "
   1436             "-D srcT=%s -D srcT1=%s -D dstT=srcT -D dstT1=srcT1 -D WT=%s -D WT1=%s "
   1437             "-D convertToWT=%s -D convertToDstT=%s -D PX_LOAD_FLOAT_VEC_CONV=convert_%s -D PROCESS_ELEM_=%s -D %s%s",
   1438             cn, anchor.x, anchor.y, ksize.width, ksize.height,
   1439             pxLoadVecSize, pxLoadNumPixels, depth,
   1440             pxPerWorkItemX, pxPerWorkItemY, privDataWidth, borderMap[borderType],
   1441             isolated ? "BORDER_ISOLATED" : "NO_BORDER_ISOLATED",
   1442             privDataWidth / pxLoadNumPixels, pxPerWorkItemY + ksize.height - 1,
   1443             ocl::typeToStr(type), ocl::typeToStr(depth),
   1444             haveExtraMat ? ocl::typeToStr(wtype):"srcT",//to prevent overflow - WT
   1445             haveExtraMat ? ocl::typeToStr(wdepth):"srcT1",//to prevent overflow - WT1
   1446             haveExtraMat ? ocl::convertTypeStr(depth, wdepth, cn, cvt[0]) : "noconvert",//to prevent overflow - src to WT
   1447             haveExtraMat ? ocl::convertTypeStr(wdepth, depth, cn, cvt[1]) : "noconvert",//to prevent overflow - WT to dst
   1448             ocl::typeToStr(CV_MAKE_TYPE(haveExtraMat ? wdepth : depth, pxLoadVecSize)), //PX_LOAD_FLOAT_VEC_CONV
   1449             processing.c_str(), op2str[op],
   1450             actual_op == op ? "" : cv::format(" -D %s", op2str[actual_op]).c_str());
   1451 
   1452     ocl::Kernel kernel("filterSmall", cv::ocl::imgproc::filterSmall_oclsrc, opts);
   1453     if (kernel.empty())
   1454         return false;
   1455 
   1456     _dst.create(size, type);
   1457     UMat dst = _dst.getUMat();
   1458 
   1459     UMat source;
   1460     if(src.u != dst.u)
   1461         source = src;
   1462     else
   1463     {
   1464         Point ofs;
   1465         int cols =  src.cols, rows = src.rows;
   1466         src.locateROI(wholeSize, ofs);
   1467         src.adjustROI(ofs.y, wholeSize.height - rows - ofs.y, ofs.x, wholeSize.width - cols - ofs.x);
   1468         src.copyTo(source);
   1469 
   1470         src.adjustROI(-ofs.y, -wholeSize.height + rows + ofs.y, -ofs.x, -wholeSize.width + cols + ofs.x);
   1471         source.adjustROI(-ofs.y, -wholeSize.height + rows + ofs.y, -ofs.x, -wholeSize.width + cols + ofs.x);
   1472         source.locateROI(wholeSize, ofs);
   1473     }
   1474 
   1475     UMat extraMat = _extraMat.getUMat();
   1476 
   1477     int idxArg = kernel.set(0, ocl::KernelArg::PtrReadOnly(source));
   1478     idxArg = kernel.set(idxArg, (int)source.step);
   1479     int srcOffsetX = (int)((source.offset % source.step) / source.elemSize());
   1480     int srcOffsetY = (int)(source.offset / source.step);
   1481     int srcEndX = isolated ? srcOffsetX + size.width : wholeSize.width;
   1482     int srcEndY = isolated ? srcOffsetY + size.height : wholeSize.height;
   1483     idxArg = kernel.set(idxArg, srcOffsetX);
   1484     idxArg = kernel.set(idxArg, srcOffsetY);
   1485     idxArg = kernel.set(idxArg, srcEndX);
   1486     idxArg = kernel.set(idxArg, srcEndY);
   1487     idxArg = kernel.set(idxArg, ocl::KernelArg::WriteOnly(dst));
   1488 
   1489     if (haveExtraMat)
   1490     {
   1491         idxArg = kernel.set(idxArg, ocl::KernelArg::ReadOnlyNoSize(extraMat));
   1492     }
   1493 
   1494     return kernel.run(2, globalsize, NULL, false);
   1495 
   1496 }
   1497 
   1498 static bool ocl_morphOp(InputArray _src, OutputArray _dst, InputArray _kernel,
   1499                         Point anchor, int iterations, int op, int borderType,
   1500                         const Scalar &, int actual_op = -1, InputArray _extraMat = noArray())
   1501 {
   1502     const ocl::Device & dev = ocl::Device::getDefault();
   1503     int type = _src.type(), depth = CV_MAT_DEPTH(type),
   1504             cn = CV_MAT_CN(type), esz = CV_ELEM_SIZE(type);
   1505     Mat kernel = _kernel.getMat();
   1506     Size ksize = !kernel.empty() ? kernel.size() : Size(3, 3), ssize = _src.size();
   1507 
   1508     bool doubleSupport = dev.doubleFPConfig() > 0;
   1509     if ((depth == CV_64F && !doubleSupport) || borderType != BORDER_CONSTANT)
   1510         return false;
   1511 
   1512     bool haveExtraMat = !_extraMat.empty();
   1513     CV_Assert(actual_op <= 3 || haveExtraMat);
   1514 
   1515     if (kernel.empty())
   1516     {
   1517         kernel = getStructuringElement(MORPH_RECT, Size(1+iterations*2,1+iterations*2));
   1518         anchor = Point(iterations, iterations);
   1519         iterations = 1;
   1520     }
   1521     else if( iterations > 1 && countNonZero(kernel) == kernel.rows*kernel.cols )
   1522     {
   1523         anchor = Point(anchor.x*iterations, anchor.y*iterations);
   1524         kernel = getStructuringElement(MORPH_RECT,
   1525                                        Size(ksize.width + (iterations-1)*(ksize.width-1),
   1526                                             ksize.height + (iterations-1)*(ksize.height-1)),
   1527                                        anchor);
   1528         iterations = 1;
   1529     }
   1530 
   1531     // try to use OpenCL kernel adopted for small morph kernel
   1532     if (dev.isIntel() && !(dev.type() & ocl::Device::TYPE_CPU) &&
   1533         ((ksize.width < 5 && ksize.height < 5 && esz <= 4) ||
   1534          (ksize.width == 5 && ksize.height == 5 && cn == 1)) &&
   1535          (iterations == 1)
   1536 #if defined __APPLE__
   1537          && cn == 1
   1538 #endif
   1539          )
   1540     {
   1541         if (ocl_morphSmall(_src, _dst, kernel, anchor, borderType, op, actual_op, _extraMat))
   1542             return true;
   1543     }
   1544 
   1545     if (iterations == 0 || kernel.rows*kernel.cols == 1)
   1546     {
   1547         _src.copyTo(_dst);
   1548         return true;
   1549     }
   1550 
   1551 #ifdef ANDROID
   1552     size_t localThreads[2] = { 16, 8 };
   1553 #else
   1554     size_t localThreads[2] = { 16, 16 };
   1555 #endif
   1556     size_t globalThreads[2] = { ssize.width, ssize.height };
   1557 
   1558 #ifdef __APPLE__
   1559     if( actual_op != MORPH_ERODE && actual_op != MORPH_DILATE )
   1560         localThreads[0] = localThreads[1] = 4;
   1561 #endif
   1562 
   1563     if (localThreads[0]*localThreads[1] * 2 < (localThreads[0] + ksize.width - 1) * (localThreads[1] + ksize.height - 1))
   1564         return false;
   1565 
   1566 #ifdef ANDROID
   1567     if (dev.isNVidia())
   1568         return false;
   1569 #endif
   1570 
   1571     // build processing
   1572     String processing;
   1573     Mat kernel8u;
   1574     kernel.convertTo(kernel8u, CV_8U);
   1575     for (int y = 0; y < kernel8u.rows; ++y)
   1576         for (int x = 0; x < kernel8u.cols; ++x)
   1577             if (kernel8u.at<uchar>(y, x) != 0)
   1578                 processing += format("PROCESS(%d,%d)", y, x);
   1579 
   1580     static const char * const op2str[] = { "OP_ERODE", "OP_DILATE", NULL, NULL, "OP_GRADIENT", "OP_TOPHAT", "OP_BLACKHAT" };
   1581 
   1582     char cvt[2][50];
   1583     int wdepth = std::max(depth, CV_32F), scalarcn = cn == 3 ? 4 : cn;
   1584 
   1585     if (actual_op < 0)
   1586         actual_op = op;
   1587 
   1588     std::vector<ocl::Kernel> kernels(iterations);
   1589     for (int i = 0; i < iterations; i++)
   1590     {
   1591         int current_op = iterations == i + 1 ? actual_op : op;
   1592         String buildOptions = format("-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D %s%s"
   1593                                      " -D PROCESS_ELEMS=%s -D T=%s -D DEPTH_%d -D cn=%d -D T1=%s"
   1594                                      " -D convertToWT=%s -D convertToT=%s -D ST=%s%s",
   1595                                      anchor.x, anchor.y, (int)localThreads[0], (int)localThreads[1], op2str[op],
   1596                                      doubleSupport ? " -D DOUBLE_SUPPORT" : "", processing.c_str(),
   1597                                      ocl::typeToStr(type), depth, cn, ocl::typeToStr(depth),
   1598                                      ocl::convertTypeStr(depth, wdepth, cn, cvt[0]),
   1599                                      ocl::convertTypeStr(wdepth, depth, cn, cvt[1]),
   1600                                      ocl::typeToStr(CV_MAKE_TYPE(depth, scalarcn)),
   1601                                      current_op == op ? "" : cv::format(" -D %s", op2str[current_op]).c_str());
   1602 
   1603         kernels[i].create("morph", ocl::imgproc::morph_oclsrc, buildOptions);
   1604         if (kernels[i].empty())
   1605             return false;
   1606     }
   1607 
   1608     UMat src = _src.getUMat(), extraMat = _extraMat.getUMat();
   1609     _dst.create(src.size(), src.type());
   1610     UMat dst = _dst.getUMat();
   1611 
   1612     if (iterations == 1 && src.u != dst.u)
   1613     {
   1614         Size wholesize;
   1615         Point ofs;
   1616         src.locateROI(wholesize, ofs);
   1617         int wholecols = wholesize.width, wholerows = wholesize.height;
   1618 
   1619         if (haveExtraMat)
   1620             kernels[0].args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnlyNoSize(dst),
   1621                         ofs.x, ofs.y, src.cols, src.rows, wholecols, wholerows,
   1622                         ocl::KernelArg::ReadOnlyNoSize(extraMat));
   1623         else
   1624             kernels[0].args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnlyNoSize(dst),
   1625                         ofs.x, ofs.y, src.cols, src.rows, wholecols, wholerows);
   1626 
   1627         return kernels[0].run(2, globalThreads, localThreads, false);
   1628     }
   1629 
   1630     for (int i = 0; i < iterations; i++)
   1631     {
   1632         UMat source;
   1633         Size wholesize;
   1634         Point ofs;
   1635 
   1636         if (i == 0)
   1637         {
   1638             int cols =  src.cols, rows = src.rows;
   1639             src.locateROI(wholesize, ofs);
   1640             src.adjustROI(ofs.y, wholesize.height - rows - ofs.y, ofs.x, wholesize.width - cols - ofs.x);
   1641             if(src.u != dst.u)
   1642                 source = src;
   1643             else
   1644                 src.copyTo(source);
   1645 
   1646             src.adjustROI(-ofs.y, -wholesize.height + rows + ofs.y, -ofs.x, -wholesize.width + cols + ofs.x);
   1647             source.adjustROI(-ofs.y, -wholesize.height + rows + ofs.y, -ofs.x, -wholesize.width + cols + ofs.x);
   1648         }
   1649         else
   1650         {
   1651             int cols =  dst.cols, rows = dst.rows;
   1652             dst.locateROI(wholesize, ofs);
   1653             dst.adjustROI(ofs.y, wholesize.height - rows - ofs.y, ofs.x, wholesize.width - cols - ofs.x);
   1654             dst.copyTo(source);
   1655             dst.adjustROI(-ofs.y, -wholesize.height + rows + ofs.y, -ofs.x, -wholesize.width + cols + ofs.x);
   1656             source.adjustROI(-ofs.y, -wholesize.height + rows + ofs.y, -ofs.x, -wholesize.width + cols + ofs.x);
   1657         }
   1658         source.locateROI(wholesize, ofs);
   1659 
   1660         if (haveExtraMat && iterations == i + 1)
   1661             kernels[i].args(ocl::KernelArg::ReadOnlyNoSize(source), ocl::KernelArg::WriteOnlyNoSize(dst),
   1662                 ofs.x, ofs.y, source.cols, source.rows, wholesize.width, wholesize.height,
   1663                 ocl::KernelArg::ReadOnlyNoSize(extraMat));
   1664         else
   1665             kernels[i].args(ocl::KernelArg::ReadOnlyNoSize(source), ocl::KernelArg::WriteOnlyNoSize(dst),
   1666                 ofs.x, ofs.y, source.cols, source.rows, wholesize.width, wholesize.height);
   1667 
   1668         if (!kernels[i].run(2, globalThreads, localThreads, false))
   1669             return false;
   1670     }
   1671 
   1672     return true;
   1673 }
   1674 
   1675 #endif
   1676 
   1677 static void morphOp( int op, InputArray _src, OutputArray _dst,
   1678                      InputArray _kernel,
   1679                      Point anchor, int iterations,
   1680                      int borderType, const Scalar& borderValue )
   1681 {
   1682     Mat kernel = _kernel.getMat();
   1683     Size ksize = !kernel.empty() ? kernel.size() : Size(3,3);
   1684     anchor = normalizeAnchor(anchor, ksize);
   1685 
   1686     CV_OCL_RUN(_dst.isUMat() && _src.dims() <= 2 && _src.channels() <= 4 &&
   1687                borderType == cv::BORDER_CONSTANT && borderValue == morphologyDefaultBorderValue() &&
   1688                (op == MORPH_ERODE || op == MORPH_DILATE) &&
   1689                anchor.x == ksize.width >> 1 && anchor.y == ksize.height >> 1,
   1690                ocl_morphOp(_src, _dst, kernel, anchor, iterations, op, borderType, borderValue) )
   1691 
   1692     if (iterations == 0 || kernel.rows*kernel.cols == 1)
   1693     {
   1694         _src.copyTo(_dst);
   1695         return;
   1696     }
   1697 
   1698     if (kernel.empty())
   1699     {
   1700         kernel = getStructuringElement(MORPH_RECT, Size(1+iterations*2,1+iterations*2));
   1701         anchor = Point(iterations, iterations);
   1702         iterations = 1;
   1703     }
   1704     else if( iterations > 1 && countNonZero(kernel) == kernel.rows*kernel.cols )
   1705     {
   1706         anchor = Point(anchor.x*iterations, anchor.y*iterations);
   1707         kernel = getStructuringElement(MORPH_RECT,
   1708                                        Size(ksize.width + (iterations-1)*(ksize.width-1),
   1709                                             ksize.height + (iterations-1)*(ksize.height-1)),
   1710                                        anchor);
   1711         iterations = 1;
   1712     }
   1713 
   1714 #if IPP_VERSION_X100 >= 801
   1715     CV_IPP_CHECK()
   1716     {
   1717         if( IPPMorphOp(op, _src, _dst, kernel, anchor, iterations, borderType, borderValue) )
   1718         {
   1719             CV_IMPL_ADD(CV_IMPL_IPP);
   1720             return;
   1721         }
   1722     }
   1723 #endif
   1724 
   1725     Mat src = _src.getMat();
   1726     _dst.create( src.size(), src.type() );
   1727     Mat dst = _dst.getMat();
   1728 
   1729     int nStripes = 1;
   1730 #if defined HAVE_TEGRA_OPTIMIZATION
   1731     if (src.data != dst.data && iterations == 1 &&  //NOTE: threads are not used for inplace processing
   1732         (borderType & BORDER_ISOLATED) == 0 && //TODO: check border types
   1733         src.rows >= 64 ) //NOTE: just heuristics
   1734         nStripes = 4;
   1735 #endif
   1736 
   1737     parallel_for_(Range(0, nStripes),
   1738                   MorphologyRunner(src, dst, nStripes, iterations, op, kernel, anchor, borderType, borderType, borderValue));
   1739 }
   1740 
   1741 }
   1742 
   1743 void cv::erode( InputArray src, OutputArray dst, InputArray kernel,
   1744                 Point anchor, int iterations,
   1745                 int borderType, const Scalar& borderValue )
   1746 {
   1747     morphOp( MORPH_ERODE, src, dst, kernel, anchor, iterations, borderType, borderValue );
   1748 }
   1749 
   1750 
   1751 void cv::dilate( InputArray src, OutputArray dst, InputArray kernel,
   1752                  Point anchor, int iterations,
   1753                  int borderType, const Scalar& borderValue )
   1754 {
   1755     morphOp( MORPH_DILATE, src, dst, kernel, anchor, iterations, borderType, borderValue );
   1756 }
   1757 
   1758 #ifdef HAVE_OPENCL
   1759 
   1760 namespace cv {
   1761 
   1762 static bool ocl_morphologyEx(InputArray _src, OutputArray _dst, int op,
   1763                              InputArray kernel, Point anchor, int iterations,
   1764                              int borderType, const Scalar& borderValue)
   1765 {
   1766     _dst.createSameSize(_src, _src.type());
   1767     bool submat = _dst.isSubmatrix();
   1768     UMat temp;
   1769     _OutputArray _temp = submat ? _dst : _OutputArray(temp);
   1770 
   1771     switch( op )
   1772     {
   1773     case MORPH_ERODE:
   1774         if (!ocl_morphOp( _src, _dst, kernel, anchor, iterations, MORPH_ERODE, borderType, borderValue ))
   1775             return false;
   1776         break;
   1777     case MORPH_DILATE:
   1778         if (!ocl_morphOp( _src, _dst, kernel, anchor, iterations, MORPH_DILATE, borderType, borderValue ))
   1779             return false;
   1780         break;
   1781     case MORPH_OPEN:
   1782         if (!ocl_morphOp( _src, _temp, kernel, anchor, iterations, MORPH_ERODE, borderType, borderValue ))
   1783             return false;
   1784         if (!ocl_morphOp( _temp, _dst, kernel, anchor, iterations, MORPH_DILATE, borderType, borderValue ))
   1785             return false;
   1786         break;
   1787     case MORPH_CLOSE:
   1788         if (!ocl_morphOp( _src, _temp, kernel, anchor, iterations, MORPH_DILATE, borderType, borderValue ))
   1789             return false;
   1790         if (!ocl_morphOp( _temp, _dst, kernel, anchor, iterations, MORPH_ERODE, borderType, borderValue ))
   1791             return false;
   1792         break;
   1793     case MORPH_GRADIENT:
   1794         if (!ocl_morphOp( _src, temp, kernel, anchor, iterations, MORPH_ERODE, borderType, borderValue ))
   1795             return false;
   1796         if (!ocl_morphOp( _src, _dst, kernel, anchor, iterations, MORPH_DILATE, borderType, borderValue, MORPH_GRADIENT, temp ))
   1797             return false;
   1798         break;
   1799     case MORPH_TOPHAT:
   1800         if (!ocl_morphOp( _src, _temp, kernel, anchor, iterations, MORPH_ERODE, borderType, borderValue ))
   1801             return false;
   1802         if (!ocl_morphOp( _temp, _dst, kernel, anchor, iterations, MORPH_DILATE, borderType, borderValue, MORPH_TOPHAT, _src ))
   1803             return false;
   1804         break;
   1805     case MORPH_BLACKHAT:
   1806         if (!ocl_morphOp( _src, _temp, kernel, anchor, iterations, MORPH_DILATE, borderType, borderValue ))
   1807             return false;
   1808         if (!ocl_morphOp( _temp, _dst, kernel, anchor, iterations, MORPH_ERODE, borderType, borderValue, MORPH_BLACKHAT, _src ))
   1809             return false;
   1810         break;
   1811     default:
   1812         CV_Error( CV_StsBadArg, "unknown morphological operation" );
   1813     }
   1814 
   1815     return true;
   1816 }
   1817 
   1818 }
   1819 
   1820 #endif
   1821 
   1822 void cv::morphologyEx( InputArray _src, OutputArray _dst, int op,
   1823                        InputArray _kernel, Point anchor, int iterations,
   1824                        int borderType, const Scalar& borderValue )
   1825 {
   1826     Mat kernel = _kernel.getMat();
   1827     if (kernel.empty())
   1828     {
   1829         kernel = getStructuringElement(MORPH_RECT, Size(3,3), Point(1,1));
   1830     }
   1831 #ifdef HAVE_OPENCL
   1832     Size ksize = kernel.size();
   1833     anchor = normalizeAnchor(anchor, ksize);
   1834 
   1835     CV_OCL_RUN(_dst.isUMat() && _src.dims() <= 2 && _src.channels() <= 4 &&
   1836         anchor.x == ksize.width >> 1 && anchor.y == ksize.height >> 1 &&
   1837         borderType == cv::BORDER_CONSTANT && borderValue == morphologyDefaultBorderValue(),
   1838         ocl_morphologyEx(_src, _dst, op, kernel, anchor, iterations, borderType, borderValue))
   1839 #endif
   1840 
   1841     Mat src = _src.getMat(), temp;
   1842     _dst.create(src.size(), src.type());
   1843     Mat dst = _dst.getMat();
   1844 
   1845     switch( op )
   1846     {
   1847     case MORPH_ERODE:
   1848         erode( src, dst, kernel, anchor, iterations, borderType, borderValue );
   1849         break;
   1850     case MORPH_DILATE:
   1851         dilate( src, dst, kernel, anchor, iterations, borderType, borderValue );
   1852         break;
   1853     case MORPH_OPEN:
   1854         erode( src, dst, kernel, anchor, iterations, borderType, borderValue );
   1855         dilate( dst, dst, kernel, anchor, iterations, borderType, borderValue );
   1856         break;
   1857     case CV_MOP_CLOSE:
   1858         dilate( src, dst, kernel, anchor, iterations, borderType, borderValue );
   1859         erode( dst, dst, kernel, anchor, iterations, borderType, borderValue );
   1860         break;
   1861     case CV_MOP_GRADIENT:
   1862         erode( src, temp, kernel, anchor, iterations, borderType, borderValue );
   1863         dilate( src, dst, kernel, anchor, iterations, borderType, borderValue );
   1864         dst -= temp;
   1865         break;
   1866     case CV_MOP_TOPHAT:
   1867         if( src.data != dst.data )
   1868             temp = dst;
   1869         erode( src, temp, kernel, anchor, iterations, borderType, borderValue );
   1870         dilate( temp, temp, kernel, anchor, iterations, borderType, borderValue );
   1871         dst = src - temp;
   1872         break;
   1873     case CV_MOP_BLACKHAT:
   1874         if( src.data != dst.data )
   1875             temp = dst;
   1876         dilate( src, temp, kernel, anchor, iterations, borderType, borderValue );
   1877         erode( temp, temp, kernel, anchor, iterations, borderType, borderValue );
   1878         dst = temp - src;
   1879         break;
   1880     default:
   1881         CV_Error( CV_StsBadArg, "unknown morphological operation" );
   1882     }
   1883 }
   1884 
   1885 CV_IMPL IplConvKernel *
   1886 cvCreateStructuringElementEx( int cols, int rows,
   1887                               int anchorX, int anchorY,
   1888                               int shape, int *values )
   1889 {
   1890     cv::Size ksize = cv::Size(cols, rows);
   1891     cv::Point anchor = cv::Point(anchorX, anchorY);
   1892     CV_Assert( cols > 0 && rows > 0 && anchor.inside(cv::Rect(0,0,cols,rows)) &&
   1893                (shape != CV_SHAPE_CUSTOM || values != 0));
   1894 
   1895     int i, size = rows * cols;
   1896     int element_size = sizeof(IplConvKernel) + size*sizeof(int);
   1897     IplConvKernel *element = (IplConvKernel*)cvAlloc(element_size + 32);
   1898 
   1899     element->nCols = cols;
   1900     element->nRows = rows;
   1901     element->anchorX = anchorX;
   1902     element->anchorY = anchorY;
   1903     element->nShiftR = shape < CV_SHAPE_ELLIPSE ? shape : CV_SHAPE_CUSTOM;
   1904     element->values = (int*)(element + 1);
   1905 
   1906     if( shape == CV_SHAPE_CUSTOM )
   1907     {
   1908         for( i = 0; i < size; i++ )
   1909             element->values[i] = values[i];
   1910     }
   1911     else
   1912     {
   1913         cv::Mat elem = cv::getStructuringElement(shape, ksize, anchor);
   1914         for( i = 0; i < size; i++ )
   1915             element->values[i] = elem.ptr()[i];
   1916     }
   1917 
   1918     return element;
   1919 }
   1920 
   1921 
   1922 CV_IMPL void
   1923 cvReleaseStructuringElement( IplConvKernel ** element )
   1924 {
   1925     if( !element )
   1926         CV_Error( CV_StsNullPtr, "" );
   1927     cvFree( element );
   1928 }
   1929 
   1930 
   1931 static void convertConvKernel( const IplConvKernel* src, cv::Mat& dst, cv::Point& anchor )
   1932 {
   1933     if(!src)
   1934     {
   1935         anchor = cv::Point(1,1);
   1936         dst.release();
   1937         return;
   1938     }
   1939     anchor = cv::Point(src->anchorX, src->anchorY);
   1940     dst.create(src->nRows, src->nCols, CV_8U);
   1941 
   1942     int i, size = src->nRows*src->nCols;
   1943     for( i = 0; i < size; i++ )
   1944         dst.ptr()[i] = (uchar)(src->values[i] != 0);
   1945 }
   1946 
   1947 
   1948 CV_IMPL void
   1949 cvErode( const CvArr* srcarr, CvArr* dstarr, IplConvKernel* element, int iterations )
   1950 {
   1951     cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr), kernel;
   1952     CV_Assert( src.size() == dst.size() && src.type() == dst.type() );
   1953     cv::Point anchor;
   1954     convertConvKernel( element, kernel, anchor );
   1955     cv::erode( src, dst, kernel, anchor, iterations, cv::BORDER_REPLICATE );
   1956 }
   1957 
   1958 
   1959 CV_IMPL void
   1960 cvDilate( const CvArr* srcarr, CvArr* dstarr, IplConvKernel* element, int iterations )
   1961 {
   1962     cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr), kernel;
   1963     CV_Assert( src.size() == dst.size() && src.type() == dst.type() );
   1964     cv::Point anchor;
   1965     convertConvKernel( element, kernel, anchor );
   1966     cv::dilate( src, dst, kernel, anchor, iterations, cv::BORDER_REPLICATE );
   1967 }
   1968 
   1969 
   1970 CV_IMPL void
   1971 cvMorphologyEx( const void* srcarr, void* dstarr, void*,
   1972                 IplConvKernel* element, int op, int iterations )
   1973 {
   1974     cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr), kernel;
   1975     CV_Assert( src.size() == dst.size() && src.type() == dst.type() );
   1976     cv::Point anchor;
   1977     IplConvKernel* temp_element = NULL;
   1978     if (!element)
   1979     {
   1980         temp_element = cvCreateStructuringElementEx(3, 3, 1, 1, CV_SHAPE_RECT);
   1981     } else {
   1982         temp_element = element;
   1983     }
   1984     convertConvKernel( temp_element, kernel, anchor );
   1985     if (!element)
   1986     {
   1987         cvReleaseStructuringElement(&temp_element);
   1988     }
   1989     cv::morphologyEx( src, dst, op, kernel, anchor, iterations, cv::BORDER_REPLICATE );
   1990 }
   1991 
   1992 /* End of file. */
   1993