Home | History | Annotate | Download | only in src
      1 /*M///////////////////////////////////////////////////////////////////////////////////////
      2 //
      3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
      4 //
      5 //  By downloading, copying, installing or using the software you agree to this license.
      6 //  If you do not agree to this license, do not download, install,
      7 //  copy or use the software.
      8 //
      9 //                           License Agreement
     10 //                For Open Source Computer Vision Library
     11 //
     12 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
     13 // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
     14 // Copyright (C) 2014, Itseez Inc., all rights reserved.
     15 // Third party copyrights are property of their respective owners.
     16 //
     17 // Redistribution and use in source and binary forms, with or without modification,
     18 // are permitted provided that the following conditions are met:
     19 //
     20 //   * Redistribution's of source code must retain the above copyright notice,
     21 //     this list of conditions and the following disclaimer.
     22 //
     23 //   * Redistribution's in binary form must reproduce the above copyright notice,
     24 //     this list of conditions and the following disclaimer in the documentation
     25 //     and/or other materials provided with the distribution.
     26 //
     27 //   * The name of the copyright holders may not be used to endorse or promote products
     28 //     derived from this software without specific prior written permission.
     29 //
     30 // This software is provided by the copyright holders and contributors "as is" and
     31 // any express or implied warranties, including, but not limited to, the implied
     32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
     33 // In no event shall the Intel Corporation or contributors be liable for any direct,
     34 // indirect, incidental, special, exemplary, or consequential damages
     35 // (including, but not limited to, procurement of substitute goods or services;
     36 // loss of use, data, or profits; or business interruption) however caused
     37 // and on any theory of liability, whether in contract, strict liability,
     38 // or tort (including negligence or otherwise) arising in any way out of
     39 // the use of this software, even if advised of the possibility of such damage.
     40 //
     41 //M*/
     42 
     43 /* ////////////////////////////////////////////////////////////////////
     44 //
     45 //  Mat basic operations: Copy, Set
     46 //
     47 // */
     48 
#include "precomp.hpp"
#include "opencl_kernels_core.hpp"
#include <climits>
     51 
     52 namespace cv
     53 {
     54 
     55 template<typename T> static void
     56 copyMask_(const uchar* _src, size_t sstep, const uchar* mask, size_t mstep, uchar* _dst, size_t dstep, Size size)
     57 {
     58     for( ; size.height--; mask += mstep, _src += sstep, _dst += dstep )
     59     {
     60         const T* src = (const T*)_src;
     61         T* dst = (T*)_dst;
     62         int x = 0;
     63          #if CV_ENABLE_UNROLLED
     64         for( ; x <= size.width - 4; x += 4 )
     65         {
     66             if( mask[x] )
     67                 dst[x] = src[x];
     68             if( mask[x+1] )
     69                 dst[x+1] = src[x+1];
     70             if( mask[x+2] )
     71                 dst[x+2] = src[x+2];
     72             if( mask[x+3] )
     73                 dst[x+3] = src[x+3];
     74         }
     75         #endif
     76         for( ; x < size.width; x++ )
     77             if( mask[x] )
     78                 dst[x] = src[x];
     79     }
     80 }
     81 
     82 template<> void
     83 copyMask_<uchar>(const uchar* _src, size_t sstep, const uchar* mask, size_t mstep, uchar* _dst, size_t dstep, Size size)
     84 {
     85 #if defined HAVE_IPP
     86     CV_IPP_CHECK()
     87     {
     88         if (ippiCopy_8u_C1MR(_src, (int)sstep, _dst, (int)dstep, ippiSize(size), mask, (int)mstep) >= 0)
     89         {
     90             CV_IMPL_ADD(CV_IMPL_IPP);
     91             return;
     92         }
     93         setIppErrorStatus();
     94     }
     95 #endif
     96 
     97     for( ; size.height--; mask += mstep, _src += sstep, _dst += dstep )
     98     {
     99         const uchar* src = (const uchar*)_src;
    100         uchar* dst = (uchar*)_dst;
    101         int x = 0;
    102         #if CV_SSE4_2
    103         if(USE_SSE4_2)//
    104         {
    105             __m128i zero = _mm_setzero_si128 ();
    106 
    107              for( ; x <= size.width - 16; x += 16 )
    108              {
    109                  const __m128i rSrc = _mm_lddqu_si128((const __m128i*)(src+x));
    110                  __m128i _mask = _mm_lddqu_si128((const __m128i*)(mask+x));
    111                  __m128i rDst = _mm_lddqu_si128((__m128i*)(dst+x));
    112                  __m128i _negMask = _mm_cmpeq_epi8(_mask, zero);
    113                  rDst = _mm_blendv_epi8(rSrc, rDst, _negMask);
    114                  _mm_storeu_si128((__m128i*)(dst + x), rDst);
    115              }
    116         }
    117         #elif CV_NEON
    118         uint8x16_t v_one = vdupq_n_u8(1);
    119         for( ; x <= size.width - 16; x += 16 )
    120         {
    121             uint8x16_t v_mask = vcgeq_u8(vld1q_u8(mask + x), v_one);
    122             uint8x16_t v_dst = vld1q_u8(dst + x), v_src = vld1q_u8(src + x);
    123             vst1q_u8(dst + x, vbslq_u8(v_mask, v_src, v_dst));
    124         }
    125         #endif
    126         for( ; x < size.width; x++ )
    127             if( mask[x] )
    128                 dst[x] = src[x];
    129     }
    130 }
    131 
    132 template<> void
    133 copyMask_<ushort>(const uchar* _src, size_t sstep, const uchar* mask, size_t mstep, uchar* _dst, size_t dstep, Size size)
    134 {
    135 #if defined HAVE_IPP
    136     CV_IPP_CHECK()
    137     {
    138         if (ippiCopy_16u_C1MR((const Ipp16u *)_src, (int)sstep, (Ipp16u *)_dst, (int)dstep, ippiSize(size), mask, (int)mstep) >= 0)
    139         {
    140             CV_IMPL_ADD(CV_IMPL_IPP);
    141             return;
    142         }
    143         setIppErrorStatus();
    144     }
    145 #endif
    146 
    147     for( ; size.height--; mask += mstep, _src += sstep, _dst += dstep )
    148     {
    149         const ushort* src = (const ushort*)_src;
    150         ushort* dst = (ushort*)_dst;
    151         int x = 0;
    152         #if CV_SSE4_2
    153         if(USE_SSE4_2)//
    154         {
    155             __m128i zero = _mm_setzero_si128 ();
    156             for( ; x <= size.width - 8; x += 8 )
    157             {
    158                  const __m128i rSrc =_mm_lddqu_si128((const __m128i*)(src+x));
    159                  __m128i _mask = _mm_loadl_epi64((const __m128i*)(mask+x));
    160                  _mask = _mm_unpacklo_epi8(_mask, _mask);
    161                  __m128i rDst = _mm_lddqu_si128((const __m128i*)(dst+x));
    162                  __m128i _negMask = _mm_cmpeq_epi8(_mask, zero);
    163                  rDst = _mm_blendv_epi8(rSrc, rDst, _negMask);
    164                  _mm_storeu_si128((__m128i*)(dst + x), rDst);
    165              }
    166         }
    167         #elif CV_NEON
    168         uint8x8_t v_one = vdup_n_u8(1);
    169         for( ; x <= size.width - 8; x += 8 )
    170         {
    171             uint8x8_t v_mask = vcge_u8(vld1_u8(mask + x), v_one);
    172             uint8x8x2_t v_mask2 = vzip_u8(v_mask, v_mask);
    173             uint16x8_t v_mask_res = vreinterpretq_u16_u8(vcombine_u8(v_mask2.val[0], v_mask2.val[1]));
    174 
    175             uint16x8_t v_src = vld1q_u16(src + x), v_dst = vld1q_u16(dst + x);
    176             vst1q_u16(dst + x, vbslq_u16(v_mask_res, v_src, v_dst));
    177         }
    178         #endif
    179         for( ; x < size.width; x++ )
    180             if( mask[x] )
    181                 dst[x] = src[x];
    182     }
    183 }
    184 
    185 static void
    186 copyMaskGeneric(const uchar* _src, size_t sstep, const uchar* mask, size_t mstep, uchar* _dst, size_t dstep, Size size, void* _esz)
    187 {
    188     size_t k, esz = *(size_t*)_esz;
    189     for( ; size.height--; mask += mstep, _src += sstep, _dst += dstep )
    190     {
    191         const uchar* src = _src;
    192         uchar* dst = _dst;
    193         int x = 0;
    194         for( ; x < size.width; x++, src += esz, dst += esz )
    195         {
    196             if( !mask[x] )
    197                 continue;
    198             for( k = 0; k < esz; k++ )
    199                 dst[k] = src[k];
    200         }
    201     }
    202 }
    203 
    204 
    205 #define DEF_COPY_MASK(suffix, type) \
    206 static void copyMask##suffix(const uchar* src, size_t sstep, const uchar* mask, size_t mstep, \
    207                              uchar* dst, size_t dstep, Size size, void*) \
    208 { \
    209     copyMask_<type>(src, sstep, mask, mstep, dst, dstep, size); \
    210 }
    211 
    212 #if defined HAVE_IPP
    213 #define DEF_COPY_MASK_F(suffix, type, ippfavor, ipptype) \
    214 static void copyMask##suffix(const uchar* src, size_t sstep, const uchar* mask, size_t mstep, \
    215                              uchar* dst, size_t dstep, Size size, void*) \
    216 { \
    217     CV_IPP_CHECK()\
    218     {\
    219         if (ippiCopy_##ippfavor((const ipptype *)src, (int)sstep, (ipptype *)dst, (int)dstep, ippiSize(size), (const Ipp8u *)mask, (int)mstep) >= 0) \
    220         {\
    221             CV_IMPL_ADD(CV_IMPL_IPP);\
    222             return;\
    223         }\
    224         setIppErrorStatus(); \
    225     }\
    226     copyMask_<type>(src, sstep, mask, mstep, dst, dstep, size); \
    227 }
    228 #else
    229 #define DEF_COPY_MASK_F(suffix, type, ippfavor, ipptype) \
    230 static void copyMask##suffix(const uchar* src, size_t sstep, const uchar* mask, size_t mstep, \
    231                              uchar* dst, size_t dstep, Size size, void*) \
    232 { \
    233     copyMask_<type>(src, sstep, mask, mstep, dst, dstep, size); \
    234 }
    235 #endif
    236 
    237 
    238 DEF_COPY_MASK(8u, uchar)
    239 DEF_COPY_MASK(16u, ushort)
    240 DEF_COPY_MASK_F(8uC3, Vec3b, 8u_C3MR, Ipp8u)
    241 DEF_COPY_MASK_F(32s, int, 32s_C1MR, Ipp32s)
    242 DEF_COPY_MASK_F(16uC3, Vec3s, 16u_C3MR, Ipp16u)
    243 DEF_COPY_MASK(32sC2, Vec2i)
    244 DEF_COPY_MASK_F(32sC3, Vec3i, 32s_C3MR, Ipp32s)
    245 DEF_COPY_MASK_F(32sC4, Vec4i, 32s_C4MR, Ipp32s)
    246 DEF_COPY_MASK(32sC6, Vec6i)
    247 DEF_COPY_MASK(32sC8, Vec8i)
    248 
    249 BinaryFunc copyMaskTab[] =
    250 {
    251     0,
    252     copyMask8u,
    253     copyMask16u,
    254     copyMask8uC3,
    255     copyMask32s,
    256     0,
    257     copyMask16uC3,
    258     0,
    259     copyMask32sC2,
    260     0, 0, 0,
    261     copyMask32sC3,
    262     0, 0, 0,
    263     copyMask32sC4,
    264     0, 0, 0, 0, 0, 0, 0,
    265     copyMask32sC6,
    266     0, 0, 0, 0, 0, 0, 0,
    267     copyMask32sC8
    268 };
    269 
    270 BinaryFunc getCopyMaskFunc(size_t esz)
    271 {
    272     return esz <= 32 && copyMaskTab[esz] ? copyMaskTab[esz] : copyMaskGeneric;
    273 }
    274 
    275 /* dst = src */
    276 void Mat::copyTo( OutputArray _dst ) const
    277 {
    278     int dtype = _dst.type();
    279     if( _dst.fixedType() && dtype != type() )
    280     {
    281         CV_Assert( channels() == CV_MAT_CN(dtype) );
    282         convertTo( _dst, dtype );
    283         return;
    284     }
    285 
    286     if( empty() )
    287     {
    288         _dst.release();
    289         return;
    290     }
    291 
    292     if( _dst.isUMat() )
    293     {
    294         _dst.create( dims, size.p, type() );
    295         UMat dst = _dst.getUMat();
    296 
    297         size_t i, sz[CV_MAX_DIM], dstofs[CV_MAX_DIM], esz = elemSize();
    298         for( i = 0; i < (size_t)dims; i++ )
    299             sz[i] = size.p[i];
    300         sz[dims-1] *= esz;
    301         dst.ndoffset(dstofs);
    302         dstofs[dims-1] *= esz;
    303         dst.u->currAllocator->upload(dst.u, data, dims, sz, dstofs, dst.step.p, step.p);
    304         return;
    305     }
    306 
    307     if( dims <= 2 )
    308     {
    309         _dst.create( rows, cols, type() );
    310         Mat dst = _dst.getMat();
    311         if( data == dst.data )
    312             return;
    313 
    314         if( rows > 0 && cols > 0 )
    315         {
    316             const uchar* sptr = data;
    317             uchar* dptr = dst.data;
    318 
    319             Size sz = getContinuousSize(*this, dst);
    320             size_t len = sz.width*elemSize();
    321 
    322 #if defined HAVE_IPP
    323             CV_IPP_CHECK()
    324             {
    325                 if (ippiCopy_8u_C1R(sptr, (int)step, dptr, (int)dst.step, ippiSize((int)len, sz.height)) >= 0)
    326                 {
    327                     CV_IMPL_ADD(CV_IMPL_IPP)
    328                     return;
    329                 }
    330                 setIppErrorStatus();
    331             }
    332 #endif
    333 
    334             for( ; sz.height--; sptr += step, dptr += dst.step )
    335                 memcpy( dptr, sptr, len );
    336         }
    337         return;
    338     }
    339 
    340     _dst.create( dims, size, type() );
    341     Mat dst = _dst.getMat();
    342     if( data == dst.data )
    343         return;
    344 
    345     if( total() != 0 )
    346     {
    347         const Mat* arrays[] = { this, &dst };
    348         uchar* ptrs[2];
    349         NAryMatIterator it(arrays, ptrs, 2);
    350         size_t sz = it.size*elemSize();
    351 
    352         for( size_t i = 0; i < it.nplanes; i++, ++it )
    353             memcpy(ptrs[1], ptrs[0], sz);
    354     }
    355 }
    356 
    357 void Mat::copyTo( OutputArray _dst, InputArray _mask ) const
    358 {
    359     Mat mask = _mask.getMat();
    360     if( !mask.data )
    361     {
    362         copyTo(_dst);
    363         return;
    364     }
    365 
    366     int cn = channels(), mcn = mask.channels();
    367     CV_Assert( mask.depth() == CV_8U && (mcn == 1 || mcn == cn) );
    368     bool colorMask = mcn > 1;
    369 
    370     size_t esz = colorMask ? elemSize1() : elemSize();
    371     BinaryFunc copymask = getCopyMaskFunc(esz);
    372 
    373     uchar* data0 = _dst.getMat().data;
    374     _dst.create( dims, size, type() );
    375     Mat dst = _dst.getMat();
    376 
    377     if( dst.data != data0 ) // do not leave dst uninitialized
    378         dst = Scalar(0);
    379 
    380     if( dims <= 2 )
    381     {
    382         CV_Assert( size() == mask.size() );
    383         Size sz = getContinuousSize(*this, dst, mask, mcn);
    384         copymask(data, step, mask.data, mask.step, dst.data, dst.step, sz, &esz);
    385         return;
    386     }
    387 
    388     const Mat* arrays[] = { this, &dst, &mask, 0 };
    389     uchar* ptrs[3];
    390     NAryMatIterator it(arrays, ptrs);
    391     Size sz((int)(it.size*mcn), 1);
    392 
    393     for( size_t i = 0; i < it.nplanes; i++, ++it )
    394         copymask(ptrs[0], 0, ptrs[2], 0, ptrs[1], 0, sz, &esz);
    395 }
    396 
    397 Mat& Mat::operator = (const Scalar& s)
    398 {
    399     const Mat* arrays[] = { this };
    400     uchar* dptr;
    401     NAryMatIterator it(arrays, &dptr, 1);
    402     size_t elsize = it.size*elemSize();
    403     const int64* is = (const int64*)&s.val[0];
    404 
    405     if( is[0] == 0 && is[1] == 0 && is[2] == 0 && is[3] == 0 )
    406     {
    407 #if defined HAVE_IPP && !defined HAVE_IPP_ICV_ONLY && 0
    408         CV_IPP_CHECK()
    409         {
    410             if (dims <= 2 || isContinuous())
    411             {
    412                 IppiSize roisize = { cols, rows };
    413                 if (isContinuous())
    414                 {
    415                     roisize.width = (int)total();
    416                     roisize.height = 1;
    417 
    418                     if (ippsZero_8u(data, static_cast<int>(roisize.width * elemSize())) >= 0)
    419                     {
    420                         CV_IMPL_ADD(CV_IMPL_IPP)
    421                         return *this;
    422                     }
    423                     setIppErrorStatus();
    424                 }
    425                 roisize.width *= (int)elemSize();
    426 
    427                 if (ippiSet_8u_C1R(0, data, (int)step, roisize) >= 0)
    428                 {
    429                     CV_IMPL_ADD(CV_IMPL_IPP)
    430                     return *this;
    431                 }
    432                 setIppErrorStatus();
    433             }
    434         }
    435 #endif
    436 
    437         for( size_t i = 0; i < it.nplanes; i++, ++it )
    438             memset( dptr, 0, elsize );
    439     }
    440     else
    441     {
    442         if( it.nplanes > 0 )
    443         {
    444             double scalar[12];
    445             scalarToRawData(s, scalar, type(), 12);
    446             size_t blockSize = 12*elemSize1();
    447 
    448             for( size_t j = 0; j < elsize; j += blockSize )
    449             {
    450                 size_t sz = MIN(blockSize, elsize - j);
    451                 memcpy( dptr + j, scalar, sz );
    452             }
    453         }
    454 
    455         for( size_t i = 1; i < it.nplanes; i++ )
    456         {
    457             ++it;
    458             memcpy( dptr, data, elsize );
    459         }
    460     }
    461     return *this;
    462 }
    463 
    464 
    465 Mat& Mat::setTo(InputArray _value, InputArray _mask)
    466 {
    467     if( empty() )
    468         return *this;
    469 
    470     Mat value = _value.getMat(), mask = _mask.getMat();
    471 
    472     CV_Assert( checkScalar(value, type(), _value.kind(), _InputArray::MAT ));
    473     CV_Assert( mask.empty() || (mask.type() == CV_8U && size == mask.size) );
    474 
    475 #if defined HAVE_IPP
    476     CV_IPP_CHECK()
    477     {
    478         int cn = channels(), depth0 = depth();
    479 
    480         if (!mask.empty() && (dims <= 2 || (isContinuous() && mask.isContinuous())) &&
    481                 (/*depth0 == CV_8U ||*/ depth0 == CV_16U || depth0 == CV_16S || depth0 == CV_32S || depth0 == CV_32F) &&
    482                 (cn == 1 || cn == 3 || cn == 4))
    483         {
    484             uchar _buf[32];
    485             void * buf = _buf;
    486             convertAndUnrollScalar( value, type(), _buf, 1 );
    487 
    488             IppStatus status = (IppStatus)-1;
    489             IppiSize roisize = { cols, rows };
    490             int mstep = (int)mask.step[0], dstep = (int)step[0];
    491 
    492             if (isContinuous() && mask.isContinuous())
    493             {
    494                 roisize.width = (int)total();
    495                 roisize.height = 1;
    496             }
    497 
    498             if (cn == 1)
    499             {
    500                 /*if (depth0 == CV_8U)
    501                     status = ippiSet_8u_C1MR(*(Ipp8u *)buf, (Ipp8u *)data, dstep, roisize, mask.data, mstep);
    502                 else*/ if (depth0 == CV_16U)
    503                     status = ippiSet_16u_C1MR(*(Ipp16u *)buf, (Ipp16u *)data, dstep, roisize, mask.data, mstep);
    504                 else if (depth0 == CV_16S)
    505                     status = ippiSet_16s_C1MR(*(Ipp16s *)buf, (Ipp16s *)data, dstep, roisize, mask.data, mstep);
    506                 else if (depth0 == CV_32S)
    507                     status = ippiSet_32s_C1MR(*(Ipp32s *)buf, (Ipp32s *)data, dstep, roisize, mask.data, mstep);
    508                 else if (depth0 == CV_32F)
    509                     status = ippiSet_32f_C1MR(*(Ipp32f *)buf, (Ipp32f *)data, dstep, roisize, mask.data, mstep);
    510             }
    511             else if (cn == 3 || cn == 4)
    512             {
    513 #define IPP_SET(ippfavor, ippcn) \
    514         do \
    515         { \
    516             typedef Ipp##ippfavor ipptype; \
    517             ipptype ippvalue[4] = { ((ipptype *)buf)[0], ((ipptype *)buf)[1], ((ipptype *)buf)[2], ((ipptype *)buf)[3] }; \
    518             status = ippiSet_##ippfavor##_C##ippcn##MR(ippvalue, (ipptype *)data, dstep, roisize, mask.data, mstep); \
    519         } while ((void)0, 0)
    520 
    521 #define IPP_SET_CN(ippcn) \
    522         do \
    523         { \
    524             if (cn == ippcn) \
    525             { \
    526                 /*if (depth0 == CV_8U) \
    527                     IPP_SET(8u, ippcn); \
    528                 else*/ if (depth0 == CV_16U) \
    529                     IPP_SET(16u, ippcn); \
    530                 else if (depth0 == CV_16S) \
    531                     IPP_SET(16s, ippcn); \
    532                 else if (depth0 == CV_32S) \
    533                     IPP_SET(32s, ippcn); \
    534                 else if (depth0 == CV_32F) \
    535                     IPP_SET(32f, ippcn); \
    536             } \
    537         } while ((void)0, 0)
    538 
    539                 IPP_SET_CN(3);
    540                 IPP_SET_CN(4);
    541 
    542 #undef IPP_SET_CN
    543 #undef IPP_SET
    544             }
    545 
    546             if (status >= 0)
    547             {
    548                 CV_IMPL_ADD(CV_IMPL_IPP);
    549                 return *this;
    550             }
    551             setIppErrorStatus();
    552         }
    553     }
    554 #endif
    555 
    556     size_t esz = elemSize();
    557     BinaryFunc copymask = getCopyMaskFunc(esz);
    558 
    559     const Mat* arrays[] = { this, !mask.empty() ? &mask : 0, 0 };
    560     uchar* ptrs[2]={0,0};
    561     NAryMatIterator it(arrays, ptrs);
    562     int totalsz = (int)it.size, blockSize0 = std::min(totalsz, (int)((BLOCK_SIZE + esz-1)/esz));
    563     AutoBuffer<uchar> _scbuf(blockSize0*esz + 32);
    564     uchar* scbuf = alignPtr((uchar*)_scbuf, (int)sizeof(double));
    565     convertAndUnrollScalar( value, type(), scbuf, blockSize0 );
    566 
    567     for( size_t i = 0; i < it.nplanes; i++, ++it )
    568     {
    569         for( int j = 0; j < totalsz; j += blockSize0 )
    570         {
    571             Size sz(std::min(blockSize0, totalsz - j), 1);
    572             size_t blockSize = sz.width*esz;
    573             if( ptrs[1] )
    574             {
    575                 copymask(scbuf, 0, ptrs[1], 0, ptrs[0], 0, sz, &esz);
    576                 ptrs[1] += sz.width;
    577             }
    578             else
    579                 memcpy(ptrs[0], scbuf, blockSize);
    580             ptrs[0] += blockSize;
    581         }
    582     }
    583     return *this;
    584 }
    585 
    586 
    587 static void
    588 flipHoriz( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, size_t esz )
    589 {
    590     int i, j, limit = (int)(((size.width + 1)/2)*esz);
    591     AutoBuffer<int> _tab(size.width*esz);
    592     int* tab = _tab;
    593 
    594     for( i = 0; i < size.width; i++ )
    595         for( size_t k = 0; k < esz; k++ )
    596             tab[i*esz + k] = (int)((size.width - i - 1)*esz + k);
    597 
    598     for( ; size.height--; src += sstep, dst += dstep )
    599     {
    600         for( i = 0; i < limit; i++ )
    601         {
    602             j = tab[i];
    603             uchar t0 = src[i], t1 = src[j];
    604             dst[i] = t1; dst[j] = t0;
    605         }
    606     }
    607 }
    608 
    609 static void
    610 flipVert( const uchar* src0, size_t sstep, uchar* dst0, size_t dstep, Size size, size_t esz )
    611 {
    612     const uchar* src1 = src0 + (size.height - 1)*sstep;
    613     uchar* dst1 = dst0 + (size.height - 1)*dstep;
    614     size.width *= (int)esz;
    615 
    616     for( int y = 0; y < (size.height + 1)/2; y++, src0 += sstep, src1 -= sstep,
    617                                                   dst0 += dstep, dst1 -= dstep )
    618     {
    619         int i = 0;
    620         if( ((size_t)src0|(size_t)dst0|(size_t)src1|(size_t)dst1) % sizeof(int) == 0 )
    621         {
    622             for( ; i <= size.width - 16; i += 16 )
    623             {
    624                 int t0 = ((int*)(src0 + i))[0];
    625                 int t1 = ((int*)(src1 + i))[0];
    626 
    627                 ((int*)(dst0 + i))[0] = t1;
    628                 ((int*)(dst1 + i))[0] = t0;
    629 
    630                 t0 = ((int*)(src0 + i))[1];
    631                 t1 = ((int*)(src1 + i))[1];
    632 
    633                 ((int*)(dst0 + i))[1] = t1;
    634                 ((int*)(dst1 + i))[1] = t0;
    635 
    636                 t0 = ((int*)(src0 + i))[2];
    637                 t1 = ((int*)(src1 + i))[2];
    638 
    639                 ((int*)(dst0 + i))[2] = t1;
    640                 ((int*)(dst1 + i))[2] = t0;
    641 
    642                 t0 = ((int*)(src0 + i))[3];
    643                 t1 = ((int*)(src1 + i))[3];
    644 
    645                 ((int*)(dst0 + i))[3] = t1;
    646                 ((int*)(dst1 + i))[3] = t0;
    647             }
    648 
    649             for( ; i <= size.width - 4; i += 4 )
    650             {
    651                 int t0 = ((int*)(src0 + i))[0];
    652                 int t1 = ((int*)(src1 + i))[0];
    653 
    654                 ((int*)(dst0 + i))[0] = t1;
    655                 ((int*)(dst1 + i))[0] = t0;
    656             }
    657         }
    658 
    659         for( ; i < size.width; i++ )
    660         {
    661             uchar t0 = src0[i];
    662             uchar t1 = src1[i];
    663 
    664             dst0[i] = t1;
    665             dst1[i] = t0;
    666         }
    667     }
    668 }
    669 
    670 #ifdef HAVE_OPENCL
    671 
    672 enum { FLIP_COLS = 1 << 0, FLIP_ROWS = 1 << 1, FLIP_BOTH = FLIP_ROWS | FLIP_COLS };
    673 
    674 static bool ocl_flip(InputArray _src, OutputArray _dst, int flipCode )
    675 {
    676     CV_Assert(flipCode >= -1 && flipCode <= 1);
    677 
    678     const ocl::Device & dev = ocl::Device::getDefault();
    679     int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
    680             flipType, kercn = std::min(ocl::predictOptimalVectorWidth(_src, _dst), 4);
    681 
    682     bool doubleSupport = dev.doubleFPConfig() > 0;
    683     if (!doubleSupport && depth == CV_64F)
    684         kercn = cn;
    685 
    686     if (cn > 4)
    687         return false;
    688 
    689     const char * kernelName;
    690     if (flipCode == 0)
    691         kernelName = "arithm_flip_rows", flipType = FLIP_ROWS;
    692     else if (flipCode > 0)
    693         kernelName = "arithm_flip_cols", flipType = FLIP_COLS;
    694     else
    695         kernelName = "arithm_flip_rows_cols", flipType = FLIP_BOTH;
    696 
    697     int pxPerWIy = (dev.isIntel() && (dev.type() & ocl::Device::TYPE_GPU)) ? 4 : 1;
    698     kercn = (cn!=3 || flipType == FLIP_ROWS) ? std::max(kercn, cn) : cn;
    699 
    700     ocl::Kernel k(kernelName, ocl::core::flip_oclsrc,
    701         format( "-D T=%s -D T1=%s -D cn=%d -D PIX_PER_WI_Y=%d -D kercn=%d",
    702                 kercn != cn ? ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)) : ocl::vecopTypeToStr(CV_MAKE_TYPE(depth, kercn)),
    703                 kercn != cn ? ocl::typeToStr(depth) : ocl::vecopTypeToStr(depth), cn, pxPerWIy, kercn));
    704     if (k.empty())
    705         return false;
    706 
    707     Size size = _src.size();
    708     _dst.create(size, type);
    709     UMat src = _src.getUMat(), dst = _dst.getUMat();
    710 
    711     int cols = size.width * cn / kercn, rows = size.height;
    712     cols = flipType == FLIP_COLS ? (cols + 1) >> 1 : cols;
    713     rows = flipType & FLIP_ROWS ? (rows + 1) >> 1 : rows;
    714 
    715     k.args(ocl::KernelArg::ReadOnlyNoSize(src),
    716            ocl::KernelArg::WriteOnly(dst, cn, kercn), rows, cols);
    717 
    718     size_t maxWorkGroupSize = dev.maxWorkGroupSize();
    719     CV_Assert(maxWorkGroupSize % 4 == 0);
    720 
    721     size_t globalsize[2] = { cols, (rows + pxPerWIy - 1) / pxPerWIy },
    722             localsize[2] = { maxWorkGroupSize / 4, 4 };
    723     return k.run(2, globalsize, (flipType == FLIP_COLS) && !dev.isIntel() ? localsize : NULL, false);
    724 }
    725 
    726 #endif
    727 
    728 void flip( InputArray _src, OutputArray _dst, int flip_mode )
    729 {
    730     CV_Assert( _src.dims() <= 2 );
    731     Size size = _src.size();
    732 
    733     if (flip_mode < 0)
    734     {
    735         if (size.width == 1)
    736             flip_mode = 0;
    737         if (size.height == 1)
    738             flip_mode = 1;
    739     }
    740 
    741     if ((size.width == 1 && flip_mode > 0) ||
    742         (size.height == 1 && flip_mode == 0) ||
    743         (size.height == 1 && size.width == 1 && flip_mode < 0))
    744     {
    745         return _src.copyTo(_dst);
    746     }
    747 
    748     CV_OCL_RUN( _dst.isUMat(), ocl_flip(_src, _dst, flip_mode))
    749 
    750     Mat src = _src.getMat();
    751     int type = src.type();
    752     _dst.create( size, type );
    753     Mat dst = _dst.getMat();
    754     size_t esz = CV_ELEM_SIZE(type);
    755 
    756 #if defined HAVE_IPP
    757     CV_IPP_CHECK()
    758     {
    759         typedef IppStatus (CV_STDCALL * ippiMirror)(const void * pSrc, int srcStep, void * pDst, int dstStep, IppiSize roiSize, IppiAxis flip);
    760         typedef IppStatus (CV_STDCALL * ippiMirrorI)(const void * pSrcDst, int srcDstStep, IppiSize roiSize, IppiAxis flip);
    761         ippiMirror ippFunc = 0;
    762         ippiMirrorI ippFuncI = 0;
    763 
    764         if (src.data == dst.data)
    765         {
    766             CV_SUPPRESS_DEPRECATED_START
    767             ippFuncI =
    768                 type == CV_8UC1 ? (ippiMirrorI)ippiMirror_8u_C1IR :
    769                 type == CV_8UC3 ? (ippiMirrorI)ippiMirror_8u_C3IR :
    770                 type == CV_8UC4 ? (ippiMirrorI)ippiMirror_8u_C4IR :
    771                 type == CV_16UC1 ? (ippiMirrorI)ippiMirror_16u_C1IR :
    772                 type == CV_16UC3 ? (ippiMirrorI)ippiMirror_16u_C3IR :
    773                 type == CV_16UC4 ? (ippiMirrorI)ippiMirror_16u_C4IR :
    774                 type == CV_16SC1 ? (ippiMirrorI)ippiMirror_16s_C1IR :
    775                 type == CV_16SC3 ? (ippiMirrorI)ippiMirror_16s_C3IR :
    776                 type == CV_16SC4 ? (ippiMirrorI)ippiMirror_16s_C4IR :
    777                 type == CV_32SC1 ? (ippiMirrorI)ippiMirror_32s_C1IR :
    778                 type == CV_32SC3 ? (ippiMirrorI)ippiMirror_32s_C3IR :
    779                 type == CV_32SC4 ? (ippiMirrorI)ippiMirror_32s_C4IR :
    780                 type == CV_32FC1 ? (ippiMirrorI)ippiMirror_32f_C1IR :
    781                 type == CV_32FC3 ? (ippiMirrorI)ippiMirror_32f_C3IR :
    782                 type == CV_32FC4 ? (ippiMirrorI)ippiMirror_32f_C4IR : 0;
    783             CV_SUPPRESS_DEPRECATED_END
    784         }
    785         else
    786         {
    787             ippFunc =
    788                 type == CV_8UC1 ? (ippiMirror)ippiMirror_8u_C1R :
    789                 type == CV_8UC3 ? (ippiMirror)ippiMirror_8u_C3R :
    790                 type == CV_8UC4 ? (ippiMirror)ippiMirror_8u_C4R :
    791                 type == CV_16UC1 ? (ippiMirror)ippiMirror_16u_C1R :
    792                 type == CV_16UC3 ? (ippiMirror)ippiMirror_16u_C3R :
    793                 type == CV_16UC4 ? (ippiMirror)ippiMirror_16u_C4R :
    794                 type == CV_16SC1 ? (ippiMirror)ippiMirror_16s_C1R :
    795                 type == CV_16SC3 ? (ippiMirror)ippiMirror_16s_C3R :
    796                 type == CV_16SC4 ? (ippiMirror)ippiMirror_16s_C4R :
    797                 type == CV_32SC1 ? (ippiMirror)ippiMirror_32s_C1R :
    798                 type == CV_32SC3 ? (ippiMirror)ippiMirror_32s_C3R :
    799                 type == CV_32SC4 ? (ippiMirror)ippiMirror_32s_C4R :
    800                 type == CV_32FC1 ? (ippiMirror)ippiMirror_32f_C1R :
    801                 type == CV_32FC3 ? (ippiMirror)ippiMirror_32f_C3R :
    802                 type == CV_32FC4 ? (ippiMirror)ippiMirror_32f_C4R : 0;
    803         }
    804         IppiAxis axis = flip_mode == 0 ? ippAxsHorizontal :
    805             flip_mode > 0 ? ippAxsVertical : ippAxsBoth;
    806         IppiSize roisize = { dst.cols, dst.rows };
    807 
    808         if (ippFunc != 0)
    809         {
    810             if (ippFunc(src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, ippiSize(src.cols, src.rows), axis) >= 0)
    811             {
    812                 CV_IMPL_ADD(CV_IMPL_IPP);
    813                 return;
    814             }
    815             setIppErrorStatus();
    816         }
    817         else if (ippFuncI != 0)
    818         {
    819             if (ippFuncI(dst.ptr(), (int)dst.step, roisize, axis) >= 0)
    820             {
    821                 CV_IMPL_ADD(CV_IMPL_IPP);
    822                 return;
    823             }
    824             setIppErrorStatus();
    825         }
    826     }
    827 #endif
    828 
    829     if( flip_mode <= 0 )
    830         flipVert( src.ptr(), src.step, dst.ptr(), dst.step, src.size(), esz );
    831     else
    832         flipHoriz( src.ptr(), src.step, dst.ptr(), dst.step, src.size(), esz );
    833 
    834     if( flip_mode < 0 )
    835         flipHoriz( dst.ptr(), dst.step, dst.ptr(), dst.step, dst.size(), esz );
    836 }
    837 
    838 #if defined HAVE_OPENCL && !defined __APPLE__
    839 
    840 static bool ocl_repeat(InputArray _src, int ny, int nx, OutputArray _dst)
    841 {
    842     if (ny == 1 && nx == 1)
    843     {
    844         _src.copyTo(_dst);
    845         return true;
    846     }
    847 
    848     int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
    849             rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1,
    850             kercn = ocl::predictOptimalVectorWidth(_src, _dst);
    851 
    852     ocl::Kernel k("repeat", ocl::core::repeat_oclsrc,
    853                   format("-D T=%s -D nx=%d -D ny=%d -D rowsPerWI=%d -D cn=%d",
    854                          ocl::memopTypeToStr(CV_MAKE_TYPE(depth, kercn)),
    855                          nx, ny, rowsPerWI, kercn));
    856     if (k.empty())
    857         return false;
    858 
    859     UMat src = _src.getUMat(), dst = _dst.getUMat();
    860     k.args(ocl::KernelArg::ReadOnly(src, cn, kercn), ocl::KernelArg::WriteOnlyNoSize(dst));
    861 
    862     size_t globalsize[] = { src.cols * cn / kercn, (src.rows + rowsPerWI - 1) / rowsPerWI };
    863     return k.run(2, globalsize, NULL, false);
    864 }
    865 
    866 #endif
    867 
    868 void repeat(InputArray _src, int ny, int nx, OutputArray _dst)
    869 {
    870     CV_Assert( _src.dims() <= 2 );
    871     CV_Assert( ny > 0 && nx > 0 );
    872 
    873     Size ssize = _src.size();
    874     _dst.create(ssize.height*ny, ssize.width*nx, _src.type());
    875 
    876 #if !defined __APPLE__
    877     CV_OCL_RUN(_dst.isUMat(),
    878                ocl_repeat(_src, ny, nx, _dst))
    879 #endif
    880 
    881     Mat src = _src.getMat(), dst = _dst.getMat();
    882     Size dsize = dst.size();
    883     int esz = (int)src.elemSize();
    884     int x, y;
    885     ssize.width *= esz; dsize.width *= esz;
    886 
    887     for( y = 0; y < ssize.height; y++ )
    888     {
    889         for( x = 0; x < dsize.width; x += ssize.width )
    890             memcpy( dst.ptr(y) + x, src.ptr(y), ssize.width );
    891     }
    892 
    893     for( ; y < dsize.height; y++ )
    894         memcpy( dst.ptr(y), dst.ptr(y - ssize.height), dsize.width );
    895 }
    896 
    897 Mat repeat(const Mat& src, int ny, int nx)
    898 {
    899     if( nx == 1 && ny == 1 )
    900         return src;
    901     Mat dst;
    902     repeat(src, ny, nx, dst);
    903     return dst;
    904 }
    905 
    906 
    907 } // cv
    908 
    909 
    910 /*
    911  Various border types, image boundaries are denoted with '|'
    912 
    913  * BORDER_REPLICATE:     aaaaaa|abcdefgh|hhhhhhh
    914  * BORDER_REFLECT:       fedcba|abcdefgh|hgfedcb
    915  * BORDER_REFLECT_101:   gfedcb|abcdefgh|gfedcba
    916  * BORDER_WRAP:          cdefgh|abcdefgh|abcdefg
    917  * BORDER_CONSTANT:      iiiiii|abcdefgh|iiiiiii  with some specified 'i'
    918  */
    919 int cv::borderInterpolate( int p, int len, int borderType )
    920 {
    921     if( (unsigned)p < (unsigned)len )
    922         ;
    923     else if( borderType == BORDER_REPLICATE )
    924         p = p < 0 ? 0 : len - 1;
    925     else if( borderType == BORDER_REFLECT || borderType == BORDER_REFLECT_101 )
    926     {
    927         int delta = borderType == BORDER_REFLECT_101;
    928         if( len == 1 )
    929             return 0;
    930         do
    931         {
    932             if( p < 0 )
    933                 p = -p - 1 + delta;
    934             else
    935                 p = len - 1 - (p - len) - delta;
    936         }
    937         while( (unsigned)p >= (unsigned)len );
    938     }
    939     else if( borderType == BORDER_WRAP )
    940     {
    941         CV_Assert(len > 0);
    942         if( p < 0 )
    943             p -= ((p-len+1)/len)*len;
    944         if( p >= len )
    945             p %= len;
    946     }
    947     else if( borderType == BORDER_CONSTANT )
    948         p = -1;
    949     else
    950         CV_Error( CV_StsBadArg, "Unknown/unsupported border type" );
    951     return p;
    952 }
    953 
    954 namespace
    955 {
    956 
    957 void copyMakeBorder_8u( const uchar* src, size_t srcstep, cv::Size srcroi,
    958                         uchar* dst, size_t dststep, cv::Size dstroi,
    959                         int top, int left, int cn, int borderType )
    960 {
    961     const int isz = (int)sizeof(int);
    962     int i, j, k, elemSize = 1;
    963     bool intMode = false;
    964 
    965     if( (cn | srcstep | dststep | (size_t)src | (size_t)dst) % isz == 0 )
    966     {
    967         cn /= isz;
    968         elemSize = isz;
    969         intMode = true;
    970     }
    971 
    972     cv::AutoBuffer<int> _tab((dstroi.width - srcroi.width)*cn);
    973     int* tab = _tab;
    974     int right = dstroi.width - srcroi.width - left;
    975     int bottom = dstroi.height - srcroi.height - top;
    976 
    977     for( i = 0; i < left; i++ )
    978     {
    979         j = cv::borderInterpolate(i - left, srcroi.width, borderType)*cn;
    980         for( k = 0; k < cn; k++ )
    981             tab[i*cn + k] = j + k;
    982     }
    983 
    984     for( i = 0; i < right; i++ )
    985     {
    986         j = cv::borderInterpolate(srcroi.width + i, srcroi.width, borderType)*cn;
    987         for( k = 0; k < cn; k++ )
    988             tab[(i+left)*cn + k] = j + k;
    989     }
    990 
    991     srcroi.width *= cn;
    992     dstroi.width *= cn;
    993     left *= cn;
    994     right *= cn;
    995 
    996     uchar* dstInner = dst + dststep*top + left*elemSize;
    997 
    998     for( i = 0; i < srcroi.height; i++, dstInner += dststep, src += srcstep )
    999     {
   1000         if( dstInner != src )
   1001             memcpy(dstInner, src, srcroi.width*elemSize);
   1002 
   1003         if( intMode )
   1004         {
   1005             const int* isrc = (int*)src;
   1006             int* idstInner = (int*)dstInner;
   1007             for( j = 0; j < left; j++ )
   1008                 idstInner[j - left] = isrc[tab[j]];
   1009             for( j = 0; j < right; j++ )
   1010                 idstInner[j + srcroi.width] = isrc[tab[j + left]];
   1011         }
   1012         else
   1013         {
   1014             for( j = 0; j < left; j++ )
   1015                 dstInner[j - left] = src[tab[j]];
   1016             for( j = 0; j < right; j++ )
   1017                 dstInner[j + srcroi.width] = src[tab[j + left]];
   1018         }
   1019     }
   1020 
   1021     dstroi.width *= elemSize;
   1022     dst += dststep*top;
   1023 
   1024     for( i = 0; i < top; i++ )
   1025     {
   1026         j = cv::borderInterpolate(i - top, srcroi.height, borderType);
   1027         memcpy(dst + (i - top)*dststep, dst + j*dststep, dstroi.width);
   1028     }
   1029 
   1030     for( i = 0; i < bottom; i++ )
   1031     {
   1032         j = cv::borderInterpolate(i + srcroi.height, srcroi.height, borderType);
   1033         memcpy(dst + (i + srcroi.height)*dststep, dst + j*dststep, dstroi.width);
   1034     }
   1035 }
   1036 
   1037 
   1038 void copyMakeConstBorder_8u( const uchar* src, size_t srcstep, cv::Size srcroi,
   1039                              uchar* dst, size_t dststep, cv::Size dstroi,
   1040                              int top, int left, int cn, const uchar* value )
   1041 {
   1042     int i, j;
   1043     cv::AutoBuffer<uchar> _constBuf(dstroi.width*cn);
   1044     uchar* constBuf = _constBuf;
   1045     int right = dstroi.width - srcroi.width - left;
   1046     int bottom = dstroi.height - srcroi.height - top;
   1047 
   1048     for( i = 0; i < dstroi.width; i++ )
   1049     {
   1050         for( j = 0; j < cn; j++ )
   1051             constBuf[i*cn + j] = value[j];
   1052     }
   1053 
   1054     srcroi.width *= cn;
   1055     dstroi.width *= cn;
   1056     left *= cn;
   1057     right *= cn;
   1058 
   1059     uchar* dstInner = dst + dststep*top + left;
   1060 
   1061     for( i = 0; i < srcroi.height; i++, dstInner += dststep, src += srcstep )
   1062     {
   1063         if( dstInner != src )
   1064             memcpy( dstInner, src, srcroi.width );
   1065         memcpy( dstInner - left, constBuf, left );
   1066         memcpy( dstInner + srcroi.width, constBuf, right );
   1067     }
   1068 
   1069     dst += dststep*top;
   1070 
   1071     for( i = 0; i < top; i++ )
   1072         memcpy(dst + (i - top)*dststep, constBuf, dstroi.width);
   1073 
   1074     for( i = 0; i < bottom; i++ )
   1075         memcpy(dst + (i + srcroi.height)*dststep, constBuf, dstroi.width);
   1076 }
   1077 
   1078 }
   1079 
   1080 #ifdef HAVE_OPENCL
   1081 
   1082 namespace cv {
   1083 
   1084 static bool ocl_copyMakeBorder( InputArray _src, OutputArray _dst, int top, int bottom,
   1085                                 int left, int right, int borderType, const Scalar& value )
   1086 {
   1087     int type = _src.type(), cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type),
   1088             rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1;
   1089     bool isolated = (borderType & BORDER_ISOLATED) != 0;
   1090     borderType &= ~cv::BORDER_ISOLATED;
   1091 
   1092     if ( !(borderType == BORDER_CONSTANT || borderType == BORDER_REPLICATE || borderType == BORDER_REFLECT ||
   1093            borderType == BORDER_WRAP || borderType == BORDER_REFLECT_101) ||
   1094          cn > 4)
   1095         return false;
   1096 
   1097     const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT_101" };
   1098     int scalarcn = cn == 3 ? 4 : cn;
   1099     int sctype = CV_MAKETYPE(depth, scalarcn);
   1100     String buildOptions = format("-D T=%s -D %s -D T1=%s -D cn=%d -D ST=%s -D rowsPerWI=%d",
   1101                                  ocl::memopTypeToStr(type), borderMap[borderType],
   1102                                  ocl::memopTypeToStr(depth), cn,
   1103                                  ocl::memopTypeToStr(sctype), rowsPerWI);
   1104 
   1105     ocl::Kernel k("copyMakeBorder", ocl::core::copymakeborder_oclsrc, buildOptions);
   1106     if (k.empty())
   1107         return false;
   1108 
   1109     UMat src = _src.getUMat();
   1110     if( src.isSubmatrix() && !isolated )
   1111     {
   1112         Size wholeSize;
   1113         Point ofs;
   1114         src.locateROI(wholeSize, ofs);
   1115         int dtop = std::min(ofs.y, top);
   1116         int dbottom = std::min(wholeSize.height - src.rows - ofs.y, bottom);
   1117         int dleft = std::min(ofs.x, left);
   1118         int dright = std::min(wholeSize.width - src.cols - ofs.x, right);
   1119         src.adjustROI(dtop, dbottom, dleft, dright);
   1120         top -= dtop;
   1121         left -= dleft;
   1122         bottom -= dbottom;
   1123         right -= dright;
   1124     }
   1125 
   1126     _dst.create(src.rows + top + bottom, src.cols + left + right, type);
   1127     UMat dst = _dst.getUMat();
   1128 
   1129     if (top == 0 && left == 0 && bottom == 0 && right == 0)
   1130     {
   1131         if(src.u != dst.u || src.step != dst.step)
   1132             src.copyTo(dst);
   1133         return true;
   1134     }
   1135 
   1136     k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst),
   1137            top, left, ocl::KernelArg::Constant(Mat(1, 1, sctype, value)));
   1138 
   1139     size_t globalsize[2] = { dst.cols, (dst.rows + rowsPerWI - 1) / rowsPerWI };
   1140     return k.run(2, globalsize, NULL, false);
   1141 }
   1142 
   1143 }
   1144 
   1145 #endif
   1146 
   1147 void cv::copyMakeBorder( InputArray _src, OutputArray _dst, int top, int bottom,
   1148                          int left, int right, int borderType, const Scalar& value )
   1149 {
   1150     CV_Assert( top >= 0 && bottom >= 0 && left >= 0 && right >= 0 );
   1151 
   1152     CV_OCL_RUN(_dst.isUMat() && _src.dims() <= 2,
   1153                ocl_copyMakeBorder(_src, _dst, top, bottom, left, right, borderType, value))
   1154 
   1155     Mat src = _src.getMat();
   1156     int type = src.type();
   1157 
   1158     if( src.isSubmatrix() && (borderType & BORDER_ISOLATED) == 0 )
   1159     {
   1160         Size wholeSize;
   1161         Point ofs;
   1162         src.locateROI(wholeSize, ofs);
   1163         int dtop = std::min(ofs.y, top);
   1164         int dbottom = std::min(wholeSize.height - src.rows - ofs.y, bottom);
   1165         int dleft = std::min(ofs.x, left);
   1166         int dright = std::min(wholeSize.width - src.cols - ofs.x, right);
   1167         src.adjustROI(dtop, dbottom, dleft, dright);
   1168         top -= dtop;
   1169         left -= dleft;
   1170         bottom -= dbottom;
   1171         right -= dright;
   1172     }
   1173 
   1174     _dst.create( src.rows + top + bottom, src.cols + left + right, type );
   1175     Mat dst = _dst.getMat();
   1176 
   1177     if(top == 0 && left == 0 && bottom == 0 && right == 0)
   1178     {
   1179         if(src.data != dst.data || src.step != dst.step)
   1180             src.copyTo(dst);
   1181         return;
   1182     }
   1183 
   1184     borderType &= ~BORDER_ISOLATED;
   1185 
   1186 #if defined HAVE_IPP && 0
   1187     CV_IPP_CHECK()
   1188     {
   1189         typedef IppStatus (CV_STDCALL * ippiCopyMakeBorder)(const void * pSrc, int srcStep, IppiSize srcRoiSize, void * pDst,
   1190                                                             int dstStep, IppiSize dstRoiSize, int topBorderHeight, int leftBorderWidth);
   1191         typedef IppStatus (CV_STDCALL * ippiCopyMakeBorderI)(const void * pSrc, int srcDstStep, IppiSize srcRoiSize, IppiSize dstRoiSize,
   1192                                                              int topBorderHeight, int leftborderwidth);
   1193         typedef IppStatus (CV_STDCALL * ippiCopyConstBorder)(const void * pSrc, int srcStep, IppiSize srcRoiSize, void * pDst, int dstStep,
   1194                                                              IppiSize dstRoiSize, int topBorderHeight, int leftBorderWidth, void * value);
   1195 
   1196         IppiSize srcRoiSize = { src.cols, src.rows }, dstRoiSize = { dst.cols, dst.rows };
   1197         ippiCopyMakeBorder ippFunc = 0;
   1198         ippiCopyMakeBorderI ippFuncI = 0;
   1199         ippiCopyConstBorder ippFuncConst = 0;
   1200         bool inplace = dst.datastart == src.datastart;
   1201 
   1202         if (borderType == BORDER_CONSTANT)
   1203         {
   1204              ippFuncConst =
   1205     //             type == CV_8UC1 ? (ippiCopyConstBorder)ippiCopyConstBorder_8u_C1R : bug in IPP 8.1
   1206                  type == CV_16UC1 ? (ippiCopyConstBorder)ippiCopyConstBorder_16u_C1R :
   1207     //             type == CV_16SC1 ? (ippiCopyConstBorder)ippiCopyConstBorder_16s_C1R : bug in IPP 8.1
   1208     //             type == CV_32SC1 ? (ippiCopyConstBorder)ippiCopyConstBorder_32s_C1R : bug in IPP 8.1
   1209     //             type == CV_32FC1 ? (ippiCopyConstBorder)ippiCopyConstBorder_32f_C1R : bug in IPP 8.1
   1210                  type == CV_8UC3 ? (ippiCopyConstBorder)ippiCopyConstBorder_8u_C3R :
   1211                  type == CV_16UC3 ? (ippiCopyConstBorder)ippiCopyConstBorder_16u_C3R :
   1212                  type == CV_16SC3 ? (ippiCopyConstBorder)ippiCopyConstBorder_16s_C3R :
   1213                  type == CV_32SC3 ? (ippiCopyConstBorder)ippiCopyConstBorder_32s_C3R :
   1214                  type == CV_32FC3 ? (ippiCopyConstBorder)ippiCopyConstBorder_32f_C3R :
   1215                  type == CV_8UC4 ? (ippiCopyConstBorder)ippiCopyConstBorder_8u_C4R :
   1216                  type == CV_16UC4 ? (ippiCopyConstBorder)ippiCopyConstBorder_16u_C4R :
   1217                  type == CV_16SC4 ? (ippiCopyConstBorder)ippiCopyConstBorder_16s_C4R :
   1218                  type == CV_32SC4 ? (ippiCopyConstBorder)ippiCopyConstBorder_32s_C4R :
   1219                  type == CV_32FC4 ? (ippiCopyConstBorder)ippiCopyConstBorder_32f_C4R : 0;
   1220         }
   1221         else if (borderType == BORDER_WRAP)
   1222         {
   1223             if (inplace)
   1224             {
   1225                 CV_SUPPRESS_DEPRECATED_START
   1226                 ippFuncI =
   1227                     type == CV_32SC1 ? (ippiCopyMakeBorderI)ippiCopyWrapBorder_32s_C1IR :
   1228                     type == CV_32FC1 ? (ippiCopyMakeBorderI)ippiCopyWrapBorder_32s_C1IR : 0;
   1229                 CV_SUPPRESS_DEPRECATED_END
   1230             }
   1231             else
   1232             {
   1233                 ippFunc =
   1234                     type == CV_32SC1 ? (ippiCopyMakeBorder)ippiCopyWrapBorder_32s_C1R :
   1235                     type == CV_32FC1 ? (ippiCopyMakeBorder)ippiCopyWrapBorder_32s_C1R : 0;
   1236             }
   1237         }
   1238         else if (borderType == BORDER_REPLICATE)
   1239         {
   1240             if (inplace)
   1241             {
   1242                 CV_SUPPRESS_DEPRECATED_START
   1243                 ippFuncI =
   1244                     type == CV_8UC1 ? (ippiCopyMakeBorderI)ippiCopyReplicateBorder_8u_C1IR :
   1245                     type == CV_16UC1 ? (ippiCopyMakeBorderI)ippiCopyReplicateBorder_16u_C1IR :
   1246                     type == CV_16SC1 ? (ippiCopyMakeBorderI)ippiCopyReplicateBorder_16s_C1IR :
   1247                     type == CV_32SC1 ? (ippiCopyMakeBorderI)ippiCopyReplicateBorder_32s_C1IR :
   1248                     type == CV_32FC1 ? (ippiCopyMakeBorderI)ippiCopyReplicateBorder_32f_C1IR :
   1249                     type == CV_8UC3 ? (ippiCopyMakeBorderI)ippiCopyReplicateBorder_8u_C3IR :
   1250                     type == CV_16UC3 ? (ippiCopyMakeBorderI)ippiCopyReplicateBorder_16u_C3IR :
   1251                     type == CV_16SC3 ? (ippiCopyMakeBorderI)ippiCopyReplicateBorder_16s_C3IR :
   1252                     type == CV_32SC3 ? (ippiCopyMakeBorderI)ippiCopyReplicateBorder_32s_C3IR :
   1253                     type == CV_32FC3 ? (ippiCopyMakeBorderI)ippiCopyReplicateBorder_32f_C3IR :
   1254                     type == CV_8UC4 ? (ippiCopyMakeBorderI)ippiCopyReplicateBorder_8u_C4IR :
   1255                     type == CV_16UC4 ? (ippiCopyMakeBorderI)ippiCopyReplicateBorder_16u_C4IR :
   1256                     type == CV_16SC4 ? (ippiCopyMakeBorderI)ippiCopyReplicateBorder_16s_C4IR :
   1257                     type == CV_32SC4 ? (ippiCopyMakeBorderI)ippiCopyReplicateBorder_32s_C4IR :
   1258                     type == CV_32FC4 ? (ippiCopyMakeBorderI)ippiCopyReplicateBorder_32f_C4IR : 0;
   1259                 CV_SUPPRESS_DEPRECATED_END
   1260             }
   1261             else
   1262             {
   1263                  ippFunc =
   1264                      type == CV_8UC1 ? (ippiCopyMakeBorder)ippiCopyReplicateBorder_8u_C1R :
   1265                      type == CV_16UC1 ? (ippiCopyMakeBorder)ippiCopyReplicateBorder_16u_C1R :
   1266                      type == CV_16SC1 ? (ippiCopyMakeBorder)ippiCopyReplicateBorder_16s_C1R :
   1267                      type == CV_32SC1 ? (ippiCopyMakeBorder)ippiCopyReplicateBorder_32s_C1R :
   1268                      type == CV_32FC1 ? (ippiCopyMakeBorder)ippiCopyReplicateBorder_32f_C1R :
   1269                      type == CV_8UC3 ? (ippiCopyMakeBorder)ippiCopyReplicateBorder_8u_C3R :
   1270                      type == CV_16UC3 ? (ippiCopyMakeBorder)ippiCopyReplicateBorder_16u_C3R :
   1271                      type == CV_16SC3 ? (ippiCopyMakeBorder)ippiCopyReplicateBorder_16s_C3R :
   1272                      type == CV_32SC3 ? (ippiCopyMakeBorder)ippiCopyReplicateBorder_32s_C3R :
   1273                      type == CV_32FC3 ? (ippiCopyMakeBorder)ippiCopyReplicateBorder_32f_C3R :
   1274                      type == CV_8UC4 ? (ippiCopyMakeBorder)ippiCopyReplicateBorder_8u_C4R :
   1275                      type == CV_16UC4 ? (ippiCopyMakeBorder)ippiCopyReplicateBorder_16u_C4R :
   1276                      type == CV_16SC4 ? (ippiCopyMakeBorder)ippiCopyReplicateBorder_16s_C4R :
   1277                      type == CV_32SC4 ? (ippiCopyMakeBorder)ippiCopyReplicateBorder_32s_C4R :
   1278                      type == CV_32FC4 ? (ippiCopyMakeBorder)ippiCopyReplicateBorder_32f_C4R : 0;
   1279             }
   1280         }
   1281 
   1282         if (ippFunc || ippFuncI || ippFuncConst)
   1283         {
   1284             uchar scbuf[32];
   1285             scalarToRawData(value, scbuf, type);
   1286 
   1287             if ( (ippFunc && ippFunc(src.data, (int)src.step, srcRoiSize, dst.data, (int)dst.step, dstRoiSize, top, left) >= 0) ||
   1288                  (ippFuncI && ippFuncI(src.data, (int)src.step, srcRoiSize, dstRoiSize, top, left) >= 0) ||
   1289                  (ippFuncConst && ippFuncConst(src.data, (int)src.step, srcRoiSize, dst.data, (int)dst.step,
   1290                                                dstRoiSize, top, left, scbuf) >= 0))
   1291             {
   1292                 CV_IMPL_ADD(CV_IMPL_IPP);
   1293                 return;
   1294             }
   1295 
   1296             setIppErrorStatus();
   1297         }
   1298     }
   1299 #endif
   1300 
   1301     if( borderType != BORDER_CONSTANT )
   1302         copyMakeBorder_8u( src.ptr(), src.step, src.size(),
   1303                            dst.ptr(), dst.step, dst.size(),
   1304                            top, left, (int)src.elemSize(), borderType );
   1305     else
   1306     {
   1307         int cn = src.channels(), cn1 = cn;
   1308         AutoBuffer<double> buf(cn);
   1309         if( cn > 4 )
   1310         {
   1311             CV_Assert( value[0] == value[1] && value[0] == value[2] && value[0] == value[3] );
   1312             cn1 = 1;
   1313         }
   1314         scalarToRawData(value, buf, CV_MAKETYPE(src.depth(), cn1), cn);
   1315         copyMakeConstBorder_8u( src.ptr(), src.step, src.size(),
   1316                                 dst.ptr(), dst.step, dst.size(),
   1317                                 top, left, (int)src.elemSize(), (uchar*)(double*)buf );
   1318     }
   1319 }
   1320 
   1321 /* dst = src */
   1322 CV_IMPL void
   1323 cvCopy( const void* srcarr, void* dstarr, const void* maskarr )
   1324 {
   1325     if( CV_IS_SPARSE_MAT(srcarr) && CV_IS_SPARSE_MAT(dstarr))
   1326     {
   1327         CV_Assert( maskarr == 0 );
   1328         CvSparseMat* src1 = (CvSparseMat*)srcarr;
   1329         CvSparseMat* dst1 = (CvSparseMat*)dstarr;
   1330         CvSparseMatIterator iterator;
   1331         CvSparseNode* node;
   1332 
   1333         dst1->dims = src1->dims;
   1334         memcpy( dst1->size, src1->size, src1->dims*sizeof(src1->size[0]));
   1335         dst1->valoffset = src1->valoffset;
   1336         dst1->idxoffset = src1->idxoffset;
   1337         cvClearSet( dst1->heap );
   1338 
   1339         if( src1->heap->active_count >= dst1->hashsize*CV_SPARSE_HASH_RATIO )
   1340         {
   1341             cvFree( &dst1->hashtable );
   1342             dst1->hashsize = src1->hashsize;
   1343             dst1->hashtable =
   1344                 (void**)cvAlloc( dst1->hashsize*sizeof(dst1->hashtable[0]));
   1345         }
   1346 
   1347         memset( dst1->hashtable, 0, dst1->hashsize*sizeof(dst1->hashtable[0]));
   1348 
   1349         for( node = cvInitSparseMatIterator( src1, &iterator );
   1350              node != 0; node = cvGetNextSparseNode( &iterator ))
   1351         {
   1352             CvSparseNode* node_copy = (CvSparseNode*)cvSetNew( dst1->heap );
   1353             int tabidx = node->hashval & (dst1->hashsize - 1);
   1354             memcpy( node_copy, node, dst1->heap->elem_size );
   1355             node_copy->next = (CvSparseNode*)dst1->hashtable[tabidx];
   1356             dst1->hashtable[tabidx] = node_copy;
   1357         }
   1358         return;
   1359     }
   1360     cv::Mat src = cv::cvarrToMat(srcarr, false, true, 1), dst = cv::cvarrToMat(dstarr, false, true, 1);
   1361     CV_Assert( src.depth() == dst.depth() && src.size == dst.size );
   1362 
   1363     int coi1 = 0, coi2 = 0;
   1364     if( CV_IS_IMAGE(srcarr) )
   1365         coi1 = cvGetImageCOI((const IplImage*)srcarr);
   1366     if( CV_IS_IMAGE(dstarr) )
   1367         coi2 = cvGetImageCOI((const IplImage*)dstarr);
   1368 
   1369     if( coi1 || coi2 )
   1370     {
   1371         CV_Assert( (coi1 != 0 || src.channels() == 1) &&
   1372             (coi2 != 0 || dst.channels() == 1) );
   1373 
   1374         int pair[] = { std::max(coi1-1, 0), std::max(coi2-1, 0) };
   1375         cv::mixChannels( &src, 1, &dst, 1, pair, 1 );
   1376         return;
   1377     }
   1378     else
   1379         CV_Assert( src.channels() == dst.channels() );
   1380 
   1381     if( !maskarr )
   1382         src.copyTo(dst);
   1383     else
   1384         src.copyTo(dst, cv::cvarrToMat(maskarr));
   1385 }
   1386 
   1387 CV_IMPL void
   1388 cvSet( void* arr, CvScalar value, const void* maskarr )
   1389 {
   1390     cv::Mat m = cv::cvarrToMat(arr);
   1391     if( !maskarr )
   1392         m = value;
   1393     else
   1394         m.setTo(cv::Scalar(value), cv::cvarrToMat(maskarr));
   1395 }
   1396 
   1397 CV_IMPL void
   1398 cvSetZero( CvArr* arr )
   1399 {
   1400     if( CV_IS_SPARSE_MAT(arr) )
   1401     {
   1402         CvSparseMat* mat1 = (CvSparseMat*)arr;
   1403         cvClearSet( mat1->heap );
   1404         if( mat1->hashtable )
   1405             memset( mat1->hashtable, 0, mat1->hashsize*sizeof(mat1->hashtable[0]));
   1406         return;
   1407     }
   1408     cv::Mat m = cv::cvarrToMat(arr);
   1409     m = cv::Scalar(0);
   1410 }
   1411 
   1412 CV_IMPL void
   1413 cvFlip( const CvArr* srcarr, CvArr* dstarr, int flip_mode )
   1414 {
   1415     cv::Mat src = cv::cvarrToMat(srcarr);
   1416     cv::Mat dst;
   1417 
   1418     if (!dstarr)
   1419       dst = src;
   1420     else
   1421       dst = cv::cvarrToMat(dstarr);
   1422 
   1423     CV_Assert( src.type() == dst.type() && src.size() == dst.size() );
   1424     cv::flip( src, dst, flip_mode );
   1425 }
   1426 
   1427 CV_IMPL void
   1428 cvRepeat( const CvArr* srcarr, CvArr* dstarr )
   1429 {
   1430     cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
   1431     CV_Assert( src.type() == dst.type() &&
   1432         dst.rows % src.rows == 0 && dst.cols % src.cols == 0 );
   1433     cv::repeat(src, dst.rows/src.rows, dst.cols/src.cols, dst);
   1434 }
   1435 
   1436 /* End of file. */
   1437