/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"
#include "opencl_kernels_imgproc.hpp"

namespace cv
{

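// 8-bit thresholding. Builds a 256-entry lookup table encoding the selected
// operation, then applies it row by row; IPP/Tegra and SIMD (SSE2/NEON) fast
// paths handle the bulk of each row, and the LUT covers the scalar tail.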
static void
thresh_8u( const Mat& _src, Mat& _dst, uchar thresh, uchar maxval, int type )
{
    int i, j, j_scalar = 0;
    uchar tab[256];
    Size roi = _src.size();
    roi.width *= _src.channels();
    size_t src_step = _src.step;
    size_t dst_step = _dst.step;

    if( _src.isContinuous() && _dst.isContinuous() )
    {
        roi.width *= roi.height;
        roi.height = 1;
        src_step = dst_step = roi.width;
    }

#ifdef HAVE_TEGRA_OPTIMIZATION
    if (tegra::useTegra() && tegra::thresh_8u(_src, _dst, roi.width, roi.height, thresh, maxval, type))
        return;
#endif

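    // IPP fast path for THRESH_TRUNC/THRESH_TOZERO/THRESH_TOZERO_INV; on
    // failure the IPP error status is reset and we fall through to the
    // generic implementation below.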
#if defined(HAVE_IPP)
    CV_IPP_CHECK()
    {
        IppiSize sz = { roi.width, roi.height };
        CV_SUPPRESS_DEPRECATED_START
        switch( type )
        {
        case THRESH_TRUNC:
#ifndef HAVE_IPP_ICV_ONLY
            if (_src.data == _dst.data && ippiThreshold_GT_8u_C1IR(_dst.ptr(), (int)dst_step, sz, thresh) >= 0)
            {
                CV_IMPL_ADD(CV_IMPL_IPP);
                return;
            }
#endif
            if (ippiThreshold_GT_8u_C1R(_src.ptr(), (int)src_step, _dst.ptr(), (int)dst_step, sz, thresh) >= 0)
            {
                CV_IMPL_ADD(CV_IMPL_IPP);
                return;
            }
            setIppErrorStatus();
            break;
        case THRESH_TOZERO:
#ifndef HAVE_IPP_ICV_ONLY
            if (_src.data == _dst.data && ippiThreshold_LTVal_8u_C1IR(_dst.ptr(), (int)dst_step, sz, thresh+1, 0) >= 0)
            {
                CV_IMPL_ADD(CV_IMPL_IPP);
                return;
            }
#endif
            if (ippiThreshold_LTVal_8u_C1R(_src.ptr(), (int)src_step, _dst.ptr(), (int)dst_step, sz, thresh+1, 0) >= 0)
            {
                CV_IMPL_ADD(CV_IMPL_IPP);
                return;
            }
            setIppErrorStatus();
            break;
        case THRESH_TOZERO_INV:
#ifndef HAVE_IPP_ICV_ONLY
            if (_src.data == _dst.data && ippiThreshold_GTVal_8u_C1IR(_dst.ptr(), (int)dst_step, sz, thresh, 0) >= 0)
            {
                CV_IMPL_ADD(CV_IMPL_IPP);
                return;
            }
#endif
            if (ippiThreshold_GTVal_8u_C1R(_src.ptr(), (int)src_step, _dst.ptr(), (int)dst_step, sz, thresh, 0) >= 0)
            {
                CV_IMPL_ADD(CV_IMPL_IPP);
                return;
            }
            setIppErrorStatus();
            break;
        }
        CV_SUPPRESS_DEPRECATED_END
    }
#endif

    switch( type )
    {
    case THRESH_BINARY:
        for( i = 0; i <= thresh; i++ )
            tab[i] = 0;
        for( ; i < 256; i++ )
            tab[i] = maxval;
        break;
    case THRESH_BINARY_INV:
        for( i = 0; i <= thresh; i++ )
            tab[i] = maxval;
        for( ; i < 256; i++ )
            tab[i] = 0;
        break;
    case THRESH_TRUNC:
        for( i = 0; i <= thresh; i++ )
            tab[i] = (uchar)i;
        for( ; i < 256; i++ )
            tab[i] = thresh;
        break;
    case THRESH_TOZERO:
        for( i = 0; i <= thresh; i++ )
            tab[i] = 0;
        for( ; i < 256; i++ )
            tab[i] = (uchar)i;
        break;
    case THRESH_TOZERO_INV:
        for( i = 0; i <= thresh; i++ )
            tab[i] = (uchar)i;
        for( ; i < 256; i++ )
            tab[i] = 0;
        break;
    default:
        CV_Error( CV_StsBadArg, "Unknown threshold type" );
    }

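    // SSE2 has no unsigned byte comparison, so both the pixels and the
    // threshold are XOR'ed with 0x80, mapping unsigned [0,255] onto signed
    // [-128,127]; _mm_cmpgt_epi8 then behaves as an unsigned '>' compare.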
#if CV_SSE2
    if( checkHardwareSupport(CV_CPU_SSE2) )
    {
        __m128i _x80 = _mm_set1_epi8('\x80');
        __m128i thresh_u = _mm_set1_epi8(thresh);
        __m128i thresh_s = _mm_set1_epi8(thresh ^ 0x80);
        __m128i maxval_ = _mm_set1_epi8(maxval);
        j_scalar = roi.width & -8;

        for( i = 0; i < roi.height; i++ )
        {
            const uchar* src = _src.ptr() + src_step*i;
            uchar* dst = _dst.ptr() + dst_step*i;

            switch( type )
            {
            case THRESH_BINARY:
                for( j = 0; j <= roi.width - 32; j += 32 )
                {
                    __m128i v0, v1;
                    v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
                    v1 = _mm_loadu_si128( (const __m128i*)(src + j + 16) );
                    v0 = _mm_cmpgt_epi8( _mm_xor_si128(v0, _x80), thresh_s );
                    v1 = _mm_cmpgt_epi8( _mm_xor_si128(v1, _x80), thresh_s );
                    v0 = _mm_and_si128( v0, maxval_ );
                    v1 = _mm_and_si128( v1, maxval_ );
                    _mm_storeu_si128( (__m128i*)(dst + j), v0 );
                    _mm_storeu_si128( (__m128i*)(dst + j + 16), v1 );
                }

                for( ; j <= roi.width - 8; j += 8 )
                {
                    __m128i v0 = _mm_loadl_epi64( (const __m128i*)(src + j) );
                    v0 = _mm_cmpgt_epi8( _mm_xor_si128(v0, _x80), thresh_s );
                    v0 = _mm_and_si128( v0, maxval_ );
                    _mm_storel_epi64( (__m128i*)(dst + j), v0 );
                }
                break;

            case THRESH_BINARY_INV:
                for( j = 0; j <= roi.width - 32; j += 32 )
                {
                    __m128i v0, v1;
                    v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
                    v1 = _mm_loadu_si128( (const __m128i*)(src + j + 16) );
                    v0 = _mm_cmpgt_epi8( _mm_xor_si128(v0, _x80), thresh_s );
                    v1 = _mm_cmpgt_epi8( _mm_xor_si128(v1, _x80), thresh_s );
                    v0 = _mm_andnot_si128( v0, maxval_ );
                    v1 = _mm_andnot_si128( v1, maxval_ );
                    _mm_storeu_si128( (__m128i*)(dst + j), v0 );
                    _mm_storeu_si128( (__m128i*)(dst + j + 16), v1 );
                }

                for( ; j <= roi.width - 8; j += 8 )
                {
                    __m128i v0 = _mm_loadl_epi64( (const __m128i*)(src + j) );
                    v0 = _mm_cmpgt_epi8( _mm_xor_si128(v0, _x80), thresh_s );
                    v0 = _mm_andnot_si128( v0, maxval_ );
                    _mm_storel_epi64( (__m128i*)(dst + j), v0 );
                }
                break;

            case THRESH_TRUNC:
                for( j = 0; j <= roi.width - 32; j += 32 )
                {
                    __m128i v0, v1;
                    v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
                    v1 = _mm_loadu_si128( (const __m128i*)(src + j + 16) );
                    v0 = _mm_subs_epu8( v0, _mm_subs_epu8( v0, thresh_u ));
                    v1 = _mm_subs_epu8( v1, _mm_subs_epu8( v1, thresh_u ));
                    _mm_storeu_si128( (__m128i*)(dst + j), v0 );
                    _mm_storeu_si128( (__m128i*)(dst + j + 16), v1 );
                }

                for( ; j <= roi.width - 8; j += 8 )
                {
                    __m128i v0 = _mm_loadl_epi64( (const __m128i*)(src + j) );
                    v0 = _mm_subs_epu8( v0, _mm_subs_epu8( v0, thresh_u ));
                    _mm_storel_epi64( (__m128i*)(dst + j), v0 );
                }
                break;

            case THRESH_TOZERO:
                for( j = 0; j <= roi.width - 32; j += 32 )
                {
                    __m128i v0, v1;
                    v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
                    v1 = _mm_loadu_si128( (const __m128i*)(src + j + 16) );
                    v0 = _mm_and_si128( v0, _mm_cmpgt_epi8(_mm_xor_si128(v0, _x80), thresh_s ));
                    v1 = _mm_and_si128( v1, _mm_cmpgt_epi8(_mm_xor_si128(v1, _x80), thresh_s ));
                    _mm_storeu_si128( (__m128i*)(dst + j), v0 );
                    _mm_storeu_si128( (__m128i*)(dst + j + 16), v1 );
                }

                for( ; j <= roi.width - 8; j += 8 )
                {
                    __m128i v0 = _mm_loadl_epi64( (const __m128i*)(src + j) );
                    v0 = _mm_and_si128( v0, _mm_cmpgt_epi8(_mm_xor_si128(v0, _x80), thresh_s ));
                    _mm_storel_epi64( (__m128i*)(dst + j), v0 );
                }
                break;

            case THRESH_TOZERO_INV:
                for( j = 0; j <= roi.width - 32; j += 32 )
                {
                    __m128i v0, v1;
                    v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
                    v1 = _mm_loadu_si128( (const __m128i*)(src + j + 16) );
                    v0 = _mm_andnot_si128( _mm_cmpgt_epi8(_mm_xor_si128(v0, _x80), thresh_s ), v0 );
                    v1 = _mm_andnot_si128( _mm_cmpgt_epi8(_mm_xor_si128(v1, _x80), thresh_s ), v1 );
                    _mm_storeu_si128( (__m128i*)(dst + j), v0 );
                    _mm_storeu_si128( (__m128i*)(dst + j + 16), v1 );
                }

                for( ; j <= roi.width - 8; j += 8 )
                {
                    __m128i v0 = _mm_loadl_epi64( (const __m128i*)(src + j) );
                    v0 = _mm_andnot_si128( _mm_cmpgt_epi8(_mm_xor_si128(v0, _x80), thresh_s ), v0 );
                    _mm_storel_epi64( (__m128i*)(dst + j), v0 );
                }
                break;
            }
        }
    }
#elif CV_NEON
    uint8x16_t v_thresh = vdupq_n_u8(thresh), v_maxval = vdupq_n_u8(maxval);

    switch( type )
    {
    case THRESH_BINARY:
        for( i = 0; i < roi.height; i++ )
        {
            const uchar* src = _src.ptr() + src_step*i;
            uchar* dst = _dst.ptr() + dst_step*i;

            for ( j_scalar = 0; j_scalar <= roi.width - 16; j_scalar += 16)
                vst1q_u8(dst + j_scalar, vandq_u8(vcgtq_u8(vld1q_u8(src + j_scalar), v_thresh), v_maxval));
        }
        break;

    case THRESH_BINARY_INV:
        for( i = 0; i < roi.height; i++ )
        {
            const uchar* src = _src.ptr() + src_step*i;
            uchar* dst = _dst.ptr() + dst_step*i;

            for ( j_scalar = 0; j_scalar <= roi.width - 16; j_scalar += 16)
                vst1q_u8(dst + j_scalar, vandq_u8(vcleq_u8(vld1q_u8(src + j_scalar), v_thresh), v_maxval));
        }
        break;

    case THRESH_TRUNC:
        for( i = 0; i < roi.height; i++ )
        {
            const uchar* src = _src.ptr() + src_step*i;
            uchar* dst = _dst.ptr() + dst_step*i;

            for ( j_scalar = 0; j_scalar <= roi.width - 16; j_scalar += 16)
                vst1q_u8(dst + j_scalar, vminq_u8(vld1q_u8(src + j_scalar), v_thresh));
        }
        break;

    case THRESH_TOZERO:
        for( i = 0; i < roi.height; i++ )
        {
            const uchar* src = _src.ptr() + src_step*i;
            uchar* dst = _dst.ptr() + dst_step*i;

            for ( j_scalar = 0; j_scalar <= roi.width - 16; j_scalar += 16)
            {
                uint8x16_t v_src = vld1q_u8(src + j_scalar), v_mask = vcgtq_u8(v_src, v_thresh);
                vst1q_u8(dst + j_scalar, vandq_u8(v_mask, v_src));
            }
        }
        break;

    case THRESH_TOZERO_INV:
        for( i = 0; i < roi.height; i++ )
        {
            const uchar* src = _src.ptr() + src_step*i;
            uchar* dst = _dst.ptr() + dst_step*i;

            for ( j_scalar = 0; j_scalar <= roi.width - 16; j_scalar += 16)
            {
                uint8x16_t v_src = vld1q_u8(src + j_scalar), v_mask = vcleq_u8(v_src, v_thresh);
                vst1q_u8(dst + j_scalar, vandq_u8(v_mask, v_src));
            }
        }
        break;
    default:
        CV_Error( CV_StsBadArg, "Unknown threshold type" );
    }
#endif

    if( j_scalar < roi.width )
    {
        for( i = 0; i < roi.height; i++ )
        {
            const uchar* src = _src.ptr() + src_step*i;
            uchar* dst = _dst.ptr() + dst_step*i;
            j = j_scalar;
#if CV_ENABLE_UNROLLED
            for( ; j <= roi.width - 4; j += 4 )
            {
                uchar t0 = tab[src[j]];
                uchar t1 = tab[src[j+1]];

                dst[j] = t0;
                dst[j+1] = t1;

                t0 = tab[src[j+2]];
                t1 = tab[src[j+3]];

                dst[j+2] = t0;
                dst[j+3] = t1;
            }
#endif
            for( ; j < roi.width; j++ )
                dst[j] = tab[src[j]];
        }
    }
}


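// 16-bit signed thresholding. Same structure as thresh_8u, but a lookup
// table is impractical for 65536 values, so each row is processed directly
// with SSE2/NEON compare-and-mask fast paths and a scalar tail.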
static void
thresh_16s( const Mat& _src, Mat& _dst, short thresh, short maxval, int type )
{
    int i, j;
    Size roi = _src.size();
    roi.width *= _src.channels();
    const short* src = _src.ptr<short>();
    short* dst = _dst.ptr<short>();
    size_t src_step = _src.step/sizeof(src[0]);
    size_t dst_step = _dst.step/sizeof(dst[0]);

#if CV_SSE2
    volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE2);  // the intrinsics below require SSE2, not just SSE
#endif

    if( _src.isContinuous() && _dst.isContinuous() )
    {
        roi.width *= roi.height;
        roi.height = 1;
        src_step = dst_step = roi.width;
    }

#ifdef HAVE_TEGRA_OPTIMIZATION
    if (tegra::useTegra() && tegra::thresh_16s(_src, _dst, roi.width, roi.height, thresh, maxval, type))
        return;
#endif

#if defined(HAVE_IPP)
    CV_IPP_CHECK()
    {
        IppiSize sz = { roi.width, roi.height };
        CV_SUPPRESS_DEPRECATED_START
        switch( type )
        {
        case THRESH_TRUNC:
#ifndef HAVE_IPP_ICV_ONLY
            if (_src.data == _dst.data && ippiThreshold_GT_16s_C1IR(dst, (int)dst_step*sizeof(dst[0]), sz, thresh) >= 0)
            {
                CV_IMPL_ADD(CV_IMPL_IPP);
                return;
            }
#endif
            if (ippiThreshold_GT_16s_C1R(src, (int)src_step*sizeof(src[0]), dst, (int)dst_step*sizeof(dst[0]), sz, thresh) >= 0)
            {
                CV_IMPL_ADD(CV_IMPL_IPP);
                return;
            }
            setIppErrorStatus();
            break;
        case THRESH_TOZERO:
#ifndef HAVE_IPP_ICV_ONLY
            if (_src.data == _dst.data && ippiThreshold_LTVal_16s_C1IR(dst, (int)dst_step*sizeof(dst[0]), sz, thresh + 1, 0) >= 0)
            {
                CV_IMPL_ADD(CV_IMPL_IPP);
                return;
            }
#endif
            if (ippiThreshold_LTVal_16s_C1R(src, (int)src_step*sizeof(src[0]), dst, (int)dst_step*sizeof(dst[0]), sz, thresh+1, 0) >= 0)
            {
                CV_IMPL_ADD(CV_IMPL_IPP);
                return;
            }
            setIppErrorStatus();
            break;
        case THRESH_TOZERO_INV:
#ifndef HAVE_IPP_ICV_ONLY
            if (_src.data == _dst.data && ippiThreshold_GTVal_16s_C1IR(dst, (int)dst_step*sizeof(dst[0]), sz, thresh, 0) >= 0)
            {
                CV_IMPL_ADD(CV_IMPL_IPP);
                return;
            }
#endif
            if (ippiThreshold_GTVal_16s_C1R(src, (int)src_step*sizeof(src[0]), dst, (int)dst_step*sizeof(dst[0]), sz, thresh, 0) >= 0)
            {
                CV_IMPL_ADD(CV_IMPL_IPP);
                return;
            }
            setIppErrorStatus();
            break;
        }
        CV_SUPPRESS_DEPRECATED_END
    }
#endif

    switch( type )
    {
    case THRESH_BINARY:
        for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
        {
            j = 0;
        #if CV_SSE2
            if( useSIMD )
            {
                __m128i thresh8 = _mm_set1_epi16(thresh), maxval8 = _mm_set1_epi16(maxval);
                for( ; j <= roi.width - 16; j += 16 )
                {
                    __m128i v0, v1;
                    v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
                    v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) );
                    v0 = _mm_cmpgt_epi16( v0, thresh8 );
                    v1 = _mm_cmpgt_epi16( v1, thresh8 );
                    v0 = _mm_and_si128( v0, maxval8 );
                    v1 = _mm_and_si128( v1, maxval8 );
                    _mm_storeu_si128((__m128i*)(dst + j), v0 );
                    _mm_storeu_si128((__m128i*)(dst + j + 8), v1 );
                }
            }
        #elif CV_NEON
            int16x8_t v_thresh = vdupq_n_s16(thresh), v_maxval = vdupq_n_s16(maxval);

            for( ; j <= roi.width - 8; j += 8 )
            {
                uint16x8_t v_mask = vcgtq_s16(vld1q_s16(src + j), v_thresh);
                vst1q_s16(dst + j, vandq_s16(vreinterpretq_s16_u16(v_mask), v_maxval));
            }
        #endif

            for( ; j < roi.width; j++ )
                dst[j] = src[j] > thresh ? maxval : 0;
        }
        break;

    case THRESH_BINARY_INV:
        for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
        {
            j = 0;
        #if CV_SSE2
            if( useSIMD )
            {
                __m128i thresh8 = _mm_set1_epi16(thresh), maxval8 = _mm_set1_epi16(maxval);
                for( ; j <= roi.width - 16; j += 16 )
                {
                    __m128i v0, v1;
                    v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
                    v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) );
                    v0 = _mm_cmpgt_epi16( v0, thresh8 );
                    v1 = _mm_cmpgt_epi16( v1, thresh8 );
                    v0 = _mm_andnot_si128( v0, maxval8 );
                    v1 = _mm_andnot_si128( v1, maxval8 );
                    _mm_storeu_si128((__m128i*)(dst + j), v0 );
                    _mm_storeu_si128((__m128i*)(dst + j + 8), v1 );
                }
            }
        #elif CV_NEON
            int16x8_t v_thresh = vdupq_n_s16(thresh), v_maxval = vdupq_n_s16(maxval);

            for( ; j <= roi.width - 8; j += 8 )
            {
                uint16x8_t v_mask = vcleq_s16(vld1q_s16(src + j), v_thresh);
                vst1q_s16(dst + j, vandq_s16(vreinterpretq_s16_u16(v_mask), v_maxval));
            }
        #endif

            for( ; j < roi.width; j++ )
                dst[j] = src[j] <= thresh ? maxval : 0;
        }
        break;

    case THRESH_TRUNC:
        for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
        {
            j = 0;
        #if CV_SSE2
            if( useSIMD )
            {
                __m128i thresh8 = _mm_set1_epi16(thresh);
                for( ; j <= roi.width - 16; j += 16 )
                {
                    __m128i v0, v1;
                    v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
                    v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) );
                    v0 = _mm_min_epi16( v0, thresh8 );
                    v1 = _mm_min_epi16( v1, thresh8 );
                    _mm_storeu_si128((__m128i*)(dst + j), v0 );
                    _mm_storeu_si128((__m128i*)(dst + j + 8), v1 );
                }
            }
        #elif CV_NEON
            int16x8_t v_thresh = vdupq_n_s16(thresh);

            for( ; j <= roi.width - 8; j += 8 )
                vst1q_s16(dst + j, vminq_s16(vld1q_s16(src + j), v_thresh));
        #endif

            for( ; j < roi.width; j++ )
                dst[j] = std::min(src[j], thresh);
        }
        break;

    case THRESH_TOZERO:
        for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
        {
            j = 0;
        #if CV_SSE2
            if( useSIMD )
            {
                __m128i thresh8 = _mm_set1_epi16(thresh);
                for( ; j <= roi.width - 16; j += 16 )
                {
                    __m128i v0, v1;
                    v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
                    v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) );
                    v0 = _mm_and_si128(v0, _mm_cmpgt_epi16(v0, thresh8));
                    v1 = _mm_and_si128(v1, _mm_cmpgt_epi16(v1, thresh8));
                    _mm_storeu_si128((__m128i*)(dst + j), v0 );
                    _mm_storeu_si128((__m128i*)(dst + j + 8), v1 );
                }
            }
        #elif CV_NEON
            int16x8_t v_thresh = vdupq_n_s16(thresh);

            for( ; j <= roi.width - 8; j += 8 )
            {
                int16x8_t v_src = vld1q_s16(src + j);
                uint16x8_t v_mask = vcgtq_s16(v_src, v_thresh);
                vst1q_s16(dst + j, vandq_s16(vreinterpretq_s16_u16(v_mask), v_src));
            }
        #endif

            for( ; j < roi.width; j++ )
            {
                short v = src[j];
                dst[j] = v > thresh ? v : 0;
            }
        }
        break;

    case THRESH_TOZERO_INV:
        for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
        {
            j = 0;
        #if CV_SSE2
            if( useSIMD )
            {
                __m128i thresh8 = _mm_set1_epi16(thresh);
                for( ; j <= roi.width - 16; j += 16 )
                {
                    __m128i v0, v1;
                    v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
                    v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) );
                    v0 = _mm_andnot_si128(_mm_cmpgt_epi16(v0, thresh8), v0);
                    v1 = _mm_andnot_si128(_mm_cmpgt_epi16(v1, thresh8), v1);
                    _mm_storeu_si128((__m128i*)(dst + j), v0 );
                    _mm_storeu_si128((__m128i*)(dst + j + 8), v1 );
                }
            }
        #elif CV_NEON
            int16x8_t v_thresh = vdupq_n_s16(thresh);

            for( ; j <= roi.width - 8; j += 8 )
            {
                int16x8_t v_src = vld1q_s16(src + j);
                uint16x8_t v_mask = vcleq_s16(v_src, v_thresh);
                vst1q_s16(dst + j, vandq_s16(vreinterpretq_s16_u16(v_mask), v_src));
            }
        #endif
            for( ; j < roi.width; j++ )
            {
                short v = src[j];
                dst[j] = v <= thresh ? v : 0;
            }
        }
        break;
    default:
        CV_Error( CV_StsBadArg, "Unknown threshold type" );
    }
}


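// 32-bit float thresholding. Same per-row structure as the integer
// variants, with SSE/NEON compare-and-mask fast paths and a scalar tail.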
static void
thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type )
{
    int i, j;
    Size roi = _src.size();
    roi.width *= _src.channels();
    const float* src = _src.ptr<float>();
    float* dst = _dst.ptr<float>();
    size_t src_step = _src.step/sizeof(src[0]);
    size_t dst_step = _dst.step/sizeof(dst[0]);

#if CV_SSE2
    volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE2);  // match the surrounding CV_SSE2 guard
#endif

    if( _src.isContinuous() && _dst.isContinuous() )
    {
        roi.width *= roi.height;
        roi.height = 1;
    }

#ifdef HAVE_TEGRA_OPTIMIZATION
    if (tegra::useTegra() && tegra::thresh_32f(_src, _dst, roi.width, roi.height, thresh, maxval, type))
        return;
#endif

#if defined(HAVE_IPP)
    CV_IPP_CHECK()
    {
        IppiSize sz = { roi.width, roi.height };
        switch( type )
        {
        case THRESH_TRUNC:
            if (0 <= ippiThreshold_GT_32f_C1R(src, (int)src_step*sizeof(src[0]), dst, (int)dst_step*sizeof(dst[0]), sz, thresh))
            {
                CV_IMPL_ADD(CV_IMPL_IPP);
                return;
            }
            setIppErrorStatus();
            break;
        case THRESH_TOZERO:
            if (0 <= ippiThreshold_LTVal_32f_C1R(src, (int)src_step*sizeof(src[0]), dst, (int)dst_step*sizeof(dst[0]), sz, thresh+FLT_EPSILON, 0))
            {
                CV_IMPL_ADD(CV_IMPL_IPP);
                return;
            }
            setIppErrorStatus();
            break;
        case THRESH_TOZERO_INV:
            if (0 <= ippiThreshold_GTVal_32f_C1R(src, (int)src_step*sizeof(src[0]), dst, (int)dst_step*sizeof(dst[0]), sz, thresh, 0))
            {
                CV_IMPL_ADD(CV_IMPL_IPP);
                return;
            }
            setIppErrorStatus();
            break;
        }
    }
#endif

    switch( type )
    {
        case THRESH_BINARY:
            for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
            {
                j = 0;
#if CV_SSE2
                if( useSIMD )
                {
                    __m128 thresh4 = _mm_set1_ps(thresh), maxval4 = _mm_set1_ps(maxval);
                    for( ; j <= roi.width - 8; j += 8 )
                    {
                        __m128 v0, v1;
                        v0 = _mm_loadu_ps( src + j );
                        v1 = _mm_loadu_ps( src + j + 4 );
                        v0 = _mm_cmpgt_ps( v0, thresh4 );
                        v1 = _mm_cmpgt_ps( v1, thresh4 );
                        v0 = _mm_and_ps( v0, maxval4 );
                        v1 = _mm_and_ps( v1, maxval4 );
                        _mm_storeu_ps( dst + j, v0 );
                        _mm_storeu_ps( dst + j + 4, v1 );
                    }
                }
#elif CV_NEON
                float32x4_t v_thresh = vdupq_n_f32(thresh);
                uint32x4_t v_maxval = vreinterpretq_u32_f32(vdupq_n_f32(maxval));

                for( ; j <= roi.width - 4; j += 4 )
                {
                    float32x4_t v_src = vld1q_f32(src + j);
                    uint32x4_t v_dst = vandq_u32(vcgtq_f32(v_src, v_thresh), v_maxval);
                    vst1q_f32(dst + j, vreinterpretq_f32_u32(v_dst));
                }
#endif

                for( ; j < roi.width; j++ )
                    dst[j] = src[j] > thresh ? maxval : 0;
            }
            break;

        case THRESH_BINARY_INV:
            for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
            {
                j = 0;
#if CV_SSE2
                if( useSIMD )
                {
                    __m128 thresh4 = _mm_set1_ps(thresh), maxval4 = _mm_set1_ps(maxval);
                    for( ; j <= roi.width - 8; j += 8 )
                    {
                        __m128 v0, v1;
                        v0 = _mm_loadu_ps( src + j );
                        v1 = _mm_loadu_ps( src + j + 4 );
                        v0 = _mm_cmple_ps( v0, thresh4 );
                        v1 = _mm_cmple_ps( v1, thresh4 );
                        v0 = _mm_and_ps( v0, maxval4 );
                        v1 = _mm_and_ps( v1, maxval4 );
                        _mm_storeu_ps( dst + j, v0 );
                        _mm_storeu_ps( dst + j + 4, v1 );
                    }
                }
#elif CV_NEON
                float32x4_t v_thresh = vdupq_n_f32(thresh);
                uint32x4_t v_maxval = vreinterpretq_u32_f32(vdupq_n_f32(maxval));

                for( ; j <= roi.width - 4; j += 4 )
                {
                    float32x4_t v_src = vld1q_f32(src + j);
                    uint32x4_t v_dst = vandq_u32(vcleq_f32(v_src, v_thresh), v_maxval);
                    vst1q_f32(dst + j, vreinterpretq_f32_u32(v_dst));
                }
#endif

                for( ; j < roi.width; j++ )
                    dst[j] = src[j] <= thresh ? maxval : 0;
            }
            break;

        case THRESH_TRUNC:
            for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
            {
                j = 0;
#if CV_SSE2
                if( useSIMD )
                {
                    __m128 thresh4 = _mm_set1_ps(thresh);
                    for( ; j <= roi.width - 8; j += 8 )
                    {
                        __m128 v0, v1;
                        v0 = _mm_loadu_ps( src + j );
                        v1 = _mm_loadu_ps( src + j + 4 );
                        v0 = _mm_min_ps( v0, thresh4 );
                        v1 = _mm_min_ps( v1, thresh4 );
                        _mm_storeu_ps( dst + j, v0 );
                        _mm_storeu_ps( dst + j + 4, v1 );
                    }
                }
#elif CV_NEON
                float32x4_t v_thresh = vdupq_n_f32(thresh);

                for( ; j <= roi.width - 4; j += 4 )
                    vst1q_f32(dst + j, vminq_f32(vld1q_f32(src + j), v_thresh));
#endif

                for( ; j < roi.width; j++ )
                    dst[j] = std::min(src[j], thresh);
            }
            break;

        case THRESH_TOZERO:
            for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
            {
                j = 0;
#if CV_SSE2
                if( useSIMD )
                {
                    __m128 thresh4 = _mm_set1_ps(thresh);
                    for( ; j <= roi.width - 8; j += 8 )
                    {
                        __m128 v0, v1;
                        v0 = _mm_loadu_ps( src + j );
                        v1 = _mm_loadu_ps( src + j + 4 );
                        v0 = _mm_and_ps(v0, _mm_cmpgt_ps(v0, thresh4));
                        v1 = _mm_and_ps(v1, _mm_cmpgt_ps(v1, thresh4));
                        _mm_storeu_ps( dst + j, v0 );
                        _mm_storeu_ps( dst + j + 4, v1 );
                    }
                }
#elif CV_NEON
                float32x4_t v_thresh = vdupq_n_f32(thresh);

                for( ; j <= roi.width - 4; j += 4 )
                {
                    float32x4_t v_src = vld1q_f32(src + j);
                    uint32x4_t v_dst = vandq_u32(vcgtq_f32(v_src, v_thresh),
                                                 vreinterpretq_u32_f32(v_src));
                    vst1q_f32(dst + j, vreinterpretq_f32_u32(v_dst));
                }
#endif

                for( ; j < roi.width; j++ )
                {
                    float v = src[j];
                    dst[j] = v > thresh ? v : 0;
                }
            }
            break;

        case THRESH_TOZERO_INV:
            for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
            {
                j = 0;
#if CV_SSE2
                if( useSIMD )
                {
                    __m128 thresh4 = _mm_set1_ps(thresh);
                    for( ; j <= roi.width - 8; j += 8 )
                    {
                        __m128 v0, v1;
                        v0 = _mm_loadu_ps( src + j );
                        v1 = _mm_loadu_ps( src + j + 4 );
                        v0 = _mm_and_ps(v0, _mm_cmple_ps(v0, thresh4));
                        v1 = _mm_and_ps(v1, _mm_cmple_ps(v1, thresh4));
                        _mm_storeu_ps( dst + j, v0 );
                        _mm_storeu_ps( dst + j + 4, v1 );
                    }
                }
#elif CV_NEON
                float32x4_t v_thresh = vdupq_n_f32(thresh);

                for( ; j <= roi.width - 4; j += 4 )
                {
                    float32x4_t v_src = vld1q_f32(src + j);
                    uint32x4_t v_dst = vandq_u32(vcleq_f32(v_src, v_thresh),
                                                 vreinterpretq_u32_f32(v_src));
                    vst1q_f32(dst + j, vreinterpretq_f32_u32(v_dst));
                }
#endif
                for( ; j < roi.width; j++ )
                {
                    float v = src[j];
                    dst[j] = v <= thresh ? v : 0;
                }
            }
            break;
        default:
            CV_Error( CV_StsBadArg, "Unknown threshold type" );
    }
}


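// Computes Otsu's threshold for an 8-bit single-channel image: builds a
// 256-bin histogram, then scans every candidate threshold t and returns the
// one maximizing the between-class variance
//     sigma_B^2(t) = q1(t)*q2(t)*(mu1(t) - mu2(t))^2,
// where q1,q2 are the class probabilities and mu1,mu2 the class means.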
static double
getThreshVal_Otsu_8u( const Mat& _src )
{
    Size size = _src.size();
    int step = (int) _src.step;
    if( _src.isContinuous() )
    {
        size.width *= size.height;
        size.height = 1;
        step = size.width;
    }

#if IPP_VERSION_X100 >= 801 && !defined(HAVE_IPP_ICV_ONLY)
    CV_IPP_CHECK()
    {
        IppiSize srcSize = { size.width, size.height };
        Ipp8u thresh;
        CV_SUPPRESS_DEPRECATED_START
        IppStatus ok = ippiComputeThreshold_Otsu_8u_C1R(_src.ptr(), step, srcSize, &thresh);
        CV_SUPPRESS_DEPRECATED_END
        if (ok >= 0)
        {
            CV_IMPL_ADD(CV_IMPL_IPP);
            return thresh;
        }
        setIppErrorStatus();
    }
#endif

    const int N = 256;
    int i, j, h[N] = {0};
    for( i = 0; i < size.height; i++ )
    {
        const uchar* src = _src.ptr() + step*i;
        j = 0;
        #if CV_ENABLE_UNROLLED
        for( ; j <= size.width - 4; j += 4 )
        {
            int v0 = src[j], v1 = src[j+1];
            h[v0]++; h[v1]++;
            v0 = src[j+2]; v1 = src[j+3];
            h[v0]++; h[v1]++;
        }
        #endif
        for( ; j < size.width; j++ )
            h[src[j]]++;
    }

    double mu = 0, scale = 1./(size.width*size.height);
    for( i = 0; i < N; i++ )
        mu += i*(double)h[i];

    mu *= scale;
    double mu1 = 0, q1 = 0;
    double max_sigma = 0, max_val = 0;

    for( i = 0; i < N; i++ )
    {
        double p_i, q2, mu2, sigma;

        p_i = h[i]*scale;
        mu1 *= q1;
        q1 += p_i;
        q2 = 1. - q1;

        if( std::min(q1,q2) < FLT_EPSILON || std::max(q1,q2) > 1. - FLT_EPSILON )
            continue;

        mu1 = (mu1 + i*p_i)/q1;
        mu2 = (mu - q1*mu1)/q2;
        sigma = q1*q2*(mu1 - mu2)*(mu1 - mu2);
        if( sigma > max_sigma )
        {
            max_sigma = sigma;
            max_val = i;
        }
    }

    return max_val;
}

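// Computes a threshold with the triangle method: builds a 256-bin histogram,
// draws a line from the histogram peak to the end of the longer tail, and
// returns the bin whose histogram point lies farthest from that line. The
// histogram is flipped when the longer tail is on the bright side, so a
// single scan direction handles both orientations.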
static double
getThreshVal_Triangle_8u( const Mat& _src )
{
    Size size = _src.size();
    int step = (int) _src.step;
    if( _src.isContinuous() )
    {
        size.width *= size.height;
        size.height = 1;
        step = size.width;
    }

    const int N = 256;
    int i, j, h[N] = {0};
    for( i = 0; i < size.height; i++ )
    {
        const uchar* src = _src.ptr() + step*i;
        j = 0;
        #if CV_ENABLE_UNROLLED
        for( ; j <= size.width - 4; j += 4 )
        {
            int v0 = src[j], v1 = src[j+1];
            h[v0]++; h[v1]++;
            v0 = src[j+2]; v1 = src[j+3];
            h[v0]++; h[v1]++;
        }
        #endif
        for( ; j < size.width; j++ )
            h[src[j]]++;
    }

    int left_bound = 0, right_bound = 0, max_ind = 0, max = 0;
    int temp;
    bool isflipped = false;

    for( i = 0; i < N; i++ )
    {
        if( h[i] > 0 )
        {
            left_bound = i;
            break;
        }
    }
    if( left_bound > 0 )
        left_bound--;

    for( i = N-1; i > 0; i-- )
    {
        if( h[i] > 0 )
        {
            right_bound = i;
            break;
        }
    }
    if( right_bound < N-1 )
        right_bound++;

    for( i = 0; i < N; i++ )
    {
        if( h[i] > max)
        {
            max = h[i];
            max_ind = i;
        }
    }

    if( max_ind-left_bound < right_bound-max_ind)
    {
        isflipped = true;
        i = 0, j = N-1;
        while( i < j )
        {
            temp = h[i]; h[i] = h[j]; h[j] = temp;
            i++; j--;
        }
        left_bound = N-1-right_bound;
        max_ind = N-1-max_ind;
    }

    double thresh = left_bound;
    double a, b, dist = 0, tempdist;

    /*
     * We do not need the exact point-to-line distance here. Since the distance
     * is only maximized, constant factors can be omitted, which speeds up the
     * computation a bit.
     */
    a = max; b = left_bound-max_ind;
    for( i = left_bound+1; i <= max_ind; i++ )
    {
        tempdist = a*i + b*h[i];
        if( tempdist > dist)
        {
            dist = tempdist;
            thresh = i;
        }
    }
    thresh--;

    if( isflipped )
        thresh = N-1-thresh;

    return thresh;
}

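// ParallelLoopBody that applies the depth-appropriate thresholding routine
// to a horizontal stripe of rows, so cv::threshold can process stripes
// concurrently via parallel_for_.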
class ThresholdRunner : public ParallelLoopBody
{
public:
    ThresholdRunner(Mat _src, Mat _dst, double _thresh, double _maxval, int _thresholdType)
    {
        src = _src;
        dst = _dst;

        thresh = _thresh;
        maxval = _maxval;
        thresholdType = _thresholdType;
    }

    void operator () ( const Range& range ) const
    {
        int row0 = range.start;
        int row1 = range.end;

        Mat srcStripe = src.rowRange(row0, row1);
        Mat dstStripe = dst.rowRange(row0, row1);

        if (srcStripe.depth() == CV_8U)
        {
            thresh_8u( srcStripe, dstStripe, (uchar)thresh, (uchar)maxval, thresholdType );
        }
        else if( srcStripe.depth() == CV_16S )
        {
            thresh_16s( srcStripe, dstStripe, (short)thresh, (short)maxval, thresholdType );
        }
        else if( srcStripe.depth() == CV_32F )
        {
            thresh_32f( srcStripe, dstStripe, (float)thresh, (float)maxval, thresholdType );
        }
    }

private:
    Mat src;
    Mat dst;

    double thresh;
    double maxval;
    int thresholdType;
};

#ifdef HAVE_OPENCL

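// OpenCL implementation. Builds the "threshold" kernel with the operation
// baked in as a compile-time define. 'thresh' is passed by reference because
// it is floored for integer depths and the adjusted value is reported back
// to the caller.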
static bool ocl_threshold( InputArray _src, OutputArray _dst, double & thresh, double maxval, int thresh_type )
{
    int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
        kercn = ocl::predictOptimalVectorWidth(_src, _dst), ktype = CV_MAKE_TYPE(depth, kercn);
    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;

    if ( !(thresh_type == THRESH_BINARY || thresh_type == THRESH_BINARY_INV || thresh_type == THRESH_TRUNC ||
           thresh_type == THRESH_TOZERO || thresh_type == THRESH_TOZERO_INV) ||
         (!doubleSupport && depth == CV_64F))
        return false;

    const char * const thresholdMap[] = { "THRESH_BINARY", "THRESH_BINARY_INV", "THRESH_TRUNC",
                                          "THRESH_TOZERO", "THRESH_TOZERO_INV" };
    ocl::Device dev = ocl::Device::getDefault();
    int stride_size = dev.isIntel() && (dev.type() & ocl::Device::TYPE_GPU) ? 4 : 1;

    ocl::Kernel k("threshold", ocl::imgproc::threshold_oclsrc,
                  format("-D %s -D T=%s -D T1=%s -D STRIDE_SIZE=%d%s", thresholdMap[thresh_type],
                         ocl::typeToStr(ktype), ocl::typeToStr(depth), stride_size,
                         doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
    if (k.empty())
        return false;

    UMat src = _src.getUMat();
    _dst.create(src.size(), type);
    UMat dst = _dst.getUMat();

    if (depth <= CV_32S)
        thresh = cvFloor(thresh);

    const double min_vals[] = { 0, CHAR_MIN, 0, SHRT_MIN, INT_MIN, -FLT_MAX, -DBL_MAX, 0 };
    double min_val = min_vals[depth];

    k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst, cn, kercn),
           ocl::KernelArg::Constant(Mat(1, 1, depth, Scalar::all(thresh))),
           ocl::KernelArg::Constant(Mat(1, 1, depth, Scalar::all(maxval))),
           ocl::KernelArg::Constant(Mat(1, 1, depth, Scalar::all(min_val))));

    size_t globalsize[2] = { dst.cols * cn / kercn, dst.rows };
    globalsize[1] = (globalsize[1] + stride_size - 1) / stride_size;
    return k.run(2, globalsize, NULL, false);
}

#endif

}

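// Public entry point. Resolves automatic thresholds (Otsu/triangle), handles
// degenerate integer thresholds, then dispatches row stripes in parallel.
//
// Illustrative usage (standard API; the variable names are just examples):
//     cv::Mat bw;
//     cv::threshold(gray, bw, 128, 255, cv::THRESH_BINARY);
//     // or let Otsu choose the threshold; the chosen value is returned:
//     double t = cv::threshold(gray, bw, 0, 255, cv::THRESH_BINARY | cv::THRESH_OTSU);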
double cv::threshold( InputArray _src, OutputArray _dst, double thresh, double maxval, int type )
{
    CV_OCL_RUN_(_src.dims() <= 2 && _dst.isUMat(),
                ocl_threshold(_src, _dst, thresh, maxval, type), thresh)

    Mat src = _src.getMat();
    int automatic_thresh = (type & ~CV_THRESH_MASK);
    type &= THRESH_MASK;

    CV_Assert( automatic_thresh != (CV_THRESH_OTSU | CV_THRESH_TRIANGLE) );
    if( automatic_thresh == CV_THRESH_OTSU )
    {
        CV_Assert( src.type() == CV_8UC1 );
        thresh = getThreshVal_Otsu_8u( src );
    }
    else if( automatic_thresh == CV_THRESH_TRIANGLE )
    {
        CV_Assert( src.type() == CV_8UC1 );
        thresh = getThreshVal_Triangle_8u( src );
    }

    _dst.create( src.size(), src.type() );
    Mat dst = _dst.getMat();

    if( src.depth() == CV_8U )
    {
        int ithresh = cvFloor(thresh);
        thresh = ithresh;
        int imaxval = cvRound(maxval);
        if( type == THRESH_TRUNC )
            imaxval = ithresh;
        imaxval = saturate_cast<uchar>(imaxval);

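        // Degenerate thresholds: outside [0, 255) every pixel compares the
        // same way, so the result is a constant fill or a plain copy and no
        // per-pixel scan is needed.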
        if( ithresh < 0 || ithresh >= 255 )
        {
            if( type == THRESH_BINARY || type == THRESH_BINARY_INV ||
                ((type == THRESH_TRUNC || type == THRESH_TOZERO_INV) && ithresh < 0) ||
                (type == THRESH_TOZERO && ithresh >= 255) )
            {
                int v = type == THRESH_BINARY ? (ithresh >= 255 ? 0 : imaxval) :
                        type == THRESH_BINARY_INV ? (ithresh >= 255 ? imaxval : 0) :
                        /*type == THRESH_TRUNC ? imaxval :*/ 0;
                dst.setTo(v);
            }
            else
                src.copyTo(dst);
            return thresh;
        }
        thresh = ithresh;
        maxval = imaxval;
    }
    else if( src.depth() == CV_16S )
    {
        int ithresh = cvFloor(thresh);
        thresh = ithresh;
        int imaxval = cvRound(maxval);
        if( type == THRESH_TRUNC )
            imaxval = ithresh;
        imaxval = saturate_cast<short>(imaxval);

        if( ithresh < SHRT_MIN || ithresh >= SHRT_MAX )
        {
            if( type == THRESH_BINARY || type == THRESH_BINARY_INV ||
               ((type == THRESH_TRUNC || type == THRESH_TOZERO_INV) && ithresh < SHRT_MIN) ||
               (type == THRESH_TOZERO && ithresh >= SHRT_MAX) )
            {
                int v = type == THRESH_BINARY ? (ithresh >= SHRT_MAX ? 0 : imaxval) :
                        type == THRESH_BINARY_INV ? (ithresh >= SHRT_MAX ? imaxval : 0) :
                        /*type == THRESH_TRUNC ? imaxval :*/ 0;
                dst.setTo(v);
            }
            else
                src.copyTo(dst);
            return thresh;
        }
        thresh = ithresh;
        maxval = imaxval;
    }
    else if( src.depth() == CV_32F )
        ;
    else
        CV_Error( CV_StsUnsupportedFormat, "" );

    parallel_for_(Range(0, dst.rows),
                  ThresholdRunner(src, dst, thresh, maxval, type),
                  dst.total()/(double)(1<<16));
    return thresh;
}


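// Adaptive threshold for 8-bit single-channel images: each pixel is compared
// against a local mean (box or Gaussian over a blockSize x blockSize window)
// minus delta, i.e. dst = maxValue if src > mean - delta for THRESH_BINARY,
// and the complement for THRESH_BINARY_INV.
//
// Illustrative usage (standard API; the variable names are just examples):
//     cv::adaptiveThreshold(gray, bw, 255, cv::ADAPTIVE_THRESH_MEAN_C,
//                           cv::THRESH_BINARY, 11, 5);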
void cv::adaptiveThreshold( InputArray _src, OutputArray _dst, double maxValue,
                            int method, int type, int blockSize, double delta )
{
    Mat src = _src.getMat();
    CV_Assert( src.type() == CV_8UC1 );
    CV_Assert( blockSize % 2 == 1 && blockSize > 1 );
    Size size = src.size();

    _dst.create( size, src.type() );
    Mat dst = _dst.getMat();

    if( maxValue < 0 )
    {
        dst = Scalar(0);
        return;
    }

    Mat mean;

    if( src.data != dst.data )
        mean = dst;

    if( method == ADAPTIVE_THRESH_MEAN_C )
        boxFilter( src, mean, src.type(), Size(blockSize, blockSize),
                   Point(-1,-1), true, BORDER_REPLICATE );
    else if( method == ADAPTIVE_THRESH_GAUSSIAN_C )
        GaussianBlur( src, mean, Size(blockSize, blockSize), 0, 0, BORDER_REPLICATE );
    else
        CV_Error( CV_StsBadFlag, "Unknown/unsupported adaptive threshold method" );

    int i, j;
    uchar imaxval = saturate_cast<uchar>(maxValue);
    int idelta = type == THRESH_BINARY ? cvCeil(delta) : cvFloor(delta);
    uchar tab[768];

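    // The difference (src - mean) lies in [-255, 255]; the +255 shift used
    // below makes it a valid index into the 768-entry table, so the per-pixel
    // comparison against -delta reduces to a single lookup.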
    if( type == CV_THRESH_BINARY )
        for( i = 0; i < 768; i++ )
            tab[i] = (uchar)(i - 255 > -idelta ? imaxval : 0);
    else if( type == CV_THRESH_BINARY_INV )
        for( i = 0; i < 768; i++ )
            tab[i] = (uchar)(i - 255 <= -idelta ? imaxval : 0);
    else
        CV_Error( CV_StsBadFlag, "Unknown/unsupported threshold type" );

    if( src.isContinuous() && mean.isContinuous() && dst.isContinuous() )
    {
        size.width *= size.height;
        size.height = 1;
    }

    for( i = 0; i < size.height; i++ )
    {
        const uchar* sdata = src.ptr(i);
        const uchar* mdata = mean.ptr(i);
        uchar* ddata = dst.ptr(i);

        for( j = 0; j < size.width; j++ )
            ddata[j] = tab[sdata[j] - mdata[j] + 255];
    }
}

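// Legacy C API wrappers.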
CV_IMPL double
cvThreshold( const void* srcarr, void* dstarr, double thresh, double maxval, int type )
{
    cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr), dst0 = dst;

    CV_Assert( src.size == dst.size && src.channels() == dst.channels() &&
        (src.depth() == dst.depth() || dst.depth() == CV_8U));

    thresh = cv::threshold( src, dst, thresh, maxval, type );
    if( dst0.data != dst.data )
        dst.convertTo( dst0, dst0.depth() );
    return thresh;
}


CV_IMPL void
cvAdaptiveThreshold( const void *srcIm, void *dstIm, double maxValue,
                     int method, int type, int blockSize, double delta )
{
    cv::Mat src = cv::cvarrToMat(srcIm), dst = cv::cvarrToMat(dstIm);
    CV_Assert( src.size == dst.size && src.type() == dst.type() );
    cv::adaptiveThreshold( src, dst, maxValue, method, type, blockSize, delta );
}

/* End of file. */