Home | History | Annotate | Download | only in opencl
      1 // This file is part of OpenCV project.
      2 // It is subject to the license terms in the LICENSE file found in the top-level directory
      3 // of this distribution and at http://opencv.org/license.html.
      4 
      5 // Copyright (C) 2014, Itseez, Inc., all rights reserved.
      6 // Third party copyrights are property of their respective owners.
      7 
      8 #ifdef DOUBLE_SUPPORT
      9 #ifdef cl_amd_fp64
     10 #pragma OPENCL EXTENSION cl_amd_fp64:enable
     11 #elif defined (cl_khr_fp64)
     12 #pragma OPENCL EXTENSION cl_khr_fp64:enable
     13 #endif
     14 #endif
     15 
     16 #ifdef DEPTH_0
     17 #define MIN_VAL 0
     18 #define MAX_VAL UCHAR_MAX
     19 #elif defined DEPTH_1
     20 #define MIN_VAL SCHAR_MIN
     21 #define MAX_VAL SCHAR_MAX
     22 #elif defined DEPTH_2
     23 #define MIN_VAL 0
     24 #define MAX_VAL USHRT_MAX
     25 #elif defined DEPTH_3
     26 #define MIN_VAL SHRT_MIN
     27 #define MAX_VAL SHRT_MAX
     28 #elif defined DEPTH_4
     29 #define MIN_VAL INT_MIN
     30 #define MAX_VAL INT_MAX
     31 #elif defined DEPTH_5
     32 #define MIN_VAL (-FLT_MAX)
     33 #define MAX_VAL FLT_MAX
     34 #elif defined DEPTH_6
     35 #define MIN_VAL (-DBL_MAX)
     36 #define MAX_VAL DBL_MAX
     37 #endif
     38 
     39 #define noconvert
     40 #define INDEX_MAX UINT_MAX
     41 
     42 #if wdepth <= 4
     43 #define MIN_ABS(a) convertFromU(abs(a))
     44 #define MIN_ABS2(a, b) convertFromU(abs_diff(a, b))
     45 #define MIN(a, b) min(a, b)
     46 #define MAX(a, b) max(a, b)
     47 #else
     48 #define MIN_ABS(a) fabs(a)
     49 #define MIN_ABS2(a, b) fabs(a - b)
     50 #define MIN(a, b) fmin(a, b)
     51 #define MAX(a, b) fmax(a, b)
     52 #endif
     53 
     54 #if kercn != 3
     55 #define loadpix(addr) *(__global const srcT *)(addr)
     56 #define srcTSIZE (int)sizeof(srcT)
     57 #else
     58 #define loadpix(addr) vload3(0, (__global const srcT1 *)(addr))
     59 #define srcTSIZE ((int)sizeof(srcT1) * 3)
     60 #endif
     61 
     62 #ifndef HAVE_MASK
     63 #undef srcTSIZE
     64 #define srcTSIZE (int)sizeof(srcT1)
     65 #endif
     66 
     67 #ifdef NEED_MINVAL
     68 #ifdef NEED_MINLOC
     69 #define CALC_MIN(p, inc) \
     70     if (minval > temp.p) \
     71     { \
     72         minval = temp.p; \
     73         minloc = id + inc; \
     74     }
     75 #else
     76 #define CALC_MIN(p, inc) \
     77     minval = MIN(minval, temp.p);
     78 #endif
     79 #else
     80 #define CALC_MIN(p, inc)
     81 #endif
     82 
     83 #ifdef NEED_MAXVAL
     84 #ifdef NEED_MAXLOC
     85 #define CALC_MAX(p, inc) \
     86     if (maxval < temp.p) \
     87     { \
     88         maxval = temp.p; \
     89         maxloc = id + inc; \
     90     }
     91 #else
     92 #define CALC_MAX(p, inc) \
     93     maxval = MAX(maxval, temp.p);
     94 #endif
     95 #else
     96 #define CALC_MAX(p, inc)
     97 #endif
     98 
     99 #ifdef OP_CALC2
    100 #define CALC_MAX2(p) \
    101     maxval2 = MAX(maxval2, temp2.p);
    102 #else
    103 #define CALC_MAX2(p)
    104 #endif
    105 
    106 #define CALC_P(p, inc) \
    107     CALC_MIN(p, inc) \
    108     CALC_MAX(p, inc) \
    109     CALC_MAX2(p)
    110 
    111 __kernel void minmaxloc(__global const uchar * srcptr, int src_step, int src_offset, int cols,
    112                         int total, int groupnum, __global uchar * dstptr
    113 #ifdef HAVE_MASK
    114                         , __global const uchar * mask, int mask_step, int mask_offset
    115 #endif
    116 #ifdef HAVE_SRC2
    117                         , __global const uchar * src2ptr, int src2_step, int src2_offset
    118 #endif
    119                         )
    120 {
    121     int lid = get_local_id(0);
    122     int gid = get_group_id(0);
    123     int  id = get_global_id(0)
    124 #ifndef HAVE_MASK
    125     * kercn;
    126 #else
    127     ;
    128 #endif
    129 
    130     srcptr += src_offset;
    131 #ifdef HAVE_MASK
    132     mask += mask_offset;
    133 #endif
    134 #ifdef HAVE_SRC2
    135     src2ptr += src2_offset;
    136 #endif
    137 
    138 #ifdef NEED_MINVAL
    139     __local dstT1 localmem_min[WGS2_ALIGNED];
    140     dstT1 minval = MAX_VAL;
    141 #ifdef NEED_MINLOC
    142     __local uint localmem_minloc[WGS2_ALIGNED];
    143     uint minloc = INDEX_MAX;
    144 #endif
    145 #endif
    146 #ifdef NEED_MAXVAL
    147     dstT1 maxval = MIN_VAL;
    148     __local dstT1 localmem_max[WGS2_ALIGNED];
    149 #ifdef NEED_MAXLOC
    150     __local uint localmem_maxloc[WGS2_ALIGNED];
    151     uint maxloc = INDEX_MAX;
    152 #endif
    153 #endif
    154 #ifdef OP_CALC2
    155     __local dstT1 localmem_max2[WGS2_ALIGNED];
    156     dstT1 maxval2 = MIN_VAL;
    157 #endif
    158 
    159     int src_index;
    160 #ifdef HAVE_MASK
    161     int mask_index;
    162 #endif
    163 #ifdef HAVE_SRC2
    164     int src2_index;
    165 #endif
    166 
    167     dstT temp;
    168 #ifdef HAVE_SRC2
    169     dstT temp2;
    170 #endif
    171 
    172     for (int grain = groupnum * WGS
    173 #ifndef HAVE_MASK
    174         * kercn
    175 #endif
    176         ; id < total; id += grain)
    177     {
    178 #ifdef HAVE_MASK
    179 #ifdef HAVE_MASK_CONT
    180         mask_index = id;
    181 #else
    182         mask_index = mad24(id / cols, mask_step, id % cols);
    183 #endif
    184         if (mask[mask_index])
    185 #endif
    186         {
    187 #ifdef HAVE_SRC_CONT
    188             src_index = id * srcTSIZE;//mul24(id, srcTSIZE);
    189 #else
    190             src_index = mad24(id / cols, src_step, mul24(id % cols, srcTSIZE));
    191 #endif
    192             temp = convertToDT(loadpix(srcptr + src_index));
    193 #ifdef OP_ABS
    194             temp = MIN_ABS(temp);
    195 #endif
    196 
    197 #ifdef HAVE_SRC2
    198 #ifdef HAVE_SRC2_CONT
    199             src2_index = id * srcTSIZE; //mul24(id, srcTSIZE);
    200 #else
    201             src2_index = mad24(id / cols, src2_step, mul24(id % cols, srcTSIZE));
    202 #endif
    203             temp2 = convertToDT(loadpix(src2ptr + src2_index));
    204             temp = MIN_ABS2(temp, temp2);
    205 #ifdef OP_CALC2
    206             temp2 = MIN_ABS(temp2);
    207 #endif
    208 #endif
    209 
    210 #if kercn == 1
    211 #ifdef NEED_MINVAL
    212 #ifdef NEED_MINLOC
    213             if (minval > temp)
    214             {
    215                 minval = temp;
    216                 minloc = id;
    217             }
    218 #else
    219             minval = MIN(minval, temp);
    220 #endif
    221 #endif
    222 #ifdef NEED_MAXVAL
    223 #ifdef NEED_MAXLOC
    224             if (maxval < temp)
    225             {
    226                 maxval = temp;
    227                 maxloc = id;
    228             }
    229 #else
    230             maxval = MAX(maxval, temp);
    231 #endif
    232 #ifdef OP_CALC2
    233             maxval2 = MAX(maxval2, temp2);
    234 #endif
    235 #endif
    236 #elif kercn >= 2
    237             CALC_P(s0, 0)
    238             CALC_P(s1, 1)
    239 #if kercn >= 3
    240             CALC_P(s2, 2)
    241 #if kercn >= 4
    242             CALC_P(s3, 3)
    243 #if kercn >= 8
    244             CALC_P(s4, 4)
    245             CALC_P(s5, 5)
    246             CALC_P(s6, 6)
    247             CALC_P(s7, 7)
    248 #if kercn == 16
    249             CALC_P(s8, 8)
    250             CALC_P(s9, 9)
    251             CALC_P(sA, 10)
    252             CALC_P(sB, 11)
    253             CALC_P(sC, 12)
    254             CALC_P(sD, 13)
    255             CALC_P(sE, 14)
    256             CALC_P(sF, 15)
    257 #endif
    258 #endif
    259 #endif
    260 #endif
    261 #endif
    262         }
    263     }
    264 
    265     if (lid < WGS2_ALIGNED)
    266     {
    267 #ifdef NEED_MINVAL
    268         localmem_min[lid] = minval;
    269 #endif
    270 #ifdef NEED_MAXVAL
    271         localmem_max[lid] = maxval;
    272 #endif
    273 #ifdef NEED_MINLOC
    274         localmem_minloc[lid] = minloc;
    275 #endif
    276 #ifdef NEED_MAXLOC
    277         localmem_maxloc[lid] = maxloc;
    278 #endif
    279 #ifdef OP_CALC2
    280         localmem_max2[lid] = maxval2;
    281 #endif
    282     }
    283     barrier(CLK_LOCAL_MEM_FENCE);
    284 
    285     if (lid >= WGS2_ALIGNED && total >= WGS2_ALIGNED)
    286     {
    287         int lid3 = lid - WGS2_ALIGNED;
    288 #ifdef NEED_MINVAL
    289 #ifdef NEED_MINLOC
    290         if (localmem_min[lid3] >= minval)
    291         {
    292             if (localmem_min[lid3] == minval)
    293                 localmem_minloc[lid3] = min(localmem_minloc[lid3], minloc);
    294             else
    295                 localmem_minloc[lid3] = minloc,
    296             localmem_min[lid3] = minval;
    297         }
    298 #else
    299         localmem_min[lid3] = MIN(localmem_min[lid3], minval);
    300 #endif
    301 #endif
    302 #ifdef NEED_MAXVAL
    303 #ifdef NEED_MAXLOC
    304         if (localmem_max[lid3] <= maxval)
    305         {
    306             if (localmem_max[lid3] == maxval)
    307                 localmem_maxloc[lid3] = min(localmem_maxloc[lid3], maxloc);
    308             else
    309                 localmem_maxloc[lid3] = maxloc,
    310             localmem_max[lid3] = maxval;
    311         }
    312 #else
    313         localmem_max[lid3] = MAX(localmem_max[lid3], maxval);
    314 #endif
    315 #endif
    316 #ifdef OP_CALC2
    317         localmem_max2[lid3] = MAX(localmem_max2[lid3], maxval2);
    318 #endif
    319     }
    320     barrier(CLK_LOCAL_MEM_FENCE);
    321 
    322     for (int lsize = WGS2_ALIGNED >> 1; lsize > 0; lsize >>= 1)
    323     {
    324         if (lid < lsize)
    325         {
    326             int lid2 = lsize + lid;
    327 
    328 #ifdef NEED_MINVAL
    329 #ifdef NEED_MINLOC
    330             if (localmem_min[lid] >= localmem_min[lid2])
    331             {
    332                 if (localmem_min[lid] == localmem_min[lid2])
    333                     localmem_minloc[lid] = min(localmem_minloc[lid2], localmem_minloc[lid]);
    334                 else
    335                     localmem_minloc[lid] = localmem_minloc[lid2],
    336                 localmem_min[lid] = localmem_min[lid2];
    337             }
    338 #else
    339             localmem_min[lid] = MIN(localmem_min[lid], localmem_min[lid2]);
    340 #endif
    341 #endif
    342 #ifdef NEED_MAXVAL
    343 #ifdef NEED_MAXLOC
    344             if (localmem_max[lid] <= localmem_max[lid2])
    345             {
    346                 if (localmem_max[lid] == localmem_max[lid2])
    347                     localmem_maxloc[lid] = min(localmem_maxloc[lid2], localmem_maxloc[lid]);
    348                 else
    349                     localmem_maxloc[lid] = localmem_maxloc[lid2],
    350                 localmem_max[lid] = localmem_max[lid2];
    351             }
    352 #else
    353             localmem_max[lid] = MAX(localmem_max[lid], localmem_max[lid2]);
    354 #endif
    355 #endif
    356 #ifdef OP_CALC2
    357             localmem_max2[lid] = MAX(localmem_max2[lid], localmem_max2[lid2]);
    358 #endif
    359         }
    360         barrier(CLK_LOCAL_MEM_FENCE);
    361     }
    362 
    363     if (lid == 0)
    364     {
    365         int pos = 0;
    366 #ifdef NEED_MINVAL
    367         *(__global dstT1 *)(dstptr + mad24(gid, (int)sizeof(dstT1), pos)) = localmem_min[0];
    368         pos = mad24(groupnum, (int)sizeof(dstT1), pos);
    369 #endif
    370 #ifdef NEED_MAXVAL
    371         *(__global dstT1 *)(dstptr + mad24(gid, (int)sizeof(dstT1), pos)) = localmem_max[0];
    372         pos = mad24(groupnum, (int)sizeof(dstT1), pos);
    373 #endif
    374 #ifdef NEED_MINLOC
    375         *(__global uint *)(dstptr + mad24(gid, (int)sizeof(uint), pos)) = localmem_minloc[0];
    376         pos = mad24(groupnum, (int)sizeof(uint), pos);
    377 #endif
    378 #ifdef NEED_MAXLOC
    379         *(__global uint *)(dstptr + mad24(gid, (int)sizeof(uint), pos)) = localmem_maxloc[0];
    380 #ifdef OP_CALC2
    381         pos = mad24(groupnum, (int)sizeof(uint), pos);
    382 #endif
    383 #endif
    384 #ifdef OP_CALC2
    385         *(__global dstT1 *)(dstptr + mad24(gid, (int)sizeof(dstT1), pos)) = localmem_max2[0];
    386 #endif
    387     }
    388 }
    389