1 // This file is part of OpenCV project. 2 // It is subject to the license terms in the LICENSE file found in the top-level directory 3 // of this distribution and at http://opencv.org/license.html. 4 5 // Copyright (C) 2014, Itseez, Inc., all rights reserved. 6 // Third party copyrights are property of their respective owners. 7 8 #ifdef DOUBLE_SUPPORT 9 #ifdef cl_amd_fp64 10 #pragma OPENCL EXTENSION cl_amd_fp64:enable 11 #elif defined (cl_khr_fp64) 12 #pragma OPENCL EXTENSION cl_khr_fp64:enable 13 #endif 14 #endif 15 16 #ifdef DEPTH_0 17 #define MIN_VAL 0 18 #define MAX_VAL UCHAR_MAX 19 #elif defined DEPTH_1 20 #define MIN_VAL SCHAR_MIN 21 #define MAX_VAL SCHAR_MAX 22 #elif defined DEPTH_2 23 #define MIN_VAL 0 24 #define MAX_VAL USHRT_MAX 25 #elif defined DEPTH_3 26 #define MIN_VAL SHRT_MIN 27 #define MAX_VAL SHRT_MAX 28 #elif defined DEPTH_4 29 #define MIN_VAL INT_MIN 30 #define MAX_VAL INT_MAX 31 #elif defined DEPTH_5 32 #define MIN_VAL (-FLT_MAX) 33 #define MAX_VAL FLT_MAX 34 #elif defined DEPTH_6 35 #define MIN_VAL (-DBL_MAX) 36 #define MAX_VAL DBL_MAX 37 #endif 38 39 #define noconvert 40 #define INDEX_MAX UINT_MAX 41 42 #if wdepth <= 4 43 #define MIN_ABS(a) convertFromU(abs(a)) 44 #define MIN_ABS2(a, b) convertFromU(abs_diff(a, b)) 45 #define MIN(a, b) min(a, b) 46 #define MAX(a, b) max(a, b) 47 #else 48 #define MIN_ABS(a) fabs(a) 49 #define MIN_ABS2(a, b) fabs(a - b) 50 #define MIN(a, b) fmin(a, b) 51 #define MAX(a, b) fmax(a, b) 52 #endif 53 54 #if kercn != 3 55 #define loadpix(addr) *(__global const srcT *)(addr) 56 #define srcTSIZE (int)sizeof(srcT) 57 #else 58 #define loadpix(addr) vload3(0, (__global const srcT1 *)(addr)) 59 #define srcTSIZE ((int)sizeof(srcT1) * 3) 60 #endif 61 62 #ifndef HAVE_MASK 63 #undef srcTSIZE 64 #define srcTSIZE (int)sizeof(srcT1) 65 #endif 66 67 #ifdef NEED_MINVAL 68 #ifdef NEED_MINLOC 69 #define CALC_MIN(p, inc) \ 70 if (minval > temp.p) \ 71 { \ 72 minval = temp.p; \ 73 minloc = id + inc; \ 74 } 75 #else 76 #define CALC_MIN(p, inc) \ 77 minval = MIN(minval, temp.p); 78 #endif 79 #else 80 #define CALC_MIN(p, inc) 81 #endif 82 83 #ifdef NEED_MAXVAL 84 #ifdef NEED_MAXLOC 85 #define CALC_MAX(p, inc) \ 86 if (maxval < temp.p) \ 87 { \ 88 maxval = temp.p; \ 89 maxloc = id + inc; \ 90 } 91 #else 92 #define CALC_MAX(p, inc) \ 93 maxval = MAX(maxval, temp.p); 94 #endif 95 #else 96 #define CALC_MAX(p, inc) 97 #endif 98 99 #ifdef OP_CALC2 100 #define CALC_MAX2(p) \ 101 maxval2 = MAX(maxval2, temp2.p); 102 #else 103 #define CALC_MAX2(p) 104 #endif 105 106 #define CALC_P(p, inc) \ 107 CALC_MIN(p, inc) \ 108 CALC_MAX(p, inc) \ 109 CALC_MAX2(p) 110 111 __kernel void minmaxloc(__global const uchar * srcptr, int src_step, int src_offset, int cols, 112 int total, int groupnum, __global uchar * dstptr 113 #ifdef HAVE_MASK 114 , __global const uchar * mask, int mask_step, int mask_offset 115 #endif 116 #ifdef HAVE_SRC2 117 , __global const uchar * src2ptr, int src2_step, int src2_offset 118 #endif 119 ) 120 { 121 int lid = get_local_id(0); 122 int gid = get_group_id(0); 123 int id = get_global_id(0) 124 #ifndef HAVE_MASK 125 * kercn; 126 #else 127 ; 128 #endif 129 130 srcptr += src_offset; 131 #ifdef HAVE_MASK 132 mask += mask_offset; 133 #endif 134 #ifdef HAVE_SRC2 135 src2ptr += src2_offset; 136 #endif 137 138 #ifdef NEED_MINVAL 139 __local dstT1 localmem_min[WGS2_ALIGNED]; 140 dstT1 minval = MAX_VAL; 141 #ifdef NEED_MINLOC 142 __local uint localmem_minloc[WGS2_ALIGNED]; 143 uint minloc = INDEX_MAX; 144 #endif 145 #endif 146 #ifdef NEED_MAXVAL 147 dstT1 maxval = MIN_VAL; 148 __local dstT1 localmem_max[WGS2_ALIGNED]; 149 #ifdef NEED_MAXLOC 150 __local uint localmem_maxloc[WGS2_ALIGNED]; 151 uint maxloc = INDEX_MAX; 152 #endif 153 #endif 154 #ifdef OP_CALC2 155 __local dstT1 localmem_max2[WGS2_ALIGNED]; 156 dstT1 maxval2 = MIN_VAL; 157 #endif 158 159 int src_index; 160 #ifdef HAVE_MASK 161 int mask_index; 162 #endif 163 #ifdef HAVE_SRC2 164 int src2_index; 165 #endif 166 167 dstT temp; 168 #ifdef HAVE_SRC2 169 dstT temp2; 170 #endif 171 172 for (int grain = groupnum * WGS 173 #ifndef HAVE_MASK 174 * kercn 175 #endif 176 ; id < total; id += grain) 177 { 178 #ifdef HAVE_MASK 179 #ifdef HAVE_MASK_CONT 180 mask_index = id; 181 #else 182 mask_index = mad24(id / cols, mask_step, id % cols); 183 #endif 184 if (mask[mask_index]) 185 #endif 186 { 187 #ifdef HAVE_SRC_CONT 188 src_index = id * srcTSIZE;//mul24(id, srcTSIZE); 189 #else 190 src_index = mad24(id / cols, src_step, mul24(id % cols, srcTSIZE)); 191 #endif 192 temp = convertToDT(loadpix(srcptr + src_index)); 193 #ifdef OP_ABS 194 temp = MIN_ABS(temp); 195 #endif 196 197 #ifdef HAVE_SRC2 198 #ifdef HAVE_SRC2_CONT 199 src2_index = id * srcTSIZE; //mul24(id, srcTSIZE); 200 #else 201 src2_index = mad24(id / cols, src2_step, mul24(id % cols, srcTSIZE)); 202 #endif 203 temp2 = convertToDT(loadpix(src2ptr + src2_index)); 204 temp = MIN_ABS2(temp, temp2); 205 #ifdef OP_CALC2 206 temp2 = MIN_ABS(temp2); 207 #endif 208 #endif 209 210 #if kercn == 1 211 #ifdef NEED_MINVAL 212 #ifdef NEED_MINLOC 213 if (minval > temp) 214 { 215 minval = temp; 216 minloc = id; 217 } 218 #else 219 minval = MIN(minval, temp); 220 #endif 221 #endif 222 #ifdef NEED_MAXVAL 223 #ifdef NEED_MAXLOC 224 if (maxval < temp) 225 { 226 maxval = temp; 227 maxloc = id; 228 } 229 #else 230 maxval = MAX(maxval, temp); 231 #endif 232 #ifdef OP_CALC2 233 maxval2 = MAX(maxval2, temp2); 234 #endif 235 #endif 236 #elif kercn >= 2 237 CALC_P(s0, 0) 238 CALC_P(s1, 1) 239 #if kercn >= 3 240 CALC_P(s2, 2) 241 #if kercn >= 4 242 CALC_P(s3, 3) 243 #if kercn >= 8 244 CALC_P(s4, 4) 245 CALC_P(s5, 5) 246 CALC_P(s6, 6) 247 CALC_P(s7, 7) 248 #if kercn == 16 249 CALC_P(s8, 8) 250 CALC_P(s9, 9) 251 CALC_P(sA, 10) 252 CALC_P(sB, 11) 253 CALC_P(sC, 12) 254 CALC_P(sD, 13) 255 CALC_P(sE, 14) 256 CALC_P(sF, 15) 257 #endif 258 #endif 259 #endif 260 #endif 261 #endif 262 } 263 } 264 265 if (lid < WGS2_ALIGNED) 266 { 267 #ifdef NEED_MINVAL 268 localmem_min[lid] = minval; 269 #endif 270 #ifdef NEED_MAXVAL 271 localmem_max[lid] = maxval; 272 #endif 273 #ifdef NEED_MINLOC 274 localmem_minloc[lid] = minloc; 275 #endif 276 #ifdef NEED_MAXLOC 277 localmem_maxloc[lid] = maxloc; 278 #endif 279 #ifdef OP_CALC2 280 localmem_max2[lid] = maxval2; 281 #endif 282 } 283 barrier(CLK_LOCAL_MEM_FENCE); 284 285 if (lid >= WGS2_ALIGNED && total >= WGS2_ALIGNED) 286 { 287 int lid3 = lid - WGS2_ALIGNED; 288 #ifdef NEED_MINVAL 289 #ifdef NEED_MINLOC 290 if (localmem_min[lid3] >= minval) 291 { 292 if (localmem_min[lid3] == minval) 293 localmem_minloc[lid3] = min(localmem_minloc[lid3], minloc); 294 else 295 localmem_minloc[lid3] = minloc, 296 localmem_min[lid3] = minval; 297 } 298 #else 299 localmem_min[lid3] = MIN(localmem_min[lid3], minval); 300 #endif 301 #endif 302 #ifdef NEED_MAXVAL 303 #ifdef NEED_MAXLOC 304 if (localmem_max[lid3] <= maxval) 305 { 306 if (localmem_max[lid3] == maxval) 307 localmem_maxloc[lid3] = min(localmem_maxloc[lid3], maxloc); 308 else 309 localmem_maxloc[lid3] = maxloc, 310 localmem_max[lid3] = maxval; 311 } 312 #else 313 localmem_max[lid3] = MAX(localmem_max[lid3], maxval); 314 #endif 315 #endif 316 #ifdef OP_CALC2 317 localmem_max2[lid3] = MAX(localmem_max2[lid3], maxval2); 318 #endif 319 } 320 barrier(CLK_LOCAL_MEM_FENCE); 321 322 for (int lsize = WGS2_ALIGNED >> 1; lsize > 0; lsize >>= 1) 323 { 324 if (lid < lsize) 325 { 326 int lid2 = lsize + lid; 327 328 #ifdef NEED_MINVAL 329 #ifdef NEED_MINLOC 330 if (localmem_min[lid] >= localmem_min[lid2]) 331 { 332 if (localmem_min[lid] == localmem_min[lid2]) 333 localmem_minloc[lid] = min(localmem_minloc[lid2], localmem_minloc[lid]); 334 else 335 localmem_minloc[lid] = localmem_minloc[lid2], 336 localmem_min[lid] = localmem_min[lid2]; 337 } 338 #else 339 localmem_min[lid] = MIN(localmem_min[lid], localmem_min[lid2]); 340 #endif 341 #endif 342 #ifdef NEED_MAXVAL 343 #ifdef NEED_MAXLOC 344 if (localmem_max[lid] <= localmem_max[lid2]) 345 { 346 if (localmem_max[lid] == localmem_max[lid2]) 347 localmem_maxloc[lid] = min(localmem_maxloc[lid2], localmem_maxloc[lid]); 348 else 349 localmem_maxloc[lid] = localmem_maxloc[lid2], 350 localmem_max[lid] = localmem_max[lid2]; 351 } 352 #else 353 localmem_max[lid] = MAX(localmem_max[lid], localmem_max[lid2]); 354 #endif 355 #endif 356 #ifdef OP_CALC2 357 localmem_max2[lid] = MAX(localmem_max2[lid], localmem_max2[lid2]); 358 #endif 359 } 360 barrier(CLK_LOCAL_MEM_FENCE); 361 } 362 363 if (lid == 0) 364 { 365 int pos = 0; 366 #ifdef NEED_MINVAL 367 *(__global dstT1 *)(dstptr + mad24(gid, (int)sizeof(dstT1), pos)) = localmem_min[0]; 368 pos = mad24(groupnum, (int)sizeof(dstT1), pos); 369 #endif 370 #ifdef NEED_MAXVAL 371 *(__global dstT1 *)(dstptr + mad24(gid, (int)sizeof(dstT1), pos)) = localmem_max[0]; 372 pos = mad24(groupnum, (int)sizeof(dstT1), pos); 373 #endif 374 #ifdef NEED_MINLOC 375 *(__global uint *)(dstptr + mad24(gid, (int)sizeof(uint), pos)) = localmem_minloc[0]; 376 pos = mad24(groupnum, (int)sizeof(uint), pos); 377 #endif 378 #ifdef NEED_MAXLOC 379 *(__global uint *)(dstptr + mad24(gid, (int)sizeof(uint), pos)) = localmem_maxloc[0]; 380 #ifdef OP_CALC2 381 pos = mad24(groupnum, (int)sizeof(uint), pos); 382 #endif 383 #endif 384 #ifdef OP_CALC2 385 *(__global dstT1 *)(dstptr + mad24(gid, (int)sizeof(dstT1), pos)) = localmem_max2[0]; 386 #endif 387 } 388 } 389