1 // This file is part of OpenCV project. 2 // It is subject to the license terms in the LICENSE file found in the top-level directory 3 // of this distribution and at http://opencv.org/license.html. 4 // 5 // Copyright (C) 2014, Itseez, Inc, all rights reserved. 6 7 // 8 // Common preprocessors macro 9 // 10 11 // 12 // TODO: Move this common code into "header" file 13 // 14 15 #ifndef NL // New Line: for preprocessor debugging 16 #define NL 17 #endif 18 19 #define REF(x) x 20 #define __CAT(x, y) x##y 21 #define CAT(x, y) __CAT(x, y) 22 23 // 24 // All matrixes are come with this description ("name" is a name of matrix): 25 // * name_CN - number of channels (1,2,3,4) 26 // * name_DEPTH - numeric value of CV_MAT_DEPTH(type). See CV_8U, CV_32S, etc macro below. 27 // 28 // Currently we also pass these attributes (to reduce this macro block): 29 // * name_T - datatype (int, float, uchar4, float4) 30 // * name_T1 - datatype for one channel (int, float, uchar). 31 // It is equal to result of "T1(name_T)" macro 32 // * name_TSIZE - CV_ELEM_SIZE(type). 33 // We can't use sizeof(name_T) here, because sizeof(float3) is usually equal to 8, not 6. 34 // * name_T1SIZE - CV_ELEM_SIZE1(type) 35 // 36 37 // 38 // Usage sample: 39 // 40 // #define workType TYPE(float, src_CN) 41 // #define convertToWorkType CONVERT_TO(workType) 42 // #define convertWorkTypeToDstType CONVERT(workType, dst_T) 43 // 44 // __kernel void kernelFn(DECLARE_MAT_ARG(src), DECLARE_MAT_ARG(dst)) 45 // { 46 // const int x = get_global_id(0); 47 // const int y = get_global_id(1); 48 // 49 // if (x < srcWidth && y < srcHeight) 50 // { 51 // int src_byteOffset = MAT_BYTE_OFFSET(src, x, y); 52 // int dst_byteOffset = MAT_BYTE_OFFSET(dst, x, y); 53 // workType value = convertToWorkType(LOAD_MAT_AT(src, src_byteOffset)); 54 // 55 // ... value processing ... 56 // 57 // STORE_MAT_AT(dst, dst_byteOffset, convertWorkTypeToDstType(value)); 58 // } 59 // } 60 // 61 62 #define DECLARE_MAT_ARG(name) \ 63 __global uchar* restrict name ## Ptr, \ 64 int name ## StepBytes, \ 65 int name ## Offset, \ 66 int name ## Height, \ 67 int name ## Width NL 68 69 #define MAT_BYTE_OFFSET(name, x, y) mad24((y)/* + name ## OffsetY*/, name ## StepBytes, ((x)/* + name ## OffsetX*/) * (int)(name ## _TSIZE) + name ## Offset) 70 #define MAT_RELATIVE_BYTE_OFFSET(name, x, y) mad24(y, name ## StepBytes, (x) * (int)(name ## _TSIZE)) 71 72 #define __LOAD_MAT_AT(name, byteOffset) *((const __global name ## _T*)(name ## Ptr + (byteOffset))) 73 #define __vload_CN__(name_cn) vload ## name_cn 74 #define __vload_CN_(name_cn) __vload_CN__(name_cn) 75 #define __vload_CN(name) __vload_CN_(name ## _CN) 76 #define __LOAD_MAT_AT_vload(name, byteOffset) __vload_CN(name)(0, ((const __global name ## _T1*)(name ## Ptr + (byteOffset)))) 77 #define __LOAD_MAT_AT_1 __LOAD_MAT_AT 78 #define __LOAD_MAT_AT_2 __LOAD_MAT_AT 79 #define __LOAD_MAT_AT_3 __LOAD_MAT_AT_vload 80 #define __LOAD_MAT_AT_4 __LOAD_MAT_AT 81 #define __LOAD_MAT_AT_CN__(name_cn) __LOAD_MAT_AT_ ## name_cn 82 #define __LOAD_MAT_AT_CN_(name_cn) __LOAD_MAT_AT_CN__(name_cn) 83 #define __LOAD_MAT_AT_CN(name) __LOAD_MAT_AT_CN_(name ## _CN) 84 #define LOAD_MAT_AT(name, byteOffset) __LOAD_MAT_AT_CN(name)(name, byteOffset) 85 86 #define __STORE_MAT_AT(name, byteOffset, v) *((__global name ## _T*)(name ## Ptr + (byteOffset))) = v 87 #define __vstore_CN__(name_cn) vstore ## name_cn 88 #define __vstore_CN_(name_cn) __vstore_CN__(name_cn) 89 #define __vstore_CN(name) __vstore_CN_(name ## _CN) 90 #define __STORE_MAT_AT_vstore(name, byteOffset, v) __vstore_CN(name)(v, 0, ((__global name ## _T1*)(name ## Ptr + (byteOffset)))) 91 #define __STORE_MAT_AT_1 __STORE_MAT_AT 92 #define __STORE_MAT_AT_2 __STORE_MAT_AT 93 #define __STORE_MAT_AT_3 __STORE_MAT_AT_vstore 94 #define __STORE_MAT_AT_4 __STORE_MAT_AT 95 #define __STORE_MAT_AT_CN__(name_cn) __STORE_MAT_AT_ ## name_cn 96 #define __STORE_MAT_AT_CN_(name_cn) __STORE_MAT_AT_CN__(name_cn) 97 #define __STORE_MAT_AT_CN(name) __STORE_MAT_AT_CN_(name ## _CN) 98 #define STORE_MAT_AT(name, byteOffset, v) __STORE_MAT_AT_CN(name)(name, byteOffset, v) 99 100 #define T1_uchar uchar 101 #define T1_uchar2 uchar 102 #define T1_uchar3 uchar 103 #define T1_uchar4 uchar 104 #define T1_char char 105 #define T1_char2 char 106 #define T1_char3 char 107 #define T1_char4 char 108 #define T1_ushort ushort 109 #define T1_ushort2 ushort 110 #define T1_ushort3 ushort 111 #define T1_ushort4 ushort 112 #define T1_short short 113 #define T1_short2 short 114 #define T1_short3 short 115 #define T1_short4 short 116 #define T1_int int 117 #define T1_int2 int 118 #define T1_int3 int 119 #define T1_int4 int 120 #define T1_float float 121 #define T1_float2 float 122 #define T1_float3 float 123 #define T1_float4 float 124 #define T1_double double 125 #define T1_double2 double 126 #define T1_double3 double 127 #define T1_double4 double 128 #define T1(type) REF(CAT(T1_, REF(type))) 129 130 #define uchar1 uchar 131 #define char1 char 132 #define short1 short 133 #define ushort1 ushort 134 #define int1 int 135 #define float1 float 136 #define double1 double 137 #define TYPE(type, cn) REF(CAT(REF(type), REF(cn))) 138 139 #define __CONVERT_MODE_uchar_uchar __NO_CONVERT 140 #define __CONVERT_MODE_uchar_char __CONVERT_sat 141 #define __CONVERT_MODE_uchar_ushort __CONVERT 142 #define __CONVERT_MODE_uchar_short __CONVERT 143 #define __CONVERT_MODE_uchar_int __CONVERT 144 #define __CONVERT_MODE_uchar_float __CONVERT 145 #define __CONVERT_MODE_uchar_double __CONVERT 146 #define __CONVERT_MODE_char_uchar __CONVERT_sat 147 #define __CONVERT_MODE_char_char __NO_CONVERT 148 #define __CONVERT_MODE_char_ushort __CONVERT_sat 149 #define __CONVERT_MODE_char_short __CONVERT 150 #define __CONVERT_MODE_char_int __CONVERT 151 #define __CONVERT_MODE_char_float __CONVERT 152 #define __CONVERT_MODE_char_double __CONVERT 153 #define __CONVERT_MODE_ushort_uchar __CONVERT_sat 154 #define __CONVERT_MODE_ushort_char __CONVERT_sat 155 #define __CONVERT_MODE_ushort_ushort __NO_CONVERT 156 #define __CONVERT_MODE_ushort_short __CONVERT_sat 157 #define __CONVERT_MODE_ushort_int __CONVERT 158 #define __CONVERT_MODE_ushort_float __CONVERT 159 #define __CONVERT_MODE_ushort_double __CONVERT 160 #define __CONVERT_MODE_short_uchar __CONVERT_sat 161 #define __CONVERT_MODE_short_char __CONVERT_sat 162 #define __CONVERT_MODE_short_ushort __CONVERT_sat 163 #define __CONVERT_MODE_short_short __NO_CONVERT 164 #define __CONVERT_MODE_short_int __CONVERT 165 #define __CONVERT_MODE_short_float __CONVERT 166 #define __CONVERT_MODE_short_double __CONVERT 167 #define __CONVERT_MODE_int_uchar __CONVERT_sat 168 #define __CONVERT_MODE_int_char __CONVERT_sat 169 #define __CONVERT_MODE_int_ushort __CONVERT_sat 170 #define __CONVERT_MODE_int_short __CONVERT_sat 171 #define __CONVERT_MODE_int_int __NO_CONVERT 172 #define __CONVERT_MODE_int_float __CONVERT 173 #define __CONVERT_MODE_int_double __CONVERT 174 #define __CONVERT_MODE_float_uchar __CONVERT_sat_rte 175 #define __CONVERT_MODE_float_char __CONVERT_sat_rte 176 #define __CONVERT_MODE_float_ushort __CONVERT_sat_rte 177 #define __CONVERT_MODE_float_short __CONVERT_sat_rte 178 #define __CONVERT_MODE_float_int __CONVERT_rte 179 #define __CONVERT_MODE_float_float __NO_CONVERT 180 #define __CONVERT_MODE_float_double __CONVERT 181 #define __CONVERT_MODE_double_uchar __CONVERT_sat_rte 182 #define __CONVERT_MODE_double_char __CONVERT_sat_rte 183 #define __CONVERT_MODE_double_ushort __CONVERT_sat_rte 184 #define __CONVERT_MODE_double_short __CONVERT_sat_rte 185 #define __CONVERT_MODE_double_int __CONVERT_rte 186 #define __CONVERT_MODE_double_float __CONVERT 187 #define __CONVERT_MODE_double_double __NO_CONVERT 188 #define __CONVERT_MODE(srcType, dstType) CAT(__CONVERT_MODE_, CAT(REF(T1(srcType)), CAT(_, REF(T1(dstType))))) 189 190 #define __ROUND_MODE__NO_CONVERT 191 #define __ROUND_MODE__CONVERT // nothing 192 #define __ROUND_MODE__CONVERT_rte _rte 193 #define __ROUND_MODE__CONVERT_sat _sat 194 #define __ROUND_MODE__CONVERT_sat_rte _sat_rte 195 #define ROUND_MODE(srcType, dstType) CAT(__ROUND_MODE_, __CONVERT_MODE(srcType, dstType)) 196 197 #define __CONVERT_ROUND(dstType, roundMode) CAT(CAT(convert_, REF(dstType)), roundMode) 198 #define __NO_CONVERT(dstType) // nothing 199 #define __CONVERT(dstType) __CONVERT_ROUND(dstType,) 200 #define __CONVERT_rte(dstType) __CONVERT_ROUND(dstType,_rte) 201 #define __CONVERT_sat(dstType) __CONVERT_ROUND(dstType,_sat) 202 #define __CONVERT_sat_rte(dstType) __CONVERT_ROUND(dstType,_sat_rte) 203 #define CONVERT(srcType, dstType) REF(__CONVERT_MODE(srcType,dstType))(dstType) 204 #define CONVERT_TO(dstType) __CONVERT_ROUND(dstType,) 205 206 // OpenCV depths 207 #define CV_8U 0 208 #define CV_8S 1 209 #define CV_16U 2 210 #define CV_16S 3 211 #define CV_32S 4 212 #define CV_32F 5 213 #define CV_64F 6 214 215 // 216 // End of common preprocessors macro 217 // 218 219 220 221 #if defined(DEFINE_feed) 222 223 #define workType TYPE(weight_T1, src_CN) 224 225 #if src_DEPTH == 3 && src_CN == 3 226 #define convertSrcToWorkType convert_float3 227 #else 228 #define convertSrcToWorkType CONVERT_TO(workType) 229 #endif 230 231 #if dst_DEPTH == 3 && dst_CN == 3 232 #define convertToDstType convert_short3 233 #else 234 #define convertToDstType CONVERT_TO(dst_T) // sat_rte provides incompatible results with CPU path 235 #endif 236 237 __kernel void feed( 238 DECLARE_MAT_ARG(src), DECLARE_MAT_ARG(weight), 239 DECLARE_MAT_ARG(dst), DECLARE_MAT_ARG(dstWeight) 240 ) 241 { 242 const int x = get_global_id(0); 243 const int y = get_global_id(1); 244 245 if (x < srcWidth && y < srcHeight) 246 { 247 int src_byteOffset = MAT_BYTE_OFFSET(src, x, y); 248 int weight_byteOffset = MAT_BYTE_OFFSET(weight, x, y); 249 int dst_byteOffset = MAT_BYTE_OFFSET(dst, x, y); 250 int dstWeight_byteOffset = MAT_BYTE_OFFSET(dstWeight, x, y); 251 252 weight_T w = LOAD_MAT_AT(weight, weight_byteOffset); 253 workType src_value = convertSrcToWorkType(LOAD_MAT_AT(src, src_byteOffset)); 254 STORE_MAT_AT(dst, dst_byteOffset, LOAD_MAT_AT(dst, dst_byteOffset) + convertToDstType(src_value * w)); 255 STORE_MAT_AT(dstWeight, dstWeight_byteOffset, LOAD_MAT_AT(dstWeight, dstWeight_byteOffset) + w); 256 } 257 } 258 259 #endif 260 261 #if defined(DEFINE_normalizeUsingWeightMap) 262 263 #if mat_DEPTH == 3 && mat_CN == 3 264 #define workType float3 265 #define convertSrcToWorkType convert_float3 266 #define convertToDstType convert_short3 267 #else 268 #define workType TYPE(weight_T1, mat_CN) 269 #define convertSrcToWorkType CONVERT_TO(workType) 270 #define convertToDstType CONVERT_TO(mat_T) // sat_rte provides incompatible results with CPU path 271 #endif 272 273 #if weight_DEPTH >= CV_32F 274 #define WEIGHT_EPS 1e-5f 275 #else 276 #define WEIGHT_EPS 0 277 #endif 278 279 __kernel void normalizeUsingWeightMap( 280 DECLARE_MAT_ARG(mat), DECLARE_MAT_ARG(weight) 281 ) 282 { 283 const int x = get_global_id(0); 284 const int y = get_global_id(1); 285 286 if (x < matWidth && y < matHeight) 287 { 288 int mat_byteOffset = MAT_BYTE_OFFSET(mat, x, y); 289 int weight_byteOffset = MAT_BYTE_OFFSET(weight, x, y); 290 291 weight_T w = LOAD_MAT_AT(weight, weight_byteOffset); 292 workType value = convertSrcToWorkType(LOAD_MAT_AT(mat, mat_byteOffset)); 293 value = value / (w + WEIGHT_EPS); 294 STORE_MAT_AT(mat, mat_byteOffset, convertToDstType(value)); 295 } 296 } 297 298 #endif 299