Home | History | Annotate | Download | only in photo
      1 // This file is auto-generated. Do not edit!
      2 
      3 #include "precomp.hpp"
      4 #include "opencl_kernels_photo.hpp"
      5 
      6 namespace cv
      7 {
      8 namespace ocl
      9 {
     10 namespace photo
     11 {
     12 
     // nlmeans: ProgramEntry initialised with the program name, the OpenCL C source
     // text (the adjacent string literals below are concatenated into one string)
     // and a trailing hex digest string. ProgramEntry is declared outside this file.
     // NOTE(review): the file header marks this file as auto-generated, so any fix
     // to the kernel text presumably belongs in the generator input, not here.
     13 const struct ProgramEntry nlmeans={"nlmeans",
     // Preamble, part 1: opt into the AMD printf extension when it is advertised.
     // NOTE(review): this pragma is spelled OPENCL_EXTENSION (underscore) whereas the
     // fp64 pragmas below use OPENCL EXTENSION (space). Looks like a typo that would
     // leave the extension un-enabled; no printf call is visible in this source, so
     // it is presumably harmless -- confirm.
     14 "#ifdef cl_amd_printf\n"
     15 "#pragma OPENCL_EXTENSION cl_amd_printf:enable\n"
     16 "#endif\n"
     // Preamble, part 2: when DOUBLE_SUPPORT is defined, enable the AMD fp64
     // extension if present, otherwise the Khronos one if present.
     17 "#ifdef DOUBLE_SUPPORT\n"
     18 "#ifdef cl_amd_fp64\n"
     19 "#pragma OPENCL EXTENSION cl_amd_fp64:enable\n"
     20 "#elif defined cl_khr_fp64\n"
     21 "#pragma OPENCL EXTENSION cl_khr_fp64:enable\n"
     22 "#endif\n"
     23 "#endif\n"
     // Kernel calcAlmostDist2Weight (compiled only when OP_CALC_WEIGHTS is defined).
     // Fills the lookup table almostDist2Weight: the work-item whose global id 0 is
     // almostDist writes entry [almostDist], provided almostDist < almostMaxDist.
     //   almostDist2ActualDistMultiplier  scales the table index to a distance
     //   fixedPointMult                   integer scale applied to the weight
     //   den                              factor multiplied into the exp() argument
     //   WEIGHT_THRESHOLD                 weights below threshold * fixedPointMult
     //                                    are stored as 0
     // wlut_t, w_t, FT and convert_wlut_t are not defined in this source; presumably
     // they are supplied through the program build options -- TODO confirm.
     24 "#ifdef OP_CALC_WEIGHTS\n"
     25 "__kernel void calcAlmostDist2Weight(__global wlut_t * almostDist2Weight, int almostMaxDist,\n"
     26 "FT almostDist2ActualDistMultiplier, int fixedPointMult,\n"
     27 "w_t den, FT WEIGHT_THRESHOLD)\n"
     28 "{\n"
     29 "int almostDist = get_global_id(0);\n"
     // Guard against work-items past the end of the table.
     30 "if (almostDist < almostMaxDist)\n"
     31 "{\n"
     32 "FT dist = almostDist * almostDist2ActualDistMultiplier;\n"
     // With ABS the exponent uses the squared distance, otherwise the distance
     // itself; both are negated and multiplied by den.
     33 "#ifdef ABS\n"
     34 "w_t w = exp((w_t)(-dist*dist) * den);\n"
     35 "#else\n"
     36 "w_t w = exp((w_t)(-dist) * den);\n"
     37 "#endif\n"
     // A NaN result is replaced by 1.0 before scaling to fixed point.
     38 "wlut_t weight = convert_wlut_t(fixedPointMult * (isnan(w) ? (w_t)1.0 : w));\n"
     // Weights under the scaled threshold are flushed to zero.
     39 "almostDist2Weight[almostDist] =\n"
     40 "weight < (wlut_t)(WEIGHT_THRESHOLD * fixedPointMult) ? (wlut_t)0 : weight;\n"
     41 "}\n"
     42 "}\n"
     // Everything from here to the final #endif is compiled only when
     // OP_CALC_FASTNLMEANS is defined (and OP_CALC_WEIGHTS is not).
     43 "#elif defined OP_CALC_FASTNLMEANS\n"
     // noconvert expands to nothing; presumably it lets a convert_* name passed via
     // build options act as a no-op conversion -- TODO confirm against the host code.
     44 "#define noconvert\n"
     // Number of candidate positions in the square search window.
     45 "#define SEARCH_SIZE_SQ (SEARCH_SIZE * SEARCH_SIZE)\n"
     // calcDist: distance between two pixels, summed over the cn channels.
     // With ABS each channel contributes abs_diff(a, b); otherwise it contributes the
     // squared difference. pixel_t, int_t, convert_int_t and cn are not defined in
     // this source (presumably build options -- TODO confirm).
     46 "inline int calcDist(pixel_t a, pixel_t b)\n"
     47 "{\n"
     48 "#ifdef ABS\n"
     49 "int_t retval = convert_int_t(abs_diff(a, b));\n"
     50 "#else\n"
     51 "int_t diff = convert_int_t(a) - convert_int_t(b);\n"
     52 "int_t retval = diff * diff;\n"
     53 "#endif\n"
     // Horizontal sum of the per-channel terms; any cn outside 1..4 is rejected at
     // program build time by the #error branch.
     54 "#if cn == 1\n"
     55 "return retval;\n"
     56 "#elif cn == 2\n"
     57 "return retval.x + retval.y;\n"
     58 "#elif cn == 3\n"
     59 "return retval.x + retval.y + retval.z;\n"
     60 "#elif cn == 4\n"
     61 "return retval.x + retval.y + retval.z + retval.w;\n"
     62 "#else\n"
     63 "#error \"cn should be either 1, 2, 3 or 4\"\n"
     64 "#endif\n"
     65 "}\n"
     // calcDistUpDown: change in a column sum when the template slides down one row,
     // i.e. distance(down_value, down_value_t) minus distance(up_value, up_value_t).
     // The *_t arguments are the matching pixels of the shifted (candidate) patch.
     // ABS variant: computed directly as the difference of two calcDist calls.
     66 "#ifdef ABS\n"
     67 "inline int calcDistUpDown(pixel_t down_value, pixel_t down_value_t, pixel_t up_value, pixel_t up_value_t)\n"
     68 "{\n"
     69 "return calcDist(down_value, down_value_t) - calcDist(up_value, up_value_t);\n"
     70 "}\n"
     // Squared-difference variant: with A and B the per-channel differences of the
     // entering and leaving rows, (A - B) * (A + B) equals A*A - B*B, so the same
     // quantity is obtained with a single multiplication per channel.
     71 "#else\n"
     72 "inline int calcDistUpDown(pixel_t down_value, pixel_t down_value_t, pixel_t up_value, pixel_t up_value_t)\n"
     73 "{\n"
     74 "int_t A = convert_int_t(down_value) - convert_int_t(down_value_t);\n"
     75 "int_t B = convert_int_t(up_value) - convert_int_t(up_value_t);\n"
     76 "int_t retval = (A - B) * (A + B);\n"
     // Sum over channels, same cn dispatch as calcDist.
     77 "#if cn == 1\n"
     78 "return retval;\n"
     79 "#elif cn == 2\n"
     80 "return retval.x + retval.y;\n"
     81 "#elif cn == 3\n"
     82 "return retval.x + retval.y + retval.z;\n"
     83 "#elif cn == 4\n"
     84 "return retval.x + retval.y + retval.z + retval.w;\n"
     85 "#else\n"
     86 "#error \"cn should be either 1, 2, 3 or 4\"\n"
     87 "#endif\n"
     88 "}\n"
     89 "#endif\n"
     // COND is not referenced anywhere else in the visible source; presumably a
     // leftover debugging aid.
     90 "#define COND if (x == 0 && y == 0)\n"
     // calcFirstElementInRow: full (non-incremental) computation of the patch
     // distances for pixel (y, x), used by the kernel for the first column of a tile.
     // For every search-window position i handled by this work-item it
     //   - sums calcDist over a TEMPLATE_SIZE x TEMPLATE_SIZE patch,
     //   - stores the TEMPLATE_SIZE per-column sums in col_dists[i * TEMPLATE_SIZE + j],
     //   - stores the total in dists[i],
     //   - writes up_col_dists[i] (see the review note at that statement).
     // Work-item id handles i = id, id + CTA_SIZE, ... below SEARCH_SIZE_SQ.
     // Byte addresses are formed as row * src_step + psz * col + src_offset; psz is
     // presumably the pixel size in bytes (not defined here -- TODO confirm).
     // NOTE(review): rows and columns can go below y and x by TEMPLATE_SIZE2 +
     // SEARCH_SIZE2 with no bounds check here; assumes the caller provides a padded
     // source -- TODO confirm.
     91 "inline void calcFirstElementInRow(__global const uchar * src, int src_step, int src_offset,\n"
     92 "__local int * dists, int y, int x, int id,\n"
     93 "__global int * col_dists, __global int * up_col_dists)\n"
     94 "{\n"
     // Move y to the top row of the template patch; x stays at the patch centre and
     // the column loop below runs over -TEMPLATE_SIZE2 .. +TEMPLATE_SIZE2.
     95 "y -= TEMPLATE_SIZE2;\n"
     // (sy, sx): top-row / centre-column of the patch at search position 0.
     96 "int sx = x - SEARCH_SIZE2, sy = y - SEARCH_SIZE2;\n"
     97 "int col_dists_current_private[TEMPLATE_SIZE];\n"
     98 "for (int i = id; i < SEARCH_SIZE_SQ; i += CTA_SIZE)\n"
     99 "{\n"
    100 "int dist = 0, value;\n"
     // src_template: candidate patch shifted by (i / SEARCH_SIZE, i % SEARCH_SIZE)
     // within the search window; src_current: the reference patch at (y, x).
    101 "__global const pixel_t * src_template = (__global const pixel_t *)(src +\n"
    102 "mad24(sy + i / SEARCH_SIZE, src_step, mad24(psz, sx + i % SEARCH_SIZE, src_offset)));\n"
    103 "__global const pixel_t * src_current = (__global const pixel_t *)(src + mad24(y, src_step, mad24(psz, x, src_offset)));\n"
    104 "__global int * col_dists_current = col_dists + i * TEMPLATE_SIZE;\n"
     // Accumulate the column sums in a private array first and copy them to global
     // memory once, after the patch has been traversed.
    105 "#pragma unroll\n"
    106 "for (int j = 0; j < TEMPLATE_SIZE; ++j)\n"
    107 "col_dists_current_private[j] = 0;\n"
    108 "for (int ty = 0; ty < TEMPLATE_SIZE; ++ty)\n"
    109 "{\n"
    110 "#pragma unroll\n"
    111 "for (int tx = -TEMPLATE_SIZE2; tx <= TEMPLATE_SIZE2; ++tx)\n"
    112 "{\n"
    113 "value = calcDist(src_template[tx], src_current[tx]);\n"
    114 "col_dists_current_private[tx + TEMPLATE_SIZE2] += value;\n"
    115 "dist += value;\n"
    116 "}\n"
     // Step both patch pointers down one image row (src_step is in bytes).
    117 "src_current = (__global const pixel_t *)((__global const uchar *)src_current + src_step);\n"
    118 "src_template = (__global const pixel_t *)((__global const uchar *)src_template + src_step);\n"
    119 "}\n"
    120 "#pragma unroll\n"
    121 "for (int j = 0; j < TEMPLATE_SIZE; ++j)\n"
    122 "col_dists_current[j] = col_dists_current_private[j];\n"
    123 "dists[i] = dist;\n"
     // NOTE(review): this reads col_dists[TEMPLATE_SIZE - 1], i.e. the entry of
     // search position 0, not col_dists_current[TEMPLATE_SIZE - 1]; that entry may
     // be written by a different work-item with no synchronisation. Possibly
     // col_dists_current was intended. In this source calcElement only reads
     // up_col_dists slots with column index >= 1, so the value stored here does
     // not appear to be consumed -- confirm before changing.
    124 "up_col_dists[0 + i] = col_dists[TEMPLATE_SIZE - 1];\n"
    125 "}\n"
    126 "}\n"
    // calcElementInFirstRow: incremental update of dists when the reference pixel
    // moves one column to the right within the first row of a tile. For each search
    // position i handled by this work-item it computes the sum of the newly entered
    // patch column from scratch, replaces the column sum stored in slot first of
    // col_dists, and adjusts dists[i] by the difference.
    //   x0     column index relative to the tile start (the kernel passes x - x0)
    //   x      absolute column of the reference pixel
    //   first  slot inside the TEMPLATE_SIZE-entry col_dists row that is overwritten;
    //          the kernel advances it modulo TEMPLATE_SIZE after every call
    127 "inline void calcElementInFirstRow(__global const uchar * src, int src_step, int src_offset,\n"
    128 "__local int * dists, int y, int x0, int x, int id, int first,\n"
    129 "__global int * col_dists, __global int * up_col_dists)\n"
    130 "{\n"
    // Address the rightmost column and the top row of the template patch.
    131 "x += TEMPLATE_SIZE2;\n"
    132 "y -= TEMPLATE_SIZE2;\n"
    133 "int sx = x - SEARCH_SIZE2, sy = y - SEARCH_SIZE2;\n"
    // Same i-to-work-item mapping as calcFirstElementInRow, so each work-item
    // updates only the dists entries it wrote itself.
    134 "for (int i = id; i < SEARCH_SIZE_SQ; i += CTA_SIZE)\n"
    135 "{\n"
    136 "__global const pixel_t * src_current = (__global const pixel_t *)(src + mad24(y, src_step, mad24(psz, x, src_offset)));\n"
    137 "__global const pixel_t * src_template = (__global const pixel_t *)(src +\n"
    138 "mad24(sy + i / SEARCH_SIZE, src_step, mad24(psz, sx + i % SEARCH_SIZE, src_offset)));\n"
    139 "__global int * col_dists_current = col_dists + TEMPLATE_SIZE * i;\n"
    // Sum the distances down the single new column (TEMPLATE_SIZE rows).
    140 "int col_dist = 0;\n"
    141 "#pragma unroll\n"
    142 "for (int ty = 0; ty < TEMPLATE_SIZE; ++ty)\n"
    143 "{\n"
    144 "col_dist += calcDist(src_current[0], src_template[0]);\n"
    145 "src_current = (__global const pixel_t *)((__global const uchar *)src_current + src_step);\n"
    146 "src_template = (__global const pixel_t *)((__global const uchar *)src_template + src_step);\n"
    147 "}\n"
    // Sliding-window update: add the new column sum, subtract the one it replaces.
    148 "dists[i] += col_dist - col_dists_current[first];\n"
    149 "col_dists_current[first] = col_dist;\n"
    // Remember this column sum per tile column so calcElement can update it
    // vertically when the next row is processed.
    150 "up_col_dists[mad24(x0, SEARCH_SIZE_SQ, i)] = col_dist;\n"
    151 "}\n"
    152 "}\n"
    // calcElement: incremental update of dists for a pixel that is neither in the
    // first column nor in the first row of its tile. The sum of the newly entered
    // patch column is derived from the sum stored for the same tile column by the
    // previous row (up_col_dists) by adding the bottom-row term and removing the
    // top-row term via calcDistUpDown; that new column sum then replaces slot first
    // in col_dists exactly as in calcElementInFirstRow.
    // Parameters have the same meaning as in calcElementInFirstRow.
    153 "inline void calcElement(__global const uchar * src, int src_step, int src_offset,\n"
    154 "__local int * dists, int y, int x0, int x, int id, int first,\n"
    155 "__global int * col_dists, __global int * up_col_dists)\n"
    156 "{\n"
    // Rightmost patch column; sy_up is the row that just left the patch (one above
    // the current top row) and sy_down is the current bottom row.
    157 "int sx = x + TEMPLATE_SIZE2;\n"
    158 "int sy_up = y - TEMPLATE_SIZE2 - 1;\n"
    159 "int sy_down = y + TEMPLATE_SIZE2;\n"
    // Reference-patch pixels are the same for every search position, so they are
    // loaded once outside the loop.
    160 "pixel_t up_value = *(__global const pixel_t *)(src + mad24(sy_up, src_step, mad24(psz, sx, src_offset)));\n"
    161 "pixel_t down_value = *(__global const pixel_t *)(src + mad24(sy_down, src_step, mad24(psz, sx, src_offset)));\n"
    // Rebase the coordinates to search position 0 for the candidate-patch loads.
    162 "sx -= SEARCH_SIZE2;\n"
    163 "sy_up -= SEARCH_SIZE2;\n"
    164 "sy_down -= SEARCH_SIZE2;\n"
    165 "for (int i = id; i < SEARCH_SIZE_SQ; i += CTA_SIZE)\n"
    166 "{\n"
    // (wy, wx): offset of search position i inside the search window.
    167 "int wx = i % SEARCH_SIZE, wy = i / SEARCH_SIZE;\n"
    168 "pixel_t up_value_t = *(__global const pixel_t *)(src + mad24(sy_up + wy, src_step, mad24(psz, sx + wx, src_offset)));\n"
    169 "pixel_t down_value_t = *(__global const pixel_t *)(src + mad24(sy_down + wy, src_step, mad24(psz, sx + wx, src_offset)));\n"
    // col_dists entry: row i, slot first. up_col_dists entry: tile column x0, row i.
    170 "__global int * col_dists_current = col_dists + mad24(i, TEMPLATE_SIZE, first);\n"
    171 "__global int * up_col_dists_current = up_col_dists + mad24(x0, SEARCH_SIZE_SQ, i);\n"
    // Vertical slide of the column sum, then horizontal slide of the patch total.
    172 "int col_dist = up_col_dists_current[0] + calcDistUpDown(down_value, down_value_t, up_value, up_value_t);\n"
    173 "dists[i] += col_dist - col_dists_current[0];\n"
    174 "col_dists_current[0] = col_dist;\n"
    175 "up_col_dists_current[0] = col_dist;\n"
    176 "}\n"
    177 "}\n"
    // convolveWindow: produces the output pixel at (y, x) as the weighted average of
    // the source pixels in the search window. Each work-item accumulates the
    // weights and weighted pixel values of the positions i it owns, the partial
    // results are reduced through local memory, and work-item 0 writes the quotient
    // to dst.
    //   dists                               patch distance per search position
    //   almostDist2Weight                   weight lookup table indexed by the
    //                                       shifted distance
    //   weights_local, weighted_sum_local   local scratch, one slot per work-item
    //   almostTemplateWindowSizeSqBinShift  right shift applied to dists[i] before
    //                                       the table lookup
    // weight_t, sum_t and the convert_* helpers are not defined in this source
    // (presumably build options -- TODO confirm).
    178 "inline void convolveWindow(__global const uchar * src, int src_step, int src_offset,\n"
    179 "__local int * dists, __global const wlut_t * almostDist2Weight,\n"
    180 "__global uchar * dst, int dst_step, int dst_offset,\n"
    181 "int y, int x, int id, __local weight_t * weights_local,\n"
    182 "__local sum_t * weighted_sum_local, int almostTemplateWindowSizeSqBinShift)\n"
    183 "{\n"
    // Top-left corner of the search window centred on (y, x).
    184 "int sx = x - SEARCH_SIZE2, sy = y - SEARCH_SIZE2;\n"
    185 "weight_t weights = (weight_t)0;\n"
    186 "sum_t weighted_sum = (sum_t)0;\n"
    // Same i-to-work-item mapping as the functions that fill dists, so every
    // dists[i] read here was written by this same work-item.
    187 "for (int i = id; i < SEARCH_SIZE_SQ; i += CTA_SIZE)\n"
    188 "{\n"
    189 "int src_index = mad24(sy + i / SEARCH_SIZE, src_step, mad24(i % SEARCH_SIZE + sx, psz, src_offset));\n"
    190 "sum_t src_value = convert_sum_t(*(__global const pixel_t *)(src + src_index));\n"
    // NOTE(review): the table index is not range-checked here; assumes the table
    // covers every possible shifted distance -- TODO confirm against the host code.
    191 "int almostAvgDist = dists[i] >> almostTemplateWindowSizeSqBinShift;\n"
    192 "weight_t weight = convert_weight_t(almostDist2Weight[almostAvgDist]);\n"
    193 "weights += weight;\n"
    194 "weighted_sum += (sum_t)weight * src_value;\n"
    195 "}\n"
    // Publish the per-work-item partial sums and wait until all are visible.
    196 "weights_local[id] = weights;\n"
    197 "weighted_sum_local[id] = weighted_sum;\n"
    198 "barrier(CLK_LOCAL_MEM_FENCE);\n"
    // Pairwise reduction that halves the active range each pass and stops once the
    // partial sums sit in slots 0..3 (the last pass executed has lsize == 4).
    // NOTE(review): this appears to assume CTA_SIZE is a power of two and at least
    // 4; other values would miscount or read unwritten slots -- TODO confirm.
    199 "for (int lsize = CTA_SIZE >> 1; lsize > 2; lsize >>= 1)\n"
    200 "{\n"
    201 "if (id < lsize)\n"
    202 "{\n"
    203 "int id2 = lsize + id;\n"
    204 "weights_local[id] += weights_local[id2];\n"
    205 "weighted_sum_local[id] += weighted_sum_local[id2];\n"
    206 "}\n"
    207 "barrier(CLK_LOCAL_MEM_FENCE);\n"
    208 "}\n"
    // Work-item 0 folds the last four slots and stores the normalised result.
    // NOTE(review): there is no barrier after these reads, and the kernel calls no
    // barrier before the next convolveWindow writes weights_local[id] again, so
    // work-items 1..3 could in principle overwrite their slots before work-item 0
    // has read them. Looks like a potential race -- confirm.
    209 "if (id == 0)\n"
    210 "{\n"
    211 "int dst_index = mad24(y, dst_step, mad24(psz, x, dst_offset));\n"
    212 "sum_t weighted_sum_local_0 = weighted_sum_local[0] + weighted_sum_local[1] +\n"
    213 "weighted_sum_local[2] + weighted_sum_local[3];\n"
    214 "weight_t weights_local_0 = weights_local[0] + weights_local[1] + weights_local[2] + weights_local[3];\n"
    215 "*(__global pixel_t *)(dst + dst_index) = convert_pixel_t(weighted_sum_local_0 / (sum_t)weights_local_0);\n"
    216 "}\n"
    217 "}\n"
    // Kernel fastNlMeansDenoising: each work-group denoises one tile of at most
    // BLOCK_ROWS x BLOCK_COLS output pixels, visiting them one at a time in row-major
    // order; the work-items of the group (local id 0 dimension) share the work of
    // each pixel by splitting the search-window positions among themselves.
    //   src, src_step, src_offset   source image bytes, row stride and start offset
    //   dst, dst_step, dst_offset   destination image, same convention
    //   dst_rows, dst_cols          output size used to clip the last tiles
    //   almostDist2Weight           weight lookup table
    //   buffer                      global scratch holding the column sums
    //   almostTemplateWindowSizeSqBinShift  forwarded to convolveWindow
    // BLOCK_ROWS, BLOCK_COLS, CTA_SIZE, SEARCH_SIZE, TEMPLATE_SIZE and friends are not
    // defined in this source (presumably build options -- TODO confirm).
    218 "__kernel void fastNlMeansDenoising(__global const uchar * src, int src_step, int src_offset,\n"
    219 "__global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,\n"
    220 "__global const wlut_t * almostDist2Weight, __global uchar * buffer,\n"
    221 "int almostTemplateWindowSizeSqBinShift)\n"
    222 "{\n"
    223 "int block_x = get_group_id(0), nblocks_x = get_num_groups(0);\n"
    224 "int block_y = get_group_id(1);\n"
    // first is assigned in the x == x0 branch, which is always taken before the
    // branches that read it.
    225 "int id = get_local_id(0), first;\n"
    // Per-group scratch: one distance per search position, plus one reduction slot
    // per work-item for the weights and the weighted sums.
    226 "__local int dists[SEARCH_SIZE_SQ];\n"
    227 "__local weight_t weights[CTA_SIZE];\n"
    228 "__local sum_t weighted_sum[CTA_SIZE];\n"
    // Tile bounds, clipped to the image size.
    229 "int x0 = block_x * BLOCK_COLS, x1 = min(x0 + BLOCK_COLS, dst_cols);\n"
    230 "int y0 = block_y * BLOCK_ROWS, y1 = min(y0 + BLOCK_ROWS, dst_rows);\n"
    // Start of this group region inside buffer, counted in ints. The region holds
    // col_dists (SEARCH_SIZE_SQ rows of TEMPLATE_SIZE ints) immediately followed by
    // up_col_dists (indexed by tile column * SEARCH_SIZE_SQ + search position).
    // NOTE(review): the required buffer size is decided by the host code, which is
    // not visible here -- TODO confirm it matches this layout.
    231 "int block_data_start = SEARCH_SIZE_SQ * (mad24(block_y, dst_cols, x0) + mad24(block_y, nblocks_x, block_x) * TEMPLATE_SIZE);\n"
    232 "__global int * col_dists = (__global int *)(buffer + block_data_start * sizeof(int));\n"
    233 "__global int * up_col_dists = col_dists + SEARCH_SIZE_SQ * TEMPLATE_SIZE;\n"
    // The loop bounds depend only on group ids, so every work-item of the group
    // runs the same iterations and reaches the barriers inside convolveWindow.
    234 "for (int y = y0; y < y1; ++y)\n"
    235 "for (int x = x0; x < x1; ++x)\n"
    236 "{\n"
    // First column of the tile: recompute all sums from scratch and restart the
    // circular slot index.
    237 "if (x == x0)\n"
    238 "{\n"
    239 "calcFirstElementInRow(src, src_step, src_offset, dists, y, x, id, col_dists, up_col_dists);\n"
    240 "first = 0;\n"
    241 "}\n"
    242 "else\n"
    243 "{\n"
    // Other columns: slide the patch right by one. The first tile row has no row
    // above it to reuse, so its new column is summed directly; later rows update
    // the column sum saved by the previous row.
    244 "if (y == y0)\n"
    245 "calcElementInFirstRow(src, src_step, src_offset, dists, y, x - x0, x, id, first, col_dists, up_col_dists);\n"
    246 "else\n"
    247 "calcElement(src, src_step, src_offset, dists, y, x - x0, x, id, first, col_dists, up_col_dists);\n"
    // Advance the slot that the next call will overwrite.
    248 "first = (first + 1) % TEMPLATE_SIZE;\n"
    249 "}\n"
    // Turn the distances into weights and write the denoised pixel at (y, x).
    250 "convolveWindow(src, src_step, src_offset, dists, almostDist2Weight, dst, dst_step, dst_offset,\n"
    251 "y, x, id, weights, weighted_sum, almostTemplateWindowSizeSqBinShift);\n"
    252 "}\n"
    253 "}\n"
    // Closes the OP_CALC_WEIGHTS / OP_CALC_FASTNLMEANS conditional.
    254 "#endif\n"
    // Last ProgramEntry initialiser: a 32-hex-digit string, presumably a digest of
    // the program text above -- TODO confirm; if so it must be regenerated whenever
    // the source text changes.
    255 , "094aea838a917cea483f77e19dd39de3"};
    // nlmeans_oclsrc: ProgramSource object constructed from the programStr member of
    // the nlmeans entry defined above. ProgramSource is declared outside this file.
    256 ProgramSource nlmeans_oclsrc(nlmeans.programStr);
    257 }
    258 }}
    259