1 /* 2 * function: kernel_rgb_pipe 3 * input: image2d_t as read only 4 * output: image2d_t as write only 5 */ 6 7 #define WORK_ITEM_X_SIZE 1 8 #define WORK_ITEM_Y_SIZE 1 9 10 #define SHARED_PIXEL_X_OFFSET 1 11 #define SHARED_PIXEL_Y_OFFSET 1 12 13 #define SHARED_PIXEL_WIDTH 8 14 #define SHARED_PIXEL_HEIGHT 4 15 16 #define SHARED_PIXEL_X_SIZE (SHARED_PIXEL_WIDTH * WORK_ITEM_X_SIZE + SHARED_PIXEL_X_OFFSET * 2) 17 #define SHARED_PIXEL_Y_SIZE (SHARED_PIXEL_HEIGHT * WORK_ITEM_Y_SIZE + SHARED_PIXEL_Y_OFFSET * 2) 18 19 typedef struct { 20 float thr_r; 21 float thr_g; 22 float thr_b; 23 float gain; 24 } CLRgbTnrConfig; 25 26 __inline void cl_snr (__local float4 *in, float4 *out, int lx, int ly) 27 { 28 int tmp_id = (SHARED_PIXEL_Y_OFFSET + ly * WORK_ITEM_Y_SIZE) * SHARED_PIXEL_X_SIZE + SHARED_PIXEL_X_OFFSET + lx * WORK_ITEM_X_SIZE; 29 (*(out)).x = ((*(in + tmp_id)).x + (*(in + tmp_id - SHARED_PIXEL_X_SIZE - 1)).x + (*(in + tmp_id - SHARED_PIXEL_X_SIZE)).x + (*(in + tmp_id - SHARED_PIXEL_Y_OFFSET + 1)).x + (*(in + tmp_id - 1)).x + (*(in + tmp_id + 1)).x + (*(in + tmp_id + SHARED_PIXEL_X_SIZE - 1)).x + (*(in + tmp_id + SHARED_PIXEL_X_SIZE)).x + (*(in + tmp_id + SHARED_PIXEL_X_SIZE + 1)).x) / 9.0f; 30 31 (*(out)).y = ((*(in + tmp_id)).y + (*(in + tmp_id - SHARED_PIXEL_X_SIZE - 1)).y + (*(in + tmp_id - SHARED_PIXEL_X_SIZE)).y + (*(in + tmp_id - SHARED_PIXEL_Y_OFFSET + 1)).y + (*(in + tmp_id - 1)).y + (*(in + tmp_id + 1)).y + (*(in + tmp_id + SHARED_PIXEL_X_SIZE - 1)).y + (*(in + tmp_id + SHARED_PIXEL_X_SIZE)).y + (*(in + tmp_id + SHARED_PIXEL_X_SIZE + 1)).y) / 9.0f; 32 33 (*(out)).z = ((*(in + tmp_id)).z + (*(in + tmp_id - SHARED_PIXEL_X_SIZE - 1)).z + (*(in + tmp_id - SHARED_PIXEL_X_SIZE)).z + (*(in + tmp_id - SHARED_PIXEL_Y_OFFSET + 1)).z + (*(in + tmp_id - 1)).z + (*(in + tmp_id + 1)).z + (*(in + tmp_id + SHARED_PIXEL_X_SIZE - 1)).z + (*(in + tmp_id + SHARED_PIXEL_X_SIZE)).z + (*(in + tmp_id + SHARED_PIXEL_X_SIZE + 1)).z) / 9.0f; 34 35 } 36 37 __inline void cl_tnr (float4 *out, int gx, int gy, __read_only image2d_t inputFrame1, __read_only image2d_t inputFrame2, __read_only image2d_t inputFrame3, CLRgbTnrConfig tnr_config) 38 { 39 float4 var; 40 float gain; 41 42 sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST; 43 44 float4 in1, in2, in3; 45 46 in1 = read_imagef(inputFrame1, sampler, (int2)(gx, gy)); 47 in2 = read_imagef(inputFrame2, sampler, (int2)(gx, gy)); 48 in3 = read_imagef(inputFrame3, sampler, (int2)(gx, gy)); 49 50 var.x = (fabs((*(out)).x - in1.x) + fabs(in1.x - in2.x) + fabs(in2.x - in3.x)) / 3.0f; 51 var.y = (fabs((*(out)).y - in1.y) + fabs(in1.y - in2.y) + fabs(in2.y - in3.y)) / 3.0f; 52 var.z = (fabs((*(out)).z - in1.z) + fabs(in1.z - in2.z) + fabs(in2.z - in3.z)) / 3.0f; 53 54 int cond = (var.x + var.y + var.z) < (tnr_config.thr_r + tnr_config.thr_g + tnr_config.thr_b); 55 gain = cond ? 1.0f : 0.0f; 56 (*(out)).x = (gain * (*(out)).x + gain * in1.x + gain * in2.x + in3.x) / (1.0f + 3 * gain); 57 (*(out)).y = (gain * (*(out)).y + gain * in1.y + gain * in2.y + in3.y) / (1.0f + 3 * gain); 58 (*(out)).z = (gain * (*(out)).z + gain * in1.z + gain * in2.z + in3.z) / (1.0f + 3 * gain); 59 } 60 61 __kernel void kernel_rgb_pipe (__write_only image2d_t output, CLRgbTnrConfig tnr_config, __read_only image2d_t inputFrame0, __read_only image2d_t inputFrame1, __read_only image2d_t inputFrame2, __read_only image2d_t inputFrame3) 62 { 63 sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST; 64 65 int g_id_x = get_global_id (0); 66 int g_id_y = get_global_id (1); 67 int g_size_x = get_global_size (0); 68 int g_size_y = get_global_size (1); 69 70 int l_id_x = get_local_id (0); 71 int l_id_y = get_local_id (1); 72 int l_size_x = get_local_size (0); 73 int l_size_y = get_local_size (1); 74 75 __local float4 p[SHARED_PIXEL_X_SIZE * SHARED_PIXEL_Y_SIZE]; 76 77 float4 out; 78 int i = l_id_x + l_id_y * l_size_x; 79 int xstart = (g_id_x - l_id_x) - SHARED_PIXEL_X_OFFSET; 80 int ystart = (g_id_y - l_id_y) - SHARED_PIXEL_Y_OFFSET; 81 82 for(; i < SHARED_PIXEL_X_SIZE * SHARED_PIXEL_Y_SIZE; i += l_size_x * l_size_y) { 83 84 int x0 = i % SHARED_PIXEL_X_SIZE + xstart; 85 int y0 = i / SHARED_PIXEL_X_SIZE + ystart; 86 87 p[i] = read_imagef(inputFrame0, sampler, (int2)(x0, y0)); 88 } 89 90 barrier(CLK_LOCAL_MEM_FENCE); 91 92 cl_snr(&p[0], &out, l_id_x, l_id_y); 93 cl_tnr(&out, g_id_x, g_id_y, inputFrame1, inputFrame2, inputFrame3, tnr_config); 94 95 write_imagef(output, (int2)(g_id_x, g_id_y), out); 96 } 97 98