Home | History | Annotate | Download | only in cl_kernel
      1 /*
      2  * function: kernel_rgb_pipe
      3  * input:    image2d_t as read only
      4  * output:   image2d_t as write only
      5  */
      6 
      7 #define WORK_ITEM_X_SIZE 1
      8 #define WORK_ITEM_Y_SIZE 1
      9 
     10 #define SHARED_PIXEL_X_OFFSET 1
     11 #define SHARED_PIXEL_Y_OFFSET 1
     12 
     13 #define SHARED_PIXEL_WIDTH 8
     14 #define SHARED_PIXEL_HEIGHT 4
     15 
     16 #define SHARED_PIXEL_X_SIZE  (SHARED_PIXEL_WIDTH * WORK_ITEM_X_SIZE + SHARED_PIXEL_X_OFFSET * 2)
     17 #define SHARED_PIXEL_Y_SIZE  (SHARED_PIXEL_HEIGHT * WORK_ITEM_Y_SIZE + SHARED_PIXEL_Y_OFFSET * 2)
     18 
     19 typedef struct {
     20     float           thr_r;
     21     float           thr_g;
     22     float           thr_b;
     23     float           gain;
     24 } CLRgbTnrConfig;
     25 
     26 __inline void cl_snr (__local float4 *in, float4 *out, int lx, int ly)
     27 {
     28     int tmp_id = (SHARED_PIXEL_Y_OFFSET + ly * WORK_ITEM_Y_SIZE) * SHARED_PIXEL_X_SIZE + SHARED_PIXEL_X_OFFSET + lx * WORK_ITEM_X_SIZE;
     29     (*(out)).x = ((*(in + tmp_id)).x + (*(in + tmp_id - SHARED_PIXEL_X_SIZE - 1)).x + (*(in + tmp_id - SHARED_PIXEL_X_SIZE)).x + (*(in + tmp_id - SHARED_PIXEL_Y_OFFSET + 1)).x + (*(in + tmp_id - 1)).x + (*(in + tmp_id + 1)).x + (*(in + tmp_id + SHARED_PIXEL_X_SIZE - 1)).x + (*(in + tmp_id + SHARED_PIXEL_X_SIZE)).x + (*(in + tmp_id + SHARED_PIXEL_X_SIZE + 1)).x) / 9.0f;
     30 
     31     (*(out)).y = ((*(in + tmp_id)).y + (*(in + tmp_id - SHARED_PIXEL_X_SIZE - 1)).y + (*(in + tmp_id - SHARED_PIXEL_X_SIZE)).y + (*(in + tmp_id - SHARED_PIXEL_Y_OFFSET + 1)).y + (*(in + tmp_id - 1)).y + (*(in + tmp_id + 1)).y + (*(in + tmp_id + SHARED_PIXEL_X_SIZE - 1)).y + (*(in + tmp_id + SHARED_PIXEL_X_SIZE)).y + (*(in + tmp_id + SHARED_PIXEL_X_SIZE + 1)).y) / 9.0f;
     32 
     33     (*(out)).z = ((*(in + tmp_id)).z + (*(in + tmp_id - SHARED_PIXEL_X_SIZE - 1)).z + (*(in + tmp_id - SHARED_PIXEL_X_SIZE)).z + (*(in + tmp_id - SHARED_PIXEL_Y_OFFSET + 1)).z + (*(in + tmp_id - 1)).z + (*(in + tmp_id + 1)).z + (*(in + tmp_id + SHARED_PIXEL_X_SIZE - 1)).z + (*(in + tmp_id + SHARED_PIXEL_X_SIZE)).z + (*(in + tmp_id + SHARED_PIXEL_X_SIZE + 1)).z) / 9.0f;
     34 
     35 }
     36 
     37 __inline void cl_tnr (float4 *out, int gx, int gy, __read_only image2d_t inputFrame1, __read_only image2d_t inputFrame2, __read_only image2d_t inputFrame3, CLRgbTnrConfig tnr_config)
     38 {
     39     float4 var;
     40     float gain;
     41 
     42     sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
     43 
     44     float4 in1, in2, in3;
     45 
     46     in1 = read_imagef(inputFrame1, sampler, (int2)(gx, gy));
     47     in2 = read_imagef(inputFrame2, sampler, (int2)(gx, gy));
     48     in3 = read_imagef(inputFrame3, sampler, (int2)(gx, gy));
     49 
     50     var.x = (fabs((*(out)).x - in1.x) + fabs(in1.x - in2.x) + fabs(in2.x - in3.x)) / 3.0f;
     51     var.y = (fabs((*(out)).y - in1.y) + fabs(in1.y - in2.y) + fabs(in2.y - in3.y)) / 3.0f;
     52     var.z = (fabs((*(out)).z - in1.z) + fabs(in1.z - in2.z) + fabs(in2.z - in3.z)) / 3.0f;
     53 
     54     int cond = (var.x + var.y + var.z) < (tnr_config.thr_r + tnr_config.thr_g + tnr_config.thr_b);
     55     gain = cond ? 1.0f : 0.0f;
     56     (*(out)).x = (gain * (*(out)).x + gain * in1.x + gain * in2.x +  in3.x) / (1.0f + 3 * gain);
     57     (*(out)).y = (gain * (*(out)).y + gain * in1.y + gain * in2.y +  in3.y) / (1.0f + 3 * gain);
     58     (*(out)).z = (gain * (*(out)).z + gain * in1.z + gain * in2.z +  in3.z) / (1.0f + 3 * gain);
     59 }
     60 
     61 __kernel void kernel_rgb_pipe (__write_only image2d_t output, CLRgbTnrConfig tnr_config, __read_only image2d_t inputFrame0, __read_only image2d_t inputFrame1, __read_only image2d_t inputFrame2, __read_only image2d_t inputFrame3)
     62 {
     63     sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
     64 
     65     int g_id_x = get_global_id (0);
     66     int g_id_y = get_global_id (1);
     67     int g_size_x = get_global_size (0);
     68     int g_size_y = get_global_size (1);
     69 
     70     int l_id_x = get_local_id (0);
     71     int l_id_y = get_local_id (1);
     72     int l_size_x = get_local_size (0);
     73     int l_size_y = get_local_size (1);
     74 
     75     __local float4 p[SHARED_PIXEL_X_SIZE * SHARED_PIXEL_Y_SIZE];
     76 
     77     float4 out;
     78     int i = l_id_x + l_id_y * l_size_x;
     79     int xstart = (g_id_x - l_id_x) - SHARED_PIXEL_X_OFFSET;
     80     int ystart = (g_id_y - l_id_y) - SHARED_PIXEL_Y_OFFSET;
     81 
     82     for(; i < SHARED_PIXEL_X_SIZE * SHARED_PIXEL_Y_SIZE; i += l_size_x * l_size_y) {
     83 
     84         int x0 = i % SHARED_PIXEL_X_SIZE + xstart;
     85         int y0 = i / SHARED_PIXEL_X_SIZE + ystart;
     86 
     87         p[i] = read_imagef(inputFrame0, sampler, (int2)(x0, y0));
     88     }
     89 
     90     barrier(CLK_LOCAL_MEM_FENCE);
     91 
     92     cl_snr(&p[0], &out, l_id_x, l_id_y);
     93     cl_tnr(&out, g_id_x, g_id_y, inputFrame1, inputFrame2, inputFrame3, tnr_config);
     94 
     95     write_imagef(output, (int2)(g_id_x, g_id_y), out);
     96 }
     97 
     98