/*
 * function: kernel_newtonemapping
 *     implementation of tone mapping
 * input:    image2d_t as read only
 * output:   image2d_t as write only
 */
      7 
      8 #define WORK_ITEM_X_SIZE 8
      9 #define WORK_ITEM_Y_SIZE 8
     10 #define BLOCK_FACTOR 4
     11 
     12 __kernel void kernel_newtonemapping (
     13     __read_only image2d_t input, __write_only image2d_t output,
     14     __global float *y_max, __global float *y_avg, __global float *hist_leq,
     15     int image_width, int image_height)
     16 {
     17     int g_id_x = get_global_id (0);
     18     int g_id_y = get_global_id (1);
     19 
     20     int group_id_x = get_group_id(0);
     21     int group_id_y = get_group_id(1);
     22 
     23     int local_id_x = get_local_id(0);
     24     int local_id_y = get_local_id(1);
     25 
     26     int g_size_x = get_global_size (0);
     27     int g_size_y = get_global_size (1);
     28 
     29     int local_index = local_id_y * WORK_ITEM_X_SIZE + local_id_x;
     30     int row_per_block = image_height / BLOCK_FACTOR;
     31     int col_per_block = image_width / BLOCK_FACTOR;
     32     int row_block_id = g_id_y / row_per_block;
     33     int col_block_id = g_id_x * 4 / col_per_block;
     34 
     35     sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
     36 
     37     float4 src_data_Gr = read_imagef (input, sampler, (int2)(g_id_x, g_id_y));
     38     float4 src_data_R = read_imagef (input, sampler, (int2)(g_id_x, g_id_y + image_height));
     39     float4 src_data_B = read_imagef (input, sampler, (int2)(g_id_x, g_id_y + image_height * 2));
     40     float4 src_data_Gb = read_imagef (input, sampler, (int2)(g_id_x, g_id_y + image_height * 3));
     41 
     42     float4 src_data_G = (src_data_Gr + src_data_Gb) / 2;
     43 
     44     float4 src_y_data = 0.0f;
     45     src_y_data = mad(src_data_R, 0.299f, src_y_data);
     46     src_y_data = mad(src_data_G, 0.587f, src_y_data);
     47     src_y_data = mad(src_data_B, 0.114f, src_y_data);
     48 
     49     float4 dst_y_data;
     50     float4 d, wd, haleq, s, ws;
     51     float4 total_w = 0.0f;
     52     float4 total_haleq = 0.0f;
     53 
     54     float4 corrd_x = mad((float4)g_id_x, 4.0f, (float4)(0.0f, 1.0f, 2.0f, 3.0f));
     55     float4 src_y = mad(src_y_data, 65535.0f, 0.5f) / 16.0f;
     56 
     57     for(int i = 0; i < BLOCK_FACTOR; i++)
     58     {
     59         for(int j = 0; j < BLOCK_FACTOR; j++)
     60         {
     61             int center_x = mad24(col_per_block, j, col_per_block / 2);
     62             int center_y = mad24(row_per_block, i, row_per_block / 2);
     63             int start_index = mad24(i, BLOCK_FACTOR, j) * 4096;
     64 
     65             float4 dy = (float4)((g_id_y - center_y) * (g_id_y - center_y));
     66             float4 dx = corrd_x - (float4)center_x;
     67 
     68             d = mad(dx, dx, dy);
     69 
     70             d = sqrt(d) + 100.0f;
     71             //wd = 100.0f / (d + 100.0f);
     72 
     73             s = fabs(src_y_data - (float4)y_avg[mad24(i, BLOCK_FACTOR, j)]) / (float4)y_max[mad24(i, BLOCK_FACTOR, j)] + 1.0f;
     74             //ws = 1.0f / (s + 1.0f);
     75 
     76             float4 w = 100.0f / (d * s);
     77             //w = wd * ws;
     78 
     79             haleq.x = hist_leq[start_index + (int)src_y.x];
     80             haleq.y = hist_leq[start_index + (int)src_y.y];
     81             haleq.z = hist_leq[start_index + (int)src_y.z];
     82             haleq.w = hist_leq[start_index + (int)src_y.w];
     83 
     84             total_w = total_w + w;
     85             total_haleq = mad(haleq, w, total_haleq);
     86         }
     87     }
     88 
     89     dst_y_data = total_haleq / total_w;
     90 
     91     float4 gain = (dst_y_data + 0.0001f) / (src_y_data + 0.0001f);
     92     src_data_Gr = src_data_Gr * gain;
     93     src_data_R = src_data_R * gain;
     94     src_data_B = src_data_B * gain;
     95     src_data_Gb = src_data_Gb * gain;
     96 
     97     write_imagef(output, (int2)(g_id_x, g_id_y), src_data_Gr);
     98     write_imagef(output, (int2)(g_id_x, g_id_y + image_height), src_data_R);
     99     write_imagef(output, (int2)(g_id_x, g_id_y + image_height * 2), src_data_B);
    100     write_imagef(output, (int2)(g_id_x, g_id_y + image_height * 3), src_data_Gb);
    101 }
    102