/*
 * function: kernel_newtonemapping
 *     Block-based tone mapping. For every sample the kernel blends the
 *     equalised-histogram lookups of all BLOCK_FACTOR x BLOCK_FACTOR image
 *     blocks, weighted by spatial distance to the block centre and by the
 *     difference between the sample luminance and the block's y_avg entry,
 *     then applies the resulting luminance gain to all four colour planes.
 * input:    image2d_t as read only; four planes stacked vertically, read at
 *           rows y, y + image_height, y + 2 * image_height and
 *           y + 3 * image_height (named Gr, R, B, Gb in the code)
 * output:   image2d_t as write only, same plane layout as the input
 * y_max:    one value per block, indexed by (block_row * BLOCK_FACTOR + block_col)
 * y_avg:    one value per block, same indexing as y_max
 * hist_leq: TONEMAPPING_HIST_BINS lookup entries per block, blocks stored
 *           back to back in the same order as y_max / y_avg
 * image_width, image_height: used to derive the block size and the plane
 *           stride (image_height rows per plane)
 */

#define WORK_ITEM_X_SIZE 8
#define WORK_ITEM_Y_SIZE 8
#define BLOCK_FACTOR 4

/* Number of hist_leq entries that belong to one block. */
#define TONEMAPPING_HIST_BINS 4096

__kernel void kernel_newtonemapping (
    __read_only image2d_t input, __write_only image2d_t output,
    __global float *y_max, __global float *y_avg, __global float *hist_leq,
    int image_width, int image_height)
{
    int g_id_x = get_global_id (0);
    int g_id_y = get_global_id (1);

    int row_per_block = image_height / BLOCK_FACTOR;
    int col_per_block = image_width / BLOCK_FACTOR;

    sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;

    /* Each texel carries four horizontally adjacent samples of one plane. */
    float4 src_data_Gr = read_imagef (input, sampler, (int2)(g_id_x, g_id_y));
    float4 src_data_R = read_imagef (input, sampler, (int2)(g_id_x, g_id_y + image_height));
    float4 src_data_B = read_imagef (input, sampler, (int2)(g_id_x, g_id_y + image_height * 2));
    float4 src_data_Gb = read_imagef (input, sampler, (int2)(g_id_x, g_id_y + image_height * 3));

    float4 src_data_G = (src_data_Gr + src_data_Gb) / 2;

    /* Luminance: Y = 0.299 R + 0.587 G + 0.114 B. */
    float4 src_y_data = 0.0f;
    src_y_data = mad(src_data_R, 0.299f, src_y_data);
    src_y_data = mad(src_data_G, 0.587f, src_y_data);
    src_y_data = mad(src_data_B, 0.114f, src_y_data);

    float4 total_w = 0.0f;
    float4 total_haleq = 0.0f;

    /* Horizontal sample coordinates of the four lanes of this work item. */
    float4 corrd_x = mad((float4)g_id_x, 4.0f, (float4)(0.0f, 1.0f, 2.0f, 3.0f));

    /*
     * Histogram bin of each sample: Y scaled by 65535, rounded, divided by 16.
     * For Y in [0, 1] this already lies in [0, TONEMAPPING_HIST_BINS - 1];
     * the clamp keeps out-of-range luminance from indexing outside the
     * current block's slice of hist_leq.
     */
    float4 src_y = mad(src_y_data, 65535.0f, 0.5f) / 16.0f;
    src_y = clamp(src_y, 0.0f, (float)(TONEMAPPING_HIST_BINS - 1));

    for (int i = 0; i < BLOCK_FACTOR; i++)
    {
        /* Vertical terms depend only on the block row, so compute them once. */
        int center_y = mad24(row_per_block, i, row_per_block / 2);
        float4 dy = (float4)((g_id_y - center_y) * (g_id_y - center_y));

        for (int j = 0; j < BLOCK_FACTOR; j++)
        {
            int block_index = mad24(i, BLOCK_FACTOR, j);
            int center_x = mad24(col_per_block, j, col_per_block / 2);
            int start_index = block_index * TONEMAPPING_HIST_BINS;

            /* Euclidean distance to the block centre, offset by 100. */
            float4 dx = corrd_x - (float4)center_x;
            float4 d = mad(dx, dx, dy);
            d = sqrt(d) + 100.0f;

            /*
             * Luminance distance from the block's y_avg entry, scaled by its
             * y_max entry, offset by 1.
             * NOTE(review): a zero y_max entry yields inf/NaN here - confirm
             * that the producer of y_max never emits 0.
             */
            float4 s = fabs(src_y_data - (float4)y_avg[block_index]) / (float4)y_max[block_index] + 1.0f;

            /* Combined weight: (100 / d) * (1 / s). */
            float4 w = 100.0f / (d * s);

            float4 haleq;
            haleq.x = hist_leq[start_index + (int)src_y.x];
            haleq.y = hist_leq[start_index + (int)src_y.y];
            haleq.z = hist_leq[start_index + (int)src_y.z];
            haleq.w = hist_leq[start_index + (int)src_y.w];

            total_w = total_w + w;
            total_haleq = mad(haleq, w, total_haleq);
        }
    }

    /* Weighted average of the per-block lookups is the target luminance. */
    float4 dst_y_data = total_haleq / total_w;

    /* The 0.0001 offsets keep the gain finite for zero source luminance. */
    float4 gain = (dst_y_data + 0.0001f) / (src_y_data + 0.0001f);
    src_data_Gr = src_data_Gr * gain;
    src_data_R = src_data_R * gain;
    src_data_B = src_data_B * gain;
    src_data_Gb = src_data_Gb * gain;

    write_imagef(output, (int2)(g_id_x, g_id_y), src_data_Gr);
    write_imagef(output, (int2)(g_id_x, g_id_y + image_height), src_data_R);
    write_imagef(output, (int2)(g_id_x, g_id_y + image_height * 2), src_data_B);
    write_imagef(output, (int2)(g_id_x, g_id_y + image_height * 3), src_data_Gb);
}