1 /* 2 * function: kernel_yuv_pipe 3 * input: image2d_t as read only 4 * output: image2d_t as write only 5 */ 6 7 #pragma OPENCL FP_CONTRACT OFF 8 9 //#define USE_BUFFER_OBJECT 0 10 11 unsigned int get_sector_id (float u, float v) 12 { 13 u = fabs(u) > 0.00001f ? u : 0.00001f; 14 float tg = v / u; 15 unsigned int se = tg > 1.f ? (tg > 2.f ? 3 : 2) : (tg > 0.5f ? 1 : 0); 16 unsigned int so = tg > -1.f ? (tg > -0.5f ? 3 : 2) : (tg > -2.f ? 1 : 0); 17 return tg > 0 ? (u > 0 ? se : (se + 8)) : (u > 0 ? (so + 12) : (so + 4)); 18 } 19 20 __inline void cl_csc_rgbatonv12(float8 *R, float8 *G, float8 *B, float8 *out, __global float *matrix) 21 { 22 out[0] = mad(matrix[0], R[0], mad(matrix[1], G[0], matrix[2] * B[0])); 23 out[1] = mad(matrix[0], R[1], mad(matrix[1], G[1], matrix[2] * B[1])); 24 25 out[2].s0 = mad(matrix[3], R[0].s0, mad(matrix[4], G[0].s0, matrix[5] * B[0].s0)); 26 out[2].s1 = mad(matrix[6], R[0].s0, mad(matrix[7], G[0].s0, matrix[8] * B[0].s0)); 27 out[2].s2 = mad(matrix[3], R[0].s2, mad(matrix[4], G[0].s2, matrix[5] * B[0].s2)); 28 out[2].s3 = mad(matrix[6], R[0].s2, mad(matrix[7], G[0].s2, matrix[8] * B[0].s2)); 29 out[2].s4 = mad(matrix[3], R[0].s4, mad(matrix[4], G[0].s4, matrix[5] * B[0].s4)); 30 out[2].s5 = mad(matrix[6], R[0].s4, mad(matrix[7], G[0].s4, matrix[8] * B[0].s4)); 31 out[2].s6 = mad(matrix[3], R[0].s6, mad(matrix[4], G[0].s6, matrix[5] * B[0].s6)); 32 out[2].s7 = mad(matrix[6], R[0].s6, mad(matrix[7], G[0].s6, matrix[8] * B[0].s6)); 33 34 } 35 36 __inline void cl_macc(float8 *in, __global float *table) 37 { 38 unsigned int table_id[4]; 39 float8 out; 40 41 table_id[0] = get_sector_id(in[0].s0, in[0].s1); 42 table_id[1] = get_sector_id(in[0].s2, in[0].s3); 43 table_id[2] = get_sector_id(in[0].s4, in[0].s5); 44 table_id[3] = get_sector_id(in[0].s6, in[0].s7); 45 46 out.s0 = mad(in[0].s0, table[4 * table_id[0]], in[0].s1 * table[4 * table_id[0] + 1]) + 0.5f; 47 out.s1 = mad(in[0].s0, table[4 * table_id[0] + 2], in[0].s1 * table[4 * table_id[0] + 3]) + 0.5f; 48 out.s2 = mad(in[0].s2, table[4 * table_id[1]], in[0].s3 * table[4 * table_id[1] + 1]) + 0.5f; 49 out.s3 = mad(in[0].s2, table[4 * table_id[1] + 2], in[0].s3 * table[4 * table_id[1] + 3]) + 0.5f; 50 out.s4 = mad(in[0].s4, table[4 * table_id[0]], in[0].s5 * table[4 * table_id[0] + 1]) + 0.5f; 51 out.s5 = mad(in[0].s4, table[4 * table_id[0] + 2], in[0].s5 * table[4 * table_id[0] + 3]) + 0.5f; 52 out.s6 = mad(in[0].s6, table[4 * table_id[1]], in[0].s7 * table[4 * table_id[1] + 1]) + 0.5f; 53 out.s7 = mad(in[0].s6, table[4 * table_id[1] + 2], in[0].s7 * table[4 * table_id[1] + 3]) + 0.5f; 54 55 in[0] = out; 56 } 57 58 #if USE_BUFFER_OBJECT 59 __inline void cl_tnr_yuv( 60 float8 *in, __global uchar8 *inputFramePre, 61 int x, int y, 62 float gain_yuv, float thr_y, float thr_uv, 63 uint vertical_offset, uint x_offset) 64 #else 65 __inline void cl_tnr_yuv( 66 float8 *in, 67 __read_only image2d_t inputFramePre, __read_only image2d_t inputFramePreUV, 68 int x, int y, 69 float gain_yuv, float thr_y, float thr_uv, uint x_offset) 70 #endif 71 { 72 float8 in_prev[3]; 73 sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; 74 75 #if USE_BUFFER_OBJECT 76 in_prev[0] = convert_float8(inputFramePre[2 * y * x_offset + x]) / 256.0f; 77 in_prev[1] = convert_float8(inputFramePre[(2 * y + 1) * x_offset + x]) / 256.0f; 78 in_prev[2] = convert_float8(inputFramePre[(y + vertical_offset) * x_offset + x]) / 256.0f; 79 #else 80 in_prev[0] = convert_float8(as_uchar8(convert_ushort4(read_imageui(inputFramePre, sampler, (int2)(x, 2 * y))))) / 256.0f; 81 in_prev[1] = convert_float8(as_uchar8(convert_ushort4(read_imageui(inputFramePre, sampler, (int2)(x, 2 * y + 1))))) / 256.0f; 82 in_prev[2] = convert_float8(as_uchar8(convert_ushort4(read_imageui(inputFramePreUV, sampler, (int2)(x, y))))) / 256.0f; 83 #endif 84 85 float diff_max = 0.8f; 86 float diff_Y[4], coeff_Y[4]; 87 88 diff_Y[0] = 0.25f * (fabs(in[0].s0 - in_prev[0].s0) + fabs(in[0].s1 - in_prev[0].s1) + fabs(in[1].s0 - in_prev[1].s0) + fabs(in[1].s1 - in_prev[1].s1)); 89 diff_Y[1] = 0.25f * (fabs(in[0].s2 - in_prev[0].s2) + fabs(in[0].s3 - in_prev[0].s3) + fabs(in[1].s2 - in_prev[1].s2) + fabs(in[1].s3 - in_prev[1].s3)); 90 diff_Y[2] = 0.25f * (fabs(in[0].s4 - in_prev[0].s4) + fabs(in[0].s5 - in_prev[0].s5) + fabs(in[1].s4 - in_prev[1].s4) + fabs(in[1].s5 - in_prev[1].s5)); 91 diff_Y[3] = 0.25f * (fabs(in[0].s6 - in_prev[0].s6) + fabs(in[0].s7 - in_prev[0].s7) + fabs(in[1].s6 - in_prev[1].s6) + fabs(in[1].s7 - in_prev[1].s7)); 92 93 coeff_Y[0] = (diff_Y[0] < thr_y) ? gain_yuv : (mad(diff_Y[0], 1 - gain_yuv, diff_max * gain_yuv - thr_y) / (diff_max - thr_y)); 94 coeff_Y[1] = (diff_Y[1] < thr_y) ? gain_yuv : (mad(diff_Y[1], 1 - gain_yuv, diff_max * gain_yuv - thr_y) / (diff_max - thr_y)); 95 coeff_Y[2] = (diff_Y[2] < thr_y) ? gain_yuv : (mad(diff_Y[2], 1 - gain_yuv, diff_max * gain_yuv - thr_y) / (diff_max - thr_y)); 96 coeff_Y[3] = (diff_Y[3] < thr_y) ? gain_yuv : (mad(diff_Y[3], 1 - gain_yuv, diff_max * gain_yuv - thr_y) / (diff_max - thr_y)); 97 98 coeff_Y[0] = (coeff_Y[0] < 1.0f) ? coeff_Y[0] : 1.0f; 99 coeff_Y[1] = (coeff_Y[1] < 1.0f) ? coeff_Y[1] : 1.0f; 100 coeff_Y[2] = (coeff_Y[2] < 1.0f) ? coeff_Y[2] : 1.0f; 101 coeff_Y[3] = (coeff_Y[3] < 1.0f) ? coeff_Y[3] : 1.0f; 102 103 in[0].s01 = mad(in[0].s01 - in_prev[0].s01, coeff_Y[0], in_prev[0].s01); 104 in[1].s01 = mad(in[1].s01 - in_prev[1].s01, coeff_Y[0], in_prev[1].s01); 105 in[0].s23 = mad(in[0].s23 - in_prev[0].s23, coeff_Y[1], in_prev[0].s23); 106 in[1].s23 = mad(in[1].s23 - in_prev[1].s23, coeff_Y[1], in_prev[1].s23); 107 in[0].s45 = mad(in[0].s45 - in_prev[0].s45, coeff_Y[2], in_prev[0].s45); 108 in[1].s45 = mad(in[1].s45 - in_prev[1].s45, coeff_Y[2], in_prev[1].s45); 109 in[0].s67 = mad(in[0].s67 - in_prev[0].s67, coeff_Y[3], in_prev[0].s67); 110 in[1].s67 = mad(in[1].s67 - in_prev[1].s67, coeff_Y[3], in_prev[1].s67); 111 112 float diff_U[4], diff_V[4], coeff_U[4], coeff_V[4]; 113 114 diff_U[0] = fabs(in[3].s0 - in_prev[3].s0); 115 diff_U[1] = fabs(in[3].s2 - in_prev[3].s2); 116 diff_U[2] = fabs(in[3].s4 - in_prev[3].s4); 117 diff_U[3] = fabs(in[3].s6 - in_prev[3].s6); 118 119 diff_V[0] = fabs(in[3].s1 - in_prev[3].s1); 120 diff_V[1] = fabs(in[3].s3 - in_prev[3].s3); 121 diff_V[2] = fabs(in[3].s5 - in_prev[3].s5); 122 diff_V[3] = fabs(in[3].s7 - in_prev[3].s7); 123 124 coeff_U[0] = (diff_U[0] < thr_uv) ? gain_yuv : (mad(diff_U[0], 1 - gain_yuv, diff_max * gain_yuv - thr_uv) / (diff_max - thr_uv)); 125 coeff_U[1] = (diff_U[1] < thr_uv) ? gain_yuv : (mad(diff_U[1], 1 - gain_yuv, diff_max * gain_yuv - thr_uv) / (diff_max - thr_uv)); 126 coeff_U[2] = (diff_U[2] < thr_uv) ? gain_yuv : (mad(diff_U[2], 1 - gain_yuv, diff_max * gain_yuv - thr_uv) / (diff_max - thr_uv)); 127 coeff_U[3] = (diff_U[3] < thr_uv) ? gain_yuv : (mad(diff_U[3], 1 - gain_yuv, diff_max * gain_yuv - thr_uv) / (diff_max - thr_uv)); 128 129 coeff_V[0] = (diff_V[0] < thr_uv) ? gain_yuv : (mad(diff_V[0], 1 - gain_yuv, diff_max * gain_yuv - thr_uv) / (diff_max - thr_uv)); 130 coeff_V[1] = (diff_V[1] < thr_uv) ? gain_yuv : (mad(diff_V[1], 1 - gain_yuv, diff_max * gain_yuv - thr_uv) / (diff_max - thr_uv)); 131 coeff_V[2] = (diff_V[2] < thr_uv) ? gain_yuv : (mad(diff_V[2], 1 - gain_yuv, diff_max * gain_yuv - thr_uv) / (diff_max - thr_uv)); 132 coeff_V[3] = (diff_V[3] < thr_uv) ? gain_yuv : (mad(diff_V[3], 1 - gain_yuv, diff_max * gain_yuv - thr_uv) / (diff_max - thr_uv)); 133 134 coeff_U[0] = (coeff_U[0] < 1.0f) ? coeff_U[0] : 1.0f; 135 coeff_U[1] = (coeff_U[1] < 1.0f) ? coeff_U[1] : 1.0f; 136 coeff_U[2] = (coeff_U[2] < 1.0f) ? coeff_U[2] : 1.0f; 137 coeff_U[3] = (coeff_U[3] < 1.0f) ? coeff_U[3] : 1.0f; 138 139 coeff_V[0] = (coeff_V[0] < 1.0f) ? coeff_V[0] : 1.0f; 140 coeff_V[1] = (coeff_V[1] < 1.0f) ? coeff_V[1] : 1.0f; 141 coeff_V[2] = (coeff_V[2] < 1.0f) ? coeff_V[2] : 1.0f; 142 coeff_V[3] = (coeff_V[3] < 1.0f) ? coeff_V[3] : 1.0f; 143 144 in[2].s0 = mad(in[2].s0 - in_prev[2].s0, coeff_U[0], in_prev[2].s0); 145 in[2].s1 = mad(in[2].s1 - in_prev[2].s1, coeff_V[0], in_prev[2].s1); 146 in[2].s2 = mad(in[2].s2 - in_prev[2].s2, coeff_U[1], in_prev[2].s2); 147 in[2].s3 = mad(in[2].s3 - in_prev[2].s3, coeff_V[1], in_prev[2].s3); 148 in[2].s4 = mad(in[2].s4 - in_prev[2].s4, coeff_U[2], in_prev[2].s4); 149 in[2].s5 = mad(in[2].s5 - in_prev[2].s5, coeff_V[2], in_prev[2].s5); 150 in[2].s6 = mad(in[2].s6 - in_prev[2].s6, coeff_U[3], in_prev[2].s6); 151 in[2].s7 = mad(in[2].s7 - in_prev[2].s7, coeff_V[3], in_prev[2].s7); 152 153 } 154 155 #if USE_BUFFER_OBJECT 156 __kernel void kernel_yuv_pipe ( 157 __global uchar8 *output, 158 __global uchar8 *inputFramePre, uint vertical_offset, 159 uint plannar_offset, 160 __global float *matrix, __global float *table, 161 float yuv_gain, float thr_y, float thr_uv, uint tnr_yuv_enable, 162 __global ushort8 *inputFrame0) 163 164 #else 165 166 __kernel void kernel_yuv_pipe ( 167 __write_only image2d_t output, __write_only image2d_t output_uv, 168 __read_only image2d_t inputFramePre, __read_only image2d_t inputFramePreUV, 169 uint plannar_offset, 170 __global float *matrix, __global float *table, 171 float yuv_gain, float thr_y, float thr_uv, uint tnr_yuv_enable, 172 __read_only image2d_t inputFrame0) 173 174 #endif 175 { 176 int x = get_global_id (0); 177 int y = get_global_id (1); 178 int offsetX = get_global_size(0); 179 sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; 180 float8 inR[2], inG[2], inB[2]; 181 float8 out[3]; 182 183 #if USE_BUFFER_OBJECT 184 // x [0, 240] 185 // y [0, 540] 186 uint offsetE = 2 * y * offsetX + x; 187 uint offsetO = (2 * y + 1) * offsetX + x; 188 uint offsetUV = (y + vertical_offset) * offsetX + x; 189 uint offsetG = offsetX * plannar_offset; 190 uint offsetB = offsetX * plannar_offset * 2; 191 192 inR[0] = convert_float8(inputFrame0[offsetE]) / 65536.0f; 193 inR[1] = convert_float8(inputFrame0[offsetO]) / 65536.0f; 194 inG[0] = convert_float8(inputFrame0[offsetE + offsetG]) / 65536.0f; 195 inG[1] = convert_float8(inputFrame0[offsetO + offsetG]) / 65536.0f; 196 inB[0] = convert_float8(inputFrame0[offsetE + offsetB]) / 65536.0f; 197 inB[1] = convert_float8(inputFrame0[offsetO + offsetB]) / 65536.0f; 198 #else 199 inR[0] = convert_float8(as_ushort8(read_imageui(inputFrame0, sampler, (int2)(x, 2 * y)))) / 65536.0f; 200 inR[1] = convert_float8(as_ushort8(read_imageui(inputFrame0, sampler, (int2)(x, 2 * y + 1)))) / 65536.0f; 201 inG[0] = convert_float8(as_ushort8(read_imageui(inputFrame0, sampler, (int2)(x, 2 * y + plannar_offset)))) / 65536.0f; 202 inG[1] = convert_float8(as_ushort8(read_imageui(inputFrame0, sampler, (int2)(x, 2 * y + 1 + plannar_offset)))) / 65536.0f; 203 inB[0] = convert_float8(as_ushort8(read_imageui(inputFrame0, sampler, (int2)(x, 2 * y + plannar_offset * 2)))) / 65536.0f; 204 inB[1] = convert_float8(as_ushort8(read_imageui(inputFrame0, sampler, (int2)(x, 2 * y + 1 + plannar_offset * 2)))) / 65536.0f; 205 #endif 206 207 cl_csc_rgbatonv12(&inR[0], &inG[0], &inB[0], &out[0], matrix); 208 cl_macc(&out[2], table); 209 210 if (tnr_yuv_enable) { 211 #if USE_BUFFER_OBJECT 212 cl_tnr_yuv (&out[0], inputFramePre, x, y, yuv_gain, thr_y, thr_uv, vertical_offset, offsetX); 213 #else 214 cl_tnr_yuv (&out[0], inputFramePre, inputFramePreUV, x, y, yuv_gain, thr_y, thr_uv, offsetX); 215 #endif 216 217 } 218 219 #if USE_BUFFER_OBJECT 220 output[offsetE] = convert_uchar8(out[0] * 255.0f); 221 output[offsetO] = convert_uchar8(out[1] * 255.0f); 222 output[offsetUV] = convert_uchar8(out[2] * 255.0f); 223 #else 224 write_imageui(output, (int2)(x, 2 * y), convert_uint4(as_ushort4(convert_uchar8_sat(out[0] * 255.0f)))); 225 write_imageui(output, (int2)(x, 2 * y + 1), convert_uint4(as_ushort4(convert_uchar8_sat(out[1] * 255.0f)))); 226 write_imageui(output_uv, (int2)(x, y), convert_uint4(as_ushort4(convert_uchar8_sat(out[2] * 255.0f)))); 227 #endif 228 229 } 230 231