1 /****************************************************************************** 2 * 3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ******************************************************************************/ 18 /** 19 ******************************************************************************* 20 * @file 21 * ihevc_iquant_itrans_recon.c 22 * 23 * @brief 24 * Contains function definitions for inverse quantization, inverse 25 * transform and reconstruction 26 * 27 * @author 28 * 100470 29 * 30 * @par List of Functions: 31 * - ihevc_iquant_itrans_recon_4x4_ttype1() 32 * - ihevc_iquant_itrans_recon_4x4() 33 * 34 * @remarks 35 * None 36 * 37 ******************************************************************************* 38 */ 39 #include <stdio.h> 40 #include <string.h> 41 #include "ihevc_typedefs.h" 42 #include "ihevc_macros.h" 43 #include "ihevc_platform_macros.h" 44 #include "ihevc_defs.h" 45 #include "ihevc_trans_tables.h" 46 #include "ihevc_iquant_itrans_recon.h" 47 #include "ihevc_func_selector.h" 48 #include "ihevc_trans_macros.h" 49 50 /* All the functions here are replicated from ihevc_itrans.c and modified to */ 51 /* include reconstruction */ 52 53 /** 54 ******************************************************************************* 55 * 56 * @brief 57 * This function performs inverse quantization, inverse transform 58 * type1(DST) and reconstruction for 4x4 input block 59 * 60 * @par Description: 61 * Performs inverse quantization , inverse transform type 1 and adds 62 * prediction data and clips output to 8 bit 63 * 64 * @param[in] pi2_src 65 * Input 4x4 coefficients 66 * 67 * @param[in] pi2_tmp 68 * Temporary 4x4 buffer for storing inverse 69 * transform 1st stage output 70 * 71 * @param[in] pu1_pred 72 * Prediction 4x4 block 73 * 74 * @param[in] pi2_dequant_coeff 75 * Dequant Coeffs 76 * 77 * @param[out] pu1_dst 78 * Output 4x4 block 79 * 80 * @param[in] qp_div 81 * Quantization parameter / 6 82 * 83 * @param[in] qp_rem 84 * Quantization parameter % 6 85 * 86 * @param[in] src_strd 87 * Input stride 88 * 89 * @param[in] pred_strd 90 * Prediction stride 91 * 92 * @param[in] dst_strd 93 * Output Stride 94 * 95 * @param[in] zero_cols 96 * Zero columns in pi2_src 97 * 98 * @param[in] zero_rows 99 * Zero Rows in pi2_src 100 * 101 * @returns Void 102 * 103 * @remarks 104 * None 105 * 106 ******************************************************************************* 107 */ 108 109 void ihevc_iquant_itrans_recon_4x4_ttype1(WORD16 *pi2_src, 110 WORD16 *pi2_tmp, 111 UWORD8 *pu1_pred, 112 WORD16 *pi2_dequant_coeff, 113 UWORD8 *pu1_dst, 114 WORD32 qp_div, /* qpscaled / 6 */ 115 WORD32 qp_rem, /* qpscaled % 6 */ 116 WORD32 src_strd, 117 WORD32 pred_strd, 118 WORD32 dst_strd, 119 WORD32 zero_cols, 120 WORD32 zero_rows) 121 { 122 UNUSED(zero_rows); 123 /* Inverse Quant and Inverse Transform and Reconstruction */ 124 { 125 WORD32 i, c[4]; 126 WORD32 add; 127 WORD32 shift; 128 WORD16 *pi2_tmp_orig; 129 WORD32 shift_iq; 130 WORD32 trans_size; 131 /* Inverse Quantization constants */ 132 { 133 WORD32 log2_trans_size, bit_depth; 134 135 log2_trans_size = 2; 136 bit_depth = 8 + 0; 137 shift_iq = bit_depth + log2_trans_size - 5; 138 } 139 140 trans_size = TRANS_SIZE_4; 141 pi2_tmp_orig = pi2_tmp; 142 143 /* Inverse Transform 1st stage */ 144 shift = IT_SHIFT_STAGE_1; 145 add = 1 << (shift - 1); 146 147 for(i = 0; i < trans_size; i++) 148 { 149 /* Checking for Zero Cols */ 150 if((zero_cols & 1) == 1) 151 { 152 memset(pi2_tmp, 0, trans_size * sizeof(WORD16)); 153 } 154 else 155 { 156 WORD32 iq_tmp_1, iq_tmp_2, iq_tmp_3; 157 // Intermediate Variables 158 IQUANT_4x4(iq_tmp_1, 159 pi2_src[0 * src_strd], 160 pi2_dequant_coeff[0 * trans_size] * g_ihevc_iquant_scales[qp_rem], 161 shift_iq, qp_div); 162 IQUANT_4x4(iq_tmp_2, 163 pi2_src[2 * src_strd], 164 pi2_dequant_coeff[2 * trans_size] * g_ihevc_iquant_scales[qp_rem], 165 shift_iq, qp_div); 166 c[0] = iq_tmp_1 + iq_tmp_2; 167 168 IQUANT_4x4(iq_tmp_1, 169 pi2_src[2 * src_strd], 170 pi2_dequant_coeff[2 * trans_size] * g_ihevc_iquant_scales[qp_rem], 171 shift_iq, qp_div); 172 IQUANT_4x4(iq_tmp_2, 173 pi2_src[3 * src_strd], 174 pi2_dequant_coeff[3 * trans_size] * g_ihevc_iquant_scales[qp_rem], 175 shift_iq, qp_div); 176 c[1] = iq_tmp_1 + iq_tmp_2; 177 178 IQUANT_4x4(iq_tmp_1, 179 pi2_src[0 * src_strd], 180 pi2_dequant_coeff[0 * trans_size] * g_ihevc_iquant_scales[qp_rem], 181 shift_iq, qp_div); 182 IQUANT_4x4(iq_tmp_2, 183 pi2_src[3 * src_strd], 184 pi2_dequant_coeff[3 * trans_size] * g_ihevc_iquant_scales[qp_rem], 185 shift_iq, qp_div); 186 c[2] = iq_tmp_1 - iq_tmp_2; 187 188 IQUANT_4x4(iq_tmp_1, 189 pi2_src[1 * src_strd], 190 pi2_dequant_coeff[1 * trans_size] * g_ihevc_iquant_scales[qp_rem], 191 shift_iq, qp_div); 192 c[3] = 74 * iq_tmp_1; 193 194 pi2_tmp[0] = 195 CLIP_S16((29 * c[0] + 55 * c[1] + c[3] + add) >> shift); 196 pi2_tmp[1] = 197 CLIP_S16((55 * c[2] - 29 * c[1] + c[3] + add) >> shift); 198 199 IQUANT_4x4(iq_tmp_1, 200 pi2_src[0 * src_strd], 201 pi2_dequant_coeff[0 * trans_size] * g_ihevc_iquant_scales[qp_rem], 202 shift_iq, qp_div); 203 IQUANT_4x4(iq_tmp_2, 204 pi2_src[2 * src_strd], 205 pi2_dequant_coeff[2 * trans_size] * g_ihevc_iquant_scales[qp_rem], 206 shift_iq, qp_div); 207 IQUANT_4x4(iq_tmp_3, 208 pi2_src[3 * src_strd], 209 pi2_dequant_coeff[3 * trans_size] * g_ihevc_iquant_scales[qp_rem], 210 shift_iq, qp_div); 211 212 pi2_tmp[2] = 213 CLIP_S16((74 * (iq_tmp_1 - iq_tmp_2 + iq_tmp_3) + add) >> shift); 214 pi2_tmp[3] = 215 CLIP_S16((55 * c[0] + 29 * c[2] - c[3] + add) >> shift); 216 } 217 pi2_src++; 218 pi2_dequant_coeff++; 219 pi2_tmp += trans_size; 220 zero_cols = zero_cols >> 1; 221 } 222 223 pi2_tmp = pi2_tmp_orig; 224 225 /* Inverse Transform 2nd stage */ 226 shift = IT_SHIFT_STAGE_2; 227 add = 1 << (shift - 1); 228 229 for(i = 0; i < trans_size; i++) 230 { 231 WORD32 itrans_out; 232 233 // Intermediate Variables 234 c[0] = pi2_tmp[0] + pi2_tmp[2 * trans_size]; 235 c[1] = pi2_tmp[2 * trans_size] + pi2_tmp[3 * trans_size]; 236 c[2] = pi2_tmp[0] - pi2_tmp[3 * trans_size]; 237 c[3] = 74 * pi2_tmp[trans_size]; 238 239 itrans_out = 240 CLIP_S16((29 * c[0] + 55 * c[1] + c[3] + add) >> shift); 241 pu1_dst[0] = CLIP_U8((itrans_out + pu1_pred[0])); 242 243 itrans_out = 244 CLIP_S16((55 * c[2] - 29 * c[1] + c[3] + add) >> shift); 245 pu1_dst[1] = CLIP_U8((itrans_out + pu1_pred[1])); 246 247 itrans_out = 248 CLIP_S16((74 * (pi2_tmp[0] - pi2_tmp[2 * trans_size] + pi2_tmp[3 * trans_size]) + add) >> shift); 249 pu1_dst[2] = CLIP_U8((itrans_out + pu1_pred[2])); 250 251 itrans_out = 252 CLIP_S16((55 * c[0] + 29 * c[2] - c[3] + add) >> shift); 253 pu1_dst[3] = CLIP_U8((itrans_out + pu1_pred[3])); 254 pi2_tmp++; 255 pu1_pred += pred_strd; 256 pu1_dst += dst_strd; 257 } 258 } 259 } 260 261 /** 262 ******************************************************************************* 263 * 264 * @brief 265 * This function performs inverse quantization, inverse transform and 266 * reconstruction for 4x4 input block 267 * 268 * @par Description: 269 * Performs inverse quantization , inverse transform and adds the 270 * prediction data and clips output to 8 bit 271 * 272 * @param[in] pi2_src 273 * Input 4x4 coefficients 274 * 275 * @param[in] pi2_tmp 276 * Temporary 4x4 buffer for storing inverse 277 * transform 1st stage output 278 * 279 * @param[in] pu1_pred 280 * Prediction 4x4 block 281 * 282 * @param[in] pi2_dequant_coeff 283 * Dequant Coeffs 284 * 285 * @param[out] pu1_dst 286 * Output 4x4 block 287 * 288 * @param[in] qp_div 289 * Quantization parameter / 6 290 * 291 * @param[in] qp_rem 292 * Quantization parameter % 6 293 * 294 * @param[in] src_strd 295 * Input stride 296 * 297 * @param[in] pred_strd 298 * Prediction stride 299 * 300 * @param[in] dst_strd 301 * Output Stride 302 * 303 * @param[in] zero_cols 304 * Zero columns in pi2_src 305 * 306 * @param[in] zero_rows 307 * Zero Rows in pi2_src 308 * 309 * @returns Void 310 * 311 * @remarks 312 * None 313 * 314 ******************************************************************************* 315 */ 316 317 void ihevc_iquant_itrans_recon_4x4(WORD16 *pi2_src, 318 WORD16 *pi2_tmp, 319 UWORD8 *pu1_pred, 320 WORD16 *pi2_dequant_coeff, 321 UWORD8 *pu1_dst, 322 WORD32 qp_div, /* qpscaled / 6 */ 323 WORD32 qp_rem, /* qpscaled % 6 */ 324 WORD32 src_strd, 325 WORD32 pred_strd, 326 WORD32 dst_strd, 327 WORD32 zero_cols, 328 WORD32 zero_rows) 329 { 330 UNUSED(zero_rows); 331 /* Inverse Transform */ 332 { 333 WORD32 j; 334 WORD32 e[2], o[2]; 335 WORD32 add; 336 WORD32 shift; 337 WORD16 *pi2_tmp_orig; 338 WORD32 shift_iq; 339 WORD32 trans_size; 340 /* Inverse Quantization constants */ 341 { 342 WORD32 log2_trans_size, bit_depth; 343 344 log2_trans_size = 2; 345 bit_depth = 8 + 0; 346 shift_iq = bit_depth + log2_trans_size - 5; 347 } 348 349 trans_size = TRANS_SIZE_4; 350 pi2_tmp_orig = pi2_tmp; 351 352 /* Inverse Transform 1st stage */ 353 shift = IT_SHIFT_STAGE_1; 354 add = 1 << (shift - 1); 355 356 for(j = 0; j < trans_size; j++) 357 { 358 /* Checking for Zero Cols */ 359 if((zero_cols & 1) == 1) 360 { 361 memset(pi2_tmp, 0, trans_size * sizeof(WORD16)); 362 } 363 else 364 { 365 WORD32 iq_tmp_1, iq_tmp_2; 366 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ 367 IQUANT_4x4(iq_tmp_1, 368 pi2_src[1 * src_strd], 369 pi2_dequant_coeff[1 * trans_size] * g_ihevc_iquant_scales[qp_rem], 370 shift_iq, qp_div); 371 IQUANT_4x4(iq_tmp_2, 372 pi2_src[3 * src_strd], 373 pi2_dequant_coeff[3 * trans_size] * g_ihevc_iquant_scales[qp_rem], 374 shift_iq, qp_div); 375 376 o[0] = g_ai2_ihevc_trans_4[1][0] * iq_tmp_1 377 + g_ai2_ihevc_trans_4[3][0] * iq_tmp_2; 378 o[1] = g_ai2_ihevc_trans_4[1][1] * iq_tmp_1 379 + g_ai2_ihevc_trans_4[3][1] * iq_tmp_2; 380 381 IQUANT_4x4(iq_tmp_1, 382 pi2_src[0 * src_strd], 383 pi2_dequant_coeff[0 * trans_size] * g_ihevc_iquant_scales[qp_rem], 384 shift_iq, qp_div); 385 IQUANT_4x4(iq_tmp_2, 386 pi2_src[2 * src_strd], 387 pi2_dequant_coeff[2 * trans_size] * g_ihevc_iquant_scales[qp_rem], 388 shift_iq, qp_div); 389 390 e[0] = g_ai2_ihevc_trans_4[0][0] * iq_tmp_1 391 + g_ai2_ihevc_trans_4[2][0] * iq_tmp_2; 392 e[1] = g_ai2_ihevc_trans_4[0][1] * iq_tmp_1 393 + g_ai2_ihevc_trans_4[2][1] * iq_tmp_2; 394 395 pi2_tmp[0] = 396 CLIP_S16(((e[0] + o[0] + add) >> shift)); 397 pi2_tmp[1] = 398 CLIP_S16(((e[1] + o[1] + add) >> shift)); 399 pi2_tmp[2] = 400 CLIP_S16(((e[1] - o[1] + add) >> shift)); 401 pi2_tmp[3] = 402 CLIP_S16(((e[0] - o[0] + add) >> shift)); 403 } 404 pi2_src++; 405 pi2_dequant_coeff++; 406 pi2_tmp += trans_size; 407 zero_cols = zero_cols >> 1; 408 } 409 410 pi2_tmp = pi2_tmp_orig; 411 412 /* Inverse Transform 2nd stage */ 413 shift = IT_SHIFT_STAGE_2; 414 add = 1 << (shift - 1); 415 416 for(j = 0; j < trans_size; j++) 417 { 418 WORD32 itrans_out; 419 420 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ 421 o[0] = g_ai2_ihevc_trans_4[1][0] * pi2_tmp[trans_size] 422 + g_ai2_ihevc_trans_4[3][0] 423 * pi2_tmp[3 * trans_size]; 424 o[1] = g_ai2_ihevc_trans_4[1][1] * pi2_tmp[trans_size] 425 + g_ai2_ihevc_trans_4[3][1] 426 * pi2_tmp[3 * trans_size]; 427 e[0] = g_ai2_ihevc_trans_4[0][0] * pi2_tmp[0] 428 + g_ai2_ihevc_trans_4[2][0] 429 * pi2_tmp[2 * trans_size]; 430 e[1] = g_ai2_ihevc_trans_4[0][1] * pi2_tmp[0] 431 + g_ai2_ihevc_trans_4[2][1] 432 * pi2_tmp[2 * trans_size]; 433 434 itrans_out = 435 CLIP_S16(((e[0] + o[0] + add) >> shift)); 436 pu1_dst[0] = CLIP_U8((itrans_out + pu1_pred[0])); 437 438 itrans_out = 439 CLIP_S16(((e[1] + o[1] + add) >> shift)); 440 pu1_dst[1] = CLIP_U8((itrans_out + pu1_pred[1])); 441 442 itrans_out = 443 CLIP_S16(((e[1] - o[1] + add) >> shift)); 444 pu1_dst[2] = CLIP_U8((itrans_out + pu1_pred[2])); 445 446 itrans_out = 447 CLIP_S16(((e[0] - o[0] + add) >> shift)); 448 pu1_dst[3] = CLIP_U8((itrans_out + pu1_pred[3])); 449 450 pi2_tmp++; 451 pu1_pred += pred_strd; 452 pu1_dst += dst_strd; 453 454 } 455 } 456 } 457