1 /****************************************************************************** 2 * 3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ******************************************************************************/ 18 /** 19 ******************************************************************************* 20 * @file 21 * ihevc_chroma_itrans_recon_16x16.c 22 * 23 * @brief 24 * Contains function definitions for 16x16 inverse transform and reconstruction 25 * of chroma interleaved data. 26 * 27 * @author 28 * 100470 29 * 30 * @par List of Functions: 31 * - ihevc_chroma_itrans_recon_16x16() 32 * 33 * @remarks 34 * None 35 * 36 ******************************************************************************* 37 */ 38 39 #include <stdio.h> 40 #include <string.h> 41 #include "ihevc_typedefs.h" 42 #include "ihevc_macros.h" 43 #include "ihevc_platform_macros.h" 44 #include "ihevc_defs.h" 45 #include "ihevc_trans_tables.h" 46 #include "ihevc_chroma_itrans_recon.h" 47 #include "ihevc_func_selector.h" 48 #include "ihevc_trans_macros.h" 49 50 /* All the functions work one component(U or V) of interleaved data depending upon pointers passed to it */ 51 /* Data visualization */ 52 /* U V U V U V U V */ 53 /* U V U V U V U V */ 54 /* U V U V U V U V */ 55 /* U V U V U V U V */ 56 /* If the pointer points to first byte of above stream (U) , functions will operate on U component */ 57 /* If the pointer points to second byte of above stream (V) , functions will operate on V component */ 58 59 60 /** 61 ******************************************************************************* 62 * 63 * @brief 64 * This function performs Inverse transform and reconstruction for 16x16 65 * input block 66 * 67 * @par Description: 68 * Performs inverse transform and adds the prediction data and clips output 69 * to 8 bit 70 * 71 * @param[in] pi2_src 72 * Input 16x16 coefficients 73 * 74 * @param[in] pi2_tmp 75 * Temporary 16x16 buffer for storing inverse transform 76 * 1st stage output 77 * 78 * @param[in] pu1_pred 79 * Prediction 16x16 block 80 * 81 * @param[out] pu1_dst 82 * Output 16x16 block 83 * 84 * @param[in] src_strd 85 * Input stride 86 * 87 * @param[in] pred_strd 88 * Prediction stride 89 * 90 * @param[in] dst_strd 91 * Output Stride 92 * 93 * @param[in] shift 94 * Output shift 95 * 96 * @param[in] zero_cols 97 * Zero columns in pi2_src 98 * 99 * @returns Void 100 * 101 * @remarks 102 * None 103 * 104 ******************************************************************************* 105 */ 106 107 108 void ihevc_chroma_itrans_recon_16x16(WORD16 *pi2_src, 109 WORD16 *pi2_tmp, 110 UWORD8 *pu1_pred, 111 UWORD8 *pu1_dst, 112 WORD32 src_strd, 113 WORD32 pred_strd, 114 WORD32 dst_strd, 115 WORD32 zero_cols, 116 WORD32 zero_rows) 117 { 118 WORD32 j, k; 119 WORD32 e[8], o[8]; 120 WORD32 ee[4], eo[4]; 121 WORD32 eee[2], eeo[2]; 122 WORD32 add; 123 WORD32 shift; 124 WORD16 *pi2_tmp_orig; 125 WORD32 trans_size; 126 WORD32 row_limit_2nd_stage, zero_rows_2nd_stage = zero_cols; 127 128 trans_size = TRANS_SIZE_16; 129 pi2_tmp_orig = pi2_tmp; 130 131 if((zero_cols & 0xFFF0) == 0xFFF0) 132 row_limit_2nd_stage = 4; 133 else if((zero_cols & 0xFF00) == 0xFF00) 134 row_limit_2nd_stage = 8; 135 else 136 row_limit_2nd_stage = TRANS_SIZE_16; 137 138 if((zero_rows & 0xFFF0) == 0xFFF0) /* First 4 rows of input are non-zero */ 139 { 140 /************************************************************************************************/ 141 /**********************************START - IT_RECON_16x16****************************************/ 142 /************************************************************************************************/ 143 144 /* Inverse Transform 1st stage */ 145 shift = IT_SHIFT_STAGE_1; 146 add = 1 << (shift - 1); 147 148 for(j = 0; j < row_limit_2nd_stage; j++) 149 { 150 /* Checking for Zero Cols */ 151 if((zero_cols & 1) == 1) 152 { 153 memset(pi2_tmp, 0, trans_size * sizeof(WORD16)); 154 } 155 else 156 { 157 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ 158 for(k = 0; k < 8; k++) 159 { 160 o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_src[src_strd] 161 + g_ai2_ihevc_trans_16[3][k] 162 * pi2_src[3 * src_strd]; 163 } 164 for(k = 0; k < 4; k++) 165 { 166 eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_src[2 * src_strd]; 167 } 168 eeo[0] = 0; 169 eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_src[0]; 170 eeo[1] = 0; 171 eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_src[0]; 172 173 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ 174 for(k = 0; k < 2; k++) 175 { 176 ee[k] = eee[k] + eeo[k]; 177 ee[k + 2] = eee[1 - k] - eeo[1 - k]; 178 } 179 for(k = 0; k < 4; k++) 180 { 181 e[k] = ee[k] + eo[k]; 182 e[k + 4] = ee[3 - k] - eo[3 - k]; 183 } 184 for(k = 0; k < 8; k++) 185 { 186 pi2_tmp[k] = 187 CLIP_S16(((e[k] + o[k] + add) >> shift)); 188 pi2_tmp[k + 8] = 189 CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); 190 } 191 } 192 pi2_src++; 193 pi2_tmp += trans_size; 194 zero_cols = zero_cols >> 1; 195 } 196 197 pi2_tmp = pi2_tmp_orig; 198 199 /* Inverse Transform 2nd stage */ 200 shift = IT_SHIFT_STAGE_2; 201 add = 1 << (shift - 1); 202 if((zero_rows_2nd_stage & 0xFFF0) == 0xFFF0) /* First 4 rows of output of 1st stage are non-zero */ 203 { 204 for(j = 0; j < trans_size; j++) 205 { 206 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ 207 for(k = 0; k < 8; k++) 208 { 209 o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] 210 + g_ai2_ihevc_trans_16[3][k] 211 * pi2_tmp[3 * trans_size]; 212 } 213 for(k = 0; k < 4; k++) 214 { 215 eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size]; 216 } 217 eeo[0] = 0; 218 eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]; 219 eeo[1] = 0; 220 eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]; 221 222 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ 223 for(k = 0; k < 2; k++) 224 { 225 ee[k] = eee[k] + eeo[k]; 226 ee[k + 2] = eee[1 - k] - eeo[1 - k]; 227 } 228 for(k = 0; k < 4; k++) 229 { 230 e[k] = ee[k] + eo[k]; 231 e[k + 4] = ee[3 - k] - eo[3 - k]; 232 } 233 for(k = 0; k < 8; k++) 234 { 235 WORD32 itrans_out; 236 itrans_out = 237 CLIP_S16(((e[k] + o[k] + add) >> shift)); 238 pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2])); 239 itrans_out = 240 CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); 241 pu1_dst[(k + 8) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 8) * 2])); 242 } 243 pi2_tmp++; 244 pu1_pred += pred_strd; 245 pu1_dst += dst_strd; 246 } 247 } 248 else if((zero_rows_2nd_stage & 0xFF00) == 0xFF00) /* First 8 rows of output of 1st stage are non-zero */ 249 { 250 for(j = 0; j < trans_size; j++) 251 { 252 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ 253 for(k = 0; k < 8; k++) 254 { 255 o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] 256 + g_ai2_ihevc_trans_16[3][k] 257 * pi2_tmp[3 * trans_size] 258 + g_ai2_ihevc_trans_16[5][k] 259 * pi2_tmp[5 * trans_size] 260 + g_ai2_ihevc_trans_16[7][k] 261 * pi2_tmp[7 * trans_size]; 262 } 263 for(k = 0; k < 4; k++) 264 { 265 eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size] 266 + g_ai2_ihevc_trans_16[6][k] 267 * pi2_tmp[6 * trans_size]; 268 } 269 eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size]; 270 eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]; 271 eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size]; 272 eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]; 273 274 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ 275 for(k = 0; k < 2; k++) 276 { 277 ee[k] = eee[k] + eeo[k]; 278 ee[k + 2] = eee[1 - k] - eeo[1 - k]; 279 } 280 for(k = 0; k < 4; k++) 281 { 282 e[k] = ee[k] + eo[k]; 283 e[k + 4] = ee[3 - k] - eo[3 - k]; 284 } 285 for(k = 0; k < 8; k++) 286 { 287 WORD32 itrans_out; 288 itrans_out = 289 CLIP_S16(((e[k] + o[k] + add) >> shift)); 290 pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2])); 291 itrans_out = 292 CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); 293 pu1_dst[(k + 8) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 8) * 2])); 294 } 295 pi2_tmp++; 296 pu1_pred += pred_strd; 297 pu1_dst += dst_strd; 298 } 299 } 300 else /* All rows of output of 1st stage are non-zero */ 301 { 302 for(j = 0; j < trans_size; j++) 303 { 304 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ 305 for(k = 0; k < 8; k++) 306 { 307 o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] 308 + g_ai2_ihevc_trans_16[3][k] 309 * pi2_tmp[3 * trans_size] 310 + g_ai2_ihevc_trans_16[5][k] 311 * pi2_tmp[5 * trans_size] 312 + g_ai2_ihevc_trans_16[7][k] 313 * pi2_tmp[7 * trans_size] 314 + g_ai2_ihevc_trans_16[9][k] 315 * pi2_tmp[9 * trans_size] 316 + g_ai2_ihevc_trans_16[11][k] 317 * pi2_tmp[11 * trans_size] 318 + g_ai2_ihevc_trans_16[13][k] 319 * pi2_tmp[13 * trans_size] 320 + g_ai2_ihevc_trans_16[15][k] 321 * pi2_tmp[15 * trans_size]; 322 } 323 for(k = 0; k < 4; k++) 324 { 325 eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size] 326 + g_ai2_ihevc_trans_16[6][k] 327 * pi2_tmp[6 * trans_size] 328 + g_ai2_ihevc_trans_16[10][k] 329 * pi2_tmp[10 * trans_size] 330 + g_ai2_ihevc_trans_16[14][k] 331 * pi2_tmp[14 * trans_size]; 332 } 333 eeo[0] = 334 g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size] 335 + g_ai2_ihevc_trans_16[12][0] 336 * pi2_tmp[12 337 * trans_size]; 338 eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0] 339 + g_ai2_ihevc_trans_16[8][0] * pi2_tmp[8 * trans_size]; 340 eeo[1] = 341 g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size] 342 + g_ai2_ihevc_trans_16[12][1] 343 * pi2_tmp[12 344 * trans_size]; 345 eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0] 346 + g_ai2_ihevc_trans_16[8][1] * pi2_tmp[8 * trans_size]; 347 348 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ 349 for(k = 0; k < 2; k++) 350 { 351 ee[k] = eee[k] + eeo[k]; 352 ee[k + 2] = eee[1 - k] - eeo[1 - k]; 353 } 354 for(k = 0; k < 4; k++) 355 { 356 e[k] = ee[k] + eo[k]; 357 e[k + 4] = ee[3 - k] - eo[3 - k]; 358 } 359 for(k = 0; k < 8; k++) 360 { 361 WORD32 itrans_out; 362 itrans_out = 363 CLIP_S16(((e[k] + o[k] + add) >> shift)); 364 pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2])); 365 itrans_out = 366 CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); 367 pu1_dst[(k + 8) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 8) * 2])); 368 } 369 pi2_tmp++; 370 pu1_pred += pred_strd; 371 pu1_dst += dst_strd; 372 } 373 } 374 /************************************************************************************************/ 375 /************************************END - IT_RECON_16x16****************************************/ 376 /************************************************************************************************/ 377 } 378 else if((zero_rows & 0xFF00) == 0xFF00) /* First 8 rows of input are non-zero */ 379 { 380 /************************************************************************************************/ 381 /**********************************START - IT_RECON_16x16****************************************/ 382 /************************************************************************************************/ 383 384 /* Inverse Transform 1st stage */ 385 shift = IT_SHIFT_STAGE_1; 386 add = 1 << (shift - 1); 387 388 for(j = 0; j < row_limit_2nd_stage; j++) 389 { 390 /* Checking for Zero Cols */ 391 if((zero_cols & 1) == 1) 392 { 393 memset(pi2_tmp, 0, trans_size * sizeof(WORD16)); 394 } 395 else 396 { 397 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ 398 for(k = 0; k < 8; k++) 399 { 400 o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_src[src_strd] 401 + g_ai2_ihevc_trans_16[3][k] 402 * pi2_src[3 * src_strd] 403 + g_ai2_ihevc_trans_16[5][k] 404 * pi2_src[5 * src_strd] 405 + g_ai2_ihevc_trans_16[7][k] 406 * pi2_src[7 * src_strd]; 407 } 408 for(k = 0; k < 4; k++) 409 { 410 eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_src[2 * src_strd] 411 + g_ai2_ihevc_trans_16[6][k] 412 * pi2_src[6 * src_strd]; 413 } 414 eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_src[4 * src_strd]; 415 eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_src[0]; 416 eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_src[4 * src_strd]; 417 eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_src[0]; 418 419 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ 420 for(k = 0; k < 2; k++) 421 { 422 ee[k] = eee[k] + eeo[k]; 423 ee[k + 2] = eee[1 - k] - eeo[1 - k]; 424 } 425 for(k = 0; k < 4; k++) 426 { 427 e[k] = ee[k] + eo[k]; 428 e[k + 4] = ee[3 - k] - eo[3 - k]; 429 } 430 for(k = 0; k < 8; k++) 431 { 432 pi2_tmp[k] = 433 CLIP_S16(((e[k] + o[k] + add) >> shift)); 434 pi2_tmp[k + 8] = 435 CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); 436 } 437 } 438 pi2_src++; 439 pi2_tmp += trans_size; 440 zero_cols = zero_cols >> 1; 441 } 442 443 pi2_tmp = pi2_tmp_orig; 444 445 /* Inverse Transform 2nd stage */ 446 shift = IT_SHIFT_STAGE_2; 447 add = 1 << (shift - 1); 448 if((zero_rows_2nd_stage & 0xFFF0) == 0xFFF0) /* First 4 rows of output of 1st stage are non-zero */ 449 { 450 for(j = 0; j < trans_size; j++) 451 { 452 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ 453 for(k = 0; k < 8; k++) 454 { 455 o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] 456 + g_ai2_ihevc_trans_16[3][k] 457 * pi2_tmp[3 * trans_size]; 458 } 459 for(k = 0; k < 4; k++) 460 { 461 eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size]; 462 } 463 eeo[0] = 0; 464 eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]; 465 eeo[1] = 0; 466 eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]; 467 468 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ 469 for(k = 0; k < 2; k++) 470 { 471 ee[k] = eee[k] + eeo[k]; 472 ee[k + 2] = eee[1 - k] - eeo[1 - k]; 473 } 474 for(k = 0; k < 4; k++) 475 { 476 e[k] = ee[k] + eo[k]; 477 e[k + 4] = ee[3 - k] - eo[3 - k]; 478 } 479 for(k = 0; k < 8; k++) 480 { 481 WORD32 itrans_out; 482 itrans_out = 483 CLIP_S16(((e[k] + o[k] + add) >> shift)); 484 pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2])); 485 itrans_out = 486 CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); 487 pu1_dst[(k + 8) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 8) * 2])); 488 } 489 pi2_tmp++; 490 pu1_pred += pred_strd; 491 pu1_dst += dst_strd; 492 } 493 } 494 else if((zero_rows_2nd_stage & 0xFF00) == 0xFF00) /* First 8 rows of output of 1st stage are non-zero */ 495 { 496 for(j = 0; j < trans_size; j++) 497 { 498 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ 499 for(k = 0; k < 8; k++) 500 { 501 o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] 502 + g_ai2_ihevc_trans_16[3][k] 503 * pi2_tmp[3 * trans_size] 504 + g_ai2_ihevc_trans_16[5][k] 505 * pi2_tmp[5 * trans_size] 506 + g_ai2_ihevc_trans_16[7][k] 507 * pi2_tmp[7 * trans_size]; 508 } 509 for(k = 0; k < 4; k++) 510 { 511 eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size] 512 + g_ai2_ihevc_trans_16[6][k] 513 * pi2_tmp[6 * trans_size]; 514 } 515 eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size]; 516 eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]; 517 eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size]; 518 eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]; 519 520 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ 521 for(k = 0; k < 2; k++) 522 { 523 ee[k] = eee[k] + eeo[k]; 524 ee[k + 2] = eee[1 - k] - eeo[1 - k]; 525 } 526 for(k = 0; k < 4; k++) 527 { 528 e[k] = ee[k] + eo[k]; 529 e[k + 4] = ee[3 - k] - eo[3 - k]; 530 } 531 for(k = 0; k < 8; k++) 532 { 533 WORD32 itrans_out; 534 itrans_out = 535 CLIP_S16(((e[k] + o[k] + add) >> shift)); 536 pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2])); 537 itrans_out = 538 CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); 539 pu1_dst[(k + 8) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 8) * 2])); 540 } 541 pi2_tmp++; 542 pu1_pred += pred_strd; 543 pu1_dst += dst_strd; 544 } 545 } 546 else /* All rows of output of 1st stage are non-zero */ 547 { 548 for(j = 0; j < trans_size; j++) 549 { 550 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ 551 for(k = 0; k < 8; k++) 552 { 553 o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] 554 + g_ai2_ihevc_trans_16[3][k] 555 * pi2_tmp[3 * trans_size] 556 + g_ai2_ihevc_trans_16[5][k] 557 * pi2_tmp[5 * trans_size] 558 + g_ai2_ihevc_trans_16[7][k] 559 * pi2_tmp[7 * trans_size] 560 + g_ai2_ihevc_trans_16[9][k] 561 * pi2_tmp[9 * trans_size] 562 + g_ai2_ihevc_trans_16[11][k] 563 * pi2_tmp[11 * trans_size] 564 + g_ai2_ihevc_trans_16[13][k] 565 * pi2_tmp[13 * trans_size] 566 + g_ai2_ihevc_trans_16[15][k] 567 * pi2_tmp[15 * trans_size]; 568 } 569 for(k = 0; k < 4; k++) 570 { 571 eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size] 572 + g_ai2_ihevc_trans_16[6][k] 573 * pi2_tmp[6 * trans_size] 574 + g_ai2_ihevc_trans_16[10][k] 575 * pi2_tmp[10 * trans_size] 576 + g_ai2_ihevc_trans_16[14][k] 577 * pi2_tmp[14 * trans_size]; 578 } 579 eeo[0] = 580 g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size] 581 + g_ai2_ihevc_trans_16[12][0] 582 * pi2_tmp[12 583 * trans_size]; 584 eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0] 585 + g_ai2_ihevc_trans_16[8][0] * pi2_tmp[8 * trans_size]; 586 eeo[1] = 587 g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size] 588 + g_ai2_ihevc_trans_16[12][1] 589 * pi2_tmp[12 590 * trans_size]; 591 eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0] 592 + g_ai2_ihevc_trans_16[8][1] * pi2_tmp[8 * trans_size]; 593 594 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ 595 for(k = 0; k < 2; k++) 596 { 597 ee[k] = eee[k] + eeo[k]; 598 ee[k + 2] = eee[1 - k] - eeo[1 - k]; 599 } 600 for(k = 0; k < 4; k++) 601 { 602 e[k] = ee[k] + eo[k]; 603 e[k + 4] = ee[3 - k] - eo[3 - k]; 604 } 605 for(k = 0; k < 8; k++) 606 { 607 WORD32 itrans_out; 608 itrans_out = 609 CLIP_S16(((e[k] + o[k] + add) >> shift)); 610 pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2])); 611 itrans_out = 612 CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); 613 pu1_dst[(k + 8) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 8) * 2])); 614 } 615 pi2_tmp++; 616 pu1_pred += pred_strd; 617 pu1_dst += dst_strd; 618 } 619 } 620 /************************************************************************************************/ 621 /************************************END - IT_RECON_16x16****************************************/ 622 /************************************************************************************************/ 623 } 624 else /* All rows of input are non-zero */ 625 { 626 /************************************************************************************************/ 627 /**********************************START - IT_RECON_16x16****************************************/ 628 /************************************************************************************************/ 629 630 /* Inverse Transform 1st stage */ 631 shift = IT_SHIFT_STAGE_1; 632 add = 1 << (shift - 1); 633 634 for(j = 0; j < row_limit_2nd_stage; j++) 635 { 636 /* Checking for Zero Cols */ 637 if((zero_cols & 1) == 1) 638 { 639 memset(pi2_tmp, 0, trans_size * sizeof(WORD16)); 640 } 641 else 642 { 643 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ 644 for(k = 0; k < 8; k++) 645 { 646 o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_src[src_strd] 647 + g_ai2_ihevc_trans_16[3][k] 648 * pi2_src[3 * src_strd] 649 + g_ai2_ihevc_trans_16[5][k] 650 * pi2_src[5 * src_strd] 651 + g_ai2_ihevc_trans_16[7][k] 652 * pi2_src[7 * src_strd] 653 + g_ai2_ihevc_trans_16[9][k] 654 * pi2_src[9 * src_strd] 655 + g_ai2_ihevc_trans_16[11][k] 656 * pi2_src[11 * src_strd] 657 + g_ai2_ihevc_trans_16[13][k] 658 * pi2_src[13 * src_strd] 659 + g_ai2_ihevc_trans_16[15][k] 660 * pi2_src[15 * src_strd]; 661 } 662 for(k = 0; k < 4; k++) 663 { 664 eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_src[2 * src_strd] 665 + g_ai2_ihevc_trans_16[6][k] 666 * pi2_src[6 * src_strd] 667 + g_ai2_ihevc_trans_16[10][k] 668 * pi2_src[10 * src_strd] 669 + g_ai2_ihevc_trans_16[14][k] 670 * pi2_src[14 * src_strd]; 671 } 672 eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_src[4 * src_strd] 673 + g_ai2_ihevc_trans_16[12][0] 674 * pi2_src[12 * src_strd]; 675 eee[0] = 676 g_ai2_ihevc_trans_16[0][0] * pi2_src[0] 677 + g_ai2_ihevc_trans_16[8][0] 678 * pi2_src[8 679 * src_strd]; 680 eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_src[4 * src_strd] 681 + g_ai2_ihevc_trans_16[12][1] 682 * pi2_src[12 * src_strd]; 683 eee[1] = 684 g_ai2_ihevc_trans_16[0][1] * pi2_src[0] 685 + g_ai2_ihevc_trans_16[8][1] 686 * pi2_src[8 687 * src_strd]; 688 689 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ 690 for(k = 0; k < 2; k++) 691 { 692 ee[k] = eee[k] + eeo[k]; 693 ee[k + 2] = eee[1 - k] - eeo[1 - k]; 694 } 695 for(k = 0; k < 4; k++) 696 { 697 e[k] = ee[k] + eo[k]; 698 e[k + 4] = ee[3 - k] - eo[3 - k]; 699 } 700 for(k = 0; k < 8; k++) 701 { 702 pi2_tmp[k] = 703 CLIP_S16(((e[k] + o[k] + add) >> shift)); 704 pi2_tmp[k + 8] = 705 CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); 706 } 707 } 708 pi2_src++; 709 pi2_tmp += trans_size; 710 zero_cols = zero_cols >> 1; 711 } 712 713 pi2_tmp = pi2_tmp_orig; 714 715 /* Inverse Transform 2nd stage */ 716 shift = IT_SHIFT_STAGE_2; 717 add = 1 << (shift - 1); 718 if((zero_rows_2nd_stage & 0xFFF0) == 0xFFF0) /* First 4 rows of output of 1st stage are non-zero */ 719 { 720 for(j = 0; j < trans_size; j++) 721 { 722 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ 723 for(k = 0; k < 8; k++) 724 { 725 o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] 726 + g_ai2_ihevc_trans_16[3][k] 727 * pi2_tmp[3 * trans_size]; 728 } 729 for(k = 0; k < 4; k++) 730 { 731 eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size]; 732 } 733 eeo[0] = 0; 734 eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]; 735 eeo[1] = 0; 736 eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]; 737 738 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ 739 for(k = 0; k < 2; k++) 740 { 741 ee[k] = eee[k] + eeo[k]; 742 ee[k + 2] = eee[1 - k] - eeo[1 - k]; 743 } 744 for(k = 0; k < 4; k++) 745 { 746 e[k] = ee[k] + eo[k]; 747 e[k + 4] = ee[3 - k] - eo[3 - k]; 748 } 749 for(k = 0; k < 8; k++) 750 { 751 WORD32 itrans_out; 752 itrans_out = 753 CLIP_S16(((e[k] + o[k] + add) >> shift)); 754 pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2])); 755 itrans_out = 756 CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); 757 pu1_dst[(k + 8) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 8) * 2])); 758 } 759 pi2_tmp++; 760 pu1_pred += pred_strd; 761 pu1_dst += dst_strd; 762 } 763 } 764 else if((zero_rows_2nd_stage & 0xFF00) == 0xFF00) /* First 8 rows of output of 1st stage are non-zero */ 765 { 766 for(j = 0; j < trans_size; j++) 767 { 768 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ 769 for(k = 0; k < 8; k++) 770 { 771 o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] 772 + g_ai2_ihevc_trans_16[3][k] 773 * pi2_tmp[3 * trans_size] 774 + g_ai2_ihevc_trans_16[5][k] 775 * pi2_tmp[5 * trans_size] 776 + g_ai2_ihevc_trans_16[7][k] 777 * pi2_tmp[7 * trans_size]; 778 } 779 for(k = 0; k < 4; k++) 780 { 781 eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size] 782 + g_ai2_ihevc_trans_16[6][k] 783 * pi2_tmp[6 * trans_size]; 784 } 785 eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size]; 786 eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]; 787 eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size]; 788 eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]; 789 790 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ 791 for(k = 0; k < 2; k++) 792 { 793 ee[k] = eee[k] + eeo[k]; 794 ee[k + 2] = eee[1 - k] - eeo[1 - k]; 795 } 796 for(k = 0; k < 4; k++) 797 { 798 e[k] = ee[k] + eo[k]; 799 e[k + 4] = ee[3 - k] - eo[3 - k]; 800 } 801 for(k = 0; k < 8; k++) 802 { 803 WORD32 itrans_out; 804 itrans_out = 805 CLIP_S16(((e[k] + o[k] + add) >> shift)); 806 pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2])); 807 itrans_out = 808 CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); 809 pu1_dst[(k + 8) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 8) * 2])); 810 } 811 pi2_tmp++; 812 pu1_pred += pred_strd; 813 pu1_dst += dst_strd; 814 } 815 } 816 else /* All rows of output of 1st stage are non-zero */ 817 { 818 for(j = 0; j < trans_size; j++) 819 { 820 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ 821 for(k = 0; k < 8; k++) 822 { 823 o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] 824 + g_ai2_ihevc_trans_16[3][k] 825 * pi2_tmp[3 * trans_size] 826 + g_ai2_ihevc_trans_16[5][k] 827 * pi2_tmp[5 * trans_size] 828 + g_ai2_ihevc_trans_16[7][k] 829 * pi2_tmp[7 * trans_size] 830 + g_ai2_ihevc_trans_16[9][k] 831 * pi2_tmp[9 * trans_size] 832 + g_ai2_ihevc_trans_16[11][k] 833 * pi2_tmp[11 * trans_size] 834 + g_ai2_ihevc_trans_16[13][k] 835 * pi2_tmp[13 * trans_size] 836 + g_ai2_ihevc_trans_16[15][k] 837 * pi2_tmp[15 * trans_size]; 838 } 839 for(k = 0; k < 4; k++) 840 { 841 eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size] 842 + g_ai2_ihevc_trans_16[6][k] 843 * pi2_tmp[6 * trans_size] 844 + g_ai2_ihevc_trans_16[10][k] 845 * pi2_tmp[10 * trans_size] 846 + g_ai2_ihevc_trans_16[14][k] 847 * pi2_tmp[14 * trans_size]; 848 } 849 eeo[0] = 850 g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size] 851 + g_ai2_ihevc_trans_16[12][0] 852 * pi2_tmp[12 853 * trans_size]; 854 eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0] 855 + g_ai2_ihevc_trans_16[8][0] * pi2_tmp[8 * trans_size]; 856 eeo[1] = 857 g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size] 858 + g_ai2_ihevc_trans_16[12][1] 859 * pi2_tmp[12 860 * trans_size]; 861 eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0] 862 + g_ai2_ihevc_trans_16[8][1] * pi2_tmp[8 * trans_size]; 863 864 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ 865 for(k = 0; k < 2; k++) 866 { 867 ee[k] = eee[k] + eeo[k]; 868 ee[k + 2] = eee[1 - k] - eeo[1 - k]; 869 } 870 for(k = 0; k < 4; k++) 871 { 872 e[k] = ee[k] + eo[k]; 873 e[k + 4] = ee[3 - k] - eo[3 - k]; 874 } 875 for(k = 0; k < 8; k++) 876 { 877 WORD32 itrans_out; 878 itrans_out = 879 CLIP_S16(((e[k] + o[k] + add) >> shift)); 880 pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2])); 881 itrans_out = 882 CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); 883 pu1_dst[(k + 8) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 8) * 2])); 884 } 885 pi2_tmp++; 886 pu1_pred += pred_strd; 887 pu1_dst += dst_strd; 888 } 889 } 890 /************************************************************************************************/ 891 /************************************END - IT_RECON_16x16****************************************/ 892 /************************************************************************************************/ 893 } 894 } 895 896