1 /****************************************************************************** 2 * 3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ******************************************************************************/ 18 /** 19 ******************************************************************************* 20 * @file 21 * ihevc_itrans_recon_16x16.c 22 * 23 * @brief 24 * Contains function definitions for inverse transform and reconstruction 16x16 25 * 26 * 27 * @author 28 * 100470 29 * 30 * @par List of Functions: 31 * - ihevc_itrans_recon_16x16() 32 * 33 * @remarks 34 * None 35 * 36 ******************************************************************************* 37 */ 38 #include <stdio.h> 39 #include <string.h> 40 #include "ihevc_typedefs.h" 41 #include "ihevc_macros.h" 42 #include "ihevc_platform_macros.h" 43 #include "ihevc_defs.h" 44 #include "ihevc_trans_tables.h" 45 #include "ihevc_itrans_recon.h" 46 #include "ihevc_func_selector.h" 47 #include "ihevc_trans_macros.h" 48 49 /** 50 ******************************************************************************* 51 * 52 * @brief 53 * This function performs Inverse transform and reconstruction for 16x16 54 * input block 55 * 56 * @par Description: 57 * Performs inverse transform and adds the prediction data and clips output 58 * to 8 bit 59 * 60 * @param[in] pi2_src 61 * Input 16x16 coefficients 62 * 63 * @param[in] pi2_tmp 64 * Temporary 16x16 buffer for storing inverse 65 * 66 * transform 67 * 1st stage output 68 * 69 * @param[in] pu1_pred 70 * Prediction 16x16 block 71 * 72 * @param[out] pu1_dst 73 * Output 16x16 block 74 * 75 * @param[in] src_strd 76 * Input stride 77 * 78 * @param[in] pred_strd 79 * Prediction stride 80 * 81 * @param[in] dst_strd 82 * Output Stride 83 * 84 * @param[in] shift 85 * Output shift 86 * 87 * @param[in] zero_cols 88 * Zero columns in pi2_src 89 * 90 * @returns Void 91 * 92 * @remarks 93 * None 94 * 95 ******************************************************************************* 96 */ 97 98 void ihevc_itrans_recon_16x16(WORD16 *pi2_src, 99 WORD16 *pi2_tmp, 100 UWORD8 *pu1_pred, 101 UWORD8 *pu1_dst, 102 WORD32 src_strd, 103 WORD32 pred_strd, 104 WORD32 dst_strd, 105 WORD32 zero_cols, 106 WORD32 zero_rows) 107 { 108 WORD32 j, k; 109 WORD32 e[8], o[8]; 110 WORD32 ee[4], eo[4]; 111 WORD32 eee[2], eeo[2]; 112 WORD32 add; 113 WORD32 shift; 114 WORD16 *pi2_tmp_orig; 115 WORD32 trans_size; 116 WORD32 zero_rows_2nd_stage = zero_cols; 117 WORD32 row_limit_2nd_stage; 118 119 if((zero_cols & 0xFFF0) == 0xFFF0) 120 row_limit_2nd_stage = 4; 121 else if((zero_cols & 0xFF00) == 0xFF00) 122 row_limit_2nd_stage = 8; 123 else 124 row_limit_2nd_stage = TRANS_SIZE_16; 125 126 trans_size = TRANS_SIZE_16; 127 pi2_tmp_orig = pi2_tmp; 128 if((zero_rows & 0xFFF0) == 0xFFF0) /* First 4 rows of input are non-zero */ 129 { 130 /* Inverse Transform 1st stage */ 131 /************************************************************************************************/ 132 /**********************************START - IT_RECON_16x16****************************************/ 133 /************************************************************************************************/ 134 135 shift = IT_SHIFT_STAGE_1; 136 add = 1 << (shift - 1); 137 138 for(j = 0; j < row_limit_2nd_stage; j++) 139 { 140 /* Checking for Zero Cols */ 141 if((zero_cols & 1) == 1) 142 { 143 memset(pi2_tmp, 0, trans_size * sizeof(WORD16)); 144 } 145 else 146 { 147 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ 148 for(k = 0; k < 8; k++) 149 { 150 o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_src[src_strd] 151 + g_ai2_ihevc_trans_16[3][k] 152 * pi2_src[3 * src_strd]; 153 } 154 for(k = 0; k < 4; k++) 155 { 156 eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_src[2 * src_strd]; 157 } 158 eeo[0] = 0; 159 eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_src[0]; 160 eeo[1] = 0; 161 eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_src[0]; 162 163 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ 164 for(k = 0; k < 2; k++) 165 { 166 ee[k] = eee[k] + eeo[k]; 167 ee[k + 2] = eee[1 - k] - eeo[1 - k]; 168 } 169 for(k = 0; k < 4; k++) 170 { 171 e[k] = ee[k] + eo[k]; 172 e[k + 4] = ee[3 - k] - eo[3 - k]; 173 } 174 for(k = 0; k < 8; k++) 175 { 176 pi2_tmp[k] = 177 CLIP_S16(((e[k] + o[k] + add) >> shift)); 178 pi2_tmp[k + 8] = 179 CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); 180 } 181 } 182 pi2_src++; 183 pi2_tmp += trans_size; 184 zero_cols = zero_cols >> 1; 185 } 186 187 pi2_tmp = pi2_tmp_orig; 188 189 /* Inverse Transform 2nd stage */ 190 shift = IT_SHIFT_STAGE_2; 191 add = 1 << (shift - 1); 192 193 if((zero_rows_2nd_stage & 0xFFF0) == 0xFFF0) /* First 4 rows of output of 1st stage are non-zero */ 194 { 195 for(j = 0; j < trans_size; j++) 196 { 197 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ 198 for(k = 0; k < 8; k++) 199 { 200 o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] 201 + g_ai2_ihevc_trans_16[3][k] 202 * pi2_tmp[3 * trans_size]; 203 } 204 for(k = 0; k < 4; k++) 205 { 206 eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size]; 207 } 208 eeo[0] = 0; 209 eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]; 210 eeo[1] = 0; 211 eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]; 212 213 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ 214 for(k = 0; k < 2; k++) 215 { 216 ee[k] = eee[k] + eeo[k]; 217 ee[k + 2] = eee[1 - k] - eeo[1 - k]; 218 } 219 for(k = 0; k < 4; k++) 220 { 221 e[k] = ee[k] + eo[k]; 222 e[k + 4] = ee[3 - k] - eo[3 - k]; 223 } 224 for(k = 0; k < 8; k++) 225 { 226 WORD32 itrans_out; 227 itrans_out = 228 CLIP_S16(((e[k] + o[k] + add) >> shift)); 229 pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); 230 itrans_out = 231 CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); 232 pu1_dst[k + 8] = CLIP_U8((itrans_out + pu1_pred[k + 8])); 233 } 234 pi2_tmp++; 235 pu1_pred += pred_strd; 236 pu1_dst += dst_strd; 237 } 238 } 239 else if((zero_rows_2nd_stage & 0xFF00) == 0xFF00) /* First 4 rows of output of 1st stage are non-zero */ 240 { 241 for(j = 0; j < trans_size; j++) 242 { 243 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ 244 for(k = 0; k < 8; k++) 245 { 246 o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] 247 + g_ai2_ihevc_trans_16[3][k] 248 * pi2_tmp[3 * trans_size] 249 + g_ai2_ihevc_trans_16[5][k] 250 * pi2_tmp[5 * trans_size] 251 + g_ai2_ihevc_trans_16[7][k] 252 * pi2_tmp[7 * trans_size]; 253 } 254 for(k = 0; k < 4; k++) 255 { 256 eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size] 257 + g_ai2_ihevc_trans_16[6][k] 258 * pi2_tmp[6 * trans_size]; 259 } 260 eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size]; 261 eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]; 262 eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size]; 263 eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]; 264 265 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ 266 for(k = 0; k < 2; k++) 267 { 268 ee[k] = eee[k] + eeo[k]; 269 ee[k + 2] = eee[1 - k] - eeo[1 - k]; 270 } 271 for(k = 0; k < 4; k++) 272 { 273 e[k] = ee[k] + eo[k]; 274 e[k + 4] = ee[3 - k] - eo[3 - k]; 275 } 276 for(k = 0; k < 8; k++) 277 { 278 WORD32 itrans_out; 279 itrans_out = 280 CLIP_S16(((e[k] + o[k] + add) >> shift)); 281 pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); 282 itrans_out = 283 CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); 284 pu1_dst[k + 8] = CLIP_U8((itrans_out + pu1_pred[k + 8])); 285 } 286 pi2_tmp++; 287 pu1_pred += pred_strd; 288 pu1_dst += dst_strd; 289 } 290 } 291 else /* All rows of output of 1st stage are non-zero */ 292 { 293 for(j = 0; j < trans_size; j++) 294 { 295 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ 296 for(k = 0; k < 8; k++) 297 { 298 o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] 299 + g_ai2_ihevc_trans_16[3][k] 300 * pi2_tmp[3 * trans_size] 301 + g_ai2_ihevc_trans_16[5][k] 302 * pi2_tmp[5 * trans_size] 303 + g_ai2_ihevc_trans_16[7][k] 304 * pi2_tmp[7 * trans_size] 305 + g_ai2_ihevc_trans_16[9][k] 306 * pi2_tmp[9 * trans_size] 307 + g_ai2_ihevc_trans_16[11][k] 308 * pi2_tmp[11 * trans_size] 309 + g_ai2_ihevc_trans_16[13][k] 310 * pi2_tmp[13 * trans_size] 311 + g_ai2_ihevc_trans_16[15][k] 312 * pi2_tmp[15 * trans_size]; 313 } 314 for(k = 0; k < 4; k++) 315 { 316 eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size] 317 + g_ai2_ihevc_trans_16[6][k] 318 * pi2_tmp[6 * trans_size] 319 + g_ai2_ihevc_trans_16[10][k] 320 * pi2_tmp[10 * trans_size] 321 + g_ai2_ihevc_trans_16[14][k] 322 * pi2_tmp[14 * trans_size]; 323 } 324 eeo[0] = 325 g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size] 326 + g_ai2_ihevc_trans_16[12][0] 327 * pi2_tmp[12 328 * trans_size]; 329 eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0] 330 + g_ai2_ihevc_trans_16[8][0] * pi2_tmp[8 * trans_size]; 331 eeo[1] = 332 g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size] 333 + g_ai2_ihevc_trans_16[12][1] 334 * pi2_tmp[12 335 * trans_size]; 336 eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0] 337 + g_ai2_ihevc_trans_16[8][1] * pi2_tmp[8 * trans_size]; 338 339 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ 340 for(k = 0; k < 2; k++) 341 { 342 ee[k] = eee[k] + eeo[k]; 343 ee[k + 2] = eee[1 - k] - eeo[1 - k]; 344 } 345 for(k = 0; k < 4; k++) 346 { 347 e[k] = ee[k] + eo[k]; 348 e[k + 4] = ee[3 - k] - eo[3 - k]; 349 } 350 for(k = 0; k < 8; k++) 351 { 352 WORD32 itrans_out; 353 itrans_out = 354 CLIP_S16(((e[k] + o[k] + add) >> shift)); 355 pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); 356 itrans_out = 357 CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); 358 pu1_dst[k + 8] = CLIP_U8((itrans_out + pu1_pred[k + 8])); 359 } 360 pi2_tmp++; 361 pu1_pred += pred_strd; 362 pu1_dst += dst_strd; 363 } 364 } 365 /************************************************************************************************/ 366 /************************************END - IT_RECON_16x16****************************************/ 367 /************************************************************************************************/ 368 } 369 else if((zero_rows & 0xFF00) == 0xFF00) /* First 8 rows of input are non-zero */ 370 { 371 /* Inverse Transform 1st stage */ 372 /************************************************************************************************/ 373 /**********************************START - IT_RECON_16x16****************************************/ 374 /************************************************************************************************/ 375 376 shift = IT_SHIFT_STAGE_1; 377 add = 1 << (shift - 1); 378 379 for(j = 0; j < row_limit_2nd_stage; j++) 380 { 381 /* Checking for Zero Cols */ 382 if((zero_cols & 1) == 1) 383 { 384 memset(pi2_tmp, 0, trans_size * sizeof(WORD16)); 385 } 386 else 387 { 388 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ 389 for(k = 0; k < 8; k++) 390 { 391 o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_src[src_strd] 392 + g_ai2_ihevc_trans_16[3][k] 393 * pi2_src[3 * src_strd] 394 + g_ai2_ihevc_trans_16[5][k] 395 * pi2_src[5 * src_strd] 396 + g_ai2_ihevc_trans_16[7][k] 397 * pi2_src[7 * src_strd]; 398 } 399 for(k = 0; k < 4; k++) 400 { 401 eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_src[2 * src_strd] 402 + g_ai2_ihevc_trans_16[6][k] 403 * pi2_src[6 * src_strd]; 404 } 405 eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_src[4 * src_strd]; 406 eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_src[0]; 407 eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_src[4 * src_strd]; 408 eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_src[0]; 409 410 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ 411 for(k = 0; k < 2; k++) 412 { 413 ee[k] = eee[k] + eeo[k]; 414 ee[k + 2] = eee[1 - k] - eeo[1 - k]; 415 } 416 for(k = 0; k < 4; k++) 417 { 418 e[k] = ee[k] + eo[k]; 419 e[k + 4] = ee[3 - k] - eo[3 - k]; 420 } 421 for(k = 0; k < 8; k++) 422 { 423 pi2_tmp[k] = 424 CLIP_S16(((e[k] + o[k] + add) >> shift)); 425 pi2_tmp[k + 8] = 426 CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); 427 } 428 } 429 pi2_src++; 430 pi2_tmp += trans_size; 431 zero_cols = zero_cols >> 1; 432 } 433 434 pi2_tmp = pi2_tmp_orig; 435 436 /* Inverse Transform 2nd stage */ 437 shift = IT_SHIFT_STAGE_2; 438 add = 1 << (shift - 1); 439 440 if((zero_rows_2nd_stage & 0xFFF0) == 0xFFF0) /* First 4 rows of output of 1st stage are non-zero */ 441 { 442 for(j = 0; j < trans_size; j++) 443 { 444 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ 445 for(k = 0; k < 8; k++) 446 { 447 o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] 448 + g_ai2_ihevc_trans_16[3][k] 449 * pi2_tmp[3 * trans_size]; 450 } 451 for(k = 0; k < 4; k++) 452 { 453 eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size]; 454 } 455 eeo[0] = 0; 456 eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]; 457 eeo[1] = 0; 458 eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]; 459 460 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ 461 for(k = 0; k < 2; k++) 462 { 463 ee[k] = eee[k] + eeo[k]; 464 ee[k + 2] = eee[1 - k] - eeo[1 - k]; 465 } 466 for(k = 0; k < 4; k++) 467 { 468 e[k] = ee[k] + eo[k]; 469 e[k + 4] = ee[3 - k] - eo[3 - k]; 470 } 471 for(k = 0; k < 8; k++) 472 { 473 WORD32 itrans_out; 474 itrans_out = 475 CLIP_S16(((e[k] + o[k] + add) >> shift)); 476 pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); 477 itrans_out = 478 CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); 479 pu1_dst[k + 8] = CLIP_U8((itrans_out + pu1_pred[k + 8])); 480 } 481 pi2_tmp++; 482 pu1_pred += pred_strd; 483 pu1_dst += dst_strd; 484 } 485 } 486 else if((zero_rows_2nd_stage & 0xFF00) == 0xFF00) /* First 4 rows of output of 1st stage are non-zero */ 487 { 488 for(j = 0; j < trans_size; j++) 489 { 490 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ 491 for(k = 0; k < 8; k++) 492 { 493 o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] 494 + g_ai2_ihevc_trans_16[3][k] 495 * pi2_tmp[3 * trans_size] 496 + g_ai2_ihevc_trans_16[5][k] 497 * pi2_tmp[5 * trans_size] 498 + g_ai2_ihevc_trans_16[7][k] 499 * pi2_tmp[7 * trans_size]; 500 } 501 for(k = 0; k < 4; k++) 502 { 503 eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size] 504 + g_ai2_ihevc_trans_16[6][k] 505 * pi2_tmp[6 * trans_size]; 506 } 507 eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size]; 508 eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]; 509 eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size]; 510 eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]; 511 512 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ 513 for(k = 0; k < 2; k++) 514 { 515 ee[k] = eee[k] + eeo[k]; 516 ee[k + 2] = eee[1 - k] - eeo[1 - k]; 517 } 518 for(k = 0; k < 4; k++) 519 { 520 e[k] = ee[k] + eo[k]; 521 e[k + 4] = ee[3 - k] - eo[3 - k]; 522 } 523 for(k = 0; k < 8; k++) 524 { 525 WORD32 itrans_out; 526 itrans_out = 527 CLIP_S16(((e[k] + o[k] + add) >> shift)); 528 pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); 529 itrans_out = 530 CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); 531 pu1_dst[k + 8] = CLIP_U8((itrans_out + pu1_pred[k + 8])); 532 } 533 pi2_tmp++; 534 pu1_pred += pred_strd; 535 pu1_dst += dst_strd; 536 } 537 } 538 else /* All rows of output of 1st stage are non-zero */ 539 { 540 for(j = 0; j < trans_size; j++) 541 { 542 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ 543 for(k = 0; k < 8; k++) 544 { 545 o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] 546 + g_ai2_ihevc_trans_16[3][k] 547 * pi2_tmp[3 * trans_size] 548 + g_ai2_ihevc_trans_16[5][k] 549 * pi2_tmp[5 * trans_size] 550 + g_ai2_ihevc_trans_16[7][k] 551 * pi2_tmp[7 * trans_size] 552 + g_ai2_ihevc_trans_16[9][k] 553 * pi2_tmp[9 * trans_size] 554 + g_ai2_ihevc_trans_16[11][k] 555 * pi2_tmp[11 * trans_size] 556 + g_ai2_ihevc_trans_16[13][k] 557 * pi2_tmp[13 * trans_size] 558 + g_ai2_ihevc_trans_16[15][k] 559 * pi2_tmp[15 * trans_size]; 560 } 561 for(k = 0; k < 4; k++) 562 { 563 eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size] 564 + g_ai2_ihevc_trans_16[6][k] 565 * pi2_tmp[6 * trans_size] 566 + g_ai2_ihevc_trans_16[10][k] 567 * pi2_tmp[10 * trans_size] 568 + g_ai2_ihevc_trans_16[14][k] 569 * pi2_tmp[14 * trans_size]; 570 } 571 eeo[0] = 572 g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size] 573 + g_ai2_ihevc_trans_16[12][0] 574 * pi2_tmp[12 575 * trans_size]; 576 eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0] 577 + g_ai2_ihevc_trans_16[8][0] * pi2_tmp[8 * trans_size]; 578 eeo[1] = 579 g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size] 580 + g_ai2_ihevc_trans_16[12][1] 581 * pi2_tmp[12 582 * trans_size]; 583 eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0] 584 + g_ai2_ihevc_trans_16[8][1] * pi2_tmp[8 * trans_size]; 585 586 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ 587 for(k = 0; k < 2; k++) 588 { 589 ee[k] = eee[k] + eeo[k]; 590 ee[k + 2] = eee[1 - k] - eeo[1 - k]; 591 } 592 for(k = 0; k < 4; k++) 593 { 594 e[k] = ee[k] + eo[k]; 595 e[k + 4] = ee[3 - k] - eo[3 - k]; 596 } 597 for(k = 0; k < 8; k++) 598 { 599 WORD32 itrans_out; 600 itrans_out = 601 CLIP_S16(((e[k] + o[k] + add) >> shift)); 602 pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); 603 itrans_out = 604 CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); 605 pu1_dst[k + 8] = CLIP_U8((itrans_out + pu1_pred[k + 8])); 606 } 607 pi2_tmp++; 608 pu1_pred += pred_strd; 609 pu1_dst += dst_strd; 610 } 611 } 612 /************************************************************************************************/ 613 /************************************END - IT_RECON_16x16****************************************/ 614 /************************************************************************************************/ 615 } 616 else /* All rows of input are non-zero */ 617 { 618 /* Inverse Transform 1st stage */ 619 /************************************************************************************************/ 620 /**********************************START - IT_RECON_16x16****************************************/ 621 /************************************************************************************************/ 622 623 shift = IT_SHIFT_STAGE_1; 624 add = 1 << (shift - 1); 625 626 for(j = 0; j < row_limit_2nd_stage; j++) 627 { 628 /* Checking for Zero Cols */ 629 if((zero_cols & 1) == 1) 630 { 631 memset(pi2_tmp, 0, trans_size * sizeof(WORD16)); 632 } 633 else 634 { 635 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ 636 for(k = 0; k < 8; k++) 637 { 638 o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_src[src_strd] 639 + g_ai2_ihevc_trans_16[3][k] 640 * pi2_src[3 * src_strd] 641 + g_ai2_ihevc_trans_16[5][k] 642 * pi2_src[5 * src_strd] 643 + g_ai2_ihevc_trans_16[7][k] 644 * pi2_src[7 * src_strd] 645 + g_ai2_ihevc_trans_16[9][k] 646 * pi2_src[9 * src_strd] 647 + g_ai2_ihevc_trans_16[11][k] 648 * pi2_src[11 * src_strd] 649 + g_ai2_ihevc_trans_16[13][k] 650 * pi2_src[13 * src_strd] 651 + g_ai2_ihevc_trans_16[15][k] 652 * pi2_src[15 * src_strd]; 653 } 654 for(k = 0; k < 4; k++) 655 { 656 eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_src[2 * src_strd] 657 + g_ai2_ihevc_trans_16[6][k] 658 * pi2_src[6 * src_strd] 659 + g_ai2_ihevc_trans_16[10][k] 660 * pi2_src[10 * src_strd] 661 + g_ai2_ihevc_trans_16[14][k] 662 * pi2_src[14 * src_strd]; 663 } 664 eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_src[4 * src_strd] 665 + g_ai2_ihevc_trans_16[12][0] 666 * pi2_src[12 * src_strd]; 667 eee[0] = 668 g_ai2_ihevc_trans_16[0][0] * pi2_src[0] 669 + g_ai2_ihevc_trans_16[8][0] 670 * pi2_src[8 671 * src_strd]; 672 eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_src[4 * src_strd] 673 + g_ai2_ihevc_trans_16[12][1] 674 * pi2_src[12 * src_strd]; 675 eee[1] = 676 g_ai2_ihevc_trans_16[0][1] * pi2_src[0] 677 + g_ai2_ihevc_trans_16[8][1] 678 * pi2_src[8 679 * src_strd]; 680 681 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ 682 for(k = 0; k < 2; k++) 683 { 684 ee[k] = eee[k] + eeo[k]; 685 ee[k + 2] = eee[1 - k] - eeo[1 - k]; 686 } 687 for(k = 0; k < 4; k++) 688 { 689 e[k] = ee[k] + eo[k]; 690 e[k + 4] = ee[3 - k] - eo[3 - k]; 691 } 692 for(k = 0; k < 8; k++) 693 { 694 pi2_tmp[k] = 695 CLIP_S16(((e[k] + o[k] + add) >> shift)); 696 pi2_tmp[k + 8] = 697 CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); 698 } 699 } 700 pi2_src++; 701 pi2_tmp += trans_size; 702 zero_cols = zero_cols >> 1; 703 } 704 705 pi2_tmp = pi2_tmp_orig; 706 707 /* Inverse Transform 2nd stage */ 708 shift = IT_SHIFT_STAGE_2; 709 add = 1 << (shift - 1); 710 711 if((zero_rows_2nd_stage & 0xFFF0) == 0xFFF0) /* First 4 rows of output of 1st stage are non-zero */ 712 { 713 for(j = 0; j < trans_size; j++) 714 { 715 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ 716 for(k = 0; k < 8; k++) 717 { 718 o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] 719 + g_ai2_ihevc_trans_16[3][k] 720 * pi2_tmp[3 * trans_size]; 721 } 722 for(k = 0; k < 4; k++) 723 { 724 eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size]; 725 } 726 eeo[0] = 0; 727 eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]; 728 eeo[1] = 0; 729 eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]; 730 731 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ 732 for(k = 0; k < 2; k++) 733 { 734 ee[k] = eee[k] + eeo[k]; 735 ee[k + 2] = eee[1 - k] - eeo[1 - k]; 736 } 737 for(k = 0; k < 4; k++) 738 { 739 e[k] = ee[k] + eo[k]; 740 e[k + 4] = ee[3 - k] - eo[3 - k]; 741 } 742 for(k = 0; k < 8; k++) 743 { 744 WORD32 itrans_out; 745 itrans_out = 746 CLIP_S16(((e[k] + o[k] + add) >> shift)); 747 pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); 748 itrans_out = 749 CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); 750 pu1_dst[k + 8] = CLIP_U8((itrans_out + pu1_pred[k + 8])); 751 } 752 pi2_tmp++; 753 pu1_pred += pred_strd; 754 pu1_dst += dst_strd; 755 } 756 } 757 else if((zero_rows_2nd_stage & 0xFF00) == 0xFF00) /* First 4 rows of output of 1st stage are non-zero */ 758 { 759 for(j = 0; j < trans_size; j++) 760 { 761 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ 762 for(k = 0; k < 8; k++) 763 { 764 o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] 765 + g_ai2_ihevc_trans_16[3][k] 766 * pi2_tmp[3 * trans_size] 767 + g_ai2_ihevc_trans_16[5][k] 768 * pi2_tmp[5 * trans_size] 769 + g_ai2_ihevc_trans_16[7][k] 770 * pi2_tmp[7 * trans_size]; 771 } 772 for(k = 0; k < 4; k++) 773 { 774 eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size] 775 + g_ai2_ihevc_trans_16[6][k] 776 * pi2_tmp[6 * trans_size]; 777 } 778 eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size]; 779 eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]; 780 eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size]; 781 eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]; 782 783 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ 784 for(k = 0; k < 2; k++) 785 { 786 ee[k] = eee[k] + eeo[k]; 787 ee[k + 2] = eee[1 - k] - eeo[1 - k]; 788 } 789 for(k = 0; k < 4; k++) 790 { 791 e[k] = ee[k] + eo[k]; 792 e[k + 4] = ee[3 - k] - eo[3 - k]; 793 } 794 for(k = 0; k < 8; k++) 795 { 796 WORD32 itrans_out; 797 itrans_out = 798 CLIP_S16(((e[k] + o[k] + add) >> shift)); 799 pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); 800 itrans_out = 801 CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); 802 pu1_dst[k + 8] = CLIP_U8((itrans_out + pu1_pred[k + 8])); 803 } 804 pi2_tmp++; 805 pu1_pred += pred_strd; 806 pu1_dst += dst_strd; 807 } 808 } 809 else /* All rows of output of 1st stage are non-zero */ 810 { 811 for(j = 0; j < trans_size; j++) 812 { 813 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ 814 for(k = 0; k < 8; k++) 815 { 816 o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] 817 + g_ai2_ihevc_trans_16[3][k] 818 * pi2_tmp[3 * trans_size] 819 + g_ai2_ihevc_trans_16[5][k] 820 * pi2_tmp[5 * trans_size] 821 + g_ai2_ihevc_trans_16[7][k] 822 * pi2_tmp[7 * trans_size] 823 + g_ai2_ihevc_trans_16[9][k] 824 * pi2_tmp[9 * trans_size] 825 + g_ai2_ihevc_trans_16[11][k] 826 * pi2_tmp[11 * trans_size] 827 + g_ai2_ihevc_trans_16[13][k] 828 * pi2_tmp[13 * trans_size] 829 + g_ai2_ihevc_trans_16[15][k] 830 * pi2_tmp[15 * trans_size]; 831 } 832 for(k = 0; k < 4; k++) 833 { 834 eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size] 835 + g_ai2_ihevc_trans_16[6][k] 836 * pi2_tmp[6 * trans_size] 837 + g_ai2_ihevc_trans_16[10][k] 838 * pi2_tmp[10 * trans_size] 839 + g_ai2_ihevc_trans_16[14][k] 840 * pi2_tmp[14 * trans_size]; 841 } 842 eeo[0] = 843 g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size] 844 + g_ai2_ihevc_trans_16[12][0] 845 * pi2_tmp[12 846 * trans_size]; 847 eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0] 848 + g_ai2_ihevc_trans_16[8][0] * pi2_tmp[8 * trans_size]; 849 eeo[1] = 850 g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size] 851 + g_ai2_ihevc_trans_16[12][1] 852 * pi2_tmp[12 853 * trans_size]; 854 eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0] 855 + g_ai2_ihevc_trans_16[8][1] * pi2_tmp[8 * trans_size]; 856 857 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ 858 for(k = 0; k < 2; k++) 859 { 860 ee[k] = eee[k] + eeo[k]; 861 ee[k + 2] = eee[1 - k] - eeo[1 - k]; 862 } 863 for(k = 0; k < 4; k++) 864 { 865 e[k] = ee[k] + eo[k]; 866 e[k + 4] = ee[3 - k] - eo[3 - k]; 867 } 868 for(k = 0; k < 8; k++) 869 { 870 WORD32 itrans_out; 871 itrans_out = 872 CLIP_S16(((e[k] + o[k] + add) >> shift)); 873 pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); 874 itrans_out = 875 CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); 876 pu1_dst[k + 8] = CLIP_U8((itrans_out + pu1_pred[k + 8])); 877 } 878 pi2_tmp++; 879 pu1_pred += pred_strd; 880 pu1_dst += dst_strd; 881 } 882 } 883 /************************************************************************************************/ 884 /************************************END - IT_RECON_16x16****************************************/ 885 /************************************************************************************************/ 886 } 887 888 } 889 890