/******************************************************************************
 *
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *****************************************************************************
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
 */
/**
 *******************************************************************************
 * @file
 *  ih264_resi_trans_quant.c
 *
 * @brief
 *  Contains function definitions for the single stage forward transform for
 *  H.264. Each function calculates the residue, applies the cf (forward
 *  transform) and then quantizes the result.
 *
 * @author
 *  Ittiam
 *
 * @par List of Functions:
 *  - ih264_resi_trans_quant_4x4()
 *  - ih264_resi_trans_quant_chroma_4x4()
 *  - ih264_hadamard_quant_4x4()
 *  - ih264_hadamard_quant_2x2_uv()
 *  - ih264_resi_trans_quant_8x8()
 *
 * @remarks
 *******************************************************************************
 */

/*****************************************************************************/
/* File Includes                                                             */
/*****************************************************************************/

/* System include files */
#include <stddef.h>

/* User include files */
#include "ih264_typedefs.h"
#include "ih264_defs.h"
#include "ih264_size_defs.h"
#include "ih264_macros.h"
#include "ih264_trans_macros.h"
#include "ih264_trans_data.h"
#include "ih264_structs.h"
#include "ih264_trans_quant_itrans_iquant.h"

/**
 *******************************************************************************
 *
 * @brief
 *  Forward transform and quantization of one 4x4 block.
 *
 * @par Description:
 *  Computes the residue (source minus prediction) of a 4x4 block, applies the
 *  4x4 forward transform as a row pass followed by a column pass, and
 *  quantizes every coefficient with FWD_QUANT. Both passes work in place on
 *  the output buffer, which is laid out as 16 contiguous values (row stride 4).
 *
 * @param[in] pu1_src
 *  Pointer to the source sub-block (4 samples are read per row)
 *
 * @param[in] pu1_pred
 *  Pointer to the prediction sub-block (4 samples are read per row)
 *
 * @param[out] pi2_out
 *  Pointer to the 16-entry buffer that receives the quantized coefficients;
 *  it also holds the intermediate row-transform results
 *
 * @param[in] src_strd
 *  Source stride
 *
 * @param[in] pred_strd
 *  Prediction stride
 *
 * @param[in] pu2_scale_matrix
 *  Pointer to Forward Quant Scale Matrix (16 entries are read)
 *
 * @param[in] pu2_threshold_matrix
 *  Pointer to Forward Quant Threshold Matrix (16 entries are read)
 *
 * @param[in] u4_qbits
 *  QP_BITS_h264_4x4 + floor(QP/6)
 *
 * @param[in] u4_round_factor
 *  Quantization Round factor
 *
 * @param[out] pu1_nnz
 *  Receives the non-zero coefficient count accumulated by FWD_QUANT for the
 *  current sub-block
 *
 * @param[out] pi2_alt_dc_addr
 *  Receives the DC coefficient as it is before quantization
 *
 * @returns
 *  None
 *
 * @remarks
 *  FWD_QUANT is defined outside this file; from its use here it rewrites
 *  i4_value in place and updates the counter passed as its last argument.
 *  NOTE(review): confirm the exact thresholding/rounding against the macro.
 *
 *******************************************************************************
 */
void ih264_resi_trans_quant_4x4(UWORD8 *pu1_src,
                                UWORD8 *pu1_pred,
                                WORD16 *pi2_out,
                                WORD32 src_strd,
                                WORD32 pred_strd,
                                const UWORD16 *pu2_scale_matrix,
                                const UWORD16 *pu2_threshold_matrix,
                                UWORD32 u4_qbits,
                                UWORD32 u4_round_factor,
                                UWORD8 *pu1_nnz,
                                WORD16 *pi2_alt_dc_addr)
{
    UWORD32 i;
    WORD32 x0, x1, x2, x3, x4, x5, x6, x7;
    WORD32 i4_value, i4_sign;
    UWORD32 u4_abs_value;
    WORD16 *pi2_out_tmp = pi2_out;
    UWORD32 u4_nonzero_coeff = 0;

    /* Pass 1: residue computation and horizontal (row) transform */
    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
    {
        /* computing prediction error (residue) */
        x4 = pu1_src[0] - pu1_pred[0];
        x5 = pu1_src[1] - pu1_pred[1];
        x6 = pu1_src[2] - pu1_pred[2];
        x7 = pu1_src[3] - pu1_pred[3];

        /* butterfly */
        x0 = x4 + x7;
        x1 = x5 + x6;
        x2 = x5 - x6;
        x3 = x4 - x7;

        /* The doubling is written as a multiplication: x2 and x3 can be  */
        /* negative and left-shifting a negative signed value is undefined */
        pi2_out_tmp[0] = x0 + x1;
        pi2_out_tmp[1] = (x3 * 2) + x2;
        pi2_out_tmp[2] = x0 - x1;
        pi2_out_tmp[3] = x3 - (x2 * 2);

        /* pointing to next row */
        pu1_src += src_strd;
        pu1_pred += pred_strd;
        pi2_out_tmp += 4;
    }

    /* Pass 2: vertical (column) transform and in-place quantization */
    pi2_out_tmp = pi2_out;
    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
    {
        x4 = pi2_out_tmp[0];
        x5 = pi2_out_tmp[4];
        x6 = pi2_out_tmp[8];
        x7 = pi2_out_tmp[12];

        x0 = x4 + x7;
        x1 = x5 + x6;
        x2 = x5 - x6;
        x3 = x4 - x7;

        i4_value = x0 + x1;

        /* First column, first row: hand the unquantized DC to the caller */
        if(i == 0)
        {
            (*pi2_alt_dc_addr) = i4_value;
        }

        FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[0],
                  pu2_scale_matrix[0], u4_round_factor, u4_qbits,
                  u4_nonzero_coeff);
        pi2_out_tmp[0] = i4_value;

        i4_value = (x3 * 2) + x2;
        FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[4],
                  pu2_scale_matrix[4], u4_round_factor, u4_qbits,
                  u4_nonzero_coeff);
        pi2_out_tmp[4] = i4_value;

        i4_value = x0 - x1;
        FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[8],
                  pu2_scale_matrix[8], u4_round_factor, u4_qbits,
                  u4_nonzero_coeff);
        pi2_out_tmp[8] = i4_value;

        i4_value = x3 - (x2 * 2);
        FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[12],
                  pu2_scale_matrix[12], u4_round_factor, u4_qbits,
                  u4_nonzero_coeff);
        pi2_out_tmp[12] = i4_value;

        /* advance to the next column of the block and of both matrices */
        pi2_out_tmp++;
        pu2_scale_matrix++;
        pu2_threshold_matrix++;
    }

    /* Return total nonzero coefficients in the current sub block */
    *pu1_nnz = u4_nonzero_coeff;
}
/** 208 ******************************************************************************* 209 * 210 * @brief 211 * This function performs forward transform and quantization on a 4*4 chroma block 212 * with interleaved values 213 * 214 * @par Description: 215 * The function accepts source buffer and estimation buffer. From these, it 216 * computes the residue. This is residue is then transformed and quantized. 217 * The transform and quantization are in placed computed. They use the residue 218 * buffer for this. 219 * 220 * @param[in] pu1_src 221 * Pointer to source sub-block 222 * 223 * @param[in] pu1_pred 224 * Pointer to prediction sub-block 225 * 226 * @param[in] pi2_out 227 * Pointer to residual sub-block 228 * 229 * @param[in] src_strd 230 * Source stride 231 * 232 * @param[in] pred_strd 233 * Prediction stride 234 * 235 * @param[in] dst_strd 236 * Destination stride 237 * 238 * @param[in] u4_qbits 239 * QP_BITS_h264_4x4 + floor(QP/6) 240 * 241 * @param[in] pu2_threshold_matrix 242 * Pointer to Forward Quant Threshold Matrix 243 * 244 * @param[in] pu2_scale_matrix 245 * Pointer to Forward Quant Scale Matrix 246 * 247 * @param[in] u4_round_factor 248 * Quantization Round factor 249 * 250 * @param[out] pu1_nnz 251 * Total non-zero coefficients in the current sub-block 252 * 253 * @returns 254 * 255 * @remarks 256 * None 257 * 258 ******************************************************************************* 259 */ 260 void ih264_resi_trans_quant_chroma_4x4(UWORD8 *pu1_src, 261 UWORD8 *pu1_pred, 262 WORD16 *pi2_out, 263 WORD32 src_strd, 264 WORD32 pred_strd, 265 const UWORD16 *pu2_scale_matrix, 266 const UWORD16 *pu2_threshold_matrix, 267 UWORD32 u4_qbits, 268 UWORD32 u4_round_factor, 269 UWORD8 *pu1_nnz, 270 WORD16 *pu1_dc_alt_addr) 271 { 272 UWORD32 i; 273 WORD32 x0, x1, x2, x3, x4, x5, x6, x7; 274 WORD32 i4_value, i4_sign; 275 UWORD32 u4_abs_value; 276 WORD16 *pi2_out_tmp = pi2_out; 277 UWORD32 u4_nonzero_coeff = 0; 278 279 for (i = 0; i < SUB_BLK_WIDTH_4x4; 
i++) 280 { 281 /* computing prediction error (residue) */ 282 x4 = pu1_src[0] - pu1_pred[0]; 283 x5 = pu1_src[2] - pu1_pred[2]; 284 x6 = pu1_src[4] - pu1_pred[4]; 285 x7 = pu1_src[6] - pu1_pred[6]; 286 287 /* Horizontal transform */ 288 x0 = x4 + x7; 289 x1 = x5 + x6; 290 x2 = x5 - x6; 291 x3 = x4 - x7; 292 293 pi2_out_tmp[0] = x0 + x1; 294 pi2_out_tmp[1] = (x3 <<1) + x2; 295 pi2_out_tmp[2] = x0 - x1; 296 pi2_out_tmp[3] = x3 - (x2<<1); 297 298 /* pointing to next row; */ 299 pu1_src += src_strd; 300 pu1_pred += pred_strd; 301 pi2_out_tmp += 4; 302 303 } 304 pi2_out_tmp = pi2_out; 305 for (i = 0; i < SUB_BLK_WIDTH_4x4; i++) 306 { 307 308 /* Vertical transform and quantization */ 309 x4 = pi2_out_tmp[0]; 310 x5 = pi2_out_tmp[4]; 311 x6 = pi2_out_tmp[8]; 312 x7 = pi2_out_tmp[12]; 313 314 315 x0 = x4 + x7; 316 x1 = x5 + x6; 317 x2 = x5 - x6; 318 x3 = x4 - x7; 319 320 /* quantization is done in place */ 321 322 i4_value = x0 + x1; 323 324 if(i==0) 325 { 326 *pu1_dc_alt_addr = i4_value; 327 } 328 329 FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[0], 330 pu2_scale_matrix[0], u4_round_factor, u4_qbits, 331 u4_nonzero_coeff); 332 pi2_out_tmp[0] = i4_value; 333 334 i4_value = (x3 << 1) + x2; 335 FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[4], 336 pu2_scale_matrix[4], u4_round_factor, u4_qbits, 337 u4_nonzero_coeff); 338 pi2_out_tmp[4] = i4_value; 339 340 i4_value = x0 - x1; 341 FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[8], 342 pu2_scale_matrix[8], u4_round_factor, u4_qbits, 343 u4_nonzero_coeff); 344 pi2_out_tmp[8] = i4_value; 345 346 i4_value = x3 - (x2 << 1); 347 FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[12], 348 pu2_scale_matrix[12], u4_round_factor, u4_qbits, 349 u4_nonzero_coeff); 350 pi2_out_tmp[12] = i4_value; 351 352 pi2_out_tmp ++; 353 pu2_scale_matrix++; 354 pu2_threshold_matrix++; 355 } 356 357 /* Return total nonzero coefficients in the current sub block */ 358 *pu1_nnz = 
u4_nonzero_coeff; 359 } 360 361 /** 362 ******************************************************************************* 363 * 364 * @brief 365 * This function performs forward hadamard transform and quantization on a 4*4 block 366 * 367 * @par Description: 368 * The function accepts source buffer and estimation buffer. From these, it 369 * computes the residue. This is residue is then transformed and quantized. 370 * The transform and quantization are in placed computed. They use the residue 371 * buffer for this. 372 * 373 * @param[in] pu1_src 374 * Pointer to source sub-block 375 * 376 * @param[in] pu1_pred 377 * Pointer to prediction sub-block 378 * 379 * @param[in] pi2_out 380 * Pointer to residual sub-block 381 * 382 * @param[in] src_strd 383 * Source stride 384 * 385 * @param[in] pred_strd 386 * Prediction stride 387 * 388 * @param[in] dst_strd 389 * Destination stride 390 * 391 * @param[in] u4_qbits 392 * QP_BITS_h264_4x4 + floor(QP/6) 393 * 394 * @param[in] pu2_threshold_matrix 395 * Pointer to Forward Quant Threshold Matrix 396 * 397 * @param[in] pu2_scale_matrix 398 * Pointer to Forward Quant Scale Matrix 399 * 400 * @param[in] u4_round_factor 401 * Quantization Round factor 402 * 403 * @param[out] pu1_nnz 404 * Total non-zero coefficients in the current sub-block 405 * 406 * @returns 407 * 408 * @remarks 409 * None 410 * 411 */ 412 413 void ih264_hadamard_quant_4x4(WORD16 *pi2_src, 414 WORD16 *pi2_dst, 415 const UWORD16 *pu2_scale_matrix, 416 const UWORD16 *pu2_threshold_matrix, 417 UWORD32 u4_qbits, 418 UWORD32 u4_round_factor, 419 UWORD8 *pu1_nnz) 420 { 421 WORD32 i; 422 WORD32 x0,x1,x2,x3,x4,x5,x6,x7,i4_value; 423 UWORD32 u4_abs_value; 424 WORD32 i4_sign; 425 426 *pu1_nnz = 0; 427 428 for (i = 0; i < SUB_BLK_WIDTH_4x4; i++) 429 { 430 x4 = pi2_src[0]; 431 x5 = pi2_src[1]; 432 x6 = pi2_src[2]; 433 x7 = pi2_src[3]; 434 435 x0 = x4 + x7; 436 x1 = x5 + x6; 437 x2 = x5 - x6; 438 x3 = x4 - x7; 439 440 pi2_dst[0] = x0 + x1; 441 pi2_dst[1] = x3 + x2; 442 
pi2_dst[2] = x0 - x1; 443 pi2_dst[3] = x3 - x2; 444 445 pi2_src += 4; 446 pi2_dst += 4; 447 } 448 449 /* Vertical transform and quantization */ 450 pi2_dst -= SUB_BLK_WIDTH_4x4<<2; 451 452 for (i = 0; i < SUB_BLK_WIDTH_4x4; i++) 453 { 454 x4 = pi2_dst[0]; 455 x5 = pi2_dst[4]; 456 x6 = pi2_dst[8]; 457 x7 = pi2_dst[12] ; 458 459 x0 = x4 + x7; 460 x1 = x5 + x6; 461 x2 = x5 - x6; 462 x3 = x4 - x7; 463 464 465 i4_value = (x0 + x1) >> 1; 466 FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[0], 467 pu2_scale_matrix[0], u4_round_factor, u4_qbits, pu1_nnz[0]); 468 pi2_dst[0] = i4_value; 469 470 i4_value = (x3 + x2) >> 1; 471 FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[0], 472 pu2_scale_matrix[0], u4_round_factor, u4_qbits, pu1_nnz[0]); 473 pi2_dst[4] = i4_value; 474 475 i4_value = (x0 - x1) >> 1; 476 FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[0], 477 pu2_scale_matrix[0], u4_round_factor, u4_qbits, pu1_nnz[0]); 478 pi2_dst[8] = i4_value; 479 480 i4_value = (x3 - x2) >> 1; 481 FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[0], 482 pu2_scale_matrix[0], u4_round_factor, u4_qbits, pu1_nnz[0]); 483 pi2_dst[12] = i4_value; 484 485 pi2_dst ++; 486 } 487 } 488 489 /** 490 ******************************************************************************* 491 * 492 * @brief 493 * This function performs forward hadamard transform and quantization on a 2*2 block 494 * for both U and V planes 495 * 496 * @par Description: 497 * The function accepts source buffer and estimation buffer. From these, it 498 * computes the residue. This is residue is then transformed and quantized. 499 * The transform and quantization are in placed computed. They use the residue 500 * buffer for this. 
501 * 502 * @param[in] pu1_src 503 * Pointer to source sub-block 504 * 505 * @param[in] pu1_pred 506 * Pointer to prediction sub-block 507 * 508 * @param[in] pi2_out 509 * Pointer to residual sub-block 510 * 511 * @param[in] src_strd 512 * Source stride 513 * 514 * @param[in] pred_strd 515 * Prediction stride 516 * 517 * @param[in] dst_strd 518 * Destination stride 519 * 520 * @param[in] u4_qbits 521 * QP_BITS_h264_4x4 + floor(QP/6) 522 * 523 * @param[in] pu2_threshold_matrix 524 * Pointer to Forward Quant Threshold Matrix 525 * 526 * @param[in] pu2_scale_matrix 527 * Pointer to Forward Quant Scale Matrix 528 * 529 * @param[in] u4_round_factor 530 * Quantization Round factor 531 * 532 * @param[out] pu1_nnz 533 * Total non-zero coefficients in the current sub-block 534 * 535 * @returns 536 * 537 * @remarks 538 * NNZ for dc is populated at 0 and 5th position of pu1_nnz 539 * 540 */ 541 542 void ih264_hadamard_quant_2x2_uv(WORD16 *pi2_src, 543 WORD16 *pi2_dst, 544 const UWORD16 *pu2_scale_matrix, 545 const UWORD16 *pu2_threshold_matrix, 546 UWORD32 u4_qbits, 547 UWORD32 u4_round_factor, 548 UWORD8 *pu1_nnz) 549 { 550 WORD32 x0, x1, x2, x3, x4, x5, x6, x7; 551 WORD32 i4_value, i4_sign, plane; 552 UWORD32 u4_abs_value; 553 554 for(plane = 0; plane < 2; plane++) 555 { 556 pu1_nnz[plane] = 0; 557 558 /* Horizontal transform */ 559 x4 = pi2_src[0]; 560 x5 = pi2_src[1]; 561 x6 = pi2_src[2]; 562 x7 = pi2_src[3]; 563 564 x0 = x4 + x5; 565 x1 = x4 - x5; 566 x2 = x6 + x7; 567 x3 = x6 - x7; 568 569 /* Vertical transform and quantization */ 570 i4_value = (x0 + x2); 571 FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[0], 572 pu2_scale_matrix[0], u4_round_factor, u4_qbits, 573 pu1_nnz[plane]); 574 pi2_dst[0] = i4_value; 575 576 i4_value = (x0 - x2); 577 FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[0], 578 pu2_scale_matrix[0], u4_round_factor, u4_qbits, 579 pu1_nnz[plane]); 580 pi2_dst[2] = i4_value; 581 582 i4_value = (x1 - x3); 583 
FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[0], 584 pu2_scale_matrix[0], u4_round_factor, u4_qbits, 585 pu1_nnz[plane]); 586 pi2_dst[3] = i4_value; 587 588 i4_value = (x1 + x3); 589 FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[0], 590 pu2_scale_matrix[0], u4_round_factor, u4_qbits, 591 pu1_nnz[plane]); 592 pi2_dst[1] = i4_value; 593 594 pi2_dst += 4; 595 pi2_src += 4; 596 597 } 598 } 599 600 /* 601 ******************************************************************************* 602 * 603 * @brief 604 * This function performs Single stage forward transform CF8 and quantization on 8*8 blocks 605 * for h.264 606 * 607 * @par Description: 608 * Performs single stage 8x8 forward transform CF8 after calculating the residue 609 * The result is then quantized 610 * 611 * @param[in] pu1_src 612 * Input 8x8 pixels 613 * 614 * @param[in] pu1_pred 615 * Input 8x8 pixels 616 * 617 * @param[in] pi1_out 618 * Output 8x8 pixels 619 * 620 * @param[in] u4_thresh 621 * Threshold under which the coeffs are not quantized 622 * 623 * @param[in] u4_qp_div 624 * QP/6 625 * 626 * @param[in] u4_qp_rem 627 * QP%6 628 * 629 * @param[in] u2_src_stride 630 * Source stride 631 * 632 * @param[in] pred_strd 633 * stride for prediciton buffer 634 * 635 * @param[in] dst_strd 636 * stride for destination buffer 637 * 638 * @param[in] pu4_quant_mat 639 * Pointer to the 4x4 quantization matrix 640 * 641 * @returns Void 642 * 643 * 644 ******************************************************************************* 645 */ 646 void ih264_resi_trans_quant_8x8(UWORD8 *pu1_src, 647 UWORD8 *pu1_pred, 648 WORD16 *pi2_out, 649 WORD32 src_strd, 650 WORD32 pred_strd, 651 const UWORD16 *pu2_scale_matrix, 652 const UWORD16 *pu2_threshold_matrix, 653 UWORD32 u4_qbits, 654 UWORD32 u4_round_factor, 655 UWORD8 *pu1_nnz, 656 WORD16 *pu1_dc_alt_addr) 657 658 { 659 WORD16 *pi2_out_tmp = pi2_out; 660 UWORD32 i; 661 WORD32 a0, a1, a2, a3, a4, a5, a6, a7; 662 WORD32 r0, r1, r2, r3, r4, 
r5, r6, r7; 663 WORD32 i4_sign; 664 UWORD32 u4_abs_value; 665 UWORD32 u4_nonzero_coeff = 0; 666 667 UNUSED(pu1_dc_alt_addr); 668 669 /*Horizontal transform */ 670 /* we are going to use the a's and r's in a twisted way since */ 671 /*i dont want to declare more variables */ 672 for(i = 0; i < SUB_BLK_WIDTH_8x8; ++i) 673 { 674 r0 = pu1_src[0]; 675 r0 -= pu1_pred[0]; 676 r1 = pu1_src[1]; 677 r1 -= pu1_pred[1]; 678 r2 = pu1_src[2];r2 -= pu1_pred[2]; 679 r3 = pu1_src[3];r3 -= pu1_pred[3]; 680 r4 = pu1_src[4];r4 -= pu1_pred[4]; 681 r5 = pu1_src[5];r5 -= pu1_pred[5]; 682 r6 = pu1_src[6];r6 -= pu1_pred[6]; 683 r7 = pu1_src[7];r7 -= pu1_pred[7]; 684 685 686 a0 = r0 + r7; 687 a1 = r1 + r6; 688 a2 = r2 + r5; 689 a3 = r3 + r4; 690 691 a4 = a0 + a3; 692 a5 = a1 + a2; 693 a6 = a0 - a3; 694 a7 = a1 - a2; 695 696 pi2_out_tmp[0] = a4 + a5; 697 698 pi2_out_tmp[2] = a6 + (a7>>1); 699 pi2_out_tmp[4] = a4 - a5; 700 pi2_out_tmp[6] = (a6>>1) - a7; 701 702 a0 = r0 - r7; 703 a1 = r1 - r6; 704 a2 = r2 - r5; 705 a3 = r3 - r4; 706 707 a4 = a1 + a2 + ((a0>>1) + a0); 708 a5 = a0 - a3 - ((a2>>1) + a2); 709 a6 = a0 + a3 - ((a1>>1) + a1); 710 a7 = a1 - a2 + ((a3>>1) + a3); 711 712 pi2_out_tmp[1] = a4 + (a7>>2); 713 pi2_out_tmp[3] = a5 + (a6>>2); 714 pi2_out_tmp[5] = a6 - (a5>>2); 715 pi2_out_tmp[7] = (a4>>2) - a7; 716 717 pu1_src += src_strd; 718 pu1_pred += pred_strd; 719 pi2_out_tmp += 8; 720 } 721 722 /*vertical transform and quant */ 723 724 pi2_out_tmp = pi2_out; 725 726 for (i = 0; i < SUB_BLK_WIDTH_8x8; ++i) 727 { 728 729 r0 = pi2_out_tmp[0]; 730 r1 = pi2_out_tmp[8]; 731 r2 = pi2_out_tmp[16]; 732 r3 = pi2_out_tmp[24]; 733 r4 = pi2_out_tmp[32]; 734 r5 = pi2_out_tmp[40]; 735 r6 = pi2_out_tmp[48]; 736 r7 = pi2_out_tmp[56]; 737 738 a0 = r0 + r7; 739 a1 = r1 + r6; 740 a2 = r2 + r5; 741 a3 = r3 + r4; 742 743 a4 = a0 + a3; 744 a5 = a1 + a2; 745 a6 = a0 - a3; 746 a7 = a1 - a2; 747 748 a0 = r0 - r7; 749 a1 = r1 - r6; 750 a2 = r2 - r5; 751 a3 = r3 - r4; 752 753 r0 = a4 + a5; 754 r2 = a6 + (a7>>1); 
755 r4 = a4 - a5; 756 r6 = (a6>>1) - a7; 757 758 a4 = a1 + a2 + ((a0>>1) + a0); 759 a5 = a0 - a3 - ((a2>>1) + a2); 760 a6 = a0 + a3 - ((a1>>1) + a1); 761 a7 = a1 - a2 + ((a3>>1) + a3); 762 763 r1 = a4 + (a7>>2); 764 r3 = a5 + (a6>>2); 765 r5 = a6 - (a5>>2); 766 r7 = (a4>>2) - a7; 767 768 FWD_QUANT(r0, u4_abs_value, i4_sign, pu2_threshold_matrix[0], 769 pu2_scale_matrix[0], u4_round_factor, u4_qbits, 770 u4_nonzero_coeff); 771 pi2_out_tmp[0] = r0; 772 773 FWD_QUANT(r1, u4_abs_value, i4_sign, pu2_threshold_matrix[8], 774 pu2_scale_matrix[8], u4_round_factor, u4_qbits, 775 u4_nonzero_coeff); 776 pi2_out_tmp[8] = r1; 777 778 FWD_QUANT(r2, u4_abs_value, i4_sign, pu2_threshold_matrix[16], 779 pu2_scale_matrix[16], u4_round_factor, u4_qbits, 780 u4_nonzero_coeff); 781 pi2_out_tmp[16] = r2; 782 783 FWD_QUANT(r3, u4_abs_value, i4_sign, pu2_threshold_matrix[24], 784 pu2_scale_matrix[24], u4_round_factor, u4_qbits, 785 u4_nonzero_coeff); 786 pi2_out_tmp[24] = r3; 787 788 FWD_QUANT(r4, u4_abs_value, i4_sign, pu2_threshold_matrix[32], 789 pu2_scale_matrix[32], u4_round_factor, u4_qbits, 790 u4_nonzero_coeff); 791 pi2_out_tmp[32] = r4; 792 793 FWD_QUANT(r5, u4_abs_value, i4_sign, pu2_threshold_matrix[40], 794 pu2_scale_matrix[40], u4_round_factor, u4_qbits, 795 u4_nonzero_coeff); 796 pi2_out_tmp[40] = r5; 797 798 FWD_QUANT(r6, u4_abs_value, i4_sign, pu2_threshold_matrix[48], 799 pu2_scale_matrix[48], u4_round_factor, u4_qbits, 800 u4_nonzero_coeff); 801 pi2_out_tmp[48] = r6; 802 803 FWD_QUANT(r7, u4_abs_value, i4_sign, pu2_threshold_matrix[56], 804 pu2_scale_matrix[56], u4_round_factor, u4_qbits, 805 u4_nonzero_coeff); 806 pi2_out_tmp[56] = r7; 807 808 pi2_out_tmp++; 809 pu2_scale_matrix++; 810 pu2_threshold_matrix++; 811 } 812 /* Return total nonzero coefficients in the current sub block */ 813 *pu1_nnz = u4_nonzero_coeff; 814 } 815