1 /****************************************************************************** 2 * 3 * Copyright (C) 2015 The Android Open Source Project 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ***************************************************************************** 18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore 19 */ 20 21 /** 22 ******************************************************************************* 23 * @file 24 * ih264e_core_coding.c 25 * 26 * @brief 27 * This file contains routines that perform luma and chroma core coding for 28 * intra macroblocks 29 * 30 * @author 31 * ittiam 32 * 33 * @par List of Functions: 34 * - ih264e_pack_l_mb_i16() 35 * - ih264e_pack_c_mb_i8() 36 * - ih264e_code_luma_intra_macroblock_16x16() 37 * - ih264e_code_luma_intra_macroblock_4x4() 38 * - ih264e_code_chroma_intra_macroblock_8x8() 39 * 40 * @remarks 41 * None 42 * 43 ******************************************************************************* 44 */ 45 46 /*****************************************************************************/ 47 /* File Includes */ 48 /*****************************************************************************/ 49 50 /* System include files */ 51 #include <stdio.h> 52 #include <string.h> 53 #include <assert.h> 54 55 /* User include files */ 56 #include "ih264e_config.h" 57 #include "ih264_typedefs.h" 58 #include "ih264_platform_macros.h" 59 #include "iv2.h" 60 #include "ive2.h" 61 #include "ih264_macros.h" 62 #include "ih264_defs.h" 63 #include "ih264e_defs.h" 64 #include "ih264_trans_data.h" 65 #include "ih264e_error.h" 66 #include "ih264e_bitstream.h" 67 #include "ime_distortion_metrics.h" 68 #include "ime_defs.h" 69 #include "ime_structs.h" 70 #include "ih264_structs.h" 71 #include "ih264_trans_quant_itrans_iquant.h" 72 #include "ih264_inter_pred_filters.h" 73 #include "ih264_mem_fns.h" 74 #include "ih264_padding.h" 75 #include "ih264_intra_pred_filters.h" 76 #include "ih264_deblk_edge_filters.h" 77 #include "ih264_cabac_tables.h" 78 #include "irc_cntrl_param.h" 79 #include "irc_frame_info_collector.h" 80 #include "ih264e_rate_control.h" 81 #include "ih264e_cabac_structs.h" 82 #include "ih264e_structs.h" 83 #include "ih264e_globals.h" 84 #include "ih264e_core_coding.h" 85 #include "ih264e_mc.h" 86 87 88 /*****************************************************************************/ 89 /* Function Definitions */ 90 /*****************************************************************************/ 91 92 /** 93 ******************************************************************************* 94 * 95 * @brief 96 * This function performs does the DCT transform then Hadamard transform 97 * and quantization for a macroblock when the mb mode is intra 16x16 mode 98 * 99 * @par Description: 100 * First cf4 is done on all 16 4x4 blocks of the 16x16 input block. 101 * Then hadamard transform is done on the DC coefficients 102 * Quantization is then performed on the 16x16 block, 4x4 wise 103 * 104 * @param[in] pu1_src 105 * Pointer to source sub-block 106 * 107 * @param[in] pu1_pred 108 * Pointer to prediction sub-block 109 * 110 * @param[in] pi2_out 111 * Pointer to residual sub-block 112 * The output will be in linear format 113 * The first 16 continuous locations will contain the values of Dc block 114 * After DC block and a stride 1st AC block will follow 115 * After one more stride next AC block will follow 116 * The blocks will be in raster scan order 117 * 118 * @param[in] src_strd 119 * Source stride 120 * 121 * @param[in] pred_strd 122 * Prediction stride 123 * 124 * @param[in] dst_strd 125 * Destination stride 126 * 127 * @param[in] pu2_scale_matrix 128 * The quantization matrix for 4x4 transform 129 * 130 * @param[in] pu2_threshold_matrix 131 * Threshold matrix 132 * 133 * @param[in] u4_qbits 134 * 15+QP/6 135 * 136 * @param[in] u4_round_factor 137 * Round factor for quant 138 * 139 * @param[out] pu1_nnz 140 * Memory to store the non-zeros after transform 141 * The first byte will be the nnz of DC block 142 * From the next byte the AC nnzs will be stored in raster scan order 143 * 144 * @param u4_dc_flag 145 * Signals if Dc transform is to be done or not 146 * 1 -> Dc transform will be done 147 * 0 -> Dc transform will not be done 148 * 149 * @remarks 150 * 151 ******************************************************************************* 152 */ 153 void ih264e_luma_16x16_resi_trans_dctrans_quant(codec_t *ps_codec, 154 UWORD8 *pu1_src, 155 UWORD8 *pu1_pred, 156 WORD16 *pi2_out, 157 WORD32 src_strd, 158 WORD32 pred_strd, 159 WORD32 dst_strd, 160 const UWORD16 *pu2_scale_matrix, 161 const UWORD16 *pu2_threshold_matrix, 162 UWORD32 u4_qbits, 163 UWORD32 u4_round_factor, 164 UWORD8 *pu1_nnz, 165 UWORD32 u4_dc_flag) 166 167 { 168 WORD32 blk_cntr; 169 WORD32 i4_offsetx, i4_offsety; 170 UWORD8 *pu1_curr_src, *pu1_curr_pred; 171 172 WORD16 *pi2_dc_str = pi2_out; 173 174 /* Move to the ac addresses */ 175 pu1_nnz++; 176 pi2_out += dst_strd; 177 178 for (blk_cntr = 0; blk_cntr < NUM_LUMA4x4_BLOCKS_IN_MB; blk_cntr++) 179 { 180 IND2SUB_LUMA_MB(blk_cntr, i4_offsetx, i4_offsety); 181 182 pu1_curr_src = pu1_src + i4_offsetx + i4_offsety * src_strd; 183 pu1_curr_pred = pu1_pred + i4_offsetx + i4_offsety * pred_strd; 184 185 ps_codec->pf_resi_trans_quant_4x4(pu1_curr_src, pu1_curr_pred, 186 pi2_out + blk_cntr * dst_strd, 187 src_strd, pred_strd, pu2_scale_matrix, 188 pu2_threshold_matrix, u4_qbits, 189 u4_round_factor, &pu1_nnz[blk_cntr], 190 &pi2_dc_str[blk_cntr]); 191 192 } 193 194 if (!u4_dc_flag) 195 return; 196 197 /* 198 * In case of i16x16, we need to remove the contribution of dc coeffs into 199 * nnz of each block. We are doing that in the packing function 200 */ 201 202 /* Adjust pointers to point to dc values */ 203 pi2_out -= dst_strd; 204 pu1_nnz--; 205 206 u4_qbits++; 207 u4_round_factor <<= 1; 208 209 ps_codec->pf_hadamard_quant_4x4(pi2_dc_str, pi2_out, pu2_scale_matrix, 210 pu2_threshold_matrix, u4_qbits, 211 u4_round_factor, &pu1_nnz[0]); 212 } 213 214 /** 215 ******************************************************************************* 216 * 217 * @brief 218 * This function performs the intra 16x16 inverse transform process for H264 219 * it includes inverse Dc transform, inverse quant and then inverse transform 220 * 221 * @par Description: 222 * 223 * @param[in] pi2_src 224 * Input data, 16x16 size 225 * First 16 mem locations will have the Dc coffs in rater scan order in linear fashion 226 * after a stride 1st AC clock will be present again in raster can order 227 * Then each AC block of the 16x16 block will follow in raster scan order 228 * 229 * @param[in] pu1_pred 230 * The predicted data, 16x16 size 231 * Block by block form 232 * 233 * @param[in] pu1_out 234 * Output 16x16 235 * In block by block form 236 * 237 * @param[in] src_strd 238 * Source stride 239 * 240 * @param[in] pred_strd 241 * input stride for prediction buffer 242 * 243 * @param[in] out_strd 244 * input stride for output buffer 245 * 246 * @param[in] pu2_iscale_mat 247 * Inverse quantization matrix for 4x4 transform 248 * 249 * @param[in] pu2_weigh_mat 250 * weight matrix of 4x4 transform 251 * 252 * @param[in] qp_div 253 * QP/6 254 * 255 * @param[in] pi4_tmp 256 * Input temporary buffer 257 * needs to be at least 20 in size 258 * 259 * @param[in] pu4_cntrl 260 * Controls the transform path 261 * total Last 17 bits are used 262 * the 16th th bit will correspond to DC block 263 * and 32-17 will correspond to the ac blocks in raster scan order 264 * bit equaling zero indicates that the entire 4x4 block is zero for DC 265 * For AC blocks a bit equaling zero will mean that all 15 AC coffs of the block is nonzero 266 * 267 * @param[in] pi4_tmp 268 * Input temporary buffer 269 * needs to be at least COFF_CNT_SUB_BLK_4x4+COFF_CNT_SUB_BLK_4x4 size 270 * 271 * @returns 272 * none 273 * 274 * @remarks 275 * The all zero case must be taken care outside 276 * 277 ******************************************************************************* 278 */ 279 void ih264e_luma_16x16_idctrans_iquant_itrans_recon(codec_t *ps_codec, 280 WORD16 *pi2_src, 281 UWORD8 *pu1_pred, 282 UWORD8 *pu1_out, 283 WORD32 src_strd, 284 WORD32 pred_strd, 285 WORD32 out_strd, 286 const UWORD16 *pu2_iscale_mat, 287 const UWORD16 *pu2_weigh_mat, 288 UWORD32 qp_div, 289 UWORD32 u4_cntrl, 290 UWORD32 u4_dc_trans_flag, 291 WORD32 *pi4_tmp) 292 { 293 /* Start index for inverse quant in a 4x4 block */ 294 WORD32 iq_start_idx = (u4_dc_trans_flag == 0) ? 0 : 1; 295 296 /* Cntrl bits for 4x4 transforms 297 * u4_blk_cntrl : controls if a 4x4 block should be processed in ac path 298 * u4_dc_cntrl : controls is a 4x4 block is to be processed in dc path 299 * : dc block must contain only single dc coefficient 300 * u4_empty_blk_cntrl : control fot 4x4 block with no coeffs, ie no dc and ac 301 * : ie not (ac or dc) 302 */ 303 UWORD32 u4_blk_cntrl, u4_dc_cntrl, u4_empty_blk_cntrl; 304 305 /* tmp registers for block ids */ 306 UWORD32 u4_blk_id; 307 308 /* Subscrripts */ 309 WORD32 i4_offset_x, i4_offset_y; 310 311 UWORD8 *pu1_cur_prd_blk, *pu1_cur_out_blk; 312 313 /* Src and stride for dc coeffs */ 314 UWORD32 u4_dc_inc; 315 WORD16 *pi2_dc_src; 316 317 /* 318 * For intra blocks we need to do inverse dc transform 319 * In case if intra blocks, its here that we populate the dc bits in cntrl 320 * as they cannot be populated any earlier 321 */ 322 if (u4_dc_trans_flag) 323 { 324 UWORD32 cntr, u4_dc_cntrl; 325 /* Do inv hadamard and place the results at the start of each AC block */ 326 ps_codec->pf_ihadamard_scaling_4x4(pi2_src, pi2_src, pu2_iscale_mat, 327 pu2_weigh_mat, qp_div, pi4_tmp); 328 329 /* Update the cntrl flag */ 330 u4_dc_cntrl = 0; 331 for (cntr = 0; cntr < DC_COEFF_CNT_LUMA_MB; cntr++) 332 { 333 u4_dc_cntrl |= ((pi2_src[cntr] != 0) << (15 - cntr)); 334 } 335 /* Mark dc bits as 1 if corresponding ac bit is 0 */ 336 u4_dc_cntrl = (~(u4_cntrl >> 16) & u4_dc_cntrl); 337 /* Combine both ac and dc bits */ 338 u4_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_LUMA) 339 | (u4_dc_cntrl & CNTRL_FLAG_DC_MASK_LUMA); 340 } 341 342 /* Source for dc coeffs 343 * If the block is intra, we have to read dc values from first row of src 344 * then stride for each block is 1, other wise its src stride 345 */ 346 pi2_dc_src = (iq_start_idx == 0) ? (pi2_src + src_strd) : pi2_src; 347 u4_dc_inc = (iq_start_idx == 0) ? src_strd : 1; 348 349 /* The AC blocks starts from 2nd row */ 350 pi2_src += src_strd; 351 352 /* Get the block bits */ 353 u4_blk_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_LUMA); 354 u4_dc_cntrl = (u4_cntrl & CNTRL_FLAG_DC_MASK_LUMA) << 16; 355 u4_empty_blk_cntrl = (~(u4_dc_cntrl | u4_blk_cntrl)) & 0xFFFF0000; 356 357 /* Get first block to process */ 358 DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id); 359 while (u4_blk_id < NUM_LUMA4x4_BLOCKS_IN_MB) 360 { 361 /* Compute address of src blocks */ 362 WORD32 i4_src_offset = u4_dc_inc * u4_blk_id; 363 364 IND2SUB_LUMA_MB(u4_blk_id, i4_offset_x, i4_offset_y); 365 366 /* Compute address of out and pred blocks */ 367 pu1_cur_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd; 368 pu1_cur_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd; 369 370 /* Do inv dc transform */ 371 ps_codec->pf_iquant_itrans_recon_4x4_dc(pi2_dc_src + i4_src_offset, 372 pu1_cur_prd_blk, 373 pu1_cur_out_blk, pred_strd, 374 out_strd, pu2_iscale_mat, 375 pu2_weigh_mat, qp_div, NULL, 376 iq_start_idx, 377 pi2_dc_src + i4_src_offset); 378 /* Get next DC block to process */ 379 DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id); 380 } 381 382 /* now process ac/mixed blocks */ 383 DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id); 384 while (u4_blk_id < NUM_LUMA4x4_BLOCKS_IN_MB) 385 { 386 387 WORD32 i4_src_offset = src_strd * u4_blk_id; 388 389 IND2SUB_LUMA_MB(u4_blk_id, i4_offset_x, i4_offset_y); 390 391 pu1_cur_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd; 392 pu1_cur_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd; 393 394 ps_codec->pf_iquant_itrans_recon_4x4(pi2_src + i4_src_offset, 395 pu1_cur_prd_blk, pu1_cur_out_blk, 396 pred_strd, out_strd, 397 pu2_iscale_mat, pu2_weigh_mat, 398 qp_div, (WORD16*) pi4_tmp, 399 iq_start_idx, 400 pi2_dc_src + u4_blk_id); 401 402 DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id); 403 } 404 405 /* Now process empty blocks */ 406 DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id); 407 while (u4_blk_id < NUM_LUMA4x4_BLOCKS_IN_MB) 408 { 409 IND2SUB_LUMA_MB(u4_blk_id, i4_offset_x, i4_offset_y); 410 411 pu1_cur_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd; 412 pu1_cur_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd; 413 414 ps_codec->pf_inter_pred_luma_copy(pu1_cur_prd_blk, pu1_cur_out_blk, 415 pred_strd, out_strd, SIZE_4X4_BLK_HRZ, 416 SIZE_4X4_BLK_VERT, 0, 0); 417 418 DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id); 419 } 420 } 421 422 /** 423 ******************************************************************************* 424 * 425 * @brief 426 * This function performs does the DCT transform then Hadamard transform 427 * and quantization for a chroma macroblock 428 * 429 * @par Description: 430 * First cf4 is done on all 16 4x4 blocks of the 8x8input block 431 * Then hadamard transform is done on the DC coefficients 432 * Quantization is then performed on the 8x8 block, 4x4 wise 433 * 434 * @param[in] pu1_src 435 * Pointer to source sub-block 436 * The input is in interleaved format for two chroma planes 437 * 438 * @param[in] pu1_pred 439 * Pointer to prediction sub-block 440 * Prediction is in inter leaved format 441 * 442 * @param[in] pi2_out 443 * Pointer to residual sub-block 444 * The output will be in linear format 445 * The first 4 continuous locations will contain the values of DC block for U 446 * and then next 4 will contain for V. 447 * After DC block and a stride 1st AC block of U plane will follow 448 * After one more stride next AC block of V plane will follow 449 * The blocks will be in raster scan order 450 * 451 * After all the AC blocks of U plane AC blocks of V plane will follow in exact 452 * same way 453 * 454 * @param[in] src_strd 455 * Source stride 456 * 457 * @param[in] pred_strd 458 * Prediction stride 459 * 460 * @param[in] dst_strd 461 * Destination stride 462 * 463 * @param[in] pu2_scale_matrix 464 * The quantization matrix for 4x4 transform 465 * 466 * @param[in] pu2_threshold_matrix 467 * Threshold matrix 468 * 469 * @param[in] u4_qbits 470 * 15+QP/6 471 * 472 * @param[in] u4_round_factor 473 * Round factor for quant 474 * 475 * @param[out] pu1_nnz 476 * Memory to store the non-zeros after transform 477 * The first byte will be the nnz od DC block for U plane 478 * From the next byte the AC nnzs will be storerd in raster scan order 479 * The fifth byte will be nnz of Dc block of V plane 480 * Then Ac blocks will follow 481 * 482 * @param u4_dc_flag 483 * Signals if Dc transform is to be done or not 484 * 1 -> Dc transform will be done 485 * 0 -> Dc transform will not be done 486 * 487 * @remarks 488 * 489 ******************************************************************************* 490 */ 491 void ih264e_chroma_8x8_resi_trans_dctrans_quant(codec_t *ps_codec, 492 UWORD8 *pu1_src, 493 UWORD8 *pu1_pred, 494 WORD16 *pi2_out, 495 WORD32 src_strd, 496 WORD32 pred_strd, 497 WORD32 out_strd, 498 const UWORD16 *pu2_scale_matrix, 499 const UWORD16 *pu2_threshold_matrix, 500 UWORD32 u4_qbits, 501 UWORD32 u4_round_factor, 502 UWORD8 *pu1_nnz_c) 503 { 504 WORD32 blk_cntr; 505 WORD32 i4_offsetx, i4_offsety; 506 UWORD8 *pu1_curr_src, *pu1_curr_pred; 507 508 WORD16 pi2_dc_str[8]; 509 UWORD8 au1_dcnnz[2]; 510 511 /* Move to the ac addresses */ 512 pu1_nnz_c++; 513 pi2_out += out_strd; 514 515 for (blk_cntr = 0; blk_cntr < NUM_CHROMA4x4_BLOCKS_IN_MB; blk_cntr++) 516 { 517 IND2SUB_CHROMA_MB(blk_cntr, i4_offsetx, i4_offsety); 518 519 pu1_curr_src = pu1_src + i4_offsetx + i4_offsety * src_strd; 520 pu1_curr_pred = pu1_pred + i4_offsetx + i4_offsety * pred_strd; 521 522 /* For chroma, v plane nnz is populated from position 5 */ 523 ps_codec->pf_resi_trans_quant_chroma_4x4( 524 pu1_curr_src, pu1_curr_pred, 525 pi2_out + blk_cntr * out_strd, src_strd, pred_strd, 526 pu2_scale_matrix, pu2_threshold_matrix, u4_qbits, 527 u4_round_factor, &pu1_nnz_c[blk_cntr + (blk_cntr > 3)], 528 &pi2_dc_str[blk_cntr]); 529 } 530 531 /* Adjust pointers to point to dc values */ 532 pi2_out -= out_strd; 533 pu1_nnz_c--; 534 535 u4_qbits++; 536 u4_round_factor <<= 1; 537 538 ps_codec->pf_hadamard_quant_2x2_uv(pi2_dc_str, pi2_out, pu2_scale_matrix, 539 pu2_threshold_matrix, u4_qbits, 540 u4_round_factor, au1_dcnnz); 541 542 /* Copy the dc nnzs */ 543 pu1_nnz_c[0] = au1_dcnnz[0]; 544 pu1_nnz_c[5] = au1_dcnnz[1]; 545 546 } 547 548 /** 549 ******************************************************************************* 550 * @brief 551 * This function performs the inverse transform with process for chroma MB of H264 552 * 553 * @par Description: 554 * Does inverse DC transform ,inverse quantization inverse transform 555 * 556 * @param[in] pi2_src 557 * Input data, 16x16 size 558 * The input is in the form of, first 4 locations will contain DC coeffs of 559 * U plane, next 4 will contain DC coeffs of V plane, then AC blocks of U plane 560 * in raster scan order will follow, each block as linear array in raster scan order. 561 * After a stride next AC block will follow. After all AC blocks of U plane 562 * V plane AC blocks will follow in exact same order. 563 * 564 * @param[in] pu1_pred 565 * The predicted data, 8x16 size, U and V interleaved 566 * 567 * @param[in] pu1_out 568 * Output 8x16, U and V interleaved 569 * 570 * @param[in] src_strd 571 * Source stride 572 * 573 * @param[in] pred_strd 574 * input stride for prediction buffer 575 * 576 * @param[in] out_strd 577 * input stride for output buffer 578 * 579 * @param[in] pu2_iscale_mat 580 * Inverse quantization martix for 4x4 transform 581 * 582 * @param[in] pu2_weigh_mat 583 * weight matrix of 4x4 transform 584 * 585 * @param[in] qp_div 586 * QP/6 587 * 588 * @param[in] pi4_tmp 589 * Input temporary buffer 590 * needs to be at least COFF_CNT_SUB_BLK_4x4 + Number of Dc cofss for chroma * number of planes 591 * in size 592 * 593 * @param[in] pu4_cntrl 594 * Controls the transform path 595 * the 15 th bit will correspond to DC block of U plane , 14th will indicate the V plane Dc block 596 * 32-28 bits will indicate AC blocks of U plane in raster scan order 597 * 27-23 bits will indicate AC blocks of V plane in rater scan order 598 * The bit 1 implies that there is at least one non zero coeff in a block 599 * 600 * @returns 601 * none 602 * 603 * @remarks 604 ******************************************************************************* 605 */ 606 void ih264e_chroma_8x8_idctrans_iquant_itrans_recon(codec_t *ps_codec, 607 WORD16 *pi2_src, 608 UWORD8 *pu1_pred, 609 UWORD8 *pu1_out, 610 WORD32 src_strd, 611 WORD32 pred_strd, 612 WORD32 out_strd, 613 const UWORD16 *pu2_iscale_mat, 614 const UWORD16 *pu2_weigh_mat, 615 UWORD32 qp_div, 616 UWORD32 u4_cntrl, 617 WORD32 *pi4_tmp) 618 { 619 /* Cntrl bits for 4x4 transforms 620 * u4_blk_cntrl : controls if a 4x4 block should be processed in ac path 621 * u4_dc_cntrl : controls is a 4x4 block is to be processed in dc path 622 * : dc block must contain only single dc coefficient 623 * u4_empty_blk_cntrl : control fot 4x4 block with no coeffs, ie no dc and ac 624 * : ie not (ac or dc) 625 */ 626 627 UWORD32 u4_blk_cntrl, u4_dc_cntrl, u4_empty_blk_cntrl; 628 629 /* tmp registers for block ids */ 630 WORD32 u4_blk_id; 631 632 /* Offsets for pointers */ 633 WORD32 i4_offset_x, i4_offset_y; 634 635 /* Pointer to 4x4 blocks */ 636 UWORD8 *pu1_cur_4x4_prd_blk, *pu1_cur_4x4_out_blk; 637 638 /* Tmp register for pointer to dc coffs */ 639 WORD16 *pi2_dc_src; 640 641 WORD16 i2_zero = 0; 642 643 /* Increment for dc block */ 644 WORD32 i4_dc_inc; 645 646 /* 647 * Lets do the inverse transform for dc coeffs in chroma 648 */ 649 if (u4_cntrl & CNTRL_FLAG_DCBLK_MASK_CHROMA) 650 { 651 UWORD32 cntr, u4_dc_cntrl; 652 /* Do inv hadamard for u an v block */ 653 654 ps_codec->pf_ihadamard_scaling_2x2_uv(pi2_src, pi2_src, pu2_iscale_mat, 655 pu2_weigh_mat, qp_div, NULL); 656 /* 657 * Update the cntrl flag 658 * Flag is updated as follows bits 15-11 -> u block dc bits 659 */ 660 u4_dc_cntrl = 0; 661 for (cntr = 0; cntr < 8; cntr++) 662 { 663 u4_dc_cntrl |= ((pi2_src[cntr] != 0) << (15 - cntr)); 664 } 665 666 /* Mark dc bits as 1 if corresponding ac bit is 0 */ 667 u4_dc_cntrl = (~(u4_cntrl >> 16) & u4_dc_cntrl); 668 /* Combine both ac and dc bits */ 669 u4_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_CHROMA) 670 | (u4_dc_cntrl & CNTRL_FLAG_DC_MASK_CHROMA); 671 672 /* Since we populated the dc coffs, we have to read them from there */ 673 pi2_dc_src = pi2_src; 674 i4_dc_inc = 1; 675 } 676 else 677 { 678 u4_cntrl = u4_cntrl & CNTRL_FLAG_AC_MASK_CHROMA; 679 pi2_dc_src = &i2_zero; 680 i4_dc_inc = 0; 681 } 682 683 /* Get the block bits */ 684 u4_blk_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_CHROMA); 685 u4_dc_cntrl = (u4_cntrl & CNTRL_FLAG_DC_MASK_CHROMA) << 16; 686 u4_empty_blk_cntrl = (~(u4_dc_cntrl | u4_blk_cntrl)) & 0xFF000000; 687 688 /* The AC blocks starts from 2nd row */ 689 pi2_src += src_strd; 690 691 DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id); 692 while (u4_blk_id < 8) 693 { 694 WORD32 dc_src_offset = u4_blk_id * i4_dc_inc; 695 696 IND2SUB_CHROMA_MB(u4_blk_id, i4_offset_x, i4_offset_y); 697 698 pu1_cur_4x4_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd; 699 pu1_cur_4x4_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd; 700 701 ps_codec->pf_iquant_itrans_recon_chroma_4x4_dc( 702 pi2_dc_src + dc_src_offset, pu1_cur_4x4_prd_blk, 703 pu1_cur_4x4_out_blk, pred_strd, out_strd, NULL, NULL, 0, 704 NULL, pi2_dc_src + dc_src_offset); 705 /* Get next DC block to process */ 706 DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id); 707 } 708 709 /* now process ac/mixed blocks */ 710 DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id); 711 while (u4_blk_id < 8) 712 { 713 WORD32 i4_src_offset = src_strd * u4_blk_id; 714 WORD32 dc_src_offset = i4_dc_inc * u4_blk_id; 715 716 IND2SUB_CHROMA_MB(u4_blk_id, i4_offset_x, i4_offset_y); 717 718 pu1_cur_4x4_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd; 719 pu1_cur_4x4_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd; 720 721 ps_codec->pf_iquant_itrans_recon_chroma_4x4(pi2_src + i4_src_offset, 722 pu1_cur_4x4_prd_blk, 723 pu1_cur_4x4_out_blk, 724 pred_strd, out_strd, 725 pu2_iscale_mat, 726 pu2_weigh_mat, qp_div, 727 (WORD16 *) pi4_tmp, 728 pi2_dc_src + dc_src_offset); 729 730 DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id); 731 } 732 733 /* Now process empty blocks */ 734 DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id); 735 while (u4_blk_id < 8) 736 { 737 IND2SUB_CHROMA_MB(u4_blk_id, i4_offset_x, i4_offset_y); 738 739 pu1_cur_4x4_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd; 740 pu1_cur_4x4_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd; 741 742 ps_codec->pf_interleave_copy(pu1_cur_4x4_prd_blk, pu1_cur_4x4_out_blk, 743 pred_strd, out_strd, SIZE_4X4_BLK_VERT, 744 SIZE_4X4_BLK_HRZ); 745 746 DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id); 747 } 748 } 749 750 /** 751 ****************************************************************************** 752 * 753 * @brief This function packs residue of an i16x16 luma mb for entropy coding 754 * 755 * @par Description 756 * An i16 macro block contains two classes of units, dc 4x4 block and 757 * 4x4 ac blocks. while packing the mb, the dc block is sent first, and 758 * the 16 ac blocks are sent next in scan order. Each and every block is 759 * represented by 3 parameters (nnz, significant coefficient map and the 760 * residue coefficients itself). If a 4x4 unit does not have any coefficients 761 * then only nnz is sent. Inside a 4x4 block the individual coefficients are 762 * sent in scan order. 763 * 764 * The first byte of each block will be nnz of the block, if it is non zero, 765 * a 2 byte significance map is sent. This is followed by nonzero coefficients. 766 * This is repeated for 1 dc + 16 ac blocks. 767 * 768 * @param[in] pi2_res_mb 769 * pointer to residue mb 770 * 771 * @param[in, out] pv_mb_coeff_data 772 * buffer pointing to packed residue coefficients 773 * 774 * @param[in] u4_res_strd 775 * residual block stride 776 * 777 * @param[out] u1_cbp_l 778 * coded block pattern luma 779 * 780 * @param[in] pu1_nnz 781 * number of non zero coefficients in each 4x4 unit 782 * 783 * @param[out] 784 * Control signal for inverse transform of 16x16 blocks 785 * 786 * @return none 787 * 788 * @ remarks 789 * 790 ****************************************************************************** 791 */ 792 void ih264e_pack_l_mb_i16(WORD16 *pi2_res_mb, 793 void **pv_mb_coeff_data, 794 WORD32 i4_res_strd, 795 UWORD8 *u1_cbp_l, 796 UWORD8 *pu1_nnz, 797 UWORD32 *pu4_cntrl) 798 { 799 /* pointer to packed sub block buffer space */ 800 tu_sblk_coeff_data_t *ps_mb_coeff_data = (*pv_mb_coeff_data), *ps_mb_coeff_data_ac; 801 802 /* no of non zero coefficients in the current sub block */ 803 UWORD32 u4_nnz_cnt; 804 805 /* significant coefficient map */ 806 UWORD32 u4_s_map; 807 808 /* pointer to scanning matrix */ 809 const UWORD8 *pu1_scan_order; 810 811 /* number of non zeros in sub block */ 812 UWORD32 u4_nnz; 813 814 /* coeff scan order */ 815 const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15}; 816 817 /* temp var */ 818 UWORD32 coeff_cnt, mask, b4,u4_cntrl=0; 819 820 /*DC and AC coeff pointers*/ 821 WORD16 *pi2_res_mb_ac,*pi2_res_mb_dc; 822 823 /********************************************************/ 824 /* pack dc coeff data for entropy coding */ 825 /********************************************************/ 826 827 pi2_res_mb_dc = pi2_res_mb; 828 pu1_scan_order = gu1_luma_scan_order_dc; 829 830 u4_nnz = *pu1_nnz; 831 u4_cntrl = 0; 832 833 /* write number of non zero coefficients */ 834 ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz; 835 836 if (u4_nnz) 837 { 838 for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++) 839 { 840 if (pi2_res_mb_dc[pu1_scan_order[coeff_cnt]]) 841 { 842 /* write residue */ 843 ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = pi2_res_mb_dc[pu1_scan_order[coeff_cnt]]; 844 u4_s_map |= mask; 845 } 846 mask <<= 1; 847 } 848 /* write significant coeff map */ 849 ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16); 850 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt); 851 852 u4_cntrl = 0x00008000;// Set DC bit in ctrl code 853 } 854 else 855 { 856 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue; 857 } 858 859 /********************************************************/ 860 /* pack ac coeff data for entropy coding */ 861 /********************************************************/ 862 863 pu1_nnz ++; 864 pu1_scan_order = gu1_luma_scan_order; 865 pi2_res_mb += i4_res_strd; /*Move to AC block*/ 866 867 ps_mb_coeff_data_ac = (*pv_mb_coeff_data); 868 869 for (b4 = 0; b4 < 16; b4++) 870 { 871 ps_mb_coeff_data = (*pv_mb_coeff_data); 872 873 u4_nnz = pu1_nnz[u1_scan_order[b4]]; 874 875 /* Jump according to the scan order */ 876 pi2_res_mb_ac = pi2_res_mb + (i4_res_strd * u1_scan_order[b4]); 877 878 /* 879 * Since this is a i16x16 block, we should not count dc coeff on indi 880 * vidual 4x4 blocks to nnz. But due to the implementation of 16x16 881 * trans function, we add dc's nnz to u4_nnz too. Hence we adjust that 882 * here 883 */ 884 u4_nnz -= (pi2_res_mb_ac[0] != 0); 885 886 /* write number of non zero coefficients */ 887 ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz; 888 889 if (u4_nnz) 890 { 891 for (u4_nnz_cnt = 0, coeff_cnt = 1, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++) 892 { 893 if (pi2_res_mb_ac[pu1_scan_order[coeff_cnt]]) 894 { 895 /* write residue */ 896 ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = pi2_res_mb_ac[pu1_scan_order[coeff_cnt]]; 897 u4_s_map |= mask; 898 } 899 mask <<= 1; 900 } 901 /* write significant coeff map */ 902 ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16); 903 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt); 904 *u1_cbp_l = 15; 905 906 u4_cntrl |= (1 << (31 - u1_scan_order[b4])); 907 } 908 else 909 { 910 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue; 911 } 912 913 } 914 915 if (!(*u1_cbp_l)) 916 { 917 (*pv_mb_coeff_data) = ps_mb_coeff_data_ac; 918 } 919 920 /* Store the cntrl signal */ 921 (*pu4_cntrl) = u4_cntrl; 922 return; 923 } 924 925 /** 926 ****************************************************************************** 927 * 928 * @brief This function packs residue of an p16x16 luma mb for entropy coding 929 * 930 * @par Description 931 * A p16x16 macro block contains two classes of units 16 4x4 ac blocks. 932 * while packing the mb, the dc block is sent first, and 933 * the 16 ac blocks are sent next in scan order. Each and every block is 934 * represented by 3 parameters (nnz, significant coefficient map and the 935 * residue coefficients itself). If a 4x4 unit does not have any coefficients 936 * then only nnz is sent. Inside a 4x4 block the individual coefficients are 937 * sent in scan order. 938 * 939 * The first byte of each block will be nnz of the block, if it is non zero, 940 * a 2 byte significance map is sent. This is followed by nonzero coefficients. 941 * This is repeated for 1 dc + 16 ac blocks. 942 * 943 * @param[in] pi2_res_mb 944 * pointer to residue mb 945 * 946 * @param[in, out] pv_mb_coeff_data 947 * buffer pointing to packed residue coefficients 948 * 949 * @param[in] i4_res_strd 950 * residual block stride 951 * 952 * @param[out] u1_cbp_l 953 * coded block pattern luma 954 * 955 * @param[in] pu1_nnz 956 * number of non zero coefficients in each 4x4 unit 957 * 958 * @param[out] pu4_cntrl 959 * Control signal for inverse transform 960 * 961 * @return none 962 * 963 * @remarks Killing coffs not yet coded 964 * 965 ****************************************************************************** 966 */ 967 void ih264e_pack_l_mb(WORD16 *pi2_res_mb, 968 void **pv_mb_coeff_data, 969 WORD32 i4_res_strd, 970 UWORD8 *u1_cbp_l, 971 UWORD8 *pu1_nnz, 972 UWORD32 u4_thres_resi, 973 UWORD32 *pu4_cntrl) 974 { 975 /* pointer to packed sub block buffer space */ 976 tu_sblk_coeff_data_t *ps_mb_coeff_data, *ps_mb_coeff_data_b8, *ps_mb_coeff_data_mb; 977 978 /* no of non zero coefficients in the current sub block */ 979 UWORD32 u4_nnz_cnt; 980 981 /* significant coefficient map */ 982 UWORD32 u4_s_map; 983 984 /* pointer to scanning matrix */ 985 const UWORD8 *pu1_scan_order = gu1_luma_scan_order; 986 987 /* number of non zeros in sub block */ 988 UWORD32 u4_nnz; 989 990 /* pointer to residual sub block */ 991 WORD16 *pi2_res_sb; 992 993 /* coeff scan order */ 994 const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15}; 995 996 /* coeff cost */ 997 const UWORD8 *pu1_coeff_cost = gu1_coeff_cost; 998 999 /* temp var */ 1000 UWORD32 u4_mb_coeff_cost = 0, u4_b8_coeff_cost = 0, coeff_cnt, mask, u4_cntrl = 0, b4, b8; 1001 1002 /* temp var */ 1003 WORD32 i4_res_val, i4_run = -1, dcac_block; 1004 1005 /* When Hadamard transform is disabled, first row values are dont care, ignore them */ 1006 pi2_res_mb += i4_res_strd; 1007 1008 /* When Hadamard transform is disabled, first unit value is dont care, ignore this */ 1009 pu1_nnz ++; 1010 1011 ps_mb_coeff_data_mb = ps_mb_coeff_data_b8 = (*pv_mb_coeff_data); 1012 1013 /********************************************************/ 1014 /* pack coeff data for entropy coding */ 1015 /********************************************************/ 1016 1017 for (b4 = 0; b4 < 16; b4++) 1018 { 1019 ps_mb_coeff_data = (*pv_mb_coeff_data); 1020 1021 b8 = b4 >> 2; 1022 1023 u4_nnz = pu1_nnz[u1_scan_order[b4]]; 1024 1025 /* Jump according to the scan order */ 1026 pi2_res_sb = pi2_res_mb + (i4_res_strd * u1_scan_order[b4]); 1027 1028 /* write number of non zero coefficients */ 1029 ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz; 1030 1031 if (u4_nnz) 1032 { 1033 for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++) 1034 { 1035 /* number of runs of zero before, this is used to compute coeff cost */ 1036 i4_run++; 1037 1038 i4_res_val = pi2_res_sb[pu1_scan_order[coeff_cnt]]; 1039 1040 if (i4_res_val) 1041 { 1042 /* write residue */ 1043 ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = i4_res_val; 1044 u4_s_map |= mask; 1045 1046 if (u4_thres_resi) 1047 { 1048 /* compute coeff cost */ 1049 if (i4_res_val == 1 || i4_res_val == -1) 1050 { 1051 if (i4_run < 6) 1052 u4_b8_coeff_cost += pu1_coeff_cost[i4_run]; 1053 } 1054 else 1055 u4_b8_coeff_cost += 9; 1056 1057 i4_run = -1; 1058 } 1059 } 1060 1061 mask <<= 1; 1062 } 1063 1064 /* write significant coeff map */ 1065 ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16); 1066 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt); 1067 1068 /* cbp */ 1069 *u1_cbp_l |= (1 << b8); 1070 1071 /* Cntrl map for inverse transform computation 1072 * 1073 * If coeff_cnt is zero, it means that only nonzero was a dc coeff 1074 * Hence we have to set the 16 - u1_scan_order[b4]) position instead 1075 * of 31 - u1_scan_order[b4] 1076 */ 1077 dcac_block = (coeff_cnt == 0)?16:31; 1078 u4_cntrl |= (1 << (dcac_block - u1_scan_order[b4])); 1079 } 1080 else 1081 { 1082 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue; 1083 } 1084 1085 /* Decide if the 8x8 unit has to be sent for entropy coding? */ 1086 if ((b4+1) % 4 == 0) 1087 { 1088 if ( u4_thres_resi && (u4_b8_coeff_cost <= LUMA_SUB_BLOCK_SKIP_THRESHOLD) && 1089 (*u1_cbp_l & (1 << b8)) ) 1090 { 1091 1092 1093 /* 1094 * When we want to reset the full 8x8 block, we have to reset 1095 * both the dc and ac coeff bits hence we have the symmetric 1096 * arrangement of bits 1097 */ 1098 const UWORD32 cntrl_mask_map[4] = {0xcc00cc00, 0x33003300, 0x00cc00cc, 0x00330033}; 1099 1100 /* restore cbp */ 1101 *u1_cbp_l = (*u1_cbp_l & (~(1 << b8))); 1102 1103 /* correct cntrl flag */ 1104 u4_cntrl = u4_cntrl & (~cntrl_mask_map[(b4 >> 2)]); 1105 1106 /* correct nnz */ 1107 pu1_nnz[u1_scan_order[b4 - 3]] = 0; 1108 pu1_nnz[u1_scan_order[b4 - 2]] = 0; 1109 pu1_nnz[u1_scan_order[b4 - 1]] = 0; 1110 pu1_nnz[u1_scan_order[b4]] = 0; 1111 1112 /* reset blk cost */ 1113 u4_b8_coeff_cost = 0; 1114 } 1115 1116 if (!(*u1_cbp_l & (1 << b8))) 1117 { 1118 (*pv_mb_coeff_data) = ps_mb_coeff_data_b8; 1119 } 1120 1121 u4_mb_coeff_cost += u4_b8_coeff_cost; 1122 1123 u4_b8_coeff_cost = 0; 1124 i4_run = -1; 1125 ps_mb_coeff_data_b8 = (*pv_mb_coeff_data); 1126 } 1127 } 1128 1129 if (u4_thres_resi && (u4_mb_coeff_cost <= LUMA_BLOCK_SKIP_THRESHOLD) 1130 && (*u1_cbp_l)) 1131 { 1132 (*pv_mb_coeff_data) = ps_mb_coeff_data_mb; 1133 *u1_cbp_l = 0; 1134 u4_cntrl = 0; 1135 memset(pu1_nnz, 0, 16); 1136 } 1137 1138 (*pu4_cntrl) = u4_cntrl; 1139 1140 return; 1141 } 1142 1143 /** 1144 ****************************************************************************** 1145 * 1146 * @brief This function packs residue of an i8x8 chroma mb for entropy coding 1147 * 1148 * @par Description 1149 * An i8 chroma macro block contains two classes of units, dc 2x2 block and 1150 * 4x4 ac blocks. while packing the mb, the dc block is sent first, and 1151 * the 4 ac blocks are sent next in scan order. Each and every block is 1152 * represented by 3 parameters (nnz, significant coefficient map and the 1153 * residue coefficients itself). If a 4x4 unit does not have any coefficients 1154 * then only nnz is sent. Inside a 4x4 block the individual coefficients are 1155 * sent in scan order. 1156 * 1157 * The first byte of each block will be nnz of the block, if it is non zero, 1158 * a 2 byte significance map is sent. This is followed by nonzero coefficients. 1159 * This is repeated for 1 dc + 4 ac blocks. 1160 * 1161 * @param[in] pi2_res_mb 1162 * pointer to residue mb 1163 * 1164 * @param[in, out] pv_mb_coeff_data 1165 * buffer pointing to packed residue coefficients 1166 * 1167 * @param[in] u4_res_strd 1168 * residual block stride 1169 * 1170 * @param[out] u1_cbp_c 1171 * coded block pattern chroma 1172 * 1173 * @param[in] pu1_nnz 1174 * number of non zero coefficients in each 4x4 unit 1175 * 1176 * @param[out] pu1_nnz 1177 * Control signal for inverse transform 1178 * 1179 * @param[in] u4_swap_uv 1180 * Swaps the order of U and V planes in entropy bitstream 1181 * 1182 * @return none 1183 * 1184 * @ remarks 1185 * 1186 ****************************************************************************** 1187 */ 1188 void ih264e_pack_c_mb(WORD16 *pi2_res_mb, 1189 void **pv_mb_coeff_data, 1190 WORD32 i4_res_strd, 1191 UWORD8 *u1_cbp_c, 1192 UWORD8 *pu1_nnz, 1193 UWORD32 u4_thres_resi, 1194 UWORD32 *pu4_cntrl, 1195 UWORD32 u4_swap_uv) 1196 { 1197 /* pointer to packed sub block buffer space */ 1198 tu_sblk_coeff_data_t *ps_mb_coeff_data = (*pv_mb_coeff_data); 1199 tu_sblk_coeff_data_t *ps_mb_coeff_data_dc, *ps_mb_coeff_data_ac; 1200 1201 /* nnz pointer */ 1202 UWORD8 *pu1_nnz_ac, *pu1_nnz_dc; 1203 1204 /* nnz counter */ 1205 UWORD32 u4_nnz_cnt; 1206 1207 /* significant coefficient map */ 1208 UWORD32 u4_s_map; 1209 1210 /* pointer to scanning matrix */ 1211 const UWORD8 *pu1_scan_order; 1212 1213 /* no of non zero coefficients in the current sub block */ 1214 UWORD32 u4_nnz; 1215 1216 /* pointer to residual sub block, res val */ 1217 WORD16 *pi2_res_sb, i2_res_val; 1218 1219 /* temp var */ 1220 UWORD32 coeff_cnt, mask, b4,plane; 1221 1222 /* temp var */ 1223 UWORD32 u4_coeff_cost; 1224 WORD32 i4_run; 1225 1226 /* coeff cost */ 1227 const UWORD8 *pu1_coeff_cost = gu1_coeff_cost; 1228 1229 /* pointer to packed buffer space */ 1230 UWORD32 *pu4_mb_coeff_data = NULL; 1231 1232 /* ac coded block pattern */ 1233 UWORD8 u1_cbp_ac; 1234 1235 /* Variable to store the current bit pos in cntrl variable*/ 1236 UWORD32 cntrl_pos = 0; 1237 1238 /********************************************************/ 1239 /* pack dc coeff data for entropy coding */ 1240 /********************************************************/ 1241 pu1_scan_order = gu1_chroma_scan_order_dc; 1242 pi2_res_sb = pi2_res_mb; 1243 pu1_nnz_dc = pu1_nnz; 1244 (*pu4_cntrl) = 0; 1245 cntrl_pos = 15; 1246 ps_mb_coeff_data_dc = (*pv_mb_coeff_data); 1247 1248 /* Color space conversion between SP_UV and SP_VU 1249 * We always assume SP_UV for all the processing 1250 * Hence to get proper stream output we need to swap U and V channels here 1251 * 1252 * For that there are two paths we need to look for 1253 * One is the path to bitstream , these variables should have the proper input 1254 * configured UV or VU 1255 * For the other path the inverse transform variables should have what ever ordering the 1256 * input had 1257 */ 1258 1259 if (u4_swap_uv) 1260 { 1261 pu1_nnz_dc += 5;/* Move to NNZ of V planve */ 1262 pi2_res_sb += 4;/* Move to DC coff of V plane */ 1263 1264 cntrl_pos = 14; /* Control bit for V plane */ 1265 } 1266 1267 for (plane = 0; plane < 2; plane++) 1268 { 1269 ps_mb_coeff_data = (*pv_mb_coeff_data); 1270 1271 u4_nnz = *pu1_nnz_dc; 1272 /* write number of non zero coefficients U/V */ 1273 ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz; 1274 1275 if (u4_nnz) 1276 { 1277 for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++) 1278 { 1279 i2_res_val = pi2_res_sb[pu1_scan_order[coeff_cnt]]; 1280 if (i2_res_val) 1281 { 1282 /* write residue U/V */ 1283 ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = i2_res_val; 1284 u4_s_map |= mask; 1285 } 1286 mask <<= 1; 1287 } 1288 /* write significant coeff map U/V */ 1289 ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16); 1290 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt); 1291 *u1_cbp_c = 1; 1292 1293 (*pu4_cntrl) |= (1 << cntrl_pos); 1294 } 1295 else 1296 { 1297 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue; 1298 } 1299 1300 if (u4_swap_uv) 1301 { 1302 cntrl_pos++; /* Control bit for U plane */ 1303 pu1_nnz_dc -= 5; /* Move to NNZ of U plane */ 1304 pi2_res_sb -= 4; /* Move to DC coff of U plane */ 1305 1306 } 1307 else 1308 { 1309 cntrl_pos--; /* Control bit for U plane */ 1310 pu1_nnz_dc += 5; /* 4 for AC NNZ and 1 for DC */ 1311 pi2_res_sb += 4; /* Move to DC coff of V plane */ 1312 } 1313 } 1314 1315 /********************************************************/ 1316 /* pack ac coeff data for entropy coding */ 1317 /********************************************************/ 1318 1319 pu1_scan_order = gu1_chroma_scan_order; 1320 ps_mb_coeff_data_ac = (*pv_mb_coeff_data); 1321 1322 if (u4_swap_uv) 1323 { 1324 pi2_res_sb = pi2_res_mb + i4_res_strd * 5; /* Move to V plane ,ie 1dc row+ 4 ac row */ 1325 cntrl_pos = 27; /* The control bits are to be added for V bloc ie 31-4 th bit */ 1326 pu1_nnz_ac = pu1_nnz + 6;/*Move the nnz to V block NNZ 1 dc + 1dc + 4 ac */ 1327 } 1328 else 1329 { 1330 pi2_res_sb = pi2_res_mb + i4_res_strd; /* Move to U plane ,ie 1dc row */ 1331 cntrl_pos = 31; 1332 pu1_nnz_ac = pu1_nnz + 1; /* Move the nnz to V block NNZ 1 dc */ 1333 } 1334 1335 for (plane = 0; plane < 2; plane++) 1336 { 1337 pu4_mb_coeff_data = (*pv_mb_coeff_data); 1338 1339 u4_coeff_cost = 0; 1340 i4_run = -1; 1341 1342 /* get the current cbp, so that it automatically 1343 * gets reverted in case of zero ac values */ 1344 u1_cbp_ac = *u1_cbp_c; 1345 1346 for (b4 = 0; b4 < 4; b4++) 1347 { 1348 ps_mb_coeff_data = (*pv_mb_coeff_data); 1349 1350 u4_nnz = *pu1_nnz_ac; 1351 1352 /* 1353 * We are scanning only ac coeffs, but the nnz is for the 1354 * complete 4x4 block. Hence we have to discount the nnz contributed 1355 * by the dc coefficient 1356 */ 1357 u4_nnz -= (pi2_res_sb[0]!=0); 1358 1359 /* write number of non zero coefficients U/V */ 1360 ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz; 1361 1362 if (u4_nnz) 1363 { 1364 for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++) 1365 { 1366 i2_res_val = pi2_res_sb[pu1_scan_order[coeff_cnt]]; 1367 1368 i4_run++; 1369 1370 if (i2_res_val) 1371 { 1372 /* write residue U/V */ 1373 ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = i2_res_val; 1374 u4_s_map |= mask; 1375 1376 if ( u4_thres_resi && (u4_coeff_cost < CHROMA_BLOCK_SKIP_THRESHOLD) ) 1377 { 1378 /* compute coeff cost */ 1379 if (i2_res_val == 1 || i2_res_val == -1) 1380 { 1381 if (i4_run < 6) 1382 u4_coeff_cost += pu1_coeff_cost[i4_run]; 1383 } 1384 else 1385 u4_coeff_cost += 9; 1386 1387 i4_run = -1; 1388 } 1389 } 1390 mask <<= 1; 1391 } 1392 1393 /* write significant coeff map U/V */ 1394 ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16); 1395 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt); 1396 u1_cbp_ac = 2; 1397 1398 (*pu4_cntrl) |= 1 << cntrl_pos; 1399 } 1400 else 1401 { 1402 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue; 1403 } 1404 1405 pu1_nnz_ac++; 1406 pi2_res_sb += i4_res_strd; 1407 cntrl_pos--; 1408 } 1409 1410 /* reset block */ 1411 if (u4_thres_resi && (u4_coeff_cost < CHROMA_BLOCK_SKIP_THRESHOLD)) 1412 { 1413 pu4_mb_coeff_data[0] = 0; 1414 pu4_mb_coeff_data[1] = 0; 1415 pu4_mb_coeff_data[2] = 0; 1416 pu4_mb_coeff_data[3] = 0; 1417 (*pv_mb_coeff_data) = pu4_mb_coeff_data + 4; 1418 1419 /* Generate the control signal */ 1420 /* Zero out the current plane's AC coefficients */ 1421 (*pu4_cntrl) &= ((plane == u4_swap_uv) ? 0x0FFFFFFF : 0xF0FFFFFF); 1422 1423 /* Similarly do for the NNZ also */ 1424 *(pu1_nnz_ac - 4) = 0; 1425 *(pu1_nnz_ac - 3) = 0; 1426 *(pu1_nnz_ac - 2) = 0; 1427 *(pu1_nnz_ac - 1) = 0; 1428 } 1429 else 1430 { 1431 *u1_cbp_c = u1_cbp_ac; 1432 } 1433 1434 if (u4_swap_uv) 1435 { 1436 pi2_res_sb = pi2_res_mb + i4_res_strd; /* Move to V plane ,ie 1dc row+ 4 ac row + 1 dc row */ 1437 cntrl_pos = 31; /* The control bits are to be added for V bloc ie 31-4 th bit */ 1438 pu1_nnz_ac = pu1_nnz + 1; /* Move the nnz to V block NNZ 1 dc + 1dc + 4 ac */ 1439 1440 pu1_nnz_ac = pu1_nnz + 1; 1441 } 1442 else 1443 pu1_nnz_ac = pu1_nnz + 6; /* Go to nnz of V plane */ 1444 } 1445 1446 /* restore the ptr basing on cbp */ 1447 if (*u1_cbp_c == 0) 1448 { 1449 (*pv_mb_coeff_data) = ps_mb_coeff_data_dc; 1450 } 1451 else if (*u1_cbp_c == 1) 1452 { 1453 (*pv_mb_coeff_data) = ps_mb_coeff_data_ac; 1454 } 1455 1456 return ; 1457 } 1458 1459 /** 1460 ******************************************************************************* 1461 * 1462 * @brief performs luma core coding when intra mode is i16x16 1463 * 1464 * @par Description: 1465 * If the current mb is to be coded as intra of mb type i16x16, the mb is first 1466 * predicted using one of i16x16 prediction filters, basing on the intra mode 1467 * chosen. Then, error is computed between the input blk and the estimated blk. 1468 * This error is transformed (hierarchical transform i.e., dct followed by hada- 1469 * -mard), quantized. The quantized coefficients are packed in scan order for 1470 * entropy coding. 1471 * 1472 * @param[in] ps_proc_ctxt 1473 * pointer to the current macro block context 1474 * 1475 * @returns u1_cbp_l 1476 * coded block pattern luma 1477 * 1478 * @remarks none 1479 * 1480 ******************************************************************************* 1481 */ 1482 1483 UWORD8 ih264e_code_luma_intra_macroblock_16x16(process_ctxt_t *ps_proc) 1484 { 1485 /* Codec Context */ 1486 codec_t *ps_codec = ps_proc->ps_codec; 1487 1488 /* pointer to ref macro block */ 1489 UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_luma; 1490 1491 /* pointer to src macro block */ 1492 UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_luma; 1493 1494 /* pointer to prediction macro block */ 1495 UWORD8 *pu1_pred_mb = NULL; 1496 1497 /* pointer to residual macro block */ 1498 WORD16 *pi2_res_mb = ps_proc->pi2_res_buf; 1499 1500 /* strides */ 1501 WORD32 i4_src_strd = ps_proc->i4_src_strd; 1502 WORD32 i4_rec_strd = ps_proc->i4_rec_strd; 1503 WORD32 i4_pred_strd = ps_proc->i4_pred_strd; 1504 WORD32 i4_res_strd = ps_proc->i4_res_strd; 1505 1506 /* intra mode */ 1507 UWORD8 u1_intra_mode = ps_proc->u1_l_i16_mode; 1508 1509 /* coded block pattern */ 1510 UWORD8 u1_cbp_l = 0; 1511 1512 /* number of non zero coeffs*/ 1513 UWORD32 au4_nnz[5]; 1514 UWORD8 *pu1_nnz = (UWORD8 *)au4_nnz; 1515 1516 /*Cntrol signal for itrans*/ 1517 UWORD32 u4_cntrl; 1518 1519 /* quantization parameters */ 1520 quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0]; 1521 1522 /* pointer to packed mb coeff data */ 1523 void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data); 1524 1525 /* init nnz */ 1526 au4_nnz[0] = 0; 1527 au4_nnz[1] = 0; 1528 au4_nnz[2] = 0; 1529 au4_nnz[3] = 0; 1530 au4_nnz[4] = 0; 1531 1532 if (u1_intra_mode == PLANE_I16x16) 1533 { 1534 pu1_pred_mb = ps_proc->pu1_pred_mb_intra_16x16_plane; 1535 } 1536 else 1537 { 1538 pu1_pred_mb = ps_proc->pu1_pred_mb_intra_16x16; 1539 } 1540 1541 /********************************************************/ 1542 /* error estimation, */ 1543 /* transform */ 1544 /* quantization */ 1545 /********************************************************/ 1546 ih264e_luma_16x16_resi_trans_dctrans_quant(ps_codec, pu1_curr_mb, 1547 pu1_pred_mb, pi2_res_mb, 1548 i4_src_strd, i4_pred_strd, 1549 i4_res_strd, 1550 ps_qp_params->pu2_scale_mat, 1551 ps_qp_params->pu2_thres_mat, 1552 ps_qp_params->u1_qbits, 1553 ps_qp_params->u4_dead_zone, 1554 pu1_nnz, ENABLE_DC_TRANSFORM); 1555 1556 /********************************************************/ 1557 /* pack coeff data for entropy coding */ 1558 /********************************************************/ 1559 ih264e_pack_l_mb_i16(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_l, 1560 pu1_nnz, &u4_cntrl); 1561 1562 /********************************************************/ 1563 /* ierror estimation, */ 1564 /* itransform */ 1565 /* iquantization */ 1566 /********************************************************/ 1567 /* 1568 *if refernce frame is not to be computed 1569 *we only need the right and bottom border 4x4 blocks to predict next intra 1570 *blocks, hence only compute them 1571 */ 1572 if (!ps_proc->u4_compute_recon) 1573 { 1574 u4_cntrl &= 0x111F8000; 1575 } 1576 1577 if (u4_cntrl) 1578 { 1579 ih264e_luma_16x16_idctrans_iquant_itrans_recon( 1580 ps_codec, pi2_res_mb, pu1_pred_mb, pu1_ref_mb, 1581 i4_res_strd, i4_pred_strd, i4_rec_strd, 1582 ps_qp_params->pu2_iscale_mat, 1583 ps_qp_params->pu2_weigh_mat, ps_qp_params->u1_qp_div, 1584 u4_cntrl, ENABLE_DC_TRANSFORM, 1585 ps_proc->pv_scratch_buff); 1586 } 1587 else 1588 { 1589 ps_codec->pf_inter_pred_luma_copy(pu1_pred_mb, pu1_ref_mb, i4_pred_strd, 1590 i4_rec_strd, MB_SIZE, MB_SIZE, NULL, 1591 0); 1592 } 1593 1594 return (u1_cbp_l); 1595 } 1596 1597 1598 /** 1599 ******************************************************************************* 1600 * 1601 * @brief performs luma core coding when intra mode is i4x4 1602 * 1603 * @par Description: 1604 * If the current mb is to be coded as intra of mb type i4x4, the mb is first 1605 * predicted using one of i4x4 prediction filters, basing on the intra mode 1606 * chosen. Then, error is computed between the input blk and the estimated blk. 1607 * This error is dct transformed and quantized. The quantized coefficients are 1608 * packed in scan order for entropy coding. 1609 * 1610 * @param[in] ps_proc_ctxt 1611 * pointer to the current macro block context 1612 * 1613 * @returns u1_cbp_l 1614 * coded block pattern luma 1615 * 1616 * @remarks 1617 * The traversal of 4x4 subblocks in the 16x16 macroblock is as per the scan order 1618 * mentioned in h.264 specification 1619 * 1620 ******************************************************************************* 1621 */ 1622 UWORD8 ih264e_code_luma_intra_macroblock_4x4(process_ctxt_t *ps_proc) 1623 { 1624 /* Codec Context */ 1625 codec_t *ps_codec = ps_proc->ps_codec; 1626 1627 /* pointer to ref macro block */ 1628 UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_luma; 1629 1630 /* pointer to src macro block */ 1631 UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_luma; 1632 1633 /* pointer to prediction macro block */ 1634 UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb; 1635 1636 /* pointer to residual macro block */ 1637 WORD16 *pi2_res_mb = ps_proc->pi2_res_buf; 1638 1639 /* strides */ 1640 WORD32 i4_src_strd = ps_proc->i4_src_strd; 1641 WORD32 i4_rec_strd = ps_proc->i4_rec_strd; 1642 WORD32 i4_pred_strd = ps_proc->i4_pred_strd; 1643 1644 /* pointer to neighbors: left, top, top-left */ 1645 UWORD8 *pu1_mb_a; 1646 UWORD8 *pu1_mb_b; 1647 UWORD8 *pu1_mb_c; 1648 UWORD8 *pu1_mb_d; 1649 1650 /* intra mode */ 1651 UWORD8 u1_intra_mode = ps_proc->u1_l_i16_mode; 1652 1653 /* neighbor availability */ 1654 WORD32 i4_ngbr_avbl; 1655 1656 /* neighbor pels for intra prediction */ 1657 UWORD8 *pu1_ngbr_pels_i4 = ps_proc->au1_ngbr_pels; 1658 1659 /* coded block pattern */ 1660 UWORD8 u1_cbp_l = 0; 1661 1662 /* number of non zero coeffs*/ 1663 UWORD8 u1_nnz; 1664 1665 /* quantization parameters */ 1666 quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0]; 1667 1668 /* pointer to packed mb coeff data */ 1669 void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data); 1670 1671 /* pointer to packed mb coeff data */ 1672 tu_sblk_coeff_data_t *ps_mb_coeff_data, *ps_mb_coeff_data_b8; 1673 1674 /* no of non zero coefficients in the current sub block */ 1675 UWORD32 u4_nnz_cnt; 1676 1677 /* significant coefficient map */ 1678 UWORD32 u4_s_map; 1679 1680 /* pointer to scanning matrix */ 1681 const UWORD8 *pu1_scan_order = gu1_luma_scan_order; 1682 1683 /*Dummy variable for 4x4 trans fucntion*/ 1684 WORD16 i2_dc_dummy; 1685 1686 /* temp var */ 1687 UWORD32 i, b8, b4, u1_blk_x, u1_blk_y, u1_pix_x, u1_pix_y, coeff_cnt, mask; 1688 1689 /* Process 16 4x4 lum sub-blocks of the MB in scan order */ 1690 for (b8 = 0; b8 < 4; b8++) 1691 { 1692 u1_blk_x = GET_BLK_RASTER_POS_X(b8) << 3; 1693 u1_blk_y = GET_BLK_RASTER_POS_Y(b8) << 3; 1694 1695 /* if in case cbp for the 8x8 block is zero, send no residue */ 1696 ps_mb_coeff_data_b8 = *pv_mb_coeff_data; 1697 1698 for (b4 = 0; b4 < 4; b4++) 1699 { 1700 /* index of pel in MB */ 1701 u1_pix_x = u1_blk_x + (GET_SUB_BLK_RASTER_POS_X(b4) << 2); 1702 u1_pix_y = u1_blk_y + (GET_SUB_BLK_RASTER_POS_Y(b4) << 2); 1703 1704 /* Initialize source and reference pointers */ 1705 pu1_curr_mb = ps_proc->pu1_src_buf_luma + u1_pix_x + (u1_pix_y * i4_src_strd); 1706 pu1_ref_mb = ps_proc->pu1_rec_buf_luma + u1_pix_x + (u1_pix_y * i4_rec_strd); 1707 1708 /* pointer to left of ref macro block */ 1709 pu1_mb_a = pu1_ref_mb - 1; 1710 /* pointer to top of ref macro block */ 1711 pu1_mb_b = pu1_ref_mb - i4_rec_strd; 1712 /* pointer to topright of ref macro block */ 1713 pu1_mb_c = pu1_mb_b + 4; 1714 /* pointer to topleft macro block */ 1715 pu1_mb_d = pu1_mb_b - 1; 1716 1717 /* compute neighbor availability */ 1718 i4_ngbr_avbl = ps_proc->au1_ngbr_avbl_4x4_subblks[(b8 << 2) + b4]; 1719 1720 /* sub block intra mode */ 1721 u1_intra_mode = ps_proc->au1_intra_luma_mb_4x4_modes[(b8 << 2) + b4]; 1722 1723 /********************************************************/ 1724 /* gather prediction pels from neighbors for prediction */ 1725 /********************************************************/ 1726 /* left pels */ 1727 if (i4_ngbr_avbl & LEFT_MB_AVAILABLE_MASK) 1728 { 1729 for (i = 0; i < 4; i++) 1730 pu1_ngbr_pels_i4[4 - 1 - i] = pu1_mb_a[i * i4_rec_strd]; 1731 } 1732 else 1733 { 1734 memset(pu1_ngbr_pels_i4, 0, 4); 1735 } 1736 1737 /* top pels */ 1738 if (i4_ngbr_avbl & TOP_MB_AVAILABLE_MASK) 1739 { 1740 memcpy(pu1_ngbr_pels_i4 + 4 + 1, pu1_mb_b, 4); 1741 } 1742 else 1743 { 1744 memset(pu1_ngbr_pels_i4 + 5, 0, 4); 1745 } 1746 /* top left pels */ 1747 if (i4_ngbr_avbl & TOP_LEFT_MB_AVAILABLE_MASK) 1748 { 1749 pu1_ngbr_pels_i4[4] = *pu1_mb_d; 1750 } 1751 else 1752 { 1753 pu1_ngbr_pels_i4[4] = 0; 1754 } 1755 /* top right pels */ 1756 if (i4_ngbr_avbl & TOP_RIGHT_MB_AVAILABLE_MASK) 1757 { 1758 memcpy(pu1_ngbr_pels_i4+8+1,pu1_mb_c,4); 1759 } 1760 else if (i4_ngbr_avbl & TOP_MB_AVAILABLE_MASK) 1761 { 1762 memset(pu1_ngbr_pels_i4+8+1,pu1_ngbr_pels_i4[8],4); 1763 } 1764 1765 /********************************************************/ 1766 /* prediction */ 1767 /********************************************************/ 1768 (ps_codec->apf_intra_pred_4_l)[u1_intra_mode](pu1_ngbr_pels_i4, 1769 pu1_pred_mb, 0, 1770 i4_pred_strd, 1771 i4_ngbr_avbl); 1772 1773 /********************************************************/ 1774 /* error estimation, */ 1775 /* transform */ 1776 /* quantization */ 1777 /********************************************************/ 1778 ps_codec->pf_resi_trans_quant_4x4(pu1_curr_mb, pu1_pred_mb, 1779 pi2_res_mb, i4_src_strd, 1780 i4_pred_strd, 1781 ps_qp_params->pu2_scale_mat, 1782 ps_qp_params->pu2_thres_mat, 1783 ps_qp_params->u1_qbits, 1784 ps_qp_params->u4_dead_zone, 1785 &u1_nnz, &i2_dc_dummy); 1786 1787 /********************************************************/ 1788 /* pack coeff data for entropy coding */ 1789 /********************************************************/ 1790 ps_mb_coeff_data = *pv_mb_coeff_data; 1791 1792 /* write number of non zero coefficients */ 1793 ps_mb_coeff_data->i4_sig_map_nnz = u1_nnz; 1794 1795 if (u1_nnz) 1796 { 1797 for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u1_nnz; coeff_cnt++) 1798 { 1799 if (pi2_res_mb[pu1_scan_order[coeff_cnt]]) 1800 { 1801 /* write residue */ 1802 ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = pi2_res_mb[pu1_scan_order[coeff_cnt]]; 1803 u4_s_map |= mask; 1804 } 1805 mask <<= 1; 1806 } 1807 /* write significant coeff map */ 1808 ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16); 1809 1810 /* update ptr to coeff data */ 1811 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt); 1812 1813 /* cbp */ 1814 u1_cbp_l |= (1 << b8); 1815 } 1816 else 1817 { 1818 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue; 1819 } 1820 1821 /********************************************************/ 1822 /* ierror estimation, */ 1823 /* itransform */ 1824 /* iquantization */ 1825 /********************************************************/ 1826 if (u1_nnz) 1827 ps_codec->pf_iquant_itrans_recon_4x4( 1828 pi2_res_mb, pu1_pred_mb, pu1_ref_mb, 1829 /*No input stride,*/i4_pred_strd, 1830 i4_rec_strd, ps_qp_params->pu2_iscale_mat, 1831 ps_qp_params->pu2_weigh_mat, 1832 ps_qp_params->u1_qp_div, 1833 ps_proc->pv_scratch_buff, 0, 0); 1834 else 1835 ps_codec->pf_inter_pred_luma_copy(pu1_pred_mb, pu1_ref_mb, 1836 i4_pred_strd, i4_rec_strd, 1837 BLK_SIZE, BLK_SIZE, NULL, 1838 0); 1839 1840 } 1841 1842 /* if the 8x8 block has no residue, nothing needs to be sent to entropy */ 1843 if (!(u1_cbp_l & (1 << b8))) 1844 { 1845 *pv_mb_coeff_data = ps_mb_coeff_data_b8; 1846 } 1847 } 1848 1849 return (u1_cbp_l); 1850 } 1851 1852 /** 1853 ******************************************************************************* 1854 * 1855 * @brief performs luma core coding when intra mode is i4x4 1856 * 1857 * @par Description: 1858 * If the current mb is to be coded as intra of mb type i4x4, the mb is first 1859 * predicted using one of i4x4 prediction filters, basing on the intra mode 1860 * chosen. Then, error is computed between the input blk and the estimated blk. 1861 * This error is dct transformed and quantized. The quantized coefficients are 1862 * packed in scan order for entropy coding. 1863 * 1864 * @param[in] ps_proc_ctxt 1865 * pointer to the current macro block context 1866 * 1867 * @returns u1_cbp_l 1868 * coded block pattern luma 1869 * 1870 * @remarks 1871 * The traversal of 4x4 subblocks in the 16x16 macroblock is as per the scan order 1872 * mentioned in h.264 specification 1873 * 1874 ******************************************************************************* 1875 */ 1876 UWORD8 ih264e_code_luma_intra_macroblock_4x4_rdopt_on(process_ctxt_t *ps_proc) 1877 { 1878 /* Codec Context */ 1879 codec_t *ps_codec = ps_proc->ps_codec; 1880 1881 /* pointer to ref macro block */ 1882 UWORD8 *pu1_ref_mb_intra_4x4 = ps_proc->pu1_ref_mb_intra_4x4; 1883 1884 /* pointer to recon buffer */ 1885 UWORD8 *pu1_rec_mb = ps_proc->pu1_rec_buf_luma; 1886 1887 /* pointer to residual macro block */ 1888 WORD16 *pi2_res_mb = ps_proc->pi2_res_buf_intra_4x4; 1889 1890 /* strides */ 1891 WORD32 i4_rec_strd = ps_proc->i4_rec_strd; 1892 1893 /* number of non zero coeffs*/ 1894 UWORD8 *pu1_nnz = (UWORD8 *)ps_proc->au4_nnz_intra_4x4; 1895 1896 /* coded block pattern */ 1897 UWORD8 u1_cbp_l = 0; 1898 1899 /* pointer to packed mb coeff data */ 1900 void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data); 1901 1902 /* pointer to packed mb coeff data */ 1903 tu_sblk_coeff_data_t *ps_mb_coeff_data, *ps_mb_coeff_data_b8; 1904 1905 /* no of non zero coefficients in the current sub block */ 1906 UWORD32 u4_nnz_cnt; 1907 1908 /* significant coefficient map */ 1909 UWORD32 u4_s_map; 1910 1911 /* pointer to scanning matrix */ 1912 const UWORD8 *pu1_scan_order = gu1_luma_scan_order; 1913 1914 /* temp var */ 1915 UWORD32 b8, b4, coeff_cnt, mask; 1916 1917 /* Process 16 4x4 lum sub-blocks of the MB in scan order */ 1918 for (b8 = 0; b8 < 4; b8++) 1919 { 1920 /* if in case cbp for the 8x8 block is zero, send no residue */ 1921 ps_mb_coeff_data_b8 = *pv_mb_coeff_data; 1922 1923 for (b4 = 0; b4 < 4; b4++, pu1_nnz++, pi2_res_mb += MB_SIZE) 1924 { 1925 /********************************************************/ 1926 /* pack coeff data for entropy coding */ 1927 /********************************************************/ 1928 ps_mb_coeff_data = *pv_mb_coeff_data; 1929 1930 /* write number of non zero coefficients */ 1931 ps_mb_coeff_data->i4_sig_map_nnz = *pu1_nnz; 1932 1933 if (*pu1_nnz) 1934 { 1935 for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < *pu1_nnz; coeff_cnt++) 1936 { 1937 if (pi2_res_mb[pu1_scan_order[coeff_cnt]]) 1938 { 1939 /* write residue */ 1940 ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = pi2_res_mb[pu1_scan_order[coeff_cnt]]; 1941 u4_s_map |= mask; 1942 } 1943 mask <<= 1; 1944 } 1945 /* write significant coeff map */ 1946 ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16); 1947 1948 /* update ptr to coeff data */ 1949 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt); 1950 1951 /* cbp */ 1952 u1_cbp_l |= (1 << b8); 1953 } 1954 else 1955 { 1956 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue; 1957 } 1958 } 1959 1960 /* if the 8x8 block has no residue, nothing needs to be sent to entropy */ 1961 if (!(u1_cbp_l & (1 << b8))) 1962 { 1963 *pv_mb_coeff_data = ps_mb_coeff_data_b8; 1964 } 1965 } 1966 1967 /* memcpy recon */ 1968 ps_codec->pf_inter_pred_luma_copy(pu1_ref_mb_intra_4x4, pu1_rec_mb, MB_SIZE, i4_rec_strd, MB_SIZE, MB_SIZE, NULL, 0); 1969 1970 return (u1_cbp_l); 1971 } 1972 1973 1974 /** 1975 ******************************************************************************* 1976 * 1977 * @brief performs chroma core coding for intra macro blocks 1978 * 1979 * @par Description: 1980 * If the current MB is to be intra coded with mb type chroma I8x8, the MB is 1981 * first predicted using intra 8x8 prediction filters. The predicted data is 1982 * compared with the input for error and the error is transformed. The DC 1983 * coefficients of each transformed sub blocks are further transformed using 1984 * Hadamard transform. The resulting coefficients are quantized, packed and sent 1985 * for entropy coding. 1986 * 1987 * @param[in] ps_proc_ctxt 1988 * pointer to the current macro block context 1989 * 1990 * @returns u1_cbp_c 1991 * coded block pattern chroma 1992 * 1993 * @remarks 1994 * The traversal of 4x4 subblocks in the 8x8 macroblock is as per the scan order 1995 * mentioned in h.264 specification 1996 * 1997 ******************************************************************************* 1998 */ 1999 UWORD8 ih264e_code_chroma_intra_macroblock_8x8(process_ctxt_t *ps_proc) 2000 { 2001 /* Codec Context */ 2002 codec_t *ps_codec = ps_proc->ps_codec; 2003 2004 /* pointer to ref macro block */ 2005 UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_chroma; 2006 2007 /* pointer to src macro block */ 2008 UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_chroma; 2009 2010 /* pointer to prediction macro block */ 2011 UWORD8 *pu1_pred_mb = NULL; 2012 2013 /* pointer to residual macro block */ 2014 WORD16 *pi2_res_mb = ps_proc->pi2_res_buf; 2015 2016 /* strides */ 2017 WORD32 i4_src_strd = ps_proc->i4_src_chroma_strd; 2018 WORD32 i4_rec_strd = ps_proc->i4_rec_strd; 2019 WORD32 i4_pred_strd = ps_proc->i4_pred_strd; 2020 WORD32 i4_res_strd = ps_proc->i4_res_strd; 2021 2022 /* intra mode */ 2023 UWORD8 u1_intra_mode = ps_proc->u1_c_i8_mode; 2024 2025 /* coded block pattern */ 2026 UWORD8 u1_cbp_c = 0; 2027 2028 /* number of non zero coeffs*/ 2029 UWORD8 au1_nnz[18] = {0}; 2030 2031 /* quantization parameters */ 2032 quant_params_t *ps_qp_params = ps_proc->ps_qp_params[1]; 2033 2034 /* Control signal for inverse transform */ 2035 UWORD32 u4_cntrl; 2036 2037 /* pointer to packed mb coeff data */ 2038 void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data); 2039 2040 /* See if we need to swap U and V plances for entropy */ 2041 UWORD32 u4_swap_uv = ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_VU; 2042 2043 if (PLANE_CH_I8x8 == u1_intra_mode) 2044 { 2045 pu1_pred_mb = ps_proc->pu1_pred_mb_intra_chroma_plane; 2046 } 2047 else 2048 { 2049 pu1_pred_mb = ps_proc->pu1_pred_mb_intra_chroma; 2050 } 2051 2052 /********************************************************/ 2053 /* error estimation, */ 2054 /* transform */ 2055 /* quantization */ 2056 /********************************************************/ 2057 ih264e_chroma_8x8_resi_trans_dctrans_quant(ps_codec, pu1_curr_mb, 2058 pu1_pred_mb, pi2_res_mb, 2059 i4_src_strd, i4_pred_strd, 2060 i4_res_strd, 2061 ps_qp_params->pu2_scale_mat, 2062 ps_qp_params->pu2_thres_mat, 2063 ps_qp_params->u1_qbits, 2064 ps_qp_params->u4_dead_zone, 2065 au1_nnz); 2066 2067 /********************************************************/ 2068 /* pack coeff data for entropy coding */ 2069 /********************************************************/ 2070 ih264e_pack_c_mb(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_c, 2071 au1_nnz, ps_codec->u4_thres_resi, &u4_cntrl, u4_swap_uv); 2072 2073 /********************************************************/ 2074 /* ierror estimation, */ 2075 /* itransform */ 2076 /* iquantization */ 2077 /********************************************************/ 2078 ih264e_chroma_8x8_idctrans_iquant_itrans_recon(ps_codec, pi2_res_mb, 2079 pu1_pred_mb, pu1_ref_mb, 2080 i4_res_strd, i4_pred_strd, 2081 i4_rec_strd, 2082 ps_qp_params->pu2_iscale_mat, 2083 ps_qp_params->pu2_weigh_mat, 2084 ps_qp_params->u1_qp_div, 2085 u4_cntrl, 2086 ps_proc->pv_scratch_buff); 2087 return (u1_cbp_c); 2088 } 2089 2090 2091 /** 2092 ******************************************************************************* 2093 * 2094 * @brief performs luma core coding when mode is inter 2095 * 2096 * @par Description: 2097 * If the current mb is to be coded as inter the mb is predicted based on the 2098 * sub mb partitions and corresponding motion vectors generated by ME. Then, 2099 * error is computed between the input blk and the estimated blk. This error is 2100 * transformed, quantized. The quantized coefficients are packed in scan order 2101 * for entropy coding 2102 * 2103 * @param[in] ps_proc_ctxt 2104 * pointer to the current macro block context 2105 * 2106 * @returns u1_cbp_l 2107 * coded block pattern luma 2108 * 2109 * @remarks none 2110 * 2111 ******************************************************************************* 2112 */ 2113 2114 UWORD8 ih264e_code_luma_inter_macroblock_16x16(process_ctxt_t *ps_proc) 2115 { 2116 /* Codec Context */ 2117 codec_t *ps_codec = ps_proc->ps_codec; 2118 2119 /* pointer to ref macro block */ 2120 UWORD8 *pu1_rec_mb = ps_proc->pu1_rec_buf_luma; 2121 2122 /* pointer to src macro block */ 2123 UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_luma; 2124 2125 /* pointer to prediction macro block */ 2126 UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb; 2127 2128 /* pointer to residual macro block */ 2129 WORD16 *pi2_res_mb = ps_proc->pi2_res_buf; 2130 2131 /* strides */ 2132 WORD32 i4_src_strd = ps_proc->i4_src_strd; 2133 WORD32 i4_rec_strd = ps_proc->i4_rec_strd; 2134 WORD32 i4_pred_strd = ps_proc->i4_pred_strd; 2135 WORD32 i4_res_strd = ps_proc->i4_res_strd; 2136 2137 /* coded block pattern */ 2138 UWORD8 u1_cbp_l = 0; 2139 2140 /*Control signal of itrans*/ 2141 UWORD32 u4_cntrl; 2142 2143 /* number of non zero coeffs*/ 2144 UWORD8 *pu1_nnz = (UWORD8 *)ps_proc->au4_nnz; 2145 2146 /* quantization parameters */ 2147 quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0]; 2148 2149 /* pointer to packed mb coeff data */ 2150 void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data); 2151 2152 /* pseudo pred buffer */ 2153 UWORD8 *pu1_pseudo_pred = pu1_pred_mb; 2154 2155 /* pseudo pred buffer stride */ 2156 WORD32 i4_pseudo_pred_strd = i4_pred_strd; 2157 2158 /* init nnz */ 2159 ps_proc->au4_nnz[0] = 0; 2160 ps_proc->au4_nnz[1] = 0; 2161 ps_proc->au4_nnz[2] = 0; 2162 ps_proc->au4_nnz[3] = 0; 2163 ps_proc->au4_nnz[4] = 0; 2164 2165 /********************************************************/ 2166 /* prediction */ 2167 /********************************************************/ 2168 ih264e_motion_comp_luma(ps_proc, &pu1_pseudo_pred, &i4_pseudo_pred_strd); 2169 2170 /********************************************************/ 2171 /* error estimation, */ 2172 /* transform */ 2173 /* quantization */ 2174 /********************************************************/ 2175 if (ps_proc->u4_min_sad_reached == 0 || ps_proc->u4_min_sad != 0) 2176 { 2177 ih264e_luma_16x16_resi_trans_dctrans_quant(ps_codec, pu1_curr_mb, 2178 pu1_pseudo_pred, pi2_res_mb, 2179 i4_src_strd, 2180 i4_pseudo_pred_strd, 2181 i4_res_strd, 2182 ps_qp_params->pu2_scale_mat, 2183 ps_qp_params->pu2_thres_mat, 2184 ps_qp_params->u1_qbits, 2185 ps_qp_params->u4_dead_zone, 2186 pu1_nnz, 2187 DISABLE_DC_TRANSFORM); 2188 2189 /********************************************************/ 2190 /* pack coeff data for entropy coding */ 2191 /********************************************************/ 2192 ih264e_pack_l_mb(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_l, 2193 pu1_nnz, ps_codec->u4_thres_resi, &u4_cntrl); 2194 } 2195 else 2196 { 2197 u1_cbp_l = 0; 2198 u4_cntrl = 0; 2199 } 2200 2201 /********************************************************/ 2202 /* ierror estimation, */ 2203 /* itransform */ 2204 /* iquantization */ 2205 /********************************************************/ 2206 2207 /*If the frame is not to be used for P frame reference or dumping recon 2208 * we only will use the reocn for only predicting intra Mbs 2209 * THis will need only right and bottom edge 4x4 blocks recon 2210 * Hence we selectively enable them using control signal(including DC) 2211 */ 2212 if (ps_proc->u4_compute_recon != 1) 2213 { 2214 u4_cntrl &= 0x111F0000; 2215 } 2216 2217 if (u4_cntrl) 2218 { 2219 ih264e_luma_16x16_idctrans_iquant_itrans_recon( 2220 ps_codec, pi2_res_mb, pu1_pseudo_pred, pu1_rec_mb, 2221 i4_res_strd, i4_pseudo_pred_strd, i4_rec_strd, 2222 ps_qp_params->pu2_iscale_mat, 2223 ps_qp_params->pu2_weigh_mat, ps_qp_params->u1_qp_div, 2224 u4_cntrl /*Cntrl*/, DISABLE_DC_TRANSFORM, 2225 ps_proc->pv_scratch_buff); 2226 } 2227 else 2228 { 2229 ps_codec->pf_inter_pred_luma_copy(pu1_pseudo_pred, pu1_rec_mb, 2230 i4_pseudo_pred_strd, i4_rec_strd, 2231 MB_SIZE, MB_SIZE, NULL, 0); 2232 } 2233 2234 2235 return (u1_cbp_l); 2236 } 2237 2238 /** 2239 ******************************************************************************* 2240 * 2241 * @brief performs chroma core coding for inter macro blocks 2242 * 2243 * @par Description: 2244 * If the current mb is to be coded as inter predicted mb,based on the sub mb partitions 2245 * and corresponding motion vectors generated by ME ,prediction is done. 2246 * Then, error is computed between the input blk and the estimated blk. 2247 * This error is transformed , quantized. The quantized coefficients 2248 * are packed in scan order for 2249 * entropy coding. 2250 * 2251 * @param[in] ps_proc_ctxt 2252 * pointer to the current macro block context 2253 * 2254 * @returns u1_cbp_l 2255 * coded block pattern chroma 2256 * 2257 * @remarks none 2258 * 2259 ******************************************************************************* 2260 */ 2261 UWORD8 ih264e_code_chroma_inter_macroblock_8x8(process_ctxt_t *ps_proc) 2262 { 2263 /* Codec Context */ 2264 codec_t *ps_codec = ps_proc->ps_codec; 2265 2266 /* pointer to ref macro block */ 2267 UWORD8 *pu1_rec_mb = ps_proc->pu1_rec_buf_chroma; 2268 2269 /* pointer to src macro block */ 2270 UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_chroma; 2271 2272 /* pointer to prediction macro block */ 2273 UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb; 2274 2275 /* pointer to residual macro block */ 2276 WORD16 *pi2_res_mb = ps_proc->pi2_res_buf; 2277 2278 /* strides */ 2279 WORD32 i4_src_strd = ps_proc->i4_src_chroma_strd; 2280 WORD32 i4_rec_strd = ps_proc->i4_rec_strd; 2281 WORD32 i4_pred_strd = ps_proc->i4_pred_strd; 2282 WORD32 i4_res_strd = ps_proc->i4_res_strd; 2283 2284 /* coded block pattern */ 2285 UWORD8 u1_cbp_c = 0; 2286 2287 /*Control signal for inverse transform*/ 2288 UWORD32 u4_cntrl; 2289 2290 /* number of non zero coeffs*/ 2291 UWORD8 au1_nnz[10] = {0}; 2292 2293 /* quantization parameters */ 2294 quant_params_t *ps_qp_params = ps_proc->ps_qp_params[1]; 2295 2296 /* pointer to packed mb coeff data */ 2297 void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data); 2298 2299 /*See if we need to swap U and V plances for entropy*/ 2300 UWORD32 u4_swap_uv = ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_VU; 2301 2302 /********************************************************/ 2303 /* prediction */ 2304 /********************************************************/ 2305 ih264e_motion_comp_chroma(ps_proc); 2306 2307 /********************************************************/ 2308 /* error estimation, */ 2309 /* transform */ 2310 /* quantization */ 2311 /********************************************************/ 2312 ih264e_chroma_8x8_resi_trans_dctrans_quant(ps_codec, pu1_curr_mb, 2313 pu1_pred_mb, pi2_res_mb, 2314 i4_src_strd, i4_pred_strd, 2315 i4_res_strd, 2316 ps_qp_params->pu2_scale_mat, 2317 ps_qp_params->pu2_thres_mat, 2318 ps_qp_params->u1_qbits, 2319 ps_qp_params->u4_dead_zone, 2320 au1_nnz); 2321 2322 /********************************************************/ 2323 /* pack coeff data for entropy coding */ 2324 /********************************************************/ 2325 ih264e_pack_c_mb(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_c, 2326 au1_nnz, ps_codec->u4_thres_resi, &u4_cntrl, u4_swap_uv); 2327 2328 /********************************************************/ 2329 /* ierror estimation, */ 2330 /* itransform */ 2331 /* iquantization */ 2332 /********************************************************/ 2333 2334 /* If the frame is not to be used for P frame reference or dumping recon 2335 * we only will use the reocn for only predicting intra Mbs 2336 * THis will need only right and bottom edge 4x4 blocks recon 2337 * Hence we selectively enable them using control signal(including DC) 2338 */ 2339 if (!ps_proc->u4_compute_recon) 2340 { 2341 u4_cntrl &= 0x7700C000; 2342 } 2343 2344 if (u4_cntrl) 2345 { 2346 ih264e_chroma_8x8_idctrans_iquant_itrans_recon( 2347 ps_codec, pi2_res_mb, pu1_pred_mb, pu1_rec_mb, 2348 i4_res_strd, i4_pred_strd, i4_rec_strd, 2349 ps_qp_params->pu2_iscale_mat, 2350 ps_qp_params->pu2_weigh_mat, ps_qp_params->u1_qp_div, 2351 u4_cntrl, ps_proc->pv_scratch_buff); 2352 } 2353 else 2354 { 2355 ps_codec->pf_inter_pred_luma_copy(pu1_pred_mb, pu1_rec_mb, i4_pred_strd, 2356 i4_rec_strd, MB_SIZE >> 1, MB_SIZE, 2357 NULL, 0); 2358 } 2359 2360 return (u1_cbp_c); 2361 } 2362