1 /****************************************************************************** 2 * 3 * Copyright (C) 2018 The Android Open Source Project 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ***************************************************************************** 18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore 19 */ 20 21 /*! 22 ****************************************************************************** 23 * \file ihevce_enc_loop_utils.c 24 * 25 * \brief 26 * This file contains utility functions of Encode loop 27 * 28 * \date 29 * 18/09/2012 30 * 31 * \author 32 * Ittiam 33 * 34 * 35 * List of Functions 36 * 37 * 38 ****************************************************************************** 39 */ 40 41 /*****************************************************************************/ 42 /* File Includes */ 43 /*****************************************************************************/ 44 /* System include files */ 45 #include <stdio.h> 46 #include <string.h> 47 #include <stdlib.h> 48 #include <assert.h> 49 #include <stdarg.h> 50 #include <math.h> 51 #include <limits.h> 52 53 /* User include files */ 54 #include "ihevc_typedefs.h" 55 #include "itt_video_api.h" 56 #include "ihevce_api.h" 57 58 #include "rc_cntrl_param.h" 59 #include "rc_frame_info_collector.h" 60 #include "rc_look_ahead_params.h" 61 62 #include "ihevc_defs.h" 63 #include "ihevc_macros.h" 64 #include "ihevc_debug.h" 65 #include 
"ihevc_structs.h" 66 #include "ihevc_platform_macros.h" 67 #include "ihevc_deblk.h" 68 #include "ihevc_itrans_recon.h" 69 #include "ihevc_chroma_itrans_recon.h" 70 #include "ihevc_chroma_intra_pred.h" 71 #include "ihevc_intra_pred.h" 72 #include "ihevc_inter_pred.h" 73 #include "ihevc_mem_fns.h" 74 #include "ihevc_padding.h" 75 #include "ihevc_weighted_pred.h" 76 #include "ihevc_sao.h" 77 #include "ihevc_resi_trans.h" 78 #include "ihevc_quant_iquant_ssd.h" 79 #include "ihevc_cabac_tables.h" 80 #include "ihevc_common_tables.h" 81 82 #include "ihevce_defs.h" 83 #include "ihevce_hle_interface.h" 84 #include "ihevce_lap_enc_structs.h" 85 #include "ihevce_multi_thrd_structs.h" 86 #include "ihevce_multi_thrd_funcs.h" 87 #include "ihevce_me_common_defs.h" 88 #include "ihevce_had_satd.h" 89 #include "ihevce_error_codes.h" 90 #include "ihevce_bitstream.h" 91 #include "ihevce_cabac.h" 92 #include "ihevce_rdoq_macros.h" 93 #include "ihevce_function_selector.h" 94 #include "ihevce_enc_structs.h" 95 #include "ihevce_entropy_structs.h" 96 #include "ihevce_cmn_utils_instr_set_router.h" 97 #include "ihevce_ipe_instr_set_router.h" 98 #include "ihevce_decomp_pre_intra_structs.h" 99 #include "ihevce_decomp_pre_intra_pass.h" 100 #include "ihevce_enc_loop_structs.h" 101 #include "ihevce_nbr_avail.h" 102 #include "ihevce_enc_loop_utils.h" 103 #include "ihevce_sub_pic_rc.h" 104 #include "ihevce_global_tables.h" 105 #include "ihevce_bs_compute_ctb.h" 106 #include "ihevce_cabac_rdo.h" 107 #include "ihevce_deblk.h" 108 #include "ihevce_frame_process.h" 109 #include "ihevce_rc_enc_structs.h" 110 #include "hme_datatype.h" 111 #include "hme_interface.h" 112 #include "hme_common_defs.h" 113 #include "hme_defs.h" 114 #include "hme_common_utils.h" 115 #include "ihevce_me_instr_set_router.h" 116 #include "ihevce_enc_subpel_gen.h" 117 #include "ihevce_inter_pred.h" 118 #include "ihevce_mv_pred.h" 119 #include "ihevce_mv_pred_merge.h" 120 #include "ihevce_enc_loop_inter_mode_sifter.h" 121 #include 
"ihevce_enc_cu_recursion.h" 122 #include "ihevce_enc_loop_pass.h" 123 #include "ihevce_common_utils.h" 124 #include "ihevce_dep_mngr_interface.h" 125 #include "ihevce_sao.h" 126 #include "ihevce_tile_interface.h" 127 #include "ihevce_profile.h" 128 #include "ihevce_stasino_helpers.h" 129 #include "ihevce_tu_tree_selector.h" 130 131 /*****************************************************************************/ 132 /* Globals */ 133 /*****************************************************************************/ 134 135 extern UWORD16 gau2_ihevce_cabac_bin_to_bits[64 * 2]; 136 extern const UWORD8 gu1_hevce_scan4x4[3][16]; 137 extern const UWORD8 gu1_hevce_sigcoeff_ctxtinc[4][16]; 138 extern const UWORD8 gu1_hevce_sigcoeff_ctxtinc_tr4[16]; 139 extern const UWORD8 gu1_hevce_sigcoeff_ctxtinc_00[16]; 140 141 /*****************************************************************************/ 142 /* Constant Macros */ 143 /*****************************************************************************/ 144 #define ENABLE_ZERO_CBF 1 145 #define DISABLE_RDOQ_INTRA 0 146 147 /*****************************************************************************/ 148 /* Function Definitions */ 149 /*****************************************************************************/ 150 void *ihevce_tu_tree_update( 151 tu_prms_t *ps_tu_prms, 152 WORD32 *pnum_tu_in_cu, 153 WORD32 depth, 154 WORD32 tu_split_flag, 155 WORD32 tu_early_cbf, 156 WORD32 i4_x_off, 157 WORD32 i4_y_off) 158 { 159 //WORD32 tu_split_flag = p_tu_split_flag[0]; 160 WORD32 p_tu_split_flag[4]; 161 WORD32 p_tu_early_cbf[4]; 162 163 WORD32 tu_size = ps_tu_prms->u1_tu_size; 164 165 if(((tu_size >> depth) >= 16) && (tu_split_flag & 0x1)) 166 { 167 if((tu_size >> depth) == 32) 168 { 169 /* Get the individual TU split flags */ 170 p_tu_split_flag[0] = (tu_split_flag >> 16) & 0x1F; 171 p_tu_split_flag[1] = (tu_split_flag >> 11) & 0x1F; 172 p_tu_split_flag[2] = (tu_split_flag >> 6) & 0x1F; 173 p_tu_split_flag[3] = (tu_split_flag >> 1) & 
0x1F; 174 175 /* Get the early CBF flags */ 176 p_tu_early_cbf[0] = (tu_early_cbf >> 16) & 0x1F; 177 p_tu_early_cbf[1] = (tu_early_cbf >> 11) & 0x1F; 178 p_tu_early_cbf[2] = (tu_early_cbf >> 6) & 0x1F; 179 p_tu_early_cbf[3] = (tu_early_cbf >> 1) & 0x1F; 180 } 181 else 182 { 183 /* Get the individual TU split flags */ 184 p_tu_split_flag[0] = ((tu_split_flag >> 4) & 0x1); 185 p_tu_split_flag[1] = ((tu_split_flag >> 3) & 0x1); 186 p_tu_split_flag[2] = ((tu_split_flag >> 2) & 0x1); 187 p_tu_split_flag[3] = ((tu_split_flag >> 1) & 0x1); 188 189 /* Get the early CBF flags */ 190 p_tu_early_cbf[0] = ((tu_early_cbf >> 4) & 0x1); 191 p_tu_early_cbf[1] = ((tu_early_cbf >> 3) & 0x1); 192 p_tu_early_cbf[2] = ((tu_early_cbf >> 2) & 0x1); 193 p_tu_early_cbf[3] = ((tu_early_cbf >> 1) & 0x1); 194 } 195 196 ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update( 197 ps_tu_prms, 198 pnum_tu_in_cu, 199 depth + 1, 200 p_tu_split_flag[0], 201 p_tu_early_cbf[0], 202 i4_x_off, 203 i4_y_off); 204 205 ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update( 206 ps_tu_prms, 207 pnum_tu_in_cu, 208 depth + 1, 209 p_tu_split_flag[1], 210 p_tu_early_cbf[1], 211 (i4_x_off + (tu_size >> (depth + 1))), 212 i4_y_off); 213 214 ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update( 215 ps_tu_prms, 216 pnum_tu_in_cu, 217 depth + 1, 218 p_tu_split_flag[2], 219 p_tu_early_cbf[2], 220 i4_x_off, 221 (i4_y_off + (tu_size >> (depth + 1)))); 222 223 ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update( 224 ps_tu_prms, 225 pnum_tu_in_cu, 226 depth + 1, 227 p_tu_split_flag[3], 228 p_tu_early_cbf[3], 229 (i4_x_off + (tu_size >> (depth + 1))), 230 (i4_y_off + (tu_size >> (depth + 1)))); 231 } 232 else 233 { 234 if(tu_split_flag & 0x1) 235 { 236 /* This piece of code will be entered for the 8x8, if it is split 237 Update the 4 child TU's accordingly. 
*/ 238 239 (*pnum_tu_in_cu) += 4; 240 241 /* TL TU update */ 242 ps_tu_prms->u1_tu_size = tu_size >> (depth + 1); 243 244 ps_tu_prms->u1_x_off = i4_x_off; 245 246 ps_tu_prms->u1_y_off = i4_y_off; 247 248 /* Early CBF is not done for 4x4 transforms */ 249 ps_tu_prms->i4_early_cbf = 1; 250 251 ps_tu_prms++; 252 253 /* TR TU update */ 254 ps_tu_prms->u1_tu_size = tu_size >> (depth + 1); 255 256 ps_tu_prms->u1_x_off = i4_x_off + (tu_size >> (depth + 1)); 257 258 ps_tu_prms->u1_y_off = i4_y_off; 259 260 /* Early CBF is not done for 4x4 transforms */ 261 ps_tu_prms->i4_early_cbf = 1; 262 263 ps_tu_prms++; 264 265 /* BL TU update */ 266 ps_tu_prms->u1_tu_size = tu_size >> (depth + 1); 267 268 ps_tu_prms->u1_x_off = i4_x_off; 269 270 ps_tu_prms->u1_y_off = i4_y_off + (tu_size >> (depth + 1)); 271 272 /* Early CBF is not done for 4x4 transforms */ 273 ps_tu_prms->i4_early_cbf = 1; 274 275 ps_tu_prms++; 276 277 /* BR TU update */ 278 ps_tu_prms->u1_tu_size = tu_size >> (depth + 1); 279 280 ps_tu_prms->u1_x_off = i4_x_off + (tu_size >> (depth + 1)); 281 282 ps_tu_prms->u1_y_off = i4_y_off + (tu_size >> (depth + 1)); 283 284 /* Early CBF is not done for 4x4 transforms */ 285 ps_tu_prms->i4_early_cbf = 1; 286 } 287 else 288 { 289 /* Update the TU params */ 290 ps_tu_prms->u1_tu_size = tu_size >> depth; 291 292 ps_tu_prms->u1_x_off = i4_x_off; 293 294 ps_tu_prms->u1_y_off = i4_y_off; 295 296 (*pnum_tu_in_cu)++; 297 298 /* Early CBF update for current TU */ 299 ps_tu_prms->i4_early_cbf = tu_early_cbf & 0x1; 300 } 301 if((*pnum_tu_in_cu) < MAX_TU_IN_CTB) 302 { 303 ps_tu_prms++; 304 305 ps_tu_prms->u1_tu_size = tu_size; 306 } 307 } 308 309 return ps_tu_prms; 310 } 311 312 /*! 
313 ****************************************************************************** 314 * \if Function name : ihevce_compute_quant_rel_param \endif 315 * 316 * \brief 317 * This function updates quantization related parameters like qp_mod_6 etc in 318 * context according to new qp 319 * 320 * \date 321 * 08/01/2013 322 * 323 * \author 324 * Ittiam 325 * 326 * \return 327 * 328 * List of Functions 329 * 330 * 331 ****************************************************************************** 332 */ 333 void ihevce_compute_quant_rel_param(ihevce_enc_loop_ctxt_t *ps_ctxt, WORD8 i1_cu_qp) 334 { 335 WORD32 i4_div_factor; 336 337 ps_ctxt->i4_chrm_cu_qp = 338 (ps_ctxt->u1_chroma_array_type == 2) 339 ? MIN(i1_cu_qp + ps_ctxt->i4_chroma_qp_offset, 51) 340 : gai1_ihevc_chroma_qp_scale[i1_cu_qp + ps_ctxt->i4_chroma_qp_offset + MAX_QP_BD_OFFSET]; 341 ps_ctxt->i4_cu_qp_div6 = (i1_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) / 6; 342 i4_div_factor = (i1_cu_qp + 3) / 6; 343 i4_div_factor = CLIP3(i4_div_factor, 3, 6); 344 ps_ctxt->i4_cu_qp_mod6 = (i1_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) % 6; 345 ps_ctxt->i4_chrm_cu_qp_div6 = (ps_ctxt->i4_chrm_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) / 6; 346 ps_ctxt->i4_chrm_cu_qp_mod6 = (ps_ctxt->i4_chrm_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) % 6; 347 348 #define INTER_RND_QP_BY_6 349 #ifdef INTER_RND_QP_BY_6 350 /* quant factor without RDOQ is 1/6th of shift for inter : like in H264 */ 351 { 352 ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] = 353 (WORD32)(((1 << QUANT_ROUND_FACTOR_Q) / (float)6) + 0.5f); 354 } 355 #else 356 /* quant factor without RDOQ is 1/6th of shift for inter : like in H264 */ 357 ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] = (1 << QUANT_ROUND_FACTOR_Q) / 3; 358 #endif 359 360 if(ISLICE == ps_ctxt->i1_slice_type) 361 { 362 /* quant factor without RDOQ is 1/3rd of shift for intra : like in H264 */ 363 ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] = 364 (WORD32)(((1 << QUANT_ROUND_FACTOR_Q) / (float)3) + 0.5f); 365 } 
366 else 367 { 368 if(0) /*TRAQO_EXT_ENABLE_ONE_THIRD_RND*/ 369 { 370 /* quant factor without RDOQ is 1/3rd of shift for intra : like in H264 */ 371 ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] = 372 (WORD32)(((1 << QUANT_ROUND_FACTOR_Q) / (float)3) + 0.5f); 373 } 374 else 375 { 376 /* quant factor without RDOQ is 1/6th of shift for intra in inter pic */ 377 ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] = 378 ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER]; 379 /* (1 << QUANT_ROUND_FACTOR_Q) / 6; */ 380 } 381 } 382 } 383 384 /*! 385 ****************************************************************************** 386 * \if Function name : ihevce_populate_cl_cu_lambda_prms \endif 387 * 388 * \brief 389 * Function whihc calculates the Lambda params for current picture 390 * 391 * \param[in] ps_enc_ctxt : encoder ctxt pointer 392 * \param[in] ps_cur_pic_ctxt : current pic ctxt 393 * \param[in] i4_cur_frame_qp : current pic QP 394 * \param[in] first_field : is first field flag 395 * \param[in] i4_temporal_lyr_id : Current picture layer id 396 * 397 * \return 398 * None 399 * 400 * \author 401 * Ittiam 402 * 403 ***************************************************************************** 404 */ 405 void ihevce_populate_cl_cu_lambda_prms( 406 ihevce_enc_loop_ctxt_t *ps_ctxt, 407 frm_lambda_ctxt_t *ps_frm_lamda, 408 WORD32 i4_slice_type, 409 WORD32 i4_temporal_lyr_id, 410 WORD32 i4_lambda_type) 411 { 412 WORD32 i4_curr_cu_qp, i4_curr_cu_qp_offset; 413 double lambda_modifier; 414 double lambda_uv_modifier; 415 double lambda; 416 double lambda_uv; 417 418 WORD32 i4_qp_bdoffset = 6 * (ps_ctxt->u1_bit_depth - 8); 419 420 /*Populate lamda modifier */ 421 ps_ctxt->i4_lamda_modifier = ps_frm_lamda->lambda_modifier; 422 ps_ctxt->i4_uv_lamda_modifier = ps_frm_lamda->lambda_uv_modifier; 423 ps_ctxt->i4_temporal_layer_id = i4_temporal_lyr_id; 424 425 for(i4_curr_cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp; 426 i4_curr_cu_qp <= ps_ctxt->ps_rc_quant_ctxt->i2_max_qp; 427 i4_curr_cu_qp++) 
428 { 429 WORD32 chroma_qp = (ps_ctxt->i4_chroma_format == IV_YUV_422SP_UV) 430 ? MIN(i4_curr_cu_qp, 51) 431 : gai1_ihevc_chroma_qp_scale[i4_curr_cu_qp + MAX_QP_BD_OFFSET]; 432 433 i4_curr_cu_qp_offset = i4_curr_cu_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset; 434 435 lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bdoffset - 12)) / 3.0)); 436 lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0)); 437 438 if((BSLICE == i4_slice_type) && (i4_temporal_lyr_id)) 439 { 440 lambda_modifier = ps_frm_lamda->lambda_modifier * 441 CLIP3((((double)(i4_curr_cu_qp - 12)) / 6.0), 2.00, 4.00); 442 lambda_uv_modifier = ps_frm_lamda->lambda_uv_modifier * 443 CLIP3((((double)(chroma_qp - 12)) / 6.0), 2.00, 4.00); 444 } 445 else 446 { 447 lambda_modifier = ps_frm_lamda->lambda_modifier; 448 lambda_uv_modifier = ps_frm_lamda->lambda_uv_modifier; 449 } 450 if(ps_ctxt->i4_use_const_lamda_modifier) 451 { 452 if(ISLICE == ps_ctxt->i1_slice_type) 453 { 454 lambda_modifier = ps_ctxt->f_i_pic_lamda_modifier; 455 lambda_uv_modifier = ps_ctxt->f_i_pic_lamda_modifier; 456 } 457 else 458 { 459 lambda_modifier = CONST_LAMDA_MOD_VAL; 460 lambda_uv_modifier = CONST_LAMDA_MOD_VAL; 461 } 462 } 463 switch(i4_lambda_type) 464 { 465 case 0: 466 { 467 i4_qp_bdoffset = 0; 468 469 lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bdoffset - 12)) / 3.0)); 470 lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0)); 471 472 lambda *= lambda_modifier; 473 lambda_uv *= lambda_uv_modifier; 474 475 ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_curr_cu_qp_offset] = 476 (UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT)); 477 478 ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset] = 479 (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT)); 480 481 ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset] = 482 (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT)); 483 if(ps_ctxt->i4_use_const_lamda_modifier) 484 { 485 
ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] = 486 (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT)); 487 } 488 else 489 { 490 ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] = 491 (WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT)); 492 } 493 494 ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset] = 495 (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT)); 496 497 ps_ctxt->i8_cl_ssd_type2_lambda_qf_array[i4_curr_cu_qp_offset] = 498 ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset]; 499 500 ps_ctxt->i8_cl_ssd_type2_lambda_chroma_qf_array[i4_curr_cu_qp_offset] = 501 ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset]; 502 503 ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] = 504 ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset]; 505 506 ps_ctxt->i4_sad_type2_lamda_array[i4_curr_cu_qp_offset] = 507 ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset]; 508 509 break; 510 } 511 case 1: 512 { 513 lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bdoffset - 12)) / 3.0)); 514 lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0)); 515 516 lambda *= lambda_modifier; 517 lambda_uv *= lambda_uv_modifier; 518 519 ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_curr_cu_qp_offset] = 520 (UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT)); 521 522 ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset] = 523 (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT)); 524 525 ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset] = 526 (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT)); 527 if(ps_ctxt->i4_use_const_lamda_modifier) 528 { 529 ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] = 530 (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT)); 531 } 532 else 533 { 534 ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] = 535 (WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT)); 536 } 537 ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset] = 538 (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT)); 539 540 
ps_ctxt->i8_cl_ssd_type2_lambda_qf_array[i4_curr_cu_qp_offset] = 541 ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset]; 542 543 ps_ctxt->i8_cl_ssd_type2_lambda_chroma_qf_array[i4_curr_cu_qp_offset] = 544 ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset]; 545 546 ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] = 547 ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset]; 548 549 ps_ctxt->i4_sad_type2_lamda_array[i4_curr_cu_qp_offset] = 550 ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset]; 551 552 break; 553 } 554 case 2: 555 { 556 lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bdoffset - 12)) / 3.0)); 557 lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0)); 558 559 lambda *= lambda_modifier; 560 lambda_uv *= lambda_uv_modifier; 561 562 ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_curr_cu_qp_offset] = 563 (UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT)); 564 565 ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset] = 566 (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT)); 567 568 ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset] = 569 (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT)); 570 571 if(ps_ctxt->i4_use_const_lamda_modifier) 572 { 573 ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] = 574 (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT)); 575 } 576 else 577 { 578 ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] = 579 (WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT)); 580 } 581 ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset] = 582 (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT)); 583 584 /* lambda corresponding to 8- bit, for metrics based on 8- bit ( Example 8bit SAD in encloop)*/ 585 lambda = pow(2.0, (((double)(i4_curr_cu_qp - 12)) / 3.0)); 586 lambda_uv = pow(2.0, (((double)(chroma_qp - 12)) / 3.0)); 587 588 lambda *= lambda_modifier; 589 lambda_uv *= lambda_uv_modifier; 590 591 ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_curr_cu_qp_offset] = 592 
(UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT)); 593 594 ps_ctxt->i8_cl_ssd_type2_lambda_qf_array[i4_curr_cu_qp_offset] = 595 (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT)); 596 597 ps_ctxt->i8_cl_ssd_type2_lambda_chroma_qf_array[i4_curr_cu_qp_offset] = 598 (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT)); 599 if(ps_ctxt->i4_use_const_lamda_modifier) 600 { 601 ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] = 602 (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT)); 603 } 604 else 605 { 606 ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] = 607 (WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT)); 608 } 609 610 ps_ctxt->i4_sad_type2_lamda_array[i4_curr_cu_qp_offset] = 611 (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT)); 612 613 break; 614 } 615 default: 616 { 617 /* Intended to be a barren wasteland! */ 618 ASSERT(0); 619 } 620 } 621 } 622 } 623 624 /*! 625 ****************************************************************************** 626 * \if Function name : ihevce_get_cl_cu_lambda_prms \endif 627 * 628 * \brief 629 * Function whihc calculates the Lambda params for current picture 630 * 631 * \param[in] ps_enc_ctxt : encoder ctxt pointer 632 * \param[in] ps_cur_pic_ctxt : current pic ctxt 633 * \param[in] i4_cur_frame_qp : current pic QP 634 * \param[in] first_field : is first field flag 635 * \param[in] i4_temporal_lyr_id : Current picture layer id 636 * 637 * \return 638 * None 639 * 640 * \author 641 * Ittiam 642 * 643 ***************************************************************************** 644 */ 645 void ihevce_get_cl_cu_lambda_prms(ihevce_enc_loop_ctxt_t *ps_ctxt, WORD32 i4_cur_cu_qp) 646 { 647 WORD32 chroma_qp = (ps_ctxt->u1_chroma_array_type == 2) 648 ? 
MIN(i4_cur_cu_qp + ps_ctxt->i4_chroma_qp_offset, 51) 649 : gai1_ihevc_chroma_qp_scale 650 [i4_cur_cu_qp + ps_ctxt->i4_chroma_qp_offset + MAX_QP_BD_OFFSET]; 651 652 /* closed loop ssd lambda is same as final lambda */ 653 ps_ctxt->i8_cl_ssd_lambda_qf = 654 ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_cur_cu_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]; 655 ps_ctxt->i8_cl_ssd_lambda_chroma_qf = 656 ps_ctxt 657 ->i8_cl_ssd_lambda_chroma_qf_array[chroma_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]; 658 ps_ctxt->u4_chroma_cost_weighing_factor = 659 ps_ctxt->au4_chroma_cost_weighing_factor_array 660 [chroma_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]; 661 /* --- Initialized the lambda for SATD computations --- */ 662 /* --- 0.95 is the multiplication factor as per HM --- */ 663 /* --- 1.9 is the multiplication factor for Hadamard Transform --- */ 664 ps_ctxt->i4_satd_lamda = 665 ps_ctxt->i4_satd_lamda_array[i4_cur_cu_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]; 666 ps_ctxt->i4_sad_lamda = 667 ps_ctxt->i4_sad_type2_lamda_array[i4_cur_cu_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]; 668 } 669 670 /*! 
671 ****************************************************************************** 672 * \if Function name : ihevce_update_pred_qp \endif 673 * 674 * \brief 675 * Computes pred qp for the given CU 676 * 677 * \param[in] 678 * 679 * \return 680 * 681 * 682 * \author 683 * Ittiam 684 * 685 ***************************************************************************** 686 */ 687 void ihevce_update_pred_qp(ihevce_enc_loop_ctxt_t *ps_ctxt, WORD32 cu_pos_x, WORD32 cu_pos_y) 688 { 689 WORD32 i4_pred_qp = 0x7FFFFFFF; 690 WORD32 i4_top, i4_left; 691 if(cu_pos_x == 0 && cu_pos_y == 0) /*CTB start*/ 692 { 693 i4_pred_qp = ps_ctxt->i4_prev_QP; 694 } 695 else 696 { 697 if(cu_pos_y == 0) /*CTB boundary*/ 698 { 699 i4_top = ps_ctxt->i4_prev_QP; 700 } 701 else /*within CTB*/ 702 { 703 i4_top = ps_ctxt->ai4_qp_qg[(cu_pos_y - 1) * 8 + (cu_pos_x)]; 704 } 705 if(cu_pos_x == 0) /*CTB boundary*/ 706 { 707 i4_left = ps_ctxt->i4_prev_QP; 708 } 709 else /*within CTB*/ 710 { 711 i4_left = ps_ctxt->ai4_qp_qg[(cu_pos_y)*8 + (cu_pos_x - 1)]; 712 } 713 i4_pred_qp = (i4_left + i4_top + 1) >> 1; 714 } 715 ps_ctxt->i4_pred_qp = i4_pred_qp; 716 return; 717 } 718 /*! 
719 ****************************************************************************** 720 * \if Function name : ihevce_compute_cu_level_QP \endif 721 * 722 * \brief 723 * Computes cu level QP with Traqo,Spatial Mod and In-frame RC 724 * 725 * \param[in] 726 * 727 * \return 728 * 729 * 730 * \author 731 * Ittiam 732 * 733 ***************************************************************************** 734 */ 735 void ihevce_compute_cu_level_QP( 736 ihevce_enc_loop_ctxt_t *ps_ctxt, 737 WORD32 i4_activity_for_qp, 738 WORD32 i4_activity_for_lamda, 739 WORD32 i4_reduce_qp) 740 { 741 /*modify quant related param in ctxt based on current cu qp*/ 742 WORD32 i4_input_QP = ps_ctxt->i4_frame_mod_qp; 743 WORD32 cu_qp = i4_input_QP + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset; 744 745 WORD32 i4_max_qp_allowed; 746 WORD32 i4_min_qp_allowed; 747 WORD32 i4_pred_qp; 748 749 i4_pred_qp = ps_ctxt->i4_pred_qp; 750 751 if(ps_ctxt->i4_sub_pic_level_rc) 752 { 753 i4_max_qp_allowed = (i4_pred_qp + (25 + (ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset / 2))); 754 i4_min_qp_allowed = (i4_pred_qp - (26 + (ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset / 2))); 755 } 756 else 757 { 758 i4_max_qp_allowed = (i4_input_QP + (7 + (ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset / 4))); 759 i4_min_qp_allowed = (i4_input_QP - (18 + (ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset / 4))); 760 } 761 if((ps_ctxt->i1_slice_type == BSLICE) && (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6)) 762 return; 763 764 #if LAMDA_BASED_ON_QUANT 765 i4_activity_for_lamda = i4_activity_for_qp; 766 #endif 767 768 if(i4_activity_for_qp != -1) 769 { 770 cu_qp = (ps_ctxt->ps_rc_quant_ctxt 771 ->pi4_qp_to_qscale[i4_input_QP + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]); 772 if(ps_ctxt->i4_qp_mod) 773 { 774 /*Recompute the Qp as per enc thread's frame level Qp*/ 775 ASSERT(i4_activity_for_qp > 0); 776 cu_qp = ((cu_qp * i4_activity_for_qp) + (1 << (QP_LEVEL_MOD_ACT_FACTOR - 1))) >> 777 QP_LEVEL_MOD_ACT_FACTOR; 778 } 779 780 // To avoid access of uninitialised 
Qscale to qp conversion table 781 if(cu_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale) 782 cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale; 783 else if(cu_qp < ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale) 784 cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale; 785 786 cu_qp = ps_ctxt->ps_rc_quant_ctxt->pi4_qscale_to_qp[cu_qp]; 787 788 if((1 == i4_reduce_qp) && (cu_qp > 1)) 789 cu_qp--; 790 791 /*CLIP the delta to obey standard allowed QP variation of (-26 + offset/2) to (25 + offset/2)*/ 792 if(cu_qp > i4_max_qp_allowed) 793 cu_qp = i4_max_qp_allowed; 794 else if(cu_qp < i4_min_qp_allowed) 795 cu_qp = i4_min_qp_allowed; 796 797 /* CLIP to maintain Qp between user configured and min and max Qp values*/ 798 if(cu_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qp) 799 cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qp; 800 else if(cu_qp < ps_ctxt->ps_rc_quant_ctxt->i2_min_qp) 801 cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp; 802 803 /*cu qp must be populated in cu_analyse_t struct*/ 804 ps_ctxt->i4_cu_qp = cu_qp; 805 /*recompute quant related param at every cu level*/ 806 ihevce_compute_quant_rel_param(ps_ctxt, cu_qp); 807 } 808 809 /*Decoupling qp and lamda calculation */ 810 if(i4_activity_for_lamda != -1) 811 { 812 cu_qp = (ps_ctxt->ps_rc_quant_ctxt 813 ->pi4_qp_to_qscale[i4_input_QP + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]); 814 815 if(ps_ctxt->i4_qp_mod) 816 { 817 #if MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON 818 /*Recompute the Qp as per enc thread's frame level Qp*/ 819 ASSERT(i4_activity_for_lamda > 0); 820 cu_qp = ((cu_qp * i4_activity_for_lamda) + (1 << (QP_LEVEL_MOD_ACT_FACTOR - 1))) >> 821 QP_LEVEL_MOD_ACT_FACTOR; 822 #endif 823 } 824 if(cu_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale) 825 cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale; 826 else if(cu_qp < ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale) 827 cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale; 828 829 cu_qp = ps_ctxt->ps_rc_quant_ctxt->pi4_qscale_to_qp[cu_qp]; 830 831 /*CLIP the delta to obey standard allowed QP 
variation of (-26 + offset/2) to (25 + offset/2)*/ 832 if(cu_qp > i4_max_qp_allowed) 833 cu_qp = i4_max_qp_allowed; 834 else if(cu_qp < i4_min_qp_allowed) 835 cu_qp = i4_min_qp_allowed; 836 837 /* CLIP to maintain Qp between user configured and min and max Qp values*/ 838 if(cu_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qp) 839 cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qp; 840 else if(cu_qp < ps_ctxt->ps_rc_quant_ctxt->i2_min_qp) 841 cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp; 842 /* get frame level lambda params */ 843 ihevce_get_cl_cu_lambda_prms( 844 ps_ctxt, MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON ? cu_qp : ps_ctxt->i4_frame_qp); 845 } 846 } 847 848 /** 849 ******************************************************************************* 850 * \if Function name : ihevce_scan_coeffs \endif 851 * 852 * @brief * Computes the coeff buffer for a coded TU for entropy coding 853 * 854 * @par Description 855 * Computes the coeff buffer for a coded TU for entropy coding 856 * 857 * \param[in] pi2_quan_coeffs Quantized coefficient context 858 * 859 * \param[in] scan_idx Scan index specifying the scan order 860 * 861 * \param[in] trans_size Transform unit size 862 * 863 * \param[inout] pu1_out_data output coeff buffer for a coded TU for entropy coding 864 * 865 * \param[in] pu1_csbf_buf csb flag buffer 866 * 867 * @returns num_bytes 868 * Number of bytes written to pu1_out_data 869 * 870 * @remarks 871 * 872 * \author 873 * Ittiam 874 * 875 ******************************************************************************* 876 */ 877 878 WORD32 ihevce_scan_coeffs( 879 WORD16 *pi2_quant_coeffs, 880 WORD32 *pi4_subBlock2csbfId_map, 881 WORD32 scan_idx, 882 WORD32 trans_size, 883 UWORD8 *pu1_out_data, 884 UWORD8 *pu1_csbf_buf, 885 WORD32 i4_csbf_stride) 886 { 887 WORD32 i, trans_unit_idx, num_gt1_flag; 888 UWORD16 u2_csbf0flags; 889 WORD32 num_bytes = 0; 890 UWORD8 *pu1_trans_table; 891 UWORD8 *pu1_csb_table; 892 WORD32 shift_value, mask_value; 893 UWORD16 u2_sig_coeff_abs_gt0_flags = 0, 
u2_sig_coeff_abs_gt1_flags = 0; 894 UWORD16 u2_sign_flags; 895 UWORD16 u2_abs_coeff_remaining[16]; 896 WORD32 blk_row, blk_col; 897 898 UWORD8 *pu1_out_data_header; 899 UWORD16 *pu2_out_data_coeff; 900 901 WORD32 x_pos, y_pos; 902 WORD32 quant_coeff; 903 904 WORD32 num_gt0_flag; 905 (void)i4_csbf_stride; 906 pu1_out_data_header = pu1_out_data; 907 /* Need only last 3 bits, rest are reserved for debugging and making */ 908 /* WORD alignment */ 909 u2_csbf0flags = 0xBAD0; 910 911 /* Select proper order for your transform unit and csb based on scan_idx*/ 912 /* and the trans_size */ 913 914 /* scan order inside a csb */ 915 pu1_csb_table = (UWORD8 *)&(g_u1_scan_table_4x4[scan_idx][0]); 916 /* GETRANGE will give the log_2 of trans_size to shift_value */ 917 GETRANGE(shift_value, trans_size); 918 shift_value = shift_value - 3; /* for finding. row no. from scan index */ 919 mask_value = (trans_size / 4) - 1; /*for finding the col. no. from scan index*/ 920 switch(trans_size) 921 { 922 case 32: 923 pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_8x8[scan_idx][0]); 924 break; 925 case 16: 926 pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_4x4[scan_idx][0]); 927 break; 928 case 8: 929 pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_2x2[scan_idx][0]); 930 break; 931 case 4: 932 pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_1x1[0]); 933 break; 934 default: 935 DBG_PRINTF("Invalid Trans Size\n"); 936 return -1; 937 break; 938 } 939 940 /*go through each csb in the scan order for first non-zero coded sub-block*/ 941 for(trans_unit_idx = (trans_size * trans_size / 16) - 1; trans_unit_idx >= 0; trans_unit_idx--) 942 { 943 /* check for the first csb flag in our scan order */ 944 if(pu1_csbf_buf[pi4_subBlock2csbfId_map[pu1_trans_table[trans_unit_idx]]]) 945 { 946 UWORD8 u1_last_x, u1_last_y; 947 /* row of csb */ 948 blk_row = pu1_trans_table[trans_unit_idx] >> shift_value; 949 /* col of csb */ 950 blk_col = pu1_trans_table[trans_unit_idx] & mask_value; 951 952 /*check for the 1st non-0 
values inside the csb in our scan order*/ 953 for(i = 15; i >= 0; i--) 954 { 955 x_pos = (pu1_csb_table[i] & 0x3) + blk_col * 4; 956 y_pos = (pu1_csb_table[i] >> 2) + blk_row * 4; 957 958 quant_coeff = pi2_quant_coeffs[x_pos + (y_pos * trans_size)]; 959 960 if(quant_coeff != 0) 961 break; 962 } 963 964 ASSERT(i >= 0); 965 966 u1_last_x = x_pos; 967 u1_last_y = y_pos; 968 969 /* storing last_x and last_y */ 970 *pu1_out_data_header = u1_last_x; 971 pu1_out_data_header++; 972 num_bytes++; 973 *pu1_out_data_header = u1_last_y; 974 pu1_out_data_header++; 975 num_bytes++; 976 977 /* storing the scan order */ 978 *pu1_out_data_header = scan_idx; 979 pu1_out_data_header++; 980 num_bytes++; 981 /* storing last_sub_block pos. in scan order count */ 982 *pu1_out_data_header = trans_unit_idx; 983 pu1_out_data_header++; 984 num_bytes++; 985 986 /*stored the first 4 bytes, now all are word16. So word16 pointer*/ 987 pu2_out_data_coeff = (UWORD16 *)pu1_out_data_header; 988 989 /* u2_csbf0flags word */ 990 u2_csbf0flags = 0xBAD0 | 1; /*since right&bottom csbf is 0*/ 991 /* storing u2_csbf0flags word */ 992 *pu2_out_data_coeff = u2_csbf0flags; 993 pu2_out_data_coeff++; 994 num_bytes += 2; 995 996 num_gt0_flag = 1; 997 num_gt1_flag = 0; 998 u2_sign_flags = 0; 999 1000 /* set the i th bit of u2_sig_coeff_abs_gt0_flags */ 1001 u2_sig_coeff_abs_gt0_flags = u2_sig_coeff_abs_gt0_flags | (1 << i); 1002 if(abs(quant_coeff) > 1) 1003 { 1004 /* set the i th bit of u2_sig_coeff_abs_gt1_flags */ 1005 u2_sig_coeff_abs_gt1_flags = u2_sig_coeff_abs_gt1_flags | (1 << i); 1006 /* update u2_abs_coeff_remaining */ 1007 u2_abs_coeff_remaining[num_gt1_flag] = (UWORD16)abs(quant_coeff) - 1; 1008 1009 num_gt1_flag++; 1010 } 1011 1012 if(quant_coeff < 0) 1013 { 1014 /* set the i th bit of u2_sign_flags */ 1015 u2_sign_flags = u2_sign_flags | (1 << i); 1016 } 1017 1018 /* Test remaining elements in our scan order */ 1019 /* Can optimize further by CLZ macro */ 1020 for(i = i - 1; i >= 0; i--) 1021 { 1022 
x_pos = (pu1_csb_table[i] & 0x3) + blk_col * 4; 1023 y_pos = (pu1_csb_table[i] >> 2) + blk_row * 4; 1024 1025 quant_coeff = pi2_quant_coeffs[x_pos + (y_pos * trans_size)]; 1026 1027 if(quant_coeff != 0) 1028 { 1029 /* set the i th bit of u2_sig_coeff_abs_gt0_flags */ 1030 u2_sig_coeff_abs_gt0_flags |= (1 << i); 1031 1032 if((abs(quant_coeff) > 1) || (num_gt0_flag >= MAX_GT_ONE)) 1033 { 1034 /* set the i th bit of u2_sig_coeff_abs_gt1_flags */ 1035 u2_sig_coeff_abs_gt1_flags |= (1 << i); 1036 1037 /* update u2_abs_coeff_remaining */ 1038 u2_abs_coeff_remaining[num_gt1_flag] = (UWORD16)abs(quant_coeff) - 1; 1039 1040 num_gt1_flag++; /*n0. of Ones in sig_coeff_abs_gt1_flag*/ 1041 } 1042 1043 if(quant_coeff < 0) 1044 { 1045 /* set the i th bit of u2_sign_flags */ 1046 u2_sign_flags |= (1 << i); 1047 } 1048 1049 num_gt0_flag++; 1050 } 1051 } 1052 1053 /* storing u2_sig_coeff_abs_gt0_flags 2 bytes */ 1054 *pu2_out_data_coeff = u2_sig_coeff_abs_gt0_flags; 1055 pu2_out_data_coeff++; 1056 num_bytes += 2; 1057 /* storing u2_sig_coeff_abs_gt1_flags 2 bytes */ 1058 *pu2_out_data_coeff = u2_sig_coeff_abs_gt1_flags; 1059 pu2_out_data_coeff++; 1060 num_bytes += 2; 1061 /* storing u2_sign_flags 2 bytes */ 1062 *pu2_out_data_coeff = u2_sign_flags; 1063 pu2_out_data_coeff++; 1064 num_bytes += 2; 1065 1066 /* Store the u2_abs_coeff_remaining[] */ 1067 for(i = 0; i < num_gt1_flag; i++) 1068 { 1069 /* storing u2_abs_coeff_remaining[i] 2 bytes */ 1070 *pu2_out_data_coeff = u2_abs_coeff_remaining[i]; 1071 pu2_out_data_coeff++; 1072 num_bytes += 2; 1073 } 1074 1075 break; /*We just need this loop for finding 1st non-zero csb only*/ 1076 } 1077 } 1078 1079 /* go through remaining csb in the scan order */ 1080 for(trans_unit_idx = trans_unit_idx - 1; trans_unit_idx >= 0; trans_unit_idx--) 1081 { 1082 blk_row = pu1_trans_table[trans_unit_idx] >> shift_value; /*row of csb*/ 1083 blk_col = pu1_trans_table[trans_unit_idx] & mask_value; /*col of csb*/ 1084 1085 /* u2_csbf0flags word */ 1086 
u2_csbf0flags = 0xBAD0 | /* assuming csbf_buf has only 0 or 1 values */ 1087 (pu1_csbf_buf[pi4_subBlock2csbfId_map[pu1_trans_table[trans_unit_idx]]]); 1088 1089 /********************************************************************/ 1090 /* Minor hack: As per HEVC spec csbf in not signalled in stream for */ 1091 /* block0, instead sig coeff map is directly signalled. This is */ 1092 /* taken care by forcing csbf for block0 to be 1 even if it is 0 */ 1093 /********************************************************************/ 1094 if(0 == trans_unit_idx) 1095 { 1096 u2_csbf0flags |= 1; 1097 } 1098 1099 if((blk_col + 1 < trans_size / 4)) /* checking right boundary */ 1100 { 1101 if(pu1_csbf_buf[pi4_subBlock2csbfId_map[blk_row * trans_size / 4 + blk_col + 1]]) 1102 { 1103 /* set the 2nd bit of u2_csbf0flags for right csbf */ 1104 u2_csbf0flags = u2_csbf0flags | (1 << 1); 1105 } 1106 } 1107 if((blk_row + 1 < trans_size / 4)) /* checking bottom oundary */ 1108 { 1109 if(pu1_csbf_buf[pi4_subBlock2csbfId_map[(blk_row + 1) * trans_size / 4 + blk_col]]) 1110 { 1111 /* set the 3rd bit of u2_csbf0flags for bottom csbf */ 1112 u2_csbf0flags = u2_csbf0flags | (1 << 2); 1113 } 1114 } 1115 1116 /* storing u2_csbf0flags word */ 1117 *pu2_out_data_coeff = u2_csbf0flags; 1118 pu2_out_data_coeff++; 1119 num_bytes += 2; 1120 1121 /* check for the csb flag in our scan order */ 1122 if(u2_csbf0flags & 0x1) 1123 { 1124 u2_sig_coeff_abs_gt0_flags = 0; 1125 u2_sig_coeff_abs_gt1_flags = 0; 1126 u2_sign_flags = 0; 1127 1128 num_gt0_flag = 0; 1129 num_gt1_flag = 0; 1130 /* check for the non-0 values inside the csb in our scan order */ 1131 /* Can optimize further by CLZ macro */ 1132 for(i = 15; i >= 0; i--) 1133 { 1134 x_pos = (pu1_csb_table[i] & 0x3) + blk_col * 4; 1135 y_pos = (pu1_csb_table[i] >> 2) + blk_row * 4; 1136 1137 quant_coeff = pi2_quant_coeffs[x_pos + (y_pos * trans_size)]; 1138 1139 if(quant_coeff != 0) 1140 { 1141 /* set the i th bit of u2_sig_coeff_abs_gt0_flags */ 1142 
u2_sig_coeff_abs_gt0_flags |= (1 << i); 1143 1144 if((abs(quant_coeff) > 1) || (num_gt0_flag >= MAX_GT_ONE)) 1145 { 1146 /* set the i th bit of u2_sig_coeff_abs_gt1_flags */ 1147 u2_sig_coeff_abs_gt1_flags |= (1 << i); 1148 1149 /* update u2_abs_coeff_remaining */ 1150 u2_abs_coeff_remaining[num_gt1_flag] = (UWORD16)abs(quant_coeff) - 1; 1151 1152 num_gt1_flag++; 1153 } 1154 1155 if(quant_coeff < 0) 1156 { 1157 /* set the i th bit of u2_sign_flags */ 1158 u2_sign_flags = u2_sign_flags | (1 << i); 1159 } 1160 1161 num_gt0_flag++; 1162 } 1163 } 1164 1165 /* storing u2_sig_coeff_abs_gt0_flags 2 bytes */ 1166 *pu2_out_data_coeff = u2_sig_coeff_abs_gt0_flags; 1167 pu2_out_data_coeff++; 1168 num_bytes += 2; 1169 1170 /* storing u2_sig_coeff_abs_gt1_flags 2 bytes */ 1171 *pu2_out_data_coeff = u2_sig_coeff_abs_gt1_flags; 1172 pu2_out_data_coeff++; 1173 num_bytes += 2; 1174 1175 /* storing u2_sign_flags 2 bytes */ 1176 *pu2_out_data_coeff = u2_sign_flags; 1177 pu2_out_data_coeff++; 1178 num_bytes += 2; 1179 1180 /* Store the u2_abs_coeff_remaining[] */ 1181 for(i = 0; i < num_gt1_flag; i++) 1182 { 1183 /* storing u2_abs_coeff_remaining[i] 2 bytes */ 1184 *pu2_out_data_coeff = u2_abs_coeff_remaining[i]; 1185 pu2_out_data_coeff++; 1186 num_bytes += 2; 1187 } 1188 } 1189 } 1190 1191 return num_bytes; /* Return the number of bytes written to out_data */ 1192 } 1193 1194 /** 1195 ******************************************************************************* 1196 * \if Function name : ihevce_populate_intra_pred_mode \endif 1197 * 1198 * \brief * populates intra pred modes,b2_mpm_idx,b1_prev_intra_luma_pred_flag & 1199 * b5_rem_intra_pred_mode for a CU based on nieghbouring CUs, 1200 * 1201 * \par Description 1202 * Computes the b1_prev_intra_luma_pred_flag, b2_mpm_idx & b5_rem_intra_pred_mode 1203 * for a CU 1204 * 1205 * \param[in] top_intra_mode Top intra mode 1206 * \param[in] left_intra_mode Left intra mode 1207 * \param[in] available_top Top availability flag 1208 * 
\param[in] available_left Left availability flag 1209 * \param[in] cu_pos_y CU 'y' position 1210 * \param[in] ps_cand_mode_list pointer to populate candidate list 1211 * 1212 * \returns none 1213 * 1214 * \author 1215 * Ittiam 1216 * 1217 ******************************************************************************* 1218 */ 1219 1220 void ihevce_populate_intra_pred_mode( 1221 WORD32 top_intra_mode, 1222 WORD32 left_intra_mode, 1223 WORD32 available_top, 1224 WORD32 available_left, 1225 WORD32 cu_pos_y, 1226 WORD32 *ps_cand_mode_list) 1227 { 1228 /* local variables */ 1229 WORD32 cand_intra_pred_mode_left, cand_intra_pred_mode_top; 1230 1231 /* Calculate cand_intra_pred_mode_N as per sec. 8.4.2 in JCTVC-J1003_d7 */ 1232 /* N = top */ 1233 if(0 == available_top) 1234 { 1235 cand_intra_pred_mode_top = INTRA_DC; 1236 } 1237 /* for neighbour != INTRA, setting DC is done outside */ 1238 else if(0 == cu_pos_y) /* It's on the CTB boundary */ 1239 { 1240 cand_intra_pred_mode_top = INTRA_DC; 1241 } 1242 else 1243 { 1244 cand_intra_pred_mode_top = top_intra_mode; 1245 } 1246 1247 /* N = left */ 1248 if(0 == available_left) 1249 { 1250 cand_intra_pred_mode_left = INTRA_DC; 1251 } 1252 /* for neighbour != INTRA, setting DC is done outside */ 1253 else 1254 { 1255 cand_intra_pred_mode_left = left_intra_mode; 1256 } 1257 1258 /* Calculate cand_mode_list as per sec. 
8.4.2 in JCTVC-J1003_d7 */ 1259 if(cand_intra_pred_mode_left == cand_intra_pred_mode_top) 1260 { 1261 if(cand_intra_pred_mode_left < 2) 1262 { 1263 ps_cand_mode_list[0] = INTRA_PLANAR; 1264 ps_cand_mode_list[1] = INTRA_DC; 1265 ps_cand_mode_list[2] = INTRA_ANGULAR(26); /* angular 26 = Vertical */ 1266 } 1267 else 1268 { 1269 ps_cand_mode_list[0] = cand_intra_pred_mode_left; 1270 ps_cand_mode_list[1] = 2 + ((cand_intra_pred_mode_left + 29) % 32); 1271 ps_cand_mode_list[2] = 2 + ((cand_intra_pred_mode_left - 2 + 1) % 32); 1272 } 1273 } 1274 else 1275 { 1276 ps_cand_mode_list[0] = cand_intra_pred_mode_left; 1277 ps_cand_mode_list[1] = cand_intra_pred_mode_top; 1278 1279 if((cand_intra_pred_mode_left != INTRA_PLANAR) && 1280 (cand_intra_pred_mode_top != INTRA_PLANAR)) 1281 { 1282 ps_cand_mode_list[2] = INTRA_PLANAR; 1283 } 1284 else if((cand_intra_pred_mode_left != INTRA_DC) && (cand_intra_pred_mode_top != INTRA_DC)) 1285 { 1286 ps_cand_mode_list[2] = INTRA_DC; 1287 } 1288 else 1289 { 1290 ps_cand_mode_list[2] = INTRA_ANGULAR(26); 1291 } 1292 } 1293 } 1294 /** 1295 ******************************************************************************* 1296 * \if Function name : ihevce_intra_pred_mode_signaling \endif 1297 * 1298 * \brief * Computes the b1_prev_intra_luma_pred_flag, b2_mpm_idx & 1299 * b5_rem_intra_pred_mode for a CU 1300 * 1301 * \par Description 1302 * Computes the b1_prev_intra_luma_pred_flag, b2_mpm_idx & b5_rem_intra_pred_mode 1303 * for a CU 1304 * 1305 * \param[in] ps_nbr_top Top neighbour context 1306 * \param[in] ps_nbr_left Left neighbour context 1307 * \param[in] available_top Top availability flag 1308 * \param[in] available_left Left availability flag 1309 * \param[in] cu_pos_y CU 'y' position 1310 * \param[in] luma_intra_pred_mode_current the intra_pred_mode of current block 1311 * \param[inout] ps_intra_pred_mode_current 1312 * Pointer to structure having b1_prev_intra_luma_pred_flag, b2_mpm_idx and 1313 * b5_rem_intra_pred_mode 1314 * 1315 * 
\returns none 1316 * 1317 * \author 1318 * Ittiam 1319 * 1320 ******************************************************************************* 1321 */ 1322 1323 void ihevce_intra_pred_mode_signaling( 1324 WORD32 top_intra_mode, 1325 WORD32 left_intra_mode, 1326 WORD32 available_top, 1327 WORD32 available_left, 1328 WORD32 cu_pos_y, 1329 WORD32 luma_intra_pred_mode_current, 1330 intra_prev_rem_flags_t *ps_intra_pred_mode_current) 1331 { 1332 /* local variables */ 1333 WORD32 cand_intra_pred_mode_left, cand_intra_pred_mode_top; 1334 WORD32 cand_mode_list[3]; 1335 1336 ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 0; 1337 ps_intra_pred_mode_current->b2_mpm_idx = 0; // for safety purpose 1338 ps_intra_pred_mode_current->b5_rem_intra_pred_mode = 0; 1339 1340 /* Calculate cand_intra_pred_mode_N as per sec. 8.4.2 in JCTVC-J1003_d7 */ 1341 /* N = top */ 1342 if(0 == available_top) 1343 { 1344 cand_intra_pred_mode_top = INTRA_DC; 1345 } 1346 /* for neighbour != INTRA, setting DC is done outside */ 1347 else if(0 == cu_pos_y) /* It's on the CTB boundary */ 1348 { 1349 cand_intra_pred_mode_top = INTRA_DC; 1350 } 1351 else 1352 { 1353 cand_intra_pred_mode_top = top_intra_mode; 1354 } 1355 1356 /* N = left */ 1357 if(0 == available_left) 1358 { 1359 cand_intra_pred_mode_left = INTRA_DC; 1360 } 1361 /* for neighbour != INTRA, setting DC is done outside */ 1362 else 1363 { 1364 cand_intra_pred_mode_left = left_intra_mode; 1365 } 1366 1367 /* Calculate cand_mode_list as per sec. 
8.4.2 in JCTVC-J1003_d7 */ 1368 if(cand_intra_pred_mode_left == cand_intra_pred_mode_top) 1369 { 1370 if(cand_intra_pred_mode_left < 2) 1371 { 1372 cand_mode_list[0] = INTRA_PLANAR; 1373 cand_mode_list[1] = INTRA_DC; 1374 cand_mode_list[2] = INTRA_ANGULAR(26); /* angular 26 = Vertical */ 1375 } 1376 else 1377 { 1378 cand_mode_list[0] = cand_intra_pred_mode_left; 1379 cand_mode_list[1] = 2 + ((cand_intra_pred_mode_left + 29) % 32); 1380 cand_mode_list[2] = 2 + ((cand_intra_pred_mode_left - 2 + 1) % 32); 1381 } 1382 } 1383 else 1384 { 1385 cand_mode_list[0] = cand_intra_pred_mode_left; 1386 cand_mode_list[1] = cand_intra_pred_mode_top; 1387 1388 if((cand_intra_pred_mode_left != INTRA_PLANAR) && 1389 (cand_intra_pred_mode_top != INTRA_PLANAR)) 1390 { 1391 cand_mode_list[2] = INTRA_PLANAR; 1392 } 1393 else if((cand_intra_pred_mode_left != INTRA_DC) && (cand_intra_pred_mode_top != INTRA_DC)) 1394 { 1395 cand_mode_list[2] = INTRA_DC; 1396 } 1397 else 1398 { 1399 cand_mode_list[2] = INTRA_ANGULAR(26); 1400 } 1401 } 1402 1403 /* Signal Generation */ 1404 1405 /* Flag & mpm_index generation */ 1406 if(cand_mode_list[0] == luma_intra_pred_mode_current) 1407 { 1408 ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 1; 1409 ps_intra_pred_mode_current->b2_mpm_idx = 0; 1410 } 1411 else if(cand_mode_list[1] == luma_intra_pred_mode_current) 1412 { 1413 ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 1; 1414 ps_intra_pred_mode_current->b2_mpm_idx = 1; 1415 } 1416 else if(cand_mode_list[2] == luma_intra_pred_mode_current) 1417 { 1418 ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 1; 1419 ps_intra_pred_mode_current->b2_mpm_idx = 2; 1420 } 1421 /* Flag & b5_rem_intra_pred_mode generation */ 1422 else 1423 { 1424 WORD32 rem_mode; 1425 1426 ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 0; 1427 1428 /* sorting cand_mode_list */ 1429 if(cand_mode_list[0] > cand_mode_list[1]) 1430 { 1431 SWAP(cand_mode_list[0], cand_mode_list[1]); 1432 } 1433 
if(cand_mode_list[0] > cand_mode_list[2]) 1434 { 1435 SWAP(cand_mode_list[0], cand_mode_list[2]); 1436 } 1437 if(cand_mode_list[1] > cand_mode_list[2]) 1438 { 1439 SWAP(cand_mode_list[1], cand_mode_list[2]); 1440 } 1441 1442 rem_mode = luma_intra_pred_mode_current; 1443 1444 if((rem_mode) >= cand_mode_list[2]) 1445 { 1446 (rem_mode)--; 1447 } 1448 if((rem_mode) >= cand_mode_list[1]) 1449 { 1450 (rem_mode)--; 1451 } 1452 if((rem_mode) >= cand_mode_list[0]) 1453 { 1454 (rem_mode)--; 1455 } 1456 ps_intra_pred_mode_current->b5_rem_intra_pred_mode = rem_mode; 1457 } 1458 } 1459 1460 void ihevce_quant_rounding_factor_gen( 1461 WORD32 i4_trans_size, 1462 WORD32 is_luma, 1463 rdopt_entropy_ctxt_t *ps_rdopt_entropy_ctxt, 1464 WORD32 *pi4_quant_round_0_1, 1465 WORD32 *pi4_quant_round_1_2, 1466 double i4_lamda_modifier, 1467 UWORD8 i4_is_tu_level_quant_rounding) 1468 { 1469 //WORD32 i4_scan_idx = ps_ctxt->i4_scan_idx; 1470 UWORD8 *pu1_ctxt_model; 1471 WORD32 scan_pos; 1472 WORD32 sig_coeff_base_ctxt; /* cabac context for sig coeff flag */ 1473 WORD32 abs_gt1_base_ctxt; 1474 WORD32 log2_tr_size, i; 1475 UWORD16 u4_bits_estimated_r0, u4_bits_estimated_r1, u4_bits_estimated_r2; 1476 UWORD16 u4_bits_estimated_r1_temp; 1477 WORD32 j = 0; 1478 WORD32 k = 0; 1479 WORD32 temp2; 1480 1481 double i4_lamda_mod = i4_lamda_modifier * pow(2.0, (-8.0 / 3.0)); 1482 LWORD64 lamda_mod = (LWORD64)(i4_lamda_mod * (1 << LAMDA_Q_SHIFT_FACT)); 1483 /* transform size to log2transform size */ 1484 GETRANGE(log2_tr_size, i4_trans_size); 1485 log2_tr_size -= 1; 1486 1487 if(1 == i4_is_tu_level_quant_rounding) 1488 { 1489 entropy_context_t *ps_cur_tu_entropy; 1490 cab_ctxt_t *ps_cabac; 1491 WORD32 curr_buf_idx = ps_rdopt_entropy_ctxt->i4_curr_buf_idx; 1492 ps_cur_tu_entropy = &ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[curr_buf_idx]; 1493 1494 ps_cabac = &ps_cur_tu_entropy->s_cabac_ctxt; 1495 1496 pu1_ctxt_model = &ps_cabac->au1_ctxt_models[0]; 1497 } 1498 else 1499 { 1500 pu1_ctxt_model = 
&ps_rdopt_entropy_ctxt->au1_init_cabac_ctxt_states[0]; 1501 } 1502 /*If transform size is 4x4, then only one sub-block*/ 1503 if(is_luma) 1504 { 1505 sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG; 1506 abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG; 1507 1508 if(3 == log2_tr_size) 1509 { 1510 /* 8x8 transform size */ 1511 /* Assuming diagnol scan idx for now */ 1512 sig_coeff_base_ctxt += 9; 1513 } 1514 else if(3 < log2_tr_size) 1515 { 1516 /* larger transform sizes */ 1517 sig_coeff_base_ctxt += 21; 1518 } 1519 } 1520 else 1521 { 1522 /* chroma context initializations */ 1523 sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG + 27; 1524 abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG + 16; 1525 1526 if(3 == log2_tr_size) 1527 { 1528 /* 8x8 transform size */ 1529 sig_coeff_base_ctxt += 9; 1530 } 1531 else if(3 < log2_tr_size) 1532 { 1533 /* larger transform sizes */ 1534 sig_coeff_base_ctxt += 12; 1535 } 1536 } 1537 1538 /*Transform size of 4x4 will have only a single CSB */ 1539 /* derive the context inc as per section 9.3.3.1.4 */ 1540 1541 if(2 == log2_tr_size) 1542 { 1543 UWORD8 sig_ctxinc; 1544 WORD32 state_mps; 1545 WORD32 gt1_ctxt = 0; 1546 WORD32 ctxt_set = 0; 1547 WORD32 ctxt_idx = 0; 1548 1549 /* context set based on luma subblock pos */ 1550 1551 /* Encodet the abs level gt1 bins */ 1552 /* Currently calculating trade off between mps(2) and mps(1)*/ 1553 /* The estimation has to be further done for mps(11) and mps(111)*/ 1554 /*ctxt_set = 0 as transform 4x4 has only one csb with DC */ 1555 /* gt1_ctxt = 0 for the co-ef value to be 2 */ 1556 1557 ctxt_set = gt1_ctxt = 0; 1558 ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt; 1559 1560 state_mps = pu1_ctxt_model[ctxt_idx]; 1561 1562 u4_bits_estimated_r2 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1]; 1563 1564 u4_bits_estimated_r1_temp = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0]; 1565 1566 QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r2, u4_bits_estimated_r1_temp, lamda_mod); 1567 for(scan_pos = 0; 
scan_pos < 16; scan_pos++) 1568 { 1569 *(pi4_quant_round_1_2 + scan_pos) = temp2; 1570 } 1571 1572 for(scan_pos = 0; scan_pos < 16; scan_pos++) 1573 { 1574 //UWORD8 nbr_csbf = 1; 1575 /* derive the x,y pos */ 1576 UWORD8 y_pos_x_pos = scan_pos; //gu1_hevce_scan4x4[i4_scan_idx][scan_pos]; 1577 1578 /* 4x4 transform size increment uses lookup */ 1579 sig_ctxinc = gu1_hevce_sigcoeff_ctxtinc_tr4[y_pos_x_pos]; 1580 1581 /*Get the mps state based on ctxt modes */ 1582 state_mps = pu1_ctxt_model[sig_ctxinc + sig_coeff_base_ctxt]; 1583 1584 /* Bits taken to encode sig co-ef flag as 0 */ 1585 u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0]; 1586 1587 /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */ 1588 // 1589 u4_bits_estimated_r1 = 1590 (gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000)); 1591 1592 /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */ 1593 u4_bits_estimated_r1 += u4_bits_estimated_r1_temp; 1594 1595 QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod); 1596 *(pi4_quant_round_0_1 + scan_pos) = temp2; 1597 } 1598 } 1599 else 1600 { 1601 UWORD8 *pu1_hevce_sigcoeff_ctxtinc; 1602 WORD32 is_nbr_csb_state_mps; 1603 1604 WORD32 state_mps; 1605 WORD32 gt1_ctxt = 0; 1606 WORD32 ctxt_set = 0; 1607 WORD32 ctxt_idx; 1608 /*1to2 rounding factor is same for all sub blocks except for sub-block = 0*/ 1609 /*Hence will write all the sub-block with i >=1 coeff, and then overwrite for i = 0*/ 1610 1611 /*ctxt_set = 0 DC subblock, the previous state did not have 2 1612 ctxt_set = 1 DC subblock, the previous state did have >= 2 1613 ctxt_set = 2 AC subblock, the previous state did not have 2 1614 ctxt_set = 3 AC subblock, the previous state did have >= 2*/ 1615 i = 1; 1616 ctxt_set = (i && is_luma) ? 
2 : 0; 1617 1618 ctxt_set++; 1619 1620 /*0th position indicates the probability of 2 */ 1621 /*1th position indicates the probability of 1 */ 1622 /*2th position indicates the probability of 11 */ 1623 /*3th position indicates the probability of 111 */ 1624 1625 gt1_ctxt = 0; 1626 ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt; 1627 1628 state_mps = pu1_ctxt_model[ctxt_idx]; 1629 1630 u4_bits_estimated_r2 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1]; 1631 1632 u4_bits_estimated_r1 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0]; 1633 QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r2, u4_bits_estimated_r1, lamda_mod); 1634 1635 for(scan_pos = 0; scan_pos < (16 * (i4_trans_size * i4_trans_size >> 4)); scan_pos++) 1636 { 1637 *(pi4_quant_round_1_2 + scan_pos) = temp2; 1638 } 1639 1640 i = 0; 1641 ctxt_set = (i && is_luma) ? 2 : 0; 1642 ctxt_set++; 1643 1644 /*0th position indicates the probability of 2 */ 1645 /*1th position indicates the probability of 1 */ 1646 /*2th position indicates the probability of 11 */ 1647 /*3th position indicates the probability of 111 */ 1648 1649 gt1_ctxt = 0; 1650 ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt; 1651 1652 state_mps = pu1_ctxt_model[ctxt_idx]; 1653 1654 u4_bits_estimated_r2 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1]; 1655 1656 u4_bits_estimated_r1 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0]; 1657 QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r2, u4_bits_estimated_r1, lamda_mod); 1658 1659 for(scan_pos = 0; scan_pos < 16; scan_pos++) 1660 { 1661 *(pi4_quant_round_1_2 + ((scan_pos % 4) + ((scan_pos >> 2) * i4_trans_size))) = temp2; 1662 } 1663 1664 { 1665 WORD32 ctxt_idx; 1666 1667 WORD32 nbr_csbf_0, nbr_csbf_1; 1668 WORD32 state_mps_0, state_mps_1; 1669 ctxt_idx = IHEVC_CAB_CODED_SUBLK_IDX; 1670 ctxt_idx += is_luma ? 0 : 2; 1671 1672 /* ctxt based on right / bottom avail csbf, section 9.3.3.1.3 */ 1673 /* if neibhor not available, ctxt idx = 0*/ 1674 nbr_csbf_0 = 0; 1675 ctxt_idx += nbr_csbf_0 ? 
1 : 0; 1676 state_mps_0 = pu1_ctxt_model[ctxt_idx]; 1677 1678 nbr_csbf_1 = 1; 1679 ctxt_idx += nbr_csbf_1 ? 1 : 0; 1680 state_mps_1 = pu1_ctxt_model[ctxt_idx]; 1681 1682 is_nbr_csb_state_mps = ((state_mps_0 % 2) == 1) && ((state_mps_1 % 2) == 1); 1683 } 1684 1685 if(1 == is_nbr_csb_state_mps) 1686 { 1687 for(i = 0; i < (i4_trans_size * i4_trans_size >> 4); i++) 1688 { 1689 UWORD8 sig_ctxinc; 1690 WORD32 state_mps; 1691 WORD32 gt1_ctxt = 0; 1692 WORD32 ctxt_set = 0; 1693 1694 WORD32 ctxt_idx; 1695 1696 /*Check if the cabac states had previous nbr available */ 1697 1698 if(i == 0) 1699 pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[3][0]; 1700 else if(i < (i4_trans_size >> 2)) 1701 pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[1][0]; 1702 else if((i % (i4_trans_size >> 2)) == 0) 1703 pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[2][0]; 1704 else 1705 pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[0][0]; 1706 1707 if(((i % (i4_trans_size >> 2)) == 0) && (i != 0)) 1708 k++; 1709 1710 j = ((i4_trans_size * 4) * k) + ((i % (i4_trans_size >> 2)) * 4); 1711 /*ctxt_set = 0 DC subblock, the previous state did not have 2 1712 ctxt_set = 1 DC subblock, the previous state did have >= 2 1713 ctxt_set = 2 AC subblock, the previous state did not have 2 1714 ctxt_set = 3 AC subblock, the previous state did have >= 2*/ 1715 1716 ctxt_set = (i && is_luma) ? 
2 : 0; 1717 1718 /* gt1_ctxt = 1 for the co-ef value to be 1 */ 1719 gt1_ctxt = 0; 1720 ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt; 1721 1722 state_mps = pu1_ctxt_model[ctxt_idx]; 1723 1724 /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */ 1725 u4_bits_estimated_r1_temp = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0]; 1726 1727 for(scan_pos = 0; scan_pos < 16; scan_pos++) 1728 { 1729 UWORD8 y_pos_x_pos; 1730 1731 if(scan_pos || i) 1732 { 1733 y_pos_x_pos = scan_pos; // gu1_hevce_scan4x4[i4_scan_idx][scan_pos]; 1734 /* ctxt for AC coeff depends on curpos and neigbour csbf */ 1735 sig_ctxinc = pu1_hevce_sigcoeff_ctxtinc[y_pos_x_pos]; 1736 1737 /* based on luma subblock pos */ 1738 sig_ctxinc += (i && is_luma) ? 3 : 0; 1739 1740 sig_ctxinc += sig_coeff_base_ctxt; 1741 } 1742 else 1743 { 1744 /*MAM : both scan pos and i 0 impies the DC coef of 1st block only */ 1745 /* DC coeff has fixed context for luma and chroma */ 1746 sig_ctxinc = is_luma ? 
IHEVC_CAB_COEFF_FLAG : IHEVC_CAB_COEFF_FLAG + 27; 1747 } 1748 1749 /*Get the mps state based on ctxt modes */ 1750 state_mps = pu1_ctxt_model[sig_ctxinc]; 1751 1752 /* Bits taken to encode sig co-ef flag as 0 */ 1753 u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0]; 1754 1755 u4_bits_estimated_r1 = 1756 (gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000)); 1757 1758 /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */ 1759 u4_bits_estimated_r1 += u4_bits_estimated_r1_temp; 1760 { 1761 QUANT_ROUND_FACTOR( 1762 temp2, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod); 1763 *(pi4_quant_round_0_1 + 1764 ((scan_pos % 4) + ((scan_pos >> 2) * i4_trans_size)) + j) = temp2; 1765 } 1766 } 1767 } 1768 } 1769 else 1770 { 1771 /*If Both nbr csbfs are 0, then all the coef in sub-blocks will have same value except for 1st subblock, 1772 Hence will write the same value to all sub block, and overwrite for the 1st one */ 1773 i = 1; 1774 { 1775 UWORD8 sig_ctxinc; 1776 UWORD8 y_pos_x_pos; 1777 WORD32 quant_rounding_0_1; 1778 1779 pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc_00[0]; 1780 1781 scan_pos = 0; 1782 y_pos_x_pos = scan_pos; // gu1_hevce_scan4x4[i4_scan_idx][scan_pos]; 1783 /* ctxt for AC coeff depends on curpos and neigbour csbf */ 1784 sig_ctxinc = pu1_hevce_sigcoeff_ctxtinc[y_pos_x_pos]; 1785 1786 /* based on luma subblock pos */ 1787 sig_ctxinc += (is_luma) ? 
3 : 0; 1788 1789 sig_ctxinc += sig_coeff_base_ctxt; 1790 1791 /*Get the mps state based on ctxt modes */ 1792 state_mps = pu1_ctxt_model[sig_ctxinc]; 1793 1794 /* Bits taken to encode sig co-ef flag as 0 */ 1795 u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0]; 1796 1797 u4_bits_estimated_r1 = 1798 (gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000)); 1799 1800 /*ctxt_set = 0 DC subblock, the previous state did not have 2 1801 ctxt_set = 1 DC subblock, the previous state did have >= 2 1802 ctxt_set = 2 AC subblock, the previous state did not have 2 1803 ctxt_set = 3 AC subblock, the previous state did have >= 2*/ 1804 1805 ctxt_set = (i && is_luma) ? 2 : 0; 1806 1807 /* gt1_ctxt = 1 for the co-ef value to be 1 */ 1808 gt1_ctxt = 0; 1809 ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt; 1810 1811 state_mps = pu1_ctxt_model[ctxt_idx]; 1812 1813 /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */ 1814 u4_bits_estimated_r1 += gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0]; 1815 1816 QUANT_ROUND_FACTOR( 1817 quant_rounding_0_1, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod); 1818 1819 for(scan_pos = 0; scan_pos < (16 * (i4_trans_size * i4_trans_size >> 4)); 1820 scan_pos++) 1821 { 1822 *(pi4_quant_round_0_1 + scan_pos) = quant_rounding_0_1; 1823 } 1824 } 1825 1826 /*First Subblock*/ 1827 i = 0; 1828 1829 { 1830 UWORD8 sig_ctxinc; 1831 WORD32 state_mps; 1832 WORD32 gt1_ctxt = 0; 1833 WORD32 ctxt_set = 0; 1834 1835 WORD32 ctxt_idx; 1836 1837 /*Check if the cabac states had previous nbr available */ 1838 1839 { 1840 pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[0][0]; 1841 1842 /*ctxt_set = 0 DC subblock, the previous state did not have 2 1843 ctxt_set = 1 DC subblock, the previous state did have >= 2 1844 ctxt_set = 2 AC subblock, the previous state did not have 2 1845 ctxt_set = 3 AC subblock, the previous state did have >= 2*/ 1846 ctxt_set = (i && is_luma) ? 
2 : 0; 1847 1848 /* gt1_ctxt = 1 for the co-ef value to be 1 */ 1849 gt1_ctxt = 0; 1850 ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt; 1851 1852 state_mps = pu1_ctxt_model[ctxt_idx]; 1853 1854 /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */ 1855 u4_bits_estimated_r1_temp = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0]; 1856 1857 for(scan_pos = 0; scan_pos < 16; scan_pos++) 1858 { 1859 UWORD8 y_pos_x_pos; 1860 1861 if(scan_pos) 1862 { 1863 y_pos_x_pos = scan_pos; // gu1_hevce_scan4x4[i4_scan_idx][scan_pos]; 1864 /* ctxt for AC coeff depends on curpos and neigbour csbf */ 1865 sig_ctxinc = pu1_hevce_sigcoeff_ctxtinc[y_pos_x_pos]; 1866 1867 /* based on luma subblock pos */ 1868 sig_ctxinc += (i && is_luma) ? 3 : 0; 1869 1870 sig_ctxinc += sig_coeff_base_ctxt; 1871 } 1872 else 1873 { 1874 /*MAM : both scan pos and i 0 impies the DC coef of 1st block only */ 1875 /* DC coeff has fixed context for luma and chroma */ 1876 sig_ctxinc = is_luma ? IHEVC_CAB_COEFF_FLAG : IHEVC_CAB_COEFF_FLAG + 27; 1877 } 1878 1879 /*Get the mps state based on ctxt modes */ 1880 state_mps = pu1_ctxt_model[sig_ctxinc]; 1881 1882 /* Bits taken to encode sig co-ef flag as 0 */ 1883 u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0]; 1884 1885 u4_bits_estimated_r1 = 1886 (gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000)); 1887 1888 /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */ 1889 u4_bits_estimated_r1 += u4_bits_estimated_r1_temp; 1890 { 1891 QUANT_ROUND_FACTOR( 1892 temp2, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod); 1893 *(pi4_quant_round_0_1 + 1894 ((scan_pos % 4) + ((scan_pos >> 2) * i4_trans_size))) = temp2; 1895 } 1896 } 1897 } 1898 } 1899 } 1900 } 1901 return; 1902 } 1903 1904 /*! 
1905 ****************************************************************************** 1906 * \if Function name : ihevce_t_q_iq_ssd_scan_fxn \endif 1907 * 1908 * \brief 1909 * Transform unit level (Luma) enc_loop function 1910 * 1911 * \param[in] ps_ctxt enc_loop module ctxt pointer 1912 * \param[in] pu1_pred pointer to predicted data buffer 1913 * \param[in] pred_strd predicted buffer stride 1914 * \param[in] pu1_src pointer to source data buffer 1915 * \param[in] src_strd source buffer stride 1916 * \param[in] pi2_deq_data pointer to store iq data 1917 * \param[in] deq_data_strd iq data buffer stride 1918 * \param[out] pu1_ecd_data pointer coeff output buffer (input to ent cod) 1919 * \param[out] pu1_csbf_buf pointer to store the csbf for all 4x4 in a current 1920 * block 1921 * \param[out] csbf_strd csbf buffer stride 1922 * \param[in] trans_size transform size (4, 8, 16,32) 1923 * \param[in] packed_pred_mode 0:Inter 1:Intra 2:Skip 1924 * \param[out] pi4_cost pointer to store the cost 1925 * \param[out] pi4_coeff_off pointer to store the number of bytes produced in 1926 * coeff buffer 1927 * \param[out] pu4_tu_bits pointer to store the best TU bits required encode 1928 the current TU in RDopt Mode 1929 * \param[out] pu4_blk_sad pointer to store the block sad for RC 1930 * \param[out] pi4_zero_col pointer to store the zero_col info for the TU 1931 * \param[out] pi4_zero_row pointer to store the zero_row info for the TU 1932 * \param[in] i4_perform_rdoq Indicates if RDOQ should be performed or not 1933 * \param[in] i4_perform_sbh Indicates if SBH should be performed or not 1934 * 1935 * \return 1936 * CBF of the current block 1937 * 1938 * \author 1939 * Ittiam 1940 * 1941 ***************************************************************************** 1942 */ 1943 1944 WORD32 ihevce_t_q_iq_ssd_scan_fxn( 1945 ihevce_enc_loop_ctxt_t *ps_ctxt, 1946 UWORD8 *pu1_pred, 1947 WORD32 pred_strd, 1948 UWORD8 *pu1_src, 1949 WORD32 src_strd, 1950 WORD16 *pi2_deq_data, 1951 WORD32 
deq_data_strd, 1952 UWORD8 *pu1_recon, 1953 WORD32 i4_recon_stride, 1954 UWORD8 *pu1_ecd_data, 1955 UWORD8 *pu1_csbf_buf, 1956 WORD32 csbf_strd, 1957 WORD32 trans_size, 1958 WORD32 packed_pred_mode, 1959 LWORD64 *pi8_cost, 1960 WORD32 *pi4_coeff_off, 1961 WORD32 *pi4_tu_bits, 1962 UWORD32 *pu4_blk_sad, 1963 WORD32 *pi4_zero_col, 1964 WORD32 *pi4_zero_row, 1965 UWORD8 *pu1_is_recon_available, 1966 WORD32 i4_perform_rdoq, 1967 WORD32 i4_perform_sbh, 1968 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS 1969 WORD32 i4_alpha_stim_multiplier, 1970 UWORD8 u1_is_cu_noisy, 1971 #endif 1972 SSD_TYPE_T e_ssd_type, 1973 WORD32 early_cbf) 1974 { 1975 WORD32 cbf = 0; 1976 WORD32 trans_idx; 1977 WORD32 quant_scale_mat_offset; 1978 WORD32 *pi4_trans_scratch; 1979 WORD16 *pi2_trans_values; 1980 WORD16 *pi2_quant_coeffs; 1981 WORD32 *pi4_subBlock2csbfId_map = NULL; 1982 1983 #if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3 1984 WORD32 ai4_quant_rounding_factors[3][MAX_TU_SIZE * MAX_TU_SIZE], i; 1985 #endif 1986 1987 rdoq_sbh_ctxt_t *ps_rdoq_sbh_ctxt = &ps_ctxt->s_rdoq_sbh_ctxt; 1988 1989 WORD32 i4_perform_zcbf = (ENABLE_INTER_ZCU_COST && (PRED_MODE_INTRA != packed_pred_mode)) || 1990 (ps_ctxt->i4_zcbf_rdo_level == ZCBF_ENABLE); 1991 WORD32 i4_perform_coeff_level_rdoq = (ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING); 1992 WORD8 intra_flag = 0; 1993 ASSERT(csbf_strd == MAX_TU_IN_CTB_ROW); 1994 1995 *pi4_tu_bits = 0; 1996 *pi4_coeff_off = 0; 1997 pu1_is_recon_available[0] = 0; 1998 1999 if((PRED_MODE_SKIP == packed_pred_mode) || (0 == early_cbf)) 2000 { 2001 if(e_ssd_type != NULL_TYPE) 2002 { 2003 /* SSD cost is stored to the pointer */ 2004 pi8_cost[0] = 2005 2006 ps_ctxt->s_cmn_opt_func.pf_ssd_and_sad_calculator( 2007 pu1_pred, pred_strd, pu1_src, src_strd, trans_size, pu4_blk_sad); 2008 2009 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS 2010 if(u1_is_cu_noisy && i4_alpha_stim_multiplier) 2011 { 2012 pi8_cost[0] = 
ihevce_inject_stim_into_distortion( 2013 pu1_src, 2014 src_strd, 2015 pu1_pred, 2016 pred_strd, 2017 pi8_cost[0], 2018 !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS 2019 : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) * 2020 (double)ALPHA_FOR_ZERO_CODING_DECISIONS) / 2021 100.0, 2022 trans_size, 2023 0, 2024 ps_ctxt->u1_enable_psyRDOPT, 2025 NULL_PLANE); 2026 } 2027 #endif 2028 2029 /* copy pred to recon for skip mode */ 2030 if(SPATIAL_DOMAIN_SSD == e_ssd_type) 2031 { 2032 ps_ctxt->s_cmn_opt_func.pf_copy_2d( 2033 pu1_recon, i4_recon_stride, pu1_pred, pred_strd, trans_size, trans_size); 2034 pu1_is_recon_available[0] = 1; 2035 } 2036 else 2037 { 2038 pu1_is_recon_available[0] = 0; 2039 } 2040 2041 #if ENABLE_INTER_ZCU_COST 2042 ps_ctxt->i8_cu_not_coded_cost += pi8_cost[0]; 2043 #endif 2044 } 2045 else 2046 { 2047 pi8_cost[0] = UINT_MAX; 2048 } 2049 2050 /* cbf is returned as 0 */ 2051 return (0); 2052 } 2053 2054 /* derive context variables */ 2055 pi4_trans_scratch = (WORD32 *)&ps_ctxt->ai2_scratch[0]; 2056 pi2_quant_coeffs = &ps_ctxt->ai2_scratch[0]; 2057 pi2_trans_values = &ps_ctxt->ai2_scratch[0] + (MAX_TRANS_SIZE * 2); 2058 2059 /* translate the transform size to index for 4x4 and 8x8 */ 2060 trans_idx = trans_size >> 2; 2061 2062 if(PRED_MODE_INTRA == packed_pred_mode) 2063 { 2064 quant_scale_mat_offset = 0; 2065 intra_flag = 1; 2066 #if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3 2067 ai4_quant_rounding_factors[0][0] = 2068 MAX(ps_ctxt->i4_quant_rnd_factor[intra_flag], (1 << QUANT_ROUND_FACTOR_Q) / 3); 2069 2070 for(i = 0; i < trans_size * trans_size; i++) 2071 { 2072 ai4_quant_rounding_factors[1][i] = 2073 MAX(ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3][i], 2074 (1 << QUANT_ROUND_FACTOR_Q) / 3); 2075 ai4_quant_rounding_factors[2][i] = 2076 MAX(ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3][i], 2077 (1 << QUANT_ROUND_FACTOR_Q) / 3); 2078 } 2079 #endif 2080 } 2081 else 2082 { 2083 quant_scale_mat_offset = 
NUM_TRANS_TYPES; 2084 } 2085 /* for intra 4x4 DST transform should be used */ 2086 if((1 == trans_idx) && (1 == intra_flag)) 2087 { 2088 trans_idx = 0; 2089 } 2090 /* for 16x16 cases */ 2091 else if(16 == trans_size) 2092 { 2093 trans_idx = 3; 2094 } 2095 /* for 32x32 cases */ 2096 else if(32 == trans_size) 2097 { 2098 trans_idx = 4; 2099 } 2100 2101 switch(trans_size) 2102 { 2103 case 4: 2104 { 2105 pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map4x4TU; 2106 2107 break; 2108 } 2109 case 8: 2110 { 2111 pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map8x8TU; 2112 2113 break; 2114 } 2115 case 16: 2116 { 2117 pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map16x16TU; 2118 2119 break; 2120 } 2121 case 32: 2122 { 2123 pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map32x32TU; 2124 2125 break; 2126 } 2127 } 2128 2129 /* Do not call the FT and Quant functions if early_cbf is 0 */ 2130 if(1 == early_cbf) 2131 { 2132 /* ---------- call residue and transform block ------- */ 2133 *pu4_blk_sad = ps_ctxt->apf_resd_trns[trans_idx]( 2134 pu1_src, 2135 pu1_pred, 2136 pi4_trans_scratch, 2137 pi2_trans_values, 2138 src_strd, 2139 pred_strd, 2140 ((trans_size << 16) + 0)); /* dst strd and chroma flag are packed together */ 2141 2142 cbf = ps_ctxt->apf_quant_iquant_ssd 2143 [i4_perform_coeff_level_rdoq + (e_ssd_type != FREQUENCY_DOMAIN_SSD) * 2]( 2144 pi2_trans_values, 2145 ps_ctxt->api2_rescal_mat[trans_idx + quant_scale_mat_offset], 2146 pi2_quant_coeffs, 2147 pi2_deq_data, 2148 trans_size, 2149 ps_ctxt->i4_cu_qp_div6, 2150 ps_ctxt->i4_cu_qp_mod6, 2151 #if !PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3 2152 ps_ctxt->i4_quant_rnd_factor[intra_flag], 2153 ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3], 2154 ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3], 2155 #else 2156 intra_flag ? ai4_quant_rounding_factors[0][0] 2157 : ps_ctxt->i4_quant_rnd_factor[intra_flag], 2158 intra_flag ? 
ai4_quant_rounding_factors[1] 2159 : ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3], 2160 intra_flag ? ai4_quant_rounding_factors[2] 2161 : ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3], 2162 #endif 2163 trans_size, 2164 trans_size, 2165 deq_data_strd, 2166 pu1_csbf_buf, 2167 csbf_strd, 2168 pi4_zero_col, 2169 pi4_zero_row, 2170 ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset], 2171 pi8_cost); 2172 2173 if(e_ssd_type != FREQUENCY_DOMAIN_SSD) 2174 { 2175 pi8_cost[0] = UINT_MAX; 2176 } 2177 } 2178 2179 if(0 != cbf) 2180 { 2181 if(i4_perform_sbh || i4_perform_rdoq) 2182 { 2183 ps_rdoq_sbh_ctxt->i4_iq_data_strd = deq_data_strd; 2184 ps_rdoq_sbh_ctxt->i4_q_data_strd = trans_size; 2185 ps_rdoq_sbh_ctxt->pi4_subBlock2csbfId_map = pi4_subBlock2csbfId_map; 2186 2187 ps_rdoq_sbh_ctxt->i4_qp_div = ps_ctxt->i4_cu_qp_div6; 2188 ps_rdoq_sbh_ctxt->i2_qp_rem = ps_ctxt->i4_cu_qp_mod6; 2189 ps_rdoq_sbh_ctxt->i4_scan_idx = ps_ctxt->i4_scan_idx; 2190 ps_rdoq_sbh_ctxt->i8_ssd_cost = *pi8_cost; 2191 ps_rdoq_sbh_ctxt->i4_trans_size = trans_size; 2192 2193 ps_rdoq_sbh_ctxt->pi2_dequant_coeff = 2194 ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset]; 2195 ps_rdoq_sbh_ctxt->pi2_iquant_coeffs = pi2_deq_data; 2196 ps_rdoq_sbh_ctxt->pi2_quant_coeffs = pi2_quant_coeffs; 2197 ps_rdoq_sbh_ctxt->pi2_trans_values = pi2_trans_values; 2198 ps_rdoq_sbh_ctxt->pu1_csbf_buf = pu1_csbf_buf; 2199 2200 /* ------- call coeffs scan function ------- */ 2201 if((!i4_perform_rdoq)) 2202 { 2203 ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt); 2204 2205 pi8_cost[0] = ps_rdoq_sbh_ctxt->i8_ssd_cost; 2206 } 2207 } 2208 2209 *pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs( 2210 pi2_quant_coeffs, 2211 pi4_subBlock2csbfId_map, 2212 ps_ctxt->i4_scan_idx, 2213 trans_size, 2214 pu1_ecd_data, 2215 pu1_csbf_buf, 2216 csbf_strd); 2217 } 2218 *pi8_cost >>= ga_trans_shift[trans_idx]; 2219 2220 #if RDOPT_ZERO_CBF_ENABLE 2221 /* compare null cbf cost with encode tu rd-cost */ 2222 if(cbf != 
0) 2223 { 2224 WORD32 tu_bits; 2225 LWORD64 tu_rd_cost; 2226 2227 LWORD64 zero_cbf_cost = 0; 2228 2229 /*Populating the feilds of rdoq_ctxt structure*/ 2230 if(i4_perform_rdoq) 2231 { 2232 /* transform size to log2transform size */ 2233 GETRANGE(ps_rdoq_sbh_ctxt->i4_log2_trans_size, trans_size); 2234 ps_rdoq_sbh_ctxt->i4_log2_trans_size -= 1; 2235 ps_rdoq_sbh_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->i8_cl_ssd_lambda_qf; 2236 ps_rdoq_sbh_ctxt->i4_is_luma = 1; 2237 ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td = ga_trans_shift[trans_idx]; 2238 ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td = 2239 (1 << ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td) / 2; 2240 ps_rdoq_sbh_ctxt->i1_tu_is_coded = 0; 2241 ps_rdoq_sbh_ctxt->pi4_zero_col = pi4_zero_col; 2242 ps_rdoq_sbh_ctxt->pi4_zero_row = pi4_zero_row; 2243 } 2244 else if(i4_perform_zcbf) 2245 { 2246 zero_cbf_cost = 2247 2248 ps_ctxt->s_cmn_opt_func.pf_ssd_calculator( 2249 pu1_src, pu1_pred, src_strd, pred_strd, trans_size, trans_size); 2250 } 2251 2252 /************************************************************************/ 2253 /* call the entropy rdo encode to get the bit estimate for current tu */ 2254 /* note that tu includes only residual coding bits and does not include */ 2255 /* tu split, cbf and qp delta encoding bits for a TU */ 2256 /************************************************************************/ 2257 if(i4_perform_rdoq) 2258 { 2259 tu_bits = ihevce_entropy_rdo_encode_tu_rdoq( 2260 &ps_ctxt->s_rdopt_entropy_ctxt, 2261 (pu1_ecd_data), 2262 trans_size, 2263 1, 2264 ps_rdoq_sbh_ctxt, 2265 pi8_cost, 2266 &zero_cbf_cost, 2267 0); 2268 2269 if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 0) 2270 { 2271 cbf = 0; 2272 *pi4_coeff_off = 0; 2273 } 2274 2275 if((i4_perform_sbh) && (0 != cbf)) 2276 { 2277 ps_rdoq_sbh_ctxt->i8_ssd_cost = *pi8_cost; 2278 ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt); 2279 *pi8_cost = ps_rdoq_sbh_ctxt->i8_ssd_cost; 2280 } 2281 2282 /*Add round value before normalizing*/ 2283 *pi8_cost += 
ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td; 2284 *pi8_cost >>= ga_trans_shift[trans_idx]; 2285 2286 if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 1) 2287 { 2288 pi2_quant_coeffs = &ps_ctxt->ai2_scratch[0]; 2289 *pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs( 2290 pi2_quant_coeffs, 2291 pi4_subBlock2csbfId_map, 2292 ps_ctxt->i4_scan_idx, 2293 trans_size, 2294 pu1_ecd_data, 2295 pu1_csbf_buf, 2296 csbf_strd); 2297 } 2298 } 2299 else 2300 { 2301 tu_bits = ihevce_entropy_rdo_encode_tu( 2302 &ps_ctxt->s_rdopt_entropy_ctxt, pu1_ecd_data, trans_size, 1, i4_perform_sbh); 2303 } 2304 2305 *pi4_tu_bits = tu_bits; 2306 2307 if(e_ssd_type == SPATIAL_DOMAIN_SSD) 2308 { 2309 *pi8_cost = ihevce_it_recon_ssd( 2310 ps_ctxt, 2311 pu1_src, 2312 src_strd, 2313 pu1_pred, 2314 pred_strd, 2315 pi2_deq_data, 2316 deq_data_strd, 2317 pu1_recon, 2318 i4_recon_stride, 2319 pu1_ecd_data, 2320 trans_size, 2321 packed_pred_mode, 2322 cbf, 2323 *pi4_zero_col, 2324 *pi4_zero_row, 2325 NULL_PLANE); 2326 2327 pu1_is_recon_available[0] = 1; 2328 } 2329 2330 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS 2331 if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier) 2332 { 2333 pi8_cost[0] = ihevce_inject_stim_into_distortion( 2334 pu1_src, 2335 src_strd, 2336 pu1_recon, 2337 i4_recon_stride, 2338 pi8_cost[0], 2339 i4_alpha_stim_multiplier, 2340 trans_size, 2341 0, 2342 ps_ctxt->u1_enable_psyRDOPT, 2343 NULL_PLANE); 2344 } 2345 else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier) 2346 { 2347 pi8_cost[0] = ihevce_inject_stim_into_distortion( 2348 pu1_src, 2349 src_strd, 2350 pu1_pred, 2351 pred_strd, 2352 pi8_cost[0], 2353 i4_alpha_stim_multiplier, 2354 trans_size, 2355 0, 2356 ps_ctxt->u1_enable_psyRDOPT, 2357 NULL_PLANE); 2358 } 2359 #endif 2360 2361 /* add the SSD cost to bits estimate given by ECD */ 2362 tu_rd_cost = *pi8_cost + COMPUTE_RATE_COST_CLIP30( 2363 tu_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT); 2364 2365 
if(i4_perform_zcbf) 2366 { 2367 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS 2368 if(u1_is_cu_noisy && i4_alpha_stim_multiplier) 2369 { 2370 zero_cbf_cost = ihevce_inject_stim_into_distortion( 2371 pu1_src, 2372 src_strd, 2373 pu1_pred, 2374 pred_strd, 2375 zero_cbf_cost, 2376 !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS 2377 : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) * 2378 (double)ALPHA_FOR_ZERO_CODING_DECISIONS) / 2379 100.0, 2380 trans_size, 2381 0, 2382 ps_ctxt->u1_enable_psyRDOPT, 2383 NULL_PLANE); 2384 } 2385 #endif 2386 2387 /* force the tu as zero cbf if zero_cbf_cost is lower */ 2388 if(zero_cbf_cost < tu_rd_cost) 2389 { 2390 /* num bytes is set to 0 */ 2391 *pi4_coeff_off = 0; 2392 2393 /* cbf is returned as 0 */ 2394 cbf = 0; 2395 2396 /* cost is returned as 0 cbf cost */ 2397 *pi8_cost = zero_cbf_cost; 2398 2399 /* TU bits is set to 0 */ 2400 *pi4_tu_bits = 0; 2401 pu1_is_recon_available[0] = 0; 2402 2403 if(SPATIAL_DOMAIN_SSD == e_ssd_type) 2404 { 2405 /* copy pred to recon for zcbf mode */ 2406 2407 ps_ctxt->s_cmn_opt_func.pf_copy_2d( 2408 pu1_recon, i4_recon_stride, pu1_pred, pred_strd, trans_size, trans_size); 2409 2410 pu1_is_recon_available[0] = 1; 2411 } 2412 } 2413 /* accumulate cu not coded cost with zcbf cost */ 2414 #if ENABLE_INTER_ZCU_COST 2415 ps_ctxt->i8_cu_not_coded_cost += zero_cbf_cost; 2416 #endif 2417 } 2418 } 2419 else 2420 { 2421 /* cbf = 0, accumulate cu not coded cost */ 2422 if(e_ssd_type == SPATIAL_DOMAIN_SSD) 2423 { 2424 *pi8_cost = ihevce_it_recon_ssd( 2425 ps_ctxt, 2426 pu1_src, 2427 src_strd, 2428 pu1_pred, 2429 pred_strd, 2430 pi2_deq_data, 2431 deq_data_strd, 2432 pu1_recon, 2433 i4_recon_stride, 2434 pu1_ecd_data, 2435 trans_size, 2436 packed_pred_mode, 2437 cbf, 2438 *pi4_zero_col, 2439 *pi4_zero_row, 2440 NULL_PLANE); 2441 2442 pu1_is_recon_available[0] = 1; 2443 } 2444 2445 #if ENABLE_INTER_ZCU_COST 2446 { 2447 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS 2448 if(u1_is_cu_noisy && 
(e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier) 2449 { 2450 pi8_cost[0] = ihevce_inject_stim_into_distortion( 2451 pu1_src, 2452 src_strd, 2453 pu1_recon, 2454 i4_recon_stride, 2455 pi8_cost[0], 2456 !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS 2457 : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) * 2458 (double)ALPHA_FOR_ZERO_CODING_DECISIONS) / 2459 100.0, 2460 trans_size, 2461 0, 2462 ps_ctxt->u1_enable_psyRDOPT, 2463 NULL_PLANE); 2464 } 2465 else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier) 2466 { 2467 pi8_cost[0] = ihevce_inject_stim_into_distortion( 2468 pu1_src, 2469 src_strd, 2470 pu1_pred, 2471 pred_strd, 2472 pi8_cost[0], 2473 !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS 2474 : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) * 2475 (double)ALPHA_FOR_ZERO_CODING_DECISIONS) / 2476 100.0, 2477 trans_size, 2478 0, 2479 ps_ctxt->u1_enable_psyRDOPT, 2480 NULL_PLANE); 2481 } 2482 #endif 2483 2484 ps_ctxt->i8_cu_not_coded_cost += *pi8_cost; 2485 } 2486 #endif /* ENABLE_INTER_ZCU_COST */ 2487 } 2488 #endif 2489 2490 return (cbf); 2491 } 2492 2493 /*! 
2494 ****************************************************************************** 2495 * \if Function name : ihevce_it_recon_fxn \endif 2496 * 2497 * \brief 2498 * Transform unit level (Luma) IT Recon function 2499 * 2500 * \param[in] ps_ctxt enc_loop module ctxt pointer 2501 * \param[in] pi2_deq_data pointer to iq data 2502 * \param[in] deq_data_strd iq data buffer stride 2503 * \param[in] pu1_pred pointer to predicted data buffer 2504 * \param[in] pred_strd predicted buffer stride 2505 * \param[in] pu1_recon pointer to recon buffer 2506 * \param[in] recon_strd recon buffer stride 2507 * \param[out] pu1_ecd_data pointer coeff output buffer (input to ent cod) 2508 * \param[in] trans_size transform size (4, 8, 16,32) 2509 * \param[in] packed_pred_mode 0:Inter 1:Intra 2:Skip 2510 * \param[in] cbf CBF of the current block 2511 * \param[in] zero_cols zero_cols of the current block 2512 * \param[in] zero_rows zero_rows of the current block 2513 * 2514 * \return 2515 * 2516 * \author 2517 * Ittiam 2518 * 2519 ***************************************************************************** 2520 */ 2521 2522 void ihevce_it_recon_fxn( 2523 ihevce_enc_loop_ctxt_t *ps_ctxt, 2524 WORD16 *pi2_deq_data, 2525 WORD32 deq_dat_strd, 2526 UWORD8 *pu1_pred, 2527 WORD32 pred_strd, 2528 UWORD8 *pu1_recon, 2529 WORD32 recon_strd, 2530 UWORD8 *pu1_ecd_data, 2531 WORD32 trans_size, 2532 WORD32 packed_pred_mode, 2533 WORD32 cbf, 2534 WORD32 zero_cols, 2535 WORD32 zero_rows) 2536 { 2537 WORD32 dc_add_flag = 0; 2538 WORD32 trans_idx; 2539 2540 /* translate the transform size to index for 4x4 and 8x8 */ 2541 trans_idx = trans_size >> 2; 2542 2543 /* if SKIP mode needs to be evaluated the pred is copied to recon */ 2544 if(PRED_MODE_SKIP == packed_pred_mode) 2545 { 2546 UWORD8 *pu1_curr_recon, *pu1_curr_pred; 2547 2548 pu1_curr_pred = pu1_pred; 2549 pu1_curr_recon = pu1_recon; 2550 2551 /* 2D copy of data */ 2552 2553 ps_ctxt->s_cmn_opt_func.pf_2d_square_copy( 2554 pu1_curr_recon, recon_strd, 
pu1_curr_pred, pred_strd, trans_size, sizeof(UWORD8)); 2555 2556 return; 2557 } 2558 2559 /* for intra 4x4 DST transform should be used */ 2560 if((1 == trans_idx) && (PRED_MODE_INTRA == packed_pred_mode)) 2561 { 2562 trans_idx = 0; 2563 } 2564 /* for 16x16 cases */ 2565 else if(16 == trans_size) 2566 { 2567 trans_idx = 3; 2568 } 2569 /* for 32x32 cases */ 2570 else if(32 == trans_size) 2571 { 2572 trans_idx = 4; 2573 } 2574 2575 /*if (lastx == 0 && lasty == 0) , ie only 1 coefficient */ 2576 if((0 == pu1_ecd_data[0]) && (0 == pu1_ecd_data[1])) 2577 { 2578 dc_add_flag = 1; 2579 } 2580 2581 if(0 == cbf) 2582 { 2583 /* buffer copy */ 2584 ps_ctxt->s_cmn_opt_func.pf_2d_square_copy( 2585 pu1_recon, recon_strd, pu1_pred, pred_strd, trans_size, 1); 2586 } 2587 else if((1 == dc_add_flag) && (0 != trans_idx)) 2588 { 2589 /* dc add */ 2590 ps_ctxt->s_cmn_opt_func.pf_itrans_recon_dc( 2591 pu1_pred, 2592 pred_strd, 2593 pu1_recon, 2594 recon_strd, 2595 trans_size, 2596 pi2_deq_data[0], 2597 NULL_PLANE /* luma */ 2598 ); 2599 } 2600 else 2601 { 2602 ps_ctxt->apf_it_recon[trans_idx]( 2603 pi2_deq_data, 2604 &ps_ctxt->ai2_scratch[0], 2605 pu1_pred, 2606 pu1_recon, 2607 deq_dat_strd, 2608 pred_strd, 2609 recon_strd, 2610 zero_cols, 2611 zero_rows); 2612 } 2613 } 2614 2615 /*! 
2616 ****************************************************************************** 2617 * \if Function name : ihevce_chroma_it_recon_fxn \endif 2618 * 2619 * \brief 2620 * Transform unit level (Chroma) IT Recon function 2621 * 2622 * \param[in] ps_ctxt enc_loop module ctxt pointer 2623 * \param[in] pi2_deq_data pointer to iq data 2624 * \param[in] deq_data_strd iq data buffer stride 2625 * \param[in] pu1_pred pointer to predicted data buffer 2626 * \param[in] pred_strd predicted buffer stride 2627 * \param[in] pu1_recon pointer to recon buffer 2628 * \param[in] recon_strd recon buffer stride 2629 * \param[out] pu1_ecd_data pointer coeff output buffer (input to ent cod) 2630 * \param[in] trans_size transform size (4, 8, 16) 2631 * \param[in] cbf CBF of the current block 2632 * \param[in] zero_cols zero_cols of the current block 2633 * \param[in] zero_rows zero_rows of the current block 2634 * 2635 * \return 2636 * 2637 * \author 2638 * Ittiam 2639 * 2640 ***************************************************************************** 2641 */ 2642 2643 void ihevce_chroma_it_recon_fxn( 2644 ihevce_enc_loop_ctxt_t *ps_ctxt, 2645 WORD16 *pi2_deq_data, 2646 WORD32 deq_dat_strd, 2647 UWORD8 *pu1_pred, 2648 WORD32 pred_strd, 2649 UWORD8 *pu1_recon, 2650 WORD32 recon_strd, 2651 UWORD8 *pu1_ecd_data, 2652 WORD32 trans_size, 2653 WORD32 cbf, 2654 WORD32 zero_cols, 2655 WORD32 zero_rows, 2656 CHROMA_PLANE_ID_T e_chroma_plane) 2657 { 2658 WORD32 trans_idx; 2659 2660 ASSERT((e_chroma_plane == U_PLANE) || (e_chroma_plane == V_PLANE)); 2661 2662 /* since 2x2 transform is not allowed for chroma*/ 2663 if(2 == trans_size) 2664 { 2665 trans_size = 4; 2666 } 2667 2668 /* translate the transform size to index */ 2669 trans_idx = trans_size >> 2; 2670 2671 /* for 16x16 cases */ 2672 if(16 == trans_size) 2673 { 2674 trans_idx = 3; 2675 } 2676 2677 if(0 == cbf) 2678 { 2679 /* buffer copy */ 2680 ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy( 2681 pu1_pred, pred_strd, pu1_recon, 
recon_strd, trans_size, trans_size, e_chroma_plane); 2682 } 2683 else if((0 == pu1_ecd_data[0]) && (0 == pu1_ecd_data[1])) 2684 { 2685 /* dc add */ 2686 ps_ctxt->s_cmn_opt_func.pf_itrans_recon_dc( 2687 pu1_pred, 2688 pred_strd, 2689 pu1_recon, 2690 recon_strd, 2691 trans_size, 2692 pi2_deq_data[0], 2693 e_chroma_plane /* chroma plane */ 2694 ); 2695 } 2696 else 2697 { 2698 ps_ctxt->apf_chrm_it_recon[trans_idx - 1]( 2699 pi2_deq_data, 2700 &ps_ctxt->ai2_scratch[0], 2701 pu1_pred + (WORD32)e_chroma_plane, 2702 pu1_recon + (WORD32)e_chroma_plane, 2703 deq_dat_strd, 2704 pred_strd, 2705 recon_strd, 2706 zero_cols, 2707 zero_rows); 2708 } 2709 } 2710 2711 /** 2712 ******************************************************************************* 2713 * \if Function name : ihevce_mpm_idx_based_filter_RDOPT_cand \endif 2714 * 2715 * \brief * Filters the RDOPT candidates based on mpm_idx 2716 * 2717 * \par Description 2718 * Computes the b1_prev_intra_luma_pred_flag, b2_mpm_idx & b5_rem_intra_pred_mode 2719 * for a CU 2720 * 2721 * \param[in] ps_ctxt : ptr to enc loop context 2722 * \param[in] ps_cu_analyse : ptr to CU analyse structure 2723 * \param[in] ps_top_nbr_4x4 top 4x4 neighbour pointer 2724 * \param[in] ps_left_nbr_4x4 left 4x4 neighbour pointer 2725 * \param[in] pu1_luma_mode luma mode 2726 * 2727 * \returns none 2728 * 2729 * \author 2730 * Ittiam 2731 * 2732 ******************************************************************************* 2733 */ 2734 2735 void ihevce_mpm_idx_based_filter_RDOPT_cand( 2736 ihevce_enc_loop_ctxt_t *ps_ctxt, 2737 cu_analyse_t *ps_cu_analyse, 2738 nbr_4x4_t *ps_left_nbr_4x4, 2739 nbr_4x4_t *ps_top_nbr_4x4, 2740 UWORD8 *pu1_luma_mode, 2741 UWORD8 *pu1_eval_mark) 2742 { 2743 WORD32 cu_pos_x; 2744 WORD32 cu_pos_y; 2745 nbr_avail_flags_t s_nbr; 2746 WORD32 trans_size; 2747 WORD32 au4_cand_mode_list[3]; 2748 WORD32 nbr_flags; 2749 UWORD8 *pu1_intra_luma_modes; 2750 WORD32 rdopt_cand_ctr = 0; 2751 UWORD8 *pu1_luma_eval_mark; 2752 2753 cu_pos_x 
= ps_cu_analyse->b3_cu_pos_x << 1; 2754 cu_pos_y = ps_cu_analyse->b3_cu_pos_y << 1; 2755 trans_size = ps_cu_analyse->u1_cu_size; 2756 2757 /* get the neighbour availability flags */ 2758 nbr_flags = ihevce_get_nbr_intra( 2759 &s_nbr, 2760 ps_ctxt->pu1_ctb_nbr_map, 2761 ps_ctxt->i4_nbr_map_strd, 2762 cu_pos_x, 2763 cu_pos_y, 2764 trans_size >> 2); 2765 (void)nbr_flags; 2766 /*Call the fun to populate luma intra pred mode fro TU=CU and use the same list fro 2767 *TU=CU/2 also since the modes are same in both the cases. 2768 */ 2769 ihevce_populate_intra_pred_mode( 2770 ps_top_nbr_4x4->b6_luma_intra_mode, 2771 ps_left_nbr_4x4->b6_luma_intra_mode, 2772 s_nbr.u1_top_avail, 2773 s_nbr.u1_left_avail, 2774 cu_pos_y, 2775 &au4_cand_mode_list[0]); 2776 2777 /*Loop through all the RDOPT candidates of TU=CU and TU=CU/2 and check if the current RDOPT 2778 *cand is present in a4_cand_mode_list, If yes set eval flag to 1 else set it to zero 2779 */ 2780 2781 pu1_intra_luma_modes = pu1_luma_mode; 2782 pu1_luma_eval_mark = pu1_eval_mark; 2783 2784 while(pu1_intra_luma_modes[rdopt_cand_ctr] != 255) 2785 { 2786 WORD32 i; 2787 WORD32 found_flag = 0; 2788 2789 /*1st candidate of TU=CU list and TU=CU/2 list must go through RDOPT stage 2790 *irrespective of whether the cand is present in the mpm idx list or not 2791 */ 2792 if(rdopt_cand_ctr == 0) 2793 { 2794 rdopt_cand_ctr++; 2795 continue; 2796 } 2797 2798 for(i = 0; i < 3; i++) 2799 { 2800 if(pu1_intra_luma_modes[rdopt_cand_ctr] == au4_cand_mode_list[i]) 2801 { 2802 found_flag = 1; 2803 break; 2804 } 2805 } 2806 2807 if(found_flag == 0) 2808 { 2809 pu1_luma_eval_mark[rdopt_cand_ctr] = 0; 2810 } 2811 2812 rdopt_cand_ctr++; 2813 } 2814 } 2815 2816 /*! 
2817 ****************************************************************************** 2818 * \if Function name : ihevce_intra_rdopt_cu_ntu \endif 2819 * 2820 * \brief 2821 * Intra Coding unit funtion for RD opt mode 2822 * 2823 * \param[in] ps_ctxt enc_loop module ctxt pointer 2824 * \param[in] ps_chrm_cu_buf_prms pointer to chroma buffer pointers structure 2825 * \param[in] pu1_luma_mode : pointer to luma mode 2826 * \param[in] ps_cu_analyse pointer to cu analyse pointer 2827 * \param[in] pu1_src pointer to source data buffer 2828 * \param[in] src_strd source buffer stride 2829 * \param[in] pu1_cu_left pointer to left recon data buffer 2830 * \param[in] pu1_cu_top pointer to top recon data buffer 2831 * \param[in] pu1_cu_top_left pointer to top left recon data buffer 2832 * \param[in] ps_left_nbr_4x4 : left 4x4 neighbour pointer 2833 * \param[in] ps_top_nbr_4x4 : top 4x4 neighbour pointer 2834 * \param[in] nbr_4x4_left_strd left nbr4x4 stride 2835 * \param[in] cu_left_stride left recon buffer stride 2836 * \param[in] curr_buf_idx RD opt buffer index for current usage 2837 * \param[in] func_proc_mode : function procesing mode @sa TU_SIZE_WRT_CU_T 2838 * 2839 * \return 2840 * RDopt cost 2841 * 2842 * \author 2843 * Ittiam 2844 * 2845 ***************************************************************************** 2846 */ 2847 LWORD64 ihevce_intra_rdopt_cu_ntu( 2848 ihevce_enc_loop_ctxt_t *ps_ctxt, 2849 enc_loop_cu_prms_t *ps_cu_prms, 2850 void *pv_pred_org, 2851 WORD32 pred_strd_org, 2852 enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms, 2853 UWORD8 *pu1_luma_mode, 2854 cu_analyse_t *ps_cu_analyse, 2855 void *pv_curr_src, 2856 void *pv_cu_left, 2857 void *pv_cu_top, 2858 void *pv_cu_top_left, 2859 nbr_4x4_t *ps_left_nbr_4x4, 2860 nbr_4x4_t *ps_top_nbr_4x4, 2861 WORD32 nbr_4x4_left_strd, 2862 WORD32 cu_left_stride, 2863 WORD32 curr_buf_idx, 2864 WORD32 func_proc_mode, 2865 WORD32 i4_alpha_stim_multiplier) 2866 { 2867 enc_loop_cu_final_prms_t *ps_final_prms; 2868 
nbr_avail_flags_t s_nbr; 2869 nbr_4x4_t *ps_nbr_4x4; 2870 nbr_4x4_t *ps_tmp_lt_4x4; 2871 recon_datastore_t *ps_recon_datastore; 2872 2873 ihevc_intra_pred_luma_ref_substitution_ft *ihevc_intra_pred_luma_ref_substitution_fptr; 2874 2875 UWORD32 *pu4_nbr_flags; 2876 UWORD8 *pu1_intra_pred_mode; 2877 WORD32 cu_pos_x; 2878 WORD32 cu_pos_y; 2879 WORD32 trans_size = 0; 2880 UWORD8 *pu1_left; 2881 UWORD8 *pu1_top; 2882 UWORD8 *pu1_top_left; 2883 UWORD8 *pu1_recon; 2884 UWORD8 *pu1_csbf_buf; 2885 UWORD8 *pu1_ecd_data; 2886 WORD16 *pi2_deq_data; 2887 WORD32 deq_data_strd; 2888 LWORD64 total_rdopt_cost; 2889 WORD32 ctr; 2890 WORD32 left_strd; 2891 WORD32 i4_recon_stride; 2892 WORD32 csbf_strd; 2893 WORD32 ecd_data_bytes_cons; 2894 WORD32 num_4x4_in_tu; 2895 WORD32 num_4x4_in_cu; 2896 WORD32 chrm_present_flag; 2897 WORD32 tx_size; 2898 WORD32 cu_bits; 2899 WORD32 num_cu_parts = 0; 2900 WORD32 num_cands = 0; 2901 WORD32 cu_pos_x_8pelunits; 2902 WORD32 cu_pos_y_8pelunits; 2903 WORD32 i4_perform_rdoq; 2904 WORD32 i4_perform_sbh; 2905 UWORD8 u1_compute_spatial_ssd; 2906 UWORD8 u1_compute_recon; 2907 UWORD8 au1_intra_nxn_rdopt_ctxt_models[2][IHEVC_CAB_CTXT_END]; 2908 2909 UWORD16 u2_num_tus_in_cu = 0; 2910 WORD32 is_sub_pu_in_hq = 0; 2911 /* Get the RDOPT cost of the best CU mode for early_exit */ 2912 LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!curr_buf_idx].i8_best_rdopt_cost; 2913 /* cabac context of prev intra luma pred flag */ 2914 UWORD8 u1_prev_flag_cabac_ctxt = 2915 ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_INTRA_LUMA_PRED_FLAG]; 2916 WORD32 src_strd = ps_cu_prms->i4_luma_src_stride; 2917 2918 UWORD8 u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY; 2919 2920 total_rdopt_cost = 0; 2921 ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx]; 2922 ps_recon_datastore = &ps_final_prms->s_recon_datastore; 2923 i4_recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride; 2924 csbf_strd = ps_ctxt->i4_cu_csbf_strd; 2925 pu1_csbf_buf = 
&ps_ctxt->au1_cu_csbf[0]; 2926 pu1_ecd_data = &ps_final_prms->pu1_cu_coeffs[0]; 2927 pi2_deq_data = &ps_final_prms->pi2_cu_deq_coeffs[0]; 2928 deq_data_strd = ps_cu_analyse->u1_cu_size; /* deq_data stride is cu size */ 2929 ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0]; 2930 ps_tmp_lt_4x4 = ps_left_nbr_4x4; 2931 pu4_nbr_flags = &ps_final_prms->au4_nbr_flags[0]; 2932 pu1_intra_pred_mode = &ps_final_prms->au1_intra_pred_mode[0]; 2933 cu_pos_x = ps_cu_analyse->b3_cu_pos_x; 2934 cu_pos_y = ps_cu_analyse->b3_cu_pos_y; 2935 cu_pos_x_8pelunits = cu_pos_x; 2936 cu_pos_y_8pelunits = cu_pos_y; 2937 2938 /* reset cu not coded cost */ 2939 ps_ctxt->i8_cu_not_coded_cost = 0; 2940 2941 /* based on the Processng mode */ 2942 if(TU_EQ_CU == func_proc_mode) 2943 { 2944 ps_final_prms->u1_part_mode = SIZE_2Nx2N; 2945 trans_size = ps_cu_analyse->u1_cu_size; 2946 num_cu_parts = 1; 2947 num_cands = 1; 2948 u2_num_tus_in_cu = 1; 2949 } 2950 else if(TU_EQ_CU_DIV2 == func_proc_mode) 2951 { 2952 ps_final_prms->u1_part_mode = SIZE_2Nx2N; 2953 trans_size = ps_cu_analyse->u1_cu_size >> 1; 2954 num_cu_parts = 4; 2955 num_cands = 1; 2956 u2_num_tus_in_cu = 4; 2957 } 2958 else if(TU_EQ_SUBCU == func_proc_mode) 2959 { 2960 ps_final_prms->u1_part_mode = SIZE_NxN; 2961 trans_size = ps_cu_analyse->u1_cu_size >> 1; 2962 num_cu_parts = 4; 2963 /*In HQ for TU = SUBPU, all 35 modes used for RDOPT instead of 3 modes */ 2964 if(IHEVCE_QUALITY_P3 > ps_ctxt->i4_quality_preset) 2965 { 2966 if(ps_ctxt->i1_slice_type != BSLICE) 2967 { 2968 num_cands = (4 * MAX_INTRA_CU_CANDIDATES) + 2; 2969 } 2970 else 2971 { 2972 num_cands = (2 * MAX_INTRA_CU_CANDIDATES); 2973 } 2974 } 2975 else 2976 { 2977 num_cands = MAX_INTRA_CU_CANDIDATES; 2978 } 2979 u2_num_tus_in_cu = 4; 2980 } 2981 else 2982 { 2983 /* should not enter here */ 2984 ASSERT(0); 2985 } 2986 2987 if(ps_ctxt->i1_cu_qp_delta_enable) 2988 { 2989 WORD32 i4_act_counter = 0, i4_act_counter_lamda = 0; 2990 if(ps_cu_analyse->u1_cu_size == 64) 2991 { 2992 ASSERT( 
2993 (trans_size == 32) || (trans_size == 16) || (trans_size == 8) || (trans_size == 4)); 2994 i4_act_counter = (trans_size == 16) + 2 * ((trans_size == 8) || (trans_size == 4)); 2995 i4_act_counter_lamda = 3; 2996 } 2997 else if(ps_cu_analyse->u1_cu_size == 32) 2998 { 2999 ASSERT( 3000 (trans_size == 32) || (trans_size == 16) || (trans_size == 8) || (trans_size == 4)); 3001 i4_act_counter = (trans_size == 16) + 2 * ((trans_size == 8) || (trans_size == 4)); 3002 i4_act_counter_lamda = 0; 3003 } 3004 else if(ps_cu_analyse->u1_cu_size == 16) 3005 { 3006 ASSERT((trans_size == 16) || (trans_size == 8) || (trans_size == 4)); 3007 i4_act_counter = (trans_size == 8) || (trans_size == 4); 3008 i4_act_counter_lamda = 0; 3009 } 3010 else if(ps_cu_analyse->u1_cu_size == 8) 3011 { 3012 ASSERT((trans_size == 8) || (trans_size == 4)); 3013 i4_act_counter = 1; 3014 i4_act_counter_lamda = 0; 3015 } 3016 else 3017 { 3018 ASSERT(0); 3019 } 3020 if(ps_ctxt->i4_use_ctb_level_lamda) 3021 { 3022 ihevce_compute_cu_level_QP( 3023 ps_ctxt, ps_cu_analyse->i4_act_factor[i4_act_counter][1], -1, 0); 3024 } 3025 else 3026 { 3027 ihevce_compute_cu_level_QP( 3028 ps_ctxt, 3029 ps_cu_analyse->i4_act_factor[i4_act_counter][1], 3030 ps_cu_analyse->i4_act_factor[i4_act_counter_lamda][1], 3031 0); 3032 } 3033 3034 ps_cu_analyse->i1_cu_qp = ps_ctxt->i4_cu_qp; 3035 } 3036 if(u1_is_cu_noisy && !ps_ctxt->u1_enable_psyRDOPT) 3037 { 3038 ps_ctxt->i8_cl_ssd_lambda_qf = 3039 ((float)ps_ctxt->i8_cl_ssd_lambda_qf * (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / 3040 100.0f); 3041 ps_ctxt->i8_cl_ssd_lambda_chroma_qf = 3042 ((float)ps_ctxt->i8_cl_ssd_lambda_chroma_qf * 3043 (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f); 3044 } 3045 3046 u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) && 3047 (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) && 3048 CONVERT_SSDS_TO_SPATIAL_DOMAIN; 3049 3050 if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT) 3051 { 3052 u1_compute_spatial_ssd = 
(ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) && 3053 CONVERT_SSDS_TO_SPATIAL_DOMAIN; 3054 } 3055 3056 /* populate the neigbours */ 3057 pu1_left = (UWORD8 *)pv_cu_left; 3058 pu1_top = (UWORD8 *)pv_cu_top; 3059 pu1_top_left = (UWORD8 *)pv_cu_top_left; 3060 left_strd = cu_left_stride; 3061 num_4x4_in_tu = (trans_size >> 2); 3062 num_4x4_in_cu = (ps_cu_analyse->u1_cu_size >> 2); 3063 chrm_present_flag = 1; 3064 ecd_data_bytes_cons = 0; 3065 cu_bits = 0; 3066 3067 /* get the 4x4 level postion of current cu */ 3068 cu_pos_x = cu_pos_x << 1; 3069 cu_pos_y = cu_pos_y << 1; 3070 3071 /* pouplate cu level params knowing that current is intra */ 3072 ps_final_prms->u1_skip_flag = 0; 3073 ps_final_prms->u1_intra_flag = PRED_MODE_INTRA; 3074 ps_final_prms->u2_num_pus_in_cu = 1; 3075 /*init the is_cu_coded flag*/ 3076 ps_final_prms->u1_is_cu_coded = 0; 3077 ps_final_prms->u4_cu_sad = 0; 3078 3079 ps_final_prms->as_pu_enc_loop[0].b1_intra_flag = PRED_MODE_INTRA; 3080 ps_final_prms->as_pu_enc_loop[0].b4_wd = (trans_size >> 1) - 1; 3081 ps_final_prms->as_pu_enc_loop[0].b4_ht = (trans_size >> 1) - 1; 3082 ps_final_prms->as_pu_enc_loop[0].b4_pos_x = cu_pos_x; 3083 ps_final_prms->as_pu_enc_loop[0].b4_pos_y = cu_pos_y; 3084 ps_final_prms->as_pu_enc_loop[0].b1_merge_flag = 0; 3085 3086 ps_final_prms->as_col_pu_enc_loop[0].b1_intra_flag = 1; 3087 3088 /*copy qp directly as intra cant be skip*/ 3089 ps_nbr_4x4->b8_qp = ps_ctxt->i4_cu_qp; 3090 ps_nbr_4x4->mv.s_l0_mv.i2_mvx = 0; 3091 ps_nbr_4x4->mv.s_l0_mv.i2_mvy = 0; 3092 ps_nbr_4x4->mv.s_l1_mv.i2_mvx = 0; 3093 ps_nbr_4x4->mv.s_l1_mv.i2_mvy = 0; 3094 ps_nbr_4x4->mv.i1_l0_ref_pic_buf_id = -1; 3095 ps_nbr_4x4->mv.i1_l1_ref_pic_buf_id = -1; 3096 ps_nbr_4x4->mv.i1_l0_ref_idx = -1; 3097 ps_nbr_4x4->mv.i1_l1_ref_idx = -1; 3098 3099 /* RDOPT copy States : TU init (best until prev TU) to current */ 3100 memcpy( 3101 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx] 3102 .s_cabac_ctxt.au1_ctxt_models[0], 3103 
&ps_ctxt->au1_rdopt_init_ctxt_models[0], 3104 IHEVC_CAB_COEFFX_PREFIX); 3105 3106 /* RDOPT copy States :update to init state if 0 cbf */ 3107 memcpy( 3108 &au1_intra_nxn_rdopt_ctxt_models[0][0], 3109 &ps_ctxt->au1_rdopt_init_ctxt_models[0], 3110 IHEVC_CAB_COEFFX_PREFIX); 3111 memcpy( 3112 &au1_intra_nxn_rdopt_ctxt_models[1][0], 3113 &ps_ctxt->au1_rdopt_init_ctxt_models[0], 3114 IHEVC_CAB_COEFFX_PREFIX); 3115 3116 /* loop for all partitions in CU blocks */ 3117 for(ctr = 0; ctr < num_cu_parts; ctr++) 3118 { 3119 UWORD8 *pu1_curr_mode; 3120 WORD32 cand_ctr; 3121 WORD32 nbr_flags; 3122 3123 /* for NxN case to track the best mode */ 3124 /* for other cases zeroth index will be used */ 3125 intra_prev_rem_flags_t as_intra_prev_rem[2]; 3126 LWORD64 ai8_cand_rdopt_cost[2]; 3127 UWORD32 au4_tu_sad[2]; 3128 WORD32 ai4_tu_bits[2]; 3129 WORD32 ai4_cbf[2]; 3130 WORD32 ai4_curr_bytes[2]; 3131 WORD32 ai4_zero_col[2]; 3132 WORD32 ai4_zero_row[2]; 3133 /* To store the pred, coeff and dequant for TU_EQ_SUBCU case (since mul. 3134 cand. 
are there) ping-pong buffer to store the best and current */ 3135 UWORD8 au1_cur_pred_data[2][MIN_TU_SIZE * MIN_TU_SIZE]; 3136 UWORD8 au1_intra_coeffs[2][MAX_SCAN_COEFFS_BYTES_4x4]; 3137 WORD16 ai2_intra_deq_coeffs[2][MIN_TU_SIZE * MIN_TU_SIZE]; 3138 /* Context models stored for RDopt store and restore purpose */ 3139 3140 UWORD8 au1_recon_availability[2]; 3141 3142 WORD32 best_cand_idx = 0; 3143 LWORD64 best_cand_cost = MAX_COST_64; 3144 /* counters to toggle b/w best and current */ 3145 WORD32 best_intra_buf_idx = 1; 3146 WORD32 curr_intra_buf_idx = 0; 3147 3148 /* copy the mode pointer to be used in inner loop */ 3149 pu1_curr_mode = pu1_luma_mode; 3150 3151 /* get the neighbour availability flags */ 3152 nbr_flags = ihevce_get_nbr_intra( 3153 &s_nbr, 3154 ps_ctxt->pu1_ctb_nbr_map, 3155 ps_ctxt->i4_nbr_map_strd, 3156 cu_pos_x, 3157 cu_pos_y, 3158 num_4x4_in_tu); 3159 3160 /* copy the nbr flags for chroma reuse */ 3161 if(4 != trans_size) 3162 { 3163 *pu4_nbr_flags = nbr_flags; 3164 } 3165 else if(1 == chrm_present_flag) 3166 { 3167 /* compute the avail flags assuming luma trans is 8x8 */ 3168 /* get the neighbour availability flags */ 3169 *pu4_nbr_flags = ihevce_get_nbr_intra_mxn_tu( 3170 ps_ctxt->pu1_ctb_nbr_map, 3171 ps_ctxt->i4_nbr_map_strd, 3172 cu_pos_x, 3173 cu_pos_y, 3174 (num_4x4_in_tu << 1), 3175 (num_4x4_in_tu << 1)); 3176 } 3177 3178 u1_compute_recon = !u1_compute_spatial_ssd && ((num_cu_parts > 1) && (ctr < 3)); 3179 3180 if(!ctr && (u1_compute_spatial_ssd || u1_compute_recon)) 3181 { 3182 ps_recon_datastore->u1_is_lumaRecon_available = 1; 3183 } 3184 else if(!ctr) 3185 { 3186 ps_recon_datastore->u1_is_lumaRecon_available = 0; 3187 } 3188 3189 ihevc_intra_pred_luma_ref_substitution_fptr = 3190 ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr; 3191 3192 /* call reference array substitution */ 3193 ihevc_intra_pred_luma_ref_substitution_fptr( 3194 pu1_top_left, 3195 pu1_top, 3196 pu1_left, 3197 left_strd, 3198 trans_size, 3199 
nbr_flags, 3200 (UWORD8 *)ps_ctxt->pv_ref_sub_out, 3201 1); 3202 3203 /* Intra Mode gating based on MPM cand list and encoder quality preset */ 3204 if((ps_ctxt->i1_slice_type != ISLICE) && (TU_EQ_SUBCU == func_proc_mode) && 3205 (ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3)) 3206 { 3207 ihevce_mpm_idx_based_filter_RDOPT_cand( 3208 ps_ctxt, 3209 ps_cu_analyse, 3210 ps_left_nbr_4x4, 3211 ps_top_nbr_4x4, 3212 pu1_luma_mode, 3213 &ps_cu_analyse->s_cu_intra_cand.au1_nxn_eval_mark[ctr][0]); 3214 } 3215 3216 if((TU_EQ_SUBCU == func_proc_mode) && (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) && 3217 (ps_cu_analyse->s_cu_intra_cand.au1_num_modes_added[ctr] >= MAX_INTRA_CU_CANDIDATES)) 3218 { 3219 WORD32 ai4_mpm_mode_list[3]; 3220 WORD32 i; 3221 3222 WORD32 i4_curr_index = ps_cu_analyse->s_cu_intra_cand.au1_num_modes_added[ctr]; 3223 3224 ihevce_populate_intra_pred_mode( 3225 ps_top_nbr_4x4->b6_luma_intra_mode, 3226 ps_tmp_lt_4x4->b6_luma_intra_mode, 3227 s_nbr.u1_top_avail, 3228 s_nbr.u1_left_avail, 3229 cu_pos_y, 3230 &ai4_mpm_mode_list[0]); 3231 3232 for(i = 0; i < 3; i++) 3233 { 3234 if(ps_cu_analyse->s_cu_intra_cand 3235 .au1_intra_luma_mode_nxn_hash[ctr][ai4_mpm_mode_list[i]] == 0) 3236 { 3237 ASSERT(ai4_mpm_mode_list[i] < 35); 3238 3239 ps_cu_analyse->s_cu_intra_cand 3240 .au1_intra_luma_mode_nxn_hash[ctr][ai4_mpm_mode_list[i]] = 1; 3241 pu1_luma_mode[i4_curr_index] = ai4_mpm_mode_list[i]; 3242 ps_cu_analyse->s_cu_intra_cand.au1_num_modes_added[ctr]++; 3243 i4_curr_index++; 3244 } 3245 } 3246 3247 pu1_luma_mode[i4_curr_index] = 255; 3248 } 3249 3250 /* loop over candidates for each partition */ 3251 for(cand_ctr = 0; cand_ctr < num_cands; cand_ctr++) 3252 { 3253 WORD32 curr_pred_mode; 3254 WORD32 bits = 0; 3255 LWORD64 curr_cost; 3256 WORD32 luma_pred_func_idx; 3257 UWORD8 *pu1_curr_ecd_data; 3258 WORD16 *pi2_curr_deq_data; 3259 WORD32 curr_deq_data_strd; 3260 WORD32 pred_strd; 3261 UWORD8 *pu1_pred; 3262 3263 /* if NXN case the recon and ecd data is stored in 
temp buffers */ 3264 if(TU_EQ_SUBCU == func_proc_mode) 3265 { 3266 pu1_pred = &au1_cur_pred_data[curr_intra_buf_idx][0]; 3267 pred_strd = trans_size; 3268 pu1_curr_ecd_data = &au1_intra_coeffs[curr_intra_buf_idx][0]; 3269 pi2_curr_deq_data = &ai2_intra_deq_coeffs[curr_intra_buf_idx][0]; 3270 curr_deq_data_strd = trans_size; 3271 3272 ASSERT(trans_size == MIN_TU_SIZE); 3273 } 3274 else 3275 { 3276 pu1_pred = (UWORD8 *)pv_pred_org; 3277 pred_strd = pred_strd_org; 3278 pu1_curr_ecd_data = pu1_ecd_data; 3279 pi2_curr_deq_data = pi2_deq_data; 3280 curr_deq_data_strd = deq_data_strd; 3281 } 3282 3283 pu1_recon = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs[curr_intra_buf_idx]) + 3284 (ctr & 1) * trans_size + (ctr > 1) * trans_size * i4_recon_stride; 3285 3286 if(is_sub_pu_in_hq == 1) 3287 { 3288 curr_pred_mode = cand_ctr; 3289 } 3290 else 3291 { 3292 curr_pred_mode = pu1_curr_mode[cand_ctr]; 3293 } 3294 3295 /* If the candidate mode is 255, then break */ 3296 if(255 == curr_pred_mode) 3297 { 3298 break; 3299 } 3300 else if(250 == curr_pred_mode) 3301 { 3302 continue; 3303 } 3304 3305 /* check if this mode needs to be evaluated or not. For 2nx2n cases, this */ 3306 /* function will be called once per candidate, so this check has been done */ 3307 /* outside this function call. For NxN case, this function will be called */ 3308 /* only once, and all the candidates will be evaluated here. 
*/ 3309 if(ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3) 3310 { 3311 if((TU_EQ_SUBCU == func_proc_mode) && 3312 (0 == ps_cu_analyse->s_cu_intra_cand.au1_nxn_eval_mark[ctr][cand_ctr])) 3313 { 3314 continue; 3315 } 3316 } 3317 3318 /* call reference filtering */ 3319 ps_ctxt->ps_func_selector->ihevc_intra_pred_ref_filtering_fptr( 3320 (UWORD8 *)ps_ctxt->pv_ref_sub_out, 3321 trans_size, 3322 (UWORD8 *)ps_ctxt->pv_ref_filt_out, 3323 curr_pred_mode, 3324 ps_ctxt->i1_strong_intra_smoothing_enable_flag); 3325 3326 /* use the look up to get the function idx */ 3327 luma_pred_func_idx = g_i4_ip_funcs[curr_pred_mode]; 3328 3329 /* call the intra prediction function */ 3330 ps_ctxt->apf_lum_ip[luma_pred_func_idx]( 3331 (UWORD8 *)ps_ctxt->pv_ref_filt_out, 3332 1, 3333 pu1_pred, 3334 pred_strd, 3335 trans_size, 3336 curr_pred_mode); 3337 3338 /* populate the coeffs scan idx */ 3339 ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT; 3340 3341 /* for luma 4x4 and 8x8 transforms based on intra pred mode scan is choosen*/ 3342 if(trans_size < 16) 3343 { 3344 /* for modes from 22 upto 30 horizontal scan is used */ 3345 if((curr_pred_mode > 21) && (curr_pred_mode < 31)) 3346 { 3347 ps_ctxt->i4_scan_idx = SCAN_HORZ; 3348 } 3349 /* for modes from 6 upto 14 horizontal scan is used */ 3350 else if((curr_pred_mode > 5) && (curr_pred_mode < 15)) 3351 { 3352 ps_ctxt->i4_scan_idx = SCAN_VERT; 3353 } 3354 } 3355 3356 /* RDOPT copy States : TU init (best until prev TU) to current */ 3357 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( 3358 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx] 3359 .s_cabac_ctxt.au1_ctxt_models[0] + 3360 IHEVC_CAB_COEFFX_PREFIX, 3361 &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX, 3362 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX); 3363 3364 i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq; 3365 i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh; 3366 3367 #if DISABLE_RDOQ_INTRA 3368 i4_perform_rdoq = 0; 3369 
#endif 3370 3371 /*2 Multi- dimensinal array based on trans size of rounding factor to be added here */ 3372 /* arrays are for rounding factor corr. to 0-1 decision and 1-2 decision */ 3373 /* Currently the complete array will contain only single value*/ 3374 /*The rounding factor is calculated with the formula 3375 Deadzone val = (((R1 - R0) * (2^(-8/3)) * lamMod) + 1)/2 3376 rounding factor = (1 - DeadZone Val) 3377 3378 Assumption: Cabac states of All the sub-blocks in the TU are considered independent 3379 */ 3380 if((ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING)) 3381 { 3382 if((ps_ctxt->i4_quant_rounding_level == TU_LEVEL_QUANT_ROUNDING) && (ctr != 0)) 3383 { 3384 double i4_lamda_modifier; 3385 3386 if((BSLICE == ps_ctxt->i1_slice_type) && (ps_ctxt->i4_temporal_layer_id)) 3387 { 3388 i4_lamda_modifier = 3389 ps_ctxt->i4_lamda_modifier * 3390 CLIP3((((double)(ps_ctxt->i4_cu_qp - 12)) / 6.0), 2.00, 4.00); 3391 } 3392 else 3393 { 3394 i4_lamda_modifier = ps_ctxt->i4_lamda_modifier; 3395 } 3396 if(ps_ctxt->i4_use_const_lamda_modifier) 3397 { 3398 if(ISLICE == ps_ctxt->i1_slice_type) 3399 { 3400 i4_lamda_modifier = ps_ctxt->f_i_pic_lamda_modifier; 3401 } 3402 else 3403 { 3404 i4_lamda_modifier = CONST_LAMDA_MOD_VAL; 3405 } 3406 } 3407 3408 ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] = 3409 &ps_ctxt->i4_quant_round_tu[0][0]; 3410 ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] = 3411 &ps_ctxt->i4_quant_round_tu[1][0]; 3412 3413 memset( 3414 ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3], 3415 0, 3416 trans_size * trans_size * sizeof(WORD32)); 3417 memset( 3418 ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3], 3419 0, 3420 trans_size * trans_size * sizeof(WORD32)); 3421 3422 ihevce_quant_rounding_factor_gen( 3423 trans_size, 3424 1, 3425 &ps_ctxt->s_rdopt_entropy_ctxt, 3426 ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3], 3427 ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3], 3428 i4_lamda_modifier, 3429 
1); 3430 } 3431 else 3432 { 3433 ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] = 3434 ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[trans_size >> 3]; 3435 ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] = 3436 ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[trans_size >> 3]; 3437 } 3438 } 3439 3440 /* call T Q IT IQ and recon function */ 3441 ai4_cbf[curr_intra_buf_idx] = ihevce_t_q_iq_ssd_scan_fxn( 3442 ps_ctxt, 3443 pu1_pred, 3444 pred_strd, 3445 (UWORD8 *)pv_curr_src, 3446 src_strd, 3447 pi2_curr_deq_data, 3448 curr_deq_data_strd, 3449 pu1_recon, 3450 i4_recon_stride, 3451 pu1_curr_ecd_data, 3452 pu1_csbf_buf, 3453 csbf_strd, 3454 trans_size, 3455 PRED_MODE_INTRA, 3456 &ai8_cand_rdopt_cost[curr_intra_buf_idx], 3457 &ai4_curr_bytes[curr_intra_buf_idx], 3458 &ai4_tu_bits[curr_intra_buf_idx], 3459 &au4_tu_sad[curr_intra_buf_idx], 3460 &ai4_zero_col[curr_intra_buf_idx], 3461 &ai4_zero_row[curr_intra_buf_idx], 3462 &au1_recon_availability[curr_intra_buf_idx], 3463 i4_perform_rdoq, 3464 i4_perform_sbh, 3465 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS 3466 i4_alpha_stim_multiplier, 3467 u1_is_cu_noisy, 3468 #endif 3469 u1_compute_spatial_ssd ? 
SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD, 3470 1 /*early_cbf */ 3471 ); 3472 3473 #if COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL && !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS 3474 if(u1_is_cu_noisy && i4_alpha_stim_multiplier) 3475 { 3476 #if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT 3477 ai8_cand_rdopt_cost[curr_intra_buf_idx] = ihevce_inject_stim_into_distortion( 3478 pv_curr_src, 3479 src_strd, 3480 pu1_pred, 3481 pred_strd, 3482 ai8_cand_rdopt_cost[curr_intra_buf_idx], 3483 i4_alpha_stim_multiplier, 3484 trans_size, 3485 0, 3486 ps_ctxt->u1_enable_psyRDOPT, 3487 NULL_PLANE); 3488 #else 3489 if(u1_compute_spatial_ssd && au1_recon_availability[curr_intra_buf_idx]) 3490 { 3491 ai8_cand_rdopt_cost[curr_intra_buf_idx] = ihevce_inject_stim_into_distortion( 3492 pv_curr_src, 3493 src_strd, 3494 pu1_recon, 3495 i4_recon_stride, 3496 ai8_cand_rdopt_cost[curr_intra_buf_idx], 3497 i4_alpha_stim_multiplier, 3498 trans_size, 3499 0, 3500 ps_ctxt->u1_enable_psyRDOPT, 3501 NULL_PLANE); 3502 } 3503 else 3504 { 3505 ai8_cand_rdopt_cost[curr_intra_buf_idx] = ihevce_inject_stim_into_distortion( 3506 pv_curr_src, 3507 src_strd, 3508 pu1_pred, 3509 pred_strd, 3510 ai8_cand_rdopt_cost[curr_intra_buf_idx], 3511 i4_alpha_stim_multiplier, 3512 trans_size, 3513 0, 3514 ps_ctxt->u1_enable_psyRDOPT, 3515 NULL_PLANE); 3516 } 3517 #endif 3518 } 3519 #endif 3520 3521 if(TU_EQ_SUBCU == func_proc_mode) 3522 { 3523 ASSERT(ai4_curr_bytes[curr_intra_buf_idx] < MAX_SCAN_COEFFS_BYTES_4x4); 3524 } 3525 3526 /* based on CBF/No CBF copy the corresponding state */ 3527 if(0 == ai4_cbf[curr_intra_buf_idx]) 3528 { 3529 /* RDOPT copy States :update to init state if 0 cbf */ 3530 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( 3531 &au1_intra_nxn_rdopt_ctxt_models[curr_intra_buf_idx][0] + 3532 IHEVC_CAB_COEFFX_PREFIX, 3533 &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX, 3534 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX); 3535 } 3536 else 3537 { 3538 /* RDOPT copy States :update to new state only if 
CBF is non zero */ 3539 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( 3540 &au1_intra_nxn_rdopt_ctxt_models[curr_intra_buf_idx][0] + 3541 IHEVC_CAB_COEFFX_PREFIX, 3542 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx] 3543 .s_cabac_ctxt.au1_ctxt_models[0] + 3544 IHEVC_CAB_COEFFX_PREFIX, 3545 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX); 3546 } 3547 3548 /* call the function which perform intra mode prediction */ 3549 ihevce_intra_pred_mode_signaling( 3550 ps_top_nbr_4x4->b6_luma_intra_mode, 3551 ps_tmp_lt_4x4->b6_luma_intra_mode, 3552 s_nbr.u1_top_avail, 3553 s_nbr.u1_left_avail, 3554 cu_pos_y, 3555 curr_pred_mode, 3556 &as_intra_prev_rem[curr_intra_buf_idx]); 3557 /******************************************************************/ 3558 /* PREV INTRA LUMA FLAG, MPM MODE and REM INTRA MODE bits for I_NxN 3559 The bits for these are evaluated for every RDO mode of current subcu 3560 as they can significantly contribute to RDO cost. Note that these 3561 bits are not accounted for here (ai8_cand_rdopt_cost) as they 3562 are accounted for in encode_cu call later */ 3563 3564 /******************************************************************/ 3565 /* PREV INTRA LUMA FLAG, MPM MODE and REM INTRA MODE bits for I_NxN 3566 The bits for these are evaluated for every RDO mode of current subcu 3567 as they can significantly contribute to RDO cost. 
Note that these 3568 bits are not accounted for here (ai8_cand_rdopt_cost) as they 3569 are accounted for in encode_cu call later */ 3570 3571 /* Estimate bits to encode prev rem flag for NXN mode */ 3572 { 3573 WORD32 bits_frac = gau2_ihevce_cabac_bin_to_bits 3574 [u1_prev_flag_cabac_ctxt ^ 3575 as_intra_prev_rem[curr_intra_buf_idx].b1_prev_intra_luma_pred_flag]; 3576 3577 /* rounding the fractional bits to nearest integer */ 3578 bits = ((bits_frac + (1 << (CABAC_FRAC_BITS_Q - 1))) >> CABAC_FRAC_BITS_Q); 3579 } 3580 3581 /* based on prev flag all the mpmidx bits and rem bits */ 3582 if(1 == as_intra_prev_rem[curr_intra_buf_idx].b1_prev_intra_luma_pred_flag) 3583 { 3584 /* mpm_idx */ 3585 bits += as_intra_prev_rem[curr_intra_buf_idx].b2_mpm_idx ? 2 : 1; 3586 } 3587 else 3588 { 3589 /* rem intra mode */ 3590 bits += 5; 3591 } 3592 3593 bits += ai4_tu_bits[curr_intra_buf_idx]; 3594 3595 /* compute the total cost for current candidate */ 3596 curr_cost = ai8_cand_rdopt_cost[curr_intra_buf_idx]; 3597 3598 /* get the final ssd cost */ 3599 curr_cost += 3600 COMPUTE_RATE_COST_CLIP30(bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT); 3601 3602 /* check of the best candidate cost */ 3603 if(curr_cost < best_cand_cost) 3604 { 3605 best_cand_cost = curr_cost; 3606 best_cand_idx = cand_ctr; 3607 best_intra_buf_idx = curr_intra_buf_idx; 3608 curr_intra_buf_idx = !curr_intra_buf_idx; 3609 } 3610 } 3611 3612 /*************** For TU_EQ_SUBCU case *****************/ 3613 /* Copy the pred for best cand. to the final pred array */ 3614 /* Copy the iq-coeff for best cand. to the final array */ 3615 /* copy the best coeffs data to final buffer */ 3616 if(TU_EQ_SUBCU == func_proc_mode) 3617 { 3618 /* Copy the pred for best cand. 
to the final pred array */ 3619 3620 ps_ctxt->s_cmn_opt_func.pf_copy_2d( 3621 (UWORD8 *)pv_pred_org, 3622 pred_strd_org, 3623 &au1_cur_pred_data[best_intra_buf_idx][0], 3624 trans_size, 3625 trans_size, 3626 trans_size); 3627 3628 /* Copy the deq-coeff for best cand. to the final array */ 3629 3630 ps_ctxt->s_cmn_opt_func.pf_copy_2d( 3631 (UWORD8 *)pi2_deq_data, 3632 deq_data_strd << 1, 3633 (UWORD8 *)&ai2_intra_deq_coeffs[best_intra_buf_idx][0], 3634 trans_size << 1, 3635 trans_size << 1, 3636 trans_size); 3637 /* copy the coeffs to final cu ecd bytes buffer */ 3638 memcpy( 3639 pu1_ecd_data, 3640 &au1_intra_coeffs[best_intra_buf_idx][0], 3641 ai4_curr_bytes[best_intra_buf_idx]); 3642 3643 pu1_recon = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs[best_intra_buf_idx]) + 3644 (ctr & 1) * trans_size + (ctr > 1) * trans_size * i4_recon_stride; 3645 } 3646 3647 /*---------- Calculate Recon for the best INTRA mode ---------*/ 3648 /* TU_EQ_CU case : No need for recon, otherwise recon is required */ 3649 /* Compute recon only for the best mode for TU_EQ_SUBCU case */ 3650 if(u1_compute_recon) 3651 { 3652 ihevce_it_recon_fxn( 3653 ps_ctxt, 3654 pi2_deq_data, 3655 deq_data_strd, 3656 (UWORD8 *)pv_pred_org, 3657 pred_strd_org, 3658 pu1_recon, 3659 i4_recon_stride, 3660 pu1_ecd_data, 3661 trans_size, 3662 PRED_MODE_INTRA, 3663 ai4_cbf[best_intra_buf_idx], 3664 ai4_zero_col[best_intra_buf_idx], 3665 ai4_zero_row[best_intra_buf_idx]); 3666 3667 ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = best_intra_buf_idx; 3668 } 3669 else if(u1_compute_spatial_ssd && au1_recon_availability[best_intra_buf_idx]) 3670 { 3671 ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = best_intra_buf_idx; 3672 } 3673 else 3674 { 3675 ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = UCHAR_MAX; 3676 } 3677 3678 /* RDOPT copy States :update to best modes state */ 3679 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( 3680 &ps_ctxt->au1_rdopt_init_ctxt_models[0] + 
IHEVC_CAB_COEFFX_PREFIX, 3681 &au1_intra_nxn_rdopt_ctxt_models[best_intra_buf_idx][0] + IHEVC_CAB_COEFFX_PREFIX, 3682 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX); 3683 3684 /* copy the prev,mpm_idx and rem modes from best cand */ 3685 ps_final_prms->as_intra_prev_rem[ctr] = as_intra_prev_rem[best_intra_buf_idx]; 3686 3687 /* update the cabac context of prev intra pred mode flag */ 3688 u1_prev_flag_cabac_ctxt = gau1_ihevc_next_state 3689 [(u1_prev_flag_cabac_ctxt << 1) | 3690 as_intra_prev_rem[best_intra_buf_idx].b1_prev_intra_luma_pred_flag]; 3691 3692 /* accumulate the TU bits into cu bits */ 3693 cu_bits += ai4_tu_bits[best_intra_buf_idx]; 3694 3695 /* copy the intra pred mode for chroma reuse */ 3696 if(is_sub_pu_in_hq == 0) 3697 { 3698 *pu1_intra_pred_mode = pu1_curr_mode[best_cand_idx]; 3699 } 3700 else 3701 { 3702 *pu1_intra_pred_mode = best_cand_idx; 3703 } 3704 3705 /* Store luma mode as chroma mode. If chroma prcs happens, and 3706 if a diff. mode wins, it should update this!! */ 3707 if(1 == chrm_present_flag) 3708 { 3709 if(is_sub_pu_in_hq == 0) 3710 { 3711 ps_final_prms->u1_chroma_intra_pred_actual_mode = 3712 ((ps_ctxt->u1_chroma_array_type == 2) 3713 ? gau1_chroma422_intra_angle_mapping[pu1_curr_mode[best_cand_idx]] 3714 : pu1_curr_mode[best_cand_idx]); 3715 } 3716 else 3717 { 3718 ps_final_prms->u1_chroma_intra_pred_actual_mode = 3719 ((ps_ctxt->u1_chroma_array_type == 2) 3720 ? 
gau1_chroma422_intra_angle_mapping[best_cand_idx] 3721 : best_cand_idx); 3722 } 3723 3724 ps_final_prms->u1_chroma_intra_pred_mode = 4; 3725 } 3726 3727 /*remember the cbf flag to replicate qp for 4x4 neighbour*/ 3728 ps_final_prms->u1_is_cu_coded |= ai4_cbf[best_intra_buf_idx]; 3729 3730 /*accumulate ssd over all TU of intra CU*/ 3731 ps_final_prms->u4_cu_sad += au4_tu_sad[best_intra_buf_idx]; 3732 3733 /* update the bytes */ 3734 ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons; 3735 ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed = 3736 ai4_curr_bytes[best_intra_buf_idx]; 3737 /* update the zero_row and col info for the final mode */ 3738 ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_col = 3739 ai4_zero_col[best_intra_buf_idx]; 3740 ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_row = 3741 ai4_zero_row[best_intra_buf_idx]; 3742 3743 ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons; 3744 3745 /* update the total bytes cons */ 3746 ecd_data_bytes_cons += ai4_curr_bytes[best_intra_buf_idx]; 3747 pu1_ecd_data += ai4_curr_bytes[best_intra_buf_idx]; 3748 3749 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = ai4_cbf[best_intra_buf_idx]; 3750 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0; 3751 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0; 3752 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0; 3753 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0; 3754 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_chroma_intra_mode_idx = chrm_present_flag; 3755 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b7_qp = ps_ctxt->i4_cu_qp; 3756 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_first_tu_in_cu = 0; 3757 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_transquant_bypass = 0; 3758 GETRANGE(tx_size, trans_size); 3759 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3; 3760 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x; 3761 
ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y; 3762 3763 /* repiclate the nbr 4x4 structure for all 4x4 blocks current TU */ 3764 ps_nbr_4x4->b1_skip_flag = 0; 3765 ps_nbr_4x4->b1_intra_flag = 1; 3766 ps_nbr_4x4->b1_pred_l0_flag = 0; 3767 ps_nbr_4x4->b1_pred_l1_flag = 0; 3768 3769 if(is_sub_pu_in_hq == 0) 3770 { 3771 ps_nbr_4x4->b6_luma_intra_mode = pu1_curr_mode[best_cand_idx]; 3772 } 3773 else 3774 { 3775 ps_nbr_4x4->b6_luma_intra_mode = best_cand_idx; 3776 } 3777 3778 ps_nbr_4x4->b1_y_cbf = ai4_cbf[best_intra_buf_idx]; 3779 3780 /* since tu size can be less than cusize, replication is done with strd */ 3781 { 3782 WORD32 i, j; 3783 nbr_4x4_t *ps_tmp_4x4; 3784 3785 ps_tmp_4x4 = ps_nbr_4x4; 3786 3787 for(i = 0; i < num_4x4_in_tu; i++) 3788 { 3789 for(j = 0; j < num_4x4_in_tu; j++) 3790 { 3791 ps_tmp_4x4[j] = *ps_nbr_4x4; 3792 } 3793 /* row level update*/ 3794 ps_tmp_4x4 += num_4x4_in_cu; 3795 } 3796 } 3797 3798 if(TU_EQ_SUBCU == func_proc_mode) 3799 { 3800 pu1_luma_mode += ((MAX_INTRA_CU_CANDIDATES * 4) + 2 + 1); 3801 } 3802 3803 if((num_cu_parts > 1) && (ctr < 3)) 3804 { 3805 /* set the neighbour map to 1 */ 3806 ihevce_set_nbr_map( 3807 ps_ctxt->pu1_ctb_nbr_map, 3808 ps_ctxt->i4_nbr_map_strd, 3809 cu_pos_x, 3810 cu_pos_y, 3811 trans_size >> 2, 3812 1); 3813 3814 /* block level updates block number (1 & 3 )*/ 3815 pv_curr_src = (UWORD8 *)pv_curr_src + trans_size; 3816 pv_pred_org = (UWORD8 *)pv_pred_org + trans_size; 3817 pi2_deq_data += trans_size; 3818 3819 switch(ctr) 3820 { 3821 case 0: 3822 { 3823 pu1_left = pu1_recon + trans_size - 1; 3824 pu1_top += trans_size; 3825 pu1_top_left = pu1_top - 1; 3826 left_strd = i4_recon_stride; 3827 3828 break; 3829 } 3830 case 1: 3831 { 3832 ASSERT( 3833 (ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0] == 0) || 3834 (ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0] == 1)); 3835 3836 /* Since the 'lumaRefSubstitution' function expects both Top and */ 3837 /* TopRight recon pixels to be present 
in the same buffer */ 3838 if(ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0] != 3839 ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1]) 3840 { 3841 UWORD8 *pu1_src = 3842 ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs 3843 [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1]]) + 3844 trans_size; 3845 UWORD8 *pu1_dst = 3846 ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs 3847 [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0]]) + 3848 trans_size; 3849 3850 ps_ctxt->s_cmn_opt_func.pf_copy_2d( 3851 pu1_dst, i4_recon_stride, pu1_src, i4_recon_stride, trans_size, trans_size); 3852 3853 ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1] = 3854 ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0]; 3855 } 3856 3857 pu1_left = (UWORD8 *)pv_cu_left + trans_size * cu_left_stride; 3858 pu1_top = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs 3859 [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0]]) + 3860 (trans_size - 1) * i4_recon_stride; 3861 pu1_top_left = pu1_left - cu_left_stride; 3862 left_strd = cu_left_stride; 3863 3864 break; 3865 } 3866 case 2: 3867 { 3868 ASSERT( 3869 (ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1] == 0) || 3870 (ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1] == 1)); 3871 3872 pu1_left = pu1_recon + trans_size - 1; 3873 pu1_top = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs 3874 [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1]]) + 3875 (trans_size - 1) * i4_recon_stride + trans_size; 3876 pu1_top_left = pu1_top - 1; 3877 left_strd = i4_recon_stride; 3878 3879 break; 3880 } 3881 } 3882 3883 pu1_csbf_buf += num_4x4_in_tu; 3884 cu_pos_x += num_4x4_in_tu; 3885 ps_nbr_4x4 += num_4x4_in_tu; 3886 ps_top_nbr_4x4 += num_4x4_in_tu; 3887 ps_tmp_lt_4x4 = ps_nbr_4x4 - 1; 3888 3889 pu1_intra_pred_mode++; 3890 3891 /* after 2 blocks increment the pointers to bottom blocks */ 3892 if(1 == ctr) 3893 { 3894 pv_curr_src = (UWORD8 *)pv_curr_src - (trans_size << 1); 3895 pv_curr_src = 
(UWORD8 *)pv_curr_src + (trans_size * src_strd); 3896 3897 pv_pred_org = (UWORD8 *)pv_pred_org - (trans_size << 1); 3898 pv_pred_org = (UWORD8 *)pv_pred_org + (trans_size * pred_strd_org); 3899 pi2_deq_data -= (trans_size << 1); 3900 pi2_deq_data += (trans_size * deq_data_strd); 3901 3902 pu1_csbf_buf -= (num_4x4_in_tu << 1); 3903 pu1_csbf_buf += (num_4x4_in_tu * csbf_strd); 3904 3905 ps_nbr_4x4 -= (num_4x4_in_tu << 1); 3906 ps_nbr_4x4 += (num_4x4_in_tu * num_4x4_in_cu); 3907 ps_top_nbr_4x4 = ps_nbr_4x4 - num_4x4_in_cu; 3908 ps_tmp_lt_4x4 = ps_left_nbr_4x4 + (num_4x4_in_tu * nbr_4x4_left_strd); 3909 3910 /* decrement pos x to start */ 3911 cu_pos_x -= (num_4x4_in_tu << 1); 3912 cu_pos_y += num_4x4_in_tu; 3913 } 3914 } 3915 3916 #if RDOPT_ENABLE 3917 /* compute the RDOPT cost for the current TU */ 3918 ai8_cand_rdopt_cost[best_intra_buf_idx] += COMPUTE_RATE_COST_CLIP30( 3919 ai4_tu_bits[best_intra_buf_idx], ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT); 3920 #endif 3921 3922 /* accumulate the costs */ 3923 total_rdopt_cost += ai8_cand_rdopt_cost[best_intra_buf_idx]; 3924 3925 if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1) 3926 { 3927 /* Early exit : If the current running cost exceeds 3928 the prev. best mode cost, break */ 3929 if(total_rdopt_cost > prev_best_rdopt_cost) 3930 { 3931 return (total_rdopt_cost); 3932 } 3933 } 3934 3935 /* if transfrom size is 4x4 then only first luma 4x4 will have chroma*/ 3936 chrm_present_flag = (4 != trans_size) ? 1 : INTRA_PRED_CHROMA_IDX_NONE; 3937 3938 pu4_nbr_flags++; 3939 } 3940 /* Modify the cost function for this CU. 
*/ 3941 /* loop in for 8x8 blocks */ 3942 if(ps_ctxt->u1_enable_psyRDOPT) 3943 { 3944 UWORD8 *pu1_recon_cu; 3945 WORD32 recon_stride; 3946 WORD32 curr_pos_x; 3947 WORD32 curr_pos_y; 3948 WORD32 start_index; 3949 WORD32 num_horz_cu_in_ctb; 3950 WORD32 cu_size; 3951 WORD32 had_block_size; 3952 3953 /* tODO: sreenivasa ctb size has to be used appropriately */ 3954 had_block_size = 8; 3955 cu_size = ps_cu_analyse->u1_cu_size; /* todo */ 3956 num_horz_cu_in_ctb = 64 / had_block_size; 3957 3958 curr_pos_x = ps_cu_analyse->b3_cu_pos_x << 3; /* pel units */ 3959 curr_pos_y = ps_cu_analyse->b3_cu_pos_y << 3; /* pel units */ 3960 recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride; 3961 pu1_recon_cu = 3962 ((UWORD8 *)ps_final_prms->s_recon_datastore 3963 .apv_luma_recon_bufs[ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0]]); 3964 /* + \ curr_pos_x + curr_pos_y * recon_stride; */ 3965 3966 /* start index to index the source satd of curr cu int he current ctb*/ 3967 start_index = 3968 (curr_pos_x / had_block_size) + (curr_pos_y / had_block_size) * num_horz_cu_in_ctb; 3969 3970 { 3971 total_rdopt_cost += ihevce_psy_rd_cost( 3972 ps_ctxt->ai4_source_satd_8x8, 3973 pu1_recon_cu, 3974 recon_stride, 3975 1, // 3976 cu_size, 3977 0, // pic type 3978 0, //layer id 3979 ps_ctxt->i4_satd_lamda, // lambda 3980 start_index, 3981 ps_ctxt->u1_is_input_data_hbd, 3982 ps_ctxt->u4_psy_strength, 3983 &ps_ctxt->s_cmn_opt_func 3984 3985 ); // 8 bit 3986 } 3987 } 3988 3989 #if !FORCE_INTRA_TU_DEPTH_TO_0 //RATIONALISE_NUM_RDO_MODES_IN_PQ_AND_HQ 3990 if(TU_EQ_SUBCU == func_proc_mode) 3991 { 3992 UWORD8 au1_tu_eq_cu_div2_modes[4]; 3993 UWORD8 au1_freq_of_mode[4]; 3994 3995 WORD32 i4_num_clusters = ihevce_find_num_clusters_of_identical_points_1D( 3996 ps_final_prms->au1_intra_pred_mode, au1_tu_eq_cu_div2_modes, au1_freq_of_mode, 4); 3997 3998 if(1 == i4_num_clusters) 3999 { 4000 ps_final_prms->u2_num_pus_in_cu = 1; 4001 ps_final_prms->u1_part_mode = SIZE_2Nx2N; 4002 } 4003 } 
4004 #endif 4005 4006 /* store the num TUs*/ 4007 ps_final_prms->u2_num_tus_in_cu = u2_num_tus_in_cu; 4008 4009 /* update the bytes consumed */ 4010 ps_final_prms->i4_num_bytes_ecd_data = ecd_data_bytes_cons; 4011 4012 /* store the current cu size to final prms */ 4013 ps_final_prms->u1_cu_size = ps_cu_analyse->u1_cu_size; 4014 4015 /* cu bits will be having luma residual bits till this point */ 4016 /* if zero_cbf eval is disabled then cu bits will be zero */ 4017 ps_final_prms->u4_cu_luma_res_bits = cu_bits; 4018 4019 /* ------------- Chroma processing -------------- */ 4020 /* Chroma rdopt eval for each luma candidate only for HIGH QUALITY/MEDIUM SPEDD preset*/ 4021 if(1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt) 4022 { 4023 LWORD64 chrm_rdopt_cost; 4024 WORD32 chrm_rdopt_tu_bits; 4025 4026 /* Store the current RDOPT cost to enable early exit in chrom_prcs */ 4027 ps_ctxt->as_cu_prms[curr_buf_idx].i8_curr_rdopt_cost = total_rdopt_cost; 4028 4029 chrm_rdopt_cost = ihevce_chroma_cu_prcs_rdopt( 4030 ps_ctxt, 4031 curr_buf_idx, 4032 func_proc_mode, 4033 ps_chrm_cu_buf_prms->pu1_curr_src, 4034 ps_chrm_cu_buf_prms->i4_chrm_src_stride, 4035 ps_chrm_cu_buf_prms->pu1_cu_left, 4036 ps_chrm_cu_buf_prms->pu1_cu_top, 4037 ps_chrm_cu_buf_prms->pu1_cu_top_left, 4038 ps_chrm_cu_buf_prms->i4_cu_left_stride, 4039 cu_pos_x_8pelunits, 4040 cu_pos_y_8pelunits, 4041 &chrm_rdopt_tu_bits, 4042 i4_alpha_stim_multiplier, 4043 u1_is_cu_noisy); 4044 4045 #if WEIGH_CHROMA_COST 4046 chrm_rdopt_cost = (LWORD64)( 4047 (chrm_rdopt_cost * ps_ctxt->u4_chroma_cost_weighing_factor + 4048 (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >> 4049 CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT); 4050 #endif 4051 4052 #if CHROMA_RDOPT_ENABLE 4053 total_rdopt_cost += chrm_rdopt_cost; 4054 #endif 4055 cu_bits += chrm_rdopt_tu_bits; 4056 4057 /* cu bits for chroma residual if chroma rdopt is on */ 4058 /* if zero_cbf eval is disabled then cu bits will be zero */ 4059 ps_final_prms->u4_cu_chroma_res_bits = 
chrm_rdopt_tu_bits; 4060 4061 if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1) 4062 { 4063 /* Early exit : If the current running cost exceeds 4064 the prev. best mode cost, break */ 4065 if(total_rdopt_cost > prev_best_rdopt_cost) 4066 { 4067 return (total_rdopt_cost); 4068 } 4069 } 4070 } 4071 else 4072 {} 4073 4074 /* RDOPT copy States : Best after all luma TUs to current */ 4075 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( 4076 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx] 4077 .s_cabac_ctxt.au1_ctxt_models[0] + 4078 IHEVC_CAB_COEFFX_PREFIX, 4079 &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX, 4080 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX); 4081 4082 /* get the neighbour availability flags for current cu */ 4083 ihevce_get_only_nbr_flag( 4084 &s_nbr, 4085 ps_ctxt->pu1_ctb_nbr_map, 4086 ps_ctxt->i4_nbr_map_strd, 4087 (cu_pos_x_8pelunits << 1), 4088 (cu_pos_y_8pelunits << 1), 4089 (trans_size << 1), 4090 (trans_size << 1)); 4091 4092 /* call the entropy rdo encode to get the bit estimate for current cu */ 4093 /*if ZERO_CBF eval is enabled then this function will return only CU header bits */ 4094 { 4095 /*cbf_bits will account for both texture and cbf bits when zero cbf eval flag is 0*/ 4096 WORD32 cbf_bits, header_bits; 4097 4098 header_bits = ihevce_entropy_rdo_encode_cu( 4099 &ps_ctxt->s_rdopt_entropy_ctxt, 4100 ps_final_prms, 4101 cu_pos_x_8pelunits, 4102 cu_pos_y_8pelunits, 4103 ps_cu_analyse->u1_cu_size, 4104 s_nbr.u1_top_avail, 4105 s_nbr.u1_left_avail, 4106 &ps_final_prms->pu1_cu_coeffs[0], 4107 &cbf_bits); 4108 4109 cu_bits += header_bits; 4110 4111 /* cbf bits are excluded from header bits, instead considered as texture bits */ 4112 /* incase if zero cbf eval is disabled then texture bits gets added here */ 4113 ps_final_prms->u4_cu_hdr_bits = (header_bits - cbf_bits); 4114 ps_final_prms->u4_cu_cbf_bits = cbf_bits; 4115 4116 #if RDOPT_ENABLE 4117 /* add the cost of coding the cu bits */ 4118 
total_rdopt_cost += 4119 COMPUTE_RATE_COST_CLIP30(header_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT); 4120 #endif 4121 } 4122 return (total_rdopt_cost); 4123 } 4124 /*! 4125 ****************************************************************************** 4126 * \if Function name : ihevce_inter_rdopt_cu_ntu \endif 4127 * 4128 * \brief 4129 * Inter Coding unit funtion whic perfomr the TQ IT IQ recon for luma 4130 * 4131 * \param[in] ps_ctxt enc_loop module ctxt pointer 4132 * \param[in] ps_inter_cand pointer to inter candidate structure 4133 * \param[in] pu1_src pointer to source data buffer 4134 * \param[in] cu_size Current CU size 4135 * \param[in] cu_pos_x cu position x w.r.t to ctb 4136 * \param[in] cu_pos_y cu position y w.r.t to ctb 4137 * \param[in] src_strd source buffer stride 4138 * \param[in] curr_buf_idx buffer index for current output storage 4139 * \param[in] ps_chrm_cu_buf_prms pointer to chroma buffer pointers structure 4140 * 4141 * \return 4142 * Rdopt cost 4143 * 4144 * \author 4145 * Ittiam 4146 * 4147 ***************************************************************************** 4148 */ 4149 LWORD64 ihevce_inter_rdopt_cu_ntu( 4150 ihevce_enc_loop_ctxt_t *ps_ctxt, 4151 enc_loop_cu_prms_t *ps_cu_prms, 4152 void *pv_src, 4153 WORD32 cu_size, 4154 WORD32 cu_pos_x, 4155 WORD32 cu_pos_y, 4156 WORD32 curr_buf_idx, 4157 enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms, 4158 cu_inter_cand_t *ps_inter_cand, 4159 cu_analyse_t *ps_cu_analyse, 4160 WORD32 i4_alpha_stim_multiplier) 4161 { 4162 enc_loop_cu_final_prms_t *ps_final_prms; 4163 nbr_4x4_t *ps_nbr_4x4; 4164 tu_prms_t s_tu_prms[64 * 4]; 4165 tu_prms_t *ps_tu_prms; 4166 4167 WORD32 i4_perform_rdoq; 4168 WORD32 i4_perform_sbh; 4169 WORD32 ai4_tu_split_flags[4]; 4170 WORD32 ai4_tu_early_cbf[4]; 4171 WORD32 num_split_flags = 1; 4172 WORD32 i; 4173 UWORD8 u1_tu_size; 4174 UWORD8 *pu1_pred; 4175 UWORD8 *pu1_ecd_data; 4176 WORD16 *pi2_deq_data; 4177 UWORD8 *pu1_csbf_buf; 4178 UWORD8 *pu1_tu_sz_sft; 4179 
UWORD8 *pu1_tu_posx; 4180 UWORD8 *pu1_tu_posy; 4181 LWORD64 total_rdopt_cost; 4182 WORD32 ctr; 4183 WORD32 chrm_ctr; 4184 WORD32 num_tu_in_cu = 0; 4185 WORD32 pred_stride; 4186 WORD32 recon_stride; 4187 WORD32 trans_size = ps_cu_analyse->u1_cu_size; 4188 WORD32 csbf_strd; 4189 WORD32 chrm_present_flag; 4190 WORD32 ecd_data_bytes_cons; 4191 WORD32 num_4x4_in_cu; 4192 WORD32 num_4x4_in_tu; 4193 WORD32 recon_func_mode; 4194 WORD32 cu_bits; 4195 UWORD8 u1_compute_spatial_ssd; 4196 4197 /* min_trans_size is initialized to some huge number than usual TU sizes */ 4198 WORD32 i4_min_trans_size = 256; 4199 /* Get the RDOPT cost of the best CU mode for early_exit */ 4200 LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!curr_buf_idx].i8_best_rdopt_cost; 4201 WORD32 src_strd = ps_cu_prms->i4_luma_src_stride; 4202 4203 /* model for no residue syntax qt root cbf flag */ 4204 UWORD8 u1_qtroot_cbf_cabac_model = ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_NORES_IDX]; 4205 4206 /* backup copy of cabac states for restoration if zero cu reside rdo wins later */ 4207 UWORD8 au1_rdopt_init_ctxt_models[IHEVC_CAB_CTXT_END]; 4208 4209 /* for skip cases tables are not reqquired */ 4210 UWORD8 u1_skip_tu_sz_sft = 0; 4211 UWORD8 u1_skip_tu_posx = 0; 4212 UWORD8 u1_skip_tu_posy = 0; 4213 UWORD8 u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy; 4214 4215 /* get the pointers based on curbuf idx */ 4216 ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx]; 4217 ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0]; 4218 pu1_ecd_data = &ps_final_prms->pu1_cu_coeffs[0]; 4219 pi2_deq_data = &ps_final_prms->pi2_cu_deq_coeffs[0]; 4220 csbf_strd = ps_ctxt->i4_cu_csbf_strd; 4221 pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0]; 4222 4223 pred_stride = ps_inter_cand->i4_pred_data_stride; 4224 recon_stride = cu_size; 4225 pu1_pred = ps_inter_cand->pu1_pred_data; 4226 chrm_ctr = 0; 4227 ecd_data_bytes_cons = 0; 4228 total_rdopt_cost = 0; 4229 num_4x4_in_cu = cu_size >> 2; 4230 recon_func_mode = PRED_MODE_INTER; 4231 
cu_bits = 0; 4232 4233 /* get the 4x4 level postion of current cu */ 4234 cu_pos_x = cu_pos_x << 1; 4235 cu_pos_y = cu_pos_y << 1; 4236 4237 /* default value for cu coded flag */ 4238 ps_final_prms->u1_is_cu_coded = 0; 4239 4240 /*init of ssd of CU accuumulated over all TU*/ 4241 ps_final_prms->u4_cu_sad = 0; 4242 4243 /* populate the coeffs scan idx */ 4244 ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT; 4245 4246 #if ENABLE_INTER_ZCU_COST 4247 /* reset cu not coded cost */ 4248 ps_ctxt->i8_cu_not_coded_cost = 0; 4249 4250 /* backup copy of cabac states for restoration if zero cu reside rdo wins later */ 4251 memcpy(au1_rdopt_init_ctxt_models, &ps_ctxt->au1_rdopt_init_ctxt_models[0], IHEVC_CAB_CTXT_END); 4252 #endif 4253 4254 if(ps_cu_analyse->u1_cu_size == 64) 4255 { 4256 num_split_flags = 4; 4257 u1_tu_size = 32; 4258 } 4259 else 4260 { 4261 num_split_flags = 1; 4262 u1_tu_size = ps_cu_analyse->u1_cu_size; 4263 } 4264 4265 /* ckeck for skip mode */ 4266 if(1 == ps_final_prms->u1_skip_flag) 4267 { 4268 if(64 == cu_size) 4269 { 4270 /* TU = CU/2 is set but no trnaform is evaluated */ 4271 num_tu_in_cu = 4; 4272 pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0]; 4273 pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0]; 4274 pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0]; 4275 } 4276 else 4277 { 4278 /* TU = CU is set but no trnaform is evaluated */ 4279 num_tu_in_cu = 1; 4280 pu1_tu_sz_sft = &u1_skip_tu_sz_sft; 4281 pu1_tu_posx = &u1_skip_tu_posx; 4282 pu1_tu_posy = &u1_skip_tu_posy; 4283 } 4284 4285 recon_func_mode = PRED_MODE_SKIP; 4286 } 4287 /* check for PU part mode being AMP or No AMP */ 4288 else if(ps_final_prms->u1_part_mode < SIZE_2NxnU) 4289 { 4290 if((SIZE_2Nx2N == ps_final_prms->u1_part_mode) && (cu_size < 64)) 4291 { 4292 /* TU= CU is evaluated 2Nx2N inter case */ 4293 num_tu_in_cu = 1; 4294 pu1_tu_sz_sft = &u1_skip_tu_sz_sft; 4295 pu1_tu_posx = &u1_skip_tu_posx; 4296 pu1_tu_posy = &u1_skip_tu_posy; 4297 } 4298 else 4299 { 4300 /* currently TU= CU/2 is evaluated for all 
inter case */ 4301 num_tu_in_cu = 4; 4302 pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0]; 4303 pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0]; 4304 pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0]; 4305 } 4306 } 4307 else 4308 { 4309 /* for AMP cases one level of TU recurssion is done */ 4310 /* based on oreintation of the partitions */ 4311 num_tu_in_cu = 10; 4312 pu1_tu_sz_sft = &gau1_inter_tu_shft_amt_amp[ps_final_prms->u1_part_mode - 4][0]; 4313 pu1_tu_posx = &gau1_inter_tu_posx_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0]; 4314 pu1_tu_posy = &gau1_inter_tu_posy_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0]; 4315 } 4316 4317 ps_tu_prms = &s_tu_prms[0]; 4318 num_tu_in_cu = 0; 4319 4320 for(i = 0; i < num_split_flags; i++) 4321 { 4322 WORD32 i4_x_off = 0, i4_y_off = 0; 4323 4324 if(i == 1 || i == 3) 4325 { 4326 i4_x_off = 32; 4327 } 4328 4329 if(i == 2 || i == 3) 4330 { 4331 i4_y_off = 32; 4332 } 4333 4334 if(1 == ps_final_prms->u1_skip_flag) 4335 { 4336 ai4_tu_split_flags[0] = 0; 4337 ps_inter_cand->ai4_tu_split_flag[i] = 0; 4338 4339 ai4_tu_early_cbf[0] = 0; 4340 } 4341 else 4342 { 4343 ai4_tu_split_flags[0] = ps_inter_cand->ai4_tu_split_flag[i]; 4344 ai4_tu_early_cbf[0] = ps_inter_cand->ai4_tu_early_cbf[i]; 4345 } 4346 4347 ps_tu_prms->u1_tu_size = u1_tu_size; 4348 4349 ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update( 4350 ps_tu_prms, 4351 &num_tu_in_cu, 4352 0, 4353 ai4_tu_split_flags[0], 4354 ai4_tu_early_cbf[0], 4355 i4_x_off, 4356 i4_y_off); 4357 } 4358 4359 /* loop for all tu blocks in current cu */ 4360 ps_tu_prms = &s_tu_prms[0]; 4361 for(ctr = 0; ctr < num_tu_in_cu; ctr++) 4362 { 4363 trans_size = ps_tu_prms->u1_tu_size; 4364 4365 if(i4_min_trans_size > trans_size) 4366 { 4367 i4_min_trans_size = trans_size; 4368 } 4369 ps_tu_prms++; 4370 } 4371 4372 if(ps_ctxt->i1_cu_qp_delta_enable) 4373 { 4374 WORD32 i4_act_counter = 0, i4_act_counter_lamda = 0; 4375 4376 if(ps_cu_analyse->u1_cu_size == 64) 4377 { 4378 ASSERT( 4379 (i4_min_trans_size == 32) || 
(i4_min_trans_size == 16) || 4380 (i4_min_trans_size == 8) || (i4_min_trans_size == 4)); 4381 i4_act_counter = (i4_min_trans_size == 16) + 4382 2 * ((i4_min_trans_size == 8) || (i4_min_trans_size == 4)); 4383 i4_act_counter_lamda = 3; 4384 } 4385 else if(ps_cu_analyse->u1_cu_size == 32) 4386 { 4387 ASSERT( 4388 (i4_min_trans_size == 32) || (i4_min_trans_size == 16) || 4389 (i4_min_trans_size == 8) || (i4_min_trans_size == 4)); 4390 i4_act_counter = (i4_min_trans_size == 16) + 4391 2 * ((i4_min_trans_size == 8) || (i4_min_trans_size == 4)); 4392 i4_act_counter_lamda = 0; 4393 } 4394 else if(ps_cu_analyse->u1_cu_size == 16) 4395 { 4396 ASSERT( 4397 (i4_min_trans_size == 16) || (i4_min_trans_size == 8) || (i4_min_trans_size == 4)); 4398 i4_act_counter = (i4_min_trans_size == 8) || (i4_min_trans_size == 4); 4399 i4_act_counter_lamda = 0; 4400 } 4401 else if(ps_cu_analyse->u1_cu_size == 8) 4402 { 4403 ASSERT((i4_min_trans_size == 8) || (i4_min_trans_size == 4)); 4404 i4_act_counter = 1; 4405 i4_act_counter_lamda = 0; 4406 } 4407 else 4408 { 4409 ASSERT(0); 4410 } 4411 if(ps_ctxt->i4_use_ctb_level_lamda) 4412 { 4413 ihevce_compute_cu_level_QP( 4414 ps_ctxt, ps_cu_analyse->i4_act_factor[i4_act_counter][0], -1, 0); 4415 } 4416 else 4417 { 4418 ihevce_compute_cu_level_QP( 4419 ps_ctxt, 4420 ps_cu_analyse->i4_act_factor[i4_act_counter][0], 4421 ps_cu_analyse->i4_act_factor[i4_act_counter_lamda][0], 4422 0); 4423 } 4424 4425 ps_cu_analyse->i1_cu_qp = ps_ctxt->i4_cu_qp; 4426 } 4427 if(u1_is_cu_noisy && !ps_ctxt->u1_enable_psyRDOPT) 4428 { 4429 ps_ctxt->i8_cl_ssd_lambda_qf = 4430 ((float)ps_ctxt->i8_cl_ssd_lambda_qf * (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / 4431 100.0f); 4432 ps_ctxt->i8_cl_ssd_lambda_chroma_qf = 4433 ((float)ps_ctxt->i8_cl_ssd_lambda_chroma_qf * 4434 (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f); 4435 } 4436 4437 u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) && 4438 (ps_ctxt->i4_quality_preset < 
IHEVCE_QUALITY_P3) && 4439 CONVERT_SSDS_TO_SPATIAL_DOMAIN; 4440 4441 if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT) 4442 { 4443 u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) && 4444 CONVERT_SSDS_TO_SPATIAL_DOMAIN; 4445 } 4446 4447 if(!u1_compute_spatial_ssd) 4448 { 4449 ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0; 4450 ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0; 4451 } 4452 else 4453 { 4454 ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 1; 4455 } 4456 4457 ps_tu_prms = &s_tu_prms[0]; 4458 4459 ASSERT(num_tu_in_cu <= 256); 4460 4461 /* RDOPT copy States : TU init (best until prev TU) to current */ 4462 memcpy( 4463 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx] 4464 .s_cabac_ctxt.au1_ctxt_models[0], 4465 &ps_ctxt->au1_rdopt_init_ctxt_models[0], 4466 IHEVC_CAB_COEFFX_PREFIX); 4467 4468 for(ctr = 0; ctr < num_tu_in_cu; ctr++) 4469 { 4470 WORD32 curr_bytes; 4471 WORD32 tx_size; 4472 WORD32 cbf, zero_col, zero_row; 4473 LWORD64 rdopt_cost; 4474 UWORD8 u1_is_recon_available; 4475 4476 WORD32 curr_pos_x; 4477 WORD32 curr_pos_y; 4478 nbr_4x4_t *ps_cur_nbr_4x4; 4479 UWORD8 *pu1_cur_pred; 4480 UWORD8 *pu1_cur_src; 4481 UWORD8 *pu1_cur_recon; 4482 WORD16 *pi2_cur_deq_data; 4483 UWORD32 u4_tu_sad; 4484 WORD32 tu_bits; 4485 4486 WORD32 i4_recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride; 4487 4488 trans_size = ps_tu_prms->u1_tu_size; 4489 /* get the current pos x and pos y in pixels */ 4490 curr_pos_x = ps_tu_prms->u1_x_off; //((cu_size >> 2) * pu1_tu_posx[ctr]); 4491 curr_pos_y = ps_tu_prms->u1_y_off; //((cu_size >> 2) * pu1_tu_posy[ctr]); 4492 4493 num_4x4_in_tu = trans_size >> 2; 4494 4495 #if FORCE_8x8_TFR 4496 if(cu_size == 64) 4497 { 4498 curr_pos_x = ((cu_size >> 3) * pu1_tu_posx[ctr]); 4499 curr_pos_y = ((cu_size >> 3) * pu1_tu_posy[ctr]); 4500 } 4501 #endif 4502 4503 /* increment the pointers to start of current TU */ 4504 pu1_cur_src = ((UWORD8 *)pv_src + 
curr_pos_x); 4505 pu1_cur_src += (curr_pos_y * src_strd); 4506 pu1_cur_pred = (pu1_pred + curr_pos_x); 4507 pu1_cur_pred += (curr_pos_y * pred_stride); 4508 pi2_cur_deq_data = pi2_deq_data + curr_pos_x; 4509 pi2_cur_deq_data += (curr_pos_y * cu_size); 4510 pu1_cur_recon = ((UWORD8 *)ps_final_prms->s_recon_datastore.apv_luma_recon_bufs[0]) + 4511 curr_pos_x + curr_pos_y * i4_recon_stride; 4512 4513 ps_cur_nbr_4x4 = (ps_nbr_4x4 + (curr_pos_x >> 2)); 4514 ps_cur_nbr_4x4 += ((curr_pos_y >> 2) * num_4x4_in_cu); 4515 4516 /* RDOPT copy States : TU init (best until prev TU) to current */ 4517 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( 4518 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx] 4519 .s_cabac_ctxt.au1_ctxt_models[0] + 4520 IHEVC_CAB_COEFFX_PREFIX, 4521 &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX, 4522 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX); 4523 4524 i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq; 4525 i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh; 4526 4527 /*2 Multi- dimensinal array based on trans size of rounding factor to be added here */ 4528 /* arrays are for rounding factor corr. 
to 0-1 decision and 1-2 decision */ 4529 /* Currently the complete array will contain only single value*/ 4530 /*The rounding factor is calculated with the formula 4531 Deadzone val = (((R1 - R0) * (2^(-8/3)) * lamMod) + 1)/2 4532 rounding factor = (1 - DeadZone Val) 4533 4534 Assumption: Cabac states of All the sub-blocks in the TU are considered independent 4535 */ 4536 if((ps_ctxt->i4_quant_rounding_level == TU_LEVEL_QUANT_ROUNDING) && (ctr != 0)) 4537 { 4538 double i4_lamda_modifier; 4539 4540 if((BSLICE == ps_ctxt->i1_slice_type) && (ps_ctxt->i4_temporal_layer_id)) 4541 { 4542 i4_lamda_modifier = ps_ctxt->i4_lamda_modifier * 4543 CLIP3((((double)(ps_ctxt->i4_cu_qp - 12)) / 6.0), 2.00, 4.00); 4544 } 4545 else 4546 { 4547 i4_lamda_modifier = ps_ctxt->i4_lamda_modifier; 4548 } 4549 if(ps_ctxt->i4_use_const_lamda_modifier) 4550 { 4551 if(ISLICE == ps_ctxt->i1_slice_type) 4552 { 4553 i4_lamda_modifier = ps_ctxt->f_i_pic_lamda_modifier; 4554 } 4555 else 4556 { 4557 i4_lamda_modifier = CONST_LAMDA_MOD_VAL; 4558 } 4559 } 4560 ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] = 4561 &ps_ctxt->i4_quant_round_tu[0][0]; 4562 ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] = 4563 &ps_ctxt->i4_quant_round_tu[1][0]; 4564 4565 memset( 4566 ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3], 4567 0, 4568 trans_size * trans_size * sizeof(WORD32)); 4569 memset( 4570 ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3], 4571 0, 4572 trans_size * trans_size * sizeof(WORD32)); 4573 4574 ihevce_quant_rounding_factor_gen( 4575 trans_size, 4576 1, 4577 &ps_ctxt->s_rdopt_entropy_ctxt, 4578 ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3], 4579 ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3], 4580 i4_lamda_modifier, 4581 1); 4582 } 4583 else 4584 { 4585 ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] = 4586 ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[trans_size >> 3]; 4587 ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] = 4588 
ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[trans_size >> 3]; 4589 } 4590 4591 /* call T Q IT IQ and recon function */ 4592 cbf = ihevce_t_q_iq_ssd_scan_fxn( 4593 ps_ctxt, 4594 pu1_cur_pred, 4595 pred_stride, 4596 pu1_cur_src, 4597 src_strd, 4598 pi2_cur_deq_data, 4599 cu_size, 4600 pu1_cur_recon, 4601 i4_recon_stride, 4602 pu1_ecd_data, 4603 pu1_csbf_buf, 4604 csbf_strd, 4605 trans_size, 4606 recon_func_mode, 4607 &rdopt_cost, 4608 &curr_bytes, 4609 &tu_bits, 4610 &u4_tu_sad, 4611 &zero_col, 4612 &zero_row, 4613 &u1_is_recon_available, 4614 i4_perform_rdoq, 4615 i4_perform_sbh, 4616 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS 4617 i4_alpha_stim_multiplier, 4618 u1_is_cu_noisy, 4619 #endif 4620 u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD, 4621 ps_ctxt->u1_use_early_cbf_data ? ps_tu_prms->i4_early_cbf : 1); 4622 4623 #if COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL && !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS 4624 if(u1_is_cu_noisy && i4_alpha_stim_multiplier) 4625 { 4626 #if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT 4627 rdopt_cost = ihevce_inject_stim_into_distortion( 4628 pu1_cur_src, 4629 src_strd, 4630 pu1_cur_pred, 4631 pred_stride, 4632 rdopt_cost, 4633 i4_alpha_stim_multiplier, 4634 trans_size, 4635 0, 4636 ps_ctxt->u1_enable_psyRDOPT, 4637 NULL_PLANE); 4638 #else 4639 if(u1_compute_spatial_ssd && u1_is_recon_available) 4640 { 4641 rdopt_cost = ihevce_inject_stim_into_distortion( 4642 pu1_cur_src, 4643 src_strd, 4644 pu1_cur_recon, 4645 i4_recon_stride, 4646 rdopt_cost, 4647 i4_alpha_stim_multiplier, 4648 trans_size, 4649 0, 4650 NULL_PLANE); 4651 } 4652 else 4653 { 4654 rdopt_cost = ihevce_inject_stim_into_distortion( 4655 pu1_cur_src, 4656 src_strd, 4657 pu1_cur_pred, 4658 pred_stride, 4659 rdopt_cost, 4660 i4_alpha_stim_multiplier, 4661 trans_size, 4662 0, 4663 ps_ctxt->u1_enable_psyRDOPT, 4664 NULL_PLANE); 4665 } 4666 #endif 4667 } 4668 #endif 4669 4670 if(u1_compute_spatial_ssd && u1_is_recon_available) 4671 { 4672 
ps_final_prms->s_recon_datastore.au1_bufId_with_winning_LumaRecon[ctr] = 0; 4673 } 4674 else 4675 { 4676 ps_final_prms->s_recon_datastore.au1_bufId_with_winning_LumaRecon[ctr] = UCHAR_MAX; 4677 } 4678 4679 /* accumulate the TU sad into cu sad */ 4680 ps_final_prms->u4_cu_sad += u4_tu_sad; 4681 4682 /* accumulate the TU bits into cu bits */ 4683 cu_bits += tu_bits; 4684 4685 /* inter cu is coded if any of the tu is coded in it */ 4686 ps_final_prms->u1_is_cu_coded |= cbf; 4687 4688 /* call the entropy function to get the bits */ 4689 /* add that to rd opt cost(SSD) */ 4690 4691 /* update the bytes */ 4692 ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons; 4693 ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed = curr_bytes; 4694 /* update the zero_row and col info for the final mode */ 4695 ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_col = zero_col; 4696 ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_row = zero_row; 4697 4698 /* update the bytes */ 4699 ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons; 4700 4701 /* update the total bytes cons */ 4702 ecd_data_bytes_cons += curr_bytes; 4703 pu1_ecd_data += curr_bytes; 4704 4705 /* RDOPT copy States : New updated after curr TU to TU init */ 4706 if(0 != cbf) 4707 { 4708 /* update to new state only if CBF is non zero */ 4709 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( 4710 &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX, 4711 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx] 4712 .s_cabac_ctxt.au1_ctxt_models[0] + 4713 IHEVC_CAB_COEFFX_PREFIX, 4714 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX); 4715 } 4716 4717 /* by default chroma present is set to 1*/ 4718 chrm_present_flag = 1; 4719 if(4 == trans_size) 4720 { 4721 /* if tusize is 4x4 then only first luma 4x4 will have chroma*/ 4722 if(0 != chrm_ctr) 4723 { 4724 chrm_present_flag = INTRA_PRED_CHROMA_IDX_NONE; 4725 } 4726 4727 /* increment 
the chrm ctr unconditionally */ 4728 chrm_ctr++; 4729 4730 /* after ctr reached 4 reset it */ 4731 if(4 == chrm_ctr) 4732 { 4733 chrm_ctr = 0; 4734 } 4735 } 4736 4737 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = cbf; 4738 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0; 4739 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0; 4740 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0; 4741 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0; 4742 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_chroma_intra_mode_idx = chrm_present_flag; 4743 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b7_qp = ps_ctxt->i4_cu_qp; 4744 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_first_tu_in_cu = 0; 4745 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_transquant_bypass = 0; 4746 GETRANGE(tx_size, trans_size); 4747 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3; 4748 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x + (curr_pos_x >> 2); 4749 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y + (curr_pos_y >> 2); 4750 4751 /* repiclate the nbr 4x4 structure for all 4x4 blocks current TU */ 4752 ps_cur_nbr_4x4->b1_y_cbf = cbf; 4753 /*copy the cu qp. 
This will be overwritten by qp calculated based on skip flag at final stage of cu mode decide*/ 4754 ps_cur_nbr_4x4->b8_qp = ps_ctxt->i4_cu_qp; 4755 4756 /* Qp and cbf are stored for the all 4x4 in TU */ 4757 { 4758 WORD32 i, j; 4759 nbr_4x4_t *ps_tmp_4x4; 4760 ps_tmp_4x4 = ps_cur_nbr_4x4; 4761 4762 for(i = 0; i < num_4x4_in_tu; i++) 4763 { 4764 for(j = 0; j < num_4x4_in_tu; j++) 4765 { 4766 ps_tmp_4x4[j].b8_qp = ps_ctxt->i4_cu_qp; 4767 ps_tmp_4x4[j].b1_y_cbf = cbf; 4768 } 4769 /* row level update*/ 4770 ps_tmp_4x4 += num_4x4_in_cu; 4771 } 4772 } 4773 4774 #if RDOPT_ENABLE 4775 /* compute the rdopt cost */ 4776 rdopt_cost += 4777 COMPUTE_RATE_COST_CLIP30(tu_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT); 4778 #endif 4779 /* accumulate the costs */ 4780 total_rdopt_cost += rdopt_cost; 4781 4782 ps_tu_prms++; 4783 4784 if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1) 4785 { 4786 /* Early exit : If the current running cost exceeds 4787 the prev. best mode cost, break */ 4788 if(total_rdopt_cost > prev_best_rdopt_cost) 4789 { 4790 return (total_rdopt_cost); 4791 } 4792 } 4793 } 4794 4795 /* Modify the cost function for this CU. 
*/ 4796 /* loop in for 8x8 blocks */ 4797 if(ps_ctxt->u1_enable_psyRDOPT) 4798 { 4799 UWORD8 *pu1_recon_cu; 4800 WORD32 recon_stride; 4801 WORD32 curr_pos_x; 4802 WORD32 curr_pos_y; 4803 WORD32 start_index; 4804 WORD32 num_horz_cu_in_ctb; 4805 WORD32 had_block_size; 4806 4807 /* tODO: sreenivasa ctb size has to be used appropriately */ 4808 had_block_size = 8; 4809 num_horz_cu_in_ctb = 64 / had_block_size; 4810 4811 curr_pos_x = cu_pos_x << 2; /* pel units */ 4812 curr_pos_y = cu_pos_y << 2; /* pel units */ 4813 recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride; 4814 pu1_recon_cu = ((UWORD8 *)ps_final_prms->s_recon_datastore 4815 .apv_luma_recon_bufs[0]); // already pointing to the current CU recon 4816 //+ \curr_pos_x + curr_pos_y * recon_stride; 4817 4818 /* start index to index the source satd of curr cu int he current ctb*/ 4819 start_index = 4820 (curr_pos_x / had_block_size) + (curr_pos_y / had_block_size) * num_horz_cu_in_ctb; 4821 4822 { 4823 total_rdopt_cost += ihevce_psy_rd_cost( 4824 ps_ctxt->ai4_source_satd_8x8, 4825 pu1_recon_cu, 4826 recon_stride, 4827 1, //howz stride 4828 cu_size, 4829 0, // pic type 4830 0, //layer id 4831 ps_ctxt->i4_satd_lamda, // lambda 4832 start_index, 4833 ps_ctxt->u1_is_input_data_hbd, 4834 ps_ctxt->u4_psy_strength, 4835 &ps_ctxt->s_cmn_opt_func); // 8 bit 4836 } 4837 } 4838 4839 /* store the num TUs*/ 4840 ps_final_prms->u2_num_tus_in_cu = num_tu_in_cu; 4841 4842 /* update the bytes consumed */ 4843 ps_final_prms->i4_num_bytes_ecd_data = ecd_data_bytes_cons; 4844 4845 /* store the current cu size to final prms */ 4846 ps_final_prms->u1_cu_size = cu_size; 4847 4848 /* cu bits will be having luma residual bits till this point */ 4849 /* if zero_cbf eval is disabled then cu bits will be zero */ 4850 ps_final_prms->u4_cu_luma_res_bits = cu_bits; 4851 4852 /* ------------- Chroma processing -------------- */ 4853 /* Chroma rdopt eval for each luma candidate only for HIGH QUALITY/MEDIUM SPEDD preset*/ 4854 if(1 
== ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt) 4855 { 4856 LWORD64 chrm_rdopt_cost; 4857 WORD32 chrm_rdopt_tu_bits; 4858 4859 /* Store the current RDOPT cost to enable early exit in chrom_prcs */ 4860 ps_ctxt->as_cu_prms[curr_buf_idx].i8_curr_rdopt_cost = total_rdopt_cost; 4861 4862 chrm_rdopt_cost = ihevce_chroma_cu_prcs_rdopt( 4863 ps_ctxt, 4864 curr_buf_idx, 4865 0, /* TU mode : Don't care in Inter patrh */ 4866 ps_chrm_cu_buf_prms->pu1_curr_src, 4867 ps_chrm_cu_buf_prms->i4_chrm_src_stride, 4868 ps_chrm_cu_buf_prms->pu1_cu_left, 4869 ps_chrm_cu_buf_prms->pu1_cu_top, 4870 ps_chrm_cu_buf_prms->pu1_cu_top_left, 4871 ps_chrm_cu_buf_prms->i4_cu_left_stride, 4872 (cu_pos_x >> 1), 4873 (cu_pos_y >> 1), 4874 &chrm_rdopt_tu_bits, 4875 i4_alpha_stim_multiplier, 4876 u1_is_cu_noisy); 4877 4878 #if WEIGH_CHROMA_COST 4879 chrm_rdopt_cost = (LWORD64)( 4880 (chrm_rdopt_cost * ps_ctxt->u4_chroma_cost_weighing_factor + 4881 (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >> 4882 CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT); 4883 #endif 4884 4885 #if CHROMA_RDOPT_ENABLE 4886 total_rdopt_cost += chrm_rdopt_cost; 4887 #endif 4888 cu_bits += chrm_rdopt_tu_bits; 4889 4890 /* during chroma evaluation if skip decision was over written */ 4891 /* then the current skip candidate is set to a non skip candidate */ 4892 ps_inter_cand->b1_skip_flag = ps_final_prms->u1_skip_flag; 4893 4894 /* cu bits for chroma residual if chroma rdopt is on */ 4895 /* if zero_cbf eval is disabled then cu bits will be zero */ 4896 ps_final_prms->u4_cu_chroma_res_bits = chrm_rdopt_tu_bits; 4897 4898 if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1) 4899 { 4900 /* Early exit : If the current running cost exceeds 4901 the prev. 
best mode cost, break */ 4902 if(total_rdopt_cost > prev_best_rdopt_cost) 4903 { 4904 return (total_rdopt_cost); 4905 } 4906 } 4907 } 4908 else 4909 {} 4910 4911 #if SHRINK_INTER_TUTREE 4912 /* ------------- Quadtree TU split optimization ------------ */ 4913 if(ps_final_prms->u1_is_cu_coded) 4914 { 4915 ps_final_prms->u2_num_tus_in_cu = ihevce_shrink_inter_tu_tree( 4916 &ps_final_prms->as_tu_enc_loop[0], 4917 &ps_final_prms->as_tu_enc_loop_temp_prms[0], 4918 &ps_final_prms->s_recon_datastore, 4919 num_tu_in_cu, 4920 (ps_ctxt->u1_chroma_array_type == 2)); 4921 } 4922 #endif 4923 4924 /* RDOPT copy States : Best after all luma TUs (and chroma,if enabled)to current */ 4925 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( 4926 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx] 4927 .s_cabac_ctxt.au1_ctxt_models[0] + 4928 IHEVC_CAB_COEFFX_PREFIX, 4929 &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX, 4930 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX); 4931 4932 /* -------- Bit estimate for RD opt -------------- */ 4933 { 4934 nbr_avail_flags_t s_nbr; 4935 /*cbf_bits will account for both texture and cbf bits when zero cbf eval flag is 0*/ 4936 WORD32 cbf_bits, header_bits; 4937 4938 /* get the neighbour availability flags for current cu */ 4939 ihevce_get_only_nbr_flag( 4940 &s_nbr, 4941 ps_ctxt->pu1_ctb_nbr_map, 4942 ps_ctxt->i4_nbr_map_strd, 4943 cu_pos_x, 4944 cu_pos_y, 4945 (cu_size >> 2), 4946 (cu_size >> 2)); 4947 4948 /* call the entropy rdo encode to get the bit estimate for current cu */ 4949 header_bits = ihevce_entropy_rdo_encode_cu( 4950 &ps_ctxt->s_rdopt_entropy_ctxt, 4951 ps_final_prms, 4952 (cu_pos_x >> 1), /* back to 8x8 pel units */ 4953 (cu_pos_y >> 1), /* back to 8x8 pel units */ 4954 cu_size, 4955 ps_ctxt->u1_disable_intra_eval ? 
!DISABLE_TOP_SYNC && s_nbr.u1_top_avail 4956 : s_nbr.u1_top_avail, 4957 s_nbr.u1_left_avail, 4958 &ps_final_prms->pu1_cu_coeffs[0], 4959 &cbf_bits); 4960 4961 cu_bits += header_bits; 4962 4963 /* cbf bits are excluded from header bits, instead considered as texture bits */ 4964 /* incase if zero cbf eval is disabled then texture bits gets added here */ 4965 ps_final_prms->u4_cu_hdr_bits = (header_bits - cbf_bits); 4966 ps_final_prms->u4_cu_cbf_bits = cbf_bits; 4967 4968 #if RDOPT_ENABLE 4969 /* add the cost of coding the header bits */ 4970 total_rdopt_cost += 4971 COMPUTE_RATE_COST_CLIP30(header_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT); 4972 4973 #if ENABLE_INTER_ZCU_COST 4974 /* If cu is coded, Evaluate not coded cost and check if it improves over coded cost */ 4975 if(ps_final_prms->u1_is_cu_coded && (ZCBF_ENABLE == ps_ctxt->i4_zcbf_rdo_level)) 4976 { 4977 LWORD64 i8_cu_not_coded_cost = ps_ctxt->i8_cu_not_coded_cost; 4978 4979 WORD32 is_2nx2n_mergecu = (SIZE_2Nx2N == ps_final_prms->u1_part_mode) && 4980 (1 == ps_final_prms->as_pu_enc_loop[0].b1_merge_flag); 4981 4982 cab_ctxt_t *ps_cab_ctxt = 4983 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx].s_cabac_ctxt; 4984 4985 /* Read header bits generatated after ihevce_entropy_rdo_encode_cu() call */ 4986 UWORD32 u4_cu_hdr_bits_q12 = ps_cab_ctxt->u4_header_bits_estimated_q12; 4987 4988 /* account for coding qt_root_cbf = 0 */ 4989 /* First subtract cost for coding as 1 (part of header bits) and then add cost for coding as 0 */ 4990 u4_cu_hdr_bits_q12 += gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 0]; 4991 if(u4_cu_hdr_bits_q12 < gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1]) 4992 u4_cu_hdr_bits_q12 = 0; 4993 else 4994 u4_cu_hdr_bits_q12 -= gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1]; 4995 4996 /* add the cost of coding the header bits */ 4997 i8_cu_not_coded_cost += COMPUTE_RATE_COST_CLIP30( 4998 u4_cu_hdr_bits_q12 /* ps_final_prms->u4_cu_hdr_bits 
*/, 4999 ps_ctxt->i8_cl_ssd_lambda_qf, 5000 (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q)); 5001 5002 if(ps_ctxt->u1_enable_psyRDOPT) 5003 { 5004 i8_cu_not_coded_cost = total_rdopt_cost + 1; 5005 } 5006 5007 /* Evaluate qtroot cbf rdo; exclude 2Nx2N Merge as skip cu is explicitly evaluated */ 5008 if((i8_cu_not_coded_cost <= total_rdopt_cost) && (!is_2nx2n_mergecu)) 5009 { 5010 WORD32 tx_size; 5011 5012 /* force cu as not coded and update the cost */ 5013 ps_final_prms->u1_is_cu_coded = 0; 5014 ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0; 5015 ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0; 5016 5017 total_rdopt_cost = i8_cu_not_coded_cost; 5018 5019 /* reset num TUs to 1 unless cu size id 64 */ 5020 ps_final_prms->u2_num_tus_in_cu = (64 == cu_size) ? 4 : 1; 5021 trans_size = (64 == cu_size) ? 32 : cu_size; 5022 GETRANGE(tx_size, trans_size); 5023 5024 /* reset the bytes consumed */ 5025 ps_final_prms->i4_num_bytes_ecd_data = 0; 5026 5027 /* reset texture related bits and roll back header bits*/ 5028 ps_final_prms->u4_cu_cbf_bits = 0; 5029 ps_final_prms->u4_cu_luma_res_bits = 0; 5030 ps_final_prms->u4_cu_chroma_res_bits = 0; 5031 ps_final_prms->u4_cu_hdr_bits = 5032 (u4_cu_hdr_bits_q12 + (1 << (CABAC_FRAC_BITS_Q - 1))) >> CABAC_FRAC_BITS_Q; 5033 5034 /* update cabac model with qtroot cbf = 0 decision */ 5035 ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_NORES_IDX] = 5036 gau1_ihevc_next_state[u1_qtroot_cbf_cabac_model << 1]; 5037 5038 /* restore untouched cabac models for, tusplit, cbfs, texture etc */ 5039 memcpy( 5040 &ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_SPLIT_TFM], 5041 &au1_rdopt_init_ctxt_models[IHEVC_CAB_SPLIT_TFM], 5042 (IHEVC_CAB_CTXT_END - IHEVC_CAB_SPLIT_TFM)); 5043 5044 /* mark all tus as not coded for final eval */ 5045 for(ctr = 0; ctr < ps_final_prms->u2_num_tus_in_cu; ctr++) 5046 { 5047 WORD32 curr_pos_x = (ctr & 0x1) ? (trans_size >> 2) : 0; 5048 WORD32 curr_pos_y = (ctr & 0x2) ? 
(trans_size >> 2) : 0; 5049 5050 nbr_4x4_t *ps_cur_nbr_4x4 = 5051 ps_nbr_4x4 + curr_pos_x + (curr_pos_y * num_4x4_in_cu); 5052 5053 num_4x4_in_tu = trans_size >> 2; 5054 5055 ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed = 0; 5056 ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cb_bytes_consumed[0] = 0; 5057 ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cr_bytes_consumed[0] = 0; 5058 5059 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = 0; 5060 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0; 5061 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0; 5062 5063 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0; 5064 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0; 5065 5066 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3; 5067 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x + curr_pos_x; 5068 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y + curr_pos_y; 5069 5070 /* reset cbf for the all 4x4 in TU */ 5071 { 5072 WORD32 i, j; 5073 nbr_4x4_t *ps_tmp_4x4; 5074 ps_tmp_4x4 = ps_cur_nbr_4x4; 5075 5076 for(i = 0; i < num_4x4_in_tu; i++) 5077 { 5078 for(j = 0; j < num_4x4_in_tu; j++) 5079 { 5080 ps_tmp_4x4[j].b1_y_cbf = 0; 5081 } 5082 /* row level update*/ 5083 ps_tmp_4x4 += num_4x4_in_cu; 5084 } 5085 } 5086 } 5087 } 5088 } 5089 #endif /* ENABLE_INTER_ZCU_COST */ 5090 5091 #endif /* RDOPT_ENABLE */ 5092 } 5093 5094 return (total_rdopt_cost); 5095 }

#if ENABLE_RDO_BASED_TU_RECURSION
/*!
******************************************************************************
* \if Function name : ihevce_inter_tu_tree_selector_and_rdopt_cost_computer \endif
*
* \brief
*    Inter coding unit function which builds a TU tree for the CU, runs the
*    TU tree selector on it and accumulates the RD opt cost of the outcome
*    (luma, optionally chroma, header bits and the "CU not coded" alternative)
*
* \param[in,out] ps_ctxt          enc_loop module ctxt pointer; QP, lambdas,
*                                 scan index and cabac states are updated
* \param[in] ps_cu_prms           cu level params (luma source stride and
*                                 noisy-CU flag are read)
* \param[in] pv_src               luma source pointer handed to the TU selector
* \param[in] cu_size              current CU size in pixels
* \param[in] cu_pos_x             cu position x w.r.t. ctb; doubled internally
*                                 to obtain the 4x4 level position
* \param[in] cu_pos_y             cu position y w.r.t. ctb; doubled internally
*                                 to obtain the 4x4 level position
* \param[in] curr_buf_idx         index of the cu_prms / nbr / entropy buffers
*                                 used for this evaluation; the other index
*                                 (!curr_buf_idx) supplies the best cost so far
* \param[in] ps_chrm_cu_buf_prms  chroma source and neighbour buffer params
* \param[in,out] ps_inter_cand    inter candidate under evaluation; its skip
*                                 flag is refreshed after chroma rdopt
* \param[in,out] ps_cu_analyse    cu analyse params; i1_cu_qp is written when
*                                 cu qp delta is enabled
* \param[in] i4_alpha_stim_multiplier  forwarded to the TU selector (when
*                                 USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
*                                 is set) and to chroma rdopt
*
* \return
*    Rdopt cost of the CU. When the separate chroma rdopt stage runs and the
*    running cost already exceeds the previous best cost, that running cost is
*    returned early, before header bits are estimated.
*
*****************************************************************************
*/
LWORD64 ihevce_inter_tu_tree_selector_and_rdopt_cost_computer(
    ihevce_enc_loop_ctxt_t *ps_ctxt,
    enc_loop_cu_prms_t *ps_cu_prms,
    void *pv_src,
    WORD32 cu_size,
    WORD32 cu_pos_x,
    WORD32 cu_pos_y,
    WORD32 curr_buf_idx,
    enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
    cu_inter_cand_t *ps_inter_cand,
    cu_analyse_t *ps_cu_analyse,
    WORD32 i4_alpha_stim_multiplier)
{
    tu_tree_node_t as_tu_nodes[256 + 64 + 16 + 4 + 1];
    buffer_data_for_tu_t s_buffer_data_for_tu;
    enc_loop_cu_final_prms_t *ps_final_prms;
    nbr_4x4_t *ps_nbr_4x4;

    UWORD8 *pu1_pred;
    UWORD8 *pu1_ecd_data;
    WORD16 *pi2_deq_data;
    LWORD64 total_rdopt_cost;
    WORD32 ctr;
    WORD32 pred_stride;
    WORD32 trans_size = ps_cu_analyse->u1_cu_size;
    WORD32 ecd_data_bytes_cons;
    WORD32 num_4x4_in_cu;
    WORD32 num_4x4_in_tu;
    WORD32 recon_func_mode;
    WORD32 cu_bits;
    UWORD8 u1_compute_spatial_ssd;
    /* backup copy of cabac states for restoration if zero cu residue rdo wins later */
    UWORD8 au1_rdopt_init_ctxt_models[IHEVC_CAB_CTXT_END];

    /* minimum transform size assumed when picking the activity factor below */
    WORD32 i4_min_trans_size = 4;
    LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!curr_buf_idx].i8_best_rdopt_cost;
    WORD32 src_strd = ps_cu_prms->i4_luma_src_stride;
    /* model for no residue syntax qt root cbf flag */
    UWORD8 u1_qtroot_cbf_cabac_model = ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_NORES_IDX];
    UWORD8 u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy;

    ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx];
    ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0];
    pu1_ecd_data = &ps_final_prms->pu1_cu_coeffs[0];
    pi2_deq_data = &ps_final_prms->pi2_cu_deq_coeffs[0];
    pred_stride = ps_inter_cand->i4_pred_data_stride;
    pu1_pred = ps_inter_cand->pu1_pred_data;
    ecd_data_bytes_cons = 0;
    total_rdopt_cost = 0;
    num_4x4_in_cu = cu_size >> 2;
    recon_func_mode = PRED_MODE_INTER;
    cu_bits = 0;

    /* get the 4x4 level position of current cu */
    cu_pos_x = cu_pos_x << 1;
    cu_pos_y = cu_pos_y << 1;

    ps_final_prms->u1_is_cu_coded = 0;
    ps_final_prms->u4_cu_sad = 0;

    /* populate the coeffs scan idx */
    ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;

#if ENABLE_INTER_ZCU_COST
    /* reset cu not coded cost */
    ps_ctxt->i8_cu_not_coded_cost = 0;

    /* backup copy of cabac states for restoration if zero cu residue rdo wins later */
    memcpy(au1_rdopt_init_ctxt_models, &ps_ctxt->au1_rdopt_init_ctxt_models[0], IHEVC_CAB_CTXT_END);
#endif

    /* a skip candidate is reconstructed in skip mode; every other case is inter. */
    /* The TU layout itself comes from the TU tree built further below.           */
    if(1 == ps_final_prms->u1_skip_flag)
    {
        recon_func_mode = PRED_MODE_SKIP;
    }

    if(ps_ctxt->i1_cu_qp_delta_enable)
    {
        WORD32 i4_act_counter = 0, i4_act_counter_lamda = 0;
        if(ps_cu_analyse->u1_cu_size == 64)
        {
            ASSERT(
                (i4_min_trans_size == 32) || (i4_min_trans_size == 16) ||
                (i4_min_trans_size == 8) || (i4_min_trans_size == 4));
            i4_act_counter = (i4_min_trans_size == 16) +
                             2 * ((i4_min_trans_size == 8) || (i4_min_trans_size == 4));
            i4_act_counter_lamda = 3;
        }
        else if(ps_cu_analyse->u1_cu_size == 32)
        {
            ASSERT(
                (i4_min_trans_size == 32) || (i4_min_trans_size == 16) ||
                (i4_min_trans_size == 8) || (i4_min_trans_size == 4));
            i4_act_counter = (i4_min_trans_size == 16) +
                             2 * ((i4_min_trans_size == 8) || (i4_min_trans_size == 4));
            i4_act_counter_lamda = 0;
        }
        else if(ps_cu_analyse->u1_cu_size == 16)
        {
            ASSERT(
                (i4_min_trans_size == 16) || (i4_min_trans_size == 8) || (i4_min_trans_size == 4));
            i4_act_counter = (i4_min_trans_size == 8) || (i4_min_trans_size == 4);
            i4_act_counter_lamda = 0;
        }
        else if(ps_cu_analyse->u1_cu_size == 8)
        {
            ASSERT((i4_min_trans_size == 8) || (i4_min_trans_size == 4));
            i4_act_counter = 1;
            i4_act_counter_lamda = 0;
        }
        else
        {
            ASSERT(0);
        }
        if(ps_ctxt->i4_use_ctb_level_lamda)
        {
            ihevce_compute_cu_level_QP(
                ps_ctxt, ps_cu_analyse->i4_act_factor[i4_act_counter][0], -1, 0);
        }
        else
        {
            ihevce_compute_cu_level_QP(
                ps_ctxt,
                ps_cu_analyse->i4_act_factor[i4_act_counter][0],
                ps_cu_analyse->i4_act_factor[i4_act_counter_lamda][0],
                0);
        }

        ps_cu_analyse->i1_cu_qp = ps_ctxt->i4_cu_qp;
    }

    /* discount the SSD lambdas for noisy CUs (not done when psy RDOPT is on) */
    if(u1_is_cu_noisy && !ps_ctxt->u1_enable_psyRDOPT)
    {
        ps_ctxt->i8_cl_ssd_lambda_qf =
            ((float)ps_ctxt->i8_cl_ssd_lambda_qf * (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) /
             100.0f);
        ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
            ((float)ps_ctxt->i8_cl_ssd_lambda_chroma_qf *
             (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
    }

    u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
                             (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
                             CONVERT_SSDS_TO_SPATIAL_DOMAIN;

    /* noisy CUs and psy RDOPT drop the preset restriction and use the MAX_HEVC_QP bound */
    if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
    {
        u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
                                 CONVERT_SSDS_TO_SPATIAL_DOMAIN;
    }

    if(!u1_compute_spatial_ssd)
    {
        ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0;
        ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
    }
    else
    {
        ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 1;

        if(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0))
        {
            ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 1;
        }
    }

    /* RDOPT copy States : TU init (best until prev TU) to current */
    memcpy(
        &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
             .s_cabac_ctxt.au1_ctxt_models[0],
        &ps_ctxt->au1_rdopt_init_ctxt_models[0],
        IHEVC_CAB_COEFFX_PREFIX);

    ihevce_tu_tree_init(
        as_tu_nodes,
        cu_size,
        (cu_size == 64) ? !ps_inter_cand->b1_skip_flag : 0,
        ps_inter_cand->b1_skip_flag ? 0 : ps_ctxt->u1_max_inter_tr_depth,
        INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0),
        ps_ctxt->u1_chroma_array_type == 2);

    /* from preset P3 upwards the tree is seeded from the candidate's TU split flags */
    if(!ps_inter_cand->b1_skip_flag && (ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3))
    {
        ihevce_tuSplitArray_to_tuTree_mapper(
            as_tu_nodes,
            ps_inter_cand->ai4_tu_split_flag,
            cu_size,
            cu_size,
            MAX(MIN_TU_SIZE, (cu_size >> ps_ctxt->u1_max_inter_tr_depth)),
            MIN(MAX_TU_SIZE, cu_size),
            ps_inter_cand->b1_skip_flag);
    }

    ASSERT(ihevce_tu_tree_coverage_in_cu(as_tu_nodes) == cu_size * cu_size);

#if ENABLE_INTER_ZCU_COST
    ps_ctxt->i8_cu_not_coded_cost = 0;
#endif

    /* source / pred / recon buffers and strides handed to the TU selector */
    s_buffer_data_for_tu.s_src_pred_rec_buf_luma.pv_src = pv_src;
    s_buffer_data_for_tu.s_src_pred_rec_buf_luma.pv_pred = pu1_pred;
    s_buffer_data_for_tu.s_src_pred_rec_buf_luma.pv_recon =
        ps_final_prms->s_recon_datastore.apv_luma_recon_bufs[0];
    s_buffer_data_for_tu.s_src_pred_rec_buf_luma.i4_src_stride = src_strd;
    s_buffer_data_for_tu.s_src_pred_rec_buf_luma.i4_pred_stride = pred_stride;
    s_buffer_data_for_tu.s_src_pred_rec_buf_luma.i4_recon_stride =
        ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
    s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_src = ps_chrm_cu_buf_prms->pu1_curr_src;
    s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_pred =
        ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[CU_ME_INTRA_PRED_CHROMA_IDX] +
        curr_buf_idx * ((MAX_CTB_SIZE * MAX_CTB_SIZE >> 1) +
                        ((ps_ctxt->u1_chroma_array_type == 2) *
                         (MAX_CTB_SIZE * MAX_CTB_SIZE >> 1)));
    s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_recon =
        ps_final_prms->s_recon_datastore.apv_chroma_recon_bufs[0];
    s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_src_stride =
        ps_chrm_cu_buf_prms->i4_chrm_src_stride;
    s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_pred_stride =
        ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[CU_ME_INTRA_PRED_CHROMA_IDX];
    s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_recon_stride =
        ps_final_prms->s_recon_datastore.i4_chromaRecon_stride;
    s_buffer_data_for_tu.ps_nbr_data_buf = ps_nbr_4x4;
    s_buffer_data_for_tu.pi2_deq_data = pi2_deq_data;
    s_buffer_data_for_tu.pi2_deq_data_chroma =
        pi2_deq_data + ps_final_prms->i4_chrm_deq_coeff_strt_idx;
    s_buffer_data_for_tu.i4_nbr_data_buf_stride = num_4x4_in_cu;
    s_buffer_data_for_tu.i4_deq_data_stride = cu_size;
    s_buffer_data_for_tu.i4_deq_data_stride_chroma = cu_size;
    s_buffer_data_for_tu.ppu1_ecd = &pu1_ecd_data;

    /* chroma inter prediction is generated up front when chroma takes part in TU recursion */
    if(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0))
    {
        UWORD8 i;

        /* write cursor into the chroma pred buffer; advanced past the first PU */
        UWORD8 *pu1_chroma_pred =
            (UWORD8 *)s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_pred;

        for(i = 0; i < (!!ps_inter_cand->b3_part_size) + 1; i++)
        {
            pu_t *ps_pu;

            WORD32 inter_pu_wd;
            WORD32 inter_pu_ht;

            ps_pu = ps_inter_cand->as_inter_pu + i;

            inter_pu_wd = (ps_pu->b4_wd + 1) << 2; /* cb and cr pixel interleaved */
            inter_pu_ht = ((ps_pu->b4_ht + 1) << 2) >> 1;
            inter_pu_ht <<= (ps_ctxt->u1_chroma_array_type == 2);
            ihevce_chroma_inter_pred_pu(
                &ps_ctxt->s_mc_ctxt,
                ps_pu,
                pu1_chroma_pred,
                s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_pred_stride);
            if(!!ps_inter_cand->b3_part_size)
            {
                /* 2Nx__ partition case */
                if(inter_pu_wd == cu_size)
                {
                    pu1_chroma_pred +=
                        (inter_pu_ht *
                         s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_pred_stride);
                }

                /* __x2N partition case */
                if(inter_pu_ht == (cu_size >> !(ps_ctxt->u1_chroma_array_type == 2)))
                {
                    pu1_chroma_pred += inter_pu_wd;
                }
            }
        }
    }

#if !ENABLE_TOP_DOWN_TU_RECURSION
    total_rdopt_cost = ihevce_tu_tree_selector(
        ps_ctxt,
        as_tu_nodes,
        &s_buffer_data_for_tu,
        &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
             .s_cabac_ctxt.au1_ctxt_models[0],
        recon_func_mode,
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
        i4_alpha_stim_multiplier,
        u1_is_cu_noisy,
#endif
        0,
        ps_ctxt->u1_max_inter_tr_depth,
        ps_inter_cand->b3_part_size,
        u1_compute_spatial_ssd);
#else
    total_rdopt_cost = ihevce_topDown_tu_tree_selector(
        ps_ctxt,
        as_tu_nodes,
        &s_buffer_data_for_tu,
        &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
             .s_cabac_ctxt.au1_ctxt_models[0],
        recon_func_mode,
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
        i4_alpha_stim_multiplier,
        u1_is_cu_noisy,
#endif
        0,
        ps_ctxt->u1_max_inter_tr_depth,
        ps_inter_cand->b3_part_size,
        INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0),
        u1_compute_spatial_ssd);
#endif

    /* The selector's return value is overwritten here: all accumulators are */
    /* zeroed and handed by pointer to the debriefer call that follows.      */
    ps_final_prms->u2_num_tus_in_cu = 0;
    ps_final_prms->u4_cu_luma_res_bits = 0;
    ps_final_prms->u4_cu_sad = 0;
    total_rdopt_cost = 0;
    ecd_data_bytes_cons = 0;
    cu_bits = 0;
#if ENABLE_INTER_ZCU_COST
    ps_ctxt->i8_cu_not_coded_cost = 0;
#endif
    ps_final_prms->u1_is_cu_coded = 0;
    ps_final_prms->u1_cu_size = cu_size;

    ihevce_tu_selector_debriefer(
        as_tu_nodes,
        ps_final_prms,
        &total_rdopt_cost,
#if ENABLE_INTER_ZCU_COST
        &ps_ctxt->i8_cu_not_coded_cost,
#endif
        &ecd_data_bytes_cons,
        &cu_bits,
        &ps_final_prms->u2_num_tus_in_cu,
        ps_ctxt->i4_cu_qp,
        cu_pos_x * 4,
        cu_pos_y * 4,
        INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0),
        (ps_ctxt->u1_chroma_array_type == 2),
        POS_TL);

    /* chroma coeffs start right after luma when chroma was not part of TU recursion */
    if(!(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0)))
    {
        ps_final_prms->i4_chrm_cu_coeff_strt_idx = ecd_data_bytes_cons;
    }

    /* Modify the cost function for this CU. */
    /* loop in for 8x8 blocks */
    if(ps_ctxt->u1_enable_psyRDOPT)
    {
        UWORD8 *pu1_recon_cu;
        WORD32 recon_stride;
        WORD32 curr_pos_x;
        WORD32 curr_pos_y;
        WORD32 start_index;
        WORD32 num_horz_cu_in_ctb;
        WORD32 had_block_size;

        /* TODO: ctb size has to be used appropriately (64 is hard coded below) */
        had_block_size = 8;
        num_horz_cu_in_ctb = 64 / had_block_size;

        curr_pos_x = cu_pos_x << 2; /* pel units */
        curr_pos_y = cu_pos_y << 2; /* pel units */
        recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
        /* already pointing to the current CU recon, so no position offset is added */
        pu1_recon_cu = ((UWORD8 *)ps_final_prms->s_recon_datastore.apv_luma_recon_bufs[0]);

        /* start index to index the source satd of curr cu in the current ctb */
        start_index =
            (curr_pos_x / had_block_size) + (curr_pos_y / had_block_size) * num_horz_cu_in_ctb;

        {
            total_rdopt_cost += ihevce_psy_rd_cost(
                ps_ctxt->ai4_source_satd_8x8,
                pu1_recon_cu,
                recon_stride,
                1, /* horizontal stride */
                cu_size,
                0, /* pic type */
                0, /* layer id */
                ps_ctxt->i4_satd_lamda, /* lambda */
                start_index,
                ps_ctxt->u1_is_input_data_hbd,
                ps_ctxt->u4_psy_strength,
                &ps_ctxt->s_cmn_opt_func);
        }
    }

    ps_final_prms->u1_chroma_intra_pred_mode = 4;

    /* update the bytes consumed */
    ps_final_prms->i4_num_bytes_ecd_data = ecd_data_bytes_cons;

    /* store the current cu size to final prms */
    ps_final_prms->u1_cu_size = cu_size;

    /* ------------- Chroma processing -------------- */
    /* Chroma rdopt eval for each luma candidate only for HIGH QUALITY/MEDIUM SPEED preset */
    if(ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt &&
       !(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0)))
    {
        LWORD64 chrm_rdopt_cost;
        WORD32 chrm_rdopt_tu_bits;

        /* Store the current RDOPT cost to enable early exit in chroma processing */
        ps_ctxt->as_cu_prms[curr_buf_idx].i8_curr_rdopt_cost = total_rdopt_cost;

        chrm_rdopt_cost = ihevce_chroma_cu_prcs_rdopt(
            ps_ctxt,
            curr_buf_idx,
            0, /* TU mode : Don't care in Inter path */
            ps_chrm_cu_buf_prms->pu1_curr_src,
            ps_chrm_cu_buf_prms->i4_chrm_src_stride,
            ps_chrm_cu_buf_prms->pu1_cu_left,
            ps_chrm_cu_buf_prms->pu1_cu_top,
            ps_chrm_cu_buf_prms->pu1_cu_top_left,
            ps_chrm_cu_buf_prms->i4_cu_left_stride,
            (cu_pos_x >> 1),
            (cu_pos_y >> 1),
            &chrm_rdopt_tu_bits,
            i4_alpha_stim_multiplier,
            u1_is_cu_noisy);

#if WEIGH_CHROMA_COST
        chrm_rdopt_cost = (LWORD64)(
            (chrm_rdopt_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
             (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
            CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
#endif

#if CHROMA_RDOPT_ENABLE
        total_rdopt_cost += chrm_rdopt_cost;
#endif
        cu_bits += chrm_rdopt_tu_bits;

        /* during chroma evaluation if skip decision was over written */
        /* then the current skip candidate is set to a non skip candidate */
        ps_inter_cand->b1_skip_flag = ps_final_prms->u1_skip_flag;

        /* cu bits for chroma residual if chroma rdopt is on */
        /* if zero_cbf eval is disabled then cu bits will be zero */
        ps_final_prms->u4_cu_chroma_res_bits = chrm_rdopt_tu_bits;

        if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
        {
            /* Early exit : If the current running cost exceeds
            the prev. best mode cost, break */
            if(total_rdopt_cost > prev_best_rdopt_cost)
            {
                return (total_rdopt_cost);
            }
        }
    }

#if SHRINK_INTER_TUTREE
    /* ------------- Quadtree TU split optimization ------------ */
    if(ps_final_prms->u1_is_cu_coded)
    {
        ps_final_prms->u2_num_tus_in_cu = ihevce_shrink_inter_tu_tree(
            &ps_final_prms->as_tu_enc_loop[0],
            &ps_final_prms->as_tu_enc_loop_temp_prms[0],
            &ps_final_prms->s_recon_datastore,
            ps_final_prms->u2_num_tus_in_cu,
            (ps_ctxt->u1_chroma_array_type == 2));
    }
#endif

    /* RDOPT copy States : Best after all luma TUs (and chroma, if enabled) to current */
    COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
        &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
                .s_cabac_ctxt.au1_ctxt_models[0] +
            IHEVC_CAB_COEFFX_PREFIX,
        &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
        IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);

    /* -------- Bit estimate for RD opt -------------- */
    {
        nbr_avail_flags_t s_nbr;
        /* cbf_bits will account for both texture and cbf bits when zero cbf eval flag is 0 */
        WORD32 cbf_bits, header_bits;

        /* get the neighbour availability flags for current cu */
        ihevce_get_only_nbr_flag(
            &s_nbr,
            ps_ctxt->pu1_ctb_nbr_map,
            ps_ctxt->i4_nbr_map_strd,
            cu_pos_x,
            cu_pos_y,
            (cu_size >> 2),
            (cu_size >> 2));

        /* call the entropy rdo encode to get the bit estimate for current cu */
        header_bits = ihevce_entropy_rdo_encode_cu(
            &ps_ctxt->s_rdopt_entropy_ctxt,
            ps_final_prms,
            (cu_pos_x >> 1), /* back to 8x8 pel units */
            (cu_pos_y >> 1), /* back to 8x8 pel units */
            cu_size,
            ps_ctxt->u1_disable_intra_eval ? !DISABLE_TOP_SYNC && s_nbr.u1_top_avail
                                           : s_nbr.u1_top_avail,
            s_nbr.u1_left_avail,
            &ps_final_prms->pu1_cu_coeffs[0],
            &cbf_bits);

        cu_bits += header_bits;

        /* cbf bits are excluded from header bits, instead considered as texture bits */
        /* in case zero cbf eval is disabled then texture bits get added here */
        ps_final_prms->u4_cu_hdr_bits = (header_bits - cbf_bits);
        ps_final_prms->u4_cu_cbf_bits = cbf_bits;

#if RDOPT_ENABLE
        /* add the cost of coding the header bits */
        total_rdopt_cost +=
            COMPUTE_RATE_COST_CLIP30(header_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);

#if ENABLE_INTER_ZCU_COST
        /* If cu is coded, Evaluate not coded cost and check if it improves over coded cost */
        if(ps_final_prms->u1_is_cu_coded && (ZCBF_ENABLE == ps_ctxt->i4_zcbf_rdo_level))
        {
            LWORD64 i8_cu_not_coded_cost = ps_ctxt->i8_cu_not_coded_cost;

            WORD32 is_2nx2n_mergecu = (SIZE_2Nx2N == ps_final_prms->u1_part_mode) &&
                                      (1 == ps_final_prms->as_pu_enc_loop[0].b1_merge_flag);

            cab_ctxt_t *ps_cab_ctxt =
                &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx].s_cabac_ctxt;

            /* Read header bits generated after ihevce_entropy_rdo_encode_cu() call */
            UWORD32 u4_cu_hdr_bits_q12 = ps_cab_ctxt->u4_header_bits_estimated_q12;

            /* account for coding qt_root_cbf = 0 */
            /* Add the cost of coding the flag as 0, then remove the cost of coding it */
            /* as 1 (part of header bits), clamping at zero to avoid unsigned wrap     */
            u4_cu_hdr_bits_q12 += gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 0];
            if(u4_cu_hdr_bits_q12 < gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1])
                u4_cu_hdr_bits_q12 = 0;
            else
                u4_cu_hdr_bits_q12 -= gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1];

            /* add the cost of coding the header bits */
            i8_cu_not_coded_cost += COMPUTE_RATE_COST_CLIP30(
                u4_cu_hdr_bits_q12 /* ps_final_prms->u4_cu_hdr_bits */,
                ps_ctxt->i8_cl_ssd_lambda_qf,
                (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));

            /* with psy RDOPT the not coded cost is forced above the coded cost, */
            /* so the comparison below never selects the not coded path          */
            if(ps_ctxt->u1_enable_psyRDOPT)
            {
                i8_cu_not_coded_cost = total_rdopt_cost + 1;
            }

            /* Evaluate qtroot cbf rdo; exclude 2Nx2N Merge as skip cu is explicitly evaluated */
            if((i8_cu_not_coded_cost <= total_rdopt_cost) && (!is_2nx2n_mergecu))
            {
                WORD32 tx_size;

                /* force cu as not coded and update the cost */
                ps_final_prms->u1_is_cu_coded = 0;
                ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
                ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0;

                total_rdopt_cost = i8_cu_not_coded_cost;

                /* reset num TUs to 1 unless cu size is 64 */
                ps_final_prms->u2_num_tus_in_cu = (64 == cu_size) ? 4 : 1;
                trans_size = (64 == cu_size) ? 32 : cu_size;
                GETRANGE(tx_size, trans_size);

                /* reset the bytes consumed */
                ps_final_prms->i4_num_bytes_ecd_data = 0;

                /* reset texture related bits and roll back header bits */
                ps_final_prms->u4_cu_cbf_bits = 0;
                ps_final_prms->u4_cu_luma_res_bits = 0;
                ps_final_prms->u4_cu_chroma_res_bits = 0;
                ps_final_prms->u4_cu_hdr_bits =
                    (u4_cu_hdr_bits_q12 + (1 << (CABAC_FRAC_BITS_Q - 1))) >> CABAC_FRAC_BITS_Q;

                /* update cabac model with qtroot cbf = 0 decision */
                ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_NORES_IDX] =
                    gau1_ihevc_next_state[u1_qtroot_cbf_cabac_model << 1];

                /* restore untouched cabac models for tusplit, cbfs, texture etc */
                memcpy(
                    &ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_SPLIT_TFM],
                    &au1_rdopt_init_ctxt_models[IHEVC_CAB_SPLIT_TFM],
                    (IHEVC_CAB_CTXT_END - IHEVC_CAB_SPLIT_TFM));

                /* mark all tus as not coded for final eval */
                for(ctr = 0; ctr < ps_final_prms->u2_num_tus_in_cu; ctr++)
                {
                    WORD32 curr_pos_x = (ctr & 0x1) ? (trans_size >> 2) : 0;
                    WORD32 curr_pos_y = (ctr & 0x2) ? (trans_size >> 2) : 0;

                    nbr_4x4_t *ps_cur_nbr_4x4 =
                        ps_nbr_4x4 + curr_pos_x + (curr_pos_y * num_4x4_in_cu);

                    num_4x4_in_tu = trans_size >> 2;

                    ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed = 0;
                    ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cb_bytes_consumed[0] = 0;
                    ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cr_bytes_consumed[0] = 0;

                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = 0;
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0;
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0;

                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0;
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0;

                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3;
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x + curr_pos_x;
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y + curr_pos_y;

                    /* reset cbf for all the 4x4 in TU */
                    {
                        WORD32 i, j;
                        nbr_4x4_t *ps_tmp_4x4;
                        ps_tmp_4x4 = ps_cur_nbr_4x4;

                        for(i = 0; i < num_4x4_in_tu; i++)
                        {
                            for(j = 0; j < num_4x4_in_tu; j++)
                            {
                                ps_tmp_4x4[j].b1_y_cbf = 0;
                            }
                            /* row level update */
                            ps_tmp_4x4 += num_4x4_in_cu;
                        }
                    }
                }
            }
        }
#endif /* ENABLE_INTER_ZCU_COST */

#endif /* RDOPT_ENABLE */
    }

    return (total_rdopt_cost);
}
#endif

/*!
5807 ****************************************************************************** 5808 * \if Function name : ihevce_inter_rdopt_cu_mc_mvp \endif 5809 * 5810 * \brief 5811 * Inter Coding unit funtion which performs MC and MVP calc for RD opt mode 5812 * 5813 * \param[in] ps_ctxt enc_loop module ctxt pointer 5814 * \param[in] ps_inter_cand pointer to inter candidate structure 5815 * \param[in] cu_size Current CU size 5816 * \param[in] cu_pos_x cu position x w.r.t to ctb 5817 * \param[in] cu_pos_y cu position y w.r.t to ctb 5818 * \param[in] ps_left_nbr_4x4 Left neighbour 4x4 structure pointer 5819 * \param[in] ps_top_nbr_4x4 top neighbour 4x4 structure pointer 5820 * \param[in] ps_topleft_nbr_4x4 top left neighbour 4x4 structure pointer 5821 * \param[in] nbr_4x4_left_strd left neighbour 4x4 buffer stride 5822 * \param[in] curr_buf_idx Current Buffer index 5823 * 5824 * \return 5825 * Rdopt cost 5826 * 5827 * \author 5828 * Ittiam 5829 * 5830 ***************************************************************************** 5831 */ 5832 LWORD64 ihevce_inter_rdopt_cu_mc_mvp( 5833 ihevce_enc_loop_ctxt_t *ps_ctxt, 5834 cu_inter_cand_t *ps_inter_cand, 5835 WORD32 cu_size, 5836 WORD32 cu_pos_x, 5837 WORD32 cu_pos_y, 5838 nbr_4x4_t *ps_left_nbr_4x4, 5839 nbr_4x4_t *ps_top_nbr_4x4, 5840 nbr_4x4_t *ps_topleft_nbr_4x4, 5841 WORD32 nbr_4x4_left_strd, 5842 WORD32 curr_buf_idx) 5843 { 5844 /* local variables */ 5845 enc_loop_cu_final_prms_t *ps_final_prms; 5846 nbr_avail_flags_t s_nbr; 5847 nbr_4x4_t *ps_nbr_4x4; 5848 5849 UWORD8 au1_is_top_used[2][MAX_MVP_LIST_CAND]; 5850 UWORD8 *pu1_pred; 5851 WORD32 rdopt_cost; 5852 WORD32 ctr; 5853 WORD32 num_cu_part; 5854 WORD32 inter_pu_wd; 5855 WORD32 inter_pu_ht; 5856 WORD32 pred_stride; 5857 5858 /* get the pointers based on curbuf idx */ 5859 ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0]; 5860 ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx]; 5861 pu1_pred = ps_inter_cand->pu1_pred_data; 5862 5863 pred_stride = 
ps_inter_cand->i4_pred_data_stride; 5864 5865 /* store the partition mode in final prms */ 5866 ps_final_prms->u1_part_mode = ps_inter_cand->b3_part_size; 5867 5868 /* since encoder does not support NXN part type */ 5869 /* num parts can be either 1 or 2 only */ 5870 ASSERT(SIZE_NxN != ps_inter_cand->b3_part_size); 5871 5872 num_cu_part = (SIZE_2Nx2N != ps_inter_cand->b3_part_size) + 1; 5873 5874 /* get the 4x4 level position of current cu */ 5875 cu_pos_x = cu_pos_x << 1; 5876 cu_pos_y = cu_pos_y << 1; 5877 5878 /* populate cu level params */ 5879 ps_final_prms->u1_intra_flag = PRED_MODE_INTER; 5880 ps_final_prms->u2_num_pus_in_cu = num_cu_part; 5881 5882 /* run a loop over all the partitons in cu */ 5883 for(ctr = 0; ctr < num_cu_part; ctr++) 5884 { 5885 pu_mv_t as_pred_mv[MAX_MVP_LIST_CAND]; 5886 pu_t *ps_pu; 5887 WORD32 skip_or_merge_flag; 5888 UWORD8 u1_use_mvp_from_top_row; 5889 5890 ps_pu = &ps_inter_cand->as_inter_pu[ctr]; 5891 5892 /* IF AMP then each partitions can have diff wd ht */ 5893 inter_pu_wd = (ps_pu->b4_wd + 1) << 2; 5894 inter_pu_ht = (ps_pu->b4_ht + 1) << 2; 5895 5896 /* populate reference pic buf id for bs compute */ 5897 5898 /* L0 */ 5899 if(-1 != ps_pu->mv.i1_l0_ref_idx) 5900 { 5901 ps_pu->mv.i1_l0_ref_pic_buf_id = 5902 ps_ctxt->s_mv_pred_ctxt.ps_ref_list[0][ps_pu->mv.i1_l0_ref_idx]->i4_buf_id; 5903 } 5904 5905 /* L1 */ 5906 if(-1 != ps_pu->mv.i1_l1_ref_idx) 5907 { 5908 ps_pu->mv.i1_l1_ref_pic_buf_id = 5909 ps_ctxt->s_mv_pred_ctxt.ps_ref_list[1][ps_pu->mv.i1_l1_ref_idx]->i4_buf_id; 5910 } 5911 5912 /* SKIP or merge check for every part */ 5913 skip_or_merge_flag = ps_inter_cand->b1_skip_flag | ps_pu->b1_merge_flag; 5914 5915 /* ----------- MV Prediction ----------------- */ 5916 if(0 == skip_or_merge_flag) 5917 { 5918 /* get the neighbour availability flags */ 5919 ihevce_get_only_nbr_flag( 5920 &s_nbr, 5921 ps_ctxt->pu1_ctb_nbr_map, 5922 ps_ctxt->i4_nbr_map_strd, 5923 cu_pos_x, 5924 cu_pos_y, 5925 inter_pu_wd >> 2, 5926 inter_pu_ht >> 2); 
5927 5928 if(ps_ctxt->u1_disable_intra_eval && DISABLE_TOP_SYNC && (ps_pu->b4_pos_y == 0)) 5929 { 5930 u1_use_mvp_from_top_row = 0; 5931 } 5932 else 5933 { 5934 u1_use_mvp_from_top_row = 1; 5935 } 5936 5937 if(!u1_use_mvp_from_top_row) 5938 { 5939 if(s_nbr.u1_top_avail || s_nbr.u1_top_lt_avail || s_nbr.u1_top_rt_avail) 5940 { 5941 if(!s_nbr.u1_left_avail && !s_nbr.u1_bot_lt_avail) 5942 { 5943 WORD32 curr_cu_pos_in_row, cu_top_right_offset, cu_top_right_dep_pos; 5944 5945 /* Ensure Top Right Sync */ 5946 if(!ps_ctxt->u1_use_top_at_ctb_boundary) 5947 { 5948 curr_cu_pos_in_row = 5949 ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_x + (cu_pos_x << 2); 5950 5951 if(ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y == 0) 5952 { 5953 /* No wait for 1st row */ 5954 cu_top_right_offset = -(MAX_CTB_SIZE); 5955 { 5956 ihevce_tile_params_t *ps_col_tile_params = 5957 ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base + 5958 ps_ctxt->i4_tile_col_idx); 5959 5960 /* No wait for 1st row */ 5961 cu_top_right_offset = 5962 -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE)); 5963 } 5964 cu_top_right_dep_pos = 0; 5965 } 5966 else 5967 { 5968 cu_top_right_offset = (cu_size) + 4; 5969 cu_top_right_dep_pos = 5970 (ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y >> 6) - 1; 5971 } 5972 5973 ihevce_dmgr_chk_row_row_sync( 5974 ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right, 5975 curr_cu_pos_in_row, 5976 cu_top_right_offset, 5977 cu_top_right_dep_pos, 5978 ps_ctxt->i4_tile_col_idx, /* Col Tile No. 
*/ 5979 ps_ctxt->thrd_id); 5980 } 5981 5982 u1_use_mvp_from_top_row = 1; 5983 } 5984 else 5985 { 5986 s_nbr.u1_top_avail = 0; 5987 s_nbr.u1_top_lt_avail = 0; 5988 s_nbr.u1_top_rt_avail = 0; 5989 } 5990 } 5991 else 5992 { 5993 u1_use_mvp_from_top_row = 1; 5994 } 5995 } 5996 /* Call the MV prediction module to get MVP */ 5997 ihevce_mv_pred( 5998 &ps_ctxt->s_mv_pred_ctxt, 5999 ps_top_nbr_4x4, 6000 ps_left_nbr_4x4, 6001 ps_topleft_nbr_4x4, 6002 nbr_4x4_left_strd, 6003 &s_nbr, 6004 NULL, /* colocated MV */ 6005 ps_pu, 6006 &as_pred_mv[0], 6007 au1_is_top_used); 6008 } 6009 6010 /* store the nbr 4x4 structure */ 6011 ps_nbr_4x4->b1_skip_flag = ps_inter_cand->b1_skip_flag; 6012 ps_nbr_4x4->b1_intra_flag = 0; 6013 ps_nbr_4x4->b1_pred_l0_flag = 0; 6014 ps_nbr_4x4->b1_pred_l1_flag = 0; 6015 6016 /* DC is default mode for inter cu, required for intra mode signalling */ 6017 ps_nbr_4x4->b6_luma_intra_mode = 1; 6018 6019 /* copy the motion vectors to neighbour structure */ 6020 ps_nbr_4x4->mv = ps_pu->mv; 6021 6022 /* copy the PU to final out pu */ 6023 ps_final_prms->as_pu_enc_loop[ctr] = *ps_pu; 6024 6025 /* copy the PU to chroma */ 6026 ps_final_prms->as_pu_chrm_proc[ctr] = *ps_pu; 6027 6028 /* store the skip flag to final prms */ 6029 ps_final_prms->u1_skip_flag = ps_inter_cand->b1_skip_flag; 6030 6031 /* MVP index & MVD calc is gated on skip/merge flag */ 6032 if(0 == skip_or_merge_flag) 6033 { 6034 /* calculate the MVDs and popluate the MVP idx for L0 */ 6035 if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L0 == ps_pu->b2_pred_mode)) 6036 { 6037 WORD32 idx0_cost, idx1_cost; 6038 6039 /* calculate the ABS mvd for cand 0 */ 6040 idx0_cost = abs(ps_pu->mv.s_l0_mv.i2_mvx - as_pred_mv[0].s_l0_mv.i2_mvx); 6041 idx0_cost += abs(ps_pu->mv.s_l0_mv.i2_mvy - as_pred_mv[0].s_l0_mv.i2_mvy); 6042 6043 /* calculate the ABS mvd for cand 1 */ 6044 if(u1_use_mvp_from_top_row) 6045 { 6046 idx1_cost = abs(ps_pu->mv.s_l0_mv.i2_mvx - as_pred_mv[1].s_l0_mv.i2_mvx); 6047 idx1_cost += 
abs(ps_pu->mv.s_l0_mv.i2_mvy - as_pred_mv[1].s_l0_mv.i2_mvy); 6048 } 6049 else 6050 { 6051 idx1_cost = INT_MAX; 6052 } 6053 6054 /* based on the least cost choose the mvp idx */ 6055 if(idx0_cost <= idx1_cost) 6056 { 6057 ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvx -= 6058 as_pred_mv[0].s_l0_mv.i2_mvx; 6059 ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvy -= 6060 as_pred_mv[0].s_l0_mv.i2_mvy; 6061 6062 ps_final_prms->as_pu_enc_loop[ctr].b1_l0_mvp_idx = 0; 6063 } 6064 else 6065 { 6066 ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvx -= 6067 as_pred_mv[1].s_l0_mv.i2_mvx; 6068 ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvy -= 6069 as_pred_mv[1].s_l0_mv.i2_mvy; 6070 6071 ps_final_prms->as_pu_enc_loop[ctr].b1_l0_mvp_idx = 1; 6072 } 6073 6074 /* set the pred l0 flag for neighbour storage */ 6075 ps_nbr_4x4->b1_pred_l0_flag = 1; 6076 } 6077 /* calculate the MVDs and popluate the MVP idx for L1 */ 6078 if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L1 == ps_pu->b2_pred_mode)) 6079 { 6080 WORD32 idx0_cost, idx1_cost; 6081 6082 /* calculate the ABS mvd for cand 0 */ 6083 idx0_cost = abs(ps_pu->mv.s_l1_mv.i2_mvx - as_pred_mv[0].s_l1_mv.i2_mvx); 6084 idx0_cost += abs(ps_pu->mv.s_l1_mv.i2_mvy - as_pred_mv[0].s_l1_mv.i2_mvy); 6085 6086 /* calculate the ABS mvd for cand 1 */ 6087 if(u1_use_mvp_from_top_row) 6088 { 6089 idx1_cost = abs(ps_pu->mv.s_l1_mv.i2_mvx - as_pred_mv[1].s_l1_mv.i2_mvx); 6090 idx1_cost += abs(ps_pu->mv.s_l1_mv.i2_mvy - as_pred_mv[1].s_l1_mv.i2_mvy); 6091 } 6092 else 6093 { 6094 idx1_cost = INT_MAX; 6095 } 6096 6097 /* based on the least cost choose the mvp idx */ 6098 if(idx0_cost <= idx1_cost) 6099 { 6100 ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvx -= 6101 as_pred_mv[0].s_l1_mv.i2_mvx; 6102 ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvy -= 6103 as_pred_mv[0].s_l1_mv.i2_mvy; 6104 6105 ps_final_prms->as_pu_enc_loop[ctr].b1_l1_mvp_idx = 0; 6106 } 6107 else 6108 { 6109 ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvx -= 
6110 as_pred_mv[1].s_l1_mv.i2_mvx; 6111 ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvy -= 6112 as_pred_mv[1].s_l1_mv.i2_mvy; 6113 6114 ps_final_prms->as_pu_enc_loop[ctr].b1_l1_mvp_idx = 1; 6115 } 6116 6117 /* set the pred l1 flag for neighbour storage */ 6118 ps_nbr_4x4->b1_pred_l1_flag = 1; 6119 } 6120 6121 /* set the merge flag to 0 */ 6122 ps_final_prms->as_pu_enc_loop[ctr].b1_merge_flag = 0; 6123 ps_final_prms->as_pu_enc_loop[ctr].b3_merge_idx = 0; 6124 } 6125 else 6126 { 6127 /* copy the merge index from candidate */ 6128 ps_final_prms->as_pu_enc_loop[ctr].b1_merge_flag = ps_pu->b1_merge_flag; 6129 6130 ps_final_prms->as_pu_enc_loop[ctr].b3_merge_idx = ps_pu->b3_merge_idx; 6131 6132 if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L0 == ps_pu->b2_pred_mode)) 6133 { 6134 /* set the pred l0 flag for neighbour storage */ 6135 ps_nbr_4x4->b1_pred_l0_flag = 1; 6136 } 6137 6138 /* calculate the MVDs and popluate the MVP idx for L1 */ 6139 if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L1 == ps_pu->b2_pred_mode)) 6140 { 6141 /* set the pred l1 flag for neighbour storage */ 6142 ps_nbr_4x4->b1_pred_l1_flag = 1; 6143 } 6144 } 6145 6146 /* RD opt cost computation is part of cu_ntu func hence here it is set to 0 */ 6147 rdopt_cost = 0; 6148 6149 /* copy the MV to colocated Mv structure */ 6150 ps_final_prms->as_col_pu_enc_loop[ctr].s_l0_mv = ps_pu->mv.s_l0_mv; 6151 ps_final_prms->as_col_pu_enc_loop[ctr].s_l1_mv = ps_pu->mv.s_l1_mv; 6152 ps_final_prms->as_col_pu_enc_loop[ctr].i1_l0_ref_idx = ps_pu->mv.i1_l0_ref_idx; 6153 ps_final_prms->as_col_pu_enc_loop[ctr].i1_l1_ref_idx = ps_pu->mv.i1_l1_ref_idx; 6154 ps_final_prms->as_col_pu_enc_loop[ctr].b2_pred_mode = ps_pu->b2_pred_mode; 6155 ps_final_prms->as_col_pu_enc_loop[ctr].b1_intra_flag = 0; 6156 6157 /* replicate neighbour 4x4 strcuture for entire partition */ 6158 { 6159 WORD32 i, j; 6160 nbr_4x4_t *ps_tmp_4x4; 6161 6162 ps_tmp_4x4 = ps_nbr_4x4; 6163 6164 for(i = 0; i < (inter_pu_ht >> 2); i++) 6165 { 6166 for(j = 0; j < 
(inter_pu_wd >> 2); j++) 6167 { 6168 ps_tmp_4x4[j] = *ps_nbr_4x4; 6169 } 6170 /* row level update*/ 6171 ps_tmp_4x4 += (cu_size >> 2); 6172 } 6173 } 6174 /* set the neighbour map to 1 */ 6175 ihevce_set_inter_nbr_map( 6176 ps_ctxt->pu1_ctb_nbr_map, 6177 ps_ctxt->i4_nbr_map_strd, 6178 cu_pos_x, 6179 cu_pos_y, 6180 (inter_pu_wd >> 2), 6181 (inter_pu_ht >> 2), 6182 1); 6183 /* ----------- Motion Compensation for Luma ----------- */ 6184 #if !ENABLE_MIXED_INTER_MODE_EVAL 6185 { 6186 IV_API_CALL_STATUS_T valid_mv_cand; 6187 6188 /*If the inter candidate is neither merge cand nor skip cand 6189 then calculate the mc.*/ 6190 if(0 == skip_or_merge_flag || (ps_ctxt->u1_high_speed_cu_dec_on)) 6191 { 6192 valid_mv_cand = 6193 ihevce_luma_inter_pred_pu(&ps_ctxt->s_mc_ctxt, ps_pu, pu1_pred, pred_stride, 0); 6194 6195 /* assert if the MC is given a valid mv candidate */ 6196 ASSERT(valid_mv_cand == IV_SUCCESS); 6197 } 6198 } 6199 #endif 6200 if((2 == num_cu_part) && (0 == ctr)) 6201 { 6202 /* 2Nx__ partion case */ 6203 if(inter_pu_wd == cu_size) 6204 { 6205 cu_pos_y += (inter_pu_ht >> 2); 6206 pu1_pred += (inter_pu_ht * pred_stride); 6207 ps_nbr_4x4 += (inter_pu_ht >> 2) * (cu_size >> 2); 6208 ps_left_nbr_4x4 += (inter_pu_ht >> 2) * nbr_4x4_left_strd; 6209 ps_top_nbr_4x4 = ps_nbr_4x4 - (cu_size >> 2); 6210 ps_topleft_nbr_4x4 = ps_left_nbr_4x4 - nbr_4x4_left_strd; 6211 } 6212 6213 /* __x2N partion case */ 6214 if(inter_pu_ht == cu_size) 6215 { 6216 cu_pos_x += (inter_pu_wd >> 2); 6217 pu1_pred += inter_pu_wd; 6218 ps_nbr_4x4 += (inter_pu_wd >> 2); 6219 ps_left_nbr_4x4 = ps_nbr_4x4 - 1; 6220 ps_top_nbr_4x4 += (inter_pu_wd >> 2); 6221 ps_topleft_nbr_4x4 = ps_top_nbr_4x4 - 1; 6222 nbr_4x4_left_strd = (cu_size >> 2); 6223 } 6224 } 6225 } 6226 6227 return (rdopt_cost); 6228 } 6229 6230 /*! 
6231 ****************************************************************************** 6232 * \if Function name : ihevce_intra_chroma_pred_mode_selector \endif 6233 * 6234 * \brief 6235 * Coding unit processing function for chroma special modes (Non-Luma modes) 6236 * 6237 * \param[in] ps_ctxt enc_loop module ctxt pointer 6238 * \param[in] ps_chrm_cu_buf_prms ctxt having chroma related prms 6239 * \param[in] ps_cu_analyse pointer to cu analyse 6240 * \param[in] rd_opt_curr_idx index in the array of RDopt params 6241 * \param[in] tu_mode TU_EQ_CU or other case 6242 * 6243 * \return 6244 * Stores the best SATD mode, it's RDOPT cost, CABAC state, TU bits 6245 * 6246 * \author 6247 * Ittiam 6248 * 6249 ***************************************************************************** 6250 */ 6251 UWORD8 ihevce_distortion_based_intra_chroma_mode_selector( 6252 cu_analyse_t *ps_cu_analyse, 6253 ihevc_intra_pred_chroma_ref_substitution_ft *pf_ref_substitution, 6254 pf_intra_pred *ppf_chroma_ip, 6255 pf_res_trans_luma_had_chroma *ppf_resd_trns_had, 6256 UWORD8 *pu1_src, 6257 WORD32 i4_src_stride, 6258 UWORD8 *pu1_pred, 6259 WORD32 i4_pred_stride, 6260 UWORD8 *pu1_ctb_nbr_map, 6261 WORD32 i4_nbr_map_strd, 6262 UWORD8 *pu1_ref_sub_out, 6263 WORD32 i4_alpha_stim_multiplier, 6264 UWORD8 u1_is_cu_noisy, 6265 UWORD8 u1_trans_size, 6266 UWORD8 u1_trans_idx, 6267 UWORD8 u1_num_tus_in_cu, 6268 UWORD8 u1_num_4x4_luma_blks_in_tu, 6269 UWORD8 u1_enable_psyRDOPT, 6270 UWORD8 u1_is_422) 6271 { 6272 UWORD8 u1_chrm_mode; 6273 UWORD8 ctr; 6274 WORD32 i4_subtu_idx; 6275 6276 WORD32 i = 0; 6277 UWORD8 u1_chrm_modes[4] = { 0, 1, 10, 26 }; 6278 WORD32 i4_satd_had[4] = { 0 }; 6279 WORD32 i4_best_satd_had = INT_MAX; 6280 UWORD8 u1_cu_pos_x = (ps_cu_analyse->b3_cu_pos_x << 1); 6281 UWORD8 u1_cu_pos_y = (ps_cu_analyse->b3_cu_pos_y << 1); 6282 WORD32 i4_num_sub_tus = u1_is_422 + 1; 6283 UWORD8 u1_best_chrm_mode = 0; 6284 6285 /* Get the best satd among all possible modes */ 6286 for(i = 0; i < 4; i++) 6287 
{ 6288 WORD32 left_strd = i4_src_stride; 6289 6290 u1_chrm_mode = (u1_is_422 == 1) ? gau1_chroma422_intra_angle_mapping[u1_chrm_modes[i]] 6291 : u1_chrm_modes[i]; 6292 6293 /* loop based on num tus in a cu */ 6294 for(ctr = 0; ctr < u1_num_tus_in_cu; ctr++) 6295 { 6296 WORD32 luma_nbr_flags; 6297 WORD32 chrm_pred_func_idx; 6298 6299 WORD32 i4_trans_size_m2 = u1_trans_size << 1; 6300 UWORD8 *pu1_tu_src = pu1_src + ((ctr & 1) * i4_trans_size_m2) + 6301 (((ctr > 1) * u1_trans_size * i4_src_stride) << u1_is_422); 6302 UWORD8 *pu1_tu_pred = pu1_pred + ((ctr & 1) * i4_trans_size_m2) + 6303 (((ctr > 1) * u1_trans_size * i4_pred_stride) << u1_is_422); 6304 WORD32 i4_curr_tu_pos_x = u1_cu_pos_x + ((ctr & 1) * u1_num_4x4_luma_blks_in_tu); 6305 WORD32 i4_curr_tu_pos_y = u1_cu_pos_y + ((ctr > 1) * u1_num_4x4_luma_blks_in_tu); 6306 6307 luma_nbr_flags = ihevce_get_nbr_intra_mxn_tu( 6308 pu1_ctb_nbr_map, 6309 i4_nbr_map_strd, 6310 i4_curr_tu_pos_x, 6311 i4_curr_tu_pos_y, 6312 u1_num_4x4_luma_blks_in_tu, 6313 u1_num_4x4_luma_blks_in_tu); 6314 6315 for(i4_subtu_idx = 0; i4_subtu_idx < i4_num_sub_tus; i4_subtu_idx++) 6316 { 6317 WORD32 nbr_flags; 6318 6319 UWORD8 *pu1_cur_src = 6320 pu1_tu_src + ((i4_subtu_idx == 1) * u1_trans_size * i4_src_stride); 6321 UWORD8 *pu1_cur_pred = 6322 pu1_tu_pred + ((i4_subtu_idx == 1) * u1_trans_size * i4_pred_stride); 6323 UWORD8 *pu1_left = pu1_cur_src - 2; 6324 UWORD8 *pu1_top = pu1_cur_src - i4_src_stride; 6325 UWORD8 *pu1_top_left = pu1_top - 2; 6326 6327 nbr_flags = ihevce_get_intra_chroma_tu_nbr( 6328 luma_nbr_flags, i4_subtu_idx, u1_trans_size, u1_is_422); 6329 6330 /* call the chroma reference array substitution */ 6331 pf_ref_substitution( 6332 pu1_top_left, 6333 pu1_top, 6334 pu1_left, 6335 left_strd, 6336 u1_trans_size, 6337 nbr_flags, 6338 pu1_ref_sub_out, 6339 1); 6340 6341 /* use the look up to get the function idx */ 6342 chrm_pred_func_idx = g_i4_ip_funcs[u1_chrm_mode]; 6343 6344 /* call the intra prediction function */ 6345 
ppf_chroma_ip[chrm_pred_func_idx]( 6346 pu1_ref_sub_out, 1, pu1_cur_pred, i4_pred_stride, u1_trans_size, u1_chrm_mode); 6347 6348 if(!u1_is_cu_noisy || !i4_alpha_stim_multiplier) 6349 { 6350 /* compute Hadamard-transform satd : Cb */ 6351 i4_satd_had[i] += ppf_resd_trns_had[u1_trans_idx - 1]( 6352 pu1_cur_src, i4_src_stride, pu1_cur_pred, i4_pred_stride, NULL, 0); 6353 6354 /* compute Hadamard-transform satd : Cr */ 6355 i4_satd_had[i] += ppf_resd_trns_had[u1_trans_idx - 1]( 6356 pu1_cur_src + 1, i4_src_stride, pu1_cur_pred + 1, i4_pred_stride, NULL, 0); 6357 } 6358 else 6359 { 6360 WORD32 i4_satd; 6361 6362 /* compute Hadamard-transform satd : Cb */ 6363 i4_satd = ppf_resd_trns_had[u1_trans_idx - 1]( 6364 pu1_cur_src, i4_src_stride, pu1_cur_pred, i4_pred_stride, NULL, 0); 6365 6366 i4_satd = ihevce_inject_stim_into_distortion( 6367 pu1_cur_src, 6368 i4_src_stride, 6369 pu1_cur_pred, 6370 i4_pred_stride, 6371 i4_satd, 6372 i4_alpha_stim_multiplier, 6373 u1_trans_size, 6374 0, 6375 u1_enable_psyRDOPT, 6376 U_PLANE); 6377 6378 i4_satd_had[i] += i4_satd; 6379 6380 /* compute Hadamard-transform satd : Cr */ 6381 i4_satd = ppf_resd_trns_had[u1_trans_idx - 1]( 6382 pu1_cur_src + 1, i4_src_stride, pu1_cur_pred + 1, i4_pred_stride, NULL, 0); 6383 6384 i4_satd = ihevce_inject_stim_into_distortion( 6385 pu1_cur_src, 6386 i4_src_stride, 6387 pu1_cur_pred, 6388 i4_pred_stride, 6389 i4_satd, 6390 i4_alpha_stim_multiplier, 6391 u1_trans_size, 6392 0, 6393 u1_enable_psyRDOPT, 6394 V_PLANE); 6395 6396 i4_satd_had[i] += i4_satd; 6397 } 6398 } 6399 6400 /* set the neighbour map to 1 */ 6401 ihevce_set_nbr_map( 6402 pu1_ctb_nbr_map, 6403 i4_nbr_map_strd, 6404 i4_curr_tu_pos_x, 6405 i4_curr_tu_pos_y, 6406 u1_num_4x4_luma_blks_in_tu, 6407 1); 6408 } 6409 6410 /* set the neighbour map to 0 */ 6411 ihevce_set_nbr_map( 6412 pu1_ctb_nbr_map, 6413 i4_nbr_map_strd, 6414 (ps_cu_analyse->b3_cu_pos_x << 1), 6415 (ps_cu_analyse->b3_cu_pos_y << 1), 6416 (ps_cu_analyse->u1_cu_size >> 2), 6417 0); 
6418 6419 /* Get the least SATD and corresponding mode */ 6420 if(i4_best_satd_had > i4_satd_had[i]) 6421 { 6422 i4_best_satd_had = i4_satd_had[i]; 6423 u1_best_chrm_mode = u1_chrm_mode; 6424 } 6425 } 6426 6427 return u1_best_chrm_mode; 6428 } 6429 6430 void ihevce_intra_chroma_pred_mode_selector( 6431 ihevce_enc_loop_ctxt_t *ps_ctxt, 6432 enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms, 6433 cu_analyse_t *ps_cu_analyse, 6434 WORD32 rd_opt_curr_idx, 6435 WORD32 tu_mode, 6436 WORD32 i4_alpha_stim_multiplier, 6437 UWORD8 u1_is_cu_noisy) 6438 { 6439 chroma_intra_satd_ctxt_t *ps_chr_intra_satd_ctxt; 6440 6441 ihevc_intra_pred_chroma_ref_substitution_ft *ihevc_intra_pred_chroma_ref_substitution_fptr; 6442 6443 UWORD8 *pu1_pred; 6444 WORD32 trans_size; 6445 WORD32 num_tus_in_cu; 6446 WORD32 pred_strd; 6447 WORD32 ctr; 6448 WORD32 i4_subtu_idx; 6449 WORD32 i4_num_sub_tus; 6450 WORD32 trans_idx; 6451 WORD32 scan_idx; 6452 WORD32 num_4x4_luma_in_tu; 6453 WORD32 cu_pos_x; 6454 WORD32 cu_pos_y; 6455 6456 recon_datastore_t *aps_recon_datastore[2] = { &ps_ctxt->as_cu_prms[0].s_recon_datastore, 6457 &ps_ctxt->as_cu_prms[1].s_recon_datastore }; 6458 6459 LWORD64 chrm_cod_cost = 0; 6460 WORD32 chrm_tu_bits = 0; 6461 WORD32 best_chrm_mode = DM_CHROMA_IDX; 6462 UWORD8 *pu1_chrm_src = ps_chrm_cu_buf_prms->pu1_curr_src; 6463 WORD32 chrm_src_stride = ps_chrm_cu_buf_prms->i4_chrm_src_stride; 6464 UWORD8 *pu1_cu_left = ps_chrm_cu_buf_prms->pu1_cu_left; 6465 UWORD8 *pu1_cu_top = ps_chrm_cu_buf_prms->pu1_cu_top; 6466 UWORD8 *pu1_cu_top_left = ps_chrm_cu_buf_prms->pu1_cu_top_left; 6467 WORD32 cu_left_stride = ps_chrm_cu_buf_prms->i4_cu_left_stride; 6468 WORD32 cu_size = ps_cu_analyse->u1_cu_size; 6469 WORD32 i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq; 6470 WORD32 i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh; 6471 UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2); 6472 6473 ihevc_intra_pred_chroma_ref_substitution_fptr = 6474 
ps_ctxt->ps_func_selector->ihevc_intra_pred_chroma_ref_substitution_fptr; 6475 i4_num_sub_tus = (u1_is_422 == 1) + 1; 6476 6477 #if DISABLE_RDOQ_INTRA 6478 i4_perform_rdoq = 0; 6479 #endif 6480 6481 if(TU_EQ_CU == tu_mode) 6482 { 6483 num_tus_in_cu = 1; 6484 trans_size = cu_size >> 1; 6485 num_4x4_luma_in_tu = trans_size >> 1; /*at luma level*/ 6486 ps_chr_intra_satd_ctxt = &ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[tu_mode]; 6487 } 6488 else 6489 { 6490 num_tus_in_cu = 4; 6491 trans_size = cu_size >> 2; 6492 num_4x4_luma_in_tu = trans_size >> 1; /*at luma level*/ 6493 6494 /* For 8x8 CU only one TU */ 6495 if(MIN_TU_SIZE > trans_size) 6496 { 6497 trans_size = MIN_TU_SIZE; 6498 num_tus_in_cu = 1; 6499 /* chroma nbr avail. is derived based on luma. 6500 for 4x4 chrm use 8x8 luma's size */ 6501 num_4x4_luma_in_tu = num_4x4_luma_in_tu << 1; 6502 } 6503 6504 ps_chr_intra_satd_ctxt = &ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[tu_mode]; 6505 } 6506 6507 /* Can't be TU_EQ_SUBCU case */ 6508 ASSERT(TU_EQ_SUBCU != tu_mode); 6509 6510 /* translate the transform size to index */ 6511 trans_idx = trans_size >> 2; 6512 6513 pu1_pred = (UWORD8 *)ps_chr_intra_satd_ctxt->pv_pred_data; 6514 6515 pred_strd = ps_chr_intra_satd_ctxt->i4_pred_stride; 6516 6517 /* for 16x16 cases */ 6518 if(16 == trans_size) 6519 { 6520 trans_idx = 3; 6521 } 6522 6523 best_chrm_mode = ihevce_distortion_based_intra_chroma_mode_selector( 6524 ps_cu_analyse, 6525 ihevc_intra_pred_chroma_ref_substitution_fptr, 6526 ps_ctxt->apf_chrm_ip, 6527 ps_ctxt->apf_chrm_resd_trns_had, 6528 pu1_chrm_src, 6529 chrm_src_stride, 6530 pu1_pred, 6531 pred_strd, 6532 ps_ctxt->pu1_ctb_nbr_map, 6533 ps_ctxt->i4_nbr_map_strd, 6534 (UWORD8 *)ps_ctxt->pv_ref_sub_out, 6535 i4_alpha_stim_multiplier, 6536 u1_is_cu_noisy, 6537 trans_size, 6538 trans_idx, 6539 num_tus_in_cu, 6540 num_4x4_luma_in_tu, 6541 ps_ctxt->u1_enable_psyRDOPT, 6542 u1_is_422); 6543 6544 /* Store the best chroma mode */ 6545 
ps_chr_intra_satd_ctxt->u1_best_cr_mode = best_chrm_mode; 6546 6547 /* evaluate RDOPT cost for the Best mode */ 6548 { 6549 WORD32 i4_subtu_pos_x; 6550 WORD32 i4_subtu_pos_y; 6551 UWORD8 u1_compute_spatial_ssd; 6552 6553 WORD32 ai4_total_bytes_offset_cb[2] = { 0, 0 }; 6554 WORD32 ai4_total_bytes_offset_cr[2] = { 0, 0 }; 6555 /* State for prefix bin of chroma intra pred mode before CU encode */ 6556 UWORD8 u1_chroma_intra_mode_prefix_state = 6557 ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_CHROMA_PRED_MODE]; 6558 WORD32 luma_trans_size = trans_size << 1; 6559 WORD32 calc_recon = 0; 6560 UWORD8 *pu1_left = pu1_cu_left; 6561 UWORD8 *pu1_top = pu1_cu_top; 6562 UWORD8 *pu1_top_left = pu1_cu_top_left; 6563 WORD32 left_strd = cu_left_stride; 6564 6565 if(ps_ctxt->i1_cu_qp_delta_enable) 6566 { 6567 WORD32 i4_act_counter = 0, i4_act_counter_lamda = 0; 6568 if(ps_cu_analyse->u1_cu_size == 64) 6569 { 6570 ASSERT( 6571 (luma_trans_size == 32) || (luma_trans_size == 16) || (luma_trans_size == 8) || 6572 (luma_trans_size == 4)); 6573 i4_act_counter = (luma_trans_size == 16) + 6574 2 * ((luma_trans_size == 8) || (luma_trans_size == 4)); 6575 i4_act_counter_lamda = 3; 6576 } 6577 else if(ps_cu_analyse->u1_cu_size == 32) 6578 { 6579 ASSERT( 6580 (luma_trans_size == 32) || (luma_trans_size == 16) || (luma_trans_size == 8) || 6581 (luma_trans_size == 4)); 6582 i4_act_counter = (luma_trans_size == 16) + 6583 2 * ((luma_trans_size == 8) || (luma_trans_size == 4)); 6584 i4_act_counter_lamda = 0; 6585 } 6586 else if(ps_cu_analyse->u1_cu_size == 16) 6587 { 6588 ASSERT((luma_trans_size == 16) || (luma_trans_size == 8) || (luma_trans_size == 4)); 6589 i4_act_counter = (luma_trans_size == 8) || (luma_trans_size == 4); 6590 i4_act_counter_lamda = 0; 6591 } 6592 else if(ps_cu_analyse->u1_cu_size == 8) 6593 { 6594 ASSERT((luma_trans_size == 8) || (luma_trans_size == 4)); 6595 i4_act_counter = 1; 6596 i4_act_counter_lamda = 0; 6597 } 6598 else 6599 { 6600 ASSERT(0); 6601 } 6602 /*assumption is 
that control comes here for intras*/ 6603 if(ps_ctxt->i4_use_ctb_level_lamda) 6604 { 6605 ihevce_compute_cu_level_QP( 6606 ps_ctxt, ps_cu_analyse->i4_act_factor[i4_act_counter][1], -1, 0); 6607 } 6608 else 6609 { 6610 ihevce_compute_cu_level_QP( 6611 ps_ctxt, 6612 ps_cu_analyse->i4_act_factor[i4_act_counter][1], 6613 ps_cu_analyse->i4_act_factor[i4_act_counter_lamda][1], 6614 0); 6615 } 6616 6617 ps_cu_analyse->i1_cu_qp = ps_ctxt->i4_cu_qp; 6618 } 6619 6620 u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) && 6621 (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) && 6622 CONVERT_SSDS_TO_SPATIAL_DOMAIN; 6623 6624 if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT) 6625 { 6626 u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) && 6627 CONVERT_SSDS_TO_SPATIAL_DOMAIN; 6628 } 6629 6630 /* get the 4x4 level postion of current cu */ 6631 cu_pos_x = (ps_cu_analyse->b3_cu_pos_x << 1); 6632 cu_pos_y = (ps_cu_analyse->b3_cu_pos_y << 1); 6633 6634 calc_recon = !u1_compute_spatial_ssd && ((4 == num_tus_in_cu) || (u1_is_422 == 1)); 6635 6636 if(calc_recon || u1_compute_spatial_ssd) 6637 { 6638 aps_recon_datastore[0]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 1; 6639 aps_recon_datastore[1]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 1; 6640 } 6641 else 6642 { 6643 aps_recon_datastore[0]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 0; 6644 aps_recon_datastore[1]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 0; 6645 } 6646 6647 /* loop based on num tus in a cu */ 6648 for(ctr = 0; ctr < num_tus_in_cu; ctr++) 6649 { 6650 WORD16 *pi2_cur_deq_data_cb; 6651 WORD16 *pi2_cur_deq_data_cr; 6652 6653 WORD32 deq_data_strd = ps_chr_intra_satd_ctxt->i4_iq_buff_stride; 6654 WORD32 luma_nbr_flags = 0; 6655 6656 luma_nbr_flags = ihevce_get_nbr_intra_mxn_tu( 6657 ps_ctxt->pu1_ctb_nbr_map, 6658 ps_ctxt->i4_nbr_map_strd, 6659 (ctr & 1) * (luma_trans_size >> 2) + cu_pos_x, 6660 (ctr > 1) * (luma_trans_size >> 2) + 
cu_pos_y, 6661 (luma_trans_size >> 2), 6662 (luma_trans_size >> 2)); 6663 6664 for(i4_subtu_idx = 0; i4_subtu_idx < i4_num_sub_tus; i4_subtu_idx++) 6665 { 6666 WORD32 cbf, num_bytes; 6667 LWORD64 trans_ssd_u, trans_ssd_v; 6668 UWORD8 u1_is_recon_available; 6669 6670 WORD32 trans_size_m2 = trans_size << 1; 6671 UWORD8 *pu1_cur_src = pu1_chrm_src + ((ctr & 1) * trans_size_m2) + 6672 (((ctr > 1) * trans_size * chrm_src_stride) << u1_is_422) + 6673 (i4_subtu_idx * trans_size * chrm_src_stride); 6674 UWORD8 *pu1_cur_pred = pu1_pred + ((ctr & 1) * trans_size_m2) + 6675 (((ctr > 1) * trans_size * pred_strd) << u1_is_422) + 6676 (i4_subtu_idx * trans_size * pred_strd); 6677 WORD32 i4_recon_stride = aps_recon_datastore[0]->i4_chromaRecon_stride; 6678 UWORD8 *pu1_cur_recon = ((UWORD8 *)aps_recon_datastore[0] 6679 ->apv_chroma_recon_bufs[1 + (num_tus_in_cu > 1)]) + 6680 ((ctr & 1) * trans_size_m2) + 6681 (((ctr > 1) * trans_size * i4_recon_stride) << u1_is_422) + 6682 (i4_subtu_idx * trans_size * i4_recon_stride); 6683 6684 /* Use Chroma coeff/iq buf of the cur. intra cand. Not rememb. 6685 chroma coeff/iq for high quality intra SATD special modes. 
Will 6686 be over written by coeff of luma mode in chroma_rdopt call */ 6687 UWORD8 *pu1_ecd_data_cb = 6688 &ps_chr_intra_satd_ctxt->au1_scan_coeff_cb[i4_subtu_idx][0]; 6689 UWORD8 *pu1_ecd_data_cr = 6690 &ps_chr_intra_satd_ctxt->au1_scan_coeff_cr[i4_subtu_idx][0]; 6691 6692 WORD32 chrm_pred_func_idx = 0; 6693 LWORD64 curr_cb_cod_cost = 0; 6694 LWORD64 curr_cr_cod_cost = 0; 6695 WORD32 nbr_flags = 0; 6696 6697 i4_subtu_pos_x = (((ctr & 1) * trans_size_m2) >> 2); 6698 i4_subtu_pos_y = (((ctr > 1) * trans_size) >> (!u1_is_422 + 1)) + 6699 ((i4_subtu_idx * trans_size) >> 2); 6700 pi2_cur_deq_data_cb = &ps_chr_intra_satd_ctxt->ai2_iq_data_cb[0] + 6701 ((ctr & 1) * trans_size) + 6702 (((ctr > 1) * trans_size * deq_data_strd) << u1_is_422) + 6703 (i4_subtu_idx * trans_size * deq_data_strd); 6704 pi2_cur_deq_data_cr = &ps_chr_intra_satd_ctxt->ai2_iq_data_cr[0] + 6705 ((ctr & 1) * trans_size) + 6706 (((ctr > 1) * trans_size * deq_data_strd) << u1_is_422) + 6707 (i4_subtu_idx * trans_size * deq_data_strd); 6708 6709 /* left cu boundary */ 6710 if(0 == i4_subtu_pos_x) 6711 { 6712 left_strd = cu_left_stride; 6713 pu1_left = pu1_cu_left + (i4_subtu_pos_y << 2) * left_strd; 6714 } 6715 else 6716 { 6717 pu1_left = pu1_cur_recon - 2; 6718 left_strd = i4_recon_stride; 6719 } 6720 6721 /* top cu boundary */ 6722 if(0 == i4_subtu_pos_y) 6723 { 6724 pu1_top = pu1_cu_top + (i4_subtu_pos_x << 2); 6725 } 6726 else 6727 { 6728 pu1_top = pu1_cur_recon - i4_recon_stride; 6729 } 6730 6731 /* by default top left is set to cu top left */ 6732 pu1_top_left = pu1_cu_top_left; 6733 6734 /* top left based on position */ 6735 if((0 != i4_subtu_pos_y) && (0 == i4_subtu_pos_x)) 6736 { 6737 pu1_top_left = pu1_left - left_strd; 6738 } 6739 else if(0 != i4_subtu_pos_x) 6740 { 6741 pu1_top_left = pu1_top - 2; 6742 } 6743 6744 /* populate the coeffs scan idx */ 6745 scan_idx = SCAN_DIAG_UPRIGHT; 6746 6747 /* RDOPT copy States : TU init (best until prev TU) to current */ 6748 COPY_CABAC_STATES( 6749 
&ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx] 6750 .s_cabac_ctxt.au1_ctxt_models[0], 6751 &ps_ctxt->au1_rdopt_init_ctxt_models[0], 6752 IHEVC_CAB_CTXT_END); 6753 6754 /* for 4x4 transforms based on intra pred mode scan is choosen*/ 6755 if(4 == trans_size) 6756 { 6757 /* for modes from 22 upto 30 horizontal scan is used */ 6758 if((best_chrm_mode > 21) && (best_chrm_mode < 31)) 6759 { 6760 scan_idx = SCAN_HORZ; 6761 } 6762 /* for modes from 6 upto 14 horizontal scan is used */ 6763 else if((best_chrm_mode > 5) && (best_chrm_mode < 15)) 6764 { 6765 scan_idx = SCAN_VERT; 6766 } 6767 } 6768 6769 nbr_flags = ihevce_get_intra_chroma_tu_nbr( 6770 luma_nbr_flags, i4_subtu_idx, trans_size, u1_is_422); 6771 6772 /* call the chroma reference array substitution */ 6773 ihevc_intra_pred_chroma_ref_substitution_fptr( 6774 pu1_top_left, 6775 pu1_top, 6776 pu1_left, 6777 left_strd, 6778 trans_size, 6779 nbr_flags, 6780 (UWORD8 *)ps_ctxt->pv_ref_sub_out, 6781 1); 6782 6783 /* use the look up to get the function idx */ 6784 chrm_pred_func_idx = g_i4_ip_funcs[best_chrm_mode]; 6785 6786 /* call the intra prediction function */ 6787 ps_ctxt->apf_chrm_ip[chrm_pred_func_idx]( 6788 (UWORD8 *)ps_ctxt->pv_ref_sub_out, 6789 1, 6790 pu1_cur_pred, 6791 pred_strd, 6792 trans_size, 6793 best_chrm_mode); 6794 6795 /* UPLANE RDOPT Loop */ 6796 { 6797 WORD32 tu_bits; 6798 6799 cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn( 6800 ps_ctxt, 6801 pu1_cur_pred, 6802 pred_strd, 6803 pu1_cur_src, 6804 chrm_src_stride, 6805 pi2_cur_deq_data_cb, 6806 deq_data_strd, 6807 pu1_cur_recon, 6808 i4_recon_stride, 6809 pu1_ecd_data_cb + ai4_total_bytes_offset_cb[i4_subtu_idx], 6810 ps_ctxt->au1_cu_csbf, 6811 ps_ctxt->i4_cu_csbf_strd, 6812 trans_size, 6813 scan_idx, 6814 1, 6815 &num_bytes, 6816 &tu_bits, 6817 &ps_chr_intra_satd_ctxt->ai4_zero_col_cb[i4_subtu_idx][ctr], 6818 &ps_chr_intra_satd_ctxt->ai4_zero_row_cb[i4_subtu_idx][ctr], 6819 &u1_is_recon_available, 6820 i4_perform_sbh, 6821 
i4_perform_rdoq, 6822 &trans_ssd_u, 6823 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS 6824 i4_alpha_stim_multiplier, 6825 u1_is_cu_noisy, 6826 #endif 6827 0, 6828 u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD, 6829 U_PLANE); 6830 6831 #if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS && COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL 6832 if(u1_is_cu_noisy && i4_alpha_stim_multiplier) 6833 { 6834 #if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT 6835 trans_ssd_u = ihevce_inject_stim_into_distortion( 6836 pu1_cur_src, 6837 chrm_src_stride, 6838 pu1_cur_pred, 6839 pred_strd, 6840 trans_ssd_u, 6841 i4_alpha_stim_multiplier, 6842 trans_size, 6843 0, 6844 ps_ctxt->u1_enable_psyRDOPT, 6845 U_PLANE); 6846 #else 6847 if(u1_compute_spatial_ssd && u1_is_recon_available) 6848 { 6849 trans_ssd_u = ihevce_inject_stim_into_distortion( 6850 pu1_cur_src, 6851 chrm_src_stride, 6852 pu1_cur_recon, 6853 i4_recon_stride, 6854 trans_ssd_u, 6855 i4_alpha_stim_multiplier, 6856 trans_size, 6857 0, 6858 ps_ctxt->u1_enable_psyRDOPT, 6859 U_PLANE); 6860 } 6861 else 6862 { 6863 trans_ssd_u = ihevce_inject_stim_into_distortion( 6864 pu1_cur_src, 6865 chrm_src_stride, 6866 pu1_cur_pred, 6867 pred_strd, 6868 trans_ssd_u, 6869 i4_alpha_stim_multiplier, 6870 trans_size, 6871 0, 6872 ps_ctxt->u1_enable_psyRDOPT, 6873 U_PLANE); 6874 } 6875 #endif 6876 } 6877 #endif 6878 6879 /* RDOPT copy States : New updated after curr TU to TU init */ 6880 if(0 != cbf) 6881 { 6882 memcpy( 6883 &ps_ctxt->au1_rdopt_init_ctxt_models[0], 6884 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx] 6885 .s_cabac_ctxt.au1_ctxt_models[0], 6886 IHEVC_CAB_CTXT_END); 6887 } 6888 /* RDOPT copy States : Restoring back the Cb init state to Cr */ 6889 else 6890 { 6891 memcpy( 6892 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx] 6893 .s_cabac_ctxt.au1_ctxt_models[0], 6894 &ps_ctxt->au1_rdopt_init_ctxt_models[0], 6895 IHEVC_CAB_CTXT_END); 6896 } 6897 6898 if(calc_recon || 
(!u1_is_recon_available && u1_compute_spatial_ssd)) 6899 { 6900 ihevce_chroma_it_recon_fxn( 6901 ps_ctxt, 6902 pi2_cur_deq_data_cb, 6903 deq_data_strd, 6904 pu1_cur_pred, 6905 pred_strd, 6906 pu1_cur_recon, 6907 i4_recon_stride, 6908 (pu1_ecd_data_cb + ai4_total_bytes_offset_cb[i4_subtu_idx]), 6909 trans_size, 6910 cbf, 6911 ps_chr_intra_satd_ctxt->ai4_zero_col_cb[i4_subtu_idx][ctr], 6912 ps_chr_intra_satd_ctxt->ai4_zero_row_cb[i4_subtu_idx][ctr], 6913 U_PLANE); 6914 } 6915 6916 ps_chr_intra_satd_ctxt->au1_cbf_cb[i4_subtu_idx][ctr] = cbf; 6917 curr_cb_cod_cost = 6918 trans_ssd_u + 6919 COMPUTE_RATE_COST_CLIP30( 6920 tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT); 6921 chrm_tu_bits += tu_bits; 6922 ai4_total_bytes_offset_cb[i4_subtu_idx] += num_bytes; 6923 ps_chr_intra_satd_ctxt->ai4_num_bytes_scan_coeff_cb_per_tu[i4_subtu_idx][ctr] = 6924 num_bytes; 6925 } 6926 6927 /* VPLANE RDOPT Loop */ 6928 { 6929 WORD32 tu_bits; 6930 6931 cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn( 6932 ps_ctxt, 6933 pu1_cur_pred, 6934 pred_strd, 6935 pu1_cur_src, 6936 chrm_src_stride, 6937 pi2_cur_deq_data_cr, 6938 deq_data_strd, 6939 pu1_cur_recon, 6940 i4_recon_stride, 6941 pu1_ecd_data_cr + ai4_total_bytes_offset_cr[i4_subtu_idx], 6942 ps_ctxt->au1_cu_csbf, 6943 ps_ctxt->i4_cu_csbf_strd, 6944 trans_size, 6945 scan_idx, 6946 1, 6947 &num_bytes, 6948 &tu_bits, 6949 &ps_chr_intra_satd_ctxt->ai4_zero_col_cr[i4_subtu_idx][ctr], 6950 &ps_chr_intra_satd_ctxt->ai4_zero_row_cr[i4_subtu_idx][ctr], 6951 &u1_is_recon_available, 6952 i4_perform_sbh, 6953 i4_perform_rdoq, 6954 &trans_ssd_v, 6955 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS 6956 i4_alpha_stim_multiplier, 6957 u1_is_cu_noisy, 6958 #endif 6959 0, 6960 u1_compute_spatial_ssd ? 
SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD, 6961 V_PLANE); 6962 6963 #if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS && COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL 6964 if(u1_is_cu_noisy && i4_alpha_stim_multiplier) 6965 { 6966 #if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT 6967 trans_ssd_v = ihevce_inject_stim_into_distortion( 6968 pu1_cur_src, 6969 chrm_src_stride, 6970 pu1_cur_pred, 6971 pred_strd, 6972 trans_ssd_v, 6973 i4_alpha_stim_multiplier, 6974 trans_size, 6975 0, 6976 ps_ctxt->u1_enable_psyRDOPT, 6977 V_PLANE); 6978 #else 6979 if(u1_compute_spatial_ssd && u1_is_recon_available) 6980 { 6981 trans_ssd_v = ihevce_inject_stim_into_distortion( 6982 pu1_cur_src, 6983 chrm_src_stride, 6984 pu1_cur_recon, 6985 i4_recon_stride, 6986 trans_ssd_v, 6987 i4_alpha_stim_multiplier, 6988 trans_size, 6989 0, 6990 ps_ctxt->u1_enable_psyRDOPT, 6991 V_PLANE); 6992 } 6993 else 6994 { 6995 trans_ssd_v = ihevce_inject_stim_into_distortion( 6996 pu1_cur_src, 6997 chrm_src_stride, 6998 pu1_cur_pred, 6999 pred_strd, 7000 trans_ssd_v, 7001 i4_alpha_stim_multiplier, 7002 trans_size, 7003 0, 7004 ps_ctxt->u1_enable_psyRDOPT, 7005 V_PLANE); 7006 } 7007 #endif 7008 } 7009 #endif 7010 7011 /* RDOPT copy States : New updated after curr TU to TU init */ 7012 if(0 != cbf) 7013 { 7014 COPY_CABAC_STATES( 7015 &ps_ctxt->au1_rdopt_init_ctxt_models[0], 7016 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx] 7017 .s_cabac_ctxt.au1_ctxt_models[0], 7018 IHEVC_CAB_CTXT_END); 7019 } 7020 /* RDOPT copy States : Restoring back the Cb init state to Cr */ 7021 else 7022 { 7023 COPY_CABAC_STATES( 7024 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx] 7025 .s_cabac_ctxt.au1_ctxt_models[0], 7026 &ps_ctxt->au1_rdopt_init_ctxt_models[0], 7027 IHEVC_CAB_CTXT_END); 7028 } 7029 7030 if(calc_recon || (!u1_is_recon_available && u1_compute_spatial_ssd)) 7031 { 7032 ihevce_chroma_it_recon_fxn( 7033 ps_ctxt, 7034 pi2_cur_deq_data_cr, 7035 deq_data_strd, 7036 pu1_cur_pred, 7037 pred_strd, 
7038 pu1_cur_recon, 7039 i4_recon_stride, 7040 (pu1_ecd_data_cr + ai4_total_bytes_offset_cr[i4_subtu_idx]), 7041 trans_size, 7042 cbf, 7043 ps_chr_intra_satd_ctxt->ai4_zero_col_cr[i4_subtu_idx][ctr], 7044 ps_chr_intra_satd_ctxt->ai4_zero_row_cr[i4_subtu_idx][ctr], 7045 V_PLANE); 7046 } 7047 7048 ps_chr_intra_satd_ctxt->au1_cbf_cr[i4_subtu_idx][ctr] = cbf; 7049 curr_cr_cod_cost = 7050 trans_ssd_v + 7051 COMPUTE_RATE_COST_CLIP30( 7052 tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT); 7053 chrm_tu_bits += tu_bits; 7054 ai4_total_bytes_offset_cr[i4_subtu_idx] += num_bytes; 7055 ps_chr_intra_satd_ctxt->ai4_num_bytes_scan_coeff_cr_per_tu[i4_subtu_idx][ctr] = 7056 num_bytes; 7057 } 7058 7059 chrm_cod_cost += curr_cb_cod_cost; 7060 chrm_cod_cost += curr_cr_cod_cost; 7061 } 7062 7063 /* set the neighbour map to 1 */ 7064 ihevce_set_nbr_map( 7065 ps_ctxt->pu1_ctb_nbr_map, 7066 ps_ctxt->i4_nbr_map_strd, 7067 (ctr & 1) * (luma_trans_size >> 2) + cu_pos_x, 7068 (ctr > 1) * (luma_trans_size >> 2) + cu_pos_y, 7069 (luma_trans_size >> 2), 7070 1); 7071 } 7072 7073 /* set the neighbour map to 0 */ 7074 ihevce_set_nbr_map( 7075 ps_ctxt->pu1_ctb_nbr_map, 7076 ps_ctxt->i4_nbr_map_strd, 7077 (ps_cu_analyse->b3_cu_pos_x << 1), 7078 (ps_cu_analyse->b3_cu_pos_y << 1), 7079 (ps_cu_analyse->u1_cu_size >> 2), 7080 0); 7081 7082 /* Account for coding b3_chroma_intra_pred_mode prefix and suffix bins */ 7083 /* This is done by adding the bits for signalling chroma mode (0-3) */ 7084 /* and subtracting the bits for chroma mode same as luma mode (4) */ 7085 #if CHROMA_RDOPT_ENABLE 7086 { 7087 /* Estimate bits to encode prefix bin as 1 for b3_chroma_intra_pred_mode */ 7088 WORD32 bits_frac_1 = 7089 gau2_ihevce_cabac_bin_to_bits[u1_chroma_intra_mode_prefix_state ^ 1]; 7090 7091 WORD32 bits_for_mode_0to3 = (2 << CABAC_FRAC_BITS_Q) + bits_frac_1; 7092 7093 /* Estimate bits to encode prefix bin as 0 for b3_chroma_intra_pred_mode */ 7094 WORD32 bits_for_mode4 = 7095 
gau2_ihevce_cabac_bin_to_bits[u1_chroma_intra_mode_prefix_state ^ 0]; 7096 7097 /* accumulate into final rd cost for chroma */ 7098 ps_chr_intra_satd_ctxt->i8_cost_to_encode_chroma_mode = COMPUTE_RATE_COST_CLIP30( 7099 (bits_for_mode_0to3 - bits_for_mode4), 7100 ps_ctxt->i8_cl_ssd_lambda_chroma_qf, 7101 (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q)); 7102 7103 chrm_cod_cost += ps_chr_intra_satd_ctxt->i8_cost_to_encode_chroma_mode; 7104 } 7105 #endif 7106 7107 if(ps_ctxt->u1_enable_psyRDOPT) 7108 { 7109 UWORD8 *pu1_recon_cu; 7110 WORD32 recon_stride; 7111 WORD32 curr_pos_x; 7112 WORD32 curr_pos_y; 7113 WORD32 start_index; 7114 WORD32 num_horz_cu_in_ctb; 7115 WORD32 had_block_size; 7116 7117 /* tODO: sreenivasa ctb size has to be used appropriately */ 7118 had_block_size = 8; 7119 num_horz_cu_in_ctb = 2 * 64 / had_block_size; 7120 curr_pos_x = ps_cu_analyse->b3_cu_pos_x << 3; /* pel units */ 7121 curr_pos_y = ps_cu_analyse->b3_cu_pos_x << 3; /* pel units */ 7122 recon_stride = aps_recon_datastore[0]->i4_chromaRecon_stride; 7123 pu1_recon_cu = 7124 aps_recon_datastore[0]->apv_chroma_recon_bufs[1 + (num_tus_in_cu > 1)]; // 7125 7126 /* start index to index the source satd of curr cu int he current ctb*/ 7127 start_index = 2 * (curr_pos_x / had_block_size) + 7128 (curr_pos_y / had_block_size) * num_horz_cu_in_ctb; 7129 7130 { 7131 chrm_cod_cost += ihevce_psy_rd_cost_croma( 7132 ps_ctxt->ai4_source_chroma_satd, 7133 pu1_recon_cu, 7134 recon_stride, 7135 1, // 7136 cu_size, 7137 0, // pic type 7138 0, //layer id 7139 ps_ctxt->i4_satd_lamda, // lambda 7140 start_index, 7141 ps_ctxt->u1_is_input_data_hbd, // 8 bit 7142 ps_ctxt->u1_chroma_array_type, 7143 &ps_ctxt->s_cmn_opt_func 7144 7145 ); // chroma subsampling 420 7146 } 7147 } 7148 7149 ps_chr_intra_satd_ctxt->i8_chroma_best_rdopt = chrm_cod_cost; 7150 ps_chr_intra_satd_ctxt->i4_chrm_tu_bits = chrm_tu_bits; 7151 7152 memcpy( 7153 &ps_chr_intra_satd_ctxt->au1_chrm_satd_updated_ctxt_models[0], 7154 
&ps_ctxt->au1_rdopt_init_ctxt_models[0], 7155 IHEVC_CAB_CTXT_END); 7156 } 7157 } 7158 7159 /*! 7160 ****************************************************************************** 7161 * \if Function name : ihevce_chroma_cu_prcs_rdopt \endif 7162 * 7163 * \brief 7164 * Coding unit processing function for chroma 7165 * 7166 * \param[in] ps_ctxt enc_loop module ctxt pointer 7167 * \param[in] rd_opt_curr_idx index in the array of RDopt params 7168 * \param[in] func_proc_mode TU_EQ_CU or other case 7169 * \param[in] pu1_chrm_src pointer to source data buffer 7170 * \param[in] chrm_src_stride source buffer stride 7171 * \param[in] pu1_cu_left pointer to left recon data buffer 7172 * \param[in] pu1_cu_top pointer to top recon data buffer 7173 * \param[in] pu1_cu_top_left pointer to top left recon data buffer 7174 * \param[in] cu_left_stride left recon buffer stride 7175 * \param[in] cu_pos_x position x of current CU in CTB 7176 * \param[in] cu_pos_y position y of current CU in CTB 7177 * \param[out] pi4_chrm_tu_bits pointer to store the total chroma bits 7178 * 7179 * \return 7180 * Chroma coding cost (Cb and Cr included) 7181 * 7182 * \author 7183 * Ittiam 7184 * 7185 ***************************************************************************** 7186 */ 7187 LWORD64 ihevce_chroma_cu_prcs_rdopt( 7188 ihevce_enc_loop_ctxt_t *ps_ctxt, 7189 WORD32 rd_opt_curr_idx, 7190 WORD32 func_proc_mode, 7191 UWORD8 *pu1_chrm_src, 7192 WORD32 chrm_src_stride, 7193 UWORD8 *pu1_cu_left, 7194 UWORD8 *pu1_cu_top, 7195 UWORD8 *pu1_cu_top_left, 7196 WORD32 cu_left_stride, 7197 WORD32 cu_pos_x, 7198 WORD32 cu_pos_y, 7199 WORD32 *pi4_chrm_tu_bits, 7200 WORD32 i4_alpha_stim_multiplier, 7201 UWORD8 u1_is_cu_noisy) 7202 { 7203 tu_enc_loop_out_t *ps_tu; 7204 tu_enc_loop_temp_prms_t *ps_tu_temp_prms; 7205 7206 ihevc_intra_pred_chroma_ref_substitution_ft *ihevc_intra_pred_chroma_ref_substitution_fptr; 7207 7208 UWORD8 *pu1_pred; 7209 UWORD8 *pu1_recon; 7210 WORD32 i4_recon_stride; 7211 WORD32 cu_size, 
trans_size = 0; 7212 WORD32 pred_strd; 7213 WORD32 ctr, i4_subtu_idx; 7214 WORD32 scan_idx; 7215 WORD32 u1_is_cu_coded_old; 7216 WORD32 init_bytes_offset; 7217 7218 enc_loop_cu_final_prms_t *ps_best_cu_prms = &ps_ctxt->as_cu_prms[rd_opt_curr_idx]; 7219 recon_datastore_t *ps_recon_datastore = &ps_best_cu_prms->s_recon_datastore; 7220 7221 WORD32 total_bytes_offset = 0; 7222 LWORD64 chrm_cod_cost = 0; 7223 WORD32 chrm_tu_bits = 0; 7224 WORD32 chrm_pred_mode = DM_CHROMA_IDX, luma_pred_mode = 35; 7225 LWORD64 i8_ssd_cb = 0; 7226 WORD32 i4_bits_cb = 0; 7227 LWORD64 i8_ssd_cr = 0; 7228 WORD32 i4_bits_cr = 0; 7229 UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2); 7230 UWORD8 u1_num_tus = 7231 /* NumChromaTU's = 1, if TUSize = 4 and CUSize = 8 */ 7232 (!ps_best_cu_prms->as_tu_enc_loop[0].s_tu.b3_size && ps_best_cu_prms->u1_intra_flag) 7233 ? 1 7234 : ps_best_cu_prms->u2_num_tus_in_cu; 7235 UWORD8 u1_num_subtus_in_tu = u1_is_422 + 1; 7236 UWORD8 u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) && 7237 (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) && 7238 CONVERT_SSDS_TO_SPATIAL_DOMAIN; 7239 /* Get the RDOPT cost of the best CU mode for early_exit */ 7240 LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!rd_opt_curr_idx].i8_best_rdopt_cost; 7241 /* Get the current running RDOPT (Luma RDOPT) for early_exit */ 7242 LWORD64 curr_rdopt_cost = ps_ctxt->as_cu_prms[rd_opt_curr_idx].i8_curr_rdopt_cost; 7243 WORD32 i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq; 7244 WORD32 i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh; 7245 7246 ihevc_intra_pred_chroma_ref_substitution_fptr = 7247 ps_ctxt->ps_func_selector->ihevc_intra_pred_chroma_ref_substitution_fptr; 7248 7249 if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT) 7250 { 7251 u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) && 7252 CONVERT_SSDS_TO_SPATIAL_DOMAIN; 7253 } 7254 7255 /* Store the init bytes offset from luma */ 7256 
init_bytes_offset = ps_best_cu_prms->i4_num_bytes_ecd_data; 7257 7258 /* Unused pred buffer in merge_skip_pred_data_t structure is used as 7259 Chroma pred storage buf. for final_recon function. 7260 The buffer is split into two and used as a ping-pong buffer */ 7261 pu1_pred = ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[CU_ME_INTRA_PRED_CHROMA_IDX] + 7262 rd_opt_curr_idx * ((MAX_CTB_SIZE * MAX_CTB_SIZE >> 1) + 7263 (u1_is_422 * (MAX_CTB_SIZE * MAX_CTB_SIZE >> 1))); 7264 7265 pred_strd = ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[CU_ME_INTRA_PRED_CHROMA_IDX]; 7266 7267 pu1_recon = (UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs[0]; 7268 i4_recon_stride = ps_recon_datastore->i4_chromaRecon_stride; 7269 cu_size = ps_best_cu_prms->u1_cu_size; 7270 chrm_tu_bits = 0; 7271 7272 /* get the first TU pointer */ 7273 ps_tu = &ps_best_cu_prms->as_tu_enc_loop[0]; 7274 /* get the first TU enc_loop temp prms pointer */ 7275 ps_tu_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0]; 7276 7277 if(PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag) 7278 { 7279 /* Mode signalled by intra prediction for luma */ 7280 luma_pred_mode = ps_best_cu_prms->au1_intra_pred_mode[0]; 7281 7282 #if DISABLE_RDOQ_INTRA 7283 i4_perform_rdoq = 0; 7284 #endif 7285 } 7286 7287 else 7288 { 7289 UWORD8 *pu1_pred_org = pu1_pred; 7290 7291 /* ------ Motion Compensation for Chroma -------- */ 7292 for(ctr = 0; ctr < ps_best_cu_prms->u2_num_pus_in_cu; ctr++) 7293 { 7294 pu_t *ps_pu; 7295 WORD32 inter_pu_wd; 7296 WORD32 inter_pu_ht; 7297 7298 ps_pu = &ps_best_cu_prms->as_pu_chrm_proc[ctr]; 7299 7300 inter_pu_wd = (ps_pu->b4_wd + 1) << 2; /* cb and cr pixel interleaved */ 7301 inter_pu_ht = ((ps_pu->b4_ht + 1) << 2) >> 1; 7302 inter_pu_ht <<= u1_is_422; 7303 7304 ihevce_chroma_inter_pred_pu(&ps_ctxt->s_mc_ctxt, ps_pu, pu1_pred, pred_strd); 7305 7306 if(2 == ps_best_cu_prms->u2_num_pus_in_cu) 7307 { 7308 /* 2Nx__ partion case */ 7309 if(inter_pu_wd == cu_size) 7310 { 7311 pu1_pred += 
(inter_pu_ht * pred_strd); 7312 } 7313 7314 /* __x2N partion case */ 7315 if(inter_pu_ht == (cu_size >> (u1_is_422 == 0))) 7316 { 7317 pu1_pred += inter_pu_wd; 7318 } 7319 } 7320 } 7321 7322 /* restore the pred pointer to start for transform loop */ 7323 pu1_pred = pu1_pred_org; 7324 } 7325 7326 /* Used to store back only the luma based info. if SATD based chorma 7327 mode also comes */ 7328 u1_is_cu_coded_old = ps_best_cu_prms->u1_is_cu_coded; 7329 7330 /* evaluate chroma candidates (same as luma) and 7331 if INTRA & HIGH_QUALITY compare with best SATD mode */ 7332 { 7333 WORD32 calc_recon = 0, deq_data_strd; 7334 WORD16 *pi2_deq_data; 7335 UWORD8 *pu1_ecd_data; 7336 UWORD8 u1_is_mode_eq_chroma_satd_mode = 0; 7337 7338 pi2_deq_data = &ps_best_cu_prms->pi2_cu_deq_coeffs[0]; 7339 pi2_deq_data += ps_best_cu_prms->i4_chrm_deq_coeff_strt_idx; 7340 deq_data_strd = cu_size; 7341 /* update ecd buffer for storing coeff. */ 7342 pu1_ecd_data = &ps_best_cu_prms->pu1_cu_coeffs[0]; 7343 pu1_ecd_data += init_bytes_offset; 7344 /* store chroma starting index */ 7345 ps_best_cu_prms->i4_chrm_cu_coeff_strt_idx = init_bytes_offset; 7346 7347 /* get the first TU pointer */ 7348 ps_tu = &ps_best_cu_prms->as_tu_enc_loop[0]; 7349 ps_tu_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0]; 7350 7351 /* Reset total_bytes_offset for each candidate */ 7352 chrm_pred_mode = (u1_is_422 == 1) ? 
gau1_chroma422_intra_angle_mapping[luma_pred_mode] 7353 : luma_pred_mode; 7354 7355 total_bytes_offset = 0; 7356 7357 if(TU_EQ_SUBCU == func_proc_mode) 7358 { 7359 func_proc_mode = TU_EQ_CU_DIV2; 7360 } 7361 7362 /* For cu_size=8 case, chroma cost will be same for TU_EQ_CU and 7363 TU_EQ_CU_DIV2 and TU_EQ_SUBCU case */ 7364 if(8 == cu_size) 7365 { 7366 func_proc_mode = TU_EQ_CU; 7367 } 7368 7369 /* loop based on num tus in a cu */ 7370 if(!ps_best_cu_prms->u1_intra_flag || !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd || 7371 (ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd && 7372 (chrm_pred_mode != 7373 ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[func_proc_mode].u1_best_cr_mode))) 7374 { 7375 /* loop based on num tus in a cu */ 7376 for(ctr = 0; ctr < u1_num_tus; ctr++) 7377 { 7378 WORD32 num_bytes = 0; 7379 LWORD64 curr_cb_cod_cost = 0; 7380 LWORD64 curr_cr_cod_cost = 0; 7381 WORD32 chrm_pred_func_idx = 0; 7382 UWORD8 u1_is_early_exit_condition_satisfied = 0; 7383 7384 /* Default cb and cr offset initializatio for b3_chroma_intra_mode_idx=7 */ 7385 /* FIX for TU tree shrinkage caused by ecd data copies in final mode recon */ 7386 ps_tu->s_tu.b1_cb_cbf = ps_tu->s_tu.b1_cr_cbf = 0; 7387 ps_tu->s_tu.b1_cb_cbf_subtu1 = ps_tu->s_tu.b1_cr_cbf_subtu1 = 0; 7388 ps_tu->ai4_cb_coeff_offset[0] = total_bytes_offset + init_bytes_offset; 7389 ps_tu->ai4_cr_coeff_offset[0] = total_bytes_offset + init_bytes_offset; 7390 ps_tu->ai4_cb_coeff_offset[1] = total_bytes_offset + init_bytes_offset; 7391 ps_tu->ai4_cr_coeff_offset[1] = total_bytes_offset + init_bytes_offset; 7392 ps_tu_temp_prms->ai2_cb_bytes_consumed[0] = 0; 7393 ps_tu_temp_prms->ai2_cr_bytes_consumed[0] = 0; 7394 ps_tu_temp_prms->ai2_cb_bytes_consumed[1] = 0; 7395 ps_tu_temp_prms->ai2_cr_bytes_consumed[1] = 0; 7396 7397 /* TU level inits */ 7398 /* check if chroma present flag is set */ 7399 if(1 == ps_tu->s_tu.b3_chroma_intra_mode_idx) 7400 { 7401 /* RDOPT copy States : TU init (best until prev TU) to 
current */ 7402 COPY_CABAC_STATES( 7403 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx] 7404 .s_cabac_ctxt.au1_ctxt_models[0], 7405 &ps_ctxt->au1_rdopt_init_ctxt_models[0], 7406 IHEVC_CAB_CTXT_END); 7407 7408 /* get the current transform size */ 7409 trans_size = ps_tu->s_tu.b3_size; 7410 trans_size = (1 << (trans_size + 1)); /* in chroma units */ 7411 7412 /* since 2x2 transform is not allowed for chroma*/ 7413 if(2 == trans_size) 7414 { 7415 trans_size = 4; 7416 } 7417 } 7418 7419 for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus_in_tu; i4_subtu_idx++) 7420 { 7421 WORD32 cbf; 7422 UWORD8 u1_is_recon_available; 7423 7424 WORD32 nbr_flags = 0; 7425 WORD32 zero_cols = 0; 7426 WORD32 zero_rows = 0; 7427 7428 /* check if chroma present flag is set */ 7429 if(1 == ps_tu->s_tu.b3_chroma_intra_mode_idx) 7430 { 7431 UWORD8 *pu1_cur_pred; 7432 UWORD8 *pu1_cur_recon; 7433 UWORD8 *pu1_cur_src; 7434 WORD16 *pi2_cur_deq_data; 7435 WORD32 curr_pos_x, curr_pos_y; 7436 LWORD64 trans_ssd_u, trans_ssd_v; 7437 7438 /* get the current sub-tu posx and posy w.r.t to cu */ 7439 curr_pos_x = (ps_tu->s_tu.b4_pos_x << 2) - (cu_pos_x << 3); 7440 curr_pos_y = (ps_tu->s_tu.b4_pos_y << 2) - (cu_pos_y << 3) + 7441 (i4_subtu_idx * trans_size); 7442 7443 /* 420sp case only vertical height will be half */ 7444 if(u1_is_422 == 0) 7445 { 7446 curr_pos_y >>= 1; 7447 } 7448 7449 /* increment the pointers to start of current Sub-TU */ 7450 pu1_cur_recon = (pu1_recon + curr_pos_x); 7451 pu1_cur_recon += (curr_pos_y * i4_recon_stride); 7452 pu1_cur_src = (pu1_chrm_src + curr_pos_x); 7453 pu1_cur_src += (curr_pos_y * chrm_src_stride); 7454 pu1_cur_pred = (pu1_pred + curr_pos_x); 7455 pu1_cur_pred += (curr_pos_y * pred_strd); 7456 pi2_cur_deq_data = pi2_deq_data + curr_pos_x; 7457 pi2_cur_deq_data += (curr_pos_y * deq_data_strd); 7458 7459 /* populate the coeffs scan idx */ 7460 scan_idx = SCAN_DIAG_UPRIGHT; 7461 7462 /* perform intra prediction only for Intra case */ 7463 
if(PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag) 7464 { 7465 UWORD8 *pu1_top_left; 7466 UWORD8 *pu1_top; 7467 UWORD8 *pu1_left; 7468 WORD32 left_strd; 7469 7470 calc_recon = !u1_compute_spatial_ssd && 7471 ((4 == u1_num_tus) || (u1_is_422 == 1)) && 7472 (((u1_num_tus == 1) && (0 == i4_subtu_idx)) || 7473 ((ctr == 3) && (0 == i4_subtu_idx) && (u1_is_422 == 1)) || 7474 ((u1_num_tus == 4) && (ctr < 3))); 7475 7476 /* left cu boundary */ 7477 if(0 == curr_pos_x) 7478 { 7479 pu1_left = pu1_cu_left + curr_pos_y * cu_left_stride; 7480 left_strd = cu_left_stride; 7481 } 7482 else 7483 { 7484 pu1_left = pu1_cur_recon - 2; 7485 left_strd = i4_recon_stride; 7486 } 7487 7488 /* top cu boundary */ 7489 if(0 == curr_pos_y) 7490 { 7491 pu1_top = pu1_cu_top + curr_pos_x; 7492 } 7493 else 7494 { 7495 pu1_top = pu1_cur_recon - i4_recon_stride; 7496 } 7497 7498 /* by default top left is set to cu top left */ 7499 pu1_top_left = pu1_cu_top_left; 7500 7501 /* top left based on position */ 7502 if((0 != curr_pos_y) && (0 == curr_pos_x)) 7503 { 7504 pu1_top_left = pu1_left - cu_left_stride; 7505 } 7506 else if(0 != curr_pos_x) 7507 { 7508 pu1_top_left = pu1_top - 2; 7509 } 7510 7511 /* for 4x4 transforms based on intra pred mode scan is choosen*/ 7512 if(4 == trans_size) 7513 { 7514 /* for modes from 22 upto 30 horizontal scan is used */ 7515 if((chrm_pred_mode > 21) && (chrm_pred_mode < 31)) 7516 { 7517 scan_idx = SCAN_HORZ; 7518 } 7519 /* for modes from 6 upto 14 horizontal scan is used */ 7520 else if((chrm_pred_mode > 5) && (chrm_pred_mode < 15)) 7521 { 7522 scan_idx = SCAN_VERT; 7523 } 7524 } 7525 7526 nbr_flags = ihevce_get_intra_chroma_tu_nbr( 7527 ps_best_cu_prms->au4_nbr_flags[ctr], 7528 i4_subtu_idx, 7529 trans_size, 7530 u1_is_422); 7531 7532 /* call the chroma reference array substitution */ 7533 ihevc_intra_pred_chroma_ref_substitution_fptr( 7534 pu1_top_left, 7535 pu1_top, 7536 pu1_left, 7537 left_strd, 7538 trans_size, 7539 nbr_flags, 7540 (UWORD8 
*)ps_ctxt->pv_ref_sub_out, 7541 1); 7542 7543 /* use the look up to get the function idx */ 7544 chrm_pred_func_idx = g_i4_ip_funcs[chrm_pred_mode]; 7545 7546 /* call the intra prediction function */ 7547 ps_ctxt->apf_chrm_ip[chrm_pred_func_idx]( 7548 (UWORD8 *)ps_ctxt->pv_ref_sub_out, 7549 1, 7550 pu1_cur_pred, 7551 pred_strd, 7552 trans_size, 7553 chrm_pred_mode); 7554 } 7555 7556 if(!ctr && !i4_subtu_idx && (u1_compute_spatial_ssd || calc_recon)) 7557 { 7558 ps_recon_datastore->au1_is_chromaRecon_available[0] = 7559 !ps_best_cu_prms->u1_skip_flag; 7560 } 7561 else if(!ctr && !i4_subtu_idx) 7562 { 7563 ps_recon_datastore->au1_is_chromaRecon_available[0] = 0; 7564 } 7565 /************************************************************/ 7566 /* recon loop is done for all cases including skip cu */ 7567 /* This is because skipping chroma reisdual based on luma */ 7568 /* skip decision can lead to chroma artifacts */ 7569 /************************************************************/ 7570 /************************************************************/ 7571 /*In the high quality and medium speed modes, wherein chroma*/ 7572 /*and luma costs are included in the total cost calculation */ 7573 /*the cost is just a ssd cost, and not that obtained through*/ 7574 /*iq_it path */ 7575 /************************************************************/ 7576 if(ps_best_cu_prms->u1_skip_flag == 0) 7577 { 7578 WORD32 tu_bits; 7579 7580 cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn( 7581 ps_ctxt, 7582 pu1_cur_pred, 7583 pred_strd, 7584 pu1_cur_src, 7585 chrm_src_stride, 7586 pi2_cur_deq_data, 7587 deq_data_strd, 7588 pu1_cur_recon, 7589 i4_recon_stride, 7590 pu1_ecd_data + total_bytes_offset, 7591 ps_ctxt->au1_cu_csbf, 7592 ps_ctxt->i4_cu_csbf_strd, 7593 trans_size, 7594 scan_idx, 7595 PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag, 7596 &num_bytes, 7597 &tu_bits, 7598 &zero_cols, 7599 &zero_rows, 7600 &u1_is_recon_available, 7601 i4_perform_sbh, 7602 i4_perform_rdoq, 7603 &trans_ssd_u, 7604 
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS 7605 i4_alpha_stim_multiplier, 7606 u1_is_cu_noisy, 7607 #endif 7608 ps_best_cu_prms->u1_skip_flag, 7609 u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD, 7610 U_PLANE); 7611 7612 if(u1_compute_spatial_ssd && u1_is_recon_available) 7613 { 7614 ps_recon_datastore 7615 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr] 7616 [i4_subtu_idx] = 0; 7617 } 7618 else 7619 { 7620 ps_recon_datastore 7621 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr] 7622 [i4_subtu_idx] = UCHAR_MAX; 7623 } 7624 7625 #if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS 7626 if(u1_is_cu_noisy && i4_alpha_stim_multiplier) 7627 { 7628 #if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT 7629 trans_ssd_u = ihevce_inject_stim_into_distortion( 7630 pu1_cur_src, 7631 chrm_src_stride, 7632 pu1_cur_pred, 7633 pred_strd, 7634 trans_ssd_u, 7635 i4_alpha_stim_multiplier, 7636 trans_size, 7637 0, 7638 ps_ctxt->u1_enable_psyRDOPT, 7639 U_PLANE); 7640 #else 7641 if(u1_compute_spatial_ssd && u1_is_recon_available) 7642 { 7643 trans_ssd_u = ihevce_inject_stim_into_distortion( 7644 pu1_cur_src, 7645 chrm_src_stride, 7646 pu1_cur_recon, 7647 i4_recon_stride, 7648 trans_ssd_u, 7649 i4_alpha_stim_multiplier, 7650 trans_size, 7651 0, 7652 ps_ctxt->u1_enable_psyRDOPT, 7653 U_PLANE); 7654 } 7655 else 7656 { 7657 trans_ssd_u = ihevce_inject_stim_into_distortion( 7658 pu1_cur_src, 7659 chrm_src_stride, 7660 pu1_cur_pred, 7661 pred_strd, 7662 trans_ssd_u, 7663 i4_alpha_stim_multiplier, 7664 trans_size, 7665 0, 7666 ps_ctxt->u1_enable_psyRDOPT, 7667 U_PLANE); 7668 } 7669 #endif 7670 } 7671 #endif 7672 7673 curr_cb_cod_cost = 7674 trans_ssd_u + 7675 COMPUTE_RATE_COST_CLIP30( 7676 tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT); 7677 7678 chrm_tu_bits += tu_bits; 7679 i4_bits_cb += tu_bits; 7680 7681 /* RDOPT copy States : New updated after curr TU to TU init */ 7682 if(0 != cbf) 7683 { 7684 COPY_CABAC_STATES( 7685 
&ps_ctxt->au1_rdopt_init_ctxt_models[0], 7686 &ps_ctxt->s_rdopt_entropy_ctxt 7687 .as_cu_entropy_ctxt[rd_opt_curr_idx] 7688 .s_cabac_ctxt.au1_ctxt_models[0], 7689 IHEVC_CAB_CTXT_END); 7690 } 7691 /* RDOPT copy States : Restoring back the Cb init state to Cr */ 7692 else 7693 { 7694 COPY_CABAC_STATES( 7695 &ps_ctxt->s_rdopt_entropy_ctxt 7696 .as_cu_entropy_ctxt[rd_opt_curr_idx] 7697 .s_cabac_ctxt.au1_ctxt_models[0], 7698 &ps_ctxt->au1_rdopt_init_ctxt_models[0], 7699 IHEVC_CAB_CTXT_END); 7700 } 7701 7702 /* If Intra and TU=CU/2, need recon for next TUs */ 7703 if(calc_recon) 7704 { 7705 ihevce_chroma_it_recon_fxn( 7706 ps_ctxt, 7707 pi2_cur_deq_data, 7708 deq_data_strd, 7709 pu1_cur_pred, 7710 pred_strd, 7711 pu1_cur_recon, 7712 i4_recon_stride, 7713 (pu1_ecd_data + total_bytes_offset), 7714 trans_size, 7715 cbf, 7716 zero_cols, 7717 zero_rows, 7718 U_PLANE); 7719 7720 ps_recon_datastore 7721 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr] 7722 [i4_subtu_idx] = 0; 7723 } 7724 else 7725 { 7726 ps_recon_datastore 7727 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr] 7728 [i4_subtu_idx] = UCHAR_MAX; 7729 } 7730 } 7731 else 7732 { 7733 /* num bytes is set to 0 */ 7734 num_bytes = 0; 7735 7736 /* cbf is returned as 0 */ 7737 cbf = 0; 7738 7739 curr_cb_cod_cost = trans_ssd_u = 7740 7741 ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator( 7742 pu1_cur_pred, 7743 pu1_cur_src, 7744 pred_strd, 7745 chrm_src_stride, 7746 trans_size, 7747 trans_size); 7748 7749 if(u1_compute_spatial_ssd) 7750 { 7751 /* buffer copy fromp pred to recon */ 7752 7753 ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy( 7754 pu1_cur_pred, 7755 pred_strd, 7756 pu1_cur_recon, 7757 i4_recon_stride, 7758 trans_size, 7759 trans_size, 7760 U_PLANE); 7761 7762 ps_recon_datastore 7763 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr] 7764 [i4_subtu_idx] = 0; 7765 } 7766 7767 if(u1_is_cu_noisy && i4_alpha_stim_multiplier) 7768 { 7769 trans_ssd_u = ihevce_inject_stim_into_distortion( 7770 
pu1_cur_src, 7771 chrm_src_stride, 7772 pu1_cur_pred, 7773 pred_strd, 7774 trans_ssd_u, 7775 i4_alpha_stim_multiplier, 7776 trans_size, 7777 0, 7778 ps_ctxt->u1_enable_psyRDOPT, 7779 U_PLANE); 7780 } 7781 7782 #if ENABLE_INTER_ZCU_COST 7783 #if !WEIGH_CHROMA_COST 7784 /* cbf = 0, accumulate cu not coded cost */ 7785 ps_ctxt->i8_cu_not_coded_cost += curr_cb_cod_cost; 7786 #else 7787 /* cbf = 0, accumulate cu not coded cost */ 7788 7789 ps_ctxt->i8_cu_not_coded_cost += (LWORD64)( 7790 (curr_cb_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor + 7791 (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >> 7792 CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT); 7793 #endif 7794 #endif 7795 } 7796 7797 #if !WEIGH_CHROMA_COST 7798 curr_rdopt_cost += curr_cb_cod_cost; 7799 #else 7800 curr_rdopt_cost += 7801 ((curr_cb_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor + 7802 (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >> 7803 CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT); 7804 #endif 7805 chrm_cod_cost += curr_cb_cod_cost; 7806 i8_ssd_cb += trans_ssd_u; 7807 7808 if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1) 7809 { 7810 /* Early exit : If the current running cost exceeds 7811 the prev. 
best mode cost, break */ 7812 if(curr_rdopt_cost > prev_best_rdopt_cost) 7813 { 7814 u1_is_early_exit_condition_satisfied = 1; 7815 break; 7816 } 7817 } 7818 7819 /* inter cu is coded if any of the tu is coded in it */ 7820 ps_best_cu_prms->u1_is_cu_coded |= cbf; 7821 7822 /* update CB related params */ 7823 ps_tu->ai4_cb_coeff_offset[i4_subtu_idx] = 7824 total_bytes_offset + init_bytes_offset; 7825 7826 if(0 == i4_subtu_idx) 7827 { 7828 ps_tu->s_tu.b1_cb_cbf = cbf; 7829 } 7830 else 7831 { 7832 ps_tu->s_tu.b1_cb_cbf_subtu1 = cbf; 7833 } 7834 7835 total_bytes_offset += num_bytes; 7836 7837 ps_tu_temp_prms->au4_cb_zero_col[i4_subtu_idx] = zero_cols; 7838 ps_tu_temp_prms->au4_cb_zero_row[i4_subtu_idx] = zero_rows; 7839 ps_tu_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx] = num_bytes; 7840 7841 /* recon loop is done for non skip cases */ 7842 if(ps_best_cu_prms->u1_skip_flag == 0) 7843 { 7844 WORD32 tu_bits; 7845 7846 cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn( 7847 ps_ctxt, 7848 pu1_cur_pred, 7849 pred_strd, 7850 pu1_cur_src, 7851 chrm_src_stride, 7852 pi2_cur_deq_data + trans_size, 7853 deq_data_strd, 7854 pu1_cur_recon, 7855 i4_recon_stride, 7856 pu1_ecd_data + total_bytes_offset, 7857 ps_ctxt->au1_cu_csbf, 7858 ps_ctxt->i4_cu_csbf_strd, 7859 trans_size, 7860 scan_idx, 7861 PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag, 7862 &num_bytes, 7863 &tu_bits, 7864 &zero_cols, 7865 &zero_rows, 7866 &u1_is_recon_available, 7867 i4_perform_sbh, 7868 i4_perform_rdoq, 7869 &trans_ssd_v, 7870 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS 7871 i4_alpha_stim_multiplier, 7872 u1_is_cu_noisy, 7873 #endif 7874 ps_best_cu_prms->u1_skip_flag, 7875 u1_compute_spatial_ssd ? 
SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD, 7876 V_PLANE); 7877 7878 if(u1_compute_spatial_ssd && u1_is_recon_available) 7879 { 7880 ps_recon_datastore 7881 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr] 7882 [i4_subtu_idx] = 0; 7883 } 7884 else 7885 { 7886 ps_recon_datastore 7887 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr] 7888 [i4_subtu_idx] = UCHAR_MAX; 7889 } 7890 7891 #if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS 7892 if(u1_is_cu_noisy && i4_alpha_stim_multiplier) 7893 { 7894 #if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT 7895 trans_ssd_v = ihevce_inject_stim_into_distortion( 7896 pu1_cur_src, 7897 chrm_src_stride, 7898 pu1_cur_pred, 7899 pred_strd, 7900 trans_ssd_v, 7901 i4_alpha_stim_multiplier, 7902 trans_size, 7903 0, 7904 ps_ctxt->u1_enable_psyRDOPT, 7905 V_PLANE); 7906 #else 7907 if(u1_compute_spatial_ssd && u1_is_recon_available) 7908 { 7909 trans_ssd_v = ihevce_inject_stim_into_distortion( 7910 pu1_cur_src, 7911 chrm_src_stride, 7912 pu1_cur_recon, 7913 i4_recon_stride, 7914 trans_ssd_v, 7915 i4_alpha_stim_multiplier, 7916 trans_size, 7917 0, 7918 ps_ctxt->u1_enable_psyRDOPT, 7919 V_PLANE); 7920 } 7921 else 7922 { 7923 trans_ssd_v = ihevce_inject_stim_into_distortion( 7924 pu1_cur_src, 7925 chrm_src_stride, 7926 pu1_cur_pred, 7927 pred_strd, 7928 trans_ssd_v, 7929 i4_alpha_stim_multiplier, 7930 trans_size, 7931 0, 7932 ps_ctxt->u1_enable_psyRDOPT, 7933 V_PLANE); 7934 } 7935 #endif 7936 } 7937 #endif 7938 7939 curr_cr_cod_cost = 7940 trans_ssd_v + 7941 COMPUTE_RATE_COST_CLIP30( 7942 tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT); 7943 chrm_tu_bits += tu_bits; 7944 i4_bits_cr += tu_bits; 7945 7946 /* RDOPT copy States : New updated after curr TU to TU init */ 7947 if(0 != cbf) 7948 { 7949 COPY_CABAC_STATES( 7950 &ps_ctxt->au1_rdopt_init_ctxt_models[0], 7951 &ps_ctxt->s_rdopt_entropy_ctxt 7952 .as_cu_entropy_ctxt[rd_opt_curr_idx] 7953 .s_cabac_ctxt.au1_ctxt_models[0], 7954 IHEVC_CAB_CTXT_END); 7955 } 7956 /* RDOPT copy States : 
Restoring back the Cb init state to Cr */ 7957 else 7958 { 7959 COPY_CABAC_STATES( 7960 &ps_ctxt->s_rdopt_entropy_ctxt 7961 .as_cu_entropy_ctxt[rd_opt_curr_idx] 7962 .s_cabac_ctxt.au1_ctxt_models[0], 7963 &ps_ctxt->au1_rdopt_init_ctxt_models[0], 7964 IHEVC_CAB_CTXT_END); 7965 } 7966 7967 /* If Intra and TU=CU/2, need recon for next TUs */ 7968 if(calc_recon) 7969 { 7970 ihevce_chroma_it_recon_fxn( 7971 ps_ctxt, 7972 (pi2_cur_deq_data + trans_size), 7973 deq_data_strd, 7974 pu1_cur_pred, 7975 pred_strd, 7976 pu1_cur_recon, 7977 i4_recon_stride, 7978 (pu1_ecd_data + total_bytes_offset), 7979 trans_size, 7980 cbf, 7981 zero_cols, 7982 zero_rows, 7983 V_PLANE); 7984 7985 ps_recon_datastore 7986 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr] 7987 [i4_subtu_idx] = 0; 7988 } 7989 else 7990 { 7991 ps_recon_datastore 7992 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr] 7993 [i4_subtu_idx] = UCHAR_MAX; 7994 } 7995 } 7996 else 7997 { 7998 /* num bytes is set to 0 */ 7999 num_bytes = 0; 8000 8001 /* cbf is returned as 0 */ 8002 cbf = 0; 8003 8004 curr_cr_cod_cost = trans_ssd_v = 8005 8006 ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator( 8007 pu1_cur_pred + 1, 8008 pu1_cur_src + 1, 8009 pred_strd, 8010 chrm_src_stride, 8011 trans_size, 8012 trans_size); 8013 8014 if(u1_compute_spatial_ssd) 8015 { 8016 /* buffer copy fromp pred to recon */ 8017 ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy( 8018 pu1_cur_pred, 8019 pred_strd, 8020 pu1_cur_recon, 8021 i4_recon_stride, 8022 trans_size, 8023 trans_size, 8024 V_PLANE); 8025 8026 ps_recon_datastore 8027 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr] 8028 [i4_subtu_idx] = 0; 8029 } 8030 8031 if(u1_is_cu_noisy && i4_alpha_stim_multiplier) 8032 { 8033 trans_ssd_v = ihevce_inject_stim_into_distortion( 8034 pu1_cur_src, 8035 chrm_src_stride, 8036 pu1_cur_pred, 8037 pred_strd, 8038 trans_ssd_v, 8039 i4_alpha_stim_multiplier, 8040 trans_size, 8041 0, 8042 ps_ctxt->u1_enable_psyRDOPT, 8043 V_PLANE); 8044 } 8045 
8046 #if ENABLE_INTER_ZCU_COST 8047 #if !WEIGH_CHROMA_COST 8048 /* cbf = 0, accumulate cu not coded cost */ 8049 ps_ctxt->i8_cu_not_coded_cost += curr_cr_cod_cost; 8050 #else 8051 /* cbf = 0, accumulate cu not coded cost */ 8052 8053 ps_ctxt->i8_cu_not_coded_cost += (LWORD64)( 8054 (curr_cr_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor + 8055 (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >> 8056 CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT); 8057 #endif 8058 #endif 8059 } 8060 8061 #if !WEIGH_CHROMA_COST 8062 curr_rdopt_cost += curr_cr_cod_cost; 8063 #else 8064 curr_rdopt_cost += 8065 ((curr_cr_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor + 8066 (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >> 8067 CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT); 8068 #endif 8069 8070 chrm_cod_cost += curr_cr_cod_cost; 8071 i8_ssd_cr += trans_ssd_v; 8072 8073 if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1) 8074 { 8075 /* Early exit : If the current running cost exceeds 8076 the prev. 
best mode cost, break */ 8077 if(curr_rdopt_cost > prev_best_rdopt_cost) 8078 { 8079 u1_is_early_exit_condition_satisfied = 1; 8080 break; 8081 } 8082 } 8083 8084 /* inter cu is coded if any of the tu is coded in it */ 8085 ps_best_cu_prms->u1_is_cu_coded |= cbf; 8086 8087 /* update CR related params */ 8088 ps_tu->ai4_cr_coeff_offset[i4_subtu_idx] = 8089 total_bytes_offset + init_bytes_offset; 8090 8091 if(0 == i4_subtu_idx) 8092 { 8093 ps_tu->s_tu.b1_cr_cbf = cbf; 8094 } 8095 else 8096 { 8097 ps_tu->s_tu.b1_cr_cbf_subtu1 = cbf; 8098 } 8099 8100 total_bytes_offset += num_bytes; 8101 8102 ps_tu_temp_prms->au4_cr_zero_col[i4_subtu_idx] = zero_cols; 8103 ps_tu_temp_prms->au4_cr_zero_row[i4_subtu_idx] = zero_rows; 8104 ps_tu_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx] = num_bytes; 8105 } 8106 else 8107 { 8108 ps_recon_datastore 8109 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx] = 8110 UCHAR_MAX; 8111 ps_recon_datastore 8112 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx] = 8113 UCHAR_MAX; 8114 } 8115 } 8116 8117 if(u1_is_early_exit_condition_satisfied) 8118 { 8119 break; 8120 } 8121 8122 /* loop increments */ 8123 ps_tu++; 8124 ps_tu_temp_prms++; 8125 } 8126 8127 /* Signal as luma mode. 
HIGH_QUALITY may update it */ 8128 ps_best_cu_prms->u1_chroma_intra_pred_mode = 4; 8129 8130 /* modify the cost chrm_cod_cost */ 8131 if(ps_ctxt->u1_enable_psyRDOPT) 8132 { 8133 UWORD8 *pu1_recon_cu; 8134 WORD32 recon_stride; 8135 WORD32 curr_pos_x; 8136 WORD32 curr_pos_y; 8137 WORD32 start_index; 8138 WORD32 num_horz_cu_in_ctb; 8139 WORD32 had_block_size; 8140 /* tODO: sreenivasa ctb size has to be used appropriately */ 8141 had_block_size = 8; 8142 num_horz_cu_in_ctb = 2 * 64 / had_block_size; 8143 8144 curr_pos_x = cu_pos_x << 3; /* pel units */ 8145 curr_pos_y = cu_pos_y << 3; /* pel units */ 8146 recon_stride = i4_recon_stride; 8147 pu1_recon_cu = pu1_recon; 8148 8149 /* start index to index the source satd of curr cu int he current ctb*/ 8150 start_index = 2 * (curr_pos_x / had_block_size) + 8151 (curr_pos_y / had_block_size) * num_horz_cu_in_ctb; 8152 8153 { 8154 chrm_cod_cost += ihevce_psy_rd_cost_croma( 8155 ps_ctxt->ai4_source_chroma_satd, 8156 pu1_recon, 8157 recon_stride, 8158 1, // 8159 cu_size, 8160 0, // pic type 8161 0, //layer id 8162 ps_ctxt->i4_satd_lamda, // lambda 8163 start_index, 8164 ps_ctxt->u1_is_input_data_hbd, // 8 bit 8165 ps_ctxt->u1_chroma_array_type, 8166 &ps_ctxt->s_cmn_opt_func 8167 8168 ); // chroma subsampling 420 8169 } 8170 } 8171 } 8172 else 8173 { 8174 u1_is_mode_eq_chroma_satd_mode = 1; 8175 chrm_cod_cost = MAX_COST_64; 8176 } 8177 8178 /* If Intra Block and preset is HIGH QUALITY, then compare with best SATD mode */ 8179 if((PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag) && 8180 (1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd)) 8181 { 8182 if(64 == cu_size) 8183 { 8184 ASSERT(TU_EQ_CU != func_proc_mode); 8185 } 8186 8187 if(ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[func_proc_mode] 8188 .i8_chroma_best_rdopt < chrm_cod_cost) 8189 { 8190 UWORD8 *pu1_src; 8191 UWORD8 *pu1_ecd_data_src_cb; 8192 UWORD8 *pu1_ecd_data_src_cr; 8193 8194 chroma_intra_satd_ctxt_t *ps_chr_intra_satd_ctxt = 8195 
&ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[func_proc_mode]; 8196 8197 UWORD8 *pu1_dst = &ps_ctxt->au1_rdopt_init_ctxt_models[0]; 8198 WORD32 ai4_ecd_data_cb_offset[2] = { 0, 0 }; 8199 WORD32 ai4_ecd_data_cr_offset[2] = { 0, 0 }; 8200 8201 pu1_src = &ps_chr_intra_satd_ctxt->au1_chrm_satd_updated_ctxt_models[0]; 8202 chrm_cod_cost = ps_chr_intra_satd_ctxt->i8_chroma_best_rdopt; 8203 chrm_pred_mode = ps_chr_intra_satd_ctxt->u1_best_cr_mode; 8204 chrm_tu_bits = ps_chr_intra_satd_ctxt->i4_chrm_tu_bits; 8205 8206 if(u1_is_mode_eq_chroma_satd_mode) 8207 { 8208 chrm_cod_cost -= ps_chr_intra_satd_ctxt->i8_cost_to_encode_chroma_mode; 8209 } 8210 8211 /*Resetting total_num_bytes_to 0*/ 8212 total_bytes_offset = 0; 8213 8214 /* Update the CABAC state corresponding to chroma only */ 8215 /* Chroma Cbf */ 8216 memcpy(pu1_dst + IHEVC_CAB_CBCR_IDX, pu1_src + IHEVC_CAB_CBCR_IDX, 2); 8217 /* Chroma transform skip */ 8218 memcpy(pu1_dst + IHEVC_CAB_TFM_SKIP12, pu1_src + IHEVC_CAB_TFM_SKIP12, 1); 8219 /* Chroma last coeff x prefix */ 8220 memcpy( 8221 pu1_dst + IHEVC_CAB_COEFFX_PREFIX + 15, 8222 pu1_src + IHEVC_CAB_COEFFX_PREFIX + 15, 8223 3); 8224 /* Chroma last coeff y prefix */ 8225 memcpy( 8226 pu1_dst + IHEVC_CAB_COEFFY_PREFIX + 15, 8227 pu1_src + IHEVC_CAB_COEFFY_PREFIX + 15, 8228 3); 8229 /* Chroma csbf */ 8230 memcpy( 8231 pu1_dst + IHEVC_CAB_CODED_SUBLK_IDX + 2, 8232 pu1_src + IHEVC_CAB_CODED_SUBLK_IDX + 2, 8233 2); 8234 /* Chroma sig coeff flags */ 8235 memcpy( 8236 pu1_dst + IHEVC_CAB_COEFF_FLAG + 27, pu1_src + IHEVC_CAB_COEFF_FLAG + 27, 15); 8237 /* Chroma absgt1 flags */ 8238 memcpy( 8239 pu1_dst + IHEVC_CAB_COEFABS_GRTR1_FLAG + 16, 8240 pu1_src + IHEVC_CAB_COEFABS_GRTR1_FLAG + 16, 8241 8); 8242 /* Chroma absgt2 flags */ 8243 memcpy( 8244 pu1_dst + IHEVC_CAB_COEFABS_GRTR2_FLAG + 4, 8245 pu1_src + IHEVC_CAB_COEFABS_GRTR2_FLAG + 4, 8246 2); 8247 8248 ps_tu = &ps_best_cu_prms->as_tu_enc_loop[0]; 8249 ps_tu_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0]; 
8250 8251 /* update to luma decision as we update chroma in final mode */ 8252 ps_best_cu_prms->u1_is_cu_coded = u1_is_cu_coded_old; 8253 8254 for(ctr = 0; ctr < u1_num_tus; ctr++) 8255 { 8256 for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus_in_tu; i4_subtu_idx++) 8257 { 8258 WORD32 cbf; 8259 WORD32 num_bytes; 8260 8261 pu1_ecd_data_src_cb = 8262 &ps_chr_intra_satd_ctxt->au1_scan_coeff_cb[i4_subtu_idx][0]; 8263 pu1_ecd_data_src_cr = 8264 &ps_chr_intra_satd_ctxt->au1_scan_coeff_cr[i4_subtu_idx][0]; 8265 8266 /* check if chroma present flag is set */ 8267 if(1 == ps_tu->s_tu.b3_chroma_intra_mode_idx) 8268 { 8269 UWORD8 *pu1_cur_pred_dest; 8270 UWORD8 *pu1_cur_pred_src; 8271 WORD32 pred_src_strd; 8272 WORD16 *pi2_cur_deq_data_dest; 8273 WORD16 *pi2_cur_deq_data_src_cb; 8274 WORD16 *pi2_cur_deq_data_src_cr; 8275 WORD32 deq_src_strd; 8276 8277 WORD32 curr_pos_x, curr_pos_y; 8278 8279 trans_size = ps_tu->s_tu.b3_size; 8280 trans_size = (1 << (trans_size + 1)); /* in chroma units */ 8281 8282 /*Deriving stride values*/ 8283 pred_src_strd = ps_chr_intra_satd_ctxt->i4_pred_stride; 8284 deq_src_strd = ps_chr_intra_satd_ctxt->i4_iq_buff_stride; 8285 8286 /* since 2x2 transform is not allowed for chroma*/ 8287 if(2 == trans_size) 8288 { 8289 trans_size = 4; 8290 } 8291 8292 /* get the current tu posx and posy w.r.t to cu */ 8293 curr_pos_x = (ps_tu->s_tu.b4_pos_x << 2) - (cu_pos_x << 3); 8294 curr_pos_y = (ps_tu->s_tu.b4_pos_y << 2) - (cu_pos_y << 3) + 8295 (i4_subtu_idx * trans_size); 8296 8297 /* 420sp case only vertical height will be half */ 8298 if(0 == u1_is_422) 8299 { 8300 curr_pos_y >>= 1; 8301 } 8302 8303 /* increment the pointers to start of current TU */ 8304 pu1_cur_pred_src = 8305 ((UWORD8 *)ps_chr_intra_satd_ctxt->pv_pred_data + curr_pos_x); 8306 pu1_cur_pred_src += (curr_pos_y * pred_src_strd); 8307 pu1_cur_pred_dest = (pu1_pred + curr_pos_x); 8308 pu1_cur_pred_dest += (curr_pos_y * pred_strd); 8309 8310 pi2_cur_deq_data_src_cb = 8311 
&ps_chr_intra_satd_ctxt->ai2_iq_data_cb[0] + (curr_pos_x >> 1); 8312 pi2_cur_deq_data_src_cr = 8313 &ps_chr_intra_satd_ctxt->ai2_iq_data_cr[0] + (curr_pos_x >> 1); 8314 pi2_cur_deq_data_src_cb += (curr_pos_y * deq_src_strd); 8315 pi2_cur_deq_data_src_cr += (curr_pos_y * deq_src_strd); 8316 pi2_cur_deq_data_dest = pi2_deq_data + curr_pos_x; 8317 pi2_cur_deq_data_dest += (curr_pos_y * deq_data_strd); 8318 8319 /*Overwriting deq data with that belonging to the winning special mode 8320 (luma mode != chroma mode) 8321 ihevce_copy_2d takes source and dest arguments as UWORD8 *. We have to 8322 correspondingly manipulate to copy WORD16 data*/ 8323 8324 ps_ctxt->s_cmn_opt_func.pf_copy_2d( 8325 (UWORD8 *)pi2_cur_deq_data_dest, 8326 (deq_data_strd << 1), 8327 (UWORD8 *)pi2_cur_deq_data_src_cb, 8328 (deq_src_strd << 1), 8329 (trans_size << 1), 8330 trans_size); 8331 8332 ps_ctxt->s_cmn_opt_func.pf_copy_2d( 8333 (UWORD8 *)(pi2_cur_deq_data_dest + trans_size), 8334 (deq_data_strd << 1), 8335 (UWORD8 *)pi2_cur_deq_data_src_cr, 8336 (deq_src_strd << 1), 8337 (trans_size << 1), 8338 trans_size); 8339 8340 /*Overwriting pred data with that belonging to the winning special mode 8341 (luma mode != chroma mode)*/ 8342 8343 ps_ctxt->s_cmn_opt_func.pf_copy_2d( 8344 pu1_cur_pred_dest, 8345 pred_strd, 8346 pu1_cur_pred_src, 8347 pred_src_strd, 8348 (trans_size << 1), 8349 trans_size); 8350 8351 num_bytes = ps_chr_intra_satd_ctxt 8352 ->ai4_num_bytes_scan_coeff_cb_per_tu[i4_subtu_idx][ctr]; 8353 cbf = ps_chr_intra_satd_ctxt->au1_cbf_cb[i4_subtu_idx][ctr]; 8354 /* inter cu is coded if any of the tu is coded in it */ 8355 ps_best_cu_prms->u1_is_cu_coded |= cbf; 8356 8357 /* update CB related params */ 8358 ps_tu->ai4_cb_coeff_offset[i4_subtu_idx] = 8359 total_bytes_offset + init_bytes_offset; 8360 8361 if(0 == i4_subtu_idx) 8362 { 8363 ps_tu->s_tu.b1_cb_cbf = cbf; 8364 } 8365 else 8366 { 8367 ps_tu->s_tu.b1_cb_cbf_subtu1 = cbf; 8368 } 8369 8370 /*Overwriting the cb ecd data corresponding to 
the special mode*/ 8371 if(0 != num_bytes) 8372 { 8373 memcpy( 8374 (pu1_ecd_data + total_bytes_offset), 8375 pu1_ecd_data_src_cb + ai4_ecd_data_cb_offset[i4_subtu_idx], 8376 num_bytes); 8377 } 8378 8379 total_bytes_offset += num_bytes; 8380 ai4_ecd_data_cb_offset[i4_subtu_idx] += num_bytes; 8381 ps_tu_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx] = num_bytes; 8382 8383 num_bytes = ps_chr_intra_satd_ctxt 8384 ->ai4_num_bytes_scan_coeff_cr_per_tu[i4_subtu_idx][ctr]; 8385 cbf = ps_chr_intra_satd_ctxt->au1_cbf_cr[i4_subtu_idx][ctr]; 8386 /* inter cu is coded if any of the tu is coded in it */ 8387 ps_best_cu_prms->u1_is_cu_coded |= cbf; 8388 8389 /*Overwriting the cr ecd data corresponding to the special mode*/ 8390 if(0 != num_bytes) 8391 { 8392 memcpy( 8393 (pu1_ecd_data + total_bytes_offset), 8394 pu1_ecd_data_src_cr + ai4_ecd_data_cr_offset[i4_subtu_idx], 8395 num_bytes); 8396 } 8397 8398 /* update CR related params */ 8399 ps_tu->ai4_cr_coeff_offset[i4_subtu_idx] = 8400 total_bytes_offset + init_bytes_offset; 8401 8402 if(0 == i4_subtu_idx) 8403 { 8404 ps_tu->s_tu.b1_cr_cbf = cbf; 8405 } 8406 else 8407 { 8408 ps_tu->s_tu.b1_cr_cbf_subtu1 = cbf; 8409 } 8410 8411 total_bytes_offset += num_bytes; 8412 ai4_ecd_data_cr_offset[i4_subtu_idx] += num_bytes; 8413 8414 /*Updating zero rows and zero cols*/ 8415 ps_tu_temp_prms->au4_cb_zero_col[i4_subtu_idx] = 8416 ps_chr_intra_satd_ctxt->ai4_zero_col_cb[i4_subtu_idx][ctr]; 8417 ps_tu_temp_prms->au4_cb_zero_row[i4_subtu_idx] = 8418 ps_chr_intra_satd_ctxt->ai4_zero_row_cb[i4_subtu_idx][ctr]; 8419 ps_tu_temp_prms->au4_cr_zero_col[i4_subtu_idx] = 8420 ps_chr_intra_satd_ctxt->ai4_zero_col_cr[i4_subtu_idx][ctr]; 8421 ps_tu_temp_prms->au4_cr_zero_row[i4_subtu_idx] = 8422 ps_chr_intra_satd_ctxt->ai4_zero_row_cr[i4_subtu_idx][ctr]; 8423 8424 ps_tu_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx] = num_bytes; 8425 8426 if((u1_num_tus > 1) && 8427 ps_recon_datastore->au1_is_chromaRecon_available[2]) 8428 { 8429 ps_recon_datastore 8430 
->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr] 8431 [i4_subtu_idx] = 2; 8432 ps_recon_datastore 8433 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr] 8434 [i4_subtu_idx] = 2; 8435 } 8436 else if( 8437 (1 == u1_num_tus) && 8438 ps_recon_datastore->au1_is_chromaRecon_available[1]) 8439 { 8440 ps_recon_datastore 8441 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr] 8442 [i4_subtu_idx] = 1; 8443 ps_recon_datastore 8444 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr] 8445 [i4_subtu_idx] = 1; 8446 } 8447 else 8448 { 8449 ps_recon_datastore 8450 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr] 8451 [i4_subtu_idx] = UCHAR_MAX; 8452 ps_recon_datastore 8453 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr] 8454 [i4_subtu_idx] = UCHAR_MAX; 8455 } 8456 } 8457 } 8458 8459 /* loop increments */ 8460 ps_tu++; 8461 ps_tu_temp_prms++; 8462 } 8463 } 8464 8465 if(!u1_is_422) 8466 { 8467 if(chrm_pred_mode == luma_pred_mode) 8468 { 8469 ps_best_cu_prms->u1_chroma_intra_pred_mode = 4; 8470 } 8471 else if(chrm_pred_mode == 0) 8472 { 8473 ps_best_cu_prms->u1_chroma_intra_pred_mode = 0; 8474 } 8475 else if(chrm_pred_mode == 1) 8476 { 8477 ps_best_cu_prms->u1_chroma_intra_pred_mode = 3; 8478 } 8479 else if(chrm_pred_mode == 10) 8480 { 8481 ps_best_cu_prms->u1_chroma_intra_pred_mode = 2; 8482 } 8483 else if(chrm_pred_mode == 26) 8484 { 8485 ps_best_cu_prms->u1_chroma_intra_pred_mode = 1; 8486 } 8487 else 8488 { 8489 ASSERT(0); /*Should not come here*/ 8490 } 8491 } 8492 else 8493 { 8494 if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[luma_pred_mode]) 8495 { 8496 ps_best_cu_prms->u1_chroma_intra_pred_mode = 4; 8497 } 8498 else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[0]) 8499 { 8500 ps_best_cu_prms->u1_chroma_intra_pred_mode = 0; 8501 } 8502 else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[1]) 8503 { 8504 ps_best_cu_prms->u1_chroma_intra_pred_mode = 3; 8505 } 8506 else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[10]) 8507 { 8508 
ps_best_cu_prms->u1_chroma_intra_pred_mode = 2; 8509 } 8510 else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[26]) 8511 { 8512 ps_best_cu_prms->u1_chroma_intra_pred_mode = 1; 8513 } 8514 else 8515 { 8516 ASSERT(0); /*Should not come here*/ 8517 } 8518 } 8519 } 8520 8521 /* Store the actual chroma mode */ 8522 ps_best_cu_prms->u1_chroma_intra_pred_actual_mode = chrm_pred_mode; 8523 } 8524 8525 /* update the total bytes produced */ 8526 ps_best_cu_prms->i4_num_bytes_ecd_data = total_bytes_offset + init_bytes_offset; 8527 8528 /* store the final chrm bits accumulated */ 8529 *pi4_chrm_tu_bits = chrm_tu_bits; 8530 8531 return (chrm_cod_cost); 8532 } 8533 8534 /*! 8535 ****************************************************************************** 8536 * \if Function name : ihevce_final_rdopt_mode_prcs \endif 8537 * 8538 * \brief 8539 * Final RDOPT mode process function. Performs Recon computation for the 8540 * final mode. Re-use or Compute pred, iq-data, coeff based on the flags. 
8541 * 8542 * \param[in] ps_ctxt : pointer to enc_loop module context 8543 * \param[in] ps_prms : pointer to struct containing requisite parameters 8544 * 8545 * \return 8546 * None 8547 * 8548 * \author 8549 * Ittiam 8550 * 8551 ***************************************************************************** 8552 */ 8553 void ihevce_final_rdopt_mode_prcs( 8554 ihevce_enc_loop_ctxt_t *ps_ctxt, final_mode_process_prms_t *ps_prms) 8555 { 8556 enc_loop_cu_final_prms_t *ps_best_cu_prms; 8557 tu_enc_loop_out_t *ps_tu_enc_loop; 8558 tu_enc_loop_temp_prms_t *ps_tu_enc_loop_temp_prms; 8559 nbr_avail_flags_t s_nbr; 8560 recon_datastore_t *ps_recon_datastore; 8561 8562 ihevc_intra_pred_luma_ref_substitution_ft *ihevc_intra_pred_luma_ref_substitution_fptr; 8563 ihevc_intra_pred_chroma_ref_substitution_ft *ihevc_intra_pred_chroma_ref_substitution_fptr; 8564 ihevc_intra_pred_ref_filtering_ft *ihevc_intra_pred_ref_filtering_fptr; 8565 8566 WORD32 num_tu_in_cu; 8567 LWORD64 rd_opt_cost; 8568 WORD32 ctr; 8569 WORD32 i4_subtu_idx; 8570 WORD32 cu_size; 8571 WORD32 cu_pos_x, cu_pos_y; 8572 WORD32 chrm_present_flag = 1; 8573 WORD32 num_bytes, total_bytes = 0; 8574 WORD32 chrm_ctr = 0; 8575 WORD32 u1_is_cu_coded; 8576 UWORD8 *pu1_old_ecd_data; 8577 UWORD8 *pu1_chrm_old_ecd_data; 8578 UWORD8 *pu1_cur_pred; 8579 WORD16 *pi2_deq_data; 8580 WORD16 *pi2_chrm_deq_data; 8581 WORD16 *pi2_cur_deq_data; 8582 WORD16 *pi2_cur_deq_data_chrm; 8583 UWORD8 *pu1_cur_luma_recon; 8584 UWORD8 *pu1_cur_chroma_recon; 8585 UWORD8 *pu1_cur_src; 8586 UWORD8 *pu1_cur_src_chrm; 8587 UWORD8 *pu1_cur_pred_chrm; 8588 UWORD8 *pu1_intra_pred_mode; 8589 UWORD32 *pu4_nbr_flags; 8590 LWORD64 i8_ssd; 8591 8592 cu_nbr_prms_t *ps_cu_nbr_prms = ps_prms->ps_cu_nbr_prms; 8593 cu_inter_cand_t *ps_best_inter_cand = ps_prms->ps_best_inter_cand; 8594 enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms = ps_prms->ps_chrm_cu_buf_prms; 8595 8596 WORD32 packed_pred_mode = ps_prms->packed_pred_mode; 8597 WORD32 rd_opt_best_idx = 
ps_prms->rd_opt_best_idx; 8598 UWORD8 *pu1_src = (UWORD8 *)ps_prms->pv_src; 8599 WORD32 src_strd = ps_prms->src_strd; 8600 UWORD8 *pu1_pred = (UWORD8 *)ps_prms->pv_pred; 8601 WORD32 pred_strd = ps_prms->pred_strd; 8602 UWORD8 *pu1_pred_chrm = (UWORD8 *)ps_prms->pv_pred_chrm; 8603 WORD32 pred_chrm_strd = ps_prms->pred_chrm_strd; 8604 UWORD8 *pu1_final_ecd_data = ps_prms->pu1_final_ecd_data; 8605 UWORD8 *pu1_csbf_buf = ps_prms->pu1_csbf_buf; 8606 WORD32 csbf_strd = ps_prms->csbf_strd; 8607 UWORD8 *pu1_luma_recon = (UWORD8 *)ps_prms->pv_luma_recon; 8608 WORD32 recon_luma_strd = ps_prms->recon_luma_strd; 8609 UWORD8 *pu1_chrm_recon = (UWORD8 *)ps_prms->pv_chrm_recon; 8610 WORD32 recon_chrma_strd = ps_prms->recon_chrma_strd; 8611 UWORD8 u1_cu_pos_x = ps_prms->u1_cu_pos_x; 8612 UWORD8 u1_cu_pos_y = ps_prms->u1_cu_pos_y; 8613 UWORD8 u1_cu_size = ps_prms->u1_cu_size; 8614 WORD8 i1_cu_qp = ps_prms->i1_cu_qp; 8615 UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2); 8616 UWORD8 u1_num_subtus = (u1_is_422 == 1) + 1; 8617 /* Get the Chroma pointer and parameters */ 8618 UWORD8 *pu1_src_chrm = ps_chrm_cu_buf_prms->pu1_curr_src; 8619 WORD32 src_chrm_strd = ps_chrm_cu_buf_prms->i4_chrm_src_stride; 8620 UWORD8 u1_compute_spatial_ssd_luma = 0; 8621 UWORD8 u1_compute_spatial_ssd_chroma = 0; 8622 /* Get the pointer for function selector */ 8623 ihevc_intra_pred_luma_ref_substitution_fptr = 8624 ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr; 8625 8626 ihevc_intra_pred_ref_filtering_fptr = 8627 ps_ctxt->ps_func_selector->ihevc_intra_pred_ref_filtering_fptr; 8628 8629 ihevc_intra_pred_chroma_ref_substitution_fptr = 8630 ps_ctxt->ps_func_selector->ihevc_intra_pred_chroma_ref_substitution_fptr; 8631 8632 /* Get the best CU parameters */ 8633 ps_best_cu_prms = &ps_ctxt->as_cu_prms[rd_opt_best_idx]; 8634 num_tu_in_cu = ps_best_cu_prms->u2_num_tus_in_cu; 8635 cu_size = ps_best_cu_prms->u1_cu_size; 8636 cu_pos_x = u1_cu_pos_x; 8637 cu_pos_y = u1_cu_pos_y; 8638 
pu1_intra_pred_mode = &ps_best_cu_prms->au1_intra_pred_mode[0]; 8639 pu4_nbr_flags = &ps_best_cu_prms->au4_nbr_flags[0]; 8640 ps_recon_datastore = &ps_best_cu_prms->s_recon_datastore; 8641 8642 /* get the first TU pointer */ 8643 ps_tu_enc_loop = &ps_best_cu_prms->as_tu_enc_loop[0]; 8644 /* get the first TU only enc_loop prms pointer */ 8645 ps_tu_enc_loop_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0]; 8646 /*modify quant related param in ctxt based on current cu qp*/ 8647 if((ps_ctxt->i1_cu_qp_delta_enable)) 8648 { 8649 /*recompute quant related param at every cu level*/ 8650 ihevce_compute_quant_rel_param(ps_ctxt, i1_cu_qp); 8651 8652 /* get frame level lambda params */ 8653 ihevce_get_cl_cu_lambda_prms( 8654 ps_ctxt, MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON ? i1_cu_qp : ps_ctxt->i4_frame_qp); 8655 } 8656 8657 ps_best_cu_prms->i8_cu_ssd = 0; 8658 ps_best_cu_prms->u4_cu_open_intra_sad = 0; 8659 8660 /* For skip case : Set TU_size = CU_size and make cbf = 0 8661 so that same TU loop can be used for all modes */ 8662 if(PRED_MODE_SKIP == packed_pred_mode) 8663 { 8664 for(ctr = 0; ctr < num_tu_in_cu; ctr++) 8665 { 8666 ps_tu_enc_loop->s_tu.b1_y_cbf = 0; 8667 8668 ps_tu_enc_loop_temp_prms->i2_luma_bytes_consumed = 0; 8669 8670 ps_tu_enc_loop++; 8671 ps_tu_enc_loop_temp_prms++; 8672 } 8673 8674 /* go back to the first TU pointer */ 8675 ps_tu_enc_loop = &ps_best_cu_prms->as_tu_enc_loop[0]; 8676 ps_tu_enc_loop_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0]; 8677 } 8678 /** For inter case, pred calculation is outside the loop **/ 8679 if(PRED_MODE_INTRA != packed_pred_mode) 8680 { 8681 /**------------- Compute pred data if required --------------**/ 8682 if((1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data)) 8683 { 8684 nbr_4x4_t *ps_topleft_nbr_4x4; 8685 nbr_4x4_t *ps_left_nbr_4x4; 8686 nbr_4x4_t *ps_top_nbr_4x4; 8687 WORD32 nbr_4x4_left_strd; 8688 8689 ps_best_inter_cand->pu1_pred_data = pu1_pred; 8690 ps_best_inter_cand->i4_pred_data_stride 
= pred_strd; 8691 8692 /* Get the CU nbr information */ 8693 ps_topleft_nbr_4x4 = ps_cu_nbr_prms->ps_topleft_nbr_4x4; 8694 ps_left_nbr_4x4 = ps_cu_nbr_prms->ps_left_nbr_4x4; 8695 ps_top_nbr_4x4 = ps_cu_nbr_prms->ps_top_nbr_4x4; 8696 nbr_4x4_left_strd = ps_cu_nbr_prms->nbr_4x4_left_strd; 8697 8698 /* MVP ,MVD calc and Motion compensation */ 8699 rd_opt_cost = ((pf_inter_rdopt_cu_mc_mvp)ps_ctxt->pv_inter_rdopt_cu_mc_mvp)( 8700 ps_ctxt, 8701 ps_best_inter_cand, 8702 u1_cu_size, 8703 cu_pos_x, 8704 cu_pos_y, 8705 ps_left_nbr_4x4, 8706 ps_top_nbr_4x4, 8707 ps_topleft_nbr_4x4, 8708 nbr_4x4_left_strd, 8709 rd_opt_best_idx); 8710 } 8711 8712 /** ------ Motion Compensation for Chroma -------- **/ 8713 if(1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data) 8714 { 8715 UWORD8 *pu1_cur_pred; 8716 pu1_cur_pred = pu1_pred_chrm; 8717 8718 /* run a loop over all the partitons in cu */ 8719 for(ctr = 0; ctr < ps_best_cu_prms->u2_num_pus_in_cu; ctr++) 8720 { 8721 pu_t *ps_pu; 8722 WORD32 inter_pu_wd, inter_pu_ht; 8723 8724 ps_pu = &ps_best_cu_prms->as_pu_chrm_proc[ctr]; 8725 8726 /* IF AMP then each partitions can have diff wd ht */ 8727 inter_pu_wd = (ps_pu->b4_wd + 1) << 2; /* cb and cr pixel interleaved */ 8728 inter_pu_ht = ((ps_pu->b4_ht + 1) << 2) >> 1; 8729 inter_pu_ht <<= u1_is_422; 8730 /* chroma mc func */ 8731 ihevce_chroma_inter_pred_pu( 8732 &ps_ctxt->s_mc_ctxt, ps_pu, pu1_cur_pred, pred_chrm_strd); 8733 if(2 == ps_best_cu_prms->u2_num_pus_in_cu) 8734 { 8735 /* 2Nx__ partion case */ 8736 if(inter_pu_wd == ps_best_cu_prms->u1_cu_size) 8737 { 8738 pu1_cur_pred += (inter_pu_ht * pred_chrm_strd); 8739 } 8740 /* __x2N partion case */ 8741 if(inter_pu_ht == (ps_best_cu_prms->u1_cu_size >> (u1_is_422 == 0))) 8742 { 8743 pu1_cur_pred += inter_pu_wd; 8744 } 8745 } 8746 } 8747 } 8748 } 8749 pi2_deq_data = &ps_best_cu_prms->pi2_cu_deq_coeffs[0]; 8750 pi2_chrm_deq_data = 8751 &ps_best_cu_prms->pi2_cu_deq_coeffs[0] + ps_best_cu_prms->i4_chrm_deq_coeff_strt_idx; 8752 
pu1_old_ecd_data = &ps_best_cu_prms->pu1_cu_coeffs[0]; 8753 pu1_chrm_old_ecd_data = 8754 &ps_best_cu_prms->pu1_cu_coeffs[0] + ps_best_cu_prms->i4_chrm_cu_coeff_strt_idx; 8755 8756 /* default value for cu coded flag */ 8757 u1_is_cu_coded = 0; 8758 8759 /* If we are re-computing coeff, set sad to 0 and start accumulating */ 8760 /* else use the best cand. sad from RDOPT stage */ 8761 if(1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data) 8762 { 8763 /*init of ssd of CU accuumulated over all TU*/ 8764 ps_best_cu_prms->u4_cu_sad = 0; 8765 8766 /* reset the luma residual bits */ 8767 ps_best_cu_prms->u4_cu_luma_res_bits = 0; 8768 } 8769 8770 if(1 == ps_tu_enc_loop_temp_prms->b1_eval_chroma_iq_and_coeff_data) 8771 { 8772 /* reset the chroma residual bits */ 8773 ps_best_cu_prms->u4_cu_chroma_res_bits = 0; 8774 } 8775 8776 if((1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data) || 8777 (1 == ps_tu_enc_loop_temp_prms->b1_eval_chroma_iq_and_coeff_data)) 8778 { 8779 /*Header bits have to be reevaluated if luma and chroma reevaluation is done, as 8780 the quantized coefficients might be changed. 
8781 We are copying only those states which correspond to the header from the cabac state 8782 of the previous CU, because the header is going to be recomputed for this condition*/ 8783 ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 1; 8784 memcpy( 8785 &ps_ctxt->au1_rdopt_init_ctxt_models[0], 8786 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0], 8787 IHEVC_CAB_COEFFX_PREFIX); 8788 8789 if((1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data)) 8790 { 8791 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( 8792 (&ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX), 8793 (&ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0] + 8794 IHEVC_CAB_COEFFX_PREFIX), 8795 (IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX)); 8796 } 8797 else 8798 { 8799 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( 8800 (&ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX), 8801 (&ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx] 8802 .s_cabac_ctxt.au1_ctxt_models[0] + 8803 IHEVC_CAB_COEFFX_PREFIX), 8804 (IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX)); 8805 } 8806 ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_best_idx; 8807 } 8808 else 8809 { 8810 ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 0; 8811 } 8812 8813 /* Zero cbf tool is disabled for intra CUs */ 8814 if(PRED_MODE_INTRA == packed_pred_mode) 8815 { 8816 #if ENABLE_ZERO_CBF_IN_INTRA 8817 ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE; 8818 #else 8819 ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF; 8820 #endif 8821 } 8822 else 8823 { 8824 #if DISABLE_ZERO_ZBF_IN_INTER 8825 ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF; 8826 #else 8827 ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE; 8828 #endif 8829 } 8830 8831 /** Loop for all tu blocks in current cu and do reconstruction **/ 8832 for(ctr = 0; ctr < num_tu_in_cu; ctr++) 8833 { 8834 tu_t *ps_tu; 8835 WORD32 trans_size, num_4x4_in_tu; 8836 WORD32 cbf, zero_rows, zero_cols; 8837 WORD32 cu_pos_x_in_4x4, cu_pos_y_in_4x4; 8838 WORD32 
cu_pos_x_in_pix, cu_pos_y_in_pix; 8839 WORD32 luma_pred_mode, chroma_pred_mode = 0; 8840 UWORD8 au1_is_recon_available[2]; 8841 8842 ps_tu = &(ps_tu_enc_loop->s_tu); /* Points to the TU property ctxt */ 8843 8844 u1_compute_spatial_ssd_luma = 0; 8845 u1_compute_spatial_ssd_chroma = 0; 8846 8847 trans_size = 1 << (ps_tu->b3_size + 2); 8848 num_4x4_in_tu = (trans_size >> 2); 8849 cu_pos_x_in_4x4 = ps_tu->b4_pos_x; 8850 cu_pos_y_in_4x4 = ps_tu->b4_pos_y; 8851 8852 /* populate the coeffs scan idx */ 8853 ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT; 8854 8855 /* get the current pos x and pos y in pixels */ 8856 cu_pos_x_in_pix = (cu_pos_x_in_4x4 << 2) - (cu_pos_x << 3); 8857 cu_pos_y_in_pix = (cu_pos_y_in_4x4 << 2) - (cu_pos_y << 3); 8858 8859 /* Update pointers based on the location */ 8860 pu1_cur_src = pu1_src + cu_pos_x_in_pix; 8861 pu1_cur_src += (cu_pos_y_in_pix * src_strd); 8862 pu1_cur_pred = pu1_pred + cu_pos_x_in_pix; 8863 pu1_cur_pred += (cu_pos_y_in_pix * pred_strd); 8864 8865 pu1_cur_luma_recon = pu1_luma_recon + cu_pos_x_in_pix; 8866 pu1_cur_luma_recon += (cu_pos_y_in_pix * recon_luma_strd); 8867 8868 pi2_cur_deq_data = pi2_deq_data + cu_pos_x_in_pix; 8869 pi2_cur_deq_data += cu_pos_y_in_pix * cu_size; 8870 8871 pu1_cur_src_chrm = pu1_src_chrm + cu_pos_x_in_pix; 8872 pu1_cur_src_chrm += ((cu_pos_y_in_pix >> 1) * src_chrm_strd) + 8873 (u1_is_422 * ((cu_pos_y_in_pix >> 1) * src_chrm_strd)); 8874 8875 pu1_cur_pred_chrm = pu1_pred_chrm + cu_pos_x_in_pix; 8876 pu1_cur_pred_chrm += ((cu_pos_y_in_pix >> 1) * pred_chrm_strd) + 8877 (u1_is_422 * ((cu_pos_y_in_pix >> 1) * pred_chrm_strd)); 8878 8879 pu1_cur_chroma_recon = pu1_chrm_recon + cu_pos_x_in_pix; 8880 pu1_cur_chroma_recon += ((cu_pos_y_in_pix >> 1) * recon_chrma_strd) + 8881 (u1_is_422 * ((cu_pos_y_in_pix >> 1) * recon_chrma_strd)); 8882 8883 pi2_cur_deq_data_chrm = pi2_chrm_deq_data + cu_pos_x_in_pix; 8884 pi2_cur_deq_data_chrm += 8885 ((cu_pos_y_in_pix >> 1) * cu_size) + (u1_is_422 * ((cu_pos_y_in_pix >> 1) 
* cu_size)); 8886 8887 /* if transfrom size is 4x4 then only first luma 4x4 will have chroma*/ 8888 chrm_present_flag = 1; /* by default chroma present is set to 1*/ 8889 8890 if(4 == trans_size) 8891 { 8892 /* if tusize is 4x4 then only first luma 4x4 will have chroma*/ 8893 if(0 != chrm_ctr) 8894 { 8895 chrm_present_flag = INTRA_PRED_CHROMA_IDX_NONE; 8896 } 8897 8898 /* increment the chrm ctr unconditionally */ 8899 chrm_ctr++; 8900 /* after ctr reached 4 reset it */ 8901 if(4 == chrm_ctr) 8902 { 8903 chrm_ctr = 0; 8904 } 8905 } 8906 8907 /**------------- Compute pred data if required --------------**/ 8908 if(PRED_MODE_INTRA == packed_pred_mode) /* Inter pred calc. is done outside loop */ 8909 { 8910 /* Get the pred mode for scan idx calculation, even if pred is not required */ 8911 luma_pred_mode = *pu1_intra_pred_mode; 8912 8913 if((ps_ctxt->i4_rc_pass == 1) || 8914 (1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data)) 8915 { 8916 WORD32 nbr_flags; 8917 WORD32 luma_pred_func_idx; 8918 UWORD8 *pu1_left; 8919 UWORD8 *pu1_top; 8920 UWORD8 *pu1_top_left; 8921 WORD32 left_strd; 8922 8923 /* left cu boundary */ 8924 if(0 == cu_pos_x_in_pix) 8925 { 8926 left_strd = ps_cu_nbr_prms->cu_left_stride; 8927 pu1_left = ps_cu_nbr_prms->pu1_cu_left + cu_pos_y_in_pix * left_strd; 8928 } 8929 else 8930 { 8931 pu1_left = pu1_cur_luma_recon - 1; 8932 left_strd = recon_luma_strd; 8933 } 8934 8935 /* top cu boundary */ 8936 if(0 == cu_pos_y_in_pix) 8937 { 8938 pu1_top = ps_cu_nbr_prms->pu1_cu_top + cu_pos_x_in_pix; 8939 } 8940 else 8941 { 8942 pu1_top = pu1_cur_luma_recon - recon_luma_strd; 8943 } 8944 8945 /* by default top left is set to cu top left */ 8946 pu1_top_left = ps_cu_nbr_prms->pu1_cu_top_left; 8947 8948 /* top left based on position */ 8949 if((0 != cu_pos_y_in_pix) && (0 == cu_pos_x_in_pix)) 8950 { 8951 pu1_top_left = pu1_left - left_strd; 8952 } 8953 else if(0 != cu_pos_x_in_pix) 8954 { 8955 pu1_top_left = pu1_top - 1; 8956 } 8957 8958 /* get the neighbour 
availability flags */ 8959 nbr_flags = ihevce_get_nbr_intra( 8960 &s_nbr, 8961 ps_ctxt->pu1_ctb_nbr_map, 8962 ps_ctxt->i4_nbr_map_strd, 8963 cu_pos_x_in_4x4, 8964 cu_pos_y_in_4x4, 8965 num_4x4_in_tu); 8966 8967 if(1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data) 8968 { 8969 /* copy the nbr flags for chroma reuse */ 8970 if(4 != trans_size) 8971 { 8972 *pu4_nbr_flags = nbr_flags; 8973 } 8974 else if(1 == chrm_present_flag) 8975 { 8976 /* compute the avail flags assuming luma trans is 8x8 */ 8977 /* get the neighbour availability flags */ 8978 *pu4_nbr_flags = ihevce_get_nbr_intra_mxn_tu( 8979 ps_ctxt->pu1_ctb_nbr_map, 8980 ps_ctxt->i4_nbr_map_strd, 8981 cu_pos_x_in_4x4, 8982 cu_pos_y_in_4x4, 8983 (num_4x4_in_tu << 1), 8984 (num_4x4_in_tu << 1)); 8985 } 8986 8987 /* call reference array substitution */ 8988 ihevc_intra_pred_luma_ref_substitution_fptr( 8989 pu1_top_left, 8990 pu1_top, 8991 pu1_left, 8992 left_strd, 8993 trans_size, 8994 nbr_flags, 8995 (UWORD8 *)ps_ctxt->pv_ref_sub_out, 8996 1); 8997 8998 /* call reference filtering */ 8999 ihevc_intra_pred_ref_filtering_fptr( 9000 (UWORD8 *)ps_ctxt->pv_ref_sub_out, 9001 trans_size, 9002 (UWORD8 *)ps_ctxt->pv_ref_filt_out, 9003 luma_pred_mode, 9004 ps_ctxt->i1_strong_intra_smoothing_enable_flag); 9005 9006 /* use the look up to get the function idx */ 9007 luma_pred_func_idx = g_i4_ip_funcs[luma_pred_mode]; 9008 9009 /* call the intra prediction function */ 9010 ps_ctxt->apf_lum_ip[luma_pred_func_idx]( 9011 (UWORD8 *)ps_ctxt->pv_ref_filt_out, 9012 1, 9013 pu1_cur_pred, 9014 pred_strd, 9015 trans_size, 9016 luma_pred_mode); 9017 } 9018 } 9019 else if( 9020 (1 == chrm_present_flag) && 9021 (1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data)) 9022 { 9023 WORD32 temp_num_4x4_in_tu = num_4x4_in_tu; 9024 9025 if(4 == trans_size) /* compute the avail flags assuming luma trans is 8x8 */ 9026 { 9027 temp_num_4x4_in_tu = num_4x4_in_tu << 1; 9028 } 9029 9030 *pu4_nbr_flags = ihevce_get_nbr_intra_mxn_tu( 
9031 ps_ctxt->pu1_ctb_nbr_map, 9032 ps_ctxt->i4_nbr_map_strd, 9033 cu_pos_x_in_4x4, 9034 cu_pos_y_in_4x4, 9035 temp_num_4x4_in_tu, 9036 temp_num_4x4_in_tu); 9037 } 9038 9039 /* Get the pred mode for scan idx calculation, even if pred is not required */ 9040 chroma_pred_mode = ps_best_cu_prms->u1_chroma_intra_pred_actual_mode; 9041 } 9042 9043 if(1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data) 9044 { 9045 WORD32 temp_bits; 9046 LWORD64 temp_cost; 9047 UWORD32 u4_tu_sad; 9048 WORD32 perform_sbh, perform_rdoq; 9049 9050 if(PRED_MODE_INTRA == packed_pred_mode) 9051 { 9052 /* for luma 4x4 and 8x8 transforms based on intra pred mode scan is choosen*/ 9053 if(trans_size < 16) 9054 { 9055 /* for modes from 22 upto 30 horizontal scan is used */ 9056 if((luma_pred_mode > 21) && (luma_pred_mode < 31)) 9057 { 9058 ps_ctxt->i4_scan_idx = SCAN_HORZ; 9059 } 9060 /* for modes from 6 upto 14 horizontal scan is used */ 9061 else if((luma_pred_mode > 5) && (luma_pred_mode < 15)) 9062 { 9063 ps_ctxt->i4_scan_idx = SCAN_VERT; 9064 } 9065 } 9066 } 9067 9068 /* RDOPT copy States : TU init (best until prev TU) to current */ 9069 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( 9070 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx] 9071 .s_cabac_ctxt.au1_ctxt_models[0] + 9072 IHEVC_CAB_COEFFX_PREFIX, 9073 &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX, 9074 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX); 9075 9076 if(ps_prms->u1_recompute_sbh_and_rdoq) 9077 { 9078 perform_sbh = (ps_ctxt->i4_sbh_level != NO_SBH); 9079 perform_rdoq = (ps_ctxt->i4_rdoq_level != NO_RDOQ); 9080 } 9081 else 9082 { 9083 /* RDOQ will change the coefficients. If coefficients are changed, we will have to do sbh again*/ 9084 perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh; 9085 /* To do SBH we need the quant and iquant data. 
This would mean we need to do quantization again, which would mean 9086 we would have to do RDOQ again.*/ 9087 perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq; 9088 } 9089 9090 #if DISABLE_RDOQ_INTRA 9091 if(PRED_MODE_INTRA == packed_pred_mode) 9092 { 9093 perform_rdoq = 0; 9094 } 9095 #endif 9096 /*If BEST candidate RDOQ is enabled, Eithe no coef level rdoq or CU level rdoq has to be enabled 9097 so that all candidates and best candidate are quantized with same rounding factor */ 9098 if(1 == perform_rdoq) 9099 { 9100 ASSERT(ps_ctxt->i4_quant_rounding_level != TU_LEVEL_QUANT_ROUNDING); 9101 } 9102 9103 cbf = ihevce_t_q_iq_ssd_scan_fxn( 9104 ps_ctxt, 9105 pu1_cur_pred, 9106 pred_strd, 9107 pu1_cur_src, 9108 src_strd, 9109 pi2_cur_deq_data, 9110 cu_size, /*deq_data stride is cu_size*/ 9111 pu1_cur_luma_recon, 9112 recon_luma_strd, 9113 pu1_final_ecd_data, 9114 pu1_csbf_buf, 9115 csbf_strd, 9116 trans_size, 9117 packed_pred_mode, 9118 &temp_cost, 9119 &num_bytes, 9120 &temp_bits, 9121 &u4_tu_sad, 9122 &zero_cols, 9123 &zero_rows, 9124 &au1_is_recon_available[0], 9125 perform_rdoq, //(BEST_CAND_RDOQ == ps_ctxt->i4_rdoq_level), 9126 perform_sbh, 9127 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS 9128 !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT 9129 : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) * 9130 (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) / 9131 100.0, 9132 ps_prms->u1_is_cu_noisy, 9133 #endif 9134 u1_compute_spatial_ssd_luma ? 
SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD, 9135 1 /*early cbf*/ 9136 ); //(BEST_CAND_SBH == ps_ctxt->i4_sbh_level)); 9137 9138 /* Accumulate luma residual bits */ 9139 ps_best_cu_prms->u4_cu_luma_res_bits += temp_bits; 9140 9141 /* RDOPT copy States : New updated after curr TU to TU init */ 9142 if(0 != cbf) 9143 { 9144 /* update to new state only if CBF is non zero */ 9145 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( 9146 &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX, 9147 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx] 9148 .s_cabac_ctxt.au1_ctxt_models[0] + 9149 IHEVC_CAB_COEFFX_PREFIX, 9150 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX); 9151 } 9152 9153 /* accumulate the TU sad into cu sad */ 9154 ps_best_cu_prms->u4_cu_sad += u4_tu_sad; 9155 ps_tu->b1_y_cbf = cbf; 9156 ps_tu_enc_loop_temp_prms->i2_luma_bytes_consumed = num_bytes; 9157 9158 /* If somebody updates cbf (RDOQ or SBH), update in nbr str. for BS */ 9159 if((ps_prms->u1_will_cabac_state_change) && (!ps_prms->u1_is_first_pass)) 9160 { 9161 WORD32 num_4x4_in_cu = u1_cu_size >> 2; 9162 nbr_4x4_t *ps_cur_nbr_4x4 = &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0]; 9163 ps_cur_nbr_4x4 = (ps_cur_nbr_4x4 + (cu_pos_x_in_pix >> 2)); 9164 ps_cur_nbr_4x4 += ((cu_pos_y_in_pix >> 2) * num_4x4_in_cu); 9165 /* repiclate the nbr 4x4 structure for all 4x4 blocks current TU */ 9166 ps_cur_nbr_4x4->b1_y_cbf = cbf; 9167 /*copy the cu qp. 
This will be overwritten by qp calculated based on skip flag at final stage of cu mode decide*/ 9168 ps_cur_nbr_4x4->b8_qp = ps_ctxt->i4_cu_qp; 9169 /* Qp and cbf are stored for the all 4x4 in TU */ 9170 { 9171 WORD32 i, j; 9172 nbr_4x4_t *ps_tmp_4x4; 9173 ps_tmp_4x4 = ps_cur_nbr_4x4; 9174 9175 for(i = 0; i < num_4x4_in_tu; i++) 9176 { 9177 for(j = 0; j < num_4x4_in_tu; j++) 9178 { 9179 ps_tmp_4x4[j].b8_qp = ps_ctxt->i4_cu_qp; 9180 ps_tmp_4x4[j].b1_y_cbf = cbf; 9181 } 9182 /* row level update*/ 9183 ps_tmp_4x4 += num_4x4_in_cu; 9184 } 9185 } 9186 } 9187 } 9188 else 9189 { 9190 zero_cols = ps_tu_enc_loop_temp_prms->u4_luma_zero_col; 9191 zero_rows = ps_tu_enc_loop_temp_prms->u4_luma_zero_row; 9192 9193 if(ps_prms->u1_will_cabac_state_change) 9194 { 9195 num_bytes = ps_tu_enc_loop_temp_prms->i2_luma_bytes_consumed; 9196 } 9197 else 9198 { 9199 num_bytes = 0; 9200 } 9201 9202 /* copy luma ecd data to final buffer */ 9203 memcpy(pu1_final_ecd_data, pu1_old_ecd_data, num_bytes); 9204 9205 pu1_old_ecd_data += num_bytes; 9206 9207 au1_is_recon_available[0] = 0; 9208 } 9209 9210 /**-------- Compute Recon data (Do IT & Recon) : Luma -----------**/ 9211 if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data && 9212 (!u1_compute_spatial_ssd_luma || 9213 (!au1_is_recon_available[0] && u1_compute_spatial_ssd_luma))) 9214 { 9215 if(!ps_recon_datastore->u1_is_lumaRecon_available || 9216 (ps_recon_datastore->u1_is_lumaRecon_available && 9217 (UCHAR_MAX == ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr]))) 9218 { 9219 ihevce_it_recon_fxn( 9220 ps_ctxt, 9221 pi2_cur_deq_data, 9222 cu_size, 9223 pu1_cur_pred, 9224 pred_strd, 9225 pu1_cur_luma_recon, 9226 recon_luma_strd, 9227 pu1_final_ecd_data, 9228 trans_size, 9229 packed_pred_mode, 9230 ps_tu->b1_y_cbf, 9231 zero_cols, 9232 zero_rows); 9233 } 9234 else if( 9235 ps_recon_datastore->u1_is_lumaRecon_available && 9236 (UCHAR_MAX != ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr])) 9237 { 9238 UWORD8 *pu1_recon_src = 
9239 ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs 9240 [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr]]) + 9241 cu_pos_x_in_pix + cu_pos_y_in_pix * ps_recon_datastore->i4_lumaRecon_stride; 9242 9243 ps_ctxt->s_cmn_opt_func.pf_copy_2d( 9244 pu1_cur_luma_recon, 9245 recon_luma_strd, 9246 pu1_recon_src, 9247 ps_recon_datastore->i4_lumaRecon_stride, 9248 trans_size, 9249 trans_size); 9250 } 9251 } 9252 9253 if(ps_prms->u1_will_cabac_state_change) 9254 { 9255 ps_tu_enc_loop->i4_luma_coeff_offset = total_bytes; 9256 } 9257 9258 pu1_final_ecd_data += num_bytes; 9259 /* update total bytes consumed */ 9260 total_bytes += num_bytes; 9261 9262 u1_is_cu_coded |= ps_tu->b1_y_cbf; 9263 9264 /***************** Compute T,Q,IQ,IT & Recon for Chroma ********************/ 9265 if(1 == chrm_present_flag) 9266 { 9267 pu1_cur_src_chrm = pu1_src_chrm + cu_pos_x_in_pix; 9268 pu1_cur_src_chrm += ((cu_pos_y_in_pix >> 1) * src_chrm_strd) + 9269 (u1_is_422 * ((cu_pos_y_in_pix >> 1) * src_chrm_strd)); 9270 9271 pu1_cur_pred_chrm = pu1_pred_chrm + cu_pos_x_in_pix; 9272 pu1_cur_pred_chrm += ((cu_pos_y_in_pix >> 1) * pred_chrm_strd) + 9273 (u1_is_422 * ((cu_pos_y_in_pix >> 1) * pred_chrm_strd)); 9274 9275 pu1_cur_chroma_recon = pu1_chrm_recon + cu_pos_x_in_pix; 9276 pu1_cur_chroma_recon += ((cu_pos_y_in_pix >> 1) * recon_chrma_strd) + 9277 (u1_is_422 * ((cu_pos_y_in_pix >> 1) * recon_chrma_strd)); 9278 9279 pi2_cur_deq_data_chrm = pi2_chrm_deq_data + cu_pos_x_in_pix; 9280 pi2_cur_deq_data_chrm += ((cu_pos_y_in_pix >> 1) * cu_size) + 9281 (u1_is_422 * ((cu_pos_y_in_pix >> 1) * cu_size)); 9282 9283 if(INCLUDE_CHROMA_DURING_TU_RECURSION && 9284 (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P0) && 9285 (PRED_MODE_INTRA != packed_pred_mode)) 9286 { 9287 WORD32 i4_num_bytes; 9288 UWORD8 *pu1_chroma_pred; 9289 UWORD8 *pu1_chroma_recon; 9290 WORD16 *pi2_chroma_deq; 9291 UWORD32 u4_zero_col; 9292 UWORD32 u4_zero_row; 9293 9294 for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus; 
i4_subtu_idx++) 9295 { 9296 WORD32 chroma_trans_size = MAX(4, trans_size >> 1); 9297 WORD32 i4_subtu_pos_x = cu_pos_x_in_pix; 9298 WORD32 i4_subtu_pos_y = cu_pos_y_in_pix + (i4_subtu_idx * chroma_trans_size); 9299 9300 if(0 == u1_is_422) 9301 { 9302 i4_subtu_pos_y >>= 1; 9303 } 9304 9305 pu1_chroma_pred = 9306 pu1_cur_pred_chrm + (i4_subtu_idx * chroma_trans_size * pred_chrm_strd); 9307 pu1_chroma_recon = pu1_cur_chroma_recon + 9308 (i4_subtu_idx * chroma_trans_size * recon_chrma_strd); 9309 pi2_chroma_deq = 9310 pi2_cur_deq_data_chrm + (i4_subtu_idx * chroma_trans_size * cu_size); 9311 9312 u4_zero_col = ps_tu_enc_loop_temp_prms->au4_cb_zero_col[i4_subtu_idx]; 9313 u4_zero_row = ps_tu_enc_loop_temp_prms->au4_cb_zero_row[i4_subtu_idx]; 9314 9315 if(ps_prms->u1_will_cabac_state_change) 9316 { 9317 i4_num_bytes = 9318 ps_tu_enc_loop_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx]; 9319 } 9320 else 9321 { 9322 i4_num_bytes = 0; 9323 } 9324 9325 memcpy(pu1_final_ecd_data, pu1_old_ecd_data, i4_num_bytes); 9326 9327 pu1_old_ecd_data += i4_num_bytes; 9328 9329 au1_is_recon_available[U_PLANE] = 0; 9330 9331 if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data && 9332 (!u1_compute_spatial_ssd_chroma || 9333 (!au1_is_recon_available[U_PLANE] && u1_compute_spatial_ssd_chroma))) 9334 { 9335 if(!ps_recon_datastore->au1_is_chromaRecon_available[0] || 9336 (ps_recon_datastore->au1_is_chromaRecon_available[0] && 9337 (UCHAR_MAX == 9338 ps_recon_datastore 9339 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx]))) 9340 { 9341 ihevce_chroma_it_recon_fxn( 9342 ps_ctxt, 9343 pi2_chroma_deq, 9344 cu_size, 9345 pu1_chroma_pred, 9346 pred_chrm_strd, 9347 pu1_chroma_recon, 9348 recon_chrma_strd, 9349 pu1_final_ecd_data, 9350 chroma_trans_size, 9351 (i4_subtu_idx == 0) ? 
ps_tu->b1_cb_cbf : ps_tu->b1_cb_cbf_subtu1, 9352 u4_zero_col, 9353 u4_zero_row, 9354 U_PLANE); 9355 } 9356 else if( 9357 ps_recon_datastore->au1_is_chromaRecon_available[0] && 9358 (UCHAR_MAX != 9359 ps_recon_datastore 9360 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx])) 9361 { 9362 UWORD8 *pu1_recon_src = 9363 ((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs 9364 [ps_recon_datastore->au1_bufId_with_winning_ChromaRecon 9365 [U_PLANE][ctr][i4_subtu_idx]]) + 9366 i4_subtu_pos_x + 9367 i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride; 9368 9369 ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy( 9370 pu1_recon_src, 9371 ps_recon_datastore->i4_lumaRecon_stride, 9372 pu1_chroma_recon, 9373 recon_chrma_strd, 9374 chroma_trans_size, 9375 chroma_trans_size, 9376 U_PLANE); 9377 } 9378 } 9379 9380 u1_is_cu_coded |= 9381 ((1 == i4_subtu_idx) ? ps_tu->b1_cb_cbf_subtu1 : ps_tu->b1_cb_cbf); 9382 9383 pu1_final_ecd_data += i4_num_bytes; 9384 total_bytes += i4_num_bytes; 9385 } 9386 9387 for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus; i4_subtu_idx++) 9388 { 9389 WORD32 chroma_trans_size = MAX(4, trans_size >> 1); 9390 WORD32 i4_subtu_pos_x = cu_pos_x_in_pix; 9391 WORD32 i4_subtu_pos_y = cu_pos_y_in_pix + (i4_subtu_idx * chroma_trans_size); 9392 9393 if(0 == u1_is_422) 9394 { 9395 i4_subtu_pos_y >>= 1; 9396 } 9397 9398 pu1_chroma_pred = 9399 pu1_cur_pred_chrm + (i4_subtu_idx * chroma_trans_size * pred_chrm_strd); 9400 pu1_chroma_recon = pu1_cur_chroma_recon + 9401 (i4_subtu_idx * chroma_trans_size * recon_chrma_strd); 9402 pi2_chroma_deq = pi2_cur_deq_data_chrm + 9403 (i4_subtu_idx * chroma_trans_size * cu_size) + 9404 chroma_trans_size; 9405 9406 u4_zero_col = ps_tu_enc_loop_temp_prms->au4_cr_zero_col[i4_subtu_idx]; 9407 u4_zero_row = ps_tu_enc_loop_temp_prms->au4_cr_zero_row[i4_subtu_idx]; 9408 9409 if(ps_prms->u1_will_cabac_state_change) 9410 { 9411 i4_num_bytes = 9412 ps_tu_enc_loop_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx]; 9413 } 
9414 else 9415 { 9416 i4_num_bytes = 0; 9417 } 9418 9419 memcpy(pu1_final_ecd_data, pu1_old_ecd_data, i4_num_bytes); 9420 9421 pu1_old_ecd_data += i4_num_bytes; 9422 9423 au1_is_recon_available[V_PLANE] = 0; 9424 9425 if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data && 9426 (!u1_compute_spatial_ssd_chroma || 9427 (!au1_is_recon_available[V_PLANE] && u1_compute_spatial_ssd_chroma))) 9428 { 9429 if(!ps_recon_datastore->au1_is_chromaRecon_available[0] || 9430 (ps_recon_datastore->au1_is_chromaRecon_available[0] && 9431 (UCHAR_MAX == 9432 ps_recon_datastore 9433 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx]))) 9434 { 9435 ihevce_chroma_it_recon_fxn( 9436 ps_ctxt, 9437 pi2_chroma_deq, 9438 cu_size, 9439 pu1_chroma_pred, 9440 pred_chrm_strd, 9441 pu1_chroma_recon, 9442 recon_chrma_strd, 9443 pu1_final_ecd_data, 9444 chroma_trans_size, 9445 (i4_subtu_idx == 0) ? ps_tu->b1_cr_cbf : ps_tu->b1_cr_cbf_subtu1, 9446 u4_zero_col, 9447 u4_zero_row, 9448 V_PLANE); 9449 } 9450 else if( 9451 ps_recon_datastore->au1_is_chromaRecon_available[0] && 9452 (UCHAR_MAX != 9453 ps_recon_datastore 9454 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx])) 9455 { 9456 UWORD8 *pu1_recon_src = 9457 ((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs 9458 [ps_recon_datastore->au1_bufId_with_winning_ChromaRecon 9459 [V_PLANE][ctr][i4_subtu_idx]]) + 9460 i4_subtu_pos_x + 9461 i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride; 9462 9463 ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy( 9464 pu1_recon_src, 9465 ps_recon_datastore->i4_lumaRecon_stride, 9466 pu1_chroma_recon, 9467 recon_chrma_strd, 9468 chroma_trans_size, 9469 chroma_trans_size, 9470 V_PLANE); 9471 } 9472 } 9473 9474 u1_is_cu_coded |= 9475 ((1 == i4_subtu_idx) ? 
ps_tu->b1_cr_cbf_subtu1 : ps_tu->b1_cr_cbf); 9476 9477 pu1_final_ecd_data += i4_num_bytes; 9478 total_bytes += i4_num_bytes; 9479 } 9480 } 9481 else 9482 { 9483 WORD32 cb_zero_col, cb_zero_row, cr_zero_col, cr_zero_row; 9484 9485 for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus; i4_subtu_idx++) 9486 { 9487 WORD32 cb_cbf, cr_cbf; 9488 WORD32 cb_num_bytes, cr_num_bytes; 9489 9490 WORD32 chroma_trans_size = MAX(4, trans_size >> 1); 9491 9492 WORD32 i4_subtu_pos_x = cu_pos_x_in_pix; 9493 WORD32 i4_subtu_pos_y = cu_pos_y_in_pix + (i4_subtu_idx * chroma_trans_size); 9494 9495 if(0 == u1_is_422) 9496 { 9497 i4_subtu_pos_y >>= 1; 9498 } 9499 9500 pu1_cur_src_chrm += (i4_subtu_idx * chroma_trans_size * src_chrm_strd); 9501 pu1_cur_pred_chrm += (i4_subtu_idx * chroma_trans_size * pred_chrm_strd); 9502 pu1_cur_chroma_recon += (i4_subtu_idx * chroma_trans_size * recon_chrma_strd); 9503 pi2_cur_deq_data_chrm += (i4_subtu_idx * chroma_trans_size * cu_size); 9504 9505 if((PRED_MODE_INTRA == packed_pred_mode) && 9506 (1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data)) 9507 { 9508 WORD32 nbr_flags, left_strd_chrm, chrm_pred_func_idx; 9509 UWORD8 *pu1_left_chrm; 9510 UWORD8 *pu1_top_chrm; 9511 UWORD8 *pu1_top_left_chrm; 9512 9513 nbr_flags = ihevce_get_intra_chroma_tu_nbr( 9514 *pu4_nbr_flags, i4_subtu_idx, chroma_trans_size, u1_is_422); 9515 9516 /* left cu boundary */ 9517 if(0 == i4_subtu_pos_x) 9518 { 9519 left_strd_chrm = ps_chrm_cu_buf_prms->i4_cu_left_stride; 9520 pu1_left_chrm = 9521 ps_chrm_cu_buf_prms->pu1_cu_left + i4_subtu_pos_y * left_strd_chrm; 9522 } 9523 else 9524 { 9525 pu1_left_chrm = pu1_cur_chroma_recon - 2; 9526 left_strd_chrm = recon_chrma_strd; 9527 } 9528 9529 /* top cu boundary */ 9530 if(0 == i4_subtu_pos_y) 9531 { 9532 pu1_top_chrm = ps_chrm_cu_buf_prms->pu1_cu_top + i4_subtu_pos_x; 9533 } 9534 else 9535 { 9536 pu1_top_chrm = pu1_cur_chroma_recon - recon_chrma_strd; 9537 } 9538 9539 /* by default top left is set to cu top left */ 9540 
pu1_top_left_chrm = ps_chrm_cu_buf_prms->pu1_cu_top_left; 9541 9542 /* top left based on position */ 9543 if((0 != i4_subtu_pos_y) && (0 == i4_subtu_pos_x)) 9544 { 9545 pu1_top_left_chrm = pu1_left_chrm - left_strd_chrm; 9546 } 9547 else if(0 != i4_subtu_pos_x) 9548 { 9549 pu1_top_left_chrm = pu1_top_chrm - 2; 9550 } 9551 9552 /* call the chroma reference array substitution */ 9553 ihevc_intra_pred_chroma_ref_substitution_fptr( 9554 pu1_top_left_chrm, 9555 pu1_top_chrm, 9556 pu1_left_chrm, 9557 left_strd_chrm, 9558 chroma_trans_size, 9559 nbr_flags, 9560 (UWORD8 *)ps_ctxt->pv_ref_sub_out, 9561 1); 9562 9563 /* use the look up to get the function idx */ 9564 chrm_pred_func_idx = g_i4_ip_funcs[chroma_pred_mode]; 9565 9566 /* call the intra prediction function */ 9567 ps_ctxt->apf_chrm_ip[chrm_pred_func_idx]( 9568 (UWORD8 *)ps_ctxt->pv_ref_sub_out, 9569 1, 9570 pu1_cur_pred_chrm, 9571 pred_chrm_strd, 9572 chroma_trans_size, 9573 chroma_pred_mode); 9574 } 9575 9576 /**---------- Compute iq&coeff data if required : Chroma ------------**/ 9577 if(1 == ps_tu_enc_loop_temp_prms->b1_eval_chroma_iq_and_coeff_data) 9578 { 9579 WORD32 perform_sbh, perform_rdoq, temp_bits; 9580 9581 if(ps_prms->u1_recompute_sbh_and_rdoq) 9582 { 9583 perform_sbh = (ps_ctxt->i4_sbh_level != NO_SBH); 9584 perform_rdoq = (ps_ctxt->i4_rdoq_level != NO_RDOQ); 9585 } 9586 else 9587 { 9588 /* RDOQ will change the coefficients. If coefficients are changed, we will have to do sbh again*/ 9589 perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh; 9590 /* To do SBH we need the quant and iquant data. 
This would mean we need to do quantization again, which would mean 9591 we would have to do RDOQ again.*/ 9592 perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq; 9593 } 9594 9595 /* populate the coeffs scan idx */ 9596 ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT; 9597 9598 if(PRED_MODE_INTRA == packed_pred_mode) 9599 { 9600 /* for 4x4 transforms based on intra pred mode scan is choosen*/ 9601 if(4 == chroma_trans_size) 9602 { 9603 /* for modes from 22 upto 30 horizontal scan is used */ 9604 if((chroma_pred_mode > 21) && (chroma_pred_mode < 31)) 9605 { 9606 ps_ctxt->i4_scan_idx = SCAN_HORZ; 9607 } 9608 /* for modes from 6 upto 14 horizontal scan is used */ 9609 else if((chroma_pred_mode > 5) && (chroma_pred_mode < 15)) 9610 { 9611 ps_ctxt->i4_scan_idx = SCAN_VERT; 9612 } 9613 } 9614 } 9615 9616 #if DISABLE_RDOQ_INTRA 9617 if(PRED_MODE_INTRA == packed_pred_mode) 9618 { 9619 perform_rdoq = 0; 9620 } 9621 #endif 9622 9623 /* RDOPT copy States : TU init (best until prev TU) to current */ 9624 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( 9625 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx] 9626 .s_cabac_ctxt.au1_ctxt_models[0] + 9627 IHEVC_CAB_COEFFX_PREFIX, 9628 &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX, 9629 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX); 9630 9631 ASSERT(rd_opt_best_idx == ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx); 9632 /*If BEST candidate RDOQ is enabled, Eithe no coef level rdoq or CU level rdoq has to be enabled 9633 so that all candidates and best candidate are quantized with same rounding factor */ 9634 if(1 == perform_rdoq) 9635 { 9636 ASSERT(ps_ctxt->i4_quant_rounding_level != TU_LEVEL_QUANT_ROUNDING); 9637 } 9638 9639 if(!ps_best_cu_prms->u1_skip_flag || 9640 !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt) 9641 { 9642 /* Cb */ 9643 cb_cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn( 9644 ps_ctxt, 9645 pu1_cur_pred_chrm, 9646 pred_chrm_strd, 9647 pu1_cur_src_chrm, 9648 src_chrm_strd, 9649 
pi2_cur_deq_data_chrm, 9650 cu_size, 9651 pu1_chrm_recon, 9652 recon_chrma_strd, 9653 pu1_final_ecd_data, 9654 pu1_csbf_buf, 9655 csbf_strd, 9656 chroma_trans_size, 9657 ps_ctxt->i4_scan_idx, 9658 (PRED_MODE_INTRA == packed_pred_mode), 9659 &cb_num_bytes, 9660 &temp_bits, 9661 &cb_zero_col, 9662 &cb_zero_row, 9663 &au1_is_recon_available[U_PLANE], 9664 perform_sbh, 9665 perform_rdoq, 9666 &i8_ssd, 9667 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS 9668 !ps_ctxt->u1_is_refPic 9669 ? ALPHA_FOR_NOISE_TERM_IN_RDOPT 9670 : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) * 9671 (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) / 9672 100.0, 9673 ps_prms->u1_is_cu_noisy, 9674 #endif 9675 ps_best_cu_prms->u1_skip_flag && 9676 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt, 9677 u1_compute_spatial_ssd_chroma ? SPATIAL_DOMAIN_SSD 9678 : FREQUENCY_DOMAIN_SSD, 9679 U_PLANE); 9680 } 9681 else 9682 { 9683 cb_cbf = 0; 9684 temp_bits = 0; 9685 cb_num_bytes = 0; 9686 au1_is_recon_available[U_PLANE] = 0; 9687 cb_zero_col = 0; 9688 cb_zero_row = 0; 9689 } 9690 9691 /* Accumulate chroma residual bits */ 9692 ps_best_cu_prms->u4_cu_chroma_res_bits += temp_bits; 9693 9694 /* RDOPT copy States : New updated after curr TU to TU init */ 9695 if(0 != cb_cbf) 9696 { 9697 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( 9698 &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX, 9699 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx] 9700 .s_cabac_ctxt.au1_ctxt_models[0] + 9701 IHEVC_CAB_COEFFX_PREFIX, 9702 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX); 9703 } 9704 /* RDOPT copy States : Restoring back the Cb init state to Cr */ 9705 else 9706 { 9707 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( 9708 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx] 9709 .s_cabac_ctxt.au1_ctxt_models[0] + 9710 IHEVC_CAB_COEFFX_PREFIX, 9711 &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX, 9712 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX); 9713 } 9714 9715 
if(!ps_best_cu_prms->u1_skip_flag || 9716 !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt) 9717 { 9718 /* Cr */ 9719 cr_cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn( 9720 ps_ctxt, 9721 pu1_cur_pred_chrm, 9722 pred_chrm_strd, 9723 pu1_cur_src_chrm, 9724 src_chrm_strd, 9725 pi2_cur_deq_data_chrm + chroma_trans_size, 9726 cu_size, 9727 pu1_chrm_recon, 9728 recon_chrma_strd, 9729 pu1_final_ecd_data + cb_num_bytes, 9730 pu1_csbf_buf, 9731 csbf_strd, 9732 chroma_trans_size, 9733 ps_ctxt->i4_scan_idx, 9734 (PRED_MODE_INTRA == packed_pred_mode), 9735 &cr_num_bytes, 9736 &temp_bits, 9737 &cr_zero_col, 9738 &cr_zero_row, 9739 &au1_is_recon_available[V_PLANE], 9740 perform_sbh, 9741 perform_rdoq, 9742 &i8_ssd, 9743 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS 9744 !ps_ctxt->u1_is_refPic 9745 ? ALPHA_FOR_NOISE_TERM_IN_RDOPT 9746 : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) * 9747 (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) / 9748 100.0, 9749 ps_prms->u1_is_cu_noisy, 9750 #endif 9751 ps_best_cu_prms->u1_skip_flag && 9752 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt, 9753 u1_compute_spatial_ssd_chroma ? 
SPATIAL_DOMAIN_SSD 9754 : FREQUENCY_DOMAIN_SSD, 9755 V_PLANE); 9756 } 9757 else 9758 { 9759 cr_cbf = 0; 9760 temp_bits = 0; 9761 cr_num_bytes = 0; 9762 au1_is_recon_available[V_PLANE] = 0; 9763 cr_zero_col = 0; 9764 cr_zero_row = 0; 9765 } 9766 9767 /* Accumulate chroma residual bits */ 9768 ps_best_cu_prms->u4_cu_chroma_res_bits += temp_bits; 9769 9770 /* RDOPT copy States : New updated after curr TU to TU init */ 9771 if(0 != cr_cbf) 9772 { 9773 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( 9774 &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX, 9775 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx] 9776 .s_cabac_ctxt.au1_ctxt_models[0] + 9777 IHEVC_CAB_COEFFX_PREFIX, 9778 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX); 9779 } 9780 9781 if(0 == i4_subtu_idx) 9782 { 9783 ps_tu->b1_cb_cbf = cb_cbf; 9784 ps_tu->b1_cr_cbf = cr_cbf; 9785 } 9786 else 9787 { 9788 ps_tu->b1_cb_cbf_subtu1 = cb_cbf; 9789 ps_tu->b1_cr_cbf_subtu1 = cr_cbf; 9790 } 9791 } 9792 else 9793 { 9794 cb_zero_col = ps_tu_enc_loop_temp_prms->au4_cb_zero_col[i4_subtu_idx]; 9795 cb_zero_row = ps_tu_enc_loop_temp_prms->au4_cb_zero_row[i4_subtu_idx]; 9796 cr_zero_col = ps_tu_enc_loop_temp_prms->au4_cr_zero_col[i4_subtu_idx]; 9797 cr_zero_row = ps_tu_enc_loop_temp_prms->au4_cr_zero_row[i4_subtu_idx]; 9798 9799 if(ps_prms->u1_will_cabac_state_change) 9800 { 9801 cb_num_bytes = 9802 ps_tu_enc_loop_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx]; 9803 } 9804 else 9805 { 9806 cb_num_bytes = 0; 9807 } 9808 9809 if(ps_prms->u1_will_cabac_state_change) 9810 { 9811 cr_num_bytes = 9812 ps_tu_enc_loop_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx]; 9813 } 9814 else 9815 { 9816 cr_num_bytes = 0; 9817 } 9818 9819 /* copy cb ecd data to final buffer */ 9820 memcpy(pu1_final_ecd_data, pu1_chrm_old_ecd_data, cb_num_bytes); 9821 9822 pu1_chrm_old_ecd_data += cb_num_bytes; 9823 9824 /* copy cb ecd data to final buffer */ 9825 memcpy( 9826 (pu1_final_ecd_data + cb_num_bytes), 9827 
pu1_chrm_old_ecd_data, 9828 cr_num_bytes); 9829 9830 pu1_chrm_old_ecd_data += cr_num_bytes; 9831 9832 au1_is_recon_available[U_PLANE] = 0; 9833 au1_is_recon_available[V_PLANE] = 0; 9834 } 9835 9836 /**-------- Compute Recon data (Do IT & Recon) : Chroma -----------**/ 9837 if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data && 9838 (!u1_compute_spatial_ssd_chroma || 9839 (!au1_is_recon_available[U_PLANE] && u1_compute_spatial_ssd_chroma))) 9840 { 9841 if(!ps_recon_datastore->au1_is_chromaRecon_available[0] || 9842 (ps_recon_datastore->au1_is_chromaRecon_available[0] && 9843 (UCHAR_MAX == 9844 ps_recon_datastore 9845 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx]))) 9846 { 9847 ihevce_chroma_it_recon_fxn( 9848 ps_ctxt, 9849 pi2_cur_deq_data_chrm, 9850 cu_size, 9851 pu1_cur_pred_chrm, 9852 pred_chrm_strd, 9853 pu1_cur_chroma_recon, 9854 recon_chrma_strd, 9855 pu1_final_ecd_data, 9856 chroma_trans_size, 9857 (i4_subtu_idx == 0) ? ps_tu->b1_cb_cbf : ps_tu->b1_cb_cbf_subtu1, 9858 cb_zero_col, 9859 cb_zero_row, 9860 U_PLANE); 9861 } 9862 else if( 9863 ps_recon_datastore->au1_is_chromaRecon_available[0] && 9864 (UCHAR_MAX != 9865 ps_recon_datastore 9866 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx])) 9867 { 9868 UWORD8 *pu1_recon_src = 9869 ((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs 9870 [ps_recon_datastore->au1_bufId_with_winning_ChromaRecon 9871 [U_PLANE][ctr][i4_subtu_idx]]) + 9872 i4_subtu_pos_x + 9873 i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride; 9874 9875 ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy( 9876 pu1_recon_src, 9877 ps_recon_datastore->i4_lumaRecon_stride, 9878 pu1_cur_chroma_recon, 9879 recon_chrma_strd, 9880 chroma_trans_size, 9881 chroma_trans_size, 9882 U_PLANE); 9883 } 9884 } 9885 9886 u1_is_cu_coded |= 9887 ((1 == i4_subtu_idx) ? 
ps_tu->b1_cb_cbf_subtu1 : ps_tu->b1_cb_cbf); 9888 9889 if(ps_prms->u1_will_cabac_state_change) 9890 { 9891 ps_tu_enc_loop->ai4_cb_coeff_offset[i4_subtu_idx] = total_bytes; 9892 } 9893 9894 pu1_final_ecd_data += cb_num_bytes; 9895 /* update total bytes consumed */ 9896 total_bytes += cb_num_bytes; 9897 9898 if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data && 9899 (!u1_compute_spatial_ssd_chroma || 9900 (!au1_is_recon_available[V_PLANE] && u1_compute_spatial_ssd_chroma))) 9901 { 9902 if(!ps_recon_datastore->au1_is_chromaRecon_available[0] || 9903 (ps_recon_datastore->au1_is_chromaRecon_available[0] && 9904 (UCHAR_MAX == 9905 ps_recon_datastore 9906 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx]))) 9907 { 9908 ihevce_chroma_it_recon_fxn( 9909 ps_ctxt, 9910 pi2_cur_deq_data_chrm + chroma_trans_size, 9911 cu_size, 9912 pu1_cur_pred_chrm, 9913 pred_chrm_strd, 9914 pu1_cur_chroma_recon, 9915 recon_chrma_strd, 9916 pu1_final_ecd_data, 9917 chroma_trans_size, 9918 (i4_subtu_idx == 0) ? ps_tu->b1_cr_cbf : ps_tu->b1_cr_cbf_subtu1, 9919 cr_zero_col, 9920 cr_zero_row, 9921 V_PLANE); 9922 } 9923 else if( 9924 ps_recon_datastore->au1_is_chromaRecon_available[0] && 9925 (UCHAR_MAX != 9926 ps_recon_datastore 9927 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx])) 9928 { 9929 UWORD8 *pu1_recon_src = 9930 ((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs 9931 [ps_recon_datastore->au1_bufId_with_winning_ChromaRecon 9932 [V_PLANE][ctr][i4_subtu_idx]]) + 9933 i4_subtu_pos_x + 9934 i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride; 9935 9936 ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy( 9937 pu1_recon_src, 9938 ps_recon_datastore->i4_lumaRecon_stride, 9939 pu1_cur_chroma_recon, 9940 recon_chrma_strd, 9941 chroma_trans_size, 9942 chroma_trans_size, 9943 V_PLANE); 9944 } 9945 } 9946 9947 u1_is_cu_coded |= 9948 ((1 == i4_subtu_idx) ? 
ps_tu->b1_cr_cbf_subtu1 : ps_tu->b1_cr_cbf); 9949 9950 if(ps_prms->u1_will_cabac_state_change) 9951 { 9952 ps_tu_enc_loop->ai4_cr_coeff_offset[i4_subtu_idx] = total_bytes; 9953 } 9954 9955 pu1_final_ecd_data += cr_num_bytes; 9956 /* update total bytes consumed */ 9957 total_bytes += cr_num_bytes; 9958 } 9959 } 9960 } 9961 else 9962 { 9963 ps_tu_enc_loop->ai4_cb_coeff_offset[0] = total_bytes; 9964 ps_tu_enc_loop->ai4_cr_coeff_offset[0] = total_bytes; 9965 ps_tu_enc_loop->ai4_cb_coeff_offset[1] = total_bytes; 9966 ps_tu_enc_loop->ai4_cr_coeff_offset[1] = total_bytes; 9967 ps_tu->b1_cb_cbf = 0; 9968 ps_tu->b1_cr_cbf = 0; 9969 ps_tu->b1_cb_cbf_subtu1 = 0; 9970 ps_tu->b1_cr_cbf_subtu1 = 0; 9971 } 9972 9973 /* Update to next TU */ 9974 ps_tu_enc_loop++; 9975 ps_tu_enc_loop_temp_prms++; 9976 9977 pu4_nbr_flags++; 9978 pu1_intra_pred_mode++; 9979 9980 /*Do not set the nbr map for last pu in cu */ 9981 if((num_tu_in_cu - 1) != ctr) 9982 { 9983 /* set the neighbour map to 1 */ 9984 ihevce_set_nbr_map( 9985 ps_ctxt->pu1_ctb_nbr_map, 9986 ps_ctxt->i4_nbr_map_strd, 9987 cu_pos_x_in_4x4, 9988 cu_pos_y_in_4x4, 9989 (trans_size >> 2), 9990 1); 9991 } 9992 } 9993 9994 if(ps_prms->u1_will_cabac_state_change) 9995 { 9996 ps_best_cu_prms->u1_is_cu_coded = u1_is_cu_coded; 9997 9998 /* Modify skip flag, if luma is skipped & Chroma is coded */ 9999 if((1 == u1_is_cu_coded) && (PRED_MODE_SKIP == packed_pred_mode)) 10000 { 10001 ps_best_cu_prms->u1_skip_flag = 0; 10002 } 10003 } 10004 10005 /* during chroma evaluation if skip decision was over written */ 10006 /* then the current skip candidate is set to a non skip candidate */ 10007 if(PRED_MODE_INTRA != packed_pred_mode) 10008 { 10009 ps_best_inter_cand->b1_skip_flag = ps_best_cu_prms->u1_skip_flag; 10010 } 10011 10012 /**------------- Compute header data if required --------------**/ 10013 if(1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data) 10014 { 10015 WORD32 cbf_bits; 10016 WORD32 cu_bits; 10017 WORD32 unit_4x4_size = 
cu_size >> 2; 10018 10019 /*Restoring the running reference into the best rdopt_ctxt cabac states which will then 10020 be copied as the base reference for the next cu 10021 Assumption : We are ensuring that the u1_eval_header_data flag is set to 1 only if either 10022 luma and chroma are being reevaluated*/ 10023 COPY_CABAC_STATES( 10024 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx] 10025 .s_cabac_ctxt.au1_ctxt_models[0], 10026 &ps_ctxt->au1_rdopt_init_ctxt_models[0], 10027 IHEVC_CAB_CTXT_END); 10028 10029 /* get the neighbour availability flags for current cu */ 10030 ihevce_get_only_nbr_flag( 10031 &s_nbr, 10032 ps_ctxt->pu1_ctb_nbr_map, 10033 ps_ctxt->i4_nbr_map_strd, 10034 (cu_pos_x << 1), 10035 (cu_pos_y << 1), 10036 unit_4x4_size, 10037 unit_4x4_size); 10038 10039 cu_bits = ihevce_entropy_rdo_encode_cu( 10040 &ps_ctxt->s_rdopt_entropy_ctxt, 10041 ps_best_cu_prms, 10042 cu_pos_x, 10043 cu_pos_y, 10044 cu_size, 10045 ps_ctxt->u1_disable_intra_eval ? !DISABLE_TOP_SYNC && s_nbr.u1_top_avail 10046 : s_nbr.u1_top_avail, 10047 s_nbr.u1_left_avail, 10048 (pu1_final_ecd_data - total_bytes), 10049 &cbf_bits); 10050 10051 /* cbf bits are excluded from header bits, instead considered as texture bits */ 10052 ps_best_cu_prms->u4_cu_hdr_bits = cu_bits - cbf_bits; 10053 ps_best_cu_prms->u4_cu_cbf_bits = cbf_bits; 10054 } 10055 10056 if(ps_prms->u1_will_cabac_state_change) 10057 { 10058 ps_best_cu_prms->i4_num_bytes_ecd_data = total_bytes; 10059 } 10060 } 10061 10062 /*! 
10063 ****************************************************************************** 10064 * \if Function name : ihevce_set_eval_flags \endif 10065 * 10066 * \brief 10067 * Function which decides which eval flags have to be set based on present 10068 * and RDOQ conditions 10069 * 10070 * \param[in] ps_ctxt : encoder ctxt pointer 10071 * \param[in] enc_loop_cu_final_prms_t : pointer to final cu params 10072 * 10073 * \return 10074 * None 10075 * 10076 * \author 10077 * Ittiam 10078 * 10079 ***************************************************************************** 10080 */ 10081 void ihevce_set_eval_flags( 10082 ihevce_enc_loop_ctxt_t *ps_ctxt, enc_loop_cu_final_prms_t *ps_enc_loop_bestprms) 10083 { 10084 WORD32 count = 0; 10085 10086 ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data = 0; 10087 10088 ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data = 10089 !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt; 10090 10091 if(ps_ctxt->u1_disable_intra_eval && (!(ps_ctxt->i4_deblk_pad_hpel_cur_pic & 0x1))) 10092 { 10093 ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data = 0; 10094 } 10095 else 10096 { 10097 ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data = 1; 10098 } 10099 10100 if((1 == ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq) || 10101 (1 == ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh)) 10102 { 10103 /* When rdoq is enabled only for the best candidate, in case of in Intra nTU 10104 RDOQ might have altered the coeffs of the neighbour CU. As a result, the pred 10105 for the current CU will change. 
Therefore, we need to reevaluate the pred data*/ 10106 if((ps_enc_loop_bestprms->u2_num_tus_in_cu > 1) && 10107 (ps_enc_loop_bestprms->u1_intra_flag == 1)) 10108 { 10109 ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data = 1; 10110 ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data = 1; 10111 } 10112 if(ps_enc_loop_bestprms->u1_skip_flag == 1) 10113 { 10114 for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++) 10115 { 10116 ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count] 10117 .b1_eval_luma_iq_and_coeff_data = 0; 10118 ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count] 10119 .b1_eval_chroma_iq_and_coeff_data = 0; 10120 } 10121 } 10122 else 10123 { 10124 for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++) 10125 { 10126 ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count] 10127 .b1_eval_luma_iq_and_coeff_data = 1; 10128 ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count] 10129 .b1_eval_chroma_iq_and_coeff_data = 1; 10130 } 10131 } 10132 } 10133 else 10134 { 10135 switch(ps_ctxt->i4_quality_preset) 10136 { 10137 case IHEVCE_QUALITY_P0: 10138 case IHEVCE_QUALITY_P2: 10139 case IHEVCE_QUALITY_P3: 10140 { 10141 for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++) 10142 { 10143 ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count] 10144 .b1_eval_luma_iq_and_coeff_data = 0; 10145 ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count] 10146 .b1_eval_chroma_iq_and_coeff_data = 10147 !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt; 10148 } 10149 10150 break; 10151 } 10152 case IHEVCE_QUALITY_P4: 10153 case IHEVCE_QUALITY_P5: 10154 { 10155 for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++) 10156 { 10157 ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count] 10158 .b1_eval_luma_iq_and_coeff_data = 0; 10159 ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count] 10160 .b1_eval_chroma_iq_and_coeff_data = 10161 !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt; 10162 } 10163 10164 
break; 10165 } 10166 case IHEVCE_QUALITY_P6: 10167 { 10168 for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++) 10169 { 10170 ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count] 10171 .b1_eval_luma_iq_and_coeff_data = 0; 10172 #if !ENABLE_CHROMA_TRACKING_OF_LUMA_CBF_IN_XS25 10173 ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count] 10174 .b1_eval_chroma_iq_and_coeff_data = 10175 !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt; 10176 #else 10177 if((ps_ctxt->i1_slice_type == BSLICE) && (ps_ctxt->i4_temporal_layer_id > 1) && 10178 (ps_enc_loop_bestprms->as_tu_enc_loop[count].s_tu.b3_size >= 2)) 10179 { 10180 ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count] 10181 .b1_eval_chroma_iq_and_coeff_data = 10182 ps_enc_loop_bestprms->as_tu_enc_loop[count].s_tu.b1_y_cbf; 10183 } 10184 else 10185 { 10186 ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count] 10187 .b1_eval_chroma_iq_and_coeff_data = 10188 !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt; 10189 } 10190 #endif 10191 } 10192 10193 break; 10194 } 10195 default: 10196 { 10197 break; 10198 } 10199 } 10200 } 10201 10202 /* Not recomputing Luma pred-data and header data for any preset now */ 10203 ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 1; 10204 } 10205 10206 /** 10207 ****************************************************************************** 10208 * 10209 * @brief Shrink's TU tree of inter CUs by merging redundnant child nodes 10210 * (not coded children) into a parent node(not coded). 
10211 * 10212 * @par Description 10213 * This is required post RDO evaluation as TU decisions are 10214 * pre-determined(pre RDO) based on recursive SATD, 10215 * while the quad children TU's can be skipped during RDO 10216 * 10217 * The shrink process is applied iteratively till there are no 10218 * more modes to shrink 10219 * 10220 * @param[inout] ps_tu_enc_loop 10221 * pointer to tu enc loop params of inter cu 10222 * 10223 * @param[inout] ps_tu_enc_loop_temp_prms 10224 * pointer to temp tu enc loop params of inter cu 10225 * 10226 * @param[in] num_tu_in_cu 10227 * number of tus in cu 10228 * 10229 * @return modified number of tus in cu 10230 * 10231 ****************************************************************************** 10232 */ 10233 WORD32 ihevce_shrink_inter_tu_tree( 10234 tu_enc_loop_out_t *ps_tu_enc_loop, 10235 tu_enc_loop_temp_prms_t *ps_tu_enc_loop_temp_prms, 10236 recon_datastore_t *ps_recon_datastore, 10237 WORD32 num_tu_in_cu, 10238 UWORD8 u1_is_422) 10239 { 10240 WORD32 recurse = 1; 10241 WORD32 ctr; 10242 10243 /* ------------- Quadtree TU Split Transform flag optimization ------------ */ 10244 /* Post RDO, if all 4 child nodes are not coded the overheads of split TU */ 10245 /* flags and cbf flags are saved by merging to parent node and marking */ 10246 /* parent TU as not coded */ 10247 /* */ 10248 /* ParentTUSplit=1 */ 10249 /* | */ 10250 /* --------------------------------------------------------- */ 10251 /* |C0(Not coded) | C1(Not coded) | C2(Not coded) | C3(Not coded) */ 10252 /* || */ 10253 /* \/ */ 10254 /* */ 10255 /* ParentTUSplit=0 (Not Coded) */ 10256 /* */ 10257 /* ------------- Quadtree TU Split Transform flag optimization ------------ */ 10258 while((num_tu_in_cu > 4) && recurse) 10259 { 10260 recurse = 0; 10261 10262 /* Validate inter CU */ 10263 //ASSERT(ps_tu_enc_loop[0].s_tu.s_tu.b1_intra_flag == 0); /*b1_intra_flag no longer a member of tu structure */ 10264 10265 /* loop for all tu blocks in current cu */ 10266 for(ctr 
= 0; ctr < num_tu_in_cu;) 10267 { 10268 /* Get current tu posx, posy and size */ 10269 WORD32 curr_pos_x = ps_tu_enc_loop[ctr].s_tu.b4_pos_x << 2; 10270 WORD32 curr_pos_y = ps_tu_enc_loop[ctr].s_tu.b4_pos_y << 2; 10271 /* +1 is for parents size */ 10272 WORD32 parent_tu_size = 1 << (ps_tu_enc_loop[ctr].s_tu.b3_size + 2 + 1); 10273 10274 /* eval merge if leaf nodes reached i.e all child tus are of same size and first tu pos is same as parent pos */ 10275 WORD32 eval_merge = ((curr_pos_x & (parent_tu_size - 1)) == 0); 10276 eval_merge &= ((curr_pos_y & (parent_tu_size - 1)) == 0); 10277 10278 /* As TUs are published in encode order (Z SCAN), */ 10279 /* Four consecutive TUS of same size implies we have hit leaf nodes. */ 10280 if(((ps_tu_enc_loop[ctr].s_tu.b3_size) == (ps_tu_enc_loop[ctr + 1].s_tu.b3_size)) && 10281 ((ps_tu_enc_loop[ctr].s_tu.b3_size) == (ps_tu_enc_loop[ctr + 2].s_tu.b3_size)) && 10282 ((ps_tu_enc_loop[ctr].s_tu.b3_size) == (ps_tu_enc_loop[ctr + 3].s_tu.b3_size)) && 10283 eval_merge) 10284 { 10285 WORD32 merge_parent = 1; 10286 10287 /* If any leaf noded is coded, it cannot be merged to parent */ 10288 if((ps_tu_enc_loop[ctr].s_tu.b1_y_cbf) || (ps_tu_enc_loop[ctr].s_tu.b1_cb_cbf) || 10289 (ps_tu_enc_loop[ctr].s_tu.b1_cr_cbf) || 10290 10291 (ps_tu_enc_loop[ctr + 1].s_tu.b1_y_cbf) || 10292 (ps_tu_enc_loop[ctr + 1].s_tu.b1_cb_cbf) || 10293 (ps_tu_enc_loop[ctr + 1].s_tu.b1_cr_cbf) || 10294 10295 (ps_tu_enc_loop[ctr + 2].s_tu.b1_y_cbf) || 10296 (ps_tu_enc_loop[ctr + 2].s_tu.b1_cb_cbf) || 10297 (ps_tu_enc_loop[ctr + 2].s_tu.b1_cr_cbf) || 10298 10299 (ps_tu_enc_loop[ctr + 3].s_tu.b1_y_cbf) || 10300 (ps_tu_enc_loop[ctr + 3].s_tu.b1_cb_cbf) || 10301 (ps_tu_enc_loop[ctr + 3].s_tu.b1_cr_cbf)) 10302 { 10303 merge_parent = 0; 10304 } 10305 10306 if(u1_is_422) 10307 { 10308 if((ps_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1) || 10309 (ps_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1) || 10310 10311 (ps_tu_enc_loop[ctr + 1].s_tu.b1_cb_cbf_subtu1) || 10312 (ps_tu_enc_loop[ctr + 
1].s_tu.b1_cr_cbf_subtu1) || 10313 10314 (ps_tu_enc_loop[ctr + 2].s_tu.b1_cb_cbf_subtu1) || 10315 (ps_tu_enc_loop[ctr + 2].s_tu.b1_cr_cbf_subtu1) || 10316 10317 (ps_tu_enc_loop[ctr + 3].s_tu.b1_cb_cbf_subtu1) || 10318 (ps_tu_enc_loop[ctr + 3].s_tu.b1_cr_cbf_subtu1)) 10319 { 10320 merge_parent = 0; 10321 } 10322 } 10323 10324 if(merge_parent) 10325 { 10326 /* Merge all the children (ctr,ctr+1,ctr+2,ctr+3) to parent (ctr) */ 10327 10328 if(ps_recon_datastore->u1_is_lumaRecon_available) 10329 { 10330 ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = UCHAR_MAX; 10331 10332 memmove( 10333 &ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr + 1], 10334 &ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr + 4], 10335 (num_tu_in_cu - ctr - 4) * sizeof(UWORD8)); 10336 } 10337 10338 if(ps_recon_datastore->au1_is_chromaRecon_available[0]) 10339 { 10340 ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][0] = 10341 UCHAR_MAX; 10342 ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][0] = 10343 UCHAR_MAX; 10344 10345 memmove( 10346 &ps_recon_datastore 10347 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 1][0], 10348 &ps_recon_datastore 10349 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 4][0], 10350 (num_tu_in_cu - ctr - 4) * sizeof(UWORD8)); 10351 10352 memmove( 10353 &ps_recon_datastore 10354 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 1][0], 10355 &ps_recon_datastore 10356 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 4][0], 10357 (num_tu_in_cu - ctr - 4) * sizeof(UWORD8)); 10358 10359 if(u1_is_422) 10360 { 10361 ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][1] = 10362 UCHAR_MAX; 10363 ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][1] = 10364 UCHAR_MAX; 10365 10366 memmove( 10367 &ps_recon_datastore 10368 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 1][1], 10369 &ps_recon_datastore 10370 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 4][1], 
10371 (num_tu_in_cu - ctr - 4) * sizeof(UWORD8)); 10372 10373 memmove( 10374 &ps_recon_datastore 10375 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 1][1], 10376 &ps_recon_datastore 10377 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 4][1], 10378 (num_tu_in_cu - ctr - 4) * sizeof(UWORD8)); 10379 } 10380 } 10381 10382 /* Parent node size is one more than that of child */ 10383 ps_tu_enc_loop[ctr].s_tu.b3_size++; 10384 10385 ctr++; 10386 10387 /* move the subsequent TUs to next element */ 10388 ASSERT(num_tu_in_cu >= (ctr + 3)); 10389 memmove( 10390 (void *)(ps_tu_enc_loop + ctr), 10391 (void *)(ps_tu_enc_loop + ctr + 3), 10392 (num_tu_in_cu - ctr - 3) * sizeof(tu_enc_loop_out_t)); 10393 10394 /* Also memmove the temp TU params */ 10395 memmove( 10396 (void *)(ps_tu_enc_loop_temp_prms + ctr), 10397 (void *)(ps_tu_enc_loop_temp_prms + ctr + 3), 10398 (num_tu_in_cu - ctr - 3) * sizeof(tu_enc_loop_temp_prms_t)); 10399 10400 /* Number of TUs in CU are now less by 3 */ 10401 num_tu_in_cu -= 3; 10402 10403 /* Recurse again as new parent also be can be merged later */ 10404 recurse = 1; 10405 } 10406 else 10407 { 10408 /* Go to next set of leaf nodes */ 10409 ctr += 4; 10410 } 10411 } 10412 else 10413 { 10414 ctr++; 10415 } 10416 } 10417 } 10418 10419 /* return the modified num TUs*/ 10420 ASSERT(num_tu_in_cu > 0); 10421 return (num_tu_in_cu); 10422 } 10423 10424 UWORD8 ihevce_intra_mode_nxn_hash_updater( 10425 UWORD8 *pu1_mode_array, UWORD8 *pu1_hash_table, UWORD8 u1_num_ipe_modes) 10426 { 10427 WORD32 i; 10428 WORD32 i4_mode; 10429 10430 for(i = 0; i < MAX_INTRA_CU_CANDIDATES; i++) 10431 { 10432 if(pu1_mode_array[i] < 35) 10433 { 10434 if(pu1_mode_array[i] != 0) 10435 { 10436 i4_mode = pu1_mode_array[i] - 1; 10437 10438 if(!pu1_hash_table[i4_mode]) 10439 { 10440 pu1_hash_table[i4_mode] = 1; 10441 pu1_mode_array[u1_num_ipe_modes] = i4_mode; 10442 u1_num_ipe_modes++; 10443 } 10444 } 10445 10446 if(pu1_mode_array[i] != 34) 10447 { 10448 i4_mode = 
pu1_mode_array[i] + 1; 10449 10450 if((!pu1_hash_table[i4_mode])) 10451 { 10452 pu1_hash_table[i4_mode] = 1; 10453 pu1_mode_array[u1_num_ipe_modes] = i4_mode; 10454 u1_num_ipe_modes++; 10455 } 10456 } 10457 } 10458 } 10459 10460 if(!pu1_hash_table[INTRA_PLANAR]) 10461 { 10462 pu1_hash_table[INTRA_PLANAR] = 1; 10463 pu1_mode_array[u1_num_ipe_modes] = INTRA_PLANAR; 10464 u1_num_ipe_modes++; 10465 } 10466 10467 if(!pu1_hash_table[INTRA_DC]) 10468 { 10469 pu1_hash_table[INTRA_DC] = 1; 10470 pu1_mode_array[u1_num_ipe_modes] = INTRA_DC; 10471 u1_num_ipe_modes++; 10472 } 10473 10474 return u1_num_ipe_modes; 10475 } 10476 10477 #if ENABLE_TU_TREE_DETERMINATION_IN_RDOPT 10478 WORD32 ihevce_determine_tu_tree_distribution( 10479 cu_inter_cand_t *ps_cu_data, 10480 me_func_selector_t *ps_func_selector, 10481 WORD16 *pi2_scratch_mem, 10482 UWORD8 *pu1_inp, 10483 WORD32 i4_inp_stride, 10484 WORD32 i4_lambda, 10485 UWORD8 u1_lambda_q_shift, 10486 UWORD8 u1_cu_size, 10487 UWORD8 u1_max_tr_depth) 10488 { 10489 err_prms_t s_err_prms; 10490 10491 PF_SAD_FXN_TU_REC pf_err_compute[4]; 10492 10493 WORD32 i4_satd; 10494 10495 s_err_prms.pi4_sad_grid = &i4_satd; 10496 s_err_prms.pi4_tu_split_flags = ps_cu_data->ai4_tu_split_flag; 10497 s_err_prms.pu1_inp = pu1_inp; 10498 s_err_prms.pu1_ref = ps_cu_data->pu1_pred_data; 10499 s_err_prms.i4_inp_stride = i4_inp_stride; 10500 s_err_prms.i4_ref_stride = ps_cu_data->i4_pred_data_stride; 10501 s_err_prms.pu1_wkg_mem = (UWORD8 *)pi2_scratch_mem; 10502 10503 if(u1_cu_size == 64) 10504 { 10505 s_err_prms.u1_max_tr_depth = MIN(1, u1_max_tr_depth); 10506 } 10507 else 10508 { 10509 s_err_prms.u1_max_tr_depth = u1_max_tr_depth; 10510 } 10511 10512 pf_err_compute[CU_64x64] = hme_evalsatd_pt_pu_64x64_tu_rec; 10513 pf_err_compute[CU_32x32] = hme_evalsatd_pt_pu_32x32_tu_rec; 10514 pf_err_compute[CU_16x16] = hme_evalsatd_pt_pu_16x16_tu_rec; 10515 pf_err_compute[CU_8x8] = hme_evalsatd_pt_pu_8x8_tu_rec; 10516 10517 i4_satd = 
pf_err_compute[hme_get_range(u1_cu_size) - 4]( 10518 &s_err_prms, i4_lambda, u1_lambda_q_shift, 0, ps_func_selector); 10519 10520 if((0 == u1_max_tr_depth) && (ps_cu_data->b3_part_size != 0) && (u1_cu_size != 64)) 10521 { 10522 ps_cu_data->ai4_tu_split_flag[0] = 1; 10523 } 10524 10525 return i4_satd; 10526 } 10527 #endif 10528 10529 void ihevce_populate_nbr_4x4_with_pu_data( 10530 nbr_4x4_t *ps_nbr_4x4, pu_t *ps_pu, WORD32 i4_nbr_buf_stride) 10531 { 10532 WORD32 i, j; 10533 10534 nbr_4x4_t *ps_tmp_4x4 = ps_nbr_4x4; 10535 10536 WORD32 ht = (ps_pu->b4_ht + 1); 10537 WORD32 wd = (ps_pu->b4_wd + 1); 10538 10539 ps_nbr_4x4->b1_intra_flag = 0; 10540 ps_nbr_4x4->b1_pred_l0_flag = !(ps_pu->b2_pred_mode & 1); 10541 ps_nbr_4x4->b1_pred_l1_flag = (ps_pu->b2_pred_mode > PRED_L0); 10542 ps_nbr_4x4->mv = ps_pu->mv; 10543 10544 for(i = 0; i < ht; i++) 10545 { 10546 for(j = 0; j < wd; j++) 10547 { 10548 ps_tmp_4x4[j] = *ps_nbr_4x4; 10549 } 10550 10551 ps_tmp_4x4 += i4_nbr_buf_stride; 10552 } 10553 } 10554 10555 void ihevce_call_luma_inter_pred_rdopt_pass1( 10556 ihevce_enc_loop_ctxt_t *ps_ctxt, cu_inter_cand_t *ps_inter_cand, WORD32 cu_size) 10557 { 10558 pu_t *ps_pu; 10559 UWORD8 *pu1_pred; 10560 WORD32 pred_stride, ctr, num_cu_part, skip_or_merge_flag = 0; 10561 WORD32 inter_pu_wd, inter_pu_ht; 10562 10563 pu1_pred = ps_inter_cand->pu1_pred_data_scr; 10564 pred_stride = ps_inter_cand->i4_pred_data_stride; 10565 num_cu_part = (SIZE_2Nx2N != ps_inter_cand->b3_part_size) + 1; 10566 10567 for(ctr = 0; ctr < num_cu_part; ctr++) 10568 { 10569 ps_pu = &ps_inter_cand->as_inter_pu[ctr]; 10570 10571 /* IF AMP then each partitions can have diff wd ht */ 10572 inter_pu_wd = (ps_pu->b4_wd + 1) << 2; 10573 inter_pu_ht = (ps_pu->b4_ht + 1) << 2; 10574 10575 skip_or_merge_flag = ps_inter_cand->b1_skip_flag | ps_pu->b1_merge_flag; 10576 //if(0 == skip_or_merge_flag) 10577 { 10578 ihevce_luma_inter_pred_pu(&ps_ctxt->s_mc_ctxt, ps_pu, pu1_pred, pred_stride, 1); 10579 } 10580 if((2 == num_cu_part) 
&& (0 == ctr)) 10581 { 10582 /* 2Nx__ partion case */ 10583 if(inter_pu_wd == cu_size) 10584 { 10585 pu1_pred += (inter_pu_ht * pred_stride); 10586 } 10587 10588 /* __x2N partion case */ 10589 if(inter_pu_ht == cu_size) 10590 { 10591 pu1_pred += inter_pu_wd; 10592 } 10593 } 10594 } 10595 } 10596 10597 LWORD64 ihevce_it_recon_ssd( 10598 ihevce_enc_loop_ctxt_t *ps_ctxt, 10599 UWORD8 *pu1_src, 10600 WORD32 i4_src_strd, 10601 UWORD8 *pu1_pred, 10602 WORD32 i4_pred_strd, 10603 WORD16 *pi2_deq_data, 10604 WORD32 i4_deq_data_strd, 10605 UWORD8 *pu1_recon, 10606 WORD32 i4_recon_stride, 10607 UWORD8 *pu1_ecd_data, 10608 UWORD8 u1_trans_size, 10609 UWORD8 u1_pred_mode, 10610 WORD32 i4_cbf, 10611 WORD32 i4_zero_col, 10612 WORD32 i4_zero_row, 10613 CHROMA_PLANE_ID_T e_chroma_plane) 10614 { 10615 if(NULL_PLANE == e_chroma_plane) 10616 { 10617 ihevce_it_recon_fxn( 10618 ps_ctxt, 10619 pi2_deq_data, 10620 i4_deq_data_strd, 10621 pu1_pred, 10622 i4_pred_strd, 10623 pu1_recon, 10624 i4_recon_stride, 10625 pu1_ecd_data, 10626 u1_trans_size, 10627 u1_pred_mode, 10628 i4_cbf, 10629 i4_zero_col, 10630 i4_zero_row); 10631 10632 return ps_ctxt->s_cmn_opt_func.pf_ssd_calculator( 10633 pu1_recon, pu1_src, i4_recon_stride, i4_src_strd, u1_trans_size, u1_trans_size); 10634 } 10635 else 10636 { 10637 ihevce_chroma_it_recon_fxn( 10638 ps_ctxt, 10639 pi2_deq_data, 10640 i4_deq_data_strd, 10641 pu1_pred, 10642 i4_pred_strd, 10643 pu1_recon, 10644 i4_recon_stride, 10645 pu1_ecd_data, 10646 u1_trans_size, 10647 i4_cbf, 10648 i4_zero_col, 10649 i4_zero_row, 10650 e_chroma_plane); 10651 10652 return ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator( 10653 pu1_recon + (e_chroma_plane == V_PLANE), 10654 pu1_src + (e_chroma_plane == V_PLANE), 10655 i4_recon_stride, 10656 i4_src_strd, 10657 u1_trans_size, 10658 u1_trans_size); 10659 } 10660 } 10661 10662 /*! 
/*!
******************************************************************************
* \if Function name : ihevce_chroma_t_q_iq_ssd_scan_fxn \endif
*
* \brief
*    Transform unit level (Chroma) enc_loop function: residue + transform,
*    quant / inverse quant with SSD, optional SBH / RDOQ, coefficient scan,
*    bit estimation and (optionally) the zero-cbf decision for one chroma
*    plane of a TU
*
* \param[in] ps_ctxt   enc_loop module ctxt pointer
* \param[in] pu1_pred   pointer to predicted data buffer
* \param[in] pred_strd   predicted buffer stride
* \param[in] pu1_src   pointer to source data buffer
* \param[in] src_strd   source buffer stride
* \param[in] pi2_deq_data   pointer to store iq data
* \param[in] deq_data_strd   iq data buffer stride
* \param[out] pu1_recon   pointer to recon buffer (written only when
*                         e_ssd_type is SPATIAL_DOMAIN_SSD)
* \param[in] i4_recon_stride   recon buffer stride
* \param[out] pu1_ecd_data   pointer coeff output buffer (input to ent cod)
* \param[out] pu1_csbf_buf   pointer to store the csbf for all 4x4 in a current
*                            block
* \param[in] csbf_strd   csbf buffer stride (asserted to be MAX_TU_IN_CTB_ROW)
* \param[in] trans_size   transform size (4, 8, 16); a value of 2 is
*                         processed as 4
* \param[in] i4_scan_idx   scan index forwarded to the coeff scan / SBH / RDOQ
* \param[in] intra_flag   0:Inter/Skip 1:Intra
* \param[out] pi4_coeff_off   pointer to store the number of bytes produced in
*                             coeff buffer
* \param[out] pi4_tu_bits   pointer to store the bits estimated for
*                           the current TU in RDopt Mode
* \param[out] pi4_zero_col  pointer to store the zero_col info for the TU
* \param[out] pi4_zero_row  pointer to store the zero_row info for the TU
* \param[out] pu1_is_recon_available   set to 1 when pu1_recon has been
*                                      written for this TU, else 0
* \param[in] i4_perform_sbh   non-zero enables sign data hiding
* \param[in] i4_perform_rdoq   non-zero enables RDOQ
* \param[out] pi8_cost   pointer to store the distortion of the TU
* \param[in] i4_alpha_stim_multiplier   stim weight forwarded to
*                      ihevce_inject_stim_into_distortion (only present when
*                      USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS)
* \param[in] u1_is_cu_noisy   enables the stim injection (same build switch)
* \param[in] u1_is_skip   non-zero: TU is not coded, only the pred-vs-src
*                         distortion is computed
* \param[in] e_ssd_type   SPATIAL_DOMAIN_SSD or FREQUENCY_DOMAIN_SSD
* \param[in] e_chroma_plane   U_PLANE or V_PLANE (asserted)
*
* \return
*    CBF of the current block
*
* \author
*  Ittiam
*
*****************************************************************************
*/
WORD32 ihevce_chroma_t_q_iq_ssd_scan_fxn(
    ihevce_enc_loop_ctxt_t *ps_ctxt,
    UWORD8 *pu1_pred,
    WORD32 pred_strd,
    UWORD8 *pu1_src,
    WORD32 src_strd,
    WORD16 *pi2_deq_data,
    WORD32 deq_data_strd,
    UWORD8 *pu1_recon,
    WORD32 i4_recon_stride,
    UWORD8 *pu1_ecd_data,
    UWORD8 *pu1_csbf_buf,
    WORD32 csbf_strd,
    WORD32 trans_size,
    WORD32 i4_scan_idx,
    WORD32 intra_flag,
    WORD32 *pi4_coeff_off,
    WORD32 *pi4_tu_bits,
    WORD32 *pi4_zero_col,
    WORD32 *pi4_zero_row,
    UWORD8 *pu1_is_recon_available,
    WORD32 i4_perform_sbh,
    WORD32 i4_perform_rdoq,
    LWORD64 *pi8_cost,
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
    WORD32 i4_alpha_stim_multiplier,
    UWORD8 u1_is_cu_noisy,
#endif
    UWORD8 u1_is_skip,
    SSD_TYPE_T e_ssd_type,
    CHROMA_PLANE_ID_T e_chroma_plane)
{
    WORD32 trans_idx, cbf, u4_blk_sad;
    WORD16 *pi2_quant_coeffs;
    WORD16 *pi2_trans_values;
    WORD32 quant_scale_mat_offset;
    WORD32 *pi4_trans_scratch;
    WORD32 *pi4_subBlock2csbfId_map = NULL;

#if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
    /* [0][0] : fixed rounding factor, [1] and [2] : per coeff rounding factors */
    WORD32 ai4_quant_rounding_factors[3][MAX_TU_SIZE * MAX_TU_SIZE], i;
#endif

    rdoq_sbh_ctxt_t *ps_rdoq_sbh_ctxt = &ps_ctxt->s_rdoq_sbh_ctxt;

    /* zero cbf evaluation is done when enabled in ctxt, or for inter TUs */
    /* when the inter zero-CU cost is being tracked                       */
    WORD32 i4_perform_zcbf = (ps_ctxt->i4_zcbf_rdo_level == ZCBF_ENABLE) ||
                             (!intra_flag && ENABLE_INTER_ZCU_COST);
    /* selects the quant function variant (index bit 0 below) */
    WORD32 i4_perform_coeff_level_rdoq =
        (ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING) &&
        (ps_ctxt->i4_chroma_quant_rounding_level == CHROMA_QUANT_ROUNDING);

    ASSERT((e_chroma_plane == U_PLANE) || (e_chroma_plane == V_PLANE));
    ASSERT(csbf_strd == MAX_TU_IN_CTB_ROW);

    /* defaults for the outputs */
    *pi4_coeff_off = 0;
    *pi4_tu_bits = 0;
    pu1_is_recon_available[0] = 0;

    /* all working buffers are carved out of the ctxt scratch memory; */
    /* NOTE: the transform scratch and the quant coeffs share the same */
    /* start address                                                   */
    pi4_trans_scratch = (WORD32 *)&ps_ctxt->ai2_scratch[0];
    pi2_quant_coeffs = &ps_ctxt->ai2_scratch[0];
    pi2_trans_values = &ps_ctxt->ai2_scratch[0] + (MAX_TRANS_SIZE * 2);

    /* a size of 2 is processed with the 4x4 transform */
    if(2 == trans_size)
    {
        trans_size = 4;
    }

    /* translate the transform size to index */
    trans_idx = trans_size >> 2;

    /* 4 -> 1, 8 -> 2 come from the shift above; 16 is mapped explicitly */
    if(16 == trans_size)
    {
        trans_idx = 3;
    }

    /* ---------- skip TU : only pred-vs-src distortion is needed ---------- */
    if(u1_is_skip)
    {
        /* NOTE(review): the plane offset is applied here as e_chroma_plane */
        /* itself, whereas the rest of this function uses                   */
        /* (e_chroma_plane == V_PLANE); the two agree only if U_PLANE is 0  */
        /* and V_PLANE is 1 - confirm against the enum definition           */
        pi8_cost[0] = ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
            pu1_pred + e_chroma_plane,
            pu1_src + e_chroma_plane,
            pred_strd,
            src_strd,
            trans_size,
            trans_size);

        if(e_ssd_type == SPATIAL_DOMAIN_SSD)
        {
            /* buffer copy from pred to recon */
            ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
                pu1_pred,
                pred_strd,
                pu1_recon,
                i4_recon_stride,
                trans_size,
                trans_size,
                e_chroma_plane);

            pu1_is_recon_available[0] = 1;
        }

#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
        if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
        {
            pi8_cost[0] = ihevce_inject_stim_into_distortion(
                pu1_src,
                src_strd,
                pu1_pred,
                pred_strd,
                pi8_cost[0],
                i4_alpha_stim_multiplier,
                trans_size,
                0,
                ps_ctxt->u1_enable_psyRDOPT,
                e_chroma_plane);
        }
#endif

#if ENABLE_INTER_ZCU_COST
#if !WEIGH_CHROMA_COST
        /* cbf = 0, accumulate cu not coded cost */
        ps_ctxt->i8_cu_not_coded_cost += pi8_cost[0];
#else
        /* cbf = 0, accumulate the weighted (rounded) cu not coded cost */
        ps_ctxt->i8_cu_not_coded_cost += (pi8_cost[0] * ps_ctxt->u4_chroma_cost_weighing_factor +
                                          (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
                                         CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT;
#endif
#endif

        return 0;
    }

    /* intra and inter use different halves of the (re)scaling matrix tables */
    if(intra_flag == 1)
    {
        quant_scale_mat_offset = 0;

#if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
        /* clamp all intra rounding factors to a minimum of 1/3 */
        ai4_quant_rounding_factors[0][0] =
            MAX(ps_ctxt->i4_quant_rnd_factor[intra_flag], (1 << QUANT_ROUND_FACTOR_Q) / 3);

        for(i = 0; i < trans_size * trans_size; i++)
        {
            ai4_quant_rounding_factors[1][i] =
                MAX(ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[trans_size >> 3][i],
                    (1 << QUANT_ROUND_FACTOR_Q) / 3);
            ai4_quant_rounding_factors[2][i] =
                MAX(ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[trans_size >> 3][i],
                    (1 << QUANT_ROUND_FACTOR_Q) / 3);
        }
#endif
    }
    else
    {
        quant_scale_mat_offset = NUM_TRANS_TYPES;
    }

    /* sub-block to csbf id map for the transform size; the map stays NULL */
    /* for any size not listed below                                       */
    switch(trans_size)
    {
    case 4:
    {
        pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map4x4TU;

        break;
    }
    case 8:
    {
        pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map8x8TU;

        break;
    }
    case 16:
    {
        pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map16x16TU;

        break;
    }
    case 32:
    {
        pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map32x32TU;

        break;
    }
    }

    /* ---------- call residue and transform block ------- */
    u4_blk_sad = ps_ctxt->apf_chrm_resd_trns[trans_idx - 1](
        pu1_src + (e_chroma_plane == V_PLANE),
        pu1_pred + (e_chroma_plane == V_PLANE),
        pi4_trans_scratch,
        pi2_trans_values,
        src_strd,
        pred_strd,
        ((trans_size << 16) + 1)); /* dst strd and chroma flag are packed together */
    /* the block SAD is not used by this function */
    (void)u4_blk_sad;
    /* -------- calculate SSD calculation in Transform Domain ------ */

    /* function index : bit 0 - coeff level rdoq, bit 1 - non frequency domain SSD */
    cbf = ps_ctxt->apf_quant_iquant_ssd
              [i4_perform_coeff_level_rdoq + (e_ssd_type != FREQUENCY_DOMAIN_SSD) * 2]

          (pi2_trans_values,
           ps_ctxt->api2_rescal_mat[trans_idx + quant_scale_mat_offset],
           pi2_quant_coeffs,
           pi2_deq_data,
           trans_size,
           ps_ctxt->i4_chrm_cu_qp_div6,
           ps_ctxt->i4_chrm_cu_qp_mod6,
#if !PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
           ps_ctxt->i4_quant_rnd_factor[intra_flag],
           ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[trans_size >> 3],
           ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[trans_size >> 3],
#else
           intra_flag ? ai4_quant_rounding_factors[0][0] : ps_ctxt->i4_quant_rnd_factor[intra_flag],
           intra_flag ? ai4_quant_rounding_factors[1]
                      : ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[trans_size >> 3],
           intra_flag ? ai4_quant_rounding_factors[2]
                      : ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[trans_size >> 3],
#endif
           trans_size,
           trans_size,
           deq_data_strd,
           pu1_csbf_buf,
           csbf_strd,
           pi4_zero_col,
           pi4_zero_row,
           ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset],
           pi8_cost);

    /* without frequency domain SSD the cost is only a placeholder here; */
    /* for SPATIAL_DOMAIN_SSD it is recomputed from the recon further    */
    /* below (when RDOPT_ZERO_CBF_ENABLE is set)                         */
    if(e_ssd_type != FREQUENCY_DOMAIN_SSD)
    {
        pi8_cost[0] = UINT_MAX;
    }

    if(0 != cbf)
    {
        /* set up the context shared by sign data hiding and RDOQ */
        if(i4_perform_sbh || i4_perform_rdoq)
        {
            ps_rdoq_sbh_ctxt->i4_iq_data_strd = deq_data_strd;
            ps_rdoq_sbh_ctxt->i4_q_data_strd = trans_size;

            ps_rdoq_sbh_ctxt->i4_qp_div = ps_ctxt->i4_chrm_cu_qp_div6;
            ps_rdoq_sbh_ctxt->i2_qp_rem = ps_ctxt->i4_chrm_cu_qp_mod6;
            ps_rdoq_sbh_ctxt->i4_scan_idx = i4_scan_idx;
            ps_rdoq_sbh_ctxt->i8_ssd_cost = *pi8_cost;
            ps_rdoq_sbh_ctxt->i4_trans_size = trans_size;

            ps_rdoq_sbh_ctxt->pi2_dequant_coeff =
                ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset];
            ps_rdoq_sbh_ctxt->pi2_iquant_coeffs = pi2_deq_data;
            ps_rdoq_sbh_ctxt->pi2_quant_coeffs = pi2_quant_coeffs;
            ps_rdoq_sbh_ctxt->pi2_trans_values = pi2_trans_values;
            ps_rdoq_sbh_ctxt->pu1_csbf_buf = pu1_csbf_buf;
            ps_rdoq_sbh_ctxt->pi4_subBlock2csbfId_map = pi4_subBlock2csbfId_map;

            /* SBH alone is applied right away; with RDOQ enabled it is */
            /* applied after the RDOQ pass further below                */
            if((!i4_perform_rdoq))
            {
                ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt);

                pi8_cost[0] = ps_rdoq_sbh_ctxt->i8_ssd_cost;
            }
        }

        /* ------- call coeffs scan function ------- */
        *pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs(
            pi2_quant_coeffs,
            pi4_subBlock2csbfId_map,
            i4_scan_idx,
            trans_size,
            pu1_ecd_data,
            pu1_csbf_buf,
            csbf_strd);
    }

    /*  Normalize Cost. Note : trans_idx, not (trans_idx-1) */
    pi8_cost[0] >>= ga_trans_shift[trans_idx];

#if RDOPT_ZERO_CBF_ENABLE
    if((0 != cbf))
    {
        WORD32 tu_bits;
        LWORD64 zero_cbf_cost_u, curr_cb_cod_cost;

        zero_cbf_cost_u = 0;

        /* Populating the fields of rdoq_ctxt structure */
        if(i4_perform_rdoq)
        {
            //memset(ps_rdoq_sbh_ctxt,0,sizeof(rdoq_sbh_ctxt_t));
            /* transform size to log2transform size */
            GETRANGE(ps_rdoq_sbh_ctxt->i4_log2_trans_size, trans_size);
            ps_rdoq_sbh_ctxt->i4_log2_trans_size -= 1;

            ps_rdoq_sbh_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->i8_cl_ssd_lambda_chroma_qf;
            ps_rdoq_sbh_ctxt->i4_is_luma = 0;
            ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td = ga_trans_shift[trans_idx];
            ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td =
                (1 << (ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td - 1));
            ps_rdoq_sbh_ctxt->i1_tu_is_coded = 0;
            ps_rdoq_sbh_ctxt->pi4_zero_col = pi4_zero_col;
            ps_rdoq_sbh_ctxt->pi4_zero_row = pi4_zero_row;
        }
        else if(i4_perform_zcbf)
        {
            /* cost of zero cbf encoding */
            zero_cbf_cost_u =

                ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
                    pu1_pred + (e_chroma_plane == V_PLANE),
                    pu1_src + (e_chroma_plane == V_PLANE),
                    pred_strd,
                    src_strd,
                    trans_size,
                    trans_size);
        }

        /************************************************************************/
        /* call the entropy rdo encode to get the bit estimate for current tu   */
        /* note that tu includes only residual coding bits and does not include */
        /* tu split, cbf and qp delta encoding bits for a TU                    */
        /************************************************************************/
        if(i4_perform_rdoq)
        {
            /* pi8_cost and zero_cbf_cost_u are passed by address to the RDOQ pass */
            tu_bits = ihevce_entropy_rdo_encode_tu_rdoq(
                &ps_ctxt->s_rdopt_entropy_ctxt,
                pu1_ecd_data,
                trans_size,
                0,
                ps_rdoq_sbh_ctxt,
                pi8_cost,
                &zero_cbf_cost_u,
                0);
            //Currently, we are not accounting for sign bit in RDOPT bits calculation when RDOQ is turned on

            /* RDOQ decided not to code the TU */
            if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 0)
            {
                cbf = 0;

                /* num bytes is set to 0 */
                *pi4_coeff_off = 0;
            }

            (*pi4_tu_bits) += tu_bits;

            /* SBH on top of the RDOQ result, only if the TU is still coded */
            if((i4_perform_sbh) && (0 != cbf))
            {
                ps_rdoq_sbh_ctxt->i8_ssd_cost = pi8_cost[0];

                ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt);

                pi8_cost[0] = ps_rdoq_sbh_ctxt->i8_ssd_cost;
            }

            /* Add round value before normalizing */
            pi8_cost[0] += ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td;
            pi8_cost[0] >>= ga_trans_shift[trans_idx];

            /* re-scan the coefficients, using the csbf buffer held in the */
            /* rdoq ctxt                                                   */
            if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 1)
            {
                *pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs(
                    pi2_quant_coeffs,
                    pi4_subBlock2csbfId_map,
                    i4_scan_idx,
                    trans_size,
                    pu1_ecd_data,
                    ps_rdoq_sbh_ctxt->pu1_csbf_buf,
                    csbf_strd);
            }
        }
        else
        {
            /************************************************************************/
            /* call the entropy rdo encode to get the bit estimate for current tu   */
            /* note that tu includes only residual coding bits and does not include */
            /* tu split, cbf and qp delta encoding bits for a TU                    */
            /************************************************************************/
            tu_bits = ihevce_entropy_rdo_encode_tu(
                &ps_ctxt->s_rdopt_entropy_ctxt, pu1_ecd_data, trans_size, 0, i4_perform_sbh);

            (*pi4_tu_bits) += tu_bits;
        }

        /* spatial domain SSD : reconstruct and measure against the source */
        if(e_ssd_type == SPATIAL_DOMAIN_SSD)
        {
            pi8_cost[0] = ihevce_it_recon_ssd(
                ps_ctxt,
                pu1_src,
                src_strd,
                pu1_pred,
                pred_strd,
                pi2_deq_data,
                deq_data_strd,
                pu1_recon,
                i4_recon_stride,
                pu1_ecd_data,
                trans_size,
                PRED_MODE_INTRA,
                cbf,
                pi4_zero_col[0],
                pi4_zero_row[0],
                e_chroma_plane);

            pu1_is_recon_available[0] = 1;
        }

#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
        /* stim injection : against recon for spatial domain SSD, */
        /* against pred for frequency domain SSD                  */
        if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier)
        {
            pi8_cost[0] = ihevce_inject_stim_into_distortion(
                pu1_src,
                src_strd,
                pu1_recon,
                i4_recon_stride,
                pi8_cost[0],
                i4_alpha_stim_multiplier,
                trans_size,
                0,
                ps_ctxt->u1_enable_psyRDOPT,
                e_chroma_plane);
        }
        else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier)
        {
            pi8_cost[0] = ihevce_inject_stim_into_distortion(
                pu1_src,
                src_strd,
                pu1_pred,
                pred_strd,
                pi8_cost[0],
                i4_alpha_stim_multiplier,
                trans_size,
                0,
                ps_ctxt->u1_enable_psyRDOPT,
                e_chroma_plane);
        }
#endif

        curr_cb_cod_cost = pi8_cost[0];

        /* add the SSD cost to bits estimate given by ECD */
        curr_cb_cod_cost +=
            COMPUTE_RATE_COST_CLIP30(tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);

        if(i4_perform_zcbf)
        {
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
            /* the zero coding decision uses its own alpha, discounted */
            /* for reference pictures                                  */
            if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
            {
                zero_cbf_cost_u = ihevce_inject_stim_into_distortion(
                    pu1_src,
                    src_strd,
                    pu1_pred,
                    pred_strd,
                    zero_cbf_cost_u,
                    !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
                                           : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
                                              (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
                                                 100.0,
                    trans_size,
                    0,
                    ps_ctxt->u1_enable_psyRDOPT,
                    e_chroma_plane);
            }
#endif
            /* force the tu as zero cbf if zero_cbf_cost is lower */
            if(zero_cbf_cost_u < curr_cb_cod_cost)
            {
                *pi4_coeff_off = 0;
                cbf = 0;
                (*pi4_tu_bits) = 0;
                pi8_cost[0] = zero_cbf_cost_u;

                pu1_is_recon_available[0] = 0;

                /* the recon of a not coded TU is the prediction itself */
                if(e_ssd_type == SPATIAL_DOMAIN_SSD)
                {
                    ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
                        pu1_pred,
                        pred_strd,
                        pu1_recon,
                        i4_recon_stride,
                        trans_size,
                        trans_size,
                        e_chroma_plane);

                    pu1_is_recon_available[0] = 1;
                }
            }

#if ENABLE_INTER_ZCU_COST
            /* accumulate the cu not coded cost for inter TUs */
            if(!intra_flag)
            {
#if !WEIGH_CHROMA_COST
                ps_ctxt->i8_cu_not_coded_cost += zero_cbf_cost_u;
#else
                ps_ctxt->i8_cu_not_coded_cost += (LWORD64)(
                    (zero_cbf_cost_u * ps_ctxt->u4_chroma_cost_weighing_factor +
                     (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
                    CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
#endif
            }
#endif
        }
    }
    else
    {
        /* ---------- quantization produced cbf = 0 ---------- */
        if(e_ssd_type == SPATIAL_DOMAIN_SSD)
        {
            pi8_cost[0] = ihevce_it_recon_ssd(
                ps_ctxt,
                pu1_src,
                src_strd,
                pu1_pred,
                pred_strd,
                pi2_deq_data,
                deq_data_strd,
                pu1_recon,
                i4_recon_stride,
                pu1_ecd_data,
                trans_size,
                PRED_MODE_INTRA,
                cbf,
                pi4_zero_col[0],
                pi4_zero_row[0],
                e_chroma_plane);

            pu1_is_recon_available[0] = 1;
        }

#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
        /* a not coded TU uses the zero coding alpha, discounted for */
        /* reference pictures                                        */
        if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier)
        {
            pi8_cost[0] = ihevce_inject_stim_into_distortion(
                pu1_src,
                src_strd,
                pu1_recon,
                i4_recon_stride,
                pi8_cost[0],
                !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
                                       : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
                                          (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
                                             100.0,
                trans_size,
                0,
                ps_ctxt->u1_enable_psyRDOPT,
                e_chroma_plane);
        }
        else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier)
        {
            pi8_cost[0] = ihevce_inject_stim_into_distortion(
                pu1_src,
                src_strd,
                pu1_pred,
                pred_strd,
                pi8_cost[0],
                !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
                                       : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
                                          (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
                                             100.0,
                trans_size,
                0,
                ps_ctxt->u1_enable_psyRDOPT,
                e_chroma_plane);
        }
#endif

#if ENABLE_INTER_ZCU_COST
        if(!intra_flag)
        {
#if !WEIGH_CHROMA_COST
            /* cbf = 0, accumulate cu not coded cost */
            ps_ctxt->i8_cu_not_coded_cost += pi8_cost[0];
#else
            /* cbf = 0, accumulate cu not coded cost */

            ps_ctxt->i8_cu_not_coded_cost += (LWORD64)(
                (pi8_cost[0] * ps_ctxt->u4_chroma_cost_weighing_factor +
                 (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
                CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
#endif
        }
#endif
    }
#endif /* RDOPT_ZERO_CBF_ENABLE */

    return (cbf);
}