1 /****************************************************************************** 2 * 3 * Copyright (C) 2018 The Android Open Source Project 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ***************************************************************************** 18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore 19 */ 20 21 /*! 22 ****************************************************************************** 23 * \file ihevce_enc_loop_pass.c 24 * 25 * \brief 26 * This file contains Encoder normative loop pass related functions 27 * 28 * \date 29 * 18/09/2012 30 * 31 * \author 32 * Ittiam 33 * 34 * 35 * List of Functions 36 * 37 * 38 ****************************************************************************** 39 */ 40 41 /*****************************************************************************/ 42 /* File Includes */ 43 /*****************************************************************************/ 44 /* System include files */ 45 #include <stdio.h> 46 #include <string.h> 47 #include <stdlib.h> 48 #include <assert.h> 49 #include <stdarg.h> 50 #include <math.h> 51 #include <limits.h> 52 53 /* User include files */ 54 #include "ihevc_typedefs.h" 55 #include "itt_video_api.h" 56 #include "ihevce_api.h" 57 58 #include "rc_cntrl_param.h" 59 #include "rc_frame_info_collector.h" 60 #include "rc_look_ahead_params.h" 61 62 #include "ihevc_defs.h" 63 #include "ihevc_macros.h" 64 #include "ihevc_debug.h" 65 
#include "ihevc_structs.h" 66 #include "ihevc_platform_macros.h" 67 #include "ihevc_deblk.h" 68 #include "ihevc_itrans_recon.h" 69 #include "ihevc_chroma_itrans_recon.h" 70 #include "ihevc_chroma_intra_pred.h" 71 #include "ihevc_intra_pred.h" 72 #include "ihevc_inter_pred.h" 73 #include "ihevc_mem_fns.h" 74 #include "ihevc_padding.h" 75 #include "ihevc_weighted_pred.h" 76 #include "ihevc_sao.h" 77 #include "ihevc_resi_trans.h" 78 #include "ihevc_quant_iquant_ssd.h" 79 #include "ihevc_cabac_tables.h" 80 #include "ihevc_common_tables.h" 81 #include "ihevc_quant_tables.h" 82 83 #include "ihevce_defs.h" 84 #include "ihevce_hle_interface.h" 85 #include "ihevce_lap_enc_structs.h" 86 #include "ihevce_multi_thrd_structs.h" 87 #include "ihevce_multi_thrd_funcs.h" 88 #include "ihevce_me_common_defs.h" 89 #include "ihevce_had_satd.h" 90 #include "ihevce_error_codes.h" 91 #include "ihevce_bitstream.h" 92 #include "ihevce_cabac.h" 93 #include "ihevce_rdoq_macros.h" 94 #include "ihevce_function_selector.h" 95 #include "ihevce_enc_structs.h" 96 #include "ihevce_entropy_structs.h" 97 #include "ihevce_cmn_utils_instr_set_router.h" 98 #include "ihevce_ipe_instr_set_router.h" 99 #include "ihevce_decomp_pre_intra_structs.h" 100 #include "ihevce_decomp_pre_intra_pass.h" 101 #include "ihevce_enc_loop_structs.h" 102 #include "ihevce_nbr_avail.h" 103 #include "ihevce_enc_loop_utils.h" 104 #include "ihevce_sub_pic_rc.h" 105 #include "ihevce_global_tables.h" 106 #include "ihevce_bs_compute_ctb.h" 107 #include "ihevce_cabac_rdo.h" 108 #include "ihevce_deblk.h" 109 #include "ihevce_frame_process.h" 110 #include "ihevce_rc_enc_structs.h" 111 #include "hme_datatype.h" 112 #include "hme_interface.h" 113 #include "hme_common_defs.h" 114 #include "hme_defs.h" 115 #include "ihevce_me_instr_set_router.h" 116 #include "ihevce_enc_subpel_gen.h" 117 #include "ihevce_inter_pred.h" 118 #include "ihevce_mv_pred.h" 119 #include "ihevce_mv_pred_merge.h" 120 #include "ihevce_enc_loop_inter_mode_sifter.h" 121 
#include "ihevce_enc_cu_recursion.h" 122 #include "ihevce_enc_loop_pass.h" 123 #include "ihevce_common_utils.h" 124 #include "ihevce_dep_mngr_interface.h" 125 #include "ihevce_sao.h" 126 #include "ihevce_tile_interface.h" 127 #include "ihevce_profile.h" 128 129 #include "cast_types.h" 130 #include "osal.h" 131 #include "osal_defaults.h" 132 133 /*****************************************************************************/ 134 /* Globals */ 135 /*****************************************************************************/ 136 extern PART_ID_T ge_part_type_to_part_id[MAX_PART_TYPES][MAX_NUM_PARTS]; 137 138 extern UWORD8 gau1_num_parts_in_part_type[MAX_PART_TYPES]; 139 140 /*****************************************************************************/ 141 /* Constant Macros */ 142 /*****************************************************************************/ 143 #define UPDATE_QP_AT_CTB 6 144 145 /*****************************************************************************/ 146 /* Function Definitions */ 147 /*****************************************************************************/ 148 149 /*! 
150 ****************************************************************************** 151 * \if Function name : ihevce_enc_loop_ctb_left_copy \endif 152 * 153 * \brief 154 * This function copy the right data of CTB to context buffers 155 * 156 * \date 157 * 18/09/2012 158 * 159 * \author 160 * Ittiam 161 * 162 * \return 163 * 164 * List of Functions 165 * 166 * 167 ****************************************************************************** 168 */ 169 void ihevce_enc_loop_ctb_left_copy(ihevce_enc_loop_ctxt_t *ps_ctxt, enc_loop_cu_prms_t *ps_cu_prms) 170 { 171 /* ------------------------------------------------------------------ */ 172 /* copy the right coloum data to the context buffers */ 173 /* ------------------------------------------------------------------ */ 174 175 nbr_4x4_t *ps_left_nbr; 176 nbr_4x4_t *ps_nbr; 177 UWORD8 *pu1_buff; 178 WORD32 num_pels; 179 UWORD8 *pu1_luma_left, *pu1_chrm_left; 180 181 UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2); 182 183 pu1_luma_left = (UWORD8 *)ps_ctxt->pv_left_luma_data; 184 pu1_chrm_left = (UWORD8 *)ps_ctxt->pv_left_chrm_data; 185 ps_left_nbr = &ps_ctxt->as_left_col_nbr[0]; 186 187 /* copy right luma data */ 188 pu1_buff = ps_cu_prms->pu1_luma_recon + ps_cu_prms->i4_ctb_size - 1; 189 190 for(num_pels = 0; num_pels < ps_cu_prms->i4_ctb_size; num_pels++) 191 { 192 WORD32 i4_indx = ps_cu_prms->i4_luma_recon_stride * num_pels; 193 194 pu1_luma_left[num_pels] = pu1_buff[i4_indx]; 195 } 196 197 /* copy right chroma data */ 198 pu1_buff = ps_cu_prms->pu1_chrm_recon + ps_cu_prms->i4_ctb_size - 2; 199 200 for(num_pels = 0; num_pels < (ps_cu_prms->i4_ctb_size >> (0 == u1_is_422)); num_pels++) 201 { 202 WORD32 i4_indx = ps_cu_prms->i4_chrm_recon_stride * num_pels; 203 204 *pu1_chrm_left++ = pu1_buff[i4_indx]; 205 *pu1_chrm_left++ = pu1_buff[i4_indx + 1]; 206 } 207 208 /* store the nbr 4x4 data at ctb level */ 209 { 210 WORD32 ctr; 211 WORD32 nbr_strd; 212 213 nbr_strd = ps_cu_prms->i4_ctb_size >> 2; 214 215 /* copy 
right nbr data */ 216 ps_nbr = &ps_ctxt->as_ctb_nbr_arr[0]; 217 ps_nbr += ((ps_cu_prms->i4_ctb_size >> 2) - 1); 218 219 for(ctr = 0; ctr < (ps_cu_prms->i4_ctb_size >> 2); ctr++) 220 { 221 WORD32 i4_indx = nbr_strd * ctr; 222 223 ps_left_nbr[ctr] = ps_nbr[i4_indx]; 224 } 225 } 226 return; 227 } 228 229 /*! 230 ****************************************************************************** 231 * \if Function name : ihevce_mark_all_modes_to_evaluate \endif 232 * 233 * \brief 234 * Mark all modes for inter/intra for evaluation. This function will be 235 * called by ref instance 236 * 237 * \param[in] pv_ctxt : pointer to enc_loop module 238 * \param[in] ps_cu_analyse : pointer to cu analyse 239 * 240 * \return 241 * None 242 * 243 * \author 244 * Ittiam 245 * 246 ***************************************************************************** 247 */ 248 void ihevce_mark_all_modes_to_evaluate(void *pv_ctxt, cu_analyse_t *ps_cu_analyse) 249 { 250 UWORD8 ctr; 251 WORD32 i4_part; 252 253 (void)pv_ctxt; 254 /* run a loop over all Inter cands */ 255 for(ctr = 0; ctr < MAX_INTER_CU_CANDIDATES; ctr++) 256 { 257 ps_cu_analyse->as_cu_inter_cand[ctr].b1_eval_mark = 1; 258 } 259 260 /* run a loop over all intra candidates */ 261 if(0 != ps_cu_analyse->u1_num_intra_rdopt_cands) 262 { 263 for(ctr = 0; ctr < MAX_INTRA_CU_CANDIDATES + 1; ctr++) 264 { 265 ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_eval_mark[ctr] = 1; 266 ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_by_2_eval_mark[ctr] = 1; 267 268 for(i4_part = 0; i4_part < NUM_PU_PARTS; i4_part++) 269 { 270 ps_cu_analyse->s_cu_intra_cand.au1_nxn_eval_mark[i4_part][ctr] = 1; 271 } 272 } 273 } 274 } 275 276 /*! 277 ****************************************************************************** 278 * \if Function name : ihevce_cu_mode_decide \endif 279 * 280 * \brief 281 * Coding Unit mode decide function. 
Performs RD opt and decides the best mode 282 * 283 * \param[in] ps_ctxt : pointer to enc_loop module 284 * \param[in] ps_cu_prms : pointer to coding unit params (position, buffer pointers) 285 * \param[in] ps_cu_analyse : pointer to cu analyse 286 * \param[out] ps_cu_final : pointer to cu final 287 * \param[out] pu1_ecd_data :pointer to store coeff data for ECD 288 * \param[out]ps_row_col_pu; colocated pu buffer pointer 289 * \param[out]pu1_row_pu_map; colocated pu map buffer pointer 290 * \param[in]col_start_pu_idx : pu index start value 291 * 292 * \return 293 * None 294 * 295 * 296 * \author 297 * Ittiam 298 * 299 ***************************************************************************** 300 */ 301 LWORD64 ihevce_cu_mode_decide( 302 ihevce_enc_loop_ctxt_t *ps_ctxt, 303 enc_loop_cu_prms_t *ps_cu_prms, 304 cu_analyse_t *ps_cu_analyse, 305 final_mode_state_t *ps_final_mode_state, 306 UWORD8 *pu1_ecd_data, 307 pu_col_mv_t *ps_col_pu, 308 UWORD8 *pu1_col_pu_map, 309 WORD32 col_start_pu_idx) 310 { 311 enc_loop_chrm_cu_buf_prms_t s_chrm_cu_buf_prms; 312 cu_nbr_prms_t s_cu_nbr_prms; 313 inter_cu_mode_info_t s_inter_cu_mode_info; 314 cu_inter_cand_t *ps_best_inter_cand = NULL; 315 UWORD8 *pu1_cu_top; 316 UWORD8 *pu1_cu_top_left; 317 UWORD8 *pu1_cu_left; 318 UWORD8 *pu1_final_recon = NULL; 319 UWORD8 *pu1_curr_src = NULL; 320 void *pv_curr_src = NULL; 321 void *pv_cu_left = NULL; 322 void *pv_cu_top = NULL; 323 void *pv_cu_top_left = NULL; 324 325 WORD32 cu_left_stride = 0; 326 WORD32 ctr; 327 WORD32 rd_opt_best_idx; 328 LWORD64 rd_opt_least_cost; 329 WORD32 rd_opt_curr_idx; 330 WORD32 num_4x4_in_ctb; 331 WORD32 nbr_4x4_left_strd = 0; 332 333 nbr_4x4_t *ps_topleft_nbr_4x4; 334 nbr_4x4_t *ps_left_nbr_4x4 = NULL; 335 nbr_4x4_t *ps_top_nbr_4x4 = NULL; 336 nbr_4x4_t *ps_curr_nbr_4x4; 337 WORD32 enable_intra_eval_flag; 338 WORD32 i4_best_cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp - 1; 339 WORD32 curr_cu_pos_in_row; 340 WORD32 cu_top_right_offset; 341 WORD32 
cu_top_right_dep_pos; 342 WORD32 i4_ctb_x_off, i4_ctb_y_off; 343 344 UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2); 345 (void)ps_final_mode_state; 346 /* default init */ 347 rd_opt_least_cost = MAX_COST_64; 348 ps_ctxt->as_cu_prms[0].i8_best_rdopt_cost = MAX_COST_64; 349 ps_ctxt->as_cu_prms[1].i8_best_rdopt_cost = MAX_COST_64; 350 351 /* Zero cbf tool is enabled by default for all presets */ 352 ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE; 353 354 rd_opt_best_idx = 1; 355 rd_opt_curr_idx = 0; 356 enable_intra_eval_flag = 1; 357 358 /* CU params in enc ctxt*/ 359 ps_ctxt->ps_enc_out_ctxt->b3_cu_pos_x = ps_cu_analyse->b3_cu_pos_x; 360 ps_ctxt->ps_enc_out_ctxt->b3_cu_pos_y = ps_cu_analyse->b3_cu_pos_y; 361 ps_ctxt->ps_enc_out_ctxt->u1_cu_size = ps_cu_analyse->u1_cu_size; 362 363 num_4x4_in_ctb = (ps_cu_prms->i4_ctb_size >> 2); 364 ps_curr_nbr_4x4 = &ps_ctxt->as_ctb_nbr_arr[0]; 365 ps_curr_nbr_4x4 += (ps_cu_analyse->b3_cu_pos_x << 1); 366 ps_curr_nbr_4x4 += ((ps_cu_analyse->b3_cu_pos_y << 1) * num_4x4_in_ctb); 367 368 /* CB and Cr are pixel interleaved */ 369 s_chrm_cu_buf_prms.i4_chrm_recon_stride = ps_cu_prms->i4_chrm_recon_stride; 370 371 s_chrm_cu_buf_prms.i4_chrm_src_stride = ps_cu_prms->i4_chrm_src_stride; 372 373 if(!ps_ctxt->u1_is_input_data_hbd) 374 { 375 /* --------------------------------------- */ 376 /* ----- Luma Pointers Derivation -------- */ 377 /* --------------------------------------- */ 378 379 /* based on CU position derive the pointers */ 380 pu1_final_recon = ps_cu_prms->pu1_luma_recon + (ps_cu_analyse->b3_cu_pos_x << 3); 381 382 pu1_curr_src = ps_cu_prms->pu1_luma_src + (ps_cu_analyse->b3_cu_pos_x << 3); 383 384 pu1_final_recon += ((ps_cu_analyse->b3_cu_pos_y << 3) * ps_cu_prms->i4_luma_recon_stride); 385 386 pu1_curr_src += ((ps_cu_analyse->b3_cu_pos_y << 3) * ps_cu_prms->i4_luma_src_stride); 387 388 pv_curr_src = pu1_curr_src; 389 390 /* CU left */ 391 if(0 == ps_cu_analyse->b3_cu_pos_x) 392 { 393 /* CTB boundary */ 394 pu1_cu_left = 
(UWORD8 *)ps_ctxt->pv_left_luma_data; 395 pu1_cu_left += (ps_cu_analyse->b3_cu_pos_y << 3); 396 cu_left_stride = 1; 397 398 ps_left_nbr_4x4 = &ps_ctxt->as_left_col_nbr[0]; 399 ps_left_nbr_4x4 += ps_cu_analyse->b3_cu_pos_y << 1; 400 nbr_4x4_left_strd = 1; 401 } 402 else 403 { 404 /* inside CTB */ 405 pu1_cu_left = pu1_final_recon - 1; 406 cu_left_stride = ps_cu_prms->i4_luma_recon_stride; 407 408 ps_left_nbr_4x4 = ps_curr_nbr_4x4 - 1; 409 nbr_4x4_left_strd = num_4x4_in_ctb; 410 } 411 412 pv_cu_left = pu1_cu_left; 413 414 /* CU top */ 415 if(0 == ps_cu_analyse->b3_cu_pos_y) 416 { 417 /* CTB boundary */ 418 pu1_cu_top = (UWORD8 *)ps_ctxt->pv_top_row_luma; 419 pu1_cu_top += ps_cu_prms->i4_ctb_pos * ps_cu_prms->i4_ctb_size; 420 pu1_cu_top += (ps_cu_analyse->b3_cu_pos_x << 3); 421 422 ps_top_nbr_4x4 = ps_ctxt->ps_top_row_nbr; 423 ps_top_nbr_4x4 += (ps_cu_prms->i4_ctb_pos * (ps_cu_prms->i4_ctb_size >> 2)); 424 ps_top_nbr_4x4 += (ps_cu_analyse->b3_cu_pos_x << 1); 425 } 426 else 427 { 428 /* inside CTB */ 429 pu1_cu_top = pu1_final_recon - ps_cu_prms->i4_luma_recon_stride; 430 431 ps_top_nbr_4x4 = ps_curr_nbr_4x4 - num_4x4_in_ctb; 432 } 433 434 pv_cu_top = pu1_cu_top; 435 436 /* CU top left */ 437 if((0 == ps_cu_analyse->b3_cu_pos_x) && (0 != ps_cu_analyse->b3_cu_pos_y)) 438 { 439 /* left ctb boundary but not first row */ 440 pu1_cu_top_left = pu1_cu_left - 1; /* stride is 1 */ 441 ps_topleft_nbr_4x4 = ps_left_nbr_4x4 - 1; /* stride is 1 */ 442 } 443 else 444 { 445 /* rest all cases topleft is top -1 */ 446 pu1_cu_top_left = pu1_cu_top - 1; 447 ps_topleft_nbr_4x4 = ps_top_nbr_4x4 - 1; 448 } 449 450 pv_cu_top_left = pu1_cu_top_left; 451 452 /* Store the CU nbr information in the ctxt for final reconstruction fun. 
*/ 453 s_cu_nbr_prms.nbr_4x4_left_strd = nbr_4x4_left_strd; 454 s_cu_nbr_prms.ps_left_nbr_4x4 = ps_left_nbr_4x4; 455 s_cu_nbr_prms.ps_topleft_nbr_4x4 = ps_topleft_nbr_4x4; 456 s_cu_nbr_prms.ps_top_nbr_4x4 = ps_top_nbr_4x4; 457 s_cu_nbr_prms.pu1_cu_left = pu1_cu_left; 458 s_cu_nbr_prms.pu1_cu_top = pu1_cu_top; 459 s_cu_nbr_prms.pu1_cu_top_left = pu1_cu_top_left; 460 s_cu_nbr_prms.cu_left_stride = cu_left_stride; 461 462 /* ------------------------------------------------------------ */ 463 /* -- Initialize the number of neigbour skip cu count for rdo --*/ 464 /* ------------------------------------------------------------ */ 465 { 466 nbr_avail_flags_t s_nbr; 467 WORD32 i4_num_nbr_skip_cus = 0; 468 469 /* get the neighbour availability flags for current cu */ 470 ihevce_get_nbr_intra( 471 &s_nbr, 472 ps_ctxt->pu1_ctb_nbr_map, 473 ps_ctxt->i4_nbr_map_strd, 474 (ps_cu_analyse->b3_cu_pos_x << 1), 475 (ps_cu_analyse->b3_cu_pos_y << 1), 476 (ps_cu_analyse->u1_cu_size >> 2)); 477 if(s_nbr.u1_top_avail) 478 { 479 i4_num_nbr_skip_cus += ps_top_nbr_4x4->b1_skip_flag; 480 } 481 482 if(s_nbr.u1_left_avail) 483 { 484 i4_num_nbr_skip_cus += ps_left_nbr_4x4->b1_skip_flag; 485 } 486 ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[0].i4_num_nbr_skip_cus = 487 i4_num_nbr_skip_cus; 488 ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[1].i4_num_nbr_skip_cus = 489 i4_num_nbr_skip_cus; 490 } 491 492 /* --------------------------------------- */ 493 /* --- Chroma Pointers Derivation -------- */ 494 /* --------------------------------------- */ 495 496 /* based on CU position derive the pointers */ 497 s_chrm_cu_buf_prms.pu1_final_recon = 498 ps_cu_prms->pu1_chrm_recon + (ps_cu_analyse->b3_cu_pos_x << 3); 499 500 s_chrm_cu_buf_prms.pu1_curr_src = 501 ps_cu_prms->pu1_chrm_src + (ps_cu_analyse->b3_cu_pos_x << 3); 502 503 s_chrm_cu_buf_prms.pu1_final_recon += 504 ((ps_cu_analyse->b3_cu_pos_y << (u1_is_422 + 2)) * ps_cu_prms->i4_chrm_recon_stride); 505 506 s_chrm_cu_buf_prms.pu1_curr_src += 
507 ((ps_cu_analyse->b3_cu_pos_y << (u1_is_422 + 2)) * ps_cu_prms->i4_chrm_src_stride); 508 509 /* CU left */ 510 if(0 == ps_cu_analyse->b3_cu_pos_x) 511 { 512 /* CTB boundary */ 513 s_chrm_cu_buf_prms.pu1_cu_left = (UWORD8 *)ps_ctxt->pv_left_chrm_data; 514 s_chrm_cu_buf_prms.pu1_cu_left += (ps_cu_analyse->b3_cu_pos_y << (u1_is_422 + 3)); 515 s_chrm_cu_buf_prms.i4_cu_left_stride = 2; 516 } 517 else 518 { 519 /* inside CTB */ 520 s_chrm_cu_buf_prms.pu1_cu_left = s_chrm_cu_buf_prms.pu1_final_recon - 2; 521 s_chrm_cu_buf_prms.i4_cu_left_stride = ps_cu_prms->i4_chrm_recon_stride; 522 } 523 524 /* CU top */ 525 if(0 == ps_cu_analyse->b3_cu_pos_y) 526 { 527 /* CTB boundary */ 528 s_chrm_cu_buf_prms.pu1_cu_top = (UWORD8 *)ps_ctxt->pv_top_row_chroma; 529 s_chrm_cu_buf_prms.pu1_cu_top += ps_cu_prms->i4_ctb_pos * ps_cu_prms->i4_ctb_size; 530 s_chrm_cu_buf_prms.pu1_cu_top += (ps_cu_analyse->b3_cu_pos_x << 3); 531 } 532 else 533 { 534 /* inside CTB */ 535 s_chrm_cu_buf_prms.pu1_cu_top = 536 s_chrm_cu_buf_prms.pu1_final_recon - ps_cu_prms->i4_chrm_recon_stride; 537 } 538 539 /* CU top left */ 540 if((0 == ps_cu_analyse->b3_cu_pos_x) && (0 != ps_cu_analyse->b3_cu_pos_y)) 541 { 542 /* left ctb boundary but not first row */ 543 s_chrm_cu_buf_prms.pu1_cu_top_left = 544 s_chrm_cu_buf_prms.pu1_cu_left - 2; /* stride is 1 (2 pixels) */ 545 } 546 else 547 { 548 /* rest all cases topleft is top -2 */ 549 s_chrm_cu_buf_prms.pu1_cu_top_left = s_chrm_cu_buf_prms.pu1_cu_top - 2; 550 } 551 } 552 553 /* Set Variables for Dep. 
Checking and Setting */ 554 i4_ctb_x_off = (ps_cu_prms->i4_ctb_pos << 6); 555 556 i4_ctb_y_off = ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y; 557 ps_ctxt->i4_satd_buf_idx = rd_opt_curr_idx; 558 559 /* Set the pred pointer count for ME/intra to 0 to start */ 560 ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count = 0; 561 562 ASSERT( 563 (ps_cu_analyse->u1_num_inter_cands > 0) || (ps_cu_analyse->u1_num_intra_rdopt_cands > 0)); 564 565 ASSERT(ps_cu_analyse->u1_num_inter_cands <= MAX_INTER_CU_CANDIDATES); 566 s_inter_cu_mode_info.u1_num_inter_cands = 0; 567 s_inter_cu_mode_info.u1_idx_of_worst_cost_in_cost_array = 0; 568 s_inter_cu_mode_info.u1_idx_of_worst_cost_in_pred_buf_array = 0; 569 570 ps_ctxt->s_cu_inter_merge_skip.u1_num_merge_cands = 0; 571 ps_ctxt->s_cu_inter_merge_skip.u1_num_skip_cands = 0; 572 ps_ctxt->s_mixed_mode_inter_cu.u1_num_mixed_mode_type0_cands = 0; 573 ps_ctxt->s_mixed_mode_inter_cu.u1_num_mixed_mode_type1_cands = 0; 574 ps_ctxt->s_pred_buf_data.i4_pred_stride = ps_cu_analyse->u1_cu_size; 575 if(0 != ps_cu_analyse->u1_num_inter_cands) 576 { 577 ihevce_inter_cand_sifter_prms_t s_prms; 578 579 UWORD8 u1_enable_top_row_sync; 580 581 if(ps_ctxt->u1_disable_intra_eval) 582 { 583 u1_enable_top_row_sync = !DISABLE_TOP_SYNC; 584 } 585 else 586 { 587 u1_enable_top_row_sync = 1; 588 } 589 590 if((!ps_ctxt->u1_use_top_at_ctb_boundary) && u1_enable_top_row_sync) 591 { 592 /* Wait till top data is ready */ 593 /* Currently checking till top right CU */ 594 curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3); 595 596 if(i4_ctb_y_off == 0) 597 { 598 /* No wait for 1st row */ 599 cu_top_right_offset = -(MAX_CTB_SIZE); 600 { 601 ihevce_tile_params_t *ps_col_tile_params = 602 ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base + 603 ps_ctxt->i4_tile_col_idx); 604 /* No wait for 1st row */ 605 cu_top_right_offset = -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE)); 606 } 607 cu_top_right_dep_pos = 0; 608 } 609 else 610 { 611 cu_top_right_offset 
= (ps_cu_analyse->u1_cu_size) + 4; 612 cu_top_right_dep_pos = (i4_ctb_y_off >> 6) - 1; 613 } 614 615 if(0 == ps_cu_analyse->b3_cu_pos_y) 616 { 617 ihevce_dmgr_chk_row_row_sync( 618 ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right, 619 curr_cu_pos_in_row, 620 cu_top_right_offset, 621 cu_top_right_dep_pos, 622 ps_ctxt->i4_tile_col_idx, /* Col Tile No. */ 623 ps_ctxt->thrd_id); 624 } 625 } 626 627 s_prms.i4_ctb_nbr_map_stride = ps_ctxt->i4_nbr_map_strd; 628 s_prms.i4_max_num_inter_rdopt_cands = ps_ctxt->i4_max_num_inter_rdopt_cands; 629 s_prms.i4_nbr_4x4_left_strd = nbr_4x4_left_strd; 630 s_prms.i4_src_strd = ps_cu_prms->i4_luma_src_stride; 631 s_prms.ps_cu_inter_merge_skip = &ps_ctxt->s_cu_inter_merge_skip; 632 s_prms.aps_cu_nbr_buf[0] = &ps_ctxt->as_cu_nbr[ps_ctxt->i4_satd_buf_idx][0]; 633 s_prms.aps_cu_nbr_buf[1] = &ps_ctxt->as_cu_nbr[!ps_ctxt->i4_satd_buf_idx][0]; 634 s_prms.ps_left_nbr_4x4 = ps_left_nbr_4x4; 635 s_prms.ps_mc_ctxt = &ps_ctxt->s_mc_ctxt; 636 s_prms.ps_me_cands = ps_cu_analyse->as_cu_inter_cand; 637 s_prms.ps_mixed_modes_datastore = &ps_ctxt->s_mixed_mode_inter_cu; 638 s_prms.ps_mv_pred_ctxt = &ps_ctxt->s_mv_pred_ctxt; 639 s_prms.ps_pred_buf_data = &ps_ctxt->s_pred_buf_data; 640 s_prms.ps_topleft_nbr_4x4 = ps_topleft_nbr_4x4; 641 s_prms.ps_top_nbr_4x4 = ps_top_nbr_4x4; 642 s_prms.pu1_ctb_nbr_map = ps_ctxt->pu1_ctb_nbr_map; 643 s_prms.pv_src = pv_curr_src; 644 s_prms.u1_cu_pos_x = ps_cu_analyse->b3_cu_pos_x << 3; 645 s_prms.u1_cu_pos_y = ps_cu_analyse->b3_cu_pos_y << 3; 646 s_prms.u1_cu_size = ps_cu_analyse->u1_cu_size; 647 s_prms.u1_max_merge_candidates = ps_ctxt->i4_max_merge_candidates; 648 s_prms.u1_num_me_cands = ps_cu_analyse->u1_num_inter_cands; 649 s_prms.u1_use_satd_for_merge_eval = ps_ctxt->i4_use_satd_for_merge_eval; 650 s_prms.u1_quality_preset = ps_ctxt->i4_quality_preset; 651 s_prms.i1_slice_type = ps_ctxt->i1_slice_type; 652 s_prms.ps_cu_me_intra_pred_prms = &ps_ctxt->s_cu_me_intra_pred_prms; 653 s_prms.u1_is_hbd = (ps_ctxt->u1_bit_depth > 
8); 654 s_prms.ps_inter_cu_mode_info = &s_inter_cu_mode_info; 655 s_prms.pai4_mv_cost = ps_cu_analyse->ai4_mv_cost; 656 s_prms.i4_lambda_qf = ps_ctxt->i4_sad_lamda; 657 s_prms.u1_use_merge_cand_from_top_row = 658 (u1_enable_top_row_sync || (s_prms.u1_cu_pos_y > 0)); 659 s_prms.u1_merge_idx_cabac_model = 660 ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[IHEVC_CAB_MERGE_IDX_EXT]; 661 #if REUSE_ME_COMPUTED_ERROR_FOR_INTER_CAND_SIFTING 662 s_prms.pai4_me_err_metric = ps_cu_analyse->ai4_err_metric; 663 s_prms.u1_reuse_me_sad = 1; 664 #else 665 s_prms.u1_reuse_me_sad = 0; 666 #endif 667 668 if(ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr->i1_slice_type != PSLICE) 669 { 670 if(ps_ctxt->i4_temporal_layer == 1) 671 { 672 s_prms.i4_alpha_stim_multiplier = ALPHA_FOR_NOISE_TERM_IN_ME_BREF; 673 } 674 else 675 { 676 s_prms.i4_alpha_stim_multiplier = ALPHA_FOR_NOISE_TERM_IN_ME; 677 } 678 } 679 else 680 { 681 s_prms.i4_alpha_stim_multiplier = ALPHA_FOR_NOISE_TERM_IN_ME_P; 682 } 683 s_prms.u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy; 684 685 if(s_prms.u1_is_cu_noisy) 686 { 687 s_prms.i4_lambda_qf = 688 ((float)s_prms.i4_lambda_qf) * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f; 689 } 690 s_prms.pf_luma_inter_pred_pu = ihevce_luma_inter_pred_pu; 691 692 s_prms.ps_cmn_utils_optimised_function_list = &ps_ctxt->s_cmn_opt_func; 693 694 s_prms.pf_evalsad_pt_npu_mxn_8bit = (FT_SAD_EVALUATOR *)ps_ctxt->pv_evalsad_pt_npu_mxn_8bit; 695 ihevce_inter_cand_sifter(&s_prms); 696 } 697 if(u1_is_422) 698 { 699 UWORD8 au1_buf_ids[NUM_CU_ME_INTRA_PRED_IDX - 1]; 700 UWORD8 u1_num_bufs_allocated; 701 702 u1_num_bufs_allocated = ihevce_get_free_pred_buf_indices( 703 au1_buf_ids, &ps_ctxt->s_pred_buf_data.u4_is_buf_in_use, NUM_CU_ME_INTRA_PRED_IDX - 1); 704 705 ASSERT(u1_num_bufs_allocated == (NUM_CU_ME_INTRA_PRED_IDX - 1)); 706 707 for(ctr = ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count; ctr < u1_num_bufs_allocated; 708 ctr++) 709 { 710 { 711 
ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[ctr] = 712 (UWORD8 *)ps_ctxt->s_pred_buf_data.apv_inter_pred_data[au1_buf_ids[ctr]]; 713 } 714 715 ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[ctr] = ps_cu_analyse->u1_cu_size; 716 717 ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count++; 718 } 719 720 { 721 ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[ctr] = 722 (UWORD8 *)ps_ctxt->pv_422_chroma_intra_pred_buf; 723 } 724 725 ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[ctr] = ps_cu_analyse->u1_cu_size; 726 727 ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count++; 728 } 729 else 730 { 731 UWORD8 au1_buf_ids[NUM_CU_ME_INTRA_PRED_IDX]; 732 UWORD8 u1_num_bufs_allocated; 733 734 u1_num_bufs_allocated = ihevce_get_free_pred_buf_indices( 735 au1_buf_ids, &ps_ctxt->s_pred_buf_data.u4_is_buf_in_use, NUM_CU_ME_INTRA_PRED_IDX); 736 737 ASSERT(u1_num_bufs_allocated == NUM_CU_ME_INTRA_PRED_IDX); 738 739 for(ctr = ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count; ctr < u1_num_bufs_allocated; 740 ctr++) 741 { 742 { 743 ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[ctr] = 744 (UWORD8 *)ps_ctxt->s_pred_buf_data.apv_inter_pred_data[au1_buf_ids[ctr]]; 745 } 746 747 ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[ctr] = ps_cu_analyse->u1_cu_size; 748 749 ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count++; 750 } 751 } 752 753 ihevce_mark_all_modes_to_evaluate(ps_ctxt, ps_cu_analyse); 754 755 ps_ctxt->as_cu_prms[0].s_recon_datastore.u1_is_lumaRecon_available = 0; 756 ps_ctxt->as_cu_prms[1].s_recon_datastore.u1_is_lumaRecon_available = 0; 757 ps_ctxt->as_cu_prms[0].s_recon_datastore.au1_is_chromaRecon_available[0] = 0; 758 ps_ctxt->as_cu_prms[1].s_recon_datastore.au1_is_chromaRecon_available[0] = 0; 759 ps_ctxt->as_cu_prms[0].s_recon_datastore.au1_is_chromaRecon_available[1] = 0; 760 ps_ctxt->as_cu_prms[1].s_recon_datastore.au1_is_chromaRecon_available[1] = 0; 761 ps_ctxt->as_cu_prms[0].s_recon_datastore.au1_is_chromaRecon_available[2] = 0; 762 
ps_ctxt->as_cu_prms[1].s_recon_datastore.au1_is_chromaRecon_available[2] = 0; 763 /* --------------------------------------- */ 764 /* ------ Inter RD OPT stage ------------- */ 765 /* --------------------------------------- */ 766 if(0 != s_inter_cu_mode_info.u1_num_inter_cands) 767 { 768 UWORD8 u1_ssd_bit_info_ctr = 0; 769 770 /* -- run a loop over all Inter rd opt cands ------ */ 771 for(ctr = 0; ctr < s_inter_cu_mode_info.u1_num_inter_cands; ctr++) 772 { 773 cu_inter_cand_t *ps_inter_cand; 774 775 LWORD64 rd_opt_cost = 0; 776 777 ps_inter_cand = s_inter_cu_mode_info.aps_cu_data[ctr]; 778 779 if((ps_inter_cand->b1_skip_flag) || (ps_inter_cand->as_inter_pu[0].b1_merge_flag) || 780 (ps_inter_cand->b3_part_size && ps_inter_cand->as_inter_pu[1].b1_merge_flag)) 781 { 782 ps_inter_cand->b1_eval_mark = 1; 783 } 784 785 /****************************************************************/ 786 /* This check is only valid for derived instances. */ 787 /* check if this mode needs to be evaluated or not. */ 788 /* if it is a skip candidate, go ahead and evaluate it even if */ 789 /* it has not been marked while sorting. 
*/ 790 /****************************************************************/ 791 if((0 == ps_inter_cand->b1_eval_mark) && (0 == ps_inter_cand->b1_skip_flag)) 792 { 793 continue; 794 } 795 796 /* RDOPT related copies and settings */ 797 ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx; 798 799 /* RDOPT copy States : Prev Cu best to current init */ 800 COPY_CABAC_STATES( 801 &ps_ctxt->au1_rdopt_init_ctxt_models[0], 802 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0], 803 IHEVC_CAB_CTXT_END * sizeof(UWORD8)); 804 /* MVP ,MVD calc and Motion compensation */ 805 rd_opt_cost = ((pf_inter_rdopt_cu_mc_mvp)ps_ctxt->pv_inter_rdopt_cu_mc_mvp)( 806 ps_ctxt, 807 ps_inter_cand, 808 ps_cu_analyse->u1_cu_size, 809 ps_cu_analyse->b3_cu_pos_x, 810 ps_cu_analyse->b3_cu_pos_y, 811 ps_left_nbr_4x4, 812 ps_top_nbr_4x4, 813 ps_topleft_nbr_4x4, 814 nbr_4x4_left_strd, 815 rd_opt_curr_idx); 816 817 #if ENABLE_TU_TREE_DETERMINATION_IN_RDOPT 818 if((ps_ctxt->u1_bit_depth == 8) && (!ps_inter_cand->b1_skip_flag)) 819 { 820 ihevce_determine_tu_tree_distribution( 821 ps_inter_cand, 822 (me_func_selector_t *)ps_ctxt->pv_err_func_selector, 823 ps_ctxt->ai2_scratch, 824 (UWORD8 *)pv_curr_src, 825 ps_cu_prms->i4_luma_src_stride, 826 ps_ctxt->i4_satd_lamda, 827 LAMBDA_Q_SHIFT, 828 ps_cu_analyse->u1_cu_size, 829 ps_ctxt->u1_max_tr_depth); 830 } 831 #endif 832 #if DISABLE_ZERO_ZBF_IN_INTER 833 ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF; 834 #else 835 ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE; 836 #endif 837 /* Recon loop with different TUs based on partition type*/ 838 rd_opt_cost += ((pf_inter_rdopt_cu_ntu)ps_ctxt->pv_inter_rdopt_cu_ntu)( 839 ps_ctxt, 840 ps_cu_prms, 841 pv_curr_src, 842 ps_cu_analyse->u1_cu_size, 843 ps_cu_analyse->b3_cu_pos_x, 844 ps_cu_analyse->b3_cu_pos_y, 845 rd_opt_curr_idx, 846 &s_chrm_cu_buf_prms, 847 ps_inter_cand, 848 ps_cu_analyse, 849 !ps_ctxt->u1_is_refPic ? 
ALPHA_FOR_NOISE_TERM_IN_RDOPT 850 : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) * 851 (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) / 852 100.0); 853 854 #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY 855 if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy) 856 { 857 ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf; 858 ps_ctxt->i8_cl_ssd_lambda_chroma_qf = 859 ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf; 860 } 861 #endif 862 863 /* based on the rd opt cost choose the best and current index */ 864 if(rd_opt_cost < rd_opt_least_cost) 865 { 866 /* swap the best and current indx */ 867 rd_opt_best_idx = !rd_opt_best_idx; 868 rd_opt_curr_idx = !rd_opt_curr_idx; 869 870 ps_ctxt->as_cu_prms[rd_opt_best_idx].i8_best_rdopt_cost = rd_opt_cost; 871 rd_opt_least_cost = rd_opt_cost; 872 i4_best_cu_qp = ps_ctxt->i4_cu_qp; 873 874 /* Store the best Inter cand. for final_recon function */ 875 ps_best_inter_cand = ps_inter_cand; 876 } 877 878 /* set the neighbour map to 0 */ 879 ihevce_set_nbr_map( 880 ps_ctxt->pu1_ctb_nbr_map, 881 ps_ctxt->i4_nbr_map_strd, 882 (ps_cu_analyse->b3_cu_pos_x << 1), 883 (ps_cu_analyse->b3_cu_pos_y << 1), 884 (ps_cu_analyse->u1_cu_size >> 2), 885 0); 886 887 } /* end of loop for all the Inter RD OPT cand */ 888 } 889 /* --------------------------------------- */ 890 /* ---- Conditional Eval of Intra -------- */ 891 /* --------------------------------------- */ 892 { 893 enc_loop_cu_final_prms_t *ps_enc_loop_bestprms; 894 ps_enc_loop_bestprms = &ps_ctxt->as_cu_prms[rd_opt_best_idx]; 895 896 /* check if inter candidates are valid */ 897 if(0 != ps_cu_analyse->u1_num_inter_cands) 898 { 899 /* if skip or no residual inter candidates has won then */ 900 /* evaluation of intra candidates is disabled */ 901 if((1 == ps_enc_loop_bestprms->u1_skip_flag) || 902 (0 == ps_enc_loop_bestprms->u1_is_cu_coded)) 903 { 904 enable_intra_eval_flag = 0; 905 } 906 } 907 /* Disable Intra Gating for HIGH QUALITY PRESET */ 
908 #if !ENABLE_INTRA_GATING_FOR_HQ 909 if(IHEVCE_QUALITY_P3 > ps_ctxt->i4_quality_preset) 910 { 911 enable_intra_eval_flag = 1; 912 913 #if DISABLE_LARGE_INTRA_PQ 914 if((IHEVCE_QUALITY_P0 == ps_ctxt->i4_quality_preset) && (ps_cu_prms->u1_is_cu_noisy) && 915 (ps_ctxt->i1_slice_type != ISLICE) && (0 != s_inter_cu_mode_info.u1_num_inter_cands)) 916 { 917 if(ps_cu_analyse->u1_cu_size > 16) 918 { 919 /* Disable 32x32 / 64x64 Intra in PQ P and B pics */ 920 enable_intra_eval_flag = 0; 921 } 922 else if(ps_cu_analyse->u1_cu_size == 16) 923 { 924 /* Disable tu equal to cu mode in 16x16 Intra in PQ P and B pics */ 925 ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255; 926 } 927 } 928 #endif 929 } 930 #endif 931 } 932 933 /* --------------------------------------- */ 934 /* ------ Intra RD OPT stage ------------- */ 935 /* --------------------------------------- */ 936 937 /* -- run a loop over all Intra rd opt cands ------ */ 938 if((0 != ps_cu_analyse->u1_num_intra_rdopt_cands) && (1 == enable_intra_eval_flag)) 939 { 940 LWORD64 rd_opt_cost; 941 WORD32 end_flag = 0; 942 WORD32 cu_eval_done = 0; 943 WORD32 subcu_eval_done = 0; 944 WORD32 subpu_eval_done = 0; 945 WORD32 max_trans_size; 946 WORD32 sync_wait_stride; 947 max_trans_size = MIN(MAX_TU_SIZE, (ps_cu_analyse->u1_cu_size)); 948 sync_wait_stride = (ps_cu_analyse->u1_cu_size) + max_trans_size; 949 950 if(!ps_ctxt->u1_use_top_at_ctb_boundary) 951 { 952 /* Wait till top data is ready */ 953 /* Currently checking till top right CU */ 954 curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3); 955 956 if(i4_ctb_y_off == 0) 957 { 958 /* No wait for 1st row */ 959 cu_top_right_offset = -(MAX_CTB_SIZE); 960 { 961 ihevce_tile_params_t *ps_col_tile_params = 962 ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base + 963 ps_ctxt->i4_tile_col_idx); 964 /* No wait for 1st row */ 965 cu_top_right_offset = -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE)); 966 } 967 
cu_top_right_dep_pos = 0; 968 } 969 else 970 { 971 cu_top_right_offset = sync_wait_stride; 972 cu_top_right_dep_pos = (i4_ctb_y_off >> 6) - 1; 973 } 974 975 if(0 == ps_cu_analyse->b3_cu_pos_y) 976 { 977 ihevce_dmgr_chk_row_row_sync( 978 ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right, 979 curr_cu_pos_in_row, 980 cu_top_right_offset, 981 cu_top_right_dep_pos, 982 ps_ctxt->i4_tile_col_idx, /* Col Tile No. */ 983 ps_ctxt->thrd_id); 984 } 985 } 986 ctr = 0; 987 988 /* Zero cbf tool is disabled for intra CUs */ 989 #if ENABLE_ZERO_CBF_IN_INTRA 990 ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE; 991 #else 992 ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF; 993 #endif 994 995 /* Intra Mode gating based on MPM cand list and encoder quality preset */ 996 if((ps_ctxt->i1_slice_type != ISLICE) && (ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3)) 997 { 998 ihevce_mpm_idx_based_filter_RDOPT_cand( 999 ps_ctxt, 1000 ps_cu_analyse, 1001 ps_left_nbr_4x4, 1002 ps_top_nbr_4x4, 1003 &ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0], 1004 &ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_eval_mark[0]); 1005 1006 ihevce_mpm_idx_based_filter_RDOPT_cand( 1007 ps_ctxt, 1008 ps_cu_analyse, 1009 ps_left_nbr_4x4, 1010 ps_top_nbr_4x4, 1011 &ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0], 1012 &ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_by_2_eval_mark[0]); 1013 } 1014 1015 /* Call Chroma SATD function for curr_func_mode in HIGH QUALITY mode */ 1016 if(1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd) 1017 { 1018 /* For cu_size = 64, there won't be any TU_EQ_CU case */ 1019 if(64 != ps_cu_analyse->u1_cu_size) 1020 { 1021 /* RDOPT copy States : Prev Cu best to current init */ 1022 COPY_CABAC_STATES( 1023 &ps_ctxt->au1_rdopt_init_ctxt_models[0], 1024 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0], 1025 IHEVC_CAB_CTXT_END); 1026 1027 /* RDOPT related copies and settings */ 1028 ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx; 
1029 1030 /* Calc. best SATD mode for TU_EQ_CU case */ 1031 ((pf_intra_chroma_pred_mode_selector)ps_ctxt->pv_intra_chroma_pred_mode_selector)( 1032 ps_ctxt, 1033 &s_chrm_cu_buf_prms, 1034 ps_cu_analyse, 1035 rd_opt_curr_idx, 1036 TU_EQ_CU, 1037 !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT 1038 : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) * 1039 (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) / 1040 100.0, 1041 ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY); 1042 1043 #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY 1044 if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy) 1045 { 1046 ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf; 1047 ps_ctxt->i8_cl_ssd_lambda_chroma_qf = 1048 ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf; 1049 } 1050 #endif 1051 } 1052 1053 /* For cu_size=8 case, chroma cost will be same for TU_EQ_CU and 1054 TU_EQ_CU_DIV2 case */ 1055 1056 if((ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] != 1057 255) && 1058 (8 != ps_cu_analyse->u1_cu_size)) 1059 { 1060 /* RDOPT copy States : Prev Cu best to current init */ 1061 COPY_CABAC_STATES( 1062 &ps_ctxt->au1_rdopt_init_ctxt_models[0], 1063 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0], 1064 IHEVC_CAB_CTXT_END); 1065 1066 /* RDOPT related copies and settings */ 1067 ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx; 1068 1069 /* Calc. best SATD mode for TU_EQ_CU_DIV2 case */ 1070 ((pf_intra_chroma_pred_mode_selector)ps_ctxt->pv_intra_chroma_pred_mode_selector)( 1071 ps_ctxt, 1072 &s_chrm_cu_buf_prms, 1073 ps_cu_analyse, 1074 rd_opt_curr_idx, 1075 TU_EQ_CU_DIV2, 1076 !ps_ctxt->u1_is_refPic ? 
ALPHA_FOR_NOISE_TERM_IN_RDOPT 1077 : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) * 1078 (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) / 1079 100.0, 1080 ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY); 1081 1082 #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY 1083 if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy) 1084 { 1085 ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf; 1086 ps_ctxt->i8_cl_ssd_lambda_chroma_qf = 1087 ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf; 1088 } 1089 #endif 1090 } 1091 } 1092 1093 while(0 == end_flag) 1094 { 1095 UWORD8 *pu1_mode = NULL; 1096 WORD32 curr_func_mode = 0; 1097 void *pv_pred; 1098 1099 ASSERT(ctr < 36); 1100 1101 /* TU equal to CU size evaluation of different modes */ 1102 if(0 == cu_eval_done) 1103 { 1104 /* check if the all the modes have been evaluated */ 1105 if(255 == ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[ctr]) 1106 { 1107 cu_eval_done = 1; 1108 ctr = 0; 1109 } 1110 else if( 1111 (1 == ctr) && 1112 ((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P5) || 1113 (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6)) && 1114 (ps_ctxt->i1_slice_type != ISLICE)) 1115 { 1116 ctr = 0; 1117 cu_eval_done = 1; 1118 subcu_eval_done = 1; 1119 subpu_eval_done = 1; 1120 } 1121 else 1122 { 1123 if(0 == ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_eval_mark[ctr]) 1124 { 1125 ctr++; 1126 continue; 1127 } 1128 1129 pu1_mode = 1130 &ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[ctr]; 1131 ctr++; 1132 curr_func_mode = TU_EQ_CU; 1133 } 1134 } 1135 /* Sub CU (NXN) mode evaluation of different pred modes */ 1136 if((0 == subpu_eval_done) && (1 == cu_eval_done)) 1137 { 1138 /*For NxN modes evaluation all candidates for all PU parts are evaluated */ 1139 /*inside the ihevce_intra_rdopt_cu_ntu function, so the subpu_eval_done is set to 1 */ 1140 { 1141 pu1_mode = &ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_nxn[0][ctr]; 1142 1143 
curr_func_mode = TU_EQ_SUBCU; 1144 /* check if the any modes have to be evaluated */ 1145 if(255 == *pu1_mode) 1146 { 1147 subpu_eval_done = 1; 1148 ctr = 0; 1149 } 1150 else if(ctr != 0) /* If the modes have to be evaluated, then terminate, as all modes are already evaluated */ 1151 { 1152 subpu_eval_done = 1; 1153 ctr = 0; 1154 } 1155 else 1156 { 1157 ctr++; 1158 } 1159 } 1160 } 1161 1162 /* TU size equal to CU div2 mode evaluation of different pred modes */ 1163 if((0 == subcu_eval_done) && (1 == subpu_eval_done) && (1 == cu_eval_done)) 1164 { 1165 /* check if the all the modes have been evaluated */ 1166 if(255 == 1167 ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[ctr]) 1168 { 1169 subcu_eval_done = 1; 1170 } 1171 else if( 1172 (1 == ctr) && 1173 ((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P5) || 1174 (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6)) && 1175 (ps_ctxt->i1_slice_type != ISLICE) && (ps_cu_analyse->u1_cu_size == 64)) 1176 { 1177 subcu_eval_done = 1; 1178 } 1179 else 1180 { 1181 if(0 == ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_by_2_eval_mark[ctr]) 1182 { 1183 ctr++; 1184 continue; 1185 } 1186 1187 pu1_mode = &ps_cu_analyse->s_cu_intra_cand 1188 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[ctr]; 1189 1190 ctr++; 1191 curr_func_mode = TU_EQ_CU_DIV2; 1192 } 1193 } 1194 1195 /* check if all CU option have been evalueted */ 1196 if((1 == cu_eval_done) && (1 == subcu_eval_done) && (1 == subpu_eval_done)) 1197 { 1198 break; 1199 } 1200 1201 /* RDOPT related copies and settings */ 1202 ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx; 1203 1204 /* Assign ME/Intra pred buf. to the current intra cand. 
since we 1205 are storing pred data for final_reon function */ 1206 { 1207 pv_pred = ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[rd_opt_curr_idx]; 1208 } 1209 1210 /* RDOPT copy States : Prev Cu best to current init */ 1211 COPY_CABAC_STATES( 1212 &ps_ctxt->au1_rdopt_init_ctxt_models[0], 1213 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0], 1214 IHEVC_CAB_CTXT_END); 1215 1216 /* call the function which performs the normative Intra encode */ 1217 rd_opt_cost = ((pf_intra_rdopt_cu_ntu)ps_ctxt->pv_intra_rdopt_cu_ntu)( 1218 ps_ctxt, 1219 ps_cu_prms, 1220 pv_pred, 1221 ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[rd_opt_curr_idx], 1222 &s_chrm_cu_buf_prms, 1223 pu1_mode, 1224 ps_cu_analyse, 1225 pv_curr_src, 1226 pv_cu_left, 1227 pv_cu_top, 1228 pv_cu_top_left, 1229 ps_left_nbr_4x4, 1230 ps_top_nbr_4x4, 1231 nbr_4x4_left_strd, 1232 cu_left_stride, 1233 rd_opt_curr_idx, 1234 curr_func_mode, 1235 !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT 1236 : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) * 1237 (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) / 1238 100.0); 1239 1240 #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY 1241 if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy) 1242 { 1243 ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf; 1244 ps_ctxt->i8_cl_ssd_lambda_chroma_qf = 1245 ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf; 1246 } 1247 #endif 1248 1249 /* based on the rd opt cost choose the best and current index */ 1250 if(rd_opt_cost < rd_opt_least_cost) 1251 { 1252 /* swap the best and current indx */ 1253 rd_opt_best_idx = !rd_opt_best_idx; 1254 rd_opt_curr_idx = !rd_opt_curr_idx; 1255 i4_best_cu_qp = ps_ctxt->i4_cu_qp; 1256 1257 rd_opt_least_cost = rd_opt_cost; 1258 ps_ctxt->as_cu_prms[rd_opt_best_idx].i8_best_rdopt_cost = rd_opt_cost; 1259 } 1260 1261 if((TU_EQ_SUBCU == curr_func_mode) && 1262 (ps_ctxt->as_cu_prms[rd_opt_best_idx].u1_intra_flag) && 1263 (ps_ctxt->i4_quality_preset 
<= IHEVCE_QUALITY_P2) && !FORCE_INTRA_TU_DEPTH_TO_0) 1264 { 1265 UWORD8 au1_tu_eq_cu_div2_modes[4]; 1266 UWORD8 au1_freq_of_mode[4]; 1267 1268 if(ps_ctxt->as_cu_prms[rd_opt_best_idx].u1_part_mode == SIZE_2Nx2N) 1269 { 1270 ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = 1271 255; //ps_ctxt->as_cu_prms[rd_opt_best_idx].au1_intra_pred_mode[0]; 1272 ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] = 1273 255; 1274 } 1275 else 1276 { 1277 WORD32 i4_num_clusters = ihevce_find_num_clusters_of_identical_points_1D( 1278 ps_ctxt->as_cu_prms[rd_opt_best_idx].au1_intra_pred_mode, 1279 au1_tu_eq_cu_div2_modes, 1280 au1_freq_of_mode, 1281 4); 1282 1283 if(2 == i4_num_clusters) 1284 { 1285 if(au1_freq_of_mode[0] == 3) 1286 { 1287 ps_cu_analyse->s_cu_intra_cand 1288 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = 1289 au1_tu_eq_cu_div2_modes[0]; 1290 ps_cu_analyse->s_cu_intra_cand 1291 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] = 255; 1292 } 1293 else if(au1_freq_of_mode[1] == 3) 1294 { 1295 ps_cu_analyse->s_cu_intra_cand 1296 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = 1297 au1_tu_eq_cu_div2_modes[1]; 1298 ps_cu_analyse->s_cu_intra_cand 1299 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] = 255; 1300 } 1301 else 1302 { 1303 ps_cu_analyse->s_cu_intra_cand 1304 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = 1305 au1_tu_eq_cu_div2_modes[0]; 1306 ps_cu_analyse->s_cu_intra_cand 1307 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] = 1308 au1_tu_eq_cu_div2_modes[1]; 1309 ps_cu_analyse->s_cu_intra_cand 1310 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[2] = 255; 1311 } 1312 } 1313 } 1314 } 1315 1316 /* set the neighbour map to 0 */ 1317 ihevce_set_nbr_map( 1318 ps_ctxt->pu1_ctb_nbr_map, 1319 ps_ctxt->i4_nbr_map_strd, 1320 (ps_cu_analyse->b3_cu_pos_x << 1), 1321 (ps_cu_analyse->b3_cu_pos_y << 1), 1322 (ps_cu_analyse->u1_cu_size >> 2), 1323 0); 1324 } 1325 1326 } /* end of Intra RD OPT cand evaluation */ 1327 1328 ASSERT(i4_best_cu_qp > 
(ps_ctxt->ps_rc_quant_ctxt->i2_min_qp - 1)); 1329 ps_ctxt->i4_cu_qp = i4_best_cu_qp; 1330 ps_cu_analyse->i1_cu_qp = i4_best_cu_qp; 1331 1332 /* --------------------------------------- */ 1333 /* --------Final mode Recon ---------- */ 1334 /* --------------------------------------- */ 1335 { 1336 enc_loop_cu_final_prms_t *ps_enc_loop_bestprms; 1337 void *pv_final_pred = NULL; 1338 WORD32 final_pred_strd = 0; 1339 void *pv_final_pred_chrm = NULL; 1340 WORD32 final_pred_strd_chrm = 0; 1341 WORD32 packed_pred_mode; 1342 1343 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS 1344 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2) 1345 { 1346 pu1_ecd_data = &ps_ctxt->pu1_cu_recur_coeffs[0]; 1347 } 1348 #else 1349 pu1_ecd_data = &ps_ctxt->pu1_cu_recur_coeffs[0]; 1350 #endif 1351 1352 ps_enc_loop_bestprms = &ps_ctxt->as_cu_prms[rd_opt_best_idx]; 1353 packed_pred_mode = 1354 ps_enc_loop_bestprms->u1_intra_flag + (ps_enc_loop_bestprms->u1_skip_flag) * 2; 1355 1356 if(!ps_ctxt->u1_is_input_data_hbd) 1357 { 1358 if(ps_enc_loop_bestprms->u1_intra_flag) 1359 { 1360 pv_final_pred = ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[rd_opt_best_idx]; 1361 final_pred_strd = 1362 ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[rd_opt_best_idx]; 1363 } 1364 else 1365 { 1366 pv_final_pred = ps_best_inter_cand->pu1_pred_data; 1367 final_pred_strd = ps_best_inter_cand->i4_pred_data_stride; 1368 } 1369 1370 pv_final_pred_chrm = 1371 ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[CU_ME_INTRA_PRED_CHROMA_IDX] + 1372 rd_opt_best_idx * ((MAX_CTB_SIZE * MAX_CTB_SIZE >> 1) + 1373 (u1_is_422 * (MAX_CTB_SIZE * MAX_CTB_SIZE >> 1))); 1374 final_pred_strd_chrm = 1375 ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[CU_ME_INTRA_PRED_CHROMA_IDX]; 1376 } 1377 1378 ihevce_set_eval_flags(ps_ctxt, ps_enc_loop_bestprms); 1379 1380 { 1381 final_mode_process_prms_t s_prms; 1382 1383 void *pv_cu_luma_recon; 1384 void *pv_cu_chroma_recon; 1385 WORD32 luma_stride, chroma_stride; 1386 1387 
if(!ps_ctxt->u1_is_input_data_hbd) 1388 { 1389 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS 1390 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2) 1391 { 1392 pv_cu_luma_recon = ps_ctxt->pv_cu_luma_recon; 1393 pv_cu_chroma_recon = ps_ctxt->pv_cu_chrma_recon; 1394 luma_stride = ps_cu_analyse->u1_cu_size; 1395 chroma_stride = ps_cu_analyse->u1_cu_size; 1396 } 1397 else 1398 { 1399 /* based on CU position derive the luma pointers */ 1400 pv_cu_luma_recon = pu1_final_recon; 1401 1402 /* based on CU position derive the chroma pointers */ 1403 pv_cu_chroma_recon = s_chrm_cu_buf_prms.pu1_final_recon; 1404 1405 luma_stride = ps_cu_prms->i4_luma_recon_stride; 1406 1407 chroma_stride = ps_cu_prms->i4_chrm_recon_stride; 1408 } 1409 #else 1410 pv_cu_luma_recon = ps_ctxt->pv_cu_luma_recon; 1411 pv_cu_chroma_recon = ps_ctxt->pv_cu_chrma_recon; 1412 luma_stride = ps_cu_analyse->u1_cu_size; 1413 chroma_stride = ps_cu_analyse->u1_cu_size; 1414 #endif 1415 1416 s_prms.ps_cu_nbr_prms = &s_cu_nbr_prms; 1417 s_prms.ps_best_inter_cand = ps_best_inter_cand; 1418 s_prms.ps_chrm_cu_buf_prms = &s_chrm_cu_buf_prms; 1419 s_prms.packed_pred_mode = packed_pred_mode; 1420 s_prms.rd_opt_best_idx = rd_opt_best_idx; 1421 s_prms.pv_src = pu1_curr_src; 1422 s_prms.src_strd = ps_cu_prms->i4_luma_src_stride; 1423 s_prms.pv_pred = pv_final_pred; 1424 s_prms.pred_strd = final_pred_strd; 1425 s_prms.pv_pred_chrm = pv_final_pred_chrm; 1426 s_prms.pred_chrm_strd = final_pred_strd_chrm; 1427 s_prms.pu1_final_ecd_data = pu1_ecd_data; 1428 s_prms.pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0]; 1429 s_prms.csbf_strd = ps_ctxt->i4_cu_csbf_strd; 1430 s_prms.pv_luma_recon = pv_cu_luma_recon; 1431 s_prms.recon_luma_strd = luma_stride; 1432 s_prms.pv_chrm_recon = pv_cu_chroma_recon; 1433 s_prms.recon_chrma_strd = chroma_stride; 1434 s_prms.u1_cu_pos_x = ps_cu_analyse->b3_cu_pos_x; 1435 s_prms.u1_cu_pos_y = ps_cu_analyse->b3_cu_pos_y; 1436 s_prms.u1_cu_size = ps_cu_analyse->u1_cu_size; 1437 s_prms.i1_cu_qp = 
ps_cu_analyse->i1_cu_qp; 1438 s_prms.u1_will_cabac_state_change = 1; 1439 s_prms.u1_recompute_sbh_and_rdoq = 0; 1440 s_prms.u1_is_first_pass = 1; 1441 } 1442 1443 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS 1444 s_prms.u1_is_cu_noisy = !ps_enc_loop_bestprms->u1_intra_flag 1445 ? ps_cu_prms->u1_is_cu_noisy 1446 : ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY; 1447 #endif 1448 1449 ((pf_final_rdopt_mode_prcs)ps_ctxt->pv_final_rdopt_mode_prcs)(ps_ctxt, &s_prms); 1450 1451 #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY 1452 if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy) 1453 { 1454 ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf; 1455 ps_ctxt->i8_cl_ssd_lambda_chroma_qf = 1456 ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf; 1457 } 1458 #endif 1459 } 1460 } 1461 1462 /* --------------------------------------- */ 1463 /* --------Populate CU out prms ---------- */ 1464 /* --------------------------------------- */ 1465 { 1466 enc_loop_cu_final_prms_t *ps_enc_loop_bestprms; 1467 UWORD8 *pu1_pu_map; 1468 ps_enc_loop_bestprms = &ps_ctxt->as_cu_prms[rd_opt_best_idx]; 1469 1470 /* Corner case : If Part is 2Nx2N and Merge has all TU with zero cbf */ 1471 /* then it has to be coded as skip CU */ 1472 if((SIZE_2Nx2N == ps_enc_loop_bestprms->u1_part_mode) && 1473 (1 == ps_enc_loop_bestprms->as_pu_enc_loop[0].b1_merge_flag) && 1474 (0 == ps_enc_loop_bestprms->u1_skip_flag) && (0 == ps_enc_loop_bestprms->u1_is_cu_coded)) 1475 { 1476 ps_enc_loop_bestprms->u1_skip_flag = 1; 1477 } 1478 1479 /* update number PUs in CU */ 1480 ps_cu_prms->i4_num_pus_in_cu = ps_enc_loop_bestprms->u2_num_pus_in_cu; 1481 1482 /* ---- populate the colocated pu map index --- */ 1483 for(ctr = 0; ctr < ps_enc_loop_bestprms->u2_num_pus_in_cu; ctr++) 1484 { 1485 WORD32 i; 1486 WORD32 vert_ht; 1487 WORD32 horz_wd; 1488 1489 if(ps_enc_loop_bestprms->u1_intra_flag) 1490 { 1491 
ps_enc_loop_bestprms->as_col_pu_enc_loop[ctr].b1_intra_flag = 1; 1492 vert_ht = ps_cu_analyse->u1_cu_size >> 2; 1493 horz_wd = ps_cu_analyse->u1_cu_size >> 2; 1494 } 1495 else 1496 { 1497 vert_ht = (((ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_ht + 1) << 2) >> 2); 1498 horz_wd = (((ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_wd + 1) << 2) >> 2); 1499 } 1500 1501 pu1_pu_map = pu1_col_pu_map + ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_pos_x; 1502 pu1_pu_map += (ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_pos_y * num_4x4_in_ctb); 1503 1504 for(i = 0; i < vert_ht; i++) 1505 { 1506 memset(pu1_pu_map, col_start_pu_idx, horz_wd); 1507 pu1_pu_map += num_4x4_in_ctb; 1508 } 1509 /* increment the index */ 1510 col_start_pu_idx++; 1511 } 1512 /* ---- copy the colocated PUs to frm pu ----- */ 1513 memcpy( 1514 ps_col_pu, 1515 &ps_enc_loop_bestprms->as_col_pu_enc_loop[0], 1516 ps_enc_loop_bestprms->u2_num_pus_in_cu * sizeof(pu_col_mv_t)); 1517 1518 /*---populate qp for 4x4 nbr array based on skip and cbf zero flag---*/ 1519 { 1520 entropy_context_t *ps_entropy_ctxt; 1521 1522 WORD32 diff_cu_qp_delta_depth, log2_ctb_size; 1523 1524 WORD32 log2_min_cu_qp_delta_size; 1525 UWORD32 block_addr_align; 1526 ps_entropy_ctxt = ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt; 1527 1528 log2_ctb_size = ps_entropy_ctxt->i1_log2_ctb_size; 1529 diff_cu_qp_delta_depth = ps_entropy_ctxt->ps_pps->i1_diff_cu_qp_delta_depth; 1530 1531 log2_min_cu_qp_delta_size = log2_ctb_size - diff_cu_qp_delta_depth; 1532 block_addr_align = 15 << (log2_min_cu_qp_delta_size - 3); 1533 1534 ps_entropy_ctxt->i4_qg_pos_x = ps_cu_analyse->b3_cu_pos_x & block_addr_align; 1535 ps_entropy_ctxt->i4_qg_pos_y = ps_cu_analyse->b3_cu_pos_y & block_addr_align; 1536 /*Update the Qp value used. It will not have a valid value iff 1537 current CU is (skipped/no_cbf). 
In that case the Qp needed for 1538 deblocking is calculated from top/left/previous coded CU*/ 1539 1540 ps_ctxt->ps_enc_out_ctxt->i1_cu_qp = ps_cu_analyse->i1_cu_qp; 1541 1542 if(ps_entropy_ctxt->i4_qg_pos_x == ps_cu_analyse->b3_cu_pos_x && 1543 ps_entropy_ctxt->i4_qg_pos_y == ps_cu_analyse->b3_cu_pos_y) 1544 { 1545 ps_ctxt->ps_enc_out_ctxt->b1_first_cu_in_qg = 1; 1546 } 1547 else 1548 { 1549 ps_ctxt->ps_enc_out_ctxt->b1_first_cu_in_qg = 0; 1550 } 1551 } 1552 1553 /* -- at the end of CU set the neighbour map to 1 -- */ 1554 ihevce_set_nbr_map( 1555 ps_ctxt->pu1_ctb_nbr_map, 1556 ps_ctxt->i4_nbr_map_strd, 1557 (ps_cu_analyse->b3_cu_pos_x << 1), 1558 (ps_cu_analyse->b3_cu_pos_y << 1), 1559 (ps_cu_analyse->u1_cu_size >> 2), 1560 1); 1561 1562 /* -- at the end of CU update best cabac rdopt states -- */ 1563 /* -- and also set the top row skip flags ------------- */ 1564 ihevce_entropy_update_best_cu_states( 1565 &ps_ctxt->s_rdopt_entropy_ctxt, 1566 ps_cu_analyse->b3_cu_pos_x, 1567 ps_cu_analyse->b3_cu_pos_y, 1568 ps_cu_analyse->u1_cu_size, 1569 0, 1570 rd_opt_best_idx); 1571 } 1572 1573 /* Store Output struct */ 1574 #if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS 1575 { 1576 { 1577 memcpy( 1578 &ps_ctxt->ps_enc_out_ctxt->s_cu_prms, 1579 &ps_ctxt->as_cu_prms[rd_opt_best_idx], 1580 sizeof(enc_loop_cu_final_prms_t)); 1581 } 1582 1583 memcpy( 1584 &ps_ctxt->as_cu_recur_nbr[0], 1585 &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0], 1586 sizeof(nbr_4x4_t) * (ps_cu_analyse->u1_cu_size >> 2) * 1587 (ps_cu_analyse->u1_cu_size >> 2)); 1588 1589 ps_ctxt->ps_enc_out_ctxt->ps_cu_prms = &ps_ctxt->ps_enc_out_ctxt->s_cu_prms; 1590 1591 ps_ctxt->ps_cu_recur_nbr = &ps_ctxt->as_cu_recur_nbr[0]; 1592 } 1593 #else 1594 if(ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P2) 1595 { 1596 ps_ctxt->ps_enc_out_ctxt->ps_cu_prms = &ps_ctxt->as_cu_prms[rd_opt_best_idx]; 1597 1598 ps_ctxt->ps_cu_recur_nbr = &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0]; 1599 1600 if(ps_ctxt->u1_disable_intra_eval && 
ps_ctxt->i4_deblk_pad_hpel_cur_pic) 1601 { 1602 /* Wait till top data is ready */ 1603 /* Currently checking till top right CU */ 1604 curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3); 1605 1606 if(i4_ctb_y_off == 0) 1607 { 1608 /* No wait for 1st row */ 1609 cu_top_right_offset = -(MAX_CTB_SIZE); 1610 { 1611 ihevce_tile_params_t *ps_col_tile_params = 1612 ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base + 1613 ps_ctxt->i4_tile_col_idx); 1614 1615 /* No wait for 1st row */ 1616 cu_top_right_offset = -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE)); 1617 } 1618 cu_top_right_dep_pos = 0; 1619 } 1620 else 1621 { 1622 cu_top_right_offset = (ps_cu_analyse->u1_cu_size); 1623 cu_top_right_dep_pos = (i4_ctb_y_off >> 6) - 1; 1624 } 1625 1626 if(0 == ps_cu_analyse->b3_cu_pos_y) 1627 { 1628 ihevce_dmgr_chk_row_row_sync( 1629 ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right, 1630 curr_cu_pos_in_row, 1631 cu_top_right_offset, 1632 cu_top_right_dep_pos, 1633 ps_ctxt->i4_tile_col_idx, /* Col Tile No. */ 1634 ps_ctxt->thrd_id); 1635 } 1636 } 1637 } 1638 else 1639 { 1640 { 1641 memcpy( 1642 &ps_ctxt->ps_enc_out_ctxt->s_cu_prms, 1643 &ps_ctxt->as_cu_prms[rd_opt_best_idx], 1644 sizeof(enc_loop_cu_final_prms_t)); 1645 } 1646 1647 memcpy( 1648 &ps_ctxt->as_cu_recur_nbr[0], 1649 &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0], 1650 sizeof(nbr_4x4_t) * (ps_cu_analyse->u1_cu_size >> 2) * 1651 (ps_cu_analyse->u1_cu_size >> 2)); 1652 1653 ps_ctxt->ps_enc_out_ctxt->ps_cu_prms = &ps_ctxt->ps_enc_out_ctxt->s_cu_prms; 1654 1655 ps_ctxt->ps_cu_recur_nbr = &ps_ctxt->as_cu_recur_nbr[0]; 1656 } 1657 #endif 1658 1659 ps_ctxt->s_pred_buf_data.u4_is_buf_in_use &= 1660 ~((1 << (ps_ctxt->i4_max_num_inter_rdopt_cands + 4)) - 1); 1661 1662 return rd_opt_least_cost; 1663 } 1664 1665 /*! 
1666 ****************************************************************************** 1667 * \if Function name : ihevce_enc_loop_process_row \endif 1668 * 1669 * \brief 1670 * Row level enc_loop pass function 1671 * 1672 * \param[in] pv_ctxt : pointer to enc_loop module 1673 * \param[in] ps_curr_src_bufs : pointer to input yuv buffer (row buffer) 1674 * \param[out] ps_curr_recon_bufs : pointer recon picture structure pointer (row buffer) 1675 * \param[in] ps_ctb_in : pointer CTB structure (output of ME/IPE) (row buffer) 1676 * \param[out] ps_ctb_out : pointer CTB output structure (row buffer) 1677 * \param[out] ps_cu_out : pointer CU output structure (row buffer) 1678 * \param[out] ps_tu_out : pointer TU output structure (row buffer) 1679 * \param[out] pi2_frm_coeffs : pointer coeff output (row buffer) 1680 * \param[in] i4_poc : current poc. Needed to send recon in dist-client mode 1681 * 1682 * \return 1683 * None 1684 * 1685 * Note : Currently the frame level calculations done assume that 1686 * frame width of the input / recon is an exact multiple of ctbsize 1687 * 1688 * \author 1689 * Ittiam 1690 * 1691 ***************************************************************************** 1692 */ 1693 void ihevce_enc_loop_process_row( 1694 ihevce_enc_loop_ctxt_t *ps_ctxt, 1695 iv_enc_yuv_buf_t *ps_curr_src_bufs, 1696 iv_enc_yuv_buf_t *ps_curr_recon_bufs, 1697 iv_enc_yuv_buf_src_t *ps_curr_recon_bufs_src, 1698 UWORD8 **ppu1_y_subpel_planes, 1699 ctb_analyse_t *ps_ctb_in, 1700 ctb_enc_loop_out_t *ps_ctb_out, 1701 ipe_l0_ctb_analyse_for_me_t *ps_row_ipe_analyse, 1702 cur_ctb_cu_tree_t *ps_row_cu_tree, 1703 cu_enc_loop_out_t *ps_row_cu, 1704 tu_enc_loop_out_t *ps_row_tu, 1705 pu_t *ps_row_pu, 1706 pu_col_mv_t *ps_row_col_pu, 1707 UWORD16 *pu2_num_pu_map, 1708 UWORD8 *pu1_row_pu_map, 1709 UWORD8 *pu1_row_ecd_data, 1710 UWORD32 *pu4_pu_offsets, 1711 frm_ctb_ctxt_t *ps_frm_ctb_prms, 1712 WORD32 vert_ctr, 1713 recon_pic_buf_t *ps_frm_recon, 1714 void *pv_dep_mngr_encloop_dep_me, 
1715 pad_interp_recon_frm_t *ps_pad_interp_recon, 1716 WORD32 i4_pass, 1717 multi_thrd_ctxt_t *ps_multi_thrd_ctxt, 1718 ihevce_tile_params_t *ps_tile_params) 1719 { 1720 enc_loop_cu_prms_t s_cu_prms; 1721 ctb_enc_loop_out_t *ps_ctb_out_dblk; 1722 1723 WORD32 ctb_ctr, ctb_start, ctb_end; 1724 WORD32 col_pu_map_idx; 1725 WORD32 num_ctbs_horz_pic; 1726 WORD32 ctb_size; 1727 WORD32 last_ctb_row_flag; 1728 WORD32 last_ctb_col_flag; 1729 WORD32 last_hz_ctb_wd; 1730 WORD32 last_vt_ctb_ht; 1731 void *pv_dep_mngr_enc_loop_dblk; 1732 void *pv_dep_mngr_enc_loop_cu_top_right; 1733 WORD32 dblk_offset, dblk_check_dep_pos; 1734 WORD32 aux_offset, aux_check_dep_pos; 1735 void *pv_dep_mngr_me_dep_encloop; 1736 ctb_enc_loop_out_t *ps_ctb_out_sao; 1737 /*Structure to store deblocking parameters at CTB-row level*/ 1738 deblk_ctbrow_prms_t s_deblk_ctb_row_params; 1739 UWORD8 is_inp_422 = (ps_ctxt->u1_chroma_array_type == 2); 1740 1741 pv_dep_mngr_me_dep_encloop = (void *)ps_frm_recon->pv_dep_mngr_recon; 1742 num_ctbs_horz_pic = ps_frm_ctb_prms->i4_num_ctbs_horz; 1743 ctb_size = ps_frm_ctb_prms->i4_ctb_size; 1744 1745 /* Store the num_ctb_horz in sao context*/ 1746 ps_ctxt->s_sao_ctxt_t.u4_num_ctbs_horz = ps_frm_ctb_prms->i4_num_ctbs_horz; 1747 ps_ctxt->s_sao_ctxt_t.u4_num_ctbs_vert = ps_frm_ctb_prms->i4_num_ctbs_vert; 1748 1749 /* Get the EncLoop Deblock Dep Mngr */ 1750 pv_dep_mngr_enc_loop_dblk = ps_ctxt->pv_dep_mngr_enc_loop_dblk; 1751 /* Get the EncLoop Top-Right CU Dep Mngr */ 1752 pv_dep_mngr_enc_loop_cu_top_right = ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right; 1753 /* Set Variables for Dep. 
Checking and Setting */ 1754 aux_check_dep_pos = vert_ctr; 1755 aux_offset = 2; /* Should be there for 0th row also */ 1756 if(vert_ctr > 0) 1757 { 1758 dblk_check_dep_pos = vert_ctr - 1; 1759 dblk_offset = 2; 1760 } 1761 else 1762 { 1763 /* First row should run without waiting */ 1764 dblk_check_dep_pos = 0; 1765 dblk_offset = -(ps_tile_params->i4_first_sample_x + 1); 1766 } 1767 1768 /* check if the current row processed in last CTb row */ 1769 last_ctb_row_flag = (vert_ctr == (ps_frm_ctb_prms->i4_num_ctbs_vert - 1)); 1770 1771 /* Valid Width (pixels) in the last CTB in every row (padding cases) */ 1772 last_hz_ctb_wd = ps_frm_ctb_prms->i4_cu_aligned_pic_wd - ((num_ctbs_horz_pic - 1) * ctb_size); 1773 1774 /* Valid Height (pixels) in the last CTB row (padding cases) */ 1775 last_vt_ctb_ht = ps_frm_ctb_prms->i4_cu_aligned_pic_ht - 1776 ((ps_frm_ctb_prms->i4_num_ctbs_vert - 1) * ctb_size); 1777 /* reset the states copied flag */ 1778 ps_ctxt->u1_cabac_states_next_row_copied_flag = 0; 1779 ps_ctxt->u1_cabac_states_first_cu_copied_flag = 0; 1780 1781 /* populate the cu prms which are common for entire ctb row */ 1782 s_cu_prms.i4_luma_src_stride = ps_curr_src_bufs->i4_y_strd; 1783 s_cu_prms.i4_chrm_src_stride = ps_curr_src_bufs->i4_uv_strd; 1784 s_cu_prms.i4_luma_recon_stride = ps_curr_recon_bufs->i4_y_strd; 1785 s_cu_prms.i4_chrm_recon_stride = ps_curr_recon_bufs->i4_uv_strd; 1786 s_cu_prms.i4_ctb_size = ctb_size; 1787 1788 ps_ctxt->i4_is_first_cu_qg_coded = 0; 1789 1790 /* Initialize the number of PUs for the first CTB to 0 */ 1791 *pu2_num_pu_map = 0; 1792 1793 /*Getting the address of BS and Qp arrays and other info*/ 1794 memcpy(&s_deblk_ctb_row_params, &ps_ctxt->s_deblk_ctbrow_prms, sizeof(deblk_ctbrow_prms_t)); 1795 { 1796 WORD32 num_ctbs_horz_tile; 1797 /* Update the pointers which are accessed not by using ctb_ctr 1798 to the tile start here! 
*/ 1799 ps_ctb_in += ps_tile_params->i4_first_ctb_x; 1800 ps_ctb_out += ps_tile_params->i4_first_ctb_x; 1801 1802 ps_row_cu += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_cus_in_ctb); 1803 ps_row_tu += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_tus_in_ctb); 1804 ps_row_pu += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_pus_in_ctb); 1805 pu1_row_pu_map += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_pus_in_ctb); 1806 pu1_row_ecd_data += 1807 (ps_tile_params->i4_first_ctb_x * 1808 ((is_inp_422 == 1) ? (ps_frm_ctb_prms->i4_num_tus_in_ctb << 1) 1809 : ((ps_frm_ctb_prms->i4_num_tus_in_ctb * 3) >> 1)) * 1810 MAX_SCAN_COEFFS_BYTES_4x4); 1811 1812 /* Update the pointers to the tile start */ 1813 s_deblk_ctb_row_params.pu4_ctb_row_bs_vert += 1814 (ps_tile_params->i4_first_ctb_x * (ctb_size >> 3)); //one vertical edge per 8x8 block 1815 s_deblk_ctb_row_params.pu4_ctb_row_bs_horz += 1816 (ps_tile_params->i4_first_ctb_x * (ctb_size >> 3)); //one horizontal edge per 8x8 block 1817 s_deblk_ctb_row_params.pi1_ctb_row_qp += (ps_tile_params->i4_first_ctb_x * (ctb_size >> 2)); 1818 1819 num_ctbs_horz_tile = ps_tile_params->i4_curr_tile_wd_in_ctb_unit; 1820 1821 ctb_start = ps_tile_params->i4_first_ctb_x; 1822 ctb_end = ps_tile_params->i4_first_ctb_x + num_ctbs_horz_tile; 1823 } 1824 ps_ctb_out_dblk = ps_ctb_out; 1825 1826 ps_ctxt->i4_last_cu_qp_from_prev_ctb = ps_ctxt->i4_frame_qp; 1827 1828 /* --------- Loop over all the CTBs in a row --------------- */ 1829 for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++) 1830 { 1831 cu_final_update_prms s_cu_update_prms; 1832 1833 cur_ctb_cu_tree_t *ps_cu_tree_analyse; 1834 me_ctb_data_t *ps_cu_me_data; 1835 ipe_l0_ctb_analyse_for_me_t *ps_ctb_ipe_analyse; 1836 cu_enc_loop_out_t *ps_cu_final; 1837 pu_col_mv_t *ps_ctb_col_pu; 1838 1839 WORD32 cur_ctb_ht, cur_ctb_wd; 1840 WORD32 last_cu_pos_in_ctb; 1841 WORD32 last_cu_size; 1842 WORD32 num_pus_in_ctb; 1843 UWORD8 u1_is_ctb_noisy; 1844 
ps_ctb_col_pu = ps_row_col_pu + ctb_ctr * ps_frm_ctb_prms->i4_num_pus_in_ctb; 1845 1846 if(ctb_ctr) 1847 { 1848 ps_ctxt->i4_prev_QP = ps_ctxt->i4_last_cu_qp_from_prev_ctb; 1849 } 1850 /*If Sup pic rc is enabled*/ 1851 if(ps_ctxt->i4_sub_pic_level_rc) 1852 { 1853 ihevce_sub_pic_rc_scale_query((void *)ps_multi_thrd_ctxt, (void *)ps_ctxt); 1854 } 1855 /* check if the current row processed in last CTb row */ 1856 last_ctb_col_flag = (ctb_ctr == (num_ctbs_horz_pic - 1)); 1857 if(1 == last_ctb_col_flag) 1858 { 1859 cur_ctb_wd = last_hz_ctb_wd; 1860 } 1861 else 1862 { 1863 cur_ctb_wd = ctb_size; 1864 } 1865 1866 /* If it's the last CTB, get the actual ht of CTB */ 1867 if(1 == last_ctb_row_flag) 1868 { 1869 cur_ctb_ht = last_vt_ctb_ht; 1870 } 1871 else 1872 { 1873 cur_ctb_ht = ctb_size; 1874 } 1875 1876 ps_ctxt->u4_cur_ctb_ht = cur_ctb_ht; 1877 ps_ctxt->u4_cur_ctb_wd = cur_ctb_wd; 1878 1879 /* Wait till reference frame recon is available */ 1880 1881 /* ------------ Wait till current data is ready from ME -------------- */ 1882 1883 /*only for ref instance and Non I pics */ 1884 if((ps_ctxt->i4_bitrate_instance_num == 0) && 1885 ((ISLICE != ps_ctxt->i1_slice_type) || L0ME_IN_OPENLOOP_MODE)) 1886 { 1887 if(ctb_ctr < (num_ctbs_horz_pic)) 1888 { 1889 ihevce_dmgr_chk_row_row_sync( 1890 pv_dep_mngr_encloop_dep_me, 1891 ctb_ctr, 1892 1, 1893 vert_ctr, 1894 ps_ctxt->i4_tile_col_idx, /* Col Tile No. 
*/ 1895 ps_ctxt->thrd_id); 1896 } 1897 } 1898 1899 /* store the cu pointer for current ctb out */ 1900 ps_ctb_out->ps_enc_cu = ps_row_cu; 1901 ps_cu_final = ps_row_cu; 1902 1903 /* Get the base point of CU recursion tree */ 1904 if(ISLICE != ps_ctxt->i1_slice_type) 1905 { 1906 ps_cu_tree_analyse = ps_ctb_in->ps_cu_tree; 1907 ASSERT(ps_ctb_in->ps_cu_tree == (ps_row_cu_tree + (ctb_ctr * MAX_NUM_NODES_CU_TREE))); 1908 } 1909 else 1910 { 1911 /* Initialize ptr to current CTB */ 1912 ps_cu_tree_analyse = ps_row_cu_tree + (ctb_ctr * MAX_NUM_NODES_CU_TREE); 1913 } 1914 1915 /* Get the ME data pointer for 16x16 block data in ctb */ 1916 ps_cu_me_data = ps_ctb_in->ps_me_ctb_data; 1917 u1_is_ctb_noisy = ps_ctb_in->s_ctb_noise_params.i4_noise_present; 1918 s_cu_prms.u1_is_cu_noisy = u1_is_ctb_noisy; 1919 s_cu_prms.pu1_is_8x8Blk_noisy = ps_ctb_in->s_ctb_noise_params.au1_is_8x8Blk_noisy; 1920 1921 /* store the ctb level prms in cu prms */ 1922 s_cu_prms.i4_ctb_pos = ctb_ctr; 1923 1924 s_cu_prms.pu1_luma_src = (UWORD8 *)ps_curr_src_bufs->pv_y_buf + ctb_ctr * ctb_size; 1925 s_cu_prms.pu1_luma_recon = (UWORD8 *)ps_curr_recon_bufs->pv_y_buf + ctb_ctr * ctb_size; 1926 1927 { 1928 s_cu_prms.pu1_chrm_src = (UWORD8 *)ps_curr_src_bufs->pv_u_buf + ctb_ctr * ctb_size; 1929 s_cu_prms.pu1_chrm_recon = (UWORD8 *)ps_curr_recon_bufs->pv_u_buf + ctb_ctr * ctb_size; 1930 } 1931 1932 s_cu_prms.pu1_sbpel_hxfy = (UWORD8 *)ppu1_y_subpel_planes[0] + ctb_ctr * ctb_size; 1933 1934 s_cu_prms.pu1_sbpel_fxhy = (UWORD8 *)ppu1_y_subpel_planes[1] + ctb_ctr * ctb_size; 1935 1936 s_cu_prms.pu1_sbpel_hxhy = (UWORD8 *)ppu1_y_subpel_planes[2] + ctb_ctr * ctb_size; 1937 1938 /* Initialize ptr to current CTB */ 1939 ps_ctb_ipe_analyse = ps_row_ipe_analyse + ctb_ctr; // * ctb_size; 1940 1941 /* reset the map idx for current ctb */ 1942 col_pu_map_idx = 0; 1943 num_pus_in_ctb = 0; 1944 1945 /* reset the map buffer to 0*/ 1946 1947 memset( 1948 &ps_ctxt->au1_nbr_ctb_map[0][0], 1949 0, 1950 (MAX_PU_IN_CTB_ROW + 1 + 8) 
* (MAX_PU_IN_CTB_ROW + 1 + 8)); 1951 1952 /* set the CTB neighbour availability flags */ 1953 ihevce_set_ctb_nbr( 1954 &ps_ctb_out->s_ctb_nbr_avail_flags, 1955 ps_ctxt->pu1_ctb_nbr_map, 1956 ps_ctxt->i4_nbr_map_strd, 1957 ctb_ctr, 1958 vert_ctr, 1959 ps_frm_ctb_prms); 1960 1961 /* -------- update the cur CTB offsets for inter prediction-------- */ 1962 ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_x = ctb_ctr * ctb_size; 1963 ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y = vert_ctr * ctb_size; 1964 1965 /* -------- update the cur CTB offsets for MV prediction-------- */ 1966 ps_ctxt->s_mv_pred_ctxt.i4_ctb_x = ctb_ctr; 1967 ps_ctxt->s_mv_pred_ctxt.i4_ctb_y = vert_ctr; 1968 1969 /* -------------- Boundary Strength Initialization ----------- */ 1970 if(ps_ctxt->i4_deblk_pad_hpel_cur_pic) 1971 { 1972 ihevce_bs_init_ctb(&ps_ctxt->s_deblk_bs_prms, ps_frm_ctb_prms, ctb_ctr, vert_ctr); 1973 } 1974 1975 /* -------- update cur CTB offsets for entropy rdopt context------- */ 1976 ihevce_entropy_rdo_ctb_init(&ps_ctxt->s_rdopt_entropy_ctxt, ctb_ctr, vert_ctr); 1977 1978 /* --------- CU Recursion --------------- */ 1979 1980 { 1981 #if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS 1982 WORD32 i4_max_tree_depth = 4; 1983 #endif 1984 WORD32 i4_tree_depth = 0; 1985 /* Init no. 
of CU in CTB to 0*/ 1986 ps_ctb_out->u1_num_cus_in_ctb = 0; 1987 1988 #if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS 1989 if(ps_ctxt->i4_bitrate_instance_num == 0) 1990 { 1991 WORD32 i4_max_tree_depth = 4; 1992 WORD32 i; 1993 for(i = 0; i < i4_max_tree_depth; i++) 1994 { 1995 COPY_CABAC_STATES( 1996 &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0], 1997 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0], 1998 IHEVC_CAB_CTXT_END * sizeof(UWORD8)); 1999 } 2000 } 2001 #else 2002 if(ps_ctxt->i4_bitrate_instance_num == 0) 2003 { 2004 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2) 2005 { 2006 WORD32 i4_max_tree_depth = 4; 2007 WORD32 i; 2008 for(i = 0; i < i4_max_tree_depth; i++) 2009 { 2010 COPY_CABAC_STATES( 2011 &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0], 2012 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0], 2013 IHEVC_CAB_CTXT_END * sizeof(UWORD8)); 2014 } 2015 } 2016 } 2017 2018 #endif 2019 if(ps_ctxt->i4_bitrate_instance_num == 0) 2020 { 2021 /* FOR I- PIC populate the curr_ctb accordingly */ 2022 if(ISLICE == ps_ctxt->i1_slice_type) 2023 { 2024 ps_ctb_ipe_analyse->ps_cu_tree_root = ps_cu_tree_analyse; 2025 ps_ctb_ipe_analyse->nodes_created_in_cu_tree = 1; 2026 2027 ihevce_populate_cu_tree( 2028 ps_ctb_ipe_analyse, 2029 ps_cu_tree_analyse, 2030 0, 2031 (IHEVCE_QUALITY_CONFIG_T)ps_ctxt->i4_quality_preset, 2032 POS_NA, 2033 POS_NA, 2034 POS_NA); 2035 } 2036 } 2037 ps_ctb_ipe_analyse->nodes_created_in_cu_tree = 1; 2038 ps_ctxt->ps_enc_out_ctxt = &ps_ctxt->as_enc_cu_ctxt[0]; 2039 ps_ctxt->pu1_ecd_data = pu1_row_ecd_data; 2040 if(ps_ctxt->i4_use_ctb_level_lamda) 2041 { 2042 ihevce_compute_cu_level_QP( 2043 ps_ctxt, -1, ps_ctb_ipe_analyse->i4_64x64_act_factor[3][1], 0); 2044 } 2045 2046 s_cu_update_prms.ppu1_row_ecd_data = &pu1_row_ecd_data; 2047 s_cu_update_prms.pi4_last_cu_pos_in_ctb = &last_cu_pos_in_ctb; 2048 s_cu_update_prms.pi4_last_cu_size = &last_cu_size; 2049 s_cu_update_prms.pi4_num_pus_in_ctb = &num_pus_in_ctb; 2050 
s_cu_update_prms.pps_cu_final = &ps_cu_final; 2051 s_cu_update_prms.pps_row_pu = &ps_row_pu; 2052 s_cu_update_prms.pps_row_tu = &ps_row_tu; 2053 s_cu_update_prms.pu1_num_cus_in_ctb_out = &ps_ctb_out->u1_num_cus_in_ctb; 2054 2055 // source satd computation 2056 /* compute the source 8x8 SATD for the current CTB */ 2057 /* populate pui4_source_satd in some structure and pass it inside */ 2058 if(ps_ctxt->u1_enable_psyRDOPT) 2059 { 2060 /* declare local variables */ 2061 WORD32 i; 2062 WORD32 ctb_size; 2063 WORD32 num_comp_had_blocks; 2064 UWORD8 *pu1_l0_block; 2065 WORD32 block_ht; 2066 WORD32 block_wd; 2067 WORD32 ht_offset; 2068 WORD32 wd_offset; 2069 2070 WORD32 num_horz_blocks; 2071 WORD32 had_block_size; 2072 WORD32 total_had_block_size; 2073 WORD16 pi2_residue_had_zscan[64]; 2074 UWORD8 ai1_zeros_buffer[64]; 2075 2076 WORD32 index_satd; 2077 WORD32 is_hbd; 2078 /* initialize the variables */ 2079 block_ht = cur_ctb_ht; 2080 block_wd = cur_ctb_wd; 2081 2082 is_hbd = ps_ctxt->u1_is_input_data_hbd; 2083 2084 had_block_size = 8; 2085 total_had_block_size = had_block_size * had_block_size; 2086 2087 for(i = 0; i < total_had_block_size; i++) 2088 { 2089 ai1_zeros_buffer[i] = 0; 2090 } 2091 2092 ctb_size = block_ht * block_wd; //ctb_width * ctb_height; 2093 num_comp_had_blocks = ctb_size / (had_block_size * had_block_size); 2094 2095 num_horz_blocks = block_wd / had_block_size; //ctb_width / had_block_size; 2096 ht_offset = -had_block_size; 2097 wd_offset = -had_block_size; 2098 2099 index_satd = 0; 2100 /*Loop over all 8x8 blocsk in the CTB*/ 2101 for(i = 0; i < num_comp_had_blocks; i++) 2102 { 2103 if(i % num_horz_blocks == 0) 2104 { 2105 wd_offset = -had_block_size; 2106 ht_offset += had_block_size; 2107 } 2108 wd_offset += had_block_size; 2109 2110 if(!is_hbd) 2111 { 2112 /* get memory pointers for each of L0 and L1 blocks whose hadamard has to be computed */ 2113 pu1_l0_block = s_cu_prms.pu1_luma_src + 2114 ps_curr_src_bufs->i4_y_strd * ht_offset + wd_offset; 
2115 2116 ps_ctxt->ai4_source_satd_8x8[index_satd] = 2117 2118 ps_ctxt->s_cmn_opt_func.pf_AC_HAD_8x8_8bit( 2119 pu1_l0_block, 2120 ps_curr_src_bufs->i4_y_strd, 2121 ai1_zeros_buffer, 2122 had_block_size, 2123 pi2_residue_had_zscan, 2124 had_block_size); 2125 } 2126 index_satd++; 2127 } 2128 } 2129 2130 if(ps_ctxt->u1_enable_psyRDOPT) 2131 { 2132 /* declare local variables */ 2133 WORD32 i; 2134 WORD32 ctb_size; 2135 WORD32 num_comp_had_blocks; 2136 UWORD8 *pu1_l0_block; 2137 UWORD8 *pu1_l0_block_prev = NULL; 2138 WORD32 block_ht; 2139 WORD32 block_wd; 2140 WORD32 ht_offset; 2141 WORD32 wd_offset; 2142 2143 WORD32 num_horz_blocks; 2144 WORD32 had_block_size; 2145 WORD16 pi2_residue_had[64]; 2146 UWORD8 ai1_zeros_buffer[64]; 2147 WORD32 index_satd = 0; 2148 2149 WORD32 is_hbd; 2150 is_hbd = ps_ctxt->u1_is_input_data_hbd; // 8 bit 2151 2152 /* initialize the variables */ 2153 /* change this based ont he bit depth */ 2154 // ps_ctxt->u1_chroma_array_type 2155 if(ps_ctxt->u1_chroma_array_type == 1) 2156 { 2157 block_ht = cur_ctb_ht / 2; 2158 block_wd = cur_ctb_wd / 2; 2159 } 2160 else 2161 { 2162 block_ht = cur_ctb_ht; 2163 block_wd = cur_ctb_wd / 2; 2164 } 2165 2166 had_block_size = 4; 2167 memset(ai1_zeros_buffer, 0, 64 * sizeof(UWORD8)); 2168 2169 ctb_size = block_ht * block_wd; //ctb_width * ctb_height; 2170 num_comp_had_blocks = 2 * ctb_size / (had_block_size * had_block_size); 2171 2172 num_horz_blocks = 2 * block_wd / had_block_size; //ctb_width / had_block_size; 2173 ht_offset = -had_block_size; 2174 wd_offset = -had_block_size; 2175 2176 if(!is_hbd) 2177 { 2178 /* loop over for every 4x4 blocks in the CU for Cb */ 2179 for(i = 0; i < num_comp_had_blocks; i++) 2180 { 2181 if(i % num_horz_blocks == 0) 2182 { 2183 wd_offset = -had_block_size; 2184 ht_offset += had_block_size; 2185 } 2186 wd_offset += had_block_size; 2187 2188 /* get memory pointers for each of L0 and L1 blocks whose hadamard has to be computed */ 2189 if(i % 2 != 0) 2190 { 2191 if(!is_hbd) 2192 { 
2193 pu1_l0_block = pu1_l0_block_prev + 1; 2194 } 2195 } 2196 else 2197 { 2198 if(!is_hbd) 2199 { 2200 pu1_l0_block = s_cu_prms.pu1_chrm_src + 2201 s_cu_prms.i4_chrm_src_stride * ht_offset + wd_offset; 2202 pu1_l0_block_prev = pu1_l0_block; 2203 } 2204 } 2205 2206 if(had_block_size == 4) 2207 { 2208 if(!is_hbd) 2209 { 2210 ps_ctxt->ai4_source_chroma_satd[index_satd] = 2211 ps_ctxt->s_cmn_opt_func.pf_chroma_AC_HAD_4x4_8bit( 2212 pu1_l0_block, 2213 s_cu_prms.i4_chrm_src_stride, 2214 ai1_zeros_buffer, 2215 had_block_size, 2216 pi2_residue_had, 2217 had_block_size); 2218 } 2219 2220 index_satd++; 2221 2222 } // block size of 4x4 2223 2224 } // for all blocks 2225 2226 } // is hbd check 2227 } 2228 2229 ihevce_cu_recurse_decide( 2230 ps_ctxt, 2231 &s_cu_prms, 2232 ps_cu_tree_analyse, 2233 ps_cu_tree_analyse, 2234 ps_ctb_ipe_analyse, 2235 ps_cu_me_data, 2236 &ps_ctb_col_pu, 2237 &s_cu_update_prms, 2238 pu1_row_pu_map, 2239 &col_pu_map_idx, 2240 i4_tree_depth, 2241 ctb_ctr << 6, 2242 vert_ctr << 6, 2243 cur_ctb_ht); 2244 2245 if(ps_ctxt->i1_slice_type != ISLICE) 2246 { 2247 ASSERT( 2248 (cur_ctb_wd * cur_ctb_ht) <= 2249 ihevce_compute_area_of_valid_cus_in_ctb(ps_cu_tree_analyse)); 2250 } 2251 /*If Sup pic rc is enabled*/ 2252 if(1 == ps_ctxt->i4_sub_pic_level_rc) 2253 { 2254 /*In a row, after the required CTB is reached, send data and query scale from Bit Control thread */ 2255 ihevce_sub_pic_rc_in_data( 2256 (void *)ps_multi_thrd_ctxt, 2257 (void *)ps_ctxt, 2258 (void *)ps_ctb_ipe_analyse, 2259 (void *)ps_frm_ctb_prms); 2260 } 2261 2262 ps_ctxt->ps_enc_out_ctxt->u1_cu_size = 128; 2263 2264 } /* End of CU recursion block */ 2265 2266 #if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS 2267 { 2268 ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt = &ps_ctxt->as_enc_cu_ctxt[0]; 2269 enc_loop_cu_prms_t *ps_cu_prms = &s_cu_prms; 2270 ps_ctxt->pu1_ecd_data = pu1_row_ecd_data; 2271 2272 do 2273 { 2274 ihevce_update_final_cu_results( 2275 ps_ctxt, 2276 ps_enc_out_ctxt, 2277 ps_cu_prms, 
2278 NULL, /* &ps_ctb_col_pu */ 2279 NULL, /* &col_pu_map_idx */ 2280 &s_cu_update_prms, 2281 ctb_ctr, 2282 vert_ctr); 2283 2284 ps_enc_out_ctxt++; 2285 2286 ASSERT(ps_ctb_in->u1_num_cus_in_ctb <= MAX_CTB_SIZE); 2287 2288 } while(ps_enc_out_ctxt->u1_cu_size != 128); 2289 } 2290 #else 2291 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2) 2292 { 2293 ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt = &ps_ctxt->as_enc_cu_ctxt[0]; 2294 enc_loop_cu_prms_t *ps_cu_prms = &s_cu_prms; 2295 ps_ctxt->pu1_ecd_data = pu1_row_ecd_data; 2296 2297 do 2298 { 2299 ihevce_update_final_cu_results( 2300 ps_ctxt, 2301 ps_enc_out_ctxt, 2302 ps_cu_prms, 2303 NULL, /* &ps_ctb_col_pu */ 2304 NULL, /* &col_pu_map_idx */ 2305 &s_cu_update_prms, 2306 ctb_ctr, 2307 vert_ctr); 2308 2309 ps_enc_out_ctxt++; 2310 2311 ASSERT(ps_ctb_in->u1_num_cus_in_ctb <= MAX_CTB_SIZE); 2312 2313 } while(ps_enc_out_ctxt->u1_cu_size != 128); 2314 } 2315 #endif 2316 2317 /* --- ctb level copy of data to left buffers--*/ 2318 ((pf_enc_loop_ctb_left_copy)ps_ctxt->pv_enc_loop_ctb_left_copy)(ps_ctxt, &s_cu_prms); 2319 2320 if(ps_ctxt->i4_deblk_pad_hpel_cur_pic) 2321 { 2322 /* For the Unaligned CTB, make the invalid edge boundary strength 0 */ 2323 ihevce_bs_clear_invalid( 2324 &ps_ctxt->s_deblk_bs_prms, 2325 last_ctb_row_flag, 2326 (ctb_ctr == (num_ctbs_horz_pic - 1)), 2327 last_hz_ctb_wd, 2328 last_vt_ctb_ht); 2329 2330 /* -----------------Read boundary strengts for current CTB------------- */ 2331 2332 if((0 == ps_ctxt->i4_deblock_type) && (ps_ctxt->i4_deblk_pad_hpel_cur_pic)) 2333 { 2334 /*Storing boundary strengths of current CTB*/ 2335 UWORD32 *pu4_bs_horz = &ps_ctxt->s_deblk_bs_prms.au4_horz_bs[0]; 2336 UWORD32 *pu4_bs_vert = &ps_ctxt->s_deblk_bs_prms.au4_vert_bs[0]; 2337 2338 memcpy(s_deblk_ctb_row_params.pu4_ctb_row_bs_vert, pu4_bs_vert, (ctb_size * 4) / 8); 2339 memcpy(s_deblk_ctb_row_params.pu4_ctb_row_bs_horz, pu4_bs_horz, (ctb_size * 4) / 8); 2340 } 2341 //Increment for storing next CTB info 2342 
s_deblk_ctb_row_params.pu4_ctb_row_bs_vert += 2343 (ctb_size >> 3); //one vertical edge per 8x8 block 2344 s_deblk_ctb_row_params.pu4_ctb_row_bs_horz += 2345 (ctb_size >> 3); //one horizontal edge per 8x8 block 2346 } 2347 2348 /* -------------- ctb level updates ----------------- */ 2349 ps_row_cu += ps_ctb_out->u1_num_cus_in_ctb; 2350 2351 pu1_row_pu_map += (ctb_size >> 2) * (ctb_size >> 2); 2352 2353 /* first ctb offset will be populated by the caller */ 2354 if(0 != ctb_ctr) 2355 { 2356 pu4_pu_offsets[ctb_ctr] = pu4_pu_offsets[ctb_ctr - 1] + num_pus_in_ctb; 2357 } 2358 pu2_num_pu_map[ctb_ctr] = num_pus_in_ctb; 2359 ASSERT(ps_ctb_out->u1_num_cus_in_ctb != 0); 2360 2361 ps_ctb_in++; 2362 ps_ctb_out++; 2363 } 2364 2365 /* ---------- Encloop end of row updates ----------------- */ 2366 2367 /* at the end of row processing cu pixel counter is set to */ 2368 /* (num ctb * ctbzise) + ctb size */ 2369 /* this is to set the dependency for right most cu of last */ 2370 /* ctb's top right data dependency */ 2371 /* this even takes care of entropy dependency for */ 2372 /* incomplete ctb as well */ 2373 ihevce_dmgr_set_row_row_sync( 2374 pv_dep_mngr_enc_loop_cu_top_right, 2375 (ctb_ctr * ctb_size + ctb_size), 2376 vert_ctr, 2377 ps_ctxt->i4_tile_col_idx /* Col Tile No. */); 2378 2379 ps_ctxt->s_sao_ctxt_t.ps_cmn_utils_optimised_function_list = &ps_ctxt->s_cmn_opt_func; 2380 2381 /* Restore structure. 
2382 Getting the address of stored-BS and Qp-map and other info */ 2383 memcpy(&s_deblk_ctb_row_params, &ps_ctxt->s_deblk_ctbrow_prms, sizeof(deblk_ctbrow_prms_t)); 2384 { 2385 /* Update the pointers to the tile start */ 2386 s_deblk_ctb_row_params.pu4_ctb_row_bs_vert += 2387 (ps_tile_params->i4_first_ctb_x * (ctb_size >> 3)); //one vertical edge per 8x8 block 2388 s_deblk_ctb_row_params.pu4_ctb_row_bs_horz += 2389 (ps_tile_params->i4_first_ctb_x * (ctb_size >> 3)); //one horizontal edge per 8x8 block 2390 s_deblk_ctb_row_params.pi1_ctb_row_qp += (ps_tile_params->i4_first_ctb_x * (ctb_size >> 2)); 2391 } 2392 2393 #if PROFILE_ENC_REG_DATA 2394 s_profile.u8_enc_reg_data[vert_ctr] = 0; 2395 #endif 2396 2397 /* -- Loop over all the CTBs in a row for Deblocking and Subpel gen --- */ 2398 if(!ps_ctxt->u1_is_input_data_hbd) 2399 { 2400 WORD32 last_col_pic, last_col_tile; 2401 2402 for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++) 2403 { 2404 /* store the ctb level prms in cu prms */ 2405 s_cu_prms.i4_ctb_pos = ctb_ctr; 2406 s_cu_prms.pu1_luma_src = (UWORD8 *)ps_curr_src_bufs->pv_y_buf + ctb_ctr * ctb_size; 2407 s_cu_prms.pu1_chrm_src = (UWORD8 *)ps_curr_src_bufs->pv_u_buf + ctb_ctr * ctb_size; 2408 2409 s_cu_prms.pu1_luma_recon = (UWORD8 *)ps_curr_recon_bufs->pv_y_buf + ctb_ctr * ctb_size; 2410 s_cu_prms.pu1_chrm_recon = (UWORD8 *)ps_curr_recon_bufs->pv_u_buf + ctb_ctr * ctb_size; 2411 s_cu_prms.pu1_sbpel_hxfy = (UWORD8 *)ppu1_y_subpel_planes[0] + ctb_ctr * ctb_size; 2412 2413 s_cu_prms.pu1_sbpel_fxhy = (UWORD8 *)ppu1_y_subpel_planes[1] + ctb_ctr * ctb_size; 2414 2415 s_cu_prms.pu1_sbpel_hxhy = (UWORD8 *)ppu1_y_subpel_planes[2] + ctb_ctr * ctb_size; 2416 2417 /* If last ctb in the horizontal row */ 2418 if(ctb_ctr == (num_ctbs_horz_pic - 1)) 2419 { 2420 last_col_pic = 1; 2421 } 2422 else 2423 { 2424 last_col_pic = 0; 2425 } 2426 2427 /* If last ctb in the tile row */ 2428 if(ctb_ctr == (ctb_end - 1)) 2429 { 2430 last_col_tile = 1; 2431 } 2432 else 2433 { 2434 
last_col_tile = 0; 2435 } 2436 2437 if(ps_ctxt->i4_deblk_pad_hpel_cur_pic) 2438 { 2439 /* Wait till top neighbour CTB has done it's deblocking*/ 2440 if(ctb_ctr < (ctb_end)-1) 2441 { 2442 ihevce_dmgr_chk_row_row_sync( 2443 pv_dep_mngr_enc_loop_dblk, 2444 ctb_ctr, 2445 dblk_offset, 2446 dblk_check_dep_pos, 2447 ps_ctxt->i4_tile_col_idx, /* Col Tile No. */ 2448 ps_ctxt->thrd_id); 2449 } 2450 2451 if((0 == ps_ctxt->i4_deblock_type)) 2452 { 2453 /* Populate Qp-map */ 2454 if(ctb_start == ctb_ctr) 2455 { 2456 ihevce_deblk_populate_qp_map( 2457 ps_ctxt, 2458 &s_deblk_ctb_row_params, 2459 ps_ctb_out_dblk, 2460 vert_ctr, 2461 ps_frm_ctb_prms, 2462 ps_tile_params); 2463 } 2464 ps_ctxt->s_deblk_prms.i4_ctb_size = ctb_size; 2465 2466 /* recon pointers and stride */ 2467 ps_ctxt->s_deblk_prms.pu1_ctb_y = s_cu_prms.pu1_luma_recon; 2468 ps_ctxt->s_deblk_prms.pu1_ctb_uv = s_cu_prms.pu1_chrm_recon; 2469 ps_ctxt->s_deblk_prms.i4_luma_pic_stride = s_cu_prms.i4_luma_recon_stride; 2470 ps_ctxt->s_deblk_prms.i4_chroma_pic_stride = s_cu_prms.i4_chrm_recon_stride; 2471 2472 ps_ctxt->s_deblk_prms.i4_deblock_top_ctb_edge = (0 == vert_ctr) ? 0 : 1; 2473 { 2474 ps_ctxt->s_deblk_prms.i4_deblock_top_ctb_edge = 2475 (ps_tile_params->i4_first_ctb_y == vert_ctr) ? 0 : 1; 2476 } 2477 ps_ctxt->s_deblk_prms.i4_deblock_left_ctb_edge = (ctb_start == ctb_ctr) ? 0 : 1; 2478 //or according to slice boundary. Support yet to be added !!!! 2479 2480 ihevce_deblk_ctb( 2481 &ps_ctxt->s_deblk_prms, last_col_tile, &s_deblk_ctb_row_params); 2482 2483 //Increment for storing next CTB info 2484 s_deblk_ctb_row_params.pu4_ctb_row_bs_vert += 2485 (ctb_size >> 3); //one vertical edge per 8x8 block 2486 s_deblk_ctb_row_params.pu4_ctb_row_bs_horz += 2487 (ctb_size >> 3); //one horizontal edge per 8x8 block 2488 s_deblk_ctb_row_params.pi1_ctb_row_qp += 2489 (ctb_size >> 2); //one qp per 4x4 block. 
2490 2491 } //end of if((0 == ps_ctxt->i4_deblock_type) 2492 } // end of if(ps_ctxt->i4_deblk_pad_hpel_cur_pic) 2493 2494 /* Apply SAO over the previous CTB-row */ 2495 if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag || 2496 ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag) 2497 { 2498 sao_ctxt_t *ps_sao_ctxt = &ps_ctxt->s_sao_ctxt_t; 2499 2500 if((vert_ctr > ps_tile_params->i4_first_ctb_y) && 2501 (ctb_ctr > ctb_start)) //if((vert_ctr > 0) && (ctb_ctr > 0)) 2502 { 2503 /* Call the sao function to do sao for the current ctb*/ 2504 2505 /* Register the curr ctb's x pos in sao context*/ 2506 ps_sao_ctxt->i4_ctb_x = ctb_ctr - 1; 2507 2508 /* Register the curr ctb's y pos in sao context*/ 2509 ps_sao_ctxt->i4_ctb_y = vert_ctr - 1; 2510 2511 ps_ctb_out_sao = ps_sao_ctxt->ps_ctb_out + 2512 (vert_ctr - 1) * ps_frm_ctb_prms->i4_num_ctbs_horz + 2513 (ctb_ctr - 1); 2514 ps_sao_ctxt->ps_sao = &ps_ctb_out_sao->s_sao; 2515 ps_sao_ctxt->i4_sao_blk_wd = ctb_size; 2516 ps_sao_ctxt->i4_sao_blk_ht = ctb_size; 2517 2518 ps_sao_ctxt->i4_is_last_ctb_row = 0; 2519 ps_sao_ctxt->i4_is_last_ctb_col = 0; 2520 2521 /* Calculate the recon buf pointer and stride for teh current ctb */ 2522 ps_sao_ctxt->pu1_cur_luma_recon_buf = 2523 ps_sao_ctxt->pu1_frm_luma_recon_buf + 2524 (ps_sao_ctxt->i4_frm_luma_recon_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) + 2525 (ps_sao_ctxt->i4_ctb_x * ctb_size); 2526 2527 ps_sao_ctxt->i4_cur_luma_recon_stride = ps_sao_ctxt->i4_frm_luma_recon_stride; 2528 2529 ps_sao_ctxt->pu1_cur_chroma_recon_buf = 2530 ps_sao_ctxt->pu1_frm_chroma_recon_buf + 2531 (ps_sao_ctxt->i4_frm_chroma_recon_stride * ps_sao_ctxt->i4_ctb_y * 2532 (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) + 2533 (ps_sao_ctxt->i4_ctb_x * ctb_size); 2534 2535 ps_sao_ctxt->i4_cur_chroma_recon_stride = 2536 ps_sao_ctxt->i4_frm_chroma_recon_stride; 2537 2538 ps_sao_ctxt->pu1_cur_luma_src_buf = 2539 ps_sao_ctxt->pu1_frm_luma_src_buf + 2540 (ps_sao_ctxt->i4_frm_luma_src_stride * 
ps_sao_ctxt->i4_ctb_y * ctb_size) + 2541 (ps_sao_ctxt->i4_ctb_x * ctb_size); 2542 2543 ps_sao_ctxt->i4_cur_luma_src_stride = ps_sao_ctxt->i4_frm_luma_src_stride; 2544 2545 ps_sao_ctxt->pu1_cur_chroma_src_buf = 2546 ps_sao_ctxt->pu1_frm_chroma_src_buf + 2547 (ps_sao_ctxt->i4_frm_chroma_src_stride * ps_sao_ctxt->i4_ctb_y * 2548 (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) + 2549 (ps_sao_ctxt->i4_ctb_x * ctb_size); 2550 2551 ps_sao_ctxt->i4_cur_chroma_src_stride = ps_sao_ctxt->i4_frm_chroma_src_stride; 2552 2553 /* Calculate the pointer to buff to store the (x,y)th sao 2554 * for the top merge of (x,y+1)th ctb 2555 */ 2556 ps_sao_ctxt->ps_top_ctb_sao = 2557 &ps_sao_ctxt->aps_frm_top_ctb_sao[ps_ctxt->i4_enc_frm_id] 2558 [ps_sao_ctxt->i4_ctb_x + 2559 (ps_sao_ctxt->i4_ctb_y) * 2560 ps_frm_ctb_prms->i4_num_ctbs_horz + 2561 (ps_ctxt->i4_bitrate_instance_num * 2562 ps_sao_ctxt->i4_num_ctb_units)]; 2563 2564 /* Calculate the pointer to buff to store the top pixels of curr ctb*/ 2565 ps_sao_ctxt->pu1_curr_sao_src_top_luma = 2566 ps_sao_ctxt->apu1_sao_src_frm_top_luma[ps_ctxt->i4_enc_frm_id] + 2567 (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_luma_buf_stride + 2568 ps_sao_ctxt->i4_ctb_x * ctb_size + 2569 ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size + 2570 ps_sao_ctxt->i4_top_chroma_buf_size); 2571 2572 /* Calculate the pointer to buff to store the top pixels of curr ctb*/ 2573 ps_sao_ctxt->pu1_curr_sao_src_top_chroma = 2574 ps_sao_ctxt->apu1_sao_src_frm_top_chroma[ps_ctxt->i4_enc_frm_id] + 2575 (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_chroma_buf_stride + 2576 ps_sao_ctxt->i4_ctb_x * ctb_size + 2577 ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size + 2578 ps_sao_ctxt->i4_top_chroma_buf_size); 2579 2580 { 2581 UWORD32 u4_ctb_sao_bits; 2582 2583 ihevce_sao_analyse( 2584 &ps_ctxt->s_sao_ctxt_t, 2585 ps_ctb_out_sao, 2586 &u4_ctb_sao_bits, 2587 ps_tile_params); 2588 ps_ctxt 2589 
->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id] 2590 [ps_ctxt->i4_bitrate_instance_num] 2591 ->u4_frame_rdopt_header_bits += u4_ctb_sao_bits; 2592 ps_ctxt 2593 ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id] 2594 [ps_ctxt->i4_bitrate_instance_num] 2595 ->u4_frame_rdopt_bits += u4_ctb_sao_bits; 2596 } 2597 if(ps_ctxt->i4_deblk_pad_hpel_cur_pic & 2598 0x1) /** Subpel generation not done for non-ref picture **/ 2599 { 2600 /* Padding and Subpel Plane Generation */ 2601 ihevce_pad_interp_recon_ctb( 2602 ps_pad_interp_recon, 2603 ctb_ctr - 1, 2604 vert_ctr - 1, 2605 ps_ctxt->i4_quality_preset, 2606 ps_frm_ctb_prms, 2607 ps_ctxt->ai2_scratch, 2608 ps_ctxt->i4_bitrate_instance_num, 2609 ps_ctxt->ps_func_selector); 2610 } 2611 } 2612 2613 /* Call the sao function again for the last ctb of the previous row*/ 2614 if(((ctb_ctr + 1) == (ctb_end)) && 2615 (vert_ctr > 2616 ps_tile_params 2617 ->i4_first_ctb_y)) //( ((ctb_ctr+1) == ps_frm_ctb_prms->i4_num_ctbs_horz) && (vert_ctr > 0) ) 2618 { 2619 /* Register the curr ctb's x pos in sao context*/ 2620 ps_ctxt->s_sao_ctxt_t.i4_ctb_x = ctb_ctr; 2621 2622 /* Register the curr ctb's y pos in sao context*/ 2623 ps_ctxt->s_sao_ctxt_t.i4_ctb_y = vert_ctr - 1; 2624 2625 ps_ctb_out_sao = ps_ctxt->s_sao_ctxt_t.ps_ctb_out + 2626 (vert_ctr - 1) * ps_frm_ctb_prms->i4_num_ctbs_horz + (ctb_ctr); 2627 2628 ps_ctxt->s_sao_ctxt_t.ps_sao = &ps_ctb_out_sao->s_sao; 2629 2630 ps_ctxt->s_sao_ctxt_t.i4_sao_blk_wd = 2631 ctb_size - ((ps_tile_params->i4_curr_tile_wd_in_ctb_unit * ctb_size) - 2632 ps_tile_params->i4_curr_tile_width); 2633 2634 ps_ctxt->s_sao_ctxt_t.i4_sao_blk_ht = ps_ctxt->s_sao_ctxt_t.i4_ctb_size; 2635 2636 ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_row = 0; 2637 ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_col = 1; 2638 2639 /* Calculate the recon buf pointer and stride for teh current ctb */ 2640 ps_sao_ctxt->pu1_cur_luma_recon_buf = 2641 ps_sao_ctxt->pu1_frm_luma_recon_buf + 2642 (ps_sao_ctxt->i4_frm_luma_recon_stride * ps_sao_ctxt->i4_ctb_y * 
ctb_size) + 2643 (ps_sao_ctxt->i4_ctb_x * ctb_size); 2644 2645 ps_sao_ctxt->i4_cur_luma_recon_stride = ps_sao_ctxt->i4_frm_luma_recon_stride; 2646 2647 ps_sao_ctxt->pu1_cur_chroma_recon_buf = 2648 ps_sao_ctxt->pu1_frm_chroma_recon_buf + 2649 (ps_sao_ctxt->i4_frm_chroma_recon_stride * ps_sao_ctxt->i4_ctb_y * 2650 (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) + 2651 (ps_sao_ctxt->i4_ctb_x * ctb_size); 2652 2653 ps_sao_ctxt->i4_cur_chroma_recon_stride = 2654 ps_sao_ctxt->i4_frm_chroma_recon_stride; 2655 2656 ps_sao_ctxt->pu1_cur_luma_src_buf = 2657 ps_sao_ctxt->pu1_frm_luma_src_buf + 2658 (ps_sao_ctxt->i4_frm_luma_src_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) + 2659 (ps_sao_ctxt->i4_ctb_x * ctb_size); 2660 2661 ps_sao_ctxt->i4_cur_luma_src_stride = ps_sao_ctxt->i4_frm_luma_src_stride; 2662 2663 ps_sao_ctxt->pu1_cur_chroma_src_buf = 2664 ps_sao_ctxt->pu1_frm_chroma_src_buf + 2665 (ps_sao_ctxt->i4_frm_chroma_src_stride * ps_sao_ctxt->i4_ctb_y * 2666 (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) + 2667 (ps_sao_ctxt->i4_ctb_x * ctb_size); 2668 2669 ps_sao_ctxt->i4_cur_chroma_src_stride = ps_sao_ctxt->i4_frm_chroma_src_stride; 2670 2671 /* Calculate the pointer to buff to store the (x,y)th sao 2672 * for the top merge of (x,y+1)th ctb 2673 */ 2674 ps_sao_ctxt->ps_top_ctb_sao = 2675 &ps_sao_ctxt->aps_frm_top_ctb_sao[ps_ctxt->i4_enc_frm_id] 2676 [ps_sao_ctxt->i4_ctb_x + 2677 (ps_sao_ctxt->i4_ctb_y) * 2678 ps_frm_ctb_prms->i4_num_ctbs_horz + 2679 (ps_ctxt->i4_bitrate_instance_num * 2680 ps_sao_ctxt->i4_num_ctb_units)]; 2681 2682 /* Calculate the pointer to buff to store the top pixels of curr ctb*/ 2683 ps_sao_ctxt->pu1_curr_sao_src_top_luma = 2684 ps_sao_ctxt->apu1_sao_src_frm_top_luma[ps_ctxt->i4_enc_frm_id] + 2685 (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_luma_buf_stride + 2686 ps_sao_ctxt->i4_ctb_x * ctb_size + 2687 ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size + 2688 ps_sao_ctxt->i4_top_chroma_buf_size); 2689 2690 /* 
Calculate the pointer to buff to store the top pixels of curr ctb*/ 2691 ps_sao_ctxt->pu1_curr_sao_src_top_chroma = 2692 ps_sao_ctxt->apu1_sao_src_frm_top_chroma[ps_ctxt->i4_enc_frm_id] + 2693 (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_chroma_buf_stride + 2694 ps_sao_ctxt->i4_ctb_x * ctb_size + 2695 ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size + 2696 ps_sao_ctxt->i4_top_chroma_buf_size); 2697 2698 { 2699 UWORD32 u4_ctb_sao_bits; 2700 2701 ihevce_sao_analyse( 2702 &ps_ctxt->s_sao_ctxt_t, 2703 ps_ctb_out_sao, 2704 &u4_ctb_sao_bits, 2705 ps_tile_params); 2706 ps_ctxt 2707 ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id] 2708 [ps_ctxt->i4_bitrate_instance_num] 2709 ->u4_frame_rdopt_header_bits += u4_ctb_sao_bits; 2710 ps_ctxt 2711 ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id] 2712 [ps_ctxt->i4_bitrate_instance_num] 2713 ->u4_frame_rdopt_bits += u4_ctb_sao_bits; 2714 } 2715 if(ps_ctxt->i4_deblk_pad_hpel_cur_pic & 2716 0x1) /** Subpel generation not done for non-ref picture **/ 2717 { 2718 /* Padding and Subpel Plane Generation */ 2719 ihevce_pad_interp_recon_ctb( 2720 ps_pad_interp_recon, 2721 ctb_ctr, 2722 vert_ctr - 1, 2723 ps_ctxt->i4_quality_preset, 2724 ps_frm_ctb_prms, 2725 ps_ctxt->ai2_scratch, 2726 ps_ctxt->i4_bitrate_instance_num, 2727 ps_ctxt->ps_func_selector); 2728 } 2729 } 2730 } 2731 else //SAO Disabled 2732 { 2733 if(1 == ps_ctxt->i4_deblk_pad_hpel_cur_pic) 2734 { 2735 /* Padding and Subpel Plane Generation */ 2736 ihevce_pad_interp_recon_ctb( 2737 ps_pad_interp_recon, 2738 ctb_ctr, 2739 vert_ctr, 2740 ps_ctxt->i4_quality_preset, 2741 ps_frm_ctb_prms, 2742 ps_ctxt->ai2_scratch, 2743 ps_ctxt->i4_bitrate_instance_num, 2744 ps_ctxt->ps_func_selector); 2745 } 2746 } 2747 2748 /* update the number of ctbs deblocked for this row */ 2749 ihevce_dmgr_set_row_row_sync( 2750 pv_dep_mngr_enc_loop_dblk, 2751 (ctb_ctr + 1), 2752 vert_ctr, 2753 ps_ctxt->i4_tile_col_idx /* Col Tile No. 
*/); 2754 } //end of loop over CTBs in current CTB-row 2755 { 2756 if(!ps_ctxt->i4_bitrate_instance_num) 2757 { 2758 if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag || 2759 ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag) 2760 { 2761 /* If SAO is on, then signal completion of previous CTB row */ 2762 if(0 != vert_ctr) 2763 { 2764 { 2765 WORD32 post_ctb_ctr; 2766 2767 for(post_ctb_ctr = ctb_start; post_ctb_ctr < ctb_end; post_ctb_ctr++) 2768 { 2769 ihevce_dmgr_map_set_sync( 2770 pv_dep_mngr_me_dep_encloop, 2771 post_ctb_ctr, 2772 (vert_ctr - 1), 2773 MAP_CTB_COMPLETE); 2774 } 2775 } 2776 } 2777 } 2778 else 2779 { 2780 { 2781 WORD32 post_ctb_ctr; 2782 2783 for(post_ctb_ctr = ctb_start; post_ctb_ctr < ctb_end; post_ctb_ctr++) 2784 { 2785 ihevce_dmgr_map_set_sync( 2786 pv_dep_mngr_me_dep_encloop, 2787 post_ctb_ctr, 2788 vert_ctr, 2789 MAP_CTB_COMPLETE); 2790 } 2791 } 2792 } 2793 } 2794 } 2795 2796 /* Call the sao function again for the last ctb row of frame */ 2797 if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag || 2798 ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag) 2799 { 2800 sao_ctxt_t *ps_sao_ctxt = &ps_ctxt->s_sao_ctxt_t; 2801 2802 for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++) 2803 { 2804 if((vert_ctr == (ps_tile_params->i4_first_ctb_y + 2805 ps_tile_params->i4_curr_tile_ht_in_ctb_unit - 1)) && 2806 (ctb_ctr > 2807 ctb_start)) //((vert_ctr == (ps_frm_ctb_prms->i4_num_ctbs_vert - 1)) && (ctb_ctr > 0)) 2808 { 2809 /* Register the curr ctb's x pos in sao context*/ 2810 ps_ctxt->s_sao_ctxt_t.i4_ctb_x = ctb_ctr - 1; 2811 2812 /* Register the curr ctb's y pos in sao context*/ 2813 ps_ctxt->s_sao_ctxt_t.i4_ctb_y = vert_ctr; 2814 2815 ps_ctb_out_sao = ps_ctxt->s_sao_ctxt_t.ps_ctb_out + 2816 (vert_ctr)*ps_frm_ctb_prms->i4_num_ctbs_horz + (ctb_ctr - 1); 2817 2818 ps_ctxt->s_sao_ctxt_t.ps_sao = &ps_ctb_out_sao->s_sao; 2819 2820 ps_ctxt->s_sao_ctxt_t.i4_sao_blk_wd = ps_ctxt->s_sao_ctxt_t.i4_ctb_size; 2821 
ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_col = 0; 2822 2823 ps_ctxt->s_sao_ctxt_t.i4_sao_blk_ht = 2824 ctb_size - ((ps_tile_params->i4_curr_tile_ht_in_ctb_unit * ctb_size) - 2825 ps_tile_params->i4_curr_tile_height); 2826 2827 ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_row = 1; 2828 2829 /* Calculate the recon buf pointer and stride for teh current ctb */ 2830 ps_sao_ctxt->pu1_cur_luma_recon_buf = 2831 ps_sao_ctxt->pu1_frm_luma_recon_buf + 2832 (ps_sao_ctxt->i4_frm_luma_recon_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) + 2833 (ps_sao_ctxt->i4_ctb_x * ctb_size); 2834 2835 ps_sao_ctxt->i4_cur_luma_recon_stride = ps_sao_ctxt->i4_frm_luma_recon_stride; 2836 2837 ps_sao_ctxt->pu1_cur_chroma_recon_buf = 2838 ps_sao_ctxt->pu1_frm_chroma_recon_buf + 2839 (ps_sao_ctxt->i4_frm_chroma_recon_stride * ps_sao_ctxt->i4_ctb_y * 2840 (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) + 2841 (ps_sao_ctxt->i4_ctb_x * ctb_size); 2842 2843 ps_sao_ctxt->i4_cur_chroma_recon_stride = 2844 ps_sao_ctxt->i4_frm_chroma_recon_stride; 2845 2846 ps_sao_ctxt->pu1_cur_luma_src_buf = 2847 ps_sao_ctxt->pu1_frm_luma_src_buf + 2848 (ps_sao_ctxt->i4_frm_luma_src_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) + 2849 (ps_sao_ctxt->i4_ctb_x * ctb_size); 2850 2851 ps_sao_ctxt->i4_cur_luma_src_stride = ps_sao_ctxt->i4_frm_luma_src_stride; 2852 2853 ps_sao_ctxt->pu1_cur_chroma_src_buf = 2854 ps_sao_ctxt->pu1_frm_chroma_src_buf + 2855 (ps_sao_ctxt->i4_frm_chroma_src_stride * ps_sao_ctxt->i4_ctb_y * 2856 (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) + 2857 (ps_sao_ctxt->i4_ctb_x * ctb_size); 2858 2859 ps_sao_ctxt->i4_cur_chroma_src_stride = ps_sao_ctxt->i4_frm_chroma_src_stride; 2860 2861 /* Calculate the pointer to buff to store the (x,y)th sao 2862 * for the top merge of (x,y+1)th ctb 2863 */ 2864 ps_sao_ctxt->ps_top_ctb_sao = 2865 &ps_sao_ctxt->aps_frm_top_ctb_sao[ps_ctxt->i4_enc_frm_id] 2866 [ps_sao_ctxt->i4_ctb_x + 2867 (ps_sao_ctxt->i4_ctb_y) * 2868 ps_frm_ctb_prms->i4_num_ctbs_horz + 2869 
(ps_ctxt->i4_bitrate_instance_num * 2870 ps_sao_ctxt->i4_num_ctb_units)]; 2871 2872 /* Calculate the pointer to buff to store the top pixels of curr ctb*/ 2873 ps_sao_ctxt->pu1_curr_sao_src_top_luma = 2874 ps_sao_ctxt->apu1_sao_src_frm_top_luma[ps_ctxt->i4_enc_frm_id] + 2875 (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_luma_buf_stride + 2876 ps_sao_ctxt->i4_ctb_x * ctb_size + 2877 ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size + 2878 ps_sao_ctxt->i4_top_chroma_buf_size); 2879 2880 /* Calculate the pointer to buff to store the top pixels of curr ctb*/ 2881 ps_sao_ctxt->pu1_curr_sao_src_top_chroma = 2882 ps_sao_ctxt->apu1_sao_src_frm_top_chroma[ps_ctxt->i4_enc_frm_id] + 2883 (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_chroma_buf_stride + 2884 ps_sao_ctxt->i4_ctb_x * ctb_size + 2885 ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size + 2886 ps_sao_ctxt->i4_top_chroma_buf_size); 2887 2888 { 2889 UWORD32 u4_ctb_sao_bits; 2890 ihevce_sao_analyse( 2891 &ps_ctxt->s_sao_ctxt_t, 2892 ps_ctb_out_sao, 2893 &u4_ctb_sao_bits, 2894 ps_tile_params); 2895 ps_ctxt 2896 ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id] 2897 [ps_ctxt->i4_bitrate_instance_num] 2898 ->u4_frame_rdopt_header_bits += u4_ctb_sao_bits; 2899 ps_ctxt 2900 ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id] 2901 [ps_ctxt->i4_bitrate_instance_num] 2902 ->u4_frame_rdopt_bits += u4_ctb_sao_bits; 2903 } 2904 if(ps_ctxt->i4_deblk_pad_hpel_cur_pic & 2905 0x1) /** Subpel generation not done for non-ref picture **/ 2906 { 2907 /* Padding and Subpel Plane Generation */ 2908 ihevce_pad_interp_recon_ctb( 2909 ps_pad_interp_recon, 2910 ctb_ctr - 1, 2911 vert_ctr, 2912 ps_ctxt->i4_quality_preset, 2913 ps_frm_ctb_prms, 2914 ps_ctxt->ai2_scratch, 2915 ps_ctxt->i4_bitrate_instance_num, 2916 ps_ctxt->ps_func_selector); 2917 } 2918 } 2919 /* Call the sao function again for the last ctb of the last ctb row of frame */ 2920 if((vert_ctr == (ps_tile_params->i4_first_ctb_y + 
2921 ps_tile_params->i4_curr_tile_ht_in_ctb_unit - 1)) && 2922 ((ctb_ctr + 1) == 2923 (ctb_end))) //( ((ctb_ctr+1) == ps_frm_ctb_prms->i4_num_ctbs_horz)) 2924 { 2925 /* Register the curr ctb's x pos in sao context*/ 2926 ps_ctxt->s_sao_ctxt_t.i4_ctb_x = ctb_ctr; 2927 2928 /* Register the curr ctb's y pos in sao context*/ 2929 ps_ctxt->s_sao_ctxt_t.i4_ctb_y = vert_ctr; 2930 2931 ps_ctb_out_sao = ps_ctxt->s_sao_ctxt_t.ps_ctb_out + 2932 (vert_ctr)*ps_frm_ctb_prms->i4_num_ctbs_horz + (ctb_ctr); 2933 2934 ps_ctxt->s_sao_ctxt_t.ps_sao = &ps_ctb_out_sao->s_sao; 2935 2936 ps_ctxt->s_sao_ctxt_t.i4_sao_blk_wd = 2937 ctb_size - ((ps_tile_params->i4_curr_tile_wd_in_ctb_unit * ctb_size) - 2938 ps_tile_params->i4_curr_tile_width); 2939 2940 ps_ctxt->s_sao_ctxt_t.i4_sao_blk_ht = 2941 ctb_size - ((ps_tile_params->i4_curr_tile_ht_in_ctb_unit * ctb_size) - 2942 ps_tile_params->i4_curr_tile_height); 2943 2944 ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_row = 1; 2945 ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_col = 1; 2946 2947 /* Calculate the recon buf pointer and stride for teh current ctb */ 2948 ps_sao_ctxt->pu1_cur_luma_recon_buf = 2949 ps_sao_ctxt->pu1_frm_luma_recon_buf + 2950 (ps_sao_ctxt->i4_frm_luma_recon_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) + 2951 (ps_sao_ctxt->i4_ctb_x * ctb_size); 2952 2953 ps_sao_ctxt->i4_cur_luma_recon_stride = ps_sao_ctxt->i4_frm_luma_recon_stride; 2954 2955 ps_sao_ctxt->pu1_cur_chroma_recon_buf = 2956 ps_sao_ctxt->pu1_frm_chroma_recon_buf + 2957 (ps_sao_ctxt->i4_frm_chroma_recon_stride * ps_sao_ctxt->i4_ctb_y * 2958 (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) + 2959 (ps_sao_ctxt->i4_ctb_x * ctb_size); 2960 2961 ps_sao_ctxt->i4_cur_chroma_recon_stride = 2962 ps_sao_ctxt->i4_frm_chroma_recon_stride; 2963 2964 ps_sao_ctxt->pu1_cur_luma_src_buf = 2965 ps_sao_ctxt->pu1_frm_luma_src_buf + 2966 (ps_sao_ctxt->i4_frm_luma_src_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) + 2967 (ps_sao_ctxt->i4_ctb_x * ctb_size); 2968 2969 ps_sao_ctxt->i4_cur_luma_src_stride = 
ps_sao_ctxt->i4_frm_luma_src_stride; 2970 2971 ps_sao_ctxt->pu1_cur_chroma_src_buf = 2972 ps_sao_ctxt->pu1_frm_chroma_src_buf + 2973 (ps_sao_ctxt->i4_frm_chroma_src_stride * ps_sao_ctxt->i4_ctb_y * 2974 (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) + 2975 (ps_sao_ctxt->i4_ctb_x * ctb_size); 2976 2977 ps_sao_ctxt->i4_cur_chroma_src_stride = ps_sao_ctxt->i4_frm_chroma_src_stride; 2978 2979 /* Calculate the pointer to buff to store the (x,y)th sao 2980 * for the top merge of (x,y+1)th ctb 2981 */ 2982 ps_sao_ctxt->ps_top_ctb_sao = 2983 &ps_sao_ctxt->aps_frm_top_ctb_sao[ps_ctxt->i4_enc_frm_id] 2984 [ps_sao_ctxt->i4_ctb_x + 2985 ps_sao_ctxt->i4_ctb_y * 2986 ps_frm_ctb_prms->i4_num_ctbs_horz + 2987 (ps_ctxt->i4_bitrate_instance_num * 2988 ps_sao_ctxt->i4_num_ctb_units)]; 2989 2990 /* Calculate the pointer to buff to store the top pixels of curr ctb*/ 2991 ps_sao_ctxt->pu1_curr_sao_src_top_luma = 2992 ps_sao_ctxt->apu1_sao_src_frm_top_luma[ps_ctxt->i4_enc_frm_id] + 2993 (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_luma_buf_stride + 2994 ps_sao_ctxt->i4_ctb_x * ctb_size + 2995 ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size + 2996 ps_sao_ctxt->i4_top_chroma_buf_size); 2997 2998 /* Calculate the pointer to buff to store the top pixels of curr ctb*/ 2999 ps_sao_ctxt->pu1_curr_sao_src_top_chroma = 3000 ps_sao_ctxt->apu1_sao_src_frm_top_chroma[ps_ctxt->i4_enc_frm_id] + 3001 (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_chroma_buf_stride + 3002 ps_sao_ctxt->i4_ctb_x * ctb_size + 3003 ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size + 3004 ps_sao_ctxt->i4_top_chroma_buf_size); 3005 3006 { 3007 UWORD32 u4_ctb_sao_bits; 3008 3009 ihevce_sao_analyse( 3010 &ps_ctxt->s_sao_ctxt_t, 3011 ps_ctb_out_sao, 3012 &u4_ctb_sao_bits, 3013 ps_tile_params); 3014 ps_ctxt 3015 ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id] 3016 [ps_ctxt->i4_bitrate_instance_num] 3017 ->u4_frame_rdopt_header_bits += u4_ctb_sao_bits; 3018 ps_ctxt 
3019 ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id] 3020 [ps_ctxt->i4_bitrate_instance_num] 3021 ->u4_frame_rdopt_bits += u4_ctb_sao_bits; 3022 } 3023 if(ps_ctxt->i4_deblk_pad_hpel_cur_pic & 3024 0x1) /** Subpel generation not done for non-ref picture **/ 3025 { 3026 /* Padding and Subpel Plane Generation */ 3027 ihevce_pad_interp_recon_ctb( 3028 ps_pad_interp_recon, 3029 ctb_ctr, 3030 vert_ctr, 3031 ps_ctxt->i4_quality_preset, 3032 ps_frm_ctb_prms, 3033 ps_ctxt->ai2_scratch, 3034 ps_ctxt->i4_bitrate_instance_num, 3035 ps_ctxt->ps_func_selector); 3036 } 3037 } 3038 } //end of loop over CTBs in current CTB-row 3039 3040 /* If SAO is on, then signal completion of the last CTB row of frame */ 3041 { 3042 if(vert_ctr == (ps_frm_ctb_prms->i4_num_ctbs_vert - 1)) 3043 { 3044 if(!ps_ctxt->i4_bitrate_instance_num) 3045 { 3046 { 3047 WORD32 post_ctb_ctr; 3048 3049 for(post_ctb_ctr = ctb_start; post_ctb_ctr < ctb_end; post_ctb_ctr++) 3050 { 3051 ihevce_dmgr_map_set_sync( 3052 pv_dep_mngr_me_dep_encloop, 3053 post_ctb_ctr, 3054 vert_ctr, 3055 MAP_CTB_COMPLETE); 3056 } 3057 } 3058 } 3059 } 3060 } 3061 } 3062 } 3063 3064 return; 3065 } 3066 3067 /*! 
3068 ****************************************************************************** 3069 * \if Function name : ihevce_enc_loop_pass \endif 3070 * 3071 * \brief 3072 * Frame level enc_loop pass function 3073 * 3074 * \param[in] pv_ctxt : pointer to enc_loop module 3075 * \param[in] ps_frm_lamda : Frame level Lambda params 3076 * \param[in] ps_inp : pointer to input yuv buffer (frame buffer) 3077 * \param[in] ps_ctb_in : pointer CTB structure (output of ME/IPE) (frame buffer) 3078 * \param[out] ps_frm_recon : pointer recon picture structure pointer (frame buffer) 3079 * \param[out] ps_ctb_out : pointer CTB output structure (frame buffer) 3080 * \param[out] ps_cu_out : pointer CU output structure (frame buffer) 3081 * \param[out] ps_tu_out : pointer TU output structure (frame buffer) 3082 * \param[out] pi2_frm_coeffs : pointer coeff output frame buffer) 3083 * 3084 * \return 3085 * None 3086 * 3087 * Note : Currently the frame level calcualtions done assumes that 3088 * framewidth of the input /recon are excat multiple of ctbsize 3089 * 3090 * \author 3091 * Ittiam 3092 * 3093 ***************************************************************************** 3094 */ 3095 void ihevce_enc_loop_process( 3096 void *pv_ctxt, 3097 ihevce_lap_enc_buf_t *ps_curr_inp, 3098 ctb_analyse_t *ps_ctb_in, 3099 ipe_l0_ctb_analyse_for_me_t *ps_ipe_analyse, 3100 recon_pic_buf_t *ps_frm_recon, 3101 cur_ctb_cu_tree_t *ps_cu_tree_out, 3102 ctb_enc_loop_out_t *ps_ctb_out, 3103 cu_enc_loop_out_t *ps_cu_out, 3104 tu_enc_loop_out_t *ps_tu_out, 3105 pu_t *ps_pu_out, 3106 UWORD8 *pu1_frm_ecd_data, 3107 frm_ctb_ctxt_t *ps_frm_ctb_prms, 3108 frm_lambda_ctxt_t *ps_frm_lamda, 3109 multi_thrd_ctxt_t *ps_multi_thrd_ctxt, 3110 WORD32 thrd_id, 3111 WORD32 i4_enc_frm_id, 3112 WORD32 i4_pass) 3113 { 3114 WORD32 vert_ctr; 3115 WORD32 tile_col_idx; 3116 iv_enc_yuv_buf_t s_curr_src_bufs; 3117 iv_enc_yuv_buf_t s_curr_recon_bufs; 3118 iv_enc_yuv_buf_src_t s_curr_recon_bufs_src; 3119 UWORD32 *pu4_pu_offsets; 3120 
WORD32 end_of_frame; 3121 UWORD8 *apu1_y_sub_pel_planes[3]; 3122 pad_interp_recon_frm_t s_pad_interp_recon; 3123 ihevce_enc_loop_master_ctxt_t *ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_ctxt; 3124 3125 ihevce_enc_loop_ctxt_t *ps_ctxt = ps_master_ctxt->aps_enc_loop_thrd_ctxt[thrd_id]; 3126 3127 WORD32 i4_bitrate_instance_num = ps_ctxt->i4_bitrate_instance_num; 3128 3129 /* initialize the closed loop lambda for the current frame */ 3130 ps_ctxt->i8_cl_ssd_lambda_qf = ps_frm_lamda->i8_cl_ssd_lambda_qf; 3131 ps_ctxt->i8_cl_ssd_lambda_chroma_qf = ps_frm_lamda->i8_cl_ssd_lambda_chroma_qf; 3132 ps_ctxt->u4_chroma_cost_weighing_factor = ps_frm_lamda->u4_chroma_cost_weighing_factor; 3133 ps_ctxt->i4_satd_lamda = ps_frm_lamda->i4_cl_satd_lambda_qf; 3134 ps_ctxt->i4_sad_lamda = ps_frm_lamda->i4_cl_sad_type2_lambda_qf; 3135 ps_ctxt->thrd_id = thrd_id; 3136 ps_ctxt->u1_is_refPic = ps_curr_inp->s_lap_out.i4_is_ref_pic; 3137 3138 #if DISABLE_SAO_WHEN_NOISY 3139 ps_ctxt->s_sao_ctxt_t.ps_ctb_data = ps_ctb_in; 3140 ps_ctxt->s_sao_ctxt_t.i4_ctb_data_stride = ps_frm_ctb_prms->i4_num_ctbs_horz; 3141 #endif 3142 3143 #if ENABLE_TU_TREE_DETERMINATION_IN_RDOPT 3144 ps_ctxt->pv_err_func_selector = ps_func_selector; 3145 #endif 3146 3147 /*Bit0 - of this Flag indicates whether current pictute needs to be deblocked, 3148 padded and hpel planes need to be generated. 3149 Bit1 - of this flag set to 1 if sao is enabled. This is to enable deblocking when sao is enabled*/ 3150 ps_ctxt->i4_deblk_pad_hpel_cur_pic = 3151 (ps_frm_recon->i4_deblk_pad_hpel_cur_pic) || 3152 ((ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag || 3153 ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag) 3154 << 1); 3155 3156 /* Share all reference pictures with nbr clients. 
This flag will be used only 3157 in case of dist-enc mode */ 3158 ps_ctxt->i4_share_flag = (ps_frm_recon->i4_is_reference != 0); 3159 ps_ctxt->pv_frm_recon = (void *)ps_frm_recon; 3160 3161 /* Register the frame level ssd lamda for both luma and chroma*/ 3162 ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf = ps_frm_lamda->i8_cl_ssd_lambda_qf; 3163 ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf = ps_frm_lamda->i8_cl_ssd_lambda_chroma_qf; 3164 3165 ihevce_populate_cl_cu_lambda_prms( 3166 ps_ctxt, 3167 ps_frm_lamda, 3168 (WORD32)ps_ctxt->i1_slice_type, 3169 ps_curr_inp->s_lap_out.i4_temporal_lyr_id, 3170 ENC_LOOP_LAMBDA_TYPE); 3171 3172 ps_ctxt->u1_disable_intra_eval = DISABLE_INTRA_IN_BPICS && 3173 (IHEVCE_QUALITY_P6 == ps_ctxt->i4_quality_preset) && 3174 (ps_ctxt->i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE); 3175 3176 end_of_frame = 0; 3177 3178 /* ----------------------------------------------------- */ 3179 /* store the stride and dimensions of source and recon */ 3180 /* buffer pointers will be over written at every CTB row */ 3181 /* ----------------------------------------------------- */ 3182 memcpy(&s_curr_src_bufs, &ps_curr_inp->s_lap_out.s_input_buf, sizeof(iv_enc_yuv_buf_t)); 3183 3184 memcpy(&s_curr_recon_bufs, &ps_frm_recon->s_yuv_buf_desc, sizeof(iv_enc_yuv_buf_t)); 3185 3186 memcpy(&s_curr_recon_bufs_src, &ps_frm_recon->s_yuv_buf_desc_src, sizeof(iv_enc_yuv_buf_src_t)); 3187 3188 /* get the frame level pu offset pointer*/ 3189 pu4_pu_offsets = ps_frm_recon->pu4_pu_off; 3190 3191 s_pad_interp_recon.u1_chroma_array_type = ps_ctxt->u1_chroma_array_type; 3192 3193 /* ------------ Loop over all the CTB rows --------------- */ 3194 while(0 == end_of_frame) 3195 { 3196 UWORD8 *pu1_tmp; 3197 UWORD8 *pu1_row_pu_map; 3198 UWORD8 *pu1_row_ecd_data; 3199 ctb_analyse_t *ps_ctb_row_in; 3200 ctb_enc_loop_out_t *ps_ctb_row_out; 3201 cu_enc_loop_out_t *ps_row_cu; 3202 tu_enc_loop_out_t *ps_row_tu; 3203 pu_t *ps_row_pu; 3204 pu_col_mv_t *ps_row_col_pu; 3205 job_queue_t 
*ps_job; 3206 UWORD32 *pu4_pu_row_offsets; 3207 UWORD16 *pu2_num_pu_row; 3208 3209 ipe_l0_ctb_analyse_for_me_t *ps_row_ipe_analyse; 3210 cur_ctb_cu_tree_t *ps_row_cu_tree; 3211 UWORD8 is_inp_422 = (ps_ctxt->u1_chroma_array_type == 2); 3212 3213 /* Get the current row from the job queue */ 3214 ps_job = (job_queue_t *)ihevce_enc_grp_get_next_job( 3215 ps_multi_thrd_ctxt, ENC_LOOP_JOB + i4_bitrate_instance_num, 1, i4_enc_frm_id); 3216 3217 /* Register the pointer to ctb out of the current frame*/ 3218 ps_ctxt->s_sao_ctxt_t.ps_ctb_out = ps_ctb_out; 3219 3220 /* If all rows are done, set the end of process flag to 1, */ 3221 /* and the current row to -1 */ 3222 if(NULL == ps_job) 3223 { 3224 vert_ctr = -1; 3225 tile_col_idx = -1; 3226 end_of_frame = 1; 3227 } 3228 else 3229 { 3230 ihevce_tile_params_t *ps_col_tile_params_temp; 3231 ihevce_tile_params_t *ps_tile_params; 3232 WORD32 i4_tile_id; 3233 3234 ASSERT((ENC_LOOP_JOB + i4_bitrate_instance_num) == ps_job->i4_task_type); 3235 /* set the output dependency */ 3236 ihevce_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_enc_frm_id); 3237 3238 /* Obtain the current row's details from the job */ 3239 vert_ctr = ps_job->s_job_info.s_enc_loop_job_info.i4_ctb_row_no; 3240 { 3241 /* Obtain the current colum tile index from the job */ 3242 tile_col_idx = ps_job->s_job_info.s_enc_loop_job_info.i4_tile_col_idx; 3243 3244 /* The tile parameter for the col. idx. Use only the properties 3245 which is same for all the bottom tiles like width, start_x, etc. 3246 Don't use height, start_y, etc. 
*/ 3247 ps_col_tile_params_temp = 3248 ((ihevce_tile_params_t *)ps_master_ctxt->pv_tile_params_base + tile_col_idx); 3249 3250 /* Derive actual tile_id based on vert_ctr */ 3251 i4_tile_id = 3252 *(ps_frm_ctb_prms->pi4_tile_id_map + 3253 vert_ctr * ps_frm_ctb_prms->i4_tile_id_ctb_map_stride + 3254 ps_col_tile_params_temp->i4_first_ctb_x); 3255 /* Derive pointer to current tile prms */ 3256 ps_tile_params = 3257 ((ihevce_tile_params_t *)ps_master_ctxt->pv_tile_params_base + i4_tile_id); 3258 } 3259 3260 ps_ctxt->i4_tile_col_idx = tile_col_idx; 3261 /* derive the current ctb row pointers */ 3262 3263 /* luma src */ 3264 pu1_tmp = (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_y_buf + 3265 (ps_curr_inp->s_lap_out.s_input_buf.i4_start_offset_y * 3266 ps_curr_inp->s_lap_out.s_input_buf.i4_y_strd) + 3267 ps_curr_inp->s_lap_out.s_input_buf.i4_start_offset_x; 3268 3269 pu1_tmp += 3270 (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * 3271 ps_curr_inp->s_lap_out.s_input_buf.i4_y_strd); 3272 3273 s_curr_src_bufs.pv_y_buf = pu1_tmp; 3274 3275 if(!ps_ctxt->u1_is_input_data_hbd) 3276 { 3277 /* cb src */ 3278 pu1_tmp = (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_u_buf; 3279 pu1_tmp += 3280 (vert_ctr * (ps_frm_ctb_prms->i4_ctb_size >> ((is_inp_422 == 1) ? 
0 : 1)) * 3281 ps_curr_inp->s_lap_out.s_input_buf.i4_uv_strd); 3282 3283 s_curr_src_bufs.pv_u_buf = pu1_tmp; 3284 } 3285 3286 /* luma recon */ 3287 pu1_tmp = (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_y_buf; 3288 pu1_tmp += 3289 (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * ps_frm_recon->s_yuv_buf_desc.i4_y_strd); 3290 3291 s_curr_recon_bufs.pv_y_buf = pu1_tmp; 3292 s_pad_interp_recon.pu1_luma_recon = (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_y_buf; 3293 s_pad_interp_recon.i4_luma_recon_stride = ps_frm_recon->s_yuv_buf_desc.i4_y_strd; 3294 if(!ps_ctxt->u1_is_input_data_hbd) 3295 { 3296 /* cb recon */ 3297 pu1_tmp = (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_u_buf; 3298 pu1_tmp += 3299 (vert_ctr * (ps_frm_ctb_prms->i4_ctb_size >> ((is_inp_422 == 1) ? 0 : 1)) * 3300 ps_frm_recon->s_yuv_buf_desc.i4_uv_strd); 3301 3302 s_curr_recon_bufs.pv_u_buf = pu1_tmp; 3303 s_pad_interp_recon.pu1_chrm_recon = (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_u_buf; 3304 s_pad_interp_recon.i4_chrm_recon_stride = ps_frm_recon->s_yuv_buf_desc.i4_uv_strd; 3305 3306 s_pad_interp_recon.i4_ctb_size = ps_frm_ctb_prms->i4_ctb_size; 3307 3308 /* Register the source buffer pointers in sao context*/ 3309 ps_ctxt->s_sao_ctxt_t.pu1_frm_luma_src_buf = 3310 (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_y_buf + 3311 (ps_curr_inp->s_lap_out.s_input_buf.i4_start_offset_y * 3312 ps_curr_inp->s_lap_out.s_input_buf.i4_y_strd) + 3313 ps_curr_inp->s_lap_out.s_input_buf.i4_start_offset_x; 3314 3315 ps_ctxt->s_sao_ctxt_t.i4_frm_luma_src_stride = 3316 ps_curr_inp->s_lap_out.s_input_buf.i4_y_strd; 3317 3318 ps_ctxt->s_sao_ctxt_t.pu1_frm_chroma_src_buf = 3319 (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_u_buf; 3320 3321 ps_ctxt->s_sao_ctxt_t.i4_frm_chroma_src_stride = 3322 ps_curr_inp->s_lap_out.s_input_buf.i4_uv_strd; 3323 } 3324 3325 /* Subpel planes hxfy, fxhy, hxhy*/ 3326 pu1_tmp = ps_frm_recon->apu1_y_sub_pel_planes[0]; 3327 pu1_tmp += 3328 (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * 
ps_frm_recon->s_yuv_buf_desc.i4_y_strd); 3329 apu1_y_sub_pel_planes[0] = pu1_tmp; 3330 s_pad_interp_recon.pu1_sbpel_hxfy = ps_frm_recon->apu1_y_sub_pel_planes[0]; 3331 3332 pu1_tmp = ps_frm_recon->apu1_y_sub_pel_planes[1]; 3333 pu1_tmp += 3334 (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * ps_frm_recon->s_yuv_buf_desc.i4_y_strd); 3335 apu1_y_sub_pel_planes[1] = pu1_tmp; 3336 s_pad_interp_recon.pu1_sbpel_fxhy = ps_frm_recon->apu1_y_sub_pel_planes[1]; 3337 3338 pu1_tmp = ps_frm_recon->apu1_y_sub_pel_planes[2]; 3339 pu1_tmp += 3340 (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * ps_frm_recon->s_yuv_buf_desc.i4_y_strd); 3341 apu1_y_sub_pel_planes[2] = pu1_tmp; 3342 s_pad_interp_recon.pu1_sbpel_hxhy = ps_frm_recon->apu1_y_sub_pel_planes[2]; 3343 3344 /* row level coeffs buffer */ 3345 pu1_row_ecd_data = 3346 pu1_frm_ecd_data + 3347 (vert_ctr * 3348 ((is_inp_422 == 1) ? (ps_frm_ctb_prms->i4_max_tus_in_row << 1) 3349 : ((ps_frm_ctb_prms->i4_max_tus_in_row * 3) >> 1)) * 3350 MAX_SCAN_COEFFS_BYTES_4x4); 3351 3352 /* Row level CU buffer */ 3353 ps_row_cu = ps_cu_out + (vert_ctr * ps_frm_ctb_prms->i4_max_cus_in_row); 3354 3355 /* Row level TU buffer */ 3356 ps_row_tu = ps_tu_out + (vert_ctr * ps_frm_ctb_prms->i4_max_tus_in_row); 3357 3358 /* Row level PU buffer */ 3359 ps_row_pu = ps_pu_out + (vert_ctr * ps_frm_ctb_prms->i4_max_pus_in_row); 3360 3361 /* Row level colocated PU buffer */ 3362 /* ps_frm_col_mv has (i4_num_ctbs_horz + 1) CTBs for stride */ 3363 ps_row_col_pu = 3364 ps_frm_recon->ps_frm_col_mv + (vert_ctr * (ps_frm_ctb_prms->i4_num_ctbs_horz + 1) * 3365 ps_frm_ctb_prms->i4_num_pus_in_ctb); 3366 /* Row level col PU map buffer */ 3367 /* pu1_frm_pu_map has (i4_num_ctbs_horz + 1) CTBs for stride */ 3368 pu1_row_pu_map = 3369 ps_frm_recon->pu1_frm_pu_map + (vert_ctr * (ps_frm_ctb_prms->i4_num_ctbs_horz + 1) * 3370 ps_frm_ctb_prms->i4_num_pus_in_ctb); 3371 /* row ctb in pointer */ 3372 ps_ctb_row_in = ps_ctb_in + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz; 3373 3374 /* row 
ctb out pointer */ 3375 ps_ctb_row_out = ps_ctb_out + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz; 3376 3377 /* row number of PUs map pointer */ 3378 pu2_num_pu_row = 3379 ps_frm_recon->pu2_num_pu_map + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz; 3380 3381 /* row pu offsets pointer */ 3382 pu4_pu_row_offsets = pu4_pu_offsets + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz; 3383 /* store the first CTB pu offset pointer */ 3384 *pu4_pu_row_offsets = vert_ctr * ps_frm_ctb_prms->i4_max_pus_in_row; 3385 /* Initialize ptr to current IPE row */ 3386 ps_row_ipe_analyse = ps_ipe_analyse + (vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz); 3387 3388 /* Initialize ptr to current row */ 3389 ps_row_cu_tree = ps_cu_tree_out + 3390 (vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz * MAX_NUM_NODES_CU_TREE); 3391 3392 /* Get the EncLoop Top-Right CU Dep Mngr */ 3393 ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right = 3394 ps_master_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[ps_ctxt->i4_enc_frm_id] 3395 [i4_bitrate_instance_num]; 3396 /* Get the EncLoop Deblock Dep Mngr */ 3397 ps_ctxt->pv_dep_mngr_enc_loop_dblk = 3398 ps_master_ctxt 3399 ->aapv_dep_mngr_enc_loop_dblk[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]; 3400 3401 ps_ctxt->pu1_curr_row_cabac_state = &ps_master_ctxt->au1_ctxt_models[vert_ctr][0]; 3402 3403 { 3404 /* derive the pointers of top row buffers */ 3405 ps_ctxt->pv_top_row_luma = 3406 (UWORD8 *)ps_ctxt->apv_frm_top_row_luma[ps_ctxt->i4_enc_frm_id] + 3407 (ps_ctxt->i4_frm_top_row_luma_size * ps_ctxt->i4_bitrate_instance_num) + 3408 (vert_ctr - 1) * ps_ctxt->i4_top_row_luma_stride; 3409 3410 ps_ctxt->pv_top_row_chroma = 3411 (UWORD8 *)ps_ctxt->apv_frm_top_row_chroma[ps_ctxt->i4_enc_frm_id] + 3412 (ps_ctxt->i4_frm_top_row_chroma_size * ps_ctxt->i4_bitrate_instance_num) + 3413 (vert_ctr - 1) * ps_ctxt->i4_top_row_chroma_stride; 3414 3415 /* derive the pointers of bottom row buffers to update current row data */ 3416 ps_ctxt->pv_bot_row_luma = 3417 (UWORD8 
*)ps_ctxt->apv_frm_top_row_luma[ps_ctxt->i4_enc_frm_id] + 3418 (ps_ctxt->i4_frm_top_row_luma_size * ps_ctxt->i4_bitrate_instance_num) + 3419 (vert_ctr)*ps_ctxt->i4_top_row_luma_stride; 3420 3421 ps_ctxt->pv_bot_row_chroma = 3422 (UWORD8 *)ps_ctxt->apv_frm_top_row_chroma[ps_ctxt->i4_enc_frm_id] + 3423 (ps_ctxt->i4_frm_top_row_chroma_size * ps_ctxt->i4_bitrate_instance_num) + 3424 (vert_ctr)*ps_ctxt->i4_top_row_chroma_stride; 3425 3426 /* Register the buffer pointers in sao context*/ 3427 ps_ctxt->s_sao_ctxt_t.pu1_frm_luma_recon_buf = 3428 (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_y_buf; 3429 ps_ctxt->s_sao_ctxt_t.i4_frm_luma_recon_stride = 3430 ps_frm_recon->s_yuv_buf_desc.i4_y_strd; 3431 3432 ps_ctxt->s_sao_ctxt_t.pu1_frm_chroma_recon_buf = 3433 (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_u_buf; 3434 ps_ctxt->s_sao_ctxt_t.i4_frm_chroma_recon_stride = 3435 ps_frm_recon->s_yuv_buf_desc.i4_uv_strd; 3436 3437 ps_ctxt->s_sao_ctxt_t.ps_rdopt_entropy_ctxt = &ps_ctxt->s_rdopt_entropy_ctxt; 3438 3439 ps_ctxt->s_sao_ctxt_t.i4_frm_top_luma_buf_stride = 3440 ps_ctxt->s_sao_ctxt_t.u4_ctb_aligned_wd + 1; 3441 3442 ps_ctxt->s_sao_ctxt_t.i4_frm_top_chroma_buf_stride = 3443 ps_ctxt->s_sao_ctxt_t.u4_ctb_aligned_wd + 2; 3444 } 3445 3446 ps_ctxt->ps_top_row_nbr = 3447 ps_ctxt->aps_frm_top_row_nbr[ps_ctxt->i4_enc_frm_id] + 3448 (ps_ctxt->i4_frm_top_row_nbr_size * ps_ctxt->i4_bitrate_instance_num) + 3449 (vert_ctr - 1) * ps_ctxt->i4_top_row_nbr_stride; 3450 3451 ps_ctxt->ps_bot_row_nbr = 3452 ps_ctxt->aps_frm_top_row_nbr[ps_ctxt->i4_enc_frm_id] + 3453 (ps_ctxt->i4_frm_top_row_nbr_size * ps_ctxt->i4_bitrate_instance_num) + 3454 (vert_ctr)*ps_ctxt->i4_top_row_nbr_stride; 3455 3456 if(vert_ctr > 0) 3457 { 3458 ps_ctxt->pu1_top_rt_cabac_state = &ps_master_ctxt->au1_ctxt_models[vert_ctr - 1][0]; 3459 } 3460 else 3461 { 3462 ps_ctxt->pu1_top_rt_cabac_state = NULL; 3463 } 3464 3465 ASSERT( 3466 ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[0] 3467 .ps_pps->i1_sign_data_hiding_flag == 3468 
ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[1] 3469 .ps_pps->i1_sign_data_hiding_flag); 3470 3471 /* call the row level processing function */ 3472 ihevce_enc_loop_process_row( 3473 ps_ctxt, 3474 &s_curr_src_bufs, 3475 &s_curr_recon_bufs, 3476 &s_curr_recon_bufs_src, 3477 &apu1_y_sub_pel_planes[0], 3478 ps_ctb_row_in, 3479 ps_ctb_row_out, 3480 ps_row_ipe_analyse, 3481 ps_row_cu_tree, 3482 ps_row_cu, 3483 ps_row_tu, 3484 ps_row_pu, 3485 ps_row_col_pu, 3486 pu2_num_pu_row, 3487 pu1_row_pu_map, 3488 pu1_row_ecd_data, 3489 pu4_pu_row_offsets, 3490 ps_frm_ctb_prms, 3491 vert_ctr, 3492 ps_frm_recon, 3493 ps_ctxt->pv_dep_mngr_encloop_dep_me, 3494 &s_pad_interp_recon, 3495 i4_pass, 3496 ps_multi_thrd_ctxt, 3497 ps_tile_params); 3498 } 3499 } 3500 } 3501 3502 /*! 3503 ****************************************************************************** 3504 * \if Function name : ihevce_enc_loop_dblk_get_prms_dep_mngr \endif 3505 * 3506 * \brief Returns to the caller key attributes relevant for dependency manager, 3507 * ie, the number of vertical units in l0 layer 3508 * 3509 * \par Description: 3510 * 3511 * \param[in] pai4_ht : ht 3512 * \param[out] pi4_num_vert_units_in_lyr : Pointer to store num vertical units 3513 * for deblocking 3514 * 3515 * \return 3516 * None 3517 * 3518 * \author 3519 * Ittiam 3520 * 3521 ***************************************************************************** 3522 */ 3523 void ihevce_enc_loop_dblk_get_prms_dep_mngr(WORD32 i4_ht, WORD32 *pi4_num_vert_units_in_lyr) 3524 { 3525 /* Blk ht at a given layer*/ 3526 WORD32 unit_ht_c; 3527 WORD32 ctb_size = 64; 3528 3529 /* compute blk ht and unit ht */ 3530 unit_ht_c = ctb_size; 3531 3532 /* set the numebr of vertical units */ 3533 *pi4_num_vert_units_in_lyr = (i4_ht + unit_ht_c - 1) / unit_ht_c; 3534 } 3535 3536 /*! 
3537 ****************************************************************************** 3538 * \if Function name : ihevce_enc_loop_get_num_mem_recs \endif 3539 * 3540 * \brief 3541 * Number of memory records are returned for enc_loop module 3542 * Note : Include TOT MEM. req. for ENC.LOOP + TOT MEM. req. for Dep Mngr for Dblk 3543 * 3544 * \return 3545 * None 3546 * 3547 * \author 3548 * Ittiam 3549 * 3550 ***************************************************************************** 3551 */ 3552 WORD32 3553 ihevce_enc_loop_get_num_mem_recs(WORD32 i4_num_bitrate_inst, WORD32 i4_num_enc_loop_frm_pllel) 3554 { 3555 WORD32 enc_loop_mem_recs = NUM_ENC_LOOP_MEM_RECS; 3556 WORD32 enc_loop_dblk_dep_mngr_mem_recs = 3557 i4_num_enc_loop_frm_pllel * i4_num_bitrate_inst * ihevce_dmgr_get_num_mem_recs(); 3558 WORD32 enc_loop_cu_top_right_dep_mngr_mem_recs = 3559 i4_num_enc_loop_frm_pllel * i4_num_bitrate_inst * ihevce_dmgr_get_num_mem_recs(); 3560 WORD32 enc_loop_aux_br_dep_mngr_mem_recs = 3561 i4_num_enc_loop_frm_pllel * (i4_num_bitrate_inst - 1) * ihevce_dmgr_get_num_mem_recs(); 3562 3563 return ( 3564 (enc_loop_mem_recs + enc_loop_dblk_dep_mngr_mem_recs + 3565 enc_loop_cu_top_right_dep_mngr_mem_recs + enc_loop_aux_br_dep_mngr_mem_recs)); 3566 } 3567 /*! 3568 ****************************************************************************** 3569 * \if Function name : ihevce_enc_loop_get_mem_recs \endif 3570 * 3571 * \brief 3572 * Memory requirements are returned for ENC_LOOP. 
3573 * 3574 * \param[in,out] ps_mem_tab : pointer to memory descriptors table 3575 * \param[in] ps_init_prms : Create time static parameters 3576 * \param[in] i4_num_proc_thrds : Number of processing threads for this module 3577 * \param[in] i4_mem_space : memspace in whihc memory request should be done 3578 * 3579 * \return 3580 * None 3581 * 3582 * \author 3583 * Ittiam 3584 * 3585 ***************************************************************************** 3586 */ 3587 WORD32 ihevce_enc_loop_get_mem_recs( 3588 iv_mem_rec_t *ps_mem_tab, 3589 ihevce_static_cfg_params_t *ps_init_prms, 3590 WORD32 i4_num_proc_thrds, 3591 WORD32 i4_num_bitrate_inst, 3592 WORD32 i4_num_enc_loop_frm_pllel, 3593 WORD32 i4_mem_space, 3594 WORD32 i4_resolution_id) 3595 { 3596 UWORD32 u4_width, u4_height, n_tabs; 3597 UWORD32 u4_ctb_in_a_row, u4_ctb_rows_in_a_frame; 3598 WORD32 ctr; 3599 WORD32 i4_chroma_format = ps_init_prms->s_src_prms.i4_chr_format; 3600 3601 /* derive frame dimensions */ 3602 /*width of the input YUV to be encoded */ 3603 u4_width = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width; 3604 /*making the width a multiple of CTB size*/ 3605 u4_width += SET_CTB_ALIGN( 3606 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, MAX_CTB_SIZE); 3607 3608 /*height of the input YUV to be encoded */ 3609 u4_height = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height; 3610 /*making the height a multiple of CTB size*/ 3611 u4_height += SET_CTB_ALIGN( 3612 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, MAX_CTB_SIZE); 3613 u4_ctb_in_a_row = (u4_width / MAX_CTB_SIZE); 3614 u4_ctb_rows_in_a_frame = (u4_height / MAX_CTB_SIZE); 3615 /* memories should be requested assuming worst case requirememnts */ 3616 3617 /* Module context structure */ 3618 ps_mem_tab[ENC_LOOP_CTXT].i4_mem_size = sizeof(ihevce_enc_loop_master_ctxt_t); 3619 3620 ps_mem_tab[ENC_LOOP_CTXT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; 3621 
3622 ps_mem_tab[ENC_LOOP_CTXT].i4_mem_alignment = 8; 3623 3624 /* Thread context structure */ 3625 ps_mem_tab[ENC_LOOP_THRDS_CTXT].i4_mem_size = 3626 i4_num_proc_thrds * sizeof(ihevce_enc_loop_ctxt_t); 3627 3628 ps_mem_tab[ENC_LOOP_THRDS_CTXT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; 3629 3630 ps_mem_tab[ENC_LOOP_THRDS_CTXT].i4_mem_alignment = 16; 3631 3632 /* Scale matrices */ 3633 ps_mem_tab[ENC_LOOP_SCALE_MAT].i4_mem_size = 2 * MAX_TU_SIZE * MAX_TU_SIZE * sizeof(WORD16); 3634 3635 ps_mem_tab[ENC_LOOP_SCALE_MAT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; 3636 3637 ps_mem_tab[ENC_LOOP_SCALE_MAT].i4_mem_alignment = 8; 3638 3639 /* Rescale matrices */ 3640 ps_mem_tab[ENC_LOOP_RESCALE_MAT].i4_mem_size = 2 * MAX_TU_SIZE * MAX_TU_SIZE * sizeof(WORD16); 3641 3642 ps_mem_tab[ENC_LOOP_RESCALE_MAT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; 3643 3644 ps_mem_tab[ENC_LOOP_RESCALE_MAT].i4_mem_alignment = 8; 3645 3646 /* top row luma one row of pixel data per CTB row */ 3647 if(ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) 3648 { 3649 ps_mem_tab[ENC_LOOP_TOP_LUMA].i4_mem_size = (u4_ctb_rows_in_a_frame + 1) * 3650 (u4_width + MAX_CU_SIZE + 1) * sizeof(UWORD16) * 3651 i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel; 3652 } 3653 else 3654 { 3655 ps_mem_tab[ENC_LOOP_TOP_LUMA].i4_mem_size = (u4_ctb_rows_in_a_frame + 1) * 3656 (u4_width + MAX_CU_SIZE + 1) * sizeof(UWORD8) * 3657 i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel; 3658 } 3659 3660 ps_mem_tab[ENC_LOOP_TOP_LUMA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; 3661 3662 ps_mem_tab[ENC_LOOP_TOP_LUMA].i4_mem_alignment = 8; 3663 3664 /* top row chroma */ 3665 if(ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) 3666 { 3667 ps_mem_tab[ENC_LOOP_TOP_CHROMA].i4_mem_size = 3668 (u4_ctb_rows_in_a_frame + 1) * (u4_width + MAX_CU_SIZE + 2) * sizeof(UWORD16) * 3669 i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel; 3670 } 3671 else 3672 { 3673 ps_mem_tab[ENC_LOOP_TOP_CHROMA].i4_mem_size = 3674 (u4_ctb_rows_in_a_frame + 1) * 
(u4_width + MAX_CU_SIZE + 2) * sizeof(UWORD8) * 3675 i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel; 3676 } 3677 3678 ps_mem_tab[ENC_LOOP_TOP_CHROMA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; 3679 3680 ps_mem_tab[ENC_LOOP_TOP_CHROMA].i4_mem_alignment = 8; 3681 3682 /* top row neighbour 4x4 */ 3683 ps_mem_tab[ENC_LOOP_TOP_NBR4X4].i4_mem_size = 3684 (u4_ctb_rows_in_a_frame + 1) * (((u4_width + MAX_CU_SIZE) >> 2) + 1) * sizeof(nbr_4x4_t) * 3685 i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel; 3686 3687 ps_mem_tab[ENC_LOOP_TOP_NBR4X4].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; 3688 3689 ps_mem_tab[ENC_LOOP_TOP_NBR4X4].i4_mem_alignment = 8; 3690 3691 /* memory to dump rate control parameters by each thread for each bit-rate instance */ 3692 /* RC params collated by each thread for each bit-rate instance separately */ 3693 ps_mem_tab[ENC_LOOP_RC_PARAMS].i4_mem_size = i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel * 3694 i4_num_proc_thrds * sizeof(enc_loop_rc_params_t); 3695 3696 ps_mem_tab[ENC_LOOP_RC_PARAMS].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; 3697 3698 ps_mem_tab[ENC_LOOP_RC_PARAMS].i4_mem_alignment = 8; 3699 /* Memory required for deblocking */ 3700 { 3701 /* Memory to store Qp of top4x4 blocks for each CTB row. 3702 This memory is allocated at frame level and shared across 3703 all cores. 
The Qp values are needed to form Qp-map(described 3704 in the ENC_LOOP_DEBLOCKING section below)*/ 3705 3706 UWORD32 u4_size_bs_memory, u4_size_qp_memory; 3707 UWORD32 u4_size_top_4x4_qp_memory; 3708 3709 /*Memory required to store Qp of top4x4 blocks for a CTB row for entire frame*/ 3710 /*Space required per CTB*/ 3711 u4_size_top_4x4_qp_memory = (MAX_CTB_SIZE / 4); 3712 /*Space required for entire CTB row*/ 3713 u4_size_top_4x4_qp_memory *= u4_ctb_in_a_row; 3714 /*Space required for entire frame*/ 3715 u4_size_top_4x4_qp_memory *= u4_ctb_rows_in_a_frame; 3716 /*Space required for multiple bitrate*/ 3717 u4_size_top_4x4_qp_memory *= i4_num_bitrate_inst; 3718 /*Space required for multiple frames in parallel*/ 3719 u4_size_top_4x4_qp_memory *= i4_num_enc_loop_frm_pllel; 3720 3721 ps_mem_tab[ENC_LOOP_QP_TOP_4X4].i4_mem_size = u4_size_top_4x4_qp_memory; 3722 ps_mem_tab[ENC_LOOP_QP_TOP_4X4].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; 3723 ps_mem_tab[ENC_LOOP_QP_TOP_4X4].i4_mem_alignment = 8; 3724 3725 /* Memory allocation of BS and Qp-map for deblocking at CTB-row level: 3726 ## Boundary Strength(Vertical): 3727 BS stored per CTB at one stretch i.e. for a 64x CTB first 8 entries belongs to first CTB 3728 of the row followed by 8 entries of second CTB and so on. 3729 8 entries: Includes left edge of current CTB and excludes right edge. 3730 ## Boundary Strength(Horizontal): 3731 Same as Vertical. 3732 8 entries: Includes top edge of current CTB and excludes bottom edge. 
3733 3734 ## Qp-map storage: 3735 T0 T1 T2 T3 T4 T5 ..........to the end of the CTB row 3736 00 01 02 03 04 05 ..........to the end of the CTB row 3737 10 11 12 13 14 15 ..........to the end of the CTB row 3738 20 21 22 23 24 25 ..........to the end of the CTB row 3739 30 31 32 33 34 35 ..........to the end of the CTB row 3740 40 41 42 43 44 45 ..........to the end of the CTB row 3741 ............................to the end of the CTB row 3742 upto height_of_CTB..........to the end of the CTB row 3743 3744 Qp is stored for each "4x4 block" in a proper 2-D array format (One entry for each 4x4). 3745 A 2-D array of height= (height_of_CTB +1), and width = (width_of_CTB). 3746 where, 3747 => height_of_CTB = number of 4x4 blocks in a CTB vertically, 3748 => +1 is done to store Qp of lowest 4x4-block layer of top-CTB 3749 in order to deblock top edge of current CTB. 3750 => width_of_CTB = number of 4x4 blocks in a CTB horizontally, 3751 */ 3752 3753 /*Memory(in bytes) required for storing Boundary Strength for entire CTB row*/ 3754 /*1 vertical edge per 8 pixel*/ 3755 u4_size_bs_memory = (MAX_CTB_SIZE >> 3); 3756 /*Vertical edges for entire width of CTB row*/ 3757 u4_size_bs_memory *= u4_ctb_in_a_row; 3758 /*Each vertical edge of CTB row is 4 bytes*/ 3759 u4_size_bs_memory = u4_size_bs_memory << 2; 3760 /*Adding Memory required for storing horizontal BS by doubling*/ 3761 u4_size_bs_memory = u4_size_bs_memory << 1; 3762 3763 /*Memory(in bytes) required for storing Qp at 4x4 level for entire CTB row*/ 3764 /*Number of 4x4 blocks in the width of a CTB*/ 3765 u4_size_qp_memory = (MAX_CTB_SIZE >> 2); 3766 /*Number of 4x4 blocks in the height of a CTB. 
Adding 1 to store Qp of lowest 3767 4x4-block layer of top-CTB in order to deblock top edge of current CTB*/ 3768 u4_size_qp_memory *= ((MAX_CTB_SIZE >> 2) + 1); 3769 /*Storage for entire CTB row*/ 3770 u4_size_qp_memory *= u4_ctb_in_a_row; 3771 3772 /*Multiplying by i4_num_proc_thrds to assign memory for each core*/ 3773 ps_mem_tab[ENC_LOOP_DEBLOCKING].i4_mem_size = 3774 i4_num_proc_thrds * (u4_size_bs_memory + u4_size_qp_memory); 3775 3776 ps_mem_tab[ENC_LOOP_DEBLOCKING].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; 3777 3778 ps_mem_tab[ENC_LOOP_DEBLOCKING].i4_mem_alignment = 8; 3779 } 3780 3781 /* Memory required to store pred for 422 chroma */ 3782 ps_mem_tab[ENC_LOOP_422_CHROMA_INTRA_PRED].i4_mem_size = 3783 i4_num_proc_thrds * MAX_CTB_SIZE * MAX_CTB_SIZE * 2 * 3784 (i4_chroma_format == IV_YUV_422SP_UV) * 3785 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8); 3786 3787 ps_mem_tab[ENC_LOOP_422_CHROMA_INTRA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; 3788 3789 ps_mem_tab[ENC_LOOP_422_CHROMA_INTRA_PRED].i4_mem_alignment = 8; 3790 3791 /* Memory for inter pred buffers */ 3792 { 3793 WORD32 i4_num_bufs_per_thread = 0; 3794 3795 WORD32 i4_buf_size_per_cand = 3796 (MAX_CTB_SIZE) * (MAX_CTB_SIZE) * 3797 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 
2 : 1) * sizeof(UWORD8); 3798 WORD32 i4_quality_preset = 3799 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset; 3800 switch(i4_quality_preset) 3801 { 3802 case IHEVCE_QUALITY_P0: 3803 { 3804 i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_PQ; 3805 break; 3806 } 3807 case IHEVCE_QUALITY_P2: 3808 { 3809 i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_HQ; 3810 break; 3811 } 3812 case IHEVCE_QUALITY_P3: 3813 { 3814 i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_MS; 3815 break; 3816 } 3817 case IHEVCE_QUALITY_P4: 3818 { 3819 i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_HS; 3820 break; 3821 } 3822 case IHEVCE_QUALITY_P5: 3823 case IHEVCE_QUALITY_P6: 3824 case IHEVCE_QUALITY_P7: 3825 { 3826 i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_ES; 3827 break; 3828 } 3829 default: 3830 { 3831 ASSERT(0); 3832 } 3833 } 3834 3835 i4_num_bufs_per_thread += 4; 3836 3837 ps_mem_tab[ENC_LOOP_INTER_PRED].i4_mem_size = 3838 i4_num_bufs_per_thread * i4_num_proc_thrds * i4_buf_size_per_cand; 3839 3840 ps_mem_tab[ENC_LOOP_INTER_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; 3841 3842 ps_mem_tab[ENC_LOOP_INTER_PRED].i4_mem_alignment = 8; 3843 } 3844 3845 /* Memory required to store chroma intra pred */ 3846 ps_mem_tab[ENC_LOOP_CHROMA_PRED_INTRA].i4_mem_size = 3847 i4_num_proc_thrds * (MAX_TU_SIZE) * (MAX_TU_SIZE)*2 * NUM_POSSIBLE_TU_SIZES_CHR_INTRA_SATD * 3848 ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1) * 3849 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8); 3850 3851 ps_mem_tab[ENC_LOOP_CHROMA_PRED_INTRA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; 3852 3853 ps_mem_tab[ENC_LOOP_CHROMA_PRED_INTRA].i4_mem_alignment = 8; 3854 3855 /* Memory required to store pred for reference substitution output */ 3856 ps_mem_tab[ENC_LOOP_REF_SUB_OUT].i4_mem_size = 3857 i4_num_proc_thrds * ((MAX_TU_SIZE * 2 * 2) + 4) * 3858 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 
2 : 1) * sizeof(UWORD8); 3859 3860 ps_mem_tab[ENC_LOOP_REF_SUB_OUT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; 3861 3862 ps_mem_tab[ENC_LOOP_REF_SUB_OUT].i4_mem_alignment = 8; 3863 3864 /* Memory required to store pred for reference filtering output */ 3865 ps_mem_tab[ENC_LOOP_REF_FILT_OUT].i4_mem_size = 3866 i4_num_proc_thrds * ((MAX_TU_SIZE * 2 * 2) + 4) * 3867 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8); 3868 3869 ps_mem_tab[ENC_LOOP_REF_FILT_OUT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; 3870 3871 ps_mem_tab[ENC_LOOP_REF_FILT_OUT].i4_mem_alignment = 8; 3872 3873 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS 3874 if(ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset == 0) 3875 #endif 3876 { 3877 /* Memory assignments for recon storage during CU Recursion */ 3878 ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].i4_mem_size = 3879 i4_num_proc_thrds * (MAX_CU_SIZE * MAX_CU_SIZE) * 3880 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8); 3881 3882 ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; 3883 3884 ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].i4_mem_alignment = 8; 3885 3886 ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].i4_mem_size = 3887 i4_num_proc_thrds * (MAX_CU_SIZE * (MAX_CU_SIZE >> 1)) * 3888 ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1) * 3889 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 
2 : 1) * sizeof(UWORD8); 3890 3891 ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; 3892 3893 ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].i4_mem_alignment = 8; 3894 } 3895 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS 3896 else 3897 { 3898 /* Memory assignments for recon storage during CU Recursion */ 3899 ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].i4_mem_size = 0; 3900 3901 ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; 3902 3903 ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].i4_mem_alignment = 8; 3904 3905 ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].i4_mem_size = 0; 3906 3907 ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; 3908 3909 ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].i4_mem_alignment = 8; 3910 } 3911 #endif 3912 3913 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS 3914 if(ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset == 0) 3915 #endif 3916 { 3917 /* Memory assignments for pred storage during CU Recursion */ 3918 ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].i4_mem_size = 3919 i4_num_proc_thrds * (MAX_CU_SIZE * MAX_CU_SIZE) * 3920 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8); 3921 3922 ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; 3923 3924 ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].i4_mem_alignment = 8; 3925 3926 ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].i4_mem_size = 3927 i4_num_proc_thrds * (MAX_CU_SIZE * (MAX_CU_SIZE >> 1)) * 3928 ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1) * 3929 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 
2 : 1) * sizeof(UWORD8); 3930 3931 ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; 3932 3933 ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].i4_mem_alignment = 8; 3934 } 3935 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS 3936 else 3937 { 3938 /* Memory assignments for pred storage during CU Recursion */ 3939 ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].i4_mem_size = 0; 3940 3941 ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; 3942 3943 ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].i4_mem_alignment = 8; 3944 3945 ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].i4_mem_size = 0; 3946 3947 ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; 3948 3949 ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].i4_mem_alignment = 8; 3950 } 3951 #endif 3952 3953 /* Memory assignments for CTB left luma data storage */ 3954 ps_mem_tab[ENC_LOOP_LEFT_LUMA_DATA].i4_mem_size = 3955 i4_num_proc_thrds * (MAX_CTB_SIZE + MAX_TU_SIZE) * 3956 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8); 3957 3958 ps_mem_tab[ENC_LOOP_LEFT_LUMA_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; 3959 3960 ps_mem_tab[ENC_LOOP_LEFT_LUMA_DATA].i4_mem_alignment = 8; 3961 3962 /* Memory assignments for CTB left chroma data storage */ 3963 ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].i4_mem_size = 3964 i4_num_proc_thrds * (MAX_CTB_SIZE + MAX_TU_SIZE) * 3965 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8); 3966 ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].i4_mem_size <<= 3967 ((i4_chroma_format == IV_YUV_422SP_UV) ? 
1 : 0); 3968 3969 ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; 3970 3971 ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].i4_mem_alignment = 8; 3972 3973 /* Memory required for SAO */ 3974 { 3975 WORD32 num_vert_units; 3976 WORD32 num_horz_units; 3977 WORD32 ctb_aligned_ht, ctb_aligned_wd; 3978 WORD32 luma_buf, chroma_buf; 3979 3980 num_vert_units = u4_height / MAX_CTB_SIZE; 3981 num_horz_units = u4_width / MAX_CTB_SIZE; 3982 3983 ctb_aligned_ht = u4_height; 3984 ctb_aligned_wd = u4_width; 3985 3986 /* Memory for top buffer. 1 extra width is required for top buf ptr for row 0 3987 * and 1 extra location is required for top left buf ptr for row 0 3988 * Also 1 extra byte is required for every row for top left pixel if 3989 * the top left ptr is to be passed to leaf level unconditionally 3990 */ 3991 luma_buf = (ctb_aligned_ht + (ctb_aligned_wd + 1) * (num_vert_units + 1)) * 3992 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); 3993 chroma_buf = (ctb_aligned_ht + (ctb_aligned_wd + 2) * (num_vert_units + 1)) * 3994 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); 3995 3996 ps_mem_tab[ENC_LOOP_SAO].i4_mem_size = 3997 (luma_buf + chroma_buf) * (i4_num_bitrate_inst) * (i4_num_enc_loop_frm_pllel); 3998 3999 /* Add the memory required to store the sao information of top ctb for top merge 4000 * This is frame level buffer. 
4001 */ 4002 ps_mem_tab[ENC_LOOP_SAO].i4_mem_size += 4003 ((num_horz_units * sizeof(sao_enc_t)) * num_vert_units) * (i4_num_bitrate_inst) * 4004 (i4_num_enc_loop_frm_pllel); 4005 4006 ps_mem_tab[ENC_LOOP_SAO].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; 4007 4008 ps_mem_tab[ENC_LOOP_SAO].i4_mem_alignment = 8; 4009 } 4010 4011 /* Memory for CU level Coeff data buffer */ 4012 { 4013 /* 16 additional bytes are required to ensure alignment */ 4014 { 4015 ps_mem_tab[ENC_LOOP_CU_COEFF_DATA].i4_mem_size = 4016 i4_num_proc_thrds * 4017 (((MAX_LUMA_COEFFS_CTB + 4018 (MAX_CHRM_COEFFS_CTB << ((i4_chroma_format == IV_YUV_422SP_UV) ? 1 : 0))) + 4019 16) * 4020 (2) * sizeof(UWORD8)); 4021 } 4022 4023 ps_mem_tab[ENC_LOOP_CU_COEFF_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; 4024 4025 ps_mem_tab[ENC_LOOP_CU_COEFF_DATA].i4_mem_alignment = 16; 4026 4027 ps_mem_tab[ENC_LOOP_CU_RECUR_COEFF_DATA].i4_mem_size = 4028 i4_num_proc_thrds * 4029 (MAX_LUMA_COEFFS_CTB + 4030 (MAX_CHRM_COEFFS_CTB << ((i4_chroma_format == IV_YUV_422SP_UV) ? 1 : 0))) * 4031 sizeof(UWORD8); 4032 4033 ps_mem_tab[ENC_LOOP_CU_RECUR_COEFF_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; 4034 4035 ps_mem_tab[ENC_LOOP_CU_RECUR_COEFF_DATA].i4_mem_alignment = 16; 4036 } 4037 4038 /* Memory for CU dequant data buffer */ 4039 { 4040 /* 16 additional bytes are required to ensure alignment */ 4041 { 4042 ps_mem_tab[ENC_LOOP_CU_DEQUANT_DATA].i4_mem_size = 4043 i4_num_proc_thrds * 4044 (((i4_chroma_format == IV_YUV_422SP_UV) ? 
(MAX_CU_SIZE * (MAX_CU_SIZE << 1)) 4045 : (MAX_CU_SIZE * (MAX_CU_SIZE >> 1) * 3)) + 4046 8) * 4047 (2) * sizeof(WORD16); 4048 } 4049 4050 ps_mem_tab[ENC_LOOP_CU_DEQUANT_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; 4051 4052 ps_mem_tab[ENC_LOOP_CU_DEQUANT_DATA].i4_mem_alignment = 16; 4053 } 4054 4055 /* Memory for Recon Datastore (Used around and within the RDOPT loop) */ 4056 { 4057 WORD32 i4_memSize_perThread; 4058 4059 WORD32 i4_chroma_memSize_perThread = 0; 4060 /* 2 bufs each allocated to the two 'enc_loop_cu_final_prms_t' structs */ 4061 /* used in RDOPT to store cur and best modes' data */ 4062 WORD32 i4_luma_memSize_perThread = 4063 4 * MAX_CU_SIZE * MAX_CU_SIZE * 4064 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); 4065 4066 /* 'Glossary' for comments in the following codeBlock */ 4067 /* 1 - 2 Bufs for storing recons of the best modes determined in the */ 4068 /* function 'ihevce_intra_chroma_pred_mode_selector' */ 4069 /* 2 - 1 buf each allocated to the two 'enc_loop_cu_final_prms_t' structs */ 4070 /* used in RDOPT to store cur and best modes' data */ 4071 if(i4_chroma_format == IV_YUV_422SP_UV) 4072 { 4073 WORD32 i4_quality_preset = 4074 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset; 4075 switch(i4_quality_preset) 4076 { 4077 case IHEVCE_QUALITY_P0: 4078 { 4079 /* 1 */ 4080 i4_chroma_memSize_perThread += 4081 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_PQ * 4082 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); 4083 4084 /* 2 */ 4085 i4_chroma_memSize_perThread += 4086 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_PQ * 4087 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); 4088 4089 break; 4090 } 4091 case IHEVCE_QUALITY_P2: 4092 { 4093 /* 1 */ 4094 i4_chroma_memSize_perThread += 4095 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_HQ * 4096 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 
2 : 1); 4097 4098 /* 2 */ 4099 i4_chroma_memSize_perThread += 4100 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HQ * 4101 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); 4102 4103 break; 4104 } 4105 case IHEVCE_QUALITY_P3: 4106 { 4107 /* 1 */ 4108 i4_chroma_memSize_perThread += 4109 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_MS * 4110 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); 4111 4112 /* 2 */ 4113 i4_chroma_memSize_perThread += 4114 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_MS * 4115 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); 4116 4117 break; 4118 } 4119 case IHEVCE_QUALITY_P4: 4120 { 4121 /* 1 */ 4122 i4_chroma_memSize_perThread += 4123 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_HS * 4124 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); 4125 4126 /* 2 */ 4127 i4_chroma_memSize_perThread += 4128 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HS * 4129 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); 4130 4131 break; 4132 } 4133 case IHEVCE_QUALITY_P5: 4134 { 4135 /* 1 */ 4136 i4_chroma_memSize_perThread += 4137 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_XS * 4138 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); 4139 4140 /* 2 */ 4141 i4_chroma_memSize_perThread += 4142 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS * 4143 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); 4144 4145 break; 4146 } 4147 case IHEVCE_QUALITY_P6: 4148 case IHEVCE_QUALITY_P7: 4149 { 4150 /* 1 */ 4151 i4_chroma_memSize_perThread += 4152 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_XS6 * 4153 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 
2 : 1); 4154 4155 /* 2 */ 4156 i4_chroma_memSize_perThread += 4157 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS6 * 4158 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); 4159 4160 break; 4161 } 4162 } 4163 } 4164 else 4165 { 4166 WORD32 i4_quality_preset = 4167 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset; 4168 switch(i4_quality_preset) 4169 { 4170 case IHEVCE_QUALITY_P0: 4171 { 4172 /* 1 */ 4173 i4_chroma_memSize_perThread += 4174 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_PQ * 4175 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); 4176 4177 /* 2 */ 4178 i4_chroma_memSize_perThread += 4179 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * 4180 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_PQ * 4181 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); 4182 4183 break; 4184 } 4185 case IHEVCE_QUALITY_P2: 4186 { 4187 /* 1 */ 4188 i4_chroma_memSize_perThread += 4189 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_HQ * 4190 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); 4191 4192 /* 2 */ 4193 i4_chroma_memSize_perThread += 4194 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * 4195 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HQ * 4196 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); 4197 4198 break; 4199 } 4200 case IHEVCE_QUALITY_P3: 4201 { 4202 /* 1 */ 4203 i4_chroma_memSize_perThread += 4204 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_MS * 4205 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); 4206 4207 /* 2 */ 4208 i4_chroma_memSize_perThread += 4209 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * 4210 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_MS * 4211 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 
2 : 1); 4212 4213 break; 4214 } 4215 case IHEVCE_QUALITY_P4: 4216 { 4217 /* 1 */ 4218 i4_chroma_memSize_perThread += 4219 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_HS * 4220 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); 4221 4222 /* 2 */ 4223 i4_chroma_memSize_perThread += 4224 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * 4225 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HS * 4226 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); 4227 4228 break; 4229 } 4230 case IHEVCE_QUALITY_P5: 4231 { 4232 /* 1 */ 4233 i4_chroma_memSize_perThread += 4234 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_XS * 4235 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); 4236 4237 /* 2 */ 4238 i4_chroma_memSize_perThread += 4239 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * 4240 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS * 4241 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); 4242 4243 break; 4244 } 4245 case IHEVCE_QUALITY_P6: 4246 case IHEVCE_QUALITY_P7: 4247 { 4248 /* 1 */ 4249 i4_chroma_memSize_perThread += 4250 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_XS6 * 4251 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); 4252 4253 /* 2 */ 4254 i4_chroma_memSize_perThread += 4255 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * 4256 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS6 * 4257 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 
2 : 1); 4258 4259 break; 4260 } 4261 } 4262 } 4263 4264 i4_memSize_perThread = i4_luma_memSize_perThread + i4_chroma_memSize_perThread; 4265 4266 ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].i4_mem_size = 4267 i4_num_proc_thrds * i4_memSize_perThread * sizeof(UWORD8); 4268 4269 ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; 4270 4271 ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].i4_mem_alignment = 16; 4272 } 4273 4274 n_tabs = NUM_ENC_LOOP_MEM_RECS; 4275 4276 /*************************************************************************/ 4277 /* --- EncLoop Deblock sync Dep Mngr Mem requests -- */ 4278 /*************************************************************************/ 4279 4280 /* Fill the memtabs for EncLoop Deblock Dep Mngr */ 4281 { 4282 WORD32 count; 4283 WORD32 num_vert_units; 4284 WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height; 4285 4286 ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units); 4287 ASSERT(num_vert_units > 0); 4288 for(count = 0; count < i4_num_enc_loop_frm_pllel; count++) 4289 { 4290 for(ctr = 0; ctr < i4_num_bitrate_inst; ctr++) 4291 { 4292 n_tabs += ihevce_dmgr_get_mem_recs( 4293 &ps_mem_tab[n_tabs], 4294 DEP_MNGR_ROW_ROW_SYNC, 4295 num_vert_units, 4296 ps_init_prms->s_app_tile_params.i4_num_tile_cols, 4297 i4_num_proc_thrds, 4298 i4_mem_space); 4299 } 4300 } 4301 } 4302 4303 /*************************************************************************/ 4304 /* --- EncLoop Top-Right CU sync Dep Mngr Mem requests -- */ 4305 /*************************************************************************/ 4306 4307 /* Fill the memtabs for Top-Right CU sync Dep Mngr */ 4308 { 4309 WORD32 count; 4310 WORD32 num_vert_units; 4311 WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height; 4312 ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units); 4313 ASSERT(num_vert_units > 0); 4314 4315 for(count = 0; count < i4_num_enc_loop_frm_pllel; count++) 4316 { 4317 
for(ctr = 0; ctr < i4_num_bitrate_inst; ctr++) 4318 { 4319 n_tabs += ihevce_dmgr_get_mem_recs( 4320 &ps_mem_tab[n_tabs], 4321 DEP_MNGR_ROW_ROW_SYNC, 4322 num_vert_units, 4323 ps_init_prms->s_app_tile_params.i4_num_tile_cols, 4324 i4_num_proc_thrds, 4325 i4_mem_space); 4326 } 4327 } 4328 } 4329 4330 /*************************************************************************/ 4331 /* --- EncLoop Aux. on Ref. bitrate sync Dep Mngr Mem requests -- */ 4332 /*************************************************************************/ 4333 4334 /* Fill the memtabs for EncLoop Aux. on Ref. bitrate Dep Mngr */ 4335 { 4336 WORD32 count; 4337 WORD32 num_vert_units; 4338 WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height; 4339 4340 ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units); 4341 ASSERT(num_vert_units > 0); 4342 4343 for(count = 0; count < i4_num_enc_loop_frm_pllel; count++) 4344 { 4345 for(ctr = 1; ctr < i4_num_bitrate_inst; ctr++) 4346 { 4347 n_tabs += ihevce_dmgr_get_mem_recs( 4348 &ps_mem_tab[n_tabs], 4349 DEP_MNGR_ROW_ROW_SYNC, 4350 num_vert_units, 4351 ps_init_prms->s_app_tile_params.i4_num_tile_cols, 4352 i4_num_proc_thrds, 4353 i4_mem_space); 4354 } 4355 } 4356 } 4357 4358 return (n_tabs); 4359 } 4360 4361 /*! 4362 ****************************************************************************** 4363 * \if Function name : ihevce_enc_loop_init \endif 4364 * 4365 * \brief 4366 * Intialization for ENC_LOOP context state structure . 
4367 * 4368 * \param[in] ps_mem_tab : pointer to memory descriptors table 4369 * \param[in] ps_init_prms : Create time static parameters 4370 * \param[in] pv_osal_handle : Osal handle 4371 * 4372 * \return 4373 * None 4374 * 4375 * \author 4376 * Ittiam 4377 * 4378 ***************************************************************************** 4379 */ 4380 void *ihevce_enc_loop_init( 4381 iv_mem_rec_t *ps_mem_tab, 4382 ihevce_static_cfg_params_t *ps_init_prms, 4383 WORD32 i4_num_proc_thrds, 4384 void *pv_osal_handle, 4385 func_selector_t *ps_func_selector, 4386 rc_quant_t *ps_rc_quant_ctxt, 4387 ihevce_tile_params_t *ps_tile_params_base, 4388 WORD32 i4_resolution_id, 4389 WORD32 i4_num_enc_loop_frm_pllel, 4390 UWORD8 u1_is_popcnt_available) 4391 { 4392 ihevce_enc_loop_master_ctxt_t *ps_master_ctxt; 4393 ihevce_enc_loop_ctxt_t *ps_ctxt; 4394 WORD32 ctr, n_tabs; 4395 UWORD32 u4_width, u4_height; 4396 UWORD32 u4_ctb_in_a_row, u4_ctb_rows_in_a_frame; 4397 UWORD32 u4_size_bs_memory, u4_size_qp_memory; 4398 UWORD8 *pu1_deblk_base; /*Store the base address of deblcoking memory*/ 4399 WORD32 i; 4400 WORD32 i4_num_bitrate_inst = 4401 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_num_bitrate_instances; 4402 enc_loop_rc_params_t *ps_enc_loop_rc_params; 4403 UWORD8 *pu1_sao_base; /* store the base address of sao*/ 4404 UWORD32 u4_ctb_aligned_wd, ctb_size, u4_ctb_aligned_ht, num_vert_units; 4405 WORD32 i4_chroma_format = ps_init_prms->s_src_prms.i4_chr_format; 4406 WORD32 is_hbd_mode = (ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8); 4407 WORD32 i4_enc_frm_id; 4408 WORD32 num_cu_in_ctb; 4409 WORD32 i4_num_tile_cols = 1; //Default value is 1 4410 4411 /* ENC_LOOP state structure */ 4412 ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)ps_mem_tab[ENC_LOOP_CTXT].pv_base; 4413 4414 ps_master_ctxt->i4_num_proc_thrds = i4_num_proc_thrds; 4415 4416 ps_ctxt = (ihevce_enc_loop_ctxt_t *)ps_mem_tab[ENC_LOOP_THRDS_CTXT].pv_base; 4417 ps_enc_loop_rc_params = 
(enc_loop_rc_params_t *)ps_mem_tab[ENC_LOOP_RC_PARAMS].pv_base; 4418 ps_ctxt->ps_rc_quant_ctxt = ps_rc_quant_ctxt; 4419 /*Calculation of memory sizes for deblocking*/ 4420 { 4421 /*width of the input YUV to be encoded. */ 4422 u4_width = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width; 4423 /*making the width a multiple of CTB size*/ 4424 u4_width += SET_CTB_ALIGN( 4425 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, MAX_CTB_SIZE); 4426 4427 u4_ctb_in_a_row = (u4_width / MAX_CTB_SIZE); 4428 4429 /*height of the input YUV to be encoded */ 4430 u4_height = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height; 4431 /*making the height a multiple of CTB size*/ 4432 u4_height += SET_CTB_ALIGN( 4433 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, MAX_CTB_SIZE); 4434 4435 u4_ctb_rows_in_a_frame = (u4_height / MAX_CTB_SIZE); 4436 4437 /*Memory(in bytes) required for storing Boundary Strength for entire CTB row*/ 4438 /*1 vertical edge per 8 pixel*/ 4439 u4_size_bs_memory = (MAX_CTB_SIZE >> 3); 4440 /*Vertical edges for entire width of CTB row*/ 4441 u4_size_bs_memory *= u4_ctb_in_a_row; 4442 /*Each vertical edge of CTB row is 4 bytes*/ 4443 u4_size_bs_memory = u4_size_bs_memory << 2; 4444 /*Adding Memory required for storing horizontal BS by doubling*/ 4445 u4_size_bs_memory = u4_size_bs_memory << 1; 4446 4447 /*Memory(in bytes) required for storing Qp at 4x4 level for entire CTB row*/ 4448 /*Number of 4x4 blocks in the width of a CTB*/ 4449 u4_size_qp_memory = (MAX_CTB_SIZE >> 2); 4450 /*Number of 4x4 blocks in the height of a CTB. 
Adding 1 to store Qp of lowest 4451 4x4-block layer of top-CTB in order to deblock top edge of current CTB*/ 4452 u4_size_qp_memory *= ((MAX_CTB_SIZE >> 2) + 1); 4453 /*Storage for entire CTB row*/ 4454 u4_size_qp_memory *= u4_ctb_in_a_row; 4455 4456 pu1_deblk_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_DEBLOCKING].pv_base; 4457 } 4458 4459 /*Derive the base pointer of sao*/ 4460 pu1_sao_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_SAO].pv_base; 4461 ctb_size = (1 << ps_init_prms->s_config_prms.i4_max_log2_cu_size); 4462 u4_ctb_aligned_wd = u4_width; 4463 u4_ctb_aligned_ht = u4_height; 4464 num_vert_units = (u4_height) / ctb_size; 4465 4466 for(ctr = 0; ctr < ps_master_ctxt->i4_num_proc_thrds; ctr++) 4467 { 4468 ps_master_ctxt->aps_enc_loop_thrd_ctxt[ctr] = ps_ctxt; 4469 /* Store Tile params base into EncLoop context */ 4470 ps_ctxt->pv_tile_params_base = (void *)ps_tile_params_base; 4471 ihevce_cmn_utils_instr_set_router( 4472 &ps_ctxt->s_cmn_opt_func, u1_is_popcnt_available, ps_init_prms->e_arch_type); 4473 ihevce_sifter_sad_fxn_assigner( 4474 (FT_SAD_EVALUATOR **)(&ps_ctxt->pv_evalsad_pt_npu_mxn_8bit), ps_init_prms->e_arch_type); 4475 ps_ctxt->i4_max_search_range_horizontal = 4476 ps_init_prms->s_config_prms.i4_max_search_range_horz; 4477 ps_ctxt->i4_max_search_range_vertical = 4478 ps_init_prms->s_config_prms.i4_max_search_range_vert; 4479 4480 ps_ctxt->i4_quality_preset = 4481 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset; 4482 4483 if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P7) 4484 { 4485 ps_ctxt->i4_quality_preset = IHEVCE_QUALITY_P6; 4486 } 4487 4488 ps_ctxt->i4_num_proc_thrds = ps_master_ctxt->i4_num_proc_thrds; 4489 4490 ps_ctxt->i4_rc_pass = ps_init_prms->s_pass_prms.i4_pass; 4491 4492 ps_ctxt->u1_chroma_array_type = (i4_chroma_format == IV_YUV_422SP_UV) ? 
2 : 1; 4493 4494 ps_ctxt->s_deblk_prms.u1_chroma_array_type = ps_ctxt->u1_chroma_array_type; 4495 4496 ps_ctxt->pi2_scal_mat = (WORD16 *)ps_mem_tab[ENC_LOOP_SCALE_MAT].pv_base; 4497 4498 ps_ctxt->pi2_rescal_mat = (WORD16 *)ps_mem_tab[ENC_LOOP_RESCALE_MAT].pv_base; 4499 4500 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2) 4501 { 4502 ps_ctxt->i4_use_ctb_level_lamda = 0; 4503 } 4504 else 4505 { 4506 ps_ctxt->i4_use_ctb_level_lamda = 0; 4507 } 4508 4509 /** Register the function selector pointer*/ 4510 ps_ctxt->ps_func_selector = ps_func_selector; 4511 4512 ps_ctxt->s_mc_ctxt.ps_func_selector = ps_func_selector; 4513 4514 /* Initiallization for non-distributed mode */ 4515 ps_ctxt->s_mc_ctxt.ai4_tile_xtra_pel[0] = 0; 4516 ps_ctxt->s_mc_ctxt.ai4_tile_xtra_pel[1] = 0; 4517 ps_ctxt->s_mc_ctxt.ai4_tile_xtra_pel[2] = 0; 4518 ps_ctxt->s_mc_ctxt.ai4_tile_xtra_pel[3] = 0; 4519 4520 ps_ctxt->s_deblk_prms.ps_func_selector = ps_func_selector; 4521 ps_ctxt->i4_top_row_luma_stride = (u4_width + MAX_CU_SIZE + 1); 4522 4523 ps_ctxt->i4_frm_top_row_luma_size = 4524 ps_ctxt->i4_top_row_luma_stride * (u4_ctb_rows_in_a_frame + 1); 4525 4526 ps_ctxt->i4_top_row_chroma_stride = (u4_width + MAX_CU_SIZE + 2); 4527 4528 ps_ctxt->i4_frm_top_row_chroma_size = 4529 ps_ctxt->i4_top_row_chroma_stride * (u4_ctb_rows_in_a_frame + 1); 4530 4531 { 4532 for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++) 4533 { 4534 /* +1 is to provision top left pel */ 4535 ps_ctxt->apv_frm_top_row_luma[i4_enc_frm_id] = 4536 (UWORD8 *)ps_mem_tab[ENC_LOOP_TOP_LUMA].pv_base + 1 + 4537 (ps_ctxt->i4_frm_top_row_luma_size * i4_enc_frm_id * i4_num_bitrate_inst); 4538 4539 /* pointer incremented by 1 row to avoid OOB access in 0th row */ 4540 ps_ctxt->apv_frm_top_row_luma[i4_enc_frm_id] = 4541 (UWORD8 *)ps_ctxt->apv_frm_top_row_luma[i4_enc_frm_id] + 4542 ps_ctxt->i4_top_row_luma_stride; 4543 4544 /* +2 is to provision top left pel */ 4545 ps_ctxt->apv_frm_top_row_chroma[i4_enc_frm_id] = 
4546 (UWORD8 *)ps_mem_tab[ENC_LOOP_TOP_CHROMA].pv_base + 2 + 4547 (ps_ctxt->i4_frm_top_row_chroma_size * i4_enc_frm_id * i4_num_bitrate_inst); 4548 4549 /* pointer incremented by 1 row to avoid OOB access in 0th row */ 4550 ps_ctxt->apv_frm_top_row_chroma[i4_enc_frm_id] = 4551 (UWORD8 *)ps_ctxt->apv_frm_top_row_chroma[i4_enc_frm_id] + 4552 ps_ctxt->i4_top_row_chroma_stride; 4553 } 4554 } 4555 4556 /* +1 is to provision top left nbr */ 4557 ps_ctxt->i4_top_row_nbr_stride = (((u4_width + MAX_CU_SIZE) >> 2) + 1); 4558 ps_ctxt->i4_frm_top_row_nbr_size = 4559 ps_ctxt->i4_top_row_nbr_stride * (u4_ctb_rows_in_a_frame + 1); 4560 for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++) 4561 { 4562 ps_ctxt->aps_frm_top_row_nbr[i4_enc_frm_id] = 4563 (nbr_4x4_t *)ps_mem_tab[ENC_LOOP_TOP_NBR4X4].pv_base + 1 + 4564 (ps_ctxt->i4_frm_top_row_nbr_size * i4_enc_frm_id * i4_num_bitrate_inst); 4565 ps_ctxt->aps_frm_top_row_nbr[i4_enc_frm_id] += ps_ctxt->i4_top_row_nbr_stride; 4566 } 4567 4568 num_cu_in_ctb = ctb_size / MIN_CU_SIZE; 4569 num_cu_in_ctb *= num_cu_in_ctb; 4570 4571 /* pointer incremented by 1 row to avoid OOB access in 0th row */ 4572 4573 /* Memory for CU level Coeff data buffer */ 4574 { 4575 WORD32 i4_16byte_boundary_overshoot; 4576 WORD32 buf_size_per_cu; 4577 WORD32 buf_size_per_thread_wo_alignment_req; 4578 WORD32 buf_size_per_thread; 4579 4580 buf_size_per_cu = 4581 ((MAX_LUMA_COEFFS_CTB + 4582 (MAX_CHRM_COEFFS_CTB << ((i4_chroma_format == IV_YUV_422SP_UV) ? 
1 : 0))) + 4583 16) * 4584 sizeof(UWORD8); 4585 buf_size_per_thread_wo_alignment_req = buf_size_per_cu - 16 * sizeof(UWORD8); 4586 4587 { 4588 buf_size_per_thread = buf_size_per_cu * (2); 4589 4590 for(i = 0; i < 2; i++) 4591 { 4592 ps_ctxt->as_cu_prms[i].pu1_cu_coeffs = 4593 (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_COEFF_DATA].pv_base + 4594 (ctr * buf_size_per_thread) + (i * buf_size_per_cu); 4595 4596 i4_16byte_boundary_overshoot = 4597 ((LWORD64)ps_ctxt->as_cu_prms[i].pu1_cu_coeffs & 0xf); 4598 4599 ps_ctxt->as_cu_prms[i].pu1_cu_coeffs += (16 - i4_16byte_boundary_overshoot); 4600 } 4601 } 4602 4603 ps_ctxt->pu1_cu_recur_coeffs = 4604 (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_COEFF_DATA].pv_base + 4605 (ctr * buf_size_per_thread_wo_alignment_req); 4606 } 4607 4608 /* Memory for CU dequant data buffer */ 4609 { 4610 WORD32 buf_size_per_thread; 4611 WORD32 i4_16byte_boundary_overshoot; 4612 4613 WORD32 buf_size_per_cu = 4614 (((i4_chroma_format == IV_YUV_422SP_UV) ? (MAX_CU_SIZE * (MAX_CU_SIZE << 1)) 4615 : (MAX_CU_SIZE * (MAX_CU_SIZE >> 1) * 3)) + 4616 8) * 4617 sizeof(WORD16); 4618 4619 { 4620 buf_size_per_thread = buf_size_per_cu * 2; 4621 4622 for(i = 0; i < 2; i++) 4623 { 4624 ps_ctxt->as_cu_prms[i].pi2_cu_deq_coeffs = 4625 (WORD16 4626 *)((UWORD8 *)ps_mem_tab[ENC_LOOP_CU_DEQUANT_DATA].pv_base + (ctr * buf_size_per_thread) + (i * buf_size_per_cu)); 4627 4628 i4_16byte_boundary_overshoot = 4629 ((LWORD64)ps_ctxt->as_cu_prms[i].pi2_cu_deq_coeffs & 0xf); 4630 4631 ps_ctxt->as_cu_prms[i].pi2_cu_deq_coeffs = 4632 (WORD16 4633 *)((UWORD8 *)ps_ctxt->as_cu_prms[i].pi2_cu_deq_coeffs + (16 - i4_16byte_boundary_overshoot)); 4634 } 4635 } 4636 } 4637 4638 /*------ Deblocking memory's pointers assignements starts ------*/ 4639 4640 /*Assign stride = 4x4 blocks in horizontal edge*/ 4641 ps_ctxt->s_deblk_ctbrow_prms.u4_qp_top_4x4_buf_strd = (MAX_CTB_SIZE / 4) * u4_ctb_in_a_row; 4642 4643 ps_ctxt->s_deblk_ctbrow_prms.u4_qp_top_4x4_buf_size = 4644 
ps_ctxt->s_deblk_ctbrow_prms.u4_qp_top_4x4_buf_strd * u4_ctb_rows_in_a_frame; 4645 4646 /*Assign frame level memory to store the Qp of 4647 top 4x4 neighbours of each CTB row*/ 4648 for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++) 4649 { 4650 ps_ctxt->s_deblk_ctbrow_prms.api1_qp_top_4x4_ctb_row[i4_enc_frm_id] = 4651 (WORD8 *)ps_mem_tab[ENC_LOOP_QP_TOP_4X4].pv_base + 4652 (ps_ctxt->s_deblk_ctbrow_prms.u4_qp_top_4x4_buf_size * i4_num_bitrate_inst * 4653 i4_enc_frm_id); 4654 } 4655 4656 ps_ctxt->s_deblk_ctbrow_prms.pu4_ctb_row_bs_vert = (UWORD32 *)pu1_deblk_base; 4657 4658 ps_ctxt->s_deblk_ctbrow_prms.pu4_ctb_row_bs_horz = 4659 (UWORD32 *)(pu1_deblk_base + (u4_size_bs_memory >> 1)); 4660 4661 ps_ctxt->s_deblk_ctbrow_prms.pi1_ctb_row_qp = (WORD8 *)pu1_deblk_base + u4_size_bs_memory; 4662 4663 /*Assign stride = 4x4 blocks in horizontal edge*/ 4664 ps_ctxt->s_deblk_ctbrow_prms.u4_qp_buffer_stride = (MAX_CTB_SIZE / 4) * u4_ctb_in_a_row; 4665 4666 pu1_deblk_base += (u4_size_bs_memory + u4_size_qp_memory); 4667 4668 /*------Deblocking memory's pointers assignements ends ------*/ 4669 4670 /*------SAO memory's pointer assignment starts------------*/ 4671 if(!is_hbd_mode) 4672 { 4673 /* 2 is added to allocate top left pixel */ 4674 ps_ctxt->s_sao_ctxt_t.i4_top_luma_buf_size = 4675 u4_ctb_aligned_ht + (u4_ctb_aligned_wd + 1) * (num_vert_units + 1); 4676 ps_ctxt->s_sao_ctxt_t.i4_top_chroma_buf_size = 4677 u4_ctb_aligned_ht + (u4_ctb_aligned_wd + 2) * (num_vert_units + 1); 4678 ps_ctxt->s_sao_ctxt_t.i4_num_ctb_units = 4679 num_vert_units * (u4_ctb_aligned_wd / MAX_CTB_SIZE); 4680 4681 for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++) 4682 { 4683 ps_ctxt->s_sao_ctxt_t.apu1_sao_src_frm_top_luma[i4_enc_frm_id] = 4684 pu1_sao_base + 4685 ((ps_ctxt->s_sao_ctxt_t.i4_top_luma_buf_size + 4686 ps_ctxt->s_sao_ctxt_t.i4_top_chroma_buf_size) * 4687 i4_num_bitrate_inst * i4_enc_frm_id) + // move to the next frame_id 4688 
u4_ctb_aligned_wd + 4689 2; 4690 4691 ps_ctxt->s_sao_ctxt_t.apu1_sao_src_frm_top_chroma[i4_enc_frm_id] = 4692 pu1_sao_base + 4693 ((ps_ctxt->s_sao_ctxt_t.i4_top_luma_buf_size + 4694 ps_ctxt->s_sao_ctxt_t.i4_top_chroma_buf_size) * 4695 i4_num_bitrate_inst * i4_enc_frm_id) + 4696 +u4_ctb_aligned_ht + (u4_ctb_aligned_wd + 1) * (num_vert_units + 1) + 4697 u4_ctb_aligned_wd + 4; 4698 4699 ps_ctxt->s_sao_ctxt_t.aps_frm_top_ctb_sao[i4_enc_frm_id] = (sao_enc_t *) (pu1_sao_base + 4700 ((ps_ctxt->s_sao_ctxt_t.i4_top_luma_buf_size + ps_ctxt->s_sao_ctxt_t.i4_top_chroma_buf_size) 4701 *i4_num_bitrate_inst*i4_num_enc_loop_frm_pllel) + 4702 (ps_ctxt->s_sao_ctxt_t.i4_num_ctb_units * sizeof(sao_enc_t) *i4_num_bitrate_inst * i4_enc_frm_id)); 4703 } 4704 ps_ctxt->s_sao_ctxt_t.i4_ctb_size = 4705 (1 << ps_init_prms->s_config_prms.i4_max_log2_cu_size); 4706 ps_ctxt->s_sao_ctxt_t.u4_ctb_aligned_wd = u4_ctb_aligned_wd; 4707 } 4708 4709 /*------SAO memory's pointer assignment ends------------*/ 4710 4711 /* perform all one time initialisation here */ 4712 ps_ctxt->i4_nbr_map_strd = MAX_PU_IN_CTB_ROW + 1 + 8; 4713 4714 ps_ctxt->pu1_ctb_nbr_map = ps_ctxt->au1_nbr_ctb_map[0]; 4715 4716 ps_ctxt->i4_deblock_type = ps_init_prms->s_coding_tools_prms.i4_deblocking_type; 4717 4718 /* move the pointer to 1,2 location */ 4719 ps_ctxt->pu1_ctb_nbr_map += ps_ctxt->i4_nbr_map_strd; 4720 ps_ctxt->pu1_ctb_nbr_map++; 4721 4722 ps_ctxt->i4_cu_csbf_strd = MAX_TU_IN_CTB_ROW; 4723 4724 CREATE_SUBBLOCK2CSBFID_MAP(gai4_subBlock2csbfId_map4x4TU, 1, 4, ps_ctxt->i4_cu_csbf_strd); 4725 4726 CREATE_SUBBLOCK2CSBFID_MAP(gai4_subBlock2csbfId_map8x8TU, 4, 8, ps_ctxt->i4_cu_csbf_strd); 4727 4728 CREATE_SUBBLOCK2CSBFID_MAP( 4729 gai4_subBlock2csbfId_map16x16TU, 16, 16, ps_ctxt->i4_cu_csbf_strd); 4730 4731 CREATE_SUBBLOCK2CSBFID_MAP( 4732 gai4_subBlock2csbfId_map32x32TU, 64, 32, ps_ctxt->i4_cu_csbf_strd); 4733 4734 /* For both instance initialise the chroma dequant start idx */ 4735 
ps_ctxt->as_cu_prms[0].i4_chrm_deq_coeff_strt_idx = (MAX_CU_SIZE * MAX_CU_SIZE); 4736 ps_ctxt->as_cu_prms[1].i4_chrm_deq_coeff_strt_idx = (MAX_CU_SIZE * MAX_CU_SIZE); 4737 4738 /* initialise all the function pointer tables */ 4739 { 4740 ps_ctxt->pv_inter_rdopt_cu_mc_mvp = 4741 (pf_inter_rdopt_cu_mc_mvp)ihevce_inter_rdopt_cu_mc_mvp; 4742 4743 ps_ctxt->pv_inter_rdopt_cu_ntu = (pf_inter_rdopt_cu_ntu)ihevce_inter_rdopt_cu_ntu; 4744 4745 #if ENABLE_RDO_BASED_TU_RECURSION 4746 if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P0) 4747 { 4748 ps_ctxt->pv_inter_rdopt_cu_ntu = 4749 (pf_inter_rdopt_cu_ntu)ihevce_inter_tu_tree_selector_and_rdopt_cost_computer; 4750 } 4751 #endif 4752 ps_ctxt->pv_intra_chroma_pred_mode_selector = 4753 (pf_intra_chroma_pred_mode_selector)ihevce_intra_chroma_pred_mode_selector; 4754 ps_ctxt->pv_intra_rdopt_cu_ntu = (pf_intra_rdopt_cu_ntu)ihevce_intra_rdopt_cu_ntu; 4755 ps_ctxt->pv_final_rdopt_mode_prcs = 4756 (pf_final_rdopt_mode_prcs)ihevce_final_rdopt_mode_prcs; 4757 ps_ctxt->pv_store_cu_results = (pf_store_cu_results)ihevce_store_cu_results; 4758 ps_ctxt->pv_enc_loop_cu_bot_copy = (pf_enc_loop_cu_bot_copy)ihevce_enc_loop_cu_bot_copy; 4759 ps_ctxt->pv_enc_loop_ctb_left_copy = 4760 (pf_enc_loop_ctb_left_copy)ihevce_enc_loop_ctb_left_copy; 4761 4762 /* Memory assignments for chroma intra pred buffer */ 4763 { 4764 WORD32 pred_buf_size = 4765 MAX_TU_SIZE * MAX_TU_SIZE * 2 * ((i4_chroma_format == IV_YUV_422SP_UV) ? 
2 : 1); 4766 WORD32 pred_buf_size_per_thread = 4767 NUM_POSSIBLE_TU_SIZES_CHR_INTRA_SATD * pred_buf_size; 4768 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_CHROMA_PRED_INTRA].pv_base + 4769 (ctr * pred_buf_size_per_thread); 4770 4771 for(i = 0; i < NUM_POSSIBLE_TU_SIZES_CHR_INTRA_SATD; i++) 4772 { 4773 ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[i].pv_pred_data = pu1_base; 4774 pu1_base += pred_buf_size; 4775 } 4776 } 4777 4778 /* Memory assignments for reference substitution output */ 4779 { 4780 WORD32 pred_buf_size = ((MAX_TU_SIZE * 2 * 2) + 4); 4781 WORD32 pred_buf_size_per_thread = pred_buf_size; 4782 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_REF_SUB_OUT].pv_base + 4783 (ctr * pred_buf_size_per_thread); 4784 4785 ps_ctxt->pv_ref_sub_out = pu1_base; 4786 } 4787 4788 /* Memory assignments for reference filtering output */ 4789 { 4790 WORD32 pred_buf_size = ((MAX_TU_SIZE * 2 * 2) + 4); 4791 WORD32 pred_buf_size_per_thread = pred_buf_size; 4792 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_REF_FILT_OUT].pv_base + 4793 (ctr * pred_buf_size_per_thread); 4794 4795 ps_ctxt->pv_ref_filt_out = pu1_base; 4796 } 4797 4798 /* Memory assignments for recon storage during CU Recursion */ 4799 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS 4800 if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P0) 4801 #endif 4802 { 4803 { 4804 WORD32 pred_buf_size = (MAX_CU_SIZE * MAX_CU_SIZE); 4805 WORD32 pred_buf_size_per_thread = pred_buf_size; 4806 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].pv_base + 4807 (ctr * pred_buf_size_per_thread); 4808 4809 ps_ctxt->pv_cu_luma_recon = pu1_base; 4810 } 4811 4812 { 4813 WORD32 pred_buf_size = ((MAX_CU_SIZE * MAX_CU_SIZE) >> 1) * 4814 ((i4_chroma_format == IV_YUV_422SP_UV) ? 
2 : 1); 4815 WORD32 pred_buf_size_per_thread = pred_buf_size; 4816 UWORD8 *pu1_base = 4817 (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].pv_base + 4818 (ctr * pred_buf_size_per_thread); 4819 4820 ps_ctxt->pv_cu_chrma_recon = pu1_base; 4821 } 4822 } 4823 4824 /* Memory assignments for pred storage during CU Recursion */ 4825 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS 4826 if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P0) 4827 #endif 4828 { 4829 { 4830 WORD32 pred_buf_size = (MAX_CU_SIZE * MAX_CU_SIZE); 4831 WORD32 pred_buf_size_per_thread = pred_buf_size; 4832 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].pv_base + 4833 (ctr * pred_buf_size_per_thread); 4834 4835 ps_ctxt->pv_CTB_pred_luma = pu1_base; 4836 } 4837 4838 { 4839 WORD32 pred_buf_size = ((MAX_CU_SIZE * MAX_CU_SIZE) >> 1) * 4840 ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1); 4841 WORD32 pred_buf_size_per_thread = pred_buf_size; 4842 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].pv_base + 4843 (ctr * pred_buf_size_per_thread); 4844 4845 ps_ctxt->pv_CTB_pred_chroma = pu1_base; 4846 } 4847 } 4848 4849 /* Memory assignments for CTB left luma data storage */ 4850 { 4851 WORD32 pred_buf_size = (MAX_CTB_SIZE + MAX_TU_SIZE); 4852 WORD32 pred_buf_size_per_thread = pred_buf_size; 4853 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_LEFT_LUMA_DATA].pv_base + 4854 (ctr * pred_buf_size_per_thread); 4855 4856 ps_ctxt->pv_left_luma_data = pu1_base; 4857 } 4858 4859 /* Memory assignments for CTB left chroma data storage */ 4860 { 4861 WORD32 pred_buf_size = 4862 (MAX_CTB_SIZE + MAX_TU_SIZE) * ((i4_chroma_format == IV_YUV_422SP_UV) ? 
2 : 1); 4863 WORD32 pred_buf_size_per_thread = pred_buf_size; 4864 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].pv_base + 4865 (ctr * pred_buf_size_per_thread); 4866 4867 ps_ctxt->pv_left_chrm_data = pu1_base; 4868 } 4869 } 4870 4871 /* Memory for inter pred buffers */ 4872 { 4873 WORD32 i4_num_bufs_per_thread; 4874 4875 WORD32 i4_buf_size_per_cand = 4876 (MAX_CTB_SIZE) * (MAX_CTB_SIZE) * 4877 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8); 4878 4879 i4_num_bufs_per_thread = 4880 (ps_mem_tab[ENC_LOOP_INTER_PRED].i4_mem_size / i4_num_proc_thrds) / 4881 i4_buf_size_per_cand; 4882 4883 ps_ctxt->i4_max_num_inter_rdopt_cands = i4_num_bufs_per_thread - 4; 4884 4885 ps_ctxt->s_pred_buf_data.u4_is_buf_in_use = UINT_MAX; 4886 4887 { 4888 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_INTER_PRED].pv_base + 4889 +(ctr * i4_buf_size_per_cand * i4_num_bufs_per_thread); 4890 4891 for(i = 0; i < i4_num_bufs_per_thread; i++) 4892 { 4893 ps_ctxt->s_pred_buf_data.apv_inter_pred_data[i] = 4894 pu1_base + i * i4_buf_size_per_cand; 4895 ps_ctxt->s_pred_buf_data.u4_is_buf_in_use ^= (1 << i); 4896 } 4897 } 4898 } 4899 4900 /* Memory required to store pred for 422 chroma */ 4901 if(i4_chroma_format == IV_YUV_422SP_UV) 4902 { 4903 WORD32 pred_buf_size = MAX_CTB_SIZE * MAX_CTB_SIZE * 2; 4904 WORD32 pred_buf_size_per_thread = 4905 pred_buf_size * ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 
2 : 1) * 4906 sizeof(UWORD8); 4907 void *pv_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_422_CHROMA_INTRA_PRED].pv_base + 4908 (ctr * pred_buf_size_per_thread); 4909 4910 ps_ctxt->pv_422_chroma_intra_pred_buf = pv_base; 4911 } 4912 else 4913 { 4914 ps_ctxt->pv_422_chroma_intra_pred_buf = NULL; 4915 } 4916 4917 /* Memory for Recon Datastore (Used around and within the RDOPT loop) */ 4918 { 4919 WORD32 i4_lumaBufSize = MAX_CU_SIZE * MAX_CU_SIZE; 4920 WORD32 i4_chromaBufSize = 4921 MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ((i4_chroma_format == IV_YUV_422SP_UV) + 1); 4922 WORD32 i4_memSize_perThread = ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].i4_mem_size / 4923 (i4_num_proc_thrds * sizeof(UWORD8) * (is_hbd_mode + 1)); 4924 WORD32 i4_quality_preset = ps_ctxt->i4_quality_preset; 4925 { 4926 UWORD8 *pu1_mem_base = 4927 (((UWORD8 *)ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].pv_base) + 4928 ctr * i4_memSize_perThread); 4929 4930 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_luma_recon_bufs[0] = 4931 pu1_mem_base + i4_lumaBufSize * 0; 4932 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_luma_recon_bufs[1] = 4933 pu1_mem_base + i4_lumaBufSize * 1; 4934 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_luma_recon_bufs[0] = 4935 pu1_mem_base + i4_lumaBufSize * 2; 4936 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_luma_recon_bufs[1] = 4937 pu1_mem_base + i4_lumaBufSize * 3; 4938 4939 pu1_mem_base += i4_lumaBufSize * 4; 4940 4941 switch(i4_quality_preset) 4942 { 4943 case IHEVCE_QUALITY_P0: 4944 { 4945 #if ENABLE_CHROMA_RDOPT_EVAL_IN_PQ 4946 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = 4947 pu1_mem_base + i4_chromaBufSize * 0; 4948 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = 4949 pu1_mem_base + i4_chromaBufSize * 1; 4950 #else 4951 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL; 4952 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL; 4953 #endif 4954 4955 #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_PQ 4956 
ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = 4957 pu1_mem_base + i4_chromaBufSize * 2; 4958 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = 4959 pu1_mem_base + i4_chromaBufSize * 3; 4960 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = 4961 pu1_mem_base + i4_chromaBufSize * 2; 4962 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = 4963 pu1_mem_base + i4_chromaBufSize * 3; 4964 #else 4965 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL; 4966 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL; 4967 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL; 4968 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL; 4969 #endif 4970 4971 break; 4972 } 4973 case IHEVCE_QUALITY_P2: 4974 { 4975 #if ENABLE_CHROMA_RDOPT_EVAL_IN_HQ 4976 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = 4977 pu1_mem_base + i4_chromaBufSize * 0; 4978 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = 4979 pu1_mem_base + i4_chromaBufSize * 1; 4980 #else 4981 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL; 4982 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL; 4983 #endif 4984 4985 #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HQ 4986 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = 4987 pu1_mem_base + i4_chromaBufSize * 2; 4988 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = 4989 pu1_mem_base + i4_chromaBufSize * 3; 4990 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = 4991 pu1_mem_base + i4_chromaBufSize * 2; 4992 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = 4993 pu1_mem_base + i4_chromaBufSize * 3; 4994 #else 4995 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL; 4996 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL; 4997 
ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL; 4998 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL; 4999 #endif 5000 5001 break; 5002 } 5003 case IHEVCE_QUALITY_P3: 5004 { 5005 #if ENABLE_CHROMA_RDOPT_EVAL_IN_MS 5006 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = 5007 pu1_mem_base + i4_chromaBufSize * 0; 5008 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = 5009 pu1_mem_base + i4_chromaBufSize * 1; 5010 #else 5011 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL; 5012 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL; 5013 #endif 5014 5015 #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_MS 5016 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = 5017 pu1_mem_base + i4_chromaBufSize * 2; 5018 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = 5019 pu1_mem_base + i4_chromaBufSize * 3; 5020 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = 5021 pu1_mem_base + i4_chromaBufSize * 2; 5022 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = 5023 pu1_mem_base + i4_chromaBufSize * 3; 5024 #else 5025 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL; 5026 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL; 5027 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL; 5028 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL; 5029 #endif 5030 5031 break; 5032 } 5033 case IHEVCE_QUALITY_P4: 5034 { 5035 #if ENABLE_CHROMA_RDOPT_EVAL_IN_HS 5036 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = 5037 pu1_mem_base + i4_chromaBufSize * 0; 5038 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = 5039 pu1_mem_base + i4_chromaBufSize * 1; 5040 #else 5041 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL; 5042 
ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL; 5043 #endif 5044 5045 #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HS 5046 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = 5047 pu1_mem_base + i4_chromaBufSize * 2; 5048 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = 5049 pu1_mem_base + i4_chromaBufSize * 3; 5050 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = 5051 pu1_mem_base + i4_chromaBufSize * 2; 5052 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = 5053 pu1_mem_base + i4_chromaBufSize * 3; 5054 #else 5055 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL; 5056 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL; 5057 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL; 5058 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL; 5059 #endif 5060 5061 break; 5062 } 5063 case IHEVCE_QUALITY_P5: 5064 { 5065 #if ENABLE_CHROMA_RDOPT_EVAL_IN_XS 5066 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = 5067 pu1_mem_base + i4_chromaBufSize * 0; 5068 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = 5069 pu1_mem_base + i4_chromaBufSize * 1; 5070 #else 5071 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL; 5072 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL; 5073 #endif 5074 5075 #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS 5076 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = 5077 pu1_mem_base + i4_chromaBufSize * 2; 5078 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = 5079 pu1_mem_base + i4_chromaBufSize * 3; 5080 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = 5081 pu1_mem_base + i4_chromaBufSize * 2; 5082 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = 5083 pu1_mem_base + i4_chromaBufSize * 3; 5084 #else 5085 
ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL; 5086 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL; 5087 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL; 5088 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL; 5089 #endif 5090 5091 break; 5092 } 5093 } 5094 } 5095 5096 ps_ctxt->as_cu_prms[0].s_recon_datastore.i4_lumaRecon_stride = MAX_CU_SIZE; 5097 ps_ctxt->as_cu_prms[1].s_recon_datastore.i4_lumaRecon_stride = MAX_CU_SIZE; 5098 ps_ctxt->as_cu_prms[0].s_recon_datastore.i4_chromaRecon_stride = MAX_CU_SIZE; 5099 ps_ctxt->as_cu_prms[1].s_recon_datastore.i4_chromaRecon_stride = MAX_CU_SIZE; 5100 5101 } /* Recon Datastore */ 5102 5103 /****************************************************/ 5104 /****************************************************/ 5105 /* ps_pps->i1_sign_data_hiding_flag == UNHIDDEN */ 5106 /* when NO_SBH. else HIDDEN */ 5107 /****************************************************/ 5108 /****************************************************/ 5109 /* Zero cbf tool is enabled by default for all presets */ 5110 ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE; 5111 5112 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) 5113 { 5114 ps_ctxt->i4_quant_rounding_level = CU_LEVEL_QUANT_ROUNDING; 5115 ps_ctxt->i4_chroma_quant_rounding_level = CHROMA_QUANT_ROUNDING; 5116 ps_ctxt->i4_rdoq_level = ALL_CAND_RDOQ; 5117 ps_ctxt->i4_sbh_level = ALL_CAND_SBH; 5118 } 5119 else if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P3) 5120 { 5121 ps_ctxt->i4_quant_rounding_level = FIXED_QUANT_ROUNDING; 5122 ps_ctxt->i4_chroma_quant_rounding_level = FIXED_QUANT_ROUNDING; 5123 ps_ctxt->i4_rdoq_level = NO_RDOQ; 5124 ps_ctxt->i4_sbh_level = NO_SBH; 5125 } 5126 else 5127 { 5128 ps_ctxt->i4_quant_rounding_level = FIXED_QUANT_ROUNDING; 5129 ps_ctxt->i4_chroma_quant_rounding_level = FIXED_QUANT_ROUNDING; 5130 ps_ctxt->i4_rdoq_level = NO_RDOQ; 5131 ps_ctxt->i4_sbh_level = NO_SBH; 5132 } 5133 5134 
#if DISABLE_QUANT_ROUNDING 5135 ps_ctxt->i4_quant_rounding_level = FIXED_QUANT_ROUNDING; 5136 ps_ctxt->i4_chroma_quant_rounding_level = FIXED_QUANT_ROUNDING; 5137 #endif 5138 /*Disabling RDOQ only when spatial modulation is enabled 5139 as RDOQ degrades visual quality*/ 5140 if(ps_init_prms->s_config_prms.i4_cu_level_rc & 1) 5141 { 5142 ps_ctxt->i4_rdoq_level = NO_RDOQ; 5143 } 5144 5145 #if DISABLE_RDOQ 5146 ps_ctxt->i4_rdoq_level = NO_RDOQ; 5147 #endif 5148 5149 #if DISABLE_SBH 5150 ps_ctxt->i4_sbh_level = NO_SBH; 5151 #endif 5152 5153 /*Rounding factor calc based on previous cabac states */ 5154 5155 ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[0] = &ps_ctxt->i4_quant_round_4x4[0][0]; 5156 ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[1] = &ps_ctxt->i4_quant_round_8x8[0][0]; 5157 ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[2] = &ps_ctxt->i4_quant_round_16x16[0][0]; 5158 ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[4] = &ps_ctxt->i4_quant_round_32x32[0][0]; 5159 5160 ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[0] = &ps_ctxt->i4_quant_round_4x4[1][0]; 5161 ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[1] = &ps_ctxt->i4_quant_round_8x8[1][0]; 5162 ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[2] = &ps_ctxt->i4_quant_round_16x16[1][0]; 5163 ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[4] = &ps_ctxt->i4_quant_round_32x32[1][0]; 5164 5165 ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[0] = &ps_ctxt->i4_quant_round_cr_4x4[0][0]; 5166 ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[1] = &ps_ctxt->i4_quant_round_cr_8x8[0][0]; 5167 ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[2] = &ps_ctxt->i4_quant_round_cr_16x16[0][0]; 5168 5169 ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[0] = &ps_ctxt->i4_quant_round_cr_4x4[1][0]; 5170 ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[1] = &ps_ctxt->i4_quant_round_cr_8x8[1][0]; 5171 ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[2] = &ps_ctxt->i4_quant_round_cr_16x16[1][0]; 5172 5173 
/****************************************************************************************/ 5174 /* Setting the perform rdoq and sbh flags appropriately */ 5175 /****************************************************************************************/ 5176 { 5177 /******************************************/ 5178 /* For best cand rdoq and/or sbh */ 5179 /******************************************/ 5180 ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq = 5181 (ps_ctxt->i4_rdoq_level == BEST_CAND_RDOQ); 5182 /* To do SBH we need the quant and iquant data. This would mean we need to do quantization again, which would mean 5183 we would have to do RDOQ again.*/ 5184 ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq = 5185 ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq || 5186 ((BEST_CAND_SBH == ps_ctxt->i4_sbh_level) && 5187 (ALL_CAND_RDOQ == ps_ctxt->i4_rdoq_level)); 5188 5189 ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh = 5190 (ps_ctxt->i4_sbh_level == BEST_CAND_SBH); 5191 5192 /* SBH should be performed if 5193 a) i4_sbh_level is BEST_CAND_SBH. 5194 b) For all quality presets above medium speed(i.e. high speed and extreme speed) and 5195 if SBH has to be done because for these presets the quant, iquant and scan coeff 5196 data are calculated in this function and not during the RDOPT stage*/ 5197 5198 /* RDOQ will change the coefficients. 
If coefficients are changed, we will have to do sbh again*/ 5199 ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh = 5200 ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh || 5201 ((BEST_CAND_RDOQ == ps_ctxt->i4_rdoq_level) && 5202 (ALL_CAND_SBH == ps_ctxt->i4_sbh_level)); 5203 5204 /******************************************/ 5205 /* For all cand rdoq and/or sbh */ 5206 /******************************************/ 5207 ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq = 5208 (ps_ctxt->i4_rdoq_level == ALL_CAND_RDOQ); 5209 ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh = 5210 (ps_ctxt->i4_sbh_level == ALL_CAND_SBH); 5211 ps_ctxt->s_rdoq_sbh_ctxt.i4_bit_depth = 5212 ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth; 5213 } 5214 5215 if(!is_hbd_mode) 5216 { 5217 if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 1) 5218 { 5219 if(ps_ctxt->i4_rdoq_level == NO_RDOQ) 5220 { 5221 ps_ctxt->apf_quant_iquant_ssd[0] = 5222 ps_func_selector->ihevc_quant_iquant_ssd_fptr; 5223 ps_ctxt->apf_quant_iquant_ssd[2] = ps_func_selector->ihevc_quant_iquant_fptr; 5224 } 5225 else 5226 { 5227 ps_ctxt->apf_quant_iquant_ssd[0] = 5228 ps_func_selector->ihevc_quant_iquant_ssd_rdoq_fptr; 5229 ps_ctxt->apf_quant_iquant_ssd[2] = 5230 ps_func_selector->ihevc_quant_iquant_rdoq_fptr; 5231 } 5232 5233 /*If coef level RDOQ is enabled, quantization based on corr. 
error to be done */ 5234 if(ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING) 5235 { 5236 ps_ctxt->apf_quant_iquant_ssd[1] = 5237 ps_func_selector->ihevc_q_iq_ssd_var_rnd_fact_fptr; 5238 ps_ctxt->apf_quant_iquant_ssd[3] = 5239 ps_func_selector->ihevc_q_iq_var_rnd_fact_fptr; 5240 } 5241 else 5242 { 5243 ps_ctxt->apf_quant_iquant_ssd[1] = 5244 ps_func_selector->ihevc_quant_iquant_ssd_fptr; 5245 ps_ctxt->apf_quant_iquant_ssd[3] = ps_func_selector->ihevc_quant_iquant_fptr; 5246 } 5247 } 5248 else if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 0) 5249 { 5250 if(ps_ctxt->i4_rdoq_level == NO_RDOQ) 5251 { 5252 ps_ctxt->apf_quant_iquant_ssd[0] = 5253 ps_func_selector->ihevc_quant_iquant_ssd_flat_scale_mat_fptr; 5254 ps_ctxt->apf_quant_iquant_ssd[2] = 5255 ps_func_selector->ihevc_quant_iquant_flat_scale_mat_fptr; 5256 } 5257 else 5258 { 5259 ps_ctxt->apf_quant_iquant_ssd[0] = 5260 ps_func_selector->ihevc_quant_iquant_ssd_flat_scale_mat_rdoq_fptr; 5261 ps_ctxt->apf_quant_iquant_ssd[2] = 5262 ps_func_selector->ihevc_quant_iquant_flat_scale_mat_rdoq_fptr; 5263 } 5264 5265 /*If coef level RDOQ is enabled, quantization based on corr. 
error to be done */ 5266 if(ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING) 5267 { 5268 ps_ctxt->apf_quant_iquant_ssd[1] = 5269 ps_func_selector->ihevc_q_iq_ssd_flat_scale_mat_var_rnd_fact_fptr; 5270 ps_ctxt->apf_quant_iquant_ssd[3] = 5271 ps_func_selector->ihevc_q_iq_flat_scale_mat_var_rnd_fact_fptr; 5272 } 5273 else 5274 { 5275 ps_ctxt->apf_quant_iquant_ssd[1] = 5276 ps_func_selector->ihevc_quant_iquant_ssd_flat_scale_mat_fptr; 5277 ps_ctxt->apf_quant_iquant_ssd[3] = 5278 ps_func_selector->ihevc_quant_iquant_flat_scale_mat_fptr; 5279 } 5280 } 5281 5282 ps_ctxt->s_sao_ctxt_t.apf_sao_luma[0] = 5283 ps_func_selector->ihevc_sao_edge_offset_class0_fptr; 5284 ps_ctxt->s_sao_ctxt_t.apf_sao_luma[1] = 5285 ps_func_selector->ihevc_sao_edge_offset_class1_fptr; 5286 ps_ctxt->s_sao_ctxt_t.apf_sao_luma[2] = 5287 ps_func_selector->ihevc_sao_edge_offset_class2_fptr; 5288 ps_ctxt->s_sao_ctxt_t.apf_sao_luma[3] = 5289 ps_func_selector->ihevc_sao_edge_offset_class3_fptr; 5290 5291 ps_ctxt->s_sao_ctxt_t.apf_sao_chroma[0] = 5292 ps_func_selector->ihevc_sao_edge_offset_class0_chroma_fptr; 5293 ps_ctxt->s_sao_ctxt_t.apf_sao_chroma[1] = 5294 ps_func_selector->ihevc_sao_edge_offset_class1_chroma_fptr; 5295 ps_ctxt->s_sao_ctxt_t.apf_sao_chroma[2] = 5296 ps_func_selector->ihevc_sao_edge_offset_class2_chroma_fptr; 5297 ps_ctxt->s_sao_ctxt_t.apf_sao_chroma[3] = 5298 ps_func_selector->ihevc_sao_edge_offset_class3_chroma_fptr; 5299 5300 ps_ctxt->apf_it_recon[0] = ps_func_selector->ihevc_itrans_recon_4x4_ttype1_fptr; 5301 ps_ctxt->apf_it_recon[1] = ps_func_selector->ihevc_itrans_recon_4x4_fptr; 5302 ps_ctxt->apf_it_recon[2] = ps_func_selector->ihevc_itrans_recon_8x8_fptr; 5303 ps_ctxt->apf_it_recon[3] = ps_func_selector->ihevc_itrans_recon_16x16_fptr; 5304 ps_ctxt->apf_it_recon[4] = ps_func_selector->ihevc_itrans_recon_32x32_fptr; 5305 5306 ps_ctxt->apf_chrm_it_recon[0] = ps_func_selector->ihevc_chroma_itrans_recon_4x4_fptr; 5307 ps_ctxt->apf_chrm_it_recon[1] = 
ps_func_selector->ihevc_chroma_itrans_recon_8x8_fptr; 5308 ps_ctxt->apf_chrm_it_recon[2] = ps_func_selector->ihevc_chroma_itrans_recon_16x16_fptr; 5309 5310 ps_ctxt->apf_resd_trns[0] = ps_func_selector->ihevc_resi_trans_4x4_ttype1_fptr; 5311 ps_ctxt->apf_resd_trns[1] = ps_func_selector->ihevc_resi_trans_4x4_fptr; 5312 ps_ctxt->apf_resd_trns[2] = ps_func_selector->ihevc_resi_trans_8x8_fptr; 5313 ps_ctxt->apf_resd_trns[3] = ps_func_selector->ihevc_resi_trans_16x16_fptr; 5314 ps_ctxt->apf_resd_trns[4] = ps_func_selector->ihevc_resi_trans_32x32_fptr; 5315 5316 ps_ctxt->apf_chrm_resd_trns[0] = ps_func_selector->ihevc_resi_trans_4x4_fptr; 5317 ps_ctxt->apf_chrm_resd_trns[1] = ps_func_selector->ihevc_resi_trans_8x8_fptr; 5318 ps_ctxt->apf_chrm_resd_trns[2] = ps_func_selector->ihevc_resi_trans_16x16_fptr; 5319 5320 ps_ctxt->apf_lum_ip[IP_FUNC_MODE_0] = 5321 ps_func_selector->ihevc_intra_pred_luma_planar_fptr; 5322 ps_ctxt->apf_lum_ip[IP_FUNC_MODE_1] = ps_func_selector->ihevc_intra_pred_luma_dc_fptr; 5323 ps_ctxt->apf_lum_ip[IP_FUNC_MODE_2] = 5324 ps_func_selector->ihevc_intra_pred_luma_mode2_fptr; 5325 ps_ctxt->apf_lum_ip[IP_FUNC_MODE_3TO9] = 5326 ps_func_selector->ihevc_intra_pred_luma_mode_3_to_9_fptr; 5327 ps_ctxt->apf_lum_ip[IP_FUNC_MODE_10] = 5328 ps_func_selector->ihevc_intra_pred_luma_horz_fptr; 5329 ps_ctxt->apf_lum_ip[IP_FUNC_MODE_11TO17] = 5330 ps_func_selector->ihevc_intra_pred_luma_mode_11_to_17_fptr; 5331 ps_ctxt->apf_lum_ip[IP_FUNC_MODE_18_34] = 5332 ps_func_selector->ihevc_intra_pred_luma_mode_18_34_fptr; 5333 ps_ctxt->apf_lum_ip[IP_FUNC_MODE_19TO25] = 5334 ps_func_selector->ihevc_intra_pred_luma_mode_19_to_25_fptr; 5335 ps_ctxt->apf_lum_ip[IP_FUNC_MODE_26] = ps_func_selector->ihevc_intra_pred_luma_ver_fptr; 5336 ps_ctxt->apf_lum_ip[IP_FUNC_MODE_27TO33] = 5337 ps_func_selector->ihevc_intra_pred_luma_mode_27_to_33_fptr; 5338 5339 ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_0] = 5340 ps_func_selector->ihevc_intra_pred_chroma_planar_fptr; 5341 
ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_1] = 5342 ps_func_selector->ihevc_intra_pred_chroma_dc_fptr; 5343 ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_2] = 5344 ps_func_selector->ihevc_intra_pred_chroma_mode2_fptr; 5345 ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_3TO9] = 5346 ps_func_selector->ihevc_intra_pred_chroma_mode_3_to_9_fptr; 5347 ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_10] = 5348 ps_func_selector->ihevc_intra_pred_chroma_horz_fptr; 5349 ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_11TO17] = 5350 ps_func_selector->ihevc_intra_pred_chroma_mode_11_to_17_fptr; 5351 ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_18_34] = 5352 ps_func_selector->ihevc_intra_pred_chroma_mode_18_34_fptr; 5353 ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_19TO25] = 5354 ps_func_selector->ihevc_intra_pred_chroma_mode_19_to_25_fptr; 5355 ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_26] = 5356 ps_func_selector->ihevc_intra_pred_chroma_ver_fptr; 5357 ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_27TO33] = 5358 ps_func_selector->ihevc_intra_pred_chroma_mode_27_to_33_fptr; 5359 5360 ps_ctxt->apf_chrm_resd_trns_had[0] = 5361 (pf_res_trans_luma_had_chroma)ps_ctxt->s_cmn_opt_func.pf_chroma_HAD_4x4_8bit; 5362 ps_ctxt->apf_chrm_resd_trns_had[1] = 5363 (pf_res_trans_luma_had_chroma)ps_ctxt->s_cmn_opt_func.pf_chroma_HAD_8x8_8bit; 5364 ps_ctxt->apf_chrm_resd_trns_had[2] = 5365 (pf_res_trans_luma_had_chroma)ps_ctxt->s_cmn_opt_func.pf_chroma_HAD_16x16_8bit; 5366 } 5367 5368 if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 0) 5369 { 5370 /* initialise the scale & rescale matricies */ 5371 ps_ctxt->api2_scal_mat[0] = (WORD16 *)&gi2_flat_scale_mat_4x4[0]; 5372 ps_ctxt->api2_scal_mat[1] = (WORD16 *)&gi2_flat_scale_mat_4x4[0]; 5373 ps_ctxt->api2_scal_mat[2] = (WORD16 *)&gi2_flat_scale_mat_8x8[0]; 5374 ps_ctxt->api2_scal_mat[3] = (WORD16 *)&gi2_flat_scale_mat_16x16[0]; 5375 ps_ctxt->api2_scal_mat[4] = (WORD16 *)&gi2_flat_scale_mat_32x32[0]; 5376 /*init for inter matrix*/ 5377 ps_ctxt->api2_scal_mat[5] = (WORD16 *)&gi2_flat_scale_mat_4x4[0]; 5378 ps_ctxt->api2_scal_mat[6] = (WORD16 
*)&gi2_flat_scale_mat_4x4[0]; 5379 ps_ctxt->api2_scal_mat[7] = (WORD16 *)&gi2_flat_scale_mat_8x8[0]; 5380 ps_ctxt->api2_scal_mat[8] = (WORD16 *)&gi2_flat_scale_mat_16x16[0]; 5381 ps_ctxt->api2_scal_mat[9] = (WORD16 *)&gi2_flat_scale_mat_32x32[0]; 5382 5383 /*init for rescale matrix*/ 5384 ps_ctxt->api2_rescal_mat[0] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0]; 5385 ps_ctxt->api2_rescal_mat[1] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0]; 5386 ps_ctxt->api2_rescal_mat[2] = (WORD16 *)&gi2_flat_rescale_mat_8x8[0]; 5387 ps_ctxt->api2_rescal_mat[3] = (WORD16 *)&gi2_flat_rescale_mat_16x16[0]; 5388 ps_ctxt->api2_rescal_mat[4] = (WORD16 *)&gi2_flat_rescale_mat_32x32[0]; 5389 /*init for rescale inter matrix*/ 5390 ps_ctxt->api2_rescal_mat[5] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0]; 5391 ps_ctxt->api2_rescal_mat[6] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0]; 5392 ps_ctxt->api2_rescal_mat[7] = (WORD16 *)&gi2_flat_rescale_mat_8x8[0]; 5393 ps_ctxt->api2_rescal_mat[8] = (WORD16 *)&gi2_flat_rescale_mat_16x16[0]; 5394 ps_ctxt->api2_rescal_mat[9] = (WORD16 *)&gi2_flat_rescale_mat_32x32[0]; 5395 } 5396 else if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 1) 5397 { 5398 /* initialise the scale & rescale matricies */ 5399 ps_ctxt->api2_scal_mat[0] = (WORD16 *)&gi2_flat_scale_mat_4x4[0]; 5400 ps_ctxt->api2_scal_mat[1] = (WORD16 *)&gi2_flat_scale_mat_4x4[0]; 5401 ps_ctxt->api2_scal_mat[2] = (WORD16 *)&gi2_intra_default_scale_mat_8x8[0]; 5402 ps_ctxt->api2_scal_mat[3] = (WORD16 *)&gi2_intra_default_scale_mat_16x16[0]; 5403 ps_ctxt->api2_scal_mat[4] = (WORD16 *)&gi2_intra_default_scale_mat_32x32[0]; 5404 /*init for inter matrix*/ 5405 ps_ctxt->api2_scal_mat[5] = (WORD16 *)&gi2_flat_scale_mat_4x4[0]; 5406 ps_ctxt->api2_scal_mat[6] = (WORD16 *)&gi2_flat_scale_mat_4x4[0]; 5407 ps_ctxt->api2_scal_mat[7] = (WORD16 *)&gi2_inter_default_scale_mat_8x8[0]; 5408 ps_ctxt->api2_scal_mat[8] = (WORD16 *)&gi2_inter_default_scale_mat_16x16[0]; 5409 ps_ctxt->api2_scal_mat[9] = (WORD16 
*)&gi2_inter_default_scale_mat_32x32[0]; 5410 5411 /*init for rescale matrix*/ 5412 ps_ctxt->api2_rescal_mat[0] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0]; 5413 ps_ctxt->api2_rescal_mat[1] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0]; 5414 ps_ctxt->api2_rescal_mat[2] = (WORD16 *)&gi2_intra_default_rescale_mat_8x8[0]; 5415 ps_ctxt->api2_rescal_mat[3] = (WORD16 *)&gi2_intra_default_rescale_mat_16x16[0]; 5416 ps_ctxt->api2_rescal_mat[4] = (WORD16 *)&gi2_intra_default_rescale_mat_32x32[0]; 5417 /*init for rescale inter matrix*/ 5418 ps_ctxt->api2_rescal_mat[5] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0]; 5419 ps_ctxt->api2_rescal_mat[6] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0]; 5420 ps_ctxt->api2_rescal_mat[7] = (WORD16 *)&gi2_inter_default_rescale_mat_8x8[0]; 5421 ps_ctxt->api2_rescal_mat[8] = (WORD16 *)&gi2_inter_default_rescale_mat_16x16[0]; 5422 ps_ctxt->api2_rescal_mat[9] = (WORD16 *)&gi2_inter_default_rescale_mat_32x32[0]; 5423 } 5424 else 5425 { 5426 ASSERT(0); 5427 } 5428 5429 /* Not recomputing Luma pred-data and header data for any preset now */ 5430 ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 0; 5431 ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data = 0; 5432 ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data = 1; 5433 5434 switch(ps_ctxt->i4_quality_preset) 5435 { 5436 case IHEVCE_QUALITY_P0: 5437 { 5438 ps_ctxt->i4_max_merge_candidates = 5; 5439 ps_ctxt->i4_use_satd_for_merge_eval = 1; 5440 ps_ctxt->u1_use_top_at_ctb_boundary = 1; 5441 ps_ctxt->u1_use_early_cbf_data = 0; 5442 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_PQ; 5443 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd = 5444 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_PQ; 5445 5446 break; 5447 } 5448 case IHEVCE_QUALITY_P2: 5449 { 5450 ps_ctxt->i4_max_merge_candidates = 5; 5451 ps_ctxt->i4_use_satd_for_merge_eval = 1; 5452 ps_ctxt->u1_use_top_at_ctb_boundary = 1; 5453 ps_ctxt->u1_use_early_cbf_data = 0; 5454 5455 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = 
ENABLE_CHROMA_RDOPT_EVAL_IN_HQ; 5456 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd = 5457 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HQ; 5458 5459 break; 5460 } 5461 case IHEVCE_QUALITY_P3: 5462 { 5463 ps_ctxt->i4_max_merge_candidates = 3; 5464 ps_ctxt->i4_use_satd_for_merge_eval = 1; 5465 ps_ctxt->u1_use_top_at_ctb_boundary = 0; 5466 5467 ps_ctxt->u1_use_early_cbf_data = 0; 5468 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_MS; 5469 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd = 5470 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_MS; 5471 5472 break; 5473 } 5474 case IHEVCE_QUALITY_P4: 5475 { 5476 ps_ctxt->i4_max_merge_candidates = 2; 5477 ps_ctxt->i4_use_satd_for_merge_eval = 1; 5478 ps_ctxt->u1_use_top_at_ctb_boundary = 0; 5479 ps_ctxt->u1_use_early_cbf_data = 0; 5480 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_HS; 5481 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd = 5482 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HS; 5483 5484 break; 5485 } 5486 case IHEVCE_QUALITY_P5: 5487 { 5488 ps_ctxt->i4_max_merge_candidates = 2; 5489 ps_ctxt->i4_use_satd_for_merge_eval = 0; 5490 ps_ctxt->u1_use_top_at_ctb_boundary = 0; 5491 ps_ctxt->u1_use_early_cbf_data = 0; 5492 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_XS; 5493 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd = 5494 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS; 5495 5496 break; 5497 } 5498 case IHEVCE_QUALITY_P6: 5499 { 5500 ps_ctxt->i4_max_merge_candidates = 2; 5501 ps_ctxt->i4_use_satd_for_merge_eval = 0; 5502 ps_ctxt->u1_use_top_at_ctb_boundary = 0; 5503 ps_ctxt->u1_use_early_cbf_data = EARLY_CBF_ON; 5504 break; 5505 } 5506 default: 5507 { 5508 ASSERT(0); 5509 } 5510 } 5511 5512 #if DISABLE_SKIP_AND_MERGE_EVAL 5513 ps_ctxt->i4_max_merge_candidates = 0; 5514 #endif 5515 5516 ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data = 5517 !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt; 5518 5519 /*initialize memory for RC related 
parameters required/populated by enc_loop */ 5520 /* the allocated memory is distributed as follows assuming encoder is running for 3 bit-rate instnaces 5521 |-------|-> Thread 0, instance 0 5522 | | 5523 | | 5524 | | 5525 |-------|-> thread 0, instance 1 5526 | | 5527 | | 5528 | | 5529 |-------|-> thread 0, intance 2 5530 | | 5531 | | 5532 | | 5533 |-------|-> thread 1, instance 0 5534 | | 5535 | | 5536 | | 5537 |-------|-> thread 1, instance 1 5538 | | 5539 | | 5540 | | 5541 |-------|-> thread 1, instance 2 5542 ... ... 5543 5544 Each theard will collate the data corresponding to the bit-rate instnace it's running at the appropriate place. 5545 Finally, one thread will become master and collate the data from all the threads */ 5546 for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++) 5547 { 5548 for(i = 0; i < i4_num_bitrate_inst; i++) 5549 { 5550 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i] = ps_enc_loop_rc_params; 5551 ps_enc_loop_rc_params++; 5552 } 5553 } 5554 /* Non-Luma modes for Chroma are evaluated only in HIGH QUALITY preset */ 5555 5556 #if !ENABLE_SEPARATE_LUMA_CHROMA_INTRA_MODE 5557 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd = 0; 5558 #endif 5559 5560 ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[TU_EQ_CU].i4_iq_buff_stride = 5561 MAX_TU_SIZE; 5562 ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[TU_EQ_CU_DIV2].i4_iq_buff_stride = 5563 MAX_TU_SIZE; 5564 /*Multiplying by two to account for interleaving of cb and cr*/ 5565 ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[TU_EQ_CU].i4_pred_stride = MAX_TU_SIZE 5566 << 1; 5567 ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[TU_EQ_CU_DIV2].i4_pred_stride = 5568 MAX_TU_SIZE << 1; 5569 5570 /* Memory for a frame level memory to store tile-id */ 5571 /* corresponding to each CTB of frame */ 5572 ps_ctxt->pi4_offset_for_last_cu_qp = &ps_master_ctxt->ai4_offset_for_last_cu_qp[0]; 5573 5574 ps_ctxt->i4_qp_mod = ps_init_prms->s_config_prms.i4_cu_level_rc & 
1; 5575 /* psy rd strength is a run time parametr control by bit field 5-7 in the VQET field.*/ 5576 /* we disable psyrd if the the psy strength is zero or the BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER field is not set */ 5577 if(ps_init_prms->s_coding_tools_prms.i4_vqet & 5578 (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER)) 5579 { 5580 UWORD32 psy_strength; 5581 UWORD32 psy_strength_mask = 5582 224; // only bits 5,6,7 are ones. These three bits represent the psy strength 5583 psy_strength = ps_init_prms->s_coding_tools_prms.i4_vqet & psy_strength_mask; 5584 ps_ctxt->u1_enable_psyRDOPT = 1; 5585 ps_ctxt->u4_psy_strength = psy_strength >> BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_1; 5586 if(psy_strength == 0) 5587 { 5588 ps_ctxt->u1_enable_psyRDOPT = 0; 5589 ps_ctxt->u4_psy_strength = 0; 5590 } 5591 } 5592 5593 ps_ctxt->u1_is_stasino_enabled = 5594 ((ps_init_prms->s_coding_tools_prms.i4_vqet & 5595 (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER)) && 5596 (ps_init_prms->s_coding_tools_prms.i4_vqet & 5597 (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_NOISE_PRESERVATION))); 5598 5599 ps_ctxt->u1_max_inter_tr_depth = ps_init_prms->s_config_prms.i4_max_tr_tree_depth_nI; 5600 ps_ctxt->u1_max_intra_tr_depth = ps_init_prms->s_config_prms.i4_max_tr_tree_depth_I; 5601 ps_ctxt++; 5602 } 5603 /* Store Tile params base into EncLoop Master context */ 5604 ps_master_ctxt->pv_tile_params_base = (void *)ps_tile_params_base; 5605 5606 if(1 == ps_tile_params_base->i4_tiles_enabled_flag) 5607 { 5608 i4_num_tile_cols = ps_tile_params_base->i4_num_tile_cols; 5609 } 5610 5611 /* Updating ai4_offset_for_last_cu_qp[] array for all tile-colums of frame */ 5612 /* Loop over all tile-cols in frame */ 5613 for(ctr = 0; ctr < i4_num_tile_cols; ctr++) 5614 { 5615 WORD32 i4_tile_col_wd_in_ctb_unit = 5616 (ps_tile_params_base + ctr)->i4_curr_tile_wd_in_ctb_unit; 5617 WORD32 offset_x; 5618 5619 if(ctr == (i4_num_tile_cols - 1)) 5620 { /* Last tile-row of frame */ 5621 WORD32 min_cu_size = 1 << 
ps_init_prms->s_config_prms.i4_min_log2_cu_size; 5622 5623 WORD32 cu_aligned_pic_wd = 5624 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width + 5625 SET_CTB_ALIGN( 5626 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, 5627 min_cu_size); 5628 5629 WORD32 last_hz_ctb_wd = MAX_CTB_SIZE - (u4_width - cu_aligned_pic_wd); 5630 5631 offset_x = (i4_tile_col_wd_in_ctb_unit - 1) * MAX_CTB_SIZE; 5632 offset_x += last_hz_ctb_wd; 5633 } 5634 else 5635 { /* Not the last tile-row of frame */ 5636 offset_x = (i4_tile_col_wd_in_ctb_unit)*MAX_CTB_SIZE; 5637 } 5638 5639 offset_x /= 4; 5640 offset_x -= 1; 5641 5642 ps_master_ctxt->ai4_offset_for_last_cu_qp[ctr] = offset_x; 5643 } 5644 5645 n_tabs = NUM_ENC_LOOP_MEM_RECS; 5646 5647 /*store num bit-rate instances in the master context */ 5648 ps_master_ctxt->i4_num_bitrates = i4_num_bitrate_inst; 5649 ps_master_ctxt->i4_num_enc_loop_frm_pllel = i4_num_enc_loop_frm_pllel; 5650 /*************************************************************************/ 5651 /* --- EncLoop Deblock sync Dep Mngr Mem init -- */ 5652 /*************************************************************************/ 5653 { 5654 WORD32 count; 5655 WORD32 num_vert_units, num_blks_in_row; 5656 WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height; 5657 WORD32 wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width; 5658 5659 ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units); 5660 ihevce_enc_loop_dblk_get_prms_dep_mngr(wd, &num_blks_in_row); 5661 ASSERT(num_vert_units > 0); 5662 ASSERT(num_blks_in_row > 0); 5663 5664 for(count = 0; count < i4_num_enc_loop_frm_pllel; count++) 5665 { 5666 for(i = 0; i < i4_num_bitrate_inst; i++) 5667 { 5668 ps_master_ctxt->aapv_dep_mngr_enc_loop_dblk[count][i] = ihevce_dmgr_init( 5669 &ps_mem_tab[n_tabs], 5670 pv_osal_handle, 5671 DEP_MNGR_ROW_ROW_SYNC, 5672 num_vert_units, 5673 num_blks_in_row, 5674 i4_num_tile_cols, /* Number of Col Tiles 
*/ 5675 i4_num_proc_thrds, 5676 0 /*Sem Disabled*/ 5677 ); 5678 5679 n_tabs += ihevce_dmgr_get_num_mem_recs(); 5680 } 5681 } 5682 } 5683 /*************************************************************************/ 5684 /* --- EncLoop Top-Right CU synnc Dep Mngr Mem init -- */ 5685 /*************************************************************************/ 5686 { 5687 WORD32 count; 5688 WORD32 num_vert_units, num_blks_in_row; 5689 WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height; 5690 WORD32 wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width; 5691 5692 WORD32 i4_sem = 0; 5693 5694 if(ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset >= 5695 IHEVCE_QUALITY_P4) 5696 i4_sem = 0; 5697 else 5698 i4_sem = 1; 5699 ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units); 5700 /* For Top-Right CU sync, adding one more CTB since value updation */ 5701 /* happens in that way for the last CTB in the row */ 5702 num_blks_in_row = wd + SET_CTB_ALIGN(wd, MAX_CU_SIZE); 5703 num_blks_in_row += MAX_CTB_SIZE; 5704 5705 ASSERT(num_vert_units > 0); 5706 ASSERT(num_blks_in_row > 0); 5707 5708 for(count = 0; count < i4_num_enc_loop_frm_pllel; count++) 5709 { 5710 for(i = 0; i < i4_num_bitrate_inst; i++) 5711 { 5712 /* For ES/HS, CU level updates uses spin-locks than semaphore */ 5713 { 5714 ps_master_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[count][i] = 5715 ihevce_dmgr_init( 5716 &ps_mem_tab[n_tabs], 5717 pv_osal_handle, 5718 DEP_MNGR_ROW_ROW_SYNC, 5719 num_vert_units, 5720 num_blks_in_row, 5721 i4_num_tile_cols, /* Number of Col Tiles */ 5722 i4_num_proc_thrds, 5723 i4_sem /*Sem Disabled*/ 5724 ); 5725 } 5726 n_tabs += ihevce_dmgr_get_num_mem_recs(); 5727 } 5728 } 5729 } 5730 5731 for(i = 1; i < 5; i++) 5732 { 5733 WORD32 i4_log2_trans_size = i + 1; 5734 WORD32 i4_bit_depth = ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth; 5735 5736 ga_trans_shift[i] = (MAX_TR_DYNAMIC_RANGE - i4_bit_depth - 
i4_log2_trans_size) << 1; 5737 } 5738 5739 ga_trans_shift[0] = ga_trans_shift[1]; 5740 5741 /* return the handle to caller */ 5742 return ((void *)ps_master_ctxt); 5743 } 5744 5745 /*! 5746 ****************************************************************************** 5747 * \if Function name : ihevce_enc_loop_reg_sem_hdls \endif 5748 * 5749 * \brief 5750 * Intialization for ENC_LOOP context state structure . 5751 * 5752 * \param[in] ps_mem_tab : pointer to memory descriptors table 5753 * \param[in] ppv_sem_hdls : Array of semaphore handles 5754 * \param[in] i4_num_proc_thrds : Number of processing threads 5755 * 5756 * \return 5757 * None 5758 * 5759 * \author 5760 * Ittiam 5761 * 5762 ***************************************************************************** 5763 */ 5764 void ihevce_enc_loop_reg_sem_hdls( 5765 void *pv_enc_loop_ctxt, void **ppv_sem_hdls, WORD32 i4_num_proc_thrds) 5766 { 5767 ihevce_enc_loop_master_ctxt_t *ps_master_ctxt; 5768 WORD32 i, enc_frm_id; 5769 5770 ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt; 5771 5772 /*************************************************************************/ 5773 /* --- EncLoop Deblock sync Dep Mngr reg Semaphores -- */ 5774 /*************************************************************************/ 5775 for(enc_frm_id = 0; enc_frm_id < ps_master_ctxt->i4_num_enc_loop_frm_pllel; enc_frm_id++) 5776 { 5777 for(i = 0; i < ps_master_ctxt->i4_num_bitrates; i++) 5778 { 5779 ihevce_dmgr_reg_sem_hdls( 5780 ps_master_ctxt->aapv_dep_mngr_enc_loop_dblk[enc_frm_id][i], 5781 ppv_sem_hdls, 5782 i4_num_proc_thrds); 5783 } 5784 } 5785 5786 /*************************************************************************/ 5787 /* --- EncLoop Top-Right CU synnc Dep Mngr reg Semaphores -- */ 5788 /*************************************************************************/ 5789 for(enc_frm_id = 0; enc_frm_id < ps_master_ctxt->i4_num_enc_loop_frm_pllel; enc_frm_id++) 5790 { 5791 for(i = 0; i < 
ps_master_ctxt->i4_num_bitrates; i++) 5792 { 5793 ihevce_dmgr_reg_sem_hdls( 5794 ps_master_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[enc_frm_id][i], 5795 ppv_sem_hdls, 5796 i4_num_proc_thrds); 5797 } 5798 } 5799 5800 return; 5801 } 5802 5803 /*! 5804 ****************************************************************************** 5805 * \if Function name : ihevce_enc_loop_delete \endif 5806 * 5807 * \brief 5808 * Destroy EncLoop module 5809 * Note : Only Destroys the resources allocated in the module like 5810 * semaphore,etc. Memory free is done Separately using memtabs 5811 * 5812 * \param[in] pv_me_ctxt : pointer to EncLoop ctxt 5813 * 5814 * \return 5815 * None 5816 * 5817 * \author 5818 * Ittiam 5819 * 5820 ***************************************************************************** 5821 */ 5822 void ihevce_enc_loop_delete(void *pv_enc_loop_ctxt) 5823 { 5824 ihevce_enc_loop_master_ctxt_t *ps_enc_loop_ctxt; 5825 WORD32 ctr, enc_frm_id; 5826 5827 ps_enc_loop_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt; 5828 5829 for(enc_frm_id = 0; enc_frm_id < ps_enc_loop_ctxt->i4_num_enc_loop_frm_pllel; enc_frm_id++) 5830 { 5831 for(ctr = 0; ctr < ps_enc_loop_ctxt->i4_num_bitrates; ctr++) 5832 { 5833 /* --- EncLoop Deblock sync Dep Mngr Delete --*/ 5834 ihevce_dmgr_del(ps_enc_loop_ctxt->aapv_dep_mngr_enc_loop_dblk[enc_frm_id][ctr]); 5835 /* --- EncLoop Top-Right CU sync Dep Mngr Delete --*/ 5836 ihevce_dmgr_del(ps_enc_loop_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[enc_frm_id][ctr]); 5837 } 5838 } 5839 } 5840 5841 /*! 
5842 ****************************************************************************** 5843 * \if Function name : ihevce_enc_loop_dep_mngr_frame_reset \endif 5844 * 5845 * \brief 5846 * Frame level Reset for the Dependency Mngrs local to EncLoop., 5847 * ie CU_TopRight and Dblk 5848 * 5849 * \param[in] pv_enc_loop_ctxt : Enc_loop context pointer 5850 * 5851 * \return 5852 * None 5853 * 5854 * \author 5855 * Ittiam 5856 * 5857 ***************************************************************************** 5858 */ 5859 void ihevce_enc_loop_dep_mngr_frame_reset(void *pv_enc_loop_ctxt, WORD32 enc_frm_id) 5860 { 5861 WORD32 ctr, frame_id; 5862 ihevce_enc_loop_master_ctxt_t *ps_master_ctxt; 5863 5864 ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt; 5865 5866 if(1 == ps_master_ctxt->i4_num_enc_loop_frm_pllel) 5867 { 5868 frame_id = 0; 5869 } 5870 else 5871 { 5872 frame_id = enc_frm_id; 5873 } 5874 5875 for(ctr = 0; ctr < ps_master_ctxt->i4_num_bitrates; ctr++) 5876 { 5877 /* Dep. Mngr : Reset the num ctb Deblocked in every row for ENC sync */ 5878 ihevce_dmgr_rst_row_row_sync(ps_master_ctxt->aapv_dep_mngr_enc_loop_dblk[frame_id][ctr]); 5879 5880 /* Dep. Mngr : Reset the TopRight CU Processed in every row for ENC sync */ 5881 ihevce_dmgr_rst_row_row_sync( 5882 ps_master_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[frame_id][ctr]); 5883 } 5884 } 5885 5886 /*! 5887 ****************************************************************************** 5888 * \if Function name : ihevce_enc_loop_frame_init \endif 5889 * 5890 * \brief 5891 * Frame level init of enocde loop function . 5892 * 5893 * \param[in] pv_enc_loop_ctxt : Enc_loop context pointer 5894 * \param[in] pi4_cu_processed : ptr to cur frame cu process in pix. 
5895 * \param[in] aps_ref_list : ref pic list for the current frame 5896 * \param[in] ps_slice_hdr : ptr to current slice header params 5897 * \param[in] ps_pps : ptr to active pps params 5898 * \param[in] ps_sps : ptr to active sps params 5899 * \param[in] ps_vps : ptr to active vps params 5900 5901 5902 * \param[in] i1_weighted_pred_flag : weighted pred enable flag (unidir) 5903 * \param[in] i1_weighted_bipred_flag : weighted pred enable flag (bidir) 5904 * \param[in] log2_luma_wght_denom : down shift factor for weighted pred of luma 5905 * \param[in] log2_chroma_wght_denom : down shift factor for weighted pred of chroma 5906 * \param[in] cur_poc : currennt frame poc 5907 * \param[in] i4_bitrate_instance_num : number indicating the instance of bit-rate for multi-rate encoder 5908 * 5909 * \return 5910 * None 5911 * 5912 * \author 5913 * Ittiam 5914 * 5915 ***************************************************************************** 5916 */ 5917 void ihevce_enc_loop_frame_init( 5918 void *pv_enc_loop_ctxt, 5919 WORD32 i4_frm_qp, 5920 recon_pic_buf_t *(*aps_ref_list)[HEVCE_MAX_REF_PICS * 2], 5921 recon_pic_buf_t *ps_frm_recon, 5922 slice_header_t *ps_slice_hdr, 5923 pps_t *ps_pps, 5924 sps_t *ps_sps, 5925 vps_t *ps_vps, 5926 WORD8 i1_weighted_pred_flag, 5927 WORD8 i1_weighted_bipred_flag, 5928 WORD32 log2_luma_wght_denom, 5929 WORD32 log2_chroma_wght_denom, 5930 WORD32 cur_poc, 5931 WORD32 i4_display_num, 5932 enc_ctxt_t *ps_enc_ctxt, 5933 me_enc_rdopt_ctxt_t *ps_curr_inp_prms, 5934 WORD32 i4_bitrate_instance_num, 5935 WORD32 i4_thrd_id, 5936 WORD32 i4_enc_frm_id, 5937 WORD32 i4_num_bitrates, 5938 WORD32 i4_quality_preset, 5939 void *pv_dep_mngr_encloop_dep_me) 5940 { 5941 /* local variables */ 5942 ihevce_enc_loop_master_ctxt_t *ps_master_ctxt; 5943 ihevce_enc_loop_ctxt_t *ps_ctxt; 5944 WORD32 chroma_qp_offset, i4_div_factor; 5945 WORD8 i1_slice_type = ps_slice_hdr->i1_slice_type; 5946 WORD8 i1_strong_intra_smoothing_enable_flag = 
ps_sps->i1_strong_intra_smoothing_enable_flag; 5947 5948 /* ENC_LOOP master state structure */ 5949 ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt; 5950 5951 /* Nithya: Store the current POC in the slice header */ 5952 ps_slice_hdr->i4_abs_pic_order_cnt = cur_poc; 5953 5954 /* Update the POC list of the current frame to the recon buffer */ 5955 if(ps_slice_hdr->i1_num_ref_idx_l0_active != 0) 5956 { 5957 int i4_i; 5958 for(i4_i = 0; i4_i < ps_slice_hdr->i1_num_ref_idx_l0_active; i4_i++) 5959 { 5960 ps_frm_recon->ai4_col_l0_poc[i4_i] = aps_ref_list[0][i4_i]->i4_poc; 5961 } 5962 } 5963 if(ps_slice_hdr->i1_num_ref_idx_l1_active != 0) 5964 { 5965 int i4_i; 5966 for(i4_i = 0; i4_i < ps_slice_hdr->i1_num_ref_idx_l1_active; i4_i++) 5967 { 5968 ps_frm_recon->ai4_col_l1_poc[i4_i] = aps_ref_list[1][i4_i]->i4_poc; 5969 } 5970 } 5971 5972 /* loop over all the threads */ 5973 // for(ctr = 0; ctr < ps_master_ctxt->i4_num_proc_thrds; ctr++) 5974 { 5975 /* ENC_LOOP state structure */ 5976 ps_ctxt = ps_master_ctxt->aps_enc_loop_thrd_ctxt[i4_thrd_id]; 5977 5978 /* SAO ctxt structure initialization*/ 5979 ps_ctxt->s_sao_ctxt_t.ps_pps = ps_pps; 5980 ps_ctxt->s_sao_ctxt_t.ps_sps = ps_sps; 5981 ps_ctxt->s_sao_ctxt_t.ps_slice_hdr = ps_slice_hdr; 5982 5983 /*bit-rate instance number for Multi-bitrate (MBR) encode */ 5984 ps_ctxt->i4_bitrate_instance_num = i4_bitrate_instance_num; 5985 ps_ctxt->i4_num_bitrates = i4_num_bitrates; 5986 ps_ctxt->i4_chroma_format = ps_enc_ctxt->ps_stat_prms->s_src_prms.i4_chr_format; 5987 ps_ctxt->i4_is_first_query = 1; 5988 ps_ctxt->i4_is_ctb_qp_modified = 0; 5989 5990 /* enc_frm_id for multiframe encode */ 5991 5992 if(1 == ps_enc_ctxt->s_multi_thrd.i4_num_enc_loop_frm_pllel) 5993 { 5994 ps_ctxt->i4_enc_frm_id = 0; 5995 i4_enc_frm_id = 0; 5996 } 5997 else 5998 { 5999 ps_ctxt->i4_enc_frm_id = i4_enc_frm_id; 6000 } 6001 6002 /*Initialize the sub pic rc buf appropriately */ 6003 6004 /*Set the thrd id flag */ 6005 ps_enc_ctxt->s_multi_thrd 
6006 .ai4_thrd_id_valid_flag[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 1; 6007 6008 ps_enc_ctxt->s_multi_thrd 6009 .ai8_nctb_ipe_sad[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0; 6010 ps_enc_ctxt->s_multi_thrd 6011 .ai8_nctb_me_sad[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0; 6012 6013 ps_enc_ctxt->s_multi_thrd 6014 .ai8_nctb_l0_ipe_sad[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0; 6015 ps_enc_ctxt->s_multi_thrd 6016 .ai8_nctb_act_factor[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0; 6017 6018 ps_enc_ctxt->s_multi_thrd 6019 .ai8_nctb_bits_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0; 6020 ps_enc_ctxt->s_multi_thrd 6021 .ai8_acc_bits_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0; 6022 ps_enc_ctxt->s_multi_thrd 6023 .ai8_acc_bits_mul_qs_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0; 6024 ps_enc_ctxt->s_multi_thrd 6025 .ai8_nctb_hdr_bits_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0; 6026 ps_enc_ctxt->s_multi_thrd 6027 .ai8_nctb_mpm_bits_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0; 6028 ps_enc_ctxt->s_multi_thrd.ai4_prev_chunk_qp[i4_enc_frm_id][i4_bitrate_instance_num] = 6029 i4_frm_qp; 6030 6031 /*Frame level data for Sub Pic rc is initalized here */ 6032 /*Can be sent once per frame*/ 6033 { 6034 WORD32 i4_tot_frame_ctb = ps_enc_ctxt->s_frm_ctb_prms.i4_num_ctbs_vert * 6035 ps_enc_ctxt->s_frm_ctb_prms.i4_num_ctbs_horz; 6036 6037 /*Accumalated bits of all cu for required CTBS estimated during RDO evaluation*/ 6038 ps_ctxt->u4_total_cu_bits = 0; 6039 ps_ctxt->u4_total_cu_hdr_bits = 0; 6040 6041 ps_ctxt->u4_cu_tot_bits_into_qscale = 0; 6042 ps_ctxt->u4_cu_tot_bits = 0; 6043 ps_ctxt->u4_total_cu_bits_mul_qs = 0; 6044 ps_ctxt->i4_display_num = i4_display_num; 6045 ps_ctxt->i4_sub_pic_level_rc = ps_enc_ctxt->s_multi_thrd.i4_in_frame_rc_enabled; 6046 /*The Qscale is to be generated every 10th of total frame ctb is 
completed */ 6047 //ps_ctxt->i4_num_ctb_for_out_scale = (10 * i4_tot_frame_ctb)/100 ; 6048 ps_ctxt->i4_num_ctb_for_out_scale = (UPDATE_QP_AT_CTB * i4_tot_frame_ctb) / 100; 6049 6050 ps_ctxt->i4_cu_qp_sub_pic_rc = (1 << QP_LEVEL_MOD_ACT_FACTOR); 6051 /*Sub Pic RC frame level params */ 6052 ps_ctxt->i8_frame_l1_ipe_sad = 6053 ps_curr_inp_prms->ps_curr_inp->s_rc_lap_out.i8_raw_pre_intra_sad; 6054 ps_ctxt->i8_frame_l0_ipe_satd = 6055 ps_curr_inp_prms->ps_curr_inp->s_lap_out.i8_frame_l0_acc_satd; 6056 ps_ctxt->i8_frame_l1_me_sad = 6057 ps_curr_inp_prms->ps_curr_inp->s_rc_lap_out.i8_raw_l1_coarse_me_sad; 6058 ps_ctxt->i8_frame_l1_activity_fact = 6059 ps_curr_inp_prms->ps_curr_inp->s_lap_out.i8_frame_level_activity_fact; 6060 if(ps_ctxt->i4_sub_pic_level_rc) 6061 { 6062 ASSERT( 6063 ps_curr_inp_prms->ps_curr_inp->s_lap_out 6064 .ai4_frame_bits_estimated[ps_ctxt->i4_bitrate_instance_num] != 0); 6065 6066 ps_ctxt->ai4_frame_bits_estimated[ps_ctxt->i4_enc_frm_id] 6067 [ps_ctxt->i4_bitrate_instance_num] = 6068 ps_curr_inp_prms->ps_curr_inp->s_lap_out 6069 .ai4_frame_bits_estimated[ps_ctxt->i4_bitrate_instance_num]; 6070 } 6071 //ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_scene_type = 1; 6072 6073 ps_ctxt->i4_is_I_scenecut = 6074 ((ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_scene_type == SCENE_TYPE_SCENE_CUT) && 6075 (ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_pic_type == IV_IDR_FRAME || 6076 ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_pic_type == IV_I_FRAME)); 6077 6078 ps_ctxt->i4_is_non_I_scenecut = 6079 ((ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_scene_type == SCENE_TYPE_SCENE_CUT) && 6080 (ps_ctxt->i4_is_I_scenecut == 0)); 6081 6082 /*ps_ctxt->i4_is_I_only_scd = ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_is_I_only_scd; 6083 ps_ctxt->i4_is_non_I_scd = ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_is_non_I_scd;*/ 6084 ps_ctxt->i4_is_model_valid = 6085 ps_curr_inp_prms->ps_curr_inp->s_rc_lap_out.i4_is_model_valid; 6086 } 6087 /* cb and cr offsets are assumed to be same 
*/ 6088 chroma_qp_offset = ps_slice_hdr->i1_slice_cb_qp_offset + ps_pps->i1_pic_cb_qp_offset; 6089 6090 /* assumption of cb = cr qp */ 6091 ASSERT(ps_slice_hdr->i1_slice_cb_qp_offset == ps_slice_hdr->i1_slice_cr_qp_offset); 6092 ASSERT(ps_pps->i1_pic_cb_qp_offset == ps_pps->i1_pic_cr_qp_offset); 6093 6094 ps_ctxt->u1_is_input_data_hbd = (ps_sps->i1_bit_depth_luma_minus8 > 0); 6095 6096 ps_ctxt->u1_bit_depth = ps_sps->i1_bit_depth_luma_minus8 + 8; 6097 6098 ps_ctxt->s_mc_ctxt.i4_bit_depth = ps_ctxt->u1_bit_depth; 6099 ps_ctxt->s_mc_ctxt.u1_chroma_array_type = ps_ctxt->u1_chroma_array_type; 6100 6101 /*remember chroma qp offset as qp related parameters are calculated at CU level*/ 6102 ps_ctxt->i4_chroma_qp_offset = chroma_qp_offset; 6103 ps_ctxt->i1_cu_qp_delta_enable = ps_pps->i1_cu_qp_delta_enabled_flag; 6104 ps_ctxt->i1_entropy_coding_sync_enabled_flag = ps_pps->i1_entropy_coding_sync_enabled_flag; 6105 6106 ps_ctxt->i4_is_ref_pic = ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_is_ref_pic; 6107 ps_ctxt->i4_temporal_layer = ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_temporal_lyr_id; 6108 ps_ctxt->i4_use_const_lamda_modifier = USE_CONSTANT_LAMBDA_MODIFIER; 6109 ps_ctxt->i4_use_const_lamda_modifier = 6110 ps_ctxt->i4_use_const_lamda_modifier || 6111 ((ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet & 6112 (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER)) && 6113 ((ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet & 6114 (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_NOISE_PRESERVATION)) || 6115 (ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet & 6116 (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_1)) || 6117 (ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet & 6118 (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_2)) || 6119 (ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet & 6120 (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_3)))); 6121 6122 { 6123 ps_ctxt->f_i_pic_lamda_modifier = 6124 
ps_curr_inp_prms->ps_curr_inp->s_lap_out.f_i_pic_lamda_modifier; 6125 } 6126 6127 ps_ctxt->i4_frame_qp = i4_frm_qp; 6128 ps_ctxt->i4_frame_mod_qp = i4_frm_qp; 6129 ps_ctxt->i4_cu_qp = i4_frm_qp; 6130 ps_ctxt->i4_prev_cu_qp = i4_frm_qp; 6131 ps_ctxt->i4_chrm_cu_qp = 6132 (ps_ctxt->u1_chroma_array_type == 2) 6133 ? MIN(i4_frm_qp + chroma_qp_offset, 51) 6134 : gai1_ihevc_chroma_qp_scale[i4_frm_qp + chroma_qp_offset + MAX_QP_BD_OFFSET]; 6135 6136 ps_ctxt->i4_cu_qp_div6 = (i4_frm_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) / 6; 6137 i4_div_factor = (i4_frm_qp + 3) / 6; 6138 i4_div_factor = CLIP3(i4_div_factor, 3, 6); 6139 ps_ctxt->i4_cu_qp_mod6 = (i4_frm_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) % 6; 6140 6141 ps_ctxt->i4_chrm_cu_qp_div6 = 6142 (ps_ctxt->i4_chrm_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) / 6; 6143 ps_ctxt->i4_chrm_cu_qp_mod6 = 6144 (ps_ctxt->i4_chrm_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) % 6; 6145 6146 #define INTER_RND_QP_BY_6 6147 #ifdef INTER_RND_QP_BY_6 6148 6149 { /*1/6 rounding for 8 bit b frames*/ 6150 ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] = 85 6151 /*((1 << QUANT_ROUND_FACTOR_Q) / 6)*/; 6152 } 6153 #else 6154 /* quant factor without RDOQ is 1/6th of shift for inter : like in H264 */ 6155 ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] = (1 << QUANT_ROUND_FACTOR_Q) / 3; 6156 #endif 6157 6158 if(ISLICE == i1_slice_type) 6159 { 6160 /* quant factor without RDOQ is 1/3rd of shift for intra : like in H264 */ 6161 ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] = 171 6162 /*((1 << QUANT_ROUND_FACTOR_Q) / 6)*/; 6163 } 6164 else 6165 { 6166 /* quant factor without RDOQ is 1/6th of shift for intra in inter pic */ 6167 ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] = 6168 ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER]; 6169 /* (1 << QUANT_ROUND_FACTOR_Q) / 6; */ 6170 } 6171 6172 ps_ctxt->i1_strong_intra_smoothing_enable_flag = i1_strong_intra_smoothing_enable_flag; 6173 6174 ps_ctxt->i1_slice_type = i1_slice_type; 6175 6176 /* intialize the inter pred 
(MC) context at frame level */ 6177 ps_ctxt->s_mc_ctxt.ps_ref_list = aps_ref_list; 6178 ps_ctxt->s_mc_ctxt.i1_weighted_pred_flag = i1_weighted_pred_flag; 6179 ps_ctxt->s_mc_ctxt.i1_weighted_bipred_flag = i1_weighted_bipred_flag; 6180 ps_ctxt->s_mc_ctxt.i4_log2_luma_wght_denom = log2_luma_wght_denom; 6181 ps_ctxt->s_mc_ctxt.i4_log2_chroma_wght_denom = log2_chroma_wght_denom; 6182 6183 /* intialize the MV pred context at frame level */ 6184 ps_ctxt->s_mv_pred_ctxt.ps_ref_list = aps_ref_list; 6185 ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr = ps_slice_hdr; 6186 ps_ctxt->s_mv_pred_ctxt.ps_sps = ps_sps; 6187 ps_ctxt->s_mv_pred_ctxt.i4_log2_parallel_merge_level_minus2 = 6188 ps_pps->i1_log2_parallel_merge_level - 2; 6189 6190 #if ADAPT_COLOCATED_FROM_L0_FLAG 6191 if(ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr->i1_slice_temporal_mvp_enable_flag) 6192 { 6193 if((ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr->i1_num_ref_idx_l1_active > 0) && 6194 (ps_ctxt->s_mv_pred_ctxt.ps_ref_list[1][0]->i4_frame_qp < 6195 ps_ctxt->s_mv_pred_ctxt.ps_ref_list[0][0]->i4_frame_qp)) 6196 { 6197 ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr->i1_collocated_from_l0_flag = 1; 6198 } 6199 } 6200 #endif 6201 /* Initialization of deblocking params */ 6202 ps_ctxt->s_deblk_prms.i4_beta_offset_div2 = ps_slice_hdr->i1_beta_offset_div2; 6203 ps_ctxt->s_deblk_prms.i4_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2; 6204 6205 ps_ctxt->s_deblk_prms.i4_cb_qp_indx_offset = ps_pps->i1_pic_cb_qp_offset; 6206 6207 ps_ctxt->s_deblk_prms.i4_cr_qp_indx_offset = ps_pps->i1_pic_cr_qp_offset; 6208 /*init frame level stat accumualtion parameters */ 6209 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num] 6210 ->u4_frame_sad_acc = 0; 6211 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num] 6212 ->u4_frame_intra_sad_acc = 0; 6213 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num] 6214 ->u4_frame_open_loop_intra_sad = 0; 6215 
ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num] 6216 ->i8_frame_open_loop_ssd = 0; 6217 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num] 6218 ->u4_frame_inter_sad_acc = 0; 6219 6220 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num] 6221 ->i8_frame_cost_acc = 0; 6222 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num] 6223 ->i8_frame_intra_cost_acc = 0; 6224 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num] 6225 ->i8_frame_inter_cost_acc = 0; 6226 6227 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num] 6228 ->u4_frame_intra_sad = 0; 6229 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num] 6230 ->u4_frame_rdopt_bits = 0; 6231 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num] 6232 ->u4_frame_rdopt_header_bits = 0; 6233 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num] 6234 ->i4_qp_normalized_8x8_cu_sum[0] = 0; 6235 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num] 6236 ->i4_qp_normalized_8x8_cu_sum[1] = 0; 6237 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num] 6238 ->i4_8x8_cu_sum[0] = 0; 6239 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num] 6240 ->i4_8x8_cu_sum[1] = 0; 6241 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num] 6242 ->i8_sad_by_qscale[0] = 0; 6243 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num] 6244 ->i8_sad_by_qscale[1] = 0; 6245 /* Compute the frame_qstep */ 6246 GET_FRAME_QSTEP_FROM_QP(ps_ctxt->i4_frame_qp, ps_ctxt->i4_frame_qstep); 6247 6248 ps_ctxt->u1_max_tr_depth = ps_sps->i1_max_transform_hierarchy_depth_inter; 6249 6250 ps_ctxt->ps_rc_quant_ctxt = &ps_enc_ctxt->s_rc_quant; 6251 /* intialize the cabac rdopt 
context at frame level */ 6252 ihevce_entropy_rdo_frame_init( 6253 &ps_ctxt->s_rdopt_entropy_ctxt, 6254 ps_slice_hdr, 6255 ps_pps, 6256 ps_sps, 6257 ps_vps, 6258 ps_master_ctxt->au1_cu_skip_top_row, 6259 &ps_enc_ctxt->s_rc_quant); 6260 6261 /* register the dep mngr instance for forward ME sync */ 6262 ps_ctxt->pv_dep_mngr_encloop_dep_me = pv_dep_mngr_encloop_dep_me; 6263 } 6264 } 6265 /* 6266 ****************************************************************************** 6267 * \if Function name : ihevce_enc_loop_get_frame_rc_prms \endif 6268 * 6269 * \brief 6270 * returns Nil 6271 * 6272 * \param[in] pv_enc_loop_ctxt : pointer to encode loop context 6273 * \param[out]ps_rc_prms : ptr to frame level info structure 6274 * 6275 * \return 6276 * None 6277 * 6278 * \author 6279 * Ittiam 6280 * 6281 ***************************************************************************** 6282 */ 6283 void ihevce_enc_loop_get_frame_rc_prms( 6284 void *pv_enc_loop_ctxt, 6285 rc_bits_sad_t *ps_rc_prms, 6286 WORD32 i4_br_id, //bitrate instance id 6287 WORD32 i4_enc_frm_id) // frame id 6288 { 6289 /*Get the master thread pointer*/ 6290 ihevce_enc_loop_master_ctxt_t *ps_master_ctxt; 6291 ihevce_enc_loop_ctxt_t *ps_ctxt; 6292 UWORD32 total_frame_intra_sad = 0, total_frame_open_loop_intra_sad = 0; 6293 LWORD64 i8_total_ssd_frame = 0; 6294 UWORD32 total_frame_sad = 0; 6295 UWORD32 total_frame_rdopt_bits = 0; 6296 UWORD32 total_frame_rdopt_header_bits = 0; 6297 WORD32 i4_qp_normalized_8x8_cu_sum[2] = { 0, 0 }; 6298 WORD32 i4_8x8_cu_sum[2] = { 0, 0 }; 6299 LWORD64 i8_sad_by_qscale[2] = { 0, 0 }; 6300 WORD32 i4_curr_qp_acc = 0; 6301 WORD32 i; 6302 6303 /* ENC_LOOP master state structure */ 6304 ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt; 6305 6306 if(1 == ps_master_ctxt->i4_num_enc_loop_frm_pllel) 6307 { 6308 i4_enc_frm_id = 0; 6309 } 6310 /*loop through all threads and accumulate intra sad across all threads*/ 6311 for(i = 0; i < ps_master_ctxt->i4_num_proc_thrds; i++) 
6312 { 6313 /* ENC_LOOP state structure */ 6314 ps_ctxt = ps_master_ctxt->aps_enc_loop_thrd_ctxt[i]; 6315 total_frame_open_loop_intra_sad += 6316 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_open_loop_intra_sad; 6317 i8_total_ssd_frame += 6318 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_frame_open_loop_ssd; 6319 total_frame_intra_sad += 6320 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_intra_sad; 6321 total_frame_sad += 6322 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_sad_acc; 6323 total_frame_rdopt_bits += 6324 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_rdopt_bits; 6325 total_frame_rdopt_header_bits += 6326 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_rdopt_header_bits; 6327 i4_qp_normalized_8x8_cu_sum[0] += ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id] 6328 ->i4_qp_normalized_8x8_cu_sum[0]; 6329 i4_qp_normalized_8x8_cu_sum[1] += ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id] 6330 ->i4_qp_normalized_8x8_cu_sum[1]; 6331 i4_8x8_cu_sum[0] += 6332 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i4_8x8_cu_sum[0]; 6333 i4_8x8_cu_sum[1] += 6334 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i4_8x8_cu_sum[1]; 6335 i8_sad_by_qscale[0] += 6336 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_sad_by_qscale[0]; 6337 i8_sad_by_qscale[1] += 6338 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_sad_by_qscale[1]; 6339 } 6340 6341 ps_rc_prms->u4_open_loop_intra_sad = total_frame_open_loop_intra_sad; 6342 ps_rc_prms->i8_total_ssd_frame = i8_total_ssd_frame; 6343 ps_rc_prms->u4_total_sad = total_frame_sad; 6344 ps_rc_prms->u4_total_texture_bits = total_frame_rdopt_bits - total_frame_rdopt_header_bits; 6345 ps_rc_prms->u4_total_header_bits = total_frame_rdopt_header_bits; 6346 /*This accumulation of intra frame sad is not intact. 
This can only be a temp change*/ 6347 ps_rc_prms->u4_total_intra_sad = total_frame_intra_sad; 6348 ps_rc_prms->i4_qp_normalized_8x8_cu_sum[0] = i4_qp_normalized_8x8_cu_sum[0]; 6349 ps_rc_prms->i4_qp_normalized_8x8_cu_sum[1] = i4_qp_normalized_8x8_cu_sum[1]; 6350 ps_rc_prms->i4_8x8_cu_sum[0] = i4_8x8_cu_sum[0]; 6351 ps_rc_prms->i4_8x8_cu_sum[1] = i4_8x8_cu_sum[1]; 6352 ps_rc_prms->i8_sad_by_qscale[0] = i8_sad_by_qscale[0]; 6353 ps_rc_prms->i8_sad_by_qscale[1] = i8_sad_by_qscale[1]; 6354 } 6355