1 /****************************************************************************** 2 * 3 * Copyright (C) 2018 The Android Open Source Project 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ***************************************************************************** 18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore 19 */ 20 21 /*! 22 ****************************************************************************** 23 * \file ihevce_recur_bracketing.c 24 * 25 * \brief 26 * This file contains interface functions of recursive bracketing 27 * module 28 * \date 29 * 12/02/2012 30 * 31 * \author 32 * Ittiam 33 * 34 * List of Functions 35 * 36 * 37 ****************************************************************************** 38 */ 39 40 /*****************************************************************************/ 41 /* File Includes */ 42 /*****************************************************************************/ 43 /* System include files */ 44 #include <stdio.h> 45 #include <string.h> 46 #include <stdlib.h> 47 #include <assert.h> 48 #include <stdarg.h> 49 #include <math.h> 50 51 /* User include files */ 52 #include "ihevc_typedefs.h" 53 #include "itt_video_api.h" 54 #include "ihevce_api.h" 55 56 #include "rc_cntrl_param.h" 57 #include "rc_frame_info_collector.h" 58 #include "rc_look_ahead_params.h" 59 60 #include "ihevc_defs.h" 61 #include "ihevc_structs.h" 62 #include "ihevc_platform_macros.h" 63 #include "ihevc_deblk.h" 64 #include "ihevc_itrans_recon.h" 65 #include "ihevc_chroma_itrans_recon.h" 66 #include "ihevc_chroma_intra_pred.h" 67 #include "ihevc_intra_pred.h" 68 #include "ihevc_inter_pred.h" 69 #include "ihevc_mem_fns.h" 70 #include "ihevc_padding.h" 71 #include "ihevc_weighted_pred.h" 72 #include "ihevc_sao.h" 73 #include "ihevc_resi_trans.h" 74 #include "ihevc_quant_iquant_ssd.h" 75 #include "ihevc_cabac_tables.h" 76 77 #include "ihevce_defs.h" 78 #include "ihevce_lap_enc_structs.h" 79 #include "ihevce_multi_thrd_structs.h" 80 #include "ihevce_me_common_defs.h" 81 #include "ihevce_had_satd.h" 82 #include "ihevce_error_codes.h" 83 #include "ihevce_bitstream.h" 84 #include "ihevce_cabac.h" 85 #include "ihevce_rdoq_macros.h" 86 #include "ihevce_function_selector.h" 87 #include "ihevce_enc_structs.h" 88 #include "ihevce_entropy_structs.h" 89 #include "ihevce_cmn_utils_instr_set_router.h" 90 #include "ihevce_enc_loop_structs.h" 91 #include "ihevce_ipe_instr_set_router.h" 92 #include "ihevce_ipe_structs.h" 93 #include "ihevce_ipe_pass.h" 94 #include "ihevce_recur_bracketing.h" 95 #include "ihevce_nbr_avail.h" 96 #include "ihevc_common_tables.h" 97 #include "ihevce_decomp_pre_intra_structs.h" 98 #include "ihevce_decomp_pre_intra_pass.h" 99 100 #include "cast_types.h" 101 #include "osal.h" 102 #include "osal_defaults.h" 103 104 /*****************************************************************************/ 105 /* Constant Macros */ 106 /*****************************************************************************/ 107 #define IP_DBG_L1_l2 0 108 #define CHILD_BIAS 12 109 110 /*****************************************************************************/ 111 /* Globals */ 112 /*****************************************************************************/ 113 extern pf_intra_pred g_apf_lum_ip[10]; 114 115 extern WORD32 g_i4_ip_funcs[MAX_NUM_IP_MODES]; 116 117 UWORD8 gau1_cu_pos_x[64] = { 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3, 4, 5, 4, 5, 6, 7, 118 6, 7, 4, 5, 4, 5, 6, 7, 6, 7, 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 119 2, 3, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 4, 5, 4, 5, 6, 7, 6, 7 }; 120 121 UWORD8 gau1_cu_pos_y[64] = { 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3, 0, 0, 1, 1, 0, 0, 122 1, 1, 2, 2, 3, 3, 2, 2, 3, 3, 4, 4, 5, 5, 4, 4, 5, 5, 6, 6, 7, 7, 123 6, 6, 7, 7, 4, 4, 5, 5, 4, 4, 5, 5, 6, 6, 7, 7, 6, 6, 7, 7 }; 124 125 #define RESET_BIT(x, bit) (x = x & ~((WORD32)1 << bit)) 126 127 /*****************************************************************************/ 128 /* Function Definitions */ 129 /*****************************************************************************/ 130 131 /*! 132 ****************************************************************************** 133 * \if Function name : ihevce_update_cand_list \endif 134 * 135 * \brief 136 * Final Candidate list population, nbr flag andd nbr mode update function 137 * 138 * \param[in] ps_row_cu : pointer to cu analyse struct 139 * \param[in] ps_cu_node : pointer to cu node info buffer 140 * \param[in] ps_ed_blk_l1 : pointer to level 1 and 2 decision buffer 141 * \param[in] pu1_cand_mode_list : pointer to candidate list buffer 142 * 143 * \return 144 * None 145 * 146 * \author 147 * Ittiam 148 * 149 ***************************************************************************** 150 */ 151 void ihevce_update_cand_list( 152 ihevce_ipe_cu_tree_t *ps_cu_node, ihevce_ed_blk_t *ps_ed_blk_l1, ihevce_ipe_ctxt_t *ps_ctxt) 153 { 154 WORD32 row, col, x, y, size; 155 156 /* Candidate mode Update */ 157 (void)ps_ed_blk_l1; 158 /* Update CTB mode map for the finalised CU */ 159 x = ((ps_cu_node->u2_x0 << 3) >> 2) + 1; 160 y = ((ps_cu_node->u2_y0 << 3) >> 2) + 1; 161 size = ps_cu_node->u1_cu_size >> 2; 162 for(row = y; row < (y + size); row++) 163 { 164 for(col = x; col < (x + size); col++) 165 { 166 ps_ctxt->au1_ctb_mode_map[row][col] = ps_cu_node->best_mode; 167 } 168 } 169 return; 170 } 171 172 /*! 173 ****************************************************************************** 174 * \if Function name : ihevce_intra_populate_mode_bits_cost_bracketing \endif 175 * 176 * \brief 177 * Mpm indx calc function based on left and top available modes 178 * 179 * \param[in] top_intra_mode : Top available intra mode 180 * \param[in] left_intra_mode : Left available intra mode 181 * \param[in] available_top : Top availability flag 182 * \param[in] available_left : Left availability flag 183 * \param[in] cu_pos_y : cu position wrt to CTB 184 * \param[in] mode_bits_cost : pointer to mode bits buffer 185 * \param[in] lambda : Lambda value (SAD/SATD) 186 * \param[in] cand_mode_list : pointer to candidate list buffer 187 * 188 * \return 189 * None 190 * 191 * \author 192 * Ittiam 193 * 194 ***************************************************************************** 195 */ 196 void ihevce_intra_populate_mode_bits_cost_bracketing( 197 WORD32 top_intra_mode, 198 WORD32 left_intra_mode, 199 WORD32 available_top, 200 WORD32 available_left, 201 WORD32 cu_pos_y, 202 UWORD16 *mode_bits_cost, 203 UWORD16 *mode_bits, 204 WORD32 lambda, 205 WORD32 *cand_mode_list) 206 { 207 /* local variables */ 208 WORD32 i; 209 WORD32 cand_intra_pred_mode_left, cand_intra_pred_mode_top; 210 211 UWORD16 one_bits_cost = 212 COMPUTE_RATE_COST_CLIP30(4, lambda, (LAMBDA_Q_SHIFT + 1)); //1.5 * lambda 213 UWORD16 two_bits_cost = 214 COMPUTE_RATE_COST_CLIP30(6, lambda, (LAMBDA_Q_SHIFT + 1)); //2.5 * lambda 215 UWORD16 five_bits_cost = 216 COMPUTE_RATE_COST_CLIP30(12, lambda, (LAMBDA_Q_SHIFT + 1)); //5.5 * lambda 217 218 for(i = 0; i < 35; i++) 219 { 220 mode_bits_cost[i] = five_bits_cost; 221 mode_bits[i] = 5; 222 } 223 224 /* EIID: set availability flag to zero if modes are invalid. 225 Required since some CU's might be skipped (though available) 226 and their modes will be set to 255 (-1)*/ 227 if(35 < top_intra_mode || 0 > top_intra_mode) 228 available_top = 0; 229 if(35 < left_intra_mode || 0 > left_intra_mode) 230 available_left = 0; 231 232 /* Calculate cand_intra_pred_mode_N as per sec. 8.4.2 in JCTVC-J1003_d7 */ 233 /* N = top */ 234 if(0 == available_top) 235 { 236 cand_intra_pred_mode_top = INTRA_DC; 237 } 238 /* for neighbour != INTRA, setting DC is done outside */ 239 else if(0 == cu_pos_y) /* It's on the CTB boundary */ 240 { 241 cand_intra_pred_mode_top = INTRA_DC; 242 } 243 else 244 { 245 cand_intra_pred_mode_top = top_intra_mode; 246 } 247 248 /* N = left */ 249 if(0 == available_left) 250 { 251 cand_intra_pred_mode_left = INTRA_DC; 252 //cand_intra_pred_mode_left = cand_intra_pred_mode_top; 253 } 254 /* for neighbour != INTRA, setting DC is done outside */ 255 else 256 { 257 cand_intra_pred_mode_left = left_intra_mode; 258 } 259 260 /* Calculate cand_mode_list as per sec. 8.4.2 in JCTVC-J1003_d7 */ 261 if(cand_intra_pred_mode_left == cand_intra_pred_mode_top) 262 { 263 if(cand_intra_pred_mode_left < 2) 264 { 265 cand_mode_list[0] = INTRA_PLANAR; 266 cand_mode_list[1] = INTRA_DC; 267 cand_mode_list[2] = INTRA_ANGULAR(26); /* angular 26 = Vertical */ 268 } 269 else 270 { 271 cand_mode_list[0] = cand_intra_pred_mode_left; 272 cand_mode_list[1] = 2 + ((cand_intra_pred_mode_left + 29) % 32); 273 cand_mode_list[2] = 2 + ((cand_intra_pred_mode_left - 2 + 1) % 32); 274 } 275 } 276 else 277 { 278 if(0 == available_left) 279 { 280 cand_mode_list[0] = cand_intra_pred_mode_top; 281 cand_mode_list[1] = cand_intra_pred_mode_left; 282 } 283 else 284 { 285 cand_mode_list[0] = cand_intra_pred_mode_left; 286 cand_mode_list[1] = cand_intra_pred_mode_top; 287 } 288 if((cand_intra_pred_mode_left != INTRA_PLANAR) && 289 (cand_intra_pred_mode_top != INTRA_PLANAR)) 290 { 291 cand_mode_list[2] = INTRA_PLANAR; 292 } 293 else if((cand_intra_pred_mode_left != INTRA_DC) && (cand_intra_pred_mode_top != INTRA_DC)) 294 { 295 cand_mode_list[2] = INTRA_DC; 296 } 297 else 298 { 299 cand_mode_list[2] = INTRA_ANGULAR(26); 300 } 301 } 302 mode_bits_cost[cand_mode_list[0]] = one_bits_cost; 303 mode_bits_cost[cand_mode_list[1]] = two_bits_cost; 304 mode_bits_cost[cand_mode_list[2]] = two_bits_cost; 305 306 mode_bits[cand_mode_list[0]] = 2; 307 mode_bits[cand_mode_list[1]] = 3; 308 mode_bits[cand_mode_list[2]] = 3; 309 } 310 311 /*! 312 ****************************************************************************** 313 * \if Function name : ihevce_pu_calc_4x4_blk \endif 314 * 315 * \brief 316 * 4x4 pu (8x8 CU) mode decision using step 8421 method 317 * 318 * \param[in] ps_cu_node : pointer to cu node info buffer 319 * \param[in] pu1_src : pointer to src pixels 320 * \param[in] src_stride : frm source stride 321 * \param[in] ref : pointer to reference pixels for prediction 322 * \param[in] cand_mode_list : pointer to candidate list buffer 323 * \param[in] best_costs_4x4 : pointer to 3 best cost buffer 324 * \param[in] best_modes_4x4 : pointer to 3 best mode buffer 325 * 326 * \return 327 * None 328 * 329 * \author 330 * Ittiam 331 * 332 ***************************************************************************** 333 */ 334 void ihevce_pu_calc_4x4_blk( 335 ihevce_ipe_ctxt_t *ps_ctxt, 336 ihevce_ipe_cu_tree_t *ps_cu_node, 337 UWORD8 *pu1_src, 338 WORD32 src_stride, 339 UWORD8 *ref, 340 UWORD16 *mode_bits_cost, 341 WORD32 *best_costs_4x4, 342 UWORD8 *best_modes_4x4, 343 func_selector_t *ps_func_selector) 344 { 345 WORD16 *pi2_trans_tmp = ps_ctxt->pi2_trans_tmp; 346 WORD16 *pi2_trans_out = ps_ctxt->pi2_trans_out; 347 UWORD8 u1_use_satd = ps_ctxt->u1_use_satd; 348 UWORD8 u1_level_1_refine_on = ps_ctxt->u1_level_1_refine_on; 349 350 WORD32 i, j = 0, i_end; 351 UWORD8 mode, best_amode = 255; 352 UWORD8 pred[16]; 353 354 UWORD16 sad; 355 WORD32 sad_cost = 0; 356 WORD32 best_asad_cost = 0xFFFFF; 357 WORD32 temp; 358 UWORD8 modes_to_eval[5]; 359 WORD32 costs_4x4[5]; 360 UWORD8 modes_4x4[5] = { 0, 1, 2, 3, 4 }; 361 362 /* LO resolution hence low resolution disable */ 363 WORD32 u1_low_resol = 0; 364 UWORD8 au1_best_modes[1] = { 0 }; 365 WORD32 ai4_best_sad_costs[1] = { 0 }; 366 367 WORD16 *pi2_tmp = &pi2_trans_tmp[0]; 368 369 ihevce_ipe_optimised_function_list_t *ps_ipe_optimised_function_list = 370 &ps_ctxt->s_ipe_optimised_function_list; 371 372 //apf_resd_trns[0] = &ihevc_resi_trans_4x4_ttype1; 373 //apf_resd_trns[0] = &ihevc_HAD_4x4_8bit; 374 375 for(i = 0; i < 5; i++) 376 { 377 costs_4x4[i] = MAX_INTRA_COST_IPE; 378 } 379 380 ps_ipe_optimised_function_list->pf_ed_4x4_find_best_modes( 381 pu1_src, 382 src_stride, 383 ref, 384 mode_bits_cost, 385 au1_best_modes, 386 ai4_best_sad_costs, 387 u1_low_resol, 388 ps_ipe_optimised_function_list->pf_4x4_sad_computer); 389 390 best_amode = au1_best_modes[0]; 391 best_asad_cost = ai4_best_sad_costs[0]; 392 393 ASSERT(best_amode != 255); 394 /* Around best level 4 angular mode, search for best level 2 mode */ 395 modes_to_eval[0] = best_amode - 2; 396 modes_to_eval[1] = best_amode + 2; 397 i = 0; 398 i_end = 2; 399 if(best_amode == 2) 400 i = 1; 401 else if(best_amode == 34) 402 i_end = 1; 403 for(; i < i_end; i++) 404 { 405 mode = modes_to_eval[i]; 406 407 g_apf_lum_ip[g_i4_ip_funcs[mode]](&ref[0], 0, &pred[0], 4, 4, mode); 408 409 sad = ps_ipe_optimised_function_list->pf_4x4_sad_computer(pu1_src, &pred[0], src_stride, 4); 410 411 sad_cost = sad; 412 sad_cost += mode_bits_cost[mode]; 413 414 if(sad_cost < best_asad_cost) 415 { 416 best_amode = mode; 417 best_asad_cost = sad_cost; 418 } 419 } 420 421 /* Around best level 2 angular mode, search for best level 1 mode */ 422 /* Also evaluate for non-angular mode */ 423 424 i = 0; 425 /*Level 1 refinement is disabled for ES preset */ 426 if(1 == u1_level_1_refine_on) 427 { 428 if(best_amode != 2) 429 modes_to_eval[i++] = best_amode - 1; 430 modes_to_eval[i++] = best_amode; 431 } 432 433 modes_to_eval[i++] = 0; 434 modes_to_eval[i++] = 1; 435 436 if(1 == u1_level_1_refine_on) 437 { 438 if(best_amode != 34) 439 modes_to_eval[i++] = best_amode + 1; 440 } 441 i_end = i; 442 i = 0; 443 444 for(; i < i_end; i++) 445 { 446 mode = modes_to_eval[i]; 447 448 g_apf_lum_ip[g_i4_ip_funcs[mode]](&ref[0], 0, &pred[0], 4, 4, mode); 449 450 /* Hard coding to use SATD */ 451 if(u1_use_satd) 452 { 453 ps_func_selector->ihevc_resi_trans_4x4_ttype1_fptr( 454 pu1_src, &pred[0], (WORD32 *)pi2_tmp, pi2_trans_out, src_stride, 4, (4 << 16) | 0); 455 456 sad = ihevce_ipe_pass_satd(pi2_trans_out, 4, 4); 457 } 458 else 459 { 460 sad = ps_ipe_optimised_function_list->pf_4x4_sad_computer( 461 pu1_src, &pred[0], src_stride, 4); 462 } 463 sad_cost = sad; 464 sad_cost += mode_bits_cost[mode]; 465 466 costs_4x4[i] = sad_cost; 467 } 468 469 /* Arrange the reference array in ascending order */ 470 for(i = 0; i < (i_end - 1); i++) 471 { 472 for(j = i + 1; j < i_end; j++) 473 { 474 if(costs_4x4[i] > costs_4x4[j]) 475 { 476 temp = costs_4x4[i]; 477 costs_4x4[i] = costs_4x4[j]; 478 costs_4x4[j] = temp; 479 480 temp = modes_4x4[i]; 481 modes_4x4[i] = modes_4x4[j]; 482 modes_4x4[j] = temp; 483 } 484 } 485 } 486 for(i = 0; i < 3; i++) 487 { 488 best_costs_4x4[i] = costs_4x4[i]; 489 best_modes_4x4[i] = modes_to_eval[modes_4x4[i]]; 490 } 491 492 { 493 ps_cu_node->best_mode = best_modes_4x4[0]; 494 ps_cu_node->best_cost = best_costs_4x4[0]; 495 ps_cu_node->best_satd = best_costs_4x4[0] - mode_bits_cost[ps_cu_node->best_mode]; 496 } 497 } 498 499 /*! 500 ****************************************************************************** 501 * \if Function name : ihevce_pu_calc_8x8_blk \endif 502 * 503 * \brief 504 * 4x4 pu (8x8 CU) mode decision loop using step 8421 method 505 * 506 * \param[in] ps_curr_src : pointer to src pixels struct 507 * \param[in] ps_ctxt : pointer to IPE context struct 508 * \param[in] ps_cu_node : pointer to cu node info buffer 509 * 510 * \return 511 * None 512 * 513 * \author 514 * Ittiam 515 * 516 ***************************************************************************** 517 */ 518 void ihevce_pu_calc_8x8_blk( 519 iv_enc_yuv_buf_t *ps_curr_src, 520 ihevce_ipe_ctxt_t *ps_ctxt, 521 ihevce_ipe_cu_tree_t *ps_cu_node, 522 func_selector_t *ps_func_selector) 523 { 524 WORD32 i, j; 525 WORD32 nbr_flags; 526 nbr_avail_flags_t s_nbr; 527 WORD32 trans_size = ps_cu_node->ps_parent->u1_cu_size >> 1; 528 529 UWORD8 *pu1_src_4x4; 530 WORD32 xA, xB, yA, yB; 531 //WORD32 x, y, size; 532 WORD32 top_intra_mode; 533 WORD32 left_intra_mode; 534 // WORD8 *top_intra_mode_ptr; 535 // WORD8 *left_intra_mode_ptr; 536 UWORD8 *pu1_orig; 537 WORD32 src_strd = ps_curr_src->i4_y_strd; 538 539 WORD32 cu_pos_x = ps_cu_node->ps_parent->u2_x0 << 1; 540 WORD32 cu_pos_y = ps_cu_node->ps_parent->u2_y0 << 1; 541 ihevc_intra_pred_luma_ref_substitution_ft *ihevc_intra_pred_luma_ref_substitution_fptr; 542 543 ihevc_intra_pred_luma_ref_substitution_fptr = 544 ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr; 545 546 pu1_orig = (UWORD8 *)(ps_curr_src->pv_y_buf) + 547 ((ps_cu_node->ps_parent->u2_y0 << 3) * src_strd) + 548 (ps_cu_node->ps_parent->u2_x0 << 3); 549 for(i = 0; i < 2; i++) 550 { 551 for(j = 0; j < 2; j++) 552 { 553 WORD32 cand_mode_list[3]; 554 pu1_src_4x4 = pu1_orig + (i * trans_size * src_strd) + (j * trans_size); 555 /* get the neighbour availability flags */ 556 nbr_flags = ihevce_get_nbr_intra( 557 &s_nbr, 558 ps_ctxt->pu1_ctb_nbr_map, 559 ps_ctxt->i4_nbr_map_strd, 560 cu_pos_x + ((j) * (trans_size >> 2)), 561 cu_pos_y + ((i) * (trans_size >> 2)), 562 trans_size >> 2); 563 564 /* call the function which populates sad cost for all the modes */ 565 xA = ((ps_cu_node->ps_parent->u2_x0 << 3) >> 2) + j; 566 yA = ((ps_cu_node->ps_parent->u2_y0 << 3) >> 2) + 1 + i; 567 xB = xA + 1; 568 yB = yA - 1; 569 left_intra_mode = ps_ctxt->au1_ctb_mode_map[yA][xA]; 570 top_intra_mode = ps_ctxt->au1_ctb_mode_map[yB][xB]; 571 572 ihevce_intra_populate_mode_bits_cost_bracketing( 573 top_intra_mode, 574 left_intra_mode, 575 s_nbr.u1_top_avail, 576 s_nbr.u1_left_avail, 577 ps_cu_node->ps_parent->u2_y0, 578 &ps_ctxt->au2_mode_bits_cost_8x8pu[i * 2 + j][0], 579 &ps_ctxt->au2_mode_bits_8x8_pu[0], 580 ps_ctxt->i4_ol_sad_lambda, 581 cand_mode_list); 582 583 /* call the function which populates ref data for intra predicion */ 584 ihevc_intra_pred_luma_ref_substitution_fptr( 585 pu1_src_4x4 - src_strd - 1, 586 pu1_src_4x4 - src_strd, 587 pu1_src_4x4 - 1, 588 src_strd, 589 4, 590 nbr_flags, 591 &ps_ctxt->au1_ref_8x8pu[i * 2 + j][0], 592 0); 593 594 ihevce_pu_calc_4x4_blk( 595 ps_ctxt, 596 ps_cu_node->ps_sub_cu[(i * 2) + j], 597 pu1_src_4x4, 598 src_strd, 599 &ps_ctxt->au1_ref_8x8pu[i * 2 + j][0], 600 &ps_ctxt->au2_mode_bits_cost_8x8pu[i * 2 + j][0], 601 &ps_cu_node->ps_sub_cu[(i * 2) + j]->au4_best_cost_1tu[0], 602 &ps_cu_node->ps_sub_cu[(i * 2) + j]->au1_best_mode_1tu[0], 603 ps_func_selector); 604 605 /*&au4_cost_4x4[i*2 + j][0], 606 &au1_modes_4x4[i*2 + j][0]);*/ //TTODO : mode will change for the four partition 607 608 ihevce_set_nbr_map( 609 ps_ctxt->pu1_ctb_nbr_map, 610 ps_ctxt->i4_nbr_map_strd, 611 cu_pos_x + ((j) * (trans_size >> 2)), 612 cu_pos_y + ((i) * (trans_size >> 2)), 613 (trans_size >> 2), 614 1); 615 616 xA = ((ps_cu_node->ps_parent->u2_x0 << 3) >> 2) + 1 + j; 617 yA = ((ps_cu_node->ps_parent->u2_y0 << 3) >> 2) + 1 + i; 618 ps_ctxt->au1_ctb_mode_map[yA][xA] = ps_cu_node->ps_sub_cu[i * 2 + j]->best_mode; 619 ps_cu_node->ps_sub_cu[i * 2 + j]->u2_mode_bits_cost = 620 ps_ctxt->au2_mode_bits_8x8_pu[ps_cu_node->ps_sub_cu[i * 2 + j]->best_mode]; 621 } 622 } 623 } 624 625 /*! 626 ****************************************************************************** 627 * \if Function name : ihevce_bracketing_analysis \endif 628 * 629 * \brief 630 * Interface function that evaluates MAX cu and MAX - 1 cu, with MAX cu size 631 * info decided coarse resolution mode decision. Compares the SATD/SAD cost btwn 632 * 2 CUS and determines the actual CU size and best 3 modes to be given to rdopt 633 * 634 * \param[in] ps_ctxt : pointer to IPE context struct 635 * \param[in] ps_cu_node : pointer to cu node info buffer 636 * \param[in] ps_curr_src : pointer to src pixels struct 637 * \param[in] ps_ctb_out : pointer to ip ctb out struct 638 * \param[in] ps_row_cu : pointer to cu analyse struct 639 * \param[in] ps_ed_l1_ctb : pointer to level 1 early deci struct 640 * \param[in] ps_ed_l2_ctb : pointer to level 2 early deci struct 641 * \param[in] ps_l0_ipe_out_ctb : pointer to ipe_l0_ctb_analyse_for_me_t struct 642 * 643 * \return 644 * None 645 * 646 * \author 647 * Ittiam 648 * 649 ***************************************************************************** 650 */ 651 void ihevce_bracketing_analysis( 652 ihevce_ipe_ctxt_t *ps_ctxt, 653 ihevce_ipe_cu_tree_t *ps_cu_node, 654 iv_enc_yuv_buf_t *ps_curr_src, 655 ctb_analyse_t *ps_ctb_out, 656 //cu_analyse_t *ps_row_cu, 657 ihevce_ed_blk_t *ps_ed_l1_ctb, 658 ihevce_ed_blk_t *ps_ed_l2_ctb, 659 ihevce_ed_ctb_l1_t *ps_ed_ctb_l1, 660 ipe_l0_ctb_analyse_for_me_t *ps_l0_ipe_out_ctb) 661 { 662 WORD32 cu_pos_x = 0; 663 WORD32 cu_pos_y = 0; 664 665 UWORD8 u1_curr_ctb_wdt = ps_cu_node->u1_width; 666 UWORD8 u1_curr_ctb_hgt = ps_cu_node->u1_height; 667 WORD32 num_8x8_blks_x = (u1_curr_ctb_wdt >> 3); 668 WORD32 num_8x8_blks_y = (u1_curr_ctb_hgt >> 3); 669 670 ihevce_ed_blk_t *ps_ed_blk_l1 = ps_ed_l1_ctb; 671 ihevce_ed_blk_t *ps_ed_blk_l2 = ps_ed_l2_ctb; 672 673 WORD32 i; 674 WORD32 cand_mode_list[3]; 675 //cu_analyse_t *ps_curr_cu = ps_row_cu; 676 WORD32 blk_cnt = 0; 677 WORD32 j = 0; 678 WORD32 merge_32x32_l1, merge_32x32_l2; 679 680 WORD32 i4_skip_intra_eval_32x32_l1; 681 //EIID: flag indicating number of 16x16 blocks to be skipped for intra evaluation within 32x32 block 682 683 WORD32 parent_cost = 0; 684 WORD32 child_cost[4] = { 0 }; 685 WORD32 child_cost_least = 0; 686 WORD32 child_satd[4] = { 0 }; 687 WORD32 x, y, size; 688 WORD32 merge_64x64 = 1; 689 UWORD8 au1_best_32x32_modes[4]; 690 WORD32 au4_best_32x32_cost[4]; 691 WORD32 parent_best_mode; 692 UWORD8 best_mode; 693 694 WORD32 i4_quality_preset = ps_ctxt->i4_quality_preset; 695 /* flag to control 1CU-4TU modes based on quality preset */ 696 /* if set 1CU-4TU are explicity evaluated else 1CU-1TU modes are copied */ 697 WORD32 i4_enable_1cu_4tu = (i4_quality_preset == IHEVCE_QUALITY_P2) || 698 (i4_quality_preset == IHEVCE_QUALITY_P0); 699 700 /* flag to control 4CU-16TU mode based on quality preset */ 701 /* if set 4CU-16TU are explicity evaluated else 4CU-4TU modes are copied*/ 702 WORD32 i4_enable_4cu_16tu = (i4_quality_preset == IHEVCE_QUALITY_P2) || 703 (i4_quality_preset == IHEVCE_QUALITY_P0); 704 705 WORD32 i4_mod_factor_num, i4_mod_factor_den = QP_MOD_FACTOR_DEN; //2; 706 float f_strength; 707 /* Accumalte satd */ 708 LWORD64 i8_frame_acc_satd_cost = 0, i8_frame_acc_satd_by_modqp_q10 = 0; 709 WORD32 i4_ctb_acc_satd = 0; 710 711 /* Accumalate Mode bits cost */ 712 LWORD64 i8_frame_acc_mode_bits_cost = 0; 713 714 /* Step2 is bypassed for parent, uses children modes*/ 715 WORD32 step2_bypass = 1; 716 717 if(1 == ps_ctxt->u1_disable_child_cu_decide) 718 step2_bypass = 0; 719 720 ps_cu_node->ps_parent = ps_ctxt->ps_ipe_cu_tree; 721 for(i = 0; i < 4; i++) 722 { 723 ps_cu_node->ps_sub_cu[i] = ps_ctxt->ps_ipe_cu_tree + 1 + i; 724 } 725 726 /* Loop for all 8x8 block in a CTB */ 727 ps_ctb_out->u4_cu_split_flags = 0x1; 728 729 /* Initialize intra 64x64, 32x32 and 16x16 costs to max value */ 730 for(i = 0; i < (MAX_CU_IN_CTB >> 4); i++) 731 { 732 ps_l0_ipe_out_ctb->ai4_best32x32_intra_cost[i] = MAX_INTRA_COST_IPE; 733 } 734 735 for(i = 0; i < (MAX_CU_IN_CTB >> 2); i++) 736 { 737 ps_l0_ipe_out_ctb->ai4_best16x16_intra_cost[i] = MAX_INTRA_COST_IPE; 738 } 739 740 for(i = 0; i < (MAX_CU_IN_CTB); i++) 741 { 742 ps_l0_ipe_out_ctb->ai4_best8x8_intra_cost[i] = MAX_INTRA_COST_IPE; 743 } 744 745 ps_l0_ipe_out_ctb->i4_best64x64_intra_cost = MAX_INTRA_COST_IPE; 746 747 /* by default 64x64 modes are set to default values DC and Planar */ 748 ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[0] = 0; 749 ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[1] = 1; 750 ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[2] = 255; 751 752 /* by default 64x4 split is set to 1 */ 753 ps_l0_ipe_out_ctb->u1_split_flag = 1; 754 755 /* Modulation factor calculated based on spatial variance instead of hardcoded val*/ 756 i4_mod_factor_num = ps_ctxt->ai4_mod_factor_derived_by_variance[1]; //16; 757 758 f_strength = ps_ctxt->f_strength; 759 760 /* ------------------------------------------------ */ 761 /* populate the early decisions done by L1 analysis */ 762 /* ------------------------------------------------ */ 763 { 764 ihevce_ed_blk_t *ps_ed_blk_l1_curr = ps_ed_l1_ctb; 765 WORD32 ctr_8x8; 766 WORD8 *pi1_ed_buf; 767 768 /* set all the decisions to invalid */ 769 memset( 770 &ps_l0_ipe_out_ctb->ai1_early_intra_inter_decision[0], 771 0, 772 sizeof(UWORD8) * MAX_CU_IN_CTB); 773 774 pi1_ed_buf = &ps_l0_ipe_out_ctb->ai1_early_intra_inter_decision[0]; 775 776 for(ctr_8x8 = 0; ctr_8x8 < MAX_CTB_SIZE; ctr_8x8++) 777 { 778 WORD32 pos_x_8x8, pos_y_8x8; 779 780 pos_x_8x8 = gau1_cu_pos_x[ctr_8x8]; 781 pos_y_8x8 = gau1_cu_pos_y[ctr_8x8]; 782 783 pi1_ed_buf[pos_x_8x8 + (pos_y_8x8 * MAX_CU_IN_CTB_ROW)] = 784 ps_ed_blk_l1_curr->intra_or_inter; 785 ps_ed_blk_l1_curr++; 786 } 787 788 for(ctr_8x8 = 0; ctr_8x8 < (MAX_CU_IN_CTB >> 2); ctr_8x8++) 789 { 790 ps_l0_ipe_out_ctb->ai4_best_sad_8x8_l1_ipe[ctr_8x8] = 791 ps_ed_ctb_l1->i4_best_sad_8x8_l1_ipe[ctr_8x8]; 792 793 ps_l0_ipe_out_ctb->ai4_best_sad_cost_8x8_l1_ipe[ctr_8x8] = 794 ps_ed_ctb_l1->i4_best_sad_cost_8x8_l1_ipe[ctr_8x8]; 795 796 /*Earlier only me sad was getting populated, now best of ipe and me is populated*/ 797 ps_l0_ipe_out_ctb->ai4_best_sad_8x8_l1_me[ctr_8x8] = 798 ps_ed_ctb_l1->i4_best_sad_8x8_l1_me[ctr_8x8]; 799 //ps_ed_ctb_l1->i4_sad_me_for_ref[ctr_8x8]; 800 801 ps_l0_ipe_out_ctb->ai4_best_sad_cost_8x8_l1_me[ctr_8x8] = 802 ps_ed_ctb_l1->i4_best_sad_cost_8x8_l1_me[ctr_8x8]; 803 //ps_ed_ctb_l1->i4_sad_cost_me_for_ref[ctr_8x8]; 804 } 805 806 /*Init CTB level accumalated SATD and MPM bits */ 807 ps_l0_ipe_out_ctb->i4_ctb_acc_satd = 0; 808 ps_l0_ipe_out_ctb->i4_ctb_acc_mpm_bits = 0; 809 } 810 811 /* ------------------------------------------------ */ 812 /* Loop over all the blocks in current CTB */ 813 /* ------------------------------------------------ */ 814 815 { 816 /* 64 8x8 blocks should be encountered for the do,while loop to exit */ 817 do 818 { 819 intra32_analyse_t *ps_intra32_analyse; 820 intra16_analyse_t *ps_intra16_analyse; 821 WORD32 *pi4_intra_32_cost; 822 WORD32 *pi4_intra_16_cost; 823 WORD32 *pi4_intra_8_cost; 824 WORD32 merge_16x16_l1; 825 826 /* Given the blk_cnt, get the CU's top-left 8x8 block's x and y positions within the CTB */ 827 cu_pos_x = gau1_cu_pos_x[blk_cnt]; 828 cu_pos_y = gau1_cu_pos_y[blk_cnt]; 829 830 /* default value for 32x32 best mode - blk_cnt increases by 16 for each 32x32 */ 831 au1_best_32x32_modes[blk_cnt >> 4] = 255; 832 833 /* get the corresponding intra 32 analyse pointer use (blk_cnt / 16) */ 834 /* blk cnt is in terms of 8x8 units so a 32x32 will have 16 8x8 units */ 835 ps_intra32_analyse = &ps_l0_ipe_out_ctb->as_intra32_analyse[blk_cnt >> 4]; 836 837 /* get the corresponding intra 16 analyse pointer use (blk_cnt & 0xF / 4)*/ 838 /* blk cnt is in terms of 8x8 units so a 16x16 will have 4 8x8 units */ 839 ps_intra16_analyse = &ps_intra32_analyse->as_intra16_analyse[(blk_cnt & 0xF) >> 2]; 840 841 /* Line below assumes min_cu_size of 8 - checks whether CU starts are within picture */ 842 if((cu_pos_x < num_8x8_blks_x) && (cu_pos_y < num_8x8_blks_y)) 843 { 844 /* Reset to zero for every cu decision */ 845 merge_32x32_l1 = 0; 846 847 child_cost_least = 0; 848 849 /* At L2, each 4x4 corresponds to 16x16 at L0. Every 4 16x16 stores a merge_success flag */ 850 ps_ed_blk_l2 = ps_ed_l2_ctb + (blk_cnt >> 2); 851 852 pi4_intra_32_cost = &ps_l0_ipe_out_ctb->ai4_best32x32_intra_cost[blk_cnt >> 4]; 853 854 /* by default 32x32 modes are set to default values DC and Planar */ 855 ps_intra32_analyse->au1_best_modes_32x32_tu[0] = 0; 856 ps_intra32_analyse->au1_best_modes_32x32_tu[1] = 1; 857 ps_intra32_analyse->au1_best_modes_32x32_tu[2] = 255; 858 859 /* By default 32x32 split is set to 1 */ 860 ps_intra32_analyse->b1_split_flag = 1; 861 862 ps_intra32_analyse->au1_best_modes_16x16_tu[0] = 0; 863 ps_intra32_analyse->au1_best_modes_16x16_tu[1] = 1; 864 ps_intra32_analyse->au1_best_modes_16x16_tu[2] = 255; 865 866 /* 16x16 cost & 8x8 cost are stored in Raster scan order */ 867 /* stride of 16x16 buffer is MAX_CU_IN_CTB_ROW >> 1 */ 868 /* stride of 8x8 buffer is MAX_CU_IN_CTB_ROW */ 869 { 870 WORD32 pos_x_8x8, pos_y_8x8; 871 872 pos_x_8x8 = gau1_cu_pos_x[blk_cnt]; 873 pos_y_8x8 = gau1_cu_pos_y[blk_cnt]; 874 875 pi4_intra_16_cost = &ps_l0_ipe_out_ctb->ai4_best16x16_intra_cost[0]; 876 877 pi4_intra_16_cost += 878 ((pos_x_8x8 >> 1) + ((pos_y_8x8 >> 1) * (MAX_CU_IN_CTB_ROW >> 1))); 879 880 pi4_intra_8_cost = &ps_l0_ipe_out_ctb->ai4_best8x8_intra_cost[0]; 881 882 pi4_intra_8_cost += (pos_x_8x8 + (pos_y_8x8 * MAX_CU_IN_CTB_ROW)); 883 } 884 885 merge_32x32_l1 = 0; 886 merge_32x32_l2 = 0; 887 i4_skip_intra_eval_32x32_l1 = 0; 888 889 /* Enable 16x16 merge iff sufficient 8x8 blocks remain in the current CTB */ 890 merge_16x16_l1 = 0; 891 if(((num_8x8_blks_x - cu_pos_x) >= 2) && ((num_8x8_blks_y - cu_pos_y) >= 2)) 892 { 893 #if !ENABLE_UNIFORM_CU_SIZE_8x8 894 merge_16x16_l1 = ps_ed_blk_l1->merge_success; 895 #else 896 merge_16x16_l1 = 0; 897 #endif 898 } 899 900 /* Enable 32x32 merge iff sufficient 8x8 blocks remain in the current CTB */ 901 if(((num_8x8_blks_x - cu_pos_x) >= 4) && ((num_8x8_blks_y - cu_pos_y) >= 4)) 902 { 903 /* Check 4 flags of L1(8x8) say merge */ 904 for(i = 0; i < 4; i++) 905 { 906 merge_32x32_l1 += (ps_ed_blk_l1 + (i * 4))->merge_success; 907 908 //EIDD: num 16x16 blocks for which inter_intra flag says eval only inter, i.e. skip intra eval 909 i4_skip_intra_eval_32x32_l1 += 910 ((ps_ed_blk_l1 + (i * 4))->intra_or_inter == 2) ? 1 : 0; 911 } 912 913 #if !ENABLE_UNIFORM_CU_SIZE_8x8 914 /* Check 1 flag from L2(16x16) say merge */ 915 merge_32x32_l2 = ps_ed_blk_l2->merge_success; 916 #else 917 merge_32x32_l1 = 0; 918 merge_32x32_l2 = 0; 919 #endif 920 } 921 922 #if DISABLE_L2_IPE_IN_PB_L1_IN_B 923 if((i4_quality_preset == IHEVCE_QUALITY_P6) && (ps_ctxt->i4_slice_type != ISLICE)) 924 { 925 merge_32x32_l2 = 0; 926 ps_ed_blk_l2->merge_success = 0; 927 } 928 #endif 929 930 ps_intra32_analyse->b1_valid_cu = 1; 931 932 /* If Merge success from all 4 L1 and L2, max CU size 32x32 is chosen */ 933 /* EIID: if all blocks to be skipped then skip entire 32x32 for intra eval, 934 if no blocks to be skipped then eval entire 32x32, 935 else break the merge and go to 16x16 level eval */ 936 if((merge_32x32_l1 == 4) && merge_32x32_l2 && 937 ((i4_skip_intra_eval_32x32_l1 == 0) || 938 (i4_skip_intra_eval_32x32_l1 == 4)) //comment this line to disable break-merge 939 ) 940 { 941 #if IP_DBG_L1_l2 942 /* Populate params for 32x32 block analysis */ 943 ps_cu_node->ps_parent->best_cost = MAX_INTRA_COST_IPE; 944 945 ps_cu_node->ps_parent->u1_cu_size = 32; 946 ps_cu_node->ps_parent->u2_x0 = gau1_cu_pos_x[blk_cnt]; /* Populate properly */ 947 ps_cu_node->ps_parent->u2_y0 = gau1_cu_pos_y[blk_cnt]; /* Populate properly */ 948 ps_cu_node->ps_parent->best_mode = ps_ed_blk_l2->best_merge_mode; 949 /* CU size 32x32 and fill the final cu params */ 950 951 ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt); 952 953 /* Increment pointers */ 954 ps_ed_blk_l1 += 16; 955 blk_cnt += 16; 956 ps_row_cu++; 957 merge_64x64 &= 1; 958 #else 959 960 /* EIID: dont evaluate if all 4 blocks at L1 said inter is winning*/ 961 if(4 == i4_skip_intra_eval_32x32_l1 && (ps_ctxt->i4_slice_type != ISLICE)) 962 { 963 WORD32 i4_local_ctr1, i4_local_ctr2; 964 965 ps_cu_node->ps_parent->best_cost = MAX_INTRA_COST_IPE; 966 967 ps_cu_node->ps_parent->u1_cu_size = 32; 968 ps_cu_node->ps_parent->u2_x0 = 969 gau1_cu_pos_x[blk_cnt]; /* Populate properly */ 970 ps_cu_node->ps_parent->u2_y0 = 971 gau1_cu_pos_y[blk_cnt]; /* Populate properly */ 972 ps_cu_node->ps_parent->best_mode = 973 INTRA_DC; //ps_ed_blk_l2->best_merge_mode; 974 /* CU size 32x32 and fill the final cu params */ 975 976 /* fill in the first modes as invalid */ 977 ps_cu_node->ps_parent->au1_best_mode_1tu[0] = INTRA_DC; 978 ps_cu_node->ps_parent->au1_best_mode_1tu[1] = 979 INTRA_DC; //for safery. Since update_cand_list will set num_modes as 3 980 ps_cu_node->ps_parent->au1_best_mode_1tu[2] = INTRA_DC; 981 982 ps_cu_node->ps_parent->au1_best_mode_4tu[0] = INTRA_DC; 983 ps_cu_node->ps_parent->au1_best_mode_4tu[1] = INTRA_DC; 984 ps_cu_node->ps_parent->au1_best_mode_4tu[2] = INTRA_DC; 985 986 ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt); 987 988 //ps_row_cu->s_cu_intra_cand.b6_num_intra_cands = 0; 989 //ps_row_cu->u1_num_intra_rdopt_cands = 0; 990 991 ps_intra32_analyse->b1_valid_cu = 0; 992 ps_intra32_analyse->b1_split_flag = 0; 993 ps_intra32_analyse->b1_merge_flag = 0; 994 /*memset (&ps_intra32_analyse->au1_best_modes_32x32_tu, 995 255, 996 NUM_BEST_MODES); 997 memset (&ps_intra32_analyse->au1_best_modes_16x16_tu, 998 255, 999 NUM_BEST_MODES);*/ 1000 //set only first mode since if it's 255. it wont go ahead 1001 ps_intra32_analyse->au1_best_modes_32x32_tu[0] = 255; 1002 ps_intra32_analyse->au1_best_modes_16x16_tu[0] = 255; 1003 ps_intra32_analyse->i4_best_intra_cost = MAX_INTRA_COST_IPE; 1004 1005 *pi4_intra_32_cost = MAX_INTRA_COST_IPE; 1006 1007 /*since ME will start evaluating from bottom up, set the lower 1008 cu size data invalid */ 1009 for(i4_local_ctr1 = 0; i4_local_ctr1 < 4; i4_local_ctr1++) 1010 { 1011 WORD32 *pi4_intra_8_cost_curr16; 1012 1013 ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1] 1014 .au1_best_modes_16x16_tu[0] = 255; 1015 ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1] 1016 .au1_best_modes_8x8_tu[0] = 255; 1017 ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1] 1018 .i4_best_intra_cost = MAX_INTRA_COST_IPE; 1019 ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1].b1_merge_flag = 0; 1020 ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1].b1_valid_cu = 0; 1021 ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1].b1_split_flag = 0; 1022 1023 pi4_intra_16_cost 1024 [(i4_local_ctr1 & 1) + ((MAX_CU_IN_CTB_ROW >> 1) * 1025 (i4_local_ctr1 >> 1))] = MAX_INTRA_COST_IPE; 1026 1027 pi4_intra_8_cost_curr16 = pi4_intra_8_cost + ((i4_local_ctr1 & 1) << 1); 1028 pi4_intra_8_cost_curr16 += 1029 ((i4_local_ctr1 >> 1) << 1) * MAX_CU_IN_CTB_ROW; 1030 1031 for(i4_local_ctr2 = 0; i4_local_ctr2 < 4; i4_local_ctr2++) 1032 { 1033 ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1] 1034 .as_intra8_analyse[i4_local_ctr2] 1035 .au1_4x4_best_modes[0][0] = 255; 1036 ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1] 1037 .as_intra8_analyse[i4_local_ctr2] 1038 .au1_4x4_best_modes[1][0] = 255; 1039 ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1] 1040 .as_intra8_analyse[i4_local_ctr2] 1041 .au1_4x4_best_modes[2][0] = 255; 1042 ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1] 1043 .as_intra8_analyse[i4_local_ctr2] 1044 .au1_4x4_best_modes[3][0] = 255; 1045 ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1] 1046 .as_intra8_analyse[i4_local_ctr2] 1047 .au1_best_modes_8x8_tu[0] = 255; 1048 ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1] 1049 .as_intra8_analyse[i4_local_ctr2] 1050 .au1_best_modes_4x4_tu[0] = 255; 1051 ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1] 1052 .as_intra8_analyse[i4_local_ctr2] 1053 .i4_best_intra_cost = MAX_INTRA_COST_IPE; 1054 ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1] 1055 .as_intra8_analyse[i4_local_ctr2] 1056 .b1_valid_cu = 0; 1057 1058 pi4_intra_8_cost_curr16 1059 [(i4_local_ctr2 & 1) + 1060 (MAX_CU_IN_CTB_ROW * (i4_local_ctr2 >> 1))] = 1061 MAX_INTRA_COST_IPE; 1062 } 1063 } 1064 1065 /* set neighbours even if intra is not evaluated, since source is always available. */ 1066 ihevce_set_nbr_map( 1067 ps_ctxt->pu1_ctb_nbr_map, 1068 ps_ctxt->i4_nbr_map_strd, 1069 ps_cu_node->ps_parent->u2_x0 << 1, 1070 ps_cu_node->ps_parent->u2_y0 << 1, 1071 (ps_cu_node->ps_parent->u1_cu_size >> 2), 1072 1); 1073 1074 /* cost accumalation of best cu size candiate */ 1075 /*i8_frame_acc_satd_cost += parent_cost;*/ 1076 1077 /* Mode bits cost accumalation for best cu size and cu mode */ 1078 /*i8_frame_acc_mode_bits_cost += ps_cu_node->ps_parent->u2_mode_bits_cost;*/ 1079 1080 /*satd/mod_qp accumulation of best cu */ 1081 /*i8_frame_acc_satd_by_modqp_q10 += ((LWORD64)ps_cu_node->ps_parent->best_satd << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3))/i4_q_scale_q3_mod;*/ 1082 1083 /* Increment pointers */ 1084 ps_ed_blk_l1 += 16; 1085 blk_cnt += 16; 1086 //ps_row_cu++; 1087 merge_64x64 = 0; 1088 1089 /* increment for stat purpose only. Increment is valid only on single thread */ 1090 ps_ctxt->u4_num_16x16_skips_at_L0_IPE += 4; 1091 } 1092 else 1093 { 1094 /* Revaluation of 4 16x16 blocks at 8x8 prediction level */ 1095 //memcpy(ps_ctxt->ai1_ctb_mode_map_temp, ps_ctxt->ai1_ctb_mode_map, sizeof(ps_ctxt->ai1_ctb_mode_map)); 1096 1097 if((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6) && 1098 (ps_ctxt->i4_slice_type == PSLICE)) 1099 { 1100 ps_ctxt->u1_disable_child_cu_decide = 1; 1101 step2_bypass = 0; 1102 } 1103 1104 /* Based on the flag, Child modes decision can be disabled*/ 1105 if(0 == ps_ctxt->u1_disable_child_cu_decide) 1106 { 1107 for(j = 0; j < 4; j++) 1108 { 1109 ps_cu_node->ps_sub_cu[j]->u2_x0 = 1110 gau1_cu_pos_x[blk_cnt + (j * 4)]; /* Populate properly */ 1111 ps_cu_node->ps_sub_cu[j]->u2_y0 = 1112 gau1_cu_pos_y[blk_cnt + (j * 4)]; /* Populate properly */ 1113 ps_cu_node->ps_sub_cu[j]->u1_cu_size = 16; 1114 1115 { 1116 WORD32 best_ang_mode = 1117 (ps_ed_blk_l1 + (j * 4))->best_merge_mode; 1118 1119 if(best_ang_mode < 2) 1120 best_ang_mode = 26; 1121 1122 ihevce_mode_eval_filtering( 1123 ps_cu_node->ps_sub_cu[j], 1124 ps_cu_node, 1125 ps_ctxt, 1126 ps_curr_src, 1127 best_ang_mode, 1128 &ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0], 1129 &ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0], 1130 !step2_bypass, 1131 1); 1132 1133 if(i4_enable_4cu_16tu) 1134 { 1135 ihevce_mode_eval_filtering( 1136 ps_cu_node->ps_sub_cu[j], 1137 ps_cu_node, 1138 ps_ctxt, 1139 ps_curr_src, 1140 best_ang_mode, 1141 &ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0], 1142 &ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0], 1143 !step2_bypass, 1144 0); 1145 } 1146 else 1147 { 1148 /* 4TU not evaluated : 4tu modes set same as 1tu modes */ 1149 memcpy( 1150 &ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0], 1151 &ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0], 1152 NUM_BEST_MODES); 1153 1154 /* 4TU not evaluated : currently 4tu cost set same as 1tu cost */ 1155 memcpy( 1156 &ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0], 1157 &ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0], 1158 NUM_BEST_MODES * sizeof(WORD32)); 1159 } 1160 1161 child_cost[j] = 1162 MIN(ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0], 1163 ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0]); 1164 1165 /* Child cost is sum of costs at 16x16 level */ 1166 child_cost_least += child_cost[j]; 1167 1168 /* Select the best mode to be populated as top and left nbr depending on the 1169 4tu and 1tu cost */ 1170 if(ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0] > 1171 ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0]) 1172 { 1173 ps_cu_node->ps_sub_cu[j]->best_mode = 1174 ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0]; 1175 } 1176 else 1177 { 1178 ps_cu_node->ps_sub_cu[j]->best_mode = 1179 ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0]; 1180 } 1181 1182 { /* Update the CTB nodes only for MAX - 1 CU nodes */ 1183 WORD32 xA, yA, row, col; 1184 xA = ((ps_cu_node->ps_sub_cu[j]->u2_x0 << 3) >> 2) + 1; 1185 yA = ((ps_cu_node->ps_sub_cu[j]->u2_y0 << 3) >> 2) + 1; 1186 size = ps_cu_node->ps_sub_cu[j]->u1_cu_size >> 2; 1187 for(row = yA; row < (yA + size); row++) 1188 { 1189 for(col = xA; col < (xA + size); col++) 1190 { 1191 ps_ctxt->au1_ctb_mode_map[row][col] = 1192 ps_cu_node->ps_sub_cu[j]->best_mode; 1193 } 1194 } 1195 } 1196 } 1197 1198 /*Child SATD cost*/ 1199 child_satd[j] = ps_cu_node->ps_sub_cu[j]->best_satd; 1200 1201 /* store the child 16x16 costs */ 1202 pi4_intra_16_cost[(j & 1) + ((MAX_CU_IN_CTB_ROW >> 1) * (j >> 1))] = 1203 child_cost[j]; 1204 1205 /* set the CU valid flag */ 1206 ps_intra16_analyse[j].b1_valid_cu = 1; 1207 1208 /* All 16x16 merge is valid, if Cu 32x32 is chosen */ 1209 /* To be reset, if CU 64x64 is chosen */ 1210 ps_intra16_analyse[j].b1_merge_flag = 1; 1211 1212 /* storing the modes to intra 16 analyse */ 1213 /* store the best 16x16 modes 8x8 tu */ 1214 memcpy( 1215 &ps_intra16_analyse[j].au1_best_modes_8x8_tu[0], 1216 &ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0], 1217 sizeof(UWORD8) * (NUM_BEST_MODES)); 1218 ps_intra16_analyse[j].au1_best_modes_8x8_tu[NUM_BEST_MODES] = 255; 1219 1220 /* store the best 16x16 modes 16x16 tu */ 1221 memcpy( 1222 &ps_intra16_analyse[j].au1_best_modes_16x16_tu[0], 1223 &ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0], 1224 sizeof(UWORD8) * (NUM_BEST_MODES)); 1225 ps_intra16_analyse[j].au1_best_modes_16x16_tu[NUM_BEST_MODES] = 255; 1226 1227 /* divide the 16x16 costs (pro rating) to 4 8x8 costs */ 1228 /* store the same 16x16 modes as 4 8x8 child modes */ 1229 { 1230 WORD32 idx_8x8; 1231 WORD32 *pi4_intra_8_cost_curr16; 1232 intra8_analyse_t *ps_intra8_analyse; 1233 1234 pi4_intra_8_cost_curr16 = pi4_intra_8_cost + ((j & 1) << 1); 1235 pi4_intra_8_cost_curr16 += ((j >> 1) << 1) * MAX_CU_IN_CTB_ROW; 1236 1237 for(idx_8x8 = 0; idx_8x8 < 4; idx_8x8++) 1238 { 1239 pi4_intra_8_cost_curr16 1240 [(idx_8x8 & 1) + (MAX_CU_IN_CTB_ROW * (idx_8x8 >> 1))] = 1241 (child_cost[j] + 3) >> 2; 1242 1243 ps_intra8_analyse = 1244 &ps_intra16_analyse[j].as_intra8_analyse[idx_8x8]; 1245 1246 ps_intra8_analyse->b1_enable_nxn = 0; 1247 ps_intra8_analyse->b1_valid_cu = 1; 1248 1249 /* store the best 8x8 modes 8x8 tu */ 1250 memcpy( 1251 &ps_intra8_analyse->au1_best_modes_8x8_tu[0], 1252 &ps_intra16_analyse[j].au1_best_modes_8x8_tu[0], 1253 sizeof(UWORD8) * (NUM_BEST_MODES + 1)); 1254 1255 /* store the best 8x8 modes 4x4 tu */ 1256 memcpy( 1257 &ps_intra8_analyse->au1_best_modes_4x4_tu[0], 1258 &ps_intra16_analyse[j].au1_best_modes_8x8_tu[0], 1259 sizeof(UWORD8) * (NUM_BEST_MODES + 1)); 1260 1261 /* NXN modes not evaluated hence set to 0 */ 1262 memset( 1263 &ps_intra8_analyse->au1_4x4_best_modes[0][0], 1264 255, 1265 sizeof(UWORD8) * 4 * (NUM_BEST_MODES + 1)); 1266 } 1267 } 1268 } 1269 1270 ihevce_set_nbr_map( 1271 ps_ctxt->pu1_ctb_nbr_map, 1272 ps_ctxt->i4_nbr_map_strd, 1273 ps_cu_node->ps_sub_cu[0]->u2_x0 << 1, 1274 ps_cu_node->ps_sub_cu[0]->u2_y0 << 1, 1275 (ps_cu_node->ps_sub_cu[0]->u1_cu_size >> 1), 1276 0); 1277 } 1278 #if 1 //DISBLE_CHILD_CU_EVAL_L0_IPE //1 1279 else 1280 { 1281 for(j = 0; j < 4; j++) 1282 { 1283 WORD32 idx_8x8; 1284 intra8_analyse_t *ps_intra8_analyse; 1285 ps_intra16_analyse[j].au1_best_modes_8x8_tu[0] = 255; 1286 ps_intra16_analyse[j].au1_best_modes_16x16_tu[0] = 255; 1287 1288 ps_intra16_analyse[j].b1_valid_cu = 0; 1289 1290 for(idx_8x8 = 0; idx_8x8 < 4; idx_8x8++) 1291 { 1292 ps_intra8_analyse = 1293 &ps_intra16_analyse[j].as_intra8_analyse[idx_8x8]; 1294 1295 ps_intra8_analyse->au1_best_modes_8x8_tu[0] = 255; 1296 ps_intra8_analyse->au1_best_modes_4x4_tu[0] = 255; 1297 1298 ps_intra8_analyse->b1_enable_nxn = 0; 1299 ps_intra8_analyse->b1_valid_cu = 0; 1300 1301 /* NXN modes not evaluated hence set to 0 */ 1302 memset( 1303 &ps_intra8_analyse->au1_4x4_best_modes[0][0], 1304 255, 1305 sizeof(UWORD8) * 4 * (NUM_BEST_MODES + 1)); 1306 } 1307 } 1308 1309 child_cost_least = MAX_INTRA_COST_IPE; 1310 } 1311 #endif 1312 1313 /* Populate params for 32x32 block analysis */ 1314 1315 ps_cu_node->ps_parent->u1_cu_size = 32; 1316 ps_cu_node->ps_parent->u2_x0 = 1317 gau1_cu_pos_x[blk_cnt]; /* Populate properly */ 1318 ps_cu_node->ps_parent->u2_y0 = 1319 gau1_cu_pos_y[blk_cnt]; /* Populate properly */ 1320 1321 /* Revaluation for 32x32 parent block at 16x16 prediction level */ 1322 //memcpy(ps_ctxt->ai1_ctb_mode_map_temp, ps_ctxt->ai1_ctb_mode_map, sizeof(ps_ctxt->ai1_ctb_mode_map)); 1323 1324 { 1325 /* Eval for TUSize = CuSize */ 1326 ihevce_mode_eval_filtering( 1327 ps_cu_node->ps_parent, 1328 ps_cu_node, 1329 ps_ctxt, 1330 ps_curr_src, 1331 26, 1332 &ps_cu_node->ps_parent->au4_best_cost_1tu[0], 1333 &ps_cu_node->ps_parent->au1_best_mode_1tu[0], 1334 step2_bypass, 1335 1); 1336 1337 if(i4_enable_1cu_4tu) 1338 { 1339 /* Eval for TUSize = CuSize/2 */ 1340 ihevce_mode_eval_filtering( 1341 ps_cu_node->ps_parent, 1342 ps_cu_node, 1343 ps_ctxt, 1344 ps_curr_src, 1345 26, 1346 &ps_cu_node->ps_parent->au4_best_cost_4tu[0], 1347 &ps_cu_node->ps_parent->au1_best_mode_4tu[0], 1348 step2_bypass, 1349 0); 1350 } 1351 else 1352 { 1353 /* 4TU not evaluated : 4tu modes set same as 1tu modes */ 1354 memcpy( 1355 &ps_cu_node->ps_parent->au1_best_mode_4tu[0], 1356 &ps_cu_node->ps_parent->au1_best_mode_1tu[0], 1357 NUM_BEST_MODES); 1358 1359 /* 4TU not evaluated : currently 4tu cost set same as 1tu cost */ 1360 memcpy( 1361 &ps_cu_node->ps_parent->au4_best_cost_4tu[0], 1362 &ps_cu_node->ps_parent->au4_best_cost_1tu[0], 1363 NUM_BEST_MODES * sizeof(WORD32)); 1364 } 1365 } 1366 1367 ps_ctxt->u1_disable_child_cu_decide = 0; 1368 step2_bypass = 1; 1369 1370 /* Update parent cost */ 1371 parent_cost = 1372 MIN(ps_cu_node->ps_parent->au4_best_cost_4tu[0], 1373 ps_cu_node->ps_parent->au4_best_cost_1tu[0]); 1374 1375 /* Select the best mode to be populated as top and left nbr depending on the 1376 4tu and 1tu cost */ 1377 if(ps_cu_node->ps_parent->au4_best_cost_4tu[0] > 1378 ps_cu_node->ps_parent->au4_best_cost_1tu[0]) 1379 { 1380 ps_cu_node->ps_parent->best_mode = 1381 ps_cu_node->ps_parent->au1_best_mode_1tu[0]; 1382 } 1383 else 1384 { 1385 ps_cu_node->ps_parent->best_mode = 1386 ps_cu_node->ps_parent->au1_best_mode_4tu[0]; 1387 } 1388 1389 /* store the 32x32 cost */ 1390 *pi4_intra_32_cost = parent_cost; 1391 1392 /* set the CU valid flag */ 1393 ps_intra32_analyse->b1_valid_cu = 1; 1394 1395 ps_intra32_analyse->b1_merge_flag = 1; 1396 1397 /* storing the modes to intra 32 analyse */ 1398 { 1399 /* store the best 32x32 modes 16x16 tu */ 1400 memcpy( 1401 &ps_intra32_analyse->au1_best_modes_16x16_tu[0], 1402 &ps_cu_node->ps_parent->au1_best_mode_4tu[0], 1403 sizeof(UWORD8) * (NUM_BEST_MODES)); 1404 ps_intra32_analyse->au1_best_modes_16x16_tu[NUM_BEST_MODES] = 255; 1405 1406 /* store the best 32x32 modes 32x32 tu */ 1407 memcpy( 1408 &ps_intra32_analyse->au1_best_modes_32x32_tu[0], 1409 &ps_cu_node->ps_parent->au1_best_mode_1tu[0], 1410 sizeof(UWORD8) * (NUM_BEST_MODES)); 1411 ps_intra32_analyse->au1_best_modes_32x32_tu[NUM_BEST_MODES] = 255; 1412 } 1413 parent_best_mode = ps_cu_node->ps_parent->best_mode; 1414 if((parent_cost <= 1415 child_cost_least + (ps_ctxt->i4_ol_satd_lambda * CHILD_BIAS >> 1416 LAMBDA_Q_SHIFT))) //|| identical_modes) 1417 { 1418 WORD32 i4_q_scale_q3_mod; 1419 UWORD8 u1_cu_possible_qp; 1420 WORD32 i4_act_factor; 1421 1422 /* CU size 32x32 and fill the final cu params */ 1423 1424 ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt); 1425 1426 if((IHEVCE_QUALITY_P3 > i4_quality_preset)) 1427 { 1428 for(i = 0; i < 4; i++) 1429 { 1430 intra8_analyse_t *ps_intra8_analyse; 1431 ps_intra8_analyse = &ps_intra16_analyse->as_intra8_analyse[i]; 1432 for(j = 0; j < 4; j++) 1433 { 1434 /* Populate best 3 nxn modes */ 1435 ps_intra8_analyse->au1_4x4_best_modes[j][0] = 1436 ps_cu_node->ps_sub_cu[i]->au1_best_mode_4tu[0]; 1437 ps_intra8_analyse->au1_4x4_best_modes[j][1] = 1438 ps_cu_node->ps_sub_cu[i] 1439 ->au1_best_mode_4tu[1]; //(ps_ed + 1)->best_mode; 1440 ps_intra8_analyse->au1_4x4_best_modes[j][2] = 1441 ps_cu_node->ps_sub_cu[i] 1442 ->au1_best_mode_4tu[2]; //(ps_ed + 2)->best_mode; 1443 ps_intra8_analyse->au1_4x4_best_modes[j][3] = 255; 1444 } 1445 } 1446 } 1447 /* store the 32x32 non split flag */ 1448 ps_intra32_analyse->b1_split_flag = 0; 1449 ps_intra32_analyse->as_intra16_analyse[0].b1_split_flag = 0; 1450 ps_intra32_analyse->as_intra16_analyse[1].b1_split_flag = 0; 1451 ps_intra32_analyse->as_intra16_analyse[2].b1_split_flag = 0; 1452 ps_intra32_analyse->as_intra16_analyse[3].b1_split_flag = 0; 1453 1454 au1_best_32x32_modes[blk_cnt >> 4] = 1455 ps_cu_node->ps_parent->au1_best_mode_1tu[0]; 1456 1457 au4_best_32x32_cost[blk_cnt >> 4] = 1458 ps_cu_node->ps_parent->au4_best_cost_1tu[0]; 1459 /*As 32*32 has won, pick L2 8x8 qp which maps 1460 to L0 32x32 Qp*/ 1461 ASSERT(((blk_cnt >> 4) & 3) == (blk_cnt >> 4)); 1462 ASSERT(ps_ed_ctb_l1->i4_16x16_satd[blk_cnt >> 4][0] != -2); 1463 u1_cu_possible_qp = ihevce_cu_level_qp_mod( 1464 ps_ctxt->i4_qscale, 1465 ps_ed_ctb_l1->i4_16x16_satd[blk_cnt >> 4][0], 1466 ps_ctxt->ld_curr_frame_16x16_log_avg[0], 1467 f_strength, 1468 &i4_act_factor, 1469 &i4_q_scale_q3_mod, 1470 ps_ctxt->ps_rc_quant_ctxt); 1471 /* cost accumalation of best cu size candiate */ 1472 i8_frame_acc_satd_cost += parent_cost; 1473 1474 /* satd and mpm bits accumalation of best cu size candiate */ 1475 i4_ctb_acc_satd += ps_cu_node->ps_parent->best_satd; 1476 1477 /* Mode bits cost accumalation for best cu size and cu mode */ 1478 i8_frame_acc_mode_bits_cost += ps_cu_node->ps_parent->u2_mode_bits_cost; 1479 1480 /*satd/mod_qp accumulation of best cu */ 1481 i8_frame_acc_satd_by_modqp_q10 += 1482 ((LWORD64)ps_cu_node->ps_parent->best_satd 1483 << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) / 1484 i4_q_scale_q3_mod; 1485 1486 /* Increment pointers */ 1487 ps_ed_blk_l1 += 16; 1488 blk_cnt += 16; 1489 //ps_row_cu++; 1490 merge_64x64 &= 1; 1491 } 1492 else 1493 { 1494 /* store the 32x32 split flag */ 1495 ps_intra32_analyse->b1_split_flag = 1; 1496 1497 /* CU size 16x16 and fill the final cu params for all 4 blocks */ 1498 for(j = 0; j < 4; j++) 1499 { 1500 WORD32 i4_q_scale_q3_mod; 1501 UWORD8 u1_cu_possible_qp; 1502 WORD32 i4_act_factor; 1503 1504 /* Set CU split flag */ 1505 ASSERT(blk_cnt % 4 == 0); 1506 1507 ihevce_update_cand_list( 1508 ps_cu_node->ps_sub_cu[j], ps_ed_blk_l1, ps_ctxt); 1509 1510 /* store the 16x16 non split flag */ 1511 ps_intra16_analyse[j].b1_split_flag = 0; 1512 1513 ASSERT(((blk_cnt >> 2) & 0xF) == (blk_cnt >> 2)); 1514 ASSERT(ps_ed_ctb_l1->i4_8x8_satd[blk_cnt >> 2][0] != -2); 1515 /*As 16*16 has won, pick L1 8x8 qp which maps 1516 to L0 16x16 Qp*/ 1517 u1_cu_possible_qp = ihevce_cu_level_qp_mod( 1518 ps_ctxt->i4_qscale, 1519 ps_ed_ctb_l1->i4_8x8_satd[blk_cnt >> 2][0], 1520 ps_ctxt->ld_curr_frame_8x8_log_avg[0], 1521 f_strength, 1522 &i4_act_factor, 1523 &i4_q_scale_q3_mod, 1524 ps_ctxt->ps_rc_quant_ctxt); 1525 1526 /*accum satd/qp for all child block*/ 1527 i8_frame_acc_satd_by_modqp_q10 += 1528 ((LWORD64)child_satd[j] 1529 << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) / 1530 i4_q_scale_q3_mod; 1531 1532 /* Accumalate mode bits for all child blocks */ 1533 i8_frame_acc_mode_bits_cost += 1534 ps_cu_node->ps_sub_cu[j]->u2_mode_bits_cost; 1535 1536 /* satd and mpm bits accumalation of best cu size candiate */ 1537 i4_ctb_acc_satd += child_satd[j]; 1538 1539 /* Increment pointers */ 1540 //ps_row_cu++; 1541 ps_ed_blk_l1 += 4; 1542 blk_cnt += 4; 1543 } 1544 1545 /* cost accumalation of best cu size candiate */ 1546 i8_frame_acc_satd_cost += child_cost_least; 1547 1548 /* 64x64 merge is not possible */ 1549 merge_64x64 = 0; 1550 } 1551 1552 //ps_ed_blk_l2 += 4; 1553 1554 } //end of EIID's else 1555 #endif 1556 } 1557 /* If Merge success for L1 max CU size 16x16 is chosen */ 1558 else if(merge_16x16_l1) 1559 { 1560 #if IP_DBG_L1_l2 1561 ps_cu_node->ps_parent->u1_cu_size = 16; 1562 ps_cu_node->ps_parent->u2_x0 = gau1_cu_pos_x[blk_cnt]; /* Populate properly */ 1563 ps_cu_node->ps_parent->u2_y0 = gau1_cu_pos_y[blk_cnt]; /* Populate properly */ 1564 ps_cu_node->ps_parent->best_mode = ps_ed_blk_l1->best_merge_mode; 1565 ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt); 1566 1567 blk_cnt += 4; 1568 ps_ed_blk_l1 += 4; 1569 ps_row_cu++; 1570 merge_64x64 = 0; 1571 #else 1572 1573 /*EIID: evaluate only if L1 early-inter-intra decision is not favouring inter*/ 1574 /* enable this only in B pictures */ 1575 if(ps_ed_blk_l1->intra_or_inter == 2 && (ps_ctxt->i4_slice_type != ISLICE)) 1576 { 1577 WORD32 i4_q_scale_q3_mod, i4_local_ctr; 1578 WORD8 i1_cu_possible_qp; 1579 WORD32 i4_act_factor; 1580 /* make cost infinity. */ 1581 /* make modes invalid */ 1582 /* update loop variables */ 1583 /* set other output variales */ 1584 /* dont set neighbour flag so that next blocks wont access this cu */ 1585 /* what happens to ctb_mode_map?? */ 1586 1587 ps_cu_node->ps_parent->u1_cu_size = 16; 1588 ps_cu_node->ps_parent->u2_x0 = 1589 gau1_cu_pos_x[blk_cnt]; /* Populate properly */ 1590 ps_cu_node->ps_parent->u2_y0 = 1591 gau1_cu_pos_y[blk_cnt]; /* Populate properly */ 1592 ps_cu_node->ps_parent->best_mode = 1593 INTRA_DC; //ps_ed_blk_l1->best_merge_mode; 1594 1595 /* fill in the first modes as invalid */ 1596 1597 ps_cu_node->ps_parent->au1_best_mode_1tu[0] = INTRA_DC; 1598 ps_cu_node->ps_parent->au1_best_mode_1tu[1] = 1599 INTRA_DC; //for safery. Since update_cand_list will set num_modes as 3 1600 ps_cu_node->ps_parent->au1_best_mode_1tu[2] = INTRA_DC; 1601 1602 ps_cu_node->ps_parent->au1_best_mode_4tu[0] = INTRA_DC; 1603 ps_cu_node->ps_parent->au1_best_mode_4tu[1] = INTRA_DC; 1604 ps_cu_node->ps_parent->au1_best_mode_4tu[2] = INTRA_DC; 1605 1606 ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt); 1607 1608 //ps_row_cu->s_cu_intra_cand.b6_num_intra_cands = 0; 1609 //ps_row_cu->u1_num_intra_rdopt_cands = 0; 1610 1611 ps_intra32_analyse->b1_split_flag = 1; 1612 ps_intra32_analyse->b1_merge_flag = 0; 1613 1614 ps_intra16_analyse->b1_valid_cu = 0; 1615 ps_intra16_analyse->b1_split_flag = 0; 1616 ps_intra16_analyse->b1_merge_flag = 1; 1617 //memset (&ps_intra16_analyse->au1_best_modes_16x16_tu, 1618 // 255, 1619 // NUM_BEST_MODES); 1620 //memset (&ps_intra16_analyse->au1_best_modes_8x8_tu, 1621 // 255, 1622 // NUM_BEST_MODES); 1623 //set only first mode since if it's 255. it wont go ahead 1624 ps_intra16_analyse->au1_best_modes_16x16_tu[0] = 255; 1625 ps_intra16_analyse->au1_best_modes_8x8_tu[0] = 255; 1626 ps_intra16_analyse->i4_best_intra_cost = MAX_INTRA_COST_IPE; 1627 *pi4_intra_16_cost = MAX_INTRA_COST_IPE; 1628 1629 /*since ME will start evaluating from bottom up, set the lower 1630 cu size data invalid */ 1631 for(i4_local_ctr = 0; i4_local_ctr < 4; i4_local_ctr++) 1632 { 1633 ps_intra16_analyse->as_intra8_analyse[i4_local_ctr] 1634 .au1_4x4_best_modes[0][0] = 255; 1635 ps_intra16_analyse->as_intra8_analyse[i4_local_ctr] 1636 .au1_4x4_best_modes[1][0] = 255; 1637 ps_intra16_analyse->as_intra8_analyse[i4_local_ctr] 1638 .au1_4x4_best_modes[2][0] = 255; 1639 ps_intra16_analyse->as_intra8_analyse[i4_local_ctr] 1640 .au1_4x4_best_modes[3][0] = 255; 1641 ps_intra16_analyse->as_intra8_analyse[i4_local_ctr] 1642 .au1_best_modes_8x8_tu[0] = 255; 1643 ps_intra16_analyse->as_intra8_analyse[i4_local_ctr] 1644 .au1_best_modes_4x4_tu[0] = 255; 1645 ps_intra16_analyse->as_intra8_analyse[i4_local_ctr].i4_best_intra_cost = 1646 MAX_INTRA_COST_IPE; 1647 1648 pi4_intra_8_cost 1649 [(i4_local_ctr & 1) + (MAX_CU_IN_CTB_ROW * (i4_local_ctr >> 1))] = 1650 MAX_INTRA_COST_IPE; 1651 } 1652 1653 /* set neighbours even if intra is not evaluated, since source is always available. */ 1654 ihevce_set_nbr_map( 1655 ps_ctxt->pu1_ctb_nbr_map, 1656 ps_ctxt->i4_nbr_map_strd, 1657 ps_cu_node->ps_parent->u2_x0 << 1, 1658 ps_cu_node->ps_parent->u2_y0 << 1, 1659 (ps_cu_node->ps_parent->u1_cu_size >> 2), 1660 1); 1661 1662 //what happends to RC variables?? 1663 /* run only constant Qp */ 1664 ASSERT(((blk_cnt >> 2) & 0xF) == (blk_cnt >> 2)); 1665 ASSERT(ps_ed_ctb_l1->i4_8x8_satd[blk_cnt >> 2][0] != -2); 1666 i1_cu_possible_qp = ihevce_cu_level_qp_mod( 1667 ps_ctxt->i4_qscale, 1668 ps_ed_ctb_l1->i4_8x8_satd[blk_cnt >> 2][0], 1669 ps_ctxt->ld_curr_frame_8x8_log_avg[0], 1670 f_strength, 1671 &i4_act_factor, 1672 &i4_q_scale_q3_mod, 1673 ps_ctxt->ps_rc_quant_ctxt); 1674 1675 /* cost accumalation of best cu size candiate */ 1676 i8_frame_acc_satd_cost += 0; //parent_cost; //incorrect accumulation 1677 1678 /*satd/mod_qp accumulation of best cu */ 1679 i8_frame_acc_satd_by_modqp_q10 += 0; //incorrect accumulation 1680 //((LWORD64)ps_cu_node->ps_parent->best_satd << SATD_BY_ACT_Q_FAC)/i4_q_scale_q3_mod; 1681 1682 /* Accumalate mode bits for all child blocks */ 1683 i8_frame_acc_mode_bits_cost += 1684 0; //ps_cu_node->ps_parent->u2_mode_bits_cost; 1685 //incoorect accumulation 1686 1687 blk_cnt += 4; 1688 ps_ed_blk_l1 += 4; 1689 //ps_row_cu++; 1690 merge_64x64 = 0; 1691 1692 /* increment for stat purpose only. Increment is valid only on single thread */ 1693 ps_ctxt->u4_num_16x16_skips_at_L0_IPE += 1; 1694 } 1695 else 1696 { 1697 /* 64x64 merge is not possible */ 1698 merge_64x64 = 0; 1699 1700 /* set the 32x32 split flag to 1 */ 1701 ps_intra32_analyse->b1_split_flag = 1; 1702 1703 ps_intra32_analyse->b1_merge_flag = 0; 1704 1705 ps_intra16_analyse->b1_merge_flag = 1; 1706 1707 if((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6) && 1708 (ps_ctxt->i4_slice_type == PSLICE)) 1709 { 1710 ps_ctxt->u1_disable_child_cu_decide = 1; 1711 step2_bypass = 0; 1712 } 1713 //memcpy(ps_ctxt->ai1_ctb_mode_map_temp, ps_ctxt->ai1_ctb_mode_map, sizeof(ps_ctxt->ai1_ctb_mode_map)); 1714 /* Based on the flag, Child modes decision can be disabled*/ 1715 if(0 == ps_ctxt->u1_disable_child_cu_decide) 1716 { 1717 for(j = 0; j < 4; j++) 1718 { 1719 intra8_analyse_t *ps_intra8_analyse; 1720 WORD32 best_ang_mode = (ps_ed_blk_l1 + j)->best_mode; 1721 1722 if(best_ang_mode < 2) 1723 best_ang_mode = 26; 1724 1725 //ps_cu_node->ps_sub_cu[j]->best_cost = MAX_INTRA_COST_IPE; 1726 //ps_cu_node->ps_sub_cu[j]->best_mode = (ps_ed_blk_l1 + j)->best_mode; 1727 1728 ps_cu_node->ps_sub_cu[j]->u2_x0 = 1729 gau1_cu_pos_x[blk_cnt + j]; /* Populate properly */ 1730 ps_cu_node->ps_sub_cu[j]->u2_y0 = 1731 gau1_cu_pos_y[blk_cnt + j]; /* Populate properly */ 1732 ps_cu_node->ps_sub_cu[j]->u1_cu_size = 8; 1733 1734 ihevce_mode_eval_filtering( 1735 ps_cu_node->ps_sub_cu[j], 1736 ps_cu_node, 1737 ps_ctxt, 1738 ps_curr_src, 1739 best_ang_mode, 1740 &ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0], 1741 &ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0], 1742 !step2_bypass, 1743 1); 1744 1745 if(i4_enable_4cu_16tu) 1746 { 1747 ihevce_mode_eval_filtering( 1748 ps_cu_node->ps_sub_cu[j], 1749 ps_cu_node, 1750 ps_ctxt, 1751 ps_curr_src, 1752 best_ang_mode, 1753 &ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0], 1754 &ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0], 1755 !step2_bypass, 1756 0); 1757 } 1758 else 1759 { 1760 /* 4TU not evaluated : 4tu modes set same as 1tu modes */ 1761 memcpy( 1762 &ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0], 1763 &ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0], 1764 NUM_BEST_MODES); 1765 1766 /* 4TU not evaluated : currently 4tu cost set same as 1tu cost */ 1767 memcpy( 1768 &ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0], 1769 &ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0], 1770 NUM_BEST_MODES * sizeof(WORD32)); 1771 } 1772 1773 child_cost[j] = 1774 MIN(ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0], 1775 ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0]); 1776 1777 child_cost_least += child_cost[j]; 1778 1779 /* Select the best mode to be populated as top and left nbr depending on the 1780 4tu and 1tu cost */ 1781 if(ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0] > 1782 ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0]) 1783 { 1784 ps_cu_node->ps_sub_cu[j]->best_mode = 1785 ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0]; 1786 } 1787 else 1788 { 1789 ps_cu_node->ps_sub_cu[j]->best_mode = 1790 ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0]; 1791 } 1792 { /* Update the CTB nodes only for MAX - 1 CU nodes */ 1793 WORD32 xA, yA, row, col; 1794 xA = ((ps_cu_node->ps_sub_cu[j]->u2_x0 << 3) >> 2) + 1; 1795 yA = ((ps_cu_node->ps_sub_cu[j]->u2_y0 << 3) >> 2) + 1; 1796 size = ps_cu_node->ps_sub_cu[j]->u1_cu_size >> 2; 1797 for(row = yA; row < (yA + size); row++) 1798 { 1799 for(col = xA; col < (xA + size); col++) 1800 { 1801 ps_ctxt->au1_ctb_mode_map[row][col] = 1802 ps_cu_node->ps_sub_cu[j]->best_mode; 1803 } 1804 } 1805 } 1806 1807 /*collect individual child satd for final SATD/qp accum*/ 1808 child_satd[j] = ps_cu_node->ps_sub_cu[j]->best_satd; 1809 1810 ps_intra8_analyse = &ps_intra16_analyse->as_intra8_analyse[j]; 1811 1812 /* store the child 8x8 costs */ 1813 pi4_intra_8_cost[(j & 1) + (MAX_CU_IN_CTB_ROW * (j >> 1))] = 1814 child_cost[j]; 1815 1816 /* set the CU valid flag */ 1817 ps_intra8_analyse->b1_valid_cu = 1; 1818 ps_intra8_analyse->b1_enable_nxn = 0; 1819 1820 /* storing the modes to intra8 analyse */ 1821 1822 /* store the best 8x8 modes 8x8 tu */ 1823 memcpy( 1824 &ps_intra8_analyse->au1_best_modes_8x8_tu[0], 1825 &ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0], 1826 sizeof(UWORD8) * (NUM_BEST_MODES)); 1827 ps_intra8_analyse->au1_best_modes_8x8_tu[NUM_BEST_MODES] = 255; 1828 1829 /* store the best 8x8 modes 4x4 tu */ 1830 memcpy( 1831 &ps_intra8_analyse->au1_best_modes_4x4_tu[0], 1832 &ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0], 1833 sizeof(UWORD8) * (NUM_BEST_MODES)); 1834 ps_intra8_analyse->au1_best_modes_4x4_tu[NUM_BEST_MODES] = 255; 1835 1836 /* NXN modes not evaluated hence set to 255 */ 1837 memset( 1838 &ps_intra8_analyse->au1_4x4_best_modes[0][0], 1839 255, 1840 sizeof(UWORD8) * 4 * (NUM_BEST_MODES + 1)); 1841 } 1842 1843 ihevce_set_nbr_map( 1844 ps_ctxt->pu1_ctb_nbr_map, 1845 ps_ctxt->i4_nbr_map_strd, 1846 ps_cu_node->ps_sub_cu[0]->u2_x0 << 1, 1847 ps_cu_node->ps_sub_cu[0]->u2_y0 << 1, 1848 (ps_cu_node->ps_sub_cu[0]->u1_cu_size >> 1), 1849 0); 1850 } 1851 #if 1 //DISBLE_CHILD_CU_EVAL_L0_IPE //1 1852 else 1853 { 1854 for(j = 0; j < 4; j++) 1855 { 1856 intra8_analyse_t *ps_intra8_analyse; 1857 ps_intra8_analyse = &ps_intra16_analyse->as_intra8_analyse[j]; 1858 ps_intra8_analyse->au1_best_modes_8x8_tu[0] = 255; 1859 ps_intra8_analyse->au1_best_modes_4x4_tu[0] = 255; 1860 /* NXN modes not evaluated hence set to 255 */ 1861 memset( 1862 &ps_intra8_analyse->au1_4x4_best_modes[0][0], 1863 255, 1864 sizeof(UWORD8) * 4 * (NUM_BEST_MODES + 1)); 1865 1866 ps_intra8_analyse->b1_valid_cu = 0; 1867 ps_intra8_analyse->b1_enable_nxn = 0; 1868 } 1869 child_cost_least = MAX_INTRA_COST_IPE; 1870 } 1871 #endif 1872 //ps_cu_node->ps_parent->best_mode = ps_ed_blk_l1->best_mode; 1873 //ps_cu_node->ps_parent->best_cost = MAX_INTRA_COST_IPE; 1874 1875 ps_cu_node->ps_parent->u1_cu_size = 16; 1876 ps_cu_node->ps_parent->u2_x0 = 1877 gau1_cu_pos_x[blk_cnt]; /* Populate properly */ 1878 ps_cu_node->ps_parent->u2_y0 = 1879 gau1_cu_pos_y[blk_cnt]; /* Populate properly */ 1880 1881 //memcpy(ps_ctxt->ai1_ctb_mode_map_temp, ps_ctxt->ai1_ctb_mode_map, sizeof(ps_ctxt->ai1_ctb_mode_map)); 1882 1883 /* Eval for TUSize = CuSize */ 1884 ihevce_mode_eval_filtering( 1885 ps_cu_node->ps_parent, 1886 ps_cu_node, 1887 ps_ctxt, 1888 ps_curr_src, 1889 26, 1890 &ps_cu_node->ps_parent->au4_best_cost_1tu[0], 1891 &ps_cu_node->ps_parent->au1_best_mode_1tu[0], 1892 step2_bypass, 1893 1); 1894 1895 if(i4_enable_1cu_4tu) 1896 { 1897 /* Eval for TUSize = CuSize/2 */ 1898 ihevce_mode_eval_filtering( 1899 ps_cu_node->ps_parent, 1900 ps_cu_node, 1901 ps_ctxt, 1902 ps_curr_src, 1903 26, 1904 &ps_cu_node->ps_parent->au4_best_cost_4tu[0], 1905 &ps_cu_node->ps_parent->au1_best_mode_4tu[0], 1906 step2_bypass, 1907 0); 1908 } 1909 else 1910 { 1911 /* 4TU not evaluated : 4tu modes set same as 1tu modes */ 1912 memcpy( 1913 &ps_cu_node->ps_parent->au1_best_mode_4tu[0], 1914 &ps_cu_node->ps_parent->au1_best_mode_1tu[0], 1915 NUM_BEST_MODES); 1916 1917 /* 4TU not evaluated : currently 4tu cost set same as 1tu cost */ 1918 memcpy( 1919 &ps_cu_node->ps_parent->au4_best_cost_4tu[0], 1920 &ps_cu_node->ps_parent->au4_best_cost_1tu[0], 1921 NUM_BEST_MODES * sizeof(WORD32)); 1922 } 1923 1924 ps_ctxt->u1_disable_child_cu_decide = 0; 1925 step2_bypass = 1; 1926 1927 /* Update parent cost */ 1928 parent_cost = 1929 MIN(ps_cu_node->ps_parent->au4_best_cost_4tu[0], 1930 ps_cu_node->ps_parent->au4_best_cost_1tu[0]); 1931 1932 /* Select the best mode to be populated as top and left nbr depending on the 1933 4tu and 1tu cost */ 1934 if(ps_cu_node->ps_parent->au4_best_cost_4tu[0] > 1935 ps_cu_node->ps_parent->au4_best_cost_1tu[0]) 1936 { 1937 ps_cu_node->ps_parent->best_mode = 1938 ps_cu_node->ps_parent->au1_best_mode_1tu[0]; 1939 } 1940 else 1941 { 1942 ps_cu_node->ps_parent->best_mode = 1943 ps_cu_node->ps_parent->au1_best_mode_4tu[0]; 1944 } 1945 1946 /* store the 16x16 cost */ 1947 *pi4_intra_16_cost = parent_cost; 1948 1949 /* accumulate the 32x32 cost */ 1950 if(MAX_INTRA_COST_IPE == *pi4_intra_32_cost) 1951 { 1952 *pi4_intra_32_cost = parent_cost; 1953 } 1954 else 1955 { 1956 *pi4_intra_32_cost += parent_cost; 1957 } 1958 1959 /* set the CU valid flag */ 1960 ps_intra16_analyse->b1_valid_cu = 1; 1961 1962 /* storing the modes to intra 16 analyse */ 1963 { 1964 /* store the best 16x16 modes 16x16 tu */ 1965 memcpy( 1966 &ps_intra16_analyse->au1_best_modes_16x16_tu[0], 1967 &ps_cu_node->ps_parent->au1_best_mode_1tu[0], 1968 sizeof(UWORD8) * NUM_BEST_MODES); 1969 ps_intra16_analyse->au1_best_modes_16x16_tu[NUM_BEST_MODES] = 255; 1970 1971 /* store the best 16x16 modes 8x8 tu */ 1972 memcpy( 1973 &ps_intra16_analyse->au1_best_modes_8x8_tu[0], 1974 &ps_cu_node->ps_parent->au1_best_mode_4tu[0], 1975 sizeof(UWORD8) * NUM_BEST_MODES); 1976 ps_intra16_analyse->au1_best_modes_8x8_tu[NUM_BEST_MODES] = 255; 1977 } 1978 1979 parent_best_mode = ps_cu_node->ps_parent->best_mode; 1980 if(parent_cost <= 1981 child_cost_least + (ps_ctxt->i4_ol_satd_lambda * CHILD_BIAS >> 1982 LAMBDA_Q_SHIFT)) //|| identical_modes) 1983 { 1984 WORD32 i4_q_scale_q3_mod; 1985 WORD8 i1_cu_possible_qp; 1986 WORD32 i4_act_factor; 1987 //choose parent CU 1988 1989 ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt); 1990 1991 /* set the 16x16 non split flag */ 1992 ps_intra16_analyse->b1_split_flag = 0; 1993 1994 /*As 16*16 has won, pick L1 8x8 qp which maps 1995 to L0 16x16 Qp*/ 1996 ASSERT(((blk_cnt >> 4) & 3) == (blk_cnt >> 4)); 1997 ASSERT(ps_ed_ctb_l1->i4_16x16_satd[blk_cnt >> 4][0] != -2); 1998 i1_cu_possible_qp = ihevce_cu_level_qp_mod( 1999 ps_ctxt->i4_qscale, 2000 ps_ed_ctb_l1->i4_16x16_satd[blk_cnt >> 4][0], 2001 ps_ctxt->ld_curr_frame_8x8_log_avg[0], 2002 f_strength, 2003 &i4_act_factor, 2004 &i4_q_scale_q3_mod, 2005 ps_ctxt->ps_rc_quant_ctxt); 2006 2007 /* cost accumalation of best cu size candiate */ 2008 i8_frame_acc_satd_cost += parent_cost; 2009 2010 /* satd and mpm bits accumalation of best cu size candiate */ 2011 i4_ctb_acc_satd += ps_cu_node->ps_parent->best_satd; 2012 2013 /*satd/mod_qp accumulation of best cu */ 2014 i8_frame_acc_satd_by_modqp_q10 += 2015 ((LWORD64)ps_cu_node->ps_parent->best_satd 2016 << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) / 2017 i4_q_scale_q3_mod; 2018 2019 /* Accumalate mode bits for all child blocks */ 2020 i8_frame_acc_mode_bits_cost += ps_cu_node->ps_parent->u2_mode_bits_cost; 2021 2022 blk_cnt += 4; 2023 ps_ed_blk_l1 += 4; 2024 //ps_row_cu++; 2025 } 2026 else 2027 { 2028 //choose child CU 2029 WORD8 i1_cu_possible_qp; 2030 WORD32 i4_act_factor; 2031 WORD32 i4_q_scale_q3_mod; 2032 2033 ASSERT(((blk_cnt >> 2) & 0xF) == (blk_cnt >> 2)); 2034 ASSERT(ps_ed_ctb_l1->i4_8x8_satd[blk_cnt >> 2][1] != -2); 2035 i1_cu_possible_qp = ihevce_cu_level_qp_mod( 2036 ps_ctxt->i4_qscale, 2037 ps_ed_ctb_l1->i4_8x8_satd[blk_cnt >> 2][1], 2038 ps_ctxt->ld_curr_frame_8x8_log_avg[1], 2039 f_strength, 2040 &i4_act_factor, 2041 &i4_q_scale_q3_mod, 2042 ps_ctxt->ps_rc_quant_ctxt); 2043 2044 /* set the 16x16 split flag */ 2045 ps_intra16_analyse->b1_split_flag = 1; 2046 2047 for(j = 0; j < 4; j++) 2048 { 2049 ihevce_update_cand_list( 2050 ps_cu_node->ps_sub_cu[j], ps_ed_blk_l1, ps_ctxt); 2051 2052 if((IHEVCE_QUALITY_P3 > i4_quality_preset)) 2053 { 2054 WORD32 k; 2055 intra8_analyse_t *ps_intra8_analyse; 2056 ps_intra8_analyse = &ps_intra16_analyse->as_intra8_analyse[j]; 2057 2058 for(k = 0; k < 4; k++) 2059 { 2060 /* Populate best 3 nxn modes */ 2061 ps_intra8_analyse->au1_4x4_best_modes[k][0] = 2062 ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0]; 2063 ps_intra8_analyse->au1_4x4_best_modes[k][1] = 2064 ps_cu_node->ps_sub_cu[j] 2065 ->au1_best_mode_4tu[1]; //(ps_ed + 1)->best_mode; 2066 ps_intra8_analyse->au1_4x4_best_modes[k][2] = 2067 ps_cu_node->ps_sub_cu[j] 2068 ->au1_best_mode_4tu[2]; //(ps_ed + 2)->best_mode; 2069 ps_intra8_analyse->au1_4x4_best_modes[k][3] = 255; 2070 } 2071 } 2072 /*accum satd/qp for all child block*/ 2073 i8_frame_acc_satd_by_modqp_q10 += 2074 ((LWORD64)child_satd[j] 2075 << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) / 2076 i4_q_scale_q3_mod; 2077 2078 /* Accumalate mode bits for all child blocks */ 2079 i8_frame_acc_mode_bits_cost += 2080 ps_cu_node->ps_sub_cu[j]->u2_mode_bits_cost; 2081 2082 /* satd and mpm bits accumalation of best cu size candiate */ 2083 i4_ctb_acc_satd += child_satd[j]; 2084 2085 blk_cnt += 1; 2086 ps_ed_blk_l1 += 1; 2087 //ps_row_cu++; 2088 } 2089 2090 /* cost accumalation of best cu size candiate */ 2091 i8_frame_acc_satd_cost += child_cost_least; 2092 } 2093 2094 } //else of EIID 2095 #endif 2096 } // if(merge_16x16_l1) 2097 /* MAX CU SIZE 8x8 */ 2098 else 2099 { 2100 #if IP_DBG_L1_l2 2101 for(i = 0; i < 4; i++) 2102 { 2103 ps_cu_node->ps_parent->u1_cu_size = 8; 2104 ps_cu_node->ps_parent->u2_x0 = 2105 gau1_cu_pos_x[blk_cnt]; /* Populate properly */ 2106 ps_cu_node->ps_parent->u2_y0 = 2107 gau1_cu_pos_y[blk_cnt]; /* Populate properly */ 2108 ps_cu_node->ps_parent->best_mode = ps_ed_blk_l1->best_mode; 2109 2110 ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt); 2111 blk_cnt++; 2112 ps_ed_blk_l1++; 2113 ps_row_cu++; 2114 merge_64x64 = 0; 2115 } 2116 #else 2117 2118 /* EIID: Skip all 4 8x8 block if L1 decisions says skip intra */ 2119 if(ps_ed_blk_l1->intra_or_inter == 2 && (ps_ctxt->i4_slice_type != ISLICE)) 2120 { 2121 WORD32 i4_q_scale_q3_mod; 2122 WORD8 i1_cu_possible_qp; 2123 WORD32 i4_act_factor; 2124 2125 merge_64x64 = 0; 2126 2127 ps_intra32_analyse->b1_merge_flag = 0; 2128 2129 ps_intra16_analyse->au1_best_modes_8x8_tu[0] = 255; 2130 ps_intra16_analyse->au1_best_modes_8x8_tu[1] = 255; 2131 ps_intra16_analyse->au1_best_modes_8x8_tu[2] = 255; 2132 2133 ps_intra16_analyse->au1_best_modes_16x16_tu[0] = 255; 2134 ps_intra16_analyse->au1_best_modes_16x16_tu[1] = 255; 2135 ps_intra16_analyse->au1_best_modes_16x16_tu[2] = 255; 2136 ps_intra16_analyse->b1_split_flag = 1; 2137 ps_intra16_analyse->b1_valid_cu = 0; 2138 ps_intra16_analyse->b1_merge_flag = 0; 2139 2140 ps_intra16_analyse->i4_best_intra_cost = MAX_INTRA_COST_IPE; 2141 2142 for(i = 0; i < 4; i++) 2143 { 2144 intra8_analyse_t *ps_intra8_analyse; 2145 WORD32 ctr_sub_cu; 2146 2147 cu_pos_x = gau1_cu_pos_x[blk_cnt]; 2148 cu_pos_y = gau1_cu_pos_y[blk_cnt]; 2149 2150 if((cu_pos_x < num_8x8_blks_x) && (cu_pos_y < num_8x8_blks_y)) 2151 { 2152 ps_intra8_analyse = &ps_intra16_analyse->as_intra8_analyse[i]; 2153 2154 ps_intra8_analyse->b1_valid_cu = 0; 2155 ps_intra8_analyse->b1_enable_nxn = 0; 2156 ps_intra8_analyse->au1_4x4_best_modes[0][0] = 255; 2157 ps_intra8_analyse->au1_4x4_best_modes[1][0] = 255; 2158 ps_intra8_analyse->au1_4x4_best_modes[2][0] = 255; 2159 ps_intra8_analyse->au1_4x4_best_modes[3][0] = 255; 2160 ps_intra8_analyse->au1_best_modes_4x4_tu[0] = 255; 2161 ps_intra8_analyse->au1_best_modes_8x8_tu[0] = 255; 2162 ps_intra8_analyse->i4_best_intra_cost = MAX_INTRA_COST_IPE; 2163 2164 ps_cu_node->ps_parent->u1_cu_size = 8; 2165 ps_cu_node->ps_parent->u2_x0 = 2166 gau1_cu_pos_x[blk_cnt]; /* Populate properly */ 2167 ps_cu_node->ps_parent->u2_y0 = 2168 gau1_cu_pos_y[blk_cnt]; /* Populate properly */ 2169 ps_cu_node->ps_parent->best_mode = 2170 INTRA_DC; //ps_ed_blk_l1->best_mode; 2171 2172 /* fill in the first modes as invalid */ 2173 2174 ps_cu_node->ps_parent->au1_best_mode_1tu[0] = INTRA_DC; 2175 ps_cu_node->ps_parent->au1_best_mode_1tu[1] = 2176 INTRA_DC; //for safery. Since update_cand_list will set num_modes as 3 2177 ps_cu_node->ps_parent->au1_best_mode_1tu[2] = INTRA_DC; 2178 2179 ps_cu_node->ps_parent->au1_best_mode_4tu[0] = INTRA_DC; 2180 ps_cu_node->ps_parent->au1_best_mode_4tu[1] = INTRA_DC; 2181 ps_cu_node->ps_parent->au1_best_mode_4tu[2] = INTRA_DC; 2182 2183 ihevce_update_cand_list( 2184 ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt); 2185 2186 //ps_row_cu->s_cu_intra_cand.b6_num_intra_cands = 0; 2187 //ps_row_cu->u1_num_intra_rdopt_cands = 0; 2188 2189 for(ctr_sub_cu = 0; ctr_sub_cu < 4; ctr_sub_cu++) 2190 { 2191 ps_cu_node->ps_sub_cu[ctr_sub_cu]->au1_best_mode_1tu[0] = 2192 INTRA_DC; 2193 ps_cu_node->ps_sub_cu[ctr_sub_cu]->au1_best_mode_4tu[0] = 2194 INTRA_DC; 2195 ps_cu_node->ps_sub_cu[ctr_sub_cu]->au4_best_cost_1tu[0] = 2196 MAX_INTRA_COST_IPE; 2197 2198 ps_cu_node->ps_sub_cu[ctr_sub_cu]->au4_best_cost_4tu[0] = 2199 MAX_INTRA_COST_IPE; 2200 ps_cu_node->ps_sub_cu[ctr_sub_cu]->best_cost = 2201 MAX_INTRA_COST_IPE; 2202 } 2203 2204 pi4_intra_8_cost[(i & 1) + (MAX_CU_IN_CTB_ROW * (i >> 1))] = 2205 MAX_INTRA_COST_IPE; 2206 2207 ASSERT(((blk_cnt >> 2) & 0xF) == (blk_cnt >> 2)); 2208 ASSERT(ps_ed_ctb_l1->i4_8x8_satd[(blk_cnt >> 2)][1] != -2); 2209 i1_cu_possible_qp = ihevce_cu_level_qp_mod( 2210 ps_ctxt->i4_qscale, 2211 ps_ed_ctb_l1->i4_8x8_satd[(blk_cnt >> 2)][1], 2212 ps_ctxt->ld_curr_frame_8x8_log_avg[1], 2213 f_strength, 2214 &i4_act_factor, 2215 &i4_q_scale_q3_mod, 2216 ps_ctxt->ps_rc_quant_ctxt); 2217 2218 /* set neighbours even if intra is not evaluated, since source is always available. */ 2219 ihevce_set_nbr_map( 2220 ps_ctxt->pu1_ctb_nbr_map, 2221 ps_ctxt->i4_nbr_map_strd, 2222 ps_cu_node->ps_parent->u2_x0 << 1, 2223 ps_cu_node->ps_parent->u2_y0 << 1, 2224 (ps_cu_node->ps_parent->u1_cu_size >> 2), 2225 1); 2226 2227 //ps_row_cu++; 2228 } 2229 blk_cnt++; 2230 ps_ed_blk_l1++; 2231 } 2232 } 2233 else 2234 { 2235 //cu_intra_cand_t *ps_cu_intra_cand; 2236 WORD8 i1_cu_possible_qp; 2237 WORD32 i4_act_factor; 2238 WORD32 i4_q_scale_q3_mod; 2239 2240 ASSERT(((blk_cnt >> 2) & 0xF) == (blk_cnt >> 2)); 2241 ASSERT(ps_ed_ctb_l1->i4_8x8_satd[(blk_cnt >> 2)][1] != -2); 2242 i1_cu_possible_qp = ihevce_cu_level_qp_mod( 2243 ps_ctxt->i4_qscale, 2244 ps_ed_ctb_l1->i4_8x8_satd[(blk_cnt >> 2)][1], 2245 ps_ctxt->ld_curr_frame_8x8_log_avg[1], 2246 f_strength, 2247 &i4_act_factor, 2248 &i4_q_scale_q3_mod, 2249 ps_ctxt->ps_rc_quant_ctxt); 2250 2251 /* 64x64 merge is not possible */ 2252 merge_64x64 = 0; 2253 2254 ps_intra32_analyse->b1_merge_flag = 0; 2255 2256 ps_intra16_analyse->b1_merge_flag = 0; 2257 2258 /* by default 16x16 modes are set to default values DC and Planar */ 2259 ps_intra16_analyse->au1_best_modes_8x8_tu[0] = 0; 2260 ps_intra16_analyse->au1_best_modes_8x8_tu[1] = 1; 2261 ps_intra16_analyse->au1_best_modes_8x8_tu[2] = 255; 2262 2263 ps_intra16_analyse->au1_best_modes_16x16_tu[0] = 0; 2264 ps_intra16_analyse->au1_best_modes_16x16_tu[1] = 1; 2265 ps_intra16_analyse->au1_best_modes_16x16_tu[2] = 255; 2266 ps_intra16_analyse->b1_split_flag = 1; 2267 ps_intra16_analyse->b1_valid_cu = 1; 2268 2269 for(i = 0; i < 4; i++) 2270 { 2271 intra8_analyse_t *ps_intra8_analyse; 2272 cu_pos_x = gau1_cu_pos_x[blk_cnt]; 2273 cu_pos_y = gau1_cu_pos_y[blk_cnt]; 2274 if((cu_pos_x < num_8x8_blks_x) && (cu_pos_y < num_8x8_blks_y)) 2275 { 2276 //ps_cu_intra_cand = &ps_row_cu->s_cu_intra_cand; 2277 //ps_cu_node->ps_parent->best_cost = MAX_INTRA_COST_IPE; 2278 2279 //ps_cu_node->ps_parent->best_mode = ps_ed_blk_l1->best_mode; 2280 2281 child_cost_least = 0; 2282 2283 ps_intra8_analyse = &ps_intra16_analyse->as_intra8_analyse[i]; 2284 ps_cu_node->ps_parent->u1_cu_size = 8; 2285 ps_cu_node->ps_parent->u2_x0 = 2286 gau1_cu_pos_x[blk_cnt]; /* Populate properly */ 2287 ps_cu_node->ps_parent->u2_y0 = 2288 gau1_cu_pos_y[blk_cnt]; /* Populate properly */ 2289 2290 //memcpy(ps_ctxt->ai1_ctb_mode_map_temp, ps_ctxt->ai1_ctb_mode_map, sizeof(ps_ctxt->ai1_ctb_mode_map)); 2291 2292 /*EARLY DECISION 8x8 block */ 2293 ihevce_pu_calc_8x8_blk( 2294 ps_curr_src, ps_ctxt, ps_cu_node, ps_ctxt->ps_func_selector); 2295 for(j = 0; j < 4; j++) 2296 { 2297 child_cost_least += ps_cu_node->ps_sub_cu[j]->best_cost; 2298 child_satd[j] = ps_cu_node->ps_sub_cu[j]->best_satd; 2299 } 2300 2301 /* Based on the flag, CU = 4TU modes decision can be disabled, CU = 4PU is retained */ 2302 if(0 == ps_ctxt->u1_disable_child_cu_decide) 2303 { 2304 ihevce_set_nbr_map( 2305 ps_ctxt->pu1_ctb_nbr_map, 2306 ps_ctxt->i4_nbr_map_strd, 2307 ps_cu_node->ps_parent->u2_x0 << 1, 2308 ps_cu_node->ps_parent->u2_y0 << 1, 2309 (ps_cu_node->ps_parent->u1_cu_size >> 2), 2310 0); 2311 2312 //memcpy(ps_ctxt->ai1_ctb_mode_map_temp, ps_ctxt->ai1_ctb_mode_map, sizeof(ps_ctxt->ai1_ctb_mode_map)); 2313 2314 /* Eval for TUSize = CuSize */ 2315 ihevce_mode_eval_filtering( 2316 ps_cu_node->ps_parent, 2317 ps_cu_node, 2318 ps_ctxt, 2319 ps_curr_src, 2320 26, 2321 &ps_cu_node->ps_parent->au4_best_cost_1tu[0], 2322 &ps_cu_node->ps_parent->au1_best_mode_1tu[0], 2323 step2_bypass, 2324 1); 2325 2326 if(i4_enable_1cu_4tu) 2327 { 2328 /* Eval for TUSize = CuSize/2 */ 2329 ihevce_mode_eval_filtering( 2330 ps_cu_node->ps_parent, 2331 ps_cu_node, 2332 ps_ctxt, 2333 ps_curr_src, 2334 26, 2335 &ps_cu_node->ps_parent->au4_best_cost_4tu[0], 2336 &ps_cu_node->ps_parent->au1_best_mode_4tu[0], 2337 step2_bypass, 2338 0); 2339 } 2340 else 2341 { 2342 /* 4TU not evaluated : 4tu modes set same as 1tu modes */ 2343 memcpy( 2344 &ps_cu_node->ps_parent->au1_best_mode_4tu[0], 2345 &ps_cu_node->ps_parent->au1_best_mode_1tu[0], 2346 NUM_BEST_MODES); 2347 2348 /* 4TU not evaluated : currently 4tu cost set same as 1tu cost */ 2349 memcpy( 2350 &ps_cu_node->ps_parent->au4_best_cost_4tu[0], 2351 &ps_cu_node->ps_parent->au4_best_cost_1tu[0], 2352 NUM_BEST_MODES * sizeof(WORD32)); 2353 } 2354 2355 /* Update parent cost */ 2356 parent_cost = 2357 MIN(ps_cu_node->ps_parent->au4_best_cost_4tu[0], 2358 ps_cu_node->ps_parent->au4_best_cost_1tu[0]); 2359 2360 /* Select the best mode to be populated as top and left nbr depending on the 2361 4tu and 1tu cost */ 2362 if(ps_cu_node->ps_parent->au4_best_cost_4tu[0] > 2363 ps_cu_node->ps_parent->au4_best_cost_1tu[0]) 2364 { 2365 ps_cu_node->ps_parent->best_mode = 2366 ps_cu_node->ps_parent->au1_best_mode_1tu[0]; 2367 } 2368 else 2369 { 2370 ps_cu_node->ps_parent->best_mode = 2371 ps_cu_node->ps_parent->au1_best_mode_4tu[0]; 2372 } 2373 } 2374 2375 /* set the CU valid flag */ 2376 ps_intra8_analyse->b1_valid_cu = 1; 2377 ps_intra8_analyse->b1_enable_nxn = 0; 2378 2379 /* storing the modes to intra 8 analyse */ 2380 2381 /* store the best 8x8 modes 8x8 tu */ 2382 memcpy( 2383 &ps_intra8_analyse->au1_best_modes_8x8_tu[0], 2384 &ps_cu_node->ps_parent->au1_best_mode_1tu[0], 2385 sizeof(UWORD8) * (NUM_BEST_MODES)); 2386 ps_intra8_analyse->au1_best_modes_8x8_tu[NUM_BEST_MODES] = 255; 2387 2388 /* store the best 8x8 modes 4x4 tu */ 2389 memcpy( 2390 &ps_intra8_analyse->au1_best_modes_4x4_tu[0], 2391 &ps_cu_node->ps_parent->au1_best_mode_4tu[0], 2392 sizeof(UWORD8) * (NUM_BEST_MODES)); 2393 ps_intra8_analyse->au1_best_modes_4x4_tu[NUM_BEST_MODES] = 255; 2394 2395 /*As 8*8 has won, pick L1 4x4 qp which is equal to 2396 L1 8x8 Qp*/ 2397 //ps_row_cu->u1_cu_possible_qp[0] = u1_cu_possible_qp; 2398 //ps_row_cu->i4_act_factor[0][1] = i4_act_factor; 2399 2400 parent_best_mode = ps_cu_node->ps_parent->best_mode; 2401 if(parent_cost <= 2402 child_cost_least + 2403 (ps_ctxt->i4_ol_satd_lambda * CHILD_BIAS >> LAMBDA_Q_SHIFT)) 2404 { 2405 /*CU = 4TU */ 2406 ihevce_update_cand_list( 2407 ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt); 2408 2409 /* store the child 8x8 costs */ 2410 pi4_intra_8_cost[(i & 1) + (MAX_CU_IN_CTB_ROW * (i >> 1))] = 2411 parent_cost; 2412 2413 /* cost accumalation of best cu size candiate */ 2414 i8_frame_acc_satd_cost += parent_cost; 2415 2416 /*satd/mod_qp accumulation of best cu */ 2417 i8_frame_acc_satd_by_modqp_q10 += 2418 ((LWORD64)ps_cu_node->ps_parent->best_satd 2419 << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) / 2420 i4_q_scale_q3_mod; 2421 2422 /* Accumalate mode bits for all child blocks */ 2423 i8_frame_acc_mode_bits_cost += 2424 ps_cu_node->ps_parent->u2_mode_bits_cost; 2425 2426 /* satd and mpm bits accumalation of best cu size candiate */ 2427 i4_ctb_acc_satd += ps_cu_node->ps_parent->best_satd; 2428 2429 /* accumulate the 16x16 cost*/ 2430 if(MAX_INTRA_COST_IPE == *pi4_intra_16_cost) 2431 { 2432 *pi4_intra_16_cost = parent_cost; 2433 } 2434 else 2435 { 2436 *pi4_intra_16_cost += parent_cost; 2437 } 2438 2439 /* accumulate the 32x32 cost*/ 2440 if(MAX_INTRA_COST_IPE == *pi4_intra_32_cost) 2441 { 2442 *pi4_intra_32_cost = parent_cost; 2443 } 2444 else 2445 { 2446 *pi4_intra_32_cost += parent_cost; 2447 } 2448 } 2449 else 2450 { 2451 /*CU = 4PU*/ 2452 //ps_row_cu->b3_cu_pos_x = (UWORD8) ps_cu_node->ps_parent->u2_x0; 2453 //ps_row_cu->b3_cu_pos_y = (UWORD8) ps_cu_node->ps_parent->u2_y0; 2454 //ps_row_cu->u1_cu_size = ps_cu_node->ps_parent->u1_cu_size; 2455 2456 /* store the child 8x8 costs woth 4x4 pu summed cost */ 2457 pi4_intra_8_cost[(i & 1) + (MAX_CU_IN_CTB_ROW * (i >> 1))] = 2458 (child_cost_least); 2459 2460 /* accumulate the 16x16 cost*/ 2461 if(MAX_INTRA_COST_IPE == *pi4_intra_16_cost) 2462 { 2463 *pi4_intra_16_cost = child_cost_least; 2464 } 2465 else 2466 { 2467 *pi4_intra_16_cost += child_cost_least; 2468 } 2469 2470 /* cost accumalation of best cu size candiate */ 2471 i8_frame_acc_satd_cost += child_cost_least; 2472 2473 for(j = 0; j < 4; j++) 2474 { 2475 /*satd/qp accumualtion*/ 2476 i8_frame_acc_satd_by_modqp_q10 += 2477 ((LWORD64)child_satd[j] 2478 << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) / 2479 i4_q_scale_q3_mod; 2480 2481 /* Accumalate mode bits for all child blocks */ 2482 i8_frame_acc_mode_bits_cost += 2483 ps_cu_node->ps_sub_cu[j]->u2_mode_bits_cost; 2484 2485 /* satd and mpm bits accumalation of best cu size candiate */ 2486 i4_ctb_acc_satd += child_satd[j]; 2487 } 2488 2489 /* accumulate the 32x32 cost*/ 2490 if(MAX_INTRA_COST_IPE == *pi4_intra_32_cost) 2491 { 2492 *pi4_intra_32_cost = child_cost_least; 2493 } 2494 else 2495 { 2496 *pi4_intra_32_cost += child_cost_least; 2497 } 2498 2499 ps_intra8_analyse->b1_enable_nxn = 1; 2500 2501 /* Insert the best 8x8 modes unconditionally */ 2502 2503 x = ((ps_cu_node->u2_x0 << 3) >> 2) + 1; 2504 y = ((ps_cu_node->u2_y0 << 3) >> 2) + 1; 2505 size = ps_cu_node->u1_cu_size >> 2; 2506 2507 ps_ctxt->au1_ctb_mode_map[y][x] = 2508 ps_cu_node->ps_sub_cu[0]->best_mode; 2509 ps_ctxt->au1_ctb_mode_map[y][x + 1] = 2510 ps_cu_node->ps_sub_cu[1]->best_mode; 2511 ps_ctxt->au1_ctb_mode_map[y + 1][x] = 2512 ps_cu_node->ps_sub_cu[2]->best_mode; 2513 ps_ctxt->au1_ctb_mode_map[y + 1][x + 1] = 2514 ps_cu_node->ps_sub_cu[3]->best_mode; 2515 } 2516 /* NXN mode population */ 2517 for(j = 0; j < 4; j++) 2518 { 2519 cand_mode_list[0] = 2520 ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0]; 2521 cand_mode_list[1] = 2522 ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[1]; 2523 cand_mode_list[2] = 2524 ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[2]; 2525 2526 if(1) 2527 { 2528 /* Populate best 3 nxn modes */ 2529 ps_intra8_analyse->au1_4x4_best_modes[j][0] = 2530 cand_mode_list[0]; 2531 ps_intra8_analyse->au1_4x4_best_modes[j][1] = 2532 cand_mode_list[1]; //(ps_ed + 1)->best_mode; 2533 ps_intra8_analyse->au1_4x4_best_modes[j][2] = 2534 cand_mode_list[2]; //(ps_ed + 2)->best_mode; 2535 ps_intra8_analyse->au1_4x4_best_modes[j][3] = 255; 2536 2537 //memcpy(ps_intra8_analyse->au1_4x4_best_modes[j], ps_row_cu->s_cu_intra_cand.au1_intra_luma_modes_nxn[j], 4); 2538 } 2539 /* For HQ, all 35 modes to be used for RDOPT, removed from here for memory clean-up */ 2540 2541 else /* IHEVCE_QUALITY_P0 == i4_quality_preset */ 2542 { 2543 /* To indicate to enc loop that NXN is enabled in HIGH QUALITY fior CU 8x8*/ 2544 ps_intra8_analyse->au1_4x4_best_modes[j][0] = 0; 2545 } 2546 2547 ps_intra8_analyse 2548 ->au1_4x4_best_modes[j][MAX_INTRA_CU_CANDIDATES] = 255; 2549 } 2550 2551 //ps_row_cu++; 2552 } 2553 else 2554 { 2555 /* For Incomplete CTB, 16x16 is not valid */ 2556 ps_intra16_analyse->b1_valid_cu = 0; 2557 } 2558 blk_cnt++; 2559 ps_ed_blk_l1++; 2560 } 2561 //ps_ed_blk_l2 ++; 2562 } //else of EIID 2563 #endif 2564 } 2565 } 2566 else 2567 { 2568 /* For incomplete CTB, init valid CU to 0 */ 2569 ps_ed_blk_l1++; 2570 ps_intra32_analyse->b1_valid_cu = 0; 2571 ps_intra16_analyse[0].b1_valid_cu = 0; 2572 blk_cnt++; 2573 merge_64x64 = 0; 2574 } 2575 } while(blk_cnt != MAX_CTB_SIZE); 2576 /* if 64x64 merge is possible then check for 32x32 having same best modes */ 2577 if(1 == merge_64x64) 2578 { 2579 WORD32 act_mode = au1_best_32x32_modes[0]; 2580 2581 ps_ed_blk_l2 = ps_ed_l2_ctb; 2582 best_mode = ps_ed_blk_l2->best_mode; 2583 merge_64x64 = 2584 ((act_mode == au1_best_32x32_modes[0]) + (act_mode == au1_best_32x32_modes[1]) + 2585 (act_mode == au1_best_32x32_modes[2]) + 2586 (act_mode == au1_best_32x32_modes[3]) == 2587 4); 2588 if(merge_64x64 == 1) 2589 best_mode = au1_best_32x32_modes[0]; 2590 else 2591 best_mode = ps_ed_blk_l2->best_mode; 2592 /* All 32x32 costs are accumalated to 64x64 cost */ 2593 ps_l0_ipe_out_ctb->i4_best64x64_intra_cost = 0; 2594 for(i = 0; i < 4; i++) 2595 { 2596 ps_l0_ipe_out_ctb->i4_best64x64_intra_cost += 2597 ps_l0_ipe_out_ctb->ai4_best32x32_intra_cost[i]; 2598 } 2599 2600 /* If all modes of 32x32 block is not same */ 2601 if(0 == merge_64x64) 2602 { 2603 /*Compute CHILD cost for 32x32 */ 2604 WORD32 child_cost_64x64 = au4_best_32x32_cost[0] + au4_best_32x32_cost[1] + 2605 au4_best_32x32_cost[2] + au4_best_32x32_cost[3]; 2606 WORD32 cost = MAX_INTRA_COST_IPE; 2607 2608 WORD32 best_mode_temp = 0; 2609 /*Compute 64x64 cost for each mode of 32x32*/ 2610 for(i = 0; i < 4; i++) 2611 { 2612 WORD32 mode = au1_best_32x32_modes[i]; 2613 if(mode < 2) 2614 mode = 26; 2615 ps_cu_node->ps_parent->u1_cu_size = 64; 2616 ps_cu_node->ps_parent->u2_x0 = gau1_cu_pos_x[0]; /* Populate properly */ 2617 ps_cu_node->ps_parent->u2_y0 = gau1_cu_pos_y[0]; /* Populate properly */ 2618 2619 ihevce_set_nbr_map( 2620 ps_ctxt->pu1_ctb_nbr_map, 2621 ps_ctxt->i4_nbr_map_strd, 2622 (ps_cu_node->ps_parent->u2_x0 << 1), 2623 (ps_cu_node->ps_parent->u2_y0 << 1), 2624 (ps_cu_node->ps_parent->u1_cu_size >> 2), 2625 0); 2626 2627 ihevce_mode_eval_filtering( 2628 ps_cu_node->ps_parent, 2629 ps_cu_node, 2630 ps_ctxt, 2631 ps_curr_src, 2632 mode, 2633 &ps_cu_node->ps_parent->au4_best_cost_1tu[0], 2634 &ps_cu_node->ps_parent->au1_best_mode_1tu[0], 2635 !step2_bypass, 2636 0); 2637 2638 parent_cost = ps_cu_node->ps_parent->best_cost; 2639 if(cost > parent_cost) 2640 { 2641 cost = parent_cost; 2642 best_mode_temp = ps_cu_node->ps_parent->best_mode; 2643 } 2644 } 2645 if(cost < child_cost_64x64) 2646 { 2647 merge_64x64 = 1; 2648 best_mode = best_mode_temp; 2649 2650 /* Update 64x64 cost if CU 64x64 is chosen */ 2651 ps_l0_ipe_out_ctb->i4_best64x64_intra_cost = cost; 2652 2653 /* Accumalate the least cost for CU 64x64 */ 2654 i8_frame_acc_satd_cost = cost; 2655 i8_frame_acc_mode_bits_cost = ps_cu_node->ps_parent->u2_mode_bits_cost; 2656 2657 /* satd and mpm bits accumalation of best cu size candiate */ 2658 i4_ctb_acc_satd = ps_cu_node->ps_parent->best_satd; 2659 } 2660 } 2661 } 2662 2663 if(merge_64x64) 2664 { 2665 WORD32 i, j; 2666 intra32_analyse_t *ps_intra32_analyse; 2667 intra16_analyse_t *ps_intra16_analyse; 2668 WORD32 row, col; 2669 WORD32 i4_q_scale_q3_mod; 2670 WORD8 i1_cu_possible_qp; 2671 WORD32 i4_act_factor; 2672 //ps_row_cu = ps_curr_cu; 2673 ps_ctb_out->u4_cu_split_flags = 0x0; 2674 ps_ed_blk_l1 = ps_ed_l1_ctb; 2675 ps_ed_blk_l2 = ps_ed_l2_ctb; 2676 2677 ps_l0_ipe_out_ctb->u1_split_flag = 0; 2678 2679 /* If CU size of 64x64 is chosen, disbale all the 16x16 flag*/ 2680 for(i = 0; i < 4; i++) 2681 { 2682 /* get the corresponding intra 32 analyse pointer use (blk_cnt / 16) */ 2683 /* blk cnt is in terms of 8x8 units so a 32x32 will have 16 8x8 units */ 2684 ps_intra32_analyse = &ps_l0_ipe_out_ctb->as_intra32_analyse[i]; 2685 2686 for(j = 0; j < 4; j++) 2687 { 2688 /* get the corresponding intra 16 analyse pointer use (blk_cnt & 0xF / 4)*/ 2689 /* blk cnt is in terms of 8x8 units so a 16x16 will have 4 8x8 units */ 2690 ps_intra16_analyse = &ps_intra32_analyse->as_intra16_analyse[j]; 2691 ps_intra16_analyse->b1_merge_flag = 0; 2692 } 2693 } 2694 2695 /* CU size 64x64 and fill the final cu params */ 2696 //ps_row_cu->b3_cu_pos_x = gau1_cu_pos_x[0]; 2697 //ps_row_cu->b3_cu_pos_y = gau1_cu_pos_y[0]; 2698 //ps_row_cu->u1_cu_size = 64; 2699 2700 /* Candidate mode Update */ 2701 cand_mode_list[0] = best_mode; 2702 if(cand_mode_list[0] > 1) 2703 { 2704 if(cand_mode_list[0] == 2) 2705 { 2706 cand_mode_list[1] = 34; 2707 cand_mode_list[2] = 3; 2708 } 2709 else if(cand_mode_list[0] == 34) 2710 { 2711 cand_mode_list[1] = 2; 2712 cand_mode_list[2] = 33; 2713 } 2714 else 2715 { 2716 cand_mode_list[1] = cand_mode_list[0] - 1; 2717 cand_mode_list[2] = cand_mode_list[0] + 1; 2718 } 2719 //cand_mode_list[1] = ps_ed_blk_l1->nang_attr.best_mode; 2720 //cand_mode_list[2] = ps_ed_blk_l1->ang_attr.best_mode; 2721 } 2722 else 2723 { 2724 cand_mode_list[0] = 0; 2725 cand_mode_list[1] = 1; 2726 cand_mode_list[2] = 26; 2727 //cand_mode_list[2] = ps_ed_blk_l1->nang_attr.best_mode; 2728 } 2729 2730 /* All 32x32 costs are accumalated to 64x64 cost */ 2731 ps_l0_ipe_out_ctb->i4_best64x64_intra_cost = 0; 2732 for(i = 0; i < 4; i++) 2733 { 2734 ps_l0_ipe_out_ctb->i4_best64x64_intra_cost += 2735 ps_l0_ipe_out_ctb->ai4_best32x32_intra_cost[i]; 2736 } 2737 /* by default 64x64 modes are set to default values DC and Planar */ 2738 ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[0] = cand_mode_list[0]; 2739 ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[1] = cand_mode_list[1]; 2740 ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[2] = cand_mode_list[2]; 2741 ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[3] = 255; 2742 2743 /* Update CTB mode map for the finalised CU */ 2744 x = ((ps_cu_node->u2_x0 << 3) >> 2) + 1; 2745 y = ((ps_cu_node->u2_y0 << 3) >> 2) + 1; 2746 size = ps_cu_node->u1_cu_size >> 2; 2747 2748 for(row = y; row < (y + size); row++) 2749 { 2750 for(col = x; col < (x + size); col++) 2751 { 2752 ps_ctxt->au1_ctb_mode_map[row][col] = best_mode; 2753 } 2754 } 2755 2756 ihevce_set_nbr_map( 2757 ps_ctxt->pu1_ctb_nbr_map, 2758 ps_ctxt->i4_nbr_map_strd, 2759 (ps_cu_node->u2_x0 << 1), 2760 (ps_cu_node->u2_y0 << 1), 2761 (ps_cu_node->u1_cu_size >> 2), 2762 1); 2763 2764 /*As 64*64 has won, pick L1 32x32 qp*/ 2765 //ASSERT(((blk_cnt>>6) & 0xF) == (blk_cnt>>6)); 2766 //ASSERT((blk_cnt>>6) == 0); 2767 ASSERT(ps_ed_ctb_l1->i4_32x32_satd[0][0] != -2); 2768 i1_cu_possible_qp = ihevce_cu_level_qp_mod( 2769 ps_ctxt->i4_qscale, 2770 ps_ed_ctb_l1->i4_32x32_satd[0][0], 2771 ps_ctxt->ld_curr_frame_32x32_log_avg[0], 2772 f_strength, 2773 &i4_act_factor, 2774 &i4_q_scale_q3_mod, 2775 ps_ctxt->ps_rc_quant_ctxt); 2776 2777 i8_frame_acc_satd_by_modqp_q10 = 2778 (i8_frame_acc_satd_cost << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) / 2779 i4_q_scale_q3_mod; 2780 /* Increment pointers */ 2781 ps_ed_blk_l1 += 64; 2782 ps_ed_blk_l2 += 16; 2783 //ps_row_cu++; 2784 } 2785 } 2786 2787 //ps_ctb_out->u1_num_cus_in_ctb = (UWORD8)(ps_row_cu - ps_curr_cu); 2788 2789 { 2790 WORD32 i4_i, i4_j; 2791 WORD32 dummy; 2792 WORD8 i1_cu_qp; 2793 (void)i1_cu_qp; 2794 /*MAM_VAR_L1*/ 2795 for(i4_j = 0; i4_j < 2; i4_j++) 2796 { 2797 i4_mod_factor_num = ps_ctxt->ai4_mod_factor_derived_by_variance[i4_j]; 2798 f_strength = ps_ctxt->f_strength; 2799 2800 //i4_mod_factor_num = 4; 2801 2802 ps_ed_blk_l1 = ps_ed_l1_ctb; 2803 ps_ed_blk_l2 = ps_ed_l2_ctb; 2804 //ps_row_cu = ps_curr_cu; 2805 2806 /*Valid only for complete CTB */ 2807 if((64 == u1_curr_ctb_wdt) && (64 == u1_curr_ctb_hgt)) 2808 { 2809 ASSERT(ps_ed_ctb_l1->i4_32x32_satd[0][0] != -2); 2810 ASSERT(ps_ed_ctb_l1->i4_32x32_satd[0][1] != -2); 2811 ASSERT(ps_ed_ctb_l1->i4_32x32_satd[0][2] != -2); 2812 ASSERT(ps_ed_ctb_l1->i4_32x32_satd[0][3] != -2); 2813 2814 i1_cu_qp = ihevce_cu_level_qp_mod( 2815 ps_ctxt->i4_qscale, 2816 ps_ed_ctb_l1->i4_32x32_satd[0][0], 2817 ps_ctxt->ld_curr_frame_32x32_log_avg[0], 2818 f_strength, 2819 &ps_l0_ipe_out_ctb->i4_64x64_act_factor[0][i4_j], 2820 &dummy, 2821 ps_ctxt->ps_rc_quant_ctxt); 2822 2823 i1_cu_qp = ihevce_cu_level_qp_mod( 2824 ps_ctxt->i4_qscale, 2825 ps_ed_ctb_l1->i4_32x32_satd[0][1], 2826 ps_ctxt->ld_curr_frame_32x32_log_avg[1], 2827 f_strength, 2828 &ps_l0_ipe_out_ctb->i4_64x64_act_factor[1][i4_j], 2829 &dummy, 2830 ps_ctxt->ps_rc_quant_ctxt); 2831 i1_cu_qp = ihevce_cu_level_qp_mod( 2832 ps_ctxt->i4_qscale, 2833 ps_ed_ctb_l1->i4_32x32_satd[0][2], 2834 ps_ctxt->ld_curr_frame_32x32_log_avg[2], 2835 f_strength, 2836 &ps_l0_ipe_out_ctb->i4_64x64_act_factor[2][i4_j], 2837 &dummy, 2838 ps_ctxt->ps_rc_quant_ctxt); 2839 2840 i1_cu_qp = ihevce_cu_level_qp_mod( 2841 ps_ctxt->i4_qscale, 2842 ps_ed_ctb_l1->i4_32x32_satd[0][3], 2843 2.0 + ps_ctxt->ld_curr_frame_16x16_log_avg[0], 2844 f_strength, 2845 &ps_l0_ipe_out_ctb->i4_64x64_act_factor[3][i4_j], 2846 &dummy, 2847 ps_ctxt->ps_rc_quant_ctxt); 2848 2849 ASSERT(ps_l0_ipe_out_ctb->i4_64x64_act_factor[3][i4_j] > 0); 2850 } 2851 else 2852 { 2853 ps_l0_ipe_out_ctb->i4_64x64_act_factor[0][i4_j] = 1024; 2854 ps_l0_ipe_out_ctb->i4_64x64_act_factor[1][i4_j] = 1024; 2855 ps_l0_ipe_out_ctb->i4_64x64_act_factor[2][i4_j] = 1024; 2856 ps_l0_ipe_out_ctb->i4_64x64_act_factor[3][i4_j] = 1024; 2857 } 2858 2859 /*Store the 8x8 Qps from L2 (in raster order) as output of intra prediction 2860 for the usage by ME*/ 2861 2862 { 2863 WORD32 pos_x_32, pos_y_32, pos; 2864 //WORD32 i4_incomplete_ctb_val_8; 2865 pos_x_32 = u1_curr_ctb_wdt / 16; 2866 pos_y_32 = u1_curr_ctb_hgt / 16; 2867 2868 pos = (pos_x_32 < pos_y_32) ? pos_x_32 : pos_y_32; 2869 2870 for(i4_i = 0; i4_i < 4; i4_i++) 2871 { 2872 if(i4_i < pos) 2873 { 2874 ASSERT(ps_ed_ctb_l1->i4_16x16_satd[i4_i][0] != -2); 2875 ASSERT(ps_ed_ctb_l1->i4_16x16_satd[i4_i][1] != -2); 2876 ASSERT(ps_ed_ctb_l1->i4_16x16_satd[i4_i][2] != -2); 2877 i1_cu_qp = ihevce_cu_level_qp_mod( 2878 ps_ctxt->i4_qscale, 2879 ps_ed_ctb_l1->i4_16x16_satd[i4_i][0], 2880 ps_ctxt->ld_curr_frame_16x16_log_avg[0], 2881 f_strength, 2882 &ps_l0_ipe_out_ctb->i4_32x32_act_factor[i4_i][0][i4_j], 2883 &dummy, 2884 ps_ctxt->ps_rc_quant_ctxt); 2885 i1_cu_qp = ihevce_cu_level_qp_mod( 2886 ps_ctxt->i4_qscale, 2887 ps_ed_ctb_l1->i4_16x16_satd[i4_i][1], 2888 ps_ctxt->ld_curr_frame_16x16_log_avg[1], 2889 f_strength, 2890 &ps_l0_ipe_out_ctb->i4_32x32_act_factor[i4_i][1][i4_j], 2891 &dummy, 2892 ps_ctxt->ps_rc_quant_ctxt); 2893 i1_cu_qp = ihevce_cu_level_qp_mod( 2894 ps_ctxt->i4_qscale, 2895 ps_ed_ctb_l1->i4_16x16_satd[i4_i][2], 2896 ps_ctxt->ld_curr_frame_16x16_log_avg[2], 2897 f_strength, 2898 &ps_l0_ipe_out_ctb->i4_32x32_act_factor[i4_i][2][i4_j], 2899 &dummy, 2900 ps_ctxt->ps_rc_quant_ctxt); 2901 } 2902 else 2903 { 2904 /*For incomplete CTB */ 2905 ps_l0_ipe_out_ctb->i4_32x32_act_factor[i4_i][0][i4_j] = 1024; 2906 ps_l0_ipe_out_ctb->i4_32x32_act_factor[i4_i][1][i4_j] = 1024; 2907 ps_l0_ipe_out_ctb->i4_32x32_act_factor[i4_i][2][i4_j] = 1024; 2908 } 2909 } 2910 } 2911 2912 /*Store the 8x8 Qps from L1 (in raster order) as output of intra prediction 2913 for the usage by ME*/ 2914 { 2915 WORD32 pos_x_16, pos_y_16, pos; 2916 //WORD32 i4_incomplete_ctb_val_8; 2917 pos_x_16 = u1_curr_ctb_wdt / 4; 2918 pos_y_16 = u1_curr_ctb_hgt / 4; 2919 2920 pos = (pos_x_16 < pos_y_16) ? pos_x_16 : pos_y_16; 2921 for(i4_i = 0; i4_i < 16; i4_i++) 2922 { 2923 if(i4_i < pos) 2924 { 2925 ASSERT(ps_ed_ctb_l1->i4_8x8_satd[i4_i][0] != -2); 2926 ASSERT(ps_ed_ctb_l1->i4_8x8_satd[i4_i][1] != -2); 2927 i1_cu_qp = ihevce_cu_level_qp_mod( 2928 ps_ctxt->i4_qscale, 2929 ps_ed_ctb_l1->i4_8x8_satd[i4_i][0], 2930 ps_ctxt->ld_curr_frame_8x8_log_avg[0], 2931 f_strength, 2932 &ps_l0_ipe_out_ctb->i4_16x16_act_factor[i4_i][0][i4_j], 2933 &dummy, 2934 ps_ctxt->ps_rc_quant_ctxt); 2935 i1_cu_qp = ihevce_cu_level_qp_mod( 2936 ps_ctxt->i4_qscale, 2937 ps_ed_ctb_l1->i4_8x8_satd[i4_i][1], 2938 ps_ctxt->ld_curr_frame_8x8_log_avg[1], 2939 f_strength, 2940 &ps_l0_ipe_out_ctb->i4_16x16_act_factor[i4_i][1][i4_j], 2941 &dummy, 2942 ps_ctxt->ps_rc_quant_ctxt); 2943 } 2944 else 2945 { 2946 /*For incomplete CTB */ 2947 ps_l0_ipe_out_ctb->i4_16x16_act_factor[i4_i][0][i4_j] = 1024; 2948 ps_l0_ipe_out_ctb->i4_16x16_act_factor[i4_i][1][i4_j] = 1024; 2949 } 2950 } 2951 } 2952 } //for loop 2953 2954 /* Accumalate the cost of ctb to the total cost */ 2955 ps_ctxt->i8_frame_acc_satd_cost += i8_frame_acc_satd_cost; 2956 ps_ctxt->i8_frame_acc_satd_by_modqp_q10 += i8_frame_acc_satd_by_modqp_q10; 2957 2958 ps_ctxt->i8_frame_acc_mode_bits_cost += i8_frame_acc_mode_bits_cost; 2959 2960 /* satd and mpm bits accumalation of best cu size candiate for the ctb */ 2961 ps_l0_ipe_out_ctb->i4_ctb_acc_satd = i4_ctb_acc_satd; 2962 ps_l0_ipe_out_ctb->i4_ctb_acc_mpm_bits = i8_frame_acc_mode_bits_cost; 2963 2964 ps_ctxt->i8_frame_acc_satd += i4_ctb_acc_satd; 2965 } 2966 2967 { 2968 WORD32 ctr_8x8; 2969 for(ctr_8x8 = 0; ctr_8x8 < (MAX_CU_IN_CTB >> 2); ctr_8x8++) 2970 { 2971 /*Accumalate activity factor for Intra and Inter*/ 2972 if(ps_l0_ipe_out_ctb->ai4_best_sad_cost_8x8_l1_ipe[ctr_8x8] < 2973 ps_ed_ctb_l1->i4_sad_me_for_ref[ctr_8x8]) 2974 { 2975 ps_l0_ipe_out_ctb->ai4_8x8_act_factor[ctr_8x8] = 2976 ps_l0_ipe_out_ctb->i4_16x16_act_factor[ctr_8x8][1][0]; 2977 } 2978 else 2979 { 2980 ps_l0_ipe_out_ctb->ai4_8x8_act_factor[ctr_8x8] = 2981 ps_l0_ipe_out_ctb->i4_16x16_act_factor[ctr_8x8][1][0]; 2982 } 2983 2984 /*Accumalate activity factor at frame level*/ 2985 ps_ctxt->i8_frame_acc_act_factor += ps_l0_ipe_out_ctb->ai4_8x8_act_factor[ctr_8x8]; 2986 } 2987 } 2988 return; 2989 } 2990 2991 WORD32 ihevce_nxn_sad_computer( 2992 UWORD8 *pu1_inp, WORD32 i4_inp_stride, UWORD8 *pu1_ref, WORD32 i4_ref_stride, WORD32 trans_size) 2993 { 2994 WORD32 wd, ht, i, j; 2995 WORD32 sad = 0; 2996 2997 wd = trans_size; 2998 ht = trans_size; 2999 3000 for(i = 0; i < ht; i++) 3001 { 3002 for(j = 0; j < wd; j++) 3003 { 3004 sad += (ABS(((WORD32)pu1_inp[j] - (WORD32)pu1_ref[j]))); 3005 } 3006 pu1_inp += i4_inp_stride; 3007 pu1_ref += i4_ref_stride; 3008 } 3009 3010 return sad; 3011 } 3012 3013 /*! 3014 ****************************************************************************** 3015 * \if Function name : ihevce_mode_eval_filtering \endif 3016 * 3017 * \brief 3018 * Evaluates best 3 modes for the given CU size with probable modes from, 3019 * early decision structure, mpm candidates and dc, planar mode 3020 * 3021 * \param[in] ps_cu_node : pointer to MAX cu node info buffer 3022 * \param[in] ps_child_cu_node : pointer to (MAX - 1) cu node info buffer 3023 * \param[in] ps_ctxt : pointer to IPE context struct 3024 * \param[in] ps_curr_src : pointer to src pixels struct 3025 * \param[in] best_amode : best angular mode from l1 layer or 3026 from (MAX - 1) CU mode 3027 * \param[in] best_costs_4x4 : pointer to 3 best cost buffer 3028 * \param[in] best_modes_4x4 : pointer to 3 best mode buffer 3029 * \param[in] step2_bypass : if 0, (MAX - 1) CU is evaluated 3030 * if 1, (MAX CU) sugested is evaluated 3031 * \param[in] tu_eq_cu : indicates if tu size is same as cu or cu/2 3032 * 3033 * \return 3034 * None 3035 * 3036 * \author 3037 * Ittiam 3038 * 3039 ***************************************************************************** 3040 */ 3041 void ihevce_mode_eval_filtering( 3042 ihevce_ipe_cu_tree_t *ps_cu_node, 3043 ihevce_ipe_cu_tree_t *ps_child_cu_node, 3044 ihevce_ipe_ctxt_t *ps_ctxt, 3045 iv_enc_yuv_buf_t *ps_curr_src, 3046 WORD32 best_amode, 3047 WORD32 *best_costs_4x4, 3048 UWORD8 *best_modes_4x4, 3049 WORD32 step2_bypass, 3050 WORD32 tu_eq_cu) 3051 { 3052 UWORD8 *pu1_origin, *pu1_orig; 3053 WORD32 src_strd = ps_curr_src->i4_y_strd; 3054 WORD32 nbr_flags; 3055 nbr_avail_flags_t s_nbr; 3056 WORD32 trans_size = tu_eq_cu ? ps_cu_node->u1_cu_size : ps_cu_node->u1_cu_size >> 1; 3057 WORD32 num_tu_in_x = tu_eq_cu ? 1 : 2; 3058 WORD32 num_tu_in_y = tu_eq_cu ? 1 : 2; 3059 UWORD8 mode; 3060 3061 WORD32 cost_ang_mode = MAX_INTRA_COST_IPE; 3062 WORD32 filter_flag; 3063 WORD32 cost_amode_step2[7] = { 0 }; 3064 /*WORD32 best_sad[5]; // NOTE_A01: Not getting consumed at present */ 3065 WORD32 sad = 0; 3066 WORD32 cu_pos_x, cu_pos_y; 3067 WORD32 temp; 3068 WORD32 i = 0, j, k, i_end, z; 3069 //WORD32 row, col, size; 3070 UWORD8 *pu1_ref; 3071 WORD32 xA, yA, xB, yB; 3072 WORD32 top_intra_mode; 3073 WORD32 left_intra_mode; 3074 UWORD8 *pu1_ref_orig = &ps_ctxt->au1_ref_samples[0]; 3075 UWORD8 *pu1_ref_filt = &ps_ctxt->au1_filt_ref_samples[0]; 3076 3077 UWORD8 modes_4x4[5] = { 0, 1, 2, 3, 4 }; 3078 WORD32 count; 3079 3080 pf_ipe_res_trans_had apf_resd_trns_had[4]; 3081 3082 WORD32 cand_mode_satd_list[3]; 3083 ihevc_intra_pred_luma_ref_substitution_ft *ihevc_intra_pred_luma_ref_substitution_fptr; 3084 3085 ihevc_intra_pred_luma_ref_substitution_fptr = 3086 ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr; 3087 3088 apf_resd_trns_had[0] = ps_ctxt->s_cmn_opt_func.pf_HAD_4x4_8bit; 3089 apf_resd_trns_had[1] = ps_ctxt->s_cmn_opt_func.pf_HAD_8x8_8bit; 3090 apf_resd_trns_had[2] = ps_ctxt->s_cmn_opt_func.pf_HAD_16x16_8bit; 3091 apf_resd_trns_had[3] = ps_ctxt->s_cmn_opt_func.pf_HAD_32x32_8bit; 3092 3093 /* initialize modes_to_eval as zero */ 3094 memset(&ps_ctxt->au1_modes_to_eval, 0, MAX_NUM_IP_MODES); 3095 3096 /* Compute the Parent Cost */ 3097 3098 /* Pointer to top-left of the CU - y0,x0 in 8x8 granularity */ 3099 pu1_orig = (UWORD8 *)(ps_curr_src->pv_y_buf) + ((ps_cu_node->u2_y0 << 3) * src_strd) + 3100 (ps_cu_node->u2_x0 << 3); 3101 3102 /* Get position of CU within CTB at 4x4 granularity */ 3103 cu_pos_x = ps_cu_node->u2_x0 << 1; 3104 cu_pos_y = ps_cu_node->u2_y0 << 1; 3105 3106 /* get the neighbour availability flags */ 3107 ihevce_get_only_nbr_flag( 3108 &s_nbr, 3109 ps_ctxt->pu1_ctb_nbr_map, 3110 ps_ctxt->i4_nbr_map_strd, 3111 cu_pos_x, 3112 cu_pos_y, 3113 trans_size >> 2, 3114 trans_size >> 2); 3115 3116 /* Traverse for all 4 child blocks in the parent block */ 3117 xA = (ps_cu_node->u2_x0 << 3) >> 2; 3118 yA = ((ps_cu_node->u2_y0 << 3) >> 2) + 1; 3119 xB = xA + 1; 3120 yB = yA - 1; 3121 left_intra_mode = ps_ctxt->au1_ctb_mode_map[yA][xA]; 3122 top_intra_mode = ps_ctxt->au1_ctb_mode_map[yB][xB]; 3123 /* call the function which populates sad cost for all the modes */ 3124 3125 ihevce_intra_populate_mode_bits_cost_bracketing( 3126 top_intra_mode, 3127 left_intra_mode, 3128 s_nbr.u1_top_avail, 3129 s_nbr.u1_left_avail, 3130 ps_cu_node->u2_y0, 3131 &ps_ctxt->au2_mode_bits_satd_cost[0], 3132 &ps_ctxt->au2_mode_bits_satd[0], 3133 ps_ctxt->i4_ol_satd_lambda, 3134 cand_mode_satd_list); 3135 3136 for(k = 0; k < num_tu_in_y; k++) 3137 { 3138 for(j = 0; j < num_tu_in_x; j++) 3139 { 3140 /* get the neighbour availability flags */ 3141 nbr_flags = ihevce_get_nbr_intra( 3142 &s_nbr, 3143 ps_ctxt->pu1_ctb_nbr_map, 3144 ps_ctxt->i4_nbr_map_strd, 3145 cu_pos_x + ((j) * (trans_size >> 2)), 3146 cu_pos_y + ((k) * (trans_size >> 2)), 3147 trans_size >> 2); 3148 3149 pu1_origin = pu1_orig + (k * trans_size * src_strd) + (j * trans_size); 3150 3151 /* Create reference samples array */ 3152 ihevc_intra_pred_luma_ref_substitution_fptr( 3153 pu1_origin - src_strd - 1, 3154 pu1_origin - src_strd, 3155 pu1_origin - 1, 3156 src_strd, 3157 trans_size, 3158 nbr_flags, 3159 pu1_ref_orig, 3160 0); 3161 3162 /* Perform reference samples filtering */ 3163 ihevce_intra_pred_ref_filtering(pu1_ref_orig, trans_size, pu1_ref_filt); 3164 3165 ihevce_set_nbr_map( 3166 ps_ctxt->pu1_ctb_nbr_map, 3167 ps_ctxt->i4_nbr_map_strd, 3168 cu_pos_x + ((j) * (trans_size >> 2)), 3169 cu_pos_y + ((k) * (trans_size >> 2)), 3170 (trans_size >> 2), 3171 1); 3172 3173 pu1_ref_orig += (4 * MAX_CTB_SIZE + 1); 3174 pu1_ref_filt += (4 * MAX_CTB_SIZE + 1); 3175 } 3176 } 3177 3178 /* Revaluation for angular mode */ 3179 //if(ps_ed_blk->ang_attr.mode_present == 1) 3180 //if(((best_amode & 0x1) != 1)) 3181 3182 { 3183 WORD32 u1_trans_idx = trans_size >> 3; 3184 if(trans_size == 32) 3185 u1_trans_idx = 3; 3186 //best_amode = ps_ed_blk->ang_attr.best_mode; 3187 3188 i = 0; 3189 if(!step2_bypass) 3190 { 3191 /* Around best level 4 angular mode, search for best level 2 mode */ 3192 ASSERT((best_amode >= 2) && (best_amode <= 34)); 3193 3194 if(ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P3) 3195 { 3196 if(best_amode >= 4) 3197 ps_ctxt->au1_modes_to_eval_temp[i++] = best_amode - 2; 3198 } 3199 3200 ps_ctxt->au1_modes_to_eval_temp[i++] = best_amode; 3201 3202 if(ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P3) 3203 { 3204 if(best_amode <= 32) 3205 ps_ctxt->au1_modes_to_eval_temp[i++] = best_amode + 2; 3206 } 3207 } 3208 else 3209 { 3210 ps_ctxt->au1_modes_to_eval_temp[i++] = ps_child_cu_node->ps_sub_cu[0]->best_mode; 3211 ps_ctxt->au1_modes_to_eval_temp[i++] = ps_child_cu_node->ps_sub_cu[1]->best_mode; 3212 ps_ctxt->au1_modes_to_eval_temp[i++] = ps_child_cu_node->ps_sub_cu[2]->best_mode; 3213 ps_ctxt->au1_modes_to_eval_temp[i++] = ps_child_cu_node->ps_sub_cu[3]->best_mode; 3214 } 3215 3216 /* Add the left and top MPM modes for computation*/ 3217 3218 ps_ctxt->au1_modes_to_eval_temp[i++] = cand_mode_satd_list[0]; 3219 ps_ctxt->au1_modes_to_eval_temp[i++] = cand_mode_satd_list[1]; 3220 3221 i_end = i; 3222 count = 0; 3223 3224 /*Remove duplicate modes from modes_to_eval_temp[] */ 3225 for(j = 0; j < i_end; j++) 3226 { 3227 for(k = 0; k < count; k++) 3228 { 3229 if(ps_ctxt->au1_modes_to_eval_temp[j] == ps_ctxt->au1_modes_to_eval[k]) 3230 break; 3231 } 3232 if((k == count) && (ps_ctxt->au1_modes_to_eval_temp[j] > 1)) 3233 { 3234 ps_ctxt->au1_modes_to_eval[count] = ps_ctxt->au1_modes_to_eval_temp[j]; 3235 count++; 3236 } 3237 } 3238 i_end = count; 3239 if(count == 0) 3240 { 3241 ps_ctxt->au1_modes_to_eval[0] = 26; 3242 i_end = 1; 3243 } 3244 3245 for(i = 0; i < i_end; i++) 3246 { 3247 pu1_ref_orig = &ps_ctxt->au1_ref_samples[0]; 3248 pu1_ref_filt = &ps_ctxt->au1_filt_ref_samples[0]; 3249 3250 mode = ps_ctxt->au1_modes_to_eval[i]; 3251 ASSERT((mode >= 2) && (mode <= 34)); 3252 cost_amode_step2[i] = ps_ctxt->au2_mode_bits_satd_cost[mode]; 3253 filter_flag = gau1_intra_pred_ref_filter[mode] & (1 << (CTZ(trans_size) - 2)); 3254 3255 for(k = 0; k < num_tu_in_y; k++) 3256 { 3257 for(j = 0; j < num_tu_in_x; j++) 3258 { 3259 pu1_origin = pu1_orig + (k * trans_size * src_strd) + (j * trans_size); 3260 3261 if(0 == filter_flag) 3262 pu1_ref = pu1_ref_orig; 3263 else 3264 pu1_ref = pu1_ref_filt; 3265 3266 g_apf_lum_ip[g_i4_ip_funcs[mode]]( 3267 pu1_ref, 0, &ps_ctxt->au1_pred_samples[0], trans_size, trans_size, mode); 3268 3269 if(ps_ctxt->u1_use_satd) 3270 { 3271 sad = apf_resd_trns_had[u1_trans_idx]( 3272 pu1_origin, 3273 ps_curr_src->i4_y_strd, 3274 &ps_ctxt->au1_pred_samples[0], 3275 trans_size, 3276 NULL, 3277 0 3278 3279 ); 3280 } 3281 else 3282 { 3283 sad = ps_ctxt->s_ipe_optimised_function_list.pf_nxn_sad_computer( 3284 pu1_origin, 3285 ps_curr_src->i4_y_strd, 3286 &ps_ctxt->au1_pred_samples[0], 3287 trans_size, 3288 trans_size); 3289 } 3290 3291 cost_amode_step2[i] += sad; 3292 3293 pu1_ref_orig += (4 * MAX_CTB_SIZE + 1); 3294 pu1_ref_filt += (4 * MAX_CTB_SIZE + 1); 3295 } 3296 } 3297 } 3298 best_amode = ps_ctxt->au1_modes_to_eval[0]; 3299 /*Init cost indx */ 3300 cost_ang_mode = MAX_INTRA_COST_IPE; //cost_amode_step2[0]; 3301 for(z = 0; z < i_end; z++) 3302 { 3303 /* Least cost of all 3 angles are stored in cost_amode_step2[0] and corr. mode*/ 3304 if(cost_ang_mode >= cost_amode_step2[z]) 3305 { 3306 if(cost_ang_mode == cost_amode_step2[z]) 3307 { 3308 if(best_amode > ps_ctxt->au1_modes_to_eval[z]) 3309 best_amode = ps_ctxt->au1_modes_to_eval[z]; 3310 } 3311 else 3312 { 3313 best_amode = ps_ctxt->au1_modes_to_eval[z]; 3314 } 3315 cost_ang_mode = cost_amode_step2[z]; 3316 } 3317 } 3318 3319 /*Modify mode bits for the angular modes */ 3320 } 3321 3322 { 3323 /* Step - I modification */ 3324 ASSERT((best_amode >= 2) && (best_amode <= 34)); 3325 i_end = 0; 3326 z = 0; 3327 3328 /* Around best level 3 angular mode, search for best level 1 mode */ 3329 ps_ctxt->au1_modes_to_eval[i_end++] = 0; 3330 ps_ctxt->au1_modes_to_eval[i_end++] = 1; 3331 3332 if(best_amode != 2) 3333 ps_ctxt->au1_modes_to_eval[i_end++] = best_amode - 1; 3334 3335 ps_ctxt->au1_modes_to_eval[i_end++] = best_amode; 3336 3337 if(best_amode != 34) 3338 ps_ctxt->au1_modes_to_eval[i_end++] = best_amode + 1; 3339 3340 /* Inserting step_2's best mode at last to avoid 3341 recalculation of it's SATD cost */ 3342 3343 //ps_ctxt->au1_modes_to_eval[i_end] = best_amode; //Bugfix: HSAD compared with SAD 3344 //cost_amode_step2[i_end] = cost_ang_mode; 3345 3346 /*best_sad[i_end] = cost_ang_mode 3347 - mode_bits_satd_cost[best_amode]; //See NOTE_A01 above */ 3348 3349 cost_ang_mode = MAX_INTRA_COST_IPE; /* Init cost */ 3350 3351 for(i = 0; i < i_end; i++) 3352 { 3353 WORD32 u1_trans_idx = trans_size >> 3; 3354 if(trans_size == 32) 3355 u1_trans_idx = 3; 3356 pu1_ref_orig = &ps_ctxt->au1_ref_samples[0]; 3357 pu1_ref_filt = &ps_ctxt->au1_filt_ref_samples[0]; 3358 3359 /*best_sad[i] = 0; //See NOTE_A01 above */ 3360 mode = ps_ctxt->au1_modes_to_eval[i]; 3361 cost_amode_step2[i] = ps_ctxt->au2_mode_bits_satd_cost[mode]; 3362 filter_flag = gau1_intra_pred_ref_filter[mode] & (1 << (CTZ(trans_size) - 2)); 3363 3364 for(k = 0; k < num_tu_in_y; k++) 3365 { 3366 for(j = 0; j < num_tu_in_x; j++) 3367 { 3368 pu1_origin = pu1_orig + (k * trans_size * src_strd) + (j * trans_size); 3369 3370 if(0 == filter_flag) 3371 pu1_ref = pu1_ref_orig; 3372 else 3373 pu1_ref = pu1_ref_filt; 3374 3375 g_apf_lum_ip[g_i4_ip_funcs[mode]]( 3376 pu1_ref, 0, &ps_ctxt->au1_pred_samples[0], trans_size, trans_size, mode); 3377 3378 //if(trans_size != 4) 3379 { 3380 sad = apf_resd_trns_had[u1_trans_idx]( 3381 pu1_origin, 3382 ps_curr_src->i4_y_strd, 3383 &ps_ctxt->au1_pred_samples[0], 3384 trans_size, 3385 NULL, 3386 0); 3387 } 3388 3389 /*accumualting SATD though name says it is sad*/ 3390 cost_amode_step2[i] += sad; 3391 /*best_sad[i] +=sad; //See NOTE_A01 above */ 3392 pu1_ref_orig += (4 * MAX_CTB_SIZE + 1); 3393 pu1_ref_filt += (4 * MAX_CTB_SIZE + 1); 3394 } 3395 } 3396 } 3397 /* Updating i_end for the step_2's inserted mode*/ 3398 // i_end++; 3399 3400 /* Arrange the reference array in ascending order */ 3401 3402 for(i = 0; i < (i_end - 1); i++) 3403 { 3404 for(j = i + 1; j < i_end; j++) 3405 { 3406 if(cost_amode_step2[i] > cost_amode_step2[j]) 3407 { 3408 temp = cost_amode_step2[i]; 3409 cost_amode_step2[i] = cost_amode_step2[j]; 3410 cost_amode_step2[j] = temp; 3411 3412 temp = modes_4x4[i]; 3413 modes_4x4[i] = modes_4x4[j]; 3414 modes_4x4[j] = temp; 3415 } 3416 } 3417 } 3418 3419 /* Least cost of all 3 angles are stored in cost_amode_step2[0] and corr. mode*/ 3420 best_amode = ps_ctxt->au1_modes_to_eval[modes_4x4[0]]; 3421 cost_ang_mode = cost_amode_step2[0]; 3422 ps_cu_node->best_satd = cost_ang_mode - ps_ctxt->au2_mode_bits_satd_cost[best_amode]; 3423 ps_cu_node->best_cost = cost_amode_step2[0]; 3424 ps_cu_node->best_mode = ps_ctxt->au1_modes_to_eval[modes_4x4[0]]; 3425 ps_cu_node->best_satd = 3426 ps_cu_node->best_cost - ps_ctxt->au2_mode_bits_satd_cost[ps_cu_node->best_mode]; 3427 3428 /*Accumalate best mode bits cost for RC*/ 3429 ps_cu_node->u2_mode_bits_cost = ps_ctxt->au2_mode_bits_satd[ps_cu_node->best_mode]; 3430 3431 /* Store the best three candidates */ 3432 for(i = 0; i < 3; i++) 3433 { 3434 best_costs_4x4[i] = cost_amode_step2[i]; 3435 best_modes_4x4[i] = ps_ctxt->au1_modes_to_eval[modes_4x4[i]]; 3436 } 3437 } 3438 3439 return; 3440 } 3441