1 /****************************************************************************** 2 * 3 * Copyright (C) 2018 The Android Open Source Project 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ***************************************************************************** 18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore 19 */ 20 /** 21 ****************************************************************************** 22 * @file hme_refine.c 23 * 24 * @brief 25 * Contains the implementation of the refinement layer searches and related 26 * functionality like CU merge. 
27 * 28 * @author 29 * Ittiam 30 * 31 * 32 * List of Functions 33 * 34 * 35 ****************************************************************************** 36 */ 37 38 /*****************************************************************************/ 39 /* File Includes */ 40 /*****************************************************************************/ 41 /* System include files */ 42 #include <stdio.h> 43 #include <string.h> 44 #include <stdlib.h> 45 #include <assert.h> 46 #include <stdarg.h> 47 #include <math.h> 48 #include <limits.h> 49 50 /* User include files */ 51 #include "ihevc_typedefs.h" 52 #include "itt_video_api.h" 53 #include "ihevce_api.h" 54 55 #include "rc_cntrl_param.h" 56 #include "rc_frame_info_collector.h" 57 #include "rc_look_ahead_params.h" 58 59 #include "ihevc_defs.h" 60 #include "ihevc_structs.h" 61 #include "ihevc_platform_macros.h" 62 #include "ihevc_deblk.h" 63 #include "ihevc_itrans_recon.h" 64 #include "ihevc_chroma_itrans_recon.h" 65 #include "ihevc_chroma_intra_pred.h" 66 #include "ihevc_intra_pred.h" 67 #include "ihevc_inter_pred.h" 68 #include "ihevc_mem_fns.h" 69 #include "ihevc_padding.h" 70 #include "ihevc_weighted_pred.h" 71 #include "ihevc_sao.h" 72 #include "ihevc_resi_trans.h" 73 #include "ihevc_quant_iquant_ssd.h" 74 #include "ihevc_cabac_tables.h" 75 76 #include "ihevce_defs.h" 77 #include "ihevce_lap_enc_structs.h" 78 #include "ihevce_multi_thrd_structs.h" 79 #include "ihevce_multi_thrd_funcs.h" 80 #include "ihevce_me_common_defs.h" 81 #include "ihevce_had_satd.h" 82 #include "ihevce_error_codes.h" 83 #include "ihevce_bitstream.h" 84 #include "ihevce_cabac.h" 85 #include "ihevce_rdoq_macros.h" 86 #include "ihevce_function_selector.h" 87 #include "ihevce_enc_structs.h" 88 #include "ihevce_entropy_structs.h" 89 #include "ihevce_cmn_utils_instr_set_router.h" 90 #include "ihevce_enc_loop_structs.h" 91 #include "ihevce_bs_compute_ctb.h" 92 #include "ihevce_global_tables.h" 93 #include "ihevce_dep_mngr_interface.h" 94 #include 
"hme_datatype.h"
#include "hme_interface.h"
#include "hme_common_defs.h"
#include "hme_defs.h"
#include "ihevce_me_instr_set_router.h"
#include "hme_globals.h"
#include "hme_utils.h"
#include "hme_coarse.h"
#include "hme_fullpel.h"
#include "hme_subpel.h"
#include "hme_refine.h"
#include "hme_err_compute.h"
#include "hme_common_utils.h"
#include "hme_search_algo.h"
#include "ihevce_stasino_helpers.h"
#include "ihevce_common_utils.h"

/*****************************************************************************/
/* Globals                                                                   */
/*****************************************************************************/

/* brief: mapping buffer to convert raster scan indices into z-scan order in a ctb */
UWORD8 gau1_raster_scan_to_ctb[4][4] = {
    { 0, 4, 16, 20 }, { 8, 12, 24, 28 }, { 32, 36, 48, 52 }, { 40, 44, 56, 60 }
};

/*****************************************************************************/
/* Extern Function declaration                                               */
/*****************************************************************************/
extern ctb_boundary_attrs_t *
    get_ctb_attrs(S32 ctb_start_x, S32 ctb_start_y, S32 pic_wd, S32 pic_ht, me_frm_ctxt_t *ps_ctxt);

/* Projects a colocated candidate from the coarse layer into the current layer */
typedef void (*PF_HME_PROJECT_COLOC_CANDT_FXN)(
    search_node_t *ps_search_node,
    layer_ctxt_t *ps_curr_layer,
    layer_ctxt_t *ps_coarse_layer,
    S32 i4_pos_x,
    S32 i4_pos_y,
    S08 i1_ref_id,
    S32 i4_result_id);

/* Same as above, but for the L0 ME pass (pred-dir aware) */
typedef void (*PF_HME_PROJECT_COLOC_CANDT_L0_ME_FXN)(
    search_node_t *ps_search_node,
    layer_ctxt_t *ps_curr_layer,
    layer_ctxt_t *ps_coarse_layer,
    S32 i4_pos_x,
    S32 i4_pos_y,
    S32 i4_num_act_ref_l0,
    U08 u1_pred_dir,
    U08 u1_default_ref_id,
    S32 i4_result_id);

/*****************************************************************************/
/* Function Definitions                                                      */
/*****************************************************************************/

void ihevce_no_wt_copy( 151 coarse_me_ctxt_t *ps_ctxt, 152 layer_ctxt_t *ps_curr_layer, 153 pu_t *ps_pu, 154 UWORD8 *pu1_temp_pred, 155 WORD32 temp_stride, 156 WORD32 blk_x, 157 WORD32 blk_y) 158 { 159 UWORD8 *pu1_ref; 160 WORD32 ref_stride, ref_offset; 161 WORD32 row, col, i4_tmp; 162 163 ASSERT((ps_pu->b2_pred_mode == PRED_L0) || (ps_pu->b2_pred_mode == PRED_L1)); 164 165 if(ps_pu->b2_pred_mode == PRED_L0) 166 { 167 WORD8 i1_ref_idx; 168 169 i1_ref_idx = ps_pu->mv.i1_l0_ref_idx; 170 pu1_ref = ps_curr_layer->ppu1_list_inp[i1_ref_idx]; 171 172 ref_stride = ps_curr_layer->i4_inp_stride; 173 174 ref_offset = ((blk_y << 3) + ps_pu->mv.s_l0_mv.i2_mvy) * ref_stride; 175 ref_offset += (blk_x << 3) + ps_pu->mv.s_l0_mv.i2_mvx; 176 177 pu1_ref += ref_offset; 178 179 for(row = 0; row < temp_stride; row++) 180 { 181 for(col = 0; col < temp_stride; col++) 182 { 183 i4_tmp = pu1_ref[col]; 184 pu1_temp_pred[col] = CLIP_U8(i4_tmp); 185 } 186 187 pu1_ref += ref_stride; 188 pu1_temp_pred += temp_stride; 189 } 190 } 191 else 192 { 193 WORD8 i1_ref_idx; 194 195 i1_ref_idx = ps_pu->mv.i1_l1_ref_idx; 196 pu1_ref = ps_curr_layer->ppu1_list_inp[i1_ref_idx]; 197 198 ref_stride = ps_curr_layer->i4_inp_stride; 199 200 ref_offset = ((blk_y << 3) + ps_pu->mv.s_l1_mv.i2_mvy) * ref_stride; 201 ref_offset += (blk_x << 3) + ps_pu->mv.s_l1_mv.i2_mvx; 202 203 pu1_ref += ref_offset; 204 205 for(row = 0; row < temp_stride; row++) 206 { 207 for(col = 0; col < temp_stride; col++) 208 { 209 i4_tmp = pu1_ref[col]; 210 pu1_temp_pred[col] = CLIP_U8(i4_tmp); 211 } 212 213 pu1_ref += ref_stride; 214 pu1_temp_pred += temp_stride; 215 } 216 } 217 } 218 219 static WORD32 hme_add_clustered_mvs_as_merge_cands( 220 cluster_data_t *ps_cluster_base, 221 search_node_t *ps_merge_cand, 222 range_prms_t **pps_range_prms, 223 U08 *pu1_refid_to_pred_dir_list, 224 WORD32 i4_num_clusters, 225 U08 u1_pred_dir) 226 { 227 WORD32 i, j, k; 228 WORD32 i4_num_cands_added = 0; 229 WORD32 i4_num_mvs_in_cluster; 230 231 for(i = 0; i 
< i4_num_clusters; i++) 232 { 233 cluster_data_t *ps_data = &ps_cluster_base[i]; 234 235 if(u1_pred_dir == !pu1_refid_to_pred_dir_list[ps_data->ref_id]) 236 { 237 i4_num_mvs_in_cluster = ps_data->num_mvs; 238 239 for(j = 0; j < i4_num_mvs_in_cluster; j++) 240 { 241 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx = ps_data->as_mv[j].mvx; 242 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy = ps_data->as_mv[j].mvy; 243 ps_merge_cand[i4_num_cands_added].i1_ref_idx = ps_data->ref_id; 244 245 CLIP_MV_WITHIN_RANGE( 246 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx, 247 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy, 248 pps_range_prms[ps_data->ref_id], 249 0, 250 0, 251 0); 252 253 for(k = 0; k < i4_num_cands_added; k++) 254 { 255 if((ps_merge_cand[k].s_mv.i2_mvx == ps_data->as_mv[j].mvx) && 256 (ps_merge_cand[k].s_mv.i2_mvy == ps_data->as_mv[j].mvy) && 257 (ps_merge_cand[k].i1_ref_idx == ps_data->ref_id)) 258 { 259 break; 260 } 261 } 262 263 if(k == i4_num_cands_added) 264 { 265 i4_num_cands_added++; 266 } 267 } 268 } 269 } 270 271 return i4_num_cands_added; 272 } 273 274 static WORD32 hme_add_me_best_as_merge_cands( 275 search_results_t **pps_child_data_array, 276 inter_cu_results_t *ps_8x8cu_results, 277 search_node_t *ps_merge_cand, 278 range_prms_t **pps_range_prms, 279 U08 *pu1_refid_to_pred_dir_list, 280 S08 *pi1_past_list, 281 S08 *pi1_future_list, 282 BLK_SIZE_T e_blk_size, 283 ME_QUALITY_PRESETS_T e_quality_preset, 284 S32 i4_num_cands_added, 285 U08 u1_pred_dir) 286 { 287 WORD32 i, j, k; 288 WORD32 i4_max_cands_to_add; 289 290 WORD32 i4_result_id = 0; 291 292 ASSERT(!pps_child_data_array[0]->u1_split_flag || (BLK_64x64 != e_blk_size)); 293 ASSERT(!pps_child_data_array[1]->u1_split_flag || (BLK_64x64 != e_blk_size)); 294 ASSERT(!pps_child_data_array[2]->u1_split_flag || (BLK_64x64 != e_blk_size)); 295 ASSERT(!pps_child_data_array[3]->u1_split_flag || (BLK_64x64 != e_blk_size)); 296 297 switch(e_quality_preset) 298 { 299 case ME_PRISTINE_QUALITY: 300 { 301 
i4_max_cands_to_add = MAX_MERGE_CANDTS; 302 303 break; 304 } 305 case ME_HIGH_QUALITY: 306 { 307 /* All 4 children are split and each grandchild contributes an MV */ 308 /* and 2 best results per grandchild */ 309 i4_max_cands_to_add = 4 * 4 * 2; 310 311 break; 312 } 313 case ME_MEDIUM_SPEED: 314 { 315 i4_max_cands_to_add = 4 * 2 * 2; 316 317 break; 318 } 319 case ME_HIGH_SPEED: 320 case ME_XTREME_SPEED: 321 case ME_XTREME_SPEED_25: 322 { 323 i4_max_cands_to_add = 4 * 2 * 1; 324 325 break; 326 } 327 } 328 329 while(i4_result_id < 4) 330 { 331 for(i = 0; i < 4; i++) 332 { 333 inter_cu_results_t *ps_child_data = pps_child_data_array[i]->ps_cu_results; 334 inter_cu_results_t *ps_grandchild_data = &ps_8x8cu_results[i << 2]; 335 336 if(!pps_child_data_array[i]->u1_split_flag) 337 { 338 part_type_results_t *ps_data = &ps_child_data->ps_best_results[i4_result_id]; 339 340 if(ps_child_data->u1_num_best_results <= i4_result_id) 341 { 342 continue; 343 } 344 345 if(ps_data->as_pu_results->pu.b1_intra_flag) 346 { 347 continue; 348 } 349 350 for(j = 0; j <= (ps_data->u1_part_type != PRT_2Nx2N); j++) 351 { 352 mv_t *ps_mv; 353 354 S08 i1_ref_idx; 355 356 pu_t *ps_pu = &ps_data->as_pu_results[j].pu; 357 358 if(u1_pred_dir != 359 ((ps_pu->b2_pred_mode == 2) ? 
u1_pred_dir : ps_pu->b2_pred_mode)) 360 { 361 continue; 362 } 363 364 if(u1_pred_dir) 365 { 366 ps_mv = &ps_pu->mv.s_l1_mv; 367 i1_ref_idx = pi1_future_list[ps_pu->mv.i1_l1_ref_idx]; 368 } 369 else 370 { 371 ps_mv = &ps_pu->mv.s_l0_mv; 372 i1_ref_idx = pi1_past_list[ps_pu->mv.i1_l0_ref_idx]; 373 } 374 375 if(-1 == i1_ref_idx) 376 { 377 continue; 378 } 379 380 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx = ps_mv->i2_mvx; 381 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy = ps_mv->i2_mvy; 382 ps_merge_cand[i4_num_cands_added].i1_ref_idx = i1_ref_idx; 383 384 CLIP_MV_WITHIN_RANGE( 385 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx, 386 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy, 387 pps_range_prms[i1_ref_idx], 388 0, 389 0, 390 0); 391 392 for(k = 0; k < i4_num_cands_added; k++) 393 { 394 if((ps_merge_cand[k].s_mv.i2_mvx == ps_mv->i2_mvx) && 395 (ps_merge_cand[k].s_mv.i2_mvy == ps_mv->i2_mvy) && 396 (ps_merge_cand[k].i1_ref_idx == i1_ref_idx)) 397 { 398 break; 399 } 400 } 401 402 if(k == i4_num_cands_added) 403 { 404 i4_num_cands_added++; 405 406 if(i4_max_cands_to_add <= i4_num_cands_added) 407 { 408 return i4_num_cands_added; 409 } 410 } 411 } 412 } 413 else 414 { 415 for(j = 0; j < 4; j++) 416 { 417 mv_t *ps_mv; 418 419 S08 i1_ref_idx; 420 421 part_type_results_t *ps_data = ps_grandchild_data[j].ps_best_results; 422 pu_t *ps_pu = &ps_data->as_pu_results[0].pu; 423 424 ASSERT(ps_data->u1_part_type == PRT_2Nx2N); 425 426 if(ps_grandchild_data[j].u1_num_best_results <= i4_result_id) 427 { 428 continue; 429 } 430 431 if(ps_data->as_pu_results->pu.b1_intra_flag) 432 { 433 continue; 434 } 435 436 if(u1_pred_dir != 437 ((ps_pu->b2_pred_mode == 2) ? 
u1_pred_dir : ps_pu->b2_pred_mode)) 438 { 439 continue; 440 } 441 442 if(u1_pred_dir) 443 { 444 ps_mv = &ps_pu->mv.s_l1_mv; 445 i1_ref_idx = pi1_future_list[ps_pu->mv.i1_l1_ref_idx]; 446 } 447 else 448 { 449 ps_mv = &ps_pu->mv.s_l0_mv; 450 i1_ref_idx = pi1_past_list[ps_pu->mv.i1_l0_ref_idx]; 451 } 452 453 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx = ps_mv->i2_mvx; 454 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy = ps_mv->i2_mvy; 455 ps_merge_cand[i4_num_cands_added].i1_ref_idx = i1_ref_idx; 456 457 CLIP_MV_WITHIN_RANGE( 458 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx, 459 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy, 460 pps_range_prms[i1_ref_idx], 461 0, 462 0, 463 0); 464 465 for(k = 0; k < i4_num_cands_added; k++) 466 { 467 if((ps_merge_cand[k].s_mv.i2_mvx == ps_mv->i2_mvx) && 468 (ps_merge_cand[k].s_mv.i2_mvy == ps_mv->i2_mvy) && 469 (ps_merge_cand[k].i1_ref_idx == i1_ref_idx)) 470 { 471 break; 472 } 473 } 474 475 if(k == i4_num_cands_added) 476 { 477 i4_num_cands_added++; 478 479 if(i4_max_cands_to_add <= i4_num_cands_added) 480 { 481 return i4_num_cands_added; 482 } 483 } 484 } 485 } 486 } 487 488 i4_result_id++; 489 } 490 491 return i4_num_cands_added; 492 } 493 494 WORD32 hme_add_cands_for_merge_eval( 495 ctb_cluster_info_t *ps_cluster_info, 496 search_results_t **pps_child_data_array, 497 inter_cu_results_t *ps_8x8cu_results, 498 range_prms_t **pps_range_prms, 499 search_node_t *ps_merge_cand, 500 U08 *pu1_refid_to_pred_dir_list, 501 S08 *pi1_past_list, 502 S08 *pi1_future_list, 503 ME_QUALITY_PRESETS_T e_quality_preset, 504 BLK_SIZE_T e_blk_size, 505 U08 u1_pred_dir, 506 U08 u1_blk_id) 507 { 508 WORD32 i4_num_cands_added = 0; 509 510 if(ME_PRISTINE_QUALITY == e_quality_preset) 511 { 512 cluster_data_t *ps_cluster_primo; 513 514 WORD32 i4_num_clusters; 515 516 if(BLK_32x32 == e_blk_size) 517 { 518 ps_cluster_primo = ps_cluster_info->ps_32x32_blk[u1_blk_id].as_cluster_data; 519 i4_num_clusters = ps_cluster_info->ps_32x32_blk[u1_blk_id].num_clusters; 
520 } 521 else 522 { 523 ps_cluster_primo = ps_cluster_info->ps_64x64_blk->as_cluster_data; 524 i4_num_clusters = ps_cluster_info->ps_64x64_blk->num_clusters; 525 } 526 527 i4_num_cands_added = hme_add_clustered_mvs_as_merge_cands( 528 ps_cluster_primo, 529 ps_merge_cand, 530 pps_range_prms, 531 pu1_refid_to_pred_dir_list, 532 i4_num_clusters, 533 u1_pred_dir); 534 } 535 536 i4_num_cands_added = hme_add_me_best_as_merge_cands( 537 pps_child_data_array, 538 ps_8x8cu_results, 539 ps_merge_cand, 540 pps_range_prms, 541 pu1_refid_to_pred_dir_list, 542 pi1_past_list, 543 pi1_future_list, 544 e_blk_size, 545 e_quality_preset, 546 i4_num_cands_added, 547 u1_pred_dir); 548 549 return i4_num_cands_added; 550 } 551 552 /** 553 ******************************************************************************** 554 * @fn void hme_pick_refine_merge_candts(hme_merge_prms_t *ps_merge_prms, 555 * S08 i1_ref_idx, 556 * S32 i4_best_part_type, 557 * S32 i4_is_vert) 558 * 559 * @brief Given a target partition orientation in the merged CU, and the 560 * partition type of most likely partition this fxn picks up 561 * candidates from the 4 constituent CUs and does refinement search 562 * to identify best results for the merge CU across active partitions 563 * 564 * @param[in,out] ps_merge_prms : Parameters sent from higher layers. Out of 565 * these params, the search result structure is also derived and 566 * updated during the search 567 * 568 * @param[in] i1_ref_idx : ID of the buffer within the search results to update. 569 * Will be 0 if all refidx collapsed to one buf, else it'll be 0/1 570 * 571 * @param[in] i4_best_part_type : partition type of potential partition in the 572 * merged CU, -1 if the merge process has not yet been able to 573 * determine this. 574 * 575 * @param[in] i4_is_vert : Whether target partition of merged CU is vertical 576 * orientation or horizontal orientation. 
577 * 578 * @return Number of merge candidates 579 ******************************************************************************** 580 */ 581 WORD32 hme_pick_eval_merge_candts( 582 hme_merge_prms_t *ps_merge_prms, 583 hme_subpel_prms_t *ps_subpel_prms, 584 S32 i4_search_idx, 585 S32 i4_best_part_type, 586 S32 i4_is_vert, 587 wgt_pred_ctxt_t *ps_wt_inp_prms, 588 S32 i4_frm_qstep, 589 ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list, 590 ihevce_me_optimised_function_list_t *ps_me_optimised_function_list) 591 { 592 S32 x_off, y_off; 593 search_node_t *ps_search_node; 594 S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1]; 595 S32 i4_num_valid_parts; 596 pred_ctxt_t *ps_pred_ctxt; 597 598 search_node_t as_merge_unique_node[MAX_MERGE_CANDTS]; 599 S32 num_unique_nodes_cu_merge = 0; 600 601 search_results_t *ps_search_results = ps_merge_prms->ps_results_merge; 602 CU_SIZE_T e_cu_size = ps_search_results->e_cu_size; 603 S32 i4_part_mask = ps_search_results->i4_part_mask; 604 605 search_results_t *aps_child_results[4]; 606 layer_ctxt_t *ps_curr_layer = ps_merge_prms->ps_layer_ctxt; 607 608 S32 i4_ref_stride, i, j; 609 result_upd_prms_t s_result_prms; 610 611 BLK_SIZE_T e_blk_size = ge_cu_size_to_blk_size[e_cu_size]; 612 S32 i4_offset; 613 614 /*************************************************************************/ 615 /* Function pointer for SAD/SATD, array and prms structure to pass to */ 616 /* This function */ 617 /*************************************************************************/ 618 PF_SAD_FXN_T pf_err_compute; 619 S32 ai4_sad_grid[9][17]; 620 err_prms_t s_err_prms; 621 622 /*************************************************************************/ 623 /* Allowed MV RANGE */ 624 /*************************************************************************/ 625 range_prms_t **pps_range_prms = ps_merge_prms->aps_mv_range; 626 PF_INTERP_FXN_T pf_qpel_interp; 627 PF_MV_COST_FXN pf_mv_cost_compute; 628 WORD32 pred_lx; 629 U08 *apu1_hpel_ref[4]; 630 631 
interp_prms_t s_interp_prms; 632 S32 i4_interp_buf_id; 633 634 S32 i4_ctb_x_off = ps_merge_prms->i4_ctb_x_off; 635 S32 i4_ctb_y_off = ps_merge_prms->i4_ctb_y_off; 636 637 /* Sanity checks */ 638 ASSERT((e_blk_size == BLK_64x64) || (e_blk_size == BLK_32x32)); 639 640 s_err_prms.ps_cmn_utils_optimised_function_list = ps_cmn_utils_optimised_function_list; 641 642 /* Initialize all the ptrs to child CUs for merge decision */ 643 aps_child_results[0] = ps_merge_prms->ps_results_tl; 644 aps_child_results[1] = ps_merge_prms->ps_results_tr; 645 aps_child_results[2] = ps_merge_prms->ps_results_bl; 646 aps_child_results[3] = ps_merge_prms->ps_results_br; 647 648 num_unique_nodes_cu_merge = 0; 649 650 pf_mv_cost_compute = compute_mv_cost_implicit_high_speed; 651 652 if(ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset) 653 { 654 num_unique_nodes_cu_merge = hme_add_cands_for_merge_eval( 655 ps_merge_prms->ps_cluster_info, 656 aps_child_results, 657 ps_merge_prms->ps_8x8_cu_results, 658 pps_range_prms, 659 as_merge_unique_node, 660 ps_search_results->pu1_is_past, 661 ps_merge_prms->pi1_past_list, 662 ps_merge_prms->pi1_future_list, 663 ps_merge_prms->e_quality_preset, 664 e_blk_size, 665 i4_search_idx, 666 (ps_merge_prms->ps_results_merge->u1_x_off >> 5) + 667 (ps_merge_prms->ps_results_merge->u1_y_off >> 4)); 668 } 669 else 670 { 671 /*************************************************************************/ 672 /* Populate the list of unique search nodes in the child CUs for merge */ 673 /* evaluation */ 674 /*************************************************************************/ 675 for(i = 0; i < 4; i++) 676 { 677 search_node_t s_search_node; 678 679 PART_TYPE_T e_part_type; 680 PART_ID_T e_part_id; 681 682 WORD32 part_num; 683 684 search_results_t *ps_child = aps_child_results[i]; 685 686 if(ps_child->ps_cu_results->u1_num_best_results) 687 { 688 if(!((ps_child->ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag) && 689 (1 == 
ps_child->ps_cu_results->u1_num_best_results))) 690 { 691 e_part_type = 692 (PART_TYPE_T)ps_child->ps_cu_results->ps_best_results[0].u1_part_type; 693 694 ASSERT(num_unique_nodes_cu_merge < MAX_MERGE_CANDTS); 695 696 /* Insert mvs of NxN partitions. */ 697 for(part_num = 0; part_num < gau1_num_parts_in_part_type[((S32)e_part_type)]; 698 part_num++) 699 { 700 e_part_id = ge_part_type_to_part_id[e_part_type][part_num]; 701 702 if(ps_child->aps_part_results[i4_search_idx][e_part_id]->i1_ref_idx != -1) 703 { 704 s_search_node = *ps_child->aps_part_results[i4_search_idx][e_part_id]; 705 if(s_search_node.s_mv.i2_mvx != INTRA_MV) 706 { 707 CLIP_MV_WITHIN_RANGE( 708 s_search_node.s_mv.i2_mvx, 709 s_search_node.s_mv.i2_mvy, 710 pps_range_prms[s_search_node.i1_ref_idx], 711 0, 712 0, 713 0); 714 715 INSERT_NEW_NODE_NOMAP( 716 as_merge_unique_node, 717 num_unique_nodes_cu_merge, 718 s_search_node, 719 1); 720 } 721 } 722 } 723 } 724 } 725 else if(!((ps_merge_prms->ps_results_grandchild[(i << 2)] 726 .ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag) && 727 (1 == ps_merge_prms->ps_results_grandchild[(i << 2)] 728 .ps_cu_results->u1_num_best_results))) 729 { 730 search_results_t *ps_results_root = &ps_merge_prms->ps_results_grandchild[(i << 2)]; 731 732 for(j = 0; j < 4; j++) 733 { 734 e_part_type = (PART_TYPE_T)ps_results_root[j] 735 .ps_cu_results->ps_best_results[0] 736 .u1_part_type; 737 738 ASSERT(num_unique_nodes_cu_merge < MAX_MERGE_CANDTS); 739 740 /* Insert mvs of NxN partitions. 
*/ 741 for(part_num = 0; part_num < gau1_num_parts_in_part_type[((S32)e_part_type)]; 742 part_num++) 743 { 744 e_part_id = ge_part_type_to_part_id[e_part_type][part_num]; 745 746 if((ps_results_root[j] 747 .aps_part_results[i4_search_idx][e_part_id] 748 ->i1_ref_idx != -1) && 749 (!ps_child->ps_cu_results->ps_best_results->as_pu_results->pu 750 .b1_intra_flag)) 751 { 752 s_search_node = 753 *ps_results_root[j].aps_part_results[i4_search_idx][e_part_id]; 754 if(s_search_node.s_mv.i2_mvx != INTRA_MV) 755 { 756 CLIP_MV_WITHIN_RANGE( 757 s_search_node.s_mv.i2_mvx, 758 s_search_node.s_mv.i2_mvy, 759 pps_range_prms[s_search_node.i1_ref_idx], 760 0, 761 0, 762 0); 763 764 INSERT_NEW_NODE_NOMAP( 765 as_merge_unique_node, 766 num_unique_nodes_cu_merge, 767 s_search_node, 768 1); 769 } 770 } 771 } 772 } 773 } 774 } 775 } 776 777 if(0 == num_unique_nodes_cu_merge) 778 { 779 return 0; 780 } 781 782 /*************************************************************************/ 783 /* Appropriate Err compute fxn, depends on SAD/SATD, blk size and remains*/ 784 /* fixed through this subpel refinement for this partition. */ 785 /* Note, we do not enable grid sads since one pt is evaluated per node */ 786 /* Hence, part mask is also nearly dont care and we use 2Nx2N enabled. 
*/ 787 /*************************************************************************/ 788 i4_part_mask = ps_search_results->i4_part_mask; 789 790 /* Need to add the corresponding SAD functions for EXTREME SPEED : Lokesh */ 791 if(ps_subpel_prms->i4_use_satd) 792 { 793 if(BLK_32x32 == e_blk_size) 794 { 795 pf_err_compute = hme_evalsatd_pt_pu_32x32; 796 } 797 else 798 { 799 pf_err_compute = hme_evalsatd_pt_pu_64x64; 800 } 801 } 802 else 803 { 804 pf_err_compute = (PF_SAD_FXN_T)hme_evalsad_grid_pu_MxM; 805 } 806 807 i4_ref_stride = ps_curr_layer->i4_rec_stride; 808 809 x_off = ps_merge_prms->ps_results_tl->u1_x_off; 810 y_off = ps_merge_prms->ps_results_tl->u1_y_off; 811 i4_offset = x_off + i4_ctb_x_off + ((y_off + i4_ctb_y_off) * i4_ref_stride); 812 813 /*************************************************************************/ 814 /* This array stores the ids of the partitions whose */ 815 /* SADs are updated. Since the partitions whose SADs are updated may not */ 816 /* be in contiguous order, we supply another level of indirection. 
*/ 817 /*************************************************************************/ 818 i4_num_valid_parts = hme_create_valid_part_ids(i4_part_mask, ai4_valid_part_ids); 819 820 /* Initialize result params used for partition update */ 821 s_result_prms.pf_mv_cost_compute = NULL; 822 s_result_prms.ps_search_results = ps_search_results; 823 s_result_prms.pi4_valid_part_ids = ai4_valid_part_ids; 824 s_result_prms.i1_ref_idx = i4_search_idx; 825 s_result_prms.i4_part_mask = i4_part_mask; 826 s_result_prms.pi4_sad_grid = &ai4_sad_grid[0][0]; 827 s_result_prms.i4_grid_mask = 1; 828 829 /* One time Initialization of error params used for SAD/SATD compute */ 830 s_err_prms.i4_inp_stride = ps_subpel_prms->i4_inp_stride; 831 s_err_prms.i4_ref_stride = i4_ref_stride; 832 s_err_prms.i4_part_mask = (ENABLE_2Nx2N); 833 s_err_prms.i4_grid_mask = 1; 834 s_err_prms.pi4_sad_grid = &ai4_sad_grid[0][0]; 835 s_err_prms.i4_blk_wd = gau1_blk_size_to_wd[e_blk_size]; 836 s_err_prms.i4_blk_ht = gau1_blk_size_to_ht[e_blk_size]; 837 s_err_prms.i4_step = 1; 838 839 /*************************************************************************/ 840 /* One time preparation of non changing interpolation params. 
*/ 841 /*************************************************************************/ 842 s_interp_prms.i4_ref_stride = i4_ref_stride; 843 s_interp_prms.i4_blk_wd = gau1_blk_size_to_wd[e_blk_size]; 844 s_interp_prms.i4_blk_ht = gau1_blk_size_to_ht[e_blk_size]; 845 s_interp_prms.apu1_interp_out[0] = ps_subpel_prms->pu1_wkg_mem; 846 s_interp_prms.i4_out_stride = gau1_blk_size_to_wd[e_blk_size]; 847 i4_interp_buf_id = 0; 848 849 pf_qpel_interp = ps_subpel_prms->pf_qpel_interp; 850 851 /***************************************************************************/ 852 /* Compute SATD/SAD for all unique nodes of children CUs to get best merge */ 853 /* results */ 854 /***************************************************************************/ 855 for(i = 0; i < num_unique_nodes_cu_merge; i++) 856 { 857 WORD8 i1_ref_idx; 858 ps_search_node = &as_merge_unique_node[i]; 859 860 /*********************************************************************/ 861 /* Compute the base pointer for input, interpolated buffers */ 862 /* The base pointers point as follows: */ 863 /* fx fy : 0, 0 :: fx, hy : 0, 0.5, hx, fy: 0.5, 0, hx, fy: 0.5, 0.5 */ 864 /* To these, we need to add the offset of the current node */ 865 /*********************************************************************/ 866 i1_ref_idx = ps_search_node->i1_ref_idx; 867 apu1_hpel_ref[0] = ps_curr_layer->ppu1_list_rec_fxfy[i1_ref_idx] + i4_offset; 868 apu1_hpel_ref[1] = ps_curr_layer->ppu1_list_rec_hxfy[i1_ref_idx] + i4_offset; 869 apu1_hpel_ref[2] = ps_curr_layer->ppu1_list_rec_fxhy[i1_ref_idx] + i4_offset; 870 apu1_hpel_ref[3] = ps_curr_layer->ppu1_list_rec_hxhy[i1_ref_idx] + i4_offset; 871 872 s_interp_prms.ppu1_ref = &apu1_hpel_ref[0]; 873 874 pf_qpel_interp( 875 &s_interp_prms, 876 ps_search_node->s_mv.i2_mvx, 877 ps_search_node->s_mv.i2_mvy, 878 i4_interp_buf_id); 879 880 pred_lx = i4_search_idx; 881 ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx]; 882 883 s_result_prms.u1_pred_lx = pred_lx; 884 
s_result_prms.ps_search_node_base = ps_search_node; 885 s_err_prms.pu1_inp = 886 ps_wt_inp_prms->apu1_wt_inp[i1_ref_idx] + x_off + y_off * ps_subpel_prms->i4_inp_stride; 887 s_err_prms.pu1_ref = s_interp_prms.pu1_final_out; 888 s_err_prms.i4_ref_stride = s_interp_prms.i4_final_out_stride; 889 890 /* Carry out the SAD/SATD. This call also does the TU RECURSION. 891 Here the tu recursion logic is restricted with the size of the PU*/ 892 pf_err_compute(&s_err_prms); 893 894 if(ps_subpel_prms->u1_is_cu_noisy && 895 ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier) 896 { 897 ps_me_optimised_function_list->pf_compute_stim_injected_distortion_for_all_parts( 898 s_err_prms.pu1_ref, 899 s_err_prms.i4_ref_stride, 900 ai4_valid_part_ids, 901 ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaX, 902 ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaXSquared, 903 s_err_prms.pi4_sad_grid, 904 ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier, 905 ps_wt_inp_prms->a_inv_wpred_wt[i1_ref_idx], 906 ps_wt_inp_prms->ai4_shift_val[i1_ref_idx], 907 i4_num_valid_parts, 908 ps_wt_inp_prms->wpred_log_wdc, 909 (BLK_32x32 == e_blk_size) ? 
32 : 64); 910 } 911 912 /* Update the mv's */ 913 s_result_prms.i2_mv_x = ps_search_node->s_mv.i2_mvx; 914 s_result_prms.i2_mv_y = ps_search_node->s_mv.i2_mvy; 915 916 /* Update best results */ 917 hme_update_results_pt_pu_best1_subpel_hs(&s_err_prms, &s_result_prms); 918 } 919 920 /************************************************************************/ 921 /* Update mv cost and total cost for each valid partition in the CU */ 922 /************************************************************************/ 923 for(i = 0; i < TOT_NUM_PARTS; i++) 924 { 925 if(i4_part_mask & (1 << i)) 926 { 927 WORD32 j; 928 WORD32 i4_mv_cost; 929 930 ps_search_node = ps_search_results->aps_part_results[i4_search_idx][i]; 931 932 for(j = 0; 933 j < MIN(ps_search_results->u1_num_results_per_part, num_unique_nodes_cu_merge); 934 j++) 935 { 936 if(ps_search_node->i1_ref_idx != -1) 937 { 938 pred_lx = i4_search_idx; 939 ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx]; 940 941 /* Prediction context should now deal with qpel units */ 942 HME_SET_MVPRED_RES(ps_pred_ctxt, MV_RES_QPEL); 943 944 ps_search_node->u1_subpel_done = 1; 945 ps_search_node->u1_is_avail = 1; 946 947 i4_mv_cost = 948 pf_mv_cost_compute(ps_search_node, ps_pred_ctxt, (PART_ID_T)i, MV_RES_QPEL); 949 950 ps_search_node->i4_tot_cost = i4_mv_cost + ps_search_node->i4_sad; 951 ps_search_node->i4_mv_cost = i4_mv_cost; 952 953 ps_search_node++; 954 } 955 } 956 } 957 } 958 959 return num_unique_nodes_cu_merge; 960 } 961 962 #define CU_MERGE_MAX_INTRA_PARTS 4 963 964 /** 965 ******************************************************************************** 966 * @fn hme_try_merge_high_speed 967 * 968 * @brief Attempts to merge 4 NxN candts to a 2Nx2N candt, either as a single 969 entity or with partititons for high speed preset 970 * 971 * @param[in,out] hme_merge_prms_t: Params for CU merge 972 * 973 * @return MERGE_RESULT_T type result of merge (CU_MERGED/CU_SPLIT) 974 
********************************************************************************
*/
CU_MERGE_RESULT_T hme_try_merge_high_speed(
    me_ctxt_t *ps_thrd_ctxt,
    me_frm_ctxt_t *ps_ctxt,
    ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
    hme_subpel_prms_t *ps_subpel_prms,
    hme_merge_prms_t *ps_merge_prms,
    inter_pu_results_t *ps_pu_results,
    pu_result_t *ps_pu_result)
{
    search_results_t *ps_results_tl, *ps_results_tr;
    search_results_t *ps_results_bl, *ps_results_br;

    S32 i;
    S32 i4_search_idx;
    S32 i4_cost_parent;
    S32 intra_cu_size;
    /* Per-partition source sigma accumulators; sized for all 17 partitions */
    ULWORD64 au8_final_src_sigmaX[17], au8_final_src_sigmaXSquared[17];

    search_results_t *ps_results_merge = ps_merge_prms->ps_results_merge;
    wgt_pred_ctxt_t *ps_wt_inp_prms = &ps_ctxt->s_wt_pred;

    /* NxN partitions are never evaluated at the merged (parent) CU level */
    S32 i4_part_mask = ENABLE_ALL_PARTS - ENABLE_NxN;
    S32 is_vert = 0, i4_best_part_type = -1;
    S32 i4_intra_parts = 0; /* Keeps track of intra percentage before merge */
    S32 i4_cost_children = 0;
    S32 i4_frm_qstep = ps_ctxt->frm_qstep;
    S32 i4_num_merge_cands_evaluated = 0;
    U08 u1_x_off = ps_results_merge->u1_x_off;
    U08 u1_y_off = ps_results_merge->u1_y_off;
    /* Index (0..3) of the 32x32 block inside the 64x64 CTB */
    S32 i4_32x32_id = (u1_y_off >> 4) + (u1_x_off >> 5);

    ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list =
        ps_thrd_ctxt->ps_cmn_utils_optimised_function_list;
    ihevce_me_optimised_function_list_t *ps_me_optimised_function_list =
        ((ihevce_me_optimised_function_list_t *)ps_thrd_ctxt->pv_me_optimised_function_list);
    ps_results_tl = ps_merge_prms->ps_results_tl;
    ps_results_tr = ps_merge_prms->ps_results_tr;
    ps_results_bl = ps_merge_prms->ps_results_bl;
    ps_results_br = ps_merge_prms->ps_results_br;

    /* Faster presets prune asymmetric (and for XS25 also symmetric) partitions */
    if(ps_merge_prms->e_quality_preset == ME_XTREME_SPEED)
    {
        i4_part_mask &= ~ENABLE_AMP;
    }

    if(ps_merge_prms->e_quality_preset == ME_XTREME_SPEED_25)
    {
        i4_part_mask &= ~ENABLE_AMP;

        i4_part_mask &= ~ENABLE_SMP;
    }

    ps_merge_prms->i4_num_pred_dir_actual = 0;

    /*************************************************************************/
    /* The logic for High speed CU merge goes as follows:                    */
    /*                                                                       */
    /* 1. Early exit with CU_SPLIT if sum of best partitions of children CUs */
    /*    exceed 7                                                           */
    /* 2. Early exit with CU_MERGE if mvs of best partitions of children CUs */
    /*    are identical                                                      */
    /* 3. Find the all unique mvs of best partitions of children CUs and     */
    /*    evaluate partial SATDs (all 17 partitions) for each unique mv. If  */
    /*    best parent cost is lower than sum of the best children costs      */
    /*    return CU_MERGE after seeding the best results else return CU_SPLIT*/
    /*                                                                       */
    /*************************************************************************/

    /* Count the number of best partitions in child CUs, early exit if > 7.  */
    /* Also accumulates i4_cost_children for the final parent-vs-child test. */
    if((ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY) ||
       (CU_32x32 == ps_results_merge->e_cu_size))
    {
        S32 num_parts_in_32x32 = 0;
        WORD32 i4_part_type;

        if(ps_results_tl->u1_split_flag)
        {
            num_parts_in_32x32 += 4;

#define COST_INTERCHANGE 0
            /* Child is split: sum the 4 constituent 8x8 best costs (blocks 0-3) */
            i4_cost_children = ps_merge_prms->ps_8x8_cu_results[0].ps_best_results->i4_tot_cost +
                               ps_merge_prms->ps_8x8_cu_results[1].ps_best_results->i4_tot_cost +
                               ps_merge_prms->ps_8x8_cu_results[2].ps_best_results->i4_tot_cost +
                               ps_merge_prms->ps_8x8_cu_results[3].ps_best_results->i4_tot_cost;
        }
        else
        {
            i4_part_type = ps_results_tl->ps_cu_results->ps_best_results[0].u1_part_type;
            num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
            i4_cost_children = ps_results_tl->ps_cu_results->ps_best_results[0].i4_tot_cost;
        }

        if(ps_results_tr->u1_split_flag)
        {
            num_parts_in_32x32 += 4;

            /* 8x8 blocks 4-7 belong to the top-right child */
            i4_cost_children += ps_merge_prms->ps_8x8_cu_results[4].ps_best_results->i4_tot_cost +
                                ps_merge_prms->ps_8x8_cu_results[5].ps_best_results->i4_tot_cost +
                                ps_merge_prms->ps_8x8_cu_results[6].ps_best_results->i4_tot_cost +
                                ps_merge_prms->ps_8x8_cu_results[7].ps_best_results->i4_tot_cost;
        }
        else
        {
            i4_part_type = ps_results_tr->ps_cu_results->ps_best_results[0].u1_part_type;
            num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
            i4_cost_children += ps_results_tr->ps_cu_results->ps_best_results[0].i4_tot_cost;
        }

        if(ps_results_bl->u1_split_flag)
        {
            num_parts_in_32x32 += 4;

            /* 8x8 blocks 8-11 belong to the bottom-left child */
            i4_cost_children += ps_merge_prms->ps_8x8_cu_results[8].ps_best_results->i4_tot_cost +
                                ps_merge_prms->ps_8x8_cu_results[9].ps_best_results->i4_tot_cost +
                                ps_merge_prms->ps_8x8_cu_results[10].ps_best_results->i4_tot_cost +
                                ps_merge_prms->ps_8x8_cu_results[11].ps_best_results->i4_tot_cost;
        }
        else
        {
            i4_part_type = ps_results_bl->ps_cu_results->ps_best_results[0].u1_part_type;
            num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
            i4_cost_children += ps_results_bl->ps_cu_results->ps_best_results[0].i4_tot_cost;
        }

        if(ps_results_br->u1_split_flag)
        {
            num_parts_in_32x32 += 4;

            /* 8x8 blocks 12-15 belong to the bottom-right child */
            i4_cost_children += ps_merge_prms->ps_8x8_cu_results[12].ps_best_results->i4_tot_cost +
                                ps_merge_prms->ps_8x8_cu_results[13].ps_best_results->i4_tot_cost +
                                ps_merge_prms->ps_8x8_cu_results[14].ps_best_results->i4_tot_cost +
                                ps_merge_prms->ps_8x8_cu_results[15].ps_best_results->i4_tot_cost;
        }
        else
        {
            i4_part_type = ps_results_br->ps_cu_results->ps_best_results[0].u1_part_type;
            num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
            i4_cost_children += ps_results_br->ps_cu_results->ps_best_results[0].i4_tot_cost;
        }

        /* Too many constituent partitions: merging is unlikely to win */
        if((num_parts_in_32x32 > 7) && (ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY))
        {
            return CU_SPLIT;
        }

        if((num_parts_in_32x32 > MAX_NUM_CONSTITUENT_MVS_TO_ENABLE_32MERGE_IN_XS25) &&
           (ps_merge_prms->e_quality_preset == ME_XTREME_SPEED_25))
        {
            return CU_SPLIT;
        }
    }

    /* Accumulate intra percentage before merge for early CU_SPLIT decision */
    /* Note : Each intra part represent a NxN unit of the children CUs      */
    /* This is essentially 1/16th of the CUsize under consideration for merge */
    if(ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset)
    {
        if(CU_64x64 == ps_results_merge->e_cu_size)
        {
            /* If inter evaluation is disabled, treat the whole CU as intra (16/16) */
            i4_intra_parts =
                (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->u1_inter_eval_enable)
                    ? 16
                    : ps_merge_prms->ps_cluster_info->ps_cu_tree_root->u1_intra_eval_enable;
        }
        else
        {
            /* Select the CU-tree child matching this 32x32's position in the CTB */
            switch((ps_results_merge->u1_x_off >> 5) + ((ps_results_merge->u1_y_off >> 4)))
            {
            case 0:
            {
                i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_tl
                                       ->u1_inter_eval_enable)
                                     ? 16
                                     : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
                                            ->ps_child_node_tl->u1_intra_eval_enable);

                break;
            }
            case 1:
            {
                i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_tr
                                       ->u1_inter_eval_enable)
                                     ? 16
                                     : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
                                            ->ps_child_node_tr->u1_intra_eval_enable);

                break;
            }
            case 2:
            {
                i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_bl
                                       ->u1_inter_eval_enable)
                                     ? 16
                                     : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
                                            ->ps_child_node_bl->u1_intra_eval_enable);

                break;
            }
            case 3:
            {
                i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_br
                                       ->u1_inter_eval_enable)
                                     ? 16
                                     : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
                                            ->ps_child_node_br->u1_intra_eval_enable);

                break;
            }
            }
        }
    }
    else
    {
        /* Non-pristine presets: count intra NxN units from the children's best results */
        for(i = 0; i < 4; i++)
        {
            search_results_t *ps_results =
                (i == 0) ? ps_results_tl
                         : ((i == 1) ? ps_results_tr : ((i == 2) ? ps_results_bl : ps_results_br));

            part_type_results_t *ps_best_res = &ps_results->ps_cu_results->ps_best_results[0];

            if(ps_results->u1_split_flag)
            {
                U08 u1_x_off = ps_results->u1_x_off;
                U08 u1_y_off = ps_results->u1_y_off;
                U08 u1_8x8_zscan_id = gau1_ctb_raster_to_zscan[(u1_x_off >> 2) + (u1_y_off << 2)] >>
                                      2;

                /* Special case to handle 8x8 CUs when 16x16 is split */
                ASSERT(ps_results->e_cu_size == CU_16x16);

                /* Check each of the four 8x8 children for an intra winner */
                ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id].ps_best_results[0];

                if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
                    i4_intra_parts += 1;

                ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 1].ps_best_results[0];

                if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
                    i4_intra_parts += 1;

                ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 2].ps_best_results[0];

                if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
                    i4_intra_parts += 1;

                ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 3].ps_best_results[0];

                if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
                    i4_intra_parts += 1;
            }
            else if(ps_best_res[0].as_pu_results[0].pu.b1_intra_flag)
            {
                /* Whole child CU is intra: counts as 4 NxN units */
                i4_intra_parts += 4;
            }
        }
    }

    /* Determine the max intra CU size indicated by IPE */
    intra_cu_size = CU_64x64;
    if(ps_cur_ipe_ctb->u1_split_flag)
    {
        intra_cu_size = CU_32x32;
        if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag)
        {
            intra_cu_size = CU_16x16;
        }
    }

    /* Mostly/fully intra children: either seed an intra 2Nx2N parent result */
    /* (CU_MERGED) or give up on merging (CU_SPLIT).                         */
    if(((i4_intra_parts > CU_MERGE_MAX_INTRA_PARTS) &&
        (intra_cu_size < ps_results_merge->e_cu_size) &&
        (ME_PRISTINE_QUALITY != ps_merge_prms->e_quality_preset)) ||
       (i4_intra_parts == 16))
    {
        S32 i4_merge_outcome;

        i4_merge_outcome = (CU_32x32 == ps_results_merge->e_cu_size)
                               ? (!ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag &&
                                  ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_valid_cu)
                               : (!ps_cur_ipe_ctb->u1_split_flag);

        i4_merge_outcome = i4_merge_outcome ||
                           (ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset);

        i4_merge_outcome = i4_merge_outcome &&
                           !(ps_subpel_prms->u1_is_cu_noisy && DISABLE_INTRA_WHEN_NOISY);

        if(i4_merge_outcome)
        {
            inter_cu_results_t *ps_cu_results = ps_results_merge->ps_cu_results;
            part_type_results_t *ps_best_result = ps_cu_results->ps_best_results;
            pu_t *ps_pu = &ps_best_result->as_pu_results->pu;

            /* Seed a single intra 2Nx2N result for the merged CU */
            ps_cu_results->u1_num_best_results = 1;
            ps_cu_results->u1_cu_size = ps_results_merge->e_cu_size;
            ps_cu_results->u1_x_off = u1_x_off;
            ps_cu_results->u1_y_off = u1_y_off;

            ps_best_result->u1_part_type = PRT_2Nx2N;
            ps_best_result->ai4_tu_split_flag[0] = 0;
            ps_best_result->ai4_tu_split_flag[1] = 0;
            ps_best_result->ai4_tu_split_flag[2] = 0;
            ps_best_result->ai4_tu_split_flag[3] = 0;
            /* Cost comes from IPE's intra analysis for the matching CU size */
            ps_best_result->i4_tot_cost =
                (CU_64x64 == ps_results_merge->e_cu_size)
                    ? ps_cur_ipe_ctb->i4_best64x64_intra_cost
                    : ps_cur_ipe_ctb->ai4_best32x32_intra_cost[i4_32x32_id];

            ps_pu->b1_intra_flag = 1;
            ps_pu->b4_pos_x = u1_x_off >> 2;
            ps_pu->b4_pos_y = u1_y_off >> 2;
            ps_pu->b4_wd = (1 << (ps_results_merge->e_cu_size + 1)) - 1;
            ps_pu->b4_ht = ps_pu->b4_wd;
            ps_pu->mv.i1_l0_ref_idx = -1;
            ps_pu->mv.i1_l1_ref_idx = -1;
            ps_pu->mv.s_l0_mv.i2_mvx = INTRA_MV;
            ps_pu->mv.s_l0_mv.i2_mvy = INTRA_MV;
            ps_pu->mv.s_l1_mv.i2_mvx = INTRA_MV;
            ps_pu->mv.s_l1_mv.i2_mvy = INTRA_MV;

            return CU_MERGED;
        }
        else
        {
            return CU_SPLIT;
        }
    }

    /* Some intra presence: restrict inter evaluation to 2Nx2N only */
    if(i4_intra_parts)
    {
        i4_part_mask = ENABLE_2Nx2N;
    }

    ps_results_merge->u1_num_active_ref = (ps_ctxt->s_frm_prms.bidir_enabled) ? 2 : 1;

    hme_reset_search_results(ps_results_merge, i4_part_mask, MV_RES_QPEL);

    ps_results_merge->u1_num_active_ref = ps_merge_prms->i4_num_ref;
    ps_merge_prms->i4_num_pred_dir_actual = 0;

    if(ps_subpel_prms->u1_is_cu_noisy && ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier)
    {
        S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
        S32 i4_num_valid_parts;
        S32 i4_sigma_array_offset;

        i4_num_valid_parts = hme_create_valid_part_ids(i4_part_mask, ai4_valid_part_ids);

        /*********************************************************************************************************************************************/
        /* i4_sigma_array_offset : takes care of pointing to the appropriate 4x4 block's sigmaX and sigmaX-squared value in a CTB out of 256 values  */
        /* Logic is x/4 + ((y/4) x 16) : every 4 pixel increase in x equals one 4x4 block increment, every 4 pixel increase in y equals 16 4x4 block */
        /* increment as there will be 256 4x4 blocks in a CTB                                                                                        */
        /*********************************************************************************************************************************************/
        i4_sigma_array_offset = (ps_merge_prms->ps_results_merge->u1_x_off / 4) +
                                (ps_merge_prms->ps_results_merge->u1_y_off * 4);

        for(i = 0; i < i4_num_valid_parts; i++)
        {
            S32 i4_part_id = ai4_valid_part_ids[i];

            hme_compute_final_sigma_of_pu_from_base_blocks(
                ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_array_offset,
                ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_array_offset,
                au8_final_src_sigmaX,
                au8_final_src_sigmaXSquared,
                (CU_32x32 == ps_results_merge->e_cu_size) ? 32 : 64,
                4,
                i4_part_id,
                16);
        }

        ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaX = au8_final_src_sigmaX;
        ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaXSquared = au8_final_src_sigmaXSquared;
    }

    /*************************************************************************/
    /* Loop through all ref idx and pick the merge candts and refine based   */
    /* on the active partitions. At this stage num ref will be 1 or 2        */
    /*************************************************************************/
    for(i4_search_idx = 0; i4_search_idx < ps_merge_prms->i4_num_ref; i4_search_idx++)
    {
        S32 i4_cands;
        U08 u1_pred_dir = 0;

        /* Map search index to prediction direction (L0/L1) */
        if((2 == ps_merge_prms->i4_num_ref) || (!ps_ctxt->s_frm_prms.bidir_enabled))
        {
            u1_pred_dir = i4_search_idx;
        }
        else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l0 == 0)
        {
            u1_pred_dir = 1;
        }
        else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l1 == 0)
        {
            u1_pred_dir = 0;
        }
        else
        {
            ASSERT(0);
        }

        /* call the function to pick and evaluate the merge candts, given    */
        /* a ref id and a part mask.                                         */
        i4_cands = hme_pick_eval_merge_candts(
            ps_merge_prms,
            ps_subpel_prms,
            u1_pred_dir,
            i4_best_part_type,
            is_vert,
            ps_wt_inp_prms,
            i4_frm_qstep,
            ps_cmn_utils_optimised_function_list,
            ps_me_optimised_function_list);

        if(i4_cands)
        {
            ps_merge_prms->au1_pred_dir_searched[ps_merge_prms->i4_num_pred_dir_actual] =
                u1_pred_dir;
            ps_merge_prms->i4_num_pred_dir_actual++;
        }

        i4_num_merge_cands_evaluated += i4_cands;
    }

    /* Call the decide_part_types function here */
    /* Populate the new PU struct with the results post subpel refinement*/
    if(i4_num_merge_cands_evaluated)
    {
        inter_cu_results_t *ps_cu_results = ps_results_merge->ps_cu_results;

        hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr);

        ps_merge_prms->ps_inter_ctb_prms->i4_ctb_x_off = ps_merge_prms->i4_ctb_x_off;
        ps_merge_prms->ps_inter_ctb_prms->i4_ctb_y_off = ps_merge_prms->i4_ctb_y_off;

        hme_populate_pus(
            ps_thrd_ctxt,
            ps_ctxt,
            ps_subpel_prms,
            ps_results_merge,
            ps_cu_results,
            ps_pu_results,
            ps_pu_result,
            ps_merge_prms->ps_inter_ctb_prms,
            &ps_ctxt->s_wt_pred,
            ps_merge_prms->ps_layer_ctxt,
            ps_merge_prms->au1_pred_dir_searched,
            ps_merge_prms->i4_num_pred_dir_actual);

        /* Offset into the CTB's source buffer (64-pixel stride) */
        ps_cu_results->i4_inp_offset = (ps_cu_results->u1_x_off) + (ps_cu_results->u1_y_off * 64);

        hme_decide_part_types(
            ps_cu_results,
            ps_pu_results,
            ps_merge_prms->ps_inter_ctb_prms,
            ps_ctxt,
            ps_cmn_utils_optimised_function_list,
            ps_me_optimised_function_list

        );

        /*****************************************************************/
        /* INSERT INTRA RESULTS AT 32x32/64x64 LEVEL.                    */
        /*****************************************************************/
#if DISABLE_INTRA_IN_BPICS
        if(1 != ((ME_XTREME_SPEED_25 == ps_merge_prms->e_quality_preset) &&
                 (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE)))
#endif
        {
            if(!(DISABLE_INTRA_WHEN_NOISY && ps_merge_prms->ps_inter_ctb_prms->u1_is_cu_noisy))
            {
                hme_insert_intra_nodes_post_bipred(
                    ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep);
            }
        }
    }
    else
    {
        return CU_SPLIT;
    }

    /* We check the best result of ref idx 0 and compare for parent vs child */
    if((ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY) ||
       (CU_32x32 == ps_results_merge->e_cu_size))
    {
        i4_cost_parent = ps_results_merge->ps_cu_results->ps_best_results[0].i4_tot_cost;
        /*********************************************************************/
        /* Add the cost of signaling the CU tree bits.                       */
        /* Assuming parent is not split, then we signal 1 bit for this parent*/
        /* CU. If split, then 1 bit for parent CU + 4 bits for each child CU */
        /* So, 4*lambda is extra for children cost.            :Lokesh       */
        /*********************************************************************/
        {
            pred_ctxt_t *ps_pred_ctxt = &ps_results_merge->as_pred_ctxt[0];

            i4_cost_children += ((4 * ps_pred_ctxt->lambda) >> (ps_pred_ctxt->lambda_q_shift));
        }

        if(i4_cost_parent < i4_cost_children)
        {
            return CU_MERGED;
        }

        return CU_SPLIT;
    }
    else
    {
        return CU_MERGED;
    }
}

/* Copies one search node's MV (optionally downscaled by 'shift') and ref idx */
/* into the MV-bank entry pointed to by ps_mv / pi1_ref_idx.                  */
#define COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, ps_search_node, shift)                              \
    {                                                                                              \
        (ps_mv)->i2_mv_x = (ps_search_node)->s_mv.i2_mvx >> (shift);                               \
        (ps_mv)->i2_mv_y = (ps_search_node)->s_mv.i2_mvy >> (shift);                               \
        *(pi1_ref_idx) = (ps_search_node)->i1_ref_idx;                                             \
    }

/**
********************************************************************************
*  @fn     hme_update_mv_bank_noencode(search_results_t *ps_search_results,
*                                   layer_mv_t *ps_layer_mv,
*                                   S32 i4_search_blk_x,
*                                   S32 i4_search_blk_y,
*                                   mvbank_update_prms_t *ps_prms)
*
*  @brief  Updates the mv bank in case there is no further encoding to be done
*
*  @param[in]  ps_search_results: contains results for the block just searched
*
*  @param[in,out]  ps_layer_mv : Has pointer to mv bank amongst other things
*
*  @param[in]  i4_search_blk_x : col num of blk being searched
*
*  @param[in]  i4_search_blk_y : row num of blk being searched
*
*  @param[in]  ps_prms : contains certain parameters which govern how the
*              update is done
*
*  @return None
********************************************************************************
*/
void hme_update_mv_bank_noencode(
    search_results_t *ps_search_results,
    layer_mv_t *ps_layer_mv,
    S32 i4_search_blk_x,
    S32 i4_search_blk_y,
    mvbank_update_prms_t *ps_prms)
{
    hme_mv_t *ps_mv;
    hme_mv_t *ps_mv1, *ps_mv2, *ps_mv3, *ps_mv4;
    S08 *pi1_ref_idx, *pi1_ref_idx1, *pi1_ref_idx2,
*pi1_ref_idx3, *pi1_ref_idx4;
    S32 i4_blk_x, i4_blk_y, i4_offset;
    S32 i4_j, i4_ref_id;
    search_node_t *ps_search_node;
    search_node_t *ps_search_node_8x8, *ps_search_node_4x4_1;
    search_node_t *ps_search_node_4x4_2, *ps_search_node_4x4_3;
    search_node_t *ps_search_node_4x4_4;

    /* Convert search-block coords to mv-bank block coords and linear offset */
    i4_blk_x = i4_search_blk_x << ps_prms->i4_shift;
    i4_blk_y = i4_search_blk_y << ps_prms->i4_shift;
    i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row;

    i4_offset *= ps_layer_mv->i4_num_mvs_per_blk;

    /* Identify the correct offset in the mvbank and the reference id buf */
    ps_mv = ps_layer_mv->ps_mv + i4_offset;
    pi1_ref_idx = ps_layer_mv->pi1_ref_idx + i4_offset;

    /*************************************************************************/
    /* Supposing we store the mvs in the same blk size as we searched (e.g.  */
    /* we searched 8x8 blks and store results for 8x8 blks), then we can     */
    /* do a straightforward single update of results. This will have a 1-1   */
    /* correspondence.                                                       */
    /*************************************************************************/
    if(ps_layer_mv->e_blk_size == ps_prms->e_search_blk_size)
    {
        for(i4_ref_id = 0; i4_ref_id < (S32)ps_prms->i4_num_ref; i4_ref_id++)
        {
            ps_search_node = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
            for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
            {
                COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, ps_search_node, 0);
                ps_mv++;
                pi1_ref_idx++;
                ps_search_node++;
            }
        }
        return;
    }

    /*************************************************************************/
    /* Case where search blk size is 8x8, but we update 4x4 results. In this */
    /* case, we need to have NxN partitions enabled in search.               */
    /* Further, we update on a 1-1 basis the 4x4 blk mvs from the respective */
    /* NxN partition. We also update the 8x8 result into each of the 4x4 bank*/
    /*************************************************************************/
    ASSERT(ps_layer_mv->e_blk_size == BLK_4x4);
    ASSERT(ps_prms->e_search_blk_size == BLK_8x8);
    ASSERT((ps_search_results->i4_part_mask & (ENABLE_NxN)) == (ENABLE_NxN));

    /*************************************************************************/
    /* For every 4x4 blk we store corresponding 4x4 results and 1 8x8 result */
    /* hence the below check.                                                */
    /*************************************************************************/
    ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_results_per_part + 1);

    /* Pointers 1-4 address the TL/TR/BL/BR 4x4 bank entries of this 8x8 blk */
    ps_mv1 = ps_mv;
    ps_mv2 = ps_mv1 + ps_layer_mv->i4_num_mvs_per_blk;
    ps_mv3 = ps_mv1 + (ps_layer_mv->i4_num_mvs_per_row);
    ps_mv4 = ps_mv3 + (ps_layer_mv->i4_num_mvs_per_blk);
    pi1_ref_idx1 = pi1_ref_idx;
    pi1_ref_idx2 = pi1_ref_idx1 + ps_layer_mv->i4_num_mvs_per_blk;
    pi1_ref_idx3 = pi1_ref_idx1 + (ps_layer_mv->i4_num_mvs_per_row);
    pi1_ref_idx4 = pi1_ref_idx3 + (ps_layer_mv->i4_num_mvs_per_blk);

    for(i4_ref_id = 0; i4_ref_id < (S32)ps_search_results->u1_num_active_ref; i4_ref_id++)
    {
        ps_search_node_8x8 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];

        ps_search_node_4x4_1 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_TL];

        ps_search_node_4x4_2 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_TR];

        ps_search_node_4x4_3 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_BL];

        ps_search_node_4x4_4 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_BR];

        /* First entry of each 4x4 bank: the corresponding NxN result */
        COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_4x4_1, 0);
        ps_mv1++;
        pi1_ref_idx1++;
        ps_search_node_4x4_1++;
        COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_4x4_2, 0);
        ps_mv2++;
        pi1_ref_idx2++;
        ps_search_node_4x4_2++;
        COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_4x4_3, 0);
        ps_mv3++;
        pi1_ref_idx3++;
        ps_search_node_4x4_3++;
        COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_4x4_4, 0);
        ps_mv4++;
        pi1_ref_idx4++;
        ps_search_node_4x4_4++;

        /* Second entry (if room): the 8x8 (2Nx2N) result, replicated to all 4 */
        if(ps_layer_mv->i4_num_mvs_per_ref > 1)
        {
            COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_8x8, 0);
            ps_mv1++;
            pi1_ref_idx1++;
            COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_8x8, 0);
            ps_mv2++;
            pi1_ref_idx2++;
            COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_8x8, 0);
            ps_mv3++;
            pi1_ref_idx3++;
            COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_8x8, 0);
            ps_mv4++;
            pi1_ref_idx4++;
        }

        /* Remaining entries: further NxN candidates in rank order */
        for(i4_j = 2; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
        {
            COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_4x4_1, 0);
            ps_mv1++;
            pi1_ref_idx1++;
            ps_search_node_4x4_1++;
            COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_4x4_2, 0);
            ps_mv2++;
            pi1_ref_idx2++;
            ps_search_node_4x4_2++;
            COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_4x4_3, 0);
            ps_mv3++;
            pi1_ref_idx3++;
            ps_search_node_4x4_3++;
            COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_4x4_4, 0);
            ps_mv4++;
            pi1_ref_idx4++;
            ps_search_node_4x4_4++;
        }
    }
}

/* Updates the 8x8 mv bank from 16x16 search results in the encode layer,     */
/* distributing each best partition's mv to the 8x8 quadrants it covers.      */
/* pu1_pred_dir_searched maps loop index to L0/L1; i4_num_act_ref_l0 appears  */
/* unused in this body — NOTE(review): confirm against callers.               */
void hme_update_mv_bank_encode(
    search_results_t *ps_search_results,
    layer_mv_t *ps_layer_mv,
    S32 i4_search_blk_x,
    S32 i4_search_blk_y,
    mvbank_update_prms_t *ps_prms,
    U08 *pu1_pred_dir_searched,
    S32 i4_num_act_ref_l0)
{
    hme_mv_t *ps_mv;
    hme_mv_t *ps_mv1, *ps_mv2, *ps_mv3, *ps_mv4;
    S08 *pi1_ref_idx, *pi1_ref_idx1, *pi1_ref_idx2, *pi1_ref_idx3, *pi1_ref_idx4;
    S32 i4_blk_x, i4_blk_y, i4_offset;
    S32 j, i, num_parts;
    search_node_t *ps_search_node_tl, *ps_search_node_tr;
    search_node_t *ps_search_node_bl,
*ps_search_node_br;
    search_node_t s_zero_mv; /* NOTE(review): initialized below but not read in this body */
    WORD32 i4_part_type = ps_search_results->ps_cu_results->ps_best_results[0].u1_part_type;

    /* Convert search-block coords to mv-bank block coords and linear offset */
    i4_blk_x = i4_search_blk_x << ps_prms->i4_shift;
    i4_blk_y = i4_search_blk_y << ps_prms->i4_shift;
    i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row;

    i4_offset *= ps_layer_mv->i4_num_mvs_per_blk;

    /* Identify the correct offset in the mvbank and the reference id buf */
    ps_mv = ps_layer_mv->ps_mv + i4_offset;
    pi1_ref_idx = ps_layer_mv->pi1_ref_idx + i4_offset;

    ASSERT(ps_layer_mv->e_blk_size == BLK_8x8);
    ASSERT(ps_prms->e_search_blk_size == BLK_16x16);

    /*************************************************************************/
    /* For every 4x4 blk we store corresponding 4x4 results and 1 8x8 result */
    /* hence the below check.                                                */
    /*************************************************************************/
    ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_best_results);

    /* Pointers 1-4 address the TL/TR/BL/BR 8x8 bank entries of this 16x16 blk */
    ps_mv1 = ps_mv;
    ps_mv2 = ps_mv1 + ps_layer_mv->i4_num_mvs_per_blk;
    ps_mv3 = ps_mv1 + (ps_layer_mv->i4_num_mvs_per_row);
    ps_mv4 = ps_mv3 + (ps_layer_mv->i4_num_mvs_per_blk);
    pi1_ref_idx1 = pi1_ref_idx;
    pi1_ref_idx2 = pi1_ref_idx1 + ps_layer_mv->i4_num_mvs_per_blk;
    pi1_ref_idx3 = pi1_ref_idx1 + (ps_layer_mv->i4_num_mvs_per_row);
    pi1_ref_idx4 = pi1_ref_idx3 + (ps_layer_mv->i4_num_mvs_per_blk);

    /* Initialize zero mv: default mv used for intra mvs */
    s_zero_mv.s_mv.i2_mvx = 0;
    s_zero_mv.s_mv.i2_mvy = 0;
    s_zero_mv.i1_ref_idx = 0;

    /* A split 16x16 with NxN enabled is banked as if its part type were NxN */
    if((ps_search_results->e_cu_size == CU_16x16) && (ps_search_results->u1_split_flag) &&
       (ps_search_results->i4_part_mask & ENABLE_NxN))
    {
        i4_part_type = PRT_NxN;
    }

    for(i = 0; i < ps_prms->i4_num_ref; i++)
    {
        for(j = 0; j < ps_layer_mv->i4_num_mvs_per_ref; j++)
        {
            WORD32 i4_part_id = ge_part_type_to_part_id[i4_part_type][0];

            num_parts = gau1_num_parts_in_part_type[i4_part_type];

            ps_search_node_tl =
                ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id];

            if(num_parts == 1)
            {
                /* 2Nx2N: one result covers all four quadrants */
                ps_search_node_tr = ps_search_node_tl;
                ps_search_node_bl = ps_search_node_tl;
                ps_search_node_br = ps_search_node_tl;
            }
            else if(num_parts == 2)
            {
                /* For vertically oriented partitions, tl, bl pt to same result */
                /* For horizontally oriented partition, tl, tr pt to same result */
                /* This means for AMP, 2 of the 8x8 blks in mv bank have ambiguous */
                /* result, e.g. for 4x16L. Here left 2 8x8 have the 4x16L partition */
                /* and right 2 8x8 have 12x16R partition */
                if(gau1_is_vert_part[i4_part_type])
                {
                    ps_search_node_tr =
                        ps_search_results
                            ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1];
                    ps_search_node_bl = ps_search_node_tl;
                }
                else
                {
                    ps_search_node_tr = ps_search_node_tl;
                    ps_search_node_bl =
                        ps_search_results
                            ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1];
                }
                ps_search_node_br =
                    ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1];
            }
            else
            {
                /* 4 unique results */
                ps_search_node_tr =
                    ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1];
                ps_search_node_bl =
                    ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 2];
                ps_search_node_br =
                    ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 3];
            }

            /* Skip an intra winner and take the next-ranked (inter) candidate */
            if(ps_search_node_tl->s_mv.i2_mvx == INTRA_MV)
                ps_search_node_tl++;
            if(ps_search_node_tr->s_mv.i2_mvx == INTRA_MV)
                ps_search_node_tr++;
            if(ps_search_node_bl->s_mv.i2_mvx == INTRA_MV)
                ps_search_node_bl++;
            if(ps_search_node_br->s_mv.i2_mvx == INTRA_MV)
                ps_search_node_br++;

            COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_tl, 0);
            ps_mv1++;
            pi1_ref_idx1++;
            COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_tr, 0);
            ps_mv2++;
            pi1_ref_idx2++;
            COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_bl, 0);
            ps_mv3++;
            pi1_ref_idx3++;
            COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_br, 0);
            ps_mv4++;
            pi1_ref_idx4++;

            /* Optionally bank the rank-1 (second-best) result as well */
            if(ps_prms->i4_num_results_to_store > 1)
            {
                ps_search_node_tl =
                    &ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id][1];

                if(num_parts == 1)
                {
                    ps_search_node_tr = ps_search_node_tl;
                    ps_search_node_bl = ps_search_node_tl;
                    ps_search_node_br = ps_search_node_tl;
                }
                else if(num_parts == 2)
                {
                    /* For vertically oriented partitions, tl, bl pt to same result */
                    /* For horizontally oriented partition, tl, tr pt to same result */
                    /* This means for AMP, 2 of the 8x8 blks in mv bank have ambiguous */
                    /* result, e.g. for 4x16L. Here left 2 8x8 have the 4x16L partition */
                    /* and right 2 8x8 have 12x16R partition */
                    if(gau1_is_vert_part[i4_part_type])
                    {
                        ps_search_node_tr =
                            &ps_search_results
                                 ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1][1];
                        ps_search_node_bl = ps_search_node_tl;
                    }
                    else
                    {
                        ps_search_node_tr = ps_search_node_tl;
                        ps_search_node_bl =
                            &ps_search_results
                                 ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1][1];
                    }
                    ps_search_node_br =
                        &ps_search_results
                             ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1][1];
                }
                else
                {
                    /* 4 unique results */
                    ps_search_node_tr =
                        &ps_search_results
                             ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1][1];
                    ps_search_node_bl =
                        &ps_search_results
                             ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 2][1];
                    ps_search_node_br =
                        &ps_search_results
                             ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 3][1];
                }

                /* Same intra-skip rule for the rank-1 candidates */
                if(ps_search_node_tl->s_mv.i2_mvx == INTRA_MV)
                    ps_search_node_tl++;
                if(ps_search_node_tr->s_mv.i2_mvx == INTRA_MV)
                    ps_search_node_tr++;
                if(ps_search_node_bl->s_mv.i2_mvx == INTRA_MV)
                    ps_search_node_bl++;
                if(ps_search_node_br->s_mv.i2_mvx == INTRA_MV)
                    ps_search_node_br++;

                COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_tl, 0);
                ps_mv1++;
                pi1_ref_idx1++;
                COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_tr, 0);
                ps_mv2++;
                pi1_ref_idx2++;
                COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_bl, 0);
                ps_mv3++;
                pi1_ref_idx3++;
                COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_br, 0);
                ps_mv4++;
                pi1_ref_idx4++;
            }
        }
    }
}

/**
********************************************************************************
*  @fn     hme_update_mv_bank_noencode(search_results_t *ps_search_results,
*                                   layer_mv_t
*ps_layer_mv, 1871 * S32 i4_search_blk_x, 1872 * S32 i4_search_blk_y, 1873 * mvbank_update_prms_t *ps_prms) 1874 * 1875 * @brief Updates the mv bank in case there is no further encodign to be done 1876 * 1877 * @param[in] ps_search_results: contains results for the block just searched 1878 * 1879 * @param[in,out] ps_layer_mv : Has pointer to mv bank amongst other things 1880 * 1881 * @param[in] i4_search_blk_x : col num of blk being searched 1882 * 1883 * @param[in] i4_search_blk_y : row num of blk being searched 1884 * 1885 * @param[in] ps_prms : contains certain parameters which govern how updatedone 1886 * 1887 * @return None 1888 ******************************************************************************** 1889 */ 1890 1891 void hme_update_mv_bank_in_l1_me( 1892 search_results_t *ps_search_results, 1893 layer_mv_t *ps_layer_mv, 1894 S32 i4_search_blk_x, 1895 S32 i4_search_blk_y, 1896 mvbank_update_prms_t *ps_prms) 1897 { 1898 hme_mv_t *ps_mv; 1899 hme_mv_t *ps_mv1, *ps_mv2, *ps_mv3, *ps_mv4; 1900 S08 *pi1_ref_idx, *pi1_ref_idx1, *pi1_ref_idx2, *pi1_ref_idx3, *pi1_ref_idx4; 1901 S32 i4_blk_x, i4_blk_y, i4_offset; 1902 S32 i4_j, i4_ref_id; 1903 search_node_t *ps_search_node; 1904 search_node_t *ps_search_node_8x8, *ps_search_node_4x4; 1905 1906 i4_blk_x = i4_search_blk_x << ps_prms->i4_shift; 1907 i4_blk_y = i4_search_blk_y << ps_prms->i4_shift; 1908 i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row; 1909 1910 i4_offset *= ps_layer_mv->i4_num_mvs_per_blk; 1911 1912 /* Identify the correct offset in the mvbank and the reference id buf */ 1913 ps_mv = ps_layer_mv->ps_mv + i4_offset; 1914 pi1_ref_idx = ps_layer_mv->pi1_ref_idx + i4_offset; 1915 1916 /*************************************************************************/ 1917 /* Supposing we store the mvs in the same blk size as we searched (e.g. */ 1918 /* we searched 8x8 blks and store results for 8x8 blks), then we can */ 1919 /* do a straightforward single update of results. 
This will have a 1-1 */ 1920 /* correspondence. */ 1921 /*************************************************************************/ 1922 if(ps_layer_mv->e_blk_size == ps_prms->e_search_blk_size) 1923 { 1924 search_node_t *aps_result_nodes_sorted[2][MAX_NUM_REF * 2]; 1925 1926 hme_mv_t *ps_mv_l0_root = ps_mv; 1927 hme_mv_t *ps_mv_l1_root = 1928 ps_mv + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref); 1929 1930 U32 u4_num_l0_results_updated = 0; 1931 U32 u4_num_l1_results_updated = 0; 1932 1933 S08 *pi1_ref_idx_l0_root = pi1_ref_idx; 1934 S08 *pi1_ref_idx_l1_root = 1935 pi1_ref_idx_l0_root + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref); 1936 1937 for(i4_ref_id = 0; i4_ref_id < (S32)ps_prms->i4_num_ref; i4_ref_id++) 1938 { 1939 U32 *pu4_num_results_updated; 1940 search_node_t **pps_result_nodes; 1941 1942 U08 u1_pred_dir_of_cur_ref = !ps_search_results->pu1_is_past[i4_ref_id]; 1943 1944 if(u1_pred_dir_of_cur_ref) 1945 { 1946 pu4_num_results_updated = &u4_num_l1_results_updated; 1947 pps_result_nodes = &aps_result_nodes_sorted[1][0]; 1948 } 1949 else 1950 { 1951 pu4_num_results_updated = &u4_num_l0_results_updated; 1952 pps_result_nodes = &aps_result_nodes_sorted[0][0]; 1953 } 1954 1955 ps_search_node = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N]; 1956 1957 for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++) 1958 { 1959 hme_add_new_node_to_a_sorted_array( 1960 &ps_search_node[i4_j], pps_result_nodes, NULL, *pu4_num_results_updated, 0); 1961 1962 ASSERT(ps_search_node[i4_j].i1_ref_idx == i4_ref_id); 1963 (*pu4_num_results_updated)++; 1964 } 1965 } 1966 1967 for(i4_j = 0; i4_j < (S32)u4_num_l0_results_updated; i4_j++) 1968 { 1969 COPY_SEARCH_RESULT( 1970 &ps_mv_l0_root[i4_j], 1971 &pi1_ref_idx_l0_root[i4_j], 1972 aps_result_nodes_sorted[0][i4_j], 1973 0); 1974 } 1975 1976 for(i4_j = 0; i4_j < (S32)u4_num_l1_results_updated; i4_j++) 1977 { 1978 COPY_SEARCH_RESULT( 1979 &ps_mv_l1_root[i4_j], 1980 
&pi1_ref_idx_l1_root[i4_j], 1981 aps_result_nodes_sorted[1][i4_j], 1982 0); 1983 } 1984 1985 return; 1986 } 1987 1988 /*************************************************************************/ 1989 /* Case where search blk size is 8x8, but we update 4x4 results. In this */ 1990 /* case, we need to have NxN partitions enabled in search. */ 1991 /* Further, we update on a 1-1 basis the 4x4 blk mvs from the respective */ 1992 /* NxN partition. We also update the 8x8 result into each of the 4x4 bank*/ 1993 /*************************************************************************/ 1994 ASSERT(ps_layer_mv->e_blk_size == BLK_4x4); 1995 ASSERT(ps_prms->e_search_blk_size == BLK_8x8); 1996 ASSERT((ps_search_results->i4_part_mask & (ENABLE_NxN)) == (ENABLE_NxN)); 1997 1998 /*************************************************************************/ 1999 /* For every 4x4 blk we store corresponding 4x4 results and 1 8x8 result */ 2000 /* hence the below check. */ 2001 /*************************************************************************/ 2002 ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_results_per_part + 1); 2003 2004 ps_mv1 = ps_mv; 2005 ps_mv2 = ps_mv1 + ps_layer_mv->i4_num_mvs_per_blk; 2006 ps_mv3 = ps_mv1 + (ps_layer_mv->i4_num_mvs_per_row); 2007 ps_mv4 = ps_mv3 + (ps_layer_mv->i4_num_mvs_per_blk); 2008 pi1_ref_idx1 = pi1_ref_idx; 2009 pi1_ref_idx2 = pi1_ref_idx1 + ps_layer_mv->i4_num_mvs_per_blk; 2010 pi1_ref_idx3 = pi1_ref_idx1 + (ps_layer_mv->i4_num_mvs_per_row); 2011 pi1_ref_idx4 = pi1_ref_idx3 + (ps_layer_mv->i4_num_mvs_per_blk); 2012 2013 { 2014 search_node_t *aps_result_nodes_sorted[2][MAX_NUM_REF * 4]; 2015 U08 au1_cost_shifts_for_sorted_node[2][MAX_NUM_REF * 4]; 2016 2017 S32 i; 2018 2019 hme_mv_t *ps_mv1_l0_root = ps_mv1; 2020 hme_mv_t *ps_mv1_l1_root = 2021 ps_mv1 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref); 2022 hme_mv_t *ps_mv2_l0_root = ps_mv2; 2023 hme_mv_t *ps_mv2_l1_root = 2024 ps_mv2 + 
(ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref); 2025 hme_mv_t *ps_mv3_l0_root = ps_mv3; 2026 hme_mv_t *ps_mv3_l1_root = 2027 ps_mv3 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref); 2028 hme_mv_t *ps_mv4_l0_root = ps_mv4; 2029 hme_mv_t *ps_mv4_l1_root = 2030 ps_mv4 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref); 2031 2032 U32 u4_num_l0_results_updated = 0; 2033 U32 u4_num_l1_results_updated = 0; 2034 2035 S08 *pi1_ref_idx1_l0_root = pi1_ref_idx1; 2036 S08 *pi1_ref_idx1_l1_root = pi1_ref_idx1_l0_root + (ps_prms->i4_num_active_ref_l0 * 2037 ps_layer_mv->i4_num_mvs_per_ref); 2038 S08 *pi1_ref_idx2_l0_root = pi1_ref_idx2; 2039 S08 *pi1_ref_idx2_l1_root = pi1_ref_idx2_l0_root + (ps_prms->i4_num_active_ref_l0 * 2040 ps_layer_mv->i4_num_mvs_per_ref); 2041 S08 *pi1_ref_idx3_l0_root = pi1_ref_idx3; 2042 S08 *pi1_ref_idx3_l1_root = pi1_ref_idx3_l0_root + (ps_prms->i4_num_active_ref_l0 * 2043 ps_layer_mv->i4_num_mvs_per_ref); 2044 S08 *pi1_ref_idx4_l0_root = pi1_ref_idx4; 2045 S08 *pi1_ref_idx4_l1_root = pi1_ref_idx4_l0_root + (ps_prms->i4_num_active_ref_l0 * 2046 ps_layer_mv->i4_num_mvs_per_ref); 2047 2048 for(i = 0; i < 4; i++) 2049 { 2050 hme_mv_t *ps_mv_l0_root; 2051 hme_mv_t *ps_mv_l1_root; 2052 2053 S08 *pi1_ref_idx_l0_root; 2054 S08 *pi1_ref_idx_l1_root; 2055 2056 for(i4_ref_id = 0; i4_ref_id < ps_search_results->u1_num_active_ref; i4_ref_id++) 2057 { 2058 U32 *pu4_num_results_updated; 2059 search_node_t **pps_result_nodes; 2060 U08 *pu1_cost_shifts_for_sorted_node; 2061 2062 U08 u1_pred_dir_of_cur_ref = !ps_search_results->pu1_is_past[i4_ref_id]; 2063 2064 if(u1_pred_dir_of_cur_ref) 2065 { 2066 pu4_num_results_updated = &u4_num_l1_results_updated; 2067 pps_result_nodes = &aps_result_nodes_sorted[1][0]; 2068 pu1_cost_shifts_for_sorted_node = &au1_cost_shifts_for_sorted_node[1][0]; 2069 } 2070 else 2071 { 2072 pu4_num_results_updated = &u4_num_l0_results_updated; 2073 pps_result_nodes = 
&aps_result_nodes_sorted[0][0]; 2074 pu1_cost_shifts_for_sorted_node = &au1_cost_shifts_for_sorted_node[1][0]; 2075 } 2076 2077 ps_search_node_8x8 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N]; 2078 2079 ps_search_node_4x4 = 2080 ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_TL + i]; 2081 2082 for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++) 2083 { 2084 hme_add_new_node_to_a_sorted_array( 2085 &ps_search_node_4x4[i4_j], 2086 pps_result_nodes, 2087 pu1_cost_shifts_for_sorted_node, 2088 *pu4_num_results_updated, 2089 0); 2090 2091 (*pu4_num_results_updated)++; 2092 2093 hme_add_new_node_to_a_sorted_array( 2094 &ps_search_node_8x8[i4_j], 2095 pps_result_nodes, 2096 pu1_cost_shifts_for_sorted_node, 2097 *pu4_num_results_updated, 2098 2); 2099 2100 (*pu4_num_results_updated)++; 2101 } 2102 } 2103 2104 switch(i) 2105 { 2106 case 0: 2107 { 2108 ps_mv_l0_root = ps_mv1_l0_root; 2109 ps_mv_l1_root = ps_mv1_l1_root; 2110 2111 pi1_ref_idx_l0_root = pi1_ref_idx1_l0_root; 2112 pi1_ref_idx_l1_root = pi1_ref_idx1_l1_root; 2113 2114 break; 2115 } 2116 case 1: 2117 { 2118 ps_mv_l0_root = ps_mv2_l0_root; 2119 ps_mv_l1_root = ps_mv2_l1_root; 2120 2121 pi1_ref_idx_l0_root = pi1_ref_idx2_l0_root; 2122 pi1_ref_idx_l1_root = pi1_ref_idx2_l1_root; 2123 2124 break; 2125 } 2126 case 2: 2127 { 2128 ps_mv_l0_root = ps_mv3_l0_root; 2129 ps_mv_l1_root = ps_mv3_l1_root; 2130 2131 pi1_ref_idx_l0_root = pi1_ref_idx3_l0_root; 2132 pi1_ref_idx_l1_root = pi1_ref_idx3_l1_root; 2133 2134 break; 2135 } 2136 case 3: 2137 { 2138 ps_mv_l0_root = ps_mv4_l0_root; 2139 ps_mv_l1_root = ps_mv4_l1_root; 2140 2141 pi1_ref_idx_l0_root = pi1_ref_idx4_l0_root; 2142 pi1_ref_idx_l1_root = pi1_ref_idx4_l1_root; 2143 2144 break; 2145 } 2146 } 2147 2148 u4_num_l0_results_updated = 2149 MIN((S32)u4_num_l0_results_updated, 2150 ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref); 2151 2152 u4_num_l1_results_updated = 2153 MIN((S32)u4_num_l1_results_updated, 2154 
ps_prms->i4_num_active_ref_l1 * ps_layer_mv->i4_num_mvs_per_ref); 2155 2156 for(i4_j = 0; i4_j < (S32)u4_num_l0_results_updated; i4_j++) 2157 { 2158 COPY_SEARCH_RESULT( 2159 &ps_mv_l0_root[i4_j], 2160 &pi1_ref_idx_l0_root[i4_j], 2161 aps_result_nodes_sorted[0][i4_j], 2162 0); 2163 } 2164 2165 for(i4_j = 0; i4_j < (S32)u4_num_l1_results_updated; i4_j++) 2166 { 2167 COPY_SEARCH_RESULT( 2168 &ps_mv_l1_root[i4_j], 2169 &pi1_ref_idx_l1_root[i4_j], 2170 aps_result_nodes_sorted[1][i4_j], 2171 0); 2172 } 2173 } 2174 } 2175 } 2176 2177 /** 2178 ****************************************************************************** 2179 * @brief Scales motion vector component projecte from a diff layer in same 2180 * picture (so no ref id related delta poc scaling required) 2181 ****************************************************************************** 2182 */ 2183 2184 #define SCALE_MV_COMP_RES(mvcomp_p, dim_c, dim_p) \ 2185 ((((mvcomp_p) * (dim_c)) + ((SIGN((mvcomp_p)) * (dim_p)) >> 1)) / (dim_p)) 2186 /** 2187 ******************************************************************************** 2188 * @fn hme_project_coloc_candt(search_node_t *ps_search_node, 2189 * layer_ctxt_t *ps_curr_layer, 2190 * layer_ctxt_t *ps_coarse_layer, 2191 * S32 i4_pos_x, 2192 * S32 i4_pos_y, 2193 * S08 i1_ref_id, 2194 * S08 i1_result_id) 2195 * 2196 * @brief From a coarser layer, projects a candidated situated at "colocated" 2197 * position in the picture (e.g. given x, y it will be x/2, y/2 dyadic 2198 * 2199 * @param[out] ps_search_node : contains the projected result 2200 * 2201 * @param[in] ps_curr_layer : current layer context 2202 * 2203 * @param[in] ps_coarse_layer : coarser layer context 2204 * 2205 * @param[in] i4_pos_x : x Position where mv is required (w.r.t. curr layer) 2206 * 2207 * @param[in] i4_pos_y : y Position where mv is required (w.r.t. 
curr layer) 2208 * 2209 * @param[in] i1_ref_id : reference id for which the candidate required 2210 * 2211 * @param[in] i4_result_id : result id for which the candidate required 2212 * (0 : best result, 1 : next best) 2213 * 2214 * @return None 2215 ******************************************************************************** 2216 */ 2217 2218 void hme_project_coloc_candt( 2219 search_node_t *ps_search_node, 2220 layer_ctxt_t *ps_curr_layer, 2221 layer_ctxt_t *ps_coarse_layer, 2222 S32 i4_pos_x, 2223 S32 i4_pos_y, 2224 S08 i1_ref_id, 2225 S32 i4_result_id) 2226 { 2227 S32 wd_c, ht_c, wd_p, ht_p; 2228 S32 blksize_p, blk_x, blk_y, i4_offset; 2229 layer_mv_t *ps_layer_mvbank; 2230 hme_mv_t *ps_mv; 2231 S08 *pi1_ref_idx; 2232 2233 /* Width and ht of current and prev layers */ 2234 wd_c = ps_curr_layer->i4_wd; 2235 ht_c = ps_curr_layer->i4_ht; 2236 wd_p = ps_coarse_layer->i4_wd; 2237 ht_p = ps_coarse_layer->i4_ht; 2238 2239 ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank; 2240 blksize_p = (S32)gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size]; 2241 2242 /* Safety check to avoid uninitialized access across temporal layers */ 2243 i4_pos_x = CLIP3(i4_pos_x, 0, (wd_c - blksize_p)); 2244 i4_pos_y = CLIP3(i4_pos_y, 0, (ht_c - blksize_p)); 2245 2246 /* Project the positions to prev layer */ 2247 /* TODO: convert these to scale factors at pic level */ 2248 blk_x = (i4_pos_x * wd_p) / (wd_c * blksize_p); 2249 blk_y = (i4_pos_y * ht_p) / (ht_c * blksize_p); 2250 2251 /* Pick up the mvs from the location */ 2252 i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk); 2253 i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y); 2254 2255 ps_mv = ps_layer_mvbank->ps_mv + i4_offset; 2256 pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset; 2257 2258 ps_mv += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref); 2259 pi1_ref_idx += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref); 2260 2261 ps_search_node->s_mv.i2_mvx = SCALE_MV_COMP_RES(ps_mv[i4_result_id].i2_mv_x, 
wd_c, wd_p); 2262 ps_search_node->s_mv.i2_mvy = SCALE_MV_COMP_RES(ps_mv[i4_result_id].i2_mv_y, ht_c, ht_p); 2263 ps_search_node->i1_ref_idx = pi1_ref_idx[i4_result_id]; 2264 ps_search_node->u1_subpel_done = 0; 2265 if((ps_search_node->i1_ref_idx < 0) || (ps_search_node->s_mv.i2_mvx == INTRA_MV)) 2266 { 2267 ps_search_node->i1_ref_idx = i1_ref_id; 2268 ps_search_node->s_mv.i2_mvx = 0; 2269 ps_search_node->s_mv.i2_mvy = 0; 2270 } 2271 } 2272 2273 /** 2274 ******************************************************************************** 2275 * @fn hme_project_coloc_candt_dyadic(search_node_t *ps_search_node, 2276 * layer_ctxt_t *ps_curr_layer, 2277 * layer_ctxt_t *ps_coarse_layer, 2278 * S32 i4_pos_x, 2279 * S32 i4_pos_y, 2280 * S08 i1_ref_id, 2281 * S08 i1_result_id) 2282 * 2283 * @brief From a coarser layer, projects a candidated situated at "colocated" 2284 * position in the picture when the ratios are dyadic 2285 * 2286 * @param[out] ps_search_node : contains the projected result 2287 * 2288 * @param[in] ps_curr_layer : current layer context 2289 * 2290 * @param[in] ps_coarse_layer : coarser layer context 2291 * 2292 * @param[in] i4_pos_x : x Position where mv is required (w.r.t. curr layer) 2293 * 2294 * @param[in] i4_pos_y : y Position where mv is required (w.r.t. 
curr layer)
*
*  @param[in]  i1_ref_id : reference id for which the candidate required
*
*  @param[in]  i4_result_id : result id for which the candidate required
*              (0 : best result, 1 : next best)
*
*  @return None
********************************************************************************
*/
void hme_project_coloc_candt_dyadic(
    search_node_t *ps_search_node,
    layer_ctxt_t *ps_curr_layer,
    layer_ctxt_t *ps_coarse_layer,
    S32 i4_pos_x,
    S32 i4_pos_y,
    S08 i1_ref_id,
    S32 i4_result_id)
{
    S32 wd_c, ht_c, wd_p, ht_p;
    S32 blksize_p, blk_x, blk_y, i4_offset;
    layer_mv_t *ps_layer_mvbank;
    hme_mv_t *ps_mv;
    S08 *pi1_ref_idx;

    /* Width and ht of current and prev layers */
    wd_c = ps_curr_layer->i4_wd;
    ht_c = ps_curr_layer->i4_ht;
    wd_p = ps_coarse_layer->i4_wd;
    ht_p = ps_coarse_layer->i4_ht;

    ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
    /* blksize_p = log2(wd) + 1 */
    blksize_p = (S32)gau1_blk_size_to_wd_shift[ps_layer_mvbank->e_blk_size];

    /* ASSERT for valid sizes */
    ASSERT((blksize_p == 3) || (blksize_p == 4) || (blksize_p == 5));

    /* Safety check to avoid uninitialized access across temporal layers */
    /* NOTE(review): blksize_p here is a shift amount (3..5), not a pel     */
    /* width as in hme_project_coloc_candt, so this clamp is looser —       */
    /* confirm the asymmetry is intentional.                                */
    i4_pos_x = CLIP3(i4_pos_x, 0, (wd_c - blksize_p));
    i4_pos_y = CLIP3(i4_pos_y, 0, (ht_c - blksize_p));

    /* Project the positions to prev layer */
    /* TODO: convert these to scale factors at pic level */
    blk_x = i4_pos_x >> blksize_p; // (2 * blksize_p);
    blk_y = i4_pos_y >> blksize_p; // (2 * blksize_p);

    /* Pick up the mvs from the location */
    i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
    i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);

    ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
    pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;

    /* Advance to the entries of the requested reference id */
    ps_mv += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);
    pi1_ref_idx += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);

    /* Dyadic ratio between layers: the coarse-layer mv is doubled */
    ps_search_node->s_mv.i2_mvx = ps_mv[i4_result_id].i2_mv_x << 1;
    ps_search_node->s_mv.i2_mvy = ps_mv[i4_result_id].i2_mv_y << 1;
    ps_search_node->i1_ref_idx = pi1_ref_idx[i4_result_id];
    /* Invalid projection (negative ref or intra): fall back to zero mv */
    if((ps_search_node->i1_ref_idx < 0) || (ps_search_node->s_mv.i2_mvx == INTRA_MV))
    {
        ps_search_node->i1_ref_idx = i1_ref_id;
        ps_search_node->s_mv.i2_mvx = 0;
        ps_search_node->s_mv.i2_mvy = 0;
    }
}

/* Same as hme_project_coloc_candt_dyadic, but the entry is selected by      */
/* prediction direction (u1_pred_dir) rather than an absolute reference id,  */
/* and the fallback reference on invalid projection is u1_default_ref_id.    */
void hme_project_coloc_candt_dyadic_implicit(
    search_node_t *ps_search_node,
    layer_ctxt_t *ps_curr_layer,
    layer_ctxt_t *ps_coarse_layer,
    S32 i4_pos_x,
    S32 i4_pos_y,
    S32 i4_num_act_ref_l0,
    U08 u1_pred_dir,
    U08 u1_default_ref_id,
    S32 i4_result_id)
{
    S32 wd_c, ht_c, wd_p, ht_p;
    S32 blksize_p, blk_x, blk_y, i4_offset;
    layer_mv_t *ps_layer_mvbank;
    hme_mv_t *ps_mv;
    S08 *pi1_ref_idx;

    /* Width and ht of current and prev layers */
    wd_c = ps_curr_layer->i4_wd;
    ht_c = ps_curr_layer->i4_ht;
    wd_p = ps_coarse_layer->i4_wd;
    ht_p = ps_coarse_layer->i4_ht;

    ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
    /* blksize_p = log2(wd) + 1 */
    blksize_p = (S32)gau1_blk_size_to_wd_shift[ps_layer_mvbank->e_blk_size];

    /* ASSERT for valid sizes */
    ASSERT((blksize_p == 3) || (blksize_p == 4) || (blksize_p == 5));

    /* Safety check to avoid uninitialized access across temporal layers */
    i4_pos_x = CLIP3(i4_pos_x, 0, (wd_c - blksize_p));
    i4_pos_y = CLIP3(i4_pos_y, 0, (ht_c - blksize_p));
    /* Project the positions to prev layer */
    /* TODO: convert these to scale factors at pic level */
    blk_x = i4_pos_x >> blksize_p; // (2 * blksize_p);
    blk_y = i4_pos_y >> blksize_p; // (2 * blksize_p);

    /* Pick up the mvs from the location */
    i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
    i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);

    ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
    pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;

    /* L1 entries are stored after the i4_num_act_ref_l0 L0 entries */
    if(u1_pred_dir == 1)
    {
        ps_mv += (i4_num_act_ref_l0 * ps_layer_mvbank->i4_num_mvs_per_ref);
        pi1_ref_idx += (i4_num_act_ref_l0 * ps_layer_mvbank->i4_num_mvs_per_ref);
    }

    /* Dyadic ratio between layers: the coarse-layer mv is doubled */
    ps_search_node->s_mv.i2_mvx = ps_mv[i4_result_id].i2_mv_x << 1;
    ps_search_node->s_mv.i2_mvy = ps_mv[i4_result_id].i2_mv_y << 1;
    ps_search_node->i1_ref_idx = pi1_ref_idx[i4_result_id];
    /* Invalid projection: fall back to zero mv on the default reference */
    if((ps_search_node->i1_ref_idx < 0) || (ps_search_node->s_mv.i2_mvx == INTRA_MV))
    {
        ps_search_node->i1_ref_idx = u1_default_ref_id;
        ps_search_node->s_mv.i2_mvx = 0;
        ps_search_node->s_mv.i2_mvy = 0;
    }
}

/* Scale a range-prms struct (by value / by pointer) by a left shift */
#define SCALE_RANGE_PRMS(prm1, prm2, shift)                                                        \
    {                                                                                              \
        prm1.i2_min_x = prm2.i2_min_x << shift;                                                    \
        prm1.i2_max_x = prm2.i2_max_x << shift;                                                    \
        prm1.i2_min_y = prm2.i2_min_y << shift;                                                    \
        prm1.i2_max_y = prm2.i2_max_y << shift;                                                    \
    }

#define SCALE_RANGE_PRMS_POINTERS(prm1, prm2, shift)                                               \
    {                                                                                              \
        prm1->i2_min_x = prm2->i2_min_x << shift;                                                  \
        prm1->i2_max_x = prm2->i2_max_x << shift;                                                  \
        prm1->i2_min_y = prm2->i2_min_y << shift;                                                  \
        prm1->i2_max_y = prm2->i2_max_y << shift;                                                  \
    }

/**
********************************************************************************
*  @fn    void hme_refine_frm_init(layer_ctxt_t *ps_curr_layer,
*                                  refine_prms_t *ps_refine_prms,
*                                  layer_ctxt_t *ps_coarse_layer)
*
*  @brief  Frame init of refinemnet layers in ME
*
*  @param[in,out] ps_curr_layer : current layer context (mv bank initialised)
*
*  @param[in] ps_refine_prms : refinement layer prms
*
*  @param[in] ps_coarse_layer : coarser layer context
*
*  @return None
********************************************************************************
*/
void hme_refine_frm_init(
    layer_ctxt_t *ps_curr_layer, refine_prms_t *ps_refine_prms, layer_ctxt_t *ps_coarse_layer)
{
    /* local variables */
BLK_SIZE_T e_result_blk_size = BLK_8x8;
    S32 i4_num_ref_fpel, i4_num_ref_prev_layer;

    i4_num_ref_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_ref;

    /* Explicit ref mode searches every ref of the coarser layer; otherwise */
    /* at most 2 references are searched at fpel.                           */
    if(ps_refine_prms->explicit_ref)
    {
        i4_num_ref_fpel = i4_num_ref_prev_layer;
    }
    else
    {
        i4_num_ref_fpel = 2;
    }

    /* 4x4 partials enabled implies results stored at 4x4 granularity */
    if(ps_refine_prms->i4_enable_4x4_part)
    {
        e_result_blk_size = BLK_4x4;
    }

    i4_num_ref_fpel = MIN(i4_num_ref_fpel, i4_num_ref_prev_layer);

    hme_init_mv_bank(
        ps_curr_layer,
        e_result_blk_size,
        i4_num_ref_fpel,
        ps_refine_prms->i4_num_mvbank_results,
        ps_refine_prms->i4_layer_id > 0 ? 0 : 1);
}

#if 1 //ENABLE_CU_RECURSION || TEST_AND_EVALUATE_CU_RECURSION
/**
********************************************************************************
*  @fn    void hme_init_clusters_16x16
*               (
*                   cluster_16x16_blk_t *ps_cluster_blk_16x16
*               )
*
*  @brief  Initialisations for the structs used in clustering algorithm
*
*  @param[in/out]  ps_cluster_blk_16x16: pointer to structure containing clusters
*                                        of 16x16 block
*
*  @param[in]  bidir_enabled: whether bi-pred is enabled (selects the wider
*                             B-picture centroid distance threshold)
*
*  @return None
********************************************************************************
*/
static __inline void
hme_init_clusters_16x16(cluster_16x16_blk_t *ps_cluster_blk_16x16, S32 bidir_enabled)
{
    S32 i;

    ps_cluster_blk_16x16->num_clusters = 0;
    ps_cluster_blk_16x16->intra_mv_area = 0;
    ps_cluster_blk_16x16->best_inter_cost = 0;

    for(i = 0; i < MAX_NUM_CLUSTERS_16x16; i++)
    {
        /* B-pictures tolerate a larger spread around the centroid */
        ps_cluster_blk_16x16->as_cluster_data[i].max_dist_from_centroid =
            bidir_enabled ?
MAX_DISTANCE_FROM_CENTROID_32x32_B : MAX_DISTANCE_FROM_CENTROID_32x32;
        ps_cluster_blk_32x32->as_cluster_data[i].is_valid_cluster = 0;

        ps_cluster_blk_32x32->as_cluster_data[i].bi_mv_pixel_area = 0;
        ps_cluster_blk_32x32->as_cluster_data[i].uni_mv_pixel_area = 0;
    }
    for(i = 0; i < MAX_NUM_REF; i++)
    {
        ps_cluster_blk_32x32->au1_num_clusters[i] = 0;
    }
}

/**
********************************************************************************
*  @fn    void hme_init_clusters_64x64
*               (
*                   cluster_64x64_blk_t *ps_cluster_blk_64x64
*               )
*
*  @brief  Initialisations for the structs used in clustering algorithm
*
*  @param[in/out]  ps_cluster_blk_64x64: pointer to structure containing clusters
*                                        of 64x64 block
*
*  @param[in]  bidir_enabled: whether bi-pred is enabled (selects the wider
*                             B-picture centroid distance threshold)
*
*  @return None
********************************************************************************
*/
static __inline void
hme_init_clusters_64x64(cluster_64x64_blk_t *ps_cluster_blk_64x64, S32 bidir_enabled)
{
    S32 i;

    ps_cluster_blk_64x64->num_clusters = 0;
    ps_cluster_blk_64x64->intra_mv_area = 0;
    ps_cluster_blk_64x64->best_alt_ref = -1;
    ps_cluster_blk_64x64->best_uni_ref = -1;
    ps_cluster_blk_64x64->best_inter_cost = 0;

    for(i = 0; i < MAX_NUM_CLUSTERS_64x64; i++)
    {
        /* B-pictures tolerate a larger spread around the centroid */
        ps_cluster_blk_64x64->as_cluster_data[i].max_dist_from_centroid =
            bidir_enabled ? MAX_DISTANCE_FROM_CENTROID_64x64_B : MAX_DISTANCE_FROM_CENTROID_64x64;
        ps_cluster_blk_64x64->as_cluster_data[i].is_valid_cluster = 0;

        ps_cluster_blk_64x64->as_cluster_data[i].bi_mv_pixel_area = 0;
        ps_cluster_blk_64x64->as_cluster_data[i].uni_mv_pixel_area = 0;
    }
    for(i = 0; i < MAX_NUM_REF; i++)
    {
        ps_cluster_blk_64x64->au1_num_clusters[i] = 0;
    }
}

/**
********************************************************************************
*  @fn    void hme_sort_and_assign_top_ref_ids_areawise
*               (
*                   ctb_cluster_info_t *ps_ctb_cluster_info
*               )
*
*  @brief  Finds best_uni_ref and best_alt_ref
*
*  @param[in/out]  ps_ctb_cluster_info: structure that points to ctb data
*
*  @param[in]  bidir_enabled: flag that indicates whether or not bi-pred is
*                             enabled
*
*  @param[in]  block_width: width of the block in pels
*
*  @param[in]  e_cu_pos: position of the block within the CTB
*
*  @return None
********************************************************************************
*/
void hme_sort_and_assign_top_ref_ids_areawise(
    ctb_cluster_info_t *ps_ctb_cluster_info, S32 bidir_enabled, S32 block_width, CU_POS_T e_cu_pos)
{
    cluster_32x32_blk_t *ps_32x32 = NULL;
    cluster_64x64_blk_t *ps_64x64 = NULL;
    cluster_data_t *ps_data;

    S32 j, k;

    /* Per-reference accumulated pixel areas of uni / bi mvs */
    S32 ai4_uni_area[MAX_NUM_REF];
    S32 ai4_bi_area[MAX_NUM_REF];
    S32 ai4_ref_id_found[MAX_NUM_REF];
    S32 ai4_ref_id[MAX_NUM_REF];

    S32 best_uni_ref = -1, best_alt_ref = -1;
    S32 num_clusters;
    S32 num_ref = 0;
    S32 num_clusters_evaluated = 0;
    S32 is_cur_blk_valid;

    /* Select the 32x32 sub-block or the 64x64 block cluster store */
    if(32 == block_width)
    {
        is_cur_blk_valid = (ps_ctb_cluster_info->blk_32x32_mask & (1 << e_cu_pos)) || 0;
        ps_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[e_cu_pos];
        num_clusters = ps_32x32->num_clusters;
        ps_data = &ps_32x32->as_cluster_data[0];
    }
    else
    {
        /* 64x64 is valid only when all four 32x32 sub-blocks are valid */
        is_cur_blk_valid = (ps_ctb_cluster_info->blk_32x32_mask == 0xf);
        ps_64x64 = ps_ctb_cluster_info->ps_64x64_blk;
        num_clusters = ps_64x64->num_clusters;
        ps_data = &ps_64x64->as_cluster_data[0];
    }

#if !ENABLE_4CTB_EVALUATION
    if((num_clusters > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK))
    {
        return;
    }
#endif
    if(num_clusters == 0)
    {
        return;
    }
    else if(!is_cur_blk_valid)
    {
        return;
    }

    memset(ai4_uni_area, 0, sizeof(S32) * MAX_NUM_REF);
    memset(ai4_bi_area, 0, sizeof(S32) * MAX_NUM_REF);
    memset(ai4_ref_id_found, 0, sizeof(S32) * MAX_NUM_REF);
    memset(ai4_ref_id, -1, sizeof(S32) * MAX_NUM_REF);

    /* Accumulate per-reference uni/bi pixel areas over valid clusters */
    for(j = 0; num_clusters_evaluated < num_clusters; j++, ps_data++)
    {
        S32 ref_id;

        if(!ps_data->is_valid_cluster)
        {
            continue;
        }

        ref_id = ps_data->ref_id;

        num_clusters_evaluated++;

        ai4_uni_area[ref_id] += ps_data->uni_mv_pixel_area;
        ai4_bi_area[ref_id] += ps_data->bi_mv_pixel_area;

        if(!ai4_ref_id_found[ref_id])
        {
            ai4_ref_id[ref_id] = ref_id;
            ai4_ref_id_found[ref_id] = 1;
            num_ref++;
        }
    }

    /* Single selection pass: moves the ref with max uni area into slot 0 */
    {
        S32 ai4_ref_id_temp[MAX_NUM_REF];

        memcpy(ai4_ref_id_temp, ai4_ref_id, sizeof(S32) * MAX_NUM_REF);

        for(k = 1; k < MAX_NUM_REF; k++)
        {
            if(ai4_uni_area[k] > ai4_uni_area[0])
            {
                SWAP_HME(ai4_uni_area[k], ai4_uni_area[0], S32);
                SWAP_HME(ai4_ref_id_temp[k], ai4_ref_id_temp[0], S32);
            }
        }

        best_uni_ref = ai4_ref_id_temp[0];
    }

    if(bidir_enabled)
    {
        /* Same selection for bi area: max lands in slot 0 */
        for(k = 1; k < MAX_NUM_REF; k++)
        {
            if(ai4_bi_area[k] > ai4_bi_area[0])
            {
                SWAP_HME(ai4_bi_area[k], ai4_bi_area[0], S32);
                SWAP_HME(ai4_ref_id[k], ai4_ref_id[0], S32);
            }
        }

        /* No bi-predicted area at all: no alternate reference */
        if(!ai4_bi_area[0])
        {
            best_alt_ref = -1;

            if(32 ==
block_width)
            {
                SET_VALUES_FOR_TOP_REF_IDS(ps_32x32, best_uni_ref, best_alt_ref, num_ref);
            }
            else
            {
                SET_VALUES_FOR_TOP_REF_IDS(ps_64x64, best_uni_ref, best_alt_ref, num_ref);
            }

            return;
        }

        /* If the bi winner coincides with the uni winner, pick the second */
        /* best bi reference as the alternate.                             */
        if(best_uni_ref == ai4_ref_id[0])
        {
            for(k = 2; k < MAX_NUM_REF; k++)
            {
                if(ai4_bi_area[k] > ai4_bi_area[1])
                {
                    SWAP_HME(ai4_bi_area[k], ai4_bi_area[1], S32);
                    SWAP_HME(ai4_ref_id[k], ai4_ref_id[1], S32);
                }
            }

            best_alt_ref = ai4_ref_id[1];
        }
        else
        {
            best_alt_ref = ai4_ref_id[0];
        }
    }

    if(32 == block_width)
    {
        SET_VALUES_FOR_TOP_REF_IDS(ps_32x32, best_uni_ref, best_alt_ref, num_ref);
    }
    else
    {
        SET_VALUES_FOR_TOP_REF_IDS(ps_64x64, best_uni_ref, best_alt_ref, num_ref);
    }
}

/**
********************************************************************************
*  @fn    void hme_find_top_ref_ids
*               (
*                   ctb_cluster_info_t *ps_ctb_cluster_info
*               )
*
*  @brief  Finds best_uni_ref and best_alt_ref
*
*  @param[in/out]  ps_ctb_cluster_info: structure that points to ctb data
*
*  @param[in]  bidir_enabled: whether bi-pred is enabled
*
*  @param[in]  block_width: 32 (all four 32x32 sub-blocks) or 64 (whole CTB)
*
*  @return None
********************************************************************************
*/
void hme_find_top_ref_ids(
    ctb_cluster_info_t *ps_ctb_cluster_info, S32 bidir_enabled, S32 block_width)
{
    S32 i;

    if(32 == block_width)
    {
        /* One pass per 32x32 sub-block position within the CTB */
        for(i = 0; i < 4; i++)
        {
            hme_sort_and_assign_top_ref_ids_areawise(
                ps_ctb_cluster_info, bidir_enabled, block_width, (CU_POS_T)i);
        }
    }
    else if(64 == block_width)
    {
        hme_sort_and_assign_top_ref_ids_areawise(
            ps_ctb_cluster_info, bidir_enabled, block_width, POS_NA);
    }
}

/**
********************************************************************************
*  @fn    void hme_boot_out_outlier
*               (
*                   ctb_cluster_info_t
*ps_ctb_cluster_info
*               )
*
*  @brief  Removes outlier clusters before CU tree population
*
*  @param[in/out]  ps_ctb_cluster_info: structure that points to ctb data
*
*  @param[in]  blk_width: 32 (all four 32x32 sub-blocks) or 64 (whole CTB)
*
*  @return None
********************************************************************************
*/
void hme_boot_out_outlier(ctb_cluster_info_t *ps_ctb_cluster_info, S32 blk_width)
{
    cluster_32x32_blk_t *ps_32x32;

    S32 i;

    cluster_64x64_blk_t *ps_64x64 = &ps_ctb_cluster_info->ps_64x64_blk[0];

    S32 sdi_threshold = ps_ctb_cluster_info->sdi_threshold;

    if(32 == blk_width)
    {
        /* 32x32 clusters: prune only when the cluster count per block     */
        /* exceeds the per-reference limit.                                */
        for(i = 0; i < 4; i++)
        {
            ps_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i];

            if(ps_32x32->num_clusters > MAX_NUM_CLUSTERS_IN_ONE_REF_IDX)
            {
                BUMP_OUTLIER_CLUSTERS(ps_32x32, sdi_threshold);
            }
        }
    }
    else if(64 == blk_width)
    {
        /* 64x64 clusters */
        if(ps_64x64->num_clusters > MAX_NUM_CLUSTERS_IN_ONE_REF_IDX)
        {
            BUMP_OUTLIER_CLUSTERS(ps_64x64, sdi_threshold);
        }
    }
}

/**
********************************************************************************
*  @fn    void hme_update_cluster_attributes
*               (
*                   cluster_data_t *ps_cluster_data,
*                   S32 mvx,
*                   S32 mvy,
*                   PART_ID_T e_part_id
*               )
*
*  @brief  Implementation of the clustering algorithm
*
*  @param[in/out]  ps_cluster_data: pointer to cluster_data_t struct
*
*  @param[in]  mvx : x co-ordinate of the motion vector
*
*  @param[in]  mvy : y co-ordinate of the motion vector
*
*  @param[in]  ref_idx : ref_id of the motion vector
*
*  @param[in]  e_part_id : partition id of the motion vector
*
*  @return None
********************************************************************************
*/
static __inline void hme_update_cluster_attributes(
    cluster_data_t
*ps_cluster_data,
    S32 mvx,
    S32 mvy,
    S32 mvdx,
    S32 mvdy,
    S32 ref_id,
    S32 sdi,
    U08 is_part_of_bi,
    PART_ID_T e_part_id)
{
    LWORD64 i8_mvx_sum_q8;
    LWORD64 i8_mvy_sum_q8;

    S32 centroid_posx_q8 = ps_cluster_data->s_centroid.i4_pos_x_q8;
    S32 centroid_posy_q8 = ps_cluster_data->s_centroid.i4_pos_y_q8;

    /* Grow the cluster's mv bounding box toward the delta direction */
    if((mvdx > 0) && (ps_cluster_data->min_x > mvx))
    {
        ps_cluster_data->min_x = mvx;
    }
    else if((mvdx < 0) && (ps_cluster_data->max_x < mvx))
    {
        ps_cluster_data->max_x = mvx;
    }

    if((mvdy > 0) && (ps_cluster_data->min_y > mvy))
    {
        ps_cluster_data->min_y = mvy;
    }
    else if((mvdy < 0) && (ps_cluster_data->max_y < mvy))
    {
        ps_cluster_data->max_y = mvy;
    }

    /* Append this mv to the cluster's mv list */
    {
        S32 num_mvs = ps_cluster_data->num_mvs;

        ps_cluster_data->as_mv[num_mvs].pixel_count = gai4_partition_area[e_part_id];
        ps_cluster_data->as_mv[num_mvs].mvx = mvx;
        ps_cluster_data->as_mv[num_mvs].mvy = mvy;

        /***************************/
        ps_cluster_data->as_mv[num_mvs].is_uni = !is_part_of_bi;
        ps_cluster_data->as_mv[num_mvs].sdi = sdi;
        /**************************/
    }

    /* Updation of centroid: running mean of all member mvs in Q8 */
    {
        i8_mvx_sum_q8 = (LWORD64)centroid_posx_q8 * ps_cluster_data->num_mvs + (mvx << 8);
        i8_mvy_sum_q8 = (LWORD64)centroid_posy_q8 * ps_cluster_data->num_mvs + (mvy << 8);

        ps_cluster_data->num_mvs++;

        ps_cluster_data->s_centroid.i4_pos_x_q8 =
            (WORD32)((i8_mvx_sum_q8) / ps_cluster_data->num_mvs);
        ps_cluster_data->s_centroid.i4_pos_y_q8 =
            (WORD32)((i8_mvy_sum_q8) / ps_cluster_data->num_mvs);
    }

    ps_cluster_data->area_in_pixels += gai4_partition_area[e_part_id];

    /* Track uni vs bi pixel area separately for later ref-id selection */
    if(is_part_of_bi)
    {
        ps_cluster_data->bi_mv_pixel_area += gai4_partition_area[e_part_id];
    }
    else
    {
        ps_cluster_data->uni_mv_pixel_area += gai4_partition_area[e_part_id];
    }
}

/**
********************************************************************************
*  @fn    void hme_try_cluster_merge
*               (
*                   cluster_data_t *ps_cluster_data,
*                   U08 *pu1_num_clusters,
*                   S32 idx_of_updated_cluster
*               )
*
*  @brief  Implementation of the clustering algorithm
*
*  @param[in/out]  ps_cluster_data: pointer to cluster_data_t struct
*
*  @param[in/out]  pu1_num_clusters : pointer to number of clusters
*
*  @param[in]  idx_of_updated_cluster : index of the cluster most recently
*              updated
*
*  @return Nothing
********************************************************************************
*/
void hme_try_cluster_merge(
    cluster_data_t *ps_cluster_data, U08 *pu1_num_clusters, S32 idx_of_updated_cluster)
{
    centroid_t *ps_centroid;

    S32 cur_pos_x_q8;
    S32 cur_pos_y_q8;
    S32 i;
    S32 max_dist_from_centroid;
    S32 mvd;
    S32 mvdx_q8;
    S32 mvdx;
    S32 mvdy_q8;
    S32 mvdy;
    S32 num_clusters, num_clusters_evaluated;
    S32 other_pos_x_q8;
    S32 other_pos_y_q8;

    cluster_data_t *ps_root = ps_cluster_data;
    cluster_data_t *ps_cur_cluster = &ps_cluster_data[idx_of_updated_cluster];
    centroid_t *ps_cur_centroid = &ps_cur_cluster->s_centroid;

    /* Merge is superfluous if num_clusters is 1 */
    if(*pu1_num_clusters == 1)
    {
        return;
    }

    cur_pos_x_q8 = ps_cur_centroid->i4_pos_x_q8;
    cur_pos_y_q8 = ps_cur_centroid->i4_pos_y_q8;

    max_dist_from_centroid = ps_cur_cluster->max_dist_from_centroid;

    num_clusters = *pu1_num_clusters;
    num_clusters_evaluated = 0;

    /* Look for another valid cluster of the same ref whose centroid lies */
    /* close enough to the just-updated cluster's centroid.               */
    for(i = 0; num_clusters_evaluated < num_clusters; i++, ps_cluster_data++)
    {
        if(!ps_cluster_data->is_valid_cluster)
        {
            continue;
        }
        if((ps_cluster_data->ref_id != ps_cur_cluster->ref_id) || (i == idx_of_updated_cluster))
        {
num_clusters_evaluated++; 3029 continue; 3030 } 3031 3032 ps_centroid = &ps_cluster_data->s_centroid; 3033 3034 other_pos_x_q8 = ps_centroid->i4_pos_x_q8; 3035 other_pos_y_q8 = ps_centroid->i4_pos_y_q8; 3036 3037 mvdx_q8 = (cur_pos_x_q8 - other_pos_x_q8); 3038 mvdy_q8 = (cur_pos_y_q8 - other_pos_y_q8); 3039 mvdx = (mvdx_q8 + (1 << 7)) >> 8; 3040 mvdy = (mvdy_q8 + (1 << 7)) >> 8; 3041 3042 mvd = ABS(mvdx) + ABS(mvdy); 3043 3044 if(mvd <= (max_dist_from_centroid >> 1)) 3045 { 3046 /* 0 => no updates */ 3047 /* 1 => min updated */ 3048 /* 2 => max updated */ 3049 S32 minmax_x_update_id; 3050 S32 minmax_y_update_id; 3051 3052 LWORD64 i8_mv_x_sum_self = (LWORD64)cur_pos_x_q8 * ps_cur_cluster->num_mvs; 3053 LWORD64 i8_mv_y_sum_self = (LWORD64)cur_pos_y_q8 * ps_cur_cluster->num_mvs; 3054 LWORD64 i8_mv_x_sum_cousin = (LWORD64)other_pos_x_q8 * ps_cluster_data->num_mvs; 3055 LWORD64 i8_mv_y_sum_cousin = (LWORD64)other_pos_y_q8 * ps_cluster_data->num_mvs; 3056 3057 (*pu1_num_clusters)--; 3058 3059 ps_cluster_data->is_valid_cluster = 0; 3060 3061 memcpy( 3062 &ps_cur_cluster->as_mv[ps_cur_cluster->num_mvs], 3063 ps_cluster_data->as_mv, 3064 sizeof(mv_data_t) * ps_cluster_data->num_mvs); 3065 3066 ps_cur_cluster->num_mvs += ps_cluster_data->num_mvs; 3067 ps_cur_cluster->area_in_pixels += ps_cluster_data->area_in_pixels; 3068 ps_cur_cluster->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area; 3069 ps_cur_cluster->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area; 3070 i8_mv_x_sum_self += i8_mv_x_sum_cousin; 3071 i8_mv_y_sum_self += i8_mv_y_sum_cousin; 3072 3073 ps_cur_centroid->i4_pos_x_q8 = (WORD32)(i8_mv_x_sum_self / ps_cur_cluster->num_mvs); 3074 ps_cur_centroid->i4_pos_y_q8 = (WORD32)(i8_mv_y_sum_self / ps_cur_cluster->num_mvs); 3075 3076 minmax_x_update_id = (ps_cur_cluster->min_x < ps_cluster_data->min_x) 3077 ? ((ps_cur_cluster->max_x > ps_cluster_data->max_x) ? 0 : 2) 3078 : 1; 3079 minmax_y_update_id = (ps_cur_cluster->min_y < ps_cluster_data->min_y) 3080 ? 
((ps_cur_cluster->max_y > ps_cluster_data->max_y) ? 0 : 2) 3081 : 1; 3082 3083 /* Updation of centroid spread */ 3084 switch(minmax_x_update_id + (minmax_y_update_id << 2)) 3085 { 3086 case 1: 3087 { 3088 S32 mvd, mvd_q8; 3089 3090 ps_cur_cluster->min_x = ps_cluster_data->min_x; 3091 3092 mvd_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8); 3093 mvd = (mvd_q8 + (1 << 7)) >> 8; 3094 3095 if(mvd > (max_dist_from_centroid)) 3096 { 3097 ps_cluster_data->max_dist_from_centroid = mvd; 3098 } 3099 break; 3100 } 3101 case 2: 3102 { 3103 S32 mvd, mvd_q8; 3104 3105 ps_cur_cluster->max_x = ps_cluster_data->max_x; 3106 3107 mvd_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8; 3108 mvd = (mvd_q8 + (1 << 7)) >> 8; 3109 3110 if(mvd > (max_dist_from_centroid)) 3111 { 3112 ps_cluster_data->max_dist_from_centroid = mvd; 3113 } 3114 break; 3115 } 3116 case 4: 3117 { 3118 S32 mvd, mvd_q8; 3119 3120 ps_cur_cluster->min_y = ps_cluster_data->min_y; 3121 3122 mvd_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8); 3123 mvd = (mvd_q8 + (1 << 7)) >> 8; 3124 3125 if(mvd > (max_dist_from_centroid)) 3126 { 3127 ps_cluster_data->max_dist_from_centroid = mvd; 3128 } 3129 break; 3130 } 3131 case 5: 3132 { 3133 S32 mvd; 3134 S32 mvdx, mvdx_q8; 3135 S32 mvdy, mvdy_q8; 3136 3137 mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8); 3138 mvdy = (mvdy_q8 + (1 << 7)) >> 8; 3139 3140 mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8); 3141 mvdx = (mvdx_q8 + (1 << 7)) >> 8; 3142 3143 mvd = (mvdx > mvdy) ? 
mvdx : mvdy; 3144 3145 ps_cur_cluster->min_x = ps_cluster_data->min_x; 3146 ps_cur_cluster->min_y = ps_cluster_data->min_y; 3147 3148 if(mvd > max_dist_from_centroid) 3149 { 3150 ps_cluster_data->max_dist_from_centroid = mvd; 3151 } 3152 break; 3153 } 3154 case 6: 3155 { 3156 S32 mvd; 3157 S32 mvdx, mvdx_q8; 3158 S32 mvdy, mvdy_q8; 3159 3160 mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8); 3161 mvdy = (mvdy_q8 + (1 << 7)) >> 8; 3162 3163 mvdx_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8; 3164 mvdx = (mvdx_q8 + (1 << 7)) >> 8; 3165 3166 mvd = (mvdx > mvdy) ? mvdx : mvdy; 3167 3168 ps_cur_cluster->max_x = ps_cluster_data->max_x; 3169 ps_cur_cluster->min_y = ps_cluster_data->min_y; 3170 3171 if(mvd > max_dist_from_centroid) 3172 { 3173 ps_cluster_data->max_dist_from_centroid = mvd; 3174 } 3175 break; 3176 } 3177 case 8: 3178 { 3179 S32 mvd, mvd_q8; 3180 3181 ps_cur_cluster->max_y = ps_cluster_data->max_y; 3182 3183 mvd_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8; 3184 mvd = (mvd_q8 + (1 << 7)) >> 8; 3185 3186 if(mvd > (max_dist_from_centroid)) 3187 { 3188 ps_cluster_data->max_dist_from_centroid = mvd; 3189 } 3190 break; 3191 } 3192 case 9: 3193 { 3194 S32 mvd; 3195 S32 mvdx, mvdx_q8; 3196 S32 mvdy, mvdy_q8; 3197 3198 mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8); 3199 mvdx = (mvdx_q8 + (1 << 7)) >> 8; 3200 3201 mvdy_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8; 3202 mvdy = (mvdy_q8 + (1 << 7)) >> 8; 3203 3204 mvd = (mvdx > mvdy) ? 
mvdx : mvdy; 3205 3206 ps_cur_cluster->min_x = ps_cluster_data->min_x; 3207 ps_cur_cluster->max_y = ps_cluster_data->max_y; 3208 3209 if(mvd > max_dist_from_centroid) 3210 { 3211 ps_cluster_data->max_dist_from_centroid = mvd; 3212 } 3213 break; 3214 } 3215 case 10: 3216 { 3217 S32 mvd; 3218 S32 mvdx, mvdx_q8; 3219 S32 mvdy, mvdy_q8; 3220 3221 mvdx_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8; 3222 mvdx = (mvdx_q8 + (1 << 7)) >> 8; 3223 3224 mvdy_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8; 3225 mvdy = (mvdy_q8 + (1 << 7)) >> 8; 3226 3227 mvd = (mvdx > mvdy) ? mvdx : mvdy; 3228 3229 ps_cur_cluster->max_x = ps_cluster_data->max_x; 3230 ps_cur_cluster->max_y = ps_cluster_data->max_y; 3231 3232 if(mvd > ps_cluster_data->max_dist_from_centroid) 3233 { 3234 ps_cluster_data->max_dist_from_centroid = mvd; 3235 } 3236 break; 3237 } 3238 default: 3239 { 3240 break; 3241 } 3242 } 3243 3244 hme_try_cluster_merge(ps_root, pu1_num_clusters, idx_of_updated_cluster); 3245 3246 return; 3247 } 3248 3249 num_clusters_evaluated++; 3250 } 3251 } 3252 3253 /** 3254 ******************************************************************************** 3255 * @fn void hme_find_and_update_clusters 3256 * ( 3257 * cluster_data_t *ps_cluster_data, 3258 * S32 *pi4_num_clusters, 3259 * S32 mvx, 3260 * S32 mvy, 3261 * S32 ref_idx, 3262 * PART_ID_T e_part_id 3263 * ) 3264 * 3265 * @brief Implementation fo the clustering algorithm 3266 * 3267 * @param[in/out] ps_cluster_data: pointer to cluster_data_t struct 3268 * 3269 * @param[in/out] pi4_num_clusters : pointer to number of clusters 3270 * 3271 * @param[in] mvx : x co-ordinate of the motion vector 3272 * 3273 * @param[in] mvy : y co-ordinate of the motion vector 3274 * 3275 * @param[in] ref_idx : ref_id of the motion vector 3276 * 3277 * @param[in] e_part_id : partition id of the motion vector 3278 * 3279 * @return None 3280 ******************************************************************************** 3281 */ 3282 
void hme_find_and_update_clusters( 3283 cluster_data_t *ps_cluster_data, 3284 U08 *pu1_num_clusters, 3285 S16 i2_mv_x, 3286 S16 i2_mv_y, 3287 U08 i1_ref_idx, 3288 S32 i4_sdi, 3289 PART_ID_T e_part_id, 3290 U08 is_part_of_bi) 3291 { 3292 S32 i; 3293 S32 min_mvd_cluster_id = -1; 3294 S32 mvd, mvd_limit, mvdx, mvdy; 3295 S32 min_mvdx, min_mvdy; 3296 3297 S32 min_mvd = MAX_32BIT_VAL; 3298 S32 num_clusters = *pu1_num_clusters; 3299 3300 S32 mvx = i2_mv_x; 3301 S32 mvy = i2_mv_y; 3302 S32 ref_idx = i1_ref_idx; 3303 S32 sdi = i4_sdi; 3304 S32 new_cluster_idx = MAX_NUM_CLUSTERS_16x16; 3305 3306 if(num_clusters == 0) 3307 { 3308 cluster_data_t *ps_data = &ps_cluster_data[num_clusters]; 3309 3310 ps_data->num_mvs = 1; 3311 ps_data->s_centroid.i4_pos_x_q8 = mvx << 8; 3312 ps_data->s_centroid.i4_pos_y_q8 = mvy << 8; 3313 ps_data->ref_id = ref_idx; 3314 ps_data->area_in_pixels = gai4_partition_area[e_part_id]; 3315 ps_data->as_mv[0].pixel_count = gai4_partition_area[e_part_id]; 3316 ps_data->as_mv[0].mvx = mvx; 3317 ps_data->as_mv[0].mvy = mvy; 3318 3319 /***************************/ 3320 ps_data->as_mv[0].is_uni = !is_part_of_bi; 3321 ps_data->as_mv[0].sdi = sdi; 3322 if(is_part_of_bi) 3323 { 3324 ps_data->bi_mv_pixel_area += ps_data->area_in_pixels; 3325 } 3326 else 3327 { 3328 ps_data->uni_mv_pixel_area += ps_data->area_in_pixels; 3329 } 3330 /**************************/ 3331 ps_data->max_x = mvx; 3332 ps_data->min_x = mvx; 3333 ps_data->max_y = mvy; 3334 ps_data->min_y = mvy; 3335 3336 ps_data->is_valid_cluster = 1; 3337 3338 *pu1_num_clusters = 1; 3339 } 3340 else 3341 { 3342 S32 num_clusters_evaluated = 0; 3343 3344 for(i = 0; num_clusters_evaluated < num_clusters; i++) 3345 { 3346 cluster_data_t *ps_data = &ps_cluster_data[i]; 3347 3348 centroid_t *ps_centroid; 3349 3350 S32 mvx_q8; 3351 S32 mvy_q8; 3352 S32 posx_q8; 3353 S32 posy_q8; 3354 S32 mvdx_q8; 3355 S32 mvdy_q8; 3356 3357 /* In anticipation of a possible merging of clusters */ 3358 if(ps_data->is_valid_cluster == 
0) 3359 { 3360 new_cluster_idx = i; 3361 continue; 3362 } 3363 3364 if(ref_idx != ps_data->ref_id) 3365 { 3366 num_clusters_evaluated++; 3367 continue; 3368 } 3369 3370 ps_centroid = &ps_data->s_centroid; 3371 posx_q8 = ps_centroid->i4_pos_x_q8; 3372 posy_q8 = ps_centroid->i4_pos_y_q8; 3373 3374 mvx_q8 = mvx << 8; 3375 mvy_q8 = mvy << 8; 3376 3377 mvdx_q8 = posx_q8 - mvx_q8; 3378 mvdy_q8 = posy_q8 - mvy_q8; 3379 3380 mvdx = (((mvdx_q8 + (1 << 7)) >> 8)); 3381 mvdy = (((mvdy_q8 + (1 << 7)) >> 8)); 3382 3383 mvd = ABS(mvdx) + ABS(mvdy); 3384 3385 if(mvd < min_mvd) 3386 { 3387 min_mvd = mvd; 3388 min_mvdx = mvdx; 3389 min_mvdy = mvdy; 3390 min_mvd_cluster_id = i; 3391 } 3392 3393 num_clusters_evaluated++; 3394 } 3395 3396 mvd_limit = (min_mvd_cluster_id == -1) 3397 ? ps_cluster_data[0].max_dist_from_centroid 3398 : ps_cluster_data[min_mvd_cluster_id].max_dist_from_centroid; 3399 3400 /* This condition implies that min_mvd has been updated */ 3401 if(min_mvd <= mvd_limit) 3402 { 3403 hme_update_cluster_attributes( 3404 &ps_cluster_data[min_mvd_cluster_id], 3405 mvx, 3406 mvy, 3407 min_mvdx, 3408 min_mvdy, 3409 ref_idx, 3410 sdi, 3411 is_part_of_bi, 3412 e_part_id); 3413 3414 if(PRT_NxN == ge_part_id_to_part_type[e_part_id]) 3415 { 3416 hme_try_cluster_merge(ps_cluster_data, pu1_num_clusters, min_mvd_cluster_id); 3417 } 3418 } 3419 else 3420 { 3421 cluster_data_t *ps_data = (new_cluster_idx == MAX_NUM_CLUSTERS_16x16) 3422 ? 
&ps_cluster_data[num_clusters] 3423 : &ps_cluster_data[new_cluster_idx]; 3424 3425 ps_data->num_mvs = 1; 3426 ps_data->s_centroid.i4_pos_x_q8 = mvx << 8; 3427 ps_data->s_centroid.i4_pos_y_q8 = mvy << 8; 3428 ps_data->ref_id = ref_idx; 3429 ps_data->area_in_pixels = gai4_partition_area[e_part_id]; 3430 ps_data->as_mv[0].pixel_count = gai4_partition_area[e_part_id]; 3431 ps_data->as_mv[0].mvx = mvx; 3432 ps_data->as_mv[0].mvy = mvy; 3433 3434 /***************************/ 3435 ps_data->as_mv[0].is_uni = !is_part_of_bi; 3436 ps_data->as_mv[0].sdi = sdi; 3437 if(is_part_of_bi) 3438 { 3439 ps_data->bi_mv_pixel_area += ps_data->area_in_pixels; 3440 } 3441 else 3442 { 3443 ps_data->uni_mv_pixel_area += ps_data->area_in_pixels; 3444 } 3445 /**************************/ 3446 ps_data->max_x = mvx; 3447 ps_data->min_x = mvx; 3448 ps_data->max_y = mvy; 3449 ps_data->min_y = mvy; 3450 3451 ps_data->is_valid_cluster = 1; 3452 3453 num_clusters++; 3454 *pu1_num_clusters = num_clusters; 3455 } 3456 } 3457 } 3458 3459 /** 3460 ******************************************************************************** 3461 * @fn void hme_update_32x32_cluster_attributes 3462 * ( 3463 * cluster_32x32_blk_t *ps_blk_32x32, 3464 * cluster_data_t *ps_cluster_data 3465 * ) 3466 * 3467 * @brief Updates attributes for 32x32 clusters based on the attributes of 3468 * the constituent 16x16 clusters 3469 * 3470 * @param[out] ps_blk_32x32: structure containing 32x32 block results 3471 * 3472 * @param[in] ps_cluster_data : structure containing 16x16 block results 3473 * 3474 * @return None 3475 ******************************************************************************** 3476 */ 3477 void hme_update_32x32_cluster_attributes( 3478 cluster_32x32_blk_t *ps_blk_32x32, cluster_data_t *ps_cluster_data) 3479 { 3480 cluster_data_t *ps_cur_cluster_32; 3481 3482 S32 i; 3483 S32 mvd_limit; 3484 3485 S32 num_clusters = ps_blk_32x32->num_clusters; 3486 3487 if(0 == num_clusters) 3488 { 3489 ps_cur_cluster_32 = 
&ps_blk_32x32->as_cluster_data[0]; 3490 3491 ps_blk_32x32->num_clusters++; 3492 ps_blk_32x32->au1_num_clusters[ps_cluster_data->ref_id]++; 3493 3494 ps_cur_cluster_32->is_valid_cluster = 1; 3495 3496 ps_cur_cluster_32->area_in_pixels = ps_cluster_data->area_in_pixels; 3497 ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area; 3498 ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area; 3499 3500 memcpy( 3501 ps_cur_cluster_32->as_mv, 3502 ps_cluster_data->as_mv, 3503 sizeof(mv_data_t) * ps_cluster_data->num_mvs); 3504 3505 ps_cur_cluster_32->num_mvs = ps_cluster_data->num_mvs; 3506 3507 ps_cur_cluster_32->ref_id = ps_cluster_data->ref_id; 3508 3509 ps_cur_cluster_32->max_x = ps_cluster_data->max_x; 3510 ps_cur_cluster_32->max_y = ps_cluster_data->max_y; 3511 ps_cur_cluster_32->min_x = ps_cluster_data->min_x; 3512 ps_cur_cluster_32->min_y = ps_cluster_data->min_y; 3513 3514 ps_cur_cluster_32->s_centroid = ps_cluster_data->s_centroid; 3515 } 3516 else 3517 { 3518 centroid_t *ps_centroid; 3519 3520 S32 cur_posx_q8, cur_posy_q8; 3521 S32 min_mvd_cluster_id = -1; 3522 S32 mvd; 3523 S32 mvdx; 3524 S32 mvdy; 3525 S32 mvdx_min; 3526 S32 mvdy_min; 3527 S32 mvdx_q8; 3528 S32 mvdy_q8; 3529 3530 S32 num_clusters_evaluated = 0; 3531 3532 S32 mvd_min = MAX_32BIT_VAL; 3533 3534 S32 mvx_inp_q8 = ps_cluster_data->s_centroid.i4_pos_x_q8; 3535 S32 mvy_inp_q8 = ps_cluster_data->s_centroid.i4_pos_y_q8; 3536 3537 for(i = 0; num_clusters_evaluated < num_clusters; i++) 3538 { 3539 ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[i]; 3540 3541 if(ps_cur_cluster_32->ref_id != ps_cluster_data->ref_id) 3542 { 3543 num_clusters_evaluated++; 3544 continue; 3545 } 3546 if(!ps_cluster_data->is_valid_cluster) 3547 { 3548 continue; 3549 } 3550 3551 num_clusters_evaluated++; 3552 3553 ps_centroid = &ps_cur_cluster_32->s_centroid; 3554 3555 cur_posx_q8 = ps_centroid->i4_pos_x_q8; 3556 cur_posy_q8 = ps_centroid->i4_pos_y_q8; 3557 3558 mvdx_q8 = cur_posx_q8 - 
mvx_inp_q8; 3559 mvdy_q8 = cur_posy_q8 - mvy_inp_q8; 3560 3561 mvdx = (mvdx_q8 + (1 << 7)) >> 8; 3562 mvdy = (mvdy_q8 + (1 << 7)) >> 8; 3563 3564 mvd = ABS(mvdx) + ABS(mvdy); 3565 3566 if(mvd < mvd_min) 3567 { 3568 mvd_min = mvd; 3569 mvdx_min = mvdx; 3570 mvdy_min = mvdy; 3571 min_mvd_cluster_id = i; 3572 } 3573 } 3574 3575 ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[0]; 3576 3577 mvd_limit = (min_mvd_cluster_id == -1) 3578 ? ps_cur_cluster_32[0].max_dist_from_centroid 3579 : ps_cur_cluster_32[min_mvd_cluster_id].max_dist_from_centroid; 3580 3581 if(mvd_min <= mvd_limit) 3582 { 3583 LWORD64 i8_updated_posx; 3584 LWORD64 i8_updated_posy; 3585 WORD32 minmax_updated_x = 0; 3586 WORD32 minmax_updated_y = 0; 3587 3588 ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[min_mvd_cluster_id]; 3589 3590 ps_centroid = &ps_cur_cluster_32->s_centroid; 3591 3592 ps_cur_cluster_32->is_valid_cluster = 1; 3593 3594 ps_cur_cluster_32->area_in_pixels += ps_cluster_data->area_in_pixels; 3595 ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area; 3596 ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area; 3597 3598 memcpy( 3599 &ps_cur_cluster_32->as_mv[ps_cur_cluster_32->num_mvs], 3600 ps_cluster_data->as_mv, 3601 sizeof(mv_data_t) * ps_cluster_data->num_mvs); 3602 3603 if((mvdx_min > 0) && ((ps_cur_cluster_32->min_x << 8) > mvx_inp_q8)) 3604 { 3605 ps_cur_cluster_32->min_x = (mvx_inp_q8 + ((1 << 7))) >> 8; 3606 minmax_updated_x = 1; 3607 } 3608 else if((mvdx_min < 0) && ((ps_cur_cluster_32->max_x << 8) < mvx_inp_q8)) 3609 { 3610 ps_cur_cluster_32->max_x = (mvx_inp_q8 + (1 << 7)) >> 8; 3611 minmax_updated_x = 2; 3612 } 3613 3614 if((mvdy_min > 0) && ((ps_cur_cluster_32->min_y << 8) > mvy_inp_q8)) 3615 { 3616 ps_cur_cluster_32->min_y = (mvy_inp_q8 + (1 << 7)) >> 8; 3617 minmax_updated_y = 1; 3618 } 3619 else if((mvdy_min < 0) && ((ps_cur_cluster_32->max_y << 8) < mvy_inp_q8)) 3620 { 3621 ps_cur_cluster_32->max_y = (mvy_inp_q8 + (1 << 7)) 
>> 8; 3622 minmax_updated_y = 2; 3623 } 3624 3625 switch((minmax_updated_y << 2) + minmax_updated_x) 3626 { 3627 case 1: 3628 { 3629 S32 mvd, mvd_q8; 3630 3631 mvd_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_32->min_x << 8); 3632 mvd = (mvd_q8 + (1 << 7)) >> 8; 3633 3634 if(mvd > (mvd_limit)) 3635 { 3636 ps_cur_cluster_32->max_dist_from_centroid = mvd; 3637 } 3638 break; 3639 } 3640 case 2: 3641 { 3642 S32 mvd, mvd_q8; 3643 3644 mvd_q8 = (ps_cur_cluster_32->max_x << 8) - ps_centroid->i4_pos_x_q8; 3645 mvd = (mvd_q8 + (1 << 7)) >> 8; 3646 3647 if(mvd > (mvd_limit)) 3648 { 3649 ps_cur_cluster_32->max_dist_from_centroid = mvd; 3650 } 3651 break; 3652 } 3653 case 4: 3654 { 3655 S32 mvd, mvd_q8; 3656 3657 mvd_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_32->min_y << 8); 3658 mvd = (mvd_q8 + (1 << 7)) >> 8; 3659 3660 if(mvd > (mvd_limit)) 3661 { 3662 ps_cur_cluster_32->max_dist_from_centroid = mvd; 3663 } 3664 break; 3665 } 3666 case 5: 3667 { 3668 S32 mvd; 3669 S32 mvdx, mvdx_q8; 3670 S32 mvdy, mvdy_q8; 3671 3672 mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_32->min_y << 8); 3673 mvdy = (mvdy_q8 + (1 << 7)) >> 8; 3674 3675 mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_32->min_x << 8); 3676 mvdx = (mvdx_q8 + (1 << 7)) >> 8; 3677 3678 mvd = (mvdx > mvdy) ? mvdx : mvdy; 3679 3680 if(mvd > mvd_limit) 3681 { 3682 ps_cur_cluster_32->max_dist_from_centroid = mvd; 3683 } 3684 break; 3685 } 3686 case 6: 3687 { 3688 S32 mvd; 3689 S32 mvdx, mvdx_q8; 3690 S32 mvdy, mvdy_q8; 3691 3692 mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_32->min_y << 8); 3693 mvdy = (mvdy_q8 + (1 << 7)) >> 8; 3694 3695 mvdx_q8 = (ps_cur_cluster_32->max_x << 8) - ps_centroid->i4_pos_x_q8; 3696 mvdx = (mvdx_q8 + (1 << 7)) >> 8; 3697 3698 mvd = (mvdx > mvdy) ? 
mvdx : mvdy; 3699 3700 if(mvd > mvd_limit) 3701 { 3702 ps_cur_cluster_32->max_dist_from_centroid = mvd; 3703 } 3704 break; 3705 } 3706 case 8: 3707 { 3708 S32 mvd, mvd_q8; 3709 3710 mvd_q8 = (ps_cur_cluster_32->max_y << 8) - ps_centroid->i4_pos_y_q8; 3711 mvd = (mvd_q8 + (1 << 7)) >> 8; 3712 3713 if(mvd > (mvd_limit)) 3714 { 3715 ps_cur_cluster_32->max_dist_from_centroid = mvd; 3716 } 3717 break; 3718 } 3719 case 9: 3720 { 3721 S32 mvd; 3722 S32 mvdx, mvdx_q8; 3723 S32 mvdy, mvdy_q8; 3724 3725 mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_32->min_x << 8); 3726 mvdx = (mvdx_q8 + (1 << 7)) >> 8; 3727 3728 mvdy_q8 = (ps_cur_cluster_32->max_y << 8) - ps_centroid->i4_pos_y_q8; 3729 mvdy = (mvdy_q8 + (1 << 7)) >> 8; 3730 3731 mvd = (mvdx > mvdy) ? mvdx : mvdy; 3732 3733 if(mvd > mvd_limit) 3734 { 3735 ps_cur_cluster_32->max_dist_from_centroid = mvd; 3736 } 3737 break; 3738 } 3739 case 10: 3740 { 3741 S32 mvd; 3742 S32 mvdx, mvdx_q8; 3743 S32 mvdy, mvdy_q8; 3744 3745 mvdx_q8 = (ps_cur_cluster_32->max_x << 8) - ps_centroid->i4_pos_x_q8; 3746 mvdx = (mvdx_q8 + (1 << 7)) >> 8; 3747 3748 mvdy_q8 = (ps_cur_cluster_32->max_y << 8) - ps_centroid->i4_pos_y_q8; 3749 mvdy = (mvdy_q8 + (1 << 7)) >> 8; 3750 3751 mvd = (mvdx > mvdy) ? 
mvdx : mvdy; 3752 3753 if(mvd > ps_cur_cluster_32->max_dist_from_centroid) 3754 { 3755 ps_cur_cluster_32->max_dist_from_centroid = mvd; 3756 } 3757 break; 3758 } 3759 default: 3760 { 3761 break; 3762 } 3763 } 3764 3765 i8_updated_posx = ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cur_cluster_32->num_mvs) + 3766 ((LWORD64)mvx_inp_q8 * ps_cluster_data->num_mvs); 3767 i8_updated_posy = ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cur_cluster_32->num_mvs) + 3768 ((LWORD64)mvy_inp_q8 * ps_cluster_data->num_mvs); 3769 3770 ps_cur_cluster_32->num_mvs += ps_cluster_data->num_mvs; 3771 3772 ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cur_cluster_32->num_mvs); 3773 ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cur_cluster_32->num_mvs); 3774 } 3775 else if(num_clusters < MAX_NUM_CLUSTERS_32x32) 3776 { 3777 ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[num_clusters]; 3778 3779 ps_blk_32x32->num_clusters++; 3780 ps_blk_32x32->au1_num_clusters[ps_cluster_data->ref_id]++; 3781 3782 ps_cur_cluster_32->is_valid_cluster = 1; 3783 3784 ps_cur_cluster_32->area_in_pixels = ps_cluster_data->area_in_pixels; 3785 ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area; 3786 ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area; 3787 3788 memcpy( 3789 ps_cur_cluster_32->as_mv, 3790 ps_cluster_data->as_mv, 3791 sizeof(mv_data_t) * ps_cluster_data->num_mvs); 3792 3793 ps_cur_cluster_32->num_mvs = ps_cluster_data->num_mvs; 3794 3795 ps_cur_cluster_32->ref_id = ps_cluster_data->ref_id; 3796 3797 ps_cur_cluster_32->max_x = ps_cluster_data->max_x; 3798 ps_cur_cluster_32->max_y = ps_cluster_data->max_y; 3799 ps_cur_cluster_32->min_x = ps_cluster_data->min_x; 3800 ps_cur_cluster_32->min_y = ps_cluster_data->min_y; 3801 3802 ps_cur_cluster_32->s_centroid = ps_cluster_data->s_centroid; 3803 } 3804 } 3805 } 3806 3807 /** 3808 ******************************************************************************** 3809 * @fn void 
hme_update_64x64_cluster_attributes
*               (
*                   cluster_64x64_blk_t *ps_blk_64x64,
*                   cluster_data_t *ps_cluster_data
*               )
*
*  @brief  Updates attributes for 64x64 clusters based on the attributes of
*          the constituent 16x16 clusters
*
*  @param[out]  ps_blk_64x64: structure containing 64x64 block results
*
*  @param[in]  ps_cluster_data : structure containing 32x32 block results
*
*  @return None
********************************************************************************
*/
void hme_update_64x64_cluster_attributes(
    cluster_64x64_blk_t *ps_blk_64x64, cluster_data_t *ps_cluster_data)
{
    cluster_data_t *ps_cur_cluster_64;

    S32 i;
    S32 mvd_limit;

    S32 num_clusters = ps_blk_64x64->num_clusters;

    if(0 == num_clusters)
    {
        /* First incoming cluster: copy it wholesale into slot 0 */
        ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[0];

        ps_blk_64x64->num_clusters++;
        ps_blk_64x64->au1_num_clusters[ps_cluster_data->ref_id]++;

        ps_cur_cluster_64->is_valid_cluster = 1;

        /* NOTE(review): '+=' on the uni/bi areas assumes this slot starts  */
        /* zeroed (presumably memset elsewhere) — confirm against callers   */
        ps_cur_cluster_64->area_in_pixels = ps_cluster_data->area_in_pixels;
        ps_cur_cluster_64->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
        ps_cur_cluster_64->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;

        memcpy(
            ps_cur_cluster_64->as_mv,
            ps_cluster_data->as_mv,
            sizeof(mv_data_t) * ps_cluster_data->num_mvs);

        ps_cur_cluster_64->num_mvs = ps_cluster_data->num_mvs;

        ps_cur_cluster_64->ref_id = ps_cluster_data->ref_id;

        ps_cur_cluster_64->max_x = ps_cluster_data->max_x;
        ps_cur_cluster_64->max_y = ps_cluster_data->max_y;
        ps_cur_cluster_64->min_x = ps_cluster_data->min_x;
        ps_cur_cluster_64->min_y = ps_cluster_data->min_y;

        ps_cur_cluster_64->s_centroid = ps_cluster_data->s_centroid;
    }
    else
    {
        centroid_t *ps_centroid;

        S32 cur_posx_q8, cur_posy_q8;
        S32 min_mvd_cluster_id = -1;
        S32 mvd;
        S32 mvdx;
        S32 mvdy;
        S32 mvdx_min;
        S32 mvdy_min;
        S32 mvdx_q8;
        S32 mvdy_q8;

        S32 num_clusters_evaluated = 0;

        S32 mvd_min = MAX_32BIT_VAL;

        /* Incoming cluster's centroid in Q8 pel units */
        S32 mvx_inp_q8 = ps_cluster_data->s_centroid.i4_pos_x_q8;
        S32 mvy_inp_q8 = ps_cluster_data->s_centroid.i4_pos_y_q8;

        /* Find the valid same-ref 64x64 cluster with the nearest centroid */
        for(i = 0; num_clusters_evaluated < num_clusters; i++)
        {
            ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[i];

            if(ps_cur_cluster_64->ref_id != ps_cluster_data->ref_id)
            {
                num_clusters_evaluated++;
                continue;
            }

            if(!ps_cur_cluster_64->is_valid_cluster)
            {
                continue;
            }

            num_clusters_evaluated++;

            ps_centroid = &ps_cur_cluster_64->s_centroid;

            cur_posx_q8 = ps_centroid->i4_pos_x_q8;
            cur_posy_q8 = ps_centroid->i4_pos_y_q8;

            /* Q8 centroid separation, rounded to integer pel units */
            mvdx_q8 = cur_posx_q8 - mvx_inp_q8;
            mvdy_q8 = cur_posy_q8 - mvy_inp_q8;

            mvdx = (mvdx_q8 + (1 << 7)) >> 8;
            mvdy = (mvdy_q8 + (1 << 7)) >> 8;

            mvd = ABS(mvdx) + ABS(mvdy);

            if(mvd < mvd_min)
            {
                mvd_min = mvd;
                mvdx_min = mvdx;
                mvdy_min = mvdy;
                min_mvd_cluster_id = i;
            }
        }

        ps_cur_cluster_64 = ps_blk_64x64->as_cluster_data;

        mvd_limit = (min_mvd_cluster_id == -1)
                        ? ps_cur_cluster_64[0].max_dist_from_centroid
                        : ps_cur_cluster_64[min_mvd_cluster_id].max_dist_from_centroid;

        if(mvd_min <= mvd_limit)
        {
            /* Absorb the incoming cluster into the nearest 64x64 cluster */
            LWORD64 i8_updated_posx;
            LWORD64 i8_updated_posy;
            WORD32 minmax_updated_x = 0;
            WORD32 minmax_updated_y = 0;

            ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[min_mvd_cluster_id];

            ps_centroid = &ps_cur_cluster_64->s_centroid;

            ps_cur_cluster_64->is_valid_cluster = 1;

            ps_cur_cluster_64->area_in_pixels += ps_cluster_data->area_in_pixels;
            ps_cur_cluster_64->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
            ps_cur_cluster_64->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;

            memcpy(
                &ps_cur_cluster_64->as_mv[ps_cur_cluster_64->num_mvs],
                ps_cluster_data->as_mv,
                sizeof(mv_data_t) * ps_cluster_data->num_mvs);

            /* Widen the min/max spread if the incoming centroid lies outside */
            if((mvdx_min > 0) && ((ps_cur_cluster_64->min_x << 8) > mvx_inp_q8))
            {
                ps_cur_cluster_64->min_x = (mvx_inp_q8 + (1 << 7)) >> 8;
                minmax_updated_x = 1;
            }
            else if((mvdx_min < 0) && ((ps_cur_cluster_64->max_x << 8) < mvx_inp_q8))
            {
                ps_cur_cluster_64->max_x = (mvx_inp_q8 + (1 << 7)) >> 8;
                minmax_updated_x = 2;
            }

            if((mvdy_min > 0) && ((ps_cur_cluster_64->min_y << 8) > mvy_inp_q8))
            {
                ps_cur_cluster_64->min_y = (mvy_inp_q8 + (1 << 7)) >> 8;
                minmax_updated_y = 1;
            }
            else if((mvdy_min < 0) && ((ps_cur_cluster_64->max_y << 8) < mvy_inp_q8))
            {
                ps_cur_cluster_64->max_y = (mvy_inp_q8 + (1 << 7)) >> 8;
                minmax_updated_y = 2;
            }

            /* Re-derive max_dist_from_centroid for whichever bounds moved: */
            /* x code: 0 => none, 1 => min_x, 2 => max_x; y likewise in     */
            /* the upper two bits                                           */
            switch((minmax_updated_y << 2) + minmax_updated_x)
            {
            case 1:
            {
                S32 mvd, mvd_q8;

                mvd_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_64->min_x << 8);
                mvd = (mvd_q8 + (1 << 7)) >> 8;

                if(mvd > (mvd_limit))
                {
                    ps_cur_cluster_64->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 2:
            {
                S32 mvd, mvd_q8;

                mvd_q8 = (ps_cur_cluster_64->max_x << 8) - ps_centroid->i4_pos_x_q8;
                mvd = (mvd_q8 + (1 << 7)) >> 8;

                if(mvd > (mvd_limit))
                {
                    ps_cur_cluster_64->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 4:
            {
                S32 mvd, mvd_q8;

                mvd_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_64->min_y << 8);
                mvd = (mvd_q8 + (1 << 7)) >> 8;

                if(mvd > (mvd_limit))
                {
                    ps_cur_cluster_64->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 5:
            {
                /* Both min_x and min_y moved */
                S32 mvd;
                S32 mvdx, mvdx_q8;
                S32 mvdy, mvdy_q8;

                mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_64->min_y << 8);
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;

                mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_64->min_x << 8);
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;

                mvd = (mvdx > mvdy) ? mvdx : mvdy;

                if(mvd > mvd_limit)
                {
                    ps_cur_cluster_64->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 6:
            {
                /* max_x and min_y moved */
                S32 mvd;
                S32 mvdx, mvdx_q8;
                S32 mvdy, mvdy_q8;

                mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_64->min_y << 8);
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;

                mvdx_q8 = (ps_cur_cluster_64->max_x << 8) - ps_centroid->i4_pos_x_q8;
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;

                mvd = (mvdx > mvdy) ? mvdx : mvdy;

                if(mvd > mvd_limit)
                {
                    ps_cur_cluster_64->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 8:
            {
                S32 mvd, mvd_q8;

                mvd_q8 = (ps_cur_cluster_64->max_y << 8) - ps_centroid->i4_pos_y_q8;
                mvd = (mvd_q8 + (1 << 7)) >> 8;

                if(mvd > (mvd_limit))
                {
                    ps_cur_cluster_64->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 9:
            {
                /* min_x and max_y moved */
                S32 mvd;
                S32 mvdx, mvdx_q8;
                S32 mvdy, mvdy_q8;

                mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_64->min_x << 8);
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;

                mvdy_q8 = (ps_cur_cluster_64->max_y << 8) - ps_centroid->i4_pos_y_q8;
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;

                mvd = (mvdx > mvdy) ? mvdx : mvdy;

                if(mvd > mvd_limit)
                {
                    ps_cur_cluster_64->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 10:
            {
                /* Both max_x and max_y moved */
                S32 mvd;
                S32 mvdx, mvdx_q8;
                S32 mvdy, mvdy_q8;

                mvdx_q8 = (ps_cur_cluster_64->max_x << 8) - ps_centroid->i4_pos_x_q8;
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;

                mvdy_q8 = (ps_cur_cluster_64->max_y << 8) - ps_centroid->i4_pos_y_q8;
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;

                mvd = (mvdx > mvdy) ? mvdx : mvdy;

                if(mvd > ps_cur_cluster_64->max_dist_from_centroid)
                {
                    ps_cur_cluster_64->max_dist_from_centroid = mvd;
                }
                break;
            }
            default:
            {
                break;
            }
            }

            /* Centroid becomes the MV-count weighted mean of both clusters */
            i8_updated_posx = ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cur_cluster_64->num_mvs) +
                              ((LWORD64)mvx_inp_q8 * ps_cluster_data->num_mvs);
            i8_updated_posy = ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cur_cluster_64->num_mvs) +
                              ((LWORD64)mvy_inp_q8 * ps_cluster_data->num_mvs);

            ps_cur_cluster_64->num_mvs += ps_cluster_data->num_mvs;

            ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cur_cluster_64->num_mvs);
            ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cur_cluster_64->num_mvs);
        }
        else if(num_clusters < MAX_NUM_CLUSTERS_64x64)
        {
            /* Too far from every existing cluster: append a new one */
            ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[num_clusters];

            ps_blk_64x64->num_clusters++;
            ps_blk_64x64->au1_num_clusters[ps_cluster_data->ref_id]++;

            ps_cur_cluster_64->is_valid_cluster = 1;

            ps_cur_cluster_64->area_in_pixels = ps_cluster_data->area_in_pixels;
            ps_cur_cluster_64->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
            ps_cur_cluster_64->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;

            memcpy(
                &ps_cur_cluster_64->as_mv[0],
                ps_cluster_data->as_mv,
                sizeof(mv_data_t) * ps_cluster_data->num_mvs);

            ps_cur_cluster_64->num_mvs = ps_cluster_data->num_mvs;

            ps_cur_cluster_64->ref_id = ps_cluster_data->ref_id;

            ps_cur_cluster_64->max_x = ps_cluster_data->max_x;
            ps_cur_cluster_64->max_y = ps_cluster_data->max_y;
            ps_cur_cluster_64->min_x = ps_cluster_data->min_x;
            ps_cur_cluster_64->min_y = ps_cluster_data->min_y;

            ps_cur_cluster_64->s_centroid = ps_cluster_data->s_centroid;
        }
    }
}

/**
********************************************************************************
*  @fn     void 
hme_update_32x32_clusters
*        (
*            cluster_32x32_blk_t *ps_blk_32x32,
*            cluster_16x16_blk_t *ps_blk_16x16
*        )
*
* @brief Updates attributes for 32x32 clusters based on the attributes of
*        the constituent 16x16 clusters
*
* @param[out] ps_blk_32x32: structure containing 32x32 block results
*
* @param[in] ps_blk_16x16 : array of the four constituent 16x16 block results
*
* @return None
********************************************************************************
*/
static __inline void
hme_update_32x32_clusters(cluster_32x32_blk_t *ps_blk_32x32, cluster_16x16_blk_t *ps_blk_16x16)
{
    cluster_16x16_blk_t *ps_blk_16x16_cur;
    cluster_data_t *ps_cur_cluster;

    S32 i, j;
    S32 num_clusters_cur_16x16_blk;

    /* Fold each of the four 16x16 children into the 32x32 aggregate */
    for(i = 0; i < 4; i++)
    {
        S32 num_clusters_evaluated = 0;

        ps_blk_16x16_cur = &ps_blk_16x16[i];

        num_clusters_cur_16x16_blk = ps_blk_16x16_cur->num_clusters;

        ps_blk_32x32->intra_mv_area += ps_blk_16x16_cur->intra_mv_area;

        ps_blk_32x32->best_inter_cost += ps_blk_16x16_cur->best_inter_cost;

        /* The cluster array can be sparse (invalidated entries), so iterate
         * until 'num_clusters' valid entries have been consumed */
        for(j = 0; num_clusters_evaluated < num_clusters_cur_16x16_blk; j++)
        {
            ps_cur_cluster = &ps_blk_16x16_cur->as_cluster_data[j];

            if(!ps_cur_cluster->is_valid_cluster)
            {
                continue;
            }

            hme_update_32x32_cluster_attributes(ps_blk_32x32, ps_cur_cluster);

            num_clusters_evaluated++;
        }
    }
}

/**
********************************************************************************
* @fn void hme_update_64x64_clusters
*        (
*            cluster_64x64_blk_t *ps_blk_64x64,
*            cluster_32x32_blk_t *ps_blk_32x32
*        )
*
* @brief Updates attributes for the 64x64 cluster based on the attributes of
*        the constituent 32x32 clusters
*
* @param[out] ps_blk_64x64: structure containing 64x64 block results
*
* @param[in] ps_blk_32x32 : array of the four constituent 32x32 block results
*
* @return None
********************************************************************************
*/
static __inline void
hme_update_64x64_clusters(cluster_64x64_blk_t *ps_blk_64x64, cluster_32x32_blk_t *ps_blk_32x32)
{
    cluster_32x32_blk_t *ps_blk_32x32_cur;
    cluster_data_t *ps_cur_cluster;

    S32 i, j;
    S32 num_clusters_cur_32x32_blk;

    /* Fold each of the four 32x32 children into the 64x64 aggregate */
    for(i = 0; i < 4; i++)
    {
        S32 num_clusters_evaluated = 0;

        ps_blk_32x32_cur = &ps_blk_32x32[i];

        num_clusters_cur_32x32_blk = ps_blk_32x32_cur->num_clusters;

        ps_blk_64x64->intra_mv_area += ps_blk_32x32_cur->intra_mv_area;
        ps_blk_64x64->best_inter_cost += ps_blk_32x32_cur->best_inter_cost;

        /* Sparse cluster array: count valid entries, skip invalidated ones */
        for(j = 0; num_clusters_evaluated < num_clusters_cur_32x32_blk; j++)
        {
            ps_cur_cluster = &ps_blk_32x32_cur->as_cluster_data[j];

            if(!ps_cur_cluster->is_valid_cluster)
            {
                continue;
            }

            hme_update_64x64_cluster_attributes(ps_blk_64x64, ps_cur_cluster);

            num_clusters_evaluated++;
        }
    }
}

/**
********************************************************************************
* @fn S32 hme_try_merge_clusters_blksize_gt_16
*        (
*            cluster_data_t *ps_cluster_data,
*            S32 num_clusters
*        )
*
* @brief Merging clusters from blocks of size 32x32 and greater
*
* @param[in/out] ps_cluster_data: array of cluster data; entry 0 is the merge
*                                 target, later entries may be absorbed into it
*
* @param[in] num_clusters : number of clusters present in ps_cluster_data
*
* @return number of merges performed (0 if none)
********************************************************************************
*/
S32 hme_try_merge_clusters_blksize_gt_16(cluster_data_t *ps_cluster_data, S32 num_clusters)
{
    centroid_t *ps_cur_centroid;
    cluster_data_t *ps_cur_cluster;

    S32 i, mvd;
    S32 mvdx, mvdy, mvdx_q8, mvdy_q8;

    centroid_t
*ps_centroid = &ps_cluster_data->s_centroid;

    /* Cached bound of cluster 0; the merge radius below is half of this */
    S32 mvd_limit = ps_cluster_data->max_dist_from_centroid;
    S32 ref_id = ps_cluster_data->ref_id;

    S32 node0_posx_q8 = ps_centroid->i4_pos_x_q8;
    S32 node0_posy_q8 = ps_centroid->i4_pos_y_q8;
    S32 num_clusters_evaluated = 1;
    S32 ret_value = 0;

    if(1 >= num_clusters)
    {
        return ret_value;
    }

    for(i = 1; num_clusters_evaluated < num_clusters; i++)
    {
        S32 cur_posx_q8;
        S32 cur_posy_q8;

        ps_cur_cluster = &ps_cluster_data[i];

        /* Only clusters referring to the same reference picture can merge */
        if((ref_id != ps_cur_cluster->ref_id))
        {
            num_clusters_evaluated++;
            continue;
        }

        if((!ps_cur_cluster->is_valid_cluster))
        {
            continue;
        }

        num_clusters_evaluated++;

        ps_cur_centroid = &ps_cur_cluster->s_centroid;

        cur_posx_q8 = ps_cur_centroid->i4_pos_x_q8;
        cur_posy_q8 = ps_cur_centroid->i4_pos_y_q8;

        mvdx_q8 = cur_posx_q8 - node0_posx_q8;
        mvdy_q8 = cur_posy_q8 - node0_posy_q8;

        /* Round Q8 deltas to integer pel */
        mvdx = (mvdx_q8 + (1 << 7)) >> 8;
        mvdy = (mvdy_q8 + (1 << 7)) >> 8;

        /* City-block (L1) distance between the two centroids */
        mvd = ABS(mvdx) + ABS(mvdy);

        if(mvd <= (mvd_limit >> 1))
        {
            LWORD64 i8_updated_posx;
            LWORD64 i8_updated_posy;
            WORD32 minmax_updated_x = 0;
            WORD32 minmax_updated_y = 0;

            /* Absorb cluster i into cluster 0 */
            ps_cur_cluster->is_valid_cluster = 0;

            ps_cluster_data->area_in_pixels += ps_cur_cluster->area_in_pixels;
            ps_cluster_data->bi_mv_pixel_area += ps_cur_cluster->bi_mv_pixel_area;
            ps_cluster_data->uni_mv_pixel_area += ps_cur_cluster->uni_mv_pixel_area;

            memcpy(
                &ps_cluster_data->as_mv[ps_cluster_data->num_mvs],
                ps_cur_cluster->as_mv,
                sizeof(mv_data_t) * ps_cur_cluster->num_mvs);

            /* Grow the bounding box towards the absorbed centroid; the
             * 1/2 codes record which edge moved on each axis (1 = min edge,
             * 2 = max edge) and are combined into a switch selector below */
            if(mvdx > 0)
            {
                ps_cluster_data->min_x = (cur_posx_q8 + (1 << 7)) >> 8;
                minmax_updated_x = 1;
            }
            else
            {
                ps_cluster_data->max_x = (cur_posx_q8 + (1 << 7)) >> 8;
                minmax_updated_x = 2;
            }

            if(mvdy > 0)
            {
                ps_cluster_data->min_y = (cur_posy_q8 + (1 << 7)) >> 8;
                minmax_updated_y = 1;
            }
            else
            {
                ps_cluster_data->max_y = (cur_posy_q8 + (1 << 7)) >> 8;
                minmax_updated_y = 2;
            }

            /* Re-derive max_dist_from_centroid from whichever edges moved */
            switch((minmax_updated_y << 2) + minmax_updated_x)
            {
            case 1:
            {
                S32 mvd, mvd_q8;

                mvd_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
                mvd = (mvd_q8 + (1 << 7)) >> 8;

                if(mvd > (mvd_limit))
                {
                    ps_cluster_data->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 2:
            {
                S32 mvd, mvd_q8;

                mvd_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
                mvd = (mvd_q8 + (1 << 7)) >> 8;

                if(mvd > (mvd_limit))
                {
                    ps_cluster_data->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 4:
            {
                S32 mvd, mvd_q8;

                mvd_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
                mvd = (mvd_q8 + (1 << 7)) >> 8;

                if(mvd > (mvd_limit))
                {
                    ps_cluster_data->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 5:
            {
                S32 mvd;
                S32 mvdx, mvdx_q8;
                S32 mvdy, mvdy_q8;

                mvdy_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;

                mvdx_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;

                mvd = (mvdx > mvdy) ? mvdx : mvdy;

                if(mvd > mvd_limit)
                {
                    ps_cluster_data->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 6:
            {
                S32 mvd;
                S32 mvdx, mvdx_q8;
                S32 mvdy, mvdy_q8;

                mvdy_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;

                mvdx_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;

                mvd = (mvdx > mvdy) ? mvdx : mvdy;

                if(mvd > mvd_limit)
                {
                    ps_cluster_data->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 8:
            {
                S32 mvd, mvd_q8;

                mvd_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
                mvd = (mvd_q8 + (1 << 7)) >> 8;

                if(mvd > (mvd_limit))
                {
                    ps_cluster_data->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 9:
            {
                S32 mvd;
                S32 mvdx, mvdx_q8;
                S32 mvdy, mvdy_q8;

                mvdx_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;

                mvdy_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;

                mvd = (mvdx > mvdy) ? mvdx : mvdy;

                if(mvd > mvd_limit)
                {
                    ps_cluster_data->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 10:
            {
                S32 mvd;
                S32 mvdx, mvdx_q8;
                S32 mvdy, mvdy_q8;

                mvdx_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;

                mvdy_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;

                mvd = (mvdx > mvdy) ? mvdx : mvdy;

                /* NOTE(review): compares against the live field rather than the
                 * cached mvd_limit used by the other cases — confirm intent */
                if(mvd > ps_cluster_data->max_dist_from_centroid)
                {
                    ps_cluster_data->max_dist_from_centroid = mvd;
                }
                break;
            }
            default:
            {
                break;
            }
            }

            /* MV-count-weighted centroid merge in 64-bit to avoid overflow */
            i8_updated_posx = ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cluster_data->num_mvs) +
                              ((LWORD64)cur_posx_q8 * ps_cur_cluster->num_mvs);
            i8_updated_posy = ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cluster_data->num_mvs) +
                              ((LWORD64)cur_posy_q8 * ps_cur_cluster->num_mvs);

            ps_cluster_data->num_mvs += ps_cur_cluster->num_mvs;

            ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cluster_data->num_mvs);
            ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cluster_data->num_mvs);

            if(MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK >= num_clusters)
            {
                /* Cluster 0 changed: restart the scan against the merged state */
                num_clusters--;
                num_clusters_evaluated = 1;
                i = 0;
                ret_value++;
            }
            else
            {
                ret_value++;

                return ret_value;
            }
        }
    }

    /* Recurse with the next valid cluster as the new merge target */
    if(ret_value)
    {
        for(i = 1; i < (num_clusters + ret_value); i++)
        {
            if(ps_cluster_data[i].is_valid_cluster)
            {
                break;
            }
        }
        if(i == (num_clusters + ret_value))
        {
            return ret_value;
        }
    }
    else
    {
        i = 1;
    }

    return (hme_try_merge_clusters_blksize_gt_16(&ps_cluster_data[i], num_clusters - 1)) +
           ret_value;
}

/**
********************************************************************************
* @fn S32 hme_determine_validity_32x32
*        (
*            ctb_cluster_info_t *ps_ctb_cluster_info
*        )
*
* @brief Determines whether current 32x32 block needs to be evaluated in
*        enc_loop while recursing through the CU tree or not
*
* @param[in] ps_ctb_cluster_info: structure containing cluster data
*
* @param[out] pi4_children_nodes_required: set to 1 if the 16x16 children must
*             also be evaluated, 0 otherwise
*
* @param[in] blk_validity_wrt_pic_bndry: current block lies within the picture
*
* @param[in] parent_blk_validity_wrt_pic_bndry: parent block lies within the
*            picture
*
* @return 1 if the 32x32 block should be evaluated, else 0
********************************************************************************
*/
__inline S32
hme_determine_validity_32x32(
    ctb_cluster_info_t *ps_ctb_cluster_info,
    S32 *pi4_children_nodes_required,
    S32 blk_validity_wrt_pic_bndry,
    S32 parent_blk_validity_wrt_pic_bndry)
{
    cluster_data_t *ps_data;

    cluster_32x32_blk_t *ps_32x32_blk = ps_ctb_cluster_info->ps_32x32_blk;
    cluster_64x64_blk_t *ps_64x64_blk = ps_ctb_cluster_info->ps_64x64_blk;

    S32 num_clusters = ps_32x32_blk->num_clusters;
    S32 num_clusters_parent = ps_64x64_blk->num_clusters;

    /* Block crosses the picture boundary: invalid, but children must be tried */
    if(!blk_validity_wrt_pic_bndry)
    {
        *pi4_children_nodes_required = 1;
        return 0;
    }

    if(!parent_blk_validity_wrt_pic_bndry)
    {
        *pi4_children_nodes_required = 1;
        return 1;
    }

    /* Too much MV diversity at 32x32: force evaluation of smaller CUs */
    if(num_clusters > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK)
    {
        *pi4_children_nodes_required = 1;
        return 0;
    }

    if(num_clusters_parent > MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK)
    {
        *pi4_children_nodes_required = 1;

        return 1;
    }
    else if(num_clusters_parent < MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK)
    {
        *pi4_children_nodes_required = 0;

        return 1;
    }
    else
    {
        if(num_clusters < MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK)
        {
            *pi4_children_nodes_required = 0;
            return 1;
        }
        else
        {
            S32 i;

            /* 64x64 area in pixels (16x the 2Nx2N partition area) */
            S32 area_of_parent = gai4_partition_area[PART_ID_2Nx2N] << 4;
            S32 min_area = MAX_32BIT_VAL;
            S32 num_clusters_evaluated = 0;

            /* Find the smallest valid cluster footprint in this 32x32 block */
            for(i = 0; num_clusters_evaluated < num_clusters; i++)
            {
                ps_data = &ps_32x32_blk->as_cluster_data[i];

                if(!ps_data->is_valid_cluster)
                {
                    continue;
                }

                num_clusters_evaluated++;

                if(ps_data->area_in_pixels < min_area)
                {
                    min_area = ps_data->area_in_pixels;
                }
            }

            /* A cluster covering < 1/16th of the parent area suggests a small
             * distinct region: descend into the children instead */
            if((min_area << 4) < area_of_parent)
            {
                *pi4_children_nodes_required = 1;
                return 0;
            }
            else
            {
                *pi4_children_nodes_required = 0;
                return 1;
            }
        }
    }
}

/**
********************************************************************************
* @fn S32 hme_determine_validity_16x16
*        (
*            ctb_cluster_info_t *ps_ctb_cluster_info
*        )
*
* @brief Determines whether current 16x16 block needs to be evaluated in
*        enc_loop while recursing through the CU tree or not
*
* @param[in] ps_ctb_cluster_info: structure containing cluster data
*
* @param[out] pi4_children_nodes_required: set to 1 if the 8x8 children must
*             also be evaluated, 0 otherwise
*
* @param[in] blk_validity_wrt_pic_bndry: current block lies within the picture
*
* @param[in] parent_blk_validity_wrt_pic_bndry: parent block lies within the
*            picture
*
* @return 1 if the 16x16 block should be evaluated, else 0
********************************************************************************
*/
__inline S32 hme_determine_validity_16x16(
    ctb_cluster_info_t *ps_ctb_cluster_info,
    S32 *pi4_children_nodes_required,
    S32 blk_validity_wrt_pic_bndry,
    S32 parent_blk_validity_wrt_pic_bndry)
{
    cluster_data_t *ps_data;

    cluster_16x16_blk_t *ps_16x16_blk = ps_ctb_cluster_info->ps_16x16_blk;
    cluster_32x32_blk_t *ps_32x32_blk = ps_ctb_cluster_info->ps_32x32_blk;
    cluster_64x64_blk_t *ps_64x64_blk = ps_ctb_cluster_info->ps_64x64_blk;

    S32 num_clusters = ps_16x16_blk->num_clusters;
    S32 num_clusters_parent = ps_32x32_blk->num_clusters;
    S32 num_clusters_grandparent = ps_64x64_blk->num_clusters;

    if(!blk_validity_wrt_pic_bndry)
    {
        *pi4_children_nodes_required = 1;
        return 0;
    }

    if(!parent_blk_validity_wrt_pic_bndry)
    {
        *pi4_children_nodes_required = 1;
        return 1;
    }

    if((num_clusters_parent > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK) &&
       (num_clusters_grandparent > MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK))
    {
        *pi4_children_nodes_required = 1;
        return 1;
    }

    /* Implies nc_64 <= 3 when num_clusters_parent > 3 & */
    /* implies nc_64 > 3 when num_clusters_parent < 3 & */
    if(num_clusters_parent != MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK)
    {
        if(num_clusters <= MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK)
        {
            *pi4_children_nodes_required = 0;

            return 1;
        }
        else
        {
            *pi4_children_nodes_required = 1;

            return 0;
        }
    }
    /* Implies nc_64 >= 3 */
    else
    {
        if(num_clusters < MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK)
        {
            *pi4_children_nodes_required = 0;
            return 1;
        }
        else if(num_clusters > MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK)
        {
            *pi4_children_nodes_required = 1;
            return 0;
        }
        else
        {
            S32 i;

            /* 32x32 area in pixels (4x the 2Nx2N partition area) */
            S32 area_of_parent = gai4_partition_area[PART_ID_2Nx2N] << 2;
            S32 min_area = MAX_32BIT_VAL;
            S32 num_clusters_evaluated = 0;

            /* Find the smallest valid cluster footprint in this 16x16 block */
            for(i = 0; num_clusters_evaluated < num_clusters; i++)
            {
                ps_data = &ps_16x16_blk->as_cluster_data[i];

                if(!ps_data->is_valid_cluster)
                {
                    continue;
                }

                num_clusters_evaluated++;

                if(ps_data->area_in_pixels < min_area)
                {
                    min_area = ps_data->area_in_pixels;
                }
            }

            if((min_area << 4) < area_of_parent)
            {
                *pi4_children_nodes_required = 1;
                return 0;
            }
            else
            {
                *pi4_children_nodes_required = 0;
                return 1;
            }
        }
    }
}

/**
********************************************************************************
* @fn void hme_build_cu_tree
*        (
*            ctb_cluster_info_t *ps_ctb_cluster_info,
*            cur_ctb_cu_tree_t *ps_cu_tree,
*            S32 tree_depth,
*            CU_POS_T e_grand_parent_blk_pos,
*            CU_POS_T e_parent_blk_pos,
*            CU_POS_T e_cur_blk_pos
*        )
*
* @brief Recursive function for CU tree initialisation
*
* @param[in] ps_ctb_cluster_info: structure containing pointers to clusters
*                                 corresponding to all block sizes from 64x64
*                                 to 16x16
*
* @param[in] e_parent_blk_pos: position of parent block wrt its parent, if
*                              applicable
*
* @param[in] e_cur_blk_pos: position of current block wrt parent
*
* @param[out] ps_cu_tree : represents CU tree used in CU
recursion
*
* @param[in] tree_depth : specifies depth of the CU tree
*
* @return Nothing
********************************************************************************
*/
void hme_build_cu_tree(
    ctb_cluster_info_t *ps_ctb_cluster_info,
    cur_ctb_cu_tree_t *ps_cu_tree,
    S32 tree_depth,
    CU_POS_T e_grandparent_blk_pos,
    CU_POS_T e_parent_blk_pos,
    CU_POS_T e_cur_blk_pos)
{
    /* Thin wrapper: all node creation/bookkeeping is delegated to the common
     * CU-tree initialiser, rooted at this CTB's tree root */
    ihevce_cu_tree_init(
        ps_cu_tree,
        ps_ctb_cluster_info->ps_cu_tree_root,
        &ps_ctb_cluster_info->nodes_created_in_cu_tree,
        tree_depth,
        e_grandparent_blk_pos,
        e_parent_blk_pos,
        e_cur_blk_pos);
}

/**
********************************************************************************
* @fn S32 hme_sdi_based_cluster_spread_eligibility
*        (
*            cluster_32x32_blk_t *ps_blk_32x32
*        )
*
* @brief Determines whether the spread of high SDI MV's around each cluster
*        center is below a pre-determined threshold
*
* @param[in] ps_blk_32x32: structure containing the 32x32 block's clusters
*
* @param[in] sdi_threshold: minimum SDI for an MV to count towards the spread
*
* @return 1 if the spread is constrained, else 0
********************************************************************************
*/
__inline S32
hme_sdi_based_cluster_spread_eligibility(cluster_32x32_blk_t *ps_blk_32x32, S32 sdi_threshold)
{
    S32 cumulative_mv_distance;
    S32 i, j;
    S32 num_high_sdi_mvs;

    S32 num_clusters = ps_blk_32x32->num_clusters;

    /* NOTE(review): unlike other cluster walks in this file, this loop indexes
     * as_cluster_data[0..num_clusters) directly without checking
     * is_valid_cluster — presumably the array is dense here; confirm */
    for(i = 0; i < num_clusters; i++)
    {
        cluster_data_t *ps_data = &ps_blk_32x32->as_cluster_data[i];

        num_high_sdi_mvs = 0;
        cumulative_mv_distance = 0;

        for(j = 0; j < ps_data->num_mvs; j++)
        {
            mv_data_t *ps_mv = &ps_data->as_mv[j];

            if(ps_mv->sdi >= sdi_threshold)
            {
                num_high_sdi_mvs++;

                /* Accumulates the MV's distance from the cluster centroid */
                COMPUTE_MVD(ps_mv, ps_data, cumulative_mv_distance);
            }
        }

        /* Reject if the mean distance of high-SDI MVs exceeds half the
         * cluster's max distance from centroid */
        if(cumulative_mv_distance > ((ps_data->max_dist_from_centroid >> 1) * num_high_sdi_mvs))
        {
            return 0;
        }
    }

    return 1;
}

/**
********************************************************************************
* @fn S32 hme_populate_cu_tree
*        (
*            ctb_cluster_info_t *ps_ctb_cluster_info,
*            ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
*            cur_ctb_cu_tree_t *ps_cu_tree,
*            S32 tree_depth,
*            CU_POS_T e_parent_blk_pos,
*            CU_POS_T e_cur_blk_pos
*        )
*
* @brief Recursive function for CU tree population based on output of
*        clustering algorithm
*
* @param[in] ps_ctb_cluster_info: structure containing pointers to clusters
*                                 corresponding to all block sizes from 64x64
*                                 to 16x16
*
* @param[in] e_parent_blk_pos: position of parent block wrt its parent, if
*                              applicable
*
* @param[in] e_cur_blk_pos: position of current block wrt parent
*
* @param[in] ps_cur_ipe_ctb : output container for ipe analyses
*
* @param[out] ps_cu_tree : represents CU tree used in CU recursion
*
* @param[in] tree_depth : specifies depth of the CU tree
*
* @param[in] ipe_decision_precedence : specifies whether precedence should
*            be given to decisions made either by IPE(1) or clustering algos.
4925 * 4926 * @return 1 if re-evaluation of parent node's validity is not required, 4927 else 0 4928 ******************************************************************************** 4929 */ 4930 void hme_populate_cu_tree( 4931 ctb_cluster_info_t *ps_ctb_cluster_info, 4932 cur_ctb_cu_tree_t *ps_cu_tree, 4933 S32 tree_depth, 4934 ME_QUALITY_PRESETS_T e_quality_preset, 4935 CU_POS_T e_grandparent_blk_pos, 4936 CU_POS_T e_parent_blk_pos, 4937 CU_POS_T e_cur_blk_pos) 4938 { 4939 S32 area_of_cur_blk; 4940 S32 area_limit_for_me_decision_precedence; 4941 S32 children_nodes_required; 4942 S32 intra_mv_area; 4943 S32 intra_eval_enable; 4944 S32 inter_eval_enable; 4945 S32 ipe_decision_precedence; 4946 S32 node_validity; 4947 S32 num_clusters; 4948 4949 ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb = ps_ctb_cluster_info->ps_cur_ipe_ctb; 4950 4951 if(NULL == ps_cu_tree) 4952 { 4953 return; 4954 } 4955 4956 switch(tree_depth) 4957 { 4958 case 0: 4959 { 4960 /* 64x64 block */ 4961 S32 blk_32x32_mask = ps_ctb_cluster_info->blk_32x32_mask; 4962 4963 cluster_64x64_blk_t *ps_blk_64x64 = ps_ctb_cluster_info->ps_64x64_blk; 4964 4965 area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N] << 4; 4966 area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100; 4967 children_nodes_required = 0; 4968 intra_mv_area = ps_blk_64x64->intra_mv_area; 4969 4970 ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence); 4971 4972 intra_eval_enable = ipe_decision_precedence; 4973 inter_eval_enable = !!ps_blk_64x64->num_clusters; 4974 4975 #if 1 //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS 4976 if(e_quality_preset >= ME_HIGH_QUALITY) 4977 { 4978 inter_eval_enable = 1; 4979 node_validity = (blk_32x32_mask == 0xf); 4980 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS 4981 ps_cu_tree->u1_inter_eval_enable = !(intra_mv_area == area_of_cur_blk); 4982 #endif 4983 break; 4984 } 4985 #endif 4986 4987 #if ENABLE_4CTB_EVALUATION 4988 node_validity = (blk_32x32_mask 
== 0xf); 4989 4990 break; 4991 #else 4992 { 4993 S32 i; 4994 4995 num_clusters = ps_blk_64x64->num_clusters; 4996 4997 node_validity = (ipe_decision_precedence) 4998 ? (!ps_cur_ipe_ctb->u1_split_flag) 4999 : (num_clusters <= MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK); 5000 5001 for(i = 0; i < MAX_NUM_REF; i++) 5002 { 5003 node_validity = node_validity && (ps_blk_64x64->au1_num_clusters[i] <= 5004 MAX_NUM_CLUSTERS_IN_ONE_REF_IDX); 5005 } 5006 5007 node_validity = node_validity && (blk_32x32_mask == 0xf); 5008 } 5009 break; 5010 #endif 5011 } 5012 case 1: 5013 { 5014 /* 32x32 block */ 5015 S32 is_percent_intra_area_gt_threshold; 5016 5017 cluster_32x32_blk_t *ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[e_cur_blk_pos]; 5018 5019 S32 blk_32x32_mask = ps_ctb_cluster_info->blk_32x32_mask; 5020 5021 #if !ENABLE_4CTB_EVALUATION 5022 S32 best_inter_cost = ps_blk_32x32->best_inter_cost; 5023 S32 best_intra_cost = 5024 ((ps_ctb_cluster_info->ps_cur_ipe_ctb->ai4_best32x32_intra_cost[e_cur_blk_pos] + 5025 ps_ctb_cluster_info->i4_frame_qstep * ps_ctb_cluster_info->i4_frame_qstep_multiplier * 5026 4) < 0) 5027 ? MAX_32BIT_VAL 5028 : (ps_ctb_cluster_info->ps_cur_ipe_ctb->ai4_best32x32_intra_cost[e_cur_blk_pos] + 5029 ps_ctb_cluster_info->i4_frame_qstep * 5030 ps_ctb_cluster_info->i4_frame_qstep_multiplier * 4); 5031 S32 best_cost = (best_inter_cost > best_intra_cost) ? 
best_intra_cost : best_inter_cost; 5032 S32 cost_differential = (best_inter_cost - best_cost); 5033 #endif 5034 5035 area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N] << 2; 5036 area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100; 5037 intra_mv_area = ps_blk_32x32->intra_mv_area; 5038 is_percent_intra_area_gt_threshold = 5039 (intra_mv_area > area_limit_for_me_decision_precedence); 5040 ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence); 5041 5042 intra_eval_enable = ipe_decision_precedence; 5043 inter_eval_enable = !!ps_blk_32x32->num_clusters; 5044 children_nodes_required = 1; 5045 5046 #if 1 //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS 5047 if(e_quality_preset >= ME_HIGH_QUALITY) 5048 { 5049 inter_eval_enable = 1; 5050 node_validity = (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0); 5051 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS 5052 ps_cu_tree->u1_inter_eval_enable = !(intra_mv_area == area_of_cur_blk); 5053 #endif 5054 break; 5055 } 5056 #endif 5057 5058 #if ENABLE_4CTB_EVALUATION 5059 node_validity = (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0); 5060 5061 break; 5062 #else 5063 { 5064 S32 i; 5065 num_clusters = ps_blk_32x32->num_clusters; 5066 5067 if(ipe_decision_precedence) 5068 { 5069 node_validity = (ps_cur_ipe_ctb->as_intra32_analyse[e_cur_blk_pos].b1_merge_flag); 5070 node_validity = node_validity && (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0); 5071 } 5072 else 5073 { 5074 node_validity = 5075 ((ALL_INTER_COST_DIFF_THR * best_cost) >= (100 * cost_differential)) && 5076 (num_clusters <= MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK) && 5077 (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0); 5078 5079 for(i = 0; (i < MAX_NUM_REF) && (node_validity); i++) 5080 { 5081 node_validity = node_validity && (ps_blk_32x32->au1_num_clusters[i] <= 5082 MAX_NUM_CLUSTERS_IN_ONE_REF_IDX); 5083 } 5084 5085 if(node_validity) 5086 { 5087 node_validity = node_validity && 5088 
hme_sdi_based_cluster_spread_eligibility( 5089 ps_blk_32x32, ps_ctb_cluster_info->sdi_threshold); 5090 } 5091 } 5092 } 5093 5094 break; 5095 #endif 5096 } 5097 case 2: 5098 { 5099 cluster_16x16_blk_t *ps_blk_16x16 = 5100 &ps_ctb_cluster_info->ps_16x16_blk[e_cur_blk_pos + (e_parent_blk_pos << 2)]; 5101 5102 S32 blk_8x8_mask = 5103 ps_ctb_cluster_info->pi4_blk_8x8_mask[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos]; 5104 5105 area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N]; 5106 area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100; 5107 children_nodes_required = 1; 5108 intra_mv_area = ps_blk_16x16->intra_mv_area; 5109 ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence); 5110 num_clusters = ps_blk_16x16->num_clusters; 5111 5112 intra_eval_enable = ipe_decision_precedence; 5113 inter_eval_enable = 1; 5114 5115 #if 1 //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS 5116 if(e_quality_preset >= ME_HIGH_QUALITY) 5117 { 5118 node_validity = 5119 !ps_ctb_cluster_info 5120 ->au1_is_16x16_blk_split[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos]; 5121 children_nodes_required = !node_validity; 5122 break; 5123 } 5124 #endif 5125 5126 #if ENABLE_4CTB_EVALUATION 5127 node_validity = (blk_8x8_mask == 0xf); 5128 5129 #if ENABLE_CU_TREE_CULLING 5130 { 5131 cur_ctb_cu_tree_t *ps_32x32_root; 5132 5133 switch(e_parent_blk_pos) 5134 { 5135 case POS_TL: 5136 { 5137 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl; 5138 5139 break; 5140 } 5141 case POS_TR: 5142 { 5143 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr; 5144 5145 break; 5146 } 5147 case POS_BL: 5148 { 5149 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl; 5150 5151 break; 5152 } 5153 case POS_BR: 5154 { 5155 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br; 5156 5157 break; 5158 } 5159 } 5160 5161 if(ps_32x32_root->is_node_valid) 5162 { 5163 node_validity = 5164 
node_validity && 5165 !ps_ctb_cluster_info 5166 ->au1_is_16x16_blk_split[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos]; 5167 children_nodes_required = !node_validity; 5168 } 5169 } 5170 #endif 5171 5172 break; 5173 #else 5174 5175 if(ipe_decision_precedence) 5176 { 5177 S32 merge_flag_16 = (ps_cur_ipe_ctb->as_intra32_analyse[e_parent_blk_pos] 5178 .as_intra16_analyse[e_cur_blk_pos] 5179 .b1_merge_flag); 5180 S32 valid_flag = (blk_8x8_mask == 0xf); 5181 5182 node_validity = merge_flag_16 && valid_flag; 5183 } 5184 else 5185 { 5186 node_validity = (blk_8x8_mask == 0xf); 5187 } 5188 5189 break; 5190 #endif 5191 } 5192 case 3: 5193 { 5194 S32 blk_8x8_mask = 5195 ps_ctb_cluster_info 5196 ->pi4_blk_8x8_mask[(S32)(e_grandparent_blk_pos << 2) + e_parent_blk_pos]; 5197 S32 merge_flag_16 = (ps_cur_ipe_ctb->as_intra32_analyse[e_grandparent_blk_pos] 5198 .as_intra16_analyse[e_parent_blk_pos] 5199 .b1_merge_flag); 5200 S32 merge_flag_32 = 5201 (ps_cur_ipe_ctb->as_intra32_analyse[e_grandparent_blk_pos].b1_merge_flag); 5202 5203 intra_eval_enable = !merge_flag_16 || !merge_flag_32; 5204 inter_eval_enable = 1; 5205 children_nodes_required = 0; 5206 5207 #if 1 //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS 5208 if(e_quality_preset >= ME_HIGH_QUALITY) 5209 { 5210 node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0); 5211 break; 5212 } 5213 #endif 5214 5215 #if ENABLE_4CTB_EVALUATION 5216 node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0); 5217 5218 break; 5219 #else 5220 { 5221 cur_ctb_cu_tree_t *ps_32x32_root; 5222 cur_ctb_cu_tree_t *ps_16x16_root; 5223 cluster_32x32_blk_t *ps_32x32_blk; 5224 5225 switch(e_grandparent_blk_pos) 5226 { 5227 case POS_TL: 5228 { 5229 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl; 5230 5231 break; 5232 } 5233 case POS_TR: 5234 { 5235 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr; 5236 5237 break; 5238 } 5239 case POS_BL: 5240 { 5241 ps_32x32_root = 
ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl; 5242 5243 break; 5244 } 5245 case POS_BR: 5246 { 5247 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br; 5248 5249 break; 5250 } 5251 } 5252 5253 switch(e_parent_blk_pos) 5254 { 5255 case POS_TL: 5256 { 5257 ps_16x16_root = ps_32x32_root->ps_child_node_tl; 5258 5259 break; 5260 } 5261 case POS_TR: 5262 { 5263 ps_16x16_root = ps_32x32_root->ps_child_node_tr; 5264 5265 break; 5266 } 5267 case POS_BL: 5268 { 5269 ps_16x16_root = ps_32x32_root->ps_child_node_bl; 5270 5271 break; 5272 } 5273 case POS_BR: 5274 { 5275 ps_16x16_root = ps_32x32_root->ps_child_node_br; 5276 5277 break; 5278 } 5279 } 5280 5281 ps_32x32_blk = &ps_ctb_cluster_info->ps_32x32_blk[e_grandparent_blk_pos]; 5282 5283 node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0) && 5284 ((!ps_32x32_root->is_node_valid) || 5285 (ps_32x32_blk->num_clusters_with_weak_sdi_density > 0) || 5286 (!ps_16x16_root->is_node_valid)); 5287 5288 break; 5289 } 5290 #endif 5291 } 5292 } 5293 5294 /* Fill the current cu_tree node */ 5295 ps_cu_tree->is_node_valid = node_validity; 5296 ps_cu_tree->u1_intra_eval_enable = intra_eval_enable; 5297 ps_cu_tree->u1_inter_eval_enable = inter_eval_enable; 5298 5299 if(children_nodes_required) 5300 { 5301 tree_depth++; 5302 5303 hme_populate_cu_tree( 5304 ps_ctb_cluster_info, 5305 ps_cu_tree->ps_child_node_tl, 5306 tree_depth, 5307 e_quality_preset, 5308 e_parent_blk_pos, 5309 e_cur_blk_pos, 5310 POS_TL); 5311 5312 hme_populate_cu_tree( 5313 ps_ctb_cluster_info, 5314 ps_cu_tree->ps_child_node_tr, 5315 tree_depth, 5316 e_quality_preset, 5317 e_parent_blk_pos, 5318 e_cur_blk_pos, 5319 POS_TR); 5320 5321 hme_populate_cu_tree( 5322 ps_ctb_cluster_info, 5323 ps_cu_tree->ps_child_node_bl, 5324 tree_depth, 5325 e_quality_preset, 5326 e_parent_blk_pos, 5327 e_cur_blk_pos, 5328 POS_BL); 5329 5330 hme_populate_cu_tree( 5331 ps_ctb_cluster_info, 5332 ps_cu_tree->ps_child_node_br, 5333 tree_depth, 5334 
e_quality_preset, 5335 e_parent_blk_pos, 5336 e_cur_blk_pos, 5337 POS_BR); 5338 } 5339 } 5340 5341 /** 5342 ******************************************************************************** 5343 * @fn void hme_analyse_mv_clustering 5344 * ( 5345 * search_results_t *ps_search_results, 5346 * ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb, 5347 * cur_ctb_cu_tree_t *ps_cu_tree 5348 * ) 5349 * 5350 * @brief Implementation for the clustering algorithm 5351 * 5352 * @param[in] ps_search_results: structure containing 16x16 block results 5353 * 5354 * @param[in] ps_cur_ipe_ctb : output container for ipe analyses 5355 * 5356 * @param[out] ps_cu_tree : represents CU tree used in CU recursion 5357 * 5358 * @return None 5359 ******************************************************************************** 5360 */ 5361 void hme_analyse_mv_clustering( 5362 search_results_t *ps_search_results, 5363 inter_cu_results_t *ps_16x16_cu_results, 5364 inter_cu_results_t *ps_8x8_cu_results, 5365 ctb_cluster_info_t *ps_ctb_cluster_info, 5366 S08 *pi1_future_list, 5367 S08 *pi1_past_list, 5368 S32 bidir_enabled, 5369 ME_QUALITY_PRESETS_T e_quality_preset) 5370 { 5371 cluster_16x16_blk_t *ps_blk_16x16; 5372 cluster_32x32_blk_t *ps_blk_32x32; 5373 cluster_64x64_blk_t *ps_blk_64x64; 5374 5375 part_type_results_t *ps_best_result; 5376 pu_result_t *aps_part_result[MAX_NUM_PARTS]; 5377 pu_result_t *aps_inferior_parts[MAX_NUM_PARTS]; 5378 5379 PART_ID_T e_part_id; 5380 PART_TYPE_T e_part_type; 5381 5382 S32 enable_64x64_merge; 5383 S32 i, j, k; 5384 S32 mvx, mvy; 5385 S32 num_parts; 5386 S32 ref_idx; 5387 S32 ai4_pred_mode[MAX_NUM_PARTS]; 5388 5389 S32 num_32x32_merges = 0; 5390 5391 /*****************************************/ 5392 /*****************************************/ 5393 /********* Enter ye who is HQ ************/ 5394 /*****************************************/ 5395 /*****************************************/ 5396 5397 ps_blk_64x64 = ps_ctb_cluster_info->ps_64x64_blk; 5398 5399 /* Initialise 
data in each of the clusters */ 5400 for(i = 0; i < 16; i++) 5401 { 5402 ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i]; 5403 5404 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS 5405 if(e_quality_preset < ME_HIGH_QUALITY) 5406 { 5407 hme_init_clusters_16x16(ps_blk_16x16, bidir_enabled); 5408 } 5409 else 5410 { 5411 ps_blk_16x16->best_inter_cost = 0; 5412 ps_blk_16x16->intra_mv_area = 0; 5413 } 5414 #else 5415 hme_init_clusters_16x16(ps_blk_16x16, bidir_enabled); 5416 #endif 5417 } 5418 5419 for(i = 0; i < 4; i++) 5420 { 5421 ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i]; 5422 5423 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS 5424 if(e_quality_preset < ME_HIGH_QUALITY) 5425 { 5426 hme_init_clusters_32x32(ps_blk_32x32, bidir_enabled); 5427 } 5428 else 5429 { 5430 ps_blk_32x32->best_inter_cost = 0; 5431 ps_blk_32x32->intra_mv_area = 0; 5432 } 5433 #else 5434 hme_init_clusters_32x32(ps_blk_32x32, bidir_enabled); 5435 #endif 5436 } 5437 5438 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS 5439 if(e_quality_preset < ME_HIGH_QUALITY) 5440 { 5441 hme_init_clusters_64x64(ps_blk_64x64, bidir_enabled); 5442 } 5443 else 5444 { 5445 ps_blk_64x64->best_inter_cost = 0; 5446 ps_blk_64x64->intra_mv_area = 0; 5447 } 5448 #else 5449 hme_init_clusters_64x64(ps_blk_64x64, bidir_enabled); 5450 #endif 5451 5452 /* Initialise data for all nodes in the CU tree */ 5453 hme_build_cu_tree( 5454 ps_ctb_cluster_info, ps_ctb_cluster_info->ps_cu_tree_root, 0, POS_NA, POS_NA, POS_NA); 5455 5456 if(e_quality_preset >= ME_HIGH_QUALITY) 5457 { 5458 memset(ps_ctb_cluster_info->au1_is_16x16_blk_split, 1, 16 * sizeof(U08)); 5459 } 5460 5461 #if ENABLE_UNIFORM_CU_SIZE_16x16 || ENABLE_UNIFORM_CU_SIZE_8x8 5462 return; 5463 #endif 5464 5465 for(i = 0; i < 16; i++) 5466 { 5467 S32 blk_8x8_mask; 5468 S32 is_16x16_blk_valid; 5469 S32 num_clusters_updated; 5470 S32 num_clusters; 5471 5472 blk_8x8_mask = ps_ctb_cluster_info->pi4_blk_8x8_mask[i]; 5473 5474 ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i]; 5475 
5476 is_16x16_blk_valid = (blk_8x8_mask == 0xf); 5477 5478 if(is_16x16_blk_valid) 5479 { 5480 /* Use 8x8 data when 16x16 CU is split */ 5481 if(ps_search_results[i].u1_split_flag) 5482 { 5483 S32 blk_8x8_idx = i << 2; 5484 5485 num_parts = 4; 5486 e_part_type = PRT_NxN; 5487 5488 for(j = 0; j < num_parts; j++, blk_8x8_idx++) 5489 { 5490 /* Only 2Nx2N partition supported for 8x8 block */ 5491 ASSERT( 5492 ps_8x8_cu_results[blk_8x8_idx].ps_best_results[0].u1_part_type == 5493 ((PART_TYPE_T)PRT_2Nx2N)); 5494 5495 aps_part_result[j] = 5496 &ps_8x8_cu_results[blk_8x8_idx].ps_best_results[0].as_pu_results[0]; 5497 aps_inferior_parts[j] = 5498 &ps_8x8_cu_results[blk_8x8_idx].ps_best_results[1].as_pu_results[0]; 5499 ai4_pred_mode[j] = (aps_part_result[j]->pu.b2_pred_mode); 5500 } 5501 } 5502 else 5503 { 5504 ps_best_result = &ps_16x16_cu_results[i].ps_best_results[0]; 5505 5506 e_part_type = (PART_TYPE_T)ps_best_result->u1_part_type; 5507 num_parts = gau1_num_parts_in_part_type[e_part_type]; 5508 5509 for(j = 0; j < num_parts; j++) 5510 { 5511 aps_part_result[j] = &ps_best_result->as_pu_results[j]; 5512 aps_inferior_parts[j] = &ps_best_result[1].as_pu_results[j]; 5513 ai4_pred_mode[j] = (aps_part_result[j]->pu.b2_pred_mode); 5514 } 5515 5516 ps_ctb_cluster_info->au1_is_16x16_blk_split[i] = 0; 5517 } 5518 5519 for(j = 0; j < num_parts; j++) 5520 { 5521 pu_result_t *ps_part_result = aps_part_result[j]; 5522 5523 S32 num_mvs = ((ai4_pred_mode[j] > 1) + 1); 5524 5525 e_part_id = ge_part_type_to_part_id[e_part_type][j]; 5526 5527 /* Skip clustering if best mode is intra */ 5528 if((ps_part_result->pu.b1_intra_flag)) 5529 { 5530 ps_blk_16x16->intra_mv_area += gai4_partition_area[e_part_id]; 5531 ps_blk_16x16->best_inter_cost += aps_inferior_parts[j]->i4_tot_cost; 5532 continue; 5533 } 5534 else 5535 { 5536 ps_blk_16x16->best_inter_cost += ps_part_result->i4_tot_cost; 5537 } 5538 5539 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS 5540 if(e_quality_preset >= ME_HIGH_QUALITY) 5541 { 
5542 continue; 5543 } 5544 #endif 5545 5546 for(k = 0; k < num_mvs; k++) 5547 { 5548 mv_t *ps_mv; 5549 5550 pu_mv_t *ps_pu_mv = &ps_part_result->pu.mv; 5551 5552 S32 is_l0_mv = ((ai4_pred_mode[j] == 2) && !k) || (ai4_pred_mode[j] == 0); 5553 5554 ps_mv = (is_l0_mv) ? (&ps_pu_mv->s_l0_mv) : (&ps_pu_mv->s_l1_mv); 5555 5556 mvx = ps_mv->i2_mvx; 5557 mvy = ps_mv->i2_mvy; 5558 5559 ref_idx = (is_l0_mv) ? pi1_past_list[ps_pu_mv->i1_l0_ref_idx] 5560 : pi1_future_list[ps_pu_mv->i1_l1_ref_idx]; 5561 5562 num_clusters = ps_blk_16x16->num_clusters; 5563 5564 hme_find_and_update_clusters( 5565 ps_blk_16x16->as_cluster_data, 5566 &(ps_blk_16x16->num_clusters), 5567 mvx, 5568 mvy, 5569 ref_idx, 5570 ps_part_result->i4_sdi, 5571 e_part_id, 5572 (ai4_pred_mode[j] == 2)); 5573 5574 num_clusters_updated = (ps_blk_16x16->num_clusters); 5575 5576 ps_blk_16x16->au1_num_clusters[ref_idx] += 5577 (num_clusters_updated - num_clusters); 5578 } 5579 } 5580 } 5581 } 5582 5583 /* Search for 32x32 clusters */ 5584 for(i = 0; i < 4; i++) 5585 { 5586 S32 num_clusters_merged; 5587 5588 S32 is_32x32_blk_valid = (ps_ctb_cluster_info->blk_32x32_mask & (1 << i)) || 0; 5589 5590 if(is_32x32_blk_valid) 5591 { 5592 ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i]; 5593 ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i << 2]; 5594 5595 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS 5596 if(e_quality_preset >= ME_HIGH_QUALITY) 5597 { 5598 for(j = 0; j < 4; j++, ps_blk_16x16++) 5599 { 5600 ps_blk_32x32->intra_mv_area += ps_blk_16x16->intra_mv_area; 5601 5602 ps_blk_32x32->best_inter_cost += ps_blk_16x16->best_inter_cost; 5603 } 5604 continue; 5605 } 5606 #endif 5607 5608 hme_update_32x32_clusters(ps_blk_32x32, ps_blk_16x16); 5609 5610 if((ps_blk_32x32->num_clusters >= MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK)) 5611 { 5612 num_clusters_merged = hme_try_merge_clusters_blksize_gt_16( 5613 ps_blk_32x32->as_cluster_data, (ps_blk_32x32->num_clusters)); 5614 5615 if(num_clusters_merged) 5616 { 5617 
ps_blk_32x32->num_clusters -= num_clusters_merged; 5618 5619 UPDATE_CLUSTER_METADATA_POST_MERGE(ps_blk_32x32); 5620 } 5621 } 5622 } 5623 } 5624 5625 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS 5626 /* Eliminate outlier 32x32 clusters */ 5627 if(e_quality_preset < ME_HIGH_QUALITY) 5628 #endif 5629 { 5630 hme_boot_out_outlier(ps_ctb_cluster_info, 32); 5631 5632 /* Find best_uni_ref and best_alt_ref */ 5633 hme_find_top_ref_ids(ps_ctb_cluster_info, bidir_enabled, 32); 5634 } 5635 5636 /* Populate the CU tree for depths 1 and higher */ 5637 { 5638 cur_ctb_cu_tree_t *ps_tree_root = ps_ctb_cluster_info->ps_cu_tree_root; 5639 cur_ctb_cu_tree_t *ps_tl = ps_tree_root->ps_child_node_tl; 5640 cur_ctb_cu_tree_t *ps_tr = ps_tree_root->ps_child_node_tr; 5641 cur_ctb_cu_tree_t *ps_bl = ps_tree_root->ps_child_node_bl; 5642 cur_ctb_cu_tree_t *ps_br = ps_tree_root->ps_child_node_br; 5643 5644 hme_populate_cu_tree( 5645 ps_ctb_cluster_info, ps_tl, 1, e_quality_preset, POS_NA, POS_NA, POS_TL); 5646 5647 num_32x32_merges += (ps_tl->is_node_valid == 1); 5648 5649 hme_populate_cu_tree( 5650 ps_ctb_cluster_info, ps_tr, 1, e_quality_preset, POS_NA, POS_NA, POS_TR); 5651 5652 num_32x32_merges += (ps_tr->is_node_valid == 1); 5653 5654 hme_populate_cu_tree( 5655 ps_ctb_cluster_info, ps_bl, 1, e_quality_preset, POS_NA, POS_NA, POS_BL); 5656 5657 num_32x32_merges += (ps_bl->is_node_valid == 1); 5658 5659 hme_populate_cu_tree( 5660 ps_ctb_cluster_info, ps_br, 1, e_quality_preset, POS_NA, POS_NA, POS_BR); 5661 5662 num_32x32_merges += (ps_br->is_node_valid == 1); 5663 } 5664 5665 #if !ENABLE_4CTB_EVALUATION 5666 if(e_quality_preset < ME_HIGH_QUALITY) 5667 { 5668 enable_64x64_merge = (num_32x32_merges >= 3); 5669 } 5670 #else 5671 if(e_quality_preset < ME_HIGH_QUALITY) 5672 { 5673 enable_64x64_merge = 1; 5674 } 5675 #endif 5676 5677 #if 1 //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS 5678 if(e_quality_preset >= ME_HIGH_QUALITY) 5679 { 5680 enable_64x64_merge = 1; 5681 } 5682 #else 5683 
if(e_quality_preset >= ME_HIGH_QUALITY) 5684 { 5685 enable_64x64_merge = (num_32x32_merges >= 3); 5686 } 5687 #endif 5688 5689 if(enable_64x64_merge) 5690 { 5691 S32 num_clusters_merged; 5692 5693 ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[0]; 5694 5695 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS 5696 if(e_quality_preset >= ME_HIGH_QUALITY) 5697 { 5698 for(j = 0; j < 4; j++, ps_blk_32x32++) 5699 { 5700 ps_blk_64x64->intra_mv_area += ps_blk_32x32->intra_mv_area; 5701 5702 ps_blk_64x64->best_inter_cost += ps_blk_32x32->best_inter_cost; 5703 } 5704 } 5705 else 5706 #endif 5707 { 5708 hme_update_64x64_clusters(ps_blk_64x64, ps_blk_32x32); 5709 5710 if((ps_blk_64x64->num_clusters >= MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK)) 5711 { 5712 num_clusters_merged = hme_try_merge_clusters_blksize_gt_16( 5713 ps_blk_64x64->as_cluster_data, (ps_blk_64x64->num_clusters)); 5714 5715 if(num_clusters_merged) 5716 { 5717 ps_blk_64x64->num_clusters -= num_clusters_merged; 5718 5719 UPDATE_CLUSTER_METADATA_POST_MERGE(ps_blk_64x64); 5720 } 5721 } 5722 } 5723 5724 #if !ENABLE_4CTB_EVALUATION 5725 if(e_quality_preset < ME_HIGH_QUALITY) 5726 { 5727 S32 best_inter_cost = ps_blk_64x64->best_inter_cost; 5728 S32 best_intra_cost = 5729 ((ps_ctb_cluster_info->ps_cur_ipe_ctb->i4_best64x64_intra_cost + 5730 ps_ctb_cluster_info->i4_frame_qstep * 5731 ps_ctb_cluster_info->i4_frame_qstep_multiplier * 16) < 0) 5732 ? MAX_32BIT_VAL 5733 : (ps_ctb_cluster_info->ps_cur_ipe_ctb->i4_best64x64_intra_cost + 5734 ps_ctb_cluster_info->i4_frame_qstep * 5735 ps_ctb_cluster_info->i4_frame_qstep_multiplier * 16); 5736 S32 best_cost = (best_inter_cost > best_intra_cost) ? 
best_intra_cost : best_inter_cost; 5737 S32 cost_differential = (best_inter_cost - best_cost); 5738 5739 enable_64x64_merge = 5740 ((ALL_INTER_COST_DIFF_THR * best_cost) >= (100 * cost_differential)); 5741 } 5742 #endif 5743 } 5744 5745 if(enable_64x64_merge) 5746 { 5747 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS 5748 if(e_quality_preset < ME_HIGH_QUALITY) 5749 #endif 5750 { 5751 hme_boot_out_outlier(ps_ctb_cluster_info, 64); 5752 5753 hme_find_top_ref_ids(ps_ctb_cluster_info, bidir_enabled, 64); 5754 } 5755 5756 hme_populate_cu_tree( 5757 ps_ctb_cluster_info, 5758 ps_ctb_cluster_info->ps_cu_tree_root, 5759 0, 5760 e_quality_preset, 5761 POS_NA, 5762 POS_NA, 5763 POS_NA); 5764 } 5765 } 5766 #endif 5767 5768 static __inline void hme_merge_prms_init( 5769 hme_merge_prms_t *ps_prms, 5770 layer_ctxt_t *ps_curr_layer, 5771 refine_prms_t *ps_refine_prms, 5772 me_frm_ctxt_t *ps_me_ctxt, 5773 range_prms_t *ps_range_prms_rec, 5774 range_prms_t *ps_range_prms_inp, 5775 mv_grid_t **pps_mv_grid, 5776 inter_ctb_prms_t *ps_inter_ctb_prms, 5777 S32 i4_num_pred_dir, 5778 S32 i4_32x32_id, 5779 BLK_SIZE_T e_blk_size, 5780 ME_QUALITY_PRESETS_T e_me_quality_presets) 5781 { 5782 S32 i4_use_rec = ps_refine_prms->i4_use_rec_in_fpel; 5783 S32 i4_cu_16x16 = (BLK_32x32 == e_blk_size) ? 
(i4_32x32_id << 2) : 0; 5784 5785 /* Currently not enabling segmentation info from prev layers */ 5786 ps_prms->i4_seg_info_avail = 0; 5787 ps_prms->i4_part_mask = 0; 5788 5789 /* Number of reference pics in which to do merge */ 5790 ps_prms->i4_num_ref = i4_num_pred_dir; 5791 5792 /* Layer ctxt info */ 5793 ps_prms->ps_layer_ctxt = ps_curr_layer; 5794 5795 ps_prms->ps_inter_ctb_prms = ps_inter_ctb_prms; 5796 5797 /* Top left, top right, bottom left and bottom right 16x16 units */ 5798 if(BLK_32x32 == e_blk_size) 5799 { 5800 ps_prms->ps_results_tl = &ps_me_ctxt->as_search_results_16x16[i4_cu_16x16]; 5801 ps_prms->ps_results_tr = &ps_me_ctxt->as_search_results_16x16[i4_cu_16x16 + 1]; 5802 ps_prms->ps_results_bl = &ps_me_ctxt->as_search_results_16x16[i4_cu_16x16 + 2]; 5803 ps_prms->ps_results_br = &ps_me_ctxt->as_search_results_16x16[i4_cu_16x16 + 3]; 5804 5805 /* Merge results stored here */ 5806 ps_prms->ps_results_merge = &ps_me_ctxt->as_search_results_32x32[i4_32x32_id]; 5807 5808 /* This could be lesser than the number of 16x16results generated*/ 5809 /* For now, keeping it to be same */ 5810 ps_prms->i4_num_inp_results = ps_refine_prms->i4_num_fpel_results; 5811 ps_prms->ps_8x8_cu_results = &ps_me_ctxt->as_cu8x8_results[i4_32x32_id << 4]; 5812 ps_prms->ps_results_grandchild = NULL; 5813 } 5814 else 5815 { 5816 ps_prms->ps_results_tl = &ps_me_ctxt->as_search_results_32x32[0]; 5817 ps_prms->ps_results_tr = &ps_me_ctxt->as_search_results_32x32[1]; 5818 ps_prms->ps_results_bl = &ps_me_ctxt->as_search_results_32x32[2]; 5819 ps_prms->ps_results_br = &ps_me_ctxt->as_search_results_32x32[3]; 5820 5821 /* Merge results stored here */ 5822 ps_prms->ps_results_merge = &ps_me_ctxt->s_search_results_64x64; 5823 5824 ps_prms->i4_num_inp_results = ps_refine_prms->i4_num_32x32_merge_results; 5825 ps_prms->ps_8x8_cu_results = &ps_me_ctxt->as_cu8x8_results[0]; 5826 ps_prms->ps_results_grandchild = ps_me_ctxt->as_search_results_16x16; 5827 } 5828 5829 if(i4_use_rec) 5830 { 5831 
WORD32 ref_ctr; 5832 5833 for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++) 5834 { 5835 ps_prms->aps_mv_range[ref_ctr] = &ps_range_prms_rec[ref_ctr]; 5836 } 5837 } 5838 else 5839 { 5840 WORD32 ref_ctr; 5841 5842 for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++) 5843 { 5844 ps_prms->aps_mv_range[ref_ctr] = &ps_range_prms_inp[ref_ctr]; 5845 } 5846 } 5847 ps_prms->i4_use_rec = i4_use_rec; 5848 5849 ps_prms->pf_mv_cost_compute = compute_mv_cost_implicit_high_speed; 5850 5851 ps_prms->pps_mv_grid = pps_mv_grid; 5852 5853 ps_prms->log_ctb_size = ps_me_ctxt->log_ctb_size; 5854 5855 ps_prms->e_quality_preset = e_me_quality_presets; 5856 ps_prms->pi1_future_list = ps_me_ctxt->ai1_future_list; 5857 ps_prms->pi1_past_list = ps_me_ctxt->ai1_past_list; 5858 ps_prms->ps_cluster_info = ps_me_ctxt->ps_ctb_cluster_info; 5859 } 5860 5861 /** 5862 ******************************************************************************** 5863 * @fn void hme_refine(me_ctxt_t *ps_ctxt, 5864 * refine_layer_prms_t *ps_refine_prms) 5865 * 5866 * @brief Top level entry point for refinement ME 5867 * 5868 * @param[in,out] ps_ctxt: ME Handle 5869 * 5870 * @param[in] ps_refine_prms : refinement layer prms 5871 * 5872 * @return None 5873 ******************************************************************************** 5874 */ 5875 void hme_refine( 5876 me_ctxt_t *ps_thrd_ctxt, 5877 refine_prms_t *ps_refine_prms, 5878 PF_EXT_UPDATE_FXN_T pf_ext_update_fxn, 5879 layer_ctxt_t *ps_coarse_layer, 5880 multi_thrd_ctxt_t *ps_multi_thrd_ctxt, 5881 S32 lyr_job_type, 5882 S32 thrd_id, 5883 S32 me_frm_id, 5884 pre_enc_L0_ipe_encloop_ctxt_t *ps_l0_ipe_input) 5885 { 5886 inter_ctb_prms_t s_common_frm_prms; 5887 5888 BLK_SIZE_T e_search_blk_size, e_result_blk_size; 5889 WORD32 i4_me_frm_id = me_frm_id % MAX_NUM_ME_PARALLEL; 5890 me_frm_ctxt_t *ps_ctxt = ps_thrd_ctxt->aps_me_frm_prms[i4_me_frm_id]; 5891 ME_QUALITY_PRESETS_T e_me_quality_presets = 5892 ps_thrd_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets; 
5893 5894 WORD32 num_rows_proc = 0; 5895 WORD32 num_act_ref_pics; 5896 WORD16 i2_prev_enc_frm_max_mv_y; 5897 WORD32 i4_idx_dvsr_p = ps_multi_thrd_ctxt->i4_idx_dvsr_p; 5898 5899 /*************************************************************************/ 5900 /* Complexity of search: Low to High */ 5901 /*************************************************************************/ 5902 SEARCH_COMPLEXITY_T e_search_complexity; 5903 5904 /*************************************************************************/ 5905 /* to store the PU results which are passed to the decide_part_types */ 5906 /* as input prms. Multiplied by 4 as the max number of Ref in a List is 4*/ 5907 /*************************************************************************/ 5908 5909 pu_result_t as_pu_results[2][TOT_NUM_PARTS][MAX_NUM_RESULTS_PER_PART_LIST]; 5910 inter_pu_results_t as_inter_pu_results[4]; 5911 inter_pu_results_t *ps_pu_results = as_inter_pu_results; 5912 5913 /*************************************************************************/ 5914 /* Config parameter structures for varius ME submodules */ 5915 /*************************************************************************/ 5916 hme_merge_prms_t s_merge_prms_32x32_tl, s_merge_prms_32x32_tr; 5917 hme_merge_prms_t s_merge_prms_32x32_bl, s_merge_prms_32x32_br; 5918 hme_merge_prms_t s_merge_prms_64x64; 5919 hme_search_prms_t s_search_prms_blk; 5920 mvbank_update_prms_t s_mv_update_prms; 5921 hme_ctb_prms_t s_ctb_prms; 5922 hme_subpel_prms_t s_subpel_prms; 5923 fullpel_refine_ctxt_t *ps_fullpel_refine_ctxt = ps_ctxt->ps_fullpel_refine_ctxt; 5924 ctb_cluster_info_t *ps_ctb_cluster_info; 5925 fpel_srch_cand_init_data_t s_srch_cand_init_data; 5926 5927 /* 4 bits (LSBs) of this variable control merge of 4 32x32 CUs in CTB */ 5928 S32 en_merge_32x32; 5929 /* 5 lsb's specify whether or not merge algorithm is required */ 5930 /* to be executed or not. Relevant only in PQ. 
Ought to be */ 5931 /* used in conjunction with en_merge_32x32 and */ 5932 /* ps_ctb_bound_attrs->u1_merge_to_64x64_flag. This is */ 5933 /* required when all children are deemed to be intras */ 5934 S32 en_merge_execution; 5935 5936 /*************************************************************************/ 5937 /* All types of search candidates for predictor based search. */ 5938 /*************************************************************************/ 5939 S32 num_init_candts = 0; 5940 S32 i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0; 5941 S32 i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1; 5942 search_candt_t *ps_search_candts, as_search_candts[MAX_INIT_CANDTS]; 5943 search_node_t as_top_neighbours[4], as_left_neighbours[3]; 5944 5945 pf_get_wt_inp fp_get_wt_inp; 5946 5947 search_node_t as_unique_search_nodes[MAX_INIT_CANDTS * 9]; 5948 U32 au4_unique_node_map[MAP_X_MAX * 2]; 5949 5950 /* Controls the boundary attributes of CTB, whether it has 64x64 or not */ 5951 ctb_boundary_attrs_t *ps_ctb_bound_attrs; 5952 5953 /*************************************************************************/ 5954 /* points ot the search results for the blk level search (8x8/16x16) */ 5955 /*************************************************************************/ 5956 search_results_t *ps_search_results; 5957 5958 /*************************************************************************/ 5959 /* Coordinates */ 5960 /*************************************************************************/ 5961 S32 blk_x, blk_y, i4_ctb_x, i4_ctb_y, tile_col_idx, blk_id_in_ctb; 5962 S32 pos_x, pos_y; 5963 S32 blk_id_in_full_ctb; 5964 5965 /*************************************************************************/ 5966 /* Related to dimensions of block being searched and pic dimensions */ 5967 /*************************************************************************/ 5968 S32 blk_4x4_to_16x16; 5969 S32 blk_wd, blk_ht, blk_size_shift; 5970 S32 i4_pic_wd, i4_pic_ht, 
num_blks_in_this_ctb; 5971 S32 num_results_prev_layer; 5972 5973 /*************************************************************************/ 5974 /* Size of a basic unit for this layer. For non encode layers, we search */ 5975 /* in block sizes of 8x8. For encode layers, though we search 16x16s the */ 5976 /* basic unit size is the ctb size. */ 5977 /*************************************************************************/ 5978 S32 unit_size; 5979 5980 /*************************************************************************/ 5981 /* Local variable storing results of any 4 CU merge to bigger CU */ 5982 /*************************************************************************/ 5983 CU_MERGE_RESULT_T e_merge_result; 5984 5985 /*************************************************************************/ 5986 /* This mv grid stores results during and after fpel search, during */ 5987 /* merge, subpel and bidirect refinements stages. 2 instances of this are*/ 5988 /* meant for the 2 directions of search (l0 and l1). 
*/ 5989 /*************************************************************************/ 5990 mv_grid_t *aps_mv_grid[2]; 5991 5992 /*************************************************************************/ 5993 /* Pointers to context in current and coarser layers */ 5994 /*************************************************************************/ 5995 layer_ctxt_t *ps_curr_layer, *ps_prev_layer; 5996 5997 /*************************************************************************/ 5998 /* to store mv range per blk, and picture limit, allowed search range */ 5999 /* range prms in hpel and qpel units as well */ 6000 /*************************************************************************/ 6001 range_prms_t as_range_prms_inp[MAX_NUM_REF], as_range_prms_rec[MAX_NUM_REF]; 6002 range_prms_t s_pic_limit_inp, s_pic_limit_rec, as_mv_limit[MAX_NUM_REF]; 6003 range_prms_t as_range_prms_hpel[MAX_NUM_REF], as_range_prms_qpel[MAX_NUM_REF]; 6004 6005 /*************************************************************************/ 6006 /* These variables are used to track number of references at different */ 6007 /* stages of ME. 
*/ 6008 /*************************************************************************/ 6009 S32 i4_num_pred_dir; 6010 S32 i4_num_ref_each_dir, i, i4_num_ref_prev_layer; 6011 S32 lambda_recon = ps_refine_prms->lambda_recon; 6012 6013 /* Counts successful merge to 32x32 every CTB (0-4) */ 6014 S32 merge_count_32x32; 6015 6016 S32 ai4_id_coloc[14], ai4_id_Z[2]; 6017 U08 au1_search_candidate_list_index[2]; 6018 S32 ai4_num_coloc_cands[2]; 6019 U08 u1_pred_dir, u1_pred_dir_ctr; 6020 6021 /*************************************************************************/ 6022 /* Input pointer and stride */ 6023 /*************************************************************************/ 6024 U08 *pu1_inp; 6025 S32 i4_inp_stride; 6026 S32 end_of_frame; 6027 S32 num_sync_units_in_row, num_sync_units_in_tile; 6028 6029 /*************************************************************************/ 6030 /* Indicates whether the all 4 8x8 blks are valid in the 16x16 blk in the*/ 6031 /* encode layer. If not 15, then 1 or more 8x8 blks not valid. 
Means that*/ 6032 /* we need to stop merges and force 8x8 CUs for that 16x16 blk */ 6033 /*************************************************************************/ 6034 S32 blk_8x8_mask; 6035 S32 ai4_blk_8x8_mask[16]; 6036 U08 au1_is_64x64Blk_noisy[1]; 6037 U08 au1_is_32x32Blk_noisy[4]; 6038 U08 au1_is_16x16Blk_noisy[16]; 6039 6040 ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list = 6041 ps_thrd_ctxt->ps_cmn_utils_optimised_function_list; 6042 ihevce_me_optimised_function_list_t *ps_me_optimised_function_list = 6043 ((ihevce_me_optimised_function_list_t *)ps_thrd_ctxt->pv_me_optimised_function_list); 6044 6045 ASSERT(ps_refine_prms->i4_layer_id < ps_ctxt->num_layers - 1); 6046 6047 /*************************************************************************/ 6048 /* Pointers to current and coarse layer are needed for projection */ 6049 /* Pointer to prev layer are needed for other candts like coloc */ 6050 /*************************************************************************/ 6051 ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id]; 6052 6053 ps_prev_layer = hme_get_past_layer_ctxt( 6054 ps_thrd_ctxt, ps_ctxt, ps_refine_prms->i4_layer_id, ps_multi_thrd_ctxt->i4_num_me_frm_pllel); 6055 6056 num_results_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_mvs_per_ref; 6057 6058 /* Function pointer is selected based on the C vc X86 macro */ 6059 6060 fp_get_wt_inp = ps_me_optimised_function_list->pf_get_wt_inp_ctb; 6061 6062 i4_inp_stride = ps_curr_layer->i4_inp_stride; 6063 i4_pic_wd = ps_curr_layer->i4_wd; 6064 i4_pic_ht = ps_curr_layer->i4_ht; 6065 e_search_complexity = ps_refine_prms->e_search_complexity; 6066 end_of_frame = 0; 6067 6068 /* This points to all the initial candts */ 6069 ps_search_candts = &as_search_candts[0]; 6070 6071 /* mv grid being huge strucutre is part of context */ 6072 aps_mv_grid[0] = &ps_ctxt->as_mv_grid[0]; 6073 aps_mv_grid[1] = &ps_ctxt->as_mv_grid[1]; 6074 6075 
/*************************************************************************/ 6076 /* If the current layer is encoded (since it may be multicast or final */ 6077 /* layer (finest)), then we use 16x16 blk size with some selected parts */ 6078 /* If the current layer is not encoded, then we use 8x8 blk size, with */ 6079 /* enable or disable of 4x4 partitions depending on the input prms */ 6080 /*************************************************************************/ 6081 e_search_blk_size = BLK_16x16; 6082 blk_wd = blk_ht = 16; 6083 blk_size_shift = 4; 6084 e_result_blk_size = BLK_8x8; 6085 s_mv_update_prms.i4_shift = 1; 6086 6087 if(ps_coarse_layer->ps_layer_mvbank->e_blk_size == BLK_4x4) 6088 { 6089 blk_4x4_to_16x16 = 1; 6090 } 6091 else 6092 { 6093 blk_4x4_to_16x16 = 0; 6094 } 6095 6096 unit_size = 1 << ps_ctxt->log_ctb_size; 6097 s_search_prms_blk.i4_inp_stride = unit_size; 6098 6099 /* This is required to properly update the layer mv bank */ 6100 s_mv_update_prms.e_search_blk_size = e_search_blk_size; 6101 s_search_prms_blk.e_blk_size = e_search_blk_size; 6102 6103 /*************************************************************************/ 6104 /* If current layer is explicit, then the number of ref frames are to */ 6105 /* be same as previous layer. 
Else it will be 2 */ 6106 /*************************************************************************/ 6107 i4_num_ref_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_ref; 6108 i4_num_pred_dir = 6109 (ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 > 0) && (i4_num_act_ref_l1 > 0)) + 6110 1; 6111 6112 #if USE_MODIFIED == 1 6113 s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified; 6114 #else 6115 s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed; 6116 #endif 6117 6118 i4_num_pred_dir = MIN(i4_num_pred_dir, i4_num_ref_prev_layer); 6119 if(i4_num_ref_prev_layer <= 2) 6120 { 6121 i4_num_ref_each_dir = 1; 6122 } 6123 else 6124 { 6125 i4_num_ref_each_dir = i4_num_ref_prev_layer >> 1; 6126 } 6127 6128 s_mv_update_prms.i4_num_ref = i4_num_pred_dir; 6129 s_mv_update_prms.i4_num_results_to_store = 6130 MIN((ps_ctxt->s_frm_prms.bidir_enabled) ? ps_curr_layer->ps_layer_mvbank->i4_num_mvs_per_ref 6131 : (i4_num_act_ref_l0 > 1) + 1, 6132 ps_refine_prms->i4_num_results_per_part); 6133 6134 /*************************************************************************/ 6135 /* Initialization of merge params for 16x16 to 32x32 merge. 
*/ 6136 /* There are 4 32x32 units in a CTB, so 4 param structures initialized */ 6137 /*************************************************************************/ 6138 { 6139 hme_merge_prms_t *aps_merge_prms[4]; 6140 aps_merge_prms[0] = &s_merge_prms_32x32_tl; 6141 aps_merge_prms[1] = &s_merge_prms_32x32_tr; 6142 aps_merge_prms[2] = &s_merge_prms_32x32_bl; 6143 aps_merge_prms[3] = &s_merge_prms_32x32_br; 6144 for(i = 0; i < 4; i++) 6145 { 6146 hme_merge_prms_init( 6147 aps_merge_prms[i], 6148 ps_curr_layer, 6149 ps_refine_prms, 6150 ps_ctxt, 6151 as_range_prms_rec, 6152 as_range_prms_inp, 6153 &aps_mv_grid[0], 6154 &s_common_frm_prms, 6155 i4_num_pred_dir, 6156 i, 6157 BLK_32x32, 6158 e_me_quality_presets); 6159 } 6160 } 6161 6162 /*************************************************************************/ 6163 /* Initialization of merge params for 32x32 to 64x64 merge. */ 6164 /* There are 4 32x32 units in a CTB, so only 1 64x64 CU can be in CTB */ 6165 /*************************************************************************/ 6166 { 6167 hme_merge_prms_init( 6168 &s_merge_prms_64x64, 6169 ps_curr_layer, 6170 ps_refine_prms, 6171 ps_ctxt, 6172 as_range_prms_rec, 6173 as_range_prms_inp, 6174 &aps_mv_grid[0], 6175 &s_common_frm_prms, 6176 i4_num_pred_dir, 6177 0, 6178 BLK_64x64, 6179 e_me_quality_presets); 6180 } 6181 6182 /* Pointers to cu_results are initialised here */ 6183 { 6184 WORD32 i; 6185 6186 ps_ctxt->s_search_results_64x64.ps_cu_results = &ps_ctxt->s_cu64x64_results; 6187 6188 for(i = 0; i < 4; i++) 6189 { 6190 ps_ctxt->as_search_results_32x32[i].ps_cu_results = &ps_ctxt->as_cu32x32_results[i]; 6191 } 6192 6193 for(i = 0; i < 16; i++) 6194 { 6195 ps_ctxt->as_search_results_16x16[i].ps_cu_results = &ps_ctxt->as_cu16x16_results[i]; 6196 } 6197 } 6198 6199 /*************************************************************************/ 6200 /* SUBPEL Params initialized here */ 6201 /*************************************************************************/ 6202 
{ 6203 s_subpel_prms.ps_search_results_16x16 = &ps_ctxt->as_search_results_16x16[0]; 6204 s_subpel_prms.ps_search_results_32x32 = &ps_ctxt->as_search_results_32x32[0]; 6205 s_subpel_prms.ps_search_results_64x64 = &ps_ctxt->s_search_results_64x64; 6206 6207 s_subpel_prms.i4_num_16x16_candts = ps_refine_prms->i4_num_fpel_results; 6208 s_subpel_prms.i4_num_32x32_candts = ps_refine_prms->i4_num_32x32_merge_results; 6209 s_subpel_prms.i4_num_64x64_candts = ps_refine_prms->i4_num_64x64_merge_results; 6210 6211 s_subpel_prms.i4_num_steps_hpel_refine = ps_refine_prms->i4_num_steps_hpel_refine; 6212 s_subpel_prms.i4_num_steps_qpel_refine = ps_refine_prms->i4_num_steps_qpel_refine; 6213 6214 s_subpel_prms.i4_use_satd = ps_refine_prms->i4_use_satd_subpel; 6215 6216 s_subpel_prms.i4_inp_stride = unit_size; 6217 6218 s_subpel_prms.u1_max_subpel_candts_2Nx2N = ps_refine_prms->u1_max_subpel_candts_2Nx2N; 6219 s_subpel_prms.u1_max_subpel_candts_NxN = ps_refine_prms->u1_max_subpel_candts_NxN; 6220 s_subpel_prms.u1_subpel_candt_threshold = ps_refine_prms->u1_subpel_candt_threshold; 6221 6222 s_subpel_prms.pf_qpel_interp = ps_me_optimised_function_list->pf_qpel_interp_avg_generic; 6223 6224 { 6225 WORD32 ref_ctr; 6226 for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++) 6227 { 6228 s_subpel_prms.aps_mv_range_hpel[ref_ctr] = &as_range_prms_hpel[ref_ctr]; 6229 s_subpel_prms.aps_mv_range_qpel[ref_ctr] = &as_range_prms_qpel[ref_ctr]; 6230 } 6231 } 6232 s_subpel_prms.pi2_inp_bck = ps_ctxt->pi2_inp_bck; 6233 6234 #if USE_MODIFIED == 0 6235 s_subpel_prms.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed; 6236 #else 6237 s_subpel_prms.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified; 6238 #endif 6239 s_subpel_prms.e_me_quality_presets = e_me_quality_presets; 6240 6241 /* BI Refinement done only if this field is 1 */ 6242 s_subpel_prms.bidir_enabled = ps_refine_prms->bidir_enabled; 6243 6244 s_subpel_prms.u1_num_ref = ps_ctxt->num_ref_future + ps_ctxt->num_ref_past; 6245 
6246 s_subpel_prms.i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0; 6247 s_subpel_prms.i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1; 6248 s_subpel_prms.u1_max_num_subpel_refine_centers = 6249 ps_refine_prms->u1_max_num_subpel_refine_centers; 6250 } 6251 6252 /* inter_ctb_prms_t struct initialisation */ 6253 { 6254 inter_ctb_prms_t *ps_inter_ctb_prms = &s_common_frm_prms; 6255 hme_subpel_prms_t *ps_subpel_prms = &s_subpel_prms; 6256 6257 ps_inter_ctb_prms->pps_rec_list_l0 = ps_ctxt->ps_hme_ref_map->pps_rec_list_l0; 6258 ps_inter_ctb_prms->pps_rec_list_l1 = ps_ctxt->ps_hme_ref_map->pps_rec_list_l1; 6259 ps_inter_ctb_prms->wpred_log_wdc = ps_ctxt->s_wt_pred.wpred_log_wdc; 6260 ps_inter_ctb_prms->u1_max_tr_depth = ps_thrd_ctxt->s_init_prms.u1_max_tr_depth; 6261 ps_inter_ctb_prms->i1_quality_preset = e_me_quality_presets; 6262 ps_inter_ctb_prms->i4_bidir_enabled = ps_subpel_prms->bidir_enabled; 6263 ps_inter_ctb_prms->i4_inp_stride = ps_subpel_prms->i4_inp_stride; 6264 ps_inter_ctb_prms->u1_num_ref = ps_subpel_prms->u1_num_ref; 6265 ps_inter_ctb_prms->u1_use_satd = ps_subpel_prms->i4_use_satd; 6266 ps_inter_ctb_prms->i4_rec_stride = ps_curr_layer->i4_rec_stride; 6267 ps_inter_ctb_prms->u1_num_active_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0; 6268 ps_inter_ctb_prms->u1_num_active_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1; 6269 ps_inter_ctb_prms->i4_lamda = lambda_recon; 6270 ps_inter_ctb_prms->u1_lamda_qshift = ps_refine_prms->lambda_q_shift; 6271 ps_inter_ctb_prms->i4_qstep_ls8 = ps_ctxt->ps_hme_frm_prms->qstep_ls8; 6272 ps_inter_ctb_prms->pi4_inv_wt = ps_ctxt->s_wt_pred.a_inv_wpred_wt; 6273 ps_inter_ctb_prms->pi1_past_list = ps_ctxt->ai1_past_list; 6274 ps_inter_ctb_prms->pi1_future_list = ps_ctxt->ai1_future_list; 6275 ps_inter_ctb_prms->pu4_src_variance = s_search_prms_blk.au4_src_variance; 6276 ps_inter_ctb_prms->u1_max_2nx2n_tu_recur_cands = 6277 ps_refine_prms->u1_max_2nx2n_tu_recur_cands; 6278 } 6279 6280 for(i = 0; i 
< MAX_INIT_CANDTS; i++) 6281 { 6282 ps_search_candts[i].ps_search_node = &ps_ctxt->s_init_search_node[i]; 6283 ps_search_candts[i].ps_search_node->ps_mv = &ps_ctxt->as_search_cand_mv[i]; 6284 6285 INIT_SEARCH_NODE(ps_search_candts[i].ps_search_node, 0); 6286 } 6287 num_act_ref_pics = 6288 ps_ctxt->s_frm_prms.u1_num_active_ref_l0 + ps_ctxt->s_frm_prms.u1_num_active_ref_l1; 6289 6290 if(num_act_ref_pics) 6291 { 6292 hme_search_cand_data_init( 6293 ai4_id_Z, 6294 ai4_id_coloc, 6295 ai4_num_coloc_cands, 6296 au1_search_candidate_list_index, 6297 i4_num_act_ref_l0, 6298 i4_num_act_ref_l1, 6299 ps_ctxt->s_frm_prms.bidir_enabled, 6300 blk_4x4_to_16x16); 6301 } 6302 6303 if(!ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 > 1)) 6304 { 6305 ps_search_candts[ai4_id_Z[0]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[0]; 6306 ps_search_candts[ai4_id_Z[1]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[1]; 6307 } 6308 else if(!ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 == 1)) 6309 { 6310 ps_search_candts[ai4_id_Z[0]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[0]; 6311 } 6312 6313 for(i = 0; i < 3; i++) 6314 { 6315 search_node_t *ps_search_node; 6316 ps_search_node = &as_left_neighbours[i]; 6317 INIT_SEARCH_NODE(ps_search_node, 0); 6318 ps_search_node = &as_top_neighbours[i]; 6319 INIT_SEARCH_NODE(ps_search_node, 0); 6320 } 6321 6322 INIT_SEARCH_NODE(&as_top_neighbours[3], 0); 6323 as_left_neighbours[2].u1_is_avail = 0; 6324 6325 /*************************************************************************/ 6326 /* Initialize all the search results structure here. 
We update all the */ 6327 /* search results to default values, and configure things like blk sizes */ 6328 /*************************************************************************/ 6329 if(num_act_ref_pics) 6330 { 6331 S32 i4_x, i4_y; 6332 /* 16x16 results */ 6333 for(i = 0; i < 16; i++) 6334 { 6335 search_results_t *ps_search_results; 6336 S32 pred_lx; 6337 ps_search_results = &ps_ctxt->as_search_results_16x16[i]; 6338 i4_x = (S32)gau1_encode_to_raster_x[i]; 6339 i4_y = (S32)gau1_encode_to_raster_y[i]; 6340 i4_x <<= 4; 6341 i4_y <<= 4; 6342 6343 hme_init_search_results( 6344 ps_search_results, 6345 i4_num_pred_dir, 6346 ps_refine_prms->i4_num_fpel_results, 6347 ps_refine_prms->i4_num_results_per_part, 6348 e_search_blk_size, 6349 i4_x, 6350 i4_y, 6351 &ps_ctxt->au1_is_past[0]); 6352 6353 for(pred_lx = 0; pred_lx < 2; pred_lx++) 6354 { 6355 pred_ctxt_t *ps_pred_ctxt; 6356 6357 ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx]; 6358 6359 hme_init_pred_ctxt_encode( 6360 ps_pred_ctxt, 6361 ps_search_results, 6362 ps_search_candts[ai4_id_coloc[0]].ps_search_node, 6363 ps_search_candts[ai4_id_Z[0]].ps_search_node, 6364 aps_mv_grid[pred_lx], 6365 pred_lx, 6366 lambda_recon, 6367 ps_refine_prms->lambda_q_shift, 6368 &ps_ctxt->apu1_ref_bits_tlu_lc[0], 6369 &ps_ctxt->ai2_ref_scf[0]); 6370 } 6371 } 6372 6373 for(i = 0; i < 4; i++) 6374 { 6375 search_results_t *ps_search_results; 6376 S32 pred_lx; 6377 ps_search_results = &ps_ctxt->as_search_results_32x32[i]; 6378 6379 i4_x = (S32)gau1_encode_to_raster_x[i]; 6380 i4_y = (S32)gau1_encode_to_raster_y[i]; 6381 i4_x <<= 5; 6382 i4_y <<= 5; 6383 6384 hme_init_search_results( 6385 ps_search_results, 6386 i4_num_pred_dir, 6387 ps_refine_prms->i4_num_32x32_merge_results, 6388 ps_refine_prms->i4_num_results_per_part, 6389 BLK_32x32, 6390 i4_x, 6391 i4_y, 6392 &ps_ctxt->au1_is_past[0]); 6393 6394 for(pred_lx = 0; pred_lx < 2; pred_lx++) 6395 { 6396 pred_ctxt_t *ps_pred_ctxt; 6397 6398 ps_pred_ctxt = 
&ps_search_results->as_pred_ctxt[pred_lx]; 6399 6400 hme_init_pred_ctxt_encode( 6401 ps_pred_ctxt, 6402 ps_search_results, 6403 ps_search_candts[ai4_id_coloc[0]].ps_search_node, 6404 ps_search_candts[ai4_id_Z[0]].ps_search_node, 6405 aps_mv_grid[pred_lx], 6406 pred_lx, 6407 lambda_recon, 6408 ps_refine_prms->lambda_q_shift, 6409 &ps_ctxt->apu1_ref_bits_tlu_lc[0], 6410 &ps_ctxt->ai2_ref_scf[0]); 6411 } 6412 } 6413 6414 { 6415 search_results_t *ps_search_results; 6416 S32 pred_lx; 6417 ps_search_results = &ps_ctxt->s_search_results_64x64; 6418 6419 hme_init_search_results( 6420 ps_search_results, 6421 i4_num_pred_dir, 6422 ps_refine_prms->i4_num_64x64_merge_results, 6423 ps_refine_prms->i4_num_results_per_part, 6424 BLK_64x64, 6425 0, 6426 0, 6427 &ps_ctxt->au1_is_past[0]); 6428 6429 for(pred_lx = 0; pred_lx < 2; pred_lx++) 6430 { 6431 pred_ctxt_t *ps_pred_ctxt; 6432 6433 ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx]; 6434 6435 hme_init_pred_ctxt_encode( 6436 ps_pred_ctxt, 6437 ps_search_results, 6438 ps_search_candts[ai4_id_coloc[0]].ps_search_node, 6439 ps_search_candts[ai4_id_Z[0]].ps_search_node, 6440 aps_mv_grid[pred_lx], 6441 pred_lx, 6442 lambda_recon, 6443 ps_refine_prms->lambda_q_shift, 6444 &ps_ctxt->apu1_ref_bits_tlu_lc[0], 6445 &ps_ctxt->ai2_ref_scf[0]); 6446 } 6447 } 6448 } 6449 6450 /* Initialise the structure used in clustering */ 6451 if(ME_PRISTINE_QUALITY == e_me_quality_presets) 6452 { 6453 ps_ctb_cluster_info = ps_ctxt->ps_ctb_cluster_info; 6454 6455 ps_ctb_cluster_info->ps_16x16_blk = ps_ctxt->ps_blk_16x16; 6456 ps_ctb_cluster_info->ps_32x32_blk = ps_ctxt->ps_blk_32x32; 6457 ps_ctb_cluster_info->ps_64x64_blk = ps_ctxt->ps_blk_64x64; 6458 ps_ctb_cluster_info->pi4_blk_8x8_mask = ai4_blk_8x8_mask; 6459 ps_ctb_cluster_info->sdi_threshold = ps_refine_prms->sdi_threshold; 6460 ps_ctb_cluster_info->i4_frame_qstep = ps_ctxt->frm_qstep; 6461 ps_ctb_cluster_info->i4_frame_qstep_multiplier = 16; 6462 } 6463 6464 
/*********************************************************************/ 6465 /* Initialize the dyn. search range params. for each reference index */ 6466 /* in current layer ctxt */ 6467 /*********************************************************************/ 6468 6469 /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */ 6470 if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled) 6471 { 6472 WORD32 ref_ctr; 6473 /* set no. of act ref in L0 for further use at frame level */ 6474 ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i4_num_act_ref_in_l0 = 6475 ps_ctxt->s_frm_prms.u1_num_active_ref_l0; 6476 6477 for(ref_ctr = 0; ref_ctr < ps_ctxt->s_frm_prms.u1_num_active_ref_l0; ref_ctr++) 6478 { 6479 INIT_DYN_SEARCH_PRMS( 6480 &ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].as_dyn_range_prms[ref_ctr], 6481 ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr]); 6482 } 6483 } 6484 /*************************************************************************/ 6485 /* Now that the candidates have been ordered, to choose the right number */ 6486 /* of initial candidates. 
*/ 6487 /*************************************************************************/ 6488 if(blk_4x4_to_16x16) 6489 { 6490 if(i4_num_ref_prev_layer > 2) 6491 { 6492 if(e_search_complexity == SEARCH_CX_LOW) 6493 num_init_candts = 7 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1); 6494 else if(e_search_complexity == SEARCH_CX_MED) 6495 num_init_candts = 14 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1); 6496 else if(e_search_complexity == SEARCH_CX_HIGH) 6497 num_init_candts = 21 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1); 6498 else 6499 ASSERT(0); 6500 } 6501 else if(i4_num_ref_prev_layer == 2) 6502 { 6503 if(e_search_complexity == SEARCH_CX_LOW) 6504 num_init_candts = 5 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1); 6505 else if(e_search_complexity == SEARCH_CX_MED) 6506 num_init_candts = 12 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1); 6507 else if(e_search_complexity == SEARCH_CX_HIGH) 6508 num_init_candts = 19 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1); 6509 else 6510 ASSERT(0); 6511 } 6512 else 6513 { 6514 if(e_search_complexity == SEARCH_CX_LOW) 6515 num_init_candts = 5; 6516 else if(e_search_complexity == SEARCH_CX_MED) 6517 num_init_candts = 12; 6518 else if(e_search_complexity == SEARCH_CX_HIGH) 6519 num_init_candts = 19; 6520 else 6521 ASSERT(0); 6522 } 6523 } 6524 else 6525 { 6526 if(i4_num_ref_prev_layer > 2) 6527 { 6528 if(e_search_complexity == SEARCH_CX_LOW) 6529 num_init_candts = 7 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1); 6530 else if(e_search_complexity == SEARCH_CX_MED) 6531 num_init_candts = 13 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1); 6532 else if(e_search_complexity == SEARCH_CX_HIGH) 6533 num_init_candts = 18 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1); 6534 else 6535 ASSERT(0); 6536 } 6537 else if(i4_num_ref_prev_layer == 2) 6538 { 6539 if(e_search_complexity == SEARCH_CX_LOW) 6540 num_init_candts = 5 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1); 6541 else if(e_search_complexity == SEARCH_CX_MED) 6542 num_init_candts = 11 * 
(!ps_ctxt->s_frm_prms.bidir_enabled + 1); 6543 else if(e_search_complexity == SEARCH_CX_HIGH) 6544 num_init_candts = 16 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1); 6545 else 6546 ASSERT(0); 6547 } 6548 else 6549 { 6550 if(e_search_complexity == SEARCH_CX_LOW) 6551 num_init_candts = 5; 6552 else if(e_search_complexity == SEARCH_CX_MED) 6553 num_init_candts = 11; 6554 else if(e_search_complexity == SEARCH_CX_HIGH) 6555 num_init_candts = 16; 6556 else 6557 ASSERT(0); 6558 } 6559 } 6560 6561 /*************************************************************************/ 6562 /* The following search parameters are fixed throughout the search across*/ 6563 /* all blks. So these are configured outside processing loop */ 6564 /*************************************************************************/ 6565 s_search_prms_blk.i4_num_init_candts = num_init_candts; 6566 s_search_prms_blk.i4_start_step = 1; 6567 s_search_prms_blk.i4_use_satd = 0; 6568 s_search_prms_blk.i4_num_steps_post_refine = ps_refine_prms->i4_num_steps_post_refine_fpel; 6569 /* we use recon only for encoded layers, otherwise it is not available */ 6570 s_search_prms_blk.i4_use_rec = ps_refine_prms->i4_encode & ps_refine_prms->i4_use_rec_in_fpel; 6571 6572 s_search_prms_blk.ps_search_candts = ps_search_candts; 6573 if(s_search_prms_blk.i4_use_rec) 6574 { 6575 WORD32 ref_ctr; 6576 for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++) 6577 s_search_prms_blk.aps_mv_range[ref_ctr] = &as_range_prms_rec[ref_ctr]; 6578 } 6579 else 6580 { 6581 WORD32 ref_ctr; 6582 for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++) 6583 s_search_prms_blk.aps_mv_range[ref_ctr] = &as_range_prms_inp[ref_ctr]; 6584 } 6585 6586 /*************************************************************************/ 6587 /* Initialize coordinates. Meaning as follows */ 6588 /* blk_x : x coordinate of the 16x16 blk, in terms of number of blks */ 6589 /* blk_y : same as above, y coord. 
*/ 6590 /* num_blks_in_this_ctb : number of blks in this given ctb that starts */ 6591 /* at i4_ctb_x, i4_ctb_y. This may not be 16 at picture boundaries. */ 6592 /* i4_ctb_x, i4_ctb_y: pixel coordinate of the ctb realtive to top left */ 6593 /* corner of the picture. Always multiple of 64. */ 6594 /* blk_id_in_ctb : encode order id of the blk in the ctb. */ 6595 /*************************************************************************/ 6596 blk_y = 0; 6597 blk_id_in_ctb = 0; 6598 i4_ctb_y = 0; 6599 6600 /*************************************************************************/ 6601 /* Picture limit on all 4 sides. This will be used to set mv limits for */ 6602 /* every block given its coordinate. Note thsi assumes that the min amt */ 6603 /* of padding to right of pic is equal to the blk size. If we go all the */ 6604 /* way upto 64x64, then the min padding on right size of picture should */ 6605 /* be 64, and also on bottom side of picture. */ 6606 /*************************************************************************/ 6607 SET_PIC_LIMIT( 6608 s_pic_limit_inp, 6609 ps_curr_layer->i4_pad_x_rec, 6610 ps_curr_layer->i4_pad_y_rec, 6611 ps_curr_layer->i4_wd, 6612 ps_curr_layer->i4_ht, 6613 s_search_prms_blk.i4_num_steps_post_refine); 6614 6615 SET_PIC_LIMIT( 6616 s_pic_limit_rec, 6617 ps_curr_layer->i4_pad_x_rec, 6618 ps_curr_layer->i4_pad_y_rec, 6619 ps_curr_layer->i4_wd, 6620 ps_curr_layer->i4_ht, 6621 s_search_prms_blk.i4_num_steps_post_refine); 6622 6623 /*************************************************************************/ 6624 /* set the MV limit per ref. pic. */ 6625 /* - P pic. : Based on the config params. */ 6626 /* - B/b pic: Based on the Max/Min MV from prev. P and config. param. 
*/ 6627 /*************************************************************************/ 6628 hme_set_mv_limit_using_dvsr_data( 6629 ps_ctxt, ps_curr_layer, as_mv_limit, &i2_prev_enc_frm_max_mv_y, num_act_ref_pics); 6630 s_srch_cand_init_data.pu1_num_fpel_search_cands = ps_refine_prms->au1_num_fpel_search_cands; 6631 s_srch_cand_init_data.i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0; 6632 s_srch_cand_init_data.i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1; 6633 s_srch_cand_init_data.ps_coarse_layer = ps_coarse_layer; 6634 s_srch_cand_init_data.ps_curr_layer = ps_curr_layer; 6635 s_srch_cand_init_data.i4_max_num_init_cands = num_init_candts; 6636 s_srch_cand_init_data.ps_search_cands = ps_search_candts; 6637 s_srch_cand_init_data.u1_num_results_in_mvbank = s_mv_update_prms.i4_num_results_to_store; 6638 s_srch_cand_init_data.pi4_ref_id_lc_to_l0_map = ps_ctxt->a_ref_idx_lc_to_l0; 6639 s_srch_cand_init_data.pi4_ref_id_lc_to_l1_map = ps_ctxt->a_ref_idx_lc_to_l1; 6640 s_srch_cand_init_data.e_search_blk_size = e_search_blk_size; 6641 6642 while(0 == end_of_frame) 6643 { 6644 job_queue_t *ps_job; 6645 frm_ctb_ctxt_t *ps_frm_ctb_prms; 6646 ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb; 6647 6648 WORD32 i4_max_mv_x_in_ctb; 6649 WORD32 i4_max_mv_y_in_ctb; 6650 void *pv_dep_mngr_encloop_dep_me; 6651 WORD32 offset_val, check_dep_pos, set_dep_pos; 6652 WORD32 left_ctb_in_diff_tile, i4_first_ctb_x = 0; 6653 6654 pv_dep_mngr_encloop_dep_me = ps_ctxt->pv_dep_mngr_encloop_dep_me; 6655 6656 ps_frm_ctb_prms = (frm_ctb_ctxt_t *)ps_thrd_ctxt->pv_ext_frm_prms; 6657 6658 /* Get the current row from the job queue */ 6659 ps_job = (job_queue_t *)ihevce_enc_grp_get_next_job( 6660 ps_multi_thrd_ctxt, lyr_job_type, 1, me_frm_id); 6661 6662 /* If all rows are done, set the end of process flag to 1, */ 6663 /* and the current row to -1 */ 6664 if(NULL == ps_job) 6665 { 6666 blk_y = -1; 6667 i4_ctb_y = -1; 6668 tile_col_idx = -1; 6669 end_of_frame = 1; 6670 6671 continue; 
6672 } 6673 6674 /* set the output dependency after picking up the row */ 6675 ihevce_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, me_frm_id); 6676 6677 /* Obtain the current row's details from the job */ 6678 { 6679 ihevce_tile_params_t *ps_col_tile_params; 6680 6681 i4_ctb_y = ps_job->s_job_info.s_me_job_info.i4_vert_unit_row_no; 6682 /* Obtain the current colum tile index from the job */ 6683 tile_col_idx = ps_job->s_job_info.s_me_job_info.i4_tile_col_idx; 6684 6685 /* in encode layer block are 16x16 and CTB is 64 x 64 */ 6686 /* note if ctb is 32x32 the this calc needs to be changed */ 6687 num_sync_units_in_row = (i4_pic_wd + ((1 << ps_ctxt->log_ctb_size) - 1)) >> 6688 ps_ctxt->log_ctb_size; 6689 6690 /* The tile parameter for the col. idx. Use only the properties 6691 which is same for all the bottom tiles like width, start_x, etc. 6692 Don't use height, start_y, etc. */ 6693 ps_col_tile_params = 6694 ((ihevce_tile_params_t *)ps_thrd_ctxt->pv_tile_params_base + tile_col_idx); 6695 /* in encode layer block are 16x16 and CTB is 64 x 64 */ 6696 /* note if ctb is 32x32 the this calc needs to be changed */ 6697 num_sync_units_in_tile = 6698 (ps_col_tile_params->i4_curr_tile_width + ((1 << ps_ctxt->log_ctb_size) - 1)) >> 6699 ps_ctxt->log_ctb_size; 6700 6701 i4_first_ctb_x = ps_col_tile_params->i4_first_ctb_x; 6702 i4_ctb_x = i4_first_ctb_x; 6703 6704 if(!num_act_ref_pics) 6705 { 6706 for(i4_ctb_x = i4_first_ctb_x; 6707 i4_ctb_x < (ps_col_tile_params->i4_first_ctb_x + num_sync_units_in_tile); 6708 i4_ctb_x++) 6709 { 6710 S32 blk_i = 0, blk_j = 0; 6711 /* set the dependency for the corresponding row in enc loop */ 6712 ihevce_dmgr_set_row_row_sync( 6713 pv_dep_mngr_encloop_dep_me, 6714 (i4_ctb_x + 1), 6715 i4_ctb_y, 6716 tile_col_idx /* Col Tile No. */); 6717 } 6718 6719 continue; 6720 } 6721 6722 /* increment the number of rows proc */ 6723 num_rows_proc++; 6724 6725 /* Set Variables for Dep. 
Checking and Setting */ 6726 set_dep_pos = i4_ctb_y + 1; 6727 if(i4_ctb_y > 0) 6728 { 6729 offset_val = 2; 6730 check_dep_pos = i4_ctb_y - 1; 6731 } 6732 else 6733 { 6734 /* First row should run without waiting */ 6735 offset_val = -1; 6736 check_dep_pos = 0; 6737 } 6738 6739 /* row ctb out pointer */ 6740 ps_ctxt->ps_ctb_analyse_curr_row = 6741 ps_ctxt->ps_ctb_analyse_base + i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz; 6742 6743 /* Row level CU Tree buffer */ 6744 ps_ctxt->ps_cu_tree_curr_row = 6745 ps_ctxt->ps_cu_tree_base + 6746 i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz * MAX_NUM_NODES_CU_TREE; 6747 6748 ps_ctxt->ps_me_ctb_data_curr_row = 6749 ps_ctxt->ps_me_ctb_data_base + i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz; 6750 } 6751 6752 /* This flag says the CTB under processing is at the start of tile in horz dir.*/ 6753 left_ctb_in_diff_tile = 1; 6754 6755 /* To make sure no 64-bit overflow happens when inv_wt is multiplied with un-normalized src_var, */ 6756 /* the shift value will be passed onto the functions wherever inv_wt isused so that inv_wt is appropriately shift and multiplied */ 6757 { 6758 S32 i4_ref_id, i4_bits_req; 6759 6760 for(i4_ref_id = 0; i4_ref_id < (ps_ctxt->s_frm_prms.u1_num_active_ref_l0 + 6761 ps_ctxt->s_frm_prms.u1_num_active_ref_l1); 6762 i4_ref_id++) 6763 { 6764 GETRANGE(i4_bits_req, ps_ctxt->s_wt_pred.a_inv_wpred_wt[i4_ref_id]); 6765 6766 if(i4_bits_req > 12) 6767 { 6768 ps_ctxt->s_wt_pred.ai4_shift_val[i4_ref_id] = (i4_bits_req - 12); 6769 } 6770 else 6771 { 6772 ps_ctxt->s_wt_pred.ai4_shift_val[i4_ref_id] = 0; 6773 } 6774 } 6775 6776 s_common_frm_prms.pi4_inv_wt_shift_val = ps_ctxt->s_wt_pred.ai4_shift_val; 6777 } 6778 6779 /* if non-encode layer then i4_ctb_x will be same as blk_x */ 6780 /* loop over all the units is a row */ 6781 for(i4_ctb_x = i4_first_ctb_x; i4_ctb_x < (i4_first_ctb_x + num_sync_units_in_tile); 6782 i4_ctb_x++) 6783 { 6784 ihevce_ctb_noise_params *ps_ctb_noise_params = 6785 
&ps_ctxt->ps_ctb_analyse_curr_row[i4_ctb_x].s_ctb_noise_params; 6786 6787 s_common_frm_prms.i4_ctb_x_off = i4_ctb_x << 6; 6788 s_common_frm_prms.i4_ctb_y_off = i4_ctb_y << 6; 6789 6790 ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y = i4_ctb_y << 6; 6791 ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_x = i4_ctb_x << 6; 6792 /* Initialize ptr to current IPE CTB */ 6793 ps_cur_ipe_ctb = ps_ctxt->ps_ipe_l0_ctb_frm_base + i4_ctb_x + 6794 i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz; 6795 { 6796 ps_ctb_bound_attrs = 6797 get_ctb_attrs(i4_ctb_x << 6, i4_ctb_y << 6, i4_pic_wd, i4_pic_ht, ps_ctxt); 6798 6799 en_merge_32x32 = ps_ctb_bound_attrs->u1_merge_to_32x32_flag; 6800 num_blks_in_this_ctb = ps_ctb_bound_attrs->u1_num_blks_in_ctb; 6801 } 6802 6803 /* Block to initialise pointers to part_type_results_t */ 6804 /* in each size-specific inter_cu_results_t */ 6805 { 6806 WORD32 i; 6807 6808 for(i = 0; i < 64; i++) 6809 { 6810 ps_ctxt->as_cu8x8_results[i].ps_best_results = 6811 ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x] 6812 .as_8x8_block_data[i] 6813 .as_best_results; 6814 ps_ctxt->as_cu8x8_results[i].u1_num_best_results = 0; 6815 } 6816 6817 for(i = 0; i < 16; i++) 6818 { 6819 ps_ctxt->as_cu16x16_results[i].ps_best_results = 6820 ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x].as_block_data[i].as_best_results; 6821 ps_ctxt->as_cu16x16_results[i].u1_num_best_results = 0; 6822 } 6823 6824 for(i = 0; i < 4; i++) 6825 { 6826 ps_ctxt->as_cu32x32_results[i].ps_best_results = 6827 ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x] 6828 .as_32x32_block_data[i] 6829 .as_best_results; 6830 ps_ctxt->as_cu32x32_results[i].u1_num_best_results = 0; 6831 } 6832 6833 ps_ctxt->s_cu64x64_results.ps_best_results = 6834 ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x].s_64x64_block_data.as_best_results; 6835 ps_ctxt->s_cu64x64_results.u1_num_best_results = 0; 6836 } 6837 6838 if(ME_PRISTINE_QUALITY == e_me_quality_presets) 6839 { 6840 ps_ctb_cluster_info->blk_32x32_mask = en_merge_32x32; 6841 ps_ctb_cluster_info->ps_cur_ipe_ctb = 
ps_cur_ipe_ctb; 6842 ps_ctb_cluster_info->ps_cu_tree_root = 6843 ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE); 6844 ps_ctb_cluster_info->nodes_created_in_cu_tree = 1; 6845 } 6846 6847 if(ME_PRISTINE_QUALITY != e_me_quality_presets) 6848 { 6849 S32 i4_nodes_created_in_cu_tree = 1; 6850 6851 ihevce_cu_tree_init( 6852 (ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE)), 6853 (ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE)), 6854 &i4_nodes_created_in_cu_tree, 6855 0, 6856 POS_NA, 6857 POS_NA, 6858 POS_NA); 6859 } 6860 6861 memset(ai4_blk_8x8_mask, 0, 16 * sizeof(S32)); 6862 6863 if(ps_refine_prms->u1_use_lambda_derived_from_min_8x8_act_in_ctb) 6864 { 6865 S32 j; 6866 6867 ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb; 6868 6869 ps_cur_ipe_ctb = 6870 ps_ctxt->ps_ipe_l0_ctb_frm_base + i4_ctb_x + i4_ctb_y * num_sync_units_in_row; 6871 lambda_recon = 6872 hme_recompute_lambda_from_min_8x8_act_in_ctb(ps_ctxt, ps_cur_ipe_ctb); 6873 6874 lambda_recon = ((float)lambda_recon * (100.0f - ME_LAMBDA_DISCOUNT) / 100.0f); 6875 6876 for(i = 0; i < 4; i++) 6877 { 6878 ps_search_results = &ps_ctxt->as_search_results_32x32[i]; 6879 6880 for(j = 0; j < 2; j++) 6881 { 6882 ps_search_results->as_pred_ctxt[j].lambda = lambda_recon; 6883 } 6884 } 6885 ps_search_results = &ps_ctxt->s_search_results_64x64; 6886 6887 for(j = 0; j < 2; j++) 6888 { 6889 ps_search_results->as_pred_ctxt[j].lambda = lambda_recon; 6890 } 6891 6892 s_common_frm_prms.i4_lamda = lambda_recon; 6893 } 6894 else 6895 { 6896 lambda_recon = ps_refine_prms->lambda_recon; 6897 } 6898 6899 /*********************************************************************/ 6900 /* replicate the inp buffer at blk or ctb level for each ref id, */ 6901 /* Instead of searching with wk * ref(k), we search with Ik = I / wk */ 6902 /* thereby avoiding a bloat up of memory. 
If we did all references */ 6903 /* weighted pred, we will end up with a duplicate copy of each ref */ 6904 /* at each layer, since we need to preserve the original reference. */ 6905 /* ToDo: Need to observe performance with this mechanism and compare */ 6906 /* with case where ref is weighted. */ 6907 /*********************************************************************/ 6908 fp_get_wt_inp( 6909 ps_curr_layer, 6910 &ps_ctxt->s_wt_pred, 6911 unit_size, 6912 s_common_frm_prms.i4_ctb_x_off, 6913 s_common_frm_prms.i4_ctb_y_off, 6914 unit_size, 6915 ps_ctxt->num_ref_future + ps_ctxt->num_ref_past, 6916 ps_ctxt->i4_wt_pred_enable_flag); 6917 6918 if(ps_thrd_ctxt->s_init_prms.u1_is_stasino_enabled) 6919 { 6920 #if TEMPORAL_NOISE_DETECT 6921 { 6922 WORD32 had_block_size = 16; 6923 WORD32 ctb_width = ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64) 6924 ? 64 6925 : i4_pic_wd - s_common_frm_prms.i4_ctb_x_off; 6926 WORD32 ctb_height = ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64) 6927 ? 64 6928 : i4_pic_ht - s_common_frm_prms.i4_ctb_y_off; 6929 WORD32 num_pred_dir = i4_num_pred_dir; 6930 WORD32 i4_x_off = s_common_frm_prms.i4_ctb_x_off; 6931 WORD32 i4_y_off = s_common_frm_prms.i4_ctb_y_off; 6932 6933 WORD32 i; 6934 WORD32 noise_detected; 6935 WORD32 ctb_size; 6936 WORD32 num_comp_had_blocks; 6937 WORD32 noisy_block_cnt; 6938 WORD32 index_8x8_block; 6939 WORD32 num_8x8_in_ctb_row; 6940 6941 WORD32 ht_offset; 6942 WORD32 wd_offset; 6943 WORD32 block_ht; 6944 WORD32 block_wd; 6945 6946 WORD32 num_horz_blocks; 6947 WORD32 num_vert_blocks; 6948 6949 WORD32 mean; 6950 UWORD32 variance_8x8; 6951 6952 WORD32 hh_energy_percent; 6953 6954 /* variables to hold the constant values. The variable values held are decided by the HAD block size */ 6955 WORD32 min_noisy_block_cnt; 6956 WORD32 min_coeffs_above_avg; 6957 WORD32 min_coeff_avg_energy; 6958 6959 /* to store the mean and variance of each 8*8 block and find the variance of any higher block sizes later on. 
block */ 6960 WORD32 i4_cu_x_off, i4_cu_y_off; 6961 WORD32 is_noisy; 6962 6963 /* intialise the variables holding the constants */ 6964 if(had_block_size == 8) 6965 { 6966 min_noisy_block_cnt = MIN_NOISY_BLOCKS_CNT_8x8; //6;// 6967 min_coeffs_above_avg = MIN_NUM_COEFFS_ABOVE_AVG_8x8; 6968 min_coeff_avg_energy = MIN_COEFF_AVG_ENERGY_8x8; 6969 } 6970 else 6971 { 6972 min_noisy_block_cnt = MIN_NOISY_BLOCKS_CNT_16x16; //7;// 6973 min_coeffs_above_avg = MIN_NUM_COEFFS_ABOVE_AVG_16x16; 6974 min_coeff_avg_energy = MIN_COEFF_AVG_ENERGY_16x16; 6975 } 6976 6977 /* initialize the variables */ 6978 noise_detected = 0; 6979 noisy_block_cnt = 0; 6980 hh_energy_percent = 0; 6981 variance_8x8 = 0; 6982 block_ht = ctb_height; 6983 block_wd = ctb_width; 6984 6985 mean = 0; 6986 6987 ctb_size = block_ht * block_wd; //ctb_width * ctb_height; 6988 num_comp_had_blocks = ctb_size / (had_block_size * had_block_size); 6989 6990 num_horz_blocks = block_wd / had_block_size; //ctb_width / had_block_size; 6991 num_vert_blocks = block_ht / had_block_size; //ctb_height / had_block_size; 6992 6993 ht_offset = -had_block_size; 6994 wd_offset = -had_block_size; 6995 6996 num_8x8_in_ctb_row = block_wd / 8; // number of 8x8 in this ctb 6997 for(i = 0; i < num_comp_had_blocks; i++) 6998 { 6999 if(i % num_horz_blocks == 0) 7000 { 7001 wd_offset = -had_block_size; 7002 ht_offset += had_block_size; 7003 } 7004 wd_offset += had_block_size; 7005 7006 /* CU level offsets */ 7007 i4_cu_x_off = i4_x_off + (i % 4) * 16; //+ (i % 4) * 16 7008 i4_cu_y_off = i4_y_off + (i / 4) * 16; 7009 7010 /* if 50 % or more of the CU is noisy then the return value is 1 */ 7011 is_noisy = ihevce_determine_cu_noise_based_on_8x8Blk_data( 7012 ps_ctb_noise_params->au1_is_8x8Blk_noisy, 7013 (i % 4) * 16, 7014 (i / 4) * 16, 7015 16); 7016 7017 /* only if the CU is noisy then check the temporal noise detect call is made on the CU */ 7018 if(is_noisy) 7019 { 7020 index_8x8_block = (i / num_horz_blocks) * 2 * num_8x8_in_ctb_row + 7021 
(i % num_horz_blocks) * 2; 7022 noisy_block_cnt += ihevce_16x16block_temporal_noise_detect( 7023 16, 7024 ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64) 7025 ? 64 7026 : i4_pic_wd - s_common_frm_prms.i4_ctb_x_off, 7027 ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64) 7028 ? 64 7029 : i4_pic_ht - s_common_frm_prms.i4_ctb_y_off, 7030 ps_ctb_noise_params, 7031 &s_srch_cand_init_data, 7032 &s_search_prms_blk, 7033 ps_ctxt, 7034 num_pred_dir, 7035 i4_num_act_ref_l0, 7036 i4_num_act_ref_l1, 7037 i4_cu_x_off, 7038 i4_cu_y_off, 7039 &ps_ctxt->s_wt_pred, 7040 unit_size, 7041 index_8x8_block, 7042 num_horz_blocks, 7043 /*num_8x8_in_ctb_row*/ 8, // this should be a variable extra 7044 i); 7045 } /* if 16x16 is noisy */ 7046 } /* loop over for all 16x16*/ 7047 7048 if(noisy_block_cnt >= min_noisy_block_cnt) 7049 { 7050 noise_detected = 1; 7051 } 7052 7053 /* write back the noise presence detected for the current CTB to the structure */ 7054 ps_ctb_noise_params->i4_noise_present = noise_detected; 7055 } 7056 #endif 7057 7058 #if EVERYWHERE_NOISY && USE_NOISE_TERM_IN_L0_ME 7059 if(ps_thrd_ctxt->s_init_prms.u1_is_stasino_enabled && 7060 ps_ctb_noise_params->i4_noise_present) 7061 { 7062 memset( 7063 ps_ctb_noise_params->au1_is_8x8Blk_noisy, 7064 1, 7065 sizeof(ps_ctb_noise_params->au1_is_8x8Blk_noisy)); 7066 } 7067 #endif 7068 7069 for(i = 0; i < 16; i++) 7070 { 7071 au1_is_16x16Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data( 7072 ps_ctb_noise_params->au1_is_8x8Blk_noisy, (i % 4) * 16, (i / 4) * 16, 16); 7073 } 7074 7075 for(i = 0; i < 4; i++) 7076 { 7077 au1_is_32x32Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data( 7078 ps_ctb_noise_params->au1_is_8x8Blk_noisy, (i % 2) * 32, (i / 2) * 32, 32); 7079 } 7080 7081 for(i = 0; i < 1; i++) 7082 { 7083 au1_is_64x64Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data( 7084 ps_ctb_noise_params->au1_is_8x8Blk_noisy, 0, 0, 64); 7085 } 7086 7087 if(ps_ctxt->s_frm_prms.bidir_enabled && 7088 
(ps_ctxt->s_frm_prms.i4_temporal_layer_id <= 7089 MAX_LAYER_ID_OF_B_PICS_WITHOUT_NOISE_DETECTION)) 7090 { 7091 ps_ctb_noise_params->i4_noise_present = 0; 7092 memset( 7093 ps_ctb_noise_params->au1_is_8x8Blk_noisy, 7094 0, 7095 sizeof(ps_ctb_noise_params->au1_is_8x8Blk_noisy)); 7096 } 7097 7098 #if ME_LAMBDA_DISCOUNT_WHEN_NOISY 7099 for(i = 0; i < 4; i++) 7100 { 7101 S32 j; 7102 S32 lambda; 7103 7104 if(au1_is_32x32Blk_noisy[i]) 7105 { 7106 lambda = lambda_recon; 7107 lambda = 7108 ((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f); 7109 7110 ps_search_results = &ps_ctxt->as_search_results_32x32[i]; 7111 7112 for(j = 0; j < 2; j++) 7113 { 7114 ps_search_results->as_pred_ctxt[j].lambda = lambda; 7115 } 7116 } 7117 } 7118 7119 { 7120 S32 j; 7121 S32 lambda; 7122 7123 if(au1_is_64x64Blk_noisy[0]) 7124 { 7125 lambda = lambda_recon; 7126 lambda = 7127 ((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f); 7128 7129 ps_search_results = &ps_ctxt->s_search_results_64x64; 7130 7131 for(j = 0; j < 2; j++) 7132 { 7133 ps_search_results->as_pred_ctxt[j].lambda = lambda; 7134 } 7135 } 7136 } 7137 #endif 7138 if(au1_is_64x64Blk_noisy[0]) 7139 { 7140 U08 *pu1_inp = ps_curr_layer->pu1_inp + (s_common_frm_prms.i4_ctb_x_off + 7141 (s_common_frm_prms.i4_ctb_y_off * 7142 ps_curr_layer->i4_inp_stride)); 7143 7144 hme_compute_sigmaX_and_sigmaXSquared( 7145 pu1_inp, 7146 ps_curr_layer->i4_inp_stride, 7147 ps_ctxt->au4_4x4_src_sigmaX, 7148 ps_ctxt->au4_4x4_src_sigmaXSquared, 7149 4, 7150 4, 7151 64, 7152 64, 7153 1, 7154 16); 7155 } 7156 else 7157 { 7158 for(i = 0; i < 4; i++) 7159 { 7160 if(au1_is_32x32Blk_noisy[i]) 7161 { 7162 U08 *pu1_inp = 7163 ps_curr_layer->pu1_inp + 7164 (s_common_frm_prms.i4_ctb_x_off + 7165 (s_common_frm_prms.i4_ctb_y_off * ps_curr_layer->i4_inp_stride)); 7166 7167 U08 u1_cu_size = 32; 7168 WORD32 i4_inp_buf_offset = 7169 (((i / 2) * (u1_cu_size * ps_curr_layer->i4_inp_stride)) + 7170 ((i % 2) * u1_cu_size)); 7171 7172 U16 
u2_sigma_arr_start_index_of_3rd_32x32_blk_in_ctb = 128; 7173 U16 u2_sigma_arr_start_index_of_2nd_32x32_blk_in_ctb = 8; 7174 S32 i4_sigma_arr_offset = 7175 (((i / 2) * u2_sigma_arr_start_index_of_3rd_32x32_blk_in_ctb) + 7176 ((i % 2) * u2_sigma_arr_start_index_of_2nd_32x32_blk_in_ctb)); 7177 7178 hme_compute_sigmaX_and_sigmaXSquared( 7179 pu1_inp + i4_inp_buf_offset, 7180 ps_curr_layer->i4_inp_stride, 7181 ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_arr_offset, 7182 ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_arr_offset, 7183 4, 7184 4, 7185 32, 7186 32, 7187 1, 7188 16); 7189 } 7190 else 7191 { 7192 S32 j; 7193 7194 U08 u1_16x16_blk_start_index_in_3rd_32x32_blk_of_ctb = 8; 7195 U08 u1_16x16_blk_start_index_in_2nd_32x32_blk_of_ctb = 2; 7196 S32 i4_16x16_blk_start_index_in_i_th_32x32_blk = 7197 (((i / 2) * u1_16x16_blk_start_index_in_3rd_32x32_blk_of_ctb) + 7198 ((i % 2) * u1_16x16_blk_start_index_in_2nd_32x32_blk_of_ctb)); 7199 7200 for(j = 0; j < 4; j++) 7201 { 7202 U08 u1_3rd_16x16_blk_index_in_32x32_blk = 4; 7203 U08 u1_2nd_16x16_blk_index_in_32x32_blk = 1; 7204 S32 i4_16x16_blk_index_in_ctb = 7205 i4_16x16_blk_start_index_in_i_th_32x32_blk + 7206 ((j % 2) * u1_2nd_16x16_blk_index_in_32x32_blk) + 7207 ((j / 2) * u1_3rd_16x16_blk_index_in_32x32_blk); 7208 7209 //S32 k = (((i / 2) * 8) + ((i % 2) * 2)) + ((j % 2) * 1) + ((j / 2) * 4); 7210 7211 if(au1_is_16x16Blk_noisy[i4_16x16_blk_index_in_ctb]) 7212 { 7213 U08 *pu1_inp = 7214 ps_curr_layer->pu1_inp + (s_common_frm_prms.i4_ctb_x_off + 7215 (s_common_frm_prms.i4_ctb_y_off * 7216 ps_curr_layer->i4_inp_stride)); 7217 7218 U08 u1_cu_size = 16; 7219 WORD32 i4_inp_buf_offset = 7220 (((i4_16x16_blk_index_in_ctb % 4) * u1_cu_size) + 7221 ((i4_16x16_blk_index_in_ctb / 4) * 7222 (u1_cu_size * ps_curr_layer->i4_inp_stride))); 7223 7224 U16 u2_sigma_arr_start_index_of_3rd_16x16_blk_in_32x32_blk = 64; 7225 U16 u2_sigma_arr_start_index_of_2nd_16x16_blk_in_32x32_blk = 4; 7226 S32 i4_sigma_arr_offset = 7227 
(((i4_16x16_blk_index_in_ctb % 4) * 7228 u2_sigma_arr_start_index_of_2nd_16x16_blk_in_32x32_blk) + 7229 ((i4_16x16_blk_index_in_ctb / 4) * 7230 u2_sigma_arr_start_index_of_3rd_16x16_blk_in_32x32_blk)); 7231 7232 hme_compute_sigmaX_and_sigmaXSquared( 7233 pu1_inp + i4_inp_buf_offset, 7234 ps_curr_layer->i4_inp_stride, 7235 (ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_arr_offset), 7236 (ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_arr_offset), 7237 4, 7238 4, 7239 16, 7240 16, 7241 1, 7242 16); 7243 } 7244 } 7245 } 7246 } 7247 } 7248 } 7249 else 7250 { 7251 memset(au1_is_16x16Blk_noisy, 0, sizeof(au1_is_16x16Blk_noisy)); 7252 7253 memset(au1_is_32x32Blk_noisy, 0, sizeof(au1_is_32x32Blk_noisy)); 7254 7255 memset(au1_is_64x64Blk_noisy, 0, sizeof(au1_is_64x64Blk_noisy)); 7256 } 7257 7258 for(blk_id_in_ctb = 0; blk_id_in_ctb < num_blks_in_this_ctb; blk_id_in_ctb++) 7259 { 7260 S32 ref_ctr; 7261 U08 au1_pred_dir_searched[2]; 7262 U08 u1_is_cu_noisy; 7263 ULWORD64 au8_final_src_sigmaX[17], au8_final_src_sigmaXSquared[17]; 7264 7265 { 7266 blk_x = (i4_ctb_x << 2) + 7267 (ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_x); 7268 blk_y = (i4_ctb_y << 2) + 7269 (ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_y); 7270 7271 blk_id_in_full_ctb = 7272 ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_id_in_full_ctb; 7273 blk_8x8_mask = ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_8x8_mask; 7274 ai4_blk_8x8_mask[blk_id_in_full_ctb] = blk_8x8_mask; 7275 s_search_prms_blk.i4_cu_x_off = (blk_x << blk_size_shift) - (i4_ctb_x << 6); 7276 s_search_prms_blk.i4_cu_y_off = (blk_y << blk_size_shift) - (i4_ctb_y << 6); 7277 } 7278 7279 /* get the current input blk point */ 7280 pos_x = blk_x << blk_size_shift; 7281 pos_y = blk_y << blk_size_shift; 7282 pu1_inp = ps_curr_layer->pu1_inp + pos_x + (pos_y * i4_inp_stride); 7283 7284 /*********************************************************************/ 7285 /* For every blk in the picture, the search range needs to be 
derived*/ 7286 /* Any blk can have any mv, but practical search constraints are */ 7287 /* imposed by the picture boundary and amt of padding. */ 7288 /*********************************************************************/ 7289 /* MV limit is different based on ref. PIC */ 7290 for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++) 7291 { 7292 if(!s_search_prms_blk.i4_use_rec) 7293 { 7294 hme_derive_search_range( 7295 &as_range_prms_inp[ref_ctr], 7296 &s_pic_limit_inp, 7297 &as_mv_limit[ref_ctr], 7298 pos_x, 7299 pos_y, 7300 blk_wd, 7301 blk_ht); 7302 } 7303 else 7304 { 7305 hme_derive_search_range( 7306 &as_range_prms_rec[ref_ctr], 7307 &s_pic_limit_rec, 7308 &as_mv_limit[ref_ctr], 7309 pos_x, 7310 pos_y, 7311 blk_wd, 7312 blk_ht); 7313 } 7314 } 7315 s_search_prms_blk.i4_x_off = blk_x << blk_size_shift; 7316 s_search_prms_blk.i4_y_off = blk_y << blk_size_shift; 7317 /* Select search results from a suitable search result in the context */ 7318 { 7319 ps_search_results = &ps_ctxt->as_search_results_16x16[blk_id_in_full_ctb]; 7320 7321 if(ps_refine_prms->u1_use_lambda_derived_from_min_8x8_act_in_ctb) 7322 { 7323 S32 i; 7324 7325 for(i = 0; i < 2; i++) 7326 { 7327 ps_search_results->as_pred_ctxt[i].lambda = lambda_recon; 7328 } 7329 } 7330 } 7331 7332 u1_is_cu_noisy = au1_is_16x16Blk_noisy 7333 [(s_search_prms_blk.i4_cu_x_off >> 4) + (s_search_prms_blk.i4_cu_y_off >> 2)]; 7334 7335 s_subpel_prms.u1_is_cu_noisy = u1_is_cu_noisy; 7336 7337 #if ME_LAMBDA_DISCOUNT_WHEN_NOISY 7338 if(u1_is_cu_noisy) 7339 { 7340 S32 j; 7341 S32 lambda; 7342 7343 lambda = lambda_recon; 7344 lambda = ((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f); 7345 7346 for(j = 0; j < 2; j++) 7347 { 7348 ps_search_results->as_pred_ctxt[j].lambda = lambda; 7349 } 7350 } 7351 else 7352 { 7353 S32 j; 7354 S32 lambda; 7355 7356 lambda = lambda_recon; 7357 7358 for(j = 0; j < 2; j++) 7359 { 7360 ps_search_results->as_pred_ctxt[j].lambda = lambda; 7361 } 7362 } 7363 #endif 7364 7365 
s_search_prms_blk.ps_search_results = ps_search_results; 7366 7367 s_search_prms_blk.i4_part_mask = hme_part_mask_populator( 7368 pu1_inp, 7369 i4_inp_stride, 7370 ps_refine_prms->limit_active_partitions, 7371 ps_ctxt->ps_hme_frm_prms->bidir_enabled, 7372 ps_ctxt->u1_is_curFrame_a_refFrame, 7373 blk_8x8_mask, 7374 e_me_quality_presets); 7375 7376 if(ME_PRISTINE_QUALITY == e_me_quality_presets) 7377 { 7378 ps_ctb_cluster_info->ai4_part_mask[blk_id_in_full_ctb] = 7379 s_search_prms_blk.i4_part_mask; 7380 } 7381 7382 /* RESET ALL SEARCH RESULTS FOR THE NEW BLK */ 7383 { 7384 /* Setting u1_num_active_refs to 2 */ 7385 /* for the sole purpose of the */ 7386 /* function called below */ 7387 ps_search_results->u1_num_active_ref = (ps_refine_prms->bidir_enabled) ? 2 : 1; 7388 7389 hme_reset_search_results( 7390 ps_search_results, s_search_prms_blk.i4_part_mask, MV_RES_FPEL); 7391 7392 ps_search_results->u1_num_active_ref = i4_num_pred_dir; 7393 } 7394 7395 if(0 == blk_id_in_ctb) 7396 { 7397 UWORD8 u1_ctr; 7398 for(u1_ctr = 0; u1_ctr < (ps_ctxt->s_frm_prms.u1_num_active_ref_l0 + 7399 ps_ctxt->s_frm_prms.u1_num_active_ref_l1); 7400 u1_ctr++) 7401 { 7402 WORD32 i4_max_dep_ctb_y; 7403 WORD32 i4_max_dep_ctb_x; 7404 7405 /* Set max mv in ctb units */ 7406 i4_max_mv_x_in_ctb = 7407 (ps_curr_layer->i2_max_mv_x + ((1 << ps_ctxt->log_ctb_size) - 1)) >> 7408 ps_ctxt->log_ctb_size; 7409 7410 i4_max_mv_y_in_ctb = 7411 (as_mv_limit[u1_ctr].i2_max_y + ((1 << ps_ctxt->log_ctb_size) - 1)) >> 7412 ps_ctxt->log_ctb_size; 7413 /********************************************************************/ 7414 /* Set max ctb_x and ctb_y dependency on reference picture */ 7415 /* Note +1 is due to delayed deblock, SAO, subpel plan dependency */ 7416 /********************************************************************/ 7417 i4_max_dep_ctb_x = CLIP3( 7418 (i4_ctb_x + i4_max_mv_x_in_ctb + 1), 7419 0, 7420 ps_frm_ctb_prms->i4_num_ctbs_horz - 1); 7421 i4_max_dep_ctb_y = CLIP3( 7422 (i4_ctb_y + 
i4_max_mv_y_in_ctb + 1), 7423 0, 7424 ps_frm_ctb_prms->i4_num_ctbs_vert - 1); 7425 7426 ihevce_dmgr_map_chk_sync( 7427 ps_curr_layer->ppv_dep_mngr_recon[u1_ctr], 7428 ps_ctxt->thrd_id, 7429 i4_ctb_x, 7430 i4_ctb_y, 7431 i4_max_mv_x_in_ctb, 7432 i4_max_mv_y_in_ctb); 7433 } 7434 } 7435 7436 /* Loop across different Ref IDx */ 7437 for(u1_pred_dir_ctr = 0; u1_pred_dir_ctr < i4_num_pred_dir; u1_pred_dir_ctr++) 7438 { 7439 S32 resultid; 7440 S08 u1_default_ref_id; 7441 S32 i4_num_srch_cands = 0; 7442 S32 i4_num_refinement_iterations; 7443 S32 i4_refine_iter_ctr; 7444 7445 if((i4_num_pred_dir == 2) || (!ps_ctxt->s_frm_prms.bidir_enabled) || 7446 (ps_ctxt->s_frm_prms.u1_num_active_ref_l1 == 0)) 7447 { 7448 u1_pred_dir = u1_pred_dir_ctr; 7449 } 7450 else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l0 == 0) 7451 { 7452 u1_pred_dir = 1; 7453 } 7454 7455 u1_default_ref_id = (u1_pred_dir == 0) ? ps_ctxt->ai1_past_list[0] 7456 : ps_ctxt->ai1_future_list[0]; 7457 au1_pred_dir_searched[u1_pred_dir_ctr] = u1_pred_dir; 7458 7459 i4_num_srch_cands = 0; 7460 resultid = 0; 7461 7462 /* START OF NEW CTB MEANS FILL UP NEOGHBOURS IN 18x18 GRID */ 7463 if(0 == blk_id_in_ctb) 7464 { 7465 /*****************************************************************/ 7466 /* Initialize the mv grid with results of neighbours for the next*/ 7467 /* ctb. */ 7468 /*****************************************************************/ 7469 hme_fill_ctb_neighbour_mvs( 7470 ps_curr_layer, 7471 blk_x, 7472 blk_y, 7473 aps_mv_grid[u1_pred_dir], 7474 u1_pred_dir_ctr, 7475 u1_default_ref_id, 7476 ps_ctxt->s_frm_prms.u1_num_active_ref_l0); 7477 } 7478 7479 s_search_prms_blk.i1_ref_idx = u1_pred_dir; 7480 7481 { 7482 if((blk_id_in_full_ctb % 4) == 0) 7483 { 7484 ps_ctxt->as_search_results_32x32[blk_id_in_full_ctb >> 2] 7485 .as_pred_ctxt[u1_pred_dir] 7486 .proj_used = (blk_id_in_full_ctb == 8) ? 
0 : 1; 7487 } 7488 7489 if(blk_id_in_full_ctb == 0) 7490 { 7491 ps_ctxt->s_search_results_64x64.as_pred_ctxt[u1_pred_dir].proj_used = 1; 7492 } 7493 7494 ps_search_results->as_pred_ctxt[u1_pred_dir].proj_used = 7495 !gau1_encode_to_raster_y[blk_id_in_full_ctb]; 7496 } 7497 7498 { 7499 S32 x = gau1_encode_to_raster_x[blk_id_in_full_ctb]; 7500 S32 y = gau1_encode_to_raster_y[blk_id_in_full_ctb]; 7501 U08 u1_is_blk_at_ctb_boundary = !y; 7502 7503 s_srch_cand_init_data.u1_is_left_available = 7504 !(left_ctb_in_diff_tile && !s_search_prms_blk.i4_cu_x_off); 7505 7506 if(u1_is_blk_at_ctb_boundary) 7507 { 7508 s_srch_cand_init_data.u1_is_topRight_available = 0; 7509 s_srch_cand_init_data.u1_is_topLeft_available = 0; 7510 s_srch_cand_init_data.u1_is_top_available = 0; 7511 } 7512 else 7513 { 7514 s_srch_cand_init_data.u1_is_topRight_available = 7515 gau1_cu_tr_valid[y][x] && ((pos_x + blk_wd) < i4_pic_wd); 7516 s_srch_cand_init_data.u1_is_top_available = 1; 7517 s_srch_cand_init_data.u1_is_topLeft_available = 7518 s_srch_cand_init_data.u1_is_left_available; 7519 } 7520 } 7521 7522 s_srch_cand_init_data.i1_default_ref_id = u1_default_ref_id; 7523 s_srch_cand_init_data.i1_alt_default_ref_id = ps_ctxt->ai1_past_list[1]; 7524 s_srch_cand_init_data.i4_pos_x = pos_x; 7525 s_srch_cand_init_data.i4_pos_y = pos_y; 7526 s_srch_cand_init_data.u1_pred_dir = u1_pred_dir; 7527 s_srch_cand_init_data.u1_pred_dir_ctr = u1_pred_dir_ctr; 7528 s_srch_cand_init_data.u1_search_candidate_list_index = 7529 au1_search_candidate_list_index[u1_pred_dir]; 7530 7531 i4_num_srch_cands = hme_populate_search_candidates(&s_srch_cand_init_data); 7532 7533 /* Note this block also clips the MV range for all candidates */ 7534 { 7535 S08 i1_check_for_mult_refs; 7536 7537 i1_check_for_mult_refs = u1_pred_dir ? 
(ps_ctxt->num_ref_future > 1) 7538 : (ps_ctxt->num_ref_past > 1); 7539 7540 ps_me_optimised_function_list->pf_mv_clipper( 7541 &s_search_prms_blk, 7542 i4_num_srch_cands, 7543 i1_check_for_mult_refs, 7544 ps_refine_prms->i4_num_steps_fpel_refine, 7545 ps_refine_prms->i4_num_steps_hpel_refine, 7546 ps_refine_prms->i4_num_steps_qpel_refine); 7547 } 7548 7549 #if ENABLE_EXPLICIT_SEARCH_IN_P_IN_L0 7550 i4_num_refinement_iterations = 7551 ((!ps_ctxt->s_frm_prms.bidir_enabled) && (i4_num_act_ref_l0 > 1)) 7552 ? ((e_me_quality_presets == ME_HIGH_QUALITY) ? 2 : i4_num_act_ref_l0) 7553 : 1; 7554 #else 7555 i4_num_refinement_iterations = 7556 ((!ps_ctxt->s_frm_prms.bidir_enabled) && (i4_num_act_ref_l0 > 1)) ? 2 : 1; 7557 #endif 7558 7559 #if ENABLE_EXPLICIT_SEARCH_IN_PQ 7560 if(e_me_quality_presets == ME_PRISTINE_QUALITY) 7561 { 7562 i4_num_refinement_iterations = (u1_pred_dir == 0) ? i4_num_act_ref_l0 7563 : i4_num_act_ref_l1; 7564 } 7565 #endif 7566 7567 for(i4_refine_iter_ctr = 0; i4_refine_iter_ctr < i4_num_refinement_iterations; 7568 i4_refine_iter_ctr++) 7569 { 7570 S32 center_x; 7571 S32 center_y; 7572 S32 center_ref_idx; 7573 7574 S08 *pi1_pred_dir_to_ref_idx = 7575 (u1_pred_dir == 0) ? 
ps_ctxt->ai1_past_list : ps_ctxt->ai1_future_list; 7576 7577 { 7578 WORD32 i4_i; 7579 7580 for(i4_i = 0; i4_i < TOT_NUM_PARTS; i4_i++) 7581 { 7582 ps_fullpel_refine_ctxt->i2_tot_cost[0][i4_i] = MAX_SIGNED_16BIT_VAL; 7583 ps_fullpel_refine_ctxt->i2_mv_cost[0][i4_i] = MAX_SIGNED_16BIT_VAL; 7584 ps_fullpel_refine_ctxt->i2_stim_injected_cost[0][i4_i] = 7585 MAX_SIGNED_16BIT_VAL; 7586 ps_fullpel_refine_ctxt->i2_mv_x[0][i4_i] = 0; 7587 ps_fullpel_refine_ctxt->i2_mv_y[0][i4_i] = 0; 7588 ps_fullpel_refine_ctxt->i2_ref_idx[0][i4_i] = u1_default_ref_id; 7589 7590 if(ps_refine_prms->i4_num_results_per_part == 2) 7591 { 7592 ps_fullpel_refine_ctxt->i2_tot_cost[1][i4_i] = 7593 MAX_SIGNED_16BIT_VAL; 7594 ps_fullpel_refine_ctxt->i2_mv_cost[1][i4_i] = 7595 MAX_SIGNED_16BIT_VAL; 7596 ps_fullpel_refine_ctxt->i2_stim_injected_cost[1][i4_i] = 7597 MAX_SIGNED_16BIT_VAL; 7598 ps_fullpel_refine_ctxt->i2_mv_x[1][i4_i] = 0; 7599 ps_fullpel_refine_ctxt->i2_mv_y[1][i4_i] = 0; 7600 ps_fullpel_refine_ctxt->i2_ref_idx[1][i4_i] = u1_default_ref_id; 7601 } 7602 } 7603 7604 s_search_prms_blk.ps_fullpel_refine_ctxt = ps_fullpel_refine_ctxt; 7605 s_subpel_prms.ps_subpel_refine_ctxt = ps_fullpel_refine_ctxt; 7606 } 7607 7608 { 7609 search_node_t *ps_coloc_node; 7610 7611 S32 i = 0; 7612 7613 if(i4_num_refinement_iterations > 1) 7614 { 7615 for(i = 0; i < ai4_num_coloc_cands[u1_pred_dir]; i++) 7616 { 7617 ps_coloc_node = 7618 s_search_prms_blk.ps_search_candts[ai4_id_coloc[i]] 7619 .ps_search_node; 7620 7621 if(pi1_pred_dir_to_ref_idx[i4_refine_iter_ctr] == 7622 ps_coloc_node->i1_ref_idx) 7623 { 7624 break; 7625 } 7626 } 7627 7628 if(i == ai4_num_coloc_cands[u1_pred_dir]) 7629 { 7630 i = 0; 7631 } 7632 } 7633 else 7634 { 7635 ps_coloc_node = s_search_prms_blk.ps_search_candts[ai4_id_coloc[0]] 7636 .ps_search_node; 7637 } 7638 7639 hme_set_mvp_node( 7640 ps_search_results, 7641 ps_coloc_node, 7642 u1_pred_dir, 7643 (i4_num_refinement_iterations > 1) 7644 ? 
pi1_pred_dir_to_ref_idx[i4_refine_iter_ctr] 7645 : u1_default_ref_id); 7646 7647 center_x = ps_coloc_node->ps_mv->i2_mvx; 7648 center_y = ps_coloc_node->ps_mv->i2_mvy; 7649 center_ref_idx = ps_coloc_node->i1_ref_idx; 7650 } 7651 7652 /* Full-Pel search */ 7653 { 7654 S32 num_unique_nodes; 7655 7656 memset(au4_unique_node_map, 0, sizeof(au4_unique_node_map)); 7657 7658 num_unique_nodes = hme_remove_duplicate_fpel_search_candidates( 7659 as_unique_search_nodes, 7660 s_search_prms_blk.ps_search_candts, 7661 au4_unique_node_map, 7662 pi1_pred_dir_to_ref_idx, 7663 i4_num_srch_cands, 7664 s_search_prms_blk.i4_num_init_candts, 7665 i4_refine_iter_ctr, 7666 i4_num_refinement_iterations, 7667 i4_num_act_ref_l0, 7668 center_ref_idx, 7669 center_x, 7670 center_y, 7671 ps_ctxt->s_frm_prms.bidir_enabled, 7672 e_me_quality_presets); 7673 7674 /*************************************************************************/ 7675 /* This array stores the ids of the partitions whose */ 7676 /* SADs are updated. Since the partitions whose SADs are updated may not */ 7677 /* be in contiguous order, we supply another level of indirection. 
*/ 7678 /*************************************************************************/ 7679 ps_fullpel_refine_ctxt->i4_num_valid_parts = hme_create_valid_part_ids( 7680 s_search_prms_blk.i4_part_mask, 7681 &ps_fullpel_refine_ctxt->ai4_part_id[0]); 7682 7683 if(!i4_refine_iter_ctr && !u1_pred_dir_ctr && u1_is_cu_noisy) 7684 { 7685 S32 i; 7686 /*i4_sigma_array_offset : takes care of pointing to the appropriate 4x4 block's sigmaX and sigmaX-squared value in a CTB out of 256 values*/ 7687 S32 i4_sigma_array_offset = (s_search_prms_blk.i4_cu_x_off / 4) + 7688 (s_search_prms_blk.i4_cu_y_off * 4); 7689 7690 for(i = 0; i < ps_fullpel_refine_ctxt->i4_num_valid_parts; i++) 7691 { 7692 S32 i4_part_id = ps_fullpel_refine_ctxt->ai4_part_id[i]; 7693 7694 hme_compute_final_sigma_of_pu_from_base_blocks( 7695 ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_array_offset, 7696 ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_array_offset, 7697 au8_final_src_sigmaX, 7698 au8_final_src_sigmaXSquared, 7699 16, 7700 4, 7701 i4_part_id, 7702 16); 7703 } 7704 7705 s_common_frm_prms.pu8_part_src_sigmaX = au8_final_src_sigmaX; 7706 s_common_frm_prms.pu8_part_src_sigmaXSquared = 7707 au8_final_src_sigmaXSquared; 7708 7709 s_search_prms_blk.pu8_part_src_sigmaX = au8_final_src_sigmaX; 7710 s_search_prms_blk.pu8_part_src_sigmaXSquared = 7711 au8_final_src_sigmaXSquared; 7712 } 7713 7714 if(0 == num_unique_nodes) 7715 { 7716 continue; 7717 } 7718 7719 if(num_unique_nodes >= 2) 7720 { 7721 s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0]; 7722 s_search_prms_blk.i4_num_search_nodes = num_unique_nodes; 7723 if(ps_ctxt->i4_pic_type != IV_P_FRAME) 7724 { 7725 if(ps_ctxt->i4_temporal_layer == 1) 7726 { 7727 hme_fullpel_cand_sifter( 7728 &s_search_prms_blk, 7729 ps_curr_layer, 7730 &ps_ctxt->s_wt_pred, 7731 ALPHA_FOR_NOISE_TERM_IN_ME, 7732 u1_is_cu_noisy, 7733 ps_me_optimised_function_list); 7734 } 7735 else 7736 { 7737 hme_fullpel_cand_sifter( 7738 &s_search_prms_blk, 7739 ps_curr_layer, 7740 
&ps_ctxt->s_wt_pred, 7741 ALPHA_FOR_NOISE_TERM_IN_ME, 7742 u1_is_cu_noisy, 7743 ps_me_optimised_function_list); 7744 } 7745 } 7746 else 7747 { 7748 hme_fullpel_cand_sifter( 7749 &s_search_prms_blk, 7750 ps_curr_layer, 7751 &ps_ctxt->s_wt_pred, 7752 ALPHA_FOR_NOISE_TERM_IN_ME_P, 7753 u1_is_cu_noisy, 7754 ps_me_optimised_function_list); 7755 } 7756 } 7757 7758 s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0]; 7759 7760 hme_fullpel_refine( 7761 ps_refine_prms, 7762 &s_search_prms_blk, 7763 ps_curr_layer, 7764 &ps_ctxt->s_wt_pred, 7765 au4_unique_node_map, 7766 num_unique_nodes, 7767 blk_8x8_mask, 7768 center_x, 7769 center_y, 7770 center_ref_idx, 7771 e_me_quality_presets, 7772 ps_me_optimised_function_list); 7773 } 7774 7775 /* Sub-Pel search */ 7776 { 7777 hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr); 7778 7779 s_subpel_prms.pu1_wkg_mem = (U08 *)hme_get_wkg_mem( 7780 &ps_ctxt->s_buf_mgr, 7781 INTERP_INTERMED_BUF_SIZE + INTERP_OUT_BUF_SIZE); 7782 /* MV limit is different based on ref. 
PIC */ 7783 for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++) 7784 { 7785 SCALE_RANGE_PRMS( 7786 as_range_prms_hpel[ref_ctr], as_range_prms_rec[ref_ctr], 1); 7787 SCALE_RANGE_PRMS( 7788 as_range_prms_qpel[ref_ctr], as_range_prms_rec[ref_ctr], 2); 7789 } 7790 s_subpel_prms.i4_ctb_x_off = i4_ctb_x << 6; 7791 s_subpel_prms.i4_ctb_y_off = i4_ctb_y << 6; 7792 7793 hme_subpel_refine_cu_hs( 7794 &s_subpel_prms, 7795 ps_curr_layer, 7796 ps_search_results, 7797 u1_pred_dir, 7798 &ps_ctxt->s_wt_pred, 7799 blk_8x8_mask, 7800 ps_ctxt->ps_func_selector, 7801 ps_cmn_utils_optimised_function_list, 7802 ps_me_optimised_function_list); 7803 } 7804 } 7805 } 7806 /* Populate the new PU struct with the results post subpel refinement*/ 7807 { 7808 inter_cu_results_t *ps_cu_results; 7809 WORD32 best_inter_cost, intra_cost, posx, posy; 7810 7811 UWORD8 intra_8x8_enabled = 0; 7812 7813 /* cost of 16x16 cu parent */ 7814 WORD32 parent_cost = MAX_32BIT_VAL; 7815 7816 /* cost of 8x8 cu children */ 7817 /*********************************************************************/ 7818 /* Assuming parent is not split, then we signal 1 bit for this parent*/ 7819 /* CU. If split, then 1 bit for parent CU + 4 bits for each child CU */ 7820 /* So, 4*lambda is extra for children cost. 
*/ 7821 /*********************************************************************/ 7822 WORD32 child_cost = 0; 7823 7824 ps_cu_results = ps_search_results->ps_cu_results; 7825 7826 /* Initialize the pu_results pointers to the first struct in the stack array */ 7827 ps_pu_results = as_inter_pu_results; 7828 7829 hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr); 7830 7831 hme_populate_pus( 7832 ps_thrd_ctxt, 7833 ps_ctxt, 7834 &s_subpel_prms, 7835 ps_search_results, 7836 ps_cu_results, 7837 ps_pu_results, 7838 &(as_pu_results[0][0][0]), 7839 &s_common_frm_prms, 7840 &ps_ctxt->s_wt_pred, 7841 ps_curr_layer, 7842 au1_pred_dir_searched, 7843 i4_num_pred_dir); 7844 7845 ps_cu_results->i4_inp_offset = 7846 (ps_cu_results->u1_x_off) + (ps_cu_results->u1_y_off * 64); 7847 7848 hme_decide_part_types( 7849 ps_cu_results, 7850 ps_pu_results, 7851 &s_common_frm_prms, 7852 ps_ctxt, 7853 ps_cmn_utils_optimised_function_list, 7854 ps_me_optimised_function_list 7855 7856 ); 7857 7858 /* UPDATE the MIN and MAX MVs for Dynamical Search Range for each ref. pic. */ 7859 /* Only for P pic. For P, both are 0, I&B has them mut. 
exclusive */ 7860 if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled) 7861 { 7862 WORD32 res_ctr; 7863 7864 for(res_ctr = 0; res_ctr < ps_cu_results->u1_num_best_results; res_ctr++) 7865 { 7866 WORD32 num_part = 2, part_ctr; 7867 part_type_results_t *ps_best_results = 7868 &ps_cu_results->ps_best_results[res_ctr]; 7869 7870 if(PRT_2Nx2N == ps_best_results->u1_part_type) 7871 num_part = 1; 7872 7873 for(part_ctr = 0; part_ctr < num_part; part_ctr++) 7874 { 7875 pu_result_t *ps_pu_results = 7876 &ps_best_results->as_pu_results[part_ctr]; 7877 7878 ASSERT(PRED_L0 == ps_pu_results->pu.b2_pred_mode); 7879 7880 hme_update_dynamic_search_params( 7881 &ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p] 7882 .as_dyn_range_prms[ps_pu_results->pu.mv.i1_l0_ref_idx], 7883 ps_pu_results->pu.mv.s_l0_mv.i2_mvy); 7884 7885 /* Sanity Check */ 7886 ASSERT( 7887 ps_pu_results->pu.mv.i1_l0_ref_idx < 7888 ps_ctxt->s_frm_prms.u1_num_active_ref_l0); 7889 7890 /* No L1 for P Pic. */ 7891 ASSERT(PRED_L1 != ps_pu_results->pu.b2_pred_mode); 7892 /* No BI for P Pic. */ 7893 ASSERT(PRED_BI != ps_pu_results->pu.b2_pred_mode); 7894 } 7895 } 7896 } 7897 7898 /*****************************************************************/ 7899 /* INSERT INTRA RESULTS AT 16x16 LEVEL. 
*/ 7900 /*****************************************************************/ 7901 7902 #if DISABLE_INTRA_IN_BPICS 7903 if(1 != ((ME_XTREME_SPEED_25 == e_me_quality_presets) && 7904 (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE))) 7905 #endif 7906 { 7907 if(!(DISABLE_INTRA_WHEN_NOISY && s_common_frm_prms.u1_is_cu_noisy)) 7908 { 7909 hme_insert_intra_nodes_post_bipred( 7910 ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep); 7911 } 7912 } 7913 7914 #if DISABLE_INTRA_IN_BPICS 7915 if((ME_XTREME_SPEED_25 == e_me_quality_presets) && 7916 (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE)) 7917 { 7918 intra_8x8_enabled = 0; 7919 } 7920 else 7921 #endif 7922 { 7923 /*TRAQO intra flag updation*/ 7924 if(1 == ps_cu_results->ps_best_results->as_pu_results[0].pu.b1_intra_flag) 7925 { 7926 best_inter_cost = 7927 ps_cu_results->ps_best_results->as_pu_results[1].i4_tot_cost; 7928 intra_cost = 7929 ps_cu_results->ps_best_results->as_pu_results[0].i4_tot_cost; 7930 /*@16x16 level*/ 7931 posx = (ps_cu_results->ps_best_results->as_pu_results[1].pu.b4_pos_x 7932 << 2) >> 7933 4; 7934 posy = (ps_cu_results->ps_best_results->as_pu_results[1].pu.b4_pos_y 7935 << 2) >> 7936 4; 7937 } 7938 else 7939 { 7940 best_inter_cost = 7941 ps_cu_results->ps_best_results->as_pu_results[0].i4_tot_cost; 7942 posx = (ps_cu_results->ps_best_results->as_pu_results[0].pu.b4_pos_x 7943 << 2) >> 7944 3; 7945 posy = (ps_cu_results->ps_best_results->as_pu_results[0].pu.b4_pos_y 7946 << 2) >> 7947 3; 7948 } 7949 7950 /* Disable intra16/32/64 flags based on split flags recommended by IPE */ 7951 if(ps_cur_ipe_ctb->u1_split_flag) 7952 { 7953 /* Id of the 32x32 block, 16x16 block in a CTB */ 7954 WORD32 i4_32x32_id = 7955 (ps_cu_results->u1_y_off >> 5) * 2 + (ps_cu_results->u1_x_off >> 5); 7956 WORD32 i4_16x16_id = ((ps_cu_results->u1_y_off >> 4) & 0x1) * 2 + 7957 ((ps_cu_results->u1_x_off >> 4) & 0x1); 7958 7959 if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag) 
7960 { 7961 if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id] 7962 .as_intra16_analyse[i4_16x16_id] 7963 .b1_split_flag) 7964 { 7965 intra_8x8_enabled = 7966 ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id] 7967 .as_intra16_analyse[i4_16x16_id] 7968 .as_intra8_analyse[0] 7969 .b1_valid_cu; 7970 intra_8x8_enabled &= 7971 ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id] 7972 .as_intra16_analyse[i4_16x16_id] 7973 .as_intra8_analyse[1] 7974 .b1_valid_cu; 7975 intra_8x8_enabled &= 7976 ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id] 7977 .as_intra16_analyse[i4_16x16_id] 7978 .as_intra8_analyse[2] 7979 .b1_valid_cu; 7980 intra_8x8_enabled &= 7981 ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id] 7982 .as_intra16_analyse[i4_16x16_id] 7983 .as_intra8_analyse[3] 7984 .b1_valid_cu; 7985 } 7986 } 7987 } 7988 } 7989 7990 if(blk_8x8_mask == 0xf) 7991 { 7992 parent_cost = 7993 ps_search_results->ps_cu_results->ps_best_results[0].i4_tot_cost; 7994 ps_search_results->u1_split_flag = 0; 7995 } 7996 else 7997 { 7998 ps_search_results->u1_split_flag = 1; 7999 } 8000 8001 ps_cu_results = &ps_ctxt->as_cu8x8_results[blk_id_in_full_ctb << 2]; 8002 8003 if(s_common_frm_prms.u1_is_cu_noisy) 8004 { 8005 intra_8x8_enabled = 0; 8006 } 8007 8008 /* Evalaute 8x8 if NxN part id is enabled */ 8009 if((ps_search_results->i4_part_mask & ENABLE_NxN) || intra_8x8_enabled) 8010 { 8011 /* Populates the PU's for the 4 8x8's in one call */ 8012 hme_populate_pus_8x8_cu( 8013 ps_thrd_ctxt, 8014 ps_ctxt, 8015 &s_subpel_prms, 8016 ps_search_results, 8017 ps_cu_results, 8018 ps_pu_results, 8019 &(as_pu_results[0][0][0]), 8020 &s_common_frm_prms, 8021 au1_pred_dir_searched, 8022 i4_num_pred_dir, 8023 blk_8x8_mask); 8024 8025 /* Re-initialize the pu_results pointers to the first struct in the stack array */ 8026 ps_pu_results = as_inter_pu_results; 8027 8028 for(i = 0; i < 4; i++) 8029 { 8030 if((blk_8x8_mask & (1 << i))) 8031 { 8032 if(ps_cu_results->i4_part_mask) 8033 { 8034 hme_decide_part_types( 8035 
ps_cu_results, 8036 ps_pu_results, 8037 &s_common_frm_prms, 8038 ps_ctxt, 8039 ps_cmn_utils_optimised_function_list, 8040 ps_me_optimised_function_list 8041 8042 ); 8043 } 8044 /*****************************************************************/ 8045 /* INSERT INTRA RESULTS AT 8x8 LEVEL. */ 8046 /*****************************************************************/ 8047 #if DISABLE_INTRA_IN_BPICS 8048 if(1 != ((ME_XTREME_SPEED_25 == e_me_quality_presets) && 8049 (ps_ctxt->s_frm_prms.i4_temporal_layer_id > 8050 TEMPORAL_LAYER_DISABLE))) 8051 #endif 8052 { 8053 if(!(DISABLE_INTRA_WHEN_NOISY && 8054 s_common_frm_prms.u1_is_cu_noisy)) 8055 { 8056 hme_insert_intra_nodes_post_bipred( 8057 ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep); 8058 } 8059 } 8060 8061 child_cost += ps_cu_results->ps_best_results[0].i4_tot_cost; 8062 } 8063 8064 ps_cu_results++; 8065 ps_pu_results++; 8066 } 8067 8068 /* Compare 16x16 vs 8x8 cost */ 8069 if(child_cost < parent_cost) 8070 { 8071 ps_search_results->best_cu_cost = child_cost; 8072 ps_search_results->u1_split_flag = 1; 8073 } 8074 } 8075 } 8076 8077 hme_update_mv_bank_encode( 8078 ps_search_results, 8079 ps_curr_layer->ps_layer_mvbank, 8080 blk_x, 8081 blk_y, 8082 &s_mv_update_prms, 8083 au1_pred_dir_searched, 8084 i4_num_act_ref_l0); 8085 8086 /*********************************************************************/ 8087 /* Map the best results to an MV Grid. This is a 18x18 grid that is */ 8088 /* useful for doing things like predictor for cost calculation or */ 8089 /* also for merge calculations if need be. 
*/ 8090 /*********************************************************************/ 8091 hme_map_mvs_to_grid( 8092 &aps_mv_grid[0], ps_search_results, au1_pred_dir_searched, i4_num_pred_dir); 8093 } 8094 8095 /* Set the CU tree nodes appropriately */ 8096 if(e_me_quality_presets != ME_PRISTINE_QUALITY) 8097 { 8098 WORD32 i, j; 8099 8100 for(i = 0; i < 16; i++) 8101 { 8102 cur_ctb_cu_tree_t *ps_tree_node = 8103 ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE); 8104 search_results_t *ps_results = &ps_ctxt->as_search_results_16x16[i]; 8105 8106 switch(i >> 2) 8107 { 8108 case 0: 8109 { 8110 ps_tree_node = ps_tree_node->ps_child_node_tl; 8111 8112 break; 8113 } 8114 case 1: 8115 { 8116 ps_tree_node = ps_tree_node->ps_child_node_tr; 8117 8118 break; 8119 } 8120 case 2: 8121 { 8122 ps_tree_node = ps_tree_node->ps_child_node_bl; 8123 8124 break; 8125 } 8126 case 3: 8127 { 8128 ps_tree_node = ps_tree_node->ps_child_node_br; 8129 8130 break; 8131 } 8132 } 8133 8134 switch(i % 4) 8135 { 8136 case 0: 8137 { 8138 ps_tree_node = ps_tree_node->ps_child_node_tl; 8139 8140 break; 8141 } 8142 case 1: 8143 { 8144 ps_tree_node = ps_tree_node->ps_child_node_tr; 8145 8146 break; 8147 } 8148 case 2: 8149 { 8150 ps_tree_node = ps_tree_node->ps_child_node_bl; 8151 8152 break; 8153 } 8154 case 3: 8155 { 8156 ps_tree_node = ps_tree_node->ps_child_node_br; 8157 8158 break; 8159 } 8160 } 8161 8162 if(ai4_blk_8x8_mask[i] == 15) 8163 { 8164 if(!ps_results->u1_split_flag) 8165 { 8166 ps_tree_node->is_node_valid = 1; 8167 NULLIFY_THE_CHILDREN_NODES(ps_tree_node); 8168 } 8169 else 8170 { 8171 ps_tree_node->is_node_valid = 0; 8172 ENABLE_THE_CHILDREN_NODES(ps_tree_node); 8173 } 8174 } 8175 else 8176 { 8177 cur_ctb_cu_tree_t *ps_tree_child; 8178 8179 ps_tree_node->is_node_valid = 0; 8180 8181 for(j = 0; j < 4; j++) 8182 { 8183 switch(j) 8184 { 8185 case 0: 8186 { 8187 ps_tree_child = ps_tree_node->ps_child_node_tl; 8188 8189 break; 8190 } 8191 case 1: 8192 { 8193 ps_tree_child = 
ps_tree_node->ps_child_node_tr; 8194 8195 break; 8196 } 8197 case 2: 8198 { 8199 ps_tree_child = ps_tree_node->ps_child_node_bl; 8200 8201 break; 8202 } 8203 case 3: 8204 { 8205 ps_tree_child = ps_tree_node->ps_child_node_br; 8206 8207 break; 8208 } 8209 } 8210 8211 ps_tree_child->is_node_valid = !!(ai4_blk_8x8_mask[i] & (1 << j)); 8212 } 8213 } 8214 } 8215 } 8216 8217 if(ME_PRISTINE_QUALITY == e_me_quality_presets) 8218 { 8219 cur_ctb_cu_tree_t *ps_tree = ps_ctb_cluster_info->ps_cu_tree_root; 8220 8221 hme_analyse_mv_clustering( 8222 ps_ctxt->as_search_results_16x16, 8223 ps_ctxt->as_cu16x16_results, 8224 ps_ctxt->as_cu8x8_results, 8225 ps_ctxt->ps_ctb_cluster_info, 8226 ps_ctxt->ai1_future_list, 8227 ps_ctxt->ai1_past_list, 8228 ps_ctxt->s_frm_prms.bidir_enabled, 8229 e_me_quality_presets); 8230 8231 #if DISABLE_BLK_MERGE_WHEN_NOISY 8232 ps_tree->ps_child_node_tl->is_node_valid = !au1_is_32x32Blk_noisy[0]; 8233 ps_tree->ps_child_node_tr->is_node_valid = !au1_is_32x32Blk_noisy[1]; 8234 ps_tree->ps_child_node_bl->is_node_valid = !au1_is_32x32Blk_noisy[2]; 8235 ps_tree->ps_child_node_br->is_node_valid = !au1_is_32x32Blk_noisy[3]; 8236 ps_tree->ps_child_node_tl->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[0]; 8237 ps_tree->ps_child_node_tr->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[1]; 8238 ps_tree->ps_child_node_bl->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[2]; 8239 ps_tree->ps_child_node_br->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[3]; 8240 ps_tree->is_node_valid = !au1_is_64x64Blk_noisy[0]; 8241 ps_tree->u1_inter_eval_enable = !au1_is_64x64Blk_noisy[0]; 8242 #endif 8243 8244 en_merge_32x32 = (ps_tree->ps_child_node_tl->is_node_valid << 0) | 8245 (ps_tree->ps_child_node_tr->is_node_valid << 1) | 8246 (ps_tree->ps_child_node_bl->is_node_valid << 2) | 8247 (ps_tree->ps_child_node_br->is_node_valid << 3); 8248 8249 en_merge_execution = (ps_tree->ps_child_node_tl->u1_inter_eval_enable << 0) | 8250 (ps_tree->ps_child_node_tr->u1_inter_eval_enable << 1) | 
8251 (ps_tree->ps_child_node_bl->u1_inter_eval_enable << 2) | 8252 (ps_tree->ps_child_node_br->u1_inter_eval_enable << 3) | 8253 (ps_tree->u1_inter_eval_enable << 4); 8254 } 8255 else 8256 { 8257 en_merge_execution = 0x1f; 8258 8259 #if DISABLE_BLK_MERGE_WHEN_NOISY 8260 en_merge_32x32 = ((!au1_is_32x32Blk_noisy[0] << 0) & (en_merge_32x32 & 1)) | 8261 ((!au1_is_32x32Blk_noisy[1] << 1) & (en_merge_32x32 & 2)) | 8262 ((!au1_is_32x32Blk_noisy[2] << 2) & (en_merge_32x32 & 4)) | 8263 ((!au1_is_32x32Blk_noisy[3] << 3) & (en_merge_32x32 & 8)); 8264 #endif 8265 } 8266 8267 /* Re-initialize the pu_results pointers to the first struct in the stack array */ 8268 ps_pu_results = as_inter_pu_results; 8269 8270 { 8271 WORD32 ref_ctr; 8272 8273 s_ctb_prms.i4_ctb_x = i4_ctb_x << 6; 8274 s_ctb_prms.i4_ctb_y = i4_ctb_y << 6; 8275 8276 /* MV limit is different based on ref. PIC */ 8277 for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++) 8278 { 8279 SCALE_RANGE_PRMS(as_range_prms_hpel[ref_ctr], as_range_prms_rec[ref_ctr], 1); 8280 SCALE_RANGE_PRMS(as_range_prms_qpel[ref_ctr], as_range_prms_rec[ref_ctr], 2); 8281 } 8282 8283 e_merge_result = CU_SPLIT; 8284 merge_count_32x32 = 0; 8285 8286 if((en_merge_32x32 & 1) && (en_merge_execution & 1)) 8287 { 8288 range_prms_t *ps_pic_limit; 8289 if(s_merge_prms_32x32_tl.i4_use_rec == 1) 8290 { 8291 ps_pic_limit = &s_pic_limit_rec; 8292 } 8293 else 8294 { 8295 ps_pic_limit = &s_pic_limit_inp; 8296 } 8297 /* MV limit is different based on ref. 
PIC */ 8298 for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++) 8299 { 8300 hme_derive_search_range( 8301 s_merge_prms_32x32_tl.aps_mv_range[ref_ctr], 8302 ps_pic_limit, 8303 &as_mv_limit[ref_ctr], 8304 i4_ctb_x << 6, 8305 i4_ctb_y << 6, 8306 32, 8307 32); 8308 8309 SCALE_RANGE_PRMS_POINTERS( 8310 s_merge_prms_32x32_tl.aps_mv_range[ref_ctr], 8311 s_merge_prms_32x32_tl.aps_mv_range[ref_ctr], 8312 2); 8313 } 8314 s_merge_prms_32x32_tl.i4_ctb_x_off = i4_ctb_x << 6; 8315 s_merge_prms_32x32_tl.i4_ctb_y_off = i4_ctb_y << 6; 8316 s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[0]; 8317 8318 e_merge_result = hme_try_merge_high_speed( 8319 ps_thrd_ctxt, 8320 ps_ctxt, 8321 ps_cur_ipe_ctb, 8322 &s_subpel_prms, 8323 &s_merge_prms_32x32_tl, 8324 ps_pu_results, 8325 &as_pu_results[0][0][0]); 8326 8327 if(e_merge_result == CU_MERGED) 8328 { 8329 inter_cu_results_t *ps_cu_results = 8330 s_merge_prms_32x32_tl.ps_results_merge->ps_cu_results; 8331 8332 if(!((ps_cu_results->u1_num_best_results == 1) && 8333 (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag))) 8334 { 8335 hme_map_mvs_to_grid( 8336 &aps_mv_grid[0], 8337 s_merge_prms_32x32_tl.ps_results_merge, 8338 s_merge_prms_32x32_tl.au1_pred_dir_searched, 8339 s_merge_prms_32x32_tl.i4_num_pred_dir_actual); 8340 } 8341 8342 if(ME_PRISTINE_QUALITY != e_me_quality_presets) 8343 { 8344 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)] 8345 .ps_child_node_tl->is_node_valid = 1; 8346 NULLIFY_THE_CHILDREN_NODES( 8347 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)] 8348 .ps_child_node_tl); 8349 } 8350 8351 merge_count_32x32++; 8352 e_merge_result = CU_SPLIT; 8353 } 8354 else if(ME_PRISTINE_QUALITY == e_me_quality_presets) 8355 { 8356 #if ENABLE_CU_TREE_CULLING 8357 cur_ctb_cu_tree_t *ps_tree = 8358 ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl; 8359 8360 ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0; 8361 en_merge_execution = (en_merge_execution & (~(1 << 4))); 8362 
ENABLE_THE_CHILDREN_NODES(ps_tree); 8363 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl); 8364 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr); 8365 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl); 8366 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br); 8367 #endif 8368 } 8369 } 8370 else if((en_merge_32x32 & 1) && (!(en_merge_execution & 1))) 8371 { 8372 #if ENABLE_CU_TREE_CULLING 8373 cur_ctb_cu_tree_t *ps_tree = 8374 ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl; 8375 8376 ENABLE_THE_CHILDREN_NODES(ps_tree); 8377 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl); 8378 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr); 8379 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl); 8380 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br); 8381 #endif 8382 8383 if(au1_is_32x32Blk_noisy[0] && DISABLE_INTRA_WHEN_NOISY) 8384 { 8385 ps_tree->is_node_valid = 0; 8386 ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0; 8387 en_merge_execution = (en_merge_execution & (~(1 << 4))); 8388 } 8389 } 8390 8391 if((en_merge_32x32 & 2) && (en_merge_execution & 2)) 8392 { 8393 range_prms_t *ps_pic_limit; 8394 if(s_merge_prms_32x32_tr.i4_use_rec == 1) 8395 { 8396 ps_pic_limit = &s_pic_limit_rec; 8397 } 8398 else 8399 { 8400 ps_pic_limit = &s_pic_limit_inp; 8401 } 8402 /* MV limit is different based on ref. 
PIC */ 8403 for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++) 8404 { 8405 hme_derive_search_range( 8406 s_merge_prms_32x32_tr.aps_mv_range[ref_ctr], 8407 ps_pic_limit, 8408 &as_mv_limit[ref_ctr], 8409 (i4_ctb_x << 6) + 32, 8410 i4_ctb_y << 6, 8411 32, 8412 32); 8413 SCALE_RANGE_PRMS_POINTERS( 8414 s_merge_prms_32x32_tr.aps_mv_range[ref_ctr], 8415 s_merge_prms_32x32_tr.aps_mv_range[ref_ctr], 8416 2); 8417 } 8418 s_merge_prms_32x32_tr.i4_ctb_x_off = i4_ctb_x << 6; 8419 s_merge_prms_32x32_tr.i4_ctb_y_off = i4_ctb_y << 6; 8420 s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[1]; 8421 8422 e_merge_result = hme_try_merge_high_speed( 8423 ps_thrd_ctxt, 8424 ps_ctxt, 8425 ps_cur_ipe_ctb, 8426 &s_subpel_prms, 8427 &s_merge_prms_32x32_tr, 8428 ps_pu_results, 8429 &as_pu_results[0][0][0]); 8430 8431 if(e_merge_result == CU_MERGED) 8432 { 8433 inter_cu_results_t *ps_cu_results = 8434 s_merge_prms_32x32_tr.ps_results_merge->ps_cu_results; 8435 8436 if(!((ps_cu_results->u1_num_best_results == 1) && 8437 (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag))) 8438 { 8439 hme_map_mvs_to_grid( 8440 &aps_mv_grid[0], 8441 s_merge_prms_32x32_tr.ps_results_merge, 8442 s_merge_prms_32x32_tr.au1_pred_dir_searched, 8443 s_merge_prms_32x32_tr.i4_num_pred_dir_actual); 8444 } 8445 8446 if(ME_PRISTINE_QUALITY != e_me_quality_presets) 8447 { 8448 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)] 8449 .ps_child_node_tr->is_node_valid = 1; 8450 NULLIFY_THE_CHILDREN_NODES( 8451 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)] 8452 .ps_child_node_tr); 8453 } 8454 8455 merge_count_32x32++; 8456 e_merge_result = CU_SPLIT; 8457 } 8458 else if(ME_PRISTINE_QUALITY == e_me_quality_presets) 8459 { 8460 #if ENABLE_CU_TREE_CULLING 8461 cur_ctb_cu_tree_t *ps_tree = 8462 ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr; 8463 8464 ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0; 8465 en_merge_execution = (en_merge_execution & (~(1 << 4))); 
8466 ENABLE_THE_CHILDREN_NODES(ps_tree); 8467 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl); 8468 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr); 8469 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl); 8470 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br); 8471 #endif 8472 } 8473 } 8474 else if((en_merge_32x32 & 2) && (!(en_merge_execution & 2))) 8475 { 8476 #if ENABLE_CU_TREE_CULLING 8477 cur_ctb_cu_tree_t *ps_tree = 8478 ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr; 8479 8480 ENABLE_THE_CHILDREN_NODES(ps_tree); 8481 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl); 8482 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr); 8483 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl); 8484 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br); 8485 #endif 8486 8487 if(au1_is_32x32Blk_noisy[1] && DISABLE_INTRA_WHEN_NOISY) 8488 { 8489 ps_tree->is_node_valid = 0; 8490 ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0; 8491 en_merge_execution = (en_merge_execution & (~(1 << 4))); 8492 } 8493 } 8494 8495 if((en_merge_32x32 & 4) && (en_merge_execution & 4)) 8496 { 8497 range_prms_t *ps_pic_limit; 8498 if(s_merge_prms_32x32_bl.i4_use_rec == 1) 8499 { 8500 ps_pic_limit = &s_pic_limit_rec; 8501 } 8502 else 8503 { 8504 ps_pic_limit = &s_pic_limit_inp; 8505 } 8506 /* MV limit is different based on ref. 
PIC */ 8507 for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++) 8508 { 8509 hme_derive_search_range( 8510 s_merge_prms_32x32_bl.aps_mv_range[ref_ctr], 8511 ps_pic_limit, 8512 &as_mv_limit[ref_ctr], 8513 i4_ctb_x << 6, 8514 (i4_ctb_y << 6) + 32, 8515 32, 8516 32); 8517 SCALE_RANGE_PRMS_POINTERS( 8518 s_merge_prms_32x32_bl.aps_mv_range[ref_ctr], 8519 s_merge_prms_32x32_bl.aps_mv_range[ref_ctr], 8520 2); 8521 } 8522 s_merge_prms_32x32_bl.i4_ctb_x_off = i4_ctb_x << 6; 8523 s_merge_prms_32x32_bl.i4_ctb_y_off = i4_ctb_y << 6; 8524 s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[2]; 8525 8526 e_merge_result = hme_try_merge_high_speed( 8527 ps_thrd_ctxt, 8528 ps_ctxt, 8529 ps_cur_ipe_ctb, 8530 &s_subpel_prms, 8531 &s_merge_prms_32x32_bl, 8532 ps_pu_results, 8533 &as_pu_results[0][0][0]); 8534 8535 if(e_merge_result == CU_MERGED) 8536 { 8537 inter_cu_results_t *ps_cu_results = 8538 s_merge_prms_32x32_bl.ps_results_merge->ps_cu_results; 8539 8540 if(!((ps_cu_results->u1_num_best_results == 1) && 8541 (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag))) 8542 { 8543 hme_map_mvs_to_grid( 8544 &aps_mv_grid[0], 8545 s_merge_prms_32x32_bl.ps_results_merge, 8546 s_merge_prms_32x32_bl.au1_pred_dir_searched, 8547 s_merge_prms_32x32_bl.i4_num_pred_dir_actual); 8548 } 8549 8550 if(ME_PRISTINE_QUALITY != e_me_quality_presets) 8551 { 8552 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)] 8553 .ps_child_node_bl->is_node_valid = 1; 8554 NULLIFY_THE_CHILDREN_NODES( 8555 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)] 8556 .ps_child_node_bl); 8557 } 8558 8559 merge_count_32x32++; 8560 e_merge_result = CU_SPLIT; 8561 } 8562 else if(ME_PRISTINE_QUALITY == e_me_quality_presets) 8563 { 8564 #if ENABLE_CU_TREE_CULLING 8565 cur_ctb_cu_tree_t *ps_tree = 8566 ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl; 8567 8568 ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0; 8569 en_merge_execution = (en_merge_execution & (~(1 << 4))); 
8570 ENABLE_THE_CHILDREN_NODES(ps_tree); 8571 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl); 8572 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr); 8573 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl); 8574 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br); 8575 #endif 8576 } 8577 } 8578 else if((en_merge_32x32 & 4) && (!(en_merge_execution & 4))) 8579 { 8580 #if ENABLE_CU_TREE_CULLING 8581 cur_ctb_cu_tree_t *ps_tree = 8582 ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl; 8583 8584 ENABLE_THE_CHILDREN_NODES(ps_tree); 8585 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl); 8586 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr); 8587 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl); 8588 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br); 8589 #endif 8590 8591 if(au1_is_32x32Blk_noisy[2] && DISABLE_INTRA_WHEN_NOISY) 8592 { 8593 ps_tree->is_node_valid = 0; 8594 ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0; 8595 en_merge_execution = (en_merge_execution & (~(1 << 4))); 8596 } 8597 } 8598 8599 if((en_merge_32x32 & 8) && (en_merge_execution & 8)) 8600 { 8601 range_prms_t *ps_pic_limit; 8602 if(s_merge_prms_32x32_br.i4_use_rec == 1) 8603 { 8604 ps_pic_limit = &s_pic_limit_rec; 8605 } 8606 else 8607 { 8608 ps_pic_limit = &s_pic_limit_inp; 8609 } 8610 /* MV limit is different based on ref. 
PIC */ 8611 for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++) 8612 { 8613 hme_derive_search_range( 8614 s_merge_prms_32x32_br.aps_mv_range[ref_ctr], 8615 ps_pic_limit, 8616 &as_mv_limit[ref_ctr], 8617 (i4_ctb_x << 6) + 32, 8618 (i4_ctb_y << 6) + 32, 8619 32, 8620 32); 8621 8622 SCALE_RANGE_PRMS_POINTERS( 8623 s_merge_prms_32x32_br.aps_mv_range[ref_ctr], 8624 s_merge_prms_32x32_br.aps_mv_range[ref_ctr], 8625 2); 8626 } 8627 s_merge_prms_32x32_br.i4_ctb_x_off = i4_ctb_x << 6; 8628 s_merge_prms_32x32_br.i4_ctb_y_off = i4_ctb_y << 6; 8629 s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[3]; 8630 8631 e_merge_result = hme_try_merge_high_speed( 8632 ps_thrd_ctxt, 8633 ps_ctxt, 8634 ps_cur_ipe_ctb, 8635 &s_subpel_prms, 8636 &s_merge_prms_32x32_br, 8637 ps_pu_results, 8638 &as_pu_results[0][0][0]); 8639 8640 if(e_merge_result == CU_MERGED) 8641 { 8642 /*inter_cu_results_t *ps_cu_results = s_merge_prms_32x32_br.ps_results_merge->ps_cu_results; 8643 8644 if(!((ps_cu_results->u1_num_best_results == 1) && 8645 (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag))) 8646 { 8647 hme_map_mvs_to_grid 8648 ( 8649 &aps_mv_grid[0], 8650 s_merge_prms_32x32_br.ps_results_merge, 8651 s_merge_prms_32x32_br.au1_pred_dir_searched, 8652 s_merge_prms_32x32_br.i4_num_pred_dir_actual 8653 ); 8654 }*/ 8655 8656 if(ME_PRISTINE_QUALITY != e_me_quality_presets) 8657 { 8658 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)] 8659 .ps_child_node_br->is_node_valid = 1; 8660 NULLIFY_THE_CHILDREN_NODES( 8661 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)] 8662 .ps_child_node_br); 8663 } 8664 8665 merge_count_32x32++; 8666 e_merge_result = CU_SPLIT; 8667 } 8668 else if(ME_PRISTINE_QUALITY == e_me_quality_presets) 8669 { 8670 #if ENABLE_CU_TREE_CULLING 8671 cur_ctb_cu_tree_t *ps_tree = 8672 ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br; 8673 8674 ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0; 8675 en_merge_execution = 
(en_merge_execution & (~(1 << 4))); 8676 ENABLE_THE_CHILDREN_NODES(ps_tree); 8677 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl); 8678 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr); 8679 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl); 8680 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br); 8681 #endif 8682 } 8683 } 8684 else if((en_merge_32x32 & 8) && (!(en_merge_execution & 8))) 8685 { 8686 #if ENABLE_CU_TREE_CULLING 8687 cur_ctb_cu_tree_t *ps_tree = 8688 ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br; 8689 8690 ENABLE_THE_CHILDREN_NODES(ps_tree); 8691 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl); 8692 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr); 8693 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl); 8694 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br); 8695 #endif 8696 8697 if(au1_is_32x32Blk_noisy[3] && DISABLE_INTRA_WHEN_NOISY) 8698 { 8699 ps_tree->is_node_valid = 0; 8700 ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0; 8701 en_merge_execution = (en_merge_execution & (~(1 << 4))); 8702 } 8703 } 8704 8705 /* Try merging all 32x32 to 64x64 candts */ 8706 if(((en_merge_32x32 & 0xf) == 0xf) && 8707 (((merge_count_32x32 == 4) && (e_me_quality_presets != ME_PRISTINE_QUALITY)) || 8708 ((en_merge_execution & 16) && (e_me_quality_presets == ME_PRISTINE_QUALITY)))) 8709 if((((e_me_quality_presets == ME_XTREME_SPEED_25) && 8710 !DISABLE_64X64_BLOCK_MERGE_IN_ME_IN_XS25) || 8711 (e_me_quality_presets != ME_XTREME_SPEED_25))) 8712 { 8713 range_prms_t *ps_pic_limit; 8714 if(s_merge_prms_64x64.i4_use_rec == 1) 8715 { 8716 ps_pic_limit = &s_pic_limit_rec; 8717 } 8718 else 8719 { 8720 ps_pic_limit = &s_pic_limit_inp; 8721 } 8722 /* MV limit is different based on ref. 
PIC */ 8723 for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++) 8724 { 8725 hme_derive_search_range( 8726 s_merge_prms_64x64.aps_mv_range[ref_ctr], 8727 ps_pic_limit, 8728 &as_mv_limit[ref_ctr], 8729 i4_ctb_x << 6, 8730 i4_ctb_y << 6, 8731 64, 8732 64); 8733 8734 SCALE_RANGE_PRMS_POINTERS( 8735 s_merge_prms_64x64.aps_mv_range[ref_ctr], 8736 s_merge_prms_64x64.aps_mv_range[ref_ctr], 8737 2); 8738 } 8739 s_merge_prms_64x64.i4_ctb_x_off = i4_ctb_x << 6; 8740 s_merge_prms_64x64.i4_ctb_y_off = i4_ctb_y << 6; 8741 s_subpel_prms.u1_is_cu_noisy = au1_is_64x64Blk_noisy[0]; 8742 8743 e_merge_result = hme_try_merge_high_speed( 8744 ps_thrd_ctxt, 8745 ps_ctxt, 8746 ps_cur_ipe_ctb, 8747 &s_subpel_prms, 8748 &s_merge_prms_64x64, 8749 ps_pu_results, 8750 &as_pu_results[0][0][0]); 8751 8752 if((e_merge_result == CU_MERGED) && 8753 (ME_PRISTINE_QUALITY != e_me_quality_presets)) 8754 { 8755 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)] 8756 .is_node_valid = 1; 8757 NULLIFY_THE_CHILDREN_NODES( 8758 ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE)); 8759 } 8760 else if( 8761 (e_merge_result == CU_SPLIT) && 8762 (ME_PRISTINE_QUALITY == e_me_quality_presets)) 8763 { 8764 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)] 8765 .is_node_valid = 0; 8766 } 8767 } 8768 8769 /*****************************************************************/ 8770 /* UPDATION OF RESULT TO EXTERNAL STRUCTURES */ 8771 /*****************************************************************/ 8772 pf_ext_update_fxn((void *)ps_thrd_ctxt, (void *)ps_ctxt, i4_ctb_x, i4_ctb_y); 8773 8774 { 8775 #ifdef _DEBUG 8776 S32 wd = ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64) 8777 ? 64 8778 : i4_pic_wd - s_common_frm_prms.i4_ctb_x_off; 8779 S32 ht = ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64) 8780 ? 
64 8781 : i4_pic_ht - s_common_frm_prms.i4_ctb_y_off; 8782 ASSERT( 8783 (wd * ht) == 8784 ihevce_compute_area_of_valid_cus_in_ctb( 8785 &ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)])); 8786 #endif 8787 } 8788 } 8789 8790 /* set the dependency for the corresponding row in enc loop */ 8791 ihevce_dmgr_set_row_row_sync( 8792 pv_dep_mngr_encloop_dep_me, 8793 (i4_ctb_x + 1), 8794 i4_ctb_y, 8795 tile_col_idx /* Col Tile No. */); 8796 8797 left_ctb_in_diff_tile = 0; 8798 } 8799 } 8800 } 8801 8802 /** 8803 ******************************************************************************** 8804 * @fn void hme_refine_no_encode(coarse_me_ctxt_t *ps_ctxt, 8805 * refine_prms_t *ps_refine_prms) 8806 * 8807 * @brief Top level entry point for refinement ME 8808 * 8809 * @param[in,out] ps_ctxt: ME Handle 8810 * 8811 * @param[in] ps_refine_prms : refinement layer prms * @param[in] ps_multi_thrd_ctxt : multi-thread context handle * @param[in] lyr_job_type : layer job type * @param[in] i4_ping_pong : ping-pong buffer index * @param[in] ppv_dep_mngr_hme_sync : dependency managers for HME row sync 8812 * 8813 * @return None 8814 ******************************************************************************** 8815 */ 8816 void hme_refine_no_encode( 8817 coarse_me_ctxt_t *ps_ctxt, 8818 refine_prms_t *ps_refine_prms, 8819 multi_thrd_ctxt_t *ps_multi_thrd_ctxt, 8820 S32 lyr_job_type, 8821 WORD32 i4_ping_pong, 8822 void **ppv_dep_mngr_hme_sync) 8823 { 8824 BLK_SIZE_T e_search_blk_size, e_result_blk_size; 8825 ME_QUALITY_PRESETS_T e_me_quality_presets = 8826 ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets; 8827 8828 /*************************************************************************/ 8829 /* Complexity of search: Low to High */ 8830 /*************************************************************************/ 8831 SEARCH_COMPLEXITY_T e_search_complexity; 8832 8833 /*************************************************************************/ 8834 /* Config parameter structures for various ME submodules */ 8835 /*************************************************************************/ 8836 hme_search_prms_t s_search_prms_blk; 8837 mvbank_update_prms_t s_mv_update_prms; 8838 8839
/*************************************************************************/ 8840 /* All types of search candidates for predictor based search. */ 8841 /*************************************************************************/ 8842 S32 num_init_candts = 0; 8843 search_candt_t *ps_search_candts, as_search_candts[MAX_INIT_CANDTS]; 8844 search_node_t as_top_neighbours[4], as_left_neighbours[3]; 8845 search_node_t *ps_candt_zeromv, *ps_candt_tl, *ps_candt_tr; 8846 search_node_t *ps_candt_l, *ps_candt_t; 8847 search_node_t *ps_candt_prj_br[2], *ps_candt_prj_b[2], *ps_candt_prj_r[2]; 8848 search_node_t *ps_candt_prj_bl[2]; 8849 search_node_t *ps_candt_prj_tr[2], *ps_candt_prj_t[2], *ps_candt_prj_tl[2]; 8850 search_node_t *ps_candt_prj_coloc[2]; 8851 8852 pf_get_wt_inp fp_get_wt_inp; 8853 8854 search_node_t as_unique_search_nodes[MAX_INIT_CANDTS * 9]; 8855 U32 au4_unique_node_map[MAP_X_MAX * 2]; 8856 8857 /*EIID */ 8858 WORD32 i4_num_inter_wins = 0; //debug code to find stat of 8859 WORD32 i4_num_comparisions = 0; //debug code 8860 WORD32 i4_threshold_multiplier; 8861 WORD32 i4_threshold_divider; 8862 WORD32 i4_temporal_layer = 8863 ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_temporal_lyr_id; 8864 8865 /*************************************************************************/ 8866 /* points ot the search results for the blk level search (8x8/16x16) */ 8867 /*************************************************************************/ 8868 search_results_t *ps_search_results; 8869 8870 /*************************************************************************/ 8871 /* Coordinates */ 8872 /*************************************************************************/ 8873 S32 blk_x, i4_ctb_x, blk_id_in_ctb; 8874 //S32 i4_ctb_y; 8875 S32 pos_x, pos_y; 8876 S32 blk_id_in_full_ctb; 8877 S32 i4_num_srch_cands; 8878 8879 S32 blk_y; 8880 8881 /*************************************************************************/ 8882 /* Related to dimensions of block being 
searched and pic dimensions */ 8883 /*************************************************************************/ 8884 S32 blk_wd, blk_ht, blk_size_shift, num_blks_in_row, num_blks_in_pic; 8885 S32 i4_pic_wd, i4_pic_ht, num_blks_in_this_ctb; 8886 S32 num_results_prev_layer; 8887 8888 /*************************************************************************/ 8889 /* Size of a basic unit for this layer. For non encode layers, we search */ 8890 /* in block sizes of 8x8. For encode layers, though we search 16x16s the */ 8891 /* basic unit size is the ctb size. */ 8892 /*************************************************************************/ 8893 S32 unit_size; 8894 8895 /*************************************************************************/ 8896 /* Pointers to context in current and coarser layers */ 8897 /*************************************************************************/ 8898 layer_ctxt_t *ps_curr_layer, *ps_coarse_layer; 8899 8900 /*************************************************************************/ 8901 /* to store mv range per blk, and picture limit, allowed search range */ 8902 /* range prms in hpel and qpel units as well */ 8903 /*************************************************************************/ 8904 range_prms_t s_range_prms_inp, s_range_prms_rec; 8905 range_prms_t s_pic_limit_inp, s_pic_limit_rec, as_mv_limit[MAX_NUM_REF]; 8906 /*************************************************************************/ 8907 /* These variables are used to track number of references at different */ 8908 /* stages of ME. 
*/ 8909 /*************************************************************************/ 8910 S32 i4_num_ref_fpel, i4_num_ref_before_merge; 8911 S32 i4_num_ref_each_dir, i, i4_num_ref_prev_layer; 8912 S32 lambda_inp = ps_refine_prms->lambda_inp; 8913 8914 /*************************************************************************/ 8915 /* When a layer is implicit, it means that it searches on 1 or 2 ref idx */ 8916 /* Explicit means it searches on all active ref idx. */ 8917 /*************************************************************************/ 8918 S32 curr_layer_implicit, prev_layer_implicit; 8919 8920 /*************************************************************************/ 8921 /* Variables for loop counts */ 8922 /*************************************************************************/ 8923 S32 id; 8924 S08 i1_ref_idx; 8925 8926 /*************************************************************************/ 8927 /* Input pointer and stride */ 8928 /*************************************************************************/ 8929 U08 *pu1_inp; 8930 S32 i4_inp_stride; 8931 8932 S32 end_of_frame; 8933 8934 S32 num_sync_units_in_row; 8935 8936 PF_HME_PROJECT_COLOC_CANDT_FXN pf_hme_project_coloc_candt; 8937 ASSERT(ps_refine_prms->i4_layer_id < ps_ctxt->num_layers - 1); 8938 8939 /*************************************************************************/ 8940 /* Pointers to current and coarse layer are needed for projection */ 8941 /* Pointer to prev layer are needed for other candts like coloc */ 8942 /*************************************************************************/ 8943 ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id]; 8944 8945 ps_coarse_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id + 1]; 8946 8947 num_results_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_mvs_per_ref; 8948 8949 /* Function pointer is selected based on the C vc X86 macro */ 8950 8951 fp_get_wt_inp = 
((ihevce_me_optimised_function_list_t *)ps_ctxt->pv_me_optimised_function_list) 8952 ->pf_get_wt_inp_8x8; 8953 8954 i4_inp_stride = ps_curr_layer->i4_inp_stride; 8955 i4_pic_wd = ps_curr_layer->i4_wd; 8956 i4_pic_ht = ps_curr_layer->i4_ht; 8957 e_search_complexity = ps_refine_prms->e_search_complexity; 8958 8959 end_of_frame = 0; 8960 8961 /* If the previous layer is non-encode layer, then use dyadic projection */ 8962 if(0 == ps_ctxt->u1_encode[ps_refine_prms->i4_layer_id + 1]) 8963 pf_hme_project_coloc_candt = hme_project_coloc_candt_dyadic; 8964 else 8965 pf_hme_project_coloc_candt = hme_project_coloc_candt; 8966 8967 /* This points to all the initial candts */ 8968 ps_search_candts = &as_search_candts[0]; 8969 8970 { 8971 e_search_blk_size = BLK_8x8; 8972 blk_wd = blk_ht = 8; 8973 blk_size_shift = 3; 8974 s_mv_update_prms.i4_shift = 0; 8975 /*********************************************************************/ 8976 /* In case we do not encode this layer, we search 8x8 with or without*/ 8977 /* enable 4x4 SAD. */ 8978 /*********************************************************************/ 8979 { 8980 S32 i4_mask = (ENABLE_2Nx2N); 8981 8982 e_result_blk_size = BLK_8x8; 8983 if(ps_refine_prms->i4_enable_4x4_part) 8984 { 8985 i4_mask |= (ENABLE_NxN); 8986 e_result_blk_size = BLK_4x4; 8987 s_mv_update_prms.i4_shift = 1; 8988 } 8989 8990 s_search_prms_blk.i4_part_mask = i4_mask; 8991 } 8992 8993 unit_size = blk_wd; 8994 s_search_prms_blk.i4_inp_stride = unit_size; 8995 } 8996 8997 /* This is required to properly update the layer mv bank */ 8998 s_mv_update_prms.e_search_blk_size = e_search_blk_size; 8999 s_search_prms_blk.e_blk_size = e_search_blk_size; 9000 9001 /*************************************************************************/ 9002 /* If current layer is explicit, then the number of ref frames are to */ 9003 /* be same as previous layer. 
Else it will be 2 */ 9004 /*************************************************************************/ 9005 i4_num_ref_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_ref; 9006 if(ps_refine_prms->explicit_ref) 9007 { 9008 curr_layer_implicit = 0; 9009 i4_num_ref_fpel = i4_num_ref_prev_layer; 9010 /* 100578 : Using same mv cost fun. for all presets. */ 9011 s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_refine; 9012 } 9013 else 9014 { 9015 i4_num_ref_fpel = 2; 9016 curr_layer_implicit = 1; 9017 { 9018 if(ME_MEDIUM_SPEED > e_me_quality_presets) 9019 { 9020 s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit; 9021 } 9022 else 9023 { 9024 #if USE_MODIFIED == 1 9025 s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified; 9026 #else 9027 s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed; 9028 #endif 9029 } 9030 } 9031 } 9032 9033 i4_num_ref_fpel = MIN(i4_num_ref_fpel, i4_num_ref_prev_layer); 9034 if(ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_pic_type == 9035 IV_IDR_FRAME || 9036 ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_pic_type == IV_I_FRAME) 9037 { 9038 i4_num_ref_fpel = 1; 9039 } 9040 if(i4_num_ref_prev_layer <= 2) 9041 { 9042 prev_layer_implicit = 1; 9043 curr_layer_implicit = 1; 9044 i4_num_ref_each_dir = 1; 9045 } 9046 else 9047 { 9048 /* It is assumed that we have equal number of references in each dir */ 9049 //ASSERT(!(i4_num_ref_prev_layer & 1)); 9050 prev_layer_implicit = 0; 9051 i4_num_ref_each_dir = i4_num_ref_prev_layer >> 1; 9052 } 9053 s_mv_update_prms.i4_num_ref = i4_num_ref_fpel; 9054 s_mv_update_prms.i4_num_active_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0; 9055 s_mv_update_prms.i4_num_active_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1; 9056 9057 /* this can be kept to 1 or 2 */ 9058 i4_num_ref_before_merge = 2; 9059 i4_num_ref_before_merge = MIN(i4_num_ref_before_merge, i4_num_ref_fpel); 9060 9061 /* Set up 
place holders to hold the search nodes of each initial candt */ 9062 for(i = 0; i < MAX_INIT_CANDTS; i++) 9063 { 9064 ps_search_candts[i].ps_search_node = &ps_ctxt->s_init_search_node[i]; 9065 INIT_SEARCH_NODE(ps_search_candts[i].ps_search_node, 0); 9066 } 9067 9068 /* redundant, but doing it here since it is used in pred ctxt init */ 9069 ps_candt_zeromv = ps_search_candts[0].ps_search_node; 9070 for(i = 0; i < 3; i++) 9071 { 9072 search_node_t *ps_search_node; 9073 ps_search_node = &as_left_neighbours[i]; 9074 INIT_SEARCH_NODE(ps_search_node, 0); 9075 ps_search_node = &as_top_neighbours[i]; 9076 INIT_SEARCH_NODE(ps_search_node, 0); 9077 } 9078 9079 INIT_SEARCH_NODE(&as_top_neighbours[3], 0); 9080 /* bottom left node always not available for the blk being searched */ 9081 as_left_neighbours[2].u1_is_avail = 0; 9082 /*************************************************************************/ 9083 /* Initialize all the search results structure here. We update all the */ 9084 /* search results to default values, and configure things like blk sizes */ 9085 /*************************************************************************/ 9086 if(ps_refine_prms->i4_encode == 0) 9087 { 9088 S32 pred_lx; 9089 search_results_t *ps_search_results; 9090 9091 ps_search_results = &ps_ctxt->s_search_results_8x8; 9092 hme_init_search_results( 9093 ps_search_results, 9094 i4_num_ref_fpel, 9095 ps_refine_prms->i4_num_fpel_results, 9096 ps_refine_prms->i4_num_results_per_part, 9097 e_search_blk_size, 9098 0, 9099 0, 9100 &ps_ctxt->au1_is_past[0]); 9101 for(pred_lx = 0; pred_lx < 2; pred_lx++) 9102 { 9103 hme_init_pred_ctxt_no_encode( 9104 &ps_search_results->as_pred_ctxt[pred_lx], 9105 ps_search_results, 9106 &as_top_neighbours[0], 9107 &as_left_neighbours[0], 9108 &ps_candt_prj_coloc[0], 9109 ps_candt_zeromv, 9110 ps_candt_zeromv, 9111 pred_lx, 9112 lambda_inp, 9113 ps_refine_prms->lambda_q_shift, 9114 &ps_ctxt->apu1_ref_bits_tlu_lc[0], 9115 &ps_ctxt->ai2_ref_scf[0]); 9116 } 9117 } 9118 
9119 /*********************************************************************/ 9120 /* Initialize the dyn. search range params. for each reference index */ 9121 /* in current layer ctxt */ 9122 /*********************************************************************/ 9123 /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */ 9124 if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled) 9125 { 9126 WORD32 ref_ctr; 9127 9128 for(ref_ctr = 0; ref_ctr < s_mv_update_prms.i4_num_ref; ref_ctr++) 9129 { 9130 INIT_DYN_SEARCH_PRMS( 9131 &ps_ctxt->s_coarse_dyn_range_prms 9132 .as_dyn_range_prms[ps_refine_prms->i4_layer_id][ref_ctr], 9133 ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr]); 9134 } 9135 } 9136 9137 /* Next set up initial candidates according to a given set of rules. */ 9138 /* The number of initial candidates affects the quality of ME in the */ 9139 /* case of motion with multiple degrees of freedom. In case of simple */ 9140 /* translational motion, a current and a few causal and non causal */ 9141 /* candts would suffice. More candidates help to cover more complex */ 9142 /* cases like partitions, rotation/zoom, occlusion in/out, fine motion */ 9143 /* where multiple ref helps etc. */ 9144 /* The candidate choice also depends on the following parameters. */ 9145 /* e_search_complexity: SRCH_CX_LOW, SRCH_CX_MED, SRCH_CX_HIGH */ 9146 /* Whether we encode or not, and the type of search across reference */ 9147 /* i.e. 
the previous layer may have been explicit/implicit and curr */ 9148 /* layer may be explicit/implicit */ 9149 9150 /* 0, 0, L, T, projected coloc best always presnt by default */ 9151 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(ZERO_MV, e_me_quality_presets); 9152 ps_candt_zeromv = ps_search_candts[id].ps_search_node; 9153 ps_search_candts[id].u1_num_steps_refine = 0; 9154 ps_candt_zeromv->s_mv.i2_mvx = 0; 9155 ps_candt_zeromv->s_mv.i2_mvy = 0; 9156 9157 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(SPATIAL_LEFT0, e_me_quality_presets); 9158 ps_candt_l = ps_search_candts[id].ps_search_node; 9159 ps_search_candts[id].u1_num_steps_refine = 0; 9160 9161 /* Even in ME_HIGH_SPEED mode, in layer 0, blocks */ 9162 /* not at the CTB boundary use the causal T and */ 9163 /* not the projected T, although the candidate is */ 9164 /* still pointed to by ps_candt_prj_t[0] */ 9165 if(ME_MEDIUM_SPEED <= e_me_quality_presets) 9166 { 9167 /* Using Projected top to eliminate sync */ 9168 id = hme_decide_search_candidate_priority_in_l1_and_l2_me( 9169 PROJECTED_TOP0, e_me_quality_presets); 9170 ps_candt_prj_t[0] = ps_search_candts[id].ps_search_node; 9171 ps_search_candts[id].u1_num_steps_refine = 1; 9172 } 9173 else 9174 { 9175 id = hme_decide_search_candidate_priority_in_l1_and_l2_me( 9176 SPATIAL_TOP0, e_me_quality_presets); 9177 ps_candt_t = ps_search_candts[id].ps_search_node; 9178 ps_search_candts[id].u1_num_steps_refine = 0; 9179 } 9180 9181 id = hme_decide_search_candidate_priority_in_l1_and_l2_me( 9182 PROJECTED_COLOC0, e_me_quality_presets); 9183 ps_candt_prj_coloc[0] = ps_search_candts[id].ps_search_node; 9184 ps_search_candts[id].u1_num_steps_refine = 1; 9185 9186 id = hme_decide_search_candidate_priority_in_l1_and_l2_me( 9187 PROJECTED_COLOC1, e_me_quality_presets); 9188 ps_candt_prj_coloc[1] = ps_search_candts[id].ps_search_node; 9189 ps_search_candts[id].u1_num_steps_refine = 1; 9190 9191 if(ME_MEDIUM_SPEED <= e_me_quality_presets) 9192 { 9193 
id = hme_decide_search_candidate_priority_in_l1_and_l2_me( 9194 PROJECTED_TOP_RIGHT0, e_me_quality_presets); 9195 ps_candt_prj_tr[0] = ps_search_candts[id].ps_search_node; 9196 ps_search_candts[id].u1_num_steps_refine = 1; 9197 9198 id = hme_decide_search_candidate_priority_in_l1_and_l2_me( 9199 PROJECTED_TOP_LEFT0, e_me_quality_presets); 9200 ps_candt_prj_tl[0] = ps_search_candts[id].ps_search_node; 9201 ps_search_candts[id].u1_num_steps_refine = 1; 9202 } 9203 else 9204 { 9205 id = hme_decide_search_candidate_priority_in_l1_and_l2_me( 9206 SPATIAL_TOP_RIGHT0, e_me_quality_presets); 9207 ps_candt_tr = ps_search_candts[id].ps_search_node; 9208 ps_search_candts[id].u1_num_steps_refine = 0; 9209 9210 id = hme_decide_search_candidate_priority_in_l1_and_l2_me( 9211 SPATIAL_TOP_LEFT0, e_me_quality_presets); 9212 ps_candt_tl = ps_search_candts[id].ps_search_node; 9213 ps_search_candts[id].u1_num_steps_refine = 0; 9214 } 9215 9216 id = hme_decide_search_candidate_priority_in_l1_and_l2_me( 9217 PROJECTED_RIGHT0, e_me_quality_presets); 9218 ps_candt_prj_r[0] = ps_search_candts[id].ps_search_node; 9219 ps_search_candts[id].u1_num_steps_refine = 1; 9220 9221 id = hme_decide_search_candidate_priority_in_l1_and_l2_me( 9222 PROJECTED_BOTTOM0, e_me_quality_presets); 9223 ps_candt_prj_b[0] = ps_search_candts[id].ps_search_node; 9224 ps_search_candts[id].u1_num_steps_refine = 1; 9225 9226 id = hme_decide_search_candidate_priority_in_l1_and_l2_me( 9227 PROJECTED_BOTTOM_RIGHT0, e_me_quality_presets); 9228 ps_candt_prj_br[0] = ps_search_candts[id].ps_search_node; 9229 ps_search_candts[id].u1_num_steps_refine = 1; 9230 9231 id = hme_decide_search_candidate_priority_in_l1_and_l2_me( 9232 PROJECTED_BOTTOM_LEFT0, e_me_quality_presets); 9233 ps_candt_prj_bl[0] = ps_search_candts[id].ps_search_node; 9234 ps_search_candts[id].u1_num_steps_refine = 1; 9235 9236 id = hme_decide_search_candidate_priority_in_l1_and_l2_me( 9237 PROJECTED_RIGHT1, e_me_quality_presets); 9238 ps_candt_prj_r[1] = 
ps_search_candts[id].ps_search_node; 9239 ps_search_candts[id].u1_num_steps_refine = 1; 9240 9241 id = hme_decide_search_candidate_priority_in_l1_and_l2_me( 9242 PROJECTED_BOTTOM1, e_me_quality_presets); 9243 ps_candt_prj_b[1] = ps_search_candts[id].ps_search_node; 9244 ps_search_candts[id].u1_num_steps_refine = 1; 9245 9246 id = hme_decide_search_candidate_priority_in_l1_and_l2_me( 9247 PROJECTED_BOTTOM_RIGHT1, e_me_quality_presets); 9248 ps_candt_prj_br[1] = ps_search_candts[id].ps_search_node; 9249 ps_search_candts[id].u1_num_steps_refine = 1; 9250 9251 id = hme_decide_search_candidate_priority_in_l1_and_l2_me( 9252 PROJECTED_BOTTOM_LEFT1, e_me_quality_presets); 9253 ps_candt_prj_bl[1] = ps_search_candts[id].ps_search_node; 9254 ps_search_candts[id].u1_num_steps_refine = 1; 9255 9256 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(PROJECTED_TOP1, e_me_quality_presets); 9257 ps_candt_prj_t[1] = ps_search_candts[id].ps_search_node; 9258 ps_search_candts[id].u1_num_steps_refine = 1; 9259 9260 id = hme_decide_search_candidate_priority_in_l1_and_l2_me( 9261 PROJECTED_TOP_RIGHT1, e_me_quality_presets); 9262 ps_candt_prj_tr[1] = ps_search_candts[id].ps_search_node; 9263 ps_search_candts[id].u1_num_steps_refine = 1; 9264 9265 id = hme_decide_search_candidate_priority_in_l1_and_l2_me( 9266 PROJECTED_TOP_LEFT1, e_me_quality_presets); 9267 ps_candt_prj_tl[1] = ps_search_candts[id].ps_search_node; 9268 ps_search_candts[id].u1_num_steps_refine = 1; 9269 9270 /*************************************************************************/ 9271 /* Now that the candidates have been ordered, to choose the right number */ 9272 /* of initial candidates. 
*/ 9273 /*************************************************************************/ 9274 if(curr_layer_implicit && !prev_layer_implicit) 9275 { 9276 if(e_search_complexity == SEARCH_CX_LOW) 9277 num_init_candts = 7; 9278 else if(e_search_complexity == SEARCH_CX_MED) 9279 num_init_candts = 13; 9280 else if(e_search_complexity == SEARCH_CX_HIGH) 9281 num_init_candts = 18; 9282 else 9283 ASSERT(0); 9284 } 9285 else 9286 { 9287 if(e_search_complexity == SEARCH_CX_LOW) 9288 num_init_candts = 5; 9289 else if(e_search_complexity == SEARCH_CX_MED) 9290 num_init_candts = 11; 9291 else if(e_search_complexity == SEARCH_CX_HIGH) 9292 num_init_candts = 16; 9293 else 9294 ASSERT(0); 9295 } 9296 9297 if(ME_XTREME_SPEED_25 == e_me_quality_presets) 9298 { 9299 num_init_candts = NUM_INIT_SEARCH_CANDS_IN_L1_AND_L2_ME_IN_XS25; 9300 } 9301 9302 /*************************************************************************/ 9303 /* The following search parameters are fixed throughout the search across*/ 9304 /* all blks. So these are configured outside processing loop */ 9305 /*************************************************************************/ 9306 s_search_prms_blk.i4_num_init_candts = num_init_candts; 9307 s_search_prms_blk.i4_start_step = 1; 9308 s_search_prms_blk.i4_use_satd = 0; 9309 s_search_prms_blk.i4_num_steps_post_refine = ps_refine_prms->i4_num_steps_post_refine_fpel; 9310 /* we use recon only for encoded layers, otherwise it is not available */ 9311 s_search_prms_blk.i4_use_rec = ps_refine_prms->i4_encode & ps_refine_prms->i4_use_rec_in_fpel; 9312 9313 s_search_prms_blk.ps_search_candts = ps_search_candts; 9314 /* We use the same mv_range for all ref. pic. So assign to member 0 */ 9315 if(s_search_prms_blk.i4_use_rec) 9316 s_search_prms_blk.aps_mv_range[0] = &s_range_prms_rec; 9317 else 9318 s_search_prms_blk.aps_mv_range[0] = &s_range_prms_inp; 9319 /*************************************************************************/ 9320 /* Initialize coordinates. 
Meaning as follows */ 9321 /* blk_x : x coordinate of the 16x16 blk, in terms of number of blks */ 9322 /* blk_y : same as above, y coord. */ 9323 /* num_blks_in_this_ctb : number of blks in this given ctb that starts */ 9324 /* at i4_ctb_x, i4_ctb_y. This may not be 16 at picture boundaries. */ 9325 /* i4_ctb_x, i4_ctb_y: pixel coordinate of the ctb realtive to top left */ 9326 /* corner of the picture. Always multiple of 64. */ 9327 /* blk_id_in_ctb : encode order id of the blk in the ctb. */ 9328 /*************************************************************************/ 9329 blk_y = 0; 9330 blk_id_in_ctb = 0; 9331 9332 GET_NUM_BLKS_IN_PIC(i4_pic_wd, i4_pic_ht, blk_size_shift, num_blks_in_row, num_blks_in_pic); 9333 9334 /* Get the number of sync units in a row based on encode/non enocde layer */ 9335 num_sync_units_in_row = num_blks_in_row; 9336 9337 /*************************************************************************/ 9338 /* Picture limit on all 4 sides. This will be used to set mv limits for */ 9339 /* every block given its coordinate. Note thsi assumes that the min amt */ 9340 /* of padding to right of pic is equal to the blk size. If we go all the */ 9341 /* way upto 64x64, then the min padding on right size of picture should */ 9342 /* be 64, and also on bottom side of picture. */ 9343 /*************************************************************************/ 9344 SET_PIC_LIMIT( 9345 s_pic_limit_inp, 9346 ps_curr_layer->i4_pad_x_inp, 9347 ps_curr_layer->i4_pad_y_inp, 9348 ps_curr_layer->i4_wd, 9349 ps_curr_layer->i4_ht, 9350 s_search_prms_blk.i4_num_steps_post_refine); 9351 9352 SET_PIC_LIMIT( 9353 s_pic_limit_rec, 9354 ps_curr_layer->i4_pad_x_rec, 9355 ps_curr_layer->i4_pad_y_rec, 9356 ps_curr_layer->i4_wd, 9357 ps_curr_layer->i4_ht, 9358 s_search_prms_blk.i4_num_steps_post_refine); 9359 9360 /*************************************************************************/ 9361 /* set the MV limit per ref. pic. */ 9362 /* - P pic. 
: Based on the config params. */ 9363 /* - B/b pic: Based on the Max/Min MV from prev. P and config. param. */ 9364 /*************************************************************************/ 9365 { 9366 WORD32 ref_ctr; 9367 /* Only for B/b pic. */ 9368 if(1 == ps_ctxt->s_frm_prms.bidir_enabled) 9369 { 9370 WORD16 i2_mv_y_per_poc, i2_max_mv_y; 9371 WORD32 cur_poc, ref_poc, abs_poc_diff; 9372 9373 cur_poc = ps_ctxt->i4_curr_poc; 9374 9375 /* Get abs MAX for symmetric search */ 9376 i2_mv_y_per_poc = MAX( 9377 ps_ctxt->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[ps_refine_prms->i4_layer_id], 9378 (ABS(ps_ctxt->s_coarse_dyn_range_prms 9379 .i2_dyn_min_y_per_poc[ps_refine_prms->i4_layer_id]))); 9380 9381 for(ref_ctr = 0; ref_ctr < i4_num_ref_fpel; ref_ctr++) 9382 { 9383 ref_poc = ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr]; 9384 abs_poc_diff = ABS((cur_poc - ref_poc)); 9385 /* Get the cur. max MV based on POC distance */ 9386 i2_max_mv_y = i2_mv_y_per_poc * abs_poc_diff; 9387 i2_max_mv_y = MIN(i2_max_mv_y, ps_curr_layer->i2_max_mv_y); 9388 9389 as_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x; 9390 as_mv_limit[ref_ctr].i2_min_y = -i2_max_mv_y; 9391 as_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x; 9392 as_mv_limit[ref_ctr].i2_max_y = i2_max_mv_y; 9393 } 9394 } 9395 else 9396 { 9397 /* Set the Config. File Params for P pic. 
*/ 9398 for(ref_ctr = 0; ref_ctr < i4_num_ref_fpel; ref_ctr++) 9399 { 9400 as_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x; 9401 as_mv_limit[ref_ctr].i2_min_y = -ps_curr_layer->i2_max_mv_y; 9402 as_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x; 9403 as_mv_limit[ref_ctr].i2_max_y = ps_curr_layer->i2_max_mv_y; 9404 } 9405 } 9406 } 9407 9408 /* EIID: Calculate threshold based on quality preset and/or temporal layers */ 9409 if(e_me_quality_presets == ME_MEDIUM_SPEED) 9410 { 9411 i4_threshold_multiplier = 1; 9412 i4_threshold_divider = 4; 9413 } 9414 else if(e_me_quality_presets == ME_HIGH_SPEED) 9415 { 9416 i4_threshold_multiplier = 1; 9417 i4_threshold_divider = 2; 9418 } 9419 else if((e_me_quality_presets == ME_XTREME_SPEED) || (e_me_quality_presets == ME_XTREME_SPEED_25)) 9420 { 9421 #if OLD_XTREME_SPEED 9422 /* Hard coding the temporal ID value to 1, if it is older xtreme speed */ 9423 i4_temporal_layer = 1; 9424 #endif 9425 if(i4_temporal_layer == 0) 9426 { 9427 i4_threshold_multiplier = 3; 9428 i4_threshold_divider = 4; 9429 } 9430 else if(i4_temporal_layer == 1) 9431 { 9432 i4_threshold_multiplier = 3; 9433 i4_threshold_divider = 4; 9434 } 9435 else if(i4_temporal_layer == 2) 9436 { 9437 i4_threshold_multiplier = 1; 9438 i4_threshold_divider = 1; 9439 } 9440 else 9441 { 9442 i4_threshold_multiplier = 5; 9443 i4_threshold_divider = 4; 9444 } 9445 } 9446 else if(e_me_quality_presets == ME_HIGH_QUALITY) 9447 { 9448 i4_threshold_multiplier = 1; 9449 i4_threshold_divider = 1; 9450 } 9451 9452 /*************************************************************************/ 9453 /*************************************************************************/ 9454 /*************************************************************************/ 9455 /* START OF THE CORE LOOP */ 9456 /* If Encode is 0, then we just loop over each blk */ 9457 /*************************************************************************/ 9458 
/*************************************************************************/ 9459 /*************************************************************************/ 9460 while(0 == end_of_frame) 9461 { 9462 job_queue_t *ps_job; 9463 ihevce_ed_blk_t *ps_ed_blk_ctxt_curr_row; //EIID 9464 WORD32 i4_ctb_row_ctr; //counter to calculate CTB row counter. It's (row_ctr /4) 9465 WORD32 i4_num_ctbs_in_row = (num_blks_in_row + 3) / 4; //calculations verified for L1 only 9466 //+3 to get ceil values when divided by 4 9467 WORD32 i4_num_4x4_blocks_in_ctb_at_l1 = 9468 8 * 8; //considering CTB size 32x32 at L1. hardcoded for now 9469 //if there is variable for ctb size use that and this variable can be derived 9470 WORD32 offset_val, check_dep_pos, set_dep_pos; 9471 void *pv_hme_dep_mngr; 9472 ihevce_ed_ctb_l1_t *ps_ed_ctb_l1_row; 9473 9474 /* Get the current layer HME Dep Mngr */ 9475 /* Note : Use layer_id - 1 in HME layers */ 9476 9477 pv_hme_dep_mngr = ppv_dep_mngr_hme_sync[ps_refine_prms->i4_layer_id - 1]; 9478 9479 /* Get the current row from the job queue */ 9480 ps_job = (job_queue_t *)ihevce_pre_enc_grp_get_next_job( 9481 ps_multi_thrd_ctxt, lyr_job_type, 1, i4_ping_pong); 9482 9483 /* If all rows are done, set the end of process flag to 1, */ 9484 /* and the current row to -1 */ 9485 if(NULL == ps_job) 9486 { 9487 blk_y = -1; 9488 end_of_frame = 1; 9489 9490 continue; 9491 } 9492 9493 if(1 == ps_ctxt->s_frm_prms.is_i_pic) 9494 { 9495 /* set the output dependency of current row */ 9496 ihevce_pre_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_ping_pong); 9497 continue; 9498 } 9499 9500 blk_y = ps_job->s_job_info.s_me_job_info.i4_vert_unit_row_no; 9501 blk_x = 0; 9502 i4_ctb_x = 0; 9503 9504 /* wait for Corresponding Pre intra Job to be completed */ 9505 if(1 == ps_refine_prms->i4_layer_id) 9506 { 9507 volatile UWORD32 i4_l1_done; 9508 volatile UWORD32 *pi4_l1_done; 9509 pi4_l1_done = (volatile UWORD32 *)&ps_multi_thrd_ctxt 9510 
->aai4_l1_pre_intra_done[i4_ping_pong][blk_y >> 2]; 9511 i4_l1_done = *pi4_l1_done; 9512 while(!i4_l1_done) 9513 { 9514 i4_l1_done = *pi4_l1_done; 9515 } 9516 } 9517 /* Set Variables for Dep. Checking and Setting */ 9518 set_dep_pos = blk_y + 1; 9519 if(blk_y > 0) 9520 { 9521 offset_val = 2; 9522 check_dep_pos = blk_y - 1; 9523 } 9524 else 9525 { 9526 /* First row should run without waiting */ 9527 offset_val = -1; 9528 check_dep_pos = 0; 9529 } 9530 9531 /* EIID: calculate ed_blk_ctxt pointer for current row */ 9532 /* valid for only layer-1. not varified and used for other layers */ 9533 i4_ctb_row_ctr = blk_y / 4; 9534 ps_ed_blk_ctxt_curr_row = 9535 ps_ctxt->ps_ed_blk + (i4_ctb_row_ctr * i4_num_ctbs_in_row * 9536 i4_num_4x4_blocks_in_ctb_at_l1); //valid for L1 only 9537 ps_ed_ctb_l1_row = ps_ctxt->ps_ed_ctb_l1 + (i4_ctb_row_ctr * i4_num_ctbs_in_row); 9538 9539 /* if non-encode layer then i4_ctb_x will be same as blk_x */ 9540 /* loop over all the units is a row */ 9541 for(; i4_ctb_x < num_sync_units_in_row; i4_ctb_x++) 9542 { 9543 ihevce_ed_blk_t *ps_ed_blk_ctxt_curr_ctb; //EIDD 9544 ihevce_ed_ctb_l1_t *ps_ed_ctb_l1_curr; 9545 WORD32 i4_ctb_blk_ctr = i4_ctb_x / 4; 9546 9547 /* Wait till top row block is processed */ 9548 /* Currently checking till top right block*/ 9549 9550 /* Disabled since all candidates, except for */ 9551 /* L and C, are projected from the coarser layer, */ 9552 /* only in ME_HIGH_SPEED mode */ 9553 if((ME_MEDIUM_SPEED > e_me_quality_presets)) 9554 { 9555 if(i4_ctb_x < (num_sync_units_in_row - 1)) 9556 { 9557 ihevce_dmgr_chk_row_row_sync( 9558 pv_hme_dep_mngr, 9559 i4_ctb_x, 9560 offset_val, 9561 check_dep_pos, 9562 0, /* Col Tile No. 
: Not supported in PreEnc*/ 9563 ps_ctxt->thrd_id); 9564 } 9565 } 9566 9567 { 9568 /* for non encoder layer only one block is processed */ 9569 num_blks_in_this_ctb = 1; 9570 } 9571 9572 /* EIID: derive ed_ctxt ptr for current CTB */ 9573 ps_ed_blk_ctxt_curr_ctb = 9574 ps_ed_blk_ctxt_curr_row + 9575 (i4_ctb_blk_ctr * 9576 i4_num_4x4_blocks_in_ctb_at_l1); //currently valid for l1 layer only 9577 ps_ed_ctb_l1_curr = ps_ed_ctb_l1_row + i4_ctb_blk_ctr; 9578 9579 /* loop over all the blocks in CTB will always be 1 */ 9580 for(blk_id_in_ctb = 0; blk_id_in_ctb < num_blks_in_this_ctb; blk_id_in_ctb++) 9581 { 9582 { 9583 /* non encode layer */ 9584 blk_x = i4_ctb_x; 9585 blk_id_in_full_ctb = 0; 9586 s_search_prms_blk.i4_cu_x_off = s_search_prms_blk.i4_cu_y_off = 0; 9587 } 9588 9589 /* get the current input blk point */ 9590 pos_x = blk_x << blk_size_shift; 9591 pos_y = blk_y << blk_size_shift; 9592 pu1_inp = ps_curr_layer->pu1_inp + pos_x + (pos_y * i4_inp_stride); 9593 9594 /*********************************************************************/ 9595 /* replicate the inp buffer at blk or ctb level for each ref id, */ 9596 /* Instead of searching with wk * ref(k), we search with Ik = I / wk */ 9597 /* thereby avoiding a bloat up of memory. If we did all references */ 9598 /* weighted pred, we will end up with a duplicate copy of each ref */ 9599 /* at each layer, since we need to preserve the original reference. */ 9600 /* ToDo: Need to observe performance with this mechanism and compare */ 9601 /* with case where ref is weighted. 
*/ 9602 /*********************************************************************/ 9603 if(blk_id_in_ctb == 0) 9604 { 9605 fp_get_wt_inp( 9606 ps_curr_layer, 9607 &ps_ctxt->s_wt_pred, 9608 unit_size, 9609 pos_x, 9610 pos_y, 9611 unit_size, 9612 ps_ctxt->num_ref_future + ps_ctxt->num_ref_past, 9613 ps_ctxt->i4_wt_pred_enable_flag); 9614 } 9615 9616 s_search_prms_blk.i4_x_off = blk_x << blk_size_shift; 9617 s_search_prms_blk.i4_y_off = blk_y << blk_size_shift; 9618 /* Select search results from a suitable search result in the context */ 9619 { 9620 ps_search_results = &ps_ctxt->s_search_results_8x8; 9621 } 9622 9623 s_search_prms_blk.ps_search_results = ps_search_results; 9624 9625 /* RESET ALL SEARCH RESULTS FOR THE NEW BLK */ 9626 hme_reset_search_results( 9627 ps_search_results, s_search_prms_blk.i4_part_mask, MV_RES_FPEL); 9628 9629 /* Loop across different Ref IDx */ 9630 for(i1_ref_idx = 0; i1_ref_idx < i4_num_ref_fpel; i1_ref_idx++) 9631 { 9632 S32 next_blk_offset = (e_search_blk_size == BLK_16x16) ? 22 : 12; 9633 S32 prev_blk_offset = 6; 9634 S32 resultid; 9635 9636 /*********************************************************************/ 9637 /* For every blk in the picture, the search range needs to be derived*/ 9638 /* Any blk can have any mv, but practical search constraints are */ 9639 /* imposed by the picture boundary and amt of padding. */ 9640 /*********************************************************************/ 9641 /* MV limit is different based on ref. 
PIC */ 9642 hme_derive_search_range( 9643 &s_range_prms_inp, 9644 &s_pic_limit_inp, 9645 &as_mv_limit[i1_ref_idx], 9646 pos_x, 9647 pos_y, 9648 blk_wd, 9649 blk_ht); 9650 hme_derive_search_range( 9651 &s_range_prms_rec, 9652 &s_pic_limit_rec, 9653 &as_mv_limit[i1_ref_idx], 9654 pos_x, 9655 pos_y, 9656 blk_wd, 9657 blk_ht); 9658 9659 s_search_prms_blk.i1_ref_idx = i1_ref_idx; 9660 ps_candt_zeromv->i1_ref_idx = i1_ref_idx; 9661 9662 i4_num_srch_cands = 1; 9663 9664 if(1 != ps_refine_prms->i4_layer_id) 9665 { 9666 S32 x, y; 9667 x = gau1_encode_to_raster_x[blk_id_in_full_ctb]; 9668 y = gau1_encode_to_raster_y[blk_id_in_full_ctb]; 9669 9670 if(ME_MEDIUM_SPEED > e_me_quality_presets) 9671 { 9672 hme_get_spatial_candt( 9673 ps_curr_layer, 9674 e_search_blk_size, 9675 blk_x, 9676 blk_y, 9677 i1_ref_idx, 9678 &as_top_neighbours[0], 9679 &as_left_neighbours[0], 9680 0, 9681 ((ps_refine_prms->i4_encode) ? gau1_cu_tr_valid[y][x] : 1), 9682 0, 9683 ps_refine_prms->i4_encode); 9684 9685 *ps_candt_tr = as_top_neighbours[3]; 9686 *ps_candt_t = as_top_neighbours[1]; 9687 *ps_candt_tl = as_top_neighbours[0]; 9688 i4_num_srch_cands += 3; 9689 } 9690 else 9691 { 9692 layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank; 9693 S32 i4_blk_size1 = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size]; 9694 S32 i4_blk_size2 = gau1_blk_size_to_wd[e_search_blk_size]; 9695 search_node_t *ps_search_node; 9696 S32 i4_offset, blk_x_temp = blk_x, blk_y_temp = blk_y; 9697 hme_mv_t *ps_mv, *ps_mv_base; 9698 S08 *pi1_ref_idx, *pi1_ref_idx_base; 9699 S32 jump = 1, mvs_in_blk, mvs_in_row; 9700 S32 shift = (ps_refine_prms->i4_encode ? 
2 : 0); 9701 9702 if(i4_blk_size1 != i4_blk_size2) 9703 { 9704 blk_x_temp <<= 1; 9705 blk_y_temp <<= 1; 9706 jump = 2; 9707 if((i4_blk_size1 << 2) == i4_blk_size2) 9708 { 9709 blk_x_temp <<= 1; 9710 blk_y_temp <<= 1; 9711 jump = 4; 9712 } 9713 } 9714 9715 mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk; 9716 mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row; 9717 9718 /* Adjust teh blk coord to point to top left locn */ 9719 blk_x_temp -= 1; 9720 blk_y_temp -= 1; 9721 9722 /* Pick up the mvs from the location */ 9723 i4_offset = (blk_x_temp * ps_layer_mvbank->i4_num_mvs_per_blk); 9724 i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y_temp); 9725 9726 ps_mv = ps_layer_mvbank->ps_mv + i4_offset; 9727 pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset; 9728 9729 ps_mv += (i1_ref_idx * ps_layer_mvbank->i4_num_mvs_per_ref); 9730 pi1_ref_idx += (i1_ref_idx * ps_layer_mvbank->i4_num_mvs_per_ref); 9731 9732 ps_mv_base = ps_mv; 9733 pi1_ref_idx_base = pi1_ref_idx; 9734 9735 ps_search_node = &as_left_neighbours[0]; 9736 ps_mv = ps_mv_base + mvs_in_row; 9737 pi1_ref_idx = pi1_ref_idx_base + mvs_in_row; 9738 COPY_MV_TO_SEARCH_NODE( 9739 ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift); 9740 9741 i4_num_srch_cands++; 9742 } 9743 } 9744 else 9745 { 9746 S32 x, y; 9747 x = gau1_encode_to_raster_x[blk_id_in_full_ctb]; 9748 y = gau1_encode_to_raster_y[blk_id_in_full_ctb]; 9749 9750 if(ME_MEDIUM_SPEED > e_me_quality_presets) 9751 { 9752 hme_get_spatial_candt_in_l1_me( 9753 ps_curr_layer, 9754 e_search_blk_size, 9755 blk_x, 9756 blk_y, 9757 i1_ref_idx, 9758 !ps_search_results->pu1_is_past[i1_ref_idx], 9759 &as_top_neighbours[0], 9760 &as_left_neighbours[0], 9761 0, 9762 ((ps_refine_prms->i4_encode) ? 
gau1_cu_tr_valid[y][x] : 1), 9763 0, 9764 ps_ctxt->s_frm_prms.u1_num_active_ref_l0, 9765 ps_ctxt->s_frm_prms.u1_num_active_ref_l1); 9766 9767 *ps_candt_tr = as_top_neighbours[3]; 9768 *ps_candt_t = as_top_neighbours[1]; 9769 *ps_candt_tl = as_top_neighbours[0]; 9770 9771 i4_num_srch_cands += 3; 9772 } 9773 else 9774 { 9775 layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank; 9776 S32 i4_blk_size1 = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size]; 9777 S32 i4_blk_size2 = gau1_blk_size_to_wd[e_search_blk_size]; 9778 S32 i4_mv_pos_in_implicit_array; 9779 search_node_t *ps_search_node; 9780 S32 i4_offset, blk_x_temp = blk_x, blk_y_temp = blk_y; 9781 hme_mv_t *ps_mv, *ps_mv_base; 9782 S08 *pi1_ref_idx, *pi1_ref_idx_base; 9783 S32 jump = 1, mvs_in_blk, mvs_in_row; 9784 S32 shift = (ps_refine_prms->i4_encode ? 2 : 0); 9785 U08 u1_pred_dir = !ps_search_results->pu1_is_past[i1_ref_idx]; 9786 S32 i4_num_results_in_given_dir = 9787 ((u1_pred_dir == 1) ? (ps_layer_mvbank->i4_num_mvs_per_ref * 9788 ps_ctxt->s_frm_prms.u1_num_active_ref_l1) 9789 : (ps_layer_mvbank->i4_num_mvs_per_ref * 9790 ps_ctxt->s_frm_prms.u1_num_active_ref_l0)); 9791 9792 if(i4_blk_size1 != i4_blk_size2) 9793 { 9794 blk_x_temp <<= 1; 9795 blk_y_temp <<= 1; 9796 jump = 2; 9797 if((i4_blk_size1 << 2) == i4_blk_size2) 9798 { 9799 blk_x_temp <<= 1; 9800 blk_y_temp <<= 1; 9801 jump = 4; 9802 } 9803 } 9804 9805 mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk; 9806 mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row; 9807 9808 /* Adjust teh blk coord to point to top left locn */ 9809 blk_x_temp -= 1; 9810 blk_y_temp -= 1; 9811 9812 /* Pick up the mvs from the location */ 9813 i4_offset = (blk_x_temp * ps_layer_mvbank->i4_num_mvs_per_blk); 9814 i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y_temp); 9815 9816 i4_offset += 9817 ((u1_pred_dir == 1) ? 
(ps_layer_mvbank->i4_num_mvs_per_ref * 9818 ps_ctxt->s_frm_prms.u1_num_active_ref_l0) 9819 : 0); 9820 9821 ps_mv = ps_layer_mvbank->ps_mv + i4_offset; 9822 pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset; 9823 9824 ps_mv_base = ps_mv; 9825 pi1_ref_idx_base = pi1_ref_idx; 9826 9827 { 9828 /* ps_mv and pi1_ref_idx now point to the top left locn */ 9829 ps_search_node = &as_left_neighbours[0]; 9830 ps_mv = ps_mv_base + mvs_in_row; 9831 pi1_ref_idx = pi1_ref_idx_base + mvs_in_row; 9832 9833 i4_mv_pos_in_implicit_array = 9834 hme_find_pos_of_implicitly_stored_ref_id( 9835 pi1_ref_idx, i1_ref_idx, 0, i4_num_results_in_given_dir); 9836 9837 if(-1 != i4_mv_pos_in_implicit_array) 9838 { 9839 COPY_MV_TO_SEARCH_NODE( 9840 ps_search_node, 9841 &ps_mv[i4_mv_pos_in_implicit_array], 9842 &pi1_ref_idx[i4_mv_pos_in_implicit_array], 9843 i1_ref_idx, 9844 shift); 9845 } 9846 else 9847 { 9848 ps_search_node->u1_is_avail = 0; 9849 ps_search_node->s_mv.i2_mvx = 0; 9850 ps_search_node->s_mv.i2_mvy = 0; 9851 ps_search_node->i1_ref_idx = i1_ref_idx; 9852 } 9853 9854 i4_num_srch_cands++; 9855 } 9856 } 9857 } 9858 9859 *ps_candt_l = as_left_neighbours[0]; 9860 9861 /* when 16x16 is searched in an encode layer, and the prev layer */ 9862 /* stores results for 4x4 blks, we project 5 candts corresponding */ 9863 /* to (2,2), (2,14), (14,2), 14,14) and 2nd best of (2,2) */ 9864 /* However in other cases, only 2,2 best and 2nd best reqd */ 9865 resultid = 0; 9866 pf_hme_project_coloc_candt( 9867 ps_candt_prj_coloc[0], 9868 ps_curr_layer, 9869 ps_coarse_layer, 9870 pos_x + 2, 9871 pos_y + 2, 9872 i1_ref_idx, 9873 resultid); 9874 9875 i4_num_srch_cands++; 9876 9877 resultid = 1; 9878 if(num_results_prev_layer > 1) 9879 { 9880 pf_hme_project_coloc_candt( 9881 ps_candt_prj_coloc[1], 9882 ps_curr_layer, 9883 ps_coarse_layer, 9884 pos_x + 2, 9885 pos_y + 2, 9886 i1_ref_idx, 9887 resultid); 9888 9889 i4_num_srch_cands++; 9890 } 9891 9892 resultid = 0; 9893 9894 if(ME_MEDIUM_SPEED <= 
e_me_quality_presets) 9895 { 9896 pf_hme_project_coloc_candt( 9897 ps_candt_prj_t[0], 9898 ps_curr_layer, 9899 ps_coarse_layer, 9900 pos_x, 9901 pos_y - prev_blk_offset, 9902 i1_ref_idx, 9903 resultid); 9904 9905 i4_num_srch_cands++; 9906 } 9907 9908 { 9909 pf_hme_project_coloc_candt( 9910 ps_candt_prj_br[0], 9911 ps_curr_layer, 9912 ps_coarse_layer, 9913 pos_x + next_blk_offset, 9914 pos_y + next_blk_offset, 9915 i1_ref_idx, 9916 resultid); 9917 pf_hme_project_coloc_candt( 9918 ps_candt_prj_bl[0], 9919 ps_curr_layer, 9920 ps_coarse_layer, 9921 pos_x - prev_blk_offset, 9922 pos_y + next_blk_offset, 9923 i1_ref_idx, 9924 resultid); 9925 pf_hme_project_coloc_candt( 9926 ps_candt_prj_r[0], 9927 ps_curr_layer, 9928 ps_coarse_layer, 9929 pos_x + next_blk_offset, 9930 pos_y, 9931 i1_ref_idx, 9932 resultid); 9933 pf_hme_project_coloc_candt( 9934 ps_candt_prj_b[0], 9935 ps_curr_layer, 9936 ps_coarse_layer, 9937 pos_x, 9938 pos_y + next_blk_offset, 9939 i1_ref_idx, 9940 resultid); 9941 9942 i4_num_srch_cands += 4; 9943 9944 if(ME_MEDIUM_SPEED <= e_me_quality_presets) 9945 { 9946 pf_hme_project_coloc_candt( 9947 ps_candt_prj_tr[0], 9948 ps_curr_layer, 9949 ps_coarse_layer, 9950 pos_x + next_blk_offset, 9951 pos_y - prev_blk_offset, 9952 i1_ref_idx, 9953 resultid); 9954 pf_hme_project_coloc_candt( 9955 ps_candt_prj_tl[0], 9956 ps_curr_layer, 9957 ps_coarse_layer, 9958 pos_x - prev_blk_offset, 9959 pos_y - prev_blk_offset, 9960 i1_ref_idx, 9961 resultid); 9962 9963 i4_num_srch_cands += 2; 9964 } 9965 } 9966 if((num_results_prev_layer > 1) && (e_search_complexity >= SEARCH_CX_MED)) 9967 { 9968 resultid = 1; 9969 pf_hme_project_coloc_candt( 9970 ps_candt_prj_br[1], 9971 ps_curr_layer, 9972 ps_coarse_layer, 9973 pos_x + next_blk_offset, 9974 pos_y + next_blk_offset, 9975 i1_ref_idx, 9976 resultid); 9977 pf_hme_project_coloc_candt( 9978 ps_candt_prj_bl[1], 9979 ps_curr_layer, 9980 ps_coarse_layer, 9981 pos_x - prev_blk_offset, 9982 pos_y + next_blk_offset, 9983 i1_ref_idx, 9984 
resultid); 9985 pf_hme_project_coloc_candt( 9986 ps_candt_prj_r[1], 9987 ps_curr_layer, 9988 ps_coarse_layer, 9989 pos_x + next_blk_offset, 9990 pos_y, 9991 i1_ref_idx, 9992 resultid); 9993 pf_hme_project_coloc_candt( 9994 ps_candt_prj_b[1], 9995 ps_curr_layer, 9996 ps_coarse_layer, 9997 pos_x, 9998 pos_y + next_blk_offset, 9999 i1_ref_idx, 10000 resultid); 10001 10002 i4_num_srch_cands += 4; 10003 10004 pf_hme_project_coloc_candt( 10005 ps_candt_prj_tr[1], 10006 ps_curr_layer, 10007 ps_coarse_layer, 10008 pos_x + next_blk_offset, 10009 pos_y - prev_blk_offset, 10010 i1_ref_idx, 10011 resultid); 10012 pf_hme_project_coloc_candt( 10013 ps_candt_prj_tl[1], 10014 ps_curr_layer, 10015 ps_coarse_layer, 10016 pos_x - prev_blk_offset, 10017 pos_y - prev_blk_offset, 10018 i1_ref_idx, 10019 resultid); 10020 pf_hme_project_coloc_candt( 10021 ps_candt_prj_t[1], 10022 ps_curr_layer, 10023 ps_coarse_layer, 10024 pos_x, 10025 pos_y - prev_blk_offset, 10026 i1_ref_idx, 10027 resultid); 10028 10029 i4_num_srch_cands += 3; 10030 } 10031 10032 /* Note this block also clips the MV range for all candidates */ 10033 #ifdef _DEBUG 10034 { 10035 S32 candt; 10036 range_prms_t *ps_range_prms; 10037 10038 S32 num_ref_valid = ps_ctxt->num_ref_future + ps_ctxt->num_ref_past; 10039 for(candt = 0; candt < i4_num_srch_cands; candt++) 10040 { 10041 search_node_t *ps_search_node; 10042 10043 ps_search_node = 10044 s_search_prms_blk.ps_search_candts[candt].ps_search_node; 10045 10046 ps_range_prms = s_search_prms_blk.aps_mv_range[0]; 10047 10048 if((ps_search_node->i1_ref_idx >= num_ref_valid) || 10049 (ps_search_node->i1_ref_idx < 0)) 10050 { 10051 ASSERT(0); 10052 } 10053 } 10054 } 10055 #endif 10056 10057 { 10058 S32 srch_cand; 10059 S32 num_unique_nodes = 0; 10060 S32 num_nodes_searched = 0; 10061 S32 num_best_cand = 0; 10062 S08 i1_grid_enable = 0; 10063 search_node_t as_best_two_proj_node[TOT_NUM_PARTS * 2]; 10064 /* has list of valid partition to search terminated by -1 */ 10065 S32 
ai4_valid_part_ids[TOT_NUM_PARTS + 1]; 10066 S32 center_x; 10067 S32 center_y; 10068 10069 /* indicates if the centre point of grid needs to be explicitly added for search */ 10070 S32 add_centre = 0; 10071 10072 memset(au4_unique_node_map, 0, sizeof(au4_unique_node_map)); 10073 center_x = ps_candt_prj_coloc[0]->s_mv.i2_mvx; 10074 center_y = ps_candt_prj_coloc[0]->s_mv.i2_mvy; 10075 10076 for(srch_cand = 0; 10077 (srch_cand < i4_num_srch_cands) && 10078 (num_unique_nodes <= s_search_prms_blk.i4_num_init_candts); 10079 srch_cand++) 10080 { 10081 search_node_t s_search_node_temp = 10082 s_search_prms_blk.ps_search_candts[srch_cand].ps_search_node[0]; 10083 10084 s_search_node_temp.i1_ref_idx = i1_ref_idx; //TEMP FIX; 10085 10086 /* Clip the motion vectors as well here since after clipping 10087 two candidates can become same and they will be removed during deduplication */ 10088 CLIP_MV_WITHIN_RANGE( 10089 s_search_node_temp.s_mv.i2_mvx, 10090 s_search_node_temp.s_mv.i2_mvy, 10091 s_search_prms_blk.aps_mv_range[0], 10092 ps_refine_prms->i4_num_steps_fpel_refine, 10093 ps_refine_prms->i4_num_steps_hpel_refine, 10094 ps_refine_prms->i4_num_steps_qpel_refine); 10095 10096 /* PT_C */ 10097 INSERT_NEW_NODE( 10098 as_unique_search_nodes, 10099 num_unique_nodes, 10100 s_search_node_temp, 10101 0, 10102 au4_unique_node_map, 10103 center_x, 10104 center_y, 10105 1); 10106 10107 num_nodes_searched += 1; 10108 } 10109 num_unique_nodes = 10110 MIN(num_unique_nodes, s_search_prms_blk.i4_num_init_candts); 10111 10112 /* If number of candidates projected/number of candidates to be refined are more than 2, 10113 then filter out and choose the best two here */ 10114 if(num_unique_nodes >= 2) 10115 { 10116 S32 num_results; 10117 S32 cnt; 10118 S32 *pi4_valid_part_ids; 10119 s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0]; 10120 s_search_prms_blk.i4_num_search_nodes = num_unique_nodes; 10121 pi4_valid_part_ids = &ai4_valid_part_ids[0]; 10122 10123 /* pi4_valid_part_ids 
is updated inside */ 10124 hme_pred_search_no_encode( 10125 &s_search_prms_blk, 10126 ps_curr_layer, 10127 &ps_ctxt->s_wt_pred, 10128 pi4_valid_part_ids, 10129 1, 10130 e_me_quality_presets, 10131 i1_grid_enable, 10132 (ihevce_me_optimised_function_list_t *) 10133 ps_ctxt->pv_me_optimised_function_list 10134 10135 ); 10136 10137 num_best_cand = 0; 10138 cnt = 0; 10139 num_results = ps_search_results->u1_num_results_per_part; 10140 10141 while((id = pi4_valid_part_ids[cnt++]) >= 0) 10142 { 10143 num_results = 10144 MIN(ps_refine_prms->pu1_num_best_results[id], num_results); 10145 10146 for(i = 0; i < num_results; i++) 10147 { 10148 search_node_t s_search_node_temp; 10149 s_search_node_temp = 10150 *(ps_search_results->aps_part_results[i1_ref_idx][id] + i); 10151 if(s_search_node_temp.i1_ref_idx >= 0) 10152 { 10153 INSERT_NEW_NODE_NOMAP( 10154 as_best_two_proj_node, 10155 num_best_cand, 10156 s_search_node_temp, 10157 0); 10158 } 10159 } 10160 } 10161 } 10162 else 10163 { 10164 add_centre = 1; 10165 num_best_cand = num_unique_nodes; 10166 as_best_two_proj_node[0] = as_unique_search_nodes[0]; 10167 } 10168 10169 num_unique_nodes = 0; 10170 num_nodes_searched = 0; 10171 10172 if(1 == num_best_cand) 10173 { 10174 search_node_t s_search_node_temp = as_best_two_proj_node[0]; 10175 S16 i2_mv_x = s_search_node_temp.s_mv.i2_mvx; 10176 S16 i2_mv_y = s_search_node_temp.s_mv.i2_mvy; 10177 S08 i1_ref_idx = s_search_node_temp.i1_ref_idx; 10178 10179 i1_grid_enable = 1; 10180 10181 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1; 10182 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1; 10183 as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx; 10184 10185 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x; 10186 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1; 10187 as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx; 10188 10189 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = 
i2_mv_x + 1; 10190 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1; 10191 as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx; 10192 10193 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1; 10194 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y; 10195 as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx; 10196 10197 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1; 10198 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y; 10199 as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx; 10200 10201 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1; 10202 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1; 10203 as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx; 10204 10205 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x; 10206 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1; 10207 as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx; 10208 10209 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1; 10210 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1; 10211 as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx; 10212 10213 if(add_centre) 10214 { 10215 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x; 10216 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y; 10217 as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx; 10218 } 10219 } 10220 else 10221 { 10222 /* For the candidates where refinement was required, choose the best two */ 10223 for(srch_cand = 0; srch_cand < num_best_cand; srch_cand++) 10224 { 10225 search_node_t s_search_node_temp = as_best_two_proj_node[srch_cand]; 10226 WORD32 mv_x = s_search_node_temp.s_mv.i2_mvx; 10227 WORD32 mv_y = s_search_node_temp.s_mv.i2_mvy; 10228 10229 /* Because there may not be two best unique candidates (because of clipping), 
10230 second best candidate can be uninitialized, ignore that */ 10231 if(s_search_node_temp.s_mv.i2_mvx == INTRA_MV || 10232 s_search_node_temp.i1_ref_idx < 0) 10233 { 10234 num_nodes_searched++; 10235 continue; 10236 } 10237 10238 /* PT_C */ 10239 /* Since the center point has already be evaluated and best results are persistent, 10240 it will not be evaluated again */ 10241 if(add_centre) /* centre point added explicitly again if search results is not updated */ 10242 { 10243 INSERT_NEW_NODE( 10244 as_unique_search_nodes, 10245 num_unique_nodes, 10246 s_search_node_temp, 10247 0, 10248 au4_unique_node_map, 10249 center_x, 10250 center_y, 10251 1); 10252 } 10253 10254 /* PT_L */ 10255 s_search_node_temp.s_mv.i2_mvx = mv_x - 1; 10256 s_search_node_temp.s_mv.i2_mvy = mv_y; 10257 INSERT_NEW_NODE( 10258 as_unique_search_nodes, 10259 num_unique_nodes, 10260 s_search_node_temp, 10261 0, 10262 au4_unique_node_map, 10263 center_x, 10264 center_y, 10265 1); 10266 10267 /* PT_T */ 10268 s_search_node_temp.s_mv.i2_mvx = mv_x; 10269 s_search_node_temp.s_mv.i2_mvy = mv_y - 1; 10270 INSERT_NEW_NODE( 10271 as_unique_search_nodes, 10272 num_unique_nodes, 10273 s_search_node_temp, 10274 0, 10275 au4_unique_node_map, 10276 center_x, 10277 center_y, 10278 1); 10279 10280 /* PT_R */ 10281 s_search_node_temp.s_mv.i2_mvx = mv_x + 1; 10282 s_search_node_temp.s_mv.i2_mvy = mv_y; 10283 INSERT_NEW_NODE( 10284 as_unique_search_nodes, 10285 num_unique_nodes, 10286 s_search_node_temp, 10287 0, 10288 au4_unique_node_map, 10289 center_x, 10290 center_y, 10291 1); 10292 10293 /* PT_B */ 10294 s_search_node_temp.s_mv.i2_mvx = mv_x; 10295 s_search_node_temp.s_mv.i2_mvy = mv_y + 1; 10296 INSERT_NEW_NODE( 10297 as_unique_search_nodes, 10298 num_unique_nodes, 10299 s_search_node_temp, 10300 0, 10301 au4_unique_node_map, 10302 center_x, 10303 center_y, 10304 1); 10305 10306 /* PT_TL */ 10307 s_search_node_temp.s_mv.i2_mvx = mv_x - 1; 10308 s_search_node_temp.s_mv.i2_mvy = mv_y - 1; 10309 
INSERT_NEW_NODE( 10310 as_unique_search_nodes, 10311 num_unique_nodes, 10312 s_search_node_temp, 10313 0, 10314 au4_unique_node_map, 10315 center_x, 10316 center_y, 10317 1); 10318 10319 /* PT_TR */ 10320 s_search_node_temp.s_mv.i2_mvx = mv_x + 1; 10321 s_search_node_temp.s_mv.i2_mvy = mv_y - 1; 10322 INSERT_NEW_NODE( 10323 as_unique_search_nodes, 10324 num_unique_nodes, 10325 s_search_node_temp, 10326 0, 10327 au4_unique_node_map, 10328 center_x, 10329 center_y, 10330 1); 10331 10332 /* PT_BL */ 10333 s_search_node_temp.s_mv.i2_mvx = mv_x - 1; 10334 s_search_node_temp.s_mv.i2_mvy = mv_y + 1; 10335 INSERT_NEW_NODE( 10336 as_unique_search_nodes, 10337 num_unique_nodes, 10338 s_search_node_temp, 10339 0, 10340 au4_unique_node_map, 10341 center_x, 10342 center_y, 10343 1); 10344 10345 /* PT_BR */ 10346 s_search_node_temp.s_mv.i2_mvx = mv_x + 1; 10347 s_search_node_temp.s_mv.i2_mvy = mv_y + 1; 10348 INSERT_NEW_NODE( 10349 as_unique_search_nodes, 10350 num_unique_nodes, 10351 s_search_node_temp, 10352 0, 10353 au4_unique_node_map, 10354 center_x, 10355 center_y, 10356 1); 10357 } 10358 } 10359 10360 s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0]; 10361 s_search_prms_blk.i4_num_search_nodes = num_unique_nodes; 10362 10363 /*****************************************************************/ 10364 /* Call the search algorithm, this includes: */ 10365 /* Pre-Search-Refinement (for coarse candts) */ 10366 /* Search on each candidate */ 10367 /* Post Search Refinement on winners/other new candidates */ 10368 /*****************************************************************/ 10369 10370 hme_pred_search_no_encode( 10371 &s_search_prms_blk, 10372 ps_curr_layer, 10373 &ps_ctxt->s_wt_pred, 10374 ai4_valid_part_ids, 10375 0, 10376 e_me_quality_presets, 10377 i1_grid_enable, 10378 (ihevce_me_optimised_function_list_t *) 10379 ps_ctxt->pv_me_optimised_function_list); 10380 10381 i1_grid_enable = 0; 10382 } 10383 } 10384 10385 /* for non encode layer update MV and end 
processing for block */ 10386 { 10387 WORD32 i4_ref_id, min_cost = 0x7fffffff, min_sad = 0; 10388 search_node_t *ps_search_node; 10389 /* now update the reqd results back to the layer mv bank. */ 10390 if(1 == ps_refine_prms->i4_layer_id) 10391 { 10392 hme_update_mv_bank_in_l1_me( 10393 ps_search_results, 10394 ps_curr_layer->ps_layer_mvbank, 10395 blk_x, 10396 blk_y, 10397 &s_mv_update_prms); 10398 } 10399 else 10400 { 10401 hme_update_mv_bank_noencode( 10402 ps_search_results, 10403 ps_curr_layer->ps_layer_mvbank, 10404 blk_x, 10405 blk_y, 10406 &s_mv_update_prms); 10407 } 10408 10409 /* UPDATE the MIN and MAX MVs for Dynamical Search Range for each ref. pic. */ 10410 /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */ 10411 if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled) 10412 { 10413 WORD32 i4_j; 10414 layer_mv_t *ps_layer_mv = ps_curr_layer->ps_layer_mvbank; 10415 10416 //if (ps_layer_mv->e_blk_size == s_mv_update_prms.e_search_blk_size) 10417 /* Not considering this for Dyn. 
Search Update */ 10418 { 10419 for(i4_ref_id = 0; i4_ref_id < (S32)s_mv_update_prms.i4_num_ref; 10420 i4_ref_id++) 10421 { 10422 ps_search_node = 10423 ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N]; 10424 10425 for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++) 10426 { 10427 hme_update_dynamic_search_params( 10428 &ps_ctxt->s_coarse_dyn_range_prms 10429 .as_dyn_range_prms[ps_refine_prms->i4_layer_id] 10430 [i4_ref_id], 10431 ps_search_node->s_mv.i2_mvy); 10432 10433 ps_search_node++; 10434 } 10435 } 10436 } 10437 } 10438 10439 if(1 == ps_refine_prms->i4_layer_id) 10440 { 10441 WORD32 wt_pred_val, log_wt_pred_val; 10442 WORD32 ref_id_of_nearest_poc = 0; 10443 WORD32 max_val = 0x7fffffff; 10444 WORD32 max_l0_val = 0x7fffffff; 10445 WORD32 max_l1_val = 0x7fffffff; 10446 WORD32 cur_val; 10447 WORD32 i4_local_weighted_sad, i4_local_cost_weighted_pred; 10448 10449 WORD32 bestl0_sad = 0x7fffffff; 10450 WORD32 bestl1_sad = 0x7fffffff; 10451 search_node_t *ps_best_l0_blk = NULL, *ps_best_l1_blk = NULL; 10452 10453 for(i4_ref_id = 0; i4_ref_id < (S32)s_mv_update_prms.i4_num_ref; 10454 i4_ref_id++) 10455 { 10456 wt_pred_val = ps_ctxt->s_wt_pred.a_wpred_wt[i4_ref_id]; 10457 log_wt_pred_val = ps_ctxt->s_wt_pred.wpred_log_wdc; 10458 10459 ps_search_node = 10460 ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N]; 10461 10462 i4_local_weighted_sad = ((ps_search_node->i4_sad * wt_pred_val) + 10463 ((1 << log_wt_pred_val) >> 1)) >> 10464 log_wt_pred_val; 10465 10466 i4_local_cost_weighted_pred = 10467 i4_local_weighted_sad + 10468 (ps_search_node->i4_tot_cost - ps_search_node->i4_sad); 10469 //the loop is redundant as the results are already sorted based on total cost 10470 //for (i4_j = 0; i4_j < ps_curr_layer->ps_layer_mvbank->i4_num_mvs_per_ref; i4_j++) 10471 { 10472 if(i4_local_cost_weighted_pred < min_cost) 10473 { 10474 min_cost = i4_local_cost_weighted_pred; 10475 min_sad = i4_local_weighted_sad; 10476 } 10477 } 10478 10479 /* For P 
frame, calculate the nearest poc which is either P or I frame*/ 10480 if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled) 10481 { 10482 if(-1 != ps_coarse_layer->ai4_ref_id_to_poc_lc[i4_ref_id]) 10483 { 10484 cur_val = 10485 ABS(ps_ctxt->i4_curr_poc - 10486 ps_coarse_layer->ai4_ref_id_to_poc_lc[i4_ref_id]); 10487 if(cur_val < max_val) 10488 { 10489 max_val = cur_val; 10490 ref_id_of_nearest_poc = i4_ref_id; 10491 } 10492 } 10493 } 10494 } 10495 /*Store me cost wrt. to past frame only for P frame */ 10496 if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled) 10497 { 10498 if(-1 != ps_coarse_layer->ai4_ref_id_to_poc_lc[ref_id_of_nearest_poc]) 10499 { 10500 WORD16 i2_mvx, i2_mvy; 10501 10502 WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4); 10503 WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4); 10504 WORD32 z_scan_idx = 10505 gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr]; 10506 WORD32 wt, log_wt; 10507 10508 /*ASSERT((ps_ctxt->i4_curr_poc - ps_coarse_layer->ai4_ref_id_to_poc_lc[ref_id_of_nearest_poc]) 10509 <= (1 + ps_ctxt->num_b_frms));*/ 10510 10511 /*obtain mvx and mvy */ 10512 i2_mvx = 10513 ps_search_results 10514 ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N] 10515 ->s_mv.i2_mvx; 10516 i2_mvy = 10517 ps_search_results 10518 ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N] 10519 ->s_mv.i2_mvy; 10520 10521 /*register the min cost for l1 me in blk context */ 10522 wt = ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_of_nearest_poc]; 10523 log_wt = ps_ctxt->s_wt_pred.wpred_log_wdc; 10524 10525 /*register the min cost for l1 me in blk context */ 10526 ps_ed_ctb_l1_curr->i4_sad_me_for_ref[z_scan_idx >> 2] = 10527 ((ps_search_results 10528 ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N] 10529 ->i4_sad * 10530 wt) + 10531 ((1 << log_wt) >> 1)) >> 10532 log_wt; 10533 ps_ed_ctb_l1_curr->i4_sad_cost_me_for_ref[z_scan_idx >> 2] = 10534 ps_ed_ctb_l1_curr->i4_sad_me_for_ref[z_scan_idx >> 2] + 10535 
(ps_search_results 10536 ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N] 10537 ->i4_tot_cost - 10538 ps_search_results 10539 ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N] 10540 ->i4_sad); 10541 /*for complexity change detection*/ 10542 ps_ctxt->i4_num_blks++; 10543 if(ps_ed_ctb_l1_curr->i4_sad_cost_me_for_ref[z_scan_idx >> 2] > 10544 (8 /*blk width*/ * 8 /*blk height*/ * (1 + ps_ctxt->num_b_frms))) 10545 { 10546 ps_ctxt->i4_num_blks_high_sad++; 10547 } 10548 } 10549 } 10550 } 10551 10552 /* EIID: Early inter intra decisions */ 10553 /* tap L1 level SAD for inter intra decisions */ 10554 if((e_me_quality_presets >= ME_MEDIUM_SPEED) && 10555 (!ps_ctxt->s_frm_prms 10556 .is_i_pic)) //for high-quality preset->disable early decisions 10557 { 10558 if(1 == ps_refine_prms->i4_layer_id) 10559 { 10560 WORD32 i4_min_sad_cost_8x8_block = min_cost; 10561 ihevce_ed_blk_t *ps_curr_ed_blk_ctxt; 10562 WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4); 10563 WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4); 10564 WORD32 z_scan_idx = 10565 gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr]; 10566 ps_curr_ed_blk_ctxt = ps_ed_blk_ctxt_curr_ctb + z_scan_idx; 10567 10568 /*register the min cost for l1 me in blk context */ 10569 ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] = 10570 i4_min_sad_cost_8x8_block; 10571 i4_num_comparisions++; 10572 10573 /* take early inter-intra decision here */ 10574 ps_curr_ed_blk_ctxt->intra_or_inter = 3; /*init saying eval both */ 10575 #if DISABLE_INTRA_IN_BPICS 10576 if((e_me_quality_presets == ME_XTREME_SPEED_25) && 10577 (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE)) 10578 { 10579 ps_curr_ed_blk_ctxt->intra_or_inter = 10580 2; /*eval only inter if inter cost is less */ 10581 i4_num_inter_wins++; 10582 } 10583 else 10584 #endif 10585 { 10586 if(ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] < 10587 ((ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2] 
* 10588 i4_threshold_multiplier) / 10589 i4_threshold_divider)) 10590 { 10591 ps_curr_ed_blk_ctxt->intra_or_inter = 10592 2; /*eval only inter if inter cost is less */ 10593 i4_num_inter_wins++; 10594 } 10595 } 10596 10597 //{ 10598 // DBG_PRINTF ("(blk x, blk y):(%d, %d)\t me:(ctb_x, ctb_y):(%d, %d)\t intra_SAD_COST: %d\tInter_SAD_COST: %d\n", 10599 // blk_x,blk_y, 10600 // i4_ctb_blk_ctr, i4_ctb_row_ctr, 10601 // ps_curr_ed_blk_ctxt->i4_best_sad_8x8_l1_ipe, 10602 // i4_min_sad_cost_8x8_block 10603 // ); 10604 //} 10605 10606 } //end of layer-1 10607 } //end of if (e_me_quality_presets >= ME_MEDIUM_SPEED) 10608 else 10609 { 10610 if(1 == ps_refine_prms->i4_layer_id) 10611 { 10612 WORD32 i4_min_sad_cost_8x8_block = min_cost; 10613 WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4); 10614 WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4); 10615 WORD32 z_scan_idx = 10616 gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr]; 10617 10618 /*register the min cost for l1 me in blk context */ 10619 ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] = 10620 i4_min_sad_cost_8x8_block; 10621 } 10622 } 10623 if(1 == ps_refine_prms->i4_layer_id) 10624 { 10625 WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4); 10626 WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4); 10627 WORD32 z_scan_idx = 10628 gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr]; 10629 10630 ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me_for_decide[z_scan_idx >> 2] = 10631 min_sad; 10632 10633 if(min_cost < 10634 ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2]) 10635 { 10636 ps_ctxt->i4_L1_hme_best_cost += min_cost; 10637 ps_ctxt->i4_L1_hme_sad += min_sad; 10638 ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me[z_scan_idx >> 2] = min_sad; 10639 } 10640 else 10641 { 10642 ps_ctxt->i4_L1_hme_best_cost += 10643 ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2]; 10644 ps_ctxt->i4_L1_hme_sad += 10645 ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_ipe[z_scan_idx >> 2]; 
10646 ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me[z_scan_idx >> 2] = 10647 ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_ipe[z_scan_idx >> 2]; 10648 } 10649 } 10650 } 10651 } 10652 10653 /* Update the number of blocks processed in the current row */ 10654 if((ME_MEDIUM_SPEED > e_me_quality_presets)) 10655 { 10656 ihevce_dmgr_set_row_row_sync( 10657 pv_hme_dep_mngr, 10658 (i4_ctb_x + 1), 10659 blk_y, 10660 0 /* Col Tile No. : Not supported in PreEnc*/); 10661 } 10662 } 10663 10664 /* set the output dependency after completion of row */ 10665 ihevce_pre_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_ping_pong); 10666 } 10667 } 10668