1 /****************************************************************************** 2 * 3 * Copyright (C) 2018 The Android Open Source Project 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ***************************************************************************** 18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore 19 */ 20 /** 21 ****************************************************************************** 22 * @file hme_search_algo.c 23 * 24 * @brief 25 * Contains various search algorithms to be used by coarse/refinement layers 26 * 27 * @author 28 * Ittiam 29 * 30 * 31 * List of Functions 32 * hme_compute_grid_results_step_gt_1() 33 * hme_compute_grid_results_step_1() 34 * hme_pred_search_square_stepn() 35 * 36 ****************************************************************************** 37 */ 38 39 /*****************************************************************************/ 40 /* File Includes */ 41 /*****************************************************************************/ 42 /* System include files */ 43 #include <stdio.h> 44 #include <string.h> 45 #include <stdlib.h> 46 #include <assert.h> 47 #include <stdarg.h> 48 #include <math.h> 49 #include <limits.h> 50 51 /* User include files */ 52 #include "ihevc_typedefs.h" 53 #include "itt_video_api.h" 54 #include "ihevce_api.h" 55 56 #include "rc_cntrl_param.h" 57 #include "rc_frame_info_collector.h" 58 #include "rc_look_ahead_params.h" 59 60 #include "ihevc_defs.h" 61 #include "ihevc_structs.h" 62 #include "ihevc_platform_macros.h" 63 #include "ihevc_deblk.h" 64 #include "ihevc_itrans_recon.h" 65 #include "ihevc_chroma_itrans_recon.h" 66 #include "ihevc_chroma_intra_pred.h" 67 #include "ihevc_intra_pred.h" 68 #include "ihevc_inter_pred.h" 69 #include "ihevc_mem_fns.h" 70 #include "ihevc_padding.h" 71 #include "ihevc_weighted_pred.h" 72 #include "ihevc_sao.h" 73 #include "ihevc_resi_trans.h" 74 #include "ihevc_quant_iquant_ssd.h" 75 #include "ihevc_cabac_tables.h" 76 77 #include "ihevce_defs.h" 78 #include "ihevce_lap_enc_structs.h" 79 #include "ihevce_multi_thrd_structs.h" 80 #include "ihevce_multi_thrd_funcs.h" 81 #include "ihevce_me_common_defs.h" 82 #include "ihevce_had_satd.h" 83 #include "ihevce_error_codes.h" 84 #include "ihevce_bitstream.h" 85 #include "ihevce_cabac.h" 86 #include "ihevce_rdoq_macros.h" 87 #include "ihevce_function_selector.h" 88 #include "ihevce_enc_structs.h" 89 #include "ihevce_entropy_structs.h" 90 #include "ihevce_cmn_utils_instr_set_router.h" 91 #include "ihevce_enc_loop_structs.h" 92 #include "ihevce_bs_compute_ctb.h" 93 #include "ihevce_global_tables.h" 94 #include "ihevce_dep_mngr_interface.h" 95 #include "hme_datatype.h" 96 #include "hme_interface.h" 97 #include "hme_common_defs.h" 98 #include "hme_defs.h" 99 #include "ihevce_me_instr_set_router.h" 100 #include "hme_globals.h" 101 #include "hme_utils.h" 102 #include "hme_coarse.h" 103 #include "hme_fullpel.h" 104 #include "hme_subpel.h" 105 #include "hme_refine.h" 106 #include "hme_err_compute.h" 107 #include "hme_common_utils.h" 108 #include "hme_search_algo.h" 109 #include "ihevce_stasino_helpers.h" 110 #include "ihevce_common_utils.h" 111 112 /*****************************************************************************/ 113 /* Function Definitions */ 114 /*****************************************************************************/ 115 116 /** 117 ******************************************************************************** 118 * @fn void hme_compute_grid_results_step_1(err_prms_t *ps_err_prms, 119 result_upd_prms_t *ps_result_prms, 120 BLK_SIZE_T e_blk_size) 121 * 122 * @brief Updates results for a grid of step = 1 123 * 124 * @param[in] ps_err_prms: Various parameters to this function 125 * 126 * @param[in] ps_result_prms : Parameters pertaining to result updation 127 * 128 * @param[out] e_blk_size: Block size of the blk being searched for 129 * 130 * @return none 131 ******************************************************************************** 132 */ 133 void hme_compute_grid_results( 134 err_prms_t *ps_err_prms, result_upd_prms_t *ps_result_prms, BLK_SIZE_T e_blk_size) 135 { 136 PF_RESULT_FXN_T pf_hme_result_fxn; 137 PF_SAD_FXN_T pf_sad_fxn; 138 S32 i4_num_results; 139 S32 part_id; 140 141 part_id = ps_result_prms->pi4_valid_part_ids[0]; 142 143 i4_num_results = (S32)ps_result_prms->ps_search_results->u1_num_results_per_part; 144 145 pf_sad_fxn = hme_get_sad_fxn(e_blk_size, ps_err_prms->i4_grid_mask, ps_err_prms->i4_part_mask); 146 147 pf_hme_result_fxn = 148 hme_get_result_fxn(ps_err_prms->i4_grid_mask, ps_err_prms->i4_part_mask, i4_num_results); 149 150 pf_sad_fxn(ps_err_prms); 151 pf_hme_result_fxn(ps_result_prms); 152 } 153 154 /** 155 ******************************************************************************** 156 * @fn void hme_pred_search_square_stepn(hme_search_prms_t *ps_search_prms, 157 * layer_ctxt_t *ps_layer_ctxt) 158 * 159 * @brief Implements predictive search, with square grid refinement. In this 160 * case, we start with a bigger step size, like 4, refining upto a 161 * variable number of pts, till we hit end of search range or hit a 162 * minima. Then we refine using smaller steps. The bigger step size 163 * like 4 or 2, do not use optimized SAD functions, they evaluate 164 * SAD for each individual pt. 165 * 166 * @param[in,out] ps_search_prms: All the params to this function 167 * 168 * @param[in] ps_layer_ctxt: Context for the layer 169 * 170 * @return None 171 ******************************************************************************** 172 */ 173 void hme_pred_search_square_stepn( 174 hme_search_prms_t *ps_search_prms, 175 layer_ctxt_t *ps_layer_ctxt, 176 wgt_pred_ctxt_t *ps_wt_inp_prms, 177 ME_QUALITY_PRESETS_T e_me_quality_preset, 178 ihevce_me_optimised_function_list_t *ps_me_optimised_function_list 179 180 ) 181 { 182 /* Stores the SAD for all parts at each pt in the grid */ 183 S32 ai4_sad_grid[9][TOT_NUM_PARTS]; 184 185 S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1]; 186 187 /* Atributes of input candidates */ 188 search_candt_t *ps_search_candts; 189 search_node_t s_search_node; 190 191 /* Number of candidates to search */ 192 S32 i4_num_candts, max_num_iters, i4_num_results; 193 194 /* Input and reference attributes */ 195 S32 i4_inp_stride, i4_ref_stride, i4_ref_offset; 196 197 /* The reference is actually an array of ptrs since there are several */ 198 /* reference id. So an array gets passed form calling function */ 199 U08 **ppu1_ref; 200 201 /* Holds the search results at the end of this fxn */ 202 search_results_t *ps_search_results; 203 204 /* These control number of parts and number of pts in grid to search */ 205 S32 i4_part_mask, i4_grid_mask; 206 207 /* Blk width, blk height and blk size are derived from input params */ 208 BLK_SIZE_T e_blk_size; 209 CU_SIZE_T e_cu_size; 210 S32 i4_blk_wd, i4_blk_ht, i4_step, i4_candt, i4_iter; 211 S32 i4_inp_off; 212 S32 i4_min_id; 213 /* Points to the range limits for mv */ 214 range_prms_t *ps_range_prms; 215 216 /*************************************************************************/ 217 /* These functions pointers for calculating Err and the result update */ 218 /* Each carries its own parameters structure, which is generated on the */ 219 /* fly in this function */ 220 /*************************************************************************/ 221 err_prms_t s_err_prms; 222 result_upd_prms_t s_result_prms; 223 224 max_num_iters = ps_search_prms->i4_max_iters; 225 /* Using the member 0 to store for all ref. idx., see in coarsest */ 226 ps_range_prms = ps_search_prms->aps_mv_range[0]; 227 i4_inp_stride = ps_search_prms->i4_inp_stride; 228 /* Move to the location of the search blk in inp buffer */ 229 i4_inp_off = ps_search_prms->i4_cu_x_off; 230 i4_inp_off += (ps_search_prms->i4_cu_y_off * i4_inp_stride); 231 232 ps_search_results = ps_search_prms->ps_search_results; 233 234 /*************************************************************************/ 235 /* Depending on flag i4_use_rec, we use either input of previously */ 236 /* encoded pictures or we use recon of previously encoded pictures. */ 237 /*************************************************************************/ 238 if(ps_search_prms->i4_use_rec == 1) 239 { 240 i4_ref_stride = ps_layer_ctxt->i4_rec_stride; 241 ppu1_ref = ps_layer_ctxt->ppu1_list_rec_fxfy; 242 } 243 else 244 { 245 i4_ref_stride = ps_layer_ctxt->i4_inp_stride; 246 ppu1_ref = ps_layer_ctxt->ppu1_list_inp; 247 } 248 i4_ref_offset = (i4_ref_stride * ps_search_prms->i4_y_off) + ps_search_prms->i4_x_off; 249 250 /*************************************************************************/ 251 /* Obtain the blk size of the search blk. Assumed here that the search */ 252 /* is done on a CU size, rather than any arbitrary blk size. */ 253 /*************************************************************************/ 254 ps_search_results = ps_search_prms->ps_search_results; 255 e_blk_size = ps_search_prms->e_blk_size; 256 i4_blk_wd = (S32)gau1_blk_size_to_wd[e_blk_size]; 257 i4_blk_ht = (S32)gau1_blk_size_to_ht[e_blk_size]; 258 e_cu_size = ps_search_results->e_cu_size; 259 i4_num_results = (S32)ps_search_results->u1_num_results_per_part; 260 261 ps_search_candts = ps_search_prms->ps_search_candts; 262 i4_num_candts = ps_search_prms->i4_num_init_candts; 263 i4_part_mask = ps_search_prms->i4_part_mask; 264 265 /*************************************************************************/ 266 /* This array stores the ids of the partitions whose */ 267 /* SADs are updated. Since the partitions whose SADs are updated may not */ 268 /* be in contiguous order, we supply another level of indirection. */ 269 /*************************************************************************/ 270 hme_create_valid_part_ids(i4_part_mask, ai4_valid_part_ids); 271 272 /* Update the parameters used to pass to SAD */ 273 /* input ptr, strides, SAD Grid, part mask, blk width and ht */ 274 /* The above are fixed ptrs, only pu1_ref and grid mask are */ 275 /* varying params which are updated just before calling fxn */ 276 s_err_prms.i4_inp_stride = i4_inp_stride; 277 s_err_prms.i4_ref_stride = i4_ref_stride; 278 s_err_prms.i4_part_mask = i4_part_mask; 279 s_err_prms.pi4_sad_grid = &ai4_sad_grid[0][0]; 280 s_err_prms.i4_blk_wd = i4_blk_wd; 281 s_err_prms.i4_blk_ht = i4_blk_ht; 282 s_err_prms.pi4_valid_part_ids = ai4_valid_part_ids; 283 284 s_result_prms.pf_mv_cost_compute = ps_search_prms->pf_mv_cost_compute; 285 s_result_prms.ps_search_results = ps_search_results; 286 s_result_prms.pi4_valid_part_ids = ai4_valid_part_ids; 287 s_result_prms.i1_ref_idx = ps_search_prms->i1_ref_idx; 288 s_result_prms.i4_part_mask = ps_search_prms->i4_part_mask; 289 s_result_prms.ps_search_node_base = &s_search_node; 290 s_result_prms.pi4_sad_grid = &ai4_sad_grid[0][0]; 291 292 /* Run through each of the candts in a loop */ 293 for(i4_candt = 0; i4_candt < i4_num_candts; i4_candt++) 294 { 295 S32 i4_num_refine; 296 297 i4_step = ps_search_prms->i4_start_step; 298 299 s_search_node = *(ps_search_candts->ps_search_node); 300 301 /* initialize minimum cost for this candidate. As we search around */ 302 /* this candidate, this is used to check early exit, when in any */ 303 /* given iteration, the center pt of the grid is lowest value */ 304 s_result_prms.i4_min_cost = MAX_32BIT_VAL; 305 306 /* If we need to do refinements, then we need to evaluate */ 307 /* neighbouring pts. Before doing so, we have to do */ 308 /* basic range checks against max allowed mvs */ 309 i4_num_refine = ps_search_candts->u1_num_steps_refine; 310 311 CLIP_MV_WITHIN_RANGE( 312 s_search_node.s_mv.i2_mvx, s_search_node.s_mv.i2_mvy, ps_range_prms, 0, 0, 0); 313 314 /* The first time, we search all 8 pts around init candt plus the init candt */ 315 i4_grid_mask = 0x1ff; 316 s_err_prms.pu1_inp = ps_wt_inp_prms->apu1_wt_inp[s_search_node.i1_ref_idx] + i4_inp_off; 317 318 for(i4_iter = 0; i4_iter < max_num_iters; i4_iter++) 319 { 320 i4_grid_mask &= hme_clamp_grid_by_mvrange(&s_search_node, i4_step, ps_range_prms); 321 322 s_err_prms.i4_grid_mask = i4_grid_mask; 323 s_err_prms.pu1_ref = ppu1_ref[s_search_node.i1_ref_idx] + i4_ref_offset; 324 s_err_prms.pu1_ref += 325 (s_search_node.s_mv.i2_mvx + 326 (s_search_node.s_mv.i2_mvy * s_err_prms.i4_ref_stride)); 327 328 s_result_prms.i4_step = i4_step; 329 s_err_prms.i4_step = i4_step; 330 s_result_prms.i4_grid_mask = i4_grid_mask; 331 332 /* For Top,TopLeft and Left cand., get only center point SAD */ 333 /* and do early exit */ 334 if(0 == i4_num_refine) 335 { 336 s_err_prms.i4_grid_mask = 0x1; 337 s_result_prms.i4_grid_mask = 0x1; 338 339 /* sad pt fun. populates sad to 0th location, whereas update */ 340 /* fun. takes it based on part. id */ 341 s_err_prms.pi4_sad_grid = 342 s_result_prms.pi4_sad_grid + (1 * s_result_prms.pi4_valid_part_ids[0]); 343 344 ps_me_optimised_function_list->pf_evalsad_pt_npu_mxn_8bit(&s_err_prms); 345 346 s_err_prms.pi4_sad_grid = s_result_prms.pi4_sad_grid; 347 348 if(ME_XTREME_SPEED_25 == e_me_quality_preset) 349 hme_update_results_grid_pu_bestn_xtreme_speed(&s_result_prms); 350 else 351 hme_update_results_grid_pu_bestn(&s_result_prms); 352 353 i4_min_id = (S32)PT_C; /* Center Point */ 354 i4_step = 0; /* No further refinment */ 355 s_result_prms.i4_step = i4_step; 356 s_err_prms.i4_step = i4_step; 357 } 358 else 359 { 360 if(ME_XTREME_SPEED_25 == e_me_quality_preset) 361 { 362 err_prms_t *ps_err_prms = &s_err_prms; 363 ASSERT(ps_err_prms->i4_grid_mask != 1); 364 ASSERT((ps_err_prms->i4_part_mask == 4) || (ps_err_prms->i4_part_mask == 16)); 365 366 /*****************************************************************/ 367 /* In this case, there are no partial updates. The blk can be */ 368 /* of any type and need not be a CU. The only thing that matters */ 369 /* here is the width of the blk, 4/8/(>=16) */ 370 /*****************************************************************/ 371 ps_me_optimised_function_list->pf_evalsad_grid_npu_MxN(&s_err_prms); 372 373 hme_update_results_grid_pu_bestn_xtreme_speed(&s_result_prms); 374 } 375 else 376 { 377 /* Obtain SAD for all 9 pts in grid*/ 378 hme_compute_grid_results(&s_err_prms, &s_result_prms, e_blk_size); 379 } 380 381 /* Early exit in case of centre being local minima */ 382 i4_min_id = s_result_prms.i4_min_id; 383 } 384 385 i4_grid_mask = gai4_opt_grid_mask[i4_min_id]; 386 387 s_search_node.s_mv.i2_mvx += (i4_step * gai1_grid_id_to_x[i4_min_id]); 388 s_search_node.s_mv.i2_mvy += (i4_step * gai1_grid_id_to_y[i4_min_id]); 389 if(i4_min_id == (S32)PT_C) 390 break; 391 } 392 393 /* Next keep reducing stepsize by factor of 2 */ 394 i4_step >>= 1; 395 while(i4_step) 396 { 397 i4_grid_mask = 0x1fe & 398 hme_clamp_grid_by_mvrange(&s_search_node, i4_step, ps_range_prms); 399 //i4_grid_mask &= 0x1fe; 400 401 s_err_prms.i4_grid_mask = i4_grid_mask; 402 s_result_prms.i4_grid_mask = i4_grid_mask; 403 s_err_prms.i4_step = i4_step; 404 s_result_prms.i4_step = i4_step; 405 s_err_prms.pu1_ref = ppu1_ref[s_search_node.i1_ref_idx] + i4_ref_offset; 406 s_err_prms.pu1_ref += 407 (s_search_node.s_mv.i2_mvx + 408 (s_search_node.s_mv.i2_mvy * s_err_prms.i4_ref_stride)); 409 if(ME_XTREME_SPEED_25 == e_me_quality_preset) 410 { 411 err_prms_t *ps_err_prms = &s_err_prms; 412 ASSERT(ps_err_prms->i4_grid_mask != 1); 413 ASSERT((ps_err_prms->i4_part_mask == 4) || (ps_err_prms->i4_part_mask == 16)); 414 415 /*****************************************************************/ 416 /* In this case, there are no partial updates. The blk can be */ 417 /* of any type and need not be a CU. The only thing that matters */ 418 /* here is the width of the blk, 4/8/(>=16) */ 419 /*****************************************************************/ 420 ps_me_optimised_function_list->pf_evalsad_grid_npu_MxN(&s_err_prms); 421 422 hme_update_results_grid_pu_bestn_xtreme_speed(&s_result_prms); 423 } 424 else 425 { 426 hme_compute_grid_results(&s_err_prms, &s_result_prms, e_blk_size); 427 } 428 429 i4_min_id = s_result_prms.i4_min_id; 430 431 s_search_node.s_mv.i2_mvx += (i4_step * gai1_grid_id_to_x[i4_min_id]); 432 s_search_node.s_mv.i2_mvy += (i4_step * gai1_grid_id_to_y[i4_min_id]); 433 434 i4_step >>= 1; 435 } 436 437 ps_search_candts++; 438 } 439 } 440 441 /** 442 ******************************************************************************** 443 * @fn hme_pred_search_square_step1(hme_search_prms_t *ps_search_prms, 444 * layer_ctxt_t *ps_layer_ctxt) 445 * 446 * @brief Implements predictive search with square grid refinement. In this 447 * case, the square grid is of step 1 always. since this is considered 448 * to be more of a refinement search 449 * 450 * @param[in,out] ps_search_prms: All the params to this function 451 * 452 * @param[in] ps_layer_ctxt: All info about this layer 453 * 454 * @return None 455 ******************************************************************************** 456 */ 457 /** 458 ******************************************************************************** 459 * @fn hme_pred_search(hme_search_prms_t *ps_search_prms, 460 * layer_ctxt_t *ps_layer_ctxt) 461 * 462 * @brief Implements predictive search after removing duplicate candidates 463 * from initial list. Each square grid (of step 1) is expanded 464 * to nine search pts before the dedeuplication process. one point 465 * cost is then evaluated for each unique node after the deduplication 466 * process 467 * 468 * @param[in,out] ps_search_prms: All the params to this function 469 * 470 * @param[in] ps_layer_ctxt: All info about this layer 471 * 472 * @return None 473 ******************************************************************************** 474 */ 475 void hme_pred_search( 476 hme_search_prms_t *ps_search_prms, 477 layer_ctxt_t *ps_layer_ctxt, 478 wgt_pred_ctxt_t *ps_wt_inp_prms, 479 S08 i1_grid_flag, 480 ihevce_me_optimised_function_list_t *ps_me_optimised_function_list 481 482 ) 483 { 484 /* Stores the SAD for all parts at each pt in the grid */ 485 S32 ai4_sad_grid[9 * TOT_NUM_PARTS]; 486 487 /* Atributes of input candidates */ 488 search_node_t *ps_search_node; 489 490 search_results_t *ps_search_results; 491 S32 i4_num_nodes, i4_candt; 492 493 /* Input and reference attributes */ 494 S32 i4_inp_stride, i4_ref_stride, i4_ref_offset; 495 496 /* The reference is actually an array of ptrs since there are several */ 497 /* reference id. So an array gets passed form calling function */ 498 U08 **ppu1_ref; 499 500 /* These control number of parts and number of pts in grid to search */ 501 S32 i4_part_mask, i4_grid_mask; 502 503 S32 shift_for_cu_size; 504 505 /* Blk width, blk height and blk size are derived from input params */ 506 BLK_SIZE_T e_blk_size; 507 CU_SIZE_T e_cu_size; 508 S32 i4_blk_wd, i4_blk_ht; 509 510 /*************************************************************************/ 511 /* These functions pointers for calculating Err and the result update */ 512 /* Each carries its own parameters structure, which is generated on the */ 513 /* fly in this function */ 514 /*************************************************************************/ 515 PF_RESULT_FXN_T pf_hme_result_fxn; 516 PF_SAD_FXN_T pf_sad_fxn; 517 PF_CALC_SAD_AND_RESULT pf_calc_sad_and_result; 518 err_prms_t s_err_prms; 519 result_upd_prms_t s_result_prms; 520 S32 i4_num_results; 521 S32 i4_inp_off; 522 fullpel_refine_ctxt_t *ps_fullpel_refine_ctxt = ps_search_prms->ps_fullpel_refine_ctxt; 523 524 i4_inp_stride = ps_search_prms->i4_inp_stride; 525 526 /* Move to the location of the search blk in inp buffer */ 527 i4_inp_off = ps_search_prms->i4_cu_x_off; 528 i4_inp_off += ps_search_prms->i4_cu_y_off * i4_inp_stride; 529 530 /*************************************************************************/ 531 /* Depending on flag i4_use_rec, we use either input of previously */ 532 /* encoded pictures or we use recon of previously encoded pictures. */ 533 /*************************************************************************/ 534 if(ps_search_prms->i4_use_rec == 1) 535 { 536 i4_ref_stride = ps_layer_ctxt->i4_rec_stride; 537 ppu1_ref = ps_layer_ctxt->ppu1_list_rec_fxfy; 538 } 539 else 540 { 541 i4_ref_stride = ps_layer_ctxt->i4_rec_stride; 542 ppu1_ref = ps_layer_ctxt->ppu1_list_inp; 543 } 544 i4_ref_offset = (i4_ref_stride * ps_search_prms->i4_y_off) + ps_search_prms->i4_x_off; 545 /* Obtain the blk size of the search blk. Assumed here that the search */ 546 /* is done on a CU size, rather than any arbitrary blk size. */ 547 ps_search_results = ps_search_prms->ps_search_results; 548 e_blk_size = ps_search_prms->e_blk_size; 549 i4_blk_wd = gau1_blk_size_to_wd[e_blk_size]; 550 i4_blk_ht = gau1_blk_size_to_ht[e_blk_size]; 551 e_cu_size = ps_search_results->e_cu_size; 552 553 /* Assuming cu size of 8x8 as enum 0, the other will be 1, 2, 3 */ 554 /* This will also set the shift w.r.t. the base cu size of 8x8 */ 555 shift_for_cu_size = e_cu_size; 556 557 ps_search_node = ps_search_prms->ps_search_nodes; 558 i4_num_nodes = ps_search_prms->i4_num_search_nodes; 559 i4_part_mask = ps_search_prms->i4_part_mask; 560 561 /* Update the parameters used to pass to SAD */ 562 /* input ptr, strides, SAD Grid, part mask, blk width and ht */ 563 /* The above are fixed ptrs, only pu1_ref and grid mask are */ 564 /* varying params which are updated just before calling fxn */ 565 s_err_prms.i4_inp_stride = i4_inp_stride; 566 s_err_prms.i4_ref_stride = i4_ref_stride; 567 s_err_prms.i4_part_mask = i4_part_mask; 568 s_err_prms.pi4_sad_grid = &ai4_sad_grid[0]; 569 s_err_prms.i4_blk_wd = i4_blk_wd; 570 s_err_prms.i4_blk_ht = i4_blk_ht; 571 s_err_prms.i4_step = 1; 572 s_err_prms.i4_num_partitions = ps_fullpel_refine_ctxt->i4_num_valid_parts; 573 574 s_result_prms.pf_mv_cost_compute = ps_search_prms->pf_mv_cost_compute; 575 s_result_prms.ps_search_results = ps_search_results; 576 s_result_prms.i1_ref_idx = (S08)ps_search_prms->i1_ref_idx; 577 s_result_prms.pi4_sad_grid = ai4_sad_grid; 578 s_result_prms.i4_part_mask = i4_part_mask; 579 s_result_prms.i4_step = 1; 580 pf_calc_sad_and_result = hme_get_calc_sad_and_result_fxn( 581 i1_grid_flag, 582 ps_search_prms->u1_is_cu_noisy, 583 i4_part_mask, 584 ps_fullpel_refine_ctxt->i4_num_valid_parts, 585 ps_search_results->u1_num_results_per_part); 586 587 pf_calc_sad_and_result( 588 ps_search_prms, ps_wt_inp_prms, &s_err_prms, &s_result_prms, ppu1_ref, i4_ref_stride); 589 } 590 591 static __inline FT_CALC_SAD_AND_RESULT *hme_get_calc_sad_and_result_explicit_fxn( 592 ihevce_me_optimised_function_list_t *ps_me_optimised_function_list, 593 S32 i4_part_mask, 594 S32 i4_num_partitions, 595 S08 i1_grid_enable, 596 U08 u1_num_results_per_part) 597 { 598 FT_CALC_SAD_AND_RESULT *pf_func = NULL; 599 600 if(2 == u1_num_results_per_part) 601 { 602 if(i4_part_mask == 1) 603 { 604 ASSERT(i4_num_partitions == 1); 605 606 if(i1_grid_enable == 0) 607 { 608 pf_func = 609 ps_me_optimised_function_list->pf_calc_pt_sad_and_2_best_results_explicit_8x8; 610 } 611 else 612 { 613 pf_func = ps_me_optimised_function_list 614 ->pf_calc_pt_sad_and_2_best_results_explicit_8x8_for_grid; 615 } 616 } 617 else 618 { 619 ASSERT(i4_num_partitions == 5); 620 621 pf_func = 622 ps_me_optimised_function_list->pf_calc_pt_sad_and_2_best_results_explicit_8x8_4x4; 623 } 624 } 625 else if(1 == u1_num_results_per_part) 626 { 627 if(i4_part_mask == 1) 628 { 629 ASSERT(i4_num_partitions == 1); 630 631 if(i1_grid_enable == 0) 632 { 633 pf_func = 634 ps_me_optimised_function_list->pf_calc_pt_sad_and_1_best_result_explicit_8x8; 635 } 636 else 637 { 638 pf_func = ps_me_optimised_function_list 639 ->pf_calc_pt_sad_and_1_best_result_explicit_8x8_for_grid; 640 } 641 } 642 else 643 { 644 ASSERT(i4_num_partitions == 5); 645 646 pf_func = 647 ps_me_optimised_function_list->pf_calc_pt_sad_and_1_best_result_explicit_8x8_4x4; 648 } 649 } 650 651 return pf_func; 652 } 653 654 /** 655 ******************************************************************************** 656 * @fn void hme_pred_search_no_encode(hme_search_prms_t *ps_search_prms, 657 * layer_ctxt_t *ps_layer_ctxt, 658 * wgt_pred_ctxt_t *ps_wt_inp_prms, 659 * S32 *pi4_valid_part_ids, 660 * S32 disable_refine, 661 * ME_QUALITY_PRESETS_T e_me_quality_preset) 662 * 663 * @brief Implements predictive search after removing duplicate candidates 664 * from initial list. Each square grid (of step 1) is expanded 665 * to nine search pts before the dedeuplication process. one point 666 * cost is then evaluated for each unique node after the deduplication 667 * process 668 * 669 * @param[in,out] ps_search_prms: All the params to this function 670 * 671 * @param[in] ps_layer_ctxt: All info about this layer 672 * 673 * @return None 674 ******************************************************************************** 675 */ 676 void hme_pred_search_no_encode( 677 hme_search_prms_t *ps_search_prms, 678 layer_ctxt_t *ps_layer_ctxt, 679 wgt_pred_ctxt_t *ps_wt_inp_prms, 680 S32 *pi4_valid_part_ids, 681 S32 disable_refine, 682 ME_QUALITY_PRESETS_T e_me_quality_preset, 683 S08 i1_grid_enable, 684 ihevce_me_optimised_function_list_t *ps_me_optimised_function_list) 685 { 686 /* Stores the SAD for all parts at each pt in the grid */ 687 S32 ai4_sad_grid[9 * TOT_NUM_PARTS]; 688 689 /* Atributes of input candidates */ 690 search_node_t *ps_search_node; 691 search_results_t *ps_search_results; 692 S32 i4_num_nodes; 693 694 /* Input and reference attributes */ 695 S32 i4_inp_stride, i4_ref_stride, i4_ref_offset; 696 697 /* The reference is actually an array of ptrs since there are several */ 698 /* reference id. So an array gets passed form calling function */ 699 U08 **ppu1_ref; 700 701 /* These control number of parts and number of pts in grid to search */ 702 S32 i4_part_mask; // i4_grid_mask; 703 704 S32 shift_for_cu_size; 705 /* Blk width, blk height and blk size are derived from input params */ 706 BLK_SIZE_T e_blk_size; 707 CU_SIZE_T e_cu_size; 708 S32 i4_blk_wd, i4_blk_ht; 709 710 /*************************************************************************/ 711 /* These functions pointers for calculating Err and the result update */ 712 /* Each carries its own parameters structure, which is generated on the */ 713 /* fly in this function */ 714 /*************************************************************************/ 715 PF_CALC_SAD_AND_RESULT pf_calc_sad_and_result; 716 err_prms_t s_err_prms; 717 result_upd_prms_t s_result_prms; 718 S32 i4_num_results; 719 S32 i4_search_idx = ps_search_prms->i1_ref_idx; 720 S32 i4_inp_off; 721 S32 i4_num_partitions; 722 723 i4_inp_stride = ps_search_prms->i4_inp_stride; 724 725 /* Move to the location of the search blk in inp buffer */ 726 i4_inp_off = ps_search_prms->i4_cu_x_off; 727 i4_inp_off += ps_search_prms->i4_cu_y_off * i4_inp_stride; 728 729 /*************************************************************************/ 730 /* Depending on flag i4_use_rec, we use either input of previously */ 731 /* encoded pictures or we use recon of previously encoded pictures. */ 732 /*************************************************************************/ 733 if(ps_search_prms->i4_use_rec == 1) 734 { 735 i4_ref_stride = ps_layer_ctxt->i4_rec_stride; 736 ppu1_ref = ps_layer_ctxt->ppu1_list_rec_fxfy; 737 } 738 else 739 { 740 i4_ref_stride = ps_layer_ctxt->i4_inp_stride; 741 ppu1_ref = ps_layer_ctxt->ppu1_list_inp; 742 } 743 i4_ref_offset = (i4_ref_stride * ps_search_prms->i4_y_off) + ps_search_prms->i4_x_off; 744 /* Obtain the blk size of the search blk. Assumed here that the search */ 745 /* is done on a CU size, rather than any arbitrary blk size. */ 746 ps_search_results = ps_search_prms->ps_search_results; 747 e_blk_size = ps_search_prms->e_blk_size; 748 i4_blk_wd = gau1_blk_size_to_wd[e_blk_size]; 749 i4_blk_ht = gau1_blk_size_to_ht[e_blk_size]; 750 e_cu_size = ps_search_results->e_cu_size; 751 752 /* Assuming cu size of 8x8 as enum 0, the other will be 1, 2, 3 */ 753 /* This will also set the shift w.r.t. the base cu size of 8x8 */ 754 shift_for_cu_size = e_cu_size; 755 756 ps_search_node = ps_search_prms->ps_search_nodes; 757 i4_num_nodes = ps_search_prms->i4_num_search_nodes; 758 i4_part_mask = ps_search_prms->i4_part_mask; 759 760 /*************************************************************************/ 761 /* This array stores the ids of the partitions whose */ 762 /* SADs are updated. Since the partitions whose SADs are updated may not */ 763 /* be in contiguous order, we supply another level of indirection. */ 764 /*************************************************************************/ 765 i4_num_partitions = hme_create_valid_part_ids(i4_part_mask, pi4_valid_part_ids); 766 767 /* Update the parameters used to pass to SAD */ 768 /* input ptr, strides, SAD Grid, part mask, blk width and ht */ 769 /* The above are fixed ptrs, only pu1_ref and grid mask are */ 770 /* varying params which are updated just before calling fxn */ 771 s_err_prms.i4_inp_stride = i4_inp_stride; 772 s_err_prms.i4_ref_stride = i4_ref_stride; 773 s_err_prms.i4_part_mask = i4_part_mask; 774 s_err_prms.pi4_sad_grid = &ai4_sad_grid[0]; 775 s_err_prms.i4_blk_wd = i4_blk_wd; 776 s_err_prms.i4_blk_ht = i4_blk_ht; 777 s_err_prms.i4_step = 1; 778 s_err_prms.pi4_valid_part_ids = pi4_valid_part_ids; 779 s_err_prms.i4_num_partitions = i4_num_partitions; 780 781 s_result_prms.pf_mv_cost_compute = ps_search_prms->pf_mv_cost_compute; 782 s_result_prms.ps_search_results = ps_search_results; 783 s_result_prms.pi4_valid_part_ids = pi4_valid_part_ids; 784 s_result_prms.i1_ref_idx = (S08)ps_search_prms->i1_ref_idx; 785 s_result_prms.pi4_sad_grid = ai4_sad_grid; 786 s_result_prms.i4_part_mask = i4_part_mask; 787 s_result_prms.i4_step = 1; 788 789 pf_calc_sad_and_result = hme_get_calc_sad_and_result_explicit_fxn( 790 ps_me_optimised_function_list, 791 i4_part_mask, 792 i4_num_partitions, 793 i1_grid_enable, 794 ps_search_results->u1_num_results_per_part); 795 796 pf_calc_sad_and_result( 797 ps_search_prms, ps_wt_inp_prms, &s_err_prms, &s_result_prms, ppu1_ref, i4_ref_stride); 798 } 799