1 /****************************************************************************** 2 * 3 * Copyright (C) 2018 The Android Open Source Project 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ***************************************************************************** 18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore 19 */ 20 21 /** 22 ****************************************************************************** 23 * @file hme_subpel.c 24 * 25 * @brief 26 * Fullpel search and refinement 27 * 28 * @author 29 * Ittiam 30 * 31 ****************************************************************************** 32 */ 33 34 /*****************************************************************************/ 35 /* File Includes */ 36 /*****************************************************************************/ 37 /* System include files */ 38 #include <stdio.h> 39 #include <string.h> 40 #include <stdlib.h> 41 #include <assert.h> 42 #include <stdarg.h> 43 #include <math.h> 44 #include <limits.h> 45 46 /* User include files */ 47 #include "ihevc_typedefs.h" 48 #include "itt_video_api.h" 49 #include "ihevce_api.h" 50 51 #include "rc_cntrl_param.h" 52 #include "rc_frame_info_collector.h" 53 #include "rc_look_ahead_params.h" 54 55 #include "ihevc_defs.h" 56 #include "ihevc_structs.h" 57 #include "ihevc_platform_macros.h" 58 #include "ihevc_deblk.h" 59 #include "ihevc_itrans_recon.h" 60 #include "ihevc_chroma_itrans_recon.h" 61 #include "ihevc_chroma_intra_pred.h" 62 #include "ihevc_intra_pred.h" 63 #include "ihevc_inter_pred.h" 64 #include "ihevc_mem_fns.h" 65 #include "ihevc_padding.h" 66 #include "ihevc_weighted_pred.h" 67 #include "ihevc_sao.h" 68 #include "ihevc_resi_trans.h" 69 #include "ihevc_quant_iquant_ssd.h" 70 #include "ihevc_cabac_tables.h" 71 72 #include "ihevce_defs.h" 73 #include "ihevce_lap_enc_structs.h" 74 #include "ihevce_multi_thrd_structs.h" 75 #include "ihevce_multi_thrd_funcs.h" 76 #include "ihevce_me_common_defs.h" 77 #include "ihevce_had_satd.h" 78 #include "ihevce_error_codes.h" 79 #include "ihevce_bitstream.h" 80 #include "ihevce_cabac.h" 81 #include "ihevce_rdoq_macros.h" 82 #include "ihevce_function_selector.h" 83 #include "ihevce_enc_structs.h" 84 #include "ihevce_entropy_structs.h" 85 #include "ihevce_cmn_utils_instr_set_router.h" 86 #include "ihevce_enc_loop_structs.h" 87 #include "ihevce_bs_compute_ctb.h" 88 #include "ihevce_global_tables.h" 89 #include "ihevce_dep_mngr_interface.h" 90 #include "hme_datatype.h" 91 #include "hme_interface.h" 92 #include "hme_common_defs.h" 93 #include "hme_defs.h" 94 #include "ihevce_me_instr_set_router.h" 95 #include "hme_globals.h" 96 #include "hme_utils.h" 97 #include "hme_coarse.h" 98 #include "hme_refine.h" 99 #include "hme_err_compute.h" 100 #include "hme_common_utils.h" 101 #include "hme_search_algo.h" 102 #include "ihevce_stasino_helpers.h" 103 104 /** 105 ******************************************************************************** 106 * @fn hme_fullpel_cand_sifter 107 * 108 * @brief Given a list of search candidates and valid partition types, 109 * this function finds the two best candidates for each partition type. 110 * 111 * @return None 112 ******************************************************************************** 113 */ 114 void hme_fullpel_cand_sifter( 115 hme_search_prms_t *ps_search_prms, 116 layer_ctxt_t *ps_layer_ctxt, 117 wgt_pred_ctxt_t *ps_wt_inp_prms, 118 S32 i4_alpha_stim_multiplier, 119 U08 u1_is_cu_noisy, 120 ihevce_me_optimised_function_list_t *ps_me_optimised_function_list) 121 { 122 S32 i4_i; 123 S16 i2_temp_tot_cost, i2_temp_stim_injected_cost, i2_temp_mv_cost, i2_temp_mv_x, i2_temp_mv_y, 124 i2_temp_ref_idx; 125 126 fullpel_refine_ctxt_t *ps_fullpel_refine_ctxt = ps_search_prms->ps_fullpel_refine_ctxt; 127 S32 i4_temp_part_mask; 128 129 ps_search_prms->i4_alpha_stim_multiplier = i4_alpha_stim_multiplier; 130 ps_search_prms->u1_is_cu_noisy = u1_is_cu_noisy; 131 132 if(u1_is_cu_noisy) 133 { 134 i4_temp_part_mask = ps_search_prms->i4_part_mask; 135 ps_search_prms->i4_part_mask &= ((ENABLE_2Nx2N) | (ENABLE_NxN)); 136 137 ps_fullpel_refine_ctxt->i4_num_valid_parts = hme_create_valid_part_ids( 138 (ps_search_prms->i4_part_mask) & ((ENABLE_2Nx2N) | (ENABLE_NxN)), 139 &ps_fullpel_refine_ctxt->ai4_part_id[0]); 140 } 141 142 ps_search_prms->u1_is_cu_noisy = u1_is_cu_noisy; 143 144 hme_pred_search( 145 ps_search_prms, ps_layer_ctxt, ps_wt_inp_prms, 0, ps_me_optimised_function_list); 146 147 if(u1_is_cu_noisy) 148 { 149 if(ps_search_prms->ps_search_results->u1_num_results_per_part == 2) 150 { 151 for(i4_i = 0; i4_i < ps_fullpel_refine_ctxt->i4_num_valid_parts; i4_i++) 152 { 153 if(ps_fullpel_refine_ctxt->i2_tot_cost[0][i4_i] > 154 ps_fullpel_refine_ctxt->i2_tot_cost[1][i4_i]) 155 { 156 i2_temp_tot_cost = ps_fullpel_refine_ctxt->i2_tot_cost[0][i4_i]; 157 i2_temp_stim_injected_cost = 158 ps_fullpel_refine_ctxt->i2_stim_injected_cost[0][i4_i]; 159 i2_temp_mv_cost = ps_fullpel_refine_ctxt->i2_mv_cost[0][i4_i]; 160 i2_temp_mv_x = ps_fullpel_refine_ctxt->i2_mv_x[0][i4_i]; 161 i2_temp_mv_y = ps_fullpel_refine_ctxt->i2_mv_y[0][i4_i]; 162 i2_temp_ref_idx = ps_fullpel_refine_ctxt->i2_ref_idx[0][i4_i]; 163 164 ps_fullpel_refine_ctxt->i2_tot_cost[0][i4_i] = 165 ps_fullpel_refine_ctxt->i2_tot_cost[1][i4_i]; 166 ps_fullpel_refine_ctxt->i2_stim_injected_cost[0][i4_i] = 167 ps_fullpel_refine_ctxt->i2_stim_injected_cost[1][i4_i]; 168 ps_fullpel_refine_ctxt->i2_mv_cost[0][i4_i] = 169 ps_fullpel_refine_ctxt->i2_mv_cost[1][i4_i]; 170 ps_fullpel_refine_ctxt->i2_mv_x[0][i4_i] = 171 ps_fullpel_refine_ctxt->i2_mv_x[1][i4_i]; 172 ps_fullpel_refine_ctxt->i2_mv_y[0][i4_i] = 173 ps_fullpel_refine_ctxt->i2_mv_y[1][i4_i]; 174 ps_fullpel_refine_ctxt->i2_ref_idx[0][i4_i] = 175 ps_fullpel_refine_ctxt->i2_ref_idx[1][i4_i]; 176 177 ps_fullpel_refine_ctxt->i2_tot_cost[1][i4_i] = i2_temp_tot_cost; 178 ps_fullpel_refine_ctxt->i2_stim_injected_cost[1][i4_i] = 179 i2_temp_stim_injected_cost; 180 ps_fullpel_refine_ctxt->i2_mv_cost[1][i4_i] = i2_temp_mv_cost; 181 ps_fullpel_refine_ctxt->i2_mv_x[1][i4_i] = i2_temp_mv_x; 182 ps_fullpel_refine_ctxt->i2_mv_y[1][i4_i] = i2_temp_mv_y; 183 ps_fullpel_refine_ctxt->i2_ref_idx[1][i4_i] = i2_temp_ref_idx; 184 } 185 } 186 } 187 188 ps_search_prms->i4_part_mask = i4_temp_part_mask; 189 190 ps_fullpel_refine_ctxt->i4_num_valid_parts = hme_create_valid_part_ids( 191 ps_search_prms->i4_part_mask, &ps_fullpel_refine_ctxt->ai4_part_id[0]); 192 } 193 } 194 195 static void hme_add_fpel_refine_candidates_to_search_cand_array( 196 search_node_t *ps_unique_search_nodes, 197 fullpel_refine_ctxt_t *ps_fullpel_refine_ctxt, 198 S32 *pi4_num_unique_nodes, 199 U32 *pu4_unique_node_map, 200 S32 i4_fpel_search_result_id, 201 S32 i4_fpel_search_result_array_index, 202 S32 i4_unique_node_map_center_x, 203 S32 i4_unique_node_map_center_y, 204 S08 i1_unique_node_map_ref_idx, 205 U08 u1_add_refine_grid_center_to_search_cand_array, 206 U08 u1_do_not_check_for_duplicates) 207 { 208 search_node_t s_refine_grid_center; 209 210 U08 u1_use_hashing, i; 211 212 S32 i2_mvx = 213 ps_fullpel_refine_ctxt->i2_mv_x[i4_fpel_search_result_id][i4_fpel_search_result_array_index]; 214 S32 i2_mvy = 215 ps_fullpel_refine_ctxt->i2_mv_y[i4_fpel_search_result_id][i4_fpel_search_result_array_index]; 216 S08 i1_ref_idx = ps_fullpel_refine_ctxt 217 ->i2_ref_idx[i4_fpel_search_result_id][i4_fpel_search_result_array_index]; 218 219 if(!u1_do_not_check_for_duplicates) 220 { 221 s_refine_grid_center.s_mv.i2_mvx = i2_mvx; 222 s_refine_grid_center.s_mv.i2_mvy = i2_mvy; 223 s_refine_grid_center.i1_ref_idx = i1_ref_idx; 224 225 u1_use_hashing = (s_refine_grid_center.i1_ref_idx == i1_unique_node_map_ref_idx); 226 227 for(i = 0; i < NUM_POINTS_IN_RECTANGULAR_GRID; i++) 228 { 229 S08 i1_offset_x = gai1_mv_offsets_from_center_in_rect_grid[i][0]; 230 S08 i1_offset_y = gai1_mv_offsets_from_center_in_rect_grid[i][1]; 231 232 if(i1_offset_x || i1_offset_y) 233 { 234 s_refine_grid_center.s_mv.i2_mvx = i2_mvx + i1_offset_x; 235 s_refine_grid_center.s_mv.i2_mvy = i2_mvy + i1_offset_y; 236 237 INSERT_NEW_NODE( 238 ps_unique_search_nodes, 239 pi4_num_unique_nodes[0], 240 s_refine_grid_center, 241 1, 242 pu4_unique_node_map, 243 i4_unique_node_map_center_x, 244 i4_unique_node_map_center_y, 245 u1_use_hashing); 246 } 247 else if(u1_add_refine_grid_center_to_search_cand_array) 248 { 249 s_refine_grid_center.s_mv.i2_mvx = i2_mvx; 250 s_refine_grid_center.s_mv.i2_mvy = i2_mvy; 251 252 INSERT_NEW_NODE( 253 ps_unique_search_nodes, 254 pi4_num_unique_nodes[0], 255 s_refine_grid_center, 256 1, 257 pu4_unique_node_map, 258 i4_unique_node_map_center_x, 259 i4_unique_node_map_center_y, 260 0); 261 } 262 } 263 } 264 else 265 { 266 for(i = 0; i < NUM_POINTS_IN_RECTANGULAR_GRID; i++) 267 { 268 S08 i1_offset_x = gai1_mv_offsets_from_center_in_rect_grid[i][0]; 269 S08 i1_offset_y = gai1_mv_offsets_from_center_in_rect_grid[i][1]; 270 271 if(i1_offset_x || i1_offset_y) 272 { 273 ps_unique_search_nodes[pi4_num_unique_nodes[0]].s_mv.i2_mvx = i2_mvx + i1_offset_x; 274 ps_unique_search_nodes[pi4_num_unique_nodes[0]].s_mv.i2_mvy = i2_mvy + i1_offset_y; 275 ps_unique_search_nodes[pi4_num_unique_nodes[0]++].i1_ref_idx = i1_ref_idx; 276 } 277 else if(u1_add_refine_grid_center_to_search_cand_array) 278 { 279 ps_unique_search_nodes[pi4_num_unique_nodes[0]].s_mv.i2_mvx = i2_mvx; 280 ps_unique_search_nodes[pi4_num_unique_nodes[0]].s_mv.i2_mvy = i2_mvy; 281 ps_unique_search_nodes[pi4_num_unique_nodes[0]++].i1_ref_idx = i1_ref_idx; 282 } 283 } 284 } 285 } 286 287 void hme_fullpel_refine( 288 refine_prms_t *ps_refine_prms, 289 hme_search_prms_t *ps_search_prms, 290 layer_ctxt_t *ps_layer_ctxt, 291 wgt_pred_ctxt_t *ps_wt_inp_prms, 292 U32 *pu4_unique_node_map, 293 U08 u1_num_init_search_cands, 294 U08 u1_8x8_blk_mask, 295 S32 i4_unique_node_map_center_x, 296 S32 i4_unique_node_map_center_y, 297 S08 i1_unique_node_map_ref_idx, 298 ME_QUALITY_PRESETS_T e_quality_preset, 299 ihevce_me_optimised_function_list_t *ps_me_optimised_function_list) 300 { 301 S32 i, j; 302 S32 i4_num_results; 303 U08 u1_num_complete_grids = 0; 304 U08 u1_num_grids = 0; 305 306 fullpel_refine_ctxt_t *ps_fullpel_refine_ctxt = ps_search_prms->ps_fullpel_refine_ctxt; 307 308 S32 i4_num_unique_nodes = 0; 309 310 search_node_t *ps_unique_search_nodes = ps_search_prms->ps_search_nodes; 311 312 if(u1_num_init_search_cands >= 2) 313 { 314 S32 i4_max_num_results = (15 == u1_8x8_blk_mask) 315 ? ps_refine_prms->u1_max_num_fpel_refine_centers 316 : ((ME_XTREME_SPEED_25 == e_quality_preset) 317 ? MAX_NUM_CANDS_FOR_FPEL_REFINE_IN_XS25 318 : INT_MAX); 319 320 for(i = 0; i < ps_fullpel_refine_ctxt->i4_num_valid_parts; i++) 321 { 322 S32 i4_part_id; 323 S32 i4_index; 324 325 i4_part_id = ps_fullpel_refine_ctxt->ai4_part_id[i]; 326 i4_index = (ps_fullpel_refine_ctxt->i4_num_valid_parts > 8) ? i4_part_id : i; 327 i4_num_results = (15 == u1_8x8_blk_mask) 328 ? MIN(ps_search_prms->ps_search_results->u1_num_results_per_part, 329 ps_refine_prms->pu1_num_best_results[i4_part_id]) 330 : ps_search_prms->ps_search_results->u1_num_results_per_part; 331 332 ASSERT(i4_num_results <= 2); 333 334 for(j = 0; j < i4_num_results; j++) 335 { 336 if((ps_fullpel_refine_ctxt->i2_ref_idx[j][i4_index] >= 0) && 337 (ps_fullpel_refine_ctxt->i2_mv_x[j][i4_index] != INTRA_MV)) 338 { 339 S32 i4_num_nodes_added = i4_num_unique_nodes; 340 341 hme_add_fpel_refine_candidates_to_search_cand_array( 342 ps_unique_search_nodes, 343 ps_fullpel_refine_ctxt, 344 &i4_num_unique_nodes, 345 pu4_unique_node_map, 346 j, 347 i4_index, 348 i4_unique_node_map_center_x, 349 i4_unique_node_map_center_y, 350 i1_unique_node_map_ref_idx, 351 0, 352 0); 353 354 i4_num_nodes_added = i4_num_unique_nodes - i4_num_nodes_added; 355 356 u1_num_complete_grids += 357 (i4_num_nodes_added >= (NUM_POINTS_IN_RECTANGULAR_GRID - 1)); 358 u1_num_grids += (!!i4_num_nodes_added); 359 360 i4_max_num_results--; 361 } 362 363 if(i4_max_num_results <= 0) 364 { 365 break; 366 } 367 } 368 369 if(i4_max_num_results <= 0) 370 { 371 break; 372 } 373 } 374 } 375 else if((1 == u1_num_init_search_cands) && (ps_refine_prms->u1_max_num_fpel_refine_centers >= 1)) 376 { 377 ps_fullpel_refine_ctxt->i2_mv_x[0][0] = ps_unique_search_nodes[0].s_mv.i2_mvx; 378 ps_fullpel_refine_ctxt->i2_mv_y[0][0] = ps_unique_search_nodes[0].s_mv.i2_mvy; 379 ps_fullpel_refine_ctxt->i2_ref_idx[0][0] = ps_unique_search_nodes[0].i1_ref_idx; 380 381 if((ps_fullpel_refine_ctxt->i2_ref_idx[0][0] >= 0) && 382 (ps_fullpel_refine_ctxt->i2_mv_x[0][0] != INTRA_MV)) 383 { 384 hme_add_fpel_refine_candidates_to_search_cand_array( 385 ps_unique_search_nodes, 386 ps_fullpel_refine_ctxt, 387 &i4_num_unique_nodes, 388 pu4_unique_node_map, 389 0, 390 0, 391 i4_unique_node_map_center_x, 392 i4_unique_node_map_center_y, 393 i1_unique_node_map_ref_idx, 394 1, 395 1); 396 397 u1_num_complete_grids++; 398 } 399 } 400 401 if(i4_num_unique_nodes > 0) 402 { 403 ps_search_prms->i4_num_search_nodes = i4_num_unique_nodes; 404 ps_search_prms->u1_is_cu_noisy = 0; 405 406 hme_pred_search( 407 ps_search_prms, 408 ps_layer_ctxt, 409 ps_wt_inp_prms, 410 (1 == u1_num_complete_grids) && (u1_num_grids == u1_num_complete_grids), 411 ps_me_optimised_function_list 412 413 ); 414 } 415 } 416 417 /** 418 ******************************************************************************** 419 * @fn hme_remove_duplicate_fpel_search_candidates 420 * 421 * @brief Function name is self-explanatory 422 * 423 * @return Number of unique candidates 424 ******************************************************************************** 425 */ 426 S32 hme_remove_duplicate_fpel_search_candidates( 427 search_node_t *ps_unique_search_nodes, 428 search_candt_t *ps_search_candts, 429 U32 *pu4_unique_node_map, 430 S08 *pi1_pred_dir_to_ref_idx, 431 S32 i4_num_srch_cands, 432 S32 i4_num_init_candts, 433 S32 i4_refine_iter_ctr, 434 S32 i4_num_refinement_iterations, 435 S32 i4_num_act_ref_l0, 436 S08 i1_unique_node_map_ref_idx, 437 S32 i4_unique_node_map_center_x, 438 S32 i4_unique_node_map_center_y, 439 U08 u1_is_bidir_enabled, 440 ME_QUALITY_PRESETS_T e_quality_preset) 441 { 442 S32 i; 443 444 S32 i4_max_num_cands = ((!u1_is_bidir_enabled) && (i4_num_act_ref_l0 > 1)) 445 ? (i4_num_init_candts >> 1) 446 : i4_num_init_candts; 447 S32 i4_num_unique_nodes = 0; 448 449 for(i = 0; (i < i4_num_srch_cands) && (i4_num_unique_nodes < i4_max_num_cands); i++) 450 { 451 search_node_t *ps_cur_cand = ps_search_candts[i].ps_search_node; 452 453 U08 u1_use_hashing = (ps_cur_cand->i1_ref_idx == i1_unique_node_map_ref_idx); 454 455 if(i4_num_refinement_iterations > 1) 456 { 457 #if !ENABLE_EXPLICIT_SEARCH_IN_P_IN_L0 458 /* Ref0 evaluated during the first iteration */ 459 /* All other Ref's evaluated during the second iteration */ 460 if((ps_cur_cand->i1_ref_idx != pi1_pred_dir_to_ref_idx[0]) && (i4_refine_iter_ctr == 0)) 461 { 462 continue; 463 } 464 #else 465 if(e_quality_preset == ME_HIGH_QUALITY) 466 { 467 if((ps_cur_cand->i1_ref_idx != pi1_pred_dir_to_ref_idx[0]) && 468 (i4_refine_iter_ctr == 0)) 469 { 470 continue; 471 } 472 } 473 else 474 { 475 if(ps_cur_cand->i1_ref_idx != pi1_pred_dir_to_ref_idx[i4_refine_iter_ctr]) 476 { 477 continue; 478 } 479 } 480 #endif 481 } 482 483 INSERT_UNIQUE_NODE( 484 ps_unique_search_nodes, 485 i4_num_unique_nodes, 486 ps_cur_cand[0], 487 pu4_unique_node_map, 488 i4_unique_node_map_center_x, 489 i4_unique_node_map_center_y, 490 u1_use_hashing); 491 } 492 493 return i4_num_unique_nodes; 494 } 495