Home | History | Annotate | Download | only in encoder
      1 /******************************************************************************
      2  *
      3  * Copyright (C) 2018 The Android Open Source Project
      4  *
      5  * Licensed under the Apache License, Version 2.0 (the "License");
      6  * you may not use this file except in compliance with the License.
      7  * You may obtain a copy of the License at:
      8  *
      9  * http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  *
     17  *****************************************************************************
     18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
     19 */
     20 
     21 /**
     22 ******************************************************************************
     23 * @file hme_fullpel.c
     24 *
     25 * @brief
     26 *    Fullpel search and refinement
     27 *
     28 * @author
     29 *    Ittiam
     30 *
     31 ******************************************************************************
     32 */
     33 
     34 /*****************************************************************************/
     35 /* File Includes                                                             */
     36 /*****************************************************************************/
     37 /* System include files */
     38 #include <stdio.h>
     39 #include <string.h>
     40 #include <stdlib.h>
     41 #include <assert.h>
     42 #include <stdarg.h>
     43 #include <math.h>
     44 #include <limits.h>
     45 
     46 /* User include files */
     47 #include "ihevc_typedefs.h"
     48 #include "itt_video_api.h"
     49 #include "ihevce_api.h"
     50 
     51 #include "rc_cntrl_param.h"
     52 #include "rc_frame_info_collector.h"
     53 #include "rc_look_ahead_params.h"
     54 
     55 #include "ihevc_defs.h"
     56 #include "ihevc_structs.h"
     57 #include "ihevc_platform_macros.h"
     58 #include "ihevc_deblk.h"
     59 #include "ihevc_itrans_recon.h"
     60 #include "ihevc_chroma_itrans_recon.h"
     61 #include "ihevc_chroma_intra_pred.h"
     62 #include "ihevc_intra_pred.h"
     63 #include "ihevc_inter_pred.h"
     64 #include "ihevc_mem_fns.h"
     65 #include "ihevc_padding.h"
     66 #include "ihevc_weighted_pred.h"
     67 #include "ihevc_sao.h"
     68 #include "ihevc_resi_trans.h"
     69 #include "ihevc_quant_iquant_ssd.h"
     70 #include "ihevc_cabac_tables.h"
     71 
     72 #include "ihevce_defs.h"
     73 #include "ihevce_lap_enc_structs.h"
     74 #include "ihevce_multi_thrd_structs.h"
     75 #include "ihevce_multi_thrd_funcs.h"
     76 #include "ihevce_me_common_defs.h"
     77 #include "ihevce_had_satd.h"
     78 #include "ihevce_error_codes.h"
     79 #include "ihevce_bitstream.h"
     80 #include "ihevce_cabac.h"
     81 #include "ihevce_rdoq_macros.h"
     82 #include "ihevce_function_selector.h"
     83 #include "ihevce_enc_structs.h"
     84 #include "ihevce_entropy_structs.h"
     85 #include "ihevce_cmn_utils_instr_set_router.h"
     86 #include "ihevce_enc_loop_structs.h"
     87 #include "ihevce_bs_compute_ctb.h"
     88 #include "ihevce_global_tables.h"
     89 #include "ihevce_dep_mngr_interface.h"
     90 #include "hme_datatype.h"
     91 #include "hme_interface.h"
     92 #include "hme_common_defs.h"
     93 #include "hme_defs.h"
     94 #include "ihevce_me_instr_set_router.h"
     95 #include "hme_globals.h"
     96 #include "hme_utils.h"
     97 #include "hme_coarse.h"
     98 #include "hme_refine.h"
     99 #include "hme_err_compute.h"
    100 #include "hme_common_utils.h"
    101 #include "hme_search_algo.h"
    102 #include "ihevce_stasino_helpers.h"
    103 
    104 /**
    105 ********************************************************************************
    106 *  @fn     hme_fullpel_cand_sifter
    107 *
    108 *  @brief  Given a list of search candidates and valid partition types,
    109 *          this function finds the two best candidates for each partition type.
    110 *
    111 *  @return None
    112 ********************************************************************************
    113 */
    114 void hme_fullpel_cand_sifter(
    115     hme_search_prms_t *ps_search_prms,
    116     layer_ctxt_t *ps_layer_ctxt,
    117     wgt_pred_ctxt_t *ps_wt_inp_prms,
    118     S32 i4_alpha_stim_multiplier,
    119     U08 u1_is_cu_noisy,
    120     ihevce_me_optimised_function_list_t *ps_me_optimised_function_list)
    121 {
    122     S32 i4_i;
    123     S16 i2_temp_tot_cost, i2_temp_stim_injected_cost, i2_temp_mv_cost, i2_temp_mv_x, i2_temp_mv_y,
    124         i2_temp_ref_idx;
    125 
    126     fullpel_refine_ctxt_t *ps_fullpel_refine_ctxt = ps_search_prms->ps_fullpel_refine_ctxt;
    127     S32 i4_temp_part_mask;
    128 
    129     ps_search_prms->i4_alpha_stim_multiplier = i4_alpha_stim_multiplier;
    130     ps_search_prms->u1_is_cu_noisy = u1_is_cu_noisy;
    131 
    132     if(u1_is_cu_noisy)
    133     {
    134         i4_temp_part_mask = ps_search_prms->i4_part_mask;
    135         ps_search_prms->i4_part_mask &= ((ENABLE_2Nx2N) | (ENABLE_NxN));
    136 
    137         ps_fullpel_refine_ctxt->i4_num_valid_parts = hme_create_valid_part_ids(
    138             (ps_search_prms->i4_part_mask) & ((ENABLE_2Nx2N) | (ENABLE_NxN)),
    139             &ps_fullpel_refine_ctxt->ai4_part_id[0]);
    140     }
    141 
    142     ps_search_prms->u1_is_cu_noisy = u1_is_cu_noisy;
    143 
    144     hme_pred_search(
    145         ps_search_prms, ps_layer_ctxt, ps_wt_inp_prms, 0, ps_me_optimised_function_list);
    146 
    147     if(u1_is_cu_noisy)
    148     {
    149         if(ps_search_prms->ps_search_results->u1_num_results_per_part == 2)
    150         {
    151             for(i4_i = 0; i4_i < ps_fullpel_refine_ctxt->i4_num_valid_parts; i4_i++)
    152             {
    153                 if(ps_fullpel_refine_ctxt->i2_tot_cost[0][i4_i] >
    154                    ps_fullpel_refine_ctxt->i2_tot_cost[1][i4_i])
    155                 {
    156                     i2_temp_tot_cost = ps_fullpel_refine_ctxt->i2_tot_cost[0][i4_i];
    157                     i2_temp_stim_injected_cost =
    158                         ps_fullpel_refine_ctxt->i2_stim_injected_cost[0][i4_i];
    159                     i2_temp_mv_cost = ps_fullpel_refine_ctxt->i2_mv_cost[0][i4_i];
    160                     i2_temp_mv_x = ps_fullpel_refine_ctxt->i2_mv_x[0][i4_i];
    161                     i2_temp_mv_y = ps_fullpel_refine_ctxt->i2_mv_y[0][i4_i];
    162                     i2_temp_ref_idx = ps_fullpel_refine_ctxt->i2_ref_idx[0][i4_i];
    163 
    164                     ps_fullpel_refine_ctxt->i2_tot_cost[0][i4_i] =
    165                         ps_fullpel_refine_ctxt->i2_tot_cost[1][i4_i];
    166                     ps_fullpel_refine_ctxt->i2_stim_injected_cost[0][i4_i] =
    167                         ps_fullpel_refine_ctxt->i2_stim_injected_cost[1][i4_i];
    168                     ps_fullpel_refine_ctxt->i2_mv_cost[0][i4_i] =
    169                         ps_fullpel_refine_ctxt->i2_mv_cost[1][i4_i];
    170                     ps_fullpel_refine_ctxt->i2_mv_x[0][i4_i] =
    171                         ps_fullpel_refine_ctxt->i2_mv_x[1][i4_i];
    172                     ps_fullpel_refine_ctxt->i2_mv_y[0][i4_i] =
    173                         ps_fullpel_refine_ctxt->i2_mv_y[1][i4_i];
    174                     ps_fullpel_refine_ctxt->i2_ref_idx[0][i4_i] =
    175                         ps_fullpel_refine_ctxt->i2_ref_idx[1][i4_i];
    176 
    177                     ps_fullpel_refine_ctxt->i2_tot_cost[1][i4_i] = i2_temp_tot_cost;
    178                     ps_fullpel_refine_ctxt->i2_stim_injected_cost[1][i4_i] =
    179                         i2_temp_stim_injected_cost;
    180                     ps_fullpel_refine_ctxt->i2_mv_cost[1][i4_i] = i2_temp_mv_cost;
    181                     ps_fullpel_refine_ctxt->i2_mv_x[1][i4_i] = i2_temp_mv_x;
    182                     ps_fullpel_refine_ctxt->i2_mv_y[1][i4_i] = i2_temp_mv_y;
    183                     ps_fullpel_refine_ctxt->i2_ref_idx[1][i4_i] = i2_temp_ref_idx;
    184                 }
    185             }
    186         }
    187 
    188         ps_search_prms->i4_part_mask = i4_temp_part_mask;
    189 
    190         ps_fullpel_refine_ctxt->i4_num_valid_parts = hme_create_valid_part_ids(
    191             ps_search_prms->i4_part_mask, &ps_fullpel_refine_ctxt->ai4_part_id[0]);
    192     }
    193 }
    194 
    195 static void hme_add_fpel_refine_candidates_to_search_cand_array(
    196     search_node_t *ps_unique_search_nodes,
    197     fullpel_refine_ctxt_t *ps_fullpel_refine_ctxt,
    198     S32 *pi4_num_unique_nodes,
    199     U32 *pu4_unique_node_map,
    200     S32 i4_fpel_search_result_id,
    201     S32 i4_fpel_search_result_array_index,
    202     S32 i4_unique_node_map_center_x,
    203     S32 i4_unique_node_map_center_y,
    204     S08 i1_unique_node_map_ref_idx,
    205     U08 u1_add_refine_grid_center_to_search_cand_array,
    206     U08 u1_do_not_check_for_duplicates)
    207 {
    208     search_node_t s_refine_grid_center;
    209 
    210     U08 u1_use_hashing, i;
    211 
    212     S32 i2_mvx =
    213         ps_fullpel_refine_ctxt->i2_mv_x[i4_fpel_search_result_id][i4_fpel_search_result_array_index];
    214     S32 i2_mvy =
    215         ps_fullpel_refine_ctxt->i2_mv_y[i4_fpel_search_result_id][i4_fpel_search_result_array_index];
    216     S08 i1_ref_idx = ps_fullpel_refine_ctxt
    217                          ->i2_ref_idx[i4_fpel_search_result_id][i4_fpel_search_result_array_index];
    218 
    219     if(!u1_do_not_check_for_duplicates)
    220     {
    221         s_refine_grid_center.s_mv.i2_mvx = i2_mvx;
    222         s_refine_grid_center.s_mv.i2_mvy = i2_mvy;
    223         s_refine_grid_center.i1_ref_idx = i1_ref_idx;
    224 
    225         u1_use_hashing = (s_refine_grid_center.i1_ref_idx == i1_unique_node_map_ref_idx);
    226 
    227         for(i = 0; i < NUM_POINTS_IN_RECTANGULAR_GRID; i++)
    228         {
    229             S08 i1_offset_x = gai1_mv_offsets_from_center_in_rect_grid[i][0];
    230             S08 i1_offset_y = gai1_mv_offsets_from_center_in_rect_grid[i][1];
    231 
    232             if(i1_offset_x || i1_offset_y)
    233             {
    234                 s_refine_grid_center.s_mv.i2_mvx = i2_mvx + i1_offset_x;
    235                 s_refine_grid_center.s_mv.i2_mvy = i2_mvy + i1_offset_y;
    236 
    237                 INSERT_NEW_NODE(
    238                     ps_unique_search_nodes,
    239                     pi4_num_unique_nodes[0],
    240                     s_refine_grid_center,
    241                     1,
    242                     pu4_unique_node_map,
    243                     i4_unique_node_map_center_x,
    244                     i4_unique_node_map_center_y,
    245                     u1_use_hashing);
    246             }
    247             else if(u1_add_refine_grid_center_to_search_cand_array)
    248             {
    249                 s_refine_grid_center.s_mv.i2_mvx = i2_mvx;
    250                 s_refine_grid_center.s_mv.i2_mvy = i2_mvy;
    251 
    252                 INSERT_NEW_NODE(
    253                     ps_unique_search_nodes,
    254                     pi4_num_unique_nodes[0],
    255                     s_refine_grid_center,
    256                     1,
    257                     pu4_unique_node_map,
    258                     i4_unique_node_map_center_x,
    259                     i4_unique_node_map_center_y,
    260                     0);
    261             }
    262         }
    263     }
    264     else
    265     {
    266         for(i = 0; i < NUM_POINTS_IN_RECTANGULAR_GRID; i++)
    267         {
    268             S08 i1_offset_x = gai1_mv_offsets_from_center_in_rect_grid[i][0];
    269             S08 i1_offset_y = gai1_mv_offsets_from_center_in_rect_grid[i][1];
    270 
    271             if(i1_offset_x || i1_offset_y)
    272             {
    273                 ps_unique_search_nodes[pi4_num_unique_nodes[0]].s_mv.i2_mvx = i2_mvx + i1_offset_x;
    274                 ps_unique_search_nodes[pi4_num_unique_nodes[0]].s_mv.i2_mvy = i2_mvy + i1_offset_y;
    275                 ps_unique_search_nodes[pi4_num_unique_nodes[0]++].i1_ref_idx = i1_ref_idx;
    276             }
    277             else if(u1_add_refine_grid_center_to_search_cand_array)
    278             {
    279                 ps_unique_search_nodes[pi4_num_unique_nodes[0]].s_mv.i2_mvx = i2_mvx;
    280                 ps_unique_search_nodes[pi4_num_unique_nodes[0]].s_mv.i2_mvy = i2_mvy;
    281                 ps_unique_search_nodes[pi4_num_unique_nodes[0]++].i1_ref_idx = i1_ref_idx;
    282             }
    283         }
    284     }
    285 }
    286 
    287 void hme_fullpel_refine(
    288     refine_prms_t *ps_refine_prms,
    289     hme_search_prms_t *ps_search_prms,
    290     layer_ctxt_t *ps_layer_ctxt,
    291     wgt_pred_ctxt_t *ps_wt_inp_prms,
    292     U32 *pu4_unique_node_map,
    293     U08 u1_num_init_search_cands,
    294     U08 u1_8x8_blk_mask,
    295     S32 i4_unique_node_map_center_x,
    296     S32 i4_unique_node_map_center_y,
    297     S08 i1_unique_node_map_ref_idx,
    298     ME_QUALITY_PRESETS_T e_quality_preset,
    299     ihevce_me_optimised_function_list_t *ps_me_optimised_function_list)
    300 {
    301     S32 i, j;
    302     S32 i4_num_results;
    303     U08 u1_num_complete_grids = 0;
    304     U08 u1_num_grids = 0;
    305 
    306     fullpel_refine_ctxt_t *ps_fullpel_refine_ctxt = ps_search_prms->ps_fullpel_refine_ctxt;
    307 
    308     S32 i4_num_unique_nodes = 0;
    309 
    310     search_node_t *ps_unique_search_nodes = ps_search_prms->ps_search_nodes;
    311 
    312     if(u1_num_init_search_cands >= 2)
    313     {
    314         S32 i4_max_num_results = (15 == u1_8x8_blk_mask)
    315                                      ? ps_refine_prms->u1_max_num_fpel_refine_centers
    316                                      : ((ME_XTREME_SPEED_25 == e_quality_preset)
    317                                             ? MAX_NUM_CANDS_FOR_FPEL_REFINE_IN_XS25
    318                                             : INT_MAX);
    319 
    320         for(i = 0; i < ps_fullpel_refine_ctxt->i4_num_valid_parts; i++)
    321         {
    322             S32 i4_part_id;
    323             S32 i4_index;
    324 
    325             i4_part_id = ps_fullpel_refine_ctxt->ai4_part_id[i];
    326             i4_index = (ps_fullpel_refine_ctxt->i4_num_valid_parts > 8) ? i4_part_id : i;
    327             i4_num_results = (15 == u1_8x8_blk_mask)
    328                                  ? MIN(ps_search_prms->ps_search_results->u1_num_results_per_part,
    329                                        ps_refine_prms->pu1_num_best_results[i4_part_id])
    330                                  : ps_search_prms->ps_search_results->u1_num_results_per_part;
    331 
    332             ASSERT(i4_num_results <= 2);
    333 
    334             for(j = 0; j < i4_num_results; j++)
    335             {
    336                 if((ps_fullpel_refine_ctxt->i2_ref_idx[j][i4_index] >= 0) &&
    337                    (ps_fullpel_refine_ctxt->i2_mv_x[j][i4_index] != INTRA_MV))
    338                 {
    339                     S32 i4_num_nodes_added = i4_num_unique_nodes;
    340 
    341                     hme_add_fpel_refine_candidates_to_search_cand_array(
    342                         ps_unique_search_nodes,
    343                         ps_fullpel_refine_ctxt,
    344                         &i4_num_unique_nodes,
    345                         pu4_unique_node_map,
    346                         j,
    347                         i4_index,
    348                         i4_unique_node_map_center_x,
    349                         i4_unique_node_map_center_y,
    350                         i1_unique_node_map_ref_idx,
    351                         0,
    352                         0);
    353 
    354                     i4_num_nodes_added = i4_num_unique_nodes - i4_num_nodes_added;
    355 
    356                     u1_num_complete_grids +=
    357                         (i4_num_nodes_added >= (NUM_POINTS_IN_RECTANGULAR_GRID - 1));
    358                     u1_num_grids += (!!i4_num_nodes_added);
    359 
    360                     i4_max_num_results--;
    361                 }
    362 
    363                 if(i4_max_num_results <= 0)
    364                 {
    365                     break;
    366                 }
    367             }
    368 
    369             if(i4_max_num_results <= 0)
    370             {
    371                 break;
    372             }
    373         }
    374     }
    375     else if((1 == u1_num_init_search_cands) && (ps_refine_prms->u1_max_num_fpel_refine_centers >= 1))
    376     {
    377         ps_fullpel_refine_ctxt->i2_mv_x[0][0] = ps_unique_search_nodes[0].s_mv.i2_mvx;
    378         ps_fullpel_refine_ctxt->i2_mv_y[0][0] = ps_unique_search_nodes[0].s_mv.i2_mvy;
    379         ps_fullpel_refine_ctxt->i2_ref_idx[0][0] = ps_unique_search_nodes[0].i1_ref_idx;
    380 
    381         if((ps_fullpel_refine_ctxt->i2_ref_idx[0][0] >= 0) &&
    382            (ps_fullpel_refine_ctxt->i2_mv_x[0][0] != INTRA_MV))
    383         {
    384             hme_add_fpel_refine_candidates_to_search_cand_array(
    385                 ps_unique_search_nodes,
    386                 ps_fullpel_refine_ctxt,
    387                 &i4_num_unique_nodes,
    388                 pu4_unique_node_map,
    389                 0,
    390                 0,
    391                 i4_unique_node_map_center_x,
    392                 i4_unique_node_map_center_y,
    393                 i1_unique_node_map_ref_idx,
    394                 1,
    395                 1);
    396 
    397             u1_num_complete_grids++;
    398         }
    399     }
    400 
    401     if(i4_num_unique_nodes > 0)
    402     {
    403         ps_search_prms->i4_num_search_nodes = i4_num_unique_nodes;
    404         ps_search_prms->u1_is_cu_noisy = 0;
    405 
    406         hme_pred_search(
    407             ps_search_prms,
    408             ps_layer_ctxt,
    409             ps_wt_inp_prms,
    410             (1 == u1_num_complete_grids) && (u1_num_grids == u1_num_complete_grids),
    411             ps_me_optimised_function_list
    412 
    413         );
    414     }
    415 }
    416 
    417 /**
    418 ********************************************************************************
    419 *  @fn     hme_remove_duplicate_fpel_search_candidates
    420 *
    421 *  @brief  Function name is self-explanatory
    422 *
    423 *  @return Number of unique candidates
    424 ********************************************************************************
    425 */
    426 S32 hme_remove_duplicate_fpel_search_candidates(
    427     search_node_t *ps_unique_search_nodes,
    428     search_candt_t *ps_search_candts,
    429     U32 *pu4_unique_node_map,
    430     S08 *pi1_pred_dir_to_ref_idx,
    431     S32 i4_num_srch_cands,
    432     S32 i4_num_init_candts,
    433     S32 i4_refine_iter_ctr,
    434     S32 i4_num_refinement_iterations,
    435     S32 i4_num_act_ref_l0,
    436     S08 i1_unique_node_map_ref_idx,
    437     S32 i4_unique_node_map_center_x,
    438     S32 i4_unique_node_map_center_y,
    439     U08 u1_is_bidir_enabled,
    440     ME_QUALITY_PRESETS_T e_quality_preset)
    441 {
    442     S32 i;
    443 
    444     S32 i4_max_num_cands = ((!u1_is_bidir_enabled) && (i4_num_act_ref_l0 > 1))
    445                                ? (i4_num_init_candts >> 1)
    446                                : i4_num_init_candts;
    447     S32 i4_num_unique_nodes = 0;
    448 
    449     for(i = 0; (i < i4_num_srch_cands) && (i4_num_unique_nodes < i4_max_num_cands); i++)
    450     {
    451         search_node_t *ps_cur_cand = ps_search_candts[i].ps_search_node;
    452 
    453         U08 u1_use_hashing = (ps_cur_cand->i1_ref_idx == i1_unique_node_map_ref_idx);
    454 
    455         if(i4_num_refinement_iterations > 1)
    456         {
    457 #if !ENABLE_EXPLICIT_SEARCH_IN_P_IN_L0
    458             /* Ref0 evaluated during the first iteration */
    459             /* All other Ref's evaluated during the second iteration */
    460             if((ps_cur_cand->i1_ref_idx != pi1_pred_dir_to_ref_idx[0]) && (i4_refine_iter_ctr == 0))
    461             {
    462                 continue;
    463             }
    464 #else
    465             if(e_quality_preset == ME_HIGH_QUALITY)
    466             {
    467                 if((ps_cur_cand->i1_ref_idx != pi1_pred_dir_to_ref_idx[0]) &&
    468                    (i4_refine_iter_ctr == 0))
    469                 {
    470                     continue;
    471                 }
    472             }
    473             else
    474             {
    475                 if(ps_cur_cand->i1_ref_idx != pi1_pred_dir_to_ref_idx[i4_refine_iter_ctr])
    476                 {
    477                     continue;
    478                 }
    479             }
    480 #endif
    481         }
    482 
    483         INSERT_UNIQUE_NODE(
    484             ps_unique_search_nodes,
    485             i4_num_unique_nodes,
    486             ps_cur_cand[0],
    487             pu4_unique_node_map,
    488             i4_unique_node_map_center_x,
    489             i4_unique_node_map_center_y,
    490             u1_use_hashing);
    491     }
    492 
    493     return i4_num_unique_nodes;
    494 }
    495