Home | History | Annotate | Download | only in encoder
      1 /******************************************************************************
      2  *
      3  * Copyright (C) 2018 The Android Open Source Project
      4  *
      5  * Licensed under the Apache License, Version 2.0 (the "License");
      6  * you may not use this file except in compliance with the License.
      7  * You may obtain a copy of the License at:
      8  *
      9  * http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  *
     17  *****************************************************************************
     18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
     19 */
     20 /**
     21 ******************************************************************************
     22 * @file hme_search_algo.c
     23 *
     24 * @brief
     25 *    Contains various search algorithms to be used by coarse/refinement layers
     26 *
     27 * @author
     28 *    Ittiam
     29 *
     30 *
     31 * List of Functions
     32 * hme_compute_grid_results_step_gt_1()
     33 * hme_compute_grid_results_step_1()
     34 * hme_pred_search_square_stepn()
     35 *
     36 ******************************************************************************
     37 */
     38 
     39 /*****************************************************************************/
     40 /* File Includes                                                             */
     41 /*****************************************************************************/
     42 /* System include files */
     43 #include <stdio.h>
     44 #include <string.h>
     45 #include <stdlib.h>
     46 #include <assert.h>
     47 #include <stdarg.h>
     48 #include <math.h>
     49 #include <limits.h>
     50 
     51 /* User include files */
     52 #include "ihevc_typedefs.h"
     53 #include "itt_video_api.h"
     54 #include "ihevce_api.h"
     55 
     56 #include "rc_cntrl_param.h"
     57 #include "rc_frame_info_collector.h"
     58 #include "rc_look_ahead_params.h"
     59 
     60 #include "ihevc_defs.h"
     61 #include "ihevc_structs.h"
     62 #include "ihevc_platform_macros.h"
     63 #include "ihevc_deblk.h"
     64 #include "ihevc_itrans_recon.h"
     65 #include "ihevc_chroma_itrans_recon.h"
     66 #include "ihevc_chroma_intra_pred.h"
     67 #include "ihevc_intra_pred.h"
     68 #include "ihevc_inter_pred.h"
     69 #include "ihevc_mem_fns.h"
     70 #include "ihevc_padding.h"
     71 #include "ihevc_weighted_pred.h"
     72 #include "ihevc_sao.h"
     73 #include "ihevc_resi_trans.h"
     74 #include "ihevc_quant_iquant_ssd.h"
     75 #include "ihevc_cabac_tables.h"
     76 
     77 #include "ihevce_defs.h"
     78 #include "ihevce_lap_enc_structs.h"
     79 #include "ihevce_multi_thrd_structs.h"
     80 #include "ihevce_multi_thrd_funcs.h"
     81 #include "ihevce_me_common_defs.h"
     82 #include "ihevce_had_satd.h"
     83 #include "ihevce_error_codes.h"
     84 #include "ihevce_bitstream.h"
     85 #include "ihevce_cabac.h"
     86 #include "ihevce_rdoq_macros.h"
     87 #include "ihevce_function_selector.h"
     88 #include "ihevce_enc_structs.h"
     89 #include "ihevce_entropy_structs.h"
     90 #include "ihevce_cmn_utils_instr_set_router.h"
     91 #include "ihevce_enc_loop_structs.h"
     92 #include "ihevce_bs_compute_ctb.h"
     93 #include "ihevce_global_tables.h"
     94 #include "ihevce_dep_mngr_interface.h"
     95 #include "hme_datatype.h"
     96 #include "hme_interface.h"
     97 #include "hme_common_defs.h"
     98 #include "hme_defs.h"
     99 #include "ihevce_me_instr_set_router.h"
    100 #include "hme_globals.h"
    101 #include "hme_utils.h"
    102 #include "hme_coarse.h"
    103 #include "hme_fullpel.h"
    104 #include "hme_subpel.h"
    105 #include "hme_refine.h"
    106 #include "hme_err_compute.h"
    107 #include "hme_common_utils.h"
    108 #include "hme_search_algo.h"
    109 #include "ihevce_stasino_helpers.h"
    110 #include "ihevce_common_utils.h"
    111 
    112 /*****************************************************************************/
    113 /* Function Definitions                                                      */
    114 /*****************************************************************************/
    115 
    116 /**
    117 ********************************************************************************
    118 *  @fn     void hme_compute_grid_results_step_1(err_prms_t *ps_err_prms,
    119 result_upd_prms_t *ps_result_prms,
    120 BLK_SIZE_T e_blk_size)
    121 *
    122 *  @brief  Updates results for a grid of step = 1
    123 *
    124 *  @param[in] ps_err_prms: Various parameters to this function
    125 *
    126 *  @param[in] ps_result_prms : Parameters pertaining to result updation
    127 *
    128 *  @param[out] e_blk_size: Block size of the blk being searched for
    129 *
    130 *  @return none
    131 ********************************************************************************
    132 */
    133 void hme_compute_grid_results(
    134     err_prms_t *ps_err_prms, result_upd_prms_t *ps_result_prms, BLK_SIZE_T e_blk_size)
    135 {
    136     PF_RESULT_FXN_T pf_hme_result_fxn;
    137     PF_SAD_FXN_T pf_sad_fxn;
    138     S32 i4_num_results;
    139     S32 part_id;
    140 
    141     part_id = ps_result_prms->pi4_valid_part_ids[0];
    142 
    143     i4_num_results = (S32)ps_result_prms->ps_search_results->u1_num_results_per_part;
    144 
    145     pf_sad_fxn = hme_get_sad_fxn(e_blk_size, ps_err_prms->i4_grid_mask, ps_err_prms->i4_part_mask);
    146 
    147     pf_hme_result_fxn =
    148         hme_get_result_fxn(ps_err_prms->i4_grid_mask, ps_err_prms->i4_part_mask, i4_num_results);
    149 
    150     pf_sad_fxn(ps_err_prms);
    151     pf_hme_result_fxn(ps_result_prms);
    152 }
    153 
    154 /**
    155 ********************************************************************************
    156 *  @fn     void hme_pred_search_square_stepn(hme_search_prms_t *ps_search_prms,
    157 *                                   layer_ctxt_t *ps_layer_ctxt)
    158 *
    159 *  @brief  Implements predictive search, with square grid refinement. In this
    160 *          case, we start with a bigger step size, like 4, refining upto a
    161 *          variable number of pts, till we hit end of search range or hit a
    162 *          minima. Then we refine using smaller steps. The bigger step size
    163 *          like 4 or 2, do not use optimized SAD functions, they evaluate
    164 *          SAD for each individual pt.
    165 *
    166 *  @param[in,out]  ps_search_prms: All the params to this function
    167 *
    168 *  @param[in] ps_layer_ctxt: Context for the layer
    169 *
    170 *  @return None
    171 ********************************************************************************
    172 */
    173 void hme_pred_search_square_stepn(
    174     hme_search_prms_t *ps_search_prms,
    175     layer_ctxt_t *ps_layer_ctxt,
    176     wgt_pred_ctxt_t *ps_wt_inp_prms,
    177     ME_QUALITY_PRESETS_T e_me_quality_preset,
    178     ihevce_me_optimised_function_list_t *ps_me_optimised_function_list
    179 
    180 )
    181 {
    182     /* Stores the SAD for all parts at each pt in the grid */
    183     S32 ai4_sad_grid[9][TOT_NUM_PARTS];
    184 
    185     S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
    186 
    187     /* Atributes of input candidates */
    188     search_candt_t *ps_search_candts;
    189     search_node_t s_search_node;
    190 
    191     /* Number of candidates to search */
    192     S32 i4_num_candts, max_num_iters, i4_num_results;
    193 
    194     /* Input and reference attributes */
    195     S32 i4_inp_stride, i4_ref_stride, i4_ref_offset;
    196 
    197     /* The reference is actually an array of ptrs since there are several    */
    198     /* reference id. So an array gets passed form calling function           */
    199     U08 **ppu1_ref;
    200 
    201     /* Holds the search results at the end of this fxn */
    202     search_results_t *ps_search_results;
    203 
    204     /* These control number of parts and number of pts in grid to search */
    205     S32 i4_part_mask, i4_grid_mask;
    206 
    207     /* Blk width, blk height and blk size are derived from input params */
    208     BLK_SIZE_T e_blk_size;
    209     CU_SIZE_T e_cu_size;
    210     S32 i4_blk_wd, i4_blk_ht, i4_step, i4_candt, i4_iter;
    211     S32 i4_inp_off;
    212     S32 i4_min_id;
    213     /* Points to the range limits for mv */
    214     range_prms_t *ps_range_prms;
    215 
    216     /*************************************************************************/
    217     /* These functions pointers for calculating Err and the result update    */
    218     /* Each carries its own parameters structure, which is generated on the  */
    219     /* fly in this function                                                  */
    220     /*************************************************************************/
    221     err_prms_t s_err_prms;
    222     result_upd_prms_t s_result_prms;
    223 
    224     max_num_iters = ps_search_prms->i4_max_iters;
    225     /* Using the member 0 to store for all ref. idx., see in coarsest */
    226     ps_range_prms = ps_search_prms->aps_mv_range[0];
    227     i4_inp_stride = ps_search_prms->i4_inp_stride;
    228     /* Move to the location of the search blk in inp buffer */
    229     i4_inp_off = ps_search_prms->i4_cu_x_off;
    230     i4_inp_off += (ps_search_prms->i4_cu_y_off * i4_inp_stride);
    231 
    232     ps_search_results = ps_search_prms->ps_search_results;
    233 
    234     /*************************************************************************/
    235     /* Depending on flag i4_use_rec, we use either input of previously       */
    236     /* encoded pictures or we use recon of previously encoded pictures.      */
    237     /*************************************************************************/
    238     if(ps_search_prms->i4_use_rec == 1)
    239     {
    240         i4_ref_stride = ps_layer_ctxt->i4_rec_stride;
    241         ppu1_ref = ps_layer_ctxt->ppu1_list_rec_fxfy;
    242     }
    243     else
    244     {
    245         i4_ref_stride = ps_layer_ctxt->i4_inp_stride;
    246         ppu1_ref = ps_layer_ctxt->ppu1_list_inp;
    247     }
    248     i4_ref_offset = (i4_ref_stride * ps_search_prms->i4_y_off) + ps_search_prms->i4_x_off;
    249 
    250     /*************************************************************************/
    251     /* Obtain the blk size of the search blk. Assumed here that the search   */
    252     /* is done on a CU size, rather than any arbitrary blk size.             */
    253     /*************************************************************************/
    254     ps_search_results = ps_search_prms->ps_search_results;
    255     e_blk_size = ps_search_prms->e_blk_size;
    256     i4_blk_wd = (S32)gau1_blk_size_to_wd[e_blk_size];
    257     i4_blk_ht = (S32)gau1_blk_size_to_ht[e_blk_size];
    258     e_cu_size = ps_search_results->e_cu_size;
    259     i4_num_results = (S32)ps_search_results->u1_num_results_per_part;
    260 
    261     ps_search_candts = ps_search_prms->ps_search_candts;
    262     i4_num_candts = ps_search_prms->i4_num_init_candts;
    263     i4_part_mask = ps_search_prms->i4_part_mask;
    264 
    265     /*************************************************************************/
    266     /* This array stores the ids of the partitions whose                     */
    267     /* SADs are updated. Since the partitions whose SADs are updated may not */
    268     /* be in contiguous order, we supply another level of indirection.       */
    269     /*************************************************************************/
    270     hme_create_valid_part_ids(i4_part_mask, ai4_valid_part_ids);
    271 
    272     /* Update the parameters used to pass to SAD */
    273     /* input ptr, strides, SAD Grid, part mask, blk width and ht */
    274     /* The above are fixed ptrs, only pu1_ref and grid mask are  */
    275     /* varying params which are updated just before calling fxn  */
    276     s_err_prms.i4_inp_stride = i4_inp_stride;
    277     s_err_prms.i4_ref_stride = i4_ref_stride;
    278     s_err_prms.i4_part_mask = i4_part_mask;
    279     s_err_prms.pi4_sad_grid = &ai4_sad_grid[0][0];
    280     s_err_prms.i4_blk_wd = i4_blk_wd;
    281     s_err_prms.i4_blk_ht = i4_blk_ht;
    282     s_err_prms.pi4_valid_part_ids = ai4_valid_part_ids;
    283 
    284     s_result_prms.pf_mv_cost_compute = ps_search_prms->pf_mv_cost_compute;
    285     s_result_prms.ps_search_results = ps_search_results;
    286     s_result_prms.pi4_valid_part_ids = ai4_valid_part_ids;
    287     s_result_prms.i1_ref_idx = ps_search_prms->i1_ref_idx;
    288     s_result_prms.i4_part_mask = ps_search_prms->i4_part_mask;
    289     s_result_prms.ps_search_node_base = &s_search_node;
    290     s_result_prms.pi4_sad_grid = &ai4_sad_grid[0][0];
    291 
    292     /* Run through each of the candts in a loop */
    293     for(i4_candt = 0; i4_candt < i4_num_candts; i4_candt++)
    294     {
    295         S32 i4_num_refine;
    296 
    297         i4_step = ps_search_prms->i4_start_step;
    298 
    299         s_search_node = *(ps_search_candts->ps_search_node);
    300 
    301         /* initialize minimum cost for this candidate. As we search around */
    302         /* this candidate, this is used to check early exit, when in any   */
    303         /* given iteration, the center pt of the grid is lowest value      */
    304         s_result_prms.i4_min_cost = MAX_32BIT_VAL;
    305 
    306         /* If we need to do refinements, then we need to evaluate */
    307         /* neighbouring pts. Before doing so, we have to do       */
    308         /* basic range checks against max allowed mvs             */
    309         i4_num_refine = ps_search_candts->u1_num_steps_refine;
    310 
    311         CLIP_MV_WITHIN_RANGE(
    312             s_search_node.s_mv.i2_mvx, s_search_node.s_mv.i2_mvy, ps_range_prms, 0, 0, 0);
    313 
    314         /* The first time, we search all 8 pts around init candt plus the init candt */
    315         i4_grid_mask = 0x1ff;
    316         s_err_prms.pu1_inp = ps_wt_inp_prms->apu1_wt_inp[s_search_node.i1_ref_idx] + i4_inp_off;
    317 
    318         for(i4_iter = 0; i4_iter < max_num_iters; i4_iter++)
    319         {
    320             i4_grid_mask &= hme_clamp_grid_by_mvrange(&s_search_node, i4_step, ps_range_prms);
    321 
    322             s_err_prms.i4_grid_mask = i4_grid_mask;
    323             s_err_prms.pu1_ref = ppu1_ref[s_search_node.i1_ref_idx] + i4_ref_offset;
    324             s_err_prms.pu1_ref +=
    325                 (s_search_node.s_mv.i2_mvx +
    326                  (s_search_node.s_mv.i2_mvy * s_err_prms.i4_ref_stride));
    327 
    328             s_result_prms.i4_step = i4_step;
    329             s_err_prms.i4_step = i4_step;
    330             s_result_prms.i4_grid_mask = i4_grid_mask;
    331 
    332             /* For Top,TopLeft and Left cand., get only center point SAD    */
    333             /* and do early exit                                            */
    334             if(0 == i4_num_refine)
    335             {
    336                 s_err_prms.i4_grid_mask = 0x1;
    337                 s_result_prms.i4_grid_mask = 0x1;
    338 
    339                 /* sad pt fun. populates sad to 0th location, whereas update */
    340                 /* fun. takes it based on part. id                           */
    341                 s_err_prms.pi4_sad_grid =
    342                     s_result_prms.pi4_sad_grid + (1 * s_result_prms.pi4_valid_part_ids[0]);
    343 
    344                 ps_me_optimised_function_list->pf_evalsad_pt_npu_mxn_8bit(&s_err_prms);
    345 
    346                 s_err_prms.pi4_sad_grid = s_result_prms.pi4_sad_grid;
    347 
    348                 if(ME_XTREME_SPEED_25 == e_me_quality_preset)
    349                     hme_update_results_grid_pu_bestn_xtreme_speed(&s_result_prms);
    350                 else
    351                     hme_update_results_grid_pu_bestn(&s_result_prms);
    352 
    353                 i4_min_id = (S32)PT_C; /* Center Point         */
    354                 i4_step = 0; /* No further refinment */
    355                 s_result_prms.i4_step = i4_step;
    356                 s_err_prms.i4_step = i4_step;
    357             }
    358             else
    359             {
    360                 if(ME_XTREME_SPEED_25 == e_me_quality_preset)
    361                 {
    362                     err_prms_t *ps_err_prms = &s_err_prms;
    363                     ASSERT(ps_err_prms->i4_grid_mask != 1);
    364                     ASSERT((ps_err_prms->i4_part_mask == 4) || (ps_err_prms->i4_part_mask == 16));
    365 
    366                     /*****************************************************************/
    367                     /* In this case, there are no partial updates. The blk can be    */
    368                     /* of any type and need not be a CU. The only thing that matters */
    369                     /* here is the width of the blk, 4/8/(>=16)                      */
    370                     /*****************************************************************/
    371                     ps_me_optimised_function_list->pf_evalsad_grid_npu_MxN(&s_err_prms);
    372 
    373                     hme_update_results_grid_pu_bestn_xtreme_speed(&s_result_prms);
    374                 }
    375                 else
    376                 {
    377                     /* Obtain SAD for all 9 pts in grid*/
    378                     hme_compute_grid_results(&s_err_prms, &s_result_prms, e_blk_size);
    379                 }
    380 
    381                 /* Early exit in case of centre being local minima */
    382                 i4_min_id = s_result_prms.i4_min_id;
    383             }
    384 
    385             i4_grid_mask = gai4_opt_grid_mask[i4_min_id];
    386 
    387             s_search_node.s_mv.i2_mvx += (i4_step * gai1_grid_id_to_x[i4_min_id]);
    388             s_search_node.s_mv.i2_mvy += (i4_step * gai1_grid_id_to_y[i4_min_id]);
    389             if(i4_min_id == (S32)PT_C)
    390                 break;
    391         }
    392 
    393         /* Next keep reducing stepsize by factor of 2 */
    394         i4_step >>= 1;
    395         while(i4_step)
    396         {
    397             i4_grid_mask = 0x1fe &
    398                            hme_clamp_grid_by_mvrange(&s_search_node, i4_step, ps_range_prms);
    399             //i4_grid_mask &= 0x1fe;
    400 
    401             s_err_prms.i4_grid_mask = i4_grid_mask;
    402             s_result_prms.i4_grid_mask = i4_grid_mask;
    403             s_err_prms.i4_step = i4_step;
    404             s_result_prms.i4_step = i4_step;
    405             s_err_prms.pu1_ref = ppu1_ref[s_search_node.i1_ref_idx] + i4_ref_offset;
    406             s_err_prms.pu1_ref +=
    407                 (s_search_node.s_mv.i2_mvx +
    408                  (s_search_node.s_mv.i2_mvy * s_err_prms.i4_ref_stride));
    409             if(ME_XTREME_SPEED_25 == e_me_quality_preset)
    410             {
    411                 err_prms_t *ps_err_prms = &s_err_prms;
    412                 ASSERT(ps_err_prms->i4_grid_mask != 1);
    413                 ASSERT((ps_err_prms->i4_part_mask == 4) || (ps_err_prms->i4_part_mask == 16));
    414 
    415                 /*****************************************************************/
    416                 /* In this case, there are no partial updates. The blk can be    */
    417                 /* of any type and need not be a CU. The only thing that matters */
    418                 /* here is the width of the blk, 4/8/(>=16)                      */
    419                 /*****************************************************************/
    420                 ps_me_optimised_function_list->pf_evalsad_grid_npu_MxN(&s_err_prms);
    421 
    422                 hme_update_results_grid_pu_bestn_xtreme_speed(&s_result_prms);
    423             }
    424             else
    425             {
    426                 hme_compute_grid_results(&s_err_prms, &s_result_prms, e_blk_size);
    427             }
    428 
    429             i4_min_id = s_result_prms.i4_min_id;
    430 
    431             s_search_node.s_mv.i2_mvx += (i4_step * gai1_grid_id_to_x[i4_min_id]);
    432             s_search_node.s_mv.i2_mvy += (i4_step * gai1_grid_id_to_y[i4_min_id]);
    433 
    434             i4_step >>= 1;
    435         }
    436 
    437         ps_search_candts++;
    438     }
    439 }
    440 
    441 /**
    442 ********************************************************************************
    443 *  @fn     hme_pred_search_square_step1(hme_search_prms_t *ps_search_prms,
    444 *                               layer_ctxt_t *ps_layer_ctxt)
    445 *
    446 *  @brief  Implements predictive search with square grid refinement. In this
    447 *           case, the square grid is of step 1 always. since this is considered
    448 *           to be more of a refinement search
    449 *
    450 *  @param[in,out]  ps_search_prms: All the params to this function
    451 *
    452 *  @param[in] ps_layer_ctxt: All info about this layer
    453 *
    454 *  @return None
    455 ********************************************************************************
    456 */
    457 /**
    458 ********************************************************************************
    459 *  @fn     hme_pred_search(hme_search_prms_t *ps_search_prms,
    460 *                               layer_ctxt_t *ps_layer_ctxt)
    461 *
    462 *  @brief  Implements predictive search after removing duplicate candidates
    463 *          from initial list. Each square grid (of step 1) is expanded
    464 *          to nine search pts before the dedeuplication process. one point
    465 *          cost is then evaluated for each unique node after the deduplication
    466 *          process
    467 *
    468 *  @param[in,out]  ps_search_prms: All the params to this function
    469 *
    470 *  @param[in] ps_layer_ctxt: All info about this layer
    471 *
    472 *  @return None
    473 ********************************************************************************
    474 */
    475 void hme_pred_search(
    476     hme_search_prms_t *ps_search_prms,
    477     layer_ctxt_t *ps_layer_ctxt,
    478     wgt_pred_ctxt_t *ps_wt_inp_prms,
    479     S08 i1_grid_flag,
    480     ihevce_me_optimised_function_list_t *ps_me_optimised_function_list
    481 
    482 )
    483 {
    484     /* Stores the SAD for all parts at each pt in the grid */
    485     S32 ai4_sad_grid[9 * TOT_NUM_PARTS];
    486 
    487     /* Atributes of input candidates */
    488     search_node_t *ps_search_node;
    489 
    490     search_results_t *ps_search_results;
    491     S32 i4_num_nodes, i4_candt;
    492 
    493     /* Input and reference attributes */
    494     S32 i4_inp_stride, i4_ref_stride, i4_ref_offset;
    495 
    496     /* The reference is actually an array of ptrs since there are several    */
    497     /* reference id. So an array gets passed form calling function           */
    498     U08 **ppu1_ref;
    499 
    500     /* These control number of parts and number of pts in grid to search */
    501     S32 i4_part_mask, i4_grid_mask;
    502 
    503     S32 shift_for_cu_size;
    504 
    505     /* Blk width, blk height and blk size are derived from input params */
    506     BLK_SIZE_T e_blk_size;
    507     CU_SIZE_T e_cu_size;
    508     S32 i4_blk_wd, i4_blk_ht;
    509 
    510     /*************************************************************************/
    511     /* These functions pointers for calculating Err and the result update    */
    512     /* Each carries its own parameters structure, which is generated on the  */
    513     /* fly in this function                                                  */
    514     /*************************************************************************/
    515     PF_RESULT_FXN_T pf_hme_result_fxn;
    516     PF_SAD_FXN_T pf_sad_fxn;
    517     PF_CALC_SAD_AND_RESULT pf_calc_sad_and_result;
    518     err_prms_t s_err_prms;
    519     result_upd_prms_t s_result_prms;
    520     S32 i4_num_results;
    521     S32 i4_inp_off;
    522     fullpel_refine_ctxt_t *ps_fullpel_refine_ctxt = ps_search_prms->ps_fullpel_refine_ctxt;
    523 
    524     i4_inp_stride = ps_search_prms->i4_inp_stride;
    525 
    526     /* Move to the location of the search blk in inp buffer */
    527     i4_inp_off = ps_search_prms->i4_cu_x_off;
    528     i4_inp_off += ps_search_prms->i4_cu_y_off * i4_inp_stride;
    529 
    530     /*************************************************************************/
    531     /* Depending on flag i4_use_rec, we use either input of previously       */
    532     /* encoded pictures or we use recon of previously encoded pictures.      */
    533     /*************************************************************************/
    534     if(ps_search_prms->i4_use_rec == 1)
    535     {
    536         i4_ref_stride = ps_layer_ctxt->i4_rec_stride;
    537         ppu1_ref = ps_layer_ctxt->ppu1_list_rec_fxfy;
    538     }
    539     else
    540     {
    541         i4_ref_stride = ps_layer_ctxt->i4_rec_stride;
    542         ppu1_ref = ps_layer_ctxt->ppu1_list_inp;
    543     }
    544     i4_ref_offset = (i4_ref_stride * ps_search_prms->i4_y_off) + ps_search_prms->i4_x_off;
    545     /* Obtain the blk size of the search blk. Assumed here that the search   */
    546     /* is done on a CU size, rather than any arbitrary blk size.             */
    547     ps_search_results = ps_search_prms->ps_search_results;
    548     e_blk_size = ps_search_prms->e_blk_size;
    549     i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
    550     i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
    551     e_cu_size = ps_search_results->e_cu_size;
    552 
    553     /* Assuming cu size of 8x8 as enum 0, the other will be 1, 2, 3 */
    554     /* This will also set the shift w.r.t. the base cu size of 8x8 */
    555     shift_for_cu_size = e_cu_size;
    556 
    557     ps_search_node = ps_search_prms->ps_search_nodes;
    558     i4_num_nodes = ps_search_prms->i4_num_search_nodes;
    559     i4_part_mask = ps_search_prms->i4_part_mask;
    560 
    561     /* Update the parameters used to pass to SAD */
    562     /* input ptr, strides, SAD Grid, part mask, blk width and ht */
    563     /* The above are fixed ptrs, only pu1_ref and grid mask are  */
    564     /* varying params which are updated just before calling fxn  */
    565     s_err_prms.i4_inp_stride = i4_inp_stride;
    566     s_err_prms.i4_ref_stride = i4_ref_stride;
    567     s_err_prms.i4_part_mask = i4_part_mask;
    568     s_err_prms.pi4_sad_grid = &ai4_sad_grid[0];
    569     s_err_prms.i4_blk_wd = i4_blk_wd;
    570     s_err_prms.i4_blk_ht = i4_blk_ht;
    571     s_err_prms.i4_step = 1;
    572     s_err_prms.i4_num_partitions = ps_fullpel_refine_ctxt->i4_num_valid_parts;
    573 
    574     s_result_prms.pf_mv_cost_compute = ps_search_prms->pf_mv_cost_compute;
    575     s_result_prms.ps_search_results = ps_search_results;
    576     s_result_prms.i1_ref_idx = (S08)ps_search_prms->i1_ref_idx;
    577     s_result_prms.pi4_sad_grid = ai4_sad_grid;
    578     s_result_prms.i4_part_mask = i4_part_mask;
    579     s_result_prms.i4_step = 1;
    580     pf_calc_sad_and_result = hme_get_calc_sad_and_result_fxn(
    581         i1_grid_flag,
    582         ps_search_prms->u1_is_cu_noisy,
    583         i4_part_mask,
    584         ps_fullpel_refine_ctxt->i4_num_valid_parts,
    585         ps_search_results->u1_num_results_per_part);
    586 
    587     pf_calc_sad_and_result(
    588         ps_search_prms, ps_wt_inp_prms, &s_err_prms, &s_result_prms, ppu1_ref, i4_ref_stride);
    589 }
    590 
    591 static __inline FT_CALC_SAD_AND_RESULT *hme_get_calc_sad_and_result_explicit_fxn(
    592     ihevce_me_optimised_function_list_t *ps_me_optimised_function_list,
    593     S32 i4_part_mask,
    594     S32 i4_num_partitions,
    595     S08 i1_grid_enable,
    596     U08 u1_num_results_per_part)
    597 {
    598     FT_CALC_SAD_AND_RESULT *pf_func = NULL;
    599 
    600     if(2 == u1_num_results_per_part)
    601     {
    602         if(i4_part_mask == 1)
    603         {
    604             ASSERT(i4_num_partitions == 1);
    605 
    606             if(i1_grid_enable == 0)
    607             {
    608                 pf_func =
    609                     ps_me_optimised_function_list->pf_calc_pt_sad_and_2_best_results_explicit_8x8;
    610             }
    611             else
    612             {
    613                 pf_func = ps_me_optimised_function_list
    614                               ->pf_calc_pt_sad_and_2_best_results_explicit_8x8_for_grid;
    615             }
    616         }
    617         else
    618         {
    619             ASSERT(i4_num_partitions == 5);
    620 
    621             pf_func =
    622                 ps_me_optimised_function_list->pf_calc_pt_sad_and_2_best_results_explicit_8x8_4x4;
    623         }
    624     }
    625     else if(1 == u1_num_results_per_part)
    626     {
    627         if(i4_part_mask == 1)
    628         {
    629             ASSERT(i4_num_partitions == 1);
    630 
    631             if(i1_grid_enable == 0)
    632             {
    633                 pf_func =
    634                     ps_me_optimised_function_list->pf_calc_pt_sad_and_1_best_result_explicit_8x8;
    635             }
    636             else
    637             {
    638                 pf_func = ps_me_optimised_function_list
    639                               ->pf_calc_pt_sad_and_1_best_result_explicit_8x8_for_grid;
    640             }
    641         }
    642         else
    643         {
    644             ASSERT(i4_num_partitions == 5);
    645 
    646             pf_func =
    647                 ps_me_optimised_function_list->pf_calc_pt_sad_and_1_best_result_explicit_8x8_4x4;
    648         }
    649     }
    650 
    651     return pf_func;
    652 }
    653 
    654 /**
    655 ********************************************************************************
    656 *  @fn     void hme_pred_search_no_encode(hme_search_prms_t *ps_search_prms,
    657 *                                         layer_ctxt_t *ps_layer_ctxt,
    658 *                                         wgt_pred_ctxt_t *ps_wt_inp_prms,
    659 *                                         S32 *pi4_valid_part_ids,
    660 *                                         S32 disable_refine,
    661 *                                         ME_QUALITY_PRESETS_T e_me_quality_preset)
    662 *
    663 *  @brief  Implements predictive search after removing duplicate candidates
    664 *          from initial list. Each square grid (of step 1) is expanded
    665 *          to nine search pts before the dedeuplication process. one point
    666 *          cost is then evaluated for each unique node after the deduplication
    667 *          process
    668 *
    669 *  @param[in,out]  ps_search_prms: All the params to this function
    670 *
    671 *  @param[in] ps_layer_ctxt: All info about this layer
    672 *
    673 *  @return None
    674 ********************************************************************************
    675 */
    676 void hme_pred_search_no_encode(
    677     hme_search_prms_t *ps_search_prms,
    678     layer_ctxt_t *ps_layer_ctxt,
    679     wgt_pred_ctxt_t *ps_wt_inp_prms,
    680     S32 *pi4_valid_part_ids,
    681     S32 disable_refine,
    682     ME_QUALITY_PRESETS_T e_me_quality_preset,
    683     S08 i1_grid_enable,
    684     ihevce_me_optimised_function_list_t *ps_me_optimised_function_list)
    685 {
    686     /* Stores the SAD for all parts at each pt in the grid */
    687     S32 ai4_sad_grid[9 * TOT_NUM_PARTS];
    688 
    689     /* Atributes of input candidates */
    690     search_node_t *ps_search_node;
    691     search_results_t *ps_search_results;
    692     S32 i4_num_nodes;
    693 
    694     /* Input and reference attributes */
    695     S32 i4_inp_stride, i4_ref_stride, i4_ref_offset;
    696 
    697     /* The reference is actually an array of ptrs since there are several    */
    698     /* reference id. So an array gets passed form calling function           */
    699     U08 **ppu1_ref;
    700 
    701     /* These control number of parts and number of pts in grid to search */
    702     S32 i4_part_mask;  // i4_grid_mask;
    703 
    704     S32 shift_for_cu_size;
    705     /* Blk width, blk height and blk size are derived from input params */
    706     BLK_SIZE_T e_blk_size;
    707     CU_SIZE_T e_cu_size;
    708     S32 i4_blk_wd, i4_blk_ht;
    709 
    710     /*************************************************************************/
    711     /* These functions pointers for calculating Err and the result update    */
    712     /* Each carries its own parameters structure, which is generated on the  */
    713     /* fly in this function                                                  */
    714     /*************************************************************************/
    715     PF_CALC_SAD_AND_RESULT pf_calc_sad_and_result;
    716     err_prms_t s_err_prms;
    717     result_upd_prms_t s_result_prms;
    718     S32 i4_num_results;
    719     S32 i4_search_idx = ps_search_prms->i1_ref_idx;
    720     S32 i4_inp_off;
    721     S32 i4_num_partitions;
    722 
    723     i4_inp_stride = ps_search_prms->i4_inp_stride;
    724 
    725     /* Move to the location of the search blk in inp buffer */
    726     i4_inp_off = ps_search_prms->i4_cu_x_off;
    727     i4_inp_off += ps_search_prms->i4_cu_y_off * i4_inp_stride;
    728 
    729     /*************************************************************************/
    730     /* Depending on flag i4_use_rec, we use either input of previously       */
    731     /* encoded pictures or we use recon of previously encoded pictures.      */
    732     /*************************************************************************/
    733     if(ps_search_prms->i4_use_rec == 1)
    734     {
    735         i4_ref_stride = ps_layer_ctxt->i4_rec_stride;
    736         ppu1_ref = ps_layer_ctxt->ppu1_list_rec_fxfy;
    737     }
    738     else
    739     {
    740         i4_ref_stride = ps_layer_ctxt->i4_inp_stride;
    741         ppu1_ref = ps_layer_ctxt->ppu1_list_inp;
    742     }
    743     i4_ref_offset = (i4_ref_stride * ps_search_prms->i4_y_off) + ps_search_prms->i4_x_off;
    744     /* Obtain the blk size of the search blk. Assumed here that the search   */
    745     /* is done on a CU size, rather than any arbitrary blk size.             */
    746     ps_search_results = ps_search_prms->ps_search_results;
    747     e_blk_size = ps_search_prms->e_blk_size;
    748     i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
    749     i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
    750     e_cu_size = ps_search_results->e_cu_size;
    751 
    752     /* Assuming cu size of 8x8 as enum 0, the other will be 1, 2, 3 */
    753     /* This will also set the shift w.r.t. the base cu size of 8x8 */
    754     shift_for_cu_size = e_cu_size;
    755 
    756     ps_search_node = ps_search_prms->ps_search_nodes;
    757     i4_num_nodes = ps_search_prms->i4_num_search_nodes;
    758     i4_part_mask = ps_search_prms->i4_part_mask;
    759 
    760     /*************************************************************************/
    761     /* This array stores the ids of the partitions whose                     */
    762     /* SADs are updated. Since the partitions whose SADs are updated may not */
    763     /* be in contiguous order, we supply another level of indirection.       */
    764     /*************************************************************************/
    765     i4_num_partitions = hme_create_valid_part_ids(i4_part_mask, pi4_valid_part_ids);
    766 
    767     /* Update the parameters used to pass to SAD */
    768     /* input ptr, strides, SAD Grid, part mask, blk width and ht */
    769     /* The above are fixed ptrs, only pu1_ref and grid mask are  */
    770     /* varying params which are updated just before calling fxn  */
    771     s_err_prms.i4_inp_stride = i4_inp_stride;
    772     s_err_prms.i4_ref_stride = i4_ref_stride;
    773     s_err_prms.i4_part_mask = i4_part_mask;
    774     s_err_prms.pi4_sad_grid = &ai4_sad_grid[0];
    775     s_err_prms.i4_blk_wd = i4_blk_wd;
    776     s_err_prms.i4_blk_ht = i4_blk_ht;
    777     s_err_prms.i4_step = 1;
    778     s_err_prms.pi4_valid_part_ids = pi4_valid_part_ids;
    779     s_err_prms.i4_num_partitions = i4_num_partitions;
    780 
    781     s_result_prms.pf_mv_cost_compute = ps_search_prms->pf_mv_cost_compute;
    782     s_result_prms.ps_search_results = ps_search_results;
    783     s_result_prms.pi4_valid_part_ids = pi4_valid_part_ids;
    784     s_result_prms.i1_ref_idx = (S08)ps_search_prms->i1_ref_idx;
    785     s_result_prms.pi4_sad_grid = ai4_sad_grid;
    786     s_result_prms.i4_part_mask = i4_part_mask;
    787     s_result_prms.i4_step = 1;
    788 
    789     pf_calc_sad_and_result = hme_get_calc_sad_and_result_explicit_fxn(
    790         ps_me_optimised_function_list,
    791         i4_part_mask,
    792         i4_num_partitions,
    793         i1_grid_enable,
    794         ps_search_results->u1_num_results_per_part);
    795 
    796     pf_calc_sad_and_result(
    797         ps_search_prms, ps_wt_inp_prms, &s_err_prms, &s_result_prms, ppu1_ref, i4_ref_stride);
    798 }
    799