Home | History | Annotate | Download | only in encoder
      1 /******************************************************************************
      2  *
      3  * Copyright (C) 2018 The Android Open Source Project
      4  *
      5  * Licensed under the Apache License, Version 2.0 (the "License");
      6  * you may not use this file except in compliance with the License.
      7  * You may obtain a copy of the License at:
      8  *
      9  * http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  *
     17  *****************************************************************************
     18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
     19 */
     20 /*!
     21 ******************************************************************************
     22 * \file ihevce_me_utils_instr_set_router.c
     23 *
     24 * \brief
     25 *    This file contains function pointer initialization of me utility
     26 *    functions
     27 *
     28 * \date
     29 *    15/07/2013
     30 *
     31 * \author
     32 *    Ittiam
     33 *
     34 * List of Functions
     35 *  ihevce_me_instr_set_router()
     36 *
     37 ******************************************************************************
     38 */
     39 
     40 /*****************************************************************************/
     41 /* File Includes                                                             */
     42 /*****************************************************************************/
     43 /* System include files */
     44 #include <stdio.h>
     45 #include <string.h>
     46 #include <assert.h>
     47 
     48 /* User include files */
     49 #include "ihevc_typedefs.h"
     50 #include "itt_video_api.h"
     51 #include "ihevc_chroma_itrans_recon.h"
     52 #include "ihevc_chroma_intra_pred.h"
     53 #include "ihevc_debug.h"
     54 #include "ihevc_deblk.h"
     55 #include "ihevc_defs.h"
     56 #include "ihevc_itrans_recon.h"
     57 #include "ihevc_intra_pred.h"
     58 #include "ihevc_inter_pred.h"
     59 #include "ihevc_macros.h"
     60 #include "ihevc_mem_fns.h"
     61 #include "ihevc_padding.h"
     62 #include "ihevc_quant_iquant_ssd.h"
     63 #include "ihevc_resi_trans.h"
     64 #include "ihevc_sao.h"
     65 #include "ihevc_structs.h"
     66 #include "ihevc_weighted_pred.h"
     67 #include "ihevc_platform_macros.h"
     68 
     69 #include "rc_cntrl_param.h"
     70 #include "rc_frame_info_collector.h"
     71 #include "rc_look_ahead_params.h"
     72 
     73 #include "ihevce_api.h"
     74 #include "ihevce_defs.h"
     75 #include "ihevce_lap_enc_structs.h"
     76 #include "ihevce_multi_thrd_structs.h"
     77 #include "ihevce_function_selector.h"
     78 #include "ihevce_me_common_defs.h"
     79 #include "ihevce_enc_structs.h"
     80 #include "ihevce_had_satd.h"
     81 #include "ihevce_cmn_utils_instr_set_router.h"
     82 
     83 #include "hme_datatype.h"
     84 #include "hme_common_defs.h"
     85 #include "hme_common_utils.h"
     86 #include "hme_interface.h"
     87 #include "hme_defs.h"
     88 #include "hme_err_compute.h"
     89 #include "hme_globals.h"
     90 
     91 #include "ihevce_me_instr_set_router.h"
     92 
     93 /*****************************************************************************/
     94 /* Globals                                                                   */
     95 /*****************************************************************************/
     96 static FT_SAD_EVALUATOR *gapf_sad_pt_npu[NUM_BLK_SIZES];
     97 static FT_PART_SADS_EVALUATOR_16X16CU *gpf_part_sads_evaluator_16x16CU;
     98 static FT_PART_SADS_EVALUATOR *gpf_part_sads_evaluator_MxM;
     99 static FT_SAD_EVALUATOR *gpf_sad_grid_mxn;
    100 /* 9 => Number of function types */
    101 /* 2 => Number of results to store */
    102 static FT_CALC_SAD_AND_RESULT *gapf_calc_sad_and_result_fxn[9][2];
    103 
    104 static U08 gau1_calc_sad_and_result[2][2][4][TOT_NUM_PARTS] = {
    105     //grid flag = 0
    106     { //noise = 0
    107       { //NxN or NxN & SMP
    108         { 1, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4 },
    109         //SMP only
    110         { 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4 },
    111         //AMP
    112         { 1, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4 },
    113         //2Nx2N only, i.e. num_parts = 1
    114         { 1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 } },
    115       //noise = 1
    116       { { 5, 7, 7, 7, 6, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8 },
    117         { 5, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8 },
    118         { 5, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8 },
    119         { 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 } } },
    120 
    121     //grid flag = 1
    122     { //noise = 0
    123       { { 0, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4 },
    124         { 0, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4 },
    125         { 0, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4 },
    126         { 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 } },
    127       //noise = 1
    128       { { 0, 7, 7, 7, 6, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8 },
    129         { 0, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8 },
    130         { 0, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8 },
    131         { 0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 } } }
    132 };
    133 
    134 /*****************************************************************************/
    135 /* Function Definitions                                                      */
    136 /*****************************************************************************/
    137 /*!
    138 ******************************************************************************
    139 * \if Function name : ihevce_me_instr_set_router \endif
    140 *
    141 * \brief
    142 *    Function pointer initialization of me utils struct
    143 *
    144 *****************************************************************************
    145 */
    146 void ihevce_me_instr_set_router(ihevce_me_optimised_function_list_t *ps_func_list, IV_ARCH_T e_arch)
    147 {
    148     // clang-format off
    149 #ifdef DISABLE_AVX2_INTR
    150     e_arch = (e_arch == ARCH_X86_AVX2) ? ARCH_X86_AVX : e_arch;
    151 #endif
    152 
    153     switch(e_arch)
    154     {
    155 #ifdef ENABLE_NEON
    156     case ARCH_ARM_A9Q:
    157     case ARCH_ARM_V8_NEON:
    158         ps_func_list->pf_calc_pt_sad_and_1_best_result_explicit_8x8 = hme_calc_pt_sad_and_result_explicit;
    159         ps_func_list->pf_calc_pt_sad_and_1_best_result_explicit_8x8_4x4 = hme_calc_pt_sad_and_result_explicit;
    160         ps_func_list->pf_calc_pt_sad_and_1_best_result_explicit_8x8_for_grid = hme_calc_pt_sad_and_result_explicit;
    161         ps_func_list->pf_calc_pt_sad_and_1_best_result_explicit_generic = hme_calc_pt_sad_and_result_explicit;
    162         ps_func_list->pf_calc_pt_sad_and_2_best_results_explicit_8x8 = hme_calc_pt_sad_and_result_explicit;
    163         ps_func_list->pf_calc_pt_sad_and_2_best_results_explicit_8x8_4x4 = hme_calc_pt_sad_and_result_explicit;
    164         ps_func_list->pf_calc_pt_sad_and_2_best_results_explicit_8x8_for_grid = hme_calc_pt_sad_and_result_explicit;
    165         ps_func_list->pf_calc_pt_sad_and_2_best_results_explicit_generic = hme_calc_pt_sad_and_result_explicit;
    166         ps_func_list->pf_calc_sad_and_1_best_result_generic = hme_calc_sad_and_1_best_result;
    167         ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_generic = hme_calc_stim_injected_sad_and_1_best_result;
    168         ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_part_eq_1 = hme_calc_stim_injected_sad_and_1_best_result;
    169         ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_square_parts = hme_calc_stim_injected_sad_and_1_best_result;
    170         ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_part_lt_9 = hme_calc_stim_injected_sad_and_1_best_result;
    171         ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_part_lt_17 = hme_calc_stim_injected_sad_and_1_best_result;
    172         ps_func_list->pf_compute_variance_for_all_parts = hme_compute_variance_for_all_parts;
    173         ps_func_list->pf_compute_stim_injected_distortion_for_all_parts = hme_compute_stim_injected_distortion_for_all_parts;
    174         ps_func_list->pf_calc_sad_and_1_best_result_num_part_1_for_grid = hme_calc_sad_and_1_best_result_neon;
    175         ps_func_list->pf_calc_sad_and_1_best_result_num_part_eq_1 = hme_calc_sad_and_1_best_result_neon;
    176         ps_func_list->pf_calc_sad_and_1_best_result_num_part_lt_17 = hme_calc_sad_and_1_best_result_neon;
    177         ps_func_list->pf_calc_sad_and_1_best_result_num_part_lt_9 = hme_calc_sad_and_1_best_result_neon;
    178         ps_func_list->pf_calc_sad_and_1_best_result_num_square_parts = hme_calc_sad_and_1_best_result_neon;
    179         ps_func_list->pf_calc_sad_and_1_best_result_subpel_generic = hme_calc_sad_and_1_best_result_subpel;
    180         ps_func_list->pf_calc_sad_and_1_best_result_subpel_num_part_eq_1 = hme_calc_sad_and_1_best_result_subpel_neon;
    181         ps_func_list->pf_calc_sad_and_1_best_result_subpel_num_part_lt_17 = hme_calc_sad_and_1_best_result_subpel_neon;
    182         ps_func_list->pf_calc_sad_and_1_best_result_subpel_num_part_lt_9 = hme_calc_sad_and_1_best_result_subpel_neon;
    183         ps_func_list->pf_calc_sad_and_1_best_result_subpel_square_parts = hme_calc_sad_and_1_best_result_subpel_neon;
    184         ps_func_list->pf_combine_4x4_sads_and_compute_cost_high_quality = hme_combine_4x4_sads_and_compute_cost_high_quality_neon;
    185         ps_func_list->pf_combine_4x4_sads_and_compute_cost_high_speed = hme_combine_4x4_sads_and_compute_cost_high_speed_neon;
    186         ps_func_list->pf_compute_4x4_sads_for_16x16_blk = compute_4x4_sads_for_16x16_blk_neon;
    187         ps_func_list->pf_evalsad_grid_npu_MxN = hme_evalsad_grid_npu_MxN_neon;
    188         ps_func_list->pf_evalsad_grid_pu_MxM = compute_part_sads_for_MxM_blk_neon;
    189         ps_func_list->pf_evalsad_pt_npu_12x16_8bit = hme_evalsad_pt_npu_MxN_8bit_neon;
    190         ps_func_list->pf_evalsad_pt_npu_16x12_8bit = hme_evalsad_pt_npu_MxN_8bit_neon;
    191         ps_func_list->pf_evalsad_pt_npu_16x4_8bit = hme_evalsad_pt_npu_MxN_8bit_neon;
    192         ps_func_list->pf_evalsad_pt_npu_24x32_8bit = hme_evalsad_pt_npu_MxN_8bit_neon;
    193         ps_func_list->pf_evalsad_pt_npu_8x4_8bit = hme_evalsad_pt_npu_MxN_8bit_neon;
    194         ps_func_list->pf_evalsad_pt_npu_mxn_8bit = hme_evalsad_pt_npu_MxN_8bit_neon;
    195         ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit = hme_evalsad_pt_npu_MxN_8bit_neon;
    196         ps_func_list->pf_evalsad_pt_npu_width_multiple_4_8bit = hme_evalsad_pt_npu_MxN_8bit_neon;
    197         ps_func_list->pf_evalsad_pt_npu_width_multiple_8_8bit = hme_evalsad_pt_npu_MxN_8bit_neon;
    198         ps_func_list->pf_get_wt_inp_8x8 = hme_get_wt_inp_8x8_neon;
    199         ps_func_list->pf_get_wt_inp_ctb = hme_get_wt_inp_ctb_neon;
    200         ps_func_list->pf_get_wt_inp_generic = hme_get_wt_inp;
    201         ps_func_list->pf_mv_clipper = hme_mv_clipper;
    202         ps_func_list->pf_qpel_interp_avg_1pt = hme_qpel_interp_avg_1pt_neon;
    203         ps_func_list->pf_qpel_interp_avg_2pt_horz_with_reuse = hme_qpel_interp_avg_2pt_horz_with_reuse_neon;
    204         ps_func_list->pf_qpel_interp_avg_2pt_vert_with_reuse = hme_qpel_interp_avg_2pt_vert_with_reuse_neon;
    205         ps_func_list->pf_qpel_interp_avg_generic = hme_qpel_interp_avg_neon;
    206         ps_func_list->pf_store_4x4_sads_high_quality = hme_store_4x4_sads_high_quality_neon;
    207         ps_func_list->pf_store_4x4_sads_high_speed = hme_store_4x4_sads_high_speed_neon;
    208         break;
    209 #endif
    210     default:
    211         ps_func_list->pf_calc_pt_sad_and_1_best_result_explicit_8x8 = hme_calc_pt_sad_and_result_explicit;
    212         ps_func_list->pf_calc_pt_sad_and_1_best_result_explicit_8x8_4x4 = hme_calc_pt_sad_and_result_explicit;
    213         ps_func_list->pf_calc_pt_sad_and_1_best_result_explicit_8x8_for_grid = hme_calc_pt_sad_and_result_explicit;
    214         ps_func_list->pf_calc_pt_sad_and_1_best_result_explicit_generic = hme_calc_pt_sad_and_result_explicit;
    215         ps_func_list->pf_calc_pt_sad_and_2_best_results_explicit_8x8 = hme_calc_pt_sad_and_result_explicit;
    216         ps_func_list->pf_calc_pt_sad_and_2_best_results_explicit_8x8_4x4 = hme_calc_pt_sad_and_result_explicit;
    217         ps_func_list->pf_calc_pt_sad_and_2_best_results_explicit_8x8_for_grid = hme_calc_pt_sad_and_result_explicit;
    218         ps_func_list->pf_calc_pt_sad_and_2_best_results_explicit_generic = hme_calc_pt_sad_and_result_explicit;
    219         ps_func_list->pf_calc_sad_and_1_best_result_generic = hme_calc_sad_and_1_best_result;
    220         ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_generic = hme_calc_stim_injected_sad_and_1_best_result;
    221         ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_part_eq_1 = hme_calc_stim_injected_sad_and_1_best_result;
    222         ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_square_parts = hme_calc_stim_injected_sad_and_1_best_result;
    223         ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_part_lt_9 = hme_calc_stim_injected_sad_and_1_best_result;
    224         ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_part_lt_17 = hme_calc_stim_injected_sad_and_1_best_result;
    225         ps_func_list->pf_compute_variance_for_all_parts = hme_compute_variance_for_all_parts;
    226         ps_func_list->pf_compute_stim_injected_distortion_for_all_parts = hme_compute_stim_injected_distortion_for_all_parts;
    227         ps_func_list->pf_calc_sad_and_1_best_result_num_part_1_for_grid = hme_calc_sad_and_1_best_result;
    228         ps_func_list->pf_calc_sad_and_1_best_result_num_part_eq_1 = hme_calc_sad_and_1_best_result;
    229         ps_func_list->pf_calc_sad_and_1_best_result_num_part_lt_17 = hme_calc_sad_and_1_best_result;
    230         ps_func_list->pf_calc_sad_and_1_best_result_num_part_lt_9 = hme_calc_sad_and_1_best_result;
    231         ps_func_list->pf_calc_sad_and_1_best_result_num_square_parts = hme_calc_sad_and_1_best_result;
    232         ps_func_list->pf_calc_sad_and_1_best_result_subpel_generic = hme_calc_sad_and_1_best_result_subpel;
    233         ps_func_list->pf_calc_sad_and_1_best_result_subpel_num_part_eq_1 = hme_calc_sad_and_1_best_result_subpel;
    234         ps_func_list->pf_calc_sad_and_1_best_result_subpel_num_part_lt_17 = hme_calc_sad_and_1_best_result_subpel;
    235         ps_func_list->pf_calc_sad_and_1_best_result_subpel_num_part_lt_9 = hme_calc_sad_and_1_best_result_subpel;
    236         ps_func_list->pf_calc_sad_and_1_best_result_subpel_square_parts = hme_calc_sad_and_1_best_result_subpel;
    237         ps_func_list->pf_combine_4x4_sads_and_compute_cost_high_quality = hme_combine_4x4_sads_and_compute_cost_high_quality;
    238         ps_func_list->pf_combine_4x4_sads_and_compute_cost_high_speed = hme_combine_4x4_sads_and_compute_cost_high_speed;
    239         ps_func_list->pf_compute_4x4_sads_for_16x16_blk = compute_4x4_sads_for_16x16_blk;
    240         ps_func_list->pf_evalsad_grid_npu_MxN = hme_evalsad_grid_npu_MxN;
    241         ps_func_list->pf_evalsad_grid_pu_MxM = compute_part_sads_for_MxM_blk;
    242         ps_func_list->pf_evalsad_pt_npu_12x16_8bit = hme_evalsad_pt_npu_MxN_8bit;
    243         ps_func_list->pf_evalsad_pt_npu_16x12_8bit = hme_evalsad_pt_npu_MxN_8bit;
    244         ps_func_list->pf_evalsad_pt_npu_16x4_8bit = hme_evalsad_pt_npu_MxN_8bit;
    245         ps_func_list->pf_evalsad_pt_npu_24x32_8bit = hme_evalsad_pt_npu_MxN_8bit;
    246         ps_func_list->pf_evalsad_pt_npu_8x4_8bit = hme_evalsad_pt_npu_MxN_8bit;
    247         ps_func_list->pf_evalsad_pt_npu_mxn_8bit = hme_evalsad_pt_npu_MxN_8bit;
    248         ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit = hme_evalsad_pt_npu_MxN_8bit;
    249         ps_func_list->pf_evalsad_pt_npu_width_multiple_4_8bit = hme_evalsad_pt_npu_MxN_8bit;
    250         ps_func_list->pf_evalsad_pt_npu_width_multiple_8_8bit = hme_evalsad_pt_npu_MxN_8bit;
    251         ps_func_list->pf_get_wt_inp_8x8 = hme_get_wt_inp;
    252         ps_func_list->pf_get_wt_inp_ctb = hme_get_wt_inp;
    253         ps_func_list->pf_get_wt_inp_generic = hme_get_wt_inp;
    254         ps_func_list->pf_mv_clipper = hme_mv_clipper;
    255         ps_func_list->pf_qpel_interp_avg_1pt = hme_qpel_interp_avg_1pt;
    256         ps_func_list->pf_qpel_interp_avg_2pt_horz_with_reuse = hme_qpel_interp_avg_2pt_horz_with_reuse;
    257         ps_func_list->pf_qpel_interp_avg_2pt_vert_with_reuse = hme_qpel_interp_avg_2pt_vert_with_reuse;
    258         ps_func_list->pf_qpel_interp_avg_generic = hme_qpel_interp_avg;
    259         ps_func_list->pf_store_4x4_sads_high_quality = hme_store_4x4_sads_high_quality;
    260         ps_func_list->pf_store_4x4_sads_high_speed = hme_store_4x4_sads_high_speed;
    261         break;
    262     }
    263 
    264     gapf_sad_pt_npu[BLK_4x4] = ps_func_list->pf_evalsad_pt_npu_width_multiple_4_8bit;
    265     gapf_sad_pt_npu[BLK_4x8] = ps_func_list->pf_evalsad_pt_npu_width_multiple_4_8bit;
    266     gapf_sad_pt_npu[BLK_8x4] = ps_func_list->pf_evalsad_pt_npu_8x4_8bit;
    267     gapf_sad_pt_npu[BLK_8x8] = ps_func_list->pf_evalsad_pt_npu_width_multiple_8_8bit;
    268     gapf_sad_pt_npu[BLK_4x16] = ps_func_list->pf_evalsad_pt_npu_width_multiple_4_8bit;
    269     gapf_sad_pt_npu[BLK_8x16] = ps_func_list->pf_evalsad_pt_npu_width_multiple_8_8bit;
    270     gapf_sad_pt_npu[BLK_12x16] = ps_func_list->pf_evalsad_pt_npu_12x16_8bit;
    271     gapf_sad_pt_npu[BLK_16x4] = ps_func_list->pf_evalsad_pt_npu_16x4_8bit;
    272     gapf_sad_pt_npu[BLK_16x8] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit;
    273     gapf_sad_pt_npu[BLK_16x12] = ps_func_list->pf_evalsad_pt_npu_16x12_8bit;
    274     gapf_sad_pt_npu[BLK_16x16] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit;
    275     gapf_sad_pt_npu[BLK_8x32] = ps_func_list->pf_evalsad_pt_npu_width_multiple_8_8bit;
    276     gapf_sad_pt_npu[BLK_16x32] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit;
    277     gapf_sad_pt_npu[BLK_24x32] = ps_func_list->pf_evalsad_pt_npu_24x32_8bit;
    278     gapf_sad_pt_npu[BLK_32x8] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit;
    279     gapf_sad_pt_npu[BLK_32x16] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit;
    280     gapf_sad_pt_npu[BLK_32x8] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit;
    281     gapf_sad_pt_npu[BLK_32x16] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit;
    282     gapf_sad_pt_npu[BLK_32x24] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit;
    283     gapf_sad_pt_npu[BLK_32x32] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit;
    284     gapf_sad_pt_npu[BLK_16x64] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit;
    285     gapf_sad_pt_npu[BLK_32x64] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit;
    286     gapf_sad_pt_npu[BLK_48x64] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit;
    287     gapf_sad_pt_npu[BLK_64x16] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit;
    288     gapf_sad_pt_npu[BLK_64x32] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit;
    289     gapf_sad_pt_npu[BLK_64x48] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit;
    290     gapf_sad_pt_npu[BLK_64x64] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit;
    291 
    292     gpf_part_sads_evaluator_16x16CU = ps_func_list->pf_compute_4x4_sads_for_16x16_blk;
    293     gpf_part_sads_evaluator_MxM = ps_func_list->pf_evalsad_grid_pu_MxM;
    294 
    295     gpf_sad_grid_mxn = ps_func_list->pf_evalsad_grid_npu_MxN;
    296 
    297     gapf_calc_sad_and_result_fxn[0][0] = ps_func_list->pf_calc_sad_and_1_best_result_num_part_1_for_grid;
    298     gapf_calc_sad_and_result_fxn[1][0] = ps_func_list->pf_calc_sad_and_1_best_result_num_part_eq_1;
    299     gapf_calc_sad_and_result_fxn[2][0] = ps_func_list->pf_calc_sad_and_1_best_result_num_square_parts;
    300     gapf_calc_sad_and_result_fxn[3][0] = ps_func_list->pf_calc_sad_and_1_best_result_num_part_lt_9;
    301     gapf_calc_sad_and_result_fxn[4][0] = ps_func_list->pf_calc_sad_and_1_best_result_num_part_lt_17;
    302     gapf_calc_sad_and_result_fxn[5][0] = ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_part_eq_1;
    303     gapf_calc_sad_and_result_fxn[6][0] = ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_square_parts;
    304     gapf_calc_sad_and_result_fxn[7][0] = ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_part_lt_9;
    305     gapf_calc_sad_and_result_fxn[8][0] = ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_part_lt_17;
    306     gapf_calc_sad_and_result_fxn[0][1] = ps_func_list->pf_calc_sad_and_2_best_results_num_part_1_for_grid;
    307     gapf_calc_sad_and_result_fxn[1][1] = ps_func_list->pf_calc_sad_and_2_best_results_num_part_eq_1;
    308     gapf_calc_sad_and_result_fxn[2][1] = ps_func_list->pf_calc_sad_and_2_best_results_num_square_parts;
    309     gapf_calc_sad_and_result_fxn[3][1] = ps_func_list->pf_calc_sad_and_2_best_results_num_part_lt_9;
    310     gapf_calc_sad_and_result_fxn[4][1] = ps_func_list->pf_calc_sad_and_2_best_results_num_part_lt_17;
    311     gapf_calc_sad_and_result_fxn[5][1] = ps_func_list->pf_calc_stim_injected_sad_and_2_best_results_num_part_eq_1;
    312     gapf_calc_sad_and_result_fxn[6][1] = ps_func_list->pf_calc_stim_injected_sad_and_2_best_results_num_square_parts;
    313     gapf_calc_sad_and_result_fxn[7][1] = ps_func_list->pf_calc_stim_injected_sad_and_2_best_results_num_part_lt_9;
    314     gapf_calc_sad_and_result_fxn[8][1] = ps_func_list->pf_calc_stim_injected_sad_and_2_best_results_num_part_lt_17;
    315 }
    316 // clang-format on
    317 
    318 FT_CALC_SAD_AND_RESULT *hme_get_calc_sad_and_result_fxn(
    319     S08 i1_grid_flag, U08 u1_is_cu_noisy, S32 i4_part_mask, S32 num_parts, S32 num_results)
    320 {
    321     U08 u1_index;
    322 
    323     ASSERT((1 == num_results) || (2 == num_results));
    324 
    325     u1_index =
    326         gau1_calc_sad_and_result[i1_grid_flag][u1_is_cu_noisy]
    327                                 [(!!(i4_part_mask & (ENABLE_SMP | ENABLE_NxN)) &&
    328                                   !(i4_part_mask & ENABLE_AMP))
    329                                      ? (!!(i4_part_mask & ENABLE_NxN) ? 0 : 1)
    330                                      : (!!(i4_part_mask & ENABLE_AMP) ? 2 : 3)][num_parts - 1];
    331 
    332     return gapf_calc_sad_and_result_fxn[u1_index][2 == num_results];
    333 }
    334 
    335 void hme_evalsad_grid_pu_MxM(err_prms_t *ps_prms)
    336 {
    337     grid_ctxt_t s_grid;
    338     cand_t as_candt[9];
    339 
    340     S32 *api4_sad_grid[TOT_NUM_PARTS];
    341 
    342     hme_mv_t s_mv = { 0, 0 };
    343 
    344     CU_SIZE_T e_cu_size = (CU_SIZE_T)(hme_get_range(ps_prms->i4_blk_wd) - 4);
    345 
    346     S32 i4_ref_idx = 0, i;
    347     S32 num_candts = 0;
    348 
    349     s_grid.num_grids = 1;
    350     s_grid.ref_buf_stride = ps_prms->i4_ref_stride;
    351     s_grid.grd_sz_y_x = ((ps_prms->i4_step << 16) | ps_prms->i4_step);
    352     s_grid.ppu1_ref_ptr = &ps_prms->pu1_ref;
    353     s_grid.pi4_grd_mask = &ps_prms->i4_grid_mask;
    354     s_grid.p_mv = &s_mv;
    355     s_grid.p_ref_idx = &i4_ref_idx;
    356 
    357     for(i = 0; i < 9; i++)
    358     {
    359         if(s_grid.pi4_grd_mask[0] & (1 << i))
    360         {
    361             num_candts++;
    362         }
    363     }
    364 
    365     for(i = 0; i < TOT_NUM_PARTS; i++)
    366     {
    367         api4_sad_grid[i] = &ps_prms->pi4_sad_grid[i * num_candts];
    368     }
    369 
    370     gpf_part_sads_evaluator_MxM(
    371         &s_grid,
    372         ps_prms->pu1_inp,
    373         ps_prms->i4_inp_stride,
    374         (WORD32 **)api4_sad_grid,
    375         as_candt,
    376         &num_candts,
    377         e_cu_size);
    378 }
    379 
    380 PF_SAD_FXN_T hme_get_sad_fxn(BLK_SIZE_T e_blk_size, S32 i4_grid_mask, S32 i4_part_mask)
    381 {
    382     S32 i4_grid_en = ((i4_grid_mask & 0x1fe) != 0);
    383 
    384     if(i4_grid_en)
    385     {
    386         if(i4_part_mask & (i4_part_mask - 1))
    387         {
    388             if(BLK_16x16 == e_blk_size)
    389             {
    390                 return hme_evalsad_grid_pu_16x16;
    391             }
    392             else
    393             {
    394                 return hme_evalsad_grid_pu_MxM;
    395             }
    396         }
    397         else
    398         {
    399             return gpf_sad_grid_mxn;
    400         }
    401     }
    402     else
    403     {
    404         if(i4_part_mask & (i4_part_mask - 1))
    405         {
    406             if(BLK_16x16 == e_blk_size)
    407             {
    408                 return hme_evalsad_grid_pu_16x16;
    409             }
    410             else
    411             {
    412                 return hme_evalsad_grid_pu_MxM;
    413             }
    414         }
    415         else
    416         {
    417             return gapf_sad_pt_npu[e_blk_size];
    418         }
    419     }
    420 }
    421 
    422 void ihevce_sifter_sad_fxn_assigner(FT_SAD_EVALUATOR **ppf_evalsad_pt_npu_mxn, IV_ARCH_T e_arch)
    423 {
    424     switch(e_arch)
    425     {
    426 #ifdef ENABLE_NEON
    427     case ARCH_ARM_A9Q:
    428     case ARCH_ARM_V8_NEON:
    429         ppf_evalsad_pt_npu_mxn[0] = hme_evalsad_pt_npu_MxN_8bit_neon;
    430         break;
    431 #endif
    432 
    433     default:
    434         ppf_evalsad_pt_npu_mxn[0] = hme_evalsad_pt_npu_MxN_8bit;
    435         break;
    436     }
    437 }
    438