1 /****************************************************************************** 2 * 3 * Copyright (C) 2018 The Android Open Source Project 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ***************************************************************************** 18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore 19 */ 20 /*! 21 ****************************************************************************** 22 * \file ihevce_me_utils_instr_set_router.c 23 * 24 * \brief 25 * This file contains function pointer initialization of me utility 26 * functions 27 * 28 * \date 29 * 15/07/2013 30 * 31 * \author 32 * Ittiam 33 * 34 * List of Functions 35 * ihevce_me_utils_instr_set_router() 36 * 37 ****************************************************************************** 38 */ 39 40 /*****************************************************************************/ 41 /* File Includes */ 42 /*****************************************************************************/ 43 /* System include files */ 44 #include <stdio.h> 45 #include <string.h> 46 #include <assert.h> 47 48 /* User include files */ 49 #include "ihevc_typedefs.h" 50 #include "itt_video_api.h" 51 #include "ihevc_chroma_itrans_recon.h" 52 #include "ihevc_chroma_intra_pred.h" 53 #include "ihevc_debug.h" 54 #include "ihevc_deblk.h" 55 #include "ihevc_defs.h" 56 #include "ihevc_itrans_recon.h" 57 #include "ihevc_intra_pred.h" 58 #include "ihevc_inter_pred.h" 59 #include "ihevc_macros.h" 60 #include "ihevc_mem_fns.h" 61 #include "ihevc_padding.h" 62 #include "ihevc_quant_iquant_ssd.h" 63 #include "ihevc_resi_trans.h" 64 #include "ihevc_sao.h" 65 #include "ihevc_structs.h" 66 #include "ihevc_weighted_pred.h" 67 #include "ihevc_platform_macros.h" 68 69 #include "rc_cntrl_param.h" 70 #include "rc_frame_info_collector.h" 71 #include "rc_look_ahead_params.h" 72 73 #include "ihevce_api.h" 74 #include "ihevce_defs.h" 75 #include "ihevce_lap_enc_structs.h" 76 #include "ihevce_multi_thrd_structs.h" 77 #include "ihevce_function_selector.h" 78 #include "ihevce_me_common_defs.h" 79 #include "ihevce_enc_structs.h" 80 #include "ihevce_had_satd.h" 81 #include "ihevce_cmn_utils_instr_set_router.h" 82 83 #include "hme_datatype.h" 84 #include "hme_common_defs.h" 85 #include "hme_common_utils.h" 86 #include "hme_interface.h" 87 #include "hme_defs.h" 88 #include "hme_err_compute.h" 89 #include "hme_globals.h" 90 91 #include "ihevce_me_instr_set_router.h" 92 93 /*****************************************************************************/ 94 /* Globals */ 95 /*****************************************************************************/ 96 static FT_SAD_EVALUATOR *gapf_sad_pt_npu[NUM_BLK_SIZES]; 97 static FT_PART_SADS_EVALUATOR_16X16CU *gpf_part_sads_evaluator_16x16CU; 98 static FT_PART_SADS_EVALUATOR *gpf_part_sads_evaluator_MxM; 99 static FT_SAD_EVALUATOR *gpf_sad_grid_mxn; 100 /* 9 => Number of function types */ 101 /* 2 => Number of results to store */ 102 static FT_CALC_SAD_AND_RESULT *gapf_calc_sad_and_result_fxn[9][2]; 103 104 static U08 gau1_calc_sad_and_result[2][2][4][TOT_NUM_PARTS] = { 105 //grid flag = 0 106 { //noise = 0 107 { //NxN or NxN & SMP 108 { 1, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4 }, 109 //SMP only 110 { 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4 }, 111 //AMP 112 { 1, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4 }, 113 //2Nx2N only, i.e. num_parts = 1 114 { 1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 } }, 115 //noise = 1 116 { { 5, 7, 7, 7, 6, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8 }, 117 { 5, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8 }, 118 { 5, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8 }, 119 { 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 } } }, 120 121 //grid flag = 1 122 { //noise = 0 123 { { 0, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4 }, 124 { 0, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4 }, 125 { 0, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4 }, 126 { 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 } }, 127 //noise = 1 128 { { 0, 7, 7, 7, 6, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8 }, 129 { 0, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8 }, 130 { 0, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8 }, 131 { 0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 } } } 132 }; 133 134 /*****************************************************************************/ 135 /* Function Definitions */ 136 /*****************************************************************************/ 137 /*! 138 ****************************************************************************** 139 * \if Function name : ihevce_me_instr_set_router \endif 140 * 141 * \brief 142 * Function pointer initialization of me utils struct 143 * 144 ***************************************************************************** 145 */ 146 void ihevce_me_instr_set_router(ihevce_me_optimised_function_list_t *ps_func_list, IV_ARCH_T e_arch) 147 { 148 // clang-format off 149 #ifdef DISABLE_AVX2_INTR 150 e_arch = (e_arch == ARCH_X86_AVX2) ? ARCH_X86_AVX : e_arch; 151 #endif 152 153 switch(e_arch) 154 { 155 #ifdef ENABLE_NEON 156 case ARCH_ARM_A9Q: 157 case ARCH_ARM_V8_NEON: 158 ps_func_list->pf_calc_pt_sad_and_1_best_result_explicit_8x8 = hme_calc_pt_sad_and_result_explicit; 159 ps_func_list->pf_calc_pt_sad_and_1_best_result_explicit_8x8_4x4 = hme_calc_pt_sad_and_result_explicit; 160 ps_func_list->pf_calc_pt_sad_and_1_best_result_explicit_8x8_for_grid = hme_calc_pt_sad_and_result_explicit; 161 ps_func_list->pf_calc_pt_sad_and_1_best_result_explicit_generic = hme_calc_pt_sad_and_result_explicit; 162 ps_func_list->pf_calc_pt_sad_and_2_best_results_explicit_8x8 = hme_calc_pt_sad_and_result_explicit; 163 ps_func_list->pf_calc_pt_sad_and_2_best_results_explicit_8x8_4x4 = hme_calc_pt_sad_and_result_explicit; 164 ps_func_list->pf_calc_pt_sad_and_2_best_results_explicit_8x8_for_grid = hme_calc_pt_sad_and_result_explicit; 165 ps_func_list->pf_calc_pt_sad_and_2_best_results_explicit_generic = hme_calc_pt_sad_and_result_explicit; 166 ps_func_list->pf_calc_sad_and_1_best_result_generic = hme_calc_sad_and_1_best_result; 167 ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_generic = hme_calc_stim_injected_sad_and_1_best_result; 168 ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_part_eq_1 = hme_calc_stim_injected_sad_and_1_best_result; 169 ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_square_parts = hme_calc_stim_injected_sad_and_1_best_result; 170 ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_part_lt_9 = hme_calc_stim_injected_sad_and_1_best_result; 171 ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_part_lt_17 = hme_calc_stim_injected_sad_and_1_best_result; 172 ps_func_list->pf_compute_variance_for_all_parts = hme_compute_variance_for_all_parts; 173 ps_func_list->pf_compute_stim_injected_distortion_for_all_parts = hme_compute_stim_injected_distortion_for_all_parts; 174 ps_func_list->pf_calc_sad_and_1_best_result_num_part_1_for_grid = hme_calc_sad_and_1_best_result_neon; 175 ps_func_list->pf_calc_sad_and_1_best_result_num_part_eq_1 = hme_calc_sad_and_1_best_result_neon; 176 ps_func_list->pf_calc_sad_and_1_best_result_num_part_lt_17 = hme_calc_sad_and_1_best_result_neon; 177 ps_func_list->pf_calc_sad_and_1_best_result_num_part_lt_9 = hme_calc_sad_and_1_best_result_neon; 178 ps_func_list->pf_calc_sad_and_1_best_result_num_square_parts = hme_calc_sad_and_1_best_result_neon; 179 ps_func_list->pf_calc_sad_and_1_best_result_subpel_generic = hme_calc_sad_and_1_best_result_subpel; 180 ps_func_list->pf_calc_sad_and_1_best_result_subpel_num_part_eq_1 = hme_calc_sad_and_1_best_result_subpel_neon; 181 ps_func_list->pf_calc_sad_and_1_best_result_subpel_num_part_lt_17 = hme_calc_sad_and_1_best_result_subpel_neon; 182 ps_func_list->pf_calc_sad_and_1_best_result_subpel_num_part_lt_9 = hme_calc_sad_and_1_best_result_subpel_neon; 183 ps_func_list->pf_calc_sad_and_1_best_result_subpel_square_parts = hme_calc_sad_and_1_best_result_subpel_neon; 184 ps_func_list->pf_combine_4x4_sads_and_compute_cost_high_quality = hme_combine_4x4_sads_and_compute_cost_high_quality_neon; 185 ps_func_list->pf_combine_4x4_sads_and_compute_cost_high_speed = hme_combine_4x4_sads_and_compute_cost_high_speed_neon; 186 ps_func_list->pf_compute_4x4_sads_for_16x16_blk = compute_4x4_sads_for_16x16_blk_neon; 187 ps_func_list->pf_evalsad_grid_npu_MxN = hme_evalsad_grid_npu_MxN_neon; 188 ps_func_list->pf_evalsad_grid_pu_MxM = compute_part_sads_for_MxM_blk_neon; 189 ps_func_list->pf_evalsad_pt_npu_12x16_8bit = hme_evalsad_pt_npu_MxN_8bit_neon; 190 ps_func_list->pf_evalsad_pt_npu_16x12_8bit = hme_evalsad_pt_npu_MxN_8bit_neon; 191 ps_func_list->pf_evalsad_pt_npu_16x4_8bit = hme_evalsad_pt_npu_MxN_8bit_neon; 192 ps_func_list->pf_evalsad_pt_npu_24x32_8bit = hme_evalsad_pt_npu_MxN_8bit_neon; 193 ps_func_list->pf_evalsad_pt_npu_8x4_8bit = hme_evalsad_pt_npu_MxN_8bit_neon; 194 ps_func_list->pf_evalsad_pt_npu_mxn_8bit = hme_evalsad_pt_npu_MxN_8bit_neon; 195 ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit = hme_evalsad_pt_npu_MxN_8bit_neon; 196 ps_func_list->pf_evalsad_pt_npu_width_multiple_4_8bit = hme_evalsad_pt_npu_MxN_8bit_neon; 197 ps_func_list->pf_evalsad_pt_npu_width_multiple_8_8bit = hme_evalsad_pt_npu_MxN_8bit_neon; 198 ps_func_list->pf_get_wt_inp_8x8 = hme_get_wt_inp_8x8_neon; 199 ps_func_list->pf_get_wt_inp_ctb = hme_get_wt_inp_ctb_neon; 200 ps_func_list->pf_get_wt_inp_generic = hme_get_wt_inp; 201 ps_func_list->pf_mv_clipper = hme_mv_clipper; 202 ps_func_list->pf_qpel_interp_avg_1pt = hme_qpel_interp_avg_1pt_neon; 203 ps_func_list->pf_qpel_interp_avg_2pt_horz_with_reuse = hme_qpel_interp_avg_2pt_horz_with_reuse_neon; 204 ps_func_list->pf_qpel_interp_avg_2pt_vert_with_reuse = hme_qpel_interp_avg_2pt_vert_with_reuse_neon; 205 ps_func_list->pf_qpel_interp_avg_generic = hme_qpel_interp_avg_neon; 206 ps_func_list->pf_store_4x4_sads_high_quality = hme_store_4x4_sads_high_quality_neon; 207 ps_func_list->pf_store_4x4_sads_high_speed = hme_store_4x4_sads_high_speed_neon; 208 break; 209 #endif 210 default: 211 ps_func_list->pf_calc_pt_sad_and_1_best_result_explicit_8x8 = hme_calc_pt_sad_and_result_explicit; 212 ps_func_list->pf_calc_pt_sad_and_1_best_result_explicit_8x8_4x4 = hme_calc_pt_sad_and_result_explicit; 213 ps_func_list->pf_calc_pt_sad_and_1_best_result_explicit_8x8_for_grid = hme_calc_pt_sad_and_result_explicit; 214 ps_func_list->pf_calc_pt_sad_and_1_best_result_explicit_generic = hme_calc_pt_sad_and_result_explicit; 215 ps_func_list->pf_calc_pt_sad_and_2_best_results_explicit_8x8 = hme_calc_pt_sad_and_result_explicit; 216 ps_func_list->pf_calc_pt_sad_and_2_best_results_explicit_8x8_4x4 = hme_calc_pt_sad_and_result_explicit; 217 ps_func_list->pf_calc_pt_sad_and_2_best_results_explicit_8x8_for_grid = hme_calc_pt_sad_and_result_explicit; 218 ps_func_list->pf_calc_pt_sad_and_2_best_results_explicit_generic = hme_calc_pt_sad_and_result_explicit; 219 ps_func_list->pf_calc_sad_and_1_best_result_generic = hme_calc_sad_and_1_best_result; 220 ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_generic = hme_calc_stim_injected_sad_and_1_best_result; 221 ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_part_eq_1 = hme_calc_stim_injected_sad_and_1_best_result; 222 ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_square_parts = hme_calc_stim_injected_sad_and_1_best_result; 223 ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_part_lt_9 = hme_calc_stim_injected_sad_and_1_best_result; 224 ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_part_lt_17 = hme_calc_stim_injected_sad_and_1_best_result; 225 ps_func_list->pf_compute_variance_for_all_parts = hme_compute_variance_for_all_parts; 226 ps_func_list->pf_compute_stim_injected_distortion_for_all_parts = hme_compute_stim_injected_distortion_for_all_parts; 227 ps_func_list->pf_calc_sad_and_1_best_result_num_part_1_for_grid = hme_calc_sad_and_1_best_result; 228 ps_func_list->pf_calc_sad_and_1_best_result_num_part_eq_1 = hme_calc_sad_and_1_best_result; 229 ps_func_list->pf_calc_sad_and_1_best_result_num_part_lt_17 = hme_calc_sad_and_1_best_result; 230 ps_func_list->pf_calc_sad_and_1_best_result_num_part_lt_9 = hme_calc_sad_and_1_best_result; 231 ps_func_list->pf_calc_sad_and_1_best_result_num_square_parts = hme_calc_sad_and_1_best_result; 232 ps_func_list->pf_calc_sad_and_1_best_result_subpel_generic = hme_calc_sad_and_1_best_result_subpel; 233 ps_func_list->pf_calc_sad_and_1_best_result_subpel_num_part_eq_1 = hme_calc_sad_and_1_best_result_subpel; 234 ps_func_list->pf_calc_sad_and_1_best_result_subpel_num_part_lt_17 = hme_calc_sad_and_1_best_result_subpel; 235 ps_func_list->pf_calc_sad_and_1_best_result_subpel_num_part_lt_9 = hme_calc_sad_and_1_best_result_subpel; 236 ps_func_list->pf_calc_sad_and_1_best_result_subpel_square_parts = hme_calc_sad_and_1_best_result_subpel; 237 ps_func_list->pf_combine_4x4_sads_and_compute_cost_high_quality = hme_combine_4x4_sads_and_compute_cost_high_quality; 238 ps_func_list->pf_combine_4x4_sads_and_compute_cost_high_speed = hme_combine_4x4_sads_and_compute_cost_high_speed; 239 ps_func_list->pf_compute_4x4_sads_for_16x16_blk = compute_4x4_sads_for_16x16_blk; 240 ps_func_list->pf_evalsad_grid_npu_MxN = hme_evalsad_grid_npu_MxN; 241 ps_func_list->pf_evalsad_grid_pu_MxM = compute_part_sads_for_MxM_blk; 242 ps_func_list->pf_evalsad_pt_npu_12x16_8bit = hme_evalsad_pt_npu_MxN_8bit; 243 ps_func_list->pf_evalsad_pt_npu_16x12_8bit = hme_evalsad_pt_npu_MxN_8bit; 244 ps_func_list->pf_evalsad_pt_npu_16x4_8bit = hme_evalsad_pt_npu_MxN_8bit; 245 ps_func_list->pf_evalsad_pt_npu_24x32_8bit = hme_evalsad_pt_npu_MxN_8bit; 246 ps_func_list->pf_evalsad_pt_npu_8x4_8bit = hme_evalsad_pt_npu_MxN_8bit; 247 ps_func_list->pf_evalsad_pt_npu_mxn_8bit = hme_evalsad_pt_npu_MxN_8bit; 248 ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit = hme_evalsad_pt_npu_MxN_8bit; 249 ps_func_list->pf_evalsad_pt_npu_width_multiple_4_8bit = hme_evalsad_pt_npu_MxN_8bit; 250 ps_func_list->pf_evalsad_pt_npu_width_multiple_8_8bit = hme_evalsad_pt_npu_MxN_8bit; 251 ps_func_list->pf_get_wt_inp_8x8 = hme_get_wt_inp; 252 ps_func_list->pf_get_wt_inp_ctb = hme_get_wt_inp; 253 ps_func_list->pf_get_wt_inp_generic = hme_get_wt_inp; 254 ps_func_list->pf_mv_clipper = hme_mv_clipper; 255 ps_func_list->pf_qpel_interp_avg_1pt = hme_qpel_interp_avg_1pt; 256 ps_func_list->pf_qpel_interp_avg_2pt_horz_with_reuse = hme_qpel_interp_avg_2pt_horz_with_reuse; 257 ps_func_list->pf_qpel_interp_avg_2pt_vert_with_reuse = hme_qpel_interp_avg_2pt_vert_with_reuse; 258 ps_func_list->pf_qpel_interp_avg_generic = hme_qpel_interp_avg; 259 ps_func_list->pf_store_4x4_sads_high_quality = hme_store_4x4_sads_high_quality; 260 ps_func_list->pf_store_4x4_sads_high_speed = hme_store_4x4_sads_high_speed; 261 break; 262 } 263 264 gapf_sad_pt_npu[BLK_4x4] = ps_func_list->pf_evalsad_pt_npu_width_multiple_4_8bit; 265 gapf_sad_pt_npu[BLK_4x8] = ps_func_list->pf_evalsad_pt_npu_width_multiple_4_8bit; 266 gapf_sad_pt_npu[BLK_8x4] = ps_func_list->pf_evalsad_pt_npu_8x4_8bit; 267 gapf_sad_pt_npu[BLK_8x8] = ps_func_list->pf_evalsad_pt_npu_width_multiple_8_8bit; 268 gapf_sad_pt_npu[BLK_4x16] = ps_func_list->pf_evalsad_pt_npu_width_multiple_4_8bit; 269 gapf_sad_pt_npu[BLK_8x16] = ps_func_list->pf_evalsad_pt_npu_width_multiple_8_8bit; 270 gapf_sad_pt_npu[BLK_12x16] = ps_func_list->pf_evalsad_pt_npu_12x16_8bit; 271 gapf_sad_pt_npu[BLK_16x4] = ps_func_list->pf_evalsad_pt_npu_16x4_8bit; 272 gapf_sad_pt_npu[BLK_16x8] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit; 273 gapf_sad_pt_npu[BLK_16x12] = ps_func_list->pf_evalsad_pt_npu_16x12_8bit; 274 gapf_sad_pt_npu[BLK_16x16] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit; 275 gapf_sad_pt_npu[BLK_8x32] = ps_func_list->pf_evalsad_pt_npu_width_multiple_8_8bit; 276 gapf_sad_pt_npu[BLK_16x32] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit; 277 gapf_sad_pt_npu[BLK_24x32] = ps_func_list->pf_evalsad_pt_npu_24x32_8bit; 278 gapf_sad_pt_npu[BLK_32x8] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit; 279 gapf_sad_pt_npu[BLK_32x16] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit; 280 gapf_sad_pt_npu[BLK_32x8] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit; 281 gapf_sad_pt_npu[BLK_32x16] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit; 282 gapf_sad_pt_npu[BLK_32x24] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit; 283 gapf_sad_pt_npu[BLK_32x32] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit; 284 gapf_sad_pt_npu[BLK_16x64] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit; 285 gapf_sad_pt_npu[BLK_32x64] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit; 286 gapf_sad_pt_npu[BLK_48x64] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit; 287 gapf_sad_pt_npu[BLK_64x16] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit; 288 gapf_sad_pt_npu[BLK_64x32] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit; 289 gapf_sad_pt_npu[BLK_64x48] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit; 290 gapf_sad_pt_npu[BLK_64x64] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit; 291 292 gpf_part_sads_evaluator_16x16CU = ps_func_list->pf_compute_4x4_sads_for_16x16_blk; 293 gpf_part_sads_evaluator_MxM = ps_func_list->pf_evalsad_grid_pu_MxM; 294 295 gpf_sad_grid_mxn = ps_func_list->pf_evalsad_grid_npu_MxN; 296 297 gapf_calc_sad_and_result_fxn[0][0] = ps_func_list->pf_calc_sad_and_1_best_result_num_part_1_for_grid; 298 gapf_calc_sad_and_result_fxn[1][0] = ps_func_list->pf_calc_sad_and_1_best_result_num_part_eq_1; 299 gapf_calc_sad_and_result_fxn[2][0] = ps_func_list->pf_calc_sad_and_1_best_result_num_square_parts; 300 gapf_calc_sad_and_result_fxn[3][0] = ps_func_list->pf_calc_sad_and_1_best_result_num_part_lt_9; 301 gapf_calc_sad_and_result_fxn[4][0] = ps_func_list->pf_calc_sad_and_1_best_result_num_part_lt_17; 302 gapf_calc_sad_and_result_fxn[5][0] = ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_part_eq_1; 303 gapf_calc_sad_and_result_fxn[6][0] = ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_square_parts; 304 gapf_calc_sad_and_result_fxn[7][0] = ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_part_lt_9; 305 gapf_calc_sad_and_result_fxn[8][0] = ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_part_lt_17; 306 gapf_calc_sad_and_result_fxn[0][1] = ps_func_list->pf_calc_sad_and_2_best_results_num_part_1_for_grid; 307 gapf_calc_sad_and_result_fxn[1][1] = ps_func_list->pf_calc_sad_and_2_best_results_num_part_eq_1; 308 gapf_calc_sad_and_result_fxn[2][1] = ps_func_list->pf_calc_sad_and_2_best_results_num_square_parts; 309 gapf_calc_sad_and_result_fxn[3][1] = ps_func_list->pf_calc_sad_and_2_best_results_num_part_lt_9; 310 gapf_calc_sad_and_result_fxn[4][1] = ps_func_list->pf_calc_sad_and_2_best_results_num_part_lt_17; 311 gapf_calc_sad_and_result_fxn[5][1] = ps_func_list->pf_calc_stim_injected_sad_and_2_best_results_num_part_eq_1; 312 gapf_calc_sad_and_result_fxn[6][1] = ps_func_list->pf_calc_stim_injected_sad_and_2_best_results_num_square_parts; 313 gapf_calc_sad_and_result_fxn[7][1] = ps_func_list->pf_calc_stim_injected_sad_and_2_best_results_num_part_lt_9; 314 gapf_calc_sad_and_result_fxn[8][1] = ps_func_list->pf_calc_stim_injected_sad_and_2_best_results_num_part_lt_17; 315 } 316 // clang-format on 317 318 FT_CALC_SAD_AND_RESULT *hme_get_calc_sad_and_result_fxn( 319 S08 i1_grid_flag, U08 u1_is_cu_noisy, S32 i4_part_mask, S32 num_parts, S32 num_results) 320 { 321 U08 u1_index; 322 323 ASSERT((1 == num_results) || (2 == num_results)); 324 325 u1_index = 326 gau1_calc_sad_and_result[i1_grid_flag][u1_is_cu_noisy] 327 [(!!(i4_part_mask & (ENABLE_SMP | ENABLE_NxN)) && 328 !(i4_part_mask & ENABLE_AMP)) 329 ? (!!(i4_part_mask & ENABLE_NxN) ? 0 : 1) 330 : (!!(i4_part_mask & ENABLE_AMP) ? 2 : 3)][num_parts - 1]; 331 332 return gapf_calc_sad_and_result_fxn[u1_index][2 == num_results]; 333 } 334 335 void hme_evalsad_grid_pu_MxM(err_prms_t *ps_prms) 336 { 337 grid_ctxt_t s_grid; 338 cand_t as_candt[9]; 339 340 S32 *api4_sad_grid[TOT_NUM_PARTS]; 341 342 hme_mv_t s_mv = { 0, 0 }; 343 344 CU_SIZE_T e_cu_size = (CU_SIZE_T)(hme_get_range(ps_prms->i4_blk_wd) - 4); 345 346 S32 i4_ref_idx = 0, i; 347 S32 num_candts = 0; 348 349 s_grid.num_grids = 1; 350 s_grid.ref_buf_stride = ps_prms->i4_ref_stride; 351 s_grid.grd_sz_y_x = ((ps_prms->i4_step << 16) | ps_prms->i4_step); 352 s_grid.ppu1_ref_ptr = &ps_prms->pu1_ref; 353 s_grid.pi4_grd_mask = &ps_prms->i4_grid_mask; 354 s_grid.p_mv = &s_mv; 355 s_grid.p_ref_idx = &i4_ref_idx; 356 357 for(i = 0; i < 9; i++) 358 { 359 if(s_grid.pi4_grd_mask[0] & (1 << i)) 360 { 361 num_candts++; 362 } 363 } 364 365 for(i = 0; i < TOT_NUM_PARTS; i++) 366 { 367 api4_sad_grid[i] = &ps_prms->pi4_sad_grid[i * num_candts]; 368 } 369 370 gpf_part_sads_evaluator_MxM( 371 &s_grid, 372 ps_prms->pu1_inp, 373 ps_prms->i4_inp_stride, 374 (WORD32 **)api4_sad_grid, 375 as_candt, 376 &num_candts, 377 e_cu_size); 378 } 379 380 PF_SAD_FXN_T hme_get_sad_fxn(BLK_SIZE_T e_blk_size, S32 i4_grid_mask, S32 i4_part_mask) 381 { 382 S32 i4_grid_en = ((i4_grid_mask & 0x1fe) != 0); 383 384 if(i4_grid_en) 385 { 386 if(i4_part_mask & (i4_part_mask - 1)) 387 { 388 if(BLK_16x16 == e_blk_size) 389 { 390 return hme_evalsad_grid_pu_16x16; 391 } 392 else 393 { 394 return hme_evalsad_grid_pu_MxM; 395 } 396 } 397 else 398 { 399 return gpf_sad_grid_mxn; 400 } 401 } 402 else 403 { 404 if(i4_part_mask & (i4_part_mask - 1)) 405 { 406 if(BLK_16x16 == e_blk_size) 407 { 408 return hme_evalsad_grid_pu_16x16; 409 } 410 else 411 { 412 return hme_evalsad_grid_pu_MxM; 413 } 414 } 415 else 416 { 417 return gapf_sad_pt_npu[e_blk_size]; 418 } 419 } 420 } 421 422 void ihevce_sifter_sad_fxn_assigner(FT_SAD_EVALUATOR **ppf_evalsad_pt_npu_mxn, IV_ARCH_T e_arch) 423 { 424 switch(e_arch) 425 { 426 #ifdef ENABLE_NEON 427 case ARCH_ARM_A9Q: 428 case ARCH_ARM_V8_NEON: 429 ppf_evalsad_pt_npu_mxn[0] = hme_evalsad_pt_npu_MxN_8bit_neon; 430 break; 431 #endif 432 433 default: 434 ppf_evalsad_pt_npu_mxn[0] = hme_evalsad_pt_npu_MxN_8bit; 435 break; 436 } 437 } 438