1 /****************************************************************************** 2 * 3 * Copyright (C) 2018 The Android Open Source Project 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ***************************************************************************** 18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore 19 */ 20 21 /*****************************************************************************/ 22 /* File Includes */ 23 /*****************************************************************************/ 24 /* System include files */ 25 #include <stdio.h> 26 #include <string.h> 27 #include <stdlib.h> 28 #include <assert.h> 29 #include <stdarg.h> 30 #include <math.h> 31 #include <limits.h> 32 33 /* User include files */ 34 #include "ihevc_typedefs.h" 35 #include "itt_video_api.h" 36 #include "ihevce_api.h" 37 38 #include "rc_cntrl_param.h" 39 #include "rc_frame_info_collector.h" 40 #include "rc_look_ahead_params.h" 41 42 #include "ihevc_defs.h" 43 #include "ihevc_structs.h" 44 #include "ihevc_platform_macros.h" 45 #include "ihevc_deblk.h" 46 #include "ihevc_itrans_recon.h" 47 #include "ihevc_chroma_itrans_recon.h" 48 #include "ihevc_chroma_intra_pred.h" 49 #include "ihevc_intra_pred.h" 50 #include "ihevc_inter_pred.h" 51 #include "ihevc_mem_fns.h" 52 #include "ihevc_padding.h" 53 #include "ihevc_weighted_pred.h" 54 #include "ihevc_sao.h" 55 #include "ihevc_resi_trans.h" 56 #include "ihevc_quant_iquant_ssd.h" 
57 #include "ihevc_cabac_tables.h" 58 59 #include "ihevce_defs.h" 60 #include "ihevce_lap_enc_structs.h" 61 #include "ihevce_multi_thrd_structs.h" 62 #include "ihevce_multi_thrd_funcs.h" 63 #include "ihevce_me_common_defs.h" 64 #include "ihevce_had_satd.h" 65 #include "ihevce_error_codes.h" 66 #include "ihevce_bitstream.h" 67 #include "ihevce_cabac.h" 68 #include "ihevce_rdoq_macros.h" 69 #include "ihevce_function_selector.h" 70 #include "ihevce_enc_structs.h" 71 #include "ihevce_entropy_structs.h" 72 #include "ihevce_cmn_utils_instr_set_router.h" 73 #include "ihevce_enc_loop_structs.h" 74 #include "ihevce_bs_compute_ctb.h" 75 #include "ihevce_global_tables.h" 76 #include "ihevce_dep_mngr_interface.h" 77 #include "hme_datatype.h" 78 #include "hme_interface.h" 79 #include "hme_common_defs.h" 80 #include "hme_defs.h" 81 #include "ihevce_me_instr_set_router.h" 82 #include "hme_globals.h" 83 #include "hme_utils.h" 84 #include "hme_coarse.h" 85 #include "hme_refine.h" 86 #include "hme_err_compute.h" 87 #include "hme_common_utils.h" 88 #include "hme_search_algo.h" 89 #include "ihevce_profile.h" 90 91 /*****************************************************************************/ 92 /* Function Definitions */ 93 /*****************************************************************************/ 94 95 void hme_init_globals() 96 { 97 GRID_PT_T id; 98 S32 i, j; 99 /*************************************************************************/ 100 /* Initialize the lookup table for x offset, y offset, optimized mask */ 101 /* based on grid id. The design is as follows: */ 102 /* */ 103 /* a b c d */ 104 /* TL T TR e */ 105 /* L C R f */ 106 /* BL B BR */ 107 /* */ 108 /* IF a non corner pt, like T is the new minima, then we need to */ 109 /* evaluate only 3 new pts, in this case, a, b, c. So the optimal */ 110 /* grid mask would reflect this. If a corner pt like TR is the new */ 111 /* minima, then we need to evaluate 5 new pts, in this case, b, c, d, */ 112 /* e and f. 
So the grid mask will have 5 pts enabled. */ 113 /*************************************************************************/ 114 115 id = PT_C; 116 gai4_opt_grid_mask[id] = GRID_ALL_PTS_VALID ^ (BIT_EN(PT_C)); 117 gai1_grid_id_to_x[id] = 0; 118 gai1_grid_id_to_y[id] = 0; 119 gai4_opt_grid_mask_diamond[id] = GRID_DIAMOND_ENABLE_ALL ^ (BIT_EN(PT_C)); 120 gai4_opt_grid_mask_conventional[id] = GRID_ALL_PTS_VALID ^ (BIT_EN(PT_C)); 121 122 id = PT_L; 123 gai4_opt_grid_mask[id] = BIT_EN(PT_TL) | BIT_EN(PT_L) | BIT_EN(PT_BL); 124 gai1_grid_id_to_x[id] = -1; 125 gai1_grid_id_to_y[id] = 0; 126 gai4_opt_grid_mask_diamond[id] = BIT_EN(PT_T) | BIT_EN(PT_L) | BIT_EN(PT_B); 127 gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_T) | BIT_EN(PT_L) | BIT_EN(PT_B); 128 129 id = PT_R; 130 gai4_opt_grid_mask[id] = BIT_EN(PT_TR) | BIT_EN(PT_R) | BIT_EN(PT_BR); 131 gai1_grid_id_to_x[id] = 1; 132 gai1_grid_id_to_y[id] = 0; 133 gai4_opt_grid_mask_diamond[id] = BIT_EN(PT_T) | BIT_EN(PT_R) | BIT_EN(PT_B); 134 gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_T) | BIT_EN(PT_R) | BIT_EN(PT_B); 135 136 id = PT_T; 137 gai4_opt_grid_mask[id] = BIT_EN(PT_TL) | BIT_EN(PT_T) | BIT_EN(PT_TR); 138 gai1_grid_id_to_x[id] = 0; 139 gai1_grid_id_to_y[id] = -1; 140 gai4_opt_grid_mask_diamond[id] = BIT_EN(PT_R) | BIT_EN(PT_L) | BIT_EN(PT_T); 141 gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_R) | BIT_EN(PT_L) | BIT_EN(PT_T); 142 143 id = PT_B; 144 gai4_opt_grid_mask[id] = BIT_EN(PT_BL) | BIT_EN(PT_B) | BIT_EN(PT_BR); 145 gai1_grid_id_to_x[id] = 0; 146 gai1_grid_id_to_y[id] = 1; 147 gai4_opt_grid_mask_diamond[id] = BIT_EN(PT_B) | BIT_EN(PT_L) | BIT_EN(PT_R); 148 gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_B) | BIT_EN(PT_L) | BIT_EN(PT_R); 149 150 id = PT_TL; 151 gai4_opt_grid_mask[id] = gai4_opt_grid_mask[PT_L] | gai4_opt_grid_mask[PT_T]; 152 gai1_grid_id_to_x[id] = -1; 153 gai1_grid_id_to_y[id] = -1; 154 gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_T) | BIT_EN(PT_L); 155 156 id = PT_TR; 157 
gai4_opt_grid_mask[id] = gai4_opt_grid_mask[PT_R] | gai4_opt_grid_mask[PT_T]; 158 gai1_grid_id_to_x[id] = 1; 159 gai1_grid_id_to_y[id] = -1; 160 gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_T) | BIT_EN(PT_R); 161 162 id = PT_BL; 163 gai4_opt_grid_mask[id] = gai4_opt_grid_mask[PT_L] | gai4_opt_grid_mask[PT_B]; 164 gai1_grid_id_to_x[id] = -1; 165 gai1_grid_id_to_y[id] = 1; 166 gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_L) | BIT_EN(PT_B); 167 168 id = PT_BR; 169 gai4_opt_grid_mask[id] = gai4_opt_grid_mask[PT_R] | gai4_opt_grid_mask[PT_B]; 170 gai1_grid_id_to_x[id] = 1; 171 gai1_grid_id_to_y[id] = 1; 172 gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_R) | BIT_EN(PT_B); 173 174 ge_part_id_to_blk_size[CU_8x8][PART_ID_2Nx2N] = BLK_8x8; 175 ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxN_T] = BLK_8x4; 176 ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxN_B] = BLK_8x4; 177 ge_part_id_to_blk_size[CU_8x8][PART_ID_Nx2N_L] = BLK_4x8; 178 ge_part_id_to_blk_size[CU_8x8][PART_ID_Nx2N_R] = BLK_4x8; 179 ge_part_id_to_blk_size[CU_8x8][PART_ID_NxN_TL] = BLK_4x4; 180 ge_part_id_to_blk_size[CU_8x8][PART_ID_NxN_TR] = BLK_4x4; 181 ge_part_id_to_blk_size[CU_8x8][PART_ID_NxN_BL] = BLK_4x4; 182 ge_part_id_to_blk_size[CU_8x8][PART_ID_NxN_BR] = BLK_4x4; 183 ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxnU_T] = BLK_INVALID; 184 ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxnU_B] = BLK_INVALID; 185 ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxnD_T] = BLK_INVALID; 186 ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxnD_B] = BLK_INVALID; 187 ge_part_id_to_blk_size[CU_8x8][PART_ID_nLx2N_L] = BLK_INVALID; 188 ge_part_id_to_blk_size[CU_8x8][PART_ID_nLx2N_R] = BLK_INVALID; 189 ge_part_id_to_blk_size[CU_8x8][PART_ID_nRx2N_L] = BLK_INVALID; 190 ge_part_id_to_blk_size[CU_8x8][PART_ID_nRx2N_R] = BLK_INVALID; 191 192 ge_part_id_to_blk_size[CU_16x16][PART_ID_2Nx2N] = BLK_16x16; 193 ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxN_T] = BLK_16x8; 194 ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxN_B] = BLK_16x8; 195 
ge_part_id_to_blk_size[CU_16x16][PART_ID_Nx2N_L] = BLK_8x16; 196 ge_part_id_to_blk_size[CU_16x16][PART_ID_Nx2N_R] = BLK_8x16; 197 ge_part_id_to_blk_size[CU_16x16][PART_ID_NxN_TL] = BLK_8x8; 198 ge_part_id_to_blk_size[CU_16x16][PART_ID_NxN_TR] = BLK_8x8; 199 ge_part_id_to_blk_size[CU_16x16][PART_ID_NxN_BL] = BLK_8x8; 200 ge_part_id_to_blk_size[CU_16x16][PART_ID_NxN_BR] = BLK_8x8; 201 ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxnU_T] = BLK_16x4; 202 ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxnU_B] = BLK_16x12; 203 ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxnD_T] = BLK_16x12; 204 ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxnD_B] = BLK_16x4; 205 ge_part_id_to_blk_size[CU_16x16][PART_ID_nLx2N_L] = BLK_4x16; 206 ge_part_id_to_blk_size[CU_16x16][PART_ID_nLx2N_R] = BLK_12x16; 207 ge_part_id_to_blk_size[CU_16x16][PART_ID_nRx2N_L] = BLK_12x16; 208 ge_part_id_to_blk_size[CU_16x16][PART_ID_nRx2N_R] = BLK_4x16; 209 210 ge_part_id_to_blk_size[CU_32x32][PART_ID_2Nx2N] = BLK_32x32; 211 ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxN_T] = BLK_32x16; 212 ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxN_B] = BLK_32x16; 213 ge_part_id_to_blk_size[CU_32x32][PART_ID_Nx2N_L] = BLK_16x32; 214 ge_part_id_to_blk_size[CU_32x32][PART_ID_Nx2N_R] = BLK_16x32; 215 ge_part_id_to_blk_size[CU_32x32][PART_ID_NxN_TL] = BLK_16x16; 216 ge_part_id_to_blk_size[CU_32x32][PART_ID_NxN_TR] = BLK_16x16; 217 ge_part_id_to_blk_size[CU_32x32][PART_ID_NxN_BL] = BLK_16x16; 218 ge_part_id_to_blk_size[CU_32x32][PART_ID_NxN_BR] = BLK_16x16; 219 ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxnU_T] = BLK_32x8; 220 ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxnU_B] = BLK_32x24; 221 ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxnD_T] = BLK_32x24; 222 ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxnD_B] = BLK_32x8; 223 ge_part_id_to_blk_size[CU_32x32][PART_ID_nLx2N_L] = BLK_8x32; 224 ge_part_id_to_blk_size[CU_32x32][PART_ID_nLx2N_R] = BLK_24x32; 225 ge_part_id_to_blk_size[CU_32x32][PART_ID_nRx2N_L] = BLK_24x32; 226 
ge_part_id_to_blk_size[CU_32x32][PART_ID_nRx2N_R] = BLK_8x32; 227 228 ge_part_id_to_blk_size[CU_64x64][PART_ID_2Nx2N] = BLK_64x64; 229 ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxN_T] = BLK_64x32; 230 ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxN_B] = BLK_64x32; 231 ge_part_id_to_blk_size[CU_64x64][PART_ID_Nx2N_L] = BLK_32x64; 232 ge_part_id_to_blk_size[CU_64x64][PART_ID_Nx2N_R] = BLK_32x64; 233 ge_part_id_to_blk_size[CU_64x64][PART_ID_NxN_TL] = BLK_32x32; 234 ge_part_id_to_blk_size[CU_64x64][PART_ID_NxN_TR] = BLK_32x32; 235 ge_part_id_to_blk_size[CU_64x64][PART_ID_NxN_BL] = BLK_32x32; 236 ge_part_id_to_blk_size[CU_64x64][PART_ID_NxN_BR] = BLK_32x32; 237 ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxnU_T] = BLK_64x16; 238 ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxnU_B] = BLK_64x48; 239 ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxnD_T] = BLK_64x48; 240 ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxnD_B] = BLK_64x16; 241 ge_part_id_to_blk_size[CU_64x64][PART_ID_nLx2N_L] = BLK_16x64; 242 ge_part_id_to_blk_size[CU_64x64][PART_ID_nLx2N_R] = BLK_48x64; 243 ge_part_id_to_blk_size[CU_64x64][PART_ID_nRx2N_L] = BLK_48x64; 244 ge_part_id_to_blk_size[CU_64x64][PART_ID_nRx2N_R] = BLK_16x64; 245 246 gau1_num_parts_in_part_type[PRT_2Nx2N] = 1; 247 gau1_num_parts_in_part_type[PRT_2NxN] = 2; 248 gau1_num_parts_in_part_type[PRT_Nx2N] = 2; 249 gau1_num_parts_in_part_type[PRT_NxN] = 4; 250 gau1_num_parts_in_part_type[PRT_2NxnU] = 2; 251 gau1_num_parts_in_part_type[PRT_2NxnD] = 2; 252 gau1_num_parts_in_part_type[PRT_nLx2N] = 2; 253 gau1_num_parts_in_part_type[PRT_nRx2N] = 2; 254 255 for(i = 0; i < MAX_PART_TYPES; i++) 256 for(j = 0; j < MAX_NUM_PARTS; j++) 257 ge_part_type_to_part_id[i][j] = PART_ID_INVALID; 258 259 /* 2Nx2N only one partition */ 260 ge_part_type_to_part_id[PRT_2Nx2N][0] = PART_ID_2Nx2N; 261 262 /* 2NxN 2 partitions */ 263 ge_part_type_to_part_id[PRT_2NxN][0] = PART_ID_2NxN_T; 264 ge_part_type_to_part_id[PRT_2NxN][1] = PART_ID_2NxN_B; 265 266 /* Nx2N 2 partitions */ 267 
ge_part_type_to_part_id[PRT_Nx2N][0] = PART_ID_Nx2N_L; 268 ge_part_type_to_part_id[PRT_Nx2N][1] = PART_ID_Nx2N_R; 269 270 /* NxN 4 partitions */ 271 ge_part_type_to_part_id[PRT_NxN][0] = PART_ID_NxN_TL; 272 ge_part_type_to_part_id[PRT_NxN][1] = PART_ID_NxN_TR; 273 ge_part_type_to_part_id[PRT_NxN][2] = PART_ID_NxN_BL; 274 ge_part_type_to_part_id[PRT_NxN][3] = PART_ID_NxN_BR; 275 276 /* AMP 2Nx (N/2 + 3N/2) 2 partitions */ 277 ge_part_type_to_part_id[PRT_2NxnU][0] = PART_ID_2NxnU_T; 278 ge_part_type_to_part_id[PRT_2NxnU][1] = PART_ID_2NxnU_B; 279 280 /* AMP 2Nx (3N/2 + N/2) 2 partitions */ 281 ge_part_type_to_part_id[PRT_2NxnD][0] = PART_ID_2NxnD_T; 282 ge_part_type_to_part_id[PRT_2NxnD][1] = PART_ID_2NxnD_B; 283 284 /* AMP (N/2 + 3N/2) x 2N 2 partitions */ 285 ge_part_type_to_part_id[PRT_nLx2N][0] = PART_ID_nLx2N_L; 286 ge_part_type_to_part_id[PRT_nLx2N][1] = PART_ID_nLx2N_R; 287 288 /* AMP (3N/2 + N/2) x 2N 2 partitions */ 289 ge_part_type_to_part_id[PRT_nRx2N][0] = PART_ID_nRx2N_L; 290 ge_part_type_to_part_id[PRT_nRx2N][1] = PART_ID_nRx2N_R; 291 292 /*************************************************************************/ 293 /* initialize attributes for each partition id within the cu. 
*/ 294 /*************************************************************************/ 295 { 296 part_attr_t *ps_part_attr; 297 298 ps_part_attr = &gas_part_attr_in_cu[PART_ID_2Nx2N]; 299 ps_part_attr->u1_x_start = 0; 300 ps_part_attr->u1_y_start = 0; 301 ps_part_attr->u1_x_count = 8; 302 ps_part_attr->u1_y_count = 8; 303 304 ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxN_T]; 305 ps_part_attr->u1_x_start = 0; 306 ps_part_attr->u1_y_start = 0; 307 ps_part_attr->u1_x_count = 8; 308 ps_part_attr->u1_y_count = 4; 309 310 ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxN_B]; 311 ps_part_attr->u1_x_start = 0; 312 ps_part_attr->u1_y_start = 4; 313 ps_part_attr->u1_x_count = 8; 314 ps_part_attr->u1_y_count = 4; 315 316 ps_part_attr = &gas_part_attr_in_cu[PART_ID_Nx2N_L]; 317 ps_part_attr->u1_x_start = 0; 318 ps_part_attr->u1_y_start = 0; 319 ps_part_attr->u1_x_count = 4; 320 ps_part_attr->u1_y_count = 8; 321 322 ps_part_attr = &gas_part_attr_in_cu[PART_ID_Nx2N_R]; 323 ps_part_attr->u1_x_start = 4; 324 ps_part_attr->u1_y_start = 0; 325 ps_part_attr->u1_x_count = 4; 326 ps_part_attr->u1_y_count = 8; 327 328 ps_part_attr = &gas_part_attr_in_cu[PART_ID_NxN_TL]; 329 ps_part_attr->u1_x_start = 0; 330 ps_part_attr->u1_y_start = 0; 331 ps_part_attr->u1_x_count = 4; 332 ps_part_attr->u1_y_count = 4; 333 334 ps_part_attr = &gas_part_attr_in_cu[PART_ID_NxN_TR]; 335 ps_part_attr->u1_x_start = 4; 336 ps_part_attr->u1_y_start = 0; 337 ps_part_attr->u1_x_count = 4; 338 ps_part_attr->u1_y_count = 4; 339 340 ps_part_attr = &gas_part_attr_in_cu[PART_ID_NxN_BL]; 341 ps_part_attr->u1_x_start = 0; 342 ps_part_attr->u1_y_start = 4; 343 ps_part_attr->u1_x_count = 4; 344 ps_part_attr->u1_y_count = 4; 345 346 ps_part_attr = &gas_part_attr_in_cu[PART_ID_NxN_BR]; 347 ps_part_attr->u1_x_start = 4; 348 ps_part_attr->u1_y_start = 4; 349 ps_part_attr->u1_x_count = 4; 350 ps_part_attr->u1_y_count = 4; 351 352 ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxnU_T]; 353 ps_part_attr->u1_x_start = 0; 354 
ps_part_attr->u1_y_start = 0; 355 ps_part_attr->u1_x_count = 8; 356 ps_part_attr->u1_y_count = 2; 357 358 ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxnU_B]; 359 ps_part_attr->u1_x_start = 0; 360 ps_part_attr->u1_y_start = 2; 361 ps_part_attr->u1_x_count = 8; 362 ps_part_attr->u1_y_count = 6; 363 364 ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxnD_T]; 365 ps_part_attr->u1_x_start = 0; 366 ps_part_attr->u1_y_start = 0; 367 ps_part_attr->u1_x_count = 8; 368 ps_part_attr->u1_y_count = 6; 369 370 ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxnD_B]; 371 ps_part_attr->u1_x_start = 0; 372 ps_part_attr->u1_y_start = 6; 373 ps_part_attr->u1_x_count = 8; 374 ps_part_attr->u1_y_count = 2; 375 376 ps_part_attr = &gas_part_attr_in_cu[PART_ID_nLx2N_L]; 377 ps_part_attr->u1_x_start = 0; 378 ps_part_attr->u1_y_start = 0; 379 ps_part_attr->u1_x_count = 2; 380 ps_part_attr->u1_y_count = 8; 381 382 ps_part_attr = &gas_part_attr_in_cu[PART_ID_nLx2N_R]; 383 ps_part_attr->u1_x_start = 2; 384 ps_part_attr->u1_y_start = 0; 385 ps_part_attr->u1_x_count = 6; 386 ps_part_attr->u1_y_count = 8; 387 388 ps_part_attr = &gas_part_attr_in_cu[PART_ID_nRx2N_L]; 389 ps_part_attr->u1_x_start = 0; 390 ps_part_attr->u1_y_start = 0; 391 ps_part_attr->u1_x_count = 6; 392 ps_part_attr->u1_y_count = 8; 393 394 ps_part_attr = &gas_part_attr_in_cu[PART_ID_nRx2N_R]; 395 ps_part_attr->u1_x_start = 6; 396 ps_part_attr->u1_y_start = 0; 397 ps_part_attr->u1_x_count = 2; 398 ps_part_attr->u1_y_count = 8; 399 } 400 for(i = 0; i < NUM_BLK_SIZES; i++) 401 ge_blk_size_to_cu_size[i] = CU_INVALID; 402 403 ge_blk_size_to_cu_size[BLK_8x8] = CU_8x8; 404 ge_blk_size_to_cu_size[BLK_16x16] = CU_16x16; 405 ge_blk_size_to_cu_size[BLK_32x32] = CU_32x32; 406 ge_blk_size_to_cu_size[BLK_64x64] = CU_64x64; 407 408 /* This is the reverse, given cU size, get blk size */ 409 ge_cu_size_to_blk_size[CU_8x8] = BLK_8x8; 410 ge_cu_size_to_blk_size[CU_16x16] = BLK_16x16; 411 ge_cu_size_to_blk_size[CU_32x32] = BLK_32x32; 412 
ge_cu_size_to_blk_size[CU_64x64] = BLK_64x64; 413 414 gau1_is_vert_part[PRT_2Nx2N] = 0; 415 gau1_is_vert_part[PRT_2NxN] = 0; 416 gau1_is_vert_part[PRT_Nx2N] = 1; 417 gau1_is_vert_part[PRT_NxN] = 1; 418 gau1_is_vert_part[PRT_2NxnU] = 0; 419 gau1_is_vert_part[PRT_2NxnD] = 0; 420 gau1_is_vert_part[PRT_nLx2N] = 1; 421 gau1_is_vert_part[PRT_nRx2N] = 1; 422 423 /* Initialise the number of best results for the full pell refinement */ 424 gau1_num_best_results_PQ[PART_ID_2Nx2N] = 2; 425 gau1_num_best_results_PQ[PART_ID_2NxN_T] = 0; 426 gau1_num_best_results_PQ[PART_ID_2NxN_B] = 0; 427 gau1_num_best_results_PQ[PART_ID_Nx2N_L] = 0; 428 gau1_num_best_results_PQ[PART_ID_Nx2N_R] = 0; 429 gau1_num_best_results_PQ[PART_ID_NxN_TL] = 1; 430 gau1_num_best_results_PQ[PART_ID_NxN_TR] = 1; 431 gau1_num_best_results_PQ[PART_ID_NxN_BL] = 1; 432 gau1_num_best_results_PQ[PART_ID_NxN_BR] = 1; 433 gau1_num_best_results_PQ[PART_ID_2NxnU_T] = 1; 434 gau1_num_best_results_PQ[PART_ID_2NxnU_B] = 0; 435 gau1_num_best_results_PQ[PART_ID_2NxnD_T] = 0; 436 gau1_num_best_results_PQ[PART_ID_2NxnD_B] = 1; 437 gau1_num_best_results_PQ[PART_ID_nLx2N_L] = 1; 438 gau1_num_best_results_PQ[PART_ID_nLx2N_R] = 0; 439 gau1_num_best_results_PQ[PART_ID_nRx2N_L] = 0; 440 gau1_num_best_results_PQ[PART_ID_nRx2N_R] = 1; 441 442 gau1_num_best_results_HQ[PART_ID_2Nx2N] = 2; 443 gau1_num_best_results_HQ[PART_ID_2NxN_T] = 0; 444 gau1_num_best_results_HQ[PART_ID_2NxN_B] = 0; 445 gau1_num_best_results_HQ[PART_ID_Nx2N_L] = 0; 446 gau1_num_best_results_HQ[PART_ID_Nx2N_R] = 0; 447 gau1_num_best_results_HQ[PART_ID_NxN_TL] = 1; 448 gau1_num_best_results_HQ[PART_ID_NxN_TR] = 1; 449 gau1_num_best_results_HQ[PART_ID_NxN_BL] = 1; 450 gau1_num_best_results_HQ[PART_ID_NxN_BR] = 1; 451 gau1_num_best_results_HQ[PART_ID_2NxnU_T] = 1; 452 gau1_num_best_results_HQ[PART_ID_2NxnU_B] = 0; 453 gau1_num_best_results_HQ[PART_ID_2NxnD_T] = 0; 454 gau1_num_best_results_HQ[PART_ID_2NxnD_B] = 1; 455 gau1_num_best_results_HQ[PART_ID_nLx2N_L] = 1; 456 
gau1_num_best_results_HQ[PART_ID_nLx2N_R] = 0; 457 gau1_num_best_results_HQ[PART_ID_nRx2N_L] = 0; 458 gau1_num_best_results_HQ[PART_ID_nRx2N_R] = 1; 459 460 gau1_num_best_results_MS[PART_ID_2Nx2N] = 2; 461 gau1_num_best_results_MS[PART_ID_2NxN_T] = 0; 462 gau1_num_best_results_MS[PART_ID_2NxN_B] = 0; 463 gau1_num_best_results_MS[PART_ID_Nx2N_L] = 0; 464 gau1_num_best_results_MS[PART_ID_Nx2N_R] = 0; 465 gau1_num_best_results_MS[PART_ID_NxN_TL] = 1; 466 gau1_num_best_results_MS[PART_ID_NxN_TR] = 1; 467 gau1_num_best_results_MS[PART_ID_NxN_BL] = 1; 468 gau1_num_best_results_MS[PART_ID_NxN_BR] = 1; 469 gau1_num_best_results_MS[PART_ID_2NxnU_T] = 1; 470 gau1_num_best_results_MS[PART_ID_2NxnU_B] = 0; 471 gau1_num_best_results_MS[PART_ID_2NxnD_T] = 0; 472 gau1_num_best_results_MS[PART_ID_2NxnD_B] = 1; 473 gau1_num_best_results_MS[PART_ID_nLx2N_L] = 1; 474 gau1_num_best_results_MS[PART_ID_nLx2N_R] = 0; 475 gau1_num_best_results_MS[PART_ID_nRx2N_L] = 0; 476 gau1_num_best_results_MS[PART_ID_nRx2N_R] = 1; 477 478 gau1_num_best_results_HS[PART_ID_2Nx2N] = 2; 479 gau1_num_best_results_HS[PART_ID_2NxN_T] = 0; 480 gau1_num_best_results_HS[PART_ID_2NxN_B] = 0; 481 gau1_num_best_results_HS[PART_ID_Nx2N_L] = 0; 482 gau1_num_best_results_HS[PART_ID_Nx2N_R] = 0; 483 gau1_num_best_results_HS[PART_ID_NxN_TL] = 0; 484 gau1_num_best_results_HS[PART_ID_NxN_TR] = 0; 485 gau1_num_best_results_HS[PART_ID_NxN_BL] = 0; 486 gau1_num_best_results_HS[PART_ID_NxN_BR] = 0; 487 gau1_num_best_results_HS[PART_ID_2NxnU_T] = 0; 488 gau1_num_best_results_HS[PART_ID_2NxnU_B] = 0; 489 gau1_num_best_results_HS[PART_ID_2NxnD_T] = 0; 490 gau1_num_best_results_HS[PART_ID_2NxnD_B] = 0; 491 gau1_num_best_results_HS[PART_ID_nLx2N_L] = 0; 492 gau1_num_best_results_HS[PART_ID_nLx2N_R] = 0; 493 gau1_num_best_results_HS[PART_ID_nRx2N_L] = 0; 494 gau1_num_best_results_HS[PART_ID_nRx2N_R] = 0; 495 496 gau1_num_best_results_XS[PART_ID_2Nx2N] = 2; 497 gau1_num_best_results_XS[PART_ID_2NxN_T] = 0; 498 
gau1_num_best_results_XS[PART_ID_2NxN_B] = 0; 499 gau1_num_best_results_XS[PART_ID_Nx2N_L] = 0; 500 gau1_num_best_results_XS[PART_ID_Nx2N_R] = 0; 501 gau1_num_best_results_XS[PART_ID_NxN_TL] = 0; 502 gau1_num_best_results_XS[PART_ID_NxN_TR] = 0; 503 gau1_num_best_results_XS[PART_ID_NxN_BL] = 0; 504 gau1_num_best_results_XS[PART_ID_NxN_BR] = 0; 505 gau1_num_best_results_XS[PART_ID_2NxnU_T] = 0; 506 gau1_num_best_results_XS[PART_ID_2NxnU_B] = 0; 507 gau1_num_best_results_XS[PART_ID_2NxnD_T] = 0; 508 gau1_num_best_results_XS[PART_ID_2NxnD_B] = 0; 509 gau1_num_best_results_XS[PART_ID_nLx2N_L] = 0; 510 gau1_num_best_results_XS[PART_ID_nLx2N_R] = 0; 511 gau1_num_best_results_XS[PART_ID_nRx2N_L] = 0; 512 gau1_num_best_results_XS[PART_ID_nRx2N_R] = 0; 513 514 gau1_num_best_results_XS25[PART_ID_2Nx2N] = MAX_NUM_CANDS_FOR_FPEL_REFINE_IN_XS25; 515 gau1_num_best_results_XS25[PART_ID_2NxN_T] = 0; 516 gau1_num_best_results_XS25[PART_ID_2NxN_B] = 0; 517 gau1_num_best_results_XS25[PART_ID_Nx2N_L] = 0; 518 gau1_num_best_results_XS25[PART_ID_Nx2N_R] = 0; 519 gau1_num_best_results_XS25[PART_ID_NxN_TL] = 0; 520 gau1_num_best_results_XS25[PART_ID_NxN_TR] = 0; 521 gau1_num_best_results_XS25[PART_ID_NxN_BL] = 0; 522 gau1_num_best_results_XS25[PART_ID_NxN_BR] = 0; 523 gau1_num_best_results_XS25[PART_ID_2NxnU_T] = 0; 524 gau1_num_best_results_XS25[PART_ID_2NxnU_B] = 0; 525 gau1_num_best_results_XS25[PART_ID_2NxnD_T] = 0; 526 gau1_num_best_results_XS25[PART_ID_2NxnD_B] = 0; 527 gau1_num_best_results_XS25[PART_ID_nLx2N_L] = 0; 528 gau1_num_best_results_XS25[PART_ID_nLx2N_R] = 0; 529 gau1_num_best_results_XS25[PART_ID_nRx2N_L] = 0; 530 gau1_num_best_results_XS25[PART_ID_nRx2N_R] = 0; 531 532 /* Top right validity for each part id */ 533 gau1_partid_tr_valid[PART_ID_2Nx2N] = 1; 534 gau1_partid_tr_valid[PART_ID_2NxN_T] = 1; 535 gau1_partid_tr_valid[PART_ID_2NxN_B] = 0; 536 gau1_partid_tr_valid[PART_ID_Nx2N_L] = 1; 537 gau1_partid_tr_valid[PART_ID_Nx2N_R] = 1; 538 
gau1_partid_tr_valid[PART_ID_NxN_TL] = 1; 539 gau1_partid_tr_valid[PART_ID_NxN_TR] = 1; 540 gau1_partid_tr_valid[PART_ID_NxN_BL] = 1; 541 gau1_partid_tr_valid[PART_ID_NxN_BR] = 0; 542 gau1_partid_tr_valid[PART_ID_2NxnU_T] = 1; 543 gau1_partid_tr_valid[PART_ID_2NxnU_B] = 0; 544 gau1_partid_tr_valid[PART_ID_2NxnD_T] = 1; 545 gau1_partid_tr_valid[PART_ID_2NxnD_B] = 0; 546 gau1_partid_tr_valid[PART_ID_nLx2N_L] = 1; 547 gau1_partid_tr_valid[PART_ID_nLx2N_R] = 1; 548 gau1_partid_tr_valid[PART_ID_nRx2N_L] = 1; 549 gau1_partid_tr_valid[PART_ID_nRx2N_R] = 1; 550 551 /* Bot Left validity for each part id */ 552 gau1_partid_bl_valid[PART_ID_2Nx2N] = 1; 553 gau1_partid_bl_valid[PART_ID_2NxN_T] = 1; 554 gau1_partid_bl_valid[PART_ID_2NxN_B] = 1; 555 gau1_partid_bl_valid[PART_ID_Nx2N_L] = 1; 556 gau1_partid_bl_valid[PART_ID_Nx2N_R] = 0; 557 gau1_partid_bl_valid[PART_ID_NxN_TL] = 1; 558 gau1_partid_bl_valid[PART_ID_NxN_TR] = 0; 559 gau1_partid_bl_valid[PART_ID_NxN_BL] = 1; 560 gau1_partid_bl_valid[PART_ID_NxN_BR] = 0; 561 gau1_partid_bl_valid[PART_ID_2NxnU_T] = 1; 562 gau1_partid_bl_valid[PART_ID_2NxnU_B] = 1; 563 gau1_partid_bl_valid[PART_ID_2NxnD_T] = 1; 564 gau1_partid_bl_valid[PART_ID_2NxnD_B] = 1; 565 gau1_partid_bl_valid[PART_ID_nLx2N_L] = 1; 566 gau1_partid_bl_valid[PART_ID_nLx2N_R] = 0; 567 gau1_partid_bl_valid[PART_ID_nRx2N_L] = 1; 568 gau1_partid_bl_valid[PART_ID_nRx2N_R] = 0; 569 570 /*Part id to part num of this partition id in the CU */ 571 gau1_part_id_to_part_num[PART_ID_2Nx2N] = 0; 572 gau1_part_id_to_part_num[PART_ID_2NxN_T] = 0; 573 gau1_part_id_to_part_num[PART_ID_2NxN_B] = 1; 574 gau1_part_id_to_part_num[PART_ID_Nx2N_L] = 0; 575 gau1_part_id_to_part_num[PART_ID_Nx2N_R] = 1; 576 gau1_part_id_to_part_num[PART_ID_NxN_TL] = 0; 577 gau1_part_id_to_part_num[PART_ID_NxN_TR] = 1; 578 gau1_part_id_to_part_num[PART_ID_NxN_BL] = 2; 579 gau1_part_id_to_part_num[PART_ID_NxN_BR] = 3; 580 gau1_part_id_to_part_num[PART_ID_2NxnU_T] = 0; 581 
gau1_part_id_to_part_num[PART_ID_2NxnU_B] = 1; 582 gau1_part_id_to_part_num[PART_ID_2NxnD_T] = 0; 583 gau1_part_id_to_part_num[PART_ID_2NxnD_B] = 1; 584 gau1_part_id_to_part_num[PART_ID_nLx2N_L] = 0; 585 gau1_part_id_to_part_num[PART_ID_nLx2N_R] = 1; 586 gau1_part_id_to_part_num[PART_ID_nRx2N_L] = 0; 587 gau1_part_id_to_part_num[PART_ID_nRx2N_R] = 1; 588 589 /*Which partition type does this partition id belong to */ 590 ge_part_id_to_part_type[PART_ID_2Nx2N] = PRT_2Nx2N; 591 ge_part_id_to_part_type[PART_ID_2NxN_T] = PRT_2NxN; 592 ge_part_id_to_part_type[PART_ID_2NxN_B] = PRT_2NxN; 593 ge_part_id_to_part_type[PART_ID_Nx2N_L] = PRT_Nx2N; 594 ge_part_id_to_part_type[PART_ID_Nx2N_R] = PRT_Nx2N; 595 ge_part_id_to_part_type[PART_ID_NxN_TL] = PRT_NxN; 596 ge_part_id_to_part_type[PART_ID_NxN_TR] = PRT_NxN; 597 ge_part_id_to_part_type[PART_ID_NxN_BL] = PRT_NxN; 598 ge_part_id_to_part_type[PART_ID_NxN_BR] = PRT_NxN; 599 ge_part_id_to_part_type[PART_ID_2NxnU_T] = PRT_2NxnU; 600 ge_part_id_to_part_type[PART_ID_2NxnU_B] = PRT_2NxnU; 601 ge_part_id_to_part_type[PART_ID_2NxnD_T] = PRT_2NxnD; 602 ge_part_id_to_part_type[PART_ID_2NxnD_B] = PRT_2NxnD; 603 ge_part_id_to_part_type[PART_ID_nLx2N_L] = PRT_nLx2N; 604 ge_part_id_to_part_type[PART_ID_nLx2N_R] = PRT_nLx2N; 605 ge_part_id_to_part_type[PART_ID_nRx2N_L] = PRT_nRx2N; 606 ge_part_id_to_part_type[PART_ID_nRx2N_R] = PRT_nRx2N; 607 608 /*************************************************************************/ 609 /* Set up the bits to be taken up for the part type. This is equally */ 610 /* divided up between the various partitions in the part-type. */ 611 /* For NxN @ CU 16x16, we assume it as CU 8x8, so consider it as */ 612 /* partition 2Nx2N. 
*/ 613 /*************************************************************************/ 614 /* 1 bit for 2Nx2N partition */ 615 gau1_bits_for_part_id_q1[PART_ID_2Nx2N] = 2; 616 617 /* 3 bits for symmetric part types, so 1.5 bits per partition */ 618 gau1_bits_for_part_id_q1[PART_ID_2NxN_T] = 3; 619 gau1_bits_for_part_id_q1[PART_ID_2NxN_B] = 3; 620 gau1_bits_for_part_id_q1[PART_ID_Nx2N_L] = 3; 621 gau1_bits_for_part_id_q1[PART_ID_Nx2N_R] = 3; 622 623 /* 1 bit for NxN partitions, assuming these to be 2Nx2N CUs of lower level */ 624 gau1_bits_for_part_id_q1[PART_ID_NxN_TL] = 2; 625 gau1_bits_for_part_id_q1[PART_ID_NxN_TR] = 2; 626 gau1_bits_for_part_id_q1[PART_ID_NxN_BL] = 2; 627 gau1_bits_for_part_id_q1[PART_ID_NxN_BR] = 2; 628 629 /* 4 bits for AMP so 2 bits per partition */ 630 gau1_bits_for_part_id_q1[PART_ID_2NxnU_T] = 4; 631 gau1_bits_for_part_id_q1[PART_ID_2NxnU_B] = 4; 632 gau1_bits_for_part_id_q1[PART_ID_2NxnD_T] = 4; 633 gau1_bits_for_part_id_q1[PART_ID_2NxnD_B] = 4; 634 gau1_bits_for_part_id_q1[PART_ID_nLx2N_L] = 4; 635 gau1_bits_for_part_id_q1[PART_ID_nLx2N_R] = 4; 636 gau1_bits_for_part_id_q1[PART_ID_nRx2N_L] = 4; 637 gau1_bits_for_part_id_q1[PART_ID_nRx2N_R] = 4; 638 } 639 640 /** 641 ******************************************************************************** 642 * @fn hme_enc_num_alloc() 643 * 644 * @brief returns number of memtabs that is required by hme module 645 * 646 * @return Number of memtabs required 647 ******************************************************************************** 648 */ 649 S32 hme_enc_num_alloc(WORD32 i4_num_me_frm_pllel) 650 { 651 if(i4_num_me_frm_pllel > 1) 652 { 653 return ((S32)MAX_HME_ENC_TOT_MEMTABS); 654 } 655 else 656 { 657 return ((S32)MIN_HME_ENC_TOT_MEMTABS); 658 } 659 } 660 661 /** 662 ******************************************************************************** 663 * @fn hme_coarse_num_alloc() 664 * 665 * @brief returns number of memtabs that is required by hme module 666 * 667 * @return Number of memtabs 
required 668 ******************************************************************************** 669 */ 670 S32 hme_coarse_num_alloc() 671 { 672 return ((S32)HME_COARSE_TOT_MEMTABS); 673 } 674 675 /** 676 ******************************************************************************** 677 * @fn hme_coarse_dep_mngr_num_alloc() 678 * 679 * @brief returns number of memtabs that is required by Dep Mngr for hme module 680 * 681 * @return Number of memtabs required 682 ******************************************************************************** 683 */ 684 WORD32 hme_coarse_dep_mngr_num_alloc() 685 { 686 return ((WORD32)((MAX_NUM_HME_LAYERS - 1) * ihevce_dmgr_get_num_mem_recs())); 687 } 688 689 S32 hme_validate_init_prms(hme_init_prms_t *ps_prms) 690 { 691 S32 n_layers = ps_prms->num_simulcast_layers; 692 693 /* The final layer has got to be a non encode coarse layer */ 694 if(n_layers > (MAX_NUM_LAYERS - 1)) 695 return (-1); 696 697 if(n_layers < 1) 698 return (-1); 699 700 /* Width of the coarsest encode layer got to be >= 2*min_wd where min_Wd */ 701 /* represents the min allowed width in any layer. 
Ditto with ht */ 702 if(ps_prms->a_wd[n_layers - 1] < 2 * (MIN_WD_COARSE)) 703 return (-1); 704 if(ps_prms->a_ht[n_layers - 1] < 2 * (MIN_HT_COARSE)) 705 return (-1); 706 if(ps_prms->max_num_ref > MAX_NUM_REF) 707 return (-1); 708 if(ps_prms->max_num_ref < 0) 709 return (-1); 710 711 return (0); 712 } 713 void hme_set_layer_res_attrs( 714 layer_ctxt_t *ps_layer, S32 wd, S32 ht, S32 disp_wd, S32 disp_ht, U08 u1_enc) 715 { 716 ps_layer->i4_wd = wd; 717 ps_layer->i4_ht = ht; 718 ps_layer->i4_disp_wd = disp_wd; 719 ps_layer->i4_disp_ht = disp_ht; 720 if(0 == u1_enc) 721 { 722 ps_layer->i4_inp_stride = wd + 32 + 4; 723 ps_layer->i4_inp_offset = (ps_layer->i4_inp_stride * 16) + 16; 724 ps_layer->i4_pad_x_inp = 16; 725 ps_layer->i4_pad_y_inp = 16; 726 ps_layer->pu1_inp = ps_layer->pu1_inp_base + ps_layer->i4_inp_offset; 727 } 728 } 729 730 /** 731 ******************************************************************************** 732 * @fn hme_coarse_get_layer1_mv_bank_ref_idx_size() 733 * 734 * @brief returns the MV bank and ref idx size of Layer 1 (penultimate) 735 * 736 * @return none 737 ******************************************************************************** 738 */ 739 void hme_coarse_get_layer1_mv_bank_ref_idx_size( 740 S32 n_tot_layers, 741 S32 *a_wd, 742 S32 *a_ht, 743 S32 max_num_ref, 744 S32 *pi4_mv_bank_size, 745 S32 *pi4_ref_idx_size) 746 { 747 S32 num_blks, num_mvs_per_blk, num_ref; 748 S32 num_cols, num_rows, num_mvs_per_row; 749 S32 is_explicit_store = 1; 750 S32 wd, ht, num_layers_explicit_search; 751 S32 num_results, use_4x4; 752 wd = a_wd[1]; 753 ht = a_ht[1]; 754 755 /* Assuming abt 4 layers for 1080p, we do explicit search across all ref */ 756 /* frames in all but final layer In final layer, it could be 1/2 */ 757 //ps_hme_init_prms->num_layers_explicit_search = 3; 758 num_layers_explicit_search = 3; 759 760 if(num_layers_explicit_search <= 0) 761 num_layers_explicit_search = n_tot_layers - 1; 762 763 num_layers_explicit_search = 
MIN(num_layers_explicit_search, n_tot_layers - 1); 764 765 /* Possibly implicit search for lower (finer) layers */ 766 if(n_tot_layers - 1 > num_layers_explicit_search) 767 is_explicit_store = 0; 768 769 /* coarsest layer alwasy uses 4x4 blks to store results */ 770 if(1 == (n_tot_layers - 1)) 771 { 772 /* we store 4 results in coarsest layer per blk. 8x4L, 8x4R, 4x8T, 4x8B */ 773 //ps_hme_init_prms->max_num_results_coarse = 4; 774 //vijay : with new algo in coarseset layer this has to be revisited 775 num_results = 4; 776 } 777 else 778 { 779 /* Every refinement layer stores a max of 2 results per partition */ 780 //ps_hme_init_prms->max_num_results = 2; 781 num_results = 2; 782 } 783 use_4x4 = hme_get_mv_blk_size(1, 1, n_tot_layers, 0); 784 785 num_cols = use_4x4 ? ((wd >> 2) + 2) : ((wd >> 3) + 2); 786 num_rows = use_4x4 ? ((ht >> 2) + 2) : ((ht >> 3) + 2); 787 788 if(is_explicit_store) 789 num_ref = max_num_ref; 790 else 791 num_ref = 2; 792 793 num_blks = num_cols * num_rows; 794 num_mvs_per_blk = num_ref * num_results; 795 num_mvs_per_row = num_mvs_per_blk * num_cols; 796 797 /* stroe the sizes */ 798 *pi4_mv_bank_size = num_blks * num_mvs_per_blk * sizeof(hme_mv_t); 799 *pi4_ref_idx_size = num_blks * num_mvs_per_blk * sizeof(S08); 800 801 return; 802 } 803 /** 804 ******************************************************************************** 805 * @fn hme_alloc_init_layer_mv_bank() 806 * 807 * @brief memory alloc and init function for MV bank 808 * 809 * @return Number of memtabs required 810 ******************************************************************************** 811 */ 812 S32 hme_alloc_init_layer_mv_bank( 813 hme_memtab_t *ps_memtab, 814 S32 max_num_results, 815 S32 max_num_ref, 816 S32 use_4x4, 817 S32 mem_avail, 818 S32 u1_enc, 819 S32 wd, 820 S32 ht, 821 S32 is_explicit_store, 822 hme_mv_t **pps_mv_base, 823 S08 **pi1_ref_idx_base, 824 S32 *pi4_num_mvs_per_row) 825 { 826 S32 count = 0; 827 S32 size; 828 S32 num_blks, num_mvs_per_blk; 829 S32 
num_ref; 830 S32 num_cols, num_rows, num_mvs_per_row; 831 832 if(is_explicit_store) 833 num_ref = max_num_ref; 834 else 835 num_ref = 2; 836 837 /* MV Bank allocation takes into consideration following */ 838 /* number of results per reference x max num refrences is the amount */ 839 /* bufffered up per blk. Numbero f blks in pic deps on the blk size, */ 840 /* which could be either 4x4 or 8x8. */ 841 num_cols = use_4x4 ? ((wd >> 2) + 2) : ((wd >> 3) + 2); 842 num_rows = use_4x4 ? ((ht >> 2) + 2) : ((ht >> 3) + 2); 843 844 if(u1_enc) 845 { 846 /* TODO: CTB64x64 is assumed. FIX according to actual CTB */ 847 WORD32 num_ctb_cols = ((wd + 63) >> 6); 848 WORD32 num_ctb_rows = ((ht + 63) >> 6); 849 850 num_cols = (num_ctb_cols << 3) + 2; 851 num_rows = (num_ctb_rows << 3) + 2; 852 } 853 num_blks = num_cols * num_rows; 854 num_mvs_per_blk = num_ref * max_num_results; 855 num_mvs_per_row = num_mvs_per_blk * num_cols; 856 857 size = num_blks * num_mvs_per_blk * sizeof(hme_mv_t); 858 if(mem_avail) 859 { 860 /* store this for run time verifications */ 861 *pi4_num_mvs_per_row = num_mvs_per_row; 862 ASSERT(ps_memtab[count].size == size); 863 *pps_mv_base = (hme_mv_t *)ps_memtab[count].pu1_mem; 864 } 865 else 866 { 867 ps_memtab[count].size = size; 868 ps_memtab[count].align = 4; 869 ps_memtab[count].e_mem_attr = HME_PERSISTENT_MEM; 870 } 871 872 count++; 873 /* Ref idx takes the same route as mvbase */ 874 875 size = num_blks * num_mvs_per_blk * sizeof(S08); 876 if(mem_avail) 877 { 878 ASSERT(ps_memtab[count].size == size); 879 *pi1_ref_idx_base = (S08 *)ps_memtab[count].pu1_mem; 880 } 881 else 882 { 883 ps_memtab[count].size = size; 884 ps_memtab[count].align = 4; 885 ps_memtab[count].e_mem_attr = HME_PERSISTENT_MEM; 886 } 887 count++; 888 889 return (count); 890 } 891 /** 892 ******************************************************************************** 893 * @fn hme_alloc_init_layer() 894 * 895 * @brief memory alloc and init function 896 * 897 * @return Number of memtabs 
required 898 ******************************************************************************** 899 */ 900 S32 hme_alloc_init_layer( 901 hme_memtab_t *ps_memtab, 902 S32 max_num_results, 903 S32 max_num_ref, 904 S32 use_4x4, 905 S32 mem_avail, 906 S32 u1_enc, 907 S32 wd, 908 S32 ht, 909 S32 disp_wd, 910 S32 disp_ht, 911 S32 segment_layer, 912 S32 is_explicit_store, 913 layer_ctxt_t **pps_layer) 914 { 915 S32 count = 0; 916 layer_ctxt_t *ps_layer = NULL; 917 S32 size; 918 S32 num_ref; 919 920 ARG_NOT_USED(segment_layer); 921 922 if(is_explicit_store) 923 num_ref = max_num_ref; 924 else 925 num_ref = 2; 926 927 /* We do not store 4x4 results for encoding layers */ 928 if(u1_enc) 929 use_4x4 = 0; 930 931 size = sizeof(layer_ctxt_t); 932 if(mem_avail) 933 { 934 ASSERT(ps_memtab[count].size == size); 935 ps_layer = (layer_ctxt_t *)ps_memtab[count].pu1_mem; 936 *pps_layer = ps_layer; 937 } 938 else 939 { 940 ps_memtab[count].size = size; 941 ps_memtab[count].align = 8; 942 ps_memtab[count].e_mem_attr = HME_PERSISTENT_MEM; 943 } 944 945 count++; 946 947 /* Input luma buffer allocated only for non encode case */ 948 if(0 == u1_enc) 949 { 950 /* Allocate input with padding of 16 pixels */ 951 size = (wd + 32 + 4) * (ht + 32 + 4); 952 if(mem_avail) 953 { 954 ASSERT(ps_memtab[count].size == size); 955 ps_layer->pu1_inp_base = ps_memtab[count].pu1_mem; 956 } 957 else 958 { 959 ps_memtab[count].size = size; 960 ps_memtab[count].align = 16; 961 ps_memtab[count].e_mem_attr = HME_PERSISTENT_MEM; 962 } 963 count++; 964 } 965 966 /* Allocate memory or just the layer mvbank strcture. 
*/ 967 /* TODO : see if this can be removed by moving it to layer_ctxt */ 968 size = sizeof(layer_mv_t); 969 970 if(mem_avail) 971 { 972 ASSERT(ps_memtab[count].size == size); 973 ps_layer->ps_layer_mvbank = (layer_mv_t *)ps_memtab[count].pu1_mem; 974 } 975 else 976 { 977 ps_memtab[count].size = size; 978 ps_memtab[count].align = 8; 979 ps_memtab[count].e_mem_attr = HME_PERSISTENT_MEM; 980 } 981 982 count++; 983 984 if(mem_avail) 985 { 986 hme_set_layer_res_attrs(ps_layer, wd, ht, disp_wd, disp_ht, u1_enc); 987 } 988 989 return (count); 990 } 991 992 S32 hme_alloc_init_search_nodes( 993 search_results_t *ps_search_results, 994 hme_memtab_t *ps_memtabs, 995 S32 mem_avail, 996 S32 max_num_ref, 997 S32 max_num_results) 998 { 999 S32 size = max_num_results * sizeof(search_node_t) * max_num_ref * TOT_NUM_PARTS; 1000 S32 j, k; 1001 search_node_t *ps_search_node; 1002 1003 if(mem_avail == 0) 1004 { 1005 ps_memtabs->size = size; 1006 ps_memtabs->align = 4; 1007 ps_memtabs->e_mem_attr = HME_SCRATCH_OVLY_MEM; 1008 return (1); 1009 } 1010 1011 ps_search_node = (search_node_t *)ps_memtabs->pu1_mem; 1012 ASSERT(ps_memtabs->size == size); 1013 /****************************************************************************/ 1014 /* For each CU, we search and store N best results, per partition, per ref */ 1015 /* So, number of memtabs is num_refs * num_parts */ 1016 /****************************************************************************/ 1017 for(j = 0; j < max_num_ref; j++) 1018 { 1019 for(k = 0; k < TOT_NUM_PARTS; k++) 1020 { 1021 ps_search_results->aps_part_results[j][k] = ps_search_node; 1022 ps_search_node += max_num_results; 1023 } 1024 } 1025 return (1); 1026 } 1027 1028 S32 hme_derive_num_layers(S32 n_enc_layers, S32 *p_wd, S32 *p_ht, S32 *p_disp_wd, S32 *p_disp_ht) 1029 { 1030 S32 i; 1031 /* We keep downscaling by 2 till we hit one of the conditions: */ 1032 /* 1. MAX_NUM_LAYERS reached. */ 1033 /* 2. 
Width or ht goes below min width and ht allowed at coarsest layer */ 1034 ASSERT(n_enc_layers < MAX_NUM_LAYERS); 1035 ASSERT(n_enc_layers > 0); 1036 ASSERT(p_wd[0] <= HME_MAX_WIDTH); 1037 ASSERT(p_ht[0] <= HME_MAX_HEIGHT); 1038 1039 p_disp_wd[0] = p_wd[0]; 1040 p_disp_ht[0] = p_ht[0]; 1041 /*************************************************************************/ 1042 /* Verify that for simulcast, lower layer to higher layer ratio is bet */ 1043 /* 2 (dyadic) and 1.33. Typically it should be 1.5. */ 1044 /* TODO : for interlace, we may choose to have additional downscaling for*/ 1045 /* width alone in coarsest layer to next layer. */ 1046 /*************************************************************************/ 1047 for(i = 1; i < n_enc_layers; i++) 1048 { 1049 S32 wd1, wd2, ht1, ht2; 1050 wd1 = FLOOR16(p_wd[i - 1] >> 1); 1051 wd2 = CEIL16((p_wd[i - 1] * 3) >> 2); 1052 ASSERT(p_wd[i] >= wd1); 1053 ASSERT(p_wd[i] <= wd2); 1054 ht1 = FLOOR16(p_ht[i - 1] >> 1); 1055 ht2 = CEIL16((p_ht[i - 1] * 3) >> 2); 1056 ASSERT(p_ht[i] >= ht1); 1057 ASSERT(p_ht[i] <= ht2); 1058 } 1059 ASSERT(p_wd[n_enc_layers - 1] >= 2 * MIN_WD_COARSE); 1060 ASSERT(p_ht[n_enc_layers - 1] >= 2 * MIN_HT_COARSE); 1061 1062 for(i = n_enc_layers; i < MAX_NUM_LAYERS; i++) 1063 { 1064 if((p_wd[i - 1] < 2 * MIN_WD_COARSE) || (p_ht[i - 1] < 2 * MIN_HT_COARSE)) 1065 { 1066 return (i); 1067 } 1068 /* Use CEIL16 to facilitate 16x16 searches in future, or to do */ 1069 /* segmentation study in future */ 1070 p_wd[i] = CEIL16(p_wd[i - 1] >> 1); 1071 p_ht[i] = CEIL16(p_ht[i - 1] >> 1); 1072 1073 p_disp_wd[i] = p_disp_wd[i - 1] >> 1; 1074 p_disp_ht[i] = p_disp_ht[i - 1] >> 1; 1075 } 1076 return (i); 1077 } 1078 1079 /** 1080 ******************************************************************************** 1081 * @fn hme_get_mv_blk_size() 1082 * 1083 * @brief returns whether blk uses 4x4 size or something else. 
*
*  @param[in] enable_4x4 : input param from application to enable 4x4
*
*  @param[in] layer_id : id of current layer (0 finest)
*
*  @param[in] num_layers : total num layers
*
*  @param[in] is_enc : Whether encoding enabled for layer
*
*  @return   1 for 4x4 blks, 0 for 8x8
********************************************************************************
*/
S32 hme_get_mv_blk_size(S32 enable_4x4, S32 layer_id, S32 num_layers, S32 is_enc)
{
    S32 use_4x4 = enable_4x4;

    /* The checks below are applied in order, so a later one overrides an */
    /* earlier one: encode layer > coarsest layer > L0/L1 override        */
    if((layer_id <= 1) && (num_layers >= 4))
        use_4x4 = USE_4x4_IN_L1;
    if(layer_id == num_layers - 1)
        use_4x4 = 1;
    if(is_enc)
        use_4x4 = 0;

    return (use_4x4);
}

/**
********************************************************************************
*  @fn     hme_enc_alloc_init_mem()
*
*  @brief  Requests/ assign memory based on mem avail. With mem_avail == 0 the
*          size, alignment and attribute of every memtab are filled in. With
*          mem_avail != 0 the memory attached to every memtab is distributed
*          to the master, thread and frame contexts. Memtabs are visited in
*          the same order in both modes.
*
*  @param[in] ps_memtabs : memtab array
*
*  @param[in] ps_prms : init prms
*
*  @param[in] pv_ctxt : ME ctxt (used as me_master_ctxt_t), accessed only
*                       when mem_avail is non zero
*
*  @param[in] mem_avail : request/assign flag
*
*  @param[in] i4_num_me_frm_pllel : number of ME frames run in parallel. When
*             it is 1, the per frame memtabs are requested once and shared by
*             all MAX_NUM_ME_PARALLEL frame contexts
*
*  @return   number of memtabs
********************************************************************************
*/
S32 hme_enc_alloc_init_mem(
    hme_memtab_t *ps_memtabs,
    hme_init_prms_t *ps_prms,
    void *pv_ctxt,
    S32 mem_avail,
    S32 i4_num_me_frm_pllel)
{
    me_master_ctxt_t *ps_master_ctxt = (me_master_ctxt_t *)pv_ctxt;
    me_ctxt_t *ps_ctxt;
    S32 count = 0, size, i, j, use_4x4;
    S32 n_tot_layers, n_enc_layers;
    S32 num_layers_explicit_search;
    S32 a_wd[MAX_NUM_LAYERS], a_ht[MAX_NUM_LAYERS];
    S32 a_disp_wd[MAX_NUM_LAYERS], a_disp_ht[MAX_NUM_LAYERS];
    S32 num_results;
    S32 num_thrds;
    S32 ctb_wd = 1 << ps_prms->log_ctb_size;

    /* MV bank changes */
    /* NOTE(review): these arrays are indexed below up to                  */
    /* max_num_ref * i4_num_me_frm_pllel; nothing in this function checks  */
    /* that against the array size - confirm the caller bounds it          */
    hme_mv_t *aps_mv_bank[((DEFAULT_MAX_REFERENCE_PICS << 1) * MAX_NUM_ME_PARALLEL) + 1] = { NULL };
    S32 i4_num_mvs_per_row = 0;
    S08 *api1_ref_idx[((DEFAULT_MAX_REFERENCE_PICS << 1) * MAX_NUM_ME_PARALLEL) + 1] = { NULL };

    n_enc_layers = ps_prms->num_simulcast_layers;

    /* Memtab 0: handle */
    /* The handle memory itself arrives through pv_ctxt, so in assign mode */
    /* this memtab is only counted                                         */
    size = sizeof(me_master_ctxt_t);
    if(mem_avail)
    {
        /* store the number of processing threads */
        ps_master_ctxt->i4_num_proc_thrds = ps_prms->i4_num_proc_thrds;
    }
    else
    {
        ps_memtabs[count].size = size;
        ps_memtabs[count].align = 8;
        ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
    }

    count++;

    /* Memtab 1: ME threads ctxt */
    size = ps_prms->i4_num_proc_thrds * sizeof(me_ctxt_t);
    if(mem_avail)
    {
        me_ctxt_t *ps_me_tmp_ctxt = (me_ctxt_t *)ps_memtabs[count].pu1_mem;

        /* store the individual thread ctxt pointers */
        for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
        {
            ps_master_ctxt->aps_me_ctxt[num_thrds] = ps_me_tmp_ctxt++;
        }
    }
    else
    {
        ps_memtabs[count].size = size;
        ps_memtabs[count].align = 8;
        ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
    }

    count++;

    /* Memtab 2: ME frame ctxts */
    /* Always sized for MAX_NUM_ME_PARALLEL frame ctxts per thread,        */
    /* irrespective of i4_num_me_frm_pllel                                 */
    size = sizeof(me_frm_ctxt_t) * MAX_NUM_ME_PARALLEL * ps_prms->i4_num_proc_thrds;
    if(mem_avail)
    {
        me_frm_ctxt_t *ps_me_frm_tmp_ctxt = (me_frm_ctxt_t *)ps_memtabs[count].pu1_mem;

        for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
        {
            /* store the individual frame ctxt pointers in each thread ctxt */
            for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
            {
                ps_master_ctxt->aps_me_ctxt[num_thrds]->aps_me_frm_prms[i] = ps_me_frm_tmp_ctxt;

                ps_me_frm_tmp_ctxt++;
            }
        }
    }
    else
    {
        ps_memtabs[count].size = size;
        ps_memtabs[count].align = 8;
        ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
    }

    count++;

    /* Only the encode layer entries are copied; the remaining entries of */
    /* a_wd / a_ht are filled by hme_derive_num_layers()                  */
    memcpy(a_wd, ps_prms->a_wd, sizeof(S32) * ps_prms->num_simulcast_layers);
    memcpy(a_ht, ps_prms->a_ht, sizeof(S32) * ps_prms->num_simulcast_layers);
    /*************************************************************************/
    /* Derive the number of HME layers, including both encoded and non encode*/
    /* This function also derives the width and ht of each layer.            */
    /*************************************************************************/
    n_tot_layers = hme_derive_num_layers(n_enc_layers, a_wd, a_ht, a_disp_wd, a_disp_ht);
    /* A non positive request means explicit search in all layers but one; */
    /* the value is in any case clamped to n_tot_layers - 1                */
    num_layers_explicit_search = ps_prms->num_layers_explicit_search;
    if(num_layers_explicit_search <= 0)
        num_layers_explicit_search = n_tot_layers - 1;

    num_layers_explicit_search = MIN(num_layers_explicit_search, n_tot_layers - 1);

    if(mem_avail)
    {
        for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
        {
            me_frm_ctxt_t *ps_frm_ctxt;
            ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];

            for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
            {
                ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[i];

                /* Clear the flag for all layers, then set it for the */
                /* first n_enc_layers entries                         */
                memset(ps_frm_ctxt->u1_encode, 0, n_tot_layers);
                memset(ps_frm_ctxt->u1_encode, 1, n_enc_layers);

                /* only one encode layer is used */
                ps_frm_ctxt->num_layers = 1;

                ps_frm_ctxt->i4_wd = a_wd[0];
                ps_frm_ctxt->i4_ht = a_ht[0];
                /*
            memcpy(ps_ctxt->a_wd, a_wd, sizeof(S32)*n_tot_layers);
            memcpy(ps_ctxt->a_ht, a_ht, sizeof(S32)*n_tot_layers);
*/
                ps_frm_ctxt->num_layers_explicit_search = num_layers_explicit_search;
                ps_frm_ctxt->max_num_results = ps_prms->max_num_results;
                ps_frm_ctxt->max_num_results_coarse = ps_prms->max_num_results_coarse;
                ps_frm_ctxt->max_num_ref = ps_prms->max_num_ref;
            }
        }
    }

    /* Memtabs : Layers MV bank for encode layer */
    /* Each ref_descr in master ctxt will have separate layer ctxt */

    for(i = 0; i < (ps_prms->max_num_ref * i4_num_me_frm_pllel) + 1; i++)
    {
        /* Single iteration : only layer 0 (j == 0) gets an MV bank here */
        for(j = 0; j < 1; j++)
        {
            S32 is_explicit_store = 1;
            S32 wd, ht;
            U08 u1_enc = 1;
            wd = a_wd[j];
            ht = a_ht[j];

            /* Possibly implicit search for lower (finer) layers */
            if(n_tot_layers - j > num_layers_explicit_search)
                is_explicit_store = 0;

            /* Even if explicit search, we store only 2 results (L0 and L1) */
            /* in finest layer */
            if(j == 0)
            {
                is_explicit_store = 0;
            }

            /* Number of results stored : the coarsest layer uses the      */
            /* coarse limit, the finest layer stores a single result       */
            if(j == n_tot_layers - 1)
            {
                num_results = ps_prms->max_num_results_coarse;
            }
            else
            {
                num_results = ps_prms->max_num_results;
                if(j == 0)
                    num_results = 1;
            }
            use_4x4 = hme_get_mv_blk_size(ps_prms->use_4x4, j, n_tot_layers, u1_enc);

            count += hme_alloc_init_layer_mv_bank(
                &ps_memtabs[count],
                num_results,
                ps_prms->max_num_ref,
                use_4x4,
                mem_avail,
                u1_enc,
                wd,
                ht,
                is_explicit_store,
                &aps_mv_bank[i],
                &api1_ref_idx[i],
                &i4_num_mvs_per_row);
        }
    }

    /* Memtabs : Layers * num-ref + 1 */
    for(i = 0; i < (ps_prms->max_num_ref * i4_num_me_frm_pllel) + 1; i++)
    {
        /* layer memory allocated only for encode layer */
        for(j = 0; j < 1; j++)
        {
            layer_ctxt_t *ps_layer;
            S32 is_explicit_store = 1;
            S32 segment_this_layer = (j == 0) ? 1 : ps_prms->segment_higher_layers;
            S32 wd, ht;
            U08 u1_enc = 1;
            wd = a_wd[j];
            ht = a_ht[j];

            /* Possibly implicit search for lower (finer) layers */
            if(n_tot_layers - j > num_layers_explicit_search)
                is_explicit_store = 0;

            /* Even if explicit search, we store only 2 results (L0 and L1) */
            /* in finest layer */
            if(j == 0)
            {
                is_explicit_store = 0;
            }

            /* Number of results stored : same selection as for the MV */
            /* bank memtabs above                                      */
            if(j == n_tot_layers - 1)
            {
                num_results = ps_prms->max_num_results_coarse;
            }
            else
            {
                num_results = ps_prms->max_num_results;
                if(j == 0)
                    num_results = 1;
            }
            use_4x4 = hme_get_mv_blk_size(ps_prms->use_4x4, j, n_tot_layers, u1_enc);

            /* ps_layer is written by the call only when mem_avail is set */
            count += hme_alloc_init_layer(
                &ps_memtabs[count],
                num_results,
                ps_prms->max_num_ref,
                use_4x4,
                mem_avail,
                u1_enc,
                wd,
                ht,
                a_disp_wd[j],
                a_disp_ht[j],
                segment_this_layer,
                is_explicit_store,
                &ps_layer);
            if(mem_avail)
            {
                /* same ps_layer memory pointer is stored in all the threads */
                for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
                {
                    ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
                    ps_ctxt->as_ref_descr[i].aps_layers[j] = ps_layer;
                }

                /* store the MV bank pointers */
                ps_layer->ps_layer_mvbank->max_num_mvs_per_row = i4_num_mvs_per_row;
                ps_layer->ps_layer_mvbank->ps_mv_base = aps_mv_bank[i];
                ps_layer->ps_layer_mvbank->pi1_ref_idx_base = api1_ref_idx[i];
            }
        }
    }

    /* Memtabs : Buf Mgr for predictor bufs and working mem */
    /* TODO : Parameterise this appropriately */
    size = MAX_WKG_MEM_SIZE_PER_THREAD * ps_prms->i4_num_proc_thrds * i4_num_me_frm_pllel;

    if(mem_avail)
    {
        U08 *pu1_mem = ps_memtabs[count].pu1_mem;

        ASSERT(ps_memtabs[count].size == size);

        for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
        {
            me_frm_ctxt_t *ps_frm_ctxt;
            ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];

            for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
            {
                ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[i];

                hme_init_wkg_mem(&ps_frm_ctxt->s_buf_mgr, pu1_mem, MAX_WKG_MEM_SIZE_PER_THREAD);

                if(i4_num_me_frm_pllel != 1)
                {
                    /* update the memory buffer pointer */
                    pu1_mem += MAX_WKG_MEM_SIZE_PER_THREAD;
                }
            }
            /* With a single ME frame, all frame ctxts of this thread were */
            /* given the same buffer; move on once per thread              */
            if(i4_num_me_frm_pllel == 1)
            {
                pu1_mem += MAX_WKG_MEM_SIZE_PER_THREAD;
            }
        }
    }
    else
    {
        ps_memtabs[count].size = size;
        ps_memtabs[count].align = 4;
        ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
    }
    count++;

    /*************************************************************************/
    /* Memtab : We need 64x64 buffer to store the entire CTB input for bidir */
    /* refinement. This memtab stores 2I - P0, I is input and P0 is L0 pred  */
    /*************************************************************************/
    size = sizeof(S16) * CTB_BLK_SIZE * CTB_BLK_SIZE * ps_prms->i4_num_proc_thrds *
           i4_num_me_frm_pllel;

    if(mem_avail)
    {
        S16 *pi2_mem = (S16 *)ps_memtabs[count].pu1_mem;

        ASSERT(ps_memtabs[count].size == size);

        for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
        {
            me_frm_ctxt_t *ps_frm_ctxt;
            ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];

            for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
            {
                ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[i];

                ps_frm_ctxt->pi2_inp_bck = pi2_mem;
                /** If no me frames running in parallel update the other aps_me_frm_prms indices with same memory **/
                if(i4_num_me_frm_pllel != 1)
                {
                    pi2_mem += (CTB_BLK_SIZE * CTB_BLK_SIZE);
                }
            }
            if(i4_num_me_frm_pllel == 1)
            {
                pi2_mem += (CTB_BLK_SIZE * CTB_BLK_SIZE);
            }
        }
    }
    else
    {
        ps_memtabs[count].size = size;
        ps_memtabs[count].align = 16;
        ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
    }

    count++;

    /* Allocate a memtab for each histogram. As many as num ref and number of threads */
    /* Loop across for each ME_FRM in PARALLEL */
    for(j = 0; j < MAX_NUM_ME_PARALLEL; j++)
    {
        for(i = 0; i < ps_prms->max_num_ref; i++)
        {
            size = ps_prms->i4_num_proc_thrds * sizeof(mv_hist_t);
            if(mem_avail)
            {
                mv_hist_t *ps_mv_hist = (mv_hist_t *)ps_memtabs[count].pu1_mem;

                ASSERT(size == ps_memtabs[count].size);

                /* divide the memory across the threads */
                for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
                {
                    ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];

                    ps_ctxt->aps_me_frm_prms[j]->aps_mv_hist[i] = ps_mv_hist;
                    ps_mv_hist++;
                }
            }
            else
            {
                ps_memtabs[count].size = size;
                ps_memtabs[count].align = 8;
                ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
            }
            count++;
        }
        if((i4_num_me_frm_pllel == 1) && (j != (MAX_NUM_ME_PARALLEL - 1)))
        {
            /** If no me frames running in parallel update the other aps_me_frm_prms indices with same memory **/
            /** bring the count back to earlier value if there are no me frames in parallel. don't decrement for last loop **/
            count -= ps_prms->max_num_ref;
        }
    }

    /* Memtabs : Search nodes for 16x16 CUs, 32x32 and 64x64 CUs */
    for(j = 0; j < MAX_NUM_ME_PARALLEL; j++)
    {
        /* Remembered so that the same memtabs can be revisited for the */
        /* next frame ctxt when only one ME frame is in flight          */
        S32 count_cpy = count;
        for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
        {
            if(mem_avail)
            {
                ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
            }

            /* 21 result structures per thread : indices 0..15 are the    */
            /* 16x16 results, 16..19 the 32x32 results, 20 the 64x64 one  */
            for(i = 0; i < 21; i++)
            {
                search_results_t *ps_search_results = NULL;
                if(mem_avail)
                {
                    if(i < 16)
                    {
                        ps_search_results =
                            &ps_ctxt->aps_me_frm_prms[j]->as_search_results_16x16[i];
                    }
                    else if(i < 20)
                    {
                        ps_search_results =
                            &ps_ctxt->aps_me_frm_prms[j]->as_search_results_32x32[i - 16];
                        ps_search_results->ps_cu_results =
                            &ps_ctxt->aps_me_frm_prms[j]->as_cu32x32_results[i - 16];
                    }
                    else if(i == 20)
                    {
                        ps_search_results = &ps_ctxt->aps_me_frm_prms[j]->s_search_results_64x64;
                        ps_search_results->ps_cu_results =
                            &ps_ctxt->aps_me_frm_prms[j]->s_cu64x64_results;
                    }
                    else
                    {
                        /* 8x8 search results are not required in LO ME */
                        /* Not reachable with the loop bound of 21 above */
                        ASSERT(0);
                    }
                }
                /* Search nodes are always sized for 2 references here */
                count += hme_alloc_init_search_nodes(
                    ps_search_results, &ps_memtabs[count], mem_avail, 2, ps_prms->max_num_results);
            }
        }

        if((i4_num_me_frm_pllel == 1) && (j != (MAX_NUM_ME_PARALLEL - 1)))
        {
            count = count_cpy;
        }
    }

    /* Weighted inputs, one for each ref + one non weighted */
    for(j = 0; j < MAX_NUM_ME_PARALLEL; j++)
    {
        size = (ps_prms->max_num_ref + 1) * ctb_wd * ctb_wd * ps_prms->i4_num_proc_thrds;
        if(mem_avail)
        {
            U08 *pu1_mem;
            ASSERT(ps_memtabs[count].size == size);
            pu1_mem = ps_memtabs[count].pu1_mem;

            for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
            {
                ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];

                for(i = 0; i < ps_prms->max_num_ref + 1; i++)
                {
                    ps_ctxt->aps_me_frm_prms[j]->s_wt_pred.apu1_wt_inp_buf_array[i] = pu1_mem;
                    pu1_mem += (ctb_wd * ctb_wd);
                }
            }
        }
        else
        {
            ps_memtabs[count].size = size;
            ps_memtabs[count].align = 16;
            ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
        }
        /* With a single ME frame the same memtab serves every frame ctxt, */
        /* so count moves on only after the last one                       */
        if((i4_num_me_frm_pllel != 1) || (j == (MAX_NUM_ME_PARALLEL - 1)))
        {
            count++;
        }
    }

    /* if memory is allocated then initialise the frm prms ptr to each thrd */
    if(mem_avail)
    {
        for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
        {
            me_frm_ctxt_t *ps_frm_ctxt;
            ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];

            for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
            {
                ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[i];

                ps_frm_ctxt->ps_hme_frm_prms = &ps_master_ctxt->as_frm_prms[i];
                ps_frm_ctxt->ps_hme_ref_map = &ps_master_ctxt->as_ref_map[i];
            }
        }
    }

    /* Memory allocation for use in Clustering */
    if(ps_prms->s_me_coding_tools.e_me_quality_presets == ME_PRISTINE_QUALITY)
    {
        for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
        {
            /* Per thread : 16 16x16 blks, 4 32x32 blks, one 64x64 blk and */
            /* one ctb cluster info, laid out back to back                 */
            size = 16 * sizeof(cluster_16x16_blk_t) + 4 * sizeof(cluster_32x32_blk_t) +
                   sizeof(cluster_64x64_blk_t) + sizeof(ctb_cluster_info_t);
            size *= ps_prms->i4_num_proc_thrds;

            if(mem_avail)
            {
                U08 *pu1_mem;

                ASSERT(ps_memtabs[count].size == size);
                pu1_mem = ps_memtabs[count].pu1_mem;

                for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
                {
                    ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];

                    ps_ctxt->aps_me_frm_prms[i]->ps_blk_16x16 = (cluster_16x16_blk_t *)pu1_mem;
                    pu1_mem += (16 * sizeof(cluster_16x16_blk_t));

                    ps_ctxt->aps_me_frm_prms[i]->ps_blk_32x32 = (cluster_32x32_blk_t *)pu1_mem;
                    pu1_mem += (4 * sizeof(cluster_32x32_blk_t));

                    ps_ctxt->aps_me_frm_prms[i]->ps_blk_64x64 = (cluster_64x64_blk_t *)pu1_mem;
                    pu1_mem += (sizeof(cluster_64x64_blk_t));

                    ps_ctxt->aps_me_frm_prms[i]->ps_ctb_cluster_info =
                        (ctb_cluster_info_t *)pu1_mem;
                    pu1_mem += (sizeof(ctb_cluster_info_t));
                }
            }
            else
            {
                ps_memtabs[count].size = size;
                ps_memtabs[count].align = 16;
                ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
            }

            if((i4_num_me_frm_pllel != 1) || (i == (MAX_NUM_ME_PARALLEL - 1)))
            {
                count++;
            }
        }
    }
    else if(mem_avail)
    {
        /* No clustering memtab for the other presets : clear the pointers */
        for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
        {
            for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
            {
                ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];

                ps_ctxt->aps_me_frm_prms[i]->ps_blk_16x16 = NULL;

                ps_ctxt->aps_me_frm_prms[i]->ps_blk_32x32 = NULL;

                ps_ctxt->aps_me_frm_prms[i]->ps_blk_64x64 = NULL;

                ps_ctxt->aps_me_frm_prms[i]->ps_ctb_cluster_info = NULL;
            }
        }
    }

    /* Memtabs : fullpel refine ctxt, one per thread */
    for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
    {
        size = sizeof(fullpel_refine_ctxt_t);
        size *= ps_prms->i4_num_proc_thrds;

        if(mem_avail)
        {
            U08 *pu1_mem;

            ASSERT(ps_memtabs[count].size == size);
            pu1_mem = ps_memtabs[count].pu1_mem;

            for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
            {
                ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];

                ps_ctxt->aps_me_frm_prms[i]->ps_fullpel_refine_ctxt =
                    (fullpel_refine_ctxt_t *)pu1_mem;
                pu1_mem += (sizeof(fullpel_refine_ctxt_t));
            }
        }
        else
        {
            ps_memtabs[count].size = size;
            ps_memtabs[count].align = 16;
            ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
        }

        if((i4_num_me_frm_pllel != 1) || (i == (MAX_NUM_ME_PARALLEL - 1)))
        {
            count++;
        }
    }

    /* Memory for ihevce_me_optimised_function_list_t struct */
    if(mem_avail)
    {
        ps_master_ctxt->pv_me_optimised_function_list = (void *)ps_memtabs[count++].pu1_mem;
    }
    else
    {
        ps_memtabs[count].size = sizeof(ihevce_me_optimised_function_list_t);
        ps_memtabs[count].align = 16;
        ps_memtabs[count++].e_mem_attr = HME_SCRATCH_OVLY_MEM;
    }

    /* NOTE(review): strict '<' requires the memtab budget to exceed the  */
    /* number actually used - confirm against hme_enc_num_alloc()         */
    ASSERT(count < hme_enc_num_alloc(i4_num_me_frm_pllel));
    return (count);
}

/**
********************************************************************************
*  @fn     hme_coarse_alloc_init_mem()
*
*  @brief  Requests/ assign memory based on mem avail
*
*  @param[in] ps_memtabs : memtab array
*
*  @param[in] ps_prms : init prms
*
*  @param[in] pv_ctxt : ME ctxt
*
*  @param[in] mem_avail : request/assign flag
*
*  @return  number of memtabs
********************************************************************************
*/
S32 hme_coarse_alloc_init_mem(
    hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms, void *pv_ctxt, S32 mem_avail)
{
    coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_ctxt;
    coarse_me_ctxt_t *ps_ctxt;
    S32 count = 0, size, i, j, use_4x4, wd;
    S32 n_tot_layers;
    S32 num_layers_explicit_search;
    S32 a_wd[MAX_NUM_LAYERS], a_ht[MAX_NUM_LAYERS];
    S32 a_disp_wd[MAX_NUM_LAYERS], a_disp_ht[MAX_NUM_LAYERS];
    S32 num_results;
    S32 num_thrds;
    //S32 ctb_wd = 1 << ps_prms->log_ctb_size;
    S32 sad_4x4_block_size, sad_4x4_block_stride, search_step, num_rows;
    S32 layer1_blk_width = 8;  // 8x8 search
    S32 blk_shift;

    /* MV bank changes */
    hme_mv_t *aps_mv_bank[MAX_NUM_LAYERS] = { NULL };
    S32 ai4_num_mvs_per_row[MAX_NUM_LAYERS] = { 0 };
    S08 *api1_ref_idx[MAX_NUM_LAYERS] = { NULL };

    /* Memtab 0: handle */
    size = sizeof(coarse_me_master_ctxt_t);
    if(mem_avail)
    {
        /* store the number of processing threads */
        ps_master_ctxt->i4_num_proc_thrds = ps_prms->i4_num_proc_thrds;
    }
    else
    {
        ps_memtabs[count].size = size;
1771 ps_memtabs[count].align = 8; 1772 ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM; 1773 } 1774 1775 count++; 1776 1777 /* Memtab 1: ME threads ctxt */ 1778 size = ps_prms->i4_num_proc_thrds * sizeof(coarse_me_ctxt_t); 1779 if(mem_avail) 1780 { 1781 coarse_me_ctxt_t *ps_me_tmp_ctxt = (coarse_me_ctxt_t *)ps_memtabs[count].pu1_mem; 1782 1783 /* store the indivisual thread ctxt pointers */ 1784 for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++) 1785 { 1786 ps_master_ctxt->aps_me_ctxt[num_thrds] = ps_me_tmp_ctxt++; 1787 } 1788 } 1789 else 1790 { 1791 ps_memtabs[count].size = size; 1792 ps_memtabs[count].align = 8; 1793 ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM; 1794 } 1795 1796 count++; 1797 1798 memcpy(a_wd, ps_prms->a_wd, sizeof(S32) * ps_prms->num_simulcast_layers); 1799 memcpy(a_ht, ps_prms->a_ht, sizeof(S32) * ps_prms->num_simulcast_layers); 1800 /*************************************************************************/ 1801 /* Derive the number of HME layers, including both encoded and non encode*/ 1802 /* This function also derives the width and ht of each layer. 
*/ 1803 /*************************************************************************/ 1804 n_tot_layers = hme_derive_num_layers(1, a_wd, a_ht, a_disp_wd, a_disp_ht); 1805 1806 num_layers_explicit_search = ps_prms->num_layers_explicit_search; 1807 1808 if(num_layers_explicit_search <= 0) 1809 num_layers_explicit_search = n_tot_layers - 1; 1810 1811 num_layers_explicit_search = MIN(num_layers_explicit_search, n_tot_layers - 1); 1812 1813 if(mem_avail) 1814 { 1815 for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++) 1816 { 1817 ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds]; 1818 memset(ps_ctxt->u1_encode, 0, n_tot_layers); 1819 1820 /* encode layer should be excluded during processing */ 1821 ps_ctxt->num_layers = n_tot_layers; 1822 1823 memcpy(ps_ctxt->a_wd, a_wd, sizeof(S32) * n_tot_layers); 1824 memcpy(ps_ctxt->a_ht, a_ht, sizeof(S32) * n_tot_layers); 1825 1826 ps_ctxt->num_layers_explicit_search = num_layers_explicit_search; 1827 ps_ctxt->max_num_results = ps_prms->max_num_results; 1828 ps_ctxt->max_num_results_coarse = ps_prms->max_num_results_coarse; 1829 ps_ctxt->max_num_ref = ps_prms->max_num_ref; 1830 } 1831 } 1832 1833 /* Memtabs : Layers MV bank for total layers - 2 */ 1834 /* for penultimate layer MV bank will be initialsed at every frame level */ 1835 for(j = 1; j < n_tot_layers; j++) 1836 { 1837 S32 is_explicit_store = 1; 1838 S32 wd, ht; 1839 U08 u1_enc = 0; 1840 wd = a_wd[j]; 1841 ht = a_ht[j]; 1842 1843 /* Possibly implicit search for lower (finer) layers */ 1844 if(n_tot_layers - j > num_layers_explicit_search) 1845 is_explicit_store = 0; 1846 1847 /* Even if explicit search, we store only 2 results (L0 and L1) */ 1848 /* in finest layer */ 1849 if(j == 0) 1850 { 1851 is_explicit_store = 0; 1852 } 1853 1854 /* coarsest layer alwasy uses 4x4 blks to store results */ 1855 if(j == n_tot_layers - 1) 1856 { 1857 num_results = ps_prms->max_num_results_coarse; 1858 } 1859 else 1860 { 1861 num_results = ps_prms->max_num_results; 1862 if(j 
== 0) 1863 num_results = 1; 1864 } 1865 use_4x4 = hme_get_mv_blk_size(ps_prms->use_4x4, j, n_tot_layers, u1_enc); 1866 1867 /* for penultimate compute the parameters and store */ 1868 if(j == 1) 1869 { 1870 S32 num_blks, num_mvs_per_blk, num_ref; 1871 S32 num_cols, num_rows, num_mvs_per_row; 1872 1873 num_cols = use_4x4 ? ((wd >> 2) + 2) : ((wd >> 3) + 2); 1874 num_rows = use_4x4 ? ((ht >> 2) + 2) : ((ht >> 3) + 2); 1875 1876 if(is_explicit_store) 1877 num_ref = ps_prms->max_num_ref; 1878 else 1879 num_ref = 2; 1880 1881 num_blks = num_cols * num_rows; 1882 num_mvs_per_blk = num_ref * num_results; 1883 num_mvs_per_row = num_mvs_per_blk * num_cols; 1884 1885 ai4_num_mvs_per_row[j] = num_mvs_per_row; 1886 aps_mv_bank[j] = NULL; 1887 api1_ref_idx[j] = NULL; 1888 } 1889 else 1890 { 1891 count += hme_alloc_init_layer_mv_bank( 1892 &ps_memtabs[count], 1893 num_results, 1894 ps_prms->max_num_ref, 1895 use_4x4, 1896 mem_avail, 1897 u1_enc, 1898 wd, 1899 ht, 1900 is_explicit_store, 1901 &aps_mv_bank[j], 1902 &api1_ref_idx[j], 1903 &ai4_num_mvs_per_row[j]); 1904 } 1905 } 1906 1907 /* Memtabs : Layers * num-ref + 1 */ 1908 for(i = 0; i < ps_prms->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; i++) 1909 { 1910 /* for all layer except encode layer */ 1911 for(j = 1; j < n_tot_layers; j++) 1912 { 1913 layer_ctxt_t *ps_layer; 1914 S32 is_explicit_store = 1; 1915 S32 segment_this_layer = (j == 0) ? 
1 : ps_prms->segment_higher_layers; 1916 S32 wd, ht; 1917 U08 u1_enc = 0; 1918 wd = a_wd[j]; 1919 ht = a_ht[j]; 1920 1921 /* Possibly implicit search for lower (finer) layers */ 1922 if(n_tot_layers - j > num_layers_explicit_search) 1923 is_explicit_store = 0; 1924 1925 /* Even if explicit search, we store only 2 results (L0 and L1) */ 1926 /* in finest layer */ 1927 if(j == 0) 1928 { 1929 is_explicit_store = 0; 1930 } 1931 1932 /* coarsest layer alwasy uses 4x4 blks to store results */ 1933 if(j == n_tot_layers - 1) 1934 { 1935 num_results = ps_prms->max_num_results_coarse; 1936 } 1937 else 1938 { 1939 num_results = ps_prms->max_num_results; 1940 if(j == 0) 1941 num_results = 1; 1942 } 1943 use_4x4 = hme_get_mv_blk_size(ps_prms->use_4x4, j, n_tot_layers, u1_enc); 1944 1945 count += hme_alloc_init_layer( 1946 &ps_memtabs[count], 1947 num_results, 1948 ps_prms->max_num_ref, 1949 use_4x4, 1950 mem_avail, 1951 u1_enc, 1952 wd, 1953 ht, 1954 a_disp_wd[j], 1955 a_disp_ht[j], 1956 segment_this_layer, 1957 is_explicit_store, 1958 &ps_layer); 1959 if(mem_avail) 1960 { 1961 /* same ps_layer memory pointer is stored in all the threads */ 1962 for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++) 1963 { 1964 ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds]; 1965 ps_ctxt->as_ref_descr[i].aps_layers[j] = ps_layer; 1966 } 1967 1968 /* store the MV bank pointers */ 1969 ps_layer->ps_layer_mvbank->max_num_mvs_per_row = ai4_num_mvs_per_row[j]; 1970 ps_layer->ps_layer_mvbank->ps_mv_base = aps_mv_bank[j]; 1971 ps_layer->ps_layer_mvbank->pi1_ref_idx_base = api1_ref_idx[j]; 1972 } 1973 } 1974 } 1975 1976 /* Memtabs : Prev Row search node at coarsest layer */ 1977 wd = a_wd[n_tot_layers - 1]; 1978 1979 /* Allocate a memtab for storing 4x4 SADs for n rows. 
As many as num ref and number of threads */ 1980 num_rows = ps_prms->i4_num_proc_thrds + 1; 1981 if(ps_prms->s_me_coding_tools.e_me_quality_presets < ME_MEDIUM_SPEED) 1982 search_step = HME_COARSE_STEP_SIZE_HIGH_QUALITY; 1983 else 1984 search_step = HME_COARSE_STEP_SIZE_HIGH_SPEED; 1985 1986 /*shift factor*/ 1987 blk_shift = 2; /*4x4*/ 1988 search_step >>= 1; 1989 1990 sad_4x4_block_size = ((2 * MAX_MVX_SUPPORTED_IN_COARSE_LAYER) >> search_step) * 1991 ((2 * MAX_MVY_SUPPORTED_IN_COARSE_LAYER) >> search_step); 1992 sad_4x4_block_stride = ((wd >> blk_shift) + 1) * sad_4x4_block_size; 1993 1994 size = num_rows * sad_4x4_block_stride * sizeof(S16); 1995 for(i = 0; i < ps_prms->max_num_ref; i++) 1996 { 1997 if(mem_avail) 1998 { 1999 ASSERT(size == ps_memtabs[count].size); 2000 2001 /* same row memory pointer is stored in all the threads */ 2002 for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++) 2003 { 2004 ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds]; 2005 ps_ctxt->api2_sads_4x4_n_rows[i] = (S16 *)ps_memtabs[count].pu1_mem; 2006 } 2007 } 2008 else 2009 { 2010 ps_memtabs[count].size = size; 2011 ps_memtabs[count].align = 4; 2012 ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM; 2013 } 2014 count++; 2015 } 2016 2017 /* Allocate a memtab for storing best search nodes 8x4 for n rows. Row is allocated for worst case (2*min_wd_coarse/4). 
As many as num ref and number of threads */ 2018 size = num_rows * ((wd >> blk_shift) + 1) * sizeof(search_node_t); 2019 for(i = 0; i < ps_prms->max_num_ref; i++) 2020 { 2021 if(mem_avail) 2022 { 2023 ASSERT(size == ps_memtabs[count].size); 2024 2025 /* same row memory pointer is stored in all the threads */ 2026 for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++) 2027 { 2028 ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds]; 2029 ps_ctxt->aps_best_search_nodes_8x4_n_rows[i] = 2030 (search_node_t *)ps_memtabs[count].pu1_mem; 2031 } 2032 } 2033 else 2034 { 2035 ps_memtabs[count].size = size; 2036 ps_memtabs[count].align = 4; 2037 ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM; 2038 } 2039 count++; 2040 } 2041 /* Allocate a memtab for storing best search nodes 4x8 for n rows. Row is allocated for worst case (2*min_wd_coarse/4). As many as num ref and number of threads */ 2042 size = num_rows * ((wd >> blk_shift) + 1) * sizeof(search_node_t); 2043 for(i = 0; i < ps_prms->max_num_ref; i++) 2044 { 2045 if(mem_avail) 2046 { 2047 ASSERT(size == ps_memtabs[count].size); 2048 2049 /* same row memory pointer is stored in all the threads */ 2050 for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++) 2051 { 2052 ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds]; 2053 ps_ctxt->aps_best_search_nodes_4x8_n_rows[i] = 2054 (search_node_t *)ps_memtabs[count].pu1_mem; 2055 } 2056 } 2057 else 2058 { 2059 ps_memtabs[count].size = size; 2060 ps_memtabs[count].align = 4; 2061 ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM; 2062 } 2063 count++; 2064 } 2065 2066 /* Allocate a memtab for each histogram. 
As many as num ref and number of threads */ 2067 for(i = 0; i < ps_prms->max_num_ref; i++) 2068 { 2069 size = ps_prms->i4_num_proc_thrds * sizeof(mv_hist_t); 2070 if(mem_avail) 2071 { 2072 mv_hist_t *ps_mv_hist = (mv_hist_t *)ps_memtabs[count].pu1_mem; 2073 2074 ASSERT(size == ps_memtabs[count].size); 2075 2076 /* divide the memory accross the threads */ 2077 for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++) 2078 { 2079 ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds]; 2080 ps_ctxt->aps_mv_hist[i] = ps_mv_hist; 2081 ps_mv_hist++; 2082 } 2083 } 2084 else 2085 { 2086 ps_memtabs[count].size = size; 2087 ps_memtabs[count].align = 8; 2088 ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM; 2089 } 2090 count++; 2091 } 2092 2093 /* Memtabs : Search nodes for 8x8 blks */ 2094 for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++) 2095 { 2096 search_results_t *ps_search_results = NULL; 2097 2098 if(mem_avail) 2099 { 2100 ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds]; 2101 } 2102 2103 if(mem_avail) 2104 { 2105 ps_search_results = &ps_ctxt->s_search_results_8x8; 2106 } 2107 count += hme_alloc_init_search_nodes( 2108 ps_search_results, 2109 &ps_memtabs[count], 2110 mem_avail, 2111 ps_prms->max_num_ref, 2112 ps_prms->max_num_results); 2113 } 2114 2115 /* Weighted inputs, one for each ref */ 2116 size = (ps_prms->max_num_ref + 1) * layer1_blk_width * layer1_blk_width * 2117 ps_prms->i4_num_proc_thrds; 2118 if(mem_avail) 2119 { 2120 U08 *pu1_mem; 2121 ASSERT(ps_memtabs[count].size == size); 2122 pu1_mem = ps_memtabs[count].pu1_mem; 2123 2124 for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++) 2125 { 2126 ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds]; 2127 2128 for(i = 0; i < ps_prms->max_num_ref + 1; i++) 2129 { 2130 ps_ctxt->s_wt_pred.apu1_wt_inp_buf_array[i] = pu1_mem; 2131 pu1_mem += (layer1_blk_width * layer1_blk_width); 2132 } 2133 } 2134 } 2135 else 2136 { 2137 ps_memtabs[count].size = size; 2138 
ps_memtabs[count].align = 16; 2139 ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM; 2140 } 2141 count++; 2142 2143 /* if memory is allocated the intislaise the frm prms ptr to each thrd */ 2144 if(mem_avail) 2145 { 2146 for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++) 2147 { 2148 ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds]; 2149 2150 ps_ctxt->ps_hme_frm_prms = &ps_master_ctxt->s_frm_prms; 2151 ps_ctxt->ps_hme_ref_map = &ps_master_ctxt->s_ref_map; 2152 } 2153 } 2154 2155 /* Memory for ihevce_me_optimised_function_list_t struct */ 2156 if(mem_avail) 2157 { 2158 ps_master_ctxt->pv_me_optimised_function_list = (void *)ps_memtabs[count++].pu1_mem; 2159 } 2160 else 2161 { 2162 ps_memtabs[count].size = sizeof(ihevce_me_optimised_function_list_t); 2163 ps_memtabs[count].align = 16; 2164 ps_memtabs[count++].e_mem_attr = HME_SCRATCH_OVLY_MEM; 2165 } 2166 2167 //ASSERT(count < hme_enc_num_alloc()); 2168 ASSERT(count < hme_coarse_num_alloc()); 2169 return (count); 2170 } 2171 2172 /*! 2173 ****************************************************************************** 2174 * \if Function name : ihevce_coarse_me_get_lyr_prms_dep_mngr \endif 2175 * 2176 * \brief Returns to the caller key attributes relevant for dependency manager, 2177 * ie, the number of vertical units in each layer 2178 * 2179 * \par Description: 2180 * This function requires the precondition that the width and ht of encode 2181 * layer is known. 2182 * The number of layers, number of vertical units in each layer, and for 2183 * each vertial unit in each layer, its dependency on previous layer's units 2184 * From ME's perspective, a vertical unit is one which is smallest min size 2185 * vertically (and spans the entire row horizontally). This is CTB for encode 2186 * layer, and 8x8 / 4x4 for non encode layers. 
2187 * 2188 * \param[in] num_layers : Number of ME Layers 2189 * \param[in] pai4_ht : Array storing ht at each layer 2190 * \param[in] pai4_wd : Array storing wd at each layer 2191 * \param[out] pi4_num_vert_units_in_lyr : Array of size N (num layers), each 2192 * entry has num vertical units in that particular layer 2193 * 2194 * \return 2195 * None 2196 * 2197 * \author 2198 * Ittiam 2199 * 2200 ***************************************************************************** 2201 */ 2202 void ihevce_coarse_me_get_lyr_prms_dep_mngr( 2203 WORD32 num_layers, WORD32 *pai4_ht, WORD32 *pai4_wd, WORD32 *pai4_num_vert_units_in_lyr) 2204 { 2205 /* Height of current and next layers */ 2206 WORD32 ht_c, ht_n; 2207 /* Blk ht at a given layer and next layer*/ 2208 WORD32 unit_ht_c, unit_ht_n, blk_ht_c, blk_ht_n; 2209 /* Number of vertical units in current and next layer */ 2210 WORD32 num_vert_c, num_vert_n; 2211 2212 WORD32 ctb_size = 64, num_enc_layers = 1, use_4x4 = 1, i; 2213 UWORD8 au1_encode[MAX_NUM_LAYERS]; 2214 2215 memset(au1_encode, 0, num_layers); 2216 memset(au1_encode, 1, num_enc_layers); 2217 2218 ht_n = pai4_ht[num_layers - 2]; 2219 ht_c = pai4_ht[num_layers - 1]; 2220 2221 /* compute blk ht and unit ht for c and n */ 2222 if(au1_encode[num_layers - 1]) 2223 { 2224 blk_ht_c = 16; 2225 unit_ht_c = ctb_size; 2226 } 2227 else 2228 { 2229 blk_ht_c = hme_get_blk_size(use_4x4, num_layers - 1, num_layers, 0); 2230 unit_ht_c = blk_ht_c; 2231 } 2232 2233 num_vert_c = (ht_c + unit_ht_c - 1) / unit_ht_c; 2234 /* For new design in Coarsest HME layer we need */ 2235 /* one additional row extra at the end of frame */ 2236 /* hence num_vert_c is incremented by 1 */ 2237 num_vert_c++; 2238 2239 /*************************************************************************/ 2240 /* Run through each layer, set the number of vertical units */ 2241 /*************************************************************************/ 2242 for(i = num_layers - 1; i > 0; i--) 2243 { 2244 
pai4_num_vert_units_in_lyr[i] = num_vert_c; 2245 2246 /* "n" is computed for first time */ 2247 ht_n = pai4_ht[i - 1]; 2248 blk_ht_n = hme_get_blk_size(use_4x4, i - 1, num_layers, 0); 2249 unit_ht_n = blk_ht_n; 2250 if(au1_encode[i - 1]) 2251 unit_ht_n = ctb_size; 2252 2253 num_vert_n = (ht_n + unit_ht_n - 1) / unit_ht_n; 2254 2255 /* Compute the blk size and vert unit size in each layer */ 2256 /* "c" denotes curr layer, and "n" denotes the layer to which result */ 2257 /* is projected to */ 2258 ht_c = ht_n; 2259 blk_ht_c = blk_ht_n; 2260 unit_ht_c = unit_ht_n; 2261 num_vert_c = num_vert_n; 2262 } 2263 2264 /* LAYER 0 OR ENCODE LAYER UPDATE : NO OUTPUT DEPS */ 2265 /* set the numebr of vertical units */ 2266 pai4_num_vert_units_in_lyr[0] = num_vert_c; 2267 } 2268 2269 /** 2270 ******************************************************************************** 2271 * @fn hme_coarse_dep_mngr_alloc_mem() 2272 * 2273 * @brief Requests memory for HME Dep Mngr 2274 * 2275 * \param[in,out] ps_mem_tab : pointer to memory descriptors table 2276 * \param[in] ps_init_prms : Create time static parameters 2277 * \param[in] i4_mem_space : memspace in whihc memory request should be done 2278 * 2279 * @return number of memtabs 2280 ******************************************************************************** 2281 */ 2282 WORD32 hme_coarse_dep_mngr_alloc_mem( 2283 iv_mem_rec_t *ps_mem_tab, 2284 ihevce_static_cfg_params_t *ps_init_prms, 2285 WORD32 i4_mem_space, 2286 WORD32 i4_num_proc_thrds, 2287 WORD32 i4_resolution_id) 2288 { 2289 WORD32 ai4_num_vert_units_in_lyr[MAX_NUM_HME_LAYERS]; 2290 WORD32 a_wd[MAX_NUM_HME_LAYERS], a_ht[MAX_NUM_HME_LAYERS]; 2291 WORD32 a_disp_wd[MAX_NUM_HME_LAYERS], a_disp_ht[MAX_NUM_HME_LAYERS]; 2292 WORD32 n_enc_layers = 1, n_tot_layers, n_dep_tabs = 0, i; 2293 WORD32 min_cu_size; 2294 2295 /* get the min cu size from config params */ 2296 min_cu_size = ps_init_prms->s_config_prms.i4_min_log2_cu_size; 2297 2298 min_cu_size = 1 << min_cu_size; 2299 2300 
/* Get the width and heights of different decomp layers */ 2301 *a_wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width + 2302 SET_CTB_ALIGN( 2303 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, min_cu_size); 2304 2305 *a_ht = 2306 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height + 2307 SET_CTB_ALIGN( 2308 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, min_cu_size); 2309 2310 n_tot_layers = hme_derive_num_layers(n_enc_layers, a_wd, a_ht, a_disp_wd, a_disp_ht); 2311 ASSERT(n_tot_layers >= 3); 2312 2313 /* --- Get the number of vartical units in each layer for dep. mngr -- */ 2314 ihevce_coarse_me_get_lyr_prms_dep_mngr( 2315 n_tot_layers, &a_ht[0], &a_wd[0], &ai4_num_vert_units_in_lyr[0]); 2316 2317 /* Fill memtabs for HME layers,except for L0 layer */ 2318 for(i = 1; i < n_tot_layers; i++) 2319 { 2320 n_dep_tabs += ihevce_dmgr_get_mem_recs( 2321 &ps_mem_tab[n_dep_tabs], 2322 DEP_MNGR_ROW_ROW_SYNC, 2323 ai4_num_vert_units_in_lyr[i], 2324 1, /* Number of Col Tiles : Not supported in PreEnc */ 2325 i4_num_proc_thrds, 2326 i4_mem_space); 2327 } 2328 2329 ASSERT(n_dep_tabs <= hme_coarse_dep_mngr_num_alloc()); 2330 2331 return (n_dep_tabs); 2332 } 2333 2334 /** 2335 ******************************************************************************** 2336 * @fn hme_coarse_dep_mngr_init() 2337 * 2338 * @brief Assign memory for HME Dep Mngr 2339 * 2340 * \param[in,out] ps_mem_tab : pointer to memory descriptors table 2341 * \param[in] ps_init_prms : Create time static parameters 2342 * @param[in] pv_ctxt : ME ctxt 2343 * \param[in] pv_osal_handle : Osal handle 2344 * 2345 * @return number of memtabs 2346 ******************************************************************************** 2347 */ 2348 WORD32 hme_coarse_dep_mngr_init( 2349 iv_mem_rec_t *ps_mem_tab, 2350 ihevce_static_cfg_params_t *ps_init_prms, 2351 void *pv_ctxt, 2352 void *pv_osal_handle, 2353 WORD32 i4_num_proc_thrds, 2354 
WORD32 i4_resolution_id) 2355 { 2356 WORD32 ai4_num_vert_units_in_lyr[MAX_NUM_HME_LAYERS]; 2357 WORD32 a_wd[MAX_NUM_HME_LAYERS], a_ht[MAX_NUM_HME_LAYERS]; 2358 WORD32 a_disp_wd[MAX_NUM_HME_LAYERS], a_disp_ht[MAX_NUM_HME_LAYERS]; 2359 WORD32 n_enc_layers = 1, n_tot_layers, n_dep_tabs = 0, i; 2360 WORD32 min_cu_size; 2361 2362 coarse_me_master_ctxt_t *ps_me_ctxt = (coarse_me_master_ctxt_t *)pv_ctxt; 2363 2364 /* get the min cu size from config params */ 2365 min_cu_size = ps_init_prms->s_config_prms.i4_min_log2_cu_size; 2366 2367 min_cu_size = 1 << min_cu_size; 2368 2369 /* Get the width and heights of different decomp layers */ 2370 *a_wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width + 2371 SET_CTB_ALIGN( 2372 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, min_cu_size); 2373 *a_ht = 2374 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height + 2375 SET_CTB_ALIGN( 2376 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, min_cu_size); 2377 2378 n_tot_layers = hme_derive_num_layers(n_enc_layers, a_wd, a_ht, a_disp_wd, a_disp_ht); 2379 ASSERT(n_tot_layers >= 3); 2380 2381 /* --- Get the number of vartical units in each layer for dep. 
mngr -- */ 2382 ihevce_coarse_me_get_lyr_prms_dep_mngr( 2383 n_tot_layers, &a_ht[0], &a_wd[0], &ai4_num_vert_units_in_lyr[0]); 2384 2385 /* --- HME sync Dep Mngr Mem init -- */ 2386 for(i = 1; i < n_tot_layers; i++) 2387 { 2388 WORD32 num_blks_in_row, num_blks_in_pic, blk_size_shift; 2389 2390 if(i == (n_tot_layers - 1)) /* coarsest layer */ 2391 blk_size_shift = 2; 2392 else 2393 blk_size_shift = 3; /* refine layers */ 2394 2395 GET_NUM_BLKS_IN_PIC(a_wd[i], a_ht[i], blk_size_shift, num_blks_in_row, num_blks_in_pic); 2396 2397 /* Coarsest layer : 1 block extra, since the last block */ 2398 if(i == (n_tot_layers - 1)) /* in a row needs East block */ 2399 num_blks_in_row += 1; 2400 2401 /* Note : i-1, only for HME layers, L0 is separate */ 2402 ps_me_ctxt->apv_dep_mngr_hme_sync[i - 1] = ihevce_dmgr_init( 2403 &ps_mem_tab[n_dep_tabs], 2404 pv_osal_handle, 2405 DEP_MNGR_ROW_ROW_SYNC, 2406 ai4_num_vert_units_in_lyr[i], 2407 num_blks_in_row, 2408 1, /* Number of Col Tiles : Not supported in PreEnc */ 2409 i4_num_proc_thrds, 2410 1 /*Sem disabled*/ 2411 ); 2412 2413 n_dep_tabs += ihevce_dmgr_get_num_mem_recs(); 2414 } 2415 2416 return n_dep_tabs; 2417 } 2418 2419 /** 2420 ******************************************************************************** 2421 * @fn hme_coarse_dep_mngr_reg_sem() 2422 * 2423 * @brief Assign semaphores for HME Dep Mngr 2424 * 2425 * \param[in] pv_me_ctxt : pointer to Coarse ME ctxt 2426 * \param[in] ppv_sem_hdls : Arry of semaphore handles 2427 * \param[in] i4_num_proc_thrds : Number of processing threads 2428 * 2429 * @return number of memtabs 2430 ******************************************************************************** 2431 */ 2432 void hme_coarse_dep_mngr_reg_sem(void *pv_ctxt, void **ppv_sem_hdls, WORD32 i4_num_proc_thrds) 2433 { 2434 WORD32 i; 2435 coarse_me_master_ctxt_t *ps_me_ctxt = (coarse_me_master_ctxt_t *)pv_ctxt; 2436 coarse_me_ctxt_t *ps_ctxt = ps_me_ctxt->aps_me_ctxt[0]; 2437 2438 /* --- HME sync Dep Mngr semaphore init 
-- */ 2439 for(i = 1; i < ps_ctxt->num_layers; i++) 2440 { 2441 ihevce_dmgr_reg_sem_hdls( 2442 ps_me_ctxt->apv_dep_mngr_hme_sync[i - 1], ppv_sem_hdls, i4_num_proc_thrds); 2443 } 2444 2445 return; 2446 } 2447 2448 /** 2449 ******************************************************************************** 2450 * @fn hme_coarse_dep_mngr_delete() 2451 * 2452 * Destroy Coarse ME Dep Mngr module 2453 * Note : Only Destroys the resources allocated in the module like 2454 * semaphore,etc. Memory free is done Separately using memtabs 2455 * 2456 * \param[in] pv_me_ctxt : pointer to Coarse ME ctxt 2457 * \param[in] ps_init_prms : Create time static parameters 2458 * 2459 * @return none 2460 ******************************************************************************** 2461 */ 2462 void hme_coarse_dep_mngr_delete( 2463 void *pv_me_ctxt, ihevce_static_cfg_params_t *ps_init_prms, WORD32 i4_resolution_id) 2464 { 2465 WORD32 a_wd[MAX_NUM_HME_LAYERS], a_ht[MAX_NUM_HME_LAYERS]; 2466 WORD32 a_disp_wd[MAX_NUM_HME_LAYERS], a_disp_ht[MAX_NUM_HME_LAYERS]; 2467 WORD32 n_enc_layers = 1, n_tot_layers, i; 2468 WORD32 min_cu_size; 2469 2470 coarse_me_master_ctxt_t *ps_me_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt; 2471 2472 /* get the min cu size from config params */ 2473 min_cu_size = ps_init_prms->s_config_prms.i4_min_log2_cu_size; 2474 2475 min_cu_size = 1 << min_cu_size; 2476 2477 /* Get the width and heights of different decomp layers */ 2478 *a_wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width + 2479 SET_CTB_ALIGN( 2480 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, min_cu_size); 2481 *a_ht = 2482 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height + 2483 SET_CTB_ALIGN( 2484 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, min_cu_size); 2485 n_tot_layers = hme_derive_num_layers(n_enc_layers, a_wd, a_ht, a_disp_wd, a_disp_ht); 2486 ASSERT(n_tot_layers >= 3); 2487 2488 /* --- HME sync Dep Mngr 
Delete -- */ 2489 for(i = 1; i < n_tot_layers; i++) 2490 { 2491 /* Note : i-1, only for HME layers, L0 is separate */ 2492 ihevce_dmgr_del(ps_me_ctxt->apv_dep_mngr_hme_sync[i - 1]); 2493 } 2494 } 2495 2496 /** 2497 ******************************************************************************* 2498 * @fn S32 hme_enc_alloc(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms) 2499 * 2500 * @brief Fills up memtabs with memory information details required by HME 2501 * 2502 * @param[out] ps_memtabs : Pointre to an array of memtabs where module fills 2503 * up its requirements of memory 2504 * 2505 * @param[in] ps_prms : Input parameters to module crucial in calculating reqd 2506 * amt of memory 2507 * 2508 * @return Number of memtabs required 2509 ******************************************************************************* 2510 */ 2511 S32 hme_enc_alloc(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms, WORD32 i4_num_me_frm_pllel) 2512 { 2513 S32 num, tot, i; 2514 2515 /* Validation of init params */ 2516 if(-1 == hme_validate_init_prms(ps_prms)) 2517 return (-1); 2518 2519 num = hme_enc_alloc_init_mem(ps_memtabs, ps_prms, NULL, 0, i4_num_me_frm_pllel); 2520 tot = hme_enc_num_alloc(i4_num_me_frm_pllel); 2521 for(i = num; i < tot; i++) 2522 { 2523 ps_memtabs[i].size = 4; 2524 ps_memtabs[i].align = 4; 2525 ps_memtabs[i].e_mem_attr = HME_PERSISTENT_MEM; 2526 } 2527 return (tot); 2528 } 2529 2530 /** 2531 ******************************************************************************* 2532 * @fn S32 hme_coarse_alloc(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms) 2533 * 2534 * @brief Fills up memtabs with memory information details required by Coarse HME 2535 * 2536 * @param[out] ps_memtabs : Pointre to an array of memtabs where module fills 2537 * up its requirements of memory 2538 * 2539 * @param[in] ps_prms : Input parameters to module crucial in calculating reqd 2540 * amt of memory 2541 * 2542 * @return Number of memtabs required 2543 
******************************************************************************* 2544 */ 2545 S32 hme_coarse_alloc(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms) 2546 { 2547 S32 num, tot, i; 2548 2549 /* Validation of init params */ 2550 if(-1 == hme_validate_init_prms(ps_prms)) 2551 return (-1); 2552 2553 num = hme_coarse_alloc_init_mem(ps_memtabs, ps_prms, NULL, 0); 2554 tot = hme_coarse_num_alloc(); 2555 for(i = num; i < tot; i++) 2556 { 2557 ps_memtabs[i].size = 4; 2558 ps_memtabs[i].align = 4; 2559 ps_memtabs[i].e_mem_attr = HME_PERSISTENT_MEM; 2560 } 2561 return (tot); 2562 } 2563 2564 /** 2565 ******************************************************************************* 2566 * @fn hme_coarse_dep_mngr_alloc 2567 * 2568 * @brief Fills up memtabs with memory information details required by Coarse HME 2569 * 2570 * \param[in,out] ps_mem_tab : pointer to memory descriptors table 2571 * \param[in] ps_init_prms : Create time static parameters 2572 * \param[in] i4_mem_space : memspace in whihc memory request should be done 2573 * 2574 * @return Number of memtabs required 2575 ******************************************************************************* 2576 */ 2577 WORD32 hme_coarse_dep_mngr_alloc( 2578 iv_mem_rec_t *ps_mem_tab, 2579 ihevce_static_cfg_params_t *ps_init_prms, 2580 WORD32 i4_mem_space, 2581 WORD32 i4_num_proc_thrds, 2582 WORD32 i4_resolution_id) 2583 { 2584 S32 num, tot, i; 2585 2586 num = hme_coarse_dep_mngr_alloc_mem( 2587 ps_mem_tab, ps_init_prms, i4_mem_space, i4_num_proc_thrds, i4_resolution_id); 2588 tot = hme_coarse_dep_mngr_num_alloc(); 2589 for(i = num; i < tot; i++) 2590 { 2591 ps_mem_tab[i].i4_mem_size = 4; 2592 ps_mem_tab[i].i4_mem_alignment = 4; 2593 ps_mem_tab[i].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; 2594 } 2595 return (tot); 2596 } 2597 2598 /** 2599 ******************************************************************************** 2600 * @fn hme_coarse_init_ctxt() 2601 * 2602 * @brief initialise context memory 2603 * 2604 * 
@param[in] ps_prms : init prms
*
*  @param[in,out] ps_master_ctxt : Coarse ME master ctxt whose per thread
*                                  contexts and layer descriptors are set up
*
*  @return  none
********************************************************************************
*/
void hme_coarse_init_ctxt(coarse_me_master_ctxt_t *ps_master_ctxt, hme_init_prms_t *ps_prms)
{
    coarse_me_ctxt_t *ps_thrd_ctxt;
    S32 thrd_id, ref_id, lyr_id, row_id;

    /* Per thread set up : every thread ctxt receives the same parameters */
    thrd_id = 0;
    while(thrd_id < ps_master_ctxt->i4_num_proc_thrds)
    {
        S32 num_row_bufs;

        ps_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[thrd_id];

        /* Keep a private copy of the init prms */
        ps_thrd_ctxt->s_init_prms = *ps_prms;

        /* No frame has been processed so far */
        ps_thrd_ctxt->i4_prev_poc = -1;

        ps_thrd_ctxt->num_b_frms = ps_prms->num_b_frms;

        /* Row pointers into the two ref bits tables */
        ps_thrd_ctxt->apu1_ref_bits_tlu_lc[0] = &ps_thrd_ctxt->au1_ref_bits_tlu_lc[0][0];
        ps_thrd_ctxt->apu1_ref_bits_tlu_lc[1] = &ps_thrd_ctxt->au1_ref_bits_tlu_lc[1][0];

        /* Row index lookup : row number modulo the number of row buffers, */
        /* which is one more than the number of processing threads         */
        ps_thrd_ctxt->i4_num_row_bufs = ps_prms->i4_num_proc_thrds + 1;
        num_row_bufs = ps_thrd_ctxt->i4_num_row_bufs;
        for(row_id = 0; row_id < ((HEVCE_MAX_HEIGHT >> 1) >> 2); row_id++)
        {
            ps_thrd_ctxt->ai4_row_index[row_id] = (row_id % num_row_bufs);
        }

        thrd_id++;
    }

    /* The layer descriptors are set up through the 0th thread ctxt only */
    /* (the original comment notes all threads hold the same pointers)   */
    ps_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[0];

    /* Mark every non encode layer descriptor as unfilled (poc = -1) */
    for(ref_id = 0; ref_id < ps_thrd_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; ref_id++)
    {
        for(lyr_id = 1; lyr_id < ps_thrd_ctxt->num_layers; lyr_id++)
        {
            layer_ctxt_t *ps_layer = ps_thrd_ctxt->as_ref_descr[ref_id].aps_layers[lyr_id];

            ps_layer->i4_poc = -1;
            ps_layer->ppu1_list_inp = &ps_thrd_ctxt->apu1_list_inp[lyr_id][0];
            memset(
                ps_layer->s_global_mv,
                0,
                sizeof(hme_mv_t) * ps_thrd_ctxt->max_num_ref * NUM_GMV_LOBES);
        }
    }
}

2661 /** 2662 ******************************************************************************** 2663 * @fn hme_enc_init_ctxt() 2664 * 2665 * @brief initialise context memory 2666 * 2667 * @param[in] ps_prms : init prms 2668 * 2669 * @param[in] pv_ctxt : ME ctxt 2670 * 2671 * @return number of memtabs 2672 ******************************************************************************** 2673 */ 2674 void hme_enc_init_ctxt( 2675 me_master_ctxt_t *ps_master_ctxt, hme_init_prms_t *ps_prms, rc_quant_t *ps_rc_quant_ctxt) 2676 { 2677 S32 i, j, num_thrds; 2678 me_ctxt_t *ps_ctxt; 2679 me_frm_ctxt_t *ps_frm_ctxt; 2680 2681 /* initialise the parameters in context of all threads */ 2682 for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++) 2683 { 2684 ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds]; 2685 /* Store Tile params base into ME context */ 2686 ps_ctxt->pv_tile_params_base = ps_master_ctxt->pv_tile_params_base; 2687 2688 for(i = 0; i < MAX_NUM_ME_PARALLEL; i++) 2689 { 2690 ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[i]; 2691 2692 /* Copy the init prms to context */ 2693 ps_ctxt->s_init_prms = *ps_prms; 2694 2695 /* Initialize some other variables in ctxt */ 2696 ps_frm_ctxt->i4_prev_poc = INVALID_POC; 2697 2698 ps_frm_ctxt->log_ctb_size = ps_prms->log_ctb_size; 2699 2700 ps_frm_ctxt->num_b_frms = ps_prms->num_b_frms; 2701 2702 ps_frm_ctxt->i4_is_prev_frame_reference = 0; 2703 2704 ps_frm_ctxt->ps_rc_quant_ctxt = ps_rc_quant_ctxt; 2705 2706 /* Initialize mv grids for L0 and L1 used in final refinement layer */ 2707 { 2708 hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid[0]); 2709 hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid[1]); 2710 hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid_fpel[0]); 2711 hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid_fpel[1]); 2712 hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid_qpel[0]); 2713 hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid_qpel[1]); 2714 } 2715 2716 ps_frm_ctxt->apu1_ref_bits_tlu_lc[0] = &ps_frm_ctxt->au1_ref_bits_tlu_lc[0][0]; 2717 
ps_frm_ctxt->apu1_ref_bits_tlu_lc[1] = &ps_frm_ctxt->au1_ref_bits_tlu_lc[1][0]; 2718 } 2719 } 2720 2721 /* since same layer desc pointer is stored in all the threads ctxt */ 2722 /* layer init is done only using 0th thread ctxt */ 2723 ps_ctxt = ps_master_ctxt->aps_me_ctxt[0]; 2724 2725 ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[0]; 2726 2727 /* Initialize all layers descriptors to have -1 = poc meaning unfilled */ 2728 for(i = 0; i < (ps_frm_ctxt->max_num_ref * ps_master_ctxt->i4_num_me_frm_pllel) + 1; i++) 2729 { 2730 /* only enocde layer is processed */ 2731 for(j = 0; j < 1; j++) 2732 { 2733 layer_ctxt_t *ps_layer; 2734 ps_layer = ps_ctxt->as_ref_descr[i].aps_layers[j]; 2735 ps_layer->i4_poc = INVALID_POC; 2736 ps_layer->i4_is_free = 1; 2737 ps_layer->ppu1_list_inp = &ps_frm_ctxt->apu1_list_inp[j][0]; 2738 ps_layer->ppu1_list_rec_fxfy = &ps_frm_ctxt->apu1_list_rec_fxfy[j][0]; 2739 ps_layer->ppu1_list_rec_hxfy = &ps_frm_ctxt->apu1_list_rec_hxfy[j][0]; 2740 ps_layer->ppu1_list_rec_fxhy = &ps_frm_ctxt->apu1_list_rec_fxhy[j][0]; 2741 ps_layer->ppu1_list_rec_hxhy = &ps_frm_ctxt->apu1_list_rec_hxhy[j][0]; 2742 ps_layer->ppv_dep_mngr_recon = &ps_frm_ctxt->apv_list_dep_mngr[j][0]; 2743 2744 memset( 2745 ps_layer->s_global_mv, 2746 0, 2747 sizeof(hme_mv_t) * ps_frm_ctxt->max_num_ref * NUM_GMV_LOBES); 2748 } 2749 } 2750 } 2751 2752 /** 2753 ******************************************************************************* 2754 * @fn S32 hme_enc_init(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms,rc_quant_t *ps_rc_quant_ctxt) 2755 * 2756 * @brief Initialises the Encode Layer HME ctxt 2757 * 2758 * @param[out] ps_memtabs : Pointer to an array of memtabs where module fills 2759 * up its requirements of memory 2760 * 2761 * @param[in] ps_prms : Input parameters to module crucial in calculating reqd 2762 * amt of memory 2763 * 2764 * @return Number of memtabs required 2765 ******************************************************************************* 2766 */ 2767 S32 
hme_enc_init( 2768 void *pv_ctxt, 2769 hme_memtab_t *ps_memtabs, 2770 hme_init_prms_t *ps_prms, 2771 rc_quant_t *ps_rc_quant_ctxt, 2772 WORD32 i4_num_me_frm_pllel) 2773 { 2774 S32 num, tot; 2775 me_master_ctxt_t *ps_ctxt = (me_master_ctxt_t *)pv_ctxt; 2776 2777 tot = hme_enc_num_alloc(i4_num_me_frm_pllel); 2778 /* Validation of init params */ 2779 if(-1 == hme_validate_init_prms(ps_prms)) 2780 return (-1); 2781 2782 num = hme_enc_alloc_init_mem(ps_memtabs, ps_prms, pv_ctxt, 1, i4_num_me_frm_pllel); 2783 if(num > tot) 2784 return (-1); 2785 2786 /* Initialize all enumerations based globals */ 2787 //hme_init_globals(); /* done as part of coarse me */ 2788 2789 /* Copy the memtabs into the context for returning during free */ 2790 memcpy(ps_ctxt->as_memtabs, ps_memtabs, sizeof(hme_memtab_t) * tot); 2791 2792 /* initialize the context and related buffers */ 2793 hme_enc_init_ctxt(ps_ctxt, ps_prms, ps_rc_quant_ctxt); 2794 return (0); 2795 } 2796 2797 /** 2798 ******************************************************************************* 2799 * @fn S32 hme_coarse_init(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms) 2800 * 2801 * @brief Initialises the Coarse HME ctxt 2802 * 2803 * @param[out] ps_memtabs : Pointer to an array of memtabs where module fills 2804 * up its requirements of memory 2805 * 2806 * @param[in] ps_prms : Input parameters to module crucial in calculating reqd 2807 * amt of memory 2808 * 2809 * @return Number of memtabs required 2810 ******************************************************************************* 2811 */ 2812 S32 hme_coarse_init(void *pv_ctxt, hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms) 2813 { 2814 S32 num, tot; 2815 coarse_me_master_ctxt_t *ps_ctxt = (coarse_me_master_ctxt_t *)pv_ctxt; 2816 2817 tot = hme_coarse_num_alloc(); 2818 /* Validation of init params */ 2819 if(-1 == hme_validate_init_prms(ps_prms)) 2820 return (-1); 2821 2822 num = hme_coarse_alloc_init_mem(ps_memtabs, ps_prms, pv_ctxt, 1); 2823 if(num > tot) 
2824 return (-1); 2825 2826 /* Initialize all enumerations based globals */ 2827 hme_init_globals(); 2828 2829 /* Copy the memtabs into the context for returning during free */ 2830 memcpy(ps_ctxt->as_memtabs, ps_memtabs, sizeof(hme_memtab_t) * tot); 2831 2832 /* initialize the context and related buffers */ 2833 hme_coarse_init_ctxt(ps_ctxt, ps_prms); 2834 2835 return (0); 2836 } 2837 2838 /** 2839 ******************************************************************************* 2840 * @fn S32 hme_set_resolution(void *pv_me_ctxt, 2841 * S32 n_enc_layers, 2842 * S32 *p_wd, 2843 * S32 *p_ht 2844 * 2845 * @brief Sets up the layers based on resolution information. 2846 * 2847 * @param[in, out] pv_me_ctxt : ME handle, updated with the resolution info 2848 * 2849 * @param[in] n_enc_layers : Number of layers encoded 2850 * 2851 * @param[in] p_wd : Pointer to an array having widths for each encode layer 2852 * 2853 * @param[in] p_ht : Pointer to an array having heights for each encode layer 2854 * 2855 * @return void 2856 ******************************************************************************* 2857 */ 2858 2859 void hme_set_resolution(void *pv_me_ctxt, S32 n_enc_layers, S32 *p_wd, S32 *p_ht, S32 me_frm_id) 2860 { 2861 S32 n_tot_layers, num_layers_explicit_search, i, j; 2862 me_ctxt_t *ps_thrd_ctxt; 2863 me_frm_ctxt_t *ps_ctxt; 2864 2865 S32 a_wd[MAX_NUM_LAYERS], a_ht[MAX_NUM_LAYERS]; 2866 S32 a_disp_wd[MAX_NUM_LAYERS], a_disp_ht[MAX_NUM_LAYERS]; 2867 memcpy(a_wd, p_wd, n_enc_layers * sizeof(S32)); 2868 memcpy(a_ht, p_ht, n_enc_layers * sizeof(S32)); 2869 2870 ps_thrd_ctxt = (me_ctxt_t *)pv_me_ctxt; 2871 2872 ps_ctxt = ps_thrd_ctxt->aps_me_frm_prms[me_frm_id]; 2873 2874 /*************************************************************************/ 2875 /* Derive the number of HME layers, including both encoded and non encode*/ 2876 /* This function also derives the width and ht of each layer. 
*/ 2877 /*************************************************************************/ 2878 n_tot_layers = hme_derive_num_layers(n_enc_layers, a_wd, a_ht, a_disp_wd, a_disp_ht); 2879 num_layers_explicit_search = ps_thrd_ctxt->s_init_prms.num_layers_explicit_search; 2880 if(num_layers_explicit_search <= 0) 2881 num_layers_explicit_search = n_tot_layers - 1; 2882 2883 num_layers_explicit_search = MIN(num_layers_explicit_search, n_tot_layers - 1); 2884 ps_ctxt->num_layers_explicit_search = num_layers_explicit_search; 2885 memset(ps_ctxt->u1_encode, 0, n_tot_layers); 2886 memset(ps_ctxt->u1_encode, 1, n_enc_layers); 2887 2888 /* only encode layer should be processed */ 2889 ps_ctxt->num_layers = n_tot_layers; 2890 2891 ps_ctxt->i4_wd = a_wd[0]; 2892 ps_ctxt->i4_ht = a_ht[0]; 2893 2894 /* Memtabs : Layers * num-ref + 1 */ 2895 for(i = 0; i < ps_ctxt->max_num_ref + 1; i++) 2896 { 2897 for(j = 0; j < 1; j++) 2898 { 2899 S32 wd, ht; 2900 layer_ctxt_t *ps_layer; 2901 U08 u1_enc = ps_ctxt->u1_encode[j]; 2902 wd = a_wd[j]; 2903 ht = a_ht[j]; 2904 ps_layer = ps_thrd_ctxt->as_ref_descr[i].aps_layers[j]; 2905 hme_set_layer_res_attrs(ps_layer, wd, ht, a_disp_wd[j], a_disp_ht[j], u1_enc); 2906 } 2907 } 2908 } 2909 2910 /** 2911 ******************************************************************************* 2912 * @fn S32 hme_coarse_set_resolution(void *pv_me_ctxt, 2913 * S32 n_enc_layers, 2914 * S32 *p_wd, 2915 * S32 *p_ht 2916 * 2917 * @brief Sets up the layers based on resolution information. 
2918 * 2919 * @param[in, out] pv_me_ctxt : ME handle, updated with the resolution info 2920 * 2921 * @param[in] n_enc_layers : Number of layers encoded 2922 * 2923 * @param[in] p_wd : Pointer to an array having widths for each encode layer 2924 * 2925 * @param[in] p_ht : Pointer to an array having heights for each encode layer 2926 * 2927 * @return void 2928 ******************************************************************************* 2929 */ 2930 2931 void hme_coarse_set_resolution(void *pv_me_ctxt, S32 n_enc_layers, S32 *p_wd, S32 *p_ht) 2932 { 2933 S32 n_tot_layers, num_layers_explicit_search, i, j; 2934 coarse_me_ctxt_t *ps_ctxt; 2935 S32 a_wd[MAX_NUM_LAYERS], a_ht[MAX_NUM_LAYERS]; 2936 S32 a_disp_wd[MAX_NUM_LAYERS], a_disp_ht[MAX_NUM_LAYERS]; 2937 memcpy(a_wd, p_wd, n_enc_layers * sizeof(S32)); 2938 memcpy(a_ht, p_ht, n_enc_layers * sizeof(S32)); 2939 2940 ps_ctxt = (coarse_me_ctxt_t *)pv_me_ctxt; 2941 /*************************************************************************/ 2942 /* Derive the number of HME layers, including both encoded and non encode*/ 2943 /* This function also derives the width and ht of each layer. 
*/ 2944 /*************************************************************************/ 2945 n_tot_layers = hme_derive_num_layers(n_enc_layers, a_wd, a_ht, a_disp_wd, a_disp_ht); 2946 num_layers_explicit_search = ps_ctxt->s_init_prms.num_layers_explicit_search; 2947 if(num_layers_explicit_search <= 0) 2948 num_layers_explicit_search = n_tot_layers - 1; 2949 2950 num_layers_explicit_search = MIN(num_layers_explicit_search, n_tot_layers - 1); 2951 ps_ctxt->num_layers_explicit_search = num_layers_explicit_search; 2952 memset(ps_ctxt->u1_encode, 0, n_tot_layers); 2953 memset(ps_ctxt->u1_encode, 1, n_enc_layers); 2954 2955 /* encode layer should be excluded */ 2956 ps_ctxt->num_layers = n_tot_layers; 2957 2958 memcpy(ps_ctxt->a_wd, a_wd, sizeof(S32) * n_tot_layers); 2959 memcpy(ps_ctxt->a_ht, a_ht, sizeof(S32) * n_tot_layers); 2960 2961 /* Memtabs : Layers * num-ref + 1 */ 2962 for(i = 0; i < ps_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; i++) 2963 { 2964 for(j = 1; j < n_tot_layers; j++) 2965 { 2966 S32 wd, ht; 2967 layer_ctxt_t *ps_layer; 2968 U08 u1_enc = ps_ctxt->u1_encode[j]; 2969 wd = a_wd[j]; 2970 ht = a_ht[j]; 2971 ps_layer = ps_ctxt->as_ref_descr[i].aps_layers[j]; 2972 hme_set_layer_res_attrs(ps_layer, wd, ht, a_disp_wd[j], a_disp_ht[j], u1_enc); 2973 } 2974 } 2975 } 2976 2977 S32 hme_find_descr_idx(me_ctxt_t *ps_ctxt, S32 i4_poc, S32 i4_idr_gop_num, S32 i4_num_me_frm_pllel) 2978 { 2979 S32 i; 2980 2981 for(i = 0; i < (ps_ctxt->aps_me_frm_prms[0]->max_num_ref * i4_num_me_frm_pllel) + 1; i++) 2982 { 2983 if(ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_poc == i4_poc && 2984 ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_idr_gop_num == i4_idr_gop_num) 2985 return i; 2986 } 2987 /* Should not come here */ 2988 ASSERT(0); 2989 return (-1); 2990 } 2991 2992 S32 hme_coarse_find_descr_idx(coarse_me_ctxt_t *ps_ctxt, S32 i4_poc) 2993 { 2994 S32 i; 2995 2996 for(i = 0; i < ps_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; i++) 2997 { 2998 
if(ps_ctxt->as_ref_descr[i].aps_layers[1]->i4_poc == i4_poc) 2999 return i; 3000 } 3001 /* Should not come here */ 3002 ASSERT(0); 3003 return (-1); 3004 } 3005 3006 S32 hme_find_free_descr_idx(me_ctxt_t *ps_ctxt, S32 i4_num_me_frm_pllel) 3007 { 3008 S32 i; 3009 3010 for(i = 0; i < (ps_ctxt->aps_me_frm_prms[0]->max_num_ref * i4_num_me_frm_pllel) + 1; i++) 3011 { 3012 if(ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_is_free == 1) 3013 { 3014 ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_is_free = 0; 3015 return i; 3016 } 3017 } 3018 /* Should not come here */ 3019 ASSERT(0); 3020 return (-1); 3021 } 3022 3023 S32 hme_coarse_find_free_descr_idx(void *pv_ctxt) 3024 { 3025 S32 i; 3026 3027 coarse_me_ctxt_t *ps_ctxt = (coarse_me_ctxt_t *)pv_ctxt; 3028 3029 for(i = 0; i < ps_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; i++) 3030 { 3031 if(ps_ctxt->as_ref_descr[i].aps_layers[1]->i4_poc == -1) 3032 return i; 3033 } 3034 /* Should not come here */ 3035 ASSERT(0); 3036 return (-1); 3037 } 3038 3039 void hme_discard_frm( 3040 void *pv_me_ctxt, S32 *p_pocs_to_remove, S32 i4_idr_gop_num, S32 i4_num_me_frm_pllel) 3041 { 3042 me_ctxt_t *ps_ctxt = (me_ctxt_t *)pv_me_ctxt; 3043 S32 count = 0, idx, i; 3044 layers_descr_t *ps_descr; 3045 3046 /* Search for the id of the layer descriptor that has this poc */ 3047 while(p_pocs_to_remove[count] != INVALID_POC) 3048 { 3049 ASSERT(count == 0); 3050 idx = hme_find_descr_idx( 3051 ps_ctxt, p_pocs_to_remove[count], i4_idr_gop_num, i4_num_me_frm_pllel); 3052 ps_descr = &ps_ctxt->as_ref_descr[idx]; 3053 /*********************************************************************/ 3054 /* Setting i4_is_free = 1 in all layers invalidates this layer ctxt */ 3055 /* Now this can be used for a fresh picture. 
*/ 3056 /*********************************************************************/ 3057 for(i = 0; i < 1; i++) 3058 { 3059 ps_descr->aps_layers[i]->i4_is_free = 1; 3060 } 3061 count++; 3062 } 3063 } 3064 3065 void hme_coarse_discard_frm(void *pv_me_ctxt, S32 *p_pocs_to_remove) 3066 { 3067 coarse_me_ctxt_t *ps_ctxt = (coarse_me_ctxt_t *)pv_me_ctxt; 3068 S32 count = 0, idx, i; 3069 layers_descr_t *ps_descr; 3070 3071 /* Search for the id of the layer descriptor that has this poc */ 3072 while(p_pocs_to_remove[count] != -1) 3073 { 3074 idx = hme_coarse_find_descr_idx(ps_ctxt, p_pocs_to_remove[count]); 3075 ps_descr = &ps_ctxt->as_ref_descr[idx]; 3076 /*********************************************************************/ 3077 /* Setting poc = -1 in all layers invalidates this layer ctxt */ 3078 /* Now this can be used for a fresh picture. */ 3079 /*********************************************************************/ 3080 for(i = 1; i < ps_ctxt->num_layers; i++) 3081 { 3082 ps_descr->aps_layers[i]->i4_poc = -1; 3083 } 3084 count++; 3085 } 3086 } 3087 3088 void hme_update_layer_desc( 3089 layers_descr_t *ps_layers_desc, 3090 hme_ref_desc_t *ps_ref_desc, 3091 S32 start_lyr_id, 3092 S32 num_layers, 3093 layers_descr_t *ps_curr_desc) 3094 { 3095 layer_ctxt_t *ps_layer_ctxt, *ps_curr_layer; 3096 S32 i; 3097 for(i = start_lyr_id; i < num_layers; i++) 3098 { 3099 ps_layer_ctxt = ps_layers_desc->aps_layers[i]; 3100 ps_curr_layer = ps_curr_desc->aps_layers[i]; 3101 3102 ps_layer_ctxt->i4_poc = ps_ref_desc->i4_poc; 3103 ps_layer_ctxt->i4_idr_gop_num = ps_ref_desc->i4_GOP_num; 3104 3105 /* Copy the recon planes for the given reference pic at given layer */ 3106 ps_layer_ctxt->pu1_rec_fxfy = ps_ref_desc->as_ref_info[i].pu1_rec_fxfy; 3107 ps_layer_ctxt->pu1_rec_hxfy = ps_ref_desc->as_ref_info[i].pu1_rec_hxfy; 3108 ps_layer_ctxt->pu1_rec_fxhy = ps_ref_desc->as_ref_info[i].pu1_rec_fxhy; 3109 ps_layer_ctxt->pu1_rec_hxhy = ps_ref_desc->as_ref_info[i].pu1_rec_hxhy; 3110 3111 
/*********************************************************************/ 3112 /* reconstruction strides, offsets and padding info are copied for */ 3113 /* this reference pic. It is assumed that these will be same across */ 3114 /* pics, so even the current pic has this info updated, though the */ 3115 /* current pic still does not have valid recon pointers. */ 3116 /*********************************************************************/ 3117 ps_layer_ctxt->i4_rec_stride = ps_ref_desc->as_ref_info[i].luma_stride; 3118 ps_layer_ctxt->i4_rec_offset = ps_ref_desc->as_ref_info[i].luma_offset; 3119 ps_layer_ctxt->i4_pad_x_rec = ps_ref_desc->as_ref_info[i].u1_pad_x; 3120 ps_layer_ctxt->i4_pad_y_rec = ps_ref_desc->as_ref_info[i].u1_pad_y; 3121 3122 ps_curr_layer->i4_rec_stride = ps_ref_desc->as_ref_info[i].luma_stride; 3123 ps_curr_layer->i4_pad_x_rec = ps_ref_desc->as_ref_info[i].u1_pad_x; 3124 ps_curr_layer->i4_pad_y_rec = ps_ref_desc->as_ref_info[i].u1_pad_y; 3125 } 3126 } 3127 3128 void hme_add_inp(void *pv_me_ctxt, hme_inp_desc_t *ps_inp_desc, S32 me_frm_id, S32 i4_thrd_id) 3129 { 3130 layers_descr_t *ps_desc; 3131 layer_ctxt_t *ps_layer_ctxt; 3132 me_master_ctxt_t *ps_master_ctxt = (me_master_ctxt_t *)pv_me_ctxt; 3133 me_ctxt_t *ps_thrd_ctxt; 3134 me_frm_ctxt_t *ps_ctxt; 3135 3136 hme_inp_buf_attr_t *ps_attr; 3137 S32 i4_poc, idx, i, i4_prev_poc; 3138 S32 num_thrds, prev_me_frm_id; 3139 S32 i4_idr_gop_num, i4_is_reference; 3140 3141 /* since same layer desc pointer is stored in all thread ctxt */ 3142 /* a free idx is obtained using 0th thread ctxt pointer */ 3143 3144 ps_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[i4_thrd_id]; 3145 3146 ps_ctxt = ps_thrd_ctxt->aps_me_frm_prms[me_frm_id]; 3147 3148 /* Deriving the previous poc from previous frames context */ 3149 if(me_frm_id == 0) 3150 prev_me_frm_id = (MAX_NUM_ME_PARALLEL - 1); 3151 else 3152 prev_me_frm_id = me_frm_id - 1; 3153 3154 i4_prev_poc = ps_thrd_ctxt->aps_me_frm_prms[prev_me_frm_id]->i4_curr_poc; 3155 3156 /* 
Obtain an empty layer descriptor */ 3157 idx = hme_find_free_descr_idx(ps_thrd_ctxt, ps_master_ctxt->i4_num_me_frm_pllel); 3158 ps_desc = &ps_thrd_ctxt->as_ref_descr[idx]; 3159 3160 /* initialise the parameters for all the threads */ 3161 for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++) 3162 { 3163 me_frm_ctxt_t *ps_tmp_frm_ctxt; 3164 3165 ps_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds]; 3166 ps_tmp_frm_ctxt = ps_thrd_ctxt->aps_me_frm_prms[me_frm_id]; 3167 3168 ps_tmp_frm_ctxt->ps_curr_descr = &ps_thrd_ctxt->as_ref_descr[idx]; 3169 3170 /* Do the initialization for the first thread alone */ 3171 i4_poc = ps_inp_desc->i4_poc; 3172 i4_idr_gop_num = ps_inp_desc->i4_idr_gop_num; 3173 i4_is_reference = ps_inp_desc->i4_is_reference; 3174 /*Update poc id of previously encoded frm and curr frm */ 3175 ps_tmp_frm_ctxt->i4_prev_poc = i4_prev_poc; 3176 ps_tmp_frm_ctxt->i4_curr_poc = i4_poc; 3177 } 3178 3179 /* since same layer desc pointer is stored in all thread ctxt */ 3180 /* following processing is done using 0th thread ctxt pointer */ 3181 ps_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[0]; 3182 3183 /* only encode layer */ 3184 for(i = 0; i < 1; i++) 3185 { 3186 ps_layer_ctxt = ps_desc->aps_layers[i]; 3187 ps_attr = &ps_inp_desc->s_layer_desc[i]; 3188 3189 ps_layer_ctxt->i4_poc = i4_poc; 3190 ps_layer_ctxt->i4_idr_gop_num = i4_idr_gop_num; 3191 ps_layer_ctxt->i4_is_reference = i4_is_reference; 3192 ps_layer_ctxt->i4_non_ref_free = 0; 3193 3194 /* If this layer is encoded, copy input attributes */ 3195 if(ps_ctxt->u1_encode[i]) 3196 { 3197 ps_layer_ctxt->pu1_inp = ps_attr->pu1_y; 3198 ps_layer_ctxt->i4_inp_stride = ps_attr->luma_stride; 3199 ps_layer_ctxt->i4_pad_x_inp = 0; 3200 ps_layer_ctxt->i4_pad_y_inp = 0; 3201 } 3202 else 3203 { 3204 /* If not encoded, then ME owns the buffer.*/ 3205 S32 wd, dst_stride; 3206 3207 ASSERT(i != 0); 3208 3209 wd = ps_ctxt->i4_wd; 3210 3211 /* destination has padding on either side of 16 */ 3212 dst_stride 
= CEIL16((wd >> 1)) + 32 + 4; 3213 ps_layer_ctxt->i4_inp_stride = dst_stride; 3214 } 3215 } 3216 3217 return; 3218 } 3219 3220 void hme_coarse_add_inp(void *pv_me_ctxt, hme_inp_desc_t *ps_inp_desc, WORD32 i4_curr_idx) 3221 { 3222 layers_descr_t *ps_desc; 3223 layer_ctxt_t *ps_layer_ctxt; 3224 coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt; 3225 coarse_me_ctxt_t *ps_ctxt; 3226 hme_inp_buf_attr_t *ps_attr; 3227 S32 i4_poc, i; 3228 S32 num_thrds; 3229 3230 /* since same layer desc pointer is stored in all thread ctxt */ 3231 /* a free idx is obtained using 0th thread ctxt pointer */ 3232 ps_ctxt = ps_master_ctxt->aps_me_ctxt[0]; 3233 3234 ps_desc = &ps_ctxt->as_ref_descr[i4_curr_idx]; 3235 3236 /* initialise the parameters for all the threads */ 3237 for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++) 3238 { 3239 ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds]; 3240 ps_ctxt->ps_curr_descr = &ps_ctxt->as_ref_descr[i4_curr_idx]; 3241 i4_poc = ps_inp_desc->i4_poc; 3242 3243 /*Update poc id of previously encoded frm and curr frm */ 3244 ps_ctxt->i4_prev_poc = ps_ctxt->i4_curr_poc; 3245 ps_ctxt->i4_curr_poc = i4_poc; 3246 } 3247 3248 /* since same layer desc pointer is stored in all thread ctxt */ 3249 /* following processing is done using 0th thread ctxt pointer */ 3250 ps_ctxt = ps_master_ctxt->aps_me_ctxt[0]; 3251 3252 /* only non encode layer */ 3253 for(i = 1; i < ps_ctxt->num_layers; i++) 3254 { 3255 ps_layer_ctxt = ps_desc->aps_layers[i]; 3256 ps_attr = &ps_inp_desc->s_layer_desc[i]; 3257 3258 ps_layer_ctxt->i4_poc = i4_poc; 3259 /* If this layer is encoded, copy input attributes */ 3260 if(ps_ctxt->u1_encode[i]) 3261 { 3262 ps_layer_ctxt->pu1_inp = ps_attr->pu1_y; 3263 ps_layer_ctxt->i4_inp_stride = ps_attr->luma_stride; 3264 ps_layer_ctxt->i4_pad_x_inp = 0; 3265 ps_layer_ctxt->i4_pad_y_inp = 0; 3266 } 3267 else 3268 { 3269 /* If not encoded, then ME owns the buffer. 
*/ 3270 /* decomp of lower layers happens on a seperate pass */ 3271 /* Coarse Me should export the pointers to the caller */ 3272 S32 wd, dst_stride; 3273 3274 ASSERT(i != 0); 3275 3276 wd = ps_ctxt->a_wd[i - 1]; 3277 3278 /* destination has padding on either side of 16 */ 3279 dst_stride = CEIL16((wd >> 1)) + 32 + 4; 3280 ps_layer_ctxt->i4_inp_stride = dst_stride; 3281 } 3282 } 3283 } 3284 3285 static __inline U08 hme_determine_num_results_per_part( 3286 U08 u1_layer_id, U08 u1_num_layers, ME_QUALITY_PRESETS_T e_quality_preset) 3287 { 3288 U08 u1_num_results_per_part = MAX_RESULTS_PER_PART; 3289 3290 if((u1_layer_id == 0) && !!RESTRICT_NUM_PARTITION_LEVEL_L0ME_RESULTS_TO_1) 3291 { 3292 switch(e_quality_preset) 3293 { 3294 case ME_XTREME_SPEED_25: 3295 case ME_XTREME_SPEED: 3296 case ME_HIGH_SPEED: 3297 case ME_MEDIUM_SPEED: 3298 case ME_HIGH_QUALITY: 3299 case ME_PRISTINE_QUALITY: 3300 { 3301 u1_num_results_per_part = 1; 3302 3303 break; 3304 } 3305 default: 3306 { 3307 u1_num_results_per_part = MAX_RESULTS_PER_PART; 3308 3309 break; 3310 } 3311 } 3312 } 3313 else if((u1_layer_id == 1) && !!RESTRICT_NUM_PARTITION_LEVEL_L1ME_RESULTS_TO_1) 3314 { 3315 switch(e_quality_preset) 3316 { 3317 case ME_XTREME_SPEED_25: 3318 case ME_HIGH_QUALITY: 3319 case ME_PRISTINE_QUALITY: 3320 { 3321 u1_num_results_per_part = 1; 3322 3323 break; 3324 } 3325 default: 3326 { 3327 u1_num_results_per_part = MAX_RESULTS_PER_PART; 3328 3329 break; 3330 } 3331 } 3332 } 3333 else if((u1_layer_id == 2) && (u1_num_layers > 3) && !!RESTRICT_NUM_PARTITION_LEVEL_L2ME_RESULTS_TO_1) 3334 { 3335 switch(e_quality_preset) 3336 { 3337 case ME_XTREME_SPEED_25: 3338 case ME_XTREME_SPEED: 3339 case ME_HIGH_SPEED: 3340 case ME_MEDIUM_SPEED: 3341 { 3342 u1_num_results_per_part = 1; 3343 3344 break; 3345 } 3346 default: 3347 { 3348 u1_num_results_per_part = MAX_RESULTS_PER_PART; 3349 3350 break; 3351 } 3352 } 3353 } 3354 3355 return u1_num_results_per_part; 3356 } 3357 3358 static __inline void 
hme_max_search_cands_per_search_cand_loc_populator( 3359 hme_frm_prms_t *ps_frm_prms, 3360 U08 *pu1_num_fpel_search_cands, 3361 U08 u1_layer_id, 3362 ME_QUALITY_PRESETS_T e_quality_preset) 3363 { 3364 if(0 == u1_layer_id) 3365 { 3366 S32 i; 3367 3368 for(i = 0; i < NUM_SEARCH_CAND_LOCATIONS; i++) 3369 { 3370 switch(e_quality_preset) 3371 { 3372 #if RESTRICT_NUM_SEARCH_CANDS_PER_SEARCH_CAND_LOC 3373 case ME_XTREME_SPEED_25: 3374 case ME_XTREME_SPEED: 3375 case ME_HIGH_SPEED: 3376 case ME_MEDIUM_SPEED: 3377 { 3378 pu1_num_fpel_search_cands[i] = 1; 3379 3380 break; 3381 } 3382 #endif 3383 default: 3384 { 3385 pu1_num_fpel_search_cands[i] = 3386 MAX(2, 3387 MAX(ps_frm_prms->u1_num_active_ref_l0, ps_frm_prms->u1_num_active_ref_l1) * 3388 ((COLOCATED == (SEARCH_CAND_LOCATIONS_T)i) + 1)); 3389 3390 break; 3391 } 3392 } 3393 } 3394 } 3395 } 3396 3397 static __inline U08 3398 hme_determine_max_2nx2n_tu_recur_cands(U08 u1_layer_id, ME_QUALITY_PRESETS_T e_quality_preset) 3399 { 3400 U08 u1_num_cands = 2; 3401 3402 if((u1_layer_id == 0) && !!RESTRICT_NUM_2NX2N_TU_RECUR_CANDS) 3403 { 3404 switch(e_quality_preset) 3405 { 3406 case ME_XTREME_SPEED_25: 3407 case ME_XTREME_SPEED: 3408 case ME_HIGH_SPEED: 3409 case ME_MEDIUM_SPEED: 3410 { 3411 u1_num_cands = 1; 3412 3413 break; 3414 } 3415 default: 3416 { 3417 u1_num_cands = 2; 3418 3419 break; 3420 } 3421 } 3422 } 3423 3424 return u1_num_cands; 3425 } 3426 3427 static __inline U08 3428 hme_determine_max_num_fpel_refine_centers(U08 u1_layer_id, ME_QUALITY_PRESETS_T e_quality_preset) 3429 { 3430 U08 i; 3431 3432 U08 u1_num_centers = 0; 3433 3434 if(0 == u1_layer_id) 3435 { 3436 switch(e_quality_preset) 3437 { 3438 case ME_XTREME_SPEED_25: 3439 { 3440 for(i = 0; i < TOT_NUM_PARTS; i++) 3441 { 3442 u1_num_centers += gau1_num_best_results_XS25[i]; 3443 } 3444 3445 break; 3446 } 3447 case ME_XTREME_SPEED: 3448 { 3449 for(i = 0; i < TOT_NUM_PARTS; i++) 3450 { 3451 u1_num_centers += gau1_num_best_results_XS[i]; 3452 } 3453 3454 break; 3455 
} 3456 case ME_HIGH_SPEED: 3457 { 3458 for(i = 0; i < TOT_NUM_PARTS; i++) 3459 { 3460 u1_num_centers += gau1_num_best_results_HS[i]; 3461 } 3462 3463 break; 3464 } 3465 case ME_MEDIUM_SPEED: 3466 { 3467 for(i = 0; i < TOT_NUM_PARTS; i++) 3468 { 3469 u1_num_centers += gau1_num_best_results_MS[i]; 3470 } 3471 3472 break; 3473 } 3474 case ME_HIGH_QUALITY: 3475 { 3476 for(i = 0; i < TOT_NUM_PARTS; i++) 3477 { 3478 u1_num_centers += gau1_num_best_results_HQ[i]; 3479 } 3480 3481 break; 3482 } 3483 case ME_PRISTINE_QUALITY: 3484 { 3485 for(i = 0; i < TOT_NUM_PARTS; i++) 3486 { 3487 u1_num_centers += gau1_num_best_results_PQ[i]; 3488 } 3489 3490 break; 3491 } 3492 } 3493 } 3494 3495 return u1_num_centers; 3496 } 3497 3498 static __inline U08 hme_determine_max_num_subpel_refine_centers( 3499 U08 u1_layer_id, U08 u1_max_2Nx2N_subpel_cands, U08 u1_max_NxN_subpel_cands) 3500 { 3501 U08 u1_num_centers = 0; 3502 3503 if(0 == u1_layer_id) 3504 { 3505 u1_num_centers += u1_max_2Nx2N_subpel_cands + 4 * u1_max_NxN_subpel_cands; 3506 } 3507 3508 return u1_num_centers; 3509 } 3510 3511 void hme_set_refine_prms( 3512 void *pv_refine_prms, 3513 U08 u1_encode, 3514 S32 num_ref, 3515 S32 layer_id, 3516 S32 num_layers, 3517 S32 num_layers_explicit_search, 3518 S32 use_4x4, 3519 hme_frm_prms_t *ps_frm_prms, 3520 double **ppd_intra_costs, 3521 me_coding_params_t *ps_me_coding_tools) 3522 { 3523 refine_prms_t *ps_refine_prms = (refine_prms_t *)pv_refine_prms; 3524 3525 ps_refine_prms->i4_encode = u1_encode; 3526 ps_refine_prms->bidir_enabled = ps_frm_prms->bidir_enabled; 3527 ps_refine_prms->i4_layer_id = layer_id; 3528 /*************************************************************************/ 3529 /* Refinement layers have two lambdas, one for closed loop, another for */ 3530 /* open loop. Non encode layers use only open loop lambda. 
*/ 3531 /*************************************************************************/ 3532 ps_refine_prms->lambda_inp = ps_frm_prms->i4_ol_sad_lambda_qf; 3533 ps_refine_prms->lambda_recon = ps_frm_prms->i4_cl_sad_lambda_qf; 3534 ps_refine_prms->lambda_q_shift = ps_frm_prms->lambda_q_shift; 3535 ps_refine_prms->lambda_inp = 3536 ((float)ps_refine_prms->lambda_inp) * (100.0f - ME_LAMBDA_DISCOUNT) / 100.0f; 3537 ps_refine_prms->lambda_recon = 3538 ((float)ps_refine_prms->lambda_recon) * (100.0f - ME_LAMBDA_DISCOUNT) / 100.0f; 3539 3540 if((u1_encode) && (NULL != ppd_intra_costs)) 3541 { 3542 ps_refine_prms->pd_intra_costs = ppd_intra_costs[layer_id]; 3543 } 3544 3545 /* Explicit or implicit depends on number of layers having eplicit search */ 3546 if((layer_id == 0) || (num_layers - layer_id > num_layers_explicit_search)) 3547 { 3548 ps_refine_prms->explicit_ref = 0; 3549 ps_refine_prms->i4_num_ref_fpel = MIN(2, num_ref); 3550 } 3551 else 3552 { 3553 ps_refine_prms->explicit_ref = 1; 3554 ps_refine_prms->i4_num_ref_fpel = num_ref; 3555 } 3556 3557 ps_refine_prms->e_search_complexity = SEARCH_CX_HIGH; 3558 3559 ps_refine_prms->i4_num_steps_hpel_refine = ps_me_coding_tools->i4_num_steps_hpel_refine; 3560 ps_refine_prms->i4_num_steps_qpel_refine = ps_me_coding_tools->i4_num_steps_qpel_refine; 3561 3562 if(u1_encode) 3563 { 3564 ps_refine_prms->i4_num_mvbank_results = 1; 3565 ps_refine_prms->i4_use_rec_in_fpel = 1; 3566 ps_refine_prms->i4_num_steps_fpel_refine = 1; 3567 3568 if(ps_me_coding_tools->e_me_quality_presets == ME_PRISTINE_QUALITY) 3569 { 3570 ps_refine_prms->i4_num_fpel_results = 4; 3571 ps_refine_prms->i4_num_32x32_merge_results = 4; 3572 ps_refine_prms->i4_num_64x64_merge_results = 4; 3573 ps_refine_prms->i4_num_steps_post_refine_fpel = 3; 3574 ps_refine_prms->i4_use_satd_subpel = 1; 3575 ps_refine_prms->u1_max_subpel_candts_2Nx2N = 2; 3576 ps_refine_prms->u1_max_subpel_candts_NxN = 1; 3577 ps_refine_prms->u1_subpel_candt_threshold = 1; 3578 
ps_refine_prms->e_search_complexity = SEARCH_CX_MED; 3579 ps_refine_prms->pu1_num_best_results = gau1_num_best_results_PQ; 3580 ps_refine_prms->limit_active_partitions = 0; 3581 } 3582 else if(ps_me_coding_tools->e_me_quality_presets == ME_HIGH_QUALITY) 3583 { 3584 ps_refine_prms->i4_num_fpel_results = 4; 3585 ps_refine_prms->i4_num_32x32_merge_results = 4; 3586 ps_refine_prms->i4_num_64x64_merge_results = 4; 3587 ps_refine_prms->i4_num_steps_post_refine_fpel = 3; 3588 ps_refine_prms->i4_use_satd_subpel = 1; 3589 ps_refine_prms->u1_max_subpel_candts_2Nx2N = 2; 3590 ps_refine_prms->u1_max_subpel_candts_NxN = 1; 3591 ps_refine_prms->u1_subpel_candt_threshold = 2; 3592 ps_refine_prms->e_search_complexity = SEARCH_CX_MED; 3593 ps_refine_prms->pu1_num_best_results = gau1_num_best_results_HQ; 3594 ps_refine_prms->limit_active_partitions = 0; 3595 } 3596 else if(ps_me_coding_tools->e_me_quality_presets == ME_MEDIUM_SPEED) 3597 { 3598 ps_refine_prms->i4_num_fpel_results = 1; 3599 ps_refine_prms->i4_num_32x32_merge_results = 2; 3600 ps_refine_prms->i4_num_64x64_merge_results = 2; 3601 ps_refine_prms->i4_num_steps_post_refine_fpel = 0; 3602 ps_refine_prms->i4_use_satd_subpel = 1; 3603 ps_refine_prms->u1_max_subpel_candts_2Nx2N = 2; 3604 ps_refine_prms->u1_max_subpel_candts_NxN = 1; 3605 ps_refine_prms->u1_subpel_candt_threshold = 3; 3606 ps_refine_prms->e_search_complexity = SEARCH_CX_MED; 3607 ps_refine_prms->pu1_num_best_results = gau1_num_best_results_MS; 3608 ps_refine_prms->limit_active_partitions = 1; 3609 } 3610 else if(ps_me_coding_tools->e_me_quality_presets == ME_HIGH_SPEED) 3611 { 3612 ps_refine_prms->i4_num_fpel_results = 1; 3613 ps_refine_prms->i4_num_32x32_merge_results = 2; 3614 ps_refine_prms->i4_num_64x64_merge_results = 2; 3615 ps_refine_prms->i4_num_steps_post_refine_fpel = 0; 3616 ps_refine_prms->u1_max_subpel_candts_2Nx2N = 1; 3617 ps_refine_prms->u1_max_subpel_candts_NxN = 1; 3618 ps_refine_prms->i4_use_satd_subpel = 0; 3619 
ps_refine_prms->u1_subpel_candt_threshold = 0; 3620 ps_refine_prms->e_search_complexity = SEARCH_CX_MED; 3621 ps_refine_prms->pu1_num_best_results = gau1_num_best_results_HS; 3622 ps_refine_prms->limit_active_partitions = 1; 3623 } 3624 else if(ps_me_coding_tools->e_me_quality_presets == ME_XTREME_SPEED) 3625 { 3626 ps_refine_prms->i4_num_fpel_results = 1; 3627 ps_refine_prms->i4_num_32x32_merge_results = 2; 3628 ps_refine_prms->i4_num_64x64_merge_results = 2; 3629 ps_refine_prms->i4_num_steps_post_refine_fpel = 0; 3630 ps_refine_prms->i4_use_satd_subpel = 0; 3631 ps_refine_prms->u1_max_subpel_candts_2Nx2N = 1; 3632 ps_refine_prms->u1_max_subpel_candts_NxN = 0; 3633 ps_refine_prms->u1_subpel_candt_threshold = 0; 3634 ps_refine_prms->e_search_complexity = SEARCH_CX_MED; 3635 ps_refine_prms->pu1_num_best_results = gau1_num_best_results_XS; 3636 ps_refine_prms->limit_active_partitions = 1; 3637 } 3638 else if(ps_me_coding_tools->e_me_quality_presets == ME_XTREME_SPEED_25) 3639 { 3640 ps_refine_prms->i4_num_fpel_results = 1; 3641 ps_refine_prms->i4_num_32x32_merge_results = 2; 3642 ps_refine_prms->i4_num_64x64_merge_results = 2; 3643 ps_refine_prms->i4_num_steps_post_refine_fpel = 0; 3644 ps_refine_prms->i4_use_satd_subpel = 0; 3645 ps_refine_prms->u1_max_subpel_candts_2Nx2N = 1; 3646 ps_refine_prms->u1_max_subpel_candts_NxN = 0; 3647 ps_refine_prms->u1_subpel_candt_threshold = 0; 3648 ps_refine_prms->e_search_complexity = SEARCH_CX_LOW; 3649 ps_refine_prms->pu1_num_best_results = gau1_num_best_results_XS25; 3650 ps_refine_prms->limit_active_partitions = 1; 3651 } 3652 } 3653 else 3654 { 3655 ps_refine_prms->i4_num_fpel_results = 2; 3656 ps_refine_prms->i4_use_rec_in_fpel = 0; 3657 ps_refine_prms->i4_num_steps_fpel_refine = 1; 3658 ps_refine_prms->i4_num_steps_hpel_refine = 0; 3659 ps_refine_prms->i4_num_steps_qpel_refine = 0; 3660 3661 if(ps_me_coding_tools->e_me_quality_presets == ME_HIGH_SPEED) 3662 { 3663 ps_refine_prms->i4_num_steps_post_refine_fpel = 0; 3664 
ps_refine_prms->i4_use_satd_subpel = 1; 3665 ps_refine_prms->e_search_complexity = SEARCH_CX_LOW; 3666 ps_refine_prms->pu1_num_best_results = gau1_num_best_results_HS; 3667 } 3668 else if(ps_me_coding_tools->e_me_quality_presets == ME_XTREME_SPEED) 3669 { 3670 ps_refine_prms->i4_num_steps_post_refine_fpel = 0; 3671 ps_refine_prms->i4_use_satd_subpel = 0; 3672 ps_refine_prms->e_search_complexity = SEARCH_CX_LOW; 3673 ps_refine_prms->pu1_num_best_results = gau1_num_best_results_XS; 3674 } 3675 else if(ps_me_coding_tools->e_me_quality_presets == ME_XTREME_SPEED_25) 3676 { 3677 ps_refine_prms->i4_num_steps_post_refine_fpel = 0; 3678 ps_refine_prms->i4_use_satd_subpel = 0; 3679 ps_refine_prms->e_search_complexity = SEARCH_CX_LOW; 3680 ps_refine_prms->pu1_num_best_results = gau1_num_best_results_XS25; 3681 } 3682 else if(ps_me_coding_tools->e_me_quality_presets == ME_PRISTINE_QUALITY) 3683 { 3684 ps_refine_prms->i4_num_steps_post_refine_fpel = 2; 3685 ps_refine_prms->i4_use_satd_subpel = 1; 3686 ps_refine_prms->e_search_complexity = SEARCH_CX_MED; 3687 ps_refine_prms->pu1_num_best_results = gau1_num_best_results_PQ; 3688 } 3689 else if(ps_me_coding_tools->e_me_quality_presets == ME_HIGH_QUALITY) 3690 { 3691 ps_refine_prms->i4_num_steps_post_refine_fpel = 2; 3692 ps_refine_prms->i4_use_satd_subpel = 1; 3693 ps_refine_prms->e_search_complexity = SEARCH_CX_MED; 3694 ps_refine_prms->pu1_num_best_results = gau1_num_best_results_HQ; 3695 } 3696 else if(ps_me_coding_tools->e_me_quality_presets == ME_MEDIUM_SPEED) 3697 { 3698 ps_refine_prms->i4_num_steps_post_refine_fpel = 0; 3699 ps_refine_prms->i4_use_satd_subpel = 1; 3700 ps_refine_prms->e_search_complexity = SEARCH_CX_LOW; 3701 ps_refine_prms->pu1_num_best_results = gau1_num_best_results_MS; 3702 } 3703 3704 /* Following fields unused in the non-encode layers */ 3705 /* But setting the same to default values */ 3706 ps_refine_prms->i4_num_32x32_merge_results = 4; 3707 ps_refine_prms->i4_num_64x64_merge_results = 4; 3708 3709 
if(!ps_frm_prms->bidir_enabled) 3710 { 3711 ps_refine_prms->limit_active_partitions = 0; 3712 } 3713 else 3714 { 3715 ps_refine_prms->limit_active_partitions = 1; 3716 } 3717 } 3718 3719 ps_refine_prms->i4_enable_4x4_part = 3720 hme_get_mv_blk_size(use_4x4, layer_id, num_layers, u1_encode); 3721 3722 if(!ps_me_coding_tools->u1_l0_me_controlled_via_cmd_line) 3723 { 3724 ps_refine_prms->i4_num_results_per_part = hme_determine_num_results_per_part( 3725 layer_id, num_layers, ps_me_coding_tools->e_me_quality_presets); 3726 3727 hme_max_search_cands_per_search_cand_loc_populator( 3728 ps_frm_prms, 3729 ps_refine_prms->au1_num_fpel_search_cands, 3730 layer_id, 3731 ps_me_coding_tools->e_me_quality_presets); 3732 3733 ps_refine_prms->u1_max_2nx2n_tu_recur_cands = hme_determine_max_2nx2n_tu_recur_cands( 3734 layer_id, ps_me_coding_tools->e_me_quality_presets); 3735 3736 ps_refine_prms->u1_max_num_fpel_refine_centers = hme_determine_max_num_fpel_refine_centers( 3737 layer_id, ps_me_coding_tools->e_me_quality_presets); 3738 3739 ps_refine_prms->u1_max_num_subpel_refine_centers = 3740 hme_determine_max_num_subpel_refine_centers( 3741 layer_id, 3742 ps_refine_prms->u1_max_subpel_candts_2Nx2N, 3743 ps_refine_prms->u1_max_subpel_candts_NxN); 3744 } 3745 else 3746 { 3747 if(0 == layer_id) 3748 { 3749 ps_refine_prms->i4_num_results_per_part = 3750 ps_me_coding_tools->u1_num_results_per_part_in_l0me; 3751 } 3752 else if(1 == layer_id) 3753 { 3754 ps_refine_prms->i4_num_results_per_part = 3755 ps_me_coding_tools->u1_num_results_per_part_in_l1me; 3756 } 3757 else if((2 == layer_id) && (num_layers > 3)) 3758 { 3759 ps_refine_prms->i4_num_results_per_part = 3760 ps_me_coding_tools->u1_num_results_per_part_in_l2me; 3761 } 3762 else 3763 { 3764 ps_refine_prms->i4_num_results_per_part = hme_determine_num_results_per_part( 3765 layer_id, num_layers, ps_me_coding_tools->e_me_quality_presets); 3766 } 3767 3768 memset( 3769 ps_refine_prms->au1_num_fpel_search_cands, 3770 
ps_me_coding_tools->u1_max_num_coloc_cands, 3771 sizeof(ps_refine_prms->au1_num_fpel_search_cands)); 3772 3773 ps_refine_prms->u1_max_2nx2n_tu_recur_cands = 3774 ps_me_coding_tools->u1_max_2nx2n_tu_recur_cands; 3775 3776 ps_refine_prms->u1_max_num_fpel_refine_centers = 3777 ps_me_coding_tools->u1_max_num_fpel_refine_centers; 3778 3779 ps_refine_prms->u1_max_num_subpel_refine_centers = 3780 ps_me_coding_tools->u1_max_num_subpel_refine_centers; 3781 } 3782 3783 if(layer_id != 0) 3784 { 3785 ps_refine_prms->i4_num_mvbank_results = ps_refine_prms->i4_num_results_per_part; 3786 } 3787 3788 /* 4 * lambda */ 3789 ps_refine_prms->sdi_threshold = 3790 (ps_refine_prms->lambda_recon + (1 << (ps_frm_prms->lambda_q_shift - 1))) >> 3791 (ps_frm_prms->lambda_q_shift - 2); 3792 3793 ps_refine_prms->u1_use_lambda_derived_from_min_8x8_act_in_ctb = 3794 MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON && ps_frm_prms->u1_is_cu_qp_delta_enabled; 3795 } 3796 3797 void hme_set_ctb_boundary_attrs(ctb_boundary_attrs_t *ps_attrs, S32 num_8x8_horz, S32 num_8x8_vert) 3798 { 3799 S32 cu_16x16_valid_flag = 0, merge_pattern_x, merge_pattern_y; 3800 S32 blk, blk_x, blk_y; 3801 S32 num_16x16_horz, num_16x16_vert; 3802 blk_ctb_attrs_t *ps_blk_attrs = &ps_attrs->as_blk_attrs[0]; 3803 3804 num_16x16_horz = (num_8x8_horz + 1) >> 1; 3805 num_16x16_vert = (num_8x8_vert + 1) >> 1; 3806 ps_attrs->u1_num_blks_in_ctb = (U08)(num_16x16_horz * num_16x16_vert); 3807 3808 /*************************************************************************/ 3809 /* Run through each blk assuming all 16x16 CUs valid. The order would be */ 3810 /* 0 1 4 5 */ 3811 /* 2 3 6 7 */ 3812 /* 8 9 12 13 */ 3813 /* 10 11 14 15 */ 3814 /* Out of these some may not be valid. For example, if num_16x16_horz is */ 3815 /* 2 and num_16x16_vert is 4, then right 2 columns not valid. In this */ 3816 /* case, blks 8-11 get encoding number of 4-7. Further, the variable */ 3817 /* cu_16x16_valid_flag will be 1111 0000 1111 0000. 
Also, the variable */ 3818 /* u1_merge_to_32x32_flag will be 1010, and u1_merge_to_64x64_flag 0 */ 3819 /*************************************************************************/ 3820 for(blk = 0; blk < 16; blk++) 3821 { 3822 U08 u1_blk_8x8_mask = 0xF; 3823 blk_x = gau1_encode_to_raster_x[blk]; 3824 blk_y = gau1_encode_to_raster_y[blk]; 3825 if((blk_x >= num_16x16_horz) || (blk_y >= num_16x16_vert)) 3826 { 3827 continue; 3828 } 3829 3830 /* The CU at encode location blk is valid */ 3831 cu_16x16_valid_flag |= (1 << blk); 3832 ps_blk_attrs->u1_blk_id_in_full_ctb = blk; 3833 ps_blk_attrs->u1_blk_x = blk_x; 3834 ps_blk_attrs->u1_blk_y = blk_y; 3835 3836 /* Disable blks 1 and 3 if the 16x16 blk overshoots on rt border */ 3837 if(((blk_x << 1) + 2) > num_8x8_horz) 3838 u1_blk_8x8_mask &= 0x5; 3839 /* Disable blks 2 and 3 if the 16x16 blk overshoots on bot border */ 3840 if(((blk_y << 1) + 2) > num_8x8_vert) 3841 u1_blk_8x8_mask &= 0x3; 3842 ps_blk_attrs->u1_blk_8x8_mask = u1_blk_8x8_mask; 3843 ps_blk_attrs++; 3844 } 3845 3846 ps_attrs->cu_16x16_valid_flag = cu_16x16_valid_flag; 3847 3848 /* 32x32 merge is logical combination of what merge is possible */ 3849 /* horizontally as well as vertically. */ 3850 if(num_8x8_horz < 4) 3851 merge_pattern_x = 0x0; 3852 else if(num_8x8_horz < 8) 3853 merge_pattern_x = 0x5; 3854 else 3855 merge_pattern_x = 0xF; 3856 3857 if(num_8x8_vert < 4) 3858 merge_pattern_y = 0x0; 3859 else if(num_8x8_vert < 8) 3860 merge_pattern_y = 0x3; 3861 else 3862 merge_pattern_y = 0xF; 3863 3864 ps_attrs->u1_merge_to_32x32_flag = (U08)(merge_pattern_x & merge_pattern_y); 3865 3866 /* Do not attempt 64x64 merge if any blk invalid */ 3867 if(ps_attrs->u1_merge_to_32x32_flag != 0xF) 3868 ps_attrs->u1_merge_to_64x64_flag = 0; 3869 else 3870 ps_attrs->u1_merge_to_64x64_flag = 1; 3871 } 3872 3873 void hme_set_ctb_attrs(ctb_boundary_attrs_t *ps_attrs, S32 wd, S32 ht) 3874 { 3875 S32 is_cropped_rt, is_cropped_bot; 3876 3877 is_cropped_rt = ((wd & 63) != 0) ? 
1 : 0; 3878 is_cropped_bot = ((ht & 63) != 0) ? 1 : 0; 3879 3880 if(is_cropped_rt) 3881 { 3882 hme_set_ctb_boundary_attrs(&ps_attrs[CTB_RT_PIC_BOUNDARY], (wd & 63) >> 3, 8); 3883 } 3884 if(is_cropped_bot) 3885 { 3886 hme_set_ctb_boundary_attrs(&ps_attrs[CTB_BOT_PIC_BOUNDARY], 8, (ht & 63) >> 3); 3887 } 3888 if(is_cropped_rt & is_cropped_bot) 3889 { 3890 hme_set_ctb_boundary_attrs( 3891 &ps_attrs[CTB_BOT_RT_PIC_BOUNDARY], (wd & 63) >> 3, (ht & 63) >> 3); 3892 } 3893 hme_set_ctb_boundary_attrs(&ps_attrs[CTB_CENTRE], 8, 8); 3894 } 3895 3896 /** 3897 ******************************************************************************** 3898 * @fn hme_scale_for_ref_idx(S32 curr_poc, S32 poc_from, S32 poc_to) 3899 * 3900 * @brief When we have an mv with ref id "poc_to" for which predictor to be 3901 * computed, and predictor is ref id "poc_from", this funciton returns 3902 * scale factor in Q8 for such a purpose 3903 * 3904 * @param[in] curr_poc : input picture poc 3905 * 3906 * @param[in] poc_from : POC of the pic, pointed to by ref id to be scaled 3907 * 3908 * @param[in] poc_to : POC of hte pic, pointed to by ref id to be scaled to 3909 * 3910 * @return Scale factor in Q8 format 3911 ******************************************************************************** 3912 */ 3913 S16 hme_scale_for_ref_idx(S32 curr_poc, S32 poc_from, S32 poc_to) 3914 { 3915 S32 td, tx, tb; 3916 S16 i2_scf; 3917 /*************************************************************************/ 3918 /* Approximate scale factor: 256 * num / denom */ 3919 /* num = curr_poc - poc_to, denom = curr_poc - poc_from */ 3920 /* Exact implementation as per standard. 
*/ 3921 /*************************************************************************/ 3922 3923 tb = HME_CLIP((curr_poc - poc_to), -128, 127); 3924 td = HME_CLIP((curr_poc - poc_from), -128, 127); 3925 3926 tx = (16384 + (ABS(td) >> 1)) / td; 3927 //i2_scf = HME_CLIP((((tb*tx)+32)>>6), -128, 127); 3928 i2_scf = HME_CLIP((((tb * tx) + 32) >> 6), -4096, 4095); 3929 3930 return (i2_scf); 3931 } 3932 3933 /** 3934 ******************************************************************************** 3935 * @fn hme_process_frm_init 3936 * 3937 * @brief HME frame level initialsation processing function 3938 * 3939 * @param[in] pv_me_ctxt : ME ctxt pointer 3940 * 3941 * @param[in] ps_ref_map : Reference map prms pointer 3942 * 3943 * @param[in] ps_frm_prms :Pointer to frame params 3944 * 3945 * called only for encode layer 3946 * 3947 * @return Scale factor in Q8 format 3948 ******************************************************************************** 3949 */ 3950 void hme_process_frm_init( 3951 void *pv_me_ctxt, 3952 hme_ref_map_t *ps_ref_map, 3953 hme_frm_prms_t *ps_frm_prms, 3954 WORD32 i4_me_frm_id, 3955 WORD32 i4_num_me_frm_pllel) 3956 { 3957 me_ctxt_t *ps_thrd_ctxt = (me_ctxt_t *)pv_me_ctxt; 3958 me_frm_ctxt_t *ps_ctxt = (me_frm_ctxt_t *)ps_thrd_ctxt->aps_me_frm_prms[i4_me_frm_id]; 3959 3960 S32 i, j, desc_idx; 3961 S16 i2_max_x = 0, i2_max_y = 0; 3962 3963 /* Set the Qp of current frm passed by caller. Required for intra cost */ 3964 ps_ctxt->frm_qstep = ps_frm_prms->qstep; 3965 ps_ctxt->qstep_ls8 = ps_frm_prms->qstep_ls8; 3966 3967 /* Bidir enabled or not */ 3968 ps_ctxt->s_frm_prms = *ps_frm_prms; 3969 3970 /*************************************************************************/ 3971 /* Set up the ref pic parameters across all layers. For this, we do the */ 3972 /* following: the application has given us a ref pic list, we go index */ 3973 /* by index and pick up the picture. A picture can be uniquely be mapped */ 3974 /* to a POC. 
So we search all layer descriptor array to find the POC */ 3975 /* Once found, we update all attributes in this descriptor. */ 3976 /* During this updation process we also create an index of descriptor id */ 3977 /* to ref id mapping. It is important to find the same POC in the layers */ 3978 /* descr strcture since it holds the pyramid inputs for non encode layers*/ 3979 /* Apart from this, e also update array containing the index of the descr*/ 3980 /* During processing for ease of access, each layer has a pointer to aray*/ 3981 /* of pointers containing fxfy, fxhy, hxfy, hxhy and inputs for each ref */ 3982 /* we update this too. */ 3983 /*************************************************************************/ 3984 ps_ctxt->num_ref_past = 0; 3985 ps_ctxt->num_ref_future = 0; 3986 for(i = 0; i < ps_ref_map->i4_num_ref; i++) 3987 { 3988 S32 ref_id_lc, idx; 3989 hme_ref_desc_t *ps_ref_desc; 3990 3991 ps_ref_desc = &ps_ref_map->as_ref_desc[i]; 3992 ref_id_lc = ps_ref_desc->i1_ref_id_lc; 3993 /* Obtain the id of descriptor that contains this POC */ 3994 idx = hme_find_descr_idx( 3995 ps_thrd_ctxt, ps_ref_desc->i4_poc, ps_ref_desc->i4_GOP_num, i4_num_me_frm_pllel); 3996 3997 /* Update all layers in this descr with the reference attributes */ 3998 hme_update_layer_desc( 3999 &ps_thrd_ctxt->as_ref_descr[idx], 4000 ps_ref_desc, 4001 0, 4002 1, //ps_ctxt->num_layers, 4003 ps_ctxt->ps_curr_descr); 4004 4005 /* Update the pointer holder for the recon planes */ 4006 ps_ctxt->ps_curr_descr->aps_layers[0]->ppu1_list_inp = &ps_ctxt->apu1_list_inp[0][0]; 4007 ps_ctxt->ps_curr_descr->aps_layers[0]->ppu1_list_rec_fxfy = 4008 &ps_ctxt->apu1_list_rec_fxfy[0][0]; 4009 ps_ctxt->ps_curr_descr->aps_layers[0]->ppu1_list_rec_hxfy = 4010 &ps_ctxt->apu1_list_rec_hxfy[0][0]; 4011 ps_ctxt->ps_curr_descr->aps_layers[0]->ppu1_list_rec_fxhy = 4012 &ps_ctxt->apu1_list_rec_fxhy[0][0]; 4013 ps_ctxt->ps_curr_descr->aps_layers[0]->ppu1_list_rec_hxhy = 4014 &ps_ctxt->apu1_list_rec_hxhy[0][0]; 4015 
ps_ctxt->ps_curr_descr->aps_layers[0]->ppv_dep_mngr_recon = 4016 &ps_ctxt->apv_list_dep_mngr[0][0]; 4017 4018 /* Update the array having ref id lc to descr id mapping */ 4019 ps_ctxt->a_ref_to_descr_id[ps_ref_desc->i1_ref_id_lc] = idx; 4020 4021 /* From ref id lc we need to work out the POC, So update this array */ 4022 ps_ctxt->ai4_ref_idx_to_poc_lc[ref_id_lc] = ps_ref_desc->i4_poc; 4023 4024 /* When computing costs in L0 and L1 directions, we need the */ 4025 /* respective ref id L0 and L1, so update this mapping */ 4026 ps_ctxt->a_ref_idx_lc_to_l0[ref_id_lc] = ps_ref_desc->i1_ref_id_l0; 4027 ps_ctxt->a_ref_idx_lc_to_l1[ref_id_lc] = ps_ref_desc->i1_ref_id_l1; 4028 if((ps_ctxt->i4_curr_poc > ps_ref_desc->i4_poc) || ps_ctxt->i4_curr_poc == 0) 4029 { 4030 ps_ctxt->au1_is_past[ref_id_lc] = 1; 4031 ps_ctxt->ai1_past_list[ps_ctxt->num_ref_past] = ref_id_lc; 4032 ps_ctxt->num_ref_past++; 4033 } 4034 else 4035 { 4036 ps_ctxt->au1_is_past[ref_id_lc] = 0; 4037 ps_ctxt->ai1_future_list[ps_ctxt->num_ref_future] = ref_id_lc; 4038 ps_ctxt->num_ref_future++; 4039 } 4040 4041 if(1 == ps_ctxt->i4_wt_pred_enable_flag) 4042 { 4043 /* copy the weight and offsets from current ref desc */ 4044 ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_lc] = ps_ref_desc->i2_weight; 4045 4046 /* inv weight is stored in Q15 format */ 4047 ps_ctxt->s_wt_pred.a_inv_wpred_wt[ref_id_lc] = 4048 ((1 << 15) + (ps_ref_desc->i2_weight >> 1)) / ps_ref_desc->i2_weight; 4049 ps_ctxt->s_wt_pred.a_wpred_off[ref_id_lc] = ps_ref_desc->i2_offset; 4050 } 4051 else 4052 { 4053 /* store default wt and offset*/ 4054 ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_lc] = WGHT_DEFAULT; 4055 4056 /* inv weight is stored in Q15 format */ 4057 ps_ctxt->s_wt_pred.a_inv_wpred_wt[ref_id_lc] = 4058 ((1 << 15) + (WGHT_DEFAULT >> 1)) / WGHT_DEFAULT; 4059 4060 ps_ctxt->s_wt_pred.a_wpred_off[ref_id_lc] = 0; 4061 } 4062 } 4063 4064 ps_ctxt->ai1_future_list[ps_ctxt->num_ref_future] = -1; 4065 ps_ctxt->ai1_past_list[ps_ctxt->num_ref_past] = -1; 4066 4067 
/*************************************************************************/ 4068 /* Preparation of the TLU for bits for reference indices. */ 4069 /* Special case is that of numref = 2. (TEV) */ 4070 /* Other cases uses UEV */ 4071 /*************************************************************************/ 4072 for(i = 0; i < MAX_NUM_REF; i++) 4073 { 4074 ps_ctxt->au1_ref_bits_tlu_lc[0][i] = 0; 4075 ps_ctxt->au1_ref_bits_tlu_lc[1][i] = 0; 4076 } 4077 4078 if(ps_ref_map->i4_num_ref == 2) 4079 { 4080 ps_ctxt->au1_ref_bits_tlu_lc[0][0] = 1; 4081 ps_ctxt->au1_ref_bits_tlu_lc[1][0] = 1; 4082 ps_ctxt->au1_ref_bits_tlu_lc[0][1] = 1; 4083 ps_ctxt->au1_ref_bits_tlu_lc[1][1] = 1; 4084 } 4085 else if(ps_ref_map->i4_num_ref > 2) 4086 { 4087 for(i = 0; i < ps_ref_map->i4_num_ref; i++) 4088 { 4089 S32 l0, l1; 4090 l0 = ps_ctxt->a_ref_idx_lc_to_l0[i]; 4091 l1 = ps_ctxt->a_ref_idx_lc_to_l1[i]; 4092 ps_ctxt->au1_ref_bits_tlu_lc[0][i] = gau1_ref_bits[l0]; 4093 ps_ctxt->au1_ref_bits_tlu_lc[1][i] = gau1_ref_bits[l1]; 4094 } 4095 } 4096 4097 /*************************************************************************/ 4098 /* Preparation of the scaling factors for reference indices. The scale */ 4099 /* factor depends on distance of the two ref indices from current input */ 4100 /* in terms of poc delta. 
*/ 4101 /*************************************************************************/ 4102 for(i = 0; i < ps_ref_map->i4_num_ref; i++) 4103 { 4104 for(j = 0; j < ps_ref_map->i4_num_ref; j++) 4105 { 4106 S16 i2_scf_q8; 4107 S32 poc_from, poc_to; 4108 4109 poc_from = ps_ctxt->ai4_ref_idx_to_poc_lc[j]; 4110 poc_to = ps_ctxt->ai4_ref_idx_to_poc_lc[i]; 4111 4112 i2_scf_q8 = hme_scale_for_ref_idx(ps_ctxt->i4_curr_poc, poc_from, poc_to); 4113 ps_ctxt->ai2_ref_scf[j + i * MAX_NUM_REF] = i2_scf_q8; 4114 } 4115 } 4116 4117 /*************************************************************************/ 4118 /* We store simplified look ups for 4 hpel planes and inp y plane for */ 4119 /* every layer and for every ref id in the layer. So update these lookups*/ 4120 /*************************************************************************/ 4121 for(i = 0; i < 1; i++) 4122 { 4123 U08 **ppu1_rec_fxfy, **ppu1_rec_hxfy, **ppu1_rec_fxhy, **ppu1_rec_hxhy; 4124 U08 **ppu1_inp; 4125 void **ppvlist_dep_mngr; 4126 layer_ctxt_t *ps_layer_ctxt = ps_ctxt->ps_curr_descr->aps_layers[i]; 4127 4128 ppvlist_dep_mngr = &ps_ctxt->apv_list_dep_mngr[i][0]; 4129 ppu1_rec_fxfy = &ps_ctxt->apu1_list_rec_fxfy[i][0]; 4130 ppu1_rec_hxfy = &ps_ctxt->apu1_list_rec_hxfy[i][0]; 4131 ppu1_rec_fxhy = &ps_ctxt->apu1_list_rec_fxhy[i][0]; 4132 ppu1_rec_hxhy = &ps_ctxt->apu1_list_rec_hxhy[i][0]; 4133 ppu1_inp = &ps_ctxt->apu1_list_inp[i][0]; 4134 for(j = 0; j < ps_ref_map->i4_num_ref; j++) 4135 { 4136 hme_ref_desc_t *ps_ref_desc; 4137 hme_ref_buf_info_t *ps_buf_info; 4138 layer_ctxt_t *ps_layer; 4139 S32 ref_id_lc; 4140 4141 ps_ref_desc = &ps_ref_map->as_ref_desc[j]; 4142 ps_buf_info = &ps_ref_desc->as_ref_info[i]; 4143 ref_id_lc = ps_ref_desc->i1_ref_id_lc; 4144 4145 desc_idx = ps_ctxt->a_ref_to_descr_id[ref_id_lc]; 4146 ps_layer = ps_thrd_ctxt->as_ref_descr[desc_idx].aps_layers[i]; 4147 4148 ppu1_inp[j] = ps_buf_info->pu1_ref_src; 4149 ppu1_rec_fxfy[j] = ps_buf_info->pu1_rec_fxfy; 4150 ppu1_rec_hxfy[j] = 
ps_buf_info->pu1_rec_hxfy; 4151 ppu1_rec_fxhy[j] = ps_buf_info->pu1_rec_fxhy; 4152 ppu1_rec_hxhy[j] = ps_buf_info->pu1_rec_hxhy; 4153 ppvlist_dep_mngr[j] = ps_buf_info->pv_dep_mngr; 4154 4155 /* Update the curr descriptors reference pointers here */ 4156 ps_layer_ctxt->ppu1_list_inp[j] = ps_buf_info->pu1_ref_src; 4157 ps_layer_ctxt->ppu1_list_rec_fxfy[j] = ps_buf_info->pu1_rec_fxfy; 4158 ps_layer_ctxt->ppu1_list_rec_hxfy[j] = ps_buf_info->pu1_rec_hxfy; 4159 ps_layer_ctxt->ppu1_list_rec_fxhy[j] = ps_buf_info->pu1_rec_fxhy; 4160 ps_layer_ctxt->ppu1_list_rec_hxhy[j] = ps_buf_info->pu1_rec_hxhy; 4161 } 4162 } 4163 /*************************************************************************/ 4164 /* The mv range for each layer is computed. For dyadic layers it will */ 4165 /* keep shrinking by 2, for non dyadic it will shrink by ratio of wd and */ 4166 /* ht. In general formula used is scale by ratio of wd for x and ht for y*/ 4167 /*************************************************************************/ 4168 for(i = 0; i < 1; i++) 4169 { 4170 layer_ctxt_t *ps_layer_ctxt; 4171 if(i == 0) 4172 { 4173 i2_max_x = ps_frm_prms->i2_mv_range_x; 4174 i2_max_y = ps_frm_prms->i2_mv_range_y; 4175 } 4176 else 4177 { 4178 i2_max_x = (S16)FLOOR8(((i2_max_x * ps_ctxt->i4_wd) / ps_ctxt->i4_wd)); 4179 i2_max_y = (S16)FLOOR8(((i2_max_y * ps_ctxt->i4_ht) / ps_ctxt->i4_ht)); 4180 } 4181 ps_layer_ctxt = ps_ctxt->ps_curr_descr->aps_layers[i]; 4182 ps_layer_ctxt->i2_max_mv_x = i2_max_x; 4183 ps_layer_ctxt->i2_max_mv_y = i2_max_y; 4184 4185 /*********************************************************************/ 4186 /* Every layer maintains a reference id lc to POC mapping. This is */ 4187 /* because the mapping is unique for every frm. Also, in next frm, */ 4188 /* we require colocated mvs which means scaling according to temporal*/ 4189 /*distance. 
Hence this mapping needs to be maintained in every */ 4190 /* layer ctxt */ 4191 /*********************************************************************/ 4192 memset(ps_layer_ctxt->ai4_ref_id_to_poc_lc, -1, sizeof(S32) * ps_ctxt->max_num_ref); 4193 if(ps_ref_map->i4_num_ref) 4194 { 4195 memcpy( 4196 ps_layer_ctxt->ai4_ref_id_to_poc_lc, 4197 ps_ctxt->ai4_ref_idx_to_poc_lc, 4198 ps_ref_map->i4_num_ref * sizeof(S32)); 4199 } 4200 } 4201 4202 return; 4203 } 4204 4205 /** 4206 ******************************************************************************** 4207 * @fn hme_coarse_process_frm_init 4208 * 4209 * @brief HME frame level initialsation processing function 4210 * 4211 * @param[in] pv_me_ctxt : ME ctxt pointer 4212 * 4213 * @param[in] ps_ref_map : Reference map prms pointer 4214 * 4215 * @param[in] ps_frm_prms :Pointer to frame params 4216 * 4217 * @return Scale factor in Q8 format 4218 ******************************************************************************** 4219 */ 4220 void hme_coarse_process_frm_init( 4221 void *pv_me_ctxt, hme_ref_map_t *ps_ref_map, hme_frm_prms_t *ps_frm_prms) 4222 { 4223 coarse_me_ctxt_t *ps_ctxt = (coarse_me_ctxt_t *)pv_me_ctxt; 4224 S32 i, j, desc_idx; 4225 S16 i2_max_x = 0, i2_max_y = 0; 4226 4227 /* Set the Qp of current frm passed by caller. Required for intra cost */ 4228 ps_ctxt->frm_qstep = ps_frm_prms->qstep; 4229 4230 /* Bidir enabled or not */ 4231 ps_ctxt->s_frm_prms = *ps_frm_prms; 4232 4233 /*************************************************************************/ 4234 /* Set up the ref pic parameters across all layers. For this, we do the */ 4235 /* following: the application has given us a ref pic list, we go index */ 4236 /* by index and pick up the picture. A picture can be uniquely be mapped */ 4237 /* to a POC. So we search all layer descriptor array to find the POC */ 4238 /* Once found, we update all attributes in this descriptor. 
*/ 4239 /* During this updation process we also create an index of descriptor id */ 4240 /* to ref id mapping. It is important to find the same POC in the layers */ 4241 /* descr strcture since it holds the pyramid inputs for non encode layers*/ 4242 /* Apart from this, e also update array containing the index of the descr*/ 4243 /* During processing for ease of access, each layer has a pointer to aray*/ 4244 /* of pointers containing fxfy, fxhy, hxfy, hxhy and inputs for each ref */ 4245 /* we update this too. */ 4246 /*************************************************************************/ 4247 ps_ctxt->num_ref_past = 0; 4248 ps_ctxt->num_ref_future = 0; 4249 for(i = 0; i < ps_ref_map->i4_num_ref; i++) 4250 { 4251 S32 ref_id_lc, idx; 4252 hme_ref_desc_t *ps_ref_desc; 4253 4254 ps_ref_desc = &ps_ref_map->as_ref_desc[i]; 4255 ref_id_lc = ps_ref_desc->i1_ref_id_lc; 4256 /* Obtain the id of descriptor that contains this POC */ 4257 idx = hme_coarse_find_descr_idx(ps_ctxt, ps_ref_desc->i4_poc); 4258 4259 /* Update all layers in this descr with the reference attributes */ 4260 hme_update_layer_desc( 4261 &ps_ctxt->as_ref_descr[idx], 4262 ps_ref_desc, 4263 1, 4264 ps_ctxt->num_layers - 1, 4265 ps_ctxt->ps_curr_descr); 4266 4267 /* Update the array having ref id lc to descr id mapping */ 4268 ps_ctxt->a_ref_to_descr_id[ps_ref_desc->i1_ref_id_lc] = idx; 4269 4270 /* From ref id lc we need to work out the POC, So update this array */ 4271 ps_ctxt->ai4_ref_idx_to_poc_lc[ref_id_lc] = ps_ref_desc->i4_poc; 4272 4273 /* From ref id lc we need to work out the display num, So update this array */ 4274 ps_ctxt->ai4_ref_idx_to_disp_num[ref_id_lc] = ps_ref_desc->i4_display_num; 4275 4276 /* When computing costs in L0 and L1 directions, we need the */ 4277 /* respective ref id L0 and L1, so update this mapping */ 4278 ps_ctxt->a_ref_idx_lc_to_l0[ref_id_lc] = ps_ref_desc->i1_ref_id_l0; 4279 ps_ctxt->a_ref_idx_lc_to_l1[ref_id_lc] = ps_ref_desc->i1_ref_id_l1; 4280 
if((ps_ctxt->i4_curr_poc > ps_ref_desc->i4_poc) || ps_ctxt->i4_curr_poc == 0) 4281 { 4282 ps_ctxt->au1_is_past[ref_id_lc] = 1; 4283 ps_ctxt->ai1_past_list[ps_ctxt->num_ref_past] = ref_id_lc; 4284 ps_ctxt->num_ref_past++; 4285 } 4286 else 4287 { 4288 ps_ctxt->au1_is_past[ref_id_lc] = 0; 4289 ps_ctxt->ai1_future_list[ps_ctxt->num_ref_future] = ref_id_lc; 4290 ps_ctxt->num_ref_future++; 4291 } 4292 if(1 == ps_ctxt->i4_wt_pred_enable_flag) 4293 { 4294 /* copy the weight and offsets from current ref desc */ 4295 ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_lc] = ps_ref_desc->i2_weight; 4296 4297 /* inv weight is stored in Q15 format */ 4298 ps_ctxt->s_wt_pred.a_inv_wpred_wt[ref_id_lc] = 4299 ((1 << 15) + (ps_ref_desc->i2_weight >> 1)) / ps_ref_desc->i2_weight; 4300 4301 ps_ctxt->s_wt_pred.a_wpred_off[ref_id_lc] = ps_ref_desc->i2_offset; 4302 } 4303 else 4304 { 4305 /* store default wt and offset*/ 4306 ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_lc] = WGHT_DEFAULT; 4307 4308 /* inv weight is stored in Q15 format */ 4309 ps_ctxt->s_wt_pred.a_inv_wpred_wt[ref_id_lc] = 4310 ((1 << 15) + (WGHT_DEFAULT >> 1)) / WGHT_DEFAULT; 4311 4312 ps_ctxt->s_wt_pred.a_wpred_off[ref_id_lc] = 0; 4313 } 4314 } 4315 4316 ps_ctxt->ai1_future_list[ps_ctxt->num_ref_future] = -1; 4317 ps_ctxt->ai1_past_list[ps_ctxt->num_ref_past] = -1; 4318 4319 /*************************************************************************/ 4320 /* Preparation of the TLU for bits for reference indices. */ 4321 /* Special case is that of numref = 2. 
(TEV) */ 4322 /* Other cases uses UEV */ 4323 /*************************************************************************/ 4324 for(i = 0; i < MAX_NUM_REF; i++) 4325 { 4326 ps_ctxt->au1_ref_bits_tlu_lc[0][i] = 0; 4327 ps_ctxt->au1_ref_bits_tlu_lc[1][i] = 0; 4328 } 4329 4330 if(ps_ref_map->i4_num_ref == 2) 4331 { 4332 ps_ctxt->au1_ref_bits_tlu_lc[0][0] = 1; 4333 ps_ctxt->au1_ref_bits_tlu_lc[1][0] = 1; 4334 ps_ctxt->au1_ref_bits_tlu_lc[0][1] = 1; 4335 ps_ctxt->au1_ref_bits_tlu_lc[1][1] = 1; 4336 } 4337 else if(ps_ref_map->i4_num_ref > 2) 4338 { 4339 for(i = 0; i < ps_ref_map->i4_num_ref; i++) 4340 { 4341 S32 l0, l1; 4342 l0 = ps_ctxt->a_ref_idx_lc_to_l0[i]; 4343 l1 = ps_ctxt->a_ref_idx_lc_to_l1[i]; 4344 ps_ctxt->au1_ref_bits_tlu_lc[0][i] = gau1_ref_bits[l0]; 4345 ps_ctxt->au1_ref_bits_tlu_lc[1][i] = gau1_ref_bits[l1]; 4346 } 4347 } 4348 4349 /*************************************************************************/ 4350 /* Preparation of the scaling factors for reference indices. The scale */ 4351 /* factor depends on distance of the two ref indices from current input */ 4352 /* in terms of poc delta. */ 4353 /*************************************************************************/ 4354 for(i = 0; i < ps_ref_map->i4_num_ref; i++) 4355 { 4356 for(j = 0; j < ps_ref_map->i4_num_ref; j++) 4357 { 4358 S16 i2_scf_q8; 4359 S32 poc_from, poc_to; 4360 4361 poc_from = ps_ctxt->ai4_ref_idx_to_poc_lc[j]; 4362 poc_to = ps_ctxt->ai4_ref_idx_to_poc_lc[i]; 4363 4364 i2_scf_q8 = hme_scale_for_ref_idx(ps_ctxt->i4_curr_poc, poc_from, poc_to); 4365 ps_ctxt->ai2_ref_scf[j + i * MAX_NUM_REF] = i2_scf_q8; 4366 } 4367 } 4368 4369 /*************************************************************************/ 4370 /* We store simplified look ups for inp y plane for */ 4371 /* every layer and for every ref id in the layer. 
*/ 4372 /*************************************************************************/ 4373 for(i = 1; i < ps_ctxt->num_layers; i++) 4374 { 4375 U08 **ppu1_inp; 4376 4377 ppu1_inp = &ps_ctxt->apu1_list_inp[i][0]; 4378 for(j = 0; j < ps_ref_map->i4_num_ref; j++) 4379 { 4380 hme_ref_desc_t *ps_ref_desc; 4381 hme_ref_buf_info_t *ps_buf_info; 4382 layer_ctxt_t *ps_layer; 4383 S32 ref_id_lc; 4384 4385 ps_ref_desc = &ps_ref_map->as_ref_desc[j]; 4386 ps_buf_info = &ps_ref_desc->as_ref_info[i]; 4387 ref_id_lc = ps_ref_desc->i1_ref_id_lc; 4388 4389 desc_idx = ps_ctxt->a_ref_to_descr_id[ref_id_lc]; 4390 ps_layer = ps_ctxt->as_ref_descr[desc_idx].aps_layers[i]; 4391 4392 ppu1_inp[j] = ps_layer->pu1_inp; 4393 } 4394 } 4395 /*************************************************************************/ 4396 /* The mv range for each layer is computed. For dyadic layers it will */ 4397 /* keep shrinking by 2, for non dyadic it will shrink by ratio of wd and */ 4398 /* ht. In general formula used is scale by ratio of wd for x and ht for y*/ 4399 /*************************************************************************/ 4400 4401 /* set to layer 0 search range params */ 4402 i2_max_x = ps_frm_prms->i2_mv_range_x; 4403 i2_max_y = ps_frm_prms->i2_mv_range_y; 4404 4405 for(i = 1; i < ps_ctxt->num_layers; i++) 4406 { 4407 layer_ctxt_t *ps_layer_ctxt; 4408 4409 { 4410 i2_max_x = (S16)FLOOR8(((i2_max_x * ps_ctxt->a_wd[i]) / ps_ctxt->a_wd[i - 1])); 4411 i2_max_y = (S16)FLOOR8(((i2_max_y * ps_ctxt->a_ht[i]) / ps_ctxt->a_ht[i - 1])); 4412 } 4413 ps_layer_ctxt = ps_ctxt->ps_curr_descr->aps_layers[i]; 4414 ps_layer_ctxt->i2_max_mv_x = i2_max_x; 4415 ps_layer_ctxt->i2_max_mv_y = i2_max_y; 4416 4417 /*********************************************************************/ 4418 /* Every layer maintains a reference id lc to POC mapping. This is */ 4419 /* because the mapping is unique for every frm. 
Also, in next frm, */ 4420 /* we require colocated mvs which means scaling according to temporal*/ 4421 /*distance. Hence this mapping needs to be maintained in every */ 4422 /* layer ctxt */ 4423 /*********************************************************************/ 4424 memset(ps_layer_ctxt->ai4_ref_id_to_poc_lc, -1, sizeof(S32) * ps_ctxt->max_num_ref); 4425 if(ps_ref_map->i4_num_ref) 4426 { 4427 memcpy( 4428 ps_layer_ctxt->ai4_ref_id_to_poc_lc, 4429 ps_ctxt->ai4_ref_idx_to_poc_lc, 4430 ps_ref_map->i4_num_ref * sizeof(S32)); 4431 memcpy( 4432 ps_layer_ctxt->ai4_ref_id_to_disp_num, 4433 ps_ctxt->ai4_ref_idx_to_disp_num, 4434 ps_ref_map->i4_num_ref * sizeof(S32)); 4435 } 4436 } 4437 4438 return; 4439 } 4440 4441 /** 4442 ******************************************************************************** 4443 * @fn hme_process_frm 4444 * 4445 * @brief HME frame level processing function 4446 * 4447 * @param[in] pv_me_ctxt : ME ctxt pointer 4448 * 4449 * @param[in] ps_ref_map : Reference map prms pointer 4450 * 4451 * @param[in] ppd_intra_costs : pointer to array of intra cost cost buffers for each layer 4452 * 4453 * @param[in] ps_frm_prms : pointer to Frame level parameters of HME 4454 * 4455 * @param[in] pf_ext_update_fxn : function pointer to update CTb results 4456 * 4457 * @param[in] pf_get_intra_cu_and_cost :function pointer to get intra cu size and cost 4458 * 4459 * @param[in] ps_multi_thrd_ctxt :function pointer to get intra cu size and cost 4460 * 4461 * @return Scale factor in Q8 format 4462 ******************************************************************************** 4463 */ 4464 4465 void hme_process_frm( 4466 void *pv_me_ctxt, 4467 pre_enc_L0_ipe_encloop_ctxt_t *ps_l0_ipe_input, 4468 hme_ref_map_t *ps_ref_map, 4469 double **ppd_intra_costs, 4470 hme_frm_prms_t *ps_frm_prms, 4471 PF_EXT_UPDATE_FXN_T pf_ext_update_fxn, 4472 void *pv_coarse_layer, 4473 void *pv_multi_thrd_ctxt, 4474 S32 i4_frame_parallelism_level, 4475 S32 thrd_id, 4476 S32 i4_me_frm_id) 
4477 { 4478 refine_prms_t s_refine_prms; 4479 me_ctxt_t *ps_thrd_ctxt = (me_ctxt_t *)pv_me_ctxt; 4480 me_frm_ctxt_t *ps_ctxt = ps_thrd_ctxt->aps_me_frm_prms[i4_me_frm_id]; 4481 4482 S32 lyr_job_type; 4483 multi_thrd_ctxt_t *ps_multi_thrd_ctxt; 4484 layer_ctxt_t *ps_coarse_layer = (layer_ctxt_t *)pv_coarse_layer; 4485 4486 ps_multi_thrd_ctxt = (multi_thrd_ctxt_t *)pv_multi_thrd_ctxt; 4487 4488 lyr_job_type = ME_JOB_ENC_LYR; 4489 /*************************************************************************/ 4490 /* Final L0 layer ME call */ 4491 /*************************************************************************/ 4492 { 4493 /* Set the CTB attributes dependin on corner/rt edge/bot edge/center*/ 4494 hme_set_ctb_attrs(ps_ctxt->as_ctb_bound_attrs, ps_ctxt->i4_wd, ps_ctxt->i4_ht); 4495 4496 hme_set_refine_prms( 4497 &s_refine_prms, 4498 ps_ctxt->u1_encode[0], 4499 ps_ref_map->i4_num_ref, 4500 0, 4501 ps_ctxt->num_layers, 4502 ps_ctxt->num_layers_explicit_search, 4503 ps_thrd_ctxt->s_init_prms.use_4x4, 4504 ps_frm_prms, 4505 ppd_intra_costs, 4506 &ps_thrd_ctxt->s_init_prms.s_me_coding_tools); 4507 4508 hme_refine( 4509 ps_thrd_ctxt, 4510 &s_refine_prms, 4511 pf_ext_update_fxn, 4512 ps_coarse_layer, 4513 ps_multi_thrd_ctxt, 4514 lyr_job_type, 4515 thrd_id, 4516 i4_me_frm_id, 4517 ps_l0_ipe_input); 4518 4519 /* Set current ref pic status which will used as perv frame ref pic */ 4520 if(i4_frame_parallelism_level) 4521 { 4522 ps_ctxt->i4_is_prev_frame_reference = 0; 4523 } 4524 else 4525 { 4526 ps_ctxt->i4_is_prev_frame_reference = 4527 ps_multi_thrd_ctxt->aps_cur_inp_me_prms[i4_me_frm_id] 4528 ->ps_curr_inp->s_lap_out.i4_is_ref_pic; 4529 } 4530 } 4531 4532 return; 4533 } 4534 4535 /** 4536 ******************************************************************************** 4537 * @fn hme_coarse_process_frm 4538 * 4539 * @brief HME frame level processing function (coarse + refine) 4540 * 4541 * @param[in] pv_me_ctxt : ME ctxt pointer 4542 * 4543 * @param[in] ps_ref_map : 
Reference map prms pointer 4544 * 4545 * @param[in] ps_frm_prms : pointer to Frame level parameters of HME 4546 * 4547 * @param[in] ps_multi_thrd_ctxt :Multi thread related ctxt 4548 * 4549 * @return Scale factor in Q8 format 4550 ******************************************************************************** 4551 */ 4552 4553 void hme_coarse_process_frm( 4554 void *pv_me_ctxt, 4555 hme_ref_map_t *ps_ref_map, 4556 hme_frm_prms_t *ps_frm_prms, 4557 void *pv_multi_thrd_ctxt, 4558 WORD32 i4_ping_pong, 4559 void **ppv_dep_mngr_hme_sync) 4560 { 4561 S16 i2_max; 4562 S32 layer_id; 4563 coarse_prms_t s_coarse_prms; 4564 refine_prms_t s_refine_prms; 4565 coarse_me_ctxt_t *ps_ctxt = (coarse_me_ctxt_t *)pv_me_ctxt; 4566 S32 lyr_job_type; 4567 multi_thrd_ctxt_t *ps_multi_thrd_ctxt; 4568 4569 ps_multi_thrd_ctxt = (multi_thrd_ctxt_t *)pv_multi_thrd_ctxt; 4570 /*************************************************************************/ 4571 /* Fire processing of all layers, starting with coarsest layer. 
*/ 4572 /*************************************************************************/ 4573 layer_id = ps_ctxt->num_layers - 1; 4574 i2_max = ps_ctxt->ps_curr_descr->aps_layers[layer_id]->i2_max_mv_x; 4575 i2_max = MAX(i2_max, ps_ctxt->ps_curr_descr->aps_layers[layer_id]->i2_max_mv_y); 4576 s_coarse_prms.i4_layer_id = layer_id; 4577 { 4578 S32 log_start_step; 4579 /* Based on Preset, set the starting step size for Refinement */ 4580 if(ME_MEDIUM_SPEED > ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets) 4581 { 4582 log_start_step = 0; 4583 } 4584 else 4585 { 4586 log_start_step = 1; 4587 } 4588 4589 s_coarse_prms.i4_max_iters = i2_max >> log_start_step; 4590 s_coarse_prms.i4_start_step = 1 << log_start_step; 4591 } 4592 s_coarse_prms.i4_num_ref = ps_ref_map->i4_num_ref; 4593 s_coarse_prms.do_full_search = 1; 4594 if(s_coarse_prms.do_full_search) 4595 { 4596 /* Set to 2 or 4 */ 4597 if(ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets < ME_MEDIUM_SPEED) 4598 s_coarse_prms.full_search_step = HME_COARSE_STEP_SIZE_HIGH_QUALITY; 4599 else if(ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets >= ME_MEDIUM_SPEED) 4600 s_coarse_prms.full_search_step = HME_COARSE_STEP_SIZE_HIGH_SPEED; 4601 } 4602 s_coarse_prms.num_results = ps_ctxt->max_num_results_coarse; 4603 4604 /* Coarse layer uses only 1 lambda, i.e. 
the one for open loop ME */ 4605 s_coarse_prms.lambda = ps_frm_prms->i4_ol_sad_lambda_qf; 4606 s_coarse_prms.lambda_q_shift = ps_frm_prms->lambda_q_shift; 4607 s_coarse_prms.lambda = ((float)s_coarse_prms.lambda * (100.0 - ME_LAMBDA_DISCOUNT) / 100.0); 4608 4609 hme_coarsest(ps_ctxt, &s_coarse_prms, ps_multi_thrd_ctxt, i4_ping_pong, ppv_dep_mngr_hme_sync); 4610 4611 /* all refinement layer processed in the loop below */ 4612 layer_id--; 4613 lyr_job_type = ps_multi_thrd_ctxt->i4_me_coarsest_lyr_type + 1; 4614 4615 /*************************************************************************/ 4616 /* This loop will run for all refine layers (non- encode layers) */ 4617 /*************************************************************************/ 4618 while(layer_id > 0) 4619 { 4620 hme_set_refine_prms( 4621 &s_refine_prms, 4622 ps_ctxt->u1_encode[layer_id], 4623 ps_ref_map->i4_num_ref, 4624 layer_id, 4625 ps_ctxt->num_layers, 4626 ps_ctxt->num_layers_explicit_search, 4627 ps_ctxt->s_init_prms.use_4x4, 4628 ps_frm_prms, 4629 NULL, 4630 &ps_ctxt->s_init_prms.s_me_coding_tools); 4631 4632 hme_refine_no_encode( 4633 ps_ctxt, 4634 &s_refine_prms, 4635 ps_multi_thrd_ctxt, 4636 lyr_job_type, 4637 i4_ping_pong, 4638 ppv_dep_mngr_hme_sync); 4639 4640 layer_id--; 4641 lyr_job_type++; 4642 } 4643 } 4644 /** 4645 ******************************************************************************** 4646 * @fn hme_fill_neighbour_mvs 4647 * 4648 * @brief HME neighbour MV population function 4649 * 4650 * @param[in] pps_mv_grid : MV grid array pointer 4651 * 4652 * @param[in] i4_ctb_x : CTB pos X 4653 4654 * @param[in] i4_ctb_y : CTB pos Y 4655 * 4656 * @remarks : Needs to be populated for proper implementation of cost fxn 4657 * 4658 * @return Scale factor in Q8 format 4659 ******************************************************************************** 4660 */ 4661 void hme_fill_neighbour_mvs( 4662 mv_grid_t **pps_mv_grid, S32 i4_ctb_x, S32 i4_ctb_y, S32 i4_num_ref, void *pv_ctxt) 4663 { 
4664 /* TODO : Needs to be populated for proper implementation of cost fxn */ 4665 ARG_NOT_USED(pps_mv_grid); 4666 ARG_NOT_USED(i4_ctb_x); 4667 ARG_NOT_USED(i4_ctb_y); 4668 ARG_NOT_USED(i4_num_ref); 4669 ARG_NOT_USED(pv_ctxt); 4670 } 4671 4672 /** 4673 ******************************************************************************* 4674 * @fn void hme_get_active_pocs_list(void *pv_me_ctxt, 4675 * S32 *p_pocs_buffered_in_me) 4676 * 4677 * @brief Returns the list of active POCs in ME ctxt 4678 * 4679 * @param[in] pv_me_ctxt : handle to ME context 4680 * 4681 * @param[out] p_pocs_buffered_in_me : pointer to an array which this fxn 4682 * populates with pocs active 4683 * 4684 * @return void 4685 ******************************************************************************* 4686 */ 4687 WORD32 hme_get_active_pocs_list(void *pv_me_ctxt, S32 i4_num_me_frm_pllel) 4688 { 4689 me_ctxt_t *ps_ctxt = (me_ctxt_t *)pv_me_ctxt; 4690 S32 i, count = 0; 4691 4692 for(i = 0; i < (ps_ctxt->aps_me_frm_prms[0]->max_num_ref * i4_num_me_frm_pllel) + 1; i++) 4693 { 4694 S32 poc = ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_poc; 4695 S32 i4_is_free = ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_is_free; 4696 4697 if((i4_is_free == 0) && (poc != INVALID_POC)) 4698 { 4699 count++; 4700 } 4701 } 4702 if(count == (ps_ctxt->aps_me_frm_prms[0]->max_num_ref * i4_num_me_frm_pllel) + 1) 4703 { 4704 return 1; 4705 } 4706 else 4707 { 4708 return 0; 4709 } 4710 } 4711 4712 /** 4713 ******************************************************************************* 4714 * @fn void hme_coarse_get_active_pocs_list(void *pv_me_ctxt, 4715 * S32 *p_pocs_buffered_in_me) 4716 * 4717 * @brief Returns the list of active POCs in ME ctxt 4718 * 4719 * @param[in] pv_me_ctxt : handle to ME context 4720 * 4721 * @param[out] p_pocs_buffered_in_me : pointer to an array which this fxn 4722 * populates with pocs active 4723 * 4724 * @return void 4725 
******************************************************************************* 4726 */ 4727 void hme_coarse_get_active_pocs_list(void *pv_me_ctxt, S32 *p_pocs_buffered_in_me) 4728 { 4729 coarse_me_ctxt_t *ps_ctxt = (coarse_me_ctxt_t *)pv_me_ctxt; 4730 S32 i, count = 0; 4731 4732 for(i = 0; i < ps_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; i++) 4733 { 4734 S32 poc = ps_ctxt->as_ref_descr[i].aps_layers[1]->i4_poc; 4735 4736 if(poc != -1) 4737 { 4738 p_pocs_buffered_in_me[count] = poc; 4739 count++; 4740 } 4741 } 4742 p_pocs_buffered_in_me[count] = -1; 4743 } 4744 4745 S32 hme_get_blk_size(S32 use_4x4, S32 layer_id, S32 n_layers, S32 encode) 4746 { 4747 /* coarsest layer uses 4x4 blks, lowermost layer/encode layer uses 16x16 */ 4748 if(layer_id == n_layers - 1) 4749 return 4; 4750 else if((layer_id == 0) || (encode)) 4751 return 16; 4752 4753 /* Intermediate non encode layers use 8 */ 4754 return 8; 4755 } 4756