1 /****************************************************************************** 2 * 3 * Copyright (C) 2018 The Android Open Source Project 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ***************************************************************************** 18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore 19 */ 20 21 /*****************************************************************************/ 22 /* File Includes */ 23 /*****************************************************************************/ 24 /* System include files */ 25 #include <stdio.h> 26 #include <string.h> 27 #include <stdlib.h> 28 #include <assert.h> 29 #include <stdarg.h> 30 #include <math.h> 31 #include <limits.h> 32 33 /* User include files */ 34 #include "ihevc_typedefs.h" 35 #include "itt_video_api.h" 36 #include "ihevce_api.h" 37 38 #include "rc_cntrl_param.h" 39 #include "rc_frame_info_collector.h" 40 #include "rc_look_ahead_params.h" 41 42 #include "ihevc_defs.h" 43 #include "ihevc_structs.h" 44 #include "ihevc_platform_macros.h" 45 #include "ihevc_deblk.h" 46 #include "ihevc_itrans_recon.h" 47 #include "ihevc_chroma_itrans_recon.h" 48 #include "ihevc_chroma_intra_pred.h" 49 #include "ihevc_intra_pred.h" 50 #include "ihevc_inter_pred.h" 51 #include "ihevc_mem_fns.h" 52 #include "ihevc_padding.h" 53 #include "ihevc_weighted_pred.h" 54 #include "ihevc_sao.h" 55 #include "ihevc_resi_trans.h" 56 #include "ihevc_quant_iquant_ssd.h" 
#include "ihevc_cabac_tables.h"

#include "ihevce_defs.h"
#include "ihevce_lap_enc_structs.h"
#include "ihevce_multi_thrd_structs.h"
#include "ihevce_multi_thrd_funcs.h"
#include "ihevce_me_common_defs.h"
#include "ihevce_had_satd.h"
#include "ihevce_error_codes.h"
#include "ihevce_bitstream.h"
#include "ihevce_cabac.h"
#include "ihevce_rdoq_macros.h"
#include "ihevce_function_selector.h"
#include "ihevce_enc_structs.h"
#include "ihevce_entropy_structs.h"
#include "ihevce_cmn_utils_instr_set_router.h"
#include "ihevce_enc_loop_structs.h"
#include "ihevce_inter_pred.h"
#include "ihevce_global_tables.h"
#include "ihevce_dep_mngr_interface.h"
#include "hme_datatype.h"
#include "hme_interface.h"
#include "hme_common_defs.h"
#include "hme_defs.h"
#include "ihevce_me_instr_set_router.h"
#include "hme_globals.h"
#include "hme_utils.h"
#include "hme_coarse.h"
#include "hme_fullpel.h"
#include "hme_subpel.h"
#include "hme_refine.h"
#include "hme_err_compute.h"
#include "hme_common_utils.h"
#include "hme_search_algo.h"
#include "ihevce_stasino_helpers.h"
#include "ihevce_common_utils.h"

/*****************************************************************************/
/* Macros                                                                    */
/*****************************************************************************/
#define UNI_SATD_SCALE 1

/*****************************************************************************/
/* Function Definitions                                                      */
/*****************************************************************************/

/**
********************************************************************************
* @fn ihevce_open_loop_pred_data
*
* @brief Selects the open-loop prediction direction (L0 / L1 / BI) for the
*        best 2Nx2N result of a block and generates the corresponding luma
*        inter prediction into pu1_temp_pred.
*
* @param[in,out] ps_ctxt : ME frame context (MC context used; i4_count bumped)
*
* @param[in] ps_pu_results : per-direction best PU results for this block
*
* @param[in] pu1_src : source pointer
*                      NOTE(review): unused in this function body — confirm
*
* @param[out] pu1_temp_pred : buffer receiving the generated prediction
*
* @param[in] stride : stride of pu1_temp_pred
*
* @param[in] src_strd : source stride
*                       NOTE(review): unused in this function body — confirm
*
* @param[in] e_part_id : partition id whose result counts are consulted
*
* @return void
********************************************************************************
*/
void ihevce_open_loop_pred_data(
    me_frm_ctxt_t *ps_ctxt,
    inter_pu_results_t *ps_pu_results,
    U08 *pu1_src,
    U08 *pu1_temp_pred,
    S32 stride,
    S32 src_strd,
    UWORD8 e_part_id)
{
    /* SADs start at -1 == "no result available in this direction" */
    S32 best_sad_l0 = -1, best_sad_l1 = -1;
    S32 sad_diff, status;
    inter_pred_me_ctxt_t *ps_inter_pred_me_ctxt;
    U08 enable_bi = 0;
    pu_t s_pu;

    ps_inter_pred_me_ctxt = &ps_ctxt->s_mc_ctxt;
    ps_ctxt->i4_count++;

    /* L0 direction: populate s_pu from the best L0 result */
    /* NOTE(review): availability is checked per e_part_id but the result   */
    /* read is always [PRT_2Nx2N] — presumably only 2Nx2N is used here;     */
    /* confirm against callers.                                             */
    if(ps_pu_results->u1_num_results_per_part_l0[e_part_id])
    {
        pu_result_t *ps_best_l0_pu;
        ps_best_l0_pu = ps_pu_results->aps_pu_results[0][PRT_2Nx2N];
        /* Pure distortion: strip the MV rate cost from the total cost */
        best_sad_l0 = ps_best_l0_pu->i4_tot_cost - ps_best_l0_pu->i4_mv_cost;
        s_pu.b2_pred_mode = PRED_L0;
        s_pu.b4_ht = ps_best_l0_pu->pu.b4_ht;
        s_pu.b4_wd = ps_best_l0_pu->pu.b4_wd;
        s_pu.b4_pos_x = ps_best_l0_pu->pu.b4_pos_x;
        s_pu.b4_pos_y = ps_best_l0_pu->pu.b4_pos_y;
        s_pu.b1_intra_flag = 0;
        s_pu.mv.s_l0_mv.i2_mvx = ps_best_l0_pu->pu.mv.s_l0_mv.i2_mvx;
        s_pu.mv.s_l0_mv.i2_mvy = ps_best_l0_pu->pu.mv.s_l0_mv.i2_mvy;
        s_pu.mv.i1_l0_ref_idx = ps_best_l0_pu->pu.mv.i1_l0_ref_idx;
    }
    /* L1 direction: same as above; overwrites pred mode if L1 also valid */
    if(ps_pu_results->u1_num_results_per_part_l1[e_part_id])
    {
        pu_result_t *ps_best_l1_pu;
        ps_best_l1_pu = ps_pu_results->aps_pu_results[1][PRT_2Nx2N];
        best_sad_l1 = ps_best_l1_pu->i4_tot_cost - ps_best_l1_pu->i4_mv_cost;
        s_pu.b2_pred_mode = PRED_L1;
        s_pu.b4_ht = ps_best_l1_pu->pu.b4_ht;
        s_pu.b4_wd = ps_best_l1_pu->pu.b4_wd;
        s_pu.b4_pos_x = ps_best_l1_pu->pu.b4_pos_x;
        s_pu.b4_pos_y = ps_best_l1_pu->pu.b4_pos_y;
        s_pu.b1_intra_flag = 0;
        s_pu.mv.s_l1_mv.i2_mvx = ps_best_l1_pu->pu.mv.s_l1_mv.i2_mvx;
        s_pu.mv.s_l1_mv.i2_mvy = ps_best_l1_pu->pu.mv.s_l1_mv.i2_mvy;
        s_pu.mv.i1_l1_ref_idx = ps_best_l1_pu->pu.mv.i1_l1_ref_idx;
    }
    /* At least one direction must have produced a result */
    ASSERT((best_sad_l0 != -1) || (best_sad_l1 != -1));

    /* BI selection: enable BI only when both directions are available and  */
    /* their SADs are within 15% of each other; otherwise pick the cheaper. */
    if((best_sad_l0 != -1) && (best_sad_l1 != -1))
    {
        sad_diff = abs(best_sad_l0 - best_sad_l1);
        if((sad_diff < (best_sad_l0 * 0.15)) && (sad_diff < (best_sad_l1 * 0.15)))
        {
            enable_bi = 1;
            s_pu.b2_pred_mode = PRED_BI;
        }
        if(!enable_bi)
        {
            if(best_sad_l0 < best_sad_l1)
            {
                s_pu.b2_pred_mode = PRED_L0;
            }
            else
            {
                s_pu.b2_pred_mode = PRED_L1;
            }
        }
    }
    status =
        ihevce_luma_inter_pred_pu(ps_inter_pred_me_ctxt, &s_pu, pu1_temp_pred, stride, 1);
    if(status == -1)
    {
        ASSERT(0);
    }
}

/**
********************************************************************************
* @fn void *hme_get_wkg_mem(buf_mgr_t *ps_buf_mgr, S32 i4_size)
*
* @brief Allocates a block of size = i4_size from working memory and returns
*
* @param[in,out] ps_buf_mgr: Buffer manager for wkg memory
*
* @param[in] i4_size : size required
*
* @return void pointer to allocated memory, NULL if failure
********************************************************************************
*/
void *hme_get_wkg_mem(buf_mgr_t *ps_buf_mgr, S32 i4_size)
{
    U08 *pu1_mem;

    /* Bump allocator: fail if the request does not fit in what remains */
    if(ps_buf_mgr->i4_used + i4_size > ps_buf_mgr->i4_total)
        return NULL;

    pu1_mem = ps_buf_mgr->pu1_wkg_mem + ps_buf_mgr->i4_used;
    ps_buf_mgr->i4_used += i4_size;

    return ((void *)pu1_mem);
}

/**
********************************************************************************
* @fn hme_init_histogram(
*
* @brief Initializes an mv histogram: derives bin granularity (shifts) from
*        the allowed mv range so the bin count fits MAX_NUM_BINS, and clears
*        all bin counters.
*
* @param[in,out] ps_hist : the histogram structure
*
* @param[in] i4_max_mv_x : Maximum mv allowed in x direction (fpel units)
*
* @param[in] i4_max_mv_y : Maximum mv allowed in y direction (fpel units)
*
* @return None
********************************************************************************
*/

void hme_init_histogram(mv_hist_t *ps_hist, S32 i4_max_mv_x, S32 i4_max_mv_y)
{
    S32 i4_num_bins, i4_num_cols, i4_num_rows;
    S32 i4_shift_x, i4_shift_y, i, i4_range, i4_val;

    /*************************************************************************/
    /* Evaluate the shift_x and shift_y. For this, we use the following logic*/
    /* Assuming that we use up all MAX_NUM_BINS.
Then the number of bins is */
    /* given by formula ((max_mv_x * 2) >> shift_x)*((max_mv_y * 2)>>shift_y)*/
    /* or shift_x + shift_y is log ((max_mv_x * max_mv_y * 4) / MAX_NUM_BINS)*/
    /* if above quantity is negative, then we make it zero.                  */
    /* If result is odd, shift_x gets the larger half (shift_x = shift_y+1). */
    /*************************************************************************/
    i4_val = i4_max_mv_x * i4_max_mv_y * 4;
    i4_range = (hme_get_range(i4_val - 1)) + 1;
    if(i4_range > LOG_MAX_NUM_BINS)
    {
        /* Excess bits are split between x and y; x takes the rounded-up half */
        i4_shift_y = (i4_range - LOG_MAX_NUM_BINS);
        i4_shift_x = (i4_shift_y + 1) >> 1;
        i4_shift_y >>= 1;
    }
    else
    {
        /* Entire mv range fits within MAX_NUM_BINS at full resolution */
        i4_shift_y = 0;
        i4_shift_x = 0;
    }

    /* we assume the mv range is -max_mv_x to +max_mv_x, ditto for y */
    /* So number of columns is 2*max_mv_x >> i4_shift_x. Ditto for rows */
    /* this helps us compute num bins that are active for this histo session */
    i4_num_cols = (i4_max_mv_x << 1) >> i4_shift_x;
    i4_num_rows = (i4_max_mv_y << 1) >> i4_shift_y;
    i4_num_bins = i4_num_rows * i4_num_cols;

    ASSERT(i4_num_bins <= MAX_NUM_BINS);

    ps_hist->i4_num_rows = i4_num_rows;
    ps_hist->i4_num_cols = i4_num_cols;
    ps_hist->i4_min_x = -i4_max_mv_x;
    ps_hist->i4_min_y = -i4_max_mv_y;
    ps_hist->i4_shift_x = i4_shift_x;
    ps_hist->i4_shift_y = i4_shift_y;
    /* Lobe sizes used later by hme_calculate_global_mv (thick/thin windows) */
    ps_hist->i4_lobe1_size = 5;
    ps_hist->i4_lobe2_size = 3;

    ps_hist->i4_num_bins = i4_num_bins;

    /* Clear only the bins active for this session */
    for(i = 0; i < i4_num_bins; i++)
    {
        ps_hist->ai4_bin_count[i] = 0;
    }
}

/**
********************************************************************************
* @fn hme_update_histogram(
*
* @brief Updates the histogram given an mv entry
*
* @param[in,out] ps_hist : the histogram structure
*
* @param[in] i4_mv_x : x component of the mv (fpel units)
*
* @param[in] i4_mv_y : y component of the mv (fpel units)
*
* @return None
********************************************************************************
*/
void hme_update_histogram(mv_hist_t *ps_hist, S32 i4_mv_x, S32 i4_mv_y)
{
    S32 i4_bin_index, i4_col, i4_row;

    /* Map the mv into bin coordinates: offset by the minimum then quantize */
    i4_col = (i4_mv_x - ps_hist->i4_min_x) >> ps_hist->i4_shift_x;
    i4_row = (i4_mv_y - ps_hist->i4_min_y) >> ps_hist->i4_shift_y;

    i4_bin_index = i4_col + (i4_row * ps_hist->i4_num_cols);
    /* Sanity Check */
    ASSERT(i4_bin_index < MAX_NUM_BINS);

    ps_hist->ai4_bin_count[i4_bin_index]++;
}

/**
********************************************************************************
* @fn hme_get_global_mv(
*
* @brief returns the global mv of a previous picture. Accounts for the fact
*        that the delta poc of the previous picture may have been different
*        from delta poc of current picture. Delta poc is POC difference
*        between a picture and its reference.
*
* @param[in] ps_prev_layer: layer ctxt of the previous picture
*
* @param[out] ps_mv: hme_mv_t structure where the motion vector is returned
*
* @param[in] i4_delta_poc: the delta poc for the current pic w.r.t. reference
*
* @return None
********************************************************************************
*/
void hme_get_global_mv(layer_ctxt_t *ps_prev_layer, hme_mv_t *ps_mv, S32 i4_delta_poc)
{
    S16 i2_mv_x, i2_mv_y;
    S32 i4_delta_poc_prev;
    S32 i4_poc_prev = ps_prev_layer->i4_poc;
    S32 i4_poc_prev_ref = ps_prev_layer->ai4_ref_id_to_poc_lc[0];

    /* NOTE(review): assumes the previous picture's poc differs from its     */
    /* ref poc (divisor below would otherwise be zero) — confirm callers.    */
    i4_delta_poc_prev = i4_poc_prev - i4_poc_prev_ref;
    i2_mv_x = ps_prev_layer->s_global_mv[0][GMV_THICK_LOBE].i2_mv_x;
    i2_mv_y = ps_prev_layer->s_global_mv[0][GMV_THICK_LOBE].i2_mv_y;

    /* Scale the stored global mv by the ratio of poc distances */
    i2_mv_x = (S16)((i2_mv_x * i4_delta_poc) / i4_delta_poc_prev);
    i2_mv_y = (S16)((i2_mv_y * i4_delta_poc) / i4_delta_poc_prev);

    ps_mv->i2_mv_x = i2_mv_x;
    ps_mv->i2_mv_y = i2_mv_y;
}

/**
********************************************************************************
* @fn hme_calculate_global_mv(
*
* @brief Calculates global mv for a given histogram: finds the window (lobe)
*        position with the maximum accumulated bin count and converts it back
*        to mv units.
*
* @param[in] ps_hist : the histogram structure
*
* @param[out] ps_mv : used to return the global mv
*
* @param[in] e_lobe_type : refer to GMV_MVTYPE_T
*
* @return None
********************************************************************************
*/
void hme_calculate_global_mv(mv_hist_t *ps_hist, hme_mv_t *ps_mv, GMV_MVTYPE_T e_lobe_type)
{
    S32 i4_offset, i4_lobe_size, i4_y, i4_x, *pi4_bin_count;
    S32 i4_max_sum = -1;
    S32 i4_max_x = 0, i4_max_y = 0;

    if(e_lobe_type == GMV_THICK_LOBE)
        i4_lobe_size = ps_hist->i4_lobe1_size;
    else
        i4_lobe_size = ps_hist->i4_lobe2_size;

    /* Half-window margin so the lobe stays inside the histogram */
    i4_offset = i4_lobe_size >> 1;
    for(i4_y = i4_offset; i4_y < ps_hist->i4_num_rows - i4_offset; i4_y++)
    {
        for(i4_x = i4_offset; i4_x < ps_hist->i4_num_cols - i4_offset; i4_x++)
        {
            S32 i4_bin_id, i4_sum;
            /* NOTE(review): hard-coded "- 2" matches the 5-wide lobe1 only; */
            /* for lobe2 (size 3, offset 1) the window is shifted — confirm  */
            /* this is intentional.                                          */
            i4_bin_id = (i4_x - 2) + ((i4_y - 2) * ps_hist->i4_num_cols);

            pi4_bin_count = &ps_hist->ai4_bin_count[i4_bin_id];

            i4_sum = hme_compute_2d_sum_unsigned(
                (void *)pi4_bin_count,
                i4_lobe_size,
                i4_lobe_size,
                ps_hist->i4_num_cols,
                sizeof(U32));

            /* Track the window position with the largest accumulated count */
            if(i4_sum > i4_max_sum)
            {
                i4_max_x = i4_x;
                i4_max_y = i4_y;
                i4_max_sum = i4_sum;
            }
        }
    }

    /* Convert the winning bin coordinates back to mv units */
    ps_mv->i2_mv_y = (S16)((i4_max_y << ps_hist->i4_shift_y) + ps_hist->i4_min_y);
    ps_mv->i2_mv_x = (S16)((i4_max_x << ps_hist->i4_shift_x) + ps_hist->i4_min_x);
}

/**
********************************************************************************
* @fn ctb_node_t *hme_get_ctb_node(ctb_mem_mgr_t *ps_mem_mgr)
*
* @brief returns a new ctb node usable for creating a new ctb candidate
*
* @param[in,out] ps_mem_mgr : memory manager holding all ctb nodes
*
* @return NULL if no free nodes, else ptr to the new ctb node
********************************************************************************
*/
ctb_node_t *hme_get_ctb_node(ctb_mem_mgr_t *ps_mem_mgr)
{
    U08 *pu1_ret;
    /* Bump allocation of one fixed-size node from the pool */
    if((ps_mem_mgr->i4_used + ps_mem_mgr->i4_size) > ps_mem_mgr->i4_tot)
        return (NULL);
    pu1_ret = ps_mem_mgr->pu1_mem + ps_mem_mgr->i4_used;
    ps_mem_mgr->i4_used += ps_mem_mgr->i4_size;
    return ((ctb_node_t *)pu1_ret);
}

/**
********************************************************************************
* @fn hme_map_mvs_to_grid(mv_grid_t **pps_mv_grid,
*                         search_results_t *ps_search_results,
*                         U08 *pu1_pred_dir_searched,
*                         S32 i4_num_pred_dir)
*
* @brief For a given CU whose results are in ps_search_results, the 17x17
*        mv grid is updated for future use within the CTB
*
* @param[in] ps_search_results : Search results data structure
*
* @param[out] pps_mv_grid: The mv grid (as many as num ref)
*
* @param[in] pu1_pred_dir_searched: maps iteration index -> pred direction
*
* @param[in] i4_num_pred_dir: number of search iterations to update
*
* @return None
********************************************************************************
*/
void hme_map_mvs_to_grid(
    mv_grid_t **pps_mv_grid,
    search_results_t *ps_search_results,
    U08 *pu1_pred_dir_searched,
    S32 i4_num_pred_dir)
{
    S32 i4_cu_start_offset;
    /*************************************************************************/
    /* Start x, y offset of CU relative to CTB. To update the mv grid which  */
    /* stores 1 mv per 4x4, we convert pixel offset to 4x4 blk offset        */
    /*************************************************************************/
    S32 i4_cu_offset_x = (S32)ps_search_results->u1_x_off >> 2;
    S32 i4_cu_offset_y = (S32)ps_search_results->u1_y_off >> 2;

    /* Controls the attribute of a given partition within CU */
    /* , i.e. start locn, size */
    part_attr_t *ps_part_attr;

    S32 i4_part, i4_part_id, num_parts, i4_stride;
    S16 i2_mv_x, i2_mv_y;
    S08 i1_ref_idx;

    /* Per partition, attributes w.r.t. CU start */
    S32 x_start, y_start, x_end, y_end, i4_x, i4_y;
    PART_TYPE_T e_part_type;

    /* Points to exact mv structures within the grid to be updated */
    search_node_t *ps_grid_node, *ps_grid_node_tmp;

    /* points to exact mv grid (based on search iteration) to be updated */
    mv_grid_t *ps_mv_grid;

    search_node_t *ps_search_node;

    S32 shift, i, mv_shift = 2;
    /* Proportional to the size of CU, controls the number of 4x4 blks */
    /* to be updated */
    shift = ps_search_results->e_cu_size;
    ASSERT(i4_num_pred_dir <= 2);

    e_part_type = (PART_TYPE_T)ps_search_results->ps_cu_results->ps_best_results[0].u1_part_type;

    /* A split 16x16 CU with NxN enabled is treated as four NxN partitions */
    if((ps_search_results->e_cu_size == CU_16x16) && (ps_search_results->u1_split_flag) &&
       (ps_search_results->i4_part_mask & ENABLE_NxN))
    {
        e_part_type = PRT_NxN;
    }

    for(i = 0; i < i4_num_pred_dir; i++)
    {
        num_parts = gau1_num_parts_in_part_type[e_part_type];
        ps_mv_grid = pps_mv_grid[pu1_pred_dir_searched[i]];
        i4_stride = ps_mv_grid->i4_stride;

        i4_cu_start_offset =
            i4_cu_offset_x + i4_cu_offset_y * i4_stride + ps_mv_grid->i4_start_offset;

        /* Move to the appropriate 2d locn of CU start within Grid */
        ps_grid_node = &ps_mv_grid->as_node[i4_cu_start_offset];

        for(i4_part = 0; i4_part < num_parts; i4_part++)
        {
            i4_part_id = ge_part_type_to_part_id[e_part_type][i4_part];

            /* Pick the mvx and y and ref id corresponding to this partition */
            ps_search_node =
                ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id];

            i2_mv_x = ps_search_node->s_mv.i2_mvx;
            i2_mv_y = ps_search_node->s_mv.i2_mvy;
            i1_ref_idx = ps_search_node->i1_ref_idx;

            /* Move to the appropriate location within the CU */
            ps_part_attr = &gas_part_attr_in_cu[i4_part_id];
            x_start = ps_part_attr->u1_x_start;
            x_end = x_start + ps_part_attr->u1_x_count;
            y_start = ps_part_attr->u1_y_start;
            y_end = y_start + ps_part_attr->u1_y_count;

            /* Convert attributes from 8x8 CU size to given CU size */
            x_start = (x_start << shift) >> mv_shift;
            x_end = (x_end << shift) >> mv_shift;
            y_start = (y_start << shift) >> mv_shift;
            y_end = (y_end << shift) >> mv_shift;

            ps_grid_node_tmp = ps_grid_node + y_start * i4_stride;

            /* Update all 4x4 blk mvs with the part mv */
            /* For e.g.
we update 4 units in case of NxN for 16x16 CU */ 521 for(i4_y = y_start; i4_y < y_end; i4_y++) 522 { 523 for(i4_x = x_start; i4_x < x_end; i4_x++) 524 { 525 ps_grid_node_tmp[i4_x].s_mv.i2_mvx = i2_mv_x; 526 ps_grid_node_tmp[i4_x].s_mv.i2_mvy = i2_mv_y; 527 ps_grid_node_tmp[i4_x].i1_ref_idx = i1_ref_idx; 528 ps_grid_node_tmp[i4_x].u1_subpel_done = 1; 529 } 530 ps_grid_node_tmp += i4_stride; 531 } 532 } 533 } 534 } 535 536 void hme_set_ctb_pred_attr(ctb_node_t *ps_parent, U08 *pu1_pred0, U08 *pu1_pred1, S32 i4_stride) 537 { 538 ps_parent->apu1_pred[0] = pu1_pred0; 539 ps_parent->apu1_pred[1] = pu1_pred1; 540 ps_parent->i4_pred_stride = i4_stride; 541 if(ps_parent->ps_tl != NULL) 542 { 543 S32 blk_wd = (S32)ps_parent->ps_tr->u1_x_off; 544 blk_wd -= (S32)ps_parent->u1_x_off; 545 546 hme_set_ctb_pred_attr(ps_parent->ps_tl, pu1_pred0, pu1_pred1, i4_stride >> 1); 547 548 hme_set_ctb_pred_attr( 549 ps_parent->ps_tr, pu1_pred0 + blk_wd, pu1_pred1 + blk_wd, i4_stride >> 1); 550 551 hme_set_ctb_pred_attr( 552 ps_parent->ps_bl, 553 pu1_pred0 + (blk_wd * i4_stride), 554 pu1_pred1 + (blk_wd * i4_stride), 555 i4_stride >> 1); 556 557 hme_set_ctb_pred_attr( 558 ps_parent->ps_tr, 559 pu1_pred0 + (blk_wd * (1 + i4_stride)), 560 pu1_pred1 + (blk_wd * (1 + i4_stride)), 561 i4_stride >> 1); 562 } 563 } 564 565 /** 566 ******************************************************************************** 567 * @fn hme_create_valid_part_ids(S32 i4_part_mask, S32 *pi4_valid_part_ids) 568 * 569 * @brief Expands the part mask to a list of valid part ids terminated by -1 570 * 571 * @param[in] i4_part_mask : bit mask of active partitino ids 572 * 573 * @param[out] pi4_valid_part_ids : array, each entry has one valid part id 574 * Terminated by -1 to signal end. 
*
* @return number of partitions
********************************************************************************
*/
S32 hme_create_valid_part_ids(S32 i4_part_mask, S32 *pi4_valid_part_ids)
{
    S32 id = 0, i;
    /* Collect the index of every set bit, in ascending order */
    for(i = 0; i < TOT_NUM_PARTS; i++)
    {
        if(i4_part_mask & (1 << i))
        {
            pi4_valid_part_ids[id] = i;
            id++;
        }
    }
    /* -1 sentinel marks the end of the list */
    pi4_valid_part_ids[id] = -1;

    return id;
}

/**
********************************************************************************
* @fn get_ctb_attrs
*
* @brief Returns the boundary attributes for a CTB depending on whether its
*        64x64 extent crosses the right and/or bottom picture boundary.
*
* @param[in] ctb_start_x : CTB start x in pixels
*
* @param[in] ctb_start_y : CTB start y in pixels
*
* @param[in] pic_wd : picture width
*
* @param[in] pic_ht : picture height
*
* @param[in] ps_ctxt : ME frame context holding precomputed attribute sets
*
* @return pointer to the matching ctb_boundary_attrs_t
********************************************************************************
*/
ctb_boundary_attrs_t *
get_ctb_attrs(S32 ctb_start_x, S32 ctb_start_y, S32 pic_wd, S32 pic_ht, me_frm_ctxt_t *ps_ctxt)
{
    S32 horz_crop, vert_crop;
    ctb_boundary_attrs_t *ps_attrs;

    /* Encode the two crop flags into a 2-bit selector (horz=2, vert=1) */
    horz_crop = ((ctb_start_x + 64) > pic_wd) ? 2 : 0;
    vert_crop = ((ctb_start_y + 64) > pic_ht) ? 1 : 0;
    switch(horz_crop + vert_crop)
    {
    case 0:
        ps_attrs = &ps_ctxt->as_ctb_bound_attrs[CTB_CENTRE];
        break;
    case 1:
        ps_attrs = &ps_ctxt->as_ctb_bound_attrs[CTB_BOT_PIC_BOUNDARY];
        break;
    case 2:
        ps_attrs = &ps_ctxt->as_ctb_bound_attrs[CTB_RT_PIC_BOUNDARY];
        break;
    case 3:
        ps_attrs = &ps_ctxt->as_ctb_bound_attrs[CTB_BOT_RT_PIC_BOUNDARY];
        break;
    }
    return (ps_attrs);
}

/**
********************************************************************************
* @fn hevc_avg_2d(U08 *pu1_src1,
*                 U08 *pu1_src2,
*                 S32 i4_src1_stride,
*                 S32 i4_src2_stride,
*                 S32 i4_blk_wd,
*                 S32 i4_blk_ht,
*                 U08 *pu1_dst,
*                 S32 i4_dst_stride)
*
* @brief point wise average (with rounding) of two buffers into a third
*
* @param[in] pu1_src1 : first source buffer
*
* @param[in] pu1_src2 : 2nd source buffer
*
* @param[in] i4_src1_stride : stride of source 1 buffer
*
* @param[in] i4_src2_stride : stride of source 2 buffer
*
* @param[in] i4_blk_wd : block width
*
* @param[in] i4_blk_ht : block height
*
* @param[out] pu1_dst : destination buffer
*
* @param[in] i4_dst_stride : stride of the destination buffer
*
* @return void
********************************************************************************
*/
void hevc_avg_2d(
    U08 *pu1_src1,
    U08 *pu1_src2,
    S32 i4_src1_stride,
    S32 i4_src2_stride,
    S32 i4_blk_wd,
    S32 i4_blk_ht,
    U08 *pu1_dst,
    S32 i4_dst_stride)
{
    S32 i, j;

    for(i = 0; i < i4_blk_ht; i++)
    {
        for(j = 0; j < i4_blk_wd; j++)
        {
            /* +1 implements round-to-nearest on the halving */
            pu1_dst[j] = (pu1_src1[j] + pu1_src2[j] + 1) >> 1;
        }
        pu1_src1 += i4_src1_stride;
        pu1_src2 += i4_src2_stride;
        pu1_dst += i4_dst_stride;
    }
}
/**
********************************************************************************
* @fn hme_pick_back_search_node(search_results_t *ps_search_results,
*                               search_node_t *ps_search_node_fwd,
*                               S32 i4_part_idx,
*                               layer_ctxt_t *ps_curr_layer)
*
* @brief returns the search node corresponding to a ref idx in same or
*        opp direction. Preference is given to opp direction, but if that
*        does not yield results, same direction is attempted.
*
* @param[in] ps_search_results: search results overall
*
* @param[in] ps_search_node_fwd: search node corresponding to "fwd" direction
*
* @param[in] i4_part_idx : partition id
*
* @param[in] ps_curr_layer : layer context for current layer.
*
* @return search node corresponding to the "other direction"
********************************************************************************
*/
//#define PICK_L1_REF_SAME_DIR
search_node_t *hme_pick_back_search_node(
    search_results_t *ps_search_results,
    search_node_t *ps_search_node_fwd,
    S32 i4_part_idx,
    layer_ctxt_t *ps_curr_layer)
{
    S32 is_past_l0, is_past_l1, id, i, i4_poc;
    S32 *pi4_ref_id_to_poc_lc = ps_curr_layer->ai4_ref_id_to_poc_lc;
    //ref_attr_t *ps_ref_attr_lc;
    S08 i1_ref_idx_fwd;
    S16 i2_mv_x, i2_mv_y;
    search_node_t *ps_search_node;

    i1_ref_idx_fwd = ps_search_node_fwd->i1_ref_idx;
    i2_mv_x = ps_search_node_fwd->s_mv.i2_mvx;
    i2_mv_y = ps_search_node_fwd->s_mv.i2_mvy;
    i4_poc = ps_curr_layer->i4_poc;

    //ps_ref_attr_lc = &ps_curr_layer->as_ref_attr_lc[0];
    /* If the ref id already picked up maps to a past pic, then we pick */
    /* a result corresponding to future pic. If such a result is not    */
    /* to be found, then we pick a result corresponding to a past pic   */
    //is_past = ps_ref_attr_lc[i1_ref_idx_fwd].u1_is_past;
    is_past_l0 = (i4_poc > pi4_ref_id_to_poc_lc[i1_ref_idx_fwd]) ? 1 : 0;

    ASSERT(ps_search_results->u1_num_active_ref <= 2);

    /* pick the right iteration of search nodes to pick up */
#ifdef PICK_L1_REF_SAME_DIR
    if(ps_search_results->u1_num_active_ref == 2)
        id = !is_past_l0;
#else
    if(ps_search_results->u1_num_active_ref == 2)
        id = is_past_l0;
#endif
    else
        id = 0;

    ps_search_node = ps_search_results->aps_part_results[id][i4_part_idx];

    for(i = 0; i < ps_search_results->u1_num_results_per_part; i++)
    {
        S08 i1_ref_test = ps_search_node[i].i1_ref_idx;
        is_past_l1 = (pi4_ref_id_to_poc_lc[i1_ref_test] < i4_poc) ?
1 : 0;
        //if (ps_ref_attr_lc[ps_search_node[i].i1_ref_idx].u1_is_past != is_past)
#ifdef PICK_L1_REF_SAME_DIR
        if(is_past_l1 == is_past_l0)
#else
        if(is_past_l1 != is_past_l0)
#endif
        {
            /* found a result in the desired direction; return it */
            return (ps_search_node + i);
        }
    }

    /* Unable to find best result in opp direction, so try same direction */
    /* However we need to ensure that we do not pick up same result */
    for(i = 0; i < ps_search_results->u1_num_results_per_part; i++)
    {
        if((ps_search_node->i1_ref_idx != i1_ref_idx_fwd) ||
           (ps_search_node->s_mv.i2_mvx != i2_mv_x) || (ps_search_node->s_mv.i2_mvy != i2_mv_y))
        {
            return (ps_search_node);
        }
        ps_search_node++;
    }

    /* Fallback: nothing distinct found; return the first result */
    //ASSERT(0);
    return (ps_search_results->aps_part_results[id][i4_part_idx]);

    //return (NULL);
}

/**
********************************************************************************
* @fn hme_study_input_segmentation(U08 *pu1_inp, S32 i4_inp_stride)
*
* @brief Examines input 16x16 for possible edges and orientations of those,
*        and returns a bit mask of partitions that should be searched for
*
* @param[in] pu1_inp : input buffer
*
* @param[in] i4_inp_stride: input stride
*
* @param[in] limit_active_partitions : 0 => search all partitions brute force
*
* @return part mask (bit mask of active partitions to search)
********************************************************************************
*/

S32 hme_study_input_segmentation(U08 *pu1_inp, S32 i4_inp_stride, S32 limit_active_partitions)
{
    S32 i4_rsum[16], i4_csum[16];
    U08 *pu1_tmp, u1_tmp;
    S32 i4_max_ridx, i4_max_cidx, i4_tmp;
    S32 i, j, i4_ret;
    S32 i4_max_rp[4], i4_max_cp[4];
    /* Lookup: strongest edge position (1=quarter, 2=half, 3=three-quarter) */
    /* -> AMP/symmetric partition to enable in that orientation             */
    S32 i4_seg_lutc[4] = { 0, ENABLE_nLx2N, ENABLE_Nx2N, ENABLE_nRx2N };
    S32 i4_seg_lutr[4] = { 0, ENABLE_2NxnU, ENABLE_2NxN, ENABLE_2NxnD };
#define EDGE_THR (15 * 16)
#define HI_PASS(ptr, i) (2 * (ptr[i] - ptr[i - 1]) + (ptr[i + 1] - ptr[i - 2]))

    if(0 == limit_active_partitions)
    {
        /*********************************************************************/
        /* In this case, we do not optimize on active partitions and search  */
        /* brute force. This way, 17 partitions would be enabled.            */
        /*********************************************************************/
        return (ENABLE_ALL_PARTS);
    }

    /*************************************************************************/
    /* Control passes below in case we wish to optimize on active partitions.*/
    /* This is based on input characteristics, check how an edge passes along*/
    /* an input 16x16 area, if at all, and decide active partitions.         */
    /*************************************************************************/

    /* Initialize row and col sums */
    for(i = 0; i < 16; i++)
    {
        i4_rsum[i] = 0;
        i4_csum[i] = 0;
    }
    pu1_tmp = pu1_inp;
    for(i = 0; i < 16; i++)
    {
        for(j = 0; j < 16; j++)
        {
            u1_tmp = *pu1_tmp++;
            i4_rsum[i] += u1_tmp;
            i4_csum[j] += u1_tmp;
        }
        pu1_tmp += (i4_inp_stride - 16);
    }

    /* 0 is dummy; 1 is 4; 2 is 8; 3 is 12 */
    i4_max_rp[0] = 0;
    i4_max_cp[0] = 0;
    i4_max_rp[1] = 0;
    i4_max_cp[1] = 0;
    i4_max_rp[2] = 0;
    i4_max_cp[2] = 0;
    i4_max_rp[3] = 0;
    i4_max_cp[3] = 0;

    /* NOTE(review): below, comparisons use ABS(i4_tmp) but the stored max  */
    /* is the signed value — negative maxima are effectively discarded on   */
    /* the next comparison. Confirm whether this asymmetry is intended.     */

    /* Get Max edge strength across (2,3) (3,4) (4,5) */
    for(i = 3; i < 6; i++)
    {
        /* Run [-1 -2 2 1] filter through rsum/csum */
        i4_tmp = HI_PASS(i4_rsum, i);
        if(ABS(i4_tmp) > i4_max_rp[1])
            i4_max_rp[1] = i4_tmp;

        i4_tmp = HI_PASS(i4_csum, i);
        if(ABS(i4_tmp) > i4_max_cp[1])
            i4_max_cp[1] = i4_tmp;
    }

    /* Get Max edge strength across (6,7) (7,8) (8,9) */
    for(i = 7; i < 10; i++)
    {
        /* Run [-1 -2 2 1] filter through rsum/csum */
        i4_tmp = HI_PASS(i4_rsum, i);
        if(ABS(i4_tmp) > i4_max_rp[2])
            i4_max_rp[2] = i4_tmp;

        i4_tmp = HI_PASS(i4_csum, i);
        if(ABS(i4_tmp) > i4_max_cp[2])
            i4_max_cp[2] = i4_tmp;
    }

    /* Get Max edge strength across (10,11) (11,12) (12,13) */
    for(i = 11; i < 14; i++)
    {
        /* Run [-1 -2 2 1] filter through rsum/csum */
        i4_tmp = HI_PASS(i4_rsum, i);
        if(ABS(i4_tmp) > i4_max_rp[3])
            i4_max_rp[3] = i4_tmp;

        i4_tmp = HI_PASS(i4_csum, i);
        if(ABS(i4_tmp) > i4_max_cp[3])
            i4_max_cp[3] = i4_tmp;
    }

    /* Find the maximum across the 3 and see whether the strength qualifies as edge */
    i4_max_ridx = 1;
    i4_max_cidx = 1;
    for(i = 2; i <= 3; i++)
    {
        if(i4_max_rp[i] > i4_max_rp[i4_max_ridx])
            i4_max_ridx = i;

        if(i4_max_cp[i] > i4_max_cp[i4_max_cidx])
            i4_max_cidx = i;
    }

    /* Index 0 means "no qualifying edge" in this orientation */
    if(EDGE_THR > i4_max_rp[i4_max_ridx])
    {
        i4_max_ridx = 0;
    }

    if(EDGE_THR > i4_max_cp[i4_max_cidx])
    {
        i4_max_cidx = 0;
    }

    i4_ret = ENABLE_2Nx2N;

    /* No discontinuity in either direction: only 2Nx2N */
    if(0 == (i4_max_ridx + i4_max_cidx))
    {
        //num_me_parts++;
        return i4_ret;
    }

    /* If only horizontal discontinuity, go with one of 2NxN family */
    if(i4_max_ridx && (i4_max_cidx == 0))
    {
        //num_me_parts += 3;
        return ((i4_ret | i4_seg_lutr[i4_max_ridx]));
    }

    /* If only vertical discontinuity, go with one of Nx2N family */
    if(i4_max_cidx && (i4_max_ridx == 0))
    {
        //num_me_parts += 3;
        return ((i4_ret | i4_seg_lutc[i4_max_cidx]));
    }

    /* If middle is dominant in both directions, go with NxN */
    if((2 == i4_max_cidx) && (2 == i4_max_ridx))
    {
        //num_me_parts += 5;
        return ((i4_ret | ENABLE_NxN));
    }

    /* Otherwise, conservatively, enable NxN and the 2 AMPs */
    //num_me_parts += 9;
    return (i4_ret | ENABLE_NxN | i4_seg_lutr[i4_max_ridx] | i4_seg_lutc[i4_max_cidx]);
}

/**
********************************************************************************
* @fn hme_init_search_results(search_results_t *ps_search_results,
*                             S32 i4_num_ref,
*                             S32 i4_num_best_results,
*                             S32
i4_num_results_per_part,
*                             BLK_SIZE_T e_blk_size,
*                             S32 i4_x_off,
*                             S32 i4_y_off)
*
* @brief Initializes the search results structure with some key attributes
*
* @param[out] ps_search_results : search results structure to initialise
*
* @param[in] i4_num_ref: corresponds to the number of ref ids searched
*
* @param[in] i4_num_best_results: Number of best results for the CU to
*            be maintained in the result structure
*
* @param[in] i4_num_results_per_part: Per active partition the number of best
*            results to be maintained
*
* @param[in] e_blk_size: blk size of the CU for which this structure used
*
* @param[in] i4_x_off: x offset of the top left of CU from CTB top left
*
* @param[in] i4_y_off: y offset of the top left of CU from CTB top left
*
* @param[in] pu1_is_past : points to an array that tells whether a given ref id
*            has prominence in L0 or in L1 list (past or future)
*
* @return void
********************************************************************************
*/
void hme_init_search_results(
    search_results_t *ps_search_results,
    S32 i4_num_ref,
    S32 i4_num_best_results,
    S32 i4_num_results_per_part,
    BLK_SIZE_T e_blk_size,
    S32 i4_x_off,
    S32 i4_y_off,
    U08 *pu1_is_past)
{
    CU_SIZE_T e_cu_size = ge_blk_size_to_cu_size[e_blk_size];

    /* -1 marks a blk size with no corresponding CU size */
    ASSERT(e_cu_size != -1);
    ps_search_results->e_cu_size = e_cu_size;
    ps_search_results->u1_x_off = (U08)i4_x_off;
    ps_search_results->u1_y_off = (U08)i4_y_off;
    ps_search_results->u1_num_active_ref = (U08)i4_num_ref;
    ps_search_results->u1_num_best_results = (U08)i4_num_best_results;
    ps_search_results->u1_num_results_per_part = (U08)i4_num_results_per_part;
    ps_search_results->pu1_is_past = pu1_is_past;
    ps_search_results->u1_split_flag = 0;
    ps_search_results->best_cu_cost = MAX_32BIT_VAL;
}

/**
********************************************************************************
* @fn hme_reset_search_results((search_results_t *ps_search_results,
*                               S32 i4_part_mask)
*
* @brief Resets the best results to maximum values, so as to allow search
*        for the new CU's partitions. The existing results may be from an
*        older CU using same structure.
*
* @param[in] ps_search_results: search results structure
*
* @param[in] i4_part_mask : bit mask of active partitions
*
* @param[in] mv_res : mv resolution to set on both prediction contexts
*
* @return None
********************************************************************************
*/
void hme_reset_search_results(search_results_t *ps_search_results, S32 i4_part_mask, S32 mv_res)
{
    S32 i4_num_ref = (S32)ps_search_results->u1_num_active_ref;
    S08 i1_ref_idx;
    S32 i, j;
    search_node_t *ps_search_node;

    /* store this for future use */
    ps_search_results->i4_part_mask = i4_part_mask;

    /* Reset the split_flag to zero */
    ps_search_results->u1_split_flag = 0;

    HME_SET_MVPRED_RES((&ps_search_results->as_pred_ctxt[0]), mv_res);
    HME_SET_MVPRED_RES((&ps_search_results->as_pred_ctxt[1]), mv_res);

    for(i1_ref_idx = 0; i1_ref_idx < i4_num_ref; i1_ref_idx++)
    {
        /* Reset the individual partition results */
        for(i = 0; i < TOT_NUM_PARTS; i++)
        {
            /* Skip partitions not active for this CU */
            if(!(i4_part_mask & (1 << i)))
                continue;

            ps_search_node = ps_search_results->aps_part_results[i1_ref_idx][i];

            for(j = 0; j < ps_search_results->u1_num_results_per_part; j++)
            {
                /* Costs start at "infinity" so any real result replaces them */
                ps_search_node[j].s_mv.i2_mvx = 0;
                ps_search_node[j].s_mv.i2_mvy = 0;
                ps_search_node[j].i4_tot_cost = MAX_32BIT_VAL;
                ps_search_node[j].i4_sad = MAX_32BIT_VAL;
                ps_search_node[j].i4_sdi = 0;
                ps_search_node[j].i1_ref_idx = -1;
                ps_search_node[j].u1_subpel_done = 0;
                ps_search_node[j].u1_is_avail = 1;
                ps_search_node[j].i4_mv_cost = 0;
            }
        }
    }
}
/**
********************************************************************************
* @fn hme_clamp_grid_by_mvrange(search_node_t *ps_search_node,
*                               S32 i4_step,
*                               range_prms_t *ps_mvrange)
*
* @brief Given a central pt within mv range, and a grid of points surrounding
*        this pt, this function returns a grid mask of pts within search rng
*
* @param[in] ps_search_node: the centre pt of the grid
*
* @param[in] i4_step: step size of grid
*
* @param[in] ps_mvrange: structure containing the current mv range
*
* @return bitmask of the pts in grid within search range
********************************************************************************
*/
S32 hme_clamp_grid_by_mvrange(search_node_t *ps_search_node, S32 i4_step, range_prms_t *ps_mvrange)
{
    S32 i4_mask = GRID_ALL_PTS_VALID;
    /* Knock out the three grid points on each side that would fall outside */
    /* the mv range when offset by i4_step from the centre.                 */
    if(ps_search_node->s_mv.i2_mvx + i4_step >= ps_mvrange->i2_max_x)
    {
        i4_mask &= (GRID_RT_3_INVALID);
    }
    if(ps_search_node->s_mv.i2_mvx - i4_step < ps_mvrange->i2_min_x)
    {
        i4_mask &= (GRID_LT_3_INVALID);
    }
    if(ps_search_node->s_mv.i2_mvy + i4_step >= ps_mvrange->i2_max_y)
    {
        i4_mask &= (GRID_BOT_3_INVALID);
    }
    if(ps_search_node->s_mv.i2_mvy - i4_step < ps_mvrange->i2_min_y)
    {
        i4_mask &= (GRID_TOP_3_INVALID);
    }
    return i4_mask;
}

/**
********************************************************************************
* @fn layer_ctxt_t *hme_get_past_layer_ctxt(me_ctxt_t *ps_ctxt,
*                                           S32 i4_layer_id)
*
* @brief returns the layer ctxt of the layer with given id from the temporally
*        previous frame
*
* @param[in] ps_ctxt : ME context
*
* @param[in] i4_layer_id : id of layer required
*
* @return layer ctxt of given layer id in temporally previous frame
*
******************************************************************************** 1111 */ 1112 layer_ctxt_t *hme_get_past_layer_ctxt( 1113 me_ctxt_t *ps_ctxt, me_frm_ctxt_t *ps_frm_ctxt, S32 i4_layer_id, S32 i4_num_me_frm_pllel) 1114 { 1115 S32 i4_poc = ps_frm_ctxt->ai4_ref_idx_to_poc_lc[0]; 1116 S32 i; 1117 layers_descr_t *ps_desc; 1118 1119 for(i = 0; i < (ps_ctxt->aps_me_frm_prms[0]->max_num_ref * i4_num_me_frm_pllel) + 1; i++) 1120 { 1121 ps_desc = &ps_ctxt->as_ref_descr[i]; 1122 if(i4_poc == ps_desc->aps_layers[i4_layer_id]->i4_poc) 1123 return (ps_desc->aps_layers[i4_layer_id]); 1124 } 1125 return NULL; 1126 } 1127 1128 /** 1129 ******************************************************************************** 1130 * @fn layer_ctxt_t *hme_coarse_get_past_layer_ctxt(me_ctxt_t *ps_ctxt, 1131 S32 i4_layer_id) 1132 * 1133 * @brief returns the layer ctxt of the layer with given id from the temporally 1134 * previous frame 1135 * 1136 * @param[in] ps_ctxt : ME context 1137 * 1138 * @param[in] i4_layer_id : id of layer required 1139 * 1140 * @return layer ctxt of given layer id in temporally previous frame 1141 ******************************************************************************** 1142 */ 1143 layer_ctxt_t *hme_coarse_get_past_layer_ctxt(coarse_me_ctxt_t *ps_ctxt, S32 i4_layer_id) 1144 { 1145 S32 i4_poc = ps_ctxt->ai4_ref_idx_to_poc_lc[0]; 1146 S32 i; 1147 layers_descr_t *ps_desc; 1148 1149 for(i = 0; i < ps_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; i++) 1150 { 1151 ps_desc = &ps_ctxt->as_ref_descr[i]; 1152 if(i4_poc == ps_desc->aps_layers[i4_layer_id]->i4_poc) 1153 return (ps_desc->aps_layers[i4_layer_id]); 1154 } 1155 return NULL; 1156 } 1157 1158 /** 1159 ******************************************************************************** 1160 * @fn void hme_init_mv_bank(layer_ctxt_t *ps_layer_ctxt, 1161 BLK_SIZE_T e_blk_size, 1162 S32 i4_num_ref, 1163 S32 i4_num_results_per_part) 1164 * 1165 * @brief Given a blk size to be used for this layer, this 
function initialize 1166 * the mv bank to make it ready to store and return results. 1167 * 1168 * @param[in, out] ps_layer_ctxt: pointer to layer ctxt 1169 * 1170 * @param[in] e_blk_size : resolution at which mvs are stored 1171 * 1172 * @param[in] i4_num_ref: number of reference frames corresponding to which 1173 * results are stored. 1174 * 1175 * @param[in] e_blk_size : resolution at which mvs are stored 1176 * 1177 * @param[in] i4_num_results_per_part : Number of results to be stored per 1178 * ref idx. So these many best results stored 1179 * 1180 * @return void 1181 ******************************************************************************** 1182 */ 1183 void hme_init_mv_bank( 1184 layer_ctxt_t *ps_layer_ctxt, 1185 BLK_SIZE_T e_blk_size, 1186 S32 i4_num_ref, 1187 S32 i4_num_results_per_part, 1188 U08 u1_enc) 1189 { 1190 layer_mv_t *ps_mv_bank; 1191 hme_mv_t *ps_mv1, *ps_mv2; 1192 S08 *pi1_ref_id1, *pi1_ref_id2; 1193 S32 blk_wd, mvs_in_blk, blks_in_row, mvs_in_row, blks_in_col; 1194 S32 i4_i, i4_j, blk_ht; 1195 1196 ps_mv_bank = ps_layer_ctxt->ps_layer_mvbank; 1197 ps_mv_bank->i4_num_mvs_per_ref = i4_num_results_per_part; 1198 ps_mv_bank->i4_num_ref = i4_num_ref; 1199 mvs_in_blk = i4_num_ref * i4_num_results_per_part; 1200 ps_mv_bank->i4_num_mvs_per_blk = mvs_in_blk; 1201 1202 /*************************************************************************/ 1203 /* Store blk size, from blk size derive blk width and use this to compute*/ 1204 /* number of blocks every row. We also pad to left and top by 1, to */ 1205 /* support the prediction mechanism. 
*/ 1206 /*************************************************************************/ 1207 ps_mv_bank->e_blk_size = e_blk_size; 1208 blk_wd = gau1_blk_size_to_wd[e_blk_size]; 1209 blk_ht = gau1_blk_size_to_ht[e_blk_size]; 1210 1211 blks_in_row = (ps_layer_ctxt->i4_wd + (blk_wd - 1)) / blk_wd; 1212 blks_in_col = (ps_layer_ctxt->i4_ht + (blk_ht - 1)) / blk_ht; 1213 1214 if(u1_enc) 1215 { 1216 /* TODO: CTB64x64 is assumed. FIX according to actual CTB */ 1217 WORD32 num_ctb_cols = ((ps_layer_ctxt->i4_wd + 63) >> 6); 1218 WORD32 num_ctb_rows = ((ps_layer_ctxt->i4_ht + 63) >> 6); 1219 1220 blks_in_row = (num_ctb_cols << 3); 1221 blks_in_col = (num_ctb_rows << 3); 1222 } 1223 1224 blks_in_row += 2; 1225 mvs_in_row = blks_in_row * mvs_in_blk; 1226 1227 ps_mv_bank->i4_num_blks_per_row = blks_in_row; 1228 ps_mv_bank->i4_num_mvs_per_row = mvs_in_row; 1229 1230 /* To ensure run time requirements fall within allocation time request */ 1231 ASSERT(ps_mv_bank->i4_num_mvs_per_row <= ps_mv_bank->max_num_mvs_per_row); 1232 1233 /*************************************************************************/ 1234 /* Increment by one full row at top for padding and one column in left */ 1235 /* this gives us the actual start of mv for 0,0 blk */ 1236 /*************************************************************************/ 1237 ps_mv_bank->ps_mv = ps_mv_bank->ps_mv_base + mvs_in_row + mvs_in_blk; 1238 ps_mv_bank->pi1_ref_idx = ps_mv_bank->pi1_ref_idx_base + mvs_in_row + mvs_in_blk; 1239 1240 memset(ps_mv_bank->ps_mv_base, 0, mvs_in_row * sizeof(hme_mv_t)); 1241 memset(ps_mv_bank->pi1_ref_idx_base, -1, mvs_in_row * sizeof(U08)); 1242 1243 /*************************************************************************/ 1244 /* Initialize top row, left col and right col with zeros since these are */ 1245 /* used as candidates during searches. 
*/ 1246 /*************************************************************************/ 1247 ps_mv1 = ps_mv_bank->ps_mv_base + mvs_in_row; 1248 ps_mv2 = ps_mv1 + mvs_in_row - mvs_in_blk; 1249 pi1_ref_id1 = ps_mv_bank->pi1_ref_idx_base + mvs_in_row; 1250 pi1_ref_id2 = pi1_ref_id1 + mvs_in_row - mvs_in_blk; 1251 for(i4_i = 0; i4_i < blks_in_col; i4_i++) 1252 { 1253 for(i4_j = 0; i4_j < mvs_in_blk; i4_j++) 1254 { 1255 ps_mv1[i4_j].i2_mv_x = 0; 1256 ps_mv1[i4_j].i2_mv_y = 0; 1257 ps_mv2[i4_j].i2_mv_x = 0; 1258 ps_mv2[i4_j].i2_mv_y = 0; 1259 pi1_ref_id1[i4_j] = -1; 1260 pi1_ref_id2[i4_j] = -1; 1261 } 1262 ps_mv1 += mvs_in_row; 1263 ps_mv2 += mvs_in_row; 1264 pi1_ref_id1 += mvs_in_row; 1265 pi1_ref_id2 += mvs_in_row; 1266 } 1267 } 1268 void hme_fill_mvbank_intra(layer_ctxt_t *ps_layer_ctxt) 1269 { 1270 layer_mv_t *ps_mv_bank; 1271 hme_mv_t *ps_mv; 1272 S08 *pi1_ref_id; 1273 S32 blk_wd, blks_in_row, mvs_in_row, blks_in_col; 1274 S32 i, j, blk_ht; 1275 BLK_SIZE_T e_blk_size; 1276 1277 ps_mv_bank = ps_layer_ctxt->ps_layer_mvbank; 1278 1279 /*************************************************************************/ 1280 /* Store blk size, from blk size derive blk width and use this to compute*/ 1281 /* number of blocks every row. We also pad to left and top by 1, to */ 1282 /* support the prediction mechanism. 
*/ 1283 /*************************************************************************/ 1284 e_blk_size = ps_mv_bank->e_blk_size; 1285 blk_wd = gau1_blk_size_to_wd[e_blk_size]; 1286 blk_ht = gau1_blk_size_to_wd[e_blk_size]; 1287 blks_in_row = ps_layer_ctxt->i4_wd / blk_wd; 1288 blks_in_col = ps_layer_ctxt->i4_ht / blk_ht; 1289 mvs_in_row = blks_in_row * ps_mv_bank->i4_num_mvs_per_blk; 1290 1291 /*************************************************************************/ 1292 /* Increment by one full row at top for padding and one column in left */ 1293 /* this gives us the actual start of mv for 0,0 blk */ 1294 /*************************************************************************/ 1295 ps_mv = ps_mv_bank->ps_mv; 1296 pi1_ref_id = ps_mv_bank->pi1_ref_idx; 1297 1298 for(i = 0; i < blks_in_col; i++) 1299 { 1300 for(j = 0; j < blks_in_row; j++) 1301 { 1302 ps_mv[j].i2_mv_x = INTRA_MV; 1303 ps_mv[j].i2_mv_y = INTRA_MV; 1304 pi1_ref_id[j] = -1; 1305 } 1306 ps_mv += ps_mv_bank->i4_num_mvs_per_row; 1307 pi1_ref_id += ps_mv_bank->i4_num_mvs_per_row; 1308 } 1309 } 1310 1311 /** 1312 ******************************************************************************** 1313 * @fn void hme_derive_search_range(range_prms_t *ps_range, 1314 * range_prms_t *ps_pic_limit, 1315 * range_prms_t *ps_mv_limit, 1316 * S32 i4_x, 1317 * S32 i4_y, 1318 * S32 blk_wd, 1319 * S32 blk_ht) 1320 * 1321 * @brief given picture limits and blk dimensions and mv search limits, obtains 1322 * teh valid search range such that the blk stays within pic boundaries, 1323 * where picture boundaries include padded portions of picture 1324 * 1325 * @param[out] ps_range: updated with actual search range 1326 * 1327 * @param[in] ps_pic_limit : picture boundaries 1328 * 1329 * @param[in] ps_mv_limit: Search range limits for the mvs 1330 * 1331 * @param[in] i4_x : x coordinate of the blk 1332 * 1333 * @param[in] i4_y : y coordinate of the blk 1334 * 1335 * @param[in] blk_wd : blk width 1336 * 1337 * @param[in] blk_ht : 
blk height 1338 * 1339 * @return void 1340 ******************************************************************************** 1341 */ 1342 void hme_derive_search_range( 1343 range_prms_t *ps_range, 1344 range_prms_t *ps_pic_limit, 1345 range_prms_t *ps_mv_limit, 1346 S32 i4_x, 1347 S32 i4_y, 1348 S32 blk_wd, 1349 S32 blk_ht) 1350 { 1351 ps_range->i2_max_x = 1352 MIN((ps_pic_limit->i2_max_x - (S16)blk_wd - (S16)i4_x), ps_mv_limit->i2_max_x); 1353 ps_range->i2_min_x = MAX((ps_pic_limit->i2_min_x - (S16)i4_x), ps_mv_limit->i2_min_x); 1354 ps_range->i2_max_y = 1355 MIN((ps_pic_limit->i2_max_y - (S16)blk_ht - (S16)i4_y), ps_mv_limit->i2_max_y); 1356 ps_range->i2_min_y = MAX((ps_pic_limit->i2_min_y - (S16)i4_y), ps_mv_limit->i2_min_y); 1357 } 1358 1359 /** 1360 ******************************************************************************** 1361 * @fn void hme_get_spatial_candt(search_node_t *ps_search_node, 1362 * layer_ctxt_t *ps_curr_layer, 1363 * S32 i4_blk_x, 1364 * S32 i4_blk_y, 1365 * S08 i1_ref_id, 1366 * S32 i4_result_id) 1367 * 1368 * @brief obtains a candt from the same mv bank as the current one, its called 1369 * spatial candt as it does not require scaling for temporal distances 1370 * 1371 * @param[out] ps_search_node: mv and ref id updated here of the candt 1372 * 1373 * @param[in] ps_curr_layer: layer ctxt, has the mv bank structure pointer 1374 * 1375 * @param[in] i4_blk_x : x coordinate of the block in mv bank 1376 * 1377 * @param[in] i4_blk_y : y coordinate of the block in mv bank 1378 * 1379 * @param[in] i1_ref_id : Corresponds to ref idx from which to pick up mv 1380 * results, useful if multiple ref idx candts maintained separately. 
 *
 *  @param[in] i4_result_id : If multiple results stored per ref idx, this
 *              pts to the id of the result
 *
 *  @param[in] tr_avail : top right availability of the block
 *
 *  @param[in] bl_avail : bottom left availability of the block
 *
 *  @return void
 ********************************************************************************
*/
void hme_get_spatial_candt(
    layer_ctxt_t *ps_curr_layer,
    BLK_SIZE_T e_search_blk_size,
    S32 i4_blk_x,
    S32 i4_blk_y,
    S08 i1_ref_idx,
    search_node_t *ps_top_neighbours,
    search_node_t *ps_left_neighbours,
    S32 i4_result_id,
    S32 tr_avail,
    S32 bl_avail,
    S32 encode)

{
    layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
    /* Width of the blocks at which the mv bank stores results */
    S32 i4_blk_size1 = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
    /* Width of the blocks being searched */
    S32 i4_blk_size2 = gau1_blk_size_to_wd[e_search_blk_size];
    search_node_t *ps_search_node;
    S32 i4_offset;
    hme_mv_t *ps_mv, *ps_mv_base;
    S08 *pi1_ref_idx, *pi1_ref_idx_base;
    /* jump: number of bank blocks per search blk edge; scaled below */
    S32 jump = 1, mvs_in_blk, mvs_in_row;
    /* In encode layer mvs are stored at quarter-pel-like shift of 2 */
    S32 shift = (encode ? 2 : 0);

    /* Scale blk coords into bank-blk units when search blk is larger than */
    /* the bank blk (2x or 4x along each edge)                             */
    if(i4_blk_size1 != i4_blk_size2)
    {
        i4_blk_x <<= 1;
        i4_blk_y <<= 1;
        jump = 2;
        if((i4_blk_size1 << 2) == i4_blk_size2)
        {
            i4_blk_x <<= 1;
            i4_blk_y <<= 1;
            jump = 4;
        }
    }

    mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk;
    mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row;

    /* Adjust the blk coord to point to top left locn */
    i4_blk_x -= 1;
    i4_blk_y -= 1;
    /* Pick up the mvs from the location */
    i4_offset = (i4_blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
    i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * i4_blk_y);

    ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
    pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;

    /* Within a block, results are grouped per ref idx */
    ps_mv += (i1_ref_idx * ps_layer_mvbank->i4_num_mvs_per_ref) + i4_result_id;
    pi1_ref_idx += (i1_ref_idx * ps_layer_mvbank->i4_num_mvs_per_ref) + i4_result_id;

    ps_mv_base = ps_mv;
    pi1_ref_idx_base = pi1_ref_idx;

    /* ps_mv and pi1_ref_idx now point to the top left locn */
    /* Get 4 mvs as follows: */
    ps_search_node = ps_top_neighbours;
    COPY_MV_TO_SEARCH_NODE(ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);

    /* Move to top */
    ps_search_node++;
    ps_mv += mvs_in_blk;
    pi1_ref_idx += mvs_in_blk;
    COPY_MV_TO_SEARCH_NODE(ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);

    /* Move to t1 : relevant for 4x4 part searches or for partitions i 16x16 */
    if(ps_layer_mvbank->i4_num_mvs_per_ref > 1)
    {
        ps_search_node++;
        ps_mv += (mvs_in_blk * (jump >> 1));
        pi1_ref_idx += (mvs_in_blk * (jump >> 1));
        COPY_MV_TO_SEARCH_NODE(ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
    }
    else
    {
        /* Only one result per ref: t1 not stored, mark unavailable */
        ps_search_node++;
        ps_search_node->s_mv.i2_mvx = 0;
        ps_search_node->s_mv.i2_mvy = 0;
        ps_search_node->i1_ref_idx = i1_ref_idx;
        ps_search_node->u1_is_avail = 0;
        ps_search_node->u1_subpel_done = 0;
    }

    /* Move to tr: this will be tr w.r.t. the blk being searched */
    ps_search_node++;
    if(tr_avail == 0)
    {
        ps_search_node->s_mv.i2_mvx = 0;
        ps_search_node->s_mv.i2_mvy = 0;
        ps_search_node->i1_ref_idx = i1_ref_idx;
        ps_search_node->u1_is_avail = 0;
        ps_search_node->u1_subpel_done = 0;
    }
    else
    {
        ps_mv = ps_mv_base + (mvs_in_blk * (1 + jump));
        pi1_ref_idx = pi1_ref_idx_base + (mvs_in_blk * (1 + jump));
        COPY_MV_TO_SEARCH_NODE(ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
    }

    /* Move to left */
    ps_search_node = ps_left_neighbours;
    ps_mv = ps_mv_base + mvs_in_row;
    pi1_ref_idx = pi1_ref_idx_base + mvs_in_row;
    COPY_MV_TO_SEARCH_NODE(ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);

    /* Move to l1 */
    if(ps_layer_mvbank->i4_num_mvs_per_ref > 1)
    {
        ps_search_node++;
        ps_mv += (mvs_in_row * (jump >> 1));
        pi1_ref_idx += (mvs_in_row * (jump >> 1));
        COPY_MV_TO_SEARCH_NODE(ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
    }
    else
    {
        /* Only one result per ref: l1 not stored, mark unavailable */
        ps_search_node++;
        ps_search_node->s_mv.i2_mvx = 0;
        ps_search_node->s_mv.i2_mvy = 0;
        ps_search_node->i1_ref_idx = i1_ref_idx;
        ps_search_node->u1_is_avail = 0;
        ps_search_node->u1_subpel_done = 0;
    }

    /* Move to bl */
    ps_search_node++;
    if(bl_avail == 0)
    {
        ps_search_node->s_mv.i2_mvx = 0;
        ps_search_node->s_mv.i2_mvy = 0;
        ps_search_node->i1_ref_idx = i1_ref_idx;
        ps_search_node->u1_is_avail = 0;
    }
    else
    {
        ps_mv = ps_mv_base + (mvs_in_row * (1 + jump));
        pi1_ref_idx = pi1_ref_idx_base + (mvs_in_row * (1 + jump));
        COPY_MV_TO_SEARCH_NODE(ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
    }
}

/*
 * Variant of hme_get_spatial_candt used during L1 ME: results of both
 * prediction directions live in one implicit array per block, so each
 * neighbour's mv is located via hme_find_pos_of_implicitly_stored_ref_id
 * rather than by a fixed per-ref offset.  Fills the same TL/T/T1/TR (top
 * array) and L/L1/BL (left array) neighbour sets.
 */
void hme_get_spatial_candt_in_l1_me(
    layer_ctxt_t *ps_curr_layer,
    BLK_SIZE_T e_search_blk_size,
    S32 i4_blk_x,
    S32 i4_blk_y,
    S08 i1_ref_idx,
    U08 u1_pred_dir,
    search_node_t *ps_top_neighbours,
    search_node_t *ps_left_neighbours,
    S32 i4_result_id,
    S32 tr_avail,
    S32 bl_avail,
    S32 i4_num_act_ref_l0,
    S32 i4_num_act_ref_l1)
{
    search_node_t *ps_search_node;
    hme_mv_t *ps_mv, *ps_mv_base;

    S32 i4_offset;
    S32 mvs_in_blk, mvs_in_row;
    S08 *pi1_ref_idx, *pi1_ref_idx_base;
    S32 i4_mv_pos_in_implicit_array;

    layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank;

    S32 i4_blk_size1 = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
    S32 i4_blk_size2 = gau1_blk_size_to_wd[e_search_blk_size];
    S32 jump = 1;
    S32 shift = 0;
    /* Number of stored results belonging to the requested direction */
    S32 i4_num_results_in_given_dir =
        ((u1_pred_dir == 1) ? (ps_layer_mvbank->i4_num_mvs_per_ref * i4_num_act_ref_l1)
                            : (ps_layer_mvbank->i4_num_mvs_per_ref * i4_num_act_ref_l0));

    /* Scale blk coords into bank-blk units when search blk is larger */
    if(i4_blk_size1 != i4_blk_size2)
    {
        i4_blk_x <<= 1;
        i4_blk_y <<= 1;
        jump = 2;
        if((i4_blk_size1 << 2) == i4_blk_size2)
        {
            i4_blk_x <<= 1;
            i4_blk_y <<= 1;
            jump = 4;
        }
    }

    mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk;
    mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row;

    /* Adjust the blk coord to point to top left locn */
    i4_blk_x -= 1;
    i4_blk_y -= 1;
    /* Pick up the mvs from the location */
    i4_offset = (i4_blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
    i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * i4_blk_y);

    /* L1 results follow the L0 results within a block */
    i4_offset +=
        ((u1_pred_dir == 1) ? (ps_layer_mvbank->i4_num_mvs_per_ref * i4_num_act_ref_l0) : 0);

    ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
    pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;

    ps_mv_base = ps_mv;
    pi1_ref_idx_base = pi1_ref_idx;

    /* TL */
    {
        /* ps_mv and pi1_ref_idx now point to the top left locn */
        ps_search_node = ps_top_neighbours;

        i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id(
            pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir);

        if(-1 != i4_mv_pos_in_implicit_array)
        {
            COPY_MV_TO_SEARCH_NODE(
                ps_search_node,
                &ps_mv[i4_mv_pos_in_implicit_array],
                &pi1_ref_idx[i4_mv_pos_in_implicit_array],
                i1_ref_idx,
                shift);
        }
        else
        {
            /* No stored result for this ref id: mark unavailable */
            ps_search_node->u1_is_avail = 0;
            ps_search_node->s_mv.i2_mvx = 0;
            ps_search_node->s_mv.i2_mvy = 0;
            ps_search_node->i1_ref_idx = i1_ref_idx;
        }
    }

    /* Move to top */
    {
        /* ps_mv and pi1_ref_idx now point to the top left locn */
        ps_search_node++;
        ps_mv += mvs_in_blk;
        pi1_ref_idx += mvs_in_blk;

        i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id(
            pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir);

        if(-1 != i4_mv_pos_in_implicit_array)
        {
            COPY_MV_TO_SEARCH_NODE(
                ps_search_node,
                &ps_mv[i4_mv_pos_in_implicit_array],
                &pi1_ref_idx[i4_mv_pos_in_implicit_array],
                i1_ref_idx,
                shift);
        }
        else
        {
            ps_search_node->u1_is_avail = 0;
            ps_search_node->s_mv.i2_mvx = 0;
            ps_search_node->s_mv.i2_mvy = 0;
            ps_search_node->i1_ref_idx = i1_ref_idx;
        }
    }

    /* Move to t1 : relevant for 4x4 part searches or for partitions i 16x16 */
    if(ps_layer_mvbank->i4_num_mvs_per_ref > 1)
    {
        ps_search_node++;
        ps_mv += (mvs_in_blk * (jump >> 1));
        pi1_ref_idx += (mvs_in_blk * (jump >> 1));

        i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id(
            pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir);

        if(-1 != i4_mv_pos_in_implicit_array)
        {
            COPY_MV_TO_SEARCH_NODE(
                ps_search_node,
                &ps_mv[i4_mv_pos_in_implicit_array],
                &pi1_ref_idx[i4_mv_pos_in_implicit_array],
                i1_ref_idx,
                shift);
        }
        else
        {
            ps_search_node->u1_is_avail = 0;
            ps_search_node->s_mv.i2_mvx = 0;
            ps_search_node->s_mv.i2_mvy = 0;
            ps_search_node->i1_ref_idx = i1_ref_idx;
        }
    }
    else
    {
        /* Only one result per ref: t1 not stored, mark unavailable */
        ps_search_node++;
        ps_search_node->u1_is_avail = 0;
        ps_search_node->s_mv.i2_mvx = 0;
        ps_search_node->s_mv.i2_mvy = 0;
        ps_search_node->i1_ref_idx = i1_ref_idx;
    }

    /* Move to tr: this will be tr w.r.t. the blk being searched */
    ps_search_node++;
    if(tr_avail == 0)
    {
        ps_search_node->s_mv.i2_mvx = 0;
        ps_search_node->s_mv.i2_mvy = 0;
        ps_search_node->i1_ref_idx = i1_ref_idx;
        ps_search_node->u1_is_avail = 0;
        ps_search_node->u1_subpel_done = 0;
    }
    else
    {
        /* ps_mv and pi1_ref_idx now point to the top left locn */
        ps_mv = ps_mv_base + (mvs_in_blk * (1 + jump));
        pi1_ref_idx = pi1_ref_idx_base + (mvs_in_blk * (1 + jump));

        i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id(
            pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir);

        if(-1 != i4_mv_pos_in_implicit_array)
        {
            COPY_MV_TO_SEARCH_NODE(
                ps_search_node,
                &ps_mv[i4_mv_pos_in_implicit_array],
                &pi1_ref_idx[i4_mv_pos_in_implicit_array],
                i1_ref_idx,
                shift);
        }
        else
        {
            ps_search_node->u1_is_avail = 0;
            ps_search_node->s_mv.i2_mvx = 0;
            ps_search_node->s_mv.i2_mvy = 0;
            ps_search_node->i1_ref_idx = i1_ref_idx;
        }
    }

    /* Move to left */
    {
        /* ps_mv and pi1_ref_idx now point to the top left locn */
        ps_search_node = ps_left_neighbours;
        ps_mv = ps_mv_base + mvs_in_row;
        pi1_ref_idx = pi1_ref_idx_base + mvs_in_row;

        i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id(
            pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir);

        if(-1 != i4_mv_pos_in_implicit_array)
        {
            COPY_MV_TO_SEARCH_NODE(
                ps_search_node,
                &ps_mv[i4_mv_pos_in_implicit_array],
                &pi1_ref_idx[i4_mv_pos_in_implicit_array],
                i1_ref_idx,
                shift);
        }
        else
        {
            ps_search_node->u1_is_avail = 0;
            ps_search_node->s_mv.i2_mvx = 0;
            ps_search_node->s_mv.i2_mvy = 0;
            ps_search_node->i1_ref_idx = i1_ref_idx;
        }
    }

    /* Move to l1 */
    if(ps_layer_mvbank->i4_num_mvs_per_ref > 1)
    {
        /* ps_mv and pi1_ref_idx now point to the top left locn */
        ps_search_node++;
        ps_mv += (mvs_in_row * (jump >> 1));
        pi1_ref_idx += (mvs_in_row * (jump >> 1));

        i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id(
            pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir);

        if(-1 != i4_mv_pos_in_implicit_array)
        {
            COPY_MV_TO_SEARCH_NODE(
                ps_search_node,
                &ps_mv[i4_mv_pos_in_implicit_array],
                &pi1_ref_idx[i4_mv_pos_in_implicit_array],
                i1_ref_idx,
                shift);
        }
        else
        {
            ps_search_node->u1_is_avail = 0;
            ps_search_node->s_mv.i2_mvx = 0;
            ps_search_node->s_mv.i2_mvy = 0;
            ps_search_node->i1_ref_idx = i1_ref_idx;
        }
    }
    else
    {
        /* Only one result per ref: l1 not stored, mark unavailable */
        ps_search_node++;
        ps_search_node->u1_is_avail = 0;
        ps_search_node->s_mv.i2_mvx = 0;
        ps_search_node->s_mv.i2_mvy = 0;
        ps_search_node->i1_ref_idx = i1_ref_idx;
    }

    /* Move to bl */
    ps_search_node++;
    if(bl_avail == 0)
    {
        ps_search_node->s_mv.i2_mvx = 0;
        ps_search_node->s_mv.i2_mvy = 0;
        ps_search_node->i1_ref_idx = i1_ref_idx;
        ps_search_node->u1_is_avail = 0;
    }
    else
    {
        /* ps_mv and pi1_ref_idx now point to the top left locn */
        ps_mv = ps_mv_base + (mvs_in_row * (1 + jump));
        pi1_ref_idx = pi1_ref_idx_base + (mvs_in_row * (1 + jump));

        i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id(
            pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir);

        if(-1 != i4_mv_pos_in_implicit_array)
        {
            COPY_MV_TO_SEARCH_NODE(
                ps_search_node,
                &ps_mv[i4_mv_pos_in_implicit_array],
                &pi1_ref_idx[i4_mv_pos_in_implicit_array],
                i1_ref_idx,
                shift);
        }
        else
        {
            ps_search_node->u1_is_avail = 0;
            ps_search_node->s_mv.i2_mvx = 0;
            ps_search_node->s_mv.i2_mvy = 0;
            ps_search_node->i1_ref_idx = i1_ref_idx;
        }
    }
}

/**
********************************************************************************
 *  @fn    void hme_fill_ctb_neighbour_mvs(layer_ctxt_t *ps_curr_layer,
 *                                         S32 i4_blk_x,
 *                                         S32 i4_blk_y,
 *                                         mvgrid_t *ps_mv_grid ,
 *                                         S32 i1_ref_id)
 *
 *  @brief  The 18x18 MV grid for a ctb, is filled in first row and 1st col
 *          this corresponds to neighbours (TL, T, TR, L, BL)
 *
 *  @param[in] ps_curr_layer: layer ctxt, has the mv bank structure pointer
 *
 *  @param[in] blk_x : x coordinate of the block in mv bank
 *
 *  @param[in] blk_y : y coordinate of the block in mv bank
 *
 *  @param[in] ps_mv_grid : Grid (18x18 mvs at 4x4 level)
 *
 *  @param[in] i1_ref_idx : Corresponds to ref idx from which to pick up mv
 *              results, useful if multiple ref idx candts maintained separately.
 *
 *  @return void
 ********************************************************************************
*/
void hme_fill_ctb_neighbour_mvs(
    layer_ctxt_t *ps_curr_layer,
    S32 blk_x,
    S32 blk_y,
    mv_grid_t *ps_mv_grid,
    U08 u1_pred_dir_ctr,
    U08 u1_default_ref_id,
    S32 i4_num_act_ref_l0)
{
    /* NOTE(review): i4_num_act_ref_l0 is not referenced in this body —     */
    /* confirm whether it is reserved for future use or can be dropped      */
    search_node_t *ps_grid_node;
    layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
    S32 i4_offset;
    hme_mv_t *ps_mv, *ps_mv_base;
    S08 *pi1_ref_idx, *pi1_ref_idx_base;
    S32 jump = 0, inc, i, mvs_in_blk, mvs_in_row;

    if(ps_layer_mvbank->e_blk_size == BLK_4x4)
    {
        /* searching 16x16, mvs are for 4x4 */
        jump = 1;
        blk_x <<= 2;
        blk_y <<= 2;
    }
    else
    {
        /* Searching 16x16, mvs are for 8x8 */
        blk_x <<= 1;
        blk_y <<= 1;
    }
    /* Bank must store mvs at a finer granularity than 16x16 */
    ASSERT(ps_layer_mvbank->e_blk_size != BLK_16x16);

    mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk;
    mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row;

    /* Adjust the blk coord to point to top left locn */
    blk_x -= 1;
    blk_y -= 1;

    /* Pick up the mvs from the location */
    i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
    i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);

    /* Skip one entry when the centre-of-gravity of pred dirs is L1 —      */
    /* presumably selects the L1 result slot; verify against the bank layout */
    i4_offset += (u1_pred_dir_ctr == 1);

    ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
    pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;

    ps_mv_base = ps_mv;
    pi1_ref_idx_base = pi1_ref_idx;

    /* the 0, 0 entry of the grid pts to top left for the ctb */
    ps_grid_node = &ps_mv_grid->as_node[0];

    /* Copy 18 mvs at 4x4 level including top left, 16 top mvs for ctb, 1 tr */
    for(i = 0; i < 18; i++)
    {
        COPY_MV_TO_SEARCH_NODE(ps_grid_node, ps_mv, pi1_ref_idx, u1_default_ref_id, 0);
        ps_grid_node++;
        inc = 1;
        /* If blk size is 8x8, then every 2 grid nodes are updated with same mv */
        if(i & 1)
            inc = jump;

        ps_mv += (mvs_in_blk * inc);
        pi1_ref_idx += (mvs_in_blk * inc);
    }

    /* Rewind to the start of the left neighbour column (one row down) */
    ps_mv = ps_mv_base + mvs_in_row;
    pi1_ref_idx = pi1_ref_idx_base + mvs_in_row;

    /* now copy left 16 left mvs */
    ps_grid_node = &ps_mv_grid->as_node[0];
    ps_grid_node += (ps_mv_grid->i4_stride);
    for(i = 0; i < 16; i++)
    {
        COPY_MV_TO_SEARCH_NODE(ps_grid_node, ps_mv, pi1_ref_idx, u1_default_ref_id, 0);
        ps_grid_node += ps_mv_grid->i4_stride;
        inc = 1;
        /* If blk size is 8x8, then every 2 grid nodes are updated with same mv */
        if(!(i & 1))
            inc = jump;

        ps_mv += (mvs_in_row * inc);
        pi1_ref_idx += (mvs_in_row * inc);
    }
    /* last one set to invalid as bottom left not yet encoded */
    ps_grid_node->u1_is_avail = 0;
}

/* Marks the working memory manager as empty (all bytes reusable) */
void hme_reset_wkg_mem(buf_mgr_t *ps_buf_mgr)
{
    ps_buf_mgr->i4_used = 0;
}

/* Attaches a memory chunk of 'size' bytes to the working memory manager */
void hme_init_wkg_mem(buf_mgr_t *ps_buf_mgr, U08 *pu1_mem, S32 size)
{
    ps_buf_mgr->pu1_wkg_mem = pu1_mem;
    ps_buf_mgr->i4_total = size;
    hme_reset_wkg_mem(ps_buf_mgr);
}

/* One-time init of the CTB MV grid: sets stride/offset and marks the      */
/* interior 16x16 nodes as available (edges are refreshed at run time)     */
void hme_init_mv_grid(mv_grid_t *ps_mv_grid)
{
    S32 i, j;
    search_node_t *ps_search_node;
    /*************************************************************************/
    /* We have a 64x64 CTB in the worst case. For this, we have 16x16 4x4 MVs*/
    /* Additionally, we have 1 neighbour on each side. This makes it a 18x18 */
    /* MV Grid. The boundary of this Grid on all sides are neighbours and the*/
    /* left and top edges of this grid is filled run time. The center portion*/
    /* represents the actual CTB MVs (16x16) and is also filled run time.    */
    /* However, the availability is always set as available (init time)      */
    /*************************************************************************/
    ps_mv_grid->i4_stride = NUM_COLUMNS_IN_CTB_GRID;
    ps_mv_grid->i4_start_offset = ps_mv_grid->i4_stride + CTB_MV_GRID_PAD;
    ps_search_node = &ps_mv_grid->as_node[ps_mv_grid->i4_start_offset];
    for(i = 0; i < 16; i++)
    {
        for(j = 0; j < 16; j++)
        {
            ps_search_node[j].u1_is_avail = 1;
        }

        ps_search_node += ps_mv_grid->i4_stride;
    }
}
/**
********************************************************************************
 *  @fn    void hme_pad_left(U08 *pu1_dst, S32 stride, S32 pad_wd, S32 pad_ht)
 *
 *  @brief  Pads horizontally to left side. Each pixel replicated across a line
 *
 *  @param[in] pu1_dst : destination pointer. Points to the pixel to be repeated
 *
 *  @param[in] stride : stride of destination buffer
 *
 *  @param[in] pad_wd : Amt of horizontal padding to be done
 *
 *  @param[in] pad_ht : Number of lines for which horizontal padding to be done
 *
 *  @return void
 ********************************************************************************
*/
void hme_pad_left(U08 *pu1_dst, S32 stride, S32 pad_wd, S32 pad_ht)
{
    S32 i, j;
    U08 u1_val;
    for(i = 0; i < pad_ht; i++)
    {
        /* Replicate the left edge pixel into the pad_wd columns before it */
        u1_val = pu1_dst[0];
        for(j = -pad_wd; j < 0; j++)
            pu1_dst[j] = u1_val;

        pu1_dst += stride;
    }
}
/**
********************************************************************************
 *  @fn    void hme_pad_right(U08 *pu1_dst, S32 stride, S32 pad_wd, S32 pad_ht)
 *
 *  @brief  Pads horizontally to rt side. Each pixel replicated across a line
 *
 *  @param[in] pu1_dst : destination pointer.
Points to the pixel to be repeated 2016 * 2017 * @param[in] stride : stride of destination buffer 2018 * 2019 * @param[in] pad_wd : Amt of horizontal padding to be done 2020 * 2021 * @param[in] pad_ht : Number of lines for which horizontal padding to be done 2022 * 2023 * @return void 2024 ******************************************************************************** 2025 */ 2026 void hme_pad_right(U08 *pu1_dst, S32 stride, S32 pad_wd, S32 pad_ht) 2027 { 2028 S32 i, j; 2029 U08 u1_val; 2030 for(i = 0; i < pad_ht; i++) 2031 { 2032 u1_val = pu1_dst[0]; 2033 for(j = 1; j <= pad_wd; j++) 2034 pu1_dst[j] = u1_val; 2035 2036 pu1_dst += stride; 2037 } 2038 } 2039 /** 2040 ******************************************************************************** 2041 * @fn void hme_pad_top(U08 *pu1_dst, S32 stride, S32 pad_ht, S32 pad_wd) 2042 * 2043 * @brief Pads vertically on the top. Repeats the top line for top padding 2044 * 2045 * @param[in] pu1_dst : destination pointer. Points to the line to be repeated 2046 * 2047 * @param[in] stride : stride of destination buffer 2048 * 2049 * @param[in] pad_ht : Amt of vertical padding to be done 2050 * 2051 * @param[in] pad_wd : Number of columns for which vertical padding to be done 2052 * 2053 * @return void 2054 ******************************************************************************** 2055 */ 2056 void hme_pad_top(U08 *pu1_dst, S32 stride, S32 pad_ht, S32 pad_wd) 2057 { 2058 S32 i; 2059 for(i = 1; i <= pad_ht; i++) 2060 memcpy(pu1_dst - (i * stride), pu1_dst, pad_wd); 2061 } 2062 /** 2063 ******************************************************************************** 2064 * @fn void hme_pad_bot(U08 *pu1_dst, S32 stride, S32 pad_ht, S32 pad_wd) 2065 * 2066 * @brief Pads vertically on the bot. Repeats the top line for top padding 2067 * 2068 * @param[in] pu1_dst : destination pointer. 
Points to the line to be repeated 2069 * 2070 * @param[in] stride : stride of destination buffer 2071 * 2072 * @param[in] pad_ht : Amt of vertical padding to be done 2073 * 2074 * @param[in] pad_wd : Number of columns for which vertical padding to be done 2075 * 2076 * @return void 2077 ******************************************************************************** 2078 */ 2079 void hme_pad_bot(U08 *pu1_dst, S32 stride, S32 pad_ht, S32 pad_wd) 2080 { 2081 S32 i; 2082 for(i = 1; i <= pad_ht; i++) 2083 memcpy(pu1_dst + (i * stride), pu1_dst, pad_wd); 2084 } 2085 2086 /** 2087 ******************************************************************************** 2088 * @fn void hme_get_wt_inp(layer_ctxt_t *ps_curr_layer, S32 pos_x, 2089 * S32 pos_y, S32 size) 2090 * 2091 * @brief Does weighting of the input in case the search needs to happen 2092 * with reference frames weighted 2093 * 2094 * @param[in] ps_curr_layer: layer ctxt 2095 * 2096 * @param[in] pos_x : x coordinate of the input blk in the picture 2097 * 2098 * @param[in] pos_y : y coordinate of hte input blk in the picture 2099 * 2100 * @param[in] size : size of the input block 2101 * 2102 * @param[in] num_ref : Number of reference frames 2103 * 2104 * @return void 2105 ******************************************************************************** 2106 */ 2107 void hme_get_wt_inp( 2108 layer_ctxt_t *ps_curr_layer, 2109 wgt_pred_ctxt_t *ps_wt_inp_prms, 2110 S32 dst_stride, 2111 S32 pos_x, 2112 S32 pos_y, 2113 S32 size, 2114 S32 num_ref, 2115 U08 u1_is_wt_pred_on) 2116 { 2117 S32 ref, i, j; 2118 U08 *pu1_src, *pu1_dst, *pu1_src_tmp; 2119 S32 log_wdc = ps_wt_inp_prms->wpred_log_wdc; 2120 S32 x_count, y_count; 2121 2122 /* Fixed source */ 2123 pu1_src = ps_curr_layer->pu1_inp; 2124 2125 /* Make sure the start positions of block are inside frame limits */ 2126 pos_x = MIN(pos_x, ps_curr_layer->i4_wd - 1); 2127 pos_y = MIN(pos_y, ps_curr_layer->i4_ht - 1); 2128 2129 pu1_src += (pos_x + (pos_y * 
ps_curr_layer->i4_inp_stride)); 2130 2131 /* In case we handle imcomplete CTBs, we copy only as much as reqd */ 2132 /* from input buffers to prevent out of bound accesses. In this */ 2133 /* case, we do padding in x or y or both dirns */ 2134 x_count = MIN(size, (ps_curr_layer->i4_wd - pos_x)); 2135 y_count = MIN(size, (ps_curr_layer->i4_ht - pos_y)); 2136 2137 for(i = 0; i < num_ref + 1; i++) 2138 { 2139 ps_wt_inp_prms->apu1_wt_inp[i] = ps_wt_inp_prms->apu1_wt_inp_buf_array[num_ref]; 2140 } 2141 2142 /* Run thro all ref ids */ 2143 for(ref = 0; ref < num_ref + 1; ref++) 2144 { 2145 S32 wt, off; 2146 S32 inv_wt; 2147 2148 pu1_src_tmp = pu1_src; 2149 2150 /* Each ref id may have differnet wt/offset. */ 2151 /* So we have unique inp buf for each ref id */ 2152 pu1_dst = ps_wt_inp_prms->apu1_wt_inp[ref]; 2153 2154 if(ref == num_ref) 2155 { 2156 /* last ref will be non weighted input */ 2157 for(i = 0; i < y_count; i++) 2158 { 2159 for(j = 0; j < x_count; j++) 2160 { 2161 pu1_dst[j] = pu1_src_tmp[j]; 2162 } 2163 pu1_src_tmp += ps_curr_layer->i4_inp_stride; 2164 pu1_dst += dst_stride; 2165 } 2166 } 2167 else 2168 { 2169 /* Wt and off specific to this ref id */ 2170 wt = ps_wt_inp_prms->a_wpred_wt[ref]; 2171 inv_wt = ps_wt_inp_prms->a_inv_wpred_wt[ref]; 2172 off = ps_wt_inp_prms->a_wpred_off[ref]; 2173 2174 /* Generate size*size worth of modified input samples */ 2175 for(i = 0; i < y_count; i++) 2176 { 2177 for(j = 0; j < x_count; j++) 2178 { 2179 S32 tmp; 2180 2181 /* Since we scale input, we use inverse transform of wt pred */ 2182 //tmp = HME_INV_WT_PRED(pu1_src_tmp[j], wt, off, log_wdc); 2183 tmp = HME_INV_WT_PRED1(pu1_src_tmp[j], inv_wt, off, log_wdc); 2184 pu1_dst[j] = (U08)(HME_CLIP(tmp, 0, 255)); 2185 } 2186 pu1_src_tmp += ps_curr_layer->i4_inp_stride; 2187 pu1_dst += dst_stride; 2188 } 2189 } 2190 2191 /* Check and do padding in right direction if need be */ 2192 pu1_dst = ps_wt_inp_prms->apu1_wt_inp[ref]; 2193 if(x_count != size) 2194 { 2195 
hme_pad_right(pu1_dst + x_count - 1, dst_stride, size - x_count, y_count); 2196 } 2197 2198 /* Check and do padding in bottom directino if need be */ 2199 if(y_count != size) 2200 { 2201 hme_pad_bot(pu1_dst + (y_count - 1) * dst_stride, dst_stride, size - y_count, size); 2202 } 2203 } 2204 } 2205 /** 2206 **************************************************************************************** 2207 * @fn hme_pick_best_pu_cand(pu_result_t *ps_pu_results_dst, 2208 * pu_result_t *ps_pu_results_inp, 2209 * UWORD8 u1_num_results_per_part, 2210 * UWORD8 u1_num_best_cand) 2211 * 2212 * @brief Does the candidate evaluation across all the current candidates and returns 2213 * the best two or one candidates across given lists 2214 * 2215 * @param[in] - ps_pu_results_inp : Pointer to the input candidates 2216 * - u1_num_results_per_part: Number of available candidates 2217 * 2218 * @param[out] - ps_pu_results_dst : Pointer to best PU results 2219 * 2220 **************************************************************************************** 2221 */ 2222 void hme_pick_best_pu_cand( 2223 pu_result_t *ps_pu_results_dst, 2224 pu_result_t *ps_pu_results_list0, 2225 pu_result_t *ps_pu_results_list1, 2226 UWORD8 u1_num_results_per_part_l0, 2227 UWORD8 u1_num_results_per_part_l1, 2228 UWORD8 u1_candidate_rank) 2229 { 2230 struct cand_pos_data 2231 { 2232 U08 u1_cand_list_id; 2233 2234 U08 u1_cand_id_in_cand_list; 2235 } as_cand_pos_data[MAX_NUM_RESULTS_PER_PART_LIST << 1]; 2236 2237 S32 ai4_costs[MAX_NUM_RESULTS_PER_PART_LIST << 1]; 2238 U08 i, j; 2239 2240 for(i = 0; i < u1_num_results_per_part_l0; i++) 2241 { 2242 ai4_costs[i] = ps_pu_results_list0[i].i4_tot_cost; 2243 as_cand_pos_data[i].u1_cand_id_in_cand_list = i; 2244 as_cand_pos_data[i].u1_cand_list_id = 0; 2245 } 2246 2247 for(i = 0, j = u1_num_results_per_part_l0; i < u1_num_results_per_part_l1; i++, j++) 2248 { 2249 ai4_costs[j] = ps_pu_results_list1[i].i4_tot_cost; 2250 as_cand_pos_data[j].u1_cand_id_in_cand_list = i; 2251 
as_cand_pos_data[j].u1_cand_list_id = 1; 2252 } 2253 2254 SORT_PRIMARY_INTTYPE_ARRAY_AND_REORDER_GENERIC_COMPANION_ARRAY( 2255 ai4_costs, 2256 as_cand_pos_data, 2257 u1_num_results_per_part_l0 + u1_num_results_per_part_l1, 2258 struct cand_pos_data); 2259 2260 if(as_cand_pos_data[u1_candidate_rank].u1_cand_list_id) 2261 { 2262 ps_pu_results_dst[0] = 2263 ps_pu_results_list1[as_cand_pos_data[u1_candidate_rank].u1_cand_id_in_cand_list]; 2264 } 2265 else 2266 { 2267 ps_pu_results_dst[0] = 2268 ps_pu_results_list0[as_cand_pos_data[u1_candidate_rank].u1_cand_id_in_cand_list]; 2269 } 2270 } 2271 2272 /* Returns the number of candidates */ 2273 static S32 hme_tu_recur_cand_harvester( 2274 part_type_results_t *ps_cand_container, 2275 inter_pu_results_t *ps_pu_data, 2276 inter_ctb_prms_t *ps_inter_ctb_prms, 2277 S32 i4_part_mask) 2278 { 2279 part_type_results_t s_cand_data; 2280 2281 U08 i, j; 2282 PART_ID_T e_part_id; 2283 2284 S32 i4_num_cands = 0; 2285 2286 /* 2Nx2N part_type decision part */ 2287 if(i4_part_mask & ENABLE_2Nx2N) 2288 { 2289 U08 u1_num_candt_to_pick; 2290 2291 e_part_id = ge_part_type_to_part_id[PRT_2Nx2N][0]; 2292 2293 ASSERT(ps_inter_ctb_prms->u1_max_2nx2n_tu_recur_cands >= 1); 2294 2295 if(!ps_inter_ctb_prms->i4_bidir_enabled || (i4_part_mask == ENABLE_2Nx2N)) 2296 { 2297 u1_num_candt_to_pick = 2298 MIN(ps_inter_ctb_prms->u1_max_2nx2n_tu_recur_cands, 2299 ps_pu_data->u1_num_results_per_part_l0[e_part_id] + 2300 ps_pu_data->u1_num_results_per_part_l1[e_part_id]); 2301 } 2302 else 2303 { 2304 u1_num_candt_to_pick = 2305 MIN(1, 2306 ps_pu_data->u1_num_results_per_part_l0[e_part_id] + 2307 ps_pu_data->u1_num_results_per_part_l1[e_part_id]); 2308 } 2309 2310 if(ME_XTREME_SPEED_25 == ps_inter_ctb_prms->i1_quality_preset) 2311 { 2312 u1_num_candt_to_pick = MIN(u1_num_candt_to_pick, MAX_NUM_TU_RECUR_CANDS_IN_XS25); 2313 } 2314 2315 for(i = 0; i < u1_num_candt_to_pick; i++) 2316 { 2317 /* Picks the best two candidates of all the available ones */ 2318 
hme_pick_best_pu_cand( 2319 ps_cand_container[i4_num_cands].as_pu_results, 2320 ps_pu_data->aps_pu_results[0][e_part_id], 2321 ps_pu_data->aps_pu_results[1][e_part_id], 2322 ps_pu_data->u1_num_results_per_part_l0[e_part_id], 2323 ps_pu_data->u1_num_results_per_part_l1[e_part_id], 2324 i); 2325 2326 /* Update the other params part_type and total_cost in part_type_results */ 2327 ps_cand_container[i4_num_cands].u1_part_type = e_part_id; 2328 ps_cand_container[i4_num_cands].i4_tot_cost = 2329 ps_cand_container[i4_num_cands].as_pu_results->i4_tot_cost; 2330 2331 i4_num_cands++; 2332 } 2333 } 2334 2335 /* SMP */ 2336 { 2337 S32 i4_total_cost; 2338 2339 S32 num_part_types = PRT_Nx2N - PRT_2NxN + 1; 2340 S32 start_part_type = PRT_2NxN; 2341 S32 best_cost = MAX_32BIT_VAL; 2342 S32 part_type_cnt = 0; 2343 2344 for(j = 0; j < num_part_types; j++) 2345 { 2346 if(!(i4_part_mask & gai4_part_type_to_part_mask[j + start_part_type])) 2347 { 2348 continue; 2349 } 2350 2351 for(i = 0; i < gau1_num_parts_in_part_type[j + start_part_type]; i++) 2352 { 2353 e_part_id = ge_part_type_to_part_id[j + start_part_type][i]; 2354 2355 /* Pick the best candidate for the partition acroos lists */ 2356 hme_pick_best_pu_cand( 2357 &s_cand_data.as_pu_results[i], 2358 ps_pu_data->aps_pu_results[0][e_part_id], 2359 ps_pu_data->aps_pu_results[1][e_part_id], 2360 ps_pu_data->u1_num_results_per_part_l0[e_part_id], 2361 ps_pu_data->u1_num_results_per_part_l1[e_part_id], 2362 0); 2363 } 2364 2365 i4_total_cost = 2366 s_cand_data.as_pu_results[0].i4_tot_cost + s_cand_data.as_pu_results[1].i4_tot_cost; 2367 2368 if(i4_total_cost < best_cost) 2369 { 2370 /* Stores the index of the best part_type in the sub-catoegory */ 2371 best_cost = i4_total_cost; 2372 2373 ps_cand_container[i4_num_cands] = s_cand_data; 2374 2375 ps_cand_container[i4_num_cands].u1_part_type = j + start_part_type; 2376 ps_cand_container[i4_num_cands].i4_tot_cost = i4_total_cost; 2377 } 2378 2379 part_type_cnt++; 2380 } 2381 2382 
i4_num_cands = (part_type_cnt) ? (i4_num_cands + 1) : i4_num_cands; 2383 } 2384 2385 /* AMP */ 2386 { 2387 S32 i4_total_cost; 2388 2389 S32 num_part_types = PRT_nRx2N - PRT_2NxnU + 1; 2390 S32 start_part_type = PRT_2NxnU; 2391 S32 best_cost = MAX_32BIT_VAL; 2392 S32 part_type_cnt = 0; 2393 2394 for(j = 0; j < num_part_types; j++) 2395 { 2396 if(!(i4_part_mask & gai4_part_type_to_part_mask[j + start_part_type])) 2397 { 2398 continue; 2399 } 2400 2401 for(i = 0; i < gau1_num_parts_in_part_type[j + start_part_type]; i++) 2402 { 2403 e_part_id = ge_part_type_to_part_id[j + start_part_type][i]; 2404 2405 /* Pick the best candidate for the partition acroos lists */ 2406 hme_pick_best_pu_cand( 2407 &s_cand_data.as_pu_results[i], 2408 ps_pu_data->aps_pu_results[0][e_part_id], 2409 ps_pu_data->aps_pu_results[1][e_part_id], 2410 ps_pu_data->u1_num_results_per_part_l0[e_part_id], 2411 ps_pu_data->u1_num_results_per_part_l1[e_part_id], 2412 0); 2413 } 2414 2415 i4_total_cost = 2416 s_cand_data.as_pu_results[0].i4_tot_cost + s_cand_data.as_pu_results[1].i4_tot_cost; 2417 2418 if(i4_total_cost < best_cost) 2419 { 2420 /* Stores the index of the best part_type in the sub-catoegory */ 2421 best_cost = i4_total_cost; 2422 2423 ps_cand_container[i4_num_cands] = s_cand_data; 2424 2425 ps_cand_container[i4_num_cands].u1_part_type = j + start_part_type; 2426 ps_cand_container[i4_num_cands].i4_tot_cost = i4_total_cost; 2427 } 2428 2429 part_type_cnt++; 2430 } 2431 2432 i4_num_cands = (part_type_cnt) ? 
(i4_num_cands + 1) : i4_num_cands; 2433 } 2434 2435 return i4_num_cands; 2436 } 2437 2438 /** 2439 ***************************************************************************** 2440 * @fn hme_decide_part_types(search_results_t *ps_search_results) 2441 * 2442 * @brief Does uni/bi evaluation accross various partition types, 2443 * decides best inter partition types for the CU, compares 2444 * intra cost and decides the best K results for the CU 2445 * 2446 * This is called post subpel refinmenent for 16x16s, 8x8s and 2447 * for post merge evaluation for 32x32,64x64 CUs 2448 * 2449 * @param[in,out] ps_search_results : Search results data structure 2450 * - In : 2 lists of upto 2mvs & refids, active partition mask 2451 * - Out: Best results for final rdo evaluation of the cu 2452 * 2453 * @param[in] ps_subpel_prms : Sub pel params data structure 2454 * 2455 * 2456 * @par Description 2457 * -------------------------------------------------------------------------------- 2458 * Flow: 2459 * for each category (SMP,AMP,2Nx2N based on part mask) 2460 * { 2461 * for each part_type 2462 * { 2463 * for each part 2464 * pick best candidate from each list 2465 * combine uni part type 2466 * update best results for part type 2467 * } 2468 * pick the best part type for given category (for SMP & AMP) 2469 * } 2470 * || 2471 * || 2472 * \/ 2473 * Bi-Pred evaluation: 2474 * for upto 4 best part types 2475 * { 2476 * for each part 2477 * { 2478 * compute fixed size had for all uni and remember coeffs 2479 * compute bisatd 2480 * uni vs bi and gives upto two results 2481 * also gives the pt level pred buffer 2482 * } 2483 * } 2484 * || 2485 * || 2486 * \/ 2487 * select X candidates for tu recursion as per the Note below 2488 * tu_rec_on_part_type (reuse transform coeffs) 2489 * || 2490 * || 2491 * \/ 2492 * insert intra nodes at appropriate result id 2493 * || 2494 * || 2495 * \/ 2496 * populate y best resuls for rdo based on preset 2497 * 2498 * Note : 2499 * number of TU rec for P 
pics : 2 2nx2n + 1 smp + 1 amp for ms or 9 for hq 2500 * number of TU rec for B pics : 1 2nx2n + 1 smp + 1 amp for ms or 2 uni 2nx2n + 1 smp + 1 amp for ms or 9 for hq 2501 * -------------------------------------------------------------------------------- 2502 * 2503 * @return None 2504 ******************************************************************************** 2505 */ 2506 void hme_decide_part_types( 2507 inter_cu_results_t *ps_cu_results, 2508 inter_pu_results_t *ps_pu_results, 2509 inter_ctb_prms_t *ps_inter_ctb_prms, 2510 me_frm_ctxt_t *ps_ctxt, 2511 ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list, 2512 ihevce_me_optimised_function_list_t *ps_me_optimised_function_list 2513 2514 ) 2515 { 2516 S32 i, j; 2517 S32 i4_part_mask; 2518 ULWORD64 au8_pred_sigmaXSquare[NUM_BEST_ME_OUTPUTS][NUM_INTER_PU_PARTS]; 2519 ULWORD64 au8_pred_sigmaX[NUM_BEST_ME_OUTPUTS][NUM_INTER_PU_PARTS]; 2520 S32 i4_noise_term; 2521 WORD32 e_part_id; 2522 2523 PF_SAD_FXN_TU_REC apf_err_compute[4]; 2524 2525 part_type_results_t as_part_type_results[NUM_BEST_ME_OUTPUTS]; 2526 part_type_results_t *ps_part_type_results; 2527 2528 S32 num_best_cand = 0; 2529 const S32 i4_default_src_wt = ((1 << 15) + (WGHT_DEFAULT >> 1)) / WGHT_DEFAULT; 2530 2531 i4_part_mask = ps_cu_results->i4_part_mask; 2532 2533 num_best_cand = hme_tu_recur_cand_harvester( 2534 as_part_type_results, ps_pu_results, ps_inter_ctb_prms, i4_part_mask); 2535 2536 /* Partition ID for the current PU */ 2537 e_part_id = (UWORD8)ge_part_type_to_part_id[PRT_2Nx2N][0]; 2538 2539 ps_part_type_results = as_part_type_results; 2540 for(i = 0; i < num_best_cand; i++) 2541 { 2542 hme_compute_pred_and_evaluate_bi( 2543 ps_cu_results, 2544 ps_pu_results, 2545 ps_inter_ctb_prms, 2546 &(ps_part_type_results[i]), 2547 au8_pred_sigmaXSquare[i], 2548 au8_pred_sigmaX[i], 2549 ps_cmn_utils_optimised_function_list, 2550 ps_me_optimised_function_list 2551 2552 ); 2553 } 2554 /* Perform TU_REC on the best candidates selected */ 2555 { 2556 
WORD32 i4_sad_grid; 2557 WORD32 ai4_tu_split_flag[4]; 2558 WORD32 ai4_tu_early_cbf[4]; 2559 2560 WORD32 best_cost[NUM_BEST_ME_OUTPUTS]; 2561 WORD32 ai4_final_idx[NUM_BEST_ME_OUTPUTS]; 2562 WORD16 i2_wght; 2563 WORD32 i4_satd; 2564 2565 err_prms_t s_err_prms; 2566 err_prms_t *ps_err_prms = &s_err_prms; 2567 2568 /* Default cost and final idx initialization */ 2569 for(i = 0; i < num_best_cand; i++) 2570 { 2571 best_cost[i] = MAX_32BIT_VAL; 2572 ai4_final_idx[i] = -1; 2573 } 2574 2575 /* Assign the stad function to the err_compute function pointer : 2576 Implemented only for 32x32 and 64x64, hence 16x16 and 8x8 are kept NULL */ 2577 apf_err_compute[CU_64x64] = hme_evalsatd_pt_pu_64x64_tu_rec; 2578 apf_err_compute[CU_32x32] = hme_evalsatd_pt_pu_32x32_tu_rec; 2579 apf_err_compute[CU_16x16] = hme_evalsatd_pt_pu_16x16_tu_rec; 2580 apf_err_compute[CU_8x8] = hme_evalsatd_pt_pu_8x8_tu_rec; 2581 2582 ps_err_prms->pi4_sad_grid = &i4_sad_grid; 2583 ps_err_prms->pi4_tu_split_flags = ai4_tu_split_flag; 2584 ps_err_prms->u1_max_tr_depth = ps_inter_ctb_prms->u1_max_tr_depth; 2585 ps_err_prms->pi4_tu_early_cbf = ai4_tu_early_cbf; 2586 ps_err_prms->i4_grid_mask = 1; 2587 ps_err_prms->pu1_wkg_mem = ps_inter_ctb_prms->pu1_wkg_mem; 2588 ps_err_prms->u1_max_tr_size = 32; 2589 2590 if(ps_inter_ctb_prms->u1_is_cu_noisy) 2591 { 2592 ps_err_prms->u1_max_tr_size = MAX_TU_SIZE_WHEN_NOISY; 2593 } 2594 2595 /* TU_REC for the best candidates, as mentioned in NOTE above (except candidates that 2596 are disabled by Part_mask */ 2597 for(i = 0; i < num_best_cand; i++) 2598 { 2599 part_type_results_t *ps_best_results; 2600 pu_result_t *ps_pu_result; 2601 WORD32 part_type_cost; 2602 WORD32 cand_idx; 2603 2604 WORD32 pred_dir; 2605 S32 i4_inp_off; 2606 2607 S32 lambda; 2608 U08 lambda_qshift; 2609 U08 *apu1_inp[MAX_NUM_INTER_PARTS]; 2610 S16 ai2_wt[MAX_NUM_INTER_PARTS]; 2611 S32 ai4_inv_wt[MAX_NUM_INTER_PARTS]; 2612 S32 ai4_inv_wt_shift_val[MAX_NUM_INTER_PARTS]; 2613 2614 WORD32 part_type = 
ps_part_type_results[i].u1_part_type; 2615 WORD32 e_cu_size = ps_cu_results->u1_cu_size; 2616 WORD32 e_blk_size = ge_cu_size_to_blk_size[e_cu_size]; 2617 U08 u1_num_parts = gau1_num_parts_in_part_type[part_type]; 2618 U08 u1_inp_buf_idx = UCHAR_MAX; 2619 2620 ps_err_prms->i4_part_mask = i4_part_mask; 2621 ps_err_prms->i4_blk_wd = gau1_blk_size_to_wd[e_blk_size]; 2622 ps_err_prms->i4_blk_ht = gau1_blk_size_to_ht[e_blk_size]; 2623 ps_err_prms->pu1_ref = ps_part_type_results[i].pu1_pred; 2624 ps_err_prms->i4_ref_stride = ps_part_type_results[i].i4_pred_stride; 2625 2626 /* Current offset for the present part type */ 2627 i4_inp_off = ps_cu_results->i4_inp_offset; 2628 2629 ps_best_results = &(ps_part_type_results[i]); 2630 2631 part_type_cost = 0; 2632 lambda = ps_inter_ctb_prms->i4_lamda; 2633 lambda_qshift = ps_inter_ctb_prms->u1_lamda_qshift; 2634 2635 for(j = 0; j < u1_num_parts; j++) 2636 { 2637 ps_pu_result = &(ps_best_results->as_pu_results[j]); 2638 2639 pred_dir = ps_pu_result->pu.b2_pred_mode; 2640 2641 if(PRED_L0 == pred_dir) 2642 { 2643 apu1_inp[j] = 2644 ps_inter_ctb_prms->apu1_wt_inp[PRED_L0][ps_pu_result->pu.mv.i1_l0_ref_idx] + 2645 i4_inp_off; 2646 ai2_wt[j] = 2647 ps_inter_ctb_prms->pps_rec_list_l0[ps_pu_result->pu.mv.i1_l0_ref_idx] 2648 ->s_weight_offset.i2_luma_weight; 2649 ai4_inv_wt[j] = 2650 ps_inter_ctb_prms->pi4_inv_wt 2651 [ps_inter_ctb_prms->pi1_past_list[ps_pu_result->pu.mv.i1_l0_ref_idx]]; 2652 ai4_inv_wt_shift_val[j] = 2653 ps_inter_ctb_prms->pi4_inv_wt_shift_val 2654 [ps_inter_ctb_prms->pi1_past_list[ps_pu_result->pu.mv.i1_l0_ref_idx]]; 2655 } 2656 else if(PRED_L1 == pred_dir) 2657 { 2658 apu1_inp[j] = 2659 ps_inter_ctb_prms->apu1_wt_inp[PRED_L1][ps_pu_result->pu.mv.i1_l1_ref_idx] + 2660 i4_inp_off; 2661 ai2_wt[j] = 2662 ps_inter_ctb_prms->pps_rec_list_l1[ps_pu_result->pu.mv.i1_l1_ref_idx] 2663 ->s_weight_offset.i2_luma_weight; 2664 ai4_inv_wt[j] = 2665 ps_inter_ctb_prms->pi4_inv_wt 2666 
[ps_inter_ctb_prms->pi1_future_list[ps_pu_result->pu.mv.i1_l1_ref_idx]]; 2667 ai4_inv_wt_shift_val[j] = 2668 ps_inter_ctb_prms->pi4_inv_wt_shift_val 2669 [ps_inter_ctb_prms->pi1_future_list[ps_pu_result->pu.mv.i1_l1_ref_idx]]; 2670 } 2671 else if(PRED_BI == pred_dir) 2672 { 2673 apu1_inp[j] = ps_inter_ctb_prms->pu1_non_wt_inp + i4_inp_off; 2674 ai2_wt[j] = 1 << ps_inter_ctb_prms->wpred_log_wdc; 2675 ai4_inv_wt[j] = i4_default_src_wt; 2676 ai4_inv_wt_shift_val[j] = 0; 2677 } 2678 else 2679 { 2680 ASSERT(0); 2681 } 2682 2683 part_type_cost += ps_pu_result->i4_mv_cost; 2684 } 2685 2686 if((u1_num_parts == 1) || (ai2_wt[0] == ai2_wt[1])) 2687 { 2688 ps_err_prms->pu1_inp = apu1_inp[0]; 2689 ps_err_prms->i4_inp_stride = ps_inter_ctb_prms->i4_inp_stride; 2690 i2_wght = ai2_wt[0]; 2691 } 2692 else 2693 { 2694 if(1 != ihevce_get_free_pred_buf_indices( 2695 &u1_inp_buf_idx, 2696 &ps_inter_ctb_prms->s_pred_buf_mngr.u4_pred_buf_usage_indicator, 2697 1)) 2698 { 2699 ASSERT(0); 2700 } 2701 else 2702 { 2703 U08 *pu1_dst = 2704 ps_inter_ctb_prms->s_pred_buf_mngr.apu1_pred_bufs[u1_inp_buf_idx]; 2705 U08 *pu1_src = apu1_inp[0]; 2706 U08 u1_pu1_wd = (ps_part_type_results[i].as_pu_results[0].pu.b4_wd + 1) << 2; 2707 U08 u1_pu1_ht = (ps_part_type_results[i].as_pu_results[0].pu.b4_ht + 1) << 2; 2708 U08 u1_pu2_wd = (ps_part_type_results[i].as_pu_results[1].pu.b4_wd + 1) << 2; 2709 U08 u1_pu2_ht = (ps_part_type_results[i].as_pu_results[1].pu.b4_ht + 1) << 2; 2710 2711 ps_cmn_utils_optimised_function_list->pf_copy_2d( 2712 pu1_dst, 2713 MAX_CU_SIZE, 2714 pu1_src, 2715 ps_inter_ctb_prms->i4_inp_stride, 2716 u1_pu1_wd, 2717 u1_pu1_ht); 2718 2719 pu1_dst += 2720 (gai1_is_part_vertical[ge_part_type_to_part_id[part_type][0]] 2721 ? u1_pu1_ht * MAX_CU_SIZE 2722 : u1_pu1_wd); 2723 pu1_src = 2724 apu1_inp[1] + (gai1_is_part_vertical[ge_part_type_to_part_id[part_type][0]] 2725 ? 
u1_pu1_ht * ps_inter_ctb_prms->i4_inp_stride 2726 : u1_pu1_wd); 2727 2728 ps_cmn_utils_optimised_function_list->pf_copy_2d( 2729 pu1_dst, 2730 MAX_CU_SIZE, 2731 pu1_src, 2732 ps_inter_ctb_prms->i4_inp_stride, 2733 u1_pu2_wd, 2734 u1_pu2_ht); 2735 2736 ps_err_prms->pu1_inp = 2737 ps_inter_ctb_prms->s_pred_buf_mngr.apu1_pred_bufs[u1_inp_buf_idx]; 2738 ps_err_prms->i4_inp_stride = MAX_CU_SIZE; 2739 i2_wght = ai2_wt[1]; 2740 } 2741 } 2742 2743 #if !DISABLE_TU_RECURSION 2744 i4_satd = apf_err_compute[e_cu_size]( 2745 ps_err_prms, 2746 lambda, 2747 lambda_qshift, 2748 ps_inter_ctb_prms->i4_qstep_ls8, 2749 ps_ctxt->ps_func_selector); 2750 #else 2751 ps_err_prms->pi4_sad_grid = &i4_satd; 2752 2753 pf_err_compute(ps_err_prms); 2754 2755 if((part_type == PRT_2Nx2N) || (e_cu_size != CU_64x64)) 2756 { 2757 ai4_tu_split_flag[0] = 1; 2758 ai4_tu_split_flag[1] = 1; 2759 ai4_tu_split_flag[2] = 1; 2760 ai4_tu_split_flag[3] = 1; 2761 2762 ps_err_prms->i4_tu_split_cost = 0; 2763 } 2764 else 2765 { 2766 ai4_tu_split_flag[0] = 1; 2767 ai4_tu_split_flag[1] = 1; 2768 ai4_tu_split_flag[2] = 1; 2769 ai4_tu_split_flag[3] = 1; 2770 2771 ps_err_prms->i4_tu_split_cost = 0; 2772 } 2773 #endif 2774 2775 #if UNI_SATD_SCALE 2776 i4_satd = (i4_satd * i2_wght) >> ps_inter_ctb_prms->wpred_log_wdc; 2777 #endif 2778 2779 if(ps_inter_ctb_prms->u1_is_cu_noisy && ps_inter_ctb_prms->i4_alpha_stim_multiplier) 2780 { 2781 ULWORD64 u8_temp_var, u8_temp_var1, u8_pred_sigmaSquaredX; 2782 ULWORD64 u8_src_variance, u8_pred_variance; 2783 unsigned long u4_shift_val; 2784 S32 i4_bits_req; 2785 S32 i4_q_level = STIM_Q_FORMAT + ALPHA_Q_FORMAT; 2786 2787 if(1 == u1_num_parts) 2788 { 2789 u8_pred_sigmaSquaredX = au8_pred_sigmaX[i][0] * au8_pred_sigmaX[i][0]; 2790 u8_pred_variance = au8_pred_sigmaXSquare[i][0] - u8_pred_sigmaSquaredX; 2791 2792 if(e_cu_size == CU_8x8) 2793 { 2794 PART_ID_T e_part_id = (PART_ID_T)( 2795 (PART_ID_NxN_TL) + (ps_cu_results->u1_x_off & 1) + 2796 ((ps_cu_results->u1_y_off & 1) << 1)); 2797 
2798 u4_shift_val = ihevce_calc_stim_injected_variance( 2799 ps_inter_ctb_prms->pu8_part_src_sigmaX, 2800 ps_inter_ctb_prms->pu8_part_src_sigmaXSquared, 2801 &u8_src_variance, 2802 ai4_inv_wt[0], 2803 ai4_inv_wt_shift_val[0], 2804 ps_inter_ctb_prms->wpred_log_wdc, 2805 e_part_id); 2806 } 2807 else 2808 { 2809 u4_shift_val = ihevce_calc_stim_injected_variance( 2810 ps_inter_ctb_prms->pu8_part_src_sigmaX, 2811 ps_inter_ctb_prms->pu8_part_src_sigmaXSquared, 2812 &u8_src_variance, 2813 ai4_inv_wt[0], 2814 ai4_inv_wt_shift_val[0], 2815 ps_inter_ctb_prms->wpred_log_wdc, 2816 e_part_id); 2817 } 2818 2819 u8_pred_variance = u8_pred_variance >> u4_shift_val; 2820 2821 GETRANGE64(i4_bits_req, u8_pred_variance); 2822 2823 if(i4_bits_req > 27) 2824 { 2825 u8_pred_variance = u8_pred_variance >> (i4_bits_req - 27); 2826 u8_src_variance = u8_src_variance >> (i4_bits_req - 27); 2827 } 2828 2829 if(u8_src_variance == u8_pred_variance) 2830 { 2831 u8_temp_var = (1 << STIM_Q_FORMAT); 2832 } 2833 else 2834 { 2835 u8_temp_var = (2 * u8_src_variance * u8_pred_variance); 2836 u8_temp_var = (u8_temp_var * (1 << STIM_Q_FORMAT)); 2837 u8_temp_var1 = (u8_src_variance * u8_src_variance) + 2838 (u8_pred_variance * u8_pred_variance); 2839 u8_temp_var = (u8_temp_var + (u8_temp_var1 / 2)); 2840 u8_temp_var = (u8_temp_var / u8_temp_var1); 2841 } 2842 2843 i4_noise_term = (UWORD32)u8_temp_var; 2844 2845 ASSERT(i4_noise_term >= 0); 2846 2847 i4_noise_term *= ps_inter_ctb_prms->i4_alpha_stim_multiplier; 2848 2849 u8_temp_var = i4_satd; 2850 u8_temp_var *= ((1 << (i4_q_level)) - (i4_noise_term)); 2851 u8_temp_var += (1 << ((i4_q_level)-1)); 2852 i4_satd = (UWORD32)(u8_temp_var >> (i4_q_level)); 2853 } 2854 else /*if(e_cu_size <= CU_16x16)*/ 2855 { 2856 unsigned long temp_shift_val; 2857 PART_ID_T ae_part_id[MAX_NUM_INTER_PARTS] = { 2858 ge_part_type_to_part_id[part_type][0], ge_part_type_to_part_id[part_type][1] 2859 }; 2860 2861 u4_shift_val = ihevce_calc_variance_for_diff_weights( 2862 
ps_inter_ctb_prms->pu8_part_src_sigmaX, 2863 ps_inter_ctb_prms->pu8_part_src_sigmaXSquared, 2864 &u8_src_variance, 2865 ai4_inv_wt, 2866 ai4_inv_wt_shift_val, 2867 ps_best_results->as_pu_results, 2868 ps_inter_ctb_prms->wpred_log_wdc, 2869 ae_part_id, 2870 gau1_blk_size_to_wd[e_blk_size], 2871 u1_num_parts, 2872 1); 2873 2874 temp_shift_val = u4_shift_val; 2875 2876 u4_shift_val = ihevce_calc_variance_for_diff_weights( 2877 au8_pred_sigmaX[i], 2878 au8_pred_sigmaXSquare[i], 2879 &u8_pred_variance, 2880 ai4_inv_wt, 2881 ai4_inv_wt_shift_val, 2882 ps_best_results->as_pu_results, 2883 0, 2884 ae_part_id, 2885 gau1_blk_size_to_wd[e_blk_size], 2886 u1_num_parts, 2887 0); 2888 2889 u8_pred_variance = u8_pred_variance >> temp_shift_val; 2890 2891 GETRANGE64(i4_bits_req, u8_pred_variance); 2892 2893 if(i4_bits_req > 27) 2894 { 2895 u8_pred_variance = u8_pred_variance >> (i4_bits_req - 27); 2896 u8_src_variance = u8_src_variance >> (i4_bits_req - 27); 2897 } 2898 2899 if(u8_src_variance == u8_pred_variance) 2900 { 2901 u8_temp_var = (1 << STIM_Q_FORMAT); 2902 } 2903 else 2904 { 2905 u8_temp_var = (2 * u8_src_variance * u8_pred_variance); 2906 u8_temp_var = (u8_temp_var * (1 << STIM_Q_FORMAT)); 2907 u8_temp_var1 = (u8_src_variance * u8_src_variance) + 2908 (u8_pred_variance * u8_pred_variance); 2909 u8_temp_var = (u8_temp_var + (u8_temp_var1 / 2)); 2910 u8_temp_var = (u8_temp_var / u8_temp_var1); 2911 } 2912 2913 i4_noise_term = (UWORD32)u8_temp_var; 2914 2915 ASSERT(i4_noise_term >= 0); 2916 ASSERT(i4_noise_term <= (1 << (STIM_Q_FORMAT + ALPHA_Q_FORMAT))); 2917 2918 i4_noise_term *= ps_inter_ctb_prms->i4_alpha_stim_multiplier; 2919 2920 u8_temp_var = i4_satd; 2921 u8_temp_var *= ((1 << (i4_q_level)) - (i4_noise_term)); 2922 u8_temp_var += (1 << ((i4_q_level)-1)); 2923 i4_satd = (UWORD32)(u8_temp_var >> (i4_q_level)); 2924 2925 ASSERT(i4_satd >= 0); 2926 } 2927 } 2928 2929 if(u1_inp_buf_idx != UCHAR_MAX) 2930 { 2931 ihevce_set_pred_buf_as_free( 2932 
&ps_inter_ctb_prms->s_pred_buf_mngr.u4_pred_buf_usage_indicator, 2933 u1_inp_buf_idx); 2934 } 2935 2936 part_type_cost += i4_satd; 2937 2938 /*Update the best results with the new results */ 2939 ps_best_results->i4_tot_cost = part_type_cost; 2940 2941 ps_best_results->i4_tu_split_cost = ps_err_prms->i4_tu_split_cost; 2942 2943 ASSERT(ai4_tu_split_flag[0] >= 0); 2944 if(e_cu_size == CU_64x64) 2945 { 2946 ps_best_results->ai4_tu_split_flag[0] = ai4_tu_split_flag[0]; 2947 ps_best_results->ai4_tu_split_flag[1] = ai4_tu_split_flag[1]; 2948 ps_best_results->ai4_tu_split_flag[2] = ai4_tu_split_flag[2]; 2949 ps_best_results->ai4_tu_split_flag[3] = ai4_tu_split_flag[3]; 2950 2951 /* Update the TU early cbf flags into the best results structure */ 2952 ps_best_results->ai4_tu_early_cbf[0] = ai4_tu_early_cbf[0]; 2953 ps_best_results->ai4_tu_early_cbf[1] = ai4_tu_early_cbf[1]; 2954 ps_best_results->ai4_tu_early_cbf[2] = ai4_tu_early_cbf[2]; 2955 ps_best_results->ai4_tu_early_cbf[3] = ai4_tu_early_cbf[3]; 2956 } 2957 else 2958 { 2959 ps_best_results->ai4_tu_split_flag[0] = ai4_tu_split_flag[0]; 2960 ps_best_results->ai4_tu_early_cbf[0] = ai4_tu_early_cbf[0]; 2961 } 2962 2963 if(part_type_cost < best_cost[num_best_cand - 1]) 2964 { 2965 /* Push and sort current part type if it is one of the num_best_cand */ 2966 for(cand_idx = 0; cand_idx < i; cand_idx++) 2967 { 2968 if(part_type_cost <= best_cost[cand_idx]) 2969 { 2970 memmove( 2971 &ai4_final_idx[cand_idx + 1], 2972 &ai4_final_idx[cand_idx], 2973 sizeof(WORD32) * (i - cand_idx)); 2974 memmove( 2975 &best_cost[cand_idx + 1], 2976 &best_cost[cand_idx], 2977 sizeof(WORD32) * (i - cand_idx)); 2978 break; 2979 } 2980 } 2981 2982 ai4_final_idx[cand_idx] = i; 2983 best_cost[cand_idx] = part_type_cost; 2984 } 2985 } 2986 2987 ps_cu_results->u1_num_best_results = num_best_cand; 2988 2989 for(i = 0; i < num_best_cand; i++) 2990 { 2991 ASSERT(ai4_final_idx[i] < num_best_cand); 2992 2993 if(ai4_final_idx[i] != -1) 2994 { 2995 memcpy( 
2996 &(ps_cu_results->ps_best_results[i]), 2997 &(ps_part_type_results[ai4_final_idx[i]]), 2998 sizeof(part_type_results_t)); 2999 } 3000 } 3001 } 3002 3003 for(i = 0; i < (MAX_NUM_PRED_BUFS_USED_FOR_PARTTYPE_DECISIONS)-2; i++) 3004 { 3005 ihevce_set_pred_buf_as_free( 3006 &ps_inter_ctb_prms->s_pred_buf_mngr.u4_pred_buf_usage_indicator, i); 3007 } 3008 } 3009 3010 /** 3011 ************************************************************************************************** 3012 * @fn hme_populate_pus(search_results_t *ps_search_results, inter_cu_results_t *ps_cu_results) 3013 * 3014 * @brief Does the population of the inter_cu_results structure with the results after the 3015 * subpel refinement 3016 * 3017 * This is called post subpel refinmenent for 16x16s, 8x8s and 3018 * for post merge evaluation for 32x32,64x64 CUs 3019 * 3020 * @param[in,out] ps_search_results : Search results data structure 3021 * - ps_cu_results : cu_results data structure 3022 * ps_pu_result : Pointer to the memory for storing PU's 3023 * 3024 **************************************************************************************************** 3025 */ 3026 void hme_populate_pus( 3027 me_ctxt_t *ps_thrd_ctxt, 3028 me_frm_ctxt_t *ps_ctxt, 3029 hme_subpel_prms_t *ps_subpel_prms, 3030 search_results_t *ps_search_results, 3031 inter_cu_results_t *ps_cu_results, 3032 inter_pu_results_t *ps_pu_results, 3033 pu_result_t *ps_pu_result, 3034 inter_ctb_prms_t *ps_inter_ctb_prms, 3035 wgt_pred_ctxt_t *ps_wt_prms, 3036 layer_ctxt_t *ps_curr_layer, 3037 U08 *pu1_pred_dir_searched, 3038 WORD32 i4_num_active_ref) 3039 { 3040 WORD32 i, j, k; 3041 WORD32 i4_part_mask; 3042 WORD32 i4_ref; 3043 UWORD8 e_part_id; 3044 pu_result_t *ps_curr_pu; 3045 search_node_t *ps_search_node; 3046 part_attr_t *ps_part_attr; 3047 UWORD8 e_cu_size = ps_search_results->e_cu_size; 3048 WORD32 num_results_per_part_l0 = 0; 3049 WORD32 num_results_per_part_l1 = 0; 3050 WORD32 i4_ref_id; 3051 WORD32 i4_total_act_ref; 3052 3053 
    i4_part_mask = ps_search_results->i4_part_mask;

    /* pred_buf_mngr init : hand out interpolation scratch buffers carved out of the
       per-thread working memory. Clearing a bit in u4_pred_buf_usage_indicator appears
       to mark that buffer as available — TODO confirm against ihevce_set_pred_buf_as_free */
    {
        hme_get_wkg_mem(&ps_ctxt->s_buf_mgr, MAX_WKG_MEM_SIZE_PER_THREAD);

        ps_inter_ctb_prms->s_pred_buf_mngr.u4_pred_buf_usage_indicator = UINT_MAX;

        for(i = 0; i < MAX_NUM_PRED_BUFS_USED_FOR_PARTTYPE_DECISIONS - 2; i++)
        {
            ps_inter_ctb_prms->s_pred_buf_mngr.apu1_pred_bufs[i] =
                ps_ctxt->s_buf_mgr.pu1_wkg_mem + i * INTERP_OUT_BUF_SIZE;
            ps_inter_ctb_prms->s_pred_buf_mngr.u4_pred_buf_usage_indicator &= ~(1 << i);
        }

        /* Whatever follows the managed buffers is generic working memory */
        ps_inter_ctb_prms->pu1_wkg_mem = ps_ctxt->s_buf_mgr.pu1_wkg_mem + i * INTERP_OUT_BUF_SIZE;
    }

    ps_inter_ctb_prms->i4_alpha_stim_multiplier = ALPHA_FOR_NOISE_TERM_IN_ME;
    ps_inter_ctb_prms->u1_is_cu_noisy = ps_subpel_prms->u1_is_cu_noisy;
    ps_inter_ctb_prms->i4_lamda = ps_search_results->as_pred_ctxt[0].lambda;

    /* Populate the CU level parameters */
    ps_cu_results->u1_cu_size = ps_search_results->e_cu_size;
    ps_cu_results->u1_num_best_results = ps_search_results->u1_num_best_results;
    ps_cu_results->i4_part_mask = ps_search_results->i4_part_mask;
    ps_cu_results->u1_x_off = ps_search_results->u1_x_off;
    ps_cu_results->u1_y_off = ps_search_results->u1_y_off;

    i4_total_act_ref =
        ps_ctxt->s_frm_prms.u1_num_active_ref_l0 + ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
    /* Populate the partition results.
       Loop across all the active references that are enabled right now */
    for(i = 0; i < MAX_PART_TYPES; i++)
    {
        /* Skip part types that were not searched for this CU */
        if(!(i4_part_mask & gai4_part_type_to_part_mask[i]))
        {
            continue;
        }

        for(j = 0; j < gau1_num_parts_in_part_type[i]; j++)
        {
            /* Partition ID for the current PU */
            e_part_id = (UWORD8)ge_part_type_to_part_id[i][j];
            ps_part_attr = &gas_part_attr_in_cu[e_part_id];

            num_results_per_part_l0 = 0;
            num_results_per_part_l1 = 0;

            /* Per-partition result arrays: L0 entries first, L1 entries offset by
               TOT_NUM_PARTS slots in the same flat ps_pu_result memory */
            ps_pu_results->aps_pu_results[0][e_part_id] =
                ps_pu_result + (e_part_id * MAX_NUM_RESULTS_PER_PART_LIST);
            ps_pu_results->aps_pu_results[1][e_part_id] =
                ps_pu_result + ((e_part_id + TOT_NUM_PARTS) * MAX_NUM_RESULTS_PER_PART_LIST);

            for(i4_ref = 0; i4_ref < i4_num_active_ref; i4_ref++)
            {
                U08 u1_pred_dir = pu1_pred_dir_searched[i4_ref];

                for(k = 0; k < ps_search_results->u1_num_results_per_part; k++)
                {
                    ps_search_node =
                        &ps_search_results->aps_part_results[u1_pred_dir][e_part_id][k];

                    /* If subpel is done then the node is a valid candidate else break the loop */
                    if(ps_search_node->u1_subpel_done)
                    {
                        i4_ref_id = ps_search_node->i1_ref_idx;

                        ASSERT(i4_ref_id >= 0);

                        /* Check whether current ref_id is past or future and assign the
                           pointers to L0 or L1 list accordingly */
                        if(!u1_pred_dir)
                        {
                            ps_curr_pu = ps_pu_results->aps_pu_results[0][e_part_id] +
                                         num_results_per_part_l0;

                            ASSERT(
                                ps_ctxt->a_ref_idx_lc_to_l0[i4_ref_id] <
                                ps_inter_ctb_prms->u1_num_active_ref_l0);

                            /* Always populate the ref_idx value in l0_ref_idx */
                            ps_curr_pu->pu.mv.i1_l0_ref_idx =
                                ps_ctxt->a_ref_idx_lc_to_l0[i4_ref_id];
                            ps_curr_pu->pu.mv.s_l0_mv = ps_search_node->s_mv;
                            ps_curr_pu->pu.mv.i1_l1_ref_idx = -1;
                            ps_curr_pu->pu.b2_pred_mode = PRED_L0;

                            ps_inter_ctb_prms->apu1_wt_inp[0][ps_curr_pu->pu.mv.i1_l0_ref_idx] =
                                ps_wt_prms->apu1_wt_inp[i4_ref_id];

                            num_results_per_part_l0++;
                        }
                        else
                        {
                            ps_curr_pu = ps_pu_results->aps_pu_results[1][e_part_id] +
                                         num_results_per_part_l1;

                            ASSERT(
                                ps_ctxt->a_ref_idx_lc_to_l1[i4_ref_id] <
                                ps_inter_ctb_prms->u1_num_active_ref_l1);

                            /* populate the ref_idx value in l1_ref_idx */
                            ps_curr_pu->pu.mv.i1_l1_ref_idx =
                                ps_ctxt->a_ref_idx_lc_to_l1[i4_ref_id];
                            ps_curr_pu->pu.mv.s_l1_mv = ps_search_node->s_mv;
                            ps_curr_pu->pu.mv.i1_l0_ref_idx = -1;
                            ps_curr_pu->pu.b2_pred_mode = PRED_L1;

                            /* Copy the values from weighted params to common_frm_params */
                            ps_inter_ctb_prms->apu1_wt_inp[1][ps_curr_pu->pu.mv.i1_l1_ref_idx] =
                                ps_wt_prms->apu1_wt_inp[i4_ref_id];

                            num_results_per_part_l1++;
                        }
                        ps_curr_pu->i4_mv_cost = ps_search_node->i4_mv_cost;
                        ps_curr_pu->i4_sdi = ps_search_node->i4_sdi;

#if UNI_SATD_SCALE
                        /* SATD is scaled by weight. Hence rescale the SATD: undo the
                           weighting (divide by 2^wpred_log_wdc with rounding) and
                           re-add the MV cost */
                        ps_curr_pu->i4_tot_cost =
                            ((ps_search_node->i4_sad *
                                  ps_ctxt->s_wt_pred.a_wpred_wt[ps_search_node->i1_ref_idx] +
                              (1 << (ps_inter_ctb_prms->wpred_log_wdc - 1))) >>
                             ps_inter_ctb_prms->wpred_log_wdc) +
                            ps_search_node->i4_mv_cost;
#endif

                        /* Packed format of the width and height: dimension in units of
                           4 pels, minus 1 (so an 8-pel extent is stored as 1) */
                        ps_curr_pu->pu.b4_wd = ((ps_part_attr->u1_x_count << e_cu_size) >> 2) - 1;
                        ps_curr_pu->pu.b4_ht = ((ps_part_attr->u1_y_count << e_cu_size) >> 2) - 1;

                        /* Positions are CTB-relative, also in units of 4 pels */
                        ps_curr_pu->pu.b4_pos_x =
                            (((ps_part_attr->u1_x_start << e_cu_size) + ps_cu_results->u1_x_off) >>
                             2);
                        ps_curr_pu->pu.b4_pos_y =
                            (((ps_part_attr->u1_y_start << e_cu_size) + ps_cu_results->u1_y_off) >>
                             2);

                        ps_curr_pu->pu.b1_intra_flag = 0;

                        /* Unweighted input follows the per-reference weighted planes */
                        ps_inter_ctb_prms->pu1_non_wt_inp =
                            ps_wt_prms->apu1_wt_inp[i4_total_act_ref];

                        ps_search_node++;
                    }
                    else
                    {
                        break;
                    }
                }
            }

            ps_pu_results->u1_num_results_per_part_l0[e_part_id] = num_results_per_part_l0;
            ps_pu_results->u1_num_results_per_part_l1[e_part_id] = num_results_per_part_l1;
        }
    }
}

/**
**************************************************************************************************
*  @fn     hme_populate_pus_8x8_cu(search_results_t *ps_search_results, inter_cu_results_t *ps_cu_results)
*
*  @brief  Does the population of the inter_cu_results structure with the results after the
*          subpel refinement
*
*          This is called post subpel refinement for 16x16s, 8x8s and
*          for post merge evaluation for 32x32,64x64 CUs
*
*  @param[in,out] ps_search_results : Search results data structure
*                 - ps_cu_results : cu_results data structure
*                 ps_pu_results : Pointer for the PU's
*                 ps_pu_result : Pointer to the memory for storing PU's
*
*********************************************************************************************************
*/
void hme_populate_pus_8x8_cu(
    me_ctxt_t *ps_thrd_ctxt,
    me_frm_ctxt_t *ps_ctxt,
    hme_subpel_prms_t *ps_subpel_prms,
    search_results_t *ps_search_results,
    inter_cu_results_t *ps_cu_results,
    inter_pu_results_t *ps_pu_results,
    pu_result_t *ps_pu_result,
    inter_ctb_prms_t *ps_inter_ctb_prms,
    U08 *pu1_pred_dir_searched,
    WORD32 i4_num_active_ref,
    U08 u1_blk_8x8_mask)
{
    WORD32 i, k;
    WORD32 i4_part_mask;
    WORD32 i4_ref;
    pu_result_t *ps_curr_pu;
    search_node_t *ps_search_node;
    WORD32 i4_ref_id;
    WORD32 x_off, y_off;

    /* Make part mask available as only 2Nx2N
       Later support for 4x8 and 8x4 needs to be added */
    i4_part_mask = ENABLE_2Nx2N;

    x_off = ps_search_results->u1_x_off;
    y_off = ps_search_results->u1_y_off;

    /* Walk the four 8x8 blocks of the enclosing 16x16, in raster order */
    for(i = 0; i < 4; i++)
    {
        if(u1_blk_8x8_mask & (1 << i))
        {
            UWORD8 u1_x_pos, u1_y_pos;

            WORD32 num_results_per_part_l0 = 0;
            WORD32 num_results_per_part_l1 = 0;

            ps_cu_results->u1_cu_size = CU_8x8;
            ps_cu_results->u1_num_best_results = ps_search_results->u1_num_best_results;
            ps_cu_results->i4_part_mask = i4_part_mask;
            ps_cu_results->u1_x_off = x_off + (i & 1) * 8;
            ps_cu_results->u1_y_off = y_off + (i >> 1) * 8;
            ps_cu_results->i4_inp_offset = ps_cu_results->u1_x_off + (ps_cu_results->u1_y_off * 64);

            ps_cu_results->ps_best_results[0].i4_tot_cost = MAX_32BIT_VAL;
            ps_cu_results->ps_best_results[0].i4_tu_split_cost = 0;

            /* Positions in units of 4 pels (CTB relative) */
            u1_x_pos = ps_cu_results->u1_x_off >> 2;
            u1_y_pos = ps_cu_results->u1_y_off >> 2;

            /* NxN was never searched at the 16x16 level: publish an empty result with a
               MAX cost sentinel so this 8x8 CU is never chosen downstream */
            if(!(ps_search_results->i4_part_mask & ENABLE_NxN))
            {
                ps_curr_pu = &ps_cu_results->ps_best_results[0].as_pu_results[0];

                ps_cu_results->i4_part_mask = 0;
                ps_cu_results->u1_num_best_results = 0;

                ps_curr_pu->i4_tot_cost = MAX_32BIT_VAL;

                ps_curr_pu->pu.b4_wd = 1;
                ps_curr_pu->pu.b4_ht = 1;
                ps_curr_pu->pu.b4_pos_x = u1_x_pos;
                ps_curr_pu->pu.b4_pos_y = u1_y_pos;
                ps_cu_results->ps_best_results[0].i4_tu_split_cost = 0;

                ps_cu_results++;
                ps_pu_results++;

                continue;
            }

            /* L0 entries first, L1 entries offset by TOT_NUM_PARTS slots */
            ps_pu_results->aps_pu_results[0][0] =
                ps_pu_result + (i * MAX_NUM_RESULTS_PER_PART_LIST);
            ps_pu_results->aps_pu_results[1][0] =
                ps_pu_result + ((i + TOT_NUM_PARTS) * MAX_NUM_RESULTS_PER_PART_LIST);

            for(i4_ref = 0; i4_ref < i4_num_active_ref; i4_ref++)
            {
                U08 u1_pred_dir = pu1_pred_dir_searched[i4_ref];

                /* Select the NxN partition node for the current ref_idx in the search results*/
                ps_search_node =
                    ps_search_results->aps_part_results[u1_pred_dir][PART_ID_NxN_TL + i];

                for(k = 0; k < ps_search_results->u1_num_results_per_part; k++)
                {
                    /* If subpel is done then the node is a valid candidate else break the loop */
                    if((ps_search_node->u1_is_avail) || (ps_search_node->u1_subpel_done))
                    {
                        i4_ref_id = ps_search_node->i1_ref_idx;

                        ASSERT(i4_ref_id >= 0);

                        if(!u1_pred_dir)
                        {
                            ps_curr_pu =
                                ps_pu_results->aps_pu_results[0][0] + num_results_per_part_l0;

                            ASSERT(
                                ps_ctxt->a_ref_idx_lc_to_l0[i4_ref_id] <
                                ps_inter_ctb_prms->u1_num_active_ref_l0);

                            ps_curr_pu->pu.mv.i1_l0_ref_idx =
                                ps_ctxt->a_ref_idx_lc_to_l0[i4_ref_id];
                            ps_curr_pu->pu.mv.s_l0_mv = ps_search_node->s_mv;
                            ps_curr_pu->pu.mv.i1_l1_ref_idx = -1;
                            ps_curr_pu->pu.b2_pred_mode = PRED_L0;

                            num_results_per_part_l0++;
                        }
                        else
                        {
                            ps_curr_pu =
                                ps_pu_results->aps_pu_results[1][0] + num_results_per_part_l1;

                            ASSERT(
                                ps_ctxt->a_ref_idx_lc_to_l1[i4_ref_id] <
                                ps_inter_ctb_prms->u1_num_active_ref_l1);

                            ps_curr_pu->pu.mv.i1_l1_ref_idx =
                                ps_ctxt->a_ref_idx_lc_to_l1[i4_ref_id];
                            ps_curr_pu->pu.mv.s_l1_mv = ps_search_node->s_mv;
                            ps_curr_pu->pu.mv.i1_l0_ref_idx = -1;
                            ps_curr_pu->pu.b2_pred_mode = PRED_L1;

                            num_results_per_part_l1++;
                        }
                        ps_curr_pu->i4_mv_cost = ps_search_node->i4_mv_cost;
                        ps_curr_pu->i4_sdi = ps_search_node->i4_sdi;

#if UNI_SATD_SCALE
                        /* SATD is scaled by weight. Hence rescale the SATD (undo the
                           weighting with rounding) and re-add the MV cost */
                        ps_curr_pu->i4_tot_cost =
                            ((ps_search_node->i4_sad *
                                  ps_ctxt->s_wt_pred.a_wpred_wt[ps_search_node->i1_ref_idx] +
                              (1 << (ps_inter_ctb_prms->wpred_log_wdc - 1))) >>
                             ps_inter_ctb_prms->wpred_log_wdc) +
                            ps_search_node->i4_mv_cost;
#endif

                        /* 8x8 PU: packed width/height = (8 >> 2) - 1 = 1 */
                        ps_curr_pu->pu.b4_wd = 1;
                        ps_curr_pu->pu.b4_ht = 1;
                        ps_curr_pu->pu.b4_pos_x = u1_x_pos;
                        ps_curr_pu->pu.b4_pos_y = u1_y_pos;
                        ps_curr_pu->pu.b1_intra_flag = 0;

                        ps_search_node++;
                    }
                    else
                    {
                        /* if NxN was not evaluated at 16x16 level, assign max cost to 8x8 CU
                           to remove 8x8's as possible candidates during evaluation */

                        ps_curr_pu = ps_pu_results->aps_pu_results[0][0] + num_results_per_part_l0;

                        ps_curr_pu->i4_tot_cost = MAX_32BIT_VAL;

                        ps_curr_pu = ps_pu_results->aps_pu_results[1][0] + num_results_per_part_l1;

                        ps_curr_pu->i4_tot_cost = MAX_32BIT_VAL;

                        break;
                    }
                }
            }

            /* Update the num_results per_part across lists L0 and L1 */
            ps_pu_results->u1_num_results_per_part_l0[0] = num_results_per_part_l0;
            ps_pu_results->u1_num_results_per_part_l1[0] = num_results_per_part_l1;
        }
        ps_cu_results++;
        ps_pu_results++;
    }
}

/**
********************************************************************************
*  @fn     hme_insert_intra_nodes_post_bipred
*
*  @brief  Compares intra costs (populated by IPE) with the best inter costs
*          (populated after evaluating bi-pred) and updates the best results
*          if intra cost is better
*
*  @param[in,out]  ps_cu_results [inout] : Best results structure of CU
*                  ps_cur_ipe_ctb [in] : intra results for the current CTB
*                  i4_frm_qstep [in] : current frame quantizer(qscale)*
*
*  @return None
********************************************************************************
*/
void hme_insert_intra_nodes_post_bipred(
    inter_cu_results_t *ps_cu_results,
    ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
    WORD32 i4_frm_qstep)
{
    WORD32 i;
    WORD32 num_results;
    WORD32 cu_size = ps_cu_results->u1_cu_size;
    UWORD8 u1_x_off = ps_cu_results->u1_x_off;
    UWORD8 u1_y_off = ps_cu_results->u1_y_off;

    /* Id of the 32x32 block, 16x16 block in a CTB */
    WORD32 i4_32x32_id = (u1_y_off >> 5) * 2 + (u1_x_off >> 5);
    WORD32 i4_16x16_id = ((u1_y_off >> 4) & 0x1) * 2 + ((u1_x_off >> 4) & 0x1);

    /* Flags to indicate if intra64/intra32/intra16 cusize are invalid as per IPE decision */
    WORD32 disable_intra64 = 0;
    WORD32 disable_intra32 = 0;
    WORD32 disable_intra16 = 0;

    S32 i4_intra_2nx2n_cost;

    /* ME final results for this CU (post seeding of best uni/bi pred results) */
    part_type_results_t *ps_best_result;

    /* In open-loop L0 ME mode the qstep-based noise compensation is zeroed out */
    i4_frm_qstep *= !L0ME_IN_OPENLOOP_MODE;

    /* If inter candidates are enabled then enter the for loop to update the intra candidate */

    if((ps_cu_results->u1_num_best_results == 0) && (CU_8x8 == ps_cu_results->u1_cu_size))
    {
        ps_cu_results->u1_num_best_results = 1;
    }

    num_results = ps_cu_results->u1_num_best_results;
    ps_best_result = &ps_cu_results->ps_best_results[0];

    /* Disable intra16/32/64 flags based on split flags recommended by IPE */
    if(ps_cur_ipe_ctb->u1_split_flag)
    {
        disable_intra64 = 1;
        if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag)
        {
            disable_intra32 = 1;

            if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
                   .as_intra16_analyse[i4_16x16_id]
                   .b1_split_flag)
            {
                disable_intra16 = 1;
            }
        }
    }

    /* Derive the intra cost based on current cu size and offset. The qstep term added
       below scales with CU area: qstep*4 (8x8), qstep*16 (16x16), qstep*64 (32x32),
       qstep*256 (64x64) */
    switch(cu_size)
    {
        case CU_8x8:
        {
            /* Index arithmetic: for pel offsets that are multiples of 8,
               u1_y_off == (u1_y_off >> 3) * 8, so this is row*8 + col in 8x8 units */
            i4_intra_2nx2n_cost = ps_cur_ipe_ctb->ai4_best8x8_intra_cost[u1_y_off + (u1_x_off >> 3)];

            /* Accounting for coding noise in the open loop IPE cost */
            i4_intra_2nx2n_cost +=
                ((i4_frm_qstep * 16) >> 2) /*+ ((i4_frm_qstep*i4_intra_2nx2n_cost)/256) */;

            break;
        }

        case CU_16x16:
        {
            i4_intra_2nx2n_cost =
                ps_cur_ipe_ctb->ai4_best16x16_intra_cost[(u1_y_off >> 4) * 4 + (u1_x_off >> 4)];

            /* Accounting for coding noise in the open loop IPE cost */
            i4_intra_2nx2n_cost +=
                ((i4_frm_qstep * 16)); /* + ((i4_frm_qstep*i4_intra_2nx2n_cost)/256) */

            if(disable_intra16)
            {
                /* Disable intra 2Nx2N (intra 16) as IPE suggested best mode as 8x8 */
                i4_intra_2nx2n_cost = MAX_32BIT_VAL;
            }
            break;
        }

        case CU_32x32:
        {
            i4_intra_2nx2n_cost =
                ps_cur_ipe_ctb->ai4_best32x32_intra_cost[(u1_y_off >> 5) * 2 + (u1_x_off >> 5)];

            /* Accounting for coding noise in the open loop IPE cost */
            i4_intra_2nx2n_cost +=
                (i4_frm_qstep * 16 * 4) /* + ((i4_frm_qstep*i4_intra_2nx2n_cost)/256) */;

            if(disable_intra32)
            {
                /* Disable intra 2Nx2N (intra 32) as IPE suggested best mode as 16x16 or 8x8 */
                i4_intra_2nx2n_cost = MAX_32BIT_VAL;
            }
            break;
        }

        case CU_64x64:
        {
            i4_intra_2nx2n_cost = ps_cur_ipe_ctb->i4_best64x64_intra_cost;

            /* Accounting for coding noise in the open loop IPE cost */
            i4_intra_2nx2n_cost +=
                (i4_frm_qstep * 16 * 16) /* + ((i4_frm_qstep*i4_intra_2nx2n_cost)/256) */;

            if(disable_intra64)
            {
                /* Disable intra 2Nx2N (intra 64) as IPE suggested best mode as 32x32 /16x16 / 8x8 */
                i4_intra_2nx2n_cost = MAX_32BIT_VAL;
            }
            break;
        }

        default:
            ASSERT(0);
    }

    {
        /*****************************************************************/
        /* Intra / Inter cost comparison for 2Nx2N : cu size 8/16/32/64  */
        /* Identify where the current result is to be placed. Basically  */
        /* find the node which has cost just higher than node under test */
        /*****************************************************************/
        for(i = 0; i < num_results; i++)
        {
            /* Subtract the tu_split_flag_cost from total_inter_cost for fair comparison */
            WORD32 inter_cost = ps_best_result[i].i4_tot_cost - ps_best_result[i].i4_tu_split_cost;

            if(i4_intra_2nx2n_cost < inter_cost)
            {
                /* Shift the costlier inter results down to make room (overlapping
                   regions, hence memmove) */
                if(i < (num_results - 1))
                {
                    memmove(
                        ps_best_result + i + 1,
                        ps_best_result + i,
                        sizeof(ps_best_result[0]) * (num_results - 1 - i));
                }

                /* Insert the intra node result */
                ps_best_result[i].u1_part_type = PRT_2Nx2N;
                ps_best_result[i].i4_tot_cost = i4_intra_2nx2n_cost;
                ps_best_result[i].ai4_tu_split_flag[0] = 0;
                ps_best_result[i].ai4_tu_split_flag[1] = 0;
                ps_best_result[i].ai4_tu_split_flag[2] = 0;
                ps_best_result[i].ai4_tu_split_flag[3] = 0;

                /* Populate intra flag, cost and default mvs, refidx for intra pu */
                ps_best_result[i].as_pu_results[0].i4_tot_cost = i4_intra_2nx2n_cost;
                //ps_best_result[i].as_pu_results[0].i4_sad = i4_intra_2nx2n_cost;
                ps_best_result[i].as_pu_results[0].i4_mv_cost = 0;
                ps_best_result[i].as_pu_results[0].pu.b1_intra_flag = 1;
                ps_best_result[i].as_pu_results[0].pu.mv.i1_l0_ref_idx = -1;
                ps_best_result[i].as_pu_results[0].pu.mv.i1_l1_ref_idx = -1;
                ps_best_result[i].as_pu_results[0].pu.mv.s_l0_mv.i2_mvx = INTRA_MV;
                ps_best_result[i].as_pu_results[0].pu.mv.s_l0_mv.i2_mvy = INTRA_MV;
                ps_best_result[i].as_pu_results[0].pu.mv.s_l1_mv.i2_mvx = INTRA_MV;
                ps_best_result[i].as_pu_results[0].pu.mv.s_l1_mv.i2_mvy = INTRA_MV;

                break;
            }
        }
    }
}

/* Recompute the ME lambda for a CTB from its minimum 8x8 activity: qp -> qscale,
   optional spatial-activity modulation, clamp, qscale -> qp, then
   lambda = 2^((qp - 12) / 3) scaled by the frame/picture-type modifier.
   Returns sqrt(lambda) in LAMBDA_Q_SHIFT fixed-point */
S32 hme_recompute_lambda_from_min_8x8_act_in_ctb(
    me_frm_ctxt_t *ps_ctxt, ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb)
{
    double lambda;
    double lambda_modifier;
    WORD32 i4_cu_qp;
    frm_lambda_ctxt_t *ps_frm_lambda_ctxt;
    //ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb;
    WORD32 i4_frame_qp;
    rc_quant_t *ps_rc_quant_ctxt;
    WORD32 i4_is_bpic;

    ps_frm_lambda_ctxt = &ps_ctxt->s_frm_lambda_ctxt;
    //ps_cur_ipe_ctb = ps_ctxt->ps_ipe_l0_ctb_frm_base;
    i4_frame_qp = ps_ctxt->s_frm_prms.i4_frame_qp;
    ps_rc_quant_ctxt = ps_ctxt->ps_rc_quant_ctxt;
    i4_is_bpic = ps_ctxt->s_frm_prms.bidir_enabled;

    i4_cu_qp = ps_rc_quant_ctxt->pi4_qp_to_qscale[i4_frame_qp + ps_rc_quant_ctxt->i1_qp_offset];

    {
        if(ps_ctxt->i4_l0me_qp_mod)
        {
#if MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON
#if LAMDA_BASED_ON_QUANT
            WORD32 i4_activity = ps_cur_ipe_ctb->i4_64x64_act_factor[2][0];
#else
            WORD32 i4_activity = ps_cur_ipe_ctb->i4_64x64_act_factor[3][0];
#endif
            /* Modulate qscale by the activity factor with rounding
               (QP_LEVEL_MOD_ACT_FACTOR fixed-point) */
            i4_cu_qp = (((i4_cu_qp)*i4_activity) + (1 << (QP_LEVEL_MOD_ACT_FACTOR - 1))) >>
                       QP_LEVEL_MOD_ACT_FACTOR;

#endif
        }
        if(i4_cu_qp > ps_rc_quant_ctxt->i2_max_qscale)
            i4_cu_qp = ps_rc_quant_ctxt->i2_max_qscale;
        else if(i4_cu_qp < ps_rc_quant_ctxt->i2_min_qscale)
            i4_cu_qp = ps_rc_quant_ctxt->i2_min_qscale;

        i4_cu_qp = ps_rc_quant_ctxt->pi4_qscale_to_qp[i4_cu_qp];
    }

    if(i4_cu_qp > ps_rc_quant_ctxt->i2_max_qp)
        i4_cu_qp = ps_rc_quant_ctxt->i2_max_qp;
    else if(i4_cu_qp < ps_rc_quant_ctxt->i2_min_qp)
        i4_cu_qp = ps_rc_quant_ctxt->i2_min_qp;

    /* Standard HEVC-style lambda: 2^((QP - 12) / 3) */
    lambda = pow(2.0, (((double)(i4_cu_qp - 12)) / 3));

    lambda_modifier = ps_frm_lambda_ctxt->lambda_modifier;

    if(i4_is_bpic)
    {
        /* B pictures: boost the modifier by (QP-12)/6 clipped to [2.0, 4.0]
           (assumes the ME-local CLIP3(value, min, max) argument order) */
        lambda_modifier = lambda_modifier * CLIP3((((double)(i4_cu_qp - 12)) / 6.0), 2.00, 4.00);
    }
    if(ps_ctxt->i4_use_const_lamda_modifier)
    {
        if(ps_ctxt->s_frm_prms.is_i_pic)
        {
            lambda_modifier = ps_ctxt->f_i_pic_lamda_modifier;
        }
        else
        {
            lambda_modifier = CONST_LAMDA_MOD_VAL;
        }
    }
    lambda *= lambda_modifier;

    /* Return sqrt(lambda) in LAMBDA_Q_SHIFT fixed-point */
    return ((WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT)));
}

/**
********************************************************************************
*  @fn     hme_update_dynamic_search_params
*
*  @brief  Update the Dynamic search params based on the current MVs
*
*  @param[in,out]  ps_dyn_range_prms [inout] : Dyn. Range Param str.
*                  i2_mvy [in] : current MV y comp.
*
*  @return None
********************************************************************************
*/
void hme_update_dynamic_search_params(dyn_range_prms_t *ps_dyn_range_prms, WORD16 i2_mvy)
{
    /* If MV is up large, update i2_dyn_max_y */
    if(i2_mvy > ps_dyn_range_prms->i2_dyn_max_y)
        ps_dyn_range_prms->i2_dyn_max_y = i2_mvy;
    /* If MV is down large, update i2_dyn_min_y */
    if(i2_mvy < ps_dyn_range_prms->i2_dyn_min_y)
        ps_dyn_range_prms->i2_dyn_min_y = i2_mvy;
}

/* Insert ps_result_node into a cost-sorted array of node pointers (ascending
   i4_tot_cost). When pu1_shifts is non-NULL, each stored cost is normalised by a
   per-entry right shift (with rounding) before comparison, and the shift of the
   new node is recorded alongside it. The caller guarantees room for one more
   entry: if the scan finds no costlier entry, i lands at u4_num_results_updated
   and the node is appended at the end. memmove is used because source and
   destination overlap */
void hme_add_new_node_to_a_sorted_array(
    search_node_t *ps_result_node,
    search_node_t **pps_sorted_array,
    U08 *pu1_shifts,
    U32 u4_num_results_updated,
    U08 u1_shift)
{
    U32 i;

    if(NULL == pu1_shifts)
    {
        S32 i4_cur_node_cost = ps_result_node->i4_tot_cost;

        for(i = 0; i < u4_num_results_updated; i++)
        {
            if(i4_cur_node_cost < pps_sorted_array[i]->i4_tot_cost)
            {
                memmove(
                    &pps_sorted_array[i + 1],
                    &pps_sorted_array[i],
                    (u4_num_results_updated - i) * sizeof(search_node_t *));

                break;
            }
        }
    }
    else
    {
        /* Normalise the new node's cost by its shift (rounded) before comparing */
        S32 i4_cur_node_cost =
            (u1_shift == 0) ? ps_result_node->i4_tot_cost
                            : (ps_result_node->i4_tot_cost + (1 << (u1_shift - 1))) >> u1_shift;

        for(i = 0; i < u4_num_results_updated; i++)
        {
            S32 i4_prev_node_cost = (pu1_shifts[i] == 0) ?
                                        pps_sorted_array[i]->i4_tot_cost
                                        : (pps_sorted_array[i]->i4_tot_cost +
                                           (1 << (pu1_shifts[i] - 1))) >>
                                              pu1_shifts[i];

            if(i4_cur_node_cost < i4_prev_node_cost)
            {
                memmove(
                    &pps_sorted_array[i + 1],
                    &pps_sorted_array[i],
                    (u4_num_results_updated - i) * sizeof(search_node_t *));
                memmove(
                    &pu1_shifts[i + 1], &pu1_shifts[i], (u4_num_results_updated - i) * sizeof(U08));

                break;
            }
        }

        pu1_shifts[i] = u1_shift;
    }

    pps_sorted_array[i] = ps_result_node;
}

/* Return the position of the i4_result_id'th occurrence (0-based) of i1_ref_idx
   within pi1_ref_idx[0..i4_num_results-1], or -1 if not found */
S32 hme_find_pos_of_implicitly_stored_ref_id(
    S08 *pi1_ref_idx, S08 i1_ref_idx, S32 i4_result_id, S32 i4_num_results)
{
    S32 i;

    for(i = 0; i < i4_num_results; i++)
    {
        if(i1_ref_idx == pi1_ref_idx[i])
        {
            if(0 == i4_result_id)
            {
                return i;
            }
            else
            {
                i4_result_id--;
            }
        }
    }

    return -1;
}

/* Fill a search node from an MV-bank entry, scaling the MV magnitude by
   i1_mv_magnitude_shift via SHL_NEG (positive shifts scale up for coarser-layer
   candidates; negative presumably scale down — confirm SHL_NEG semantics).
   Marks the node available and not yet subpel-refined */
static __inline void hme_search_node_populator(
    search_node_t *ps_search_node, hme_mv_t *ps_mv, S08 i1_ref_idx, S08 i1_mv_magnitude_shift)
{
    ps_search_node->ps_mv->i2_mvx = SHL_NEG((WORD16)ps_mv->i2_mv_x, i1_mv_magnitude_shift);
    ps_search_node->ps_mv->i2_mvy = SHL_NEG((WORD16)ps_mv->i2_mv_y, i1_mv_magnitude_shift);
    ps_search_node->i1_ref_idx = i1_ref_idx;
    ps_search_node->u1_is_avail = 1;
    ps_search_node->u1_subpel_done = 0;
}

/* Populate the fullpel search candidate list for L0 ME from zero-MV, spatial
   (current-layer MV bank) and projected (coarse-layer MV bank) sources.
   Returns the number of candidates written */
S32 hme_populate_search_candidates(fpel_srch_cand_init_data_t *ps_ctxt)
{
    hme_mv_t *ps_mv;

    S32 wd_c, ht_c, wd_p, ht_p;
    S32 blksize_p, blksize_c;
    S32 i;
    S08 *pi1_ref_idx;
    /* Cache for storing offsets */
    S32 ai4_cand_offsets[NUM_SEARCH_CAND_LOCATIONS];

    layer_ctxt_t *ps_curr_layer = ps_ctxt->ps_curr_layer;
    layer_ctxt_t *ps_coarse_layer = ps_ctxt->ps_coarse_layer;
    layer_mv_t *ps_coarse_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
    layer_mv_t *ps_curr_layer_mvbank =
        ps_curr_layer->ps_layer_mvbank;
    search_candt_t *ps_search_cands = ps_ctxt->ps_search_cands;
    hme_mv_t s_zero_mv = { 0 };

    S32 i4_pos_x = ps_ctxt->i4_pos_x;
    S32 i4_pos_y = ps_ctxt->i4_pos_y;
    S32 i4_num_act_ref_l0 = ps_ctxt->i4_num_act_ref_l0;
    S32 i4_num_act_ref_l1 = ps_ctxt->i4_num_act_ref_l1;
    U08 u1_pred_dir = ps_ctxt->u1_pred_dir;
    U08 u1_pred_dir_ctr = ps_ctxt->u1_pred_dir_ctr;
    U08 u1_num_results_in_curr_mvbank = ps_ctxt->u1_num_results_in_mvbank;
    /* The coarse bank stores L0 entries first, then L1 entries */
    U08 u1_num_results_in_coarse_mvbank =
        (u1_pred_dir == 0) ? (i4_num_act_ref_l0 * ps_coarse_layer_mvbank->i4_num_mvs_per_ref)
                           : (i4_num_act_ref_l1 * ps_coarse_layer_mvbank->i4_num_mvs_per_ref);
    S32 i4_init_offset_projected =
        (u1_pred_dir == 1) ? (i4_num_act_ref_l0 * ps_coarse_layer_mvbank->i4_num_mvs_per_ref) : 0;
    S32 i4_init_offset_spatial =
        (u1_pred_dir_ctr == 1)
            ? (ps_curr_layer_mvbank->i4_num_mvs_per_ref * u1_num_results_in_curr_mvbank)
            : 0;
    U08 u1_search_candidate_list_index = ps_ctxt->u1_search_candidate_list_index;
    U08 u1_max_num_search_cands =
        gau1_max_num_search_cands_in_l0_me[u1_search_candidate_list_index];
    S32 i4_num_srch_cands = MIN(u1_max_num_search_cands, ps_ctxt->i4_max_num_init_cands << 1);
    U16 u2_is_offset_available = 0;
    U08 u1_search_blk_to_spatial_mvbank_blk_size_factor = 1;

    /* Width and ht of current and prev layers */
    wd_c = ps_curr_layer->i4_wd;
    ht_c = ps_curr_layer->i4_ht;
    wd_p = ps_coarse_layer->i4_wd;
    ht_p = ps_coarse_layer->i4_ht;

    blksize_p = gau1_blk_size_to_wd_shift[ps_coarse_layer_mvbank->e_blk_size];
    blksize_c = gau1_blk_size_to_wd_shift[ps_curr_layer_mvbank->e_blk_size];

    /* ASSERT for valid sizes */
    ASSERT((blksize_p == 3) || (blksize_p == 4) || (blksize_p == 5));

    {
        S32 x = i4_pos_x >> 4;
        S32 y = i4_pos_y >> 4;

        /* Search-block size differs from the spatial MV bank's block size:
           two bank blocks per search block in each direction */
        if(blksize_c != gau1_blk_size_to_wd_shift[ps_ctxt->e_search_blk_size])
        {
            x *= 2;
            y *= 2;

            u1_search_blk_to_spatial_mvbank_blk_size_factor = 2;
        }

        i4_init_offset_spatial += (x + y * ps_curr_layer_mvbank->i4_num_blks_per_row) *
                                  ps_curr_layer_mvbank->i4_num_mvs_per_blk;
    }

    for(i = 0; i < i4_num_srch_cands; i++)
    {
        SEARCH_CANDIDATE_TYPE_T e_search_cand_type =
            gae_search_cand_priority_to_search_cand_type_map_in_l0_me[u1_search_candidate_list_index]
                                                                     [i];
        SEARCH_CAND_LOCATIONS_T e_search_cand_loc =
            gae_search_cand_type_to_location_map[e_search_cand_type];
        S08 i1_result_id = MIN(
            gai1_search_cand_type_to_result_id_map[e_search_cand_type],
            (e_search_cand_loc < 0 ? 0
                                   : ps_ctxt->pu1_num_fpel_search_cands[e_search_cand_loc] - 1));
        U08 u1_is_spatial_cand = (1 == gau1_search_cand_type_to_spatiality_map[e_search_cand_type]);
        U08 u1_is_proj_cand = (0 == gau1_search_cand_type_to_spatiality_map[e_search_cand_type]);
        U08 u1_is_zeroMV_cand = (ZERO_MV == e_search_cand_type) ||
                                (ZERO_MV_ALTREF == e_search_cand_type);

        /* When spatial candidates are available, use them, else use the projected candidates */
        /* This is required since some blocks will never have certain spatial candidates, and in order */
        /* to accommodate such instances in 'gae_search_cand_priority_to_search_cand_type_map_in_l0_me' list, */
        /* all candidates apart from the 'LEFT' have been marked as projected */
        if(((e_search_cand_loc == TOPLEFT) || (e_search_cand_loc == TOP) ||
            (e_search_cand_loc == TOPRIGHT)) &&
           (i1_result_id < u1_num_results_in_curr_mvbank) && u1_is_proj_cand)
        {
            if(e_search_cand_loc == TOPLEFT)
            {
                u1_is_spatial_cand = ps_ctxt->u1_is_topLeft_available ||
                                     !ps_ctxt->u1_is_left_available;
            }
            else if(e_search_cand_loc == TOPRIGHT)
            {
                u1_is_spatial_cand = ps_ctxt->u1_is_topRight_available;
            }
            else
            {
                u1_is_spatial_cand = ps_ctxt->u1_is_top_available;
            }

            u1_is_proj_cand = !u1_is_spatial_cand;
        }

        /* Dispatch on candidate class: 1 = zero-MV, 2 = spatial, 4 = projected
           (the three flags are mutually exclusive by construction above) */
        switch(u1_is_zeroMV_cand + (u1_is_spatial_cand << 1) + (u1_is_proj_cand << 2))
        {
            case 1:
            {
                hme_search_node_populator(
                    ps_search_cands[i].ps_search_node,
                    &s_zero_mv,
                    (ZERO_MV == e_search_cand_type) ? ps_ctxt->i1_default_ref_id
                                                    : ps_ctxt->i1_alt_default_ref_id,
                    0);

                break;
            }
            case 2:
            {
                S08 i1_mv_magnitude_shift = 0;

                S32 i4_offset = i4_init_offset_spatial;

                i1_result_id = MIN(i1_result_id, u1_num_results_in_curr_mvbank - 1);
                i4_offset += i1_result_id;

                /* Step from this block's bank entry to the neighbour's; fall back
                   to zero-MV with the default ref when the neighbour is missing */
                switch(e_search_cand_loc)
                {
                    case LEFT:
                    {
                        if(ps_ctxt->u1_is_left_available)
                        {
                            i1_mv_magnitude_shift = -2;

                            i4_offset -= ps_curr_layer_mvbank->i4_num_mvs_per_blk;

                            ps_mv = ps_curr_layer_mvbank->ps_mv + i4_offset;
                            pi1_ref_idx = ps_curr_layer_mvbank->pi1_ref_idx + i4_offset;
                        }
                        else
                        {
                            i1_mv_magnitude_shift = 0;

                            ps_mv = &s_zero_mv;
                            pi1_ref_idx = &ps_ctxt->i1_default_ref_id;
                        }

                        break;
                    }
                    case TOPLEFT:
                    {
                        if(ps_ctxt->u1_is_topLeft_available)
                        {
                            i1_mv_magnitude_shift = -2;

                            i4_offset -= ps_curr_layer_mvbank->i4_num_mvs_per_blk;
                            i4_offset -= ps_curr_layer_mvbank->i4_num_mvs_per_row;

                            ps_mv = ps_curr_layer_mvbank->ps_mv + i4_offset;
                            pi1_ref_idx = ps_curr_layer_mvbank->pi1_ref_idx + i4_offset;
                        }
                        else
                        {
                            i1_mv_magnitude_shift = 0;

                            ps_mv = &s_zero_mv;
                            pi1_ref_idx = &ps_ctxt->i1_default_ref_id;
                        }

                        break;
                    }
                    case TOP:
                    {
                        if(ps_ctxt->u1_is_top_available)
                        {
                            i1_mv_magnitude_shift = -2;

                            i4_offset -= ps_curr_layer_mvbank->i4_num_mvs_per_row;

                            ps_mv = ps_curr_layer_mvbank->ps_mv + i4_offset;
                            pi1_ref_idx = ps_curr_layer_mvbank->pi1_ref_idx + i4_offset;
                        }
                        else
                        {
                            i1_mv_magnitude_shift = 0;

                            ps_mv = &s_zero_mv;
                            pi1_ref_idx = &ps_ctxt->i1_default_ref_id;
                        }

                        break;
                    }
                    case TOPRIGHT:
                    {
                        if(ps_ctxt->u1_is_topRight_available)
                        {
                            i1_mv_magnitude_shift = -2;

                            i4_offset += ps_curr_layer_mvbank->i4_num_mvs_per_blk *
                                         u1_search_blk_to_spatial_mvbank_blk_size_factor;
                            i4_offset -= ps_curr_layer_mvbank->i4_num_mvs_per_row;

                            ps_mv = ps_curr_layer_mvbank->ps_mv + i4_offset;
                            pi1_ref_idx = ps_curr_layer_mvbank->pi1_ref_idx + i4_offset;
                        }
                        else
                        {
                            i1_mv_magnitude_shift = 0;
                            ps_mv = &s_zero_mv;
                            pi1_ref_idx = &ps_ctxt->i1_default_ref_id;
                        }

                        break;
                    }
                    default:
                    {
                        /* AiyAiyYo!! */
                        ASSERT(0);
                    }
                }

                hme_search_node_populator(
                    ps_search_cands[i].ps_search_node, ps_mv, pi1_ref_idx[0], i1_mv_magnitude_shift);

                break;
            }
            case 4:
            {
                ASSERT(ILLUSORY_CANDIDATE != e_search_cand_type);
                ASSERT(ILLUSORY_LOCATION != e_search_cand_loc);

                i1_result_id = MIN(i1_result_id, u1_num_results_in_coarse_mvbank - 1);

                /* Compute the coarse-bank offset for this location only once;
                   cache it keyed by a per-location bit in u2_is_offset_available */
                if(!(u2_is_offset_available & (1 << e_search_cand_loc)))
                {
                    S32 x, y;

                    x = i4_pos_x + gai4_search_cand_location_to_x_offset_map[e_search_cand_loc];
                    y = i4_pos_y + gai4_search_cand_location_to_y_offset_map[e_search_cand_loc];

                    /* Safety check to avoid uninitialized access across temporal layers */
                    x = CLIP3(x, 0, (wd_c - blksize_p));
                    y = CLIP3(y, 0, (ht_c - blksize_p));

                    /* Project the positions to prev layer */
                    x = x >> blksize_p;
                    y = y >> blksize_p;

                    ai4_cand_offsets[e_search_cand_loc] =
                        (x * ps_coarse_layer_mvbank->i4_num_mvs_per_blk);
                    ai4_cand_offsets[e_search_cand_loc] +=
                        (y * ps_coarse_layer_mvbank->i4_num_mvs_per_row);
                    ai4_cand_offsets[e_search_cand_loc] += i4_init_offset_projected;

                    u2_is_offset_available |= (1 << e_search_cand_loc);
                }

                ps_mv =
                    ps_coarse_layer_mvbank->ps_mv + ai4_cand_offsets[e_search_cand_loc] + i1_result_id;
                pi1_ref_idx = ps_coarse_layer_mvbank->pi1_ref_idx +
                              ai4_cand_offsets[e_search_cand_loc] + i1_result_id;

                /* Coarse-layer MVs are scaled up by a shift of 1 */
                hme_search_node_populator(ps_search_cands[i].ps_search_node, ps_mv, pi1_ref_idx[0], 1);

                break;
            }
            default:
            {
                /* NoNoNoNoNooooooooNO! */
                ASSERT(0);
            }
        }

        ASSERT(ps_search_cands[i].ps_search_node->i1_ref_idx >= 0);
        ASSERT(
            !u1_pred_dir
                ? (ps_ctxt->pi4_ref_id_lc_to_l0_map[ps_search_cands[i].ps_search_node->i1_ref_idx] <
                   i4_num_act_ref_l0)
                : (ps_ctxt->pi4_ref_id_lc_to_l1_map[ps_search_cands[i].ps_search_node->i1_ref_idx] <
                   ps_ctxt->i4_num_act_ref_l1));
    }

    return i4_num_srch_cands;
}

/* Clip every candidate MV into the valid range for its reference picture.
   Done before deduplication: after clipping, two candidates may coincide and
   can then be removed as duplicates */
void hme_mv_clipper(
    hme_search_prms_t *ps_search_prms_blk,
    S32 i4_num_srch_cands,
    S08 i1_check_for_mult_refs,
    U08 u1_fpel_refine_extent,
    U08 u1_hpel_refine_extent,
    U08 u1_qpel_refine_extent)
{
    S32 candt;
    range_prms_t *ps_range_prms;

    for(candt = 0; candt < i4_num_srch_cands; candt++)
    {
        search_node_t *ps_search_node;

        ps_search_node = ps_search_prms_blk->ps_search_candts[candt].ps_search_node;
        ps_range_prms = ps_search_prms_blk->aps_mv_range[ps_search_node->i1_ref_idx];

        /* Clip the motion vectors as well here since after clipping
           two candidates can become same and they will be removed during deduplication */
        CLIP_MV_WITHIN_RANGE(
            ps_search_node->ps_mv->i2_mvx,
            ps_search_node->ps_mv->i2_mvy,
            ps_range_prms,
            u1_fpel_refine_extent,
            u1_hpel_refine_extent,
            u1_qpel_refine_extent);
    }
}

void hme_init_pred_buf_info(
    hme_pred_buf_info_t (*ps_info)[MAX_NUM_INTER_PARTS],
    hme_pred_buf_mngr_t *ps_buf_mngr,
    U08 u1_pu1_wd,
    U08 u1_pu1_ht,
    PART_TYPE_T e_part_type)
{
    U08 u1_pred_buf_array_id;

    if(1 !=
ihevce_get_free_pred_buf_indices( 4104 &u1_pred_buf_array_id, &ps_buf_mngr->u4_pred_buf_usage_indicator, 1)) 4105 { 4106 ASSERT(0); 4107 } 4108 else 4109 { 4110 ps_info[0][0].i4_pred_stride = MAX_CU_SIZE; 4111 ps_info[0][0].pu1_pred = ps_buf_mngr->apu1_pred_bufs[u1_pred_buf_array_id]; 4112 ps_info[0][0].u1_pred_buf_array_id = u1_pred_buf_array_id; 4113 4114 if(PRT_2Nx2N != e_part_type) 4115 { 4116 ps_info[0][1].i4_pred_stride = MAX_CU_SIZE; 4117 ps_info[0][1].pu1_pred = ps_buf_mngr->apu1_pred_bufs[u1_pred_buf_array_id] + 4118 (gai1_is_part_vertical[ge_part_type_to_part_id[e_part_type][0]] 4119 ? u1_pu1_ht * ps_info[0][1].i4_pred_stride 4120 : u1_pu1_wd); 4121 ps_info[0][1].u1_pred_buf_array_id = u1_pred_buf_array_id; 4122 } 4123 } 4124 } 4125 4126 void hme_debrief_bipred_eval( 4127 part_type_results_t *ps_part_type_result, 4128 hme_pred_buf_info_t (*ps_pred_buf_info)[MAX_NUM_INTER_PARTS], 4129 hme_pred_buf_mngr_t *ps_pred_buf_mngr, 4130 U08 *pu1_allocated_pred_buf_array_indixes, 4131 ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list 4132 4133 ) 4134 { 4135 PART_TYPE_T e_part_type = (PART_TYPE_T)ps_part_type_result->u1_part_type; 4136 4137 U32 *pu4_pred_buf_usage_indicator = &ps_pred_buf_mngr->u4_pred_buf_usage_indicator; 4138 U08 u1_is_part_vertical = gai1_is_part_vertical[ge_part_type_to_part_id[e_part_type][0]]; 4139 4140 if(0 == ps_part_type_result->u1_part_type) 4141 { 4142 if(ps_part_type_result->as_pu_results->pu.b2_pred_mode == PRED_BI) 4143 { 4144 ASSERT(UCHAR_MAX != ps_pred_buf_info[2][0].u1_pred_buf_array_id); 4145 4146 ps_part_type_result->pu1_pred = ps_pred_buf_info[2][0].pu1_pred; 4147 ps_part_type_result->i4_pred_stride = ps_pred_buf_info[2][0].i4_pred_stride; 4148 4149 ihevce_set_pred_buf_as_free( 4150 pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[0]); 4151 4152 ihevce_set_pred_buf_as_free( 4153 pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[1]); 4154 } 4155 else 4156 { 4157 
ps_part_type_result->pu1_pred = ps_pred_buf_info[0][0].pu1_pred; 4158 ps_part_type_result->i4_pred_stride = ps_pred_buf_info[0][0].i4_pred_stride; 4159 4160 ihevce_set_pred_buf_as_free( 4161 pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[2]); 4162 4163 ihevce_set_pred_buf_as_free( 4164 pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[1]); 4165 4166 if(UCHAR_MAX == ps_pred_buf_info[0][0].u1_pred_buf_array_id) 4167 { 4168 ihevce_set_pred_buf_as_free( 4169 pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[0]); 4170 } 4171 } 4172 } 4173 else 4174 { 4175 U08 *pu1_src_pred; 4176 U08 *pu1_dst_pred; 4177 S32 i4_src_pred_stride; 4178 S32 i4_dst_pred_stride; 4179 4180 U08 u1_pu1_wd = (ps_part_type_result->as_pu_results[0].pu.b4_wd + 1) << 2; 4181 U08 u1_pu1_ht = (ps_part_type_result->as_pu_results[0].pu.b4_ht + 1) << 2; 4182 U08 u1_pu2_wd = (ps_part_type_result->as_pu_results[1].pu.b4_wd + 1) << 2; 4183 U08 u1_pu2_ht = (ps_part_type_result->as_pu_results[1].pu.b4_ht + 1) << 2; 4184 4185 U08 u1_condition_for_switch = 4186 (ps_part_type_result->as_pu_results[0].pu.b2_pred_mode == PRED_BI) | 4187 ((ps_part_type_result->as_pu_results[1].pu.b2_pred_mode == PRED_BI) << 1); 4188 4189 switch(u1_condition_for_switch) 4190 { 4191 case 0: 4192 { 4193 ps_part_type_result->pu1_pred = 4194 ps_pred_buf_mngr->apu1_pred_bufs[pu1_allocated_pred_buf_array_indixes[0]]; 4195 ps_part_type_result->i4_pred_stride = MAX_CU_SIZE; 4196 4197 ihevce_set_pred_buf_as_free( 4198 pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[2]); 4199 4200 ihevce_set_pred_buf_as_free( 4201 pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[1]); 4202 4203 if(UCHAR_MAX == ps_pred_buf_info[0][0].u1_pred_buf_array_id) 4204 { 4205 pu1_src_pred = ps_pred_buf_info[0][0].pu1_pred; 4206 pu1_dst_pred = ps_part_type_result->pu1_pred; 4207 i4_src_pred_stride = ps_pred_buf_info[0][0].i4_pred_stride; 4208 i4_dst_pred_stride = 
ps_part_type_result->i4_pred_stride; 4209 4210 ps_cmn_utils_optimised_function_list->pf_copy_2d( 4211 pu1_dst_pred, 4212 i4_dst_pred_stride, 4213 pu1_src_pred, 4214 i4_src_pred_stride, 4215 u1_pu1_wd, 4216 u1_pu1_ht); 4217 } 4218 4219 if(UCHAR_MAX == ps_pred_buf_info[0][1].u1_pred_buf_array_id) 4220 { 4221 pu1_src_pred = ps_pred_buf_info[0][1].pu1_pred; 4222 pu1_dst_pred = ps_part_type_result->pu1_pred + 4223 (u1_is_part_vertical 4224 ? u1_pu1_ht * ps_part_type_result->i4_pred_stride 4225 : u1_pu1_wd); 4226 i4_src_pred_stride = ps_pred_buf_info[0][1].i4_pred_stride; 4227 i4_dst_pred_stride = ps_part_type_result->i4_pred_stride; 4228 4229 ps_cmn_utils_optimised_function_list->pf_copy_2d( 4230 pu1_dst_pred, 4231 i4_dst_pred_stride, 4232 pu1_src_pred, 4233 i4_src_pred_stride, 4234 u1_pu2_wd, 4235 u1_pu2_ht); 4236 } 4237 4238 break; 4239 } 4240 case 1: 4241 { 4242 ASSERT(UCHAR_MAX != ps_pred_buf_info[2][0].u1_pred_buf_array_id); 4243 4244 ihevce_set_pred_buf_as_free( 4245 pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[1]); 4246 4247 /* Copy PU1 pred into PU2's pred buf */ 4248 if(((u1_pu1_ht < u1_pu2_ht) || (u1_pu1_wd < u1_pu2_wd)) && 4249 (UCHAR_MAX != ps_pred_buf_info[0][1].u1_pred_buf_array_id)) 4250 { 4251 ps_part_type_result->pu1_pred = 4252 ps_pred_buf_info[0][1].pu1_pred - 4253 (u1_is_part_vertical ? 
u1_pu1_ht * ps_pred_buf_info[0][1].i4_pred_stride 4254 : u1_pu1_wd); 4255 ps_part_type_result->i4_pred_stride = ps_pred_buf_info[0][1].i4_pred_stride; 4256 4257 ihevce_set_pred_buf_as_free( 4258 pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[2]); 4259 4260 pu1_src_pred = ps_pred_buf_info[2][0].pu1_pred; 4261 pu1_dst_pred = ps_part_type_result->pu1_pred; 4262 i4_src_pred_stride = ps_pred_buf_info[2][0].i4_pred_stride; 4263 i4_dst_pred_stride = ps_part_type_result->i4_pred_stride; 4264 4265 ps_cmn_utils_optimised_function_list->pf_copy_2d( 4266 pu1_dst_pred, 4267 i4_dst_pred_stride, 4268 pu1_src_pred, 4269 i4_src_pred_stride, 4270 u1_pu1_wd, 4271 u1_pu1_ht); 4272 } 4273 else 4274 { 4275 ps_part_type_result->pu1_pred = ps_pred_buf_info[2][0].pu1_pred; 4276 ps_part_type_result->i4_pred_stride = ps_pred_buf_info[2][0].i4_pred_stride; 4277 4278 ihevce_set_pred_buf_as_free( 4279 pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[0]); 4280 4281 pu1_src_pred = ps_pred_buf_info[0][1].pu1_pred; 4282 pu1_dst_pred = ps_part_type_result->pu1_pred; 4283 i4_src_pred_stride = ps_pred_buf_info[0][1].i4_pred_stride; 4284 i4_dst_pred_stride = ps_part_type_result->i4_pred_stride; 4285 4286 ps_cmn_utils_optimised_function_list->pf_copy_2d( 4287 pu1_dst_pred, 4288 i4_dst_pred_stride, 4289 pu1_src_pred, 4290 i4_src_pred_stride, 4291 u1_pu2_wd, 4292 u1_pu2_ht); 4293 } 4294 4295 break; 4296 } 4297 case 2: 4298 { 4299 ASSERT(UCHAR_MAX != ps_pred_buf_info[2][1].u1_pred_buf_array_id); 4300 4301 ihevce_set_pred_buf_as_free( 4302 pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[1]); 4303 4304 /* Copy PU2 pred into PU1's pred buf */ 4305 if(((u1_pu1_ht > u1_pu2_ht) || (u1_pu1_wd > u1_pu2_wd)) && 4306 (UCHAR_MAX != ps_pred_buf_info[0][0].u1_pred_buf_array_id)) 4307 { 4308 ps_part_type_result->pu1_pred = ps_pred_buf_info[0][0].pu1_pred; 4309 ps_part_type_result->i4_pred_stride = ps_pred_buf_info[0][0].i4_pred_stride; 4310 4311 
ihevce_set_pred_buf_as_free( 4312 pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[2]); 4313 4314 pu1_src_pred = ps_pred_buf_info[2][1].pu1_pred; 4315 pu1_dst_pred = ps_part_type_result->pu1_pred + 4316 (u1_is_part_vertical 4317 ? u1_pu1_ht * ps_part_type_result->i4_pred_stride 4318 : u1_pu1_wd); 4319 i4_src_pred_stride = ps_pred_buf_info[2][1].i4_pred_stride; 4320 i4_dst_pred_stride = ps_part_type_result->i4_pred_stride; 4321 4322 ps_cmn_utils_optimised_function_list->pf_copy_2d( 4323 pu1_dst_pred, 4324 i4_dst_pred_stride, 4325 pu1_src_pred, 4326 i4_src_pred_stride, 4327 u1_pu2_wd, 4328 u1_pu2_ht); 4329 } 4330 else 4331 { 4332 ps_part_type_result->pu1_pred = 4333 ps_pred_buf_info[2][1].pu1_pred - 4334 (u1_is_part_vertical ? u1_pu1_ht * ps_pred_buf_info[2][1].i4_pred_stride 4335 : u1_pu1_wd); 4336 ps_part_type_result->i4_pred_stride = ps_pred_buf_info[2][1].i4_pred_stride; 4337 4338 ihevce_set_pred_buf_as_free( 4339 pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[0]); 4340 4341 pu1_src_pred = ps_pred_buf_info[0][0].pu1_pred; 4342 pu1_dst_pred = ps_part_type_result->pu1_pred; 4343 i4_src_pred_stride = ps_pred_buf_info[0][0].i4_pred_stride; 4344 i4_dst_pred_stride = ps_part_type_result->i4_pred_stride; 4345 4346 ps_cmn_utils_optimised_function_list->pf_copy_2d( 4347 pu1_dst_pred, 4348 i4_dst_pred_stride, 4349 pu1_src_pred, 4350 i4_src_pred_stride, 4351 u1_pu1_wd, 4352 u1_pu1_ht); 4353 } 4354 4355 break; 4356 } 4357 case 3: 4358 { 4359 ASSERT(UCHAR_MAX != ps_pred_buf_info[2][0].u1_pred_buf_array_id); 4360 ASSERT(UCHAR_MAX != ps_pred_buf_info[2][1].u1_pred_buf_array_id); 4361 ASSERT( 4362 ps_pred_buf_info[2][1].u1_pred_buf_array_id == 4363 ps_pred_buf_info[2][0].u1_pred_buf_array_id); 4364 4365 ps_part_type_result->pu1_pred = ps_pred_buf_info[2][0].pu1_pred; 4366 ps_part_type_result->i4_pred_stride = ps_pred_buf_info[2][0].i4_pred_stride; 4367 4368 ihevce_set_pred_buf_as_free( 4369 pu4_pred_buf_usage_indicator, 
pu1_allocated_pred_buf_array_indixes[0]); 4370 4371 break; 4372 } 4373 } 4374 } 4375 } 4376 4377 U08 hme_decide_search_candidate_priority_in_l1_and_l2_me( 4378 SEARCH_CANDIDATE_TYPE_T e_cand_type, ME_QUALITY_PRESETS_T e_quality_preset) 4379 { 4380 U08 u1_priority_val = 4381 gau1_search_cand_priority_in_l1_and_l2_me[e_quality_preset >= ME_MEDIUM_SPEED][e_cand_type]; 4382 4383 if(UCHAR_MAX == u1_priority_val) 4384 { 4385 ASSERT(0); 4386 } 4387 4388 ASSERT(u1_priority_val <= MAX_INIT_CANDTS); 4389 4390 return u1_priority_val; 4391 } 4392 4393 U08 hme_decide_search_candidate_priority_in_l0_me(SEARCH_CANDIDATE_TYPE_T e_cand_type, U08 u1_index) 4394 { 4395 U08 u1_priority_val = gau1_search_cand_priority_in_l0_me[u1_index][e_cand_type]; 4396 4397 if(UCHAR_MAX == u1_priority_val) 4398 { 4399 ASSERT(0); 4400 } 4401 4402 ASSERT(u1_priority_val <= MAX_INIT_CANDTS); 4403 4404 return u1_priority_val; 4405 } 4406 4407 void hme_search_cand_data_init( 4408 S32 *pi4_id_Z, 4409 S32 *pi4_id_coloc, 4410 S32 *pi4_num_coloc_cands, 4411 U08 *pu1_search_candidate_list_index, 4412 S32 i4_num_act_ref_l0, 4413 S32 i4_num_act_ref_l1, 4414 U08 u1_is_bidir_enabled, 4415 U08 u1_4x4_blk_in_l1me) 4416 { 4417 S32 i, j; 4418 S32 i4_num_coloc_cands; 4419 4420 U08 u1_search_candidate_list_index; 4421 4422 if(!u1_is_bidir_enabled && !u1_4x4_blk_in_l1me) 4423 { 4424 S32 i; 4425 4426 u1_search_candidate_list_index = (i4_num_act_ref_l0 - 1) * 2; 4427 i4_num_coloc_cands = i4_num_act_ref_l0 * 2; 4428 4429 switch(i4_num_act_ref_l0) 4430 { 4431 case 1: 4432 { 4433 for(i = 0; i < 2; i++) 4434 { 4435 pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me( 4436 (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i), 4437 u1_search_candidate_list_index); 4438 } 4439 4440 break; 4441 } 4442 case 2: 4443 { 4444 for(i = 0; i < 4; i++) 4445 { 4446 pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me( 4447 (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i), 4448 u1_search_candidate_list_index); 4449 } 4450 
4451 break; 4452 } 4453 case 3: 4454 { 4455 for(i = 0; i < 6; i++) 4456 { 4457 pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me( 4458 (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i), 4459 u1_search_candidate_list_index); 4460 } 4461 4462 break; 4463 } 4464 case 4: 4465 { 4466 for(i = 0; i < 8; i++) 4467 { 4468 pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me( 4469 (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i), 4470 u1_search_candidate_list_index); 4471 } 4472 4473 break; 4474 } 4475 default: 4476 { 4477 ASSERT(0); 4478 } 4479 } 4480 4481 *pi4_num_coloc_cands = i4_num_coloc_cands; 4482 *pu1_search_candidate_list_index = u1_search_candidate_list_index; 4483 } 4484 else if(!u1_is_bidir_enabled && u1_4x4_blk_in_l1me) 4485 { 4486 S32 i; 4487 4488 i4_num_coloc_cands = i4_num_act_ref_l0 * 2; 4489 u1_search_candidate_list_index = (i4_num_act_ref_l0 - 1) * 2 + 1; 4490 4491 switch(i4_num_act_ref_l0) 4492 { 4493 case 1: 4494 { 4495 for(i = 0; i < 2; i++) 4496 { 4497 pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me( 4498 (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i), 4499 u1_search_candidate_list_index); 4500 } 4501 4502 pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me( 4503 PROJECTED_COLOC_TR0, u1_search_candidate_list_index); 4504 4505 pi4_id_coloc[i + 1] = hme_decide_search_candidate_priority_in_l0_me( 4506 PROJECTED_COLOC_BL0, u1_search_candidate_list_index); 4507 4508 pi4_id_coloc[i + 2] = hme_decide_search_candidate_priority_in_l0_me( 4509 PROJECTED_COLOC_BR0, u1_search_candidate_list_index); 4510 4511 i4_num_coloc_cands += 3; 4512 4513 break; 4514 } 4515 case 2: 4516 { 4517 for(i = 0; i < 4; i++) 4518 { 4519 pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me( 4520 (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i), 4521 u1_search_candidate_list_index); 4522 } 4523 4524 pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me( 4525 PROJECTED_COLOC_TR0, u1_search_candidate_list_index); 4526 
4527 pi4_id_coloc[i + 1] = hme_decide_search_candidate_priority_in_l0_me( 4528 PROJECTED_COLOC_BL0, u1_search_candidate_list_index); 4529 4530 pi4_id_coloc[i + 2] = hme_decide_search_candidate_priority_in_l0_me( 4531 PROJECTED_COLOC_BR0, u1_search_candidate_list_index); 4532 4533 pi4_id_coloc[i + 3] = hme_decide_search_candidate_priority_in_l0_me( 4534 PROJECTED_COLOC_TR1, u1_search_candidate_list_index); 4535 4536 pi4_id_coloc[i + 4] = hme_decide_search_candidate_priority_in_l0_me( 4537 PROJECTED_COLOC_BL1, u1_search_candidate_list_index); 4538 4539 pi4_id_coloc[i + 5] = hme_decide_search_candidate_priority_in_l0_me( 4540 PROJECTED_COLOC_BR1, u1_search_candidate_list_index); 4541 4542 i4_num_coloc_cands += 6; 4543 4544 break; 4545 } 4546 case 3: 4547 { 4548 for(i = 0; i < 6; i++) 4549 { 4550 pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me( 4551 (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i), 4552 u1_search_candidate_list_index); 4553 } 4554 4555 pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me( 4556 PROJECTED_COLOC_TR0, u1_search_candidate_list_index); 4557 4558 pi4_id_coloc[i + 1] = hme_decide_search_candidate_priority_in_l0_me( 4559 PROJECTED_COLOC_BL0, u1_search_candidate_list_index); 4560 4561 pi4_id_coloc[i + 2] = hme_decide_search_candidate_priority_in_l0_me( 4562 PROJECTED_COLOC_BR0, u1_search_candidate_list_index); 4563 4564 pi4_id_coloc[i + 3] = hme_decide_search_candidate_priority_in_l0_me( 4565 PROJECTED_COLOC_TR1, u1_search_candidate_list_index); 4566 4567 pi4_id_coloc[i + 4] = hme_decide_search_candidate_priority_in_l0_me( 4568 PROJECTED_COLOC_BL1, u1_search_candidate_list_index); 4569 4570 pi4_id_coloc[i + 5] = hme_decide_search_candidate_priority_in_l0_me( 4571 PROJECTED_COLOC_BR1, u1_search_candidate_list_index); 4572 4573 i4_num_coloc_cands += 6; 4574 4575 break; 4576 } 4577 case 4: 4578 { 4579 for(i = 0; i < 8; i++) 4580 { 4581 pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me( 4582 
(SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i), 4583 u1_search_candidate_list_index); 4584 } 4585 4586 pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me( 4587 PROJECTED_COLOC_TR0, u1_search_candidate_list_index); 4588 4589 pi4_id_coloc[i + 1] = hme_decide_search_candidate_priority_in_l0_me( 4590 PROJECTED_COLOC_BL0, u1_search_candidate_list_index); 4591 4592 pi4_id_coloc[i + 2] = hme_decide_search_candidate_priority_in_l0_me( 4593 PROJECTED_COLOC_BR0, u1_search_candidate_list_index); 4594 4595 pi4_id_coloc[i + 3] = hme_decide_search_candidate_priority_in_l0_me( 4596 PROJECTED_COLOC_TR1, u1_search_candidate_list_index); 4597 4598 pi4_id_coloc[i + 4] = hme_decide_search_candidate_priority_in_l0_me( 4599 PROJECTED_COLOC_BL1, u1_search_candidate_list_index); 4600 4601 pi4_id_coloc[i + 5] = hme_decide_search_candidate_priority_in_l0_me( 4602 PROJECTED_COLOC_BR1, u1_search_candidate_list_index); 4603 4604 i4_num_coloc_cands += 6; 4605 4606 break; 4607 } 4608 default: 4609 { 4610 ASSERT(0); 4611 } 4612 } 4613 4614 *pi4_num_coloc_cands = i4_num_coloc_cands; 4615 *pu1_search_candidate_list_index = u1_search_candidate_list_index; 4616 } 4617 else 4618 { 4619 /* The variable 'u1_search_candidate_list_index' is hardcoded */ 4620 /* to 10 and 11 respectively. 
But, these values are not returned */ 4621 /* by this function since the actual values are dependent on */ 4622 /* the number of refs in L0 and L1 respectively */ 4623 /* Hence, the actual return values are being recomputed */ 4624 /* in the latter part of this block */ 4625 4626 if(!u1_4x4_blk_in_l1me) 4627 { 4628 u1_search_candidate_list_index = 10; 4629 4630 i4_num_coloc_cands = 2 + (2 * ((i4_num_act_ref_l0 > 1) || (i4_num_act_ref_l1 > 1))); 4631 4632 for(i = 0; i < i4_num_coloc_cands; i++) 4633 { 4634 pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me( 4635 (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i), 4636 u1_search_candidate_list_index); 4637 } 4638 } 4639 else 4640 { 4641 u1_search_candidate_list_index = 11; 4642 4643 i4_num_coloc_cands = 2 + (2 * ((i4_num_act_ref_l0 > 1) || (i4_num_act_ref_l1 > 1))); 4644 4645 for(i = 0; i < i4_num_coloc_cands; i++) 4646 { 4647 pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me( 4648 (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i), 4649 u1_search_candidate_list_index); 4650 } 4651 4652 pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me( 4653 PROJECTED_COLOC_TR0, u1_search_candidate_list_index); 4654 4655 pi4_id_coloc[i + 1] = hme_decide_search_candidate_priority_in_l0_me( 4656 PROJECTED_COLOC_BL0, u1_search_candidate_list_index); 4657 4658 pi4_id_coloc[i + 2] = hme_decide_search_candidate_priority_in_l0_me( 4659 PROJECTED_COLOC_BR0, u1_search_candidate_list_index); 4660 } 4661 4662 for(j = 0; j < 2; j++) 4663 { 4664 if(0 == j) 4665 { 4666 pu1_search_candidate_list_index[j] = 4667 8 + ((i4_num_act_ref_l0 > 1) * 2) + u1_4x4_blk_in_l1me; 4668 pi4_num_coloc_cands[j] = 4669 (u1_4x4_blk_in_l1me * 3) + 2 + ((i4_num_act_ref_l0 > 1) * 2); 4670 } 4671 else 4672 { 4673 pu1_search_candidate_list_index[j] = 4674 8 + ((i4_num_act_ref_l1 > 1) * 2) + u1_4x4_blk_in_l1me; 4675 pi4_num_coloc_cands[j] = 4676 (u1_4x4_blk_in_l1me * 3) + 2 + ((i4_num_act_ref_l1 > 1) * 2); 4677 } 4678 } 4679 } 4680 4681 
if(i4_num_act_ref_l0 || i4_num_act_ref_l1) 4682 { 4683 pi4_id_Z[0] = hme_decide_search_candidate_priority_in_l0_me( 4684 (SEARCH_CANDIDATE_TYPE_T)ZERO_MV, pu1_search_candidate_list_index[0]); 4685 } 4686 4687 if((i4_num_act_ref_l0 > 1) && !u1_is_bidir_enabled) 4688 { 4689 pi4_id_Z[1] = hme_decide_search_candidate_priority_in_l0_me( 4690 (SEARCH_CANDIDATE_TYPE_T)ZERO_MV_ALTREF, pu1_search_candidate_list_index[0]); 4691 } 4692 } 4693 4694 static U08 4695 hme_determine_base_block_size(S32 *pi4_valid_part_array, S32 i4_num_valid_parts, U08 u1_cu_size) 4696 { 4697 ASSERT(i4_num_valid_parts > 0); 4698 4699 if(1 == i4_num_valid_parts) 4700 { 4701 ASSERT(pi4_valid_part_array[i4_num_valid_parts - 1] == PART_ID_2Nx2N); 4702 4703 return u1_cu_size; 4704 } 4705 else 4706 { 4707 if(pi4_valid_part_array[i4_num_valid_parts - 1] <= PART_ID_NxN_BR) 4708 { 4709 return u1_cu_size / 2; 4710 } 4711 else if(pi4_valid_part_array[i4_num_valid_parts - 1] <= PART_ID_nRx2N_R) 4712 { 4713 return u1_cu_size / 4; 4714 } 4715 } 4716 4717 return u1_cu_size / 4; 4718 } 4719 4720 static U32 hme_compute_variance_of_pu_from_base_blocks( 4721 ULWORD64 *pu8_SigmaX, 4722 ULWORD64 *pu8_SigmaXSquared, 4723 U08 u1_cu_size, 4724 U08 u1_base_block_size, 4725 S32 i4_part_id) 4726 { 4727 U08 i, j; 4728 ULWORD64 u8_final_variance; 4729 4730 U08 u1_part_dimension_multiplier = (u1_cu_size >> 4); 4731 S32 i4_part_wd = gai1_part_wd_and_ht[i4_part_id][0] * u1_part_dimension_multiplier; 4732 S32 i4_part_ht = gai1_part_wd_and_ht[i4_part_id][1] * u1_part_dimension_multiplier; 4733 U08 u1_num_base_blocks_in_pu_row = i4_part_wd / u1_base_block_size; 4734 U08 u1_num_base_blocks_in_pu_column = i4_part_ht / u1_base_block_size; 4735 U08 u1_num_base_blocks_in_cu_row = u1_cu_size / u1_base_block_size; 4736 U08 u1_num_base_blocks = (u1_num_base_blocks_in_pu_row * u1_num_base_blocks_in_pu_column); 4737 U32 u4_num_pixels_in_base_block = u1_base_block_size * u1_base_block_size; 4738 ULWORD64 u8_final_SigmaXSquared = 0; 4739 
ULWORD64 u8_final_SigmaX = 0; 4740 4741 if(ge_part_id_to_part_type[i4_part_id] != PRT_NxN) 4742 { 4743 U08 u1_column_start_index = gau1_part_id_to_part_num[i4_part_id] 4744 ? (gai1_is_part_vertical[i4_part_id] 4745 ? 0 4746 : (u1_cu_size - i4_part_wd) / u1_base_block_size) 4747 : 0; 4748 U08 u1_row_start_index = gau1_part_id_to_part_num[i4_part_id] 4749 ? (gai1_is_part_vertical[i4_part_id] 4750 ? (u1_cu_size - i4_part_ht) / u1_base_block_size 4751 : 0) 4752 : 0; 4753 U08 u1_column_end_index = u1_column_start_index + u1_num_base_blocks_in_pu_row; 4754 U08 u1_row_end_index = u1_row_start_index + u1_num_base_blocks_in_pu_column; 4755 4756 for(i = u1_row_start_index; i < u1_row_end_index; i++) 4757 { 4758 for(j = u1_column_start_index; j < u1_column_end_index; j++) 4759 { 4760 u8_final_SigmaXSquared += pu8_SigmaXSquared[j + i * u1_num_base_blocks_in_cu_row]; 4761 u8_final_SigmaX += pu8_SigmaX[j + i * u1_num_base_blocks_in_cu_row]; 4762 } 4763 } 4764 4765 u8_final_variance = 4766 u1_num_base_blocks * u4_num_pixels_in_base_block * u8_final_SigmaXSquared; 4767 u8_final_variance -= u8_final_SigmaX * u8_final_SigmaX; 4768 u8_final_variance += 4769 ((u1_num_base_blocks * u4_num_pixels_in_base_block) * 4770 (u1_num_base_blocks * u4_num_pixels_in_base_block) / 2); 4771 u8_final_variance /= (u1_num_base_blocks * u4_num_pixels_in_base_block) * 4772 (u1_num_base_blocks * u4_num_pixels_in_base_block); 4773 4774 ASSERT(u8_final_variance <= UINT_MAX); 4775 } 4776 else 4777 { 4778 U08 u1_row_start_index; 4779 U08 u1_column_start_index; 4780 U08 u1_row_end_index; 4781 U08 u1_column_end_index; 4782 4783 switch(gau1_part_id_to_part_num[i4_part_id]) 4784 { 4785 case 0: 4786 { 4787 u1_row_start_index = 0; 4788 u1_column_start_index = 0; 4789 4790 break; 4791 } 4792 case 1: 4793 { 4794 u1_row_start_index = 0; 4795 u1_column_start_index = u1_num_base_blocks_in_pu_row; 4796 4797 break; 4798 } 4799 case 2: 4800 { 4801 u1_row_start_index = u1_num_base_blocks_in_pu_column; 4802 
u1_column_start_index = 0; 4803 4804 break; 4805 } 4806 case 3: 4807 { 4808 u1_row_start_index = u1_num_base_blocks_in_pu_column; 4809 u1_column_start_index = u1_num_base_blocks_in_pu_row; 4810 4811 break; 4812 } 4813 } 4814 4815 u1_column_end_index = u1_column_start_index + u1_num_base_blocks_in_pu_row; 4816 u1_row_end_index = u1_row_start_index + u1_num_base_blocks_in_pu_column; 4817 4818 for(i = u1_row_start_index; i < u1_row_end_index; i++) 4819 { 4820 for(j = u1_column_start_index; j < u1_column_end_index; j++) 4821 { 4822 u8_final_SigmaXSquared += pu8_SigmaXSquared[j + i * u1_num_base_blocks_in_cu_row]; 4823 u8_final_SigmaX += pu8_SigmaX[j + i * u1_num_base_blocks_in_cu_row]; 4824 } 4825 } 4826 4827 u8_final_variance = 4828 u1_num_base_blocks * u4_num_pixels_in_base_block * u8_final_SigmaXSquared; 4829 u8_final_variance -= u8_final_SigmaX * u8_final_SigmaX; 4830 u8_final_variance += 4831 ((u1_num_base_blocks * u4_num_pixels_in_base_block) * 4832 (u1_num_base_blocks * u4_num_pixels_in_base_block) / 2); 4833 u8_final_variance /= (u1_num_base_blocks * u4_num_pixels_in_base_block) * 4834 (u1_num_base_blocks * u4_num_pixels_in_base_block); 4835 4836 ASSERT(u8_final_variance <= UINT_MAX); 4837 } 4838 4839 return u8_final_variance; 4840 } 4841 4842 void hme_compute_variance_for_all_parts( 4843 U08 *pu1_data, 4844 S32 i4_data_stride, 4845 S32 *pi4_valid_part_array, 4846 U32 *pu4_variance, 4847 S32 i4_num_valid_parts, 4848 U08 u1_cu_size) 4849 { 4850 ULWORD64 au8_SigmaX[16]; 4851 ULWORD64 au8_SigmaXSquared[16]; 4852 U08 i, j, k, l; 4853 U08 u1_base_block_size; 4854 U08 u1_num_base_blocks_in_cu_row; 4855 U08 u1_num_base_blocks_in_cu_column; 4856 4857 u1_base_block_size = 4858 hme_determine_base_block_size(pi4_valid_part_array, i4_num_valid_parts, u1_cu_size); 4859 4860 u1_num_base_blocks_in_cu_row = u1_num_base_blocks_in_cu_column = 4861 u1_cu_size / u1_base_block_size; 4862 4863 ASSERT(u1_num_base_blocks_in_cu_row <= 4); 4864 4865 for(i = 0; i < 
        /* NOTE(review): this is the tail of a function whose head lies above   */
        /* this chunk. The visible part walks every base block of a CU,         */
        /* accumulating the pixel sum (SigmaX) and sum of squared pixels        */
        /* (SigmaXSquared) per base block, then derives a variance per valid    */
        /* partition from those sums.                                           */
        u1_num_base_blocks_in_cu_column;
        i++)
    {
        for(j = 0; j < u1_num_base_blocks_in_cu_row; j++)
        {
            /* Top-left pixel of the (i, j)-th base block inside the CU */
            U08 *pu1_buf =
                pu1_data + (u1_base_block_size * j) + (u1_base_block_size * i * i4_data_stride);

            au8_SigmaX[j + i * u1_num_base_blocks_in_cu_row] = 0;
            au8_SigmaXSquared[j + i * u1_num_base_blocks_in_cu_row] = 0;

            /* Accumulate sum and sum-of-squares over every pixel of this base block */
            for(k = 0; k < u1_base_block_size; k++)
            {
                for(l = 0; l < u1_base_block_size; l++)
                {
                    au8_SigmaX[j + i * u1_num_base_blocks_in_cu_row] +=
                        pu1_buf[l + k * i4_data_stride];
                    au8_SigmaXSquared[j + i * u1_num_base_blocks_in_cu_row] +=
                        pu1_buf[l + k * i4_data_stride] * pu1_buf[l + k * i4_data_stride];
                }
            }
        }
    }

    /* Derive the variance of each valid partition from the base-block sums */
    for(i = 0; i < i4_num_valid_parts; i++)
    {
        pu4_variance[pi4_valid_part_array[i]] = hme_compute_variance_of_pu_from_base_blocks(
            au8_SigmaX, au8_SigmaXSquared, u1_cu_size, u1_base_block_size, pi4_valid_part_array[i]);
    }
}

/**
 * Accumulates the final SigmaX and SigmaX^2 of one PU (partition) of a CU by
 * summing the per-base-block sums previously computed for the whole CU.
 *
 * The per-base-block sums are read from pu4_SigmaX / pu4_SigmaXSquared (a 2D
 * grid flattened with stride u1_base_blk_array_stride); the results are
 * written into pu8_final_sigmaX / pu8_final_sigmaX_Squared at index
 * i4_part_id. On exit pu8_final_sigmaX_Squared[i4_part_id] has been scaled by
 * N (the number of pixels in the PU), i.e. it holds N * sum(x^2).
 *
 * @param pu4_SigmaX                per-base-block pixel sums of the CU
 * @param pu4_SigmaXSquared         per-base-block sums of squared pixels
 * @param pu8_final_sigmaX          out: per-partition pixel sum
 * @param pu8_final_sigmaX_Squared  out: per-partition N * sum of squares
 * @param u1_cu_size                CU width/height in pixels
 * @param u1_base_block_size        base block width/height in pixels
 * @param i4_part_id                partition id whose sums are wanted
 * @param u1_base_blk_array_stride  row stride of the base-block sum arrays
 */
void hme_compute_final_sigma_of_pu_from_base_blocks(
    U32 *pu4_SigmaX,
    U32 *pu4_SigmaXSquared,
    ULWORD64 *pu8_final_sigmaX,
    ULWORD64 *pu8_final_sigmaX_Squared,
    U08 u1_cu_size,
    U08 u1_base_block_size,
    S32 i4_part_id,
    U08 u1_base_blk_array_stride)
{
    U08 i, j;
    //U08 u1_num_base_blocks_in_cu_row;

    /* gai1_part_wd_and_ht is presumably expressed in units of (cu_size/16); */
    /* the multiplier rescales it to pixels — TODO confirm against the table */
    U08 u1_part_dimension_multiplier = (u1_cu_size >> 4);
    S32 i4_part_wd = gai1_part_wd_and_ht[i4_part_id][0] * u1_part_dimension_multiplier;
    S32 i4_part_ht = gai1_part_wd_and_ht[i4_part_id][1] * u1_part_dimension_multiplier;
    U08 u1_num_base_blocks_in_pu_row = i4_part_wd / u1_base_block_size;
    U08 u1_num_base_blocks_in_pu_column = i4_part_ht / u1_base_block_size;
    U16 u2_num_base_blocks = (u1_num_base_blocks_in_pu_row * u1_num_base_blocks_in_pu_column);
    U32 u4_num_pixels_in_base_block = u1_base_block_size * u1_base_block_size;
    /* N = total number of pixels covered by this PU */
    U32 u4_N = (u2_num_base_blocks * u4_num_pixels_in_base_block);

    /*if (u1_is_for_src)
    {
        u1_num_base_blocks_in_cu_row = 16;
    }
    else
    {
        u1_num_base_blocks_in_cu_row = u1_cu_size / u1_base_block_size;
    }*/

    pu8_final_sigmaX[i4_part_id] = 0;
    pu8_final_sigmaX_Squared[i4_part_id] = 0;

    if(ge_part_id_to_part_type[i4_part_id] != PRT_NxN)
    {
        /* For non-NxN partitions the start offset of the second partition of */
        /* the CU is derived from the partition orientation: a vertical split */
        /* offsets rows, a horizontal split offsets columns.                  */
        U08 u1_column_start_index = gau1_part_id_to_part_num[i4_part_id]
                                        ? (gai1_is_part_vertical[i4_part_id]
                                               ? 0
                                               : (u1_cu_size - i4_part_wd) / u1_base_block_size)
                                        : 0;
        U08 u1_row_start_index = gau1_part_id_to_part_num[i4_part_id]
                                     ? (gai1_is_part_vertical[i4_part_id]
                                            ? (u1_cu_size - i4_part_ht) / u1_base_block_size
                                            : 0)
                                     : 0;
        U08 u1_column_end_index = u1_column_start_index + u1_num_base_blocks_in_pu_row;
        U08 u1_row_end_index = u1_row_start_index + u1_num_base_blocks_in_pu_column;

        /* Sum the base-block statistics that fall inside this PU */
        for(i = u1_row_start_index; i < u1_row_end_index; i++)
        {
            for(j = u1_column_start_index; j < u1_column_end_index; j++)
            {
                pu8_final_sigmaX_Squared[i4_part_id] +=
                    pu4_SigmaXSquared[j + i * u1_base_blk_array_stride];
                pu8_final_sigmaX[i4_part_id] += pu4_SigmaX[j + i * u1_base_blk_array_stride];
            }
        }
    }
    else
    {
        U08 u1_row_start_index;
        U08 u1_column_start_index;
        U08 u1_row_end_index;
        U08 u1_column_end_index;

        /* NxN: four quadrants, selected by the partition number (0..3).      */
        /* NOTE(review): no default case — relies on gau1_part_id_to_part_num */
        /* only ever yielding 0..3 for NxN part ids; verify against the table */
        switch(gau1_part_id_to_part_num[i4_part_id])
        {
        case 0:
        {
            /* top-left quadrant */
            u1_row_start_index = 0;
            u1_column_start_index = 0;

            break;
        }
        case 1:
        {
            /* top-right quadrant */
            u1_row_start_index = 0;
            u1_column_start_index = u1_num_base_blocks_in_pu_row;

            break;
        }
        case 2:
        {
            /* bottom-left quadrant */
            u1_row_start_index = u1_num_base_blocks_in_pu_column;
            u1_column_start_index = 0;

            break;
        }
        case 3:
        {
            /* bottom-right quadrant */
            u1_row_start_index = u1_num_base_blocks_in_pu_column;
            u1_column_start_index = u1_num_base_blocks_in_pu_row;

            break;
        }
        }

        u1_column_end_index = u1_column_start_index + u1_num_base_blocks_in_pu_row;
        u1_row_end_index = u1_row_start_index + u1_num_base_blocks_in_pu_column;

        /* Sum the base-block statistics of the selected quadrant */
        for(i = u1_row_start_index; i < u1_row_end_index; i++)
        {
            for(j = u1_column_start_index; j < u1_column_end_index; j++)
            {
                pu8_final_sigmaX_Squared[i4_part_id] +=
                    pu4_SigmaXSquared[j + i * u1_base_blk_array_stride];
                pu8_final_sigmaX[i4_part_id] += pu4_SigmaX[j + i * u1_base_blk_array_stride];
            }
        }
    }

    /* Scale sum-of-squares by N so that (N*sum(x^2) - sum(x)^2) gives N^2*variance */
    pu8_final_sigmaX_Squared[i4_part_id] *= u4_N;
}

/**
 * Injects a noise term into the SAD of every valid partition of a CU.
 *
 * For each valid partition: computes the predicted block's variance from the
 * per-base-block sums of pu1_pred, compares it with the (weighted) source
 * variance delivered via pu8_src_sigmaX / pu8_src_sigmaXSquared, folds both
 * into a similarity-based noise term (Q-format STIM_Q_FORMAT), scales it by
 * i4_alpha_stim_multiplier and applies it multiplicatively to
 * pi4_sad_array[part_id] with rounding. When i4_alpha_stim_multiplier is 0
 * the SADs are only re-quantized through the same rounding path.
 *
 * pi4_sad_array is updated IN PLACE.
 */
void hme_compute_stim_injected_distortion_for_all_parts(
    U08 *pu1_pred,
    S32 i4_pred_stride,
    S32 *pi4_valid_part_array,
    ULWORD64 *pu8_src_sigmaX,
    ULWORD64 *pu8_src_sigmaXSquared,
    S32 *pi4_sad_array,
    S32 i4_alpha_stim_multiplier,
    S32 i4_inv_wt,
    S32 i4_inv_wt_shift_val,
    S32 i4_num_valid_parts,
    S32 i4_wpred_log_wdc,
    U08 u1_cu_size)
{
    /* Per-base-block sums of the prediction; 16 entries suit a 4x4 grid of base blocks */
    U32 au4_sigmaX[16], au4_sigmaXSquared[16];
    /* Per-partition final sums; 17 covers all partition ids of a CU */
    ULWORD64 au8_final_ref_sigmaX[17], au8_final_ref_sigmaXSquared[17];
    S32 i4_noise_term;
    U16 i2_count;

    ULWORD64 u8_temp_var, u8_temp_var1, u8_pure_dist;
    ULWORD64 u8_ref_X_Square, u8_src_var, u8_ref_var;

    U08 u1_base_block_size;

    WORD32 i4_q_level = STIM_Q_FORMAT + ALPHA_Q_FORMAT;

    u1_base_block_size =
        hme_determine_base_block_size(pi4_valid_part_array, i4_num_valid_parts, u1_cu_size);

    ASSERT(u1_cu_size >= 16);

    /* Sums of the prediction, one entry per base block (32-bit accumulators) */
    hme_compute_sigmaX_and_sigmaXSquared(
        pu1_pred,
        i4_pred_stride,
        au4_sigmaX,
        au4_sigmaXSquared,
        u1_base_block_size,
        u1_base_block_size,
        u1_cu_size,
        u1_cu_size,
        1,
        u1_cu_size / u1_base_block_size);

    /* Noise Term Computation */
    for(i2_count = 0; i2_count < i4_num_valid_parts; i2_count++)
    {
        unsigned long u4_shift_val;
        S32 i4_bits_req;
        S32 part_id = pi4_valid_part_array[i2_count];

        if(i4_alpha_stim_multiplier)
        {
            /* Final SigmaX and SigmaX-Squared Calculation */
            hme_compute_final_sigma_of_pu_from_base_blocks(
                au4_sigmaX,
                au4_sigmaXSquared,
                au8_final_ref_sigmaX,
                au8_final_ref_sigmaXSquared,
                u1_cu_size,
                u1_base_block_size,
                part_id,
                (u1_cu_size / u1_base_block_size));

            /* ref variance (scaled): N*sum(x^2) - sum(x)^2 */
            u8_ref_X_Square = (au8_final_ref_sigmaX[part_id] * au8_final_ref_sigmaX[part_id]);
            u8_ref_var = (au8_final_ref_sigmaXSquared[part_id] - u8_ref_X_Square);

            /* Source variance, compensated for weighted prediction; returns the
               shift the ref variance must be brought down by to match scale */
            u4_shift_val = ihevce_calc_stim_injected_variance(
                pu8_src_sigmaX,
                pu8_src_sigmaXSquared,
                &u8_src_var,
                i4_inv_wt,
                i4_inv_wt_shift_val,
                i4_wpred_log_wdc,
                part_id);

            u8_ref_var = u8_ref_var >> u4_shift_val;

            GETRANGE64(i4_bits_req, u8_ref_var);

            /* Clamp both variances to 27 significant bits so the products below
               stay within 64-bit range */
            if(i4_bits_req > 27)
            {
                u8_ref_var = u8_ref_var >> (i4_bits_req - 27);
                u8_src_var = u8_src_var >> (i4_bits_req - 27);
            }

            if(u8_src_var == u8_ref_var)
            {
                /* Identical variances => similarity measure is exactly 1.0 in Q format */
                u8_temp_var = (1 << STIM_Q_FORMAT);
            }
            else
            {
                /* similarity = 2*src_var*ref_var / (src_var^2 + ref_var^2),
                   rounded, in STIM_Q_FORMAT */
                u8_temp_var = (u8_src_var * u8_ref_var * (1 << STIM_Q_FORMAT));
                u8_temp_var1 = (u8_src_var * u8_src_var) + (u8_ref_var * u8_ref_var);
                u8_temp_var = (u8_temp_var + (u8_temp_var1 / 2));
                u8_temp_var = (u8_temp_var / u8_temp_var1);
                u8_temp_var = (2 * u8_temp_var);
            }

            /* NOTE(review): unsigned->signed narrowing; ASSERT guards against
               the similarity term overflowing into the sign bit */
            i4_noise_term = (UWORD32)u8_temp_var;

            ASSERT(i4_noise_term >= 0);

            i4_noise_term *= i4_alpha_stim_multiplier;
        }
        else
        {
            i4_noise_term = 0;
        }

        /* SAD' = round(SAD * (1 - alpha*similarity)) carried out in Q(i4_q_level) */
        u8_pure_dist = pi4_sad_array[part_id];
        u8_pure_dist *= ((1 << (i4_q_level)) - (i4_noise_term));
        u8_pure_dist += (1 << ((i4_q_level)-1));
        pi4_sad_array[part_id] = (UWORD32)(u8_pure_dist >> (i4_q_level));
    }
}

/**
 * Computes per-base-block pixel sums (SigmaX) and sums of squared pixels
 * (SigmaXSquared) over a u1_blk_wd x u1_blk_ht region of pu1_data.
 *
 * Results are stored in a 2D grid (row stride u1_array_stride) of either U32
 * or ULWORD64 elements, selected by u1_is_sigma_pointer_size_32_bit; in the
 * 64-bit path the sum of squares is additionally pre-scaled by the total
 * block pixel count (u1_blk_wd * u1_blk_ht), matching what the variance
 * formula downstream expects.
 *
 * @param pu1_data        top-left of the pixel region
 * @param i4_buf_stride   stride of pu1_data in pixels
 * @param pv_sigmaX       out array (U32* or ULWORD64* depending on flag)
 * @param pv_sigmaXSquared out array (same element type as pv_sigmaX)
 * @param u1_base_blk_wd  base block width in pixels
 * @param u1_base_blk_ht  base block height in pixels
 * @param u1_blk_wd       total region width in pixels
 * @param u1_blk_ht       total region height in pixels
 * @param u1_is_sigma_pointer_size_32_bit 1 => U32 outputs, 0 => ULWORD64
 * @param u1_array_stride row stride of the output grids in elements
 */
void hme_compute_sigmaX_and_sigmaXSquared(
    U08 *pu1_data,
    S32 i4_buf_stride,
    void *pv_sigmaX,
    void *pv_sigmaXSquared,
    U08 u1_base_blk_wd,
    U08 u1_base_blk_ht,
    U08 u1_blk_wd,
    U08 u1_blk_ht,
    U08 u1_is_sigma_pointer_size_32_bit,
    U08 u1_array_stride)
{
    U08 i, j, k, l;
    U08 u1_num_base_blks_in_row;
    U08 u1_num_base_blks_in_column;

    u1_num_base_blks_in_row = u1_blk_wd / u1_base_blk_wd;
    u1_num_base_blks_in_column = u1_blk_ht / u1_base_blk_ht;

    if(u1_is_sigma_pointer_size_32_bit)
    {
        U32 *sigmaX, *sigmaXSquared;

        sigmaX = (U32 *)pv_sigmaX;
        sigmaXSquared = (U32 *)pv_sigmaXSquared;

        /* Loop to compute the sigma_X and sigma_X_Squared */
        for(i = 0; i < u1_num_base_blks_in_column; i++)
        {
            for(j = 0; j < u1_num_base_blks_in_row; j++)
            {
                U32 u4_sigmaX = 0, u4_sigmaXSquared = 0;
                /* Top-left pixel of the (i, j)-th base block */
                U08 *pu1_buf =
                    pu1_data + (u1_base_blk_wd * j) + (u1_base_blk_ht * i * i4_buf_stride);

                for(k = 0; k < u1_base_blk_ht; k++)
                {
                    for(l = 0; l < u1_base_blk_wd; l++)
                    {
                        u4_sigmaX += pu1_buf[l + k * i4_buf_stride];
                        u4_sigmaXSquared +=
                            (pu1_buf[l + k * i4_buf_stride] * pu1_buf[l + k * i4_buf_stride]);
                    }
                }

                sigmaX[j + i * u1_array_stride] = u4_sigmaX;
                sigmaXSquared[j + i * u1_array_stride] = u4_sigmaXSquared;
            }
        }
    }
    else
    {
        ULWORD64 *sigmaX, *sigmaXSquared;

        sigmaX = (ULWORD64 *)pv_sigmaX;
        sigmaXSquared = (ULWORD64 *)pv_sigmaXSquared;

        /* Loop to compute the sigma_X and sigma_X_Squared */
        for(i = 0; i < u1_num_base_blks_in_column; i++)
        {
            for(j = 0; j < u1_num_base_blks_in_row; j++)
            {
                ULWORD64 u8_sigmaX = 0, u8_sigmaXSquared = 0;
                /* Top-left pixel of the (i, j)-th base block */
                U08 *pu1_buf =
                    pu1_data + (u1_base_blk_wd * j) + (u1_base_blk_ht * i * i4_buf_stride);

                for(k = 0; k < u1_base_blk_ht; k++)
                {
                    for(l = 0; l < u1_base_blk_wd; l++)
                    {
                        u8_sigmaX += pu1_buf[l + k * i4_buf_stride];
                        u8_sigmaXSquared +=
                            (pu1_buf[l + k * i4_buf_stride] * pu1_buf[l + k * i4_buf_stride]);
                    }
                }

                /* Pre-scale by the total pixel count of the region; the 32-bit
                   path deliberately does NOT do this (scaling happens later in
                   hme_compute_final_sigma_of_pu_from_base_blocks) */
                u8_sigmaXSquared = u8_sigmaXSquared * u1_blk_wd * u1_blk_ht;

                sigmaX[j + i * u1_array_stride] = u8_sigmaX;
                sigmaXSquared[j + i * u1_array_stride] = u8_sigmaXSquared;
            }
        }
    }
}

#if TEMPORAL_NOISE_DETECT
/**
 * Temporal noise detection for one 16x16 block.
 *
 * For each prediction direction, projects the best MV from the coarser layer,
 * motion-compensates from the reconstructed reference, forms the 16x16
 * residue (source - prediction) and compares its variance with a scaled copy
 * of the source variance. The block is flagged noisy (its four 8x8 entries in
 * ps_ctb_noise_params->au1_is_8x8Blk_noisy set to 1) only if EVERY direction
 * indicates noise; any direction indicating no noise clears the flags and
 * stops further checking.
 *
 * @return 1 if the 16x16 block was classified as noisy, 0 otherwise.
 */
WORD32 ihevce_16x16block_temporal_noise_detect(
    WORD32 had_block_size,
    WORD32 ctb_width,
    WORD32 ctb_height,
    ihevce_ctb_noise_params *ps_ctb_noise_params,
    fpel_srch_cand_init_data_t *s_proj_srch_cand_init_data,
    hme_search_prms_t *s_search_prms_blk,
    me_frm_ctxt_t *ps_ctxt,
    WORD32 num_pred_dir,
    WORD32 i4_num_act_ref_l0,
    WORD32 i4_num_act_ref_l1,
    WORD32 i4_cu_x_off,
    WORD32 i4_cu_y_off,
    wgt_pred_ctxt_t *ps_wt_inp_prms,
    WORD32 input_stride,
    WORD32 index_8x8_block,
    WORD32 num_horz_blocks,
    WORD32 num_8x8_in_ctb_row,
    WORD32 i4_16x16_index)
{
    WORD32 i;
    WORD32 noise_detected;

    UWORD8 *pu1_l0_block;
    UWORD8 *pu1_l1_block;

    WORD32 mean;
    UWORD32 variance_8x8;

    /* Residue buffer and statistics of the 16x16 block; variance is kept per
       prediction direction (max 2) */
    WORD16 pi2_residue_16x16[256];
    WORD32 mean_16x16;
    UWORD32 variance_16x16[2];

    /* throw errors in case of un-supported arguments */
    /* assumption: size is 8 or 16 or 32 */
    assert(
        (had_block_size == 8) || (had_block_size == 16) || (had_block_size == 32)); //ihevc_assert

    /* initialize the variables */
    noise_detected = 0;
    variance_8x8 = 0;

    mean = 0;

    {
        i = 0;
        /* get the ref/pred and source using the MV of both directions */
        /* pick the best candidates in each direction */
        /* Colocated cands */
        {
            // steps to be done
            /* pick the candidates */
            /* do motion compensation using the candidates got from prev step : pick from the offset */
            /* get the ref or the pred from the offset */
            /* get the source data */
            /* send the pred - source to noise detect */
            /* do noise detect on the residue of source and pred */

            layer_mv_t *ps_layer_mvbank;
            hme_mv_t *ps_mv;

            //S32 i;
            S32 wd_c, ht_c, wd_p, ht_p;
            S32 blksize_p, blk_x, blk_y, i4_offset;
            S08 *pi1_ref_idx;
            fpel_srch_cand_init_data_t *ps_ctxt_2 = s_proj_srch_cand_init_data;
            layer_ctxt_t *ps_curr_layer = ps_ctxt_2->ps_curr_layer;
            layer_ctxt_t *ps_coarse_layer = ps_ctxt_2->ps_coarse_layer;
            err_prms_t s_err_prms;
            S32 i4_blk_wd;
            S32 i4_blk_ht;
            BLK_SIZE_T e_blk_size;
            hme_search_prms_t *ps_search_prms;
            S32 i4_part_mask;
            S32 *pi4_valid_part_ids;

            /* has list of valid partition to search terminated by -1 */
            S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];

            /*SEARCH_COMPLEXITY_T e_search_complexity = ps_ctxt->e_search_complexity;*/

            S32 i4_pos_x;
            S32 i4_pos_y;
            U08 u1_pred_dir; // = ps_ctxt_2->u1_pred_dir;
            U08 u1_default_ref_id = 0; //ps_ctxt_2->u1_default_ref_id;
            S32 i4_inp_off, i4_ref_offset, i4_ref_stride;

            /* The reference is actually an array of ptrs since there are several */
            /* reference id. So an array gets passed from calling function        */
            U08 **ppu1_ref;

            /* Attributes of input candidates */
            search_node_t as_search_node[2];
            /* NOTE(review): this local SHADOWS the function parameter of the
               same name; it is re-pointed to ps_ctxt->s_wt_pred below, so the
               parameter ps_wt_inp_prms is effectively unused here */
            wgt_pred_ctxt_t *ps_wt_inp_prms;

            S32 posx;
            S32 posy;
            S32 i4_num_results_to_proj;
            S32 ai4_sad_grid[9 * TOT_NUM_PARTS];
            S32 i4_inp_stride;

            /* initialize variables */
            /* Width and ht of current and prev layers */
            wd_c = ps_curr_layer->i4_wd;
            ht_c = ps_curr_layer->i4_ht;
            wd_p = ps_coarse_layer->i4_wd;
            ht_p = ps_coarse_layer->i4_ht;

            ps_search_prms = s_search_prms_blk;

            ps_wt_inp_prms = &ps_ctxt->s_wt_pred;
            e_blk_size = ps_search_prms->e_blk_size;
            i4_part_mask = ps_search_prms->i4_part_mask;

            i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
            i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];

            ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
            blksize_p = gau1_blk_size_to_wd_shift[ps_layer_mvbank->e_blk_size];

            /* ASSERT for valid sizes */
            ASSERT((blksize_p == 3) || (blksize_p == 4) || (blksize_p == 5));

            i4_pos_x = i4_cu_x_off;
            i4_pos_y = i4_cu_y_off;
            posx = i4_pos_x + 2;
            posy = i4_pos_y + 2;

            i4_inp_stride = ps_search_prms->i4_inp_stride;
            /* Move to the location of the search blk in inp buffer; offset of
               the 16x16 block inside a 64x64 CTB (4 blocks per row) */
            //i4_inp_off = i4_cu_x_off;
            //i4_inp_off += i4_cu_y_off * i4_inp_stride;
            i4_inp_off = (i4_16x16_index % 4) * 16;
            i4_inp_off += (i4_16x16_index / 4) * 16 * i4_inp_stride;

            /***********pick the candidates**************************************/
            for(u1_pred_dir = 0; u1_pred_dir < num_pred_dir; u1_pred_dir++)
            {
                WORD32 actual_pred_dir = 0;

                /* Map loop index to real direction: if L0 has no active refs,
                   the first (only) iteration works on L1 */
                if(u1_pred_dir == 0 && i4_num_act_ref_l0 == 0)
                {
                    actual_pred_dir = 1;
                }
                else if(u1_pred_dir == 0 && i4_num_act_ref_l0 != 0)
                {
                    actual_pred_dir = 0;
                }
                else if(u1_pred_dir == 1)
                {
                    actual_pred_dir = 1;
                }

                i4_num_results_to_proj = 1; // only the best proj

                /* Safety check to avoid uninitialized access across temporal layers */
                posx = CLIP3(posx, 0, (wd_c - blksize_p)); /* block position within frame */
                posy = CLIP3(posy, 0, (ht_c - blksize_p));

                /* Project the positions to prev layer */
                blk_x = posx >> blksize_p;
                blk_y = posy >> blksize_p;

                /* Pick up the mvs from the location */
                i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
                i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);

                ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
                pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;

                if(actual_pred_dir == 1)
                {
                    /* L1 MVs are stored after all L0 MVs in the bank */
                    ps_mv += (i4_num_act_ref_l0 * ps_layer_mvbank->i4_num_mvs_per_ref);
                    pi1_ref_idx += (i4_num_act_ref_l0 * ps_layer_mvbank->i4_num_mvs_per_ref);
                }

                {
                    /* Coarse-layer MVs are at half resolution: scale up by 2 */
                    as_search_node[actual_pred_dir].s_mv.i2_mvx = ps_mv[0].i2_mv_x << 1;
                    as_search_node[actual_pred_dir].s_mv.i2_mvy = ps_mv[0].i2_mv_y << 1;
                    as_search_node[actual_pred_dir].i1_ref_idx = pi1_ref_idx[0];

                    /* Invalid or intra MV => fall back to zero MV on the default ref */
                    if((as_search_node[actual_pred_dir].i1_ref_idx < 0) ||
                       (as_search_node[actual_pred_dir].s_mv.i2_mvx == INTRA_MV))
                    {
                        as_search_node[actual_pred_dir].i1_ref_idx = u1_default_ref_id;
                        as_search_node[actual_pred_dir].s_mv.i2_mvx = 0;
                        as_search_node[actual_pred_dir].s_mv.i2_mvy = 0;
                    }
                }

                /********************************************************************************************/
                {
                    /* declare the variables */
                    //ps_fullpel_refine_ctxt = ps_search_prms->ps_fullpel_refine_ctxt;

                    pi4_valid_part_ids = ai4_valid_part_ids;
                    i4_ref_stride = ps_curr_layer->i4_rec_stride;
                    s_err_prms.i4_inp_stride = i4_inp_stride;
                    s_err_prms.i4_ref_stride = i4_ref_stride;
                    s_err_prms.i4_part_mask = i4_part_mask;
                    s_err_prms.pi4_sad_grid = &ai4_sad_grid[0];
                    s_err_prms.i4_blk_wd = i4_blk_wd;
                    s_err_prms.i4_blk_ht = i4_blk_ht;
                    s_err_prms.i4_step = 1;
                    s_err_prms.pi4_valid_part_ids = pi4_valid_part_ids;
                    //s_err_prms.i4_num_partitions = ps_fullpel_refine_ctxt->i4_num_valid_parts;

                    /*************************************************************************/
                    /* Depending on flag i4_use_rec, we use either input of previously       */
                    /* encoded pictures or we use recon of previously encoded pictures.      */
                    i4_ref_stride = ps_curr_layer->i4_rec_stride;
                    ppu1_ref = ps_curr_layer->ppu1_list_rec_fxfy; // pointer to the pred

                    i4_ref_offset = (i4_ref_stride * i4_cu_y_off) + i4_cu_x_off; //i4_x_off;

                    /* Motion-compensated reference position (full-pel MVs) */
                    s_err_prms.pu1_ref =
                        ppu1_ref[as_search_node[actual_pred_dir].i1_ref_idx] + i4_ref_offset;
                    s_err_prms.pu1_ref += as_search_node[actual_pred_dir].s_mv.i2_mvx;
                    s_err_prms.pu1_ref +=
                        as_search_node[actual_pred_dir].s_mv.i2_mvy * i4_ref_stride;

                    /* get the source (weighted input for the chosen reference) */
                    s_err_prms.pu1_inp =
                        ps_wt_inp_prms->apu1_wt_inp[as_search_node[actual_pred_dir].i1_ref_idx] +
                        i4_inp_off;

                    /* send the pred - source to noise detect */
                    // noise_detect_hme(noise_structure, s_err_prms.pu1_inp, s_err_prms.pu1_ref);
                }
                /* change the l0/l1 block pointer names accordingly */

                /* get memory pointers for the input and the reference */
                pu1_l0_block = s_err_prms.pu1_inp;
                pu1_l1_block = s_err_prms.pu1_ref;

                {
                    WORD32 i2, j2;
                    WORD32 dim = 16;
                    UWORD8 *buf1;
                    UWORD8 *buf2;
                    /* Form the 16x16 residue = source - prediction */
                    for(i2 = 0; i2 < dim; i2++)
                    {
                        buf1 = pu1_l0_block + i2 * i4_inp_stride;
                        buf2 = pu1_l1_block + i2 * i4_ref_stride;

                        for(j2 = 0; j2 < dim; j2++)
                        {
                            pi2_residue_16x16[i2 * dim + j2] = (WORD16)(buf1[j2] - buf2[j2]);
                        }
                    }

                    ihevce_calc_variance_signed(
                        pi2_residue_16x16, 16, &mean_16x16, &variance_16x16[u1_pred_dir], 16, 16);

                    /* compare the residue variance against the scaled source
                       variance of this 16x16 block */
                    if(variance_16x16[u1_pred_dir] >
                       ((TEMPORAL_VARIANCE_FACTOR *
                         ps_ctb_noise_params->au4_variance_src_16x16[i4_16x16_index]) >>
                        Q_TEMPORAL_VARIANCE_FACTOR))
                    {
                        /* update noisy block count only if all best MVs in all
                           directions indicate noise */
                        if(u1_pred_dir == num_pred_dir - 1)
                        {
                            /* mark all four 8x8 sub-blocks of this 16x16 noisy */
                            ps_ctb_noise_params->au1_is_8x8Blk_noisy[index_8x8_block] = 1;
                            ps_ctb_noise_params->au1_is_8x8Blk_noisy[index_8x8_block + 1] = 1;
                            ps_ctb_noise_params
                                ->au1_is_8x8Blk_noisy[index_8x8_block + num_8x8_in_ctb_row] = 1;
                            ps_ctb_noise_params
                                ->au1_is_8x8Blk_noisy[index_8x8_block + num_8x8_in_ctb_row + 1] = 1;
                            noise_detected = 1;
                        }
                    }
                    else /* if any one direction's MV says non-noise, don't check
                            the other directions' MVs; move to the next block */
                    {
                        noise_detected = 0;
                        ps_ctb_noise_params->au1_is_8x8Blk_noisy[index_8x8_block] = 0;
                        ps_ctb_noise_params->au1_is_8x8Blk_noisy[index_8x8_block + 1] = 0;
                        ps_ctb_noise_params
                            ->au1_is_8x8Blk_noisy[index_8x8_block + num_8x8_in_ctb_row] = 0;
                        ps_ctb_noise_params
                            ->au1_is_8x8Blk_noisy[index_8x8_block + num_8x8_in_ctb_row + 1] = 0;
                        break;
                    }
                } // variance analysis and calculation
            } // for each direction
        } // HME code

    } // for each 16x16 block

    return (noise_detected);
}
#endif

/**
 * Produces one QPEL-interpolated plane by averaging the two half/full-pel
 * planes that bracket the requested quarter-pel position.
 *
 * Writes the averaged block into ps_prms->apu1_interp_out[i4_buf_id] and
 * records the output pointer/stride in ppu1_final / pi4_final_stride at the
 * same index.
 */
void hme_qpel_interp_avg_1pt(
    interp_prms_t *ps_prms,
    S32 i4_mv_x,
    S32 i4_mv_y,
    S32 i4_buf_id,
    U08 **ppu1_final,
    S32 *pi4_final_stride)
{
    U08 *pu1_src1, *pu1_src2, *pu1_dst;
    qpel_input_buf_cfg_t *ps_inp_cfg;
    S32 i4_mv_x_frac, i4_mv_y_frac, i4_offset;

    /*************************************************************************/
    /* For a given QPEL pt, we need to determine the 2 source pts that are   */
    /* needed to do the QPEL averaging. The logic to do this is as follows   */
    /* i4_mv_x and i4_mv_y are the motion vectors in QPEL units that are     */
    /* pointing to the pt of interest. Obviously, they are w.r.t. the 0,0    */
    /* pt of the reference blk that is colocated to the inp blk.             */
    /*    A j E k B                                                          */
    /*    l m n o p                                                          */
    /*    F q G r H                                                          */
    /*    s t u v w                                                          */
    /*    C x I y D                                                          */
    /* In above diagram, A, B, C, D are full pts at offsets (0,0),(1,0),(0,1)*/
    /* and (1,1) respectively in the fpel buffer (id = 0)                    */
    /* E and I are hxfy pts in offsets (0,0),(0,1) respectively in hxfy buf  */
    /* F and H are fxhy pts in offsets (0,0),(1,0) respectively in fxhy buf  */
    /* G is hxhy pt in offset 0,0 in hxhy buf                                */
    /* All above offsets are computed w.r.t. motion displaced pt in          */
    /* respective bufs. This means that A corresponds to (i4_mv_x >> 2) and  */
    /* (i4_mv_y >> 2) in fxfy buf. Ditto with E, F and G                     */
    /* fxfy buf is buf id 0, hxfy is buf id 1, fxhy is buf id 2, hxhy is 3   */
    /* If we consider pt v to be derived. v has a fractional comp of 3, 3    */
    /* v is avg of H and I. So the table look up of v should give following  */
    /* buf 1 (H) : offset = (1, 0) buf id = 2.                               */
    /* buf 2 (I) : offset = (0, 1) buf id = 1.                               */
    /* NOTE: For pts that are fxfy/hxfy/fxhy/hxhy, bufid 1 will be -1.       */
    /*************************************************************************/
    i4_mv_x_frac = i4_mv_x & 3;
    i4_mv_y_frac = i4_mv_y & 3;

    /* Integer-pel displacement common to both source planes */
    i4_offset = (i4_mv_x >> 2) + (i4_mv_y >> 2) * ps_prms->i4_ref_stride;

    /* Derive the descriptor that has all offset and size info */
    ps_inp_cfg = &gas_qpel_inp_buf_cfg[i4_mv_y_frac][i4_mv_x_frac];

    pu1_src1 = ps_prms->ppu1_ref[ps_inp_cfg->i1_buf_id1];
    pu1_src1 += ps_inp_cfg->i1_buf_xoff1 + i4_offset;
    pu1_src1 += (ps_inp_cfg->i1_buf_yoff1 * ps_prms->i4_ref_stride);

    pu1_src2 = ps_prms->ppu1_ref[ps_inp_cfg->i1_buf_id2];
    pu1_src2 += ps_inp_cfg->i1_buf_xoff2 + i4_offset;
    pu1_src2 += (ps_inp_cfg->i1_buf_yoff2 * ps_prms->i4_ref_stride);

    pu1_dst = ps_prms->apu1_interp_out[i4_buf_id];
    hevc_avg_2d(
        pu1_src1,
        pu1_src2,
        ps_prms->i4_ref_stride,
        ps_prms->i4_ref_stride,
        ps_prms->i4_blk_wd,
        ps_prms->i4_blk_ht,
        pu1_dst,
        ps_prms->i4_out_stride);
    ppu1_final[i4_buf_id] = pu1_dst;
    pi4_final_stride[i4_buf_id] = ps_prms->i4_out_stride;
}

/**
 * Interpolates the two QPEL points vertically adjacent to (i4_mv_x, i4_mv_y)
 * — one below into buffer 3, one above into buffer 1 — reusing already
 * computed half-pel planes.
 */
void hme_qpel_interp_avg_2pt_vert_with_reuse(
    interp_prms_t *ps_prms, S32 i4_mv_x, S32 i4_mv_y, U08 **ppu1_final, S32 *pi4_final_stride)
{
    hme_qpel_interp_avg_1pt(ps_prms, i4_mv_x, i4_mv_y + 1, 3, ppu1_final, pi4_final_stride);

    hme_qpel_interp_avg_1pt(ps_prms, i4_mv_x, i4_mv_y - 1, 1, ppu1_final, pi4_final_stride);
}

/**
 * Interpolates the two QPEL points horizontally adjacent to (i4_mv_x,
 * i4_mv_y) — one to the right into buffer 2, one to the left into buffer 0 —
 * reusing already computed half-pel planes.
 */
void hme_qpel_interp_avg_2pt_horz_with_reuse(
    interp_prms_t *ps_prms, S32 i4_mv_x, S32 i4_mv_y, U08 **ppu1_final, S32 *pi4_final_stride)
{
    hme_qpel_interp_avg_1pt(ps_prms, i4_mv_x + 1, i4_mv_y, 2, ppu1_final, pi4_final_stride);

    hme_qpel_interp_avg_1pt(ps_prms, i4_mv_x - 1, i4_mv_y, 0, ppu1_final, pi4_final_stride);
}

/**
 * Fills per-reference MV search ranges (ps_mv_limit) for the current frame.
 *
 * For B/b pictures the vertical range is scaled by the POC distance to each
 * reference (assuming a per-POC vertical MV budget derived from a P-to-P
 * distance of 4) and clamped to the layer maximum; for P pictures the
 * configured layer limits are used directly. Also reports, via
 * pi2_prev_enc_frm_max_mv_y[0], the max vertical MV used toward the
 * previously encoded frame — this bounds the ME -> EncLoop reverse dependency.
 */
void hme_set_mv_limit_using_dvsr_data(
    me_frm_ctxt_t *ps_ctxt,
    layer_ctxt_t *ps_curr_layer,
    range_prms_t *ps_mv_limit,
    S16 *pi2_prev_enc_frm_max_mv_y,
    U08 u1_num_act_ref_pics)
{
    WORD32 ref_ctr;

    /* Only for B/b pic. */
    if(1 == ps_ctxt->s_frm_prms.bidir_enabled)
    {
        WORD16 i2_mv_y_per_poc, i2_max_mv_y;
        WORD32 cur_poc, prev_poc, ref_poc, abs_poc_diff;
        WORD32 prev_poc_count = 0;
        WORD32 i4_p_idx;

        pi2_prev_enc_frm_max_mv_y[0] = 0;

        cur_poc = ps_ctxt->i4_curr_poc;

        i4_p_idx = 0;

        /* Get abs MAX for symmetric search */
        i2_mv_y_per_poc = ps_curr_layer->i2_max_mv_y;
        /* Assuming P to P distance as 4 (rounded divide by 4) */
        i2_mv_y_per_poc = (i2_mv_y_per_poc + 2) >> 2;

        for(ref_ctr = 0; ref_ctr < u1_num_act_ref_pics; ref_ctr++)
        {
            /* Get the prev. encoded frame POC */
            prev_poc = ps_ctxt->i4_prev_poc;

            ref_poc = ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr];
            abs_poc_diff = ABS((cur_poc - ref_poc));
            /* Get the cur. max MV based on POC distance */
            i2_max_mv_y = i2_mv_y_per_poc * abs_poc_diff;
            i2_max_mv_y = MIN(i2_max_mv_y, ps_curr_layer->i2_max_mv_y);

            /* Symmetric range around zero for this reference */
            ps_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x;
            ps_mv_limit[ref_ctr].i2_min_y = -i2_max_mv_y;
            ps_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x;
            ps_mv_limit[ref_ctr].i2_max_y = i2_max_mv_y;

            /* Find the MAX MV for the prev. encoded frame to optimize */
            /* the reverse dependency of ME on Enc.Loop               */
            if(ref_poc == prev_poc)
            {
                /* TO DO : Same thing for horz. search also */
                pi2_prev_enc_frm_max_mv_y[0] = i2_max_mv_y;
                prev_poc_count++;
            }
        }
    }
    else
    {
        ASSERT(0 == ps_ctxt->s_frm_prms.u1_num_active_ref_l1);

        /* Set the Config. File Params for P pic. */
        for(ref_ctr = 0; ref_ctr < ps_ctxt->s_frm_prms.u1_num_active_ref_l0; ref_ctr++)
        {
            ps_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x;
            ps_mv_limit[ref_ctr].i2_min_y = -ps_curr_layer->i2_max_mv_y;
            ps_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x;
            ps_mv_limit[ref_ctr].i2_max_y = ps_curr_layer->i2_max_mv_y;
        }

        /* For P PIC., go with Config. File Params */
        pi2_prev_enc_frm_max_mv_y[0] = ps_curr_layer->i2_max_mv_y;
    }
}

/**
 * Decides which partition types ME should evaluate for a CU.
 *
 * If the 8x8 availability mask is not full (u1_blk_8x8_mask != 15) only NxN
 * is enabled. Otherwise, depending on the quality preset and picture type
 * (B/non-B, reference/non-reference), either a segmentation analysis of the
 * input decides the mask — with AMP and/or SMP subsequently stripped for the
 * extreme-speed presets — or the CU is restricted to 2Nx2N.
 *
 * @return bitmask of enabled partition types (ENABLE_* flags).
 */
S32 hme_part_mask_populator(
    U08 *pu1_inp,
    S32 i4_inp_stride,
    U08 u1_limit_active_partitions,
    U08 u1_is_bPic,
    U08 u1_is_refPic,
    U08 u1_blk_8x8_mask,
    ME_QUALITY_PRESETS_T e_me_quality_preset)
{
    /* Not all four 8x8 blocks available => only NxN makes sense */
    if(15 != u1_blk_8x8_mask)
    {
        return ENABLE_NxN;
    }
    else
    {
        /* Segmentation analysis is skipped only in XS25 preset when the
           corresponding picture-type 8x8-CU disable flag is set */
        U08 u1_call_inp_segmentation_based_part_mask_populator =
            (ME_XTREME_SPEED_25 != e_me_quality_preset) ||
            (!u1_is_bPic && !DISABLE_8X8CUS_IN_PPICS_IN_P6) ||
            (u1_is_bPic && u1_is_refPic && !DISABLE_8X8CUS_IN_REFBPICS_IN_P6) ||
            (u1_is_bPic && !u1_is_refPic && !DISABLE_8X8CUS_IN_NREFBPICS_IN_P6);

        if(u1_call_inp_segmentation_based_part_mask_populator)
        {
            S32 i4_part_mask =
                hme_study_input_segmentation(pu1_inp, i4_inp_stride, u1_limit_active_partitions);

            /* Extreme speed: drop asymmetric partitions */
            if(e_me_quality_preset == ME_XTREME_SPEED)
            {
                i4_part_mask &= ~ENABLE_AMP;
            }

            /* Extreme speed 25: drop asymmetric AND symmetric sub-partitions */
            if(e_me_quality_preset == ME_XTREME_SPEED_25)
            {
                i4_part_mask &= ~ENABLE_AMP;

                i4_part_mask &= ~ENABLE_SMP;
            }

            return i4_part_mask;
        }
        else
        {
            return ENABLE_2Nx2N;
        }
    }
}