1 /****************************************************************************** 2 * 3 * Copyright (C) 2018 The Android Open Source Project 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ***************************************************************************** 18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore 19 */ 20 21 /** 22 ****************************************************************************** 23 * @file hme_coarse.c 24 * 25 * @brief 26 * Contains ME algorithm for the coarse layer. 27 * 28 * @author 29 * Ittiam 30 * 31 * 32 * List of Functions 33 * hme_update_mv_bank_coarse() 34 * hme_coarse() 35 ****************************************************************************** 36 */ 37 38 /*****************************************************************************/ 39 /* File Includes */ 40 /*****************************************************************************/ 41 /* System include files */ 42 #include <stdio.h> 43 #include <string.h> 44 #include <stdlib.h> 45 #include <assert.h> 46 #include <stdarg.h> 47 #include <math.h> 48 #include <limits.h> 49 50 /* User include files */ 51 #include "ihevc_typedefs.h" 52 #include "itt_video_api.h" 53 #include "ihevce_api.h" 54 55 #include "rc_cntrl_param.h" 56 #include "rc_frame_info_collector.h" 57 #include "rc_look_ahead_params.h" 58 59 #include "ihevc_defs.h" 60 #include "ihevc_structs.h" 61 #include "ihevc_platform_macros.h" 62 #include "ihevc_deblk.h" 63 #include "ihevc_itrans_recon.h" 64 #include "ihevc_chroma_itrans_recon.h" 65 #include "ihevc_chroma_intra_pred.h" 66 #include "ihevc_intra_pred.h" 67 #include "ihevc_inter_pred.h" 68 #include "ihevc_mem_fns.h" 69 #include "ihevc_padding.h" 70 #include "ihevc_weighted_pred.h" 71 #include "ihevc_sao.h" 72 #include "ihevc_resi_trans.h" 73 #include "ihevc_quant_iquant_ssd.h" 74 #include "ihevc_cabac_tables.h" 75 76 #include "ihevce_defs.h" 77 #include "ihevce_lap_enc_structs.h" 78 #include "ihevce_multi_thrd_structs.h" 79 #include "ihevce_multi_thrd_funcs.h" 80 #include "ihevce_me_common_defs.h" 81 #include "ihevce_had_satd.h" 82 #include "ihevce_error_codes.h" 83 #include "ihevce_bitstream.h" 84 #include "ihevce_cabac.h" 85 #include "ihevce_rdoq_macros.h" 86 #include "ihevce_function_selector.h" 87 #include "ihevce_enc_structs.h" 88 #include "ihevce_entropy_structs.h" 89 #include "ihevce_cmn_utils_instr_set_router.h" 90 #include "ihevce_enc_loop_structs.h" 91 #include "ihevce_bs_compute_ctb.h" 92 #include "ihevce_global_tables.h" 93 #include "ihevce_dep_mngr_interface.h" 94 #include "hme_datatype.h" 95 #include "hme_interface.h" 96 #include "hme_common_defs.h" 97 #include "hme_defs.h" 98 #include "ihevce_me_instr_set_router.h" 99 #include "hme_globals.h" 100 #include "hme_utils.h" 101 #include "hme_coarse.h" 102 #include "hme_refine.h" 103 #include "hme_err_compute.h" 104 #include "hme_common_utils.h" 105 #include "hme_search_algo.h" 106 107 /******************************************************************************* 108 * MACROS 109 *******************************************************************************/ 110 #define COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, ps_search_node, shift) \ 111 { \ 112 ps_mv->i2_mv_x = ps_search_node->s_mv.i2_mvx >> (shift); \ 113 ps_mv->i2_mv_y = ps_search_node->s_mv.i2_mvy >> (shift); \ 114 *pi1_ref_idx = ps_search_node->i1_ref_idx; \ 115 } 116 117 /*****************************************************************************/ 118 /* Function Definitions */ 119 /*****************************************************************************/ 120 121 /** 122 ******************************************************************************** 123 * @fn void hme_update_mv_bank_coarse(search_results_t *ps_search_results, 124 * layer_mv_t *ps_layer_mv, 125 * S32 i4_blk_x, 126 * S32 i4_blk_y, 127 * search_node_t *ps_search_node_4x8_l, 128 * search_node_t *ps_search_node_8x4_t, 129 * S08 i1_ref_idx, 130 * mvbank_update_prms_t *ps_prms 131 * 132 * @brief Updates the coarse layer MV Bank for a given ref id and blk pos 133 * 134 * @param[in] ps_search_results: Search results data structure 135 * 136 * @param[in, out] ps_layer_mv : MV Bank for this layer 137 * 138 * @param[in] i4_search_blk_x: column number of the 4x4 blk searched 139 * 140 * @param[in] i4_search_blk_y: row number of the 4x4 blk searched 141 * 142 * @param[in] ps_search_node_4x8_t: Best MV of the 4x8T blk 143 * 144 * @param[in] ps_search_node_8x4_l: Best MV of the 8x4L blk 145 * 146 * @param[in] i1_ref_idx : Reference ID that has been searched 147 * 148 * @param[in] ps_prms : Parameters pertaining to the MV Bank update 149 * 150 * @return None 151 ******************************************************************************** 152 */ 153 void hme_update_mv_bank_coarse( 154 search_results_t *ps_search_results, 155 layer_mv_t *ps_layer_mv, 156 S32 i4_search_blk_x, 157 S32 i4_search_blk_y, 158 search_node_t *ps_search_node_4x8_t, 159 search_node_t *ps_search_node_8x4_l, 160 S08 i1_ref_idx, 161 mvbank_update_prms_t *ps_prms) 162 { 163 /* These point to the MV and ref idx posn to be udpated */ 164 hme_mv_t *ps_mv; 165 S08 *pi1_ref_idx; 166 167 /* Offset within the bank */ 168 S32 i4_offset; 169 170 S32 i, j, i4_blk_x, i4_blk_y; 171 172 /* Best results for 8x4R and 4x8B blocks */ 173 search_node_t *ps_search_node_8x4_r, *ps_search_node_4x8_b; 174 175 /* Number of MVs in a block */ 176 S32 num_mvs = ps_layer_mv->i4_num_mvs_per_ref; 177 178 search_node_t *aps_search_nodes[4]; 179 180 /* The search blk may be different in size from the blk used to hold MV */ 181 i4_blk_x = i4_search_blk_x << ps_prms->i4_shift; 182 i4_blk_y = i4_search_blk_y << ps_prms->i4_shift; 183 184 /* Compute the offset in the MV bank */ 185 i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row; 186 i4_offset *= ps_layer_mv->i4_num_mvs_per_blk; 187 188 /* Identify the correct offset in the mvbank and the reference id buf */ 189 ps_mv = ps_layer_mv->ps_mv + (i4_offset + (num_mvs * i1_ref_idx)); 190 pi1_ref_idx = ps_layer_mv->pi1_ref_idx + (i4_offset + (num_mvs * i1_ref_idx)); 191 192 /*************************************************************************/ 193 /* We have atleast 4 distinct results: the 4x8 top (coming from top blk) */ 194 /* 8x4 left (coming from left blk), 8x4 and 4x8 right and bot resp. */ 195 /* If number of results to be stored is 4, then we store all these 4 */ 196 /* results, else we pick best ones */ 197 /*************************************************************************/ 198 ps_search_node_8x4_r = ps_search_results->aps_part_results[i1_ref_idx][PART_ID_2NxN_B]; 199 ps_search_node_4x8_b = ps_search_results->aps_part_results[i1_ref_idx][PART_ID_Nx2N_R]; 200 201 ASSERT(num_mvs <= 4); 202 203 /* Doing this to sort best results */ 204 aps_search_nodes[0] = ps_search_node_8x4_r; 205 aps_search_nodes[1] = ps_search_node_4x8_b; 206 aps_search_nodes[2] = ps_search_node_8x4_l; 207 aps_search_nodes[3] = ps_search_node_4x8_t; 208 if(num_mvs == 4) 209 { 210 COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, aps_search_nodes[0], 0); 211 ps_mv++; 212 pi1_ref_idx++; 213 COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, aps_search_nodes[1], 0); 214 ps_mv++; 215 pi1_ref_idx++; 216 COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, aps_search_nodes[2], 0); 217 ps_mv++; 218 pi1_ref_idx++; 219 COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, aps_search_nodes[3], 0); 220 ps_mv++; 221 pi1_ref_idx++; 222 return; 223 } 224 225 /* Run through the results, store them in best to worst order */ 226 for(i = 0; i < num_mvs; i++) 227 { 228 for(j = i + 1; j < 4; j++) 229 { 230 if(aps_search_nodes[j]->i4_tot_cost < aps_search_nodes[i]->i4_tot_cost) 231 { 232 SWAP_HME(aps_search_nodes[j], aps_search_nodes[i], search_node_t *); 233 } 234 } 235 COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, aps_search_nodes[i], 0); 236 ps_mv++; 237 pi1_ref_idx++; 238 } 239 } 240 241 /** 242 ******************************************************************************** 243 * @fn void hme_coarse_frm_init(me_ctxt_t *ps_ctxt, coarse_prms_t *ps_coarse_prms) 244 * 245 * @brief Frame init entry point Coarse ME. 246 * 247 * @param[in,out] ps_ctxt: ME Handle 248 * 249 * @param[in] ps_coarse_prms : Coarse layer config params 250 * 251 * @return None 252 ******************************************************************************** 253 */ 254 void hme_coarse_frm_init(coarse_me_ctxt_t *ps_ctxt, coarse_prms_t *ps_coarse_prms) 255 { 256 layer_ctxt_t *ps_curr_layer; 257 258 S32 i4_pic_wd, i4_pic_ht; 259 260 S32 num_blks_in_pic, num_blks_in_row; 261 262 BLK_SIZE_T e_search_blk_size = BLK_4x4; 263 264 S32 blk_size_shift = 2, blk_wd = 4, blk_ht = 4; 265 266 /* Number of references to search */ 267 S32 i4_num_ref; 268 269 ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_coarse_prms->i4_layer_id]; 270 i4_num_ref = ps_coarse_prms->i4_num_ref; 271 272 i4_pic_wd = ps_curr_layer->i4_wd; 273 i4_pic_ht = ps_curr_layer->i4_ht; 274 /* Macro updates num_blks_in_pic and num_blks_in_row*/ 275 GET_NUM_BLKS_IN_PIC(i4_pic_wd, i4_pic_ht, blk_size_shift, num_blks_in_row, num_blks_in_pic); 276 277 /************************************************************************/ 278 /* Initialize the mv bank that holds results of this layer. */ 279 /************************************************************************/ 280 hme_init_mv_bank( 281 ps_curr_layer, 282 BLK_4x4, 283 i4_num_ref, 284 ps_coarse_prms->num_results, 285 ps_ctxt->u1_encode[ps_coarse_prms->i4_layer_id]); 286 287 return; 288 } 289 290 /** 291 ******************************************************************************** 292 * @fn void hme_derive_worst_case_search_range(range_prms_t *ps_range, 293 * range_prms_t *ps_pic_limit, 294 * range_prms_t *ps_mv_limit, 295 * S32 i4_x, 296 * S32 i4_y, 297 * S32 blk_wd, 298 * S32 blk_ht) 299 * 300 * @brief given picture limits and blk dimensions and mv search limits, obtains 301 * teh valid search range such that the blk stays within pic boundaries, 302 * where picture boundaries include padded portions of picture 303 * 304 * @param[out] ps_range: updated with actual search range 305 * 306 * @param[in] ps_pic_limit : picture boundaries 307 * 308 * @param[in] ps_mv_limit: Search range limits for the mvs 309 * 310 * @param[in] i4_x : x coordinate of the blk 311 * 312 * @param[in] i4_y : y coordinate of the blk 313 * 314 * @param[in] blk_wd : blk width 315 * 316 * @param[in] blk_ht : blk height 317 * 318 * @return void 319 ******************************************************************************** 320 */ 321 void hme_derive_worst_case_search_range( 322 range_prms_t *ps_range, 323 range_prms_t *ps_pic_limit, 324 range_prms_t *ps_mv_limit, 325 S32 i4_x, 326 S32 i4_y, 327 S32 blk_wd, 328 S32 blk_ht) 329 { 330 /* Taking max x of left block, min x of current block */ 331 ps_range->i2_max_x = 332 MIN((ps_pic_limit->i2_max_x - (S16)blk_wd - (S16)(i4_x - 4)), ps_mv_limit->i2_max_x); 333 ps_range->i2_min_x = MAX((ps_pic_limit->i2_min_x - (S16)i4_x), ps_mv_limit->i2_min_x); 334 /* Taking max y of top block, min y of current block */ 335 ps_range->i2_max_y = 336 MIN((ps_pic_limit->i2_max_y - (S16)blk_ht - (S16)(i4_y - 4)), ps_mv_limit->i2_max_y); 337 ps_range->i2_min_y = MAX((ps_pic_limit->i2_min_y - (S16)i4_y), ps_mv_limit->i2_min_y); 338 } 339 340 /** 341 ******************************************************************************** 342 * @fn void hme_combine_4x4_sads_and_compute_cost(S08 i1_ref_idx, 343 * range_prms_t *ps_mv_range, 344 * range_prms_t *ps_mv_limit, 345 * hme_mv_t *ps_best_mv_4x8, 346 * hme_mv_t *ps_best_mv_8x4, 347 * pred_ctxt_t *ps_pred_ctxt, 348 * PF_MV_COST_FXN pf_mv_cost_compute, 349 * ME_QUALITY_PRESETS_T e_me_quality_preset, 350 * S16 *pi2_sads_4x4_current, 351 * S16 *pi2_sads_4x4_east, 352 * S16 *pi2_sads_4x4_south, 353 * FILE *fp_dump_sad) 354 * 355 * @brief Does a full search on entire srch window with a given step size in coarse layer 356 * 357 * @param[in] i1_ref_idx : Cur ref idx 358 * 359 * @param[in] ps_layer_ctxt: All info about this layer 360 * 361 * @param[out] ps_best_mv : type hme_mv_t contains best mv x and y 362 * 363 * @param[in] ps_pred_ctxt : Prediction ctxt for cost computation 364 * 365 * @param[in] pf_mv_cost_compute : mv cost computation function 366 * 367 * @return void 368 ******************************************************************************** 369 */ 370 void hme_combine_4x4_sads_and_compute_cost_high_quality( 371 S08 i1_ref_idx, 372 range_prms_t *ps_mv_range, 373 range_prms_t *ps_mv_limit, 374 hme_mv_t *ps_best_mv_4x8, 375 hme_mv_t *ps_best_mv_8x4, 376 pred_ctxt_t *ps_pred_ctxt, 377 PF_MV_COST_FXN pf_mv_cost_compute, 378 S16 *pi2_sads_4x4_current, 379 S16 *pi2_sads_4x4_east, 380 S16 *pi2_sads_4x4_south) 381 { 382 /* These control number of parts and number of pts in grid to search */ 383 S32 stepy, stepx, best_mv_y_4x8, best_mv_x_4x8, best_mv_y_8x4, best_mv_x_8x4; 384 S32 step_shift_x, step_shift_y; 385 S32 mvx, mvy, mv_x_offset, mv_y_offset, mv_x_range, mv_y_range; 386 387 S32 min_cost_4x8 = MAX_32BIT_VAL; 388 S32 min_cost_8x4 = MAX_32BIT_VAL; 389 390 search_node_t s_search_node; 391 s_search_node.i1_ref_idx = i1_ref_idx; 392 393 stepx = stepy = HME_COARSE_STEP_SIZE_HIGH_QUALITY; 394 /*TODO: Calculate Step shift from the #define HME_COARSE_STEP_SIZE_HIGH_QUALITY */ 395 step_shift_x = step_shift_y = 1; 396 397 mv_x_offset = (-ps_mv_limit->i2_min_x >> step_shift_x); 398 mv_y_offset = (-ps_mv_limit->i2_min_y >> step_shift_y); 399 mv_x_range = (-ps_mv_limit->i2_min_x + ps_mv_limit->i2_max_x) >> step_shift_x; 400 mv_y_range = (-ps_mv_limit->i2_min_y + ps_mv_limit->i2_max_y) >> step_shift_y; 401 402 /* Run 2loops to sweep over the reference area */ 403 for(mvy = ps_mv_range->i2_min_y; mvy < ps_mv_range->i2_max_y; mvy += stepy) 404 { 405 for(mvx = ps_mv_range->i2_min_x; mvx < ps_mv_range->i2_max_x; mvx += stepx) 406 { 407 S32 sad_4x8, cost_4x8, sad_8x4, cost_8x4; 408 S32 sad_pos = ((mvx >> step_shift_x) + mv_x_offset) + 409 ((mvy >> step_shift_y) + mv_y_offset) * mv_x_range; 410 411 /* Get SAD by adding SAD for current and neighbour S */ 412 sad_4x8 = pi2_sads_4x4_current[sad_pos] + pi2_sads_4x4_south[sad_pos]; 413 sad_8x4 = pi2_sads_4x4_current[sad_pos] + pi2_sads_4x4_east[sad_pos]; 414 415 // fprintf(fp_dump_sad,"%d\t",sad); 416 s_search_node.s_mv.i2_mvx = mvx; 417 s_search_node.s_mv.i2_mvy = mvy; 418 419 cost_4x8 = cost_8x4 = 420 pf_mv_cost_compute(&s_search_node, ps_pred_ctxt, PART_ID_2Nx2N, MV_RES_FPEL); 421 422 cost_4x8 += sad_4x8; 423 cost_8x4 += sad_8x4; 424 425 if(cost_4x8 < min_cost_4x8) 426 { 427 best_mv_x_4x8 = mvx; 428 best_mv_y_4x8 = mvy; 429 min_cost_4x8 = cost_4x8; 430 } 431 if(cost_8x4 < min_cost_8x4) 432 { 433 best_mv_x_8x4 = mvx; 434 best_mv_y_8x4 = mvy; 435 min_cost_8x4 = cost_8x4; 436 } 437 } 438 } 439 440 ps_best_mv_4x8->i2_mv_x = best_mv_x_4x8; 441 ps_best_mv_4x8->i2_mv_y = best_mv_y_4x8; 442 443 ps_best_mv_8x4->i2_mv_x = best_mv_x_8x4; 444 ps_best_mv_8x4->i2_mv_y = best_mv_y_8x4; 445 } 446 447 void hme_combine_4x4_sads_and_compute_cost_high_speed( 448 S08 i1_ref_idx, 449 range_prms_t *ps_mv_range, 450 range_prms_t *ps_mv_limit, 451 hme_mv_t *ps_best_mv_4x8, 452 hme_mv_t *ps_best_mv_8x4, 453 pred_ctxt_t *ps_pred_ctxt, 454 PF_MV_COST_FXN pf_mv_cost_compute, 455 S16 *pi2_sads_4x4_current, 456 S16 *pi2_sads_4x4_east, 457 S16 *pi2_sads_4x4_south) 458 { 459 /* These control number of parts and number of pts in grid to search */ 460 S32 stepy, stepx, best_mv_y_4x8, best_mv_x_4x8, best_mv_y_8x4, best_mv_x_8x4; 461 S32 step_shift_x, step_shift_y; 462 S32 mvx, mvy, mv_x_offset, mv_y_offset, mv_x_range, mv_y_range; 463 464 S32 rnd, lambda, lambda_q_shift; 465 466 S32 min_cost_4x8 = MAX_32BIT_VAL; 467 S32 min_cost_8x4 = MAX_32BIT_VAL; 468 469 (void)pf_mv_cost_compute; 470 stepx = stepy = HME_COARSE_STEP_SIZE_HIGH_SPEED; 471 /*TODO: Calculate Step shift from the #define HME_COARSE_STEP_SIZE_HIGH_SPEED */ 472 step_shift_x = step_shift_y = 2; 473 474 mv_x_offset = (-ps_mv_limit->i2_min_x >> step_shift_x); 475 mv_y_offset = (-ps_mv_limit->i2_min_y >> step_shift_y); 476 mv_x_range = (-ps_mv_limit->i2_min_x + ps_mv_limit->i2_max_x) >> step_shift_x; 477 mv_y_range = (-ps_mv_limit->i2_min_y + ps_mv_limit->i2_max_y) >> step_shift_y; 478 479 lambda = ps_pred_ctxt->lambda; 480 lambda_q_shift = ps_pred_ctxt->lambda_q_shift; 481 rnd = 1 << (lambda_q_shift - 1); 482 483 ASSERT(MAX_MVX_SUPPORTED_IN_COARSE_LAYER >= ABS(ps_mv_range->i2_max_x)); 484 ASSERT(MAX_MVY_SUPPORTED_IN_COARSE_LAYER >= ABS(ps_mv_range->i2_max_y)); 485 486 /* Run 2loops to sweep over the reference area */ 487 for(mvy = ps_mv_range->i2_min_y; mvy < ps_mv_range->i2_max_y; mvy += stepy) 488 { 489 for(mvx = ps_mv_range->i2_min_x; mvx < ps_mv_range->i2_max_x; mvx += stepx) 490 { 491 S32 sad_4x8, cost_4x8, sad_8x4, cost_8x4; 492 493 S32 sad_pos = ((mvx >> step_shift_x) + mv_x_offset) + 494 ((mvy >> step_shift_y) + mv_y_offset) * mv_x_range; 495 496 /* Get SAD by adding SAD for current and neighbour S */ 497 sad_4x8 = pi2_sads_4x4_current[sad_pos] + pi2_sads_4x4_south[sad_pos]; 498 sad_8x4 = pi2_sads_4x4_current[sad_pos] + pi2_sads_4x4_east[sad_pos]; 499 500 // fprintf(fp_dump_sad,"%d\t",sad); 501 502 cost_4x8 = cost_8x4 = 503 (2 * hme_get_range(ABS(mvx)) - 1) + (2 * hme_get_range(ABS(mvy)) - 1) + i1_ref_idx; 504 505 cost_4x8 += (mvx != 0) ? 1 : 0; 506 cost_4x8 += (mvy != 0) ? 1 : 0; 507 cost_4x8 = (cost_4x8 * lambda + rnd) >> lambda_q_shift; 508 509 cost_8x4 += (mvx != 0) ? 1 : 0; 510 cost_8x4 += (mvy != 0) ? 1 : 0; 511 cost_8x4 = (cost_8x4 * lambda + rnd) >> lambda_q_shift; 512 513 cost_4x8 += sad_4x8; 514 cost_8x4 += sad_8x4; 515 516 if(cost_4x8 < min_cost_4x8) 517 { 518 best_mv_x_4x8 = mvx; 519 best_mv_y_4x8 = mvy; 520 min_cost_4x8 = cost_4x8; 521 } 522 if(cost_8x4 < min_cost_8x4) 523 { 524 best_mv_x_8x4 = mvx; 525 best_mv_y_8x4 = mvy; 526 min_cost_8x4 = cost_8x4; 527 } 528 } 529 } 530 531 ps_best_mv_4x8->i2_mv_x = best_mv_x_4x8; 532 ps_best_mv_4x8->i2_mv_y = best_mv_y_4x8; 533 534 ps_best_mv_8x4->i2_mv_x = best_mv_x_8x4; 535 ps_best_mv_8x4->i2_mv_y = best_mv_y_8x4; 536 } 537 538 /** 539 ******************************************************************************** 540 * @fn hme_store_4x4_sads(hme_search_prms_t *ps_search_prms, 541 * layer_ctxt_t *ps_layer_ctxt) 542 * 543 * @brief Does a 4x4 sad computation on a given range and stores it in memory 544 * 545 * @param[in] ps_search_prms : Search prms structure containing info like 546 * blk dimensions, search range etc 547 * 548 * @param[in] ps_layer_ctxt: All info about this layer 549 * 550 * @param[in] ps_wt_inp_prms: All info about weighted input 551 * 552 * @param[in] e_me_quality_preset: motion estimation quality preset 553 * 554 * @param[in] pi2_sads_4x4: Memory to store all 4x4 SADs for given range 555 * 556 * @return void 557 ******************************************************************************** 558 */ 559 560 void hme_store_4x4_sads_high_quality( 561 hme_search_prms_t *ps_search_prms, 562 layer_ctxt_t *ps_layer_ctxt, 563 range_prms_t *ps_mv_limit, 564 wgt_pred_ctxt_t *ps_wt_inp_prms, 565 S16 *pi2_sads_4x4) 566 { 567 S32 sad, i, j; 568 569 /* Input and reference attributes */ 570 U08 *pu1_inp, *pu1_inp_orig, *pu1_ref; 571 S32 i4_inp_stride, i4_ref_stride, i4_ref_offset; 572 573 /* The reference is actually an array of ptrs since there are several */ 574 /* reference id. So an array gets passed form calling function */ 575 U08 **ppu1_ref, *pu1_ref_coloc; 576 577 S32 stepy, stepx, step_shift_x, step_shift_y; 578 S32 mvx, mvy, mv_x_offset, mv_y_offset, mv_x_range, mv_y_range; 579 580 /* Points to the range limits for mv */ 581 range_prms_t *ps_range_prms; 582 583 /* Reference index to be searched */ 584 S32 i4_search_idx = ps_search_prms->i1_ref_idx; 585 /* Using the member 0 to store for all ref. idx. */ 586 ps_range_prms = ps_search_prms->aps_mv_range[0]; 587 pu1_inp_orig = ps_wt_inp_prms->apu1_wt_inp[i4_search_idx]; 588 i4_inp_stride = ps_search_prms->i4_inp_stride; 589 590 /* Move to the location of the search blk in inp buffer */ 591 pu1_inp_orig += ps_search_prms->i4_cu_x_off; 592 pu1_inp_orig += ps_search_prms->i4_cu_y_off * i4_inp_stride; 593 594 /*************************************************************************/ 595 /* we use either input of previously encoded pictures as reference */ 596 /* in coarse layer */ 597 /*************************************************************************/ 598 i4_ref_stride = ps_layer_ctxt->i4_inp_stride; 599 ppu1_ref = ps_layer_ctxt->ppu1_list_inp; 600 601 /* colocated position in reference picture */ 602 i4_ref_offset = (i4_ref_stride * ps_search_prms->i4_y_off) + ps_search_prms->i4_x_off; 603 pu1_ref_coloc = ppu1_ref[i4_search_idx] + i4_ref_offset; 604 605 stepx = stepy = HME_COARSE_STEP_SIZE_HIGH_QUALITY; 606 /*TODO: Calculate Step shift from the #define HME_COARSE_STEP_SIZE_HIGH_QUALITY */ 607 step_shift_x = step_shift_y = 1; 608 609 mv_x_offset = -(ps_mv_limit->i2_min_x >> step_shift_x); 610 mv_y_offset = -(ps_mv_limit->i2_min_y >> step_shift_y); 611 mv_x_range = (-ps_mv_limit->i2_min_x + ps_mv_limit->i2_max_x) >> step_shift_x; 612 mv_y_range = (-ps_mv_limit->i2_min_y + ps_mv_limit->i2_max_y) >> step_shift_y; 613 614 /* Run 2loops to sweep over the reference area */ 615 for(mvy = ps_range_prms->i2_min_y; mvy < ps_range_prms->i2_max_y; mvy += stepy) 616 { 617 for(mvx = ps_range_prms->i2_min_x; mvx < ps_range_prms->i2_max_x; mvx += stepx) 618 { 619 /* Set up the reference and inp ptr */ 620 pu1_ref = pu1_ref_coloc + mvx + (mvy * i4_ref_stride); 621 pu1_inp = pu1_inp_orig; 622 /* SAD computation */ 623 { 624 sad = 0; 625 for(i = 0; i < 4; i++) 626 { 627 for(j = 0; j < 4; j++) 628 { 629 sad += (ABS(((S32)pu1_inp[j] - (S32)pu1_ref[j]))); 630 } 631 pu1_inp += i4_inp_stride; 632 pu1_ref += i4_ref_stride; 633 } 634 } 635 636 pi2_sads_4x4 637 [((mvx >> step_shift_x) + mv_x_offset) + 638 ((mvy >> step_shift_y) + mv_y_offset) * mv_x_range] = sad; 639 } 640 } 641 } 642 643 void hme_store_4x4_sads_high_speed( 644 hme_search_prms_t *ps_search_prms, 645 layer_ctxt_t *ps_layer_ctxt, 646 range_prms_t *ps_mv_limit, 647 wgt_pred_ctxt_t *ps_wt_inp_prms, 648 S16 *pi2_sads_4x4) 649 { 650 S32 sad, i, j; 651 652 /* Input and reference attributes */ 653 U08 *pu1_inp, *pu1_inp_orig, *pu1_ref; 654 S32 i4_inp_stride, i4_ref_stride, i4_ref_offset; 655 656 /* The reference is actually an array of ptrs since there are several */ 657 /* reference id. So an array gets passed form calling function */ 658 U08 **ppu1_ref, *pu1_ref_coloc; 659 660 S32 stepy, stepx, step_shift_x, step_shift_y; 661 S32 mvx, mvy, mv_x_offset, mv_y_offset, mv_x_range, mv_y_range; 662 663 /* Points to the range limits for mv */ 664 range_prms_t *ps_range_prms; 665 666 /* Reference index to be searched */ 667 S32 i4_search_idx = ps_search_prms->i1_ref_idx; 668 669 /* Using the member 0 for all ref. idx */ 670 ps_range_prms = ps_search_prms->aps_mv_range[0]; 671 pu1_inp_orig = ps_wt_inp_prms->apu1_wt_inp[i4_search_idx]; 672 i4_inp_stride = ps_search_prms->i4_inp_stride; 673 674 /* Move to the location of the search blk in inp buffer */ 675 pu1_inp_orig += ps_search_prms->i4_cu_x_off; 676 pu1_inp_orig += ps_search_prms->i4_cu_y_off * i4_inp_stride; 677 678 /*************************************************************************/ 679 /* we use either input of previously encoded pictures as reference */ 680 /* in coarse layer */ 681 /*************************************************************************/ 682 i4_ref_stride = ps_layer_ctxt->i4_inp_stride; 683 ppu1_ref = ps_layer_ctxt->ppu1_list_inp; 684 685 /* colocated position in reference picture */ 686 i4_ref_offset = (i4_ref_stride * ps_search_prms->i4_y_off) + ps_search_prms->i4_x_off; 687 pu1_ref_coloc = ppu1_ref[i4_search_idx] + i4_ref_offset; 688 689 stepx = stepy = HME_COARSE_STEP_SIZE_HIGH_SPEED; 690 /*TODO: Calculate Step shift from the #define HME_COARSE_STEP_SIZE_HIGH_SPEED */ 691 step_shift_x = step_shift_y = 2; 692 693 mv_x_offset = -(ps_mv_limit->i2_min_x >> step_shift_x); 694 mv_y_offset = -(ps_mv_limit->i2_min_y >> step_shift_y); 695 mv_x_range = (-ps_mv_limit->i2_min_x + ps_mv_limit->i2_max_x) >> step_shift_x; 696 mv_y_range = (-ps_mv_limit->i2_min_y + ps_mv_limit->i2_max_y) >> step_shift_y; 697 698 /* Run 2loops to sweep over the reference area */ 699 for(mvy = ps_range_prms->i2_min_y; mvy < ps_range_prms->i2_max_y; mvy += stepy) 700 { 701 for(mvx = ps_range_prms->i2_min_x; mvx < ps_range_prms->i2_max_x; mvx += stepx) 702 { 703 /* Set up the reference and inp ptr */ 704 pu1_ref = pu1_ref_coloc + mvx + (mvy * i4_ref_stride); 705 pu1_inp = pu1_inp_orig; 706 /* SAD computation */ 707 { 708 sad = 0; 709 for(i = 0; i < 4; i++) 710 { 711 for(j = 0; j < 4; j++) 712 { 713 sad += (ABS(((S32)pu1_inp[j] - (S32)pu1_ref[j]))); 714 } 715 pu1_inp += i4_inp_stride; 716 pu1_ref += i4_ref_stride; 717 } 718 } 719 720 pi2_sads_4x4 721 [((mvx >> step_shift_x) + mv_x_offset) + 722 ((mvy >> step_shift_y) + mv_y_offset) * mv_x_range] = sad; 723 } 724 } 725 } 726 /** 727 ******************************************************************************** 728 * @fn void hme_coarsest(me_ctxt_t *ps_ctxt, coarse_prms_t *ps_coarse_prms) 729 * 730 * @brief Top level entry point for Coarse ME. Runs across blks and searches 731 * at a 4x4 blk granularity by using 4x8 and 8x4 patterns. 732 * 733 * @param[in,out] ps_ctxt: ME Handle 734 * 735 * @param[in] ps_coarse_prms : Coarse layer config params 736 * 737 * @param[in] ps_multi_thrd_ctxt : Multi thread context 738 * 739 * @return None 740 ******************************************************************************** 741 */ 742 void hme_coarsest( 743 coarse_me_ctxt_t *ps_ctxt, 744 coarse_prms_t *ps_coarse_prms, 745 multi_thrd_ctxt_t *ps_multi_thrd_ctxt, 746 WORD32 i4_ping_pong, 747 void **ppv_dep_mngr_hme_sync) 748 { 749 S16 *pi2_cur_ref_sads_4x4; 750 S32 ai4_sad_4x4_block_size[MAX_NUM_REF], ai4_sad_4x4_block_stride[MAX_NUM_REF]; 751 S32 num_rows_coarse; 752 S32 sad_top_offset, sad_current_offset; 753 S32 search_node_top_offset, search_node_left_offset; 754 755 ME_QUALITY_PRESETS_T e_me_quality_preset = 756 ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets; 757 758 search_results_t *ps_search_results; 759 mvbank_update_prms_t s_mv_update_prms; 760 BLK_SIZE_T e_search_blk_size = BLK_4x4; 761 hme_search_prms_t s_search_prms_4x8, s_search_prms_8x4, s_search_prms_4x4; 762 763 S32 global_id_8x4, global_id_4x8; 764 765 /*************************************************************************/ 766 /* These directly point to the best search result nodes that will be */ 767 /* updated by the search algorithm, rather than have to go through an */ 768 /* elaborate structure */ 769 /*************************************************************************/ 770 search_node_t *aps_best_search_node_8x4[MAX_NUM_REF]; 771 search_node_t *aps_best_search_node_4x8[MAX_NUM_REF]; 772 773 /* These point to various spatial candts */ 774 search_node_t *ps_candt_8x4_l, *ps_candt_8x4_t, *ps_candt_8x4_tl; 775 search_node_t *ps_candt_4x8_l, *ps_candt_4x8_t, *ps_candt_4x8_tl; 776 search_node_t *ps_candt_zeromv_8x4, *ps_candt_zeromv_4x8; 777 search_node_t *ps_candt_fs_8x4, *ps_candt_fs_4x8; 778 search_node_t as_top_neighbours[4], as_left_neighbours[3]; 779 780 /* Holds the global mv for a given ref index */ 781 search_node_t s_candt_global[MAX_NUM_REF]; 782 783 /* All the search candidates */ 784 search_candt_t as_search_candts_8x4[MAX_INIT_CANDTS]; 785 search_candt_t as_search_candts_4x8[MAX_INIT_CANDTS]; 786 search_candt_t *ps_search_candts_8x4, *ps_search_candts_4x8; 787 788 /* Actual range per blk and the pic level boundaries */ 789 range_prms_t s_range_prms, s_pic_limit, as_mv_limit[MAX_NUM_REF]; 790 791 /* Current and prev pic layer ctxt at the coarsest layer */ 792 layer_ctxt_t *ps_curr_layer, *ps_prev_layer; 793 794 /* best mv of full search */ 795 hme_mv_t best_mv_4x8, best_mv_8x4; 796 797 /* Book keeping at blk level */ 798 S32 blk_x, num_blks_in_pic, num_blks_in_row, num_4x4_blks_in_row; 799 800 S32 blk_y; 801 802 /* Block dimensions */ 803 S32 blk_size_shift = 2, blk_wd = 4, blk_ht = 4; 804 805 S32 lambda = ps_coarse_prms->lambda; 806 807 /* Number of references to search */ 808 S32 i4_num_ref; 809 810 S32 i4_i, id, i; 811 S08 i1_ref_idx; 812 813 S32 i4_pic_wd, i4_pic_ht; 814 S32 i4_layer_id; 815 816 S32 end_of_frame; 817 818 pf_get_wt_inp fp_get_wt_inp; 819 820 /* Maximum search iterations around any candidate */ 821 S32 i4_max_iters = ps_coarse_prms->i4_max_iters; 822 823 ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_coarse_prms->i4_layer_id]; 824 ps_prev_layer = hme_coarse_get_past_layer_ctxt(ps_ctxt, ps_coarse_prms->i4_layer_id); 825 826 /* We need only one instance of search results structure */ 827 ps_search_results = &ps_ctxt->s_search_results_8x8; 828 829 ps_search_candts_8x4 = &as_search_candts_8x4[0]; 830 ps_search_candts_4x8 = &as_search_candts_4x8[0]; 831 832 end_of_frame = 0; 833 834 i4_pic_wd = ps_curr_layer->i4_wd; 835 i4_pic_ht = ps_curr_layer->i4_ht; 836 837 fp_get_wt_inp = ((ihevce_me_optimised_function_list_t *)ps_ctxt->pv_me_optimised_function_list) 838 ->pf_get_wt_inp_8x8; 839 840 num_rows_coarse = ps_ctxt->i4_num_row_bufs; 841 842 /*************************************************************************/ 843 /* Coarse Layer always does explicit search. Number of reference frames */ 844 /* to search is a configurable parameter supplied by the application */ 845 /*************************************************************************/ 846 i4_num_ref = ps_coarse_prms->i4_num_ref; 847 i4_layer_id = ps_coarse_prms->i4_layer_id; 848 849 /*************************************************************************/ 850 /* The search algorithm goes as follows: */ 851 /* */ 852 /* ___ */ 853 /* | e | */ 854 /* ___|___|___ */ 855 /* | c | a | b | */ 856 /* |___|___|___| */ 857 /* | d | */ 858 /* |___| */ 859 /* */ 860 /* For the target block a, we collect best results from 2 8x4 blks */ 861 /* These are c-a and a-b. The 4x8 blks are e-a and a-d */ 862 /* c-a result is already available from results of blk c. a-b is */ 863 /* evaluated in this blk. Likewise e-a result is stored in a row buffer */ 864 /* a-d is evaluated this blk */ 865 /* So we store a row buffer which stores best 4x8 results of all top blk */ 866 /*************************************************************************/ 867 868 /************************************************************************/ 869 /* Initialize the pointers to the best node. */ 870 /************************************************************************/ 871 for(i4_i = 0; i4_i < i4_num_ref; i4_i++) 872 { 873 aps_best_search_node_8x4[i4_i] = ps_search_results->aps_part_results[i4_i][PART_ID_2NxN_B]; 874 aps_best_search_node_4x8[i4_i] = ps_search_results->aps_part_results[i4_i][PART_ID_Nx2N_R]; 875 } 876 877 /************************************************************************/ 878 /* Initialize the "searchresults" structure. This will set up the number*/ 879 /* of search types, result updates etc */ 880 /************************************************************************/ 881 { 882 S32 num_results_per_part; 883 /* We evaluate 4 types of results per 4x4 blk. 8x4L and 8x4R and */ 884 /* 4x8 T and 4x8B. So if we are to give 4 results, then we need to */ 885 /* only evaluate 1 result per part. In the coarse layer, we are */ 886 /* limited to 2 results max per part, and max of 8 results. */ 887 num_results_per_part = (ps_coarse_prms->num_results + 3) >> 2; 888 hme_init_search_results( 889 ps_search_results, 890 i4_num_ref, 891 ps_coarse_prms->num_results, 892 num_results_per_part, 893 BLK_8x8, 894 0, 895 0, 896 ps_ctxt->au1_is_past); 897 } 898 899 /* Macro updates num_blks_in_pic and num_blks_in_row*/ 900 GET_NUM_BLKS_IN_PIC(i4_pic_wd, i4_pic_ht, blk_size_shift, num_blks_in_row, num_blks_in_pic); 901 902 num_4x4_blks_in_row = num_blks_in_row + 1; 903 904 s_mv_update_prms.e_search_blk_size = e_search_blk_size; 905 s_mv_update_prms.i4_num_ref = i4_num_ref; 906 s_mv_update_prms.i4_shift = 0; 907 908 /* For full search, support 2 or 4 step size */ 909 if(ps_coarse_prms->do_full_search) 910 { 911 ASSERT((ps_coarse_prms->full_search_step == 2) || (ps_coarse_prms->full_search_step == 4)); 912 } 913 914 for(i4_i = 0; i4_i < i4_num_ref; i4_i++) 915 { 916 S32 blk, delta_poc; 917 S32 mv_x_clip, mv_y_clip; 918 /* Initialize only the first row */ 919 for(blk = 0; blk < num_blks_in_row; blk++) 920 { 921 INIT_SEARCH_NODE(&ps_ctxt->aps_best_search_nodes_4x8_n_rows[i4_i][blk], i4_i); 922 } 923 924 delta_poc = ABS(ps_curr_layer->i4_poc - ps_curr_layer->ai4_ref_id_to_poc_lc[i4_i]); 925 926 /* Setting search range for different references based on the delta poc */ 927 /*************************************************************************/ 928 /* set the MV limit per ref. pic. */ 929 /* - P pic. : Based on the config params. */ 930 /* - B/b pic: Based on the Max/Min MV from prev. P and config. param. */ 931 /*************************************************************************/ 932 { 933 /* TO DO : Remove hard coding of P-P dist. of 4 */ 934 mv_x_clip = (ps_curr_layer->i2_max_mv_x * delta_poc) / 4; 935 936 /* Only for B/b pic. */ 937 if(1 == ps_ctxt->s_frm_prms.bidir_enabled) 938 { 939 WORD16 i2_mv_y_per_poc; 940 941 /* Get abs MAX for symmetric search */ 942 i2_mv_y_per_poc = 943 MAX(ps_ctxt->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[i4_layer_id], 944 (ABS(ps_ctxt->s_coarse_dyn_range_prms.i2_dyn_min_y_per_poc[i4_layer_id]))); 945 946 mv_y_clip = i2_mv_y_per_poc * delta_poc; 947 } 948 /* Set the Config. File Params for P pic. */ 949 else 950 { 951 /* TO DO : Remove hard coding of P-P dist. of 4 */ 952 mv_y_clip = (ps_curr_layer->i2_max_mv_y * delta_poc) / 4; 953 } 954 955 /* Making mv_x and mv_y range multiple of 4 */ 956 mv_x_clip = (((mv_x_clip + 3) >> 2) << 2); 957 mv_y_clip = (((mv_y_clip + 3) >> 2) << 2); 958 /* Clipping the range of mv_x and mv_y */ 959 mv_x_clip = CLIP3(mv_x_clip, 4, MAX_MVX_SUPPORTED_IN_COARSE_LAYER); 960 mv_y_clip = CLIP3(mv_y_clip, 4, MAX_MVY_SUPPORTED_IN_COARSE_LAYER); 961 962 as_mv_limit[i4_i].i2_min_x = -mv_x_clip; 963 as_mv_limit[i4_i].i2_min_y = -mv_y_clip; 964 as_mv_limit[i4_i].i2_max_x = mv_x_clip; 965 as_mv_limit[i4_i].i2_max_y = mv_y_clip; 966 } 967 /*Populating SAD block size based on search range */ 968 ai4_sad_4x4_block_size[i4_i] = ((2 * mv_x_clip) / ps_coarse_prms->full_search_step) * 969 ((2 * mv_y_clip) / ps_coarse_prms->full_search_step); 970 ai4_sad_4x4_block_stride[i4_i] = (num_blks_in_row + 1) * ai4_sad_4x4_block_size[i4_i]; 971 } 972 973 for(i = 0; i < 2 * MAX_INIT_CANDTS; i++) 974 { 975 search_node_t *ps_search_node; 976 ps_search_node = &ps_ctxt->s_init_search_node[i]; 977 INIT_SEARCH_NODE(ps_search_node, 0); 978 } 979 for(i = 0; i < 3; i++) 980 { 981 search_node_t *ps_search_node; 982 ps_search_node = &as_left_neighbours[i]; 983 INIT_SEARCH_NODE(ps_search_node, 0); 984 ps_search_node = &as_top_neighbours[i]; 985 INIT_SEARCH_NODE(ps_search_node, 0); 986 } 987 INIT_SEARCH_NODE(&as_top_neighbours[3], 0); 988 /* Set up place holders to hold the search nodes of each initial candt */ 989 for(i = 0; i < MAX_INIT_CANDTS; i++) 990 { 991 ps_search_candts_8x4[i].ps_search_node = &ps_ctxt->s_init_search_node[i]; 992 993 ps_search_candts_4x8[i].ps_search_node = &ps_ctxt->s_init_search_node[MAX_INIT_CANDTS + i]; 994 995 ps_search_candts_8x4[i].u1_num_steps_refine = (U08)i4_max_iters; 996 ps_search_candts_4x8[i].u1_num_steps_refine = (U08)i4_max_iters; 997 } 998 999 /* For Top,TopLeft and Left cand., no need for refinement */ 1000 id = 0; 1001 if((ps_coarse_prms->do_full_search) && (ME_XTREME_SPEED_25 == e_me_quality_preset)) 1002 { 1003 /* This search candt has the full search result */ 1004 ps_candt_fs_8x4 = ps_search_candts_8x4[id].ps_search_node; 1005 id++; 1006 } 1007 1008 ps_candt_8x4_l = ps_search_candts_8x4[id].ps_search_node; 1009 ps_search_candts_8x4[id].u1_num_steps_refine = 0; 1010 id++; 1011 ps_candt_8x4_t = ps_search_candts_8x4[id].ps_search_node; 1012 ps_search_candts_8x4[id].u1_num_steps_refine = 0; 1013 id++; 1014 ps_candt_8x4_tl = ps_search_candts_8x4[id].ps_search_node; 1015 ps_search_candts_8x4[id].u1_num_steps_refine = 0; 1016 id++; 1017 /* This search candt stores the global candt */ 1018 global_id_8x4 = id; 1019 id++; 1020 1021 if((ps_coarse_prms->do_full_search) && (ME_XTREME_SPEED_25 != e_me_quality_preset)) 1022 { 1023 /* This search candt has the full search result */ 1024 ps_candt_fs_8x4 = ps_search_candts_8x4[id].ps_search_node; 1025 id++; 1026 } 1027 /* Don't increment id as (0,0) is removed from cand. list. Initializing */ 1028 /* the pointer for hme_init_pred_ctxt_no_encode() */ 1029 ps_candt_zeromv_8x4 = ps_search_candts_8x4[id].ps_search_node; 1030 1031 /* For Top,TopLeft and Left cand., no need for refinement */ 1032 id = 0; 1033 if((ps_coarse_prms->do_full_search) && (ME_XTREME_SPEED_25 == e_me_quality_preset)) 1034 { 1035 /* This search candt has the full search result */ 1036 ps_candt_fs_4x8 = ps_search_candts_4x8[id].ps_search_node; 1037 id++; 1038 } 1039 1040 ps_candt_4x8_l = ps_search_candts_4x8[id].ps_search_node; 1041 ps_search_candts_4x8[id].u1_num_steps_refine = 0; 1042 id++; 1043 ps_candt_4x8_t = ps_search_candts_4x8[id].ps_search_node; 1044 ps_search_candts_4x8[id].u1_num_steps_refine = 0; 1045 id++; 1046 ps_candt_4x8_tl = ps_search_candts_4x8[id].ps_search_node; 1047 ps_search_candts_4x8[id].u1_num_steps_refine = 0; 1048 id++; 1049 /* This search candt stores the global candt */ 1050 global_id_4x8 = id; 1051 id++; 1052 if((ps_coarse_prms->do_full_search) && (ME_XTREME_SPEED_25 != e_me_quality_preset)) 1053 { 1054 /* This search candt has the full search result */ 1055 ps_candt_fs_4x8 = ps_search_candts_4x8[id].ps_search_node; 1056 id++; 1057 } 1058 /* Don't increment id4as (0,0) is removed from cand. list. Initializing */ 1059 /* the pointer for hme_init_pred_ctxt_no_encode() */ 1060 ps_candt_zeromv_4x8 = ps_search_candts_4x8[id].ps_search_node; 1061 1062 /* Zero mv always has 0 mvx and y componnent, ref idx initialized inside */ 1063 ps_candt_zeromv_8x4->s_mv.i2_mvx = 0; 1064 ps_candt_zeromv_8x4->s_mv.i2_mvy = 0; 1065 ps_candt_zeromv_4x8->s_mv.i2_mvx = 0; 1066 ps_candt_zeromv_4x8->s_mv.i2_mvy = 0; 1067 1068 /* SET UP THE PRED CTXT FOR L0 AND L1 */ 1069 { 1070 S32 pred_lx; 1071 1072 /* Bottom left always not available */ 1073 as_left_neighbours[2].u1_is_avail = 0; 1074 1075 for(pred_lx = 0; pred_lx < 2; pred_lx++) 1076 { 1077 pred_ctxt_t *ps_pred_ctxt; 1078 1079 ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx]; 1080 hme_init_pred_ctxt_no_encode( 1081 ps_pred_ctxt, 1082 ps_search_results, 1083 as_top_neighbours, 1084 as_left_neighbours, 1085 NULL, 1086 ps_candt_zeromv_8x4, 1087 ps_candt_zeromv_8x4, 1088 pred_lx, 1089 lambda, 1090 ps_coarse_prms->lambda_q_shift, 1091 ps_ctxt->apu1_ref_bits_tlu_lc, 1092 ps_ctxt->ai2_ref_scf); 1093 } 1094 } 1095 1096 /*************************************************************************/ 1097 /* Initialize the search parameters for search algo with the following */ 1098 /* parameters: No SATD, calculated number of initial candidates, */ 1099 /* No post refinement, initial step size and number of iterations as */ 1100 /* passed by the calling function. */ 1101 /* Also, we use input for this layer search, and not recon. */ 1102 /*************************************************************************/ 1103 if(e_me_quality_preset == ME_XTREME_SPEED_25) 1104 s_search_prms_8x4.i4_num_init_candts = 1; 1105 else 1106 s_search_prms_8x4.i4_num_init_candts = id; 1107 s_search_prms_8x4.i4_use_satd = 0; 1108 s_search_prms_8x4.i4_start_step = ps_coarse_prms->i4_start_step; 1109 s_search_prms_8x4.i4_num_steps_post_refine = 0; 1110 s_search_prms_8x4.i4_use_rec = 0; 1111 s_search_prms_8x4.ps_search_candts = ps_search_candts_8x4; 1112 s_search_prms_8x4.e_blk_size = BLK_8x4; 1113 s_search_prms_8x4.i4_max_iters = ps_coarse_prms->i4_max_iters; 1114 /* Coarse layer is always explicit */ 1115 if(ME_MEDIUM_SPEED > e_me_quality_preset) 1116 { 1117 s_search_prms_8x4.pf_mv_cost_compute = compute_mv_cost_coarse; 1118 } 1119 else 1120 { 1121 s_search_prms_8x4.pf_mv_cost_compute = compute_mv_cost_coarse_high_speed; 1122 } 1123 1124 s_search_prms_8x4.i4_inp_stride = 8; 1125 s_search_prms_8x4.i4_cu_x_off = s_search_prms_8x4.i4_cu_y_off = 0; 1126 if(ps_coarse_prms->do_full_search) 1127 s_search_prms_8x4.i4_max_iters = 1; 1128 s_search_prms_8x4.i4_part_mask = (1 << PART_ID_2NxN_B); 1129 /* Using the member 0 to store for all ref. idx. */ 1130 s_search_prms_8x4.aps_mv_range[0] = &s_range_prms; 1131 s_search_prms_8x4.ps_search_results = ps_search_results; 1132 s_search_prms_8x4.full_search_step = ps_coarse_prms->full_search_step; 1133 1134 s_search_prms_4x8 = s_search_prms_8x4; 1135 s_search_prms_4x8.ps_search_candts = ps_search_candts_4x8; 1136 s_search_prms_4x8.e_blk_size = BLK_4x8; 1137 s_search_prms_4x8.i4_part_mask = (1 << PART_ID_Nx2N_R); 1138 1139 s_search_prms_4x4 = s_search_prms_8x4; 1140 /* Since s_search_prms_4x4 is used only to computer sad at 4x4 level, search candidate is not used */ 1141 s_search_prms_4x4.ps_search_candts = ps_search_candts_4x8; 1142 s_search_prms_4x4.e_blk_size = BLK_4x4; 1143 s_search_prms_4x4.i4_part_mask = (1 << PART_ID_2Nx2N); 1144 /*************************************************************************/ 1145 /* Picture limit on all 4 sides. This will be used to set mv limits for */ 1146 /* every block given its coordinate. */ 1147 /*************************************************************************/ 1148 SET_PIC_LIMIT( 1149 s_pic_limit, 1150 ps_curr_layer->i4_pad_x_inp, 1151 ps_curr_layer->i4_pad_y_inp, 1152 ps_curr_layer->i4_wd, 1153 ps_curr_layer->i4_ht, 1154 s_search_prms_4x4.i4_num_steps_post_refine); 1155 1156 /* Pick the global mv from previous reference */ 1157 for(i1_ref_idx = 0; i1_ref_idx < i4_num_ref; i1_ref_idx++) 1158 { 1159 if(ME_XTREME_SPEED_25 != e_me_quality_preset) 1160 { 1161 /* Distance of current pic from reference */ 1162 S32 i4_delta_poc; 1163 1164 hme_mv_t s_mv; 1165 i4_delta_poc = ps_curr_layer->i4_poc - ps_curr_layer->ai4_ref_id_to_poc_lc[i1_ref_idx]; 1166 1167 hme_get_global_mv(ps_prev_layer, &s_mv, i4_delta_poc); 1168 1169 s_candt_global[i1_ref_idx].s_mv.i2_mvx = s_mv.i2_mv_x; 1170 s_candt_global[i1_ref_idx].s_mv.i2_mvy = s_mv.i2_mv_y; 1171 s_candt_global[i1_ref_idx].i1_ref_idx = i1_ref_idx; 1172 1173 /*********************************************************************/ 1174 /* Initialize the histogram for each reference index in current */ 1175 /* layer ctxt */ 1176 /*********************************************************************/ 1177 hme_init_histogram( 1178 ps_ctxt->aps_mv_hist[i1_ref_idx], 1179 (S32)as_mv_limit[i1_ref_idx].i2_max_x, 1180 (S32)as_mv_limit[i1_ref_idx].i2_max_y); 1181 } 1182 1183 /*********************************************************************/ 1184 /* Initialize the dyn. search range params. for each reference index */ 1185 /* in current layer ctxt */ 1186 /*********************************************************************/ 1187 /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */ 1188 if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled) 1189 { 1190 INIT_DYN_SEARCH_PRMS( 1191 &ps_ctxt->s_coarse_dyn_range_prms.as_dyn_range_prms[i4_layer_id][i1_ref_idx], 1192 ps_curr_layer->ai4_ref_id_to_poc_lc[i1_ref_idx]); 1193 } 1194 } 1195 1196 /*************************************************************************/ 1197 /* if exhaustive algorithmm then we use only 1 candt 0, 0 */ 1198 /* else we use a lot of causal and non causal candts */ 1199 /* finally set number to the configured number of candts */ 1200 /*************************************************************************/ 1201 1202 /* Loop in raster order over each 4x4 blk in a given row till end of frame */ 1203 while(0 == end_of_frame) 1204 { 1205 job_queue_t *ps_job; 1206 void *pv_hme_dep_mngr; 1207 WORD32 offset_val, check_dep_pos, set_dep_pos; 1208 1209 /* Get the current layer HME Dep Mngr */ 1210 /* Note : Use layer_id - 1 in HME layers */ 1211 pv_hme_dep_mngr = ppv_dep_mngr_hme_sync[ps_coarse_prms->i4_layer_id - 1]; 1212 1213 /* Get the current row from the job queue */ 1214 ps_job = (job_queue_t *)ihevce_pre_enc_grp_get_next_job( 1215 ps_multi_thrd_ctxt, ps_multi_thrd_ctxt->i4_me_coarsest_lyr_type, 1, i4_ping_pong); 1216 1217 /* If all rows are done, set the end of process flag to 1, */ 1218 /* and the current row to -1 */ 1219 if(NULL == ps_job) 1220 { 1221 blk_y = -1; 1222 end_of_frame = 1; 1223 } 1224 else 1225 { 1226 ASSERT(ps_multi_thrd_ctxt->i4_me_coarsest_lyr_type == ps_job->i4_pre_enc_task_type); 1227 1228 /* Obtain the current row's details from the job */ 1229 blk_y = ps_job->s_job_info.s_me_job_info.i4_vert_unit_row_no; 1230 1231 if(1 == ps_ctxt->s_frm_prms.is_i_pic) 1232 { 1233 /* set the output dependency of current row */ 1234 ihevce_pre_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_ping_pong); 1235 continue; 1236 } 1237 1238 /* Set Variables for Dep. Checking and Setting */ 1239 set_dep_pos = blk_y + 1; 1240 if(blk_y > 0) 1241 { 1242 offset_val = 2; 1243 check_dep_pos = blk_y - 1; 1244 } 1245 else 1246 { 1247 /* First row should run without waiting */ 1248 offset_val = -1; 1249 check_dep_pos = 0; 1250 } 1251 1252 /* Loop over all the blocks in current row */ 1253 /* One block extra, since the last block in a row needs East block */ 1254 for(blk_x = 0; blk_x < (num_blks_in_row + 1); blk_x++) 1255 { 1256 /* Wait till top row block is processed */ 1257 /* Currently checking till top right block*/ 1258 if(blk_x < (num_blks_in_row)) 1259 { 1260 ihevce_dmgr_chk_row_row_sync( 1261 pv_hme_dep_mngr, 1262 blk_x, 1263 offset_val, 1264 check_dep_pos, 1265 0, /* Col Tile No. : Not supported in PreEnc*/ 1266 ps_ctxt->thrd_id); 1267 } 1268 1269 /***************************************************************/ 1270 /* Get Weighted input for all references */ 1271 /***************************************************************/ 1272 fp_get_wt_inp( 1273 ps_curr_layer, 1274 &ps_ctxt->s_wt_pred, 1275 1 << (blk_size_shift + 1), 1276 blk_x << blk_size_shift, 1277 (blk_y - 1) << blk_size_shift, 1278 1 << (blk_size_shift + 1), 1279 i4_num_ref, 1280 ps_ctxt->i4_wt_pred_enable_flag); 1281 1282 /* RESET ALL SEARCH RESULTS FOR THE NEW BLK */ 1283 hme_reset_search_results( 1284 ps_search_results, 1285 s_search_prms_8x4.i4_part_mask | s_search_prms_4x8.i4_part_mask, 1286 MV_RES_FPEL); 1287 1288 /* Compute the search node offsets */ 1289 /* MAX is used to clip when left and top neighbours are not availbale at coarse boundaries */ 1290 search_node_top_offset = 1291 blk_x + ps_ctxt->ai4_row_index[MAX((blk_y - 2), 0)] * num_blks_in_row; 1292 search_node_left_offset = 1293 MAX((blk_x - 1), 0) + 1294 ps_ctxt->ai4_row_index[MAX((blk_y - 1), 0)] * num_blks_in_row; 1295 1296 /* Input offset: wrt CU start. Offset for South block */ 1297 s_search_prms_4x4.i4_cu_x_off = 0; 1298 s_search_prms_4x4.i4_cu_y_off = 4; 1299 s_search_prms_4x4.i4_inp_stride = 8; 1300 s_search_prms_4x4.i4_x_off = blk_x << blk_size_shift; 1301 s_search_prms_4x4.i4_y_off = blk_y << blk_size_shift; 1302 1303 s_search_prms_4x8.i4_x_off = s_search_prms_8x4.i4_x_off = blk_x << blk_size_shift; 1304 s_search_prms_4x8.i4_y_off = s_search_prms_8x4.i4_y_off = (blk_y - 1) 1305 << blk_size_shift; 1306 1307 /* This layer will always use explicit ME */ 1308 /* Loop across different Ref IDx */ 1309 for(i1_ref_idx = 0; i1_ref_idx < i4_num_ref; i1_ref_idx++) 1310 { 1311 sad_top_offset = (blk_x * ai4_sad_4x4_block_size[i1_ref_idx]) + 1312 ps_ctxt->ai4_row_index[MAX((blk_y - 1), 0)] * 1313 ai4_sad_4x4_block_stride[i1_ref_idx]; 1314 sad_current_offset = 1315 (blk_x * ai4_sad_4x4_block_size[i1_ref_idx]) + 1316 ps_ctxt->ai4_row_index[blk_y] * ai4_sad_4x4_block_stride[i1_ref_idx]; 1317 1318 /* Initialize search node if blk_x == 0, as it doesn't have left neighbours */ 1319 if(0 == blk_x) 1320 INIT_SEARCH_NODE( 1321 &ps_ctxt->aps_best_search_nodes_8x4_n_rows[i1_ref_idx][blk_x], 1322 i1_ref_idx); 1323 1324 pi2_cur_ref_sads_4x4 = ps_ctxt->api2_sads_4x4_n_rows[i1_ref_idx]; 1325 1326 /* Initialize changing params here */ 1327 s_search_prms_8x4.i1_ref_idx = i1_ref_idx; 1328 s_search_prms_4x8.i1_ref_idx = i1_ref_idx; 1329 s_search_prms_4x4.i1_ref_idx = i1_ref_idx; 1330 1331 if(num_blks_in_row == blk_x) 1332 { 1333 S16 *pi2_sads_4x4_current; 1334 /* Since the current 4x4 block will be a padded region, which may not match with any of the reference */ 1335 pi2_sads_4x4_current = pi2_cur_ref_sads_4x4 + sad_current_offset; 1336 1337 memset(pi2_sads_4x4_current, 0, ai4_sad_4x4_block_size[i1_ref_idx]); 1338 } 1339 1340 /* SAD to be computed and stored for the 4x4 block in 1st row and the last block of all rows*/ 1341 if((0 == blk_y) || (num_blks_in_row == blk_x)) 1342 { 1343 S16 *pi2_sads_4x4_current; 1344 /* Computer 4x4 SADs for current block */ 1345 /* Pointer to store SADs */ 1346 pi2_sads_4x4_current = pi2_cur_ref_sads_4x4 + sad_current_offset; 1347 1348 hme_derive_worst_case_search_range( 1349 &s_range_prms, 1350 &s_pic_limit, 1351 &as_mv_limit[i1_ref_idx], 1352 blk_x << blk_size_shift, 1353 blk_y << blk_size_shift, 1354 blk_wd, 1355 blk_ht); 1356 1357 if(ME_PRISTINE_QUALITY >= e_me_quality_preset) 1358 { 1359 ((ihevce_me_optimised_function_list_t *) 1360 ps_ctxt->pv_me_optimised_function_list) 1361 ->pf_store_4x4_sads_high_quality( 1362 &s_search_prms_4x4, 1363 ps_curr_layer, 1364 &as_mv_limit[i1_ref_idx], 1365 &ps_ctxt->s_wt_pred, 1366 pi2_sads_4x4_current); 1367 } 1368 else 1369 { 1370 ((ihevce_me_optimised_function_list_t *) 1371 ps_ctxt->pv_me_optimised_function_list) 1372 ->pf_store_4x4_sads_high_speed( 1373 &s_search_prms_4x4, 1374 ps_curr_layer, 1375 &as_mv_limit[i1_ref_idx], 1376 &ps_ctxt->s_wt_pred, 1377 pi2_sads_4x4_current); 1378 } 1379 } 1380 else 1381 { 1382 /* For the zero mv candt, the ref idx to be modified */ 1383 ps_candt_zeromv_8x4->i1_ref_idx = i1_ref_idx; 1384 ps_candt_zeromv_4x8->i1_ref_idx = i1_ref_idx; 1385 1386 if(ME_XTREME_SPEED_25 != e_me_quality_preset) 1387 { 1388 /* For the global mvs alone, the search node points to a local variable */ 1389 ps_search_candts_8x4[global_id_8x4].ps_search_node = 1390 &s_candt_global[i1_ref_idx]; 1391 ps_search_candts_4x8[global_id_4x8].ps_search_node = 1392 &s_candt_global[i1_ref_idx]; 1393 } 1394 1395 hme_get_spatial_candt( 1396 ps_curr_layer, 1397 BLK_4x4, 1398 blk_x, 1399 blk_y - 1, 1400 i1_ref_idx, 1401 as_top_neighbours, 1402 as_left_neighbours, 1403 0, 1404 1, 1405 0, 1406 0); 1407 /* set up the various candts */ 1408 *ps_candt_4x8_l = as_left_neighbours[0]; 1409 *ps_candt_4x8_t = as_top_neighbours[1]; 1410 *ps_candt_4x8_tl = as_top_neighbours[0]; 1411 *ps_candt_8x4_l = *ps_candt_4x8_l; 1412 *ps_candt_8x4_tl = *ps_candt_4x8_tl; 1413 *ps_candt_8x4_t = *ps_candt_4x8_t; 1414 1415 { 1416 S32 pred_lx; 1417 S16 *pi2_sads_4x4_current, *pi2_sads_4x4_top; 1418 pred_ctxt_t *ps_pred_ctxt; 1419 PF_MV_COST_FXN pf_mv_cost_compute; 1420 1421 /* Computer 4x4 SADs for current block */ 1422 /* Pointer to store SADs */ 1423 pi2_sads_4x4_current = pi2_cur_ref_sads_4x4 + sad_current_offset; 1424 1425 hme_derive_worst_case_search_range( 1426 &s_range_prms, 1427 &s_pic_limit, 1428 &as_mv_limit[i1_ref_idx], 1429 blk_x << blk_size_shift, 1430 blk_y << blk_size_shift, 1431 blk_wd, 1432 blk_ht); 1433 if(i4_pic_ht == blk_y) 1434 { 1435 memset(pi2_sads_4x4_current, 0, ai4_sad_4x4_block_size[i1_ref_idx]); 1436 } 1437 else 1438 { 1439 if(ME_PRISTINE_QUALITY >= e_me_quality_preset) 1440 { 1441 ((ihevce_me_optimised_function_list_t *) 1442 ps_ctxt->pv_me_optimised_function_list) 1443 ->pf_store_4x4_sads_high_quality( 1444 &s_search_prms_4x4, 1445 ps_curr_layer, 1446 &as_mv_limit[i1_ref_idx], 1447 &ps_ctxt->s_wt_pred, 1448 pi2_sads_4x4_current); 1449 } 1450 else 1451 { 1452 ((ihevce_me_optimised_function_list_t *) 1453 ps_ctxt->pv_me_optimised_function_list) 1454 ->pf_store_4x4_sads_high_speed( 1455 &s_search_prms_4x4, 1456 ps_curr_layer, 1457 &as_mv_limit[i1_ref_idx], 1458 &ps_ctxt->s_wt_pred, 1459 pi2_sads_4x4_current); 1460 } 1461 } 1462 /* Set pred direction to L0 or L1 */ 1463 pred_lx = 1 - ps_search_results->pu1_is_past[i1_ref_idx]; 1464 1465 /* Suitable context (L0 or L1) */ 1466 ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx]; 1467 1468 /* Coarse layer is always explicit */ 1469 if(ME_PRISTINE_QUALITY > e_me_quality_preset) 1470 { 1471 pf_mv_cost_compute = compute_mv_cost_coarse; 1472 } 1473 else 1474 { 1475 /* Cost function is not called in high speed case. Below one is just a dummy function */ 1476 pf_mv_cost_compute = compute_mv_cost_coarse_high_speed; 1477 } 1478 1479 /*********************************************************************/ 1480 /* Now, compute the mv for the top block */ 1481 /*********************************************************************/ 1482 pi2_sads_4x4_top = pi2_cur_ref_sads_4x4 + sad_top_offset; 1483 1484 /*********************************************************************/ 1485 /* For every blk in the picture, the search range needs to be derived*/ 1486 /* Any blk can have any mv, but practical search constraints are */ 1487 /* imposed by the picture boundary and amt of padding. */ 1488 /*********************************************************************/ 1489 hme_derive_search_range( 1490 &s_range_prms, 1491 &s_pic_limit, 1492 &as_mv_limit[i1_ref_idx], 1493 blk_x << blk_size_shift, 1494 (blk_y - 1) << blk_size_shift, 1495 blk_wd, 1496 blk_ht); 1497 1498 /* Computer the mv for the top block */ 1499 if(ME_PRISTINE_QUALITY >= e_me_quality_preset) 1500 { 1501 ((ihevce_me_optimised_function_list_t *) 1502 ps_ctxt->pv_me_optimised_function_list) 1503 ->pf_combine_4x4_sads_and_compute_cost_high_quality( 1504 i1_ref_idx, 1505 &s_range_prms, /* Both 4x8 and 8x4 has same search range */ 1506 &as_mv_limit[i1_ref_idx], 1507 &best_mv_4x8, 1508 &best_mv_8x4, 1509 ps_pred_ctxt, 1510 pf_mv_cost_compute, 1511 pi2_sads_4x4_top, /* Current SAD block */ 1512 (pi2_sads_4x4_top + 1513 ai4_sad_4x4_block_size[i1_ref_idx]), /* East SAD block */ 1514 pi2_sads_4x4_current); /* South SAD block */ 1515 } 1516 else 1517 { 1518 ((ihevce_me_optimised_function_list_t *) 1519 ps_ctxt->pv_me_optimised_function_list) 1520 ->pf_combine_4x4_sads_and_compute_cost_high_speed( 1521 i1_ref_idx, 1522 &s_range_prms, /* Both 4x8 and 8x4 has same search range */ 1523 &as_mv_limit[i1_ref_idx], 1524 &best_mv_4x8, 1525 &best_mv_8x4, 1526 ps_pred_ctxt, 1527 pf_mv_cost_compute, 1528 pi2_sads_4x4_top, /* Current SAD block */ 1529 (pi2_sads_4x4_top + 1530 ai4_sad_4x4_block_size[i1_ref_idx]), /* East SAD block */ 1531 pi2_sads_4x4_current); /* South SAD block */ 1532 } 1533 1534 ps_candt_fs_4x8->s_mv.i2_mvx = best_mv_4x8.i2_mv_x; 1535 ps_candt_fs_4x8->s_mv.i2_mvy = best_mv_4x8.i2_mv_y; 1536 ps_candt_fs_4x8->i1_ref_idx = i1_ref_idx; 1537 1538 ps_candt_fs_8x4->s_mv.i2_mvx = best_mv_8x4.i2_mv_x; 1539 ps_candt_fs_8x4->s_mv.i2_mvy = best_mv_8x4.i2_mv_y; 1540 ps_candt_fs_8x4->i1_ref_idx = i1_ref_idx; 1541 } 1542 1543 /* call the appropriate Search Algo for 4x8S. The 4x8N would */ 1544 /* have already been called by top block */ 1545 hme_pred_search_square_stepn( 1546 &s_search_prms_8x4, 1547 ps_curr_layer, 1548 &ps_ctxt->s_wt_pred, 1549 e_me_quality_preset, 1550 (ihevce_me_optimised_function_list_t *) 1551 ps_ctxt->pv_me_optimised_function_list 1552 1553 ); 1554 1555 /* Call the appropriate search algo for 8x4E */ 1556 hme_pred_search_square_stepn( 1557 &s_search_prms_4x8, 1558 ps_curr_layer, 1559 &ps_ctxt->s_wt_pred, 1560 e_me_quality_preset, 1561 (ihevce_me_optimised_function_list_t *) 1562 ps_ctxt->pv_me_optimised_function_list); 1563 1564 if(ME_XTREME_SPEED_25 != e_me_quality_preset) 1565 { 1566 /* Histogram updates across different Ref ID for global MV */ 1567 hme_update_histogram( 1568 ps_ctxt->aps_mv_hist[i1_ref_idx], 1569 aps_best_search_node_8x4[i1_ref_idx]->s_mv.i2_mvx, 1570 aps_best_search_node_8x4[i1_ref_idx]->s_mv.i2_mvy); 1571 hme_update_histogram( 1572 ps_ctxt->aps_mv_hist[i1_ref_idx], 1573 aps_best_search_node_4x8[i1_ref_idx]->s_mv.i2_mvx, 1574 aps_best_search_node_4x8[i1_ref_idx]->s_mv.i2_mvy); 1575 } 1576 1577 /* update the best results to the mv bank */ 1578 hme_update_mv_bank_coarse( 1579 ps_search_results, 1580 ps_curr_layer->ps_layer_mvbank, 1581 blk_x, 1582 (blk_y - 1), 1583 ps_ctxt->aps_best_search_nodes_4x8_n_rows[i1_ref_idx] + 1584 search_node_top_offset, /* Top Candidate */ 1585 ps_ctxt->aps_best_search_nodes_8x4_n_rows[i1_ref_idx] + 1586 search_node_left_offset, /* Left candidate */ 1587 i1_ref_idx, 1588 &s_mv_update_prms); 1589 1590 /* Copy the best search result to 5 row array for future use */ 1591 *(ps_ctxt->aps_best_search_nodes_4x8_n_rows[i1_ref_idx] + blk_x + 1592 ps_ctxt->ai4_row_index[blk_y - 1] * num_blks_in_row) = 1593 *(aps_best_search_node_4x8[i1_ref_idx]); 1594 1595 *(ps_ctxt->aps_best_search_nodes_8x4_n_rows[i1_ref_idx] + blk_x + 1596 ps_ctxt->ai4_row_index[blk_y - 1] * num_blks_in_row) = 1597 *(aps_best_search_node_8x4[i1_ref_idx]); 1598 1599 /* UPDATE the MIN and MAX MVs for Dynamical Search Range for each ref. pic. */ 1600 /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */ 1601 if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled) 1602 { 1603 WORD32 num_mvs, i, j; 1604 search_node_t *aps_search_nodes[4]; 1605 /* Best results for 8x4R and 4x8B blocks */ 1606 search_node_t *ps_search_node_8x4_r, *ps_search_node_4x8_b; 1607 1608 num_mvs = ps_curr_layer->ps_layer_mvbank->i4_num_mvs_per_ref; 1609 1610 /*************************************************************************/ 1611 /* We have atleast 4 distinct results: the 4x8 top (coming from top blk) */ 1612 /* 8x4 left (coming from left blk), 8x4 and 4x8 right and bot resp. */ 1613 /* If number of results to be stored is 4, then we store all these 4 */ 1614 /* results, else we pick best ones */ 1615 /*************************************************************************/ 1616 ps_search_node_8x4_r = 1617 ps_search_results->aps_part_results[i1_ref_idx][PART_ID_2NxN_B]; 1618 ps_search_node_4x8_b = 1619 ps_search_results->aps_part_results[i1_ref_idx][PART_ID_Nx2N_R]; 1620 1621 ASSERT(num_mvs <= 4); 1622 1623 /* Doing this to sort best results */ 1624 aps_search_nodes[0] = ps_search_node_8x4_r; 1625 aps_search_nodes[1] = ps_search_node_4x8_b; 1626 aps_search_nodes[2] = 1627 ps_ctxt->aps_best_search_nodes_8x4_n_rows[i1_ref_idx] + 1628 search_node_left_offset; /* Left candidate */ 1629 aps_search_nodes[3] = 1630 ps_ctxt->aps_best_search_nodes_4x8_n_rows[i1_ref_idx] + 1631 search_node_top_offset; /* Top Candidate */ 1632 1633 /* Note : Need to be resolved!!! */ 1634 /* Added this to match with "hme_update_mv_bank_coarse" */ 1635 if(num_mvs != 4) 1636 { 1637 /* Run through the results, store them in best to worst order */ 1638 for(i = 0; i < num_mvs; i++) 1639 { 1640 for(j = i + 1; j < 4; j++) 1641 { 1642 if(aps_search_nodes[j]->i4_tot_cost < 1643 aps_search_nodes[i]->i4_tot_cost) 1644 { 1645 SWAP_HME( 1646 aps_search_nodes[j], 1647 aps_search_nodes[i], 1648 search_node_t *); 1649 } 1650 } 1651 } 1652 } 1653 1654 /* UPDATE the MIN and MAX MVs for Dynamical Search Range for each ref. pic. */ 1655 for(i = 0; i < num_mvs; i++) 1656 { 1657 hme_update_dynamic_search_params( 1658 &ps_ctxt->s_coarse_dyn_range_prms 1659 .as_dyn_range_prms[i4_layer_id][i1_ref_idx], 1660 aps_search_nodes[i]->s_mv.i2_mvy); 1661 } 1662 } 1663 } 1664 } 1665 1666 /* Update the number of blocks processed in the current row */ 1667 ihevce_dmgr_set_row_row_sync( 1668 pv_hme_dep_mngr, 1669 (blk_x + 1), 1670 blk_y, 1671 0 /* Col Tile No. : Not supported in PreEnc*/); 1672 } 1673 1674 /* set the output dependency after completion of row */ 1675 ihevce_pre_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_ping_pong); 1676 } 1677 } 1678 1679 return; 1680 } 1681