1 /****************************************************************************** 2 * 3 * Copyright (C) 2015 The Android Open Source Project 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ***************************************************************************** 18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore 19 */ 20 /** 21 ******************************************************************************* 22 * @file 23 * ih264e_me.c 24 * 25 * @brief 26 * 27 * 28 * @author 29 * Ittiam 30 * 31 * @par List of Functions: 32 * - 33 * 34 * @remarks 35 * None 36 * 37 ******************************************************************************* 38 */ 39 40 /*****************************************************************************/ 41 /* File Includes */ 42 /*****************************************************************************/ 43 44 /* System include files */ 45 #include <stdio.h> 46 #include <assert.h> 47 #include <limits.h> 48 #include <string.h> 49 50 /* User include files */ 51 #include "ime_typedefs.h" 52 #include "ime_distortion_metrics.h" 53 #include "ime_defs.h" 54 #include "ime_structs.h" 55 #include "ime.h" 56 #include "ime_macros.h" 57 #include "ime_statistics.h" 58 59 /** 60 ******************************************************************************* 61 * 62 * @brief Diamond Search 63 * 64 * @par Description: 65 * This function computes the sad at vertices of several layers of diamond grid 66 * at a time. The number of layers of diamond grid that would be evaluated is 67 * configurable.The function computes the sad at vertices of a diamond grid. If 68 * the sad at the center of the diamond grid is lesser than the sad at any other 69 * point of the diamond grid, the function marks the candidate Mb partition as 70 * mv. 71 * 72 * @param[in] ps_mb_part 73 * pointer to current mb partition ctxt with respect to ME 74 * 75 * @param[in] ps_me_ctxt 76 * pointer to me context 77 * 78 * @param[in] u4_lambda_motion 79 * lambda motion 80 * 81 * @param[in] u4_enable_fast_sad 82 * enable/disable fast sad computation 83 * 84 * @returns mv pair & corresponding distortion and cost 85 * 86 * @remarks Diamond Srch, radius is 1 87 * 88 ******************************************************************************* 89 */ 90 void ime_diamond_search_16x16(me_ctxt_t *ps_me_ctxt, WORD32 i4_reflist) 91 { 92 /* MB partition info */ 93 mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_reflist]; 94 95 /* lagrange parameter */ 96 UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion; 97 98 /* srch range*/ 99 WORD32 i4_srch_range_n = ps_me_ctxt->i4_srch_range_n; 100 WORD32 i4_srch_range_s = ps_me_ctxt->i4_srch_range_s; 101 WORD32 i4_srch_range_e = ps_me_ctxt->i4_srch_range_e; 102 WORD32 i4_srch_range_w = ps_me_ctxt->i4_srch_range_w; 103 104 /* enabled fast sad computation */ 105 // UWORD32 u4_enable_fast_sad = ps_me_ctxt->u4_enable_fast_sad; 106 107 /* pointer to src macro block */ 108 UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma; 109 UWORD8 *pu1_ref_mb = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist]; 110 111 /* strides */ 112 WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd; 113 WORD32 i4_ref_strd = ps_me_ctxt->i4_rec_strd; 114 115 /* least cost */ 116 WORD32 i4_cost_least = ps_mb_part->i4_mb_cost; 117 118 /* least sad */ 119 WORD32 i4_distortion_least = ps_mb_part->i4_mb_distortion; 120 121 /* mv pair */ 122 WORD16 i2_mvx, i2_mvy; 123 124 /* mv bits */ 125 UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits; 126 127 /* temp var */ 128 WORD32 i4_cost[4]; 129 WORD32 i4_sad[4]; 130 UWORD8 *pu1_ref; 131 WORD16 i2_mv_u_x, i2_mv_u_y; 132 133 /* Diamond search Iteration Max Cnt */ 134 UWORD32 u4_num_layers = ps_me_ctxt->u4_num_layers; 135 136 /* temp var */ 137 // UWORD8 u1_prev_jump = NONE; 138 // UWORD8 u1_curr_jump = NONE; 139 // UWORD8 u1_next_jump; 140 // WORD32 mask_arr[5] = {15, 13, 14, 7, 11}; 141 // WORD32 mask; 142 // UWORD8 *apu1_ref[4]; 143 // WORD32 i, cnt; 144 // WORD32 dia[4][2] = {{-1, 0}, {1, 0}, {0, -1}, {0, 1}}; 145 146 /* mv with best sad during initial evaluation */ 147 i2_mvx = ps_mb_part->s_mv_curr.i2_mvx; 148 i2_mvy = ps_mb_part->s_mv_curr.i2_mvy; 149 150 i2_mv_u_x = i2_mvx; 151 i2_mv_u_y = i2_mvy; 152 153 while (u4_num_layers--) 154 { 155 /* FIXME : is this the write way to check for out of bounds ? */ 156 if ( (i2_mvx - 1 < i4_srch_range_w) || 157 (i2_mvx + 1 > i4_srch_range_e) || 158 (i2_mvy - 1 < i4_srch_range_n) || 159 (i2_mvy + 1 > i4_srch_range_s) ) 160 { 161 break; 162 } 163 164 pu1_ref = pu1_ref_mb + i2_mvx + (i2_mvy * i4_ref_strd); 165 166 ps_me_ctxt->pf_ime_compute_sad4_diamond(pu1_ref, 167 pu1_curr_mb, 168 i4_ref_strd, 169 i4_src_strd, 170 i4_sad); 171 172 DEBUG_SAD_HISTOGRAM_ADD(i4_sad[0], 2); 173 DEBUG_SAD_HISTOGRAM_ADD(i4_sad[1], 2); 174 DEBUG_SAD_HISTOGRAM_ADD(i4_sad[2], 2); 175 DEBUG_SAD_HISTOGRAM_ADD(i4_sad[3], 2); 176 177 /* compute cost */ 178 i4_cost[0] = i4_sad[0] + u4_lambda_motion * ( pu1_mv_bits[ ((i2_mvx - 1) << 2) - ps_mb_part->s_mv_pred.i2_mvx] 179 + pu1_mv_bits[(i2_mvy << 2) - ps_mb_part->s_mv_pred.i2_mvy] ); 180 i4_cost[1] = i4_sad[1] + u4_lambda_motion * ( pu1_mv_bits[ ((i2_mvx + 1) << 2) - ps_mb_part->s_mv_pred.i2_mvx] 181 + pu1_mv_bits[(i2_mvy << 2) - ps_mb_part->s_mv_pred.i2_mvy] ); 182 i4_cost[2] = i4_sad[2] + u4_lambda_motion * ( pu1_mv_bits[ (i2_mvx << 2) - ps_mb_part->s_mv_pred.i2_mvx] 183 + pu1_mv_bits[((i2_mvy - 1) << 2) - ps_mb_part->s_mv_pred.i2_mvy] ); 184 i4_cost[3] = i4_sad[3] + u4_lambda_motion * ( pu1_mv_bits[ (i2_mvx << 2) - ps_mb_part->s_mv_pred.i2_mvx] 185 + pu1_mv_bits[((i2_mvy + 1) << 2) - ps_mb_part->s_mv_pred.i2_mvy] ); 186 187 188 if (i4_cost_least > i4_cost[0]) 189 { 190 i4_cost_least = i4_cost[0]; 191 i4_distortion_least = i4_sad[0]; 192 193 i2_mv_u_x = (i2_mvx - 1); 194 i2_mv_u_y = i2_mvy; 195 } 196 197 if (i4_cost_least > i4_cost[1]) 198 { 199 i4_cost_least = i4_cost[1]; 200 i4_distortion_least = i4_sad[1]; 201 202 i2_mv_u_x = (i2_mvx + 1); 203 i2_mv_u_y = i2_mvy; 204 } 205 206 if (i4_cost_least > i4_cost[2]) 207 { 208 i4_cost_least = i4_cost[2]; 209 i4_distortion_least = i4_sad[2]; 210 211 i2_mv_u_x = i2_mvx; 212 i2_mv_u_y = i2_mvy - 1; 213 } 214 215 if (i4_cost_least > i4_cost[3]) 216 { 217 i4_cost_least = i4_cost[3]; 218 i4_distortion_least = i4_sad[3]; 219 220 i2_mv_u_x = i2_mvx; 221 i2_mv_u_y = i2_mvy + 1; 222 } 223 224 if( (i2_mv_u_x == i2_mvx) && (i2_mv_u_y == i2_mvy)) 225 { 226 ps_mb_part->u4_exit = 1; 227 break; 228 } 229 else 230 { 231 i2_mvx = i2_mv_u_x; 232 i2_mvy = i2_mv_u_y; 233 } 234 235 236 } 237 238 if (i4_cost_least < ps_mb_part->i4_mb_cost) 239 { 240 ps_mb_part->i4_mb_cost = i4_cost_least; 241 ps_mb_part->i4_mb_distortion = i4_distortion_least; 242 ps_mb_part->s_mv_curr.i2_mvx = i2_mvx; 243 ps_mb_part->s_mv_curr.i2_mvy = i2_mvy; 244 } 245 246 } 247 248 249 /** 250 ******************************************************************************* 251 * 252 * @brief This function computes the best motion vector among the tentative mv 253 * candidates chosen. 254 * 255 * @par Description: 256 * This function determines the position in the search window at which the motion 257 * estimation should begin in order to minimise the number of search iterations. 258 * 259 * @param[in] ps_mb_part 260 * pointer to current mb partition ctxt with respect to ME 261 * 262 * @param[in] u4_lambda_motion 263 * lambda motion 264 * 265 * @param[in] u4_fast_flag 266 * enable/disable fast sad computation 267 * 268 * @returns mv pair & corresponding distortion and cost 269 * 270 * @remarks none 271 * 272 ******************************************************************************* 273 */ 274 275 void ime_evaluate_init_srchposn_16x16 276 ( 277 me_ctxt_t *ps_me_ctxt, 278 WORD32 i4_reflist 279 ) 280 { 281 UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion; 282 283 /* candidate mv cnt */ 284 UWORD32 u4_num_candidates = ps_me_ctxt->u4_num_candidates[i4_reflist]; 285 286 /* list of candidate mvs */ 287 ime_mv_t *ps_mv_list = ps_me_ctxt->as_mv_init_search[i4_reflist]; 288 289 /* pointer to src macro block */ 290 UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma; 291 UWORD8 *pu1_ref_mb = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist]; 292 293 /* strides */ 294 WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd; 295 WORD32 i4_ref_strd = ps_me_ctxt->i4_rec_strd; 296 297 /* enabled fast sad computation */ 298 UWORD32 u4_enable_fast_sad = ps_me_ctxt->u4_enable_fast_sad; 299 300 /* SAD(distortion metric) of an 8x8 block */ 301 WORD32 i4_mb_distortion; 302 303 /* cost = distortion + u4_lambda_motion * rate */ 304 WORD32 i4_mb_cost, i4_mb_cost_least = INT_MAX, i4_distortion_least = INT_MAX; 305 306 /* mb partitions info */ 307 mb_part_ctxt *ps_mb_part = &(ps_me_ctxt->as_mb_part[i4_reflist]); 308 309 /* mv bits */ 310 UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits; 311 312 /* temp var */ 313 UWORD32 i, j; 314 WORD32 i4_srch_pos_idx = 0; 315 UWORD8 *pu1_ref = NULL; 316 317 /* Carry out a search using each of the motion vector pairs identified above as predictors. */ 318 /* TODO : Just like Skip, Do we need to add any bias to zero mv as well */ 319 for(i = 0; i < u4_num_candidates; i++) 320 { 321 /* compute sad */ 322 WORD32 c_sad = 1; 323 324 for(j = 0; j < i; j++ ) 325 { 326 if ( (ps_mv_list[i].i2_mvx == ps_mv_list[j].i2_mvx) && 327 (ps_mv_list[i].i2_mvy == ps_mv_list[j].i2_mvy) ) 328 { 329 c_sad = 0; 330 break; 331 } 332 } 333 if(c_sad) 334 { 335 /* adjust ref pointer */ 336 pu1_ref = pu1_ref_mb + ps_mv_list[i].i2_mvx + (ps_mv_list[i].i2_mvy * i4_ref_strd); 337 338 /* compute distortion */ 339 ps_me_ctxt->pf_ime_compute_sad_16x16[u4_enable_fast_sad](pu1_curr_mb, pu1_ref, i4_src_strd, i4_ref_strd, i4_mb_cost_least, &i4_mb_distortion); 340 341 DEBUG_SAD_HISTOGRAM_ADD(i4_mb_distortion, 3); 342 /* compute cost */ 343 i4_mb_cost = i4_mb_distortion + u4_lambda_motion * ( pu1_mv_bits[ (ps_mv_list[i].i2_mvx << 2) - ps_mb_part->s_mv_pred.i2_mvx] 344 + pu1_mv_bits[(ps_mv_list[i].i2_mvy << 2) - ps_mb_part->s_mv_pred.i2_mvy] ); 345 346 if (i4_mb_cost < i4_mb_cost_least) 347 { 348 i4_mb_cost_least = i4_mb_cost; 349 350 i4_distortion_least = i4_mb_distortion; 351 352 i4_srch_pos_idx = i; 353 } 354 } 355 } 356 357 if (i4_mb_cost_least < ps_mb_part->i4_mb_cost) 358 { 359 ps_mb_part->i4_srch_pos_idx = i4_srch_pos_idx; 360 ps_mb_part->i4_mb_cost = i4_mb_cost_least; 361 ps_mb_part->i4_mb_distortion = i4_distortion_least; 362 ps_mb_part->s_mv_curr.i2_mvx = ps_mv_list[i4_srch_pos_idx].i2_mvx; 363 ps_mb_part->s_mv_curr.i2_mvy = ps_mv_list[i4_srch_pos_idx].i2_mvy; 364 } 365 } 366 367 /** 368 ******************************************************************************* 369 * 370 * @brief Searches for the best matching full pixel predictor within the search 371 * range 372 * 373 * @par Description: 374 * This function begins by computing the mv predict vector for the current mb. 375 * This is used for cost computations. Further basing on the algo. chosen, it 376 * looks through a set of candidate vectors that best represent the mb a least 377 * cost and returns this information. 378 * 379 * @param[in] ps_proc 380 * pointer to current proc ctxt 381 * 382 * @param[in] ps_me_ctxt 383 * pointer to me context 384 * 385 * @returns mv pair & corresponding distortion and cost 386 * 387 * @remarks none 388 * 389 ******************************************************************************* 390 */ 391 void ime_full_pel_motion_estimation_16x16 392 ( 393 me_ctxt_t *ps_me_ctxt, 394 WORD32 i4_ref_list 395 ) 396 { 397 /* mb part info */ 398 mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_ref_list]; 399 400 /******************************************************************/ 401 /* Modify Search range about initial candidate instead of zero mv */ 402 /******************************************************************/ 403 /* 404 * FIXME: The motion vectors in a way can become unbounded. It may so happen that 405 * MV might exceed the limit of the profile configured. 406 */ 407 ps_me_ctxt->i4_srch_range_w = MAX(ps_me_ctxt->i4_srch_range_w, 408 -ps_me_ctxt->ai2_srch_boundaries[0] + ps_mb_part->s_mv_curr.i2_mvx); 409 ps_me_ctxt->i4_srch_range_e = MIN(ps_me_ctxt->i4_srch_range_e, 410 ps_me_ctxt->ai2_srch_boundaries[0] + ps_mb_part->s_mv_curr.i2_mvx); 411 ps_me_ctxt->i4_srch_range_n = MAX(ps_me_ctxt->i4_srch_range_n, 412 -ps_me_ctxt->ai2_srch_boundaries[1] + ps_mb_part->s_mv_curr.i2_mvy); 413 ps_me_ctxt->i4_srch_range_s = MIN(ps_me_ctxt->i4_srch_range_s, 414 ps_me_ctxt->ai2_srch_boundaries[1] + ps_mb_part->s_mv_curr.i2_mvy); 415 416 /************************************************************/ 417 /* Traverse about best initial candidate for mv */ 418 /************************************************************/ 419 420 switch (ps_me_ctxt->u4_me_speed_preset) 421 { 422 case DMND_SRCH: 423 ime_diamond_search_16x16(ps_me_ctxt, i4_ref_list); 424 break; 425 default: 426 assert(0); 427 break; 428 } 429 } 430 431 /** 432 ******************************************************************************* 433 * 434 * @brief Searches for the best matching sub pixel predictor within the search 435 * range 436 * 437 * @par Description: 438 * This function begins by searching across all sub pixel sample points 439 * around the full pel motion vector. The vector with least cost is chosen as 440 * the mv for the current mb. If the skip mode is not evaluated while analysing 441 * the initial search candidates then analyse it here and update the mv. 442 * 443 * @param[in] ps_proc 444 * pointer to current proc ctxt 445 * 446 * @param[in] ps_me_ctxt 447 * pointer to me context 448 * 449 * @returns none 450 * 451 * @remarks none 452 * 453 ******************************************************************************* 454 */ 455 void ime_sub_pel_motion_estimation_16x16 456 ( 457 me_ctxt_t *ps_me_ctxt, 458 WORD32 i4_reflist 459 ) 460 { 461 /* pointers to src & ref macro block */ 462 UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma; 463 464 /* pointers to ref. half pel planes */ 465 UWORD8 *pu1_ref_mb_half_x; 466 UWORD8 *pu1_ref_mb_half_y; 467 UWORD8 *pu1_ref_mb_half_xy; 468 469 /* pointers to ref. half pel planes */ 470 UWORD8 *pu1_ref_mb_half_x_temp; 471 UWORD8 *pu1_ref_mb_half_y_temp; 472 UWORD8 *pu1_ref_mb_half_xy_temp; 473 474 /* strides */ 475 WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd; 476 477 WORD32 i4_ref_strd = ps_me_ctxt->u4_subpel_buf_strd; 478 479 /* mb partitions info */ 480 mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_reflist]; 481 482 /* SAD(distortion metric) of an mb */ 483 WORD32 i4_mb_distortion; 484 WORD32 i4_distortion_least = ps_mb_part->i4_mb_distortion; 485 486 /* cost = distortion + u4_lambda_motion * rate */ 487 WORD32 i4_mb_cost; 488 WORD32 i4_mb_cost_least = ps_mb_part->i4_mb_cost; 489 490 /*Best half pel buffer*/ 491 UWORD8 *pu1_best_hpel_buf = NULL; 492 493 /* mv bits */ 494 UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits; 495 496 /* Motion vectors in full-pel units */ 497 WORD16 mv_x, mv_y; 498 499 /* lambda - lagrange constant */ 500 UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion; 501 502 /* Flags to check if half pel points needs to be evaluated */ 503 /**************************************/ 504 /* 1 bit for each half pel candidate */ 505 /* bit 0 - half x = 1, half y = 0 */ 506 /* bit 1 - half x = -1, half y = 0 */ 507 /* bit 2 - half x = 0, half y = 1 */ 508 /* bit 3 - half x = 0, half y = -1 */ 509 /* bit 4 - half x = 1, half y = 1 */ 510 /* bit 5 - half x = -1, half y = 1 */ 511 /* bit 6 - half x = 1, half y = -1 */ 512 /* bit 7 - half x = -1, half y = -1 */ 513 /**************************************/ 514 /* temp var */ 515 WORD16 i2_mv_u_x, i2_mv_u_y; 516 WORD32 i, j; 517 WORD32 ai4_sad[8]; 518 519 WORD32 i4_srch_pos_idx = ps_mb_part->i4_srch_pos_idx; 520 521 i2_mv_u_x = ps_mb_part->s_mv_curr.i2_mvx; 522 i2_mv_u_y = ps_mb_part->s_mv_curr.i2_mvy; 523 524 /************************************************************/ 525 /* Evaluate half pel */ 526 /************************************************************/ 527 mv_x = ps_mb_part->s_mv_curr.i2_mvx >> 2; 528 mv_y = ps_mb_part->s_mv_curr.i2_mvy >> 2; 529 530 531 /**************************************************************/ 532 /* ps_me_ctxt->pu1_half_x points to the half pel pixel on the */ 533 /* left side of full pel */ 534 /* ps_me_ctxt->pu1_half_y points to the half pel pixel on the */ 535 /* top side of full pel */ 536 /* ps_me_ctxt->pu1_half_xy points to the half pel pixel */ 537 /* on the top left side of full pel */ 538 /* for the function pf_ime_sub_pel_compute_sad_16x16 the */ 539 /* default postions are */ 540 /* ps_me_ctxt->pu1_half_x = right halp_pel */ 541 /* ps_me_ctxt->pu1_half_y = bottom halp_pel */ 542 /* ps_me_ctxt->pu1_half_xy = bottom right halp_pel */ 543 /* Hence corresponding adjustments made here */ 544 /**************************************************************/ 545 546 pu1_ref_mb_half_x_temp = pu1_ref_mb_half_x = ps_me_ctxt->apu1_subpel_buffs[0] + 1; 547 pu1_ref_mb_half_y_temp = pu1_ref_mb_half_y = ps_me_ctxt->apu1_subpel_buffs[1] + 1 + i4_ref_strd; 548 pu1_ref_mb_half_xy_temp = pu1_ref_mb_half_xy = ps_me_ctxt->apu1_subpel_buffs[2] + 1 + i4_ref_strd; 549 550 ps_me_ctxt->pf_ime_sub_pel_compute_sad_16x16(pu1_curr_mb, pu1_ref_mb_half_x, 551 pu1_ref_mb_half_y, 552 pu1_ref_mb_half_xy, 553 i4_src_strd, i4_ref_strd, 554 ai4_sad); 555 556 /* Half x plane */ 557 for(i = 0; i < 2; i++) 558 { 559 WORD32 mv_x_tmp = (mv_x << 2) + 2; 560 WORD32 mv_y_tmp = (mv_y << 2); 561 562 mv_x_tmp -= (i * 4); 563 564 i4_mb_distortion = ai4_sad[i]; 565 566 /* compute cost */ 567 i4_mb_cost = i4_mb_distortion + u4_lambda_motion * ( pu1_mv_bits[ mv_x_tmp - ps_mb_part->s_mv_pred.i2_mvx] 568 + pu1_mv_bits[mv_y_tmp - ps_mb_part->s_mv_pred.i2_mvy] ); 569 570 if (i4_mb_cost < i4_mb_cost_least) 571 { 572 i4_mb_cost_least = i4_mb_cost; 573 574 i4_distortion_least = i4_mb_distortion; 575 576 i2_mv_u_x = mv_x_tmp; 577 578 i2_mv_u_y = mv_y_tmp; 579 580 #ifndef HP_PL /*choosing whether left or right half_x*/ 581 ps_me_ctxt->apu1_subpel_buffs[0] = pu1_ref_mb_half_x_temp - i; 582 pu1_best_hpel_buf = pu1_ref_mb_half_x_temp - i; 583 584 i4_srch_pos_idx = 0; 585 #endif 586 } 587 588 } 589 590 /* Half y plane */ 591 for(i = 0; i < 2; i++) 592 { 593 WORD32 mv_x_tmp = (mv_x << 2); 594 WORD32 mv_y_tmp = (mv_y << 2) + 2; 595 596 mv_y_tmp -= (i * 4); 597 598 i4_mb_distortion = ai4_sad[2 + i]; 599 600 /* compute cost */ 601 i4_mb_cost = i4_mb_distortion + u4_lambda_motion * ( pu1_mv_bits[ mv_x_tmp - ps_mb_part->s_mv_pred.i2_mvx] 602 + pu1_mv_bits[mv_y_tmp - ps_mb_part->s_mv_pred.i2_mvy] ); 603 604 if (i4_mb_cost < i4_mb_cost_least) 605 { 606 i4_mb_cost_least = i4_mb_cost; 607 608 i4_distortion_least = i4_mb_distortion; 609 610 i2_mv_u_x = mv_x_tmp; 611 612 i2_mv_u_y = mv_y_tmp; 613 614 #ifndef HP_PL/*choosing whether top or bottom half_y*/ 615 ps_me_ctxt->apu1_subpel_buffs[1] = pu1_ref_mb_half_y_temp - i*(i4_ref_strd); 616 pu1_best_hpel_buf = pu1_ref_mb_half_y_temp - i*(i4_ref_strd); 617 618 i4_srch_pos_idx = 1; 619 #endif 620 } 621 622 } 623 624 /* Half xy plane */ 625 for(j = 0; j < 2; j++) 626 { 627 for(i = 0; i < 2; i++) 628 { 629 WORD32 mv_x_tmp = (mv_x << 2) + 2; 630 WORD32 mv_y_tmp = (mv_y << 2) + 2; 631 632 mv_x_tmp -= (i * 4); 633 mv_y_tmp -= (j * 4); 634 635 i4_mb_distortion = ai4_sad[4 + i + 2 * j]; 636 637 /* compute cost */ 638 i4_mb_cost = i4_mb_distortion + u4_lambda_motion * ( pu1_mv_bits[ mv_x_tmp - ps_mb_part->s_mv_pred.i2_mvx] 639 + pu1_mv_bits[mv_y_tmp - ps_mb_part->s_mv_pred.i2_mvy] ); 640 641 if (i4_mb_cost < i4_mb_cost_least) 642 { 643 i4_mb_cost_least = i4_mb_cost; 644 645 i4_distortion_least = i4_mb_distortion; 646 647 i2_mv_u_x = mv_x_tmp; 648 649 i2_mv_u_y = mv_y_tmp; 650 651 #ifndef HP_PL /*choosing between four half_xy */ 652 ps_me_ctxt->apu1_subpel_buffs[2] = pu1_ref_mb_half_xy_temp - j*(i4_ref_strd) - i; 653 pu1_best_hpel_buf = pu1_ref_mb_half_xy_temp - j*(i4_ref_strd) - i; 654 655 i4_srch_pos_idx = 2; 656 #endif 657 } 658 659 } 660 } 661 662 if (i4_mb_cost_least < ps_mb_part->i4_mb_cost) 663 { 664 ps_mb_part->i4_mb_cost = i4_mb_cost_least; 665 ps_mb_part->i4_mb_distortion = i4_distortion_least; 666 ps_mb_part->s_mv_curr.i2_mvx = i2_mv_u_x; 667 ps_mb_part->s_mv_curr.i2_mvy = i2_mv_u_y; 668 ps_mb_part->pu1_best_hpel_buf = pu1_best_hpel_buf; 669 ps_mb_part->i4_srch_pos_idx = i4_srch_pos_idx; 670 } 671 } 672 673 /** 674 ******************************************************************************* 675 * 676 * @brief This function computes cost of skip macroblocks 677 * 678 * @par Description: 679 * 680 * @param[in] ps_me_ctxt 681 * pointer to me ctxt 682 * 683 * 684 * @returns none 685 * 686 * @remarks 687 * NOTE: while computing the skip cost, do not enable early exit from compute 688 * sad function because, a negative bias gets added later 689 * Note tha the last ME candidate in me ctxt is taken as skip motion vector 690 * 691 ******************************************************************************* 692 */ 693 void ime_compute_skip_cost 694 ( 695 me_ctxt_t *ps_me_ctxt, 696 ime_mv_t *ps_skip_mv, 697 mb_part_ctxt *ps_smb_part_info, 698 UWORD32 u4_use_stat_sad, 699 WORD32 i4_reflist, 700 WORD32 i4_is_slice_type_b 701 ) 702 { 703 704 /* SAD(distortion metric) of an mb */ 705 WORD32 i4_mb_distortion; 706 707 /* cost = distortion + u4_lambda_motion * rate */ 708 WORD32 i4_mb_cost; 709 710 /* temp var */ 711 UWORD8 *pu1_ref = NULL; 712 713 ime_mv_t s_skip_mv; 714 715 s_skip_mv.i2_mvx = (ps_skip_mv->i2_mvx +2)>>2; 716 s_skip_mv.i2_mvy = (ps_skip_mv->i2_mvy +2)>>2; 717 718 /* Check if the skip mv is out of bounds or subpel */ 719 { 720 /* skip mv */ 721 ime_mv_t s_clip_skip_mv; 722 723 s_clip_skip_mv.i2_mvx = CLIP3(ps_me_ctxt->i4_srch_range_w, ps_me_ctxt->i4_srch_range_e, s_skip_mv.i2_mvx); 724 s_clip_skip_mv.i2_mvy = CLIP3(ps_me_ctxt->i4_srch_range_n, ps_me_ctxt->i4_srch_range_s, s_skip_mv.i2_mvy); 725 726 if ((s_clip_skip_mv.i2_mvx != s_skip_mv.i2_mvx) || 727 (s_clip_skip_mv.i2_mvy != s_skip_mv.i2_mvy) || 728 (ps_skip_mv->i2_mvx & 0x3) || 729 (ps_skip_mv->i2_mvy & 0x3)) 730 { 731 return ; 732 } 733 } 734 735 736 /* adjust ref pointer */ 737 pu1_ref = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist] + s_skip_mv.i2_mvx 738 + (s_skip_mv.i2_mvy * ps_me_ctxt->i4_rec_strd); 739 740 if(u4_use_stat_sad == 1) 741 { 742 UWORD32 u4_is_nonzero; 743 744 ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16( 745 ps_me_ctxt->pu1_src_buf_luma, pu1_ref, ps_me_ctxt->i4_src_strd, 746 ps_me_ctxt->i4_rec_strd, ps_me_ctxt->pu2_sad_thrsh, 747 &i4_mb_distortion, &u4_is_nonzero); 748 749 if (u4_is_nonzero == 0 || i4_mb_distortion <= ps_me_ctxt->i4_min_sad) 750 { 751 ps_me_ctxt->u4_min_sad_reached = 1; /* found min sad */ 752 ps_me_ctxt->i4_min_sad = (u4_is_nonzero == 0) ? 0 : i4_mb_distortion; 753 } 754 } 755 else 756 { 757 ps_me_ctxt->pf_ime_compute_sad_16x16[ps_me_ctxt->u4_enable_fast_sad]( 758 ps_me_ctxt->pu1_src_buf_luma, pu1_ref, ps_me_ctxt->i4_src_strd, 759 ps_me_ctxt->i4_rec_strd, INT_MAX, &i4_mb_distortion); 760 761 if(i4_mb_distortion <= ps_me_ctxt->i4_min_sad) 762 { 763 ps_me_ctxt->i4_min_sad = i4_mb_distortion; 764 ps_me_ctxt->u4_min_sad_reached = 1; /* found min sad */ 765 } 766 } 767 768 769 /* for skip mode cost & distortion are identical 770 * But we shall add a bias to favor skip mode. 771 * Doc. JVT B118 Suggests SKIP_BIAS as 16. 772 * TODO : Empirical analysis of SKIP_BIAS is necessary */ 773 774 i4_mb_cost = i4_mb_distortion - (ps_me_ctxt->u4_lambda_motion * (ps_me_ctxt->i4_skip_bias[0] + ps_me_ctxt->i4_skip_bias[1] * i4_is_slice_type_b)); 775 776 if (i4_mb_cost <= ps_smb_part_info->i4_mb_cost) 777 { 778 ps_smb_part_info->i4_mb_cost = i4_mb_cost; 779 ps_smb_part_info->i4_mb_distortion = i4_mb_distortion; 780 ps_smb_part_info->s_mv_curr.i2_mvx = s_skip_mv.i2_mvx; 781 ps_smb_part_info->s_mv_curr.i2_mvy = s_skip_mv.i2_mvy; 782 } 783 } 784 785