Home | History | Annotate | Download | only in encoder
      1 /******************************************************************************
      2  *
      3  * Copyright (C) 2015 The Android Open Source Project
      4  *
      5  * Licensed under the Apache License, Version 2.0 (the "License");
      6  * you may not use this file except in compliance with the License.
      7  * You may obtain a copy of the License at:
      8  *
      9  * http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  *
     17  *****************************************************************************
     18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
     19 */
     20 /**
     21  *******************************************************************************
     22  * @file
     23  *  ih264e_me.c
     24  *
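 * @brief
 *  Contains definitions of routines used for full-pel and sub-pel motion
 *  estimation of 16x16 macroblocks and for evaluating the skip cost
 *
 * @author
 *  Ittiam
 *
 * @par List of Functions:
 *  - ime_diamond_search_16x16
 *  - ime_evaluate_init_srchposn_16x16
 *  - ime_full_pel_motion_estimation_16x16
 *  - ime_sub_pel_motion_estimation_16x16
 *  - ime_compute_skip_cost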
     33  *
     34  * @remarks
     35  *  None
     36  *
     37  *******************************************************************************
     38  */
     39 
     40 /*****************************************************************************/
     41 /* File Includes                                                             */
     42 /*****************************************************************************/
     43 
     44 /* System include files */
     45 #include <stdio.h>
     46 #include <assert.h>
     47 #include <limits.h>
     48 #include <string.h>
     49 
     50 /* User include files */
     51 #include "ime_typedefs.h"
     52 #include "ime_distortion_metrics.h"
     53 #include "ime_defs.h"
     54 #include "ime_structs.h"
     55 #include "ime.h"
     56 #include "ime_macros.h"
     57 #include "ime_statistics.h"
     58 
     59 /**
     60 *******************************************************************************
     61 *
     62 * @brief Diamond Search
     63 *
     64 * @par Description:
     65 *  This function computes the sad at vertices of several layers of diamond grid
     66 *  at a time. The number of layers of diamond grid that would be evaluated is
     67 *  configurable.The function computes the sad at vertices of a diamond grid. If
     68 *  the sad at the center of the diamond grid is lesser than the sad at any other
     69 *  point of the diamond grid, the function marks the candidate Mb partition as
     70 *  mv.
     71 *
     72 * @param[in] ps_mb_part
     73 *  pointer to current mb partition ctxt with respect to ME
     74 *
     75 * @param[in] ps_me_ctxt
     76 *  pointer to me context
     77 *
     78 * @param[in] u4_lambda_motion
     79 *  lambda motion
     80 *
     81 * @param[in] u4_enable_fast_sad
     82 *  enable/disable fast sad computation
     83 *
     84 * @returns  mv pair & corresponding distortion and cost
     85 *
     86 * @remarks Diamond Srch, radius is 1
     87 *
     88 *******************************************************************************
     89 */
     90 void ime_diamond_search_16x16(me_ctxt_t *ps_me_ctxt, WORD32 i4_reflist)
     91 {
     92     /* MB partition info */
     93     mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_reflist];
     94 
     95     /* lagrange parameter */
     96     UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;
     97 
     98     /* srch range*/
     99     WORD32 i4_srch_range_n = ps_me_ctxt->i4_srch_range_n;
    100     WORD32 i4_srch_range_s = ps_me_ctxt->i4_srch_range_s;
    101     WORD32 i4_srch_range_e = ps_me_ctxt->i4_srch_range_e;
    102     WORD32 i4_srch_range_w = ps_me_ctxt->i4_srch_range_w;
    103 
    104     /* enabled fast sad computation */
    105 //    UWORD32 u4_enable_fast_sad = ps_me_ctxt->u4_enable_fast_sad;
    106 
    107     /* pointer to src macro block */
    108     UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;
    109     UWORD8 *pu1_ref_mb = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist];
    110 
    111     /* strides */
    112     WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
    113     WORD32 i4_ref_strd = ps_me_ctxt->i4_rec_strd;
    114 
    115     /* least cost */
    116     WORD32 i4_cost_least = ps_mb_part->i4_mb_cost;
    117 
    118     /* least sad */
    119     WORD32 i4_distortion_least = ps_mb_part->i4_mb_distortion;
    120 
    121     /* mv pair */
    122     WORD16 i2_mvx, i2_mvy;
    123 
    124     /* mv bits */
    125     UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits;
    126 
    127     /* temp var */
    128     WORD32 i4_cost[4];
    129     WORD32 i4_sad[4];
    130     UWORD8 *pu1_ref;
    131     WORD16 i2_mv_u_x, i2_mv_u_y;
    132 
    133     /* Diamond search Iteration Max Cnt */
    134     UWORD32 u4_num_layers = ps_me_ctxt->u4_num_layers;
    135 
    136     /* temp var */
    137 //    UWORD8 u1_prev_jump = NONE;
    138 //    UWORD8 u1_curr_jump = NONE;
    139 //    UWORD8 u1_next_jump;
    140 //    WORD32 mask_arr[5] = {15, 13, 14, 7, 11};
    141 //    WORD32 mask;
    142 //    UWORD8 *apu1_ref[4];
    143 //    WORD32 i, cnt;
    144 //    WORD32 dia[4][2] = {{-1, 0}, {1, 0}, {0, -1}, {0, 1}};
    145 
    146     /* mv with best sad during initial evaluation */
    147     i2_mvx = ps_mb_part->s_mv_curr.i2_mvx;
    148     i2_mvy = ps_mb_part->s_mv_curr.i2_mvy;
    149 
    150     i2_mv_u_x = i2_mvx;
    151     i2_mv_u_y = i2_mvy;
    152 
    153     while (u4_num_layers--)
    154     {
    155         /* FIXME : is this the write way to check for out of bounds ? */
    156         if ( (i2_mvx - 1 < i4_srch_range_w) ||
    157                         (i2_mvx + 1 > i4_srch_range_e) ||
    158                         (i2_mvy - 1 < i4_srch_range_n) ||
    159                         (i2_mvy + 1 > i4_srch_range_s) )
    160         {
    161             break;
    162         }
    163 
    164         pu1_ref = pu1_ref_mb + i2_mvx + (i2_mvy * i4_ref_strd);
    165 
    166         ps_me_ctxt->pf_ime_compute_sad4_diamond(pu1_ref,
    167                                                 pu1_curr_mb,
    168                                                 i4_ref_strd,
    169                                                 i4_src_strd,
    170                                                 i4_sad);
    171 
    172         DEBUG_SAD_HISTOGRAM_ADD(i4_sad[0], 2);
    173         DEBUG_SAD_HISTOGRAM_ADD(i4_sad[1], 2);
    174         DEBUG_SAD_HISTOGRAM_ADD(i4_sad[2], 2);
    175         DEBUG_SAD_HISTOGRAM_ADD(i4_sad[3], 2);
    176 
    177         /* compute cost */
    178         i4_cost[0] = i4_sad[0] + u4_lambda_motion * ( pu1_mv_bits[ ((i2_mvx - 1) << 2) - ps_mb_part->s_mv_pred.i2_mvx]
    179                                                                    + pu1_mv_bits[(i2_mvy << 2) - ps_mb_part->s_mv_pred.i2_mvy] );
    180         i4_cost[1] = i4_sad[1] + u4_lambda_motion * ( pu1_mv_bits[ ((i2_mvx + 1) << 2) - ps_mb_part->s_mv_pred.i2_mvx]
    181                                                                    + pu1_mv_bits[(i2_mvy << 2) - ps_mb_part->s_mv_pred.i2_mvy] );
    182         i4_cost[2] = i4_sad[2] + u4_lambda_motion * ( pu1_mv_bits[ (i2_mvx << 2) - ps_mb_part->s_mv_pred.i2_mvx]
    183                                                                    + pu1_mv_bits[((i2_mvy - 1) << 2) - ps_mb_part->s_mv_pred.i2_mvy] );
    184         i4_cost[3] = i4_sad[3] + u4_lambda_motion * ( pu1_mv_bits[ (i2_mvx << 2) - ps_mb_part->s_mv_pred.i2_mvx]
    185                                                                    + pu1_mv_bits[((i2_mvy + 1) << 2) - ps_mb_part->s_mv_pred.i2_mvy] );
    186 
    187 
    188         if (i4_cost_least > i4_cost[0])
    189         {
    190             i4_cost_least = i4_cost[0];
    191             i4_distortion_least = i4_sad[0];
    192 
    193             i2_mv_u_x = (i2_mvx - 1);
    194             i2_mv_u_y = i2_mvy;
    195         }
    196 
    197         if (i4_cost_least > i4_cost[1])
    198         {
    199             i4_cost_least = i4_cost[1];
    200             i4_distortion_least = i4_sad[1];
    201 
    202             i2_mv_u_x = (i2_mvx + 1);
    203             i2_mv_u_y = i2_mvy;
    204         }
    205 
    206         if (i4_cost_least > i4_cost[2])
    207         {
    208             i4_cost_least = i4_cost[2];
    209             i4_distortion_least = i4_sad[2];
    210 
    211             i2_mv_u_x = i2_mvx;
    212             i2_mv_u_y = i2_mvy - 1;
    213         }
    214 
    215         if (i4_cost_least > i4_cost[3])
    216         {
    217             i4_cost_least = i4_cost[3];
    218             i4_distortion_least = i4_sad[3];
    219 
    220             i2_mv_u_x = i2_mvx;
    221             i2_mv_u_y = i2_mvy + 1;
    222         }
    223 
    224         if( (i2_mv_u_x == i2_mvx) && (i2_mv_u_y == i2_mvy))
    225         {
    226             ps_mb_part->u4_exit = 1;
    227             break;
    228         }
    229         else
    230         {
    231             i2_mvx = i2_mv_u_x;
    232             i2_mvy = i2_mv_u_y;
    233         }
    234 
    235 
    236     }
    237 
    238     if (i4_cost_least < ps_mb_part->i4_mb_cost)
    239     {
    240         ps_mb_part->i4_mb_cost = i4_cost_least;
    241         ps_mb_part->i4_mb_distortion = i4_distortion_least;
    242         ps_mb_part->s_mv_curr.i2_mvx = i2_mvx;
    243         ps_mb_part->s_mv_curr.i2_mvy = i2_mvy;
    244     }
    245 
    246 }
    247 
    248 
    249 /**
    250 *******************************************************************************
    251 *
    252 * @brief This function computes the best motion vector among the tentative mv
    253 * candidates chosen.
    254 *
    255 * @par Description:
    256 *  This function determines the position in the search window at which the motion
    257 *  estimation should begin in order to minimise the number of search iterations.
    258 *
    259 * @param[in] ps_mb_part
    260 *  pointer to current mb partition ctxt with respect to ME
    261 *
    262 * @param[in] u4_lambda_motion
    263 *  lambda motion
    264 *
    265 * @param[in] u4_fast_flag
    266 *  enable/disable fast sad computation
    267 *
    268 * @returns  mv pair & corresponding distortion and cost
    269 *
    270 * @remarks none
    271 *
    272 *******************************************************************************
    273 */
    274 
    275 void ime_evaluate_init_srchposn_16x16
    276         (
    277             me_ctxt_t *ps_me_ctxt,
    278             WORD32 i4_reflist
    279         )
    280 {
    281     UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;
    282 
    283     /* candidate mv cnt */
    284     UWORD32 u4_num_candidates = ps_me_ctxt->u4_num_candidates[i4_reflist];
    285 
    286     /* list of candidate mvs */
    287     ime_mv_t *ps_mv_list = ps_me_ctxt->as_mv_init_search[i4_reflist];
    288 
    289     /* pointer to src macro block */
    290     UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;
    291     UWORD8 *pu1_ref_mb = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist];
    292 
    293     /* strides */
    294     WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
    295     WORD32 i4_ref_strd = ps_me_ctxt->i4_rec_strd;
    296 
    297     /* enabled fast sad computation */
    298     UWORD32 u4_enable_fast_sad = ps_me_ctxt->u4_enable_fast_sad;
    299 
    300     /* SAD(distortion metric) of an 8x8 block */
    301     WORD32 i4_mb_distortion;
    302 
    303     /* cost = distortion + u4_lambda_motion * rate */
    304     WORD32 i4_mb_cost, i4_mb_cost_least = INT_MAX, i4_distortion_least = INT_MAX;
    305 
    306     /* mb partitions info */
    307     mb_part_ctxt *ps_mb_part = &(ps_me_ctxt->as_mb_part[i4_reflist]);
    308 
    309     /* mv bits */
    310     UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits;
    311 
    312     /* temp var */
    313     UWORD32  i, j;
    314     WORD32 i4_srch_pos_idx = 0;
    315     UWORD8 *pu1_ref = NULL;
    316 
    317     /* Carry out a search using each of the motion vector pairs identified above as predictors. */
    318     /* TODO : Just like Skip, Do we need to add any bias to zero mv as well */
    319     for(i = 0; i < u4_num_candidates; i++)
    320     {
    321         /* compute sad */
    322         WORD32 c_sad = 1;
    323 
    324         for(j = 0; j < i; j++ )
    325         {
    326             if ( (ps_mv_list[i].i2_mvx == ps_mv_list[j].i2_mvx) &&
    327                             (ps_mv_list[i].i2_mvy == ps_mv_list[j].i2_mvy) )
    328             {
    329                 c_sad = 0;
    330                 break;
    331             }
    332         }
    333         if(c_sad)
    334         {
    335             /* adjust ref pointer */
    336             pu1_ref = pu1_ref_mb + ps_mv_list[i].i2_mvx + (ps_mv_list[i].i2_mvy * i4_ref_strd);
    337 
    338             /* compute distortion */
    339             ps_me_ctxt->pf_ime_compute_sad_16x16[u4_enable_fast_sad](pu1_curr_mb, pu1_ref, i4_src_strd, i4_ref_strd, i4_mb_cost_least, &i4_mb_distortion);
    340 
    341             DEBUG_SAD_HISTOGRAM_ADD(i4_mb_distortion, 3);
    342             /* compute cost */
    343             i4_mb_cost = i4_mb_distortion + u4_lambda_motion * ( pu1_mv_bits[ (ps_mv_list[i].i2_mvx << 2) - ps_mb_part->s_mv_pred.i2_mvx]
    344                             + pu1_mv_bits[(ps_mv_list[i].i2_mvy << 2) - ps_mb_part->s_mv_pred.i2_mvy] );
    345 
    346             if (i4_mb_cost < i4_mb_cost_least)
    347             {
    348                 i4_mb_cost_least = i4_mb_cost;
    349 
    350                 i4_distortion_least = i4_mb_distortion;
    351 
    352                 i4_srch_pos_idx = i;
    353             }
    354         }
    355     }
    356 
    357     if (i4_mb_cost_least < ps_mb_part->i4_mb_cost)
    358     {
    359         ps_mb_part->i4_srch_pos_idx = i4_srch_pos_idx;
    360         ps_mb_part->i4_mb_cost = i4_mb_cost_least;
    361         ps_mb_part->i4_mb_distortion = i4_distortion_least;
    362         ps_mb_part->s_mv_curr.i2_mvx = ps_mv_list[i4_srch_pos_idx].i2_mvx;
    363         ps_mb_part->s_mv_curr.i2_mvy = ps_mv_list[i4_srch_pos_idx].i2_mvy;
    364     }
    365 }
    366 
    367 /**
    368 *******************************************************************************
    369 *
    370 * @brief Searches for the best matching full pixel predictor within the search
    371 * range
    372 *
    373 * @par Description:
    374 *  This function begins by computing the mv predict vector for the current mb.
    375 *  This is used for cost computations. Further basing on the algo. chosen, it
    376 *  looks through a set of candidate vectors that best represent the mb a least
    377 *  cost and returns this information.
    378 *
    379 * @param[in] ps_proc
    380 *  pointer to current proc ctxt
    381 *
    382 * @param[in] ps_me_ctxt
    383 *  pointer to me context
    384 *
    385 * @returns  mv pair & corresponding distortion and cost
    386 *
    387 * @remarks none
    388 *
    389 *******************************************************************************
    390 */
    391 void ime_full_pel_motion_estimation_16x16
    392     (
    393         me_ctxt_t *ps_me_ctxt,
    394         WORD32 i4_ref_list
    395     )
    396 {
    397     /* mb part info */
    398     mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_ref_list];
    399 
    400     /******************************************************************/
    401     /* Modify Search range about initial candidate instead of zero mv */
    402     /******************************************************************/
    403     /*
    404      * FIXME: The motion vectors in a way can become unbounded. It may so happen that
    405      * MV might exceed the limit of the profile configured.
    406      */
    407     ps_me_ctxt->i4_srch_range_w = MAX(ps_me_ctxt->i4_srch_range_w,
    408                                       -ps_me_ctxt->ai2_srch_boundaries[0] + ps_mb_part->s_mv_curr.i2_mvx);
    409     ps_me_ctxt->i4_srch_range_e = MIN(ps_me_ctxt->i4_srch_range_e,
    410                                        ps_me_ctxt->ai2_srch_boundaries[0] + ps_mb_part->s_mv_curr.i2_mvx);
    411     ps_me_ctxt->i4_srch_range_n = MAX(ps_me_ctxt->i4_srch_range_n,
    412                                       -ps_me_ctxt->ai2_srch_boundaries[1] + ps_mb_part->s_mv_curr.i2_mvy);
    413     ps_me_ctxt->i4_srch_range_s = MIN(ps_me_ctxt->i4_srch_range_s,
    414                                        ps_me_ctxt->ai2_srch_boundaries[1] + ps_mb_part->s_mv_curr.i2_mvy);
    415 
    416     /************************************************************/
    417     /* Traverse about best initial candidate for mv             */
    418     /************************************************************/
    419 
    420     switch (ps_me_ctxt->u4_me_speed_preset)
    421     {
    422         case DMND_SRCH:
    423             ime_diamond_search_16x16(ps_me_ctxt, i4_ref_list);
    424             break;
    425         default:
    426             assert(0);
    427             break;
    428     }
    429 }
    430 
    431 /**
    432 *******************************************************************************
    433 *
    434 * @brief Searches for the best matching sub pixel predictor within the search
    435 * range
    436 *
    437 * @par Description:
    438 *  This function begins by searching across all sub pixel sample points
    439 *  around the full pel motion vector. The vector with least cost is chosen as
    440 *  the mv for the current mb. If the skip mode is not evaluated while analysing
    441 *  the initial search candidates then analyse it here and update the mv.
    442 *
    443 * @param[in] ps_proc
    444 *  pointer to current proc ctxt
    445 *
    446 * @param[in] ps_me_ctxt
    447 *  pointer to me context
    448 *
    449 * @returns none
    450 *
    451 * @remarks none
    452 *
    453 *******************************************************************************
    454 */
    455 void ime_sub_pel_motion_estimation_16x16
    456     (
    457         me_ctxt_t *ps_me_ctxt,
    458         WORD32 i4_reflist
    459     )
    460 {
    461     /* pointers to src & ref macro block */
    462     UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;
    463 
    464     /* pointers to ref. half pel planes */
    465     UWORD8 *pu1_ref_mb_half_x;
    466     UWORD8 *pu1_ref_mb_half_y;
    467     UWORD8 *pu1_ref_mb_half_xy;
    468 
    469     /* pointers to ref. half pel planes */
    470     UWORD8 *pu1_ref_mb_half_x_temp;
    471     UWORD8 *pu1_ref_mb_half_y_temp;
    472     UWORD8 *pu1_ref_mb_half_xy_temp;
    473 
    474     /* strides */
    475     WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
    476 
    477     WORD32 i4_ref_strd = ps_me_ctxt->u4_subpel_buf_strd;
    478 
    479     /* mb partitions info */
    480     mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_reflist];
    481 
    482     /* SAD(distortion metric) of an mb */
    483     WORD32 i4_mb_distortion;
    484     WORD32 i4_distortion_least = ps_mb_part->i4_mb_distortion;
    485 
    486     /* cost = distortion + u4_lambda_motion * rate */
    487     WORD32 i4_mb_cost;
    488     WORD32 i4_mb_cost_least = ps_mb_part->i4_mb_cost;
    489 
    490     /*Best half pel buffer*/
    491     UWORD8 *pu1_best_hpel_buf = NULL;
    492 
    493     /* mv bits */
    494     UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits;
    495 
    496     /* Motion vectors in full-pel units */
    497     WORD16 mv_x, mv_y;
    498 
    499     /* lambda - lagrange constant */
    500     UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;
    501 
    502     /* Flags to check if half pel points needs to be evaluated */
    503     /**************************************/
    504     /* 1 bit for each half pel candidate  */
    505     /* bit 0 - half x = 1, half y = 0     */
    506     /* bit 1 - half x = -1, half y = 0    */
    507     /* bit 2 - half x = 0, half y = 1     */
    508     /* bit 3 - half x = 0, half y = -1    */
    509     /* bit 4 - half x = 1, half y = 1     */
    510     /* bit 5 - half x = -1, half y = 1    */
    511     /* bit 6 - half x = 1, half y = -1    */
    512     /* bit 7 - half x = -1, half y = -1   */
    513     /**************************************/
    514     /* temp var */
    515     WORD16 i2_mv_u_x, i2_mv_u_y;
    516     WORD32 i, j;
    517     WORD32 ai4_sad[8];
    518 
    519     WORD32 i4_srch_pos_idx = ps_mb_part->i4_srch_pos_idx;
    520 
    521     i2_mv_u_x = ps_mb_part->s_mv_curr.i2_mvx;
    522     i2_mv_u_y = ps_mb_part->s_mv_curr.i2_mvy;
    523 
    524     /************************************************************/
    525     /* Evaluate half pel                                        */
    526     /************************************************************/
    527     mv_x = ps_mb_part->s_mv_curr.i2_mvx >> 2;
    528     mv_y = ps_mb_part->s_mv_curr.i2_mvy >> 2;
    529 
    530 
    531     /**************************************************************/
    532     /* ps_me_ctxt->pu1_half_x points to the half pel pixel on the */
    533     /* left side of full pel                                      */
    534     /* ps_me_ctxt->pu1_half_y points to the half pel pixel on the */
    535     /* top  side of full pel                                      */
    536     /* ps_me_ctxt->pu1_half_xy points to the half pel pixel       */
    537     /* on the top left side of full pel                           */
    538     /* for the function pf_ime_sub_pel_compute_sad_16x16 the      */
    539     /* default postions are                                       */
    540     /* ps_me_ctxt->pu1_half_x = right halp_pel                    */
    541     /*  ps_me_ctxt->pu1_half_y = bottom halp_pel                  */
    542     /*  ps_me_ctxt->pu1_half_xy = bottom right halp_pel           */
    543     /* Hence corresponding adjustments made here                  */
    544     /**************************************************************/
    545 
    546     pu1_ref_mb_half_x_temp = pu1_ref_mb_half_x = ps_me_ctxt->apu1_subpel_buffs[0] + 1;
    547     pu1_ref_mb_half_y_temp = pu1_ref_mb_half_y = ps_me_ctxt->apu1_subpel_buffs[1] + 1 + i4_ref_strd;
    548     pu1_ref_mb_half_xy_temp = pu1_ref_mb_half_xy = ps_me_ctxt->apu1_subpel_buffs[2] + 1 + i4_ref_strd;
    549 
    550     ps_me_ctxt->pf_ime_sub_pel_compute_sad_16x16(pu1_curr_mb, pu1_ref_mb_half_x,
    551                                                  pu1_ref_mb_half_y,
    552                                                  pu1_ref_mb_half_xy,
    553                                                  i4_src_strd, i4_ref_strd,
    554                                                  ai4_sad);
    555 
    556     /* Half x plane */
    557     for(i = 0; i < 2; i++)
    558     {
    559         WORD32 mv_x_tmp = (mv_x << 2) + 2;
    560         WORD32 mv_y_tmp = (mv_y << 2);
    561 
    562         mv_x_tmp -= (i * 4);
    563 
    564         i4_mb_distortion = ai4_sad[i];
    565 
    566         /* compute cost */
    567         i4_mb_cost = i4_mb_distortion + u4_lambda_motion * ( pu1_mv_bits[ mv_x_tmp - ps_mb_part->s_mv_pred.i2_mvx]
    568                         + pu1_mv_bits[mv_y_tmp - ps_mb_part->s_mv_pred.i2_mvy] );
    569 
    570         if (i4_mb_cost < i4_mb_cost_least)
    571         {
    572             i4_mb_cost_least = i4_mb_cost;
    573 
    574             i4_distortion_least = i4_mb_distortion;
    575 
    576             i2_mv_u_x = mv_x_tmp;
    577 
    578             i2_mv_u_y = mv_y_tmp;
    579 
    580 #ifndef HP_PL /*choosing whether left or right half_x*/
    581             ps_me_ctxt->apu1_subpel_buffs[0] = pu1_ref_mb_half_x_temp - i;
    582             pu1_best_hpel_buf = pu1_ref_mb_half_x_temp - i;
    583 
    584             i4_srch_pos_idx = 0;
    585 #endif
    586         }
    587 
    588     }
    589 
    590     /* Half y plane */
    591     for(i = 0; i < 2; i++)
    592     {
    593         WORD32 mv_x_tmp = (mv_x << 2);
    594         WORD32 mv_y_tmp = (mv_y << 2) + 2;
    595 
    596         mv_y_tmp -= (i * 4);
    597 
    598         i4_mb_distortion = ai4_sad[2 + i];
    599 
    600         /* compute cost */
    601         i4_mb_cost = i4_mb_distortion + u4_lambda_motion * ( pu1_mv_bits[ mv_x_tmp - ps_mb_part->s_mv_pred.i2_mvx]
    602                         + pu1_mv_bits[mv_y_tmp - ps_mb_part->s_mv_pred.i2_mvy] );
    603 
    604         if (i4_mb_cost < i4_mb_cost_least)
    605         {
    606             i4_mb_cost_least = i4_mb_cost;
    607 
    608             i4_distortion_least = i4_mb_distortion;
    609 
    610             i2_mv_u_x = mv_x_tmp;
    611 
    612             i2_mv_u_y = mv_y_tmp;
    613 
    614 #ifndef HP_PL/*choosing whether top or bottom half_y*/
    615             ps_me_ctxt->apu1_subpel_buffs[1] = pu1_ref_mb_half_y_temp  - i*(i4_ref_strd);
    616             pu1_best_hpel_buf = pu1_ref_mb_half_y_temp  - i*(i4_ref_strd);
    617 
    618             i4_srch_pos_idx = 1;
    619 #endif
    620         }
    621 
    622     }
    623 
    624     /* Half xy plane */
    625     for(j = 0; j < 2; j++)
    626     {
    627         for(i = 0; i < 2; i++)
    628         {
    629             WORD32 mv_x_tmp = (mv_x << 2) + 2;
    630             WORD32 mv_y_tmp = (mv_y << 2) + 2;
    631 
    632             mv_x_tmp -= (i * 4);
    633             mv_y_tmp -= (j * 4);
    634 
    635             i4_mb_distortion = ai4_sad[4 + i + 2 * j];
    636 
    637             /* compute cost */
    638             i4_mb_cost = i4_mb_distortion + u4_lambda_motion * ( pu1_mv_bits[ mv_x_tmp - ps_mb_part->s_mv_pred.i2_mvx]
    639                             + pu1_mv_bits[mv_y_tmp - ps_mb_part->s_mv_pred.i2_mvy] );
    640 
    641             if (i4_mb_cost < i4_mb_cost_least)
    642             {
    643                 i4_mb_cost_least = i4_mb_cost;
    644 
    645                 i4_distortion_least = i4_mb_distortion;
    646 
    647                 i2_mv_u_x = mv_x_tmp;
    648 
    649                 i2_mv_u_y = mv_y_tmp;
    650 
    651 #ifndef HP_PL /*choosing between four half_xy */
    652                 ps_me_ctxt->apu1_subpel_buffs[2] = pu1_ref_mb_half_xy_temp  - j*(i4_ref_strd) - i;
    653                 pu1_best_hpel_buf =  pu1_ref_mb_half_xy_temp  - j*(i4_ref_strd) - i;
    654 
    655                 i4_srch_pos_idx = 2;
    656 #endif
    657             }
    658 
    659         }
    660     }
    661 
    662     if (i4_mb_cost_least < ps_mb_part->i4_mb_cost)
    663     {
    664         ps_mb_part->i4_mb_cost = i4_mb_cost_least;
    665         ps_mb_part->i4_mb_distortion = i4_distortion_least;
    666         ps_mb_part->s_mv_curr.i2_mvx = i2_mv_u_x;
    667         ps_mb_part->s_mv_curr.i2_mvy = i2_mv_u_y;
    668         ps_mb_part->pu1_best_hpel_buf = pu1_best_hpel_buf;
    669         ps_mb_part->i4_srch_pos_idx = i4_srch_pos_idx;
    670     }
    671 }
    672 
    673 /**
    674 *******************************************************************************
    675 *
    676 * @brief This function computes cost of skip macroblocks
    677 *
    678 * @par Description:
    679 *
    680 * @param[in] ps_me_ctxt
    681 *  pointer to me ctxt
    682 *
    683 *
    684 * @returns  none
    685 *
    686 * @remarks
    687 * NOTE: while computing the skip cost, do not enable early exit from compute
    688 * sad function because, a negative bias gets added later
    689 * Note tha the last ME candidate in me ctxt is taken as skip motion vector
    690 *
    691 *******************************************************************************
    692 */
    693 void ime_compute_skip_cost
    694     (
    695          me_ctxt_t *ps_me_ctxt,
    696          ime_mv_t *ps_skip_mv,
    697          mb_part_ctxt *ps_smb_part_info,
    698          UWORD32 u4_use_stat_sad,
    699          WORD32 i4_reflist,
    700          WORD32 i4_is_slice_type_b
    701     )
    702 {
    703 
    704     /* SAD(distortion metric) of an mb */
    705     WORD32 i4_mb_distortion;
    706 
    707     /* cost = distortion + u4_lambda_motion * rate */
    708     WORD32 i4_mb_cost;
    709 
    710     /* temp var */
    711     UWORD8 *pu1_ref = NULL;
    712 
    713     ime_mv_t s_skip_mv;
    714 
    715     s_skip_mv.i2_mvx = (ps_skip_mv->i2_mvx +2)>>2;
    716     s_skip_mv.i2_mvy = (ps_skip_mv->i2_mvy +2)>>2;
    717 
    718     /* Check if the skip mv is out of bounds or subpel */
    719     {
    720         /* skip mv */
    721         ime_mv_t s_clip_skip_mv;
    722 
    723         s_clip_skip_mv.i2_mvx = CLIP3(ps_me_ctxt->i4_srch_range_w, ps_me_ctxt->i4_srch_range_e, s_skip_mv.i2_mvx);
    724         s_clip_skip_mv.i2_mvy = CLIP3(ps_me_ctxt->i4_srch_range_n, ps_me_ctxt->i4_srch_range_s, s_skip_mv.i2_mvy);
    725 
    726         if ((s_clip_skip_mv.i2_mvx != s_skip_mv.i2_mvx) ||
    727            (s_clip_skip_mv.i2_mvy != s_skip_mv.i2_mvy) ||
    728            (ps_skip_mv->i2_mvx & 0x3) ||
    729            (ps_skip_mv->i2_mvy & 0x3))
    730         {
    731             return ;
    732         }
    733     }
    734 
    735 
    736     /* adjust ref pointer */
    737     pu1_ref = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist] + s_skip_mv.i2_mvx
    738                     + (s_skip_mv.i2_mvy * ps_me_ctxt->i4_rec_strd);
    739 
    740     if(u4_use_stat_sad == 1)
    741     {
    742         UWORD32 u4_is_nonzero;
    743 
    744         ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16(
    745                         ps_me_ctxt->pu1_src_buf_luma, pu1_ref, ps_me_ctxt->i4_src_strd,
    746                         ps_me_ctxt->i4_rec_strd, ps_me_ctxt->pu2_sad_thrsh,
    747                         &i4_mb_distortion, &u4_is_nonzero);
    748 
    749         if (u4_is_nonzero == 0 || i4_mb_distortion <= ps_me_ctxt->i4_min_sad)
    750         {
    751             ps_me_ctxt->u4_min_sad_reached = 1; /* found min sad */
    752             ps_me_ctxt->i4_min_sad = (u4_is_nonzero == 0) ? 0 : i4_mb_distortion;
    753         }
    754     }
    755     else
    756     {
    757         ps_me_ctxt->pf_ime_compute_sad_16x16[ps_me_ctxt->u4_enable_fast_sad](
    758                         ps_me_ctxt->pu1_src_buf_luma, pu1_ref, ps_me_ctxt->i4_src_strd,
    759                         ps_me_ctxt->i4_rec_strd, INT_MAX, &i4_mb_distortion);
    760 
    761         if(i4_mb_distortion <= ps_me_ctxt->i4_min_sad)
    762         {
    763             ps_me_ctxt->i4_min_sad = i4_mb_distortion;
    764             ps_me_ctxt->u4_min_sad_reached = 1; /* found min sad */
    765         }
    766     }
    767 
    768 
    769     /* for skip mode cost & distortion are identical
    770      * But we shall add a bias to favor skip mode.
    771      * Doc. JVT B118 Suggests SKIP_BIAS as 16.
    772      * TODO : Empirical analysis of SKIP_BIAS is necessary */
    773 
    774     i4_mb_cost = i4_mb_distortion - (ps_me_ctxt->u4_lambda_motion * (ps_me_ctxt->i4_skip_bias[0] + ps_me_ctxt->i4_skip_bias[1]  * i4_is_slice_type_b));
    775 
    776     if (i4_mb_cost <= ps_smb_part_info->i4_mb_cost)
    777     {
    778         ps_smb_part_info->i4_mb_cost = i4_mb_cost;
    779         ps_smb_part_info->i4_mb_distortion = i4_mb_distortion;
    780         ps_smb_part_info->s_mv_curr.i2_mvx = s_skip_mv.i2_mvx;
    781         ps_smb_part_info->s_mv_curr.i2_mvy = s_skip_mv.i2_mvy;
    782     }
    783 }
    784 
    785