Home | History | Annotate | Download | only in encoder
      1 /******************************************************************************
      2  *
      3  * Copyright (C) 2015 The Android Open Source Project
      4  *
      5  * Licensed under the Apache License, Version 2.0 (the "License");
      6  * you may not use this file except in compliance with the License.
      7  * You may obtain a copy of the License at:
      8  *
      9  * http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  *
     17  *****************************************************************************
     18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
     19 */
     20 
     21 /**
     22  *******************************************************************************
     23  * @file
     24  *  ih264e_me.c
     25  *
     26  * @brief
     27  *  Contains definition of functions for motion estimation
     28  *
     29  * @author
     30  *  ittiam
     31  *
     32  * @par List of Functions:
     33  *  - ih264e_init_mv_bits()
     34  *  - ih264e_skip_analysis_chroma()
     35  *  - ih264e_skip_analysis_luma()
     36  *  - ih264e_analyse_skip()
     37  *  - ih264e_get_search_candidates()
     38  *  - ih264e_find_skip_motion_vector()
     39  *  - ih264e_get_mv_predictor()
     40  *  - ih264e_mv_pred()
     41  *  - ih264e_mv_pred_me()
     42  *  - ih264e_init_me()
     43  *  - ih264e_compute_me()
     44  *  - ih264e_compute_me_nmb()
     45  *
     46  * @remarks
     47  *  None
     48  *
     49  *******************************************************************************
     50  */
     51 
     52 /*****************************************************************************/
     53 /* File Includes                                                             */
     54 /*****************************************************************************/
     55 
     56 /* System include files */
     57 #include <stdio.h>
     58 #include <assert.h>
     59 #include <limits.h>
     60 
     61 /* User include files */
     62 #include "ih264_typedefs.h"
     63 #include "iv2.h"
     64 #include "ive2.h"
     65 #include "ithread.h"
     66 #include "ih264_platform_macros.h"
     67 #include "ih264_defs.h"
     68 #include "ime_defs.h"
     69 #include "ime_distortion_metrics.h"
     70 #include "ime_structs.h"
     71 #include "ih264_structs.h"
     72 #include "ih264_trans_quant_itrans_iquant.h"
     73 #include "ih264_inter_pred_filters.h"
     74 #include "ih264_mem_fns.h"
     75 #include "ih264_padding.h"
     76 #include "ih264_intra_pred_filters.h"
     77 #include "ih264_deblk_edge_filters.h"
     78 #include "ih264_cabac_tables.h"
     79 #include "ih264e_defs.h"
     80 #include "ih264e_error.h"
     81 #include "ih264e_bitstream.h"
     82 #include "irc_cntrl_param.h"
     83 #include "irc_frame_info_collector.h"
     84 #include "ih264e_rate_control.h"
     85 #include "ih264e_cabac_structs.h"
     86 #include "ih264e_structs.h"
     87 #include "ih264e_globals.h"
     88 #include "ih264_macros.h"
     89 #include "ih264e_me.h"
     90 #include "ime.h"
     91 #include "ih264_debug.h"
     92 #include "ih264e_intra_modes_eval.h"
     93 #include "ih264e_core_coding.h"
     94 #include "ih264e_mc.h"
     95 #include "ih264e_debug.h"
     96 #include "ih264e_half_pel.h"
     97 #include "ime_statistics.h"
     98 #include "ih264e_platform_macros.h"
     99 
    100 
    101 /*****************************************************************************/
    102 /* Function Definitions                                                      */
    103 /*****************************************************************************/
    104 
    105 /**
    106 *******************************************************************************
    107 *
    108 * @brief
    109 *  This function populates the length of the codewords for motion vectors in the
    110 *  range (-search range, search range) in pixels
    111 *
    112 * @param[in] ps_me
    113 *  Pointer to me ctxt
    114 *
    115 * @param[out] pu1_mv_bits
    116 *  length of the codeword for all mv's
    117 *
    118 * @remarks The length of the code words are derived from signed exponential
    119 * goloumb codes.
    120 *
    121 *******************************************************************************
    122 */
    123 void ih264e_init_mv_bits(me_ctxt_t *ps_me_ctxt)
    124 {
    125     /* temp var */
    126     WORD32 i, codesize = 3, diff, limit;
    127     UWORD32 u4_code_num, u4_range;
    128     UWORD32 u4_uev_min, u4_uev_max, u4_sev_min, u4_sev_max;
    129 
    130     /* max srch range */
    131     diff = MAX(DEFAULT_MAX_SRCH_RANGE_X, DEFAULT_MAX_SRCH_RANGE_Y);
    132     /* sub pel */
    133     diff <<= 2;
    134     /* delta mv */
    135     diff <<= 1;
    136 
    137     /* codeNum for positive integer     =  2x-1     : Table9-3  */
    138     u4_code_num = (diff << 1);
    139 
    140     /* get range of the bit string and put using put_bits()                 */
    141     GETRANGE(u4_range, u4_code_num);
    142 
    143     limit = 2*u4_range - 1;
    144 
    145     /* init mv bits */
    146     ps_me_ctxt->pu1_mv_bits[0] = 1;
    147 
    148     while (codesize < limit)
    149     {
    150         u4_uev_min = (1 << (codesize >> 1));
    151         u4_uev_max = 2*u4_uev_min - 1;
    152 
    153         u4_sev_min = u4_uev_min >> 1;
    154         u4_sev_max = u4_uev_max >> 1;
    155 
    156         DEBUG("\n%d min, %d max %d codesize", u4_sev_min, u4_sev_max, codesize);
    157 
    158         for (i = u4_sev_min; i <= (WORD32)u4_sev_max; i++)
    159         {
    160             ps_me_ctxt->pu1_mv_bits[-i] = ps_me_ctxt->pu1_mv_bits[i] = codesize;
    161         }
    162 
    163         codesize += 2;
    164     }
    165 }
    166 
    167 
    168 
    169 /**
    170 *******************************************************************************
    171 *
    172 * @brief Determines the valid candidates for which the initial search shall happen.
    173 * The best of these candidates is used to center the diamond pixel search.
    174 *
    175 * @par Description: The function sends the skip, (0,0), left, top and top-right
    176 * neighbouring MBs MVs. The left, top and top-right MBs MVs are used because
    177 * these are the same MVs that are used to form the MV predictor. This initial MV
    178 * search candidates need not take care of slice boundaries and hence neighbor
    179 * availability checks are not made here.
    180 *
    181 * @param[in] ps_left_mb_pu
    182 *  pointer to left mb motion vector info
    183 *
    184 * @param[in] ps_top_mb_pu
    185 *  pointer to top & top right mb motion vector info
    186 *
    187 * @param[in] ps_top_left_mb_pu
    188 *  pointer to top left mb motion vector info
    189 *
    190 * @param[out] ps_skip_mv
    191 *  pointer to skip motion vectors for the curr mb
    192 *
    193 * @param[in] i4_mb_x
    194 *  mb index x
    195 *
    196 * @param[in] i4_mb_y
    197 *  mb index y
    198 *
    199 * @param[in] i4_wd_mbs
    200 *  pic width in mbs
    201 *
    202 * @param[in] ps_motionEst
    203 *  pointer to me context
    204 *
    205 * @returns  The list of MVs to be used of priming the full pel search and the
    206 * number of such MVs
    207 *
    208 * @remarks
    209 *   Assumptions : 1. Assumes Only partition of size 16x16
    210 *
    211 *******************************************************************************
    212 */
    213 static void ih264e_get_search_candidates(process_ctxt_t *ps_proc,
    214                                          me_ctxt_t *ps_me_ctxt,
    215                                          WORD32 i4_reflist)
    216 {
    217     /* curr mb indices */
    218     WORD32 i4_mb_x = ps_proc->i4_mb_x;
    219 
    220     /* Motion vector */
    221     mv_t *ps_left_mv, *ps_top_mv, *ps_top_left_mv, *ps_top_right_mv;
    222 
    223     /* Pred modes */
    224     WORD32 i4_left_mode, i4_top_mode, i4_top_left_mode, i4_top_right_mode;
    225 
    226     /* mb part info */
    227     mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_reflist];
    228 
    229     /* mvs */
    230     WORD32 mvx, mvy;
    231 
    232     /* ngbr availability */
    233     block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
    234 
    235     /* Current mode */
    236     WORD32 i4_cmpl_predmode = (i4_reflist == 0) ? PRED_L1 : PRED_L0;
    237 
    238     /* srch range*/
    239     WORD32 i4_srch_range_n = ps_me_ctxt->i4_srch_range_n;
    240     WORD32 i4_srch_range_s = ps_me_ctxt->i4_srch_range_s;
    241     WORD32 i4_srch_range_e = ps_me_ctxt->i4_srch_range_e;
    242     WORD32 i4_srch_range_w = ps_me_ctxt->i4_srch_range_w;
    243 
    244     /* num of candidate search candidates */
    245     UWORD32 u4_num_candidates = 0;
    246 
    247     ps_left_mv = &ps_proc->s_left_mb_pu_ME.s_me_info[i4_reflist].s_mv;
    248     ps_top_mv = &(ps_proc->ps_top_row_pu_ME + i4_mb_x)->s_me_info[i4_reflist].s_mv;
    249     ps_top_left_mv = &ps_proc->s_top_left_mb_pu_ME.s_me_info[i4_reflist].s_mv;
    250     ps_top_right_mv = &(ps_proc->ps_top_row_pu_ME + i4_mb_x + 1)->s_me_info[i4_reflist].s_mv;
    251 
    252     i4_left_mode = ps_proc->s_left_mb_pu_ME.b2_pred_mode != i4_cmpl_predmode;
    253     i4_top_mode = (ps_proc->ps_top_row_pu_ME + i4_mb_x)->b2_pred_mode != i4_cmpl_predmode;
    254     i4_top_left_mode = ps_proc->s_top_left_mb_pu_ME.b2_pred_mode != i4_cmpl_predmode;
    255     i4_top_right_mode = (ps_proc->ps_top_row_pu_ME + i4_mb_x + 1)->b2_pred_mode != i4_cmpl_predmode;
    256 
    257     /* Taking the Zero motion vector as one of the candidates   */
    258     ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = 0;
    259     ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = 0;
    260 
    261     u4_num_candidates++;
    262 
    263     /* Taking the Left MV Predictor as one of the candidates    */
    264     if (ps_ngbr_avbl->u1_mb_a && i4_left_mode)
    265     {
    266         mvx      = (ps_left_mv->i2_mvx + 2) >> 2;
    267         mvy      = (ps_left_mv->i2_mvy + 2) >> 2;
    268 
    269         mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
    270         mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
    271 
    272         ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx;
    273         ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy;
    274 
    275         u4_num_candidates ++;
    276     }
    277 
    278     /* Taking the Top MV Predictor as one of the candidates     */
    279     if (ps_ngbr_avbl->u1_mb_b && i4_top_mode)
    280     {
    281         mvx      = (ps_top_mv->i2_mvx + 2) >> 2;
    282         mvy      = (ps_top_mv->i2_mvy + 2) >> 2;
    283 
    284         mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
    285         mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
    286 
    287         ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx;
    288         ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy;
    289 
    290         u4_num_candidates ++;
    291 
    292         /* Taking the TopRt MV Predictor as one of the candidates   */
    293         if (ps_ngbr_avbl->u1_mb_c && i4_top_right_mode)
    294         {
    295             mvx      = (ps_top_right_mv->i2_mvx + 2) >> 2;
    296             mvy      = (ps_top_right_mv->i2_mvy + 2)>> 2;
    297 
    298             mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
    299             mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
    300 
    301             ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx;
    302             ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy;
    303 
    304             u4_num_candidates ++;
    305         }
    306         /* Taking the TopLt MV Predictor as one of the candidates   */
    307         else if(ps_ngbr_avbl->u1_mb_d && i4_top_left_mode)
    308         {
    309             mvx      = (ps_top_left_mv->i2_mvx + 2) >> 2;
    310             mvy      = (ps_top_left_mv->i2_mvy + 2) >> 2;
    311 
    312             mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
    313             mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
    314 
    315             ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx;
    316             ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy;
    317 
    318             u4_num_candidates ++;
    319         }
    320     }
    321 
    322 
    323     /********************************************************************/
    324     /*                            MV Prediction                         */
    325     /********************************************************************/
    326     ih264e_mv_pred_me(ps_proc, i4_reflist);
    327 
    328     ps_mb_part->s_mv_pred.i2_mvx = ps_proc->ps_pred_mv[i4_reflist].s_mv.i2_mvx;
    329     ps_mb_part->s_mv_pred.i2_mvy = ps_proc->ps_pred_mv[i4_reflist].s_mv.i2_mvy;
    330 
    331     /* Get the skip motion vector                               */
    332     {
    333         ps_me_ctxt->i4_skip_type = ps_proc->ps_codec->apf_find_skip_params_me
    334                                     [ps_proc->i4_slice_type](ps_proc, i4_reflist);
    335 
    336         /* Taking the Skip motion vector as one of the candidates   */
    337         mvx = (ps_proc->ps_skip_mv[i4_reflist].s_mv.i2_mvx + 2) >> 2;
    338         mvy = (ps_proc->ps_skip_mv[i4_reflist].s_mv.i2_mvy + 2) >> 2;
    339 
    340         mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
    341         mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
    342 
    343         ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx;
    344         ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy;
    345         u4_num_candidates++;
    346 
    347         if (ps_proc->i4_slice_type == BSLICE)
    348         {
    349             /* Taking the temporal Skip motion vector as one of the candidates   */
    350             mvx = (ps_proc->ps_skip_mv[i4_reflist + 2].s_mv.i2_mvx + 2) >> 2;
    351             mvy = (ps_proc->ps_skip_mv[i4_reflist + 2].s_mv.i2_mvy + 2) >> 2;
    352 
    353             mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
    354             mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
    355 
    356             ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx;
    357             ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy;
    358             u4_num_candidates++;
    359         }
    360     }
    361 
    362     ASSERT(u4_num_candidates <= 6);
    363 
    364     ps_me_ctxt->u4_num_candidates[i4_reflist] = u4_num_candidates;
    365 }
    366 
    367 /**
    368 *******************************************************************************
    369 *
    370 * @brief The function computes parameters for a PSKIP MB
    371 *
    372 * @par Description:
    373 *  The function updates the skip motion vector and checks if the current
    374 *  MB can be a skip PSKIP mB or not
    375 *
    376 * @param[in] ps_proc
    377 *  Pointer to process context
    378 *
    379 * @param[in] u4_for_me
    380 *  Flag to indicate function is called for ME or not
    381 *
    382 * @param[out] i4_ref_list
    383 *  Current active refernce list
    384 *
    385 * @returns Flag indicating if the current MB can be marked as skip
    386 *
    387 * @remarks The code implements the logic as described in sec 8.4.1.2.2 in H264
    388 *   specification.
    389 *
    390 *******************************************************************************
    391 */
    392 WORD32 ih264e_find_pskip_params(process_ctxt_t *ps_proc, WORD32 i4_reflist)
    393 {
    394     /* left mb motion vector */
    395     enc_pu_t *ps_left_mb_pu ;
    396 
    397     /* top mb motion vector */
    398     enc_pu_t *ps_top_mb_pu ;
    399 
    400     /* Skip mv */
    401     mv_t *ps_skip_mv = &ps_proc->ps_skip_mv[PRED_L0].s_mv;
    402 
    403     UNUSED(i4_reflist);
    404 
    405     ps_left_mb_pu = &ps_proc->s_left_mb_pu ;
    406     ps_top_mb_pu = ps_proc->ps_top_row_pu + ps_proc->i4_mb_x;
    407 
    408     if ((!ps_proc->ps_ngbr_avbl->u1_mb_a) ||
    409         (!ps_proc->ps_ngbr_avbl->u1_mb_b) ||
    410         (
    411           (ps_left_mb_pu->s_me_info[PRED_L0].i1_ref_idx == -1) &&
    412           (ps_left_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvx == 0) &&
    413           (ps_left_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvy == 0)
    414        ) ||
    415        (
    416           (ps_top_mb_pu->s_me_info[PRED_L0].i1_ref_idx == -1) &&
    417           (ps_top_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvx == 0) &&
    418           (ps_top_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvy == 0)
    419        )
    420      )
    421 
    422     {
    423         ps_skip_mv->i2_mvx = 0;
    424         ps_skip_mv->i2_mvy = 0;
    425     }
    426     else
    427     {
    428         ps_skip_mv->i2_mvx = ps_proc->ps_pred_mv[PRED_L0].s_mv.i2_mvx;
    429         ps_skip_mv->i2_mvy = ps_proc->ps_pred_mv[PRED_L0].s_mv.i2_mvy;
    430     }
    431 
    432     if ( (ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvx == ps_skip_mv->i2_mvx)
    433      && (ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvy == ps_skip_mv->i2_mvy))
    434     {
    435         return 1;
    436     }
    437 
    438     return 0;
    439 }
    440 
    441 /**
    442 *******************************************************************************
    443 *
    444 * @brief The function computes parameters for a PSKIP MB
    445 *
    446 * @par Description:
    447 *  The function updates the skip motion vector and checks if the current
    448 *  MB can be a skip PSKIP mB or not
    449 *
    450 * @param[in] ps_proc
    451 *  Pointer to process context
    452 *
    453 * @param[in] u4_for_me
    454 *  Flag to dincate fucntion is called for ME or not
    455 *
    456 * @param[out] i4_ref_list
    457 *  Current active refernce list
    458 *
    459 * @returns Flag indicating if the current MB can be marked as skip
    460 *
    461 * @remarks The code implements the logic as described in sec 8.4.1.2.2 in H264
    462 *   specification.
    463 *
    464 *******************************************************************************
    465 */
    466 WORD32 ih264e_find_pskip_params_me(process_ctxt_t *ps_proc, WORD32 i4_reflist)
    467 {
    468     /* left mb motion vector */
    469     enc_pu_t *ps_left_mb_pu ;
    470 
    471     /* top mb motion vector */
    472     enc_pu_t *ps_top_mb_pu ;
    473 
    474     /* Skip mv */
    475     mv_t *ps_skip_mv = &ps_proc->ps_skip_mv[PRED_L0].s_mv;
    476 
    477     UNUSED(i4_reflist);
    478 
    479     ps_left_mb_pu = &ps_proc->s_left_mb_pu_ME;
    480     ps_top_mb_pu = ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x;
    481 
    482     if ((!ps_proc->ps_ngbr_avbl->u1_mb_a) ||
    483         (!ps_proc->ps_ngbr_avbl->u1_mb_b) ||
    484         (
    485           (ps_left_mb_pu->s_me_info[PRED_L0].i1_ref_idx == -1) &&
    486           (ps_left_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvx == 0) &&
    487           (ps_left_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvy == 0)
    488         ) ||
    489         (
    490           (ps_top_mb_pu->s_me_info[PRED_L0].i1_ref_idx == -1) &&
    491           (ps_top_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvx == 0) &&
    492           (ps_top_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvy == 0)
    493         )
    494      )
    495 
    496     {
    497         ps_skip_mv->i2_mvx = 0;
    498         ps_skip_mv->i2_mvy = 0;
    499     }
    500     else
    501     {
    502         ps_skip_mv->i2_mvx = ps_proc->ps_pred_mv[PRED_L0].s_mv.i2_mvx;
    503         ps_skip_mv->i2_mvy = ps_proc->ps_pred_mv[PRED_L0].s_mv.i2_mvy;
    504     }
    505 
    506     return PRED_L0;
    507 }
    508 
    509 
    510 /**
    511 *******************************************************************************
    512 *
    513 * @brief motion vector predictor
    514 *
    515 * @par Description:
    516 *  The routine calculates the motion vector predictor for a given block,
    517 *  given the candidate MV predictors.
    518 *
    519 * @param[in] ps_left_mb_pu
    520 *  pointer to left mb motion vector info
    521 *
    522 * @param[in] ps_top_row_pu
    523 *  pointer to top & top right mb motion vector info
    524 *
    525 * @param[out] ps_pred_mv
    526 *  pointer to candidate predictors for the current block
    527 *
    528 * @returns  The x & y components of the MV predictor.
    529 *
    530 * @remarks The code implements the logic as described in sec 8.4.1.3 in H264
    531 *   specification.
    532 *   Assumptions : 1. Assumes Single reference frame
    533 *                 2. Assumes Only partition of size 16x16
    534 *
    535 *******************************************************************************
    536 */
    537 void ih264e_get_mv_predictor(enc_pu_t *ps_left_mb_pu,
    538                              enc_pu_t *ps_top_row_pu,
    539                              enc_pu_mv_t *ps_pred_mv,
    540                              WORD32 i4_ref_list)
    541 {
    542 
    543     /* Indicated the current ref */
    544     WORD8 i1_ref_idx;
    545 
    546     /* For pred L0 */
    547     i1_ref_idx = -1;
    548     {
    549         /* temp var */
    550         WORD32 pred_algo = 3, a, b, c;
    551 
    552         /* If only one of the candidate blocks has a reference frame equal to
    553          * the current block then use the same block as the final predictor */
    554         a = (ps_left_mb_pu->s_me_info[i4_ref_list].i1_ref_idx == i1_ref_idx) ? 0 : -1;
    555         b = (ps_top_row_pu[0].s_me_info[i4_ref_list].i1_ref_idx == i1_ref_idx) ? 0 : -1;
    556         c = (ps_top_row_pu[1].s_me_info[i4_ref_list].i1_ref_idx == i1_ref_idx) ? 0 : -1;
    557 
    558         if (a == 0 && b == -1 && c == -1)
    559             pred_algo = 0; /* LEFT */
    560         else if(a == -1 && b == 0 && c == -1)
    561             pred_algo = 1; /* TOP */
    562         else if(a == -1 && b == -1 && c == 0)
    563             pred_algo = 2; /* TOP RIGHT */
    564 
    565         switch (pred_algo)
    566         {
    567             case 0:
    568                 /* left */
    569                 ps_pred_mv->s_mv.i2_mvx = ps_left_mb_pu->s_me_info[i4_ref_list].s_mv.i2_mvx;
    570                 ps_pred_mv->s_mv.i2_mvy = ps_left_mb_pu->s_me_info[i4_ref_list].s_mv.i2_mvy;
    571                 break;
    572             case 1:
    573                 /* top */
    574                 ps_pred_mv->s_mv.i2_mvx = ps_top_row_pu[0].s_me_info[i4_ref_list].s_mv.i2_mvx;
    575                 ps_pred_mv->s_mv.i2_mvy = ps_top_row_pu[0].s_me_info[i4_ref_list].s_mv.i2_mvy;
    576                 break;
    577             case 2:
    578                 /* top right */
    579                 ps_pred_mv->s_mv.i2_mvx = ps_top_row_pu[1].s_me_info[i4_ref_list].s_mv.i2_mvx;
    580                 ps_pred_mv->s_mv.i2_mvy = ps_top_row_pu[1].s_me_info[i4_ref_list].s_mv.i2_mvy;
    581                 break;
    582             case 3:
    583                 /* median */
    584                 MEDIAN(ps_left_mb_pu->s_me_info[i4_ref_list].s_mv.i2_mvx,
    585                        ps_top_row_pu[0].s_me_info[i4_ref_list].s_mv.i2_mvx,
    586                        ps_top_row_pu[1].s_me_info[i4_ref_list].s_mv.i2_mvx,
    587                        ps_pred_mv->s_mv.i2_mvx);
    588                 MEDIAN(ps_left_mb_pu->s_me_info[i4_ref_list].s_mv.i2_mvy,
    589                        ps_top_row_pu[0].s_me_info[i4_ref_list].s_mv.i2_mvy,
    590                        ps_top_row_pu[1].s_me_info[i4_ref_list].s_mv.i2_mvy,
    591                        ps_pred_mv->s_mv.i2_mvy);
    592 
    593                 break;
    594             default:
    595                 break;
    596         }
    597     }
    598 }
    599 
    600 /**
    601 *******************************************************************************
    602 *
    603 * @brief This function performs MV prediction
    604 *
    605 * @par Description:
    606 *
    607 * @param[in] ps_proc
    608 *  Process context corresponding to the job
    609 *
    610 * @returns  none
    611 *
    612 * @remarks none
    613 *  This function will update the MB availability since intra inter decision
    614 *  should be done before the call
    615 *
    616 *******************************************************************************
    617 */
    618 void ih264e_mv_pred(process_ctxt_t *ps_proc, WORD32 i4_slice_type)
    619 {
    620 
    621     /* left mb motion vector */
    622     enc_pu_t *ps_left_mb_pu;
    623 
    624     /* top left mb motion vector */
    625     enc_pu_t *ps_top_left_mb_pu;
    626 
    627     /* top row motion vector info */
    628     enc_pu_t *ps_top_row_pu;
    629 
    630     /* predicted motion vector */
    631     enc_pu_mv_t *ps_pred_mv = ps_proc->ps_pred_mv;
    632 
    633     /* zero mv */
    634     mv_t zero_mv = { 0, 0 };
    635 
    636     /*  mb neighbor availability */
    637     block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
    638 
    639     /* mb syntax elements of neighbors */
    640     mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
    641     mb_info_t *ps_top_left_syn;
    642     UWORD32 u4_left_is_intra;
    643 
    644     /* Temp var */
    645     WORD32 i4_reflist, max_reflist, i4_cmpl_predmode;
    646 
    647     ps_top_left_syn = &(ps_proc->s_top_left_mb_syntax_ele);
    648     u4_left_is_intra = ps_proc->s_left_mb_syntax_ele.u2_is_intra;
    649     ps_left_mb_pu = &ps_proc->s_left_mb_pu;
    650     ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu;
    651     ps_top_row_pu = (ps_proc->ps_top_row_pu + ps_proc->i4_mb_x);
    652 
    653     /* Number of ref lists to process */
    654     max_reflist = (i4_slice_type == PSLICE) ? 1 : 2;
    655 
    656     for (i4_reflist = 0; i4_reflist < max_reflist; i4_reflist++)
    657     {
    658         i4_cmpl_predmode = (i4_reflist == 0) ? PRED_L1 : PRED_L0;
    659 
    660         /* Before performing mv prediction prepare the ngbr information and
    661          * reset motion vectors basing on their availability */
    662         if (!ps_ngbr_avbl->u1_mb_a || (u4_left_is_intra == 1)
    663                         || (ps_left_mb_pu->b2_pred_mode == i4_cmpl_predmode))
    664         {
    665             /* left mv */
    666             ps_left_mb_pu->s_me_info[i4_reflist].i1_ref_idx = 0;
    667             ps_left_mb_pu->s_me_info[i4_reflist].s_mv = zero_mv;
    668         }
    669         if (!ps_ngbr_avbl->u1_mb_b || ps_top_syn->u2_is_intra
    670                         || (ps_top_row_pu[0].b2_pred_mode == i4_cmpl_predmode))
    671         {
    672             /* top mv */
    673             ps_top_row_pu[0].s_me_info[i4_reflist].i1_ref_idx = 0;
    674             ps_top_row_pu[0].s_me_info[i4_reflist].s_mv = zero_mv;
    675         }
    676 
    677         if (!ps_ngbr_avbl->u1_mb_c)
    678         {
    679             /* top right mv - When top right partition is not available for
    680              * prediction if top left is available use it for prediction else
    681              * set the mv information to -1 and (0, 0)
    682              * */
    683             if (!ps_ngbr_avbl->u1_mb_d || ps_top_left_syn->u2_is_intra
    684                             || (ps_top_left_mb_pu->b2_pred_mode == i4_cmpl_predmode))
    685             {
    686                 ps_top_row_pu[1].s_me_info[i4_reflist].i1_ref_idx = 0;
    687                 ps_top_row_pu[1].s_me_info[i4_reflist].s_mv = zero_mv;
    688             }
    689             else
    690             {
    691                 ps_top_row_pu[1].s_me_info[i4_reflist].i1_ref_idx = ps_top_left_mb_pu->s_me_info[i4_reflist].i1_ref_idx;
    692                 ps_top_row_pu[1].s_me_info[i4_reflist].s_mv = ps_top_left_mb_pu->s_me_info[i4_reflist].s_mv;
    693             }
    694         }
    695         else if(ps_top_syn[1].u2_is_intra
    696                         || (ps_top_row_pu[1].b2_pred_mode == i4_cmpl_predmode))
    697         {
    698             ps_top_row_pu[1].s_me_info[i4_reflist].i1_ref_idx = 0;
    699             ps_top_row_pu[1].s_me_info[i4_reflist].s_mv = zero_mv;
    700         }
    701 
    702         ih264e_get_mv_predictor(ps_left_mb_pu, ps_top_row_pu, &ps_pred_mv[i4_reflist], i4_reflist);
    703     }
    704 
    705 }
    706 
    707 /**
    708 *******************************************************************************
    709 *
    710 * @brief This function approximates Pred. MV
    711 *
    712 * @par Description:
    713 *
    714 * @param[in] ps_proc
    715 *  Process context corresponding to the job
    716 *
    717 * @returns  none
    718 *
    719 * @remarks none
    720 *  Motion estimation happens at nmb level. For cost calculations, mv is appro
    721 *  ximated using this function
    722 *
    723 *******************************************************************************
    724 */
    725 void ih264e_mv_pred_me(process_ctxt_t *ps_proc, WORD32 i4_ref_list)
    726 {
    727     /* left mb motion vector */
    728     enc_pu_t *ps_left_mb_pu ;
    729 
    730     /* top left mb motion vector */
    731     enc_pu_t *ps_top_left_mb_pu ;
    732 
    733     /* top row motion vector info */
    734     enc_pu_t *ps_top_row_pu;
    735 
    736     enc_pu_t s_top_row_pu[2];
    737 
    738     /* predicted motion vector */
    739     enc_pu_mv_t *ps_pred_mv = ps_proc->ps_pred_mv;
    740 
    741     /* zero mv */
    742     mv_t zero_mv = {0, 0};
    743 
    744     /* Complementary pred mode */
    745     WORD32 i4_cmpl_predmode = (i4_ref_list == 0) ? PRED_L1 : PRED_L0;
    746 
    747     /*  mb neighbor availability */
    748     block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
    749 
    750     ps_left_mb_pu = &ps_proc->s_left_mb_pu_ME;
    751     ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu_ME;
    752     ps_top_row_pu = (ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x);
    753 
    754     s_top_row_pu[0] = ps_top_row_pu[0];
    755     s_top_row_pu[1] = ps_top_row_pu[1];
    756 
    757     /*
    758      * Before performing mv prediction prepare the ngbr information and
    759      * reset motion vectors basing on their availability
    760      */
    761 
    762     if (!ps_ngbr_avbl->u1_mb_a || (ps_left_mb_pu->b2_pred_mode == i4_cmpl_predmode))
    763     {
    764         /* left mv */
    765         ps_left_mb_pu->s_me_info[i4_ref_list].i1_ref_idx = 0;
    766         ps_left_mb_pu->s_me_info[i4_ref_list].s_mv = zero_mv;
    767     }
    768     if (!ps_ngbr_avbl->u1_mb_b || (s_top_row_pu[0].b2_pred_mode == i4_cmpl_predmode))
    769     {
    770         /* top mv */
    771         s_top_row_pu[0].s_me_info[i4_ref_list].i1_ref_idx = 0;
    772         s_top_row_pu[0].s_me_info[i4_ref_list].s_mv = zero_mv;
    773 
    774     }
    775     if (!ps_ngbr_avbl->u1_mb_c)
    776     {
    777         /* top right mv - When top right partition is not available for
    778          * prediction if top left is available use it for prediction else
    779          * set the mv information to -1 and (0, 0)
    780          * */
    781         if (!ps_ngbr_avbl->u1_mb_d || (ps_top_left_mb_pu->b2_pred_mode == i4_cmpl_predmode))
    782         {
    783             s_top_row_pu[1].s_me_info[i4_ref_list].i1_ref_idx = 0;
    784             s_top_row_pu[1].s_me_info[i4_ref_list].s_mv = zero_mv;
    785 
    786             s_top_row_pu[1].s_me_info[i4_ref_list].i1_ref_idx = 0;
    787             s_top_row_pu[1].s_me_info[i4_ref_list].s_mv = zero_mv;
    788         }
    789         else
    790         {
    791             s_top_row_pu[1].s_me_info[i4_ref_list].i1_ref_idx = ps_top_left_mb_pu->s_me_info[0].i1_ref_idx;
    792             s_top_row_pu[1].s_me_info[i4_ref_list].s_mv = ps_top_left_mb_pu->s_me_info[0].s_mv;
    793         }
    794     }
    795     else if (ps_top_row_pu[1].b2_pred_mode == i4_cmpl_predmode)
    796     {
    797         ps_top_row_pu[1].s_me_info[i4_ref_list].i1_ref_idx = 0;
    798         ps_top_row_pu[1].s_me_info[i4_ref_list].s_mv = zero_mv;
    799     }
    800 
    801     ih264e_get_mv_predictor(ps_left_mb_pu, &(s_top_row_pu[0]),
    802                             &ps_pred_mv[i4_ref_list], i4_ref_list);
    803 }
    804 
    805 /**
    806 *******************************************************************************
    807 *
    808 * @brief This function initializes me ctxt
    809 *
    810 * @par Description:
    811 *  Before dispatching the current job to me thread, the me context associated
    812 *  with the job is initialized.
    813 *
    814 * @param[in] ps_proc
    815 *  Process context corresponding to the job
    816 *
    817 * @returns  none
    818 *
    819 * @remarks none
    820 *
    821 *******************************************************************************
    822 */
    823 void ih264e_init_me(process_ctxt_t *ps_proc)
    824 {
    825     /* me ctxt */
    826     me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt;
    827 
    828     /* codec context */
    829     codec_t *ps_codec = ps_proc->ps_codec;
    830 
    831     ps_me_ctxt->i4_skip_bias[BSLICE] = SKIP_BIAS_B;
    832 
    833     if (ps_codec->s_cfg.u4_num_bframes == 0)
    834     {
    835        ps_me_ctxt->i4_skip_bias[PSLICE] = 4 * SKIP_BIAS_P;
    836     }
    837     else
    838     {
    839        ps_me_ctxt->i4_skip_bias[PSLICE] =  SKIP_BIAS_P;
    840     }
    841 
    842     /* src ptr */
    843     ps_me_ctxt->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma;
    844     /* src stride */
    845     ps_me_ctxt->i4_src_strd = ps_proc->i4_src_strd;
    846 
    847     /* ref ptrs and corresponding lagrange params */
    848     ps_me_ctxt->apu1_ref_buf_luma[0] = ps_proc->apu1_ref_buf_luma[0];
    849     ps_me_ctxt->apu1_ref_buf_luma[1] = ps_proc->apu1_ref_buf_luma[1];
    850 
    851     ps_me_ctxt->u4_lambda_motion = gu1_qp0[ps_me_ctxt->u1_mb_qp];
    852 
    853 
    854 }
    855 
    856 
    857 /**
    858 *******************************************************************************
    859 *
    860 * @brief This function performs motion estimation for the current mb using
    861 *   single reference list
    862 *
    863 * @par Description:
    864 *  The current mb is compared with a list of mb's in the reference frame for
    865 *  least cost. The mb that offers least cost is chosen as predicted mb and the
    866 *  displacement of the predicted mb from index location of the current mb is
    867 *  signaled as mv. The list of the mb's that are chosen in the reference frame
    868 *  are dependent on the speed of the ME configured.
    869 *
    870 * @param[in] ps_proc
    871 *  Process context corresponding to the job
    872 *
    873 * @returns  motion vector of the pred mb, sad, cost.
    874 *
    875 * @remarks none
    876 *
    877 *******************************************************************************
    878 */
    879 void ih264e_compute_me_single_reflist(process_ctxt_t *ps_proc)
    880 {
    881     /* me ctxt */
    882     me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt;
    883 
    884     /* codec context */
    885     codec_t *ps_codec = ps_proc->ps_codec;
    886 
    887     /* recon stride */
    888     WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
    889 
    890     /* source buffer for halp pel generation functions */
    891     UWORD8 *pu1_hpel_src;
    892 
    893     /* quantization parameters */
    894     quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
    895 
    896     /* Mb part ctxts for SKIP */
    897     mb_part_ctxt s_skip_mbpart;
    898 
    899     /* Sad therholds */
    900     ps_me_ctxt->pu2_sad_thrsh = ps_qp_params->pu2_sad_thrsh;
    901 
    902     {
    903         WORD32 rows_above, rows_below, columns_left, columns_right;
    904 
    905         /* During evaluation for motion vectors do not search through padded regions */
    906         /* Obtain number of rows and columns that are effective for computing for me evaluation */
    907         rows_above = MB_SIZE + ps_proc->i4_mb_y * MB_SIZE;
    908         rows_below = (ps_proc->i4_ht_mbs - ps_proc->i4_mb_y) * MB_SIZE;
    909         columns_left = MB_SIZE + ps_proc->i4_mb_x * MB_SIZE;
    910         columns_right = (ps_proc->i4_wd_mbs - ps_proc->i4_mb_x) * MB_SIZE;
    911 
    912         /* init srch range */
    913         /* NOTE : For now, lets limit the search range by DEFAULT_MAX_SRCH_RANGE_X / 2
    914          * on all sides.
    915          */
    916         ps_me_ctxt->i4_srch_range_w = -MIN(columns_left, DEFAULT_MAX_SRCH_RANGE_X >> 1);
    917         ps_me_ctxt->i4_srch_range_e = MIN(columns_right, DEFAULT_MAX_SRCH_RANGE_X >> 1);
    918         ps_me_ctxt->i4_srch_range_n = -MIN(rows_above, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
    919         ps_me_ctxt->i4_srch_range_s = MIN(rows_below, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
    920 
    921         /* this is to facilitate fast sub pel computation with minimal loads */
    922         ps_me_ctxt->i4_srch_range_w += 1;
    923         ps_me_ctxt->i4_srch_range_e -= 1;
    924         ps_me_ctxt->i4_srch_range_n += 1;
    925         ps_me_ctxt->i4_srch_range_s -= 1;
    926     }
    927 
    928     /* Compute ME and store the MVs */
    929 
    930     /***********************************************************************
    931      * Compute ME for list L0
    932      ***********************************************************************/
    933 
    934     /* Init SATQD for the current list */
    935     ps_me_ctxt->u4_min_sad_reached  = 0;
    936     ps_me_ctxt->i4_min_sad = ps_proc->ps_cur_mb->u4_min_sad;
    937 
    938     /* Get the seed motion vector candidates                    */
    939     ih264e_get_search_candidates(ps_proc, ps_me_ctxt, PRED_L0);
    940 
    941     /* ****************************************************************
    942      *Evaluate the SKIP for current list
    943      * ****************************************************************/
    944     s_skip_mbpart.s_mv_curr.i2_mvx = 0;
    945     s_skip_mbpart.s_mv_curr.i2_mvy = 0;
    946     s_skip_mbpart.i4_mb_cost = INT_MAX;
    947     s_skip_mbpart.i4_mb_distortion = INT_MAX;
    948 
    949     ime_compute_skip_cost( ps_me_ctxt,
    950                            (ime_mv_t *)(&ps_proc->ps_skip_mv[PRED_L0].s_mv),
    951                            &s_skip_mbpart,
    952                            ps_proc->ps_codec->s_cfg.u4_enable_satqd,
    953                            PRED_L0,
    954                            0 /* Not a Bslice */ );
    955 
    956     s_skip_mbpart.s_mv_curr.i2_mvx <<= 2;
    957     s_skip_mbpart.s_mv_curr.i2_mvy <<= 2;
    958 
    959     /******************************************************************
    960      * Evaluate ME For current list
    961      *****************************************************************/
    962     ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvx = 0;
    963     ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvy = 0;
    964     ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_cost = INT_MAX;
    965     ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_distortion = INT_MAX;
    966 
    967     /* Init Hpel */
    968     ps_me_ctxt->as_mb_part[PRED_L0].pu1_best_hpel_buf = NULL;
    969 
    970     /* In case we found out the minimum SAD, exit the ME eval */
    971     if (!ps_me_ctxt->u4_min_sad_reached)
    972     {
    973         /* Evaluate search candidates for initial mv pt */
    974         ime_evaluate_init_srchposn_16x16(ps_me_ctxt, PRED_L0);
    975 
    976         /********************************************************************/
    977         /*                  full pel motion estimation                      */
    978         /********************************************************************/
    979         ime_full_pel_motion_estimation_16x16(ps_me_ctxt, PRED_L0);
    980 
    981         /* Scale the MV to qpel resolution */
    982         ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvx <<= 2;
    983         ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvy <<= 2;
    984 
    985         if (ps_me_ctxt->u4_enable_hpel)
    986         {
    987             /* moving src pointer to the converged motion vector location*/
    988             pu1_hpel_src =   ps_me_ctxt->apu1_ref_buf_luma[PRED_L0]
    989                              + (ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvx >> 2)
    990                              + (ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvy >> 2)* i4_rec_strd;
    991 
    992             ps_me_ctxt->apu1_subpel_buffs[0] = ps_proc->apu1_subpel_buffs[0];
    993             ps_me_ctxt->apu1_subpel_buffs[1] = ps_proc->apu1_subpel_buffs[1];
    994             ps_me_ctxt->apu1_subpel_buffs[2] = ps_proc->apu1_subpel_buffs[2];
    995 
    996             ps_me_ctxt->u4_subpel_buf_strd = HP_BUFF_WD;
    997 
    998             /* half  pel search is done for both sides of full pel,
    999              * hence half_x of width x height = 17x16 is created
   1000              * starting from left half_x of converged full pel */
   1001             pu1_hpel_src -= 1;
   1002 
   1003             /* computing half_x */
   1004             ps_codec->pf_ih264e_sixtapfilter_horz(pu1_hpel_src,
   1005                                                   ps_me_ctxt->apu1_subpel_buffs[0],
   1006                                                   i4_rec_strd,
   1007                                                   ps_me_ctxt->u4_subpel_buf_strd);
   1008 
   1009             /*
   1010              * Halfpel search is done for both sides of full pel,
   1011              * hence half_y of width x height = 16x17 is created
   1012              * starting from top half_y of converged full pel
   1013              * for half_xy top_left is required
   1014              * hence it starts from pu1_hpel_src = full_pel_converged_point - i4_rec_strd - 1
   1015              */
   1016             pu1_hpel_src -= i4_rec_strd;
   1017 
   1018             /* computing half_y , and half_xy*/
   1019             ps_codec->pf_ih264e_sixtap_filter_2dvh_vert(
   1020                             pu1_hpel_src, ps_me_ctxt->apu1_subpel_buffs[1],
   1021                             ps_me_ctxt->apu1_subpel_buffs[2], i4_rec_strd,
   1022                             ps_me_ctxt->u4_subpel_buf_strd, ps_proc->ai16_pred1 + 3,
   1023                             ps_me_ctxt->u4_subpel_buf_strd);
   1024 
   1025             ime_sub_pel_motion_estimation_16x16(ps_me_ctxt, PRED_L0);
   1026         }
   1027     }
   1028 
   1029 
   1030     /***********************************************************************
   1031      * If a particular skiip Mv is giving better sad, copy to the corresponding
   1032      * MBPART
   1033      * In B slices this loop should go only to PREDL1: If we found min sad
   1034      * we will go to the skip ref list only
   1035      * Have to find a way to make it without too much change or new vars
   1036      **********************************************************************/
   1037     if (s_skip_mbpart.i4_mb_cost < ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_cost)
   1038     {
   1039         ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_cost = s_skip_mbpart.i4_mb_cost;
   1040         ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_distortion = s_skip_mbpart.i4_mb_distortion;
   1041         ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr = s_skip_mbpart.s_mv_curr;
   1042     }
   1043     else if (ps_me_ctxt->as_mb_part[PRED_L0].pu1_best_hpel_buf)
   1044     {
   1045         /* Now we have to copy the buffers */
   1046         ps_codec->pf_inter_pred_luma_copy(
   1047                         ps_me_ctxt->as_mb_part[PRED_L0].pu1_best_hpel_buf,
   1048                         ps_proc->pu1_best_subpel_buf,
   1049                         ps_me_ctxt->u4_subpel_buf_strd,
   1050                         ps_proc->u4_bst_spel_buf_strd, MB_SIZE, MB_SIZE,
   1051                         NULL, 0);
   1052     }
   1053 
   1054     /**********************************************************************
   1055      * Now get the minimum of MB part sads by searching over all ref lists
   1056      **********************************************************************/
   1057     ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvx = ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvx;
   1058     ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvy = ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvy;
   1059     ps_proc->ps_cur_mb->i4_mb_cost = ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_cost;
   1060     ps_proc->ps_cur_mb->i4_mb_distortion = ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_distortion;
   1061     ps_proc->ps_cur_mb->u4_mb_type = P16x16;
   1062     ps_proc->ps_pu->b2_pred_mode = PRED_L0 ;
   1063 
   1064     /* Mark the reflists */
   1065     ps_proc->ps_pu->s_me_info[0].i1_ref_idx = -1;
   1066     ps_proc->ps_pu->s_me_info[1].i1_ref_idx =  0;
   1067 
   1068     /* number of partitions */
   1069     ps_proc->u4_num_sub_partitions = 1;
   1070     *(ps_proc->pu4_mb_pu_cnt) = 1;
   1071 
   1072     /* position in-terms of PU */
   1073     ps_proc->ps_pu->b4_pos_x = 0;
   1074     ps_proc->ps_pu->b4_pos_y = 0;
   1075 
   1076     /* PU size */
   1077     ps_proc->ps_pu->b4_wd = 3;
   1078     ps_proc->ps_pu->b4_ht = 3;
   1079 
   1080     /* Update min sad conditions */
   1081     if (ps_me_ctxt->u4_min_sad_reached == 1)
   1082     {
   1083         ps_proc->ps_cur_mb->u4_min_sad_reached = 1;
   1084         ps_proc->ps_cur_mb->u4_min_sad = ps_me_ctxt->i4_min_sad;
   1085     }
   1086 
   1087 }
   1088 
   1089 /**
   1090 *******************************************************************************
   1091 *
   1092 * @brief This function performs motion estimation for the current NMB
   1093 *
   1094 * @par Description:
   1095 * Intializes input and output pointers required by the function ih264e_compute_me
   1096 * and calls the function ih264e_compute_me in a loop to process NMBs.
   1097 *
   1098 * @param[in] ps_proc
   1099 *  Process context corresponding to the job
   1100 *
   1101 * @returns
   1102 *
   1103 * @remarks none
   1104 *
   1105 *******************************************************************************
   1106 */
   1107 void ih264e_compute_me_nmb(process_ctxt_t *ps_proc, UWORD32 u4_nmb_count)
   1108 {
   1109     /* pic pu */
   1110     enc_pu_t *ps_pu_begin = ps_proc->ps_pu;
   1111 
   1112     /* ME map */
   1113     UWORD8 *pu1_me_map = ps_proc->pu1_me_map + (ps_proc->i4_mb_y * ps_proc->i4_wd_mbs);
   1114 
   1115     /* temp var */
   1116     UWORD32 u4_i;
   1117 
   1118     ps_proc->s_me_ctxt.u4_left_is_intra = ps_proc->s_left_mb_syntax_ele.u2_is_intra;
   1119     ps_proc->s_me_ctxt.u4_left_is_skip = (ps_proc->s_left_mb_syntax_ele.u2_mb_type == PSKIP);
   1120 
   1121     for (u4_i = 0; u4_i < u4_nmb_count; u4_i++)
   1122     {
   1123         /* Wait for ME map */
   1124         if (ps_proc->i4_mb_y > 0)
   1125         {
   1126             /* Wait for top right ME to be done */
   1127             UWORD8 *pu1_me_map_tp_rw = ps_proc->pu1_me_map + (ps_proc->i4_mb_y - 1) * ps_proc->i4_wd_mbs;
   1128 
   1129             while (1)
   1130             {
   1131                 volatile UWORD8 *pu1_buf;
   1132                 WORD32 idx = ps_proc->i4_mb_x + u4_i + 1;
   1133 
   1134                 idx = MIN(idx, (ps_proc->i4_wd_mbs - 1));
   1135                 pu1_buf =  pu1_me_map_tp_rw + idx;
   1136                 if(*pu1_buf)
   1137                     break;
   1138                 ithread_yield();
   1139             }
   1140         }
   1141 
   1142         ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_i].as_skip_mv[0]);
   1143         ps_proc->ps_ngbr_avbl = &(ps_proc->ps_nmb_info[u4_i].s_ngbr_avbl);
   1144         ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_i].as_pred_mv[0]);
   1145 
   1146         ps_proc->ps_cur_mb = &(ps_proc->ps_nmb_info[u4_i]);
   1147 
   1148         ps_proc->ps_cur_mb->u4_min_sad = ps_proc->u4_min_sad;
   1149         ps_proc->ps_cur_mb->u4_min_sad_reached = 0;
   1150 
   1151         ps_proc->ps_cur_mb->i4_mb_cost = INT_MAX;
   1152         ps_proc->ps_cur_mb->i4_mb_distortion = SHRT_MAX;
   1153 
   1154         /* Set the best subpel buf to the correct mb so that the buffer can be copied */
   1155         ps_proc->pu1_best_subpel_buf = ps_proc->ps_nmb_info[u4_i].pu1_best_sub_pel_buf;
   1156         ps_proc->u4_bst_spel_buf_strd = ps_proc->ps_nmb_info[u4_i].u4_bst_spel_buf_strd;
   1157 
   1158         /* Set the min sad conditions */
   1159         ps_proc->ps_cur_mb->u4_min_sad = ps_proc->ps_codec->u4_min_sad;
   1160         ps_proc->ps_cur_mb->u4_min_sad_reached = 0;
   1161 
   1162         /* Derive neighbor availability for the current macroblock */
   1163         ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
   1164 
   1165         /* init me */
   1166         ih264e_init_me(ps_proc);
   1167 
   1168         /* Compute ME according to slice type */
   1169         ps_proc->ps_codec->apf_compute_me[ps_proc->i4_slice_type](ps_proc);
   1170 
   1171         /* update top and left structs */
   1172         {
   1173             mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
   1174             mb_info_t *ps_top_left_syn = &(ps_proc->s_top_left_mb_syntax_ME);
   1175             enc_pu_t *ps_left_mb_pu = &ps_proc->s_left_mb_pu_ME;
   1176             enc_pu_t *ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu_ME;
   1177             enc_pu_t *ps_top_mv = ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x;
   1178 
   1179             *ps_top_left_syn = *ps_top_syn;
   1180 
   1181             *ps_top_left_mb_pu = *ps_top_mv;
   1182             *ps_left_mb_pu = *ps_proc->ps_pu;
   1183         }
   1184 
   1185         ps_proc->ps_pu += *ps_proc->pu4_mb_pu_cnt;
   1186 
   1187         /* Copy the min sad reached info */
   1188         ps_proc->ps_nmb_info[u4_i].u4_min_sad_reached = ps_proc->ps_cur_mb->u4_min_sad_reached;
   1189         ps_proc->ps_nmb_info[u4_i].u4_min_sad   = ps_proc->ps_cur_mb->u4_min_sad;
   1190 
   1191         /*
   1192          * To make sure that the MV map is properly sync to the
   1193          * cache we need to do a DDB
   1194          */
   1195         {
   1196             DATA_SYNC();
   1197 
   1198             pu1_me_map[ps_proc->i4_mb_x] = 1;
   1199         }
   1200         ps_proc->i4_mb_x++;
   1201 
   1202         ps_proc->s_me_ctxt.u4_left_is_intra = 0;
   1203         ps_proc->s_me_ctxt.u4_left_is_skip = (ps_proc->ps_cur_mb->u4_mb_type  == PSKIP);
   1204 
   1205         /* update buffers pointers */
   1206         ps_proc->pu1_src_buf_luma += MB_SIZE;
   1207         ps_proc->pu1_rec_buf_luma += MB_SIZE;
   1208         ps_proc->apu1_ref_buf_luma[0] += MB_SIZE;
   1209         ps_proc->apu1_ref_buf_luma[1] += MB_SIZE;
   1210 
   1211         /*
   1212          * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
   1213          * the stride per MB is MB_SIZE
   1214          */
   1215         ps_proc->pu1_src_buf_chroma += MB_SIZE;
   1216         ps_proc->pu1_rec_buf_chroma += MB_SIZE;
   1217         ps_proc->apu1_ref_buf_chroma[0] += MB_SIZE;
   1218         ps_proc->apu1_ref_buf_chroma[1] += MB_SIZE;
   1219 
   1220 
   1221         ps_proc->pu4_mb_pu_cnt += 1;
   1222     }
   1223 
   1224 
   1225     ps_proc->ps_pu = ps_pu_begin;
   1226     ps_proc->i4_mb_x = ps_proc->i4_mb_x - u4_nmb_count;
   1227 
   1228     /* update buffers pointers */
   1229     ps_proc->pu1_src_buf_luma -= MB_SIZE * u4_nmb_count;
   1230     ps_proc->pu1_rec_buf_luma -= MB_SIZE * u4_nmb_count;
   1231     ps_proc->apu1_ref_buf_luma[0] -= MB_SIZE * u4_nmb_count;
   1232     ps_proc->apu1_ref_buf_luma[1] -= MB_SIZE * u4_nmb_count;
   1233 
   1234     /*
   1235      * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
   1236      * the stride per MB is MB_SIZE
   1237      */
   1238     ps_proc->pu1_src_buf_chroma -= MB_SIZE * u4_nmb_count;
   1239     ps_proc->pu1_rec_buf_chroma -= MB_SIZE * u4_nmb_count;
   1240     ps_proc->apu1_ref_buf_chroma[0] -= MB_SIZE * u4_nmb_count;
   1241     ps_proc->apu1_ref_buf_chroma[1] -= MB_SIZE * u4_nmb_count;
   1242 
   1243 
   1244     ps_proc->pu4_mb_pu_cnt -= u4_nmb_count;
   1245 }
   1246 
   1247 
   1248 /**
   1249 *******************************************************************************
   1250 *
   1251 * @brief The function computes parameters for a BSKIP MB
   1252 *
   1253 * @par Description:
   1254 *  The function updates the skip motion vector for B Mb, check if the Mb can be
   1255 *  marked as skip and returns it
   1256 *
   1257 * @param[in] ps_proc
   1258 *  Pointer to process context
   1259 *
   1260 * @param[in] u4_for_me
   1261 *  Dummy
   1262 *
   1263 * @param[in] i4_reflist
   1264 *  Dummy
   1265 *
   1266 * @returns Flag indicating if the current Mb can be skip or not
   1267 *
   1268 * @remarks
   1269 *   The code implements the logic as described in sec 8.4.1.2.2
   1270 *   It also computes co-located MB parmas according to sec 8.4.1.2.1
   1271 *
   1272 *   Need to add condition for this fucntion to be used in ME
   1273 *
   1274 *******************************************************************************/
   1275 WORD32 ih264e_find_bskip_params_me(process_ctxt_t *ps_proc, WORD32 i4_reflist)
   1276 {
   1277     /* Colzero for co-located MB */
   1278     WORD32 i4_colzeroflag;
   1279 
   1280     /* motion vectors for neighbouring MBs */
   1281     enc_pu_t *ps_a_pu, *ps_c_pu, *ps_b_pu;
   1282 
   1283     /* Variables to check if a particular mB is available */
   1284     WORD32 i4_a, i4_b, i4_c, i4_c_avail;
   1285 
   1286     /* Mode availability, init to no modes available     */
   1287     WORD32 i4_mode_avail;
   1288 
   1289     /*  mb neighbor availability */
   1290     block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
   1291 
   1292     /* Temp var */
   1293     WORD32 i, i4_cmpl_mode, i4_skip_type = -1;
   1294 
   1295     /*
   1296      * Colocated motion vector
   1297      */
   1298     mv_t s_mvcol;
   1299 
   1300     /*
   1301      * Colocated picture idx
   1302      */
   1303     WORD32 i4_refidxcol;
   1304 
   1305     UNUSED(i4_reflist);
   1306 
   1307     /**************************************************************************
   1308      *Find co-located MB parameters
   1309      *      See sec 8.4.1.2.1  for reference
   1310      **************************************************************************/
   1311     {
   1312         /*
   1313          * Find the co-located Mb and update the skip and pred appropriately
   1314          * 1) Default colpic is forward ref : Table 8-6
   1315          * 2) Default mb col is current MB : Table 8-8
   1316          */
   1317 
   1318         if (ps_proc->ps_colpu->b1_intra_flag)
   1319         {
   1320             s_mvcol.i2_mvx = 0;
   1321             s_mvcol.i2_mvy = 0;
   1322             i4_refidxcol = -1;
   1323         }
   1324         else
   1325         {
   1326             if (ps_proc->ps_colpu->b2_pred_mode != PRED_L1)
   1327             {
   1328                 s_mvcol = ps_proc->ps_colpu->s_me_info[PRED_L0].s_mv;
   1329                 i4_refidxcol = 0;
   1330             }
   1331             else // if(ps_proc->ps_colpu->b2_pred_mode != PRED_L0)
   1332             {
   1333                 s_mvcol = ps_proc->ps_colpu->s_me_info[PRED_L1].s_mv;
   1334                 i4_refidxcol = 0;
   1335             }
   1336         }
   1337 
   1338         /* RefPicList1[ 0 ]  is marked as  "used for short-term reference", as default */
   1339         i4_colzeroflag = (!i4_refidxcol && (ABS(s_mvcol.i2_mvx) <= 1)
   1340                         && (ABS(s_mvcol.i2_mvy) <= 1));
   1341 
   1342     }
   1343 
   1344     /***************************************************************************
   1345      * Evaluating skip params : Spatial Skip
   1346      **************************************************************************/
   1347     {
   1348     /* Get the neighbouring MBS according to Section 8.4.1.2.2 */
   1349     ps_a_pu = &ps_proc->s_left_mb_pu_ME;
   1350     ps_b_pu = (ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x);
   1351 
   1352     i4_c_avail = 0;
   1353     if (ps_ngbr_avbl->u1_mb_c)
   1354     {
   1355         ps_c_pu = &((ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x)[1]);
   1356         i4_c_avail = 1;
   1357     }
   1358     else
   1359     {
   1360         ps_c_pu = &ps_proc->s_top_left_mb_pu_ME;
   1361         i4_c_avail = ps_ngbr_avbl->u1_mb_d;
   1362     }
   1363 
   1364     i4_a = ps_ngbr_avbl->u1_mb_a;
   1365     i4_b = ps_ngbr_avbl->u1_mb_b;
   1366     i4_c = i4_c_avail;
   1367 
   1368     /* Init to no mode avail */
   1369     i4_mode_avail = 0;
   1370     for (i = 0; i < 2; i++)
   1371     {
   1372         i4_cmpl_mode = (i == 0) ? PRED_L1 : PRED_L0;
   1373 
   1374         i4_mode_avail |= (i4_a && (ps_a_pu->b2_pred_mode != i4_cmpl_mode) && (ps_a_pu->s_me_info[i].i1_ref_idx != 0))<<i;
   1375         i4_mode_avail |= (i4_b && (ps_b_pu->b2_pred_mode != i4_cmpl_mode) && (ps_b_pu->s_me_info[i].i1_ref_idx != 0))<<i;
   1376         i4_mode_avail |= (i4_c && (ps_c_pu->b2_pred_mode != i4_cmpl_mode) && (ps_c_pu->s_me_info[i].i1_ref_idx != 0))<<i;
   1377     }
   1378 
   1379     if (i4_mode_avail == 0x3 || i4_mode_avail == 0x0)
   1380     {
   1381         i4_skip_type= PRED_BI;
   1382     }
   1383     else if(i4_mode_avail == 0x1)
   1384     {
   1385         i4_skip_type = PRED_L0;
   1386     }
   1387     else if(i4_mode_avail == 0x2)
   1388     {
   1389         i4_skip_type = PRED_L1;
   1390     }
   1391 
   1392     /* Update skip MV for L0 */
   1393     if ((i4_mode_avail & 0x1) && (!i4_colzeroflag))
   1394     {
   1395         ps_proc->ps_skip_mv[0].s_mv.i2_mvx = ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
   1396         ps_proc->ps_skip_mv[0].s_mv.i2_mvy = ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
   1397     }
   1398     else
   1399     {
   1400         ps_proc->ps_skip_mv[0].s_mv.i2_mvx = 0;
   1401         ps_proc->ps_skip_mv[0].s_mv.i2_mvy = 0;
   1402     }
   1403 
   1404     /* Update skip MV for L1 */
   1405     if ((i4_mode_avail & 0x2) && (!i4_colzeroflag))
   1406     {
   1407         ps_proc->ps_skip_mv[1].s_mv.i2_mvx = ps_proc->ps_pred_mv[1].s_mv.i2_mvx;
   1408         ps_proc->ps_skip_mv[1].s_mv.i2_mvy = ps_proc->ps_pred_mv[1].s_mv.i2_mvy;
   1409     }
   1410     else
   1411     {
   1412         ps_proc->ps_skip_mv[1].s_mv.i2_mvx = 0;
   1413         ps_proc->ps_skip_mv[1].s_mv.i2_mvy = 0;
   1414     }
   1415 
   1416     }
   1417 
   1418     /***************************************************************************
   1419      * Evaluating skip params : Temporal skip
   1420      **************************************************************************/
   1421     {
   1422         pic_buf_t *  ps_ref_pic[MAX_REF_PIC_CNT];
   1423         WORD32 i4_td, i4_tx, i4_tb, i4_dist_scale_factor;
   1424         enc_pu_mv_t *ps_skip_mv = &ps_proc->ps_skip_mv[2];
   1425 
   1426         ps_ref_pic[PRED_L0] = ps_proc->aps_ref_pic[PRED_L0];
   1427         ps_ref_pic[PRED_L1] = ps_proc->aps_ref_pic[PRED_L1];
   1428 
   1429         i4_tb = ps_proc->ps_codec->i4_poc - ps_ref_pic[PRED_L0]->i4_abs_poc;
   1430         i4_td = ps_ref_pic[PRED_L1]->i4_abs_poc - ps_ref_pic[PRED_L0]->i4_abs_poc;
   1431 
   1432         i4_tb = CLIP3(-128, 127, i4_tb);
   1433         i4_td = CLIP3(-128, 127, i4_td);
   1434 
   1435         i4_tx = ( 16384 + ABS( i4_td / 2 ) ) / i4_td ;
   1436         i4_dist_scale_factor =  CLIP3( -1024, 1023, ( i4_tb * i4_tx + 32 ) >> 6 );
   1437 
   1438         /* Motion vectors taken in full pel resolution , hence  -> (& 0xfffc) operation */
   1439         ps_skip_mv[PRED_L0].s_mv.i2_mvx = (( i4_dist_scale_factor * s_mvcol.i2_mvx + 128 ) >> 8) & 0xfffc;
   1440         ps_skip_mv[PRED_L0].s_mv.i2_mvy = (( i4_dist_scale_factor * s_mvcol.i2_mvy + 128 ) >> 8) & 0xfffc;
   1441 
   1442         ps_skip_mv[PRED_L1].s_mv.i2_mvx = (ps_skip_mv[PRED_L0].s_mv.i2_mvx - s_mvcol.i2_mvx) & 0xfffc;
   1443         ps_skip_mv[PRED_L1].s_mv.i2_mvy = (ps_skip_mv[PRED_L0].s_mv.i2_mvy - s_mvcol.i2_mvy) & 0xfffc;
   1444 
   1445     }
   1446 
   1447     return i4_skip_type;
   1448 }
   1449 
   1450 /**
   1451 *******************************************************************************
   1452 *
* @brief The function computes the skip motion vector for B mb
   1454 *
   1455 * @par Description:
   1456 *  The function gives the skip motion vector for B Mb, check if the Mb can be
   1457 *  marked as skip
   1458 *
   1459 * @param[in] ps_proc
   1460 *  Pointer to process context
   1461 *
* @param[in] i4_reflist
*  Dummy (unused)
   1467 *
   1468 * @returns Flag indicating if the current Mb can be skip or not
   1469 *
   1470 * @remarks The code implements the logic as described in sec 8.4.1.2.2 in H264
   1471 *   specification. It also computes co-located MB parmas according to sec 8.4.1.2.1
   1472 *
   1473 *******************************************************************************/
   1474 WORD32 ih264e_find_bskip_params(process_ctxt_t *ps_proc, WORD32 i4_reflist)
   1475 {
   1476     WORD32 i4_colzeroflag;
   1477 
   1478     /* motion vectors */
   1479     enc_pu_t *ps_a_pu, *ps_c_pu, *ps_b_pu;
   1480 
   1481     /* Syntax elem */
   1482     mb_info_t *ps_a_syn, *ps_b_syn, *ps_c_syn;
   1483 
   1484     /* Variables to check if a particular mB is available */
   1485     WORD32 i4_a, i4_b, i4_c, i4_c_avail;
   1486 
   1487     /* Mode availability, init to no modes available     */
   1488     WORD32 i4_mode_avail;
   1489 
   1490     /*  mb neighbor availability */
   1491     block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
   1492 
   1493     /* Temp var */
   1494     WORD32 i, i4_cmpl_mode;
   1495 
   1496     UNUSED(i4_reflist);
   1497 
   1498     /**************************************************************************
   1499      *Find co-locates parameters
   1500      *      See sec 8.4.1.2.1  for reference
   1501      **************************************************************************/
   1502     {
   1503         /*
   1504          * Find the co-located Mb and update the skip and pred appropriately
   1505          * 1) Default colpic is forward ref : Table 8-6
   1506          * 2) Default mb col is current MB : Table 8-8
   1507          */
   1508 
   1509         mv_t s_mvcol;
   1510         WORD32 i4_refidxcol;
   1511 
   1512         if (ps_proc->ps_colpu->b1_intra_flag)
   1513         {
   1514             s_mvcol.i2_mvx = 0;
   1515             s_mvcol.i2_mvy = 0;
   1516             i4_refidxcol = -1;
   1517         }
   1518         else
   1519         {
   1520             if (ps_proc->ps_colpu->b2_pred_mode != PRED_L1)
   1521             {
   1522                 s_mvcol = ps_proc->ps_colpu->s_me_info[PRED_L0].s_mv;
   1523                 i4_refidxcol = 0;
   1524             }
   1525             else // if(ps_proc->ps_colpu->b2_pred_mode != PRED_L0)
   1526             {
   1527                 s_mvcol = ps_proc->ps_colpu->s_me_info[PRED_L1].s_mv;
   1528                 i4_refidxcol = 0;
   1529             }
   1530         }
   1531 
   1532         /* RefPicList1[ 0 ]  is marked as  "used for short-term reference", as default */
   1533         i4_colzeroflag = (!i4_refidxcol && (ABS(s_mvcol.i2_mvx) <= 1)
   1534                         && (ABS(s_mvcol.i2_mvy) <= 1));
   1535 
   1536     }
   1537 
   1538     /***************************************************************************
   1539      * Evaluating skip params
   1540      **************************************************************************/
   1541     /* Section 8.4.1.2.2 */
   1542     ps_a_syn = &ps_proc->s_left_mb_syntax_ele;
   1543     ps_a_pu = &ps_proc->s_left_mb_pu;
   1544 
   1545     ps_b_syn = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
   1546     ps_b_pu = (ps_proc->ps_top_row_pu + ps_proc->i4_mb_x);
   1547 
   1548     i4_c_avail = 0;
   1549     if (ps_ngbr_avbl->u1_mb_c)
   1550     {
   1551         ps_c_syn = &((ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x)[1]);
   1552         ps_c_pu = &((ps_proc->ps_top_row_pu + ps_proc->i4_mb_x)[1]);
   1553         i4_c_avail = 1;
   1554     }
   1555     else
   1556     {
   1557         ps_c_syn = &(ps_proc->s_top_left_mb_syntax_ele);
   1558         ps_c_pu = &ps_proc->s_top_left_mb_pu;
   1559         i4_c_avail = ps_ngbr_avbl->u1_mb_d;
   1560     }
   1561 
   1562 
   1563     i4_a = ps_ngbr_avbl->u1_mb_a;
   1564     i4_a &= !ps_a_syn->u2_is_intra;
   1565 
   1566     i4_b = ps_ngbr_avbl->u1_mb_b;
   1567     i4_b &= !ps_b_syn->u2_is_intra;
   1568 
   1569     i4_c = i4_c_avail;
   1570     i4_c &= !ps_c_syn->u2_is_intra;
   1571 
   1572     /* Init to no mode avail */
   1573     i4_mode_avail = 0;
   1574     for (i = 0; i < 2; i++)
   1575     {
   1576         i4_cmpl_mode = (i == 0) ? PRED_L1 : PRED_L0;
   1577 
   1578         i4_mode_avail |= (i4_a && (ps_a_pu->b2_pred_mode != i4_cmpl_mode) && (ps_a_pu->s_me_info[i].i1_ref_idx != 0))<<i;
   1579         i4_mode_avail |= (i4_b && (ps_b_pu->b2_pred_mode != i4_cmpl_mode) && (ps_b_pu->s_me_info[i].i1_ref_idx != 0))<<i;
   1580         i4_mode_avail |= (i4_c && (ps_c_pu->b2_pred_mode != i4_cmpl_mode) && (ps_c_pu->s_me_info[i].i1_ref_idx != 0))<<i;
   1581     }
   1582 
   1583     /* Update skip MV for L0 */
   1584     if ((i4_mode_avail & 0x1) && (!i4_colzeroflag))
   1585     {
   1586         ps_proc->ps_skip_mv[0].s_mv.i2_mvx = ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
   1587         ps_proc->ps_skip_mv[0].s_mv.i2_mvy = ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
   1588     }
   1589     else
   1590     {
   1591         ps_proc->ps_skip_mv[0].s_mv.i2_mvx = 0;
   1592         ps_proc->ps_skip_mv[0].s_mv.i2_mvy = 0;
   1593     }
   1594 
   1595     /* Update skip MV for L1 */
   1596     if ((i4_mode_avail & 0x2) && (!i4_colzeroflag))
   1597     {
   1598         ps_proc->ps_skip_mv[1].s_mv.i2_mvx = ps_proc->ps_pred_mv[1].s_mv.i2_mvx;
   1599         ps_proc->ps_skip_mv[1].s_mv.i2_mvy = ps_proc->ps_pred_mv[1].s_mv.i2_mvy;
   1600     }
   1601     else
   1602     {
   1603         ps_proc->ps_skip_mv[1].s_mv.i2_mvx = 0;
   1604         ps_proc->ps_skip_mv[1].s_mv.i2_mvy = 0;
   1605     }
   1606 
   1607     /* Now see if the ME information matches the SKIP information */
   1608     switch (ps_proc->ps_pu->b2_pred_mode)
   1609     {
   1610         case PRED_BI:
   1611             if (  (ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx == ps_proc->ps_skip_mv[0].s_mv.i2_mvx)
   1612                && (ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy == ps_proc->ps_skip_mv[0].s_mv.i2_mvy)
   1613                && (ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvx == ps_proc->ps_skip_mv[1].s_mv.i2_mvx)
   1614                && (ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvy == ps_proc->ps_skip_mv[1].s_mv.i2_mvy)
   1615                && (i4_mode_avail ==  0x3 || i4_mode_avail == 0x0))
   1616             {
   1617                 return 1;
   1618             }
   1619             break;
   1620 
   1621         case PRED_L0:
   1622             if ( (ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx == ps_proc->ps_skip_mv[0].s_mv.i2_mvx)
   1623               && (ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy == ps_proc->ps_skip_mv[0].s_mv.i2_mvy)
   1624               && (i4_mode_avail == 0x1))
   1625             {
   1626                 return 1;
   1627             }
   1628             break;
   1629 
   1630         case PRED_L1:
   1631             if (  (ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvx == ps_proc->ps_skip_mv[1].s_mv.i2_mvx)
   1632                && (ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvy == ps_proc->ps_skip_mv[1].s_mv.i2_mvy)
   1633                && (i4_mode_avail == 0x2))
   1634             {
   1635                 return 1;
   1636             }
   1637             break;
   1638     }
   1639 
   1640     return 0;
   1641 }
   1642 
   1643 
   1644 /**
   1645 *******************************************************************************
   1646 *
   1647 * @brief This function computes the best motion vector among the tentative mv
   1648 * candidates chosen.
   1649 *
   1650 * @par Description:
   1651 *  This function determines the position in the search window at which the motion
   1652 *  estimation should begin in order to minimise the number of search iterations.
   1653 *
   1654 * @param[in] ps_mb_part
   1655 *  pointer to current mb partition ctxt with respect to ME
   1656 *
   1657 * @param[in] u4_lambda_motion
   1658 *  lambda motion
   1659 *
   1660 * @param[in] u4_fast_flag
   1661 *  enable/disable fast sad computation
   1662 *
   1663 * @returns  mv pair & corresponding distortion and cost
   1664 *
   1665 * @remarks Currently onyl 4 search candiates are supported
   1666 *
   1667 *******************************************************************************
   1668 */
   1669 void ih264e_evaluate_bipred(me_ctxt_t *ps_me_ctxt,
   1670                             process_ctxt_t *ps_proc,
   1671                             mb_part_ctxt *ps_mb_ctxt_bi)
   1672 {
   1673 
   1674     UWORD32 i, u4_fast_sad;
   1675 
   1676     WORD32 i4_dest_buff;
   1677 
   1678     mv_t *ps_l0_pred_mv, *ps_l1_pred_mv, s_l0_mv, s_l1_mv;
   1679 
   1680     UWORD8 *pu1_ref_mb_l0, *pu1_ref_mb_l1;
   1681 
   1682     UWORD8 *pu1_dst_buf;
   1683 
   1684     WORD32 i4_ref_l0_stride, i4_ref_l1_stride;
   1685 
   1686     WORD32 i4_mb_distortion, i4_mb_cost;
   1687 
   1688     u4_fast_sad = ps_me_ctxt->u4_enable_fast_sad;
   1689 
   1690     i4_dest_buff = 0;
   1691     for (i = 0; i < ps_me_ctxt->u4_num_candidates[PRED_BI]; i += 2)
   1692     {
   1693         pu1_dst_buf = ps_me_ctxt->apu1_subpel_buffs[i4_dest_buff];
   1694 
   1695         s_l0_mv.i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvx >> 2;
   1696         s_l0_mv.i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvy >> 2;
   1697         s_l1_mv.i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvx >> 2;
   1698         s_l1_mv.i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvy >> 2;
   1699 
   1700         ps_l0_pred_mv = &ps_proc->ps_pred_mv[PRED_L0].s_mv;
   1701         ps_l1_pred_mv = &ps_proc->ps_pred_mv[PRED_L1].s_mv;
   1702 
   1703         if ((ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvx & 0x3)||
   1704                         (ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvy & 0x3))
   1705         {
   1706             pu1_ref_mb_l0 = ps_me_ctxt->as_mb_part[PRED_L0].pu1_best_hpel_buf;
   1707             i4_ref_l0_stride = ps_me_ctxt->u4_subpel_buf_strd;
   1708         }
   1709         else
   1710         {
   1711             pu1_ref_mb_l0 = ps_me_ctxt->apu1_ref_buf_luma[PRED_L0] + (s_l0_mv.i2_mvx) + ((s_l0_mv.i2_mvy) * ps_me_ctxt->i4_rec_strd);
   1712             i4_ref_l0_stride = ps_me_ctxt->i4_rec_strd;
   1713         }
   1714 
   1715 
   1716         if ((ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvx & 0x3) ||
   1717                         (ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvy & 0x3))
   1718         {
   1719             pu1_ref_mb_l1 = ps_me_ctxt->as_mb_part[PRED_L1].pu1_best_hpel_buf;
   1720             i4_ref_l1_stride = ps_me_ctxt->u4_subpel_buf_strd;
   1721         }
   1722         else
   1723         {
   1724             pu1_ref_mb_l1 = ps_me_ctxt->apu1_ref_buf_luma[PRED_L1] + (s_l1_mv.i2_mvx) + ((s_l1_mv.i2_mvy) * ps_me_ctxt->i4_rec_strd);
   1725             i4_ref_l1_stride = ps_me_ctxt->i4_rec_strd;
   1726         }
   1727 
   1728         ps_proc->ps_codec->pf_inter_pred_luma_bilinear(
   1729                         pu1_ref_mb_l0, pu1_ref_mb_l1, pu1_dst_buf,
   1730                         i4_ref_l0_stride, i4_ref_l1_stride,
   1731                         ps_me_ctxt->u4_subpel_buf_strd, MB_SIZE, MB_SIZE);
   1732 
   1733         ps_me_ctxt->pf_ime_compute_sad_16x16[u4_fast_sad](
   1734                         ps_me_ctxt->pu1_src_buf_luma, pu1_dst_buf,
   1735                         ps_me_ctxt->i4_src_strd, ps_me_ctxt->u4_subpel_buf_strd,
   1736                         INT_MAX, &i4_mb_distortion);
   1737 
   1738         /* compute cost */
   1739         i4_mb_cost =  ps_me_ctxt->pu1_mv_bits[ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvx - ps_l0_pred_mv->i2_mvx];
   1740         i4_mb_cost += ps_me_ctxt->pu1_mv_bits[ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvy - ps_l0_pred_mv->i2_mvy];
   1741         i4_mb_cost += ps_me_ctxt->pu1_mv_bits[ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvx - ps_l1_pred_mv->i2_mvx];
   1742         i4_mb_cost += ps_me_ctxt->pu1_mv_bits[ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvy - ps_l1_pred_mv->i2_mvy];
   1743 
   1744         i4_mb_cost -= (ps_me_ctxt->i4_skip_bias[BSLICE]) * (ps_me_ctxt->i4_skip_type == PRED_BI) * (i == 0);
   1745 
   1746 
   1747         i4_mb_cost *= ps_me_ctxt->u4_lambda_motion;
   1748         i4_mb_cost += i4_mb_distortion;
   1749 
   1750         if (i4_mb_cost < ps_mb_ctxt_bi->i4_mb_cost)
   1751         {
   1752             ps_mb_ctxt_bi->i4_srch_pos_idx = (i>>1);
   1753             ps_mb_ctxt_bi->i4_mb_cost = i4_mb_cost;
   1754             ps_mb_ctxt_bi->i4_mb_distortion = i4_mb_distortion;
   1755             ps_mb_ctxt_bi->pu1_best_hpel_buf = pu1_dst_buf;
   1756             i4_dest_buff = (i4_dest_buff + 1) % 2;
   1757         }
   1758     }
   1759 
   1760 }
   1761 
   1762 /**
   1763 *******************************************************************************
   1764 *
   1765 * @brief This function performs motion estimation for the current mb
   1766 *
   1767 * @par Description:
   1768 *  The current mb is compared with a list of mb's in the reference frame for
   1769 *  least cost. The mb that offers least cost is chosen as predicted mb and the
   1770 *  displacement of the predicted mb from index location of the current mb is
   1771 *  signaled as mv. The list of the mb's that are chosen in the reference frame
   1772 *  are dependent on the speed of the ME configured.
   1773 *
   1774 * @param[in] ps_proc
   1775 *  Process context corresponding to the job
   1776 *
   1777 * @returns  motion vector of the pred mb, sad, cost.
   1778 *
   1779 * @remarks none
   1780 *
   1781 *******************************************************************************
   1782 */
   1783 void ih264e_compute_me_multi_reflist(process_ctxt_t *ps_proc)
   1784 {
   1785     /* me ctxt */
   1786     me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt;
   1787 
   1788     /* codec context */
   1789     codec_t *ps_codec = ps_proc->ps_codec;
   1790 
   1791     /* Temp variables for looping over ref lists */
   1792     WORD32 i4_reflist, i4_max_reflist;
   1793 
   1794     /* recon stride */
   1795     WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
   1796 
   1797     /* source buffer for halp pel generation functions */
   1798     UWORD8 *pu1_hpel_src;
   1799 
   1800     /* quantization parameters */
   1801     quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
   1802 
   1803     /* Mb part ctxts for SKIP */
   1804     mb_part_ctxt as_skip_mbpart[2];
   1805 
   1806     /* Sad therholds */
   1807     ps_me_ctxt->pu2_sad_thrsh = ps_qp_params->pu2_sad_thrsh;
   1808 
   1809     {
   1810         WORD32 rows_above, rows_below, columns_left, columns_right;
   1811 
   1812         /* During evaluation for motion vectors do not search through padded regions */
   1813         /* Obtain number of rows and columns that are effective for computing for me evaluation */
   1814         rows_above = MB_SIZE + ps_proc->i4_mb_y * MB_SIZE;
   1815         rows_below = (ps_proc->i4_ht_mbs - ps_proc->i4_mb_y) * MB_SIZE;
   1816         columns_left = MB_SIZE + ps_proc->i4_mb_x * MB_SIZE;
   1817         columns_right = (ps_proc->i4_wd_mbs - ps_proc->i4_mb_x) * MB_SIZE;
   1818 
   1819         /* init srch range */
   1820         /* NOTE : For now, lets limit the search range by DEFAULT_MAX_SRCH_RANGE_X / 2
   1821          * on all sides.
   1822          */
   1823         ps_me_ctxt->i4_srch_range_w = -MIN(columns_left, DEFAULT_MAX_SRCH_RANGE_X >> 1);
   1824         ps_me_ctxt->i4_srch_range_e = MIN(columns_right, DEFAULT_MAX_SRCH_RANGE_X >> 1);
   1825         ps_me_ctxt->i4_srch_range_n = -MIN(rows_above, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
   1826         ps_me_ctxt->i4_srch_range_s = MIN(rows_below, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
   1827 
   1828         /* this is to facilitate fast sub pel computation with minimal loads */
   1829         if (ps_me_ctxt->u4_enable_hpel)
   1830         {
   1831             ps_me_ctxt->i4_srch_range_w += 1;
   1832             ps_me_ctxt->i4_srch_range_e -= 1;
   1833             ps_me_ctxt->i4_srch_range_n += 1;
   1834             ps_me_ctxt->i4_srch_range_s -= 1;
   1835         }
   1836     }
   1837 
   1838     /* Compute ME and store the MVs */
   1839     {
   1840         /***********************************************************************
   1841          * Compute ME for lists L0 and L1
   1842          *  For L0 -> L0 skip + L0
   1843          *  for L1 -> L0 skip + L0 + L1 skip + L1
   1844          ***********************************************************************/
   1845         i4_max_reflist = (ps_proc->i4_slice_type == PSLICE) ? PRED_L0 : PRED_L1;
   1846 
   1847         /* Init SATQD for the current list */
   1848         ps_me_ctxt->u4_min_sad_reached  = 0;
   1849         ps_me_ctxt->i4_min_sad = ps_proc->ps_cur_mb->u4_min_sad;
   1850 
   1851         for (i4_reflist = PRED_L0; i4_reflist <= i4_max_reflist; i4_reflist++)
   1852         {
   1853 
   1854             /* Get the seed motion vector candidates                    */
   1855             ih264e_get_search_candidates(ps_proc, ps_me_ctxt, i4_reflist);
   1856 
   1857             /* ****************************************************************
   1858              *Evaluate the SKIP for current list
   1859              * ****************************************************************/
   1860             as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvx = 0;
   1861             as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvy = 0;
   1862             as_skip_mbpart[i4_reflist].i4_mb_cost = INT_MAX;
   1863             as_skip_mbpart[i4_reflist].i4_mb_distortion = INT_MAX;
   1864 
   1865             if (ps_me_ctxt->i4_skip_type == i4_reflist)
   1866             {
   1867                 ime_compute_skip_cost( ps_me_ctxt,
   1868                                        (ime_mv_t *)(&ps_proc->ps_skip_mv[i4_reflist].s_mv),
   1869                                        &as_skip_mbpart[i4_reflist],
   1870                                        ps_proc->ps_codec->s_cfg.u4_enable_satqd,
   1871                                        i4_reflist,
   1872                                        (ps_proc->i4_slice_type == BSLICE) );
   1873             }
   1874 
   1875             as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvx <<= 2;
   1876             as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvy <<= 2;
   1877 
   1878             /******************************************************************
   1879              * Evaluate ME For current list
   1880              *****************************************************************/
   1881             ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvx = 0;
   1882             ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvy = 0;
   1883             ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost = INT_MAX;
   1884             ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_distortion = INT_MAX;
   1885 
   1886             /* Init Hpel */
   1887             ps_me_ctxt->as_mb_part[i4_reflist].pu1_best_hpel_buf = NULL;
   1888 
   1889             /* In case we found out the minimum SAD, exit the ME eval */
   1890             if (ps_me_ctxt->u4_min_sad_reached)
   1891             {
   1892                 i4_max_reflist = i4_reflist;
   1893                 break;
   1894             }
   1895 
   1896 
   1897             /* Evaluate search candidates for initial mv pt */
   1898             ime_evaluate_init_srchposn_16x16(ps_me_ctxt, i4_reflist);
   1899 
   1900             /********************************************************************/
   1901             /*                  full pel motion estimation                      */
   1902             /********************************************************************/
   1903             ime_full_pel_motion_estimation_16x16(ps_me_ctxt, i4_reflist);
   1904 
   1905             DEBUG_MV_HISTOGRAM_ADD((ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvx >> 2),
   1906                                    (ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvy >> 2));
   1907 
   1908             DEBUG_SAD_HISTOGRAM_ADD(ps_me_ctxt->s_mb_part.i4_mb_distortion, 1);
   1909 
   1910             /* Scale the MV to qpel resolution */
   1911             ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvx <<= 2;
   1912             ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvy <<= 2;
   1913 
   1914             if (ps_me_ctxt->u4_enable_hpel)
   1915             {
   1916                 /* moving src pointer to the converged motion vector location */
   1917                 pu1_hpel_src =   ps_me_ctxt->apu1_ref_buf_luma[i4_reflist]
   1918                                + (ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvx >> 2)
   1919                                + ((ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvy >> 2)* i4_rec_strd);
   1920 
   1921                 ps_me_ctxt->apu1_subpel_buffs[0] = ps_proc->apu1_subpel_buffs[0];
   1922                 ps_me_ctxt->apu1_subpel_buffs[1] = ps_proc->apu1_subpel_buffs[1];
   1923                 ps_me_ctxt->apu1_subpel_buffs[2] = ps_proc->apu1_subpel_buffs[2];
   1924 
   1925                 /* Init the search position to an invalid number */
   1926                 ps_me_ctxt->as_mb_part[i4_reflist].i4_srch_pos_idx = 3;
   1927 
   1928                 /* Incase a buffer is still in use by L0, replace it with spare buff */
   1929                 ps_me_ctxt->apu1_subpel_buffs[ps_me_ctxt->as_mb_part[PRED_L0].i4_srch_pos_idx] =
   1930                                 ps_proc->apu1_subpel_buffs[3];
   1931 
   1932 
   1933                 ps_me_ctxt->u4_subpel_buf_strd = HP_BUFF_WD;
   1934 
   1935                 /* half  pel search is done for both sides of full pel,
   1936                  * hence half_x of width x height = 17x16 is created
   1937                  * starting from left half_x of converged full pel */
   1938                 pu1_hpel_src -= 1;
   1939 
   1940                 /* computing half_x */
   1941                 ps_codec->pf_ih264e_sixtapfilter_horz(pu1_hpel_src,
   1942                                                       ps_me_ctxt->apu1_subpel_buffs[0],
   1943                                                       i4_rec_strd,
   1944                                                       ps_me_ctxt->u4_subpel_buf_strd);
   1945 
   1946                 /*
   1947                  * Halfpel search is done for both sides of full pel,
   1948                  * hence half_y of width x height = 16x17 is created
   1949                  * starting from top half_y of converged full pel
   1950                  * for half_xy top_left is required
   1951                  * hence it starts from pu1_hpel_src = full_pel_converged_point - i4_rec_strd - 1
   1952                  */
   1953                 pu1_hpel_src -= i4_rec_strd;
   1954 
   1955                 /* computing half_y and half_xy */
   1956                 ps_codec->pf_ih264e_sixtap_filter_2dvh_vert(
   1957                                 pu1_hpel_src, ps_me_ctxt->apu1_subpel_buffs[1],
   1958                                 ps_me_ctxt->apu1_subpel_buffs[2], i4_rec_strd,
   1959                                 ps_me_ctxt->u4_subpel_buf_strd, ps_proc->ai16_pred1 + 3,
   1960                                 ps_me_ctxt->u4_subpel_buf_strd);
   1961 
   1962                 ime_sub_pel_motion_estimation_16x16(ps_me_ctxt, i4_reflist);
   1963 
   1964             }
   1965         }
   1966 
   1967         /***********************************************************************
   1968          * If a particular skiip Mv is giving better sad, copy to the corresponding
   1969          * MBPART
   1970          * In B slices this loop should go only to PREDL1: If we found min sad
   1971          * we will go to the skip ref list only
   1972          * Have to find a way to make it without too much change or new vars
   1973          **********************************************************************/
   1974         for (i4_reflist = 0; i4_reflist <= i4_max_reflist; i4_reflist++)
   1975         {
   1976             if (as_skip_mbpart[i4_reflist].i4_mb_cost < ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost)
   1977             {
   1978                 ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost = as_skip_mbpart[i4_reflist].i4_mb_cost;
   1979                 ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_distortion = as_skip_mbpart[i4_reflist].i4_mb_distortion;
   1980                 ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr = as_skip_mbpart[i4_reflist].s_mv_curr;
   1981             }
   1982         }
   1983 
   1984         /***********************************************************************
   1985          * Compute ME for BI
   1986          *  In case of BI we do ME for two candidates
   1987          *   1) The best L0 and L1 Mvs
   1988          *   2) Skip L0 and L1 MVs
   1989          *
   1990          *   TODO
   1991          *   one of the search candidates is skip. Hence it may be duplicated
   1992          ***********************************************************************/
   1993         if (i4_max_reflist == PRED_L1 && ps_me_ctxt->u4_min_sad_reached == 0)
   1994         {
   1995             WORD32 i, j = 0;
   1996             WORD32 l0_srch_pos_idx, l1_srch_pos_idx;
   1997             WORD32 i4_l0_skip_mv_idx, i4_l1_skip_mv_idx;
   1998 
   1999             /* Get the free buffers */
   2000             l0_srch_pos_idx = ps_me_ctxt->as_mb_part[PRED_L0].i4_srch_pos_idx;
   2001             l1_srch_pos_idx = ps_me_ctxt->as_mb_part[PRED_L1].i4_srch_pos_idx;
   2002 
   2003             /* Search for the two free buffers in subpel list */
   2004             for (i = 0; i < SUBPEL_BUFF_CNT; i++)
   2005             {
   2006                 if (i != l0_srch_pos_idx && i != l1_srch_pos_idx)
   2007                 {
   2008                     ps_me_ctxt->apu1_subpel_buffs[j] = ps_proc->apu1_subpel_buffs[i];
   2009                     j++;
   2010                 }
   2011             }
   2012             ps_me_ctxt->u4_subpel_buf_strd = HP_BUFF_WD;
   2013 
   2014             /* Copy the statial SKIP MV of each list */
   2015             i4_l0_skip_mv_idx = ps_me_ctxt->u4_num_candidates[PRED_L0] - 2;
   2016             i4_l1_skip_mv_idx = ps_me_ctxt->u4_num_candidates[PRED_L1] - 2;
   2017             ps_me_ctxt->as_mv_init_search[PRED_BI][0].i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_L0][i4_l0_skip_mv_idx].i2_mvx << 2;
   2018             ps_me_ctxt->as_mv_init_search[PRED_BI][0].i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_L0][i4_l0_skip_mv_idx].i2_mvy << 2;
   2019             ps_me_ctxt->as_mv_init_search[PRED_BI][1].i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_L1][i4_l1_skip_mv_idx].i2_mvx << 2;
   2020             ps_me_ctxt->as_mv_init_search[PRED_BI][1].i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_L1][i4_l1_skip_mv_idx].i2_mvy << 2;
   2021 
   2022             /* Copy the SKIP MV temporal of each list */
   2023             i4_l0_skip_mv_idx++;
   2024             i4_l1_skip_mv_idx++;
   2025             ps_me_ctxt->as_mv_init_search[PRED_BI][2].i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_L0][i4_l0_skip_mv_idx].i2_mvx << 2;
   2026             ps_me_ctxt->as_mv_init_search[PRED_BI][2].i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_L0][i4_l0_skip_mv_idx].i2_mvy << 2;
   2027             ps_me_ctxt->as_mv_init_search[PRED_BI][3].i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_L1][i4_l1_skip_mv_idx].i2_mvx << 2;
   2028             ps_me_ctxt->as_mv_init_search[PRED_BI][3].i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_L1][i4_l1_skip_mv_idx].i2_mvy << 2;
   2029 
   2030             /* Copy the best MV after ME */
   2031             ps_me_ctxt->as_mv_init_search[PRED_BI][4] = ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr;
   2032             ps_me_ctxt->as_mv_init_search[PRED_BI][5] = ps_me_ctxt->as_mb_part[PRED_L1].s_mv_curr;
   2033 
   2034             ps_me_ctxt->u4_num_candidates[PRED_BI] = 6;
   2035 
   2036             ps_me_ctxt->as_mb_part[PRED_BI].i4_mb_cost = INT_MAX;
   2037             ps_me_ctxt->as_mb_part[PRED_BI].i4_mb_distortion = INT_MAX;
   2038 
   2039             ih264e_evaluate_bipred(ps_me_ctxt, ps_proc,
   2040                                    &ps_me_ctxt->as_mb_part[PRED_BI]);
   2041 
   2042             i4_max_reflist = PRED_BI;
   2043         }
   2044 
   2045         /**********************************************************************
   2046          * Now get the minimum of MB part sads by searching over all ref lists
   2047          **********************************************************************/
   2048         ps_proc->ps_pu->b2_pred_mode = 0x3;
   2049 
   2050         for (i4_reflist = 0; i4_reflist <= i4_max_reflist; i4_reflist++)
   2051         {
   2052             if (ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost < ps_proc->ps_cur_mb->i4_mb_cost)
   2053             {
   2054                 ps_proc->ps_cur_mb->i4_mb_cost = ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost;
   2055                 ps_proc->ps_cur_mb->i4_mb_distortion = ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_distortion;
   2056                 ps_proc->ps_cur_mb->u4_mb_type = (ps_proc->i4_slice_type == PSLICE) ? P16x16 : B16x16;
   2057                 ps_proc->ps_pu->b2_pred_mode = i4_reflist ;
   2058             }
   2059         }
   2060 
   2061         /**********************************************************************
   2062          * In case we have a BI MB, we have to copy the buffers and set proer MV's
   2063          *  1)In case its BI, we need to get the best MVs given by BI and update
   2064          *    to their corresponding MB part
   2065          *  2)We also need to copy the buffer in which bipred buff is populated
   2066          *
   2067          *  Not that if we have
   2068          **********************************************************************/
   2069         if (ps_proc->ps_pu->b2_pred_mode == PRED_BI)
   2070         {
   2071             WORD32 i4_srch_pos = ps_me_ctxt->as_mb_part[PRED_BI].i4_srch_pos_idx;
   2072             UWORD8 *pu1_bi_buf = ps_me_ctxt->as_mb_part[PRED_BI].pu1_best_hpel_buf;
   2073 
   2074             ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr = ps_me_ctxt->as_mv_init_search[PRED_BI][i4_srch_pos << 1];
   2075             ps_me_ctxt->as_mb_part[PRED_L1].s_mv_curr = ps_me_ctxt->as_mv_init_search[PRED_BI][(i4_srch_pos << 1) + 1];
   2076 
   2077             /* Now we have to copy the buffers */
   2078             ps_codec->pf_inter_pred_luma_copy(pu1_bi_buf,
   2079                                               ps_proc->pu1_best_subpel_buf,
   2080                                               ps_me_ctxt->u4_subpel_buf_strd,
   2081                                               ps_proc->u4_bst_spel_buf_strd,
   2082                                               MB_SIZE, MB_SIZE, NULL, 0);
   2083 
   2084         }
   2085         else if (ps_me_ctxt->as_mb_part[ps_proc->ps_pu->b2_pred_mode].pu1_best_hpel_buf)
   2086         {
   2087             /* Now we have to copy the buffers */
   2088             ps_codec->pf_inter_pred_luma_copy(
   2089                             ps_me_ctxt->as_mb_part[ps_proc->ps_pu->b2_pred_mode].pu1_best_hpel_buf,
   2090                             ps_proc->pu1_best_subpel_buf,
   2091                             ps_me_ctxt->u4_subpel_buf_strd,
   2092                             ps_proc->u4_bst_spel_buf_strd, MB_SIZE, MB_SIZE,
   2093                             NULL, 0);
   2094         }
   2095     }
   2096 
   2097     /**************************************************************************
   2098      *Now copy the MVs to the current PU with qpel scaling
   2099      ***************************************************************************/
   2100     ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvx = (ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvx);
   2101     ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvy = (ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvy);
   2102     ps_proc->ps_pu->s_me_info[PRED_L1].s_mv.i2_mvx = (ps_me_ctxt->as_mb_part[PRED_L1].s_mv_curr.i2_mvx);
   2103     ps_proc->ps_pu->s_me_info[PRED_L1].s_mv.i2_mvy = (ps_me_ctxt->as_mb_part[PRED_L1].s_mv_curr.i2_mvy);
   2104 
   2105 
   2106     ps_proc->ps_pu->s_me_info[0].i1_ref_idx = (ps_proc->ps_pu->b2_pred_mode != PRED_L1)? -1:0;
   2107     ps_proc->ps_pu->s_me_info[1].i1_ref_idx = (ps_proc->ps_pu->b2_pred_mode != PRED_L0)? -1:0;
   2108 
   2109     /* number of partitions */
   2110     ps_proc->u4_num_sub_partitions = 1;
   2111     *(ps_proc->pu4_mb_pu_cnt) = 1;
   2112 
   2113     /* position in-terms of PU */
   2114     ps_proc->ps_pu->b4_pos_x = 0;
   2115     ps_proc->ps_pu->b4_pos_y = 0;
   2116 
   2117     /* PU size */
   2118     ps_proc->ps_pu->b4_wd = 3;
   2119     ps_proc->ps_pu->b4_ht = 3;
   2120 
   2121     /* Update min sad conditions */
   2122     if (ps_me_ctxt->u4_min_sad_reached == 1)
   2123     {
   2124         ps_proc->ps_cur_mb->u4_min_sad_reached = 1;
   2125         ps_proc->ps_cur_mb->u4_min_sad = ps_me_ctxt->i4_min_sad;
   2126     }
   2127 }
   2128 
   2129