Home | History | Annotate | Download | only in encoder
      1 /******************************************************************************
      2  *
      3  * Copyright (C) 2018 The Android Open Source Project
      4  *
      5  * Licensed under the Apache License, Version 2.0 (the "License");
      6  * you may not use this file except in compliance with the License.
      7  * You may obtain a copy of the License at:
      8  *
      9  * http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  *
     17  *****************************************************************************
     18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
     19 */
     20 
     21 /**
     22 *******************************************************************************
     23 * @file
     24 *  ihevce_inter_pred.c
     25 *
     26 * @brief
     27 *  Contains functions for giving out prediction samples for a given pu
     28 *
     29 * @author
     30 *  Ittiam
     31 *
     32 * @par List of Functions:
     33 *   - ihevc_inter_pred()
     34 *
     35 *
     36 *******************************************************************************
     37 */
     38 /* System include files */
     39 #include <stdio.h>
     40 #include <string.h>
     41 #include <stdlib.h>
     42 #include <assert.h>
     43 #include <stdarg.h>
     44 #include <math.h>
     45 
     46 /* User include files */
     47 #include "ihevc_typedefs.h"
     48 #include "itt_video_api.h"
     49 #include "ihevce_api.h"
     50 
     51 #include "rc_cntrl_param.h"
     52 #include "rc_frame_info_collector.h"
     53 #include "rc_look_ahead_params.h"
     54 
     55 #include "ihevc_debug.h"
     56 #include "ihevc_defs.h"
     57 #include "ihevc_structs.h"
     58 #include "ihevc_platform_macros.h"
     59 #include "ihevc_deblk.h"
     60 #include "ihevc_itrans_recon.h"
     61 #include "ihevc_chroma_itrans_recon.h"
     62 #include "ihevc_chroma_intra_pred.h"
     63 #include "ihevc_intra_pred.h"
     64 #include "ihevc_inter_pred.h"
     65 #include "ihevc_mem_fns.h"
     66 #include "ihevc_padding.h"
     67 #include "ihevc_weighted_pred.h"
     68 #include "ihevc_sao.h"
     69 #include "ihevc_resi_trans.h"
     70 #include "ihevc_quant_iquant_ssd.h"
     71 #include "ihevc_cabac_tables.h"
     72 
     73 #include "ihevce_defs.h"
     74 #include "ihevce_lap_enc_structs.h"
     75 #include "ihevce_multi_thrd_structs.h"
     76 #include "ihevce_me_common_defs.h"
     77 #include "ihevce_had_satd.h"
     78 #include "ihevce_error_codes.h"
     79 #include "ihevce_bitstream.h"
     80 #include "ihevce_cabac.h"
     81 #include "ihevce_rdoq_macros.h"
     82 #include "ihevce_function_selector.h"
     83 #include "ihevce_enc_structs.h"
     84 #include "ihevce_entropy_structs.h"
     85 #include "ihevce_cmn_utils_instr_set_router.h"
     86 #include "ihevce_enc_loop_structs.h"
     87 #include "ihevce_inter_pred.h"
     88 #include "ihevc_weighted_pred.h"
     89 
     90 /*****************************************************************************/
     91 /* Global tables                                                             */
     92 /*****************************************************************************/
     93 
     94 /**
     95 ******************************************************************************
     96 * @brief  Table of filter tap coefficients for HEVC luma inter prediction
     97 * input   : sub pel mv position (dx/dy = 0:3)
     98 * output  : filter coeffs to be used for that position
     99 *
    100 * @remarks See section 8.5.2.2.2.1 Luma sample interpolation process of HEVC
    101 ******************************************************************************
    102 */
    103 WORD8 gai1_hevc_luma_filter_taps[4][NTAPS_LUMA] = { { 0, 0, 0, 64, 0, 0, 0, 0 },
    104                                                     { -1, 4, -10, 58, 17, -5, 1, 0 },
    105                                                     { -1, 4, -11, 40, 40, -11, 4, -1 },
    106                                                     { 0, 1, -5, 17, 58, -10, 4, -1 } };
    107 
    108 /**
    109 ******************************************************************************
    110 * @brief  Table of filter tap coefficients for HEVC chroma inter prediction
    111 * input   : chroma sub pel mv position (dx/dy = 0:7)
    112 * output  : filter coeffs to be used for that position
    113 *
    114 * @remarks See section 8.5.2.2.2.2 Chroma sample interpolation process of HEVC
    115 The filter uses only the first four elements in each array
    116 ******************************************************************************
    117 */
    118 WORD8 gai1_hevc_chroma_filter_taps[8][NTAPS_CHROMA] = { { 0, 64, 0, 0 },    { -2, 58, 10, -2 },
    119                                                         { -4, 54, 16, -2 }, { -6, 46, 28, -4 },
    120                                                         { -4, 36, 36, -4 }, { -4, 28, 46, -6 },
    121                                                         { -2, 16, 54, -4 }, { -2, 10, 58, -2 } };
    122 
    123 /*****************************************************************************/
    124 /* Function Definitions                                                      */
    125 /*****************************************************************************/
    126 
    127 /**
    128 *******************************************************************************
    129 *
    130 * @brief
    131 *  Performs Luma inter pred based on sub pel position dxdy and store the result
    132 *  in a 16 bit destination buffer
    133 *
    134 * @param[in] pu1_src
    135 *  pointer to the source correspoding to integer pel position of a mv (left and
    136 *  top justified integer position)
    137 *
    138 * @param[out] pi2_dst
    139 *  WORD16 pointer to the destination
    140 *
    141 * @param[in] src_strd
    142 *  source buffer stride
    143 *
    144 * @param[in] dst_strd
    145 *  destination buffer stride
    146 *
    147 * @param[in] pi2_hdst_scratch
    148 *  scratch buffer for intermediate storage of horizontal filter output; used as
    149 *  input for vertical filtering when sub pel components (dx != 0) && (dy != 0)
    150 *
    151 *  Max scratch buffer required is w * (h + 7) * sizeof(WORD16)
    152 *
    153 * @param[in] ht
    154 *  width of the prediction unit
    155 *
    156 * @param[in] wd
    157 *  width of the prediction unit
    158 *
    159 * @param[in] dx
    160 *  qpel position[0:3] of mv in x direction
    161 *
    162 * @param[in] dy
    163 *  qpel position[0:3] of mv in y direction
    164 *
    165 * @returns
    166 *   none
    167 *
    168 * @remarks
    169 *
    170 *******************************************************************************
    171 */
    172 void ihevce_luma_interpolate_16bit_dxdy(
    173     UWORD8 *pu1_src,
    174     WORD16 *pi2_dst,
    175     WORD32 src_strd,
    176     WORD32 dst_strd,
    177     WORD16 *pi2_hdst_scratch,
    178     WORD32 ht,
    179     WORD32 wd,
    180     WORD32 dy,
    181     WORD32 dx,
    182     func_selector_t *ps_func_selector)
    183 {
    184     if((0 == dx) && (0 == dy))
    185     {
    186         /*--------- full pel position : copy input by upscaling-------*/
    187 
    188         ps_func_selector->ihevc_inter_pred_luma_copy_w16out_fptr(
    189             pu1_src, pi2_dst, src_strd, dst_strd, &gai1_hevc_luma_filter_taps[0][0], ht, wd);
    190     }
    191     else if((0 != dx) && (0 != dy))
    192     {
    193         /*----------sub pel in both x and y direction---------*/
    194 
    195         UWORD8 *pu1_horz_src = pu1_src - (3 * src_strd);
    196         WORD32 hdst_buf_stride = wd;
    197         WORD16 *pi2_vert_src = pi2_hdst_scratch + (3 * hdst_buf_stride);
    198 
    199         /* horizontal filtering of source done in a scratch buffer first  */
    200         ps_func_selector->ihevc_inter_pred_luma_horz_w16out_fptr(
    201             pu1_horz_src,
    202             pi2_hdst_scratch,
    203             src_strd,
    204             hdst_buf_stride,
    205             &gai1_hevc_luma_filter_taps[dx][0],
    206             (ht + NTAPS_LUMA - 1),
    207             wd);
    208 
    209         /* vertical filtering on scratch buffer and stored in desitnation  */
    210         ps_func_selector->ihevc_inter_pred_luma_vert_w16inp_w16out_fptr(
    211             pi2_vert_src,
    212             pi2_dst,
    213             hdst_buf_stride,
    214             dst_strd,
    215             &gai1_hevc_luma_filter_taps[dy][0],
    216             ht,
    217             wd);
    218     }
    219     else if(0 == dy)
    220     {
    221         /*----------sub pel in x direction only ---------*/
    222 
    223         ps_func_selector->ihevc_inter_pred_luma_horz_w16out_fptr(
    224             pu1_src, pi2_dst, src_strd, dst_strd, &gai1_hevc_luma_filter_taps[dx][0], ht, wd);
    225     }
    226     else /* if (0 == dx) */
    227     {
    228         /*----------sub pel in y direction only ---------*/
    229 
    230         ps_func_selector->ihevc_inter_pred_luma_vert_w16out_fptr(
    231             pu1_src, pi2_dst, src_strd, dst_strd, &gai1_hevc_luma_filter_taps[dy][0], ht, wd);
    232     }
    233 }
    234 
    235 /**
    236 *******************************************************************************
    237 *
    238 * @brief
    239 *  Performs Luma inter pred based on sub pel position dxdy and store the result
    240 *  in a 8 bit destination buffer
    241 *
    242 * @param[in] pu1_src
    243 *  pointer to the source correspoding to integer pel position of a mv (left and
    244 *  top justified integer position)
    245 *
    246 * @param[out] pu1_dst
    247 *  UWORD8 pointer to the destination
    248 *
    249 * @param[in] src_strd
    250 *  source buffer stride
    251 *
    252 * @param[in] dst_strd
    253 *  destination buffer stride
    254 *
    255 * @param[in] pi2_hdst_scratch
    256 *  scratch buffer for intermediate storage of horizontal filter output; used as
    257 *  input for vertical filtering when sub pel components (dx != 0) && (dy != 0)
    258 *
    259 *  Max scratch buffer required is w * (h + 7) * sizeof(WORD16)
    260 *
    261 * @param[in] ht
    262 *  width of the prediction unit
    263 *
    264 * @param[in] wd
    265 *  width of the prediction unit
    266 *
    267 * @param[in] dx
    268 *  qpel position[0:3] of mv in x direction
    269 *
    270 * @param[in] dy
    271 *  qpel position[0:3] of mv in y direction
    272 *
    273 * @returns
    274 *   none
    275 *
    276 * @remarks
    277 *
    278 *******************************************************************************
    279 */
    280 void ihevce_luma_interpolate_8bit_dxdy(
    281     UWORD8 *pu1_src,
    282     UWORD8 *pu1_dst,
    283     WORD32 src_strd,
    284     WORD32 dst_strd,
    285     WORD16 *pi2_hdst_scratch,
    286     WORD32 ht,
    287     WORD32 wd,
    288     WORD32 dy,
    289     WORD32 dx,
    290     func_selector_t *ps_func_selector)
    291 {
    292     if((0 == dx) && (0 == dy))
    293     {
    294         /*--------- full pel position : copy input as is -------*/
    295 
    296         ps_func_selector->ihevc_inter_pred_luma_copy_fptr(
    297             pu1_src, pu1_dst, src_strd, dst_strd, &gai1_hevc_luma_filter_taps[0][0], ht, wd);
    298     }
    299     else if((0 != dx) && (0 != dy))
    300     {
    301         /*----------sub pel in both x and y direction---------*/
    302 
    303         UWORD8 *pu1_horz_src = pu1_src - (3 * src_strd);
    304         WORD32 hdst_buf_stride = wd;
    305         WORD16 *pi2_vert_src = pi2_hdst_scratch + (3 * hdst_buf_stride);
    306 
    307         /* horizontal filtering of source done in a scratch buffer first  */
    308         ps_func_selector->ihevc_inter_pred_luma_horz_w16out_fptr(
    309             pu1_horz_src,
    310             pi2_hdst_scratch,
    311             src_strd,
    312             hdst_buf_stride,
    313             &gai1_hevc_luma_filter_taps[dx][0],
    314             (ht + NTAPS_LUMA - 1),
    315             wd);
    316 
    317         /* vertical filtering on scratch buffer and stored in desitnation  */
    318         ps_func_selector->ihevc_inter_pred_luma_vert_w16inp_fptr(
    319             pi2_vert_src,
    320             pu1_dst,
    321             hdst_buf_stride,
    322             dst_strd,
    323             &gai1_hevc_luma_filter_taps[dy][0],
    324             ht,
    325             wd);
    326     }
    327     else if(0 == dy)
    328     {
    329         /*----------sub pel in x direction only ---------*/
    330 
    331         ps_func_selector->ihevc_inter_pred_luma_horz_fptr(
    332             pu1_src, pu1_dst, src_strd, dst_strd, &gai1_hevc_luma_filter_taps[dx][0], ht, wd);
    333     }
    334     else /* if (0 == dx) */
    335     {
    336         /*----------sub pel in y direction only ---------*/
    337 
    338         ps_func_selector->ihevc_inter_pred_luma_vert_fptr(
    339             pu1_src, pu1_dst, src_strd, dst_strd, &gai1_hevc_luma_filter_taps[dy][0], ht, wd);
    340     }
    341 }
    342 
    343 /**
    344 *******************************************************************************
    345 *
    346 * @brief
    347 *  Performs Luma prediction for a inter prediction unit(PU)
    348 *
    349 * @par Description:
    350 *  For a given PU, Inter prediction followed by weighted prediction (if
    351 *  required)
    352 *
    353 * @param[in] ps_inter_pred_ctxt
    354 *  context for inter prediction; contains ref list, weight offsets, ctb offsets
    355 *
    356 * @param[in] ps_pu
    357 *  pointer to PU structure whose inter prediction needs to be done
    358 *
    359 * @param[in] pu1_dst_buf
    360 *  pointer to destination buffer where the inter prediction is done
    361 *
    362 * @param[in] dst_stride
    363 *  pitch of the destination buffer
    364 *
    365 * @returns
    366 *   IV_FAIL for mvs going outside ref frame padded limits
    367 *   IV_SUCCESS after completing mc for given inter pu
    368 *
    369 * @remarks
    370 *
    371 *******************************************************************************
    372 */
    373 IV_API_CALL_STATUS_T ihevce_luma_inter_pred_pu(
    374     void *pv_inter_pred_ctxt,
    375     pu_t *ps_pu,
    376     void *pv_dst_buf,
    377     WORD32 dst_stride,
    378     WORD32 i4_flag_inter_pred_source)
    379 {
    380     inter_pred_ctxt_t *ps_inter_pred_ctxt = (inter_pred_ctxt_t *)pv_inter_pred_ctxt;
    381     func_selector_t *ps_func_selector = ps_inter_pred_ctxt->ps_func_selector;
    382 
    383     WORD32 inter_pred_idc = ps_pu->b2_pred_mode;
    384     UWORD8 *pu1_dst_buf = (UWORD8 *)pv_dst_buf;
    385     WORD32 pu_wd = (ps_pu->b4_wd + 1) << 2;
    386     WORD32 pu_ht = (ps_pu->b4_ht + 1) << 2;
    387 
    388     WORD32 wp_flag = ps_inter_pred_ctxt->i1_weighted_pred_flag ||
    389                      ps_inter_pred_ctxt->i1_weighted_bipred_flag;
    390 
    391     /* 16bit dest required for interpolate if weighted pred is on or bipred */
    392     WORD32 store_16bit_output;
    393 
    394     recon_pic_buf_t *ps_ref_pic_l0, *ps_ref_pic_l1;
    395     UWORD8 *pu1_ref_pic, *pu1_ref_int_pel;
    396     WORD32 ref_pic_stride;
    397 
    398     /* offset of reference block in integer pel units */
    399     WORD32 frm_x_ofst, frm_y_ofst;
    400     WORD32 frm_x_pu, frm_y_pu;
    401 
    402     /* scratch 16 bit buffers for interpolation in l0 and l1 direction */
    403     WORD16 *pi2_scr_buf_l0 = &ps_inter_pred_ctxt->ai2_scratch_buf_l0[0];
    404     WORD16 *pi2_scr_buf_l1 = &ps_inter_pred_ctxt->ai2_scratch_buf_l1[0];
    405 
    406     /* scratch buffer for horizontal interpolation destination */
    407     WORD16 *pi2_horz_scratch = &ps_inter_pred_ctxt->ai2_horz_scratch[0];
    408 
    409     WORD32 wgt0, wgt1, off0, off1, shift, lvl_shift0, lvl_shift1;
    410 
    411     /* get PU's frm x and frm y offset */
    412     frm_x_pu = ps_inter_pred_ctxt->i4_ctb_frm_pos_x + (ps_pu->b4_pos_x << 2);
    413     frm_y_pu = ps_inter_pred_ctxt->i4_ctb_frm_pos_y + (ps_pu->b4_pos_y << 2);
    414 
    415     /* sanity checks */
    416     ASSERT((wp_flag == 0) || (wp_flag == 1));
    417     ASSERT(dst_stride >= pu_wd);
    418     ASSERT(ps_pu->b1_intra_flag == 0);
    419 
    420     lvl_shift0 = 0;
    421     lvl_shift1 = 0;
    422 
    423     if(wp_flag)
    424     {
    425         UWORD8 u1_is_wgt_pred_L0, u1_is_wgt_pred_L1;
    426 
    427         if(inter_pred_idc != PRED_L1)
    428         {
    429             ps_ref_pic_l0 = ps_inter_pred_ctxt->ps_ref_list[0][ps_pu->mv.i1_l0_ref_idx];
    430             u1_is_wgt_pred_L0 = ps_ref_pic_l0->s_weight_offset.u1_luma_weight_enable_flag;
    431         }
    432         if(inter_pred_idc != PRED_L0)
    433         {
    434             ps_ref_pic_l1 = ps_inter_pred_ctxt->ps_ref_list[1][ps_pu->mv.i1_l1_ref_idx];
    435             u1_is_wgt_pred_L1 = ps_ref_pic_l1->s_weight_offset.u1_luma_weight_enable_flag;
    436         }
    437         if(inter_pred_idc == PRED_BI)
    438         {
    439             wp_flag = (u1_is_wgt_pred_L0 || u1_is_wgt_pred_L1);
    440         }
    441         else if(inter_pred_idc == PRED_L0)
    442         {
    443             wp_flag = u1_is_wgt_pred_L0;
    444         }
    445         else if(inter_pred_idc == PRED_L1)
    446         {
    447             wp_flag = u1_is_wgt_pred_L1;
    448         }
    449         else
    450         {
    451             /*other values are not allowed*/
    452             assert(0);
    453         }
    454     }
    455     store_16bit_output = (inter_pred_idc == PRED_BI) || (wp_flag);
    456 
    457     if(inter_pred_idc != PRED_L1)
    458     {
    459         /*****************************************************/
    460         /*              L0 inter prediction                  */
    461         /*****************************************************/
    462 
    463         /* motion vecs in qpel precision                    */
    464         WORD32 mv_x = ps_pu->mv.s_l0_mv.i2_mvx;
    465         WORD32 mv_y = ps_pu->mv.s_l0_mv.i2_mvy;
    466 
    467         /* sub pel offsets in x and y direction w.r.t integer pel   */
    468         WORD32 dx = mv_x & 0x3;
    469         WORD32 dy = mv_y & 0x3;
    470 
    471         /* ref idx is currently stored in the lower 4bits           */
    472         WORD32 ref_idx = (ps_pu->mv.i1_l0_ref_idx);
    473 
    474         /*  x and y integer offsets w.r.t frame start               */
    475         frm_x_ofst = (frm_x_pu + (mv_x >> 2));
    476         frm_y_ofst = (frm_y_pu + (mv_y >> 2));
    477 
    478         ps_ref_pic_l0 = ps_inter_pred_ctxt->ps_ref_list[0][ref_idx];
    479 
    480         /* picture buffer start and stride */
    481         if(i4_flag_inter_pred_source == 1)
    482         {
    483             pu1_ref_pic = (UWORD8 *)ps_ref_pic_l0->s_yuv_buf_desc_src.pv_y_buf;
    484         }
    485         else
    486         {
    487             pu1_ref_pic = (UWORD8 *)ps_ref_pic_l0->s_yuv_buf_desc.pv_y_buf;
    488         }
    489         ref_pic_stride = ps_ref_pic_l0->s_yuv_buf_desc.i4_y_strd;
    490 
    491         /* Error check for mvs going out of ref frame padded limits */
    492         {
    493             WORD32 min_x, max_x = ps_ref_pic_l0->s_yuv_buf_desc.i4_y_wd;
    494             WORD32 min_y, max_y = ps_ref_pic_l0->s_yuv_buf_desc.i4_y_ht;
    495 
    496             min_x =
    497                 -(ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_LEFT]
    498                       ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_LEFT] - 4)
    499                       : (PAD_HORZ - 4));
    500 
    501             max_x += ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_RIGHT]
    502                          ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_RIGHT] - 4)
    503                          : (PAD_HORZ - 4);
    504 
    505             min_y =
    506                 -(ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_TOP]
    507                       ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_TOP] - 4)
    508                       : (PAD_VERT - 4));
    509 
    510             max_y += ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_BOT]
    511                          ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_BOT] - 4)
    512                          : (PAD_VERT - 4);
    513 
    514             if((frm_x_ofst < min_x) || (frm_x_ofst + pu_wd) > max_x)
    515                 //ASSERT(0);
    516                 return (IV_FAIL);
    517 
    518             if((frm_y_ofst < min_y) || (frm_y_ofst + pu_ht) > max_y)
    519                 //ASSERT(0);
    520                 return (IV_FAIL);
    521         }
    522 
    523         /* point to reference start location in ref frame           */
    524         /* Assuming clipping of mv is not required here as ME would */
    525         /* take care of mv access not going beyond padded data      */
    526         pu1_ref_int_pel = pu1_ref_pic + frm_x_ofst + (ref_pic_stride * frm_y_ofst);
    527 
    528         /* level shifted for subpel with both x and y componenet being non 0 */
    529         /* this is because the interpolate function subtract this to contain */
    530         /* the resulting data in 16 bits                                     */
    531         lvl_shift0 = (dx != 0) && (dy != 0) ? OFFSET14 : 0;
    532 
    533         if(store_16bit_output)
    534         {
    535             /* do interpolation in 16bit L0 scratch buffer */
    536             ihevce_luma_interpolate_16bit_dxdy(
    537                 pu1_ref_int_pel,
    538                 pi2_scr_buf_l0,
    539                 ref_pic_stride,
    540                 pu_wd,
    541                 pi2_horz_scratch,
    542                 pu_ht,
    543                 pu_wd,
    544                 dy,
    545                 dx,
    546                 ps_func_selector);
    547         }
    548         else
    549         {
    550             /* do interpolation in 8bit destination buffer and return */
    551             ihevce_luma_interpolate_8bit_dxdy(
    552                 pu1_ref_int_pel,
    553                 pu1_dst_buf,
    554                 ref_pic_stride,
    555                 dst_stride,
    556                 pi2_horz_scratch,
    557                 pu_ht,
    558                 pu_wd,
    559                 dy,
    560                 dx,
    561                 ps_func_selector);
    562 
    563             return (IV_SUCCESS);
    564         }
    565     }
    566 
    567     if(inter_pred_idc != PRED_L0)
    568     {
    569         /*****************************************************/
    570         /*      L1 inter prediction                          */
    571         /*****************************************************/
    572 
    573         /* motion vecs in qpel precision                            */
    574         WORD32 mv_x = ps_pu->mv.s_l1_mv.i2_mvx;
    575         WORD32 mv_y = ps_pu->mv.s_l1_mv.i2_mvy;
    576 
    577         /* sub pel offsets in x and y direction w.r.t integer pel   */
    578         WORD32 dx = mv_x & 0x3;
    579         WORD32 dy = mv_y & 0x3;
    580 
    581         /* ref idx is currently stored in the lower 4bits           */
    582         WORD32 ref_idx = (ps_pu->mv.i1_l1_ref_idx);
    583 
    584         /*  x and y integer offsets w.r.t frame start               */
    585         frm_x_ofst = (frm_x_pu + (mv_x >> 2));
    586         frm_y_ofst = (frm_y_pu + (mv_y >> 2));
    587 
    588         ps_ref_pic_l1 = ps_inter_pred_ctxt->ps_ref_list[1][ref_idx];
    589 
    590         /* picture buffer start and stride */
    591 
    592         if(i4_flag_inter_pred_source == 1)
    593         {
    594             pu1_ref_pic = (UWORD8 *)ps_ref_pic_l1->s_yuv_buf_desc_src.pv_y_buf;
    595         }
    596         else
    597         {
    598             pu1_ref_pic = (UWORD8 *)ps_ref_pic_l1->s_yuv_buf_desc.pv_y_buf;
    599         }
    600         ref_pic_stride = ps_ref_pic_l1->s_yuv_buf_desc.i4_y_strd;
    601 
    602         /* Error check for mvs going out of ref frame padded limits */
    603         {
    604             WORD32 min_x, max_x = ps_ref_pic_l1->s_yuv_buf_desc.i4_y_wd;
    605             WORD32 min_y, max_y = ps_ref_pic_l1->s_yuv_buf_desc.i4_y_ht;
    606 
    607             min_x =
    608                 -(ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_LEFT]
    609                       ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_LEFT] - 4)
    610                       : (PAD_HORZ - 4));
    611 
    612             max_x += ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_RIGHT]
    613                          ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_RIGHT] - 4)
    614                          : (PAD_HORZ - 4);
    615 
    616             min_y =
    617                 -(ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_TOP]
    618                       ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_TOP] - 4)
    619                       : (PAD_VERT - 4));
    620 
    621             max_y += ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_BOT]
    622                          ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_BOT] - 4)
    623                          : (PAD_VERT - 4);
    624 
    625             if((frm_x_ofst < min_x) || (frm_x_ofst + pu_wd) > max_x)
    626                 //ASSERT(0);
    627                 return (IV_FAIL);
    628 
    629             if((frm_y_ofst < min_y) || (frm_y_ofst + pu_ht) > max_y)
    630                 //ASSERT(0);
    631                 return (IV_FAIL);
    632         }
    633 
    634         /* point to reference start location in ref frame           */
    635         /* Assuming clipping of mv is not required here as ME would */
    636         /* take care of mv access not going beyond padded data      */
    637         pu1_ref_int_pel = pu1_ref_pic + frm_x_ofst + (ref_pic_stride * frm_y_ofst);
    638 
    639         /* level shifted for subpel with both x and y componenet being non 0 */
    640         /* this is because the interpolate function subtract this to contain */
    641         /* the resulting data in 16 bits                                     */
    642         lvl_shift1 = (dx != 0) && (dy != 0) ? OFFSET14 : 0;
    643 
    644         if(store_16bit_output)
    645         {
    646             /* do interpolation in 16bit L1 scratch buffer */
    647             ihevce_luma_interpolate_16bit_dxdy(
    648                 pu1_ref_int_pel,
    649                 pi2_scr_buf_l1,
    650                 ref_pic_stride,
    651                 pu_wd,
    652                 pi2_horz_scratch,
    653                 pu_ht,
    654                 pu_wd,
    655                 dy,
    656                 dx,
    657                 ps_func_selector);
    658         }
    659         else
    660         {
    661             /* do interpolation in 8bit destination buffer and return */
    662             ihevce_luma_interpolate_8bit_dxdy(
    663                 pu1_ref_int_pel,
    664                 pu1_dst_buf,
    665                 ref_pic_stride,
    666                 dst_stride,
    667                 pi2_horz_scratch,
    668                 pu_ht,
    669                 pu_wd,
    670                 dy,
    671                 dx,
    672                 ps_func_selector);
    673 
    674             return (IV_SUCCESS);
    675         }
    676     }
    677 
    678     if((inter_pred_idc != PRED_BI) && wp_flag)
    679     {
    680         /*****************************************************/
    681         /*      unidirection weighted prediction             */
    682         /*****************************************************/
    683         ihevce_wght_offst_t *ps_weight_offset;
    684         WORD16 *pi2_src;
    685         WORD32 lvl_shift;
    686 
    687         /* intialize the weight, offsets and ref based on l0/l1 mode */
    688         if(inter_pred_idc == PRED_L0)
    689         {
    690             pi2_src = pi2_scr_buf_l0;
    691             ps_weight_offset = &ps_ref_pic_l0->s_weight_offset;
    692             lvl_shift = lvl_shift0;
    693         }
    694         else
    695         {
    696             pi2_src = pi2_scr_buf_l1;
    697             ps_weight_offset = &ps_ref_pic_l1->s_weight_offset;
    698             lvl_shift = lvl_shift1;
    699         }
    700 
    701         wgt0 = ps_weight_offset->i2_luma_weight;
    702         off0 = ps_weight_offset->i2_luma_offset;
    703         shift = ps_inter_pred_ctxt->i4_log2_luma_wght_denom + SHIFT_14_MINUS_BIT_DEPTH;
    704 
    705         /* do the uni directional weighted prediction */
    706         ps_func_selector->ihevc_weighted_pred_uni_fptr(
    707             pi2_src, pu1_dst_buf, pu_wd, dst_stride, wgt0, off0, shift, lvl_shift, pu_ht, pu_wd);
    708     }
    709     else
    710     {
    711         /*****************************************************/
    712         /*              Bipred  prediction                   */
    713         /*****************************************************/
    714 
    715         if(wp_flag)
    716         {
    717             /*****************************************************/
    718             /*      Bi pred  weighted prediction                 */
    719             /*****************************************************/
    720             wgt0 = ps_ref_pic_l0->s_weight_offset.i2_luma_weight;
    721             off0 = ps_ref_pic_l0->s_weight_offset.i2_luma_offset;
    722 
    723             wgt1 = ps_ref_pic_l1->s_weight_offset.i2_luma_weight;
    724             off1 = ps_ref_pic_l1->s_weight_offset.i2_luma_offset;
    725 
    726             shift = ps_inter_pred_ctxt->i4_log2_luma_wght_denom + SHIFT_14_MINUS_BIT_DEPTH + 1;
    727 
    728             ps_func_selector->ihevc_weighted_pred_bi_fptr(
    729                 pi2_scr_buf_l0,
    730                 pi2_scr_buf_l1,
    731                 pu1_dst_buf,
    732                 pu_wd,
    733                 pu_wd,
    734                 dst_stride,
    735                 wgt0,
    736                 off0,
    737                 wgt1,
    738                 off1,
    739                 shift,
    740                 lvl_shift0,
    741                 lvl_shift1,
    742                 pu_ht,
    743                 pu_wd);
    744         }
    745         else
    746         {
    747             /*****************************************************/
    748             /*          Default Bi pred  prediction              */
    749             /*****************************************************/
    750             ps_func_selector->ihevc_weighted_pred_bi_default_fptr(
    751                 pi2_scr_buf_l0,
    752                 pi2_scr_buf_l1,
    753                 pu1_dst_buf,
    754                 pu_wd,
    755                 pu_wd,
    756                 dst_stride,
    757                 lvl_shift0,
    758                 lvl_shift1,
    759                 pu_ht,
    760                 pu_wd);
    761         }
    762     }
    763 
    764     return (IV_SUCCESS);
    765 }
    766 
    767 /**
    768 *******************************************************************************
    769 *
    770 * @brief
    771 *  Performs Chroma inter pred based on sub pel position dxdy and store the
    772 *  result in a 16 bit destination buffer
    773 *
    774 * @param[in] pu1_src
    775 *  pointer to the source correspoding to integer pel position of a mv (left and
    776 *  top justified integer position)
    777 *
    778 * @param[out] pi2_dst
    779 *  WORD16 pointer to the destination
    780 *
    781 * @param[in] src_strd
    782 *  source buffer stride
    783 *
    784 * @param[in] dst_strd
    785 *  destination buffer stride
    786 *
    787 * @param[in] pi2_hdst_scratch
    788 *  scratch buffer for intermediate storage of horizontal filter output; used as
    789 *  input for vertical filtering when sub pel components (dx != 0) && (dy != 0)
    790 *
    791 *  Max scratch buffer required is w * (h + 3) * sizeof(WORD16)
    792 *
    793 * @param[in] ht
    794 *  width of the prediction unit
    795 *
    796 * @param[in] wd
    797 *  width of the prediction unit
    798 *
    799 * @param[in] dx
    800 *  1/8th pel position[0:7] of mv in x direction
    801 *
    802 * @param[in] dy
    803 *  1/8th pel position[0:7] of mv in y direction
    804 *
    805 * @returns
    806 *   none
    807 *
    808 * @remarks
    809 *
    810 *******************************************************************************
    811 */
    812 void ihevce_chroma_interpolate_16bit_dxdy(
    813     UWORD8 *pu1_src,
    814     WORD16 *pi2_dst,
    815     WORD32 src_strd,
    816     WORD32 dst_strd,
    817     WORD16 *pi2_hdst_scratch,
    818     WORD32 ht,
    819     WORD32 wd,
    820     WORD32 dy,
    821     WORD32 dx,
    822     func_selector_t *ps_func_selector)
    823 {
    824     if((0 == dx) && (0 == dy))
    825     {
    826         /*--------- full pel position : copy input by upscaling-------*/
    827 
    828         ps_func_selector->ihevc_inter_pred_chroma_copy_w16out_fptr(
    829             pu1_src, pi2_dst, src_strd, dst_strd, &gai1_hevc_chroma_filter_taps[0][0], ht, wd);
    830     }
    831     else if((0 != dx) && (0 != dy))
    832     {
    833         /*----------sub pel in both x and y direction---------*/
    834 
    835         UWORD8 *pu1_horz_src = pu1_src - src_strd;
    836         WORD32 hdst_buf_stride = (wd << 1); /* uv interleave */
    837         WORD16 *pi2_vert_src = pi2_hdst_scratch + hdst_buf_stride;
    838 
    839         /* horizontal filtering of source done in a scratch buffer first  */
    840         ps_func_selector->ihevc_inter_pred_chroma_horz_w16out_fptr(
    841             pu1_horz_src,
    842             pi2_hdst_scratch,
    843             src_strd,
    844             hdst_buf_stride,
    845             &gai1_hevc_chroma_filter_taps[dx][0],
    846             (ht + NTAPS_CHROMA - 1),
    847             wd);
    848 
    849         /* vertical filtering on scratch buffer and stored in desitnation  */
    850         ps_func_selector->ihevc_inter_pred_chroma_vert_w16inp_w16out_fptr(
    851             pi2_vert_src,
    852             pi2_dst,
    853             hdst_buf_stride,
    854             dst_strd,
    855             &gai1_hevc_chroma_filter_taps[dy][0],
    856             ht,
    857             wd);
    858     }
    859     else if(0 == dy)
    860     {
    861         /*----------sub pel in x direction only ---------*/
    862 
    863         ps_func_selector->ihevc_inter_pred_chroma_horz_w16out_fptr(
    864             pu1_src, pi2_dst, src_strd, dst_strd, &gai1_hevc_chroma_filter_taps[dx][0], ht, wd);
    865     }
    866     else /* if (0 == dx) */
    867     {
    868         /*----------sub pel in y direction only ---------*/
    869 
    870         ps_func_selector->ihevc_inter_pred_chroma_vert_w16out_fptr(
    871             pu1_src, pi2_dst, src_strd, dst_strd, &gai1_hevc_chroma_filter_taps[dy][0], ht, wd);
    872     }
    873 }
    874 
    875 /**
    876 *******************************************************************************
    877 *
    878 * @brief
    879 *  Performs Chroma inter pred based on sub pel position dxdy and store the
    880 *  result in a 8 bit destination buffer
    881 *
    882 * @param[in] pu1_src
    883 *  pointer to the source correspoding to integer pel position of a mv (left and
    884 *  top justified integer position)
    885 *
    886 * @param[out] pu1_dst
    887 *  UWORD8 pointer to the destination
    888 *
    889 * @param[in] src_strd
    890 *  source buffer stride
    891 *
    892 * @param[in] dst_strd
    893 *  destination buffer stride
    894 *
    895 * @param[in] pi2_hdst_scratch
    896 *  scratch buffer for intermediate storage of horizontal filter output; used as
    897 *  input for vertical filtering when sub pel components (dx != 0) && (dy != 0)
    898 *
    899 *  Max scratch buffer required is w * (h + 3) * sizeof(WORD16)
    900 *
    901 * @param[in] ht
    902 *  width of the prediction unit
    903 *
    904 * @param[in] wd
    905 *  width of the prediction unit
    906 *
    907 * @param[in] dx
    908 *  1/8th pel position[0:7] of mv in x direction
    909 *
    910 * @param[in] dy
    911 *  1/8th pel position[0:7] of mv in y direction
    912 *
    913 * @returns
    914 *   none
    915 *
    916 * @remarks
    917 *
    918 *******************************************************************************
    919 */
    920 void ihevce_chroma_interpolate_8bit_dxdy(
    921     UWORD8 *pu1_src,
    922     UWORD8 *pu1_dst,
    923     WORD32 src_strd,
    924     WORD32 dst_strd,
    925     WORD16 *pi2_hdst_scratch,
    926     WORD32 ht,
    927     WORD32 wd,
    928     WORD32 dy,
    929     WORD32 dx,
    930     func_selector_t *ps_func_selector)
    931 {
    932     if((0 == dx) && (0 == dy))
    933     {
    934         /*--------- full pel position : copy input as is -------*/
    935         ps_func_selector->ihevc_inter_pred_chroma_copy_fptr(
    936             pu1_src, pu1_dst, src_strd, dst_strd, &gai1_hevc_chroma_filter_taps[0][0], ht, wd);
    937     }
    938     else if((0 != dx) && (0 != dy))
    939     {
    940         /*----------sub pel in both x and y direction---------*/
    941         UWORD8 *pu1_horz_src = pu1_src - src_strd;
    942         WORD32 hdst_buf_stride = (wd << 1); /* uv interleave */
    943         WORD16 *pi2_vert_src = pi2_hdst_scratch + hdst_buf_stride;
    944 
    945         /* horizontal filtering of source done in a scratch buffer first  */
    946         ps_func_selector->ihevc_inter_pred_chroma_horz_w16out_fptr(
    947             pu1_horz_src,
    948             pi2_hdst_scratch,
    949             src_strd,
    950             hdst_buf_stride,
    951             &gai1_hevc_chroma_filter_taps[dx][0],
    952             (ht + NTAPS_CHROMA - 1),
    953             wd);
    954 
    955         /* vertical filtering on scratch buffer and stored in desitnation  */
    956         ps_func_selector->ihevc_inter_pred_chroma_vert_w16inp_fptr(
    957             pi2_vert_src,
    958             pu1_dst,
    959             hdst_buf_stride,
    960             dst_strd,
    961             &gai1_hevc_chroma_filter_taps[dy][0],
    962             ht,
    963             wd);
    964     }
    965     else if(0 == dy)
    966     {
    967         /*----------sub pel in x direction only ---------*/
    968         ps_func_selector->ihevc_inter_pred_chroma_horz_fptr(
    969             pu1_src, pu1_dst, src_strd, dst_strd, &gai1_hevc_chroma_filter_taps[dx][0], ht, wd);
    970     }
    971     else /* if (0 == dx) */
    972     {
    973         /*----------sub pel in y direction only ---------*/
    974         ps_func_selector->ihevc_inter_pred_chroma_vert_fptr(
    975             pu1_src, pu1_dst, src_strd, dst_strd, &gai1_hevc_chroma_filter_taps[dy][0], ht, wd);
    976     }
    977 }
    978 
    979 /**
    980 *******************************************************************************
    981 *
    982 * @brief
    983 *  Performs Chroma prediction for a inter prediction unit(PU)
    984 *
    985 * @par Description:
    986 *  For a given PU, Inter prediction followed by weighted prediction (if
    987 *  required). The reference and destination buffers are uv interleaved
    988 *
    989 * @param[in] ps_inter_pred_ctxt
    990 *  context for inter prediction; contains ref list, weight offsets, ctb offsets
    991 *
    992 * @param[in] ps_pu
    993 *  pointer to PU structure whose inter prediction needs to be done
    994 *
    995 * @param[in] pu1_dst_buf
    996 *  pointer to destination buffer where the inter prediction is done
    997 *
    998 * @param[in] dst_stride
    999 *  pitch of the destination buffer
   1000 *
   1001 * @returns
   1002 *   none
   1003 *
   1004 * @remarks
   1005 *
   1006 *******************************************************************************
   1007 */
   1008 void ihevce_chroma_inter_pred_pu(
   1009     void *pv_inter_pred_ctxt, pu_t *ps_pu, UWORD8 *pu1_dst_buf, WORD32 dst_stride)
   1010 {
   1011     inter_pred_ctxt_t *ps_inter_pred_ctxt = (inter_pred_ctxt_t *)pv_inter_pred_ctxt;
   1012     func_selector_t *ps_func_selector = ps_inter_pred_ctxt->ps_func_selector;
   1013 
   1014     WORD32 inter_pred_idc = ps_pu->b2_pred_mode;
   1015     UWORD8 u1_is_422 = (ps_inter_pred_ctxt->u1_chroma_array_type == 2);
   1016     /* chroma width and height are half of luma width and height */
   1017     WORD32 pu_wd_chroma = (ps_pu->b4_wd + 1) << 1;
   1018     WORD32 pu_ht_chroma = (ps_pu->b4_ht + 1) << (u1_is_422 + 1);
   1019 
   1020     WORD32 wp_flag = ps_inter_pred_ctxt->i1_weighted_pred_flag ||
   1021                      ps_inter_pred_ctxt->i1_weighted_bipred_flag;
   1022 
   1023     /* 16bit dest required for interpolate if weighted pred is on or bipred */
   1024     WORD32 store_16bit_output;
   1025 
   1026     recon_pic_buf_t *ps_ref_pic_l0, *ps_ref_pic_l1;
   1027     UWORD8 *pu1_ref_pic, *pu1_ref_int_pel;
   1028     WORD32 ref_pic_stride;
   1029 
   1030     /* offset of reference block in integer pel units */
   1031     WORD32 frm_x_ofst, frm_y_ofst;
   1032     WORD32 frm_x_pu, frm_y_pu;
   1033 
   1034     /* scratch 16 bit buffers for interpolation in l0 and l1 direction */
   1035     WORD16 *pi2_scr_buf_l0 = &ps_inter_pred_ctxt->ai2_scratch_buf_l0[0];
   1036     WORD16 *pi2_scr_buf_l1 = &ps_inter_pred_ctxt->ai2_scratch_buf_l1[0];
   1037 
   1038     /* scratch buffer for horizontal interpolation destination */
   1039     WORD16 *pi2_horz_scratch = &ps_inter_pred_ctxt->ai2_horz_scratch[0];
   1040 
   1041     /* get PU's frm x and frm y offset : Note uv is interleaved */
   1042     frm_x_pu = ps_inter_pred_ctxt->i4_ctb_frm_pos_x + (ps_pu->b4_pos_x << 2);
   1043     frm_y_pu = (ps_inter_pred_ctxt->i4_ctb_frm_pos_y >> (u1_is_422 == 0)) +
   1044                (ps_pu->b4_pos_y << (u1_is_422 + 1));
   1045 
   1046     /* sanity checks */
   1047     ASSERT((wp_flag == 0) || (wp_flag == 1));
   1048     ASSERT(dst_stride >= (pu_wd_chroma << 1)); /* uv interleaved */
   1049     ASSERT(ps_pu->b1_intra_flag == 0);
   1050 
   1051     if(wp_flag)
   1052     {
   1053         UWORD8 u1_is_wgt_pred_L0, u1_is_wgt_pred_L1;
   1054 
   1055         if(inter_pred_idc != PRED_L1)
   1056         {
   1057             ps_ref_pic_l0 = ps_inter_pred_ctxt->ps_ref_list[0][ps_pu->mv.i1_l0_ref_idx];
   1058             u1_is_wgt_pred_L0 = ps_ref_pic_l0->s_weight_offset.u1_chroma_weight_enable_flag;
   1059         }
   1060         if(inter_pred_idc != PRED_L0)
   1061         {
   1062             ps_ref_pic_l1 = ps_inter_pred_ctxt->ps_ref_list[1][ps_pu->mv.i1_l1_ref_idx];
   1063             u1_is_wgt_pred_L1 = ps_ref_pic_l1->s_weight_offset.u1_chroma_weight_enable_flag;
   1064         }
   1065         if(inter_pred_idc == PRED_BI)
   1066         {
   1067             wp_flag = (u1_is_wgt_pred_L0 || u1_is_wgt_pred_L1);
   1068         }
   1069         else if(inter_pred_idc == PRED_L0)
   1070         {
   1071             wp_flag = u1_is_wgt_pred_L0;
   1072         }
   1073         else if(inter_pred_idc == PRED_L1)
   1074         {
   1075             wp_flag = u1_is_wgt_pred_L1;
   1076         }
   1077         else
   1078         {
   1079             /*other values are not allowed*/
   1080             assert(0);
   1081         }
   1082     }
   1083     store_16bit_output = (inter_pred_idc == PRED_BI) || (wp_flag);
   1084 
   1085     if(inter_pred_idc != PRED_L1)
   1086     {
   1087         /*****************************************************/
   1088         /*              L0 inter prediction(Chroma )         */
   1089         /*****************************************************/
   1090 
   1091         /* motion vecs in qpel precision                    */
   1092         WORD32 mv_x = ps_pu->mv.s_l0_mv.i2_mvx;
   1093         WORD32 mv_y = ps_pu->mv.s_l0_mv.i2_mvy;
   1094 
   1095         /* sub pel offsets in x and y direction w.r.t integer pel   */
   1096         WORD32 dx = mv_x & 0x7;
   1097         WORD32 dy = (mv_y & ((1 << (!u1_is_422 + 2)) - 1)) << u1_is_422;
   1098 
   1099         /* ref idx is currently stored in the lower 4bits           */
   1100         WORD32 ref_idx = (ps_pu->mv.i1_l0_ref_idx);
   1101 
   1102         /*  x and y integer offsets w.r.t frame start               */
   1103 
   1104         frm_x_ofst = (frm_x_pu + ((mv_x >> 3) << 1)); /* uv interleaved */
   1105         frm_y_ofst = (frm_y_pu + ((mv_y >> (3 - u1_is_422))));
   1106 
   1107         ps_ref_pic_l0 = ps_inter_pred_ctxt->ps_ref_list[0][ref_idx];
   1108 
   1109         /* picture buffer start and stride */
   1110         pu1_ref_pic = (UWORD8 *)ps_ref_pic_l0->s_yuv_buf_desc.pv_u_buf;
   1111         ref_pic_stride = ps_ref_pic_l0->s_yuv_buf_desc.i4_uv_strd;
   1112 
   1113         /* point to reference start location in ref frame           */
   1114         /* Assuming clipping of mv is not required here as ME would */
   1115         /* take care of mv access not going beyond padded data      */
   1116         pu1_ref_int_pel = pu1_ref_pic + frm_x_ofst + (ref_pic_stride * frm_y_ofst);
   1117 
   1118         if(store_16bit_output)
   1119         {
   1120             /* do interpolation in 16bit L0 scratch buffer */
   1121             ihevce_chroma_interpolate_16bit_dxdy(
   1122                 pu1_ref_int_pel,
   1123                 pi2_scr_buf_l0,
   1124                 ref_pic_stride,
   1125                 (pu_wd_chroma << 1),
   1126                 pi2_horz_scratch,
   1127                 pu_ht_chroma,
   1128                 pu_wd_chroma,
   1129                 dy,
   1130                 dx,
   1131                 ps_func_selector);
   1132         }
   1133         else
   1134         {
   1135             /* do interpolation in 8bit destination buffer and return */
   1136             ihevce_chroma_interpolate_8bit_dxdy(
   1137                 pu1_ref_int_pel,
   1138                 pu1_dst_buf,
   1139                 ref_pic_stride,
   1140                 dst_stride,
   1141                 pi2_horz_scratch,
   1142                 pu_ht_chroma,
   1143                 pu_wd_chroma,
   1144                 dy,
   1145                 dx,
   1146                 ps_func_selector);
   1147 
   1148             return;
   1149         }
   1150     }
   1151 
   1152     if(inter_pred_idc != PRED_L0)
   1153     {
   1154         /*****************************************************/
   1155         /*      L1 inter prediction(Chroma)                  */
   1156         /*****************************************************/
   1157 
   1158         /* motion vecs in qpel precision                            */
   1159         WORD32 mv_x = ps_pu->mv.s_l1_mv.i2_mvx;
   1160         WORD32 mv_y = ps_pu->mv.s_l1_mv.i2_mvy;
   1161 
   1162         /* sub pel offsets in x and y direction w.r.t integer pel   */
   1163         WORD32 dx = mv_x & 0x7;
   1164         WORD32 dy = (mv_y & ((1 << (!u1_is_422 + 2)) - 1)) << u1_is_422;
   1165 
   1166         /* ref idx is currently stored in the lower 4bits           */
   1167         WORD32 ref_idx = (ps_pu->mv.i1_l1_ref_idx);
   1168 
   1169         /*  x and y integer offsets w.r.t frame start               */
   1170         frm_x_ofst = (frm_x_pu + ((mv_x >> 3) << 1)); /* uv interleaved */
   1171         frm_y_ofst = (frm_y_pu + ((mv_y >> (3 - u1_is_422))));
   1172 
   1173         ps_ref_pic_l1 = ps_inter_pred_ctxt->ps_ref_list[1][ref_idx];
   1174 
   1175         /* picture buffer start and stride */
   1176         pu1_ref_pic = (UWORD8 *)ps_ref_pic_l1->s_yuv_buf_desc.pv_u_buf;
   1177         ref_pic_stride = ps_ref_pic_l1->s_yuv_buf_desc.i4_uv_strd;
   1178 
   1179         /* point to reference start location in ref frame           */
   1180         /* Assuming clipping of mv is not required here as ME would */
   1181         /* take care of mv access not going beyond padded data      */
   1182         pu1_ref_int_pel = pu1_ref_pic + frm_x_ofst + (ref_pic_stride * frm_y_ofst);
   1183 
   1184         if(store_16bit_output)
   1185         {
   1186             /* do interpolation in 16bit L1 scratch buffer */
   1187             ihevce_chroma_interpolate_16bit_dxdy(
   1188                 pu1_ref_int_pel,
   1189                 pi2_scr_buf_l1,
   1190                 ref_pic_stride,
   1191                 (pu_wd_chroma << 1),
   1192                 pi2_horz_scratch,
   1193                 pu_ht_chroma,
   1194                 pu_wd_chroma,
   1195                 dy,
   1196                 dx,
   1197                 ps_func_selector);
   1198         }
   1199         else
   1200         {
   1201             /* do interpolation in 8bit destination buffer and return */
   1202             ihevce_chroma_interpolate_8bit_dxdy(
   1203                 pu1_ref_int_pel,
   1204                 pu1_dst_buf,
   1205                 ref_pic_stride,
   1206                 dst_stride,
   1207                 pi2_horz_scratch,
   1208                 pu_ht_chroma,
   1209                 pu_wd_chroma,
   1210                 dy,
   1211                 dx,
   1212                 ps_func_selector);
   1213 
   1214             return;
   1215         }
   1216     }
   1217 
   1218     if((inter_pred_idc != PRED_BI) && wp_flag)
   1219     {
   1220         /*****************************************************/
   1221         /*      unidirection weighted prediction(Chroma)     */
   1222         /*****************************************************/
   1223         ihevce_wght_offst_t *ps_weight_offset;
   1224         WORD16 *pi2_src;
   1225         WORD32 lvl_shift = 0;
   1226         WORD32 wgt_cb, wgt_cr, off_cb, off_cr;
   1227         WORD32 shift;
   1228 
   1229         /* intialize the weight, offsets and ref based on l0/l1 mode */
   1230         if(inter_pred_idc == PRED_L0)
   1231         {
   1232             pi2_src = pi2_scr_buf_l0;
   1233             ps_weight_offset = &ps_ref_pic_l0->s_weight_offset;
   1234         }
   1235         else
   1236         {
   1237             pi2_src = pi2_scr_buf_l1;
   1238             ps_weight_offset = &ps_ref_pic_l1->s_weight_offset;
   1239         }
   1240 
   1241         wgt_cb = ps_weight_offset->i2_cb_weight;
   1242         off_cb = ps_weight_offset->i2_cb_offset;
   1243         wgt_cr = ps_weight_offset->i2_cr_weight;
   1244         off_cr = ps_weight_offset->i2_cr_offset;
   1245 
   1246         shift = ps_inter_pred_ctxt->i4_log2_chroma_wght_denom + SHIFT_14_MINUS_BIT_DEPTH;
   1247 
   1248         /* do the uni directional weighted prediction */
   1249         ps_func_selector->ihevc_weighted_pred_chroma_uni_fptr(
   1250             pi2_src,
   1251             pu1_dst_buf,
   1252             (pu_wd_chroma << 1),
   1253             dst_stride,
   1254             wgt_cb,
   1255             wgt_cr,
   1256             off_cb,
   1257             off_cr,
   1258             shift,
   1259             lvl_shift,
   1260             pu_ht_chroma,
   1261             pu_wd_chroma);
   1262     }
   1263     else
   1264     {
   1265         /*****************************************************/
   1266         /*              Bipred  prediction(Chroma)           */
   1267         /*****************************************************/
   1268         if(wp_flag)
   1269         {
   1270             WORD32 wgt0_cb, wgt1_cb, wgt0_cr, wgt1_cr;
   1271             WORD32 off0_cb, off1_cb, off0_cr, off1_cr;
   1272             WORD32 shift;
   1273 
   1274             /*****************************************************/
   1275             /*      Bi pred  weighted prediction (Chroma)        */
   1276             /*****************************************************/
   1277             wgt0_cb = ps_ref_pic_l0->s_weight_offset.i2_cb_weight;
   1278             off0_cb = ps_ref_pic_l0->s_weight_offset.i2_cb_offset;
   1279 
   1280             wgt0_cr = ps_ref_pic_l0->s_weight_offset.i2_cr_weight;
   1281             off0_cr = ps_ref_pic_l0->s_weight_offset.i2_cr_offset;
   1282 
   1283             wgt1_cb = ps_ref_pic_l1->s_weight_offset.i2_cb_weight;
   1284             off1_cb = ps_ref_pic_l1->s_weight_offset.i2_cb_offset;
   1285 
   1286             wgt1_cr = ps_ref_pic_l1->s_weight_offset.i2_cr_weight;
   1287             off1_cr = ps_ref_pic_l1->s_weight_offset.i2_cr_offset;
   1288 
   1289             shift = ps_inter_pred_ctxt->i4_log2_chroma_wght_denom + SHIFT_14_MINUS_BIT_DEPTH + 1;
   1290 
   1291             ps_func_selector->ihevc_weighted_pred_chroma_bi_fptr(
   1292                 pi2_scr_buf_l0,
   1293                 pi2_scr_buf_l1,
   1294                 pu1_dst_buf,
   1295                 (pu_wd_chroma << 1),
   1296                 (pu_wd_chroma << 1),
   1297                 dst_stride,
   1298                 wgt0_cb,
   1299                 wgt0_cr,
   1300                 off0_cb,
   1301                 off0_cr,
   1302                 wgt1_cb,
   1303                 wgt1_cr,
   1304                 off1_cb,
   1305                 off1_cr,
   1306                 shift,
   1307                 0,
   1308                 0,
   1309                 pu_ht_chroma,
   1310                 pu_wd_chroma);
   1311         }
   1312         else
   1313         {
   1314             /*****************************************************/
   1315             /*          Default Bi pred  prediction (Chroma)     */
   1316             /*****************************************************/
   1317             ps_func_selector->ihevc_weighted_pred_chroma_bi_default_fptr(
   1318                 pi2_scr_buf_l0,
   1319                 pi2_scr_buf_l1,
   1320                 pu1_dst_buf,
   1321                 (pu_wd_chroma << 1),
   1322                 (pu_wd_chroma << 1),
   1323                 dst_stride,
   1324                 0,
   1325                 0,
   1326                 pu_ht_chroma,
   1327                 pu_wd_chroma);
   1328         }
   1329     }
   1330 }
   1331