Home | History | Annotate | Download | only in common
      1 /******************************************************************************
      2  *
      3  * Copyright (C) 2015 The Android Open Source Project
      4  *
      5  * Licensed under the Apache License, Version 2.0 (the "License");
      6  * you may not use this file except in compliance with the License.
      7  * You may obtain a copy of the License at:
      8  *
      9  * http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  *
     17  *****************************************************************************
     18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
     19 */
     20 /**
     21  *******************************************************************************
     22  * @file
     23  *  ih264_inter_pred_filters.c
     24  *
     25  * @brief
     26  *  Contains function definitions for inter prediction interpolation filters
     27  *
     28  * @author
     29  *  Ittiam
     30  *
     31  * @par List of Functions:
     32  *  - ih264_inter_pred_luma_copy
     33  *  - ih264_interleave_copy
     34  *  - ih264_inter_pred_luma_horz
     35  *  - ih264_inter_pred_luma_vert
     36  *  - ih264_inter_pred_luma_horz_hpel_vert_hpel
     37  *  - ih264_inter_pred_luma_horz_qpel
     38  *  - ih264_inter_pred_luma_vert_qpel
     39  *  - ih264_inter_pred_luma_horz_qpel_vert_qpel
     40  *  - ih264_inter_pred_luma_horz_hpel_vert_qpel
     41  *  - ih264_inter_pred_luma_horz_qpel_vert_hpel
     42  *  - ih264_inter_pred_luma_bilinear
     43  *  - ih264_inter_pred_chroma
     44  *
     45  * @remarks
     46  *  None
     47  *
     48  *******************************************************************************
     49  */
     50 
     51 /*****************************************************************************/
     52 /* File Includes                                                             */
     53 /*****************************************************************************/
     54 
     55 /* User include files */
     56 #include "ih264_typedefs.h"
     57 #include "ih264_macros.h"
     58 #include "ih264_platform_macros.h"
     59 #include "ih264_inter_pred_filters.h"
     60 
     61 
     62 /*****************************************************************************/
     63 /* Constant Data variables                                                   */
     64 /*****************************************************************************/
     65 
     66 /* coefficients for 6 tap filtering*/
     67 const WORD32 ih264_g_six_tap[3] ={1,-5,20};
     68 
     69 
     70 /*****************************************************************************/
     71 /*  Function definitions .                                                   */
     72 /*****************************************************************************/
     73 /**
     74  *******************************************************************************
     75  *
     76  * @brief
     77  * Interprediction luma function for copy
     78  *
     79  * @par Description:
     80  *    Copies the array of width 'wd' and height 'ht' from the  location pointed
     81  *    by 'src' to the location pointed by 'dst'
     82  *
     83  * @param[in] pu1_src
     84  *  UWORD8 pointer to the source
     85  *
     86  * @param[out] pu1_dst
     87  *  UWORD8 pointer to the destination
     88  *
     89  * @param[in] src_strd
     90  *  integer source stride
     91  *
     92  * @param[in] dst_strd
     93  *  integer destination stride
     94  *
     95  *
     96  * @param[in] ht
     97  *  integer height of the array
     98  *
     99  * @param[in] wd
    100  *  integer width of the array
    101  *
    102  * @returns
    103  *
    104  * @remarks
    105  *  None
    106  *
    107  *******************************************************************************
    108  */
    109 
    110 void ih264_inter_pred_luma_copy(UWORD8 *pu1_src,
    111                                 UWORD8 *pu1_dst,
    112                                 WORD32 src_strd,
    113                                 WORD32 dst_strd,
    114                                 WORD32 ht,
    115                                 WORD32 wd,
    116                                 UWORD8* pu1_tmp,
    117                                 WORD32 dydx)
    118 {
    119     WORD32 row, col;
    120     UNUSED(pu1_tmp);
    121     UNUSED(dydx);
    122     for(row = 0; row < ht; row++)
    123     {
    124         for(col = 0; col < wd; col++)
    125         {
    126             pu1_dst[col] = pu1_src[col];
    127         }
    128 
    129         pu1_src += src_strd;
    130         pu1_dst += dst_strd;
    131     }
    132 }
    133 
    134 /**
    135  *******************************************************************************
    136  *
    137  * @brief
    138  * Fucntion for copying to an interleaved destination
    139  *
    140  * @par Description:
    141  *    Copies the array of width 'wd' and height 'ht' from the  location pointed
    142  *    by 'src' to the location pointed by 'dst'
    143  *
    144  * @param[in] pu1_src
    145  *  UWORD8 pointer to the source
    146  *
    147  * @param[out] pu1_dst
    148  *  UWORD8 pointer to the destination
    149  *
    150  * @param[in] src_strd
    151  *  integer source stride
    152  *
    153  * @param[in] dst_strd
    154  *  integer destination stride
    155  *
    156  * @param[in] ht
    157  *  integer height of the array
    158  *
    159  * @param[in] wd
    160  *  integer width of the array
    161  *
    162  * @returns
    163  *
    164  * @remarks
    165  *  The alternate elements of src will be copied to alternate locations in dsr
    166  *  Other locations are not touched
    167  *
    168  *******************************************************************************
    169  */
    170 void ih264_interleave_copy(UWORD8 *pu1_src,
    171                            UWORD8 *pu1_dst,
    172                            WORD32 src_strd,
    173                            WORD32 dst_strd,
    174                            WORD32 ht,
    175                            WORD32 wd)
    176 {
    177     WORD32 row, col;
    178     wd *= 2;
    179 
    180     for(row = 0; row < ht; row++)
    181     {
    182         for(col = 0; col < wd; col+=2)
    183         {
    184             pu1_dst[col] = pu1_src[col];
    185         }
    186 
    187         pu1_src += src_strd;
    188         pu1_dst += dst_strd;
    189     }
    190 }
    191 
    192 /**
    193  *******************************************************************************
    194  *
    195  * @brief
    196  *     Interprediction luma filter for horizontal input
    197  *
    198  * @par Description:
    199  *    Applies a 6 tap horizontal filter .The output is  clipped to 8 bits
    200  *    sec 8.4.2.2.1 titled "Luma sample interpolation process"
    201  *
    202  * @param[in] pu1_src
    203  *  UWORD8 pointer to the source
    204  *
    205  * @param[out] pu1_dst
    206  *  UWORD8 pointer to the destination
    207  *
    208  * @param[in] src_strd
    209  *  integer source stride
    210  *
    211  * @param[in] dst_strd
    212  *  integer destination stride
    213  *
    214  * @param[in] ht
    215  *  integer height of the array
    216  *
    217  * @param[in] wd
    218  *  integer width of the array
    219  *
    220  * @returns
    221  *
    222  * @remarks
    223  *  None
    224  *
    225  *******************************************************************************
    226  */
    227 void ih264_inter_pred_luma_horz(UWORD8 *pu1_src,
    228                                 UWORD8 *pu1_dst,
    229                                 WORD32 src_strd,
    230                                 WORD32 dst_strd,
    231                                 WORD32 ht,
    232                                 WORD32 wd,
    233                                 UWORD8* pu1_tmp,
    234                                 WORD32 dydx)
    235 {
    236     WORD32 row, col;
    237     WORD16 i2_tmp;
    238     UNUSED(pu1_tmp);
    239     UNUSED(dydx);
    240 
    241     for(row = 0; row < ht; row++)
    242     {
    243         for(col = 0; col < wd; col++)
    244         {
    245             i2_tmp = 0;/*ih264_g_six_tap[] is the array containing the filter coeffs*/
    246             i2_tmp = ih264_g_six_tap[0] *
    247                             (pu1_src[col - 2] + pu1_src[col + 3])
    248                      + ih264_g_six_tap[1] *
    249                             (pu1_src[col - 1] + pu1_src[col + 2])
    250                      + ih264_g_six_tap[2] *
    251                             (pu1_src[col] + pu1_src[col + 1]);
    252             i2_tmp = (i2_tmp + 16) >> 5;
    253             pu1_dst[col] = CLIP_U8(i2_tmp);
    254         }
    255 
    256         pu1_src += src_strd;
    257         pu1_dst += dst_strd;
    258     }
    259 
    260 }
    261 
    262 /**
    263  *******************************************************************************
    264  *
    265  * @brief
    266  *    Interprediction luma filter for vertical input
    267  *
    268  * @par Description:
    269  *   Applies a 6 tap vertical filter.The output is  clipped to 8 bits
    270  *    sec 8.4.2.2.1 titled "Luma sample interpolation process"
    271  *
    272  * @param[in] pu1_src
    273  *  UWORD8 pointer to the source
    274  *
    275  * @param[out] pu1_dst
    276  *  UWORD8 pointer to the destination
    277  *
    278  * @param[in] src_strd
    279  *  integer source stride
    280  *
    281  * @param[in] dst_strd
    282  *  integer destination stride
    283  *
    284  * @param[in] ht
    285  *  integer height of the array
    286  *
    287  * @param[in] wd
    288  *  integer width of the array
    289  *
    290  * @returns
    291  *
    292  * @remarks
    293  *  None
    294  *
    295  *******************************************************************************
    296  */
    297 void ih264_inter_pred_luma_vert(UWORD8 *pu1_src,
    298                                 UWORD8 *pu1_dst,
    299                                 WORD32 src_strd,
    300                                 WORD32 dst_strd,
    301                                 WORD32 ht,
    302                                 WORD32 wd,
    303                                 UWORD8* pu1_tmp,
    304                                 WORD32 dydx)
    305 {
    306     WORD32 row, col;
    307     WORD16 i2_tmp;
    308     UNUSED(pu1_tmp);
    309     UNUSED(dydx);
    310 
    311     for(row = 0; row < ht; row++)
    312     {
    313         for(col = 0; col < wd; col++)
    314         {
    315             i2_tmp = 0; /*ih264_g_six_tap[] is the array containing the filter coeffs*/
    316             i2_tmp = ih264_g_six_tap[0] *
    317                             (pu1_src[col - 2 * src_strd] + pu1_src[col + 3 * src_strd])
    318                      + ih264_g_six_tap[1] *
    319                             (pu1_src[col - 1 * src_strd] + pu1_src[col + 2 * src_strd])
    320                      + ih264_g_six_tap[2] *
    321                             (pu1_src[col] + pu1_src[col + 1 * src_strd]);
    322             i2_tmp = (i2_tmp + 16) >> 5;
    323             pu1_dst[col] = CLIP_U8(i2_tmp);
    324         }
    325         pu1_src += src_strd;
    326         pu1_dst += dst_strd;
    327     }
    328 }
    329 
    330 /*!
    331  **************************************************************************
    332  * \if Function name : ih264_inter_pred_luma_horz_hpel_vert_hpel \endif
    333  *
    334  * \brief
    335  *    This function implements a two stage cascaded six tap filter. It
    336  *    applies the six tap filter in the horizontal direction on the
    337  *    predictor values, followed by applying the same filter in the
    338  *    vertical direction on the output of the first stage. The six tap
    339  *    filtering operation is described in sec 8.4.2.2.1 titled "Luma sample
    340  *    interpolation process"
    341  *
    342  * \param pu1_src: Pointer to the buffer containing the predictor values.
    343  *     pu1_src could point to the frame buffer or the predictor buffer.
    344  * \param pu1_dst: Pointer to the destination buffer where the output of
    345  *     the six tap filter is stored.
    346  * \param ht: Height of the rectangular pixel grid to be interpolated
    347  * \param wd: Width of the rectangular pixel grid to be interpolated
    348  * \param src_strd: Width of the buffer pointed to by pu1_src.
    349  * \param dst_strd: Width of the destination buffer
    350  * \param pu1_tmp: temporary buffer.
    351  * \param dydx: x and y reference offset for qpel calculations: UNUSED in this function.
    352  *
    353  * \return
    354  *    None.
    355  *
    356  * \note
    357  *    This function takes the 8 bit predictor values, applies the six tap
    358  *    filter in the horizontal direction and outputs the result clipped to
    359  *    8 bit precision. The input is stored in the buffer pointed to by
    360  *    pu1_src while the output is stored in the buffer pointed by pu1_dst.
    361  *    Both pu1_src and pu1_dst could point to the same buffer i.e. the
    362  *    six tap filter could be done in place.
    363  *
    364  **************************************************************************
    365  */
    366 void ih264_inter_pred_luma_horz_hpel_vert_hpel(UWORD8 *pu1_src,
    367                                                UWORD8 *pu1_dst,
    368                                                WORD32 src_strd,
    369                                                WORD32 dst_strd,
    370                                                WORD32 ht,
    371                                                WORD32 wd,
    372                                                UWORD8* pu1_tmp,
    373                                                WORD32 dydx)
    374 {
    375     WORD32 row, col;
    376     WORD32 tmp;
    377     WORD16* pi2_pred1_temp;
    378     WORD16* pi2_pred1;
    379     UNUSED(dydx);
    380     pi2_pred1_temp = (WORD16*)pu1_tmp;
    381     pi2_pred1_temp += 2;
    382     pi2_pred1 = pi2_pred1_temp;
    383     for(row = 0; row < ht; row++)
    384     {
    385         for(col = -2; col < wd + 3; col++)
    386         {
    387             tmp = 0;/*ih264_g_six_tap[] is the array containing the filter coeffs*/
    388             tmp = ih264_g_six_tap[0] *
    389                             (pu1_src[col - 2 * src_strd] + pu1_src[col + 3 * src_strd])
    390                   + ih264_g_six_tap[1] *
    391                             (pu1_src[col - 1 * src_strd] + pu1_src[col + 2 * src_strd])
    392                   + ih264_g_six_tap[2] *
    393                             (pu1_src[col] + pu1_src[col + 1 * src_strd]);
    394             pi2_pred1_temp[col] = tmp;
    395         }
    396         pu1_src += src_strd;
    397         pi2_pred1_temp = pi2_pred1_temp + wd + 5;
    398     }
    399 
    400     for(row = 0; row < ht; row++)
    401     {
    402         for(col = 0; col < wd; col++)
    403         {
    404             tmp = 0;/*ih264_g_six_tap[] is the array containing the filter coeffs*/
    405             tmp = ih264_g_six_tap[0] *
    406                             (pi2_pred1[col - 2] + pi2_pred1[col + 3])
    407                   + ih264_g_six_tap[1] *
    408                             (pi2_pred1[col - 1] + pi2_pred1[col + 2])
    409                   + ih264_g_six_tap[2] * (pi2_pred1[col] + pi2_pred1[col + 1]);
    410             tmp = (tmp + 512) >> 10;
    411             pu1_dst[col] = CLIP_U8(tmp);
    412         }
    413         pi2_pred1 += (wd + 5);
    414         pu1_dst += dst_strd;
    415     }
    416 }
    417 
    418 /*!
    419  **************************************************************************
    420  * \if Function name : ih264_inter_pred_luma_horz_qpel \endif
    421  *
    422  * \brief
    423  *    This routine applies the six tap filter to the predictors in the
    424  *    horizontal direction. The six tap filtering operation is described in
    425  *    sec 8.4.2.2.1 titled "Luma sample interpolation process"
    426  *
    427  * \param pu1_src: Pointer to the buffer containing the predictor values.
    428  *     pu1_src could point to the frame buffer or the predictor buffer.
    429  * \param pu1_dst: Pointer to the destination buffer where the output of
    430  *     the six tap filter is stored.
    431  * \param ht: Height of the rectangular pixel grid to be interpolated
    432  * \param wd: Width of the rectangular pixel grid to be interpolated
    433  * \param src_strd: Width of the buffer pointed to by pu1_src.
    434  * \param dst_strd: Width of the destination buffer
    435  * \param pu1_tmp: temporary buffer: UNUSED in this function
    436  * \param dydx: x and y reference offset for qpel calculations.
    437  *
    438  * \return
    439  *    None.
    440  *
    441  * \note
    442  *    This function takes the 8 bit predictor values, applies the six tap
    443  *    filter in the horizontal direction and outputs the result clipped to
    444  *    8 bit precision. The input is stored in the buffer pointed to by
    445  *    pu1_src while the output is stored in the buffer pointed by pu1_dst.
    446  *    Both pu1_src and pu1_dst could point to the same buffer i.e. the
    447  *    six tap filter could be done in place.
    448  *
    449  **************************************************************************
    450  */
    451 void ih264_inter_pred_luma_horz_qpel(UWORD8 *pu1_src,
    452                                      UWORD8 *pu1_dst,
    453                                      WORD32 src_strd,
    454                                      WORD32 dst_strd,
    455                                      WORD32 ht,
    456                                      WORD32 wd,
    457                                      UWORD8* pu1_tmp,
    458                                      WORD32 dydx)
    459 {
    460     WORD32 row, col;
    461     UWORD8 *pu1_pred1;
    462     WORD32 x_offset = dydx & 0x3;
    463     UNUSED(pu1_tmp);
    464     pu1_pred1 = pu1_src + (x_offset >> 1);
    465 
    466     for(row = 0; row < ht; row++)
    467     {
    468         for(col = 0; col < wd; col++, pu1_src++, pu1_dst++)
    469         {
    470             WORD16 i2_temp;
    471             /* The logic below implements the following equation
    472              i2_temp = puc_pred[-2] - 5 * (puc_pred[-1] + puc_pred[2]) +
    473              20 * (puc_pred[0] + puc_pred[1]) + puc_pred[3]; */
    474             i2_temp = pu1_src[-2] + pu1_src[3]
    475                       - (pu1_src[-1] + pu1_src[2])
    476                       + ((pu1_src[0] + pu1_src[1] - pu1_src[-1] - pu1_src[2]) << 2)
    477                       + ((pu1_src[0] + pu1_src[1]) << 4);
    478             i2_temp = (i2_temp + 16) >> 5;
    479             i2_temp = CLIP_U8(i2_temp);
    480             *pu1_dst = (i2_temp + *pu1_pred1 + 1) >> 1;
    481 
    482             pu1_pred1++;
    483         }
    484         pu1_dst += dst_strd - wd;
    485         pu1_src += src_strd - wd;
    486         pu1_pred1 += src_strd - wd;
    487     }
    488 }
    489 
    490 /*!
    491  **************************************************************************
    492  * \if Function name : ih264_inter_pred_luma_vert_qpel \endif
    493  *
    494  * \brief
    495  *    This routine applies the six tap filter to the predictors in the
    496  *    vertical direction and interpolates them to obtain pixels at quarter vertical
    497  *    positions (0, 1/4) and (0, 3/4). The six tap filtering operation is
    498  *    described in sec 8.4.2.2.1 titled "Luma sample interpolation process"
    499  *
    500  * \param pu1_src: Pointer to the buffer containing the predictor values.
    501  *     pu1_src could point to the frame buffer or the predictor buffer.
    502  * \param pu1_dst: Pointer to the destination buffer where the output of
    503  *     the six tap filter is stored.
    504  * \param ht: Height of the rectangular pixel grid to be interpolated
    505  * \param wd: Width of the rectangular pixel grid to be interpolated
    506  * \param src_strd: Width of the buffer pointed to by puc_pred.
    507  * \param dst_strd: Width of the destination buffer
    508  * \param pu1_tmp: temporary buffer: UNUSED in this function
    509  * \param dydx: x and y reference offset for qpel calculations.
    510  *
    511  * \return
    512  *    void
    513  *
    514  * \note
    515  *    This function takes the 8 bit predictor values, applies the six tap
    516  *    filter in the vertical direction and outputs the result clipped to
    517  *    8 bit precision. The input is stored in the buffer pointed to by
    518  *    puc_pred while the output is stored in the buffer pointed by puc_dest.
    519  *    Both puc_pred and puc_dest could point to the same buffer i.e. the
    520  *    six tap filter could be done in place.
    521  *
    522  * \para <title>
    523  *    <paragraph>
    524  *  ...
    525  **************************************************************************
    526  */
    527 void ih264_inter_pred_luma_vert_qpel(UWORD8 *pu1_src,
    528                                      UWORD8 *pu1_dst,
    529                                      WORD32 src_strd,
    530                                      WORD32 dst_strd,
    531                                      WORD32 ht,
    532                                      WORD32 wd,
    533                                      UWORD8* pu1_tmp,
    534                                      WORD32 dydx)
    535 {
    536     WORD32 row, col;
    537     WORD32 y_offset = dydx >> 2;
    538     WORD32 off1, off2, off3;
    539     UWORD8 *pu1_pred1;
    540     UNUSED(pu1_tmp);
    541     y_offset = y_offset & 0x3;
    542 
    543     off1 = src_strd;
    544     off2 = src_strd << 1;
    545     off3 = off1 + off2;
    546 
    547     pu1_pred1 = pu1_src + (y_offset >> 1) * src_strd;
    548 
    549     for(row = 0; row < ht; row++)
    550     {
    551         for(col = 0; col < wd; col++, pu1_dst++, pu1_src++, pu1_pred1++)
    552         {
    553             WORD16 i2_temp;
    554             /* The logic below implements the following equation
    555              i16_temp = puc_pred[-2*src_strd] + puc_pred[3*src_strd] -
    556              5 * (puc_pred[-1*src_strd] + puc_pred[2*src_strd])  +
    557              20 * (puc_pred[0] + puc_pred[src_strd]); */
    558             i2_temp = pu1_src[-off2] + pu1_src[off3]
    559                        - (pu1_src[-off1] + pu1_src[off2])
    560                        + ((pu1_src[0] + pu1_src[off1] - pu1_src[-off1] - pu1_src[off2]) << 2)
    561                        + ((pu1_src[0] + pu1_src[off1]) << 4);
    562             i2_temp = (i2_temp + 16) >> 5;
    563             i2_temp = CLIP_U8(i2_temp);
    564 
    565             *pu1_dst = (i2_temp + *pu1_pred1 + 1) >> 1;
    566         }
    567         pu1_src += src_strd - wd;
    568         pu1_pred1 += src_strd - wd;
    569         pu1_dst += dst_strd - wd;
    570     }
    571 }
    572 
    573 /*!
    574  **************************************************************************
    575  * \if Function name : ih264_inter_pred_luma_horz_qpel_vert_qpel \endif
    576  *
    577  * \brief
    578  *    This routine applies the six tap filter to the predictors in the
    579  *    vertical and horizontal direction and averages them to get pixels at locations
    580  *    (1/4,1/4), (1/4, 3/4), (3/4, 1/4) & (3/4, 3/4). The six tap filtering operation
    581  *    is described in sec 8.4.2.2.1 titled "Luma sample interpolation process"
    582  *
    583  * \param pu1_src: Pointer to the buffer containing the predictor values.
    584  *     pu1_src could point to the frame buffer or the predictor buffer.
    585  * \param pu1_dst: Pointer to the destination buffer where the output of
    586  *     the six tap filter is stored.
    587  * \param wd: Width of the rectangular pixel grid to be interpolated
    588  * \param ht: Height of the rectangular pixel grid to be interpolated
    589  * \param src_strd: Width of the buffer pointed to by puc_pred.
    590  * \param dst_strd: Width of the destination buffer
    591  * \param pu1_tmp: temporary buffer, UNUSED in this function
    592  * \param dydx: x and y reference offset for qpel calculations.
    593  *
    594  * \return
    595  *    void
    596  *
    597  * \note
    598  *    This function takes the 8 bit predictor values, applies the six tap
    599  *    filter in the vertical direction and outputs the result clipped to
    600  *    8 bit precision. The input is stored in the buffer pointed to by
    601  *    puc_pred while the output is stored in the buffer pointed by puc_dest.
    602  *    Both puc_pred and puc_dest could point to the same buffer i.e. the
    603  *    six tap filter could be done in place.
    604  *
    605  * \para <title>
    606  *    <paragraph>
    607  *  ...
    608  **************************************************************************
    609  */
    610 void ih264_inter_pred_luma_horz_qpel_vert_qpel(UWORD8 *pu1_src,
    611                                                UWORD8 *pu1_dst,
    612                                                WORD32 src_strd,
    613                                                WORD32 dst_strd,
    614                                                WORD32 ht,
    615                                                WORD32 wd,
    616                                                UWORD8* pu1_tmp,
    617                                                WORD32 dydx)
    618 {
    619     WORD32 row, col;
    620     WORD32 x_offset = dydx & 0x3;
    621     WORD32 y_offset = dydx >> 2;
    622 
    623     WORD32 off1, off2, off3;
    624     UWORD8* pu1_pred_vert, *pu1_pred_horz;
    625     UNUSED(pu1_tmp);
    626     y_offset = y_offset & 0x3;
    627 
    628     off1 = src_strd;
    629     off2 = src_strd << 1;
    630     off3 = off1 + off2;
    631 
    632     pu1_pred_horz = pu1_src + (y_offset >> 1) * src_strd;
    633     pu1_pred_vert = pu1_src + (x_offset >> 1);
    634 
    635     for(row = 0; row < ht; row++)
    636     {
    637         for(col = 0; col < wd;
    638                         col++, pu1_dst++, pu1_pred_vert++, pu1_pred_horz++)
    639         {
    640             WORD16 i2_temp_vert, i2_temp_horz;
    641             /* The logic below implements the following equation
    642              i2_temp = puc_pred[-2*src_strd] + puc_pred[3*src_strd] -
    643              5 * (puc_pred[-1*src_strd] + puc_pred[2*src_strd])  +
    644              20 * (puc_pred[0] + puc_pred[src_strd]); */
    645             i2_temp_vert = pu1_pred_vert[-off2] + pu1_pred_vert[off3]
    646                             - (pu1_pred_vert[-off1] + pu1_pred_vert[off2])
    647                             + ((pu1_pred_vert[0] + pu1_pred_vert[off1]
    648                                             - pu1_pred_vert[-off1]
    649                                             - pu1_pred_vert[off2]) << 2)
    650                             + ((pu1_pred_vert[0] + pu1_pred_vert[off1]) << 4);
    651             i2_temp_vert = (i2_temp_vert + 16) >> 5;
    652             i2_temp_vert = CLIP_U8(i2_temp_vert);
    653 
    654             /* The logic below implements the following equation
    655              i16_temp = puc_pred[-2] - 5 * (puc_pred[-1] + puc_pred[2]) +
    656              20 * (puc_pred[0] + puc_pred[1]) + puc_pred[3]; */
    657             i2_temp_horz = pu1_pred_horz[-2] + pu1_pred_horz[3]
    658                             - (pu1_pred_horz[-1] + pu1_pred_horz[2])
    659                             + ((pu1_pred_horz[0] + pu1_pred_horz[1]
    660                                             - pu1_pred_horz[-1]
    661                                             - pu1_pred_horz[2]) << 2)
    662                             + ((pu1_pred_horz[0] + pu1_pred_horz[1]) << 4);
    663             i2_temp_horz = (i2_temp_horz + 16) >> 5;
    664             i2_temp_horz = CLIP_U8(i2_temp_horz);
    665             *pu1_dst = (i2_temp_vert + i2_temp_horz + 1) >> 1;
    666         }
    667         pu1_pred_vert += (src_strd - wd);
    668         pu1_pred_horz += (src_strd - wd);
    669         pu1_dst += (dst_strd - wd);
    670     }
    671 }
    672 
    673 /*!
    674  **************************************************************************
    675  * \if Function name : ih264_inter_pred_luma_horz_qpel_vert_hpel \endif
    676  *
    677  * \brief
    678  *    This routine applies the six tap filter to the predictors in the vertical
    679  *    and horizontal direction to obtain the pixel at (1/2,1/2). It then interpolates
    680  *    pixel at (0,1/2) and (1/2,1/2) to obtain pixel at (1/4,1/2). Similarly for (3/4,1/2).
    681  *    The six tap filtering operation is described in sec 8.4.2.2.1 titled
    682  *    "Luma sample interpolation process"
    683  *
    684  * \param pu1_src: Pointer to the buffer containing the predictor values.
    685  *     pu1_src could point to the frame buffer or the predictor buffer.
    686  * \param pu1_dst: Pointer to the destination buffer where the output of
    687  *     the six tap filter followed by interpolation is stored.
    688  * \param wd: Width of the rectangular pixel grid to be interpolated
    689  * \param ht: Height of the rectangular pixel grid to be interpolated
    690  * \param src_strd: Width of the buffer pointed to by puc_pred.
    691  * \param dst_strd: Width of the destination buffer
    692  * \param pu1_tmp: buffer to store temporary output after 1st 6-tap filter.
    693  * \param dydx: x and y reference offset for qpel calculations.
    694  *
    695  * \return
    696  *    void
    697  *
    698  * \note
    699  *    This function takes the 8 bit predictor values, applies the six tap
    700  *    filter in the vertical direction and outputs the result clipped to
    701  *    8 bit precision. The input is stored in the buffer pointed to by
    702  *    puc_pred while the output is stored in the buffer pointed by puc_dest.
    703  *    Both puc_pred and puc_dest could point to the same buffer i.e. the
    704  *    six tap filter could be done in place.
    705  *
    706  * \para <title>
    707  *    <paragraph>
    708  *  ...
    709  **************************************************************************
    710  */
    711 void ih264_inter_pred_luma_horz_qpel_vert_hpel(UWORD8 *pu1_src,
    712                                                UWORD8 *pu1_dst,
    713                                                WORD32 src_strd,
    714                                                WORD32 dst_strd,
    715                                                WORD32 ht,
    716                                                WORD32 wd,
    717                                                UWORD8* pu1_tmp,
    718                                                WORD32 dydx)
    719 {
    720     WORD32 row, col;
    721     WORD32 tmp;
    722     WORD16* pi2_pred1_temp, *pi2_pred1;
    723     UWORD8* pu1_dst_tmp;
    724     WORD32 x_offset = dydx & 0x3;
    725     WORD16 i2_macro;
    726 
    727     pi2_pred1_temp = (WORD16*)pu1_tmp;
    728     pi2_pred1_temp += 2;
    729     pi2_pred1 = pi2_pred1_temp;
    730     pu1_dst_tmp = pu1_dst;
    731 
    732     for(row = 0; row < ht; row++)
    733     {
    734         for(col = -2; col < wd + 3; col++)
    735         {
    736             tmp = 0;/*ih264_g_six_tap[] is the array containing the filter coeffs*/
    737             tmp = ih264_g_six_tap[0] *
    738                             (pu1_src[col - 2 * src_strd] + pu1_src[col + 3 * src_strd])
    739                   + ih264_g_six_tap[1] *
    740                             (pu1_src[col - 1 * src_strd] + pu1_src[col + 2 * src_strd])
    741                   + ih264_g_six_tap[2] *
    742                             (pu1_src[col] + pu1_src[col + 1 * src_strd]);
    743             pi2_pred1_temp[col] = tmp;
    744         }
    745 
    746         pu1_src += src_strd;
    747         pi2_pred1_temp = pi2_pred1_temp + wd + 5;
    748     }
    749 
    750     pi2_pred1_temp = pi2_pred1;
    751     for(row = 0; row < ht; row++)
    752     {
    753         for(col = 0; col < wd; col++)
    754         {
    755             tmp = 0;/*ih264_g_six_tap[] is the array containing the filter coeffs*/
    756             tmp = ih264_g_six_tap[0] *
    757                             (pi2_pred1[col - 2] + pi2_pred1[col + 3])
    758                   + ih264_g_six_tap[1] *
    759                             (pi2_pred1[col - 1] + pi2_pred1[col + 2])
    760                   + ih264_g_six_tap[2] *
    761                             (pi2_pred1[col] + pi2_pred1[col + 1]);
    762             tmp = (tmp + 512) >> 10;
    763             pu1_dst[col] = CLIP_U8(tmp);
    764         }
    765         pi2_pred1 += (wd + 5);
    766         pu1_dst += dst_strd;
    767     }
    768 
    769     pu1_dst = pu1_dst_tmp;
    770     pi2_pred1_temp += (x_offset >> 1);
    771     for(row = ht; row != 0; row--)
    772     {
    773         for(col = wd; col != 0; col--, pu1_dst++, pi2_pred1_temp++)
    774         {
    775             UWORD8 uc_temp;
    776             /* Clipping the output of the six tap filter obtained from the
    777              first stage of the 2d filter stage */
    778             *pi2_pred1_temp = (*pi2_pred1_temp + 16) >> 5;
    779             i2_macro = (*pi2_pred1_temp);
    780             uc_temp = CLIP_U8(i2_macro);
    781             *pu1_dst = (*pu1_dst + uc_temp + 1) >> 1;
    782         }
    783         pi2_pred1_temp += 5;
    784         pu1_dst += dst_strd - wd;
    785     }
    786 }
    787 
    788 /*!
    789  **************************************************************************
    790  * \if Function name : ih264_inter_pred_luma_horz_hpel_vert_qpel \endif
    791  *
    792  * \brief
    793  *    This routine applies the six tap filter to the predictors in the horizontal
    794  *    and vertical direction to obtain the pixel at (1/2,1/2). It then interpolates
    795  *    pixel at (1/2,0) and (1/2,1/2) to obtain pixel at (1/2,1/4). Similarly for (1/2,3/4).
    796  *    The six tap filtering operation is described in sec 8.4.2.2.1 titled
    797  *    "Luma sample interpolation process"
    798  *
    799  * \param pu1_src: Pointer to the buffer containing the predictor values.
    800  *     pu1_src could point to the frame buffer or the predictor buffer.
    801  * \param pu1_dst: Pointer to the destination buffer where the output of
    802  *     the six tap filter followed by interpolation is stored.
    803  * \param wd: Width of the rectangular pixel grid to be interpolated
    804  * \param ht: Height of the rectangular pixel grid to be interpolated
    805  * \param src_strd: Width of the buffer pointed to by puc_pred.
    806  * \param dst_strd: Width of the destination buffer
    807  * \param pu1_tmp: buffer to store temporary output after 1st 6-tap filter.
    808  * \param dydx: x and y reference offset for qpel calculations.
    809  *
    810  * \return
    811  *    void
    812  *
    813  * \note
    814  *    This function takes the 8 bit predictor values, applies the six tap
    815  *    filter in the vertical direction and outputs the result clipped to
    816  *    8 bit precision. The input is stored in the buffer pointed to by
    817  *    puc_pred while the output is stored in the buffer pointed by puc_dest.
    818  *    Both puc_pred and puc_dest could point to the same buffer i.e. the
    819  *    six tap filter could be done in place.
    820  *
    821  * \para <title>
    822  *    <paragraph>
    823  *  ...
    824  **************************************************************************
    825  */
    826 void ih264_inter_pred_luma_horz_hpel_vert_qpel(UWORD8 *pu1_src,
    827                                                UWORD8 *pu1_dst,
    828                                                WORD32 src_strd,
    829                                                WORD32 dst_strd,
    830                                                WORD32 ht,
    831                                                WORD32 wd,
    832                                                UWORD8* pu1_tmp,
    833                                                WORD32 dydx)
    834 {
    835 
    836     WORD32 row, col;
    837     WORD32 tmp;
    838     WORD32 y_offset = dydx >> 2;
    839     WORD16* pi2_pred1_temp, *pi2_pred1;
    840     UWORD8* pu1_dst_tmp;
    841     //WORD32 x_offset = dydx & 0x3;
    842     WORD16 i2_macro;
    843 
    844     y_offset = y_offset & 0x3;
    845 
    846     pi2_pred1_temp = (WORD16*)pu1_tmp;
    847     pi2_pred1_temp += 2 * wd;
    848     pi2_pred1 = pi2_pred1_temp;
    849     pu1_dst_tmp = pu1_dst;
    850     pu1_src -= 2 * src_strd;
    851     for(row = -2; row < ht + 3; row++)
    852     {
    853         for(col = 0; col < wd; col++)
    854         {
    855             tmp = 0;/*ih264_g_six_tap[] is the array containing the filter coeffs*/
    856             tmp = ih264_g_six_tap[0] * (pu1_src[col - 2] + pu1_src[col + 3])
    857                   + ih264_g_six_tap[1] * (pu1_src[col - 1] + pu1_src[col + 2])
    858                   + ih264_g_six_tap[2] * (pu1_src[col] + pu1_src[col + 1]);
    859             pi2_pred1_temp[col - 2 * wd] = tmp;
    860         }
    861 
    862         pu1_src += src_strd;
    863         pi2_pred1_temp += wd;
    864     }
    865     pi2_pred1_temp = pi2_pred1;
    866     for(row = 0; row < ht; row++)
    867     {
    868         for(col = 0; col < wd; col++)
    869         {
    870             tmp = 0;/*ih264_g_six_tap[] is the array containing the filter coeffs*/
    871             tmp = ih264_g_six_tap[0] * (pi2_pred1[col - 2 * wd] + pi2_pred1[col + 3 * wd])
    872                   + ih264_g_six_tap[1] * (pi2_pred1[col - 1 * wd] + pi2_pred1[col + 2 * wd])
    873                   + ih264_g_six_tap[2] * (pi2_pred1[col] + pi2_pred1[col + 1 * wd]);
    874             tmp = (tmp + 512) >> 10;
    875             pu1_dst[col] = CLIP_U8(tmp);
    876         }
    877         pi2_pred1 += wd;
    878         pu1_dst += dst_strd;
    879     }
    880     pu1_dst = pu1_dst_tmp;
    881     pi2_pred1_temp += (y_offset >> 1) * wd;
    882     for(row = ht; row != 0; row--)
    883 
    884     {
    885         for(col = wd; col != 0; col--, pu1_dst++, pi2_pred1_temp++)
    886         {
    887             UWORD8 u1_temp;
    888             /* Clipping the output of the six tap filter obtained from the
    889              first stage of the 2d filter stage */
    890             *pi2_pred1_temp = (*pi2_pred1_temp + 16) >> 5;
    891             i2_macro = (*pi2_pred1_temp);
    892             u1_temp = CLIP_U8(i2_macro);
    893             *pu1_dst = (*pu1_dst + u1_temp + 1) >> 1;
    894         }
    895         //pi16_pred1_temp += wd;
    896         pu1_dst += dst_strd - wd;
    897     }
    898 }
    899 
    900 /**
    901  *******************************************************************************
    902  *  function:ih264_inter_pred_luma_bilinear
    903  *
    904  * @brief
    905  *    This routine applies the bilinear filter to the predictors .
    906  *    The  filtering operation is described in
    907  *    sec 8.4.2.2.1 titled "Luma sample interpolation process"
    908  *
    909  * @par Description:
    910 \note
    911  *     This function is called to obtain pixels lying at the following
    912  *    locations (1/4,1), (3/4,1),(1,1/4), (1,3/4) ,(1/4,1/2), (3/4,1/2),(1/2,1/4), (1/2,3/4),(3/4,1/4),(1/4,3/4),(3/4,3/4)&& (1/4,1/4) .
    913  *    The function averages the two adjacent values from the two input arrays in horizontal direction.
    914  *
    915  *
    916  * @param[in] pu1_src1:
    917  *  UWORD8 Pointer to the buffer containing the first input array.
    918  *
    919  * @param[in] pu1_src2:
    920  *  UWORD8 Pointer to the buffer containing the second input array.
    921  *
    922  * @param[out] pu1_dst
    923  *  UWORD8 pointer to the destination where the output of bilinear filter is stored.
    924  *
    925  * @param[in] src_strd1
    926  *  Stride of the first input buffer
    927  *
    928  * @param[in] src_strd2
    929  *  Stride of the second input buffer
    930  *
    931  * @param[in] dst_strd
    932  *  integer destination stride of pu1_dst
    933  *
    934  * @param[in] ht
    935  *  integer height of the array
    936  *
    937  * @param[in] wd
    938  *  integer width of the array
    939  *
    940  * @returns
    941  *
    942  * @remarks
    943  *  None
    944  *
    945  *******************************************************************************
    946  */
    947 void ih264_inter_pred_luma_bilinear(UWORD8 *pu1_src1,
    948                                     UWORD8 *pu1_src2,
    949                                     UWORD8 *pu1_dst,
    950                                     WORD32 src_strd1,
    951                                     WORD32 src_strd2,
    952                                     WORD32 dst_strd,
    953                                     WORD32 ht,
    954                                     WORD32 wd)
    955 {
    956     WORD32 row, col;
    957     WORD16 i2_tmp;
    958 
    959     for(row = 0; row < ht; row++)
    960     {
    961         for(col = 0; col < wd; col++)
    962         {
    963             i2_tmp = pu1_src1[col] + pu1_src2[col];
    964             i2_tmp = (i2_tmp + 1) >> 1;
    965             pu1_dst[col] = CLIP_U8(i2_tmp);
    966         }
    967         pu1_src1 += src_strd1;
    968         pu1_src2 += src_strd2;
    969         pu1_dst += dst_strd;
    970     }
    971 
    972 }
    973 
    974 /**
    975  *******************************************************************************
    976  *
    977  * @brief
    978  *    Interprediction chroma filter
    979  *
    980  * @par Description:
    981  *   Applies filtering to chroma samples as mentioned in
    982  *    sec 8.4.2.2.2 titled "chroma sample interpolation process"
    983  *
    984  * @param[in] pu1_src
    985  *  UWORD8 pointer to the source containing alternate U and V samples
    986  *
    987  * @param[out] pu1_dst
    988  *  UWORD8 pointer to the destination
    989  *
    990  * @param[in] src_strd
    991  *  integer source stride
    992  *
    993  * @param[in] dst_strd
    994  *  integer destination stride
    995  *
    996  * @param[in] u1_dx
    997  *  dx value where the sample is to be produced(refer sec 8.4.2.2.2 )
    998  *
    999  * @param[in] u1_dy
   1000  *  dy value where the sample is to be produced(refer sec 8.4.2.2.2 )
   1001  *
   1002  * @param[in] ht
   1003  *  integer height of the array
   1004  *
   1005  * @param[in] wd
   1006  *  integer width of the array
   1007  *
   1008  * @returns
   1009  *
   1010  * @remarks
   1011  *  None
   1012  *
   1013  *******************************************************************************
   1014  */
   1015 void ih264_inter_pred_chroma(UWORD8 *pu1_src,
   1016                              UWORD8 *pu1_dst,
   1017                              WORD32 src_strd,
   1018                              WORD32 dst_strd,
   1019                              WORD32 dx,
   1020                              WORD32 dy,
   1021                              WORD32 ht,
   1022                              WORD32 wd)
   1023 {
   1024     WORD32 row, col;
   1025     WORD16 i2_tmp;
   1026 
   1027     for(row = 0; row < ht; row++)
   1028     {
   1029         for(col = 0; col < 2 * wd; col++)
   1030         {
   1031             i2_tmp = 0; /* applies equation (8-266) in section 8.4.2.2.2 */
   1032             i2_tmp = (8 - dx) * (8 - dy) * pu1_src[col]
   1033                      + (dx) * (8 - dy) * pu1_src[col + 2]
   1034                      + (8 - dx) * (dy) * (pu1_src + src_strd)[col]
   1035                      + (dx) * (dy) * (pu1_src + src_strd)[col + 2];
   1036             i2_tmp = (i2_tmp + 32) >> 6;
   1037             pu1_dst[col] = CLIP_U8(i2_tmp);
   1038         }
   1039         pu1_src += src_strd;
   1040         pu1_dst += dst_strd;
   1041     }
   1042 }
   1043