Home | History | Annotate | Download | only in common
      1 /******************************************************************************
      2 *
      3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
      4 *
      5 * Licensed under the Apache License, Version 2.0 (the "License");
      6 * you may not use this file except in compliance with the License.
      7 * You may obtain a copy of the License at:
      8 *
      9 * http://www.apache.org/licenses/LICENSE-2.0
     10 *
     11 * Unless required by applicable law or agreed to in writing, software
     12 * distributed under the License is distributed on an "AS IS" BASIS,
     13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 * See the License for the specific language governing permissions and
     15 * limitations under the License.
     16 *
     17 ******************************************************************************/
     18 /**
     19 *******************************************************************************
     20 * @file
     21 *  ihevc_intra_pred_filters.c
     22 *
     23 * @brief
     24 *  Contains function Definition for intra prediction  interpolation filters
     25 *
     26 *
     27 * @author
     28 *  Srinivas T
     29 *
     30 * @par List of Functions:
     31 *  - ihevc_intra_pred_luma_planar()
     32 *  - ihevc_intra_pred_luma_dc()
     33 *  - ihevc_intra_pred_luma_horz()
     34 *  - ihevc_intra_pred_luma_ver()
     35 *  - ihevc_intra_pred_luma_mode2()
     36 *  - ihevc_intra_pred_luma_mode_18_34()
     37 *  - ihevc_intra_pred_luma_mode_3_to_9()
     38 *  - ihevc_intra_pred_luma_mode_11_to_17()
     39 *  - ihevc_intra_pred_luma_mode_19_to_25()
     40 *  - ihevc_intra_pred_luma_mode_27_to_33()
     41 *  - ihevc_intra_pred_luma_ref_substitution()
     42 *
     43 * @remarks
     44 *  None
     45 *
     46 *******************************************************************************
     47 */
     48 
     49 
     50 /*****************************************************************************/
     51 /* File Includes                                                             */
     52 /*****************************************************************************/
     53 
     54 #include <assert.h>
     55 #include "ihevc_typedefs.h"
     56 #include "ihevc_intra_pred.h"
     57 #include "ihevc_macros.h"
     58 #include "ihevc_func_selector.h"
     59 #include "ihevc_platform_macros.h"
     60 #include "ihevc_common_tables.h"
     61 #include "ihevc_defs.h"
     62 #include "ihevc_mem_fns.h"
     63 #include "ihevc_debug.h"
     64 
     65 /****************************************************************************/
     66 /* Constant Macros                                                          */
     67 /****************************************************************************/
     68 #define MAX_CU_SIZE 64
     69 #define BIT_DEPTH 8
     70 #define T32_4NT 128
     71 #define T16_4NT 64
     72 
     73 
     74 /****************************************************************************/
     75 /* Function Macros                                                          */
     76 /****************************************************************************/
     77 #define GET_BITS(y,x) ((y) & (1 << x)) && (1 << x)
     78 
     79 /*****************************************************************************/
     80 /* global tables Definition                                                  */
     81 /*****************************************************************************/
     82 
     83 
     84 /*****************************************************************************/
     85 /* Function Definition                                                      */
     86 /*****************************************************************************/
     87 
     88 /**
     89 *******************************************************************************
     90 *
     91 * @brief
     92 *    Intra prediction interpolation filter for pu1_ref substitution
     93 *
     94 *
     95 * @par Description:
     96 *    Reference substitution process for samples unavailable  for prediction
     97 *    Refer to section 8.4.4.2.2
     98 *
     99 * @param[in] pu1_top_left
    100 *  UWORD8 pointer to the top-left
    101 *
    102 * @param[in] pu1_top
    103 *  UWORD8 pointer to the top
    104 *
    105 * @param[in] pu1_left
    106 *  UWORD8 pointer to the left
    107 *
    108 * @param[in] src_strd
    109 *  WORD32 Source stride
    110 *
    111 * @param[in] nbr_flags
    112 *  WORD32 neighbor availability flags
    113 *
    114 * @param[in] nt
    115 *  WORD32 transform Block size
    116 *
    117 * @param[in] dst_strd
    118 *  WORD32 Destination stride
    119 *
    120 * @returns
    121 *
    122 * @remarks
    123 *  None
    124 *
    125 *******************************************************************************
    126 */
    127 void ihevc_intra_pred_luma_ref_subst_all_avlble(UWORD8 *pu1_top_left,
    128                                                 UWORD8 *pu1_top,
    129                                                 UWORD8 *pu1_left,
    130                                                 WORD32 src_strd,
    131                                                 WORD32 nt,
    132                                                 WORD32 nbr_flags,
    133                                                 UWORD8 *pu1_dst,
    134                                                 WORD32 dst_strd)
    135 {
    136 
    137     WORD32 i;
    138     WORD32 two_nt = 2 * nt;
    139     UNUSED(nbr_flags);
    140     UNUSED(dst_strd);
    141 
    142     /* Neighbor Flag Structure*/
    143     /* MSB ---> LSB */
    144     /*    Top-Left | Top-Right | Top | Left | Bottom-Left
    145               1         4         4     4         4
    146      */
    147     ASSERT((nbr_flags == 0x11188) || (nbr_flags == 0x133CC) || (nbr_flags == 0x1FFFF));
    148     {
    149 
    150         if(nt == 4)
    151         {
    152             /* 1 bit extraction for all the neighboring blocks */
    153 
    154 
    155             /* Else fill the corresponding samples */
    156             pu1_dst[two_nt] = *pu1_top_left;
    157             //if(left)
    158             {
    159                 for(i = 0; i < nt; i++)
    160                     pu1_dst[two_nt - 1 - i] = pu1_left[i * src_strd];
    161             }
    162 //            if(bot_left)
    163             {
    164                 for(i = nt; i < two_nt; i++)
    165                     pu1_dst[two_nt - 1 - i] = pu1_left[i * src_strd];
    166             }
    167 //            if(top)
    168             {
    169                 ihevc_memcpy(&pu1_dst[two_nt + 1], pu1_top, nt);
    170             }
    171 //            if(tp_right)
    172             {
    173                 ihevc_memcpy(&pu1_dst[two_nt + 1 + nt], pu1_top + nt, nt);
    174             }
    175 
    176 
    177         }
    178         else
    179 
    180         {
    181 
    182             /* Else fill the corresponding samples */
    183             ASSERT((nt == 8) || (nt == 16) || (nt == 32));
    184             pu1_dst[two_nt] = *pu1_top_left;
    185 
    186             for(i = 0; i < nt; i++)
    187                 pu1_dst[two_nt - 1 - i] = pu1_left[i * src_strd];
    188 
    189             for(i = nt; i < two_nt; i++)
    190                 pu1_dst[two_nt - 1 - i] = pu1_left[i * src_strd];
    191 
    192             ihevc_memcpy_mul_8(&pu1_dst[two_nt + 1], pu1_top, nt);
    193 
    194             ihevc_memcpy_mul_8(&pu1_dst[two_nt + 1 + nt], pu1_top + nt, nt);
    195         }
    196 
    197     }
    198 }
    199 
    200 
    201 void ihevc_intra_pred_luma_ref_substitution(UWORD8 *pu1_top_left,
    202                                             UWORD8 *pu1_top,
    203                                             UWORD8 *pu1_left,
    204                                             WORD32 src_strd,
    205                                             WORD32 nt,
    206                                             WORD32 nbr_flags,
    207                                             UWORD8 *pu1_dst,
    208                                             WORD32 dst_strd)
    209 {
    210     UWORD8 pu1_ref;
    211     WORD32 dc_val, i;
    212     WORD32 total_samples = (4 * nt) + 1;
    213     WORD32 two_nt = 2 * nt;
    214 
    215     WORD32 three_nt = 3 * nt;
    216     WORD32 get_bits;
    217     WORD32 next;
    218     WORD32 bot_left, left, top, tp_right, tp_left;
    219 
    220     WORD32 idx, nbr_id_from_bl, frwd_nbr_flag;
    221     UNUSED(dst_strd);
    222     /*dc_val = 1 << (BIT_DEPTH - 1);*/
    223     dc_val = 1 << (8 - 1);
    224 
    225 
    226     /* Neighbor Flag Structure*/
    227     /* MSB ---> LSB */
    228     /*    Top-Left | Top-Right | Top | Left | Bottom-Left
    229               1         4         4     4         4
    230      */
    231     /* If no neighbor flags are present, fill the neighbor samples with DC value */
    232     if(nbr_flags == 0)
    233     {
    234         for(i = 0; i < total_samples; i++)
    235         {
    236             pu1_dst[i] = dc_val;
    237         }
    238     }
    239     else
    240     {
    241         if(nt <= 8)
    242         {
    243             /* 1 bit extraction for all the neighboring blocks */
    244             tp_left = (nbr_flags & 0x10000) >> 16;
    245             bot_left = (nbr_flags & 0x8) >> 3;
    246             left = (nbr_flags & 0x80) >> 7;
    247             top = (nbr_flags & 0x100) >> 8;
    248             tp_right = (nbr_flags & 0x1000) >> 12;
    249 
    250             /* Else fill the corresponding samples */
    251             if(tp_left)
    252                 pu1_dst[two_nt] = *pu1_top_left;
    253             else
    254                 pu1_dst[two_nt] = 0;
    255 
    256 
    257             if(left)
    258             {
    259                 for(i = 0; i < nt; i++)
    260                     pu1_dst[two_nt - 1 - i] = pu1_left[i * src_strd];
    261             }
    262             else
    263             {
    264                 ihevc_memset(&pu1_dst[two_nt - 1 - (nt - 1)], 0, nt);
    265             }
    266 
    267 
    268             if(bot_left)
    269             {
    270                 for(i = nt; i < two_nt; i++)
    271                     pu1_dst[two_nt - 1 - i] = pu1_left[i * src_strd];
    272             }
    273             else
    274             {
    275                 ihevc_memset(&pu1_dst[two_nt - 1 - (two_nt - 1)], 0, nt);
    276             }
    277 
    278 
    279             if(top)
    280             {
    281                 ihevc_memcpy(&pu1_dst[two_nt + 1], pu1_top, nt);
    282             }
    283             else
    284             {
    285                 ihevc_memset(&pu1_dst[two_nt + 1], 0, nt);
    286             }
    287 
    288             if(tp_right)
    289             {
    290                 ihevc_memcpy(&pu1_dst[two_nt + 1 + nt], pu1_top + nt, nt);
    291             }
    292             else
    293             {
    294                 ihevc_memset(&pu1_dst[two_nt + 1 + nt], 0, nt);
    295             }
    296             next = 1;
    297 
    298             /* If bottom -left is not available, reverse substitution process*/
    299             if(bot_left == 0)
    300             {
    301                 WORD32 a_nbr_flag[5];
    302                 a_nbr_flag[0] = bot_left;
    303                 a_nbr_flag[1] = left;
    304                 a_nbr_flag[2] = tp_left;
    305                 a_nbr_flag[3] = top;
    306                 a_nbr_flag[4] = tp_right;
    307 
    308                 /* Check for the 1st available sample from bottom-left*/
    309                 while(!a_nbr_flag[next])
    310                     next++;
    311 
    312                 /* If Left, top-left are available*/
    313                 if(next <= 2)
    314                 {
    315                     idx = nt * next;
    316                     pu1_ref = pu1_dst[idx];
    317                     for(i = 0; i < idx; i++)
    318                         pu1_dst[i] = pu1_ref;
    319                 }
    320                 else /* If top, top-right are available */
    321                 {
    322                     /* Idx is changed to copy 1 pixel value for top-left ,if top-left is not available*/
    323                     idx = (nt * (next - 1)) + 1;
    324                     pu1_ref = pu1_dst[idx];
    325                     for(i = 0; i < idx; i++)
    326                         pu1_dst[i] = pu1_ref;
    327                 }
    328             }
    329 
    330             /* Forward Substitution Process */
    331             /* If left is Unavailable, copy the last bottom-left value */
    332             if(left == 0)
    333             {
    334                 ihevc_memset(&pu1_dst[nt], pu1_dst[nt - 1], nt);
    335 
    336             }
    337             /* If top-left is Unavailable, copy the last left value */
    338             if(tp_left == 0)
    339                 pu1_dst[two_nt] = pu1_dst[two_nt - 1];
    340             /* If top is Unavailable, copy the last top-left value */
    341             if(top == 0)
    342             {
    343                 ihevc_memset(&pu1_dst[two_nt + 1], pu1_dst[two_nt], nt);
    344             }
    345             /* If to right is Unavailable, copy the last top value */
    346             if(tp_right == 0)
    347             {
    348                 ihevc_memset(&pu1_dst[three_nt + 1], pu1_dst[three_nt], nt);
    349 
    350             }
    351         }
    352 
    353         if(nt == 16)
    354         {
    355             WORD32 nbr_flags_temp = 0;
    356             nbr_flags_temp = ((nbr_flags & 0xC) >> 2) + ((nbr_flags & 0xC0) >> 4)
    357                             + ((nbr_flags & 0x300) >> 4)
    358                             + ((nbr_flags & 0x3000) >> 6)
    359                             + ((nbr_flags & 0x10000) >> 8);
    360 
    361             /* Else fill the corresponding samples */
    362             if(nbr_flags & 0x10000)
    363                 pu1_dst[two_nt] = *pu1_top_left;
    364             else
    365                 pu1_dst[two_nt] = 0;
    366 
    367             if(nbr_flags & 0xC0)
    368             {
    369                 for(i = 0; i < nt; i++)
    370                     pu1_dst[two_nt - 1 - i] = pu1_left[i * src_strd];
    371             }
    372             else
    373             {
    374                 ihevc_memset_mul_8(&pu1_dst[two_nt - 1 - (nt - 1)], 0, nt);
    375             }
    376 
    377             if(nbr_flags & 0xC)
    378             {
    379                 for(i = nt; i < two_nt; i++)
    380                     pu1_dst[two_nt - 1 - i] = pu1_left[i * src_strd];
    381             }
    382             else
    383             {
    384                 ihevc_memset_mul_8(&pu1_dst[two_nt - 1 - (two_nt - 1)], 0, nt);
    385             }
    386 
    387 
    388             if(nbr_flags & 0x300)
    389             {
    390                 ihevc_memcpy_mul_8(&pu1_dst[two_nt + 1], pu1_top, nt);
    391             }
    392             else
    393             {
    394                 ihevc_memset_mul_8(&pu1_dst[two_nt + 1], 0, nt);
    395             }
    396 
    397             if(nbr_flags & 0x3000)
    398             {
    399                 ihevc_memcpy_mul_8(&pu1_dst[two_nt + 1 + nt], pu1_top + nt, nt);
    400             }
    401             else
    402             {
    403                 ihevc_memset_mul_8(&pu1_dst[two_nt + 1 + nt], 0, nt);
    404             }
    405             /* compute trailing zeors based on nbr_flag for substitution process of below left see section .*/
    406             /* as each bit in nbr flags corresponds to 8 pels for bot_left, left, top and topright but 1 pel for topleft */
    407             {
    408                 nbr_id_from_bl = look_up_trailing_zeros(nbr_flags_temp & 0XF) * 8; /* for below left and left */
    409 
    410                 if(nbr_id_from_bl == 64)
    411                     nbr_id_from_bl = 32;
    412 
    413                 if(nbr_id_from_bl == 32)
    414                 {
    415                     /* for top left : 1 pel per nbr bit */
    416                     if(!((nbr_flags_temp >> 8) & 0x1))
    417                     {
    418                         nbr_id_from_bl++;
    419                         nbr_id_from_bl += look_up_trailing_zeros((nbr_flags_temp >> 4) & 0xF) * 8; /* top and top right;  8 pels per nbr bit */
    420                         //nbr_id_from_bl += idx * 8;
    421                     }
    422                 }
    423                 /* Reverse Substitution Process*/
    424                 if(nbr_id_from_bl)
    425                 {
    426                     /* Replicate the bottom-left and subsequent unavailable pixels with the 1st available pixel above */
    427                     pu1_ref = pu1_dst[nbr_id_from_bl];
    428                     for(i = (nbr_id_from_bl - 1); i >= 0; i--)
    429                     {
    430                         pu1_dst[i] = pu1_ref;
    431                     }
    432                 }
    433             }
    434 
    435             /* for the loop of 4*Nt+1 pixels (excluding pixels computed from reverse substitution) */
    436             while(nbr_id_from_bl < ((T16_4NT)+1))
    437             {
    438                 /* To Obtain the next unavailable idx flag after reverse neighbor substitution  */
    439                 /* Devide by 8 to obtain the original index */
    440                 frwd_nbr_flag = (nbr_id_from_bl >> 3); /*+ (nbr_id_from_bl & 0x1);*/
    441 
    442                 /* The Top-left flag is at the last bit location of nbr_flags*/
    443                 if(nbr_id_from_bl == (T16_4NT / 2))
    444                 {
    445                     get_bits = GET_BITS(nbr_flags_temp, 8);
    446 
    447                     /* only pel substitution for TL */
    448                     if(!get_bits)
    449                         pu1_dst[nbr_id_from_bl] = pu1_dst[nbr_id_from_bl - 1];
    450                 }
    451                 else
    452                 {
    453                     get_bits = GET_BITS(nbr_flags_temp, frwd_nbr_flag);
    454                     if(!get_bits)
    455                     {
    456                         /* 8 pel substitution (other than TL) */
    457                         pu1_ref = pu1_dst[nbr_id_from_bl - 1];
    458                         ihevc_memset_mul_8(pu1_dst + nbr_id_from_bl, pu1_ref, 8);
    459 
    460 
    461                     }
    462 
    463                 }
    464                 nbr_id_from_bl += (nbr_id_from_bl == (T16_4NT / 2)) ? 1 : 8;
    465             }
    466 
    467 
    468         }
    469 
    470         if(nt == 32)
    471         {
    472             /* Else fill the corresponding samples */
    473             if(nbr_flags & 0x10000)
    474                 pu1_dst[two_nt] = *pu1_top_left;
    475             else
    476                 pu1_dst[two_nt] = 0;
    477 
    478             if(nbr_flags & 0xF0)
    479             {
    480                 for(i = 0; i < nt; i++)
    481                     pu1_dst[two_nt - 1 - i] = pu1_left[i * src_strd];
    482             }
    483             else
    484             {
    485                 ihevc_memset_mul_8(&pu1_dst[two_nt - 1 - (nt - 1)], 0, nt);
    486             }
    487 
    488             if(nbr_flags & 0xF)
    489             {
    490                 for(i = nt; i < two_nt; i++)
    491                     pu1_dst[two_nt - 1 - i] = pu1_left[i * src_strd];
    492             }
    493             else
    494             {
    495                 ihevc_memset_mul_8(&pu1_dst[two_nt - 1 - (two_nt - 1)], 0, nt);
    496             }
    497 
    498 
    499             if(nbr_flags & 0xF00)
    500             {
    501                 ihevc_memcpy_mul_8(&pu1_dst[two_nt + 1], pu1_top, nt);
    502             }
    503             else
    504             {
    505                 ihevc_memset_mul_8(&pu1_dst[two_nt + 1], 0, nt);
    506             }
    507 
    508             if(nbr_flags & 0xF000)
    509             {
    510                 ihevc_memcpy_mul_8(&pu1_dst[two_nt + 1 + nt], pu1_top + nt, nt);
    511             }
    512             else
    513             {
    514                 ihevc_memset_mul_8(&pu1_dst[two_nt + 1 + nt], 0, nt);
    515             }
    516             /* compute trailing ones based on mbr_flag for substitution process of below left see section .*/
    517             /* as each bit in nbr flags corresponds to 8 pels for bot_left, left, top and topright but 1 pel for topleft */
    518             {
    519                 nbr_id_from_bl = look_up_trailing_zeros((nbr_flags & 0XFF)) * 8; /* for below left and left */
    520 
    521                 if(nbr_id_from_bl == 64)
    522                 {
    523                     /* for top left : 1 pel per nbr bit */
    524                     if(!((nbr_flags >> 16) & 0x1))
    525                     {
    526                         /* top left not available */
    527                         nbr_id_from_bl++;
    528                         /* top and top right;  8 pels per nbr bit */
    529                         nbr_id_from_bl += look_up_trailing_zeros((nbr_flags >> 8) & 0xFF) * 8;
    530                     }
    531                 }
    532                 /* Reverse Substitution Process*/
    533                 if(nbr_id_from_bl)
    534                 {
    535                     /* Replicate the bottom-left and subsequent unavailable pixels with the 1st available pixel above */
    536                     pu1_ref = pu1_dst[nbr_id_from_bl];
    537                     for(i = (nbr_id_from_bl - 1); i >= 0; i--)
    538                         pu1_dst[i] = pu1_ref;
    539                 }
    540             }
    541 
    542             /* for the loop of 4*Nt+1 pixels (excluding pixels computed from reverse substitution) */
    543             while(nbr_id_from_bl < ((T32_4NT)+1))
    544             {
    545                 /* To Obtain the next unavailable idx flag after reverse neighbor substitution  */
    546                 /* Devide by 8 to obtain the original index */
    547                 frwd_nbr_flag = (nbr_id_from_bl >> 3); /*+ (nbr_id_from_bl & 0x1);*/
    548 
    549                 /* The Top-left flag is at the last bit location of nbr_flags*/
    550                 if(nbr_id_from_bl == (T32_4NT / 2))
    551                 {
    552                     get_bits = GET_BITS(nbr_flags, 16);
    553                     /* only pel substitution for TL */
    554                     if(!get_bits)
    555                         pu1_dst[nbr_id_from_bl] = pu1_dst[nbr_id_from_bl - 1];
    556                 }
    557                 else
    558                 {
    559                     get_bits = GET_BITS(nbr_flags, frwd_nbr_flag);
    560                     if(!get_bits)
    561                     {
    562                         /* 8 pel substitution (other than TL) */
    563                         pu1_ref = pu1_dst[nbr_id_from_bl - 1];
    564                         ihevc_memset_mul_8(&pu1_dst[nbr_id_from_bl], pu1_ref, 8);
    565 
    566                     }
    567 
    568                 }
    569                 nbr_id_from_bl += (nbr_id_from_bl == (T32_4NT / 2)) ? 1 : 8;
    570             }
    571         }
    572 
    573     }
    574 }
    575 
    576 
    577 /**
    578 *******************************************************************************
    579 *
    580 * @brief
    581 *    Intra prediction interpolation filter for ref_filtering
    582 *
    583 *
    584 * @par Description:
    585 *    Reference DC filtering for neighboring samples dependent  on TU size and
    586 *    mode  Refer to section 8.4.4.2.3 in the standard
    587 *
    588 * @param[in] pu1_src
    589 *  UWORD8 pointer to the source
    590 *
    591 * @param[out] pu1_dst
    592 *  UWORD8 pointer to the destination
    593 *
    594 * @param[in] nt
    595 *  integer Transform Block size
    596 *
    597 * @param[in] mode
    598 *  integer intraprediction mode
    599 *
    600 * @returns
    601 *
    602 * @remarks
    603 *  None
    604 *
    605 *******************************************************************************
    606 */
    607 
    608 
    609 void ihevc_intra_pred_ref_filtering(UWORD8 *pu1_src,
    610                                     WORD32 nt,
    611                                     UWORD8 *pu1_dst,
    612                                     WORD32 mode,
    613                                     WORD32 strong_intra_smoothing_enable_flag)
    614 {
    615     WORD32 filter_flag;
    616     WORD32 i; /* Generic indexing variable */
    617     WORD32 four_nt = 4 * nt;
    618     UWORD8 au1_flt[(4 * MAX_CU_SIZE) + 1];
    619     WORD32 bi_linear_int_flag = 0;
    620     WORD32 abs_cond_left_flag = 0;
    621     WORD32 abs_cond_top_flag = 0;
    622     /*WORD32 dc_val = 1 << (BIT_DEPTH - 5);*/
    623     WORD32 dc_val = 1 << (8 - 5);
    624     //WORD32 strong_intra_smoothing_enable_flag  = 1;
    625 
    626     filter_flag = gau1_intra_pred_ref_filter[mode] & (1 << (CTZ(nt) - 2));
    627     if(0 == filter_flag)
    628     {
    629         if(pu1_src == pu1_dst)
    630         {
    631             return;
    632         }
    633         else
    634         {
    635             for(i = 0; i < (four_nt + 1); i++)
    636                 pu1_dst[i] = pu1_src[i];
    637         }
    638     }
    639 
    640     else
    641     {
    642         /* If strong intra smoothin is enabled and transform size is 32 */
    643         if((1 == strong_intra_smoothing_enable_flag) && (32 == nt))
    644         {
    645             /* Strong Intra Filtering */
    646             abs_cond_top_flag = (ABS(pu1_src[2 * nt] + pu1_src[4 * nt]
    647                             - (2 * pu1_src[3 * nt]))) < dc_val;
    648             abs_cond_left_flag = (ABS(pu1_src[2 * nt] + pu1_src[0]
    649                             - (2 * pu1_src[nt]))) < dc_val;
    650 
    651             bi_linear_int_flag = ((1 == abs_cond_left_flag)
    652                             && (1 == abs_cond_top_flag));
    653         }
    654         /* Extremities Untouched*/
    655         au1_flt[0] = pu1_src[0];
    656         au1_flt[4 * nt] = pu1_src[4 * nt];
    657 
    658         /* Strong filtering of reference samples */
    659         if(1 == bi_linear_int_flag)
    660         {
    661             au1_flt[2 * nt] = pu1_src[2 * nt];
    662 
    663             for(i = 1; i < (2 * nt); i++)
    664                 au1_flt[i] = (((2 * nt) - i) * pu1_src[0] + i * pu1_src[2 * nt] + 32) >> 6;
    665 
    666             for(i = 1; i < (2 * nt); i++)
    667                 au1_flt[i + (2 * nt)] = (((2 * nt) - i) * pu1_src[2 * nt] + i * pu1_src[4 * nt] + 32) >> 6;
    668 
    669         }
    670         else
    671         {
    672             /* Perform bilinear filtering of Reference Samples */
    673             for(i = 0; i < (four_nt - 1); i++)
    674             {
    675                 au1_flt[i + 1] = (pu1_src[i] + 2 * pu1_src[i + 1]
    676                                 + pu1_src[i + 2] + 2) >> 2;
    677             }
    678         }
    679 
    680 
    681         for(i = 0; i < (four_nt + 1); i++)
    682             pu1_dst[i] = au1_flt[i];
    683     }
    684 
    685 }
    686 
    687 
    688 /**
    689 *******************************************************************************
    690 *
    691 * @brief
    692 *    Intra prediction interpolation filter for luma planar
    693 *
    694 * @par Description:
    695 *    Planar Intraprediction with reference neighboring samples location
    696 *    pointed by 'pu1_ref' to the TU block location  pointed by 'pu1_dst'  Refer
    697 *    to section 8.4.4.2.4 in the standard
    698 *
    699 * @param[in] pu1_src
    700 *  UWORD8 pointer to the source
    701 *
    702 * @param[out] pu1_dst
    703 *  UWORD8 pointer to the destination
    704 *
    705 * @param[in] src_strd
    706 *  integer source stride
    707 *
    708 * @param[in] dst_strd
    709 *  integer destination stride
    710 *
    711 * @param[in] nt
    712 *  integer Transform Block size
    713 *
    714 * @param[in] mode
    715 *  integer intraprediction mode
    716 *
    717 * @returns
    718 *
    719 * @remarks
    720 *  None
    721 *
    722 *******************************************************************************
    723 */
    724 
    725 
    726 void ihevc_intra_pred_luma_planar(UWORD8 *pu1_ref,
    727                                   WORD32 src_strd,
    728                                   UWORD8 *pu1_dst,
    729                                   WORD32 dst_strd,
    730                                   WORD32 nt,
    731                                   WORD32 mode)
    732 {
    733 
    734 
    735     WORD32 row, col;
    736     WORD32 log2nt = 5;
    737     WORD32 two_nt, three_nt;
    738     UNUSED(src_strd);
    739     UNUSED(mode);
    740     switch(nt)
    741     {
    742         case 32:
    743             log2nt = 5;
    744             break;
    745         case 16:
    746             log2nt = 4;
    747             break;
    748         case 8:
    749             log2nt = 3;
    750             break;
    751         case 4:
    752             log2nt = 2;
    753             break;
    754         default:
    755             break;
    756     }
    757     two_nt = 2 * nt;
    758     three_nt = 3 * nt;
    759     /* Planar filtering */
    760     for(row = 0; row < nt; row++)
    761     {
    762         for(col = 0; col < nt; col++)
    763         {
    764             pu1_dst[row * dst_strd + col] = ((nt - 1 - col)
    765                             * pu1_ref[two_nt - 1 - row]
    766                             + (col + 1) * pu1_ref[three_nt + 1]
    767                             + (nt - 1 - row) * pu1_ref[two_nt + 1 + col]
    768                             + (row + 1) * pu1_ref[nt - 1] + nt) >> (log2nt + 1);
    769         }
    770     }
    771 }
    772 
    773 
    774 /**
    775 *******************************************************************************
    776 *
    777 * @brief
    778 *    Intra prediction interpolation filter for luma dc
    779 *
    780 * @par Description:
    781 *   Intraprediction for DC mode with reference neighboring  samples location
    782 *   pointed by 'pu1_ref' to the TU block  location pointed by 'pu1_dst'  Refer
    783 *   to section 8.4.4.2.5 in the standard
    784 *
    785 * @param[in] pu1_src
    786 *  UWORD8 pointer to the source
    787 *
    788 * @param[out] pu1_dst
    789 *  UWORD8 pointer to the destination
    790 *
    791 * @param[in] src_strd
    792 *  integer source stride
    793 *
    794 * @param[in] dst_strd
    795 *  integer destination stride
    796 *
    797 * @param[in] nt
    798 *  integer Transform Block size
    799 *
    800 * @param[in] mode
    801 *  integer intraprediction mode
    802 *
    803 * @returns
    804 *
    805 * @remarks
    806 *  None
    807 *
    808 *******************************************************************************
    809 */
    810 
    811 
    812 void ihevc_intra_pred_luma_dc(UWORD8 *pu1_ref,
    813                               WORD32 src_strd,
    814                               UWORD8 *pu1_dst,
    815                               WORD32 dst_strd,
    816                               WORD32 nt,
    817                               WORD32 mode)
    818 {
    819 
    820     WORD32 acc_dc;
    821     WORD32 dc_val, two_dc_val, three_dc_val;
    822     WORD32 i;
    823     WORD32 row, col;
    824     WORD32 log2nt = 5;
    825     WORD32 two_nt, three_nt;
    826     UNUSED(mode);
    827     UNUSED(src_strd);
    828     switch(nt)
    829     {
    830         case 32:
    831             log2nt = 5;
    832             break;
    833         case 16:
    834             log2nt = 4;
    835             break;
    836         case 8:
    837             log2nt = 3;
    838             break;
    839         case 4:
    840             log2nt = 2;
    841             break;
    842         default:
    843             break;
    844     }
    845     two_nt = 2 * nt;
    846     three_nt = 3 * nt;
    847 
    848     acc_dc = 0;
    849     /* Calculate DC value for the transform block */
    850     for(i = nt; i < two_nt; i++)
    851         acc_dc += pu1_ref[i];
    852 
    853     for(i = (two_nt + 1); i <= three_nt; i++)
    854         acc_dc += pu1_ref[i];
    855 
    856     dc_val = (acc_dc + nt) >> (log2nt + 1);
    857 
    858     two_dc_val = 2 * dc_val;
    859     three_dc_val = 3 * dc_val;
    860 
    861 
    862     if(nt == 32)
    863     {
    864         for(row = 0; row < nt; row++)
    865             for(col = 0; col < nt; col++)
    866                 pu1_dst[(row * dst_strd) + col] = dc_val;
    867     }
    868     else
    869     {
    870         /* DC filtering for the first top row and first left column */
    871         pu1_dst[0] = ((pu1_ref[two_nt - 1] + two_dc_val + pu1_ref[two_nt + 1] + 2)
    872                         >> 2);
    873 
    874         for(col = 1; col < nt; col++)
    875             pu1_dst[col] = (pu1_ref[two_nt + 1 + col] + three_dc_val + 2) >> 2;
    876 
    877         for(row = 1; row < nt; row++)
    878             pu1_dst[row * dst_strd] = (pu1_ref[two_nt - 1 - row] + three_dc_val + 2)
    879                             >> 2;
    880 
    881         /* Fill the remaining rows with DC value*/
    882         for(row = 1; row < nt; row++)
    883             for(col = 1; col < nt; col++)
    884                 pu1_dst[(row * dst_strd) + col] = dc_val;
    885     }
    886 }
    887 
    888 
    889 
    890 /**
    891 *******************************************************************************
    892 *
    893 * @brief
    894 *     Intra prediction interpolation filter for horizontal luma variable.
    895 *
    896 * @par Description:
    897 *      Horizontal intraprediction(mode 10) with reference  samples location
    898 *      pointed by 'pu1_ref' to the TU block  location pointed by 'pu1_dst'  Refer
    899 *      to section 8.4.4.2.6 in the standard (Special case)
    900 *
    901 * @param[in] pu1_src
    902 *  UWORD8 pointer to the source
    903 *
    904 * @param[out] pu1_dst
    905 *  UWORD8 pointer to the destination
    906 *
    907 * @param[in] src_strd
    908 *  integer source stride
    909 *
    910 * @param[in] dst_strd
    911 *  integer destination stride
    912 *
    913 * @param[in] nt
    914 *  integer Transform Block size
    915 *
    916 * @param[in] mode
    917 *  integer intraprediction mode
    918 *
    919 * @returns
    920 *
    921 * @remarks
    922 *  None
    923 *
    924 *******************************************************************************
    925 */
    926 
    927 
    928 void ihevc_intra_pred_luma_horz(UWORD8 *pu1_ref,
    929                                 WORD32 src_strd,
    930                                 UWORD8 *pu1_dst,
    931                                 WORD32 dst_strd,
    932                                 WORD32 nt,
    933                                 WORD32 mode)
    934 {
    935 
    936     WORD32 row, col;
    937     WORD32 two_nt;
    938     WORD16 s2_predpixel;
    939     UNUSED(mode);
    940     UNUSED(src_strd);
    941     two_nt = 2 * nt;
    942 
    943     if(nt == 32)
    944     {
    945         for(row = 0; row < nt; row++)
    946             for(col = 0; col < nt; col++)
    947                 pu1_dst[(row * dst_strd) + col] = pu1_ref[two_nt - 1 - row];
    948     }
    949     else
    950     {
    951         /*Filtering done for the 1st row */
    952         for(col = 0; col < nt; col++)
    953         {
    954             s2_predpixel = pu1_ref[two_nt - 1]
    955                             + ((pu1_ref[two_nt + 1 + col] - pu1_ref[two_nt]) >> 1);
    956             pu1_dst[col] = CLIP_U8(s2_predpixel);
    957         }
    958 
    959         /* Replication to next rows*/
    960         for(row = 1; row < nt; row++)
    961             for(col = 0; col < nt; col++)
    962                 pu1_dst[(row * dst_strd) + col] = pu1_ref[two_nt - 1 - row];
    963     }
    964 }
    965 
    966 
    967 
    968 
    969 
    970 /**
    971 *******************************************************************************
    972 *
    973 * @brief
    974 *     Intra prediction interpolation filter for vertical luma variable.
    975 *
    976 * @par Description:
    977 *    Horizontal intraprediction with reference neighboring  samples location
    978 *    pointed by 'pu1_ref' to the TU block  location pointed by 'pu1_dst'  Refer
    979 *    to section 8.4.4.2.6 in the standard (Special case)
    980 *
    981 * @param[in] pu1_src
    982 *  UWORD8 pointer to the source
    983 *
    984 * @param[out] pu1_dst
    985 *  UWORD8 pointer to the destination
    986 *
    987 * @param[in] src_strd
    988 *  integer source stride
    989 *
    990 * @param[in] dst_strd
    991 *  integer destination stride
    992 *
    993 * @param[in] nt
    994 *  integer Transform Block size
    995 *
    996 * @param[in] mode
    997 *  integer intraprediction mode
    998 *
    999 * @returns
   1000 *
   1001 * @remarks
   1002 *  None
   1003 *
   1004 *******************************************************************************
   1005 */
   1006 
   1007 
   1008 void ihevc_intra_pred_luma_ver(UWORD8 *pu1_ref,
   1009                                WORD32 src_strd,
   1010                                UWORD8 *pu1_dst,
   1011                                WORD32 dst_strd,
   1012                                WORD32 nt,
   1013                                WORD32 mode)
   1014 {
   1015     WORD32 row, col;
   1016     WORD16 s2_predpixel;
   1017     WORD32 two_nt = 2 * nt;
   1018     UNUSED(mode);
   1019     UNUSED(src_strd);
   1020 
   1021     if(nt == 32)
   1022     {
   1023         /* Replication to next columns*/
   1024         for(row = 0; row < nt; row++)
   1025             for(col = 0; col < nt; col++)
   1026                 pu1_dst[(row * dst_strd) + col] = pu1_ref[two_nt + 1 + col];
   1027     }
   1028     else
   1029     {
   1030         /*Filtering done for the 1st column */
   1031         for(row = 0; row < nt; row++)
   1032         {
   1033             s2_predpixel = pu1_ref[two_nt + 1]
   1034                             + ((pu1_ref[two_nt - 1 - row] - pu1_ref[two_nt]) >> 1);
   1035             pu1_dst[row * dst_strd] = CLIP_U8(s2_predpixel);
   1036         }
   1037 
   1038         /* Replication to next columns*/
   1039         for(row = 0; row < nt; row++)
   1040             for(col = 1; col < nt; col++)
   1041                 pu1_dst[(row * dst_strd) + col] = pu1_ref[two_nt + 1 + col];
   1042     }
   1043 }
   1044 
   1045 
   1046 
   1047 
   1048 /**
   1049 *******************************************************************************
   1050 *
   1051 * @brief
   1052 *     Intra prediction interpolation filter for luma mode2.
   1053 *
   1054 * @par Description:
   1055 *    Intraprediction for mode 2 (sw angle) with reference  neighboring samples
   1056 *    location pointed by 'pu1_ref' to the  TU block location pointed by
   1057 *    'pu1_dst'  Refer to section 8.4.4.2.6 in the standard
   1058 *
   1059 * @param[in] pu1_src
   1060 *  UWORD8 pointer to the source
   1061 *
   1062 * @param[out] pu1_dst
   1063 *  UWORD8 pointer to the destination
   1064 *
   1065 * @param[in] src_strd
   1066 *  integer source stride
   1067 *
   1068 * @param[in] dst_strd
   1069 *  integer destination stride
   1070 *
   1071 * @param[in] nt
   1072 *  integer Transform Block size
   1073 *
   1074 * @param[in] mode
   1075 *  integer intraprediction mode
   1076 *
   1077 * @returns
   1078 *
   1079 * @remarks
   1080 *  None
   1081 *
   1082 *******************************************************************************
   1083 */
   1084 
   1085 
   1086 void ihevc_intra_pred_luma_mode2(UWORD8 *pu1_ref,
   1087                                  WORD32 src_strd,
   1088                                  UWORD8 *pu1_dst,
   1089                                  WORD32 dst_strd,
   1090                                  WORD32 nt,
   1091                                  WORD32 mode)
   1092 {
   1093     WORD32 row, col;
   1094     WORD32 two_nt = 2 * nt;
   1095     WORD32 intra_pred_ang = 32;
   1096     WORD32 idx = 0;
   1097     UNUSED(mode);
   1098     UNUSED(src_strd);
   1099     /* For the angle 45, replication is done from the corresponding angle */
   1100     /* intra_pred_ang = tan(angle) in q5 format */
   1101     for(col = 0; col < nt; col++)
   1102     {
   1103         idx = ((col + 1) * intra_pred_ang) >> 5; /* Use idx++ */
   1104 
   1105         for(row = 0; row < nt; row++)
   1106             pu1_dst[col + (row * dst_strd)] = pu1_ref[two_nt - row - idx - 1];
   1107     }
   1108 
   1109 }
   1110 
   1111 
   1112 /**
   1113 *******************************************************************************
   1114 *
   1115 * @brief
   1116 *    Intra prediction interpolation filter for luma mode 18 & mode 34.
   1117 *
   1118 * @par Description:
   1119 *    Intraprediction for mode 34 (ne angle) and  mode 18 (nw angle) with
   1120 *    reference  neighboring samples location pointed by 'pu1_ref' to the  TU
   1121 *    block location pointed by 'pu1_dst'
   1122 *
   1123 * @param[in] pu1_src
   1124 *  UWORD8 pointer to the source
   1125 *
   1126 * @param[out] pu1_dst
   1127 *  UWORD8 pointer to the destination
   1128 *
   1129 * @param[in] src_strd
   1130 *  integer source stride
   1131 *
   1132 * @param[in] dst_strd
   1133 *  integer destination stride
   1134 *
   1135 * @param[in] nt
   1136 *  integer Transform Block size
   1137 *
   1138 * @param[in] mode
   1139 *  integer intraprediction mode
   1140 *
   1141 * @returns
   1142 *
   1143 * @remarks
   1144 *  None
   1145 *
   1146 *******************************************************************************
   1147 */
   1148 
   1149 
   1150 void ihevc_intra_pred_luma_mode_18_34(UWORD8 *pu1_ref,
   1151                                       WORD32 src_strd,
   1152                                       UWORD8 *pu1_dst,
   1153                                       WORD32 dst_strd,
   1154                                       WORD32 nt,
   1155                                       WORD32 mode)
   1156 {
   1157     WORD32 row, col;
   1158     WORD32 intra_pred_ang;
   1159     WORD32 idx = 0;
   1160     WORD32 two_nt = 2 * nt;
   1161     UNUSED(src_strd);
   1162     intra_pred_ang = 32;    /*Default value*/
   1163 
   1164     /* For mode 18, angle is -45degree */
   1165     if(mode == 18)
   1166         intra_pred_ang = -32;
   1167     /* For mode 34, angle is 45degree */
   1168     else if(mode == 34)
   1169         intra_pred_ang = 32;
   1170     /* For the angle 45 and -45, replication is done from the corresponding angle */
   1171     /* No interpolation is done for 45 degree*/
   1172     for(row = 0; row < nt; row++)
   1173     {
   1174         idx = ((row + 1) * intra_pred_ang) >> 5;
   1175 #if OPT
   1176         if(mode == 18)
   1177             idx--;
   1178         if(mode == 34)
   1179             idx++;
   1180 #endif
   1181         for(col = 0; col < nt; col++)
   1182             pu1_dst[col + (row * dst_strd)] = pu1_ref[two_nt + col + idx + 1];
   1183 
   1184     }
   1185 
   1186 }
   1187 
   1188 
   1189 /**
   1190 *******************************************************************************
   1191 *
   1192 * @brief
   1193 *    Intra prediction interpolation filter for luma mode 3 to mode 9
   1194 *
   1195 * @par Description:
   1196 *    Intraprediction for mode 3 to 9  (positive angle, horizontal mode ) with
   1197 *    reference  neighboring samples location pointed by 'pu1_ref' to the  TU
   1198 *    block location pointed by 'pu1_dst'
   1199 *
   1200 * @param[in] pu1_src
   1201 *  UWORD8 pointer to the source
   1202 *
   1203 * @param[out] pu1_dst
   1204 *  UWORD8 pointer to the destination
   1205 *
   1206 * @param[in] src_strd
   1207 *  integer source stride
   1208 *
   1209 * @param[in] dst_strd
   1210 *  integer destination stride
   1211 *
   1212 * @param[in] nt
   1213 *  integer Transform Block size
   1214 *
   1215 * @param[in] mode
   1216 *  integer intraprediction mode
   1217 *
   1218 * @returns
   1219 *
   1220 * @remarks
   1221 *  None
   1222 *
   1223 *******************************************************************************
   1224 */
   1225 
   1226 
   1227 void ihevc_intra_pred_luma_mode_3_to_9(UWORD8 *pu1_ref,
   1228                                        WORD32 src_strd,
   1229                                        UWORD8 *pu1_dst,
   1230                                        WORD32 dst_strd,
   1231                                        WORD32 nt,
   1232                                        WORD32 mode)
   1233 {
   1234     WORD32 row, col;
   1235     WORD32 two_nt = 2 * nt;
   1236     WORD32 intra_pred_ang;
   1237     WORD32 idx, ref_main_idx;
   1238     WORD32 pos, fract;
   1239     UNUSED(src_strd);
   1240     /* Intra Pred Angle according to the mode */
   1241     intra_pred_ang = gai4_ihevc_ang_table[mode];
   1242 
   1243     /* For the angles other then 45 degree, interpolation btw 2 neighboring */
   1244     /* samples dependent on distance to obtain destination sample */
   1245 
   1246     for(col = 0; col < nt; col++)
   1247     {
   1248         pos = ((col + 1) * intra_pred_ang);
   1249         idx = pos >> 5;
   1250         fract = pos & (31);
   1251 
   1252         // Do linear filtering
   1253         for(row = 0; row < nt; row++)
   1254         {
   1255             ref_main_idx = two_nt - row - idx - 1;
   1256             pu1_dst[col + (row * dst_strd)] = (((32 - fract)
   1257                             * pu1_ref[ref_main_idx]
   1258                             + fract * pu1_ref[ref_main_idx - 1] + 16) >> 5);
   1259         }
   1260 
   1261     }
   1262 
   1263 }
   1264 
   1265 
   1266 /**
   1267 *******************************************************************************
   1268 *
   1269 * @brief
   1270 *   Intra prediction interpolation filter for luma mode 11 to mode 17
   1271 *
   1272 * @par Description:
   1273 *    Intraprediction for mode 11 to 17  (negative angle, horizontal mode )
   1274 *    with reference  neighboring samples location pointed by 'pu1_ref' to the
   1275 *    TU block location pointed by 'pu1_dst'
   1276 *
   1277 * @param[in] pu1_src
   1278 *  UWORD8 pointer to the source
   1279 *
   1280 * @param[out] pu1_dst
   1281 *  UWORD8 pointer to the destination
   1282 *
   1283 * @param[in] src_strd
   1284 *  integer source stride
   1285 *
   1286 * @param[in] dst_strd
   1287 *  integer destination stride
   1288 *
   1289 * @param[in] nt
   1290 *  integer Transform Block size
   1291 *
   1292 * @param[in] mode
   1293 *  integer intraprediction mode
   1294 *
   1295 * @returns
   1296 *
   1297 * @remarks
   1298 *  None
   1299 *
   1300 *******************************************************************************
   1301 */
   1302 
   1303 
   1304 void ihevc_intra_pred_luma_mode_11_to_17(UWORD8 *pu1_ref,
   1305                                          WORD32 src_strd,
   1306                                          UWORD8 *pu1_dst,
   1307                                          WORD32 dst_strd,
   1308                                          WORD32 nt,
   1309                                          WORD32 mode)
   1310 {
   1311     /* This function and ihevc_intra_pred_luma_mode_19_to_25 are same except*/
   1312     /* for ref main & side samples assignment,can be combined for */
   1313     /* optimzation*/
   1314 
   1315     WORD32 row, col, k;
   1316     WORD32 two_nt;
   1317     WORD32 intra_pred_ang, inv_ang, inv_ang_sum;
   1318     WORD32 idx, ref_main_idx, ref_idx;
   1319     WORD32 pos, fract;
   1320 
   1321     UWORD8 ref_temp[2 * MAX_CU_SIZE + 1];
   1322     UWORD8 *ref_main;
   1323     UNUSED(src_strd);
   1324     inv_ang_sum = 128;
   1325     two_nt    = 2 * nt;
   1326 
   1327     intra_pred_ang = gai4_ihevc_ang_table[mode];
   1328 
   1329     inv_ang = gai4_ihevc_inv_ang_table[mode - 11];
   1330     /* Intermediate reference samples for negative angle modes */
   1331     /* This have to be removed during optimization*/
   1332     /* For horizontal modes, (ref main = ref left) (ref side = ref above) */
   1333 
   1334     ref_main = ref_temp + nt - 1;
   1335     for(k = 0; k < nt + 1; k++)
   1336         ref_temp[k + nt - 1] = pu1_ref[two_nt - k];
   1337 
   1338     ref_main = ref_temp + nt - 1;
   1339     ref_idx = (nt * intra_pred_ang) >> 5;
   1340 
   1341     /* SIMD Optimization can be done using look-up table for the loop */
   1342     /* For negative angled derive the main reference samples from side */
   1343     /*  reference samples refer to section 8.4.4.2.6 */
   1344     for(k = -1; k > ref_idx; k--)
   1345     {
   1346         inv_ang_sum += inv_ang;
   1347         ref_main[k] = pu1_ref[two_nt + (inv_ang_sum >> 8)];
   1348     }
   1349 
   1350     /* For the angles other then 45 degree, interpolation btw 2 neighboring */
   1351     /* samples dependent on distance to obtain destination sample */
   1352     for(col = 0; col < nt; col++)
   1353     {
   1354         pos = ((col + 1) * intra_pred_ang);
   1355         idx = pos >> 5;
   1356         fract = pos & (31);
   1357 
   1358         // Do linear filtering
   1359         for(row = 0; row < nt; row++)
   1360         {
   1361             ref_main_idx = row + idx + 1;
   1362             pu1_dst[col + (dst_strd * row)] = (UWORD8)(((32 - fract)
   1363                             * ref_main[ref_main_idx]
   1364                             + fract * ref_main[ref_main_idx + 1] + 16) >> 5);
   1365 
   1366         }
   1367 
   1368     }
   1369 
   1370 }
   1371 
   1372 
   1373 
   1374 /**
   1375 *******************************************************************************
   1376 *
   1377 * @brief
   1378 *   Intra prediction interpolation filter for luma mode 19 to mode 25
   1379 *
   1380 * @par Description:
   1381 *    Intraprediction for mode 19 to 25  (negative angle, vertical mode ) with
   1382 *    reference  neighboring samples location pointed by 'pu1_ref' to the  TU
   1383 *    block location pointed by 'pu1_dst'
   1384 *
   1385 * @param[in] pu1_src
   1386 *  UWORD8 pointer to the source
   1387 *
   1388 * @param[out] pu1_dst
   1389 *  UWORD8 pointer to the destination
   1390 *
   1391 * @param[in] src_strd
   1392 *  integer source stride
   1393 *
   1394 * @param[in] dst_strd
   1395 *  integer destination stride
   1396 *
   1397 * @param[in] nt
   1398 *  integer Transform Block size
   1399 *
   1400 * @param[in] mode
   1401 *  integer intraprediction mode
   1402 *
   1403 * @returns
   1404 *
   1405 * @remarks
   1406 *  None
   1407 *
   1408 *******************************************************************************
   1409 */
   1410 
   1411 
   1412 void ihevc_intra_pred_luma_mode_19_to_25(UWORD8 *pu1_ref,
   1413                                          WORD32 src_strd,
   1414                                          UWORD8 *pu1_dst,
   1415                                          WORD32 dst_strd,
   1416                                          WORD32 nt,
   1417                                          WORD32 mode)
   1418 {
   1419 
   1420     WORD32 row, col, k;
   1421     WORD32 two_nt, intra_pred_ang, idx;
   1422     WORD32 inv_ang, inv_ang_sum, pos, fract;
   1423     WORD32 ref_main_idx, ref_idx;
   1424     UWORD8 ref_temp[(2 * MAX_CU_SIZE) + 1];
   1425     UWORD8 *ref_main;
   1426     UNUSED(src_strd);
   1427     two_nt = 2 * nt;
   1428     intra_pred_ang = gai4_ihevc_ang_table[mode];
   1429     inv_ang = gai4_ihevc_inv_ang_table[mode - 12];
   1430 
   1431     /* Intermediate reference samples for negative angle modes */
   1432     /* This have to be removed during optimization*/
   1433     /* For horizontal modes, (ref main = ref above) (ref side = ref left) */
   1434     ref_main = ref_temp + nt - 1;
   1435     for(k = 0; k < (nt + 1); k++)
   1436         ref_temp[k + nt - 1] = pu1_ref[two_nt + k];
   1437 
   1438     ref_idx = (nt * intra_pred_ang) >> 5;
   1439     inv_ang_sum = 128;
   1440 
   1441     /* SIMD Optimization can be done using look-up table for the loop */
   1442     /* For negative angled derive the main reference samples from side */
   1443     /*  reference samples refer to section 8.4.4.2.6 */
   1444     for(k = -1; k > ref_idx; k--)
   1445     {
   1446         inv_ang_sum += inv_ang;
   1447         ref_main[k] = pu1_ref[two_nt - (inv_ang_sum >> 8)];
   1448     }
   1449 
   1450     for(row = 0; row < nt; row++)
   1451     {
   1452         pos = ((row + 1) * intra_pred_ang);
   1453         idx = pos >> 5;
   1454         fract = pos & (31);
   1455 
   1456         // Do linear filtering
   1457         for(col = 0; col < nt; col++)
   1458         {
   1459             ref_main_idx = col + idx + 1;
   1460             pu1_dst[(row * dst_strd) + col] = (UWORD8)(((32 - fract)
   1461                             * ref_main[ref_main_idx]
   1462                             + fract * ref_main[ref_main_idx + 1] + 16) >> 5);
   1463 
   1464         }
   1465 
   1466     }
   1467 
   1468 }
   1469 
   1470 
   1471 
   1472 /**
   1473 *******************************************************************************
   1474 *
   1475 * @brief
   1476 *    Intra prediction interpolation filter for luma mode 27 to mode 33
   1477 *
   1478 * @par Description:
   1479 *    Intraprediction for mode 27 to 33  (positive angle, vertical mode ) with
   1480 *    reference  neighboring samples location pointed by 'pu1_ref' to the  TU
   1481 *    block location pointed by 'pu1_dst'
   1482 *
   1483 * @param[in] pu1_src
   1484 *  UWORD8 pointer to the source
   1485 *
   1486 * @param[out] pu1_dst
   1487 *  UWORD8 pointer to the destination
   1488 *
   1489 * @param[in] src_strd
   1490 *  integer source stride
   1491 *
   1492 * @param[in] dst_strd
   1493 *  integer destination stride
   1494 *
   1495 * @param[in] nt
   1496 *  integer Transform Block size
   1497 *
   1498 * @param[in] mode
   1499 *  integer intraprediction mode
   1500 *
   1501 * @returns
   1502 *
   1503 * @remarks
   1504 *  None
   1505 *
   1506 *******************************************************************************
   1507 */
   1508 
   1509 
   1510 void ihevc_intra_pred_luma_mode_27_to_33(UWORD8 *pu1_ref,
   1511                                          WORD32 src_strd,
   1512                                          UWORD8 *pu1_dst,
   1513                                          WORD32 dst_strd,
   1514                                          WORD32 nt,
   1515                                          WORD32 mode)
   1516 {
   1517     WORD32 row, col;
   1518     WORD32 two_nt, pos, fract;
   1519     WORD32 intra_pred_ang;
   1520     WORD32 idx, ref_main_idx;
   1521     UNUSED(src_strd);
   1522     two_nt = 2 * nt;
   1523     intra_pred_ang = gai4_ihevc_ang_table[mode];
   1524 
   1525     for(row = 0; row < nt; row++)
   1526     {
   1527         pos = ((row + 1) * intra_pred_ang);
   1528         idx = pos >> 5;
   1529         fract = pos & (31);
   1530 
   1531         // Do linear filtering
   1532         for(col = 0; col < nt; col++)
   1533         {
   1534             ref_main_idx = two_nt + col + idx + 1;
   1535             pu1_dst[col + (row * dst_strd)] = (((32 - fract)
   1536                             * pu1_ref[ref_main_idx]
   1537                             + fract * pu1_ref[ref_main_idx + 1] + 16) >> 5);
   1538         }
   1539 
   1540     }
   1541 
   1542 }
   1543 
   1544