Home | History | Annotate | Download | only in arm
      1 /******************************************************************************
      2 *
      3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
      4 *
      5 * Licensed under the Apache License, Version 2.0 (the "License");
      6 * you may not use this file except in compliance with the License.
      7 * You may obtain a copy of the License at:
      8 *
      9 * http://www.apache.org/licenses/LICENSE-2.0
     10 *
     11 * Unless required by applicable law or agreed to in writing, software
     12 * distributed under the License is distributed on an "AS IS" BASIS,
     13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 * See the License for the specific language governing permissions and
     15 * limitations under the License.
     16 *
     17 ******************************************************************************/
     18 /**
     19 *******************************************************************************
     20 * @file
     21 *  ihevcd_intra_ref_substitution.c
     22 *
     23 * @brief
     24 *  Contains ref substitution functions
     25 *
     26 * @author
     27 *  Naveen
     28 *
* @par List of Functions:
*  - ihevc_intra_pred_chroma_ref_substitution_a9q()
*  - ihevc_intra_pred_luma_ref_substitution_a9q()
*
     30 * @remarks
     31 *  None
     32 *
     33 *******************************************************************************
     34 */
     35 /*****************************************************************************/
     36 /* File Includes                                                             */
     37 /*****************************************************************************/
     38 #include <stdio.h>
     39 #include <stddef.h>
     40 #include <stdlib.h>
     41 #include <string.h>
     42 
     43 #include "ihevc_typedefs.h"
     44 #include "ihevc_platform_macros.h"
     45 #include "ihevc_intra_pred.h"
     46 #include "ihevc_mem_fns.h"
     47 #include "ihevc_chroma_intra_pred.h"
     48 #include "ihevc_common_tables.h"
     49 #include "ihevc_defs.h"
     50 #include "ihevc_mem_fns.h"
     51 #include "ihevc_macros.h"
     52 
     53 #define MAX_CU_SIZE 64
     54 #define BIT_DEPTH 8
     55 #define T32_4NT 128
     56 #define T16_4NT 64
     57 #define T16C_4NT 64
     58 #define T8C_4NT 32
     59 /****************************************************************************/
     60 /* Function Macros                                                          */
     61 /****************************************************************************/
     62 
     63 #define GET_BIT(y,x) ((y) & (1 << x)) && (1 << x)
     64 #define GET_BITS(y,x) ((y) & (1 << x)) && (1 << x)
     65 /**
     66 *******************************************************************************
     67 *
     68 * @brief
     69 *  Reference substitution process for samples unavailable  for prediction
     70 * Refer to section 8.4.4.2.2
     71 *
     72 * @par Description:
     73 *
     74 *
     75 * @param[in] pu1_top_left
     76 *  UWORD8 pointer to the top-left
     77 *
     78 * @param[in] pu1_top
     79 *  UWORD8 pointer to the top
     80 *
     81 * @param[in] pu1_left
     82 *  UWORD8 pointer to the left
     83 *
     84 * @param[in] src_strd
     85 *  WORD32 Source stride
     86 *
     87 * @param[in] nbr_flags
     88 *  WORD32 neighbor availability flags
     89 *
     90 * @param[in] nt
     91 *  WORD32 transform Block size
     92 *
     93 * @param[in] dst_strd
     94 *  WORD32 Destination stride
     95 *
     96 * @returns
     97 *
     98 * @remarks
     99 *  None
    100 *
    101 *******************************************************************************
    102 */
    103 
    104 void ihevc_intra_pred_chroma_ref_substitution_a9q(UWORD8 *pu1_top_left,
    105                                                   UWORD8 *pu1_top,
    106                                                   UWORD8 *pu1_left,
    107                                                   WORD32 src_strd,
    108                                                   WORD32 nt,
    109                                                   WORD32 nbr_flags,
    110                                                   UWORD8 *pu1_dst,
    111                                                   WORD32 dst_strd)
    112 {
    113     UWORD8 pu1_ref_u, pu1_ref_v;
    114     WORD32 dc_val, i, j;
    115     WORD32 total_samples = (4 * nt) + 1;
    116     WORD32 get_bits;
    117     WORD32 next;
    118     WORD32 bot_left, left, top, tp_right, tp_left;
    119     WORD32 idx, nbr_id_from_bl, frwd_nbr_flag;
    120     WORD32 a_nbr_flag[5];
    121     UNUSED(dst_strd);
    122     /* Neighbor Flag Structure*/
    123     /* WORD32 nbr_flags MSB-->LSB   TOP LEFT | TOP-RIGHT |  TOP   | LEFT    | BOTTOM LEFT*/
    124     /*                              (1 bit)     (4 bits)  (4 bits) (4 bits)  (4 bits)  */
    125 
    126     if(nbr_flags == 0)
    127     {
    128 /* If no neighbor flags are present, fill the neighbor samples with DC value */
    129         /*dc_val = 1 << (BIT_DEPTH - 1);*/
    130         dc_val = 1 << (8 - 1);
    131         for(i = 0; i < (2 * total_samples); i++)
    132         {
    133             pu1_dst[i] = dc_val;
    134         }
    135     }
    136     else
    137     {
    138         /* Else fill the corresponding samples */
    139 
    140         /* Check for the neighbors availibility */
    141         tp_left     = (nbr_flags & 0x10000);
    142         tp_right    = (nbr_flags & 0x0f000);
    143         top         = (nbr_flags & 0x00f00);
    144         left        = (nbr_flags & 0x000f0);
    145         bot_left    = (nbr_flags & 0x0000f);
    146 
    147         /* Fill nbrs depending on avalibility */
    148         /* Top -Left nbrs  */
    149         if(0 != tp_left)
    150         {
    151             pu1_dst[(4 * nt)] = *pu1_top_left; // U top-left sample
    152             pu1_dst[(4 * nt) + 1] = *(pu1_top_left + 1); // V top-left sample
    153         }
    154         /* Left nbrs  */
    155         if(0 != left)
    156         {
    157             for(i = 0, j = 0; i < (2 * nt); i += 2)
    158             {
    159                 pu1_dst[(4 * nt) - 2 - i] = pu1_left[j * src_strd]; // U left samples
    160                 pu1_dst[(4 * nt) - 1 - i] = pu1_left[(j * src_strd) + 1]; // V left samples
    161                 j++;
    162             }
    163         }
    164         /* Bottom - Left nbrs  */
    165         if(0 != bot_left)
    166         {
    167             for(i = (2 * nt), j = nt; i < (4 * nt); i += 2)
    168             {
    169                 pu1_dst[(4 * nt) - 2 - i] = pu1_left[j * src_strd]; // U left samples
    170                 pu1_dst[(4 * nt) - 1 - i] = pu1_left[(j * src_strd) + 1]; // V left samples
    171                 j++;
    172             }
    173         }
    174         /* Top nbrs  */
    175         if(0 != top)
    176         {
    177             ihevc_memcpy_mul_8_a9q(&pu1_dst[(4 * nt) + 2], pu1_top, 2 * nt);
    178             // U-V interleaved Top-top right samples
    179         }
    180 
    181         /* Top - Right nbrs  */
    182         if(0 != tp_right)
    183         {
    184             ihevc_memcpy_mul_8_a9q(&pu1_dst[(4 * nt) + 2 + 2 * nt], pu1_top + 2 * nt, 2 * nt);
    185             // U-V interleaved Top-top right samples
    186         }
    187 
    188         if(nt == 4)
    189         {
    190             /* 1 bit extraction for all the neighboring blocks */
    191             tp_left = (nbr_flags & 0x10000) >> 16;
    192             bot_left = (nbr_flags & 0x8) >> 3;
    193             left = (nbr_flags & 0x80) >> 7;
    194             top = (nbr_flags & 0x100) >> 8;
    195             tp_right = (nbr_flags & 0x1000) >> 12;
    196 
    197             next = 1;
    198             a_nbr_flag[0] = bot_left;
    199             a_nbr_flag[1] = left;
    200             a_nbr_flag[2] = tp_left;
    201             a_nbr_flag[3] = top;
    202             a_nbr_flag[4] = tp_right;
    203 
    204             /* If bottom -left is not available, reverse substitution process*/
    205             if(bot_left == 0)
    206             {
    207                 /* Check for the 1st available sample from bottom-left*/
    208                 while(!a_nbr_flag[next])
    209                     next++;
    210 
    211                 /* If Left, top-left are available*/
    212                 if(next <= 2)
    213                 {
    214                     UWORD16 *pu2_dst;
    215                     idx = (nt * next);
    216                     pu2_dst = (UWORD16 *)&pu1_dst[2 * idx];
    217                     ihevc_memset_16bit_a9q((UWORD16 *)pu1_dst, pu2_dst[0], idx);
    218                 }
    219                 else /* If top, top-right are available */
    220                 {
    221                     UWORD16 *pu2_dst;
    222                     /* Idx is changed to copy 1 pixel value for top-left ,if top-left is not available*/
    223                     idx = (nt * (next - 1)) + 1;
    224                     pu2_dst = (UWORD16 *)&pu1_dst[2 * idx];
    225                     ihevc_memset_16bit_a9q((UWORD16 *)pu1_dst, pu2_dst[0], idx);
    226                 }
    227             }
    228 
    229             if(left == 0)
    230             {
    231                 UWORD16 *pu2_dst = (UWORD16 *)&pu1_dst[(2 * nt) - 2];
    232                 ihevc_memset_16bit_a9q((UWORD16 *)&pu1_dst[(2 * nt)], pu2_dst[0], nt);
    233 
    234 
    235             }
    236             if(tp_left == 0)
    237             {
    238                 pu1_dst[4 * nt] = pu1_dst[(4 * nt) - 2];
    239                 pu1_dst[(4 * nt) + 1] = pu1_dst[(4 * nt) - 1];
    240             }
    241             if(top == 0)
    242             {
    243                 UWORD16 *pu2_dst = (UWORD16 *)&pu1_dst[(4 * nt)];
    244                 ihevc_memset_16bit_a9q((UWORD16 *)&pu1_dst[(4 * nt) + 2], pu2_dst[0], nt);
    245 
    246 
    247             }
    248             if(tp_right == 0)
    249             {
    250                 UWORD16 *pu2_dst = (UWORD16 *)&pu1_dst[(6 * nt)];
    251                 ihevc_memset_16bit_a9q((UWORD16 *)&pu1_dst[(6 * nt) + 2], pu2_dst[0], nt);
    252 
    253 
    254             }
    255         }
    256         else if(nt == 8)
    257         {
    258             WORD32 nbr_flags_temp = 0;
    259             nbr_flags_temp = ((nbr_flags & 0xC) >> 2) + ((nbr_flags & 0xC0) >> 4)
    260                             + ((nbr_flags & 0x300) >> 4)
    261                             + ((nbr_flags & 0x3000) >> 6)
    262                             + ((nbr_flags & 0x10000) >> 8);
    263 
    264             /* compute trailing zeors based on nbr_flag for substitution process of below left see section .*/
    265             /* as each bit in nbr flags corresponds to 8 pels for bot_left, left, top and topright but 1 pel for topleft */
    266             {
    267                 nbr_id_from_bl = look_up_trailing_zeros(nbr_flags_temp & 0XF) * 4; /* for bottom left and left */
    268                 if(nbr_id_from_bl == 32)
    269                     nbr_id_from_bl = 16;
    270                 if(nbr_id_from_bl == 16)
    271                 {
    272                     /* for top left : 1 pel per nbr bit */
    273                     if(!((nbr_flags_temp >> 8) & 0x1))
    274                     {
    275                         nbr_id_from_bl++;
    276                         nbr_id_from_bl += look_up_trailing_zeros((nbr_flags_temp >> 4) & 0xF) * 4; /* top and top right;  8 pels per nbr bit */
    277 
    278                     }
    279                 }
    280                 /* Reverse Substitution Process*/
    281                 if(nbr_id_from_bl)
    282                 {
    283                     /* Replicate the bottom-left and subsequent unavailable pixels with the 1st available pixel above */
    284                     pu1_ref_u = pu1_dst[2 * nbr_id_from_bl];
    285                     pu1_ref_v = pu1_dst[(2 * nbr_id_from_bl) + 1];
    286                     for(i = 2 * (nbr_id_from_bl - 1); i >= 0; i -= 2)
    287                     {
    288                         pu1_dst[i] = pu1_ref_u;
    289                         pu1_dst[i + 1] = pu1_ref_v;
    290                     }
    291                 }
    292             }
    293 
    294             /* for the loop of 4*Nt+1 pixels (excluding pixels computed from reverse substitution) */
    295             while(nbr_id_from_bl < ((T8C_4NT)+1))
    296             {
    297                 /* To Obtain the next unavailable idx flag after reverse neighbor substitution  */
    298                 /* Divide by 8 to obtain the original index */
    299                 frwd_nbr_flag = (nbr_id_from_bl >> 2); /*+ (nbr_id_from_bl & 0x1);*/
    300 
    301                 /* The Top-left flag is at the last bit location of nbr_flags*/
    302                 if(nbr_id_from_bl == (T8C_4NT / 2))
    303                 {
    304                     get_bits = GET_BIT(nbr_flags_temp, 8);
    305 
    306                     /* only pel substitution for TL */
    307                     if(!get_bits)
    308                     {
    309                         pu1_dst[2 * nbr_id_from_bl] = pu1_dst[(2 * nbr_id_from_bl) - 2];
    310                         pu1_dst[(2 * nbr_id_from_bl) + 1] = pu1_dst[(2 * nbr_id_from_bl) - 1];
    311                     }
    312                 }
    313                 else
    314                 {
    315                     get_bits = GET_BIT(nbr_flags_temp, frwd_nbr_flag);
    316                     if(!get_bits)
    317                     {
    318                         UWORD16 *pu2_dst;
    319                         /* 8 pel substitution (other than TL) */
    320                         pu2_dst = (UWORD16 *)&pu1_dst[(2 * nbr_id_from_bl) - 2];
    321                         ihevc_memset_16bit_a9q((UWORD16 *)(pu1_dst + (2 * nbr_id_from_bl)), pu2_dst[0], 4);
    322                     }
    323 
    324                 }
    325                 nbr_id_from_bl += (nbr_id_from_bl == (T8C_4NT / 2)) ? 1 : 4;
    326             }
    327 
    328         }
    329         else if(nt == 16)
    330         {
    331             /* compute trailing ones based on mbr_flag for substitution process of below left see section .*/
    332             /* as each bit in nbr flags corresponds to 4 pels for bot_left, left, top and topright but 1 pel for topleft */
    333             {
    334                 nbr_id_from_bl = look_up_trailing_zeros((nbr_flags & 0XFF)) * 4; /* for bottom left and left */
    335 
    336                 if(nbr_id_from_bl == 32)
    337                 {
    338                     /* for top left : 1 pel per nbr bit */
    339                     if(!((nbr_flags >> 16) & 0x1))
    340                     {
    341                         /* top left not available */
    342                         nbr_id_from_bl++;
    343                         /* top and top right;  4 pels per nbr bit */
    344                         nbr_id_from_bl += look_up_trailing_zeros((nbr_flags >> 8) & 0xFF) * 4;
    345                     }
    346                 }
    347                 /* Reverse Substitution Process*/
    348                 if(nbr_id_from_bl)
    349                 {
    350                     /* Replicate the bottom-left and subsequent unavailable pixels with the 1st available pixel above */
    351                     pu1_ref_u = pu1_dst[2 * nbr_id_from_bl];
    352                     pu1_ref_v = pu1_dst[2 * nbr_id_from_bl + 1];
    353                     for(i = (2 * (nbr_id_from_bl - 1)); i >= 0; i -= 2)
    354                     {
    355                         pu1_dst[i] = pu1_ref_u;
    356                         pu1_dst[i + 1] = pu1_ref_v;
    357                     }
    358                 }
    359             }
    360 
    361             /* for the loop of 4*Nt+1 pixels (excluding pixels computed from reverse substitution) */
    362             while(nbr_id_from_bl < ((T16C_4NT)+1))
    363             {
    364                 /* To Obtain the next unavailable idx flag after reverse neighbor substitution  */
    365                 /* Devide by 4 to obtain the original index */
    366                 frwd_nbr_flag = (nbr_id_from_bl >> 2); /*+ (nbr_id_from_bl & 0x1);*/
    367 
    368                 /* The Top-left flag is at the last bit location of nbr_flags*/
    369                 if(nbr_id_from_bl == (T16C_4NT / 2))
    370                 {
    371                     get_bits = GET_BIT(nbr_flags, 16);
    372                     /* only pel substitution for TL */
    373                     if(!get_bits)
    374                     {
    375                         pu1_dst[2 * nbr_id_from_bl] = pu1_dst[(2 * nbr_id_from_bl) - 2];
    376                         pu1_dst[(2 * nbr_id_from_bl) + 1] = pu1_dst[(2 * nbr_id_from_bl) - 1];
    377                     }
    378                 }
    379                 else
    380                 {
    381                     get_bits = GET_BIT(nbr_flags, frwd_nbr_flag);
    382                     if(!get_bits)
    383                     {
    384                         UWORD16 *pu2_dst;
    385                         /* 4 pel substitution (other than TL) */
    386                         pu2_dst = (UWORD16 *)&pu1_dst[(2 * nbr_id_from_bl) - 2];
    387                         ihevc_memset_16bit_a9q((UWORD16 *)(pu1_dst + (2 * nbr_id_from_bl)), pu2_dst[0], 4);
    388                     }
    389 
    390                 }
    391                 nbr_id_from_bl += (nbr_id_from_bl == (T16C_4NT / 2)) ? 1 : 4;
    392             }
    393         }
    394     }
    395 }
    396 
    397 
    398 void ihevc_intra_pred_luma_ref_substitution_a9q(UWORD8 *pu1_top_left,
    399                                                 UWORD8 *pu1_top,
    400                                                 UWORD8 *pu1_left,
    401                                                 WORD32 src_strd,
    402                                                 WORD32 nt,
    403                                                 WORD32 nbr_flags,
    404                                                 UWORD8 *pu1_dst,
    405                                                 WORD32 dst_strd)
    406 {
    407     UWORD8 pu1_ref;
    408     WORD32 dc_val, i;
    409     WORD32 total_samples = (4 * nt) + 1;
    410     WORD32 two_nt = 2 * nt;
    411 
    412     WORD32 three_nt = 3 * nt;
    413     WORD32 get_bits;
    414     WORD32 next;
    415     WORD32 bot_left, left, top, tp_right, tp_left;
    416 
    417     WORD32 idx, nbr_id_from_bl, frwd_nbr_flag;
    418     UNUSED(dst_strd);
    419     /*dc_val = 1 << (BIT_DEPTH - 1);*/
    420     dc_val = 1 << (8 - 1);
    421 
    422 
    423     /* Neighbor Flag Structure*/
    424     /* MSB ---> LSB */
    425     /*    Top-Left | Top-Right | Top | Left | Bottom-Left
    426               1         4         4     4         4
    427      */
    428     /* If no neighbor flags are present, fill the neighbor samples with DC value */
    429     if(nbr_flags == 0)
    430     {
    431         for(i = 0; i < total_samples; i++)
    432         {
    433             pu1_dst[i] = dc_val;
    434         }
    435     }
    436     else
    437     {
    438         if(nt <= 8)
    439         {
    440             /* 1 bit extraction for all the neighboring blocks */
    441             tp_left = (nbr_flags & 0x10000) >> 16;
    442             bot_left = (nbr_flags & 0x8) >> 3;
    443             left = (nbr_flags & 0x80) >> 7;
    444             top = (nbr_flags & 0x100) >> 8;
    445             tp_right = (nbr_flags & 0x1000) >> 12;
    446 
    447             /* Else fill the corresponding samples */
    448             if(tp_left)
    449                 pu1_dst[two_nt] = *pu1_top_left;
    450             else
    451                 pu1_dst[two_nt] = 0;
    452 
    453 
    454             if(left)
    455             {
    456                 for(i = 0; i < nt; i++)
    457                     pu1_dst[two_nt - 1 - i] = pu1_left[i * src_strd];
    458             }
    459             else
    460             {
    461                 ihevc_memset_a9q(&pu1_dst[two_nt - 1 - (nt - 1)], 0, nt);
    462             }
    463 
    464 
    465             if(bot_left)
    466             {
    467                 for(i = nt; i < two_nt; i++)
    468                     pu1_dst[two_nt - 1 - i] = pu1_left[i * src_strd];
    469             }
    470             else
    471             {
    472                 ihevc_memset_a9q(&pu1_dst[two_nt - 1 - (two_nt - 1)], 0, nt);
    473             }
    474 
    475 
    476             if(top)
    477             {
    478                 ihevc_memcpy_a9q(&pu1_dst[two_nt + 1], pu1_top, nt);
    479             }
    480             else
    481             {
    482                 ihevc_memset_a9q(&pu1_dst[two_nt + 1], 0, nt);
    483             }
    484 
    485             if(tp_right)
    486             {
    487                 ihevc_memcpy_a9q(&pu1_dst[two_nt + 1 + nt], pu1_top + nt, nt);
    488             }
    489             else
    490             {
    491                 ihevc_memset_a9q(&pu1_dst[two_nt + 1 + nt], 0, nt);
    492             }
    493             next = 1;
    494 
    495             /* If bottom -left is not available, reverse substitution process*/
    496             if(bot_left == 0)
    497             {
    498                 WORD32 a_nbr_flag[5];
    499                 a_nbr_flag[0] = bot_left;
    500                 a_nbr_flag[1] = left;
    501                 a_nbr_flag[2] = tp_left;
    502                 a_nbr_flag[3] = top;
    503                 a_nbr_flag[4] = tp_right;
    504 
    505                 /* Check for the 1st available sample from bottom-left*/
    506                 while(!a_nbr_flag[next])
    507                     next++;
    508 
    509                 /* If Left, top-left are available*/
    510                 if(next <= 2)
    511                 {
    512                     idx = nt * next;
    513                     pu1_ref = pu1_dst[idx];
    514                     for(i = 0; i < idx; i++)
    515                         pu1_dst[i] = pu1_ref;
    516                 }
    517                 else /* If top, top-right are available */
    518                 {
    519                     /* Idx is changed to copy 1 pixel value for top-left ,if top-left is not available*/
    520                     idx = (nt * (next - 1)) + 1;
    521                     pu1_ref = pu1_dst[idx];
    522                     for(i = 0; i < idx; i++)
    523                         pu1_dst[i] = pu1_ref;
    524                 }
    525             }
    526 
    527             /* Forward Substitution Process */
    528             /* If left is Unavailable, copy the last bottom-left value */
    529             if(left == 0)
    530             {
    531                 ihevc_memset_a9q(&pu1_dst[nt], pu1_dst[nt - 1], nt);
    532 
    533             }
    534             /* If top-left is Unavailable, copy the last left value */
    535             if(tp_left == 0)
    536                 pu1_dst[two_nt] = pu1_dst[two_nt - 1];
    537             /* If top is Unavailable, copy the last top-left value */
    538             if(top == 0)
    539             {
    540                 ihevc_memset_a9q(&pu1_dst[two_nt + 1], pu1_dst[two_nt], nt);
    541             }
    542             /* If to right is Unavailable, copy the last top value */
    543             if(tp_right == 0)
    544             {
    545                 ihevc_memset_a9q(&pu1_dst[three_nt + 1], pu1_dst[three_nt], nt);
    546 
    547             }
    548         }
    549 
    550         if(nt == 16)
    551         {
    552             WORD32 nbr_flags_temp = 0;
    553             nbr_flags_temp = ((nbr_flags & 0xC) >> 2) + ((nbr_flags & 0xC0) >> 4)
    554                             + ((nbr_flags & 0x300) >> 4)
    555                             + ((nbr_flags & 0x3000) >> 6)
    556                             + ((nbr_flags & 0x10000) >> 8);
    557 
    558             /* Else fill the corresponding samples */
    559             if(nbr_flags & 0x10000)
    560                 pu1_dst[two_nt] = *pu1_top_left;
    561             else
    562                 pu1_dst[two_nt] = 0;
    563 
    564             if(nbr_flags & 0xC0)
    565             {
    566                 for(i = 0; i < nt; i++)
    567                     pu1_dst[two_nt - 1 - i] = pu1_left[i * src_strd];
    568             }
    569             else
    570             {
    571                 ihevc_memset_mul_8_a9q(&pu1_dst[two_nt - 1 - (nt - 1)], 0, nt);
    572             }
    573 
    574             if(nbr_flags & 0xC)
    575             {
    576                 for(i = nt; i < two_nt; i++)
    577                     pu1_dst[two_nt - 1 - i] = pu1_left[i * src_strd];
    578             }
    579             else
    580             {
    581                 ihevc_memset_mul_8_a9q(&pu1_dst[two_nt - 1 - (two_nt - 1)], 0, nt);
    582             }
    583 
    584 
    585             if(nbr_flags & 0x300)
    586             {
    587                 ihevc_memcpy_mul_8_a9q(&pu1_dst[two_nt + 1], pu1_top, nt);
    588             }
    589             else
    590             {
    591                 ihevc_memset_mul_8_a9q(&pu1_dst[two_nt + 1], 0, nt);
    592             }
    593 
    594             if(nbr_flags & 0x3000)
    595             {
    596                 ihevc_memcpy_mul_8_a9q(&pu1_dst[two_nt + 1 + nt], pu1_top + nt, nt);
    597             }
    598             else
    599             {
    600                 ihevc_memset_mul_8_a9q(&pu1_dst[two_nt + 1 + nt], 0, nt);
    601             }
    602             /* compute trailing zeors based on nbr_flag for substitution process of below left see section .*/
    603             /* as each bit in nbr flags corresponds to 8 pels for bot_left, left, top and topright but 1 pel for topleft */
    604             {
    605                 nbr_id_from_bl = look_up_trailing_zeros(nbr_flags_temp & 0XF) * 8; /* for below left and left */
    606 
    607                 if(nbr_id_from_bl == 64)
    608                     nbr_id_from_bl = 32;
    609 
    610                 if(nbr_id_from_bl == 32)
    611                 {
    612                     /* for top left : 1 pel per nbr bit */
    613                     if(!((nbr_flags_temp >> 8) & 0x1))
    614                     {
    615                         nbr_id_from_bl++;
    616                         nbr_id_from_bl += look_up_trailing_zeros((nbr_flags_temp >> 4) & 0xF) * 8; /* top and top right;  8 pels per nbr bit */
    617                         //nbr_id_from_bl += idx * 8;
    618                     }
    619                 }
    620                 /* Reverse Substitution Process*/
    621                 if(nbr_id_from_bl)
    622                 {
    623                     /* Replicate the bottom-left and subsequent unavailable pixels with the 1st available pixel above */
    624                     pu1_ref = pu1_dst[nbr_id_from_bl];
    625                     for(i = (nbr_id_from_bl - 1); i >= 0; i--)
    626                     {
    627                         pu1_dst[i] = pu1_ref;
    628                     }
    629                 }
    630             }
    631 
    632             /* for the loop of 4*Nt+1 pixels (excluding pixels computed from reverse substitution) */
    633             while(nbr_id_from_bl < ((T16_4NT) + 1))
    634             {
    635                 /* To Obtain the next unavailable idx flag after reverse neighbor substitution  */
    636                 /* Devide by 8 to obtain the original index */
    637                 frwd_nbr_flag = (nbr_id_from_bl >> 3); /*+ (nbr_id_from_bl & 0x1);*/
    638 
    639                 /* The Top-left flag is at the last bit location of nbr_flags*/
    640                 if(nbr_id_from_bl == (T16_4NT / 2))
    641                 {
    642                     get_bits = GET_BITS(nbr_flags_temp, 8);
    643 
    644                     /* only pel substitution for TL */
    645                     if(!get_bits)
    646                         pu1_dst[nbr_id_from_bl] = pu1_dst[nbr_id_from_bl - 1];
    647                 }
    648                 else
    649                 {
    650                     get_bits = GET_BITS(nbr_flags_temp, frwd_nbr_flag);
    651                     if(!get_bits)
    652                     {
    653                         /* 8 pel substitution (other than TL) */
    654                         pu1_ref = pu1_dst[nbr_id_from_bl - 1];
    655                         ihevc_memset_mul_8_a9q(pu1_dst + nbr_id_from_bl, pu1_ref, 8);
    656 
    657 
    658                     }
    659 
    660                 }
    661                 nbr_id_from_bl += (nbr_id_from_bl == (T16_4NT / 2)) ? 1 : 8;
    662             }
    663 
    664 
    665         }
    666 
    667         if(nt == 32)
    668         {
    669             /* Else fill the corresponding samples */
    670             if(nbr_flags & 0x10000)
    671                 pu1_dst[two_nt] = *pu1_top_left;
    672             else
    673                 pu1_dst[two_nt] = 0;
    674 
    675             if(nbr_flags & 0xF0)
    676             {
    677                 for(i = 0; i < nt; i++)
    678                     pu1_dst[two_nt - 1 - i] = pu1_left[i * src_strd];
    679             }
    680             else
    681             {
    682                 ihevc_memset_mul_8_a9q(&pu1_dst[two_nt - 1 - (nt - 1)], 0, nt);
    683             }
    684 
    685             if(nbr_flags & 0xF)
    686             {
    687                 for(i = nt; i < two_nt; i++)
    688                     pu1_dst[two_nt - 1 - i] = pu1_left[i * src_strd];
    689             }
    690             else
    691             {
    692                 ihevc_memset_mul_8_a9q(&pu1_dst[two_nt - 1 - (two_nt - 1)], 0, nt);
    693             }
    694 
    695 
    696             if(nbr_flags & 0xF00)
    697             {
    698                 ihevc_memcpy_mul_8_a9q(&pu1_dst[two_nt + 1], pu1_top, nt);
    699             }
    700             else
    701             {
    702                 ihevc_memset_mul_8_a9q(&pu1_dst[two_nt + 1], 0, nt);
    703             }
    704 
    705             if(nbr_flags & 0xF000)
    706             {
    707                 ihevc_memcpy_mul_8_a9q(&pu1_dst[two_nt + 1 + nt], pu1_top + nt, nt);
    708             }
    709             else
    710             {
    711                 ihevc_memset_mul_8_a9q(&pu1_dst[two_nt + 1 + nt], 0, nt);
    712             }
    713             /* compute trailing ones based on mbr_flag for substitution process of below left see section .*/
    714             /* as each bit in nbr flags corresponds to 8 pels for bot_left, left, top and topright but 1 pel for topleft */
    715             {
    716                 nbr_id_from_bl = look_up_trailing_zeros((nbr_flags & 0XFF)) * 8; /* for below left and left */
    717 
    718                 if(nbr_id_from_bl == 64)
    719                 {
    720                     /* for top left : 1 pel per nbr bit */
    721                     if(!((nbr_flags >> 16) & 0x1))
    722                     {
    723                         /* top left not available */
    724                         nbr_id_from_bl++;
    725                         /* top and top right;  8 pels per nbr bit */
    726                         nbr_id_from_bl += look_up_trailing_zeros((nbr_flags >> 8) & 0xFF) * 8;
    727                     }
    728                 }
    729                 /* Reverse Substitution Process*/
    730                 if(nbr_id_from_bl)
    731                 {
    732                     /* Replicate the bottom-left and subsequent unavailable pixels with the 1st available pixel above */
    733                     pu1_ref = pu1_dst[nbr_id_from_bl];
    734                     for(i = (nbr_id_from_bl - 1); i >= 0; i--)
    735                         pu1_dst[i] = pu1_ref;
    736                 }
    737             }
    738 
    739             /* for the loop of 4*Nt+1 pixels (excluding pixels computed from reverse substitution) */
    740             while(nbr_id_from_bl < ((T32_4NT) + 1))
    741             {
    742                 /* To Obtain the next unavailable idx flag after reverse neighbor substitution  */
    743                 /* Devide by 8 to obtain the original index */
    744                 frwd_nbr_flag = (nbr_id_from_bl >> 3); /*+ (nbr_id_from_bl & 0x1);*/
    745 
    746                 /* The Top-left flag is at the last bit location of nbr_flags*/
    747                 if(nbr_id_from_bl == (T32_4NT / 2))
    748                 {
    749                     get_bits = GET_BITS(nbr_flags, 16);
    750                     /* only pel substitution for TL */
    751                     if(!get_bits)
    752                         pu1_dst[nbr_id_from_bl] = pu1_dst[nbr_id_from_bl - 1];
    753                 }
    754                 else
    755                 {
    756                     get_bits = GET_BITS(nbr_flags, frwd_nbr_flag);
    757                     if(!get_bits)
    758                     {
    759                         /* 8 pel substitution (other than TL) */
    760                         pu1_ref = pu1_dst[nbr_id_from_bl - 1];
    761                         ihevc_memset_mul_8_a9q(&pu1_dst[nbr_id_from_bl], pu1_ref, 8);
    762 
    763                     }
    764 
    765                 }
    766                 nbr_id_from_bl += (nbr_id_from_bl == (T32_4NT / 2)) ? 1 : 8;
    767             }
    768         }
    769 
    770     }
    771 }
    772