Home | History | Annotate | Download | only in decoder
      1 /******************************************************************************
      2 *
      3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
      4 *
      5 * Licensed under the Apache License, Version 2.0 (the "License");
      6 * you may not use this file except in compliance with the License.
      7 * You may obtain a copy of the License at:
      8 *
      9 * http://www.apache.org/licenses/LICENSE-2.0
     10 *
     11 * Unless required by applicable law or agreed to in writing, software
     12 * distributed under the License is distributed on an "AS IS" BASIS,
     13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 * See the License for the specific language governing permissions and
     15 * limitations under the License.
     16 *
     17 ******************************************************************************/
     18 /**
     19  *******************************************************************************
     20  * @file
     21  *  ihevc_boundary_strength.c
     22  *
     23  * @brief
     24  *  Contains functions for computing boundary strength
     25  *
     26  * @author
     27  *  Harish
     28  *
     29  * @par List of Functions:
     30  *
     31  * @remarks
     32  *  None
     33  *
     34  *******************************************************************************
     35  */
     36 /*****************************************************************************/
     37 /* File Includes                                                             */
     38 /*****************************************************************************/
     39 #include <stdio.h>
     40 #include <stddef.h>
     41 #include <stdlib.h>
     42 #include <string.h>
     43 
     44 #include "ihevc_typedefs.h"
     45 #include "iv.h"
     46 #include "ivd.h"
     47 #include "ihevcd_cxa.h"
     48 #include "ithread.h"
     49 
     50 #include "ihevc_defs.h"
     51 #include "ihevc_debug.h"
     52 #include "ihevc_defs.h"
     53 #include "ihevc_structs.h"
     54 #include "ihevc_macros.h"
     55 #include "ihevc_platform_macros.h"
     56 #include "ihevc_cabac_tables.h"
     57 
     58 #include "ihevc_error.h"
     59 #include "ihevc_common_tables.h"
     60 
     61 #include "ihevcd_trace.h"
     62 #include "ihevcd_defs.h"
     63 #include "ihevcd_function_selector.h"
     64 #include "ihevcd_structs.h"
     65 #include "ihevcd_error.h"
     66 #include "ihevcd_nal.h"
     67 #include "ihevcd_bitstream.h"
     68 #include "ihevcd_job_queue.h"
     69 #include "ihevcd_utils.h"
     70 #include "ihevcd_profile.h"
     71 
     72 /*****************************************************************************/
     73 /* Function Prototypes                                                       */
     74 /*****************************************************************************/
     75 
     76 
     77 #define SET_NGBHR_ALL_AVAIL(avail)          avail = 0x1F;
     78 
     79 #define SET_NGBHR_BOTLEFT_NOTAVAIL(avail)   avail &= ~0x10;
     80 #define SET_NGBHR_LEFT_NOTAVAIL(avail)      avail &= ~0x8;
     81 #define SET_NGBHR_TOPLEFT_NOTAVAIL(avail)   avail &= ~0x4;
     82 #define SET_NGBHR_TOP_NOTAVAIL(avail)       avail &= ~0x2;
     83 #define SET_NGBHR_TOPRIGHT_NOTAVAIL(avail)  avail &= ~0x1;
     84 
     85 WORD32 ihevcd_pu_boundary_strength(pu_t *ps_pu,
     86                                    pu_t *ps_ngbr_pu)
     87 {
     88     WORD32 i4_bs;
     89     UWORD32 l0_ref_pic_buf_id, l1_ref_pic_buf_id;
     90     UWORD32 ngbr_l0_ref_pic_buf_id, ngbr_l1_ref_pic_buf_id;
     91 
     92     WORD16 i2_mv_x0, i2_mv_y0, i2_mv_x1, i2_mv_y1;
     93     WORD16 i2_ngbr_mv_x0, i2_ngbr_mv_y0, i2_ngbr_mv_x1, i2_ngbr_mv_y1;
     94 
     95     WORD32 num_mv, ngbr_num_mv;
     96 
     97     num_mv = (PRED_BI == ps_pu->b2_pred_mode) ? 2 : 1;
     98     ngbr_num_mv = (PRED_BI == ps_ngbr_pu->b2_pred_mode) ? 2 : 1;
     99 
    100     l0_ref_pic_buf_id = ps_pu->mv.i1_l0_ref_pic_buf_id;
    101     l1_ref_pic_buf_id = ps_pu->mv.i1_l1_ref_pic_buf_id;
    102     ngbr_l0_ref_pic_buf_id = ps_ngbr_pu->mv.i1_l0_ref_pic_buf_id;
    103     ngbr_l1_ref_pic_buf_id = ps_ngbr_pu->mv.i1_l1_ref_pic_buf_id;
    104 
    105 
    106     i2_mv_x0 = ps_pu->mv.s_l0_mv.i2_mvx;
    107     i2_mv_y0 = ps_pu->mv.s_l0_mv.i2_mvy;
    108     i2_mv_x1 = ps_pu->mv.s_l1_mv.i2_mvx;
    109     i2_mv_y1 = ps_pu->mv.s_l1_mv.i2_mvy;
    110 
    111     i2_ngbr_mv_x0 = ps_ngbr_pu->mv.s_l0_mv.i2_mvx;
    112     i2_ngbr_mv_y0 = ps_ngbr_pu->mv.s_l0_mv.i2_mvy;
    113     i2_ngbr_mv_x1 = ps_ngbr_pu->mv.s_l1_mv.i2_mvx;
    114     i2_ngbr_mv_y1 = ps_ngbr_pu->mv.s_l1_mv.i2_mvy;
    115 
    116 
    117     /* If two motion vectors are used */
    118     if((2 == num_mv) &&
    119             (2 == ngbr_num_mv))
    120     {
    121         if((l0_ref_pic_buf_id == ngbr_l0_ref_pic_buf_id && l1_ref_pic_buf_id == ngbr_l1_ref_pic_buf_id) ||
    122                 (l0_ref_pic_buf_id == ngbr_l1_ref_pic_buf_id && l1_ref_pic_buf_id == ngbr_l0_ref_pic_buf_id))
    123         {
    124             if(l0_ref_pic_buf_id != l1_ref_pic_buf_id) /* Different L0 and L1 */
    125             {
    126                 if(l0_ref_pic_buf_id == ngbr_l0_ref_pic_buf_id)
    127                 {
    128                     i4_bs = (ABS(i2_mv_x0 - i2_ngbr_mv_x0) < 4) &&
    129                             (ABS(i2_mv_y0 - i2_ngbr_mv_y0) < 4) &&
    130                             (ABS(i2_mv_x1 - i2_ngbr_mv_x1) < 4) &&
    131                             (ABS(i2_mv_y1 - i2_ngbr_mv_y1) < 4) ? 0 : 1;
    132                 }
    133                 else
    134                 {
    135                     i4_bs = (ABS(i2_mv_x0 - i2_ngbr_mv_x1) < 4) &&
    136                             (ABS(i2_mv_y0 - i2_ngbr_mv_y1) < 4) &&
    137                             (ABS(i2_mv_x1 - i2_ngbr_mv_x0) < 4) &&
    138                             (ABS(i2_mv_y1 - i2_ngbr_mv_y0) < 4) ? 0 : 1;
    139                 }
    140             }
    141             else /* Same L0 and L1 */
    142             {
    143                 i4_bs = ((ABS(i2_mv_x0 - i2_ngbr_mv_x0) >= 4) ||
    144                          (ABS(i2_mv_y0 - i2_ngbr_mv_y0) >= 4) ||
    145                          (ABS(i2_mv_x1 - i2_ngbr_mv_x1) >= 4) ||
    146                          (ABS(i2_mv_y1 - i2_ngbr_mv_y1) >= 4)) &&
    147                                 ((ABS(i2_mv_x0 - i2_ngbr_mv_x1) >= 4) ||
    148                                  (ABS(i2_mv_y0 - i2_ngbr_mv_y1) >= 4) ||
    149                                  (ABS(i2_mv_x1 - i2_ngbr_mv_x0) >= 4) ||
    150                                  (ABS(i2_mv_y1 - i2_ngbr_mv_y0) >= 4)) ? 1 : 0;
    151             }
    152         }
    153         else /* If the reference pictures used are different */
    154         {
    155             i4_bs = 1;
    156         }
    157     }
    158 
    159     /* If one motion vector is used in both PUs */
    160     else if((1 == num_mv) &&
    161             (1 == ngbr_num_mv))
    162     {
    163         WORD16 i2_mv_x, i2_mv_y;
    164         WORD16 i2_ngbr_mv_x, i2_ngbr_mv_y;
    165         UWORD32 ref_pic_buf_id, ngbr_ref_pic_buf_id;
    166 
    167         if(PRED_L0 == ps_pu->b2_pred_mode)
    168         {
    169             i2_mv_x = i2_mv_x0;
    170             i2_mv_y = i2_mv_y0;
    171             ref_pic_buf_id = l0_ref_pic_buf_id;
    172         }
    173         else
    174         {
    175             i2_mv_x = i2_mv_x1;
    176             i2_mv_y = i2_mv_y1;
    177             ref_pic_buf_id = l1_ref_pic_buf_id;
    178         }
    179 
    180         if(PRED_L0 == ps_ngbr_pu->b2_pred_mode)
    181         {
    182             i2_ngbr_mv_x = i2_ngbr_mv_x0;
    183             i2_ngbr_mv_y = i2_ngbr_mv_y0;
    184             ngbr_ref_pic_buf_id = ngbr_l0_ref_pic_buf_id;
    185         }
    186         else
    187         {
    188             i2_ngbr_mv_x = i2_ngbr_mv_x1;
    189             i2_ngbr_mv_y = i2_ngbr_mv_y1;
    190             ngbr_ref_pic_buf_id = ngbr_l1_ref_pic_buf_id;
    191         }
    192 
    193         i4_bs = (ref_pic_buf_id == ngbr_ref_pic_buf_id) &&
    194                 (ABS(i2_mv_x - i2_ngbr_mv_x) < 4)  &&
    195                 (ABS(i2_mv_y - i2_ngbr_mv_y) < 4) ? 0 : 1;
    196     }
    197 
    198     /* If the no. of motion vectors is not the same */
    199     else
    200     {
    201         i4_bs = 1;
    202     }
    203 
    204 
    205     return i4_bs;
    206 }
    207 
    208 /* QP is also populated in the same function */
    209 WORD32 ihevcd_ctb_boundary_strength_islice(bs_ctxt_t *ps_bs_ctxt)
    210 {
    211     pps_t *ps_pps;
    212     sps_t *ps_sps;
    213     tu_t *ps_tu;
    214     UWORD32 *pu4_vert_bs;
    215     UWORD32 *pu4_horz_bs;
    216     WORD32 bs_strd;
    217     WORD32 vert_bs0_tmp;
    218     WORD32 horz_bs0_tmp;
    219     UWORD8 *pu1_qp;
    220     WORD32 qp_strd;
    221     UWORD32 u4_qp_const_in_ctb;
    222     WORD32 ctb_indx;
    223     WORD32 i4_tu_cnt;
    224     WORD32 log2_ctb_size;
    225     WORD32 ctb_size;
    226 
    227     WORD8 i1_loop_filter_across_tiles_enabled_flag;
    228     WORD8 i1_loop_filter_across_slices_enabled_flag;
    229 
    230     WORD32 i;
    231 
    232     PROFILE_DISABLE_BOUNDARY_STRENGTH();
    233 
    234     ps_pps = ps_bs_ctxt->ps_pps;
    235     ps_sps = ps_bs_ctxt->ps_sps;
    236     i1_loop_filter_across_tiles_enabled_flag = ps_pps->i1_loop_filter_across_tiles_enabled_flag;
    237     i1_loop_filter_across_slices_enabled_flag = ps_bs_ctxt->ps_slice_hdr->i1_slice_loop_filter_across_slices_enabled_flag;
    238     i4_tu_cnt = ps_bs_ctxt->i4_ctb_tu_cnt;
    239 
    240     log2_ctb_size = ps_sps->i1_log2_ctb_size;
    241     ctb_size = (1 << log2_ctb_size);
    242 
    243     /* strides are in units of number of bytes */
    244     /* ctb_size * ctb_size / 8 / 16 is the number of bytes needed per CTB */
    245     bs_strd = (ps_sps->i2_pic_wd_in_ctb + 1) << (2 * log2_ctb_size - 7);
    246 
    247     pu4_vert_bs = (UWORD32 *)((UWORD8 *)ps_bs_ctxt->pu4_pic_vert_bs +
    248                     (ps_bs_ctxt->i4_ctb_x << (2 * log2_ctb_size - 7)) +
    249                     ps_bs_ctxt->i4_ctb_y * bs_strd);
    250     pu4_horz_bs = (UWORD32 *)((UWORD8 *)ps_bs_ctxt->pu4_pic_horz_bs +
    251                     (ps_bs_ctxt->i4_ctb_x << (2 * log2_ctb_size - 7)) +
    252                     ps_bs_ctxt->i4_ctb_y * bs_strd);
    253 
    254     /* ctb_size/8 elements per CTB */
    255     qp_strd = ps_sps->i2_pic_wd_in_ctb << (log2_ctb_size - 3);
    256     pu1_qp = ps_bs_ctxt->pu1_pic_qp + ((ps_bs_ctxt->i4_ctb_x + ps_bs_ctxt->i4_ctb_y * qp_strd) << (log2_ctb_size - 3));
    257 
    258     ctb_indx = ps_bs_ctxt->i4_ctb_x + ps_sps->i2_pic_wd_in_ctb * ps_bs_ctxt->i4_ctb_y;
    259     u4_qp_const_in_ctb = ps_bs_ctxt->pu1_pic_qp_const_in_ctb[ctb_indx >> 3] & (1 << (ctb_indx & 7));
    260 
    261     vert_bs0_tmp = pu4_vert_bs[0] & (0xFFFFFFFF >> (sizeof(UWORD32) * 8 - ctb_size / 2));
    262     horz_bs0_tmp = pu4_horz_bs[0] & (0xFFFFFFFF >> (sizeof(UWORD32) * 8 - ctb_size / 2));
    263 
    264     /* ctb_size/8 is the number of edges per CTB
    265      * ctb_size/4 is the number of BS values needed per edge
    266      * divided by 8 for the number of bytes
    267      * 2 is the number of bits needed for each BS value */
    268 /*
    269     memset(pu4_vert_bs, 0, (ctb_size / 8 + 1) * (ctb_size / 4) / 8 * 2 );
    270     memset(pu4_horz_bs, 0, (ctb_size / 8) * (ctb_size / 4) / 8 * 2 );
    271 */
    272     memset(pu4_vert_bs, 0, (1 << (2 * log2_ctb_size - 7)) + ((ctb_size >> 5) << 1));
    273     memset(pu4_horz_bs, 0, (1 << (2 * log2_ctb_size - 7)));
    274 
    275     /* pu4_vert_bs[0] has information about the left CTB which is not required when ctb_x = 0 */
    276     if(0 != ps_bs_ctxt->i4_ctb_x)
    277     {
    278         pu4_vert_bs[0] |= vert_bs0_tmp;
    279     }
    280 
    281     /* pu4_horz_bs[0] has information about the top CTB which is not required when ctb_y = 0 */
    282     if(0 != ps_bs_ctxt->i4_ctb_y)
    283     {
    284         pu4_horz_bs[0] |= horz_bs0_tmp;
    285     }
    286 
    287     ps_tu = ps_bs_ctxt->ps_tu;
    288 
    289     /* Populating the QP array - if const_qp_in_ctb flag is one, set only the first element */
    290     if(u4_qp_const_in_ctb)
    291         pu1_qp[0] = ps_tu->b7_qp;
    292 
    293     for(i = 0; i < i4_tu_cnt; i++)
    294     {
    295         WORD32 start_pos_x;
    296         WORD32 start_pos_y;
    297         WORD32 tu_size;
    298 
    299 
    300         UWORD32 u4_bs;
    301         ps_tu = ps_bs_ctxt->ps_tu + i;
    302 
    303         /* start_pos_x and start_pos_y are in units of min TU size (4x4) */
    304         start_pos_x = ps_tu->b4_pos_x;
    305         start_pos_y = ps_tu->b4_pos_y;
    306 
    307         tu_size = 1 << (ps_tu->b3_size + 2);
    308         tu_size >>= 2; /* TU size divided by 4 */
    309 
    310         u4_bs = DUP_LSB_10(tu_size);
    311 
    312         /* Only if the current edge falls on 8 pixel grid set BS */
    313         if(0 == (start_pos_x & 1))
    314         {
    315             WORD32 shift;
    316             shift = start_pos_y * 2;
    317             /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
    318              *  will reduce to the following assuming ctb size is one of 16, 32 and 64
    319              *  and deblocking is done on 8x8 grid
    320              */
    321             if(6 != log2_ctb_size)
    322                 shift += ((start_pos_x & 2) << (log2_ctb_size - 2));
    323             pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
    324         }
    325         /* Only if the current edge falls on 8 pixel grid set BS */
    326         if(0 == (start_pos_y & 1))
    327         {
    328             WORD32 shift;
    329             shift = start_pos_x * 2;
    330             /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
    331              *  will reduce to the following assuming ctb size is one of 16, 32 and 64
    332              *  and deblocking is done on 8x8 grid
    333              */
    334             if(6 != log2_ctb_size)
    335                 shift += ((start_pos_y & 2) << (log2_ctb_size - 2));
    336             pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
    337         }
    338 
    339         /* Populating the QP array */
    340         if(0 == u4_qp_const_in_ctb)
    341         {
    342             if(0 == (start_pos_x & 1) && 0 == (start_pos_y & 1))
    343             {
    344                 WORD32 row, col;
    345                 for(row = start_pos_y; row < start_pos_y + tu_size; row += 2)
    346                 {
    347                     for(col = start_pos_x; col < start_pos_x + tu_size; col += 2)
    348                     {
    349                         pu1_qp[(row >> 1) * qp_strd + (col >> 1)] = ps_tu->b7_qp;
    350                     }
    351                 }
    352             }
    353         }
    354 
    355     }
    356     {
    357         /*Determine if the slice is dependent, and is its left neighbor belongs to the same slice, in a different tile*/
    358         UWORD32 ctb_addr;
    359         WORD32 slice_idx, left_slice_idx = -1, top_slice_idx = -1;
    360         /* If left neighbor is not available, then set BS for entire first column to zero */
    361         if(!ps_pps->i1_tiles_enabled_flag)
    362         {
    363             if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_x) ||
    364                             (0 == i1_loop_filter_across_slices_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_slice_x && 0 == ps_bs_ctxt->i4_ctb_slice_y) ||
    365                             (0 == ps_bs_ctxt->i4_ctb_x))
    366             {
    367                 pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
    368             }
    369         }
    370         else
    371         {
    372             //If across-tiles is disabled
    373             if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_x))
    374             {
    375                 pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
    376             }
    377             else
    378             {
    379                 ctb_addr = ps_bs_ctxt->i4_ctb_x + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
    380                 slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
    381                 if(ps_bs_ctxt->i4_ctb_x)
    382                 {
    383                     ctb_addr = (ps_bs_ctxt->i4_ctb_x - 1) + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
    384                     left_slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
    385                 }
    386                 /*If the 1st slice in a new tile is a dependent slice*/
    387                 if(!((ps_bs_ctxt->ps_slice_hdr->i1_dependent_slice_flag == 1) && (slice_idx == left_slice_idx)))
    388                 {
    389                     /* Removed reduntant checks */
    390                     if((0 == i1_loop_filter_across_slices_enabled_flag && (
    391                                     ((slice_idx != left_slice_idx) && 0 == ps_bs_ctxt->i4_ctb_slice_y) ||
    392                                     ((0 == ps_bs_ctxt->i4_ctb_tile_x) && (slice_idx != left_slice_idx)))) ||
    393                                     (0 == ps_bs_ctxt->i4_ctb_x))
    394                     {
    395                         pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
    396                     }
    397                 }
    398             }
    399         }
    400 
    401         ctb_addr = ps_bs_ctxt->i4_ctb_x + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
    402         slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
    403         if(ps_bs_ctxt->i4_ctb_y)
    404         {
    405             ctb_addr = (ps_bs_ctxt->i4_ctb_x) + ((ps_bs_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb);
    406             top_slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
    407         }
    408 
    409         /* If top neighbor is not available, then set BS for entire first row to zero */
    410         /* Removed reduntant checks */
    411         if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_y)
    412                         || (0 == i1_loop_filter_across_slices_enabled_flag && ((slice_idx != top_slice_idx)))
    413                         || (0 == ps_bs_ctxt->i4_ctb_y))
    414         {
    415             pu4_horz_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
    416         }
    417     }
    418 
    419     /**
    420      *  Set BS of bottom and right frame boundaries to zero if it is an incomplete CTB
    421      *   (They might have been set to  non zero values because of CBF of the current CTB)
    422      *   This block might not be needed for I slices*/
    423     {
    424         WORD32 num_rows_remaining = (ps_sps->i2_pic_height_in_luma_samples - (ps_bs_ctxt->i4_ctb_y << log2_ctb_size)) >> 3;
    425         WORD32 num_cols_remaining = (ps_sps->i2_pic_width_in_luma_samples - (ps_bs_ctxt->i4_ctb_x << log2_ctb_size)) >> 3;
    426         if(num_rows_remaining < (ctb_size >> 3))
    427         {
    428             /* WORD32 offset = (((num_rows_remaining >> 3) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 4));
    429              *  will reduce to the following assuming ctb size is one of 16, 32 and 64
    430              *  and deblocking is done on 8x8 grid
    431              */
    432             WORD32 offset;
    433             offset = (num_rows_remaining >> (6 - log2_ctb_size)) << 2;
    434             if(6 != log2_ctb_size)
    435                 offset += (num_rows_remaining & 1) << (log2_ctb_size - 4);
    436 
    437             memset(((UWORD8 *)pu4_horz_bs) + offset, 0, 1 << (log2_ctb_size - 4));
    438         }
    439 
    440         if(num_cols_remaining < (ctb_size >> 3))
    441         {
    442             /* WORD32 offset = (((num_cols_remaining >> 3) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 4));
    443              *  will reduce to the following assuming ctb size is one of 16, 32 and 64
    444              *  and deblocking is done on 8x8 grid
    445              */
    446 
    447             WORD32 offset;
    448             offset = (num_cols_remaining >> (6 - log2_ctb_size)) << 2;
    449             if(6 != log2_ctb_size)
    450                 offset += (num_cols_remaining & 1) << (log2_ctb_size - 4);
    451 
    452             memset(((UWORD8 *)pu4_vert_bs) + offset, 0, 1 << (log2_ctb_size - 4));
    453         }
    454     }
    455 
    456     return 0;
    457 }
    458 WORD32 ihevcd_ctb_boundary_strength_pbslice(bs_ctxt_t *ps_bs_ctxt)
    459 {
    460     sps_t *ps_sps;
    461     pps_t *ps_pps;
    462     WORD32 cur_ctb_idx, next_ctb_idx = 0;
    463     WORD32 i4_tu_cnt;
    464     WORD32 i4_pu_cnt;
    465     tu_t *ps_tu;
    466 
    467     UWORD32 *pu4_vert_bs;
    468     UWORD32 *pu4_horz_bs;
    469     WORD32 bs_strd;
    470     WORD32 vert_bs0_tmp;
    471     WORD32 horz_bs0_tmp;
    472     UWORD8 *pu1_qp;
    473     WORD32 qp_strd;
    474     UWORD32 u4_qp_const_in_ctb;
    475     WORD32 ctb_indx;
    476     WORD32 log2_ctb_size;
    477     WORD32 ctb_size;
    478 
    479     WORD32 i;
    480     WORD8 i1_loop_filter_across_tiles_enabled_flag;
    481     WORD8 i1_loop_filter_across_slices_enabled_flag;
    482 
    483     PROFILE_DISABLE_BOUNDARY_STRENGTH();
    484 
    485     ps_sps = ps_bs_ctxt->ps_sps;
    486     ps_pps = ps_bs_ctxt->ps_pps;
    487 
    488     log2_ctb_size = ps_sps->i1_log2_ctb_size;
    489     ctb_size = (1 << log2_ctb_size);
    490 
    491     /* strides are in units of number of bytes */
    492     /* ctb_size * ctb_size / 8 / 16 is the number of bytes needed per CTB */
    493     bs_strd = (ps_sps->i2_pic_wd_in_ctb + 1) << (2 * log2_ctb_size - 7);
    494 
    495     pu4_vert_bs = (UWORD32 *)((UWORD8 *)ps_bs_ctxt->pu4_pic_vert_bs +
    496                     (ps_bs_ctxt->i4_ctb_x << (2 * log2_ctb_size - 7)) +
    497                     ps_bs_ctxt->i4_ctb_y * bs_strd);
    498     pu4_horz_bs = (UWORD32 *)((UWORD8 *)ps_bs_ctxt->pu4_pic_horz_bs +
    499                     (ps_bs_ctxt->i4_ctb_x << (2 * log2_ctb_size - 7)) +
    500                     ps_bs_ctxt->i4_ctb_y * bs_strd);
    501 
    502     vert_bs0_tmp = pu4_vert_bs[0] & (0xFFFFFFFF >> (sizeof(UWORD32) * 8 - ctb_size / 2));
    503     horz_bs0_tmp = pu4_horz_bs[0] & (0xFFFFFFFF >> (sizeof(UWORD32) * 8 - ctb_size / 2));
    504 
    505     ps_tu = ps_bs_ctxt->ps_tu;
    506 
    507     /* ctb_size/8 elements per CTB */
    508     qp_strd = ps_sps->i2_pic_wd_in_ctb << (log2_ctb_size - 3);
    509     pu1_qp = ps_bs_ctxt->pu1_pic_qp + ((ps_bs_ctxt->i4_ctb_x + ps_bs_ctxt->i4_ctb_y * qp_strd) << (log2_ctb_size - 3));
    510 
    511     ctb_indx = ps_bs_ctxt->i4_ctb_x + ps_sps->i2_pic_wd_in_ctb * ps_bs_ctxt->i4_ctb_y;
    512     u4_qp_const_in_ctb = ps_bs_ctxt->pu1_pic_qp_const_in_ctb[ctb_indx >> 3] & (1 << (ctb_indx & 7));
    513 
    514     i1_loop_filter_across_tiles_enabled_flag = ps_pps->i1_loop_filter_across_tiles_enabled_flag;
    515     i1_loop_filter_across_slices_enabled_flag = ps_bs_ctxt->ps_slice_hdr->i1_slice_loop_filter_across_slices_enabled_flag;
    516 
    517     /* ctb_size/8 is the number of edges per CTB
    518      * ctb_size/4 is the number of BS values needed per edge
    519      * divided by 8 for the number of bytes
    520      * 2 is the number of bits needed for each BS value */
    521 /*
    522     memset(pu4_vert_bs, 0, (ctb_size / 8 + 1) * (ctb_size / 4) * 2 / 8 );
    523     memset(pu4_horz_bs, 0, (ctb_size / 8) * (ctb_size / 4) * 2 / 8 );
    524 */
    525     memset(pu4_vert_bs, 0, (1 << (2 * log2_ctb_size - 7)) + (ctb_size >> 4));
    526     memset(pu4_horz_bs, 0, (1 << (2 * log2_ctb_size - 7)));
    527 
    528     /* pu4_vert_bs[0] has information about the left CTB which is not required when ctb_x = 0 */
    529     if(0 != ps_bs_ctxt->i4_ctb_x)
    530     {
    531         pu4_vert_bs[0] |= vert_bs0_tmp;
    532     }
    533 
    534     /* pu4_horz_bs[0] has information about the top CTB which is not required when ctb_y = 0 */
    535     if(0 != ps_bs_ctxt->i4_ctb_y)
    536     {
    537         pu4_horz_bs[0] |= horz_bs0_tmp;
    538     }
    539     /* pu4_horz_bs[bs_strd / 4] corresponds to pu4_horz_bs[0] of the bottom CTB */
    540     *(UWORD32 *)((UWORD8 *)pu4_horz_bs + bs_strd) = 0;
    541 
    542     cur_ctb_idx = ps_bs_ctxt->i4_ctb_x
    543                     + ps_bs_ctxt->i4_ctb_y * (ps_sps->i2_pic_wd_in_ctb);
    544     next_ctb_idx = ps_bs_ctxt->i4_next_tu_ctb_cnt;
    545     if(1 == ps_bs_ctxt->ps_codec->i4_num_cores)
    546     {
    547         i4_tu_cnt = ps_bs_ctxt->pu4_pic_tu_idx[next_ctb_idx] - ps_bs_ctxt->pu4_pic_tu_idx[cur_ctb_idx % RESET_TU_BUF_NCTB];
    548     }
    549     else
    550     {
    551         i4_tu_cnt = ps_bs_ctxt->pu4_pic_tu_idx[next_ctb_idx] - ps_bs_ctxt->pu4_pic_tu_idx[cur_ctb_idx];
    552     }
    553 
    554     ps_tu = ps_bs_ctxt->ps_tu;
    555     if(u4_qp_const_in_ctb)
    556         pu1_qp[0] = ps_tu->b7_qp;
    557 
    558     /* For all TUs in the CTB For left and top edges, check if there are coded coefficients on either sides of the edge */
    559     for(i = 0; i < i4_tu_cnt; i++)
    560     {
    561         WORD32 start_pos_x;
    562         WORD32 start_pos_y;
    563         WORD32 end_pos_x;
    564         WORD32 end_pos_y;
    565         WORD32 tu_size;
    566         UWORD32 u4_bs;
    567         WORD32 intra_flag;
    568         UWORD8 *pu1_pic_intra_flag;
    569 
    570         ps_tu = ps_bs_ctxt->ps_tu + i;
    571 
    572         start_pos_x = ps_tu->b4_pos_x;
    573         start_pos_y = ps_tu->b4_pos_y;
    574 
    575         tu_size = 1 << (ps_tu->b3_size + 2);
    576         tu_size >>= 2;
    577 
    578         end_pos_x = start_pos_x + tu_size;
    579         end_pos_y = start_pos_y + tu_size;
    580 
    581         {
    582             WORD32 tu_abs_x = (ps_bs_ctxt->i4_ctb_x << log2_ctb_size) + (start_pos_x << 2);
    583             WORD32 tu_abs_y = (ps_bs_ctxt->i4_ctb_y << log2_ctb_size) + (start_pos_y << 2);
    584 
    585             WORD32 numbytes_row =  (ps_sps->i2_pic_width_in_luma_samples + 63) / 64;
    586 
    587             pu1_pic_intra_flag = ps_bs_ctxt->ps_codec->pu1_pic_intra_flag;
    588             pu1_pic_intra_flag += (tu_abs_y >> 3) * numbytes_row;
    589             pu1_pic_intra_flag += (tu_abs_x >> 6);
    590 
    591             intra_flag = *pu1_pic_intra_flag;
    592             intra_flag &= (1 << ((tu_abs_x >> 3) % 8));
    593         }
    594         if(intra_flag)
    595         {
    596             u4_bs = DUP_LSB_10(tu_size);
    597 
    598             /* Only if the current edge falls on 8 pixel grid set BS */
    599             if(0 == (start_pos_x & 1))
    600             {
    601                 WORD32 shift;
    602                 shift = start_pos_y * 2;
    603                 /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
    604                  *  will reduce to the following assuming ctb size is one of 16, 32 and 64
    605                  *  and deblocking is done on 8x8 grid
    606                  */
    607                 if(6 != log2_ctb_size)
    608                     shift += ((start_pos_x & 2) << (log2_ctb_size - 2));
    609                 pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
    610             }
    611             /* Only if the current edge falls on 8 pixel grid set BS */
    612             if(0 == (start_pos_y & 1))
    613             {
    614                 WORD32 shift;
    615                 shift = start_pos_x * 2;
    616                 /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
    617                  *  will reduce to the following assuming ctb size is one of 16, 32 and 64
    618                  *  and deblocking is done on 8x8 grid
    619                  */
    620                 if(6 != log2_ctb_size)
    621                     shift += ((start_pos_y & 2) << (log2_ctb_size - 2));
    622                 pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
    623             }
    624         }
    625 
    626 
    627         /* If the current TU is coded then set both top edge and left edge BS to 1 and go to next TU */
    628         if(ps_tu->b1_y_cbf)
    629         {
    630             u4_bs = DUP_LSB_01(tu_size);
    631 
    632             /* Only if the current edge falls on 8 pixel grid set BS */
    633             if(0 == (start_pos_x & 1))
    634             {
    635                 WORD32 shift;
    636                 shift = start_pos_y * 2;
    637                 /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
    638                  *  will reduce to the following assuming ctb size is one of 16, 32 and 64
    639                  *  and deblocking is done on 8x8 grid
    640                  */
    641                 if(6 != log2_ctb_size)
    642                     shift += ((start_pos_x & 2) << (log2_ctb_size - 2));
    643                 pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
    644             }
    645             /* Only if the current edge falls on 8 pixel grid set BS */
    646             if(0 == (start_pos_y & 1))
    647             {
    648                 WORD32 shift;
    649                 shift = start_pos_x * 2;
    650                 /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
    651                  *  will reduce to the following assuming ctb size is one of 16, 32 and 64
    652                  *  and deblocking is done on 8x8 grid
    653                  */
    654                 if(6 != log2_ctb_size)
    655                     shift += ((start_pos_y & 2) << (log2_ctb_size - 2));
    656                 pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
    657             }
    658             /* Only if the current edge falls on 8 pixel grid set BS */
    659             if(0 == (end_pos_x & 1))
    660             {
    661                 if(!(ctb_size / 8 == (end_pos_x >> 1) && ps_bs_ctxt->i4_ctb_x == ps_sps->i2_pic_wd_in_ctb - 1))
    662                 {
    663                     WORD32 shift;
    664                     shift = start_pos_y * 2;
    665                     shift += (((end_pos_x >> 1) & ((MAX_CTB_SIZE >> log2_ctb_size) - 1)) << (log2_ctb_size - 1));
    666                     pu4_vert_bs[end_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
    667                 }
    668             }
    669             /* Only if the current edge falls on 8 pixel grid set BS */
    670             if(0 == (end_pos_y & 1))
    671             {
    672                 /* If end_pos_y corresponds to the bottom of the CTB, write to pu4_horz_bs[0] of the bottom CTB */
    673                 if(ctb_size / 8 == (end_pos_y >> 1))
    674                 {
    675                     *(UWORD32 *)((UWORD8 *)pu4_horz_bs + bs_strd) |= (u4_bs << (start_pos_x * 2));
    676                 }
    677                 else
    678                 {
    679                     WORD32 shift;
    680                     shift = start_pos_x * 2;
    681                     shift += (((end_pos_y >> 1) & ((MAX_CTB_SIZE >> log2_ctb_size) - 1)) << (log2_ctb_size - 1));
    682                     pu4_horz_bs[end_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
    683                 }
    684             }
    685         }
    686 
    687         if(0 == u4_qp_const_in_ctb)
    688         {
    689             if(0 == (start_pos_x & 1) && 0 == (start_pos_y & 1))
    690             {
    691                 WORD32 row, col;
    692                 for(row = start_pos_y; row < start_pos_y + tu_size; row += 2)
    693                 {
    694                     for(col = start_pos_x; col < start_pos_x + tu_size; col += 2)
    695                     {
    696                         pu1_qp[(row >> 1) * qp_strd + (col >> 1)] = ps_tu->b7_qp;
    697                     }
    698                 }
    699             }
    700         }
    701     }
    702 
    703     /* For all PUs in the CTB,
    704     For left and top edges, compute BS */
    705 
    706     cur_ctb_idx = ps_bs_ctxt->i4_ctb_x
    707                     + ps_bs_ctxt->i4_ctb_y * (ps_sps->i2_pic_wd_in_ctb);
    708 
    709     {
    710         WORD32 next_ctb_idx;
    711         next_ctb_idx = ps_bs_ctxt->i4_next_pu_ctb_cnt;
    712         i4_pu_cnt = ps_bs_ctxt->pu4_pic_pu_idx[next_ctb_idx] - ps_bs_ctxt->pu4_pic_pu_idx[cur_ctb_idx];
    713     }
    714 
    715     for(i = 0; i < i4_pu_cnt; i++)
    716     {
    717         WORD32 start_pos_x;
    718         WORD32 start_pos_y;
    719         WORD32 end_pos_x;
    720         WORD32 end_pos_y;
    721         WORD32 pu_wd, pu_ht;
    722         UWORD32 u4_bs;
    723         pu_t *ps_pu = ps_bs_ctxt->ps_pu + i;
    724         pu_t *ps_ngbr_pu;
    725         UWORD32 u4_ngbr_pu_indx;
    726 
    727         start_pos_x = ps_pu->b4_pos_x;
    728         start_pos_y = ps_pu->b4_pos_y;
    729 
    730         pu_wd = (ps_pu->b4_wd + 1);
    731         pu_ht = (ps_pu->b4_ht + 1);
    732 
    733         end_pos_x = start_pos_x + pu_wd;
    734         end_pos_y = start_pos_y + pu_ht;
    735 
    736         /* If the current PU is intra, set Boundary strength as 2 for both top and left edge */
    737         /* Need not mask the BS to zero even if it was set to 1 already since BS 2 and 3 are assumed to be the same in leaf level functions */
    738         if(ps_pu->b1_intra_flag)
    739         {
    740             u4_bs = DUP_LSB_10(pu_ht);
    741 
    742             /* Only if the current edge falls on 8 pixel grid set BS */
    743             if(0 == (start_pos_x & 1))
    744             {
    745                 WORD32 shift;
    746                 shift = start_pos_y * 2;
    747                 /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
    748                  *  will reduce to the following assuming ctb size is one of 16, 32 and 64
    749                  *  and deblocking is done on 8x8 grid
    750                  */
    751                 if(6 != log2_ctb_size)
    752                     shift += ((start_pos_x & 2) << (log2_ctb_size - 2));
    753                 pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
    754             }
    755 
    756             u4_bs = DUP_LSB_10(pu_wd);
    757 
    758             /* Only if the current edge falls on 8 pixel grid set BS */
    759             if(0 == (start_pos_y & 1))
    760             {
    761                 WORD32 shift;
    762                 shift = start_pos_x * 2;
    763                 /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
    764                  *  will reduce to the following assuming ctb size is one of 16, 32 and 64
    765                  *  and deblocking is done on 8x8 grid
    766                  */
    767                 if(6 != log2_ctb_size)
    768                     shift += ((start_pos_y & 2) << (log2_ctb_size - 2));
    769                 pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
    770             }
    771         }
    772 
    773         else
    774         {
    775             /* Vertical edge */
    776             /* Process only if the edge is not a frame edge */
    777             if(0 != ps_bs_ctxt->i4_ctb_x + start_pos_x)
    778             {
    779                 do
    780                 {
    781                     WORD32 pu_ngbr_ht;
    782                     WORD32 min_pu_ht;
    783                     WORD32 ngbr_end_pos_y;
    784                     UWORD32 ngbr_pu_idx_strd;
    785                     ngbr_pu_idx_strd = MAX_CTB_SIZE / MIN_PU_SIZE + 2;
    786                     u4_ngbr_pu_indx = ps_bs_ctxt->pu4_pic_pu_idx_map[(start_pos_y + 1) * ngbr_pu_idx_strd + (start_pos_x)];
    787                     ps_ngbr_pu = ps_bs_ctxt->ps_pic_pu + u4_ngbr_pu_indx;
    788 
    789                     pu_ngbr_ht = ps_ngbr_pu->b4_ht + 1;
    790                     ngbr_end_pos_y = ps_ngbr_pu->b4_pos_y + pu_ngbr_ht;
    791 
    792                     min_pu_ht = MIN(ngbr_end_pos_y, end_pos_y) - start_pos_y;
    793 
    794                     if(ps_ngbr_pu->b1_intra_flag)
    795                     {
    796                         u4_bs = DUP_LSB_10(min_pu_ht);
    797 
    798                         /* Only if the current edge falls on 8 pixel grid set BS */
    799                         if(0 == (start_pos_x & 1))
    800                         {
    801                             WORD32 shift;
    802                             shift = start_pos_y * 2;
    803                             /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
    804                              *  will reduce to the following assuming ctb size is one of 16, 32 and 64
    805                              *  and deblocking is done on 8x8 grid
    806                              */
    807                             if(6 != log2_ctb_size)
    808                                 shift += ((start_pos_x & 2) << (log2_ctb_size - 2));
    809                             pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
    810                         }
    811                     }
    812                     else
    813                     {
    814                         u4_bs = ihevcd_pu_boundary_strength(ps_pu, ps_ngbr_pu);
    815                         if(u4_bs)
    816                         {
    817                             u4_bs = DUP_LSB_01(min_pu_ht);
    818                             if(0 == (start_pos_x & 1))
    819                             {
    820                                 WORD32 shift;
    821                                 shift = start_pos_y * 2;
    822                                 /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
    823                                  *  will reduce to the following assuming ctb size is one of 16, 32 and 64
    824                                  *  and deblocking is done on 8x8 grid
    825                                  */
    826                                 if(6 != log2_ctb_size)
    827                                     shift += ((start_pos_x & 2) << (log2_ctb_size - 2));
    828                                 pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
    829                             }
    830                         }
    831                     }
    832 
    833                     pu_ht -= min_pu_ht;
    834                     start_pos_y += min_pu_ht;
    835                 }while(pu_ht > 0);
    836 
    837                 /* Reinitialising since the values are updated in the previous loop */
    838                 pu_ht = ps_pu->b4_ht + 1;
    839                 start_pos_y = ps_pu->b4_pos_y;
    840             }
    841 
    842             /* Horizontal edge */
    843             /* Process only if the edge is not a frame edge */
    844             if(0 != ps_bs_ctxt->i4_ctb_y + start_pos_y)
    845             {
    846                 do
    847                 {
    848                     WORD32 pu_ngbr_wd;
    849                     WORD32 min_pu_wd;
    850                     WORD32 ngbr_end_pos_x;
    851                     UWORD32 ngbr_pu_idx_strd = MAX_CTB_SIZE / MIN_PU_SIZE + 2;
    852                     u4_ngbr_pu_indx = ps_bs_ctxt->pu4_pic_pu_idx_map[(start_pos_y)*ngbr_pu_idx_strd + (start_pos_x + 1)];
    853                     ps_ngbr_pu = ps_bs_ctxt->ps_pic_pu + u4_ngbr_pu_indx;
    854 
    855                     pu_ngbr_wd = ps_ngbr_pu->b4_wd + 1;
    856                     ngbr_end_pos_x = ps_ngbr_pu->b4_pos_x + pu_ngbr_wd;
    857 
    858                     min_pu_wd = MIN(ngbr_end_pos_x, end_pos_x) - start_pos_x;
    859 
    860                     if(ps_ngbr_pu->b1_intra_flag)
    861                     {
    862                         u4_bs = DUP_LSB_10(min_pu_wd);
    863 
    864                         /* Only if the current edge falls on 8 pixel grid set BS */
    865                         if(0 == (start_pos_y & 1))
    866                         {
    867                             WORD32 shift;
    868                             shift = start_pos_x * 2;
    869                             /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
    870                              *  will reduce to the following assuming ctb size is one of 16, 32 and 64
    871                              *  and deblocking is done on 8x8 grid
    872                              */
    873                             if(6 != log2_ctb_size)
    874                                 shift += ((start_pos_y & 2) << (log2_ctb_size - 2));
    875                             pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
    876                         }
    877                     }
    878                     else
    879                     {
    880                         u4_bs = ihevcd_pu_boundary_strength(ps_pu, ps_ngbr_pu);
    881                         if(u4_bs)
    882                         {
    883                             u4_bs = DUP_LSB_01(min_pu_wd);
    884 
    885                             /* Only if the current edge falls on 8 pixel grid set BS */
    886                             if(0 == (start_pos_y & 1))
    887                             {
    888                                 WORD32 shift;
    889                                 shift = start_pos_x * 2;
    890                                 /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
    891                                  *  will reduce to the following assuming ctb size is one of 16, 32 and 64
    892                                  *  and deblocking is done on 8x8 grid
    893                                  */
    894                                 if(6 != log2_ctb_size)
    895                                     shift += ((start_pos_y & 2) << (log2_ctb_size - 2));
    896                                 pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
    897                             }
    898                         }
    899                     }
    900 
    901                     pu_wd -= min_pu_wd;
    902                     start_pos_x += min_pu_wd;
    903                 }while(pu_wd > 0);
    904 
    905                 /* Reinitialising since the values are updated in the previous loop */
    906                 pu_wd = ps_pu->b4_wd + 1;
    907                 start_pos_x = ps_pu->b4_pos_x;
    908             }
    909         }
    910     }
    911 
    912     {
    913         /* If left neighbor is not available, then set BS for entire first column to zero */
    914         UWORD32 ctb_addr;
    915         WORD32 slice_idx, left_slice_idx = -1, top_slice_idx = -1;
    916 
    917         if(!ps_pps->i1_tiles_enabled_flag)
    918         {
    919             if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_x) ||
    920                             (0 == i1_loop_filter_across_slices_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_slice_x && 0 == ps_bs_ctxt->i4_ctb_slice_y) ||
    921                             (0 == ps_bs_ctxt->i4_ctb_x))
    922             {
    923                 pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
    924             }
    925         }
    926         else
    927         {
    928             if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_x))
    929             {
    930                 pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
    931             }
    932             else
    933             {
    934 
    935                 ctb_addr = ps_bs_ctxt->i4_ctb_x + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
    936                 slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
    937 
    938                 if(ps_bs_ctxt->i4_ctb_x)
    939                 {
    940                     ctb_addr = (ps_bs_ctxt->i4_ctb_x - 1) + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
    941                     left_slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
    942                 }
    943 
    944                 if(!((ps_bs_ctxt->ps_slice_hdr->i1_dependent_slice_flag == 1) && (slice_idx == left_slice_idx)))
    945                 {
    946                     /* Removed redundant checks */
    947                     if((0 == i1_loop_filter_across_slices_enabled_flag && (
    948                                     (0 == ps_bs_ctxt->i4_ctb_slice_x && 0 == ps_bs_ctxt->i4_ctb_slice_y) ||
    949                                     ((0 == ps_bs_ctxt->i4_ctb_tile_x) && (slice_idx != left_slice_idx)))) || (0 == ps_bs_ctxt->i4_ctb_x))
    950                     {
    951                         pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
    952                     }
    953                 }
    954             }
    955         }
    956 
    957         ctb_addr = ps_bs_ctxt->i4_ctb_x + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
    958         slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
    959         if(ps_bs_ctxt->i4_ctb_y)
    960         {
    961             ctb_addr = (ps_bs_ctxt->i4_ctb_x) + ((ps_bs_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb);
    962             top_slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
    963         }
    964         /* If top neighbor is not available, then set BS for entire first row to zero */
    965         /* Removed redundant checks */
    966         if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_y)
    967                         || (0 == i1_loop_filter_across_slices_enabled_flag && ((slice_idx != top_slice_idx)))
    968                         || (0 == ps_bs_ctxt->i4_ctb_y))
    969         {
    970             pu4_horz_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
    971         }
    972     }
    973 
    974     /**
    975      *  Set BS of bottom and right frame boundaries to zero if it is an incomplete CTB
    976      *   (They might have been set to non-zero values because of CBF of the current CTB)*/
    977     {
    978         WORD32 num_rows_remaining = (ps_sps->i2_pic_height_in_luma_samples - (ps_bs_ctxt->i4_ctb_y << log2_ctb_size)) >> 3;
    979         WORD32 num_cols_remaining = (ps_sps->i2_pic_width_in_luma_samples - (ps_bs_ctxt->i4_ctb_x << log2_ctb_size)) >> 3;
    980         if(num_rows_remaining < (ctb_size >> 3))
    981         {
    982             /* WORD32 offset = (((num_rows_remaining >> 3) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 4));
    983              *  will reduce to the following assuming ctb size is one of 16, 32 and 64
    984              *  and deblocking is done on 8x8 grid
    985              */
    986             WORD32 offset;
    987             offset = (num_rows_remaining >> (6 - log2_ctb_size)) << 2;
    988             if(6 != log2_ctb_size)
    989                 offset += (num_rows_remaining & 1) << (log2_ctb_size - 4);
    990 
    991             memset(((UWORD8 *)pu4_horz_bs) + offset, 0, 1 << (log2_ctb_size - 4));
    992         }
    993 
    994         if(num_cols_remaining < (ctb_size >> 3))
    995         {
    996             /* WORD32 offset = (((num_cols_remaining >> 3) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 4));
    997              *  will reduce to the following assuming ctb size is one of 16, 32 and 64
    998              *  and deblocking is done on 8x8 grid
    999              */
   1000 
   1001             WORD32 offset;
   1002             offset = (num_cols_remaining >> (6 - log2_ctb_size)) << 2;
   1003             if(6 != log2_ctb_size)
   1004                 offset += (num_cols_remaining & 1) << (log2_ctb_size - 4);
   1005 
   1006             memset(((UWORD8 *)pu4_vert_bs) + offset, 0, 1 << (log2_ctb_size - 4));
   1007         }
   1008     }
   1009     return 0;
   1010 }
   1011