Home | History | Annotate | Download | only in decoder
      1 /******************************************************************************
      2 *
      3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
      4 *
      5 * Licensed under the Apache License, Version 2.0 (the "License");
      6 * you may not use this file except in compliance with the License.
      7 * You may obtain a copy of the License at:
      8 *
      9 * http://www.apache.org/licenses/LICENSE-2.0
     10 *
     11 * Unless required by applicable law or agreed to in writing, software
     12 * distributed under the License is distributed on an "AS IS" BASIS,
     13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 * See the License for the specific language governing permissions and
     15 * limitations under the License.
     16 *
     17 ******************************************************************************/
     18 /**
     19  *******************************************************************************
     20  * @file
     21  *  ihevc_boundary_strength.c
     22  *
     23  * @brief
     24  *  Contains functions for computing boundary strength
     25  *
     26  * @author
     27  *  Harish
     28  *
     29  * @par List of Functions:
     30  *
     31  * @remarks
     32  *  None
     33  *
     34  *******************************************************************************
     35  */
     36 /*****************************************************************************/
     37 /* File Includes                                                             */
     38 /*****************************************************************************/
     39 #include <stdio.h>
     40 #include <stddef.h>
     41 #include <stdlib.h>
     42 #include <string.h>
     43 
     44 #include "ihevc_typedefs.h"
     45 #include "iv.h"
     46 #include "ivd.h"
     47 #include "ihevcd_cxa.h"
     48 #include "ithread.h"
     49 
     50 #include "ihevc_defs.h"
     51 #include "ihevc_debug.h"
     52 #include "ihevc_defs.h"
     53 #include "ihevc_structs.h"
     54 #include "ihevc_macros.h"
     55 #include "ihevc_platform_macros.h"
     56 #include "ihevc_cabac_tables.h"
     57 
     58 #include "ihevc_error.h"
     59 #include "ihevc_common_tables.h"
     60 
     61 #include "ihevcd_trace.h"
     62 #include "ihevcd_defs.h"
     63 #include "ihevcd_function_selector.h"
     64 #include "ihevcd_structs.h"
     65 #include "ihevcd_error.h"
     66 #include "ihevcd_nal.h"
     67 #include "ihevcd_bitstream.h"
     68 #include "ihevcd_job_queue.h"
     69 #include "ihevcd_utils.h"
     70 #include "ihevcd_profile.h"
     71 
     72 /*****************************************************************************/
     73 /* Function Prototypes                                                       */
     74 /*****************************************************************************/
     75 
     76 
     77 #define SET_NGBHR_ALL_AVAIL(avail)          avail = 0x1F;
     78 
     79 #define SET_NGBHR_BOTLEFT_NOTAVAIL(avail)   avail &= ~0x10;
     80 #define SET_NGBHR_LEFT_NOTAVAIL(avail)      avail &= ~0x8;
     81 #define SET_NGBHR_TOPLEFT_NOTAVAIL(avail)   avail &= ~0x4;
     82 #define SET_NGBHR_TOP_NOTAVAIL(avail)       avail &= ~0x2;
     83 #define SET_NGBHR_TOPRIGHT_NOTAVAIL(avail)  avail &= ~0x1;
     84 
     85 WORD32 ihevcd_pu_boundary_strength(pu_t *ps_pu,
     86                                    pu_t *ps_ngbr_pu)
     87 {
     88     WORD32 i4_bs;
     89     UWORD32 l0_ref_pic_buf_id, l1_ref_pic_buf_id;
     90     UWORD32 ngbr_l0_ref_pic_buf_id, ngbr_l1_ref_pic_buf_id;
     91 
     92     WORD16 i2_mv_x0, i2_mv_y0, i2_mv_x1, i2_mv_y1;
     93     WORD16 i2_ngbr_mv_x0, i2_ngbr_mv_y0, i2_ngbr_mv_x1, i2_ngbr_mv_y1;
     94 
     95     WORD32 num_mv, ngbr_num_mv;
     96 
     97     num_mv = (PRED_BI == ps_pu->b2_pred_mode) ? 2 : 1;
     98     ngbr_num_mv = (PRED_BI == ps_ngbr_pu->b2_pred_mode) ? 2 : 1;
     99 
    100     l0_ref_pic_buf_id = ps_pu->mv.i1_l0_ref_pic_buf_id;
    101     l1_ref_pic_buf_id = ps_pu->mv.i1_l1_ref_pic_buf_id;
    102     ngbr_l0_ref_pic_buf_id = ps_ngbr_pu->mv.i1_l0_ref_pic_buf_id;
    103     ngbr_l1_ref_pic_buf_id = ps_ngbr_pu->mv.i1_l1_ref_pic_buf_id;
    104 
    105 
    106     i2_mv_x0 = ps_pu->mv.s_l0_mv.i2_mvx;
    107     i2_mv_y0 = ps_pu->mv.s_l0_mv.i2_mvy;
    108     i2_mv_x1 = ps_pu->mv.s_l1_mv.i2_mvx;
    109     i2_mv_y1 = ps_pu->mv.s_l1_mv.i2_mvy;
    110 
    111     i2_ngbr_mv_x0 = ps_ngbr_pu->mv.s_l0_mv.i2_mvx;
    112     i2_ngbr_mv_y0 = ps_ngbr_pu->mv.s_l0_mv.i2_mvy;
    113     i2_ngbr_mv_x1 = ps_ngbr_pu->mv.s_l1_mv.i2_mvx;
    114     i2_ngbr_mv_y1 = ps_ngbr_pu->mv.s_l1_mv.i2_mvy;
    115 
    116 
    117     /* If two motion vectors are used */
    118     if((2 == num_mv) &&
    119             (2 == ngbr_num_mv))
    120     {
    121         if((l0_ref_pic_buf_id == ngbr_l0_ref_pic_buf_id && l1_ref_pic_buf_id == ngbr_l1_ref_pic_buf_id) ||
    122                 (l0_ref_pic_buf_id == ngbr_l1_ref_pic_buf_id && l1_ref_pic_buf_id == ngbr_l0_ref_pic_buf_id))
    123         {
    124             if(l0_ref_pic_buf_id != l1_ref_pic_buf_id) /* Different L0 and L1 */
    125             {
    126                 if(l0_ref_pic_buf_id == ngbr_l0_ref_pic_buf_id)
    127                 {
    128                     i4_bs = (ABS(i2_mv_x0 - i2_ngbr_mv_x0) < 4) &&
    129                             (ABS(i2_mv_y0 - i2_ngbr_mv_y0) < 4) &&
    130                             (ABS(i2_mv_x1 - i2_ngbr_mv_x1) < 4) &&
    131                             (ABS(i2_mv_y1 - i2_ngbr_mv_y1) < 4) ? 0 : 1;
    132                 }
    133                 else
    134                 {
    135                     i4_bs = (ABS(i2_mv_x0 - i2_ngbr_mv_x1) < 4) &&
    136                             (ABS(i2_mv_y0 - i2_ngbr_mv_y1) < 4) &&
    137                             (ABS(i2_mv_x1 - i2_ngbr_mv_x0) < 4) &&
    138                             (ABS(i2_mv_y1 - i2_ngbr_mv_y0) < 4) ? 0 : 1;
    139                 }
    140             }
    141             else /* Same L0 and L1 */
    142             {
    143                 i4_bs = ((ABS(i2_mv_x0 - i2_ngbr_mv_x0) >= 4) ||
    144                          (ABS(i2_mv_y0 - i2_ngbr_mv_y0) >= 4) ||
    145                          (ABS(i2_mv_x1 - i2_ngbr_mv_x1) >= 4) ||
    146                          (ABS(i2_mv_y1 - i2_ngbr_mv_y1) >= 4)) &&
    147                                 ((ABS(i2_mv_x0 - i2_ngbr_mv_x1) >= 4) ||
    148                                  (ABS(i2_mv_y0 - i2_ngbr_mv_y1) >= 4) ||
    149                                  (ABS(i2_mv_x1 - i2_ngbr_mv_x0) >= 4) ||
    150                                  (ABS(i2_mv_y1 - i2_ngbr_mv_y0) >= 4)) ? 1 : 0;
    151             }
    152         }
    153         else /* If the reference pictures used are different */
    154         {
    155             i4_bs = 1;
    156         }
    157     }
    158 
    159     /* If one motion vector is used in both PUs */
    160     else if((1 == num_mv) &&
    161             (1 == ngbr_num_mv))
    162     {
    163         WORD16 i2_mv_x, i2_mv_y;
    164         WORD16 i2_ngbr_mv_x, i2_ngbr_mv_y;
    165         UWORD32 ref_pic_buf_id, ngbr_ref_pic_buf_id;
    166 
    167         if(PRED_L0 == ps_pu->b2_pred_mode)
    168         {
    169             i2_mv_x = i2_mv_x0;
    170             i2_mv_y = i2_mv_y0;
    171             ref_pic_buf_id = l0_ref_pic_buf_id;
    172         }
    173         else
    174         {
    175             i2_mv_x = i2_mv_x1;
    176             i2_mv_y = i2_mv_y1;
    177             ref_pic_buf_id = l1_ref_pic_buf_id;
    178         }
    179 
    180         if(PRED_L0 == ps_ngbr_pu->b2_pred_mode)
    181         {
    182             i2_ngbr_mv_x = i2_ngbr_mv_x0;
    183             i2_ngbr_mv_y = i2_ngbr_mv_y0;
    184             ngbr_ref_pic_buf_id = ngbr_l0_ref_pic_buf_id;
    185         }
    186         else
    187         {
    188             i2_ngbr_mv_x = i2_ngbr_mv_x1;
    189             i2_ngbr_mv_y = i2_ngbr_mv_y1;
    190             ngbr_ref_pic_buf_id = ngbr_l1_ref_pic_buf_id;
    191         }
    192 
    193         i4_bs = (ref_pic_buf_id == ngbr_ref_pic_buf_id) &&
    194                 (ABS(i2_mv_x - i2_ngbr_mv_x) < 4)  &&
    195                 (ABS(i2_mv_y - i2_ngbr_mv_y) < 4) ? 0 : 1;
    196     }
    197 
    198     /* If the no. of motion vectors is not the same */
    199     else
    200     {
    201         i4_bs = 1;
    202     }
    203 
    204 
    205     return i4_bs;
    206 }
    207 
    208 /* QP is also populated in the same function */
    209 WORD32 ihevcd_ctb_boundary_strength_islice(bs_ctxt_t *ps_bs_ctxt)
    210 {
    211     pps_t *ps_pps;
    212     sps_t *ps_sps;
    213     tu_t *ps_tu;
    214     UWORD32 *pu4_vert_bs;
    215     UWORD32 *pu4_horz_bs;
    216     WORD32 bs_strd;
    217     WORD32 vert_bs0_tmp;
    218     WORD32 horz_bs0_tmp;
    219     UWORD8 *pu1_qp;
    220     WORD32 qp_strd;
    221     UWORD32 u4_qp_const_in_ctb;
    222     WORD32 ctb_indx;
    223     WORD32 i4_tu_cnt;
    224     WORD32 log2_ctb_size;
    225     WORD32 ctb_size;
    226 
    227     WORD8 i1_loop_filter_across_tiles_enabled_flag;
    228     WORD8 i1_loop_filter_across_slices_enabled_flag;
    229 
    230     WORD32 i;
    231 
    232     PROFILE_DISABLE_BOUNDARY_STRENGTH();
    233 
    234     ps_pps = ps_bs_ctxt->ps_pps;
    235     ps_sps = ps_bs_ctxt->ps_sps;
    236     i1_loop_filter_across_tiles_enabled_flag = ps_pps->i1_loop_filter_across_tiles_enabled_flag;
    237     i1_loop_filter_across_slices_enabled_flag = ps_bs_ctxt->ps_slice_hdr->i1_slice_loop_filter_across_slices_enabled_flag;
    238     i4_tu_cnt = ps_bs_ctxt->i4_ctb_tu_cnt;
    239 
    240     log2_ctb_size = ps_sps->i1_log2_ctb_size;
    241     ctb_size = (1 << log2_ctb_size);
    242 
    243     /* strides are in units of number of bytes */
    244     /* ctb_size * ctb_size / 8 / 16 is the number of bytes needed per CTB */
    245     bs_strd = (ps_sps->i2_pic_wd_in_ctb + 1) << (2 * log2_ctb_size - 7);
    246 
    247     pu4_vert_bs = (UWORD32 *)((UWORD8 *)ps_bs_ctxt->pu4_pic_vert_bs +
    248                     (ps_bs_ctxt->i4_ctb_x << (2 * log2_ctb_size - 7)) +
    249                     ps_bs_ctxt->i4_ctb_y * bs_strd);
    250     pu4_horz_bs = (UWORD32 *)((UWORD8 *)ps_bs_ctxt->pu4_pic_horz_bs +
    251                     (ps_bs_ctxt->i4_ctb_x << (2 * log2_ctb_size - 7)) +
    252                     ps_bs_ctxt->i4_ctb_y * bs_strd);
    253 
    254     /* ctb_size/8 elements per CTB */
    255     qp_strd = ps_sps->i2_pic_wd_in_ctb << (log2_ctb_size - 3);
    256     pu1_qp = ps_bs_ctxt->pu1_pic_qp + ((ps_bs_ctxt->i4_ctb_x + ps_bs_ctxt->i4_ctb_y * qp_strd) << (log2_ctb_size - 3));
    257 
    258     ctb_indx = ps_bs_ctxt->i4_ctb_x + ps_sps->i2_pic_wd_in_ctb * ps_bs_ctxt->i4_ctb_y;
    259     u4_qp_const_in_ctb = ps_bs_ctxt->pu1_pic_qp_const_in_ctb[ctb_indx >> 3] & (1 << (ctb_indx & 7));
    260 
    261     vert_bs0_tmp = pu4_vert_bs[0] & (0xFFFFFFFF >> (sizeof(UWORD32) * 8 - ctb_size / 2));
    262     horz_bs0_tmp = pu4_horz_bs[0] & (0xFFFFFFFF >> (sizeof(UWORD32) * 8 - ctb_size / 2));
    263 
    264     /* ctb_size/8 is the number of edges per CTB
    265      * ctb_size/4 is the number of BS values needed per edge
    266      * divided by 8 for the number of bytes
    267      * 2 is the number of bits needed for each BS value */
    268 /*
    269     memset(pu4_vert_bs, 0, (ctb_size / 8 + 1) * (ctb_size / 4) / 8 * 2 );
    270     memset(pu4_horz_bs, 0, (ctb_size / 8) * (ctb_size / 4) / 8 * 2 );
    271 */
    272     memset(pu4_vert_bs, 0, (1 << (2 * log2_ctb_size - 7)) + ((ctb_size >> 5) << 1));
    273     memset(pu4_horz_bs, 0, (1 << (2 * log2_ctb_size - 7)));
    274 
    275     /* pu4_vert_bs[0] has information about the left CTB which is not required when ctb_x = 0 */
    276     if(0 != ps_bs_ctxt->i4_ctb_x)
    277     {
    278         pu4_vert_bs[0] |= vert_bs0_tmp;
    279     }
    280 
    281     /* pu4_horz_bs[0] has information about the top CTB which is not required when ctb_y = 0 */
    282     if(0 != ps_bs_ctxt->i4_ctb_y)
    283     {
    284         pu4_horz_bs[0] |= horz_bs0_tmp;
    285     }
    286 
    287     ps_tu = ps_bs_ctxt->ps_tu;
    288 
    289     /* Populating the QP array - if const_qp_in_ctb flag is one, set only the first element */
    290     if(u4_qp_const_in_ctb)
    291         pu1_qp[0] = ps_tu->b7_qp;
    292 
    293     for(i = 0; i < i4_tu_cnt; i++)
    294     {
    295         WORD32 start_pos_x;
    296         WORD32 start_pos_y;
    297         WORD32 tu_size;
    298 
    299 
    300         UWORD32 u4_bs;
    301         ps_tu = ps_bs_ctxt->ps_tu + i;
    302 
    303         /* start_pos_x and start_pos_y are in units of min TU size (4x4) */
    304         start_pos_x = ps_tu->b4_pos_x;
    305         start_pos_y = ps_tu->b4_pos_y;
    306 
    307         tu_size = 1 << (ps_tu->b3_size + 2);
    308         tu_size >>= 2; /* TU size divided by 4 */
    309 
    310         u4_bs = DUP_LSB_10(tu_size);
    311 
    312         /* Only if the current edge falls on 8 pixel grid set BS */
    313         if(0 == (start_pos_x & 1))
    314         {
    315             WORD32 shift;
    316             shift = start_pos_y * 2;
    317             /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
    318              *  will reduce to the following assuming ctb size is one of 16, 32 and 64
    319              *  and deblocking is done on 8x8 grid
    320              */
    321             if(6 != log2_ctb_size)
    322                 shift += ((start_pos_x & 2) << (log2_ctb_size - 2));
    323             pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
    324         }
    325         /* Only if the current edge falls on 8 pixel grid set BS */
    326         if(0 == (start_pos_y & 1))
    327         {
    328             WORD32 shift;
    329             shift = start_pos_x * 2;
    330             /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
    331              *  will reduce to the following assuming ctb size is one of 16, 32 and 64
    332              *  and deblocking is done on 8x8 grid
    333              */
    334             if(6 != log2_ctb_size)
    335                 shift += ((start_pos_y & 2) << (log2_ctb_size - 2));
    336             pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
    337         }
    338 
    339         /* Populating the QP array */
    340         if(0 == u4_qp_const_in_ctb)
    341         {
    342             if(0 == (start_pos_x & 1) && 0 == (start_pos_y & 1))
    343             {
    344                 WORD32 row, col;
    345                 for(row = start_pos_y; row < start_pos_y + tu_size; row += 2)
    346                 {
    347                     for(col = start_pos_x; col < start_pos_x + tu_size; col += 2)
    348                     {
    349                         pu1_qp[(row >> 1) * qp_strd + (col >> 1)] = ps_tu->b7_qp;
    350                     }
    351                 }
    352             }
    353         }
    354 
    355     }
    356     {
    357         /*Determine if the slice is dependent, and is its left neighbor belongs to the same slice, in a different tile*/
    358         UWORD32 ctb_addr;
    359         WORD32 slice_idx, left_slice_idx = -1, top_slice_idx = -1;
    360         /* If left neighbor is not available, then set BS for entire first column to zero */
    361         if(!ps_pps->i1_tiles_enabled_flag)
    362         {
    363             if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_x) ||
    364                             (0 == i1_loop_filter_across_slices_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_slice_x && 0 == ps_bs_ctxt->i4_ctb_slice_y) ||
    365                             (0 == ps_bs_ctxt->i4_ctb_x))
    366             {
    367                 pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
    368             }
    369         }
    370         else
    371         {
    372             //If across-tiles is disabled
    373             if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_x))
    374             {
    375                 pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
    376             }
    377             else
    378             {
    379                 ctb_addr = ps_bs_ctxt->i4_ctb_x + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
    380                 slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
    381                 if(ps_bs_ctxt->i4_ctb_x)
    382                 {
    383                     ctb_addr = (ps_bs_ctxt->i4_ctb_x - 1) + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
    384                     left_slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
    385                 }
    386                 /*If the 1st slice in a new tile is a dependent slice*/
    387                 if(!((ps_bs_ctxt->ps_slice_hdr->i1_dependent_slice_flag == 1) && (slice_idx == left_slice_idx)))
    388                 {
    389                     if((0 == i1_loop_filter_across_slices_enabled_flag && (
    390                                     (0 == ps_bs_ctxt->i4_ctb_slice_x && 0 == ps_bs_ctxt->i4_ctb_slice_y) || (0 == ps_bs_ctxt->i4_ctb_slice_x && 0 == ps_bs_ctxt->i4_ctb_tile_x) ||
    391                                     ((0 == ps_bs_ctxt->i4_ctb_tile_x) && (slice_idx != left_slice_idx)))) ||
    392                                     (0 == ps_bs_ctxt->i4_ctb_x))
    393                     {
    394                         pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
    395                     }
    396                 }
    397             }
    398         }
    399 
    400         ctb_addr = ps_bs_ctxt->i4_ctb_x + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
    401         slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
    402         if(ps_bs_ctxt->i4_ctb_y)
    403         {
    404             ctb_addr = (ps_bs_ctxt->i4_ctb_x) + ((ps_bs_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb);
    405             top_slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
    406         }
    407 
    408         /* If top neighbor is not available, then set BS for entire first row to zero */
    409         if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_y)
    410                         || (0 == i1_loop_filter_across_slices_enabled_flag && ((0 == ps_bs_ctxt->i4_ctb_slice_y) || (slice_idx != top_slice_idx)))
    411                         || (0 == ps_bs_ctxt->i4_ctb_y))
    412         {
    413             pu4_horz_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
    414         }
    415     }
    416 
    417     /**
    418      *  Set BS of bottom and right frame boundaries to zero if it is an incomplete CTB
    419      *   (They might have been set to  non zero values because of CBF of the current CTB)
    420      *   This block might not be needed for I slices*/
    421     {
    422         WORD32 num_rows_remaining = (ps_sps->i2_pic_height_in_luma_samples - (ps_bs_ctxt->i4_ctb_y << log2_ctb_size)) >> 3;
    423         WORD32 num_cols_remaining = (ps_sps->i2_pic_width_in_luma_samples - (ps_bs_ctxt->i4_ctb_x << log2_ctb_size)) >> 3;
    424         if(num_rows_remaining < (ctb_size >> 3))
    425         {
    426             /* WORD32 offset = (((num_rows_remaining >> 3) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 4));
    427              *  will reduce to the following assuming ctb size is one of 16, 32 and 64
    428              *  and deblocking is done on 8x8 grid
    429              */
    430             WORD32 offset;
    431             offset = (num_rows_remaining >> (6 - log2_ctb_size)) << 2;
    432             if(6 != log2_ctb_size)
    433                 offset += (num_rows_remaining & 1) << (log2_ctb_size - 4);
    434 
    435             memset(((UWORD8 *)pu4_horz_bs) + offset, 0, 1 << (log2_ctb_size - 4));
    436         }
    437 
    438         if(num_cols_remaining < (ctb_size >> 3))
    439         {
    440             /* WORD32 offset = (((num_cols_remaining >> 3) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 4));
    441              *  will reduce to the following assuming ctb size is one of 16, 32 and 64
    442              *  and deblocking is done on 8x8 grid
    443              */
    444 
    445             WORD32 offset;
    446             offset = (num_cols_remaining >> (6 - log2_ctb_size)) << 2;
    447             if(6 != log2_ctb_size)
    448                 offset += (num_cols_remaining & 1) << (log2_ctb_size - 4);
    449 
    450             memset(((UWORD8 *)pu4_vert_bs) + offset, 0, 1 << (log2_ctb_size - 4));
    451         }
    452     }
    453 
    454     return 0;
    455 }
    456 WORD32 ihevcd_ctb_boundary_strength_pbslice(bs_ctxt_t *ps_bs_ctxt)
    457 {
    458     sps_t *ps_sps;
    459     pps_t *ps_pps;
    460     WORD32 cur_ctb_idx, next_ctb_idx = 0;
    461     WORD32 i4_tu_cnt;
    462     WORD32 i4_pu_cnt;
    463     tu_t *ps_tu;
    464 
    465     UWORD32 *pu4_vert_bs;
    466     UWORD32 *pu4_horz_bs;
    467     WORD32 bs_strd;
    468     WORD32 vert_bs0_tmp;
    469     WORD32 horz_bs0_tmp;
    470     UWORD8 *pu1_qp;
    471     WORD32 qp_strd;
    472     UWORD32 u4_qp_const_in_ctb;
    473     WORD32 ctb_indx;
    474     WORD32 log2_ctb_size;
    475     WORD32 ctb_size;
    476 
    477     WORD32 i;
    478     WORD8 i1_loop_filter_across_tiles_enabled_flag;
    479     WORD8 i1_loop_filter_across_slices_enabled_flag;
    480 
    481     PROFILE_DISABLE_BOUNDARY_STRENGTH();
    482 
    483     ps_sps = ps_bs_ctxt->ps_sps;
    484     ps_pps = ps_bs_ctxt->ps_pps;
    485 
    486     log2_ctb_size = ps_sps->i1_log2_ctb_size;
    487     ctb_size = (1 << log2_ctb_size);
    488 
    489     /* strides are in units of number of bytes */
    490     /* ctb_size * ctb_size / 8 / 16 is the number of bytes needed per CTB */
    491     bs_strd = (ps_sps->i2_pic_wd_in_ctb + 1) << (2 * log2_ctb_size - 7);
    492 
    493     pu4_vert_bs = (UWORD32 *)((UWORD8 *)ps_bs_ctxt->pu4_pic_vert_bs +
    494                     (ps_bs_ctxt->i4_ctb_x << (2 * log2_ctb_size - 7)) +
    495                     ps_bs_ctxt->i4_ctb_y * bs_strd);
    496     pu4_horz_bs = (UWORD32 *)((UWORD8 *)ps_bs_ctxt->pu4_pic_horz_bs +
    497                     (ps_bs_ctxt->i4_ctb_x << (2 * log2_ctb_size - 7)) +
    498                     ps_bs_ctxt->i4_ctb_y * bs_strd);
    499 
    500     vert_bs0_tmp = pu4_vert_bs[0] & (0xFFFFFFFF >> (sizeof(UWORD32) * 8 - ctb_size / 2));
    501     horz_bs0_tmp = pu4_horz_bs[0] & (0xFFFFFFFF >> (sizeof(UWORD32) * 8 - ctb_size / 2));
    502 
    503     ps_tu = ps_bs_ctxt->ps_tu;
    504 
    505     /* ctb_size/8 elements per CTB */
    506     qp_strd = ps_sps->i2_pic_wd_in_ctb << (log2_ctb_size - 3);
    507     pu1_qp = ps_bs_ctxt->pu1_pic_qp + ((ps_bs_ctxt->i4_ctb_x + ps_bs_ctxt->i4_ctb_y * qp_strd) << (log2_ctb_size - 3));
    508 
    509     ctb_indx = ps_bs_ctxt->i4_ctb_x + ps_sps->i2_pic_wd_in_ctb * ps_bs_ctxt->i4_ctb_y;
    510     u4_qp_const_in_ctb = ps_bs_ctxt->pu1_pic_qp_const_in_ctb[ctb_indx >> 3] & (1 << (ctb_indx & 7));
    511 
    512     i1_loop_filter_across_tiles_enabled_flag = ps_pps->i1_loop_filter_across_tiles_enabled_flag;
    513     i1_loop_filter_across_slices_enabled_flag = ps_bs_ctxt->ps_slice_hdr->i1_slice_loop_filter_across_slices_enabled_flag;
    514 
    515     /* ctb_size/8 is the number of edges per CTB
    516      * ctb_size/4 is the number of BS values needed per edge
    517      * divided by 8 for the number of bytes
    518      * 2 is the number of bits needed for each BS value */
    519 /*
    520     memset(pu4_vert_bs, 0, (ctb_size / 8 + 1) * (ctb_size / 4) * 2 / 8 );
    521     memset(pu4_horz_bs, 0, (ctb_size / 8) * (ctb_size / 4) * 2 / 8 );
    522 */
    523     memset(pu4_vert_bs, 0, (1 << (2 * log2_ctb_size - 7)) + (ctb_size >> 4));
    524     memset(pu4_horz_bs, 0, (1 << (2 * log2_ctb_size - 7)));
    525 
    526     /* pu4_vert_bs[0] has information about the left CTB which is not required when ctb_x = 0 */
    527     if(0 != ps_bs_ctxt->i4_ctb_x)
    528     {
    529         pu4_vert_bs[0] |= vert_bs0_tmp;
    530     }
    531 
    532     /* pu4_horz_bs[0] has information about the top CTB which is not required when ctb_y = 0 */
    533     if(0 != ps_bs_ctxt->i4_ctb_y)
    534     {
    535         pu4_horz_bs[0] |= horz_bs0_tmp;
    536     }
    537     /* pu4_horz_bs[bs_strd / 4] corresponds to pu4_horz_bs[0] of the bottom CTB */
    538     *(UWORD32 *)((UWORD8 *)pu4_horz_bs + bs_strd) = 0;
    539 
    540     cur_ctb_idx = ps_bs_ctxt->i4_ctb_x
    541                     + ps_bs_ctxt->i4_ctb_y * (ps_sps->i2_pic_wd_in_ctb);
    542     next_ctb_idx = ps_bs_ctxt->i4_next_tu_ctb_cnt;
    543     if(1 == ps_bs_ctxt->ps_codec->i4_num_cores)
    544     {
    545         i4_tu_cnt = ps_bs_ctxt->pu4_pic_tu_idx[next_ctb_idx] - ps_bs_ctxt->pu4_pic_tu_idx[cur_ctb_idx % RESET_TU_BUF_NCTB];
    546     }
    547     else
    548     {
    549         i4_tu_cnt = ps_bs_ctxt->pu4_pic_tu_idx[next_ctb_idx] - ps_bs_ctxt->pu4_pic_tu_idx[cur_ctb_idx];
    550     }
    551 
    552     ps_tu = ps_bs_ctxt->ps_tu;
    553     if(u4_qp_const_in_ctb)
    554         pu1_qp[0] = ps_tu->b7_qp;
    555 
    556     /* For all TUs in the CTB For left and top edges, check if there are coded coefficients on either sides of the edge */
    557     for(i = 0; i < i4_tu_cnt; i++)
    558     {
    559         WORD32 start_pos_x;
    560         WORD32 start_pos_y;
    561         WORD32 end_pos_x;
    562         WORD32 end_pos_y;
    563         WORD32 tu_size;
    564         UWORD32 u4_bs;
    565         WORD32 intra_flag;
    566         UWORD8 *pu1_pic_intra_flag;
    567 
    568         ps_tu = ps_bs_ctxt->ps_tu + i;
    569 
    570         start_pos_x = ps_tu->b4_pos_x;
    571         start_pos_y = ps_tu->b4_pos_y;
    572 
    573         tu_size = 1 << (ps_tu->b3_size + 2);
    574         tu_size >>= 2;
    575 
    576         end_pos_x = start_pos_x + tu_size;
    577         end_pos_y = start_pos_y + tu_size;
    578 
    579         {
    580             WORD32 tu_abs_x = (ps_bs_ctxt->i4_ctb_x << log2_ctb_size) + (start_pos_x << 2);
    581             WORD32 tu_abs_y = (ps_bs_ctxt->i4_ctb_y << log2_ctb_size) + (start_pos_y << 2);
    582 
    583             WORD32 numbytes_row =  (ps_sps->i2_pic_width_in_luma_samples + 63) / 64;
    584 
    585             pu1_pic_intra_flag = ps_bs_ctxt->ps_codec->pu1_pic_intra_flag;
    586             pu1_pic_intra_flag += (tu_abs_y >> 3) * numbytes_row;
    587             pu1_pic_intra_flag += (tu_abs_x >> 6);
    588 
    589             intra_flag = *pu1_pic_intra_flag;
    590             intra_flag &= (1 << ((tu_abs_x >> 3) % 8));
    591         }
    592         if(intra_flag)
    593         {
    594             u4_bs = DUP_LSB_10(tu_size);
    595 
    596             /* Only if the current edge falls on 8 pixel grid set BS */
    597             if(0 == (start_pos_x & 1))
    598             {
    599                 WORD32 shift;
    600                 shift = start_pos_y * 2;
    601                 /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
    602                  *  will reduce to the following assuming ctb size is one of 16, 32 and 64
    603                  *  and deblocking is done on 8x8 grid
    604                  */
    605                 if(6 != log2_ctb_size)
    606                     shift += ((start_pos_x & 2) << (log2_ctb_size - 2));
    607                 pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
    608             }
    609             /* Only if the current edge falls on 8 pixel grid set BS */
    610             if(0 == (start_pos_y & 1))
    611             {
    612                 WORD32 shift;
    613                 shift = start_pos_x * 2;
    614                 /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
    615                  *  will reduce to the following assuming ctb size is one of 16, 32 and 64
    616                  *  and deblocking is done on 8x8 grid
    617                  */
    618                 if(6 != log2_ctb_size)
    619                     shift += ((start_pos_y & 2) << (log2_ctb_size - 2));
    620                 pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
    621             }
    622         }
    623 
    624 
    625         /* If the current TU is coded then set both top edge and left edge BS to 1 and go to next TU */
    626         if(ps_tu->b1_y_cbf)
    627         {
    628             u4_bs = DUP_LSB_01(tu_size);
    629 
    630             /* Only if the current edge falls on 8 pixel grid set BS */
    631             if(0 == (start_pos_x & 1))
    632             {
    633                 WORD32 shift;
    634                 shift = start_pos_y * 2;
    635                 /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
    636                  *  will reduce to the following assuming ctb size is one of 16, 32 and 64
    637                  *  and deblocking is done on 8x8 grid
    638                  */
    639                 if(6 != log2_ctb_size)
    640                     shift += ((start_pos_x & 2) << (log2_ctb_size - 2));
    641                 pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
    642             }
    643             /* Only if the current edge falls on 8 pixel grid set BS */
    644             if(0 == (start_pos_y & 1))
    645             {
    646                 WORD32 shift;
    647                 shift = start_pos_x * 2;
    648                 /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
    649                  *  will reduce to the following assuming ctb size is one of 16, 32 and 64
    650                  *  and deblocking is done on 8x8 grid
    651                  */
    652                 if(6 != log2_ctb_size)
    653                     shift += ((start_pos_y & 2) << (log2_ctb_size - 2));
    654                 pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
    655             }
    656             /* Only if the current edge falls on 8 pixel grid set BS */
    657             if(0 == (end_pos_x & 1))
    658             {
    659                 if(!(ctb_size / 8 == (end_pos_x >> 1) && ps_bs_ctxt->i4_ctb_x == ps_sps->i2_pic_wd_in_ctb - 1))
    660                 {
    661                     WORD32 shift;
    662                     shift = start_pos_y * 2;
    663                     shift += (((end_pos_x >> 1) & ((MAX_CTB_SIZE >> log2_ctb_size) - 1)) << (log2_ctb_size - 1));
    664                     pu4_vert_bs[end_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
    665                 }
    666             }
    667             /* Only if the current edge falls on 8 pixel grid set BS */
    668             if(0 == (end_pos_y & 1))
    669             {
    670                 /* If end_pos_y corresponds to the bottom of the CTB, write to pu4_horz_bs[0] of the bottom CTB */
    671                 if(ctb_size / 8 == (end_pos_y >> 1))
    672                 {
    673                     *(UWORD32 *)((UWORD8 *)pu4_horz_bs + bs_strd) |= (u4_bs << (start_pos_x * 2));
    674                 }
    675                 else
    676                 {
    677                     WORD32 shift;
    678                     shift = start_pos_x * 2;
    679                     shift += (((end_pos_y >> 1) & ((MAX_CTB_SIZE >> log2_ctb_size) - 1)) << (log2_ctb_size - 1));
    680                     pu4_horz_bs[end_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
    681                 }
    682             }
    683         }
    684 
    685         if(0 == u4_qp_const_in_ctb)
    686         {
    687             if(0 == (start_pos_x & 1) && 0 == (start_pos_y & 1))
    688             {
    689                 WORD32 row, col;
    690                 for(row = start_pos_y; row < start_pos_y + tu_size; row += 2)
    691                 {
    692                     for(col = start_pos_x; col < start_pos_x + tu_size; col += 2)
    693                     {
    694                         pu1_qp[(row >> 1) * qp_strd + (col >> 1)] = ps_tu->b7_qp;
    695                     }
    696                 }
    697             }
    698         }
    699     }
    700 
    701     /* For all PUs in the CTB,
    702     For left and top edges, compute BS */
    703 
    704     cur_ctb_idx = ps_bs_ctxt->i4_ctb_x
    705                     + ps_bs_ctxt->i4_ctb_y * (ps_sps->i2_pic_wd_in_ctb);
    706 
    707     {
    708         WORD32 next_ctb_idx;
    709         next_ctb_idx = ps_bs_ctxt->i4_next_pu_ctb_cnt;
    710         i4_pu_cnt = ps_bs_ctxt->pu4_pic_pu_idx[next_ctb_idx] - ps_bs_ctxt->pu4_pic_pu_idx[cur_ctb_idx];
    711     }
    712 
    713     for(i = 0; i < i4_pu_cnt; i++)
    714     {
    715         WORD32 start_pos_x;
    716         WORD32 start_pos_y;
    717         WORD32 end_pos_x;
    718         WORD32 end_pos_y;
    719         WORD32 pu_wd, pu_ht;
    720         UWORD32 u4_bs;
    721         pu_t *ps_pu = ps_bs_ctxt->ps_pu + i;
    722         pu_t *ps_ngbr_pu;
    723         UWORD32 u4_ngbr_pu_indx;
    724 
    725         start_pos_x = ps_pu->b4_pos_x;
    726         start_pos_y = ps_pu->b4_pos_y;
    727 
    728         pu_wd = (ps_pu->b4_wd + 1);
    729         pu_ht = (ps_pu->b4_ht + 1);
    730 
    731         end_pos_x = start_pos_x + pu_wd;
    732         end_pos_y = start_pos_y + pu_ht;
    733 
    734         /* If the current PU is intra, set Boundary strength as 2 for both top and left edge */
    735         /* Need not mask the BS to zero even if it was set to 1 already since BS 2 and 3 are assumed to be the same in leaf level functions */
    736         if(ps_pu->b1_intra_flag)
    737         {
    738             u4_bs = DUP_LSB_10(pu_ht);
    739 
    740             /* Only if the current edge falls on 8 pixel grid set BS */
    741             if(0 == (start_pos_x & 1))
    742             {
    743                 WORD32 shift;
    744                 shift = start_pos_y * 2;
    745                 /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
    746                  *  will reduce to the following assuming ctb size is one of 16, 32 and 64
    747                  *  and deblocking is done on 8x8 grid
    748                  */
    749                 if(6 != log2_ctb_size)
    750                     shift += ((start_pos_x & 2) << (log2_ctb_size - 2));
    751                 pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
    752             }
    753 
    754             u4_bs = DUP_LSB_10(pu_wd);
    755 
    756             /* Only if the current edge falls on 8 pixel grid set BS */
    757             if(0 == (start_pos_y & 1))
    758             {
    759                 WORD32 shift;
    760                 shift = start_pos_x * 2;
    761                 /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
    762                  *  will reduce to the following assuming ctb size is one of 16, 32 and 64
    763                  *  and deblocking is done on 8x8 grid
    764                  */
    765                 if(6 != log2_ctb_size)
    766                     shift += ((start_pos_y & 2) << (log2_ctb_size - 2));
    767                 pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
    768             }
    769         }
    770 
    771         else
    772         {
    773             /* Vertical edge */
    774             /* Process only if the edge is not a frame edge */
    775             if(0 != ps_bs_ctxt->i4_ctb_x + start_pos_x)
    776             {
    777                 do
    778                 {
    779                     WORD32 pu_ngbr_ht;
    780                     WORD32 min_pu_ht;
    781                     WORD32 ngbr_end_pos_y;
    782                     UWORD32 ngbr_pu_idx_strd;
    783                     ngbr_pu_idx_strd = MAX_CTB_SIZE / MIN_PU_SIZE + 2;
    784                     u4_ngbr_pu_indx = ps_bs_ctxt->pu4_pic_pu_idx_map[(start_pos_y + 1) * ngbr_pu_idx_strd + (start_pos_x)];
    785                     ps_ngbr_pu = ps_bs_ctxt->ps_pic_pu + u4_ngbr_pu_indx;
    786 
    787                     pu_ngbr_ht = ps_ngbr_pu->b4_ht + 1;
    788                     ngbr_end_pos_y = ps_ngbr_pu->b4_pos_y + pu_ngbr_ht;
    789 
    790                     min_pu_ht = MIN(ngbr_end_pos_y, end_pos_y) - start_pos_y;
    791 
    792                     if(ps_ngbr_pu->b1_intra_flag)
    793                     {
    794                         u4_bs = DUP_LSB_10(min_pu_ht);
    795 
    796                         /* Only if the current edge falls on 8 pixel grid set BS */
    797                         if(0 == (start_pos_x & 1))
    798                         {
    799                             WORD32 shift;
    800                             shift = start_pos_y * 2;
    801                             /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
    802                              *  will reduce to the following assuming ctb size is one of 16, 32 and 64
    803                              *  and deblocking is done on 8x8 grid
    804                              */
    805                             if(6 != log2_ctb_size)
    806                                 shift += ((start_pos_x & 2) << (log2_ctb_size - 2));
    807                             pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
    808                         }
    809                     }
    810                     else
    811                     {
    812                         u4_bs = ihevcd_pu_boundary_strength(ps_pu, ps_ngbr_pu);
    813                         if(u4_bs)
    814                         {
    815                             u4_bs = DUP_LSB_01(min_pu_ht);
    816                             if(0 == (start_pos_x & 1))
    817                             {
    818                                 WORD32 shift;
    819                                 shift = start_pos_y * 2;
    820                                 /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
    821                                  *  will reduce to the following assuming ctb size is one of 16, 32 and 64
    822                                  *  and deblocking is done on 8x8 grid
    823                                  */
    824                                 if(6 != log2_ctb_size)
    825                                     shift += ((start_pos_x & 2) << (log2_ctb_size - 2));
    826                                 pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
    827                             }
    828                         }
    829                     }
    830 
    831                     pu_ht -= min_pu_ht;
    832                     start_pos_y += min_pu_ht;
    833                 }while(pu_ht > 0);
    834 
    835                 /* Reinitialising since the values are updated in the previous loop */
    836                 pu_ht = ps_pu->b4_ht + 1;
    837                 start_pos_y = ps_pu->b4_pos_y;
    838             }
    839 
    840             /* Horizontal edge */
    841             /* Process only if the edge is not a frame edge */
    842             if(0 != ps_bs_ctxt->i4_ctb_y + start_pos_y)
    843             {
    844                 do
    845                 {
    846                     WORD32 pu_ngbr_wd;
    847                     WORD32 min_pu_wd;
    848                     WORD32 ngbr_end_pos_x;
    849                     UWORD32 ngbr_pu_idx_strd = MAX_CTB_SIZE / MIN_PU_SIZE + 2;
    850                     u4_ngbr_pu_indx = ps_bs_ctxt->pu4_pic_pu_idx_map[(start_pos_y)*ngbr_pu_idx_strd + (start_pos_x + 1)];
    851                     ps_ngbr_pu = ps_bs_ctxt->ps_pic_pu + u4_ngbr_pu_indx;
    852 
    853                     pu_ngbr_wd = ps_ngbr_pu->b4_wd + 1;
    854                     ngbr_end_pos_x = ps_ngbr_pu->b4_pos_x + pu_ngbr_wd;
    855 
    856                     min_pu_wd = MIN(ngbr_end_pos_x, end_pos_x) - start_pos_x;
    857 
    858                     if(ps_ngbr_pu->b1_intra_flag)
    859                     {
    860                         u4_bs = DUP_LSB_10(min_pu_wd);
    861 
    862                         /* Only if the current edge falls on 8 pixel grid set BS */
    863                         if(0 == (start_pos_y & 1))
    864                         {
    865                             WORD32 shift;
    866                             shift = start_pos_x * 2;
    867                             /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
    868                              *  will reduce to the following assuming ctb size is one of 16, 32 and 64
    869                              *  and deblocking is done on 8x8 grid
    870                              */
    871                             if(6 != log2_ctb_size)
    872                                 shift += ((start_pos_y & 2) << (log2_ctb_size - 2));
    873                             pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
    874                         }
    875                     }
    876                     else
    877                     {
    878                         u4_bs = ihevcd_pu_boundary_strength(ps_pu, ps_ngbr_pu);
    879                         if(u4_bs)
    880                         {
    881                             u4_bs = DUP_LSB_01(min_pu_wd);
    882 
    883                             /* Only if the current edge falls on 8 pixel grid set BS */
    884                             if(0 == (start_pos_y & 1))
    885                             {
    886                                 WORD32 shift;
    887                                 shift = start_pos_x * 2;
    888                                 /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
    889                                  *  will reduce to the following assuming ctb size is one of 16, 32 and 64
    890                                  *  and deblocking is done on 8x8 grid
    891                                  */
    892                                 if(6 != log2_ctb_size)
    893                                     shift += ((start_pos_y & 2) << (log2_ctb_size - 2));
    894                                 pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
    895                             }
    896                         }
    897                     }
    898 
    899                     pu_wd -= min_pu_wd;
    900                     start_pos_x += min_pu_wd;
    901                 }while(pu_wd > 0);
    902 
    903                 /* Reinitialising since the values are updated in the previous loop */
    904                 pu_wd = ps_pu->b4_wd + 1;
    905                 start_pos_x = ps_pu->b4_pos_x;
    906             }
    907         }
    908     }
    909 
    910     {
    911         /* If left neighbor is not available, then set BS for entire first column to zero */
    912         UWORD32 ctb_addr;
    913         WORD32 slice_idx, left_slice_idx = -1, top_slice_idx = -1;
    914 
    915         if(!ps_pps->i1_tiles_enabled_flag)
    916         {
    917             if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_x) ||
    918                             (0 == i1_loop_filter_across_slices_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_slice_x && 0 == ps_bs_ctxt->i4_ctb_slice_y) ||
    919                             (0 == ps_bs_ctxt->i4_ctb_x))
    920             {
    921                 pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
    922             }
    923         }
    924         else
    925         {
    926             if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_x))
    927             {
    928                 pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
    929             }
    930             else
    931             {
    932 
    933                 ctb_addr = ps_bs_ctxt->i4_ctb_x + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
    934                 slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
    935 
    936                 if(ps_bs_ctxt->i4_ctb_x)
    937                 {
    938                     ctb_addr = (ps_bs_ctxt->i4_ctb_x - 1) + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
    939                     left_slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
    940                 }
    941 
    942                 if(!((ps_bs_ctxt->ps_slice_hdr->i1_dependent_slice_flag == 1) && (slice_idx == left_slice_idx)))
    943                 {
    944                     if((0 == i1_loop_filter_across_slices_enabled_flag && (
    945                                     (0 == ps_bs_ctxt->i4_ctb_slice_x && 0 == ps_bs_ctxt->i4_ctb_slice_y) || (0 == ps_bs_ctxt->i4_ctb_slice_x && 0 == ps_bs_ctxt->i4_ctb_tile_x)
    946                                     || ((0 == ps_bs_ctxt->i4_ctb_tile_x) && (slice_idx != left_slice_idx)))) || (0 == ps_bs_ctxt->i4_ctb_x))
    947                     {
    948                         pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
    949                     }
    950                 }
    951             }
    952         }
    953 
    954         ctb_addr = ps_bs_ctxt->i4_ctb_x + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
    955         slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
    956         if(ps_bs_ctxt->i4_ctb_y)
    957         {
    958             ctb_addr = (ps_bs_ctxt->i4_ctb_x) + ((ps_bs_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb);
    959             top_slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
    960         }
    961         /* If top neighbor is not available, then set BS for entire first row to zero */
    962         if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_y)
    963                         || (0 == i1_loop_filter_across_slices_enabled_flag && ((0 == ps_bs_ctxt->i4_ctb_slice_y) || (slice_idx != top_slice_idx)))
    964                         || (0 == ps_bs_ctxt->i4_ctb_y))
    965         {
    966             pu4_horz_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
    967         }
    968     }
    969 
    970     /**
    971      *  Set BS of bottom and right frame boundaries to zero if it is an incomplete CTB
    972      *   (They might have been set to non-zero values because of CBF of the current CTB)*/
    973     {
    974         WORD32 num_rows_remaining = (ps_sps->i2_pic_height_in_luma_samples - (ps_bs_ctxt->i4_ctb_y << log2_ctb_size)) >> 3;
    975         WORD32 num_cols_remaining = (ps_sps->i2_pic_width_in_luma_samples - (ps_bs_ctxt->i4_ctb_x << log2_ctb_size)) >> 3;
    976         if(num_rows_remaining < (ctb_size >> 3))
    977         {
    978             /* WORD32 offset = (((num_rows_remaining >> 3) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 4));
    979              *  will reduce to the following assuming ctb size is one of 16, 32 and 64
    980              *  and deblocking is done on 8x8 grid
    981              */
    982             WORD32 offset;
    983             offset = (num_rows_remaining >> (6 - log2_ctb_size)) << 2;
    984             if(6 != log2_ctb_size)
    985                 offset += (num_rows_remaining & 1) << (log2_ctb_size - 4);
    986 
    987             memset(((UWORD8 *)pu4_horz_bs) + offset, 0, 1 << (log2_ctb_size - 4));
    988         }
    989 
    990         if(num_cols_remaining < (ctb_size >> 3))
    991         {
    992             /* WORD32 offset = (((num_cols_remaining >> 3) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 4));
    993              *  will reduce to the following assuming ctb size is one of 16, 32 and 64
    994              *  and deblocking is done on 8x8 grid
    995              */
    996 
    997             WORD32 offset;
    998             offset = (num_cols_remaining >> (6 - log2_ctb_size)) << 2;
    999             if(6 != log2_ctb_size)
   1000                 offset += (num_cols_remaining & 1) << (log2_ctb_size - 4);
   1001 
   1002             memset(((UWORD8 *)pu4_vert_bs) + offset, 0, 1 << (log2_ctb_size - 4));
   1003         }
   1004     }
   1005     return 0;
   1006 }
   1007