Home | History | Annotate | Download | only in decoder
      1 /******************************************************************************
      2 *
      3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
      4 *
      5 * Licensed under the Apache License, Version 2.0 (the "License");
      6 * you may not use this file except in compliance with the License.
      7 * You may obtain a copy of the License at:
      8 *
      9 * http://www.apache.org/licenses/LICENSE-2.0
     10 *
     11 * Unless required by applicable law or agreed to in writing, software
     12 * distributed under the License is distributed on an "AS IS" BASIS,
     13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 * See the License for the specific language governing permissions and
     15 * limitations under the License.
     16 *
     17 ******************************************************************************/
     18 /**
     19 *******************************************************************************
     20 * @file
     21 *  ihevc_deblk.c
     22 *
     23 * @brief
     24 *  Contains definition for the ctb level deblk function
     25 *
     26 * @author
     27 *  Srinivas T
     28 *
     29 * @par List of Functions:
     30 *   - ihevcd_deblk_ctb()
     31 *
     32 * @remarks
     33 *  None
     34 *
     35 *******************************************************************************
     36 */
     37 
     38 #include <stdio.h>
     39 #include <stddef.h>
     40 #include <stdlib.h>
     41 #include <string.h>
     42 #include <assert.h>
     43 
     44 #include "ihevc_typedefs.h"
     45 #include "iv.h"
     46 #include "ivd.h"
     47 #include "ihevcd_cxa.h"
     48 #include "ithread.h"
     49 
     50 #include "ihevc_defs.h"
     51 #include "ihevc_debug.h"
     52 #include "ihevc_defs.h"
     53 #include "ihevc_structs.h"
     54 #include "ihevc_macros.h"
     55 #include "ihevc_platform_macros.h"
     56 #include "ihevc_cabac_tables.h"
     57 
     58 #include "ihevc_error.h"
     59 #include "ihevc_common_tables.h"
     60 
     61 #include "ihevcd_trace.h"
     62 #include "ihevcd_defs.h"
     63 #include "ihevcd_function_selector.h"
     64 #include "ihevcd_structs.h"
     65 #include "ihevcd_error.h"
     66 #include "ihevcd_nal.h"
     67 #include "ihevcd_bitstream.h"
     68 #include "ihevcd_job_queue.h"
     69 #include "ihevcd_utils.h"
     70 #include "ihevcd_debug.h"
     71 
     72 #include "ihevc_deblk.h"
     73 #include "ihevc_deblk_tables.h"
     74 #include "ihevcd_profile.h"
     75 /**
     76 *******************************************************************************
     77 *
     78 * @brief
     79 *     Deblock CTB level function.
     80 *
     81 * @par Description:
     82 *     For a given CTB, deblocking on both vertical and
     83 *     horizontal edges is done. Both the luma and chroma
     84 *     blocks are processed
     85 *
     86 * @param[in] ps_deblk
     87 *  Pointer to the deblock context
     88 *
     89 * @returns None
     90 *
     91 * @remarks
     92 *  None
     93 *
     94 *******************************************************************************
     95 */
     96 
     97 void ihevcd_deblk_ctb(deblk_ctxt_t *ps_deblk,
     98                       WORD32 i4_is_last_ctb_x,
     99                       WORD32 i4_is_last_ctb_y)
    100 {
    101     WORD32 ctb_size;
    102     WORD32 log2_ctb_size;
    103     UWORD32 u4_bs;
    104     WORD32 bs_tz; /*Leading zeros in boundary strength*/
    105     WORD32 qp_p, qp_q;
    106 
    107     WORD32 filter_p, filter_q;
    108 
    109     UWORD8 *pu1_src;
    110     WORD32 qp_strd;
    111     UWORD32 *pu4_vert_bs, *pu4_horz_bs;
    112     UWORD32 *pu4_ctb_vert_bs, *pu4_ctb_horz_bs;
    113     WORD32 bs_strd;
    114     WORD32 src_strd;
    115     UWORD8 *pu1_qp;
    116     UWORD16 *pu2_ctb_no_loop_filter_flag;
    117     UWORD16 au2_ctb_no_loop_filter_flag[9];
    118 
    119     WORD32 col, row;
    120 
    121     /* Flag to indicate if QP is constant in CTB
    122      * 0 - top_left, 1 - top, 2 - left, 3 - current */
    123     UWORD32 u4_qp_const_in_ctb[4] = { 0, 0, 0, 0 };
    124     WORD32 ctb_indx;
    125     WORD32  chroma_yuv420sp_vu = ps_deblk->is_chroma_yuv420sp_vu;
    126     sps_t *ps_sps;
    127     pps_t *ps_pps;
    128     codec_t *ps_codec;
    129     slice_header_t *ps_slice_hdr;
    130 
    131     PROFILE_DISABLE_DEBLK();
    132 
    133     ps_sps = ps_deblk->ps_sps;
    134     ps_pps = ps_deblk->ps_pps;
    135     ps_codec = ps_deblk->ps_codec;
    136     ps_slice_hdr = ps_deblk->ps_slice_hdr;
    137 
    138     log2_ctb_size = ps_sps->i1_log2_ctb_size;
    139     ctb_size = (1 << ps_sps->i1_log2_ctb_size);
    140 
    141     /* strides are in units of number of bytes */
    142     /* ctb_size * ctb_size / 8 / 16 is the number of bytes needed per CTB */
    143     bs_strd = (ps_sps->i2_pic_wd_in_ctb + 1) << (2 * log2_ctb_size - 7);
    144 
    145     pu4_vert_bs = (UWORD32 *)((UWORD8 *)ps_deblk->s_bs_ctxt.pu4_pic_vert_bs +
    146                     (ps_deblk->i4_ctb_x << (2 * log2_ctb_size - 7)) +
    147                     ps_deblk->i4_ctb_y * bs_strd);
    148     pu4_ctb_vert_bs = pu4_vert_bs;
    149 
    150     pu4_horz_bs = (UWORD32 *)((UWORD8 *)ps_deblk->s_bs_ctxt.pu4_pic_horz_bs +
    151                     (ps_deblk->i4_ctb_x << (2 * log2_ctb_size - 7)) +
    152                     ps_deblk->i4_ctb_y * bs_strd);
    153     pu4_ctb_horz_bs = pu4_horz_bs;
    154 
    155     qp_strd = ps_sps->i2_pic_wd_in_ctb << (log2_ctb_size - 3);
    156     pu1_qp = ps_deblk->s_bs_ctxt.pu1_pic_qp + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * qp_strd) << (log2_ctb_size - 3));
    157 
    158     pu2_ctb_no_loop_filter_flag = ps_deblk->au2_ctb_no_loop_filter_flag;
    159 
    160     ctb_indx = ps_deblk->i4_ctb_x + ps_sps->i2_pic_wd_in_ctb * ps_deblk->i4_ctb_y;
    161     if(i4_is_last_ctb_y)
    162     {
    163         pu4_vert_bs = (UWORD32 *)((UWORD8 *)pu4_vert_bs + bs_strd);
    164         pu4_ctb_vert_bs = pu4_vert_bs;
    165         /* ctb_size/8 is the number of edges per CTB
    166          * ctb_size/4 is the number of BS values needed per edge
    167          * divided by 8 for the number of bytes
    168          * 2 is the number of bits needed for each BS value */
    169         memset(pu4_vert_bs, 0, 1 << (2 * log2_ctb_size - 7));
    170 
    171         pu1_qp += (qp_strd << (log2_ctb_size - 3));
    172         pu2_ctb_no_loop_filter_flag += (ctb_size >> 3);
    173         ctb_indx += ps_sps->i2_pic_wd_in_ctb;
    174     }
    175 
    176     if(i4_is_last_ctb_x)
    177     {
    178         pu4_horz_bs = (UWORD32 *)((UWORD8 *)pu4_horz_bs + (1 << (2 * log2_ctb_size - 7)));
    179         pu4_ctb_horz_bs = pu4_horz_bs;
    180         memset(pu4_horz_bs, 0, 1 << (2 * log2_ctb_size - 7));
    181 
    182         pu1_qp += (ctb_size >> 3);
    183 
    184         for(row = 0; row < (ctb_size >> 3) + 1; row++)
    185             au2_ctb_no_loop_filter_flag[row] = ps_deblk->au2_ctb_no_loop_filter_flag[row] >> (ctb_size >> 3);
    186         pu2_ctb_no_loop_filter_flag = au2_ctb_no_loop_filter_flag;
    187         ctb_indx += 1;
    188     }
    189 
    190     u4_qp_const_in_ctb[3] = ps_deblk->s_bs_ctxt.pu1_pic_qp_const_in_ctb[(ctb_indx) >> 3] & (1 << (ctb_indx & 7));
    191 
    192     if(ps_deblk->i4_ctb_x || i4_is_last_ctb_x)
    193     {
    194         u4_qp_const_in_ctb[2] = ps_deblk->s_bs_ctxt.pu1_pic_qp_const_in_ctb[(ctb_indx - 1) >> 3] & (1 << ((ctb_indx - 1) & 7));
    195     }
    196 
    197     if((ps_deblk->i4_ctb_x || i4_is_last_ctb_x) && (ps_deblk->i4_ctb_y || i4_is_last_ctb_y))
    198     {
    199         u4_qp_const_in_ctb[0] =
    200                         ps_deblk->s_bs_ctxt.pu1_pic_qp_const_in_ctb[(ctb_indx - ps_sps->i2_pic_wd_in_ctb - 1) >> 3] &
    201                         (1 << ((ctb_indx - ps_sps->i2_pic_wd_in_ctb - 1) & 7));
    202     }
    203 
    204 
    205 
    206     if(ps_deblk->i4_ctb_y || i4_is_last_ctb_y)
    207     {
    208         u4_qp_const_in_ctb[1] =
    209                         ps_deblk->s_bs_ctxt.pu1_pic_qp_const_in_ctb[(ctb_indx - ps_sps->i2_pic_wd_in_ctb) >> 3] &
    210                         (1 << ((ctb_indx - ps_sps->i2_pic_wd_in_ctb) & 7));
    211     }
    212 
    213     src_strd = ps_codec->i4_strd;
    214 
    215     /* Luma Vertical Edge */
    216 
    217     if(0 == i4_is_last_ctb_x)
    218     {
    219         /* Top CTB's slice header */
    220         slice_header_t *ps_slice_hdr_top;
    221         {
    222             WORD32 cur_ctb_indx = ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
    223             if(i4_is_last_ctb_y)
    224                 cur_ctb_indx += ps_sps->i2_pic_wd_in_ctb;
    225             ps_slice_hdr_top = ps_codec->ps_slice_hdr_base + ps_deblk->pu1_slice_idx[cur_ctb_indx - ps_sps->i2_pic_wd_in_ctb];
    226         }
    227 
    228         pu1_src = ps_deblk->pu1_cur_pic_luma + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_deblk->ps_codec->i4_strd) << (log2_ctb_size));
    229         pu1_src += i4_is_last_ctb_y ? ps_deblk->ps_codec->i4_strd << log2_ctb_size : 0;
    230 
    231         /** Deblocking is done on a shifted CTB -
    232          *  Vertical edge processing is done by shifting the CTB up by four pixels */
    233         pu1_src -= 4 * src_strd;
    234 
    235         for(col = 0; col < ctb_size / 8; col++)
    236         {
    237             WORD32 shift = 0;
    238 
    239             /*  downshift vert_bs by ctb_size/2 for each column
    240              *  shift = (col & ((MAX_CTB_SIZE >> log2_ctb_size) - 1)) << (log2_ctb_size - 1);
    241              *  which will reduce to the following assuming ctb size is one of 16, 32 and 64
    242              *  and deblocking is done on 8x8 grid
    243              */
    244             if(6 != log2_ctb_size)
    245                 shift = (col & 1) << (log2_ctb_size - 1);
    246 
    247             /* BS for the column - Last row is excluded and the top row is included*/
    248             u4_bs = (pu4_vert_bs[0] >> shift) << 2;
    249 
    250             if(ps_deblk->i4_ctb_y || i4_is_last_ctb_y)
    251             {
    252                 /* Picking the last BS of the previous CTB corresponding to the same column */
    253                 UWORD32 *pu4_vert_bs_top = (UWORD32 *)((UWORD8 *)pu4_vert_bs - bs_strd);
    254                 UWORD32 u4_top_bs = (*pu4_vert_bs_top) >> (shift + (1 << (log2_ctb_size - 1)) - 2);
    255                 u4_bs |= u4_top_bs & 3;
    256             }
    257 
    258             for(row = 0; row < ctb_size / 4;)
    259             {
    260                 WORD8 i1_beta_offset_div2 = ps_slice_hdr->i1_beta_offset_div2;
    261                 WORD8 i1_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
    262 
    263                 /* Trailing zeros are computed and the corresponding rows are not processed */
    264                 bs_tz = CTZ(u4_bs) >> 1;
    265                 if(0 != bs_tz)
    266                 {
    267                     u4_bs = u4_bs >> (bs_tz << 1);
    268                     if((row + bs_tz) >= (ctb_size / 4))
    269                         pu1_src += 4 * (ctb_size / 4 - row) * src_strd;
    270                     else
    271                         pu1_src += 4 * bs_tz  * src_strd;
    272 
    273                     row += bs_tz;
    274                     continue;
    275                 }
    276 
    277                 if(0 == row)
    278                 {
    279                     i1_beta_offset_div2 = ps_slice_hdr_top->i1_beta_offset_div2;
    280                     i1_tc_offset_div2 = ps_slice_hdr_top->i1_tc_offset_div2;
    281 
    282                     if(0 == col)
    283                     {
    284                         qp_p = u4_qp_const_in_ctb[0] ?
    285                                         pu1_qp[-ctb_size / 8 * qp_strd - ctb_size / 8] :
    286                                         pu1_qp[-qp_strd - 1];
    287                     }
    288                     else
    289                     {
    290                         qp_p = u4_qp_const_in_ctb[1] ?
    291                                         pu1_qp[-ctb_size / 8 * qp_strd] :
    292                                         pu1_qp[col - 1 - qp_strd];
    293                     }
    294 
    295                     qp_q = u4_qp_const_in_ctb[1] ?
    296                                     pu1_qp[-ctb_size / 8 * qp_strd] :
    297                                     pu1_qp[col - qp_strd];
    298                 }
    299                 else
    300                 {
    301                     if(0 == col)
    302                     {
    303                         qp_p = u4_qp_const_in_ctb[2] ?
    304                                         pu1_qp[-ctb_size / 8] :
    305                                         pu1_qp[((row - 1) >> 1) * qp_strd - 1];
    306                     }
    307                     else
    308                     {
    309                         qp_p = u4_qp_const_in_ctb[3] ?
    310                                         pu1_qp[0] :
    311                                         pu1_qp[((row - 1) >> 1) * qp_strd + col - 1];
    312                     }
    313 
    314                     qp_q = u4_qp_const_in_ctb[3] ?
    315                                     pu1_qp[0] :
    316                                     pu1_qp[((row - 1) >> 1) * qp_strd + col];
    317                 }
    318 
    319                 filter_p = (pu2_ctb_no_loop_filter_flag[(row + 1) >> 1] >> col) & 1;
    320                 filter_q = (pu2_ctb_no_loop_filter_flag[(row + 1) >> 1] >> col) & 2;
    321                 /* filter_p and filter_q are inverted as they are calculated using no_loop_filter_flags */
    322                 filter_p = !filter_p;
    323                 filter_q = !filter_q;
    324 
    325                 if(filter_p || filter_q)
    326                 {
    327                     DUMP_DEBLK_LUMA_VERT(pu1_src, src_strd,
    328                                          u4_bs & 3, qp_p, qp_q,
    329                                          ps_slice_hdr->i1_beta_offset_div2,
    330                                          ps_slice_hdr->i1_tc_offset_div2,
    331                                          filter_p, filter_q);
    332                     ps_codec->s_func_selector.ihevc_deblk_luma_vert_fptr(pu1_src, src_strd,
    333                                                                          u4_bs & 3, qp_p, qp_q,
    334                                                                          i1_beta_offset_div2,
    335                                                                          i1_tc_offset_div2,
    336                                                                          filter_p, filter_q);
    337                 }
    338 
    339                 pu1_src += 4 * src_strd;
    340                 u4_bs = u4_bs >> 2;
    341                 row++;
    342             }
    343 
    344             if((64 == ctb_size) ||
    345                             ((32 == ctb_size) && (col & 1)))
    346             {
    347                 pu4_vert_bs++;
    348             }
    349             pu1_src -= (src_strd << log2_ctb_size);
    350             pu1_src += 8;
    351         }
    352         pu4_vert_bs = pu4_ctb_vert_bs;
    353     }
    354 
    355 
    356     /* Luma Horizontal Edge */
    357 
    358     if(0 == i4_is_last_ctb_y)
    359     {
    360 
    361         /* Left CTB's slice header */
    362         slice_header_t *ps_slice_hdr_left;
    363         {
    364             WORD32 cur_ctb_indx = ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
    365             if(i4_is_last_ctb_x)
    366                 cur_ctb_indx += 1;
    367             ps_slice_hdr_left = ps_codec->ps_slice_hdr_base + ps_deblk->pu1_slice_idx[cur_ctb_indx - 1];
    368         }
    369         pu1_src = ps_deblk->pu1_cur_pic_luma + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_deblk->ps_codec->i4_strd) << log2_ctb_size);
    370         pu1_src += i4_is_last_ctb_x ? ctb_size : 0;
    371 
    372         /** Deblocking is done on a shifted CTB -
    373          *  Horizontal edge processing is done by shifting the CTB left by four pixels */
    374         pu1_src -= 4;
    375         for(row = 0; row < ctb_size / 8; row++)
    376         {
    377             WORD32 shift = 0;
    378 
    379             /* downshift vert_bs by ctb_size/2 for each column
    380              *  shift = (row & (MAX_CTB_SIZE / ctb_size - 1)) * ctb_size / 2;
    381              *  which will reduce to the following assuming ctb size is one of 16, 32 and 64
    382              *  and deblocking is done on 8x8 grid
    383              */
    384             if(6 != log2_ctb_size)
    385                 shift = (row & 1) << (log2_ctb_size - 1);
    386 
    387             /* BS for the row - Last column is excluded and the left column is included*/
    388             u4_bs = (pu4_horz_bs[0] >> shift) << 2;
    389 
    390             if(ps_deblk->i4_ctb_x || i4_is_last_ctb_x)
    391             {
    392                 /** Picking the last BS of the previous CTB corresponding to the same row
    393                 * UWORD32 *pu4_horz_bs_left = (UWORD32 *)((UWORD8 *)pu4_horz_bs - (ctb_size / 8) * (ctb_size / 4) / 8 * 2);
    394                 */
    395                 UWORD32 *pu4_horz_bs_left = (UWORD32 *)((UWORD8 *)pu4_horz_bs - (1 << (2 * log2_ctb_size - 7)));
    396                 UWORD32 u4_left_bs = (*pu4_horz_bs_left) >> (shift + (1 << (log2_ctb_size - 1)) - 2);
    397                 u4_bs |= u4_left_bs & 3;
    398             }
    399 
    400             for(col = 0; col < ctb_size / 4;)
    401             {
    402                 WORD8 i1_beta_offset_div2 = ps_slice_hdr->i1_beta_offset_div2;
    403                 WORD8 i1_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
    404 
    405                 bs_tz = CTZ(u4_bs) >> 1;
    406                 if(0 != bs_tz)
    407                 {
    408                     u4_bs = u4_bs >> (bs_tz << 1);
    409 
    410                     if((col + bs_tz) >= (ctb_size / 4))
    411                         pu1_src += 4 * (ctb_size / 4 - col);
    412                     else
    413                         pu1_src += 4 * bs_tz;
    414 
    415                     col += bs_tz;
    416                     continue;
    417                 }
    418 
    419                 if(0 == col)
    420                 {
    421                     i1_beta_offset_div2 = ps_slice_hdr_left->i1_beta_offset_div2;
    422                     i1_tc_offset_div2 = ps_slice_hdr_left->i1_tc_offset_div2;
    423 
    424                     if(0 == row)
    425                     {
    426                         qp_p = u4_qp_const_in_ctb[0] ?
    427                                         pu1_qp[-ctb_size / 8 * qp_strd - ctb_size / 8] :
    428                                         pu1_qp[-qp_strd - 1];
    429                     }
    430                     else
    431                     {
    432                         qp_p = u4_qp_const_in_ctb[2] ?
    433                                         pu1_qp[-ctb_size / 8] :
    434                                         pu1_qp[(row - 1) * qp_strd - 1];
    435                     }
    436 
    437                     qp_q = u4_qp_const_in_ctb[2] ?
    438                                     pu1_qp[-ctb_size / 8] :
    439                                     pu1_qp[row * qp_strd - 1];
    440                 }
    441                 else
    442                 {
    443                     if(0 == row)
    444                     {
    445                         qp_p = u4_qp_const_in_ctb[1] ?
    446                                         pu1_qp[-ctb_size / 8 * qp_strd] :
    447                                         pu1_qp[((col - 1) >> 1) - qp_strd];
    448                     }
    449                     else
    450                     {
    451                         qp_p = u4_qp_const_in_ctb[3] ?
    452                                         pu1_qp[0] :
    453                                         pu1_qp[((col - 1) >> 1) + (row - 1) * qp_strd];
    454                     }
    455 
    456                     qp_q = u4_qp_const_in_ctb[3] ?
    457                                     pu1_qp[0] :
    458                                     pu1_qp[((col - 1) >> 1) + row * qp_strd];
    459                 }
    460 
    461                 filter_p = (pu2_ctb_no_loop_filter_flag[row] >> ((col + 1) >> 1)) & 1;
    462                 filter_q = (pu2_ctb_no_loop_filter_flag[row + 1] >> ((col + 1) >> 1)) & 1;
    463                 /* filter_p and filter_q are inverted as they are calculated using no_loop_filter_flags */
    464                 filter_p = !filter_p;
    465                 filter_q = !filter_q;
    466 
    467                 if(filter_p || filter_q)
    468                 {
    469                     DUMP_DEBLK_LUMA_HORZ(pu1_src, src_strd,
    470                                          u4_bs & 3, qp_p, qp_q,
    471                                          ps_slice_hdr->i1_beta_offset_div2,
    472                                          ps_slice_hdr->i1_tc_offset_div2,
    473                                          filter_p, filter_q);
    474                     ps_codec->s_func_selector.ihevc_deblk_luma_horz_fptr(pu1_src, src_strd,
    475                                                                          u4_bs & 3, qp_p, qp_q,
    476                                                                          i1_beta_offset_div2,
    477                                                                          i1_tc_offset_div2, filter_p, filter_q);
    478                 }
    479 
    480                 pu1_src += 4;
    481                 u4_bs = u4_bs >> 2;
    482                 col++;
    483             }
    484 
    485             if((64 == ctb_size) ||
    486                             ((32 == ctb_size) && (row & 1)))
    487             {
    488                 pu4_horz_bs++;
    489             }
    490             pu1_src -= ctb_size;
    491             pu1_src += (src_strd << 3);
    492         }
    493         pu4_horz_bs = pu4_ctb_horz_bs;
    494     }
    495 
    496 
    497     /* Chroma Veritcal Edge */
    498 
    499     if(0 == i4_is_last_ctb_x)
    500     {
    501 
    502         /* Top CTB's slice header */
    503         slice_header_t *ps_slice_hdr_top;
    504         {
    505             WORD32 cur_ctb_indx = ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
    506             if(i4_is_last_ctb_y)
    507                 cur_ctb_indx += ps_sps->i2_pic_wd_in_ctb;
    508             ps_slice_hdr_top = ps_codec->ps_slice_hdr_base + ps_deblk->pu1_slice_idx[cur_ctb_indx - ps_sps->i2_pic_wd_in_ctb];
    509         }
    510 
    511         pu1_src = ps_deblk->pu1_cur_pic_chroma + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_deblk->ps_codec->i4_strd / 2) << log2_ctb_size);
    512         pu1_src += i4_is_last_ctb_y ? (ps_deblk->ps_codec->i4_strd / 2) << log2_ctb_size : 0;
    513 
    514         /** Deblocking is done on a shifted CTB -
    515          *  Vertical edge processing is done by shifting the CTB up by four pixels */
    516         pu1_src -= 4 * src_strd;
    517 
    518         for(col = 0; col < ctb_size / 16; col++)
    519         {
    520 
    521             /* BS for the column - Last row is excluded and the top row is included*/
    522             u4_bs = pu4_vert_bs[0] << 2;
    523 
    524             if(ps_deblk->i4_ctb_y || i4_is_last_ctb_y)
    525             {
    526                 /* Picking the last BS of the previous CTB corresponding to the same column */
    527                 UWORD32 *pu4_vert_bs_top = (UWORD32 *)((UWORD8 *)pu4_vert_bs - bs_strd);
    528                 UWORD32 u4_top_bs = (*pu4_vert_bs_top) >> ((1 << (log2_ctb_size - 1)) - 2);
    529                 u4_bs |= u4_top_bs & 3;
    530             }
    531 
    532             /* Every alternate boundary strength value is used for chroma */
    533             u4_bs &= 0x22222222;
    534 
    535             for(row = 0; row < ctb_size / 8;)
    536             {
    537                 WORD8 i1_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
    538 
    539                 bs_tz = CTZ(u4_bs) >> 2;
    540                 if(0 != bs_tz)
    541                 {
    542                     if((row + bs_tz) >= (ctb_size / 8))
    543                         pu1_src += 4 * (ctb_size / 8 - row) * src_strd;
    544                     else
    545                         pu1_src += 4 * bs_tz  * src_strd;
    546                     row += bs_tz;
    547                     u4_bs = u4_bs >> (bs_tz << 2);
    548                     continue;
    549                 }
    550 
    551                 if(0 == row)
    552                 {
    553                     i1_tc_offset_div2 = ps_slice_hdr_top->i1_tc_offset_div2;
    554 
    555                     if(0 == col)
    556                     {
    557                         qp_p = u4_qp_const_in_ctb[0] ?
    558                                         pu1_qp[-ctb_size / 8 * qp_strd - ctb_size / 8] :
    559                                         pu1_qp[-qp_strd - 1];
    560                     }
    561                     else
    562                     {
    563                         qp_p = u4_qp_const_in_ctb[1] ?
    564                                         pu1_qp[-ctb_size / 8 * qp_strd] :
    565                                         pu1_qp[2 * col - 1 - qp_strd];
    566                     }
    567 
    568                     qp_q = u4_qp_const_in_ctb[1] ?
    569                                     pu1_qp[-ctb_size / 8 * qp_strd] :
    570                                     pu1_qp[2 * col - qp_strd];
    571                 }
    572                 else
    573                 {
    574                     if(0 == col)
    575                     {
    576                         qp_p = u4_qp_const_in_ctb[2] ?
    577                                         pu1_qp[-ctb_size / 8] :
    578                                         pu1_qp[(row - 1) * qp_strd - 1];
    579                     }
    580                     else
    581                     {
    582                         qp_p = u4_qp_const_in_ctb[3] ?
    583                                         pu1_qp[0] :
    584                                         pu1_qp[(row - 1) * qp_strd + 2 * col - 1];
    585                     }
    586 
    587                     qp_q = u4_qp_const_in_ctb[3] ?
    588                                     pu1_qp[0] :
    589                                     pu1_qp[(row - 1) * qp_strd + 2 * col];
    590                 }
    591 
    592                 filter_p = (pu2_ctb_no_loop_filter_flag[row] >> (col << 1)) & 1;
    593                 filter_q = (pu2_ctb_no_loop_filter_flag[row] >> (col << 1)) & 2;
    594                 /* filter_p and filter_q are inverted as they are calculated using no_loop_filter_flags */
    595                 filter_p = !filter_p;
    596                 filter_q = !filter_q;
    597 
    598                 if(filter_p || filter_q)
    599                 {
    600                     ASSERT(1 == ((u4_bs & 3) >> 1));
    601                     DUMP_DEBLK_CHROMA_VERT(pu1_src, src_strd,
    602                                            u4_bs & 3, qp_p, qp_q,
    603                                            ps_pps->i1_pic_cb_qp_offset,
    604                                            ps_pps->i1_pic_cr_qp_offset,
    605                                            ps_slice_hdr->i1_tc_offset_div2,
    606                                            filter_p, filter_q);
    607                     if(chroma_yuv420sp_vu)
    608                     {
    609                         ps_codec->s_func_selector.ihevc_deblk_chroma_vert_fptr(pu1_src,
    610                                                                                src_strd,
    611                                                                                qp_q,
    612                                                                                qp_p,
    613                                                                                ps_pps->i1_pic_cr_qp_offset,
    614                                                                                ps_pps->i1_pic_cb_qp_offset,
    615                                                                                i1_tc_offset_div2,
    616                                                                                filter_q,
    617                                                                                filter_p);
    618                     }
    619                     else
    620                     {
    621                         ps_codec->s_func_selector.ihevc_deblk_chroma_vert_fptr(pu1_src,
    622                                                                                src_strd,
    623                                                                                qp_p,
    624                                                                                qp_q,
    625                                                                                ps_pps->i1_pic_cb_qp_offset,
    626                                                                                ps_pps->i1_pic_cr_qp_offset,
    627                                                                                i1_tc_offset_div2,
    628                                                                                filter_p,
    629                                                                                filter_q);
    630                     }
    631                 }
    632 
    633                 pu1_src += 4 * src_strd;
    634                 u4_bs = u4_bs >> 4;
    635                 row++;
    636             }
    637 
    638             pu4_vert_bs += (64 == ctb_size) ? 2 : 1;
    639             pu1_src -= ((src_strd / 2) << log2_ctb_size);
    640             pu1_src += 16;
    641         }
    642     }
    643 
    644     /* Chroma Horizontal Edge */
    645 
    646     if(0 == i4_is_last_ctb_y)
    647     {
    648 
    649         /* Left CTB's slice header */
    650         slice_header_t *ps_slice_hdr_left;
    651         {
    652             WORD32 cur_ctb_indx = ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
    653             if(i4_is_last_ctb_x)
    654                 cur_ctb_indx += 1;
    655             ps_slice_hdr_left = ps_codec->ps_slice_hdr_base + ps_deblk->pu1_slice_idx[cur_ctb_indx - 1];
    656         }
    657 
    658         pu1_src = ps_deblk->pu1_cur_pic_chroma + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_deblk->ps_codec->i4_strd / 2) << log2_ctb_size);
    659         pu1_src += i4_is_last_ctb_x ? ctb_size : 0;
    660 
    661         /** Deblocking is done on a shifted CTB -
    662          * Vertical edge processing is done by shifting the CTB up by four pixels (8 here beacuse UV are interleaved) */
    663         pu1_src -= 8;
    664         for(row = 0; row < ctb_size / 16; row++)
    665         {
    666             /* BS for the row - Last column is excluded and the left column is included*/
    667             u4_bs = pu4_horz_bs[0] << 2;
    668 
    669             if(ps_deblk->i4_ctb_x || i4_is_last_ctb_x)
    670             {
    671                 /** Picking the last BS of the previous CTB corresponding to the same row
    672                 * UWORD32 *pu4_horz_bs_left = (UWORD32 *)((UWORD8 *)pu4_horz_bs - (ctb_size / 8) * (ctb_size / 4) / 8 * 2);
    673                 */
    674                 UWORD32 *pu4_horz_bs_left = (UWORD32 *)((UWORD8 *)pu4_horz_bs - (1 << (2 * log2_ctb_size - 7)));
    675                 UWORD32 u4_left_bs = (*pu4_horz_bs_left) >> ((1 << (log2_ctb_size - 1)) - 2);
    676                 u4_bs |= u4_left_bs & 3;
    677             }
    678 
    679             /* Every alternate boundary strength value is used for chroma */
    680             u4_bs &= 0x22222222;
    681 
    682             for(col = 0; col < ctb_size / 8;)
    683             {
    684                 WORD8 i1_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
    685 
    686                 bs_tz = CTZ(u4_bs) >> 2;
    687                 if(0 != bs_tz)
    688                 {
    689                     u4_bs = u4_bs >> (bs_tz << 2);
    690 
    691                     if((col + bs_tz) >= (ctb_size / 8))
    692                         pu1_src += 8 * (ctb_size / 8 - col);
    693                     else
    694                         pu1_src += 8 * bs_tz;
    695 
    696                     col += bs_tz;
    697                     continue;
    698                 }
    699 
    700                 if(0 == col)
    701                 {
    702                     i1_tc_offset_div2 = ps_slice_hdr_left->i1_tc_offset_div2;
    703 
    704                     if(0 == row)
    705                     {
    706                         qp_p = u4_qp_const_in_ctb[0] ?
    707                                         pu1_qp[-ctb_size / 8 * qp_strd - ctb_size / 8] :
    708                                         pu1_qp[-qp_strd - 1];
    709                     }
    710                     else
    711                     {
    712                         qp_p = u4_qp_const_in_ctb[2] ?
    713                                         pu1_qp[-ctb_size / 8] :
    714                                         pu1_qp[(2 * row - 1) * qp_strd - 1];
    715                     }
    716 
    717                     qp_q = u4_qp_const_in_ctb[2] ?
    718                                     pu1_qp[-ctb_size / 8] :
    719                                     pu1_qp[(2 * row) * qp_strd - 1];
    720                 }
    721                 else
    722                 {
    723                     if(0 == row)
    724                     {
    725                         qp_p = u4_qp_const_in_ctb[1] ?
    726                                         pu1_qp[-ctb_size / 8 * qp_strd] :
    727                                         pu1_qp[col - 1 - qp_strd];
    728                     }
    729                     else
    730                     {
    731                         qp_p = u4_qp_const_in_ctb[3] ?
    732                                         pu1_qp[0] :
    733                                         pu1_qp[(col - 1) +  (2 * row - 1) * qp_strd];
    734                     }
    735 
    736                     qp_q = u4_qp_const_in_ctb[3] ?
    737                                     pu1_qp[0] :
    738                                     pu1_qp[(col - 1) + 2 * row * qp_strd];
    739                 }
    740 
    741                 filter_p = (pu2_ctb_no_loop_filter_flag[row << 1] >> col) & 1;
    742                 filter_q = (pu2_ctb_no_loop_filter_flag[(row << 1) + 1] >> col) & 1;
    743                 /* filter_p and filter_q are inverted as they are calculated using no_loop_filter_flags */
    744                 filter_p = !filter_p;
    745                 filter_q = !filter_q;
    746 
    747                 if(filter_p || filter_q)
    748                 {
    749                     ASSERT(1 == ((u4_bs & 3) >> 1));
    750                     DUMP_DEBLK_CHROMA_HORZ(pu1_src, src_strd,
    751                                            u4_bs & 3, qp_p, qp_q,
    752                                            ps_pps->i1_pic_cb_qp_offset,
    753                                            ps_pps->i1_pic_cr_qp_offset,
    754                                            ps_slice_hdr->i1_tc_offset_div2,
    755                                            filter_p, filter_q);
    756                     if(chroma_yuv420sp_vu)
    757                     {
    758                         ps_codec->s_func_selector.ihevc_deblk_chroma_horz_fptr(pu1_src,
    759                                                                                src_strd,
    760                                                                                qp_q,
    761                                                                                qp_p,
    762                                                                                ps_pps->i1_pic_cr_qp_offset,
    763                                                                                ps_pps->i1_pic_cb_qp_offset,
    764                                                                                i1_tc_offset_div2,
    765                                                                                filter_q,
    766                                                                                filter_p);
    767                     }
    768                     else
    769                     {
    770                         ps_codec->s_func_selector.ihevc_deblk_chroma_horz_fptr(pu1_src,
    771                                                                                src_strd,
    772                                                                                qp_p,
    773                                                                                qp_q,
    774                                                                                ps_pps->i1_pic_cb_qp_offset,
    775                                                                                ps_pps->i1_pic_cr_qp_offset,
    776                                                                                i1_tc_offset_div2,
    777                                                                                filter_p,
    778                                                                                filter_q);
    779                     }
    780                 }
    781 
    782                 pu1_src += 8;
    783                 u4_bs = u4_bs >> 4;
    784                 col++;
    785             }
    786 
    787             pu4_horz_bs += (64 == ctb_size) ? 2 : 1;
    788             pu1_src -= ctb_size;
    789             pu1_src += 8 * src_strd;
    790 
    791         }
    792     }
    793 }
    794