Home | History | Annotate | Download | only in encoder
      1 /******************************************************************************
      2  *
      3  * Copyright (C) 2018 The Android Open Source Project
      4  *
      5  * Licensed under the Apache License, Version 2.0 (the "License");
      6  * you may not use this file except in compliance with the License.
      7  * You may obtain a copy of the License at:
      8  *
      9  * http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  *
     17  *****************************************************************************
     18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
     19 */
     20 /**
     21 *******************************************************************************
     22 * @file
     23 *  ihevce_deblk.c
     24 *
     25 * @brief
     26 *  Contains definition for the ctb level deblk function
     27 *
     28 * @author
     29 *  ittiam
     30 *
     31 * @List of Functions:
     32 *  ihevce_deblk_populate_qp_map()
     33 *  ihevce_deblk_ctb()
     34 *  ihevce_hbd_deblk_ctb()
     35 *
     36 * @remarks
     37 *  None
     38 *
     39 *******************************************************************************
     40 */
     41 
     42 /*****************************************************************************/
     43 /* File Includes                                                             */
     44 /*****************************************************************************/
     45 /* System include files */
     46 #include <stdio.h>
     47 #include <string.h>
     48 #include <stdlib.h>
     49 #include <assert.h>
     50 #include <stdarg.h>
     51 #include <math.h>
     52 
     53 /* User include files */
     54 #include "ihevc_typedefs.h"
     55 #include "itt_video_api.h"
     56 #include "ihevce_api.h"
     57 
     58 #include "rc_cntrl_param.h"
     59 #include "rc_frame_info_collector.h"
     60 #include "rc_look_ahead_params.h"
     61 
     62 #include "ihevc_defs.h"
     63 #include "ihevc_debug.h"
     64 #include "ihevc_structs.h"
     65 #include "ihevc_platform_macros.h"
     66 #include "ihevc_deblk.h"
     67 #include "ihevc_deblk_tables.h"
     68 #include "ihevc_common_tables.h"
     69 #include "ihevc_itrans_recon.h"
     70 #include "ihevc_chroma_itrans_recon.h"
     71 #include "ihevc_chroma_intra_pred.h"
     72 #include "ihevc_intra_pred.h"
     73 #include "ihevc_inter_pred.h"
     74 #include "ihevc_mem_fns.h"
     75 #include "ihevc_padding.h"
     76 #include "ihevc_weighted_pred.h"
     77 #include "ihevc_sao.h"
     78 #include "ihevc_resi_trans.h"
     79 #include "ihevc_quant_iquant_ssd.h"
     80 #include "ihevc_cabac_tables.h"
     81 
     82 #include "ihevce_defs.h"
     83 #include "ihevce_hle_interface.h"
     84 #include "ihevce_lap_enc_structs.h"
     85 #include "ihevce_multi_thrd_structs.h"
     86 #include "ihevce_me_common_defs.h"
     87 #include "ihevce_had_satd.h"
     88 #include "ihevce_error_codes.h"
     89 #include "ihevce_bitstream.h"
     90 #include "ihevce_cabac.h"
     91 #include "ihevce_rdoq_macros.h"
     92 #include "ihevce_function_selector.h"
     93 #include "ihevce_enc_structs.h"
     94 #include "ihevce_entropy_structs.h"
     95 #include "ihevce_cmn_utils_instr_set_router.h"
     96 #include "ihevce_enc_loop_structs.h"
     97 #include "ihevce_common_utils.h"
     98 #include "ihevce_global_tables.h"
     99 #include "ihevce_deblk.h"
    100 #include "ihevce_tile_interface.h"
    101 
    102 /*****************************************************************************/
    103 /* Function Definitions                                                      */
    104 /*****************************************************************************/
    105 
    106 /*!
    107 ******************************************************************************
    108 * \if Function name : ihevce_deblk_populate_qp_map \endif
    109 *
    110 * \brief
    111 *
    112 *
    113 *****************************************************************************
    114 */
    115 void ihevce_deblk_populate_qp_map(
    116     ihevce_enc_loop_ctxt_t *ps_ctxt,
    117     deblk_ctbrow_prms_t *ps_deblk_ctb_row_params,
    118     ctb_enc_loop_out_t *ps_ctb_out_dblk,
    119     WORD32 vert_ctr,
    120     frm_ctb_ctxt_t *ps_frm_ctb_prms,
    121     ihevce_tile_params_t *ps_col_tile_params)
    122 {
    123     ctb_enc_loop_out_t *ps_ctb_out;
    124     WORD32 ctb_ctr, ctb_start, ctb_end;
    125     WORD32 tile_qp_offset, tile_qp_size, i4_offset_for_last_cu_qp;
    126     /* Create the Qp map for the entire current CTB-row for deblocking purpose(only)*/
    127     /* Do this iff cur pic is referred or recon dump is enabled or psnr calc is on*/
    128     /*Qp of the last CU of previous CTB row*/
    129     WORD8 i1_last_cu_qp;
    130     /*A pointer pointing to the top 4x4 block's Qp for all CTb rows*/
    131     WORD8 *pi1_qp_top_4x4_ctb_row =
    132         ps_deblk_ctb_row_params->api1_qp_top_4x4_ctb_row[ps_ctxt->i4_enc_frm_id] +
    133         (ps_deblk_ctb_row_params->u4_qp_top_4x4_buf_size * ps_ctxt->i4_bitrate_instance_num);
    134 
    135     UWORD32 u4_qp_top_4x4_buf_strd = ps_deblk_ctb_row_params->u4_qp_top_4x4_buf_strd;
    136 
    137     /*The Qp map which has to be populated*/
    138     UWORD32 u4_qp_buffer_stride = ps_deblk_ctb_row_params->u4_qp_buffer_stride;
    139     WORD8 *pi1_ctb_tile_qp = ps_deblk_ctb_row_params->pi1_ctb_row_qp;
    140 
    141     /*Temporary pointers to Qp map at CTB level*/
    142     WORD8 *pi1_ctb_qp_map_tile;
    143 
    144     i4_offset_for_last_cu_qp = ps_ctxt->pi4_offset_for_last_cu_qp[ps_ctxt->i4_tile_col_idx];
    145     /* total QPs to be copied for current row is : */
    146     tile_qp_size = i4_offset_for_last_cu_qp + 1;
    147     /*Pointing to the first CTB of current CTB row*/
    148     ps_ctb_out = ps_ctb_out_dblk;
    149     /* Offset req. for the row QP to the tile start */
    150     tile_qp_offset = ps_col_tile_params->i4_first_ctb_x * (ps_frm_ctb_prms->i4_ctb_size / 4);
    151 
    152     ctb_start = ps_col_tile_params->i4_first_ctb_x;
    153     ctb_end =
    154         (ps_col_tile_params->i4_first_ctb_x + ps_col_tile_params->i4_curr_tile_wd_in_ctb_unit);
    155 
    156     if(vert_ctr) /*Not first CTB row of frame*/
    157     {
    158         /*copy from top4x4_array data stored by upper CTB-row to qp-map*/
    159         memcpy(
    160             pi1_ctb_tile_qp,
    161             (pi1_qp_top_4x4_ctb_row + (vert_ctr - 1) * u4_qp_top_4x4_buf_strd + tile_qp_offset),
    162             tile_qp_size);
    163     }
    164 
    165     /*pu1_ctb_row_qp points to top4x4 row in Qp-map.
    166     Now pointing pu1_ctb_qp_map to cur 4x4 row*/
    167     pi1_ctb_qp_map_tile = pi1_ctb_tile_qp + u4_qp_buffer_stride;
    168 
    169     /* This i1_last_cu_qp will be conditionally overwritten later */
    170     i1_last_cu_qp = ps_ctxt->i4_frame_qp;
    171 
    172     /* -- Loop over all the CTBs in a CTB-row for populating the Qp-map ----- */
    173     for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
    174     {
    175         WORD32 cu_ctr;
    176         cu_enc_loop_out_t *ps_curr_cu;
    177 
    178         /* Update i1_last_cu_qp based on CTB's position in tile */
    179         update_last_coded_cu_qp(
    180             (ps_deblk_ctb_row_params->pi1_ctb_row_qp + i4_offset_for_last_cu_qp),
    181             ps_ctxt->i1_entropy_coding_sync_enabled_flag,
    182             ps_frm_ctb_prms,
    183             ps_ctxt->i4_frame_qp,
    184             vert_ctr,
    185             ctb_ctr,
    186             &i1_last_cu_qp);
    187 
    188         /* store the pointer of first cu of current ctb */
    189         ps_curr_cu = ps_ctb_out->ps_enc_cu;
    190 
    191         /* --------- loop over all the CUs in the CTB --------------- */
    192         for(cu_ctr = 0; cu_ctr < ps_ctb_out->u1_num_cus_in_ctb; cu_ctr++)
    193         {
    194             UWORD8 u1_vert_4x4, u1_horz_4x4;  //for_loop counters
    195             WORD8 *pi1_cu_qp_map;
    196 
    197             WORD8 i1_qp, i1_qp_left, i1_qp_top;
    198 
    199             pi1_cu_qp_map = pi1_ctb_qp_map_tile +
    200                             (ps_curr_cu->b3_cu_pos_y * 2) * u4_qp_buffer_stride +
    201                             (ps_curr_cu->b3_cu_pos_x * 2);
    202 
    203             /*If the current CU is coded in skip_mode/zero_CBF then
    204             for deblocking, Qp of the previously coded CU will be used*/
    205             if(ps_curr_cu->b1_skip_flag || ps_curr_cu->b1_no_residual_syntax_flag)
    206             {
    207                 if(0 == ps_curr_cu->b3_cu_pos_x)
    208                     i1_qp_left = i1_last_cu_qp;
    209                 else
    210                     i1_qp_left = *(pi1_cu_qp_map - 1);
    211 
    212                 if(0 == ps_curr_cu->b3_cu_pos_y)
    213                     i1_qp_top = i1_last_cu_qp;
    214                 else
    215                     i1_qp_top = *(pi1_cu_qp_map - u4_qp_buffer_stride);
    216 
    217                 i1_qp = (i1_qp_left + i1_qp_top + 1) / 2;
    218 
    219                 if(0 == ps_curr_cu->b1_first_cu_in_qg)
    220                 {
    221                     i1_qp = i1_last_cu_qp;
    222                 }
    223             }
    224             else
    225             {
    226                 i1_qp = ps_curr_cu->i1_cu_qp;
    227             }
    228 
    229             i1_last_cu_qp = i1_qp;
    230 
    231             /*---- Loop for populating Qp map for the current CU -------*/
    232             for(u1_vert_4x4 = 0; u1_vert_4x4 < (ps_curr_cu->b4_cu_size * 2); u1_vert_4x4++)
    233             {
    234                 for(u1_horz_4x4 = 0; u1_horz_4x4 < (ps_curr_cu->b4_cu_size * 2); u1_horz_4x4++)
    235                 {
    236                     pi1_cu_qp_map[u1_horz_4x4] = i1_qp;
    237                 }
    238                 pi1_cu_qp_map += u4_qp_buffer_stride;
    239             }
    240             /*Update Qp-map ptr. Qp map is at 4x4 level but b4_cu_size is at 8x8 level*/
    241             ps_curr_cu++;
    242         }
    243         pi1_ctb_qp_map_tile += (ps_frm_ctb_prms->i4_ctb_size / 4);  //one qp per 4x4 block.
    244         ps_ctb_out++;
    245 
    246     }  //for(ctb_ctr = 0; ctb_ctr < num_ctbs_horz; ctb_ctr++)
    247 
    248     /*fill into the top4x4_array Qp for the lower CTB-row from bottom part of cur CTB row*/
    249     memcpy(
    250         (pi1_qp_top_4x4_ctb_row + vert_ctr * u4_qp_top_4x4_buf_strd + tile_qp_offset),
    251         (pi1_ctb_tile_qp + (ps_frm_ctb_prms->i4_ctb_size / 4) * u4_qp_buffer_stride),
    252         tile_qp_size);
    253 }
    254 
    255 /**
    256 *******************************************************************************
    257 *
    258 * @brief
    259 *   Deblock CTB level function.
    260 *
    261 * @par Description:
    262 *   For a given CTB, deblocking on both vertical and
    263 *   horizontal edges is done. Both the luma and chroma
    264 *   blocks are processed
    265 *
    266 * @param[in]
    267 *   ps_deblk:   Pointer to the deblock context
    268 *   last_col:   if the CTB is the last CTB of current CTB-row value is 1 else 0
    269 *   ps_deblk_ctb_row_params: deblk ctb row params
    270 *
    271 * @returns
    272 *
    273 * @remarks
    274 *  None
    275 *
    276 *******************************************************************************
    277 */
    278 void ihevce_deblk_ctb(
    279     deblk_ctb_params_t *ps_deblk, WORD32 last_col, deblk_ctbrow_prms_t *ps_deblk_ctb_row_params)
    280 {
    281     WORD32 ctb_size;
    282     UWORD32 u4_bs;
    283     WORD32 bs_lz; /*Leading zeros in boundary strength*/
    284     WORD32 qp_p, qp_q;
    285     UWORD8 *pu1_src;
    286     UWORD8 *pu1_src_uv;
    287     UWORD8 *pu1_curr_src;
    288     WORD32 col_size;
    289     WORD32 col, row, i4_edge_count;
    290     WORD32 num_columns_for_vert_filt;
    291     WORD32 num_blks_for_vert_filt;
    292     WORD32 num_rows_for_horz_filt;
    293 
    294     ihevc_deblk_chroma_horz_ft *pf_deblk_chroma_horz;
    295     ihevc_deblk_chroma_horz_ft *pf_deblk_chroma_vert;
    296 
    297     /* Filter flags are packed along with the qp info.
    298     6 out of the 8 bits correspond to qp and 1 to filter flag. */
    299     /* filter_p and filter_q are initialized to 1.
    300     They are to be extracted along with the qp info. */
    301     WORD32 filter_p, filter_q;
    302     WORD8 *pi1_ctb_row_qp_p, *pi1_ctb_row_qp_temp;
    303     WORD8 *pi1_ctb_row_qp_q;
    304 
    305     func_selector_t *ps_func_slector = ps_deblk->ps_func_selector;
    306 
    307     WORD32 left_luma_edge_filter_flag = ps_deblk->i4_deblock_left_ctb_edge;
    308     WORD32 top_luma_edge_filter_flag = ps_deblk->i4_deblock_top_ctb_edge;
    309     WORD32 left_chroma_edge_filter_flag = ps_deblk->i4_deblock_left_ctb_edge;
    310     WORD32 top_chroma_edge_filter_flag = ps_deblk->i4_deblock_top_ctb_edge;
    311     UWORD32 *bs_vert = ps_deblk_ctb_row_params->pu4_ctb_row_bs_vert;
    312     UWORD32 *bs_horz = ps_deblk_ctb_row_params->pu4_ctb_row_bs_horz;
    313     UWORD32 *bs_vert_uv = bs_vert;
    314     UWORD32 *bs_horz_uv = bs_horz;
    315     UWORD32 u4_qp_buffer_stride = ps_deblk_ctb_row_params->u4_qp_buffer_stride;
    316     UWORD8 u1_is_422 = (ps_deblk->u1_chroma_array_type == 2);
    317 
    318     if(u1_is_422)
    319     {
    320         pf_deblk_chroma_horz = ps_func_slector->ihevc_deblk_422chroma_horz_fptr;
    321         pf_deblk_chroma_vert = ps_func_slector->ihevc_deblk_422chroma_vert_fptr;
    322     }
    323     else
    324     {
    325         pf_deblk_chroma_horz = ps_func_slector->ihevc_deblk_chroma_horz_fptr;
    326         pf_deblk_chroma_vert = ps_func_slector->ihevc_deblk_chroma_vert_fptr;
    327     }
    328 
    329     ctb_size = ps_deblk->i4_ctb_size;
    330 
    331     /* The PCM filter flag and bypass trans flag are always set to 1 in encoder profile */
    332     /* Can be removed during optimization */
    333     filter_q = 1;
    334     filter_p = 1;
    335 
    336     //////////////////////////////////////////////////////////////////////////////
    337     /* Luma Veritcal Edge */
    338     pu1_src = ps_deblk->pu1_ctb_y;
    339     pi1_ctb_row_qp_temp = ps_deblk_ctb_row_params->pi1_ctb_row_qp + u4_qp_buffer_stride;
    340     num_columns_for_vert_filt = ctb_size / 8;
    341     num_blks_for_vert_filt = ctb_size / 4;
    342 
    343     for(i4_edge_count = 0; i4_edge_count < num_columns_for_vert_filt; i4_edge_count++)
    344     {
    345         u4_bs = *bs_vert;
    346         /* get the current 4x4 vertical pointer */
    347         pu1_curr_src = pu1_src;
    348         pi1_ctb_row_qp_q = pi1_ctb_row_qp_temp + (i4_edge_count << 1);
    349 
    350         /* If the current edge is not the 1st edge of frame or slice */
    351         if(1 == left_luma_edge_filter_flag)
    352         {
    353             for(row = 0; row < num_blks_for_vert_filt;)
    354             {
    355                 bs_lz = CLZ(u4_bs) >> 1;
    356                 /* If BS = 0, skip the egde filtering */
    357                 if(0 != bs_lz)
    358                 {
    359                     u4_bs = u4_bs << (bs_lz << 1);
    360                     pu1_curr_src += ((bs_lz << 2) * ps_deblk->i4_luma_pic_stride);
    361                     pi1_ctb_row_qp_q += (bs_lz * u4_qp_buffer_stride);
    362                     row += bs_lz;
    363                     continue;
    364                 }
    365                 qp_p = *(pi1_ctb_row_qp_q - 1);
    366                 qp_q = *pi1_ctb_row_qp_q;
    367 
    368                 ps_func_slector->ihevc_deblk_luma_vert_fptr(
    369                     pu1_curr_src,
    370                     ps_deblk->i4_luma_pic_stride,
    371                     (u4_bs >> 30), /* bits 31 and 30 are extracted */
    372                     qp_p,
    373                     qp_q,
    374                     ps_deblk->i4_beta_offset_div2,
    375                     ps_deblk->i4_tc_offset_div2,
    376                     filter_p,
    377                     filter_q);
    378 
    379                 u4_bs = u4_bs << 2;
    380                 pu1_curr_src += (ps_deblk->i4_luma_pic_stride << 2);
    381                 pi1_ctb_row_qp_q += u4_qp_buffer_stride;
    382                 row++;
    383             }
    384         }
    385 
    386         /* Increment the boundary strength and src pointer for the next column */
    387         bs_vert += 1;
    388         pu1_src += 8;
    389 
    390         /* Enable for the next edges of ctb*/
    391         left_luma_edge_filter_flag = 1;
    392     }
    393 
    394     //////////////////////////////////////////////////////////////////////////////
    395     /* Chroma Veritcal Edge */
    396     pu1_src_uv = ps_deblk->pu1_ctb_uv;
    397     pi1_ctb_row_qp_temp = ps_deblk_ctb_row_params->pi1_ctb_row_qp + u4_qp_buffer_stride;
    398 
    399     /* Column spacing is 4 for each chroma component */
    400     /* and hence 8 when they are interleaved. */
    401     /* But, only those columns with a x co-ordinate */
    402     /* that is divisiblee by 8 are filtered */
    403     /* Hence, denominator is 16 */
    404     num_columns_for_vert_filt = ctb_size / 16;
    405     /* blk_size is 4 and chroma_ctb_height is ctb_size/2 */
    406     num_blks_for_vert_filt = (0 == u1_is_422) ? (ctb_size / 2) / 4 : (ctb_size) / 4;
    407 
    408     for(i4_edge_count = 0; i4_edge_count < num_columns_for_vert_filt; i4_edge_count++)
    409     {
    410         /* Every alternate boundary strength value is used for 420 chroma */
    411         u4_bs = *(bs_vert_uv) & ((0 == u1_is_422) ? 0x88888888 : 0xaaaaaaaa);
    412         pu1_curr_src = pu1_src_uv;
    413         pi1_ctb_row_qp_q = pi1_ctb_row_qp_temp + (i4_edge_count << 2);
    414 
    415         /* If the current edge is not the 1st edge of frame or slice */
    416         if(1 == left_chroma_edge_filter_flag)
    417         {
    418             /* Each 'bs' is 2 bits long */
    419             /* The divby4 in 420 is */
    420             /* necessitated by the fact that */
    421             /* chroma ctb_ht is half that of luma */
    422             WORD32 i4_log2_num_bits_per_bs = ((0 == u1_is_422) + 1);
    423             /* i4_sub_heightC = 2 for 420 */
    424             /* i4_sub_heightC = 1 for 422 */
    425             WORD32 i4_sub_heightC = i4_log2_num_bits_per_bs;
    426 
    427             for(row = 0; row < num_blks_for_vert_filt;)
    428             {
    429                 bs_lz = CLZ(u4_bs) >> i4_log2_num_bits_per_bs;
    430 
    431                 /* If BS = 0, skip the egde filtering */
    432                 if(0 != bs_lz)
    433                 {
    434                     row += bs_lz;
    435                     u4_bs = u4_bs << (bs_lz << i4_log2_num_bits_per_bs);
    436                     /* '<<2' because of blk_size being 4x4 */
    437                     pu1_curr_src += ((bs_lz << 2) * ps_deblk->i4_chroma_pic_stride);
    438 
    439                     /* In 420, every alternate QP row is skipped, because chroma height */
    440                     /* In 422, no row is skipped */
    441                     pi1_ctb_row_qp_q += ((u4_qp_buffer_stride << (i4_sub_heightC - 1)) * bs_lz);
    442 
    443                     continue;
    444                 }
    445 
    446                 qp_p = *(pi1_ctb_row_qp_q - i4_sub_heightC);
    447                 qp_q = *pi1_ctb_row_qp_q;
    448 
    449                 pf_deblk_chroma_vert(
    450                     pu1_curr_src,
    451                     ps_deblk->i4_chroma_pic_stride,
    452                     qp_p,
    453                     qp_q,
    454                     ps_deblk->i4_cb_qp_indx_offset,
    455                     ps_deblk->i4_cr_qp_indx_offset,
    456                     ps_deblk->i4_tc_offset_div2,
    457                     filter_p,
    458                     filter_q);
    459 
    460                 u4_bs = u4_bs << (1 << i4_log2_num_bits_per_bs);
    461                 pu1_curr_src += (ps_deblk->i4_chroma_pic_stride << 2);
    462                 pi1_ctb_row_qp_q += (u4_qp_buffer_stride << (i4_sub_heightC - 1));
    463                 row++;
    464             }
    465         }
    466         /* Increment the boundary strength by 2 and src pointer for the next column */
    467         /* As the edge filtering happens for alternate column */
    468         bs_vert_uv += 2;
    469         pu1_src_uv += 16;
    470         left_chroma_edge_filter_flag = 1;
    471     }
    472 
    473     //////////////////////////////////////////////////////////////////////////////
    474 
    475     /* Luma Horizontal Edge */
    476     pu1_src = ps_deblk->pu1_ctb_y;
    477     col_size = ctb_size / 4;
    478 
    479     /* If the ctb is the 1st ctb of row,                     */
    480     /* Decrement the loop count to exclude filtering of last 4 pixels */
    481     /* else shift the src pointer by 4 pixels to do filtering for shifted ctb */
    482     if(ps_deblk->i4_deblock_left_ctb_edge == 1)
    483     {
    484         pu1_src -= 4;
    485         /*If the ctb is at the horizonatl end of PIC*/
    486         /* Increase the column size to filter last 4 pixels */
    487         col_size += last_col;
    488     }
    489     else if(!last_col)
    490     {
    491         col_size -= 1;
    492     }
    493     {
    494         UWORD8 *pu1_src_temp = pu1_src;
    495         //pu1_ctb_row_qp_p and pu1_ctb_row_qp_q point to alternate rows
    496         pi1_ctb_row_qp_p = ps_deblk_ctb_row_params->pi1_ctb_row_qp;
    497 
    498         num_rows_for_horz_filt = ctb_size / 8;
    499 
    500         for(i4_edge_count = 0; i4_edge_count < num_rows_for_horz_filt; i4_edge_count++)
    501         {
    502             WORD32 col_size_temp = col_size;
    503             pi1_ctb_row_qp_q = pi1_ctb_row_qp_p + u4_qp_buffer_stride;
    504             pu1_src = pu1_src_temp + (i4_edge_count * 8 * ps_deblk->i4_luma_pic_stride);
    505 
    506             if(1 == top_luma_edge_filter_flag)
    507             {
    508                 //Deblock the last vertical_4x4_column of previous CTB
    509                 if(ps_deblk->i4_deblock_left_ctb_edge == 1)
    510                 {
    511                     u4_bs = ps_deblk->au1_prev_bs[i4_edge_count] & 0x3;
    512                     if(u4_bs != 0)
    513                     {
    514                         qp_p = *(pi1_ctb_row_qp_p - 1);
    515                         qp_q = *(pi1_ctb_row_qp_q - 1);
    516 
    517                         ps_func_slector->ihevc_deblk_luma_horz_fptr(
    518                             pu1_src,
    519                             ps_deblk->i4_luma_pic_stride,
    520                             u4_bs,
    521                             qp_p,
    522                             qp_q,
    523                             ps_deblk->i4_beta_offset_div2,
    524                             ps_deblk->i4_tc_offset_div2,
    525                             1,
    526                             1);
    527                     }
    528 
    529                     pu1_src += 4;
    530                     col_size_temp--;
    531                 }
    532                 //Start deblocking current CTB
    533                 u4_bs = *(bs_horz);
    534 
    535                 for(col = 0; col < col_size_temp;)
    536                 {
    537                     bs_lz = CLZ(u4_bs) >> 1;
    538                     if(0 != bs_lz)
    539                     {
    540                         u4_bs = u4_bs << (bs_lz << 1);
    541                         pu1_src += 4 * bs_lz;
    542                         col += bs_lz;
    543                         continue;
    544                     }
    545                     qp_p = *(pi1_ctb_row_qp_p + col);
    546                     qp_q = *(pi1_ctb_row_qp_q + col);
    547 
    548                     ps_func_slector->ihevc_deblk_luma_horz_fptr(
    549                         pu1_src,
    550                         ps_deblk->i4_luma_pic_stride,
    551                         u4_bs >> (sizeof(u4_bs) * 8 - 2),
    552                         qp_p,
    553                         qp_q,
    554                         ps_deblk->i4_beta_offset_div2,
    555                         ps_deblk->i4_tc_offset_div2,
    556                         filter_p,
    557                         filter_q);
    558 
    559                     pu1_src += 4;
    560                     u4_bs = u4_bs << 2;
    561                     col++;
    562                 }
    563                 //Store the last vertical_4x4 column of CTB's info for next CTB deblocking
    564                 u4_bs = *bs_horz;
    565                 ps_deblk->au1_prev_bs[i4_edge_count] =
    566                     (UWORD8)(((u4_bs << ((ctb_size >> 1) - 2))) >> 30);
    567             }
    568             bs_horz += 1;
    569             pi1_ctb_row_qp_p += (u4_qp_buffer_stride << 1);
    570             top_luma_edge_filter_flag = 1;
    571         }
    572     }
    573 
    574     //////////////////////////////////////////////////////////////////////////////
    575     /* Chroma Horizontal Edge */
    576     pu1_src_uv = ps_deblk->pu1_ctb_uv;
    577     col_size = ctb_size / 8;
    578 
    579     /* If the ctb is the 1st ctb of row,                     */
    580     /* Decrement the loop count to exclude filtering of last 4 pixels */
    581     /* else shift the src pointer by 8 (uv) pixels to do filtering for shifted ctb */
    582     if(ps_deblk->i4_deblock_left_ctb_edge == 1)
    583     {
    584         pu1_src_uv -= 8;
    585 
    586         /*If the ctb is at the horizonatl end of PIC*/
    587         /* Increase the column size to filter last 8 (uv) pixels */
    588         col_size += last_col;
    589     }
    590     else if(!last_col)
    591     {
    592         col_size--;
    593     }
    594 
    595     {
    596         UWORD8 *pu1_src_temp = pu1_src_uv;
    597 
    598         //pu1_ctb_row_qp_p and pu1_ctb_row_qp_q point to alternate rows
    599         pi1_ctb_row_qp_p = ps_deblk_ctb_row_params->pi1_ctb_row_qp;
    600         num_rows_for_horz_filt = ctb_size / ((0 == u1_is_422) ? 16 : 8);
    601 
    602         for(i4_edge_count = 0; i4_edge_count < num_rows_for_horz_filt; i4_edge_count++)
    603         {
    604             WORD32 col_size_temp = col_size;
    605 
    606             pi1_ctb_row_qp_q = pi1_ctb_row_qp_p + u4_qp_buffer_stride;
    607             pu1_src_uv = pu1_src_temp + (i4_edge_count * 8 * ps_deblk->i4_chroma_pic_stride);
    608 
    609             if(1 == top_chroma_edge_filter_flag)
    610             {
    611                 //Deblock the last vertical _4x4_column of previous CTB
    612                 if(ps_deblk->i4_deblock_left_ctb_edge == 1)
    613                 {
    614                     u4_bs = ps_deblk->au1_prev_bs_uv[i4_edge_count] & 0x2;
    615 
    616                     if(u4_bs == 2)
    617                     {
    618                         qp_p = *(pi1_ctb_row_qp_p - 1);
    619                         qp_q = *(pi1_ctb_row_qp_q - 1);
    620 
    621                         pf_deblk_chroma_horz(
    622                             pu1_src_uv,
    623                             ps_deblk->i4_chroma_pic_stride,
    624                             qp_p,
    625                             qp_q,
    626                             ps_deblk->i4_cb_qp_indx_offset,
    627                             ps_deblk->i4_cr_qp_indx_offset,
    628                             ps_deblk->i4_tc_offset_div2,
    629                             1,
    630                             1);
    631                     }
    632 
    633                     pu1_src_uv += 8;
    634                     col_size_temp--;
    635                 }
    636 
    637                 //Start deblocking current CTB
    638                 u4_bs = *(bs_horz_uv)&0x88888888;
    639 
    640                 for(col = 0; col < col_size_temp;)
    641                 {
    642                     bs_lz = CLZ(u4_bs) >> 2;
    643 
    644                     if(0 != bs_lz)
    645                     {
    646                         u4_bs = u4_bs << (bs_lz << 2);
    647                         pu1_src_uv += (8 * bs_lz);
    648 
    649                         col += bs_lz;
    650                         continue;
    651                     }
    652 
    653                     qp_p = *(pi1_ctb_row_qp_p + (col << 1));
    654                     qp_q = *(pi1_ctb_row_qp_q + (col << 1));
    655 
    656                     pf_deblk_chroma_horz(
    657                         pu1_src_uv,
    658                         ps_deblk->i4_chroma_pic_stride,
    659                         qp_p,
    660                         qp_q,
    661                         ps_deblk->i4_cb_qp_indx_offset,
    662                         ps_deblk->i4_cr_qp_indx_offset,
    663                         ps_deblk->i4_tc_offset_div2,
    664                         filter_p,
    665                         filter_q);
    666 
    667                     pu1_src_uv += 8;
    668                     u4_bs = u4_bs << 4;
    669                     col++;
    670                 }
    671 
    672                 //Store the last vertical_4x4 column of CTB's info for next CTB deblocking
    673                 u4_bs = *bs_horz_uv;
    674                 ps_deblk->au1_prev_bs_uv[i4_edge_count] =
    675                     (UWORD8)(((u4_bs << ((ctb_size >> 1) - 4))) >> 30);
    676             }
    677 
    678             bs_horz_uv += ((0 == u1_is_422) + 1);
    679             pi1_ctb_row_qp_p += (u4_qp_buffer_stride << ((0 == u1_is_422) + 1));
    680             top_chroma_edge_filter_flag = 1;
    681         }
    682     }
    683 
    684     return;
    685 }
    686