1 /****************************************************************************** 2 * 3 * Copyright (C) 2018 The Android Open Source Project 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ***************************************************************************** 18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore 19 */ 20 /** 21 ******************************************************************************* 22 * @file 23 * ihevce_deblk.c 24 * 25 * @brief 26 * Contains definition for the ctb level deblk function 27 * 28 * @author 29 * ittiam 30 * 31 * @List of Functions: 32 * ihevce_deblk_populate_qp_map() 33 * ihevce_deblk_ctb() 34 * ihevce_hbd_deblk_ctb() 35 * 36 * @remarks 37 * None 38 * 39 ******************************************************************************* 40 */ 41 42 /*****************************************************************************/ 43 /* File Includes */ 44 /*****************************************************************************/ 45 /* System include files */ 46 #include <stdio.h> 47 #include <string.h> 48 #include <stdlib.h> 49 #include <assert.h> 50 #include <stdarg.h> 51 #include <math.h> 52 53 /* User include files */ 54 #include "ihevc_typedefs.h" 55 #include "itt_video_api.h" 56 #include "ihevce_api.h" 57 58 #include "rc_cntrl_param.h" 59 #include "rc_frame_info_collector.h" 60 #include "rc_look_ahead_params.h" 61 62 #include "ihevc_defs.h" 63 #include "ihevc_debug.h" 64 #include "ihevc_structs.h" 65 #include "ihevc_platform_macros.h" 66 #include "ihevc_deblk.h" 67 #include "ihevc_deblk_tables.h" 68 #include "ihevc_common_tables.h" 69 #include "ihevc_itrans_recon.h" 70 #include "ihevc_chroma_itrans_recon.h" 71 #include "ihevc_chroma_intra_pred.h" 72 #include "ihevc_intra_pred.h" 73 #include "ihevc_inter_pred.h" 74 #include "ihevc_mem_fns.h" 75 #include "ihevc_padding.h" 76 #include "ihevc_weighted_pred.h" 77 #include "ihevc_sao.h" 78 #include "ihevc_resi_trans.h" 79 #include "ihevc_quant_iquant_ssd.h" 80 #include "ihevc_cabac_tables.h" 81 82 #include "ihevce_defs.h" 83 #include "ihevce_hle_interface.h" 84 #include "ihevce_lap_enc_structs.h" 85 #include "ihevce_multi_thrd_structs.h" 86 #include "ihevce_me_common_defs.h" 87 #include "ihevce_had_satd.h" 88 #include "ihevce_error_codes.h" 89 #include "ihevce_bitstream.h" 90 #include "ihevce_cabac.h" 91 #include "ihevce_rdoq_macros.h" 92 #include "ihevce_function_selector.h" 93 #include "ihevce_enc_structs.h" 94 #include "ihevce_entropy_structs.h" 95 #include "ihevce_cmn_utils_instr_set_router.h" 96 #include "ihevce_enc_loop_structs.h" 97 #include "ihevce_common_utils.h" 98 #include "ihevce_global_tables.h" 99 #include "ihevce_deblk.h" 100 #include "ihevce_tile_interface.h" 101 102 /*****************************************************************************/ 103 /* Function Definitions */ 104 /*****************************************************************************/ 105 106 /*! 107 ****************************************************************************** 108 * \if Function name : ihevce_deblk_populate_qp_map \endif 109 * 110 * \brief 111 * 112 * 113 ***************************************************************************** 114 */ 115 void ihevce_deblk_populate_qp_map( 116 ihevce_enc_loop_ctxt_t *ps_ctxt, 117 deblk_ctbrow_prms_t *ps_deblk_ctb_row_params, 118 ctb_enc_loop_out_t *ps_ctb_out_dblk, 119 WORD32 vert_ctr, 120 frm_ctb_ctxt_t *ps_frm_ctb_prms, 121 ihevce_tile_params_t *ps_col_tile_params) 122 { 123 ctb_enc_loop_out_t *ps_ctb_out; 124 WORD32 ctb_ctr, ctb_start, ctb_end; 125 WORD32 tile_qp_offset, tile_qp_size, i4_offset_for_last_cu_qp; 126 /* Create the Qp map for the entire current CTB-row for deblocking purpose(only)*/ 127 /* Do this iff cur pic is referred or recon dump is enabled or psnr calc is on*/ 128 /*Qp of the last CU of previous CTB row*/ 129 WORD8 i1_last_cu_qp; 130 /*A pointer pointing to the top 4x4 block's Qp for all CTb rows*/ 131 WORD8 *pi1_qp_top_4x4_ctb_row = 132 ps_deblk_ctb_row_params->api1_qp_top_4x4_ctb_row[ps_ctxt->i4_enc_frm_id] + 133 (ps_deblk_ctb_row_params->u4_qp_top_4x4_buf_size * ps_ctxt->i4_bitrate_instance_num); 134 135 UWORD32 u4_qp_top_4x4_buf_strd = ps_deblk_ctb_row_params->u4_qp_top_4x4_buf_strd; 136 137 /*The Qp map which has to be populated*/ 138 UWORD32 u4_qp_buffer_stride = ps_deblk_ctb_row_params->u4_qp_buffer_stride; 139 WORD8 *pi1_ctb_tile_qp = ps_deblk_ctb_row_params->pi1_ctb_row_qp; 140 141 /*Temporary pointers to Qp map at CTB level*/ 142 WORD8 *pi1_ctb_qp_map_tile; 143 144 i4_offset_for_last_cu_qp = ps_ctxt->pi4_offset_for_last_cu_qp[ps_ctxt->i4_tile_col_idx]; 145 /* total QPs to be copied for current row is : */ 146 tile_qp_size = i4_offset_for_last_cu_qp + 1; 147 /*Pointing to the first CTB of current CTB row*/ 148 ps_ctb_out = ps_ctb_out_dblk; 149 /* Offset req. for the row QP to the tile start */ 150 tile_qp_offset = ps_col_tile_params->i4_first_ctb_x * (ps_frm_ctb_prms->i4_ctb_size / 4); 151 152 ctb_start = ps_col_tile_params->i4_first_ctb_x; 153 ctb_end = 154 (ps_col_tile_params->i4_first_ctb_x + ps_col_tile_params->i4_curr_tile_wd_in_ctb_unit); 155 156 if(vert_ctr) /*Not first CTB row of frame*/ 157 { 158 /*copy from top4x4_array data stored by upper CTB-row to qp-map*/ 159 memcpy( 160 pi1_ctb_tile_qp, 161 (pi1_qp_top_4x4_ctb_row + (vert_ctr - 1) * u4_qp_top_4x4_buf_strd + tile_qp_offset), 162 tile_qp_size); 163 } 164 165 /*pu1_ctb_row_qp points to top4x4 row in Qp-map. 166 Now pointing pu1_ctb_qp_map to cur 4x4 row*/ 167 pi1_ctb_qp_map_tile = pi1_ctb_tile_qp + u4_qp_buffer_stride; 168 169 /* This i1_last_cu_qp will be conditionally overwritten later */ 170 i1_last_cu_qp = ps_ctxt->i4_frame_qp; 171 172 /* -- Loop over all the CTBs in a CTB-row for populating the Qp-map ----- */ 173 for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++) 174 { 175 WORD32 cu_ctr; 176 cu_enc_loop_out_t *ps_curr_cu; 177 178 /* Update i1_last_cu_qp based on CTB's position in tile */ 179 update_last_coded_cu_qp( 180 (ps_deblk_ctb_row_params->pi1_ctb_row_qp + i4_offset_for_last_cu_qp), 181 ps_ctxt->i1_entropy_coding_sync_enabled_flag, 182 ps_frm_ctb_prms, 183 ps_ctxt->i4_frame_qp, 184 vert_ctr, 185 ctb_ctr, 186 &i1_last_cu_qp); 187 188 /* store the pointer of first cu of current ctb */ 189 ps_curr_cu = ps_ctb_out->ps_enc_cu; 190 191 /* --------- loop over all the CUs in the CTB --------------- */ 192 for(cu_ctr = 0; cu_ctr < ps_ctb_out->u1_num_cus_in_ctb; cu_ctr++) 193 { 194 UWORD8 u1_vert_4x4, u1_horz_4x4; //for_loop counters 195 WORD8 *pi1_cu_qp_map; 196 197 WORD8 i1_qp, i1_qp_left, i1_qp_top; 198 199 pi1_cu_qp_map = pi1_ctb_qp_map_tile + 200 (ps_curr_cu->b3_cu_pos_y * 2) * u4_qp_buffer_stride + 201 (ps_curr_cu->b3_cu_pos_x * 2); 202 203 /*If the current CU is coded in skip_mode/zero_CBF then 204 for deblocking, Qp of the previously coded CU will be used*/ 205 if(ps_curr_cu->b1_skip_flag || ps_curr_cu->b1_no_residual_syntax_flag) 206 { 207 if(0 == ps_curr_cu->b3_cu_pos_x) 208 i1_qp_left = i1_last_cu_qp; 209 else 210 i1_qp_left = *(pi1_cu_qp_map - 1); 211 212 if(0 == ps_curr_cu->b3_cu_pos_y) 213 i1_qp_top = i1_last_cu_qp; 214 else 215 i1_qp_top = *(pi1_cu_qp_map - u4_qp_buffer_stride); 216 217 i1_qp = (i1_qp_left + i1_qp_top + 1) / 2; 218 219 if(0 == ps_curr_cu->b1_first_cu_in_qg) 220 { 221 i1_qp = i1_last_cu_qp; 222 } 223 } 224 else 225 { 226 i1_qp = ps_curr_cu->i1_cu_qp; 227 } 228 229 i1_last_cu_qp = i1_qp; 230 231 /*---- Loop for populating Qp map for the current CU -------*/ 232 for(u1_vert_4x4 = 0; u1_vert_4x4 < (ps_curr_cu->b4_cu_size * 2); u1_vert_4x4++) 233 { 234 for(u1_horz_4x4 = 0; u1_horz_4x4 < (ps_curr_cu->b4_cu_size * 2); u1_horz_4x4++) 235 { 236 pi1_cu_qp_map[u1_horz_4x4] = i1_qp; 237 } 238 pi1_cu_qp_map += u4_qp_buffer_stride; 239 } 240 /*Update Qp-map ptr. Qp map is at 4x4 level but b4_cu_size is at 8x8 level*/ 241 ps_curr_cu++; 242 } 243 pi1_ctb_qp_map_tile += (ps_frm_ctb_prms->i4_ctb_size / 4); //one qp per 4x4 block. 244 ps_ctb_out++; 245 246 } //for(ctb_ctr = 0; ctb_ctr < num_ctbs_horz; ctb_ctr++) 247 248 /*fill into the top4x4_array Qp for the lower CTB-row from bottom part of cur CTB row*/ 249 memcpy( 250 (pi1_qp_top_4x4_ctb_row + vert_ctr * u4_qp_top_4x4_buf_strd + tile_qp_offset), 251 (pi1_ctb_tile_qp + (ps_frm_ctb_prms->i4_ctb_size / 4) * u4_qp_buffer_stride), 252 tile_qp_size); 253 } 254 255 /** 256 ******************************************************************************* 257 * 258 * @brief 259 * Deblock CTB level function. 260 * 261 * @par Description: 262 * For a given CTB, deblocking on both vertical and 263 * horizontal edges is done. Both the luma and chroma 264 * blocks are processed 265 * 266 * @param[in] 267 * ps_deblk: Pointer to the deblock context 268 * last_col: if the CTB is the last CTB of current CTB-row value is 1 else 0 269 * ps_deblk_ctb_row_params: deblk ctb row params 270 * 271 * @returns 272 * 273 * @remarks 274 * None 275 * 276 ******************************************************************************* 277 */ 278 void ihevce_deblk_ctb( 279 deblk_ctb_params_t *ps_deblk, WORD32 last_col, deblk_ctbrow_prms_t *ps_deblk_ctb_row_params) 280 { 281 WORD32 ctb_size; 282 UWORD32 u4_bs; 283 WORD32 bs_lz; /*Leading zeros in boundary strength*/ 284 WORD32 qp_p, qp_q; 285 UWORD8 *pu1_src; 286 UWORD8 *pu1_src_uv; 287 UWORD8 *pu1_curr_src; 288 WORD32 col_size; 289 WORD32 col, row, i4_edge_count; 290 WORD32 num_columns_for_vert_filt; 291 WORD32 num_blks_for_vert_filt; 292 WORD32 num_rows_for_horz_filt; 293 294 ihevc_deblk_chroma_horz_ft *pf_deblk_chroma_horz; 295 ihevc_deblk_chroma_horz_ft *pf_deblk_chroma_vert; 296 297 /* Filter flags are packed along with the qp info. 298 6 out of the 8 bits correspond to qp and 1 to filter flag. */ 299 /* filter_p and filter_q are initialized to 1. 300 They are to be extracted along with the qp info. */ 301 WORD32 filter_p, filter_q; 302 WORD8 *pi1_ctb_row_qp_p, *pi1_ctb_row_qp_temp; 303 WORD8 *pi1_ctb_row_qp_q; 304 305 func_selector_t *ps_func_slector = ps_deblk->ps_func_selector; 306 307 WORD32 left_luma_edge_filter_flag = ps_deblk->i4_deblock_left_ctb_edge; 308 WORD32 top_luma_edge_filter_flag = ps_deblk->i4_deblock_top_ctb_edge; 309 WORD32 left_chroma_edge_filter_flag = ps_deblk->i4_deblock_left_ctb_edge; 310 WORD32 top_chroma_edge_filter_flag = ps_deblk->i4_deblock_top_ctb_edge; 311 UWORD32 *bs_vert = ps_deblk_ctb_row_params->pu4_ctb_row_bs_vert; 312 UWORD32 *bs_horz = ps_deblk_ctb_row_params->pu4_ctb_row_bs_horz; 313 UWORD32 *bs_vert_uv = bs_vert; 314 UWORD32 *bs_horz_uv = bs_horz; 315 UWORD32 u4_qp_buffer_stride = ps_deblk_ctb_row_params->u4_qp_buffer_stride; 316 UWORD8 u1_is_422 = (ps_deblk->u1_chroma_array_type == 2); 317 318 if(u1_is_422) 319 { 320 pf_deblk_chroma_horz = ps_func_slector->ihevc_deblk_422chroma_horz_fptr; 321 pf_deblk_chroma_vert = ps_func_slector->ihevc_deblk_422chroma_vert_fptr; 322 } 323 else 324 { 325 pf_deblk_chroma_horz = ps_func_slector->ihevc_deblk_chroma_horz_fptr; 326 pf_deblk_chroma_vert = ps_func_slector->ihevc_deblk_chroma_vert_fptr; 327 } 328 329 ctb_size = ps_deblk->i4_ctb_size; 330 331 /* The PCM filter flag and bypass trans flag are always set to 1 in encoder profile */ 332 /* Can be removed during optimization */ 333 filter_q = 1; 334 filter_p = 1; 335 336 ////////////////////////////////////////////////////////////////////////////// 337 /* Luma Veritcal Edge */ 338 pu1_src = ps_deblk->pu1_ctb_y; 339 pi1_ctb_row_qp_temp = ps_deblk_ctb_row_params->pi1_ctb_row_qp + u4_qp_buffer_stride; 340 num_columns_for_vert_filt = ctb_size / 8; 341 num_blks_for_vert_filt = ctb_size / 4; 342 343 for(i4_edge_count = 0; i4_edge_count < num_columns_for_vert_filt; i4_edge_count++) 344 { 345 u4_bs = *bs_vert; 346 /* get the current 4x4 vertical pointer */ 347 pu1_curr_src = pu1_src; 348 pi1_ctb_row_qp_q = pi1_ctb_row_qp_temp + (i4_edge_count << 1); 349 350 /* If the current edge is not the 1st edge of frame or slice */ 351 if(1 == left_luma_edge_filter_flag) 352 { 353 for(row = 0; row < num_blks_for_vert_filt;) 354 { 355 bs_lz = CLZ(u4_bs) >> 1; 356 /* If BS = 0, skip the egde filtering */ 357 if(0 != bs_lz) 358 { 359 u4_bs = u4_bs << (bs_lz << 1); 360 pu1_curr_src += ((bs_lz << 2) * ps_deblk->i4_luma_pic_stride); 361 pi1_ctb_row_qp_q += (bs_lz * u4_qp_buffer_stride); 362 row += bs_lz; 363 continue; 364 } 365 qp_p = *(pi1_ctb_row_qp_q - 1); 366 qp_q = *pi1_ctb_row_qp_q; 367 368 ps_func_slector->ihevc_deblk_luma_vert_fptr( 369 pu1_curr_src, 370 ps_deblk->i4_luma_pic_stride, 371 (u4_bs >> 30), /* bits 31 and 30 are extracted */ 372 qp_p, 373 qp_q, 374 ps_deblk->i4_beta_offset_div2, 375 ps_deblk->i4_tc_offset_div2, 376 filter_p, 377 filter_q); 378 379 u4_bs = u4_bs << 2; 380 pu1_curr_src += (ps_deblk->i4_luma_pic_stride << 2); 381 pi1_ctb_row_qp_q += u4_qp_buffer_stride; 382 row++; 383 } 384 } 385 386 /* Increment the boundary strength and src pointer for the next column */ 387 bs_vert += 1; 388 pu1_src += 8; 389 390 /* Enable for the next edges of ctb*/ 391 left_luma_edge_filter_flag = 1; 392 } 393 394 ////////////////////////////////////////////////////////////////////////////// 395 /* Chroma Veritcal Edge */ 396 pu1_src_uv = ps_deblk->pu1_ctb_uv; 397 pi1_ctb_row_qp_temp = ps_deblk_ctb_row_params->pi1_ctb_row_qp + u4_qp_buffer_stride; 398 399 /* Column spacing is 4 for each chroma component */ 400 /* and hence 8 when they are interleaved. */ 401 /* But, only those columns with a x co-ordinate */ 402 /* that is divisiblee by 8 are filtered */ 403 /* Hence, denominator is 16 */ 404 num_columns_for_vert_filt = ctb_size / 16; 405 /* blk_size is 4 and chroma_ctb_height is ctb_size/2 */ 406 num_blks_for_vert_filt = (0 == u1_is_422) ? (ctb_size / 2) / 4 : (ctb_size) / 4; 407 408 for(i4_edge_count = 0; i4_edge_count < num_columns_for_vert_filt; i4_edge_count++) 409 { 410 /* Every alternate boundary strength value is used for 420 chroma */ 411 u4_bs = *(bs_vert_uv) & ((0 == u1_is_422) ? 0x88888888 : 0xaaaaaaaa); 412 pu1_curr_src = pu1_src_uv; 413 pi1_ctb_row_qp_q = pi1_ctb_row_qp_temp + (i4_edge_count << 2); 414 415 /* If the current edge is not the 1st edge of frame or slice */ 416 if(1 == left_chroma_edge_filter_flag) 417 { 418 /* Each 'bs' is 2 bits long */ 419 /* The divby4 in 420 is */ 420 /* necessitated by the fact that */ 421 /* chroma ctb_ht is half that of luma */ 422 WORD32 i4_log2_num_bits_per_bs = ((0 == u1_is_422) + 1); 423 /* i4_sub_heightC = 2 for 420 */ 424 /* i4_sub_heightC = 1 for 422 */ 425 WORD32 i4_sub_heightC = i4_log2_num_bits_per_bs; 426 427 for(row = 0; row < num_blks_for_vert_filt;) 428 { 429 bs_lz = CLZ(u4_bs) >> i4_log2_num_bits_per_bs; 430 431 /* If BS = 0, skip the egde filtering */ 432 if(0 != bs_lz) 433 { 434 row += bs_lz; 435 u4_bs = u4_bs << (bs_lz << i4_log2_num_bits_per_bs); 436 /* '<<2' because of blk_size being 4x4 */ 437 pu1_curr_src += ((bs_lz << 2) * ps_deblk->i4_chroma_pic_stride); 438 439 /* In 420, every alternate QP row is skipped, because chroma height */ 440 /* In 422, no row is skipped */ 441 pi1_ctb_row_qp_q += ((u4_qp_buffer_stride << (i4_sub_heightC - 1)) * bs_lz); 442 443 continue; 444 } 445 446 qp_p = *(pi1_ctb_row_qp_q - i4_sub_heightC); 447 qp_q = *pi1_ctb_row_qp_q; 448 449 pf_deblk_chroma_vert( 450 pu1_curr_src, 451 ps_deblk->i4_chroma_pic_stride, 452 qp_p, 453 qp_q, 454 ps_deblk->i4_cb_qp_indx_offset, 455 ps_deblk->i4_cr_qp_indx_offset, 456 ps_deblk->i4_tc_offset_div2, 457 filter_p, 458 filter_q); 459 460 u4_bs = u4_bs << (1 << i4_log2_num_bits_per_bs); 461 pu1_curr_src += (ps_deblk->i4_chroma_pic_stride << 2); 462 pi1_ctb_row_qp_q += (u4_qp_buffer_stride << (i4_sub_heightC - 1)); 463 row++; 464 } 465 } 466 /* Increment the boundary strength by 2 and src pointer for the next column */ 467 /* As the edge filtering happens for alternate column */ 468 bs_vert_uv += 2; 469 pu1_src_uv += 16; 470 left_chroma_edge_filter_flag = 1; 471 } 472 473 ////////////////////////////////////////////////////////////////////////////// 474 475 /* Luma Horizontal Edge */ 476 pu1_src = ps_deblk->pu1_ctb_y; 477 col_size = ctb_size / 4; 478 479 /* If the ctb is the 1st ctb of row, */ 480 /* Decrement the loop count to exclude filtering of last 4 pixels */ 481 /* else shift the src pointer by 4 pixels to do filtering for shifted ctb */ 482 if(ps_deblk->i4_deblock_left_ctb_edge == 1) 483 { 484 pu1_src -= 4; 485 /*If the ctb is at the horizonatl end of PIC*/ 486 /* Increase the column size to filter last 4 pixels */ 487 col_size += last_col; 488 } 489 else if(!last_col) 490 { 491 col_size -= 1; 492 } 493 { 494 UWORD8 *pu1_src_temp = pu1_src; 495 //pu1_ctb_row_qp_p and pu1_ctb_row_qp_q point to alternate rows 496 pi1_ctb_row_qp_p = ps_deblk_ctb_row_params->pi1_ctb_row_qp; 497 498 num_rows_for_horz_filt = ctb_size / 8; 499 500 for(i4_edge_count = 0; i4_edge_count < num_rows_for_horz_filt; i4_edge_count++) 501 { 502 WORD32 col_size_temp = col_size; 503 pi1_ctb_row_qp_q = pi1_ctb_row_qp_p + u4_qp_buffer_stride; 504 pu1_src = pu1_src_temp + (i4_edge_count * 8 * ps_deblk->i4_luma_pic_stride); 505 506 if(1 == top_luma_edge_filter_flag) 507 { 508 //Deblock the last vertical_4x4_column of previous CTB 509 if(ps_deblk->i4_deblock_left_ctb_edge == 1) 510 { 511 u4_bs = ps_deblk->au1_prev_bs[i4_edge_count] & 0x3; 512 if(u4_bs != 0) 513 { 514 qp_p = *(pi1_ctb_row_qp_p - 1); 515 qp_q = *(pi1_ctb_row_qp_q - 1); 516 517 ps_func_slector->ihevc_deblk_luma_horz_fptr( 518 pu1_src, 519 ps_deblk->i4_luma_pic_stride, 520 u4_bs, 521 qp_p, 522 qp_q, 523 ps_deblk->i4_beta_offset_div2, 524 ps_deblk->i4_tc_offset_div2, 525 1, 526 1); 527 } 528 529 pu1_src += 4; 530 col_size_temp--; 531 } 532 //Start deblocking current CTB 533 u4_bs = *(bs_horz); 534 535 for(col = 0; col < col_size_temp;) 536 { 537 bs_lz = CLZ(u4_bs) >> 1; 538 if(0 != bs_lz) 539 { 540 u4_bs = u4_bs << (bs_lz << 1); 541 pu1_src += 4 * bs_lz; 542 col += bs_lz; 543 continue; 544 } 545 qp_p = *(pi1_ctb_row_qp_p + col); 546 qp_q = *(pi1_ctb_row_qp_q + col); 547 548 ps_func_slector->ihevc_deblk_luma_horz_fptr( 549 pu1_src, 550 ps_deblk->i4_luma_pic_stride, 551 u4_bs >> (sizeof(u4_bs) * 8 - 2), 552 qp_p, 553 qp_q, 554 ps_deblk->i4_beta_offset_div2, 555 ps_deblk->i4_tc_offset_div2, 556 filter_p, 557 filter_q); 558 559 pu1_src += 4; 560 u4_bs = u4_bs << 2; 561 col++; 562 } 563 //Store the last vertical_4x4 column of CTB's info for next CTB deblocking 564 u4_bs = *bs_horz; 565 ps_deblk->au1_prev_bs[i4_edge_count] = 566 (UWORD8)(((u4_bs << ((ctb_size >> 1) - 2))) >> 30); 567 } 568 bs_horz += 1; 569 pi1_ctb_row_qp_p += (u4_qp_buffer_stride << 1); 570 top_luma_edge_filter_flag = 1; 571 } 572 } 573 574 ////////////////////////////////////////////////////////////////////////////// 575 /* Chroma Horizontal Edge */ 576 pu1_src_uv = ps_deblk->pu1_ctb_uv; 577 col_size = ctb_size / 8; 578 579 /* If the ctb is the 1st ctb of row, */ 580 /* Decrement the loop count to exclude filtering of last 4 pixels */ 581 /* else shift the src pointer by 8 (uv) pixels to do filtering for shifted ctb */ 582 if(ps_deblk->i4_deblock_left_ctb_edge == 1) 583 { 584 pu1_src_uv -= 8; 585 586 /*If the ctb is at the horizonatl end of PIC*/ 587 /* Increase the column size to filter last 8 (uv) pixels */ 588 col_size += last_col; 589 } 590 else if(!last_col) 591 { 592 col_size--; 593 } 594 595 { 596 UWORD8 *pu1_src_temp = pu1_src_uv; 597 598 //pu1_ctb_row_qp_p and pu1_ctb_row_qp_q point to alternate rows 599 pi1_ctb_row_qp_p = ps_deblk_ctb_row_params->pi1_ctb_row_qp; 600 num_rows_for_horz_filt = ctb_size / ((0 == u1_is_422) ? 16 : 8); 601 602 for(i4_edge_count = 0; i4_edge_count < num_rows_for_horz_filt; i4_edge_count++) 603 { 604 WORD32 col_size_temp = col_size; 605 606 pi1_ctb_row_qp_q = pi1_ctb_row_qp_p + u4_qp_buffer_stride; 607 pu1_src_uv = pu1_src_temp + (i4_edge_count * 8 * ps_deblk->i4_chroma_pic_stride); 608 609 if(1 == top_chroma_edge_filter_flag) 610 { 611 //Deblock the last vertical _4x4_column of previous CTB 612 if(ps_deblk->i4_deblock_left_ctb_edge == 1) 613 { 614 u4_bs = ps_deblk->au1_prev_bs_uv[i4_edge_count] & 0x2; 615 616 if(u4_bs == 2) 617 { 618 qp_p = *(pi1_ctb_row_qp_p - 1); 619 qp_q = *(pi1_ctb_row_qp_q - 1); 620 621 pf_deblk_chroma_horz( 622 pu1_src_uv, 623 ps_deblk->i4_chroma_pic_stride, 624 qp_p, 625 qp_q, 626 ps_deblk->i4_cb_qp_indx_offset, 627 ps_deblk->i4_cr_qp_indx_offset, 628 ps_deblk->i4_tc_offset_div2, 629 1, 630 1); 631 } 632 633 pu1_src_uv += 8; 634 col_size_temp--; 635 } 636 637 //Start deblocking current CTB 638 u4_bs = *(bs_horz_uv)&0x88888888; 639 640 for(col = 0; col < col_size_temp;) 641 { 642 bs_lz = CLZ(u4_bs) >> 2; 643 644 if(0 != bs_lz) 645 { 646 u4_bs = u4_bs << (bs_lz << 2); 647 pu1_src_uv += (8 * bs_lz); 648 649 col += bs_lz; 650 continue; 651 } 652 653 qp_p = *(pi1_ctb_row_qp_p + (col << 1)); 654 qp_q = *(pi1_ctb_row_qp_q + (col << 1)); 655 656 pf_deblk_chroma_horz( 657 pu1_src_uv, 658 ps_deblk->i4_chroma_pic_stride, 659 qp_p, 660 qp_q, 661 ps_deblk->i4_cb_qp_indx_offset, 662 ps_deblk->i4_cr_qp_indx_offset, 663 ps_deblk->i4_tc_offset_div2, 664 filter_p, 665 filter_q); 666 667 pu1_src_uv += 8; 668 u4_bs = u4_bs << 4; 669 col++; 670 } 671 672 //Store the last vertical_4x4 column of CTB's info for next CTB deblocking 673 u4_bs = *bs_horz_uv; 674 ps_deblk->au1_prev_bs_uv[i4_edge_count] = 675 (UWORD8)(((u4_bs << ((ctb_size >> 1) - 4))) >> 30); 676 } 677 678 bs_horz_uv += ((0 == u1_is_422) + 1); 679 pi1_ctb_row_qp_p += (u4_qp_buffer_stride << ((0 == u1_is_422) + 1)); 680 top_chroma_edge_filter_flag = 1; 681 } 682 } 683 684 return; 685 } 686