1 /****************************************************************************** 2 * 3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ******************************************************************************/ 18 /** 19 ******************************************************************************* 20 * @file 21 * ihevc_deblk.c 22 * 23 * @brief 24 * Contains definition for the ctb level deblk function 25 * 26 * @author 27 * Srinivas T 28 * 29 * @par List of Functions: 30 * - ihevc_deblk() 31 * 32 * @remarks 33 * None 34 * 35 ******************************************************************************* 36 */ 37 38 #include <stdio.h> 39 #include <stddef.h> 40 #include <stdlib.h> 41 #include <string.h> 42 #include <assert.h> 43 44 #include "ihevc_typedefs.h" 45 #include "iv.h" 46 #include "ivd.h" 47 #include "ihevcd_cxa.h" 48 #include "ithread.h" 49 50 #include "ihevc_defs.h" 51 #include "ihevc_debug.h" 52 #include "ihevc_defs.h" 53 #include "ihevc_structs.h" 54 #include "ihevc_macros.h" 55 #include "ihevc_platform_macros.h" 56 #include "ihevc_cabac_tables.h" 57 58 #include "ihevc_error.h" 59 #include "ihevc_common_tables.h" 60 61 #include "ihevcd_trace.h" 62 #include "ihevcd_defs.h" 63 #include "ihevcd_function_selector.h" 64 #include "ihevcd_structs.h" 65 #include "ihevcd_error.h" 66 #include "ihevcd_nal.h" 67 #include "ihevcd_bitstream.h" 68 #include "ihevcd_job_queue.h" 69 
#include "ihevcd_utils.h"
#include "ihevcd_debug.h"

#include "ihevc_deblk.h"
#include "ihevc_deblk_tables.h"
#include "ihevcd_profile.h"

/**
*******************************************************************************
*
* @brief
*  Returns the slice header of the CTB above the CTB being deblocked
*
* @par Description:
*  Looks up pu1_slice_idx for the CTB one row above the (possibly advanced)
*  current CTB. When there is no row above (i4_ctb_y is 0 and
*  i4_is_last_ctb_y is 0) the lookup index would be negative; in that case
*  the current slice header is returned instead. The caller never
*  dereferences the result in that case because the top boundary strength
*  bits are not merged in and row 0 is therefore always skipped.
*
* @param[in] ps_deblk
*  Pointer to the deblock context
*
* @param[in] i4_is_last_ctb_y
*  Non-zero when the caller has advanced the CTB position down by one row
*
* @returns
*  Pointer to a slice header (never an out-of-range lookup)
*
*******************************************************************************
*/
static slice_header_t *ihevcd_deblk_top_slice_hdr(deblk_ctxt_t *ps_deblk,
                                                  WORD32 i4_is_last_ctb_y)
{
    WORD32 pic_wd_in_ctb = ps_deblk->ps_sps->i2_pic_wd_in_ctb;
    WORD32 cur_ctb_indx;

    if(!(ps_deblk->i4_ctb_y || i4_is_last_ctb_y))
        return ps_deblk->ps_slice_hdr;

    cur_ctb_indx = ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * pic_wd_in_ctb;
    if(i4_is_last_ctb_y)
        cur_ctb_indx += pic_wd_in_ctb;

    return ps_deblk->ps_codec->ps_slice_hdr_base +
                    ps_deblk->pu1_slice_idx[cur_ctb_indx - pic_wd_in_ctb];
}

/**
*******************************************************************************
*
* @brief
*  Returns the slice header of the CTB to the left of the CTB being deblocked
*
* @par Description:
*  Looks up pu1_slice_idx for the CTB one column to the left of the (possibly
*  advanced) current CTB. When there is no column to the left (i4_ctb_x is 0
*  and i4_is_last_ctb_x is 0) the current slice header is returned instead of
*  indexing before the start of the row. The caller never dereferences the
*  result in that case because the left boundary strength bits are not merged
*  in and column 0 is therefore always skipped.
*
* @param[in] ps_deblk
*  Pointer to the deblock context
*
* @param[in] i4_is_last_ctb_x
*  Non-zero when the caller has advanced the CTB position right by one column
*
* @returns
*  Pointer to a slice header (never an out-of-range lookup)
*
*******************************************************************************
*/
static slice_header_t *ihevcd_deblk_left_slice_hdr(deblk_ctxt_t *ps_deblk,
                                                   WORD32 i4_is_last_ctb_x)
{
    WORD32 cur_ctb_indx;

    if(!(ps_deblk->i4_ctb_x || i4_is_last_ctb_x))
        return ps_deblk->ps_slice_hdr;

    cur_ctb_indx = ps_deblk->i4_ctb_x +
                    ps_deblk->i4_ctb_y * ps_deblk->ps_sps->i2_pic_wd_in_ctb;
    if(i4_is_last_ctb_x)
        cur_ctb_indx += 1;

    return ps_deblk->ps_codec->ps_slice_hdr_base +
                    ps_deblk->pu1_slice_idx[cur_ctb_indx - 1];
}

/**
*******************************************************************************
*
* @brief
*  Deblock CTB level function.
*
* @par Description:
*  For a given CTB, deblocking on both vertical and
*  horizontal edges is done. Both the luma and chroma
*  blocks are processed. Processing is done on a shifted CTB: vertical edges
*  are processed starting four rows above the CTB and horizontal edges
*  starting four pixels to the left of it.
*
* @param[in] ps_deblk
*  Pointer to the deblock context
*
* @param[in] i4_is_last_ctb_x
*  When non-zero, the BS/QP/no-loop-filter pointers and the source pointer
*  are advanced by one CTB to the right, the horizontal BS for that position
*  is cleared, and vertical edge filtering (luma and chroma) is skipped
*
* @param[in] i4_is_last_ctb_y
*  When non-zero, the BS/QP/no-loop-filter pointers and the source pointer
*  are advanced by one CTB row down, the vertical BS for that position
*  is cleared, and horizontal edge filtering (luma and chroma) is skipped
*
* @returns
*  None
*
* @remarks
*  None
*
*******************************************************************************
*/

void ihevcd_deblk_ctb(deblk_ctxt_t *ps_deblk,
                      WORD32 i4_is_last_ctb_x,
                      WORD32 i4_is_last_ctb_y)
{
    WORD32 ctb_size;
    WORD32 log2_ctb_size;
    UWORD32 u4_bs;
    WORD32 bs_tz; /* Number of zero BS entries at the low end of u4_bs (from CTZ) */
    WORD32 qp_p, qp_q;

    WORD32 filter_p, filter_q;

    UWORD8 *pu1_src;
    WORD32 qp_strd;
    UWORD32 *pu4_vert_bs, *pu4_horz_bs;
    UWORD32 *pu4_ctb_vert_bs, *pu4_ctb_horz_bs;
    WORD32 bs_strd;
    WORD32 src_strd;
    UWORD8 *pu1_qp;
    UWORD16 *pu2_ctb_no_loop_filter_flag;
    UWORD16 au2_ctb_no_loop_filter_flag[9];

    WORD32 col, row;

    /* Flag to indicate if QP is constant in CTB
     * 0 - top_left, 1 - top, 2 - left, 3 - current */
    UWORD32 u4_qp_const_in_ctb[4] = { 0, 0, 0, 0 };
    WORD32 ctb_indx;
    WORD32 chroma_yuv420sp_vu = ps_deblk->is_chroma_yuv420sp_vu;
    sps_t *ps_sps;
    pps_t *ps_pps;
    codec_t *ps_codec;
    slice_header_t *ps_slice_hdr;

    PROFILE_DISABLE_DEBLK();

    ps_sps = ps_deblk->ps_sps;
    ps_pps = ps_deblk->ps_pps;
    ps_codec = ps_deblk->ps_codec;
    ps_slice_hdr = ps_deblk->ps_slice_hdr;

    log2_ctb_size = ps_sps->i1_log2_ctb_size;
    ctb_size = (1 << ps_sps->i1_log2_ctb_size);

    /* strides are in units of number of bytes */
    /* ctb_size * ctb_size / 8 / 16 is the number of bytes needed per CTB */
    bs_strd = (ps_sps->i2_pic_wd_in_ctb + 1) << (2 * log2_ctb_size - 7);

    pu4_vert_bs = (UWORD32 *)((UWORD8 *)ps_deblk->s_bs_ctxt.pu4_pic_vert_bs +
                    (ps_deblk->i4_ctb_x << (2 * log2_ctb_size - 7)) +
                    ps_deblk->i4_ctb_y * bs_strd);
    pu4_ctb_vert_bs = pu4_vert_bs;

    pu4_horz_bs = (UWORD32 *)((UWORD8 *)ps_deblk->s_bs_ctxt.pu4_pic_horz_bs +
                    (ps_deblk->i4_ctb_x << (2 * log2_ctb_size - 7)) +
                    ps_deblk->i4_ctb_y * bs_strd);
    pu4_ctb_horz_bs = pu4_horz_bs;

    /* One QP entry per 8x8 block; qp_strd is the number of entries per row */
    qp_strd = ps_sps->i2_pic_wd_in_ctb << (log2_ctb_size - 3);
    pu1_qp = ps_deblk->s_bs_ctxt.pu1_pic_qp + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * qp_strd) << (log2_ctb_size - 3));

    pu2_ctb_no_loop_filter_flag = ps_deblk->au2_ctb_no_loop_filter_flag;

    ctb_indx = ps_deblk->i4_ctb_x + ps_sps->i2_pic_wd_in_ctb * ps_deblk->i4_ctb_y;
    if(i4_is_last_ctb_y)
    {
        /* Move every per-CTB pointer one CTB row down */
        pu4_vert_bs = (UWORD32 *)((UWORD8 *)pu4_vert_bs + bs_strd);
        pu4_ctb_vert_bs = pu4_vert_bs;
        /* ctb_size/8 is the number of edges per CTB
         * ctb_size/4 is the number of BS values needed per edge
         * divided by 8 for the number of bytes
         * 2 is the number of bits needed for each BS value */
        memset(pu4_vert_bs, 0, 1 << (2 * log2_ctb_size - 7));

        pu1_qp += (qp_strd << (log2_ctb_size - 3));
        pu2_ctb_no_loop_filter_flag += (ctb_size >> 3);
        ctb_indx += ps_sps->i2_pic_wd_in_ctb;
    }

    if(i4_is_last_ctb_x)
    {
        /* Move every per-CTB pointer one CTB to the right */
        pu4_horz_bs = (UWORD32 *)((UWORD8 *)pu4_horz_bs + (1 << (2 * log2_ctb_size - 7)));
        pu4_ctb_horz_bs = pu4_horz_bs;
        memset(pu4_horz_bs, 0, 1 << (2 * log2_ctb_size - 7));

        pu1_qp += (ctb_size >> 3);

        /* Work on a local copy of the flags shifted right by one CTB width
         * (in 8x8 units); at most (64 >> 3) + 1 = 9 rows are needed */
        for(row = 0; row < (ctb_size >> 3) + 1; row++)
            au2_ctb_no_loop_filter_flag[row] = ps_deblk->au2_ctb_no_loop_filter_flag[row] >> (ctb_size >> 3);
        pu2_ctb_no_loop_filter_flag = au2_ctb_no_loop_filter_flag;
        ctb_indx += 1;
    }

    /* One bit per CTB in pu1_pic_qp_const_in_ctb */
    u4_qp_const_in_ctb[3] = ps_deblk->s_bs_ctxt.pu1_pic_qp_const_in_ctb[(ctb_indx) >> 3] & (1 << (ctb_indx & 7));

    if(ps_deblk->i4_ctb_x || i4_is_last_ctb_x)
    {
        u4_qp_const_in_ctb[2] = ps_deblk->s_bs_ctxt.pu1_pic_qp_const_in_ctb[(ctb_indx - 1) >> 3] & (1 << ((ctb_indx - 1) & 7));
    }

    if((ps_deblk->i4_ctb_x || i4_is_last_ctb_x) && (ps_deblk->i4_ctb_y || i4_is_last_ctb_y))
    {
        u4_qp_const_in_ctb[0] =
                        ps_deblk->s_bs_ctxt.pu1_pic_qp_const_in_ctb[(ctb_indx - ps_sps->i2_pic_wd_in_ctb - 1) >> 3] &
                        (1 << ((ctb_indx - ps_sps->i2_pic_wd_in_ctb - 1) & 7));
    }

    if(ps_deblk->i4_ctb_y || i4_is_last_ctb_y)
    {
        u4_qp_const_in_ctb[1] =
                        ps_deblk->s_bs_ctxt.pu1_pic_qp_const_in_ctb[(ctb_indx - ps_sps->i2_pic_wd_in_ctb) >> 3] &
                        (1 << ((ctb_indx - ps_sps->i2_pic_wd_in_ctb) & 7));
    }

    src_strd = ps_codec->i4_strd;

    /* Luma Vertical Edge */

    if(0 == i4_is_last_ctb_x)
    {
        /* Top CTB's slice header (current slice header when there is no top CTB) */
        slice_header_t *ps_slice_hdr_top = ihevcd_deblk_top_slice_hdr(ps_deblk, i4_is_last_ctb_y);

        pu1_src = ps_deblk->pu1_cur_pic_luma + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_deblk->ps_codec->i4_strd) << (log2_ctb_size));
        pu1_src += i4_is_last_ctb_y ? ps_deblk->ps_codec->i4_strd << log2_ctb_size : 0;

        /** Deblocking is done on a shifted CTB -
         *  Vertical edge processing is done by shifting the CTB up by four pixels */
        pu1_src -= 4 * src_strd;

        for(col = 0; col < ctb_size / 8; col++)
        {
            WORD32 shift = 0;

            /* downshift vert_bs by ctb_size/2 for each column
             *  shift = (col & ((MAX_CTB_SIZE >> log2_ctb_size) - 1)) << (log2_ctb_size - 1);
             *  which will reduce to the following assuming ctb size is one of 16, 32 and 64
             *  and deblocking is done on 8x8 grid
             */
            if(6 != log2_ctb_size)
                shift = (col & 1) << (log2_ctb_size - 1);

            /* BS for the column - Last row is excluded and the top row is included*/
            u4_bs = (pu4_vert_bs[0] >> shift) << 2;

            if(ps_deblk->i4_ctb_y || i4_is_last_ctb_y)
            {
                /* Picking the last BS of the previous CTB corresponding to the same column */
                UWORD32 *pu4_vert_bs_top = (UWORD32 *)((UWORD8 *)pu4_vert_bs - bs_strd);
                UWORD32 u4_top_bs = (*pu4_vert_bs_top) >> (shift + (1 << (log2_ctb_size - 1)) - 2);
                u4_bs |= u4_top_bs & 3;
            }

            for(row = 0; row < ctb_size / 4;)
            {
                WORD8 i1_beta_offset_div2 = ps_slice_hdr->i1_beta_offset_div2;
                WORD8 i1_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;

                /* Trailing zeros are computed and the corresponding rows are not processed */
                bs_tz = CTZ(u4_bs) >> 1;
                if(0 != bs_tz)
                {
                    u4_bs = u4_bs >> (bs_tz << 1);
                    /* Never advance the source pointer past the end of this column */
                    if((row + bs_tz) >= (ctb_size / 4))
                        pu1_src += 4 * (ctb_size / 4 - row) * src_strd;
                    else
                        pu1_src += 4 * bs_tz * src_strd;

                    row += bs_tz;
                    continue;
                }

                if(0 == row)
                {
                    /* Row 0 lies in the CTB above (shifted CTB): use its offsets and QPs */
                    i1_beta_offset_div2 = ps_slice_hdr_top->i1_beta_offset_div2;
                    i1_tc_offset_div2 = ps_slice_hdr_top->i1_tc_offset_div2;

                    if(0 == col)
                    {
                        qp_p = u4_qp_const_in_ctb[0] ?
                                        pu1_qp[-ctb_size / 8 * qp_strd - ctb_size / 8] :
                                        pu1_qp[-qp_strd - 1];
                    }
                    else
                    {
                        qp_p = u4_qp_const_in_ctb[1] ?
                                        pu1_qp[-ctb_size / 8 * qp_strd] :
                                        pu1_qp[col - 1 - qp_strd];
                    }

                    qp_q = u4_qp_const_in_ctb[1] ?
                                    pu1_qp[-ctb_size / 8 * qp_strd] :
                                    pu1_qp[col - qp_strd];
                }
                else
                {
                    if(0 == col)
                    {
                        qp_p = u4_qp_const_in_ctb[2] ?
                                        pu1_qp[-ctb_size / 8] :
                                        pu1_qp[((row - 1) >> 1) * qp_strd - 1];
                    }
                    else
                    {
                        qp_p = u4_qp_const_in_ctb[3] ?
                                        pu1_qp[0] :
                                        pu1_qp[((row - 1) >> 1) * qp_strd + col - 1];
                    }

                    qp_q = u4_qp_const_in_ctb[3] ?
                                    pu1_qp[0] :
                                    pu1_qp[((row - 1) >> 1) * qp_strd + col];
                }

                filter_p = (pu2_ctb_no_loop_filter_flag[(row + 1) >> 1] >> col) & 1;
                filter_q = (pu2_ctb_no_loop_filter_flag[(row + 1) >> 1] >> col) & 2;
                /* filter_p and filter_q are inverted as they are calculated using no_loop_filter_flags */
                filter_p = !filter_p;
                filter_q = !filter_q;

                if(filter_p || filter_q)
                {
                    /* NOTE(review): the dump records the current slice's offsets,
                     * while the filter below receives the top slice's offsets
                     * on row 0 - confirm this is intended */
                    DUMP_DEBLK_LUMA_VERT(pu1_src, src_strd,
                                         u4_bs & 3, qp_p, qp_q,
                                         ps_slice_hdr->i1_beta_offset_div2,
                                         ps_slice_hdr->i1_tc_offset_div2,
                                         filter_p, filter_q);
                    ps_codec->s_func_selector.ihevc_deblk_luma_vert_fptr(pu1_src, src_strd,
                                                                         u4_bs & 3, qp_p, qp_q,
                                                                         i1_beta_offset_div2,
                                                                         i1_tc_offset_div2,
                                                                         filter_p, filter_q);
                }

                pu1_src += 4 * src_strd;
                u4_bs = u4_bs >> 2;
                row++;
            }

            /* A 32-bit BS word holds one column for 64x64 CTBs and two
             * columns for 32x32 CTBs */
            if((64 == ctb_size) ||
                            ((32 == ctb_size) && (col & 1)))
            {
                pu4_vert_bs++;
            }
            /* Back to the top of the shifted CTB, next 8-pixel column */
            pu1_src -= (src_strd << log2_ctb_size);
            pu1_src += 8;
        }
        pu4_vert_bs = pu4_ctb_vert_bs;
    }


    /* Luma Horizontal Edge */

    if(0 == i4_is_last_ctb_y)
    {
        /* Left CTB's slice header (current slice header when there is no left CTB) */
        slice_header_t *ps_slice_hdr_left = ihevcd_deblk_left_slice_hdr(ps_deblk, i4_is_last_ctb_x);

        pu1_src = ps_deblk->pu1_cur_pic_luma + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_deblk->ps_codec->i4_strd) << log2_ctb_size);
        pu1_src += i4_is_last_ctb_x ? ctb_size : 0;

        /** Deblocking is done on a shifted CTB -
         *  Horizontal edge processing is done by shifting the CTB left by four pixels */
        pu1_src -= 4;
        for(row = 0; row < ctb_size / 8; row++)
        {
            WORD32 shift = 0;

            /* downshift horz_bs by ctb_size/2 for each row
             *  shift = (row & (MAX_CTB_SIZE / ctb_size - 1)) * ctb_size / 2;
             *  which will reduce to the following assuming ctb size is one of 16, 32 and 64
             *  and deblocking is done on 8x8 grid
             */
            if(6 != log2_ctb_size)
                shift = (row & 1) << (log2_ctb_size - 1);

            /* BS for the row - Last column is excluded and the left column is included*/
            u4_bs = (pu4_horz_bs[0] >> shift) << 2;

            if(ps_deblk->i4_ctb_x || i4_is_last_ctb_x)
            {
                /** Picking the last BS of the previous CTB corresponding to the same row
                 * UWORD32 *pu4_horz_bs_left = (UWORD32 *)((UWORD8 *)pu4_horz_bs - (ctb_size / 8) * (ctb_size / 4) / 8 * 2);
                 */
                UWORD32 *pu4_horz_bs_left = (UWORD32 *)((UWORD8 *)pu4_horz_bs - (1 << (2 * log2_ctb_size - 7)));
                UWORD32 u4_left_bs = (*pu4_horz_bs_left) >> (shift + (1 << (log2_ctb_size - 1)) - 2);
                u4_bs |= u4_left_bs & 3;
            }

            for(col = 0; col < ctb_size / 4;)
            {
                WORD8 i1_beta_offset_div2 = ps_slice_hdr->i1_beta_offset_div2;
                WORD8 i1_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;

                /* Trailing zeros are computed and the corresponding columns are not processed */
                bs_tz = CTZ(u4_bs) >> 1;
                if(0 != bs_tz)
                {
                    u4_bs = u4_bs >> (bs_tz << 1);

                    /* Never advance the source pointer past the end of this row */
                    if((col + bs_tz) >= (ctb_size / 4))
                        pu1_src += 4 * (ctb_size / 4 - col);
                    else
                        pu1_src += 4 * bs_tz;

                    col += bs_tz;
                    continue;
                }

                if(0 == col)
                {
                    /* Column 0 lies in the CTB to the left (shifted CTB): use its offsets and QPs */
                    i1_beta_offset_div2 = ps_slice_hdr_left->i1_beta_offset_div2;
                    i1_tc_offset_div2 = ps_slice_hdr_left->i1_tc_offset_div2;

                    if(0 == row)
                    {
                        qp_p = u4_qp_const_in_ctb[0] ?
                                        pu1_qp[-ctb_size / 8 * qp_strd - ctb_size / 8] :
                                        pu1_qp[-qp_strd - 1];
                    }
                    else
                    {
                        qp_p = u4_qp_const_in_ctb[2] ?
                                        pu1_qp[-ctb_size / 8] :
                                        pu1_qp[(row - 1) * qp_strd - 1];
                    }

                    qp_q = u4_qp_const_in_ctb[2] ?
                                    pu1_qp[-ctb_size / 8] :
                                    pu1_qp[row * qp_strd - 1];
                }
                else
                {
                    if(0 == row)
                    {
                        qp_p = u4_qp_const_in_ctb[1] ?
                                        pu1_qp[-ctb_size / 8 * qp_strd] :
                                        pu1_qp[((col - 1) >> 1) - qp_strd];
                    }
                    else
                    {
                        qp_p = u4_qp_const_in_ctb[3] ?
                                        pu1_qp[0] :
                                        pu1_qp[((col - 1) >> 1) + (row - 1) * qp_strd];
                    }

                    qp_q = u4_qp_const_in_ctb[3] ?
                                    pu1_qp[0] :
                                    pu1_qp[((col - 1) >> 1) + row * qp_strd];
                }

                filter_p = (pu2_ctb_no_loop_filter_flag[row] >> ((col + 1) >> 1)) & 1;
                filter_q = (pu2_ctb_no_loop_filter_flag[row + 1] >> ((col + 1) >> 1)) & 1;
                /* filter_p and filter_q are inverted as they are calculated using no_loop_filter_flags */
                filter_p = !filter_p;
                filter_q = !filter_q;

                if(filter_p || filter_q)
                {
                    /* NOTE(review): dump logs the current slice's offsets, not
                     * the ones passed to the filter on column 0 - confirm */
                    DUMP_DEBLK_LUMA_HORZ(pu1_src, src_strd,
                                         u4_bs & 3, qp_p, qp_q,
                                         ps_slice_hdr->i1_beta_offset_div2,
                                         ps_slice_hdr->i1_tc_offset_div2,
                                         filter_p, filter_q);
                    ps_codec->s_func_selector.ihevc_deblk_luma_horz_fptr(pu1_src, src_strd,
                                                                         u4_bs & 3, qp_p, qp_q,
                                                                         i1_beta_offset_div2,
                                                                         i1_tc_offset_div2, filter_p, filter_q);
                }

                pu1_src += 4;
                u4_bs = u4_bs >> 2;
                col++;
            }

            /* A 32-bit BS word holds one row for 64x64 CTBs and two rows
             * for 32x32 CTBs */
            if((64 == ctb_size) ||
                            ((32 == ctb_size) && (row & 1)))
            {
                pu4_horz_bs++;
            }
            /* Back to the left of the shifted CTB, next 8-pixel row */
            pu1_src -= ctb_size;
            pu1_src += (src_strd << 3);
        }
        pu4_horz_bs = pu4_ctb_horz_bs;
    }


    /* Chroma Vertical Edge */

    if(0 == i4_is_last_ctb_x)
    {
        /* Top CTB's slice header (current slice header when there is no top CTB) */
        slice_header_t *ps_slice_hdr_top = ihevcd_deblk_top_slice_hdr(ps_deblk, i4_is_last_ctb_y);

        pu1_src = ps_deblk->pu1_cur_pic_chroma + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_deblk->ps_codec->i4_strd / 2) << log2_ctb_size);
        pu1_src += i4_is_last_ctb_y ? (ps_deblk->ps_codec->i4_strd / 2) << log2_ctb_size : 0;

        /** Deblocking is done on a shifted CTB -
         *  Vertical edge processing is done by shifting the CTB up by four pixels */
        pu1_src -= 4 * src_strd;

        for(col = 0; col < ctb_size / 16; col++)
        {
            /* BS for the column - Last row is excluded and the top row is included*/
            u4_bs = pu4_vert_bs[0] << 2;

            if(ps_deblk->i4_ctb_y || i4_is_last_ctb_y)
            {
                /* Picking the last BS of the previous CTB corresponding to the same column */
                UWORD32 *pu4_vert_bs_top = (UWORD32 *)((UWORD8 *)pu4_vert_bs - bs_strd);
                UWORD32 u4_top_bs = (*pu4_vert_bs_top) >> ((1 << (log2_ctb_size - 1)) - 2);
                u4_bs |= u4_top_bs & 3;
            }

            /* Every alternate boundary strength value is used for chroma;
             * only the upper bit of each kept 2-bit BS value is retained */
            u4_bs &= 0x22222222;

            for(row = 0; row < ctb_size / 8;)
            {
                WORD8 i1_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;

                /* Each chroma step consumes 4 bits (two luma BS entries) */
                bs_tz = CTZ(u4_bs) >> 2;
                if(0 != bs_tz)
                {
                    if((row + bs_tz) >= (ctb_size / 8))
                        pu1_src += 4 * (ctb_size / 8 - row) * src_strd;
                    else
                        pu1_src += 4 * bs_tz * src_strd;
                    row += bs_tz;
                    u4_bs = u4_bs >> (bs_tz << 2);
                    continue;
                }

                if(0 == row)
                {
                    i1_tc_offset_div2 = ps_slice_hdr_top->i1_tc_offset_div2;

                    if(0 == col)
                    {
                        qp_p = u4_qp_const_in_ctb[0] ?
                                        pu1_qp[-ctb_size / 8 * qp_strd - ctb_size / 8] :
                                        pu1_qp[-qp_strd - 1];
                    }
                    else
                    {
                        qp_p = u4_qp_const_in_ctb[1] ?
                                        pu1_qp[-ctb_size / 8 * qp_strd] :
                                        pu1_qp[2 * col - 1 - qp_strd];
                    }

                    qp_q = u4_qp_const_in_ctb[1] ?
                                    pu1_qp[-ctb_size / 8 * qp_strd] :
                                    pu1_qp[2 * col - qp_strd];
                }
                else
                {
                    if(0 == col)
                    {
                        qp_p = u4_qp_const_in_ctb[2] ?
                                        pu1_qp[-ctb_size / 8] :
                                        pu1_qp[(row - 1) * qp_strd - 1];
                    }
                    else
                    {
                        qp_p = u4_qp_const_in_ctb[3] ?
                                        pu1_qp[0] :
                                        pu1_qp[(row - 1) * qp_strd + 2 * col - 1];
                    }

                    qp_q = u4_qp_const_in_ctb[3] ?
                                    pu1_qp[0] :
                                    pu1_qp[(row - 1) * qp_strd + 2 * col];
                }

                filter_p = (pu2_ctb_no_loop_filter_flag[row] >> (col << 1)) & 1;
                filter_q = (pu2_ctb_no_loop_filter_flag[row] >> (col << 1)) & 2;
                /* filter_p and filter_q are inverted as they are calculated using no_loop_filter_flags */
                filter_p = !filter_p;
                filter_q = !filter_q;

                if(filter_p || filter_q)
                {
                    ASSERT(1 == ((u4_bs & 3) >> 1));
                    DUMP_DEBLK_CHROMA_VERT(pu1_src, src_strd,
                                           u4_bs & 3, qp_p, qp_q,
                                           ps_pps->i1_pic_cb_qp_offset,
                                           ps_pps->i1_pic_cr_qp_offset,
                                           ps_slice_hdr->i1_tc_offset_div2,
                                           filter_p, filter_q);
                    if(chroma_yuv420sp_vu)
                    {
                        /* VU ordering: Cb/Cr offsets are swapped.
                         * NOTE(review): qp and filter flags for P and Q are
                         * swapped as well - confirm this is intended */
                        ps_codec->s_func_selector.ihevc_deblk_chroma_vert_fptr(pu1_src,
                                                                               src_strd,
                                                                               qp_q,
                                                                               qp_p,
                                                                               ps_pps->i1_pic_cr_qp_offset,
                                                                               ps_pps->i1_pic_cb_qp_offset,
                                                                               i1_tc_offset_div2,
                                                                               filter_q,
                                                                               filter_p);
                    }
                    else
                    {
                        ps_codec->s_func_selector.ihevc_deblk_chroma_vert_fptr(pu1_src,
                                                                               src_strd,
                                                                               qp_p,
                                                                               qp_q,
                                                                               ps_pps->i1_pic_cb_qp_offset,
                                                                               ps_pps->i1_pic_cr_qp_offset,
                                                                               i1_tc_offset_div2,
                                                                               filter_p,
                                                                               filter_q);
                    }
                }

                pu1_src += 4 * src_strd;
                u4_bs = u4_bs >> 4;
                row++;
            }

            pu4_vert_bs += (64 == ctb_size) ? 2 : 1;
            /* Back to the top of the shifted CTB, next 16-byte column */
            pu1_src -= ((src_strd / 2) << log2_ctb_size);
            pu1_src += 16;
        }
    }

    /* Chroma Horizontal Edge */

    if(0 == i4_is_last_ctb_y)
    {
        /* Left CTB's slice header (current slice header when there is no left CTB) */
        slice_header_t *ps_slice_hdr_left = ihevcd_deblk_left_slice_hdr(ps_deblk, i4_is_last_ctb_x);

        pu1_src = ps_deblk->pu1_cur_pic_chroma + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_deblk->ps_codec->i4_strd / 2) << log2_ctb_size);
        pu1_src += i4_is_last_ctb_x ? ctb_size : 0;

        /** Deblocking is done on a shifted CTB -
         *  Horizontal edge processing is done by shifting the CTB left by four pixels
         *  (8 bytes here because UV are interleaved) */
        pu1_src -= 8;
        for(row = 0; row < ctb_size / 16; row++)
        {
            /* BS for the row - Last column is excluded and the left column is included*/
            u4_bs = pu4_horz_bs[0] << 2;

            if(ps_deblk->i4_ctb_x || i4_is_last_ctb_x)
            {
                /** Picking the last BS of the previous CTB corresponding to the same row
                 * UWORD32 *pu4_horz_bs_left = (UWORD32 *)((UWORD8 *)pu4_horz_bs - (ctb_size / 8) * (ctb_size / 4) / 8 * 2);
                 */
                UWORD32 *pu4_horz_bs_left = (UWORD32 *)((UWORD8 *)pu4_horz_bs - (1 << (2 * log2_ctb_size - 7)));
                UWORD32 u4_left_bs = (*pu4_horz_bs_left) >> ((1 << (log2_ctb_size - 1)) - 2);
                u4_bs |= u4_left_bs & 3;
            }

            /* Every alternate boundary strength value is used for chroma;
             * only the upper bit of each kept 2-bit BS value is retained */
            u4_bs &= 0x22222222;

            for(col = 0; col < ctb_size / 8;)
            {
                WORD8 i1_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;

                /* Each chroma step consumes 4 bits (two luma BS entries) */
                bs_tz = CTZ(u4_bs) >> 2;
                if(0 != bs_tz)
                {
                    u4_bs = u4_bs >> (bs_tz << 2);

                    if((col + bs_tz) >= (ctb_size / 8))
                        pu1_src += 8 * (ctb_size / 8 - col);
                    else
                        pu1_src += 8 * bs_tz;

                    col += bs_tz;
                    continue;
                }

                if(0 == col)
                {
                    i1_tc_offset_div2 = ps_slice_hdr_left->i1_tc_offset_div2;

                    if(0 == row)
                    {
                        qp_p = u4_qp_const_in_ctb[0] ?
                                        pu1_qp[-ctb_size / 8 * qp_strd - ctb_size / 8] :
                                        pu1_qp[-qp_strd - 1];
                    }
                    else
                    {
                        qp_p = u4_qp_const_in_ctb[2] ?
                                        pu1_qp[-ctb_size / 8] :
                                        pu1_qp[(2 * row - 1) * qp_strd - 1];
                    }

                    qp_q = u4_qp_const_in_ctb[2] ?
                                    pu1_qp[-ctb_size / 8] :
                                    pu1_qp[(2 * row) * qp_strd - 1];
                }
                else
                {
                    if(0 == row)
                    {
                        qp_p = u4_qp_const_in_ctb[1] ?
                                        pu1_qp[-ctb_size / 8 * qp_strd] :
                                        pu1_qp[col - 1 - qp_strd];
                    }
                    else
                    {
                        qp_p = u4_qp_const_in_ctb[3] ?
                                        pu1_qp[0] :
                                        pu1_qp[(col - 1) + (2 * row - 1) * qp_strd];
                    }

                    qp_q = u4_qp_const_in_ctb[3] ?
                                    pu1_qp[0] :
                                    pu1_qp[(col - 1) + 2 * row * qp_strd];
                }

                filter_p = (pu2_ctb_no_loop_filter_flag[row << 1] >> col) & 1;
                filter_q = (pu2_ctb_no_loop_filter_flag[(row << 1) + 1] >> col) & 1;
                /* filter_p and filter_q are inverted as they are calculated using no_loop_filter_flags */
                filter_p = !filter_p;
                filter_q = !filter_q;

                if(filter_p || filter_q)
                {
                    ASSERT(1 == ((u4_bs & 3) >> 1));
                    DUMP_DEBLK_CHROMA_HORZ(pu1_src, src_strd,
                                           u4_bs & 3, qp_p, qp_q,
                                           ps_pps->i1_pic_cb_qp_offset,
                                           ps_pps->i1_pic_cr_qp_offset,
                                           ps_slice_hdr->i1_tc_offset_div2,
                                           filter_p, filter_q);
                    if(chroma_yuv420sp_vu)
                    {
                        /* VU ordering: Cb/Cr offsets are swapped.
                         * NOTE(review): qp and filter flags for P and Q are
                         * swapped as well - confirm this is intended */
                        ps_codec->s_func_selector.ihevc_deblk_chroma_horz_fptr(pu1_src,
                                                                               src_strd,
                                                                               qp_q,
                                                                               qp_p,
                                                                               ps_pps->i1_pic_cr_qp_offset,
                                                                               ps_pps->i1_pic_cb_qp_offset,
                                                                               i1_tc_offset_div2,
                                                                               filter_q,
                                                                               filter_p);
                    }
                    else
                    {
                        ps_codec->s_func_selector.ihevc_deblk_chroma_horz_fptr(pu1_src,
                                                                               src_strd,
                                                                               qp_p,
                                                                               qp_q,
                                                                               ps_pps->i1_pic_cb_qp_offset,
                                                                               ps_pps->i1_pic_cr_qp_offset,
                                                                               i1_tc_offset_div2,
                                                                               filter_p,
                                                                               filter_q);
                    }
                }

                pu1_src += 8;
                u4_bs = u4_bs >> 4;
                col++;
            }

            pu4_horz_bs += (64 == ctb_size) ? 2 : 1;
            /* Back to the left of the shifted CTB, next group of 8 chroma rows */
            pu1_src -= ctb_size;
            pu1_src += 8 * src_strd;

        }
    }
}