1 /****************************************************************************** 2 * 3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ******************************************************************************/ 18 /** 19 ******************************************************************************* 20 * @file 21 * ihevc_boundary_strength.c 22 * 23 * @brief 24 * Contains functions for computing boundary strength 25 * 26 * @author 27 * Harish 28 * 29 * @par List of Functions: 30 * 31 * @remarks 32 * None 33 * 34 ******************************************************************************* 35 */ 36 /*****************************************************************************/ 37 /* File Includes */ 38 /*****************************************************************************/ 39 #include <stdio.h> 40 #include <stddef.h> 41 #include <stdlib.h> 42 #include <string.h> 43 44 #include "ihevc_typedefs.h" 45 #include "iv.h" 46 #include "ivd.h" 47 #include "ihevcd_cxa.h" 48 #include "ithread.h" 49 50 #include "ihevc_defs.h" 51 #include "ihevc_debug.h" 52 #include "ihevc_defs.h" 53 #include "ihevc_structs.h" 54 #include "ihevc_macros.h" 55 #include "ihevc_platform_macros.h" 56 #include "ihevc_cabac_tables.h" 57 58 #include "ihevc_error.h" 59 #include "ihevc_common_tables.h" 60 61 #include "ihevcd_trace.h" 62 #include "ihevcd_defs.h" 63 #include "ihevcd_function_selector.h" 64 #include "ihevcd_structs.h" 65 #include "ihevcd_error.h" 66 #include "ihevcd_nal.h" 67 #include "ihevcd_bitstream.h" 68 #include "ihevcd_job_queue.h" 69 #include "ihevcd_utils.h" 70 #include "ihevcd_profile.h" 71 72 /*****************************************************************************/ 73 /* Function Prototypes */ 74 /*****************************************************************************/ 75 76 77 #define SET_NGBHR_ALL_AVAIL(avail) avail = 0x1F; 78 79 #define SET_NGBHR_BOTLEFT_NOTAVAIL(avail) avail &= ~0x10; 80 #define SET_NGBHR_LEFT_NOTAVAIL(avail) avail &= ~0x8; 81 #define SET_NGBHR_TOPLEFT_NOTAVAIL(avail) avail &= ~0x4; 82 #define SET_NGBHR_TOP_NOTAVAIL(avail) avail &= ~0x2; 83 #define SET_NGBHR_TOPRIGHT_NOTAVAIL(avail) avail &= ~0x1; 84 85 WORD32 ihevcd_pu_boundary_strength(pu_t *ps_pu, 86 pu_t *ps_ngbr_pu) 87 { 88 WORD32 i4_bs; 89 UWORD32 l0_ref_pic_buf_id, l1_ref_pic_buf_id; 90 UWORD32 ngbr_l0_ref_pic_buf_id, ngbr_l1_ref_pic_buf_id; 91 92 WORD16 i2_mv_x0, i2_mv_y0, i2_mv_x1, i2_mv_y1; 93 WORD16 i2_ngbr_mv_x0, i2_ngbr_mv_y0, i2_ngbr_mv_x1, i2_ngbr_mv_y1; 94 95 WORD32 num_mv, ngbr_num_mv; 96 97 num_mv = (PRED_BI == ps_pu->b2_pred_mode) ? 2 : 1; 98 ngbr_num_mv = (PRED_BI == ps_ngbr_pu->b2_pred_mode) ? 2 : 1; 99 100 l0_ref_pic_buf_id = ps_pu->mv.i1_l0_ref_pic_buf_id; 101 l1_ref_pic_buf_id = ps_pu->mv.i1_l1_ref_pic_buf_id; 102 ngbr_l0_ref_pic_buf_id = ps_ngbr_pu->mv.i1_l0_ref_pic_buf_id; 103 ngbr_l1_ref_pic_buf_id = ps_ngbr_pu->mv.i1_l1_ref_pic_buf_id; 104 105 106 i2_mv_x0 = ps_pu->mv.s_l0_mv.i2_mvx; 107 i2_mv_y0 = ps_pu->mv.s_l0_mv.i2_mvy; 108 i2_mv_x1 = ps_pu->mv.s_l1_mv.i2_mvx; 109 i2_mv_y1 = ps_pu->mv.s_l1_mv.i2_mvy; 110 111 i2_ngbr_mv_x0 = ps_ngbr_pu->mv.s_l0_mv.i2_mvx; 112 i2_ngbr_mv_y0 = ps_ngbr_pu->mv.s_l0_mv.i2_mvy; 113 i2_ngbr_mv_x1 = ps_ngbr_pu->mv.s_l1_mv.i2_mvx; 114 i2_ngbr_mv_y1 = ps_ngbr_pu->mv.s_l1_mv.i2_mvy; 115 116 117 /* If two motion vectors are used */ 118 if((2 == num_mv) && 119 (2 == ngbr_num_mv)) 120 { 121 if((l0_ref_pic_buf_id == ngbr_l0_ref_pic_buf_id && l1_ref_pic_buf_id == ngbr_l1_ref_pic_buf_id) || 122 (l0_ref_pic_buf_id == ngbr_l1_ref_pic_buf_id && l1_ref_pic_buf_id == ngbr_l0_ref_pic_buf_id)) 123 { 124 if(l0_ref_pic_buf_id != l1_ref_pic_buf_id) /* Different L0 and L1 */ 125 { 126 if(l0_ref_pic_buf_id == ngbr_l0_ref_pic_buf_id) 127 { 128 i4_bs = (ABS(i2_mv_x0 - i2_ngbr_mv_x0) < 4) && 129 (ABS(i2_mv_y0 - i2_ngbr_mv_y0) < 4) && 130 (ABS(i2_mv_x1 - i2_ngbr_mv_x1) < 4) && 131 (ABS(i2_mv_y1 - i2_ngbr_mv_y1) < 4) ? 0 : 1; 132 } 133 else 134 { 135 i4_bs = (ABS(i2_mv_x0 - i2_ngbr_mv_x1) < 4) && 136 (ABS(i2_mv_y0 - i2_ngbr_mv_y1) < 4) && 137 (ABS(i2_mv_x1 - i2_ngbr_mv_x0) < 4) && 138 (ABS(i2_mv_y1 - i2_ngbr_mv_y0) < 4) ? 0 : 1; 139 } 140 } 141 else /* Same L0 and L1 */ 142 { 143 i4_bs = ((ABS(i2_mv_x0 - i2_ngbr_mv_x0) >= 4) || 144 (ABS(i2_mv_y0 - i2_ngbr_mv_y0) >= 4) || 145 (ABS(i2_mv_x1 - i2_ngbr_mv_x1) >= 4) || 146 (ABS(i2_mv_y1 - i2_ngbr_mv_y1) >= 4)) && 147 ((ABS(i2_mv_x0 - i2_ngbr_mv_x1) >= 4) || 148 (ABS(i2_mv_y0 - i2_ngbr_mv_y1) >= 4) || 149 (ABS(i2_mv_x1 - i2_ngbr_mv_x0) >= 4) || 150 (ABS(i2_mv_y1 - i2_ngbr_mv_y0) >= 4)) ? 1 : 0; 151 } 152 } 153 else /* If the reference pictures used are different */ 154 { 155 i4_bs = 1; 156 } 157 } 158 159 /* If one motion vector is used in both PUs */ 160 else if((1 == num_mv) && 161 (1 == ngbr_num_mv)) 162 { 163 WORD16 i2_mv_x, i2_mv_y; 164 WORD16 i2_ngbr_mv_x, i2_ngbr_mv_y; 165 UWORD32 ref_pic_buf_id, ngbr_ref_pic_buf_id; 166 167 if(PRED_L0 == ps_pu->b2_pred_mode) 168 { 169 i2_mv_x = i2_mv_x0; 170 i2_mv_y = i2_mv_y0; 171 ref_pic_buf_id = l0_ref_pic_buf_id; 172 } 173 else 174 { 175 i2_mv_x = i2_mv_x1; 176 i2_mv_y = i2_mv_y1; 177 ref_pic_buf_id = l1_ref_pic_buf_id; 178 } 179 180 if(PRED_L0 == ps_ngbr_pu->b2_pred_mode) 181 { 182 i2_ngbr_mv_x = i2_ngbr_mv_x0; 183 i2_ngbr_mv_y = i2_ngbr_mv_y0; 184 ngbr_ref_pic_buf_id = ngbr_l0_ref_pic_buf_id; 185 } 186 else 187 { 188 i2_ngbr_mv_x = i2_ngbr_mv_x1; 189 i2_ngbr_mv_y = i2_ngbr_mv_y1; 190 ngbr_ref_pic_buf_id = ngbr_l1_ref_pic_buf_id; 191 } 192 193 i4_bs = (ref_pic_buf_id == ngbr_ref_pic_buf_id) && 194 (ABS(i2_mv_x - i2_ngbr_mv_x) < 4) && 195 (ABS(i2_mv_y - i2_ngbr_mv_y) < 4) ? 0 : 1; 196 } 197 198 /* If the no. of motion vectors is not the same */ 199 else 200 { 201 i4_bs = 1; 202 } 203 204 205 return i4_bs; 206 } 207 208 /* QP is also populated in the same function */ 209 WORD32 ihevcd_ctb_boundary_strength_islice(bs_ctxt_t *ps_bs_ctxt) 210 { 211 pps_t *ps_pps; 212 sps_t *ps_sps; 213 tu_t *ps_tu; 214 UWORD32 *pu4_vert_bs; 215 UWORD32 *pu4_horz_bs; 216 WORD32 bs_strd; 217 WORD32 vert_bs0_tmp; 218 WORD32 horz_bs0_tmp; 219 UWORD8 *pu1_qp; 220 WORD32 qp_strd; 221 UWORD32 u4_qp_const_in_ctb; 222 WORD32 ctb_indx; 223 WORD32 i4_tu_cnt; 224 WORD32 log2_ctb_size; 225 WORD32 ctb_size; 226 227 WORD8 i1_loop_filter_across_tiles_enabled_flag; 228 WORD8 i1_loop_filter_across_slices_enabled_flag; 229 230 WORD32 i; 231 232 PROFILE_DISABLE_BOUNDARY_STRENGTH(); 233 234 ps_pps = ps_bs_ctxt->ps_pps; 235 ps_sps = ps_bs_ctxt->ps_sps; 236 i1_loop_filter_across_tiles_enabled_flag = ps_pps->i1_loop_filter_across_tiles_enabled_flag; 237 i1_loop_filter_across_slices_enabled_flag = ps_bs_ctxt->ps_slice_hdr->i1_slice_loop_filter_across_slices_enabled_flag; 238 i4_tu_cnt = ps_bs_ctxt->i4_ctb_tu_cnt; 239 240 log2_ctb_size = ps_sps->i1_log2_ctb_size; 241 ctb_size = (1 << log2_ctb_size); 242 243 /* strides are in units of number of bytes */ 244 /* ctb_size * ctb_size / 8 / 16 is the number of bytes needed per CTB */ 245 bs_strd = (ps_sps->i2_pic_wd_in_ctb + 1) << (2 * log2_ctb_size - 7); 246 247 pu4_vert_bs = (UWORD32 *)((UWORD8 *)ps_bs_ctxt->pu4_pic_vert_bs + 248 (ps_bs_ctxt->i4_ctb_x << (2 * log2_ctb_size - 7)) + 249 ps_bs_ctxt->i4_ctb_y * bs_strd); 250 pu4_horz_bs = (UWORD32 *)((UWORD8 *)ps_bs_ctxt->pu4_pic_horz_bs + 251 (ps_bs_ctxt->i4_ctb_x << (2 * log2_ctb_size - 7)) + 252 ps_bs_ctxt->i4_ctb_y * bs_strd); 253 254 /* ctb_size/8 elements per CTB */ 255 qp_strd = ps_sps->i2_pic_wd_in_ctb << (log2_ctb_size - 3); 256 pu1_qp = ps_bs_ctxt->pu1_pic_qp + ((ps_bs_ctxt->i4_ctb_x + ps_bs_ctxt->i4_ctb_y * qp_strd) << (log2_ctb_size - 3)); 257 258 ctb_indx = ps_bs_ctxt->i4_ctb_x + ps_sps->i2_pic_wd_in_ctb * ps_bs_ctxt->i4_ctb_y; 259 u4_qp_const_in_ctb = ps_bs_ctxt->pu1_pic_qp_const_in_ctb[ctb_indx >> 3] & (1 << (ctb_indx & 7)); 260 261 vert_bs0_tmp = pu4_vert_bs[0] & (0xFFFFFFFF >> (sizeof(UWORD32) * 8 - ctb_size / 2)); 262 horz_bs0_tmp = pu4_horz_bs[0] & (0xFFFFFFFF >> (sizeof(UWORD32) * 8 - ctb_size / 2)); 263 264 /* ctb_size/8 is the number of edges per CTB 265 * ctb_size/4 is the number of BS values needed per edge 266 * divided by 8 for the number of bytes 267 * 2 is the number of bits needed for each BS value */ 268 /* 269 memset(pu4_vert_bs, 0, (ctb_size / 8 + 1) * (ctb_size / 4) / 8 * 2 ); 270 memset(pu4_horz_bs, 0, (ctb_size / 8) * (ctb_size / 4) / 8 * 2 ); 271 */ 272 memset(pu4_vert_bs, 0, (1 << (2 * log2_ctb_size - 7)) + ((ctb_size >> 5) << 1)); 273 memset(pu4_horz_bs, 0, (1 << (2 * log2_ctb_size - 7))); 274 275 /* pu4_vert_bs[0] has information about the left CTB which is not required when ctb_x = 0 */ 276 if(0 != ps_bs_ctxt->i4_ctb_x) 277 { 278 pu4_vert_bs[0] |= vert_bs0_tmp; 279 } 280 281 /* pu4_horz_bs[0] has information about the top CTB which is not required when ctb_y = 0 */ 282 if(0 != ps_bs_ctxt->i4_ctb_y) 283 { 284 pu4_horz_bs[0] |= horz_bs0_tmp; 285 } 286 287 ps_tu = ps_bs_ctxt->ps_tu; 288 289 /* Populating the QP array - if const_qp_in_ctb flag is one, set only the first element */ 290 if(u4_qp_const_in_ctb) 291 pu1_qp[0] = ps_tu->b7_qp; 292 293 for(i = 0; i < i4_tu_cnt; i++) 294 { 295 WORD32 start_pos_x; 296 WORD32 start_pos_y; 297 WORD32 tu_size; 298 299 300 UWORD32 u4_bs; 301 ps_tu = ps_bs_ctxt->ps_tu + i; 302 303 /* start_pos_x and start_pos_y are in units of min TU size (4x4) */ 304 start_pos_x = ps_tu->b4_pos_x; 305 start_pos_y = ps_tu->b4_pos_y; 306 307 tu_size = 1 << (ps_tu->b3_size + 2); 308 tu_size >>= 2; /* TU size divided by 4 */ 309 310 u4_bs = DUP_LSB_10(tu_size); 311 312 /* Only if the current edge falls on 8 pixel grid set BS */ 313 if(0 == (start_pos_x & 1)) 314 { 315 WORD32 shift; 316 shift = start_pos_y * 2; 317 /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1)); 318 * will reduce to the following assuming ctb size is one of 16, 32 and 64 319 * and deblocking is done on 8x8 grid 320 */ 321 if(6 != log2_ctb_size) 322 shift += ((start_pos_x & 2) << (log2_ctb_size - 2)); 323 pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift); 324 } 325 /* Only if the current edge falls on 8 pixel grid set BS */ 326 if(0 == (start_pos_y & 1)) 327 { 328 WORD32 shift; 329 shift = start_pos_x * 2; 330 /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1)); 331 * will reduce to the following assuming ctb size is one of 16, 32 and 64 332 * and deblocking is done on 8x8 grid 333 */ 334 if(6 != log2_ctb_size) 335 shift += ((start_pos_y & 2) << (log2_ctb_size - 2)); 336 pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift); 337 } 338 339 /* Populating the QP array */ 340 if(0 == u4_qp_const_in_ctb) 341 { 342 if(0 == (start_pos_x & 1) && 0 == (start_pos_y & 1)) 343 { 344 WORD32 row, col; 345 for(row = start_pos_y; row < start_pos_y + tu_size; row += 2) 346 { 347 for(col = start_pos_x; col < start_pos_x + tu_size; col += 2) 348 { 349 pu1_qp[(row >> 1) * qp_strd + (col >> 1)] = ps_tu->b7_qp; 350 } 351 } 352 } 353 } 354 355 } 356 { 357 /*Determine if the slice is dependent, and is its left neighbor belongs to the same slice, in a different tile*/ 358 UWORD32 ctb_addr; 359 WORD32 slice_idx, left_slice_idx = -1, top_slice_idx = -1; 360 /* If left neighbor is not available, then set BS for entire first column to zero */ 361 if(!ps_pps->i1_tiles_enabled_flag) 362 { 363 if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_x) || 364 (0 == i1_loop_filter_across_slices_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_slice_x && 0 == ps_bs_ctxt->i4_ctb_slice_y) || 365 (0 == ps_bs_ctxt->i4_ctb_x)) 366 { 367 pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2); 368 } 369 } 370 else 371 { 372 //If across-tiles is disabled 373 if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_x)) 374 { 375 pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2); 376 } 377 else 378 { 379 ctb_addr = ps_bs_ctxt->i4_ctb_x + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb); 380 slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr]; 381 if(ps_bs_ctxt->i4_ctb_x) 382 { 383 ctb_addr = (ps_bs_ctxt->i4_ctb_x - 1) + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb); 384 left_slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr]; 385 } 386 /*If the 1st slice in a new tile is a dependent slice*/ 387 if(!((ps_bs_ctxt->ps_slice_hdr->i1_dependent_slice_flag == 1) && (slice_idx == left_slice_idx))) 388 { 389 /* Removed reduntant checks */ 390 if((0 == i1_loop_filter_across_slices_enabled_flag && ( 391 ((slice_idx != left_slice_idx) && 0 == ps_bs_ctxt->i4_ctb_slice_y) || 392 ((0 == ps_bs_ctxt->i4_ctb_tile_x) && (slice_idx != left_slice_idx)))) || 393 (0 == ps_bs_ctxt->i4_ctb_x)) 394 { 395 pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2); 396 } 397 } 398 } 399 } 400 401 ctb_addr = ps_bs_ctxt->i4_ctb_x + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb); 402 slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr]; 403 if(ps_bs_ctxt->i4_ctb_y) 404 { 405 ctb_addr = (ps_bs_ctxt->i4_ctb_x) + ((ps_bs_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb); 406 top_slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr]; 407 } 408 409 /* If top neighbor is not available, then set BS for entire first row to zero */ 410 /* Removed reduntant checks */ 411 if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_y) 412 || (0 == i1_loop_filter_across_slices_enabled_flag && ((slice_idx != top_slice_idx))) 413 || (0 == ps_bs_ctxt->i4_ctb_y)) 414 { 415 pu4_horz_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2); 416 } 417 } 418 419 /** 420 * Set BS of bottom and right frame boundaries to zero if it is an incomplete CTB 421 * (They might have been set to non zero values because of CBF of the current CTB) 422 * This block might not be needed for I slices*/ 423 { 424 WORD32 num_rows_remaining = (ps_sps->i2_pic_height_in_luma_samples - (ps_bs_ctxt->i4_ctb_y << log2_ctb_size)) >> 3; 425 WORD32 num_cols_remaining = (ps_sps->i2_pic_width_in_luma_samples - (ps_bs_ctxt->i4_ctb_x << log2_ctb_size)) >> 3; 426 if(num_rows_remaining < (ctb_size >> 3)) 427 { 428 /* WORD32 offset = (((num_rows_remaining >> 3) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 4)); 429 * will reduce to the following assuming ctb size is one of 16, 32 and 64 430 * and deblocking is done on 8x8 grid 431 */ 432 WORD32 offset; 433 offset = (num_rows_remaining >> (6 - log2_ctb_size)) << 2; 434 if(6 != log2_ctb_size) 435 offset += (num_rows_remaining & 1) << (log2_ctb_size - 4); 436 437 memset(((UWORD8 *)pu4_horz_bs) + offset, 0, 1 << (log2_ctb_size - 4)); 438 } 439 440 if(num_cols_remaining < (ctb_size >> 3)) 441 { 442 /* WORD32 offset = (((num_cols_remaining >> 3) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 4)); 443 * will reduce to the following assuming ctb size is one of 16, 32 and 64 444 * and deblocking is done on 8x8 grid 445 */ 446 447 WORD32 offset; 448 offset = (num_cols_remaining >> (6 - log2_ctb_size)) << 2; 449 if(6 != log2_ctb_size) 450 offset += (num_cols_remaining & 1) << (log2_ctb_size - 4); 451 452 memset(((UWORD8 *)pu4_vert_bs) + offset, 0, 1 << (log2_ctb_size - 4)); 453 } 454 } 455 456 return 0; 457 } 458 WORD32 ihevcd_ctb_boundary_strength_pbslice(bs_ctxt_t *ps_bs_ctxt) 459 { 460 sps_t *ps_sps; 461 pps_t *ps_pps; 462 WORD32 cur_ctb_idx, next_ctb_idx = 0; 463 WORD32 i4_tu_cnt; 464 WORD32 i4_pu_cnt; 465 tu_t *ps_tu; 466 467 UWORD32 *pu4_vert_bs; 468 UWORD32 *pu4_horz_bs; 469 WORD32 bs_strd; 470 WORD32 vert_bs0_tmp; 471 WORD32 horz_bs0_tmp; 472 UWORD8 *pu1_qp; 473 WORD32 qp_strd; 474 UWORD32 u4_qp_const_in_ctb; 475 WORD32 ctb_indx; 476 WORD32 log2_ctb_size; 477 WORD32 ctb_size; 478 479 WORD32 i; 480 WORD8 i1_loop_filter_across_tiles_enabled_flag; 481 WORD8 i1_loop_filter_across_slices_enabled_flag; 482 483 PROFILE_DISABLE_BOUNDARY_STRENGTH(); 484 485 ps_sps = ps_bs_ctxt->ps_sps; 486 ps_pps = ps_bs_ctxt->ps_pps; 487 488 log2_ctb_size = ps_sps->i1_log2_ctb_size; 489 ctb_size = (1 << log2_ctb_size); 490 491 /* strides are in units of number of bytes */ 492 /* ctb_size * ctb_size / 8 / 16 is the number of bytes needed per CTB */ 493 bs_strd = (ps_sps->i2_pic_wd_in_ctb + 1) << (2 * log2_ctb_size - 7); 494 495 pu4_vert_bs = (UWORD32 *)((UWORD8 *)ps_bs_ctxt->pu4_pic_vert_bs + 496 (ps_bs_ctxt->i4_ctb_x << (2 * log2_ctb_size - 7)) + 497 ps_bs_ctxt->i4_ctb_y * bs_strd); 498 pu4_horz_bs = (UWORD32 *)((UWORD8 *)ps_bs_ctxt->pu4_pic_horz_bs + 499 (ps_bs_ctxt->i4_ctb_x << (2 * log2_ctb_size - 7)) + 500 ps_bs_ctxt->i4_ctb_y * bs_strd); 501 502 vert_bs0_tmp = pu4_vert_bs[0] & (0xFFFFFFFF >> (sizeof(UWORD32) * 8 - ctb_size / 2)); 503 horz_bs0_tmp = pu4_horz_bs[0] & (0xFFFFFFFF >> (sizeof(UWORD32) * 8 - ctb_size / 2)); 504 505 ps_tu = ps_bs_ctxt->ps_tu; 506 507 /* ctb_size/8 elements per CTB */ 508 qp_strd = ps_sps->i2_pic_wd_in_ctb << (log2_ctb_size - 3); 509 pu1_qp = ps_bs_ctxt->pu1_pic_qp + ((ps_bs_ctxt->i4_ctb_x + ps_bs_ctxt->i4_ctb_y * qp_strd) << (log2_ctb_size - 3)); 510 511 ctb_indx = ps_bs_ctxt->i4_ctb_x + ps_sps->i2_pic_wd_in_ctb * ps_bs_ctxt->i4_ctb_y; 512 u4_qp_const_in_ctb = ps_bs_ctxt->pu1_pic_qp_const_in_ctb[ctb_indx >> 3] & (1 << (ctb_indx & 7)); 513 514 i1_loop_filter_across_tiles_enabled_flag = ps_pps->i1_loop_filter_across_tiles_enabled_flag; 515 i1_loop_filter_across_slices_enabled_flag = ps_bs_ctxt->ps_slice_hdr->i1_slice_loop_filter_across_slices_enabled_flag; 516 517 /* ctb_size/8 is the number of edges per CTB 518 * ctb_size/4 is the number of BS values needed per edge 519 * divided by 8 for the number of bytes 520 * 2 is the number of bits needed for each BS value */ 521 /* 522 memset(pu4_vert_bs, 0, (ctb_size / 8 + 1) * (ctb_size / 4) * 2 / 8 ); 523 memset(pu4_horz_bs, 0, (ctb_size / 8) * (ctb_size / 4) * 2 / 8 ); 524 */ 525 memset(pu4_vert_bs, 0, (1 << (2 * log2_ctb_size - 7)) + (ctb_size >> 4)); 526 memset(pu4_horz_bs, 0, (1 << (2 * log2_ctb_size - 7))); 527 528 /* pu4_vert_bs[0] has information about the left CTB which is not required when ctb_x = 0 */ 529 if(0 != ps_bs_ctxt->i4_ctb_x) 530 { 531 pu4_vert_bs[0] |= vert_bs0_tmp; 532 } 533 534 /* pu4_horz_bs[0] has information about the top CTB which is not required when ctb_y = 0 */ 535 if(0 != ps_bs_ctxt->i4_ctb_y) 536 { 537 pu4_horz_bs[0] |= horz_bs0_tmp; 538 } 539 /* pu4_horz_bs[bs_strd / 4] corresponds to pu4_horz_bs[0] of the bottom CTB */ 540 *(UWORD32 *)((UWORD8 *)pu4_horz_bs + bs_strd) = 0; 541 542 cur_ctb_idx = ps_bs_ctxt->i4_ctb_x 543 + ps_bs_ctxt->i4_ctb_y * (ps_sps->i2_pic_wd_in_ctb); 544 next_ctb_idx = ps_bs_ctxt->i4_next_tu_ctb_cnt; 545 if(1 == ps_bs_ctxt->ps_codec->i4_num_cores) 546 { 547 i4_tu_cnt = ps_bs_ctxt->pu4_pic_tu_idx[next_ctb_idx] - ps_bs_ctxt->pu4_pic_tu_idx[cur_ctb_idx % RESET_TU_BUF_NCTB]; 548 } 549 else 550 { 551 i4_tu_cnt = ps_bs_ctxt->pu4_pic_tu_idx[next_ctb_idx] - ps_bs_ctxt->pu4_pic_tu_idx[cur_ctb_idx]; 552 } 553 554 ps_tu = ps_bs_ctxt->ps_tu; 555 if(u4_qp_const_in_ctb) 556 pu1_qp[0] = ps_tu->b7_qp; 557 558 /* For all TUs in the CTB For left and top edges, check if there are coded coefficients on either sides of the edge */ 559 for(i = 0; i < i4_tu_cnt; i++) 560 { 561 WORD32 start_pos_x; 562 WORD32 start_pos_y; 563 WORD32 end_pos_x; 564 WORD32 end_pos_y; 565 WORD32 tu_size; 566 UWORD32 u4_bs; 567 WORD32 intra_flag; 568 UWORD8 *pu1_pic_intra_flag; 569 570 ps_tu = ps_bs_ctxt->ps_tu + i; 571 572 start_pos_x = ps_tu->b4_pos_x; 573 start_pos_y = ps_tu->b4_pos_y; 574 575 tu_size = 1 << (ps_tu->b3_size + 2); 576 tu_size >>= 2; 577 578 end_pos_x = start_pos_x + tu_size; 579 end_pos_y = start_pos_y + tu_size; 580 581 { 582 WORD32 tu_abs_x = (ps_bs_ctxt->i4_ctb_x << log2_ctb_size) + (start_pos_x << 2); 583 WORD32 tu_abs_y = (ps_bs_ctxt->i4_ctb_y << log2_ctb_size) + (start_pos_y << 2); 584 585 WORD32 numbytes_row = (ps_sps->i2_pic_width_in_luma_samples + 63) / 64; 586 587 pu1_pic_intra_flag = ps_bs_ctxt->ps_codec->pu1_pic_intra_flag; 588 pu1_pic_intra_flag += (tu_abs_y >> 3) * numbytes_row; 589 pu1_pic_intra_flag += (tu_abs_x >> 6); 590 591 intra_flag = *pu1_pic_intra_flag; 592 intra_flag &= (1 << ((tu_abs_x >> 3) % 8)); 593 } 594 if(intra_flag) 595 { 596 u4_bs = DUP_LSB_10(tu_size); 597 598 /* Only if the current edge falls on 8 pixel grid set BS */ 599 if(0 == (start_pos_x & 1)) 600 { 601 WORD32 shift; 602 shift = start_pos_y * 2; 603 /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1)); 604 * will reduce to the following assuming ctb size is one of 16, 32 and 64 605 * and deblocking is done on 8x8 grid 606 */ 607 if(6 != log2_ctb_size) 608 shift += ((start_pos_x & 2) << (log2_ctb_size - 2)); 609 pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift); 610 } 611 /* Only if the current edge falls on 8 pixel grid set BS */ 612 if(0 == (start_pos_y & 1)) 613 { 614 WORD32 shift; 615 shift = start_pos_x * 2; 616 /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1)); 617 * will reduce to the following assuming ctb size is one of 16, 32 and 64 618 * and deblocking is done on 8x8 grid 619 */ 620 if(6 != log2_ctb_size) 621 shift += ((start_pos_y & 2) << (log2_ctb_size - 2)); 622 pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift); 623 } 624 } 625 626 627 /* If the current TU is coded then set both top edge and left edge BS to 1 and go to next TU */ 628 if(ps_tu->b1_y_cbf) 629 { 630 u4_bs = DUP_LSB_01(tu_size); 631 632 /* Only if the current edge falls on 8 pixel grid set BS */ 633 if(0 == (start_pos_x & 1)) 634 { 635 WORD32 shift; 636 shift = start_pos_y * 2; 637 /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1)); 638 * will reduce to the following assuming ctb size is one of 16, 32 and 64 639 * and deblocking is done on 8x8 grid 640 */ 641 if(6 != log2_ctb_size) 642 shift += ((start_pos_x & 2) << (log2_ctb_size - 2)); 643 pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift); 644 } 645 /* Only if the current edge falls on 8 pixel grid set BS */ 646 if(0 == (start_pos_y & 1)) 647 { 648 WORD32 shift; 649 shift = start_pos_x * 2; 650 /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1)); 651 * will reduce to the following assuming ctb size is one of 16, 32 and 64 652 * and deblocking is done on 8x8 grid 653 */ 654 if(6 != log2_ctb_size) 655 shift += ((start_pos_y & 2) << (log2_ctb_size - 2)); 656 pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift); 657 } 658 /* Only if the current edge falls on 8 pixel grid set BS */ 659 if(0 == (end_pos_x & 1)) 660 { 661 if(!(ctb_size / 8 == (end_pos_x >> 1) && ps_bs_ctxt->i4_ctb_x == ps_sps->i2_pic_wd_in_ctb - 1)) 662 { 663 WORD32 shift; 664 shift = start_pos_y * 2; 665 shift += (((end_pos_x >> 1) & ((MAX_CTB_SIZE >> log2_ctb_size) - 1)) << (log2_ctb_size - 1)); 666 pu4_vert_bs[end_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift); 667 } 668 } 669 /* Only if the current edge falls on 8 pixel grid set BS */ 670 if(0 == (end_pos_y & 1)) 671 { 672 /* If end_pos_y corresponds to the bottom of the CTB, write to pu4_horz_bs[0] of the bottom CTB */ 673 if(ctb_size / 8 == (end_pos_y >> 1)) 674 { 675 *(UWORD32 *)((UWORD8 *)pu4_horz_bs + bs_strd) |= (u4_bs << (start_pos_x * 2)); 676 } 677 else 678 { 679 WORD32 shift; 680 shift = start_pos_x * 2; 681 shift += (((end_pos_y >> 1) & ((MAX_CTB_SIZE >> log2_ctb_size) - 1)) << (log2_ctb_size - 1)); 682 pu4_horz_bs[end_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift); 683 } 684 } 685 } 686 687 if(0 == u4_qp_const_in_ctb) 688 { 689 if(0 == (start_pos_x & 1) && 0 == (start_pos_y & 1)) 690 { 691 WORD32 row, col; 692 for(row = start_pos_y; row < start_pos_y + tu_size; row += 2) 693 { 694 for(col = start_pos_x; col < start_pos_x + tu_size; col += 2) 695 { 696 pu1_qp[(row >> 1) * qp_strd + (col >> 1)] = ps_tu->b7_qp; 697 } 698 } 699 } 700 } 701 } 702 703 /* For all PUs in the CTB, 704 For left and top edges, compute BS */ 705 706 cur_ctb_idx = ps_bs_ctxt->i4_ctb_x 707 + ps_bs_ctxt->i4_ctb_y * (ps_sps->i2_pic_wd_in_ctb); 708 709 { 710 WORD32 next_ctb_idx; 711 next_ctb_idx = ps_bs_ctxt->i4_next_pu_ctb_cnt; 712 i4_pu_cnt = ps_bs_ctxt->pu4_pic_pu_idx[next_ctb_idx] - ps_bs_ctxt->pu4_pic_pu_idx[cur_ctb_idx]; 713 } 714 715 for(i = 0; i < i4_pu_cnt; i++) 716 { 717 WORD32 start_pos_x; 718 WORD32 start_pos_y; 719 WORD32 end_pos_x; 720 WORD32 end_pos_y; 721 WORD32 pu_wd, pu_ht; 722 UWORD32 u4_bs; 723 pu_t *ps_pu = ps_bs_ctxt->ps_pu + i; 724 pu_t *ps_ngbr_pu; 725 UWORD32 u4_ngbr_pu_indx; 726 727 start_pos_x = ps_pu->b4_pos_x; 728 start_pos_y = ps_pu->b4_pos_y; 729 730 pu_wd = (ps_pu->b4_wd + 1); 731 pu_ht = (ps_pu->b4_ht + 1); 732 733 end_pos_x = start_pos_x + pu_wd; 734 end_pos_y = start_pos_y + pu_ht; 735 736 /* If the current PU is intra, set Boundary strength as 2 for both top and left edge */ 737 /* Need not mask the BS to zero even if it was set to 1 already since BS 2 and 3 are assumed to be the same in leaf level functions */ 738 if(ps_pu->b1_intra_flag) 739 { 740 u4_bs = DUP_LSB_10(pu_ht); 741 742 /* Only if the current edge falls on 8 pixel grid set BS */ 743 if(0 == (start_pos_x & 1)) 744 { 745 WORD32 shift; 746 shift = start_pos_y * 2; 747 /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1)); 748 * will reduce to the following assuming ctb size is one of 16, 32 and 64 749 * and deblocking is done on 8x8 grid 750 */ 751 if(6 != log2_ctb_size) 752 shift += ((start_pos_x & 2) << (log2_ctb_size - 2)); 753 pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift); 754 } 755 756 u4_bs = DUP_LSB_10(pu_wd); 757 758 /* Only if the current edge falls on 8 pixel grid set BS */ 759 if(0 == (start_pos_y & 1)) 760 { 761 WORD32 shift; 762 shift = start_pos_x * 2; 763 /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1)); 764 * will reduce to the following assuming ctb size is one of 16, 32 and 64 765 * and deblocking is done on 8x8 grid 766 */ 767 if(6 != log2_ctb_size) 768 shift += ((start_pos_y & 2) << (log2_ctb_size - 2)); 769 pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift); 770 } 771 } 772 773 else 774 { 775 /* Vertical edge */ 776 /* Process only if the edge is not a frame edge */ 777 if(0 != ps_bs_ctxt->i4_ctb_x + start_pos_x) 778 { 779 do 780 { 781 WORD32 pu_ngbr_ht; 782 WORD32 min_pu_ht; 783 WORD32 ngbr_end_pos_y; 784 UWORD32 ngbr_pu_idx_strd; 785 ngbr_pu_idx_strd = MAX_CTB_SIZE / MIN_PU_SIZE + 2; 786 u4_ngbr_pu_indx = ps_bs_ctxt->pu4_pic_pu_idx_map[(start_pos_y + 1) * ngbr_pu_idx_strd + (start_pos_x)]; 787 ps_ngbr_pu = ps_bs_ctxt->ps_pic_pu + u4_ngbr_pu_indx; 788 789 pu_ngbr_ht = ps_ngbr_pu->b4_ht + 1; 790 ngbr_end_pos_y = ps_ngbr_pu->b4_pos_y + pu_ngbr_ht; 791 792 min_pu_ht = MIN(ngbr_end_pos_y, end_pos_y) - start_pos_y; 793 794 if(ps_ngbr_pu->b1_intra_flag) 795 { 796 u4_bs = DUP_LSB_10(min_pu_ht); 797 798 /* Only if the current edge falls on 8 pixel grid set BS */ 799 if(0 == (start_pos_x & 1)) 800 { 801 WORD32 shift; 802 shift = start_pos_y * 2; 803 /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1)); 804 * will reduce to the following assuming ctb size is one of 16, 32 and 64 805 * and deblocking is done on 8x8 grid 806 */ 807 if(6 != log2_ctb_size) 808 shift += ((start_pos_x & 2) << (log2_ctb_size - 2)); 809 pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift); 810 } 811 } 812 else 813 { 814 u4_bs = ihevcd_pu_boundary_strength(ps_pu, ps_ngbr_pu); 815 if(u4_bs) 816 { 817 u4_bs = DUP_LSB_01(min_pu_ht); 818 if(0 == (start_pos_x & 1)) 819 { 820 WORD32 shift; 821 shift = start_pos_y * 2; 822 /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1)); 823 * will reduce to the following assuming ctb size is one of 16, 32 and 64 824 * and deblocking is done on 8x8 grid 825 */ 826 if(6 != log2_ctb_size) 827 shift += ((start_pos_x & 2) << (log2_ctb_size - 2)); 828 pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift); 829 } 830 } 831 } 832 833 pu_ht -= min_pu_ht; 834 start_pos_y += min_pu_ht; 835 }while(pu_ht > 0); 836 837 /* Reinitialising since the values are updated in the previous loop */ 838 pu_ht = ps_pu->b4_ht + 1; 839 start_pos_y = ps_pu->b4_pos_y; 840 } 841 842 /* Horizontal edge */ 843 /* Process only if the edge is not a frame edge */ 844 if(0 != ps_bs_ctxt->i4_ctb_y + start_pos_y) 845 { 846 do 847 { 848 WORD32 pu_ngbr_wd; 849 WORD32 min_pu_wd; 850 WORD32 ngbr_end_pos_x; 851 UWORD32 ngbr_pu_idx_strd = MAX_CTB_SIZE / MIN_PU_SIZE + 2; 852 u4_ngbr_pu_indx = ps_bs_ctxt->pu4_pic_pu_idx_map[(start_pos_y)*ngbr_pu_idx_strd + (start_pos_x + 1)]; 853 ps_ngbr_pu = ps_bs_ctxt->ps_pic_pu + u4_ngbr_pu_indx; 854 855 pu_ngbr_wd = ps_ngbr_pu->b4_wd + 1; 856 ngbr_end_pos_x = ps_ngbr_pu->b4_pos_x + pu_ngbr_wd; 857 858 min_pu_wd = MIN(ngbr_end_pos_x, end_pos_x) - start_pos_x; 859 860 if(ps_ngbr_pu->b1_intra_flag) 861 { 862 u4_bs = DUP_LSB_10(min_pu_wd); 863 864 /* Only if the current edge falls on 8 pixel grid set BS */ 865 if(0 == (start_pos_y & 1)) 866 { 867 WORD32 shift; 868 shift = start_pos_x * 2; 869 /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1)); 870 * will reduce to the following assuming ctb size is one of 16, 32 and 64 871 * and deblocking is done on 8x8 grid 872 */ 873 if(6 != log2_ctb_size) 874 shift += ((start_pos_y & 2) << (log2_ctb_size - 2)); 875 pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift); 876 } 877 } 878 else 879 { 880 u4_bs = ihevcd_pu_boundary_strength(ps_pu, ps_ngbr_pu); 881 if(u4_bs) 882 { 883 u4_bs = DUP_LSB_01(min_pu_wd); 884 885 /* Only if the current edge falls on 8 pixel grid set BS */ 886 if(0 == (start_pos_y & 1)) 887 { 888 WORD32 shift; 889 shift = start_pos_x * 2; 890 /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1)); 891 * will reduce to the following assuming ctb size is one of 16, 32 and 64 892 * and deblocking is done on 8x8 grid 893 */ 894 if(6 != log2_ctb_size) 895 shift += ((start_pos_y & 2) << (log2_ctb_size - 2)); 896 pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift); 897 } 898 } 899 } 900 901 pu_wd -= min_pu_wd; 902 start_pos_x += min_pu_wd; 903 }while(pu_wd > 0); 904 905 /* Reinitialising since the values are updated in the previous loop */ 906 pu_wd = ps_pu->b4_wd + 1; 907 start_pos_x = ps_pu->b4_pos_x; 908 } 909 } 910 } 911 912 { 913 /* If left neighbor is not available, then set BS for entire first column to zero */ 914 UWORD32 ctb_addr; 915 WORD32 slice_idx, left_slice_idx = -1, top_slice_idx = -1; 916 917 if(!ps_pps->i1_tiles_enabled_flag) 918 { 919 if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_x) || 920 (0 == i1_loop_filter_across_slices_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_slice_x && 0 == ps_bs_ctxt->i4_ctb_slice_y) || 921 (0 == ps_bs_ctxt->i4_ctb_x)) 922 { 923 pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2); 924 } 925 } 926 else 927 { 928 if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_x)) 929 { 930 pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2); 931 } 932 else 933 { 934 935 ctb_addr = ps_bs_ctxt->i4_ctb_x + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb); 936 slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr]; 937 938 if(ps_bs_ctxt->i4_ctb_x) 939 { 940 ctb_addr = (ps_bs_ctxt->i4_ctb_x - 1) + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb); 941 left_slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr]; 942 } 943 944 if(!((ps_bs_ctxt->ps_slice_hdr->i1_dependent_slice_flag == 1) && (slice_idx == left_slice_idx))) 945 { 946 /* Removed reduntant checks */ 947 if((0 == i1_loop_filter_across_slices_enabled_flag && ( 948 (0 == ps_bs_ctxt->i4_ctb_slice_x && 0 == ps_bs_ctxt->i4_ctb_slice_y) || 949 ((0 == ps_bs_ctxt->i4_ctb_tile_x) && (slice_idx != left_slice_idx)))) || (0 == ps_bs_ctxt->i4_ctb_x)) 950 { 951 pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2); 952 } 953 } 954 } 955 } 956 957 ctb_addr = ps_bs_ctxt->i4_ctb_x + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb); 958 slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr]; 959 if(ps_bs_ctxt->i4_ctb_y) 960 { 961 ctb_addr = (ps_bs_ctxt->i4_ctb_x) + ((ps_bs_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb); 962 top_slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr]; 963 } 964 /* If top neighbor is not available, then set BS for entire first row to zero */ 965 /* Removed reduntant checks */ 966 if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_y) 967 || (0 == i1_loop_filter_across_slices_enabled_flag && ((slice_idx != top_slice_idx))) 968 || (0 == ps_bs_ctxt->i4_ctb_y)) 969 { 970 pu4_horz_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2); 971 } 972 } 973 974 /** 975 * Set BS of bottom and right frame boundaries to zero if it is an incomplete CTB 976 * (They might have set to non zero values because of CBF of the current CTB)*/ 977 { 978 WORD32 num_rows_remaining = (ps_sps->i2_pic_height_in_luma_samples - (ps_bs_ctxt->i4_ctb_y << log2_ctb_size)) >> 3; 979 WORD32 num_cols_remaining = (ps_sps->i2_pic_width_in_luma_samples - (ps_bs_ctxt->i4_ctb_x << log2_ctb_size)) >> 3; 980 if(num_rows_remaining < (ctb_size >> 3)) 981 { 982 /* WORD32 offset = (((num_rows_remaining >> 3) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 4)); 983 * will reduce to the following assuming ctb size is one of 16, 32 and 64 984 * and deblocking is done on 8x8 grid 985 */ 986 WORD32 offset; 987 offset = (num_rows_remaining >> (6 - log2_ctb_size)) << 2; 988 if(6 != log2_ctb_size) 989 offset += (num_rows_remaining & 1) << (log2_ctb_size - 4); 990 991 memset(((UWORD8 *)pu4_horz_bs) + offset, 0, 1 << (log2_ctb_size - 4)); 992 } 993 994 if(num_cols_remaining < (ctb_size >> 3)) 995 { 996 /* WORD32 offset = (((num_cols_remaining >> 3) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 4)); 997 * will reduce to the following assuming ctb size is one of 16, 32 and 64 998 * and deblocking is done on 8x8 grid 999 */ 1000 1001 WORD32 offset; 1002 offset = (num_cols_remaining >> (6 - log2_ctb_size)) << 2; 1003 if(6 != log2_ctb_size) 1004 offset += (num_cols_remaining & 1) << (log2_ctb_size - 4); 1005 1006 memset(((UWORD8 *)pu4_vert_bs) + offset, 0, 1 << (log2_ctb_size - 4)); 1007 } 1008 } 1009 return 0; 1010 } 1011