1 /****************************************************************************** 2 * 3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ******************************************************************************/ 18 /** 19 ******************************************************************************* 20 * @file 21 * ihevc_boundary_strength.c 22 * 23 * @brief 24 * Contains functions for computing boundary strength 25 * 26 * @author 27 * Harish 28 * 29 * @par List of Functions: 30 * 31 * @remarks 32 * None 33 * 34 ******************************************************************************* 35 */ 36 /*****************************************************************************/ 37 /* File Includes */ 38 /*****************************************************************************/ 39 #include <stdio.h> 40 #include <stddef.h> 41 #include <stdlib.h> 42 #include <string.h> 43 44 #include "ihevc_typedefs.h" 45 #include "iv.h" 46 #include "ivd.h" 47 #include "ihevcd_cxa.h" 48 #include "ithread.h" 49 50 #include "ihevc_defs.h" 51 #include "ihevc_debug.h" 52 #include "ihevc_defs.h" 53 #include "ihevc_structs.h" 54 #include "ihevc_macros.h" 55 #include "ihevc_platform_macros.h" 56 #include "ihevc_cabac_tables.h" 57 58 #include "ihevc_error.h" 59 #include "ihevc_common_tables.h" 60 61 #include "ihevcd_trace.h" 62 #include "ihevcd_defs.h" 63 #include "ihevcd_function_selector.h" 64 #include "ihevcd_structs.h" 65 #include "ihevcd_error.h" 66 #include "ihevcd_nal.h" 67 #include "ihevcd_bitstream.h" 68 #include "ihevcd_job_queue.h" 69 #include "ihevcd_utils.h" 70 #include "ihevcd_profile.h" 71 72 /*****************************************************************************/ 73 /* Function Prototypes */ 74 /*****************************************************************************/ 75 76 77 #define SET_NGBHR_ALL_AVAIL(avail) avail = 0x1F; 78 79 #define SET_NGBHR_BOTLEFT_NOTAVAIL(avail) avail &= ~0x10; 80 #define SET_NGBHR_LEFT_NOTAVAIL(avail) avail &= ~0x8; 81 #define SET_NGBHR_TOPLEFT_NOTAVAIL(avail) avail &= ~0x4; 82 #define SET_NGBHR_TOP_NOTAVAIL(avail) avail &= ~0x2; 83 #define SET_NGBHR_TOPRIGHT_NOTAVAIL(avail) avail &= ~0x1; 84 85 WORD32 ihevcd_pu_boundary_strength(pu_t *ps_pu, 86 pu_t *ps_ngbr_pu) 87 { 88 WORD32 i4_bs; 89 UWORD32 l0_ref_pic_buf_id, l1_ref_pic_buf_id; 90 UWORD32 ngbr_l0_ref_pic_buf_id, ngbr_l1_ref_pic_buf_id; 91 92 WORD16 i2_mv_x0, i2_mv_y0, i2_mv_x1, i2_mv_y1; 93 WORD16 i2_ngbr_mv_x0, i2_ngbr_mv_y0, i2_ngbr_mv_x1, i2_ngbr_mv_y1; 94 95 WORD32 num_mv, ngbr_num_mv; 96 97 num_mv = (PRED_BI == ps_pu->b2_pred_mode) ? 2 : 1; 98 ngbr_num_mv = (PRED_BI == ps_ngbr_pu->b2_pred_mode) ? 2 : 1; 99 100 l0_ref_pic_buf_id = ps_pu->mv.i1_l0_ref_pic_buf_id; 101 l1_ref_pic_buf_id = ps_pu->mv.i1_l1_ref_pic_buf_id; 102 ngbr_l0_ref_pic_buf_id = ps_ngbr_pu->mv.i1_l0_ref_pic_buf_id; 103 ngbr_l1_ref_pic_buf_id = ps_ngbr_pu->mv.i1_l1_ref_pic_buf_id; 104 105 106 i2_mv_x0 = ps_pu->mv.s_l0_mv.i2_mvx; 107 i2_mv_y0 = ps_pu->mv.s_l0_mv.i2_mvy; 108 i2_mv_x1 = ps_pu->mv.s_l1_mv.i2_mvx; 109 i2_mv_y1 = ps_pu->mv.s_l1_mv.i2_mvy; 110 111 i2_ngbr_mv_x0 = ps_ngbr_pu->mv.s_l0_mv.i2_mvx; 112 i2_ngbr_mv_y0 = ps_ngbr_pu->mv.s_l0_mv.i2_mvy; 113 i2_ngbr_mv_x1 = ps_ngbr_pu->mv.s_l1_mv.i2_mvx; 114 i2_ngbr_mv_y1 = ps_ngbr_pu->mv.s_l1_mv.i2_mvy; 115 116 117 /* If two motion vectors are used */ 118 if((2 == num_mv) && 119 (2 == ngbr_num_mv)) 120 { 121 if((l0_ref_pic_buf_id == ngbr_l0_ref_pic_buf_id && l1_ref_pic_buf_id == ngbr_l1_ref_pic_buf_id) || 122 (l0_ref_pic_buf_id == ngbr_l1_ref_pic_buf_id && l1_ref_pic_buf_id == ngbr_l0_ref_pic_buf_id)) 123 { 124 if(l0_ref_pic_buf_id != l1_ref_pic_buf_id) /* Different L0 and L1 */ 125 { 126 if(l0_ref_pic_buf_id == ngbr_l0_ref_pic_buf_id) 127 { 128 i4_bs = (ABS(i2_mv_x0 - i2_ngbr_mv_x0) < 4) && 129 (ABS(i2_mv_y0 - i2_ngbr_mv_y0) < 4) && 130 (ABS(i2_mv_x1 - i2_ngbr_mv_x1) < 4) && 131 (ABS(i2_mv_y1 - i2_ngbr_mv_y1) < 4) ? 0 : 1; 132 } 133 else 134 { 135 i4_bs = (ABS(i2_mv_x0 - i2_ngbr_mv_x1) < 4) && 136 (ABS(i2_mv_y0 - i2_ngbr_mv_y1) < 4) && 137 (ABS(i2_mv_x1 - i2_ngbr_mv_x0) < 4) && 138 (ABS(i2_mv_y1 - i2_ngbr_mv_y0) < 4) ? 0 : 1; 139 } 140 } 141 else /* Same L0 and L1 */ 142 { 143 i4_bs = ((ABS(i2_mv_x0 - i2_ngbr_mv_x0) >= 4) || 144 (ABS(i2_mv_y0 - i2_ngbr_mv_y0) >= 4) || 145 (ABS(i2_mv_x1 - i2_ngbr_mv_x1) >= 4) || 146 (ABS(i2_mv_y1 - i2_ngbr_mv_y1) >= 4)) && 147 ((ABS(i2_mv_x0 - i2_ngbr_mv_x1) >= 4) || 148 (ABS(i2_mv_y0 - i2_ngbr_mv_y1) >= 4) || 149 (ABS(i2_mv_x1 - i2_ngbr_mv_x0) >= 4) || 150 (ABS(i2_mv_y1 - i2_ngbr_mv_y0) >= 4)) ? 1 : 0; 151 } 152 } 153 else /* If the reference pictures used are different */ 154 { 155 i4_bs = 1; 156 } 157 } 158 159 /* If one motion vector is used in both PUs */ 160 else if((1 == num_mv) && 161 (1 == ngbr_num_mv)) 162 { 163 WORD16 i2_mv_x, i2_mv_y; 164 WORD16 i2_ngbr_mv_x, i2_ngbr_mv_y; 165 UWORD32 ref_pic_buf_id, ngbr_ref_pic_buf_id; 166 167 if(PRED_L0 == ps_pu->b2_pred_mode) 168 { 169 i2_mv_x = i2_mv_x0; 170 i2_mv_y = i2_mv_y0; 171 ref_pic_buf_id = l0_ref_pic_buf_id; 172 } 173 else 174 { 175 i2_mv_x = i2_mv_x1; 176 i2_mv_y = i2_mv_y1; 177 ref_pic_buf_id = l1_ref_pic_buf_id; 178 } 179 180 if(PRED_L0 == ps_ngbr_pu->b2_pred_mode) 181 { 182 i2_ngbr_mv_x = i2_ngbr_mv_x0; 183 i2_ngbr_mv_y = i2_ngbr_mv_y0; 184 ngbr_ref_pic_buf_id = ngbr_l0_ref_pic_buf_id; 185 } 186 else 187 { 188 i2_ngbr_mv_x = i2_ngbr_mv_x1; 189 i2_ngbr_mv_y = i2_ngbr_mv_y1; 190 ngbr_ref_pic_buf_id = ngbr_l1_ref_pic_buf_id; 191 } 192 193 i4_bs = (ref_pic_buf_id == ngbr_ref_pic_buf_id) && 194 (ABS(i2_mv_x - i2_ngbr_mv_x) < 4) && 195 (ABS(i2_mv_y - i2_ngbr_mv_y) < 4) ? 0 : 1; 196 } 197 198 /* If the no. of motion vectors is not the same */ 199 else 200 { 201 i4_bs = 1; 202 } 203 204 205 return i4_bs; 206 } 207 208 /* QP is also populated in the same function */ 209 WORD32 ihevcd_ctb_boundary_strength_islice(bs_ctxt_t *ps_bs_ctxt) 210 { 211 pps_t *ps_pps; 212 sps_t *ps_sps; 213 tu_t *ps_tu; 214 UWORD32 *pu4_vert_bs; 215 UWORD32 *pu4_horz_bs; 216 WORD32 bs_strd; 217 WORD32 vert_bs0_tmp; 218 WORD32 horz_bs0_tmp; 219 UWORD8 *pu1_qp; 220 WORD32 qp_strd; 221 UWORD32 u4_qp_const_in_ctb; 222 WORD32 ctb_indx; 223 WORD32 i4_tu_cnt; 224 WORD32 log2_ctb_size; 225 WORD32 ctb_size; 226 227 WORD8 i1_loop_filter_across_tiles_enabled_flag; 228 WORD8 i1_loop_filter_across_slices_enabled_flag; 229 230 WORD32 i; 231 232 PROFILE_DISABLE_BOUNDARY_STRENGTH(); 233 234 ps_pps = ps_bs_ctxt->ps_pps; 235 ps_sps = ps_bs_ctxt->ps_sps; 236 i1_loop_filter_across_tiles_enabled_flag = ps_pps->i1_loop_filter_across_tiles_enabled_flag; 237 i1_loop_filter_across_slices_enabled_flag = ps_bs_ctxt->ps_slice_hdr->i1_slice_loop_filter_across_slices_enabled_flag; 238 i4_tu_cnt = ps_bs_ctxt->i4_ctb_tu_cnt; 239 240 log2_ctb_size = ps_sps->i1_log2_ctb_size; 241 ctb_size = (1 << log2_ctb_size); 242 243 /* strides are in units of number of bytes */ 244 /* ctb_size * ctb_size / 8 / 16 is the number of bytes needed per CTB */ 245 bs_strd = (ps_sps->i2_pic_wd_in_ctb + 1) << (2 * log2_ctb_size - 7); 246 247 pu4_vert_bs = (UWORD32 *)((UWORD8 *)ps_bs_ctxt->pu4_pic_vert_bs + 248 (ps_bs_ctxt->i4_ctb_x << (2 * log2_ctb_size - 7)) + 249 ps_bs_ctxt->i4_ctb_y * bs_strd); 250 pu4_horz_bs = (UWORD32 *)((UWORD8 *)ps_bs_ctxt->pu4_pic_horz_bs + 251 (ps_bs_ctxt->i4_ctb_x << (2 * log2_ctb_size - 7)) + 252 ps_bs_ctxt->i4_ctb_y * bs_strd); 253 254 /* ctb_size/8 elements per CTB */ 255 qp_strd = ps_sps->i2_pic_wd_in_ctb << (log2_ctb_size - 3); 256 pu1_qp = ps_bs_ctxt->pu1_pic_qp + ((ps_bs_ctxt->i4_ctb_x + ps_bs_ctxt->i4_ctb_y * qp_strd) << (log2_ctb_size - 3)); 257 258 ctb_indx = ps_bs_ctxt->i4_ctb_x + ps_sps->i2_pic_wd_in_ctb * ps_bs_ctxt->i4_ctb_y; 259 u4_qp_const_in_ctb = ps_bs_ctxt->pu1_pic_qp_const_in_ctb[ctb_indx >> 3] & (1 << (ctb_indx & 7)); 260 261 vert_bs0_tmp = pu4_vert_bs[0] & (0xFFFFFFFF >> (sizeof(UWORD32) * 8 - ctb_size / 2)); 262 horz_bs0_tmp = pu4_horz_bs[0] & (0xFFFFFFFF >> (sizeof(UWORD32) * 8 - ctb_size / 2)); 263 264 /* ctb_size/8 is the number of edges per CTB 265 * ctb_size/4 is the number of BS values needed per edge 266 * divided by 8 for the number of bytes 267 * 2 is the number of bits needed for each BS value */ 268 /* 269 memset(pu4_vert_bs, 0, (ctb_size / 8 + 1) * (ctb_size / 4) / 8 * 2 ); 270 memset(pu4_horz_bs, 0, (ctb_size / 8) * (ctb_size / 4) / 8 * 2 ); 271 */ 272 memset(pu4_vert_bs, 0, (1 << (2 * log2_ctb_size - 7)) + ((ctb_size >> 5) << 1)); 273 memset(pu4_horz_bs, 0, (1 << (2 * log2_ctb_size - 7))); 274 275 /* pu4_vert_bs[0] has information about the left CTB which is not required when ctb_x = 0 */ 276 if(0 != ps_bs_ctxt->i4_ctb_x) 277 { 278 pu4_vert_bs[0] |= vert_bs0_tmp; 279 } 280 281 /* pu4_horz_bs[0] has information about the top CTB which is not required when ctb_y = 0 */ 282 if(0 != ps_bs_ctxt->i4_ctb_y) 283 { 284 pu4_horz_bs[0] |= horz_bs0_tmp; 285 } 286 287 ps_tu = ps_bs_ctxt->ps_tu; 288 289 /* Populating the QP array - if const_qp_in_ctb flag is one, set only the first element */ 290 if(u4_qp_const_in_ctb) 291 pu1_qp[0] = ps_tu->b7_qp; 292 293 for(i = 0; i < i4_tu_cnt; i++) 294 { 295 WORD32 start_pos_x; 296 WORD32 start_pos_y; 297 WORD32 tu_size; 298 299 300 UWORD32 u4_bs; 301 ps_tu = ps_bs_ctxt->ps_tu + i; 302 303 /* start_pos_x and start_pos_y are in units of min TU size (4x4) */ 304 start_pos_x = ps_tu->b4_pos_x; 305 start_pos_y = ps_tu->b4_pos_y; 306 307 tu_size = 1 << (ps_tu->b3_size + 2); 308 tu_size >>= 2; /* TU size divided by 4 */ 309 310 u4_bs = DUP_LSB_10(tu_size); 311 312 /* Only if the current edge falls on 8 pixel grid set BS */ 313 if(0 == (start_pos_x & 1)) 314 { 315 WORD32 shift; 316 shift = start_pos_y * 2; 317 /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1)); 318 * will reduce to the following assuming ctb size is one of 16, 32 and 64 319 * and deblocking is done on 8x8 grid 320 */ 321 if(6 != log2_ctb_size) 322 shift += ((start_pos_x & 2) << (log2_ctb_size - 2)); 323 pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift); 324 } 325 /* Only if the current edge falls on 8 pixel grid set BS */ 326 if(0 == (start_pos_y & 1)) 327 { 328 WORD32 shift; 329 shift = start_pos_x * 2; 330 /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1)); 331 * will reduce to the following assuming ctb size is one of 16, 32 and 64 332 * and deblocking is done on 8x8 grid 333 */ 334 if(6 != log2_ctb_size) 335 shift += ((start_pos_y & 2) << (log2_ctb_size - 2)); 336 pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift); 337 } 338 339 /* Populating the QP array */ 340 if(0 == u4_qp_const_in_ctb) 341 { 342 if(0 == (start_pos_x & 1) && 0 == (start_pos_y & 1)) 343 { 344 WORD32 row, col; 345 for(row = start_pos_y; row < start_pos_y + tu_size; row += 2) 346 { 347 for(col = start_pos_x; col < start_pos_x + tu_size; col += 2) 348 { 349 pu1_qp[(row >> 1) * qp_strd + (col >> 1)] = ps_tu->b7_qp; 350 } 351 } 352 } 353 } 354 355 } 356 { 357 /*Determine if the slice is dependent, and is its left neighbor belongs to the same slice, in a different tile*/ 358 UWORD32 ctb_addr; 359 WORD32 slice_idx, left_slice_idx = -1, top_slice_idx = -1; 360 /* If left neighbor is not available, then set BS for entire first column to zero */ 361 if(!ps_pps->i1_tiles_enabled_flag) 362 { 363 if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_x) || 364 (0 == i1_loop_filter_across_slices_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_slice_x && 0 == ps_bs_ctxt->i4_ctb_slice_y) || 365 (0 == ps_bs_ctxt->i4_ctb_x)) 366 { 367 pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2); 368 } 369 } 370 else 371 { 372 //If across-tiles is disabled 373 if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_x)) 374 { 375 pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2); 376 } 377 else 378 { 379 ctb_addr = ps_bs_ctxt->i4_ctb_x + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb); 380 slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr]; 381 if(ps_bs_ctxt->i4_ctb_x) 382 { 383 ctb_addr = (ps_bs_ctxt->i4_ctb_x - 1) + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb); 384 left_slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr]; 385 } 386 /*If the 1st slice in a new tile is a dependent slice*/ 387 if(!((ps_bs_ctxt->ps_slice_hdr->i1_dependent_slice_flag == 1) && (slice_idx == left_slice_idx))) 388 { 389 if((0 == i1_loop_filter_across_slices_enabled_flag && ( 390 (0 == ps_bs_ctxt->i4_ctb_slice_x && 0 == ps_bs_ctxt->i4_ctb_slice_y) || (0 == ps_bs_ctxt->i4_ctb_slice_x && 0 == ps_bs_ctxt->i4_ctb_tile_x) || 391 ((0 == ps_bs_ctxt->i4_ctb_tile_x) && (slice_idx != left_slice_idx)))) || 392 (0 == ps_bs_ctxt->i4_ctb_x)) 393 { 394 pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2); 395 } 396 } 397 } 398 } 399 400 ctb_addr = ps_bs_ctxt->i4_ctb_x + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb); 401 slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr]; 402 if(ps_bs_ctxt->i4_ctb_y) 403 { 404 ctb_addr = (ps_bs_ctxt->i4_ctb_x) + ((ps_bs_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb); 405 top_slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr]; 406 } 407 408 /* If top neighbor is not available, then set BS for entire first row to zero */ 409 if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_y) 410 || (0 == i1_loop_filter_across_slices_enabled_flag && ((0 == ps_bs_ctxt->i4_ctb_slice_y) || (slice_idx != top_slice_idx))) 411 || (0 == ps_bs_ctxt->i4_ctb_y)) 412 { 413 pu4_horz_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2); 414 } 415 } 416 417 /** 418 * Set BS of bottom and right frame boundaries to zero if it is an incomplete CTB 419 * (They might have been set to non zero values because of CBF of the current CTB) 420 * This block might not be needed for I slices*/ 421 { 422 WORD32 num_rows_remaining = (ps_sps->i2_pic_height_in_luma_samples - (ps_bs_ctxt->i4_ctb_y << log2_ctb_size)) >> 3; 423 WORD32 num_cols_remaining = (ps_sps->i2_pic_width_in_luma_samples - (ps_bs_ctxt->i4_ctb_x << log2_ctb_size)) >> 3; 424 if(num_rows_remaining < (ctb_size >> 3)) 425 { 426 /* WORD32 offset = (((num_rows_remaining >> 3) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 4)); 427 * will reduce to the following assuming ctb size is one of 16, 32 and 64 428 * and deblocking is done on 8x8 grid 429 */ 430 WORD32 offset; 431 offset = (num_rows_remaining >> (6 - log2_ctb_size)) << 2; 432 if(6 != log2_ctb_size) 433 offset += (num_rows_remaining & 1) << (log2_ctb_size - 4); 434 435 memset(((UWORD8 *)pu4_horz_bs) + offset, 0, 1 << (log2_ctb_size - 4)); 436 } 437 438 if(num_cols_remaining < (ctb_size >> 3)) 439 { 440 /* WORD32 offset = (((num_cols_remaining >> 3) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 4)); 441 * will reduce to the following assuming ctb size is one of 16, 32 and 64 442 * and deblocking is done on 8x8 grid 443 */ 444 445 WORD32 offset; 446 offset = (num_cols_remaining >> (6 - log2_ctb_size)) << 2; 447 if(6 != log2_ctb_size) 448 offset += (num_cols_remaining & 1) << (log2_ctb_size - 4); 449 450 memset(((UWORD8 *)pu4_vert_bs) + offset, 0, 1 << (log2_ctb_size - 4)); 451 } 452 } 453 454 return 0; 455 } 456 WORD32 ihevcd_ctb_boundary_strength_pbslice(bs_ctxt_t *ps_bs_ctxt) 457 { 458 sps_t *ps_sps; 459 pps_t *ps_pps; 460 WORD32 cur_ctb_idx, next_ctb_idx = 0; 461 WORD32 i4_tu_cnt; 462 WORD32 i4_pu_cnt; 463 tu_t *ps_tu; 464 465 UWORD32 *pu4_vert_bs; 466 UWORD32 *pu4_horz_bs; 467 WORD32 bs_strd; 468 WORD32 vert_bs0_tmp; 469 WORD32 horz_bs0_tmp; 470 UWORD8 *pu1_qp; 471 WORD32 qp_strd; 472 UWORD32 u4_qp_const_in_ctb; 473 WORD32 ctb_indx; 474 WORD32 log2_ctb_size; 475 WORD32 ctb_size; 476 477 WORD32 i; 478 WORD8 i1_loop_filter_across_tiles_enabled_flag; 479 WORD8 i1_loop_filter_across_slices_enabled_flag; 480 481 PROFILE_DISABLE_BOUNDARY_STRENGTH(); 482 483 ps_sps = ps_bs_ctxt->ps_sps; 484 ps_pps = ps_bs_ctxt->ps_pps; 485 486 log2_ctb_size = ps_sps->i1_log2_ctb_size; 487 ctb_size = (1 << log2_ctb_size); 488 489 /* strides are in units of number of bytes */ 490 /* ctb_size * ctb_size / 8 / 16 is the number of bytes needed per CTB */ 491 bs_strd = (ps_sps->i2_pic_wd_in_ctb + 1) << (2 * log2_ctb_size - 7); 492 493 pu4_vert_bs = (UWORD32 *)((UWORD8 *)ps_bs_ctxt->pu4_pic_vert_bs + 494 (ps_bs_ctxt->i4_ctb_x << (2 * log2_ctb_size - 7)) + 495 ps_bs_ctxt->i4_ctb_y * bs_strd); 496 pu4_horz_bs = (UWORD32 *)((UWORD8 *)ps_bs_ctxt->pu4_pic_horz_bs + 497 (ps_bs_ctxt->i4_ctb_x << (2 * log2_ctb_size - 7)) + 498 ps_bs_ctxt->i4_ctb_y * bs_strd); 499 500 vert_bs0_tmp = pu4_vert_bs[0] & (0xFFFFFFFF >> (sizeof(UWORD32) * 8 - ctb_size / 2)); 501 horz_bs0_tmp = pu4_horz_bs[0] & (0xFFFFFFFF >> (sizeof(UWORD32) * 8 - ctb_size / 2)); 502 503 ps_tu = ps_bs_ctxt->ps_tu; 504 505 /* ctb_size/8 elements per CTB */ 506 qp_strd = ps_sps->i2_pic_wd_in_ctb << (log2_ctb_size - 3); 507 pu1_qp = ps_bs_ctxt->pu1_pic_qp + ((ps_bs_ctxt->i4_ctb_x + ps_bs_ctxt->i4_ctb_y * qp_strd) << (log2_ctb_size - 3)); 508 509 ctb_indx = ps_bs_ctxt->i4_ctb_x + ps_sps->i2_pic_wd_in_ctb * ps_bs_ctxt->i4_ctb_y; 510 u4_qp_const_in_ctb = ps_bs_ctxt->pu1_pic_qp_const_in_ctb[ctb_indx >> 3] & (1 << (ctb_indx & 7)); 511 512 i1_loop_filter_across_tiles_enabled_flag = ps_pps->i1_loop_filter_across_tiles_enabled_flag; 513 i1_loop_filter_across_slices_enabled_flag = ps_bs_ctxt->ps_slice_hdr->i1_slice_loop_filter_across_slices_enabled_flag; 514 515 /* ctb_size/8 is the number of edges per CTB 516 * ctb_size/4 is the number of BS values needed per edge 517 * divided by 8 for the number of bytes 518 * 2 is the number of bits needed for each BS value */ 519 /* 520 memset(pu4_vert_bs, 0, (ctb_size / 8 + 1) * (ctb_size / 4) * 2 / 8 ); 521 memset(pu4_horz_bs, 0, (ctb_size / 8) * (ctb_size / 4) * 2 / 8 ); 522 */ 523 memset(pu4_vert_bs, 0, (1 << (2 * log2_ctb_size - 7)) + (ctb_size >> 4)); 524 memset(pu4_horz_bs, 0, (1 << (2 * log2_ctb_size - 7))); 525 526 /* pu4_vert_bs[0] has information about the left CTB which is not required when ctb_x = 0 */ 527 if(0 != ps_bs_ctxt->i4_ctb_x) 528 { 529 pu4_vert_bs[0] |= vert_bs0_tmp; 530 } 531 532 /* pu4_horz_bs[0] has information about the top CTB which is not required when ctb_y = 0 */ 533 if(0 != ps_bs_ctxt->i4_ctb_y) 534 { 535 pu4_horz_bs[0] |= horz_bs0_tmp; 536 } 537 /* pu4_horz_bs[bs_strd / 4] corresponds to pu4_horz_bs[0] of the bottom CTB */ 538 *(UWORD32 *)((UWORD8 *)pu4_horz_bs + bs_strd) = 0; 539 540 cur_ctb_idx = ps_bs_ctxt->i4_ctb_x 541 + ps_bs_ctxt->i4_ctb_y * (ps_sps->i2_pic_wd_in_ctb); 542 next_ctb_idx = ps_bs_ctxt->i4_next_tu_ctb_cnt; 543 if(1 == ps_bs_ctxt->ps_codec->i4_num_cores) 544 { 545 i4_tu_cnt = ps_bs_ctxt->pu4_pic_tu_idx[next_ctb_idx] - ps_bs_ctxt->pu4_pic_tu_idx[cur_ctb_idx % RESET_TU_BUF_NCTB]; 546 } 547 else 548 { 549 i4_tu_cnt = ps_bs_ctxt->pu4_pic_tu_idx[next_ctb_idx] - ps_bs_ctxt->pu4_pic_tu_idx[cur_ctb_idx]; 550 } 551 552 ps_tu = ps_bs_ctxt->ps_tu; 553 if(u4_qp_const_in_ctb) 554 pu1_qp[0] = ps_tu->b7_qp; 555 556 /* For all TUs in the CTB For left and top edges, check if there are coded coefficients on either sides of the edge */ 557 for(i = 0; i < i4_tu_cnt; i++) 558 { 559 WORD32 start_pos_x; 560 WORD32 start_pos_y; 561 WORD32 end_pos_x; 562 WORD32 end_pos_y; 563 WORD32 tu_size; 564 UWORD32 u4_bs; 565 WORD32 intra_flag; 566 UWORD8 *pu1_pic_intra_flag; 567 568 ps_tu = ps_bs_ctxt->ps_tu + i; 569 570 start_pos_x = ps_tu->b4_pos_x; 571 start_pos_y = ps_tu->b4_pos_y; 572 573 tu_size = 1 << (ps_tu->b3_size + 2); 574 tu_size >>= 2; 575 576 end_pos_x = start_pos_x + tu_size; 577 end_pos_y = start_pos_y + tu_size; 578 579 { 580 WORD32 tu_abs_x = (ps_bs_ctxt->i4_ctb_x << log2_ctb_size) + (start_pos_x << 2); 581 WORD32 tu_abs_y = (ps_bs_ctxt->i4_ctb_y << log2_ctb_size) + (start_pos_y << 2); 582 583 WORD32 numbytes_row = (ps_sps->i2_pic_width_in_luma_samples + 63) / 64; 584 585 pu1_pic_intra_flag = ps_bs_ctxt->ps_codec->pu1_pic_intra_flag; 586 pu1_pic_intra_flag += (tu_abs_y >> 3) * numbytes_row; 587 pu1_pic_intra_flag += (tu_abs_x >> 6); 588 589 intra_flag = *pu1_pic_intra_flag; 590 intra_flag &= (1 << ((tu_abs_x >> 3) % 8)); 591 } 592 if(intra_flag) 593 { 594 u4_bs = DUP_LSB_10(tu_size); 595 596 /* Only if the current edge falls on 8 pixel grid set BS */ 597 if(0 == (start_pos_x & 1)) 598 { 599 WORD32 shift; 600 shift = start_pos_y * 2; 601 /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1)); 602 * will reduce to the following assuming ctb size is one of 16, 32 and 64 603 * and deblocking is done on 8x8 grid 604 */ 605 if(6 != log2_ctb_size) 606 shift += ((start_pos_x & 2) << (log2_ctb_size - 2)); 607 pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift); 608 } 609 /* Only if the current edge falls on 8 pixel grid set BS */ 610 if(0 == (start_pos_y & 1)) 611 { 612 WORD32 shift; 613 shift = start_pos_x * 2; 614 /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1)); 615 * will reduce to the following assuming ctb size is one of 16, 32 and 64 616 * and deblocking is done on 8x8 grid 617 */ 618 if(6 != log2_ctb_size) 619 shift += ((start_pos_y & 2) << (log2_ctb_size - 2)); 620 pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift); 621 } 622 } 623 624 625 /* If the current TU is coded then set both top edge and left edge BS to 1 and go to next TU */ 626 if(ps_tu->b1_y_cbf) 627 { 628 u4_bs = DUP_LSB_01(tu_size); 629 630 /* Only if the current edge falls on 8 pixel grid set BS */ 631 if(0 == (start_pos_x & 1)) 632 { 633 WORD32 shift; 634 shift = start_pos_y * 2; 635 /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1)); 636 * will reduce to the following assuming ctb size is one of 16, 32 and 64 637 * and deblocking is done on 8x8 grid 638 */ 639 if(6 != log2_ctb_size) 640 shift += ((start_pos_x & 2) << (log2_ctb_size - 2)); 641 pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift); 642 } 643 /* Only if the current edge falls on 8 pixel grid set BS */ 644 if(0 == (start_pos_y & 1)) 645 { 646 WORD32 shift; 647 shift = start_pos_x * 2; 648 /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1)); 649 * will reduce to the following assuming ctb size is one of 16, 32 and 64 650 * and deblocking is done on 8x8 grid 651 */ 652 if(6 != log2_ctb_size) 653 shift += ((start_pos_y & 2) << (log2_ctb_size - 2)); 654 pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift); 655 } 656 /* Only if the current edge falls on 8 pixel grid set BS */ 657 if(0 == (end_pos_x & 1)) 658 { 659 if(!(ctb_size / 8 == (end_pos_x >> 1) && ps_bs_ctxt->i4_ctb_x == ps_sps->i2_pic_wd_in_ctb - 1)) 660 { 661 WORD32 shift; 662 shift = start_pos_y * 2; 663 shift += (((end_pos_x >> 1) & ((MAX_CTB_SIZE >> log2_ctb_size) - 1)) << (log2_ctb_size - 1)); 664 pu4_vert_bs[end_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift); 665 } 666 } 667 /* Only if the current edge falls on 8 pixel grid set BS */ 668 if(0 == (end_pos_y & 1)) 669 { 670 /* If end_pos_y corresponds to the bottom of the CTB, write to pu4_horz_bs[0] of the bottom CTB */ 671 if(ctb_size / 8 == (end_pos_y >> 1)) 672 { 673 *(UWORD32 *)((UWORD8 *)pu4_horz_bs + bs_strd) |= (u4_bs << (start_pos_x * 2)); 674 } 675 else 676 { 677 WORD32 shift; 678 shift = start_pos_x * 2; 679 shift += (((end_pos_y >> 1) & ((MAX_CTB_SIZE >> log2_ctb_size) - 1)) << (log2_ctb_size - 1)); 680 pu4_horz_bs[end_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift); 681 } 682 } 683 } 684 685 if(0 == u4_qp_const_in_ctb) 686 { 687 if(0 == (start_pos_x & 1) && 0 == (start_pos_y & 1)) 688 { 689 WORD32 row, col; 690 for(row = start_pos_y; row < start_pos_y + tu_size; row += 2) 691 { 692 for(col = start_pos_x; col < start_pos_x + tu_size; col += 2) 693 { 694 pu1_qp[(row >> 1) * qp_strd + (col >> 1)] = ps_tu->b7_qp; 695 } 696 } 697 } 698 } 699 } 700 701 /* For all PUs in the CTB, 702 For left and top edges, compute BS */ 703 704 cur_ctb_idx = ps_bs_ctxt->i4_ctb_x 705 + ps_bs_ctxt->i4_ctb_y * (ps_sps->i2_pic_wd_in_ctb); 706 707 { 708 WORD32 next_ctb_idx; 709 next_ctb_idx = ps_bs_ctxt->i4_next_pu_ctb_cnt; 710 i4_pu_cnt = ps_bs_ctxt->pu4_pic_pu_idx[next_ctb_idx] - ps_bs_ctxt->pu4_pic_pu_idx[cur_ctb_idx]; 711 } 712 713 for(i = 0; i < i4_pu_cnt; i++) 714 { 715 WORD32 start_pos_x; 716 WORD32 start_pos_y; 717 WORD32 end_pos_x; 718 WORD32 end_pos_y; 719 WORD32 pu_wd, pu_ht; 720 UWORD32 u4_bs; 721 pu_t *ps_pu = ps_bs_ctxt->ps_pu + i; 722 pu_t *ps_ngbr_pu; 723 UWORD32 u4_ngbr_pu_indx; 724 725 start_pos_x = ps_pu->b4_pos_x; 726 start_pos_y = ps_pu->b4_pos_y; 727 728 pu_wd = (ps_pu->b4_wd + 1); 729 pu_ht = (ps_pu->b4_ht + 1); 730 731 end_pos_x = start_pos_x + pu_wd; 732 end_pos_y = start_pos_y + pu_ht; 733 734 /* If the current PU is intra, set Boundary strength as 2 for both top and left edge */ 735 /* Need not mask the BS to zero even if it was set to 1 already since BS 2 and 3 are assumed to be the same in leaf level functions */ 736 if(ps_pu->b1_intra_flag) 737 { 738 u4_bs = DUP_LSB_10(pu_ht); 739 740 /* Only if the current edge falls on 8 pixel grid set BS */ 741 if(0 == (start_pos_x & 1)) 742 { 743 WORD32 shift; 744 shift = start_pos_y * 2; 745 /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1)); 746 * will reduce to the following assuming ctb size is one of 16, 32 and 64 747 * and deblocking is done on 8x8 grid 748 */ 749 if(6 != log2_ctb_size) 750 shift += ((start_pos_x & 2) << (log2_ctb_size - 2)); 751 pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift); 752 } 753 754 u4_bs = DUP_LSB_10(pu_wd); 755 756 /* Only if the current edge falls on 8 pixel grid set BS */ 757 if(0 == (start_pos_y & 1)) 758 { 759 WORD32 shift; 760 shift = start_pos_x * 2; 761 /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1)); 762 * will reduce to the following assuming ctb size is one of 16, 32 and 64 763 * and deblocking is done on 8x8 grid 764 */ 765 if(6 != log2_ctb_size) 766 shift += ((start_pos_y & 2) << (log2_ctb_size - 2)); 767 pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift); 768 } 769 } 770 771 else 772 { 773 /* Vertical edge */ 774 /* Process only if the edge is not a frame edge */ 775 if(0 != ps_bs_ctxt->i4_ctb_x + start_pos_x) 776 { 777 do 778 { 779 WORD32 pu_ngbr_ht; 780 WORD32 min_pu_ht; 781 WORD32 ngbr_end_pos_y; 782 UWORD32 ngbr_pu_idx_strd; 783 ngbr_pu_idx_strd = MAX_CTB_SIZE / MIN_PU_SIZE + 2; 784 u4_ngbr_pu_indx = ps_bs_ctxt->pu4_pic_pu_idx_map[(start_pos_y + 1) * ngbr_pu_idx_strd + (start_pos_x)]; 785 ps_ngbr_pu = ps_bs_ctxt->ps_pic_pu + u4_ngbr_pu_indx; 786 787 pu_ngbr_ht = ps_ngbr_pu->b4_ht + 1; 788 ngbr_end_pos_y = ps_ngbr_pu->b4_pos_y + pu_ngbr_ht; 789 790 min_pu_ht = MIN(ngbr_end_pos_y, end_pos_y) - start_pos_y; 791 792 if(ps_ngbr_pu->b1_intra_flag) 793 { 794 u4_bs = DUP_LSB_10(min_pu_ht); 795 796 /* Only if the current edge falls on 8 pixel grid set BS */ 797 if(0 == (start_pos_x & 1)) 798 { 799 WORD32 shift; 800 shift = start_pos_y * 2; 801 /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1)); 802 * will reduce to the following assuming ctb size is one of 16, 32 and 64 803 * and deblocking is done on 8x8 grid 804 */ 805 if(6 != log2_ctb_size) 806 shift += ((start_pos_x & 2) << (log2_ctb_size - 2)); 807 pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift); 808 } 809 } 810 else 811 { 812 u4_bs = ihevcd_pu_boundary_strength(ps_pu, ps_ngbr_pu); 813 if(u4_bs) 814 { 815 u4_bs = DUP_LSB_01(min_pu_ht); 816 if(0 == (start_pos_x & 1)) 817 { 818 WORD32 shift; 819 shift = start_pos_y * 2; 820 /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1)); 821 * will reduce to the following assuming ctb size is one of 16, 32 and 64 822 * and deblocking is done on 8x8 grid 823 */ 824 if(6 != log2_ctb_size) 825 shift += ((start_pos_x & 2) << (log2_ctb_size - 2)); 826 pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift); 827 } 828 } 829 } 830 831 pu_ht -= min_pu_ht; 832 start_pos_y += min_pu_ht; 833 }while(pu_ht > 0); 834 835 /* Reinitialising since the values are updated in the previous loop */ 836 pu_ht = ps_pu->b4_ht + 1; 837 start_pos_y = ps_pu->b4_pos_y; 838 } 839 840 /* Horizontal edge */ 841 /* Process only if the edge is not a frame edge */ 842 if(0 != ps_bs_ctxt->i4_ctb_y + start_pos_y) 843 { 844 do 845 { 846 WORD32 pu_ngbr_wd; 847 WORD32 min_pu_wd; 848 WORD32 ngbr_end_pos_x; 849 UWORD32 ngbr_pu_idx_strd = MAX_CTB_SIZE / MIN_PU_SIZE + 2; 850 u4_ngbr_pu_indx = ps_bs_ctxt->pu4_pic_pu_idx_map[(start_pos_y)*ngbr_pu_idx_strd + (start_pos_x + 1)]; 851 ps_ngbr_pu = ps_bs_ctxt->ps_pic_pu + u4_ngbr_pu_indx; 852 853 pu_ngbr_wd = ps_ngbr_pu->b4_wd + 1; 854 ngbr_end_pos_x = ps_ngbr_pu->b4_pos_x + pu_ngbr_wd; 855 856 min_pu_wd = MIN(ngbr_end_pos_x, end_pos_x) - start_pos_x; 857 858 if(ps_ngbr_pu->b1_intra_flag) 859 { 860 u4_bs = DUP_LSB_10(min_pu_wd); 861 862 /* Only if the current edge falls on 8 pixel grid set BS */ 863 if(0 == (start_pos_y & 1)) 864 { 865 WORD32 shift; 866 shift = start_pos_x * 2; 867 /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1)); 868 * will reduce to the following assuming ctb size is one of 16, 32 and 64 869 * and deblocking is done on 8x8 grid 870 */ 871 if(6 != log2_ctb_size) 872 shift += ((start_pos_y & 2) << (log2_ctb_size - 2)); 873 pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift); 874 } 875 } 876 else 877 { 878 u4_bs = ihevcd_pu_boundary_strength(ps_pu, ps_ngbr_pu); 879 if(u4_bs) 880 { 881 u4_bs = DUP_LSB_01(min_pu_wd); 882 883 /* Only if the current edge falls on 8 pixel grid set BS */ 884 if(0 == (start_pos_y & 1)) 885 { 886 WORD32 shift; 887 shift = start_pos_x * 2; 888 /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1)); 889 * will reduce to the following assuming ctb size is one of 16, 32 and 64 890 * and deblocking is done on 8x8 grid 891 */ 892 if(6 != log2_ctb_size) 893 shift += ((start_pos_y & 2) << (log2_ctb_size - 2)); 894 pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift); 895 } 896 } 897 } 898 899 pu_wd -= min_pu_wd; 900 start_pos_x += min_pu_wd; 901 }while(pu_wd > 0); 902 903 /* Reinitialising since the values are updated in the previous loop */ 904 pu_wd = ps_pu->b4_wd + 1; 905 start_pos_x = ps_pu->b4_pos_x; 906 } 907 } 908 } 909 910 { 911 /* If left neighbor is not available, then set BS for entire first column to zero */ 912 UWORD32 ctb_addr; 913 WORD32 slice_idx, left_slice_idx = -1, top_slice_idx = -1; 914 915 if(!ps_pps->i1_tiles_enabled_flag) 916 { 917 if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_x) || 918 (0 == i1_loop_filter_across_slices_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_slice_x && 0 == ps_bs_ctxt->i4_ctb_slice_y) || 919 (0 == ps_bs_ctxt->i4_ctb_x)) 920 { 921 pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2); 922 } 923 } 924 else 925 { 926 if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_x)) 927 { 928 pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2); 929 } 930 else 931 { 932 933 ctb_addr = ps_bs_ctxt->i4_ctb_x + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb); 934 slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr]; 935 936 if(ps_bs_ctxt->i4_ctb_x) 937 { 938 ctb_addr = (ps_bs_ctxt->i4_ctb_x - 1) + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb); 939 left_slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr]; 940 } 941 942 if(!((ps_bs_ctxt->ps_slice_hdr->i1_dependent_slice_flag == 1) && (slice_idx == left_slice_idx))) 943 { 944 if((0 == i1_loop_filter_across_slices_enabled_flag && ( 945 (0 == ps_bs_ctxt->i4_ctb_slice_x && 0 == ps_bs_ctxt->i4_ctb_slice_y) || (0 == ps_bs_ctxt->i4_ctb_slice_x && 0 == ps_bs_ctxt->i4_ctb_tile_x) 946 || ((0 == ps_bs_ctxt->i4_ctb_tile_x) && (slice_idx != left_slice_idx)))) || (0 == ps_bs_ctxt->i4_ctb_x)) 947 { 948 pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2); 949 } 950 } 951 } 952 } 953 954 ctb_addr = ps_bs_ctxt->i4_ctb_x + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb); 955 slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr]; 956 if(ps_bs_ctxt->i4_ctb_y) 957 { 958 ctb_addr = (ps_bs_ctxt->i4_ctb_x) + ((ps_bs_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb); 959 top_slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr]; 960 } 961 /* If top neighbor is not available, then set BS for entire first row to zero */ 962 if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_y) 963 || (0 == i1_loop_filter_across_slices_enabled_flag && ((0 == ps_bs_ctxt->i4_ctb_slice_y) || (slice_idx != top_slice_idx))) 964 || (0 == ps_bs_ctxt->i4_ctb_y)) 965 { 966 pu4_horz_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2); 967 } 968 } 969 970 /** 971 * Set BS of bottom and right frame boundaries to zero if it is an incomplete CTB 972 * (They might have set to non zero values because of CBF of the current CTB)*/ 973 { 974 WORD32 num_rows_remaining = (ps_sps->i2_pic_height_in_luma_samples - (ps_bs_ctxt->i4_ctb_y << log2_ctb_size)) >> 3; 975 WORD32 num_cols_remaining = (ps_sps->i2_pic_width_in_luma_samples - (ps_bs_ctxt->i4_ctb_x << log2_ctb_size)) >> 3; 976 if(num_rows_remaining < (ctb_size >> 3)) 977 { 978 /* WORD32 offset = (((num_rows_remaining >> 3) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 4)); 979 * will reduce to the following assuming ctb size is one of 16, 32 and 64 980 * and deblocking is done on 8x8 grid 981 */ 982 WORD32 offset; 983 offset = (num_rows_remaining >> (6 - log2_ctb_size)) << 2; 984 if(6 != log2_ctb_size) 985 offset += (num_rows_remaining & 1) << (log2_ctb_size - 4); 986 987 memset(((UWORD8 *)pu4_horz_bs) + offset, 0, 1 << (log2_ctb_size - 4)); 988 } 989 990 if(num_cols_remaining < (ctb_size >> 3)) 991 { 992 /* WORD32 offset = (((num_cols_remaining >> 3) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 4)); 993 * will reduce to the following assuming ctb size is one of 16, 32 and 64 994 * and deblocking is done on 8x8 grid 995 */ 996 997 WORD32 offset; 998 offset = (num_cols_remaining >> (6 - log2_ctb_size)) << 2; 999 if(6 != log2_ctb_size) 1000 offset += (num_cols_remaining & 1) << (log2_ctb_size - 4); 1001 1002 memset(((UWORD8 *)pu4_vert_bs) + offset, 0, 1 << (log2_ctb_size - 4)); 1003 } 1004 } 1005 return 0; 1006 } 1007