1 /****************************************************************************** 2 * 3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ******************************************************************************/ 18 /** 19 ******************************************************************************* 20 * @file 21 * ihevc_sao.c 22 * 23 * @brief 24 * Contains function definitions for sample adaptive offset process 25 * 26 * @author 27 * Srinivas T 28 * 29 * @par List of Functions: 30 * 31 * @remarks 32 * None 33 * 34 ******************************************************************************* 35 */ 36 37 #include <stdio.h> 38 #include <stddef.h> 39 #include <stdlib.h> 40 #include <string.h> 41 #include <assert.h> 42 43 #include "ihevc_typedefs.h" 44 #include "iv.h" 45 #include "ivd.h" 46 #include "ihevcd_cxa.h" 47 #include "ithread.h" 48 49 #include "ihevc_defs.h" 50 #include "ihevc_debug.h" 51 #include "ihevc_defs.h" 52 #include "ihevc_structs.h" 53 #include "ihevc_macros.h" 54 #include "ihevc_platform_macros.h" 55 #include "ihevc_cabac_tables.h" 56 #include "ihevc_sao.h" 57 #include "ihevc_mem_fns.h" 58 59 #include "ihevc_error.h" 60 #include "ihevc_common_tables.h" 61 62 #include "ihevcd_trace.h" 63 #include "ihevcd_defs.h" 64 #include "ihevcd_function_selector.h" 65 #include "ihevcd_structs.h" 66 #include "ihevcd_error.h" 67 #include "ihevcd_nal.h" 68 #include "ihevcd_bitstream.h" 69 #include "ihevcd_job_queue.h" 70 #include "ihevcd_utils.h" 71 72 #include "ihevc_deblk.h" 73 #include "ihevc_deblk_tables.h" 74 #include "ihevcd_profile.h" 75 #include "ihevcd_sao.h" 76 #include "ihevcd_debug.h" 77 78 #define SAO_SHIFT_CTB 8 79 80 /** 81 * SAO at CTB level is implemented for a shifted CTB(8 pixels in x and y directions) 82 */ 83 void ihevcd_sao_ctb(sao_ctxt_t *ps_sao_ctxt) 84 { 85 codec_t *ps_codec = ps_sao_ctxt->ps_codec; 86 UWORD8 *pu1_src_luma; 87 UWORD8 *pu1_src_chroma; 88 WORD32 src_strd; 89 WORD32 ctb_size; 90 WORD32 log2_ctb_size; 91 sps_t *ps_sps; 92 sao_t *ps_sao; 93 WORD32 row, col; 94 UWORD8 au1_avail_luma[8]; 95 UWORD8 au1_avail_chroma[8]; 96 WORD32 i; 97 UWORD8 *pu1_src_top_luma; 98 UWORD8 *pu1_src_top_chroma; 99 UWORD8 *pu1_src_left_luma; 100 UWORD8 *pu1_src_left_chroma; 101 UWORD8 au1_src_top_right[2]; 102 UWORD8 au1_src_bot_left[2]; 103 UWORD8 *pu1_no_loop_filter_flag; 104 WORD32 loop_filter_strd; 105 106 WORD8 ai1_offset_y[5]; 107 WORD8 ai1_offset_cb[5]; 108 WORD8 ai1_offset_cr[5]; 109 110 PROFILE_DISABLE_SAO(); 111 112 ai1_offset_y[0] = 0; 113 ai1_offset_cb[0] = 0; 114 ai1_offset_cr[0] = 0; 115 116 ps_sps = ps_sao_ctxt->ps_sps; 117 log2_ctb_size = ps_sps->i1_log2_ctb_size; 118 ctb_size = (1 << log2_ctb_size); 119 src_strd = ps_sao_ctxt->ps_codec->i4_strd; 120 pu1_src_luma = ps_sao_ctxt->pu1_cur_pic_luma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd) << (log2_ctb_size)); 121 pu1_src_chroma = ps_sao_ctxt->pu1_cur_pic_chroma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd / 2) << (log2_ctb_size)); 122 123 ps_sao = ps_sao_ctxt->ps_pic_sao + ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb; 124 loop_filter_strd = (ps_sps->i2_pic_width_in_luma_samples + 63) / 64; 125 126 /* Current CTB */ 127 { 128 WORD32 sao_wd_luma; 129 WORD32 sao_wd_chroma; 130 WORD32 sao_ht_luma; 131 WORD32 sao_ht_chroma; 132 133 WORD32 remaining_rows; 134 WORD32 remaining_cols; 135 136 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size); 137 sao_wd_luma = MIN(ctb_size, remaining_cols); 138 sao_wd_chroma = MIN(ctb_size, remaining_cols); 139 140 remaining_rows = ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y << log2_ctb_size); 141 sao_ht_luma = MIN(ctb_size, remaining_rows); 142 sao_ht_chroma = MIN(ctb_size, remaining_rows) / 2; 143 144 pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size); 145 pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size); 146 pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size); 147 pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size); 148 149 pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag + 150 ((ps_sao_ctxt->i4_ctb_y * ctb_size) / 8) * loop_filter_strd + 151 ((ps_sao_ctxt->i4_ctb_x * ctb_size) / 64); 152 153 ai1_offset_y[1] = ps_sao->b4_y_offset_1; 154 ai1_offset_y[2] = ps_sao->b4_y_offset_2; 155 ai1_offset_y[3] = ps_sao->b4_y_offset_3; 156 ai1_offset_y[4] = ps_sao->b4_y_offset_4; 157 158 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1; 159 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2; 160 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3; 161 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4; 162 163 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1; 164 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2; 165 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3; 166 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4; 167 168 for(i = 0; i < 8; i++) 169 { 170 au1_avail_luma[i] = 255; 171 au1_avail_chroma[i] = 255; 172 } 173 174 175 if(0 == ps_sao_ctxt->i4_ctb_x) 176 { 177 au1_avail_luma[0] = 0; 178 au1_avail_luma[4] = 0; 179 au1_avail_luma[6] = 0; 180 181 au1_avail_chroma[0] = 0; 182 au1_avail_chroma[4] = 0; 183 au1_avail_chroma[6] = 0; 184 } 185 186 if(ps_sps->i2_pic_wd_in_ctb - 1 == ps_sao_ctxt->i4_ctb_x) 187 { 188 au1_avail_luma[1] = 0; 189 au1_avail_luma[5] = 0; 190 au1_avail_luma[7] = 0; 191 192 au1_avail_chroma[1] = 0; 193 au1_avail_chroma[5] = 0; 194 au1_avail_chroma[7] = 0; 195 } 196 197 if(0 == ps_sao_ctxt->i4_ctb_y) 198 { 199 au1_avail_luma[2] = 0; 200 au1_avail_luma[4] = 0; 201 au1_avail_luma[5] = 0; 202 203 au1_avail_chroma[2] = 0; 204 au1_avail_chroma[4] = 0; 205 au1_avail_chroma[5] = 0; 206 } 207 208 if(ps_sps->i2_pic_ht_in_ctb - 1 == ps_sao_ctxt->i4_ctb_y) 209 { 210 au1_avail_luma[3] = 0; 211 au1_avail_luma[6] = 0; 212 au1_avail_luma[7] = 0; 213 214 au1_avail_chroma[3] = 0; 215 au1_avail_chroma[6] = 0; 216 au1_avail_chroma[7] = 0; 217 } 218 219 220 if(0 == ps_sao->b3_y_type_idx) 221 { 222 /* Update left, top and top-left */ 223 for(row = 0; row < sao_ht_luma; row++) 224 { 225 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)]; 226 } 227 ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1]; 228 229 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma); 230 231 } 232 else 233 { 234 UWORD8 au1_src_copy[(MAX_CTB_SIZE + 2) * (MAX_CTB_SIZE + 2)]; 235 UWORD8 *pu1_src_copy = au1_src_copy + (MAX_CTB_SIZE + 2) + 1; 236 WORD32 tmp_strd = MAX_CTB_SIZE + 2; 237 WORD32 no_loop_filter_enabled = 0; 238 239 /* Check the loop filter flags and copy the original values for back up */ 240 { 241 UWORD32 u4_no_loop_filter_flag; 242 WORD32 min_cu = 8; 243 UWORD8 *pu1_src_tmp = pu1_src_luma; 244 245 for(i = 0; i < (sao_ht_luma + min_cu - 1) / min_cu; i++) 246 { 247 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> 248 ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma) / 8) % 8); 249 u4_no_loop_filter_flag &= (1 << ((sao_wd_luma + (min_cu - 1)) / min_cu)) - 1; 250 251 if(u4_no_loop_filter_flag) 252 { 253 WORD32 tmp_wd = sao_wd_luma; 254 no_loop_filter_enabled = 1; 255 while(tmp_wd > 0) 256 { 257 if(CTZ(u4_no_loop_filter_flag)) 258 { 259 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag)); 260 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd); 261 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd); 262 tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu; 263 } 264 else 265 { 266 for(row = 0; row < MIN(min_cu, sao_ht_luma - (i - 1) * min_cu); row++) 267 { 268 for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++) 269 { 270 pu1_src_copy[row * src_strd + col] = pu1_src_tmp[row * tmp_strd + col]; 271 } 272 } 273 274 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag)); 275 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd); 276 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd); 277 tmp_wd -= CTZ(~u4_no_loop_filter_flag) * min_cu; 278 } 279 } 280 281 pu1_src_tmp -= sao_wd_luma; 282 } 283 284 pu1_src_tmp += min_cu * src_strd; 285 pu1_src_copy += min_cu * tmp_strd; 286 } 287 } 288 289 if(1 == ps_sao->b3_y_type_idx) 290 { 291 ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma, 292 src_strd, 293 pu1_src_left_luma, 294 pu1_src_top_luma, 295 ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb, 296 ps_sao->b5_y_band_pos, 297 ai1_offset_y, 298 sao_wd_luma, 299 sao_ht_luma); 300 } 301 else // if(2 <= ps_sao->b3_y_type_idx) 302 { 303 au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma]; 304 au1_src_bot_left[0] = pu1_src_luma[sao_ht_luma * src_strd - 1]; 305 ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma, 306 src_strd, 307 pu1_src_left_luma, 308 pu1_src_top_luma, 309 ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb, 310 au1_src_top_right, 311 au1_src_bot_left, 312 au1_avail_luma, 313 ai1_offset_y, 314 sao_wd_luma, 315 sao_ht_luma); 316 } 317 318 /* Check the loop filter flags and copy the original values back if they are set */ 319 if(no_loop_filter_enabled) 320 { 321 UWORD32 u4_no_loop_filter_flag; 322 WORD32 min_cu = 8; 323 UWORD8 *pu1_src_tmp = pu1_src_luma; 324 325 for(i = 0; i < (sao_ht_luma + min_cu - 1) / min_cu; i++) 326 { 327 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma) / 8) % 8); 328 u4_no_loop_filter_flag &= (1 << ((sao_wd_luma + (min_cu - 1)) / min_cu)) - 1; 329 330 if(u4_no_loop_filter_flag) 331 { 332 WORD32 tmp_wd = sao_wd_luma; 333 while(tmp_wd > 0) 334 { 335 if(CTZ(u4_no_loop_filter_flag)) 336 { 337 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag)); 338 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd); 339 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd); 340 tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu; 341 } 342 else 343 { 344 for(row = 0; row < MIN(min_cu, sao_ht_luma - (i - 1) * min_cu); row++) 345 { 346 for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++) 347 { 348 pu1_src_tmp[row * src_strd + col] = pu1_src_copy[row * tmp_strd + col]; 349 } 350 } 351 352 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag)); 353 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd); 354 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd); 355 tmp_wd -= CTZ(~u4_no_loop_filter_flag) * min_cu; 356 } 357 } 358 359 pu1_src_tmp -= sao_wd_luma; 360 } 361 362 pu1_src_tmp += min_cu * src_strd; 363 pu1_src_copy += min_cu * tmp_strd; 364 } 365 } 366 367 } 368 369 if(0 == ps_sao->b3_cb_type_idx) 370 { 371 for(row = 0; row < sao_ht_chroma; row++) 372 { 373 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)]; 374 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)]; 375 } 376 ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2]; 377 ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1]; 378 379 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma); 380 } 381 else 382 { 383 UWORD8 au1_src_copy[(MAX_CTB_SIZE + 4) * (MAX_CTB_SIZE + 2)]; 384 UWORD8 *pu1_src_copy = au1_src_copy + (MAX_CTB_SIZE + 4) + 2; 385 WORD32 tmp_strd = MAX_CTB_SIZE + 4; 386 WORD32 no_loop_filter_enabled = 0; 387 388 /* Check the loop filter flags and copy the original values for back up */ 389 { 390 UWORD32 u4_no_loop_filter_flag; 391 WORD32 min_cu = 4; 392 UWORD8 *pu1_src_tmp = pu1_src_chroma; 393 394 for(i = 0; i < (sao_ht_chroma + min_cu - 1) / min_cu; i++) 395 { 396 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma) / 8) % 8); 397 u4_no_loop_filter_flag &= (1 << ((sao_wd_chroma + (min_cu - 1)) / min_cu)) - 1; 398 399 if(u4_no_loop_filter_flag) 400 { 401 WORD32 tmp_wd = sao_wd_chroma; 402 no_loop_filter_enabled = 1; 403 while(tmp_wd > 0) 404 { 405 if(CTZ(u4_no_loop_filter_flag)) 406 { 407 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag)); 408 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd); 409 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd); 410 tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu; 411 } 412 else 413 { 414 for(row = 0; row < MIN(min_cu, sao_ht_chroma - (i - 1) * min_cu); row++) 415 { 416 for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++) 417 { 418 pu1_src_copy[row * src_strd + col] = pu1_src_tmp[row * tmp_strd + col]; 419 } 420 } 421 422 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag)); 423 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd); 424 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd); 425 tmp_wd -= CTZ(~u4_no_loop_filter_flag) * min_cu; 426 } 427 } 428 429 pu1_src_tmp -= sao_wd_chroma; 430 } 431 432 pu1_src_tmp += min_cu * src_strd; 433 pu1_src_copy += min_cu * tmp_strd; 434 } 435 } 436 437 if(1 == ps_sao->b3_cb_type_idx) 438 { 439 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma, 440 src_strd, 441 pu1_src_left_chroma, 442 pu1_src_top_chroma, 443 ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb, 444 ps_sao->b5_cb_band_pos, 445 ps_sao->b5_cr_band_pos, 446 ai1_offset_cb, 447 ai1_offset_cr, 448 sao_wd_chroma, 449 sao_ht_chroma 450 ); 451 } 452 else // if(2 <= ps_sao->b3_cb_type_idx) 453 { 454 au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma]; 455 au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1]; 456 au1_src_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2]; 457 au1_src_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1]; 458 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma, 459 src_strd, 460 pu1_src_left_chroma, 461 pu1_src_top_chroma, 462 ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb, 463 au1_src_top_right, 464 au1_src_bot_left, 465 au1_avail_chroma, 466 ai1_offset_cb, 467 ai1_offset_cr, 468 sao_wd_chroma, 469 sao_ht_chroma); 470 } 471 472 /* Check the loop filter flags and copy the original values back if they are set */ 473 if(no_loop_filter_enabled) 474 { 475 UWORD32 u4_no_loop_filter_flag; 476 WORD32 min_cu = 4; 477 UWORD8 *pu1_src_tmp = pu1_src_chroma; 478 479 for(i = 0; i < (sao_ht_chroma + min_cu - 1) / min_cu; i++) 480 { 481 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma) / 8) % 8); 482 u4_no_loop_filter_flag &= (1 << ((sao_wd_chroma + (min_cu - 1)) / min_cu)) - 1; 483 484 if(u4_no_loop_filter_flag) 485 { 486 WORD32 tmp_wd = sao_wd_chroma; 487 while(tmp_wd > 0) 488 { 489 if(CTZ(u4_no_loop_filter_flag)) 490 { 491 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag)); 492 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd); 493 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd); 494 tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu; 495 } 496 else 497 { 498 for(row = 0; row < MIN(min_cu, sao_ht_chroma - (i - 1) * min_cu); row++) 499 { 500 for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++) 501 { 502 pu1_src_tmp[row * src_strd + col] = pu1_src_copy[row * tmp_strd + col]; 503 } 504 } 505 506 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag)); 507 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd); 508 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd); 509 tmp_wd -= CTZ(~u4_no_loop_filter_flag) * min_cu; 510 } 511 } 512 513 pu1_src_tmp -= sao_wd_chroma; 514 } 515 516 pu1_src_tmp += min_cu * src_strd; 517 pu1_src_copy += min_cu * tmp_strd; 518 } 519 } 520 521 } 522 523 } 524 } 525 526 void ihevcd_sao_shift_ctb(sao_ctxt_t *ps_sao_ctxt) 527 { 528 codec_t *ps_codec = ps_sao_ctxt->ps_codec; 529 UWORD8 *pu1_src_luma; 530 UWORD8 *pu1_src_chroma; 531 WORD32 src_strd; 532 WORD32 ctb_size; 533 WORD32 log2_ctb_size; 534 sps_t *ps_sps; 535 sao_t *ps_sao; 536 pps_t *ps_pps; 537 slice_header_t *ps_slice_hdr, *ps_slice_hdr_base; 538 tile_t *ps_tile; 539 UWORD16 *pu1_slice_idx; 540 UWORD16 *pu1_tile_idx; 541 WORD32 row, col; 542 UWORD8 au1_avail_luma[8]; 543 UWORD8 au1_avail_chroma[8]; 544 UWORD8 au1_tile_slice_boundary[8]; 545 UWORD8 au4_ilf_across_tile_slice_enable[8]; 546 WORD32 i; 547 UWORD8 *pu1_src_top_luma; 548 UWORD8 *pu1_src_top_chroma; 549 UWORD8 *pu1_src_left_luma; 550 UWORD8 *pu1_src_left_chroma; 551 UWORD8 au1_src_top_right[2]; 552 UWORD8 au1_src_bot_left[2]; 553 UWORD8 *pu1_no_loop_filter_flag; 554 UWORD8 *pu1_src_backup_luma; 555 UWORD8 *pu1_src_backup_chroma; 556 WORD32 backup_strd; 557 WORD32 loop_filter_strd; 558 559 WORD32 no_loop_filter_enabled_luma = 0; 560 WORD32 no_loop_filter_enabled_chroma = 0; 561 UWORD8 *pu1_sao_src_top_left_chroma_curr_ctb; 562 UWORD8 *pu1_sao_src_top_left_luma_curr_ctb; 563 UWORD8 *pu1_sao_src_luma_top_left_ctb; 564 UWORD8 *pu1_sao_src_chroma_top_left_ctb; 565 UWORD8 *pu1_sao_src_top_left_luma_top_right; 566 UWORD8 *pu1_sao_src_top_left_chroma_top_right; 567 UWORD8 u1_sao_src_top_left_luma_bot_left; 568 UWORD8 *pu1_sao_src_top_left_luma_bot_left; 569 UWORD8 *au1_sao_src_top_left_chroma_bot_left; 570 UWORD8 *pu1_sao_src_top_left_chroma_bot_left; 571 572 WORD8 ai1_offset_y[5]; 573 WORD8 ai1_offset_cb[5]; 574 WORD8 ai1_offset_cr[5]; 575 WORD32 chroma_yuv420sp_vu = ps_sao_ctxt->is_chroma_yuv420sp_vu; 576 577 PROFILE_DISABLE_SAO(); 578 579 ai1_offset_y[0] = 0; 580 ai1_offset_cb[0] = 0; 581 ai1_offset_cr[0] = 0; 582 583 ps_sps = ps_sao_ctxt->ps_sps; 584 ps_pps = ps_sao_ctxt->ps_pps; 585 ps_tile = ps_sao_ctxt->ps_tile; 586 587 log2_ctb_size = ps_sps->i1_log2_ctb_size; 588 ctb_size = (1 << log2_ctb_size); 589 src_strd = ps_sao_ctxt->ps_codec->i4_strd; 590 ps_slice_hdr_base = ps_sao_ctxt->ps_codec->ps_slice_hdr_base; 591 ps_slice_hdr = ps_slice_hdr_base + (ps_sao_ctxt->i4_cur_slice_idx & (MAX_SLICE_HDR_CNT - 1)); 592 593 pu1_slice_idx = ps_sao_ctxt->pu1_slice_idx; 594 pu1_tile_idx = ps_sao_ctxt->pu1_tile_idx; 595 pu1_src_luma = ps_sao_ctxt->pu1_cur_pic_luma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd) << (log2_ctb_size)); 596 pu1_src_chroma = ps_sao_ctxt->pu1_cur_pic_chroma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd / 2) << (log2_ctb_size)); 597 598 /*Stores the left value for each row ctbs- Needed for column tiles*/ 599 pu1_sao_src_top_left_luma_curr_ctb = ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb + ((ps_sao_ctxt->i4_ctb_y)); 600 pu1_sao_src_top_left_chroma_curr_ctb = ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb + (2 * (ps_sao_ctxt->i4_ctb_y)); 601 pu1_sao_src_luma_top_left_ctb = ps_sao_ctxt->pu1_sao_src_luma_top_left_ctb + ((ps_sao_ctxt->i4_ctb_y)); 602 pu1_sao_src_chroma_top_left_ctb = ps_sao_ctxt->pu1_sao_src_chroma_top_left_ctb + (2 * ps_sao_ctxt->i4_ctb_y); 603 u1_sao_src_top_left_luma_bot_left = ps_sao_ctxt->u1_sao_src_top_left_luma_bot_left; // + ((ps_sao_ctxt->i4_ctb_y)); 604 pu1_sao_src_top_left_luma_bot_left = ps_sao_ctxt->pu1_sao_src_top_left_luma_bot_left + ((ps_sao_ctxt->i4_ctb_y)); 605 au1_sao_src_top_left_chroma_bot_left = ps_sao_ctxt->au1_sao_src_top_left_chroma_bot_left; // + (2 * ps_sao_ctxt->i4_ctb_y); 606 pu1_sao_src_top_left_chroma_bot_left = ps_sao_ctxt->pu1_sao_src_top_left_chroma_bot_left + (2 * ps_sao_ctxt->i4_ctb_y); 607 pu1_sao_src_top_left_luma_top_right = ps_sao_ctxt->pu1_sao_src_top_left_luma_top_right + ((ps_sao_ctxt->i4_ctb_x)); 608 pu1_sao_src_top_left_chroma_top_right = ps_sao_ctxt->pu1_sao_src_top_left_chroma_top_right + (2 * ps_sao_ctxt->i4_ctb_x); 609 610 ps_sao = ps_sao_ctxt->ps_pic_sao + ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb; 611 loop_filter_strd = (ps_sps->i2_pic_width_in_luma_samples + 63) >> 6; 612 backup_strd = 2 * MAX_CTB_SIZE; 613 614 DEBUG_INIT_TMP_BUF(ps_sao_ctxt->pu1_tmp_buf_luma, ps_sao_ctxt->pu1_tmp_buf_chroma); 615 616 { 617 /* Check the loop filter flags and copy the original values for back up */ 618 /* Luma */ 619 620 /* Done unconditionally since SAO is done on a shifted CTB and the constituent CTBs 621 * can belong to different slice with their own sao_enable flag */ 622 { 623 UWORD32 u4_no_loop_filter_flag; 624 WORD32 loop_filter_bit_pos; 625 WORD32 log2_min_cu = 3; 626 WORD32 min_cu = (1 << log2_min_cu); 627 UWORD8 *pu1_src_tmp_luma = pu1_src_luma; 628 WORD32 sao_blk_ht = ctb_size - SAO_SHIFT_CTB; 629 WORD32 sao_blk_wd = ctb_size; 630 WORD32 remaining_rows; 631 WORD32 remaining_cols; 632 633 remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB); 634 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB); 635 if(remaining_rows <= SAO_SHIFT_CTB) 636 sao_blk_ht += remaining_rows; 637 if(remaining_cols <= SAO_SHIFT_CTB) 638 sao_blk_wd += remaining_cols; 639 640 pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB : 0; 641 pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0; 642 643 pu1_src_backup_luma = ps_sao_ctxt->pu1_tmp_buf_luma; 644 645 loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) + 646 (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3); 647 if(ps_sao_ctxt->i4_ctb_x > 0) 648 loop_filter_bit_pos -= 1; 649 650 pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag + 651 (loop_filter_bit_pos >> 3); 652 653 for(i = -(ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB : 0) >> log2_min_cu; 654 i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++) 655 { 656 WORD32 tmp_wd = sao_blk_wd; 657 658 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> 659 (loop_filter_bit_pos & 7); 660 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1; 661 662 if(u4_no_loop_filter_flag) 663 { 664 no_loop_filter_enabled_luma = 1; 665 while(tmp_wd > 0) 666 { 667 if(CTZ(u4_no_loop_filter_flag)) 668 { 669 pu1_src_tmp_luma += MIN((WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); 670 pu1_src_backup_luma += MIN((WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); 671 tmp_wd -= CTZ(u4_no_loop_filter_flag) << log2_min_cu; 672 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag)); 673 } 674 else 675 { 676 for(row = 0; row < min_cu; row++) 677 { 678 for(col = 0; col < MIN((WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++) 679 { 680 pu1_src_backup_luma[row * backup_strd + col] = pu1_src_tmp_luma[row * src_strd + col]; 681 } 682 } 683 pu1_src_tmp_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); 684 pu1_src_backup_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); 685 tmp_wd -= CTZ(~u4_no_loop_filter_flag) << log2_min_cu; 686 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag)); 687 } 688 } 689 690 pu1_src_tmp_luma -= sao_blk_wd; 691 pu1_src_backup_luma -= sao_blk_wd; 692 } 693 694 pu1_src_tmp_luma += (src_strd << log2_min_cu); 695 pu1_src_backup_luma += (backup_strd << log2_min_cu); 696 } 697 } 698 699 /* Chroma */ 700 701 { 702 UWORD32 u4_no_loop_filter_flag; 703 WORD32 loop_filter_bit_pos; 704 WORD32 log2_min_cu = 3; 705 WORD32 min_cu = (1 << log2_min_cu); 706 UWORD8 *pu1_src_tmp_chroma = pu1_src_chroma; 707 WORD32 sao_blk_ht = ctb_size - 2 * SAO_SHIFT_CTB; 708 WORD32 sao_blk_wd = ctb_size; 709 WORD32 remaining_rows; 710 WORD32 remaining_cols; 711 712 remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB); 713 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB); 714 if(remaining_rows <= 2 * SAO_SHIFT_CTB) 715 sao_blk_ht += remaining_rows; 716 if(remaining_cols <= 2 * SAO_SHIFT_CTB) 717 sao_blk_wd += remaining_cols; 718 719 pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB * 2 : 0; 720 pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0; 721 722 pu1_src_backup_chroma = ps_sao_ctxt->pu1_tmp_buf_chroma; 723 724 loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) + 725 (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3); 726 if(ps_sao_ctxt->i4_ctb_x > 0) 727 loop_filter_bit_pos -= 2; 728 729 pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag + 730 (loop_filter_bit_pos >> 3); 731 732 for(i = -(ps_sao_ctxt->i4_ctb_y ? 2 * SAO_SHIFT_CTB : 0) >> log2_min_cu; 733 i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++) 734 { 735 WORD32 tmp_wd = sao_blk_wd; 736 737 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> 738 (loop_filter_bit_pos & 7); 739 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1; 740 741 if(u4_no_loop_filter_flag) 742 { 743 no_loop_filter_enabled_chroma = 1; 744 while(tmp_wd > 0) 745 { 746 if(CTZ(u4_no_loop_filter_flag)) 747 { 748 pu1_src_tmp_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); 749 pu1_src_backup_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); 750 tmp_wd -= CTZ(u4_no_loop_filter_flag) << log2_min_cu; 751 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag)); 752 } 753 else 754 { 755 for(row = 0; row < min_cu / 2; row++) 756 { 757 for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++) 758 { 759 pu1_src_backup_chroma[row * backup_strd + col] = pu1_src_tmp_chroma[row * src_strd + col]; 760 } 761 } 762 763 pu1_src_tmp_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); 764 pu1_src_backup_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); 765 tmp_wd -= CTZ(~u4_no_loop_filter_flag) << log2_min_cu; 766 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag)); 767 } 768 } 769 770 pu1_src_tmp_chroma -= sao_blk_wd; 771 pu1_src_backup_chroma -= sao_blk_wd; 772 } 773 774 pu1_src_tmp_chroma += ((src_strd / 2) << log2_min_cu); 775 pu1_src_backup_chroma += ((backup_strd / 2) << log2_min_cu); 776 } 777 } 778 } 779 780 DEBUG_PROCESS_TMP_BUF(ps_sao_ctxt->pu1_tmp_buf_luma, ps_sao_ctxt->pu1_tmp_buf_chroma); 781 782 /* Top-left CTB */ 783 if(ps_sao_ctxt->i4_ctb_x > 0 && ps_sao_ctxt->i4_ctb_y > 0) 784 { 785 WORD32 sao_wd_luma = SAO_SHIFT_CTB; 786 WORD32 sao_wd_chroma = 2 * SAO_SHIFT_CTB; 787 WORD32 sao_ht_luma = SAO_SHIFT_CTB; 788 WORD32 sao_ht_chroma = SAO_SHIFT_CTB; 789 790 WORD32 ctbx_tl_t = 0, ctbx_tl_l = 0, ctbx_tl_r = 0, ctbx_tl_d = 0, ctbx_tl = 0; 791 WORD32 ctby_tl_t = 0, ctby_tl_l = 0, ctby_tl_r = 0, ctby_tl_d = 0, ctby_tl = 0; 792 WORD32 au4_idx_tl[8], idx_tl; 793 794 slice_header_t *ps_slice_hdr_top_left; 795 { 796 WORD32 top_left_ctb_indx = (ps_sao_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb + 797 (ps_sao_ctxt->i4_ctb_x - 1); 798 ps_slice_hdr_top_left = ps_slice_hdr_base + pu1_slice_idx[top_left_ctb_indx]; 799 } 800 801 802 pu1_src_luma -= (sao_wd_luma + sao_ht_luma * src_strd); 803 pu1_src_chroma -= (sao_wd_chroma + sao_ht_chroma * src_strd); 804 ps_sao -= (1 + ps_sps->i2_pic_wd_in_ctb); 805 pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma; 806 pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma; 807 pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma; 808 pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - (2 * sao_ht_chroma); 809 810 if(ps_slice_hdr_top_left->i1_slice_sao_luma_flag) 811 { 812 if(0 == ps_sao->b3_y_type_idx) 813 { 814 /* Update left, top and top-left */ 815 for(row = 0; row < sao_ht_luma; row++) 816 { 817 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)]; 818 } 819 pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1]; 820 821 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma); 822 823 824 } 825 826 else if(1 == ps_sao->b3_y_type_idx) 827 { 828 ai1_offset_y[1] = ps_sao->b4_y_offset_1; 829 ai1_offset_y[2] = ps_sao->b4_y_offset_2; 830 ai1_offset_y[3] = ps_sao->b4_y_offset_3; 831 ai1_offset_y[4] = ps_sao->b4_y_offset_4; 832 833 ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma, 834 src_strd, 835 pu1_src_left_luma, 836 pu1_src_top_luma, 837 pu1_sao_src_luma_top_left_ctb, 838 ps_sao->b5_y_band_pos, 839 ai1_offset_y, 840 sao_wd_luma, 841 sao_ht_luma 842 ); 843 } 844 845 else // if(2 <= ps_sao->b3_y_type_idx) 846 { 847 ai1_offset_y[1] = ps_sao->b4_y_offset_1; 848 ai1_offset_y[2] = ps_sao->b4_y_offset_2; 849 ai1_offset_y[3] = ps_sao->b4_y_offset_3; 850 ai1_offset_y[4] = ps_sao->b4_y_offset_4; 851 852 for(i = 0; i < 8; i++) 853 { 854 au1_avail_luma[i] = 255; 855 au1_tile_slice_boundary[i] = 0; 856 au4_idx_tl[i] = 0; 857 au4_ilf_across_tile_slice_enable[i] = 1; 858 } 859 860 /****************************************************************** 861 * Derive the Top-left CTB's neighbor pixel's slice indices. 862 * 863 * TL_T 864 * 4 _2__5________ 865 * 0 | | | 866 * TL_L | TL | 1 TL_R| 867 * |____|_______|____ 868 * 6|TL_D|7 | | 869 * | 3 | | | 870 * |____|_______| | 871 * | | 872 * | | 873 * |____________| 874 * 875 *****************************************************************/ 876 877 /*In case of slices, unless we encounter multiple slice/tiled clips, don't enter*/ 878 { 879 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag)) 880 { 881 { 882 /*Assuming that sao shift is uniform along x and y directions*/ 883 if((0 == (1 << log2_ctb_size) - sao_wd_luma) && (ps_sao_ctxt->i4_ctb_y > 1) && (ps_sao_ctxt->i4_ctb_x > 1)) 884 { 885 ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 2; 886 ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 2; 887 } 888 else if(!(0 == (1 << log2_ctb_size) - sao_wd_luma)) 889 { 890 ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 1; 891 ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 1; 892 } 893 ctbx_tl_t = ps_sao_ctxt->i4_ctb_x - 1; 894 ctby_tl_l = ps_sao_ctxt->i4_ctb_y - 1; 895 896 ctbx_tl_r = ps_sao_ctxt->i4_ctb_x; 897 ctby_tl_r = ps_sao_ctxt->i4_ctb_y - 1; 898 899 ctbx_tl_d = ps_sao_ctxt->i4_ctb_x - 1; 900 ctby_tl_d = ps_sao_ctxt->i4_ctb_y; 901 902 ctbx_tl = ps_sao_ctxt->i4_ctb_x - 1; 903 ctby_tl = ps_sao_ctxt->i4_ctb_y - 1; 904 } 905 906 if(!ps_slice_hdr->i1_first_slice_in_pic_flag) 907 { 908 /*Calculate slice indices for neighbor pixels*/ 909 idx_tl = pu1_slice_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)]; 910 au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_slice_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)); 911 au4_idx_tl[0] = pu1_slice_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)]; 912 au4_idx_tl[1] = au4_idx_tl[5] = pu1_slice_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)]; 913 au4_idx_tl[3] = au4_idx_tl[6] = pu1_slice_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)]; 914 au4_idx_tl[7] = pu1_slice_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)]; 915 916 if((0 == (1 << log2_ctb_size) - sao_wd_luma)) 917 { 918 if(ps_sao_ctxt->i4_ctb_x == 1) 919 { 920 au4_idx_tl[6] = -1; 921 au4_idx_tl[4] = -1; 922 } 923 else 924 { 925 au4_idx_tl[6] = pu1_slice_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)]; 926 } 927 if(ps_sao_ctxt->i4_ctb_y == 1) 928 { 929 au4_idx_tl[5] = -1; 930 au4_idx_tl[4] = -1; 931 } 932 else 933 { 934 au4_idx_tl[5] = pu1_slice_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)]; 935 au4_idx_tl[4] = pu1_slice_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)]; 936 } 937 au4_idx_tl[7] = pu1_slice_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)]; 938 } 939 940 /* Verify that the neighbor ctbs dont cross pic boundary. 941 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag 942 * of the pixel having a greater address is checked. Accordingly, set the availability flags. 943 * Hence, for top and left pixels, current ctb flag is checked. For right and down pixels, 944 * the respective pixel's flags are checked 945 */ 946 947 if((0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma)) 948 { 949 au4_ilf_across_tile_slice_enable[4] = 0; 950 au4_ilf_across_tile_slice_enable[6] = 0; 951 } 952 else 953 { 954 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_tl[6])->i1_slice_loop_filter_across_slices_enabled_flag; 955 } 956 if((0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma)) 957 { 958 au4_ilf_across_tile_slice_enable[5] = 0; 959 au4_ilf_across_tile_slice_enable[4] = 0; 960 } 961 else 962 { 963 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag; 964 au4_ilf_across_tile_slice_enable[4] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag; 965 } 966 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag; 967 au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag; 968 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_tl[1])->i1_slice_loop_filter_across_slices_enabled_flag; 969 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_tl[3])->i1_slice_loop_filter_across_slices_enabled_flag; 970 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_tl[7])->i1_slice_loop_filter_across_slices_enabled_flag; 971 972 if(au4_idx_tl[5] > idx_tl) 973 { 974 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_tl[5])->i1_slice_loop_filter_across_slices_enabled_flag; 975 } 976 977 /* 978 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag 979 * of the pixel having a greater address is checked. Accordingly, set the availability flags. 980 * Hence, for top and left pixels, current ctb flag is checked. For right and down pixels, 981 * the respective pixel's flags are checked 982 */ 983 for(i = 0; i < 8; i++) 984 { 985 /*Sets the edges that lie on the slice/tile boundary*/ 986 if(au4_idx_tl[i] != idx_tl) 987 { 988 au1_tile_slice_boundary[i] = 1; 989 } 990 else 991 { 992 au4_ilf_across_tile_slice_enable[i] = 1; 993 } 994 } 995 996 ps_codec->s_func_selector.ihevc_memset_mul_8_fptr((UWORD8 *)au4_idx_tl, 0, 8 * sizeof(WORD32)); 997 } 998 999 if(ps_pps->i1_tiles_enabled_flag) 1000 { 1001 /* Calculate availability flags at slice boundary */ 1002 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y)))) 1003 { 1004 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */ 1005 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag) 1006 { 1007 /*Set the boundary arrays*/ 1008 /*Calculate tile indices for neighbor pixels*/ 1009 idx_tl = pu1_tile_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)]; 1010 au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_tile_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)); 1011 au4_idx_tl[0] = pu1_tile_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)]; 1012 au4_idx_tl[1] = au4_idx_tl[5] = pu1_tile_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)]; 1013 au4_idx_tl[3] = au4_idx_tl[6] = pu1_tile_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)]; 1014 au4_idx_tl[7] = pu1_tile_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)]; 1015 1016 if((0 == (1 << log2_ctb_size) - sao_wd_luma)) 1017 { 1018 if(ps_sao_ctxt->i4_ctb_x == 1) 1019 { 1020 au4_idx_tl[6] = -1; 1021 au4_idx_tl[4] = -1; 1022 } 1023 else 1024 { 1025 au4_idx_tl[6] = pu1_tile_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)]; 1026 } 1027 if(ps_sao_ctxt->i4_ctb_y == 1) 1028 { 1029 au4_idx_tl[5] = -1; 1030 au4_idx_tl[4] = -1; 1031 } 1032 else 1033 { 1034 au4_idx_tl[5] = pu1_tile_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)]; 1035 au4_idx_tl[4] = pu1_tile_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)]; 1036 } 1037 au4_idx_tl[7] = pu1_tile_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)]; 1038 } 1039 for(i = 0; i < 8; i++) 1040 { 1041 /*Sets the edges that lie on the tile boundary*/ 1042 if(au4_idx_tl[i] != idx_tl) 1043 { 1044 au1_tile_slice_boundary[i] |= 1; 1045 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0 1046 } 1047 } 1048 } 1049 } 1050 } 1051 1052 1053 /*Set availability flags based on tile and slice boundaries*/ 1054 for(i = 0; i < 8; i++) 1055 { 1056 /*Sets the edges that lie on the slice/tile boundary*/ 1057 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i])) 1058 { 1059 au1_avail_luma[i] = 0; 1060 } 1061 } 1062 } 1063 } 1064 1065 if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma) 1066 { 1067 au1_avail_luma[0] = 0; 1068 au1_avail_luma[4] = 0; 1069 au1_avail_luma[6] = 0; 1070 } 1071 1072 if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x) 1073 { 1074 au1_avail_luma[1] = 0; 1075 au1_avail_luma[5] = 0; 1076 au1_avail_luma[7] = 0; 1077 } 1078 //y==1 case 1079 if((0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma)) 1080 { 1081 au1_avail_luma[2] = 0; 1082 au1_avail_luma[4] = 0; 1083 au1_avail_luma[5] = 0; 1084 } 1085 if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y) 1086 { 1087 au1_avail_luma[3] = 0; 1088 au1_avail_luma[6] = 0; 1089 au1_avail_luma[7] = 0; 1090 } 1091 1092 { 1093 au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma]; 1094 u1_sao_src_top_left_luma_bot_left = pu1_src_left_luma[sao_ht_luma]; 1095 ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma, 1096 src_strd, 1097 pu1_src_left_luma, 1098 pu1_src_top_luma, 1099 pu1_sao_src_luma_top_left_ctb, 1100 au1_src_top_right, 1101 &u1_sao_src_top_left_luma_bot_left, 1102 au1_avail_luma, 1103 ai1_offset_y, 1104 sao_wd_luma, 1105 sao_ht_luma); 1106 } 1107 } 1108 1109 } 1110 else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag)) 1111 { 1112 /* Update left, top and top-left */ 1113 for(row = 0; row < sao_ht_luma; row++) 1114 { 1115 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)]; 1116 } 1117 pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1]; 1118 1119 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma); 1120 } 1121 1122 if(ps_slice_hdr_top_left->i1_slice_sao_chroma_flag) 1123 { 1124 if(0 == ps_sao->b3_cb_type_idx) 1125 { 1126 for(row = 0; row < sao_ht_chroma; row++) 1127 { 1128 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)]; 1129 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)]; 1130 } 1131 pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2]; 1132 pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1]; 1133 1134 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma); 1135 1136 } 1137 1138 else if(1 == ps_sao->b3_cb_type_idx) 1139 { 1140 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1; 1141 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2; 1142 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3; 1143 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4; 1144 1145 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1; 1146 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2; 1147 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3; 1148 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4; 1149 1150 if(chroma_yuv420sp_vu) 1151 { 1152 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma, 1153 src_strd, 1154 pu1_src_left_chroma, 1155 pu1_src_top_chroma, 1156 pu1_sao_src_chroma_top_left_ctb, 1157 ps_sao->b5_cr_band_pos, 1158 ps_sao->b5_cb_band_pos, 1159 ai1_offset_cr, 1160 ai1_offset_cb, 1161 sao_wd_chroma, 1162 sao_ht_chroma 1163 ); 1164 } 1165 else 1166 { 1167 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma, 1168 src_strd, 1169 pu1_src_left_chroma, 1170 pu1_src_top_chroma, 1171 pu1_sao_src_chroma_top_left_ctb, 1172 ps_sao->b5_cb_band_pos, 1173 ps_sao->b5_cr_band_pos, 1174 ai1_offset_cb, 1175 ai1_offset_cr, 1176 sao_wd_chroma, 1177 sao_ht_chroma 1178 ); 1179 } 1180 } 1181 1182 else // if(2 <= ps_sao->b3_cb_type_idx) 1183 { 1184 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1; 1185 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2; 1186 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3; 1187 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4; 1188 1189 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1; 1190 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2; 1191 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3; 1192 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4; 1193 for(i = 0; i < 8; i++) 1194 { 1195 au1_avail_chroma[i] = 255; 1196 au1_tile_slice_boundary[i] = 0; 1197 au4_idx_tl[i] = 0; 1198 au4_ilf_across_tile_slice_enable[i] = 1; 1199 } 1200 /*In case of slices*/ 1201 { 1202 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag)) 1203 { 1204 if((0 == (1 << log2_ctb_size) - sao_wd_chroma) && (ps_sao_ctxt->i4_ctb_y > 1) && (ps_sao_ctxt->i4_ctb_x > 1)) 1205 { 1206 ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 2; 1207 ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 2; 1208 } 1209 else if(!(0 == (1 << log2_ctb_size) - sao_wd_chroma)) 1210 { 1211 ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 1; 1212 ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 1; 1213 } 1214 ctbx_tl_t = ps_sao_ctxt->i4_ctb_x - 1; 1215 ctby_tl_l = ps_sao_ctxt->i4_ctb_y - 1; 1216 1217 ctbx_tl_r = ps_sao_ctxt->i4_ctb_x; 1218 ctby_tl_r = ps_sao_ctxt->i4_ctb_y - 1; 1219 1220 ctbx_tl_d = ps_sao_ctxt->i4_ctb_x - 1; 1221 ctby_tl_d = ps_sao_ctxt->i4_ctb_y; 1222 1223 ctbx_tl = ps_sao_ctxt->i4_ctb_x - 1; 1224 ctby_tl = ps_sao_ctxt->i4_ctb_y - 1; 1225 1226 if(!ps_slice_hdr->i1_first_slice_in_pic_flag) 1227 { 1228 1229 idx_tl = pu1_slice_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)]; 1230 au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_slice_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)); 1231 au4_idx_tl[0] = pu1_slice_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)]; 1232 au4_idx_tl[1] = au4_idx_tl[5] = pu1_slice_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)]; 1233 au4_idx_tl[3] = au4_idx_tl[6] = pu1_slice_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)]; 1234 au4_idx_tl[7] = pu1_slice_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)]; 1235 1236 if((0 == (1 << log2_ctb_size) - sao_wd_chroma)) 1237 { 1238 if(ps_sao_ctxt->i4_ctb_x == 1) 1239 { 1240 au4_idx_tl[6] = -1; 1241 au4_idx_tl[4] = -1; 1242 } 1243 else 1244 { 1245 au4_idx_tl[6] = pu1_slice_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)]; 1246 } 1247 if(ps_sao_ctxt->i4_ctb_y == 1) 1248 { 1249 au4_idx_tl[5] = -1; 1250 au4_idx_tl[4] = -1; 1251 } 1252 else 1253 { 1254 au4_idx_tl[5] = pu1_slice_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)]; 1255 au4_idx_tl[4] = pu1_slice_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)]; 1256 } 1257 au4_idx_tl[7] = pu1_slice_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)]; 1258 } 1259 1260 /* Verify that the neighbor ctbs don't cross pic boundary 1261 * Also, the ILF flag belonging to the higher pixel address (between neighbor and current pixels) must be assigned*/ 1262 if((0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma)) 1263 { 1264 au4_ilf_across_tile_slice_enable[4] = 0; 1265 au4_ilf_across_tile_slice_enable[6] = 0; 1266 } 1267 else 1268 { 1269 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_tl[6])->i1_slice_loop_filter_across_slices_enabled_flag; 1270 } 1271 if((0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma)) 1272 { 1273 au4_ilf_across_tile_slice_enable[5] = 0; 1274 au4_ilf_across_tile_slice_enable[4] = 0; 1275 } 1276 else 1277 { 1278 au4_ilf_across_tile_slice_enable[4] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag; 1279 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_tl[5])->i1_slice_loop_filter_across_slices_enabled_flag; 1280 } 1281 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag; 1282 au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag; 1283 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_tl[1])->i1_slice_loop_filter_across_slices_enabled_flag; 1284 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_tl[3])->i1_slice_loop_filter_across_slices_enabled_flag; 1285 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_tl[7])->i1_slice_loop_filter_across_slices_enabled_flag; 1286 /* 1287 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag 1288 * of the pixel having a greater address is checked. Accordingly, set the availability flags 1289 */ 1290 for(i = 0; i < 8; i++) 1291 { 1292 /*Sets the edges that lie on the slice/tile boundary*/ 1293 if(au4_idx_tl[i] != idx_tl) 1294 { 1295 au1_tile_slice_boundary[i] = 1; 1296 } 1297 else 1298 { 1299 au4_ilf_across_tile_slice_enable[i] = 1; 1300 } 1301 } 1302 1303 /*Reset indices*/ 1304 for(i = 0; i < 8; i++) 1305 { 1306 au4_idx_tl[i] = 0; 1307 } 1308 } 1309 if(ps_pps->i1_tiles_enabled_flag) 1310 { 1311 /* Calculate availability flags at slice boundary */ 1312 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y)))) 1313 { 1314 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */ 1315 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag) 1316 { 1317 /*Set the boundary arrays*/ 1318 /*Calculate tile indices for neighbor pixels*/ 1319 idx_tl = pu1_tile_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)]; 1320 au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_tile_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)); 1321 au4_idx_tl[0] = pu1_tile_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)]; 1322 au4_idx_tl[1] = au4_idx_tl[5] = pu1_tile_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)]; 1323 au4_idx_tl[3] = au4_idx_tl[6] = pu1_tile_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)]; 1324 au4_idx_tl[7] = pu1_tile_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)]; 1325 1326 if((0 == (1 << log2_ctb_size) - sao_wd_luma)) 1327 { 1328 if(ps_sao_ctxt->i4_ctb_x == 1) 1329 { 1330 au4_idx_tl[6] = -1; 1331 au4_idx_tl[4] = -1; 1332 } 1333 else 1334 { 1335 au4_idx_tl[6] = pu1_tile_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)]; 1336 } 1337 if(ps_sao_ctxt->i4_ctb_y == 1) 1338 { 1339 au4_idx_tl[5] = -1; 1340 au4_idx_tl[4] = -1; 1341 } 1342 else 1343 { 1344 au4_idx_tl[5] = pu1_tile_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)]; 1345 au4_idx_tl[4] = pu1_tile_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)]; 1346 } 1347 au4_idx_tl[7] = pu1_tile_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)]; 1348 } 1349 for(i = 0; i < 8; i++) 1350 { 1351 /*Sets the edges that lie on the tile boundary*/ 1352 if(au4_idx_tl[i] != idx_tl) 1353 { 1354 au1_tile_slice_boundary[i] |= 1; 1355 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0 1356 } 1357 } 1358 } 1359 } 1360 } 1361 1362 for(i = 0; i < 8; i++) 1363 { 1364 /*Sets the edges that lie on the slice/tile boundary*/ 1365 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i])) 1366 { 1367 au1_avail_chroma[i] = 0; 1368 } 1369 } 1370 } 1371 } 1372 1373 if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma) 1374 { 1375 au1_avail_chroma[0] = 0; 1376 au1_avail_chroma[4] = 0; 1377 au1_avail_chroma[6] = 0; 1378 } 1379 if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x) 1380 { 1381 au1_avail_chroma[1] = 0; 1382 au1_avail_chroma[5] = 0; 1383 au1_avail_chroma[7] = 0; 1384 } 1385 1386 if(0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma) 1387 { 1388 au1_avail_chroma[2] = 0; 1389 au1_avail_chroma[4] = 0; 1390 au1_avail_chroma[5] = 0; 1391 } 1392 if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y) 1393 { 1394 au1_avail_chroma[3] = 0; 1395 au1_avail_chroma[6] = 0; 1396 au1_avail_chroma[7] = 0; 1397 } 1398 1399 { 1400 au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma]; 1401 au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1]; 1402 au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_left_chroma[2 * sao_ht_chroma]; 1403 au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_left_chroma[2 * sao_ht_chroma + 1]; 1404 if((ctb_size == 16) && (ps_sao_ctxt->i4_ctb_y != ps_sps->i2_pic_ht_in_ctb - 1)) 1405 { 1406 au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2]; 1407 au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1]; 1408 } 1409 1410 if(chroma_yuv420sp_vu) 1411 { 1412 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma, 1413 src_strd, 1414 pu1_src_left_chroma, 1415 pu1_src_top_chroma, 1416 pu1_sao_src_chroma_top_left_ctb, 1417 au1_src_top_right, 1418 au1_sao_src_top_left_chroma_bot_left, 1419 au1_avail_chroma, 1420 ai1_offset_cr, 1421 ai1_offset_cb, 1422 sao_wd_chroma, 1423 sao_ht_chroma); 1424 } 1425 else 1426 { 1427 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma, 1428 src_strd, 1429 pu1_src_left_chroma, 1430 pu1_src_top_chroma, 1431 pu1_sao_src_chroma_top_left_ctb, 1432 au1_src_top_right, 1433 au1_sao_src_top_left_chroma_bot_left, 1434 au1_avail_chroma, 1435 ai1_offset_cb, 1436 ai1_offset_cr, 1437 sao_wd_chroma, 1438 sao_ht_chroma); 1439 } 1440 } 1441 } 1442 } 1443 else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag)) 1444 { 1445 for(row = 0; row < sao_ht_chroma; row++) 1446 { 1447 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)]; 1448 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)]; 1449 } 1450 pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2]; 1451 pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1]; 1452 1453 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma); 1454 } 1455 1456 pu1_src_luma += sao_wd_luma + sao_ht_luma * src_strd; 1457 pu1_src_chroma += sao_wd_chroma + sao_ht_chroma * src_strd; 1458 ps_sao += (1 + ps_sps->i2_pic_wd_in_ctb); 1459 } 1460 1461 1462 /* Top CTB */ 1463 if((ps_sao_ctxt->i4_ctb_y > 0)) 1464 { 1465 WORD32 sao_wd_luma = ctb_size - SAO_SHIFT_CTB; 1466 WORD32 sao_wd_chroma = ctb_size - 2 * SAO_SHIFT_CTB; 1467 WORD32 sao_ht_luma = SAO_SHIFT_CTB; 1468 WORD32 sao_ht_chroma = SAO_SHIFT_CTB; 1469 1470 WORD32 ctbx_t_t = 0, ctbx_t_l = 0, ctbx_t_r = 0, ctbx_t_d = 0, ctbx_t = 0; 1471 WORD32 ctby_t_t = 0, ctby_t_l = 0, ctby_t_r = 0, ctby_t_d = 0, ctby_t = 0; 1472 WORD32 au4_idx_t[8], idx_t; 1473 1474 WORD32 remaining_cols; 1475 1476 slice_header_t *ps_slice_hdr_top; 1477 { 1478 WORD32 top_ctb_indx = (ps_sao_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb + 1479 (ps_sao_ctxt->i4_ctb_x); 1480 ps_slice_hdr_top = ps_slice_hdr_base + pu1_slice_idx[top_ctb_indx]; 1481 } 1482 1483 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_luma); 1484 if(remaining_cols <= SAO_SHIFT_CTB) 1485 { 1486 sao_wd_luma += remaining_cols; 1487 } 1488 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_chroma); 1489 if(remaining_cols <= 2 * SAO_SHIFT_CTB) 1490 { 1491 sao_wd_chroma += remaining_cols; 1492 } 1493 1494 pu1_src_luma -= (sao_ht_luma * src_strd); 1495 pu1_src_chroma -= (sao_ht_chroma * src_strd); 1496 ps_sao -= (ps_sps->i2_pic_wd_in_ctb); 1497 pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size); 1498 pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size); 1499 pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_chroma; 1500 pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - (2 * sao_ht_chroma); 1501 1502 if(0 != sao_wd_luma) 1503 { 1504 if(ps_slice_hdr_top->i1_slice_sao_luma_flag) 1505 { 1506 if(0 == ps_sao->b3_y_type_idx) 1507 { 1508 /* Update left, top and top-left */ 1509 for(row = 0; row < sao_ht_luma; row++) 1510 { 1511 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)]; 1512 } 1513 pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1]; 1514 1515 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma); 1516 1517 } 1518 1519 else if(1 == ps_sao->b3_y_type_idx) 1520 { 1521 ai1_offset_y[1] = ps_sao->b4_y_offset_1; 1522 ai1_offset_y[2] = ps_sao->b4_y_offset_2; 1523 ai1_offset_y[3] = ps_sao->b4_y_offset_3; 1524 ai1_offset_y[4] = ps_sao->b4_y_offset_4; 1525 1526 ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma, 1527 src_strd, 1528 pu1_src_left_luma, 1529 pu1_src_top_luma, 1530 pu1_sao_src_luma_top_left_ctb, 1531 ps_sao->b5_y_band_pos, 1532 ai1_offset_y, 1533 sao_wd_luma, 1534 sao_ht_luma 1535 ); 1536 } 1537 1538 else // if(2 <= ps_sao->b3_y_type_idx) 1539 { 1540 ai1_offset_y[1] = ps_sao->b4_y_offset_1; 1541 ai1_offset_y[2] = ps_sao->b4_y_offset_2; 1542 ai1_offset_y[3] = ps_sao->b4_y_offset_3; 1543 ai1_offset_y[4] = ps_sao->b4_y_offset_4; 1544 1545 ps_codec->s_func_selector.ihevc_memset_mul_8_fptr(au1_avail_luma, 255, 8); 1546 ps_codec->s_func_selector.ihevc_memset_mul_8_fptr(au1_tile_slice_boundary, 0, 8); 1547 ps_codec->s_func_selector.ihevc_memset_mul_8_fptr((UWORD8 *)au4_idx_t, 0, 8 * sizeof(WORD32)); 1548 1549 for(i = 0; i < 8; i++) 1550 { 1551 1552 au4_ilf_across_tile_slice_enable[i] = 1; 1553 } 1554 /****************************************************************** 1555 * Derive the Top-left CTB's neighbor pixel's slice indices. 1556 * 1557 * T_T 1558 * ____________ 1559 * | | | 1560 * | T_L| T |T_R 1561 * | | ______|____ 1562 * | | T_D | | 1563 * | | | | 1564 * |____|_______| | 1565 * | | 1566 * | | 1567 * |____________| 1568 * 1569 *****************************************************************/ 1570 1571 /*In case of slices*/ 1572 { 1573 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag)) 1574 { 1575 1576 ctbx_t_t = ps_sao_ctxt->i4_ctb_x; 1577 ctby_t_t = ps_sao_ctxt->i4_ctb_y - 1; 1578 1579 ctbx_t_l = ps_sao_ctxt->i4_ctb_x - 1; 1580 ctby_t_l = ps_sao_ctxt->i4_ctb_y - 1; 1581 1582 ctbx_t_r = ps_sao_ctxt->i4_ctb_x; 1583 ctby_t_r = ps_sao_ctxt->i4_ctb_y - 1; 1584 1585 ctbx_t_d = ps_sao_ctxt->i4_ctb_x; 1586 ctby_t_d = ps_sao_ctxt->i4_ctb_y; 1587 1588 ctbx_t = ps_sao_ctxt->i4_ctb_x; 1589 ctby_t = ps_sao_ctxt->i4_ctb_y - 1; 1590 1591 if(!ps_slice_hdr->i1_first_slice_in_pic_flag) 1592 { 1593 /*Calculate neighbor ctb slice indices*/ 1594 if(0 == ps_sao_ctxt->i4_ctb_x) 1595 { 1596 au4_idx_t[0] = -1; 1597 au4_idx_t[6] = -1; 1598 au4_idx_t[4] = -1; 1599 } 1600 else 1601 { 1602 au4_idx_t[0] = au4_idx_t[4] = pu1_slice_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)]; 1603 au4_idx_t[6] = pu1_slice_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)]; 1604 } 1605 idx_t = pu1_slice_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)]; 1606 au4_idx_t[2] = au4_idx_t[5] = pu1_slice_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)]; 1607 au4_idx_t[1] = pu1_slice_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)]; 1608 au4_idx_t[3] = au4_idx_t[7] = pu1_slice_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)]; 1609 1610 /*Verify that the neighbor ctbs don't cross pic boundary.*/ 1611 if(0 == ps_sao_ctxt->i4_ctb_x) 1612 { 1613 au4_ilf_across_tile_slice_enable[4] = 0; 1614 au4_ilf_across_tile_slice_enable[6] = 0; 1615 au4_ilf_across_tile_slice_enable[0] = 0; 1616 } 1617 else 1618 { 1619 au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag; 1620 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_t[6])->i1_slice_loop_filter_across_slices_enabled_flag; 1621 } 1622 1623 1624 1625 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag; 1626 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag; 1627 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_t[1])->i1_slice_loop_filter_across_slices_enabled_flag; 1628 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_t[3])->i1_slice_loop_filter_across_slices_enabled_flag; 1629 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_t[7])->i1_slice_loop_filter_across_slices_enabled_flag; 1630 1631 if(au4_idx_t[6] < idx_t) 1632 { 1633 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag; 1634 } 1635 1636 /* 1637 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag 1638 * of the pixel having a greater address is checked. Accordingly, set the availability flags 1639 */ 1640 1641 for(i = 0; i < 8; i++) 1642 { 1643 /*Sets the edges that lie on the slice/tile boundary*/ 1644 if(au4_idx_t[i] != idx_t) 1645 { 1646 au1_tile_slice_boundary[i] = 1; 1647 /*Check for slice flag at such boundaries*/ 1648 } 1649 else 1650 { 1651 au4_ilf_across_tile_slice_enable[i] = 1; 1652 } 1653 } 1654 /*Reset indices*/ 1655 for(i = 0; i < 8; i++) 1656 { 1657 au4_idx_t[i] = 0; 1658 } 1659 } 1660 1661 if(ps_pps->i1_tiles_enabled_flag) 1662 { 1663 /* Calculate availability flags at slice boundary */ 1664 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y)))) 1665 { 1666 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */ 1667 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag) 1668 { 1669 /*Calculate neighbor ctb slice indices*/ 1670 if(0 == ps_sao_ctxt->i4_ctb_x) 1671 { 1672 au4_idx_t[0] = -1; 1673 au4_idx_t[6] = -1; 1674 au4_idx_t[4] = -1; 1675 } 1676 else 1677 { 1678 au4_idx_t[0] = au4_idx_t[4] = pu1_tile_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)]; 1679 au4_idx_t[6] = pu1_tile_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)]; 1680 } 1681 idx_t = pu1_tile_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)]; 1682 au4_idx_t[2] = au4_idx_t[5] = pu1_tile_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)]; 1683 au4_idx_t[1] = pu1_tile_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)]; 1684 au4_idx_t[3] = au4_idx_t[7] = pu1_tile_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)]; 1685 1686 for(i = 0; i < 8; i++) 1687 { 1688 /*Sets the edges that lie on the tile boundary*/ 1689 if(au4_idx_t[i] != idx_t) 1690 { 1691 au1_tile_slice_boundary[i] |= 1; 1692 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; 1693 } 1694 } 1695 } 1696 } 1697 } 1698 1699 for(i = 0; i < 8; i++) 1700 { 1701 /*Sets the edges that lie on the slice/tile boundary*/ 1702 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i])) 1703 { 1704 au1_avail_luma[i] = 0; 1705 } 1706 } 1707 } 1708 } 1709 1710 1711 if(0 == ps_sao_ctxt->i4_ctb_x) 1712 { 1713 au1_avail_luma[0] = 0; 1714 au1_avail_luma[4] = 0; 1715 au1_avail_luma[6] = 0; 1716 } 1717 1718 if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_luma) 1719 { 1720 au1_avail_luma[1] = 0; 1721 au1_avail_luma[5] = 0; 1722 au1_avail_luma[7] = 0; 1723 } 1724 1725 if(0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma) 1726 { 1727 au1_avail_luma[2] = 0; 1728 au1_avail_luma[4] = 0; 1729 au1_avail_luma[5] = 0; 1730 } 1731 1732 if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y) 1733 { 1734 au1_avail_luma[3] = 0; 1735 au1_avail_luma[6] = 0; 1736 au1_avail_luma[7] = 0; 1737 } 1738 1739 { 1740 au1_src_top_right[0] = pu1_sao_src_top_left_luma_top_right[0]; 1741 u1_sao_src_top_left_luma_bot_left = pu1_src_luma[sao_ht_luma * src_strd - 1]; 1742 ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma, 1743 src_strd, 1744 pu1_src_left_luma, 1745 pu1_src_top_luma, 1746 pu1_sao_src_luma_top_left_ctb, 1747 au1_src_top_right, 1748 &u1_sao_src_top_left_luma_bot_left, 1749 au1_avail_luma, 1750 ai1_offset_y, 1751 sao_wd_luma, 1752 sao_ht_luma); 1753 } 1754 } 1755 } 1756 else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag)) 1757 { 1758 /* Update left, top and top-left */ 1759 for(row = 0; row < sao_ht_luma; row++) 1760 { 1761 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)]; 1762 } 1763 pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1]; 1764 1765 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma); 1766 } 1767 } 1768 1769 if(0 != sao_wd_chroma) 1770 { 1771 if(ps_slice_hdr_top->i1_slice_sao_chroma_flag) 1772 { 1773 if(0 == ps_sao->b3_cb_type_idx) 1774 { 1775 1776 for(row = 0; row < sao_ht_chroma; row++) 1777 { 1778 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)]; 1779 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)]; 1780 } 1781 pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2]; 1782 pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1]; 1783 1784 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma); 1785 1786 } 1787 1788 else if(1 == ps_sao->b3_cb_type_idx) 1789 { 1790 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1; 1791 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2; 1792 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3; 1793 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4; 1794 1795 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1; 1796 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2; 1797 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3; 1798 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4; 1799 1800 if(chroma_yuv420sp_vu) 1801 { 1802 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma, 1803 src_strd, 1804 pu1_src_left_chroma, 1805 pu1_src_top_chroma, 1806 pu1_sao_src_chroma_top_left_ctb, 1807 ps_sao->b5_cr_band_pos, 1808 ps_sao->b5_cb_band_pos, 1809 ai1_offset_cr, 1810 ai1_offset_cb, 1811 sao_wd_chroma, 1812 sao_ht_chroma 1813 ); 1814 } 1815 else 1816 { 1817 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma, 1818 src_strd, 1819 pu1_src_left_chroma, 1820 pu1_src_top_chroma, 1821 pu1_sao_src_chroma_top_left_ctb, 1822 ps_sao->b5_cb_band_pos, 1823 ps_sao->b5_cr_band_pos, 1824 ai1_offset_cb, 1825 ai1_offset_cr, 1826 sao_wd_chroma, 1827 sao_ht_chroma 1828 ); 1829 } 1830 } 1831 else // if(2 <= ps_sao->b3_cb_type_idx) 1832 { 1833 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1; 1834 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2; 1835 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3; 1836 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4; 1837 1838 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1; 1839 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2; 1840 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3; 1841 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4; 1842 1843 for(i = 0; i < 8; i++) 1844 { 1845 au1_avail_chroma[i] = 255; 1846 au1_tile_slice_boundary[i] = 0; 1847 au4_idx_t[i] = 0; 1848 au4_ilf_across_tile_slice_enable[i] = 1; 1849 } 1850 1851 { 1852 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag)) 1853 { 1854 ctbx_t_t = ps_sao_ctxt->i4_ctb_x; 1855 ctby_t_t = ps_sao_ctxt->i4_ctb_y - 1; 1856 1857 ctbx_t_l = ps_sao_ctxt->i4_ctb_x - 1; 1858 ctby_t_l = ps_sao_ctxt->i4_ctb_y - 1; 1859 1860 ctbx_t_r = ps_sao_ctxt->i4_ctb_x; 1861 ctby_t_r = ps_sao_ctxt->i4_ctb_y - 1; 1862 1863 ctbx_t_d = ps_sao_ctxt->i4_ctb_x; 1864 ctby_t_d = ps_sao_ctxt->i4_ctb_y; 1865 1866 ctbx_t = ps_sao_ctxt->i4_ctb_x; 1867 ctby_t = ps_sao_ctxt->i4_ctb_y - 1; 1868 1869 if(!ps_slice_hdr->i1_first_slice_in_pic_flag) 1870 { 1871 if(0 == ps_sao_ctxt->i4_ctb_x) 1872 { 1873 au4_idx_t[0] = -1; 1874 au4_idx_t[6] = -1; 1875 au4_idx_t[4] = -1; 1876 } 1877 else 1878 { 1879 au4_idx_t[0] = au4_idx_t[4] = pu1_slice_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)]; 1880 au4_idx_t[6] = pu1_slice_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)]; 1881 } 1882 idx_t = pu1_slice_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)]; 1883 au4_idx_t[2] = au4_idx_t[5] = pu1_slice_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)]; 1884 au4_idx_t[1] = pu1_slice_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)]; 1885 au4_idx_t[3] = au4_idx_t[7] = pu1_slice_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)]; 1886 1887 /*Verify that the neighbor ctbs don't cross pic boundary.*/ 1888 1889 if(0 == ps_sao_ctxt->i4_ctb_x) 1890 { 1891 au4_ilf_across_tile_slice_enable[4] = 0; 1892 au4_ilf_across_tile_slice_enable[6] = 0; 1893 au4_ilf_across_tile_slice_enable[0] = 0; 1894 } 1895 else 1896 { 1897 au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag; 1898 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_t[6])->i1_slice_loop_filter_across_slices_enabled_flag; 1899 } 1900 1901 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_t[5])->i1_slice_loop_filter_across_slices_enabled_flag; 1902 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag; 1903 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_t[1])->i1_slice_loop_filter_across_slices_enabled_flag; 1904 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_t[3])->i1_slice_loop_filter_across_slices_enabled_flag; 1905 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_t[7])->i1_slice_loop_filter_across_slices_enabled_flag; 1906 1907 if(idx_t > au4_idx_t[6]) 1908 { 1909 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag; 1910 } 1911 1912 /* 1913 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag 1914 * of the pixel having a greater address is checked. Accordingly, set the availability flags 1915 */ 1916 for(i = 0; i < 8; i++) 1917 { 1918 /*Sets the edges that lie on the slice/tile boundary*/ 1919 if(au4_idx_t[i] != idx_t) 1920 { 1921 au1_tile_slice_boundary[i] = 1; 1922 } 1923 else 1924 { 1925 /*Indicates that the neighbour belongs to same/dependent slice*/ 1926 au4_ilf_across_tile_slice_enable[i] = 1; 1927 } 1928 } 1929 /*Reset indices*/ 1930 for(i = 0; i < 8; i++) 1931 { 1932 au4_idx_t[i] = 0; 1933 } 1934 } 1935 if(ps_pps->i1_tiles_enabled_flag) 1936 { 1937 /* Calculate availability flags at slice boundary */ 1938 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y)))) 1939 { 1940 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */ 1941 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag) 1942 { 1943 /*Calculate neighbor ctb slice indices*/ 1944 if(0 == ps_sao_ctxt->i4_ctb_x) 1945 { 1946 au4_idx_t[0] = -1; 1947 au4_idx_t[6] = -1; 1948 au4_idx_t[4] = -1; 1949 } 1950 else 1951 { 1952 au4_idx_t[0] = au4_idx_t[4] = pu1_tile_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)]; 1953 au4_idx_t[6] = pu1_tile_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)]; 1954 } 1955 idx_t = pu1_tile_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)]; 1956 au4_idx_t[2] = au4_idx_t[5] = pu1_tile_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)]; 1957 au4_idx_t[1] = pu1_tile_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)]; 1958 au4_idx_t[3] = au4_idx_t[7] = pu1_tile_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)]; 1959 1960 for(i = 0; i < 8; i++) 1961 { 1962 /*Sets the edges that lie on the tile boundary*/ 1963 if(au4_idx_t[i] != idx_t) 1964 { 1965 au1_tile_slice_boundary[i] |= 1; 1966 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; 1967 } 1968 } 1969 } 1970 } 1971 } 1972 for(i = 0; i < 8; i++) 1973 { 1974 /*Sets the edges that lie on the slice/tile boundary*/ 1975 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i])) 1976 { 1977 au1_avail_chroma[i] = 0; 1978 } 1979 } 1980 1981 } 1982 } 1983 if(0 == ps_sao_ctxt->i4_ctb_x) 1984 { 1985 au1_avail_chroma[0] = 0; 1986 au1_avail_chroma[4] = 0; 1987 au1_avail_chroma[6] = 0; 1988 } 1989 1990 if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_chroma) 1991 { 1992 au1_avail_chroma[1] = 0; 1993 au1_avail_chroma[5] = 0; 1994 au1_avail_chroma[7] = 0; 1995 } 1996 1997 if(0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma) 1998 { 1999 au1_avail_chroma[2] = 0; 2000 au1_avail_chroma[4] = 0; 2001 au1_avail_chroma[5] = 0; 2002 } 2003 2004 if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y) 2005 { 2006 au1_avail_chroma[3] = 0; 2007 au1_avail_chroma[6] = 0; 2008 au1_avail_chroma[7] = 0; 2009 } 2010 2011 { 2012 au1_src_top_right[0] = pu1_sao_src_top_left_chroma_top_right[0]; 2013 au1_src_top_right[1] = pu1_sao_src_top_left_chroma_top_right[1]; 2014 au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2]; 2015 au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1]; 2016 2017 if(chroma_yuv420sp_vu) 2018 { 2019 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma, 2020 src_strd, 2021 pu1_src_left_chroma, 2022 pu1_src_top_chroma, 2023 pu1_sao_src_chroma_top_left_ctb, 2024 au1_src_top_right, 2025 au1_sao_src_top_left_chroma_bot_left, 2026 au1_avail_chroma, 2027 ai1_offset_cr, 2028 ai1_offset_cb, 2029 sao_wd_chroma, 2030 sao_ht_chroma); 2031 } 2032 else 2033 { 2034 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma, 2035 src_strd, 2036 pu1_src_left_chroma, 2037 pu1_src_top_chroma, 2038 pu1_sao_src_chroma_top_left_ctb, 2039 au1_src_top_right, 2040 au1_sao_src_top_left_chroma_bot_left, 2041 au1_avail_chroma, 2042 ai1_offset_cb, 2043 ai1_offset_cr, 2044 sao_wd_chroma, 2045 sao_ht_chroma); 2046 } 2047 } 2048 2049 } 2050 } 2051 else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag)) 2052 { 2053 for(row = 0; row < sao_ht_chroma; row++) 2054 { 2055 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)]; 2056 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)]; 2057 } 2058 pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2]; 2059 pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1]; 2060 2061 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma); 2062 } 2063 } 2064 2065 pu1_src_luma += sao_ht_luma * src_strd; 2066 pu1_src_chroma += sao_ht_chroma * src_strd; 2067 ps_sao += (ps_sps->i2_pic_wd_in_ctb); 2068 } 2069 2070 /* Left CTB */ 2071 if(ps_sao_ctxt->i4_ctb_x > 0) 2072 { 2073 WORD32 sao_wd_luma = SAO_SHIFT_CTB; 2074 WORD32 sao_wd_chroma = 2 * SAO_SHIFT_CTB; 2075 WORD32 sao_ht_luma = ctb_size - SAO_SHIFT_CTB; 2076 WORD32 sao_ht_chroma = ctb_size / 2 - SAO_SHIFT_CTB; 2077 2078 WORD32 ctbx_l_t = 0, ctbx_l_l = 0, ctbx_l_r = 0, ctbx_l_d = 0, ctbx_l = 0; 2079 WORD32 ctby_l_t = 0, ctby_l_l = 0, ctby_l_r = 0, ctby_l_d = 0, ctby_l = 0; 2080 WORD32 au4_idx_l[8], idx_l; 2081 2082 WORD32 remaining_rows; 2083 slice_header_t *ps_slice_hdr_left; 2084 { 2085 WORD32 left_ctb_indx = (ps_sao_ctxt->i4_ctb_y) * ps_sps->i2_pic_wd_in_ctb + 2086 (ps_sao_ctxt->i4_ctb_x - 1); 2087 ps_slice_hdr_left = ps_slice_hdr_base + pu1_slice_idx[left_ctb_indx]; 2088 } 2089 2090 remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + sao_ht_luma); 2091 if(remaining_rows <= SAO_SHIFT_CTB) 2092 { 2093 sao_ht_luma += remaining_rows; 2094 } 2095 remaining_rows = ps_sps->i2_pic_height_in_luma_samples / 2 - ((ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) + sao_ht_chroma); 2096 if(remaining_rows <= SAO_SHIFT_CTB) 2097 { 2098 sao_ht_chroma += remaining_rows; 2099 } 2100 2101 pu1_src_luma -= sao_wd_luma; 2102 pu1_src_chroma -= sao_wd_chroma; 2103 ps_sao -= 1; 2104 pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma; 2105 pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma; 2106 pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size); 2107 pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size); 2108 2109 2110 if(0 != sao_ht_luma) 2111 { 2112 if(ps_slice_hdr_left->i1_slice_sao_luma_flag) 2113 { 2114 if(0 == ps_sao->b3_y_type_idx) 2115 { 2116 /* Update left, top and top-left */ 2117 for(row = 0; row < sao_ht_luma; row++) 2118 { 2119 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)]; 2120 } 2121 /*Update in next location*/ 2122 pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1]; 2123 2124 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma); 2125 2126 } 2127 2128 else if(1 == ps_sao->b3_y_type_idx) 2129 { 2130 ai1_offset_y[1] = ps_sao->b4_y_offset_1; 2131 ai1_offset_y[2] = ps_sao->b4_y_offset_2; 2132 ai1_offset_y[3] = ps_sao->b4_y_offset_3; 2133 ai1_offset_y[4] = ps_sao->b4_y_offset_4; 2134 2135 ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma, 2136 src_strd, 2137 pu1_src_left_luma, 2138 pu1_src_top_luma, 2139 pu1_sao_src_top_left_luma_curr_ctb, 2140 ps_sao->b5_y_band_pos, 2141 ai1_offset_y, 2142 sao_wd_luma, 2143 sao_ht_luma 2144 ); 2145 } 2146 2147 else // if(2 <= ps_sao->b3_y_type_idx) 2148 { 2149 ai1_offset_y[1] = ps_sao->b4_y_offset_1; 2150 ai1_offset_y[2] = ps_sao->b4_y_offset_2; 2151 ai1_offset_y[3] = ps_sao->b4_y_offset_3; 2152 ai1_offset_y[4] = ps_sao->b4_y_offset_4; 2153 2154 for(i = 0; i < 8; i++) 2155 { 2156 au1_avail_luma[i] = 255; 2157 au1_tile_slice_boundary[i] = 0; 2158 au4_idx_l[i] = 0; 2159 au4_ilf_across_tile_slice_enable[i] = 1; 2160 } 2161 /****************************************************************** 2162 * Derive the Top-left CTB's neighbour pixel's slice indices. 2163 * 2164 * 2165 * ____________ 2166 * | | | 2167 * | L_T| | 2168 * |____|_______|____ 2169 * | | | | 2170 * L_L | L | L_R | | 2171 * |____|_______| | 2172 * | | 2173 * L_D | | 2174 * |____________| 2175 * 2176 *****************************************************************/ 2177 2178 /*In case of slices or tiles*/ 2179 { 2180 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag)) 2181 { 2182 ctbx_l_t = ps_sao_ctxt->i4_ctb_x - 1; 2183 ctby_l_t = ps_sao_ctxt->i4_ctb_y - 1; 2184 2185 ctbx_l_l = ps_sao_ctxt->i4_ctb_x - 1; 2186 ctby_l_l = ps_sao_ctxt->i4_ctb_y; 2187 2188 ctbx_l_r = ps_sao_ctxt->i4_ctb_x; 2189 ctby_l_r = ps_sao_ctxt->i4_ctb_y; 2190 2191 ctbx_l_d = ps_sao_ctxt->i4_ctb_x - 1; 2192 ctby_l_d = ps_sao_ctxt->i4_ctb_y; 2193 2194 ctbx_l = ps_sao_ctxt->i4_ctb_x - 1; 2195 ctby_l = ps_sao_ctxt->i4_ctb_y; 2196 2197 if(!ps_slice_hdr->i1_first_slice_in_pic_flag) 2198 { 2199 if(0 == ps_sao_ctxt->i4_ctb_y) 2200 { 2201 au4_idx_l[2] = -1; 2202 au4_idx_l[4] = -1; 2203 au4_idx_l[5] = -1; 2204 } 2205 else 2206 { 2207 au4_idx_l[2] = au4_idx_l[4] = pu1_slice_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)]; 2208 au4_idx_l[5] = pu1_slice_idx[ctbx_l_t + 1 + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)]; 2209 } 2210 idx_l = au4_idx_l[6] = pu1_slice_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)]; 2211 au4_idx_l[0] = pu1_slice_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)]; 2212 au4_idx_l[1] = au4_idx_l[7] = pu1_slice_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)]; 2213 au4_idx_l[3] = pu1_slice_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)]; 2214 2215 /*Verify that the neighbor ctbs don't cross pic boundary.*/ 2216 if(0 == ps_sao_ctxt->i4_ctb_y) 2217 { 2218 au4_ilf_across_tile_slice_enable[2] = 0; 2219 au4_ilf_across_tile_slice_enable[4] = 0; 2220 au4_ilf_across_tile_slice_enable[5] = 0; 2221 } 2222 else 2223 { 2224 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag; 2225 au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2]; 2226 2227 } 2228 //TODO: ILF flag checks for [0] and [6] is missing. 2229 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_l[1])->i1_slice_loop_filter_across_slices_enabled_flag; 2230 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_l[3])->i1_slice_loop_filter_across_slices_enabled_flag; 2231 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_l[7])->i1_slice_loop_filter_across_slices_enabled_flag; 2232 2233 if(idx_l < au4_idx_l[5]) 2234 { 2235 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_l[5])->i1_slice_loop_filter_across_slices_enabled_flag; 2236 } 2237 2238 /* 2239 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag 2240 * of the pixel having a greater address is checked. Accordingly, set the availability flags 2241 */ 2242 for(i = 0; i < 8; i++) 2243 { 2244 /*Sets the edges that lie on the slice/tile boundary*/ 2245 if(au4_idx_l[i] != idx_l) 2246 { 2247 au1_tile_slice_boundary[i] = 1; 2248 } 2249 else 2250 { 2251 au4_ilf_across_tile_slice_enable[i] = 1; 2252 } 2253 } 2254 /*Reset indices*/ 2255 for(i = 0; i < 8; i++) 2256 { 2257 au4_idx_l[i] = 0; 2258 } 2259 } 2260 2261 if(ps_pps->i1_tiles_enabled_flag) 2262 { 2263 /* Calculate availability flags at slice boundary */ 2264 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y)))) 2265 { 2266 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */ 2267 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag) 2268 { 2269 if(0 == ps_sao_ctxt->i4_ctb_y) 2270 { 2271 au4_idx_l[2] = -1; 2272 au4_idx_l[4] = -1; 2273 au4_idx_l[5] = -1; 2274 } 2275 else 2276 { 2277 au4_idx_l[2] = au4_idx_l[4] = pu1_tile_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)]; 2278 au4_idx_l[5] = pu1_tile_idx[ctbx_l_t + 1 + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)]; 2279 } 2280 2281 idx_l = au4_idx_l[6] = pu1_tile_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)]; 2282 au4_idx_l[0] = pu1_tile_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)]; 2283 au4_idx_l[1] = au4_idx_l[7] = pu1_tile_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)]; 2284 au4_idx_l[3] = pu1_tile_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)]; 2285 2286 for(i = 0; i < 8; i++) 2287 { 2288 /*Sets the edges that lie on the slice/tile boundary*/ 2289 if(au4_idx_l[i] != idx_l) 2290 { 2291 au1_tile_slice_boundary[i] |= 1; 2292 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; 2293 } 2294 } 2295 } 2296 } 2297 } 2298 2299 for(i = 0; i < 8; i++) 2300 { 2301 /*Sets the edges that lie on the slice/tile boundary*/ 2302 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i])) 2303 { 2304 au1_avail_luma[i] = 0; 2305 } 2306 } 2307 } 2308 } 2309 if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma) 2310 { 2311 au1_avail_luma[0] = 0; 2312 au1_avail_luma[4] = 0; 2313 au1_avail_luma[6] = 0; 2314 } 2315 if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x) 2316 { 2317 au1_avail_luma[1] = 0; 2318 au1_avail_luma[5] = 0; 2319 au1_avail_luma[7] = 0; 2320 } 2321 2322 if(0 == ps_sao_ctxt->i4_ctb_y) 2323 { 2324 au1_avail_luma[2] = 0; 2325 au1_avail_luma[4] = 0; 2326 au1_avail_luma[5] = 0; 2327 } 2328 2329 if(ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) <= sao_ht_luma) 2330 { 2331 au1_avail_luma[3] = 0; 2332 au1_avail_luma[6] = 0; 2333 au1_avail_luma[7] = 0; 2334 } 2335 2336 { 2337 au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma]; 2338 u1_sao_src_top_left_luma_bot_left = pu1_sao_src_top_left_luma_bot_left[0]; 2339 ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma, 2340 src_strd, 2341 pu1_src_left_luma, 2342 pu1_src_top_luma, 2343 pu1_sao_src_top_left_luma_curr_ctb, 2344 au1_src_top_right, 2345 &u1_sao_src_top_left_luma_bot_left, 2346 au1_avail_luma, 2347 ai1_offset_y, 2348 sao_wd_luma, 2349 sao_ht_luma); 2350 } 2351 2352 } 2353 } 2354 else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag)) 2355 { 2356 /* Update left, top and top-left */ 2357 for(row = 0; row < sao_ht_luma; row++) 2358 { 2359 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)]; 2360 } 2361 /*Update in next location*/ 2362 pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1]; 2363 2364 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma); 2365 } 2366 } 2367 2368 if(0 != sao_ht_chroma) 2369 { 2370 if(ps_slice_hdr_left->i1_slice_sao_chroma_flag) 2371 { 2372 if(0 == ps_sao->b3_cb_type_idx) 2373 { 2374 for(row = 0; row < sao_ht_chroma; row++) 2375 { 2376 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)]; 2377 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)]; 2378 } 2379 pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2]; 2380 pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1]; 2381 2382 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma); 2383 } 2384 2385 else if(1 == ps_sao->b3_cb_type_idx) 2386 { 2387 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1; 2388 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2; 2389 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3; 2390 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4; 2391 2392 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1; 2393 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2; 2394 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3; 2395 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4; 2396 2397 if(chroma_yuv420sp_vu) 2398 { 2399 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma, 2400 src_strd, 2401 pu1_src_left_chroma, 2402 pu1_src_top_chroma, 2403 pu1_sao_src_top_left_chroma_curr_ctb, 2404 ps_sao->b5_cr_band_pos, 2405 ps_sao->b5_cb_band_pos, 2406 ai1_offset_cr, 2407 ai1_offset_cb, 2408 sao_wd_chroma, 2409 sao_ht_chroma 2410 ); 2411 } 2412 else 2413 { 2414 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma, 2415 src_strd, 2416 pu1_src_left_chroma, 2417 pu1_src_top_chroma, 2418 pu1_sao_src_top_left_chroma_curr_ctb, 2419 ps_sao->b5_cb_band_pos, 2420 ps_sao->b5_cr_band_pos, 2421 ai1_offset_cb, 2422 ai1_offset_cr, 2423 sao_wd_chroma, 2424 sao_ht_chroma 2425 ); 2426 } 2427 } 2428 2429 else // if(2 <= ps_sao->b3_cb_type_idx) 2430 { 2431 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1; 2432 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2; 2433 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3; 2434 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4; 2435 2436 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1; 2437 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2; 2438 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3; 2439 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4; 2440 2441 for(i = 0; i < 8; i++) 2442 { 2443 au1_avail_chroma[i] = 255; 2444 au1_tile_slice_boundary[i] = 0; 2445 au4_idx_l[i] = 0; 2446 au4_ilf_across_tile_slice_enable[i] = 1; 2447 } 2448 /*In case of slices*/ 2449 { 2450 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag)) 2451 { 2452 ctbx_l_t = ps_sao_ctxt->i4_ctb_x - 1; 2453 ctby_l_t = ps_sao_ctxt->i4_ctb_y - 1; 2454 2455 ctbx_l_l = ps_sao_ctxt->i4_ctb_x - 1; 2456 ctby_l_l = ps_sao_ctxt->i4_ctb_y; 2457 2458 ctbx_l_r = ps_sao_ctxt->i4_ctb_x; 2459 ctby_l_r = ps_sao_ctxt->i4_ctb_y; 2460 2461 ctbx_l_d = ps_sao_ctxt->i4_ctb_x - 1; 2462 ctby_l_d = ps_sao_ctxt->i4_ctb_y; 2463 2464 ctbx_l = ps_sao_ctxt->i4_ctb_x - 1; 2465 ctby_l = ps_sao_ctxt->i4_ctb_y; 2466 2467 if(!ps_slice_hdr->i1_first_slice_in_pic_flag) 2468 { 2469 if(0 == ps_sao_ctxt->i4_ctb_y) 2470 { 2471 au4_idx_l[2] = -1; 2472 au4_idx_l[4] = -1; 2473 au4_idx_l[5] = -1; 2474 } 2475 else 2476 { 2477 au4_idx_l[2] = au4_idx_l[4] = pu1_slice_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)]; 2478 au4_idx_l[5] = pu1_slice_idx[ctbx_l_t + 1 + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)]; 2479 } 2480 idx_l = au4_idx_l[6] = pu1_slice_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)]; 2481 au4_idx_l[0] = pu1_slice_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)]; 2482 au4_idx_l[1] = au4_idx_l[7] = pu1_slice_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)]; 2483 au4_idx_l[3] = pu1_slice_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)]; 2484 2485 /*Verify that the neighbour ctbs dont cross pic boundary.*/ 2486 if(0 == ps_sao_ctxt->i4_ctb_y) 2487 { 2488 au4_ilf_across_tile_slice_enable[2] = 0; 2489 au4_ilf_across_tile_slice_enable[4] = 0; 2490 au4_ilf_across_tile_slice_enable[5] = 0; 2491 } 2492 else 2493 { 2494 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag; 2495 au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2]; 2496 } 2497 2498 if(au4_idx_l[5] > idx_l) 2499 { 2500 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_l[5])->i1_slice_loop_filter_across_slices_enabled_flag; 2501 } 2502 2503 // au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag; 2504 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_l[1])->i1_slice_loop_filter_across_slices_enabled_flag; 2505 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_l[3])->i1_slice_loop_filter_across_slices_enabled_flag; 2506 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_l[7])->i1_slice_loop_filter_across_slices_enabled_flag; 2507 /* 2508 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag 2509 * of the pixel having a greater address is checked. Accordingly, set the availability flags 2510 */ 2511 for(i = 0; i < 8; i++) 2512 { 2513 /*Sets the edges that lie on the slice/tile boundary*/ 2514 if(au4_idx_l[i] != idx_l) 2515 { 2516 au1_tile_slice_boundary[i] = 1; 2517 } 2518 else 2519 { 2520 au4_ilf_across_tile_slice_enable[i] = 1; 2521 } 2522 } 2523 /*Reset indices*/ 2524 for(i = 0; i < 8; i++) 2525 { 2526 au4_idx_l[i] = 0; 2527 } 2528 } 2529 if(ps_pps->i1_tiles_enabled_flag) 2530 { 2531 /* Calculate availability flags at slice boundary */ 2532 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y)))) 2533 { 2534 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */ 2535 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag) 2536 { 2537 if(0 == ps_sao_ctxt->i4_ctb_y) 2538 { 2539 au4_idx_l[2] = -1; 2540 au4_idx_l[4] = -1; 2541 au4_idx_l[5] = -1; 2542 } 2543 else 2544 { 2545 au4_idx_l[2] = au4_idx_l[4] = pu1_tile_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)]; 2546 au4_idx_l[5] = pu1_tile_idx[ctbx_l_t + 1 + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)]; 2547 } 2548 2549 idx_l = au4_idx_l[6] = pu1_tile_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)]; 2550 au4_idx_l[0] = pu1_tile_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)]; 2551 au4_idx_l[1] = au4_idx_l[7] = pu1_tile_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)]; 2552 au4_idx_l[3] = pu1_tile_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)]; 2553 2554 for(i = 0; i < 8; i++) 2555 { 2556 /*Sets the edges that lie on the slice/tile boundary*/ 2557 if(au4_idx_l[i] != idx_l) 2558 { 2559 au1_tile_slice_boundary[i] |= 1; 2560 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0 2561 } 2562 } 2563 } 2564 } 2565 } 2566 for(i = 0; i < 8; i++) 2567 { 2568 /*Sets the edges that lie on the slice/tile boundary*/ 2569 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i])) 2570 { 2571 au1_avail_chroma[i] = 0; 2572 } 2573 } 2574 } 2575 } 2576 if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma) 2577 { 2578 au1_avail_chroma[0] = 0; 2579 au1_avail_chroma[4] = 0; 2580 au1_avail_chroma[6] = 0; 2581 } 2582 2583 if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x) 2584 { 2585 au1_avail_chroma[1] = 0; 2586 au1_avail_chroma[5] = 0; 2587 au1_avail_chroma[7] = 0; 2588 } 2589 2590 if(0 == ps_sao_ctxt->i4_ctb_y) 2591 { 2592 au1_avail_chroma[2] = 0; 2593 au1_avail_chroma[4] = 0; 2594 au1_avail_chroma[5] = 0; 2595 } 2596 2597 if(ps_sps->i2_pic_height_in_luma_samples / 2 - (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) <= sao_ht_chroma) 2598 { 2599 au1_avail_chroma[3] = 0; 2600 au1_avail_chroma[6] = 0; 2601 au1_avail_chroma[7] = 0; 2602 } 2603 2604 { 2605 au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma]; 2606 au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1]; 2607 au1_src_bot_left[0] = pu1_sao_src_top_left_chroma_bot_left[0]; 2608 au1_src_bot_left[1] = pu1_sao_src_top_left_chroma_bot_left[1]; 2609 //au1_src_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2]; 2610 //au1_src_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1]; 2611 if((ctb_size == 16) && (ps_sao_ctxt->i4_ctb_x != ps_sps->i2_pic_wd_in_ctb - 1)) 2612 { 2613 au1_src_top_right[0] = pu1_src_chroma[sao_wd_chroma - src_strd]; 2614 au1_src_top_right[1] = pu1_src_chroma[sao_wd_chroma - src_strd + 1]; 2615 } 2616 2617 2618 if(chroma_yuv420sp_vu) 2619 { 2620 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma, 2621 src_strd, 2622 pu1_src_left_chroma, 2623 pu1_src_top_chroma, 2624 pu1_sao_src_top_left_chroma_curr_ctb, 2625 au1_src_top_right, 2626 au1_src_bot_left, 2627 au1_avail_chroma, 2628 ai1_offset_cr, 2629 ai1_offset_cb, 2630 sao_wd_chroma, 2631 sao_ht_chroma); 2632 } 2633 else 2634 { 2635 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma, 2636 src_strd, 2637 pu1_src_left_chroma, 2638 pu1_src_top_chroma, 2639 pu1_sao_src_top_left_chroma_curr_ctb, 2640 au1_src_top_right, 2641 au1_src_bot_left, 2642 au1_avail_chroma, 2643 ai1_offset_cb, 2644 ai1_offset_cr, 2645 sao_wd_chroma, 2646 sao_ht_chroma); 2647 } 2648 } 2649 2650 } 2651 } 2652 else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag)) 2653 { 2654 for(row = 0; row < sao_ht_chroma; row++) 2655 { 2656 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)]; 2657 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)]; 2658 } 2659 pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2]; 2660 pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1]; 2661 2662 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma); 2663 } 2664 2665 } 2666 pu1_src_luma += sao_wd_luma; 2667 pu1_src_chroma += sao_wd_chroma; 2668 ps_sao += 1; 2669 } 2670 2671 2672 /* Current CTB */ 2673 { 2674 WORD32 sao_wd_luma = ctb_size - SAO_SHIFT_CTB; 2675 WORD32 sao_wd_chroma = ctb_size - SAO_SHIFT_CTB * 2; 2676 WORD32 sao_ht_luma = ctb_size - SAO_SHIFT_CTB; 2677 WORD32 sao_ht_chroma = ctb_size / 2 - SAO_SHIFT_CTB; 2678 WORD32 ctbx_c_t = 0, ctbx_c_l = 0, ctbx_c_r = 0, ctbx_c_d = 0, ctbx_c = 0; 2679 WORD32 ctby_c_t = 0, ctby_c_l = 0, ctby_c_r = 0, ctby_c_d = 0, ctby_c = 0; 2680 WORD32 au4_idx_c[8], idx_c; 2681 2682 WORD32 remaining_rows; 2683 WORD32 remaining_cols; 2684 2685 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_luma); 2686 if(remaining_cols <= SAO_SHIFT_CTB) 2687 { 2688 sao_wd_luma += remaining_cols; 2689 } 2690 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_chroma); 2691 if(remaining_cols <= 2 * SAO_SHIFT_CTB) 2692 { 2693 sao_wd_chroma += remaining_cols; 2694 } 2695 2696 remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + sao_ht_luma); 2697 if(remaining_rows <= SAO_SHIFT_CTB) 2698 { 2699 sao_ht_luma += remaining_rows; 2700 } 2701 remaining_rows = ps_sps->i2_pic_height_in_luma_samples / 2 - ((ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) + sao_ht_chroma); 2702 if(remaining_rows <= SAO_SHIFT_CTB) 2703 { 2704 sao_ht_chroma += remaining_rows; 2705 } 2706 2707 pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size); 2708 pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size); 2709 pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size); 2710 pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size); 2711 2712 if((0 != sao_wd_luma) && (0 != sao_ht_luma)) 2713 { 2714 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag) 2715 { 2716 if(0 == ps_sao->b3_y_type_idx) 2717 { 2718 /* Update left, top and top-left */ 2719 for(row = 0; row < sao_ht_luma; row++) 2720 { 2721 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)]; 2722 } 2723 pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1]; 2724 2725 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma); 2726 2727 pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma]; 2728 2729 } 2730 2731 else if(1 == ps_sao->b3_y_type_idx) 2732 { 2733 ai1_offset_y[1] = ps_sao->b4_y_offset_1; 2734 ai1_offset_y[2] = ps_sao->b4_y_offset_2; 2735 ai1_offset_y[3] = ps_sao->b4_y_offset_3; 2736 ai1_offset_y[4] = ps_sao->b4_y_offset_4; 2737 2738 ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma, 2739 src_strd, 2740 pu1_src_left_luma, 2741 pu1_src_top_luma, 2742 pu1_sao_src_top_left_luma_curr_ctb, 2743 ps_sao->b5_y_band_pos, 2744 ai1_offset_y, 2745 sao_wd_luma, 2746 sao_ht_luma 2747 ); 2748 } 2749 2750 else // if(2 <= ps_sao->b3_y_type_idx) 2751 { 2752 ai1_offset_y[1] = ps_sao->b4_y_offset_1; 2753 ai1_offset_y[2] = ps_sao->b4_y_offset_2; 2754 ai1_offset_y[3] = ps_sao->b4_y_offset_3; 2755 ai1_offset_y[4] = ps_sao->b4_y_offset_4; 2756 2757 for(i = 0; i < 8; i++) 2758 { 2759 au1_avail_luma[i] = 255; 2760 au1_tile_slice_boundary[i] = 0; 2761 au4_idx_c[i] = 0; 2762 au4_ilf_across_tile_slice_enable[i] = 1; 2763 } 2764 /****************************************************************** 2765 * Derive the Top-left CTB's neighbour pixel's slice indices. 2766 * 2767 * 2768 * ____________ 2769 * | | | 2770 * | | C_T | 2771 * |____|_______|____ 2772 * | | | | 2773 * | C_L| C | C_R| 2774 * |____|_______| | 2775 * | C_D | 2776 * | | 2777 * |____________| 2778 * 2779 *****************************************************************/ 2780 2781 /*In case of slices*/ 2782 { 2783 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag)) 2784 { 2785 ctbx_c_t = ps_sao_ctxt->i4_ctb_x; 2786 ctby_c_t = ps_sao_ctxt->i4_ctb_y - 1; 2787 2788 ctbx_c_l = ps_sao_ctxt->i4_ctb_x - 1; 2789 ctby_c_l = ps_sao_ctxt->i4_ctb_y; 2790 2791 ctbx_c_r = ps_sao_ctxt->i4_ctb_x; 2792 ctby_c_r = ps_sao_ctxt->i4_ctb_y; 2793 2794 ctbx_c_d = ps_sao_ctxt->i4_ctb_x; 2795 ctby_c_d = ps_sao_ctxt->i4_ctb_y; 2796 2797 ctbx_c = ps_sao_ctxt->i4_ctb_x; 2798 ctby_c = ps_sao_ctxt->i4_ctb_y; 2799 2800 if(!ps_slice_hdr->i1_first_slice_in_pic_flag) 2801 { 2802 if(0 == ps_sao_ctxt->i4_ctb_x) 2803 { 2804 au4_idx_c[6] = -1; 2805 au4_idx_c[0] = -1; 2806 au4_idx_c[4] = -1; 2807 } 2808 else 2809 { 2810 au4_idx_c[0] = au4_idx_c[6] = pu1_slice_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)]; 2811 } 2812 2813 if(0 == ps_sao_ctxt->i4_ctb_y) 2814 { 2815 au4_idx_c[2] = -1; 2816 au4_idx_c[5] = -1; 2817 au4_idx_c[4] = -1; 2818 } 2819 else 2820 { 2821 au4_idx_c[4] = pu1_slice_idx[ctbx_c_t - 1 + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)]; 2822 au4_idx_c[2] = au4_idx_c[5] = pu1_slice_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)]; 2823 } 2824 idx_c = pu1_slice_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)]; 2825 au4_idx_c[1] = au4_idx_c[7] = pu1_slice_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)]; 2826 au4_idx_c[3] = pu1_slice_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)]; 2827 2828 if(0 == ps_sao_ctxt->i4_ctb_x) 2829 { 2830 au4_ilf_across_tile_slice_enable[6] = 0; 2831 au4_ilf_across_tile_slice_enable[0] = 0; 2832 au4_ilf_across_tile_slice_enable[4] = 0; 2833 } 2834 else 2835 { 2836 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_c[6])->i1_slice_loop_filter_across_slices_enabled_flag; 2837 au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;; 2838 } 2839 if(0 == ps_sao_ctxt->i4_ctb_y) 2840 { 2841 au4_ilf_across_tile_slice_enable[2] = 0; 2842 au4_ilf_across_tile_slice_enable[4] = 0; 2843 au4_ilf_across_tile_slice_enable[5] = 0; 2844 } 2845 else 2846 { 2847 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag; 2848 au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2]; 2849 } 2850 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_c[1])->i1_slice_loop_filter_across_slices_enabled_flag; 2851 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_c[3])->i1_slice_loop_filter_across_slices_enabled_flag; 2852 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_c[7])->i1_slice_loop_filter_across_slices_enabled_flag; 2853 2854 if(au4_idx_c[6] < idx_c) 2855 { 2856 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag; 2857 } 2858 2859 /* 2860 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag 2861 * of the pixel having a greater address is checked. Accordingly, set the availability flags 2862 */ 2863 for(i = 0; i < 8; i++) 2864 { 2865 /*Sets the edges that lie on the slice/tile boundary*/ 2866 if(au4_idx_c[i] != idx_c) 2867 { 2868 au1_tile_slice_boundary[i] = 1; 2869 } 2870 else 2871 { 2872 au4_ilf_across_tile_slice_enable[i] = 1; 2873 } 2874 } 2875 /*Reset indices*/ 2876 for(i = 0; i < 8; i++) 2877 { 2878 au4_idx_c[i] = 0; 2879 } 2880 } 2881 2882 if(ps_pps->i1_tiles_enabled_flag) 2883 { 2884 /* Calculate availability flags at slice boundary */ 2885 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y)))) 2886 { 2887 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */ 2888 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag) 2889 { 2890 if(0 == ps_sao_ctxt->i4_ctb_x) 2891 { 2892 au4_idx_c[6] = -1; 2893 au4_idx_c[0] = -1; 2894 au4_idx_c[4] = -1; 2895 } 2896 else 2897 { 2898 au4_idx_c[0] = au4_idx_c[6] = pu1_tile_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)]; 2899 } 2900 2901 if(0 == ps_sao_ctxt->i4_ctb_y) 2902 { 2903 au4_idx_c[2] = -1; 2904 au4_idx_c[5] = -1; 2905 au4_idx_c[4] = -1; 2906 } 2907 else 2908 { 2909 au4_idx_c[4] = pu1_tile_idx[ctbx_c_t - 1 + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)]; 2910 au4_idx_c[2] = au4_idx_c[5] = pu1_tile_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)]; 2911 } 2912 idx_c = pu1_tile_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)]; 2913 au4_idx_c[1] = au4_idx_c[7] = pu1_tile_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)]; 2914 au4_idx_c[3] = pu1_tile_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)]; 2915 2916 for(i = 0; i < 8; i++) 2917 { 2918 /*Sets the edges that lie on the slice/tile boundary*/ 2919 if(au4_idx_c[i] != idx_c) 2920 { 2921 au1_tile_slice_boundary[i] |= 1; 2922 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0 2923 } 2924 } 2925 } 2926 } 2927 } 2928 2929 for(i = 0; i < 8; i++) 2930 { 2931 /*Sets the edges that lie on the slice/tile boundary*/ 2932 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i])) 2933 { 2934 au1_avail_luma[i] = 0; 2935 } 2936 } 2937 2938 } 2939 } 2940 if(0 == ps_sao_ctxt->i4_ctb_x) 2941 { 2942 au1_avail_luma[0] = 0; 2943 au1_avail_luma[4] = 0; 2944 au1_avail_luma[6] = 0; 2945 } 2946 2947 if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_luma) 2948 { 2949 au1_avail_luma[1] = 0; 2950 au1_avail_luma[5] = 0; 2951 au1_avail_luma[7] = 0; 2952 } 2953 2954 if(0 == ps_sao_ctxt->i4_ctb_y) 2955 { 2956 au1_avail_luma[2] = 0; 2957 au1_avail_luma[4] = 0; 2958 au1_avail_luma[5] = 0; 2959 } 2960 2961 if(ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) <= sao_ht_luma) 2962 { 2963 au1_avail_luma[3] = 0; 2964 au1_avail_luma[6] = 0; 2965 au1_avail_luma[7] = 0; 2966 } 2967 2968 { 2969 au1_src_top_right[0] = pu1_src_luma[sao_wd_luma - src_strd]; 2970 u1_sao_src_top_left_luma_bot_left = pu1_src_luma[sao_ht_luma * src_strd - 1]; 2971 2972 ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma, 2973 src_strd, 2974 pu1_src_left_luma, 2975 pu1_src_top_luma, 2976 pu1_sao_src_top_left_luma_curr_ctb, 2977 au1_src_top_right, 2978 &u1_sao_src_top_left_luma_bot_left, 2979 au1_avail_luma, 2980 ai1_offset_y, 2981 sao_wd_luma, 2982 sao_ht_luma); 2983 } 2984 pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma]; 2985 pu1_sao_src_top_left_luma_bot_left[0] = pu1_src_luma[(sao_ht_luma)*src_strd + sao_wd_luma - 1]; 2986 } 2987 } 2988 else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag)) 2989 { 2990 /* Update left, top and top-left */ 2991 for(row = 0; row < sao_ht_luma; row++) 2992 { 2993 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)]; 2994 } 2995 pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1]; 2996 2997 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma); 2998 2999 pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma]; 3000 } 3001 } 3002 3003 if((0 != sao_wd_chroma) && (0 != sao_ht_chroma)) 3004 { 3005 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag) 3006 { 3007 if(0 == ps_sao->b3_cb_type_idx) 3008 { 3009 for(row = 0; row < sao_ht_chroma; row++) 3010 { 3011 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)]; 3012 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)]; 3013 } 3014 pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2]; 3015 pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1]; 3016 3017 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma); 3018 3019 pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma]; 3020 pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1]; 3021 } 3022 3023 else if(1 == ps_sao->b3_cb_type_idx) 3024 { 3025 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1; 3026 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2; 3027 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3; 3028 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4; 3029 3030 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1; 3031 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2; 3032 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3; 3033 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4; 3034 3035 if(chroma_yuv420sp_vu) 3036 { 3037 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma, 3038 src_strd, 3039 pu1_src_left_chroma, 3040 pu1_src_top_chroma, 3041 pu1_sao_src_top_left_chroma_curr_ctb, 3042 ps_sao->b5_cr_band_pos, 3043 ps_sao->b5_cb_band_pos, 3044 ai1_offset_cr, 3045 ai1_offset_cb, 3046 sao_wd_chroma, 3047 sao_ht_chroma 3048 ); 3049 } 3050 else 3051 { 3052 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma, 3053 src_strd, 3054 pu1_src_left_chroma, 3055 pu1_src_top_chroma, 3056 pu1_sao_src_top_left_chroma_curr_ctb, 3057 ps_sao->b5_cb_band_pos, 3058 ps_sao->b5_cr_band_pos, 3059 ai1_offset_cb, 3060 ai1_offset_cr, 3061 sao_wd_chroma, 3062 sao_ht_chroma 3063 ); 3064 } 3065 } 3066 3067 else // if(2 <= ps_sao->b3_cb_type_idx) 3068 { 3069 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1; 3070 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2; 3071 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3; 3072 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4; 3073 3074 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1; 3075 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2; 3076 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3; 3077 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4; 3078 3079 for(i = 0; i < 8; i++) 3080 { 3081 au1_avail_chroma[i] = 255; 3082 au1_tile_slice_boundary[i] = 0; 3083 au4_idx_c[i] = 0; 3084 au4_ilf_across_tile_slice_enable[i] = 1; 3085 } 3086 { 3087 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag)) 3088 { 3089 ctbx_c_t = ps_sao_ctxt->i4_ctb_x; 3090 ctby_c_t = ps_sao_ctxt->i4_ctb_y - 1; 3091 3092 ctbx_c_l = ps_sao_ctxt->i4_ctb_x - 1; 3093 ctby_c_l = ps_sao_ctxt->i4_ctb_y; 3094 3095 ctbx_c_r = ps_sao_ctxt->i4_ctb_x; 3096 ctby_c_r = ps_sao_ctxt->i4_ctb_y; 3097 3098 ctbx_c_d = ps_sao_ctxt->i4_ctb_x; 3099 ctby_c_d = ps_sao_ctxt->i4_ctb_y; 3100 3101 ctbx_c = ps_sao_ctxt->i4_ctb_x; 3102 ctby_c = ps_sao_ctxt->i4_ctb_y; 3103 3104 if(!ps_slice_hdr->i1_first_slice_in_pic_flag) 3105 { 3106 if(0 == ps_sao_ctxt->i4_ctb_x) 3107 { 3108 au4_idx_c[0] = -1; 3109 au4_idx_c[4] = -1; 3110 au4_idx_c[6] = -1; 3111 } 3112 else 3113 { 3114 au4_idx_c[0] = au4_idx_c[6] = pu1_slice_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)]; 3115 } 3116 3117 if(0 == ps_sao_ctxt->i4_ctb_y) 3118 { 3119 au4_idx_c[2] = -1; 3120 au4_idx_c[4] = -1; 3121 au4_idx_c[5] = -1; 3122 } 3123 else 3124 { 3125 au4_idx_c[2] = au4_idx_c[5] = pu1_slice_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)]; 3126 au4_idx_c[4] = pu1_slice_idx[ctbx_c_t - 1 + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)]; 3127 } 3128 idx_c = pu1_slice_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)]; 3129 au4_idx_c[1] = au4_idx_c[7] = pu1_slice_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)]; 3130 au4_idx_c[3] = pu1_slice_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)]; 3131 3132 if(0 == ps_sao_ctxt->i4_ctb_x) 3133 { 3134 au4_ilf_across_tile_slice_enable[0] = 0; 3135 au4_ilf_across_tile_slice_enable[4] = 0; 3136 au4_ilf_across_tile_slice_enable[6] = 0; 3137 } 3138 else 3139 { 3140 au4_ilf_across_tile_slice_enable[6] &= (ps_slice_hdr_base + au4_idx_c[6])->i1_slice_loop_filter_across_slices_enabled_flag; 3141 au4_ilf_across_tile_slice_enable[0] &= (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag; 3142 } 3143 3144 if(0 == ps_sao_ctxt->i4_ctb_y) 3145 { 3146 au4_ilf_across_tile_slice_enable[2] = 0; 3147 au4_ilf_across_tile_slice_enable[4] = 0; 3148 au4_ilf_across_tile_slice_enable[5] = 0; 3149 } 3150 else 3151 { 3152 au4_ilf_across_tile_slice_enable[2] &= (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag; 3153 au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2]; 3154 } 3155 3156 au4_ilf_across_tile_slice_enable[1] &= (ps_slice_hdr_base + au4_idx_c[1])->i1_slice_loop_filter_across_slices_enabled_flag; 3157 au4_ilf_across_tile_slice_enable[3] &= (ps_slice_hdr_base + au4_idx_c[3])->i1_slice_loop_filter_across_slices_enabled_flag; 3158 au4_ilf_across_tile_slice_enable[7] &= (ps_slice_hdr_base + au4_idx_c[7])->i1_slice_loop_filter_across_slices_enabled_flag; 3159 3160 if(idx_c > au4_idx_c[6]) 3161 { 3162 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag; 3163 } 3164 3165 /* 3166 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag 3167 * of the pixel having a greater address is checked. Accordingly, set the availability flags 3168 */ 3169 for(i = 0; i < 8; i++) 3170 { 3171 /*Sets the edges that lie on the slice/tile boundary*/ 3172 if(au4_idx_c[i] != idx_c) 3173 { 3174 au1_tile_slice_boundary[i] = 1; 3175 } 3176 else 3177 { 3178 au4_ilf_across_tile_slice_enable[i] = 1; 3179 } 3180 } 3181 /*Reset indices*/ 3182 for(i = 0; i < 8; i++) 3183 { 3184 au4_idx_c[i] = 0; 3185 } 3186 } 3187 3188 if(ps_pps->i1_tiles_enabled_flag) 3189 { 3190 /* Calculate availability flags at slice boundary */ 3191 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y)))) 3192 { 3193 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */ 3194 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag) 3195 { 3196 if(0 == ps_sao_ctxt->i4_ctb_x) 3197 { 3198 au4_idx_c[6] = -1; 3199 au4_idx_c[0] = -1; 3200 au4_idx_c[4] = -1; 3201 } 3202 else 3203 { 3204 au4_idx_c[0] = au4_idx_c[6] = pu1_tile_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)]; 3205 } 3206 3207 if(0 == ps_sao_ctxt->i4_ctb_y) 3208 { 3209 au4_idx_c[2] = -1; 3210 au4_idx_c[5] = -1; 3211 au4_idx_c[4] = -1; 3212 } 3213 else 3214 { 3215 au4_idx_c[4] = pu1_tile_idx[ctbx_c_t - 1 + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)]; 3216 au4_idx_c[2] = au4_idx_c[5] = pu1_tile_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)]; 3217 } 3218 idx_c = pu1_tile_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)]; 3219 au4_idx_c[1] = au4_idx_c[7] = pu1_tile_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)]; 3220 au4_idx_c[3] = pu1_tile_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)]; 3221 3222 for(i = 0; i < 8; i++) 3223 { 3224 /*Sets the edges that lie on the slice/tile boundary*/ 3225 if(au4_idx_c[i] != idx_c) 3226 { 3227 au1_tile_slice_boundary[i] |= 1; 3228 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0 3229 } 3230 } 3231 } 3232 } 3233 } 3234 3235 for(i = 0; i < 8; i++) 3236 { 3237 /*Sets the edges that lie on the slice/tile boundary*/ 3238 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i])) 3239 { 3240 au1_avail_chroma[i] = 0; 3241 } 3242 } 3243 } 3244 } 3245 3246 if(0 == ps_sao_ctxt->i4_ctb_x) 3247 { 3248 au1_avail_chroma[0] = 0; 3249 au1_avail_chroma[4] = 0; 3250 au1_avail_chroma[6] = 0; 3251 } 3252 3253 if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_chroma) 3254 { 3255 au1_avail_chroma[1] = 0; 3256 au1_avail_chroma[5] = 0; 3257 au1_avail_chroma[7] = 0; 3258 } 3259 3260 if(0 == ps_sao_ctxt->i4_ctb_y) 3261 { 3262 au1_avail_chroma[2] = 0; 3263 au1_avail_chroma[4] = 0; 3264 au1_avail_chroma[5] = 0; 3265 } 3266 3267 if(ps_sps->i2_pic_height_in_luma_samples / 2 - (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) <= sao_ht_chroma) 3268 { 3269 au1_avail_chroma[3] = 0; 3270 au1_avail_chroma[6] = 0; 3271 au1_avail_chroma[7] = 0; 3272 } 3273 3274 { 3275 au1_src_top_right[0] = pu1_src_chroma[sao_wd_chroma - src_strd]; 3276 au1_src_top_right[1] = pu1_src_chroma[sao_wd_chroma - src_strd + 1]; 3277 3278 au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2]; 3279 au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1]; 3280 3281 if(chroma_yuv420sp_vu) 3282 { 3283 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma, 3284 src_strd, 3285 pu1_src_left_chroma, 3286 pu1_src_top_chroma, 3287 pu1_sao_src_top_left_chroma_curr_ctb, 3288 au1_src_top_right, 3289 au1_sao_src_top_left_chroma_bot_left, 3290 au1_avail_chroma, 3291 ai1_offset_cr, 3292 ai1_offset_cb, 3293 sao_wd_chroma, 3294 sao_ht_chroma); 3295 } 3296 else 3297 { 3298 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma, 3299 src_strd, 3300 pu1_src_left_chroma, 3301 pu1_src_top_chroma, 3302 pu1_sao_src_top_left_chroma_curr_ctb, 3303 au1_src_top_right, 3304 au1_sao_src_top_left_chroma_bot_left, 3305 au1_avail_chroma, 3306 ai1_offset_cb, 3307 ai1_offset_cr, 3308 sao_wd_chroma, 3309 sao_ht_chroma); 3310 } 3311 } 3312 3313 } 3314 pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma]; 3315 pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1]; 3316 3317 pu1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[(sao_ht_chroma)*src_strd + sao_wd_chroma - 2]; 3318 pu1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[(sao_ht_chroma)*src_strd + sao_wd_chroma - 1]; 3319 } 3320 else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag)) 3321 { 3322 for(row = 0; row < sao_ht_chroma; row++) 3323 { 3324 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)]; 3325 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)]; 3326 } 3327 pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2]; 3328 pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1]; 3329 3330 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma); 3331 3332 pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma]; 3333 pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1]; 3334 } 3335 3336 } 3337 } 3338 3339 3340 3341 3342 /* If no loop filter is enabled copy the backed up values */ 3343 { 3344 /* Luma */ 3345 if(no_loop_filter_enabled_luma) 3346 { 3347 UWORD32 u4_no_loop_filter_flag; 3348 WORD32 loop_filter_bit_pos; 3349 WORD32 log2_min_cu = 3; 3350 WORD32 min_cu = (1 << log2_min_cu); 3351 UWORD8 *pu1_src_tmp_luma = pu1_src_luma; 3352 WORD32 sao_blk_ht = ctb_size - SAO_SHIFT_CTB; 3353 WORD32 sao_blk_wd = ctb_size; 3354 WORD32 remaining_rows; 3355 WORD32 remaining_cols; 3356 3357 remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB); 3358 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB); 3359 if(remaining_rows <= SAO_SHIFT_CTB) 3360 sao_blk_ht += remaining_rows; 3361 if(remaining_cols <= SAO_SHIFT_CTB) 3362 sao_blk_wd += remaining_cols; 3363 3364 pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB : 0; 3365 pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0; 3366 3367 pu1_src_backup_luma = ps_sao_ctxt->pu1_tmp_buf_luma; 3368 3369 loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) + 3370 (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3); 3371 if(ps_sao_ctxt->i4_ctb_x > 0) 3372 loop_filter_bit_pos -= 1; 3373 3374 pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag + 3375 (loop_filter_bit_pos >> 3); 3376 3377 for(i = -(ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB : 0) >> log2_min_cu; 3378 i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++) 3379 { 3380 WORD32 tmp_wd = sao_blk_wd; 3381 3382 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> 3383 (loop_filter_bit_pos & 7); 3384 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1; 3385 3386 if(u4_no_loop_filter_flag) 3387 { 3388 while(tmp_wd > 0) 3389 { 3390 if(CTZ(u4_no_loop_filter_flag)) 3391 { 3392 pu1_src_tmp_luma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); 3393 pu1_src_backup_luma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); 3394 tmp_wd -= CTZ(u4_no_loop_filter_flag) << log2_min_cu; 3395 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag)); 3396 } 3397 else 3398 { 3399 for(row = 0; row < min_cu; row++) 3400 { 3401 for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++) 3402 { 3403 pu1_src_tmp_luma[row * src_strd + col] = pu1_src_backup_luma[row * backup_strd + col]; 3404 } 3405 } 3406 pu1_src_tmp_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); 3407 pu1_src_backup_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); 3408 tmp_wd -= CTZ(~u4_no_loop_filter_flag) << log2_min_cu; 3409 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag)); 3410 } 3411 } 3412 3413 pu1_src_tmp_luma -= sao_blk_wd; 3414 pu1_src_backup_luma -= sao_blk_wd; 3415 } 3416 3417 pu1_src_tmp_luma += (src_strd << log2_min_cu); 3418 pu1_src_backup_luma += (backup_strd << log2_min_cu); 3419 } 3420 } 3421 3422 /* Chroma */ 3423 if(no_loop_filter_enabled_chroma) 3424 { 3425 UWORD32 u4_no_loop_filter_flag; 3426 WORD32 loop_filter_bit_pos; 3427 WORD32 log2_min_cu = 3; 3428 WORD32 min_cu = (1 << log2_min_cu); 3429 UWORD8 *pu1_src_tmp_chroma = pu1_src_chroma; 3430 WORD32 sao_blk_ht = ctb_size - 2 * SAO_SHIFT_CTB; 3431 WORD32 sao_blk_wd = ctb_size; 3432 WORD32 remaining_rows; 3433 WORD32 remaining_cols; 3434 3435 remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB); 3436 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB); 3437 if(remaining_rows <= 2 * SAO_SHIFT_CTB) 3438 sao_blk_ht += remaining_rows; 3439 if(remaining_cols <= 2 * SAO_SHIFT_CTB) 3440 sao_blk_wd += remaining_cols; 3441 3442 pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB * 2 : 0; 3443 pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0; 3444 3445 pu1_src_backup_chroma = ps_sao_ctxt->pu1_tmp_buf_chroma; 3446 3447 loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) + 3448 (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3); 3449 if(ps_sao_ctxt->i4_ctb_x > 0) 3450 loop_filter_bit_pos -= 2; 3451 3452 pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag + 3453 (loop_filter_bit_pos >> 3); 3454 3455 for(i = -(ps_sao_ctxt->i4_ctb_y ? 2 * SAO_SHIFT_CTB : 0) >> log2_min_cu; 3456 i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++) 3457 { 3458 WORD32 tmp_wd = sao_blk_wd; 3459 3460 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> 3461 (loop_filter_bit_pos & 7); 3462 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1; 3463 3464 if(u4_no_loop_filter_flag) 3465 { 3466 while(tmp_wd > 0) 3467 { 3468 if(CTZ(u4_no_loop_filter_flag)) 3469 { 3470 pu1_src_tmp_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); 3471 pu1_src_backup_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); 3472 tmp_wd -= CTZ(u4_no_loop_filter_flag) << log2_min_cu; 3473 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag)); 3474 } 3475 else 3476 { 3477 for(row = 0; row < min_cu / 2; row++) 3478 { 3479 for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++) 3480 { 3481 pu1_src_tmp_chroma[row * src_strd + col] = pu1_src_backup_chroma[row * backup_strd + col]; 3482 } 3483 } 3484 3485 pu1_src_tmp_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); 3486 pu1_src_backup_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); 3487 tmp_wd -= CTZ(~u4_no_loop_filter_flag) << log2_min_cu; 3488 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag)); 3489 } 3490 } 3491 3492 pu1_src_tmp_chroma -= sao_blk_wd; 3493 pu1_src_backup_chroma -= sao_blk_wd; 3494 } 3495 3496 pu1_src_tmp_chroma += ((src_strd / 2) << log2_min_cu); 3497 pu1_src_backup_chroma += ((backup_strd / 2) << log2_min_cu); 3498 } 3499 } 3500 } 3501 3502 } 3503 3504