1 /****************************************************************************** 2 * 3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ******************************************************************************/ 18 /** 19 ******************************************************************************* 20 * @file 21 * ihevc_sao.c 22 * 23 * @brief 24 * Contains function definitions for sample adaptive offset process 25 * 26 * @author 27 * Srinivas T 28 * 29 * @par List of Functions: 30 * 31 * @remarks 32 * None 33 * 34 ******************************************************************************* 35 */ 36 37 #include <stdio.h> 38 #include <stddef.h> 39 #include <stdlib.h> 40 #include <string.h> 41 #include <assert.h> 42 43 #include "ihevc_typedefs.h" 44 #include "iv.h" 45 #include "ivd.h" 46 #include "ihevcd_cxa.h" 47 #include "ithread.h" 48 49 #include "ihevc_defs.h" 50 #include "ihevc_debug.h" 51 #include "ihevc_defs.h" 52 #include "ihevc_structs.h" 53 #include "ihevc_macros.h" 54 #include "ihevc_platform_macros.h" 55 #include "ihevc_cabac_tables.h" 56 #include "ihevc_sao.h" 57 #include "ihevc_mem_fns.h" 58 59 #include "ihevc_error.h" 60 #include "ihevc_common_tables.h" 61 62 #include "ihevcd_trace.h" 63 #include "ihevcd_defs.h" 64 #include "ihevcd_function_selector.h" 65 #include "ihevcd_structs.h" 66 #include "ihevcd_error.h" 67 #include "ihevcd_nal.h" 68 #include "ihevcd_bitstream.h" 69 #include "ihevcd_job_queue.h" 70 #include "ihevcd_utils.h" 71 72 #include "ihevc_deblk.h" 73 #include "ihevc_deblk_tables.h" 74 #include "ihevcd_profile.h" 75 #include "ihevcd_sao.h" 76 #include "ihevcd_debug.h" 77 78 #define SAO_SHIFT_CTB 8 79 80 /** 81 * SAO at CTB level is implemented for a shifted CTB(8 pixels in x and y directions) 82 */ 83 void ihevcd_sao_ctb(sao_ctxt_t *ps_sao_ctxt) 84 { 85 codec_t *ps_codec = ps_sao_ctxt->ps_codec; 86 UWORD8 *pu1_src_luma; 87 UWORD8 *pu1_src_chroma; 88 WORD32 src_strd; 89 WORD32 ctb_size; 90 WORD32 log2_ctb_size; 91 sps_t *ps_sps; 92 sao_t *ps_sao; 93 WORD32 row, col; 94 UWORD8 au1_avail_luma[8]; 95 UWORD8 au1_avail_chroma[8]; 96 WORD32 i; 97 UWORD8 *pu1_src_top_luma; 98 UWORD8 *pu1_src_top_chroma; 99 UWORD8 *pu1_src_left_luma; 100 UWORD8 *pu1_src_left_chroma; 101 UWORD8 au1_src_top_right[2]; 102 UWORD8 au1_src_bot_left[2]; 103 UWORD8 *pu1_no_loop_filter_flag; 104 WORD32 loop_filter_strd; 105 106 WORD8 ai1_offset_y[5]; 107 WORD8 ai1_offset_cb[5]; 108 WORD8 ai1_offset_cr[5]; 109 110 PROFILE_DISABLE_SAO(); 111 112 ai1_offset_y[0] = 0; 113 ai1_offset_cb[0] = 0; 114 ai1_offset_cr[0] = 0; 115 116 ps_sps = ps_sao_ctxt->ps_sps; 117 log2_ctb_size = ps_sps->i1_log2_ctb_size; 118 ctb_size = (1 << log2_ctb_size); 119 src_strd = ps_sao_ctxt->ps_codec->i4_strd; 120 pu1_src_luma = ps_sao_ctxt->pu1_cur_pic_luma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd) << (log2_ctb_size)); 121 pu1_src_chroma = ps_sao_ctxt->pu1_cur_pic_chroma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd / 2) << (log2_ctb_size)); 122 123 ps_sao = ps_sao_ctxt->ps_pic_sao + ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb; 124 loop_filter_strd = (ps_sps->i2_pic_width_in_luma_samples + 63) / 64; 125 126 /* Current CTB */ 127 { 128 WORD32 sao_wd_luma; 129 WORD32 sao_wd_chroma; 130 WORD32 sao_ht_luma; 131 WORD32 sao_ht_chroma; 132 133 WORD32 remaining_rows; 134 WORD32 remaining_cols; 135 136 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size); 137 sao_wd_luma = MIN(ctb_size, remaining_cols); 138 sao_wd_chroma = MIN(ctb_size, remaining_cols); 139 140 remaining_rows = ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y << log2_ctb_size); 141 sao_ht_luma = MIN(ctb_size, remaining_rows); 142 sao_ht_chroma = MIN(ctb_size, remaining_rows) / 2; 143 144 pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size); 145 pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size); 146 pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size); 147 pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size); 148 149 pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag + 150 ((ps_sao_ctxt->i4_ctb_y * ctb_size) / 8) * loop_filter_strd + 151 ((ps_sao_ctxt->i4_ctb_x * ctb_size) / 64); 152 153 ai1_offset_y[1] = ps_sao->b4_y_offset_1; 154 ai1_offset_y[2] = ps_sao->b4_y_offset_2; 155 ai1_offset_y[3] = ps_sao->b4_y_offset_3; 156 ai1_offset_y[4] = ps_sao->b4_y_offset_4; 157 158 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1; 159 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2; 160 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3; 161 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4; 162 163 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1; 164 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2; 165 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3; 166 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4; 167 168 for(i = 0; i < 8; i++) 169 { 170 au1_avail_luma[i] = 255; 171 au1_avail_chroma[i] = 255; 172 } 173 174 175 if(0 == ps_sao_ctxt->i4_ctb_x) 176 { 177 au1_avail_luma[0] = 0; 178 au1_avail_luma[4] = 0; 179 au1_avail_luma[6] = 0; 180 181 au1_avail_chroma[0] = 0; 182 au1_avail_chroma[4] = 0; 183 au1_avail_chroma[6] = 0; 184 } 185 186 if(ps_sps->i2_pic_wd_in_ctb - 1 == ps_sao_ctxt->i4_ctb_x) 187 { 188 au1_avail_luma[1] = 0; 189 au1_avail_luma[5] = 0; 190 au1_avail_luma[7] = 0; 191 192 au1_avail_chroma[1] = 0; 193 au1_avail_chroma[5] = 0; 194 au1_avail_chroma[7] = 0; 195 } 196 197 if(0 == ps_sao_ctxt->i4_ctb_y) 198 { 199 au1_avail_luma[2] = 0; 200 au1_avail_luma[4] = 0; 201 au1_avail_luma[5] = 0; 202 203 au1_avail_chroma[2] = 0; 204 au1_avail_chroma[4] = 0; 205 au1_avail_chroma[5] = 0; 206 } 207 208 if(ps_sps->i2_pic_ht_in_ctb - 1 == ps_sao_ctxt->i4_ctb_y) 209 { 210 au1_avail_luma[3] = 0; 211 au1_avail_luma[6] = 0; 212 au1_avail_luma[7] = 0; 213 214 au1_avail_chroma[3] = 0; 215 au1_avail_chroma[6] = 0; 216 au1_avail_chroma[7] = 0; 217 } 218 219 220 if(0 == ps_sao->b3_y_type_idx) 221 { 222 /* Update left, top and top-left */ 223 for(row = 0; row < sao_ht_luma; row++) 224 { 225 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)]; 226 } 227 ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1]; 228 229 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma); 230 231 } 232 else 233 { 234 UWORD8 au1_src_copy[(MAX_CTB_SIZE + 2) * (MAX_CTB_SIZE + 2)]; 235 UWORD8 *pu1_src_copy = au1_src_copy + (MAX_CTB_SIZE + 2) + 1; 236 WORD32 tmp_strd = MAX_CTB_SIZE + 2; 237 WORD32 no_loop_filter_enabled = 0; 238 239 /* Check the loop filter flags and copy the original values for back up */ 240 { 241 UWORD32 u4_no_loop_filter_flag; 242 WORD32 min_cu = 8; 243 UWORD8 *pu1_src_tmp = pu1_src_luma; 244 245 for(i = 0; i < (sao_ht_luma + min_cu - 1) / min_cu; i++) 246 { 247 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> 248 ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma) / 8) % 8); 249 u4_no_loop_filter_flag &= (1 << ((sao_wd_luma + (min_cu - 1)) / min_cu)) - 1; 250 251 if(u4_no_loop_filter_flag) 252 { 253 WORD32 tmp_wd = sao_wd_luma; 254 no_loop_filter_enabled = 1; 255 while(tmp_wd > 0) 256 { 257 if(CTZ(u4_no_loop_filter_flag)) 258 { 259 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag)); 260 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd); 261 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd); 262 tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu; 263 } 264 else 265 { 266 for(row = 0; row < MIN(min_cu, sao_ht_luma - (i - 1) * min_cu); row++) 267 { 268 for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++) 269 { 270 pu1_src_copy[row * src_strd + col] = pu1_src_tmp[row * tmp_strd + col]; 271 } 272 } 273 274 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag)); 275 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd); 276 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd); 277 tmp_wd -= CTZ(~u4_no_loop_filter_flag) * min_cu; 278 } 279 } 280 281 pu1_src_tmp -= sao_wd_luma; 282 } 283 284 pu1_src_tmp += min_cu * src_strd; 285 pu1_src_copy += min_cu * tmp_strd; 286 } 287 } 288 289 if(1 == ps_sao->b3_y_type_idx) 290 { 291 ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma, 292 src_strd, 293 pu1_src_left_luma, 294 pu1_src_top_luma, 295 ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb, 296 ps_sao->b5_y_band_pos, 297 ai1_offset_y, 298 sao_wd_luma, 299 sao_ht_luma); 300 } 301 else // if(2 <= ps_sao->b3_y_type_idx) 302 { 303 au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma]; 304 au1_src_bot_left[0] = pu1_src_luma[sao_ht_luma * src_strd - 1]; 305 ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma, 306 src_strd, 307 pu1_src_left_luma, 308 pu1_src_top_luma, 309 ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb, 310 au1_src_top_right, 311 au1_src_bot_left, 312 au1_avail_luma, 313 ai1_offset_y, 314 sao_wd_luma, 315 sao_ht_luma); 316 } 317 318 /* Check the loop filter flags and copy the original values back if they are set */ 319 if(no_loop_filter_enabled) 320 { 321 UWORD32 u4_no_loop_filter_flag; 322 WORD32 min_cu = 8; 323 UWORD8 *pu1_src_tmp = pu1_src_luma; 324 325 for(i = 0; i < (sao_ht_luma + min_cu - 1) / min_cu; i++) 326 { 327 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma) / 8) % 8); 328 u4_no_loop_filter_flag &= (1 << ((sao_wd_luma + (min_cu - 1)) / min_cu)) - 1; 329 330 if(u4_no_loop_filter_flag) 331 { 332 WORD32 tmp_wd = sao_wd_luma; 333 while(tmp_wd > 0) 334 { 335 if(CTZ(u4_no_loop_filter_flag)) 336 { 337 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag)); 338 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd); 339 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd); 340 tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu; 341 } 342 else 343 { 344 for(row = 0; row < MIN(min_cu, sao_ht_luma - (i - 1) * min_cu); row++) 345 { 346 for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++) 347 { 348 pu1_src_tmp[row * src_strd + col] = pu1_src_copy[row * tmp_strd + col]; 349 } 350 } 351 352 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag)); 353 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd); 354 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd); 355 tmp_wd -= CTZ(~u4_no_loop_filter_flag) * min_cu; 356 } 357 } 358 359 pu1_src_tmp -= sao_wd_luma; 360 } 361 362 pu1_src_tmp += min_cu * src_strd; 363 pu1_src_copy += min_cu * tmp_strd; 364 } 365 } 366 367 } 368 369 if(0 == ps_sao->b3_cb_type_idx) 370 { 371 for(row = 0; row < sao_ht_chroma; row++) 372 { 373 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)]; 374 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)]; 375 } 376 ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2]; 377 ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1]; 378 379 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma); 380 } 381 else 382 { 383 UWORD8 au1_src_copy[(MAX_CTB_SIZE + 4) * (MAX_CTB_SIZE + 2)]; 384 UWORD8 *pu1_src_copy = au1_src_copy + (MAX_CTB_SIZE + 4) + 2; 385 WORD32 tmp_strd = MAX_CTB_SIZE + 4; 386 WORD32 no_loop_filter_enabled = 0; 387 388 /* Check the loop filter flags and copy the original values for back up */ 389 { 390 UWORD32 u4_no_loop_filter_flag; 391 WORD32 min_cu = 4; 392 UWORD8 *pu1_src_tmp = pu1_src_chroma; 393 394 for(i = 0; i < (sao_ht_chroma + min_cu - 1) / min_cu; i++) 395 { 396 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma) / 8) % 8); 397 u4_no_loop_filter_flag &= (1 << ((sao_wd_chroma + (min_cu - 1)) / min_cu)) - 1; 398 399 if(u4_no_loop_filter_flag) 400 { 401 WORD32 tmp_wd = sao_wd_chroma; 402 no_loop_filter_enabled = 1; 403 while(tmp_wd > 0) 404 { 405 if(CTZ(u4_no_loop_filter_flag)) 406 { 407 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag)); 408 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd); 409 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd); 410 tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu; 411 } 412 else 413 { 414 for(row = 0; row < MIN(min_cu, sao_ht_chroma - (i - 1) * min_cu); row++) 415 { 416 for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++) 417 { 418 pu1_src_copy[row * src_strd + col] = pu1_src_tmp[row * tmp_strd + col]; 419 } 420 } 421 422 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag)); 423 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd); 424 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd); 425 tmp_wd -= CTZ(~u4_no_loop_filter_flag) * min_cu; 426 } 427 } 428 429 pu1_src_tmp -= sao_wd_chroma; 430 } 431 432 pu1_src_tmp += min_cu * src_strd; 433 pu1_src_copy += min_cu * tmp_strd; 434 } 435 } 436 437 if(1 == ps_sao->b3_cb_type_idx) 438 { 439 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma, 440 src_strd, 441 pu1_src_left_chroma, 442 pu1_src_top_chroma, 443 ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb, 444 ps_sao->b5_cb_band_pos, 445 ps_sao->b5_cr_band_pos, 446 ai1_offset_cb, 447 ai1_offset_cr, 448 sao_wd_chroma, 449 sao_ht_chroma 450 ); 451 } 452 else // if(2 <= ps_sao->b3_cb_type_idx) 453 { 454 au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma]; 455 au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1]; 456 au1_src_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2]; 457 au1_src_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1]; 458 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma, 459 src_strd, 460 pu1_src_left_chroma, 461 pu1_src_top_chroma, 462 ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb, 463 au1_src_top_right, 464 au1_src_bot_left, 465 au1_avail_chroma, 466 ai1_offset_cb, 467 ai1_offset_cr, 468 sao_wd_chroma, 469 sao_ht_chroma); 470 } 471 472 /* Check the loop filter flags and copy the original values back if they are set */ 473 if(no_loop_filter_enabled) 474 { 475 UWORD32 u4_no_loop_filter_flag; 476 WORD32 min_cu = 4; 477 UWORD8 *pu1_src_tmp = pu1_src_chroma; 478 479 for(i = 0; i < (sao_ht_chroma + min_cu - 1) / min_cu; i++) 480 { 481 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma) / 8) % 8); 482 u4_no_loop_filter_flag &= (1 << ((sao_wd_chroma + (min_cu - 1)) / min_cu)) - 1; 483 484 if(u4_no_loop_filter_flag) 485 { 486 WORD32 tmp_wd = sao_wd_chroma; 487 while(tmp_wd > 0) 488 { 489 if(CTZ(u4_no_loop_filter_flag)) 490 { 491 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag)); 492 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd); 493 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd); 494 tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu; 495 } 496 else 497 { 498 for(row = 0; row < MIN(min_cu, sao_ht_chroma - (i - 1) * min_cu); row++) 499 { 500 for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++) 501 { 502 pu1_src_tmp[row * src_strd + col] = pu1_src_copy[row * tmp_strd + col]; 503 } 504 } 505 506 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag)); 507 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd); 508 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd); 509 tmp_wd -= CTZ(~u4_no_loop_filter_flag) * min_cu; 510 } 511 } 512 513 pu1_src_tmp -= sao_wd_chroma; 514 } 515 516 pu1_src_tmp += min_cu * src_strd; 517 pu1_src_copy += min_cu * tmp_strd; 518 } 519 } 520 521 } 522 523 } 524 } 525 526 void ihevcd_sao_shift_ctb(sao_ctxt_t *ps_sao_ctxt) 527 { 528 codec_t *ps_codec = ps_sao_ctxt->ps_codec; 529 UWORD8 *pu1_src_luma; 530 UWORD8 *pu1_src_chroma; 531 WORD32 src_strd; 532 WORD32 ctb_size; 533 WORD32 log2_ctb_size; 534 sps_t *ps_sps; 535 sao_t *ps_sao; 536 pps_t *ps_pps; 537 slice_header_t *ps_slice_hdr, *ps_slice_hdr_base; 538 tile_t *ps_tile; 539 UWORD16 *pu1_slice_idx; 540 UWORD16 *pu1_tile_idx; 541 WORD32 row, col; 542 UWORD8 au1_avail_luma[8]; 543 UWORD8 au1_avail_chroma[8]; 544 UWORD8 au1_tile_slice_boundary[8]; 545 UWORD8 au4_ilf_across_tile_slice_enable[8]; 546 WORD32 i; 547 UWORD8 *pu1_src_top_luma; 548 UWORD8 *pu1_src_top_chroma; 549 UWORD8 *pu1_src_left_luma; 550 UWORD8 *pu1_src_left_chroma; 551 UWORD8 au1_src_top_right[2]; 552 UWORD8 au1_src_bot_left[2]; 553 UWORD8 *pu1_no_loop_filter_flag; 554 UWORD8 *pu1_src_backup_luma; 555 UWORD8 *pu1_src_backup_chroma; 556 WORD32 backup_strd; 557 WORD32 loop_filter_strd; 558 559 WORD32 no_loop_filter_enabled_luma = 0; 560 WORD32 no_loop_filter_enabled_chroma = 0; 561 UWORD8 *pu1_sao_src_top_left_chroma_curr_ctb; 562 UWORD8 *pu1_sao_src_top_left_luma_curr_ctb; 563 UWORD8 *pu1_sao_src_luma_top_left_ctb; 564 UWORD8 *pu1_sao_src_chroma_top_left_ctb; 565 UWORD8 *pu1_sao_src_top_left_luma_top_right; 566 UWORD8 *pu1_sao_src_top_left_chroma_top_right; 567 UWORD8 u1_sao_src_top_left_luma_bot_left; 568 UWORD8 *pu1_sao_src_top_left_luma_bot_left; 569 UWORD8 *au1_sao_src_top_left_chroma_bot_left; 570 UWORD8 *pu1_sao_src_top_left_chroma_bot_left; 571 /* Only 5 values are used, but arrays are large 572 enough so that SIMD functions can read 64 bits at a time */ 573 WORD8 ai1_offset_y[8]; 574 WORD8 ai1_offset_cb[8]; 575 WORD8 ai1_offset_cr[8]; 576 WORD32 chroma_yuv420sp_vu = ps_sao_ctxt->is_chroma_yuv420sp_vu; 577 578 PROFILE_DISABLE_SAO(); 579 580 ai1_offset_y[0] = 0; 581 ai1_offset_cb[0] = 0; 582 ai1_offset_cr[0] = 0; 583 584 ps_sps = ps_sao_ctxt->ps_sps; 585 ps_pps = ps_sao_ctxt->ps_pps; 586 ps_tile = ps_sao_ctxt->ps_tile; 587 588 log2_ctb_size = ps_sps->i1_log2_ctb_size; 589 ctb_size = (1 << log2_ctb_size); 590 src_strd = ps_sao_ctxt->ps_codec->i4_strd; 591 ps_slice_hdr_base = ps_sao_ctxt->ps_codec->ps_slice_hdr_base; 592 ps_slice_hdr = ps_slice_hdr_base + (ps_sao_ctxt->i4_cur_slice_idx & (MAX_SLICE_HDR_CNT - 1)); 593 594 pu1_slice_idx = ps_sao_ctxt->pu1_slice_idx; 595 pu1_tile_idx = ps_sao_ctxt->pu1_tile_idx; 596 pu1_src_luma = ps_sao_ctxt->pu1_cur_pic_luma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd) << (log2_ctb_size)); 597 pu1_src_chroma = ps_sao_ctxt->pu1_cur_pic_chroma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd / 2) << (log2_ctb_size)); 598 599 /*Stores the left value for each row ctbs- Needed for column tiles*/ 600 pu1_sao_src_top_left_luma_curr_ctb = ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb + ((ps_sao_ctxt->i4_ctb_y)); 601 pu1_sao_src_top_left_chroma_curr_ctb = ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb + (2 * (ps_sao_ctxt->i4_ctb_y)); 602 pu1_sao_src_luma_top_left_ctb = ps_sao_ctxt->pu1_sao_src_luma_top_left_ctb + ((ps_sao_ctxt->i4_ctb_y)); 603 pu1_sao_src_chroma_top_left_ctb = ps_sao_ctxt->pu1_sao_src_chroma_top_left_ctb + (2 * ps_sao_ctxt->i4_ctb_y); 604 u1_sao_src_top_left_luma_bot_left = ps_sao_ctxt->u1_sao_src_top_left_luma_bot_left; // + ((ps_sao_ctxt->i4_ctb_y)); 605 pu1_sao_src_top_left_luma_bot_left = ps_sao_ctxt->pu1_sao_src_top_left_luma_bot_left + ((ps_sao_ctxt->i4_ctb_y)); 606 au1_sao_src_top_left_chroma_bot_left = ps_sao_ctxt->au1_sao_src_top_left_chroma_bot_left; // + (2 * ps_sao_ctxt->i4_ctb_y); 607 pu1_sao_src_top_left_chroma_bot_left = ps_sao_ctxt->pu1_sao_src_top_left_chroma_bot_left + (2 * ps_sao_ctxt->i4_ctb_y); 608 pu1_sao_src_top_left_luma_top_right = ps_sao_ctxt->pu1_sao_src_top_left_luma_top_right + ((ps_sao_ctxt->i4_ctb_x)); 609 pu1_sao_src_top_left_chroma_top_right = ps_sao_ctxt->pu1_sao_src_top_left_chroma_top_right + (2 * ps_sao_ctxt->i4_ctb_x); 610 611 ps_sao = ps_sao_ctxt->ps_pic_sao + ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb; 612 loop_filter_strd = (ps_sps->i2_pic_width_in_luma_samples + 63) >> 6; 613 backup_strd = 2 * MAX_CTB_SIZE; 614 615 DEBUG_INIT_TMP_BUF(ps_sao_ctxt->pu1_tmp_buf_luma, ps_sao_ctxt->pu1_tmp_buf_chroma); 616 617 { 618 /* Check the loop filter flags and copy the original values for back up */ 619 /* Luma */ 620 621 /* Done unconditionally since SAO is done on a shifted CTB and the constituent CTBs 622 * can belong to different slice with their own sao_enable flag */ 623 { 624 UWORD32 u4_no_loop_filter_flag; 625 WORD32 loop_filter_bit_pos; 626 WORD32 log2_min_cu = 3; 627 WORD32 min_cu = (1 << log2_min_cu); 628 UWORD8 *pu1_src_tmp_luma = pu1_src_luma; 629 WORD32 sao_blk_ht = ctb_size - SAO_SHIFT_CTB; 630 WORD32 sao_blk_wd = ctb_size; 631 WORD32 remaining_rows; 632 WORD32 remaining_cols; 633 634 remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB); 635 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB); 636 if(remaining_rows <= SAO_SHIFT_CTB) 637 sao_blk_ht += remaining_rows; 638 if(remaining_cols <= SAO_SHIFT_CTB) 639 sao_blk_wd += remaining_cols; 640 641 pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB : 0; 642 pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0; 643 644 pu1_src_backup_luma = ps_sao_ctxt->pu1_tmp_buf_luma; 645 646 loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) + 647 (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3); 648 if(ps_sao_ctxt->i4_ctb_x > 0) 649 loop_filter_bit_pos -= 1; 650 651 pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag + 652 (loop_filter_bit_pos >> 3); 653 654 for(i = -(ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB : 0) >> log2_min_cu; 655 i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++) 656 { 657 WORD32 tmp_wd = sao_blk_wd; 658 659 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> 660 (loop_filter_bit_pos & 7); 661 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1; 662 663 if(u4_no_loop_filter_flag) 664 { 665 no_loop_filter_enabled_luma = 1; 666 while(tmp_wd > 0) 667 { 668 if(CTZ(u4_no_loop_filter_flag)) 669 { 670 pu1_src_tmp_luma += MIN((WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); 671 pu1_src_backup_luma += MIN((WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); 672 tmp_wd -= CTZ(u4_no_loop_filter_flag) << log2_min_cu; 673 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag)); 674 } 675 else 676 { 677 for(row = 0; row < min_cu; row++) 678 { 679 for(col = 0; col < MIN((WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++) 680 { 681 pu1_src_backup_luma[row * backup_strd + col] = pu1_src_tmp_luma[row * src_strd + col]; 682 } 683 } 684 pu1_src_tmp_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); 685 pu1_src_backup_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); 686 tmp_wd -= CTZ(~u4_no_loop_filter_flag) << log2_min_cu; 687 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag)); 688 } 689 } 690 691 pu1_src_tmp_luma -= sao_blk_wd; 692 pu1_src_backup_luma -= sao_blk_wd; 693 } 694 695 pu1_src_tmp_luma += (src_strd << log2_min_cu); 696 pu1_src_backup_luma += (backup_strd << log2_min_cu); 697 } 698 } 699 700 /* Chroma */ 701 702 { 703 UWORD32 u4_no_loop_filter_flag; 704 WORD32 loop_filter_bit_pos; 705 WORD32 log2_min_cu = 3; 706 WORD32 min_cu = (1 << log2_min_cu); 707 UWORD8 *pu1_src_tmp_chroma = pu1_src_chroma; 708 WORD32 sao_blk_ht = ctb_size - 2 * SAO_SHIFT_CTB; 709 WORD32 sao_blk_wd = ctb_size; 710 WORD32 remaining_rows; 711 WORD32 remaining_cols; 712 713 remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB); 714 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB); 715 if(remaining_rows <= 2 * SAO_SHIFT_CTB) 716 sao_blk_ht += remaining_rows; 717 if(remaining_cols <= 2 * SAO_SHIFT_CTB) 718 sao_blk_wd += remaining_cols; 719 720 pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB * 2 : 0; 721 pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0; 722 723 pu1_src_backup_chroma = ps_sao_ctxt->pu1_tmp_buf_chroma; 724 725 loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) + 726 (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3); 727 if(ps_sao_ctxt->i4_ctb_x > 0) 728 loop_filter_bit_pos -= 2; 729 730 pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag + 731 (loop_filter_bit_pos >> 3); 732 733 for(i = -(ps_sao_ctxt->i4_ctb_y ? 2 * SAO_SHIFT_CTB : 0) >> log2_min_cu; 734 i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++) 735 { 736 WORD32 tmp_wd = sao_blk_wd; 737 738 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> 739 (loop_filter_bit_pos & 7); 740 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1; 741 742 if(u4_no_loop_filter_flag) 743 { 744 no_loop_filter_enabled_chroma = 1; 745 while(tmp_wd > 0) 746 { 747 if(CTZ(u4_no_loop_filter_flag)) 748 { 749 pu1_src_tmp_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); 750 pu1_src_backup_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); 751 tmp_wd -= CTZ(u4_no_loop_filter_flag) << log2_min_cu; 752 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag)); 753 } 754 else 755 { 756 for(row = 0; row < min_cu / 2; row++) 757 { 758 for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++) 759 { 760 pu1_src_backup_chroma[row * backup_strd + col] = pu1_src_tmp_chroma[row * src_strd + col]; 761 } 762 } 763 764 pu1_src_tmp_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); 765 pu1_src_backup_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); 766 tmp_wd -= CTZ(~u4_no_loop_filter_flag) << log2_min_cu; 767 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag)); 768 } 769 } 770 771 pu1_src_tmp_chroma -= sao_blk_wd; 772 pu1_src_backup_chroma -= sao_blk_wd; 773 } 774 775 pu1_src_tmp_chroma += ((src_strd / 2) << log2_min_cu); 776 pu1_src_backup_chroma += ((backup_strd / 2) << log2_min_cu); 777 } 778 } 779 } 780 781 DEBUG_PROCESS_TMP_BUF(ps_sao_ctxt->pu1_tmp_buf_luma, ps_sao_ctxt->pu1_tmp_buf_chroma); 782 783 /* Top-left CTB */ 784 if(ps_sao_ctxt->i4_ctb_x > 0 && ps_sao_ctxt->i4_ctb_y > 0) 785 { 786 WORD32 sao_wd_luma = SAO_SHIFT_CTB; 787 WORD32 sao_wd_chroma = 2 * SAO_SHIFT_CTB; 788 WORD32 sao_ht_luma = SAO_SHIFT_CTB; 789 WORD32 sao_ht_chroma = SAO_SHIFT_CTB; 790 791 WORD32 ctbx_tl_t = 0, ctbx_tl_l = 0, ctbx_tl_r = 0, ctbx_tl_d = 0, ctbx_tl = 0; 792 WORD32 ctby_tl_t = 0, ctby_tl_l = 0, ctby_tl_r = 0, ctby_tl_d = 0, ctby_tl = 0; 793 WORD32 au4_idx_tl[8], idx_tl; 794 795 slice_header_t *ps_slice_hdr_top_left; 796 { 797 WORD32 top_left_ctb_indx = (ps_sao_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb + 798 (ps_sao_ctxt->i4_ctb_x - 1); 799 ps_slice_hdr_top_left = ps_slice_hdr_base + pu1_slice_idx[top_left_ctb_indx]; 800 } 801 802 803 pu1_src_luma -= (sao_wd_luma + sao_ht_luma * src_strd); 804 pu1_src_chroma -= (sao_wd_chroma + sao_ht_chroma * src_strd); 805 ps_sao -= (1 + ps_sps->i2_pic_wd_in_ctb); 806 pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma; 807 pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma; 808 pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma; 809 pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - (2 * sao_ht_chroma); 810 811 if(ps_slice_hdr_top_left->i1_slice_sao_luma_flag) 812 { 813 if(0 == ps_sao->b3_y_type_idx) 814 { 815 /* Update left, top and top-left */ 816 for(row = 0; row < sao_ht_luma; row++) 817 { 818 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)]; 819 } 820 pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1]; 821 822 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma); 823 824 825 } 826 827 else if(1 == ps_sao->b3_y_type_idx) 828 { 829 ai1_offset_y[1] = ps_sao->b4_y_offset_1; 830 ai1_offset_y[2] = ps_sao->b4_y_offset_2; 831 ai1_offset_y[3] = ps_sao->b4_y_offset_3; 832 ai1_offset_y[4] = ps_sao->b4_y_offset_4; 833 834 ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma, 835 src_strd, 836 pu1_src_left_luma, 837 pu1_src_top_luma, 838 pu1_sao_src_luma_top_left_ctb, 839 ps_sao->b5_y_band_pos, 840 ai1_offset_y, 841 sao_wd_luma, 842 sao_ht_luma 843 ); 844 } 845 846 else // if(2 <= ps_sao->b3_y_type_idx) 847 { 848 ai1_offset_y[1] = ps_sao->b4_y_offset_1; 849 ai1_offset_y[2] = ps_sao->b4_y_offset_2; 850 ai1_offset_y[3] = ps_sao->b4_y_offset_3; 851 ai1_offset_y[4] = ps_sao->b4_y_offset_4; 852 853 for(i = 0; i < 8; i++) 854 { 855 au1_avail_luma[i] = 255; 856 au1_tile_slice_boundary[i] = 0; 857 au4_idx_tl[i] = 0; 858 au4_ilf_across_tile_slice_enable[i] = 1; 859 } 860 861 /****************************************************************** 862 * Derive the Top-left CTB's neighbor pixel's slice indices. 863 * 864 * TL_T 865 * 4 _2__5________ 866 * 0 | | | 867 * TL_L | TL | 1 TL_R| 868 * |____|_______|____ 869 * 6|TL_D|7 | | 870 * | 3 | | | 871 * |____|_______| | 872 * | | 873 * | | 874 * |____________| 875 * 876 *****************************************************************/ 877 878 /*In case of slices, unless we encounter multiple slice/tiled clips, don't enter*/ 879 { 880 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag)) 881 { 882 { 883 /*Assuming that sao shift is uniform along x and y directions*/ 884 if((0 == (1 << log2_ctb_size) - sao_wd_luma) && (ps_sao_ctxt->i4_ctb_y > 1) && (ps_sao_ctxt->i4_ctb_x > 1)) 885 { 886 ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 2; 887 ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 2; 888 } 889 else if(!(0 == (1 << log2_ctb_size) - sao_wd_luma)) 890 { 891 ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 1; 892 ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 1; 893 } 894 ctbx_tl_t = ps_sao_ctxt->i4_ctb_x - 1; 895 ctby_tl_l = ps_sao_ctxt->i4_ctb_y - 1; 896 897 ctbx_tl_r = ps_sao_ctxt->i4_ctb_x; 898 ctby_tl_r = ps_sao_ctxt->i4_ctb_y - 1; 899 900 ctbx_tl_d = ps_sao_ctxt->i4_ctb_x - 1; 901 ctby_tl_d = ps_sao_ctxt->i4_ctb_y; 902 903 ctbx_tl = ps_sao_ctxt->i4_ctb_x - 1; 904 ctby_tl = ps_sao_ctxt->i4_ctb_y - 1; 905 } 906 907 if(!ps_slice_hdr->i1_first_slice_in_pic_flag) 908 { 909 /*Calculate slice indices for neighbor pixels*/ 910 idx_tl = pu1_slice_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)]; 911 au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_slice_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)); 912 au4_idx_tl[0] = pu1_slice_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)]; 913 au4_idx_tl[1] = au4_idx_tl[5] = pu1_slice_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)]; 914 au4_idx_tl[3] = au4_idx_tl[6] = pu1_slice_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)]; 915 au4_idx_tl[7] = pu1_slice_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)]; 916 917 if((0 == (1 << log2_ctb_size) - sao_wd_luma)) 918 { 919 if(ps_sao_ctxt->i4_ctb_x == 1) 920 { 921 au4_idx_tl[6] = -1; 922 au4_idx_tl[4] = -1; 923 } 924 else 925 { 926 au4_idx_tl[6] = pu1_slice_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)]; 927 } 928 if(ps_sao_ctxt->i4_ctb_y == 1) 929 { 930 au4_idx_tl[5] = -1; 931 au4_idx_tl[4] = -1; 932 } 933 else 934 { 935 au4_idx_tl[5] = pu1_slice_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)]; 936 au4_idx_tl[4] = pu1_slice_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)]; 937 } 938 au4_idx_tl[7] = pu1_slice_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)]; 939 } 940 941 /* Verify that the neighbor ctbs dont cross pic boundary. 942 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag 943 * of the pixel having a greater address is checked. Accordingly, set the availability flags. 944 * Hence, for top and left pixels, current ctb flag is checked. For right and down pixels, 945 * the respective pixel's flags are checked 946 */ 947 948 if((0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma)) 949 { 950 au4_ilf_across_tile_slice_enable[4] = 0; 951 au4_ilf_across_tile_slice_enable[6] = 0; 952 } 953 else 954 { 955 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_tl[6])->i1_slice_loop_filter_across_slices_enabled_flag; 956 } 957 if((0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma)) 958 { 959 au4_ilf_across_tile_slice_enable[5] = 0; 960 au4_ilf_across_tile_slice_enable[4] = 0; 961 } 962 else 963 { 964 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag; 965 au4_ilf_across_tile_slice_enable[4] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag; 966 } 967 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag; 968 au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag; 969 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_tl[1])->i1_slice_loop_filter_across_slices_enabled_flag; 970 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_tl[3])->i1_slice_loop_filter_across_slices_enabled_flag; 971 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_tl[7])->i1_slice_loop_filter_across_slices_enabled_flag; 972 973 if(au4_idx_tl[5] > idx_tl) 974 { 975 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_tl[5])->i1_slice_loop_filter_across_slices_enabled_flag; 976 } 977 978 /* 979 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag 980 * of the pixel having a greater address is checked. Accordingly, set the availability flags. 981 * Hence, for top and left pixels, current ctb flag is checked. For right and down pixels, 982 * the respective pixel's flags are checked 983 */ 984 for(i = 0; i < 8; i++) 985 { 986 /*Sets the edges that lie on the slice/tile boundary*/ 987 if(au4_idx_tl[i] != idx_tl) 988 { 989 au1_tile_slice_boundary[i] = 1; 990 } 991 else 992 { 993 au4_ilf_across_tile_slice_enable[i] = 1; 994 } 995 } 996 997 ps_codec->s_func_selector.ihevc_memset_mul_8_fptr((UWORD8 *)au4_idx_tl, 0, 8 * sizeof(WORD32)); 998 } 999 1000 if(ps_pps->i1_tiles_enabled_flag) 1001 { 1002 /* Calculate availability flags at slice boundary */ 1003 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y)))) 1004 { 1005 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */ 1006 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag) 1007 { 1008 /*Set the boundary arrays*/ 1009 /*Calculate tile indices for neighbor pixels*/ 1010 idx_tl = pu1_tile_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)]; 1011 au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_tile_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)); 1012 au4_idx_tl[0] = pu1_tile_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)]; 1013 au4_idx_tl[1] = au4_idx_tl[5] = pu1_tile_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)]; 1014 au4_idx_tl[3] = au4_idx_tl[6] = pu1_tile_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)]; 1015 au4_idx_tl[7] = pu1_tile_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)]; 1016 1017 if((0 == (1 << log2_ctb_size) - sao_wd_luma)) 1018 { 1019 if(ps_sao_ctxt->i4_ctb_x == 1) 1020 { 1021 au4_idx_tl[6] = -1; 1022 au4_idx_tl[4] = -1; 1023 } 1024 else 1025 { 1026 au4_idx_tl[6] = pu1_tile_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)]; 1027 } 1028 if(ps_sao_ctxt->i4_ctb_y == 1) 1029 { 1030 au4_idx_tl[5] = -1; 1031 au4_idx_tl[4] = -1; 1032 } 1033 else 1034 { 1035 au4_idx_tl[5] = pu1_tile_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)]; 1036 au4_idx_tl[4] = pu1_tile_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)]; 1037 } 1038 au4_idx_tl[7] = pu1_tile_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)]; 1039 } 1040 for(i = 0; i < 8; i++) 1041 { 1042 /*Sets the edges that lie on the tile boundary*/ 1043 if(au4_idx_tl[i] != idx_tl) 1044 { 1045 au1_tile_slice_boundary[i] |= 1; 1046 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0 1047 } 1048 } 1049 } 1050 } 1051 } 1052 1053 1054 /*Set availability flags based on tile and slice boundaries*/ 1055 for(i = 0; i < 8; i++) 1056 { 1057 /*Sets the edges that lie on the slice/tile boundary*/ 1058 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i])) 1059 { 1060 au1_avail_luma[i] = 0; 1061 } 1062 } 1063 } 1064 } 1065 1066 if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma) 1067 { 1068 au1_avail_luma[0] = 0; 1069 au1_avail_luma[4] = 0; 1070 au1_avail_luma[6] = 0; 1071 } 1072 1073 if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x) 1074 { 1075 au1_avail_luma[1] = 0; 1076 au1_avail_luma[5] = 0; 1077 au1_avail_luma[7] = 0; 1078 } 1079 //y==1 case 1080 if((0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma)) 1081 { 1082 au1_avail_luma[2] = 0; 1083 au1_avail_luma[4] = 0; 1084 au1_avail_luma[5] = 0; 1085 } 1086 if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y) 1087 { 1088 au1_avail_luma[3] = 0; 1089 au1_avail_luma[6] = 0; 1090 au1_avail_luma[7] = 0; 1091 } 1092 1093 { 1094 au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma]; 1095 u1_sao_src_top_left_luma_bot_left = pu1_src_left_luma[sao_ht_luma]; 1096 ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma, 1097 src_strd, 1098 pu1_src_left_luma, 1099 pu1_src_top_luma, 1100 pu1_sao_src_luma_top_left_ctb, 1101 au1_src_top_right, 1102 &u1_sao_src_top_left_luma_bot_left, 1103 au1_avail_luma, 1104 ai1_offset_y, 1105 sao_wd_luma, 1106 sao_ht_luma); 1107 } 1108 } 1109 1110 } 1111 else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag)) 1112 { 1113 /* Update left, top and top-left */ 1114 for(row = 0; row < sao_ht_luma; row++) 1115 { 1116 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)]; 1117 } 1118 pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1]; 1119 1120 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma); 1121 } 1122 1123 if(ps_slice_hdr_top_left->i1_slice_sao_chroma_flag) 1124 { 1125 if(0 == ps_sao->b3_cb_type_idx) 1126 { 1127 for(row = 0; row < sao_ht_chroma; row++) 1128 { 1129 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)]; 1130 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)]; 1131 } 1132 pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2]; 1133 pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1]; 1134 1135 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma); 1136 1137 } 1138 1139 else if(1 == ps_sao->b3_cb_type_idx) 1140 { 1141 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1; 1142 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2; 1143 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3; 1144 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4; 1145 1146 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1; 1147 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2; 1148 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3; 1149 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4; 1150 1151 if(chroma_yuv420sp_vu) 1152 { 1153 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma, 1154 src_strd, 1155 pu1_src_left_chroma, 1156 pu1_src_top_chroma, 1157 pu1_sao_src_chroma_top_left_ctb, 1158 ps_sao->b5_cr_band_pos, 1159 ps_sao->b5_cb_band_pos, 1160 ai1_offset_cr, 1161 ai1_offset_cb, 1162 sao_wd_chroma, 1163 sao_ht_chroma 1164 ); 1165 } 1166 else 1167 { 1168 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma, 1169 src_strd, 1170 pu1_src_left_chroma, 1171 pu1_src_top_chroma, 1172 pu1_sao_src_chroma_top_left_ctb, 1173 ps_sao->b5_cb_band_pos, 1174 ps_sao->b5_cr_band_pos, 1175 ai1_offset_cb, 1176 ai1_offset_cr, 1177 sao_wd_chroma, 1178 sao_ht_chroma 1179 ); 1180 } 1181 } 1182 1183 else // if(2 <= ps_sao->b3_cb_type_idx) 1184 { 1185 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1; 1186 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2; 1187 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3; 1188 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4; 1189 1190 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1; 1191 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2; 1192 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3; 1193 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4; 1194 for(i = 0; i < 8; i++) 1195 { 1196 au1_avail_chroma[i] = 255; 1197 au1_tile_slice_boundary[i] = 0; 1198 au4_idx_tl[i] = 0; 1199 au4_ilf_across_tile_slice_enable[i] = 1; 1200 } 1201 /*In case of slices*/ 1202 { 1203 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag)) 1204 { 1205 if((0 == (1 << log2_ctb_size) - sao_wd_chroma) && (ps_sao_ctxt->i4_ctb_y > 1) && (ps_sao_ctxt->i4_ctb_x > 1)) 1206 { 1207 ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 2; 1208 ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 2; 1209 } 1210 else if(!(0 == (1 << log2_ctb_size) - sao_wd_chroma)) 1211 { 1212 ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 1; 1213 ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 1; 1214 } 1215 ctbx_tl_t = ps_sao_ctxt->i4_ctb_x - 1; 1216 ctby_tl_l = ps_sao_ctxt->i4_ctb_y - 1; 1217 1218 ctbx_tl_r = ps_sao_ctxt->i4_ctb_x; 1219 ctby_tl_r = ps_sao_ctxt->i4_ctb_y - 1; 1220 1221 ctbx_tl_d = ps_sao_ctxt->i4_ctb_x - 1; 1222 ctby_tl_d = ps_sao_ctxt->i4_ctb_y; 1223 1224 ctbx_tl = ps_sao_ctxt->i4_ctb_x - 1; 1225 ctby_tl = ps_sao_ctxt->i4_ctb_y - 1; 1226 1227 if(!ps_slice_hdr->i1_first_slice_in_pic_flag) 1228 { 1229 1230 idx_tl = pu1_slice_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)]; 1231 au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_slice_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)); 1232 au4_idx_tl[0] = pu1_slice_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)]; 1233 au4_idx_tl[1] = au4_idx_tl[5] = pu1_slice_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)]; 1234 au4_idx_tl[3] = au4_idx_tl[6] = pu1_slice_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)]; 1235 au4_idx_tl[7] = pu1_slice_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)]; 1236 1237 if((0 == (1 << log2_ctb_size) - sao_wd_chroma)) 1238 { 1239 if(ps_sao_ctxt->i4_ctb_x == 1) 1240 { 1241 au4_idx_tl[6] = -1; 1242 au4_idx_tl[4] = -1; 1243 } 1244 else 1245 { 1246 au4_idx_tl[6] = pu1_slice_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)]; 1247 } 1248 if(ps_sao_ctxt->i4_ctb_y == 1) 1249 { 1250 au4_idx_tl[5] = -1; 1251 au4_idx_tl[4] = -1; 1252 } 1253 else 1254 { 1255 au4_idx_tl[5] = pu1_slice_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)]; 1256 au4_idx_tl[4] = pu1_slice_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)]; 1257 } 1258 au4_idx_tl[7] = pu1_slice_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)]; 1259 } 1260 1261 /* Verify that the neighbor ctbs don't cross pic boundary 1262 * Also, the ILF flag belonging to the higher pixel address (between neighbor and current pixels) must be assigned*/ 1263 if((0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma)) 1264 { 1265 au4_ilf_across_tile_slice_enable[4] = 0; 1266 au4_ilf_across_tile_slice_enable[6] = 0; 1267 } 1268 else 1269 { 1270 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_tl[6])->i1_slice_loop_filter_across_slices_enabled_flag; 1271 } 1272 if((0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma)) 1273 { 1274 au4_ilf_across_tile_slice_enable[5] = 0; 1275 au4_ilf_across_tile_slice_enable[4] = 0; 1276 } 1277 else 1278 { 1279 au4_ilf_across_tile_slice_enable[4] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag; 1280 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_tl[5])->i1_slice_loop_filter_across_slices_enabled_flag; 1281 } 1282 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag; 1283 au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag; 1284 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_tl[1])->i1_slice_loop_filter_across_slices_enabled_flag; 1285 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_tl[3])->i1_slice_loop_filter_across_slices_enabled_flag; 1286 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_tl[7])->i1_slice_loop_filter_across_slices_enabled_flag; 1287 /* 1288 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag 1289 * of the pixel having a greater address is checked. Accordingly, set the availability flags 1290 */ 1291 for(i = 0; i < 8; i++) 1292 { 1293 /*Sets the edges that lie on the slice/tile boundary*/ 1294 if(au4_idx_tl[i] != idx_tl) 1295 { 1296 au1_tile_slice_boundary[i] = 1; 1297 } 1298 else 1299 { 1300 au4_ilf_across_tile_slice_enable[i] = 1; 1301 } 1302 } 1303 1304 /*Reset indices*/ 1305 for(i = 0; i < 8; i++) 1306 { 1307 au4_idx_tl[i] = 0; 1308 } 1309 } 1310 if(ps_pps->i1_tiles_enabled_flag) 1311 { 1312 /* Calculate availability flags at slice boundary */ 1313 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y)))) 1314 { 1315 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */ 1316 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag) 1317 { 1318 /*Set the boundary arrays*/ 1319 /*Calculate tile indices for neighbor pixels*/ 1320 idx_tl = pu1_tile_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)]; 1321 au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_tile_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)); 1322 au4_idx_tl[0] = pu1_tile_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)]; 1323 au4_idx_tl[1] = au4_idx_tl[5] = pu1_tile_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)]; 1324 au4_idx_tl[3] = au4_idx_tl[6] = pu1_tile_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)]; 1325 au4_idx_tl[7] = pu1_tile_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)]; 1326 1327 if((0 == (1 << log2_ctb_size) - sao_wd_luma)) 1328 { 1329 if(ps_sao_ctxt->i4_ctb_x == 1) 1330 { 1331 au4_idx_tl[6] = -1; 1332 au4_idx_tl[4] = -1; 1333 } 1334 else 1335 { 1336 au4_idx_tl[6] = pu1_tile_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)]; 1337 } 1338 if(ps_sao_ctxt->i4_ctb_y == 1) 1339 { 1340 au4_idx_tl[5] = -1; 1341 au4_idx_tl[4] = -1; 1342 } 1343 else 1344 { 1345 au4_idx_tl[5] = pu1_tile_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)]; 1346 au4_idx_tl[4] = pu1_tile_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)]; 1347 } 1348 au4_idx_tl[7] = pu1_tile_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)]; 1349 } 1350 for(i = 0; i < 8; i++) 1351 { 1352 /*Sets the edges that lie on the tile boundary*/ 1353 if(au4_idx_tl[i] != idx_tl) 1354 { 1355 au1_tile_slice_boundary[i] |= 1; 1356 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0 1357 } 1358 } 1359 } 1360 } 1361 } 1362 1363 for(i = 0; i < 8; i++) 1364 { 1365 /*Sets the edges that lie on the slice/tile boundary*/ 1366 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i])) 1367 { 1368 au1_avail_chroma[i] = 0; 1369 } 1370 } 1371 } 1372 } 1373 1374 if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma) 1375 { 1376 au1_avail_chroma[0] = 0; 1377 au1_avail_chroma[4] = 0; 1378 au1_avail_chroma[6] = 0; 1379 } 1380 if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x) 1381 { 1382 au1_avail_chroma[1] = 0; 1383 au1_avail_chroma[5] = 0; 1384 au1_avail_chroma[7] = 0; 1385 } 1386 1387 if(0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma) 1388 { 1389 au1_avail_chroma[2] = 0; 1390 au1_avail_chroma[4] = 0; 1391 au1_avail_chroma[5] = 0; 1392 } 1393 if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y) 1394 { 1395 au1_avail_chroma[3] = 0; 1396 au1_avail_chroma[6] = 0; 1397 au1_avail_chroma[7] = 0; 1398 } 1399 1400 { 1401 au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma]; 1402 au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1]; 1403 au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_left_chroma[2 * sao_ht_chroma]; 1404 au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_left_chroma[2 * sao_ht_chroma + 1]; 1405 if((ctb_size == 16) && (ps_sao_ctxt->i4_ctb_y != ps_sps->i2_pic_ht_in_ctb - 1)) 1406 { 1407 au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2]; 1408 au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1]; 1409 } 1410 1411 if(chroma_yuv420sp_vu) 1412 { 1413 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma, 1414 src_strd, 1415 pu1_src_left_chroma, 1416 pu1_src_top_chroma, 1417 pu1_sao_src_chroma_top_left_ctb, 1418 au1_src_top_right, 1419 au1_sao_src_top_left_chroma_bot_left, 1420 au1_avail_chroma, 1421 ai1_offset_cr, 1422 ai1_offset_cb, 1423 sao_wd_chroma, 1424 sao_ht_chroma); 1425 } 1426 else 1427 { 1428 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma, 1429 src_strd, 1430 pu1_src_left_chroma, 1431 pu1_src_top_chroma, 1432 pu1_sao_src_chroma_top_left_ctb, 1433 au1_src_top_right, 1434 au1_sao_src_top_left_chroma_bot_left, 1435 au1_avail_chroma, 1436 ai1_offset_cb, 1437 ai1_offset_cr, 1438 sao_wd_chroma, 1439 sao_ht_chroma); 1440 } 1441 } 1442 } 1443 } 1444 else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag)) 1445 { 1446 for(row = 0; row < sao_ht_chroma; row++) 1447 { 1448 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)]; 1449 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)]; 1450 } 1451 pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2]; 1452 pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1]; 1453 1454 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma); 1455 } 1456 1457 pu1_src_luma += sao_wd_luma + sao_ht_luma * src_strd; 1458 pu1_src_chroma += sao_wd_chroma + sao_ht_chroma * src_strd; 1459 ps_sao += (1 + ps_sps->i2_pic_wd_in_ctb); 1460 } 1461 1462 1463 /* Top CTB */ 1464 if((ps_sao_ctxt->i4_ctb_y > 0)) 1465 { 1466 WORD32 sao_wd_luma = ctb_size - SAO_SHIFT_CTB; 1467 WORD32 sao_wd_chroma = ctb_size - 2 * SAO_SHIFT_CTB; 1468 WORD32 sao_ht_luma = SAO_SHIFT_CTB; 1469 WORD32 sao_ht_chroma = SAO_SHIFT_CTB; 1470 1471 WORD32 ctbx_t_t = 0, ctbx_t_l = 0, ctbx_t_r = 0, ctbx_t_d = 0, ctbx_t = 0; 1472 WORD32 ctby_t_t = 0, ctby_t_l = 0, ctby_t_r = 0, ctby_t_d = 0, ctby_t = 0; 1473 WORD32 au4_idx_t[8], idx_t; 1474 1475 WORD32 remaining_cols; 1476 1477 slice_header_t *ps_slice_hdr_top; 1478 { 1479 WORD32 top_ctb_indx = (ps_sao_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb + 1480 (ps_sao_ctxt->i4_ctb_x); 1481 ps_slice_hdr_top = ps_slice_hdr_base + pu1_slice_idx[top_ctb_indx]; 1482 } 1483 1484 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_luma); 1485 if(remaining_cols <= SAO_SHIFT_CTB) 1486 { 1487 sao_wd_luma += remaining_cols; 1488 } 1489 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_chroma); 1490 if(remaining_cols <= 2 * SAO_SHIFT_CTB) 1491 { 1492 sao_wd_chroma += remaining_cols; 1493 } 1494 1495 pu1_src_luma -= (sao_ht_luma * src_strd); 1496 pu1_src_chroma -= (sao_ht_chroma * src_strd); 1497 ps_sao -= (ps_sps->i2_pic_wd_in_ctb); 1498 pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size); 1499 pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size); 1500 pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_chroma; 1501 pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - (2 * sao_ht_chroma); 1502 1503 if(0 != sao_wd_luma) 1504 { 1505 if(ps_slice_hdr_top->i1_slice_sao_luma_flag) 1506 { 1507 if(0 == ps_sao->b3_y_type_idx) 1508 { 1509 /* Update left, top and top-left */ 1510 for(row = 0; row < sao_ht_luma; row++) 1511 { 1512 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)]; 1513 } 1514 pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1]; 1515 1516 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma); 1517 1518 } 1519 1520 else if(1 == ps_sao->b3_y_type_idx) 1521 { 1522 ai1_offset_y[1] = ps_sao->b4_y_offset_1; 1523 ai1_offset_y[2] = ps_sao->b4_y_offset_2; 1524 ai1_offset_y[3] = ps_sao->b4_y_offset_3; 1525 ai1_offset_y[4] = ps_sao->b4_y_offset_4; 1526 1527 ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma, 1528 src_strd, 1529 pu1_src_left_luma, 1530 pu1_src_top_luma, 1531 pu1_sao_src_luma_top_left_ctb, 1532 ps_sao->b5_y_band_pos, 1533 ai1_offset_y, 1534 sao_wd_luma, 1535 sao_ht_luma 1536 ); 1537 } 1538 1539 else // if(2 <= ps_sao->b3_y_type_idx) 1540 { 1541 ai1_offset_y[1] = ps_sao->b4_y_offset_1; 1542 ai1_offset_y[2] = ps_sao->b4_y_offset_2; 1543 ai1_offset_y[3] = ps_sao->b4_y_offset_3; 1544 ai1_offset_y[4] = ps_sao->b4_y_offset_4; 1545 1546 ps_codec->s_func_selector.ihevc_memset_mul_8_fptr(au1_avail_luma, 255, 8); 1547 ps_codec->s_func_selector.ihevc_memset_mul_8_fptr(au1_tile_slice_boundary, 0, 8); 1548 ps_codec->s_func_selector.ihevc_memset_mul_8_fptr((UWORD8 *)au4_idx_t, 0, 8 * sizeof(WORD32)); 1549 1550 for(i = 0; i < 8; i++) 1551 { 1552 1553 au4_ilf_across_tile_slice_enable[i] = 1; 1554 } 1555 /****************************************************************** 1556 * Derive the Top-left CTB's neighbor pixel's slice indices. 1557 * 1558 * T_T 1559 * ____________ 1560 * | | | 1561 * | T_L| T |T_R 1562 * | | ______|____ 1563 * | | T_D | | 1564 * | | | | 1565 * |____|_______| | 1566 * | | 1567 * | | 1568 * |____________| 1569 * 1570 *****************************************************************/ 1571 1572 /*In case of slices*/ 1573 { 1574 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag)) 1575 { 1576 1577 ctbx_t_t = ps_sao_ctxt->i4_ctb_x; 1578 ctby_t_t = ps_sao_ctxt->i4_ctb_y - 1; 1579 1580 ctbx_t_l = ps_sao_ctxt->i4_ctb_x - 1; 1581 ctby_t_l = ps_sao_ctxt->i4_ctb_y - 1; 1582 1583 ctbx_t_r = ps_sao_ctxt->i4_ctb_x; 1584 ctby_t_r = ps_sao_ctxt->i4_ctb_y - 1; 1585 1586 ctbx_t_d = ps_sao_ctxt->i4_ctb_x; 1587 ctby_t_d = ps_sao_ctxt->i4_ctb_y; 1588 1589 ctbx_t = ps_sao_ctxt->i4_ctb_x; 1590 ctby_t = ps_sao_ctxt->i4_ctb_y - 1; 1591 1592 if(!ps_slice_hdr->i1_first_slice_in_pic_flag) 1593 { 1594 /*Calculate neighbor ctb slice indices*/ 1595 if(0 == ps_sao_ctxt->i4_ctb_x) 1596 { 1597 au4_idx_t[0] = -1; 1598 au4_idx_t[6] = -1; 1599 au4_idx_t[4] = -1; 1600 } 1601 else 1602 { 1603 au4_idx_t[0] = au4_idx_t[4] = pu1_slice_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)]; 1604 au4_idx_t[6] = pu1_slice_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)]; 1605 } 1606 idx_t = pu1_slice_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)]; 1607 au4_idx_t[2] = au4_idx_t[5] = pu1_slice_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)]; 1608 au4_idx_t[1] = pu1_slice_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)]; 1609 au4_idx_t[3] = au4_idx_t[7] = pu1_slice_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)]; 1610 1611 /*Verify that the neighbor ctbs don't cross pic boundary.*/ 1612 if(0 == ps_sao_ctxt->i4_ctb_x) 1613 { 1614 au4_ilf_across_tile_slice_enable[4] = 0; 1615 au4_ilf_across_tile_slice_enable[6] = 0; 1616 au4_ilf_across_tile_slice_enable[0] = 0; 1617 } 1618 else 1619 { 1620 au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag; 1621 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_t[6])->i1_slice_loop_filter_across_slices_enabled_flag; 1622 } 1623 1624 1625 1626 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag; 1627 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag; 1628 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_t[1])->i1_slice_loop_filter_across_slices_enabled_flag; 1629 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_t[3])->i1_slice_loop_filter_across_slices_enabled_flag; 1630 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_t[7])->i1_slice_loop_filter_across_slices_enabled_flag; 1631 1632 if(au4_idx_t[6] < idx_t) 1633 { 1634 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag; 1635 } 1636 1637 /* 1638 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag 1639 * of the pixel having a greater address is checked. Accordingly, set the availability flags 1640 */ 1641 1642 for(i = 0; i < 8; i++) 1643 { 1644 /*Sets the edges that lie on the slice/tile boundary*/ 1645 if(au4_idx_t[i] != idx_t) 1646 { 1647 au1_tile_slice_boundary[i] = 1; 1648 /*Check for slice flag at such boundaries*/ 1649 } 1650 else 1651 { 1652 au4_ilf_across_tile_slice_enable[i] = 1; 1653 } 1654 } 1655 /*Reset indices*/ 1656 for(i = 0; i < 8; i++) 1657 { 1658 au4_idx_t[i] = 0; 1659 } 1660 } 1661 1662 if(ps_pps->i1_tiles_enabled_flag) 1663 { 1664 /* Calculate availability flags at slice boundary */ 1665 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y)))) 1666 { 1667 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */ 1668 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag) 1669 { 1670 /*Calculate neighbor ctb slice indices*/ 1671 if(0 == ps_sao_ctxt->i4_ctb_x) 1672 { 1673 au4_idx_t[0] = -1; 1674 au4_idx_t[6] = -1; 1675 au4_idx_t[4] = -1; 1676 } 1677 else 1678 { 1679 au4_idx_t[0] = au4_idx_t[4] = pu1_tile_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)]; 1680 au4_idx_t[6] = pu1_tile_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)]; 1681 } 1682 idx_t = pu1_tile_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)]; 1683 au4_idx_t[2] = au4_idx_t[5] = pu1_tile_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)]; 1684 au4_idx_t[1] = pu1_tile_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)]; 1685 au4_idx_t[3] = au4_idx_t[7] = pu1_tile_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)]; 1686 1687 for(i = 0; i < 8; i++) 1688 { 1689 /*Sets the edges that lie on the tile boundary*/ 1690 if(au4_idx_t[i] != idx_t) 1691 { 1692 au1_tile_slice_boundary[i] |= 1; 1693 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; 1694 } 1695 } 1696 } 1697 } 1698 } 1699 1700 for(i = 0; i < 8; i++) 1701 { 1702 /*Sets the edges that lie on the slice/tile boundary*/ 1703 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i])) 1704 { 1705 au1_avail_luma[i] = 0; 1706 } 1707 } 1708 } 1709 } 1710 1711 1712 if(0 == ps_sao_ctxt->i4_ctb_x) 1713 { 1714 au1_avail_luma[0] = 0; 1715 au1_avail_luma[4] = 0; 1716 au1_avail_luma[6] = 0; 1717 } 1718 1719 if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_luma) 1720 { 1721 au1_avail_luma[1] = 0; 1722 au1_avail_luma[5] = 0; 1723 au1_avail_luma[7] = 0; 1724 } 1725 1726 if(0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma) 1727 { 1728 au1_avail_luma[2] = 0; 1729 au1_avail_luma[4] = 0; 1730 au1_avail_luma[5] = 0; 1731 } 1732 1733 if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y) 1734 { 1735 au1_avail_luma[3] = 0; 1736 au1_avail_luma[6] = 0; 1737 au1_avail_luma[7] = 0; 1738 } 1739 1740 { 1741 au1_src_top_right[0] = pu1_sao_src_top_left_luma_top_right[0]; 1742 u1_sao_src_top_left_luma_bot_left = pu1_src_luma[sao_ht_luma * src_strd - 1]; 1743 ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma, 1744 src_strd, 1745 pu1_src_left_luma, 1746 pu1_src_top_luma, 1747 pu1_sao_src_luma_top_left_ctb, 1748 au1_src_top_right, 1749 &u1_sao_src_top_left_luma_bot_left, 1750 au1_avail_luma, 1751 ai1_offset_y, 1752 sao_wd_luma, 1753 sao_ht_luma); 1754 } 1755 } 1756 } 1757 else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag)) 1758 { 1759 /* Update left, top and top-left */ 1760 for(row = 0; row < sao_ht_luma; row++) 1761 { 1762 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)]; 1763 } 1764 pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1]; 1765 1766 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma); 1767 } 1768 } 1769 1770 if(0 != sao_wd_chroma) 1771 { 1772 if(ps_slice_hdr_top->i1_slice_sao_chroma_flag) 1773 { 1774 if(0 == ps_sao->b3_cb_type_idx) 1775 { 1776 1777 for(row = 0; row < sao_ht_chroma; row++) 1778 { 1779 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)]; 1780 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)]; 1781 } 1782 pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2]; 1783 pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1]; 1784 1785 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma); 1786 1787 } 1788 1789 else if(1 == ps_sao->b3_cb_type_idx) 1790 { 1791 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1; 1792 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2; 1793 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3; 1794 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4; 1795 1796 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1; 1797 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2; 1798 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3; 1799 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4; 1800 1801 if(chroma_yuv420sp_vu) 1802 { 1803 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma, 1804 src_strd, 1805 pu1_src_left_chroma, 1806 pu1_src_top_chroma, 1807 pu1_sao_src_chroma_top_left_ctb, 1808 ps_sao->b5_cr_band_pos, 1809 ps_sao->b5_cb_band_pos, 1810 ai1_offset_cr, 1811 ai1_offset_cb, 1812 sao_wd_chroma, 1813 sao_ht_chroma 1814 ); 1815 } 1816 else 1817 { 1818 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma, 1819 src_strd, 1820 pu1_src_left_chroma, 1821 pu1_src_top_chroma, 1822 pu1_sao_src_chroma_top_left_ctb, 1823 ps_sao->b5_cb_band_pos, 1824 ps_sao->b5_cr_band_pos, 1825 ai1_offset_cb, 1826 ai1_offset_cr, 1827 sao_wd_chroma, 1828 sao_ht_chroma 1829 ); 1830 } 1831 } 1832 else // if(2 <= ps_sao->b3_cb_type_idx) 1833 { 1834 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1; 1835 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2; 1836 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3; 1837 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4; 1838 1839 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1; 1840 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2; 1841 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3; 1842 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4; 1843 1844 for(i = 0; i < 8; i++) 1845 { 1846 au1_avail_chroma[i] = 255; 1847 au1_tile_slice_boundary[i] = 0; 1848 au4_idx_t[i] = 0; 1849 au4_ilf_across_tile_slice_enable[i] = 1; 1850 } 1851 1852 { 1853 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag)) 1854 { 1855 ctbx_t_t = ps_sao_ctxt->i4_ctb_x; 1856 ctby_t_t = ps_sao_ctxt->i4_ctb_y - 1; 1857 1858 ctbx_t_l = ps_sao_ctxt->i4_ctb_x - 1; 1859 ctby_t_l = ps_sao_ctxt->i4_ctb_y - 1; 1860 1861 ctbx_t_r = ps_sao_ctxt->i4_ctb_x; 1862 ctby_t_r = ps_sao_ctxt->i4_ctb_y - 1; 1863 1864 ctbx_t_d = ps_sao_ctxt->i4_ctb_x; 1865 ctby_t_d = ps_sao_ctxt->i4_ctb_y; 1866 1867 ctbx_t = ps_sao_ctxt->i4_ctb_x; 1868 ctby_t = ps_sao_ctxt->i4_ctb_y - 1; 1869 1870 if(!ps_slice_hdr->i1_first_slice_in_pic_flag) 1871 { 1872 if(0 == ps_sao_ctxt->i4_ctb_x) 1873 { 1874 au4_idx_t[0] = -1; 1875 au4_idx_t[6] = -1; 1876 au4_idx_t[4] = -1; 1877 } 1878 else 1879 { 1880 au4_idx_t[0] = au4_idx_t[4] = pu1_slice_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)]; 1881 au4_idx_t[6] = pu1_slice_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)]; 1882 } 1883 idx_t = pu1_slice_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)]; 1884 au4_idx_t[2] = au4_idx_t[5] = pu1_slice_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)]; 1885 au4_idx_t[1] = pu1_slice_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)]; 1886 au4_idx_t[3] = au4_idx_t[7] = pu1_slice_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)]; 1887 1888 /*Verify that the neighbor ctbs don't cross pic boundary.*/ 1889 1890 if(0 == ps_sao_ctxt->i4_ctb_x) 1891 { 1892 au4_ilf_across_tile_slice_enable[4] = 0; 1893 au4_ilf_across_tile_slice_enable[6] = 0; 1894 au4_ilf_across_tile_slice_enable[0] = 0; 1895 } 1896 else 1897 { 1898 au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag; 1899 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_t[6])->i1_slice_loop_filter_across_slices_enabled_flag; 1900 } 1901 1902 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_t[5])->i1_slice_loop_filter_across_slices_enabled_flag; 1903 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag; 1904 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_t[1])->i1_slice_loop_filter_across_slices_enabled_flag; 1905 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_t[3])->i1_slice_loop_filter_across_slices_enabled_flag; 1906 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_t[7])->i1_slice_loop_filter_across_slices_enabled_flag; 1907 1908 if(idx_t > au4_idx_t[6]) 1909 { 1910 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag; 1911 } 1912 1913 /* 1914 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag 1915 * of the pixel having a greater address is checked. Accordingly, set the availability flags 1916 */ 1917 for(i = 0; i < 8; i++) 1918 { 1919 /*Sets the edges that lie on the slice/tile boundary*/ 1920 if(au4_idx_t[i] != idx_t) 1921 { 1922 au1_tile_slice_boundary[i] = 1; 1923 } 1924 else 1925 { 1926 /*Indicates that the neighbour belongs to same/dependent slice*/ 1927 au4_ilf_across_tile_slice_enable[i] = 1; 1928 } 1929 } 1930 /*Reset indices*/ 1931 for(i = 0; i < 8; i++) 1932 { 1933 au4_idx_t[i] = 0; 1934 } 1935 } 1936 if(ps_pps->i1_tiles_enabled_flag) 1937 { 1938 /* Calculate availability flags at slice boundary */ 1939 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y)))) 1940 { 1941 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */ 1942 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag) 1943 { 1944 /*Calculate neighbor ctb slice indices*/ 1945 if(0 == ps_sao_ctxt->i4_ctb_x) 1946 { 1947 au4_idx_t[0] = -1; 1948 au4_idx_t[6] = -1; 1949 au4_idx_t[4] = -1; 1950 } 1951 else 1952 { 1953 au4_idx_t[0] = au4_idx_t[4] = pu1_tile_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)]; 1954 au4_idx_t[6] = pu1_tile_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)]; 1955 } 1956 idx_t = pu1_tile_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)]; 1957 au4_idx_t[2] = au4_idx_t[5] = pu1_tile_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)]; 1958 au4_idx_t[1] = pu1_tile_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)]; 1959 au4_idx_t[3] = au4_idx_t[7] = pu1_tile_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)]; 1960 1961 for(i = 0; i < 8; i++) 1962 { 1963 /*Sets the edges that lie on the tile boundary*/ 1964 if(au4_idx_t[i] != idx_t) 1965 { 1966 au1_tile_slice_boundary[i] |= 1; 1967 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; 1968 } 1969 } 1970 } 1971 } 1972 } 1973 for(i = 0; i < 8; i++) 1974 { 1975 /*Sets the edges that lie on the slice/tile boundary*/ 1976 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i])) 1977 { 1978 au1_avail_chroma[i] = 0; 1979 } 1980 } 1981 1982 } 1983 } 1984 if(0 == ps_sao_ctxt->i4_ctb_x) 1985 { 1986 au1_avail_chroma[0] = 0; 1987 au1_avail_chroma[4] = 0; 1988 au1_avail_chroma[6] = 0; 1989 } 1990 1991 if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_chroma) 1992 { 1993 au1_avail_chroma[1] = 0; 1994 au1_avail_chroma[5] = 0; 1995 au1_avail_chroma[7] = 0; 1996 } 1997 1998 if(0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma) 1999 { 2000 au1_avail_chroma[2] = 0; 2001 au1_avail_chroma[4] = 0; 2002 au1_avail_chroma[5] = 0; 2003 } 2004 2005 if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y) 2006 { 2007 au1_avail_chroma[3] = 0; 2008 au1_avail_chroma[6] = 0; 2009 au1_avail_chroma[7] = 0; 2010 } 2011 2012 { 2013 au1_src_top_right[0] = pu1_sao_src_top_left_chroma_top_right[0]; 2014 au1_src_top_right[1] = pu1_sao_src_top_left_chroma_top_right[1]; 2015 au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2]; 2016 au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1]; 2017 2018 if(chroma_yuv420sp_vu) 2019 { 2020 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma, 2021 src_strd, 2022 pu1_src_left_chroma, 2023 pu1_src_top_chroma, 2024 pu1_sao_src_chroma_top_left_ctb, 2025 au1_src_top_right, 2026 au1_sao_src_top_left_chroma_bot_left, 2027 au1_avail_chroma, 2028 ai1_offset_cr, 2029 ai1_offset_cb, 2030 sao_wd_chroma, 2031 sao_ht_chroma); 2032 } 2033 else 2034 { 2035 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma, 2036 src_strd, 2037 pu1_src_left_chroma, 2038 pu1_src_top_chroma, 2039 pu1_sao_src_chroma_top_left_ctb, 2040 au1_src_top_right, 2041 au1_sao_src_top_left_chroma_bot_left, 2042 au1_avail_chroma, 2043 ai1_offset_cb, 2044 ai1_offset_cr, 2045 sao_wd_chroma, 2046 sao_ht_chroma); 2047 } 2048 } 2049 2050 } 2051 } 2052 else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag)) 2053 { 2054 for(row = 0; row < sao_ht_chroma; row++) 2055 { 2056 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)]; 2057 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)]; 2058 } 2059 pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2]; 2060 pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1]; 2061 2062 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma); 2063 } 2064 } 2065 2066 pu1_src_luma += sao_ht_luma * src_strd; 2067 pu1_src_chroma += sao_ht_chroma * src_strd; 2068 ps_sao += (ps_sps->i2_pic_wd_in_ctb); 2069 } 2070 2071 /* Left CTB */ 2072 if(ps_sao_ctxt->i4_ctb_x > 0) 2073 { 2074 WORD32 sao_wd_luma = SAO_SHIFT_CTB; 2075 WORD32 sao_wd_chroma = 2 * SAO_SHIFT_CTB; 2076 WORD32 sao_ht_luma = ctb_size - SAO_SHIFT_CTB; 2077 WORD32 sao_ht_chroma = ctb_size / 2 - SAO_SHIFT_CTB; 2078 2079 WORD32 ctbx_l_t = 0, ctbx_l_l = 0, ctbx_l_r = 0, ctbx_l_d = 0, ctbx_l = 0; 2080 WORD32 ctby_l_t = 0, ctby_l_l = 0, ctby_l_r = 0, ctby_l_d = 0, ctby_l = 0; 2081 WORD32 au4_idx_l[8], idx_l; 2082 2083 WORD32 remaining_rows; 2084 slice_header_t *ps_slice_hdr_left; 2085 { 2086 WORD32 left_ctb_indx = (ps_sao_ctxt->i4_ctb_y) * ps_sps->i2_pic_wd_in_ctb + 2087 (ps_sao_ctxt->i4_ctb_x - 1); 2088 ps_slice_hdr_left = ps_slice_hdr_base + pu1_slice_idx[left_ctb_indx]; 2089 } 2090 2091 remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + sao_ht_luma); 2092 if(remaining_rows <= SAO_SHIFT_CTB) 2093 { 2094 sao_ht_luma += remaining_rows; 2095 } 2096 remaining_rows = ps_sps->i2_pic_height_in_luma_samples / 2 - ((ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) + sao_ht_chroma); 2097 if(remaining_rows <= SAO_SHIFT_CTB) 2098 { 2099 sao_ht_chroma += remaining_rows; 2100 } 2101 2102 pu1_src_luma -= sao_wd_luma; 2103 pu1_src_chroma -= sao_wd_chroma; 2104 ps_sao -= 1; 2105 pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma; 2106 pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma; 2107 pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size); 2108 pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size); 2109 2110 2111 if(0 != sao_ht_luma) 2112 { 2113 if(ps_slice_hdr_left->i1_slice_sao_luma_flag) 2114 { 2115 if(0 == ps_sao->b3_y_type_idx) 2116 { 2117 /* Update left, top and top-left */ 2118 for(row = 0; row < sao_ht_luma; row++) 2119 { 2120 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)]; 2121 } 2122 /*Update in next location*/ 2123 pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1]; 2124 2125 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma); 2126 2127 } 2128 2129 else if(1 == ps_sao->b3_y_type_idx) 2130 { 2131 ai1_offset_y[1] = ps_sao->b4_y_offset_1; 2132 ai1_offset_y[2] = ps_sao->b4_y_offset_2; 2133 ai1_offset_y[3] = ps_sao->b4_y_offset_3; 2134 ai1_offset_y[4] = ps_sao->b4_y_offset_4; 2135 2136 ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma, 2137 src_strd, 2138 pu1_src_left_luma, 2139 pu1_src_top_luma, 2140 pu1_sao_src_top_left_luma_curr_ctb, 2141 ps_sao->b5_y_band_pos, 2142 ai1_offset_y, 2143 sao_wd_luma, 2144 sao_ht_luma 2145 ); 2146 } 2147 2148 else // if(2 <= ps_sao->b3_y_type_idx) 2149 { 2150 ai1_offset_y[1] = ps_sao->b4_y_offset_1; 2151 ai1_offset_y[2] = ps_sao->b4_y_offset_2; 2152 ai1_offset_y[3] = ps_sao->b4_y_offset_3; 2153 ai1_offset_y[4] = ps_sao->b4_y_offset_4; 2154 2155 for(i = 0; i < 8; i++) 2156 { 2157 au1_avail_luma[i] = 255; 2158 au1_tile_slice_boundary[i] = 0; 2159 au4_idx_l[i] = 0; 2160 au4_ilf_across_tile_slice_enable[i] = 1; 2161 } 2162 /****************************************************************** 2163 * Derive the Top-left CTB's neighbour pixel's slice indices. 2164 * 2165 * 2166 * ____________ 2167 * | | | 2168 * | L_T| | 2169 * |____|_______|____ 2170 * | | | | 2171 * L_L | L | L_R | | 2172 * |____|_______| | 2173 * | | 2174 * L_D | | 2175 * |____________| 2176 * 2177 *****************************************************************/ 2178 2179 /*In case of slices or tiles*/ 2180 { 2181 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag)) 2182 { 2183 ctbx_l_t = ps_sao_ctxt->i4_ctb_x - 1; 2184 ctby_l_t = ps_sao_ctxt->i4_ctb_y - 1; 2185 2186 ctbx_l_l = ps_sao_ctxt->i4_ctb_x - 1; 2187 ctby_l_l = ps_sao_ctxt->i4_ctb_y; 2188 2189 ctbx_l_r = ps_sao_ctxt->i4_ctb_x; 2190 ctby_l_r = ps_sao_ctxt->i4_ctb_y; 2191 2192 ctbx_l_d = ps_sao_ctxt->i4_ctb_x - 1; 2193 ctby_l_d = ps_sao_ctxt->i4_ctb_y; 2194 2195 ctbx_l = ps_sao_ctxt->i4_ctb_x - 1; 2196 ctby_l = ps_sao_ctxt->i4_ctb_y; 2197 2198 if(!ps_slice_hdr->i1_first_slice_in_pic_flag) 2199 { 2200 if(0 == ps_sao_ctxt->i4_ctb_y) 2201 { 2202 au4_idx_l[2] = -1; 2203 au4_idx_l[4] = -1; 2204 au4_idx_l[5] = -1; 2205 } 2206 else 2207 { 2208 au4_idx_l[2] = au4_idx_l[4] = pu1_slice_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)]; 2209 au4_idx_l[5] = pu1_slice_idx[ctbx_l_t + 1 + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)]; 2210 } 2211 idx_l = au4_idx_l[6] = pu1_slice_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)]; 2212 au4_idx_l[0] = pu1_slice_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)]; 2213 au4_idx_l[1] = au4_idx_l[7] = pu1_slice_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)]; 2214 au4_idx_l[3] = pu1_slice_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)]; 2215 2216 /*Verify that the neighbor ctbs don't cross pic boundary.*/ 2217 if(0 == ps_sao_ctxt->i4_ctb_y) 2218 { 2219 au4_ilf_across_tile_slice_enable[2] = 0; 2220 au4_ilf_across_tile_slice_enable[4] = 0; 2221 au4_ilf_across_tile_slice_enable[5] = 0; 2222 } 2223 else 2224 { 2225 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag; 2226 au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2]; 2227 2228 } 2229 //TODO: ILF flag checks for [0] and [6] is missing. 2230 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_l[1])->i1_slice_loop_filter_across_slices_enabled_flag; 2231 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_l[3])->i1_slice_loop_filter_across_slices_enabled_flag; 2232 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_l[7])->i1_slice_loop_filter_across_slices_enabled_flag; 2233 2234 if(idx_l < au4_idx_l[5]) 2235 { 2236 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_l[5])->i1_slice_loop_filter_across_slices_enabled_flag; 2237 } 2238 2239 /* 2240 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag 2241 * of the pixel having a greater address is checked. Accordingly, set the availability flags 2242 */ 2243 for(i = 0; i < 8; i++) 2244 { 2245 /*Sets the edges that lie on the slice/tile boundary*/ 2246 if(au4_idx_l[i] != idx_l) 2247 { 2248 au1_tile_slice_boundary[i] = 1; 2249 } 2250 else 2251 { 2252 au4_ilf_across_tile_slice_enable[i] = 1; 2253 } 2254 } 2255 /*Reset indices*/ 2256 for(i = 0; i < 8; i++) 2257 { 2258 au4_idx_l[i] = 0; 2259 } 2260 } 2261 2262 if(ps_pps->i1_tiles_enabled_flag) 2263 { 2264 /* Calculate availability flags at slice boundary */ 2265 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y)))) 2266 { 2267 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */ 2268 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag) 2269 { 2270 if(0 == ps_sao_ctxt->i4_ctb_y) 2271 { 2272 au4_idx_l[2] = -1; 2273 au4_idx_l[4] = -1; 2274 au4_idx_l[5] = -1; 2275 } 2276 else 2277 { 2278 au4_idx_l[2] = au4_idx_l[4] = pu1_tile_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)]; 2279 au4_idx_l[5] = pu1_tile_idx[ctbx_l_t + 1 + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)]; 2280 } 2281 2282 idx_l = au4_idx_l[6] = pu1_tile_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)]; 2283 au4_idx_l[0] = pu1_tile_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)]; 2284 au4_idx_l[1] = au4_idx_l[7] = pu1_tile_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)]; 2285 au4_idx_l[3] = pu1_tile_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)]; 2286 2287 for(i = 0; i < 8; i++) 2288 { 2289 /*Sets the edges that lie on the slice/tile boundary*/ 2290 if(au4_idx_l[i] != idx_l) 2291 { 2292 au1_tile_slice_boundary[i] |= 1; 2293 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; 2294 } 2295 } 2296 } 2297 } 2298 } 2299 2300 for(i = 0; i < 8; i++) 2301 { 2302 /*Sets the edges that lie on the slice/tile boundary*/ 2303 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i])) 2304 { 2305 au1_avail_luma[i] = 0; 2306 } 2307 } 2308 } 2309 } 2310 if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma) 2311 { 2312 au1_avail_luma[0] = 0; 2313 au1_avail_luma[4] = 0; 2314 au1_avail_luma[6] = 0; 2315 } 2316 if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x) 2317 { 2318 au1_avail_luma[1] = 0; 2319 au1_avail_luma[5] = 0; 2320 au1_avail_luma[7] = 0; 2321 } 2322 2323 if(0 == ps_sao_ctxt->i4_ctb_y) 2324 { 2325 au1_avail_luma[2] = 0; 2326 au1_avail_luma[4] = 0; 2327 au1_avail_luma[5] = 0; 2328 } 2329 2330 if(ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) <= sao_ht_luma) 2331 { 2332 au1_avail_luma[3] = 0; 2333 au1_avail_luma[6] = 0; 2334 au1_avail_luma[7] = 0; 2335 } 2336 2337 { 2338 au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma]; 2339 u1_sao_src_top_left_luma_bot_left = pu1_sao_src_top_left_luma_bot_left[0]; 2340 ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma, 2341 src_strd, 2342 pu1_src_left_luma, 2343 pu1_src_top_luma, 2344 pu1_sao_src_top_left_luma_curr_ctb, 2345 au1_src_top_right, 2346 &u1_sao_src_top_left_luma_bot_left, 2347 au1_avail_luma, 2348 ai1_offset_y, 2349 sao_wd_luma, 2350 sao_ht_luma); 2351 } 2352 2353 } 2354 } 2355 else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag)) 2356 { 2357 /* Update left, top and top-left */ 2358 for(row = 0; row < sao_ht_luma; row++) 2359 { 2360 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)]; 2361 } 2362 /*Update in next location*/ 2363 pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1]; 2364 2365 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma); 2366 } 2367 } 2368 2369 if(0 != sao_ht_chroma) 2370 { 2371 if(ps_slice_hdr_left->i1_slice_sao_chroma_flag) 2372 { 2373 if(0 == ps_sao->b3_cb_type_idx) 2374 { 2375 for(row = 0; row < sao_ht_chroma; row++) 2376 { 2377 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)]; 2378 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)]; 2379 } 2380 pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2]; 2381 pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1]; 2382 2383 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma); 2384 } 2385 2386 else if(1 == ps_sao->b3_cb_type_idx) 2387 { 2388 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1; 2389 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2; 2390 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3; 2391 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4; 2392 2393 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1; 2394 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2; 2395 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3; 2396 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4; 2397 2398 if(chroma_yuv420sp_vu) 2399 { 2400 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma, 2401 src_strd, 2402 pu1_src_left_chroma, 2403 pu1_src_top_chroma, 2404 pu1_sao_src_top_left_chroma_curr_ctb, 2405 ps_sao->b5_cr_band_pos, 2406 ps_sao->b5_cb_band_pos, 2407 ai1_offset_cr, 2408 ai1_offset_cb, 2409 sao_wd_chroma, 2410 sao_ht_chroma 2411 ); 2412 } 2413 else 2414 { 2415 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma, 2416 src_strd, 2417 pu1_src_left_chroma, 2418 pu1_src_top_chroma, 2419 pu1_sao_src_top_left_chroma_curr_ctb, 2420 ps_sao->b5_cb_band_pos, 2421 ps_sao->b5_cr_band_pos, 2422 ai1_offset_cb, 2423 ai1_offset_cr, 2424 sao_wd_chroma, 2425 sao_ht_chroma 2426 ); 2427 } 2428 } 2429 2430 else // if(2 <= ps_sao->b3_cb_type_idx) 2431 { 2432 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1; 2433 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2; 2434 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3; 2435 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4; 2436 2437 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1; 2438 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2; 2439 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3; 2440 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4; 2441 2442 for(i = 0; i < 8; i++) 2443 { 2444 au1_avail_chroma[i] = 255; 2445 au1_tile_slice_boundary[i] = 0; 2446 au4_idx_l[i] = 0; 2447 au4_ilf_across_tile_slice_enable[i] = 1; 2448 } 2449 /*In case of slices*/ 2450 { 2451 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag)) 2452 { 2453 ctbx_l_t = ps_sao_ctxt->i4_ctb_x - 1; 2454 ctby_l_t = ps_sao_ctxt->i4_ctb_y - 1; 2455 2456 ctbx_l_l = ps_sao_ctxt->i4_ctb_x - 1; 2457 ctby_l_l = ps_sao_ctxt->i4_ctb_y; 2458 2459 ctbx_l_r = ps_sao_ctxt->i4_ctb_x; 2460 ctby_l_r = ps_sao_ctxt->i4_ctb_y; 2461 2462 ctbx_l_d = ps_sao_ctxt->i4_ctb_x - 1; 2463 ctby_l_d = ps_sao_ctxt->i4_ctb_y; 2464 2465 ctbx_l = ps_sao_ctxt->i4_ctb_x - 1; 2466 ctby_l = ps_sao_ctxt->i4_ctb_y; 2467 2468 if(!ps_slice_hdr->i1_first_slice_in_pic_flag) 2469 { 2470 if(0 == ps_sao_ctxt->i4_ctb_y) 2471 { 2472 au4_idx_l[2] = -1; 2473 au4_idx_l[4] = -1; 2474 au4_idx_l[5] = -1; 2475 } 2476 else 2477 { 2478 au4_idx_l[2] = au4_idx_l[4] = pu1_slice_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)]; 2479 au4_idx_l[5] = pu1_slice_idx[ctbx_l_t + 1 + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)]; 2480 } 2481 idx_l = au4_idx_l[6] = pu1_slice_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)]; 2482 au4_idx_l[0] = pu1_slice_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)]; 2483 au4_idx_l[1] = au4_idx_l[7] = pu1_slice_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)]; 2484 au4_idx_l[3] = pu1_slice_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)]; 2485 2486 /*Verify that the neighbour ctbs dont cross pic boundary.*/ 2487 if(0 == ps_sao_ctxt->i4_ctb_y) 2488 { 2489 au4_ilf_across_tile_slice_enable[2] = 0; 2490 au4_ilf_across_tile_slice_enable[4] = 0; 2491 au4_ilf_across_tile_slice_enable[5] = 0; 2492 } 2493 else 2494 { 2495 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag; 2496 au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2]; 2497 } 2498 2499 if(au4_idx_l[5] > idx_l) 2500 { 2501 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_l[5])->i1_slice_loop_filter_across_slices_enabled_flag; 2502 } 2503 2504 // au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag; 2505 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_l[1])->i1_slice_loop_filter_across_slices_enabled_flag; 2506 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_l[3])->i1_slice_loop_filter_across_slices_enabled_flag; 2507 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_l[7])->i1_slice_loop_filter_across_slices_enabled_flag; 2508 /* 2509 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag 2510 * of the pixel having a greater address is checked. Accordingly, set the availability flags 2511 */ 2512 for(i = 0; i < 8; i++) 2513 { 2514 /*Sets the edges that lie on the slice/tile boundary*/ 2515 if(au4_idx_l[i] != idx_l) 2516 { 2517 au1_tile_slice_boundary[i] = 1; 2518 } 2519 else 2520 { 2521 au4_ilf_across_tile_slice_enable[i] = 1; 2522 } 2523 } 2524 /*Reset indices*/ 2525 for(i = 0; i < 8; i++) 2526 { 2527 au4_idx_l[i] = 0; 2528 } 2529 } 2530 if(ps_pps->i1_tiles_enabled_flag) 2531 { 2532 /* Calculate availability flags at slice boundary */ 2533 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y)))) 2534 { 2535 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */ 2536 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag) 2537 { 2538 if(0 == ps_sao_ctxt->i4_ctb_y) 2539 { 2540 au4_idx_l[2] = -1; 2541 au4_idx_l[4] = -1; 2542 au4_idx_l[5] = -1; 2543 } 2544 else 2545 { 2546 au4_idx_l[2] = au4_idx_l[4] = pu1_tile_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)]; 2547 au4_idx_l[5] = pu1_tile_idx[ctbx_l_t + 1 + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)]; 2548 } 2549 2550 idx_l = au4_idx_l[6] = pu1_tile_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)]; 2551 au4_idx_l[0] = pu1_tile_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)]; 2552 au4_idx_l[1] = au4_idx_l[7] = pu1_tile_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)]; 2553 au4_idx_l[3] = pu1_tile_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)]; 2554 2555 for(i = 0; i < 8; i++) 2556 { 2557 /*Sets the edges that lie on the slice/tile boundary*/ 2558 if(au4_idx_l[i] != idx_l) 2559 { 2560 au1_tile_slice_boundary[i] |= 1; 2561 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0 2562 } 2563 } 2564 } 2565 } 2566 } 2567 for(i = 0; i < 8; i++) 2568 { 2569 /*Sets the edges that lie on the slice/tile boundary*/ 2570 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i])) 2571 { 2572 au1_avail_chroma[i] = 0; 2573 } 2574 } 2575 } 2576 } 2577 if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma) 2578 { 2579 au1_avail_chroma[0] = 0; 2580 au1_avail_chroma[4] = 0; 2581 au1_avail_chroma[6] = 0; 2582 } 2583 2584 if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x) 2585 { 2586 au1_avail_chroma[1] = 0; 2587 au1_avail_chroma[5] = 0; 2588 au1_avail_chroma[7] = 0; 2589 } 2590 2591 if(0 == ps_sao_ctxt->i4_ctb_y) 2592 { 2593 au1_avail_chroma[2] = 0; 2594 au1_avail_chroma[4] = 0; 2595 au1_avail_chroma[5] = 0; 2596 } 2597 2598 if(ps_sps->i2_pic_height_in_luma_samples / 2 - (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) <= sao_ht_chroma) 2599 { 2600 au1_avail_chroma[3] = 0; 2601 au1_avail_chroma[6] = 0; 2602 au1_avail_chroma[7] = 0; 2603 } 2604 2605 { 2606 au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma]; 2607 au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1]; 2608 au1_src_bot_left[0] = pu1_sao_src_top_left_chroma_bot_left[0]; 2609 au1_src_bot_left[1] = pu1_sao_src_top_left_chroma_bot_left[1]; 2610 //au1_src_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2]; 2611 //au1_src_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1]; 2612 if((ctb_size == 16) && (ps_sao_ctxt->i4_ctb_x != ps_sps->i2_pic_wd_in_ctb - 1)) 2613 { 2614 au1_src_top_right[0] = pu1_src_chroma[sao_wd_chroma - src_strd]; 2615 au1_src_top_right[1] = pu1_src_chroma[sao_wd_chroma - src_strd + 1]; 2616 } 2617 2618 2619 if(chroma_yuv420sp_vu) 2620 { 2621 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma, 2622 src_strd, 2623 pu1_src_left_chroma, 2624 pu1_src_top_chroma, 2625 pu1_sao_src_top_left_chroma_curr_ctb, 2626 au1_src_top_right, 2627 au1_src_bot_left, 2628 au1_avail_chroma, 2629 ai1_offset_cr, 2630 ai1_offset_cb, 2631 sao_wd_chroma, 2632 sao_ht_chroma); 2633 } 2634 else 2635 { 2636 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma, 2637 src_strd, 2638 pu1_src_left_chroma, 2639 pu1_src_top_chroma, 2640 pu1_sao_src_top_left_chroma_curr_ctb, 2641 au1_src_top_right, 2642 au1_src_bot_left, 2643 au1_avail_chroma, 2644 ai1_offset_cb, 2645 ai1_offset_cr, 2646 sao_wd_chroma, 2647 sao_ht_chroma); 2648 } 2649 } 2650 2651 } 2652 } 2653 else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag)) 2654 { 2655 for(row = 0; row < sao_ht_chroma; row++) 2656 { 2657 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)]; 2658 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)]; 2659 } 2660 pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2]; 2661 pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1]; 2662 2663 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma); 2664 } 2665 2666 } 2667 pu1_src_luma += sao_wd_luma; 2668 pu1_src_chroma += sao_wd_chroma; 2669 ps_sao += 1; 2670 } 2671 2672 2673 /* Current CTB */ 2674 { 2675 WORD32 sao_wd_luma = ctb_size - SAO_SHIFT_CTB; 2676 WORD32 sao_wd_chroma = ctb_size - SAO_SHIFT_CTB * 2; 2677 WORD32 sao_ht_luma = ctb_size - SAO_SHIFT_CTB; 2678 WORD32 sao_ht_chroma = ctb_size / 2 - SAO_SHIFT_CTB; 2679 WORD32 ctbx_c_t = 0, ctbx_c_l = 0, ctbx_c_r = 0, ctbx_c_d = 0, ctbx_c = 0; 2680 WORD32 ctby_c_t = 0, ctby_c_l = 0, ctby_c_r = 0, ctby_c_d = 0, ctby_c = 0; 2681 WORD32 au4_idx_c[8], idx_c; 2682 2683 WORD32 remaining_rows; 2684 WORD32 remaining_cols; 2685 2686 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_luma); 2687 if(remaining_cols <= SAO_SHIFT_CTB) 2688 { 2689 sao_wd_luma += remaining_cols; 2690 } 2691 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_chroma); 2692 if(remaining_cols <= 2 * SAO_SHIFT_CTB) 2693 { 2694 sao_wd_chroma += remaining_cols; 2695 } 2696 2697 remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + sao_ht_luma); 2698 if(remaining_rows <= SAO_SHIFT_CTB) 2699 { 2700 sao_ht_luma += remaining_rows; 2701 } 2702 remaining_rows = ps_sps->i2_pic_height_in_luma_samples / 2 - ((ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) + sao_ht_chroma); 2703 if(remaining_rows <= SAO_SHIFT_CTB) 2704 { 2705 sao_ht_chroma += remaining_rows; 2706 } 2707 2708 pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size); 2709 pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size); 2710 pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size); 2711 pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size); 2712 2713 if((0 != sao_wd_luma) && (0 != sao_ht_luma)) 2714 { 2715 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag) 2716 { 2717 if(0 == ps_sao->b3_y_type_idx) 2718 { 2719 /* Update left, top and top-left */ 2720 for(row = 0; row < sao_ht_luma; row++) 2721 { 2722 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)]; 2723 } 2724 pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1]; 2725 2726 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma); 2727 2728 pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma]; 2729 2730 } 2731 2732 else if(1 == ps_sao->b3_y_type_idx) 2733 { 2734 ai1_offset_y[1] = ps_sao->b4_y_offset_1; 2735 ai1_offset_y[2] = ps_sao->b4_y_offset_2; 2736 ai1_offset_y[3] = ps_sao->b4_y_offset_3; 2737 ai1_offset_y[4] = ps_sao->b4_y_offset_4; 2738 2739 ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma, 2740 src_strd, 2741 pu1_src_left_luma, 2742 pu1_src_top_luma, 2743 pu1_sao_src_top_left_luma_curr_ctb, 2744 ps_sao->b5_y_band_pos, 2745 ai1_offset_y, 2746 sao_wd_luma, 2747 sao_ht_luma 2748 ); 2749 } 2750 2751 else // if(2 <= ps_sao->b3_y_type_idx) 2752 { 2753 ai1_offset_y[1] = ps_sao->b4_y_offset_1; 2754 ai1_offset_y[2] = ps_sao->b4_y_offset_2; 2755 ai1_offset_y[3] = ps_sao->b4_y_offset_3; 2756 ai1_offset_y[4] = ps_sao->b4_y_offset_4; 2757 2758 for(i = 0; i < 8; i++) 2759 { 2760 au1_avail_luma[i] = 255; 2761 au1_tile_slice_boundary[i] = 0; 2762 au4_idx_c[i] = 0; 2763 au4_ilf_across_tile_slice_enable[i] = 1; 2764 } 2765 /****************************************************************** 2766 * Derive the Top-left CTB's neighbour pixel's slice indices. 2767 * 2768 * 2769 * ____________ 2770 * | | | 2771 * | | C_T | 2772 * |____|_______|____ 2773 * | | | | 2774 * | C_L| C | C_R| 2775 * |____|_______| | 2776 * | C_D | 2777 * | | 2778 * |____________| 2779 * 2780 *****************************************************************/ 2781 2782 /*In case of slices*/ 2783 { 2784 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag)) 2785 { 2786 ctbx_c_t = ps_sao_ctxt->i4_ctb_x; 2787 ctby_c_t = ps_sao_ctxt->i4_ctb_y - 1; 2788 2789 ctbx_c_l = ps_sao_ctxt->i4_ctb_x - 1; 2790 ctby_c_l = ps_sao_ctxt->i4_ctb_y; 2791 2792 ctbx_c_r = ps_sao_ctxt->i4_ctb_x; 2793 ctby_c_r = ps_sao_ctxt->i4_ctb_y; 2794 2795 ctbx_c_d = ps_sao_ctxt->i4_ctb_x; 2796 ctby_c_d = ps_sao_ctxt->i4_ctb_y; 2797 2798 ctbx_c = ps_sao_ctxt->i4_ctb_x; 2799 ctby_c = ps_sao_ctxt->i4_ctb_y; 2800 2801 if(!ps_slice_hdr->i1_first_slice_in_pic_flag) 2802 { 2803 if(0 == ps_sao_ctxt->i4_ctb_x) 2804 { 2805 au4_idx_c[6] = -1; 2806 au4_idx_c[0] = -1; 2807 au4_idx_c[4] = -1; 2808 } 2809 else 2810 { 2811 au4_idx_c[0] = au4_idx_c[6] = pu1_slice_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)]; 2812 } 2813 2814 if(0 == ps_sao_ctxt->i4_ctb_y) 2815 { 2816 au4_idx_c[2] = -1; 2817 au4_idx_c[5] = -1; 2818 au4_idx_c[4] = -1; 2819 } 2820 else 2821 { 2822 au4_idx_c[4] = pu1_slice_idx[ctbx_c_t - 1 + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)]; 2823 au4_idx_c[2] = au4_idx_c[5] = pu1_slice_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)]; 2824 } 2825 idx_c = pu1_slice_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)]; 2826 au4_idx_c[1] = au4_idx_c[7] = pu1_slice_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)]; 2827 au4_idx_c[3] = pu1_slice_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)]; 2828 2829 if(0 == ps_sao_ctxt->i4_ctb_x) 2830 { 2831 au4_ilf_across_tile_slice_enable[6] = 0; 2832 au4_ilf_across_tile_slice_enable[0] = 0; 2833 au4_ilf_across_tile_slice_enable[4] = 0; 2834 } 2835 else 2836 { 2837 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_c[6])->i1_slice_loop_filter_across_slices_enabled_flag; 2838 au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;; 2839 } 2840 if(0 == ps_sao_ctxt->i4_ctb_y) 2841 { 2842 au4_ilf_across_tile_slice_enable[2] = 0; 2843 au4_ilf_across_tile_slice_enable[4] = 0; 2844 au4_ilf_across_tile_slice_enable[5] = 0; 2845 } 2846 else 2847 { 2848 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag; 2849 au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2]; 2850 } 2851 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_c[1])->i1_slice_loop_filter_across_slices_enabled_flag; 2852 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_c[3])->i1_slice_loop_filter_across_slices_enabled_flag; 2853 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_c[7])->i1_slice_loop_filter_across_slices_enabled_flag; 2854 2855 if(au4_idx_c[6] < idx_c) 2856 { 2857 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag; 2858 } 2859 2860 /* 2861 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag 2862 * of the pixel having a greater address is checked. Accordingly, set the availability flags 2863 */ 2864 for(i = 0; i < 8; i++) 2865 { 2866 /*Sets the edges that lie on the slice/tile boundary*/ 2867 if(au4_idx_c[i] != idx_c) 2868 { 2869 au1_tile_slice_boundary[i] = 1; 2870 } 2871 else 2872 { 2873 au4_ilf_across_tile_slice_enable[i] = 1; 2874 } 2875 } 2876 /*Reset indices*/ 2877 for(i = 0; i < 8; i++) 2878 { 2879 au4_idx_c[i] = 0; 2880 } 2881 } 2882 2883 if(ps_pps->i1_tiles_enabled_flag) 2884 { 2885 /* Calculate availability flags at slice boundary */ 2886 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y)))) 2887 { 2888 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */ 2889 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag) 2890 { 2891 if(0 == ps_sao_ctxt->i4_ctb_x) 2892 { 2893 au4_idx_c[6] = -1; 2894 au4_idx_c[0] = -1; 2895 au4_idx_c[4] = -1; 2896 } 2897 else 2898 { 2899 au4_idx_c[0] = au4_idx_c[6] = pu1_tile_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)]; 2900 } 2901 2902 if(0 == ps_sao_ctxt->i4_ctb_y) 2903 { 2904 au4_idx_c[2] = -1; 2905 au4_idx_c[5] = -1; 2906 au4_idx_c[4] = -1; 2907 } 2908 else 2909 { 2910 au4_idx_c[4] = pu1_tile_idx[ctbx_c_t - 1 + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)]; 2911 au4_idx_c[2] = au4_idx_c[5] = pu1_tile_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)]; 2912 } 2913 idx_c = pu1_tile_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)]; 2914 au4_idx_c[1] = au4_idx_c[7] = pu1_tile_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)]; 2915 au4_idx_c[3] = pu1_tile_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)]; 2916 2917 for(i = 0; i < 8; i++) 2918 { 2919 /*Sets the edges that lie on the slice/tile boundary*/ 2920 if(au4_idx_c[i] != idx_c) 2921 { 2922 au1_tile_slice_boundary[i] |= 1; 2923 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0 2924 } 2925 } 2926 } 2927 } 2928 } 2929 2930 for(i = 0; i < 8; i++) 2931 { 2932 /*Sets the edges that lie on the slice/tile boundary*/ 2933 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i])) 2934 { 2935 au1_avail_luma[i] = 0; 2936 } 2937 } 2938 2939 } 2940 } 2941 if(0 == ps_sao_ctxt->i4_ctb_x) 2942 { 2943 au1_avail_luma[0] = 0; 2944 au1_avail_luma[4] = 0; 2945 au1_avail_luma[6] = 0; 2946 } 2947 2948 if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_luma) 2949 { 2950 au1_avail_luma[1] = 0; 2951 au1_avail_luma[5] = 0; 2952 au1_avail_luma[7] = 0; 2953 } 2954 2955 if(0 == ps_sao_ctxt->i4_ctb_y) 2956 { 2957 au1_avail_luma[2] = 0; 2958 au1_avail_luma[4] = 0; 2959 au1_avail_luma[5] = 0; 2960 } 2961 2962 if(ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) <= sao_ht_luma) 2963 { 2964 au1_avail_luma[3] = 0; 2965 au1_avail_luma[6] = 0; 2966 au1_avail_luma[7] = 0; 2967 } 2968 2969 { 2970 au1_src_top_right[0] = pu1_src_luma[sao_wd_luma - src_strd]; 2971 u1_sao_src_top_left_luma_bot_left = pu1_src_luma[sao_ht_luma * src_strd - 1]; 2972 2973 ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma, 2974 src_strd, 2975 pu1_src_left_luma, 2976 pu1_src_top_luma, 2977 pu1_sao_src_top_left_luma_curr_ctb, 2978 au1_src_top_right, 2979 &u1_sao_src_top_left_luma_bot_left, 2980 au1_avail_luma, 2981 ai1_offset_y, 2982 sao_wd_luma, 2983 sao_ht_luma); 2984 } 2985 pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma]; 2986 pu1_sao_src_top_left_luma_bot_left[0] = pu1_src_luma[(sao_ht_luma)*src_strd + sao_wd_luma - 1]; 2987 } 2988 } 2989 else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag)) 2990 { 2991 /* Update left, top and top-left */ 2992 for(row = 0; row < sao_ht_luma; row++) 2993 { 2994 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)]; 2995 } 2996 pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1]; 2997 2998 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma); 2999 3000 pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma]; 3001 } 3002 } 3003 3004 if((0 != sao_wd_chroma) && (0 != sao_ht_chroma)) 3005 { 3006 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag) 3007 { 3008 if(0 == ps_sao->b3_cb_type_idx) 3009 { 3010 for(row = 0; row < sao_ht_chroma; row++) 3011 { 3012 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)]; 3013 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)]; 3014 } 3015 pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2]; 3016 pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1]; 3017 3018 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma); 3019 3020 pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma]; 3021 pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1]; 3022 } 3023 3024 else if(1 == ps_sao->b3_cb_type_idx) 3025 { 3026 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1; 3027 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2; 3028 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3; 3029 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4; 3030 3031 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1; 3032 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2; 3033 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3; 3034 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4; 3035 3036 if(chroma_yuv420sp_vu) 3037 { 3038 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma, 3039 src_strd, 3040 pu1_src_left_chroma, 3041 pu1_src_top_chroma, 3042 pu1_sao_src_top_left_chroma_curr_ctb, 3043 ps_sao->b5_cr_band_pos, 3044 ps_sao->b5_cb_band_pos, 3045 ai1_offset_cr, 3046 ai1_offset_cb, 3047 sao_wd_chroma, 3048 sao_ht_chroma 3049 ); 3050 } 3051 else 3052 { 3053 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma, 3054 src_strd, 3055 pu1_src_left_chroma, 3056 pu1_src_top_chroma, 3057 pu1_sao_src_top_left_chroma_curr_ctb, 3058 ps_sao->b5_cb_band_pos, 3059 ps_sao->b5_cr_band_pos, 3060 ai1_offset_cb, 3061 ai1_offset_cr, 3062 sao_wd_chroma, 3063 sao_ht_chroma 3064 ); 3065 } 3066 } 3067 3068 else // if(2 <= ps_sao->b3_cb_type_idx) 3069 { 3070 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1; 3071 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2; 3072 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3; 3073 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4; 3074 3075 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1; 3076 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2; 3077 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3; 3078 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4; 3079 3080 for(i = 0; i < 8; i++) 3081 { 3082 au1_avail_chroma[i] = 255; 3083 au1_tile_slice_boundary[i] = 0; 3084 au4_idx_c[i] = 0; 3085 au4_ilf_across_tile_slice_enable[i] = 1; 3086 } 3087 { 3088 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag)) 3089 { 3090 ctbx_c_t = ps_sao_ctxt->i4_ctb_x; 3091 ctby_c_t = ps_sao_ctxt->i4_ctb_y - 1; 3092 3093 ctbx_c_l = ps_sao_ctxt->i4_ctb_x - 1; 3094 ctby_c_l = ps_sao_ctxt->i4_ctb_y; 3095 3096 ctbx_c_r = ps_sao_ctxt->i4_ctb_x; 3097 ctby_c_r = ps_sao_ctxt->i4_ctb_y; 3098 3099 ctbx_c_d = ps_sao_ctxt->i4_ctb_x; 3100 ctby_c_d = ps_sao_ctxt->i4_ctb_y; 3101 3102 ctbx_c = ps_sao_ctxt->i4_ctb_x; 3103 ctby_c = ps_sao_ctxt->i4_ctb_y; 3104 3105 if(!ps_slice_hdr->i1_first_slice_in_pic_flag) 3106 { 3107 if(0 == ps_sao_ctxt->i4_ctb_x) 3108 { 3109 au4_idx_c[0] = -1; 3110 au4_idx_c[4] = -1; 3111 au4_idx_c[6] = -1; 3112 } 3113 else 3114 { 3115 au4_idx_c[0] = au4_idx_c[6] = pu1_slice_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)]; 3116 } 3117 3118 if(0 == ps_sao_ctxt->i4_ctb_y) 3119 { 3120 au4_idx_c[2] = -1; 3121 au4_idx_c[4] = -1; 3122 au4_idx_c[5] = -1; 3123 } 3124 else 3125 { 3126 au4_idx_c[2] = au4_idx_c[5] = pu1_slice_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)]; 3127 au4_idx_c[4] = pu1_slice_idx[ctbx_c_t - 1 + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)]; 3128 } 3129 idx_c = pu1_slice_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)]; 3130 au4_idx_c[1] = au4_idx_c[7] = pu1_slice_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)]; 3131 au4_idx_c[3] = pu1_slice_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)]; 3132 3133 if(0 == ps_sao_ctxt->i4_ctb_x) 3134 { 3135 au4_ilf_across_tile_slice_enable[0] = 0; 3136 au4_ilf_across_tile_slice_enable[4] = 0; 3137 au4_ilf_across_tile_slice_enable[6] = 0; 3138 } 3139 else 3140 { 3141 au4_ilf_across_tile_slice_enable[6] &= (ps_slice_hdr_base + au4_idx_c[6])->i1_slice_loop_filter_across_slices_enabled_flag; 3142 au4_ilf_across_tile_slice_enable[0] &= (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag; 3143 } 3144 3145 if(0 == ps_sao_ctxt->i4_ctb_y) 3146 { 3147 au4_ilf_across_tile_slice_enable[2] = 0; 3148 au4_ilf_across_tile_slice_enable[4] = 0; 3149 au4_ilf_across_tile_slice_enable[5] = 0; 3150 } 3151 else 3152 { 3153 au4_ilf_across_tile_slice_enable[2] &= (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag; 3154 au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2]; 3155 } 3156 3157 au4_ilf_across_tile_slice_enable[1] &= (ps_slice_hdr_base + au4_idx_c[1])->i1_slice_loop_filter_across_slices_enabled_flag; 3158 au4_ilf_across_tile_slice_enable[3] &= (ps_slice_hdr_base + au4_idx_c[3])->i1_slice_loop_filter_across_slices_enabled_flag; 3159 au4_ilf_across_tile_slice_enable[7] &= (ps_slice_hdr_base + au4_idx_c[7])->i1_slice_loop_filter_across_slices_enabled_flag; 3160 3161 if(idx_c > au4_idx_c[6]) 3162 { 3163 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag; 3164 } 3165 3166 /* 3167 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag 3168 * of the pixel having a greater address is checked. Accordingly, set the availability flags 3169 */ 3170 for(i = 0; i < 8; i++) 3171 { 3172 /*Sets the edges that lie on the slice/tile boundary*/ 3173 if(au4_idx_c[i] != idx_c) 3174 { 3175 au1_tile_slice_boundary[i] = 1; 3176 } 3177 else 3178 { 3179 au4_ilf_across_tile_slice_enable[i] = 1; 3180 } 3181 } 3182 /*Reset indices*/ 3183 for(i = 0; i < 8; i++) 3184 { 3185 au4_idx_c[i] = 0; 3186 } 3187 } 3188 3189 if(ps_pps->i1_tiles_enabled_flag) 3190 { 3191 /* Calculate availability flags at slice boundary */ 3192 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y)))) 3193 { 3194 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */ 3195 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag) 3196 { 3197 if(0 == ps_sao_ctxt->i4_ctb_x) 3198 { 3199 au4_idx_c[6] = -1; 3200 au4_idx_c[0] = -1; 3201 au4_idx_c[4] = -1; 3202 } 3203 else 3204 { 3205 au4_idx_c[0] = au4_idx_c[6] = pu1_tile_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)]; 3206 } 3207 3208 if(0 == ps_sao_ctxt->i4_ctb_y) 3209 { 3210 au4_idx_c[2] = -1; 3211 au4_idx_c[5] = -1; 3212 au4_idx_c[4] = -1; 3213 } 3214 else 3215 { 3216 au4_idx_c[4] = pu1_tile_idx[ctbx_c_t - 1 + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)]; 3217 au4_idx_c[2] = au4_idx_c[5] = pu1_tile_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)]; 3218 } 3219 idx_c = pu1_tile_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)]; 3220 au4_idx_c[1] = au4_idx_c[7] = pu1_tile_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)]; 3221 au4_idx_c[3] = pu1_tile_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)]; 3222 3223 for(i = 0; i < 8; i++) 3224 { 3225 /*Sets the edges that lie on the slice/tile boundary*/ 3226 if(au4_idx_c[i] != idx_c) 3227 { 3228 au1_tile_slice_boundary[i] |= 1; 3229 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0 3230 } 3231 } 3232 } 3233 } 3234 } 3235 3236 for(i = 0; i < 8; i++) 3237 { 3238 /*Sets the edges that lie on the slice/tile boundary*/ 3239 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i])) 3240 { 3241 au1_avail_chroma[i] = 0; 3242 } 3243 } 3244 } 3245 } 3246 3247 if(0 == ps_sao_ctxt->i4_ctb_x) 3248 { 3249 au1_avail_chroma[0] = 0; 3250 au1_avail_chroma[4] = 0; 3251 au1_avail_chroma[6] = 0; 3252 } 3253 3254 if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_chroma) 3255 { 3256 au1_avail_chroma[1] = 0; 3257 au1_avail_chroma[5] = 0; 3258 au1_avail_chroma[7] = 0; 3259 } 3260 3261 if(0 == ps_sao_ctxt->i4_ctb_y) 3262 { 3263 au1_avail_chroma[2] = 0; 3264 au1_avail_chroma[4] = 0; 3265 au1_avail_chroma[5] = 0; 3266 } 3267 3268 if(ps_sps->i2_pic_height_in_luma_samples / 2 - (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) <= sao_ht_chroma) 3269 { 3270 au1_avail_chroma[3] = 0; 3271 au1_avail_chroma[6] = 0; 3272 au1_avail_chroma[7] = 0; 3273 } 3274 3275 { 3276 au1_src_top_right[0] = pu1_src_chroma[sao_wd_chroma - src_strd]; 3277 au1_src_top_right[1] = pu1_src_chroma[sao_wd_chroma - src_strd + 1]; 3278 3279 au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2]; 3280 au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1]; 3281 3282 if(chroma_yuv420sp_vu) 3283 { 3284 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma, 3285 src_strd, 3286 pu1_src_left_chroma, 3287 pu1_src_top_chroma, 3288 pu1_sao_src_top_left_chroma_curr_ctb, 3289 au1_src_top_right, 3290 au1_sao_src_top_left_chroma_bot_left, 3291 au1_avail_chroma, 3292 ai1_offset_cr, 3293 ai1_offset_cb, 3294 sao_wd_chroma, 3295 sao_ht_chroma); 3296 } 3297 else 3298 { 3299 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma, 3300 src_strd, 3301 pu1_src_left_chroma, 3302 pu1_src_top_chroma, 3303 pu1_sao_src_top_left_chroma_curr_ctb, 3304 au1_src_top_right, 3305 au1_sao_src_top_left_chroma_bot_left, 3306 au1_avail_chroma, 3307 ai1_offset_cb, 3308 ai1_offset_cr, 3309 sao_wd_chroma, 3310 sao_ht_chroma); 3311 } 3312 } 3313 3314 } 3315 pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma]; 3316 pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1]; 3317 3318 pu1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[(sao_ht_chroma)*src_strd + sao_wd_chroma - 2]; 3319 pu1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[(sao_ht_chroma)*src_strd + sao_wd_chroma - 1]; 3320 } 3321 else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag)) 3322 { 3323 for(row = 0; row < sao_ht_chroma; row++) 3324 { 3325 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)]; 3326 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)]; 3327 } 3328 pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2]; 3329 pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1]; 3330 3331 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma); 3332 3333 pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma]; 3334 pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1]; 3335 } 3336 3337 } 3338 } 3339 3340 3341 3342 3343 /* If no loop filter is enabled copy the backed up values */ 3344 { 3345 /* Luma */ 3346 if(no_loop_filter_enabled_luma) 3347 { 3348 UWORD32 u4_no_loop_filter_flag; 3349 WORD32 loop_filter_bit_pos; 3350 WORD32 log2_min_cu = 3; 3351 WORD32 min_cu = (1 << log2_min_cu); 3352 UWORD8 *pu1_src_tmp_luma = pu1_src_luma; 3353 WORD32 sao_blk_ht = ctb_size - SAO_SHIFT_CTB; 3354 WORD32 sao_blk_wd = ctb_size; 3355 WORD32 remaining_rows; 3356 WORD32 remaining_cols; 3357 3358 remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB); 3359 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB); 3360 if(remaining_rows <= SAO_SHIFT_CTB) 3361 sao_blk_ht += remaining_rows; 3362 if(remaining_cols <= SAO_SHIFT_CTB) 3363 sao_blk_wd += remaining_cols; 3364 3365 pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB : 0; 3366 pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0; 3367 3368 pu1_src_backup_luma = ps_sao_ctxt->pu1_tmp_buf_luma; 3369 3370 loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) + 3371 (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3); 3372 if(ps_sao_ctxt->i4_ctb_x > 0) 3373 loop_filter_bit_pos -= 1; 3374 3375 pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag + 3376 (loop_filter_bit_pos >> 3); 3377 3378 for(i = -(ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB : 0) >> log2_min_cu; 3379 i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++) 3380 { 3381 WORD32 tmp_wd = sao_blk_wd; 3382 3383 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> 3384 (loop_filter_bit_pos & 7); 3385 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1; 3386 3387 if(u4_no_loop_filter_flag) 3388 { 3389 while(tmp_wd > 0) 3390 { 3391 if(CTZ(u4_no_loop_filter_flag)) 3392 { 3393 pu1_src_tmp_luma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); 3394 pu1_src_backup_luma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); 3395 tmp_wd -= CTZ(u4_no_loop_filter_flag) << log2_min_cu; 3396 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag)); 3397 } 3398 else 3399 { 3400 for(row = 0; row < min_cu; row++) 3401 { 3402 for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++) 3403 { 3404 pu1_src_tmp_luma[row * src_strd + col] = pu1_src_backup_luma[row * backup_strd + col]; 3405 } 3406 } 3407 pu1_src_tmp_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); 3408 pu1_src_backup_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); 3409 tmp_wd -= CTZ(~u4_no_loop_filter_flag) << log2_min_cu; 3410 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag)); 3411 } 3412 } 3413 3414 pu1_src_tmp_luma -= sao_blk_wd; 3415 pu1_src_backup_luma -= sao_blk_wd; 3416 } 3417 3418 pu1_src_tmp_luma += (src_strd << log2_min_cu); 3419 pu1_src_backup_luma += (backup_strd << log2_min_cu); 3420 } 3421 } 3422 3423 /* Chroma */ 3424 if(no_loop_filter_enabled_chroma) 3425 { 3426 UWORD32 u4_no_loop_filter_flag; 3427 WORD32 loop_filter_bit_pos; 3428 WORD32 log2_min_cu = 3; 3429 WORD32 min_cu = (1 << log2_min_cu); 3430 UWORD8 *pu1_src_tmp_chroma = pu1_src_chroma; 3431 WORD32 sao_blk_ht = ctb_size - 2 * SAO_SHIFT_CTB; 3432 WORD32 sao_blk_wd = ctb_size; 3433 WORD32 remaining_rows; 3434 WORD32 remaining_cols; 3435 3436 remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB); 3437 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB); 3438 if(remaining_rows <= 2 * SAO_SHIFT_CTB) 3439 sao_blk_ht += remaining_rows; 3440 if(remaining_cols <= 2 * SAO_SHIFT_CTB) 3441 sao_blk_wd += remaining_cols; 3442 3443 pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB * 2 : 0; 3444 pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0; 3445 3446 pu1_src_backup_chroma = ps_sao_ctxt->pu1_tmp_buf_chroma; 3447 3448 loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) + 3449 (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3); 3450 if(ps_sao_ctxt->i4_ctb_x > 0) 3451 loop_filter_bit_pos -= 2; 3452 3453 pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag + 3454 (loop_filter_bit_pos >> 3); 3455 3456 for(i = -(ps_sao_ctxt->i4_ctb_y ? 2 * SAO_SHIFT_CTB : 0) >> log2_min_cu; 3457 i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++) 3458 { 3459 WORD32 tmp_wd = sao_blk_wd; 3460 3461 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> 3462 (loop_filter_bit_pos & 7); 3463 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1; 3464 3465 if(u4_no_loop_filter_flag) 3466 { 3467 while(tmp_wd > 0) 3468 { 3469 if(CTZ(u4_no_loop_filter_flag)) 3470 { 3471 pu1_src_tmp_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); 3472 pu1_src_backup_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); 3473 tmp_wd -= CTZ(u4_no_loop_filter_flag) << log2_min_cu; 3474 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag)); 3475 } 3476 else 3477 { 3478 for(row = 0; row < min_cu / 2; row++) 3479 { 3480 for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++) 3481 { 3482 pu1_src_tmp_chroma[row * src_strd + col] = pu1_src_backup_chroma[row * backup_strd + col]; 3483 } 3484 } 3485 3486 pu1_src_tmp_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); 3487 pu1_src_backup_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); 3488 tmp_wd -= CTZ(~u4_no_loop_filter_flag) << log2_min_cu; 3489 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag)); 3490 } 3491 } 3492 3493 pu1_src_tmp_chroma -= sao_blk_wd; 3494 pu1_src_backup_chroma -= sao_blk_wd; 3495 } 3496 3497 pu1_src_tmp_chroma += ((src_strd / 2) << log2_min_cu); 3498 pu1_src_backup_chroma += ((backup_strd / 2) << log2_min_cu); 3499 } 3500 } 3501 } 3502 3503 } 3504 3505