1 /****************************************************************************** 2 * 3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ******************************************************************************/ 18 /** 19 ******************************************************************************* 20 * @file 21 * ihevc_sao.c 22 * 23 * @brief 24 * Contains function definitions for sample adaptive offset process 25 * 26 * @author 27 * Srinivas T 28 * 29 * @par List of Functions: 30 * 31 * @remarks 32 * None 33 * 34 ******************************************************************************* 35 */ 36 37 #include <stdio.h> 38 #include <stddef.h> 39 #include <stdlib.h> 40 #include <string.h> 41 #include <assert.h> 42 43 #include "ihevc_typedefs.h" 44 #include "iv.h" 45 #include "ivd.h" 46 #include "ihevcd_cxa.h" 47 #include "ithread.h" 48 49 #include "ihevc_defs.h" 50 #include "ihevc_debug.h" 51 #include "ihevc_defs.h" 52 #include "ihevc_structs.h" 53 #include "ihevc_macros.h" 54 #include "ihevc_platform_macros.h" 55 #include "ihevc_cabac_tables.h" 56 #include "ihevc_sao.h" 57 #include "ihevc_mem_fns.h" 58 59 #include "ihevc_error.h" 60 #include "ihevc_common_tables.h" 61 62 #include "ihevcd_trace.h" 63 #include "ihevcd_defs.h" 64 #include "ihevcd_function_selector.h" 65 #include "ihevcd_structs.h" 66 #include "ihevcd_error.h" 67 #include "ihevcd_nal.h" 68 #include "ihevcd_bitstream.h" 69 #include "ihevcd_job_queue.h" 70 #include "ihevcd_utils.h" 71 72 #include "ihevc_deblk.h" 73 #include "ihevc_deblk_tables.h" 74 #include "ihevcd_profile.h" 75 #include "ihevcd_sao.h" 76 #include "ihevcd_debug.h" 77 78 #define SAO_SHIFT_CTB 8 79 80 /** 81 * SAO at CTB level is implemented for a shifted CTB(8 pixels in x and y directions) 82 */ 83 void ihevcd_sao_ctb(sao_ctxt_t *ps_sao_ctxt) 84 { 85 codec_t *ps_codec = ps_sao_ctxt->ps_codec; 86 UWORD8 *pu1_src_luma; 87 UWORD8 *pu1_src_chroma; 88 WORD32 src_strd; 89 WORD32 ctb_size; 90 WORD32 log2_ctb_size; 91 sps_t *ps_sps; 92 sao_t *ps_sao; 93 WORD32 row, col; 94 UWORD8 au1_avail_luma[8]; 95 UWORD8 au1_avail_chroma[8]; 96 WORD32 i; 97 UWORD8 *pu1_src_top_luma; 98 UWORD8 *pu1_src_top_chroma; 99 UWORD8 *pu1_src_left_luma; 100 UWORD8 *pu1_src_left_chroma; 101 UWORD8 au1_src_top_right[2]; 102 UWORD8 au1_src_bot_left[2]; 103 UWORD8 *pu1_no_loop_filter_flag; 104 WORD32 loop_filter_strd; 105 106 WORD8 ai1_offset_y[5]; 107 WORD8 ai1_offset_cb[5]; 108 WORD8 ai1_offset_cr[5]; 109 110 PROFILE_DISABLE_SAO(); 111 112 ai1_offset_y[0] = 0; 113 ai1_offset_cb[0] = 0; 114 ai1_offset_cr[0] = 0; 115 116 ps_sps = ps_sao_ctxt->ps_sps; 117 log2_ctb_size = ps_sps->i1_log2_ctb_size; 118 ctb_size = (1 << log2_ctb_size); 119 src_strd = ps_sao_ctxt->ps_codec->i4_strd; 120 pu1_src_luma = ps_sao_ctxt->pu1_cur_pic_luma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd) << (log2_ctb_size)); 121 pu1_src_chroma = ps_sao_ctxt->pu1_cur_pic_chroma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd / 2) << (log2_ctb_size)); 122 123 ps_sao = ps_sao_ctxt->ps_pic_sao + ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb; 124 loop_filter_strd = (ps_sps->i2_pic_width_in_luma_samples + 63) / 64; 125 126 /* Current CTB */ 127 { 128 WORD32 sao_wd_luma; 129 WORD32 sao_wd_chroma; 130 WORD32 sao_ht_luma; 131 WORD32 sao_ht_chroma; 132 133 WORD32 remaining_rows; 134 WORD32 remaining_cols; 135 136 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size); 137 sao_wd_luma = MIN(ctb_size, remaining_cols); 138 sao_wd_chroma = MIN(ctb_size, remaining_cols); 139 140 remaining_rows = ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y << log2_ctb_size); 141 sao_ht_luma = MIN(ctb_size, remaining_rows); 142 sao_ht_chroma = MIN(ctb_size, remaining_rows) / 2; 143 144 pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size); 145 pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size); 146 pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size); 147 pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size); 148 149 pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag + 150 ((ps_sao_ctxt->i4_ctb_y * ctb_size) / 8) * loop_filter_strd + 151 ((ps_sao_ctxt->i4_ctb_x * ctb_size) / 64); 152 153 ai1_offset_y[1] = ps_sao->b4_y_offset_1; 154 ai1_offset_y[2] = ps_sao->b4_y_offset_2; 155 ai1_offset_y[3] = ps_sao->b4_y_offset_3; 156 ai1_offset_y[4] = ps_sao->b4_y_offset_4; 157 158 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1; 159 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2; 160 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3; 161 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4; 162 163 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1; 164 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2; 165 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3; 166 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4; 167 168 for(i = 0; i < 8; i++) 169 { 170 au1_avail_luma[i] = 255; 171 au1_avail_chroma[i] = 255; 172 } 173 174 175 if(0 == ps_sao_ctxt->i4_ctb_x) 176 { 177 au1_avail_luma[0] = 0; 178 au1_avail_luma[4] = 0; 179 au1_avail_luma[6] = 0; 180 181 au1_avail_chroma[0] = 0; 182 au1_avail_chroma[4] = 0; 183 au1_avail_chroma[6] = 0; 184 } 185 186 if(ps_sps->i2_pic_wd_in_ctb - 1 == ps_sao_ctxt->i4_ctb_x) 187 { 188 au1_avail_luma[1] = 0; 189 au1_avail_luma[5] = 0; 190 au1_avail_luma[7] = 0; 191 192 au1_avail_chroma[1] = 0; 193 au1_avail_chroma[5] = 0; 194 au1_avail_chroma[7] = 0; 195 } 196 197 if(0 == ps_sao_ctxt->i4_ctb_y) 198 { 199 au1_avail_luma[2] = 0; 200 au1_avail_luma[4] = 0; 201 au1_avail_luma[5] = 0; 202 203 au1_avail_chroma[2] = 0; 204 au1_avail_chroma[4] = 0; 205 au1_avail_chroma[5] = 0; 206 } 207 208 if(ps_sps->i2_pic_ht_in_ctb - 1 == ps_sao_ctxt->i4_ctb_y) 209 { 210 au1_avail_luma[3] = 0; 211 au1_avail_luma[6] = 0; 212 au1_avail_luma[7] = 0; 213 214 au1_avail_chroma[3] = 0; 215 au1_avail_chroma[6] = 0; 216 au1_avail_chroma[7] = 0; 217 } 218 219 220 if(0 == ps_sao->b3_y_type_idx) 221 { 222 /* Update left, top and top-left */ 223 for(row = 0; row < sao_ht_luma; row++) 224 { 225 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)]; 226 } 227 ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1]; 228 229 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma); 230 231 } 232 else 233 { 234 UWORD8 au1_src_copy[(MAX_CTB_SIZE + 2) * (MAX_CTB_SIZE + 2)]; 235 UWORD8 *pu1_src_copy = au1_src_copy + (MAX_CTB_SIZE + 2) + 1; 236 WORD32 tmp_strd = MAX_CTB_SIZE + 2; 237 WORD32 no_loop_filter_enabled = 0; 238 239 /* Check the loop filter flags and copy the original values for back up */ 240 { 241 UWORD32 u4_no_loop_filter_flag; 242 WORD32 min_cu = 8; 243 UWORD8 *pu1_src_tmp = pu1_src_luma; 244 245 for(i = 0; i < (sao_ht_luma + min_cu - 1) / min_cu; i++) 246 { 247 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> 248 ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma) / 8) % 8); 249 u4_no_loop_filter_flag &= (1 << ((sao_wd_luma + (min_cu - 1)) / min_cu)) - 1; 250 251 if(u4_no_loop_filter_flag) 252 { 253 WORD32 tmp_wd = sao_wd_luma; 254 no_loop_filter_enabled = 1; 255 while(tmp_wd > 0) 256 { 257 if(CTZ(u4_no_loop_filter_flag)) 258 { 259 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag)); 260 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd); 261 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd); 262 tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu; 263 } 264 else 265 { 266 for(row = 0; row < MIN(min_cu, sao_ht_luma - (i - 1) * min_cu); row++) 267 { 268 for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++) 269 { 270 pu1_src_copy[row * src_strd + col] = pu1_src_tmp[row * tmp_strd + col]; 271 } 272 } 273 274 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag)); 275 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd); 276 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd); 277 tmp_wd -= CTZ(~u4_no_loop_filter_flag) * min_cu; 278 } 279 } 280 281 pu1_src_tmp -= sao_wd_luma; 282 } 283 284 pu1_src_tmp += min_cu * src_strd; 285 pu1_src_copy += min_cu * tmp_strd; 286 } 287 } 288 289 if(1 == ps_sao->b3_y_type_idx) 290 { 291 ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma, 292 src_strd, 293 pu1_src_left_luma, 294 pu1_src_top_luma, 295 ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb, 296 ps_sao->b5_y_band_pos, 297 ai1_offset_y, 298 sao_wd_luma, 299 sao_ht_luma); 300 } 301 else // if(2 <= ps_sao->b3_y_type_idx) 302 { 303 au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma]; 304 au1_src_bot_left[0] = pu1_src_luma[sao_ht_luma * src_strd - 1]; 305 ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma, 306 src_strd, 307 pu1_src_left_luma, 308 pu1_src_top_luma, 309 ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb, 310 au1_src_top_right, 311 au1_src_bot_left, 312 au1_avail_luma, 313 ai1_offset_y, 314 sao_wd_luma, 315 sao_ht_luma); 316 } 317 318 /* Check the loop filter flags and copy the original values back if they are set */ 319 if(no_loop_filter_enabled) 320 { 321 UWORD32 u4_no_loop_filter_flag; 322 WORD32 min_cu = 8; 323 UWORD8 *pu1_src_tmp = pu1_src_luma; 324 325 for(i = 0; i < (sao_ht_luma + min_cu - 1) / min_cu; i++) 326 { 327 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma) / 8) % 8); 328 u4_no_loop_filter_flag &= (1 << ((sao_wd_luma + (min_cu - 1)) / min_cu)) - 1; 329 330 if(u4_no_loop_filter_flag) 331 { 332 WORD32 tmp_wd = sao_wd_luma; 333 while(tmp_wd > 0) 334 { 335 if(CTZ(u4_no_loop_filter_flag)) 336 { 337 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag)); 338 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd); 339 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd); 340 tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu; 341 } 342 else 343 { 344 for(row = 0; row < MIN(min_cu, sao_ht_luma - (i - 1) * min_cu); row++) 345 { 346 for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++) 347 { 348 pu1_src_tmp[row * src_strd + col] = pu1_src_copy[row * tmp_strd + col]; 349 } 350 } 351 352 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag)); 353 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd); 354 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd); 355 tmp_wd -= CTZ(~u4_no_loop_filter_flag) * min_cu; 356 } 357 } 358 359 pu1_src_tmp -= sao_wd_luma; 360 } 361 362 pu1_src_tmp += min_cu * src_strd; 363 pu1_src_copy += min_cu * tmp_strd; 364 } 365 } 366 367 } 368 369 if(0 == ps_sao->b3_cb_type_idx) 370 { 371 for(row = 0; row < sao_ht_chroma; row++) 372 { 373 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)]; 374 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)]; 375 } 376 ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2]; 377 ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1]; 378 379 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma); 380 } 381 else 382 { 383 UWORD8 au1_src_copy[(MAX_CTB_SIZE + 4) * (MAX_CTB_SIZE + 2)]; 384 UWORD8 *pu1_src_copy = au1_src_copy + (MAX_CTB_SIZE + 4) + 2; 385 WORD32 tmp_strd = MAX_CTB_SIZE + 4; 386 WORD32 no_loop_filter_enabled = 0; 387 388 /* Check the loop filter flags and copy the original values for back up */ 389 { 390 UWORD32 u4_no_loop_filter_flag; 391 WORD32 min_cu = 4; 392 UWORD8 *pu1_src_tmp = pu1_src_chroma; 393 394 for(i = 0; i < (sao_ht_chroma + min_cu - 1) / min_cu; i++) 395 { 396 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma) / 8) % 8); 397 u4_no_loop_filter_flag &= (1 << ((sao_wd_chroma + (min_cu - 1)) / min_cu)) - 1; 398 399 if(u4_no_loop_filter_flag) 400 { 401 WORD32 tmp_wd = sao_wd_chroma; 402 no_loop_filter_enabled = 1; 403 while(tmp_wd > 0) 404 { 405 if(CTZ(u4_no_loop_filter_flag)) 406 { 407 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag)); 408 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd); 409 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd); 410 tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu; 411 } 412 else 413 { 414 for(row = 0; row < MIN(min_cu, sao_ht_chroma - (i - 1) * min_cu); row++) 415 { 416 for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++) 417 { 418 pu1_src_copy[row * src_strd + col] = pu1_src_tmp[row * tmp_strd + col]; 419 } 420 } 421 422 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag)); 423 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd); 424 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd); 425 tmp_wd -= CTZ(~u4_no_loop_filter_flag) * min_cu; 426 } 427 } 428 429 pu1_src_tmp -= sao_wd_chroma; 430 } 431 432 pu1_src_tmp += min_cu * src_strd; 433 pu1_src_copy += min_cu * tmp_strd; 434 } 435 } 436 437 if(1 == ps_sao->b3_cb_type_idx) 438 { 439 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma, 440 src_strd, 441 pu1_src_left_chroma, 442 pu1_src_top_chroma, 443 ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb, 444 ps_sao->b5_cb_band_pos, 445 ps_sao->b5_cr_band_pos, 446 ai1_offset_cb, 447 ai1_offset_cr, 448 sao_wd_chroma, 449 sao_ht_chroma 450 ); 451 } 452 else // if(2 <= ps_sao->b3_cb_type_idx) 453 { 454 au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma]; 455 au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1]; 456 au1_src_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2]; 457 au1_src_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1]; 458 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma, 459 src_strd, 460 pu1_src_left_chroma, 461 pu1_src_top_chroma, 462 ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb, 463 au1_src_top_right, 464 au1_src_bot_left, 465 au1_avail_chroma, 466 ai1_offset_cb, 467 ai1_offset_cr, 468 sao_wd_chroma, 469 sao_ht_chroma); 470 } 471 472 /* Check the loop filter flags and copy the original values back if they are set */ 473 if(no_loop_filter_enabled) 474 { 475 UWORD32 u4_no_loop_filter_flag; 476 WORD32 min_cu = 4; 477 UWORD8 *pu1_src_tmp = pu1_src_chroma; 478 479 for(i = 0; i < (sao_ht_chroma + min_cu - 1) / min_cu; i++) 480 { 481 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma) / 8) % 8); 482 u4_no_loop_filter_flag &= (1 << ((sao_wd_chroma + (min_cu - 1)) / min_cu)) - 1; 483 484 if(u4_no_loop_filter_flag) 485 { 486 WORD32 tmp_wd = sao_wd_chroma; 487 while(tmp_wd > 0) 488 { 489 if(CTZ(u4_no_loop_filter_flag)) 490 { 491 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag)); 492 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd); 493 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd); 494 tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu; 495 } 496 else 497 { 498 for(row = 0; row < MIN(min_cu, sao_ht_chroma - (i - 1) * min_cu); row++) 499 { 500 for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++) 501 { 502 pu1_src_tmp[row * src_strd + col] = pu1_src_copy[row * tmp_strd + col]; 503 } 504 } 505 506 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag)); 507 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd); 508 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd); 509 tmp_wd -= CTZ(~u4_no_loop_filter_flag) * min_cu; 510 } 511 } 512 513 pu1_src_tmp -= sao_wd_chroma; 514 } 515 516 pu1_src_tmp += min_cu * src_strd; 517 pu1_src_copy += min_cu * tmp_strd; 518 } 519 } 520 521 } 522 523 } 524 } 525 526 void ihevcd_sao_shift_ctb(sao_ctxt_t *ps_sao_ctxt) 527 { 528 codec_t *ps_codec = ps_sao_ctxt->ps_codec; 529 UWORD8 *pu1_src_luma; 530 UWORD8 *pu1_src_chroma; 531 WORD32 src_strd; 532 WORD32 ctb_size; 533 WORD32 log2_ctb_size; 534 sps_t *ps_sps; 535 sao_t *ps_sao; 536 pps_t *ps_pps; 537 slice_header_t *ps_slice_hdr, *ps_slice_hdr_base; 538 tile_t *ps_tile; 539 UWORD16 *pu1_slice_idx; 540 UWORD16 *pu1_tile_idx; 541 WORD32 row, col; 542 UWORD8 au1_avail_luma[8]; 543 UWORD8 au1_avail_chroma[8]; 544 UWORD8 au1_tile_slice_boundary[8]; 545 UWORD8 au4_ilf_across_tile_slice_enable[8]; 546 WORD32 i; 547 UWORD8 *pu1_src_top_luma; 548 UWORD8 *pu1_src_top_chroma; 549 UWORD8 *pu1_src_left_luma; 550 UWORD8 *pu1_src_left_chroma; 551 UWORD8 au1_src_top_right[2]; 552 UWORD8 au1_src_bot_left[2]; 553 UWORD8 *pu1_no_loop_filter_flag; 554 UWORD8 *pu1_src_backup_luma; 555 UWORD8 *pu1_src_backup_chroma; 556 WORD32 backup_strd; 557 WORD32 loop_filter_strd; 558 559 WORD32 no_loop_filter_enabled_luma = 0; 560 WORD32 no_loop_filter_enabled_chroma = 0; 561 UWORD8 *pu1_sao_src_top_left_chroma_curr_ctb; 562 UWORD8 *pu1_sao_src_top_left_luma_curr_ctb; 563 UWORD8 *pu1_sao_src_luma_top_left_ctb; 564 UWORD8 *pu1_sao_src_chroma_top_left_ctb; 565 UWORD8 *pu1_sao_src_top_left_luma_top_right; 566 UWORD8 *pu1_sao_src_top_left_chroma_top_right; 567 UWORD8 u1_sao_src_top_left_luma_bot_left; 568 UWORD8 *pu1_sao_src_top_left_luma_bot_left; 569 UWORD8 *au1_sao_src_top_left_chroma_bot_left; 570 UWORD8 *pu1_sao_src_top_left_chroma_bot_left; 571 572 WORD8 ai1_offset_y[5]; 573 WORD8 ai1_offset_cb[5]; 574 WORD8 ai1_offset_cr[5]; 575 WORD32 chroma_yuv420sp_vu = ps_sao_ctxt->is_chroma_yuv420sp_vu; 576 577 PROFILE_DISABLE_SAO(); 578 579 ai1_offset_y[0] = 0; 580 ai1_offset_cb[0] = 0; 581 ai1_offset_cr[0] = 0; 582 583 ps_sps = ps_sao_ctxt->ps_sps; 584 ps_pps = ps_sao_ctxt->ps_pps; 585 ps_tile = ps_sao_ctxt->ps_tile; 586 587 log2_ctb_size = ps_sps->i1_log2_ctb_size; 588 ctb_size = (1 << log2_ctb_size); 589 src_strd = ps_sao_ctxt->ps_codec->i4_strd; 590 ps_slice_hdr_base = ps_sao_ctxt->ps_codec->ps_slice_hdr_base; 591 ps_slice_hdr = ps_slice_hdr_base + (ps_sao_ctxt->i4_cur_slice_idx & (MAX_SLICE_HDR_CNT - 1)); 592 593 pu1_slice_idx = ps_sao_ctxt->pu1_slice_idx; 594 pu1_tile_idx = ps_sao_ctxt->pu1_tile_idx; 595 pu1_src_luma = ps_sao_ctxt->pu1_cur_pic_luma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd) << (log2_ctb_size)); 596 pu1_src_chroma = ps_sao_ctxt->pu1_cur_pic_chroma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd / 2) << (log2_ctb_size)); 597 598 /*Stores the left value for each row ctbs- Needed for column tiles*/ 599 pu1_sao_src_top_left_luma_curr_ctb = ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb + ((ps_sao_ctxt->i4_ctb_y)); 600 pu1_sao_src_top_left_chroma_curr_ctb = ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb + (2 * (ps_sao_ctxt->i4_ctb_y)); 601 pu1_sao_src_luma_top_left_ctb = ps_sao_ctxt->pu1_sao_src_luma_top_left_ctb + ((ps_sao_ctxt->i4_ctb_y)); 602 pu1_sao_src_chroma_top_left_ctb = ps_sao_ctxt->pu1_sao_src_chroma_top_left_ctb + (2 * ps_sao_ctxt->i4_ctb_y); 603 u1_sao_src_top_left_luma_bot_left = ps_sao_ctxt->u1_sao_src_top_left_luma_bot_left; // + ((ps_sao_ctxt->i4_ctb_y)); 604 pu1_sao_src_top_left_luma_bot_left = ps_sao_ctxt->pu1_sao_src_top_left_luma_bot_left + ((ps_sao_ctxt->i4_ctb_y)); 605 au1_sao_src_top_left_chroma_bot_left = ps_sao_ctxt->au1_sao_src_top_left_chroma_bot_left; // + (2 * ps_sao_ctxt->i4_ctb_y); 606 pu1_sao_src_top_left_chroma_bot_left = ps_sao_ctxt->pu1_sao_src_top_left_chroma_bot_left + (2 * ps_sao_ctxt->i4_ctb_y); 607 pu1_sao_src_top_left_luma_top_right = ps_sao_ctxt->pu1_sao_src_top_left_luma_top_right + ((ps_sao_ctxt->i4_ctb_x)); 608 pu1_sao_src_top_left_chroma_top_right = ps_sao_ctxt->pu1_sao_src_top_left_chroma_top_right + (2 * ps_sao_ctxt->i4_ctb_x); 609 610 ps_sao = ps_sao_ctxt->ps_pic_sao + ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb; 611 loop_filter_strd = (ps_sps->i2_pic_width_in_luma_samples + 63) >> 6; 612 backup_strd = 2 * MAX_CTB_SIZE; 613 614 DEBUG_INIT_TMP_BUF(ps_sao_ctxt->pu1_tmp_buf_luma, ps_sao_ctxt->pu1_tmp_buf_chroma); 615 616 { 617 /* Check the loop filter flags and copy the original values for back up */ 618 /* Luma */ 619 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag) 620 { 621 UWORD32 u4_no_loop_filter_flag; 622 WORD32 loop_filter_bit_pos; 623 WORD32 log2_min_cu = 3; 624 WORD32 min_cu = (1 << log2_min_cu); 625 UWORD8 *pu1_src_tmp_luma = pu1_src_luma; 626 WORD32 sao_blk_ht = ctb_size - SAO_SHIFT_CTB; 627 WORD32 sao_blk_wd = ctb_size; 628 WORD32 remaining_rows; 629 WORD32 remaining_cols; 630 631 remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB); 632 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB); 633 if(remaining_rows <= SAO_SHIFT_CTB) 634 sao_blk_ht += remaining_rows; 635 if(remaining_cols <= SAO_SHIFT_CTB) 636 sao_blk_wd += remaining_cols; 637 638 pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB : 0; 639 pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0; 640 641 pu1_src_backup_luma = ps_sao_ctxt->pu1_tmp_buf_luma; 642 643 loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) + 644 (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3); 645 if(ps_sao_ctxt->i4_ctb_x > 0) 646 loop_filter_bit_pos -= 1; 647 648 pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag + 649 (loop_filter_bit_pos >> 3); 650 651 for(i = -(ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB : 0) >> log2_min_cu; 652 i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++) 653 { 654 WORD32 tmp_wd = sao_blk_wd; 655 656 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> 657 (loop_filter_bit_pos & 7); 658 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1; 659 660 if(u4_no_loop_filter_flag) 661 { 662 no_loop_filter_enabled_luma = 1; 663 while(tmp_wd > 0) 664 { 665 if(CTZ(u4_no_loop_filter_flag)) 666 { 667 pu1_src_tmp_luma += MIN((WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); 668 pu1_src_backup_luma += MIN((WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); 669 tmp_wd -= CTZ(u4_no_loop_filter_flag) << log2_min_cu; 670 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag)); 671 } 672 else 673 { 674 for(row = 0; row < min_cu; row++) 675 { 676 for(col = 0; col < MIN((WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++) 677 { 678 pu1_src_backup_luma[row * backup_strd + col] = pu1_src_tmp_luma[row * src_strd + col]; 679 } 680 } 681 pu1_src_tmp_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); 682 pu1_src_backup_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); 683 tmp_wd -= CTZ(~u4_no_loop_filter_flag) << log2_min_cu; 684 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag)); 685 } 686 } 687 688 pu1_src_tmp_luma -= sao_blk_wd; 689 pu1_src_backup_luma -= sao_blk_wd; 690 } 691 692 pu1_src_tmp_luma += (src_strd << log2_min_cu); 693 pu1_src_backup_luma += (backup_strd << log2_min_cu); 694 } 695 } 696 697 /* Chroma */ 698 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag) 699 { 700 UWORD32 u4_no_loop_filter_flag; 701 WORD32 loop_filter_bit_pos; 702 WORD32 log2_min_cu = 3; 703 WORD32 min_cu = (1 << log2_min_cu); 704 UWORD8 *pu1_src_tmp_chroma = pu1_src_chroma; 705 WORD32 sao_blk_ht = ctb_size - 2 * SAO_SHIFT_CTB; 706 WORD32 sao_blk_wd = ctb_size; 707 WORD32 remaining_rows; 708 WORD32 remaining_cols; 709 710 remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB); 711 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB); 712 if(remaining_rows <= 2 * SAO_SHIFT_CTB) 713 sao_blk_ht += remaining_rows; 714 if(remaining_cols <= 2 * SAO_SHIFT_CTB) 715 sao_blk_wd += remaining_cols; 716 717 pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB * 2 : 0; 718 pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0; 719 720 pu1_src_backup_chroma = ps_sao_ctxt->pu1_tmp_buf_chroma; 721 722 loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) + 723 (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3); 724 if(ps_sao_ctxt->i4_ctb_x > 0) 725 loop_filter_bit_pos -= 2; 726 727 pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag + 728 (loop_filter_bit_pos >> 3); 729 730 for(i = -(ps_sao_ctxt->i4_ctb_y ? 2 * SAO_SHIFT_CTB : 0) >> log2_min_cu; 731 i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++) 732 { 733 WORD32 tmp_wd = sao_blk_wd; 734 735 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> 736 (loop_filter_bit_pos & 7); 737 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1; 738 739 if(u4_no_loop_filter_flag) 740 { 741 no_loop_filter_enabled_chroma = 1; 742 while(tmp_wd > 0) 743 { 744 if(CTZ(u4_no_loop_filter_flag)) 745 { 746 pu1_src_tmp_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); 747 pu1_src_backup_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); 748 tmp_wd -= CTZ(u4_no_loop_filter_flag) << log2_min_cu; 749 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag)); 750 } 751 else 752 { 753 for(row = 0; row < min_cu / 2; row++) 754 { 755 for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++) 756 { 757 pu1_src_backup_chroma[row * backup_strd + col] = pu1_src_tmp_chroma[row * src_strd + col]; 758 } 759 } 760 761 pu1_src_tmp_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); 762 pu1_src_backup_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); 763 tmp_wd -= CTZ(~u4_no_loop_filter_flag) << log2_min_cu; 764 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag)); 765 } 766 } 767 768 pu1_src_tmp_chroma -= sao_blk_wd; 769 pu1_src_backup_chroma -= sao_blk_wd; 770 } 771 772 pu1_src_tmp_chroma += ((src_strd / 2) << log2_min_cu); 773 pu1_src_backup_chroma += ((backup_strd / 2) << log2_min_cu); 774 } 775 } 776 } 777 778 DEBUG_PROCESS_TMP_BUF(ps_sao_ctxt->pu1_tmp_buf_luma, ps_sao_ctxt->pu1_tmp_buf_chroma); 779 780 /* Top-left CTB */ 781 if(ps_sao_ctxt->i4_ctb_x > 0 && ps_sao_ctxt->i4_ctb_y > 0) 782 { 783 WORD32 sao_wd_luma = SAO_SHIFT_CTB; 784 WORD32 sao_wd_chroma = 2 * SAO_SHIFT_CTB; 785 WORD32 sao_ht_luma = SAO_SHIFT_CTB; 786 WORD32 sao_ht_chroma = SAO_SHIFT_CTB; 787 788 WORD32 ctbx_tl_t = 0, ctbx_tl_l = 0, ctbx_tl_r = 0, ctbx_tl_d = 0, ctbx_tl = 0; 789 WORD32 ctby_tl_t = 0, ctby_tl_l = 0, ctby_tl_r = 0, ctby_tl_d = 0, ctby_tl = 0; 790 WORD32 au4_idx_tl[8], idx_tl; 791 792 793 pu1_src_luma -= (sao_wd_luma + sao_ht_luma * src_strd); 794 pu1_src_chroma -= (sao_wd_chroma + sao_ht_chroma * src_strd); 795 ps_sao -= (1 + ps_sps->i2_pic_wd_in_ctb); 796 pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma; 797 pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma; 798 pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma; 799 pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - (2 * sao_ht_chroma); 800 801 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag) 802 { 803 if(0 == ps_sao->b3_y_type_idx) 804 { 805 /* Update left, top and top-left */ 806 for(row = 0; row < sao_ht_luma; row++) 807 { 808 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)]; 809 } 810 pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1]; 811 812 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma); 813 814 815 } 816 817 else if(1 == ps_sao->b3_y_type_idx) 818 { 819 ai1_offset_y[1] = ps_sao->b4_y_offset_1; 820 ai1_offset_y[2] = ps_sao->b4_y_offset_2; 821 ai1_offset_y[3] = ps_sao->b4_y_offset_3; 822 ai1_offset_y[4] = ps_sao->b4_y_offset_4; 823 824 ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma, 825 src_strd, 826 pu1_src_left_luma, 827 pu1_src_top_luma, 828 pu1_sao_src_luma_top_left_ctb, 829 ps_sao->b5_y_band_pos, 830 ai1_offset_y, 831 sao_wd_luma, 832 sao_ht_luma 833 ); 834 } 835 836 else // if(2 <= ps_sao->b3_y_type_idx) 837 { 838 ai1_offset_y[1] = ps_sao->b4_y_offset_1; 839 ai1_offset_y[2] = ps_sao->b4_y_offset_2; 840 ai1_offset_y[3] = ps_sao->b4_y_offset_3; 841 ai1_offset_y[4] = ps_sao->b4_y_offset_4; 842 843 for(i = 0; i < 8; i++) 844 { 845 au1_avail_luma[i] = 255; 846 au1_tile_slice_boundary[i] = 0; 847 au4_idx_tl[i] = 0; 848 au4_ilf_across_tile_slice_enable[i] = 1; 849 } 850 851 /****************************************************************** 852 * Derive the Top-left CTB's neighbor pixel's slice indices. 853 * 854 * TL_T 855 * 4 _2__5________ 856 * 0 | | | 857 * TL_L | TL | 1 TL_R| 858 * |____|_______|____ 859 * 6|TL_D|7 | | 860 * | 3 | | | 861 * |____|_______| | 862 * | | 863 * | | 864 * |____________| 865 * 866 *****************************************************************/ 867 868 /*In case of slices, unless we encounter multiple slice/tiled clips, don't enter*/ 869 { 870 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag)) 871 { 872 { 873 /*Assuming that sao shift is uniform along x and y directions*/ 874 if((0 == (1 << log2_ctb_size) - sao_wd_luma) && (ps_sao_ctxt->i4_ctb_y > 1) && (ps_sao_ctxt->i4_ctb_x > 1)) 875 { 876 ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 2; 877 ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 2; 878 } 879 else if(!(0 == (1 << log2_ctb_size) - sao_wd_luma)) 880 { 881 ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 1; 882 ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 1; 883 } 884 ctbx_tl_t = ps_sao_ctxt->i4_ctb_x - 1; 885 ctby_tl_l = ps_sao_ctxt->i4_ctb_y - 1; 886 887 ctbx_tl_r = ps_sao_ctxt->i4_ctb_x; 888 ctby_tl_r = ps_sao_ctxt->i4_ctb_y - 1; 889 890 ctbx_tl_d = ps_sao_ctxt->i4_ctb_x - 1; 891 ctby_tl_d = ps_sao_ctxt->i4_ctb_y; 892 893 ctbx_tl = ps_sao_ctxt->i4_ctb_x - 1; 894 ctby_tl = ps_sao_ctxt->i4_ctb_y - 1; 895 } 896 897 if(!ps_slice_hdr->i1_first_slice_in_pic_flag) 898 { 899 /*Calculate slice indices for neighbor pixels*/ 900 idx_tl = pu1_slice_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)]; 901 au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_slice_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)); 902 au4_idx_tl[0] = pu1_slice_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)]; 903 au4_idx_tl[1] = au4_idx_tl[5] = pu1_slice_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)]; 904 au4_idx_tl[3] = au4_idx_tl[6] = pu1_slice_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)]; 905 au4_idx_tl[7] = pu1_slice_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)]; 906 907 if((0 == (1 << log2_ctb_size) - sao_wd_luma)) 908 { 909 if(ps_sao_ctxt->i4_ctb_x == 1) 910 { 911 au4_idx_tl[6] = -1; 912 au4_idx_tl[4] = -1; 913 } 914 else 915 { 916 au4_idx_tl[6] = pu1_slice_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)]; 917 } 918 if(ps_sao_ctxt->i4_ctb_y == 1) 919 { 920 au4_idx_tl[5] = -1; 921 au4_idx_tl[4] = -1; 922 } 923 else 924 { 925 au4_idx_tl[5] = pu1_slice_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)]; 926 au4_idx_tl[4] = pu1_slice_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)]; 927 } 928 au4_idx_tl[7] = pu1_slice_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)]; 929 } 930 931 /* Verify that the neighbor ctbs dont cross pic boundary. 932 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag 933 * of the pixel having a greater address is checked. Accordingly, set the availability flags. 934 * Hence, for top and left pixels, current ctb flag is checked. For right and down pixels, 935 * the respective pixel's flags are checked 936 */ 937 938 if((0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma)) 939 { 940 au4_ilf_across_tile_slice_enable[4] = 0; 941 au4_ilf_across_tile_slice_enable[6] = 0; 942 } 943 else 944 { 945 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_tl[6])->i1_slice_loop_filter_across_slices_enabled_flag; 946 } 947 if((0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma)) 948 { 949 au4_ilf_across_tile_slice_enable[5] = 0; 950 au4_ilf_across_tile_slice_enable[4] = 0; 951 } 952 else 953 { 954 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag; 955 au4_ilf_across_tile_slice_enable[4] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag; 956 } 957 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag; 958 au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag; 959 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_tl[1])->i1_slice_loop_filter_across_slices_enabled_flag; 960 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_tl[3])->i1_slice_loop_filter_across_slices_enabled_flag; 961 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_tl[7])->i1_slice_loop_filter_across_slices_enabled_flag; 962 963 /* 964 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag 965 * of the pixel having a greater address is checked. Accordingly, set the availability flags. 966 * Hence, for top and left pixels, current ctb flag is checked. For right and down pixels, 967 * the respective pixel's flags are checked 968 */ 969 for(i = 0; i < 8; i++) 970 { 971 /*Sets the edges that lie on the slice/tile boundary*/ 972 if(au4_idx_tl[i] != idx_tl) 973 { 974 au1_tile_slice_boundary[i] = 1; 975 } 976 else 977 { 978 au4_ilf_across_tile_slice_enable[i] = 1; 979 } 980 } 981 982 ps_codec->s_func_selector.ihevc_memset_mul_8_fptr((UWORD8 *)au4_idx_tl, 0, 8 * sizeof(WORD32)); 983 } 984 985 if(ps_pps->i1_tiles_enabled_flag) 986 { 987 /* Calculate availability flags at slice boundary */ 988 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y)))) 989 { 990 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */ 991 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag) 992 { 993 /*Set the boundary arrays*/ 994 /*Calculate tile indices for neighbor pixels*/ 995 idx_tl = pu1_tile_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)]; 996 au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_tile_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)); 997 au4_idx_tl[0] = pu1_tile_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)]; 998 au4_idx_tl[1] = au4_idx_tl[5] = pu1_tile_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)]; 999 au4_idx_tl[3] = au4_idx_tl[6] = pu1_tile_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)]; 1000 au4_idx_tl[7] = pu1_tile_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)]; 1001 1002 if((0 == (1 << log2_ctb_size) - sao_wd_luma)) 1003 { 1004 if(ps_sao_ctxt->i4_ctb_x == 1) 1005 { 1006 au4_idx_tl[6] = -1; 1007 au4_idx_tl[4] = -1; 1008 } 1009 else 1010 { 1011 au4_idx_tl[6] = pu1_tile_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)]; 1012 } 1013 if(ps_sao_ctxt->i4_ctb_y == 1) 1014 { 1015 au4_idx_tl[5] = -1; 1016 au4_idx_tl[4] = -1; 1017 } 1018 else 1019 { 1020 au4_idx_tl[5] = pu1_tile_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)]; 1021 au4_idx_tl[4] = pu1_tile_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)]; 1022 } 1023 au4_idx_tl[7] = pu1_tile_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)]; 1024 } 1025 for(i = 0; i < 8; i++) 1026 { 1027 /*Sets the edges that lie on the tile boundary*/ 1028 if(au4_idx_tl[i] != idx_tl) 1029 { 1030 au1_tile_slice_boundary[i] |= 1; 1031 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0 1032 } 1033 } 1034 } 1035 } 1036 } 1037 1038 1039 /*Set availability flags based on tile and slice boundaries*/ 1040 for(i = 0; i < 8; i++) 1041 { 1042 /*Sets the edges that lie on the slice/tile boundary*/ 1043 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i])) 1044 { 1045 au1_avail_luma[i] = 0; 1046 } 1047 } 1048 } 1049 } 1050 1051 if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma) 1052 { 1053 au1_avail_luma[0] = 0; 1054 au1_avail_luma[4] = 0; 1055 au1_avail_luma[6] = 0; 1056 } 1057 1058 if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x) 1059 { 1060 au1_avail_luma[1] = 0; 1061 au1_avail_luma[5] = 0; 1062 au1_avail_luma[7] = 0; 1063 } 1064 //y==1 case 1065 if((0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma)) 1066 { 1067 au1_avail_luma[2] = 0; 1068 au1_avail_luma[4] = 0; 1069 au1_avail_luma[5] = 0; 1070 } 1071 if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y) 1072 { 1073 au1_avail_luma[3] = 0; 1074 au1_avail_luma[6] = 0; 1075 au1_avail_luma[7] = 0; 1076 } 1077 1078 { 1079 au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma]; 1080 u1_sao_src_top_left_luma_bot_left = pu1_src_left_luma[sao_ht_luma]; 1081 ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma, 1082 src_strd, 1083 pu1_src_left_luma, 1084 pu1_src_top_luma, 1085 pu1_sao_src_luma_top_left_ctb, 1086 au1_src_top_right, 1087 &u1_sao_src_top_left_luma_bot_left, 1088 au1_avail_luma, 1089 ai1_offset_y, 1090 sao_wd_luma, 1091 sao_ht_luma); 1092 } 1093 } 1094 1095 } 1096 1097 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag) 1098 { 1099 if(0 == ps_sao->b3_cb_type_idx) 1100 { 1101 for(row = 0; row < sao_ht_chroma; row++) 1102 { 1103 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)]; 1104 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)]; 1105 } 1106 pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2]; 1107 pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1]; 1108 1109 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma); 1110 1111 } 1112 1113 else if(1 == ps_sao->b3_cb_type_idx) 1114 { 1115 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1; 1116 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2; 1117 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3; 1118 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4; 1119 1120 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1; 1121 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2; 1122 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3; 1123 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4; 1124 1125 if(chroma_yuv420sp_vu) 1126 { 1127 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma, 1128 src_strd, 1129 pu1_src_left_chroma, 1130 pu1_src_top_chroma, 1131 pu1_sao_src_chroma_top_left_ctb, 1132 ps_sao->b5_cr_band_pos, 1133 ps_sao->b5_cb_band_pos, 1134 ai1_offset_cr, 1135 ai1_offset_cb, 1136 sao_wd_chroma, 1137 sao_ht_chroma 1138 ); 1139 } 1140 else 1141 { 1142 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma, 1143 src_strd, 1144 pu1_src_left_chroma, 1145 pu1_src_top_chroma, 1146 pu1_sao_src_chroma_top_left_ctb, 1147 ps_sao->b5_cb_band_pos, 1148 ps_sao->b5_cr_band_pos, 1149 ai1_offset_cb, 1150 ai1_offset_cr, 1151 sao_wd_chroma, 1152 sao_ht_chroma 1153 ); 1154 } 1155 } 1156 1157 else // if(2 <= ps_sao->b3_cb_type_idx) 1158 { 1159 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1; 1160 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2; 1161 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3; 1162 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4; 1163 1164 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1; 1165 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2; 1166 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3; 1167 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4; 1168 for(i = 0; i < 8; i++) 1169 { 1170 au1_avail_chroma[i] = 255; 1171 au1_tile_slice_boundary[i] = 0; 1172 au4_idx_tl[i] = 0; 1173 au4_ilf_across_tile_slice_enable[i] = 1; 1174 } 1175 /*In case of slices*/ 1176 { 1177 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag)) 1178 { 1179 if((0 == (1 << log2_ctb_size) - sao_wd_chroma) && (ps_sao_ctxt->i4_ctb_y > 1) && (ps_sao_ctxt->i4_ctb_x > 1)) 1180 { 1181 ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 2; 1182 ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 2; 1183 } 1184 else if(!(0 == (1 << log2_ctb_size) - sao_wd_chroma)) 1185 { 1186 ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 1; 1187 ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 1; 1188 } 1189 ctbx_tl_t = ps_sao_ctxt->i4_ctb_x - 1; 1190 ctby_tl_l = ps_sao_ctxt->i4_ctb_y - 1; 1191 1192 ctbx_tl_r = ps_sao_ctxt->i4_ctb_x; 1193 ctby_tl_r = ps_sao_ctxt->i4_ctb_y - 1; 1194 1195 ctbx_tl_d = ps_sao_ctxt->i4_ctb_x - 1; 1196 ctby_tl_d = ps_sao_ctxt->i4_ctb_y; 1197 1198 ctbx_tl = ps_sao_ctxt->i4_ctb_x - 1; 1199 ctby_tl = ps_sao_ctxt->i4_ctb_y - 1; 1200 1201 if(!ps_slice_hdr->i1_first_slice_in_pic_flag) 1202 { 1203 1204 idx_tl = pu1_slice_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)]; 1205 au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_slice_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)); 1206 au4_idx_tl[0] = pu1_slice_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)]; 1207 au4_idx_tl[1] = au4_idx_tl[5] = pu1_slice_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)]; 1208 au4_idx_tl[3] = au4_idx_tl[6] = pu1_slice_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)]; 1209 au4_idx_tl[7] = pu1_slice_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)]; 1210 1211 if((0 == (1 << log2_ctb_size) - sao_wd_chroma)) 1212 { 1213 if(ps_sao_ctxt->i4_ctb_x == 1) 1214 { 1215 au4_idx_tl[6] = -1; 1216 au4_idx_tl[4] = -1; 1217 } 1218 else 1219 { 1220 au4_idx_tl[6] = pu1_slice_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)]; 1221 } 1222 if(ps_sao_ctxt->i4_ctb_y == 1) 1223 { 1224 au4_idx_tl[5] = -1; 1225 au4_idx_tl[4] = -1; 1226 } 1227 else 1228 { 1229 au4_idx_tl[5] = pu1_slice_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)]; 1230 au4_idx_tl[4] = pu1_slice_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)]; 1231 } 1232 au4_idx_tl[7] = pu1_slice_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)]; 1233 } 1234 1235 /* Verify that the neighbor ctbs don't cross pic boundary 1236 * Also, the ILF flag belonging to the higher pixel address (between neighbor and current pixels) must be assigned*/ 1237 if((0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma)) 1238 { 1239 au4_ilf_across_tile_slice_enable[4] = 0; 1240 au4_ilf_across_tile_slice_enable[6] = 0; 1241 } 1242 else 1243 { 1244 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_tl[6])->i1_slice_loop_filter_across_slices_enabled_flag; 1245 } 1246 if((0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma)) 1247 { 1248 au4_ilf_across_tile_slice_enable[5] = 0; 1249 au4_ilf_across_tile_slice_enable[4] = 0; 1250 } 1251 else 1252 { 1253 au4_ilf_across_tile_slice_enable[4] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag; 1254 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag; 1255 } 1256 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag; 1257 au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag; 1258 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_tl[1])->i1_slice_loop_filter_across_slices_enabled_flag; 1259 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_tl[3])->i1_slice_loop_filter_across_slices_enabled_flag; 1260 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_tl[7])->i1_slice_loop_filter_across_slices_enabled_flag; 1261 /* 1262 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag 1263 * of the pixel having a greater address is checked. Accordingly, set the availability flags 1264 */ 1265 for(i = 0; i < 8; i++) 1266 { 1267 /*Sets the edges that lie on the slice/tile boundary*/ 1268 if(au4_idx_tl[i] != idx_tl) 1269 { 1270 au1_tile_slice_boundary[i] = 1; 1271 } 1272 else 1273 { 1274 au4_ilf_across_tile_slice_enable[i] = 1; 1275 } 1276 } 1277 1278 /*Reset indices*/ 1279 for(i = 0; i < 8; i++) 1280 { 1281 au4_idx_tl[i] = 0; 1282 } 1283 } 1284 if(ps_pps->i1_tiles_enabled_flag) 1285 { 1286 /* Calculate availability flags at slice boundary */ 1287 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y)))) 1288 { 1289 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */ 1290 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag) 1291 { 1292 /*Set the boundary arrays*/ 1293 /*Calculate tile indices for neighbor pixels*/ 1294 idx_tl = pu1_tile_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)]; 1295 au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_tile_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)); 1296 au4_idx_tl[0] = pu1_tile_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)]; 1297 au4_idx_tl[1] = au4_idx_tl[5] = pu1_tile_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)]; 1298 au4_idx_tl[3] = au4_idx_tl[6] = pu1_tile_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)]; 1299 au4_idx_tl[7] = pu1_tile_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)]; 1300 1301 if((0 == (1 << log2_ctb_size) - sao_wd_luma)) 1302 { 1303 if(ps_sao_ctxt->i4_ctb_x == 1) 1304 { 1305 au4_idx_tl[6] = -1; 1306 au4_idx_tl[4] = -1; 1307 } 1308 else 1309 { 1310 au4_idx_tl[6] = pu1_tile_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)]; 1311 } 1312 if(ps_sao_ctxt->i4_ctb_y == 1) 1313 { 1314 au4_idx_tl[5] = -1; 1315 au4_idx_tl[4] = -1; 1316 } 1317 else 1318 { 1319 au4_idx_tl[5] = pu1_tile_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)]; 1320 au4_idx_tl[4] = pu1_tile_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)]; 1321 } 1322 au4_idx_tl[7] = pu1_tile_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)]; 1323 } 1324 for(i = 0; i < 8; i++) 1325 { 1326 /*Sets the edges that lie on the tile boundary*/ 1327 if(au4_idx_tl[i] != idx_tl) 1328 { 1329 au1_tile_slice_boundary[i] |= 1; 1330 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0 1331 } 1332 } 1333 } 1334 } 1335 } 1336 1337 for(i = 0; i < 8; i++) 1338 { 1339 /*Sets the edges that lie on the slice/tile boundary*/ 1340 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i])) 1341 { 1342 au1_avail_chroma[i] = 0; 1343 } 1344 } 1345 } 1346 } 1347 1348 if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma) 1349 { 1350 au1_avail_chroma[0] = 0; 1351 au1_avail_chroma[4] = 0; 1352 au1_avail_chroma[6] = 0; 1353 } 1354 if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x) 1355 { 1356 au1_avail_chroma[1] = 0; 1357 au1_avail_chroma[5] = 0; 1358 au1_avail_chroma[7] = 0; 1359 } 1360 1361 if(0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma) 1362 { 1363 au1_avail_chroma[2] = 0; 1364 au1_avail_chroma[4] = 0; 1365 au1_avail_chroma[5] = 0; 1366 } 1367 if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y) 1368 { 1369 au1_avail_chroma[3] = 0; 1370 au1_avail_chroma[6] = 0; 1371 au1_avail_chroma[7] = 0; 1372 } 1373 1374 { 1375 au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma]; 1376 au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1]; 1377 au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_left_chroma[2 * sao_ht_chroma]; 1378 au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_left_chroma[2 * sao_ht_chroma + 1]; 1379 if((ctb_size == 16) && (ps_sao_ctxt->i4_ctb_y != ps_sps->i2_pic_ht_in_ctb - 1)) 1380 { 1381 au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2]; 1382 au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1]; 1383 } 1384 1385 if(chroma_yuv420sp_vu) 1386 { 1387 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma, 1388 src_strd, 1389 pu1_src_left_chroma, 1390 pu1_src_top_chroma, 1391 pu1_sao_src_chroma_top_left_ctb, 1392 au1_src_top_right, 1393 au1_sao_src_top_left_chroma_bot_left, 1394 au1_avail_chroma, 1395 ai1_offset_cr, 1396 ai1_offset_cb, 1397 sao_wd_chroma, 1398 sao_ht_chroma); 1399 } 1400 else 1401 { 1402 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma, 1403 src_strd, 1404 pu1_src_left_chroma, 1405 pu1_src_top_chroma, 1406 pu1_sao_src_chroma_top_left_ctb, 1407 au1_src_top_right, 1408 au1_sao_src_top_left_chroma_bot_left, 1409 au1_avail_chroma, 1410 ai1_offset_cb, 1411 ai1_offset_cr, 1412 sao_wd_chroma, 1413 sao_ht_chroma); 1414 } 1415 } 1416 } 1417 } 1418 1419 pu1_src_luma += sao_wd_luma + sao_ht_luma * src_strd; 1420 pu1_src_chroma += sao_wd_chroma + sao_ht_chroma * src_strd; 1421 ps_sao += (1 + ps_sps->i2_pic_wd_in_ctb); 1422 } 1423 1424 1425 /* Top CTB */ 1426 if((ps_sao_ctxt->i4_ctb_y > 0)) 1427 { 1428 WORD32 sao_wd_luma = ctb_size - SAO_SHIFT_CTB; 1429 WORD32 sao_wd_chroma = ctb_size - 2 * SAO_SHIFT_CTB; 1430 WORD32 sao_ht_luma = SAO_SHIFT_CTB; 1431 WORD32 sao_ht_chroma = SAO_SHIFT_CTB; 1432 1433 WORD32 ctbx_t_t = 0, ctbx_t_l = 0, ctbx_t_r = 0, ctbx_t_d = 0, ctbx_t = 0; 1434 WORD32 ctby_t_t = 0, ctby_t_l = 0, ctby_t_r = 0, ctby_t_d = 0, ctby_t = 0; 1435 WORD32 au4_idx_t[8], idx_t; 1436 1437 WORD32 remaining_cols; 1438 1439 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_luma); 1440 if(remaining_cols <= SAO_SHIFT_CTB) 1441 { 1442 sao_wd_luma += remaining_cols; 1443 } 1444 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_chroma); 1445 if(remaining_cols <= 2 * SAO_SHIFT_CTB) 1446 { 1447 sao_wd_chroma += remaining_cols; 1448 } 1449 1450 pu1_src_luma -= (sao_ht_luma * src_strd); 1451 pu1_src_chroma -= (sao_ht_chroma * src_strd); 1452 ps_sao -= (ps_sps->i2_pic_wd_in_ctb); 1453 pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size); 1454 pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size); 1455 pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_chroma; 1456 pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - (2 * sao_ht_chroma); 1457 1458 if(0 != sao_wd_luma) 1459 { 1460 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag) 1461 { 1462 if(0 == ps_sao->b3_y_type_idx) 1463 { 1464 /* Update left, top and top-left */ 1465 for(row = 0; row < sao_ht_luma; row++) 1466 { 1467 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)]; 1468 } 1469 pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1]; 1470 1471 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma); 1472 1473 } 1474 1475 else if(1 == ps_sao->b3_y_type_idx) 1476 { 1477 ai1_offset_y[1] = ps_sao->b4_y_offset_1; 1478 ai1_offset_y[2] = ps_sao->b4_y_offset_2; 1479 ai1_offset_y[3] = ps_sao->b4_y_offset_3; 1480 ai1_offset_y[4] = ps_sao->b4_y_offset_4; 1481 1482 ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma, 1483 src_strd, 1484 pu1_src_left_luma, 1485 pu1_src_top_luma, 1486 pu1_sao_src_luma_top_left_ctb, 1487 ps_sao->b5_y_band_pos, 1488 ai1_offset_y, 1489 sao_wd_luma, 1490 sao_ht_luma 1491 ); 1492 } 1493 1494 else // if(2 <= ps_sao->b3_y_type_idx) 1495 { 1496 ai1_offset_y[1] = ps_sao->b4_y_offset_1; 1497 ai1_offset_y[2] = ps_sao->b4_y_offset_2; 1498 ai1_offset_y[3] = ps_sao->b4_y_offset_3; 1499 ai1_offset_y[4] = ps_sao->b4_y_offset_4; 1500 1501 ps_codec->s_func_selector.ihevc_memset_mul_8_fptr(au1_avail_luma, 255, 8); 1502 ps_codec->s_func_selector.ihevc_memset_mul_8_fptr(au1_tile_slice_boundary, 0, 8); 1503 ps_codec->s_func_selector.ihevc_memset_mul_8_fptr((UWORD8 *)au4_idx_t, 0, 8 * sizeof(WORD32)); 1504 1505 for(i = 0; i < 8; i++) 1506 { 1507 1508 au4_ilf_across_tile_slice_enable[i] = 1; 1509 } 1510 /****************************************************************** 1511 * Derive the Top-left CTB's neighbor pixel's slice indices. 1512 * 1513 * T_T 1514 * ____________ 1515 * | | | 1516 * | T_L| T |T_R 1517 * | | ______|____ 1518 * | | T_D | | 1519 * | | | | 1520 * |____|_______| | 1521 * | | 1522 * | | 1523 * |____________| 1524 * 1525 *****************************************************************/ 1526 1527 /*In case of slices*/ 1528 { 1529 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag)) 1530 { 1531 1532 ctbx_t_t = ps_sao_ctxt->i4_ctb_x; 1533 ctby_t_t = ps_sao_ctxt->i4_ctb_y - 1; 1534 1535 ctbx_t_l = ps_sao_ctxt->i4_ctb_x - 1; 1536 ctby_t_l = ps_sao_ctxt->i4_ctb_y - 1; 1537 1538 ctbx_t_r = ps_sao_ctxt->i4_ctb_x; 1539 ctby_t_r = ps_sao_ctxt->i4_ctb_y - 1; 1540 1541 ctbx_t_d = ps_sao_ctxt->i4_ctb_x; 1542 ctby_t_d = ps_sao_ctxt->i4_ctb_y; 1543 1544 ctbx_t = ps_sao_ctxt->i4_ctb_x; 1545 ctby_t = ps_sao_ctxt->i4_ctb_y - 1; 1546 1547 if(!ps_slice_hdr->i1_first_slice_in_pic_flag) 1548 { 1549 /*Calculate neighbor ctb slice indices*/ 1550 if(0 == ps_sao_ctxt->i4_ctb_x) 1551 { 1552 au4_idx_t[0] = -1; 1553 au4_idx_t[6] = -1; 1554 au4_idx_t[4] = -1; 1555 } 1556 else 1557 { 1558 au4_idx_t[0] = au4_idx_t[4] = pu1_slice_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)]; 1559 au4_idx_t[6] = pu1_slice_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)]; 1560 } 1561 idx_t = pu1_slice_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)]; 1562 au4_idx_t[2] = au4_idx_t[5] = pu1_slice_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)]; 1563 au4_idx_t[1] = pu1_slice_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)]; 1564 au4_idx_t[3] = au4_idx_t[7] = pu1_slice_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)]; 1565 1566 /*Verify that the neighbor ctbs don't cross pic boundary.*/ 1567 if(0 == ps_sao_ctxt->i4_ctb_x) 1568 { 1569 au4_ilf_across_tile_slice_enable[4] = 0; 1570 au4_ilf_across_tile_slice_enable[6] = 0; 1571 au4_ilf_across_tile_slice_enable[0] = 0; 1572 } 1573 else 1574 { 1575 au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag; 1576 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_t[6])->i1_slice_loop_filter_across_slices_enabled_flag; 1577 } 1578 1579 1580 1581 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag; 1582 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag; 1583 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_t[1])->i1_slice_loop_filter_across_slices_enabled_flag; 1584 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_t[3])->i1_slice_loop_filter_across_slices_enabled_flag; 1585 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_t[7])->i1_slice_loop_filter_across_slices_enabled_flag; 1586 /* 1587 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag 1588 * of the pixel having a greater address is checked. Accordingly, set the availability flags 1589 */ 1590 1591 for(i = 0; i < 8; i++) 1592 { 1593 /*Sets the edges that lie on the slice/tile boundary*/ 1594 if(au4_idx_t[i] != idx_t) 1595 { 1596 au1_tile_slice_boundary[i] = 1; 1597 /*Check for slice flag at such boundaries*/ 1598 } 1599 else 1600 { 1601 au4_ilf_across_tile_slice_enable[i] = 1; 1602 } 1603 } 1604 /*Reset indices*/ 1605 for(i = 0; i < 8; i++) 1606 { 1607 au4_idx_t[i] = 0; 1608 } 1609 } 1610 1611 if(ps_pps->i1_tiles_enabled_flag) 1612 { 1613 /* Calculate availability flags at slice boundary */ 1614 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y)))) 1615 { 1616 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */ 1617 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag) 1618 { 1619 /*Calculate neighbor ctb slice indices*/ 1620 if(0 == ps_sao_ctxt->i4_ctb_x) 1621 { 1622 au4_idx_t[0] = -1; 1623 au4_idx_t[6] = -1; 1624 au4_idx_t[4] = -1; 1625 } 1626 else 1627 { 1628 au4_idx_t[0] = au4_idx_t[4] = pu1_tile_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)]; 1629 au4_idx_t[6] = pu1_tile_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)]; 1630 } 1631 idx_t = pu1_tile_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)]; 1632 au4_idx_t[2] = au4_idx_t[5] = pu1_tile_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)]; 1633 au4_idx_t[1] = pu1_tile_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)]; 1634 au4_idx_t[3] = au4_idx_t[7] = pu1_tile_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)]; 1635 1636 for(i = 0; i < 8; i++) 1637 { 1638 /*Sets the edges that lie on the tile boundary*/ 1639 if(au4_idx_t[i] != idx_t) 1640 { 1641 au1_tile_slice_boundary[i] |= 1; 1642 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; 1643 } 1644 } 1645 } 1646 } 1647 } 1648 1649 for(i = 0; i < 8; i++) 1650 { 1651 /*Sets the edges that lie on the slice/tile boundary*/ 1652 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i])) 1653 { 1654 au1_avail_luma[i] = 0; 1655 } 1656 } 1657 } 1658 } 1659 1660 1661 if(0 == ps_sao_ctxt->i4_ctb_x) 1662 { 1663 au1_avail_luma[0] = 0; 1664 au1_avail_luma[4] = 0; 1665 au1_avail_luma[6] = 0; 1666 } 1667 1668 if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_luma) 1669 { 1670 au1_avail_luma[1] = 0; 1671 au1_avail_luma[5] = 0; 1672 au1_avail_luma[7] = 0; 1673 } 1674 1675 if(0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma) 1676 { 1677 au1_avail_luma[2] = 0; 1678 au1_avail_luma[4] = 0; 1679 au1_avail_luma[5] = 0; 1680 } 1681 1682 if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y) 1683 { 1684 au1_avail_luma[3] = 0; 1685 au1_avail_luma[6] = 0; 1686 au1_avail_luma[7] = 0; 1687 } 1688 1689 { 1690 au1_src_top_right[0] = pu1_sao_src_top_left_luma_top_right[0]; 1691 u1_sao_src_top_left_luma_bot_left = pu1_src_luma[sao_ht_luma * src_strd - 1]; 1692 ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma, 1693 src_strd, 1694 pu1_src_left_luma, 1695 pu1_src_top_luma, 1696 pu1_sao_src_luma_top_left_ctb, 1697 au1_src_top_right, 1698 &u1_sao_src_top_left_luma_bot_left, 1699 au1_avail_luma, 1700 ai1_offset_y, 1701 sao_wd_luma, 1702 sao_ht_luma); 1703 } 1704 } 1705 } 1706 } 1707 1708 if(0 != sao_wd_chroma) 1709 { 1710 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag) 1711 { 1712 if(0 == ps_sao->b3_cb_type_idx) 1713 { 1714 1715 for(row = 0; row < sao_ht_chroma; row++) 1716 { 1717 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)]; 1718 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)]; 1719 } 1720 pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2]; 1721 pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1]; 1722 1723 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma); 1724 1725 } 1726 1727 else if(1 == ps_sao->b3_cb_type_idx) 1728 { 1729 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1; 1730 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2; 1731 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3; 1732 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4; 1733 1734 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1; 1735 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2; 1736 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3; 1737 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4; 1738 1739 if(chroma_yuv420sp_vu) 1740 { 1741 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma, 1742 src_strd, 1743 pu1_src_left_chroma, 1744 pu1_src_top_chroma, 1745 pu1_sao_src_chroma_top_left_ctb, 1746 ps_sao->b5_cr_band_pos, 1747 ps_sao->b5_cb_band_pos, 1748 ai1_offset_cr, 1749 ai1_offset_cb, 1750 sao_wd_chroma, 1751 sao_ht_chroma 1752 ); 1753 } 1754 else 1755 { 1756 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma, 1757 src_strd, 1758 pu1_src_left_chroma, 1759 pu1_src_top_chroma, 1760 pu1_sao_src_chroma_top_left_ctb, 1761 ps_sao->b5_cb_band_pos, 1762 ps_sao->b5_cr_band_pos, 1763 ai1_offset_cb, 1764 ai1_offset_cr, 1765 sao_wd_chroma, 1766 sao_ht_chroma 1767 ); 1768 } 1769 } 1770 else // if(2 <= ps_sao->b3_cb_type_idx) 1771 { 1772 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1; 1773 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2; 1774 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3; 1775 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4; 1776 1777 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1; 1778 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2; 1779 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3; 1780 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4; 1781 1782 for(i = 0; i < 8; i++) 1783 { 1784 au1_avail_chroma[i] = 255; 1785 au1_tile_slice_boundary[i] = 0; 1786 au4_idx_t[i] = 0; 1787 au4_ilf_across_tile_slice_enable[i] = 1; 1788 } 1789 1790 { 1791 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag)) 1792 { 1793 ctbx_t_t = ps_sao_ctxt->i4_ctb_x; 1794 ctby_t_t = ps_sao_ctxt->i4_ctb_y - 1; 1795 1796 ctbx_t_l = ps_sao_ctxt->i4_ctb_x - 1; 1797 ctby_t_l = ps_sao_ctxt->i4_ctb_y - 1; 1798 1799 ctbx_t_r = ps_sao_ctxt->i4_ctb_x; 1800 ctby_t_r = ps_sao_ctxt->i4_ctb_y - 1; 1801 1802 ctbx_t_d = ps_sao_ctxt->i4_ctb_x; 1803 ctby_t_d = ps_sao_ctxt->i4_ctb_y; 1804 1805 ctbx_t = ps_sao_ctxt->i4_ctb_x; 1806 ctby_t = ps_sao_ctxt->i4_ctb_y - 1; 1807 1808 if(!ps_slice_hdr->i1_first_slice_in_pic_flag) 1809 { 1810 if(0 == ps_sao_ctxt->i4_ctb_x) 1811 { 1812 au4_idx_t[0] = -1; 1813 au4_idx_t[6] = -1; 1814 au4_idx_t[4] = -1; 1815 } 1816 else 1817 { 1818 au4_idx_t[0] = au4_idx_t[4] = pu1_slice_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)]; 1819 au4_idx_t[6] = pu1_slice_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)]; 1820 } 1821 idx_t = pu1_slice_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)]; 1822 au4_idx_t[2] = au4_idx_t[5] = pu1_slice_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)]; 1823 au4_idx_t[1] = pu1_slice_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)]; 1824 au4_idx_t[3] = au4_idx_t[7] = pu1_slice_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)]; 1825 1826 /*Verify that the neighbor ctbs don't cross pic boundary.*/ 1827 1828 if(0 == ps_sao_ctxt->i4_ctb_x) 1829 { 1830 au4_ilf_across_tile_slice_enable[4] = 0; 1831 au4_ilf_across_tile_slice_enable[6] = 0; 1832 au4_ilf_across_tile_slice_enable[0] = 0; 1833 } 1834 else 1835 { 1836 au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag; 1837 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_t[6])->i1_slice_loop_filter_across_slices_enabled_flag; 1838 } 1839 1840 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag; 1841 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag; 1842 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_t[1])->i1_slice_loop_filter_across_slices_enabled_flag; 1843 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_t[3])->i1_slice_loop_filter_across_slices_enabled_flag; 1844 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_t[7])->i1_slice_loop_filter_across_slices_enabled_flag; 1845 /* 1846 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag 1847 * of the pixel having a greater address is checked. Accordingly, set the availability flags 1848 */ 1849 for(i = 0; i < 8; i++) 1850 { 1851 /*Sets the edges that lie on the slice/tile boundary*/ 1852 if(au4_idx_t[i] != idx_t) 1853 { 1854 au1_tile_slice_boundary[i] = 1; 1855 } 1856 else 1857 { 1858 /*Indicates that the neighbour belongs to same/dependent slice*/ 1859 au4_ilf_across_tile_slice_enable[i] = 1; 1860 } 1861 } 1862 /*Reset indices*/ 1863 for(i = 0; i < 8; i++) 1864 { 1865 au4_idx_t[i] = 0; 1866 } 1867 } 1868 if(ps_pps->i1_tiles_enabled_flag) 1869 { 1870 /* Calculate availability flags at slice boundary */ 1871 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y)))) 1872 { 1873 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */ 1874 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag) 1875 { 1876 /*Calculate neighbor ctb slice indices*/ 1877 if(0 == ps_sao_ctxt->i4_ctb_x) 1878 { 1879 au4_idx_t[0] = -1; 1880 au4_idx_t[6] = -1; 1881 au4_idx_t[4] = -1; 1882 } 1883 else 1884 { 1885 au4_idx_t[0] = au4_idx_t[4] = pu1_tile_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)]; 1886 au4_idx_t[6] = pu1_tile_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)]; 1887 } 1888 idx_t = pu1_tile_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)]; 1889 au4_idx_t[2] = au4_idx_t[5] = pu1_tile_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)]; 1890 au4_idx_t[1] = pu1_tile_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)]; 1891 au4_idx_t[3] = au4_idx_t[7] = pu1_tile_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)]; 1892 1893 for(i = 0; i < 8; i++) 1894 { 1895 /*Sets the edges that lie on the tile boundary*/ 1896 if(au4_idx_t[i] != idx_t) 1897 { 1898 au1_tile_slice_boundary[i] |= 1; 1899 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; 1900 } 1901 } 1902 } 1903 } 1904 } 1905 for(i = 0; i < 8; i++) 1906 { 1907 /*Sets the edges that lie on the slice/tile boundary*/ 1908 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i])) 1909 { 1910 au1_avail_chroma[i] = 0; 1911 } 1912 } 1913 1914 } 1915 } 1916 if(0 == ps_sao_ctxt->i4_ctb_x) 1917 { 1918 au1_avail_chroma[0] = 0; 1919 au1_avail_chroma[4] = 0; 1920 au1_avail_chroma[6] = 0; 1921 } 1922 1923 if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_chroma) 1924 { 1925 au1_avail_chroma[1] = 0; 1926 au1_avail_chroma[5] = 0; 1927 au1_avail_chroma[7] = 0; 1928 } 1929 1930 if(0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma) 1931 { 1932 au1_avail_chroma[2] = 0; 1933 au1_avail_chroma[4] = 0; 1934 au1_avail_chroma[5] = 0; 1935 } 1936 1937 if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y) 1938 { 1939 au1_avail_chroma[3] = 0; 1940 au1_avail_chroma[6] = 0; 1941 au1_avail_chroma[7] = 0; 1942 } 1943 1944 { 1945 au1_src_top_right[0] = pu1_sao_src_top_left_chroma_top_right[0]; 1946 au1_src_top_right[1] = pu1_sao_src_top_left_chroma_top_right[1]; 1947 au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2]; 1948 au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1]; 1949 1950 if(chroma_yuv420sp_vu) 1951 { 1952 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma, 1953 src_strd, 1954 pu1_src_left_chroma, 1955 pu1_src_top_chroma, 1956 pu1_sao_src_chroma_top_left_ctb, 1957 au1_src_top_right, 1958 au1_sao_src_top_left_chroma_bot_left, 1959 au1_avail_chroma, 1960 ai1_offset_cr, 1961 ai1_offset_cb, 1962 sao_wd_chroma, 1963 sao_ht_chroma); 1964 } 1965 else 1966 { 1967 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma, 1968 src_strd, 1969 pu1_src_left_chroma, 1970 pu1_src_top_chroma, 1971 pu1_sao_src_chroma_top_left_ctb, 1972 au1_src_top_right, 1973 au1_sao_src_top_left_chroma_bot_left, 1974 au1_avail_chroma, 1975 ai1_offset_cb, 1976 ai1_offset_cr, 1977 sao_wd_chroma, 1978 sao_ht_chroma); 1979 } 1980 } 1981 1982 } 1983 } 1984 } 1985 1986 pu1_src_luma += sao_ht_luma * src_strd; 1987 pu1_src_chroma += sao_ht_chroma * src_strd; 1988 ps_sao += (ps_sps->i2_pic_wd_in_ctb); 1989 } 1990 1991 /* Left CTB */ 1992 if(ps_sao_ctxt->i4_ctb_x > 0) 1993 { 1994 WORD32 sao_wd_luma = SAO_SHIFT_CTB; 1995 WORD32 sao_wd_chroma = 2 * SAO_SHIFT_CTB; 1996 WORD32 sao_ht_luma = ctb_size - SAO_SHIFT_CTB; 1997 WORD32 sao_ht_chroma = ctb_size / 2 - SAO_SHIFT_CTB; 1998 1999 WORD32 ctbx_l_t = 0, ctbx_l_l = 0, ctbx_l_r = 0, ctbx_l_d = 0, ctbx_l = 0; 2000 WORD32 ctby_l_t = 0, ctby_l_l = 0, ctby_l_r = 0, ctby_l_d = 0, ctby_l = 0; 2001 WORD32 au4_idx_l[8], idx_l; 2002 2003 WORD32 remaining_rows; 2004 remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + sao_ht_luma); 2005 if(remaining_rows <= SAO_SHIFT_CTB) 2006 { 2007 sao_ht_luma += remaining_rows; 2008 } 2009 remaining_rows = ps_sps->i2_pic_height_in_luma_samples / 2 - ((ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) + sao_ht_chroma); 2010 if(remaining_rows <= SAO_SHIFT_CTB) 2011 { 2012 sao_ht_chroma += remaining_rows; 2013 } 2014 2015 pu1_src_luma -= sao_wd_luma; 2016 pu1_src_chroma -= sao_wd_chroma; 2017 ps_sao -= 1; 2018 pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma; 2019 pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma; 2020 pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size); 2021 pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size); 2022 2023 2024 if(0 != sao_ht_luma) 2025 { 2026 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag) 2027 { 2028 if(0 == ps_sao->b3_y_type_idx) 2029 { 2030 /* Update left, top and top-left */ 2031 for(row = 0; row < sao_ht_luma; row++) 2032 { 2033 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)]; 2034 } 2035 /*Update in next location*/ 2036 pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1]; 2037 2038 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma); 2039 2040 } 2041 2042 else if(1 == ps_sao->b3_y_type_idx) 2043 { 2044 ai1_offset_y[1] = ps_sao->b4_y_offset_1; 2045 ai1_offset_y[2] = ps_sao->b4_y_offset_2; 2046 ai1_offset_y[3] = ps_sao->b4_y_offset_3; 2047 ai1_offset_y[4] = ps_sao->b4_y_offset_4; 2048 2049 ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma, 2050 src_strd, 2051 pu1_src_left_luma, 2052 pu1_src_top_luma, 2053 pu1_sao_src_top_left_luma_curr_ctb, 2054 ps_sao->b5_y_band_pos, 2055 ai1_offset_y, 2056 sao_wd_luma, 2057 sao_ht_luma 2058 ); 2059 } 2060 2061 else // if(2 <= ps_sao->b3_y_type_idx) 2062 { 2063 ai1_offset_y[1] = ps_sao->b4_y_offset_1; 2064 ai1_offset_y[2] = ps_sao->b4_y_offset_2; 2065 ai1_offset_y[3] = ps_sao->b4_y_offset_3; 2066 ai1_offset_y[4] = ps_sao->b4_y_offset_4; 2067 2068 for(i = 0; i < 8; i++) 2069 { 2070 au1_avail_luma[i] = 255; 2071 au1_tile_slice_boundary[i] = 0; 2072 au4_idx_l[i] = 0; 2073 au4_ilf_across_tile_slice_enable[i] = 1; 2074 } 2075 /****************************************************************** 2076 * Derive the Top-left CTB's neighbour pixel's slice indices. 2077 * 2078 * 2079 * ____________ 2080 * | | | 2081 * | L_T| | 2082 * |____|_______|____ 2083 * | | | | 2084 * L_L | L | L_R | | 2085 * |____|_______| | 2086 * | | 2087 * L_D | | 2088 * |____________| 2089 * 2090 *****************************************************************/ 2091 2092 /*In case of slices or tiles*/ 2093 { 2094 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag)) 2095 { 2096 ctbx_l_t = ps_sao_ctxt->i4_ctb_x - 1; 2097 ctby_l_t = ps_sao_ctxt->i4_ctb_y - 1; 2098 2099 ctbx_l_l = ps_sao_ctxt->i4_ctb_x - 1; 2100 ctby_l_l = ps_sao_ctxt->i4_ctb_y; 2101 2102 ctbx_l_r = ps_sao_ctxt->i4_ctb_x; 2103 ctby_l_r = ps_sao_ctxt->i4_ctb_y; 2104 2105 ctbx_l_d = ps_sao_ctxt->i4_ctb_x - 1; 2106 ctby_l_d = ps_sao_ctxt->i4_ctb_y; 2107 2108 ctbx_l = ps_sao_ctxt->i4_ctb_x - 1; 2109 ctby_l = ps_sao_ctxt->i4_ctb_y; 2110 2111 if(!ps_slice_hdr->i1_first_slice_in_pic_flag) 2112 { 2113 if(0 == ps_sao_ctxt->i4_ctb_y) 2114 { 2115 au4_idx_l[2] = -1; 2116 au4_idx_l[4] = -1; 2117 au4_idx_l[5] = -1; 2118 } 2119 else 2120 { 2121 au4_idx_l[2] = au4_idx_l[4] = pu1_slice_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)]; 2122 au4_idx_l[5] = pu1_slice_idx[ctbx_l_t + 1 + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)]; 2123 } 2124 idx_l = au4_idx_l[6] = pu1_slice_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)]; 2125 au4_idx_l[0] = pu1_slice_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)]; 2126 au4_idx_l[1] = au4_idx_l[7] = pu1_slice_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)]; 2127 au4_idx_l[3] = pu1_slice_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)]; 2128 2129 /*Verify that the neighbor ctbs don't cross pic boundary.*/ 2130 if(0 == ps_sao_ctxt->i4_ctb_y) 2131 { 2132 au4_ilf_across_tile_slice_enable[2] = 0; 2133 au4_ilf_across_tile_slice_enable[4] = 0; 2134 au4_ilf_across_tile_slice_enable[5] = 0; 2135 } 2136 else 2137 { 2138 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag; 2139 au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2]; 2140 2141 } 2142 //TODO: ILF flag checks for [0] and [6] is missing. 2143 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_l[1])->i1_slice_loop_filter_across_slices_enabled_flag; 2144 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_l[3])->i1_slice_loop_filter_across_slices_enabled_flag; 2145 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_l[7])->i1_slice_loop_filter_across_slices_enabled_flag; 2146 /* 2147 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag 2148 * of the pixel having a greater address is checked. Accordingly, set the availability flags 2149 */ 2150 for(i = 0; i < 8; i++) 2151 { 2152 /*Sets the edges that lie on the slice/tile boundary*/ 2153 if(au4_idx_l[i] != idx_l) 2154 { 2155 au1_tile_slice_boundary[i] = 1; 2156 } 2157 else 2158 { 2159 au4_ilf_across_tile_slice_enable[i] = 1; 2160 } 2161 } 2162 /*Reset indices*/ 2163 for(i = 0; i < 8; i++) 2164 { 2165 au4_idx_l[i] = 0; 2166 } 2167 } 2168 2169 if(ps_pps->i1_tiles_enabled_flag) 2170 { 2171 /* Calculate availability flags at slice boundary */ 2172 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y)))) 2173 { 2174 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */ 2175 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag) 2176 { 2177 if(0 == ps_sao_ctxt->i4_ctb_y) 2178 { 2179 au4_idx_l[2] = -1; 2180 au4_idx_l[4] = -1; 2181 au4_idx_l[5] = -1; 2182 } 2183 else 2184 { 2185 au4_idx_l[2] = au4_idx_l[4] = pu1_tile_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)]; 2186 au4_idx_l[5] = pu1_tile_idx[ctbx_l_t + 1 + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)]; 2187 } 2188 2189 idx_l = au4_idx_l[6] = pu1_tile_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)]; 2190 au4_idx_l[0] = pu1_tile_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)]; 2191 au4_idx_l[1] = au4_idx_l[7] = pu1_tile_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)]; 2192 au4_idx_l[3] = pu1_tile_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)]; 2193 2194 for(i = 0; i < 8; i++) 2195 { 2196 /*Sets the edges that lie on the slice/tile boundary*/ 2197 if(au4_idx_l[i] != idx_l) 2198 { 2199 au1_tile_slice_boundary[i] |= 1; 2200 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; 2201 } 2202 } 2203 } 2204 } 2205 } 2206 2207 for(i = 0; i < 8; i++) 2208 { 2209 /*Sets the edges that lie on the slice/tile boundary*/ 2210 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i])) 2211 { 2212 au1_avail_luma[i] = 0; 2213 } 2214 } 2215 } 2216 } 2217 if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma) 2218 { 2219 au1_avail_luma[0] = 0; 2220 au1_avail_luma[4] = 0; 2221 au1_avail_luma[6] = 0; 2222 } 2223 if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x) 2224 { 2225 au1_avail_luma[1] = 0; 2226 au1_avail_luma[5] = 0; 2227 au1_avail_luma[7] = 0; 2228 } 2229 2230 if(0 == ps_sao_ctxt->i4_ctb_y) 2231 { 2232 au1_avail_luma[2] = 0; 2233 au1_avail_luma[4] = 0; 2234 au1_avail_luma[5] = 0; 2235 } 2236 2237 if(ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) <= sao_ht_luma) 2238 { 2239 au1_avail_luma[3] = 0; 2240 au1_avail_luma[6] = 0; 2241 au1_avail_luma[7] = 0; 2242 } 2243 2244 { 2245 au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma]; 2246 u1_sao_src_top_left_luma_bot_left = pu1_sao_src_top_left_luma_bot_left[0]; 2247 ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma, 2248 src_strd, 2249 pu1_src_left_luma, 2250 pu1_src_top_luma, 2251 pu1_sao_src_top_left_luma_curr_ctb, 2252 au1_src_top_right, 2253 &u1_sao_src_top_left_luma_bot_left, 2254 au1_avail_luma, 2255 ai1_offset_y, 2256 sao_wd_luma, 2257 sao_ht_luma); 2258 } 2259 2260 } 2261 } 2262 } 2263 2264 if(0 != sao_ht_chroma) 2265 { 2266 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag) 2267 { 2268 if(0 == ps_sao->b3_cb_type_idx) 2269 { 2270 for(row = 0; row < sao_ht_chroma; row++) 2271 { 2272 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)]; 2273 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)]; 2274 } 2275 pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2]; 2276 pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1]; 2277 2278 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma); 2279 } 2280 2281 else if(1 == ps_sao->b3_cb_type_idx) 2282 { 2283 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1; 2284 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2; 2285 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3; 2286 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4; 2287 2288 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1; 2289 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2; 2290 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3; 2291 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4; 2292 2293 if(chroma_yuv420sp_vu) 2294 { 2295 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma, 2296 src_strd, 2297 pu1_src_left_chroma, 2298 pu1_src_top_chroma, 2299 pu1_sao_src_top_left_chroma_curr_ctb, 2300 ps_sao->b5_cr_band_pos, 2301 ps_sao->b5_cb_band_pos, 2302 ai1_offset_cr, 2303 ai1_offset_cb, 2304 sao_wd_chroma, 2305 sao_ht_chroma 2306 ); 2307 } 2308 else 2309 { 2310 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma, 2311 src_strd, 2312 pu1_src_left_chroma, 2313 pu1_src_top_chroma, 2314 pu1_sao_src_top_left_chroma_curr_ctb, 2315 ps_sao->b5_cb_band_pos, 2316 ps_sao->b5_cr_band_pos, 2317 ai1_offset_cb, 2318 ai1_offset_cr, 2319 sao_wd_chroma, 2320 sao_ht_chroma 2321 ); 2322 } 2323 } 2324 2325 else // if(2 <= ps_sao->b3_cb_type_idx) 2326 { 2327 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1; 2328 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2; 2329 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3; 2330 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4; 2331 2332 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1; 2333 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2; 2334 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3; 2335 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4; 2336 2337 for(i = 0; i < 8; i++) 2338 { 2339 au1_avail_chroma[i] = 255; 2340 au1_tile_slice_boundary[i] = 0; 2341 au4_idx_l[i] = 0; 2342 au4_ilf_across_tile_slice_enable[i] = 1; 2343 } 2344 /*In case of slices*/ 2345 { 2346 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag)) 2347 { 2348 ctbx_l_t = ps_sao_ctxt->i4_ctb_x - 1; 2349 ctby_l_t = ps_sao_ctxt->i4_ctb_y - 1; 2350 2351 ctbx_l_l = ps_sao_ctxt->i4_ctb_x - 1; 2352 ctby_l_l = ps_sao_ctxt->i4_ctb_y; 2353 2354 ctbx_l_r = ps_sao_ctxt->i4_ctb_x; 2355 ctby_l_r = ps_sao_ctxt->i4_ctb_y; 2356 2357 ctbx_l_d = ps_sao_ctxt->i4_ctb_x - 1; 2358 ctby_l_d = ps_sao_ctxt->i4_ctb_y; 2359 2360 ctbx_l = ps_sao_ctxt->i4_ctb_x - 1; 2361 ctby_l = ps_sao_ctxt->i4_ctb_y; 2362 2363 if(!ps_slice_hdr->i1_first_slice_in_pic_flag) 2364 { 2365 if(0 == ps_sao_ctxt->i4_ctb_y) 2366 { 2367 au4_idx_l[2] = -1; 2368 au4_idx_l[4] = -1; 2369 au4_idx_l[5] = -1; 2370 } 2371 else 2372 { 2373 au4_idx_l[2] = au4_idx_l[4] = pu1_slice_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)]; 2374 au4_idx_l[5] = pu1_slice_idx[ctbx_l_t + 1 + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)]; 2375 } 2376 idx_l = au4_idx_l[6] = pu1_slice_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)]; 2377 au4_idx_l[0] = pu1_slice_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)]; 2378 au4_idx_l[1] = au4_idx_l[7] = pu1_slice_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)]; 2379 au4_idx_l[3] = pu1_slice_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)]; 2380 2381 /*Verify that the neighbour ctbs dont cross pic boundary.*/ 2382 if(0 == ps_sao_ctxt->i4_ctb_y) 2383 { 2384 au4_ilf_across_tile_slice_enable[2] = 0; 2385 au4_ilf_across_tile_slice_enable[4] = 0; 2386 au4_ilf_across_tile_slice_enable[5] = 0; 2387 } 2388 else 2389 { 2390 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag; 2391 au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2]; 2392 } 2393 // au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag; 2394 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_l[1])->i1_slice_loop_filter_across_slices_enabled_flag; 2395 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_l[3])->i1_slice_loop_filter_across_slices_enabled_flag; 2396 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_l[7])->i1_slice_loop_filter_across_slices_enabled_flag; 2397 /* 2398 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag 2399 * of the pixel having a greater address is checked. Accordingly, set the availability flags 2400 */ 2401 for(i = 0; i < 8; i++) 2402 { 2403 /*Sets the edges that lie on the slice/tile boundary*/ 2404 if(au4_idx_l[i] != idx_l) 2405 { 2406 au1_tile_slice_boundary[i] = 1; 2407 } 2408 else 2409 { 2410 au4_ilf_across_tile_slice_enable[i] = 1; 2411 } 2412 } 2413 /*Reset indices*/ 2414 for(i = 0; i < 8; i++) 2415 { 2416 au4_idx_l[i] = 0; 2417 } 2418 } 2419 if(ps_pps->i1_tiles_enabled_flag) 2420 { 2421 /* Calculate availability flags at slice boundary */ 2422 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y)))) 2423 { 2424 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */ 2425 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag) 2426 { 2427 if(0 == ps_sao_ctxt->i4_ctb_y) 2428 { 2429 au4_idx_l[2] = -1; 2430 au4_idx_l[4] = -1; 2431 au4_idx_l[5] = -1; 2432 } 2433 else 2434 { 2435 au4_idx_l[2] = au4_idx_l[4] = pu1_tile_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)]; 2436 au4_idx_l[5] = pu1_tile_idx[ctbx_l_t + 1 + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)]; 2437 } 2438 2439 idx_l = au4_idx_l[6] = pu1_tile_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)]; 2440 au4_idx_l[0] = pu1_tile_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)]; 2441 au4_idx_l[1] = au4_idx_l[7] = pu1_tile_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)]; 2442 au4_idx_l[3] = pu1_tile_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)]; 2443 2444 for(i = 0; i < 8; i++) 2445 { 2446 /*Sets the edges that lie on the slice/tile boundary*/ 2447 if(au4_idx_l[i] != idx_l) 2448 { 2449 au1_tile_slice_boundary[i] |= 1; 2450 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0 2451 } 2452 } 2453 } 2454 } 2455 } 2456 for(i = 0; i < 8; i++) 2457 { 2458 /*Sets the edges that lie on the slice/tile boundary*/ 2459 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i])) 2460 { 2461 au1_avail_chroma[i] = 0; 2462 } 2463 } 2464 } 2465 } 2466 if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma) 2467 { 2468 au1_avail_chroma[0] = 0; 2469 au1_avail_chroma[4] = 0; 2470 au1_avail_chroma[6] = 0; 2471 } 2472 2473 if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x) 2474 { 2475 au1_avail_chroma[1] = 0; 2476 au1_avail_chroma[5] = 0; 2477 au1_avail_chroma[7] = 0; 2478 } 2479 2480 if(0 == ps_sao_ctxt->i4_ctb_y) 2481 { 2482 au1_avail_chroma[2] = 0; 2483 au1_avail_chroma[4] = 0; 2484 au1_avail_chroma[5] = 0; 2485 } 2486 2487 if(ps_sps->i2_pic_height_in_luma_samples / 2 - (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) <= sao_ht_chroma) 2488 { 2489 au1_avail_chroma[3] = 0; 2490 au1_avail_chroma[6] = 0; 2491 au1_avail_chroma[7] = 0; 2492 } 2493 2494 { 2495 au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma]; 2496 au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1]; 2497 au1_src_bot_left[0] = pu1_sao_src_top_left_chroma_bot_left[0]; 2498 au1_src_bot_left[1] = pu1_sao_src_top_left_chroma_bot_left[1]; 2499 //au1_src_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2]; 2500 //au1_src_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1]; 2501 if((ctb_size == 16) && (ps_sao_ctxt->i4_ctb_x != ps_sps->i2_pic_wd_in_ctb - 1)) 2502 { 2503 au1_src_top_right[0] = pu1_src_chroma[sao_wd_chroma - src_strd]; 2504 au1_src_top_right[1] = pu1_src_chroma[sao_wd_chroma - src_strd + 1]; 2505 } 2506 2507 2508 if(chroma_yuv420sp_vu) 2509 { 2510 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma, 2511 src_strd, 2512 pu1_src_left_chroma, 2513 pu1_src_top_chroma, 2514 pu1_sao_src_top_left_chroma_curr_ctb, 2515 au1_src_top_right, 2516 au1_src_bot_left, 2517 au1_avail_chroma, 2518 ai1_offset_cr, 2519 ai1_offset_cb, 2520 sao_wd_chroma, 2521 sao_ht_chroma); 2522 } 2523 else 2524 { 2525 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma, 2526 src_strd, 2527 pu1_src_left_chroma, 2528 pu1_src_top_chroma, 2529 pu1_sao_src_top_left_chroma_curr_ctb, 2530 au1_src_top_right, 2531 au1_src_bot_left, 2532 au1_avail_chroma, 2533 ai1_offset_cb, 2534 ai1_offset_cr, 2535 sao_wd_chroma, 2536 sao_ht_chroma); 2537 } 2538 } 2539 2540 } 2541 } 2542 2543 } 2544 pu1_src_luma += sao_wd_luma; 2545 pu1_src_chroma += sao_wd_chroma; 2546 ps_sao += 1; 2547 } 2548 2549 2550 /* Current CTB */ 2551 { 2552 WORD32 sao_wd_luma = ctb_size - SAO_SHIFT_CTB; 2553 WORD32 sao_wd_chroma = ctb_size - SAO_SHIFT_CTB * 2; 2554 WORD32 sao_ht_luma = ctb_size - SAO_SHIFT_CTB; 2555 WORD32 sao_ht_chroma = ctb_size / 2 - SAO_SHIFT_CTB; 2556 WORD32 ctbx_c_t = 0, ctbx_c_l = 0, ctbx_c_r = 0, ctbx_c_d = 0, ctbx_c = 0; 2557 WORD32 ctby_c_t = 0, ctby_c_l = 0, ctby_c_r = 0, ctby_c_d = 0, ctby_c = 0; 2558 WORD32 au4_idx_c[8], idx_c; 2559 2560 WORD32 remaining_rows; 2561 WORD32 remaining_cols; 2562 2563 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_luma); 2564 if(remaining_cols <= SAO_SHIFT_CTB) 2565 { 2566 sao_wd_luma += remaining_cols; 2567 } 2568 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_chroma); 2569 if(remaining_cols <= 2 * SAO_SHIFT_CTB) 2570 { 2571 sao_wd_chroma += remaining_cols; 2572 } 2573 2574 remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + sao_ht_luma); 2575 if(remaining_rows <= SAO_SHIFT_CTB) 2576 { 2577 sao_ht_luma += remaining_rows; 2578 } 2579 remaining_rows = ps_sps->i2_pic_height_in_luma_samples / 2 - ((ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) + sao_ht_chroma); 2580 if(remaining_rows <= SAO_SHIFT_CTB) 2581 { 2582 sao_ht_chroma += remaining_rows; 2583 } 2584 2585 pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size); 2586 pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size); 2587 pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size); 2588 pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size); 2589 2590 if((0 != sao_wd_luma) && (0 != sao_ht_luma)) 2591 { 2592 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag) 2593 { 2594 if(0 == ps_sao->b3_y_type_idx) 2595 { 2596 /* Update left, top and top-left */ 2597 for(row = 0; row < sao_ht_luma; row++) 2598 { 2599 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)]; 2600 } 2601 pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1]; 2602 2603 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma); 2604 2605 pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma]; 2606 2607 } 2608 2609 else if(1 == ps_sao->b3_y_type_idx) 2610 { 2611 ai1_offset_y[1] = ps_sao->b4_y_offset_1; 2612 ai1_offset_y[2] = ps_sao->b4_y_offset_2; 2613 ai1_offset_y[3] = ps_sao->b4_y_offset_3; 2614 ai1_offset_y[4] = ps_sao->b4_y_offset_4; 2615 2616 ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma, 2617 src_strd, 2618 pu1_src_left_luma, 2619 pu1_src_top_luma, 2620 pu1_sao_src_top_left_luma_curr_ctb, 2621 ps_sao->b5_y_band_pos, 2622 ai1_offset_y, 2623 sao_wd_luma, 2624 sao_ht_luma 2625 ); 2626 } 2627 2628 else // if(2 <= ps_sao->b3_y_type_idx) 2629 { 2630 ai1_offset_y[1] = ps_sao->b4_y_offset_1; 2631 ai1_offset_y[2] = ps_sao->b4_y_offset_2; 2632 ai1_offset_y[3] = ps_sao->b4_y_offset_3; 2633 ai1_offset_y[4] = ps_sao->b4_y_offset_4; 2634 2635 for(i = 0; i < 8; i++) 2636 { 2637 au1_avail_luma[i] = 255; 2638 au1_tile_slice_boundary[i] = 0; 2639 au4_idx_c[i] = 0; 2640 au4_ilf_across_tile_slice_enable[i] = 1; 2641 } 2642 /****************************************************************** 2643 * Derive the Top-left CTB's neighbour pixel's slice indices. 2644 * 2645 * 2646 * ____________ 2647 * | | | 2648 * | | C_T | 2649 * |____|_______|____ 2650 * | | | | 2651 * | C_L| C | C_R| 2652 * |____|_______| | 2653 * | C_D | 2654 * | | 2655 * |____________| 2656 * 2657 *****************************************************************/ 2658 2659 /*In case of slices*/ 2660 { 2661 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag)) 2662 { 2663 ctbx_c_t = ps_sao_ctxt->i4_ctb_x; 2664 ctby_c_t = ps_sao_ctxt->i4_ctb_y - 1; 2665 2666 ctbx_c_l = ps_sao_ctxt->i4_ctb_x - 1; 2667 ctby_c_l = ps_sao_ctxt->i4_ctb_y; 2668 2669 ctbx_c_r = ps_sao_ctxt->i4_ctb_x; 2670 ctby_c_r = ps_sao_ctxt->i4_ctb_y; 2671 2672 ctbx_c_d = ps_sao_ctxt->i4_ctb_x; 2673 ctby_c_d = ps_sao_ctxt->i4_ctb_y; 2674 2675 ctbx_c = ps_sao_ctxt->i4_ctb_x; 2676 ctby_c = ps_sao_ctxt->i4_ctb_y; 2677 2678 if(!ps_slice_hdr->i1_first_slice_in_pic_flag) 2679 { 2680 if(0 == ps_sao_ctxt->i4_ctb_x) 2681 { 2682 au4_idx_c[6] = -1; 2683 au4_idx_c[0] = -1; 2684 au4_idx_c[4] = -1; 2685 } 2686 else 2687 { 2688 au4_idx_c[0] = au4_idx_c[6] = pu1_slice_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)]; 2689 } 2690 2691 if(0 == ps_sao_ctxt->i4_ctb_y) 2692 { 2693 au4_idx_c[2] = -1; 2694 au4_idx_c[5] = -1; 2695 au4_idx_c[4] = -1; 2696 } 2697 else 2698 { 2699 au4_idx_c[4] = pu1_slice_idx[ctbx_c_t - 1 + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)]; 2700 au4_idx_c[2] = au4_idx_c[5] = pu1_slice_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)]; 2701 } 2702 idx_c = pu1_slice_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)]; 2703 au4_idx_c[1] = au4_idx_c[7] = pu1_slice_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)]; 2704 au4_idx_c[3] = pu1_slice_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)]; 2705 2706 if(0 == ps_sao_ctxt->i4_ctb_x) 2707 { 2708 au4_ilf_across_tile_slice_enable[6] = 0; 2709 au4_ilf_across_tile_slice_enable[0] = 0; 2710 au4_ilf_across_tile_slice_enable[4] = 0; 2711 } 2712 else 2713 { 2714 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_c[6])->i1_slice_loop_filter_across_slices_enabled_flag; 2715 au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;; 2716 } 2717 if(0 == ps_sao_ctxt->i4_ctb_y) 2718 { 2719 au4_ilf_across_tile_slice_enable[2] = 0; 2720 au4_ilf_across_tile_slice_enable[4] = 0; 2721 au4_ilf_across_tile_slice_enable[5] = 0; 2722 } 2723 else 2724 { 2725 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag; 2726 au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2]; 2727 } 2728 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_c[1])->i1_slice_loop_filter_across_slices_enabled_flag; 2729 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_c[3])->i1_slice_loop_filter_across_slices_enabled_flag; 2730 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_c[7])->i1_slice_loop_filter_across_slices_enabled_flag; 2731 2732 /* 2733 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag 2734 * of the pixel having a greater address is checked. Accordingly, set the availability flags 2735 */ 2736 for(i = 0; i < 8; i++) 2737 { 2738 /*Sets the edges that lie on the slice/tile boundary*/ 2739 if(au4_idx_c[i] != idx_c) 2740 { 2741 au1_tile_slice_boundary[i] = 1; 2742 } 2743 else 2744 { 2745 au4_ilf_across_tile_slice_enable[i] = 1; 2746 } 2747 } 2748 /*Reset indices*/ 2749 for(i = 0; i < 8; i++) 2750 { 2751 au4_idx_c[i] = 0; 2752 } 2753 } 2754 2755 if(ps_pps->i1_tiles_enabled_flag) 2756 { 2757 /* Calculate availability flags at slice boundary */ 2758 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y)))) 2759 { 2760 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */ 2761 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag) 2762 { 2763 if(0 == ps_sao_ctxt->i4_ctb_x) 2764 { 2765 au4_idx_c[6] = -1; 2766 au4_idx_c[0] = -1; 2767 au4_idx_c[4] = -1; 2768 } 2769 else 2770 { 2771 au4_idx_c[0] = au4_idx_c[6] = pu1_tile_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)]; 2772 } 2773 2774 if(0 == ps_sao_ctxt->i4_ctb_y) 2775 { 2776 au4_idx_c[2] = -1; 2777 au4_idx_c[5] = -1; 2778 au4_idx_c[4] = -1; 2779 } 2780 else 2781 { 2782 au4_idx_c[4] = pu1_tile_idx[ctbx_c_t - 1 + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)]; 2783 au4_idx_c[2] = au4_idx_c[5] = pu1_tile_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)]; 2784 } 2785 idx_c = pu1_tile_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)]; 2786 au4_idx_c[1] = au4_idx_c[7] = pu1_tile_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)]; 2787 au4_idx_c[3] = pu1_tile_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)]; 2788 2789 for(i = 0; i < 8; i++) 2790 { 2791 /*Sets the edges that lie on the slice/tile boundary*/ 2792 if(au4_idx_c[i] != idx_c) 2793 { 2794 au1_tile_slice_boundary[i] |= 1; 2795 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0 2796 } 2797 } 2798 } 2799 } 2800 } 2801 2802 for(i = 0; i < 8; i++) 2803 { 2804 /*Sets the edges that lie on the slice/tile boundary*/ 2805 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i])) 2806 { 2807 au1_avail_luma[i] = 0; 2808 } 2809 } 2810 2811 } 2812 } 2813 if(0 == ps_sao_ctxt->i4_ctb_x) 2814 { 2815 au1_avail_luma[0] = 0; 2816 au1_avail_luma[4] = 0; 2817 au1_avail_luma[6] = 0; 2818 } 2819 2820 if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_luma) 2821 { 2822 au1_avail_luma[1] = 0; 2823 au1_avail_luma[5] = 0; 2824 au1_avail_luma[7] = 0; 2825 } 2826 2827 if(0 == ps_sao_ctxt->i4_ctb_y) 2828 { 2829 au1_avail_luma[2] = 0; 2830 au1_avail_luma[4] = 0; 2831 au1_avail_luma[5] = 0; 2832 } 2833 2834 if(ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) <= sao_ht_luma) 2835 { 2836 au1_avail_luma[3] = 0; 2837 au1_avail_luma[6] = 0; 2838 au1_avail_luma[7] = 0; 2839 } 2840 2841 { 2842 au1_src_top_right[0] = pu1_src_luma[sao_wd_luma - src_strd]; 2843 u1_sao_src_top_left_luma_bot_left = pu1_src_luma[sao_ht_luma * src_strd - 1]; 2844 2845 ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma, 2846 src_strd, 2847 pu1_src_left_luma, 2848 pu1_src_top_luma, 2849 pu1_sao_src_top_left_luma_curr_ctb, 2850 au1_src_top_right, 2851 &u1_sao_src_top_left_luma_bot_left, 2852 au1_avail_luma, 2853 ai1_offset_y, 2854 sao_wd_luma, 2855 sao_ht_luma); 2856 } 2857 pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma]; 2858 pu1_sao_src_top_left_luma_bot_left[0] = pu1_src_luma[(sao_ht_luma)*src_strd + sao_wd_luma - 1]; 2859 } 2860 } 2861 } 2862 2863 if((0 != sao_wd_chroma) && (0 != sao_ht_chroma)) 2864 { 2865 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag) 2866 { 2867 if(0 == ps_sao->b3_cb_type_idx) 2868 { 2869 for(row = 0; row < sao_ht_chroma; row++) 2870 { 2871 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)]; 2872 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)]; 2873 } 2874 pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2]; 2875 pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1]; 2876 2877 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma); 2878 2879 pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma]; 2880 pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1]; 2881 } 2882 2883 else if(1 == ps_sao->b3_cb_type_idx) 2884 { 2885 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1; 2886 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2; 2887 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3; 2888 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4; 2889 2890 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1; 2891 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2; 2892 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3; 2893 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4; 2894 2895 if(chroma_yuv420sp_vu) 2896 { 2897 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma, 2898 src_strd, 2899 pu1_src_left_chroma, 2900 pu1_src_top_chroma, 2901 pu1_sao_src_top_left_chroma_curr_ctb, 2902 ps_sao->b5_cr_band_pos, 2903 ps_sao->b5_cb_band_pos, 2904 ai1_offset_cr, 2905 ai1_offset_cb, 2906 sao_wd_chroma, 2907 sao_ht_chroma 2908 ); 2909 } 2910 else 2911 { 2912 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma, 2913 src_strd, 2914 pu1_src_left_chroma, 2915 pu1_src_top_chroma, 2916 pu1_sao_src_top_left_chroma_curr_ctb, 2917 ps_sao->b5_cb_band_pos, 2918 ps_sao->b5_cr_band_pos, 2919 ai1_offset_cb, 2920 ai1_offset_cr, 2921 sao_wd_chroma, 2922 sao_ht_chroma 2923 ); 2924 } 2925 } 2926 2927 else // if(2 <= ps_sao->b3_cb_type_idx) 2928 { 2929 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1; 2930 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2; 2931 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3; 2932 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4; 2933 2934 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1; 2935 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2; 2936 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3; 2937 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4; 2938 2939 for(i = 0; i < 8; i++) 2940 { 2941 au1_avail_chroma[i] = 255; 2942 au1_tile_slice_boundary[i] = 0; 2943 au4_idx_c[i] = 0; 2944 au4_ilf_across_tile_slice_enable[i] = 1; 2945 } 2946 { 2947 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag)) 2948 { 2949 ctbx_c_t = ps_sao_ctxt->i4_ctb_x; 2950 ctby_c_t = ps_sao_ctxt->i4_ctb_y - 1; 2951 2952 ctbx_c_l = ps_sao_ctxt->i4_ctb_x - 1; 2953 ctby_c_l = ps_sao_ctxt->i4_ctb_y; 2954 2955 ctbx_c_r = ps_sao_ctxt->i4_ctb_x; 2956 ctby_c_r = ps_sao_ctxt->i4_ctb_y; 2957 2958 ctbx_c_d = ps_sao_ctxt->i4_ctb_x; 2959 ctby_c_d = ps_sao_ctxt->i4_ctb_y; 2960 2961 ctbx_c = ps_sao_ctxt->i4_ctb_x; 2962 ctby_c = ps_sao_ctxt->i4_ctb_y; 2963 2964 if(!ps_slice_hdr->i1_first_slice_in_pic_flag) 2965 { 2966 if(0 == ps_sao_ctxt->i4_ctb_x) 2967 { 2968 au4_idx_c[0] = -1; 2969 au4_idx_c[4] = -1; 2970 au4_idx_c[6] = -1; 2971 } 2972 else 2973 { 2974 au4_idx_c[0] = au4_idx_c[6] = pu1_slice_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)]; 2975 } 2976 2977 if(0 == ps_sao_ctxt->i4_ctb_y) 2978 { 2979 au4_idx_c[2] = -1; 2980 au4_idx_c[4] = -1; 2981 au4_idx_c[5] = -1; 2982 } 2983 else 2984 { 2985 au4_idx_c[2] = au4_idx_c[5] = pu1_slice_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)]; 2986 au4_idx_c[4] = pu1_slice_idx[ctbx_c_t - 1 + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)]; 2987 } 2988 idx_c = pu1_slice_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)]; 2989 au4_idx_c[1] = au4_idx_c[7] = pu1_slice_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)]; 2990 au4_idx_c[3] = pu1_slice_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)]; 2991 2992 if(0 == ps_sao_ctxt->i4_ctb_x) 2993 { 2994 au4_ilf_across_tile_slice_enable[0] = 0; 2995 au4_ilf_across_tile_slice_enable[4] = 0; 2996 au4_ilf_across_tile_slice_enable[6] = 0; 2997 } 2998 else 2999 { 3000 au4_ilf_across_tile_slice_enable[6] &= (ps_slice_hdr_base + au4_idx_c[6])->i1_slice_loop_filter_across_slices_enabled_flag; 3001 au4_ilf_across_tile_slice_enable[0] &= (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag; 3002 } 3003 3004 if(0 == ps_sao_ctxt->i4_ctb_y) 3005 { 3006 au4_ilf_across_tile_slice_enable[2] = 0; 3007 au4_ilf_across_tile_slice_enable[4] = 0; 3008 au4_ilf_across_tile_slice_enable[5] = 0; 3009 } 3010 else 3011 { 3012 au4_ilf_across_tile_slice_enable[2] &= (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag; 3013 au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2]; 3014 } 3015 3016 au4_ilf_across_tile_slice_enable[1] &= (ps_slice_hdr_base + au4_idx_c[1])->i1_slice_loop_filter_across_slices_enabled_flag; 3017 au4_ilf_across_tile_slice_enable[3] &= (ps_slice_hdr_base + au4_idx_c[3])->i1_slice_loop_filter_across_slices_enabled_flag; 3018 au4_ilf_across_tile_slice_enable[7] &= (ps_slice_hdr_base + au4_idx_c[7])->i1_slice_loop_filter_across_slices_enabled_flag; 3019 3020 /* 3021 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag 3022 * of the pixel having a greater address is checked. Accordingly, set the availability flags 3023 */ 3024 for(i = 0; i < 8; i++) 3025 { 3026 /*Sets the edges that lie on the slice/tile boundary*/ 3027 if(au4_idx_c[i] != idx_c) 3028 { 3029 au1_tile_slice_boundary[i] = 1; 3030 } 3031 else 3032 { 3033 au4_ilf_across_tile_slice_enable[i] = 1; 3034 } 3035 } 3036 /*Reset indices*/ 3037 for(i = 0; i < 8; i++) 3038 { 3039 au4_idx_c[i] = 0; 3040 } 3041 } 3042 3043 if(ps_pps->i1_tiles_enabled_flag) 3044 { 3045 /* Calculate availability flags at slice boundary */ 3046 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y)))) 3047 { 3048 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */ 3049 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag) 3050 { 3051 if(0 == ps_sao_ctxt->i4_ctb_x) 3052 { 3053 au4_idx_c[6] = -1; 3054 au4_idx_c[0] = -1; 3055 au4_idx_c[4] = -1; 3056 } 3057 else 3058 { 3059 au4_idx_c[0] = au4_idx_c[6] = pu1_tile_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)]; 3060 } 3061 3062 if(0 == ps_sao_ctxt->i4_ctb_y) 3063 { 3064 au4_idx_c[2] = -1; 3065 au4_idx_c[5] = -1; 3066 au4_idx_c[4] = -1; 3067 } 3068 else 3069 { 3070 au4_idx_c[4] = pu1_tile_idx[ctbx_c_t - 1 + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)]; 3071 au4_idx_c[2] = au4_idx_c[5] = pu1_tile_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)]; 3072 } 3073 idx_c = pu1_tile_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)]; 3074 au4_idx_c[1] = au4_idx_c[7] = pu1_tile_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)]; 3075 au4_idx_c[3] = pu1_tile_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)]; 3076 3077 for(i = 0; i < 8; i++) 3078 { 3079 /*Sets the edges that lie on the slice/tile boundary*/ 3080 if(au4_idx_c[i] != idx_c) 3081 { 3082 au1_tile_slice_boundary[i] |= 1; 3083 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0 3084 } 3085 } 3086 } 3087 } 3088 } 3089 3090 for(i = 0; i < 8; i++) 3091 { 3092 /*Sets the edges that lie on the slice/tile boundary*/ 3093 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i])) 3094 { 3095 au1_avail_chroma[i] = 0; 3096 } 3097 } 3098 } 3099 } 3100 3101 if(0 == ps_sao_ctxt->i4_ctb_x) 3102 { 3103 au1_avail_chroma[0] = 0; 3104 au1_avail_chroma[4] = 0; 3105 au1_avail_chroma[6] = 0; 3106 } 3107 3108 if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_chroma) 3109 { 3110 au1_avail_chroma[1] = 0; 3111 au1_avail_chroma[5] = 0; 3112 au1_avail_chroma[7] = 0; 3113 } 3114 3115 if(0 == ps_sao_ctxt->i4_ctb_y) 3116 { 3117 au1_avail_chroma[2] = 0; 3118 au1_avail_chroma[4] = 0; 3119 au1_avail_chroma[5] = 0; 3120 } 3121 3122 if(ps_sps->i2_pic_height_in_luma_samples / 2 - (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) <= sao_ht_chroma) 3123 { 3124 au1_avail_chroma[3] = 0; 3125 au1_avail_chroma[6] = 0; 3126 au1_avail_chroma[7] = 0; 3127 } 3128 3129 { 3130 au1_src_top_right[0] = pu1_src_chroma[sao_wd_chroma - src_strd]; 3131 au1_src_top_right[1] = pu1_src_chroma[sao_wd_chroma - src_strd + 1]; 3132 3133 au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2]; 3134 au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1]; 3135 3136 if(chroma_yuv420sp_vu) 3137 { 3138 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma, 3139 src_strd, 3140 pu1_src_left_chroma, 3141 pu1_src_top_chroma, 3142 pu1_sao_src_top_left_chroma_curr_ctb, 3143 au1_src_top_right, 3144 au1_sao_src_top_left_chroma_bot_left, 3145 au1_avail_chroma, 3146 ai1_offset_cr, 3147 ai1_offset_cb, 3148 sao_wd_chroma, 3149 sao_ht_chroma); 3150 } 3151 else 3152 { 3153 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma, 3154 src_strd, 3155 pu1_src_left_chroma, 3156 pu1_src_top_chroma, 3157 pu1_sao_src_top_left_chroma_curr_ctb, 3158 au1_src_top_right, 3159 au1_sao_src_top_left_chroma_bot_left, 3160 au1_avail_chroma, 3161 ai1_offset_cb, 3162 ai1_offset_cr, 3163 sao_wd_chroma, 3164 sao_ht_chroma); 3165 } 3166 } 3167 3168 } 3169 pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma]; 3170 pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1]; 3171 3172 pu1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[(sao_ht_chroma)*src_strd + sao_wd_chroma - 2]; 3173 pu1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[(sao_ht_chroma)*src_strd + sao_wd_chroma - 1]; 3174 } 3175 3176 } 3177 } 3178 3179 3180 3181 3182 /* If no loop filter is enabled copy the backed up values */ 3183 { 3184 /* Luma */ 3185 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag && no_loop_filter_enabled_luma) 3186 { 3187 UWORD32 u4_no_loop_filter_flag; 3188 WORD32 loop_filter_bit_pos; 3189 WORD32 log2_min_cu = 3; 3190 WORD32 min_cu = (1 << log2_min_cu); 3191 UWORD8 *pu1_src_tmp_luma = pu1_src_luma; 3192 WORD32 sao_blk_ht = ctb_size - SAO_SHIFT_CTB; 3193 WORD32 sao_blk_wd = ctb_size; 3194 WORD32 remaining_rows; 3195 WORD32 remaining_cols; 3196 3197 remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB); 3198 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB); 3199 if(remaining_rows <= SAO_SHIFT_CTB) 3200 sao_blk_ht += remaining_rows; 3201 if(remaining_cols <= SAO_SHIFT_CTB) 3202 sao_blk_wd += remaining_cols; 3203 3204 pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB : 0; 3205 pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0; 3206 3207 pu1_src_backup_luma = ps_sao_ctxt->pu1_tmp_buf_luma; 3208 3209 loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) + 3210 (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3); 3211 if(ps_sao_ctxt->i4_ctb_x > 0) 3212 loop_filter_bit_pos -= 1; 3213 3214 pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag + 3215 (loop_filter_bit_pos >> 3); 3216 3217 for(i = -(ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB : 0) >> log2_min_cu; 3218 i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++) 3219 { 3220 WORD32 tmp_wd = sao_blk_wd; 3221 3222 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> 3223 (loop_filter_bit_pos & 7); 3224 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1; 3225 3226 if(u4_no_loop_filter_flag) 3227 { 3228 while(tmp_wd > 0) 3229 { 3230 if(CTZ(u4_no_loop_filter_flag)) 3231 { 3232 pu1_src_tmp_luma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); 3233 pu1_src_backup_luma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); 3234 tmp_wd -= CTZ(u4_no_loop_filter_flag) << log2_min_cu; 3235 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag)); 3236 } 3237 else 3238 { 3239 for(row = 0; row < min_cu; row++) 3240 { 3241 for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++) 3242 { 3243 pu1_src_tmp_luma[row * src_strd + col] = pu1_src_backup_luma[row * backup_strd + col]; 3244 } 3245 } 3246 pu1_src_tmp_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); 3247 pu1_src_backup_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); 3248 tmp_wd -= CTZ(~u4_no_loop_filter_flag) << log2_min_cu; 3249 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag)); 3250 } 3251 } 3252 3253 pu1_src_tmp_luma -= sao_blk_wd; 3254 pu1_src_backup_luma -= sao_blk_wd; 3255 } 3256 3257 pu1_src_tmp_luma += (src_strd << log2_min_cu); 3258 pu1_src_backup_luma += (backup_strd << log2_min_cu); 3259 } 3260 } 3261 3262 /* Chroma */ 3263 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag && no_loop_filter_enabled_chroma) 3264 { 3265 UWORD32 u4_no_loop_filter_flag; 3266 WORD32 loop_filter_bit_pos; 3267 WORD32 log2_min_cu = 3; 3268 WORD32 min_cu = (1 << log2_min_cu); 3269 UWORD8 *pu1_src_tmp_chroma = pu1_src_chroma; 3270 WORD32 sao_blk_ht = ctb_size - 2 * SAO_SHIFT_CTB; 3271 WORD32 sao_blk_wd = ctb_size; 3272 WORD32 remaining_rows; 3273 WORD32 remaining_cols; 3274 3275 remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB); 3276 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB); 3277 if(remaining_rows <= 2 * SAO_SHIFT_CTB) 3278 sao_blk_ht += remaining_rows; 3279 if(remaining_cols <= 2 * SAO_SHIFT_CTB) 3280 sao_blk_wd += remaining_cols; 3281 3282 pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB * 2 : 0; 3283 pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0; 3284 3285 pu1_src_backup_chroma = ps_sao_ctxt->pu1_tmp_buf_chroma; 3286 3287 loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) + 3288 (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3); 3289 if(ps_sao_ctxt->i4_ctb_x > 0) 3290 loop_filter_bit_pos -= 2; 3291 3292 pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag + 3293 (loop_filter_bit_pos >> 3); 3294 3295 for(i = -(ps_sao_ctxt->i4_ctb_y ? 2 * SAO_SHIFT_CTB : 0) >> log2_min_cu; 3296 i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++) 3297 { 3298 WORD32 tmp_wd = sao_blk_wd; 3299 3300 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> 3301 (loop_filter_bit_pos & 7); 3302 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1; 3303 3304 if(u4_no_loop_filter_flag) 3305 { 3306 while(tmp_wd > 0) 3307 { 3308 if(CTZ(u4_no_loop_filter_flag)) 3309 { 3310 pu1_src_tmp_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); 3311 pu1_src_backup_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); 3312 tmp_wd -= CTZ(u4_no_loop_filter_flag) << log2_min_cu; 3313 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag)); 3314 } 3315 else 3316 { 3317 for(row = 0; row < min_cu / 2; row++) 3318 { 3319 for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++) 3320 { 3321 pu1_src_tmp_chroma[row * src_strd + col] = pu1_src_backup_chroma[row * backup_strd + col]; 3322 } 3323 } 3324 3325 pu1_src_tmp_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); 3326 pu1_src_backup_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); 3327 tmp_wd -= CTZ(~u4_no_loop_filter_flag) << log2_min_cu; 3328 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag)); 3329 } 3330 } 3331 3332 pu1_src_tmp_chroma -= sao_blk_wd; 3333 pu1_src_backup_chroma -= sao_blk_wd; 3334 } 3335 3336 pu1_src_tmp_chroma += ((src_strd / 2) << log2_min_cu); 3337 pu1_src_backup_chroma += ((backup_strd / 2) << log2_min_cu); 3338 } 3339 } 3340 } 3341 3342 } 3343 3344