1 /****************************************************************************** 2 * 3 * Copyright (C) 2018 The Android Open Source Project 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ***************************************************************************** 18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore 19 */ 20 /** 21 ******************************************************************************* 22 * @file 23 * ihevce_sao.c 24 * 25 * @brief 26 * Contains definition for the ctb level sao function 27 * 28 * @author 29 * Ittiam 30 * 31 * @par List of Functions: 32 * ihevce_sao_set_avilability() 33 * ihevce_sao_ctb() 34 * ihevce_sao_analyse() 35 * 36 * @remarks 37 * None 38 * 39 ******************************************************************************* 40 */ 41 42 /*****************************************************************************/ 43 /* File Includes */ 44 /*****************************************************************************/ 45 /* System include files */ 46 #include <stdio.h> 47 #include <string.h> 48 #include <stdlib.h> 49 #include <assert.h> 50 #include <stdarg.h> 51 #include <math.h> 52 53 /* User include files */ 54 #include "ihevc_typedefs.h" 55 #include "itt_video_api.h" 56 #include "ihevce_api.h" 57 58 #include "rc_cntrl_param.h" 59 #include "rc_frame_info_collector.h" 60 #include "rc_look_ahead_params.h" 61 62 #include "ihevc_defs.h" 63 #include "ihevc_structs.h" 64 #include "ihevc_platform_macros.h" 65 #include "ihevc_deblk.h" 66 #include "ihevc_itrans_recon.h" 67 #include "ihevc_chroma_itrans_recon.h" 68 #include "ihevc_chroma_intra_pred.h" 69 #include "ihevc_intra_pred.h" 70 #include "ihevc_inter_pred.h" 71 #include "ihevc_mem_fns.h" 72 #include "ihevc_padding.h" 73 #include "ihevc_weighted_pred.h" 74 #include "ihevc_sao.h" 75 #include "ihevc_resi_trans.h" 76 #include "ihevc_quant_iquant_ssd.h" 77 #include "ihevc_cabac_tables.h" 78 79 #include "ihevce_defs.h" 80 #include "ihevce_lap_enc_structs.h" 81 #include "ihevce_multi_thrd_structs.h" 82 #include "ihevce_me_common_defs.h" 83 #include "ihevce_had_satd.h" 84 #include "ihevce_error_codes.h" 85 #include "ihevce_bitstream.h" 86 #include "ihevce_cabac.h" 87 #include "ihevce_rdoq_macros.h" 88 #include "ihevce_function_selector.h" 89 #include "ihevce_enc_structs.h" 90 #include "ihevce_entropy_structs.h" 91 #include "ihevce_cmn_utils_instr_set_router.h" 92 #include "ihevce_enc_loop_structs.h" 93 #include "ihevce_cabac_rdo.h" 94 #include "ihevce_sao.h" 95 96 /*****************************************************************************/ 97 /* Function Definitions */ 98 /*****************************************************************************/ 99 100 /** 101 ******************************************************************************* 102 * 103 * @brief 104 * ihevce_sao_set_avilability 105 * 106 * @par Description: 107 * Sets the availability flag for SAO. 108 * 109 * @param[in] 110 * ps_sao_ctxt: Pointer to SAO context 111 * @returns 112 * 113 * @remarks 114 * None 115 * 116 ******************************************************************************* 117 */ 118 void ihevce_sao_set_avilability( 119 UWORD8 *pu1_avail, sao_ctxt_t *ps_sao_ctxt, ihevce_tile_params_t *ps_tile_params) 120 { 121 WORD32 i; 122 123 WORD32 ctb_x_pos = ps_sao_ctxt->i4_ctb_x; 124 WORD32 ctb_y_pos = ps_sao_ctxt->i4_ctb_y; 125 126 for(i = 0; i < 8; i++) 127 { 128 pu1_avail[i] = 255; 129 } 130 131 /* SAO_note_01: If the CTB lies on a tile or a slice boundary and 132 in-loop filtering is enabled at tile and slice boundary, then SAO must 133 be performed at tile/slice boundaries also. 134 Hence the boundary checks should be based on frame position of CTB 135 rather than s_ctb_nbr_avail_flags.u1_left_avail flags. 136 Search for <SAO_note_01> in workspace to know more */ 137 /* Availaibility flags for first col*/ 138 if(ctb_x_pos == ps_tile_params->i4_first_ctb_x) 139 { 140 pu1_avail[0] = 0; 141 pu1_avail[4] = 0; 142 pu1_avail[6] = 0; 143 } 144 145 /* Availaibility flags for last col*/ 146 if((ctb_x_pos + 1) == 147 (ps_tile_params->i4_first_ctb_x + ps_tile_params->i4_curr_tile_wd_in_ctb_unit)) 148 { 149 pu1_avail[1] = 0; 150 pu1_avail[5] = 0; 151 pu1_avail[7] = 0; 152 } 153 154 /* Availaibility flags for first row*/ 155 if(ctb_y_pos == ps_tile_params->i4_first_ctb_y) 156 { 157 pu1_avail[2] = 0; 158 pu1_avail[4] = 0; 159 pu1_avail[5] = 0; 160 } 161 162 /* Availaibility flags for last row*/ 163 if((ctb_y_pos + 1) == 164 (ps_tile_params->i4_first_ctb_y + ps_tile_params->i4_curr_tile_ht_in_ctb_unit)) 165 { 166 pu1_avail[3] = 0; 167 pu1_avail[6] = 0; 168 pu1_avail[7] = 0; 169 } 170 } 171 172 /** 173 ******************************************************************************* 174 * 175 * @brief 176 * Sao CTB level function. 177 * 178 * @par Description: 179 * For a given CTB, sao is done. Both the luma and chroma 180 * blocks are processed 181 * 182 * @param[in] 183 * ps_sao_ctxt: Pointer to SAO context 184 * 185 * @returns 186 * 187 * @remarks 188 * None 189 * 190 ******************************************************************************* 191 */ 192 void ihevce_sao_ctb(sao_ctxt_t *ps_sao_ctxt, ihevce_tile_params_t *ps_tile_params) 193 { 194 sao_enc_t *ps_sao; 195 UWORD8 u1_src_top_left_luma, u1_src_top_left_chroma[2]; 196 UWORD8 *pu1_src_left_luma_buf, *pu1_src_top_luma_buf; 197 UWORD8 *pu1_src_left_chroma_buf, *pu1_src_top_chroma_buf; 198 UWORD8 *pu1_src_luma, *pu1_src_chroma; 199 WORD32 luma_src_stride, ctb_size; 200 WORD32 chroma_src_stride; 201 UWORD8 au1_avail_luma[8], au1_avail_chroma[8]; 202 WORD32 sao_blk_wd, sao_blk_ht, sao_wd_chroma, sao_ht_chroma; 203 UWORD8 *pu1_top_left_luma, *pu1_top_left_chroma; 204 UWORD8 *pu1_src_bot_left_luma, *pu1_src_top_right_luma; 205 UWORD8 *pu1_src_bot_left_chroma, *pu1_src_top_right_chroma; 206 UWORD8 u1_is_422 = (ps_sao_ctxt->ps_sps->i1_chroma_format_idc == 2); 207 208 ps_sao = ps_sao_ctxt->ps_sao; 209 210 ASSERT( 211 (abs(ps_sao->u1_y_offset[1]) <= 7) && (abs(ps_sao->u1_y_offset[2]) <= 7) && 212 (abs(ps_sao->u1_y_offset[3]) <= 7) && (abs(ps_sao->u1_y_offset[4]) <= 7)); 213 ASSERT( 214 (abs(ps_sao->u1_cb_offset[1]) <= 7) && (abs(ps_sao->u1_cb_offset[2]) <= 7) && 215 (abs(ps_sao->u1_cb_offset[3]) <= 7) && (abs(ps_sao->u1_cb_offset[4]) <= 7)); 216 ASSERT( 217 (abs(ps_sao->u1_cr_offset[1]) <= 7) && (abs(ps_sao->u1_cr_offset[2]) <= 7) && 218 (abs(ps_sao->u1_cr_offset[3]) <= 7) && (abs(ps_sao->u1_cr_offset[4]) <= 7)); 219 ASSERT( 220 (ps_sao->b5_y_band_pos <= 28) && (ps_sao->b5_cb_band_pos <= 28) && 221 (ps_sao->b5_cr_band_pos <= 28)); 222 223 if(ps_sao_ctxt->i1_slice_sao_luma_flag) 224 { 225 /*initialize the src pointer to current row*/ 226 luma_src_stride = ps_sao_ctxt->i4_cur_luma_recon_stride; 227 228 ctb_size = ps_sao_ctxt->i4_ctb_size; 229 230 /* 1 extra byte in top buf stride for top left of 1st ctb of every row*/ 231 ps_sao->u1_y_offset[0] = 0; /* 0th element is not being used */ 232 sao_blk_wd = ps_sao_ctxt->i4_sao_blk_wd; 233 sao_blk_ht = ps_sao_ctxt->i4_sao_blk_ht; 234 235 pu1_src_luma = ps_sao_ctxt->pu1_cur_luma_recon_buf; 236 /* Pointer to the top luma buffer corresponding to the current ctb row*/ 237 pu1_src_top_luma_buf = ps_sao_ctxt->pu1_curr_sao_src_top_luma; 238 239 /* Pointer to left luma buffer corresponding to the current ctb row*/ 240 pu1_src_left_luma_buf = ps_sao_ctxt->au1_left_luma_scratch; 241 242 /* Pointer to the top right luma buffer corresponding to the current ctb row*/ 243 pu1_src_top_right_luma = pu1_src_top_luma_buf /*- top_buf_stide*/ + sao_blk_wd; 244 245 /* Pointer to the bottom left luma buffer corresponding to the current ctb row*/ 246 pu1_src_bot_left_luma = 247 ps_sao_ctxt->pu1_frm_luma_recon_buf + ctb_size * ps_sao_ctxt->i4_frm_luma_recon_stride - 248 1 + (ps_sao_ctxt->i4_frm_luma_recon_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) + 249 (ps_sao_ctxt->i4_ctb_x * ctb_size); /* Bottom left*/ 250 251 /* Back up the top left pixel for (x+1, y+1)th ctb*/ 252 u1_src_top_left_luma = *(pu1_src_top_luma_buf + sao_blk_wd - 1); 253 pu1_top_left_luma = pu1_src_top_luma_buf - 1; 254 255 if(SAO_BAND == ps_sao->b3_y_type_idx) 256 { 257 ihevc_sao_band_offset_luma( 258 pu1_src_luma, 259 luma_src_stride, 260 pu1_src_left_luma_buf, /* Pass the pointer to the left luma buffer backed up in the (x-1,y)th ctb */ 261 pu1_src_top_luma_buf, /* Pass the ptr to the top luma buf backed up in the (x,y-1)th ctb */ 262 pu1_src_top_luma_buf - 1, /* Top left*/ 263 ps_sao->b5_y_band_pos, 264 ps_sao->u1_y_offset, 265 sao_blk_wd, 266 sao_blk_ht); 267 268 if((ps_sao_ctxt->i4_ctb_y > 0)) 269 { 270 *(pu1_src_top_luma_buf + sao_blk_wd - 1) = u1_src_top_left_luma; 271 } 272 } 273 else if(ps_sao->b3_y_type_idx >= SAO_EDGE_0_DEG) 274 { 275 /*In case of edge offset, 1st and 2nd offsets are always inferred as offsets 276 * corresponding to EO category 1 and 2 which should be always positive 277 * And 3rd and 4th offsets are always inferred as offsets corresponding to 278 * EO category 3 and 4 which should be negative for all the EO classes(or EO typeidx) 279 */ 280 // clang-format off 281 ASSERT((ps_sao->u1_y_offset[1] >= 0) && (ps_sao->u1_y_offset[2] >= 0)); 282 ASSERT((ps_sao->u1_y_offset[3] <= 0) && (ps_sao->u1_y_offset[4] <= 0)); 283 // clang-format on 284 285 ihevce_sao_set_avilability(au1_avail_luma, ps_sao_ctxt, ps_tile_params); 286 287 ps_sao_ctxt->apf_sao_luma[ps_sao->b3_y_type_idx - 2]( 288 pu1_src_luma, 289 luma_src_stride, 290 pu1_src_left_luma_buf, /* Pass the pointer to the left luma buffer backed up in the (x-1,y)th ctb */ 291 pu1_src_top_luma_buf, /* Pass the ptr to the top luma buf backed up in the (x,y-1)th ctb */ 292 pu1_top_left_luma, /* Top left*/ 293 pu1_src_top_right_luma, /* Top right*/ 294 pu1_src_bot_left_luma, /* Bottom left*/ 295 au1_avail_luma, 296 ps_sao->u1_y_offset, 297 sao_blk_wd, 298 sao_blk_ht); 299 300 if((ps_sao_ctxt->i4_ctb_y > 0)) 301 { 302 *(pu1_src_top_luma_buf + sao_blk_wd - 1) = u1_src_top_left_luma; 303 } 304 } 305 } 306 307 if(ps_sao_ctxt->i1_slice_sao_chroma_flag) 308 { 309 /*initialize the src pointer to current row*/ 310 chroma_src_stride = ps_sao_ctxt->i4_cur_chroma_recon_stride; 311 ctb_size = ps_sao_ctxt->i4_ctb_size; 312 313 /* 1 extra byte in top buf stride for top left of 1st ctb of every row*/ 314 //top_buf_stide = ps_sao_ctxt->u4_ctb_aligned_wd + 2; 315 ps_sao->u1_cb_offset[0] = 0; /* 0th element is not used */ 316 ps_sao->u1_cr_offset[0] = 0; 317 sao_wd_chroma = ps_sao_ctxt->i4_sao_blk_wd; 318 sao_ht_chroma = ps_sao_ctxt->i4_sao_blk_ht / (!u1_is_422 + 1); 319 320 pu1_src_chroma = ps_sao_ctxt->pu1_cur_chroma_recon_buf; 321 /* Pointer to the top luma buffer corresponding to the current ctb row*/ 322 pu1_src_top_chroma_buf = ps_sao_ctxt->pu1_curr_sao_src_top_chroma; 323 // clang-format off 324 /* Pointer to left luma buffer corresponding to the current ctb row*/ 325 pu1_src_left_chroma_buf = ps_sao_ctxt->au1_left_chroma_scratch; //ps_sao_ctxt->au1_sao_src_left_chroma; 326 // clang-format on 327 /* Pointer to the top right chroma buffer corresponding to the current ctb row*/ 328 pu1_src_top_right_chroma = pu1_src_top_chroma_buf /*- top_buf_stide*/ + sao_wd_chroma; 329 330 /* Pointer to the bottom left luma buffer corresponding to the current ctb row*/ 331 pu1_src_bot_left_chroma = 332 ps_sao_ctxt->pu1_frm_chroma_recon_buf + 333 (ctb_size >> !u1_is_422) * ps_sao_ctxt->i4_frm_chroma_recon_stride - 2 + 334 (ps_sao_ctxt->i4_frm_chroma_recon_stride * ps_sao_ctxt->i4_ctb_y * 335 (ctb_size >> !u1_is_422)) + 336 (ps_sao_ctxt->i4_ctb_x * ctb_size); /* Bottom left*/ 337 338 /* Back up the top left pixel for (x+1, y+1)th ctb*/ 339 u1_src_top_left_chroma[0] = *(pu1_src_top_chroma_buf + sao_wd_chroma - 2); 340 u1_src_top_left_chroma[1] = *(pu1_src_top_chroma_buf + sao_wd_chroma - 1); 341 pu1_top_left_chroma = pu1_src_top_chroma_buf - 2; 342 343 if(SAO_BAND == ps_sao->b3_cb_type_idx) 344 { 345 ihevc_sao_band_offset_chroma( 346 pu1_src_chroma, 347 chroma_src_stride, 348 pu1_src_left_chroma_buf, /* Pass the pointer to the left luma buffer backed up in the (x-1,y)th ctb */ 349 pu1_src_top_chroma_buf, /* Pass the ptr to the top luma buf backed up in the (x,y-1)th ctb */ 350 pu1_top_left_chroma, /* Top left*/ 351 ps_sao->b5_cb_band_pos, 352 ps_sao->b5_cr_band_pos, 353 ps_sao->u1_cb_offset, 354 ps_sao->u1_cr_offset, 355 sao_wd_chroma, 356 sao_ht_chroma); 357 358 if((ps_sao_ctxt->i4_ctb_y > 0)) 359 { 360 *(pu1_src_top_chroma_buf + sao_wd_chroma - 2) = u1_src_top_left_chroma[0]; 361 *(pu1_src_top_chroma_buf + sao_wd_chroma - 1) = u1_src_top_left_chroma[1]; 362 } 363 } 364 else if(ps_sao->b3_cb_type_idx >= SAO_EDGE_0_DEG) 365 { 366 /*In case of edge offset, 1st and 2nd offsets are always inferred as offsets 367 * corresponding to EO category 1 and 2 which should be always positive 368 * And 3rd and 4th offsets are always inferred as offsets corresponding to 369 * EO category 3 and 4 which should be negative for all the EO classes(or EO typeidx) 370 */ 371 ASSERT((ps_sao->u1_cb_offset[1] >= 0) && (ps_sao->u1_cb_offset[2] >= 0)); 372 ASSERT((ps_sao->u1_cb_offset[3] <= 0) && (ps_sao->u1_cb_offset[4] <= 0)); 373 374 ASSERT((ps_sao->u1_cr_offset[1] >= 0) && (ps_sao->u1_cr_offset[2] >= 0)); 375 ASSERT((ps_sao->u1_cr_offset[3] <= 0) && (ps_sao->u1_cr_offset[4] <= 0)); 376 377 ihevce_sao_set_avilability(au1_avail_chroma, ps_sao_ctxt, ps_tile_params); 378 379 ps_sao_ctxt->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2]( 380 pu1_src_chroma, 381 chroma_src_stride, 382 pu1_src_left_chroma_buf, /* Pass the pointer to the left luma buffer backed up in the (x-1,y)th ctb */ 383 pu1_src_top_chroma_buf, /* Pass the ptr to the top luma buf backed up in the (x,y-1)th ctb */ 384 pu1_top_left_chroma, /* Top left*/ 385 pu1_src_top_right_chroma, /* Top right*/ 386 pu1_src_bot_left_chroma, /* Bottom left*/ 387 au1_avail_chroma, 388 ps_sao->u1_cb_offset, 389 ps_sao->u1_cr_offset, 390 sao_wd_chroma, 391 sao_ht_chroma); 392 393 if((ps_sao_ctxt->i4_ctb_y > 0)) 394 { 395 *(pu1_src_top_chroma_buf + sao_wd_chroma - 2) = u1_src_top_left_chroma[0]; 396 *(pu1_src_top_chroma_buf + sao_wd_chroma - 1) = u1_src_top_left_chroma[1]; 397 } 398 } 399 } 400 } 401 402 /** 403 ******************************************************************************* 404 * 405 * @brief 406 * CTB level function to do SAO analysis. 407 * 408 * @par Description: 409 * For a given CTB, sao analysis is done for both luma and chroma. 410 * 411 * 412 * @param[in] 413 * ps_sao_ctxt: Pointer to SAO context 414 * ps_ctb_enc_loop_out : pointer to ctb level output structure from enc loop 415 * 416 * @returns 417 * 418 * @remarks 419 * None 420 * 421 * @Assumptions: 422 * 1) Initial Cabac state for current ctb to be sao'ed (i.e (x-1,y-1)th ctb) is assumed to be 423 * almost same as cabac state of (x,y)th ctb. 424 * 2) Distortion is calculated in spatial domain but lamda used to calculate the cost is 425 * in freq domain. 426 ******************************************************************************* 427 */ 428 void ihevce_sao_analyse( 429 sao_ctxt_t *ps_sao_ctxt, 430 ctb_enc_loop_out_t *ps_ctb_enc_loop_out, 431 UWORD32 *pu4_frame_rdopt_header_bits, 432 ihevce_tile_params_t *ps_tile_params) 433 { 434 UWORD8 *pu1_luma_scratch_buf; 435 UWORD8 *pu1_chroma_scratch_buf; 436 UWORD8 *pu1_src_luma, *pu1_recon_luma; 437 UWORD8 *pu1_src_chroma, *pu1_recon_chroma; 438 WORD32 luma_src_stride, luma_recon_stride, ctb_size, ctb_wd, ctb_ht; 439 WORD32 chroma_src_stride, chroma_recon_stride; 440 WORD32 i4_luma_scratch_buf_stride; 441 WORD32 i4_chroma_scratch_buf_stride; 442 sao_ctxt_t s_sao_ctxt; 443 UWORD32 ctb_bits = 0, distortion = 0, curr_cost = 0, best_cost = 0; 444 LWORD64 i8_cl_ssd_lambda_qf, i8_cl_ssd_lambda_chroma_qf; 445 WORD32 rdo_cand, num_luma_rdo_cand = 0, num_rdo_cand = 0; 446 WORD32 curr_buf_idx, best_buf_idx, best_cand_idx; 447 WORD32 row; 448 WORD32 edgeidx; 449 WORD32 acc_error_category[5] = { 0, 0, 0, 0, 0 }, category_count[5] = { 0, 0, 0, 0, 0 }; 450 sao_enc_t s_best_luma_chroma_cand; 451 WORD32 best_ctb_sao_bits = 0; 452 #if DISABLE_SAO_WHEN_NOISY && !defined(ENC_VER_v2) 453 UWORD8 u1_force_no_offset = 454 ps_sao_ctxt 455 ->ps_ctb_data 456 [ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_data_stride * ps_sao_ctxt->i4_ctb_y] 457 .s_ctb_noise_params.i4_noise_present; 458 #endif 459 UWORD8 u1_is_422 = (ps_sao_ctxt->ps_sps->i1_chroma_format_idc == 2); 460 461 *pu4_frame_rdopt_header_bits = 0; 462 463 ctb_size = ps_sao_ctxt->i4_ctb_size; 464 ctb_wd = ps_sao_ctxt->i4_sao_blk_wd; 465 ctb_ht = ps_sao_ctxt->i4_sao_blk_ht; 466 467 s_sao_ctxt = ps_sao_ctxt[0]; 468 469 /* Memset the best luma_chroma_cand structure to avoid asserts in debug mode*/ 470 memset(&s_best_luma_chroma_cand, 0, sizeof(sao_enc_t)); 471 472 /* Initialize the pointer and strides for luma buffers*/ 473 pu1_recon_luma = ps_sao_ctxt->pu1_cur_luma_recon_buf; 474 luma_recon_stride = ps_sao_ctxt->i4_cur_luma_recon_stride; 475 476 pu1_src_luma = ps_sao_ctxt->pu1_cur_luma_src_buf; 477 luma_src_stride = ps_sao_ctxt->i4_cur_luma_src_stride; 478 i4_luma_scratch_buf_stride = SCRATCH_BUF_STRIDE; 479 480 /* Initialize the pointer and strides for luma buffers*/ 481 pu1_recon_chroma = ps_sao_ctxt->pu1_cur_chroma_recon_buf; 482 chroma_recon_stride = ps_sao_ctxt->i4_cur_chroma_recon_stride; 483 484 pu1_src_chroma = ps_sao_ctxt->pu1_cur_chroma_src_buf; 485 chroma_src_stride = ps_sao_ctxt->i4_cur_chroma_src_stride; 486 i4_chroma_scratch_buf_stride = SCRATCH_BUF_STRIDE; 487 488 i8_cl_ssd_lambda_qf = ps_sao_ctxt->i8_cl_ssd_lambda_qf; 489 i8_cl_ssd_lambda_chroma_qf = ps_sao_ctxt->i8_cl_ssd_lambda_chroma_qf; 490 491 /*****************************************************/ 492 /********************RDO FOR LUMA CAND****************/ 493 /*****************************************************/ 494 495 #if !DISABLE_SAO_WHEN_NOISY 496 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag) 497 #else 498 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag && !u1_force_no_offset) 499 #endif 500 { 501 /* Candidate for Edge offset SAO*/ 502 /* Following is the convention for curr pixel and 503 * two neighbouring pixels for 0 deg, 90 deg, 135 deg and 45 deg */ 504 /* 505 * 0 deg : a c b 90 deg: a 135 deg: a 45 deg: a 506 * c c c 507 * b b b 508 */ 509 510 /* 0 deg SAO CAND*/ 511 /* Reset the error and edge count*/ 512 for(edgeidx = 0; edgeidx < 5; edgeidx++) 513 { 514 acc_error_category[edgeidx] = 0; 515 category_count[edgeidx] = 0; 516 } 517 518 /* Call the funciton to populate the EO parameter for this ctb for 0 deg EO class*/ 519 // clang-format off 520 ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_get_luma_eo_sao_params(ps_sao_ctxt, SAO_EDGE_0_DEG, 521 acc_error_category, category_count); 522 // clang-format on 523 // clang-format off 524 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_y_type_idx = SAO_EDGE_0_DEG; 525 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[1] = category_count[0] 526 ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7)) 527 : 0; 528 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[2] = category_count[1] 529 ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7)) 530 : 0; 531 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[3] = category_count[3] 532 ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0)) 533 : 0; 534 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[4] =category_count[4] 535 ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0)) 536 : 0; 537 // clang-format on 538 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_y_band_pos = 0; 539 // clang-format off 540 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cb_type_idx = SAO_NONE; 541 // clang-format on 542 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[1] = 0; 543 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[2] = 0; 544 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[3] = 0; 545 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[4] = 0; 546 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cb_band_pos = 0; 547 548 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cr_type_idx = SAO_NONE; 549 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[1] = 0; 550 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[2] = 0; 551 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[3] = 0; 552 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[4] = 0; 553 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cr_band_pos = 0; 554 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_left_flag = 0; 555 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_up_flag = 0; 556 557 num_luma_rdo_cand++; 558 559 /* 90 degree SAO CAND*/ 560 for(edgeidx = 0; edgeidx < 5; edgeidx++) 561 { 562 acc_error_category[edgeidx] = 0; 563 category_count[edgeidx] = 0; 564 } 565 566 /* Call the funciton to populate the EO parameter for this ctb for 90 deg EO class*/ 567 // clang-format off 568 ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_get_luma_eo_sao_params(ps_sao_ctxt, SAO_EDGE_90_DEG, 569 acc_error_category, category_count); 570 571 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_y_type_idx = SAO_EDGE_90_DEG; 572 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[1] = category_count[0] 573 ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7)) 574 : 0; 575 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[2] = category_count[1] 576 ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7)) 577 : 0; 578 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[3] = category_count[3] 579 ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0)) 580 : 0; 581 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[4] = category_count[4] 582 ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0)) 583 : 0; 584 // clang-format on 585 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_y_band_pos = 0; 586 587 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cb_type_idx = SAO_NONE; 588 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[1] = 0; 589 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[2] = 0; 590 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[3] = 0; 591 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[4] = 0; 592 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cb_band_pos = 0; 593 594 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cr_type_idx = SAO_NONE; 595 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[1] = 0; 596 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[2] = 0; 597 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[3] = 0; 598 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[4] = 0; 599 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cr_band_pos = 0; 600 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_left_flag = 0; 601 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_up_flag = 0; 602 603 num_luma_rdo_cand++; 604 605 /* 135 degree SAO CAND*/ 606 for(edgeidx = 0; edgeidx < 5; edgeidx++) 607 { 608 acc_error_category[edgeidx] = 0; 609 category_count[edgeidx] = 0; 610 } 611 612 /* Call the funciton to populate the EO parameter for this ctb for 135 deg EO class*/ 613 // clang-format off 614 ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_get_luma_eo_sao_params(ps_sao_ctxt, SAO_EDGE_135_DEG, 615 acc_error_category, category_count); 616 617 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_y_type_idx = SAO_EDGE_135_DEG; 618 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[1] = category_count[0] 619 ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7)) 620 : 0; 621 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[2] = category_count[1] 622 ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7)) 623 : 0; 624 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[3] = category_count[3] 625 ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0)) 626 : 0; 627 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[4] = category_count[4] 628 ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0)) 629 : 0; 630 // clang-format on 631 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_y_band_pos = 0; 632 633 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cb_type_idx = SAO_NONE; 634 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[1] = 0; 635 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[2] = 0; 636 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[3] = 0; 637 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[4] = 0; 638 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cb_band_pos = 0; 639 640 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cr_type_idx = SAO_NONE; 641 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[1] = 0; 642 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[2] = 0; 643 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[3] = 0; 644 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[4] = 0; 645 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cr_band_pos = 0; 646 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_left_flag = 0; 647 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_up_flag = 0; 648 649 num_luma_rdo_cand++; 650 651 /* 45 degree SAO CAND*/ 652 for(edgeidx = 0; edgeidx < 5; edgeidx++) 653 { 654 acc_error_category[edgeidx] = 0; 655 category_count[edgeidx] = 0; 656 } 657 658 /* Call the funciton to populate the EO parameter for this ctb for 45 deg EO class*/ 659 // clang-format off 660 ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_get_luma_eo_sao_params(ps_sao_ctxt, SAO_EDGE_45_DEG, 661 acc_error_category, category_count); 662 663 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_y_type_idx = SAO_EDGE_45_DEG; 664 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[1] = category_count[0] 665 ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7)) 666 : 0; 667 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[2] = category_count[1] 668 ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7)) 669 : 0; 670 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[3] = category_count[3] 671 ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0)) 672 : 0; 673 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[4] = category_count[4] 674 ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0)) 675 : 0; 676 // clang-format on 677 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_y_band_pos = 0; 678 679 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cb_type_idx = SAO_NONE; 680 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[1] = 0; 681 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[2] = 0; 682 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[3] = 0; 683 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[4] = 0; 684 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cb_band_pos = 0; 685 686 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cr_type_idx = SAO_NONE; 687 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[1] = 0; 688 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[2] = 0; 689 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[3] = 0; 690 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[4] = 0; 691 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cr_band_pos = 0; 692 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_left_flag = 0; 693 ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_up_flag = 0; 694 695 num_luma_rdo_cand++; 696 697 /* First cand will be best cand after 1st iteration*/ 698 curr_buf_idx = 0; 699 best_buf_idx = 1; 700 best_cost = 0xFFFFFFFF; 701 best_cand_idx = 0; 702 703 /*Back up the top pixels for (x,y+1)th ctb*/ 704 if(!ps_sao_ctxt->i4_is_last_ctb_row) 705 { 706 memcpy( 707 ps_sao_ctxt->pu1_curr_sao_src_top_luma + ps_sao_ctxt->i4_frm_top_luma_buf_stride, 708 pu1_recon_luma + luma_recon_stride * (ctb_size - 1), 709 ps_sao_ctxt->i4_sao_blk_wd); 710 } 711 712 for(rdo_cand = 0; rdo_cand < num_luma_rdo_cand; rdo_cand++) 713 { 714 s_sao_ctxt.ps_sao = &ps_sao_ctxt->as_sao_rd_cand[rdo_cand]; 715 716 /* This memcpy is required because cabac uses parameters from this structure 717 * to evaluate bits and this structure ptr is sent to cabac through 718 * "ihevce_cabac_rdo_encode_sao" function 719 */ 720 memcpy(&ps_ctb_enc_loop_out->s_sao, s_sao_ctxt.ps_sao, sizeof(sao_enc_t)); 721 722 /* Copy the left pixels to the scratch buffer for evry rdo cand because its 723 overwritten by the sao leaf level function for next ctb*/ 724 memcpy( 725 s_sao_ctxt.au1_left_luma_scratch, 726 ps_sao_ctxt->au1_sao_src_left_luma, 727 ps_sao_ctxt->i4_sao_blk_ht); 728 729 /* Copy the top and top left pixels to the scratch buffer for evry rdo cand because its 730 overwritten by the sao leaf level function for next ctb*/ 731 memcpy( 732 s_sao_ctxt.au1_top_luma_scratch, 733 ps_sao_ctxt->pu1_curr_sao_src_top_luma - 1, 734 ps_sao_ctxt->i4_sao_blk_wd + 2); 735 s_sao_ctxt.pu1_curr_sao_src_top_luma = s_sao_ctxt.au1_top_luma_scratch + 1; 736 737 pu1_luma_scratch_buf = ps_sao_ctxt->au1_sao_luma_scratch[curr_buf_idx]; 738 739 ASSERT( 740 (abs(s_sao_ctxt.ps_sao->u1_y_offset[1]) <= 7) && 741 (abs(s_sao_ctxt.ps_sao->u1_y_offset[2]) <= 7) && 742 (abs(s_sao_ctxt.ps_sao->u1_y_offset[3]) <= 7) && 743 (abs(s_sao_ctxt.ps_sao->u1_y_offset[4]) <= 7)); 744 ASSERT( 745 (abs(s_sao_ctxt.ps_sao->u1_cb_offset[1]) <= 7) && 746 (abs(s_sao_ctxt.ps_sao->u1_cb_offset[2]) <= 7) && 747 (abs(s_sao_ctxt.ps_sao->u1_cb_offset[3]) <= 7) && 748 (abs(s_sao_ctxt.ps_sao->u1_cb_offset[4]) <= 7)); 749 ASSERT( 750 (abs(s_sao_ctxt.ps_sao->u1_cr_offset[1]) <= 7) && 751 (abs(s_sao_ctxt.ps_sao->u1_cr_offset[2]) <= 7) && 752 (abs(s_sao_ctxt.ps_sao->u1_cr_offset[3]) <= 7) && 753 (abs(s_sao_ctxt.ps_sao->u1_cr_offset[4]) <= 7)); 754 ASSERT( 755 (s_sao_ctxt.ps_sao->b5_y_band_pos <= 28) && 756 (s_sao_ctxt.ps_sao->b5_cb_band_pos <= 28) && 757 (s_sao_ctxt.ps_sao->b5_cr_band_pos <= 28)); 758 759 /* Copy the deblocked recon data to scratch buffer to do sao*/ 760 761 ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d( 762 pu1_luma_scratch_buf, 763 i4_luma_scratch_buf_stride, 764 pu1_recon_luma, 765 luma_recon_stride, 766 SCRATCH_BUF_STRIDE, 767 ctb_ht + 1); 768 769 s_sao_ctxt.pu1_cur_luma_recon_buf = pu1_luma_scratch_buf; 770 s_sao_ctxt.i4_cur_luma_recon_stride = i4_luma_scratch_buf_stride; 771 772 s_sao_ctxt.i1_slice_sao_luma_flag = s_sao_ctxt.ps_slice_hdr->i1_slice_sao_luma_flag; 773 s_sao_ctxt.i1_slice_sao_chroma_flag = 0; 774 775 ihevce_sao_ctb(&s_sao_ctxt, ps_tile_params); 776 777 /* Calculate the distortion between sao'ed ctb and original src ctb*/ 778 // clang-format off 779 distortion = 780 ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_ssd_calculator(pu1_src_luma, 781 s_sao_ctxt.pu1_cur_luma_recon_buf, luma_src_stride, 782 s_sao_ctxt.i4_cur_luma_recon_stride, ctb_wd, ctb_ht); 783 // clang-format on 784 785 ps_sao_ctxt->ps_rdopt_entropy_ctxt->i4_curr_buf_idx = curr_buf_idx; 786 ctb_bits = ihevce_cabac_rdo_encode_sao( 787 ps_sao_ctxt->ps_rdopt_entropy_ctxt, ps_ctb_enc_loop_out); 788 789 /* Calculate the cost as D+(lamda)*R */ 790 curr_cost = distortion + 791 COMPUTE_RATE_COST_CLIP30(ctb_bits, i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT); 792 793 if(curr_cost < best_cost) 794 { 795 best_cost = curr_cost; 796 best_buf_idx = ps_sao_ctxt->ps_rdopt_entropy_ctxt->i4_curr_buf_idx; 797 best_cand_idx = rdo_cand; 798 curr_buf_idx = !curr_buf_idx; 799 } 800 } 801 802 /* Copy the sao parameters of the best luma cand into the luma_chroma cnad structure for next stage of RDO 803 * between luma_chroma combined cand, NO SAO cand, LEFT and TOP merge cand 804 */ 805 s_best_luma_chroma_cand.b3_y_type_idx = 806 ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].b3_y_type_idx; 807 s_best_luma_chroma_cand.u1_y_offset[1] = 808 ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].u1_y_offset[1]; 809 s_best_luma_chroma_cand.u1_y_offset[2] = 810 ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].u1_y_offset[2]; 811 s_best_luma_chroma_cand.u1_y_offset[3] = 812 ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].u1_y_offset[3]; 813 s_best_luma_chroma_cand.u1_y_offset[4] = 814 ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].u1_y_offset[4]; 815 s_best_luma_chroma_cand.b5_y_band_pos = 816 ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].b5_y_band_pos; 817 } 818 else 819 { 820 /*Back up the top pixels for (x,y+1)th ctb*/ 821 if(!ps_sao_ctxt->i4_is_last_ctb_row) 822 { 823 memcpy( 824 ps_sao_ctxt->pu1_curr_sao_src_top_luma + ps_sao_ctxt->i4_frm_top_luma_buf_stride, 825 pu1_recon_luma + luma_recon_stride * (ctb_size - 1), 826 ps_sao_ctxt->i4_sao_blk_wd); 827 } 828 829 s_best_luma_chroma_cand.b3_y_type_idx = SAO_NONE; 830 s_best_luma_chroma_cand.u1_y_offset[1] = 0; 831 s_best_luma_chroma_cand.u1_y_offset[2] = 0; 832 s_best_luma_chroma_cand.u1_y_offset[3] = 0; 833 s_best_luma_chroma_cand.u1_y_offset[4] = 0; 834 s_best_luma_chroma_cand.b5_y_band_pos = 0; 835 s_best_luma_chroma_cand.b1_sao_merge_left_flag = 0; 836 s_best_luma_chroma_cand.b1_sao_merge_up_flag = 0; 837 838 s_best_luma_chroma_cand.b3_cb_type_idx = SAO_NONE; 839 s_best_luma_chroma_cand.u1_cb_offset[1] = 0; 840 s_best_luma_chroma_cand.u1_cb_offset[2] = 0; 841 s_best_luma_chroma_cand.u1_cb_offset[3] = 0; 842 s_best_luma_chroma_cand.u1_cb_offset[4] = 0; 843 s_best_luma_chroma_cand.b5_cb_band_pos = 0; 844 845 s_best_luma_chroma_cand.b3_cr_type_idx = SAO_NONE; 846 s_best_luma_chroma_cand.u1_cr_offset[1] = 0; 847 s_best_luma_chroma_cand.u1_cr_offset[2] = 0; 848 s_best_luma_chroma_cand.u1_cr_offset[3] = 0; 849 s_best_luma_chroma_cand.u1_cr_offset[4] = 0; 850 s_best_luma_chroma_cand.b5_cr_band_pos = 0; 851 } 852 /*****************************************************/ 853 /********************RDO FOR CHROMA CAND**************/ 854 /*****************************************************/ 855 #if !DISABLE_SAO_WHEN_NOISY 856 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag) 857 #else 858 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag && !u1_force_no_offset) 859 #endif 860 { 861 /*Back up the top pixels for (x,y+1)th ctb*/ 862 if(!ps_sao_ctxt->i4_is_last_ctb_row) 863 { 864 memcpy( 865 ps_sao_ctxt->pu1_curr_sao_src_top_chroma + 866 ps_sao_ctxt->i4_frm_top_chroma_buf_stride, 867 pu1_recon_chroma + chroma_recon_stride * ((ctb_size >> !u1_is_422) - 1), 868 ps_sao_ctxt->i4_sao_blk_wd); 869 } 870 871 /* Reset the error and edge count*/ 872 for(edgeidx = 0; edgeidx < 5; edgeidx++) 873 { 874 acc_error_category[edgeidx] = 0; 875 category_count[edgeidx] = 0; 876 } 877 // clang-format off 878 ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_get_chroma_eo_sao_params(ps_sao_ctxt, 879 s_best_luma_chroma_cand.b3_y_type_idx, acc_error_category, 880 category_count); 881 // clang-format on 882 883 /* Copy the sao parameters of the best luma cand into the luma_chroma cnad structure for next stage of RDO 884 * between luma_chroma combined cand, NO SAO cand, LEFT and TOP merge cand 885 */ 886 // clang-format off 887 s_best_luma_chroma_cand.b3_cb_type_idx = s_best_luma_chroma_cand.b3_y_type_idx; 888 s_best_luma_chroma_cand.u1_cb_offset[1] = category_count[0] 889 ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7)) 890 : 0; 891 s_best_luma_chroma_cand.u1_cb_offset[2] = category_count[1] 892 ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7)) 893 : 0; 894 s_best_luma_chroma_cand.u1_cb_offset[3] = category_count[3] 895 ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0)) 896 : 0; 897 s_best_luma_chroma_cand.u1_cb_offset[4] = category_count[4] 898 ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0)) 899 : 0; 900 s_best_luma_chroma_cand.b5_cb_band_pos = 0; 901 902 s_best_luma_chroma_cand.b3_cr_type_idx = s_best_luma_chroma_cand.b3_y_type_idx; 903 s_best_luma_chroma_cand.u1_cr_offset[1] = category_count[0] 904 ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7)) 905 : 0; 906 s_best_luma_chroma_cand.u1_cr_offset[2] = category_count[1] 907 ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7)) 908 : 0; 909 s_best_luma_chroma_cand.u1_cr_offset[3] = category_count[3] 910 ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0)) 911 : 0; 912 s_best_luma_chroma_cand.u1_cr_offset[4] = category_count[4] 913 ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0)) 914 : 0; 915 // clang-format on 916 s_best_luma_chroma_cand.b5_cr_band_pos = 0; 917 } 918 else 919 { 920 /*Back up the top pixels for (x,y+1)th ctb*/ 921 if(!ps_sao_ctxt->i4_is_last_ctb_row) 922 { 923 memcpy( 924 ps_sao_ctxt->pu1_curr_sao_src_top_chroma + 925 ps_sao_ctxt->i4_frm_top_chroma_buf_stride, 926 pu1_recon_chroma + chroma_recon_stride * ((ctb_size >> !u1_is_422) - 1), 927 ps_sao_ctxt->i4_sao_blk_wd); 928 } 929 930 s_best_luma_chroma_cand.b3_cb_type_idx = SAO_NONE; 931 s_best_luma_chroma_cand.u1_cb_offset[1] = 0; 932 s_best_luma_chroma_cand.u1_cb_offset[2] = 0; 933 s_best_luma_chroma_cand.u1_cb_offset[3] = 0; 934 s_best_luma_chroma_cand.u1_cb_offset[4] = 0; 935 s_best_luma_chroma_cand.b5_cb_band_pos = 0; 936 937 s_best_luma_chroma_cand.b3_cr_type_idx = SAO_NONE; 938 s_best_luma_chroma_cand.u1_cr_offset[1] = 0; 939 s_best_luma_chroma_cand.u1_cr_offset[2] = 0; 940 s_best_luma_chroma_cand.u1_cr_offset[3] = 0; 941 s_best_luma_chroma_cand.u1_cr_offset[4] = 0; 942 s_best_luma_chroma_cand.b5_cr_band_pos = 0; 943 944 s_best_luma_chroma_cand.b1_sao_merge_left_flag = 0; 945 s_best_luma_chroma_cand.b1_sao_merge_up_flag = 0; 946 } 947 948 s_best_luma_chroma_cand.b1_sao_merge_left_flag = 0; 949 s_best_luma_chroma_cand.b1_sao_merge_up_flag = 0; 950 951 /*****************************************************/ 952 /**RDO for Best Luma - Chroma combined, No SAO,*******/ 953 /*************Left merge and Top merge****************/ 954 /*****************************************************/ 955 956 /* No SAO cand*/ 957 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_left_flag = 0; 958 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_up_flag = 0; 959 960 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b3_y_type_idx = SAO_NONE; 961 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_y_offset[1] = 0; 962 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_y_offset[2] = 0; 963 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_y_offset[3] = 0; 964 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_y_offset[4] = 0; 965 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b5_y_band_pos = 0; 966 967 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b3_cb_type_idx = SAO_NONE; 968 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cb_offset[1] = 0; 969 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cb_offset[2] = 0; 970 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cb_offset[3] = 0; 971 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cb_offset[4] = 0; 972 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b5_cb_band_pos = 0; 973 974 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b3_cr_type_idx = SAO_NONE; 975 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cr_offset[1] = 0; 976 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cr_offset[2] = 0; 977 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cr_offset[3] = 0; 978 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cr_offset[4] = 0; 979 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b5_cr_band_pos = 0; 980 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_left_flag = 0; 981 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_up_flag = 0; 982 983 num_rdo_cand++; 984 985 /* SAO_note_01: If the CTB lies on a tile or a slice boundary, then 986 the standard mandates that the merge candidates must be set to unavailable. 987 Hence, check for tile boundary condition by reading 988 s_ctb_nbr_avail_flags.u1_left_avail rather than frame position of CTB. 989 A special case: Merge-candidates should be available at dependent-slices boundaries. 990 Search for <SAO_note_01> in workspace to know more */ 991 992 #if !DISABLE_SAO_WHEN_NOISY 993 if(1) 994 #else 995 if(!u1_force_no_offset) 996 #endif 997 { 998 /* Merge left cand*/ 999 if(ps_ctb_enc_loop_out->s_ctb_nbr_avail_flags.u1_left_avail) 1000 { 1001 memcpy( 1002 &ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand], 1003 &ps_sao_ctxt->s_left_ctb_sao, 1004 sizeof(sao_enc_t)); 1005 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_left_flag = 1; 1006 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_up_flag = 0; 1007 num_rdo_cand++; 1008 } 1009 1010 /* Merge top cand*/ 1011 if(ps_ctb_enc_loop_out->s_ctb_nbr_avail_flags.u1_top_avail) 1012 { 1013 memcpy( 1014 &ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand], 1015 (ps_sao_ctxt->ps_top_ctb_sao - ps_sao_ctxt->u4_num_ctbs_horz), 1016 sizeof(sao_enc_t)); 1017 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_left_flag = 0; 1018 ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_up_flag = 1; 1019 num_rdo_cand++; 1020 } 1021 1022 /* Best luma-chroma candidate*/ 1023 memcpy( 1024 &ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand], 1025 &s_best_luma_chroma_cand, 1026 sizeof(sao_enc_t)); 1027 num_rdo_cand++; 1028 } 1029 1030 { 1031 UWORD32 luma_distortion = 0, chroma_distortion = 0; 1032 /* First cand will be best cand after 1st iteration*/ 1033 curr_buf_idx = 0; 1034 best_buf_idx = 1; 1035 best_cost = 0xFFFFFFFF; 1036 best_cand_idx = 0; 1037 1038 for(rdo_cand = 0; rdo_cand < num_rdo_cand; rdo_cand++) 1039 { 1040 s_sao_ctxt.ps_sao = &ps_sao_ctxt->as_sao_rd_cand[rdo_cand]; 1041 1042 distortion = 0; 1043 1044 /* This memcpy is required because cabac uses parameters from this structure 1045 * to evaluate bits and this structure ptr is sent to cabac through 1046 * "ihevce_cabac_rdo_encode_sao" function 1047 */ 1048 memcpy(&ps_ctb_enc_loop_out->s_sao, s_sao_ctxt.ps_sao, sizeof(sao_enc_t)); 1049 1050 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag) 1051 { 1052 /* Copy the left pixels to the scratch buffer for evry rdo cand because its 1053 overwritten by the sao leaf level function for next ctb*/ 1054 memcpy( 1055 s_sao_ctxt.au1_left_luma_scratch, 1056 ps_sao_ctxt->au1_sao_src_left_luma, 1057 ps_sao_ctxt->i4_sao_blk_ht); 1058 1059 /* Copy the top and top left pixels to the scratch buffer for evry rdo cand because its 1060 overwritten by the sao leaf level function for next ctb*/ 1061 memcpy( 1062 s_sao_ctxt.au1_top_luma_scratch, 1063 ps_sao_ctxt->pu1_curr_sao_src_top_luma - 1, 1064 ps_sao_ctxt->i4_sao_blk_wd + 2); 1065 s_sao_ctxt.pu1_curr_sao_src_top_luma = s_sao_ctxt.au1_top_luma_scratch + 1; 1066 1067 pu1_luma_scratch_buf = ps_sao_ctxt->au1_sao_luma_scratch[curr_buf_idx]; 1068 1069 /* Copy the deblocked recon data to scratch buffer to do sao*/ 1070 1071 ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d( 1072 pu1_luma_scratch_buf, 1073 i4_luma_scratch_buf_stride, 1074 pu1_recon_luma, 1075 luma_recon_stride, 1076 SCRATCH_BUF_STRIDE, 1077 ctb_ht + 1); 1078 s_sao_ctxt.pu1_cur_luma_recon_buf = pu1_luma_scratch_buf; 1079 s_sao_ctxt.i4_cur_luma_recon_stride = i4_luma_scratch_buf_stride; 1080 1081 ASSERT( 1082 (abs(s_sao_ctxt.ps_sao->u1_y_offset[1]) <= 7) && 1083 (abs(s_sao_ctxt.ps_sao->u1_y_offset[2]) <= 7) && 1084 (abs(s_sao_ctxt.ps_sao->u1_y_offset[3]) <= 7) && 1085 (abs(s_sao_ctxt.ps_sao->u1_y_offset[4]) <= 7)); 1086 } 1087 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag) 1088 { 1089 /* Copy the left pixels to the scratch buffer for evry rdo cand because its 1090 overwritten by the sao leaf level function for next ctb*/ 1091 memcpy( 1092 s_sao_ctxt.au1_left_chroma_scratch, 1093 ps_sao_ctxt->au1_sao_src_left_chroma, 1094 (ps_sao_ctxt->i4_sao_blk_ht >> !u1_is_422) * 2); 1095 1096 /* Copy the top and top left pixels to the scratch buffer for evry rdo cand because its 1097 overwritten by the sao leaf level function for next ctb*/ 1098 memcpy( 1099 s_sao_ctxt.au1_top_chroma_scratch, 1100 ps_sao_ctxt->pu1_curr_sao_src_top_chroma - 2, 1101 ps_sao_ctxt->i4_sao_blk_wd + 4); 1102 1103 s_sao_ctxt.pu1_curr_sao_src_top_chroma = s_sao_ctxt.au1_top_chroma_scratch + 2; 1104 1105 pu1_chroma_scratch_buf = ps_sao_ctxt->au1_sao_chroma_scratch[curr_buf_idx]; 1106 1107 /* Copy the deblocked recon data to scratch buffer to do sao*/ 1108 1109 ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d( 1110 pu1_chroma_scratch_buf, 1111 i4_chroma_scratch_buf_stride, 1112 pu1_recon_chroma, 1113 chroma_recon_stride, 1114 SCRATCH_BUF_STRIDE, 1115 (ctb_ht >> !u1_is_422) + 1); 1116 1117 s_sao_ctxt.pu1_cur_chroma_recon_buf = pu1_chroma_scratch_buf; 1118 s_sao_ctxt.i4_cur_chroma_recon_stride = i4_chroma_scratch_buf_stride; 1119 1120 ASSERT( 1121 (abs(s_sao_ctxt.ps_sao->u1_cb_offset[1]) <= 7) && 1122 (abs(s_sao_ctxt.ps_sao->u1_cb_offset[2]) <= 7) && 1123 (abs(s_sao_ctxt.ps_sao->u1_cb_offset[3]) <= 7) && 1124 (abs(s_sao_ctxt.ps_sao->u1_cb_offset[4]) <= 7)); 1125 ASSERT( 1126 (abs(s_sao_ctxt.ps_sao->u1_cr_offset[1]) <= 7) && 1127 (abs(s_sao_ctxt.ps_sao->u1_cr_offset[2]) <= 7) && 1128 (abs(s_sao_ctxt.ps_sao->u1_cr_offset[3]) <= 7) && 1129 (abs(s_sao_ctxt.ps_sao->u1_cr_offset[4]) <= 7)); 1130 } 1131 1132 ASSERT( 1133 (s_sao_ctxt.ps_sao->b5_y_band_pos <= 28) && 1134 (s_sao_ctxt.ps_sao->b5_cb_band_pos <= 28) && 1135 (s_sao_ctxt.ps_sao->b5_cr_band_pos <= 28)); 1136 1137 s_sao_ctxt.i1_slice_sao_luma_flag = s_sao_ctxt.ps_slice_hdr->i1_slice_sao_luma_flag; 1138 s_sao_ctxt.i1_slice_sao_chroma_flag = s_sao_ctxt.ps_slice_hdr->i1_slice_sao_chroma_flag; 1139 1140 ihevce_sao_ctb(&s_sao_ctxt, ps_tile_params); 1141 1142 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag) 1143 { // clang-format off 1144 luma_distortion = 1145 ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_ssd_calculator(pu1_src_luma, 1146 s_sao_ctxt.pu1_cur_luma_recon_buf, luma_src_stride, 1147 s_sao_ctxt.i4_cur_luma_recon_stride, ctb_wd, 1148 ctb_ht); 1149 } // clang-format on 1150 1151 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag) 1152 { // clang-format off 1153 chroma_distortion = 1154 ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_ssd_calculator(pu1_src_chroma, 1155 s_sao_ctxt.pu1_cur_chroma_recon_buf, 1156 chroma_src_stride, 1157 s_sao_ctxt.i4_cur_chroma_recon_stride, ctb_wd, 1158 (ctb_ht >> !u1_is_422)); 1159 } // clang-format on 1160 1161 /*chroma distortion is added after correction because of lambda difference*/ 1162 distortion = 1163 luma_distortion + 1164 (UWORD32)(chroma_distortion * (i8_cl_ssd_lambda_qf / i8_cl_ssd_lambda_chroma_qf)); 1165 1166 ps_sao_ctxt->ps_rdopt_entropy_ctxt->i4_curr_buf_idx = curr_buf_idx; 1167 ctb_bits = ihevce_cabac_rdo_encode_sao( 1168 ps_sao_ctxt->ps_rdopt_entropy_ctxt, ps_ctb_enc_loop_out); 1169 1170 /* Calculate the cost as D+(lamda)*R */ 1171 curr_cost = distortion + 1172 COMPUTE_RATE_COST_CLIP30(ctb_bits, i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT); 1173 1174 if(curr_cost < best_cost) 1175 { 1176 best_ctb_sao_bits = ctb_bits; 1177 best_cost = curr_cost; 1178 best_buf_idx = ps_sao_ctxt->ps_rdopt_entropy_ctxt->i4_curr_buf_idx; 1179 best_cand_idx = rdo_cand; 1180 curr_buf_idx = !curr_buf_idx; 1181 } 1182 } 1183 /*Adding sao bits to header bits*/ 1184 *pu4_frame_rdopt_header_bits = best_ctb_sao_bits; 1185 1186 ihevce_update_best_sao_cabac_state(ps_sao_ctxt->ps_rdopt_entropy_ctxt, best_buf_idx); 1187 1188 /* store the sao parameters of curr ctb for top merge and left merge*/ 1189 memcpy( 1190 ps_sao_ctxt->ps_top_ctb_sao, 1191 &ps_sao_ctxt->as_sao_rd_cand[best_cand_idx], 1192 sizeof(sao_enc_t)); 1193 memcpy( 1194 &ps_sao_ctxt->s_left_ctb_sao, 1195 &ps_sao_ctxt->as_sao_rd_cand[best_cand_idx], 1196 sizeof(sao_enc_t)); 1197 1198 /* Copy the sao parameters of winning candidate into the structure which will be sent to entropy thrd*/ 1199 memcpy( 1200 &ps_ctb_enc_loop_out->s_sao, 1201 &ps_sao_ctxt->as_sao_rd_cand[best_cand_idx], 1202 sizeof(sao_enc_t)); 1203 1204 if(!ps_sao_ctxt->i4_is_last_ctb_col) 1205 { 1206 /* Update left luma buffer for next ctb */ 1207 for(row = 0; row < ps_sao_ctxt->i4_sao_blk_ht; row++) 1208 { 1209 ps_sao_ctxt->au1_sao_src_left_luma[row] = 1210 ps_sao_ctxt->pu1_cur_luma_recon_buf 1211 [row * ps_sao_ctxt->i4_cur_luma_recon_stride + 1212 (ps_sao_ctxt->i4_sao_blk_wd - 1)]; 1213 } 1214 } 1215 1216 if(!ps_sao_ctxt->i4_is_last_ctb_col) 1217 { 1218 /* Update left chroma buffer for next ctb */ 1219 for(row = 0; row < (ps_sao_ctxt->i4_sao_blk_ht >> 1); row++) 1220 { 1221 *(UWORD16 *)(ps_sao_ctxt->au1_sao_src_left_chroma + row * 2) = 1222 *(UWORD16 *)(ps_sao_ctxt->pu1_cur_chroma_recon_buf + 1223 row * ps_sao_ctxt->i4_cur_chroma_recon_stride + 1224 (ps_sao_ctxt->i4_sao_blk_wd - 2)); 1225 } 1226 } 1227 1228 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag) 1229 { 1230 /* Copy the sao'ed output of the best candidate to the recon buffer*/ 1231 1232 ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d( 1233 ps_sao_ctxt->pu1_cur_luma_recon_buf, 1234 ps_sao_ctxt->i4_cur_luma_recon_stride, 1235 ps_sao_ctxt->au1_sao_luma_scratch[best_buf_idx], 1236 i4_luma_scratch_buf_stride, 1237 ctb_wd, 1238 ctb_ht); 1239 } 1240 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag) 1241 { 1242 /* Copy the sao'ed output of the best candidate to the chroma recon buffer*/ 1243 1244 ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d( 1245 ps_sao_ctxt->pu1_cur_chroma_recon_buf, 1246 ps_sao_ctxt->i4_cur_chroma_recon_stride, 1247 ps_sao_ctxt->au1_sao_chroma_scratch[best_buf_idx], 1248 i4_chroma_scratch_buf_stride, 1249 ctb_wd, 1250 ctb_ht >> !u1_is_422); 1251 } 1252 } 1253 } 1254