Home | History | Annotate | Download | only in encoder
      1 /******************************************************************************
      2  *
      3  * Copyright (C) 2018 The Android Open Source Project
      4  *
      5  * Licensed under the Apache License, Version 2.0 (the "License");
      6  * you may not use this file except in compliance with the License.
      7  * You may obtain a copy of the License at:
      8  *
      9  * http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  *
     17  *****************************************************************************
     18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
     19 */
     20 /**
     21 *******************************************************************************
     22 * @file
     23 *  ihevce_sao.c
     24 *
     25 * @brief
     26 *  Contains definition for the ctb level sao function
     27 *
     28 * @author
     29 *  Ittiam
     30 *
     31 * @par List of Functions:
     32 *  ihevce_sao_set_avilability()
     33 *  ihevce_sao_ctb()
     34 *  ihevce_sao_analyse()
     35 *
     36 * @remarks
     37 *  None
     38 *
     39 *******************************************************************************
     40 */
     41 
     42 /*****************************************************************************/
     43 /* File Includes                                                             */
     44 /*****************************************************************************/
     45 /* System include files */
     46 #include <stdio.h>
     47 #include <string.h>
     48 #include <stdlib.h>
     49 #include <assert.h>
     50 #include <stdarg.h>
     51 #include <math.h>
     52 
     53 /* User include files */
     54 #include "ihevc_typedefs.h"
     55 #include "itt_video_api.h"
     56 #include "ihevce_api.h"
     57 
     58 #include "rc_cntrl_param.h"
     59 #include "rc_frame_info_collector.h"
     60 #include "rc_look_ahead_params.h"
     61 
     62 #include "ihevc_defs.h"
     63 #include "ihevc_structs.h"
     64 #include "ihevc_platform_macros.h"
     65 #include "ihevc_deblk.h"
     66 #include "ihevc_itrans_recon.h"
     67 #include "ihevc_chroma_itrans_recon.h"
     68 #include "ihevc_chroma_intra_pred.h"
     69 #include "ihevc_intra_pred.h"
     70 #include "ihevc_inter_pred.h"
     71 #include "ihevc_mem_fns.h"
     72 #include "ihevc_padding.h"
     73 #include "ihevc_weighted_pred.h"
     74 #include "ihevc_sao.h"
     75 #include "ihevc_resi_trans.h"
     76 #include "ihevc_quant_iquant_ssd.h"
     77 #include "ihevc_cabac_tables.h"
     78 
     79 #include "ihevce_defs.h"
     80 #include "ihevce_lap_enc_structs.h"
     81 #include "ihevce_multi_thrd_structs.h"
     82 #include "ihevce_me_common_defs.h"
     83 #include "ihevce_had_satd.h"
     84 #include "ihevce_error_codes.h"
     85 #include "ihevce_bitstream.h"
     86 #include "ihevce_cabac.h"
     87 #include "ihevce_rdoq_macros.h"
     88 #include "ihevce_function_selector.h"
     89 #include "ihevce_enc_structs.h"
     90 #include "ihevce_entropy_structs.h"
     91 #include "ihevce_cmn_utils_instr_set_router.h"
     92 #include "ihevce_enc_loop_structs.h"
     93 #include "ihevce_cabac_rdo.h"
     94 #include "ihevce_sao.h"
     95 
     96 /*****************************************************************************/
     97 /* Function Definitions                                                      */
     98 /*****************************************************************************/
     99 
    100 /**
    101 *******************************************************************************
    102 *
    103 * @brief
    104 *     ihevce_sao_set_avilability
    105 *
    106 * @par Description:
    107 *     Sets the availability flag for SAO.
    108 *
    109 * @param[in]
    110 *   ps_sao_ctxt:   Pointer to SAO context
    111 * @returns
    112 *
    113 * @remarks
    114 *  None
    115 *
    116 *******************************************************************************
    117 */
    118 void ihevce_sao_set_avilability(
    119     UWORD8 *pu1_avail, sao_ctxt_t *ps_sao_ctxt, ihevce_tile_params_t *ps_tile_params)
    120 {
    121     WORD32 i;
    122 
    123     WORD32 ctb_x_pos = ps_sao_ctxt->i4_ctb_x;
    124     WORD32 ctb_y_pos = ps_sao_ctxt->i4_ctb_y;
    125 
    126     for(i = 0; i < 8; i++)
    127     {
    128         pu1_avail[i] = 255;
    129     }
    130 
    131     /* SAO_note_01: If the CTB lies on a tile or a slice boundary and
    132     in-loop filtering is enabled at tile and slice boundary, then SAO must
    133     be performed at tile/slice boundaries also.
    134     Hence the boundary checks should be based on frame position of CTB
    135     rather than s_ctb_nbr_avail_flags.u1_left_avail flags.
    136     Search for <SAO_note_01> in workspace to know more */
    137     /* Availaibility flags for first col*/
    138     if(ctb_x_pos == ps_tile_params->i4_first_ctb_x)
    139     {
    140         pu1_avail[0] = 0;
    141         pu1_avail[4] = 0;
    142         pu1_avail[6] = 0;
    143     }
    144 
    145     /* Availaibility flags for last col*/
    146     if((ctb_x_pos + 1) ==
    147        (ps_tile_params->i4_first_ctb_x + ps_tile_params->i4_curr_tile_wd_in_ctb_unit))
    148     {
    149         pu1_avail[1] = 0;
    150         pu1_avail[5] = 0;
    151         pu1_avail[7] = 0;
    152     }
    153 
    154     /* Availaibility flags for first row*/
    155     if(ctb_y_pos == ps_tile_params->i4_first_ctb_y)
    156     {
    157         pu1_avail[2] = 0;
    158         pu1_avail[4] = 0;
    159         pu1_avail[5] = 0;
    160     }
    161 
    162     /* Availaibility flags for last row*/
    163     if((ctb_y_pos + 1) ==
    164        (ps_tile_params->i4_first_ctb_y + ps_tile_params->i4_curr_tile_ht_in_ctb_unit))
    165     {
    166         pu1_avail[3] = 0;
    167         pu1_avail[6] = 0;
    168         pu1_avail[7] = 0;
    169     }
    170 }
    171 
    172 /**
    173 *******************************************************************************
    174 *
    175 * @brief
    176 *   Sao CTB level function.
    177 *
    178 * @par Description:
    179 *   For a given CTB, sao is done. Both the luma and chroma
    180 *   blocks are processed
    181 *
    182 * @param[in]
    183 *   ps_sao_ctxt:   Pointer to SAO context
    184 *
    185 * @returns
    186 *
    187 * @remarks
    188 *  None
    189 *
    190 *******************************************************************************
    191 */
    192 void ihevce_sao_ctb(sao_ctxt_t *ps_sao_ctxt, ihevce_tile_params_t *ps_tile_params)
    193 {
    194     sao_enc_t *ps_sao;
    195     UWORD8 u1_src_top_left_luma, u1_src_top_left_chroma[2];
    196     UWORD8 *pu1_src_left_luma_buf, *pu1_src_top_luma_buf;
    197     UWORD8 *pu1_src_left_chroma_buf, *pu1_src_top_chroma_buf;
    198     UWORD8 *pu1_src_luma, *pu1_src_chroma;
    199     WORD32 luma_src_stride, ctb_size;
    200     WORD32 chroma_src_stride;
    201     UWORD8 au1_avail_luma[8], au1_avail_chroma[8];
    202     WORD32 sao_blk_wd, sao_blk_ht, sao_wd_chroma, sao_ht_chroma;
    203     UWORD8 *pu1_top_left_luma, *pu1_top_left_chroma;
    204     UWORD8 *pu1_src_bot_left_luma, *pu1_src_top_right_luma;
    205     UWORD8 *pu1_src_bot_left_chroma, *pu1_src_top_right_chroma;
    206     UWORD8 u1_is_422 = (ps_sao_ctxt->ps_sps->i1_chroma_format_idc == 2);
    207 
    208     ps_sao = ps_sao_ctxt->ps_sao;
    209 
    210     ASSERT(
    211         (abs(ps_sao->u1_y_offset[1]) <= 7) && (abs(ps_sao->u1_y_offset[2]) <= 7) &&
    212         (abs(ps_sao->u1_y_offset[3]) <= 7) && (abs(ps_sao->u1_y_offset[4]) <= 7));
    213     ASSERT(
    214         (abs(ps_sao->u1_cb_offset[1]) <= 7) && (abs(ps_sao->u1_cb_offset[2]) <= 7) &&
    215         (abs(ps_sao->u1_cb_offset[3]) <= 7) && (abs(ps_sao->u1_cb_offset[4]) <= 7));
    216     ASSERT(
    217         (abs(ps_sao->u1_cr_offset[1]) <= 7) && (abs(ps_sao->u1_cr_offset[2]) <= 7) &&
    218         (abs(ps_sao->u1_cr_offset[3]) <= 7) && (abs(ps_sao->u1_cr_offset[4]) <= 7));
    219     ASSERT(
    220         (ps_sao->b5_y_band_pos <= 28) && (ps_sao->b5_cb_band_pos <= 28) &&
    221         (ps_sao->b5_cr_band_pos <= 28));
    222 
    223     if(ps_sao_ctxt->i1_slice_sao_luma_flag)
    224     {
    225         /*initialize the src pointer to current row*/
    226         luma_src_stride = ps_sao_ctxt->i4_cur_luma_recon_stride;
    227 
    228         ctb_size = ps_sao_ctxt->i4_ctb_size;
    229 
    230         /* 1 extra byte in top buf stride for top left of 1st ctb of every row*/
    231         ps_sao->u1_y_offset[0] = 0; /* 0th element is not being used  */
    232         sao_blk_wd = ps_sao_ctxt->i4_sao_blk_wd;
    233         sao_blk_ht = ps_sao_ctxt->i4_sao_blk_ht;
    234 
    235         pu1_src_luma = ps_sao_ctxt->pu1_cur_luma_recon_buf;
    236         /* Pointer to the top luma buffer corresponding to the current ctb row*/
    237         pu1_src_top_luma_buf = ps_sao_ctxt->pu1_curr_sao_src_top_luma;
    238 
    239         /* Pointer to left luma buffer corresponding to the current ctb row*/
    240         pu1_src_left_luma_buf = ps_sao_ctxt->au1_left_luma_scratch;
    241 
    242         /* Pointer to the top right luma buffer corresponding to the current ctb row*/
    243         pu1_src_top_right_luma = pu1_src_top_luma_buf /*- top_buf_stide*/ + sao_blk_wd;
    244 
    245         /* Pointer to the bottom left luma buffer corresponding to the current ctb row*/
    246         pu1_src_bot_left_luma =
    247             ps_sao_ctxt->pu1_frm_luma_recon_buf + ctb_size * ps_sao_ctxt->i4_frm_luma_recon_stride -
    248             1 + (ps_sao_ctxt->i4_frm_luma_recon_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
    249             (ps_sao_ctxt->i4_ctb_x * ctb_size); /* Bottom left*/
    250 
    251         /* Back up the top left pixel for (x+1, y+1)th ctb*/
    252         u1_src_top_left_luma = *(pu1_src_top_luma_buf + sao_blk_wd - 1);
    253         pu1_top_left_luma = pu1_src_top_luma_buf - 1;
    254 
    255         if(SAO_BAND == ps_sao->b3_y_type_idx)
    256         {
    257             ihevc_sao_band_offset_luma(
    258                 pu1_src_luma,
    259                 luma_src_stride,
    260                 pu1_src_left_luma_buf, /* Pass the pointer to the left luma buffer backed up in the (x-1,y)th ctb */
    261                 pu1_src_top_luma_buf, /* Pass the ptr to the top luma buf backed up in the (x,y-1)th ctb */
    262                 pu1_src_top_luma_buf - 1, /* Top left*/
    263                 ps_sao->b5_y_band_pos,
    264                 ps_sao->u1_y_offset,
    265                 sao_blk_wd,
    266                 sao_blk_ht);
    267 
    268             if((ps_sao_ctxt->i4_ctb_y > 0))
    269             {
    270                 *(pu1_src_top_luma_buf + sao_blk_wd - 1) = u1_src_top_left_luma;
    271             }
    272         }
    273         else if(ps_sao->b3_y_type_idx >= SAO_EDGE_0_DEG)
    274         {
    275             /*In case of edge offset, 1st and 2nd offsets are always inferred as offsets
    276             * corresponding to EO category 1 and 2 which should be always positive
    277             * And 3rd and 4th offsets are always inferred as offsets corresponding to
    278             * EO category 3 and 4 which should be negative for all the EO classes(or EO typeidx)
    279             */
    280             // clang-format off
    281             ASSERT((ps_sao->u1_y_offset[1] >= 0) && (ps_sao->u1_y_offset[2] >= 0));
    282             ASSERT((ps_sao->u1_y_offset[3] <= 0) && (ps_sao->u1_y_offset[4] <= 0));
    283             // clang-format on
    284 
    285             ihevce_sao_set_avilability(au1_avail_luma, ps_sao_ctxt, ps_tile_params);
    286 
    287             ps_sao_ctxt->apf_sao_luma[ps_sao->b3_y_type_idx - 2](
    288                 pu1_src_luma,
    289                 luma_src_stride,
    290                 pu1_src_left_luma_buf, /* Pass the pointer to the left luma buffer backed up in the (x-1,y)th ctb */
    291                 pu1_src_top_luma_buf, /* Pass the ptr to the top luma buf backed up in the (x,y-1)th ctb */
    292                 pu1_top_left_luma, /* Top left*/
    293                 pu1_src_top_right_luma, /* Top right*/
    294                 pu1_src_bot_left_luma, /* Bottom left*/
    295                 au1_avail_luma,
    296                 ps_sao->u1_y_offset,
    297                 sao_blk_wd,
    298                 sao_blk_ht);
    299 
    300             if((ps_sao_ctxt->i4_ctb_y > 0))
    301             {
    302                 *(pu1_src_top_luma_buf + sao_blk_wd - 1) = u1_src_top_left_luma;
    303             }
    304         }
    305     }
    306 
    307     if(ps_sao_ctxt->i1_slice_sao_chroma_flag)
    308     {
    309         /*initialize the src pointer to current row*/
    310         chroma_src_stride = ps_sao_ctxt->i4_cur_chroma_recon_stride;
    311         ctb_size = ps_sao_ctxt->i4_ctb_size;
    312 
    313         /* 1 extra byte in top buf stride for top left of 1st ctb of every row*/
    314         //top_buf_stide = ps_sao_ctxt->u4_ctb_aligned_wd + 2;
    315         ps_sao->u1_cb_offset[0] = 0; /* 0th element is not used  */
    316         ps_sao->u1_cr_offset[0] = 0;
    317         sao_wd_chroma = ps_sao_ctxt->i4_sao_blk_wd;
    318         sao_ht_chroma = ps_sao_ctxt->i4_sao_blk_ht / (!u1_is_422 + 1);
    319 
    320         pu1_src_chroma = ps_sao_ctxt->pu1_cur_chroma_recon_buf;
    321         /* Pointer to the top luma buffer corresponding to the current ctb row*/
    322         pu1_src_top_chroma_buf = ps_sao_ctxt->pu1_curr_sao_src_top_chroma;
    323         // clang-format off
    324         /* Pointer to left luma buffer corresponding to the current ctb row*/
    325         pu1_src_left_chroma_buf = ps_sao_ctxt->au1_left_chroma_scratch;  //ps_sao_ctxt->au1_sao_src_left_chroma;
    326         // clang-format on
    327         /* Pointer to the top right chroma buffer corresponding to the current ctb row*/
    328         pu1_src_top_right_chroma = pu1_src_top_chroma_buf /*- top_buf_stide*/ + sao_wd_chroma;
    329 
    330         /* Pointer to the bottom left luma buffer corresponding to the current ctb row*/
    331         pu1_src_bot_left_chroma =
    332             ps_sao_ctxt->pu1_frm_chroma_recon_buf +
    333             (ctb_size >> !u1_is_422) * ps_sao_ctxt->i4_frm_chroma_recon_stride - 2 +
    334             (ps_sao_ctxt->i4_frm_chroma_recon_stride * ps_sao_ctxt->i4_ctb_y *
    335              (ctb_size >> !u1_is_422)) +
    336             (ps_sao_ctxt->i4_ctb_x * ctb_size); /* Bottom left*/
    337 
    338         /* Back up the top left pixel for (x+1, y+1)th ctb*/
    339         u1_src_top_left_chroma[0] = *(pu1_src_top_chroma_buf + sao_wd_chroma - 2);
    340         u1_src_top_left_chroma[1] = *(pu1_src_top_chroma_buf + sao_wd_chroma - 1);
    341         pu1_top_left_chroma = pu1_src_top_chroma_buf - 2;
    342 
    343         if(SAO_BAND == ps_sao->b3_cb_type_idx)
    344         {
    345             ihevc_sao_band_offset_chroma(
    346                 pu1_src_chroma,
    347                 chroma_src_stride,
    348                 pu1_src_left_chroma_buf, /* Pass the pointer to the left luma buffer backed up in the (x-1,y)th ctb */
    349                 pu1_src_top_chroma_buf, /* Pass the ptr to the top luma buf backed up in the (x,y-1)th ctb */
    350                 pu1_top_left_chroma, /* Top left*/
    351                 ps_sao->b5_cb_band_pos,
    352                 ps_sao->b5_cr_band_pos,
    353                 ps_sao->u1_cb_offset,
    354                 ps_sao->u1_cr_offset,
    355                 sao_wd_chroma,
    356                 sao_ht_chroma);
    357 
    358             if((ps_sao_ctxt->i4_ctb_y > 0))
    359             {
    360                 *(pu1_src_top_chroma_buf + sao_wd_chroma - 2) = u1_src_top_left_chroma[0];
    361                 *(pu1_src_top_chroma_buf + sao_wd_chroma - 1) = u1_src_top_left_chroma[1];
    362             }
    363         }
    364         else if(ps_sao->b3_cb_type_idx >= SAO_EDGE_0_DEG)
    365         {
    366             /*In case of edge offset, 1st and 2nd offsets are always inferred as offsets
    367             * corresponding to EO category 1 and 2 which should be always positive
    368             * And 3rd and 4th offsets are always inferred as offsets corresponding to
    369             * EO category 3 and 4 which should be negative for all the EO classes(or EO typeidx)
    370             */
    371             ASSERT((ps_sao->u1_cb_offset[1] >= 0) && (ps_sao->u1_cb_offset[2] >= 0));
    372             ASSERT((ps_sao->u1_cb_offset[3] <= 0) && (ps_sao->u1_cb_offset[4] <= 0));
    373 
    374             ASSERT((ps_sao->u1_cr_offset[1] >= 0) && (ps_sao->u1_cr_offset[2] >= 0));
    375             ASSERT((ps_sao->u1_cr_offset[3] <= 0) && (ps_sao->u1_cr_offset[4] <= 0));
    376 
    377             ihevce_sao_set_avilability(au1_avail_chroma, ps_sao_ctxt, ps_tile_params);
    378 
    379             ps_sao_ctxt->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](
    380                 pu1_src_chroma,
    381                 chroma_src_stride,
    382                 pu1_src_left_chroma_buf, /* Pass the pointer to the left luma buffer backed up in the (x-1,y)th ctb */
    383                 pu1_src_top_chroma_buf, /* Pass the ptr to the top luma buf backed up in the (x,y-1)th ctb */
    384                 pu1_top_left_chroma, /* Top left*/
    385                 pu1_src_top_right_chroma, /* Top right*/
    386                 pu1_src_bot_left_chroma, /* Bottom left*/
    387                 au1_avail_chroma,
    388                 ps_sao->u1_cb_offset,
    389                 ps_sao->u1_cr_offset,
    390                 sao_wd_chroma,
    391                 sao_ht_chroma);
    392 
    393             if((ps_sao_ctxt->i4_ctb_y > 0))
    394             {
    395                 *(pu1_src_top_chroma_buf + sao_wd_chroma - 2) = u1_src_top_left_chroma[0];
    396                 *(pu1_src_top_chroma_buf + sao_wd_chroma - 1) = u1_src_top_left_chroma[1];
    397             }
    398         }
    399     }
    400 }
    401 
    402 /**
    403 *******************************************************************************
    404 *
    405 * @brief
    406 *   CTB level function to do SAO analysis.
    407 *
    408 * @par Description:
    409 *   For a given CTB, sao analysis is done for both luma and chroma.
    410 *
    411 *
    412 * @param[in]
    413 *   ps_sao_ctxt:   Pointer to SAO context
    414 *   ps_ctb_enc_loop_out : pointer to ctb level output structure from enc loop
    415 *
    416 * @returns
    417 *
    418 * @remarks
    419 *  None
    420 *
    421 * @Assumptions:
    422 *   1) Initial Cabac state for current ctb to be sao'ed (i.e (x-1,y-1)th ctb) is assumed to be
    423 *      almost same as cabac state of (x,y)th ctb.
    424 *   2) Distortion is calculated in the spatial domain, but the lambda used to calculate the
    425 *      cost is in the frequency domain.
    426 *******************************************************************************
    427 */
    428 void ihevce_sao_analyse(
    429     sao_ctxt_t *ps_sao_ctxt,
    430     ctb_enc_loop_out_t *ps_ctb_enc_loop_out,
    431     UWORD32 *pu4_frame_rdopt_header_bits,
    432     ihevce_tile_params_t *ps_tile_params)
    433 {
    434     UWORD8 *pu1_luma_scratch_buf;
    435     UWORD8 *pu1_chroma_scratch_buf;
    436     UWORD8 *pu1_src_luma, *pu1_recon_luma;
    437     UWORD8 *pu1_src_chroma, *pu1_recon_chroma;
    438     WORD32 luma_src_stride, luma_recon_stride, ctb_size, ctb_wd, ctb_ht;
    439     WORD32 chroma_src_stride, chroma_recon_stride;
    440     WORD32 i4_luma_scratch_buf_stride;
    441     WORD32 i4_chroma_scratch_buf_stride;
    442     sao_ctxt_t s_sao_ctxt;
    443     UWORD32 ctb_bits = 0, distortion = 0, curr_cost = 0, best_cost = 0;
    444     LWORD64 i8_cl_ssd_lambda_qf, i8_cl_ssd_lambda_chroma_qf;
    445     WORD32 rdo_cand, num_luma_rdo_cand = 0, num_rdo_cand = 0;
    446     WORD32 curr_buf_idx, best_buf_idx, best_cand_idx;
    447     WORD32 row;
    448     WORD32 edgeidx;
    449     WORD32 acc_error_category[5] = { 0, 0, 0, 0, 0 }, category_count[5] = { 0, 0, 0, 0, 0 };
    450     sao_enc_t s_best_luma_chroma_cand;
    451     WORD32 best_ctb_sao_bits = 0;
    452 #if DISABLE_SAO_WHEN_NOISY && !defined(ENC_VER_v2)
    453     UWORD8 u1_force_no_offset =
    454         ps_sao_ctxt
    455             ->ps_ctb_data
    456                 [ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_data_stride * ps_sao_ctxt->i4_ctb_y]
    457             .s_ctb_noise_params.i4_noise_present;
    458 #endif
    459     UWORD8 u1_is_422 = (ps_sao_ctxt->ps_sps->i1_chroma_format_idc == 2);
    460 
    461     *pu4_frame_rdopt_header_bits = 0;
    462 
    463     ctb_size = ps_sao_ctxt->i4_ctb_size;
    464     ctb_wd = ps_sao_ctxt->i4_sao_blk_wd;
    465     ctb_ht = ps_sao_ctxt->i4_sao_blk_ht;
    466 
    467     s_sao_ctxt = ps_sao_ctxt[0];
    468 
    469     /* Memset the best luma_chroma_cand structure to avoid asserts in debug mode*/
    470     memset(&s_best_luma_chroma_cand, 0, sizeof(sao_enc_t));
    471 
    472     /* Initialize the pointer and strides for luma buffers*/
    473     pu1_recon_luma = ps_sao_ctxt->pu1_cur_luma_recon_buf;
    474     luma_recon_stride = ps_sao_ctxt->i4_cur_luma_recon_stride;
    475 
    476     pu1_src_luma = ps_sao_ctxt->pu1_cur_luma_src_buf;
    477     luma_src_stride = ps_sao_ctxt->i4_cur_luma_src_stride;
    478     i4_luma_scratch_buf_stride = SCRATCH_BUF_STRIDE;
    479 
    480     /* Initialize the pointer and strides for luma buffers*/
    481     pu1_recon_chroma = ps_sao_ctxt->pu1_cur_chroma_recon_buf;
    482     chroma_recon_stride = ps_sao_ctxt->i4_cur_chroma_recon_stride;
    483 
    484     pu1_src_chroma = ps_sao_ctxt->pu1_cur_chroma_src_buf;
    485     chroma_src_stride = ps_sao_ctxt->i4_cur_chroma_src_stride;
    486     i4_chroma_scratch_buf_stride = SCRATCH_BUF_STRIDE;
    487 
    488     i8_cl_ssd_lambda_qf = ps_sao_ctxt->i8_cl_ssd_lambda_qf;
    489     i8_cl_ssd_lambda_chroma_qf = ps_sao_ctxt->i8_cl_ssd_lambda_chroma_qf;
    490 
    491     /*****************************************************/
    492     /********************RDO FOR LUMA CAND****************/
    493     /*****************************************************/
    494 
    495 #if !DISABLE_SAO_WHEN_NOISY
    496     if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
    497 #else
    498     if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag && !u1_force_no_offset)
    499 #endif
    500     {
    501         /* Candidate for Edge offset SAO*/
    502         /* Following is the convention for curr pixel and
    503         * two neighbouring pixels for 0 deg, 90 deg, 135 deg and 45 deg */
    504         /*
    505         * 0 deg :  a c b     90 deg:  a       135 deg: a          45 deg:     a
    506         *                             c                  c                  c
    507         *                             b                    b              b
    508         */
    509 
    510         /* 0 deg SAO CAND*/
    511         /* Reset the error and edge count*/
    512         for(edgeidx = 0; edgeidx < 5; edgeidx++)
    513         {
    514             acc_error_category[edgeidx] = 0;
    515             category_count[edgeidx] = 0;
    516         }
    517 
    518         /* Call the funciton to populate the EO parameter for this ctb for 0 deg EO class*/
    519         // clang-format off
    520         ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_get_luma_eo_sao_params(ps_sao_ctxt, SAO_EDGE_0_DEG,
    521                 acc_error_category, category_count);
    522         // clang-format on
    523         // clang-format off
    524         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_y_type_idx = SAO_EDGE_0_DEG;
    525         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[1] = category_count[0]
    526                 ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7))
    527                 : 0;
    528         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[2] = category_count[1]
    529                 ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7))
    530                 : 0;
    531         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[3] = category_count[3]
    532                 ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0))
    533                 : 0;
    534         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[4] =category_count[4]
    535                 ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0))
    536                 : 0;
    537         // clang-format on
    538         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_y_band_pos = 0;
    539         // clang-format off
    540         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cb_type_idx = SAO_NONE;
    541         // clang-format on
    542         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[1] = 0;
    543         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[2] = 0;
    544         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[3] = 0;
    545         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[4] = 0;
    546         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cb_band_pos = 0;
    547 
    548         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cr_type_idx = SAO_NONE;
    549         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[1] = 0;
    550         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[2] = 0;
    551         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[3] = 0;
    552         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[4] = 0;
    553         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cr_band_pos = 0;
    554         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_left_flag = 0;
    555         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_up_flag = 0;
    556 
    557         num_luma_rdo_cand++;
    558 
    559         /* 90 degree SAO CAND*/
    560         for(edgeidx = 0; edgeidx < 5; edgeidx++)
    561         {
    562             acc_error_category[edgeidx] = 0;
    563             category_count[edgeidx] = 0;
    564         }
    565 
    566         /* Call the funciton to populate the EO parameter for this ctb for 90 deg EO class*/
    567         // clang-format off
    568         ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_get_luma_eo_sao_params(ps_sao_ctxt, SAO_EDGE_90_DEG,
    569                 acc_error_category, category_count);
    570 
    571         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_y_type_idx = SAO_EDGE_90_DEG;
    572         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[1] = category_count[0]
    573                 ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7))
    574                 : 0;
    575         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[2] = category_count[1]
    576                 ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7))
    577                 : 0;
    578         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[3] = category_count[3]
    579                 ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0))
    580                 : 0;
    581         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[4] = category_count[4]
    582                 ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0))
    583                 : 0;
    584         // clang-format on
    585         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_y_band_pos = 0;
    586 
    587         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cb_type_idx = SAO_NONE;
    588         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[1] = 0;
    589         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[2] = 0;
    590         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[3] = 0;
    591         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[4] = 0;
    592         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cb_band_pos = 0;
    593 
    594         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cr_type_idx = SAO_NONE;
    595         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[1] = 0;
    596         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[2] = 0;
    597         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[3] = 0;
    598         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[4] = 0;
    599         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cr_band_pos = 0;
    600         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_left_flag = 0;
    601         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_up_flag = 0;
    602 
    603         num_luma_rdo_cand++;
    604 
    605         /* 135 degree SAO CAND*/
    606         for(edgeidx = 0; edgeidx < 5; edgeidx++)
    607         {
    608             acc_error_category[edgeidx] = 0;
    609             category_count[edgeidx] = 0;
    610         }
    611 
    612         /* Call the funciton to populate the EO parameter for this ctb for 135 deg EO class*/
    613         // clang-format off
    614         ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_get_luma_eo_sao_params(ps_sao_ctxt, SAO_EDGE_135_DEG,
    615                 acc_error_category, category_count);
    616 
    617         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_y_type_idx = SAO_EDGE_135_DEG;
    618         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[1] = category_count[0]
    619                 ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7))
    620                 : 0;
    621         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[2] = category_count[1]
    622                 ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7))
    623                 : 0;
    624         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[3] = category_count[3]
    625                 ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0))
    626                 : 0;
    627         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[4] = category_count[4]
    628                 ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0))
    629                 : 0;
    630         // clang-format on
    631         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_y_band_pos = 0;
    632 
    633         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cb_type_idx = SAO_NONE;
    634         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[1] = 0;
    635         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[2] = 0;
    636         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[3] = 0;
    637         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[4] = 0;
    638         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cb_band_pos = 0;
    639 
    640         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cr_type_idx = SAO_NONE;
    641         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[1] = 0;
    642         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[2] = 0;
    643         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[3] = 0;
    644         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[4] = 0;
    645         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cr_band_pos = 0;
    646         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_left_flag = 0;
    647         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_up_flag = 0;
    648 
    649         num_luma_rdo_cand++;
    650 
    651         /* 45 degree SAO CAND*/
    652         for(edgeidx = 0; edgeidx < 5; edgeidx++)
    653         {
    654             acc_error_category[edgeidx] = 0;
    655             category_count[edgeidx] = 0;
    656         }
    657 
    658         /* Call the function to populate the EO parameter for this ctb for 45 deg EO class*/
    659         // clang-format off
    660         ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_get_luma_eo_sao_params(ps_sao_ctxt, SAO_EDGE_45_DEG,
    661                 acc_error_category, category_count);
    662 
    663         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_y_type_idx = SAO_EDGE_45_DEG;
    664         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[1] = category_count[0]
    665                 ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7))
    666                 : 0;
    667         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[2] = category_count[1]
    668                 ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7))
    669                 : 0;
    670         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[3] = category_count[3]
    671                 ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0))
    672                 : 0;
    673         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[4] = category_count[4]
    674                 ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0))
    675                 : 0;
    676         // clang-format on
    677         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_y_band_pos = 0;
    678 
    679         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cb_type_idx = SAO_NONE;
    680         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[1] = 0;
    681         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[2] = 0;
    682         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[3] = 0;
    683         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[4] = 0;
    684         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cb_band_pos = 0;
    685 
    686         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cr_type_idx = SAO_NONE;
    687         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[1] = 0;
    688         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[2] = 0;
    689         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[3] = 0;
    690         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[4] = 0;
    691         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cr_band_pos = 0;
    692         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_left_flag = 0;
    693         ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_up_flag = 0;
    694 
    695         num_luma_rdo_cand++;
    696 
    697         /* First cand will be best cand after 1st iteration*/
    698         curr_buf_idx = 0;
    699         best_buf_idx = 1;
    700         best_cost = 0xFFFFFFFF;
    701         best_cand_idx = 0;
    702 
    703         /*Back up the top pixels for (x,y+1)th ctb*/
    704         if(!ps_sao_ctxt->i4_is_last_ctb_row)
    705         {
    706             memcpy(
    707                 ps_sao_ctxt->pu1_curr_sao_src_top_luma + ps_sao_ctxt->i4_frm_top_luma_buf_stride,
    708                 pu1_recon_luma + luma_recon_stride * (ctb_size - 1),
    709                 ps_sao_ctxt->i4_sao_blk_wd);
    710         }
    711 
    712         for(rdo_cand = 0; rdo_cand < num_luma_rdo_cand; rdo_cand++)
    713         {
    714             s_sao_ctxt.ps_sao = &ps_sao_ctxt->as_sao_rd_cand[rdo_cand];
    715 
    716             /* This memcpy is required because cabac uses parameters from this structure
    717             * to evaluate bits and this structure ptr is sent to cabac through
    718             * "ihevce_cabac_rdo_encode_sao" function
    719             */
    720             memcpy(&ps_ctb_enc_loop_out->s_sao, s_sao_ctxt.ps_sao, sizeof(sao_enc_t));
    721 
    722             /* Copy the left pixels to the scratch buffer for every rdo cand because it is
    723             overwritten by the sao leaf level function for next ctb*/
    724             memcpy(
    725                 s_sao_ctxt.au1_left_luma_scratch,
    726                 ps_sao_ctxt->au1_sao_src_left_luma,
    727                 ps_sao_ctxt->i4_sao_blk_ht);
    728 
    729             /* Copy the top and top left pixels to the scratch buffer for every rdo cand because it is
    730             overwritten by the sao leaf level function for next ctb*/
    731             memcpy(
    732                 s_sao_ctxt.au1_top_luma_scratch,
    733                 ps_sao_ctxt->pu1_curr_sao_src_top_luma - 1,
    734                 ps_sao_ctxt->i4_sao_blk_wd + 2);
    735             s_sao_ctxt.pu1_curr_sao_src_top_luma = s_sao_ctxt.au1_top_luma_scratch + 1;
    736 
    737             pu1_luma_scratch_buf = ps_sao_ctxt->au1_sao_luma_scratch[curr_buf_idx];
    738 
    739             ASSERT(
    740                 (abs(s_sao_ctxt.ps_sao->u1_y_offset[1]) <= 7) &&
    741                 (abs(s_sao_ctxt.ps_sao->u1_y_offset[2]) <= 7) &&
    742                 (abs(s_sao_ctxt.ps_sao->u1_y_offset[3]) <= 7) &&
    743                 (abs(s_sao_ctxt.ps_sao->u1_y_offset[4]) <= 7));
    744             ASSERT(
    745                 (abs(s_sao_ctxt.ps_sao->u1_cb_offset[1]) <= 7) &&
    746                 (abs(s_sao_ctxt.ps_sao->u1_cb_offset[2]) <= 7) &&
    747                 (abs(s_sao_ctxt.ps_sao->u1_cb_offset[3]) <= 7) &&
    748                 (abs(s_sao_ctxt.ps_sao->u1_cb_offset[4]) <= 7));
    749             ASSERT(
    750                 (abs(s_sao_ctxt.ps_sao->u1_cr_offset[1]) <= 7) &&
    751                 (abs(s_sao_ctxt.ps_sao->u1_cr_offset[2]) <= 7) &&
    752                 (abs(s_sao_ctxt.ps_sao->u1_cr_offset[3]) <= 7) &&
    753                 (abs(s_sao_ctxt.ps_sao->u1_cr_offset[4]) <= 7));
    754             ASSERT(
    755                 (s_sao_ctxt.ps_sao->b5_y_band_pos <= 28) &&
    756                 (s_sao_ctxt.ps_sao->b5_cb_band_pos <= 28) &&
    757                 (s_sao_ctxt.ps_sao->b5_cr_band_pos <= 28));
    758 
    759             /* Copy the deblocked recon data to scratch buffer to do sao*/
    760 
    761             ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d(
    762                 pu1_luma_scratch_buf,
    763                 i4_luma_scratch_buf_stride,
    764                 pu1_recon_luma,
    765                 luma_recon_stride,
    766                 SCRATCH_BUF_STRIDE,
    767                 ctb_ht + 1);
    768 
    769             s_sao_ctxt.pu1_cur_luma_recon_buf = pu1_luma_scratch_buf;
    770             s_sao_ctxt.i4_cur_luma_recon_stride = i4_luma_scratch_buf_stride;
    771 
    772             s_sao_ctxt.i1_slice_sao_luma_flag = s_sao_ctxt.ps_slice_hdr->i1_slice_sao_luma_flag;
    773             s_sao_ctxt.i1_slice_sao_chroma_flag = 0;
    774 
    775             ihevce_sao_ctb(&s_sao_ctxt, ps_tile_params);
    776 
    777             /* Calculate the distortion between sao'ed ctb and original src ctb*/
    778             // clang-format off
    779             distortion =
    780                 ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_ssd_calculator(pu1_src_luma,
    781                         s_sao_ctxt.pu1_cur_luma_recon_buf, luma_src_stride,
    782                         s_sao_ctxt.i4_cur_luma_recon_stride, ctb_wd, ctb_ht);
    783             // clang-format on
    784 
    785             ps_sao_ctxt->ps_rdopt_entropy_ctxt->i4_curr_buf_idx = curr_buf_idx;
    786             ctb_bits = ihevce_cabac_rdo_encode_sao(
    787                 ps_sao_ctxt->ps_rdopt_entropy_ctxt, ps_ctb_enc_loop_out);
    788 
    789             /* Calculate the cost as D+(lambda)*R   */
    790             curr_cost = distortion +
    791                         COMPUTE_RATE_COST_CLIP30(ctb_bits, i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
    792 
    793             if(curr_cost < best_cost)
    794             {
    795                 best_cost = curr_cost;
    796                 best_buf_idx = ps_sao_ctxt->ps_rdopt_entropy_ctxt->i4_curr_buf_idx;
    797                 best_cand_idx = rdo_cand;
    798                 curr_buf_idx = !curr_buf_idx;
    799             }
    800         }
    801 
    802         /* Copy the sao parameters of the best luma cand into the luma_chroma cand structure for next stage of RDO
    803         * between luma_chroma combined cand, NO SAO cand, LEFT and TOP merge cand
    804         */
    805         s_best_luma_chroma_cand.b3_y_type_idx =
    806             ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].b3_y_type_idx;
    807         s_best_luma_chroma_cand.u1_y_offset[1] =
    808             ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].u1_y_offset[1];
    809         s_best_luma_chroma_cand.u1_y_offset[2] =
    810             ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].u1_y_offset[2];
    811         s_best_luma_chroma_cand.u1_y_offset[3] =
    812             ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].u1_y_offset[3];
    813         s_best_luma_chroma_cand.u1_y_offset[4] =
    814             ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].u1_y_offset[4];
    815         s_best_luma_chroma_cand.b5_y_band_pos =
    816             ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].b5_y_band_pos;
    817     }
    818     else
    819     {
    820         /*Back up the top pixels for (x,y+1)th ctb*/
    821         if(!ps_sao_ctxt->i4_is_last_ctb_row)
    822         {
    823             memcpy(
    824                 ps_sao_ctxt->pu1_curr_sao_src_top_luma + ps_sao_ctxt->i4_frm_top_luma_buf_stride,
    825                 pu1_recon_luma + luma_recon_stride * (ctb_size - 1),
    826                 ps_sao_ctxt->i4_sao_blk_wd);
    827         }
    828 
    829         s_best_luma_chroma_cand.b3_y_type_idx = SAO_NONE;
    830         s_best_luma_chroma_cand.u1_y_offset[1] = 0;
    831         s_best_luma_chroma_cand.u1_y_offset[2] = 0;
    832         s_best_luma_chroma_cand.u1_y_offset[3] = 0;
    833         s_best_luma_chroma_cand.u1_y_offset[4] = 0;
    834         s_best_luma_chroma_cand.b5_y_band_pos = 0;
    835         s_best_luma_chroma_cand.b1_sao_merge_left_flag = 0;
    836         s_best_luma_chroma_cand.b1_sao_merge_up_flag = 0;
    837 
    838         s_best_luma_chroma_cand.b3_cb_type_idx = SAO_NONE;
    839         s_best_luma_chroma_cand.u1_cb_offset[1] = 0;
    840         s_best_luma_chroma_cand.u1_cb_offset[2] = 0;
    841         s_best_luma_chroma_cand.u1_cb_offset[3] = 0;
    842         s_best_luma_chroma_cand.u1_cb_offset[4] = 0;
    843         s_best_luma_chroma_cand.b5_cb_band_pos = 0;
    844 
    845         s_best_luma_chroma_cand.b3_cr_type_idx = SAO_NONE;
    846         s_best_luma_chroma_cand.u1_cr_offset[1] = 0;
    847         s_best_luma_chroma_cand.u1_cr_offset[2] = 0;
    848         s_best_luma_chroma_cand.u1_cr_offset[3] = 0;
    849         s_best_luma_chroma_cand.u1_cr_offset[4] = 0;
    850         s_best_luma_chroma_cand.b5_cr_band_pos = 0;
    851     }
    852     /*****************************************************/
    853     /********************RDO FOR CHROMA CAND**************/
    854     /*****************************************************/
    855 #if !DISABLE_SAO_WHEN_NOISY
    856     if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
    857 #else
    858     if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag && !u1_force_no_offset)
    859 #endif
    860     {
    861         /*Back up the top pixels for (x,y+1)th ctb*/
    862         if(!ps_sao_ctxt->i4_is_last_ctb_row)
    863         {
    864             memcpy(
    865                 ps_sao_ctxt->pu1_curr_sao_src_top_chroma +
    866                     ps_sao_ctxt->i4_frm_top_chroma_buf_stride,
    867                 pu1_recon_chroma + chroma_recon_stride * ((ctb_size >> !u1_is_422) - 1),
    868                 ps_sao_ctxt->i4_sao_blk_wd);
    869         }
    870 
    871         /* Reset the error and edge count*/
    872         for(edgeidx = 0; edgeidx < 5; edgeidx++)
    873         {
    874             acc_error_category[edgeidx] = 0;
    875             category_count[edgeidx] = 0;
    876         }
    877         // clang-format off
    878         ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_get_chroma_eo_sao_params(ps_sao_ctxt,
    879                 s_best_luma_chroma_cand.b3_y_type_idx, acc_error_category,
    880                 category_count);
    881         // clang-format on
    882 
    883         /* Copy the sao parameters of the best luma cand into the luma_chroma cand structure for next stage of RDO
    884         * between luma_chroma combined cand, NO SAO cand, LEFT and TOP merge cand
    885         */
    886         // clang-format off
    887         s_best_luma_chroma_cand.b3_cb_type_idx = s_best_luma_chroma_cand.b3_y_type_idx;
    888         s_best_luma_chroma_cand.u1_cb_offset[1] = category_count[0]
    889                 ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7))
    890                 : 0;
    891         s_best_luma_chroma_cand.u1_cb_offset[2] = category_count[1]
    892                 ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7))
    893                 : 0;
    894         s_best_luma_chroma_cand.u1_cb_offset[3] = category_count[3]
    895                 ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0))
    896                 : 0;
    897         s_best_luma_chroma_cand.u1_cb_offset[4] = category_count[4]
    898                 ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0))
    899                 : 0;
    900         s_best_luma_chroma_cand.b5_cb_band_pos = 0;
    901 
    902         s_best_luma_chroma_cand.b3_cr_type_idx = s_best_luma_chroma_cand.b3_y_type_idx;
    903         s_best_luma_chroma_cand.u1_cr_offset[1] = category_count[0]
    904                 ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7))
    905                 : 0;
    906         s_best_luma_chroma_cand.u1_cr_offset[2] = category_count[1]
    907                 ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7))
    908                 : 0;
    909         s_best_luma_chroma_cand.u1_cr_offset[3] = category_count[3]
    910                 ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0))
    911                 : 0;
    912         s_best_luma_chroma_cand.u1_cr_offset[4] = category_count[4]
    913                 ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0))
    914                 : 0;
    915         // clang-format on
    916         s_best_luma_chroma_cand.b5_cr_band_pos = 0;
    917     }
    918     else
    919     {
    920         /*Back up the top pixels for (x,y+1)th ctb*/
    921         if(!ps_sao_ctxt->i4_is_last_ctb_row)
    922         {
    923             memcpy(
    924                 ps_sao_ctxt->pu1_curr_sao_src_top_chroma +
    925                     ps_sao_ctxt->i4_frm_top_chroma_buf_stride,
    926                 pu1_recon_chroma + chroma_recon_stride * ((ctb_size >> !u1_is_422) - 1),
    927                 ps_sao_ctxt->i4_sao_blk_wd);
    928         }
    929 
    930         s_best_luma_chroma_cand.b3_cb_type_idx = SAO_NONE;
    931         s_best_luma_chroma_cand.u1_cb_offset[1] = 0;
    932         s_best_luma_chroma_cand.u1_cb_offset[2] = 0;
    933         s_best_luma_chroma_cand.u1_cb_offset[3] = 0;
    934         s_best_luma_chroma_cand.u1_cb_offset[4] = 0;
    935         s_best_luma_chroma_cand.b5_cb_band_pos = 0;
    936 
    937         s_best_luma_chroma_cand.b3_cr_type_idx = SAO_NONE;
    938         s_best_luma_chroma_cand.u1_cr_offset[1] = 0;
    939         s_best_luma_chroma_cand.u1_cr_offset[2] = 0;
    940         s_best_luma_chroma_cand.u1_cr_offset[3] = 0;
    941         s_best_luma_chroma_cand.u1_cr_offset[4] = 0;
    942         s_best_luma_chroma_cand.b5_cr_band_pos = 0;
    943 
    944         s_best_luma_chroma_cand.b1_sao_merge_left_flag = 0;
    945         s_best_luma_chroma_cand.b1_sao_merge_up_flag = 0;
    946     }
    947 
    948     s_best_luma_chroma_cand.b1_sao_merge_left_flag = 0;
    949     s_best_luma_chroma_cand.b1_sao_merge_up_flag = 0;
    950 
    951     /*****************************************************/
    952     /**RDO for Best Luma - Chroma combined, No SAO,*******/
    953     /*************Left merge and Top merge****************/
    954     /*****************************************************/
    955 
    956     /* No SAO cand*/
    957     ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_left_flag = 0;
    958     ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_up_flag = 0;
    959 
    960     ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b3_y_type_idx = SAO_NONE;
    961     ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_y_offset[1] = 0;
    962     ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_y_offset[2] = 0;
    963     ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_y_offset[3] = 0;
    964     ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_y_offset[4] = 0;
    965     ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b5_y_band_pos = 0;
    966 
    967     ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b3_cb_type_idx = SAO_NONE;
    968     ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cb_offset[1] = 0;
    969     ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cb_offset[2] = 0;
    970     ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cb_offset[3] = 0;
    971     ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cb_offset[4] = 0;
    972     ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b5_cb_band_pos = 0;
    973 
    974     ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b3_cr_type_idx = SAO_NONE;
    975     ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cr_offset[1] = 0;
    976     ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cr_offset[2] = 0;
    977     ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cr_offset[3] = 0;
    978     ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cr_offset[4] = 0;
    979     ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b5_cr_band_pos = 0;
    980     ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_left_flag = 0;
    981     ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_up_flag = 0;
    982 
    983     num_rdo_cand++;
    984 
    985     /* SAO_note_01: If the CTB lies on a tile or a slice boundary, then
    986     the standard mandates that the merge candidates must be set to unavailable.
    987     Hence, check for tile boundary condition by reading
    988     s_ctb_nbr_avail_flags.u1_left_avail rather than frame position of CTB.
    989     A special case: Merge-candidates should be available at dependent-slices boundaries.
    990     Search for <SAO_note_01> in workspace to know more */
    991 
    992 #if !DISABLE_SAO_WHEN_NOISY
    993     if(1)
    994 #else
    995     if(!u1_force_no_offset)
    996 #endif
    997     {
    998         /* Merge left cand*/
    999         if(ps_ctb_enc_loop_out->s_ctb_nbr_avail_flags.u1_left_avail)
   1000         {
   1001             memcpy(
   1002                 &ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand],
   1003                 &ps_sao_ctxt->s_left_ctb_sao,
   1004                 sizeof(sao_enc_t));
   1005             ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_left_flag = 1;
   1006             ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_up_flag = 0;
   1007             num_rdo_cand++;
   1008         }
   1009 
   1010         /* Merge top cand*/
   1011         if(ps_ctb_enc_loop_out->s_ctb_nbr_avail_flags.u1_top_avail)
   1012         {
   1013             memcpy(
   1014                 &ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand],
   1015                 (ps_sao_ctxt->ps_top_ctb_sao - ps_sao_ctxt->u4_num_ctbs_horz),
   1016                 sizeof(sao_enc_t));
   1017             ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_left_flag = 0;
   1018             ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_up_flag = 1;
   1019             num_rdo_cand++;
   1020         }
   1021 
   1022         /* Best luma-chroma candidate*/
   1023         memcpy(
   1024             &ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand],
   1025             &s_best_luma_chroma_cand,
   1026             sizeof(sao_enc_t));
   1027         num_rdo_cand++;
   1028     }
   1029 
   1030     {
   1031         UWORD32 luma_distortion = 0, chroma_distortion = 0;
   1032         /* First cand will be best cand after 1st iteration*/
   1033         curr_buf_idx = 0;
   1034         best_buf_idx = 1;
   1035         best_cost = 0xFFFFFFFF;
   1036         best_cand_idx = 0;
   1037 
   1038         for(rdo_cand = 0; rdo_cand < num_rdo_cand; rdo_cand++)
   1039         {
   1040             s_sao_ctxt.ps_sao = &ps_sao_ctxt->as_sao_rd_cand[rdo_cand];
   1041 
   1042             distortion = 0;
   1043 
   1044             /* This memcpy is required because cabac uses parameters from this structure
   1045             * to evaluate bits and this structure ptr is sent to cabac through
   1046             * "ihevce_cabac_rdo_encode_sao" function
   1047             */
   1048             memcpy(&ps_ctb_enc_loop_out->s_sao, s_sao_ctxt.ps_sao, sizeof(sao_enc_t));
   1049 
   1050             if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
   1051             {
   1052                 /* Copy the left pixels to the scratch buffer for every rdo cand because it is
   1053                 overwritten by the sao leaf level function for next ctb*/
   1054                 memcpy(
   1055                     s_sao_ctxt.au1_left_luma_scratch,
   1056                     ps_sao_ctxt->au1_sao_src_left_luma,
   1057                     ps_sao_ctxt->i4_sao_blk_ht);
   1058 
   1059                 /* Copy the top and top left pixels to the scratch buffer for every rdo cand because it is
   1060                 overwritten by the sao leaf level function for next ctb*/
   1061                 memcpy(
   1062                     s_sao_ctxt.au1_top_luma_scratch,
   1063                     ps_sao_ctxt->pu1_curr_sao_src_top_luma - 1,
   1064                     ps_sao_ctxt->i4_sao_blk_wd + 2);
   1065                 s_sao_ctxt.pu1_curr_sao_src_top_luma = s_sao_ctxt.au1_top_luma_scratch + 1;
   1066 
   1067                 pu1_luma_scratch_buf = ps_sao_ctxt->au1_sao_luma_scratch[curr_buf_idx];
   1068 
   1069                 /* Copy the deblocked recon data to scratch buffer to do sao*/
   1070 
   1071                 ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d(
   1072                     pu1_luma_scratch_buf,
   1073                     i4_luma_scratch_buf_stride,
   1074                     pu1_recon_luma,
   1075                     luma_recon_stride,
   1076                     SCRATCH_BUF_STRIDE,
   1077                     ctb_ht + 1);
   1078                 s_sao_ctxt.pu1_cur_luma_recon_buf = pu1_luma_scratch_buf;
   1079                 s_sao_ctxt.i4_cur_luma_recon_stride = i4_luma_scratch_buf_stride;
   1080 
   1081                 ASSERT(
   1082                     (abs(s_sao_ctxt.ps_sao->u1_y_offset[1]) <= 7) &&
   1083                     (abs(s_sao_ctxt.ps_sao->u1_y_offset[2]) <= 7) &&
   1084                     (abs(s_sao_ctxt.ps_sao->u1_y_offset[3]) <= 7) &&
   1085                     (abs(s_sao_ctxt.ps_sao->u1_y_offset[4]) <= 7));
   1086             }
   1087             if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
   1088             {
   1089                 /* Copy the left pixels to the scratch buffer for every rdo cand because it is
   1090                 overwritten by the sao leaf level function for next ctb*/
   1091                 memcpy(
   1092                     s_sao_ctxt.au1_left_chroma_scratch,
   1093                     ps_sao_ctxt->au1_sao_src_left_chroma,
   1094                     (ps_sao_ctxt->i4_sao_blk_ht >> !u1_is_422) * 2);
   1095 
   1096                 /* Copy the top and top left pixels to the scratch buffer for every rdo cand because it is
   1097                 overwritten by the sao leaf level function for next ctb*/
   1098                 memcpy(
   1099                     s_sao_ctxt.au1_top_chroma_scratch,
   1100                     ps_sao_ctxt->pu1_curr_sao_src_top_chroma - 2,
   1101                     ps_sao_ctxt->i4_sao_blk_wd + 4);
   1102 
   1103                 s_sao_ctxt.pu1_curr_sao_src_top_chroma = s_sao_ctxt.au1_top_chroma_scratch + 2;
   1104 
   1105                 pu1_chroma_scratch_buf = ps_sao_ctxt->au1_sao_chroma_scratch[curr_buf_idx];
   1106 
   1107                 /* Copy the deblocked recon data to scratch buffer to do sao*/
   1108 
   1109                 ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d(
   1110                     pu1_chroma_scratch_buf,
   1111                     i4_chroma_scratch_buf_stride,
   1112                     pu1_recon_chroma,
   1113                     chroma_recon_stride,
   1114                     SCRATCH_BUF_STRIDE,
   1115                     (ctb_ht >> !u1_is_422) + 1);
   1116 
   1117                 s_sao_ctxt.pu1_cur_chroma_recon_buf = pu1_chroma_scratch_buf;
   1118                 s_sao_ctxt.i4_cur_chroma_recon_stride = i4_chroma_scratch_buf_stride;
   1119 
   1120                 ASSERT(
   1121                     (abs(s_sao_ctxt.ps_sao->u1_cb_offset[1]) <= 7) &&
   1122                     (abs(s_sao_ctxt.ps_sao->u1_cb_offset[2]) <= 7) &&
   1123                     (abs(s_sao_ctxt.ps_sao->u1_cb_offset[3]) <= 7) &&
   1124                     (abs(s_sao_ctxt.ps_sao->u1_cb_offset[4]) <= 7));
   1125                 ASSERT(
   1126                     (abs(s_sao_ctxt.ps_sao->u1_cr_offset[1]) <= 7) &&
   1127                     (abs(s_sao_ctxt.ps_sao->u1_cr_offset[2]) <= 7) &&
   1128                     (abs(s_sao_ctxt.ps_sao->u1_cr_offset[3]) <= 7) &&
   1129                     (abs(s_sao_ctxt.ps_sao->u1_cr_offset[4]) <= 7));
   1130             }
   1131 
   1132             ASSERT(
   1133                 (s_sao_ctxt.ps_sao->b5_y_band_pos <= 28) &&
   1134                 (s_sao_ctxt.ps_sao->b5_cb_band_pos <= 28) &&
   1135                 (s_sao_ctxt.ps_sao->b5_cr_band_pos <= 28));
   1136 
   1137             s_sao_ctxt.i1_slice_sao_luma_flag = s_sao_ctxt.ps_slice_hdr->i1_slice_sao_luma_flag;
   1138             s_sao_ctxt.i1_slice_sao_chroma_flag = s_sao_ctxt.ps_slice_hdr->i1_slice_sao_chroma_flag;
   1139 
   1140             ihevce_sao_ctb(&s_sao_ctxt, ps_tile_params);
   1141 
   1142             if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
   1143             {  // clang-format off
   1144                 luma_distortion =
   1145                     ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_ssd_calculator(pu1_src_luma,
   1146                             s_sao_ctxt.pu1_cur_luma_recon_buf, luma_src_stride,
   1147                             s_sao_ctxt.i4_cur_luma_recon_stride, ctb_wd,
   1148                             ctb_ht);
   1149             }  // clang-format on
   1150 
   1151             if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
   1152             {  // clang-format off
   1153                 chroma_distortion =
   1154                     ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_ssd_calculator(pu1_src_chroma,
   1155                             s_sao_ctxt.pu1_cur_chroma_recon_buf,
   1156                             chroma_src_stride,
   1157                             s_sao_ctxt.i4_cur_chroma_recon_stride, ctb_wd,
   1158                             (ctb_ht >> !u1_is_422));
   1159             }  // clang-format on
   1160 
   1161             /*chroma distortion is added after correction because of lambda difference*/
   1162             distortion =
   1163                 luma_distortion +
   1164                 (UWORD32)(chroma_distortion * (i8_cl_ssd_lambda_qf / i8_cl_ssd_lambda_chroma_qf));
   1165 
   1166             ps_sao_ctxt->ps_rdopt_entropy_ctxt->i4_curr_buf_idx = curr_buf_idx;
   1167             ctb_bits = ihevce_cabac_rdo_encode_sao(
   1168                 ps_sao_ctxt->ps_rdopt_entropy_ctxt, ps_ctb_enc_loop_out);
   1169 
   1170             /* Calculate the cost as D+(lambda)*R   */
   1171             curr_cost = distortion +
   1172                         COMPUTE_RATE_COST_CLIP30(ctb_bits, i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
   1173 
   1174             if(curr_cost < best_cost)
   1175             {
   1176                 best_ctb_sao_bits = ctb_bits;
   1177                 best_cost = curr_cost;
   1178                 best_buf_idx = ps_sao_ctxt->ps_rdopt_entropy_ctxt->i4_curr_buf_idx;
   1179                 best_cand_idx = rdo_cand;
   1180                 curr_buf_idx = !curr_buf_idx;
   1181             }
   1182         }
   1183         /*Write the best candidate's sao bits to the header bits output (assigned, not accumulated)*/
   1184         *pu4_frame_rdopt_header_bits = best_ctb_sao_bits;
   1185 
   1186         ihevce_update_best_sao_cabac_state(ps_sao_ctxt->ps_rdopt_entropy_ctxt, best_buf_idx);
   1187 
   1188         /* store the sao parameters of curr ctb for top merge and left merge*/
   1189         memcpy(
   1190             ps_sao_ctxt->ps_top_ctb_sao,
   1191             &ps_sao_ctxt->as_sao_rd_cand[best_cand_idx],
   1192             sizeof(sao_enc_t));
   1193         memcpy(
   1194             &ps_sao_ctxt->s_left_ctb_sao,
   1195             &ps_sao_ctxt->as_sao_rd_cand[best_cand_idx],
   1196             sizeof(sao_enc_t));
   1197 
   1198         /* Copy the sao parameters of winning candidate into the structure which will be sent to entropy thrd*/
   1199         memcpy(
   1200             &ps_ctb_enc_loop_out->s_sao,
   1201             &ps_sao_ctxt->as_sao_rd_cand[best_cand_idx],
   1202             sizeof(sao_enc_t));
   1203 
   1204         if(!ps_sao_ctxt->i4_is_last_ctb_col)
   1205         {
   1206             /* Update left luma buffer for next ctb */
   1207             for(row = 0; row < ps_sao_ctxt->i4_sao_blk_ht; row++)
   1208             {
   1209                 ps_sao_ctxt->au1_sao_src_left_luma[row] =
   1210                     ps_sao_ctxt->pu1_cur_luma_recon_buf
   1211                         [row * ps_sao_ctxt->i4_cur_luma_recon_stride +
   1212                          (ps_sao_ctxt->i4_sao_blk_wd - 1)];
   1213             }
   1214         }
   1215 
   1216         if(!ps_sao_ctxt->i4_is_last_ctb_col)
   1217         {
   1218             /* Update left chroma buffer for next ctb */
   1219             for(row = 0; row < (ps_sao_ctxt->i4_sao_blk_ht >> 1); row++)
   1220             {
   1221                 *(UWORD16 *)(ps_sao_ctxt->au1_sao_src_left_chroma + row * 2) =
   1222                     *(UWORD16 *)(ps_sao_ctxt->pu1_cur_chroma_recon_buf +
   1223                                  row * ps_sao_ctxt->i4_cur_chroma_recon_stride +
   1224                                  (ps_sao_ctxt->i4_sao_blk_wd - 2));
   1225             }
   1226         }
   1227 
   1228         if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
   1229         {
   1230             /* Copy the sao'ed output of the best candidate to the recon buffer*/
   1231 
   1232             ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d(
   1233                 ps_sao_ctxt->pu1_cur_luma_recon_buf,
   1234                 ps_sao_ctxt->i4_cur_luma_recon_stride,
   1235                 ps_sao_ctxt->au1_sao_luma_scratch[best_buf_idx],
   1236                 i4_luma_scratch_buf_stride,
   1237                 ctb_wd,
   1238                 ctb_ht);
   1239         }
   1240         if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
   1241         {
   1242             /* Copy the sao'ed output of the best candidate to the chroma recon buffer*/
   1243 
   1244             ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d(
   1245                 ps_sao_ctxt->pu1_cur_chroma_recon_buf,
   1246                 ps_sao_ctxt->i4_cur_chroma_recon_stride,
   1247                 ps_sao_ctxt->au1_sao_chroma_scratch[best_buf_idx],
   1248                 i4_chroma_scratch_buf_stride,
   1249                 ctb_wd,
   1250                 ctb_ht >> !u1_is_422);
   1251         }
   1252     }
   1253 }
   1254