Home | History | Annotate | Download | only in encoder
      1 /******************************************************************************
      2  *
      3  * Copyright (C) 2018 The Android Open Source Project
      4  *
      5  * Licensed under the Apache License, Version 2.0 (the "License");
      6  * you may not use this file except in compliance with the License.
      7  * You may obtain a copy of the License at:
      8  *
      9  * http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  *
     17  *****************************************************************************
     18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
     19 */
     20 
     21 /*!
     22 ******************************************************************************
     23 * \file ihevce_enc_loop_pass.c
     24 *
     25 * \brief
     26 *    This file contains Encoder normative loop pass related functions
     27 *
     28 * \date
     29 *    18/09/2012
     30 *
     31 * \author
     32 *    Ittiam
     33 *
     34 *
     35 * List of Functions
     36 *
     37 *
     38 ******************************************************************************
     39 */
     40 
     41 /*****************************************************************************/
     42 /* File Includes                                                             */
     43 /*****************************************************************************/
     44 /* System include files */
     45 #include <stdio.h>
     46 #include <string.h>
     47 #include <stdlib.h>
     48 #include <assert.h>
     49 #include <stdarg.h>
     50 #include <math.h>
     51 #include <limits.h>
     52 
     53 /* User include files */
     54 #include "ihevc_typedefs.h"
     55 #include "itt_video_api.h"
     56 #include "ihevce_api.h"
     57 
     58 #include "rc_cntrl_param.h"
     59 #include "rc_frame_info_collector.h"
     60 #include "rc_look_ahead_params.h"
     61 
     62 #include "ihevc_defs.h"
     63 #include "ihevc_macros.h"
     64 #include "ihevc_debug.h"
     65 #include "ihevc_structs.h"
     66 #include "ihevc_platform_macros.h"
     67 #include "ihevc_deblk.h"
     68 #include "ihevc_itrans_recon.h"
     69 #include "ihevc_chroma_itrans_recon.h"
     70 #include "ihevc_chroma_intra_pred.h"
     71 #include "ihevc_intra_pred.h"
     72 #include "ihevc_inter_pred.h"
     73 #include "ihevc_mem_fns.h"
     74 #include "ihevc_padding.h"
     75 #include "ihevc_weighted_pred.h"
     76 #include "ihevc_sao.h"
     77 #include "ihevc_resi_trans.h"
     78 #include "ihevc_quant_iquant_ssd.h"
     79 #include "ihevc_cabac_tables.h"
     80 #include "ihevc_common_tables.h"
     81 #include "ihevc_quant_tables.h"
     82 
     83 #include "ihevce_defs.h"
     84 #include "ihevce_hle_interface.h"
     85 #include "ihevce_lap_enc_structs.h"
     86 #include "ihevce_multi_thrd_structs.h"
     87 #include "ihevce_multi_thrd_funcs.h"
     88 #include "ihevce_me_common_defs.h"
     89 #include "ihevce_had_satd.h"
     90 #include "ihevce_error_codes.h"
     91 #include "ihevce_bitstream.h"
     92 #include "ihevce_cabac.h"
     93 #include "ihevce_rdoq_macros.h"
     94 #include "ihevce_function_selector.h"
     95 #include "ihevce_enc_structs.h"
     96 #include "ihevce_entropy_structs.h"
     97 #include "ihevce_cmn_utils_instr_set_router.h"
     98 #include "ihevce_ipe_instr_set_router.h"
     99 #include "ihevce_decomp_pre_intra_structs.h"
    100 #include "ihevce_decomp_pre_intra_pass.h"
    101 #include "ihevce_enc_loop_structs.h"
    102 #include "ihevce_nbr_avail.h"
    103 #include "ihevce_enc_loop_utils.h"
    104 #include "ihevce_sub_pic_rc.h"
    105 #include "ihevce_global_tables.h"
    106 #include "ihevce_bs_compute_ctb.h"
    107 #include "ihevce_cabac_rdo.h"
    108 #include "ihevce_deblk.h"
    109 #include "ihevce_frame_process.h"
    110 #include "ihevce_rc_enc_structs.h"
    111 #include "hme_datatype.h"
    112 #include "hme_interface.h"
    113 #include "hme_common_defs.h"
    114 #include "hme_defs.h"
    115 #include "ihevce_me_instr_set_router.h"
    116 #include "ihevce_enc_subpel_gen.h"
    117 #include "ihevce_inter_pred.h"
    118 #include "ihevce_mv_pred.h"
    119 #include "ihevce_mv_pred_merge.h"
    120 #include "ihevce_enc_loop_inter_mode_sifter.h"
    121 #include "ihevce_enc_cu_recursion.h"
    122 #include "ihevce_enc_loop_pass.h"
    123 #include "ihevce_common_utils.h"
    124 #include "ihevce_dep_mngr_interface.h"
    125 #include "ihevce_sao.h"
    126 #include "ihevce_tile_interface.h"
    127 #include "ihevce_profile.h"
    128 
    129 #include "cast_types.h"
    130 #include "osal.h"
    131 #include "osal_defaults.h"
    132 
    133 /*****************************************************************************/
    134 /* Globals                                                                   */
    135 /*****************************************************************************/
    136 extern PART_ID_T ge_part_type_to_part_id[MAX_PART_TYPES][MAX_NUM_PARTS];
    137 
    138 extern UWORD8 gau1_num_parts_in_part_type[MAX_PART_TYPES];
    139 
    140 /*****************************************************************************/
    141 /* Constant Macros                                                           */
    142 /*****************************************************************************/
    143 #define UPDATE_QP_AT_CTB 6
    144 
    145 /*****************************************************************************/
    146 /* Function Definitions                                                      */
    147 /*****************************************************************************/
    148 
    149 /*!
    150 ******************************************************************************
    151 * \if Function name : ihevce_enc_loop_ctb_left_copy \endif
    152 *
    153 * \brief
    154 *    This function copy the right data of CTB to context buffers
    155 *
    156 * \date
    157 *    18/09/2012
    158 *
    159 * \author
    160 *    Ittiam
    161 *
    162 * \return
    163 *
    164 * List of Functions
    165 *
    166 *
    167 ******************************************************************************
    168 */
    169 void ihevce_enc_loop_ctb_left_copy(ihevce_enc_loop_ctxt_t *ps_ctxt, enc_loop_cu_prms_t *ps_cu_prms)
    170 {
    171     /* ------------------------------------------------------------------ */
    172     /* copy the right coloum data to the context buffers                  */
    173     /* ------------------------------------------------------------------ */
    174 
    175     nbr_4x4_t *ps_left_nbr;
    176     nbr_4x4_t *ps_nbr;
    177     UWORD8 *pu1_buff;
    178     WORD32 num_pels;
    179     UWORD8 *pu1_luma_left, *pu1_chrm_left;
    180 
    181     UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
    182 
    183     pu1_luma_left = (UWORD8 *)ps_ctxt->pv_left_luma_data;
    184     pu1_chrm_left = (UWORD8 *)ps_ctxt->pv_left_chrm_data;
    185     ps_left_nbr = &ps_ctxt->as_left_col_nbr[0];
    186 
    187     /* copy right luma data */
    188     pu1_buff = ps_cu_prms->pu1_luma_recon + ps_cu_prms->i4_ctb_size - 1;
    189 
    190     for(num_pels = 0; num_pels < ps_cu_prms->i4_ctb_size; num_pels++)
    191     {
    192         WORD32 i4_indx = ps_cu_prms->i4_luma_recon_stride * num_pels;
    193 
    194         pu1_luma_left[num_pels] = pu1_buff[i4_indx];
    195     }
    196 
    197     /* copy right chroma data */
    198     pu1_buff = ps_cu_prms->pu1_chrm_recon + ps_cu_prms->i4_ctb_size - 2;
    199 
    200     for(num_pels = 0; num_pels < (ps_cu_prms->i4_ctb_size >> (0 == u1_is_422)); num_pels++)
    201     {
    202         WORD32 i4_indx = ps_cu_prms->i4_chrm_recon_stride * num_pels;
    203 
    204         *pu1_chrm_left++ = pu1_buff[i4_indx];
    205         *pu1_chrm_left++ = pu1_buff[i4_indx + 1];
    206     }
    207 
    208     /* store the nbr 4x4 data at ctb level */
    209     {
    210         WORD32 ctr;
    211         WORD32 nbr_strd;
    212 
    213         nbr_strd = ps_cu_prms->i4_ctb_size >> 2;
    214 
    215         /* copy right nbr data */
    216         ps_nbr = &ps_ctxt->as_ctb_nbr_arr[0];
    217         ps_nbr += ((ps_cu_prms->i4_ctb_size >> 2) - 1);
    218 
    219         for(ctr = 0; ctr < (ps_cu_prms->i4_ctb_size >> 2); ctr++)
    220         {
    221             WORD32 i4_indx = nbr_strd * ctr;
    222 
    223             ps_left_nbr[ctr] = ps_nbr[i4_indx];
    224         }
    225     }
    226     return;
    227 }
    228 
    229 /*!
    230 ******************************************************************************
    231 * \if Function name : ihevce_mark_all_modes_to_evaluate \endif
    232 *
    233 * \brief
    234 *   Mark all modes for inter/intra for evaluation. This function will be
    235 *   called by ref instance
    236 *
    237 * \param[in] pv_ctxt : pointer to enc_loop module
    238 * \param[in] ps_cu_analyse : pointer to cu analyse
    239 *
    240 * \return
    241 *    None
    242 *
    243 * \author
    244 *  Ittiam
    245 *
    246 *****************************************************************************
    247 */
    248 void ihevce_mark_all_modes_to_evaluate(void *pv_ctxt, cu_analyse_t *ps_cu_analyse)
    249 {
    250     UWORD8 ctr;
    251     WORD32 i4_part;
    252 
    253     (void)pv_ctxt;
    254     /* run a loop over all Inter cands */
    255     for(ctr = 0; ctr < MAX_INTER_CU_CANDIDATES; ctr++)
    256     {
    257         ps_cu_analyse->as_cu_inter_cand[ctr].b1_eval_mark = 1;
    258     }
    259 
    260     /* run a loop over all intra candidates */
    261     if(0 != ps_cu_analyse->u1_num_intra_rdopt_cands)
    262     {
    263         for(ctr = 0; ctr < MAX_INTRA_CU_CANDIDATES + 1; ctr++)
    264         {
    265             ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_eval_mark[ctr] = 1;
    266             ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_by_2_eval_mark[ctr] = 1;
    267 
    268             for(i4_part = 0; i4_part < NUM_PU_PARTS; i4_part++)
    269             {
    270                 ps_cu_analyse->s_cu_intra_cand.au1_nxn_eval_mark[i4_part][ctr] = 1;
    271             }
    272         }
    273     }
    274 }
    275 
    276 /*!
    277 ******************************************************************************
    278 * \if Function name : ihevce_cu_mode_decide \endif
    279 *
    280 * \brief
    281 *    Coding Unit mode decide function. Performs RD opt and decides the best mode
    282 *
    283 * \param[in] ps_ctxt : pointer to enc_loop module
    284 * \param[in] ps_cu_prms  : pointer to coding unit params (position, buffer pointers)
    285 * \param[in] ps_cu_analyse : pointer to cu analyse
    286 * \param[out] ps_final_mode_state : pointer to cu final
    287 * \param[out] pu1_ecd_data : pointer to store coeff data for ECD
    288 * \param[out] ps_col_pu : colocated pu buffer pointer
    289 * \param[out] pu1_col_pu_map : colocated pu map buffer pointer
    290 * \param[in] col_start_pu_idx : pu index start value
    291 *
    292 * \return
    293 *    None
    294 *
    295 *
    296 * \author
    297 *  Ittiam
    298 *
    299 *****************************************************************************
    300 */
    301 LWORD64 ihevce_cu_mode_decide(
    302     ihevce_enc_loop_ctxt_t *ps_ctxt,
    303     enc_loop_cu_prms_t *ps_cu_prms,
    304     cu_analyse_t *ps_cu_analyse,
    305     final_mode_state_t *ps_final_mode_state,
    306     UWORD8 *pu1_ecd_data,
    307     pu_col_mv_t *ps_col_pu,
    308     UWORD8 *pu1_col_pu_map,
    309     WORD32 col_start_pu_idx)
    310 {
    311     enc_loop_chrm_cu_buf_prms_t s_chrm_cu_buf_prms;
    312     cu_nbr_prms_t s_cu_nbr_prms;
    313     inter_cu_mode_info_t s_inter_cu_mode_info;
    314     cu_inter_cand_t *ps_best_inter_cand = NULL;
    315     UWORD8 *pu1_cu_top;
    316     UWORD8 *pu1_cu_top_left;
    317     UWORD8 *pu1_cu_left;
    318     UWORD8 *pu1_final_recon = NULL;
    319     UWORD8 *pu1_curr_src = NULL;
    320     void *pv_curr_src = NULL;
    321     void *pv_cu_left = NULL;
    322     void *pv_cu_top = NULL;
    323     void *pv_cu_top_left = NULL;
    324 
    325     WORD32 cu_left_stride = 0;
    326     WORD32 ctr;
    327     WORD32 rd_opt_best_idx;
    328     LWORD64 rd_opt_least_cost;
    329     WORD32 rd_opt_curr_idx;
    330     WORD32 num_4x4_in_ctb;
    331     WORD32 nbr_4x4_left_strd = 0;
    332 
    333     nbr_4x4_t *ps_topleft_nbr_4x4;
    334     nbr_4x4_t *ps_left_nbr_4x4 = NULL;
    335     nbr_4x4_t *ps_top_nbr_4x4 = NULL;
    336     nbr_4x4_t *ps_curr_nbr_4x4;
    337     WORD32 enable_intra_eval_flag;
    338     WORD32 i4_best_cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp - 1;
    339     WORD32 curr_cu_pos_in_row;
    340     WORD32 cu_top_right_offset;
    341     WORD32 cu_top_right_dep_pos;
    342     WORD32 i4_ctb_x_off, i4_ctb_y_off;
    343 
    344     UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
    345     (void)ps_final_mode_state;
    346     /* default init */
    347     rd_opt_least_cost = MAX_COST_64;
    348     ps_ctxt->as_cu_prms[0].i8_best_rdopt_cost = MAX_COST_64;
    349     ps_ctxt->as_cu_prms[1].i8_best_rdopt_cost = MAX_COST_64;
    350 
    351     /* Zero cbf tool is enabled by default for all presets */
    352     ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
    353 
    354     rd_opt_best_idx = 1;
    355     rd_opt_curr_idx = 0;
    356     enable_intra_eval_flag = 1;
    357 
    358     /* CU params in enc ctxt*/
    359     ps_ctxt->ps_enc_out_ctxt->b3_cu_pos_x = ps_cu_analyse->b3_cu_pos_x;
    360     ps_ctxt->ps_enc_out_ctxt->b3_cu_pos_y = ps_cu_analyse->b3_cu_pos_y;
    361     ps_ctxt->ps_enc_out_ctxt->u1_cu_size = ps_cu_analyse->u1_cu_size;
    362 
    363     num_4x4_in_ctb = (ps_cu_prms->i4_ctb_size >> 2);
    364     ps_curr_nbr_4x4 = &ps_ctxt->as_ctb_nbr_arr[0];
    365     ps_curr_nbr_4x4 += (ps_cu_analyse->b3_cu_pos_x << 1);
    366     ps_curr_nbr_4x4 += ((ps_cu_analyse->b3_cu_pos_y << 1) * num_4x4_in_ctb);
    367 
    368     /* CB and Cr are pixel interleaved */
    369     s_chrm_cu_buf_prms.i4_chrm_recon_stride = ps_cu_prms->i4_chrm_recon_stride;
    370 
    371     s_chrm_cu_buf_prms.i4_chrm_src_stride = ps_cu_prms->i4_chrm_src_stride;
    372 
    373     if(!ps_ctxt->u1_is_input_data_hbd)
    374     {
    375         /* --------------------------------------- */
    376         /* ----- Luma Pointers Derivation -------- */
    377         /* --------------------------------------- */
    378 
    379         /* based on CU position derive the pointers */
    380         pu1_final_recon = ps_cu_prms->pu1_luma_recon + (ps_cu_analyse->b3_cu_pos_x << 3);
    381 
    382         pu1_curr_src = ps_cu_prms->pu1_luma_src + (ps_cu_analyse->b3_cu_pos_x << 3);
    383 
    384         pu1_final_recon += ((ps_cu_analyse->b3_cu_pos_y << 3) * ps_cu_prms->i4_luma_recon_stride);
    385 
    386         pu1_curr_src += ((ps_cu_analyse->b3_cu_pos_y << 3) * ps_cu_prms->i4_luma_src_stride);
    387 
    388         pv_curr_src = pu1_curr_src;
    389 
    390         /* CU left */
    391         if(0 == ps_cu_analyse->b3_cu_pos_x)
    392         {
    393             /* CTB boundary */
    394             pu1_cu_left = (UWORD8 *)ps_ctxt->pv_left_luma_data;
    395             pu1_cu_left += (ps_cu_analyse->b3_cu_pos_y << 3);
    396             cu_left_stride = 1;
    397 
    398             ps_left_nbr_4x4 = &ps_ctxt->as_left_col_nbr[0];
    399             ps_left_nbr_4x4 += ps_cu_analyse->b3_cu_pos_y << 1;
    400             nbr_4x4_left_strd = 1;
    401         }
    402         else
    403         {
    404             /* inside CTB */
    405             pu1_cu_left = pu1_final_recon - 1;
    406             cu_left_stride = ps_cu_prms->i4_luma_recon_stride;
    407 
    408             ps_left_nbr_4x4 = ps_curr_nbr_4x4 - 1;
    409             nbr_4x4_left_strd = num_4x4_in_ctb;
    410         }
    411 
    412         pv_cu_left = pu1_cu_left;
    413 
    414         /* CU top */
    415         if(0 == ps_cu_analyse->b3_cu_pos_y)
    416         {
    417             /* CTB boundary */
    418             pu1_cu_top = (UWORD8 *)ps_ctxt->pv_top_row_luma;
    419             pu1_cu_top += ps_cu_prms->i4_ctb_pos * ps_cu_prms->i4_ctb_size;
    420             pu1_cu_top += (ps_cu_analyse->b3_cu_pos_x << 3);
    421 
    422             ps_top_nbr_4x4 = ps_ctxt->ps_top_row_nbr;
    423             ps_top_nbr_4x4 += (ps_cu_prms->i4_ctb_pos * (ps_cu_prms->i4_ctb_size >> 2));
    424             ps_top_nbr_4x4 += (ps_cu_analyse->b3_cu_pos_x << 1);
    425         }
    426         else
    427         {
    428             /* inside CTB */
    429             pu1_cu_top = pu1_final_recon - ps_cu_prms->i4_luma_recon_stride;
    430 
    431             ps_top_nbr_4x4 = ps_curr_nbr_4x4 - num_4x4_in_ctb;
    432         }
    433 
    434         pv_cu_top = pu1_cu_top;
    435 
    436         /* CU top left */
    437         if((0 == ps_cu_analyse->b3_cu_pos_x) && (0 != ps_cu_analyse->b3_cu_pos_y))
    438         {
    439             /* left ctb boundary but not first row */
    440             pu1_cu_top_left = pu1_cu_left - 1; /* stride is 1 */
    441             ps_topleft_nbr_4x4 = ps_left_nbr_4x4 - 1; /* stride is 1 */
    442         }
    443         else
    444         {
    445             /* rest all cases topleft is top -1 */
    446             pu1_cu_top_left = pu1_cu_top - 1;
    447             ps_topleft_nbr_4x4 = ps_top_nbr_4x4 - 1;
    448         }
    449 
    450         pv_cu_top_left = pu1_cu_top_left;
    451 
    452         /* Store the CU nbr information in the ctxt for final reconstruction fun. */
    453         s_cu_nbr_prms.nbr_4x4_left_strd = nbr_4x4_left_strd;
    454         s_cu_nbr_prms.ps_left_nbr_4x4 = ps_left_nbr_4x4;
    455         s_cu_nbr_prms.ps_topleft_nbr_4x4 = ps_topleft_nbr_4x4;
    456         s_cu_nbr_prms.ps_top_nbr_4x4 = ps_top_nbr_4x4;
    457         s_cu_nbr_prms.pu1_cu_left = pu1_cu_left;
    458         s_cu_nbr_prms.pu1_cu_top = pu1_cu_top;
    459         s_cu_nbr_prms.pu1_cu_top_left = pu1_cu_top_left;
    460         s_cu_nbr_prms.cu_left_stride = cu_left_stride;
    461 
    462         /* ------------------------------------------------------------ */
    463         /* -- Initialize the number of neigbour skip cu count for rdo --*/
    464         /* ------------------------------------------------------------ */
    465         {
    466             nbr_avail_flags_t s_nbr;
    467             WORD32 i4_num_nbr_skip_cus = 0;
    468 
    469             /* get the neighbour availability flags for current cu  */
    470             ihevce_get_nbr_intra(
    471                 &s_nbr,
    472                 ps_ctxt->pu1_ctb_nbr_map,
    473                 ps_ctxt->i4_nbr_map_strd,
    474                 (ps_cu_analyse->b3_cu_pos_x << 1),
    475                 (ps_cu_analyse->b3_cu_pos_y << 1),
    476                 (ps_cu_analyse->u1_cu_size >> 2));
    477             if(s_nbr.u1_top_avail)
    478             {
    479                 i4_num_nbr_skip_cus += ps_top_nbr_4x4->b1_skip_flag;
    480             }
    481 
    482             if(s_nbr.u1_left_avail)
    483             {
    484                 i4_num_nbr_skip_cus += ps_left_nbr_4x4->b1_skip_flag;
    485             }
    486             ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[0].i4_num_nbr_skip_cus =
    487                 i4_num_nbr_skip_cus;
    488             ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[1].i4_num_nbr_skip_cus =
    489                 i4_num_nbr_skip_cus;
    490         }
    491 
    492         /* --------------------------------------- */
    493         /* --- Chroma Pointers Derivation -------- */
    494         /* --------------------------------------- */
    495 
    496         /* based on CU position derive the pointers */
    497         s_chrm_cu_buf_prms.pu1_final_recon =
    498             ps_cu_prms->pu1_chrm_recon + (ps_cu_analyse->b3_cu_pos_x << 3);
    499 
    500         s_chrm_cu_buf_prms.pu1_curr_src =
    501             ps_cu_prms->pu1_chrm_src + (ps_cu_analyse->b3_cu_pos_x << 3);
    502 
    503         s_chrm_cu_buf_prms.pu1_final_recon +=
    504             ((ps_cu_analyse->b3_cu_pos_y << (u1_is_422 + 2)) * ps_cu_prms->i4_chrm_recon_stride);
    505 
    506         s_chrm_cu_buf_prms.pu1_curr_src +=
    507             ((ps_cu_analyse->b3_cu_pos_y << (u1_is_422 + 2)) * ps_cu_prms->i4_chrm_src_stride);
    508 
    509         /* CU left */
    510         if(0 == ps_cu_analyse->b3_cu_pos_x)
    511         {
    512             /* CTB boundary */
    513             s_chrm_cu_buf_prms.pu1_cu_left = (UWORD8 *)ps_ctxt->pv_left_chrm_data;
    514             s_chrm_cu_buf_prms.pu1_cu_left += (ps_cu_analyse->b3_cu_pos_y << (u1_is_422 + 3));
    515             s_chrm_cu_buf_prms.i4_cu_left_stride = 2;
    516         }
    517         else
    518         {
    519             /* inside CTB */
    520             s_chrm_cu_buf_prms.pu1_cu_left = s_chrm_cu_buf_prms.pu1_final_recon - 2;
    521             s_chrm_cu_buf_prms.i4_cu_left_stride = ps_cu_prms->i4_chrm_recon_stride;
    522         }
    523 
    524         /* CU top */
    525         if(0 == ps_cu_analyse->b3_cu_pos_y)
    526         {
    527             /* CTB boundary */
    528             s_chrm_cu_buf_prms.pu1_cu_top = (UWORD8 *)ps_ctxt->pv_top_row_chroma;
    529             s_chrm_cu_buf_prms.pu1_cu_top += ps_cu_prms->i4_ctb_pos * ps_cu_prms->i4_ctb_size;
    530             s_chrm_cu_buf_prms.pu1_cu_top += (ps_cu_analyse->b3_cu_pos_x << 3);
    531         }
    532         else
    533         {
    534             /* inside CTB */
    535             s_chrm_cu_buf_prms.pu1_cu_top =
    536                 s_chrm_cu_buf_prms.pu1_final_recon - ps_cu_prms->i4_chrm_recon_stride;
    537         }
    538 
    539         /* CU top left */
    540         if((0 == ps_cu_analyse->b3_cu_pos_x) && (0 != ps_cu_analyse->b3_cu_pos_y))
    541         {
    542             /* left ctb boundary but not first row */
    543             s_chrm_cu_buf_prms.pu1_cu_top_left =
    544                 s_chrm_cu_buf_prms.pu1_cu_left - 2; /* stride is 1 (2 pixels) */
    545         }
    546         else
    547         {
    548             /* rest all cases topleft is top -2 */
    549             s_chrm_cu_buf_prms.pu1_cu_top_left = s_chrm_cu_buf_prms.pu1_cu_top - 2;
    550         }
    551     }
    552 
    553     /* Set Variables for Dep. Checking and Setting */
    554     i4_ctb_x_off = (ps_cu_prms->i4_ctb_pos << 6);
    555 
    556     i4_ctb_y_off = ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y;
    557     ps_ctxt->i4_satd_buf_idx = rd_opt_curr_idx;
    558 
    559     /* Set the pred pointer count for ME/intra to 0 to start */
    560     ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count = 0;
    561 
    562     ASSERT(
    563         (ps_cu_analyse->u1_num_inter_cands > 0) || (ps_cu_analyse->u1_num_intra_rdopt_cands > 0));
    564 
    565     ASSERT(ps_cu_analyse->u1_num_inter_cands <= MAX_INTER_CU_CANDIDATES);
    566     s_inter_cu_mode_info.u1_num_inter_cands = 0;
    567     s_inter_cu_mode_info.u1_idx_of_worst_cost_in_cost_array = 0;
    568     s_inter_cu_mode_info.u1_idx_of_worst_cost_in_pred_buf_array = 0;
    569 
    570     ps_ctxt->s_cu_inter_merge_skip.u1_num_merge_cands = 0;
    571     ps_ctxt->s_cu_inter_merge_skip.u1_num_skip_cands = 0;
    572     ps_ctxt->s_mixed_mode_inter_cu.u1_num_mixed_mode_type0_cands = 0;
    573     ps_ctxt->s_mixed_mode_inter_cu.u1_num_mixed_mode_type1_cands = 0;
    574     ps_ctxt->s_pred_buf_data.i4_pred_stride = ps_cu_analyse->u1_cu_size;
    575     if(0 != ps_cu_analyse->u1_num_inter_cands)
    576     {
    577         ihevce_inter_cand_sifter_prms_t s_prms;
    578 
    579         UWORD8 u1_enable_top_row_sync;
    580 
    581         if(ps_ctxt->u1_disable_intra_eval)
    582         {
    583             u1_enable_top_row_sync = !DISABLE_TOP_SYNC;
    584         }
    585         else
    586         {
    587             u1_enable_top_row_sync = 1;
    588         }
    589 
    590         if((!ps_ctxt->u1_use_top_at_ctb_boundary) && u1_enable_top_row_sync)
    591         {
    592             /* Wait till top data is ready          */
    593             /* Currently checking till top right CU */
    594             curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3);
    595 
    596             if(i4_ctb_y_off == 0)
    597             {
    598                 /* No wait for 1st row */
    599                 cu_top_right_offset = -(MAX_CTB_SIZE);
    600                 {
    601                     ihevce_tile_params_t *ps_col_tile_params =
    602                         ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base +
    603                          ps_ctxt->i4_tile_col_idx);
    604                     /* No wait for 1st row */
    605                     cu_top_right_offset = -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE));
    606                 }
    607                 cu_top_right_dep_pos = 0;
    608             }
    609             else
    610             {
    611                 cu_top_right_offset = (ps_cu_analyse->u1_cu_size) + 4;
    612                 cu_top_right_dep_pos = (i4_ctb_y_off >> 6) - 1;
    613             }
    614 
    615             if(0 == ps_cu_analyse->b3_cu_pos_y)
    616             {
    617                 ihevce_dmgr_chk_row_row_sync(
    618                     ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
    619                     curr_cu_pos_in_row,
    620                     cu_top_right_offset,
    621                     cu_top_right_dep_pos,
    622                     ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
    623                     ps_ctxt->thrd_id);
    624             }
    625         }
    626 
    627         s_prms.i4_ctb_nbr_map_stride = ps_ctxt->i4_nbr_map_strd;
    628         s_prms.i4_max_num_inter_rdopt_cands = ps_ctxt->i4_max_num_inter_rdopt_cands;
    629         s_prms.i4_nbr_4x4_left_strd = nbr_4x4_left_strd;
    630         s_prms.i4_src_strd = ps_cu_prms->i4_luma_src_stride;
    631         s_prms.ps_cu_inter_merge_skip = &ps_ctxt->s_cu_inter_merge_skip;
    632         s_prms.aps_cu_nbr_buf[0] = &ps_ctxt->as_cu_nbr[ps_ctxt->i4_satd_buf_idx][0];
    633         s_prms.aps_cu_nbr_buf[1] = &ps_ctxt->as_cu_nbr[!ps_ctxt->i4_satd_buf_idx][0];
    634         s_prms.ps_left_nbr_4x4 = ps_left_nbr_4x4;
    635         s_prms.ps_mc_ctxt = &ps_ctxt->s_mc_ctxt;
    636         s_prms.ps_me_cands = ps_cu_analyse->as_cu_inter_cand;
    637         s_prms.ps_mixed_modes_datastore = &ps_ctxt->s_mixed_mode_inter_cu;
    638         s_prms.ps_mv_pred_ctxt = &ps_ctxt->s_mv_pred_ctxt;
    639         s_prms.ps_pred_buf_data = &ps_ctxt->s_pred_buf_data;
    640         s_prms.ps_topleft_nbr_4x4 = ps_topleft_nbr_4x4;
    641         s_prms.ps_top_nbr_4x4 = ps_top_nbr_4x4;
    642         s_prms.pu1_ctb_nbr_map = ps_ctxt->pu1_ctb_nbr_map;
    643         s_prms.pv_src = pv_curr_src;
    644         s_prms.u1_cu_pos_x = ps_cu_analyse->b3_cu_pos_x << 3;
    645         s_prms.u1_cu_pos_y = ps_cu_analyse->b3_cu_pos_y << 3;
    646         s_prms.u1_cu_size = ps_cu_analyse->u1_cu_size;
    647         s_prms.u1_max_merge_candidates = ps_ctxt->i4_max_merge_candidates;
    648         s_prms.u1_num_me_cands = ps_cu_analyse->u1_num_inter_cands;
    649         s_prms.u1_use_satd_for_merge_eval = ps_ctxt->i4_use_satd_for_merge_eval;
    650         s_prms.u1_quality_preset = ps_ctxt->i4_quality_preset;
    651         s_prms.i1_slice_type = ps_ctxt->i1_slice_type;
    652         s_prms.ps_cu_me_intra_pred_prms = &ps_ctxt->s_cu_me_intra_pred_prms;
    653         s_prms.u1_is_hbd = (ps_ctxt->u1_bit_depth > 8);
    654         s_prms.ps_inter_cu_mode_info = &s_inter_cu_mode_info;
    655         s_prms.pai4_mv_cost = ps_cu_analyse->ai4_mv_cost;
    656         s_prms.i4_lambda_qf = ps_ctxt->i4_sad_lamda;
    657         s_prms.u1_use_merge_cand_from_top_row =
    658             (u1_enable_top_row_sync || (s_prms.u1_cu_pos_y > 0));
    659         s_prms.u1_merge_idx_cabac_model =
    660             ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[IHEVC_CAB_MERGE_IDX_EXT];
    661 #if REUSE_ME_COMPUTED_ERROR_FOR_INTER_CAND_SIFTING
    662         s_prms.pai4_me_err_metric = ps_cu_analyse->ai4_err_metric;
    663         s_prms.u1_reuse_me_sad = 1;
    664 #else
    665         s_prms.u1_reuse_me_sad = 0;
    666 #endif
    667 
    668         if(ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr->i1_slice_type != PSLICE)
    669         {
    670             if(ps_ctxt->i4_temporal_layer == 1)
    671             {
    672                 s_prms.i4_alpha_stim_multiplier = ALPHA_FOR_NOISE_TERM_IN_ME_BREF;
    673             }
    674             else
    675             {
    676                 s_prms.i4_alpha_stim_multiplier = ALPHA_FOR_NOISE_TERM_IN_ME;
    677             }
    678         }
    679         else
    680         {
    681             s_prms.i4_alpha_stim_multiplier = ALPHA_FOR_NOISE_TERM_IN_ME_P;
    682         }
    683         s_prms.u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy;
    684 
    685         if(s_prms.u1_is_cu_noisy)
    686         {
    687             s_prms.i4_lambda_qf =
    688                 ((float)s_prms.i4_lambda_qf) * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f;
    689         }
    690         s_prms.pf_luma_inter_pred_pu = ihevce_luma_inter_pred_pu;
    691 
    692         s_prms.ps_cmn_utils_optimised_function_list = &ps_ctxt->s_cmn_opt_func;
    693 
    694         s_prms.pf_evalsad_pt_npu_mxn_8bit = (FT_SAD_EVALUATOR *)ps_ctxt->pv_evalsad_pt_npu_mxn_8bit;
    695         ihevce_inter_cand_sifter(&s_prms);
    696     }
    697     if(u1_is_422)
    698     {
    699         UWORD8 au1_buf_ids[NUM_CU_ME_INTRA_PRED_IDX - 1];
    700         UWORD8 u1_num_bufs_allocated;
    701 
    702         u1_num_bufs_allocated = ihevce_get_free_pred_buf_indices(
    703             au1_buf_ids, &ps_ctxt->s_pred_buf_data.u4_is_buf_in_use, NUM_CU_ME_INTRA_PRED_IDX - 1);
    704 
    705         ASSERT(u1_num_bufs_allocated == (NUM_CU_ME_INTRA_PRED_IDX - 1));
    706 
    707         for(ctr = ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count; ctr < u1_num_bufs_allocated;
    708             ctr++)
    709         {
    710             {
    711                 ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[ctr] =
    712                     (UWORD8 *)ps_ctxt->s_pred_buf_data.apv_inter_pred_data[au1_buf_ids[ctr]];
    713             }
    714 
    715             ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[ctr] = ps_cu_analyse->u1_cu_size;
    716 
    717             ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count++;
    718         }
    719 
    720         {
    721             ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[ctr] =
    722                 (UWORD8 *)ps_ctxt->pv_422_chroma_intra_pred_buf;
    723         }
    724 
    725         ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[ctr] = ps_cu_analyse->u1_cu_size;
    726 
    727         ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count++;
    728     }
    729     else
    730     {
    731         UWORD8 au1_buf_ids[NUM_CU_ME_INTRA_PRED_IDX];
    732         UWORD8 u1_num_bufs_allocated;
    733 
    734         u1_num_bufs_allocated = ihevce_get_free_pred_buf_indices(
    735             au1_buf_ids, &ps_ctxt->s_pred_buf_data.u4_is_buf_in_use, NUM_CU_ME_INTRA_PRED_IDX);
    736 
    737         ASSERT(u1_num_bufs_allocated == NUM_CU_ME_INTRA_PRED_IDX);
    738 
    739         for(ctr = ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count; ctr < u1_num_bufs_allocated;
    740             ctr++)
    741         {
    742             {
    743                 ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[ctr] =
    744                     (UWORD8 *)ps_ctxt->s_pred_buf_data.apv_inter_pred_data[au1_buf_ids[ctr]];
    745             }
    746 
    747             ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[ctr] = ps_cu_analyse->u1_cu_size;
    748 
    749             ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count++;
    750         }
    751     }
    752 
    753     ihevce_mark_all_modes_to_evaluate(ps_ctxt, ps_cu_analyse);
    754 
    755     ps_ctxt->as_cu_prms[0].s_recon_datastore.u1_is_lumaRecon_available = 0;
    756     ps_ctxt->as_cu_prms[1].s_recon_datastore.u1_is_lumaRecon_available = 0;
    757     ps_ctxt->as_cu_prms[0].s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
    758     ps_ctxt->as_cu_prms[1].s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
    759     ps_ctxt->as_cu_prms[0].s_recon_datastore.au1_is_chromaRecon_available[1] = 0;
    760     ps_ctxt->as_cu_prms[1].s_recon_datastore.au1_is_chromaRecon_available[1] = 0;
    761     ps_ctxt->as_cu_prms[0].s_recon_datastore.au1_is_chromaRecon_available[2] = 0;
    762     ps_ctxt->as_cu_prms[1].s_recon_datastore.au1_is_chromaRecon_available[2] = 0;
    763     /* --------------------------------------- */
    764     /* ------ Inter RD OPT stage ------------- */
    765     /* --------------------------------------- */
    766     if(0 != s_inter_cu_mode_info.u1_num_inter_cands)
    767     {
    768         UWORD8 u1_ssd_bit_info_ctr = 0;
    769 
    770         /* -- run a loop over all Inter rd opt cands ------ */
    771         for(ctr = 0; ctr < s_inter_cu_mode_info.u1_num_inter_cands; ctr++)
    772         {
    773             cu_inter_cand_t *ps_inter_cand;
    774 
    775             LWORD64 rd_opt_cost = 0;
    776 
    777             ps_inter_cand = s_inter_cu_mode_info.aps_cu_data[ctr];
    778 
    779             if((ps_inter_cand->b1_skip_flag) || (ps_inter_cand->as_inter_pu[0].b1_merge_flag) ||
    780                (ps_inter_cand->b3_part_size && ps_inter_cand->as_inter_pu[1].b1_merge_flag))
    781             {
    782                 ps_inter_cand->b1_eval_mark = 1;
    783             }
    784 
    785             /****************************************************************/
    786             /* This check is only valid for derived instances.              */
    787             /* check if this mode needs to be evaluated or not.             */
    788             /* if it is a skip candidate, go ahead and evaluate it even if  */
    789             /* it has not been marked while sorting.                        */
    790             /****************************************************************/
    791             if((0 == ps_inter_cand->b1_eval_mark) && (0 == ps_inter_cand->b1_skip_flag))
    792             {
    793                 continue;
    794             }
    795 
    796             /* RDOPT related copies and settings */
    797             ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx;
    798 
    799             /* RDOPT copy States : Prev Cu best to current init */
    800             COPY_CABAC_STATES(
    801                 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
    802                 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
    803                 IHEVC_CAB_CTXT_END * sizeof(UWORD8));
    804             /* MVP ,MVD calc and Motion compensation */
    805             rd_opt_cost = ((pf_inter_rdopt_cu_mc_mvp)ps_ctxt->pv_inter_rdopt_cu_mc_mvp)(
    806                 ps_ctxt,
    807                 ps_inter_cand,
    808                 ps_cu_analyse->u1_cu_size,
    809                 ps_cu_analyse->b3_cu_pos_x,
    810                 ps_cu_analyse->b3_cu_pos_y,
    811                 ps_left_nbr_4x4,
    812                 ps_top_nbr_4x4,
    813                 ps_topleft_nbr_4x4,
    814                 nbr_4x4_left_strd,
    815                 rd_opt_curr_idx);
    816 
    817 #if ENABLE_TU_TREE_DETERMINATION_IN_RDOPT
    818             if((ps_ctxt->u1_bit_depth == 8) && (!ps_inter_cand->b1_skip_flag))
    819             {
    820                 ihevce_determine_tu_tree_distribution(
    821                     ps_inter_cand,
    822                     (me_func_selector_t *)ps_ctxt->pv_err_func_selector,
    823                     ps_ctxt->ai2_scratch,
    824                     (UWORD8 *)pv_curr_src,
    825                     ps_cu_prms->i4_luma_src_stride,
    826                     ps_ctxt->i4_satd_lamda,
    827                     LAMBDA_Q_SHIFT,
    828                     ps_cu_analyse->u1_cu_size,
    829                     ps_ctxt->u1_max_tr_depth);
    830             }
    831 #endif
    832 #if DISABLE_ZERO_ZBF_IN_INTER
    833             ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF;
    834 #else
    835             ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
    836 #endif
    837             /* Recon loop with different TUs based on partition type*/
    838             rd_opt_cost += ((pf_inter_rdopt_cu_ntu)ps_ctxt->pv_inter_rdopt_cu_ntu)(
    839                 ps_ctxt,
    840                 ps_cu_prms,
    841                 pv_curr_src,
    842                 ps_cu_analyse->u1_cu_size,
    843                 ps_cu_analyse->b3_cu_pos_x,
    844                 ps_cu_analyse->b3_cu_pos_y,
    845                 rd_opt_curr_idx,
    846                 &s_chrm_cu_buf_prms,
    847                 ps_inter_cand,
    848                 ps_cu_analyse,
    849                 !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
    850                                        : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
    851                                           (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
    852                                              100.0);
    853 
    854 #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
    855             if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
    856             {
    857                 ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
    858                 ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
    859                     ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
    860             }
    861 #endif
    862 
    863             /* based on the rd opt cost choose the best and current index */
    864             if(rd_opt_cost < rd_opt_least_cost)
    865             {
    866                 /* swap the best and current indx */
    867                 rd_opt_best_idx = !rd_opt_best_idx;
    868                 rd_opt_curr_idx = !rd_opt_curr_idx;
    869 
    870                 ps_ctxt->as_cu_prms[rd_opt_best_idx].i8_best_rdopt_cost = rd_opt_cost;
    871                 rd_opt_least_cost = rd_opt_cost;
    872                 i4_best_cu_qp = ps_ctxt->i4_cu_qp;
    873 
    874                 /* Store the best Inter cand. for final_recon function */
    875                 ps_best_inter_cand = ps_inter_cand;
    876             }
    877 
    878             /* set the neighbour map to 0 */
    879             ihevce_set_nbr_map(
    880                 ps_ctxt->pu1_ctb_nbr_map,
    881                 ps_ctxt->i4_nbr_map_strd,
    882                 (ps_cu_analyse->b3_cu_pos_x << 1),
    883                 (ps_cu_analyse->b3_cu_pos_y << 1),
    884                 (ps_cu_analyse->u1_cu_size >> 2),
    885                 0);
    886 
    887         } /* end of loop for all the Inter RD OPT cand */
    888     }
    889     /* --------------------------------------- */
    890     /* ---- Conditional Eval of Intra -------- */
    891     /* --------------------------------------- */
    892     {
    893         enc_loop_cu_final_prms_t *ps_enc_loop_bestprms;
    894         ps_enc_loop_bestprms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
    895 
    896         /* check if inter candidates are valid */
    897         if(0 != ps_cu_analyse->u1_num_inter_cands)
    898         {
    899             /* if skip or no residual inter candidates has won then */
    900             /* evaluation of intra candidates is disabled           */
    901             if((1 == ps_enc_loop_bestprms->u1_skip_flag) ||
    902                (0 == ps_enc_loop_bestprms->u1_is_cu_coded))
    903             {
    904                 enable_intra_eval_flag = 0;
    905             }
    906         }
    907         /* Disable Intra Gating for HIGH QUALITY PRESET */
    908 #if !ENABLE_INTRA_GATING_FOR_HQ
    909         if(IHEVCE_QUALITY_P3 > ps_ctxt->i4_quality_preset)
    910         {
    911             enable_intra_eval_flag = 1;
    912 
    913 #if DISABLE_LARGE_INTRA_PQ
    914             if((IHEVCE_QUALITY_P0 == ps_ctxt->i4_quality_preset) && (ps_cu_prms->u1_is_cu_noisy) &&
    915                (ps_ctxt->i1_slice_type != ISLICE) && (0 != s_inter_cu_mode_info.u1_num_inter_cands))
    916             {
    917                 if(ps_cu_analyse->u1_cu_size > 16)
    918                 {
    919                     /* Disable 32x32 / 64x64 Intra in PQ P and B pics */
    920                     enable_intra_eval_flag = 0;
    921                 }
    922                 else if(ps_cu_analyse->u1_cu_size == 16)
    923                 {
    924                     /* Disable tu equal to cu mode in 16x16 Intra in PQ P and B pics */
    925                     ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255;
    926                 }
    927             }
    928 #endif
    929         }
    930 #endif
    931     }
    932 
    933     /* --------------------------------------- */
    934     /* ------ Intra RD OPT stage ------------- */
    935     /* --------------------------------------- */
    936 
    937     /* -- run a loop over all Intra rd opt cands ------ */
    938     if((0 != ps_cu_analyse->u1_num_intra_rdopt_cands) && (1 == enable_intra_eval_flag))
    939     {
    940         LWORD64 rd_opt_cost;
    941         WORD32 end_flag = 0;
    942         WORD32 cu_eval_done = 0;
    943         WORD32 subcu_eval_done = 0;
    944         WORD32 subpu_eval_done = 0;
    945         WORD32 max_trans_size;
    946         WORD32 sync_wait_stride;
    947         max_trans_size = MIN(MAX_TU_SIZE, (ps_cu_analyse->u1_cu_size));
    948         sync_wait_stride = (ps_cu_analyse->u1_cu_size) + max_trans_size;
    949 
    950         if(!ps_ctxt->u1_use_top_at_ctb_boundary)
    951         {
    952             /* Wait till top data is ready          */
    953             /* Currently checking till top right CU */
    954             curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3);
    955 
    956             if(i4_ctb_y_off == 0)
    957             {
    958                 /* No wait for 1st row */
    959                 cu_top_right_offset = -(MAX_CTB_SIZE);
    960                 {
    961                     ihevce_tile_params_t *ps_col_tile_params =
    962                         ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base +
    963                          ps_ctxt->i4_tile_col_idx);
    964                     /* No wait for 1st row */
    965                     cu_top_right_offset = -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE));
    966                 }
    967                 cu_top_right_dep_pos = 0;
    968             }
    969             else
    970             {
    971                 cu_top_right_offset = sync_wait_stride;
    972                 cu_top_right_dep_pos = (i4_ctb_y_off >> 6) - 1;
    973             }
    974 
    975             if(0 == ps_cu_analyse->b3_cu_pos_y)
    976             {
    977                 ihevce_dmgr_chk_row_row_sync(
    978                     ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
    979                     curr_cu_pos_in_row,
    980                     cu_top_right_offset,
    981                     cu_top_right_dep_pos,
    982                     ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
    983                     ps_ctxt->thrd_id);
    984             }
    985         }
    986         ctr = 0;
    987 
    988         /* Zero cbf tool is disabled for intra CUs */
    989 #if ENABLE_ZERO_CBF_IN_INTRA
    990         ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
    991 #else
    992         ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF;
    993 #endif
    994 
    995         /* Intra Mode gating based on MPM cand list and encoder quality preset */
    996         if((ps_ctxt->i1_slice_type != ISLICE) && (ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3))
    997         {
    998             ihevce_mpm_idx_based_filter_RDOPT_cand(
    999                 ps_ctxt,
   1000                 ps_cu_analyse,
   1001                 ps_left_nbr_4x4,
   1002                 ps_top_nbr_4x4,
   1003                 &ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0],
   1004                 &ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_eval_mark[0]);
   1005 
   1006             ihevce_mpm_idx_based_filter_RDOPT_cand(
   1007                 ps_ctxt,
   1008                 ps_cu_analyse,
   1009                 ps_left_nbr_4x4,
   1010                 ps_top_nbr_4x4,
   1011                 &ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0],
   1012                 &ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_by_2_eval_mark[0]);
   1013         }
   1014 
   1015         /* Call Chroma SATD function for curr_func_mode in HIGH QUALITY mode */
   1016         if(1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd)
   1017         {
   1018             /* For cu_size = 64, there won't be any TU_EQ_CU case */
   1019             if(64 != ps_cu_analyse->u1_cu_size)
   1020             {
   1021                 /* RDOPT copy States : Prev Cu best to current init */
   1022                 COPY_CABAC_STATES(
   1023                     &ps_ctxt->au1_rdopt_init_ctxt_models[0],
   1024                     &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
   1025                     IHEVC_CAB_CTXT_END);
   1026 
   1027                 /* RDOPT related copies and settings */
   1028                 ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx;
   1029 
   1030                 /* Calc. best SATD mode for TU_EQ_CU case */
   1031                 ((pf_intra_chroma_pred_mode_selector)ps_ctxt->pv_intra_chroma_pred_mode_selector)(
   1032                     ps_ctxt,
   1033                     &s_chrm_cu_buf_prms,
   1034                     ps_cu_analyse,
   1035                     rd_opt_curr_idx,
   1036                     TU_EQ_CU,
   1037                     !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
   1038                                            : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
   1039                                               (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
   1040                                                  100.0,
   1041                     ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY);
   1042 
   1043 #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
   1044                 if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
   1045                 {
   1046                     ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
   1047                     ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
   1048                         ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
   1049                 }
   1050 #endif
   1051             }
   1052 
   1053             /* For cu_size=8 case, chroma cost will be same for TU_EQ_CU and
   1054             TU_EQ_CU_DIV2 case */
   1055 
   1056             if((ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] !=
   1057                 255) &&
   1058                (8 != ps_cu_analyse->u1_cu_size))
   1059             {
   1060                 /* RDOPT copy States : Prev Cu best to current init */
   1061                 COPY_CABAC_STATES(
   1062                     &ps_ctxt->au1_rdopt_init_ctxt_models[0],
   1063                     &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
   1064                     IHEVC_CAB_CTXT_END);
   1065 
   1066                 /* RDOPT related copies and settings */
   1067                 ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx;
   1068 
   1069                 /* Calc. best SATD mode for TU_EQ_CU_DIV2 case */
   1070                 ((pf_intra_chroma_pred_mode_selector)ps_ctxt->pv_intra_chroma_pred_mode_selector)(
   1071                     ps_ctxt,
   1072                     &s_chrm_cu_buf_prms,
   1073                     ps_cu_analyse,
   1074                     rd_opt_curr_idx,
   1075                     TU_EQ_CU_DIV2,
   1076                     !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
   1077                                            : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
   1078                                               (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
   1079                                                  100.0,
   1080                     ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY);
   1081 
   1082 #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
   1083                 if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
   1084                 {
   1085                     ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
   1086                     ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
   1087                         ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
   1088                 }
   1089 #endif
   1090             }
   1091         }
   1092 
   1093         while(0 == end_flag)
   1094         {
   1095             UWORD8 *pu1_mode = NULL;
   1096             WORD32 curr_func_mode = 0;
   1097             void *pv_pred;
   1098 
   1099             ASSERT(ctr < 36);
   1100 
   1101             /* TU equal to CU size evaluation of different modes */
   1102             if(0 == cu_eval_done)
   1103             {
    1104                 /* check if all the modes have been evaluated */
   1105                 if(255 == ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[ctr])
   1106                 {
   1107                     cu_eval_done = 1;
   1108                     ctr = 0;
   1109                 }
   1110                 else if(
   1111                     (1 == ctr) &&
   1112                     ((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P5) ||
   1113                      (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6)) &&
   1114                     (ps_ctxt->i1_slice_type != ISLICE))
   1115                 {
   1116                     ctr = 0;
   1117                     cu_eval_done = 1;
   1118                     subcu_eval_done = 1;
   1119                     subpu_eval_done = 1;
   1120                 }
   1121                 else
   1122                 {
   1123                     if(0 == ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_eval_mark[ctr])
   1124                     {
   1125                         ctr++;
   1126                         continue;
   1127                     }
   1128 
   1129                     pu1_mode =
   1130                         &ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[ctr];
   1131                     ctr++;
   1132                     curr_func_mode = TU_EQ_CU;
   1133                 }
   1134             }
   1135             /* Sub CU (NXN) mode evaluation of different pred modes */
   1136             if((0 == subpu_eval_done) && (1 == cu_eval_done))
   1137             {
   1138                 /*For NxN modes evaluation all candidates for all PU parts are evaluated */
   1139                 /*inside the ihevce_intra_rdopt_cu_ntu function, so the subpu_eval_done is set to 1 */
   1140                 {
   1141                     pu1_mode = &ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_nxn[0][ctr];
   1142 
   1143                     curr_func_mode = TU_EQ_SUBCU;
    1144                     /* check if any modes have to be evaluated */
   1145                     if(255 == *pu1_mode)
   1146                     {
   1147                         subpu_eval_done = 1;
   1148                         ctr = 0;
   1149                     }
   1150                     else if(ctr != 0) /* If the modes have to be evaluated, then terminate, as all modes are already evaluated */
   1151                     {
   1152                         subpu_eval_done = 1;
   1153                         ctr = 0;
   1154                     }
   1155                     else
   1156                     {
   1157                         ctr++;
   1158                     }
   1159                 }
   1160             }
   1161 
   1162             /* TU size equal to CU div2 mode evaluation of different pred modes */
   1163             if((0 == subcu_eval_done) && (1 == subpu_eval_done) && (1 == cu_eval_done))
   1164             {
    1165                 /* check if all the modes have been evaluated */
   1166                 if(255 ==
   1167                    ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[ctr])
   1168                 {
   1169                     subcu_eval_done = 1;
   1170                 }
   1171                 else if(
   1172                     (1 == ctr) &&
   1173                     ((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P5) ||
   1174                      (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6)) &&
   1175                     (ps_ctxt->i1_slice_type != ISLICE) && (ps_cu_analyse->u1_cu_size == 64))
   1176                 {
   1177                     subcu_eval_done = 1;
   1178                 }
   1179                 else
   1180                 {
   1181                     if(0 == ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_by_2_eval_mark[ctr])
   1182                     {
   1183                         ctr++;
   1184                         continue;
   1185                     }
   1186 
   1187                     pu1_mode = &ps_cu_analyse->s_cu_intra_cand
   1188                                     .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[ctr];
   1189 
   1190                     ctr++;
   1191                     curr_func_mode = TU_EQ_CU_DIV2;
   1192                 }
   1193             }
   1194 
    1195             /* check if all CU options have been evaluated */
   1196             if((1 == cu_eval_done) && (1 == subcu_eval_done) && (1 == subpu_eval_done))
   1197             {
   1198                 break;
   1199             }
   1200 
   1201             /* RDOPT related copies and settings */
   1202             ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx;
   1203 
    1204             /* Assign ME/Intra pred buf. to the current intra cand. since we
    1205             are storing pred data for final_recon function */
   1206             {
   1207                 pv_pred = ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[rd_opt_curr_idx];
   1208             }
   1209 
   1210             /* RDOPT copy States : Prev Cu best to current init */
   1211             COPY_CABAC_STATES(
   1212                 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
   1213                 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
   1214                 IHEVC_CAB_CTXT_END);
   1215 
   1216             /* call the function which performs the normative Intra encode */
   1217             rd_opt_cost = ((pf_intra_rdopt_cu_ntu)ps_ctxt->pv_intra_rdopt_cu_ntu)(
   1218                 ps_ctxt,
   1219                 ps_cu_prms,
   1220                 pv_pred,
   1221                 ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[rd_opt_curr_idx],
   1222                 &s_chrm_cu_buf_prms,
   1223                 pu1_mode,
   1224                 ps_cu_analyse,
   1225                 pv_curr_src,
   1226                 pv_cu_left,
   1227                 pv_cu_top,
   1228                 pv_cu_top_left,
   1229                 ps_left_nbr_4x4,
   1230                 ps_top_nbr_4x4,
   1231                 nbr_4x4_left_strd,
   1232                 cu_left_stride,
   1233                 rd_opt_curr_idx,
   1234                 curr_func_mode,
   1235                 !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
   1236                                        : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
   1237                                           (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
   1238                                              100.0);
   1239 
   1240 #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
   1241             if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
   1242             {
   1243                 ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
   1244                 ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
   1245                     ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
   1246             }
   1247 #endif
   1248 
   1249             /* based on the rd opt cost choose the best and current index */
   1250             if(rd_opt_cost < rd_opt_least_cost)
   1251             {
   1252                 /* swap the best and current indx */
   1253                 rd_opt_best_idx = !rd_opt_best_idx;
   1254                 rd_opt_curr_idx = !rd_opt_curr_idx;
   1255                 i4_best_cu_qp = ps_ctxt->i4_cu_qp;
   1256 
   1257                 rd_opt_least_cost = rd_opt_cost;
   1258                 ps_ctxt->as_cu_prms[rd_opt_best_idx].i8_best_rdopt_cost = rd_opt_cost;
   1259             }
   1260 
   1261             if((TU_EQ_SUBCU == curr_func_mode) &&
   1262                (ps_ctxt->as_cu_prms[rd_opt_best_idx].u1_intra_flag) &&
   1263                (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P2) && !FORCE_INTRA_TU_DEPTH_TO_0)
   1264             {
   1265                 UWORD8 au1_tu_eq_cu_div2_modes[4];
   1266                 UWORD8 au1_freq_of_mode[4];
   1267 
   1268                 if(ps_ctxt->as_cu_prms[rd_opt_best_idx].u1_part_mode == SIZE_2Nx2N)
   1269                 {
   1270                     ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] =
   1271                         255;  //ps_ctxt->as_cu_prms[rd_opt_best_idx].au1_intra_pred_mode[0];
   1272                     ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] =
   1273                         255;
   1274                 }
   1275                 else
   1276                 {
   1277                     WORD32 i4_num_clusters = ihevce_find_num_clusters_of_identical_points_1D(
   1278                         ps_ctxt->as_cu_prms[rd_opt_best_idx].au1_intra_pred_mode,
   1279                         au1_tu_eq_cu_div2_modes,
   1280                         au1_freq_of_mode,
   1281                         4);
   1282 
   1283                     if(2 == i4_num_clusters)
   1284                     {
   1285                         if(au1_freq_of_mode[0] == 3)
   1286                         {
   1287                             ps_cu_analyse->s_cu_intra_cand
   1288                                 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] =
   1289                                 au1_tu_eq_cu_div2_modes[0];
   1290                             ps_cu_analyse->s_cu_intra_cand
   1291                                 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] = 255;
   1292                         }
   1293                         else if(au1_freq_of_mode[1] == 3)
   1294                         {
   1295                             ps_cu_analyse->s_cu_intra_cand
   1296                                 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] =
   1297                                 au1_tu_eq_cu_div2_modes[1];
   1298                             ps_cu_analyse->s_cu_intra_cand
   1299                                 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] = 255;
   1300                         }
   1301                         else
   1302                         {
   1303                             ps_cu_analyse->s_cu_intra_cand
   1304                                 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] =
   1305                                 au1_tu_eq_cu_div2_modes[0];
   1306                             ps_cu_analyse->s_cu_intra_cand
   1307                                 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] =
   1308                                 au1_tu_eq_cu_div2_modes[1];
   1309                             ps_cu_analyse->s_cu_intra_cand
   1310                                 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[2] = 255;
   1311                         }
   1312                     }
   1313                 }
   1314             }
   1315 
   1316             /* set the neighbour map to 0 */
   1317             ihevce_set_nbr_map(
   1318                 ps_ctxt->pu1_ctb_nbr_map,
   1319                 ps_ctxt->i4_nbr_map_strd,
   1320                 (ps_cu_analyse->b3_cu_pos_x << 1),
   1321                 (ps_cu_analyse->b3_cu_pos_y << 1),
   1322                 (ps_cu_analyse->u1_cu_size >> 2),
   1323                 0);
   1324         }
   1325 
   1326     } /* end of Intra RD OPT cand evaluation */
   1327 
   1328     ASSERT(i4_best_cu_qp > (ps_ctxt->ps_rc_quant_ctxt->i2_min_qp - 1));
   1329     ps_ctxt->i4_cu_qp = i4_best_cu_qp;
   1330     ps_cu_analyse->i1_cu_qp = i4_best_cu_qp;
   1331 
   1332     /* --------------------------------------- */
   1333     /* --------Final mode Recon ---------- */
   1334     /* --------------------------------------- */
   1335     {
   1336         enc_loop_cu_final_prms_t *ps_enc_loop_bestprms;
   1337         void *pv_final_pred = NULL;
   1338         WORD32 final_pred_strd = 0;
   1339         void *pv_final_pred_chrm = NULL;
   1340         WORD32 final_pred_strd_chrm = 0;
   1341         WORD32 packed_pred_mode;
   1342 
   1343 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
   1344         if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
   1345         {
   1346             pu1_ecd_data = &ps_ctxt->pu1_cu_recur_coeffs[0];
   1347         }
   1348 #else
   1349         pu1_ecd_data = &ps_ctxt->pu1_cu_recur_coeffs[0];
   1350 #endif
   1351 
   1352         ps_enc_loop_bestprms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
   1353         packed_pred_mode =
   1354             ps_enc_loop_bestprms->u1_intra_flag + (ps_enc_loop_bestprms->u1_skip_flag) * 2;
   1355 
   1356         if(!ps_ctxt->u1_is_input_data_hbd)
   1357         {
   1358             if(ps_enc_loop_bestprms->u1_intra_flag)
   1359             {
   1360                 pv_final_pred = ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[rd_opt_best_idx];
   1361                 final_pred_strd =
   1362                     ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[rd_opt_best_idx];
   1363             }
   1364             else
   1365             {
   1366                 pv_final_pred = ps_best_inter_cand->pu1_pred_data;
   1367                 final_pred_strd = ps_best_inter_cand->i4_pred_data_stride;
   1368             }
   1369 
   1370             pv_final_pred_chrm =
   1371                 ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[CU_ME_INTRA_PRED_CHROMA_IDX] +
   1372                 rd_opt_best_idx * ((MAX_CTB_SIZE * MAX_CTB_SIZE >> 1) +
   1373                                    (u1_is_422 * (MAX_CTB_SIZE * MAX_CTB_SIZE >> 1)));
   1374             final_pred_strd_chrm =
   1375                 ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[CU_ME_INTRA_PRED_CHROMA_IDX];
   1376         }
   1377 
   1378         ihevce_set_eval_flags(ps_ctxt, ps_enc_loop_bestprms);
   1379 
   1380         {
   1381             final_mode_process_prms_t s_prms;
   1382 
   1383             void *pv_cu_luma_recon;
   1384             void *pv_cu_chroma_recon;
   1385             WORD32 luma_stride, chroma_stride;
   1386 
   1387             if(!ps_ctxt->u1_is_input_data_hbd)
   1388             {
   1389 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
   1390                 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
   1391                 {
   1392                     pv_cu_luma_recon = ps_ctxt->pv_cu_luma_recon;
   1393                     pv_cu_chroma_recon = ps_ctxt->pv_cu_chrma_recon;
   1394                     luma_stride = ps_cu_analyse->u1_cu_size;
   1395                     chroma_stride = ps_cu_analyse->u1_cu_size;
   1396                 }
   1397                 else
   1398                 {
   1399                     /* based on CU position derive the luma pointers */
   1400                     pv_cu_luma_recon = pu1_final_recon;
   1401 
   1402                     /* based on CU position derive the chroma pointers */
   1403                     pv_cu_chroma_recon = s_chrm_cu_buf_prms.pu1_final_recon;
   1404 
   1405                     luma_stride = ps_cu_prms->i4_luma_recon_stride;
   1406 
   1407                     chroma_stride = ps_cu_prms->i4_chrm_recon_stride;
   1408                 }
   1409 #else
   1410                 pv_cu_luma_recon = ps_ctxt->pv_cu_luma_recon;
   1411                 pv_cu_chroma_recon = ps_ctxt->pv_cu_chrma_recon;
   1412                 luma_stride = ps_cu_analyse->u1_cu_size;
   1413                 chroma_stride = ps_cu_analyse->u1_cu_size;
   1414 #endif
   1415 
   1416                 s_prms.ps_cu_nbr_prms = &s_cu_nbr_prms;
   1417                 s_prms.ps_best_inter_cand = ps_best_inter_cand;
   1418                 s_prms.ps_chrm_cu_buf_prms = &s_chrm_cu_buf_prms;
   1419                 s_prms.packed_pred_mode = packed_pred_mode;
   1420                 s_prms.rd_opt_best_idx = rd_opt_best_idx;
   1421                 s_prms.pv_src = pu1_curr_src;
   1422                 s_prms.src_strd = ps_cu_prms->i4_luma_src_stride;
   1423                 s_prms.pv_pred = pv_final_pred;
   1424                 s_prms.pred_strd = final_pred_strd;
   1425                 s_prms.pv_pred_chrm = pv_final_pred_chrm;
   1426                 s_prms.pred_chrm_strd = final_pred_strd_chrm;
   1427                 s_prms.pu1_final_ecd_data = pu1_ecd_data;
   1428                 s_prms.pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0];
   1429                 s_prms.csbf_strd = ps_ctxt->i4_cu_csbf_strd;
   1430                 s_prms.pv_luma_recon = pv_cu_luma_recon;
   1431                 s_prms.recon_luma_strd = luma_stride;
   1432                 s_prms.pv_chrm_recon = pv_cu_chroma_recon;
   1433                 s_prms.recon_chrma_strd = chroma_stride;
   1434                 s_prms.u1_cu_pos_x = ps_cu_analyse->b3_cu_pos_x;
   1435                 s_prms.u1_cu_pos_y = ps_cu_analyse->b3_cu_pos_y;
   1436                 s_prms.u1_cu_size = ps_cu_analyse->u1_cu_size;
   1437                 s_prms.i1_cu_qp = ps_cu_analyse->i1_cu_qp;
   1438                 s_prms.u1_will_cabac_state_change = 1;
   1439                 s_prms.u1_recompute_sbh_and_rdoq = 0;
   1440                 s_prms.u1_is_first_pass = 1;
   1441             }
   1442 
   1443 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
   1444             s_prms.u1_is_cu_noisy = !ps_enc_loop_bestprms->u1_intra_flag
   1445                                         ? ps_cu_prms->u1_is_cu_noisy
   1446                                         : ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY;
   1447 #endif
   1448 
   1449             ((pf_final_rdopt_mode_prcs)ps_ctxt->pv_final_rdopt_mode_prcs)(ps_ctxt, &s_prms);
   1450 
   1451 #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
   1452             if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
   1453             {
   1454                 ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
   1455                 ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
   1456                     ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
   1457             }
   1458 #endif
   1459         }
   1460     }
   1461 
   1462     /* --------------------------------------- */
   1463     /* --------Populate CU out prms ---------- */
   1464     /* --------------------------------------- */
   1465     {
   1466         enc_loop_cu_final_prms_t *ps_enc_loop_bestprms;
   1467         UWORD8 *pu1_pu_map;
   1468         ps_enc_loop_bestprms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
   1469 
   1470         /* Corner case : If Part is 2Nx2N and Merge has all TU with zero cbf */
   1471         /* then it has to be coded as skip CU */
   1472         if((SIZE_2Nx2N == ps_enc_loop_bestprms->u1_part_mode) &&
   1473            (1 == ps_enc_loop_bestprms->as_pu_enc_loop[0].b1_merge_flag) &&
   1474            (0 == ps_enc_loop_bestprms->u1_skip_flag) && (0 == ps_enc_loop_bestprms->u1_is_cu_coded))
   1475         {
   1476             ps_enc_loop_bestprms->u1_skip_flag = 1;
   1477         }
   1478 
   1479         /* update number PUs in CU */
   1480         ps_cu_prms->i4_num_pus_in_cu = ps_enc_loop_bestprms->u2_num_pus_in_cu;
   1481 
   1482         /* ---- populate the colocated pu map index --- */
   1483         for(ctr = 0; ctr < ps_enc_loop_bestprms->u2_num_pus_in_cu; ctr++)
   1484         {
   1485             WORD32 i;
   1486             WORD32 vert_ht;
   1487             WORD32 horz_wd;
   1488 
   1489             if(ps_enc_loop_bestprms->u1_intra_flag)
   1490             {
   1491                 ps_enc_loop_bestprms->as_col_pu_enc_loop[ctr].b1_intra_flag = 1;
   1492                 vert_ht = ps_cu_analyse->u1_cu_size >> 2;
   1493                 horz_wd = ps_cu_analyse->u1_cu_size >> 2;
   1494             }
   1495             else
   1496             {
   1497                 vert_ht = (((ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_ht + 1) << 2) >> 2);
   1498                 horz_wd = (((ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_wd + 1) << 2) >> 2);
   1499             }
   1500 
   1501             pu1_pu_map = pu1_col_pu_map + ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_pos_x;
   1502             pu1_pu_map += (ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_pos_y * num_4x4_in_ctb);
   1503 
   1504             for(i = 0; i < vert_ht; i++)
   1505             {
   1506                 memset(pu1_pu_map, col_start_pu_idx, horz_wd);
   1507                 pu1_pu_map += num_4x4_in_ctb;
   1508             }
   1509             /* increment the index */
   1510             col_start_pu_idx++;
   1511         }
   1512         /* ---- copy the colocated PUs to frm pu ----- */
   1513         memcpy(
   1514             ps_col_pu,
   1515             &ps_enc_loop_bestprms->as_col_pu_enc_loop[0],
   1516             ps_enc_loop_bestprms->u2_num_pus_in_cu * sizeof(pu_col_mv_t));
   1517 
   1518         /*---populate qp for 4x4 nbr array based on skip and cbf zero flag---*/
   1519         {
   1520             entropy_context_t *ps_entropy_ctxt;
   1521 
   1522             WORD32 diff_cu_qp_delta_depth, log2_ctb_size;
   1523 
   1524             WORD32 log2_min_cu_qp_delta_size;
   1525             UWORD32 block_addr_align;
   1526             ps_entropy_ctxt = ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt;
   1527 
   1528             log2_ctb_size = ps_entropy_ctxt->i1_log2_ctb_size;
   1529             diff_cu_qp_delta_depth = ps_entropy_ctxt->ps_pps->i1_diff_cu_qp_delta_depth;
   1530 
   1531             log2_min_cu_qp_delta_size = log2_ctb_size - diff_cu_qp_delta_depth;
   1532             block_addr_align = 15 << (log2_min_cu_qp_delta_size - 3);
   1533 
   1534             ps_entropy_ctxt->i4_qg_pos_x = ps_cu_analyse->b3_cu_pos_x & block_addr_align;
   1535             ps_entropy_ctxt->i4_qg_pos_y = ps_cu_analyse->b3_cu_pos_y & block_addr_align;
   1536             /*Update the Qp value used. It will not have a valid value iff
   1537             current CU is (skipped/no_cbf). In that case the Qp needed for
   1538             deblocking is calculated from top/left/previous coded CU*/
   1539 
   1540             ps_ctxt->ps_enc_out_ctxt->i1_cu_qp = ps_cu_analyse->i1_cu_qp;
   1541 
   1542             if(ps_entropy_ctxt->i4_qg_pos_x == ps_cu_analyse->b3_cu_pos_x &&
   1543                ps_entropy_ctxt->i4_qg_pos_y == ps_cu_analyse->b3_cu_pos_y)
   1544             {
   1545                 ps_ctxt->ps_enc_out_ctxt->b1_first_cu_in_qg = 1;
   1546             }
   1547             else
   1548             {
   1549                 ps_ctxt->ps_enc_out_ctxt->b1_first_cu_in_qg = 0;
   1550             }
   1551         }
   1552 
   1553         /* -- at the end of CU set the neighbour map to 1 -- */
   1554         ihevce_set_nbr_map(
   1555             ps_ctxt->pu1_ctb_nbr_map,
   1556             ps_ctxt->i4_nbr_map_strd,
   1557             (ps_cu_analyse->b3_cu_pos_x << 1),
   1558             (ps_cu_analyse->b3_cu_pos_y << 1),
   1559             (ps_cu_analyse->u1_cu_size >> 2),
   1560             1);
   1561 
   1562         /* -- at the end of CU update best cabac rdopt states -- */
   1563         /* -- and also set the top row skip flags  ------------- */
   1564         ihevce_entropy_update_best_cu_states(
   1565             &ps_ctxt->s_rdopt_entropy_ctxt,
   1566             ps_cu_analyse->b3_cu_pos_x,
   1567             ps_cu_analyse->b3_cu_pos_y,
   1568             ps_cu_analyse->u1_cu_size,
   1569             0,
   1570             rd_opt_best_idx);
   1571     }
   1572 
   1573     /* Store Output struct */
   1574 #if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
   1575     {
   1576         {
   1577             memcpy(
   1578                 &ps_ctxt->ps_enc_out_ctxt->s_cu_prms,
   1579                 &ps_ctxt->as_cu_prms[rd_opt_best_idx],
   1580                 sizeof(enc_loop_cu_final_prms_t));
   1581         }
   1582 
   1583         memcpy(
   1584             &ps_ctxt->as_cu_recur_nbr[0],
   1585             &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0],
   1586             sizeof(nbr_4x4_t) * (ps_cu_analyse->u1_cu_size >> 2) *
   1587                 (ps_cu_analyse->u1_cu_size >> 2));
   1588 
   1589         ps_ctxt->ps_enc_out_ctxt->ps_cu_prms = &ps_ctxt->ps_enc_out_ctxt->s_cu_prms;
   1590 
   1591         ps_ctxt->ps_cu_recur_nbr = &ps_ctxt->as_cu_recur_nbr[0];
   1592     }
   1593 #else
   1594     if(ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P2)
   1595     {
   1596         ps_ctxt->ps_enc_out_ctxt->ps_cu_prms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
   1597 
   1598         ps_ctxt->ps_cu_recur_nbr = &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0];
   1599 
   1600         if(ps_ctxt->u1_disable_intra_eval && ps_ctxt->i4_deblk_pad_hpel_cur_pic)
   1601         {
   1602             /* Wait till top data is ready          */
   1603             /* Currently checking till top right CU */
   1604             curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3);
   1605 
   1606             if(i4_ctb_y_off == 0)
   1607             {
   1608                 /* No wait for 1st row */
   1609                 cu_top_right_offset = -(MAX_CTB_SIZE);
   1610                 {
   1611                     ihevce_tile_params_t *ps_col_tile_params =
   1612                         ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base +
   1613                          ps_ctxt->i4_tile_col_idx);
   1614 
   1615                     /* No wait for 1st row */
   1616                     cu_top_right_offset = -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE));
   1617                 }
   1618                 cu_top_right_dep_pos = 0;
   1619             }
   1620             else
   1621             {
   1622                 cu_top_right_offset = (ps_cu_analyse->u1_cu_size);
   1623                 cu_top_right_dep_pos = (i4_ctb_y_off >> 6) - 1;
   1624             }
   1625 
   1626             if(0 == ps_cu_analyse->b3_cu_pos_y)
   1627             {
   1628                 ihevce_dmgr_chk_row_row_sync(
   1629                     ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
   1630                     curr_cu_pos_in_row,
   1631                     cu_top_right_offset,
   1632                     cu_top_right_dep_pos,
   1633                     ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
   1634                     ps_ctxt->thrd_id);
   1635             }
   1636         }
   1637     }
   1638     else
   1639     {
   1640         {
   1641             memcpy(
   1642                 &ps_ctxt->ps_enc_out_ctxt->s_cu_prms,
   1643                 &ps_ctxt->as_cu_prms[rd_opt_best_idx],
   1644                 sizeof(enc_loop_cu_final_prms_t));
   1645         }
   1646 
   1647         memcpy(
   1648             &ps_ctxt->as_cu_recur_nbr[0],
   1649             &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0],
   1650             sizeof(nbr_4x4_t) * (ps_cu_analyse->u1_cu_size >> 2) *
   1651                 (ps_cu_analyse->u1_cu_size >> 2));
   1652 
   1653         ps_ctxt->ps_enc_out_ctxt->ps_cu_prms = &ps_ctxt->ps_enc_out_ctxt->s_cu_prms;
   1654 
   1655         ps_ctxt->ps_cu_recur_nbr = &ps_ctxt->as_cu_recur_nbr[0];
   1656     }
   1657 #endif
   1658 
   1659     ps_ctxt->s_pred_buf_data.u4_is_buf_in_use &=
   1660         ~((1 << (ps_ctxt->i4_max_num_inter_rdopt_cands + 4)) - 1);
   1661 
   1662     return rd_opt_least_cost;
   1663 }
   1664 
   1665 /*!
   1666 ******************************************************************************
   1667 * \if Function name : ihevce_enc_loop_process_row \endif
   1668 *
   1669 * \brief
   1670 *    Row level enc_loop pass function
   1671 *
   1672 * \param[in] ps_ctxt : pointer to enc_loop module context
   1673 * \param[in] ps_curr_src_bufs  : pointer to input yuv buffer (row buffer)
   1674 * \param[out] ps_curr_recon_bufs : pointer to recon picture structure (row buffer)
   1675 * \param[in] ps_ctb_in : pointer to CTB structure (output of ME/IPE) (row buffer)
   1676 * \param[out] ps_ctb_out : pointer to CTB output structure (row buffer)
   1677 * \param[out] ps_row_cu : pointer to CU output structure (row buffer)
   1678 * \param[out] ps_row_tu : pointer to TU output structure (row buffer)
   1679 * \param[out] pi2_frm_coeffs : pointer coeff output (row buffer)
   1680 * \param[in] i4_poc : current poc. Needed to send recon in dist-client mode
   1681 *
   1682 * \return
   1683 *    None
   1684 *
   1685 * Note : Currently the frame level calculations assume that the
   1686 *        frame width of the input/recon is an exact multiple of ctbsize
   1687 *
   1688 * \author
   1689 *  Ittiam
   1690 *
   1691 *****************************************************************************
   1692 */
   1693 void ihevce_enc_loop_process_row(
   1694     ihevce_enc_loop_ctxt_t *ps_ctxt,
   1695     iv_enc_yuv_buf_t *ps_curr_src_bufs,
   1696     iv_enc_yuv_buf_t *ps_curr_recon_bufs,
   1697     iv_enc_yuv_buf_src_t *ps_curr_recon_bufs_src,
   1698     UWORD8 **ppu1_y_subpel_planes,
   1699     ctb_analyse_t *ps_ctb_in,
   1700     ctb_enc_loop_out_t *ps_ctb_out,
   1701     ipe_l0_ctb_analyse_for_me_t *ps_row_ipe_analyse,
   1702     cur_ctb_cu_tree_t *ps_row_cu_tree,
   1703     cu_enc_loop_out_t *ps_row_cu,
   1704     tu_enc_loop_out_t *ps_row_tu,
   1705     pu_t *ps_row_pu,
   1706     pu_col_mv_t *ps_row_col_pu,
   1707     UWORD16 *pu2_num_pu_map,
   1708     UWORD8 *pu1_row_pu_map,
   1709     UWORD8 *pu1_row_ecd_data,
   1710     UWORD32 *pu4_pu_offsets,
   1711     frm_ctb_ctxt_t *ps_frm_ctb_prms,
   1712     WORD32 vert_ctr,
   1713     recon_pic_buf_t *ps_frm_recon,
   1714     void *pv_dep_mngr_encloop_dep_me,
   1715     pad_interp_recon_frm_t *ps_pad_interp_recon,
   1716     WORD32 i4_pass,
   1717     multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
   1718     ihevce_tile_params_t *ps_tile_params)
   1719 {
   1720     enc_loop_cu_prms_t s_cu_prms;
   1721     ctb_enc_loop_out_t *ps_ctb_out_dblk;
   1722 
   1723     WORD32 ctb_ctr, ctb_start, ctb_end;
   1724     WORD32 col_pu_map_idx;
   1725     WORD32 num_ctbs_horz_pic;
   1726     WORD32 ctb_size;
   1727     WORD32 last_ctb_row_flag;
   1728     WORD32 last_ctb_col_flag;
   1729     WORD32 last_hz_ctb_wd;
   1730     WORD32 last_vt_ctb_ht;
   1731     void *pv_dep_mngr_enc_loop_dblk;
   1732     void *pv_dep_mngr_enc_loop_cu_top_right;
   1733     WORD32 dblk_offset, dblk_check_dep_pos;
   1734     WORD32 aux_offset, aux_check_dep_pos;
   1735     void *pv_dep_mngr_me_dep_encloop;
   1736     ctb_enc_loop_out_t *ps_ctb_out_sao;
   1737     /*Structure to store deblocking parameters at CTB-row level*/
   1738     deblk_ctbrow_prms_t s_deblk_ctb_row_params;
   1739     UWORD8 is_inp_422 = (ps_ctxt->u1_chroma_array_type == 2);
   1740 
   1741     pv_dep_mngr_me_dep_encloop = (void *)ps_frm_recon->pv_dep_mngr_recon;
   1742     num_ctbs_horz_pic = ps_frm_ctb_prms->i4_num_ctbs_horz;
   1743     ctb_size = ps_frm_ctb_prms->i4_ctb_size;
   1744 
   1745     /* Store the num_ctb_horz in sao context*/
   1746     ps_ctxt->s_sao_ctxt_t.u4_num_ctbs_horz = ps_frm_ctb_prms->i4_num_ctbs_horz;
   1747     ps_ctxt->s_sao_ctxt_t.u4_num_ctbs_vert = ps_frm_ctb_prms->i4_num_ctbs_vert;
   1748 
   1749     /* Get the EncLoop Deblock Dep Mngr */
   1750     pv_dep_mngr_enc_loop_dblk = ps_ctxt->pv_dep_mngr_enc_loop_dblk;
   1751     /* Get the EncLoop Top-Right CU Dep Mngr */
   1752     pv_dep_mngr_enc_loop_cu_top_right = ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right;
   1753     /* Set Variables for Dep. Checking and Setting */
   1754     aux_check_dep_pos = vert_ctr;
   1755     aux_offset = 2; /* Should be there for 0th row also */
   1756     if(vert_ctr > 0)
   1757     {
   1758         dblk_check_dep_pos = vert_ctr - 1;
   1759         dblk_offset = 2;
   1760     }
   1761     else
   1762     {
   1763         /* First row should run without waiting */
   1764         dblk_check_dep_pos = 0;
   1765         dblk_offset = -(ps_tile_params->i4_first_sample_x + 1);
   1766     }
   1767 
   1768     /* check if the current row processed in last CTb row */
   1769     last_ctb_row_flag = (vert_ctr == (ps_frm_ctb_prms->i4_num_ctbs_vert - 1));
   1770 
   1771     /* Valid Width (pixels) in the last CTB in every row (padding cases) */
   1772     last_hz_ctb_wd = ps_frm_ctb_prms->i4_cu_aligned_pic_wd - ((num_ctbs_horz_pic - 1) * ctb_size);
   1773 
   1774     /* Valid Height (pixels) in the last CTB row (padding cases) */
   1775     last_vt_ctb_ht = ps_frm_ctb_prms->i4_cu_aligned_pic_ht -
   1776                      ((ps_frm_ctb_prms->i4_num_ctbs_vert - 1) * ctb_size);
   1777     /* reset the states copied flag */
   1778     ps_ctxt->u1_cabac_states_next_row_copied_flag = 0;
   1779     ps_ctxt->u1_cabac_states_first_cu_copied_flag = 0;
   1780 
   1781     /* populate the cu prms which are common for entire ctb row */
   1782     s_cu_prms.i4_luma_src_stride = ps_curr_src_bufs->i4_y_strd;
   1783     s_cu_prms.i4_chrm_src_stride = ps_curr_src_bufs->i4_uv_strd;
   1784     s_cu_prms.i4_luma_recon_stride = ps_curr_recon_bufs->i4_y_strd;
   1785     s_cu_prms.i4_chrm_recon_stride = ps_curr_recon_bufs->i4_uv_strd;
   1786     s_cu_prms.i4_ctb_size = ctb_size;
   1787 
   1788     ps_ctxt->i4_is_first_cu_qg_coded = 0;
   1789 
   1790     /* Initialize the number of PUs for the first CTB to 0 */
   1791     *pu2_num_pu_map = 0;
   1792 
   1793     /*Getting the address of BS and Qp arrays and other info*/
   1794     memcpy(&s_deblk_ctb_row_params, &ps_ctxt->s_deblk_ctbrow_prms, sizeof(deblk_ctbrow_prms_t));
   1795     {
   1796         WORD32 num_ctbs_horz_tile;
   1797         /* Update the pointers which are accessed not by using ctb_ctr
   1798         to the tile start here! */
   1799         ps_ctb_in += ps_tile_params->i4_first_ctb_x;
   1800         ps_ctb_out += ps_tile_params->i4_first_ctb_x;
   1801 
   1802         ps_row_cu += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_cus_in_ctb);
   1803         ps_row_tu += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_tus_in_ctb);
   1804         ps_row_pu += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_pus_in_ctb);
   1805         pu1_row_pu_map += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_pus_in_ctb);
   1806         pu1_row_ecd_data +=
   1807             (ps_tile_params->i4_first_ctb_x *
   1808              ((is_inp_422 == 1) ? (ps_frm_ctb_prms->i4_num_tus_in_ctb << 1)
   1809                                 : ((ps_frm_ctb_prms->i4_num_tus_in_ctb * 3) >> 1)) *
   1810              MAX_SCAN_COEFFS_BYTES_4x4);
   1811 
   1812         /* Update the pointers to the tile start */
   1813         s_deblk_ctb_row_params.pu4_ctb_row_bs_vert +=
   1814             (ps_tile_params->i4_first_ctb_x * (ctb_size >> 3));  //one vertical edge per 8x8 block
   1815         s_deblk_ctb_row_params.pu4_ctb_row_bs_horz +=
   1816             (ps_tile_params->i4_first_ctb_x * (ctb_size >> 3));  //one horizontal edge per 8x8 block
   1817         s_deblk_ctb_row_params.pi1_ctb_row_qp += (ps_tile_params->i4_first_ctb_x * (ctb_size >> 2));
   1818 
   1819         num_ctbs_horz_tile = ps_tile_params->i4_curr_tile_wd_in_ctb_unit;
   1820 
   1821         ctb_start = ps_tile_params->i4_first_ctb_x;
   1822         ctb_end = ps_tile_params->i4_first_ctb_x + num_ctbs_horz_tile;
   1823     }
   1824     ps_ctb_out_dblk = ps_ctb_out;
   1825 
   1826     ps_ctxt->i4_last_cu_qp_from_prev_ctb = ps_ctxt->i4_frame_qp;
   1827 
   1828     /* --------- Loop over all the CTBs in a row --------------- */
   1829     for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
   1830     {
   1831         cu_final_update_prms s_cu_update_prms;
   1832 
   1833         cur_ctb_cu_tree_t *ps_cu_tree_analyse;
   1834         me_ctb_data_t *ps_cu_me_data;
   1835         ipe_l0_ctb_analyse_for_me_t *ps_ctb_ipe_analyse;
   1836         cu_enc_loop_out_t *ps_cu_final;
   1837         pu_col_mv_t *ps_ctb_col_pu;
   1838 
   1839         WORD32 cur_ctb_ht, cur_ctb_wd;
   1840         WORD32 last_cu_pos_in_ctb;
   1841         WORD32 last_cu_size;
   1842         WORD32 num_pus_in_ctb;
   1843         UWORD8 u1_is_ctb_noisy;
   1844         ps_ctb_col_pu = ps_row_col_pu + ctb_ctr * ps_frm_ctb_prms->i4_num_pus_in_ctb;
   1845 
   1846         if(ctb_ctr)
   1847         {
   1848             ps_ctxt->i4_prev_QP = ps_ctxt->i4_last_cu_qp_from_prev_ctb;
   1849         }
   1850         /*If Sup pic rc is enabled*/
   1851         if(ps_ctxt->i4_sub_pic_level_rc)
   1852         {
   1853             ihevce_sub_pic_rc_scale_query((void *)ps_multi_thrd_ctxt, (void *)ps_ctxt);
   1854         }
   1855         /* check if the current row processed in last CTb row */
   1856         last_ctb_col_flag = (ctb_ctr == (num_ctbs_horz_pic - 1));
   1857         if(1 == last_ctb_col_flag)
   1858         {
   1859             cur_ctb_wd = last_hz_ctb_wd;
   1860         }
   1861         else
   1862         {
   1863             cur_ctb_wd = ctb_size;
   1864         }
   1865 
   1866         /* If it's the last CTB, get the actual ht of CTB */
   1867         if(1 == last_ctb_row_flag)
   1868         {
   1869             cur_ctb_ht = last_vt_ctb_ht;
   1870         }
   1871         else
   1872         {
   1873             cur_ctb_ht = ctb_size;
   1874         }
   1875 
   1876         ps_ctxt->u4_cur_ctb_ht = cur_ctb_ht;
   1877         ps_ctxt->u4_cur_ctb_wd = cur_ctb_wd;
   1878 
   1879         /* Wait till reference frame recon is available */
   1880 
   1881         /* ------------ Wait till current data is ready from ME -------------- */
   1882 
   1883         /*only for ref instance and Non I pics */
   1884         if((ps_ctxt->i4_bitrate_instance_num == 0) &&
   1885            ((ISLICE != ps_ctxt->i1_slice_type) || L0ME_IN_OPENLOOP_MODE))
   1886         {
   1887             if(ctb_ctr < (num_ctbs_horz_pic))
   1888             {
   1889                 ihevce_dmgr_chk_row_row_sync(
   1890                     pv_dep_mngr_encloop_dep_me,
   1891                     ctb_ctr,
   1892                     1,
   1893                     vert_ctr,
   1894                     ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
   1895                     ps_ctxt->thrd_id);
   1896             }
   1897         }
   1898 
   1899         /* store the cu pointer for current ctb out */
   1900         ps_ctb_out->ps_enc_cu = ps_row_cu;
   1901         ps_cu_final = ps_row_cu;
   1902 
   1903         /* Get the base point of CU recursion tree */
   1904         if(ISLICE != ps_ctxt->i1_slice_type)
   1905         {
   1906             ps_cu_tree_analyse = ps_ctb_in->ps_cu_tree;
   1907             ASSERT(ps_ctb_in->ps_cu_tree == (ps_row_cu_tree + (ctb_ctr * MAX_NUM_NODES_CU_TREE)));
   1908         }
   1909         else
   1910         {
   1911             /* Initialize ptr to current CTB */
   1912             ps_cu_tree_analyse = ps_row_cu_tree + (ctb_ctr * MAX_NUM_NODES_CU_TREE);
   1913         }
   1914 
   1915         /* Get the ME data pointer for 16x16 block data in ctb */
   1916         ps_cu_me_data = ps_ctb_in->ps_me_ctb_data;
   1917         u1_is_ctb_noisy = ps_ctb_in->s_ctb_noise_params.i4_noise_present;
   1918         s_cu_prms.u1_is_cu_noisy = u1_is_ctb_noisy;
   1919         s_cu_prms.pu1_is_8x8Blk_noisy = ps_ctb_in->s_ctb_noise_params.au1_is_8x8Blk_noisy;
   1920 
   1921         /* store the ctb level prms in cu prms */
   1922         s_cu_prms.i4_ctb_pos = ctb_ctr;
   1923 
   1924         s_cu_prms.pu1_luma_src = (UWORD8 *)ps_curr_src_bufs->pv_y_buf + ctb_ctr * ctb_size;
   1925         s_cu_prms.pu1_luma_recon = (UWORD8 *)ps_curr_recon_bufs->pv_y_buf + ctb_ctr * ctb_size;
   1926 
   1927         {
   1928             s_cu_prms.pu1_chrm_src = (UWORD8 *)ps_curr_src_bufs->pv_u_buf + ctb_ctr * ctb_size;
   1929             s_cu_prms.pu1_chrm_recon = (UWORD8 *)ps_curr_recon_bufs->pv_u_buf + ctb_ctr * ctb_size;
   1930         }
   1931 
   1932         s_cu_prms.pu1_sbpel_hxfy = (UWORD8 *)ppu1_y_subpel_planes[0] + ctb_ctr * ctb_size;
   1933 
   1934         s_cu_prms.pu1_sbpel_fxhy = (UWORD8 *)ppu1_y_subpel_planes[1] + ctb_ctr * ctb_size;
   1935 
   1936         s_cu_prms.pu1_sbpel_hxhy = (UWORD8 *)ppu1_y_subpel_planes[2] + ctb_ctr * ctb_size;
   1937 
   1938         /* Initialize ptr to current CTB */
   1939         ps_ctb_ipe_analyse = ps_row_ipe_analyse + ctb_ctr;  // * ctb_size;
   1940 
   1941         /* reset the map idx for current ctb */
   1942         col_pu_map_idx = 0;
   1943         num_pus_in_ctb = 0;
   1944 
   1945         /* reset the map buffer to 0*/
   1946 
   1947         memset(
   1948             &ps_ctxt->au1_nbr_ctb_map[0][0],
   1949             0,
   1950             (MAX_PU_IN_CTB_ROW + 1 + 8) * (MAX_PU_IN_CTB_ROW + 1 + 8));
   1951 
   1952         /* set the CTB neighbour availability flags */
   1953         ihevce_set_ctb_nbr(
   1954             &ps_ctb_out->s_ctb_nbr_avail_flags,
   1955             ps_ctxt->pu1_ctb_nbr_map,
   1956             ps_ctxt->i4_nbr_map_strd,
   1957             ctb_ctr,
   1958             vert_ctr,
   1959             ps_frm_ctb_prms);
   1960 
   1961         /* -------- update the cur CTB offsets for inter prediction-------- */
   1962         ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_x = ctb_ctr * ctb_size;
   1963         ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y = vert_ctr * ctb_size;
   1964 
   1965         /* -------- update the cur CTB offsets for MV prediction-------- */
   1966         ps_ctxt->s_mv_pred_ctxt.i4_ctb_x = ctb_ctr;
   1967         ps_ctxt->s_mv_pred_ctxt.i4_ctb_y = vert_ctr;
   1968 
   1969         /* -------------- Boundary Strength Initialization ----------- */
   1970         if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
   1971         {
   1972             ihevce_bs_init_ctb(&ps_ctxt->s_deblk_bs_prms, ps_frm_ctb_prms, ctb_ctr, vert_ctr);
   1973         }
   1974 
   1975         /* -------- update cur CTB offsets for entropy rdopt context------- */
   1976         ihevce_entropy_rdo_ctb_init(&ps_ctxt->s_rdopt_entropy_ctxt, ctb_ctr, vert_ctr);
   1977 
   1978         /* --------- CU Recursion --------------- */
   1979 
   1980         {
   1981 #if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
   1982             WORD32 i4_max_tree_depth = 4;
   1983 #endif
   1984             WORD32 i4_tree_depth = 0;
   1985             /* Init no. of CU in CTB to 0*/
   1986             ps_ctb_out->u1_num_cus_in_ctb = 0;
   1987 
   1988 #if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
   1989             if(ps_ctxt->i4_bitrate_instance_num == 0)
   1990             {
   1991                 WORD32 i4_max_tree_depth = 4;
   1992                 WORD32 i;
   1993                 for(i = 0; i < i4_max_tree_depth; i++)
   1994                 {
   1995                     COPY_CABAC_STATES(
   1996                         &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
   1997                         &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
   1998                         IHEVC_CAB_CTXT_END * sizeof(UWORD8));
   1999                 }
   2000             }
   2001 #else
   2002             if(ps_ctxt->i4_bitrate_instance_num == 0)
   2003             {
   2004                 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
   2005                 {
   2006                     WORD32 i4_max_tree_depth = 4;
   2007                     WORD32 i;
   2008                     for(i = 0; i < i4_max_tree_depth; i++)
   2009                     {
   2010                         COPY_CABAC_STATES(
   2011                             &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
   2012                             &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
   2013                             IHEVC_CAB_CTXT_END * sizeof(UWORD8));
   2014                     }
   2015                 }
   2016             }
   2017 
   2018 #endif
   2019             if(ps_ctxt->i4_bitrate_instance_num == 0)
   2020             {
   2021                 /* FOR I- PIC populate the curr_ctb accordingly */
   2022                 if(ISLICE == ps_ctxt->i1_slice_type)
   2023                 {
   2024                     ps_ctb_ipe_analyse->ps_cu_tree_root = ps_cu_tree_analyse;
   2025                     ps_ctb_ipe_analyse->nodes_created_in_cu_tree = 1;
   2026 
   2027                     ihevce_populate_cu_tree(
   2028                         ps_ctb_ipe_analyse,
   2029                         ps_cu_tree_analyse,
   2030                         0,
   2031                         (IHEVCE_QUALITY_CONFIG_T)ps_ctxt->i4_quality_preset,
   2032                         POS_NA,
   2033                         POS_NA,
   2034                         POS_NA);
   2035                 }
   2036             }
   2037             ps_ctb_ipe_analyse->nodes_created_in_cu_tree = 1;
   2038             ps_ctxt->ps_enc_out_ctxt = &ps_ctxt->as_enc_cu_ctxt[0];
   2039             ps_ctxt->pu1_ecd_data = pu1_row_ecd_data;
   2040             if(ps_ctxt->i4_use_ctb_level_lamda)
   2041             {
   2042                 ihevce_compute_cu_level_QP(
   2043                     ps_ctxt, -1, ps_ctb_ipe_analyse->i4_64x64_act_factor[3][1], 0);
   2044             }
   2045 
   2046             s_cu_update_prms.ppu1_row_ecd_data = &pu1_row_ecd_data;
   2047             s_cu_update_prms.pi4_last_cu_pos_in_ctb = &last_cu_pos_in_ctb;
   2048             s_cu_update_prms.pi4_last_cu_size = &last_cu_size;
   2049             s_cu_update_prms.pi4_num_pus_in_ctb = &num_pus_in_ctb;
   2050             s_cu_update_prms.pps_cu_final = &ps_cu_final;
   2051             s_cu_update_prms.pps_row_pu = &ps_row_pu;
   2052             s_cu_update_prms.pps_row_tu = &ps_row_tu;
   2053             s_cu_update_prms.pu1_num_cus_in_ctb_out = &ps_ctb_out->u1_num_cus_in_ctb;
   2054 
   2055             // source satd computation
   2056             /* compute the source 8x8 SATD for the current CTB */
   2057             /* populate  pui4_source_satd in some structure and pass it inside */
   2058             if(ps_ctxt->u1_enable_psyRDOPT)
   2059             {
   2060                 /* declare local variables */
   2061                 WORD32 i;
   2062                 WORD32 ctb_size;
   2063                 WORD32 num_comp_had_blocks;
   2064                 UWORD8 *pu1_l0_block;
   2065                 WORD32 block_ht;
   2066                 WORD32 block_wd;
   2067                 WORD32 ht_offset;
   2068                 WORD32 wd_offset;
   2069 
   2070                 WORD32 num_horz_blocks;
   2071                 WORD32 had_block_size;
   2072                 WORD32 total_had_block_size;
   2073                 WORD16 pi2_residue_had_zscan[64];
   2074                 UWORD8 ai1_zeros_buffer[64];
   2075 
   2076                 WORD32 index_satd;
   2077                 WORD32 is_hbd;
   2078                 /* initialize the variables */
   2079                 block_ht = cur_ctb_ht;
   2080                 block_wd = cur_ctb_wd;
   2081 
   2082                 is_hbd = ps_ctxt->u1_is_input_data_hbd;
   2083 
   2084                 had_block_size = 8;
   2085                 total_had_block_size = had_block_size * had_block_size;
   2086 
   2087                 for(i = 0; i < total_had_block_size; i++)
   2088                 {
   2089                     ai1_zeros_buffer[i] = 0;
   2090                 }
   2091 
   2092                 ctb_size = block_ht * block_wd;  //ctb_width * ctb_height;
   2093                 num_comp_had_blocks = ctb_size / (had_block_size * had_block_size);
   2094 
   2095                 num_horz_blocks = block_wd / had_block_size;  //ctb_width / had_block_size;
   2096                 ht_offset = -had_block_size;
   2097                 wd_offset = -had_block_size;
   2098 
   2099                 index_satd = 0;
   2100                 /*Loop over all 8x8 blocks in the CTB*/
   2101                 for(i = 0; i < num_comp_had_blocks; i++)
   2102                 {
   2103                     if(i % num_horz_blocks == 0)
   2104                     {
   2105                         wd_offset = -had_block_size;
   2106                         ht_offset += had_block_size;
   2107                     }
   2108                     wd_offset += had_block_size;
   2109 
   2110                     if(!is_hbd)
   2111                     {
   2112                         /* get memory pointers for each of L0 and L1 blocks whose hadamard has to be computed */
   2113                         pu1_l0_block = s_cu_prms.pu1_luma_src +
   2114                                        ps_curr_src_bufs->i4_y_strd * ht_offset + wd_offset;
   2115 
   2116                         ps_ctxt->ai4_source_satd_8x8[index_satd] =
   2117 
   2118                             ps_ctxt->s_cmn_opt_func.pf_AC_HAD_8x8_8bit(
   2119                                 pu1_l0_block,
   2120                                 ps_curr_src_bufs->i4_y_strd,
   2121                                 ai1_zeros_buffer,
   2122                                 had_block_size,
   2123                                 pi2_residue_had_zscan,
   2124                                 had_block_size);
   2125                     }
   2126                     index_satd++;
   2127                 }
   2128             }
   2129 
   2130             if(ps_ctxt->u1_enable_psyRDOPT)
   2131             {
   2132                 /* declare local variables */
   2133                 WORD32 i;
   2134                 WORD32 ctb_size;
   2135                 WORD32 num_comp_had_blocks;
   2136                 UWORD8 *pu1_l0_block;
   2137                 UWORD8 *pu1_l0_block_prev = NULL;
   2138                 WORD32 block_ht;
   2139                 WORD32 block_wd;
   2140                 WORD32 ht_offset;
   2141                 WORD32 wd_offset;
   2142 
   2143                 WORD32 num_horz_blocks;
   2144                 WORD32 had_block_size;
   2145                 WORD16 pi2_residue_had[64];
   2146                 UWORD8 ai1_zeros_buffer[64];
   2147                 WORD32 index_satd = 0;
   2148 
   2149                 WORD32 is_hbd;
   2150                 is_hbd = ps_ctxt->u1_is_input_data_hbd;  // 8 bit
   2151 
   2152                 /* initialize the variables */
   2153                 /* change this based on the bit depth */
   2154                 // ps_ctxt->u1_chroma_array_type
   2155                 if(ps_ctxt->u1_chroma_array_type == 1)
   2156                 {
   2157                     block_ht = cur_ctb_ht / 2;
   2158                     block_wd = cur_ctb_wd / 2;
   2159                 }
   2160                 else
   2161                 {
   2162                     block_ht = cur_ctb_ht;
   2163                     block_wd = cur_ctb_wd / 2;
   2164                 }
   2165 
   2166                 had_block_size = 4;
   2167                 memset(ai1_zeros_buffer, 0, 64 * sizeof(UWORD8));
   2168 
   2169                 ctb_size = block_ht * block_wd;  //ctb_width * ctb_height;
   2170                 num_comp_had_blocks = 2 * ctb_size / (had_block_size * had_block_size);
   2171 
   2172                 num_horz_blocks = 2 * block_wd / had_block_size;  //ctb_width / had_block_size;
   2173                 ht_offset = -had_block_size;
   2174                 wd_offset = -had_block_size;
   2175 
   2176                 if(!is_hbd)
   2177                 {
   2178                     /* loop over for every 4x4 blocks in the CU for Cb */
   2179                     for(i = 0; i < num_comp_had_blocks; i++)
   2180                     {
   2181                         if(i % num_horz_blocks == 0)
   2182                         {
   2183                             wd_offset = -had_block_size;
   2184                             ht_offset += had_block_size;
   2185                         }
   2186                         wd_offset += had_block_size;
   2187 
   2188                         /* get memory pointers for each of L0 and L1 blocks whose hadamard has to be computed */
   2189                         if(i % 2 != 0)
   2190                         {
   2191                             if(!is_hbd)
   2192                             {
   2193                                 pu1_l0_block = pu1_l0_block_prev + 1;
   2194                             }
   2195                         }
   2196                         else
   2197                         {
   2198                             if(!is_hbd)
   2199                             {
   2200                                 pu1_l0_block = s_cu_prms.pu1_chrm_src +
   2201                                                s_cu_prms.i4_chrm_src_stride * ht_offset + wd_offset;
   2202                                 pu1_l0_block_prev = pu1_l0_block;
   2203                             }
   2204                         }
   2205 
   2206                         if(had_block_size == 4)
   2207                         {
   2208                             if(!is_hbd)
   2209                             {
   2210                                 ps_ctxt->ai4_source_chroma_satd[index_satd] =
   2211                                     ps_ctxt->s_cmn_opt_func.pf_chroma_AC_HAD_4x4_8bit(
   2212                                         pu1_l0_block,
   2213                                         s_cu_prms.i4_chrm_src_stride,
   2214                                         ai1_zeros_buffer,
   2215                                         had_block_size,
   2216                                         pi2_residue_had,
   2217                                         had_block_size);
   2218                             }
   2219 
   2220                             index_satd++;
   2221 
   2222                         }  // block size of 4x4
   2223 
   2224                     }  // for all blocks
   2225 
   2226                 }  // is hbd check
   2227             }
   2228 
   2229             ihevce_cu_recurse_decide(
   2230                 ps_ctxt,
   2231                 &s_cu_prms,
   2232                 ps_cu_tree_analyse,
   2233                 ps_cu_tree_analyse,
   2234                 ps_ctb_ipe_analyse,
   2235                 ps_cu_me_data,
   2236                 &ps_ctb_col_pu,
   2237                 &s_cu_update_prms,
   2238                 pu1_row_pu_map,
   2239                 &col_pu_map_idx,
   2240                 i4_tree_depth,
   2241                 ctb_ctr << 6,
   2242                 vert_ctr << 6,
   2243                 cur_ctb_ht);
   2244 
   2245             if(ps_ctxt->i1_slice_type != ISLICE)
   2246             {
   2247                 ASSERT(
   2248                     (cur_ctb_wd * cur_ctb_ht) <=
   2249                     ihevce_compute_area_of_valid_cus_in_ctb(ps_cu_tree_analyse));
   2250             }
   2251             /*If Sub pic rc is enabled*/
   2252             if(1 == ps_ctxt->i4_sub_pic_level_rc)
   2253             {
   2254                 /*In a row, after the required CTB is reached, send data and query scale from Bit Control thread */
   2255                 ihevce_sub_pic_rc_in_data(
   2256                     (void *)ps_multi_thrd_ctxt,
   2257                     (void *)ps_ctxt,
   2258                     (void *)ps_ctb_ipe_analyse,
   2259                     (void *)ps_frm_ctb_prms);
   2260             }
   2261 
   2262             ps_ctxt->ps_enc_out_ctxt->u1_cu_size = 128;
   2263 
   2264         } /* End of CU recursion block */
   2265 
   2266 #if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
   2267         {
   2268             ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt = &ps_ctxt->as_enc_cu_ctxt[0];
   2269             enc_loop_cu_prms_t *ps_cu_prms = &s_cu_prms;
   2270             ps_ctxt->pu1_ecd_data = pu1_row_ecd_data;
   2271 
   2272             do
   2273             {
   2274                 ihevce_update_final_cu_results(
   2275                     ps_ctxt,
   2276                     ps_enc_out_ctxt,
   2277                     ps_cu_prms,
   2278                     NULL, /* &ps_ctb_col_pu */
   2279                     NULL, /* &col_pu_map_idx */
   2280                     &s_cu_update_prms,
   2281                     ctb_ctr,
   2282                     vert_ctr);
   2283 
   2284                 ps_enc_out_ctxt++;
   2285 
   2286                 ASSERT(ps_ctb_in->u1_num_cus_in_ctb <= MAX_CTB_SIZE);
   2287 
   2288             } while(ps_enc_out_ctxt->u1_cu_size != 128);
   2289         }
   2290 #else
   2291         if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
   2292         {
   2293             ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt = &ps_ctxt->as_enc_cu_ctxt[0];
   2294             enc_loop_cu_prms_t *ps_cu_prms = &s_cu_prms;
   2295             ps_ctxt->pu1_ecd_data = pu1_row_ecd_data;
   2296 
   2297             do
   2298             {
   2299                 ihevce_update_final_cu_results(
   2300                     ps_ctxt,
   2301                     ps_enc_out_ctxt,
   2302                     ps_cu_prms,
   2303                     NULL, /* &ps_ctb_col_pu */
   2304                     NULL, /* &col_pu_map_idx */
   2305                     &s_cu_update_prms,
   2306                     ctb_ctr,
   2307                     vert_ctr);
   2308 
   2309                 ps_enc_out_ctxt++;
   2310 
   2311                 ASSERT(ps_ctb_in->u1_num_cus_in_ctb <= MAX_CTB_SIZE);
   2312 
   2313             } while(ps_enc_out_ctxt->u1_cu_size != 128);
   2314         }
   2315 #endif
   2316 
   2317         /* --- ctb level copy of data to left buffers--*/
   2318         ((pf_enc_loop_ctb_left_copy)ps_ctxt->pv_enc_loop_ctb_left_copy)(ps_ctxt, &s_cu_prms);
   2319 
   2320         if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
   2321         {
   2322             /* For the Unaligned CTB, make the invalid edge boundary strength 0 */
   2323             ihevce_bs_clear_invalid(
   2324                 &ps_ctxt->s_deblk_bs_prms,
   2325                 last_ctb_row_flag,
   2326                 (ctb_ctr == (num_ctbs_horz_pic - 1)),
   2327                 last_hz_ctb_wd,
   2328                 last_vt_ctb_ht);
   2329 
   2330             /* -----------------Read boundary strengths for current CTB------------- */
   2331 
   2332             if((0 == ps_ctxt->i4_deblock_type) && (ps_ctxt->i4_deblk_pad_hpel_cur_pic))
   2333             {
   2334                 /*Storing boundary strengths of current CTB*/
   2335                 UWORD32 *pu4_bs_horz = &ps_ctxt->s_deblk_bs_prms.au4_horz_bs[0];
   2336                 UWORD32 *pu4_bs_vert = &ps_ctxt->s_deblk_bs_prms.au4_vert_bs[0];
   2337 
   2338                 memcpy(s_deblk_ctb_row_params.pu4_ctb_row_bs_vert, pu4_bs_vert, (ctb_size * 4) / 8);
   2339                 memcpy(s_deblk_ctb_row_params.pu4_ctb_row_bs_horz, pu4_bs_horz, (ctb_size * 4) / 8);
   2340             }
   2341             //Increment for storing next CTB info
   2342             s_deblk_ctb_row_params.pu4_ctb_row_bs_vert +=
   2343                 (ctb_size >> 3);  //one vertical edge per 8x8 block
   2344             s_deblk_ctb_row_params.pu4_ctb_row_bs_horz +=
   2345                 (ctb_size >> 3);  //one horizontal edge per 8x8 block
   2346         }
   2347 
   2348         /* -------------- ctb level updates ----------------- */
   2349         ps_row_cu += ps_ctb_out->u1_num_cus_in_ctb;
   2350 
   2351         pu1_row_pu_map += (ctb_size >> 2) * (ctb_size >> 2);
   2352 
   2353         /* first ctb offset will be populated by the caller */
   2354         if(0 != ctb_ctr)
   2355         {
   2356             pu4_pu_offsets[ctb_ctr] = pu4_pu_offsets[ctb_ctr - 1] + num_pus_in_ctb;
   2357         }
   2358         pu2_num_pu_map[ctb_ctr] = num_pus_in_ctb;
   2359         ASSERT(ps_ctb_out->u1_num_cus_in_ctb != 0);
   2360 
   2361         ps_ctb_in++;
   2362         ps_ctb_out++;
   2363     }
   2364 
   2365     /* ---------- Encloop end of row updates ----------------- */
   2366 
   2367     /* at the end of row processing cu pixel counter is set to */
   2368     /* (num ctb * ctbsize) + ctb size                          */
   2369     /* this is to set the dependency for right most cu of last */
   2370     /* ctb's top right data dependency                         */
   2371     /* this even takes care of entropy dependency for          */
   2372     /* incomplete ctb as well                                  */
   2373     ihevce_dmgr_set_row_row_sync(
   2374         pv_dep_mngr_enc_loop_cu_top_right,
   2375         (ctb_ctr * ctb_size + ctb_size),
   2376         vert_ctr,
   2377         ps_ctxt->i4_tile_col_idx /* Col Tile No. */);
   2378 
   2379     ps_ctxt->s_sao_ctxt_t.ps_cmn_utils_optimised_function_list = &ps_ctxt->s_cmn_opt_func;
   2380 
   2381     /* Restore structure.
   2382     Getting the address of stored-BS and Qp-map and other info */
   2383     memcpy(&s_deblk_ctb_row_params, &ps_ctxt->s_deblk_ctbrow_prms, sizeof(deblk_ctbrow_prms_t));
   2384     {
   2385         /* Update the pointers to the tile start */
   2386         s_deblk_ctb_row_params.pu4_ctb_row_bs_vert +=
   2387             (ps_tile_params->i4_first_ctb_x * (ctb_size >> 3));  //one vertical edge per 8x8 block
   2388         s_deblk_ctb_row_params.pu4_ctb_row_bs_horz +=
   2389             (ps_tile_params->i4_first_ctb_x * (ctb_size >> 3));  //one horizontal edge per 8x8 block
   2390         s_deblk_ctb_row_params.pi1_ctb_row_qp += (ps_tile_params->i4_first_ctb_x * (ctb_size >> 2));
   2391     }
   2392 
   2393 #if PROFILE_ENC_REG_DATA
   2394     s_profile.u8_enc_reg_data[vert_ctr] = 0;
   2395 #endif
   2396 
   2397     /* -- Loop over all the CTBs in a row for Deblocking and Subpel gen --- */
   2398     if(!ps_ctxt->u1_is_input_data_hbd)
   2399     {
   2400         WORD32 last_col_pic, last_col_tile;
   2401 
   2402         for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
   2403         {
   2404             /* store the ctb level prms in cu prms */
   2405             s_cu_prms.i4_ctb_pos = ctb_ctr;
   2406             s_cu_prms.pu1_luma_src = (UWORD8 *)ps_curr_src_bufs->pv_y_buf + ctb_ctr * ctb_size;
   2407             s_cu_prms.pu1_chrm_src = (UWORD8 *)ps_curr_src_bufs->pv_u_buf + ctb_ctr * ctb_size;
   2408 
   2409             s_cu_prms.pu1_luma_recon = (UWORD8 *)ps_curr_recon_bufs->pv_y_buf + ctb_ctr * ctb_size;
   2410             s_cu_prms.pu1_chrm_recon = (UWORD8 *)ps_curr_recon_bufs->pv_u_buf + ctb_ctr * ctb_size;
   2411             s_cu_prms.pu1_sbpel_hxfy = (UWORD8 *)ppu1_y_subpel_planes[0] + ctb_ctr * ctb_size;
   2412 
   2413             s_cu_prms.pu1_sbpel_fxhy = (UWORD8 *)ppu1_y_subpel_planes[1] + ctb_ctr * ctb_size;
   2414 
   2415             s_cu_prms.pu1_sbpel_hxhy = (UWORD8 *)ppu1_y_subpel_planes[2] + ctb_ctr * ctb_size;
   2416 
   2417             /* If last ctb in the horizontal row */
   2418             if(ctb_ctr == (num_ctbs_horz_pic - 1))
   2419             {
   2420                 last_col_pic = 1;
   2421             }
   2422             else
   2423             {
   2424                 last_col_pic = 0;
   2425             }
   2426 
   2427             /* If last ctb in the tile row */
   2428             if(ctb_ctr == (ctb_end - 1))
   2429             {
   2430                 last_col_tile = 1;
   2431             }
   2432             else
   2433             {
   2434                 last_col_tile = 0;
   2435             }
   2436 
   2437             if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
   2438             {
   2439                 /* Wait till top neighbour CTB has done its deblocking*/
   2440                 if(ctb_ctr < (ctb_end)-1)
   2441                 {
   2442                     ihevce_dmgr_chk_row_row_sync(
   2443                         pv_dep_mngr_enc_loop_dblk,
   2444                         ctb_ctr,
   2445                         dblk_offset,
   2446                         dblk_check_dep_pos,
   2447                         ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
   2448                         ps_ctxt->thrd_id);
   2449                 }
   2450 
   2451                 if((0 == ps_ctxt->i4_deblock_type))
   2452                 {
   2453                     /* Populate Qp-map */
   2454                     if(ctb_start == ctb_ctr)
   2455                     {
   2456                         ihevce_deblk_populate_qp_map(
   2457                             ps_ctxt,
   2458                             &s_deblk_ctb_row_params,
   2459                             ps_ctb_out_dblk,
   2460                             vert_ctr,
   2461                             ps_frm_ctb_prms,
   2462                             ps_tile_params);
   2463                     }
   2464                     ps_ctxt->s_deblk_prms.i4_ctb_size = ctb_size;
   2465 
   2466                     /* recon pointers and stride */
   2467                     ps_ctxt->s_deblk_prms.pu1_ctb_y = s_cu_prms.pu1_luma_recon;
   2468                     ps_ctxt->s_deblk_prms.pu1_ctb_uv = s_cu_prms.pu1_chrm_recon;
   2469                     ps_ctxt->s_deblk_prms.i4_luma_pic_stride = s_cu_prms.i4_luma_recon_stride;
   2470                     ps_ctxt->s_deblk_prms.i4_chroma_pic_stride = s_cu_prms.i4_chrm_recon_stride;
   2471 
   2472                     ps_ctxt->s_deblk_prms.i4_deblock_top_ctb_edge = (0 == vert_ctr) ? 0 : 1;
   2473                     {
   2474                         ps_ctxt->s_deblk_prms.i4_deblock_top_ctb_edge =
   2475                             (ps_tile_params->i4_first_ctb_y == vert_ctr) ? 0 : 1;
   2476                     }
   2477                     ps_ctxt->s_deblk_prms.i4_deblock_left_ctb_edge = (ctb_start == ctb_ctr) ? 0 : 1;
   2478                     //or according to slice boundary. Support yet to be added !!!!
   2479 
   2480                     ihevce_deblk_ctb(
   2481                         &ps_ctxt->s_deblk_prms, last_col_tile, &s_deblk_ctb_row_params);
   2482 
   2483                     //Increment for storing next CTB info
   2484                     s_deblk_ctb_row_params.pu4_ctb_row_bs_vert +=
   2485                         (ctb_size >> 3);  //one vertical edge per 8x8 block
   2486                     s_deblk_ctb_row_params.pu4_ctb_row_bs_horz +=
   2487                         (ctb_size >> 3);  //one horizontal edge per 8x8 block
   2488                     s_deblk_ctb_row_params.pi1_ctb_row_qp +=
   2489                         (ctb_size >> 2);  //one qp per 4x4 block.
   2490 
   2491                 }  //end of if((0 == ps_ctxt->i4_deblock_type)
   2492             }  // end of if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
   2493 
   2494             /* Apply SAO over the previous CTB-row */
   2495             if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
   2496                ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
   2497             {
   2498                 sao_ctxt_t *ps_sao_ctxt = &ps_ctxt->s_sao_ctxt_t;
   2499 
   2500                 if((vert_ctr > ps_tile_params->i4_first_ctb_y) &&
   2501                    (ctb_ctr > ctb_start))  //if((vert_ctr > 0) && (ctb_ctr > 0))
   2502                 {
   2503                     /* Call the sao function to do sao for the current ctb*/
   2504 
   2505                     /* Register the curr ctb's x pos in sao context*/
   2506                     ps_sao_ctxt->i4_ctb_x = ctb_ctr - 1;
   2507 
   2508                     /* Register the curr ctb's y pos in sao context*/
   2509                     ps_sao_ctxt->i4_ctb_y = vert_ctr - 1;
   2510 
   2511                     ps_ctb_out_sao = ps_sao_ctxt->ps_ctb_out +
   2512                                      (vert_ctr - 1) * ps_frm_ctb_prms->i4_num_ctbs_horz +
   2513                                      (ctb_ctr - 1);
   2514                     ps_sao_ctxt->ps_sao = &ps_ctb_out_sao->s_sao;
   2515                     ps_sao_ctxt->i4_sao_blk_wd = ctb_size;
   2516                     ps_sao_ctxt->i4_sao_blk_ht = ctb_size;
   2517 
   2518                     ps_sao_ctxt->i4_is_last_ctb_row = 0;
   2519                     ps_sao_ctxt->i4_is_last_ctb_col = 0;
   2520 
   2521                     /* Calculate the recon buf pointer and stride for the current ctb */
   2522                     ps_sao_ctxt->pu1_cur_luma_recon_buf =
   2523                         ps_sao_ctxt->pu1_frm_luma_recon_buf +
   2524                         (ps_sao_ctxt->i4_frm_luma_recon_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
   2525                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
   2526 
   2527                     ps_sao_ctxt->i4_cur_luma_recon_stride = ps_sao_ctxt->i4_frm_luma_recon_stride;
   2528 
   2529                     ps_sao_ctxt->pu1_cur_chroma_recon_buf =
   2530                         ps_sao_ctxt->pu1_frm_chroma_recon_buf +
   2531                         (ps_sao_ctxt->i4_frm_chroma_recon_stride * ps_sao_ctxt->i4_ctb_y *
   2532                          (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
   2533                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
   2534 
   2535                     ps_sao_ctxt->i4_cur_chroma_recon_stride =
   2536                         ps_sao_ctxt->i4_frm_chroma_recon_stride;
   2537 
   2538                     ps_sao_ctxt->pu1_cur_luma_src_buf =
   2539                         ps_sao_ctxt->pu1_frm_luma_src_buf +
   2540                         (ps_sao_ctxt->i4_frm_luma_src_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
   2541                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
   2542 
   2543                     ps_sao_ctxt->i4_cur_luma_src_stride = ps_sao_ctxt->i4_frm_luma_src_stride;
   2544 
   2545                     ps_sao_ctxt->pu1_cur_chroma_src_buf =
   2546                         ps_sao_ctxt->pu1_frm_chroma_src_buf +
   2547                         (ps_sao_ctxt->i4_frm_chroma_src_stride * ps_sao_ctxt->i4_ctb_y *
   2548                          (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
   2549                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
   2550 
   2551                     ps_sao_ctxt->i4_cur_chroma_src_stride = ps_sao_ctxt->i4_frm_chroma_src_stride;
   2552 
   2553                     /* Calculate the pointer to buff to store the (x,y)th sao
   2554                     * for the top merge of (x,y+1)th ctb
   2555                     */
   2556                     ps_sao_ctxt->ps_top_ctb_sao =
   2557                         &ps_sao_ctxt->aps_frm_top_ctb_sao[ps_ctxt->i4_enc_frm_id]
   2558                                                          [ps_sao_ctxt->i4_ctb_x +
   2559                                                           (ps_sao_ctxt->i4_ctb_y) *
   2560                                                               ps_frm_ctb_prms->i4_num_ctbs_horz +
   2561                                                           (ps_ctxt->i4_bitrate_instance_num *
   2562                                                            ps_sao_ctxt->i4_num_ctb_units)];
   2563 
   2564                     /* Calculate the pointer to buff to store the top pixels of curr ctb*/
   2565                     ps_sao_ctxt->pu1_curr_sao_src_top_luma =
   2566                         ps_sao_ctxt->apu1_sao_src_frm_top_luma[ps_ctxt->i4_enc_frm_id] +
   2567                         (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_luma_buf_stride +
   2568                         ps_sao_ctxt->i4_ctb_x * ctb_size +
   2569                         ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
   2570                                                             ps_sao_ctxt->i4_top_chroma_buf_size);
   2571 
   2572                     /* Calculate the pointer to buff to store the top pixels of curr ctb*/
   2573                     ps_sao_ctxt->pu1_curr_sao_src_top_chroma =
   2574                         ps_sao_ctxt->apu1_sao_src_frm_top_chroma[ps_ctxt->i4_enc_frm_id] +
   2575                         (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_chroma_buf_stride +
   2576                         ps_sao_ctxt->i4_ctb_x * ctb_size +
   2577                         ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
   2578                                                             ps_sao_ctxt->i4_top_chroma_buf_size);
   2579 
   2580                     {
   2581                         UWORD32 u4_ctb_sao_bits;
   2582 
   2583                         ihevce_sao_analyse(
   2584                             &ps_ctxt->s_sao_ctxt_t,
   2585                             ps_ctb_out_sao,
   2586                             &u4_ctb_sao_bits,
   2587                             ps_tile_params);
   2588                         ps_ctxt
   2589                             ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
   2590                                                      [ps_ctxt->i4_bitrate_instance_num]
   2591                             ->u4_frame_rdopt_header_bits += u4_ctb_sao_bits;
   2592                         ps_ctxt
   2593                             ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
   2594                                                      [ps_ctxt->i4_bitrate_instance_num]
   2595                             ->u4_frame_rdopt_bits += u4_ctb_sao_bits;
   2596                     }
   2597                     if(ps_ctxt->i4_deblk_pad_hpel_cur_pic &
   2598                        0x1) /** Subpel generation not done for non-ref picture **/
   2599                     {
   2600                         /* Padding and Subpel Plane Generation */
   2601                         ihevce_pad_interp_recon_ctb(
   2602                             ps_pad_interp_recon,
   2603                             ctb_ctr - 1,
   2604                             vert_ctr - 1,
   2605                             ps_ctxt->i4_quality_preset,
   2606                             ps_frm_ctb_prms,
   2607                             ps_ctxt->ai2_scratch,
   2608                             ps_ctxt->i4_bitrate_instance_num,
   2609                             ps_ctxt->ps_func_selector);
   2610                     }
   2611                 }
   2612 
   2613                 /* Call the sao function again for the last ctb of the previous row*/
   2614                 if(((ctb_ctr + 1) == (ctb_end)) &&
   2615                    (vert_ctr >
   2616                     ps_tile_params
   2617                         ->i4_first_ctb_y))  //( ((ctb_ctr+1) == ps_frm_ctb_prms->i4_num_ctbs_horz) && (vert_ctr > 0) )
   2618                 {
   2619                     /* Register the curr ctb's x pos in sao context*/
   2620                     ps_ctxt->s_sao_ctxt_t.i4_ctb_x = ctb_ctr;
   2621 
   2622                     /* Register the curr ctb's y pos in sao context*/
   2623                     ps_ctxt->s_sao_ctxt_t.i4_ctb_y = vert_ctr - 1;
   2624 
   2625                     ps_ctb_out_sao = ps_ctxt->s_sao_ctxt_t.ps_ctb_out +
   2626                                      (vert_ctr - 1) * ps_frm_ctb_prms->i4_num_ctbs_horz + (ctb_ctr);
   2627 
   2628                     ps_ctxt->s_sao_ctxt_t.ps_sao = &ps_ctb_out_sao->s_sao;
   2629 
   2630                     ps_ctxt->s_sao_ctxt_t.i4_sao_blk_wd =
   2631                         ctb_size - ((ps_tile_params->i4_curr_tile_wd_in_ctb_unit * ctb_size) -
   2632                                     ps_tile_params->i4_curr_tile_width);
   2633 
   2634                     ps_ctxt->s_sao_ctxt_t.i4_sao_blk_ht = ps_ctxt->s_sao_ctxt_t.i4_ctb_size;
   2635 
   2636                     ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_row = 0;
   2637                     ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_col = 1;
   2638 
   2639                     /* Calculate the recon buf pointer and stride for the current ctb */
   2640                     ps_sao_ctxt->pu1_cur_luma_recon_buf =
   2641                         ps_sao_ctxt->pu1_frm_luma_recon_buf +
   2642                         (ps_sao_ctxt->i4_frm_luma_recon_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
   2643                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
   2644 
   2645                     ps_sao_ctxt->i4_cur_luma_recon_stride = ps_sao_ctxt->i4_frm_luma_recon_stride;
   2646 
   2647                     ps_sao_ctxt->pu1_cur_chroma_recon_buf =
   2648                         ps_sao_ctxt->pu1_frm_chroma_recon_buf +
   2649                         (ps_sao_ctxt->i4_frm_chroma_recon_stride * ps_sao_ctxt->i4_ctb_y *
   2650                          (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
   2651                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
   2652 
   2653                     ps_sao_ctxt->i4_cur_chroma_recon_stride =
   2654                         ps_sao_ctxt->i4_frm_chroma_recon_stride;
   2655 
   2656                     ps_sao_ctxt->pu1_cur_luma_src_buf =
   2657                         ps_sao_ctxt->pu1_frm_luma_src_buf +
   2658                         (ps_sao_ctxt->i4_frm_luma_src_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
   2659                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
   2660 
   2661                     ps_sao_ctxt->i4_cur_luma_src_stride = ps_sao_ctxt->i4_frm_luma_src_stride;
   2662 
   2663                     ps_sao_ctxt->pu1_cur_chroma_src_buf =
   2664                         ps_sao_ctxt->pu1_frm_chroma_src_buf +
   2665                         (ps_sao_ctxt->i4_frm_chroma_src_stride * ps_sao_ctxt->i4_ctb_y *
   2666                          (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
   2667                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
   2668 
   2669                     ps_sao_ctxt->i4_cur_chroma_src_stride = ps_sao_ctxt->i4_frm_chroma_src_stride;
   2670 
   2671                     /* Calculate the pointer to buff to store the (x,y)th sao
   2672                     * for the top merge of (x,y+1)th ctb
   2673                     */
   2674                     ps_sao_ctxt->ps_top_ctb_sao =
   2675                         &ps_sao_ctxt->aps_frm_top_ctb_sao[ps_ctxt->i4_enc_frm_id]
   2676                                                          [ps_sao_ctxt->i4_ctb_x +
   2677                                                           (ps_sao_ctxt->i4_ctb_y) *
   2678                                                               ps_frm_ctb_prms->i4_num_ctbs_horz +
   2679                                                           (ps_ctxt->i4_bitrate_instance_num *
   2680                                                            ps_sao_ctxt->i4_num_ctb_units)];
   2681 
   2682                     /* Calculate the pointer to buff to store the top pixels of curr ctb*/
   2683                     ps_sao_ctxt->pu1_curr_sao_src_top_luma =
   2684                         ps_sao_ctxt->apu1_sao_src_frm_top_luma[ps_ctxt->i4_enc_frm_id] +
   2685                         (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_luma_buf_stride +
   2686                         ps_sao_ctxt->i4_ctb_x * ctb_size +
   2687                         ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
   2688                                                             ps_sao_ctxt->i4_top_chroma_buf_size);
   2689 
   2690                     /* Calculate the pointer to buff to store the top pixels of curr ctb*/
   2691                     ps_sao_ctxt->pu1_curr_sao_src_top_chroma =
   2692                         ps_sao_ctxt->apu1_sao_src_frm_top_chroma[ps_ctxt->i4_enc_frm_id] +
   2693                         (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_chroma_buf_stride +
   2694                         ps_sao_ctxt->i4_ctb_x * ctb_size +
   2695                         ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
   2696                                                             ps_sao_ctxt->i4_top_chroma_buf_size);
   2697 
   2698                     {
   2699                         UWORD32 u4_ctb_sao_bits;
   2700 
   2701                         ihevce_sao_analyse(
   2702                             &ps_ctxt->s_sao_ctxt_t,
   2703                             ps_ctb_out_sao,
   2704                             &u4_ctb_sao_bits,
   2705                             ps_tile_params);
   2706                         ps_ctxt
   2707                             ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
   2708                                                      [ps_ctxt->i4_bitrate_instance_num]
   2709                             ->u4_frame_rdopt_header_bits += u4_ctb_sao_bits;
   2710                         ps_ctxt
   2711                             ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
   2712                                                      [ps_ctxt->i4_bitrate_instance_num]
   2713                             ->u4_frame_rdopt_bits += u4_ctb_sao_bits;
   2714                     }
   2715                     if(ps_ctxt->i4_deblk_pad_hpel_cur_pic &
   2716                        0x1) /** Subpel generation not done for non-ref picture **/
   2717                     {
   2718                         /* Padding and Subpel Plane Generation */
   2719                         ihevce_pad_interp_recon_ctb(
   2720                             ps_pad_interp_recon,
   2721                             ctb_ctr,
   2722                             vert_ctr - 1,
   2723                             ps_ctxt->i4_quality_preset,
   2724                             ps_frm_ctb_prms,
   2725                             ps_ctxt->ai2_scratch,
   2726                             ps_ctxt->i4_bitrate_instance_num,
   2727                             ps_ctxt->ps_func_selector);
   2728                     }
   2729                 }
   2730             }
   2731             else  //SAO Disabled
   2732             {
   2733                 if(1 == ps_ctxt->i4_deblk_pad_hpel_cur_pic)
   2734                 {
   2735                     /* Padding and Subpel Plane Generation */
   2736                     ihevce_pad_interp_recon_ctb(
   2737                         ps_pad_interp_recon,
   2738                         ctb_ctr,
   2739                         vert_ctr,
   2740                         ps_ctxt->i4_quality_preset,
   2741                         ps_frm_ctb_prms,
   2742                         ps_ctxt->ai2_scratch,
   2743                         ps_ctxt->i4_bitrate_instance_num,
   2744                         ps_ctxt->ps_func_selector);
   2745                 }
   2746             }
   2747 
   2748             /* update the number of ctbs deblocked for this row */
   2749             ihevce_dmgr_set_row_row_sync(
   2750                 pv_dep_mngr_enc_loop_dblk,
   2751                 (ctb_ctr + 1),
   2752                 vert_ctr,
   2753                 ps_ctxt->i4_tile_col_idx /* Col Tile No. */);
   2754         }  //end of loop over CTBs in current CTB-row
   2755         {
   2756             if(!ps_ctxt->i4_bitrate_instance_num)
   2757             {
   2758                 if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
   2759                    ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
   2760                 {
   2761                     /* If SAO is on, then signal completion of previous CTB row */
   2762                     if(0 != vert_ctr)
   2763                     {
   2764                         {
   2765                             WORD32 post_ctb_ctr;
   2766 
   2767                             for(post_ctb_ctr = ctb_start; post_ctb_ctr < ctb_end; post_ctb_ctr++)
   2768                             {
   2769                                 ihevce_dmgr_map_set_sync(
   2770                                     pv_dep_mngr_me_dep_encloop,
   2771                                     post_ctb_ctr,
   2772                                     (vert_ctr - 1),
   2773                                     MAP_CTB_COMPLETE);
   2774                             }
   2775                         }
   2776                     }
   2777                 }
   2778                 else
   2779                 {
   2780                     {
   2781                         WORD32 post_ctb_ctr;
   2782 
   2783                         for(post_ctb_ctr = ctb_start; post_ctb_ctr < ctb_end; post_ctb_ctr++)
   2784                         {
   2785                             ihevce_dmgr_map_set_sync(
   2786                                 pv_dep_mngr_me_dep_encloop,
   2787                                 post_ctb_ctr,
   2788                                 vert_ctr,
   2789                                 MAP_CTB_COMPLETE);
   2790                         }
   2791                     }
   2792                 }
   2793             }
   2794         }
   2795 
   2796         /* Call the sao function again for the last ctb row of frame */
   2797         if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
   2798            ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
   2799         {
   2800             sao_ctxt_t *ps_sao_ctxt = &ps_ctxt->s_sao_ctxt_t;
   2801 
   2802             for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
   2803             {
   2804                 if((vert_ctr == (ps_tile_params->i4_first_ctb_y +
   2805                                  ps_tile_params->i4_curr_tile_ht_in_ctb_unit - 1)) &&
   2806                    (ctb_ctr >
   2807                     ctb_start))  //((vert_ctr == (ps_frm_ctb_prms->i4_num_ctbs_vert - 1)) && (ctb_ctr > 0))
   2808                 {
   2809                     /* Register the curr ctb's x pos in sao context*/
   2810                     ps_ctxt->s_sao_ctxt_t.i4_ctb_x = ctb_ctr - 1;
   2811 
   2812                     /* Register the curr ctb's y pos in sao context*/
   2813                     ps_ctxt->s_sao_ctxt_t.i4_ctb_y = vert_ctr;
   2814 
   2815                     ps_ctb_out_sao = ps_ctxt->s_sao_ctxt_t.ps_ctb_out +
   2816                                      (vert_ctr)*ps_frm_ctb_prms->i4_num_ctbs_horz + (ctb_ctr - 1);
   2817 
   2818                     ps_ctxt->s_sao_ctxt_t.ps_sao = &ps_ctb_out_sao->s_sao;
   2819 
   2820                     ps_ctxt->s_sao_ctxt_t.i4_sao_blk_wd = ps_ctxt->s_sao_ctxt_t.i4_ctb_size;
   2821                     ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_col = 0;
   2822 
   2823                     ps_ctxt->s_sao_ctxt_t.i4_sao_blk_ht =
   2824                         ctb_size - ((ps_tile_params->i4_curr_tile_ht_in_ctb_unit * ctb_size) -
   2825                                     ps_tile_params->i4_curr_tile_height);
   2826 
   2827                     ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_row = 1;
   2828 
   2829                     /* Calculate the recon buf pointer and stride for teh current ctb */
   2830                     ps_sao_ctxt->pu1_cur_luma_recon_buf =
   2831                         ps_sao_ctxt->pu1_frm_luma_recon_buf +
   2832                         (ps_sao_ctxt->i4_frm_luma_recon_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
   2833                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
   2834 
   2835                     ps_sao_ctxt->i4_cur_luma_recon_stride = ps_sao_ctxt->i4_frm_luma_recon_stride;
   2836 
   2837                     ps_sao_ctxt->pu1_cur_chroma_recon_buf =
   2838                         ps_sao_ctxt->pu1_frm_chroma_recon_buf +
   2839                         (ps_sao_ctxt->i4_frm_chroma_recon_stride * ps_sao_ctxt->i4_ctb_y *
   2840                          (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
   2841                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
   2842 
   2843                     ps_sao_ctxt->i4_cur_chroma_recon_stride =
   2844                         ps_sao_ctxt->i4_frm_chroma_recon_stride;
   2845 
   2846                     ps_sao_ctxt->pu1_cur_luma_src_buf =
   2847                         ps_sao_ctxt->pu1_frm_luma_src_buf +
   2848                         (ps_sao_ctxt->i4_frm_luma_src_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
   2849                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
   2850 
   2851                     ps_sao_ctxt->i4_cur_luma_src_stride = ps_sao_ctxt->i4_frm_luma_src_stride;
   2852 
   2853                     ps_sao_ctxt->pu1_cur_chroma_src_buf =
   2854                         ps_sao_ctxt->pu1_frm_chroma_src_buf +
   2855                         (ps_sao_ctxt->i4_frm_chroma_src_stride * ps_sao_ctxt->i4_ctb_y *
   2856                          (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
   2857                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
   2858 
   2859                     ps_sao_ctxt->i4_cur_chroma_src_stride = ps_sao_ctxt->i4_frm_chroma_src_stride;
   2860 
   2861                     /* Calculate the pointer to buff to store the (x,y)th sao
   2862                     * for the top merge of (x,y+1)th ctb
   2863                     */
   2864                     ps_sao_ctxt->ps_top_ctb_sao =
   2865                         &ps_sao_ctxt->aps_frm_top_ctb_sao[ps_ctxt->i4_enc_frm_id]
   2866                                                          [ps_sao_ctxt->i4_ctb_x +
   2867                                                           (ps_sao_ctxt->i4_ctb_y) *
   2868                                                               ps_frm_ctb_prms->i4_num_ctbs_horz +
   2869                                                           (ps_ctxt->i4_bitrate_instance_num *
   2870                                                            ps_sao_ctxt->i4_num_ctb_units)];
   2871 
   2872                     /* Calculate the pointer to buff to store the top pixels of curr ctb*/
   2873                     ps_sao_ctxt->pu1_curr_sao_src_top_luma =
   2874                         ps_sao_ctxt->apu1_sao_src_frm_top_luma[ps_ctxt->i4_enc_frm_id] +
   2875                         (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_luma_buf_stride +
   2876                         ps_sao_ctxt->i4_ctb_x * ctb_size +
   2877                         ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
   2878                                                             ps_sao_ctxt->i4_top_chroma_buf_size);
   2879 
   2880                     /* Calculate the pointer to buff to store the top pixels of curr ctb*/
   2881                     ps_sao_ctxt->pu1_curr_sao_src_top_chroma =
   2882                         ps_sao_ctxt->apu1_sao_src_frm_top_chroma[ps_ctxt->i4_enc_frm_id] +
   2883                         (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_chroma_buf_stride +
   2884                         ps_sao_ctxt->i4_ctb_x * ctb_size +
   2885                         ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
   2886                                                             ps_sao_ctxt->i4_top_chroma_buf_size);
   2887 
   2888                     {
   2889                         UWORD32 u4_ctb_sao_bits;
   2890                         ihevce_sao_analyse(
   2891                             &ps_ctxt->s_sao_ctxt_t,
   2892                             ps_ctb_out_sao,
   2893                             &u4_ctb_sao_bits,
   2894                             ps_tile_params);
   2895                         ps_ctxt
   2896                             ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
   2897                                                      [ps_ctxt->i4_bitrate_instance_num]
   2898                             ->u4_frame_rdopt_header_bits += u4_ctb_sao_bits;
   2899                         ps_ctxt
   2900                             ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
   2901                                                      [ps_ctxt->i4_bitrate_instance_num]
   2902                             ->u4_frame_rdopt_bits += u4_ctb_sao_bits;
   2903                     }
   2904                     if(ps_ctxt->i4_deblk_pad_hpel_cur_pic &
   2905                        0x1) /** Subpel generation not done for non-ref picture **/
   2906                     {
   2907                         /* Padding and Subpel Plane Generation */
   2908                         ihevce_pad_interp_recon_ctb(
   2909                             ps_pad_interp_recon,
   2910                             ctb_ctr - 1,
   2911                             vert_ctr,
   2912                             ps_ctxt->i4_quality_preset,
   2913                             ps_frm_ctb_prms,
   2914                             ps_ctxt->ai2_scratch,
   2915                             ps_ctxt->i4_bitrate_instance_num,
   2916                             ps_ctxt->ps_func_selector);
   2917                     }
   2918                 }
   2919                 /* Call the sao function again for the last ctb of the last ctb row of frame */
   2920                 if((vert_ctr == (ps_tile_params->i4_first_ctb_y +
   2921                                  ps_tile_params->i4_curr_tile_ht_in_ctb_unit - 1)) &&
   2922                    ((ctb_ctr + 1) ==
   2923                     (ctb_end)))  //( ((ctb_ctr+1) == ps_frm_ctb_prms->i4_num_ctbs_horz))
   2924                 {
   2925                     /* Register the curr ctb's x pos in sao context*/
   2926                     ps_ctxt->s_sao_ctxt_t.i4_ctb_x = ctb_ctr;
   2927 
   2928                     /* Register the curr ctb's y pos in sao context*/
   2929                     ps_ctxt->s_sao_ctxt_t.i4_ctb_y = vert_ctr;
   2930 
   2931                     ps_ctb_out_sao = ps_ctxt->s_sao_ctxt_t.ps_ctb_out +
   2932                                      (vert_ctr)*ps_frm_ctb_prms->i4_num_ctbs_horz + (ctb_ctr);
   2933 
   2934                     ps_ctxt->s_sao_ctxt_t.ps_sao = &ps_ctb_out_sao->s_sao;
   2935 
   2936                     ps_ctxt->s_sao_ctxt_t.i4_sao_blk_wd =
   2937                         ctb_size - ((ps_tile_params->i4_curr_tile_wd_in_ctb_unit * ctb_size) -
   2938                                     ps_tile_params->i4_curr_tile_width);
   2939 
   2940                     ps_ctxt->s_sao_ctxt_t.i4_sao_blk_ht =
   2941                         ctb_size - ((ps_tile_params->i4_curr_tile_ht_in_ctb_unit * ctb_size) -
   2942                                     ps_tile_params->i4_curr_tile_height);
   2943 
   2944                     ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_row = 1;
   2945                     ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_col = 1;
   2946 
   2947                     /* Calculate the recon buf pointer and stride for teh current ctb */
   2948                     ps_sao_ctxt->pu1_cur_luma_recon_buf =
   2949                         ps_sao_ctxt->pu1_frm_luma_recon_buf +
   2950                         (ps_sao_ctxt->i4_frm_luma_recon_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
   2951                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
   2952 
   2953                     ps_sao_ctxt->i4_cur_luma_recon_stride = ps_sao_ctxt->i4_frm_luma_recon_stride;
   2954 
   2955                     ps_sao_ctxt->pu1_cur_chroma_recon_buf =
   2956                         ps_sao_ctxt->pu1_frm_chroma_recon_buf +
   2957                         (ps_sao_ctxt->i4_frm_chroma_recon_stride * ps_sao_ctxt->i4_ctb_y *
   2958                          (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
   2959                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
   2960 
   2961                     ps_sao_ctxt->i4_cur_chroma_recon_stride =
   2962                         ps_sao_ctxt->i4_frm_chroma_recon_stride;
   2963 
   2964                     ps_sao_ctxt->pu1_cur_luma_src_buf =
   2965                         ps_sao_ctxt->pu1_frm_luma_src_buf +
   2966                         (ps_sao_ctxt->i4_frm_luma_src_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
   2967                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
   2968 
   2969                     ps_sao_ctxt->i4_cur_luma_src_stride = ps_sao_ctxt->i4_frm_luma_src_stride;
   2970 
   2971                     ps_sao_ctxt->pu1_cur_chroma_src_buf =
   2972                         ps_sao_ctxt->pu1_frm_chroma_src_buf +
   2973                         (ps_sao_ctxt->i4_frm_chroma_src_stride * ps_sao_ctxt->i4_ctb_y *
   2974                          (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
   2975                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
   2976 
   2977                     ps_sao_ctxt->i4_cur_chroma_src_stride = ps_sao_ctxt->i4_frm_chroma_src_stride;
   2978 
   2979                     /* Calculate the pointer to buff to store the (x,y)th sao
   2980                     * for the top merge of (x,y+1)th ctb
   2981                     */
   2982                     ps_sao_ctxt->ps_top_ctb_sao =
   2983                         &ps_sao_ctxt->aps_frm_top_ctb_sao[ps_ctxt->i4_enc_frm_id]
   2984                                                          [ps_sao_ctxt->i4_ctb_x +
   2985                                                           ps_sao_ctxt->i4_ctb_y *
   2986                                                               ps_frm_ctb_prms->i4_num_ctbs_horz +
   2987                                                           (ps_ctxt->i4_bitrate_instance_num *
   2988                                                            ps_sao_ctxt->i4_num_ctb_units)];
   2989 
   2990                     /* Calculate the pointer to buff to store the top pixels of curr ctb*/
   2991                     ps_sao_ctxt->pu1_curr_sao_src_top_luma =
   2992                         ps_sao_ctxt->apu1_sao_src_frm_top_luma[ps_ctxt->i4_enc_frm_id] +
   2993                         (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_luma_buf_stride +
   2994                         ps_sao_ctxt->i4_ctb_x * ctb_size +
   2995                         ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
   2996                                                             ps_sao_ctxt->i4_top_chroma_buf_size);
   2997 
   2998                     /* Calculate the pointer to buff to store the top pixels of curr ctb*/
   2999                     ps_sao_ctxt->pu1_curr_sao_src_top_chroma =
   3000                         ps_sao_ctxt->apu1_sao_src_frm_top_chroma[ps_ctxt->i4_enc_frm_id] +
   3001                         (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_chroma_buf_stride +
   3002                         ps_sao_ctxt->i4_ctb_x * ctb_size +
   3003                         ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
   3004                                                             ps_sao_ctxt->i4_top_chroma_buf_size);
   3005 
   3006                     {
   3007                         UWORD32 u4_ctb_sao_bits;
   3008 
   3009                         ihevce_sao_analyse(
   3010                             &ps_ctxt->s_sao_ctxt_t,
   3011                             ps_ctb_out_sao,
   3012                             &u4_ctb_sao_bits,
   3013                             ps_tile_params);
   3014                         ps_ctxt
   3015                             ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
   3016                                                      [ps_ctxt->i4_bitrate_instance_num]
   3017                             ->u4_frame_rdopt_header_bits += u4_ctb_sao_bits;
   3018                         ps_ctxt
   3019                             ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
   3020                                                      [ps_ctxt->i4_bitrate_instance_num]
   3021                             ->u4_frame_rdopt_bits += u4_ctb_sao_bits;
   3022                     }
   3023                     if(ps_ctxt->i4_deblk_pad_hpel_cur_pic &
   3024                        0x1) /** Subpel generation not done for non-ref picture **/
   3025                     {
   3026                         /* Padding and Subpel Plane Generation */
   3027                         ihevce_pad_interp_recon_ctb(
   3028                             ps_pad_interp_recon,
   3029                             ctb_ctr,
   3030                             vert_ctr,
   3031                             ps_ctxt->i4_quality_preset,
   3032                             ps_frm_ctb_prms,
   3033                             ps_ctxt->ai2_scratch,
   3034                             ps_ctxt->i4_bitrate_instance_num,
   3035                             ps_ctxt->ps_func_selector);
   3036                     }
   3037                 }
   3038             }  //end of loop over CTBs in current CTB-row
   3039 
   3040             /* If SAO is on, then signal completion of the last CTB row of frame */
   3041             {
   3042                 if(vert_ctr == (ps_frm_ctb_prms->i4_num_ctbs_vert - 1))
   3043                 {
   3044                     if(!ps_ctxt->i4_bitrate_instance_num)
   3045                     {
   3046                         {
   3047                             WORD32 post_ctb_ctr;
   3048 
   3049                             for(post_ctb_ctr = ctb_start; post_ctb_ctr < ctb_end; post_ctb_ctr++)
   3050                             {
   3051                                 ihevce_dmgr_map_set_sync(
   3052                                     pv_dep_mngr_me_dep_encloop,
   3053                                     post_ctb_ctr,
   3054                                     vert_ctr,
   3055                                     MAP_CTB_COMPLETE);
   3056                             }
   3057                         }
   3058                     }
   3059                 }
   3060             }
   3061         }
   3062     }
   3063 
   3064     return;
   3065 }
   3066 
   3067 /*!
   3068 ******************************************************************************
   3069 * \if Function name : ihevce_enc_loop_process \endif
   3070 *
   3071 * \brief
   3072 *    Frame level enc_loop pass function
   3073 *
   3074 * \param[in] pv_ctxt : pointer to enc_loop module
   3075 * \param[in] ps_frm_lamda : Frame level Lambda params
   3076 * \param[in] ps_curr_inp : pointer to input yuv buffer (frame buffer)
   3077 * \param[in] ps_ctb_in : pointer to CTB structure (output of ME/IPE) (frame buffer)
   3078 * \param[out] ps_frm_recon : pointer to recon picture structure (frame buffer)
   3079 * \param[out] ps_ctb_out : pointer to CTB output structure (frame buffer)
   3080 * \param[out] ps_cu_out : pointer to CU output structure (frame buffer)
   3081 * \param[out] ps_tu_out : pointer to TU output structure (frame buffer)
   3082 * \param[out] pi2_frm_coeffs : pointer to coeff output (frame buffer)
   3083 *
   3084 * \return
   3085 *    None
   3086 *
   3087 * Note : Currently the frame level calculations assume that the
   3088 *        frame width of the input/recon is an exact multiple of ctbsize
   3089 *
   3090 * \author
   3091 *  Ittiam
   3092 *
   3093 *****************************************************************************
   3094 */
   3095 void ihevce_enc_loop_process(
   3096     void *pv_ctxt,
   3097     ihevce_lap_enc_buf_t *ps_curr_inp,
   3098     ctb_analyse_t *ps_ctb_in,
   3099     ipe_l0_ctb_analyse_for_me_t *ps_ipe_analyse,
   3100     recon_pic_buf_t *ps_frm_recon,
   3101     cur_ctb_cu_tree_t *ps_cu_tree_out,
   3102     ctb_enc_loop_out_t *ps_ctb_out,
   3103     cu_enc_loop_out_t *ps_cu_out,
   3104     tu_enc_loop_out_t *ps_tu_out,
   3105     pu_t *ps_pu_out,
   3106     UWORD8 *pu1_frm_ecd_data,
   3107     frm_ctb_ctxt_t *ps_frm_ctb_prms,
   3108     frm_lambda_ctxt_t *ps_frm_lamda,
   3109     multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
   3110     WORD32 thrd_id,
   3111     WORD32 i4_enc_frm_id,
   3112     WORD32 i4_pass)
   3113 {
   3114     WORD32 vert_ctr;
   3115     WORD32 tile_col_idx;
   3116     iv_enc_yuv_buf_t s_curr_src_bufs;
   3117     iv_enc_yuv_buf_t s_curr_recon_bufs;
   3118     iv_enc_yuv_buf_src_t s_curr_recon_bufs_src;
   3119     UWORD32 *pu4_pu_offsets;
   3120     WORD32 end_of_frame;
   3121     UWORD8 *apu1_y_sub_pel_planes[3];
   3122     pad_interp_recon_frm_t s_pad_interp_recon;
   3123     ihevce_enc_loop_master_ctxt_t *ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_ctxt;
   3124 
   3125     ihevce_enc_loop_ctxt_t *ps_ctxt = ps_master_ctxt->aps_enc_loop_thrd_ctxt[thrd_id];
   3126 
   3127     WORD32 i4_bitrate_instance_num = ps_ctxt->i4_bitrate_instance_num;
   3128 
   3129     /* initialize the closed loop lambda for the current frame */
   3130     ps_ctxt->i8_cl_ssd_lambda_qf = ps_frm_lamda->i8_cl_ssd_lambda_qf;
   3131     ps_ctxt->i8_cl_ssd_lambda_chroma_qf = ps_frm_lamda->i8_cl_ssd_lambda_chroma_qf;
   3132     ps_ctxt->u4_chroma_cost_weighing_factor = ps_frm_lamda->u4_chroma_cost_weighing_factor;
   3133     ps_ctxt->i4_satd_lamda = ps_frm_lamda->i4_cl_satd_lambda_qf;
   3134     ps_ctxt->i4_sad_lamda = ps_frm_lamda->i4_cl_sad_type2_lambda_qf;
   3135     ps_ctxt->thrd_id = thrd_id;
   3136     ps_ctxt->u1_is_refPic = ps_curr_inp->s_lap_out.i4_is_ref_pic;
   3137 
   3138 #if DISABLE_SAO_WHEN_NOISY
   3139     ps_ctxt->s_sao_ctxt_t.ps_ctb_data = ps_ctb_in;
   3140     ps_ctxt->s_sao_ctxt_t.i4_ctb_data_stride = ps_frm_ctb_prms->i4_num_ctbs_horz;
   3141 #endif
   3142 
   3143 #if ENABLE_TU_TREE_DETERMINATION_IN_RDOPT
   3144     ps_ctxt->pv_err_func_selector = ps_func_selector;
   3145 #endif
   3146 
   3147     /*Bit0 -  of this Flag indicates whether current pictute needs to be deblocked,
   3148     padded and hpel planes need to be generated.
   3149     Bit1 - of this flag set to 1 if sao is enabled. This is to enable deblocking when sao is enabled*/
   3150     ps_ctxt->i4_deblk_pad_hpel_cur_pic =
   3151         (ps_frm_recon->i4_deblk_pad_hpel_cur_pic) ||
   3152         ((ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
   3153           ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
   3154          << 1);
   3155 
   3156     /* Share all reference pictures with nbr clients. This flag will be used only
   3157     in case of dist-enc mode */
   3158     ps_ctxt->i4_share_flag = (ps_frm_recon->i4_is_reference != 0);
   3159     ps_ctxt->pv_frm_recon = (void *)ps_frm_recon;
   3160 
   3161     /* Register the frame level ssd lamda for both luma and chroma*/
   3162     ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf = ps_frm_lamda->i8_cl_ssd_lambda_qf;
   3163     ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf = ps_frm_lamda->i8_cl_ssd_lambda_chroma_qf;
   3164 
   3165     ihevce_populate_cl_cu_lambda_prms(
   3166         ps_ctxt,
   3167         ps_frm_lamda,
   3168         (WORD32)ps_ctxt->i1_slice_type,
   3169         ps_curr_inp->s_lap_out.i4_temporal_lyr_id,
   3170         ENC_LOOP_LAMBDA_TYPE);
   3171 
   3172     ps_ctxt->u1_disable_intra_eval = DISABLE_INTRA_IN_BPICS &&
   3173                                      (IHEVCE_QUALITY_P6 == ps_ctxt->i4_quality_preset) &&
   3174                                      (ps_ctxt->i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE);
   3175 
   3176     end_of_frame = 0;
   3177 
   3178     /* ----------------------------------------------------- */
   3179     /* store the stride and dimensions of source and recon   */
   3180     /* buffer pointers will be over written at every CTB row */
   3181     /* ----------------------------------------------------- */
   3182     memcpy(&s_curr_src_bufs, &ps_curr_inp->s_lap_out.s_input_buf, sizeof(iv_enc_yuv_buf_t));
   3183 
   3184     memcpy(&s_curr_recon_bufs, &ps_frm_recon->s_yuv_buf_desc, sizeof(iv_enc_yuv_buf_t));
   3185 
   3186     memcpy(&s_curr_recon_bufs_src, &ps_frm_recon->s_yuv_buf_desc_src, sizeof(iv_enc_yuv_buf_src_t));
   3187 
   3188     /* get the frame level pu offset pointer*/
   3189     pu4_pu_offsets = ps_frm_recon->pu4_pu_off;
   3190 
   3191     s_pad_interp_recon.u1_chroma_array_type = ps_ctxt->u1_chroma_array_type;
   3192 
   3193     /* ------------ Loop over all the CTB rows --------------- */
   3194     while(0 == end_of_frame)
   3195     {
   3196         UWORD8 *pu1_tmp;
   3197         UWORD8 *pu1_row_pu_map;
   3198         UWORD8 *pu1_row_ecd_data;
   3199         ctb_analyse_t *ps_ctb_row_in;
   3200         ctb_enc_loop_out_t *ps_ctb_row_out;
   3201         cu_enc_loop_out_t *ps_row_cu;
   3202         tu_enc_loop_out_t *ps_row_tu;
   3203         pu_t *ps_row_pu;
   3204         pu_col_mv_t *ps_row_col_pu;
   3205         job_queue_t *ps_job;
   3206         UWORD32 *pu4_pu_row_offsets;
   3207         UWORD16 *pu2_num_pu_row;
   3208 
   3209         ipe_l0_ctb_analyse_for_me_t *ps_row_ipe_analyse;
   3210         cur_ctb_cu_tree_t *ps_row_cu_tree;
   3211         UWORD8 is_inp_422 = (ps_ctxt->u1_chroma_array_type == 2);
   3212 
   3213         /* Get the current row from the job queue */
   3214         ps_job = (job_queue_t *)ihevce_enc_grp_get_next_job(
   3215             ps_multi_thrd_ctxt, ENC_LOOP_JOB + i4_bitrate_instance_num, 1, i4_enc_frm_id);
   3216 
   3217         /* Register the pointer to ctb out of the current frame*/
   3218         ps_ctxt->s_sao_ctxt_t.ps_ctb_out = ps_ctb_out;
   3219 
   3220         /* If all rows are done, set the end of process flag to 1, */
   3221         /* and the current row to -1 */
   3222         if(NULL == ps_job)
   3223         {
   3224             vert_ctr = -1;
   3225             tile_col_idx = -1;
   3226             end_of_frame = 1;
   3227         }
   3228         else
   3229         {
   3230             ihevce_tile_params_t *ps_col_tile_params_temp;
   3231             ihevce_tile_params_t *ps_tile_params;
   3232             WORD32 i4_tile_id;
   3233 
   3234             ASSERT((ENC_LOOP_JOB + i4_bitrate_instance_num) == ps_job->i4_task_type);
   3235             /* set the output dependency */
   3236             ihevce_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_enc_frm_id);
   3237 
   3238             /* Obtain the current row's details from the job */
   3239             vert_ctr = ps_job->s_job_info.s_enc_loop_job_info.i4_ctb_row_no;
   3240             {
   3241                 /* Obtain the current column tile index from the job */
   3242                 tile_col_idx = ps_job->s_job_info.s_enc_loop_job_info.i4_tile_col_idx;
   3243 
   3244                 /* The tile parameter for the col. idx. Use only the properties
   3245                 which is same for all the bottom tiles like width, start_x, etc.
   3246                 Don't use height, start_y, etc.                                  */
   3247                 ps_col_tile_params_temp =
   3248                     ((ihevce_tile_params_t *)ps_master_ctxt->pv_tile_params_base + tile_col_idx);
   3249 
   3250                 /* Derive actual tile_id based on vert_ctr */
   3251                 i4_tile_id =
   3252                     *(ps_frm_ctb_prms->pi4_tile_id_map +
   3253                       vert_ctr * ps_frm_ctb_prms->i4_tile_id_ctb_map_stride +
   3254                       ps_col_tile_params_temp->i4_first_ctb_x);
   3255                 /* Derive pointer to current tile prms */
   3256                 ps_tile_params =
   3257                     ((ihevce_tile_params_t *)ps_master_ctxt->pv_tile_params_base + i4_tile_id);
   3258             }
   3259 
   3260             ps_ctxt->i4_tile_col_idx = tile_col_idx;
   3261             /* derive the current ctb row pointers */
   3262 
   3263             /* luma src */
   3264             pu1_tmp = (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_y_buf +
   3265                       (ps_curr_inp->s_lap_out.s_input_buf.i4_start_offset_y *
   3266                        ps_curr_inp->s_lap_out.s_input_buf.i4_y_strd) +
   3267                       ps_curr_inp->s_lap_out.s_input_buf.i4_start_offset_x;
   3268 
   3269             pu1_tmp +=
   3270                 (vert_ctr * ps_frm_ctb_prms->i4_ctb_size *
   3271                  ps_curr_inp->s_lap_out.s_input_buf.i4_y_strd);
   3272 
   3273             s_curr_src_bufs.pv_y_buf = pu1_tmp;
   3274 
   3275             if(!ps_ctxt->u1_is_input_data_hbd)
   3276             {
   3277                 /* cb src */
   3278                 pu1_tmp = (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_u_buf;
   3279                 pu1_tmp +=
   3280                     (vert_ctr * (ps_frm_ctb_prms->i4_ctb_size >> ((is_inp_422 == 1) ? 0 : 1)) *
   3281                      ps_curr_inp->s_lap_out.s_input_buf.i4_uv_strd);
   3282 
   3283                 s_curr_src_bufs.pv_u_buf = pu1_tmp;
   3284             }
   3285 
   3286             /* luma recon */
   3287             pu1_tmp = (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_y_buf;
   3288             pu1_tmp +=
   3289                 (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * ps_frm_recon->s_yuv_buf_desc.i4_y_strd);
   3290 
   3291             s_curr_recon_bufs.pv_y_buf = pu1_tmp;
   3292             s_pad_interp_recon.pu1_luma_recon = (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_y_buf;
   3293             s_pad_interp_recon.i4_luma_recon_stride = ps_frm_recon->s_yuv_buf_desc.i4_y_strd;
   3294             if(!ps_ctxt->u1_is_input_data_hbd)
   3295             {
   3296                 /* cb recon */
   3297                 pu1_tmp = (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_u_buf;
   3298                 pu1_tmp +=
   3299                     (vert_ctr * (ps_frm_ctb_prms->i4_ctb_size >> ((is_inp_422 == 1) ? 0 : 1)) *
   3300                      ps_frm_recon->s_yuv_buf_desc.i4_uv_strd);
   3301 
   3302                 s_curr_recon_bufs.pv_u_buf = pu1_tmp;
   3303                 s_pad_interp_recon.pu1_chrm_recon = (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_u_buf;
   3304                 s_pad_interp_recon.i4_chrm_recon_stride = ps_frm_recon->s_yuv_buf_desc.i4_uv_strd;
   3305 
   3306                 s_pad_interp_recon.i4_ctb_size = ps_frm_ctb_prms->i4_ctb_size;
   3307 
   3308                 /* Register the source buffer pointers in sao context*/
   3309                 ps_ctxt->s_sao_ctxt_t.pu1_frm_luma_src_buf =
   3310                     (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_y_buf +
   3311                     (ps_curr_inp->s_lap_out.s_input_buf.i4_start_offset_y *
   3312                      ps_curr_inp->s_lap_out.s_input_buf.i4_y_strd) +
   3313                     ps_curr_inp->s_lap_out.s_input_buf.i4_start_offset_x;
   3314 
   3315                 ps_ctxt->s_sao_ctxt_t.i4_frm_luma_src_stride =
   3316                     ps_curr_inp->s_lap_out.s_input_buf.i4_y_strd;
   3317 
   3318                 ps_ctxt->s_sao_ctxt_t.pu1_frm_chroma_src_buf =
   3319                     (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_u_buf;
   3320 
   3321                 ps_ctxt->s_sao_ctxt_t.i4_frm_chroma_src_stride =
   3322                     ps_curr_inp->s_lap_out.s_input_buf.i4_uv_strd;
   3323             }
   3324 
   3325             /* Subpel planes hxfy, fxhy, hxhy*/
   3326             pu1_tmp = ps_frm_recon->apu1_y_sub_pel_planes[0];
   3327             pu1_tmp +=
   3328                 (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * ps_frm_recon->s_yuv_buf_desc.i4_y_strd);
   3329             apu1_y_sub_pel_planes[0] = pu1_tmp;
   3330             s_pad_interp_recon.pu1_sbpel_hxfy = ps_frm_recon->apu1_y_sub_pel_planes[0];
   3331 
   3332             pu1_tmp = ps_frm_recon->apu1_y_sub_pel_planes[1];
   3333             pu1_tmp +=
   3334                 (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * ps_frm_recon->s_yuv_buf_desc.i4_y_strd);
   3335             apu1_y_sub_pel_planes[1] = pu1_tmp;
   3336             s_pad_interp_recon.pu1_sbpel_fxhy = ps_frm_recon->apu1_y_sub_pel_planes[1];
   3337 
   3338             pu1_tmp = ps_frm_recon->apu1_y_sub_pel_planes[2];
   3339             pu1_tmp +=
   3340                 (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * ps_frm_recon->s_yuv_buf_desc.i4_y_strd);
   3341             apu1_y_sub_pel_planes[2] = pu1_tmp;
   3342             s_pad_interp_recon.pu1_sbpel_hxhy = ps_frm_recon->apu1_y_sub_pel_planes[2];
   3343 
   3344             /* row level coeffs buffer */
   3345             pu1_row_ecd_data =
   3346                 pu1_frm_ecd_data +
   3347                 (vert_ctr *
   3348                  ((is_inp_422 == 1) ? (ps_frm_ctb_prms->i4_max_tus_in_row << 1)
   3349                                     : ((ps_frm_ctb_prms->i4_max_tus_in_row * 3) >> 1)) *
   3350                  MAX_SCAN_COEFFS_BYTES_4x4);
   3351 
   3352             /* Row level CU buffer */
   3353             ps_row_cu = ps_cu_out + (vert_ctr * ps_frm_ctb_prms->i4_max_cus_in_row);
   3354 
   3355             /* Row level TU buffer */
   3356             ps_row_tu = ps_tu_out + (vert_ctr * ps_frm_ctb_prms->i4_max_tus_in_row);
   3357 
   3358             /* Row level PU buffer */
   3359             ps_row_pu = ps_pu_out + (vert_ctr * ps_frm_ctb_prms->i4_max_pus_in_row);
   3360 
   3361             /* Row level colocated PU buffer */
   3362             /* ps_frm_col_mv has (i4_num_ctbs_horz + 1) CTBs for stride */
   3363             ps_row_col_pu =
   3364                 ps_frm_recon->ps_frm_col_mv + (vert_ctr * (ps_frm_ctb_prms->i4_num_ctbs_horz + 1) *
   3365                                                ps_frm_ctb_prms->i4_num_pus_in_ctb);
   3366             /* Row level col PU map buffer */
   3367             /* pu1_frm_pu_map has (i4_num_ctbs_horz + 1) CTBs for stride */
   3368             pu1_row_pu_map =
   3369                 ps_frm_recon->pu1_frm_pu_map + (vert_ctr * (ps_frm_ctb_prms->i4_num_ctbs_horz + 1) *
   3370                                                 ps_frm_ctb_prms->i4_num_pus_in_ctb);
   3371             /* row ctb in pointer  */
   3372             ps_ctb_row_in = ps_ctb_in + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz;
   3373 
   3374             /* row ctb out pointer  */
   3375             ps_ctb_row_out = ps_ctb_out + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz;
   3376 
   3377             /* row number of PUs map pointer */
   3378             pu2_num_pu_row =
   3379                 ps_frm_recon->pu2_num_pu_map + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz;
   3380 
   3381             /* row pu offsets pointer  */
   3382             pu4_pu_row_offsets = pu4_pu_offsets + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz;
   3383             /* store the first CTB pu offset pointer */
   3384             *pu4_pu_row_offsets = vert_ctr * ps_frm_ctb_prms->i4_max_pus_in_row;
   3385             /* Initialize ptr to current IPE row */
   3386             ps_row_ipe_analyse = ps_ipe_analyse + (vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz);
   3387 
   3388             /* Initialize ptr to current row */
   3389             ps_row_cu_tree = ps_cu_tree_out +
   3390                              (vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz * MAX_NUM_NODES_CU_TREE);
   3391 
   3392             /* Get the EncLoop Top-Right CU Dep Mngr */
   3393             ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right =
   3394                 ps_master_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[ps_ctxt->i4_enc_frm_id]
   3395                                                                    [i4_bitrate_instance_num];
   3396             /* Get the EncLoop Deblock Dep Mngr */
   3397             ps_ctxt->pv_dep_mngr_enc_loop_dblk =
   3398                 ps_master_ctxt
   3399                     ->aapv_dep_mngr_enc_loop_dblk[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num];
   3400 
   3401             ps_ctxt->pu1_curr_row_cabac_state = &ps_master_ctxt->au1_ctxt_models[vert_ctr][0];
   3402 
   3403             {
   3404                 /* derive the pointers of top row buffers */
   3405                 ps_ctxt->pv_top_row_luma =
   3406                     (UWORD8 *)ps_ctxt->apv_frm_top_row_luma[ps_ctxt->i4_enc_frm_id] +
   3407                     (ps_ctxt->i4_frm_top_row_luma_size * ps_ctxt->i4_bitrate_instance_num) +
   3408                     (vert_ctr - 1) * ps_ctxt->i4_top_row_luma_stride;
   3409 
   3410                 ps_ctxt->pv_top_row_chroma =
   3411                     (UWORD8 *)ps_ctxt->apv_frm_top_row_chroma[ps_ctxt->i4_enc_frm_id] +
   3412                     (ps_ctxt->i4_frm_top_row_chroma_size * ps_ctxt->i4_bitrate_instance_num) +
   3413                     (vert_ctr - 1) * ps_ctxt->i4_top_row_chroma_stride;
   3414 
   3415                 /* derive the pointers of bottom row buffers to update current row data */
   3416                 ps_ctxt->pv_bot_row_luma =
   3417                     (UWORD8 *)ps_ctxt->apv_frm_top_row_luma[ps_ctxt->i4_enc_frm_id] +
   3418                     (ps_ctxt->i4_frm_top_row_luma_size * ps_ctxt->i4_bitrate_instance_num) +
   3419                     (vert_ctr)*ps_ctxt->i4_top_row_luma_stride;
   3420 
   3421                 ps_ctxt->pv_bot_row_chroma =
   3422                     (UWORD8 *)ps_ctxt->apv_frm_top_row_chroma[ps_ctxt->i4_enc_frm_id] +
   3423                     (ps_ctxt->i4_frm_top_row_chroma_size * ps_ctxt->i4_bitrate_instance_num) +
   3424                     (vert_ctr)*ps_ctxt->i4_top_row_chroma_stride;
   3425 
   3426                 /* Register the buffer pointers in sao context*/
   3427                 ps_ctxt->s_sao_ctxt_t.pu1_frm_luma_recon_buf =
   3428                     (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_y_buf;
   3429                 ps_ctxt->s_sao_ctxt_t.i4_frm_luma_recon_stride =
   3430                     ps_frm_recon->s_yuv_buf_desc.i4_y_strd;
   3431 
   3432                 ps_ctxt->s_sao_ctxt_t.pu1_frm_chroma_recon_buf =
   3433                     (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_u_buf;
   3434                 ps_ctxt->s_sao_ctxt_t.i4_frm_chroma_recon_stride =
   3435                     ps_frm_recon->s_yuv_buf_desc.i4_uv_strd;
   3436 
   3437                 ps_ctxt->s_sao_ctxt_t.ps_rdopt_entropy_ctxt = &ps_ctxt->s_rdopt_entropy_ctxt;
   3438 
   3439                 ps_ctxt->s_sao_ctxt_t.i4_frm_top_luma_buf_stride =
   3440                     ps_ctxt->s_sao_ctxt_t.u4_ctb_aligned_wd + 1;
   3441 
   3442                 ps_ctxt->s_sao_ctxt_t.i4_frm_top_chroma_buf_stride =
   3443                     ps_ctxt->s_sao_ctxt_t.u4_ctb_aligned_wd + 2;
   3444             }
   3445 
   3446             ps_ctxt->ps_top_row_nbr =
   3447                 ps_ctxt->aps_frm_top_row_nbr[ps_ctxt->i4_enc_frm_id] +
   3448                 (ps_ctxt->i4_frm_top_row_nbr_size * ps_ctxt->i4_bitrate_instance_num) +
   3449                 (vert_ctr - 1) * ps_ctxt->i4_top_row_nbr_stride;
   3450 
   3451             ps_ctxt->ps_bot_row_nbr =
   3452                 ps_ctxt->aps_frm_top_row_nbr[ps_ctxt->i4_enc_frm_id] +
   3453                 (ps_ctxt->i4_frm_top_row_nbr_size * ps_ctxt->i4_bitrate_instance_num) +
   3454                 (vert_ctr)*ps_ctxt->i4_top_row_nbr_stride;
   3455 
   3456             if(vert_ctr > 0)
   3457             {
   3458                 ps_ctxt->pu1_top_rt_cabac_state = &ps_master_ctxt->au1_ctxt_models[vert_ctr - 1][0];
   3459             }
   3460             else
   3461             {
   3462                 ps_ctxt->pu1_top_rt_cabac_state = NULL;
   3463             }
   3464 
   3465             ASSERT(
   3466                 ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[0]
   3467                     .ps_pps->i1_sign_data_hiding_flag ==
   3468                 ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[1]
   3469                     .ps_pps->i1_sign_data_hiding_flag);
   3470 
   3471             /* call the row level processing function */
   3472             ihevce_enc_loop_process_row(
   3473                 ps_ctxt,
   3474                 &s_curr_src_bufs,
   3475                 &s_curr_recon_bufs,
   3476                 &s_curr_recon_bufs_src,
   3477                 &apu1_y_sub_pel_planes[0],
   3478                 ps_ctb_row_in,
   3479                 ps_ctb_row_out,
   3480                 ps_row_ipe_analyse,
   3481                 ps_row_cu_tree,
   3482                 ps_row_cu,
   3483                 ps_row_tu,
   3484                 ps_row_pu,
   3485                 ps_row_col_pu,
   3486                 pu2_num_pu_row,
   3487                 pu1_row_pu_map,
   3488                 pu1_row_ecd_data,
   3489                 pu4_pu_row_offsets,
   3490                 ps_frm_ctb_prms,
   3491                 vert_ctr,
   3492                 ps_frm_recon,
   3493                 ps_ctxt->pv_dep_mngr_encloop_dep_me,
   3494                 &s_pad_interp_recon,
   3495                 i4_pass,
   3496                 ps_multi_thrd_ctxt,
   3497                 ps_tile_params);
   3498         }
   3499     }
   3500 }
   3501 
   3502 /*!
   3503 ******************************************************************************
   3504 * \if Function name : ihevce_enc_loop_dblk_get_prms_dep_mngr \endif
   3505 *
   3506 * \brief Returns to the caller key attributes relevant for dependency manager,
   3507 *        ie, the number of vertical units in l0 layer
   3508 *
   3509 * \par Description:
   3510 *
   3511 * \param[in] pai4_ht    : ht
   3512 * \param[out] pi4_num_vert_units_in_lyr : Pointer to store num vertical units
   3513 *                                         for deblocking
   3514 *
   3515 * \return
   3516 *    None
   3517 *
   3518 * \author
   3519 *  Ittiam
   3520 *
   3521 *****************************************************************************
   3522 */
   3523 void ihevce_enc_loop_dblk_get_prms_dep_mngr(WORD32 i4_ht, WORD32 *pi4_num_vert_units_in_lyr)
   3524 {
   3525     /* Blk ht at a given layer*/
   3526     WORD32 unit_ht_c;
   3527     WORD32 ctb_size = 64;
   3528 
   3529     /* compute blk ht and unit ht */
   3530     unit_ht_c = ctb_size;
   3531 
   3532     /* set the numebr of vertical units */
   3533     *pi4_num_vert_units_in_lyr = (i4_ht + unit_ht_c - 1) / unit_ht_c;
   3534 }
   3535 
   3536 /*!
   3537 ******************************************************************************
   3538 * \if Function name : ihevce_enc_loop_get_num_mem_recs \endif
   3539 *
   3540 * \brief
   3541 *    Number of memory records are returned for enc_loop module
   3542 * Note : Include TOT MEM. req. for ENC.LOOP + TOT MEM. req. for Dep Mngr for Dblk
   3543 *
   3544 * \return
   3545 *    None
   3546 *
   3547 * \author
   3548 *  Ittiam
   3549 *
   3550 *****************************************************************************
   3551 */
   3552 WORD32
   3553     ihevce_enc_loop_get_num_mem_recs(WORD32 i4_num_bitrate_inst, WORD32 i4_num_enc_loop_frm_pllel)
   3554 {
   3555     WORD32 enc_loop_mem_recs = NUM_ENC_LOOP_MEM_RECS;
   3556     WORD32 enc_loop_dblk_dep_mngr_mem_recs =
   3557         i4_num_enc_loop_frm_pllel * i4_num_bitrate_inst * ihevce_dmgr_get_num_mem_recs();
   3558     WORD32 enc_loop_cu_top_right_dep_mngr_mem_recs =
   3559         i4_num_enc_loop_frm_pllel * i4_num_bitrate_inst * ihevce_dmgr_get_num_mem_recs();
   3560     WORD32 enc_loop_aux_br_dep_mngr_mem_recs =
   3561         i4_num_enc_loop_frm_pllel * (i4_num_bitrate_inst - 1) * ihevce_dmgr_get_num_mem_recs();
   3562 
   3563     return (
   3564         (enc_loop_mem_recs + enc_loop_dblk_dep_mngr_mem_recs +
   3565          enc_loop_cu_top_right_dep_mngr_mem_recs + enc_loop_aux_br_dep_mngr_mem_recs));
   3566 }
   3567 /*!
   3568 ******************************************************************************
   3569 * \if Function name : ihevce_enc_loop_get_mem_recs \endif
   3570 *
   3571 * \brief
   3572 *    Memory requirements are returned for ENC_LOOP.
   3573 *
   3574 * \param[in,out]  ps_mem_tab : pointer to memory descriptors table
   3575 * \param[in] ps_init_prms : Create time static parameters
   3576 * \param[in] i4_num_proc_thrds : Number of processing threads for this module
   3577 * \param[in] i4_mem_space : memspace in which memory request should be done
   3578 *
   3579 * \return
   3580 *    None
   3581 *
   3582 * \author
   3583 *  Ittiam
   3584 *
   3585 *****************************************************************************
   3586 */
   3587 WORD32 ihevce_enc_loop_get_mem_recs(
   3588     iv_mem_rec_t *ps_mem_tab,
   3589     ihevce_static_cfg_params_t *ps_init_prms,
   3590     WORD32 i4_num_proc_thrds,
   3591     WORD32 i4_num_bitrate_inst,
   3592     WORD32 i4_num_enc_loop_frm_pllel,
   3593     WORD32 i4_mem_space,
   3594     WORD32 i4_resolution_id)
   3595 {
   3596     UWORD32 u4_width, u4_height, n_tabs;
   3597     UWORD32 u4_ctb_in_a_row, u4_ctb_rows_in_a_frame;
   3598     WORD32 ctr;
   3599     WORD32 i4_chroma_format = ps_init_prms->s_src_prms.i4_chr_format;
   3600 
   3601     /* derive frame dimensions */
   3602     /*width of the input YUV to be encoded */
   3603     u4_width = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
   3604     /*making the width a multiple of CTB size*/
   3605     u4_width += SET_CTB_ALIGN(
   3606         ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, MAX_CTB_SIZE);
   3607 
   3608     /*height of the input YUV to be encoded */
   3609     u4_height = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
   3610     /*making the height a multiple of CTB size*/
   3611     u4_height += SET_CTB_ALIGN(
   3612         ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, MAX_CTB_SIZE);
   3613     u4_ctb_in_a_row = (u4_width / MAX_CTB_SIZE);
   3614     u4_ctb_rows_in_a_frame = (u4_height / MAX_CTB_SIZE);
   3615     /* memories should be requested assuming worst case requirements */
   3616 
   3617     /* Module context structure */
   3618     ps_mem_tab[ENC_LOOP_CTXT].i4_mem_size = sizeof(ihevce_enc_loop_master_ctxt_t);
   3619 
   3620     ps_mem_tab[ENC_LOOP_CTXT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
   3621 
   3622     ps_mem_tab[ENC_LOOP_CTXT].i4_mem_alignment = 8;
   3623 
   3624     /* Thread context structure */
   3625     ps_mem_tab[ENC_LOOP_THRDS_CTXT].i4_mem_size =
   3626         i4_num_proc_thrds * sizeof(ihevce_enc_loop_ctxt_t);
   3627 
   3628     ps_mem_tab[ENC_LOOP_THRDS_CTXT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
   3629 
   3630     ps_mem_tab[ENC_LOOP_THRDS_CTXT].i4_mem_alignment = 16;
   3631 
   3632     /* Scale matrices */
   3633     ps_mem_tab[ENC_LOOP_SCALE_MAT].i4_mem_size = 2 * MAX_TU_SIZE * MAX_TU_SIZE * sizeof(WORD16);
   3634 
   3635     ps_mem_tab[ENC_LOOP_SCALE_MAT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
   3636 
   3637     ps_mem_tab[ENC_LOOP_SCALE_MAT].i4_mem_alignment = 8;
   3638 
   3639     /* Rescale matrices */
   3640     ps_mem_tab[ENC_LOOP_RESCALE_MAT].i4_mem_size = 2 * MAX_TU_SIZE * MAX_TU_SIZE * sizeof(WORD16);
   3641 
   3642     ps_mem_tab[ENC_LOOP_RESCALE_MAT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
   3643 
   3644     ps_mem_tab[ENC_LOOP_RESCALE_MAT].i4_mem_alignment = 8;
   3645 
   3646     /* top row luma one row of pixel data per CTB row */
   3647     if(ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8)
   3648     {
   3649         ps_mem_tab[ENC_LOOP_TOP_LUMA].i4_mem_size = (u4_ctb_rows_in_a_frame + 1) *
   3650                                                     (u4_width + MAX_CU_SIZE + 1) * sizeof(UWORD16) *
   3651                                                     i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel;
   3652     }
   3653     else
   3654     {
   3655         ps_mem_tab[ENC_LOOP_TOP_LUMA].i4_mem_size = (u4_ctb_rows_in_a_frame + 1) *
   3656                                                     (u4_width + MAX_CU_SIZE + 1) * sizeof(UWORD8) *
   3657                                                     i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel;
   3658     }
   3659 
   3660     ps_mem_tab[ENC_LOOP_TOP_LUMA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
   3661 
   3662     ps_mem_tab[ENC_LOOP_TOP_LUMA].i4_mem_alignment = 8;
   3663 
   3664     /* top row chroma */
   3665     if(ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8)
   3666     {
   3667         ps_mem_tab[ENC_LOOP_TOP_CHROMA].i4_mem_size =
   3668             (u4_ctb_rows_in_a_frame + 1) * (u4_width + MAX_CU_SIZE + 2) * sizeof(UWORD16) *
   3669             i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel;
   3670     }
   3671     else
   3672     {
   3673         ps_mem_tab[ENC_LOOP_TOP_CHROMA].i4_mem_size =
   3674             (u4_ctb_rows_in_a_frame + 1) * (u4_width + MAX_CU_SIZE + 2) * sizeof(UWORD8) *
   3675             i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel;
   3676     }
   3677 
   3678     ps_mem_tab[ENC_LOOP_TOP_CHROMA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
   3679 
   3680     ps_mem_tab[ENC_LOOP_TOP_CHROMA].i4_mem_alignment = 8;
   3681 
   3682     /* top row neighbour 4x4 */
   3683     ps_mem_tab[ENC_LOOP_TOP_NBR4X4].i4_mem_size =
   3684         (u4_ctb_rows_in_a_frame + 1) * (((u4_width + MAX_CU_SIZE) >> 2) + 1) * sizeof(nbr_4x4_t) *
   3685         i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel;
   3686 
   3687     ps_mem_tab[ENC_LOOP_TOP_NBR4X4].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
   3688 
   3689     ps_mem_tab[ENC_LOOP_TOP_NBR4X4].i4_mem_alignment = 8;
   3690 
   3691     /* memory to dump rate control parameters by each thread for each bit-rate instance */
   3692     /* RC params collated by each thread for each bit-rate instance separately */
   3693     ps_mem_tab[ENC_LOOP_RC_PARAMS].i4_mem_size = i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel *
   3694                                                  i4_num_proc_thrds * sizeof(enc_loop_rc_params_t);
   3695 
   3696     ps_mem_tab[ENC_LOOP_RC_PARAMS].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
   3697 
   3698     ps_mem_tab[ENC_LOOP_RC_PARAMS].i4_mem_alignment = 8;
   3699     /* Memory required for deblocking */
   3700     {
   3701         /* Memory to store Qp of top4x4 blocks for each CTB row.
   3702         This memory is allocated at frame level and shared across
   3703         all cores. The Qp values are needed to form Qp-map(described
   3704         in the ENC_LOOP_DEBLOCKING section below)*/
   3705 
   3706         UWORD32 u4_size_bs_memory, u4_size_qp_memory;
   3707         UWORD32 u4_size_top_4x4_qp_memory;
   3708 
   3709         /*Memory required to store Qp of top4x4 blocks for a CTB row for entire frame*/
   3710         /*Space required per CTB*/
   3711         u4_size_top_4x4_qp_memory = (MAX_CTB_SIZE / 4);
   3712         /*Space required for entire CTB row*/
   3713         u4_size_top_4x4_qp_memory *= u4_ctb_in_a_row;
   3714         /*Space required for entire frame*/
   3715         u4_size_top_4x4_qp_memory *= u4_ctb_rows_in_a_frame;
   3716         /*Space required for multiple bitrate*/
   3717         u4_size_top_4x4_qp_memory *= i4_num_bitrate_inst;
   3718         /*Space required for multiple frames in parallel*/
   3719         u4_size_top_4x4_qp_memory *= i4_num_enc_loop_frm_pllel;
   3720 
   3721         ps_mem_tab[ENC_LOOP_QP_TOP_4X4].i4_mem_size = u4_size_top_4x4_qp_memory;
   3722         ps_mem_tab[ENC_LOOP_QP_TOP_4X4].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
   3723         ps_mem_tab[ENC_LOOP_QP_TOP_4X4].i4_mem_alignment = 8;
   3724 
   3725         /* Memory allocation of BS and Qp-map for deblocking at CTB-row level:
   3726         ## Boundary Strength(Vertical):
   3727         BS stored per CTB at one stretch i.e. for a 64x CTB first 8 entries belongs to first CTB
   3728         of the row followed by 8 entries of second CTB and so on.
   3729         8 entries: Includes left edge of current CTB and excludes right edge.
   3730         ## Boundary Strength(Horizontal):
   3731         Same as Vertical.
   3732         8 entries:  Includes top edge of current CTB and excludes bottom edge.
   3733 
   3734         ## Qp-map storage:
   3735         T0 T1 T2 T3 T4 T5 ..........to the end of the CTB row
   3736         00 01 02 03 04 05 ..........to the end of the CTB row
   3737         10 11 12 13 14 15 ..........to the end of the CTB row
   3738         20 21 22 23 24 25 ..........to the end of the CTB row
   3739         30 31 32 33 34 35 ..........to the end of the CTB row
   3740         40 41 42 43 44 45 ..........to the end of the CTB row
   3741         ............................to the end of the CTB row
   3742         upto height_of_CTB..........to the end of the CTB row
   3743 
   3744         Qp is stored for each "4x4 block" in a proper 2-D array format (One entry for each 4x4).
   3745         A 2-D array of height= (height_of_CTB +1), and width = (width_of_CTB).
   3746         where,
   3747         => height_of_CTB = number of 4x4 blocks in a CTB  vertically,
   3748         => +1 is done to store Qp of lowest 4x4-block layer of top-CTB
   3749         in order to deblock top edge of current CTB.
   3750         => width_of_CTB  = number of 4x4 blocks in a CTB  horizontally,
   3751         */
   3752 
   3753         /*Memory(in bytes) required for storing Boundary Strength for entire CTB row*/
   3754         /*1 vertical edge per 8 pixel*/
   3755         u4_size_bs_memory = (MAX_CTB_SIZE >> 3);
   3756         /*Vertical edges for entire width of CTB row*/
   3757         u4_size_bs_memory *= u4_ctb_in_a_row;
   3758         /*Each vertical edge of CTB row is 4 bytes*/
   3759         u4_size_bs_memory = u4_size_bs_memory << 2;
   3760         /*Adding Memory required for storing horizontal BS by doubling*/
   3761         u4_size_bs_memory = u4_size_bs_memory << 1;
   3762 
   3763         /*Memory(in bytes) required for storing Qp at 4x4 level for entire CTB row*/
   3764         /*Number of 4x4 blocks in the width of a CTB*/
   3765         u4_size_qp_memory = (MAX_CTB_SIZE >> 2);
   3766         /*Number of 4x4 blocks in the height of a CTB. Adding 1 to store Qp of lowest
   3767         4x4-block layer of top-CTB in order to deblock top edge of current CTB*/
   3768         u4_size_qp_memory *= ((MAX_CTB_SIZE >> 2) + 1);
   3769         /*Storage for entire CTB row*/
   3770         u4_size_qp_memory *= u4_ctb_in_a_row;
   3771 
   3772         /*Multiplying by i4_num_proc_thrds to assign memory for each core*/
   3773         ps_mem_tab[ENC_LOOP_DEBLOCKING].i4_mem_size =
   3774             i4_num_proc_thrds * (u4_size_bs_memory + u4_size_qp_memory);
   3775 
   3776         ps_mem_tab[ENC_LOOP_DEBLOCKING].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
   3777 
   3778         ps_mem_tab[ENC_LOOP_DEBLOCKING].i4_mem_alignment = 8;
   3779     }
   3780 
   3781     /* Memory required to store pred for 422 chroma */
   3782     ps_mem_tab[ENC_LOOP_422_CHROMA_INTRA_PRED].i4_mem_size =
   3783         i4_num_proc_thrds * MAX_CTB_SIZE * MAX_CTB_SIZE * 2 *
   3784         (i4_chroma_format == IV_YUV_422SP_UV) *
   3785         ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
   3786 
   3787     ps_mem_tab[ENC_LOOP_422_CHROMA_INTRA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
   3788 
   3789     ps_mem_tab[ENC_LOOP_422_CHROMA_INTRA_PRED].i4_mem_alignment = 8;
   3790 
   3791     /* Memory for inter pred buffers */
   3792     {
   3793         WORD32 i4_num_bufs_per_thread = 0;
   3794 
   3795         WORD32 i4_buf_size_per_cand =
   3796             (MAX_CTB_SIZE) * (MAX_CTB_SIZE) *
   3797             ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
   3798         WORD32 i4_quality_preset =
   3799             ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset;
   3800         switch(i4_quality_preset)
   3801         {
   3802         case IHEVCE_QUALITY_P0:
   3803         {
   3804             i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_PQ;
   3805             break;
   3806         }
   3807         case IHEVCE_QUALITY_P2:
   3808         {
   3809             i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_HQ;
   3810             break;
   3811         }
   3812         case IHEVCE_QUALITY_P3:
   3813         {
   3814             i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_MS;
   3815             break;
   3816         }
   3817         case IHEVCE_QUALITY_P4:
   3818         {
   3819             i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_HS;
   3820             break;
   3821         }
   3822         case IHEVCE_QUALITY_P5:
   3823         case IHEVCE_QUALITY_P6:
   3824         case IHEVCE_QUALITY_P7:
   3825         {
   3826             i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_ES;
   3827             break;
   3828         }
   3829         default:
   3830         {
   3831             ASSERT(0);
   3832         }
   3833         }
   3834 
   3835         i4_num_bufs_per_thread += 4;
   3836 
   3837         ps_mem_tab[ENC_LOOP_INTER_PRED].i4_mem_size =
   3838             i4_num_bufs_per_thread * i4_num_proc_thrds * i4_buf_size_per_cand;
   3839 
   3840         ps_mem_tab[ENC_LOOP_INTER_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
   3841 
   3842         ps_mem_tab[ENC_LOOP_INTER_PRED].i4_mem_alignment = 8;
   3843     }
   3844 
   3845     /* Memory required to store chroma intra pred */
   3846     ps_mem_tab[ENC_LOOP_CHROMA_PRED_INTRA].i4_mem_size =
   3847         i4_num_proc_thrds * (MAX_TU_SIZE) * (MAX_TU_SIZE)*2 * NUM_POSSIBLE_TU_SIZES_CHR_INTRA_SATD *
   3848         ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1) *
   3849         ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
   3850 
   3851     ps_mem_tab[ENC_LOOP_CHROMA_PRED_INTRA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
   3852 
   3853     ps_mem_tab[ENC_LOOP_CHROMA_PRED_INTRA].i4_mem_alignment = 8;
   3854 
   3855     /* Memory required to store pred for reference substitution output */
   3856     ps_mem_tab[ENC_LOOP_REF_SUB_OUT].i4_mem_size =
   3857         i4_num_proc_thrds * ((MAX_TU_SIZE * 2 * 2) + 4) *
   3858         ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
   3859 
   3860     ps_mem_tab[ENC_LOOP_REF_SUB_OUT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
   3861 
   3862     ps_mem_tab[ENC_LOOP_REF_SUB_OUT].i4_mem_alignment = 8;
   3863 
   3864     /* Memory required to store pred for reference filtering output */
   3865     ps_mem_tab[ENC_LOOP_REF_FILT_OUT].i4_mem_size =
   3866         i4_num_proc_thrds * ((MAX_TU_SIZE * 2 * 2) + 4) *
   3867         ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
   3868 
   3869     ps_mem_tab[ENC_LOOP_REF_FILT_OUT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
   3870 
   3871     ps_mem_tab[ENC_LOOP_REF_FILT_OUT].i4_mem_alignment = 8;
   3872 
   3873 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
   3874     if(ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset == 0)
   3875 #endif
   3876     {
   3877         /* Memory assignments for recon storage during CU Recursion */
   3878         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].i4_mem_size =
   3879             i4_num_proc_thrds * (MAX_CU_SIZE * MAX_CU_SIZE) *
   3880             ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
   3881 
   3882         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
   3883 
   3884         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].i4_mem_alignment = 8;
   3885 
   3886         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].i4_mem_size =
   3887             i4_num_proc_thrds * (MAX_CU_SIZE * (MAX_CU_SIZE >> 1)) *
   3888             ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1) *
   3889             ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
   3890 
   3891         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
   3892 
   3893         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].i4_mem_alignment = 8;
   3894     }
   3895 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
   3896     else
   3897     {
   3898         /* Memory assignments for recon storage during CU Recursion */
   3899         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].i4_mem_size = 0;
   3900 
   3901         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
   3902 
   3903         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].i4_mem_alignment = 8;
   3904 
   3905         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].i4_mem_size = 0;
   3906 
   3907         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
   3908 
   3909         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].i4_mem_alignment = 8;
   3910     }
   3911 #endif
   3912 
   3913 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
   3914     if(ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset == 0)
   3915 #endif
   3916     {
   3917         /* Memory assignments for pred storage during CU Recursion */
   3918         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].i4_mem_size =
   3919             i4_num_proc_thrds * (MAX_CU_SIZE * MAX_CU_SIZE) *
   3920             ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
   3921 
   3922         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
   3923 
   3924         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].i4_mem_alignment = 8;
   3925 
   3926         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].i4_mem_size =
   3927             i4_num_proc_thrds * (MAX_CU_SIZE * (MAX_CU_SIZE >> 1)) *
   3928             ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1) *
   3929             ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
   3930 
   3931         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
   3932 
   3933         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].i4_mem_alignment = 8;
   3934     }
   3935 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
   3936     else
   3937     {
   3938         /* Memory assignments for pred storage during CU Recursion */
   3939         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].i4_mem_size = 0;
   3940 
   3941         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
   3942 
   3943         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].i4_mem_alignment = 8;
   3944 
   3945         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].i4_mem_size = 0;
   3946 
   3947         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
   3948 
   3949         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].i4_mem_alignment = 8;
   3950     }
   3951 #endif
   3952 
   3953     /* Memory assignments for CTB left luma data storage */
   3954     ps_mem_tab[ENC_LOOP_LEFT_LUMA_DATA].i4_mem_size =
   3955         i4_num_proc_thrds * (MAX_CTB_SIZE + MAX_TU_SIZE) *
   3956         ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
   3957 
   3958     ps_mem_tab[ENC_LOOP_LEFT_LUMA_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
   3959 
   3960     ps_mem_tab[ENC_LOOP_LEFT_LUMA_DATA].i4_mem_alignment = 8;
   3961 
   3962     /* Memory assignments for CTB left chroma data storage */
   3963     ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].i4_mem_size =
   3964         i4_num_proc_thrds * (MAX_CTB_SIZE + MAX_TU_SIZE) *
   3965         ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
   3966     ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].i4_mem_size <<=
   3967         ((i4_chroma_format == IV_YUV_422SP_UV) ? 1 : 0);
   3968 
   3969     ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
   3970 
   3971     ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].i4_mem_alignment = 8;
   3972 
   3973     /* Memory required for SAO */
   3974     {
   3975         WORD32 num_vert_units;
   3976         WORD32 num_horz_units;
   3977         WORD32 ctb_aligned_ht, ctb_aligned_wd;
   3978         WORD32 luma_buf, chroma_buf;
   3979 
   3980         num_vert_units = u4_height / MAX_CTB_SIZE;
   3981         num_horz_units = u4_width / MAX_CTB_SIZE;
   3982 
   3983         ctb_aligned_ht = u4_height;
   3984         ctb_aligned_wd = u4_width;
   3985 
   3986         /* Memory for top buffer. 1 extra width is required for top buf ptr for row 0
   3987         * and 1 extra location is required for top left buf ptr for row 0
   3988         * Also 1 extra byte is required for every row for top left pixel if
   3989         * the top left ptr is to be passed to leaf level unconditionally
   3990         */
   3991         luma_buf = (ctb_aligned_ht + (ctb_aligned_wd + 1) * (num_vert_units + 1)) *
   3992                    ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
   3993         chroma_buf = (ctb_aligned_ht + (ctb_aligned_wd + 2) * (num_vert_units + 1)) *
   3994                      ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
   3995 
   3996         ps_mem_tab[ENC_LOOP_SAO].i4_mem_size =
   3997             (luma_buf + chroma_buf) * (i4_num_bitrate_inst) * (i4_num_enc_loop_frm_pllel);
   3998 
   3999         /* Add the memory required to store the sao information of top ctb for top merge
   4000         * This is frame level buffer.
   4001         */
   4002         ps_mem_tab[ENC_LOOP_SAO].i4_mem_size +=
   4003             ((num_horz_units * sizeof(sao_enc_t)) * num_vert_units) * (i4_num_bitrate_inst) *
   4004             (i4_num_enc_loop_frm_pllel);
   4005 
   4006         ps_mem_tab[ENC_LOOP_SAO].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
   4007 
   4008         ps_mem_tab[ENC_LOOP_SAO].i4_mem_alignment = 8;
   4009     }
   4010 
   4011     /* Memory for CU level Coeff data buffer */
   4012     {
   4013         /* 16 additional bytes are required to ensure alignment */
   4014         {
   4015             ps_mem_tab[ENC_LOOP_CU_COEFF_DATA].i4_mem_size =
   4016                 i4_num_proc_thrds *
   4017                 (((MAX_LUMA_COEFFS_CTB +
   4018                    (MAX_CHRM_COEFFS_CTB << ((i4_chroma_format == IV_YUV_422SP_UV) ? 1 : 0))) +
   4019                   16) *
   4020                  (2) * sizeof(UWORD8));
   4021         }
   4022 
   4023         ps_mem_tab[ENC_LOOP_CU_COEFF_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
   4024 
   4025         ps_mem_tab[ENC_LOOP_CU_COEFF_DATA].i4_mem_alignment = 16;
   4026 
   4027         ps_mem_tab[ENC_LOOP_CU_RECUR_COEFF_DATA].i4_mem_size =
   4028             i4_num_proc_thrds *
   4029             (MAX_LUMA_COEFFS_CTB +
   4030              (MAX_CHRM_COEFFS_CTB << ((i4_chroma_format == IV_YUV_422SP_UV) ? 1 : 0))) *
   4031             sizeof(UWORD8);
   4032 
   4033         ps_mem_tab[ENC_LOOP_CU_RECUR_COEFF_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
   4034 
   4035         ps_mem_tab[ENC_LOOP_CU_RECUR_COEFF_DATA].i4_mem_alignment = 16;
   4036     }
   4037 
   4038     /* Memory for CU dequant data buffer */
   4039     {
   4040         /* 16 additional bytes are required to ensure alignment */
   4041         {
   4042             ps_mem_tab[ENC_LOOP_CU_DEQUANT_DATA].i4_mem_size =
   4043                 i4_num_proc_thrds *
   4044                 (((i4_chroma_format == IV_YUV_422SP_UV) ? (MAX_CU_SIZE * (MAX_CU_SIZE << 1))
   4045                                                         : (MAX_CU_SIZE * (MAX_CU_SIZE >> 1) * 3)) +
   4046                  8) *
   4047                 (2) * sizeof(WORD16);
   4048         }
   4049 
   4050         ps_mem_tab[ENC_LOOP_CU_DEQUANT_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
   4051 
   4052         ps_mem_tab[ENC_LOOP_CU_DEQUANT_DATA].i4_mem_alignment = 16;
   4053     }
   4054 
   4055     /* Memory for Recon Datastore (Used around and within the RDOPT loop) */
   4056     {
   4057         WORD32 i4_memSize_perThread;
   4058 
   4059         WORD32 i4_chroma_memSize_perThread = 0;
   4060         /* 2 bufs each allocated to the two 'enc_loop_cu_final_prms_t' structs */
   4061         /* used in RDOPT to store cur and best modes' data */
   4062         WORD32 i4_luma_memSize_perThread =
   4063             4 * MAX_CU_SIZE * MAX_CU_SIZE *
   4064             ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
   4065 
   4066         /* 'Glossary' for comments in the following codeBlock */
   4067         /* 1 - 2 Bufs for storing recons of the best modes determined in the */
   4068         /* function 'ihevce_intra_chroma_pred_mode_selector' */
   4069         /* 2 - 1 buf each allocated to the two 'enc_loop_cu_final_prms_t' structs */
   4070         /* used in RDOPT to store cur and best modes' data */
   4071         if(i4_chroma_format == IV_YUV_422SP_UV)
   4072         {
   4073             WORD32 i4_quality_preset =
   4074                 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset;
   4075             switch(i4_quality_preset)
   4076             {
   4077             case IHEVCE_QUALITY_P0:
   4078             {
   4079                 /* 1 */
   4080                 i4_chroma_memSize_perThread +=
   4081                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_PQ *
   4082                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
   4083 
   4084                 /* 2 */
   4085                 i4_chroma_memSize_perThread +=
   4086                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_PQ *
   4087                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
   4088 
   4089                 break;
   4090             }
   4091             case IHEVCE_QUALITY_P2:
   4092             {
   4093                 /* 1 */
   4094                 i4_chroma_memSize_perThread +=
   4095                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_HQ *
   4096                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
   4097 
   4098                 /* 2 */
   4099                 i4_chroma_memSize_perThread +=
   4100                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HQ *
   4101                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
   4102 
   4103                 break;
   4104             }
   4105             case IHEVCE_QUALITY_P3:
   4106             {
   4107                 /* 1 */
   4108                 i4_chroma_memSize_perThread +=
   4109                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_MS *
   4110                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
   4111 
   4112                 /* 2 */
   4113                 i4_chroma_memSize_perThread +=
   4114                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_MS *
   4115                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
   4116 
   4117                 break;
   4118             }
   4119             case IHEVCE_QUALITY_P4:
   4120             {
   4121                 /* 1 */
   4122                 i4_chroma_memSize_perThread +=
   4123                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_HS *
   4124                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
   4125 
   4126                 /* 2 */
   4127                 i4_chroma_memSize_perThread +=
   4128                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HS *
   4129                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
   4130 
   4131                 break;
   4132             }
   4133             case IHEVCE_QUALITY_P5:
   4134             {
   4135                 /* 1 */
   4136                 i4_chroma_memSize_perThread +=
   4137                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_XS *
   4138                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
   4139 
   4140                 /* 2 */
   4141                 i4_chroma_memSize_perThread +=
   4142                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS *
   4143                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
   4144 
   4145                 break;
   4146             }
   4147             case IHEVCE_QUALITY_P6:
   4148             case IHEVCE_QUALITY_P7:
   4149             {
   4150                 /* 1 */
   4151                 i4_chroma_memSize_perThread +=
   4152                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_XS6 *
   4153                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
   4154 
   4155                 /* 2 */
   4156                 i4_chroma_memSize_perThread +=
   4157                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS6 *
   4158                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
   4159 
   4160                 break;
   4161             }
   4162             }
   4163         }
   4164         else
   4165         {
   4166             WORD32 i4_quality_preset =
   4167                 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset;
   4168             switch(i4_quality_preset)
   4169             {
   4170             case IHEVCE_QUALITY_P0:
   4171             {
   4172                 /* 1 */
   4173                 i4_chroma_memSize_perThread +=
   4174                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_PQ *
   4175                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
   4176 
   4177                 /* 2 */
   4178                 i4_chroma_memSize_perThread +=
   4179                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
   4180                     ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_PQ *
   4181                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
   4182 
   4183                 break;
   4184             }
   4185             case IHEVCE_QUALITY_P2:
   4186             {
   4187                 /* 1 */
   4188                 i4_chroma_memSize_perThread +=
   4189                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_HQ *
   4190                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
   4191 
   4192                 /* 2 */
   4193                 i4_chroma_memSize_perThread +=
   4194                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
   4195                     ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HQ *
   4196                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
   4197 
   4198                 break;
   4199             }
   4200             case IHEVCE_QUALITY_P3:
   4201             {
   4202                 /* 1 */
   4203                 i4_chroma_memSize_perThread +=
   4204                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_MS *
   4205                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
   4206 
   4207                 /* 2 */
   4208                 i4_chroma_memSize_perThread +=
   4209                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
   4210                     ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_MS *
   4211                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
   4212 
   4213                 break;
   4214             }
   4215             case IHEVCE_QUALITY_P4:
   4216             {
   4217                 /* 1 */
   4218                 i4_chroma_memSize_perThread +=
   4219                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_HS *
   4220                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
   4221 
   4222                 /* 2 */
   4223                 i4_chroma_memSize_perThread +=
   4224                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
   4225                     ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HS *
   4226                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
   4227 
   4228                 break;
   4229             }
   4230             case IHEVCE_QUALITY_P5:
   4231             {
   4232                 /* 1 */
   4233                 i4_chroma_memSize_perThread +=
   4234                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_XS *
   4235                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
   4236 
   4237                 /* 2 */
   4238                 i4_chroma_memSize_perThread +=
   4239                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
   4240                     ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS *
   4241                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
   4242 
   4243                 break;
   4244             }
   4245             case IHEVCE_QUALITY_P6:
   4246             case IHEVCE_QUALITY_P7:
   4247             {
   4248                 /* 1 */
   4249                 i4_chroma_memSize_perThread +=
   4250                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_XS6 *
   4251                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
   4252 
   4253                 /* 2 */
   4254                 i4_chroma_memSize_perThread +=
   4255                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
   4256                     ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS6 *
   4257                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
   4258 
   4259                 break;
   4260             }
   4261             }
   4262         }
   4263 
   4264         i4_memSize_perThread = i4_luma_memSize_perThread + i4_chroma_memSize_perThread;
   4265 
   4266         ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].i4_mem_size =
   4267             i4_num_proc_thrds * i4_memSize_perThread * sizeof(UWORD8);
   4268 
   4269         ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
   4270 
   4271         ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].i4_mem_alignment = 16;
   4272     }
   4273 
   4274     n_tabs = NUM_ENC_LOOP_MEM_RECS;
   4275 
   4276     /*************************************************************************/
   4277     /* --- EncLoop Deblock sync Dep Mngr Mem requests --                     */
   4278     /*************************************************************************/
   4279 
   4280     /* Fill the memtabs for  EncLoop Deblock Dep Mngr */
   4281     {
   4282         WORD32 count;
   4283         WORD32 num_vert_units;
   4284         WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
   4285 
   4286         ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units);
   4287         ASSERT(num_vert_units > 0);
   4288         for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
   4289         {
   4290             for(ctr = 0; ctr < i4_num_bitrate_inst; ctr++)
   4291             {
   4292                 n_tabs += ihevce_dmgr_get_mem_recs(
   4293                     &ps_mem_tab[n_tabs],
   4294                     DEP_MNGR_ROW_ROW_SYNC,
   4295                     num_vert_units,
   4296                     ps_init_prms->s_app_tile_params.i4_num_tile_cols,
   4297                     i4_num_proc_thrds,
   4298                     i4_mem_space);
   4299             }
   4300         }
   4301     }
   4302 
   4303     /*************************************************************************/
   4304     /* --- EncLoop Top-Right CU sync Dep Mngr Mem requests --                */
   4305     /*************************************************************************/
   4306 
   4307     /* Fill the memtabs for  Top-Right CU sync Dep Mngr */
   4308     {
   4309         WORD32 count;
   4310         WORD32 num_vert_units;
   4311         WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
   4312         ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units);
   4313         ASSERT(num_vert_units > 0);
   4314 
   4315         for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
   4316         {
   4317             for(ctr = 0; ctr < i4_num_bitrate_inst; ctr++)
   4318             {
   4319                 n_tabs += ihevce_dmgr_get_mem_recs(
   4320                     &ps_mem_tab[n_tabs],
   4321                     DEP_MNGR_ROW_ROW_SYNC,
   4322                     num_vert_units,
   4323                     ps_init_prms->s_app_tile_params.i4_num_tile_cols,
   4324                     i4_num_proc_thrds,
   4325                     i4_mem_space);
   4326             }
   4327         }
   4328     }
   4329 
   4330     /*************************************************************************/
   4331     /* --- EncLoop Aux. on Ref. bitrate sync Dep Mngr Mem requests --        */
   4332     /*************************************************************************/
   4333 
   4334     /* Fill the memtabs for  EncLoop Aux. on Ref. bitrate Dep Mngr */
   4335     {
   4336         WORD32 count;
   4337         WORD32 num_vert_units;
   4338         WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
   4339 
   4340         ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units);
   4341         ASSERT(num_vert_units > 0);
   4342 
   4343         for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
   4344         {
   4345             for(ctr = 1; ctr < i4_num_bitrate_inst; ctr++)
   4346             {
   4347                 n_tabs += ihevce_dmgr_get_mem_recs(
   4348                     &ps_mem_tab[n_tabs],
   4349                     DEP_MNGR_ROW_ROW_SYNC,
   4350                     num_vert_units,
   4351                     ps_init_prms->s_app_tile_params.i4_num_tile_cols,
   4352                     i4_num_proc_thrds,
   4353                     i4_mem_space);
   4354             }
   4355         }
   4356     }
   4357 
   4358     return (n_tabs);
   4359 }
   4360 
   4361 /*!
   4362 ******************************************************************************
   4363 * \if Function name : ihevce_enc_loop_init \endif
   4364 *
   4365 * \brief
   4366 *    Initialization for ENC_LOOP context state structure.
   4367 *
   4368 * \param[in] ps_mem_tab : pointer to memory descriptors table
   4369 * \param[in] ps_init_prms : Create time static parameters
   4370 * \param[in] pv_osal_handle : Osal handle
   4371 *
   4372 * \return
   4373 *    None
   4374 *
   4375 * \author
   4376 *  Ittiam
   4377 *
   4378 *****************************************************************************
   4379 */
   4380 void *ihevce_enc_loop_init(
   4381     iv_mem_rec_t *ps_mem_tab,
   4382     ihevce_static_cfg_params_t *ps_init_prms,
   4383     WORD32 i4_num_proc_thrds,
   4384     void *pv_osal_handle,
   4385     func_selector_t *ps_func_selector,
   4386     rc_quant_t *ps_rc_quant_ctxt,
   4387     ihevce_tile_params_t *ps_tile_params_base,
   4388     WORD32 i4_resolution_id,
   4389     WORD32 i4_num_enc_loop_frm_pllel,
   4390     UWORD8 u1_is_popcnt_available)
   4391 {
   4392     ihevce_enc_loop_master_ctxt_t *ps_master_ctxt;
   4393     ihevce_enc_loop_ctxt_t *ps_ctxt;
   4394     WORD32 ctr, n_tabs;
   4395     UWORD32 u4_width, u4_height;
   4396     UWORD32 u4_ctb_in_a_row, u4_ctb_rows_in_a_frame;
   4397     UWORD32 u4_size_bs_memory, u4_size_qp_memory;
   4398     UWORD8 *pu1_deblk_base; /*Store the base address of deblcoking memory*/
   4399     WORD32 i;
   4400     WORD32 i4_num_bitrate_inst =
   4401         ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_num_bitrate_instances;
   4402     enc_loop_rc_params_t *ps_enc_loop_rc_params;
   4403     UWORD8 *pu1_sao_base; /* store the base address of sao*/
   4404     UWORD32 u4_ctb_aligned_wd, ctb_size, u4_ctb_aligned_ht, num_vert_units;
   4405     WORD32 i4_chroma_format = ps_init_prms->s_src_prms.i4_chr_format;
   4406     WORD32 is_hbd_mode = (ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8);
   4407     WORD32 i4_enc_frm_id;
   4408     WORD32 num_cu_in_ctb;
   4409     WORD32 i4_num_tile_cols = 1;  //Default value is 1
   4410 
   4411     /* ENC_LOOP state structure */
   4412     ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)ps_mem_tab[ENC_LOOP_CTXT].pv_base;
   4413 
   4414     ps_master_ctxt->i4_num_proc_thrds = i4_num_proc_thrds;
   4415 
   4416     ps_ctxt = (ihevce_enc_loop_ctxt_t *)ps_mem_tab[ENC_LOOP_THRDS_CTXT].pv_base;
   4417     ps_enc_loop_rc_params = (enc_loop_rc_params_t *)ps_mem_tab[ENC_LOOP_RC_PARAMS].pv_base;
   4418     ps_ctxt->ps_rc_quant_ctxt = ps_rc_quant_ctxt;
   4419     /*Calculation of memory sizes for deblocking*/
   4420     {
   4421         /*width of the input YUV to be encoded. */
   4422         u4_width = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
   4423         /*making the width a multiple of CTB size*/
   4424         u4_width += SET_CTB_ALIGN(
   4425             ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, MAX_CTB_SIZE);
   4426 
   4427         u4_ctb_in_a_row = (u4_width / MAX_CTB_SIZE);
   4428 
   4429         /*height of the input YUV to be encoded */
   4430         u4_height = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
   4431         /*making the height a multiple of CTB size*/
   4432         u4_height += SET_CTB_ALIGN(
   4433             ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, MAX_CTB_SIZE);
   4434 
   4435         u4_ctb_rows_in_a_frame = (u4_height / MAX_CTB_SIZE);
   4436 
   4437         /*Memory(in bytes) required for storing Boundary Strength for entire CTB row*/
   4438         /*1 vertical edge per 8 pixel*/
   4439         u4_size_bs_memory = (MAX_CTB_SIZE >> 3);
   4440         /*Vertical edges for entire width of CTB row*/
   4441         u4_size_bs_memory *= u4_ctb_in_a_row;
   4442         /*Each vertical edge of CTB row is 4 bytes*/
   4443         u4_size_bs_memory = u4_size_bs_memory << 2;
   4444         /*Adding Memory required for storing horizontal BS by doubling*/
   4445         u4_size_bs_memory = u4_size_bs_memory << 1;
   4446 
   4447         /*Memory(in bytes) required for storing Qp at 4x4 level for entire CTB row*/
   4448         /*Number of 4x4 blocks in the width of a CTB*/
   4449         u4_size_qp_memory = (MAX_CTB_SIZE >> 2);
   4450         /*Number of 4x4 blocks in the height of a CTB. Adding 1 to store Qp of lowest
   4451         4x4-block layer of top-CTB in order to deblock top edge of current CTB*/
   4452         u4_size_qp_memory *= ((MAX_CTB_SIZE >> 2) + 1);
   4453         /*Storage for entire CTB row*/
   4454         u4_size_qp_memory *= u4_ctb_in_a_row;
   4455 
   4456         pu1_deblk_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_DEBLOCKING].pv_base;
   4457     }
   4458 
   4459     /*Derive the base pointer of sao*/
   4460     pu1_sao_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_SAO].pv_base;
   4461     ctb_size = (1 << ps_init_prms->s_config_prms.i4_max_log2_cu_size);
   4462     u4_ctb_aligned_wd = u4_width;
   4463     u4_ctb_aligned_ht = u4_height;
   4464     num_vert_units = (u4_height) / ctb_size;
   4465 
   4466     for(ctr = 0; ctr < ps_master_ctxt->i4_num_proc_thrds; ctr++)
   4467     {
   4468         ps_master_ctxt->aps_enc_loop_thrd_ctxt[ctr] = ps_ctxt;
   4469         /* Store Tile params base into EncLoop context */
   4470         ps_ctxt->pv_tile_params_base = (void *)ps_tile_params_base;
   4471         ihevce_cmn_utils_instr_set_router(
   4472             &ps_ctxt->s_cmn_opt_func, u1_is_popcnt_available, ps_init_prms->e_arch_type);
   4473         ihevce_sifter_sad_fxn_assigner(
   4474             (FT_SAD_EVALUATOR **)(&ps_ctxt->pv_evalsad_pt_npu_mxn_8bit), ps_init_prms->e_arch_type);
   4475         ps_ctxt->i4_max_search_range_horizontal =
   4476             ps_init_prms->s_config_prms.i4_max_search_range_horz;
   4477         ps_ctxt->i4_max_search_range_vertical =
   4478             ps_init_prms->s_config_prms.i4_max_search_range_vert;
   4479 
   4480         ps_ctxt->i4_quality_preset =
   4481             ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset;
   4482 
   4483         if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P7)
   4484         {
   4485             ps_ctxt->i4_quality_preset = IHEVCE_QUALITY_P6;
   4486         }
   4487 
   4488         ps_ctxt->i4_num_proc_thrds = ps_master_ctxt->i4_num_proc_thrds;
   4489 
   4490         ps_ctxt->i4_rc_pass = ps_init_prms->s_pass_prms.i4_pass;
   4491 
   4492         ps_ctxt->u1_chroma_array_type = (i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1;
   4493 
   4494         ps_ctxt->s_deblk_prms.u1_chroma_array_type = ps_ctxt->u1_chroma_array_type;
   4495 
   4496         ps_ctxt->pi2_scal_mat = (WORD16 *)ps_mem_tab[ENC_LOOP_SCALE_MAT].pv_base;
   4497 
   4498         ps_ctxt->pi2_rescal_mat = (WORD16 *)ps_mem_tab[ENC_LOOP_RESCALE_MAT].pv_base;
   4499 
   4500         if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
   4501         {
   4502             ps_ctxt->i4_use_ctb_level_lamda = 0;
   4503         }
   4504         else
   4505         {
   4506             ps_ctxt->i4_use_ctb_level_lamda = 0;
   4507         }
   4508 
   4509         /** Register the function selector pointer*/
   4510         ps_ctxt->ps_func_selector = ps_func_selector;
   4511 
   4512         ps_ctxt->s_mc_ctxt.ps_func_selector = ps_func_selector;
   4513 
   4514         /* Initialization for non-distributed mode */
   4515         ps_ctxt->s_mc_ctxt.ai4_tile_xtra_pel[0] = 0;
   4516         ps_ctxt->s_mc_ctxt.ai4_tile_xtra_pel[1] = 0;
   4517         ps_ctxt->s_mc_ctxt.ai4_tile_xtra_pel[2] = 0;
   4518         ps_ctxt->s_mc_ctxt.ai4_tile_xtra_pel[3] = 0;
   4519 
   4520         ps_ctxt->s_deblk_prms.ps_func_selector = ps_func_selector;
   4521         ps_ctxt->i4_top_row_luma_stride = (u4_width + MAX_CU_SIZE + 1);
   4522 
   4523         ps_ctxt->i4_frm_top_row_luma_size =
   4524             ps_ctxt->i4_top_row_luma_stride * (u4_ctb_rows_in_a_frame + 1);
   4525 
   4526         ps_ctxt->i4_top_row_chroma_stride = (u4_width + MAX_CU_SIZE + 2);
   4527 
   4528         ps_ctxt->i4_frm_top_row_chroma_size =
   4529             ps_ctxt->i4_top_row_chroma_stride * (u4_ctb_rows_in_a_frame + 1);
   4530 
   4531         {
   4532             for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
   4533             {
   4534                 /* +1 is to provision top left pel */
   4535                 ps_ctxt->apv_frm_top_row_luma[i4_enc_frm_id] =
   4536                     (UWORD8 *)ps_mem_tab[ENC_LOOP_TOP_LUMA].pv_base + 1 +
   4537                     (ps_ctxt->i4_frm_top_row_luma_size * i4_enc_frm_id * i4_num_bitrate_inst);
   4538 
   4539                 /* pointer incremented by 1 row to avoid OOB access in 0th row */
   4540                 ps_ctxt->apv_frm_top_row_luma[i4_enc_frm_id] =
   4541                     (UWORD8 *)ps_ctxt->apv_frm_top_row_luma[i4_enc_frm_id] +
   4542                     ps_ctxt->i4_top_row_luma_stride;
   4543 
   4544                 /* +2 is to provision top left pel */
   4545                 ps_ctxt->apv_frm_top_row_chroma[i4_enc_frm_id] =
   4546                     (UWORD8 *)ps_mem_tab[ENC_LOOP_TOP_CHROMA].pv_base + 2 +
   4547                     (ps_ctxt->i4_frm_top_row_chroma_size * i4_enc_frm_id * i4_num_bitrate_inst);
   4548 
   4549                 /* pointer incremented by 1 row to avoid OOB access in 0th row */
   4550                 ps_ctxt->apv_frm_top_row_chroma[i4_enc_frm_id] =
   4551                     (UWORD8 *)ps_ctxt->apv_frm_top_row_chroma[i4_enc_frm_id] +
   4552                     ps_ctxt->i4_top_row_chroma_stride;
   4553             }
   4554         }
   4555 
   4556         /* +1 is to provision top left nbr */
   4557         ps_ctxt->i4_top_row_nbr_stride = (((u4_width + MAX_CU_SIZE) >> 2) + 1);
   4558         ps_ctxt->i4_frm_top_row_nbr_size =
   4559             ps_ctxt->i4_top_row_nbr_stride * (u4_ctb_rows_in_a_frame + 1);
   4560         for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
   4561         {
   4562             ps_ctxt->aps_frm_top_row_nbr[i4_enc_frm_id] =
   4563                 (nbr_4x4_t *)ps_mem_tab[ENC_LOOP_TOP_NBR4X4].pv_base + 1 +
   4564                 (ps_ctxt->i4_frm_top_row_nbr_size * i4_enc_frm_id * i4_num_bitrate_inst);
   4565             ps_ctxt->aps_frm_top_row_nbr[i4_enc_frm_id] += ps_ctxt->i4_top_row_nbr_stride;
   4566         }
   4567 
   4568         num_cu_in_ctb = ctb_size / MIN_CU_SIZE;
   4569         num_cu_in_ctb *= num_cu_in_ctb;
   4570 
   4571         /* pointer incremented by 1 row to avoid OOB access in 0th row */
   4572 
   4573         /* Memory for CU level Coeff data buffer */
   4574         {
   4575             WORD32 i4_16byte_boundary_overshoot;
   4576             WORD32 buf_size_per_cu;
   4577             WORD32 buf_size_per_thread_wo_alignment_req;
   4578             WORD32 buf_size_per_thread;
   4579 
   4580             buf_size_per_cu =
   4581                 ((MAX_LUMA_COEFFS_CTB +
   4582                   (MAX_CHRM_COEFFS_CTB << ((i4_chroma_format == IV_YUV_422SP_UV) ? 1 : 0))) +
   4583                  16) *
   4584                 sizeof(UWORD8);
   4585             buf_size_per_thread_wo_alignment_req = buf_size_per_cu - 16 * sizeof(UWORD8);
   4586 
   4587             {
   4588                 buf_size_per_thread = buf_size_per_cu * (2);
   4589 
   4590                 for(i = 0; i < 2; i++)
   4591                 {
   4592                     ps_ctxt->as_cu_prms[i].pu1_cu_coeffs =
   4593                         (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_COEFF_DATA].pv_base +
   4594                         (ctr * buf_size_per_thread) + (i * buf_size_per_cu);
   4595 
   4596                     i4_16byte_boundary_overshoot =
   4597                         ((LWORD64)ps_ctxt->as_cu_prms[i].pu1_cu_coeffs & 0xf);
   4598 
   4599                     ps_ctxt->as_cu_prms[i].pu1_cu_coeffs += (16 - i4_16byte_boundary_overshoot);
   4600                 }
   4601             }
   4602 
   4603             ps_ctxt->pu1_cu_recur_coeffs =
   4604                 (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_COEFF_DATA].pv_base +
   4605                 (ctr * buf_size_per_thread_wo_alignment_req);
   4606         }
   4607 
   4608         /* Memory for CU dequant data buffer */
   4609         {
   4610             WORD32 buf_size_per_thread;
   4611             WORD32 i4_16byte_boundary_overshoot;
   4612 
   4613             WORD32 buf_size_per_cu =
   4614                 (((i4_chroma_format == IV_YUV_422SP_UV) ? (MAX_CU_SIZE * (MAX_CU_SIZE << 1))
   4615                                                         : (MAX_CU_SIZE * (MAX_CU_SIZE >> 1) * 3)) +
   4616                  8) *
   4617                 sizeof(WORD16);
   4618 
   4619             {
   4620                 buf_size_per_thread = buf_size_per_cu * 2;
   4621 
   4622                 for(i = 0; i < 2; i++)
   4623                 {
   4624                     ps_ctxt->as_cu_prms[i].pi2_cu_deq_coeffs =
   4625                         (WORD16
   4626                              *)((UWORD8 *)ps_mem_tab[ENC_LOOP_CU_DEQUANT_DATA].pv_base + (ctr * buf_size_per_thread) + (i * buf_size_per_cu));
   4627 
   4628                     i4_16byte_boundary_overshoot =
   4629                         ((LWORD64)ps_ctxt->as_cu_prms[i].pi2_cu_deq_coeffs & 0xf);
   4630 
   4631                     ps_ctxt->as_cu_prms[i].pi2_cu_deq_coeffs =
   4632                         (WORD16
   4633                              *)((UWORD8 *)ps_ctxt->as_cu_prms[i].pi2_cu_deq_coeffs + (16 - i4_16byte_boundary_overshoot));
   4634                 }
   4635             }
   4636         }
   4637 
   4638         /*------ Deblocking memory pointer assignments start ------*/
   4639 
   4640         /*Assign stride = 4x4 blocks in horizontal edge*/
   4641         ps_ctxt->s_deblk_ctbrow_prms.u4_qp_top_4x4_buf_strd = (MAX_CTB_SIZE / 4) * u4_ctb_in_a_row;
   4642 
   4643         ps_ctxt->s_deblk_ctbrow_prms.u4_qp_top_4x4_buf_size =
   4644             ps_ctxt->s_deblk_ctbrow_prms.u4_qp_top_4x4_buf_strd * u4_ctb_rows_in_a_frame;
   4645 
   4646         /*Assign frame level memory to store the Qp of
   4647         top 4x4 neighbours of each CTB row*/
   4648         for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
   4649         {
   4650             ps_ctxt->s_deblk_ctbrow_prms.api1_qp_top_4x4_ctb_row[i4_enc_frm_id] =
   4651                 (WORD8 *)ps_mem_tab[ENC_LOOP_QP_TOP_4X4].pv_base +
   4652                 (ps_ctxt->s_deblk_ctbrow_prms.u4_qp_top_4x4_buf_size * i4_num_bitrate_inst *
   4653                  i4_enc_frm_id);
   4654         }
   4655 
   4656         ps_ctxt->s_deblk_ctbrow_prms.pu4_ctb_row_bs_vert = (UWORD32 *)pu1_deblk_base;
   4657 
   4658         ps_ctxt->s_deblk_ctbrow_prms.pu4_ctb_row_bs_horz =
   4659             (UWORD32 *)(pu1_deblk_base + (u4_size_bs_memory >> 1));
   4660 
   4661         ps_ctxt->s_deblk_ctbrow_prms.pi1_ctb_row_qp = (WORD8 *)pu1_deblk_base + u4_size_bs_memory;
   4662 
   4663         /*Assign stride = 4x4 blocks in horizontal edge*/
   4664         ps_ctxt->s_deblk_ctbrow_prms.u4_qp_buffer_stride = (MAX_CTB_SIZE / 4) * u4_ctb_in_a_row;
   4665 
   4666         pu1_deblk_base += (u4_size_bs_memory + u4_size_qp_memory);
   4667 
   4668         /*------ Deblocking memory pointer assignments end ------*/
   4669 
   4670         /*------SAO memory's pointer assignment starts------------*/
   4671         if(!is_hbd_mode)
   4672         {
   4673             /* 2 is added to allocate top left pixel */
   4674             ps_ctxt->s_sao_ctxt_t.i4_top_luma_buf_size =
   4675                 u4_ctb_aligned_ht + (u4_ctb_aligned_wd + 1) * (num_vert_units + 1);
   4676             ps_ctxt->s_sao_ctxt_t.i4_top_chroma_buf_size =
   4677                 u4_ctb_aligned_ht + (u4_ctb_aligned_wd + 2) * (num_vert_units + 1);
   4678             ps_ctxt->s_sao_ctxt_t.i4_num_ctb_units =
   4679                 num_vert_units * (u4_ctb_aligned_wd / MAX_CTB_SIZE);
   4680 
   4681             for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
   4682             {
   4683                 ps_ctxt->s_sao_ctxt_t.apu1_sao_src_frm_top_luma[i4_enc_frm_id] =
   4684                     pu1_sao_base +
   4685                     ((ps_ctxt->s_sao_ctxt_t.i4_top_luma_buf_size +
   4686                       ps_ctxt->s_sao_ctxt_t.i4_top_chroma_buf_size) *
   4687                      i4_num_bitrate_inst * i4_enc_frm_id) +  // move to the next frame_id
   4688                     u4_ctb_aligned_wd +
   4689                     2;
   4690 
   4691                 ps_ctxt->s_sao_ctxt_t.apu1_sao_src_frm_top_chroma[i4_enc_frm_id] =
   4692                     pu1_sao_base +
   4693                     ((ps_ctxt->s_sao_ctxt_t.i4_top_luma_buf_size +
   4694                       ps_ctxt->s_sao_ctxt_t.i4_top_chroma_buf_size) *
   4695                      i4_num_bitrate_inst * i4_enc_frm_id) +
   4696                     +u4_ctb_aligned_ht + (u4_ctb_aligned_wd + 1) * (num_vert_units + 1) +
   4697                     u4_ctb_aligned_wd + 4;
   4698 
   4699                 ps_ctxt->s_sao_ctxt_t.aps_frm_top_ctb_sao[i4_enc_frm_id] = (sao_enc_t *) (pu1_sao_base +
   4700                     ((ps_ctxt->s_sao_ctxt_t.i4_top_luma_buf_size + ps_ctxt->s_sao_ctxt_t.i4_top_chroma_buf_size)
   4701                     *i4_num_bitrate_inst*i4_num_enc_loop_frm_pllel) +
   4702                     (ps_ctxt->s_sao_ctxt_t.i4_num_ctb_units * sizeof(sao_enc_t) *i4_num_bitrate_inst * i4_enc_frm_id));
   4703             }
   4704             ps_ctxt->s_sao_ctxt_t.i4_ctb_size =
   4705                 (1 << ps_init_prms->s_config_prms.i4_max_log2_cu_size);
   4706             ps_ctxt->s_sao_ctxt_t.u4_ctb_aligned_wd = u4_ctb_aligned_wd;
   4707         }
   4708 
   4709         /*------SAO memory's pointer assignment ends------------*/
   4710 
   4711         /* perform all one time initialisation here */
   4712         ps_ctxt->i4_nbr_map_strd = MAX_PU_IN_CTB_ROW + 1 + 8;
   4713 
   4714         ps_ctxt->pu1_ctb_nbr_map = ps_ctxt->au1_nbr_ctb_map[0];
   4715 
   4716         ps_ctxt->i4_deblock_type = ps_init_prms->s_coding_tools_prms.i4_deblocking_type;
   4717 
   4718         /* move the pointer to 1,2 location */
   4719         ps_ctxt->pu1_ctb_nbr_map += ps_ctxt->i4_nbr_map_strd;
   4720         ps_ctxt->pu1_ctb_nbr_map++;
   4721 
   4722         ps_ctxt->i4_cu_csbf_strd = MAX_TU_IN_CTB_ROW;
   4723 
   4724         CREATE_SUBBLOCK2CSBFID_MAP(gai4_subBlock2csbfId_map4x4TU, 1, 4, ps_ctxt->i4_cu_csbf_strd);
   4725 
   4726         CREATE_SUBBLOCK2CSBFID_MAP(gai4_subBlock2csbfId_map8x8TU, 4, 8, ps_ctxt->i4_cu_csbf_strd);
   4727 
   4728         CREATE_SUBBLOCK2CSBFID_MAP(
   4729             gai4_subBlock2csbfId_map16x16TU, 16, 16, ps_ctxt->i4_cu_csbf_strd);
   4730 
   4731         CREATE_SUBBLOCK2CSBFID_MAP(
   4732             gai4_subBlock2csbfId_map32x32TU, 64, 32, ps_ctxt->i4_cu_csbf_strd);
   4733 
   4734         /* For both instances, initialise the chroma dequant start idx */
   4735         ps_ctxt->as_cu_prms[0].i4_chrm_deq_coeff_strt_idx = (MAX_CU_SIZE * MAX_CU_SIZE);
   4736         ps_ctxt->as_cu_prms[1].i4_chrm_deq_coeff_strt_idx = (MAX_CU_SIZE * MAX_CU_SIZE);
   4737 
   4738         /* initialise all the function pointer tables */
   4739         {
   4740             ps_ctxt->pv_inter_rdopt_cu_mc_mvp =
   4741                 (pf_inter_rdopt_cu_mc_mvp)ihevce_inter_rdopt_cu_mc_mvp;
   4742 
   4743             ps_ctxt->pv_inter_rdopt_cu_ntu = (pf_inter_rdopt_cu_ntu)ihevce_inter_rdopt_cu_ntu;
   4744 
   4745 #if ENABLE_RDO_BASED_TU_RECURSION
   4746             if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P0)
   4747             {
   4748                 ps_ctxt->pv_inter_rdopt_cu_ntu =
   4749                     (pf_inter_rdopt_cu_ntu)ihevce_inter_tu_tree_selector_and_rdopt_cost_computer;
   4750             }
   4751 #endif
   4752             ps_ctxt->pv_intra_chroma_pred_mode_selector =
   4753                 (pf_intra_chroma_pred_mode_selector)ihevce_intra_chroma_pred_mode_selector;
   4754             ps_ctxt->pv_intra_rdopt_cu_ntu = (pf_intra_rdopt_cu_ntu)ihevce_intra_rdopt_cu_ntu;
   4755             ps_ctxt->pv_final_rdopt_mode_prcs =
   4756                 (pf_final_rdopt_mode_prcs)ihevce_final_rdopt_mode_prcs;
   4757             ps_ctxt->pv_store_cu_results = (pf_store_cu_results)ihevce_store_cu_results;
   4758             ps_ctxt->pv_enc_loop_cu_bot_copy = (pf_enc_loop_cu_bot_copy)ihevce_enc_loop_cu_bot_copy;
   4759             ps_ctxt->pv_enc_loop_ctb_left_copy =
   4760                 (pf_enc_loop_ctb_left_copy)ihevce_enc_loop_ctb_left_copy;
   4761 
   4762             /* Memory assignments for chroma intra pred buffer */
   4763             {
   4764                 WORD32 pred_buf_size =
   4765                     MAX_TU_SIZE * MAX_TU_SIZE * 2 * ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1);
   4766                 WORD32 pred_buf_size_per_thread =
   4767                     NUM_POSSIBLE_TU_SIZES_CHR_INTRA_SATD * pred_buf_size;
   4768                 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_CHROMA_PRED_INTRA].pv_base +
   4769                                    (ctr * pred_buf_size_per_thread);
   4770 
   4771                 for(i = 0; i < NUM_POSSIBLE_TU_SIZES_CHR_INTRA_SATD; i++)
   4772                 {
   4773                     ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[i].pv_pred_data = pu1_base;
   4774                     pu1_base += pred_buf_size;
   4775                 }
   4776             }
   4777 
   4778             /* Memory assignments for reference substitution output */
   4779             {
   4780                 WORD32 pred_buf_size = ((MAX_TU_SIZE * 2 * 2) + 4);
   4781                 WORD32 pred_buf_size_per_thread = pred_buf_size;
   4782                 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_REF_SUB_OUT].pv_base +
   4783                                    (ctr * pred_buf_size_per_thread);
   4784 
   4785                 ps_ctxt->pv_ref_sub_out = pu1_base;
   4786             }
   4787 
   4788             /* Memory assignments for reference filtering output */
   4789             {
   4790                 WORD32 pred_buf_size = ((MAX_TU_SIZE * 2 * 2) + 4);
   4791                 WORD32 pred_buf_size_per_thread = pred_buf_size;
   4792                 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_REF_FILT_OUT].pv_base +
   4793                                    (ctr * pred_buf_size_per_thread);
   4794 
   4795                 ps_ctxt->pv_ref_filt_out = pu1_base;
   4796             }
   4797 
   4798             /* Memory assignments for recon storage during CU Recursion */
   4799 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
   4800             if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P0)
   4801 #endif
   4802             {
   4803                 {
   4804                     WORD32 pred_buf_size = (MAX_CU_SIZE * MAX_CU_SIZE);
   4805                     WORD32 pred_buf_size_per_thread = pred_buf_size;
   4806                     UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].pv_base +
   4807                                        (ctr * pred_buf_size_per_thread);
   4808 
   4809                     ps_ctxt->pv_cu_luma_recon = pu1_base;
   4810                 }
   4811 
   4812                 {
   4813                     WORD32 pred_buf_size = ((MAX_CU_SIZE * MAX_CU_SIZE) >> 1) *
   4814                                            ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1);
   4815                     WORD32 pred_buf_size_per_thread = pred_buf_size;
   4816                     UWORD8 *pu1_base =
   4817                         (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].pv_base +
   4818                         (ctr * pred_buf_size_per_thread);
   4819 
   4820                     ps_ctxt->pv_cu_chrma_recon = pu1_base;
   4821                 }
   4822             }
   4823 
   4824             /* Memory assignments for pred storage during CU Recursion */
   4825 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
   4826             if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P0)
   4827 #endif
   4828             {
   4829                 {
   4830                     WORD32 pred_buf_size = (MAX_CU_SIZE * MAX_CU_SIZE);
   4831                     WORD32 pred_buf_size_per_thread = pred_buf_size;
   4832                     UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].pv_base +
   4833                                        (ctr * pred_buf_size_per_thread);
   4834 
   4835                     ps_ctxt->pv_CTB_pred_luma = pu1_base;
   4836                 }
   4837 
   4838                 {
   4839                     WORD32 pred_buf_size = ((MAX_CU_SIZE * MAX_CU_SIZE) >> 1) *
   4840                                            ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1);
   4841                     WORD32 pred_buf_size_per_thread = pred_buf_size;
   4842                     UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].pv_base +
   4843                                        (ctr * pred_buf_size_per_thread);
   4844 
   4845                     ps_ctxt->pv_CTB_pred_chroma = pu1_base;
   4846                 }
   4847             }
   4848 
   4849             /* Memory assignments for CTB left luma data storage */
   4850             {
   4851                 WORD32 pred_buf_size = (MAX_CTB_SIZE + MAX_TU_SIZE);
   4852                 WORD32 pred_buf_size_per_thread = pred_buf_size;
   4853                 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_LEFT_LUMA_DATA].pv_base +
   4854                                    (ctr * pred_buf_size_per_thread);
   4855 
   4856                 ps_ctxt->pv_left_luma_data = pu1_base;
   4857             }
   4858 
   4859             /* Memory assignments for CTB left chroma data storage */
   4860             {
   4861                 WORD32 pred_buf_size =
   4862                     (MAX_CTB_SIZE + MAX_TU_SIZE) * ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1);
   4863                 WORD32 pred_buf_size_per_thread = pred_buf_size;
   4864                 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].pv_base +
   4865                                    (ctr * pred_buf_size_per_thread);
   4866 
   4867                 ps_ctxt->pv_left_chrm_data = pu1_base;
   4868             }
   4869         }
   4870 
   4871         /* Memory for inter pred buffers */
   4872         {
   4873             WORD32 i4_num_bufs_per_thread;
   4874 
   4875             WORD32 i4_buf_size_per_cand =
   4876                 (MAX_CTB_SIZE) * (MAX_CTB_SIZE) *
   4877                 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
   4878 
   4879             i4_num_bufs_per_thread =
   4880                 (ps_mem_tab[ENC_LOOP_INTER_PRED].i4_mem_size / i4_num_proc_thrds) /
   4881                 i4_buf_size_per_cand;
   4882 
   4883             ps_ctxt->i4_max_num_inter_rdopt_cands = i4_num_bufs_per_thread - 4;
   4884 
   4885             ps_ctxt->s_pred_buf_data.u4_is_buf_in_use = UINT_MAX;
   4886 
   4887             {
   4888                 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_INTER_PRED].pv_base +
   4889                                    +(ctr * i4_buf_size_per_cand * i4_num_bufs_per_thread);
   4890 
   4891                 for(i = 0; i < i4_num_bufs_per_thread; i++)
   4892                 {
   4893                     ps_ctxt->s_pred_buf_data.apv_inter_pred_data[i] =
   4894                         pu1_base + i * i4_buf_size_per_cand;
   4895                     ps_ctxt->s_pred_buf_data.u4_is_buf_in_use ^= (1 << i);
   4896                 }
   4897             }
   4898         }
   4899 
   4900         /* Memory required to store pred for 422 chroma */
   4901         if(i4_chroma_format == IV_YUV_422SP_UV)
   4902         {
   4903             WORD32 pred_buf_size = MAX_CTB_SIZE * MAX_CTB_SIZE * 2;
   4904             WORD32 pred_buf_size_per_thread =
   4905                 pred_buf_size * ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) *
   4906                 sizeof(UWORD8);
   4907             void *pv_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_422_CHROMA_INTRA_PRED].pv_base +
   4908                             (ctr * pred_buf_size_per_thread);
   4909 
   4910             ps_ctxt->pv_422_chroma_intra_pred_buf = pv_base;
   4911         }
   4912         else
   4913         {
   4914             ps_ctxt->pv_422_chroma_intra_pred_buf = NULL;
   4915         }
   4916 
   4917         /* Memory for Recon Datastore (Used around and within the RDOPT loop) */
   4918         {
   4919             WORD32 i4_lumaBufSize = MAX_CU_SIZE * MAX_CU_SIZE;
   4920             WORD32 i4_chromaBufSize =
   4921                 MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ((i4_chroma_format == IV_YUV_422SP_UV) + 1);
   4922             WORD32 i4_memSize_perThread = ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].i4_mem_size /
   4923                                           (i4_num_proc_thrds * sizeof(UWORD8) * (is_hbd_mode + 1));
   4924             WORD32 i4_quality_preset = ps_ctxt->i4_quality_preset;
   4925             {
   4926                 UWORD8 *pu1_mem_base =
   4927                     (((UWORD8 *)ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].pv_base) +
   4928                      ctr * i4_memSize_perThread);
   4929 
   4930                 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_luma_recon_bufs[0] =
   4931                     pu1_mem_base + i4_lumaBufSize * 0;
   4932                 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_luma_recon_bufs[1] =
   4933                     pu1_mem_base + i4_lumaBufSize * 1;
   4934                 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_luma_recon_bufs[0] =
   4935                     pu1_mem_base + i4_lumaBufSize * 2;
   4936                 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_luma_recon_bufs[1] =
   4937                     pu1_mem_base + i4_lumaBufSize * 3;
   4938 
   4939                 pu1_mem_base += i4_lumaBufSize * 4;
   4940 
   4941                 switch(i4_quality_preset)
   4942                 {
   4943                 case IHEVCE_QUALITY_P0:
   4944                 {
   4945 #if ENABLE_CHROMA_RDOPT_EVAL_IN_PQ
   4946                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
   4947                         pu1_mem_base + i4_chromaBufSize * 0;
   4948                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
   4949                         pu1_mem_base + i4_chromaBufSize * 1;
   4950 #else
   4951                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
   4952                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
   4953 #endif
   4954 
   4955 #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_PQ
   4956                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
   4957                         pu1_mem_base + i4_chromaBufSize * 2;
   4958                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
   4959                         pu1_mem_base + i4_chromaBufSize * 3;
   4960                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
   4961                         pu1_mem_base + i4_chromaBufSize * 2;
   4962                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
   4963                         pu1_mem_base + i4_chromaBufSize * 3;
   4964 #else
   4965                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
   4966                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
   4967                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
   4968                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
   4969 #endif
   4970 
   4971                     break;
   4972                 }
   4973                 case IHEVCE_QUALITY_P2:
   4974                 {
   4975 #if ENABLE_CHROMA_RDOPT_EVAL_IN_HQ
   4976                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
   4977                         pu1_mem_base + i4_chromaBufSize * 0;
   4978                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
   4979                         pu1_mem_base + i4_chromaBufSize * 1;
   4980 #else
   4981                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
   4982                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
   4983 #endif
   4984 
   4985 #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HQ
   4986                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
   4987                         pu1_mem_base + i4_chromaBufSize * 2;
   4988                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
   4989                         pu1_mem_base + i4_chromaBufSize * 3;
   4990                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
   4991                         pu1_mem_base + i4_chromaBufSize * 2;
   4992                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
   4993                         pu1_mem_base + i4_chromaBufSize * 3;
   4994 #else
   4995                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
   4996                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
   4997                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
   4998                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
   4999 #endif
   5000 
   5001                     break;
   5002                 }
   5003                 case IHEVCE_QUALITY_P3:
   5004                 {
   5005 #if ENABLE_CHROMA_RDOPT_EVAL_IN_MS
   5006                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
   5007                         pu1_mem_base + i4_chromaBufSize * 0;
   5008                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
   5009                         pu1_mem_base + i4_chromaBufSize * 1;
   5010 #else
   5011                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
   5012                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
   5013 #endif
   5014 
   5015 #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_MS
   5016                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
   5017                         pu1_mem_base + i4_chromaBufSize * 2;
   5018                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
   5019                         pu1_mem_base + i4_chromaBufSize * 3;
   5020                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
   5021                         pu1_mem_base + i4_chromaBufSize * 2;
   5022                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
   5023                         pu1_mem_base + i4_chromaBufSize * 3;
   5024 #else
   5025                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
   5026                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
   5027                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
   5028                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
   5029 #endif
   5030 
   5031                     break;
   5032                 }
   5033                 case IHEVCE_QUALITY_P4:
   5034                 {
   5035 #if ENABLE_CHROMA_RDOPT_EVAL_IN_HS
   5036                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
   5037                         pu1_mem_base + i4_chromaBufSize * 0;
   5038                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
   5039                         pu1_mem_base + i4_chromaBufSize * 1;
   5040 #else
   5041                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
   5042                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
   5043 #endif
   5044 
   5045 #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HS
   5046                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
   5047                         pu1_mem_base + i4_chromaBufSize * 2;
   5048                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
   5049                         pu1_mem_base + i4_chromaBufSize * 3;
   5050                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
   5051                         pu1_mem_base + i4_chromaBufSize * 2;
   5052                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
   5053                         pu1_mem_base + i4_chromaBufSize * 3;
   5054 #else
   5055                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
   5056                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
   5057                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
   5058                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
   5059 #endif
   5060 
   5061                     break;
   5062                 }
   5063                 case IHEVCE_QUALITY_P5:
   5064                 {
   5065 #if ENABLE_CHROMA_RDOPT_EVAL_IN_XS
   5066                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
   5067                         pu1_mem_base + i4_chromaBufSize * 0;
   5068                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
   5069                         pu1_mem_base + i4_chromaBufSize * 1;
   5070 #else
   5071                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
   5072                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
   5073 #endif
   5074 
   5075 #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS
   5076                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
   5077                         pu1_mem_base + i4_chromaBufSize * 2;
   5078                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
   5079                         pu1_mem_base + i4_chromaBufSize * 3;
   5080                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
   5081                         pu1_mem_base + i4_chromaBufSize * 2;
   5082                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
   5083                         pu1_mem_base + i4_chromaBufSize * 3;
   5084 #else
   5085                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
   5086                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
   5087                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
   5088                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
   5089 #endif
   5090 
   5091                     break;
   5092                 }
   5093                 }
   5094             }
   5095 
   5096             ps_ctxt->as_cu_prms[0].s_recon_datastore.i4_lumaRecon_stride = MAX_CU_SIZE;
   5097             ps_ctxt->as_cu_prms[1].s_recon_datastore.i4_lumaRecon_stride = MAX_CU_SIZE;
   5098             ps_ctxt->as_cu_prms[0].s_recon_datastore.i4_chromaRecon_stride = MAX_CU_SIZE;
   5099             ps_ctxt->as_cu_prms[1].s_recon_datastore.i4_chromaRecon_stride = MAX_CU_SIZE;
   5100 
   5101         } /* Recon Datastore */
   5102 
   5103         /****************************************************/
   5104         /****************************************************/
   5105         /* ps_pps->i1_sign_data_hiding_flag  == UNHIDDEN    */
   5106         /* when NO_SBH. else HIDDEN                         */
   5107         /****************************************************/
   5108         /****************************************************/
   5109         /* Zero cbf tool is enabled by default for all presets */
   5110         ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
   5111 
   5112         if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3)
   5113         {
   5114             ps_ctxt->i4_quant_rounding_level = CU_LEVEL_QUANT_ROUNDING;
   5115             ps_ctxt->i4_chroma_quant_rounding_level = CHROMA_QUANT_ROUNDING;
   5116             ps_ctxt->i4_rdoq_level = ALL_CAND_RDOQ;
   5117             ps_ctxt->i4_sbh_level = ALL_CAND_SBH;
   5118         }
   5119         else if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P3)
   5120         {
   5121             ps_ctxt->i4_quant_rounding_level = FIXED_QUANT_ROUNDING;
   5122             ps_ctxt->i4_chroma_quant_rounding_level = FIXED_QUANT_ROUNDING;
   5123             ps_ctxt->i4_rdoq_level = NO_RDOQ;
   5124             ps_ctxt->i4_sbh_level = NO_SBH;
   5125         }
   5126         else
   5127         {
   5128             ps_ctxt->i4_quant_rounding_level = FIXED_QUANT_ROUNDING;
   5129             ps_ctxt->i4_chroma_quant_rounding_level = FIXED_QUANT_ROUNDING;
   5130             ps_ctxt->i4_rdoq_level = NO_RDOQ;
   5131             ps_ctxt->i4_sbh_level = NO_SBH;
   5132         }
   5133 
   5134 #if DISABLE_QUANT_ROUNDING
   5135         ps_ctxt->i4_quant_rounding_level = FIXED_QUANT_ROUNDING;
   5136         ps_ctxt->i4_chroma_quant_rounding_level = FIXED_QUANT_ROUNDING;
   5137 #endif
   5138         /*Disabling RDOQ only when spatial modulation is enabled
   5139                 as RDOQ degrades visual quality*/
   5140         if(ps_init_prms->s_config_prms.i4_cu_level_rc & 1)
   5141         {
   5142             ps_ctxt->i4_rdoq_level = NO_RDOQ;
   5143         }
   5144 
   5145 #if DISABLE_RDOQ
   5146         ps_ctxt->i4_rdoq_level = NO_RDOQ;
   5147 #endif
   5148 
   5149 #if DISABLE_SBH
   5150         ps_ctxt->i4_sbh_level = NO_SBH;
   5151 #endif
   5152 
   5153         /*Rounding factor calc based on previous cabac states */
   5154 
   5155         ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[0] = &ps_ctxt->i4_quant_round_4x4[0][0];
   5156         ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[1] = &ps_ctxt->i4_quant_round_8x8[0][0];
   5157         ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[2] = &ps_ctxt->i4_quant_round_16x16[0][0];
   5158         ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[4] = &ps_ctxt->i4_quant_round_32x32[0][0];
   5159 
   5160         ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[0] = &ps_ctxt->i4_quant_round_4x4[1][0];
   5161         ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[1] = &ps_ctxt->i4_quant_round_8x8[1][0];
   5162         ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[2] = &ps_ctxt->i4_quant_round_16x16[1][0];
   5163         ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[4] = &ps_ctxt->i4_quant_round_32x32[1][0];
   5164 
   5165         ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[0] = &ps_ctxt->i4_quant_round_cr_4x4[0][0];
   5166         ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[1] = &ps_ctxt->i4_quant_round_cr_8x8[0][0];
   5167         ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[2] = &ps_ctxt->i4_quant_round_cr_16x16[0][0];
   5168 
   5169         ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[0] = &ps_ctxt->i4_quant_round_cr_4x4[1][0];
   5170         ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[1] = &ps_ctxt->i4_quant_round_cr_8x8[1][0];
   5171         ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[2] = &ps_ctxt->i4_quant_round_cr_16x16[1][0];
   5172 
   5173         /****************************************************************************************/
   5174         /* Setting the perform rdoq and sbh flags appropriately                                 */
   5175         /****************************************************************************************/
   5176         {
   5177             /******************************************/
   5178             /* For best cand rdoq and/or sbh          */
   5179             /******************************************/
   5180             ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq =
   5181                 (ps_ctxt->i4_rdoq_level == BEST_CAND_RDOQ);
   5182             /* To do SBH we need the quant and iquant data. This would mean we need to do quantization again, which would mean
   5183             we would have to do RDOQ again.*/
   5184             ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq =
   5185                 ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq ||
   5186                 ((BEST_CAND_SBH == ps_ctxt->i4_sbh_level) &&
   5187                  (ALL_CAND_RDOQ == ps_ctxt->i4_rdoq_level));
   5188 
   5189             ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh =
   5190                 (ps_ctxt->i4_sbh_level == BEST_CAND_SBH);
   5191 
   5192             /* SBH should be performed if
   5193             a) i4_sbh_level is BEST_CAND_SBH.
   5194             b) For all quality presets above medium speed(i.e. high speed and extreme speed) and
   5195             if SBH has to be done because for these presets the quant, iquant and scan coeff
   5196             data are calculated in this function and not during the RDOPT stage*/
   5197 
   5198             /* RDOQ will change the coefficients. If coefficients are changed, we will have to do sbh again*/
   5199             ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh =
   5200                 ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh ||
   5201                 ((BEST_CAND_RDOQ == ps_ctxt->i4_rdoq_level) &&
   5202                  (ALL_CAND_SBH == ps_ctxt->i4_sbh_level));
   5203 
   5204             /******************************************/
   5205             /* For all cand rdoq and/or sbh          */
   5206             /******************************************/
   5207             ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq =
   5208                 (ps_ctxt->i4_rdoq_level == ALL_CAND_RDOQ);
   5209             ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh =
   5210                 (ps_ctxt->i4_sbh_level == ALL_CAND_SBH);
   5211             ps_ctxt->s_rdoq_sbh_ctxt.i4_bit_depth =
   5212                 ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth;
   5213         }
   5214 
   5215         if(!is_hbd_mode)
   5216         {
   5217             if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 1)
   5218             {
   5219                 if(ps_ctxt->i4_rdoq_level == NO_RDOQ)
   5220                 {
   5221                     ps_ctxt->apf_quant_iquant_ssd[0] =
   5222                         ps_func_selector->ihevc_quant_iquant_ssd_fptr;
   5223                     ps_ctxt->apf_quant_iquant_ssd[2] = ps_func_selector->ihevc_quant_iquant_fptr;
   5224                 }
   5225                 else
   5226                 {
   5227                     ps_ctxt->apf_quant_iquant_ssd[0] =
   5228                         ps_func_selector->ihevc_quant_iquant_ssd_rdoq_fptr;
   5229                     ps_ctxt->apf_quant_iquant_ssd[2] =
   5230                         ps_func_selector->ihevc_quant_iquant_rdoq_fptr;
   5231                 }
   5232 
   5233                 /*If coef level RDOQ is enabled, quantization based on corr. error to be done */
   5234                 if(ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING)
   5235                 {
   5236                     ps_ctxt->apf_quant_iquant_ssd[1] =
   5237                         ps_func_selector->ihevc_q_iq_ssd_var_rnd_fact_fptr;
   5238                     ps_ctxt->apf_quant_iquant_ssd[3] =
   5239                         ps_func_selector->ihevc_q_iq_var_rnd_fact_fptr;
   5240                 }
   5241                 else
   5242                 {
   5243                     ps_ctxt->apf_quant_iquant_ssd[1] =
   5244                         ps_func_selector->ihevc_quant_iquant_ssd_fptr;
   5245                     ps_ctxt->apf_quant_iquant_ssd[3] = ps_func_selector->ihevc_quant_iquant_fptr;
   5246                 }
   5247             }
   5248             else if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 0)
   5249             {
   5250                 if(ps_ctxt->i4_rdoq_level == NO_RDOQ)
   5251                 {
   5252                     ps_ctxt->apf_quant_iquant_ssd[0] =
   5253                         ps_func_selector->ihevc_quant_iquant_ssd_flat_scale_mat_fptr;
   5254                     ps_ctxt->apf_quant_iquant_ssd[2] =
   5255                         ps_func_selector->ihevc_quant_iquant_flat_scale_mat_fptr;
   5256                 }
   5257                 else
   5258                 {
   5259                     ps_ctxt->apf_quant_iquant_ssd[0] =
   5260                         ps_func_selector->ihevc_quant_iquant_ssd_flat_scale_mat_rdoq_fptr;
   5261                     ps_ctxt->apf_quant_iquant_ssd[2] =
   5262                         ps_func_selector->ihevc_quant_iquant_flat_scale_mat_rdoq_fptr;
   5263                 }
   5264 
   5265                 /*If coef level RDOQ is enabled, quantization based on corr. error to be done */
   5266                 if(ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING)
   5267                 {
   5268                     ps_ctxt->apf_quant_iquant_ssd[1] =
   5269                         ps_func_selector->ihevc_q_iq_ssd_flat_scale_mat_var_rnd_fact_fptr;
   5270                     ps_ctxt->apf_quant_iquant_ssd[3] =
   5271                         ps_func_selector->ihevc_q_iq_flat_scale_mat_var_rnd_fact_fptr;
   5272                 }
   5273                 else
   5274                 {
   5275                     ps_ctxt->apf_quant_iquant_ssd[1] =
   5276                         ps_func_selector->ihevc_quant_iquant_ssd_flat_scale_mat_fptr;
   5277                     ps_ctxt->apf_quant_iquant_ssd[3] =
   5278                         ps_func_selector->ihevc_quant_iquant_flat_scale_mat_fptr;
   5279                 }
   5280             }
   5281 
   5282             ps_ctxt->s_sao_ctxt_t.apf_sao_luma[0] =
   5283                 ps_func_selector->ihevc_sao_edge_offset_class0_fptr;
   5284             ps_ctxt->s_sao_ctxt_t.apf_sao_luma[1] =
   5285                 ps_func_selector->ihevc_sao_edge_offset_class1_fptr;
   5286             ps_ctxt->s_sao_ctxt_t.apf_sao_luma[2] =
   5287                 ps_func_selector->ihevc_sao_edge_offset_class2_fptr;
   5288             ps_ctxt->s_sao_ctxt_t.apf_sao_luma[3] =
   5289                 ps_func_selector->ihevc_sao_edge_offset_class3_fptr;
   5290 
   5291             ps_ctxt->s_sao_ctxt_t.apf_sao_chroma[0] =
   5292                 ps_func_selector->ihevc_sao_edge_offset_class0_chroma_fptr;
   5293             ps_ctxt->s_sao_ctxt_t.apf_sao_chroma[1] =
   5294                 ps_func_selector->ihevc_sao_edge_offset_class1_chroma_fptr;
   5295             ps_ctxt->s_sao_ctxt_t.apf_sao_chroma[2] =
   5296                 ps_func_selector->ihevc_sao_edge_offset_class2_chroma_fptr;
   5297             ps_ctxt->s_sao_ctxt_t.apf_sao_chroma[3] =
   5298                 ps_func_selector->ihevc_sao_edge_offset_class3_chroma_fptr;
   5299 
   5300             ps_ctxt->apf_it_recon[0] = ps_func_selector->ihevc_itrans_recon_4x4_ttype1_fptr;
   5301             ps_ctxt->apf_it_recon[1] = ps_func_selector->ihevc_itrans_recon_4x4_fptr;
   5302             ps_ctxt->apf_it_recon[2] = ps_func_selector->ihevc_itrans_recon_8x8_fptr;
   5303             ps_ctxt->apf_it_recon[3] = ps_func_selector->ihevc_itrans_recon_16x16_fptr;
   5304             ps_ctxt->apf_it_recon[4] = ps_func_selector->ihevc_itrans_recon_32x32_fptr;
   5305 
   5306             ps_ctxt->apf_chrm_it_recon[0] = ps_func_selector->ihevc_chroma_itrans_recon_4x4_fptr;
   5307             ps_ctxt->apf_chrm_it_recon[1] = ps_func_selector->ihevc_chroma_itrans_recon_8x8_fptr;
   5308             ps_ctxt->apf_chrm_it_recon[2] = ps_func_selector->ihevc_chroma_itrans_recon_16x16_fptr;
   5309 
   5310             ps_ctxt->apf_resd_trns[0] = ps_func_selector->ihevc_resi_trans_4x4_ttype1_fptr;
   5311             ps_ctxt->apf_resd_trns[1] = ps_func_selector->ihevc_resi_trans_4x4_fptr;
   5312             ps_ctxt->apf_resd_trns[2] = ps_func_selector->ihevc_resi_trans_8x8_fptr;
   5313             ps_ctxt->apf_resd_trns[3] = ps_func_selector->ihevc_resi_trans_16x16_fptr;
   5314             ps_ctxt->apf_resd_trns[4] = ps_func_selector->ihevc_resi_trans_32x32_fptr;
   5315 
   5316             ps_ctxt->apf_chrm_resd_trns[0] = ps_func_selector->ihevc_resi_trans_4x4_fptr;
   5317             ps_ctxt->apf_chrm_resd_trns[1] = ps_func_selector->ihevc_resi_trans_8x8_fptr;
   5318             ps_ctxt->apf_chrm_resd_trns[2] = ps_func_selector->ihevc_resi_trans_16x16_fptr;
   5319 
   5320             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_0] =
   5321                 ps_func_selector->ihevc_intra_pred_luma_planar_fptr;
   5322             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_1] = ps_func_selector->ihevc_intra_pred_luma_dc_fptr;
   5323             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_2] =
   5324                 ps_func_selector->ihevc_intra_pred_luma_mode2_fptr;
   5325             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_3TO9] =
   5326                 ps_func_selector->ihevc_intra_pred_luma_mode_3_to_9_fptr;
   5327             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_10] =
   5328                 ps_func_selector->ihevc_intra_pred_luma_horz_fptr;
   5329             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_11TO17] =
   5330                 ps_func_selector->ihevc_intra_pred_luma_mode_11_to_17_fptr;
   5331             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_18_34] =
   5332                 ps_func_selector->ihevc_intra_pred_luma_mode_18_34_fptr;
   5333             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_19TO25] =
   5334                 ps_func_selector->ihevc_intra_pred_luma_mode_19_to_25_fptr;
   5335             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_26] = ps_func_selector->ihevc_intra_pred_luma_ver_fptr;
   5336             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_27TO33] =
   5337                 ps_func_selector->ihevc_intra_pred_luma_mode_27_to_33_fptr;
   5338 
   5339             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_0] =
   5340                 ps_func_selector->ihevc_intra_pred_chroma_planar_fptr;
   5341             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_1] =
   5342                 ps_func_selector->ihevc_intra_pred_chroma_dc_fptr;
   5343             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_2] =
   5344                 ps_func_selector->ihevc_intra_pred_chroma_mode2_fptr;
   5345             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_3TO9] =
   5346                 ps_func_selector->ihevc_intra_pred_chroma_mode_3_to_9_fptr;
   5347             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_10] =
   5348                 ps_func_selector->ihevc_intra_pred_chroma_horz_fptr;
   5349             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_11TO17] =
   5350                 ps_func_selector->ihevc_intra_pred_chroma_mode_11_to_17_fptr;
   5351             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_18_34] =
   5352                 ps_func_selector->ihevc_intra_pred_chroma_mode_18_34_fptr;
   5353             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_19TO25] =
   5354                 ps_func_selector->ihevc_intra_pred_chroma_mode_19_to_25_fptr;
   5355             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_26] =
   5356                 ps_func_selector->ihevc_intra_pred_chroma_ver_fptr;
   5357             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_27TO33] =
   5358                 ps_func_selector->ihevc_intra_pred_chroma_mode_27_to_33_fptr;
   5359 
   5360             ps_ctxt->apf_chrm_resd_trns_had[0] =
   5361                 (pf_res_trans_luma_had_chroma)ps_ctxt->s_cmn_opt_func.pf_chroma_HAD_4x4_8bit;
   5362             ps_ctxt->apf_chrm_resd_trns_had[1] =
   5363                 (pf_res_trans_luma_had_chroma)ps_ctxt->s_cmn_opt_func.pf_chroma_HAD_8x8_8bit;
   5364             ps_ctxt->apf_chrm_resd_trns_had[2] =
   5365                 (pf_res_trans_luma_had_chroma)ps_ctxt->s_cmn_opt_func.pf_chroma_HAD_16x16_8bit;
   5366         }
   5367 
   5368         if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 0)
   5369         {
   5370             /* initialise the scale & rescale matrices */
   5371             ps_ctxt->api2_scal_mat[0] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
   5372             ps_ctxt->api2_scal_mat[1] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
   5373             ps_ctxt->api2_scal_mat[2] = (WORD16 *)&gi2_flat_scale_mat_8x8[0];
   5374             ps_ctxt->api2_scal_mat[3] = (WORD16 *)&gi2_flat_scale_mat_16x16[0];
   5375             ps_ctxt->api2_scal_mat[4] = (WORD16 *)&gi2_flat_scale_mat_32x32[0];
   5376             /*init for inter matrix*/
   5377             ps_ctxt->api2_scal_mat[5] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
   5378             ps_ctxt->api2_scal_mat[6] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
   5379             ps_ctxt->api2_scal_mat[7] = (WORD16 *)&gi2_flat_scale_mat_8x8[0];
   5380             ps_ctxt->api2_scal_mat[8] = (WORD16 *)&gi2_flat_scale_mat_16x16[0];
   5381             ps_ctxt->api2_scal_mat[9] = (WORD16 *)&gi2_flat_scale_mat_32x32[0];
   5382 
   5383             /*init for rescale matrix*/
   5384             ps_ctxt->api2_rescal_mat[0] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
   5385             ps_ctxt->api2_rescal_mat[1] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
   5386             ps_ctxt->api2_rescal_mat[2] = (WORD16 *)&gi2_flat_rescale_mat_8x8[0];
   5387             ps_ctxt->api2_rescal_mat[3] = (WORD16 *)&gi2_flat_rescale_mat_16x16[0];
   5388             ps_ctxt->api2_rescal_mat[4] = (WORD16 *)&gi2_flat_rescale_mat_32x32[0];
   5389             /*init for rescale inter matrix*/
   5390             ps_ctxt->api2_rescal_mat[5] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
   5391             ps_ctxt->api2_rescal_mat[6] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
   5392             ps_ctxt->api2_rescal_mat[7] = (WORD16 *)&gi2_flat_rescale_mat_8x8[0];
   5393             ps_ctxt->api2_rescal_mat[8] = (WORD16 *)&gi2_flat_rescale_mat_16x16[0];
   5394             ps_ctxt->api2_rescal_mat[9] = (WORD16 *)&gi2_flat_rescale_mat_32x32[0];
   5395         }
   5396         else if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 1)
   5397         {
   5398             /* initialise the scale & rescale matrices */
   5399             ps_ctxt->api2_scal_mat[0] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
   5400             ps_ctxt->api2_scal_mat[1] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
   5401             ps_ctxt->api2_scal_mat[2] = (WORD16 *)&gi2_intra_default_scale_mat_8x8[0];
   5402             ps_ctxt->api2_scal_mat[3] = (WORD16 *)&gi2_intra_default_scale_mat_16x16[0];
   5403             ps_ctxt->api2_scal_mat[4] = (WORD16 *)&gi2_intra_default_scale_mat_32x32[0];
   5404             /*init for inter matrix*/
   5405             ps_ctxt->api2_scal_mat[5] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
   5406             ps_ctxt->api2_scal_mat[6] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
   5407             ps_ctxt->api2_scal_mat[7] = (WORD16 *)&gi2_inter_default_scale_mat_8x8[0];
   5408             ps_ctxt->api2_scal_mat[8] = (WORD16 *)&gi2_inter_default_scale_mat_16x16[0];
   5409             ps_ctxt->api2_scal_mat[9] = (WORD16 *)&gi2_inter_default_scale_mat_32x32[0];
   5410 
   5411             /*init for rescale matrix*/
   5412             ps_ctxt->api2_rescal_mat[0] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
   5413             ps_ctxt->api2_rescal_mat[1] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
   5414             ps_ctxt->api2_rescal_mat[2] = (WORD16 *)&gi2_intra_default_rescale_mat_8x8[0];
   5415             ps_ctxt->api2_rescal_mat[3] = (WORD16 *)&gi2_intra_default_rescale_mat_16x16[0];
   5416             ps_ctxt->api2_rescal_mat[4] = (WORD16 *)&gi2_intra_default_rescale_mat_32x32[0];
   5417             /*init for rescale inter matrix*/
   5418             ps_ctxt->api2_rescal_mat[5] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
   5419             ps_ctxt->api2_rescal_mat[6] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
   5420             ps_ctxt->api2_rescal_mat[7] = (WORD16 *)&gi2_inter_default_rescale_mat_8x8[0];
   5421             ps_ctxt->api2_rescal_mat[8] = (WORD16 *)&gi2_inter_default_rescale_mat_16x16[0];
   5422             ps_ctxt->api2_rescal_mat[9] = (WORD16 *)&gi2_inter_default_rescale_mat_32x32[0];
   5423         }
   5424         else
   5425         {
   5426             ASSERT(0);
   5427         }
   5428 
   5429         /* Not recomputing Luma pred-data and header data for any preset now */
   5430         ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 0;
   5431         ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data = 0;
   5432         ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data = 1;
   5433 
   5434         switch(ps_ctxt->i4_quality_preset)
   5435         {
   5436         case IHEVCE_QUALITY_P0:
   5437         {
   5438             ps_ctxt->i4_max_merge_candidates = 5;
   5439             ps_ctxt->i4_use_satd_for_merge_eval = 1;
   5440             ps_ctxt->u1_use_top_at_ctb_boundary = 1;
   5441             ps_ctxt->u1_use_early_cbf_data = 0;
   5442             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_PQ;
   5443             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
   5444                 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_PQ;
   5445 
   5446             break;
   5447         }
   5448         case IHEVCE_QUALITY_P2:
   5449         {
   5450             ps_ctxt->i4_max_merge_candidates = 5;
   5451             ps_ctxt->i4_use_satd_for_merge_eval = 1;
   5452             ps_ctxt->u1_use_top_at_ctb_boundary = 1;
   5453             ps_ctxt->u1_use_early_cbf_data = 0;
   5454 
   5455             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_HQ;
   5456             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
   5457                 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HQ;
   5458 
   5459             break;
   5460         }
   5461         case IHEVCE_QUALITY_P3:
   5462         {
   5463             ps_ctxt->i4_max_merge_candidates = 3;
   5464             ps_ctxt->i4_use_satd_for_merge_eval = 1;
   5465             ps_ctxt->u1_use_top_at_ctb_boundary = 0;
   5466 
   5467             ps_ctxt->u1_use_early_cbf_data = 0;
   5468             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_MS;
   5469             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
   5470                 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_MS;
   5471 
   5472             break;
   5473         }
   5474         case IHEVCE_QUALITY_P4:
   5475         {
   5476             ps_ctxt->i4_max_merge_candidates = 2;
   5477             ps_ctxt->i4_use_satd_for_merge_eval = 1;
   5478             ps_ctxt->u1_use_top_at_ctb_boundary = 0;
   5479             ps_ctxt->u1_use_early_cbf_data = 0;
   5480             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_HS;
   5481             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
   5482                 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HS;
   5483 
   5484             break;
   5485         }
   5486         case IHEVCE_QUALITY_P5:
   5487         {
   5488             ps_ctxt->i4_max_merge_candidates = 2;
   5489             ps_ctxt->i4_use_satd_for_merge_eval = 0;
   5490             ps_ctxt->u1_use_top_at_ctb_boundary = 0;
   5491             ps_ctxt->u1_use_early_cbf_data = 0;
   5492             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_XS;
   5493             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
   5494                 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS;
   5495 
   5496             break;
   5497         }
   5498         case IHEVCE_QUALITY_P6:
   5499         {
   5500             ps_ctxt->i4_max_merge_candidates = 2;
   5501             ps_ctxt->i4_use_satd_for_merge_eval = 0;
   5502             ps_ctxt->u1_use_top_at_ctb_boundary = 0;
   5503             ps_ctxt->u1_use_early_cbf_data = EARLY_CBF_ON;
   5504             break;
   5505         }
   5506         default:
   5507         {
   5508             ASSERT(0);
   5509         }
   5510         }
   5511 
   5512 #if DISABLE_SKIP_AND_MERGE_EVAL
   5513         ps_ctxt->i4_max_merge_candidates = 0;
   5514 #endif
   5515 
   5516         ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data =
   5517             !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
   5518 
   5519         /*initialize memory for RC related parameters required/populated by enc_loop */
   5520         /* the allocated memory is distributed as follows assuming encoder is running for 3 bit-rate instnaces
   5521         |-------|-> Thread 0, instance 0
   5522         |       |
   5523         |       |
   5524         |       |
   5525         |-------|-> thread 0, instance 1
   5526         |       |
   5527         |       |
   5528         |       |
   5529         |-------|-> thread 0, instance 2
   5530         |       |
   5531         |       |
   5532         |       |
   5533         |-------|-> thread 1, instance 0
   5534         |       |
   5535         |       |
   5536         |       |
   5537         |-------|-> thread 1, instance 1
   5538         |       |
   5539         |       |
   5540         |       |
   5541         |-------|-> thread 1, instance 2
   5542         ...         ...
   5543 
   5544         Each thread will collate the data corresponding to the bit-rate instance it's running at the appropriate place.
   5545         Finally, one thread will become master and collate the data from all the threads */
   5546         for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
   5547         {
   5548             for(i = 0; i < i4_num_bitrate_inst; i++)
   5549             {
   5550                 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i] = ps_enc_loop_rc_params;
   5551                 ps_enc_loop_rc_params++;
   5552             }
   5553         }
   5554         /* Non-Luma modes for Chroma are evaluated only in HIGH QUALITY preset */
   5555 
   5556 #if !ENABLE_SEPARATE_LUMA_CHROMA_INTRA_MODE
   5557         ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd = 0;
   5558 #endif
   5559 
   5560         ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[TU_EQ_CU].i4_iq_buff_stride =
   5561             MAX_TU_SIZE;
   5562         ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[TU_EQ_CU_DIV2].i4_iq_buff_stride =
   5563             MAX_TU_SIZE;
   5564         /*Multiplying by two to account for interleaving of cb and cr*/
   5565         ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[TU_EQ_CU].i4_pred_stride = MAX_TU_SIZE
   5566                                                                                        << 1;
   5567         ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[TU_EQ_CU_DIV2].i4_pred_stride =
   5568             MAX_TU_SIZE << 1;
   5569 
   5570         /*     Memory for a frame level memory to store tile-id                  */
   5571         /*              corresponding to each CTB of frame                       */
   5572         ps_ctxt->pi4_offset_for_last_cu_qp = &ps_master_ctxt->ai4_offset_for_last_cu_qp[0];
   5573 
   5574         ps_ctxt->i4_qp_mod = ps_init_prms->s_config_prms.i4_cu_level_rc & 1;
   5575         /* psy rd strength is a run time parameter controlled by bit field 5-7 in the VQET field.*/
   5576         /* we disable psyrd if the psy strength is zero or the BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER field is not set */
   5577         if(ps_init_prms->s_coding_tools_prms.i4_vqet &
   5578            (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER))
   5579         {
   5580             UWORD32 psy_strength;
   5581             UWORD32 psy_strength_mask =
   5582                 224;  // only bits 5,6,7 are ones. These three bits represent the psy strength
   5583             psy_strength = ps_init_prms->s_coding_tools_prms.i4_vqet & psy_strength_mask;
   5584             ps_ctxt->u1_enable_psyRDOPT = 1;
   5585             ps_ctxt->u4_psy_strength = psy_strength >> BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_1;
   5586             if(psy_strength == 0)
   5587             {
   5588                 ps_ctxt->u1_enable_psyRDOPT = 0;
   5589                 ps_ctxt->u4_psy_strength = 0;
   5590             }
   5591         }
   5592 
   5593         ps_ctxt->u1_is_stasino_enabled =
   5594             ((ps_init_prms->s_coding_tools_prms.i4_vqet &
   5595               (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER)) &&
   5596              (ps_init_prms->s_coding_tools_prms.i4_vqet &
   5597               (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_NOISE_PRESERVATION)));
   5598 
   5599         ps_ctxt->u1_max_inter_tr_depth = ps_init_prms->s_config_prms.i4_max_tr_tree_depth_nI;
   5600         ps_ctxt->u1_max_intra_tr_depth = ps_init_prms->s_config_prms.i4_max_tr_tree_depth_I;
   5601         ps_ctxt++;
   5602     }
   5603     /* Store Tile params base into EncLoop Master context */
   5604     ps_master_ctxt->pv_tile_params_base = (void *)ps_tile_params_base;
   5605 
   5606     if(1 == ps_tile_params_base->i4_tiles_enabled_flag)
   5607     {
   5608         i4_num_tile_cols = ps_tile_params_base->i4_num_tile_cols;
   5609     }
   5610 
   5611     /* Updating ai4_offset_for_last_cu_qp[] array for all tile-columns of frame */
   5612     /* Loop over all tile-cols in frame */
   5613     for(ctr = 0; ctr < i4_num_tile_cols; ctr++)
   5614     {
   5615         WORD32 i4_tile_col_wd_in_ctb_unit =
   5616             (ps_tile_params_base + ctr)->i4_curr_tile_wd_in_ctb_unit;
   5617         WORD32 offset_x;
   5618 
   5619         if(ctr == (i4_num_tile_cols - 1))
   5620         { /* Last tile-column of frame */
   5621             WORD32 min_cu_size = 1 << ps_init_prms->s_config_prms.i4_min_log2_cu_size;
   5622 
   5623             WORD32 cu_aligned_pic_wd =
   5624                 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width +
   5625                 SET_CTB_ALIGN(
   5626                     ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width,
   5627                     min_cu_size);
   5628 
   5629             WORD32 last_hz_ctb_wd = MAX_CTB_SIZE - (u4_width - cu_aligned_pic_wd);
   5630 
   5631             offset_x = (i4_tile_col_wd_in_ctb_unit - 1) * MAX_CTB_SIZE;
   5632             offset_x += last_hz_ctb_wd;
   5633         }
   5634         else
   5635         { /* Not the last tile-row of frame */
   5636             offset_x = (i4_tile_col_wd_in_ctb_unit)*MAX_CTB_SIZE;
   5637         }
   5638 
   5639         offset_x /= 4;
   5640         offset_x -= 1;
   5641 
   5642         ps_master_ctxt->ai4_offset_for_last_cu_qp[ctr] = offset_x;
   5643     }
   5644 
   5645     n_tabs = NUM_ENC_LOOP_MEM_RECS;
   5646 
   5647     /*store num bit-rate instances in the master context */
   5648     ps_master_ctxt->i4_num_bitrates = i4_num_bitrate_inst;
   5649     ps_master_ctxt->i4_num_enc_loop_frm_pllel = i4_num_enc_loop_frm_pllel;
   5650     /*************************************************************************/
   5651     /* --- EncLoop Deblock sync Dep Mngr Mem init --                         */
   5652     /*************************************************************************/
   5653     {
   5654         WORD32 count;
   5655         WORD32 num_vert_units, num_blks_in_row;
   5656         WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
   5657         WORD32 wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
   5658 
   5659         ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units);
   5660         ihevce_enc_loop_dblk_get_prms_dep_mngr(wd, &num_blks_in_row);
   5661         ASSERT(num_vert_units > 0);
   5662         ASSERT(num_blks_in_row > 0);
   5663 
   5664         for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
   5665         {
   5666             for(i = 0; i < i4_num_bitrate_inst; i++)
   5667             {
   5668                 ps_master_ctxt->aapv_dep_mngr_enc_loop_dblk[count][i] = ihevce_dmgr_init(
   5669                     &ps_mem_tab[n_tabs],
   5670                     pv_osal_handle,
   5671                     DEP_MNGR_ROW_ROW_SYNC,
   5672                     num_vert_units,
   5673                     num_blks_in_row,
   5674                     i4_num_tile_cols, /* Number of Col Tiles */
   5675                     i4_num_proc_thrds,
   5676                     0 /*Sem Disabled*/
   5677                 );
   5678 
   5679                 n_tabs += ihevce_dmgr_get_num_mem_recs();
   5680             }
   5681         }
   5682     }
   5683     /*************************************************************************/
   5684     /* --- EncLoop Top-Right CU synnc Dep Mngr Mem init --                   */
   5685     /*************************************************************************/
   5686     {
   5687         WORD32 count;
   5688         WORD32 num_vert_units, num_blks_in_row;
   5689         WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
   5690         WORD32 wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
   5691 
   5692         WORD32 i4_sem = 0;
   5693 
   5694         if(ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset >=
   5695            IHEVCE_QUALITY_P4)
   5696             i4_sem = 0;
   5697         else
   5698             i4_sem = 1;
   5699         ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units);
   5700         /* For Top-Right CU sync, adding one more CTB since value updation */
   5701         /* happens in that way for the last CTB in the row                 */
   5702         num_blks_in_row = wd + SET_CTB_ALIGN(wd, MAX_CU_SIZE);
   5703         num_blks_in_row += MAX_CTB_SIZE;
   5704 
   5705         ASSERT(num_vert_units > 0);
   5706         ASSERT(num_blks_in_row > 0);
   5707 
   5708         for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
   5709         {
   5710             for(i = 0; i < i4_num_bitrate_inst; i++)
   5711             {
   5712                 /* For ES/HS, CU level updates uses spin-locks than semaphore */
   5713                 {
   5714                     ps_master_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[count][i] =
   5715                         ihevce_dmgr_init(
   5716                             &ps_mem_tab[n_tabs],
   5717                             pv_osal_handle,
   5718                             DEP_MNGR_ROW_ROW_SYNC,
   5719                             num_vert_units,
   5720                             num_blks_in_row,
   5721                             i4_num_tile_cols, /* Number of Col Tiles */
   5722                             i4_num_proc_thrds,
   5723                             i4_sem /*Sem Disabled*/
   5724                         );
   5725                 }
   5726                 n_tabs += ihevce_dmgr_get_num_mem_recs();
   5727             }
   5728         }
   5729     }
   5730 
   5731     for(i = 1; i < 5; i++)
   5732     {
   5733         WORD32 i4_log2_trans_size = i + 1;
   5734         WORD32 i4_bit_depth = ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth;
   5735 
   5736         ga_trans_shift[i] = (MAX_TR_DYNAMIC_RANGE - i4_bit_depth - i4_log2_trans_size) << 1;
   5737     }
   5738 
   5739     ga_trans_shift[0] = ga_trans_shift[1];
   5740 
   5741     /* return the handle to caller */
   5742     return ((void *)ps_master_ctxt);
   5743 }
   5744 
   5745 /*!
   5746 ******************************************************************************
   5747 * \if Function name : ihevce_enc_loop_reg_sem_hdls \endif
   5748 *
   5749 * \brief
   5750 *    Intialization for ENC_LOOP context state structure .
   5751 *
   5752 * \param[in] ps_mem_tab : pointer to memory descriptors table
   5753 * \param[in] ppv_sem_hdls : Array of semaphore handles
   5754 * \param[in] i4_num_proc_thrds : Number of processing threads
   5755 *
   5756 * \return
   5757 *    None
   5758 *
   5759 * \author
   5760 *  Ittiam
   5761 *
   5762 *****************************************************************************
   5763 */
   5764 void ihevce_enc_loop_reg_sem_hdls(
   5765     void *pv_enc_loop_ctxt, void **ppv_sem_hdls, WORD32 i4_num_proc_thrds)
   5766 {
   5767     ihevce_enc_loop_master_ctxt_t *ps_master_ctxt;
   5768     WORD32 i, enc_frm_id;
   5769 
   5770     ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
   5771 
   5772     /*************************************************************************/
   5773     /* --- EncLoop Deblock sync Dep Mngr reg Semaphores --                   */
   5774     /*************************************************************************/
   5775     for(enc_frm_id = 0; enc_frm_id < ps_master_ctxt->i4_num_enc_loop_frm_pllel; enc_frm_id++)
   5776     {
   5777         for(i = 0; i < ps_master_ctxt->i4_num_bitrates; i++)
   5778         {
   5779             ihevce_dmgr_reg_sem_hdls(
   5780                 ps_master_ctxt->aapv_dep_mngr_enc_loop_dblk[enc_frm_id][i],
   5781                 ppv_sem_hdls,
   5782                 i4_num_proc_thrds);
   5783         }
   5784     }
   5785 
   5786     /*************************************************************************/
   5787     /* --- EncLoop Top-Right CU synnc Dep Mngr reg Semaphores --             */
   5788     /*************************************************************************/
   5789     for(enc_frm_id = 0; enc_frm_id < ps_master_ctxt->i4_num_enc_loop_frm_pllel; enc_frm_id++)
   5790     {
   5791         for(i = 0; i < ps_master_ctxt->i4_num_bitrates; i++)
   5792         {
   5793             ihevce_dmgr_reg_sem_hdls(
   5794                 ps_master_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[enc_frm_id][i],
   5795                 ppv_sem_hdls,
   5796                 i4_num_proc_thrds);
   5797         }
   5798     }
   5799 
   5800     return;
   5801 }
   5802 
   5803 /*!
   5804 ******************************************************************************
   5805 * \if Function name : ihevce_enc_loop_delete \endif
   5806 *
   5807 * \brief
   5808 *    Destroy EncLoop module
   5809 * Note : Only Destroys the resources allocated in the module like
   5810 *   semaphore,etc. Memory free is done Separately using memtabs
   5811 *
   5812 * \param[in] pv_me_ctxt : pointer to EncLoop ctxt
   5813 *
   5814 * \return
   5815 *    None
   5816 *
   5817 * \author
   5818 *  Ittiam
   5819 *
   5820 *****************************************************************************
   5821 */
   5822 void ihevce_enc_loop_delete(void *pv_enc_loop_ctxt)
   5823 {
   5824     ihevce_enc_loop_master_ctxt_t *ps_enc_loop_ctxt;
   5825     WORD32 ctr, enc_frm_id;
   5826 
   5827     ps_enc_loop_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
   5828 
   5829     for(enc_frm_id = 0; enc_frm_id < ps_enc_loop_ctxt->i4_num_enc_loop_frm_pllel; enc_frm_id++)
   5830     {
   5831         for(ctr = 0; ctr < ps_enc_loop_ctxt->i4_num_bitrates; ctr++)
   5832         {
   5833             /* --- EncLoop Deblock sync Dep Mngr Delete --*/
   5834             ihevce_dmgr_del(ps_enc_loop_ctxt->aapv_dep_mngr_enc_loop_dblk[enc_frm_id][ctr]);
   5835             /* --- EncLoop Top-Right CU sync Dep Mngr Delete --*/
   5836             ihevce_dmgr_del(ps_enc_loop_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[enc_frm_id][ctr]);
   5837         }
   5838     }
   5839 }
   5840 
   5841 /*!
   5842 ******************************************************************************
   5843 * \if Function name : ihevce_enc_loop_dep_mngr_frame_reset \endif
   5844 *
   5845 * \brief
   5846 *    Frame level Reset for the Dependency Mngrs local to EncLoop.,
   5847 *    ie CU_TopRight and Dblk
   5848 *
   5849 * \param[in] pv_enc_loop_ctxt       : Enc_loop context pointer
   5850 *
   5851 * \return
   5852 *    None
   5853 *
   5854 * \author
   5855 *  Ittiam
   5856 *
   5857 *****************************************************************************
   5858 */
   5859 void ihevce_enc_loop_dep_mngr_frame_reset(void *pv_enc_loop_ctxt, WORD32 enc_frm_id)
   5860 {
   5861     WORD32 ctr, frame_id;
   5862     ihevce_enc_loop_master_ctxt_t *ps_master_ctxt;
   5863 
   5864     ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
   5865 
   5866     if(1 == ps_master_ctxt->i4_num_enc_loop_frm_pllel)
   5867     {
   5868         frame_id = 0;
   5869     }
   5870     else
   5871     {
   5872         frame_id = enc_frm_id;
   5873     }
   5874 
   5875     for(ctr = 0; ctr < ps_master_ctxt->i4_num_bitrates; ctr++)
   5876     {
   5877         /* Dep. Mngr : Reset the num ctb Deblocked in every row  for ENC sync */
   5878         ihevce_dmgr_rst_row_row_sync(ps_master_ctxt->aapv_dep_mngr_enc_loop_dblk[frame_id][ctr]);
   5879 
   5880         /* Dep. Mngr : Reset the TopRight CU Processed in every row  for ENC sync */
   5881         ihevce_dmgr_rst_row_row_sync(
   5882             ps_master_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[frame_id][ctr]);
   5883     }
   5884 }
   5885 
   5886 /*!
   5887 ******************************************************************************
   5888 * \if Function name : ihevce_enc_loop_frame_init \endif
   5889 *
   5890 * \brief
   5891 *    Frame level init of enocde loop function .
   5892 *
   5893 * \param[in] pv_enc_loop_ctxt           : Enc_loop context pointer
   5894 * \param[in] pi4_cu_processed           : ptr to cur frame cu process in pix.
   5895 * \param[in] aps_ref_list               : ref pic list for the current frame
   5896 * \param[in] ps_slice_hdr               : ptr to current slice header params
   5897 * \param[in] ps_pps                     : ptr to active pps params
   5898 * \param[in] ps_sps                     : ptr to active sps params
   5899 * \param[in] ps_vps                     : ptr to active vps params
   5900 
   5901 
   5902 * \param[in] i1_weighted_pred_flag      : weighted pred enable flag (unidir)
   5903 * \param[in] i1_weighted_bipred_flag    : weighted pred enable flag (bidir)
   5904 * \param[in] log2_luma_wght_denom       : down shift factor for weighted pred of luma
   5905 * \param[in] log2_chroma_wght_denom       : down shift factor for weighted pred of chroma
   5906 * \param[in] cur_poc                    : currennt frame poc
   5907 * \param[in] i4_bitrate_instance_num    : number indicating the instance of bit-rate for multi-rate encoder
   5908 *
   5909 * \return
   5910 *    None
   5911 *
   5912 * \author
   5913 *  Ittiam
   5914 *
   5915 *****************************************************************************
   5916 */
   5917 void ihevce_enc_loop_frame_init(
   5918     void *pv_enc_loop_ctxt,
   5919     WORD32 i4_frm_qp,
   5920     recon_pic_buf_t *(*aps_ref_list)[HEVCE_MAX_REF_PICS * 2],
   5921     recon_pic_buf_t *ps_frm_recon,
   5922     slice_header_t *ps_slice_hdr,
   5923     pps_t *ps_pps,
   5924     sps_t *ps_sps,
   5925     vps_t *ps_vps,
   5926     WORD8 i1_weighted_pred_flag,
   5927     WORD8 i1_weighted_bipred_flag,
   5928     WORD32 log2_luma_wght_denom,
   5929     WORD32 log2_chroma_wght_denom,
   5930     WORD32 cur_poc,
   5931     WORD32 i4_display_num,
   5932     enc_ctxt_t *ps_enc_ctxt,
   5933     me_enc_rdopt_ctxt_t *ps_curr_inp_prms,
   5934     WORD32 i4_bitrate_instance_num,
   5935     WORD32 i4_thrd_id,
   5936     WORD32 i4_enc_frm_id,
   5937     WORD32 i4_num_bitrates,
   5938     WORD32 i4_quality_preset,
   5939     void *pv_dep_mngr_encloop_dep_me)
   5940 {
   5941     /* local variables */
   5942     ihevce_enc_loop_master_ctxt_t *ps_master_ctxt;
   5943     ihevce_enc_loop_ctxt_t *ps_ctxt;
   5944     WORD32 chroma_qp_offset, i4_div_factor;
   5945     WORD8 i1_slice_type = ps_slice_hdr->i1_slice_type;
   5946     WORD8 i1_strong_intra_smoothing_enable_flag = ps_sps->i1_strong_intra_smoothing_enable_flag;
   5947 
   5948     /* ENC_LOOP master state structure */
   5949     ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
   5950 
   5951     /* Nithya: Store the current POC in the slice header */
   5952     ps_slice_hdr->i4_abs_pic_order_cnt = cur_poc;
   5953 
   5954     /* Update the POC list of the current frame to the recon buffer */
   5955     if(ps_slice_hdr->i1_num_ref_idx_l0_active != 0)
   5956     {
   5957         int i4_i;
   5958         for(i4_i = 0; i4_i < ps_slice_hdr->i1_num_ref_idx_l0_active; i4_i++)
   5959         {
   5960             ps_frm_recon->ai4_col_l0_poc[i4_i] = aps_ref_list[0][i4_i]->i4_poc;
   5961         }
   5962     }
   5963     if(ps_slice_hdr->i1_num_ref_idx_l1_active != 0)
   5964     {
   5965         int i4_i;
   5966         for(i4_i = 0; i4_i < ps_slice_hdr->i1_num_ref_idx_l1_active; i4_i++)
   5967         {
   5968             ps_frm_recon->ai4_col_l1_poc[i4_i] = aps_ref_list[1][i4_i]->i4_poc;
   5969         }
   5970     }
   5971 
   5972     /* loop over all the threads */
   5973     // for(ctr = 0; ctr < ps_master_ctxt->i4_num_proc_thrds; ctr++)
   5974     {
   5975         /* ENC_LOOP state structure */
   5976         ps_ctxt = ps_master_ctxt->aps_enc_loop_thrd_ctxt[i4_thrd_id];
   5977 
   5978         /* SAO ctxt structure initialization*/
   5979         ps_ctxt->s_sao_ctxt_t.ps_pps = ps_pps;
   5980         ps_ctxt->s_sao_ctxt_t.ps_sps = ps_sps;
   5981         ps_ctxt->s_sao_ctxt_t.ps_slice_hdr = ps_slice_hdr;
   5982 
   5983         /*bit-rate instance number for Multi-bitrate (MBR) encode */
   5984         ps_ctxt->i4_bitrate_instance_num = i4_bitrate_instance_num;
   5985         ps_ctxt->i4_num_bitrates = i4_num_bitrates;
   5986         ps_ctxt->i4_chroma_format = ps_enc_ctxt->ps_stat_prms->s_src_prms.i4_chr_format;
   5987         ps_ctxt->i4_is_first_query = 1;
   5988         ps_ctxt->i4_is_ctb_qp_modified = 0;
   5989 
   5990         /* enc_frm_id for multiframe encode */
   5991 
   5992         if(1 == ps_enc_ctxt->s_multi_thrd.i4_num_enc_loop_frm_pllel)
   5993         {
   5994             ps_ctxt->i4_enc_frm_id = 0;
   5995             i4_enc_frm_id = 0;
   5996         }
   5997         else
   5998         {
   5999             ps_ctxt->i4_enc_frm_id = i4_enc_frm_id;
   6000         }
   6001 
   6002         /*Initialize the sub pic rc buf appropriately */
   6003 
   6004         /*Set the thrd id flag */
   6005         ps_enc_ctxt->s_multi_thrd
   6006             .ai4_thrd_id_valid_flag[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 1;
   6007 
   6008         ps_enc_ctxt->s_multi_thrd
   6009             .ai8_nctb_ipe_sad[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
   6010         ps_enc_ctxt->s_multi_thrd
   6011             .ai8_nctb_me_sad[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
   6012 
   6013         ps_enc_ctxt->s_multi_thrd
   6014             .ai8_nctb_l0_ipe_sad[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
   6015         ps_enc_ctxt->s_multi_thrd
   6016             .ai8_nctb_act_factor[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
   6017 
   6018         ps_enc_ctxt->s_multi_thrd
   6019             .ai8_nctb_bits_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
   6020         ps_enc_ctxt->s_multi_thrd
   6021             .ai8_acc_bits_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
   6022         ps_enc_ctxt->s_multi_thrd
   6023             .ai8_acc_bits_mul_qs_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
   6024         ps_enc_ctxt->s_multi_thrd
   6025             .ai8_nctb_hdr_bits_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
   6026         ps_enc_ctxt->s_multi_thrd
   6027             .ai8_nctb_mpm_bits_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
   6028         ps_enc_ctxt->s_multi_thrd.ai4_prev_chunk_qp[i4_enc_frm_id][i4_bitrate_instance_num] =
   6029             i4_frm_qp;
   6030 
   6031         /*Frame level data for Sub Pic rc is initalized here */
   6032         /*Can be sent once per frame*/
   6033         {
   6034             WORD32 i4_tot_frame_ctb = ps_enc_ctxt->s_frm_ctb_prms.i4_num_ctbs_vert *
   6035                                       ps_enc_ctxt->s_frm_ctb_prms.i4_num_ctbs_horz;
   6036 
   6037             /*Accumalated bits of all cu for required CTBS estimated during RDO evaluation*/
   6038             ps_ctxt->u4_total_cu_bits = 0;
   6039             ps_ctxt->u4_total_cu_hdr_bits = 0;
   6040 
   6041             ps_ctxt->u4_cu_tot_bits_into_qscale = 0;
   6042             ps_ctxt->u4_cu_tot_bits = 0;
   6043             ps_ctxt->u4_total_cu_bits_mul_qs = 0;
   6044             ps_ctxt->i4_display_num = i4_display_num;
   6045             ps_ctxt->i4_sub_pic_level_rc = ps_enc_ctxt->s_multi_thrd.i4_in_frame_rc_enabled;
   6046             /*The Qscale is to be generated every 10th of total frame ctb is completed */
   6047             //ps_ctxt->i4_num_ctb_for_out_scale = (10 * i4_tot_frame_ctb)/100 ;
   6048             ps_ctxt->i4_num_ctb_for_out_scale = (UPDATE_QP_AT_CTB * i4_tot_frame_ctb) / 100;
   6049 
   6050             ps_ctxt->i4_cu_qp_sub_pic_rc = (1 << QP_LEVEL_MOD_ACT_FACTOR);
   6051             /*Sub Pic RC frame level params */
   6052             ps_ctxt->i8_frame_l1_ipe_sad =
   6053                 ps_curr_inp_prms->ps_curr_inp->s_rc_lap_out.i8_raw_pre_intra_sad;
   6054             ps_ctxt->i8_frame_l0_ipe_satd =
   6055                 ps_curr_inp_prms->ps_curr_inp->s_lap_out.i8_frame_l0_acc_satd;
   6056             ps_ctxt->i8_frame_l1_me_sad =
   6057                 ps_curr_inp_prms->ps_curr_inp->s_rc_lap_out.i8_raw_l1_coarse_me_sad;
   6058             ps_ctxt->i8_frame_l1_activity_fact =
   6059                 ps_curr_inp_prms->ps_curr_inp->s_lap_out.i8_frame_level_activity_fact;
   6060             if(ps_ctxt->i4_sub_pic_level_rc)
   6061             {
   6062                 ASSERT(
   6063                     ps_curr_inp_prms->ps_curr_inp->s_lap_out
   6064                         .ai4_frame_bits_estimated[ps_ctxt->i4_bitrate_instance_num] != 0);
   6065 
   6066                 ps_ctxt->ai4_frame_bits_estimated[ps_ctxt->i4_enc_frm_id]
   6067                                                  [ps_ctxt->i4_bitrate_instance_num] =
   6068                     ps_curr_inp_prms->ps_curr_inp->s_lap_out
   6069                         .ai4_frame_bits_estimated[ps_ctxt->i4_bitrate_instance_num];
   6070             }
   6071             //ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_scene_type = 1;
   6072 
   6073             ps_ctxt->i4_is_I_scenecut =
   6074                 ((ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_scene_type == SCENE_TYPE_SCENE_CUT) &&
   6075                  (ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_pic_type == IV_IDR_FRAME ||
   6076                   ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_pic_type == IV_I_FRAME));
   6077 
   6078             ps_ctxt->i4_is_non_I_scenecut =
   6079                 ((ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_scene_type == SCENE_TYPE_SCENE_CUT) &&
   6080                  (ps_ctxt->i4_is_I_scenecut == 0));
   6081 
   6082             /*ps_ctxt->i4_is_I_only_scd = ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_is_I_only_scd;
   6083             ps_ctxt->i4_is_non_I_scd = ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_is_non_I_scd;*/
   6084             ps_ctxt->i4_is_model_valid =
   6085                 ps_curr_inp_prms->ps_curr_inp->s_rc_lap_out.i4_is_model_valid;
   6086         }
   6087         /* cb and cr offsets are assumed to be same */
   6088         chroma_qp_offset = ps_slice_hdr->i1_slice_cb_qp_offset + ps_pps->i1_pic_cb_qp_offset;
   6089 
   6090         /* assumption of cb = cr qp */
   6091         ASSERT(ps_slice_hdr->i1_slice_cb_qp_offset == ps_slice_hdr->i1_slice_cr_qp_offset);
   6092         ASSERT(ps_pps->i1_pic_cb_qp_offset == ps_pps->i1_pic_cr_qp_offset);
   6093 
   6094         ps_ctxt->u1_is_input_data_hbd = (ps_sps->i1_bit_depth_luma_minus8 > 0);
   6095 
   6096         ps_ctxt->u1_bit_depth = ps_sps->i1_bit_depth_luma_minus8 + 8;
   6097 
   6098         ps_ctxt->s_mc_ctxt.i4_bit_depth = ps_ctxt->u1_bit_depth;
   6099         ps_ctxt->s_mc_ctxt.u1_chroma_array_type = ps_ctxt->u1_chroma_array_type;
   6100 
   6101         /*remember chroma qp offset as qp related parameters are calculated at CU level*/
   6102         ps_ctxt->i4_chroma_qp_offset = chroma_qp_offset;
   6103         ps_ctxt->i1_cu_qp_delta_enable = ps_pps->i1_cu_qp_delta_enabled_flag;
   6104         ps_ctxt->i1_entropy_coding_sync_enabled_flag = ps_pps->i1_entropy_coding_sync_enabled_flag;
   6105 
   6106         ps_ctxt->i4_is_ref_pic = ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_is_ref_pic;
   6107         ps_ctxt->i4_temporal_layer = ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_temporal_lyr_id;
   6108         ps_ctxt->i4_use_const_lamda_modifier = USE_CONSTANT_LAMBDA_MODIFIER;
   6109         ps_ctxt->i4_use_const_lamda_modifier =
   6110             ps_ctxt->i4_use_const_lamda_modifier ||
   6111             ((ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
   6112               (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER)) &&
   6113              ((ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
   6114                (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_NOISE_PRESERVATION)) ||
   6115               (ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
   6116                (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_1)) ||
   6117               (ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
   6118                (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_2)) ||
   6119               (ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
   6120                (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_3))));
   6121 
   6122         {
   6123             ps_ctxt->f_i_pic_lamda_modifier =
   6124                 ps_curr_inp_prms->ps_curr_inp->s_lap_out.f_i_pic_lamda_modifier;
   6125         }
   6126 
   6127         ps_ctxt->i4_frame_qp = i4_frm_qp;
   6128         ps_ctxt->i4_frame_mod_qp = i4_frm_qp;
   6129         ps_ctxt->i4_cu_qp = i4_frm_qp;
   6130         ps_ctxt->i4_prev_cu_qp = i4_frm_qp;
   6131         ps_ctxt->i4_chrm_cu_qp =
   6132             (ps_ctxt->u1_chroma_array_type == 2)
   6133                 ? MIN(i4_frm_qp + chroma_qp_offset, 51)
   6134                 : gai1_ihevc_chroma_qp_scale[i4_frm_qp + chroma_qp_offset + MAX_QP_BD_OFFSET];
   6135 
   6136         ps_ctxt->i4_cu_qp_div6 = (i4_frm_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) / 6;
   6137         i4_div_factor = (i4_frm_qp + 3) / 6;
   6138         i4_div_factor = CLIP3(i4_div_factor, 3, 6);
   6139         ps_ctxt->i4_cu_qp_mod6 = (i4_frm_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) % 6;
   6140 
   6141         ps_ctxt->i4_chrm_cu_qp_div6 =
   6142             (ps_ctxt->i4_chrm_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) / 6;
   6143         ps_ctxt->i4_chrm_cu_qp_mod6 =
   6144             (ps_ctxt->i4_chrm_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) % 6;
   6145 
   6146 #define INTER_RND_QP_BY_6
   6147 #ifdef INTER_RND_QP_BY_6
   6148 
   6149         { /*1/6 rounding for 8 bit b frames*/
   6150             ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] = 85
   6151                 /*((1 << QUANT_ROUND_FACTOR_Q) / 6)*/;
   6152         }
   6153 #else
   6154         /* quant factor without RDOQ is 1/6th of shift for inter : like in H264 */
   6155         ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] = (1 << QUANT_ROUND_FACTOR_Q) / 3;
   6156 #endif
   6157 
   6158         if(ISLICE == i1_slice_type)
   6159         {
   6160             /* quant factor without RDOQ is 1/3rd of shift for intra : like in H264 */
   6161             ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] = 171
   6162                 /*((1 << QUANT_ROUND_FACTOR_Q) / 6)*/;
   6163         }
   6164         else
   6165         {
   6166             /* quant factor without RDOQ is 1/6th of shift for intra in inter pic */
   6167             ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] =
   6168                 ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER];
   6169             /* (1 << QUANT_ROUND_FACTOR_Q) / 6; */
   6170         }
   6171 
   6172         ps_ctxt->i1_strong_intra_smoothing_enable_flag = i1_strong_intra_smoothing_enable_flag;
   6173 
   6174         ps_ctxt->i1_slice_type = i1_slice_type;
   6175 
   6176         /* intialize the inter pred (MC) context at frame level */
   6177         ps_ctxt->s_mc_ctxt.ps_ref_list = aps_ref_list;
   6178         ps_ctxt->s_mc_ctxt.i1_weighted_pred_flag = i1_weighted_pred_flag;
   6179         ps_ctxt->s_mc_ctxt.i1_weighted_bipred_flag = i1_weighted_bipred_flag;
   6180         ps_ctxt->s_mc_ctxt.i4_log2_luma_wght_denom = log2_luma_wght_denom;
   6181         ps_ctxt->s_mc_ctxt.i4_log2_chroma_wght_denom = log2_chroma_wght_denom;
   6182 
   6183         /* intialize the MV pred context at frame level */
   6184         ps_ctxt->s_mv_pred_ctxt.ps_ref_list = aps_ref_list;
   6185         ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr = ps_slice_hdr;
   6186         ps_ctxt->s_mv_pred_ctxt.ps_sps = ps_sps;
   6187         ps_ctxt->s_mv_pred_ctxt.i4_log2_parallel_merge_level_minus2 =
   6188             ps_pps->i1_log2_parallel_merge_level - 2;
   6189 
   6190 #if ADAPT_COLOCATED_FROM_L0_FLAG
   6191         if(ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr->i1_slice_temporal_mvp_enable_flag)
   6192         {
   6193             if((ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr->i1_num_ref_idx_l1_active > 0) &&
   6194                (ps_ctxt->s_mv_pred_ctxt.ps_ref_list[1][0]->i4_frame_qp <
   6195                 ps_ctxt->s_mv_pred_ctxt.ps_ref_list[0][0]->i4_frame_qp))
   6196             {
   6197                 ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr->i1_collocated_from_l0_flag = 1;
   6198             }
   6199         }
   6200 #endif
   6201         /* Initialization of deblocking params */
   6202         ps_ctxt->s_deblk_prms.i4_beta_offset_div2 = ps_slice_hdr->i1_beta_offset_div2;
   6203         ps_ctxt->s_deblk_prms.i4_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
   6204 
   6205         ps_ctxt->s_deblk_prms.i4_cb_qp_indx_offset = ps_pps->i1_pic_cb_qp_offset;
   6206 
   6207         ps_ctxt->s_deblk_prms.i4_cr_qp_indx_offset = ps_pps->i1_pic_cr_qp_offset;
   6208         /*init frame level stat accumualtion parameters */
   6209         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
   6210             ->u4_frame_sad_acc = 0;
   6211         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
   6212             ->u4_frame_intra_sad_acc = 0;
   6213         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
   6214             ->u4_frame_open_loop_intra_sad = 0;
   6215         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
   6216             ->i8_frame_open_loop_ssd = 0;
   6217         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
   6218             ->u4_frame_inter_sad_acc = 0;
   6219 
   6220         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
   6221             ->i8_frame_cost_acc = 0;
   6222         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
   6223             ->i8_frame_intra_cost_acc = 0;
   6224         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
   6225             ->i8_frame_inter_cost_acc = 0;
   6226 
   6227         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
   6228             ->u4_frame_intra_sad = 0;
   6229         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
   6230             ->u4_frame_rdopt_bits = 0;
   6231         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
   6232             ->u4_frame_rdopt_header_bits = 0;
   6233         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
   6234             ->i4_qp_normalized_8x8_cu_sum[0] = 0;
   6235         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
   6236             ->i4_qp_normalized_8x8_cu_sum[1] = 0;
   6237         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
   6238             ->i4_8x8_cu_sum[0] = 0;
   6239         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
   6240             ->i4_8x8_cu_sum[1] = 0;
   6241         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
   6242             ->i8_sad_by_qscale[0] = 0;
   6243         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
   6244             ->i8_sad_by_qscale[1] = 0;
   6245         /* Compute the frame_qstep */
   6246         GET_FRAME_QSTEP_FROM_QP(ps_ctxt->i4_frame_qp, ps_ctxt->i4_frame_qstep);
   6247 
   6248         ps_ctxt->u1_max_tr_depth = ps_sps->i1_max_transform_hierarchy_depth_inter;
   6249 
   6250         ps_ctxt->ps_rc_quant_ctxt = &ps_enc_ctxt->s_rc_quant;
   6251         /* intialize the cabac rdopt context at frame level */
   6252         ihevce_entropy_rdo_frame_init(
   6253             &ps_ctxt->s_rdopt_entropy_ctxt,
   6254             ps_slice_hdr,
   6255             ps_pps,
   6256             ps_sps,
   6257             ps_vps,
   6258             ps_master_ctxt->au1_cu_skip_top_row,
   6259             &ps_enc_ctxt->s_rc_quant);
   6260 
   6261         /* register the dep mngr instance for forward ME sync */
   6262         ps_ctxt->pv_dep_mngr_encloop_dep_me = pv_dep_mngr_encloop_dep_me;
   6263     }
   6264 }
   6265 /*
   6266 ******************************************************************************
   6267 * \if Function name : ihevce_enc_loop_get_frame_rc_prms \endif
   6268 *
   6269 * \brief
   6270 *    returns Nil
   6271 *
   6272 * \param[in] pv_enc_loop_ctxt : pointer to encode loop context
   6273 * \param[out]ps_rc_prms       : ptr to frame level info structure
   6274 *
   6275 * \return
   6276 *    None
   6277 *
   6278 * \author
   6279 *  Ittiam
   6280 *
   6281 *****************************************************************************
   6282 */
   6283 void ihevce_enc_loop_get_frame_rc_prms(
   6284     void *pv_enc_loop_ctxt,
   6285     rc_bits_sad_t *ps_rc_prms,
   6286     WORD32 i4_br_id,  //bitrate instance id
   6287     WORD32 i4_enc_frm_id)  // frame id
   6288 {
   6289     /*Get the master thread pointer*/
   6290     ihevce_enc_loop_master_ctxt_t *ps_master_ctxt;
   6291     ihevce_enc_loop_ctxt_t *ps_ctxt;
   6292     UWORD32 total_frame_intra_sad = 0, total_frame_open_loop_intra_sad = 0;
   6293     LWORD64 i8_total_ssd_frame = 0;
   6294     UWORD32 total_frame_sad = 0;
   6295     UWORD32 total_frame_rdopt_bits = 0;
   6296     UWORD32 total_frame_rdopt_header_bits = 0;
   6297     WORD32 i4_qp_normalized_8x8_cu_sum[2] = { 0, 0 };
   6298     WORD32 i4_8x8_cu_sum[2] = { 0, 0 };
   6299     LWORD64 i8_sad_by_qscale[2] = { 0, 0 };
   6300     WORD32 i4_curr_qp_acc = 0;
   6301     WORD32 i;
   6302 
   6303     /* ENC_LOOP master state structure */
   6304     ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
   6305 
   6306     if(1 == ps_master_ctxt->i4_num_enc_loop_frm_pllel)
   6307     {
   6308         i4_enc_frm_id = 0;
   6309     }
   6310     /*loop through all threads and accumulate intra sad across all threads*/
   6311     for(i = 0; i < ps_master_ctxt->i4_num_proc_thrds; i++)
   6312     {
   6313         /* ENC_LOOP state structure */
   6314         ps_ctxt = ps_master_ctxt->aps_enc_loop_thrd_ctxt[i];
   6315         total_frame_open_loop_intra_sad +=
   6316             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_open_loop_intra_sad;
   6317         i8_total_ssd_frame +=
   6318             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_frame_open_loop_ssd;
   6319         total_frame_intra_sad +=
   6320             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_intra_sad;
   6321         total_frame_sad +=
   6322             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_sad_acc;
   6323         total_frame_rdopt_bits +=
   6324             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_rdopt_bits;
   6325         total_frame_rdopt_header_bits +=
   6326             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_rdopt_header_bits;
   6327         i4_qp_normalized_8x8_cu_sum[0] += ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]
   6328                                               ->i4_qp_normalized_8x8_cu_sum[0];
   6329         i4_qp_normalized_8x8_cu_sum[1] += ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]
   6330                                               ->i4_qp_normalized_8x8_cu_sum[1];
   6331         i4_8x8_cu_sum[0] +=
   6332             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i4_8x8_cu_sum[0];
   6333         i4_8x8_cu_sum[1] +=
   6334             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i4_8x8_cu_sum[1];
   6335         i8_sad_by_qscale[0] +=
   6336             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_sad_by_qscale[0];
   6337         i8_sad_by_qscale[1] +=
   6338             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_sad_by_qscale[1];
   6339     }
   6340 
   6341     ps_rc_prms->u4_open_loop_intra_sad = total_frame_open_loop_intra_sad;
   6342     ps_rc_prms->i8_total_ssd_frame = i8_total_ssd_frame;
   6343     ps_rc_prms->u4_total_sad = total_frame_sad;
   6344     ps_rc_prms->u4_total_texture_bits = total_frame_rdopt_bits - total_frame_rdopt_header_bits;
   6345     ps_rc_prms->u4_total_header_bits = total_frame_rdopt_header_bits;
   6346     /*This accumulation of intra frame sad is not intact. This can only be a temp change*/
   6347     ps_rc_prms->u4_total_intra_sad = total_frame_intra_sad;
   6348     ps_rc_prms->i4_qp_normalized_8x8_cu_sum[0] = i4_qp_normalized_8x8_cu_sum[0];
   6349     ps_rc_prms->i4_qp_normalized_8x8_cu_sum[1] = i4_qp_normalized_8x8_cu_sum[1];
   6350     ps_rc_prms->i4_8x8_cu_sum[0] = i4_8x8_cu_sum[0];
   6351     ps_rc_prms->i4_8x8_cu_sum[1] = i4_8x8_cu_sum[1];
   6352     ps_rc_prms->i8_sad_by_qscale[0] = i8_sad_by_qscale[0];
   6353     ps_rc_prms->i8_sad_by_qscale[1] = i8_sad_by_qscale[1];
   6354 }
   6355