Home | History | Annotate | Download | only in encoder
      1 /******************************************************************************
      2  *
      3  * Copyright (C) 2018 The Android Open Source Project
      4  *
      5  * Licensed under the Apache License, Version 2.0 (the "License");
      6  * you may not use this file except in compliance with the License.
      7  * You may obtain a copy of the License at:
      8  *
      9  * http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  *
     17  *****************************************************************************
     18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
     19 */
     20 
     21 /*!
     22 ******************************************************************************
     23 * \file ihevce_enc_loop_utils.c
     24 *
     25 * \brief
     26 *    This file contains utility functions of Encode loop
     27 *
     28 * \date
     29 *    18/09/2012
     30 *
     31 * \author
     32 *    Ittiam
     33 *
     34 *
     35 * List of Functions
     36 *
     37 *
     38 ******************************************************************************
     39 */
     40 
     41 /*****************************************************************************/
     42 /* File Includes                                                             */
     43 /*****************************************************************************/
     44 /* System include files */
     45 #include <stdio.h>
     46 #include <string.h>
     47 #include <stdlib.h>
     48 #include <assert.h>
     49 #include <stdarg.h>
     50 #include <math.h>
     51 #include <limits.h>
     52 
     53 /* User include files */
     54 #include "ihevc_typedefs.h"
     55 #include "itt_video_api.h"
     56 #include "ihevce_api.h"
     57 
     58 #include "rc_cntrl_param.h"
     59 #include "rc_frame_info_collector.h"
     60 #include "rc_look_ahead_params.h"
     61 
     62 #include "ihevc_defs.h"
     63 #include "ihevc_macros.h"
     64 #include "ihevc_debug.h"
     65 #include "ihevc_structs.h"
     66 #include "ihevc_platform_macros.h"
     67 #include "ihevc_deblk.h"
     68 #include "ihevc_itrans_recon.h"
     69 #include "ihevc_chroma_itrans_recon.h"
     70 #include "ihevc_chroma_intra_pred.h"
     71 #include "ihevc_intra_pred.h"
     72 #include "ihevc_inter_pred.h"
     73 #include "ihevc_mem_fns.h"
     74 #include "ihevc_padding.h"
     75 #include "ihevc_weighted_pred.h"
     76 #include "ihevc_sao.h"
     77 #include "ihevc_resi_trans.h"
     78 #include "ihevc_quant_iquant_ssd.h"
     79 #include "ihevc_cabac_tables.h"
     80 #include "ihevc_common_tables.h"
     81 
     82 #include "ihevce_defs.h"
     83 #include "ihevce_hle_interface.h"
     84 #include "ihevce_lap_enc_structs.h"
     85 #include "ihevce_multi_thrd_structs.h"
     86 #include "ihevce_multi_thrd_funcs.h"
     87 #include "ihevce_me_common_defs.h"
     88 #include "ihevce_had_satd.h"
     89 #include "ihevce_error_codes.h"
     90 #include "ihevce_bitstream.h"
     91 #include "ihevce_cabac.h"
     92 #include "ihevce_rdoq_macros.h"
     93 #include "ihevce_function_selector.h"
     94 #include "ihevce_enc_structs.h"
     95 #include "ihevce_entropy_structs.h"
     96 #include "ihevce_cmn_utils_instr_set_router.h"
     97 #include "ihevce_ipe_instr_set_router.h"
     98 #include "ihevce_decomp_pre_intra_structs.h"
     99 #include "ihevce_decomp_pre_intra_pass.h"
    100 #include "ihevce_enc_loop_structs.h"
    101 #include "ihevce_nbr_avail.h"
    102 #include "ihevce_enc_loop_utils.h"
    103 #include "ihevce_sub_pic_rc.h"
    104 #include "ihevce_global_tables.h"
    105 #include "ihevce_bs_compute_ctb.h"
    106 #include "ihevce_cabac_rdo.h"
    107 #include "ihevce_deblk.h"
    108 #include "ihevce_frame_process.h"
    109 #include "ihevce_rc_enc_structs.h"
    110 #include "hme_datatype.h"
    111 #include "hme_interface.h"
    112 #include "hme_common_defs.h"
    113 #include "hme_defs.h"
    114 #include "hme_common_utils.h"
    115 #include "ihevce_me_instr_set_router.h"
    116 #include "ihevce_enc_subpel_gen.h"
    117 #include "ihevce_inter_pred.h"
    118 #include "ihevce_mv_pred.h"
    119 #include "ihevce_mv_pred_merge.h"
    120 #include "ihevce_enc_loop_inter_mode_sifter.h"
    121 #include "ihevce_enc_cu_recursion.h"
    122 #include "ihevce_enc_loop_pass.h"
    123 #include "ihevce_common_utils.h"
    124 #include "ihevce_dep_mngr_interface.h"
    125 #include "ihevce_sao.h"
    126 #include "ihevce_tile_interface.h"
    127 #include "ihevce_profile.h"
    128 #include "ihevce_stasino_helpers.h"
    129 #include "ihevce_tu_tree_selector.h"
    130 
    131 /*****************************************************************************/
    132 /* Globals                                                                   */
    133 /*****************************************************************************/
    134 
    135 extern UWORD16 gau2_ihevce_cabac_bin_to_bits[64 * 2];
    136 extern const UWORD8 gu1_hevce_scan4x4[3][16];
    137 extern const UWORD8 gu1_hevce_sigcoeff_ctxtinc[4][16];
    138 extern const UWORD8 gu1_hevce_sigcoeff_ctxtinc_tr4[16];
    139 extern const UWORD8 gu1_hevce_sigcoeff_ctxtinc_00[16];
    140 
    141 /*****************************************************************************/
    142 /* Constant Macros                                                           */
    143 /*****************************************************************************/
    144 #define ENABLE_ZERO_CBF 1
    145 #define DISABLE_RDOQ_INTRA 0
    146 
    147 /*****************************************************************************/
    148 /* Function Definitions                                                      */
    149 /*****************************************************************************/
    150 void *ihevce_tu_tree_update(
    151     tu_prms_t *ps_tu_prms,
    152     WORD32 *pnum_tu_in_cu,
    153     WORD32 depth,
    154     WORD32 tu_split_flag,
    155     WORD32 tu_early_cbf,
    156     WORD32 i4_x_off,
    157     WORD32 i4_y_off)
    158 {
    159     //WORD32 tu_split_flag = p_tu_split_flag[0];
    160     WORD32 p_tu_split_flag[4];
    161     WORD32 p_tu_early_cbf[4];
    162 
    163     WORD32 tu_size = ps_tu_prms->u1_tu_size;
    164 
    165     if(((tu_size >> depth) >= 16) && (tu_split_flag & 0x1))
    166     {
    167         if((tu_size >> depth) == 32)
    168         {
    169             /* Get the individual TU split flags */
    170             p_tu_split_flag[0] = (tu_split_flag >> 16) & 0x1F;
    171             p_tu_split_flag[1] = (tu_split_flag >> 11) & 0x1F;
    172             p_tu_split_flag[2] = (tu_split_flag >> 6) & 0x1F;
    173             p_tu_split_flag[3] = (tu_split_flag >> 1) & 0x1F;
    174 
    175             /* Get the early CBF flags */
    176             p_tu_early_cbf[0] = (tu_early_cbf >> 16) & 0x1F;
    177             p_tu_early_cbf[1] = (tu_early_cbf >> 11) & 0x1F;
    178             p_tu_early_cbf[2] = (tu_early_cbf >> 6) & 0x1F;
    179             p_tu_early_cbf[3] = (tu_early_cbf >> 1) & 0x1F;
    180         }
    181         else
    182         {
    183             /* Get the individual TU split flags */
    184             p_tu_split_flag[0] = ((tu_split_flag >> 4) & 0x1);
    185             p_tu_split_flag[1] = ((tu_split_flag >> 3) & 0x1);
    186             p_tu_split_flag[2] = ((tu_split_flag >> 2) & 0x1);
    187             p_tu_split_flag[3] = ((tu_split_flag >> 1) & 0x1);
    188 
    189             /* Get the early CBF flags */
    190             p_tu_early_cbf[0] = ((tu_early_cbf >> 4) & 0x1);
    191             p_tu_early_cbf[1] = ((tu_early_cbf >> 3) & 0x1);
    192             p_tu_early_cbf[2] = ((tu_early_cbf >> 2) & 0x1);
    193             p_tu_early_cbf[3] = ((tu_early_cbf >> 1) & 0x1);
    194         }
    195 
    196         ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update(
    197             ps_tu_prms,
    198             pnum_tu_in_cu,
    199             depth + 1,
    200             p_tu_split_flag[0],
    201             p_tu_early_cbf[0],
    202             i4_x_off,
    203             i4_y_off);
    204 
    205         ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update(
    206             ps_tu_prms,
    207             pnum_tu_in_cu,
    208             depth + 1,
    209             p_tu_split_flag[1],
    210             p_tu_early_cbf[1],
    211             (i4_x_off + (tu_size >> (depth + 1))),
    212             i4_y_off);
    213 
    214         ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update(
    215             ps_tu_prms,
    216             pnum_tu_in_cu,
    217             depth + 1,
    218             p_tu_split_flag[2],
    219             p_tu_early_cbf[2],
    220             i4_x_off,
    221             (i4_y_off + (tu_size >> (depth + 1))));
    222 
    223         ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update(
    224             ps_tu_prms,
    225             pnum_tu_in_cu,
    226             depth + 1,
    227             p_tu_split_flag[3],
    228             p_tu_early_cbf[3],
    229             (i4_x_off + (tu_size >> (depth + 1))),
    230             (i4_y_off + (tu_size >> (depth + 1))));
    231     }
    232     else
    233     {
    234         if(tu_split_flag & 0x1)
    235         {
    236             /* This piece of code will be entered for the 8x8, if it is split
    237             Update the 4 child TU's accordingly. */
    238 
    239             (*pnum_tu_in_cu) += 4;
    240 
    241             /* TL TU update */
    242             ps_tu_prms->u1_tu_size = tu_size >> (depth + 1);
    243 
    244             ps_tu_prms->u1_x_off = i4_x_off;
    245 
    246             ps_tu_prms->u1_y_off = i4_y_off;
    247 
    248             /* Early CBF is not done for 4x4 transforms */
    249             ps_tu_prms->i4_early_cbf = 1;
    250 
    251             ps_tu_prms++;
    252 
    253             /* TR TU update */
    254             ps_tu_prms->u1_tu_size = tu_size >> (depth + 1);
    255 
    256             ps_tu_prms->u1_x_off = i4_x_off + (tu_size >> (depth + 1));
    257 
    258             ps_tu_prms->u1_y_off = i4_y_off;
    259 
    260             /* Early CBF is not done for 4x4 transforms */
    261             ps_tu_prms->i4_early_cbf = 1;
    262 
    263             ps_tu_prms++;
    264 
    265             /* BL TU update */
    266             ps_tu_prms->u1_tu_size = tu_size >> (depth + 1);
    267 
    268             ps_tu_prms->u1_x_off = i4_x_off;
    269 
    270             ps_tu_prms->u1_y_off = i4_y_off + (tu_size >> (depth + 1));
    271 
    272             /* Early CBF is not done for 4x4 transforms */
    273             ps_tu_prms->i4_early_cbf = 1;
    274 
    275             ps_tu_prms++;
    276 
    277             /* BR TU update */
    278             ps_tu_prms->u1_tu_size = tu_size >> (depth + 1);
    279 
    280             ps_tu_prms->u1_x_off = i4_x_off + (tu_size >> (depth + 1));
    281 
    282             ps_tu_prms->u1_y_off = i4_y_off + (tu_size >> (depth + 1));
    283 
    284             /* Early CBF is not done for 4x4 transforms */
    285             ps_tu_prms->i4_early_cbf = 1;
    286         }
    287         else
    288         {
    289             /* Update the TU params */
    290             ps_tu_prms->u1_tu_size = tu_size >> depth;
    291 
    292             ps_tu_prms->u1_x_off = i4_x_off;
    293 
    294             ps_tu_prms->u1_y_off = i4_y_off;
    295 
    296             (*pnum_tu_in_cu)++;
    297 
    298             /* Early CBF update for current TU */
    299             ps_tu_prms->i4_early_cbf = tu_early_cbf & 0x1;
    300         }
    301         if((*pnum_tu_in_cu) < MAX_TU_IN_CTB)
    302         {
    303             ps_tu_prms++;
    304 
    305             ps_tu_prms->u1_tu_size = tu_size;
    306         }
    307     }
    308 
    309     return ps_tu_prms;
    310 }
    311 
    312 /*!
    313 ******************************************************************************
    314 * \if Function name : ihevce_compute_quant_rel_param \endif
    315 *
    316 * \brief
    317 *    This function updates quantization related parameters like qp_mod_6 etc in
    318 *       context according to new qp
    319 *
    320 * \date
    321 *    08/01/2013
    322 *
    323 * \author
    324 *    Ittiam
    325 *
    326 * \return
    327 *
    328 * List of Functions
    329 *
    330 *
    331 ******************************************************************************
    332 */
    333 void ihevce_compute_quant_rel_param(ihevce_enc_loop_ctxt_t *ps_ctxt, WORD8 i1_cu_qp)
    334 {
    335     WORD32 i4_div_factor;
    336 
    337     ps_ctxt->i4_chrm_cu_qp =
    338         (ps_ctxt->u1_chroma_array_type == 2)
    339             ? MIN(i1_cu_qp + ps_ctxt->i4_chroma_qp_offset, 51)
    340             : gai1_ihevc_chroma_qp_scale[i1_cu_qp + ps_ctxt->i4_chroma_qp_offset + MAX_QP_BD_OFFSET];
    341     ps_ctxt->i4_cu_qp_div6 = (i1_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) / 6;
    342     i4_div_factor = (i1_cu_qp + 3) / 6;
    343     i4_div_factor = CLIP3(i4_div_factor, 3, 6);
    344     ps_ctxt->i4_cu_qp_mod6 = (i1_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) % 6;
    345     ps_ctxt->i4_chrm_cu_qp_div6 = (ps_ctxt->i4_chrm_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) / 6;
    346     ps_ctxt->i4_chrm_cu_qp_mod6 = (ps_ctxt->i4_chrm_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) % 6;
    347 
    348 #define INTER_RND_QP_BY_6
    349 #ifdef INTER_RND_QP_BY_6
    350     /* quant factor without RDOQ is 1/6th of shift for inter : like in H264 */
    351     {
    352         ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] =
    353             (WORD32)(((1 << QUANT_ROUND_FACTOR_Q) / (float)6) + 0.5f);
    354     }
    355 #else
    356     /* quant factor without RDOQ is 1/6th of shift for inter : like in H264 */
    357     ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] = (1 << QUANT_ROUND_FACTOR_Q) / 3;
    358 #endif
    359 
    360     if(ISLICE == ps_ctxt->i1_slice_type)
    361     {
    362         /* quant factor without RDOQ is 1/3rd of shift for intra : like in H264 */
    363         ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] =
    364             (WORD32)(((1 << QUANT_ROUND_FACTOR_Q) / (float)3) + 0.5f);
    365     }
    366     else
    367     {
    368         if(0) /*TRAQO_EXT_ENABLE_ONE_THIRD_RND*/
    369         {
    370             /* quant factor without RDOQ is 1/3rd of shift for intra : like in H264 */
    371             ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] =
    372                 (WORD32)(((1 << QUANT_ROUND_FACTOR_Q) / (float)3) + 0.5f);
    373         }
    374         else
    375         {
    376             /* quant factor without RDOQ is 1/6th of shift for intra in inter pic */
    377             ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] =
    378                 ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER];
    379             /* (1 << QUANT_ROUND_FACTOR_Q) / 6; */
    380         }
    381     }
    382 }
    383 
    384 /*!
    385 ******************************************************************************
    386 * \if Function name : ihevce_populate_cl_cu_lambda_prms \endif
    387 *
    388 * \brief
    389 *    Function whihc calculates the Lambda params for current picture
    390 *
    391 * \param[in] ps_enc_ctxt : encoder ctxt pointer
    392 * \param[in] ps_cur_pic_ctxt : current pic ctxt
    393 * \param[in] i4_cur_frame_qp : current pic QP
    394 * \param[in] first_field : is first field flag
    395 * \param[in] i4_temporal_lyr_id : Current picture layer id
    396 *
    397 * \return
    398 *    None
    399 *
    400 * \author
    401 *  Ittiam
    402 *
    403 *****************************************************************************
    404 */
    405 void ihevce_populate_cl_cu_lambda_prms(
    406     ihevce_enc_loop_ctxt_t *ps_ctxt,
    407     frm_lambda_ctxt_t *ps_frm_lamda,
    408     WORD32 i4_slice_type,
    409     WORD32 i4_temporal_lyr_id,
    410     WORD32 i4_lambda_type)
    411 {
    412     WORD32 i4_curr_cu_qp, i4_curr_cu_qp_offset;
    413     double lambda_modifier;
    414     double lambda_uv_modifier;
    415     double lambda;
    416     double lambda_uv;
    417 
    418     WORD32 i4_qp_bdoffset = 6 * (ps_ctxt->u1_bit_depth - 8);
    419 
    420     /*Populate lamda modifier */
    421     ps_ctxt->i4_lamda_modifier = ps_frm_lamda->lambda_modifier;
    422     ps_ctxt->i4_uv_lamda_modifier = ps_frm_lamda->lambda_uv_modifier;
    423     ps_ctxt->i4_temporal_layer_id = i4_temporal_lyr_id;
    424 
    425     for(i4_curr_cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp;
    426         i4_curr_cu_qp <= ps_ctxt->ps_rc_quant_ctxt->i2_max_qp;
    427         i4_curr_cu_qp++)
    428     {
    429         WORD32 chroma_qp = (ps_ctxt->i4_chroma_format == IV_YUV_422SP_UV)
    430                                ? MIN(i4_curr_cu_qp, 51)
    431                                : gai1_ihevc_chroma_qp_scale[i4_curr_cu_qp + MAX_QP_BD_OFFSET];
    432 
    433         i4_curr_cu_qp_offset = i4_curr_cu_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset;
    434 
    435         lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bdoffset - 12)) / 3.0));
    436         lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0));
    437 
    438         if((BSLICE == i4_slice_type) && (i4_temporal_lyr_id))
    439         {
    440             lambda_modifier = ps_frm_lamda->lambda_modifier *
    441                               CLIP3((((double)(i4_curr_cu_qp - 12)) / 6.0), 2.00, 4.00);
    442             lambda_uv_modifier = ps_frm_lamda->lambda_uv_modifier *
    443                                  CLIP3((((double)(chroma_qp - 12)) / 6.0), 2.00, 4.00);
    444         }
    445         else
    446         {
    447             lambda_modifier = ps_frm_lamda->lambda_modifier;
    448             lambda_uv_modifier = ps_frm_lamda->lambda_uv_modifier;
    449         }
    450         if(ps_ctxt->i4_use_const_lamda_modifier)
    451         {
    452             if(ISLICE == ps_ctxt->i1_slice_type)
    453             {
    454                 lambda_modifier = ps_ctxt->f_i_pic_lamda_modifier;
    455                 lambda_uv_modifier = ps_ctxt->f_i_pic_lamda_modifier;
    456             }
    457             else
    458             {
    459                 lambda_modifier = CONST_LAMDA_MOD_VAL;
    460                 lambda_uv_modifier = CONST_LAMDA_MOD_VAL;
    461             }
    462         }
    463         switch(i4_lambda_type)
    464         {
    465         case 0:
    466         {
    467             i4_qp_bdoffset = 0;
    468 
    469             lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bdoffset - 12)) / 3.0));
    470             lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0));
    471 
    472             lambda *= lambda_modifier;
    473             lambda_uv *= lambda_uv_modifier;
    474 
    475             ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_curr_cu_qp_offset] =
    476                 (UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT));
    477 
    478             ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset] =
    479                 (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT));
    480 
    481             ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
    482                 (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT));
    483             if(ps_ctxt->i4_use_const_lamda_modifier)
    484             {
    485                 ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
    486                     (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
    487             }
    488             else
    489             {
    490                 ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
    491                     (WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT));
    492             }
    493 
    494             ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset] =
    495                 (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
    496 
    497             ps_ctxt->i8_cl_ssd_type2_lambda_qf_array[i4_curr_cu_qp_offset] =
    498                 ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset];
    499 
    500             ps_ctxt->i8_cl_ssd_type2_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
    501                 ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset];
    502 
    503             ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] =
    504                 ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset];
    505 
    506             ps_ctxt->i4_sad_type2_lamda_array[i4_curr_cu_qp_offset] =
    507                 ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset];
    508 
    509             break;
    510         }
    511         case 1:
    512         {
    513             lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bdoffset - 12)) / 3.0));
    514             lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0));
    515 
    516             lambda *= lambda_modifier;
    517             lambda_uv *= lambda_uv_modifier;
    518 
    519             ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_curr_cu_qp_offset] =
    520                 (UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT));
    521 
    522             ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset] =
    523                 (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT));
    524 
    525             ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
    526                 (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT));
    527             if(ps_ctxt->i4_use_const_lamda_modifier)
    528             {
    529                 ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
    530                     (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
    531             }
    532             else
    533             {
    534                 ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
    535                     (WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT));
    536             }
    537             ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset] =
    538                 (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
    539 
    540             ps_ctxt->i8_cl_ssd_type2_lambda_qf_array[i4_curr_cu_qp_offset] =
    541                 ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset];
    542 
    543             ps_ctxt->i8_cl_ssd_type2_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
    544                 ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset];
    545 
    546             ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] =
    547                 ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset];
    548 
    549             ps_ctxt->i4_sad_type2_lamda_array[i4_curr_cu_qp_offset] =
    550                 ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset];
    551 
    552             break;
    553         }
    554         case 2:
    555         {
    556             lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bdoffset - 12)) / 3.0));
    557             lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0));
    558 
    559             lambda *= lambda_modifier;
    560             lambda_uv *= lambda_uv_modifier;
    561 
    562             ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_curr_cu_qp_offset] =
    563                 (UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT));
    564 
    565             ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset] =
    566                 (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT));
    567 
    568             ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
    569                 (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT));
    570 
    571             if(ps_ctxt->i4_use_const_lamda_modifier)
    572             {
    573                 ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
    574                     (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
    575             }
    576             else
    577             {
    578                 ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
    579                     (WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT));
    580             }
    581             ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset] =
    582                 (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
    583 
    584             /* lambda corresponding to 8- bit, for metrics based on 8- bit ( Example 8bit SAD in encloop)*/
    585             lambda = pow(2.0, (((double)(i4_curr_cu_qp - 12)) / 3.0));
    586             lambda_uv = pow(2.0, (((double)(chroma_qp - 12)) / 3.0));
    587 
    588             lambda *= lambda_modifier;
    589             lambda_uv *= lambda_uv_modifier;
    590 
    591             ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_curr_cu_qp_offset] =
    592                 (UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT));
    593 
    594             ps_ctxt->i8_cl_ssd_type2_lambda_qf_array[i4_curr_cu_qp_offset] =
    595                 (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT));
    596 
    597             ps_ctxt->i8_cl_ssd_type2_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
    598                 (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT));
    599             if(ps_ctxt->i4_use_const_lamda_modifier)
    600             {
    601                 ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] =
    602                     (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
    603             }
    604             else
    605             {
    606                 ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] =
    607                     (WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT));
    608             }
    609 
    610             ps_ctxt->i4_sad_type2_lamda_array[i4_curr_cu_qp_offset] =
    611                 (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
    612 
    613             break;
    614         }
    615         default:
    616         {
    617             /* Intended to be a barren wasteland! */
    618             ASSERT(0);
    619         }
    620         }
    621     }
    622 }
    623 
    624 /*!
    625 ******************************************************************************
    626 * \if Function name : ihevce_get_cl_cu_lambda_prms \endif
    627 *
    628 * \brief
    629 *    Function whihc calculates the Lambda params for current picture
    630 *
    631 * \param[in] ps_enc_ctxt : encoder ctxt pointer
    632 * \param[in] ps_cur_pic_ctxt : current pic ctxt
    633 * \param[in] i4_cur_frame_qp : current pic QP
    634 * \param[in] first_field : is first field flag
    635 * \param[in] i4_temporal_lyr_id : Current picture layer id
    636 *
    637 * \return
    638 *    None
    639 *
    640 * \author
    641 *  Ittiam
    642 *
    643 *****************************************************************************
    644 */
    645 void ihevce_get_cl_cu_lambda_prms(ihevce_enc_loop_ctxt_t *ps_ctxt, WORD32 i4_cur_cu_qp)
    646 {
    647     WORD32 chroma_qp = (ps_ctxt->u1_chroma_array_type == 2)
    648                            ? MIN(i4_cur_cu_qp + ps_ctxt->i4_chroma_qp_offset, 51)
    649                            : gai1_ihevc_chroma_qp_scale
    650                                  [i4_cur_cu_qp + ps_ctxt->i4_chroma_qp_offset + MAX_QP_BD_OFFSET];
    651 
    652     /* closed loop ssd lambda is same as final lambda */
    653     ps_ctxt->i8_cl_ssd_lambda_qf =
    654         ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_cur_cu_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset];
    655     ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
    656         ps_ctxt
    657             ->i8_cl_ssd_lambda_chroma_qf_array[chroma_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset];
    658     ps_ctxt->u4_chroma_cost_weighing_factor =
    659         ps_ctxt->au4_chroma_cost_weighing_factor_array
    660             [chroma_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset];
    661     /* --- Initialized the lambda for SATD computations --- */
    662     /* --- 0.95 is the multiplication factor as per HM --- */
    663     /* --- 1.9 is the multiplication factor for Hadamard Transform --- */
    664     ps_ctxt->i4_satd_lamda =
    665         ps_ctxt->i4_satd_lamda_array[i4_cur_cu_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset];
    666     ps_ctxt->i4_sad_lamda =
    667         ps_ctxt->i4_sad_type2_lamda_array[i4_cur_cu_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset];
    668 }
    669 
    670 /*!
    671 ******************************************************************************
    672 * \if Function name : ihevce_update_pred_qp \endif
    673 *
    674 * \brief
    675 *    Computes pred qp for the given CU
    676 *
    677 * \param[in]
    678 *
    679 * \return
    680 *
    681 *
    682 * \author
    683 *  Ittiam
    684 *
    685 *****************************************************************************
    686 */
    687 void ihevce_update_pred_qp(ihevce_enc_loop_ctxt_t *ps_ctxt, WORD32 cu_pos_x, WORD32 cu_pos_y)
    688 {
    689     WORD32 i4_pred_qp = 0x7FFFFFFF;
    690     WORD32 i4_top, i4_left;
    691     if(cu_pos_x == 0 && cu_pos_y == 0) /*CTB start*/
    692     {
    693         i4_pred_qp = ps_ctxt->i4_prev_QP;
    694     }
    695     else
    696     {
    697         if(cu_pos_y == 0) /*CTB boundary*/
    698         {
    699             i4_top = ps_ctxt->i4_prev_QP;
    700         }
    701         else /*within CTB*/
    702         {
    703             i4_top = ps_ctxt->ai4_qp_qg[(cu_pos_y - 1) * 8 + (cu_pos_x)];
    704         }
    705         if(cu_pos_x == 0) /*CTB boundary*/
    706         {
    707             i4_left = ps_ctxt->i4_prev_QP;
    708         }
    709         else /*within CTB*/
    710         {
    711             i4_left = ps_ctxt->ai4_qp_qg[(cu_pos_y)*8 + (cu_pos_x - 1)];
    712         }
    713         i4_pred_qp = (i4_left + i4_top + 1) >> 1;
    714     }
    715     ps_ctxt->i4_pred_qp = i4_pred_qp;
    716     return;
    717 }
    718 /*!
    719 ******************************************************************************
    720 * \if Function name : ihevce_compute_cu_level_QP \endif
    721 *
    722 * \brief
    723 *    Computes cu level QP with Traqo,Spatial Mod and In-frame RC
    724 *
    725 * \param[in]
    726 *
    727 * \return
    728 *
    729 *
    730 * \author
    731 *  Ittiam
    732 *
    733 *****************************************************************************
    734 */
    735 void ihevce_compute_cu_level_QP(
    736     ihevce_enc_loop_ctxt_t *ps_ctxt,
    737     WORD32 i4_activity_for_qp,
    738     WORD32 i4_activity_for_lamda,
    739     WORD32 i4_reduce_qp)
    740 {
    741     /*modify quant related param in ctxt based on current cu qp*/
    742     WORD32 i4_input_QP = ps_ctxt->i4_frame_mod_qp;
    743     WORD32 cu_qp = i4_input_QP + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset;
    744 
    745     WORD32 i4_max_qp_allowed;
    746     WORD32 i4_min_qp_allowed;
    747     WORD32 i4_pred_qp;
    748 
    749     i4_pred_qp = ps_ctxt->i4_pred_qp;
    750 
    751     if(ps_ctxt->i4_sub_pic_level_rc)
    752     {
    753         i4_max_qp_allowed = (i4_pred_qp + (25 + (ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset / 2)));
    754         i4_min_qp_allowed = (i4_pred_qp - (26 + (ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset / 2)));
    755     }
    756     else
    757     {
    758         i4_max_qp_allowed = (i4_input_QP + (7 + (ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset / 4)));
    759         i4_min_qp_allowed = (i4_input_QP - (18 + (ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset / 4)));
    760     }
    761     if((ps_ctxt->i1_slice_type == BSLICE) && (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6))
    762         return;
    763 
    764 #if LAMDA_BASED_ON_QUANT
    765     i4_activity_for_lamda = i4_activity_for_qp;
    766 #endif
    767 
    768     if(i4_activity_for_qp != -1)
    769     {
    770         cu_qp = (ps_ctxt->ps_rc_quant_ctxt
    771                      ->pi4_qp_to_qscale[i4_input_QP + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]);
    772         if(ps_ctxt->i4_qp_mod)
    773         {
    774             /*Recompute the Qp as per enc thread's frame level Qp*/
    775             ASSERT(i4_activity_for_qp > 0);
    776             cu_qp = ((cu_qp * i4_activity_for_qp) + (1 << (QP_LEVEL_MOD_ACT_FACTOR - 1))) >>
    777                     QP_LEVEL_MOD_ACT_FACTOR;
    778         }
    779 
    780         // To avoid access of uninitialised Qscale to qp conversion table
    781         if(cu_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale)
    782             cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale;
    783         else if(cu_qp < ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale)
    784             cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale;
    785 
    786         cu_qp = ps_ctxt->ps_rc_quant_ctxt->pi4_qscale_to_qp[cu_qp];
    787 
    788         if((1 == i4_reduce_qp) && (cu_qp > 1))
    789             cu_qp--;
    790 
    791         /*CLIP the delta to obey standard allowed QP variation of (-26 + offset/2) to (25 + offset/2)*/
    792         if(cu_qp > i4_max_qp_allowed)
    793             cu_qp = i4_max_qp_allowed;
    794         else if(cu_qp < i4_min_qp_allowed)
    795             cu_qp = i4_min_qp_allowed;
    796 
    797         /* CLIP to maintain Qp between user configured and min and max Qp values*/
    798         if(cu_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qp)
    799             cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qp;
    800         else if(cu_qp < ps_ctxt->ps_rc_quant_ctxt->i2_min_qp)
    801             cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp;
    802 
    803         /*cu qp must be populated in cu_analyse_t struct*/
    804         ps_ctxt->i4_cu_qp = cu_qp;
    805         /*recompute quant related param at every cu level*/
    806         ihevce_compute_quant_rel_param(ps_ctxt, cu_qp);
    807     }
    808 
    809     /*Decoupling qp and lamda calculation */
    810     if(i4_activity_for_lamda != -1)
    811     {
    812         cu_qp = (ps_ctxt->ps_rc_quant_ctxt
    813                      ->pi4_qp_to_qscale[i4_input_QP + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]);
    814 
    815         if(ps_ctxt->i4_qp_mod)
    816         {
    817 #if MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON
    818             /*Recompute the Qp as per enc thread's frame level Qp*/
    819             ASSERT(i4_activity_for_lamda > 0);
    820             cu_qp = ((cu_qp * i4_activity_for_lamda) + (1 << (QP_LEVEL_MOD_ACT_FACTOR - 1))) >>
    821                     QP_LEVEL_MOD_ACT_FACTOR;
    822 #endif
    823         }
    824         if(cu_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale)
    825             cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale;
    826         else if(cu_qp < ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale)
    827             cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale;
    828 
    829         cu_qp = ps_ctxt->ps_rc_quant_ctxt->pi4_qscale_to_qp[cu_qp];
    830 
    831         /*CLIP the delta to obey standard allowed QP variation of (-26 + offset/2) to (25 + offset/2)*/
    832         if(cu_qp > i4_max_qp_allowed)
    833             cu_qp = i4_max_qp_allowed;
    834         else if(cu_qp < i4_min_qp_allowed)
    835             cu_qp = i4_min_qp_allowed;
    836 
    837         /* CLIP to maintain Qp between user configured and min and max Qp values*/
    838         if(cu_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qp)
    839             cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qp;
    840         else if(cu_qp < ps_ctxt->ps_rc_quant_ctxt->i2_min_qp)
    841             cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp;
    842         /* get frame level lambda params */
    843         ihevce_get_cl_cu_lambda_prms(
    844             ps_ctxt, MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON ? cu_qp : ps_ctxt->i4_frame_qp);
    845     }
    846 }
    847 
/**
*******************************************************************************
* \if Function name : ihevce_scan_coeffs \endif
*
* @brief * Computes the coeff buffer for a coded TU for entropy coding
*
* @par   Description
* Walks the 4x4 coded sub-blocks (csb) of a TU from the last coded one in scan
* order down to the first one and packs their coefficient information into
* pu1_out_data.
*
* Output layout, as written by this function:
*  - 4 header bytes: last_x, last_y, scan_idx and the scan order index of the
*    last coded csb
*  - then, for the last coded csb and for every csb preceding it in scan
*    order, a sequence of 16 bit words:
*      - csbf word: 0xBAD0 pattern | bit0 = csbf of this csb |
*        bit1 = csbf of the right csb | bit2 = csbf of the bottom csb
*        (for the last coded csb only bit0 is set; for csb 0 bit0 is forced)
*      - only when bit0 is set: gt0 flags, gt1 flags and sign flags (bit i
*        belongs to position i of the 4x4 scan), followed by one
*        (abs(coeff) - 1) word for every bit set in the gt1 flags, in
*        decreasing order of i
*
* \param[in] pi2_quant_coeffs Quantized coefficients of the TU, read as
*            pi2_quant_coeffs[x + y * trans_size]
*
* \param[in] pi4_subBlock2csbfId_map Maps the raster index of a csb inside the
*            TU (row * (trans_size / 4) + col) to its index in pu1_csbf_buf
*
* \param[in] scan_idx Scan index specifying the scan order
*
* \param[in] trans_size Transform unit size (4, 8, 16 or 32)
*
* \param[inout] pu1_out_data output coeff buffer for a coded TU for entropy coding
*
* \param[in] pu1_csbf_buf csb flag buffer; the code assumes entries are 0 or 1
*
* \param[in] i4_csbf_stride Unused
*
* @returns num_bytes
* Number of bytes written to pu1_out_data; 0 when no csb is flagged as coded,
* -1 when trans_size is not one of the supported sizes
*
* @remarks
* The 16 bit words are stored through a UWORD16 pointer starting at
* pu1_out_data + 4.
* NOTE(review): presumably the caller hands in a buffer that is suitably
* aligned for that - confirm.
*
* \author
*  Ittiam
*
*******************************************************************************
*/

WORD32 ihevce_scan_coeffs(
    WORD16 *pi2_quant_coeffs,
    WORD32 *pi4_subBlock2csbfId_map,
    WORD32 scan_idx,
    WORD32 trans_size,
    UWORD8 *pu1_out_data,
    UWORD8 *pu1_csbf_buf,
    WORD32 i4_csbf_stride)
{
    WORD32 i, trans_unit_idx, num_gt1_flag;
    UWORD16 u2_csbf0flags;
    WORD32 num_bytes = 0;
    UWORD8 *pu1_trans_table;
    UWORD8 *pu1_csb_table;
    WORD32 shift_value, mask_value;
    /* zero here because the last coded csb ORs into them without a reset */
    UWORD16 u2_sig_coeff_abs_gt0_flags = 0, u2_sig_coeff_abs_gt1_flags = 0;
    UWORD16 u2_sign_flags;
    /* (abs - 1) values of one csb, at most 16 coefficients */
    UWORD16 u2_abs_coeff_remaining[16];
    WORD32 blk_row, blk_col;

    UWORD8 *pu1_out_data_header;
    UWORD16 *pu2_out_data_coeff;

    WORD32 x_pos, y_pos;
    WORD32 quant_coeff;

    WORD32 num_gt0_flag;
    (void)i4_csbf_stride;
    pu1_out_data_header = pu1_out_data;
    /* Need only last 3 bits, rest are reserved for debugging and making */
    /* WORD alignment */
    u2_csbf0flags = 0xBAD0;

    /* Select proper order for your transform unit and csb based on scan_idx*/
    /* and the trans_size */

    /* scan order inside a csb */
    pu1_csb_table = (UWORD8 *)&(g_u1_scan_table_4x4[scan_idx][0]);
    /* shift_value / mask_value split a raster csb index into row and column. */
    /* NOTE(review): the "- 3" below is only consistent if GETRANGE yields    */
    /* log2(trans_size) + 1 - confirm against the macro definition            */
    GETRANGE(shift_value, trans_size);
    shift_value = shift_value - 3; /* for finding. row no. from scan index */
    mask_value = (trans_size / 4) - 1; /*for finding the col. no. from scan index*/
    /* scan order of the csbs inside the TU */
    switch(trans_size)
    {
    case 32:
        pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_8x8[scan_idx][0]);
        break;
    case 16:
        pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_4x4[scan_idx][0]);
        break;
    case 8:
        pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_2x2[scan_idx][0]);
        break;
    case 4:
        pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_1x1[0]);
        break;
    default:
        DBG_PRINTF("Invalid Trans Size\n");
        return -1;
        break;
    }

    /*go through each csb in the scan order for first non-zero coded sub-block*/
    for(trans_unit_idx = (trans_size * trans_size / 16) - 1; trans_unit_idx >= 0; trans_unit_idx--)
    {
        /* check for the first csb flag in our scan order */
        if(pu1_csbf_buf[pi4_subBlock2csbfId_map[pu1_trans_table[trans_unit_idx]]])
        {
            UWORD8 u1_last_x, u1_last_y;
            /* row of csb */
            blk_row = pu1_trans_table[trans_unit_idx] >> shift_value;
            /* col of csb */
            blk_col = pu1_trans_table[trans_unit_idx] & mask_value;

            /*check for the 1st non-0 values inside the csb in our scan order*/
            for(i = 15; i >= 0; i--)
            {
                x_pos = (pu1_csb_table[i] & 0x3) + blk_col * 4;
                y_pos = (pu1_csb_table[i] >> 2) + blk_row * 4;

                quant_coeff = pi2_quant_coeffs[x_pos + (y_pos * trans_size)];

                if(quant_coeff != 0)
                    break;
            }

            /* a csb flagged as coded must hold a non-zero coefficient; the   */
            /* code below shifts by i and therefore relies on i >= 0          */
            ASSERT(i >= 0);

            u1_last_x = x_pos;
            u1_last_y = y_pos;

            /* storing last_x and last_y */
            *pu1_out_data_header = u1_last_x;
            pu1_out_data_header++;
            num_bytes++;
            *pu1_out_data_header = u1_last_y;
            pu1_out_data_header++;
            num_bytes++;

            /* storing the scan order */
            *pu1_out_data_header = scan_idx;
            pu1_out_data_header++;
            num_bytes++;
            /* storing last_sub_block pos. in scan order count */
            *pu1_out_data_header = trans_unit_idx;
            pu1_out_data_header++;
            num_bytes++;

            /*stored the first 4 bytes, now all are word16. So word16 pointer*/
            pu2_out_data_coeff = (UWORD16 *)pu1_out_data_header;

            /* u2_csbf0flags word */
            u2_csbf0flags = 0xBAD0 | 1; /*since right&bottom csbf is 0*/
            /* storing u2_csbf0flags word */
            *pu2_out_data_coeff = u2_csbf0flags;
            pu2_out_data_coeff++;
            num_bytes += 2;

            /* the last significant coefficient found above is already counted */
            num_gt0_flag = 1;
            num_gt1_flag = 0;
            u2_sign_flags = 0;

            /* set the i th bit of u2_sig_coeff_abs_gt0_flags */
            u2_sig_coeff_abs_gt0_flags = u2_sig_coeff_abs_gt0_flags | (1 << i);
            if(abs(quant_coeff) > 1)
            {
                /* set the i th bit of u2_sig_coeff_abs_gt1_flags */
                u2_sig_coeff_abs_gt1_flags = u2_sig_coeff_abs_gt1_flags | (1 << i);
                /* update u2_abs_coeff_remaining */
                u2_abs_coeff_remaining[num_gt1_flag] = (UWORD16)abs(quant_coeff) - 1;

                num_gt1_flag++;
            }

            if(quant_coeff < 0)
            {
                /* set the i th bit of u2_sign_flags */
                u2_sign_flags = u2_sign_flags | (1 << i);
            }

            /* Test remaining elements in our scan order */
            /* Can optimize further by CLZ macro */
            for(i = i - 1; i >= 0; i--)
            {
                x_pos = (pu1_csb_table[i] & 0x3) + blk_col * 4;
                y_pos = (pu1_csb_table[i] >> 2) + blk_row * 4;

                quant_coeff = pi2_quant_coeffs[x_pos + (y_pos * trans_size)];

                if(quant_coeff != 0)
                {
                    /* set the i th bit of u2_sig_coeff_abs_gt0_flags */
                    u2_sig_coeff_abs_gt0_flags |= (1 << i);

                    /* once MAX_GT_ONE non-zero coefficients were seen in this */
                    /* csb, every further one gets a remaining-level entry     */
                    if((abs(quant_coeff) > 1) || (num_gt0_flag >= MAX_GT_ONE))
                    {
                        /* set the i th bit of u2_sig_coeff_abs_gt1_flags */
                        u2_sig_coeff_abs_gt1_flags |= (1 << i);

                        /* update u2_abs_coeff_remaining */
                        u2_abs_coeff_remaining[num_gt1_flag] = (UWORD16)abs(quant_coeff) - 1;

                        num_gt1_flag++; /*no. of Ones in sig_coeff_abs_gt1_flag*/
                    }

                    if(quant_coeff < 0)
                    {
                        /* set the i th bit of u2_sign_flags */
                        u2_sign_flags |= (1 << i);
                    }

                    num_gt0_flag++;
                }
            }

            /* storing u2_sig_coeff_abs_gt0_flags 2 bytes */
            *pu2_out_data_coeff = u2_sig_coeff_abs_gt0_flags;
            pu2_out_data_coeff++;
            num_bytes += 2;
            /* storing u2_sig_coeff_abs_gt1_flags 2 bytes */
            *pu2_out_data_coeff = u2_sig_coeff_abs_gt1_flags;
            pu2_out_data_coeff++;
            num_bytes += 2;
            /* storing u2_sign_flags 2 bytes */
            *pu2_out_data_coeff = u2_sign_flags;
            pu2_out_data_coeff++;
            num_bytes += 2;

            /* Store the u2_abs_coeff_remaining[] */
            for(i = 0; i < num_gt1_flag; i++)
            {
                /* storing u2_abs_coeff_remaining[i] 2 bytes */
                *pu2_out_data_coeff = u2_abs_coeff_remaining[i];
                pu2_out_data_coeff++;
                num_bytes += 2;
            }

            break; /*We just need this loop for finding 1st non-zero csb only*/
        }
    }

    /* go through remaining csb in the scan order */
    /* (when no coded csb was found above, trans_unit_idx is -1 here and the */
    /* loop body is never entered, so 0 bytes are reported)                  */
    for(trans_unit_idx = trans_unit_idx - 1; trans_unit_idx >= 0; trans_unit_idx--)
    {
        blk_row = pu1_trans_table[trans_unit_idx] >> shift_value; /*row of csb*/
        blk_col = pu1_trans_table[trans_unit_idx] & mask_value; /*col of csb*/

        /* u2_csbf0flags word */
        u2_csbf0flags = 0xBAD0 | /* assuming csbf_buf has only 0 or 1 values */
                        (pu1_csbf_buf[pi4_subBlock2csbfId_map[pu1_trans_table[trans_unit_idx]]]);

        /********************************************************************/
        /* Minor hack: As per HEVC spec csbf is not signalled in stream for */
        /* block0, instead sig coeff map is directly signalled. This is     */
        /* taken care by forcing csbf for block0 to be 1 even if it is 0    */
        /********************************************************************/
        if(0 == trans_unit_idx)
        {
            u2_csbf0flags |= 1;
        }

        if((blk_col + 1 < trans_size / 4)) /* checking right boundary */
        {
            if(pu1_csbf_buf[pi4_subBlock2csbfId_map[blk_row * trans_size / 4 + blk_col + 1]])
            {
                /* set the 2nd bit of u2_csbf0flags for right csbf */
                u2_csbf0flags = u2_csbf0flags | (1 << 1);
            }
        }
        if((blk_row + 1 < trans_size / 4)) /* checking bottom boundary */
        {
            if(pu1_csbf_buf[pi4_subBlock2csbfId_map[(blk_row + 1) * trans_size / 4 + blk_col]])
            {
                /* set the 3rd bit of u2_csbf0flags  for bottom csbf */
                u2_csbf0flags = u2_csbf0flags | (1 << 2);
            }
        }

        /* storing u2_csbf0flags word */
        *pu2_out_data_coeff = u2_csbf0flags;
        pu2_out_data_coeff++;
        num_bytes += 2;

        /* check for the csb flag in our scan order */
        if(u2_csbf0flags & 0x1)
        {
            u2_sig_coeff_abs_gt0_flags = 0;
            u2_sig_coeff_abs_gt1_flags = 0;
            u2_sign_flags = 0;

            num_gt0_flag = 0;
            num_gt1_flag = 0;
            /* check for the non-0 values inside the csb in our scan order */
            /* Can optimize further by CLZ macro */
            for(i = 15; i >= 0; i--)
            {
                x_pos = (pu1_csb_table[i] & 0x3) + blk_col * 4;
                y_pos = (pu1_csb_table[i] >> 2) + blk_row * 4;

                quant_coeff = pi2_quant_coeffs[x_pos + (y_pos * trans_size)];

                if(quant_coeff != 0)
                {
                    /* set the i th bit of u2_sig_coeff_abs_gt0_flags */
                    u2_sig_coeff_abs_gt0_flags |= (1 << i);

                    /* same rule as for the last coded csb: beyond MAX_GT_ONE  */
                    /* non-zero coefficients every one gets a remaining entry  */
                    if((abs(quant_coeff) > 1) || (num_gt0_flag >= MAX_GT_ONE))
                    {
                        /* set the i th bit of u2_sig_coeff_abs_gt1_flags */
                        u2_sig_coeff_abs_gt1_flags |= (1 << i);

                        /* update u2_abs_coeff_remaining */
                        u2_abs_coeff_remaining[num_gt1_flag] = (UWORD16)abs(quant_coeff) - 1;

                        num_gt1_flag++;
                    }

                    if(quant_coeff < 0)
                    {
                        /* set the i th bit of u2_sign_flags */
                        u2_sign_flags = u2_sign_flags | (1 << i);
                    }

                    num_gt0_flag++;
                }
            }

            /* storing u2_sig_coeff_abs_gt0_flags 2 bytes */
            *pu2_out_data_coeff = u2_sig_coeff_abs_gt0_flags;
            pu2_out_data_coeff++;
            num_bytes += 2;

            /* storing u2_sig_coeff_abs_gt1_flags 2 bytes */
            *pu2_out_data_coeff = u2_sig_coeff_abs_gt1_flags;
            pu2_out_data_coeff++;
            num_bytes += 2;

            /* storing u2_sign_flags 2 bytes */
            *pu2_out_data_coeff = u2_sign_flags;
            pu2_out_data_coeff++;
            num_bytes += 2;

            /* Store the u2_abs_coeff_remaining[] */
            for(i = 0; i < num_gt1_flag; i++)
            {
                /* storing u2_abs_coeff_remaining[i] 2 bytes */
                *pu2_out_data_coeff = u2_abs_coeff_remaining[i];
                pu2_out_data_coeff++;
                num_bytes += 2;
            }
        }
    }

    return num_bytes; /* Return the number of bytes written to out_data */
}
   1193 
   1194 /**
   1195 *******************************************************************************
   1196 * \if Function name : ihevce_populate_intra_pred_mode \endif
   1197 *
   1198 * \brief * populates intra pred modes,b2_mpm_idx,b1_prev_intra_luma_pred_flag &
   1199 * b5_rem_intra_pred_mode for a CU based on nieghbouring CUs,
   1200 *
   1201 * \par   Description
   1202 * Computes the b1_prev_intra_luma_pred_flag, b2_mpm_idx & b5_rem_intra_pred_mode
   1203 * for a CU
   1204 *
   1205 * \param[in] top_intra_mode Top intra mode
   1206 * \param[in] left_intra_mode Left intra mode
   1207 * \param[in] available_top Top availability flag
   1208 * \param[in] available_left Left availability flag
   1209 * \param[in] cu_pos_y CU 'y' position
   1210 * \param[in] ps_cand_mode_list pointer to populate candidate list
   1211 *
   1212 * \returns none
   1213 *
   1214 * \author
   1215 *  Ittiam
   1216 *
   1217 *******************************************************************************
   1218 */
   1219 
   1220 void ihevce_populate_intra_pred_mode(
   1221     WORD32 top_intra_mode,
   1222     WORD32 left_intra_mode,
   1223     WORD32 available_top,
   1224     WORD32 available_left,
   1225     WORD32 cu_pos_y,
   1226     WORD32 *ps_cand_mode_list)
   1227 {
   1228     /* local variables */
   1229     WORD32 cand_intra_pred_mode_left, cand_intra_pred_mode_top;
   1230 
   1231     /* Calculate cand_intra_pred_mode_N as per sec. 8.4.2 in JCTVC-J1003_d7 */
   1232     /* N = top */
   1233     if(0 == available_top)
   1234     {
   1235         cand_intra_pred_mode_top = INTRA_DC;
   1236     }
   1237     /* for neighbour != INTRA, setting DC is done outside */
   1238     else if(0 == cu_pos_y) /* It's on the CTB boundary */
   1239     {
   1240         cand_intra_pred_mode_top = INTRA_DC;
   1241     }
   1242     else
   1243     {
   1244         cand_intra_pred_mode_top = top_intra_mode;
   1245     }
   1246 
   1247     /* N = left */
   1248     if(0 == available_left)
   1249     {
   1250         cand_intra_pred_mode_left = INTRA_DC;
   1251     }
   1252     /* for neighbour != INTRA, setting DC is done outside */
   1253     else
   1254     {
   1255         cand_intra_pred_mode_left = left_intra_mode;
   1256     }
   1257 
   1258     /* Calculate cand_mode_list as per sec. 8.4.2 in JCTVC-J1003_d7 */
   1259     if(cand_intra_pred_mode_left == cand_intra_pred_mode_top)
   1260     {
   1261         if(cand_intra_pred_mode_left < 2)
   1262         {
   1263             ps_cand_mode_list[0] = INTRA_PLANAR;
   1264             ps_cand_mode_list[1] = INTRA_DC;
   1265             ps_cand_mode_list[2] = INTRA_ANGULAR(26); /* angular 26 = Vertical */
   1266         }
   1267         else
   1268         {
   1269             ps_cand_mode_list[0] = cand_intra_pred_mode_left;
   1270             ps_cand_mode_list[1] = 2 + ((cand_intra_pred_mode_left + 29) % 32);
   1271             ps_cand_mode_list[2] = 2 + ((cand_intra_pred_mode_left - 2 + 1) % 32);
   1272         }
   1273     }
   1274     else
   1275     {
   1276         ps_cand_mode_list[0] = cand_intra_pred_mode_left;
   1277         ps_cand_mode_list[1] = cand_intra_pred_mode_top;
   1278 
   1279         if((cand_intra_pred_mode_left != INTRA_PLANAR) &&
   1280            (cand_intra_pred_mode_top != INTRA_PLANAR))
   1281         {
   1282             ps_cand_mode_list[2] = INTRA_PLANAR;
   1283         }
   1284         else if((cand_intra_pred_mode_left != INTRA_DC) && (cand_intra_pred_mode_top != INTRA_DC))
   1285         {
   1286             ps_cand_mode_list[2] = INTRA_DC;
   1287         }
   1288         else
   1289         {
   1290             ps_cand_mode_list[2] = INTRA_ANGULAR(26);
   1291         }
   1292     }
   1293 }
   1294 /**
   1295 *******************************************************************************
   1296 * \if Function name : ihevce_intra_pred_mode_signaling \endif
   1297 *
   1298 * \brief * Computes the b1_prev_intra_luma_pred_flag, b2_mpm_idx &
   1299 * b5_rem_intra_pred_mode for a CU
   1300 *
   1301 * \par   Description
   1302 * Computes the b1_prev_intra_luma_pred_flag, b2_mpm_idx & b5_rem_intra_pred_mode
   1303 * for a CU
   1304 *
   1305 * \param[in] ps_nbr_top Top neighbour context
   1306 * \param[in] ps_nbr_left Left neighbour context
   1307 * \param[in] available_top Top availability flag
   1308 * \param[in] available_left Left availability flag
   1309 * \param[in] cu_pos_y CU 'y' position
   1310 * \param[in] luma_intra_pred_mode_current the intra_pred_mode of current block
   1311 * \param[inout] ps_intra_pred_mode_current
   1312 * Pointer to structure having b1_prev_intra_luma_pred_flag, b2_mpm_idx and
   1313 * b5_rem_intra_pred_mode
   1314 *
   1315 * \returns none
   1316 *
   1317 * \author
   1318 *  Ittiam
   1319 *
   1320 *******************************************************************************
   1321 */
   1322 
   1323 void ihevce_intra_pred_mode_signaling(
   1324     WORD32 top_intra_mode,
   1325     WORD32 left_intra_mode,
   1326     WORD32 available_top,
   1327     WORD32 available_left,
   1328     WORD32 cu_pos_y,
   1329     WORD32 luma_intra_pred_mode_current,
   1330     intra_prev_rem_flags_t *ps_intra_pred_mode_current)
   1331 {
   1332     /* local variables */
   1333     WORD32 cand_intra_pred_mode_left, cand_intra_pred_mode_top;
   1334     WORD32 cand_mode_list[3];
   1335 
   1336     ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 0;
   1337     ps_intra_pred_mode_current->b2_mpm_idx = 0;  // for safety purpose
   1338     ps_intra_pred_mode_current->b5_rem_intra_pred_mode = 0;
   1339 
   1340     /* Calculate cand_intra_pred_mode_N as per sec. 8.4.2 in JCTVC-J1003_d7 */
   1341     /* N = top */
   1342     if(0 == available_top)
   1343     {
   1344         cand_intra_pred_mode_top = INTRA_DC;
   1345     }
   1346     /* for neighbour != INTRA, setting DC is done outside */
   1347     else if(0 == cu_pos_y) /* It's on the CTB boundary */
   1348     {
   1349         cand_intra_pred_mode_top = INTRA_DC;
   1350     }
   1351     else
   1352     {
   1353         cand_intra_pred_mode_top = top_intra_mode;
   1354     }
   1355 
   1356     /* N = left */
   1357     if(0 == available_left)
   1358     {
   1359         cand_intra_pred_mode_left = INTRA_DC;
   1360     }
   1361     /* for neighbour != INTRA, setting DC is done outside */
   1362     else
   1363     {
   1364         cand_intra_pred_mode_left = left_intra_mode;
   1365     }
   1366 
   1367     /* Calculate cand_mode_list as per sec. 8.4.2 in JCTVC-J1003_d7 */
   1368     if(cand_intra_pred_mode_left == cand_intra_pred_mode_top)
   1369     {
   1370         if(cand_intra_pred_mode_left < 2)
   1371         {
   1372             cand_mode_list[0] = INTRA_PLANAR;
   1373             cand_mode_list[1] = INTRA_DC;
   1374             cand_mode_list[2] = INTRA_ANGULAR(26); /* angular 26 = Vertical */
   1375         }
   1376         else
   1377         {
   1378             cand_mode_list[0] = cand_intra_pred_mode_left;
   1379             cand_mode_list[1] = 2 + ((cand_intra_pred_mode_left + 29) % 32);
   1380             cand_mode_list[2] = 2 + ((cand_intra_pred_mode_left - 2 + 1) % 32);
   1381         }
   1382     }
   1383     else
   1384     {
   1385         cand_mode_list[0] = cand_intra_pred_mode_left;
   1386         cand_mode_list[1] = cand_intra_pred_mode_top;
   1387 
   1388         if((cand_intra_pred_mode_left != INTRA_PLANAR) &&
   1389            (cand_intra_pred_mode_top != INTRA_PLANAR))
   1390         {
   1391             cand_mode_list[2] = INTRA_PLANAR;
   1392         }
   1393         else if((cand_intra_pred_mode_left != INTRA_DC) && (cand_intra_pred_mode_top != INTRA_DC))
   1394         {
   1395             cand_mode_list[2] = INTRA_DC;
   1396         }
   1397         else
   1398         {
   1399             cand_mode_list[2] = INTRA_ANGULAR(26);
   1400         }
   1401     }
   1402 
   1403     /* Signal Generation */
   1404 
   1405     /* Flag & mpm_index generation */
   1406     if(cand_mode_list[0] == luma_intra_pred_mode_current)
   1407     {
   1408         ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 1;
   1409         ps_intra_pred_mode_current->b2_mpm_idx = 0;
   1410     }
   1411     else if(cand_mode_list[1] == luma_intra_pred_mode_current)
   1412     {
   1413         ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 1;
   1414         ps_intra_pred_mode_current->b2_mpm_idx = 1;
   1415     }
   1416     else if(cand_mode_list[2] == luma_intra_pred_mode_current)
   1417     {
   1418         ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 1;
   1419         ps_intra_pred_mode_current->b2_mpm_idx = 2;
   1420     }
   1421     /* Flag & b5_rem_intra_pred_mode generation */
   1422     else
   1423     {
   1424         WORD32 rem_mode;
   1425 
   1426         ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 0;
   1427 
   1428         /* sorting cand_mode_list */
   1429         if(cand_mode_list[0] > cand_mode_list[1])
   1430         {
   1431             SWAP(cand_mode_list[0], cand_mode_list[1]);
   1432         }
   1433         if(cand_mode_list[0] > cand_mode_list[2])
   1434         {
   1435             SWAP(cand_mode_list[0], cand_mode_list[2]);
   1436         }
   1437         if(cand_mode_list[1] > cand_mode_list[2])
   1438         {
   1439             SWAP(cand_mode_list[1], cand_mode_list[2]);
   1440         }
   1441 
   1442         rem_mode = luma_intra_pred_mode_current;
   1443 
   1444         if((rem_mode) >= cand_mode_list[2])
   1445         {
   1446             (rem_mode)--;
   1447         }
   1448         if((rem_mode) >= cand_mode_list[1])
   1449         {
   1450             (rem_mode)--;
   1451         }
   1452         if((rem_mode) >= cand_mode_list[0])
   1453         {
   1454             (rem_mode)--;
   1455         }
   1456         ps_intra_pred_mode_current->b5_rem_intra_pred_mode = rem_mode;
   1457     }
   1458 }
   1459 
   1460 void ihevce_quant_rounding_factor_gen(
   1461     WORD32 i4_trans_size,
   1462     WORD32 is_luma,
   1463     rdopt_entropy_ctxt_t *ps_rdopt_entropy_ctxt,
   1464     WORD32 *pi4_quant_round_0_1,
   1465     WORD32 *pi4_quant_round_1_2,
   1466     double i4_lamda_modifier,
   1467     UWORD8 i4_is_tu_level_quant_rounding)
   1468 {
   1469     //WORD32 i4_scan_idx = ps_ctxt->i4_scan_idx;
   1470     UWORD8 *pu1_ctxt_model;
   1471     WORD32 scan_pos;
   1472     WORD32 sig_coeff_base_ctxt; /* cabac context for sig coeff flag    */
   1473     WORD32 abs_gt1_base_ctxt;
   1474     WORD32 log2_tr_size, i;
   1475     UWORD16 u4_bits_estimated_r0, u4_bits_estimated_r1, u4_bits_estimated_r2;
   1476     UWORD16 u4_bits_estimated_r1_temp;
   1477     WORD32 j = 0;
   1478     WORD32 k = 0;
   1479     WORD32 temp2;
   1480 
   1481     double i4_lamda_mod = i4_lamda_modifier * pow(2.0, (-8.0 / 3.0));
   1482     LWORD64 lamda_mod = (LWORD64)(i4_lamda_mod * (1 << LAMDA_Q_SHIFT_FACT));
   1483     /* transform size to log2transform size */
   1484     GETRANGE(log2_tr_size, i4_trans_size);
   1485     log2_tr_size -= 1;
   1486 
   1487     if(1 == i4_is_tu_level_quant_rounding)
   1488     {
   1489         entropy_context_t *ps_cur_tu_entropy;
   1490         cab_ctxt_t *ps_cabac;
   1491         WORD32 curr_buf_idx = ps_rdopt_entropy_ctxt->i4_curr_buf_idx;
   1492         ps_cur_tu_entropy = &ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[curr_buf_idx];
   1493 
   1494         ps_cabac = &ps_cur_tu_entropy->s_cabac_ctxt;
   1495 
   1496         pu1_ctxt_model = &ps_cabac->au1_ctxt_models[0];
   1497     }
   1498     else
   1499     {
   1500         pu1_ctxt_model = &ps_rdopt_entropy_ctxt->au1_init_cabac_ctxt_states[0];
   1501     }
   1502     /*If transform size is 4x4, then only one sub-block*/
   1503     if(is_luma)
   1504     {
   1505         sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG;
   1506         abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG;
   1507 
   1508         if(3 == log2_tr_size)
   1509         {
   1510             /* 8x8 transform size */
   1511             /* Assuming diagnol scan idx for now */
   1512             sig_coeff_base_ctxt += 9;
   1513         }
   1514         else if(3 < log2_tr_size)
   1515         {
   1516             /* larger transform sizes */
   1517             sig_coeff_base_ctxt += 21;
   1518         }
   1519     }
   1520     else
   1521     {
   1522         /* chroma context initializations */
   1523         sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG + 27;
   1524         abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG + 16;
   1525 
   1526         if(3 == log2_tr_size)
   1527         {
   1528             /* 8x8 transform size */
   1529             sig_coeff_base_ctxt += 9;
   1530         }
   1531         else if(3 < log2_tr_size)
   1532         {
   1533             /* larger transform sizes */
   1534             sig_coeff_base_ctxt += 12;
   1535         }
   1536     }
   1537 
   1538     /*Transform size of 4x4 will have only a single CSB */
   1539     /* derive the context inc as per section 9.3.3.1.4 */
   1540 
   1541     if(2 == log2_tr_size)
   1542     {
   1543         UWORD8 sig_ctxinc;
   1544         WORD32 state_mps;
   1545         WORD32 gt1_ctxt = 0;
   1546         WORD32 ctxt_set = 0;
   1547         WORD32 ctxt_idx = 0;
   1548 
   1549         /* context set based on luma subblock pos */
   1550 
   1551         /* Encodet the abs level gt1 bins */
   1552         /* Currently calculating trade off between mps(2) and mps(1)*/
   1553         /* The estimation has to be further done for mps(11) and mps(111)*/
   1554         /*ctxt_set = 0 as transform 4x4 has only one csb with DC */
   1555         /* gt1_ctxt = 0 for the co-ef value to be 2 */
   1556 
   1557         ctxt_set = gt1_ctxt = 0;
   1558         ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
   1559 
   1560         state_mps = pu1_ctxt_model[ctxt_idx];
   1561 
   1562         u4_bits_estimated_r2 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1];
   1563 
   1564         u4_bits_estimated_r1_temp = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
   1565 
   1566         QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r2, u4_bits_estimated_r1_temp, lamda_mod);
   1567         for(scan_pos = 0; scan_pos < 16; scan_pos++)
   1568         {
   1569             *(pi4_quant_round_1_2 + scan_pos) = temp2;
   1570         }
   1571 
   1572         for(scan_pos = 0; scan_pos < 16; scan_pos++)
   1573         {
   1574             //UWORD8 nbr_csbf = 1;
   1575             /* derive the x,y pos */
   1576             UWORD8 y_pos_x_pos = scan_pos;  //gu1_hevce_scan4x4[i4_scan_idx][scan_pos];
   1577 
   1578             /* 4x4 transform size increment uses lookup */
   1579             sig_ctxinc = gu1_hevce_sigcoeff_ctxtinc_tr4[y_pos_x_pos];
   1580 
   1581             /*Get the mps state based on ctxt modes */
   1582             state_mps = pu1_ctxt_model[sig_ctxinc + sig_coeff_base_ctxt];
   1583 
   1584             /* Bits taken to encode sig co-ef flag as 0 */
   1585             u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
   1586 
   1587             /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
   1588             //
   1589             u4_bits_estimated_r1 =
   1590                 (gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000));
   1591 
   1592             /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
   1593             u4_bits_estimated_r1 += u4_bits_estimated_r1_temp;
   1594 
   1595             QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod);
   1596             *(pi4_quant_round_0_1 + scan_pos) = temp2;
   1597         }
   1598     }
   1599     else
   1600     {
   1601         UWORD8 *pu1_hevce_sigcoeff_ctxtinc;
   1602         WORD32 is_nbr_csb_state_mps;
   1603 
   1604         WORD32 state_mps;
   1605         WORD32 gt1_ctxt = 0;
   1606         WORD32 ctxt_set = 0;
   1607         WORD32 ctxt_idx;
   1608         /*1to2 rounding factor is same for all sub blocks except for sub-block = 0*/
   1609         /*Hence will write all the sub-block with i >=1 coeff, and then overwrite for i = 0*/
   1610 
   1611         /*ctxt_set = 0 DC subblock, the previous state did not have 2
   1612         ctxt_set = 1 DC subblock, the previous state did have >= 2
   1613         ctxt_set = 2 AC subblock, the previous state did not have 2
   1614         ctxt_set = 3 AC subblock, the previous state did have >= 2*/
   1615         i = 1;
   1616         ctxt_set = (i && is_luma) ? 2 : 0;
   1617 
   1618         ctxt_set++;
   1619 
   1620         /*0th position indicates the probability of 2 */
   1621         /*1th position indicates the probability of 1 */
   1622         /*2th position indicates the probability of 11 */
   1623         /*3th position indicates the probability of 111 */
   1624 
   1625         gt1_ctxt = 0;
   1626         ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
   1627 
   1628         state_mps = pu1_ctxt_model[ctxt_idx];
   1629 
   1630         u4_bits_estimated_r2 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1];
   1631 
   1632         u4_bits_estimated_r1 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
   1633         QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r2, u4_bits_estimated_r1, lamda_mod);
   1634 
   1635         for(scan_pos = 0; scan_pos < (16 * (i4_trans_size * i4_trans_size >> 4)); scan_pos++)
   1636         {
   1637             *(pi4_quant_round_1_2 + scan_pos) = temp2;
   1638         }
   1639 
   1640         i = 0;
   1641         ctxt_set = (i && is_luma) ? 2 : 0;
   1642         ctxt_set++;
   1643 
   1644         /*0th position indicates the probability of 2 */
   1645         /*1th position indicates the probability of 1 */
   1646         /*2th position indicates the probability of 11 */
   1647         /*3th position indicates the probability of 111 */
   1648 
   1649         gt1_ctxt = 0;
   1650         ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
   1651 
   1652         state_mps = pu1_ctxt_model[ctxt_idx];
   1653 
   1654         u4_bits_estimated_r2 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1];
   1655 
   1656         u4_bits_estimated_r1 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
   1657         QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r2, u4_bits_estimated_r1, lamda_mod);
   1658 
   1659         for(scan_pos = 0; scan_pos < 16; scan_pos++)
   1660         {
   1661             *(pi4_quant_round_1_2 + ((scan_pos % 4) + ((scan_pos >> 2) * i4_trans_size))) = temp2;
   1662         }
   1663 
   1664         {
   1665             WORD32 ctxt_idx;
   1666 
   1667             WORD32 nbr_csbf_0, nbr_csbf_1;
   1668             WORD32 state_mps_0, state_mps_1;
   1669             ctxt_idx = IHEVC_CAB_CODED_SUBLK_IDX;
   1670             ctxt_idx += is_luma ? 0 : 2;
   1671 
   1672             /* ctxt based on right / bottom avail csbf, section 9.3.3.1.3 */
   1673             /* if neibhor not available, ctxt idx = 0*/
   1674             nbr_csbf_0 = 0;
   1675             ctxt_idx += nbr_csbf_0 ? 1 : 0;
   1676             state_mps_0 = pu1_ctxt_model[ctxt_idx];
   1677 
   1678             nbr_csbf_1 = 1;
   1679             ctxt_idx += nbr_csbf_1 ? 1 : 0;
   1680             state_mps_1 = pu1_ctxt_model[ctxt_idx];
   1681 
   1682             is_nbr_csb_state_mps = ((state_mps_0 % 2) == 1) && ((state_mps_1 % 2) == 1);
   1683         }
   1684 
   1685         if(1 == is_nbr_csb_state_mps)
   1686         {
   1687             for(i = 0; i < (i4_trans_size * i4_trans_size >> 4); i++)
   1688             {
   1689                 UWORD8 sig_ctxinc;
   1690                 WORD32 state_mps;
   1691                 WORD32 gt1_ctxt = 0;
   1692                 WORD32 ctxt_set = 0;
   1693 
   1694                 WORD32 ctxt_idx;
   1695 
   1696                 /*Check if the cabac states had previous nbr available */
   1697 
   1698                 if(i == 0)
   1699                     pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[3][0];
   1700                 else if(i < (i4_trans_size >> 2))
   1701                     pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[1][0];
   1702                 else if((i % (i4_trans_size >> 2)) == 0)
   1703                     pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[2][0];
   1704                 else
   1705                     pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[0][0];
   1706 
   1707                 if(((i % (i4_trans_size >> 2)) == 0) && (i != 0))
   1708                     k++;
   1709 
   1710                 j = ((i4_trans_size * 4) * k) + ((i % (i4_trans_size >> 2)) * 4);
   1711                 /*ctxt_set = 0 DC subblock, the previous state did not have 2
   1712                 ctxt_set = 1 DC subblock, the previous state did have >= 2
   1713                 ctxt_set = 2 AC subblock, the previous state did not have 2
   1714                 ctxt_set = 3 AC subblock, the previous state did have >= 2*/
   1715 
   1716                 ctxt_set = (i && is_luma) ? 2 : 0;
   1717 
   1718                 /* gt1_ctxt = 1 for the co-ef value to be 1 */
   1719                 gt1_ctxt = 0;
   1720                 ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
   1721 
   1722                 state_mps = pu1_ctxt_model[ctxt_idx];
   1723 
   1724                 /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
   1725                 u4_bits_estimated_r1_temp = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
   1726 
   1727                 for(scan_pos = 0; scan_pos < 16; scan_pos++)
   1728                 {
   1729                     UWORD8 y_pos_x_pos;
   1730 
   1731                     if(scan_pos || i)
   1732                     {
   1733                         y_pos_x_pos = scan_pos;  // gu1_hevce_scan4x4[i4_scan_idx][scan_pos];
   1734                         /* ctxt for AC coeff depends on curpos and neigbour csbf */
   1735                         sig_ctxinc = pu1_hevce_sigcoeff_ctxtinc[y_pos_x_pos];
   1736 
   1737                         /* based on luma subblock pos */
   1738                         sig_ctxinc += (i && is_luma) ? 3 : 0;
   1739 
   1740                         sig_ctxinc += sig_coeff_base_ctxt;
   1741                     }
   1742                     else
   1743                     {
   1744                         /*MAM : both scan pos and i 0 impies the DC coef of 1st block only */
   1745                         /* DC coeff has fixed context for luma and chroma */
   1746                         sig_ctxinc = is_luma ? IHEVC_CAB_COEFF_FLAG : IHEVC_CAB_COEFF_FLAG + 27;
   1747                     }
   1748 
   1749                     /*Get the mps state based on ctxt modes */
   1750                     state_mps = pu1_ctxt_model[sig_ctxinc];
   1751 
   1752                     /* Bits taken to encode sig co-ef flag as 0 */
   1753                     u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
   1754 
   1755                     u4_bits_estimated_r1 =
   1756                         (gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000));
   1757 
   1758                     /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
   1759                     u4_bits_estimated_r1 += u4_bits_estimated_r1_temp;
   1760                     {
   1761                         QUANT_ROUND_FACTOR(
   1762                             temp2, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod);
   1763                         *(pi4_quant_round_0_1 +
   1764                           ((scan_pos % 4) + ((scan_pos >> 2) * i4_trans_size)) + j) = temp2;
   1765                     }
   1766                 }
   1767             }
   1768         }
   1769         else
   1770         {
   1771             /*If Both nbr csbfs are 0, then all the coef in sub-blocks will have same value except for 1st subblock,
   1772             Hence will write the same value to all sub block, and overwrite for the 1st one */
   1773             i = 1;
   1774             {
   1775                 UWORD8 sig_ctxinc;
   1776                 UWORD8 y_pos_x_pos;
   1777                 WORD32 quant_rounding_0_1;
   1778 
   1779                 pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc_00[0];
   1780 
   1781                 scan_pos = 0;
   1782                 y_pos_x_pos = scan_pos;  // gu1_hevce_scan4x4[i4_scan_idx][scan_pos];
   1783                 /* ctxt for AC coeff depends on curpos and neigbour csbf */
   1784                 sig_ctxinc = pu1_hevce_sigcoeff_ctxtinc[y_pos_x_pos];
   1785 
   1786                 /* based on luma subblock pos */
   1787                 sig_ctxinc += (is_luma) ? 3 : 0;
   1788 
   1789                 sig_ctxinc += sig_coeff_base_ctxt;
   1790 
   1791                 /*Get the mps state based on ctxt modes */
   1792                 state_mps = pu1_ctxt_model[sig_ctxinc];
   1793 
   1794                 /* Bits taken to encode sig co-ef flag as 0 */
   1795                 u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
   1796 
   1797                 u4_bits_estimated_r1 =
   1798                     (gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000));
   1799 
   1800                 /*ctxt_set = 0 DC subblock, the previous state did not have 2
   1801                 ctxt_set = 1 DC subblock, the previous state did have >= 2
   1802                 ctxt_set = 2 AC subblock, the previous state did not have 2
   1803                 ctxt_set = 3 AC subblock, the previous state did have >= 2*/
   1804 
   1805                 ctxt_set = (i && is_luma) ? 2 : 0;
   1806 
   1807                 /* gt1_ctxt = 1 for the co-ef value to be 1 */
   1808                 gt1_ctxt = 0;
   1809                 ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
   1810 
   1811                 state_mps = pu1_ctxt_model[ctxt_idx];
   1812 
   1813                 /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
   1814                 u4_bits_estimated_r1 += gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
   1815 
   1816                 QUANT_ROUND_FACTOR(
   1817                     quant_rounding_0_1, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod);
   1818 
   1819                 for(scan_pos = 0; scan_pos < (16 * (i4_trans_size * i4_trans_size >> 4));
   1820                     scan_pos++)
   1821                 {
   1822                     *(pi4_quant_round_0_1 + scan_pos) = quant_rounding_0_1;
   1823                 }
   1824             }
   1825 
   1826             /*First Subblock*/
   1827             i = 0;
   1828 
   1829             {
   1830                 UWORD8 sig_ctxinc;
   1831                 WORD32 state_mps;
   1832                 WORD32 gt1_ctxt = 0;
   1833                 WORD32 ctxt_set = 0;
   1834 
   1835                 WORD32 ctxt_idx;
   1836 
   1837                 /*Check if the cabac states had previous nbr available */
   1838 
   1839                 {
   1840                     pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[0][0];
   1841 
   1842                     /*ctxt_set = 0 DC subblock, the previous state did not have 2
   1843                     ctxt_set = 1 DC subblock, the previous state did have >= 2
   1844                     ctxt_set = 2 AC subblock, the previous state did not have 2
   1845                     ctxt_set = 3 AC subblock, the previous state did have >= 2*/
   1846                     ctxt_set = (i && is_luma) ? 2 : 0;
   1847 
   1848                     /* gt1_ctxt = 1 for the co-ef value to be 1 */
   1849                     gt1_ctxt = 0;
   1850                     ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
   1851 
   1852                     state_mps = pu1_ctxt_model[ctxt_idx];
   1853 
   1854                     /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
   1855                     u4_bits_estimated_r1_temp = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
   1856 
   1857                     for(scan_pos = 0; scan_pos < 16; scan_pos++)
   1858                     {
   1859                         UWORD8 y_pos_x_pos;
   1860 
   1861                         if(scan_pos)
   1862                         {
   1863                             y_pos_x_pos = scan_pos;  // gu1_hevce_scan4x4[i4_scan_idx][scan_pos];
   1864                             /* ctxt for AC coeff depends on curpos and neigbour csbf */
   1865                             sig_ctxinc = pu1_hevce_sigcoeff_ctxtinc[y_pos_x_pos];
   1866 
   1867                             /* based on luma subblock pos */
   1868                             sig_ctxinc += (i && is_luma) ? 3 : 0;
   1869 
   1870                             sig_ctxinc += sig_coeff_base_ctxt;
   1871                         }
   1872                         else
   1873                         {
   1874                             /*MAM : both scan pos and i 0 impies the DC coef of 1st block only */
   1875                             /* DC coeff has fixed context for luma and chroma */
   1876                             sig_ctxinc = is_luma ? IHEVC_CAB_COEFF_FLAG : IHEVC_CAB_COEFF_FLAG + 27;
   1877                         }
   1878 
   1879                         /*Get the mps state based on ctxt modes */
   1880                         state_mps = pu1_ctxt_model[sig_ctxinc];
   1881 
   1882                         /* Bits taken to encode sig co-ef flag as 0 */
   1883                         u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
   1884 
   1885                         u4_bits_estimated_r1 =
   1886                             (gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000));
   1887 
   1888                         /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
   1889                         u4_bits_estimated_r1 += u4_bits_estimated_r1_temp;
   1890                         {
   1891                             QUANT_ROUND_FACTOR(
   1892                                 temp2, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod);
   1893                             *(pi4_quant_round_0_1 +
   1894                               ((scan_pos % 4) + ((scan_pos >> 2) * i4_trans_size))) = temp2;
   1895                         }
   1896                     }
   1897                 }
   1898             }
   1899         }
   1900     }
   1901     return;
   1902 }
   1903 
   1904 /*!
   1905 ******************************************************************************
   1906 * \if Function name : ihevce_t_q_iq_ssd_scan_fxn \endif
   1907 *
   1908 * \brief
   1909 *    Transform unit level (Luma) enc_loop function
   1910 *
   1911 * \param[in] ps_ctxt    enc_loop module ctxt pointer
   1912 * \param[in] pu1_pred   pointer to predicted data buffer
   1913 * \param[in] pred_strd  predicted buffer stride
   1914 * \param[in] pu1_src    pointer to source data buffer
   1915 * \param[in] src_strd   source buffer stride
   1916 * \param[in] pi2_deq_data   pointer to store iq data
   1917 * \param[in] deq_data_strd  iq data buffer stride
   1918 * \param[out] pu1_ecd_data pointer coeff output buffer (input to ent cod)
   1919 * \param[out] pu1_csbf_buf  pointer to store the csbf for all 4x4 in a current
   1920 *                           block
   1921 * \param[out] csbf_strd  csbf buffer stride
   1922 * \param[in] trans_size transform size (4, 8, 16,32)
   1923 * \param[in] packed_pred_mode   0:Inter 1:Intra 2:Skip
   1924 * \param[out] pi4_cost      pointer to store the cost
   1925 * \param[out] pi8_cost      pointer to store the cost
   1926 *                           coeff buffer
   1927 * \param[out] pu4_tu_bits   pointer to store the best TU bits required encode
   1928 the current TU in RDopt Mode
   1929 * \param[out] pu4_blk_sad   pointer to store the block sad for RC
   1930 * \param[out] pi4_zero_col  pointer to store the zero_col info for the TU
   1931 * \param[out] pi4_zero_row  pointer to store the zero_row info for the TU
   1932 * \param[in]  i4_perform_rdoq Indicates if RDOQ should be performed or not
   1933 * \param[in]  i4_perform_sbh Indicates if SBH should be performed or not
   1934 *
   1935 * \return
   1936 *    CBF of the current block
   1937 *
   1938 * \author
   1939 *  Ittiam
   1940 *
   1941 *****************************************************************************
   1942 */
   1943 
   1944 WORD32 ihevce_t_q_iq_ssd_scan_fxn(
   1945     ihevce_enc_loop_ctxt_t *ps_ctxt,
   1946     UWORD8 *pu1_pred,
   1947     WORD32 pred_strd,
   1948     UWORD8 *pu1_src,
   1949     WORD32 src_strd,
   1950     WORD16 *pi2_deq_data,
   1951     WORD32 deq_data_strd,
   1952     UWORD8 *pu1_recon,
   1953     WORD32 i4_recon_stride,
   1954     UWORD8 *pu1_ecd_data,
   1955     UWORD8 *pu1_csbf_buf,
   1956     WORD32 csbf_strd,
   1957     WORD32 trans_size,
   1958     WORD32 packed_pred_mode,
   1959     LWORD64 *pi8_cost,
   1960     WORD32 *pi4_coeff_off,
   1961     WORD32 *pi4_tu_bits,
   1962     UWORD32 *pu4_blk_sad,
   1963     WORD32 *pi4_zero_col,
   1964     WORD32 *pi4_zero_row,
   1965     UWORD8 *pu1_is_recon_available,
   1966     WORD32 i4_perform_rdoq,
   1967     WORD32 i4_perform_sbh,
   1968 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
   1969     WORD32 i4_alpha_stim_multiplier,
   1970     UWORD8 u1_is_cu_noisy,
   1971 #endif
   1972     SSD_TYPE_T e_ssd_type,
   1973     WORD32 early_cbf)
   1974 {
   1975     WORD32 cbf = 0;
   1976     WORD32 trans_idx;
   1977     WORD32 quant_scale_mat_offset;
   1978     WORD32 *pi4_trans_scratch;
   1979     WORD16 *pi2_trans_values;
   1980     WORD16 *pi2_quant_coeffs;
   1981     WORD32 *pi4_subBlock2csbfId_map = NULL;
   1982 
   1983 #if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
   1984     WORD32 ai4_quant_rounding_factors[3][MAX_TU_SIZE * MAX_TU_SIZE], i;
   1985 #endif
   1986 
   1987     rdoq_sbh_ctxt_t *ps_rdoq_sbh_ctxt = &ps_ctxt->s_rdoq_sbh_ctxt;
   1988 
   1989     WORD32 i4_perform_zcbf = (ENABLE_INTER_ZCU_COST && (PRED_MODE_INTRA != packed_pred_mode)) ||
   1990                              (ps_ctxt->i4_zcbf_rdo_level == ZCBF_ENABLE);
   1991     WORD32 i4_perform_coeff_level_rdoq = (ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING);
   1992     WORD8 intra_flag = 0;
   1993     ASSERT(csbf_strd == MAX_TU_IN_CTB_ROW);
   1994 
   1995     *pi4_tu_bits = 0;
   1996     *pi4_coeff_off = 0;
   1997     pu1_is_recon_available[0] = 0;
   1998 
   1999     if((PRED_MODE_SKIP == packed_pred_mode) || (0 == early_cbf))
   2000     {
   2001         if(e_ssd_type != NULL_TYPE)
   2002         {
   2003             /* SSD cost is stored to the pointer */
   2004             pi8_cost[0] =
   2005 
   2006                 ps_ctxt->s_cmn_opt_func.pf_ssd_and_sad_calculator(
   2007                     pu1_pred, pred_strd, pu1_src, src_strd, trans_size, pu4_blk_sad);
   2008 
   2009 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
   2010             if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
   2011             {
   2012                 pi8_cost[0] = ihevce_inject_stim_into_distortion(
   2013                     pu1_src,
   2014                     src_strd,
   2015                     pu1_pred,
   2016                     pred_strd,
   2017                     pi8_cost[0],
   2018                     !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
   2019                                            : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
   2020                                               (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
   2021                                                  100.0,
   2022                     trans_size,
   2023                     0,
   2024                     ps_ctxt->u1_enable_psyRDOPT,
   2025                     NULL_PLANE);
   2026             }
   2027 #endif
   2028 
   2029             /* copy pred to recon for skip mode */
   2030             if(SPATIAL_DOMAIN_SSD == e_ssd_type)
   2031             {
   2032                 ps_ctxt->s_cmn_opt_func.pf_copy_2d(
   2033                     pu1_recon, i4_recon_stride, pu1_pred, pred_strd, trans_size, trans_size);
   2034                 pu1_is_recon_available[0] = 1;
   2035             }
   2036             else
   2037             {
   2038                 pu1_is_recon_available[0] = 0;
   2039             }
   2040 
   2041 #if ENABLE_INTER_ZCU_COST
   2042             ps_ctxt->i8_cu_not_coded_cost += pi8_cost[0];
   2043 #endif
   2044         }
   2045         else
   2046         {
   2047             pi8_cost[0] = UINT_MAX;
   2048         }
   2049 
   2050         /* cbf is returned as 0 */
   2051         return (0);
   2052     }
   2053 
   2054     /* derive context variables */
   2055     pi4_trans_scratch = (WORD32 *)&ps_ctxt->ai2_scratch[0];
   2056     pi2_quant_coeffs = &ps_ctxt->ai2_scratch[0];
   2057     pi2_trans_values = &ps_ctxt->ai2_scratch[0] + (MAX_TRANS_SIZE * 2);
   2058 
   2059     /* translate the transform size to index for 4x4 and 8x8 */
   2060     trans_idx = trans_size >> 2;
   2061 
   2062     if(PRED_MODE_INTRA == packed_pred_mode)
   2063     {
   2064         quant_scale_mat_offset = 0;
   2065         intra_flag = 1;
   2066 #if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
   2067         ai4_quant_rounding_factors[0][0] =
   2068             MAX(ps_ctxt->i4_quant_rnd_factor[intra_flag], (1 << QUANT_ROUND_FACTOR_Q) / 3);
   2069 
   2070         for(i = 0; i < trans_size * trans_size; i++)
   2071         {
   2072             ai4_quant_rounding_factors[1][i] =
   2073                 MAX(ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3][i],
   2074                     (1 << QUANT_ROUND_FACTOR_Q) / 3);
   2075             ai4_quant_rounding_factors[2][i] =
   2076                 MAX(ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3][i],
   2077                     (1 << QUANT_ROUND_FACTOR_Q) / 3);
   2078         }
   2079 #endif
   2080     }
   2081     else
   2082     {
   2083         quant_scale_mat_offset = NUM_TRANS_TYPES;
   2084     }
   2085     /* for intra 4x4 DST transform should be used */
   2086     if((1 == trans_idx) && (1 == intra_flag))
   2087     {
   2088         trans_idx = 0;
   2089     }
   2090     /* for 16x16 cases */
   2091     else if(16 == trans_size)
   2092     {
   2093         trans_idx = 3;
   2094     }
   2095     /* for 32x32 cases */
   2096     else if(32 == trans_size)
   2097     {
   2098         trans_idx = 4;
   2099     }
   2100 
   2101     switch(trans_size)
   2102     {
   2103     case 4:
   2104     {
   2105         pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map4x4TU;
   2106 
   2107         break;
   2108     }
   2109     case 8:
   2110     {
   2111         pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map8x8TU;
   2112 
   2113         break;
   2114     }
   2115     case 16:
   2116     {
   2117         pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map16x16TU;
   2118 
   2119         break;
   2120     }
   2121     case 32:
   2122     {
   2123         pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map32x32TU;
   2124 
   2125         break;
   2126     }
   2127     }
   2128 
   2129     /* Do not call the FT and Quant functions if early_cbf is 0 */
   2130     if(1 == early_cbf)
   2131     {
   2132         /* ---------- call residue and transform block ------- */
   2133         *pu4_blk_sad = ps_ctxt->apf_resd_trns[trans_idx](
   2134             pu1_src,
   2135             pu1_pred,
   2136             pi4_trans_scratch,
   2137             pi2_trans_values,
   2138             src_strd,
   2139             pred_strd,
   2140             ((trans_size << 16) + 0)); /* dst strd and chroma flag are packed together */
   2141 
   2142         cbf = ps_ctxt->apf_quant_iquant_ssd
   2143                   [i4_perform_coeff_level_rdoq + (e_ssd_type != FREQUENCY_DOMAIN_SSD) * 2](
   2144                       pi2_trans_values,
   2145                       ps_ctxt->api2_rescal_mat[trans_idx + quant_scale_mat_offset],
   2146                       pi2_quant_coeffs,
   2147                       pi2_deq_data,
   2148                       trans_size,
   2149                       ps_ctxt->i4_cu_qp_div6,
   2150                       ps_ctxt->i4_cu_qp_mod6,
   2151 #if !PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
   2152                       ps_ctxt->i4_quant_rnd_factor[intra_flag],
   2153                       ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
   2154                       ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
   2155 #else
   2156                       intra_flag ? ai4_quant_rounding_factors[0][0]
   2157                                  : ps_ctxt->i4_quant_rnd_factor[intra_flag],
   2158                       intra_flag ? ai4_quant_rounding_factors[1]
   2159                                  : ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
   2160                       intra_flag ? ai4_quant_rounding_factors[2]
   2161                                  : ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
   2162 #endif
   2163                       trans_size,
   2164                       trans_size,
   2165                       deq_data_strd,
   2166                       pu1_csbf_buf,
   2167                       csbf_strd,
   2168                       pi4_zero_col,
   2169                       pi4_zero_row,
   2170                       ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset],
   2171                       pi8_cost);
   2172 
   2173         if(e_ssd_type != FREQUENCY_DOMAIN_SSD)
   2174         {
   2175             pi8_cost[0] = UINT_MAX;
   2176         }
   2177     }
   2178 
   2179     if(0 != cbf)
   2180     {
   2181         if(i4_perform_sbh || i4_perform_rdoq)
   2182         {
   2183             ps_rdoq_sbh_ctxt->i4_iq_data_strd = deq_data_strd;
   2184             ps_rdoq_sbh_ctxt->i4_q_data_strd = trans_size;
   2185             ps_rdoq_sbh_ctxt->pi4_subBlock2csbfId_map = pi4_subBlock2csbfId_map;
   2186 
   2187             ps_rdoq_sbh_ctxt->i4_qp_div = ps_ctxt->i4_cu_qp_div6;
   2188             ps_rdoq_sbh_ctxt->i2_qp_rem = ps_ctxt->i4_cu_qp_mod6;
   2189             ps_rdoq_sbh_ctxt->i4_scan_idx = ps_ctxt->i4_scan_idx;
   2190             ps_rdoq_sbh_ctxt->i8_ssd_cost = *pi8_cost;
   2191             ps_rdoq_sbh_ctxt->i4_trans_size = trans_size;
   2192 
   2193             ps_rdoq_sbh_ctxt->pi2_dequant_coeff =
   2194                 ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset];
   2195             ps_rdoq_sbh_ctxt->pi2_iquant_coeffs = pi2_deq_data;
   2196             ps_rdoq_sbh_ctxt->pi2_quant_coeffs = pi2_quant_coeffs;
   2197             ps_rdoq_sbh_ctxt->pi2_trans_values = pi2_trans_values;
   2198             ps_rdoq_sbh_ctxt->pu1_csbf_buf = pu1_csbf_buf;
   2199 
   2200             /* ------- call coeffs scan function ------- */
   2201             if((!i4_perform_rdoq))
   2202             {
   2203                 ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt);
   2204 
   2205                 pi8_cost[0] = ps_rdoq_sbh_ctxt->i8_ssd_cost;
   2206             }
   2207         }
   2208 
   2209         *pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs(
   2210             pi2_quant_coeffs,
   2211             pi4_subBlock2csbfId_map,
   2212             ps_ctxt->i4_scan_idx,
   2213             trans_size,
   2214             pu1_ecd_data,
   2215             pu1_csbf_buf,
   2216             csbf_strd);
   2217     }
   2218     *pi8_cost >>= ga_trans_shift[trans_idx];
   2219 
   2220 #if RDOPT_ZERO_CBF_ENABLE
   2221     /* compare null cbf cost with encode tu rd-cost */
   2222     if(cbf != 0)
   2223     {
   2224         WORD32 tu_bits;
   2225         LWORD64 tu_rd_cost;
   2226 
   2227         LWORD64 zero_cbf_cost = 0;
   2228 
   2229         /*Populating the feilds of rdoq_ctxt structure*/
   2230         if(i4_perform_rdoq)
   2231         {
   2232             /* transform size to log2transform size */
   2233             GETRANGE(ps_rdoq_sbh_ctxt->i4_log2_trans_size, trans_size);
   2234             ps_rdoq_sbh_ctxt->i4_log2_trans_size -= 1;
   2235             ps_rdoq_sbh_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->i8_cl_ssd_lambda_qf;
   2236             ps_rdoq_sbh_ctxt->i4_is_luma = 1;
   2237             ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td = ga_trans_shift[trans_idx];
   2238             ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td =
   2239                 (1 << ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td) / 2;
   2240             ps_rdoq_sbh_ctxt->i1_tu_is_coded = 0;
   2241             ps_rdoq_sbh_ctxt->pi4_zero_col = pi4_zero_col;
   2242             ps_rdoq_sbh_ctxt->pi4_zero_row = pi4_zero_row;
   2243         }
   2244         else if(i4_perform_zcbf)
   2245         {
   2246             zero_cbf_cost =
   2247 
   2248                 ps_ctxt->s_cmn_opt_func.pf_ssd_calculator(
   2249                     pu1_src, pu1_pred, src_strd, pred_strd, trans_size, trans_size);
   2250         }
   2251 
   2252         /************************************************************************/
   2253         /* call the entropy rdo encode to get the bit estimate for current tu   */
   2254         /* note that tu includes only residual coding bits and does not include */
   2255         /* tu split, cbf and qp delta encoding bits for a TU                    */
   2256         /************************************************************************/
   2257         if(i4_perform_rdoq)
   2258         {
   2259             tu_bits = ihevce_entropy_rdo_encode_tu_rdoq(
   2260                 &ps_ctxt->s_rdopt_entropy_ctxt,
   2261                 (pu1_ecd_data),
   2262                 trans_size,
   2263                 1,
   2264                 ps_rdoq_sbh_ctxt,
   2265                 pi8_cost,
   2266                 &zero_cbf_cost,
   2267                 0);
   2268 
   2269             if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 0)
   2270             {
   2271                 cbf = 0;
   2272                 *pi4_coeff_off = 0;
   2273             }
   2274 
   2275             if((i4_perform_sbh) && (0 != cbf))
   2276             {
   2277                 ps_rdoq_sbh_ctxt->i8_ssd_cost = *pi8_cost;
   2278                 ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt);
   2279                 *pi8_cost = ps_rdoq_sbh_ctxt->i8_ssd_cost;
   2280             }
   2281 
   2282             /*Add round value before normalizing*/
   2283             *pi8_cost += ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td;
   2284             *pi8_cost >>= ga_trans_shift[trans_idx];
   2285 
   2286             if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 1)
   2287             {
   2288                 pi2_quant_coeffs = &ps_ctxt->ai2_scratch[0];
   2289                 *pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs(
   2290                     pi2_quant_coeffs,
   2291                     pi4_subBlock2csbfId_map,
   2292                     ps_ctxt->i4_scan_idx,
   2293                     trans_size,
   2294                     pu1_ecd_data,
   2295                     pu1_csbf_buf,
   2296                     csbf_strd);
   2297             }
   2298         }
   2299         else
   2300         {
   2301             tu_bits = ihevce_entropy_rdo_encode_tu(
   2302                 &ps_ctxt->s_rdopt_entropy_ctxt, pu1_ecd_data, trans_size, 1, i4_perform_sbh);
   2303         }
   2304 
   2305         *pi4_tu_bits = tu_bits;
   2306 
   2307         if(e_ssd_type == SPATIAL_DOMAIN_SSD)
   2308         {
   2309             *pi8_cost = ihevce_it_recon_ssd(
   2310                 ps_ctxt,
   2311                 pu1_src,
   2312                 src_strd,
   2313                 pu1_pred,
   2314                 pred_strd,
   2315                 pi2_deq_data,
   2316                 deq_data_strd,
   2317                 pu1_recon,
   2318                 i4_recon_stride,
   2319                 pu1_ecd_data,
   2320                 trans_size,
   2321                 packed_pred_mode,
   2322                 cbf,
   2323                 *pi4_zero_col,
   2324                 *pi4_zero_row,
   2325                 NULL_PLANE);
   2326 
   2327             pu1_is_recon_available[0] = 1;
   2328         }
   2329 
   2330 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
   2331         if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier)
   2332         {
   2333             pi8_cost[0] = ihevce_inject_stim_into_distortion(
   2334                 pu1_src,
   2335                 src_strd,
   2336                 pu1_recon,
   2337                 i4_recon_stride,
   2338                 pi8_cost[0],
   2339                 i4_alpha_stim_multiplier,
   2340                 trans_size,
   2341                 0,
   2342                 ps_ctxt->u1_enable_psyRDOPT,
   2343                 NULL_PLANE);
   2344         }
   2345         else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier)
   2346         {
   2347             pi8_cost[0] = ihevce_inject_stim_into_distortion(
   2348                 pu1_src,
   2349                 src_strd,
   2350                 pu1_pred,
   2351                 pred_strd,
   2352                 pi8_cost[0],
   2353                 i4_alpha_stim_multiplier,
   2354                 trans_size,
   2355                 0,
   2356                 ps_ctxt->u1_enable_psyRDOPT,
   2357                 NULL_PLANE);
   2358         }
   2359 #endif
   2360 
   2361         /* add the SSD cost to bits estimate given by ECD */
   2362         tu_rd_cost = *pi8_cost + COMPUTE_RATE_COST_CLIP30(
   2363                                      tu_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
   2364 
   2365         if(i4_perform_zcbf)
   2366         {
   2367 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
   2368             if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
   2369             {
   2370                 zero_cbf_cost = ihevce_inject_stim_into_distortion(
   2371                     pu1_src,
   2372                     src_strd,
   2373                     pu1_pred,
   2374                     pred_strd,
   2375                     zero_cbf_cost,
   2376                     !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
   2377                                            : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
   2378                                               (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
   2379                                                  100.0,
   2380                     trans_size,
   2381                     0,
   2382                     ps_ctxt->u1_enable_psyRDOPT,
   2383                     NULL_PLANE);
   2384             }
   2385 #endif
   2386 
   2387             /* force the tu as zero cbf if zero_cbf_cost is lower */
   2388             if(zero_cbf_cost < tu_rd_cost)
   2389             {
   2390                 /* num bytes is set to 0 */
   2391                 *pi4_coeff_off = 0;
   2392 
   2393                 /* cbf is returned as 0 */
   2394                 cbf = 0;
   2395 
   2396                 /* cost is returned as 0 cbf cost */
   2397                 *pi8_cost = zero_cbf_cost;
   2398 
   2399                 /* TU bits is set to 0 */
   2400                 *pi4_tu_bits = 0;
   2401                 pu1_is_recon_available[0] = 0;
   2402 
   2403                 if(SPATIAL_DOMAIN_SSD == e_ssd_type)
   2404                 {
   2405                     /* copy pred to recon for zcbf mode */
   2406 
   2407                     ps_ctxt->s_cmn_opt_func.pf_copy_2d(
   2408                         pu1_recon, i4_recon_stride, pu1_pred, pred_strd, trans_size, trans_size);
   2409 
   2410                     pu1_is_recon_available[0] = 1;
   2411                 }
   2412             }
   2413             /* accumulate cu not coded cost with zcbf cost */
   2414 #if ENABLE_INTER_ZCU_COST
   2415             ps_ctxt->i8_cu_not_coded_cost += zero_cbf_cost;
   2416 #endif
   2417         }
   2418     }
   2419     else
   2420     {
   2421         /* cbf = 0, accumulate cu not coded cost */
   2422         if(e_ssd_type == SPATIAL_DOMAIN_SSD)
   2423         {
   2424             *pi8_cost = ihevce_it_recon_ssd(
   2425                 ps_ctxt,
   2426                 pu1_src,
   2427                 src_strd,
   2428                 pu1_pred,
   2429                 pred_strd,
   2430                 pi2_deq_data,
   2431                 deq_data_strd,
   2432                 pu1_recon,
   2433                 i4_recon_stride,
   2434                 pu1_ecd_data,
   2435                 trans_size,
   2436                 packed_pred_mode,
   2437                 cbf,
   2438                 *pi4_zero_col,
   2439                 *pi4_zero_row,
   2440                 NULL_PLANE);
   2441 
   2442             pu1_is_recon_available[0] = 1;
   2443         }
   2444 
   2445 #if ENABLE_INTER_ZCU_COST
   2446         {
   2447 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
   2448             if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier)
   2449             {
   2450                 pi8_cost[0] = ihevce_inject_stim_into_distortion(
   2451                     pu1_src,
   2452                     src_strd,
   2453                     pu1_recon,
   2454                     i4_recon_stride,
   2455                     pi8_cost[0],
   2456                     !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
   2457                                            : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
   2458                                               (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
   2459                                                  100.0,
   2460                     trans_size,
   2461                     0,
   2462                     ps_ctxt->u1_enable_psyRDOPT,
   2463                     NULL_PLANE);
   2464             }
   2465             else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier)
   2466             {
   2467                 pi8_cost[0] = ihevce_inject_stim_into_distortion(
   2468                     pu1_src,
   2469                     src_strd,
   2470                     pu1_pred,
   2471                     pred_strd,
   2472                     pi8_cost[0],
   2473                     !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
   2474                                            : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
   2475                                               (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
   2476                                                  100.0,
   2477                     trans_size,
   2478                     0,
   2479                     ps_ctxt->u1_enable_psyRDOPT,
   2480                     NULL_PLANE);
   2481             }
   2482 #endif
   2483 
   2484             ps_ctxt->i8_cu_not_coded_cost += *pi8_cost;
   2485         }
   2486 #endif /* ENABLE_INTER_ZCU_COST */
   2487     }
   2488 #endif
   2489 
   2490     return (cbf);
   2491 }
   2492 
   2493 /*!
   2494 ******************************************************************************
   2495 * \if Function name : ihevce_it_recon_fxn \endif
   2496 *
   2497 * \brief
   2498 *    Transform unit level (Luma) IT Recon function
   2499 *
   2500 * \param[in] ps_ctxt        enc_loop module ctxt pointer
   2501 * \param[in] pi2_deq_data   pointer to iq data
   2502 * \param[in] deq_data_strd  iq data buffer stride
   2503 * \param[in] pu1_pred       pointer to predicted data buffer
   2504 * \param[in] pred_strd      predicted buffer stride
   2505 * \param[in] pu1_recon      pointer to recon buffer
   2506 * \param[in] recon_strd     recon buffer stride
   2507 * \param[out] pu1_ecd_data  pointer coeff output buffer (input to ent cod)
   2508 * \param[in] trans_size     transform size (4, 8, 16,32)
   2509 * \param[in] packed_pred_mode   0:Inter 1:Intra 2:Skip
   2510 * \param[in] cbf            CBF of the current block
   2511 * \param[in] zero_cols      zero_cols of the current block
   2512 * \param[in] zero_rows      zero_rows of the current block
   2513 *
   2514 * \return
   2515 *
   2516 * \author
   2517 *  Ittiam
   2518 *
   2519 *****************************************************************************
   2520 */
   2521 
   2522 void ihevce_it_recon_fxn(
   2523     ihevce_enc_loop_ctxt_t *ps_ctxt,
   2524     WORD16 *pi2_deq_data,
   2525     WORD32 deq_dat_strd,
   2526     UWORD8 *pu1_pred,
   2527     WORD32 pred_strd,
   2528     UWORD8 *pu1_recon,
   2529     WORD32 recon_strd,
   2530     UWORD8 *pu1_ecd_data,
   2531     WORD32 trans_size,
   2532     WORD32 packed_pred_mode,
   2533     WORD32 cbf,
   2534     WORD32 zero_cols,
   2535     WORD32 zero_rows)
   2536 {
   2537     WORD32 dc_add_flag = 0;
   2538     WORD32 trans_idx;
   2539 
   2540     /* translate the transform size to index for 4x4 and 8x8 */
   2541     trans_idx = trans_size >> 2;
   2542 
   2543     /* if SKIP mode needs to be evaluated the pred is copied to recon */
   2544     if(PRED_MODE_SKIP == packed_pred_mode)
   2545     {
   2546         UWORD8 *pu1_curr_recon, *pu1_curr_pred;
   2547 
   2548         pu1_curr_pred = pu1_pred;
   2549         pu1_curr_recon = pu1_recon;
   2550 
   2551         /* 2D copy of data */
   2552 
   2553         ps_ctxt->s_cmn_opt_func.pf_2d_square_copy(
   2554             pu1_curr_recon, recon_strd, pu1_curr_pred, pred_strd, trans_size, sizeof(UWORD8));
   2555 
   2556         return;
   2557     }
   2558 
   2559     /* for intra 4x4 DST transform should be used */
   2560     if((1 == trans_idx) && (PRED_MODE_INTRA == packed_pred_mode))
   2561     {
   2562         trans_idx = 0;
   2563     }
   2564     /* for 16x16 cases */
   2565     else if(16 == trans_size)
   2566     {
   2567         trans_idx = 3;
   2568     }
   2569     /* for 32x32 cases */
   2570     else if(32 == trans_size)
   2571     {
   2572         trans_idx = 4;
   2573     }
   2574 
   2575     /*if (lastx == 0 && lasty == 0) , ie only 1 coefficient */
   2576     if((0 == pu1_ecd_data[0]) && (0 == pu1_ecd_data[1]))
   2577     {
   2578         dc_add_flag = 1;
   2579     }
   2580 
   2581     if(0 == cbf)
   2582     {
   2583         /* buffer copy */
   2584         ps_ctxt->s_cmn_opt_func.pf_2d_square_copy(
   2585             pu1_recon, recon_strd, pu1_pred, pred_strd, trans_size, 1);
   2586     }
   2587     else if((1 == dc_add_flag) && (0 != trans_idx))
   2588     {
   2589         /* dc add */
   2590         ps_ctxt->s_cmn_opt_func.pf_itrans_recon_dc(
   2591             pu1_pred,
   2592             pred_strd,
   2593             pu1_recon,
   2594             recon_strd,
   2595             trans_size,
   2596             pi2_deq_data[0],
   2597             NULL_PLANE /* luma */
   2598         );
   2599     }
   2600     else
   2601     {
   2602         ps_ctxt->apf_it_recon[trans_idx](
   2603             pi2_deq_data,
   2604             &ps_ctxt->ai2_scratch[0],
   2605             pu1_pred,
   2606             pu1_recon,
   2607             deq_dat_strd,
   2608             pred_strd,
   2609             recon_strd,
   2610             zero_cols,
   2611             zero_rows);
   2612     }
   2613 }
   2614 
   2615 /*!
   2616 ******************************************************************************
   2617 * \if Function name : ihevce_chroma_it_recon_fxn \endif
   2618 *
   2619 * \brief
   2620 *    Transform unit level (Chroma) IT Recon function
   2621 *
   2622 * \param[in] ps_ctxt        enc_loop module ctxt pointer
   2623 * \param[in] pi2_deq_data   pointer to iq data
   2624 * \param[in] deq_data_strd  iq data buffer stride
   2625 * \param[in] pu1_pred       pointer to predicted data buffer
   2626 * \param[in] pred_strd      predicted buffer stride
   2627 * \param[in] pu1_recon      pointer to recon buffer
   2628 * \param[in] recon_strd     recon buffer stride
   2629 * \param[out] pu1_ecd_data  pointer coeff output buffer (input to ent cod)
   2630 * \param[in] trans_size     transform size (4, 8, 16)
   2631 * \param[in] cbf            CBF of the current block
   2632 * \param[in] zero_cols      zero_cols of the current block
   2633 * \param[in] zero_rows      zero_rows of the current block
   2634 *
   2635 * \return
   2636 *
   2637 * \author
   2638 *  Ittiam
   2639 *
   2640 *****************************************************************************
   2641 */
   2642 
   2643 void ihevce_chroma_it_recon_fxn(
   2644     ihevce_enc_loop_ctxt_t *ps_ctxt,
   2645     WORD16 *pi2_deq_data,
   2646     WORD32 deq_dat_strd,
   2647     UWORD8 *pu1_pred,
   2648     WORD32 pred_strd,
   2649     UWORD8 *pu1_recon,
   2650     WORD32 recon_strd,
   2651     UWORD8 *pu1_ecd_data,
   2652     WORD32 trans_size,
   2653     WORD32 cbf,
   2654     WORD32 zero_cols,
   2655     WORD32 zero_rows,
   2656     CHROMA_PLANE_ID_T e_chroma_plane)
   2657 {
   2658     WORD32 trans_idx;
   2659 
   2660     ASSERT((e_chroma_plane == U_PLANE) || (e_chroma_plane == V_PLANE));
   2661 
   2662     /* since 2x2 transform is not allowed for chroma*/
   2663     if(2 == trans_size)
   2664     {
   2665         trans_size = 4;
   2666     }
   2667 
   2668     /* translate the transform size to index */
   2669     trans_idx = trans_size >> 2;
   2670 
   2671     /* for 16x16 cases */
   2672     if(16 == trans_size)
   2673     {
   2674         trans_idx = 3;
   2675     }
   2676 
   2677     if(0 == cbf)
   2678     {
   2679         /* buffer copy */
   2680         ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
   2681             pu1_pred, pred_strd, pu1_recon, recon_strd, trans_size, trans_size, e_chroma_plane);
   2682     }
   2683     else if((0 == pu1_ecd_data[0]) && (0 == pu1_ecd_data[1]))
   2684     {
   2685         /* dc add */
   2686         ps_ctxt->s_cmn_opt_func.pf_itrans_recon_dc(
   2687             pu1_pred,
   2688             pred_strd,
   2689             pu1_recon,
   2690             recon_strd,
   2691             trans_size,
   2692             pi2_deq_data[0],
   2693             e_chroma_plane /* chroma plane */
   2694         );
   2695     }
   2696     else
   2697     {
   2698         ps_ctxt->apf_chrm_it_recon[trans_idx - 1](
   2699             pi2_deq_data,
   2700             &ps_ctxt->ai2_scratch[0],
   2701             pu1_pred + (WORD32)e_chroma_plane,
   2702             pu1_recon + (WORD32)e_chroma_plane,
   2703             deq_dat_strd,
   2704             pred_strd,
   2705             recon_strd,
   2706             zero_cols,
   2707             zero_rows);
   2708     }
   2709 }
   2710 
   2711 /**
   2712 *******************************************************************************
   2713 * \if Function name : ihevce_mpm_idx_based_filter_RDOPT_cand \endif
   2714 *
   2715 * \brief * Filters the RDOPT candidates based on mpm_idx
   2716 *
   2717 * \par   Description
   2718 * Computes the b1_prev_intra_luma_pred_flag, b2_mpm_idx & b5_rem_intra_pred_mode
   2719 * for a CU
   2720 *
   2721 * \param[in] ps_ctxt : ptr to enc loop context
   2722 * \param[in] ps_cu_analyse : ptr to CU analyse structure
   2723 * \param[in] ps_top_nbr_4x4 top 4x4 neighbour pointer
   2724 * \param[in] ps_left_nbr_4x4 left 4x4 neighbour pointer
   2725 * \param[in] pu1_luma_mode luma mode
   2726 *
   2727 * \returns none
   2728 *
   2729 * \author
   2730 *  Ittiam
   2731 *
   2732 *******************************************************************************
   2733 */
   2734 
   2735 void ihevce_mpm_idx_based_filter_RDOPT_cand(
   2736     ihevce_enc_loop_ctxt_t *ps_ctxt,
   2737     cu_analyse_t *ps_cu_analyse,
   2738     nbr_4x4_t *ps_left_nbr_4x4,
   2739     nbr_4x4_t *ps_top_nbr_4x4,
   2740     UWORD8 *pu1_luma_mode,
   2741     UWORD8 *pu1_eval_mark)
   2742 {
   2743     WORD32 cu_pos_x;
   2744     WORD32 cu_pos_y;
   2745     nbr_avail_flags_t s_nbr;
   2746     WORD32 trans_size;
   2747     WORD32 au4_cand_mode_list[3];
   2748     WORD32 nbr_flags;
   2749     UWORD8 *pu1_intra_luma_modes;
   2750     WORD32 rdopt_cand_ctr = 0;
   2751     UWORD8 *pu1_luma_eval_mark;
   2752 
   2753     cu_pos_x = ps_cu_analyse->b3_cu_pos_x << 1;
   2754     cu_pos_y = ps_cu_analyse->b3_cu_pos_y << 1;
   2755     trans_size = ps_cu_analyse->u1_cu_size;
   2756 
   2757     /* get the neighbour availability flags */
   2758     nbr_flags = ihevce_get_nbr_intra(
   2759         &s_nbr,
   2760         ps_ctxt->pu1_ctb_nbr_map,
   2761         ps_ctxt->i4_nbr_map_strd,
   2762         cu_pos_x,
   2763         cu_pos_y,
   2764         trans_size >> 2);
   2765     (void)nbr_flags;
   2766     /*Call the fun to populate luma intra pred mode fro TU=CU and use the same list fro
   2767     *TU=CU/2 also since the modes are same in both the cases.
   2768     */
   2769     ihevce_populate_intra_pred_mode(
   2770         ps_top_nbr_4x4->b6_luma_intra_mode,
   2771         ps_left_nbr_4x4->b6_luma_intra_mode,
   2772         s_nbr.u1_top_avail,
   2773         s_nbr.u1_left_avail,
   2774         cu_pos_y,
   2775         &au4_cand_mode_list[0]);
   2776 
   2777     /*Loop through all the RDOPT candidates of TU=CU and TU=CU/2 and check if the current RDOPT
   2778     *cand is present in a4_cand_mode_list, If yes set eval flag to 1 else set it to zero
   2779     */
   2780 
   2781     pu1_intra_luma_modes = pu1_luma_mode;
   2782     pu1_luma_eval_mark = pu1_eval_mark;
   2783 
   2784     while(pu1_intra_luma_modes[rdopt_cand_ctr] != 255)
   2785     {
   2786         WORD32 i;
   2787         WORD32 found_flag = 0;
   2788 
   2789         /*1st candidate of TU=CU list and TU=CU/2 list must go through RDOPT stage
   2790         *irrespective of whether the cand is present in the mpm idx list or not
   2791         */
   2792         if(rdopt_cand_ctr == 0)
   2793         {
   2794             rdopt_cand_ctr++;
   2795             continue;
   2796         }
   2797 
   2798         for(i = 0; i < 3; i++)
   2799         {
   2800             if(pu1_intra_luma_modes[rdopt_cand_ctr] == au4_cand_mode_list[i])
   2801             {
   2802                 found_flag = 1;
   2803                 break;
   2804             }
   2805         }
   2806 
   2807         if(found_flag == 0)
   2808         {
   2809             pu1_luma_eval_mark[rdopt_cand_ctr] = 0;
   2810         }
   2811 
   2812         rdopt_cand_ctr++;
   2813     }
   2814 }
   2815 
   2816 /*!
   2817 ******************************************************************************
   2818 * \if Function name : ihevce_intra_rdopt_cu_ntu \endif
   2819 *
   2820 * \brief
   2821 *    Intra Coding unit function for RD opt mode
   2822 *
   2823 * \param[in] ps_ctxt    enc_loop module ctxt pointer
   2824 * \param[in] ps_chrm_cu_buf_prms pointer to chroma buffer pointers structure
   2825 * \param[in] pu1_luma_mode : pointer to luma mode
   2826 * \param[in] ps_cu_analyse  pointer to cu analyse pointer
   2827 * \param[in] pu1_src    pointer to source data buffer
   2828 * \param[in] src_strd   source buffer stride
   2829 * \param[in] pu1_cu_left pointer to left recon data buffer
   2830 * \param[in] pu1_cu_top  pointer to top recon data buffer
   2831 * \param[in] pu1_cu_top_left pointer to top left recon data buffer
   2832 * \param[in] ps_left_nbr_4x4 : left 4x4 neighbour pointer
   2833 * \param[in] ps_top_nbr_4x4 : top 4x4 neighbour pointer
   2834 * \param[in] nbr_4x4_left_strd left nbr4x4 stride
   2835 * \param[in] cu_left_stride left recon buffer stride
   2836 * \param[in] curr_buf_idx RD opt buffer index for current usage
   2837 * \param[in] func_proc_mode : function processing mode @sa TU_SIZE_WRT_CU_T
   2838 *
   2839 * \return
   2840 *    RDopt cost
   2841 *
   2842 * \author
   2843 *  Ittiam
   2844 *
   2845 *****************************************************************************
   2846 */
   2847 LWORD64 ihevce_intra_rdopt_cu_ntu(
   2848     ihevce_enc_loop_ctxt_t *ps_ctxt,
   2849     enc_loop_cu_prms_t *ps_cu_prms,
   2850     void *pv_pred_org,
   2851     WORD32 pred_strd_org,
   2852     enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
   2853     UWORD8 *pu1_luma_mode,
   2854     cu_analyse_t *ps_cu_analyse,
   2855     void *pv_curr_src,
   2856     void *pv_cu_left,
   2857     void *pv_cu_top,
   2858     void *pv_cu_top_left,
   2859     nbr_4x4_t *ps_left_nbr_4x4,
   2860     nbr_4x4_t *ps_top_nbr_4x4,
   2861     WORD32 nbr_4x4_left_strd,
   2862     WORD32 cu_left_stride,
   2863     WORD32 curr_buf_idx,
   2864     WORD32 func_proc_mode,
   2865     WORD32 i4_alpha_stim_multiplier)
   2866 {
   2867     enc_loop_cu_final_prms_t *ps_final_prms;
   2868     nbr_avail_flags_t s_nbr;
   2869     nbr_4x4_t *ps_nbr_4x4;
   2870     nbr_4x4_t *ps_tmp_lt_4x4;
   2871     recon_datastore_t *ps_recon_datastore;
   2872 
   2873     ihevc_intra_pred_luma_ref_substitution_ft *ihevc_intra_pred_luma_ref_substitution_fptr;
   2874 
   2875     UWORD32 *pu4_nbr_flags;
   2876     UWORD8 *pu1_intra_pred_mode;
   2877     WORD32 cu_pos_x;
   2878     WORD32 cu_pos_y;
   2879     WORD32 trans_size = 0;
   2880     UWORD8 *pu1_left;
   2881     UWORD8 *pu1_top;
   2882     UWORD8 *pu1_top_left;
   2883     UWORD8 *pu1_recon;
   2884     UWORD8 *pu1_csbf_buf;
   2885     UWORD8 *pu1_ecd_data;
   2886     WORD16 *pi2_deq_data;
   2887     WORD32 deq_data_strd;
   2888     LWORD64 total_rdopt_cost;
   2889     WORD32 ctr;
   2890     WORD32 left_strd;
   2891     WORD32 i4_recon_stride;
   2892     WORD32 csbf_strd;
   2893     WORD32 ecd_data_bytes_cons;
   2894     WORD32 num_4x4_in_tu;
   2895     WORD32 num_4x4_in_cu;
   2896     WORD32 chrm_present_flag;
   2897     WORD32 tx_size;
   2898     WORD32 cu_bits;
   2899     WORD32 num_cu_parts = 0;
   2900     WORD32 num_cands = 0;
   2901     WORD32 cu_pos_x_8pelunits;
   2902     WORD32 cu_pos_y_8pelunits;
   2903     WORD32 i4_perform_rdoq;
   2904     WORD32 i4_perform_sbh;
   2905     UWORD8 u1_compute_spatial_ssd;
   2906     UWORD8 u1_compute_recon;
   2907     UWORD8 au1_intra_nxn_rdopt_ctxt_models[2][IHEVC_CAB_CTXT_END];
   2908 
   2909     UWORD16 u2_num_tus_in_cu = 0;
   2910     WORD32 is_sub_pu_in_hq = 0;
   2911     /* Get the RDOPT cost of the best CU mode for early_exit */
   2912     LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!curr_buf_idx].i8_best_rdopt_cost;
   2913     /* cabac context of prev intra luma pred flag */
   2914     UWORD8 u1_prev_flag_cabac_ctxt =
   2915         ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_INTRA_LUMA_PRED_FLAG];
   2916     WORD32 src_strd = ps_cu_prms->i4_luma_src_stride;
   2917 
   2918     UWORD8 u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY;
   2919 
   2920     total_rdopt_cost = 0;
   2921     ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx];
   2922     ps_recon_datastore = &ps_final_prms->s_recon_datastore;
   2923     i4_recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
   2924     csbf_strd = ps_ctxt->i4_cu_csbf_strd;
   2925     pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0];
   2926     pu1_ecd_data = &ps_final_prms->pu1_cu_coeffs[0];
   2927     pi2_deq_data = &ps_final_prms->pi2_cu_deq_coeffs[0];
   2928     deq_data_strd = ps_cu_analyse->u1_cu_size; /* deq_data stride is cu size */
   2929     ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0];
   2930     ps_tmp_lt_4x4 = ps_left_nbr_4x4;
   2931     pu4_nbr_flags = &ps_final_prms->au4_nbr_flags[0];
   2932     pu1_intra_pred_mode = &ps_final_prms->au1_intra_pred_mode[0];
   2933     cu_pos_x = ps_cu_analyse->b3_cu_pos_x;
   2934     cu_pos_y = ps_cu_analyse->b3_cu_pos_y;
   2935     cu_pos_x_8pelunits = cu_pos_x;
   2936     cu_pos_y_8pelunits = cu_pos_y;
   2937 
   2938     /* reset cu not coded cost */
   2939     ps_ctxt->i8_cu_not_coded_cost = 0;
   2940 
   2941     /* based on the Processng mode */
   2942     if(TU_EQ_CU == func_proc_mode)
   2943     {
   2944         ps_final_prms->u1_part_mode = SIZE_2Nx2N;
   2945         trans_size = ps_cu_analyse->u1_cu_size;
   2946         num_cu_parts = 1;
   2947         num_cands = 1;
   2948         u2_num_tus_in_cu = 1;
   2949     }
   2950     else if(TU_EQ_CU_DIV2 == func_proc_mode)
   2951     {
   2952         ps_final_prms->u1_part_mode = SIZE_2Nx2N;
   2953         trans_size = ps_cu_analyse->u1_cu_size >> 1;
   2954         num_cu_parts = 4;
   2955         num_cands = 1;
   2956         u2_num_tus_in_cu = 4;
   2957     }
   2958     else if(TU_EQ_SUBCU == func_proc_mode)
   2959     {
   2960         ps_final_prms->u1_part_mode = SIZE_NxN;
   2961         trans_size = ps_cu_analyse->u1_cu_size >> 1;
   2962         num_cu_parts = 4;
   2963         /*In HQ for TU = SUBPU, all 35 modes used for RDOPT instead of 3 modes */
   2964         if(IHEVCE_QUALITY_P3 > ps_ctxt->i4_quality_preset)
   2965         {
   2966             if(ps_ctxt->i1_slice_type != BSLICE)
   2967             {
   2968                 num_cands = (4 * MAX_INTRA_CU_CANDIDATES) + 2;
   2969             }
   2970             else
   2971             {
   2972                 num_cands = (2 * MAX_INTRA_CU_CANDIDATES);
   2973             }
   2974         }
   2975         else
   2976         {
   2977             num_cands = MAX_INTRA_CU_CANDIDATES;
   2978         }
   2979         u2_num_tus_in_cu = 4;
   2980     }
   2981     else
   2982     {
   2983         /* should not enter here */
   2984         ASSERT(0);
   2985     }
   2986 
   2987     if(ps_ctxt->i1_cu_qp_delta_enable)
   2988     {
   2989         WORD32 i4_act_counter = 0, i4_act_counter_lamda = 0;
   2990         if(ps_cu_analyse->u1_cu_size == 64)
   2991         {
   2992             ASSERT(
   2993                 (trans_size == 32) || (trans_size == 16) || (trans_size == 8) || (trans_size == 4));
   2994             i4_act_counter = (trans_size == 16) + 2 * ((trans_size == 8) || (trans_size == 4));
   2995             i4_act_counter_lamda = 3;
   2996         }
   2997         else if(ps_cu_analyse->u1_cu_size == 32)
   2998         {
   2999             ASSERT(
   3000                 (trans_size == 32) || (trans_size == 16) || (trans_size == 8) || (trans_size == 4));
   3001             i4_act_counter = (trans_size == 16) + 2 * ((trans_size == 8) || (trans_size == 4));
   3002             i4_act_counter_lamda = 0;
   3003         }
   3004         else if(ps_cu_analyse->u1_cu_size == 16)
   3005         {
   3006             ASSERT((trans_size == 16) || (trans_size == 8) || (trans_size == 4));
   3007             i4_act_counter = (trans_size == 8) || (trans_size == 4);
   3008             i4_act_counter_lamda = 0;
   3009         }
   3010         else if(ps_cu_analyse->u1_cu_size == 8)
   3011         {
   3012             ASSERT((trans_size == 8) || (trans_size == 4));
   3013             i4_act_counter = 1;
   3014             i4_act_counter_lamda = 0;
   3015         }
   3016         else
   3017         {
   3018             ASSERT(0);
   3019         }
   3020         if(ps_ctxt->i4_use_ctb_level_lamda)
   3021         {
   3022             ihevce_compute_cu_level_QP(
   3023                 ps_ctxt, ps_cu_analyse->i4_act_factor[i4_act_counter][1], -1, 0);
   3024         }
   3025         else
   3026         {
   3027             ihevce_compute_cu_level_QP(
   3028                 ps_ctxt,
   3029                 ps_cu_analyse->i4_act_factor[i4_act_counter][1],
   3030                 ps_cu_analyse->i4_act_factor[i4_act_counter_lamda][1],
   3031                 0);
   3032         }
   3033 
   3034         ps_cu_analyse->i1_cu_qp = ps_ctxt->i4_cu_qp;
   3035     }
   3036     if(u1_is_cu_noisy && !ps_ctxt->u1_enable_psyRDOPT)
   3037     {
   3038         ps_ctxt->i8_cl_ssd_lambda_qf =
   3039             ((float)ps_ctxt->i8_cl_ssd_lambda_qf * (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) /
   3040              100.0f);
   3041         ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
   3042             ((float)ps_ctxt->i8_cl_ssd_lambda_chroma_qf *
   3043              (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
   3044     }
   3045 
   3046     u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
   3047                              (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
   3048                              CONVERT_SSDS_TO_SPATIAL_DOMAIN;
   3049 
   3050     if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
   3051     {
   3052         u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
   3053                                  CONVERT_SSDS_TO_SPATIAL_DOMAIN;
   3054     }
   3055 
   3056     /* populate the neighbours */
   3057     pu1_left = (UWORD8 *)pv_cu_left;
   3058     pu1_top = (UWORD8 *)pv_cu_top;
   3059     pu1_top_left = (UWORD8 *)pv_cu_top_left;
   3060     left_strd = cu_left_stride;
   3061     num_4x4_in_tu = (trans_size >> 2);
   3062     num_4x4_in_cu = (ps_cu_analyse->u1_cu_size >> 2);
   3063     chrm_present_flag = 1;
   3064     ecd_data_bytes_cons = 0;
   3065     cu_bits = 0;
   3066 
   3067     /* get the 4x4 level position of current cu */
   3068     cu_pos_x = cu_pos_x << 1;
   3069     cu_pos_y = cu_pos_y << 1;
   3070 
   3071     /* populate cu level params knowing that current is intra */
   3072     ps_final_prms->u1_skip_flag = 0;
   3073     ps_final_prms->u1_intra_flag = PRED_MODE_INTRA;
   3074     ps_final_prms->u2_num_pus_in_cu = 1;
   3075     /*init the is_cu_coded flag*/
   3076     ps_final_prms->u1_is_cu_coded = 0;
   3077     ps_final_prms->u4_cu_sad = 0;
   3078 
   3079     ps_final_prms->as_pu_enc_loop[0].b1_intra_flag = PRED_MODE_INTRA;
   3080     ps_final_prms->as_pu_enc_loop[0].b4_wd = (trans_size >> 1) - 1;
   3081     ps_final_prms->as_pu_enc_loop[0].b4_ht = (trans_size >> 1) - 1;
   3082     ps_final_prms->as_pu_enc_loop[0].b4_pos_x = cu_pos_x;
   3083     ps_final_prms->as_pu_enc_loop[0].b4_pos_y = cu_pos_y;
   3084     ps_final_prms->as_pu_enc_loop[0].b1_merge_flag = 0;
   3085 
   3086     ps_final_prms->as_col_pu_enc_loop[0].b1_intra_flag = 1;
   3087 
   3088     /*copy qp directly as intra cant be skip*/
   3089     ps_nbr_4x4->b8_qp = ps_ctxt->i4_cu_qp;
   3090     ps_nbr_4x4->mv.s_l0_mv.i2_mvx = 0;
   3091     ps_nbr_4x4->mv.s_l0_mv.i2_mvy = 0;
   3092     ps_nbr_4x4->mv.s_l1_mv.i2_mvx = 0;
   3093     ps_nbr_4x4->mv.s_l1_mv.i2_mvy = 0;
   3094     ps_nbr_4x4->mv.i1_l0_ref_pic_buf_id = -1;
   3095     ps_nbr_4x4->mv.i1_l1_ref_pic_buf_id = -1;
   3096     ps_nbr_4x4->mv.i1_l0_ref_idx = -1;
   3097     ps_nbr_4x4->mv.i1_l1_ref_idx = -1;
   3098 
   3099     /* RDOPT copy States :  TU init (best until prev TU) to current */
   3100     memcpy(
   3101         &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
   3102              .s_cabac_ctxt.au1_ctxt_models[0],
   3103         &ps_ctxt->au1_rdopt_init_ctxt_models[0],
   3104         IHEVC_CAB_COEFFX_PREFIX);
   3105 
   3106     /* RDOPT copy States :update to init state if 0 cbf */
   3107     memcpy(
   3108         &au1_intra_nxn_rdopt_ctxt_models[0][0],
   3109         &ps_ctxt->au1_rdopt_init_ctxt_models[0],
   3110         IHEVC_CAB_COEFFX_PREFIX);
   3111     memcpy(
   3112         &au1_intra_nxn_rdopt_ctxt_models[1][0],
   3113         &ps_ctxt->au1_rdopt_init_ctxt_models[0],
   3114         IHEVC_CAB_COEFFX_PREFIX);
   3115 
   3116     /* loop for all partitions in CU  blocks */
   3117     for(ctr = 0; ctr < num_cu_parts; ctr++)
   3118     {
   3119         UWORD8 *pu1_curr_mode;
   3120         WORD32 cand_ctr;
   3121         WORD32 nbr_flags;
   3122 
   3123         /* for NxN case to track the best mode       */
   3124         /* for other cases zeroth index will be used */
   3125         intra_prev_rem_flags_t as_intra_prev_rem[2];
   3126         LWORD64 ai8_cand_rdopt_cost[2];
   3127         UWORD32 au4_tu_sad[2];
   3128         WORD32 ai4_tu_bits[2];
   3129         WORD32 ai4_cbf[2];
   3130         WORD32 ai4_curr_bytes[2];
   3131         WORD32 ai4_zero_col[2];
   3132         WORD32 ai4_zero_row[2];
   3133         /* To store the pred, coeff and dequant for TU_EQ_SUBCU case (since mul.
   3134         cand. are there) ping-pong buffer to store the best and current */
   3135         UWORD8 au1_cur_pred_data[2][MIN_TU_SIZE * MIN_TU_SIZE];
   3136         UWORD8 au1_intra_coeffs[2][MAX_SCAN_COEFFS_BYTES_4x4];
   3137         WORD16 ai2_intra_deq_coeffs[2][MIN_TU_SIZE * MIN_TU_SIZE];
   3138         /* Context models stored for RDopt store and restore purpose */
   3139 
   3140         UWORD8 au1_recon_availability[2];
   3141 
   3142         WORD32 best_cand_idx = 0;
   3143         LWORD64 best_cand_cost = MAX_COST_64;
   3144         /* counters to toggle b/w best and current */
   3145         WORD32 best_intra_buf_idx = 1;
   3146         WORD32 curr_intra_buf_idx = 0;
   3147 
   3148         /* copy the mode pointer to be used in inner loop */
   3149         pu1_curr_mode = pu1_luma_mode;
   3150 
   3151         /* get the neighbour availability flags */
   3152         nbr_flags = ihevce_get_nbr_intra(
   3153             &s_nbr,
   3154             ps_ctxt->pu1_ctb_nbr_map,
   3155             ps_ctxt->i4_nbr_map_strd,
   3156             cu_pos_x,
   3157             cu_pos_y,
   3158             num_4x4_in_tu);
   3159 
   3160         /* copy the nbr flags for chroma reuse */
   3161         if(4 != trans_size)
   3162         {
   3163             *pu4_nbr_flags = nbr_flags;
   3164         }
   3165         else if(1 == chrm_present_flag)
   3166         {
   3167             /* compute the avail flags assuming luma trans is 8x8 */
   3168             /* get the neighbour availability flags */
   3169             *pu4_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
   3170                 ps_ctxt->pu1_ctb_nbr_map,
   3171                 ps_ctxt->i4_nbr_map_strd,
   3172                 cu_pos_x,
   3173                 cu_pos_y,
   3174                 (num_4x4_in_tu << 1),
   3175                 (num_4x4_in_tu << 1));
   3176         }
   3177 
   3178         u1_compute_recon = !u1_compute_spatial_ssd && ((num_cu_parts > 1) && (ctr < 3));
   3179 
   3180         if(!ctr && (u1_compute_spatial_ssd || u1_compute_recon))
   3181         {
   3182             ps_recon_datastore->u1_is_lumaRecon_available = 1;
   3183         }
   3184         else if(!ctr)
   3185         {
   3186             ps_recon_datastore->u1_is_lumaRecon_available = 0;
   3187         }
   3188 
   3189         ihevc_intra_pred_luma_ref_substitution_fptr =
   3190             ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr;
   3191 
   3192         /* call reference array substitution */
   3193         ihevc_intra_pred_luma_ref_substitution_fptr(
   3194             pu1_top_left,
   3195             pu1_top,
   3196             pu1_left,
   3197             left_strd,
   3198             trans_size,
   3199             nbr_flags,
   3200             (UWORD8 *)ps_ctxt->pv_ref_sub_out,
   3201             1);
   3202 
   3203         /* Intra Mode gating based on MPM cand list and encoder quality preset */
   3204         if((ps_ctxt->i1_slice_type != ISLICE) && (TU_EQ_SUBCU == func_proc_mode) &&
   3205            (ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3))
   3206         {
   3207             ihevce_mpm_idx_based_filter_RDOPT_cand(
   3208                 ps_ctxt,
   3209                 ps_cu_analyse,
   3210                 ps_left_nbr_4x4,
   3211                 ps_top_nbr_4x4,
   3212                 pu1_luma_mode,
   3213                 &ps_cu_analyse->s_cu_intra_cand.au1_nxn_eval_mark[ctr][0]);
   3214         }
   3215 
   3216         if((TU_EQ_SUBCU == func_proc_mode) && (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
   3217            (ps_cu_analyse->s_cu_intra_cand.au1_num_modes_added[ctr] >= MAX_INTRA_CU_CANDIDATES))
   3218         {
   3219             WORD32 ai4_mpm_mode_list[3];
   3220             WORD32 i;
   3221 
   3222             WORD32 i4_curr_index = ps_cu_analyse->s_cu_intra_cand.au1_num_modes_added[ctr];
   3223 
   3224             ihevce_populate_intra_pred_mode(
   3225                 ps_top_nbr_4x4->b6_luma_intra_mode,
   3226                 ps_tmp_lt_4x4->b6_luma_intra_mode,
   3227                 s_nbr.u1_top_avail,
   3228                 s_nbr.u1_left_avail,
   3229                 cu_pos_y,
   3230                 &ai4_mpm_mode_list[0]);
   3231 
   3232             for(i = 0; i < 3; i++)
   3233             {
   3234                 if(ps_cu_analyse->s_cu_intra_cand
   3235                        .au1_intra_luma_mode_nxn_hash[ctr][ai4_mpm_mode_list[i]] == 0)
   3236                 {
   3237                     ASSERT(ai4_mpm_mode_list[i] < 35);
   3238 
   3239                     ps_cu_analyse->s_cu_intra_cand
   3240                         .au1_intra_luma_mode_nxn_hash[ctr][ai4_mpm_mode_list[i]] = 1;
   3241                     pu1_luma_mode[i4_curr_index] = ai4_mpm_mode_list[i];
   3242                     ps_cu_analyse->s_cu_intra_cand.au1_num_modes_added[ctr]++;
   3243                     i4_curr_index++;
   3244                 }
   3245             }
   3246 
   3247             pu1_luma_mode[i4_curr_index] = 255;
   3248         }
   3249 
   3250         /* loop over candidates for each partition */
   3251         for(cand_ctr = 0; cand_ctr < num_cands; cand_ctr++)
   3252         {
   3253             WORD32 curr_pred_mode;
   3254             WORD32 bits = 0;
   3255             LWORD64 curr_cost;
   3256             WORD32 luma_pred_func_idx;
   3257             UWORD8 *pu1_curr_ecd_data;
   3258             WORD16 *pi2_curr_deq_data;
   3259             WORD32 curr_deq_data_strd;
   3260             WORD32 pred_strd;
   3261             UWORD8 *pu1_pred;
   3262 
   3263             /* if NXN case the recon and ecd data is stored in temp buffers */
   3264             if(TU_EQ_SUBCU == func_proc_mode)
   3265             {
   3266                 pu1_pred = &au1_cur_pred_data[curr_intra_buf_idx][0];
   3267                 pred_strd = trans_size;
   3268                 pu1_curr_ecd_data = &au1_intra_coeffs[curr_intra_buf_idx][0];
   3269                 pi2_curr_deq_data = &ai2_intra_deq_coeffs[curr_intra_buf_idx][0];
   3270                 curr_deq_data_strd = trans_size;
   3271 
   3272                 ASSERT(trans_size == MIN_TU_SIZE);
   3273             }
   3274             else
   3275             {
   3276                 pu1_pred = (UWORD8 *)pv_pred_org;
   3277                 pred_strd = pred_strd_org;
   3278                 pu1_curr_ecd_data = pu1_ecd_data;
   3279                 pi2_curr_deq_data = pi2_deq_data;
   3280                 curr_deq_data_strd = deq_data_strd;
   3281             }
   3282 
   3283             pu1_recon = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs[curr_intra_buf_idx]) +
   3284                         (ctr & 1) * trans_size + (ctr > 1) * trans_size * i4_recon_stride;
   3285 
   3286             if(is_sub_pu_in_hq == 1)
   3287             {
   3288                 curr_pred_mode = cand_ctr;
   3289             }
   3290             else
   3291             {
   3292                 curr_pred_mode = pu1_curr_mode[cand_ctr];
   3293             }
   3294 
   3295             /* If the candidate mode is 255, then break */
   3296             if(255 == curr_pred_mode)
   3297             {
   3298                 break;
   3299             }
   3300             else if(250 == curr_pred_mode)
   3301             {
   3302                 continue;
   3303             }
   3304 
   3305             /* check if this mode needs to be evaluated or not. For 2nx2n cases, this   */
   3306             /* function will be called once per candidate, so this check has been done  */
   3307             /* outside this function call. For NxN case, this function will be called   */
   3308             /* only once, and all the candidates will be evaluated here.                */
   3309             if(ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3)
   3310             {
   3311                 if((TU_EQ_SUBCU == func_proc_mode) &&
   3312                    (0 == ps_cu_analyse->s_cu_intra_cand.au1_nxn_eval_mark[ctr][cand_ctr]))
   3313                 {
   3314                     continue;
   3315                 }
   3316             }
   3317 
   3318             /* call reference filtering */
   3319             ps_ctxt->ps_func_selector->ihevc_intra_pred_ref_filtering_fptr(
   3320                 (UWORD8 *)ps_ctxt->pv_ref_sub_out,
   3321                 trans_size,
   3322                 (UWORD8 *)ps_ctxt->pv_ref_filt_out,
   3323                 curr_pred_mode,
   3324                 ps_ctxt->i1_strong_intra_smoothing_enable_flag);
   3325 
   3326             /* use the look up to get the function idx */
   3327             luma_pred_func_idx = g_i4_ip_funcs[curr_pred_mode];
   3328 
   3329             /* call the intra prediction function */
   3330             ps_ctxt->apf_lum_ip[luma_pred_func_idx](
   3331                 (UWORD8 *)ps_ctxt->pv_ref_filt_out,
   3332                 1,
   3333                 pu1_pred,
   3334                 pred_strd,
   3335                 trans_size,
   3336                 curr_pred_mode);
   3337 
   3338             /* populate the coeffs scan idx */
   3339             ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
   3340 
   3341             /* for luma 4x4 and 8x8 transforms, the scan is chosen based on intra pred mode */
   3342             if(trans_size < 16)
   3343             {
   3344                 /* for modes from 22 upto 30 horizontal scan is used */
   3345                 if((curr_pred_mode > 21) && (curr_pred_mode < 31))
   3346                 {
   3347                     ps_ctxt->i4_scan_idx = SCAN_HORZ;
   3348                 }
   3349                 /* for modes from 6 upto 14 vertical scan is used */
   3350                 else if((curr_pred_mode > 5) && (curr_pred_mode < 15))
   3351                 {
   3352                     ps_ctxt->i4_scan_idx = SCAN_VERT;
   3353                 }
   3354             }
   3355 
   3356             /* RDOPT copy States :  TU init (best until prev TU) to current */
   3357             COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
   3358                 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
   3359                         .s_cabac_ctxt.au1_ctxt_models[0] +
   3360                     IHEVC_CAB_COEFFX_PREFIX,
   3361                 &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
   3362                 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
   3363 
   3364             i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq;
   3365             i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh;
   3366 
   3367 #if DISABLE_RDOQ_INTRA
   3368             i4_perform_rdoq = 0;
   3369 #endif
   3370 
   3371             /* 2 multi-dimensional arrays of rounding factors, based on trans size, to be added here */
   3372             /* arrays are for rounding factor corr. to 0-1 decision and 1-2 decision */
   3373             /* Currently the complete array will contain only single value*/
   3374             /*The rounding factor is calculated with the formula
   3375             Deadzone val = (((R1 - R0) * (2^(-8/3)) * lamMod) + 1)/2
   3376             rounding factor = (1 - DeadZone Val)
   3377 
   3378             Assumption: Cabac states of All the sub-blocks in the TU are considered independent
   3379             */
   3380             if((ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING))
   3381             {
   3382                 if((ps_ctxt->i4_quant_rounding_level == TU_LEVEL_QUANT_ROUNDING) && (ctr != 0))
   3383                 {
   3384                     double i4_lamda_modifier;
   3385 
   3386                     if((BSLICE == ps_ctxt->i1_slice_type) && (ps_ctxt->i4_temporal_layer_id))
   3387                     {
   3388                         i4_lamda_modifier =
   3389                             ps_ctxt->i4_lamda_modifier *
   3390                             CLIP3((((double)(ps_ctxt->i4_cu_qp - 12)) / 6.0), 2.00, 4.00);
   3391                     }
   3392                     else
   3393                     {
   3394                         i4_lamda_modifier = ps_ctxt->i4_lamda_modifier;
   3395                     }
   3396                     if(ps_ctxt->i4_use_const_lamda_modifier)
   3397                     {
   3398                         if(ISLICE == ps_ctxt->i1_slice_type)
   3399                         {
   3400                             i4_lamda_modifier = ps_ctxt->f_i_pic_lamda_modifier;
   3401                         }
   3402                         else
   3403                         {
   3404                             i4_lamda_modifier = CONST_LAMDA_MOD_VAL;
   3405                         }
   3406                     }
   3407 
   3408                     ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] =
   3409                         &ps_ctxt->i4_quant_round_tu[0][0];
   3410                     ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] =
   3411                         &ps_ctxt->i4_quant_round_tu[1][0];
   3412 
   3413                     memset(
   3414                         ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
   3415                         0,
   3416                         trans_size * trans_size * sizeof(WORD32));
   3417                     memset(
   3418                         ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
   3419                         0,
   3420                         trans_size * trans_size * sizeof(WORD32));
   3421 
   3422                     ihevce_quant_rounding_factor_gen(
   3423                         trans_size,
   3424                         1,
   3425                         &ps_ctxt->s_rdopt_entropy_ctxt,
   3426                         ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
   3427                         ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
   3428                         i4_lamda_modifier,
   3429                         1);
   3430                 }
   3431                 else
   3432                 {
   3433                     ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] =
   3434                         ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[trans_size >> 3];
   3435                     ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] =
   3436                         ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[trans_size >> 3];
   3437                 }
   3438             }
   3439 
   3440             /* call T Q IT IQ and recon function */
   3441             ai4_cbf[curr_intra_buf_idx] = ihevce_t_q_iq_ssd_scan_fxn(
   3442                 ps_ctxt,
   3443                 pu1_pred,
   3444                 pred_strd,
   3445                 (UWORD8 *)pv_curr_src,
   3446                 src_strd,
   3447                 pi2_curr_deq_data,
   3448                 curr_deq_data_strd,
   3449                 pu1_recon,
   3450                 i4_recon_stride,
   3451                 pu1_curr_ecd_data,
   3452                 pu1_csbf_buf,
   3453                 csbf_strd,
   3454                 trans_size,
   3455                 PRED_MODE_INTRA,
   3456                 &ai8_cand_rdopt_cost[curr_intra_buf_idx],
   3457                 &ai4_curr_bytes[curr_intra_buf_idx],
   3458                 &ai4_tu_bits[curr_intra_buf_idx],
   3459                 &au4_tu_sad[curr_intra_buf_idx],
   3460                 &ai4_zero_col[curr_intra_buf_idx],
   3461                 &ai4_zero_row[curr_intra_buf_idx],
   3462                 &au1_recon_availability[curr_intra_buf_idx],
   3463                 i4_perform_rdoq,
   3464                 i4_perform_sbh,
   3465 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
   3466                 i4_alpha_stim_multiplier,
   3467                 u1_is_cu_noisy,
   3468 #endif
   3469                 u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
   3470                 1 /*early_cbf */
   3471             );
   3472 
   3473 #if COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL && !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
   3474             if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
   3475             {
   3476 #if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
   3477                 ai8_cand_rdopt_cost[curr_intra_buf_idx] = ihevce_inject_stim_into_distortion(
   3478                     pv_curr_src,
   3479                     src_strd,
   3480                     pu1_pred,
   3481                     pred_strd,
   3482                     ai8_cand_rdopt_cost[curr_intra_buf_idx],
   3483                     i4_alpha_stim_multiplier,
   3484                     trans_size,
   3485                     0,
   3486                     ps_ctxt->u1_enable_psyRDOPT,
   3487                     NULL_PLANE);
   3488 #else
   3489                 if(u1_compute_spatial_ssd && au1_recon_availability[curr_intra_buf_idx])
   3490                 {
   3491                     ai8_cand_rdopt_cost[curr_intra_buf_idx] = ihevce_inject_stim_into_distortion(
   3492                         pv_curr_src,
   3493                         src_strd,
   3494                         pu1_recon,
   3495                         i4_recon_stride,
   3496                         ai8_cand_rdopt_cost[curr_intra_buf_idx],
   3497                         i4_alpha_stim_multiplier,
   3498                         trans_size,
   3499                         0,
   3500                         ps_ctxt->u1_enable_psyRDOPT,
   3501                         NULL_PLANE);
   3502                 }
   3503                 else
   3504                 {
   3505                     ai8_cand_rdopt_cost[curr_intra_buf_idx] = ihevce_inject_stim_into_distortion(
   3506                         pv_curr_src,
   3507                         src_strd,
   3508                         pu1_pred,
   3509                         pred_strd,
   3510                         ai8_cand_rdopt_cost[curr_intra_buf_idx],
   3511                         i4_alpha_stim_multiplier,
   3512                         trans_size,
   3513                         0,
   3514                         ps_ctxt->u1_enable_psyRDOPT,
   3515                         NULL_PLANE);
   3516                 }
   3517 #endif
   3518             }
   3519 #endif
   3520 
   3521             if(TU_EQ_SUBCU == func_proc_mode)
   3522             {
   3523                 ASSERT(ai4_curr_bytes[curr_intra_buf_idx] < MAX_SCAN_COEFFS_BYTES_4x4);
   3524             }
   3525 
   3526             /* based on CBF/No CBF copy the corresponding state */
   3527             if(0 == ai4_cbf[curr_intra_buf_idx])
   3528             {
   3529                 /* RDOPT copy States :update to init state if 0 cbf */
   3530                 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
   3531                     &au1_intra_nxn_rdopt_ctxt_models[curr_intra_buf_idx][0] +
   3532                         IHEVC_CAB_COEFFX_PREFIX,
   3533                     &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
   3534                     IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
   3535             }
   3536             else
   3537             {
   3538                 /* RDOPT copy States :update to new state only if CBF is non zero */
   3539                 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
   3540                     &au1_intra_nxn_rdopt_ctxt_models[curr_intra_buf_idx][0] +
   3541                         IHEVC_CAB_COEFFX_PREFIX,
   3542                     &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
   3543                             .s_cabac_ctxt.au1_ctxt_models[0] +
   3544                         IHEVC_CAB_COEFFX_PREFIX,
   3545                     IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
   3546             }
   3547 
   3548             /* call the function which perform intra mode prediction */
   3549             ihevce_intra_pred_mode_signaling(
   3550                 ps_top_nbr_4x4->b6_luma_intra_mode,
   3551                 ps_tmp_lt_4x4->b6_luma_intra_mode,
   3552                 s_nbr.u1_top_avail,
   3553                 s_nbr.u1_left_avail,
   3554                 cu_pos_y,
   3555                 curr_pred_mode,
   3556                 &as_intra_prev_rem[curr_intra_buf_idx]);
   3557             /******************************************************************/
   3558             /* PREV INTRA LUMA FLAG, MPM MODE and REM INTRA MODE bits for I_NxN
   3559             The bits for these are evaluated for every RDO mode of current subcu
   3560             as they can significantly contribute to RDO cost.  Note that these
   3561             bits are not accounted for here (ai8_cand_rdopt_cost) as they
   3562             are accounted for in encode_cu call later */
   3563 
   3564             /******************************************************************/
   3565             /* PREV INTRA LUMA FLAG, MPM MODE and REM INTRA MODE bits for I_NxN
   3566             The bits for these are evaluated for every RDO mode of current subcu
   3567             as they can significantly contribute to RDO cost.  Note that these
   3568             bits are not accounted for here (ai8_cand_rdopt_cost) as they
   3569             are accounted for in encode_cu call later */
   3570 
   3571             /* Estimate bits to encode prev rem flag  for NXN mode */
   3572             {
   3573                 WORD32 bits_frac = gau2_ihevce_cabac_bin_to_bits
   3574                     [u1_prev_flag_cabac_ctxt ^
   3575                      as_intra_prev_rem[curr_intra_buf_idx].b1_prev_intra_luma_pred_flag];
   3576 
   3577                 /* rounding the fractional bits to nearest integer */
   3578                 bits = ((bits_frac + (1 << (CABAC_FRAC_BITS_Q - 1))) >> CABAC_FRAC_BITS_Q);
   3579             }
   3580 
   3581             /* based on prev flag all the mpmidx bits and rem bits */
   3582             if(1 == as_intra_prev_rem[curr_intra_buf_idx].b1_prev_intra_luma_pred_flag)
   3583             {
   3584                 /* mpm_idx */
   3585                 bits += as_intra_prev_rem[curr_intra_buf_idx].b2_mpm_idx ? 2 : 1;
   3586             }
   3587             else
   3588             {
   3589                 /* rem intra mode */
   3590                 bits += 5;
   3591             }
   3592 
   3593             bits += ai4_tu_bits[curr_intra_buf_idx];
   3594 
   3595             /* compute the total cost for current candidate */
   3596             curr_cost = ai8_cand_rdopt_cost[curr_intra_buf_idx];
   3597 
   3598             /* get the final ssd cost */
   3599             curr_cost +=
   3600                 COMPUTE_RATE_COST_CLIP30(bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
   3601 
   3602             /* check of the best candidate cost */
   3603             if(curr_cost < best_cand_cost)
   3604             {
   3605                 best_cand_cost = curr_cost;
   3606                 best_cand_idx = cand_ctr;
   3607                 best_intra_buf_idx = curr_intra_buf_idx;
   3608                 curr_intra_buf_idx = !curr_intra_buf_idx;
   3609             }
   3610         }
   3611 
   3612         /***************    For TU_EQ_SUBCU case    *****************/
   3613         /* Copy the pred for best cand. to the final pred array     */
   3614         /* Copy the iq-coeff for best cand. to the final array      */
   3615         /* copy the best coeffs data to final buffer                */
   3616         if(TU_EQ_SUBCU == func_proc_mode)
   3617         {
   3618             /* Copy the pred for best cand. to the final pred array */
   3619 
   3620             ps_ctxt->s_cmn_opt_func.pf_copy_2d(
   3621                 (UWORD8 *)pv_pred_org,
   3622                 pred_strd_org,
   3623                 &au1_cur_pred_data[best_intra_buf_idx][0],
   3624                 trans_size,
   3625                 trans_size,
   3626                 trans_size);
   3627 
   3628             /* Copy the deq-coeff for best cand. to the final array */
   3629 
   3630             ps_ctxt->s_cmn_opt_func.pf_copy_2d(
   3631                 (UWORD8 *)pi2_deq_data,
   3632                 deq_data_strd << 1,
   3633                 (UWORD8 *)&ai2_intra_deq_coeffs[best_intra_buf_idx][0],
   3634                 trans_size << 1,
   3635                 trans_size << 1,
   3636                 trans_size);
   3637             /* copy the coeffs to final cu ecd bytes buffer */
   3638             memcpy(
   3639                 pu1_ecd_data,
   3640                 &au1_intra_coeffs[best_intra_buf_idx][0],
   3641                 ai4_curr_bytes[best_intra_buf_idx]);
   3642 
   3643             pu1_recon = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs[best_intra_buf_idx]) +
   3644                         (ctr & 1) * trans_size + (ctr > 1) * trans_size * i4_recon_stride;
   3645         }
   3646 
   3647         /*----------   Calculate Recon for the best INTRA mode     ---------*/
   3648         /* TU_EQ_CU case : No need for recon, otherwise recon is required   */
   3649         /* Compute recon only for the best mode for TU_EQ_SUBCU case        */
   3650         if(u1_compute_recon)
   3651         {
   3652             ihevce_it_recon_fxn(
   3653                 ps_ctxt,
   3654                 pi2_deq_data,
   3655                 deq_data_strd,
   3656                 (UWORD8 *)pv_pred_org,
   3657                 pred_strd_org,
   3658                 pu1_recon,
   3659                 i4_recon_stride,
   3660                 pu1_ecd_data,
   3661                 trans_size,
   3662                 PRED_MODE_INTRA,
   3663                 ai4_cbf[best_intra_buf_idx],
   3664                 ai4_zero_col[best_intra_buf_idx],
   3665                 ai4_zero_row[best_intra_buf_idx]);
   3666 
   3667             ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = best_intra_buf_idx;
   3668         }
   3669         else if(u1_compute_spatial_ssd && au1_recon_availability[best_intra_buf_idx])
   3670         {
   3671             ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = best_intra_buf_idx;
   3672         }
   3673         else
   3674         {
   3675             ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = UCHAR_MAX;
   3676         }
   3677 
   3678         /* RDOPT copy States :update to best modes state */
   3679         COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
   3680             &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
   3681             &au1_intra_nxn_rdopt_ctxt_models[best_intra_buf_idx][0] + IHEVC_CAB_COEFFX_PREFIX,
   3682             IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
   3683 
   3684         /* copy the prev,mpm_idx and rem modes from best cand */
   3685         ps_final_prms->as_intra_prev_rem[ctr] = as_intra_prev_rem[best_intra_buf_idx];
   3686 
   3687         /* update the cabac context of prev intra pred mode flag */
   3688         u1_prev_flag_cabac_ctxt = gau1_ihevc_next_state
   3689             [(u1_prev_flag_cabac_ctxt << 1) |
   3690              as_intra_prev_rem[best_intra_buf_idx].b1_prev_intra_luma_pred_flag];
   3691 
   3692         /* accumulate the TU bits into cu bits */
   3693         cu_bits += ai4_tu_bits[best_intra_buf_idx];
   3694 
   3695         /* copy the intra pred mode for chroma reuse */
   3696         if(is_sub_pu_in_hq == 0)
   3697         {
   3698             *pu1_intra_pred_mode = pu1_curr_mode[best_cand_idx];
   3699         }
   3700         else
   3701         {
   3702             *pu1_intra_pred_mode = best_cand_idx;
   3703         }
   3704 
   3705         /* Store luma mode as chroma mode. If chroma prcs happens, and
   3706         if a diff. mode wins, it should update this!! */
   3707         if(1 == chrm_present_flag)
   3708         {
   3709             if(is_sub_pu_in_hq == 0)
   3710             {
   3711                 ps_final_prms->u1_chroma_intra_pred_actual_mode =
   3712                     ((ps_ctxt->u1_chroma_array_type == 2)
   3713                          ? gau1_chroma422_intra_angle_mapping[pu1_curr_mode[best_cand_idx]]
   3714                          : pu1_curr_mode[best_cand_idx]);
   3715             }
   3716             else
   3717             {
   3718                 ps_final_prms->u1_chroma_intra_pred_actual_mode =
   3719                     ((ps_ctxt->u1_chroma_array_type == 2)
   3720                          ? gau1_chroma422_intra_angle_mapping[best_cand_idx]
   3721                          : best_cand_idx);
   3722             }
   3723 
   3724             ps_final_prms->u1_chroma_intra_pred_mode = 4;
   3725         }
   3726 
   3727         /*remember the cbf flag to replicate qp for 4x4 neighbour*/
   3728         ps_final_prms->u1_is_cu_coded |= ai4_cbf[best_intra_buf_idx];
   3729 
   3730         /*accumulate ssd over all TU of intra CU*/
   3731         ps_final_prms->u4_cu_sad += au4_tu_sad[best_intra_buf_idx];
   3732 
   3733         /* update the bytes */
   3734         ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons;
   3735         ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed =
   3736             ai4_curr_bytes[best_intra_buf_idx];
   3737         /* update the zero_row and col info for the final mode */
   3738         ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_col =
   3739             ai4_zero_col[best_intra_buf_idx];
   3740         ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_row =
   3741             ai4_zero_row[best_intra_buf_idx];
   3742 
   3743         ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons;
   3744 
   3745         /* update the total bytes cons */
   3746         ecd_data_bytes_cons += ai4_curr_bytes[best_intra_buf_idx];
   3747         pu1_ecd_data += ai4_curr_bytes[best_intra_buf_idx];
   3748 
   3749         ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = ai4_cbf[best_intra_buf_idx];
   3750         ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0;
   3751         ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0;
   3752         ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0;
   3753         ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0;
   3754         ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_chroma_intra_mode_idx = chrm_present_flag;
   3755         ps_final_prms->as_tu_enc_loop[ctr].s_tu.b7_qp = ps_ctxt->i4_cu_qp;
   3756         ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_first_tu_in_cu = 0;
   3757         ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_transquant_bypass = 0;
   3758         GETRANGE(tx_size, trans_size);
   3759         ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3;
   3760         ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x;
   3761         ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y;
   3762 
   3763         /* repiclate the nbr 4x4 structure for all 4x4 blocks current TU */
   3764         ps_nbr_4x4->b1_skip_flag = 0;
   3765         ps_nbr_4x4->b1_intra_flag = 1;
   3766         ps_nbr_4x4->b1_pred_l0_flag = 0;
   3767         ps_nbr_4x4->b1_pred_l1_flag = 0;
   3768 
   3769         if(is_sub_pu_in_hq == 0)
   3770         {
   3771             ps_nbr_4x4->b6_luma_intra_mode = pu1_curr_mode[best_cand_idx];
   3772         }
   3773         else
   3774         {
   3775             ps_nbr_4x4->b6_luma_intra_mode = best_cand_idx;
   3776         }
   3777 
   3778         ps_nbr_4x4->b1_y_cbf = ai4_cbf[best_intra_buf_idx];
   3779 
   3780         /* since tu size can be less than cusize, replication is done with strd */
   3781         {
   3782             WORD32 i, j;
   3783             nbr_4x4_t *ps_tmp_4x4;
   3784 
   3785             ps_tmp_4x4 = ps_nbr_4x4;
   3786 
   3787             for(i = 0; i < num_4x4_in_tu; i++)
   3788             {
   3789                 for(j = 0; j < num_4x4_in_tu; j++)
   3790                 {
   3791                     ps_tmp_4x4[j] = *ps_nbr_4x4;
   3792                 }
   3793                 /* row level update*/
   3794                 ps_tmp_4x4 += num_4x4_in_cu;
   3795             }
   3796         }
   3797 
   3798         if(TU_EQ_SUBCU == func_proc_mode)
   3799         {
   3800             pu1_luma_mode += ((MAX_INTRA_CU_CANDIDATES * 4) + 2 + 1);
   3801         }
   3802 
   3803         if((num_cu_parts > 1) && (ctr < 3))
   3804         {
   3805             /* set the neighbour map to 1 */
   3806             ihevce_set_nbr_map(
   3807                 ps_ctxt->pu1_ctb_nbr_map,
   3808                 ps_ctxt->i4_nbr_map_strd,
   3809                 cu_pos_x,
   3810                 cu_pos_y,
   3811                 trans_size >> 2,
   3812                 1);
   3813 
   3814             /* block level updates block number (1 & 3 )*/
   3815             pv_curr_src = (UWORD8 *)pv_curr_src + trans_size;
   3816             pv_pred_org = (UWORD8 *)pv_pred_org + trans_size;
   3817             pi2_deq_data += trans_size;
   3818 
   3819             switch(ctr)
   3820             {
   3821             case 0:
   3822             {
   3823                 pu1_left = pu1_recon + trans_size - 1;
   3824                 pu1_top += trans_size;
   3825                 pu1_top_left = pu1_top - 1;
   3826                 left_strd = i4_recon_stride;
   3827 
   3828                 break;
   3829             }
   3830             case 1:
   3831             {
   3832                 ASSERT(
   3833                     (ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0] == 0) ||
   3834                     (ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0] == 1));
   3835 
   3836                 /* Since the 'lumaRefSubstitution' function expects both Top and */
   3837                 /* TopRight recon pixels to be present in the same buffer */
   3838                 if(ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0] !=
   3839                    ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1])
   3840                 {
   3841                     UWORD8 *pu1_src =
   3842                         ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
   3843                              [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1]]) +
   3844                         trans_size;
   3845                     UWORD8 *pu1_dst =
   3846                         ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
   3847                              [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0]]) +
   3848                         trans_size;
   3849 
   3850                     ps_ctxt->s_cmn_opt_func.pf_copy_2d(
   3851                         pu1_dst, i4_recon_stride, pu1_src, i4_recon_stride, trans_size, trans_size);
   3852 
   3853                     ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1] =
   3854                         ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0];
   3855                 }
   3856 
   3857                 pu1_left = (UWORD8 *)pv_cu_left + trans_size * cu_left_stride;
   3858                 pu1_top = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
   3859                                [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0]]) +
   3860                           (trans_size - 1) * i4_recon_stride;
   3861                 pu1_top_left = pu1_left - cu_left_stride;
   3862                 left_strd = cu_left_stride;
   3863 
   3864                 break;
   3865             }
   3866             case 2:
   3867             {
   3868                 ASSERT(
   3869                     (ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1] == 0) ||
   3870                     (ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1] == 1));
   3871 
   3872                 pu1_left = pu1_recon + trans_size - 1;
   3873                 pu1_top = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
   3874                                [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1]]) +
   3875                           (trans_size - 1) * i4_recon_stride + trans_size;
   3876                 pu1_top_left = pu1_top - 1;
   3877                 left_strd = i4_recon_stride;
   3878 
   3879                 break;
   3880             }
   3881             }
   3882 
   3883             pu1_csbf_buf += num_4x4_in_tu;
   3884             cu_pos_x += num_4x4_in_tu;
   3885             ps_nbr_4x4 += num_4x4_in_tu;
   3886             ps_top_nbr_4x4 += num_4x4_in_tu;
   3887             ps_tmp_lt_4x4 = ps_nbr_4x4 - 1;
   3888 
   3889             pu1_intra_pred_mode++;
   3890 
   3891             /* after 2 blocks increment the pointers to bottom blocks */
   3892             if(1 == ctr)
   3893             {
   3894                 pv_curr_src = (UWORD8 *)pv_curr_src - (trans_size << 1);
   3895                 pv_curr_src = (UWORD8 *)pv_curr_src + (trans_size * src_strd);
   3896 
   3897                 pv_pred_org = (UWORD8 *)pv_pred_org - (trans_size << 1);
   3898                 pv_pred_org = (UWORD8 *)pv_pred_org + (trans_size * pred_strd_org);
   3899                 pi2_deq_data -= (trans_size << 1);
   3900                 pi2_deq_data += (trans_size * deq_data_strd);
   3901 
   3902                 pu1_csbf_buf -= (num_4x4_in_tu << 1);
   3903                 pu1_csbf_buf += (num_4x4_in_tu * csbf_strd);
   3904 
   3905                 ps_nbr_4x4 -= (num_4x4_in_tu << 1);
   3906                 ps_nbr_4x4 += (num_4x4_in_tu * num_4x4_in_cu);
   3907                 ps_top_nbr_4x4 = ps_nbr_4x4 - num_4x4_in_cu;
   3908                 ps_tmp_lt_4x4 = ps_left_nbr_4x4 + (num_4x4_in_tu * nbr_4x4_left_strd);
   3909 
   3910                 /* decrement pos x to start */
   3911                 cu_pos_x -= (num_4x4_in_tu << 1);
   3912                 cu_pos_y += num_4x4_in_tu;
   3913             }
   3914         }
   3915 
   3916 #if RDOPT_ENABLE
   3917         /* compute the RDOPT cost for the current TU */
   3918         ai8_cand_rdopt_cost[best_intra_buf_idx] += COMPUTE_RATE_COST_CLIP30(
   3919             ai4_tu_bits[best_intra_buf_idx], ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
   3920 #endif
   3921 
   3922         /* accumulate the costs */
   3923         total_rdopt_cost += ai8_cand_rdopt_cost[best_intra_buf_idx];
   3924 
   3925         if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
   3926         {
   3927             /* Early exit : If the current running cost exceeds
   3928             the prev. best mode cost, break */
   3929             if(total_rdopt_cost > prev_best_rdopt_cost)
   3930             {
   3931                 return (total_rdopt_cost);
   3932             }
   3933         }
   3934 
   3935         /* if transfrom size is 4x4 then only first luma 4x4 will have chroma*/
   3936         chrm_present_flag = (4 != trans_size) ? 1 : INTRA_PRED_CHROMA_IDX_NONE;
   3937 
   3938         pu4_nbr_flags++;
   3939     }
   3940     /* Modify the cost function for this CU. */
   3941     /* loop in for 8x8 blocks */
   3942     if(ps_ctxt->u1_enable_psyRDOPT)
   3943     {
   3944         UWORD8 *pu1_recon_cu;
   3945         WORD32 recon_stride;
   3946         WORD32 curr_pos_x;
   3947         WORD32 curr_pos_y;
   3948         WORD32 start_index;
   3949         WORD32 num_horz_cu_in_ctb;
   3950         WORD32 cu_size;
   3951         WORD32 had_block_size;
   3952 
   3953         /* tODO: sreenivasa ctb size has to be used appropriately */
   3954         had_block_size = 8;
   3955         cu_size = ps_cu_analyse->u1_cu_size; /* todo */
   3956         num_horz_cu_in_ctb = 64 / had_block_size;
   3957 
   3958         curr_pos_x = ps_cu_analyse->b3_cu_pos_x << 3; /* pel units */
   3959         curr_pos_y = ps_cu_analyse->b3_cu_pos_y << 3; /* pel units */
   3960         recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
   3961         pu1_recon_cu =
   3962             ((UWORD8 *)ps_final_prms->s_recon_datastore
   3963                  .apv_luma_recon_bufs[ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0]]);
   3964         /* + \  curr_pos_x + curr_pos_y * recon_stride; */
   3965 
   3966         /* start index to index the source satd of curr cu int he current ctb*/
   3967         start_index =
   3968             (curr_pos_x / had_block_size) + (curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
   3969 
   3970         {
   3971             total_rdopt_cost += ihevce_psy_rd_cost(
   3972                 ps_ctxt->ai4_source_satd_8x8,
   3973                 pu1_recon_cu,
   3974                 recon_stride,
   3975                 1,  //
   3976                 cu_size,
   3977                 0,  // pic type
   3978                 0,  //layer id
   3979                 ps_ctxt->i4_satd_lamda,  // lambda
   3980                 start_index,
   3981                 ps_ctxt->u1_is_input_data_hbd,
   3982                 ps_ctxt->u4_psy_strength,
   3983                 &ps_ctxt->s_cmn_opt_func
   3984 
   3985             );  // 8 bit
   3986         }
   3987     }
   3988 
   3989 #if !FORCE_INTRA_TU_DEPTH_TO_0  //RATIONALISE_NUM_RDO_MODES_IN_PQ_AND_HQ
   3990     if(TU_EQ_SUBCU == func_proc_mode)
   3991     {
   3992         UWORD8 au1_tu_eq_cu_div2_modes[4];
   3993         UWORD8 au1_freq_of_mode[4];
   3994 
   3995         WORD32 i4_num_clusters = ihevce_find_num_clusters_of_identical_points_1D(
   3996             ps_final_prms->au1_intra_pred_mode, au1_tu_eq_cu_div2_modes, au1_freq_of_mode, 4);
   3997 
   3998         if(1 == i4_num_clusters)
   3999         {
   4000             ps_final_prms->u2_num_pus_in_cu = 1;
   4001             ps_final_prms->u1_part_mode = SIZE_2Nx2N;
   4002         }
   4003     }
   4004 #endif
   4005 
   4006     /* store the num TUs*/
   4007     ps_final_prms->u2_num_tus_in_cu = u2_num_tus_in_cu;
   4008 
   4009     /* update the bytes consumed */
   4010     ps_final_prms->i4_num_bytes_ecd_data = ecd_data_bytes_cons;
   4011 
   4012     /* store the current cu size to final prms */
   4013     ps_final_prms->u1_cu_size = ps_cu_analyse->u1_cu_size;
   4014 
   4015     /* cu bits will be having luma residual bits till this point    */
   4016     /* if zero_cbf eval is disabled then cu bits will be zero       */
   4017     ps_final_prms->u4_cu_luma_res_bits = cu_bits;
   4018 
   4019     /* ------------- Chroma processing -------------- */
   4020     /* Chroma rdopt eval for each luma candidate only for HIGH QUALITY/MEDIUM SPEDD preset*/
   4021     if(1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt)
   4022     {
   4023         LWORD64 chrm_rdopt_cost;
   4024         WORD32 chrm_rdopt_tu_bits;
   4025 
   4026         /* Store the current RDOPT cost to enable early exit in chrom_prcs */
   4027         ps_ctxt->as_cu_prms[curr_buf_idx].i8_curr_rdopt_cost = total_rdopt_cost;
   4028 
   4029         chrm_rdopt_cost = ihevce_chroma_cu_prcs_rdopt(
   4030             ps_ctxt,
   4031             curr_buf_idx,
   4032             func_proc_mode,
   4033             ps_chrm_cu_buf_prms->pu1_curr_src,
   4034             ps_chrm_cu_buf_prms->i4_chrm_src_stride,
   4035             ps_chrm_cu_buf_prms->pu1_cu_left,
   4036             ps_chrm_cu_buf_prms->pu1_cu_top,
   4037             ps_chrm_cu_buf_prms->pu1_cu_top_left,
   4038             ps_chrm_cu_buf_prms->i4_cu_left_stride,
   4039             cu_pos_x_8pelunits,
   4040             cu_pos_y_8pelunits,
   4041             &chrm_rdopt_tu_bits,
   4042             i4_alpha_stim_multiplier,
   4043             u1_is_cu_noisy);
   4044 
   4045 #if WEIGH_CHROMA_COST
   4046         chrm_rdopt_cost = (LWORD64)(
   4047             (chrm_rdopt_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
   4048              (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
   4049             CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
   4050 #endif
   4051 
   4052 #if CHROMA_RDOPT_ENABLE
   4053         total_rdopt_cost += chrm_rdopt_cost;
   4054 #endif
   4055         cu_bits += chrm_rdopt_tu_bits;
   4056 
   4057         /* cu bits for chroma residual if chroma rdopt is on       */
   4058         /* if zero_cbf eval is disabled then cu bits will be zero  */
   4059         ps_final_prms->u4_cu_chroma_res_bits = chrm_rdopt_tu_bits;
   4060 
   4061         if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
   4062         {
   4063             /* Early exit : If the current running cost exceeds
   4064             the prev. best mode cost, break */
   4065             if(total_rdopt_cost > prev_best_rdopt_cost)
   4066             {
   4067                 return (total_rdopt_cost);
   4068             }
   4069         }
   4070     }
   4071     else
   4072     {}
   4073 
   4074     /* RDOPT copy States :  Best after all luma TUs to current */
   4075     COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
   4076         &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
   4077                 .s_cabac_ctxt.au1_ctxt_models[0] +
   4078             IHEVC_CAB_COEFFX_PREFIX,
   4079         &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
   4080         IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
   4081 
   4082     /* get the neighbour availability flags for current cu  */
   4083     ihevce_get_only_nbr_flag(
   4084         &s_nbr,
   4085         ps_ctxt->pu1_ctb_nbr_map,
   4086         ps_ctxt->i4_nbr_map_strd,
   4087         (cu_pos_x_8pelunits << 1),
   4088         (cu_pos_y_8pelunits << 1),
   4089         (trans_size << 1),
   4090         (trans_size << 1));
   4091 
   4092     /* call the entropy rdo encode to get the bit estimate for current cu */
   4093     /*if ZERO_CBF eval is enabled then this function will return only CU header bits */
   4094     {
   4095         /*cbf_bits will account for both texture and cbf bits when zero cbf eval flag is 0*/
   4096         WORD32 cbf_bits, header_bits;
   4097 
   4098         header_bits = ihevce_entropy_rdo_encode_cu(
   4099             &ps_ctxt->s_rdopt_entropy_ctxt,
   4100             ps_final_prms,
   4101             cu_pos_x_8pelunits,
   4102             cu_pos_y_8pelunits,
   4103             ps_cu_analyse->u1_cu_size,
   4104             s_nbr.u1_top_avail,
   4105             s_nbr.u1_left_avail,
   4106             &ps_final_prms->pu1_cu_coeffs[0],
   4107             &cbf_bits);
   4108 
   4109         cu_bits += header_bits;
   4110 
   4111         /* cbf bits are excluded from header bits, instead considered as texture bits */
   4112         /* incase if zero cbf eval is disabled then texture bits gets added here */
   4113         ps_final_prms->u4_cu_hdr_bits = (header_bits - cbf_bits);
   4114         ps_final_prms->u4_cu_cbf_bits = cbf_bits;
   4115 
   4116 #if RDOPT_ENABLE
   4117         /* add the cost of coding the cu bits */
   4118         total_rdopt_cost +=
   4119             COMPUTE_RATE_COST_CLIP30(header_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
   4120 #endif
   4121     }
   4122     return (total_rdopt_cost);
   4123 }
   4124 /*!
   4125 ******************************************************************************
   4126 * \if Function name : ihevce_inter_rdopt_cu_ntu \endif
   4127 *
   4128 * \brief
   4129 *    Inter coding unit function which performs the TQ, IT, IQ and recon for luma
   4130 *
   4131 * \param[in] ps_ctxt       enc_loop module ctxt pointer
   4132 * \param[in] ps_inter_cand pointer to inter candidate structure
   4133 * \param[in] pv_src        pointer to source data buffer
   4134 * \param[in] cu_size       Current CU size
   4135 * \param[in] cu_pos_x      cu position x w.r.t. ctb
   4136 * \param[in] cu_pos_y      cu position y w.r.t. ctb
   4137 * \param[in] ps_cu_prms    pointer to CU params; supplies the luma source buffer stride
   4138 * \param[in] curr_buf_idx  buffer index for current output storage
   4139 * \param[in] ps_chrm_cu_buf_prms pointer to chroma buffer pointers structure
   4140 *
   4141 * \return
   4142 *    Rdopt cost
   4143 *
   4144 * \author
   4145 *  Ittiam
   4146 *
   4147 *****************************************************************************
   4148 */
   4149 LWORD64 ihevce_inter_rdopt_cu_ntu(
   4150     ihevce_enc_loop_ctxt_t *ps_ctxt,
   4151     enc_loop_cu_prms_t *ps_cu_prms,
   4152     void *pv_src,
   4153     WORD32 cu_size,
   4154     WORD32 cu_pos_x,
   4155     WORD32 cu_pos_y,
   4156     WORD32 curr_buf_idx,
   4157     enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
   4158     cu_inter_cand_t *ps_inter_cand,
   4159     cu_analyse_t *ps_cu_analyse,
   4160     WORD32 i4_alpha_stim_multiplier)
   4161 {
   4162     enc_loop_cu_final_prms_t *ps_final_prms;
   4163     nbr_4x4_t *ps_nbr_4x4;
   4164     tu_prms_t s_tu_prms[64 * 4];
   4165     tu_prms_t *ps_tu_prms;
   4166 
   4167     WORD32 i4_perform_rdoq;
   4168     WORD32 i4_perform_sbh;
   4169     WORD32 ai4_tu_split_flags[4];
   4170     WORD32 ai4_tu_early_cbf[4];
   4171     WORD32 num_split_flags = 1;
   4172     WORD32 i;
   4173     UWORD8 u1_tu_size;
   4174     UWORD8 *pu1_pred;
   4175     UWORD8 *pu1_ecd_data;
   4176     WORD16 *pi2_deq_data;
   4177     UWORD8 *pu1_csbf_buf;
   4178     UWORD8 *pu1_tu_sz_sft;
   4179     UWORD8 *pu1_tu_posx;
   4180     UWORD8 *pu1_tu_posy;
   4181     LWORD64 total_rdopt_cost;
   4182     WORD32 ctr;
   4183     WORD32 chrm_ctr;
   4184     WORD32 num_tu_in_cu = 0;
   4185     WORD32 pred_stride;
   4186     WORD32 recon_stride;
   4187     WORD32 trans_size = ps_cu_analyse->u1_cu_size;
   4188     WORD32 csbf_strd;
   4189     WORD32 chrm_present_flag;
   4190     WORD32 ecd_data_bytes_cons;
   4191     WORD32 num_4x4_in_cu;
   4192     WORD32 num_4x4_in_tu;
   4193     WORD32 recon_func_mode;
   4194     WORD32 cu_bits;
   4195     UWORD8 u1_compute_spatial_ssd;
   4196 
   4197     /* min_trans_size is initialized to some huge number than usual TU sizes */
   4198     WORD32 i4_min_trans_size = 256;
   4199     /* Get the RDOPT cost of the best CU mode for early_exit */
   4200     LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!curr_buf_idx].i8_best_rdopt_cost;
   4201     WORD32 src_strd = ps_cu_prms->i4_luma_src_stride;
   4202 
   4203     /* model for no residue syntax qt root cbf flag */
   4204     UWORD8 u1_qtroot_cbf_cabac_model = ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_NORES_IDX];
   4205 
   4206     /* backup copy of cabac states for restoration if zero cu reside rdo wins later */
   4207     UWORD8 au1_rdopt_init_ctxt_models[IHEVC_CAB_CTXT_END];
   4208 
   4209     /* for skip cases tables are not reqquired */
   4210     UWORD8 u1_skip_tu_sz_sft = 0;
   4211     UWORD8 u1_skip_tu_posx = 0;
   4212     UWORD8 u1_skip_tu_posy = 0;
   4213     UWORD8 u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy;
   4214 
   4215     /* get the pointers based on curbuf idx */
   4216     ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx];
   4217     ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0];
   4218     pu1_ecd_data = &ps_final_prms->pu1_cu_coeffs[0];
   4219     pi2_deq_data = &ps_final_prms->pi2_cu_deq_coeffs[0];
   4220     csbf_strd = ps_ctxt->i4_cu_csbf_strd;
   4221     pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0];
   4222 
   4223     pred_stride = ps_inter_cand->i4_pred_data_stride;
   4224     recon_stride = cu_size;
   4225     pu1_pred = ps_inter_cand->pu1_pred_data;
   4226     chrm_ctr = 0;
   4227     ecd_data_bytes_cons = 0;
   4228     total_rdopt_cost = 0;
   4229     num_4x4_in_cu = cu_size >> 2;
   4230     recon_func_mode = PRED_MODE_INTER;
   4231     cu_bits = 0;
   4232 
   4233     /* get the 4x4 level postion of current cu */
   4234     cu_pos_x = cu_pos_x << 1;
   4235     cu_pos_y = cu_pos_y << 1;
   4236 
   4237     /* default value for cu coded flag */
   4238     ps_final_prms->u1_is_cu_coded = 0;
   4239 
   4240     /*init of ssd of CU accuumulated over all TU*/
   4241     ps_final_prms->u4_cu_sad = 0;
   4242 
   4243     /* populate the coeffs scan idx */
   4244     ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
   4245 
   4246 #if ENABLE_INTER_ZCU_COST
   4247     /* reset cu not coded cost */
   4248     ps_ctxt->i8_cu_not_coded_cost = 0;
   4249 
   4250     /* backup copy of cabac states for restoration if zero cu reside rdo wins later */
   4251     memcpy(au1_rdopt_init_ctxt_models, &ps_ctxt->au1_rdopt_init_ctxt_models[0], IHEVC_CAB_CTXT_END);
   4252 #endif
   4253 
   4254     if(ps_cu_analyse->u1_cu_size == 64)
   4255     {
   4256         num_split_flags = 4;
   4257         u1_tu_size = 32;
   4258     }
   4259     else
   4260     {
   4261         num_split_flags = 1;
   4262         u1_tu_size = ps_cu_analyse->u1_cu_size;
   4263     }
   4264 
   4265     /* ckeck for skip mode */
   4266     if(1 == ps_final_prms->u1_skip_flag)
   4267     {
   4268         if(64 == cu_size)
   4269         {
   4270             /* TU = CU/2 is set but no trnaform is evaluated  */
   4271             num_tu_in_cu = 4;
   4272             pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0];
   4273             pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0];
   4274             pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0];
   4275         }
   4276         else
   4277         {
   4278             /* TU = CU is set but no trnaform is evaluated  */
   4279             num_tu_in_cu = 1;
   4280             pu1_tu_sz_sft = &u1_skip_tu_sz_sft;
   4281             pu1_tu_posx = &u1_skip_tu_posx;
   4282             pu1_tu_posy = &u1_skip_tu_posy;
   4283         }
   4284 
   4285         recon_func_mode = PRED_MODE_SKIP;
   4286     }
   4287     /* check for PU part mode being AMP or No AMP */
   4288     else if(ps_final_prms->u1_part_mode < SIZE_2NxnU)
   4289     {
   4290         if((SIZE_2Nx2N == ps_final_prms->u1_part_mode) && (cu_size < 64))
   4291         {
   4292             /* TU= CU is evaluated 2Nx2N inter case */
   4293             num_tu_in_cu = 1;
   4294             pu1_tu_sz_sft = &u1_skip_tu_sz_sft;
   4295             pu1_tu_posx = &u1_skip_tu_posx;
   4296             pu1_tu_posy = &u1_skip_tu_posy;
   4297         }
   4298         else
   4299         {
   4300             /* currently TU= CU/2 is evaluated for all inter case */
   4301             num_tu_in_cu = 4;
   4302             pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0];
   4303             pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0];
   4304             pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0];
   4305         }
   4306     }
   4307     else
   4308     {
   4309         /* for AMP cases one level of TU recurssion is done */
   4310         /* based on oreintation of the partitions           */
   4311         num_tu_in_cu = 10;
   4312         pu1_tu_sz_sft = &gau1_inter_tu_shft_amt_amp[ps_final_prms->u1_part_mode - 4][0];
   4313         pu1_tu_posx = &gau1_inter_tu_posx_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0];
   4314         pu1_tu_posy = &gau1_inter_tu_posy_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0];
   4315     }
   4316 
   4317     ps_tu_prms = &s_tu_prms[0];
   4318     num_tu_in_cu = 0;
   4319 
   4320     for(i = 0; i < num_split_flags; i++)
   4321     {
   4322         WORD32 i4_x_off = 0, i4_y_off = 0;
   4323 
   4324         if(i == 1 || i == 3)
   4325         {
   4326             i4_x_off = 32;
   4327         }
   4328 
   4329         if(i == 2 || i == 3)
   4330         {
   4331             i4_y_off = 32;
   4332         }
   4333 
   4334         if(1 == ps_final_prms->u1_skip_flag)
   4335         {
   4336             ai4_tu_split_flags[0] = 0;
   4337             ps_inter_cand->ai4_tu_split_flag[i] = 0;
   4338 
   4339             ai4_tu_early_cbf[0] = 0;
   4340         }
   4341         else
   4342         {
   4343             ai4_tu_split_flags[0] = ps_inter_cand->ai4_tu_split_flag[i];
   4344             ai4_tu_early_cbf[0] = ps_inter_cand->ai4_tu_early_cbf[i];
   4345         }
   4346 
   4347         ps_tu_prms->u1_tu_size = u1_tu_size;
   4348 
   4349         ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update(
   4350             ps_tu_prms,
   4351             &num_tu_in_cu,
   4352             0,
   4353             ai4_tu_split_flags[0],
   4354             ai4_tu_early_cbf[0],
   4355             i4_x_off,
   4356             i4_y_off);
   4357     }
   4358 
   4359     /* loop for all tu blocks in current cu */
   4360     ps_tu_prms = &s_tu_prms[0];
   4361     for(ctr = 0; ctr < num_tu_in_cu; ctr++)
   4362     {
   4363         trans_size = ps_tu_prms->u1_tu_size;
   4364 
   4365         if(i4_min_trans_size > trans_size)
   4366         {
   4367             i4_min_trans_size = trans_size;
   4368         }
   4369         ps_tu_prms++;
   4370     }
   4371 
   4372     if(ps_ctxt->i1_cu_qp_delta_enable)
   4373     {
   4374         WORD32 i4_act_counter = 0, i4_act_counter_lamda = 0;
   4375 
   4376         if(ps_cu_analyse->u1_cu_size == 64)
   4377         {
   4378             ASSERT(
   4379                 (i4_min_trans_size == 32) || (i4_min_trans_size == 16) ||
   4380                 (i4_min_trans_size == 8) || (i4_min_trans_size == 4));
   4381             i4_act_counter = (i4_min_trans_size == 16) +
   4382                              2 * ((i4_min_trans_size == 8) || (i4_min_trans_size == 4));
   4383             i4_act_counter_lamda = 3;
   4384         }
   4385         else if(ps_cu_analyse->u1_cu_size == 32)
   4386         {
   4387             ASSERT(
   4388                 (i4_min_trans_size == 32) || (i4_min_trans_size == 16) ||
   4389                 (i4_min_trans_size == 8) || (i4_min_trans_size == 4));
   4390             i4_act_counter = (i4_min_trans_size == 16) +
   4391                              2 * ((i4_min_trans_size == 8) || (i4_min_trans_size == 4));
   4392             i4_act_counter_lamda = 0;
   4393         }
   4394         else if(ps_cu_analyse->u1_cu_size == 16)
   4395         {
   4396             ASSERT(
   4397                 (i4_min_trans_size == 16) || (i4_min_trans_size == 8) || (i4_min_trans_size == 4));
   4398             i4_act_counter = (i4_min_trans_size == 8) || (i4_min_trans_size == 4);
   4399             i4_act_counter_lamda = 0;
   4400         }
   4401         else if(ps_cu_analyse->u1_cu_size == 8)
   4402         {
   4403             ASSERT((i4_min_trans_size == 8) || (i4_min_trans_size == 4));
   4404             i4_act_counter = 1;
   4405             i4_act_counter_lamda = 0;
   4406         }
   4407         else
   4408         {
   4409             ASSERT(0);
   4410         }
   4411         if(ps_ctxt->i4_use_ctb_level_lamda)
   4412         {
   4413             ihevce_compute_cu_level_QP(
   4414                 ps_ctxt, ps_cu_analyse->i4_act_factor[i4_act_counter][0], -1, 0);
   4415         }
   4416         else
   4417         {
   4418             ihevce_compute_cu_level_QP(
   4419                 ps_ctxt,
   4420                 ps_cu_analyse->i4_act_factor[i4_act_counter][0],
   4421                 ps_cu_analyse->i4_act_factor[i4_act_counter_lamda][0],
   4422                 0);
   4423         }
   4424 
   4425         ps_cu_analyse->i1_cu_qp = ps_ctxt->i4_cu_qp;
   4426     }
   4427     if(u1_is_cu_noisy && !ps_ctxt->u1_enable_psyRDOPT)
   4428     {
   4429         ps_ctxt->i8_cl_ssd_lambda_qf =
   4430             ((float)ps_ctxt->i8_cl_ssd_lambda_qf * (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) /
   4431              100.0f);
   4432         ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
   4433             ((float)ps_ctxt->i8_cl_ssd_lambda_chroma_qf *
   4434              (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
   4435     }
   4436 
   4437     u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
   4438                              (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
   4439                              CONVERT_SSDS_TO_SPATIAL_DOMAIN;
   4440 
   4441     if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
   4442     {
   4443         u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
   4444                                  CONVERT_SSDS_TO_SPATIAL_DOMAIN;
   4445     }
   4446 
   4447     if(!u1_compute_spatial_ssd)
   4448     {
   4449         ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0;
   4450         ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
   4451     }
   4452     else
   4453     {
   4454         ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 1;
   4455     }
   4456 
   4457     ps_tu_prms = &s_tu_prms[0];
   4458 
   4459     ASSERT(num_tu_in_cu <= 256);
   4460 
   4461     /* RDOPT copy States :  TU init (best until prev TU) to current */
   4462     memcpy(
   4463         &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
   4464              .s_cabac_ctxt.au1_ctxt_models[0],
   4465         &ps_ctxt->au1_rdopt_init_ctxt_models[0],
   4466         IHEVC_CAB_COEFFX_PREFIX);
   4467 
   4468     for(ctr = 0; ctr < num_tu_in_cu; ctr++)
   4469     {
   4470         WORD32 curr_bytes;
   4471         WORD32 tx_size;
   4472         WORD32 cbf, zero_col, zero_row;
   4473         LWORD64 rdopt_cost;
   4474         UWORD8 u1_is_recon_available;
   4475 
   4476         WORD32 curr_pos_x;
   4477         WORD32 curr_pos_y;
   4478         nbr_4x4_t *ps_cur_nbr_4x4;
   4479         UWORD8 *pu1_cur_pred;
   4480         UWORD8 *pu1_cur_src;
   4481         UWORD8 *pu1_cur_recon;
   4482         WORD16 *pi2_cur_deq_data;
   4483         UWORD32 u4_tu_sad;
   4484         WORD32 tu_bits;
   4485 
   4486         WORD32 i4_recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
   4487 
   4488         trans_size = ps_tu_prms->u1_tu_size;
   4489         /* get the current pos x and pos y in pixels */
   4490         curr_pos_x = ps_tu_prms->u1_x_off;  //((cu_size >> 2) * pu1_tu_posx[ctr]);
   4491         curr_pos_y = ps_tu_prms->u1_y_off;  //((cu_size >> 2) * pu1_tu_posy[ctr]);
   4492 
   4493         num_4x4_in_tu = trans_size >> 2;
   4494 
   4495 #if FORCE_8x8_TFR
   4496         if(cu_size == 64)
   4497         {
   4498             curr_pos_x = ((cu_size >> 3) * pu1_tu_posx[ctr]);
   4499             curr_pos_y = ((cu_size >> 3) * pu1_tu_posy[ctr]);
   4500         }
   4501 #endif
   4502 
   4503         /* increment the pointers to start of current TU  */
   4504         pu1_cur_src = ((UWORD8 *)pv_src + curr_pos_x);
   4505         pu1_cur_src += (curr_pos_y * src_strd);
   4506         pu1_cur_pred = (pu1_pred + curr_pos_x);
   4507         pu1_cur_pred += (curr_pos_y * pred_stride);
   4508         pi2_cur_deq_data = pi2_deq_data + curr_pos_x;
   4509         pi2_cur_deq_data += (curr_pos_y * cu_size);
   4510         pu1_cur_recon = ((UWORD8 *)ps_final_prms->s_recon_datastore.apv_luma_recon_bufs[0]) +
   4511                         curr_pos_x + curr_pos_y * i4_recon_stride;
   4512 
   4513         ps_cur_nbr_4x4 = (ps_nbr_4x4 + (curr_pos_x >> 2));
   4514         ps_cur_nbr_4x4 += ((curr_pos_y >> 2) * num_4x4_in_cu);
   4515 
   4516         /* RDOPT copy States :  TU init (best until prev TU) to current */
   4517         COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
   4518             &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
   4519                     .s_cabac_ctxt.au1_ctxt_models[0] +
   4520                 IHEVC_CAB_COEFFX_PREFIX,
   4521             &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
   4522             IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
   4523 
   4524         i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq;
   4525         i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh;
   4526 
   4527         /* 2 multi-dimensional arrays based on trans size of rounding factor to be added here */
   4528         /* arrays are for rounding factor corr. to 0-1 decision and 1-2 decision */
   4529         /* Currently the complete array will contain only single value*/
   4530         /*The rounding factor is calculated with the formula
   4531         Deadzone val = (((R1 - R0) * (2^(-8/3)) * lamMod) + 1)/2
   4532         rounding factor = (1 - DeadZone Val)
   4533 
   4534         Assumption: Cabac states of All the sub-blocks in the TU are considered independent
   4535         */
   4536         if((ps_ctxt->i4_quant_rounding_level == TU_LEVEL_QUANT_ROUNDING) && (ctr != 0))
   4537         {
   4538             double i4_lamda_modifier;
   4539 
   4540             if((BSLICE == ps_ctxt->i1_slice_type) && (ps_ctxt->i4_temporal_layer_id))
   4541             {
   4542                 i4_lamda_modifier = ps_ctxt->i4_lamda_modifier *
   4543                                     CLIP3((((double)(ps_ctxt->i4_cu_qp - 12)) / 6.0), 2.00, 4.00);
   4544             }
   4545             else
   4546             {
   4547                 i4_lamda_modifier = ps_ctxt->i4_lamda_modifier;
   4548             }
   4549             if(ps_ctxt->i4_use_const_lamda_modifier)
   4550             {
   4551                 if(ISLICE == ps_ctxt->i1_slice_type)
   4552                 {
   4553                     i4_lamda_modifier = ps_ctxt->f_i_pic_lamda_modifier;
   4554                 }
   4555                 else
   4556                 {
   4557                     i4_lamda_modifier = CONST_LAMDA_MOD_VAL;
   4558                 }
   4559             }
   4560             ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] =
   4561                 &ps_ctxt->i4_quant_round_tu[0][0];
   4562             ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] =
   4563                 &ps_ctxt->i4_quant_round_tu[1][0];
   4564 
   4565             memset(
   4566                 ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
   4567                 0,
   4568                 trans_size * trans_size * sizeof(WORD32));
   4569             memset(
   4570                 ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
   4571                 0,
   4572                 trans_size * trans_size * sizeof(WORD32));
   4573 
   4574             ihevce_quant_rounding_factor_gen(
   4575                 trans_size,
   4576                 1,
   4577                 &ps_ctxt->s_rdopt_entropy_ctxt,
   4578                 ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
   4579                 ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
   4580                 i4_lamda_modifier,
   4581                 1);
   4582         }
   4583         else
   4584         {
   4585             ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] =
   4586                 ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[trans_size >> 3];
   4587             ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] =
   4588                 ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[trans_size >> 3];
   4589         }
   4590 
   4591         /* call T Q IT IQ and recon function */
   4592         cbf = ihevce_t_q_iq_ssd_scan_fxn(
   4593             ps_ctxt,
   4594             pu1_cur_pred,
   4595             pred_stride,
   4596             pu1_cur_src,
   4597             src_strd,
   4598             pi2_cur_deq_data,
   4599             cu_size,
   4600             pu1_cur_recon,
   4601             i4_recon_stride,
   4602             pu1_ecd_data,
   4603             pu1_csbf_buf,
   4604             csbf_strd,
   4605             trans_size,
   4606             recon_func_mode,
   4607             &rdopt_cost,
   4608             &curr_bytes,
   4609             &tu_bits,
   4610             &u4_tu_sad,
   4611             &zero_col,
   4612             &zero_row,
   4613             &u1_is_recon_available,
   4614             i4_perform_rdoq,
   4615             i4_perform_sbh,
   4616 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
   4617             i4_alpha_stim_multiplier,
   4618             u1_is_cu_noisy,
   4619 #endif
   4620             u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
   4621             ps_ctxt->u1_use_early_cbf_data ? ps_tu_prms->i4_early_cbf : 1);
   4622 
   4623 #if COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL && !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
   4624         if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
   4625         {
   4626 #if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
   4627             rdopt_cost = ihevce_inject_stim_into_distortion(
   4628                 pu1_cur_src,
   4629                 src_strd,
   4630                 pu1_cur_pred,
   4631                 pred_stride,
   4632                 rdopt_cost,
   4633                 i4_alpha_stim_multiplier,
   4634                 trans_size,
   4635                 0,
   4636                 ps_ctxt->u1_enable_psyRDOPT,
   4637                 NULL_PLANE);
   4638 #else
   4639             if(u1_compute_spatial_ssd && u1_is_recon_available)
   4640             {
   4641                 rdopt_cost = ihevce_inject_stim_into_distortion(
   4642                     pu1_cur_src,
   4643                     src_strd,
   4644                     pu1_cur_recon,
   4645                     i4_recon_stride,
   4646                     rdopt_cost,
   4647                     i4_alpha_stim_multiplier,
   4648                     trans_size,
   4649                     0,
   4650                     NULL_PLANE);
   4651             }
   4652             else
   4653             {
   4654                 rdopt_cost = ihevce_inject_stim_into_distortion(
   4655                     pu1_cur_src,
   4656                     src_strd,
   4657                     pu1_cur_pred,
   4658                     pred_stride,
   4659                     rdopt_cost,
   4660                     i4_alpha_stim_multiplier,
   4661                     trans_size,
   4662                     0,
   4663                     ps_ctxt->u1_enable_psyRDOPT,
   4664                     NULL_PLANE);
   4665             }
   4666 #endif
   4667         }
   4668 #endif
   4669 
   4670         if(u1_compute_spatial_ssd && u1_is_recon_available)
   4671         {
   4672             ps_final_prms->s_recon_datastore.au1_bufId_with_winning_LumaRecon[ctr] = 0;
   4673         }
   4674         else
   4675         {
   4676             ps_final_prms->s_recon_datastore.au1_bufId_with_winning_LumaRecon[ctr] = UCHAR_MAX;
   4677         }
   4678 
   4679         /* accumulate the TU sad into cu sad */
   4680         ps_final_prms->u4_cu_sad += u4_tu_sad;
   4681 
   4682         /* accumulate the TU bits into cu bits */
   4683         cu_bits += tu_bits;
   4684 
   4685         /* inter cu is coded if any of the tu is coded in it */
   4686         ps_final_prms->u1_is_cu_coded |= cbf;
   4687 
   4688         /* call the entropy function to get the bits */
   4689         /* add that to rd opt cost(SSD)              */
   4690 
   4691         /* update the bytes */
   4692         ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons;
   4693         ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed = curr_bytes;
   4694         /* update the zero_row and col info for the final mode */
   4695         ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_col = zero_col;
   4696         ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_row = zero_row;
   4697 
   4698         /* update the bytes */
   4699         ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons;
   4700 
   4701         /* update the total bytes cons */
   4702         ecd_data_bytes_cons += curr_bytes;
   4703         pu1_ecd_data += curr_bytes;
   4704 
   4705         /* RDOPT copy States :  New updated after curr TU to TU init */
   4706         if(0 != cbf)
   4707         {
   4708             /* update to new state only if CBF is non zero */
   4709             COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
   4710                 &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
   4711                 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
   4712                         .s_cabac_ctxt.au1_ctxt_models[0] +
   4713                     IHEVC_CAB_COEFFX_PREFIX,
   4714                 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
   4715         }
   4716 
   4717         /* by default chroma present is set to 1*/
   4718         chrm_present_flag = 1;
   4719         if(4 == trans_size)
   4720         {
   4721             /* if tusize is 4x4 then only first luma 4x4 will have chroma*/
   4722             if(0 != chrm_ctr)
   4723             {
   4724                 chrm_present_flag = INTRA_PRED_CHROMA_IDX_NONE;
   4725             }
   4726 
   4727             /* increment the chrm ctr unconditionally */
   4728             chrm_ctr++;
   4729 
   4730             /* after ctr reached 4 reset it */
   4731             if(4 == chrm_ctr)
   4732             {
   4733                 chrm_ctr = 0;
   4734             }
   4735         }
   4736 
   4737         ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = cbf;
   4738         ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0;
   4739         ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0;
   4740         ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0;
   4741         ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0;
   4742         ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_chroma_intra_mode_idx = chrm_present_flag;
   4743         ps_final_prms->as_tu_enc_loop[ctr].s_tu.b7_qp = ps_ctxt->i4_cu_qp;
   4744         ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_first_tu_in_cu = 0;
   4745         ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_transquant_bypass = 0;
   4746         GETRANGE(tx_size, trans_size);
   4747         ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3;
   4748         ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x + (curr_pos_x >> 2);
   4749         ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y + (curr_pos_y >> 2);
   4750 
   4751         /* replicate the nbr 4x4 structure for all 4x4 blocks of the current TU */
   4752         ps_cur_nbr_4x4->b1_y_cbf = cbf;
   4753         /*copy the cu qp. This will be overwritten by qp calculated based on skip flag at final stage of cu mode decide*/
   4754         ps_cur_nbr_4x4->b8_qp = ps_ctxt->i4_cu_qp;
   4755 
   4756         /* Qp and cbf are stored for the all 4x4 in TU */
   4757         {
   4758             WORD32 i, j;
   4759             nbr_4x4_t *ps_tmp_4x4;
   4760             ps_tmp_4x4 = ps_cur_nbr_4x4;
   4761 
   4762             for(i = 0; i < num_4x4_in_tu; i++)
   4763             {
   4764                 for(j = 0; j < num_4x4_in_tu; j++)
   4765                 {
   4766                     ps_tmp_4x4[j].b8_qp = ps_ctxt->i4_cu_qp;
   4767                     ps_tmp_4x4[j].b1_y_cbf = cbf;
   4768                 }
   4769                 /* row level update*/
   4770                 ps_tmp_4x4 += num_4x4_in_cu;
   4771             }
   4772         }
   4773 
   4774 #if RDOPT_ENABLE
   4775         /* compute the rdopt cost */
   4776         rdopt_cost +=
   4777             COMPUTE_RATE_COST_CLIP30(tu_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
   4778 #endif
   4779         /* accumulate the costs */
   4780         total_rdopt_cost += rdopt_cost;
   4781 
   4782         ps_tu_prms++;
   4783 
   4784         if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
   4785         {
   4786             /* Early exit : If the current running cost exceeds
   4787             the prev. best mode cost, break */
   4788             if(total_rdopt_cost > prev_best_rdopt_cost)
   4789             {
   4790                 return (total_rdopt_cost);
   4791             }
   4792         }
   4793     }
   4794 
   4795     /* Modify the cost function for this CU. */
   4796     /* loop in for 8x8 blocks */
   4797     if(ps_ctxt->u1_enable_psyRDOPT)
   4798     {
   4799         UWORD8 *pu1_recon_cu;
   4800         WORD32 recon_stride;
   4801         WORD32 curr_pos_x;
   4802         WORD32 curr_pos_y;
   4803         WORD32 start_index;
   4804         WORD32 num_horz_cu_in_ctb;
   4805         WORD32 had_block_size;
   4806 
   4807         /* TODO: sreenivasa ctb size has to be used appropriately */
   4808         had_block_size = 8;
   4809         num_horz_cu_in_ctb = 64 / had_block_size;
   4810 
   4811         curr_pos_x = cu_pos_x << 2; /* pel units */
   4812         curr_pos_y = cu_pos_y << 2; /* pel units */
   4813         recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
   4814         pu1_recon_cu = ((UWORD8 *)ps_final_prms->s_recon_datastore
   4815                             .apv_luma_recon_bufs[0]);  // already pointing to the current CU recon
   4816         //+ \curr_pos_x + curr_pos_y * recon_stride;
   4817 
   4818         /* start index to index the source satd of curr cu in the current ctb */
   4819         start_index =
   4820             (curr_pos_x / had_block_size) + (curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
   4821 
   4822         {
   4823             total_rdopt_cost += ihevce_psy_rd_cost(
   4824                 ps_ctxt->ai4_source_satd_8x8,
   4825                 pu1_recon_cu,
   4826                 recon_stride,
   4827                 1,  //howz stride
   4828                 cu_size,
   4829                 0,  // pic type
   4830                 0,  //layer id
   4831                 ps_ctxt->i4_satd_lamda,  // lambda
   4832                 start_index,
   4833                 ps_ctxt->u1_is_input_data_hbd,
   4834                 ps_ctxt->u4_psy_strength,
   4835                 &ps_ctxt->s_cmn_opt_func);  // 8 bit
   4836         }
   4837     }
   4838 
   4839     /* store the num TUs*/
   4840     ps_final_prms->u2_num_tus_in_cu = num_tu_in_cu;
   4841 
   4842     /* update the bytes consumed */
   4843     ps_final_prms->i4_num_bytes_ecd_data = ecd_data_bytes_cons;
   4844 
   4845     /* store the current cu size to final prms */
   4846     ps_final_prms->u1_cu_size = cu_size;
   4847 
   4848     /* cu bits will be having luma residual bits till this point    */
   4849     /* if zero_cbf eval is disabled then cu bits will be zero       */
   4850     ps_final_prms->u4_cu_luma_res_bits = cu_bits;
   4851 
   4852     /* ------------- Chroma processing -------------- */
   4853     /* Chroma rdopt eval for each luma candidate only for HIGH QUALITY/MEDIUM SPEED preset */
   4854     if(1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt)
   4855     {
   4856         LWORD64 chrm_rdopt_cost;
   4857         WORD32 chrm_rdopt_tu_bits;
   4858 
   4859         /* Store the current RDOPT cost to enable early exit in chrom_prcs */
   4860         ps_ctxt->as_cu_prms[curr_buf_idx].i8_curr_rdopt_cost = total_rdopt_cost;
   4861 
   4862         chrm_rdopt_cost = ihevce_chroma_cu_prcs_rdopt(
   4863             ps_ctxt,
   4864             curr_buf_idx,
   4865             0, /* TU mode : Don't care in Inter patrh */
   4866             ps_chrm_cu_buf_prms->pu1_curr_src,
   4867             ps_chrm_cu_buf_prms->i4_chrm_src_stride,
   4868             ps_chrm_cu_buf_prms->pu1_cu_left,
   4869             ps_chrm_cu_buf_prms->pu1_cu_top,
   4870             ps_chrm_cu_buf_prms->pu1_cu_top_left,
   4871             ps_chrm_cu_buf_prms->i4_cu_left_stride,
   4872             (cu_pos_x >> 1),
   4873             (cu_pos_y >> 1),
   4874             &chrm_rdopt_tu_bits,
   4875             i4_alpha_stim_multiplier,
   4876             u1_is_cu_noisy);
   4877 
   4878 #if WEIGH_CHROMA_COST
   4879         chrm_rdopt_cost = (LWORD64)(
   4880             (chrm_rdopt_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
   4881              (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
   4882             CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
   4883 #endif
   4884 
   4885 #if CHROMA_RDOPT_ENABLE
   4886         total_rdopt_cost += chrm_rdopt_cost;
   4887 #endif
   4888         cu_bits += chrm_rdopt_tu_bits;
   4889 
   4890         /* during chroma evaluation if skip decision was over written     */
   4891         /* then the current skip candidate is set to a non skip candidate */
   4892         ps_inter_cand->b1_skip_flag = ps_final_prms->u1_skip_flag;
   4893 
   4894         /* cu bits for chroma residual if chroma rdopt is on       */
   4895         /* if zero_cbf eval is disabled then cu bits will be zero  */
   4896         ps_final_prms->u4_cu_chroma_res_bits = chrm_rdopt_tu_bits;
   4897 
   4898         if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
   4899         {
   4900             /* Early exit : If the current running cost exceeds
   4901             the prev. best mode cost, break */
   4902             if(total_rdopt_cost > prev_best_rdopt_cost)
   4903             {
   4904                 return (total_rdopt_cost);
   4905             }
   4906         }
   4907     }
   4908     else
   4909     {}
   4910 
   4911 #if SHRINK_INTER_TUTREE
   4912     /* ------------- Quadtree TU split  optimization ------------  */
   4913     if(ps_final_prms->u1_is_cu_coded)
   4914     {
   4915         ps_final_prms->u2_num_tus_in_cu = ihevce_shrink_inter_tu_tree(
   4916             &ps_final_prms->as_tu_enc_loop[0],
   4917             &ps_final_prms->as_tu_enc_loop_temp_prms[0],
   4918             &ps_final_prms->s_recon_datastore,
   4919             num_tu_in_cu,
   4920             (ps_ctxt->u1_chroma_array_type == 2));
   4921     }
   4922 #endif
   4923 
   4924     /* RDOPT copy States :  Best after all luma TUs (and chroma,if enabled)to current */
   4925     COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
   4926         &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
   4927                 .s_cabac_ctxt.au1_ctxt_models[0] +
   4928             IHEVC_CAB_COEFFX_PREFIX,
   4929         &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
   4930         IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
   4931 
   4932     /* -------- Bit estimate for RD opt -------------- */
   4933     {
   4934         nbr_avail_flags_t s_nbr;
   4935         /*cbf_bits will account for both texture and cbf bits when zero cbf eval flag is 0*/
   4936         WORD32 cbf_bits, header_bits;
   4937 
   4938         /* get the neighbour availability flags for current cu  */
   4939         ihevce_get_only_nbr_flag(
   4940             &s_nbr,
   4941             ps_ctxt->pu1_ctb_nbr_map,
   4942             ps_ctxt->i4_nbr_map_strd,
   4943             cu_pos_x,
   4944             cu_pos_y,
   4945             (cu_size >> 2),
   4946             (cu_size >> 2));
   4947 
   4948         /* call the entropy rdo encode to get the bit estimate for current cu */
   4949         header_bits = ihevce_entropy_rdo_encode_cu(
   4950             &ps_ctxt->s_rdopt_entropy_ctxt,
   4951             ps_final_prms,
   4952             (cu_pos_x >> 1), /*  back to 8x8 pel units   */
   4953             (cu_pos_y >> 1), /*  back to 8x8 pel units   */
   4954             cu_size,
   4955             ps_ctxt->u1_disable_intra_eval ? !DISABLE_TOP_SYNC && s_nbr.u1_top_avail
   4956                                            : s_nbr.u1_top_avail,
   4957             s_nbr.u1_left_avail,
   4958             &ps_final_prms->pu1_cu_coeffs[0],
   4959             &cbf_bits);
   4960 
   4961         cu_bits += header_bits;
   4962 
   4963         /* cbf bits are excluded from header bits, instead considered as texture bits */
   4964         /* incase if zero cbf eval is disabled then texture bits gets added here */
   4965         ps_final_prms->u4_cu_hdr_bits = (header_bits - cbf_bits);
   4966         ps_final_prms->u4_cu_cbf_bits = cbf_bits;
   4967 
   4968 #if RDOPT_ENABLE
   4969         /* add the cost of coding the header bits */
   4970         total_rdopt_cost +=
   4971             COMPUTE_RATE_COST_CLIP30(header_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
   4972 
   4973 #if ENABLE_INTER_ZCU_COST
   4974         /* If cu is coded, Evaluate not coded cost and check if it improves over coded cost */
   4975         if(ps_final_prms->u1_is_cu_coded && (ZCBF_ENABLE == ps_ctxt->i4_zcbf_rdo_level))
   4976         {
   4977             LWORD64 i8_cu_not_coded_cost = ps_ctxt->i8_cu_not_coded_cost;
   4978 
   4979             WORD32 is_2nx2n_mergecu = (SIZE_2Nx2N == ps_final_prms->u1_part_mode) &&
   4980                                       (1 == ps_final_prms->as_pu_enc_loop[0].b1_merge_flag);
   4981 
   4982             cab_ctxt_t *ps_cab_ctxt =
   4983                 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx].s_cabac_ctxt;
   4984 
   4985             /* Read header bits generated after ihevce_entropy_rdo_encode_cu() call */
   4986             UWORD32 u4_cu_hdr_bits_q12 = ps_cab_ctxt->u4_header_bits_estimated_q12;
   4987 
   4988             /* account for coding qt_root_cbf = 0 */
   4989             /* First subtract cost for coding as 1 (part of header bits) and then add cost for coding as 0 */
   4990             u4_cu_hdr_bits_q12 += gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 0];
   4991             if(u4_cu_hdr_bits_q12 < gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1])
   4992                 u4_cu_hdr_bits_q12 = 0;
   4993             else
   4994                 u4_cu_hdr_bits_q12 -= gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1];
   4995 
   4996             /* add the cost of coding the header bits */
   4997             i8_cu_not_coded_cost += COMPUTE_RATE_COST_CLIP30(
   4998                 u4_cu_hdr_bits_q12 /* ps_final_prms->u4_cu_hdr_bits */,
   4999                 ps_ctxt->i8_cl_ssd_lambda_qf,
   5000                 (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
   5001 
   5002             if(ps_ctxt->u1_enable_psyRDOPT)
   5003             {
   5004                 i8_cu_not_coded_cost = total_rdopt_cost + 1;
   5005             }
   5006 
   5007             /* Evaluate qtroot cbf rdo; exclude 2Nx2N Merge as skip cu is explicitly evaluated */
   5008             if((i8_cu_not_coded_cost <= total_rdopt_cost) && (!is_2nx2n_mergecu))
   5009             {
   5010                 WORD32 tx_size;
   5011 
   5012                 /* force cu as not coded and update the cost */
   5013                 ps_final_prms->u1_is_cu_coded = 0;
   5014                 ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
   5015                 ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0;
   5016 
   5017                 total_rdopt_cost = i8_cu_not_coded_cost;
   5018 
   5019                 /* reset num TUs to 1 unless cu size is 64 (then 4) */
   5020                 ps_final_prms->u2_num_tus_in_cu = (64 == cu_size) ? 4 : 1;
   5021                 trans_size = (64 == cu_size) ? 32 : cu_size;
   5022                 GETRANGE(tx_size, trans_size);
   5023 
   5024                 /* reset the bytes consumed */
   5025                 ps_final_prms->i4_num_bytes_ecd_data = 0;
   5026 
   5027                 /* reset texture related bits and roll back header bits*/
   5028                 ps_final_prms->u4_cu_cbf_bits = 0;
   5029                 ps_final_prms->u4_cu_luma_res_bits = 0;
   5030                 ps_final_prms->u4_cu_chroma_res_bits = 0;
   5031                 ps_final_prms->u4_cu_hdr_bits =
   5032                     (u4_cu_hdr_bits_q12 + (1 << (CABAC_FRAC_BITS_Q - 1))) >> CABAC_FRAC_BITS_Q;
   5033 
   5034                 /* update cabac model with qtroot cbf = 0 decision */
   5035                 ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_NORES_IDX] =
   5036                     gau1_ihevc_next_state[u1_qtroot_cbf_cabac_model << 1];
   5037 
   5038                 /* restore untouched cabac models for, tusplit, cbfs, texture etc */
   5039                 memcpy(
   5040                     &ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_SPLIT_TFM],
   5041                     &au1_rdopt_init_ctxt_models[IHEVC_CAB_SPLIT_TFM],
   5042                     (IHEVC_CAB_CTXT_END - IHEVC_CAB_SPLIT_TFM));
   5043 
   5044                 /* mark all tus as not coded for final eval */
   5045                 for(ctr = 0; ctr < ps_final_prms->u2_num_tus_in_cu; ctr++)
   5046                 {
   5047                     WORD32 curr_pos_x = (ctr & 0x1) ? (trans_size >> 2) : 0;
   5048                     WORD32 curr_pos_y = (ctr & 0x2) ? (trans_size >> 2) : 0;
   5049 
   5050                     nbr_4x4_t *ps_cur_nbr_4x4 =
   5051                         ps_nbr_4x4 + curr_pos_x + (curr_pos_y * num_4x4_in_cu);
   5052 
   5053                     num_4x4_in_tu = trans_size >> 2;
   5054 
   5055                     ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed = 0;
   5056                     ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cb_bytes_consumed[0] = 0;
   5057                     ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cr_bytes_consumed[0] = 0;
   5058 
   5059                     ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = 0;
   5060                     ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0;
   5061                     ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0;
   5062 
   5063                     ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0;
   5064                     ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0;
   5065 
   5066                     ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3;
   5067                     ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x + curr_pos_x;
   5068                     ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y + curr_pos_y;
   5069 
   5070                     /* reset cbf for the all 4x4 in TU */
   5071                     {
   5072                         WORD32 i, j;
   5073                         nbr_4x4_t *ps_tmp_4x4;
   5074                         ps_tmp_4x4 = ps_cur_nbr_4x4;
   5075 
   5076                         for(i = 0; i < num_4x4_in_tu; i++)
   5077                         {
   5078                             for(j = 0; j < num_4x4_in_tu; j++)
   5079                             {
   5080                                 ps_tmp_4x4[j].b1_y_cbf = 0;
   5081                             }
   5082                             /* row level update*/
   5083                             ps_tmp_4x4 += num_4x4_in_cu;
   5084                         }
   5085                     }
   5086                 }
   5087             }
   5088         }
   5089 #endif /* ENABLE_INTER_ZCU_COST */
   5090 
   5091 #endif /* RDOPT_ENABLE */
   5092     }
   5093 
   5094     return (total_rdopt_cost);
   5095 }
   5096 
   5097 #if ENABLE_RDO_BASED_TU_RECURSION
        /*!
        ******************************************************************************
        * \if Function name : ihevce_inter_tu_tree_selector_and_rdopt_cost_computer \endif
        *
        * \brief
        *    RD-opt cost evaluation of one inter CU candidate using a TU tree.
        *
        *    Sequence of operations performed by this function:
        *      1. Optionally derives the CU level QP (when cu qp delta is enabled)
        *         and discounts the SSD lambdas for noisy CUs.
        *      2. Initialises a TU tree for the CU (ihevce_tu_tree_init) and, for
        *         quality presets >= IHEVCE_QUALITY_P3 on non-skip candidates, maps
        *         the candidate's TU split flags onto that tree.
        *      3. Optionally generates chroma inter prediction, then runs the TU
        *         tree selector (bottom-up or top-down variant, chosen at compile
        *         time by ENABLE_TOP_DOWN_TU_RECURSION).
        *      4. Calls ihevce_tu_selector_debriefer with pointers to the cost,
        *         bytes-consumed, bits and TU-count accumulators.
        *      5. Adds psy-RD cost, chroma RD-opt cost and header bit cost as
        *         enabled, and (under ENABLE_INTER_ZCU_COST) checks whether
        *         signalling the CU as "not coded" is cheaper.
        *
        * \param[in,out] ps_ctxt  enc_loop module ctxt pointer; several members are
        *                         updated (scan idx, lambdas, CU QP, cabac models,
        *                         as_cu_prms[curr_buf_idx], as_cu_nbr[curr_buf_idx])
        * \param[in] ps_cu_prms   CU level params; only i4_luma_src_stride and
        *                         u1_is_cu_noisy are read here
        * \param[in] pv_src       luma source pointer handed to the TU selector
        * \param[in] cu_size      CU size in pixels (compared against 64, and
        *                         cu_size >> 2 is used as the number of 4x4 units)
        * \param[in] cu_pos_x     CU x position; doubled on entry to obtain 4x4
        *                         units, so the caller passes 8x8 units
        *                         (presumably w.r.t. the CTB - confirm with caller)
        * \param[in] cu_pos_y     CU y position, same units as cu_pos_x
        * \param[in] curr_buf_idx index of the CU buffer set being filled;
        *                         !curr_buf_idx is used to fetch the previous best
        *                         cost
        * \param[in] ps_chrm_cu_buf_prms  chroma source / neighbour pointers and
        *                         strides
        * \param[in,out] ps_inter_cand    inter candidate (pred data, PUs, split
        *                         flags); b1_skip_flag is overwritten from the
        *                         final params after chroma RD-opt
        * \param[in,out] ps_cu_analyse    CU analyse struct; i1_cu_qp is written
        *                         when cu qp delta is enabled
        * \param[in] i4_alpha_stim_multiplier  forwarded to the TU selector (only
        *                         when USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
        *                         is set) and to ihevce_chroma_cu_prcs_rdopt
        *
        * \return
        *    Total RD-opt cost of the candidate. On the early-exit path inside the
        *    chroma RD-opt branch the returned cost does not include header bits.
        *
        ******************************************************************************
        */
   5098 LWORD64 ihevce_inter_tu_tree_selector_and_rdopt_cost_computer(
   5099     ihevce_enc_loop_ctxt_t *ps_ctxt,
   5100     enc_loop_cu_prms_t *ps_cu_prms,
   5101     void *pv_src,
   5102     WORD32 cu_size,
   5103     WORD32 cu_pos_x,
   5104     WORD32 cu_pos_y,
   5105     WORD32 curr_buf_idx,
   5106     enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
   5107     cu_inter_cand_t *ps_inter_cand,
   5108     cu_analyse_t *ps_cu_analyse,
   5109     WORD32 i4_alpha_stim_multiplier)
   5110 {
            /* one node per TU at every level of the quadtree: 256 + 64 + 16 + 4 + 1 */
   5111     tu_tree_node_t as_tu_nodes[256 + 64 + 16 + 4 + 1];
   5112     buffer_data_for_tu_t s_buffer_data_for_tu;
   5113     enc_loop_cu_final_prms_t *ps_final_prms;
   5114     nbr_4x4_t *ps_nbr_4x4;
   5115 
            /* NOTE(review): num_split_flags, u1_tu_size, pu1_csbf_buf, csbf_strd,  */
            /* chrm_ctr, the function-scope recon_stride and the three pu1_tu_*     */
            /* pointers are only assigned in this function body and never read      */
            /* back here - confirm they are leftovers before removing them          */
   5116     WORD32 num_split_flags = 1;
   5117     UWORD8 u1_tu_size;
   5118     UWORD8 *pu1_pred;
   5119     UWORD8 *pu1_ecd_data;
   5120     WORD16 *pi2_deq_data;
   5121     UWORD8 *pu1_csbf_buf;
   5122     UWORD8 *pu1_tu_sz_sft;
   5123     UWORD8 *pu1_tu_posx;
   5124     UWORD8 *pu1_tu_posy;
   5125     LWORD64 total_rdopt_cost;
   5126     WORD32 ctr;
   5127     WORD32 chrm_ctr;
   5128     WORD32 pred_stride;
   5129     WORD32 recon_stride;
   5130     WORD32 trans_size = ps_cu_analyse->u1_cu_size;
   5131     WORD32 csbf_strd;
   5132     WORD32 ecd_data_bytes_cons;
   5133     WORD32 num_4x4_in_cu;
   5134     WORD32 num_4x4_in_tu;
   5135     WORD32 recon_func_mode;
   5136     WORD32 cu_bits;
   5137     UWORD8 u1_compute_spatial_ssd;
   5138     /* backup copy of cabac states for restoration if zero cu residue rdo wins later */
   5139     UWORD8 au1_rdopt_init_ctxt_models[IHEVC_CAB_CTXT_END];
   5140 
            /* initial value is a placeholder; it is overwritten with 4 further below */
   5141     WORD32 i4_min_trans_size = 256;
            /* best cost so far is read from the other buffer set (!curr_buf_idx) */
   5142     LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!curr_buf_idx].i8_best_rdopt_cost;
   5143     WORD32 src_strd = ps_cu_prms->i4_luma_src_stride;
   5144     /* model for no residue syntax qt root cbf flag */
   5145     UWORD8 u1_qtroot_cbf_cabac_model = ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_NORES_IDX];
   5146     UWORD8 u1_skip_tu_sz_sft = 0;
   5147     UWORD8 u1_skip_tu_posx = 0;
   5148     UWORD8 u1_skip_tu_posy = 0;
   5149     UWORD8 u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy;
   5150 
            /* bind the working pointers to the buffer set selected by curr_buf_idx */
   5151     ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx];
   5152     ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0];
   5153     pu1_ecd_data = &ps_final_prms->pu1_cu_coeffs[0];
   5154     pi2_deq_data = &ps_final_prms->pi2_cu_deq_coeffs[0];
   5155     csbf_strd = ps_ctxt->i4_cu_csbf_strd;
   5156     pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0];
   5157     pred_stride = ps_inter_cand->i4_pred_data_stride;
   5158     recon_stride = cu_size;
   5159     pu1_pred = ps_inter_cand->pu1_pred_data;
   5160     chrm_ctr = 0;
   5161     ecd_data_bytes_cons = 0;
   5162     total_rdopt_cost = 0;
   5163     num_4x4_in_cu = cu_size >> 2;
   5164     recon_func_mode = PRED_MODE_INTER;
   5165     cu_bits = 0;
   5166 
   5167     /* get the 4x4 level position of current cu */
   5168     cu_pos_x = cu_pos_x << 1;
   5169     cu_pos_y = cu_pos_y << 1;
   5170 
   5171     ps_final_prms->u1_is_cu_coded = 0;
   5172     ps_final_prms->u4_cu_sad = 0;
   5173 
   5174     /* populate the coeffs scan idx */
   5175     ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
   5176 
   5177 #if ENABLE_INTER_ZCU_COST
   5178     /* reset cu not coded cost */
   5179     ps_ctxt->i8_cu_not_coded_cost = 0;
   5180 
   5181     /* backup copy of cabac states for restoration if zero cu residue rdo wins later */
   5182     memcpy(au1_rdopt_init_ctxt_models, &ps_ctxt->au1_rdopt_init_ctxt_models[0], IHEVC_CAB_CTXT_END);
   5183 #endif
   5184 
            /* a 64x64 CU is handled as four 32x32 TUs, smaller CUs as a single TU */
            /* (values are not consumed later in this function, see note above)    */
   5185     if(ps_cu_analyse->u1_cu_size == 64)
   5186     {
   5187         num_split_flags = 4;
   5188         u1_tu_size = 32;
   5189     }
   5190     else
   5191     {
   5192         num_split_flags = 1;
   5193         u1_tu_size = ps_cu_analyse->u1_cu_size;
   5194     }
   5195 
            /* pick the TU size-shift / position tables according to skip flag and */
            /* partition mode; the only effect visible in this function is that    */
            /* recon_func_mode becomes PRED_MODE_SKIP for skip candidates          */
   5196     if(1 == ps_final_prms->u1_skip_flag)
   5197     {
   5198         if(64 == cu_size)
   5199         {
   5200             /* TU = CU/2 is set but no transform is evaluated  */
   5201             pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0];
   5202             pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0];
   5203             pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0];
   5204         }
   5205         else
   5206         {
   5207             /* TU = CU is set but no transform is evaluated  */
   5208             pu1_tu_sz_sft = &u1_skip_tu_sz_sft;
   5209             pu1_tu_posx = &u1_skip_tu_posx;
   5210             pu1_tu_posy = &u1_skip_tu_posy;
   5211         }
   5212 
   5213         recon_func_mode = PRED_MODE_SKIP;
   5214     }
   5215     /* check for PU part mode being AMP or No AMP */
   5216     else if(ps_final_prms->u1_part_mode < SIZE_2NxnU)
   5217     {
   5218         if((SIZE_2Nx2N == ps_final_prms->u1_part_mode) && (cu_size < 64))
   5219         {
   5220             /* TU= CU is evaluated 2Nx2N inter case */
   5221             pu1_tu_sz_sft = &u1_skip_tu_sz_sft;
   5222             pu1_tu_posx = &u1_skip_tu_posx;
   5223             pu1_tu_posy = &u1_skip_tu_posy;
   5224         }
   5225         else
   5226         {
   5227             /* currently TU= CU/2 is evaluated for all inter case */
   5228             pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0];
   5229             pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0];
   5230             pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0];
   5231         }
   5232     }
   5233     else
   5234     {
   5235         /* for AMP cases one level of TU recursion is done */
   5236         /* based on orientation of the partitions          */
                /* the AMP tables are indexed by (part mode - 4)    */
   5237         pu1_tu_sz_sft = &gau1_inter_tu_shft_amt_amp[ps_final_prms->u1_part_mode - 4][0];
   5238         pu1_tu_posx = &gau1_inter_tu_posx_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0];
   5239         pu1_tu_posy = &gau1_inter_tu_posy_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0];
   5240     }
   5241 
            /* minimum transform size is fixed at 4 here, so every ASSERT on       */
            /* i4_min_trans_size below holds and i4_act_counter evaluates to 2 for */
            /* 64x64 and 32x32 CUs and to 1 for 16x16 and 8x8 CUs                   */
   5242     i4_min_trans_size = 4;
   5243 
   5244     if(ps_ctxt->i1_cu_qp_delta_enable)
   5245     {
                /* indices into ps_cu_analyse->i4_act_factor for QP and for lambda */
   5246         WORD32 i4_act_counter = 0, i4_act_counter_lamda = 0;
   5247         if(ps_cu_analyse->u1_cu_size == 64)
   5248         {
   5249             ASSERT(
   5250                 (i4_min_trans_size == 32) || (i4_min_trans_size == 16) ||
   5251                 (i4_min_trans_size == 8) || (i4_min_trans_size == 4));
   5252             i4_act_counter = (i4_min_trans_size == 16) +
   5253                              2 * ((i4_min_trans_size == 8) || (i4_min_trans_size == 4));
   5254             i4_act_counter_lamda = 3;
   5255         }
   5256         else if(ps_cu_analyse->u1_cu_size == 32)
   5257         {
   5258             ASSERT(
   5259                 (i4_min_trans_size == 32) || (i4_min_trans_size == 16) ||
   5260                 (i4_min_trans_size == 8) || (i4_min_trans_size == 4));
   5261             i4_act_counter = (i4_min_trans_size == 16) +
   5262                              2 * ((i4_min_trans_size == 8) || (i4_min_trans_size == 4));
   5263             i4_act_counter_lamda = 0;
   5264         }
   5265         else if(ps_cu_analyse->u1_cu_size == 16)
   5266         {
   5267             ASSERT(
   5268                 (i4_min_trans_size == 16) || (i4_min_trans_size == 8) || (i4_min_trans_size == 4));
   5269             i4_act_counter = (i4_min_trans_size == 8) || (i4_min_trans_size == 4);
   5270             i4_act_counter_lamda = 0;
   5271         }
   5272         else if(ps_cu_analyse->u1_cu_size == 8)
   5273         {
   5274             ASSERT((i4_min_trans_size == 8) || (i4_min_trans_size == 4));
   5275             i4_act_counter = 1;
   5276             i4_act_counter_lamda = 0;
   5277         }
   5278         else
   5279         {
                    /* only CU sizes 64, 32, 16 and 8 are expected */
   5280             ASSERT(0);
   5281         }
                /* with CTB level lambda, -1 is passed in place of the lambda */
                /* activity factor                                            */
   5282         if(ps_ctxt->i4_use_ctb_level_lamda)
   5283         {
   5284             ihevce_compute_cu_level_QP(
   5285                 ps_ctxt, ps_cu_analyse->i4_act_factor[i4_act_counter][0], -1, 0);
   5286         }
   5287         else
   5288         {
   5289             ihevce_compute_cu_level_QP(
   5290                 ps_ctxt,
   5291                 ps_cu_analyse->i4_act_factor[i4_act_counter][0],
   5292                 ps_cu_analyse->i4_act_factor[i4_act_counter_lamda][0],
   5293                 0);
   5294         }
   5295 
                /* publish the QP held in the context after the call above */
   5296         ps_cu_analyse->i1_cu_qp = ps_ctxt->i4_cu_qp;
   5297     }
   5298 
            /* noisy CU without psy-RDO: scale both SSD lambdas by                  */
            /* (100 - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) percent. The scaled values   */
            /* are written back into ps_ctxt and are not restored in this function  */
            /* NOTE(review): arithmetic goes through float - confirm the precision  */
            /* is adequate for the lambda range in use                              */
   5299     if(u1_is_cu_noisy && !ps_ctxt->u1_enable_psyRDOPT)
   5300     {
   5301         ps_ctxt->i8_cl_ssd_lambda_qf =
   5302             ((float)ps_ctxt->i8_cl_ssd_lambda_qf * (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) /
   5303              100.0f);
   5304         ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
   5305             ((float)ps_ctxt->i8_cl_ssd_lambda_chroma_qf *
   5306              (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
   5307     }
   5308 
            /* spatial domain SSD is used only up to a QP limit and only for */
            /* quality presets below IHEVCE_QUALITY_P3 ...                   */
   5309     u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
   5310                              (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
   5311                              CONVERT_SSDS_TO_SPATIAL_DOMAIN;
   5312 
            /* ... except for noisy CUs or psy-RDO, where the preset check is */
            /* dropped and the QP limit is raised to MAX_HEVC_QP              */
   5313     if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
   5314     {
   5315         u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
   5316                                  CONVERT_SSDS_TO_SPATIAL_DOMAIN;
   5317     }
   5318 
            /* recon availability flags follow the spatial SSD decision */
   5319     if(!u1_compute_spatial_ssd)
   5320     {
   5321         ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0;
   5322         ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
   5323     }
   5324     else
   5325     {
   5326         ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 1;
   5327 
                /* NOTE(review): when chroma is not part of the TU recursion the  */
                /* chroma availability flag is not written in this branch and     */
                /* keeps its value from before the call - confirm this is intended */
   5328         if(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0))
   5329         {
   5330             ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 1;
   5331         }
   5332     }
   5333 
   5334     /* RDOPT copy States :  TU init (best until prev TU) to current */
            /* only the first IHEVC_CAB_COEFFX_PREFIX models are copied here */
   5335     memcpy(
   5336         &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
   5337              .s_cabac_ctxt.au1_ctxt_models[0],
   5338         &ps_ctxt->au1_rdopt_init_ctxt_models[0],
   5339         IHEVC_CAB_COEFFX_PREFIX);
   5340 
            /* set up the TU tree: for a 64x64 CU the third argument is 1 unless   */
            /* the candidate is skip; skip candidates get a max depth of 0; the    */
            /* last two arguments are the chroma-in-recursion flag and the         */
            /* (chroma array type == 2) flag                                       */
   5341     ihevce_tu_tree_init(
   5342         as_tu_nodes,
   5343         cu_size,
   5344         (cu_size == 64) ? !ps_inter_cand->b1_skip_flag : 0,
   5345         ps_inter_cand->b1_skip_flag ? 0 : ps_ctxt->u1_max_inter_tr_depth,
   5346         INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0),
   5347         ps_ctxt->u1_chroma_array_type == 2);
   5348 
            /* for presets >= IHEVCE_QUALITY_P3 the candidate's precomputed split */
            /* flags are mapped onto the tree; TU size is bounded by              */
            /* [MAX(MIN_TU_SIZE, cu_size >> max depth), MIN(MAX_TU_SIZE, cu_size)] */
   5349     if(!ps_inter_cand->b1_skip_flag && (ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3))
   5350     {
   5351         ihevce_tuSplitArray_to_tuTree_mapper(
   5352             as_tu_nodes,
   5353             ps_inter_cand->ai4_tu_split_flag,
   5354             cu_size,
   5355             cu_size,
   5356             MAX(MIN_TU_SIZE, (cu_size >> ps_ctxt->u1_max_inter_tr_depth)),
   5357             MIN(MAX_TU_SIZE, cu_size),
   5358             ps_inter_cand->b1_skip_flag);
   5359     }
   5360 
            /* sanity check: the tree must cover exactly cu_size x cu_size pixels */
   5361     ASSERT(ihevce_tu_tree_coverage_in_cu(as_tu_nodes) == cu_size * cu_size);
   5362 
   5363 #if ENABLE_INTER_ZCU_COST
   5364     ps_ctxt->i8_cu_not_coded_cost = 0;
   5365 #endif
   5366 
            /* gather the src / pred / recon pointers and strides, neighbour data, */
            /* dequantised coefficient buffers and the coefficient (ecd) write     */
            /* pointer into one structure for the TU selector                      */
   5367     s_buffer_data_for_tu.s_src_pred_rec_buf_luma.pv_src = pv_src;
   5368     s_buffer_data_for_tu.s_src_pred_rec_buf_luma.pv_pred = pu1_pred;
   5369     s_buffer_data_for_tu.s_src_pred_rec_buf_luma.pv_recon =
   5370         ps_final_prms->s_recon_datastore.apv_luma_recon_bufs[0];
   5371     s_buffer_data_for_tu.s_src_pred_rec_buf_luma.i4_src_stride = src_strd;
   5372     s_buffer_data_for_tu.s_src_pred_rec_buf_luma.i4_pred_stride = pred_stride;
   5373     s_buffer_data_for_tu.s_src_pred_rec_buf_luma.i4_recon_stride =
   5374         ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
   5375     s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_src = ps_chrm_cu_buf_prms->pu1_curr_src;
            /* chroma pred buffer: per-buffer-index offset is half a CTB area,     */
            /* doubled when the chroma array type is 2                             */
   5376     s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_pred =
   5377         ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[CU_ME_INTRA_PRED_CHROMA_IDX] +
   5378         curr_buf_idx * ((MAX_CTB_SIZE * MAX_CTB_SIZE >> 1) + ((ps_ctxt->u1_chroma_array_type == 2) *
   5379                                                               (MAX_CTB_SIZE * MAX_CTB_SIZE >> 1)));
   5380     s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_recon =
   5381         ps_final_prms->s_recon_datastore.apv_chroma_recon_bufs[0];
   5382     s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_src_stride =
   5383         ps_chrm_cu_buf_prms->i4_chrm_src_stride;
   5384     s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_pred_stride =
   5385         ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[CU_ME_INTRA_PRED_CHROMA_IDX];
   5386     s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_recon_stride =
   5387         ps_final_prms->s_recon_datastore.i4_chromaRecon_stride;
   5388     s_buffer_data_for_tu.ps_nbr_data_buf = ps_nbr_4x4;
   5389     s_buffer_data_for_tu.pi2_deq_data = pi2_deq_data;
   5390     s_buffer_data_for_tu.pi2_deq_data_chroma =
   5391         pi2_deq_data + ps_final_prms->i4_chrm_deq_coeff_strt_idx;
   5392     s_buffer_data_for_tu.i4_nbr_data_buf_stride = num_4x4_in_cu;
   5393     s_buffer_data_for_tu.i4_deq_data_stride = cu_size;
   5394     s_buffer_data_for_tu.i4_deq_data_stride_chroma = cu_size;
   5395     s_buffer_data_for_tu.ppu1_ecd = &pu1_ecd_data;
   5396 
            /* chroma takes part in the TU recursion: generate the chroma inter */
            /* prediction for every PU of the candidate first                   */
   5397     if(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0))
   5398     {
   5399         UWORD8 i;
   5400 
                /* shadows the function-scope pu1_pred (luma); this one walks the */
                /* chroma pred buffer                                             */
   5401         UWORD8 *pu1_pred = (UWORD8 *)s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_pred;
   5402 
                /* one PU when b3_part_size is 0, two PUs otherwise */
   5403         for(i = 0; i < (!!ps_inter_cand->b3_part_size) + 1; i++)
   5404         {
   5405             pu_t *ps_pu;
   5406 
   5407             WORD32 inter_pu_wd;
   5408             WORD32 inter_pu_ht;
   5409 
   5410             ps_pu = ps_inter_cand->as_inter_pu + i;
   5411 
   5412             inter_pu_wd = (ps_pu->b4_wd + 1) << 2; /* cb and cr pixel interleaved */
                    /* chroma height is half the luma height, doubled back when */
                    /* the chroma array type is 2                               */
   5413             inter_pu_ht = ((ps_pu->b4_ht + 1) << 2) >> 1;
   5414             inter_pu_ht <<= (ps_ctxt->u1_chroma_array_type == 2);
   5415             ihevce_chroma_inter_pred_pu(
   5416                 &ps_ctxt->s_mc_ctxt,
   5417                 ps_pu,
   5418                 pu1_pred,
   5419                 s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_pred_stride);
                    /* advance the destination pointer to where the next PU starts */
   5420             if(!!ps_inter_cand->b3_part_size)
   5421             {
   5422                 /* 2Nx__ partition case */
   5423                 if(inter_pu_wd == cu_size)
   5424                 {
   5425                     pu1_pred +=
   5426                         (inter_pu_ht *
   5427                          s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_pred_stride);
   5428                 }
   5429 
   5430                 /* __x2N partition case */
   5431                 if(inter_pu_ht == (cu_size >> !(ps_ctxt->u1_chroma_array_type == 2)))
   5432                 {
   5433                     pu1_pred += inter_pu_wd;
   5434                 }
   5435             }
   5436         }
   5437     }
   5438 
            /* run the TU tree selector; the variant is fixed at compile time and */
            /* the top-down variant takes one extra chroma-in-recursion flag      */
   5439 #if !ENABLE_TOP_DOWN_TU_RECURSION
   5440     total_rdopt_cost = ihevce_tu_tree_selector(
   5441         ps_ctxt,
   5442         as_tu_nodes,
   5443         &s_buffer_data_for_tu,
   5444         &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
   5445              .s_cabac_ctxt.au1_ctxt_models[0],
   5446         recon_func_mode,
   5447 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
   5448         i4_alpha_stim_multiplier,
   5449         u1_is_cu_noisy,
   5450 #endif
   5451         0,
   5452         ps_ctxt->u1_max_inter_tr_depth,
   5453         ps_inter_cand->b3_part_size,
   5454         u1_compute_spatial_ssd);
   5455 #else
   5456     total_rdopt_cost = ihevce_topDown_tu_tree_selector(
   5457         ps_ctxt,
   5458         as_tu_nodes,
   5459         &s_buffer_data_for_tu,
   5460         &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
   5461              .s_cabac_ctxt.au1_ctxt_models[0],
   5462         recon_func_mode,
   5463 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
   5464         i4_alpha_stim_multiplier,
   5465         u1_is_cu_noisy,
   5466 #endif
   5467         0,
   5468         ps_ctxt->u1_max_inter_tr_depth,
   5469         ps_inter_cand->b3_part_size,
   5470         INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0),
   5471         u1_compute_spatial_ssd);
   5472 #endif
   5473 
            /* the cost returned by the selector is discarded here: all accumulators */
            /* are zeroed and then handed to the debriefer by pointer                */
   5474     ps_final_prms->u2_num_tus_in_cu = 0;
   5475     ps_final_prms->u4_cu_luma_res_bits = 0;
   5476     ps_final_prms->u4_cu_sad = 0;
   5477     total_rdopt_cost = 0;
   5478     ecd_data_bytes_cons = 0;
   5479     cu_bits = 0;
   5480 #if ENABLE_INTER_ZCU_COST
   5481     ps_ctxt->i8_cu_not_coded_cost = 0;
   5482 #endif
   5483     ps_final_prms->u1_is_cu_coded = 0;
   5484     ps_final_prms->u1_cu_size = cu_size;
   5485 
            /* cu_pos_x / cu_pos_y are in 4x4 units at this point, so * 4 passes */
            /* the CU position in pixels                                          */
   5486     ihevce_tu_selector_debriefer(
   5487         as_tu_nodes,
   5488         ps_final_prms,
   5489         &total_rdopt_cost,
   5490 #if ENABLE_INTER_ZCU_COST
   5491         &ps_ctxt->i8_cu_not_coded_cost,
   5492 #endif
   5493         &ecd_data_bytes_cons,
   5494         &cu_bits,
   5495         &ps_final_prms->u2_num_tus_in_cu,
   5496         ps_ctxt->i4_cu_qp,
   5497         cu_pos_x * 4,
   5498         cu_pos_y * 4,
   5499         INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0),
   5500         (ps_ctxt->u1_chroma_array_type == 2),
   5501         POS_TL);
   5502 
            /* when chroma was not part of the recursion, chroma coefficients start */
            /* right after the bytes consumed so far                                */
   5503     if(!(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0)))
   5504     {
   5505         ps_final_prms->i4_chrm_cu_coeff_strt_idx = ecd_data_bytes_cons;
   5506     }
   5507 
   5508     /* Modify the cost function for this CU. */
   5509     /* loop in for 8x8 blocks */
   5510     if(ps_ctxt->u1_enable_psyRDOPT)
   5511     {
   5512         UWORD8 *pu1_recon_cu;
                /* shadows the function-scope recon_stride */
   5513         WORD32 recon_stride;
   5514         WORD32 curr_pos_x;
   5515         WORD32 curr_pos_y;
   5516         WORD32 start_index;
   5517         WORD32 num_horz_cu_in_ctb;
   5518         WORD32 had_block_size;
   5519 
   5520         /* TODO: sreenivasa ctb size has to be used appropriately */
                /* a CTB width of 64 is hard-coded in the computation below */
   5521         had_block_size = 8;
   5522         num_horz_cu_in_ctb = 64 / had_block_size;
   5523 
   5524         curr_pos_x = cu_pos_x << 2; /* pel units */
   5525         curr_pos_y = cu_pos_y << 2; /* pel units */
   5526         recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
   5527         pu1_recon_cu = ((UWORD8 *)ps_final_prms->s_recon_datastore
   5528                             .apv_luma_recon_bufs[0]);  // already pointing to the current CU recon
   5529         //+ \curr_pos_x + curr_pos_y * recon_stride;
   5530 
   5531         /* start index to index the source satd of curr cu in the current ctb*/
   5532         start_index =
   5533             (curr_pos_x / had_block_size) + (curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
   5534 
   5535         {
   5536             total_rdopt_cost += ihevce_psy_rd_cost(
   5537                 ps_ctxt->ai4_source_satd_8x8,
   5538                 pu1_recon_cu,
   5539                 recon_stride,
   5540                 1,  //howz stride
   5541                 cu_size,
   5542                 0,  // pic type
   5543                 0,  //layer id
   5544                 ps_ctxt->i4_satd_lamda,  // lambda
   5545                 start_index,
   5546                 ps_ctxt->u1_is_input_data_hbd,
   5547                 ps_ctxt->u4_psy_strength,
   5548                 &ps_ctxt->s_cmn_opt_func);  // 8 bit
   5549         }
   5550     }
   5551 
            /* NOTE(review): 4 is presumably the "derive chroma mode from luma" */
            /* index - confirm against the chroma intra mode definitions        */
   5552     ps_final_prms->u1_chroma_intra_pred_mode = 4;
   5553 
   5554     /* update the bytes consumed */
   5555     ps_final_prms->i4_num_bytes_ecd_data = ecd_data_bytes_cons;
   5556 
   5557     /* store the current cu size to final prms */
   5558     ps_final_prms->u1_cu_size = cu_size;
   5559     /* ------------- Chroma processing -------------- */
   5560     /* Chroma rdopt eval for each luma candidate only for HIGH QUALITY/MEDIUM SPEED preset*/
            /* skipped when chroma was already evaluated inside the TU recursion */
   5561     if(ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt &&
   5562        !(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0)))
   5563     {
   5564         LWORD64 chrm_rdopt_cost;
   5565         WORD32 chrm_rdopt_tu_bits;
   5566 
   5567         /* Store the current RDOPT cost to enable early exit in chrom_prcs */
   5568         ps_ctxt->as_cu_prms[curr_buf_idx].i8_curr_rdopt_cost = total_rdopt_cost;
   5569 
                /* position arguments are shifted back from 4x4 to 8x8 units */
   5570         chrm_rdopt_cost = ihevce_chroma_cu_prcs_rdopt(
   5571             ps_ctxt,
   5572             curr_buf_idx,
   5573             0, /* TU mode : Don't care in Inter path */
   5574             ps_chrm_cu_buf_prms->pu1_curr_src,
   5575             ps_chrm_cu_buf_prms->i4_chrm_src_stride,
   5576             ps_chrm_cu_buf_prms->pu1_cu_left,
   5577             ps_chrm_cu_buf_prms->pu1_cu_top,
   5578             ps_chrm_cu_buf_prms->pu1_cu_top_left,
   5579             ps_chrm_cu_buf_prms->i4_cu_left_stride,
   5580             (cu_pos_x >> 1),
   5581             (cu_pos_y >> 1),
   5582             &chrm_rdopt_tu_bits,
   5583             i4_alpha_stim_multiplier,
   5584             u1_is_cu_noisy);
   5585 
                /* fixed point weighting of the chroma cost, with rounding */
   5586 #if WEIGH_CHROMA_COST
   5587         chrm_rdopt_cost = (LWORD64)(
   5588             (chrm_rdopt_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
   5589              (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
   5590             CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
   5591 #endif
   5592 
   5593 #if CHROMA_RDOPT_ENABLE
   5594         total_rdopt_cost += chrm_rdopt_cost;
   5595 #endif
   5596         cu_bits += chrm_rdopt_tu_bits;
   5597 
   5598         /* during chroma evaluation if skip decision was over written     */
   5599         /* then the current skip candidate is set to a non skip candidate */
   5600         ps_inter_cand->b1_skip_flag = ps_final_prms->u1_skip_flag;
   5601 
   5602         /* cu bits for chroma residual if chroma rdopt is on       */
   5603         /* if zero_cbf eval is disabled then cu bits will be zero  */
   5604         ps_final_prms->u4_cu_chroma_res_bits = chrm_rdopt_tu_bits;
   5605 
                /* the early exit applies to non-zero bitrate instances or to */
                /* single-bitrate encodes only                                */
   5606         if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
   5607         {
   5608             /* Early exit : If the current running cost exceeds
   5609             the prev. best mode cost, break */
                    /* returns before the cabac state copy and header bit costing below */
   5610             if(total_rdopt_cost > prev_best_rdopt_cost)
   5611             {
   5612                 return (total_rdopt_cost);
   5613             }
   5614         }
   5615     }
   5616     else
   5617     {}
   5618 
   5619 #if SHRINK_INTER_TUTREE
   5620     /* ------------- Quadtree TU split  optimization ------------  */
            /* only attempted for coded CUs; the TU count is replaced by the value */
            /* returned from the shrink routine                                    */
   5621     if(ps_final_prms->u1_is_cu_coded)
   5622     {
   5623         ps_final_prms->u2_num_tus_in_cu = ihevce_shrink_inter_tu_tree(
   5624             &ps_final_prms->as_tu_enc_loop[0],
   5625             &ps_final_prms->as_tu_enc_loop_temp_prms[0],
   5626             &ps_final_prms->s_recon_datastore,
   5627             ps_final_prms->u2_num_tus_in_cu,
   5628             (ps_ctxt->u1_chroma_array_type == 2));
   5629     }
   5630 #endif
   5631 
   5632     /* RDOPT copy States :  Best after all luma TUs (and chroma,if enabled)to current */
            /* covers the models from IHEVC_CAB_COEFFX_PREFIX up to IHEVC_CAB_CTXT_END */
   5633     COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
   5634         &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
   5635                 .s_cabac_ctxt.au1_ctxt_models[0] +
   5636             IHEVC_CAB_COEFFX_PREFIX,
   5637         &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
   5638         IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
   5639 
   5640     /* -------- Bit estimate for RD opt -------------- */
   5641     {
   5642         nbr_avail_flags_t s_nbr;
   5643         /*cbf_bits will account for both texture and cbf bits when zero cbf eval flag is 0*/
   5644         WORD32 cbf_bits, header_bits;
   5645 
   5646         /* get the neighbour availability flags for current cu  */
                /* position and size arguments are in 4x4 units */
   5647         ihevce_get_only_nbr_flag(
   5648             &s_nbr,
   5649             ps_ctxt->pu1_ctb_nbr_map,
   5650             ps_ctxt->i4_nbr_map_strd,
   5651             cu_pos_x,
   5652             cu_pos_y,
   5653             (cu_size >> 2),
   5654             (cu_size >> 2));
   5655 
   5656         /* call the entropy rdo encode to get the bit estimate for current cu */
                /* when intra eval is disabled, top availability is additionally */
                /* gated by !DISABLE_TOP_SYNC                                    */
   5657         header_bits = ihevce_entropy_rdo_encode_cu(
   5658             &ps_ctxt->s_rdopt_entropy_ctxt,
   5659             ps_final_prms,
   5660             (cu_pos_x >> 1), /*  back to 8x8 pel units   */
   5661             (cu_pos_y >> 1), /*  back to 8x8 pel units   */
   5662             cu_size,
   5663             ps_ctxt->u1_disable_intra_eval ? !DISABLE_TOP_SYNC && s_nbr.u1_top_avail
   5664                                            : s_nbr.u1_top_avail,
   5665             s_nbr.u1_left_avail,
   5666             &ps_final_prms->pu1_cu_coeffs[0],
   5667             &cbf_bits);
   5668 
   5669         cu_bits += header_bits;
   5670 
   5671         /* cbf bits are excluded from header bits, instead considered as texture bits */
   5672         /* incase if zero cbf eval is disabled then texture bits gets added here */
   5673         ps_final_prms->u4_cu_hdr_bits = (header_bits - cbf_bits);
   5674         ps_final_prms->u4_cu_cbf_bits = cbf_bits;
   5675 
   5676 #if RDOPT_ENABLE
   5677         /* add the cost of coding the header bits */
   5678         total_rdopt_cost +=
   5679             COMPUTE_RATE_COST_CLIP30(header_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
   5680 
   5681 #if ENABLE_INTER_ZCU_COST
   5682         /* If cu is coded, Evaluate not coded cost and check if it improves over coded cost */
   5683         if(ps_final_prms->u1_is_cu_coded && (ZCBF_ENABLE == ps_ctxt->i4_zcbf_rdo_level))
   5684         {
   5685             LWORD64 i8_cu_not_coded_cost = ps_ctxt->i8_cu_not_coded_cost;
   5686 
   5687             WORD32 is_2nx2n_mergecu = (SIZE_2Nx2N == ps_final_prms->u1_part_mode) &&
   5688                                       (1 == ps_final_prms->as_pu_enc_loop[0].b1_merge_flag);
   5689 
   5690             cab_ctxt_t *ps_cab_ctxt =
   5691                 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx].s_cabac_ctxt;
   5692 
   5693             /* Read header bits generated after ihevce_entropy_rdo_encode_cu() call  */
   5694             UWORD32 u4_cu_hdr_bits_q12 = ps_cab_ctxt->u4_header_bits_estimated_q12;
   5695 
   5696             /* account for coding qt_root_cbf = 0 */
   5697             /* First subtract cost for coding as 1 (part of header bits) and then add cost for coding as 0 */
                    /* the add is done first; the subtract is clamped so the unsigned */
                    /* value cannot wrap below zero                                   */
   5698             u4_cu_hdr_bits_q12 += gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 0];
   5699             if(u4_cu_hdr_bits_q12 < gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1])
   5700                 u4_cu_hdr_bits_q12 = 0;
   5701             else
   5702                 u4_cu_hdr_bits_q12 -= gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1];
   5703 
   5704             /* add the cost of coding the header bits */
                    /* the extra CABAC_FRAC_BITS_Q shift accounts for the fractional */
                    /* bit count being passed in                                     */
   5705             i8_cu_not_coded_cost += COMPUTE_RATE_COST_CLIP30(
   5706                 u4_cu_hdr_bits_q12 /* ps_final_prms->u4_cu_hdr_bits */,
   5707                 ps_ctxt->i8_cl_ssd_lambda_qf,
   5708                 (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
   5709 
                    /* with psy-RDO the not-coded cost is forced above the coded cost, */
                    /* so the not-coded branch below is never taken                    */
   5710             if(ps_ctxt->u1_enable_psyRDOPT)
   5711             {
   5712                 i8_cu_not_coded_cost = total_rdopt_cost + 1;
   5713             }
   5714 
   5715             /* Evaluate qtroot cbf rdo; exclude 2Nx2N Merge as skip cu is explicitly evaluated */
   5716             if((i8_cu_not_coded_cost <= total_rdopt_cost) && (!is_2nx2n_mergecu))
   5717             {
   5718                 WORD32 tx_size;
   5719 
   5720                 /* force cu as not coded and update the cost */
   5721                 ps_final_prms->u1_is_cu_coded = 0;
   5722                 ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
   5723                 ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0;
   5724 
   5725                 total_rdopt_cost = i8_cu_not_coded_cost;
   5726 
   5727                 /* reset num TUs to 1 unless cu size is 64 */
   5728                 ps_final_prms->u2_num_tus_in_cu = (64 == cu_size) ? 4 : 1;
   5729                 trans_size = (64 == cu_size) ? 32 : cu_size;
   5730                 GETRANGE(tx_size, trans_size);
   5731 
   5732                 /* reset the bytes consumed */
   5733                 ps_final_prms->i4_num_bytes_ecd_data = 0;
   5734 
   5735                 /* reset texture related bits and roll back header bits*/
   5736                 ps_final_prms->u4_cu_cbf_bits = 0;
   5737                 ps_final_prms->u4_cu_luma_res_bits = 0;
   5738                 ps_final_prms->u4_cu_chroma_res_bits = 0;
                        /* fractional header bits are rounded to whole bits */
   5739                 ps_final_prms->u4_cu_hdr_bits =
   5740                     (u4_cu_hdr_bits_q12 + (1 << (CABAC_FRAC_BITS_Q - 1))) >> CABAC_FRAC_BITS_Q;
   5741 
   5742                 /* update cabac model with qtroot cbf = 0 decision */
   5743                 ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_NORES_IDX] =
   5744                     gau1_ihevc_next_state[u1_qtroot_cbf_cabac_model << 1];
   5745 
   5746                 /* restore untouched cabac models for, tusplit, cbfs, texture etc */
                        /* source is the backup taken at function entry */
   5747                 memcpy(
   5748                     &ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_SPLIT_TFM],
   5749                     &au1_rdopt_init_ctxt_models[IHEVC_CAB_SPLIT_TFM],
   5750                     (IHEVC_CAB_CTXT_END - IHEVC_CAB_SPLIT_TFM));
   5751 
   5752                 /* mark all tus as not coded for final eval */
   5753                 for(ctr = 0; ctr < ps_final_prms->u2_num_tus_in_cu; ctr++)
   5754                 {
                            /* bit 0 of ctr selects the right half, bit 1 the bottom half; */
                            /* offsets are in 4x4 units                                    */
   5755                     WORD32 curr_pos_x = (ctr & 0x1) ? (trans_size >> 2) : 0;
   5756                     WORD32 curr_pos_y = (ctr & 0x2) ? (trans_size >> 2) : 0;
   5757 
   5758                     nbr_4x4_t *ps_cur_nbr_4x4 =
   5759                         ps_nbr_4x4 + curr_pos_x + (curr_pos_y * num_4x4_in_cu);
   5760 
   5761                     num_4x4_in_tu = trans_size >> 2;
   5762 
   5763                     ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed = 0;
   5764                     ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cb_bytes_consumed[0] = 0;
   5765                     ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cr_bytes_consumed[0] = 0;
   5766 
   5767                     ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = 0;
   5768                     ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0;
   5769                     ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0;
   5770 
   5771                     ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0;
   5772                     ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0;
   5773 
                            /* TU size is stored as (tx_size - 3), with tx_size obtained */
                            /* from GETRANGE on trans_size above                         */
   5774                     ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3;
   5775                     ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x + curr_pos_x;
   5776                     ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y + curr_pos_y;
   5777 
   5778                     /* reset cbf for the all 4x4 in TU */
   5779                     {
   5780                         WORD32 i, j;
   5781                         nbr_4x4_t *ps_tmp_4x4;
   5782                         ps_tmp_4x4 = ps_cur_nbr_4x4;
   5783 
   5784                         for(i = 0; i < num_4x4_in_tu; i++)
   5785                         {
   5786                             for(j = 0; j < num_4x4_in_tu; j++)
   5787                             {
   5788                                 ps_tmp_4x4[j].b1_y_cbf = 0;
   5789                             }
   5790                             /* row level update*/
   5791                             ps_tmp_4x4 += num_4x4_in_cu;
   5792                         }
   5793                     }
   5794                 }
   5795             }
   5796         }
   5797 #endif /* ENABLE_INTER_ZCU_COST */
   5798 
   5799 #endif /* RDOPT_ENABLE */
   5800     }
   5801 
   5802     return (total_rdopt_cost);
   5803 }
   5804 #endif
   5805 
   5806 /*!
   5807 ******************************************************************************
   5808 * \if Function name : ihevce_inter_rdopt_cu_mc_mvp \endif
   5809 *
   5810 * \brief
   5811 *    Inter Coding unit function which performs MC and MVP calc for RD opt mode
        *
        * \description
        *    Loops over the 1 or 2 partitions of the inter candidate. For every
        *    partition it:
        *      - fills the reference picture buffer ids of the PU,
        *      - for partitions that are neither skip nor merge, calls ihevce_mv_pred
        *        and keeps the MVP candidate (index 0 or 1) with the smaller sum of
        *        absolute MV differences, storing the MVD and the MVP index in
        *        as_pu_enc_loop[] of the selected final params,
        *      - fills the CU level neighbour 4x4 array, the chroma PU copy and the
        *        colocated PU entry,
        *      - marks the partition in the CTB neighbour map,
        *      - when ENABLE_MIXED_INTER_MODE_EVAL is 0, runs luma inter prediction
        *        into the candidate's pred buffer (only for non skip/merge
        *        partitions, or always when u1_high_speed_cu_dec_on is set).
        *
   5812 *
   5813 * \param[in] ps_ctxt       enc_loop module ctxt pointer
   5814 * \param[inout] ps_inter_cand pointer to inter candidate structure
        *                   (ref pic buf ids of its PUs are written by this function)
   5815 * \param[in] cu_size         Current CU size
   5816 * \param[in] cu_pos_x        cu position x w.r.t to ctb
        *                   (doubled internally to obtain the position in 4x4 units)
   5817 * \param[in] cu_pos_y        cu position y w.r.t to ctb
        *                   (doubled internally to obtain the position in 4x4 units)
   5818 * \param[in] ps_left_nbr_4x4 Left neighbour 4x4 structure pointer
   5819 * \param[in] ps_top_nbr_4x4  top neighbour 4x4 structure pointer
   5820 * \param[in] ps_topleft_nbr_4x4  top left neighbour 4x4 structure pointer
   5821 * \param[in] nbr_4x4_left_strd  left neighbour 4x4 buffer stride
   5822 * \param[in] curr_buf_idx Current Buffer index
        *                   (selects the as_cu_nbr[] / as_cu_prms[] entry of ps_ctxt)
   5823 *
   5824 * \return
   5825 *    Rdopt cost (always 0 from this function; the value is set to 0 inside
        *    the partition loop and never accumulated)
   5826 *
   5827 * \author
   5828 *  Ittiam
   5829 *
   5830 *****************************************************************************
   5831 */
   5832 LWORD64 ihevce_inter_rdopt_cu_mc_mvp(
   5833     ihevce_enc_loop_ctxt_t *ps_ctxt,
   5834     cu_inter_cand_t *ps_inter_cand,
   5835     WORD32 cu_size,
   5836     WORD32 cu_pos_x,
   5837     WORD32 cu_pos_y,
   5838     nbr_4x4_t *ps_left_nbr_4x4,
   5839     nbr_4x4_t *ps_top_nbr_4x4,
   5840     nbr_4x4_t *ps_topleft_nbr_4x4,
   5841     WORD32 nbr_4x4_left_strd,
   5842     WORD32 curr_buf_idx)
   5843 {
   5844     /* local variables */
   5845     enc_loop_cu_final_prms_t *ps_final_prms;
   5846     nbr_avail_flags_t s_nbr;
   5847     nbr_4x4_t *ps_nbr_4x4;
   5848
            /* passed to ihevce_mv_pred; not read again in this function */
   5849     UWORD8 au1_is_top_used[2][MAX_MVP_LIST_CAND];
   5850     UWORD8 *pu1_pred;
            /* NOTE(review): WORD32 while the function returns LWORD64; only 0 is */
            /* ever stored, and it is first written inside the partition loop     */
   5851     WORD32 rdopt_cost;
   5852     WORD32 ctr;
   5853     WORD32 num_cu_part;
   5854     WORD32 inter_pu_wd;
   5855     WORD32 inter_pu_ht;
   5856     WORD32 pred_stride;
   5857
   5858     /* get the pointers based on curbuf idx */
   5859     ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0];
   5860     ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx];
   5861     pu1_pred = ps_inter_cand->pu1_pred_data;
   5862
   5863     pred_stride = ps_inter_cand->i4_pred_data_stride;
   5864
   5865     /* store the partition mode in final prms */
   5866     ps_final_prms->u1_part_mode = ps_inter_cand->b3_part_size;
   5867
   5868     /* since encoder does not support NXN part type */
   5869     /* num parts can be either 1 or 2 only          */
   5870     ASSERT(SIZE_NxN != ps_inter_cand->b3_part_size);
   5871
            /* 1 partition for SIZE_2Nx2N, 2 partitions for every other part size */
   5872     num_cu_part = (SIZE_2Nx2N != ps_inter_cand->b3_part_size) + 1;
   5873
   5874     /* get the 4x4 level position of current cu */
   5875     cu_pos_x = cu_pos_x << 1;
   5876     cu_pos_y = cu_pos_y << 1;
   5877
   5878     /* populate cu level params */
   5879     ps_final_prms->u1_intra_flag = PRED_MODE_INTER;
   5880     ps_final_prms->u2_num_pus_in_cu = num_cu_part;
   5881
   5882     /* run a loop over all the partitions in cu */
   5883     for(ctr = 0; ctr < num_cu_part; ctr++)
   5884     {
   5885         pu_mv_t as_pred_mv[MAX_MVP_LIST_CAND];
   5886         pu_t *ps_pu;
   5887         WORD32 skip_or_merge_flag;
                /* only assigned and read on the non skip / non merge path */
   5888         UWORD8 u1_use_mvp_from_top_row;
   5889
   5890         ps_pu = &ps_inter_cand->as_inter_pu[ctr];
   5891
   5892         /* IF AMP then each partitions can have diff wd ht */
                /* b4_wd / b4_ht are stored as (size in 4 pel units - 1); get pixels */
   5893         inter_pu_wd = (ps_pu->b4_wd + 1) << 2;
   5894         inter_pu_ht = (ps_pu->b4_ht + 1) << 2;
   5895
   5896         /* populate reference pic buf id for bs compute */
                /* a ref idx of -1 is skipped (presumably marks an unused list) */
   5897
   5898         /* L0 */
   5899         if(-1 != ps_pu->mv.i1_l0_ref_idx)
   5900         {
   5901             ps_pu->mv.i1_l0_ref_pic_buf_id =
   5902                 ps_ctxt->s_mv_pred_ctxt.ps_ref_list[0][ps_pu->mv.i1_l0_ref_idx]->i4_buf_id;
   5903         }
   5904
   5905         /* L1 */
   5906         if(-1 != ps_pu->mv.i1_l1_ref_idx)
   5907         {
   5908             ps_pu->mv.i1_l1_ref_pic_buf_id =
   5909                 ps_ctxt->s_mv_pred_ctxt.ps_ref_list[1][ps_pu->mv.i1_l1_ref_idx]->i4_buf_id;
   5910         }
   5911
   5912         /* SKIP or merge check for every part */
   5913         skip_or_merge_flag = ps_inter_cand->b1_skip_flag | ps_pu->b1_merge_flag;
   5914
   5915         /* ----------- MV Prediction ----------------- */
   5916         if(0 == skip_or_merge_flag)
   5917         {
   5918             /* get the neighbour availability flags */
                    /* position and size are passed in 4x4 units */
   5919             ihevce_get_only_nbr_flag(
   5920                 &s_nbr,
   5921                 ps_ctxt->pu1_ctb_nbr_map,
   5922                 ps_ctxt->i4_nbr_map_strd,
   5923                 cu_pos_x,
   5924                 cu_pos_y,
   5925                 inter_pu_wd >> 2,
   5926                 inter_pu_ht >> 2);
   5927
                    /* Top row MVP data is initially treated as unusable only when   */
                    /* intra eval is disabled, DISABLE_TOP_SYNC is set and the PU    */
                    /* has b4_pos_y == 0; in every other case it is used directly    */
   5928             if(ps_ctxt->u1_disable_intra_eval && DISABLE_TOP_SYNC && (ps_pu->b4_pos_y == 0))
   5929             {
   5930                 u1_use_mvp_from_top_row = 0;
   5931             }
   5932             else
   5933             {
   5934                 u1_use_mvp_from_top_row = 1;
   5935             }
   5936
   5937             if(!u1_use_mvp_from_top_row)
   5938             {
   5939                 if(s_nbr.u1_top_avail || s_nbr.u1_top_lt_avail || s_nbr.u1_top_rt_avail)
   5940                 {
                            /* No left / bottom-left neighbour is available, so the  */
                            /* top neighbours are kept: sync on the top-right        */
                            /* dependency first (unless u1_use_top_at_ctb_boundary   */
                            /* is set) and then use them                             */
   5941                     if(!s_nbr.u1_left_avail && !s_nbr.u1_bot_lt_avail)
   5942                     {
   5943                         WORD32 curr_cu_pos_in_row, cu_top_right_offset, cu_top_right_dep_pos;
   5944
   5945                         /* Ensure Top Right Sync */
   5946                         if(!ps_ctxt->u1_use_top_at_ctb_boundary)
   5947                         {
                                    /* cu_pos_x is in 4x4 units here; << 2 gives pixels */
   5948                             curr_cu_pos_in_row =
   5949                                 ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_x + (cu_pos_x << 2);
   5950
   5951                             if(ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y == 0)
   5952                             {
   5953                                 /* No wait for 1st row */
                                        /* NOTE(review): this value is overwritten in the */
                                        /* nested block below before it is ever read      */
   5954                                 cu_top_right_offset = -(MAX_CTB_SIZE);
   5955                                 {
   5956                                     ihevce_tile_params_t *ps_col_tile_params =
   5957                                         ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base +
   5958                                          ps_ctxt->i4_tile_col_idx);
   5959
   5960                                     /* No wait for 1st row */
   5961                                     cu_top_right_offset =
   5962                                         -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE));
   5963                                 }
   5964                                 cu_top_right_dep_pos = 0;
   5965                             }
   5966                             else
   5967                             {
   5968                                 cu_top_right_offset = (cu_size) + 4;
                                        /* NOTE(review): >> 6 presumably assumes 64 pixel */
                                        /* CTB rows; depends on the row above - confirm   */
   5969                                 cu_top_right_dep_pos =
   5970                                     (ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y >> 6) - 1;
   5971                             }
   5972
   5973                             ihevce_dmgr_chk_row_row_sync(
   5974                                 ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
   5975                                 curr_cu_pos_in_row,
   5976                                 cu_top_right_offset,
   5977                                 cu_top_right_dep_pos,
   5978                                 ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
   5979                                 ps_ctxt->thrd_id);
   5980                         }
   5981
   5982                         u1_use_mvp_from_top_row = 1;
   5983                     }
   5984                     else
   5985                     {
                                /* left side neighbours exist: hide all top neighbours */
                                /* from MV prediction; the use flag stays 0            */
   5986                         s_nbr.u1_top_avail = 0;
   5987                         s_nbr.u1_top_lt_avail = 0;
   5988                         s_nbr.u1_top_rt_avail = 0;
   5989                     }
   5990                 }
   5991                 else
   5992                 {
                            /* no top neighbour is available at all */
   5993                     u1_use_mvp_from_top_row = 1;
   5994                 }
   5995             }
   5996             /* Call the MV prediction module to get MVP */
   5997             ihevce_mv_pred(
   5998                 &ps_ctxt->s_mv_pred_ctxt,
   5999                 ps_top_nbr_4x4,
   6000                 ps_left_nbr_4x4,
   6001                 ps_topleft_nbr_4x4,
   6002                 nbr_4x4_left_strd,
   6003                 &s_nbr,
   6004                 NULL, /* colocated MV */
   6005                 ps_pu,
   6006                 &as_pred_mv[0],
   6007                 au1_is_top_used);
   6008         }
   6009
   6010         /* store the nbr 4x4 structure */
                /* only the first 4x4 entry of the partition is filled here; it is */
                /* replicated over the whole partition further below               */
   6011         ps_nbr_4x4->b1_skip_flag = ps_inter_cand->b1_skip_flag;
   6012         ps_nbr_4x4->b1_intra_flag = 0;
   6013         ps_nbr_4x4->b1_pred_l0_flag = 0;
   6014         ps_nbr_4x4->b1_pred_l1_flag = 0;
   6015
   6016         /* DC is default mode for inter cu, required for intra mode signalling */
   6017         ps_nbr_4x4->b6_luma_intra_mode = 1;
   6018
   6019         /* copy the motion vectors to neighbour structure */
   6020         ps_nbr_4x4->mv = ps_pu->mv;
   6021
   6022         /* copy the PU to final out pu */
                /* (for non skip/merge PUs its MVs are turned into MVDs below) */
   6023         ps_final_prms->as_pu_enc_loop[ctr] = *ps_pu;
   6024
   6025         /* copy the PU to chroma */
                /* (this copy keeps the actual MVs; it is not modified below) */
   6026         ps_final_prms->as_pu_chrm_proc[ctr] = *ps_pu;
   6027
   6028         /* store the skip flag to final prms */
   6029         ps_final_prms->u1_skip_flag = ps_inter_cand->b1_skip_flag;
   6030
   6031         /* MVP index & MVD calc is gated on skip/merge flag */
   6032         if(0 == skip_or_merge_flag)
   6033         {
   6034             /* calculate the MVDs and populate the MVP idx for L0 */
   6035             if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L0 == ps_pu->b2_pred_mode))
   6036             {
   6037                 WORD32 idx0_cost, idx1_cost;
   6038
   6039                 /* calculate the ABS mvd for cand 0 */
   6040                 idx0_cost = abs(ps_pu->mv.s_l0_mv.i2_mvx - as_pred_mv[0].s_l0_mv.i2_mvx);
   6041                 idx0_cost += abs(ps_pu->mv.s_l0_mv.i2_mvy - as_pred_mv[0].s_l0_mv.i2_mvy);
   6042
   6043                 /* calculate the ABS mvd for cand 1 */
   6044                 if(u1_use_mvp_from_top_row)
   6045                 {
   6046                     idx1_cost = abs(ps_pu->mv.s_l0_mv.i2_mvx - as_pred_mv[1].s_l0_mv.i2_mvx);
   6047                     idx1_cost += abs(ps_pu->mv.s_l0_mv.i2_mvy - as_pred_mv[1].s_l0_mv.i2_mvy);
   6048                 }
   6049                 else
   6050                 {
                            /* forces cand 0 to win the comparison below */
   6051                     idx1_cost = INT_MAX;
   6052                 }
   6053
   6054                 /* based on the least cost choose the mvp idx */
                        /* (ties go to cand 0); the stored MV becomes MV - MVP */
   6055                 if(idx0_cost <= idx1_cost)
   6056                 {
   6057                     ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvx -=
   6058                         as_pred_mv[0].s_l0_mv.i2_mvx;
   6059                     ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvy -=
   6060                         as_pred_mv[0].s_l0_mv.i2_mvy;
   6061
   6062                     ps_final_prms->as_pu_enc_loop[ctr].b1_l0_mvp_idx = 0;
   6063                 }
   6064                 else
   6065                 {
   6066                     ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvx -=
   6067                         as_pred_mv[1].s_l0_mv.i2_mvx;
   6068                     ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvy -=
   6069                         as_pred_mv[1].s_l0_mv.i2_mvy;
   6070
   6071                     ps_final_prms->as_pu_enc_loop[ctr].b1_l0_mvp_idx = 1;
   6072                 }
   6073
   6074                 /* set the pred l0 flag for neighbour storage */
   6075                 ps_nbr_4x4->b1_pred_l0_flag = 1;
   6076             }
   6077             /* calculate the MVDs and populate the MVP idx for L1 */
   6078             if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L1 == ps_pu->b2_pred_mode))
   6079             {
   6080                 WORD32 idx0_cost, idx1_cost;
   6081
   6082                 /* calculate the ABS mvd for cand 0 */
   6083                 idx0_cost = abs(ps_pu->mv.s_l1_mv.i2_mvx - as_pred_mv[0].s_l1_mv.i2_mvx);
   6084                 idx0_cost += abs(ps_pu->mv.s_l1_mv.i2_mvy - as_pred_mv[0].s_l1_mv.i2_mvy);
   6085
   6086                 /* calculate the ABS mvd for cand 1 */
   6087                 if(u1_use_mvp_from_top_row)
   6088                 {
   6089                     idx1_cost = abs(ps_pu->mv.s_l1_mv.i2_mvx - as_pred_mv[1].s_l1_mv.i2_mvx);
   6090                     idx1_cost += abs(ps_pu->mv.s_l1_mv.i2_mvy - as_pred_mv[1].s_l1_mv.i2_mvy);
   6091                 }
   6092                 else
   6093                 {
                            /* forces cand 0 to win the comparison below */
   6094                     idx1_cost = INT_MAX;
   6095                 }
   6096
   6097                 /* based on the least cost choose the mvp idx */
                        /* (ties go to cand 0); the stored MV becomes MV - MVP */
   6098                 if(idx0_cost <= idx1_cost)
   6099                 {
   6100                     ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvx -=
   6101                         as_pred_mv[0].s_l1_mv.i2_mvx;
   6102                     ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvy -=
   6103                         as_pred_mv[0].s_l1_mv.i2_mvy;
   6104
   6105                     ps_final_prms->as_pu_enc_loop[ctr].b1_l1_mvp_idx = 0;
   6106                 }
   6107                 else
   6108                 {
   6109                     ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvx -=
   6110                         as_pred_mv[1].s_l1_mv.i2_mvx;
   6111                     ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvy -=
   6112                         as_pred_mv[1].s_l1_mv.i2_mvy;
   6113
   6114                     ps_final_prms->as_pu_enc_loop[ctr].b1_l1_mvp_idx = 1;
   6115                 }
   6116
   6117                 /* set the pred l1 flag for neighbour storage */
   6118                 ps_nbr_4x4->b1_pred_l1_flag = 1;
   6119             }
   6120
   6121             /* set the merge flag to 0 */
   6122             ps_final_prms->as_pu_enc_loop[ctr].b1_merge_flag = 0;
   6123             ps_final_prms->as_pu_enc_loop[ctr].b3_merge_idx = 0;
   6124         }
   6125         else
   6126         {
   6127             /* copy the merge index from candidate */
                    /* skip / merge PU: MVs are kept as they are, no MVD or MVP idx */
   6128             ps_final_prms->as_pu_enc_loop[ctr].b1_merge_flag = ps_pu->b1_merge_flag;
   6129
   6130             ps_final_prms->as_pu_enc_loop[ctr].b3_merge_idx = ps_pu->b3_merge_idx;
   6131
   6132             if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L0 == ps_pu->b2_pred_mode))
   6133             {
   6134                 /* set the pred l0 flag for neighbour storage */
   6135                 ps_nbr_4x4->b1_pred_l0_flag = 1;
   6136             }
   6137
   6138             /* L1 direction in use: only the neighbour flag is updated here */
   6139             if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L1 == ps_pu->b2_pred_mode))
   6140             {
   6141                 /* set the pred l1 flag for neighbour storage */
   6142                 ps_nbr_4x4->b1_pred_l1_flag = 1;
   6143             }
   6144         }
   6145
   6146         /* RD opt cost computation is part of cu_ntu func hence here it is set to 0 */
   6147         rdopt_cost = 0;
   6148
   6149         /* copy the MV to colocated Mv structure */
   6150         ps_final_prms->as_col_pu_enc_loop[ctr].s_l0_mv = ps_pu->mv.s_l0_mv;
   6151         ps_final_prms->as_col_pu_enc_loop[ctr].s_l1_mv = ps_pu->mv.s_l1_mv;
   6152         ps_final_prms->as_col_pu_enc_loop[ctr].i1_l0_ref_idx = ps_pu->mv.i1_l0_ref_idx;
   6153         ps_final_prms->as_col_pu_enc_loop[ctr].i1_l1_ref_idx = ps_pu->mv.i1_l1_ref_idx;
   6154         ps_final_prms->as_col_pu_enc_loop[ctr].b2_pred_mode = ps_pu->b2_pred_mode;
   6155         ps_final_prms->as_col_pu_enc_loop[ctr].b1_intra_flag = 0;
   6156
   6157         /* replicate neighbour 4x4 structure for entire partition */
   6158         {
   6159             WORD32 i, j;
   6160             nbr_4x4_t *ps_tmp_4x4;
   6161
   6162             ps_tmp_4x4 = ps_nbr_4x4;
   6163
   6164             for(i = 0; i < (inter_pu_ht >> 2); i++)
   6165             {
   6166                 for(j = 0; j < (inter_pu_wd >> 2); j++)
   6167                 {
   6168                     ps_tmp_4x4[j] = *ps_nbr_4x4;
   6169                 }
   6170                 /* row level update*/
                        /* the CU level nbr array has (cu_size >> 2) entries per row */
   6171                 ps_tmp_4x4 += (cu_size >> 2);
   6172             }
   6173         }
   6174         /* set the neighbour map to 1 */
                /* done per partition, so partition 0 is already marked when the */
                /* neighbour flags of partition 1 are derived                    */
   6175         ihevce_set_inter_nbr_map(
   6176             ps_ctxt->pu1_ctb_nbr_map,
   6177             ps_ctxt->i4_nbr_map_strd,
   6178             cu_pos_x,
   6179             cu_pos_y,
   6180             (inter_pu_wd >> 2),
   6181             (inter_pu_ht >> 2),
   6182             1);
   6183         /* ----------- Motion Compensation for Luma ----------- */
   6184 #if !ENABLE_MIXED_INTER_MODE_EVAL
   6185         {
   6186             IV_API_CALL_STATUS_T valid_mv_cand;
   6187
   6188             /*If the inter candidate is neither merge cand nor skip cand
   6189             then calculate the mc.*/
                    /* MC is also done for skip / merge PUs when */
                    /* u1_high_speed_cu_dec_on is set            */
   6190             if(0 == skip_or_merge_flag || (ps_ctxt->u1_high_speed_cu_dec_on))
   6191             {
   6192                 valid_mv_cand =
   6193                     ihevce_luma_inter_pred_pu(&ps_ctxt->s_mc_ctxt, ps_pu, pu1_pred, pred_stride, 0);
   6194
   6195                 /* assert if the MC is given a valid mv candidate */
   6196                 ASSERT(valid_mv_cand == IV_SUCCESS);
   6197             }
   6198         }
   6199 #endif
                /* After the first of two partitions, move position, pred pointer and */
                /* neighbour pointers to the start of the second partition            */
   6200         if((2 == num_cu_part) && (0 == ctr))
   6201         {
   6202             /* 2Nx__ partition case */
                    /* second partition lies below the first: its top neighbours are */
                    /* the last row of partition 0 inside the CU level nbr array     */
   6203             if(inter_pu_wd == cu_size)
   6204             {
   6205                 cu_pos_y += (inter_pu_ht >> 2);
   6206                 pu1_pred += (inter_pu_ht * pred_stride);
   6207                 ps_nbr_4x4 += (inter_pu_ht >> 2) * (cu_size >> 2);
   6208                 ps_left_nbr_4x4 += (inter_pu_ht >> 2) * nbr_4x4_left_strd;
   6209                 ps_top_nbr_4x4 = ps_nbr_4x4 - (cu_size >> 2);
   6210                 ps_topleft_nbr_4x4 = ps_left_nbr_4x4 - nbr_4x4_left_strd;
   6211             }
   6212
   6213             /* __x2N partition case */
                    /* second partition lies right of the first: its left neighbours */
                    /* are the last column of partition 0, so the left stride        */
                    /* becomes the row stride of the CU level nbr array              */
   6214             if(inter_pu_ht == cu_size)
   6215             {
   6216                 cu_pos_x += (inter_pu_wd >> 2);
   6217                 pu1_pred += inter_pu_wd;
   6218                 ps_nbr_4x4 += (inter_pu_wd >> 2);
   6219                 ps_left_nbr_4x4 = ps_nbr_4x4 - 1;
   6220                 ps_top_nbr_4x4 += (inter_pu_wd >> 2);
   6221                 ps_topleft_nbr_4x4 = ps_top_nbr_4x4 - 1;
   6222                 nbr_4x4_left_strd = (cu_size >> 2);
   6223             }
   6224         }
   6225     }
   6226
   6227     return (rdopt_cost);
   6228 }
   6229 
   6230 /*!
   6231 ******************************************************************************
   6232 * \if Function name : ihevce_intra_chroma_pred_mode_selector \endif
   6233 *
   6234 * \brief
   6235 *    Coding unit processing function for chroma special modes (Non-Luma modes)
   6236 *
   6237 * \param[in] ps_ctxt       enc_loop module ctxt pointer
   6238 * \param[in] ps_chrm_cu_buf_prms    ctxt having chroma related prms
   6239 * \param[in] ps_cu_analyse      pointer to cu analyse
   6240 * \param[in] rd_opt_curr_idx    index in the array of RDopt params
   6241 * \param[in] tu_mode            TU_EQ_CU or other case
   6242 *
   6243 * \return
   6244 *    Stores the best SATD mode, its RDOPT cost, CABAC state, TU bits
   6245 *
   6246 * \author
   6247 *  Ittiam
   6248 *
        * \note
        *    This header describes ihevce_intra_chroma_pred_mode_selector(), which
        *    is defined after ihevce_distortion_based_intra_chroma_mode_selector()
        *    below; it does not describe the function that immediately follows.
        *
   6249 *****************************************************************************
   6250 */
        /*!
        ******************************************************************************
        * \if Function name : ihevce_distortion_based_intra_chroma_mode_selector \endif
        *
        * \brief
        *    Picks a chroma intra mode purely on Hadamard SATD distortion
        *
        * \description
        *    Evaluates the four candidate modes { 0, 1, 10, 26 } (passed through
        *    gau1_chroma422_intra_angle_mapping when u1_is_422 is 1). For every
        *    mode, every TU of the CU (and both sub TUs in the 422 case) is
        *    predicted into pu1_pred and the SATD of Cb and Cr against pu1_src is
        *    accumulated. The mode with the smallest accumulated SATD is returned;
        *    on equal SATD the mode evaluated first is kept.
        *
        * \param[in] ps_cu_analyse   read for b3_cu_pos_x, b3_cu_pos_y, u1_cu_size
        * \param[in] pf_ref_substitution  chroma reference substitution function
        * \param[in] ppf_chroma_ip   chroma intra pred functions, indexed through
        *                            g_i4_ip_funcs[mode]
        * \param[in] ppf_resd_trns_had    functions whose return value is used as
        *                            SATD, indexed with (u1_trans_idx - 1)
        * \param[in] pu1_src         chroma source; Cr is read at byte offset 1 from
        *                            Cb. Neighbour pixels are also taken from here
        * \param[in] i4_src_stride   stride of pu1_src
        * \param[out] pu1_pred       buffer the predictions are written to
        * \param[in] i4_pred_stride  stride of pu1_pred
        * \param[inout] pu1_ctb_nbr_map   neighbour map; TUs are set to 1 while a
        *                            mode is evaluated and the CU area is set back
        *                            to 0 after every mode
        * \param[in] i4_nbr_map_strd stride of pu1_ctb_nbr_map
        * \param[out] pu1_ref_sub_out     scratch output of reference substitution
        * \param[in] i4_alpha_stim_multiplier  0 disables stim injection
        * \param[in] u1_is_cu_noisy  stim injection is done only for noisy CUs
        * \param[in] u1_trans_size   transform size handed to substitution / pred
        * \param[in] u1_trans_idx    index used to select the SATD function
        * \param[in] u1_num_tus_in_cu     number of TUs looped over
        * \param[in] u1_num_4x4_luma_blks_in_tu  TU size in 4x4 luma units, used for
        *                            the neighbour map
        * \param[in] u1_enable_psyRDOPT   forwarded to the stim injection function
        * \param[in] u1_is_422       1 for 4:2:2 (two sub TUs per TU), else 0
        *
        * \return
        *    Best chroma mode (the mapped mode value in the 422 case)
        *
        *****************************************************************************
        */
   6251 UWORD8 ihevce_distortion_based_intra_chroma_mode_selector(
   6252     cu_analyse_t *ps_cu_analyse,
   6253     ihevc_intra_pred_chroma_ref_substitution_ft *pf_ref_substitution,
   6254     pf_intra_pred *ppf_chroma_ip,
   6255     pf_res_trans_luma_had_chroma *ppf_resd_trns_had,
   6256     UWORD8 *pu1_src,
   6257     WORD32 i4_src_stride,
   6258     UWORD8 *pu1_pred,
   6259     WORD32 i4_pred_stride,
   6260     UWORD8 *pu1_ctb_nbr_map,
   6261     WORD32 i4_nbr_map_strd,
   6262     UWORD8 *pu1_ref_sub_out,
   6263     WORD32 i4_alpha_stim_multiplier,
   6264     UWORD8 u1_is_cu_noisy,
   6265     UWORD8 u1_trans_size,
   6266     UWORD8 u1_trans_idx,
   6267     UWORD8 u1_num_tus_in_cu,
   6268     UWORD8 u1_num_4x4_luma_blks_in_tu,
   6269     UWORD8 u1_enable_psyRDOPT,
   6270     UWORD8 u1_is_422)
   6271 {
   6272     UWORD8 u1_chrm_mode;
   6273     UWORD8 ctr;
   6274     WORD32 i4_subtu_idx;
   6275
   6276     WORD32 i = 0;
            /* candidate modes; presumably planar, DC, horizontal and vertical */
            /* in HEVC intra mode numbering - confirm                          */
   6277     UWORD8 u1_chrm_modes[4] = { 0, 1, 10, 26 };
            /* accumulated SATD (Cb + Cr over all TUs) per candidate mode */
   6278     WORD32 i4_satd_had[4] = { 0 };
   6279     WORD32 i4_best_satd_had = INT_MAX;
            /* CU position converted to 4x4 units, as used with the nbr map */
   6280     UWORD8 u1_cu_pos_x = (ps_cu_analyse->b3_cu_pos_x << 1);
   6281     UWORD8 u1_cu_pos_y = (ps_cu_analyse->b3_cu_pos_y << 1);
   6282     WORD32 i4_num_sub_tus = u1_is_422 + 1;
   6283     UWORD8 u1_best_chrm_mode = 0;
   6284
   6285     /* Get the best satd among all possible modes */
   6286     for(i = 0; i < 4; i++)
   6287     {
                /* left neighbours are read from the source buffer (see pu1_left) */
   6288         WORD32 left_strd = i4_src_stride;
   6289
   6290         u1_chrm_mode = (u1_is_422 == 1) ? gau1_chroma422_intra_angle_mapping[u1_chrm_modes[i]]
   6291                                         : u1_chrm_modes[i];
   6292
   6293         /* loop based on num tus in a cu */
   6294         for(ctr = 0; ctr < u1_num_tus_in_cu; ctr++)
   6295         {
   6296             WORD32 luma_nbr_flags;
   6297             WORD32 chrm_pred_func_idx;
   6298
                    /* TU width in bytes: Cb and Cr samples are interleaved */
   6299             WORD32 i4_trans_size_m2 = u1_trans_size << 1;
                    /* (ctr & 1) selects the right column of TUs, (ctr > 1) the     */
                    /* bottom row; the vertical step is doubled in the 422 case     */
   6300             UWORD8 *pu1_tu_src = pu1_src + ((ctr & 1) * i4_trans_size_m2) +
   6301                                  (((ctr > 1) * u1_trans_size * i4_src_stride) << u1_is_422);
   6302             UWORD8 *pu1_tu_pred = pu1_pred + ((ctr & 1) * i4_trans_size_m2) +
   6303                                   (((ctr > 1) * u1_trans_size * i4_pred_stride) << u1_is_422);
   6304             WORD32 i4_curr_tu_pos_x = u1_cu_pos_x + ((ctr & 1) * u1_num_4x4_luma_blks_in_tu);
   6305             WORD32 i4_curr_tu_pos_y = u1_cu_pos_y + ((ctr > 1) * u1_num_4x4_luma_blks_in_tu);
   6306
                    /* availability is derived at luma 4x4 granularity and then */
                    /* converted per sub TU by ihevce_get_intra_chroma_tu_nbr   */
   6307             luma_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
   6308                 pu1_ctb_nbr_map,
   6309                 i4_nbr_map_strd,
   6310                 i4_curr_tu_pos_x,
   6311                 i4_curr_tu_pos_y,
   6312                 u1_num_4x4_luma_blks_in_tu,
   6313                 u1_num_4x4_luma_blks_in_tu);
   6314
   6315             for(i4_subtu_idx = 0; i4_subtu_idx < i4_num_sub_tus; i4_subtu_idx++)
   6316             {
   6317                 WORD32 nbr_flags;
   6318
                        /* the second sub TU starts u1_trans_size rows below the first */
   6319                 UWORD8 *pu1_cur_src =
   6320                     pu1_tu_src + ((i4_subtu_idx == 1) * u1_trans_size * i4_src_stride);
   6321                 UWORD8 *pu1_cur_pred =
   6322                     pu1_tu_pred + ((i4_subtu_idx == 1) * u1_trans_size * i4_pred_stride);
                        /* neighbours come from the source buffer: one interleaved */
                        /* Cb/Cr pair (2 bytes) to the left and one row above      */
   6323                 UWORD8 *pu1_left = pu1_cur_src - 2;
   6324                 UWORD8 *pu1_top = pu1_cur_src - i4_src_stride;
   6325                 UWORD8 *pu1_top_left = pu1_top - 2;
   6326
   6327                 nbr_flags = ihevce_get_intra_chroma_tu_nbr(
   6328                     luma_nbr_flags, i4_subtu_idx, u1_trans_size, u1_is_422);
   6329
   6330                 /* call the chroma reference array substitution */
   6331                 pf_ref_substitution(
   6332                     pu1_top_left,
   6333                     pu1_top,
   6334                     pu1_left,
   6335                     left_strd,
   6336                     u1_trans_size,
   6337                     nbr_flags,
   6338                     pu1_ref_sub_out,
   6339                     1);
   6340
   6341                 /* use the look up to get the function idx */
   6342                 chrm_pred_func_idx = g_i4_ip_funcs[u1_chrm_mode];
   6343
   6344                 /* call the intra prediction function */
   6345                 ppf_chroma_ip[chrm_pred_func_idx](
   6346                     pu1_ref_sub_out, 1, pu1_cur_pred, i4_pred_stride, u1_trans_size, u1_chrm_mode);
   6347
                        /* plain SATD unless the CU is noisy and stim weight is non zero */
   6348                 if(!u1_is_cu_noisy || !i4_alpha_stim_multiplier)
   6349                 {
   6350                     /* compute Hadamard-transform satd : Cb */
   6351                     i4_satd_had[i] += ppf_resd_trns_had[u1_trans_idx - 1](
   6352                         pu1_cur_src, i4_src_stride, pu1_cur_pred, i4_pred_stride, NULL, 0);
   6353
   6354                     /* compute Hadamard-transform satd : Cr */
   6355                     i4_satd_had[i] += ppf_resd_trns_had[u1_trans_idx - 1](
   6356                         pu1_cur_src + 1, i4_src_stride, pu1_cur_pred + 1, i4_pred_stride, NULL, 0);
   6357                 }
   6358                 else
   6359                 {
   6360                     WORD32 i4_satd;
   6361
   6362                     /* compute Hadamard-transform satd : Cb */
   6363                     i4_satd = ppf_resd_trns_had[u1_trans_idx - 1](
   6364                         pu1_cur_src, i4_src_stride, pu1_cur_pred, i4_pred_stride, NULL, 0);
   6365
                            /* adjust the Cb SATD through the stim injection helper */
   6366                     i4_satd = ihevce_inject_stim_into_distortion(
   6367                         pu1_cur_src,
   6368                         i4_src_stride,
   6369                         pu1_cur_pred,
   6370                         i4_pred_stride,
   6371                         i4_satd,
   6372                         i4_alpha_stim_multiplier,
   6373                         u1_trans_size,
   6374                         0,
   6375                         u1_enable_psyRDOPT,
   6376                         U_PLANE);
   6377
   6378                     i4_satd_had[i] += i4_satd;
   6379
   6380                     /* compute Hadamard-transform satd : Cr */
   6381                     i4_satd = ppf_resd_trns_had[u1_trans_idx - 1](
   6382                         pu1_cur_src + 1, i4_src_stride, pu1_cur_pred + 1, i4_pred_stride, NULL, 0);
   6383
                            /* NOTE(review): the pointers are not offset by 1 here, */
                            /* unlike the SATD call above; presumably the callee    */
                            /* selects Cr through V_PLANE - confirm                 */
   6384                     i4_satd = ihevce_inject_stim_into_distortion(
   6385                         pu1_cur_src,
   6386                         i4_src_stride,
   6387                         pu1_cur_pred,
   6388                         i4_pred_stride,
   6389                         i4_satd,
   6390                         i4_alpha_stim_multiplier,
   6391                         u1_trans_size,
   6392                         0,
   6393                         u1_enable_psyRDOPT,
   6394                         V_PLANE);
   6395
   6396                     i4_satd_had[i] += i4_satd;
   6397                 }
   6398             }
   6399
   6400             /* set the neighbour map to 1 */
                    /* so that the following TUs of this CU see this TU as available */
   6401             ihevce_set_nbr_map(
   6402                 pu1_ctb_nbr_map,
   6403                 i4_nbr_map_strd,
   6404                 i4_curr_tu_pos_x,
   6405                 i4_curr_tu_pos_y,
   6406                 u1_num_4x4_luma_blks_in_tu,
   6407                 1);
   6408         }
   6409
   6410         /* set the neighbour map to 0 */
                /* whole CU is cleared so the next mode starts from the same state */
   6411         ihevce_set_nbr_map(
   6412             pu1_ctb_nbr_map,
   6413             i4_nbr_map_strd,
   6414             (ps_cu_analyse->b3_cu_pos_x << 1),
   6415             (ps_cu_analyse->b3_cu_pos_y << 1),
   6416             (ps_cu_analyse->u1_cu_size >> 2),
   6417             0);
   6418
   6419         /* Get the least SATD and corresponding mode */
                /* strict compare: on equal SATD the earlier mode is kept */
   6420         if(i4_best_satd_had > i4_satd_had[i])
   6421         {
   6422             i4_best_satd_had = i4_satd_had[i];
   6423             u1_best_chrm_mode = u1_chrm_mode;
   6424         }
   6425     }
   6426
   6427     return u1_best_chrm_mode;
   6428 }
   6429 
   6430 void ihevce_intra_chroma_pred_mode_selector(
   6431     ihevce_enc_loop_ctxt_t *ps_ctxt,
   6432     enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
   6433     cu_analyse_t *ps_cu_analyse,
   6434     WORD32 rd_opt_curr_idx,
   6435     WORD32 tu_mode,
   6436     WORD32 i4_alpha_stim_multiplier,
   6437     UWORD8 u1_is_cu_noisy)
   6438 {
   6439     chroma_intra_satd_ctxt_t *ps_chr_intra_satd_ctxt;
   6440 
   6441     ihevc_intra_pred_chroma_ref_substitution_ft *ihevc_intra_pred_chroma_ref_substitution_fptr;
   6442 
   6443     UWORD8 *pu1_pred;
   6444     WORD32 trans_size;
   6445     WORD32 num_tus_in_cu;
   6446     WORD32 pred_strd;
   6447     WORD32 ctr;
   6448     WORD32 i4_subtu_idx;
   6449     WORD32 i4_num_sub_tus;
   6450     WORD32 trans_idx;
   6451     WORD32 scan_idx;
   6452     WORD32 num_4x4_luma_in_tu;
   6453     WORD32 cu_pos_x;
   6454     WORD32 cu_pos_y;
   6455 
   6456     recon_datastore_t *aps_recon_datastore[2] = { &ps_ctxt->as_cu_prms[0].s_recon_datastore,
   6457                                                   &ps_ctxt->as_cu_prms[1].s_recon_datastore };
   6458 
   6459     LWORD64 chrm_cod_cost = 0;
   6460     WORD32 chrm_tu_bits = 0;
   6461     WORD32 best_chrm_mode = DM_CHROMA_IDX;
   6462     UWORD8 *pu1_chrm_src = ps_chrm_cu_buf_prms->pu1_curr_src;
   6463     WORD32 chrm_src_stride = ps_chrm_cu_buf_prms->i4_chrm_src_stride;
   6464     UWORD8 *pu1_cu_left = ps_chrm_cu_buf_prms->pu1_cu_left;
   6465     UWORD8 *pu1_cu_top = ps_chrm_cu_buf_prms->pu1_cu_top;
   6466     UWORD8 *pu1_cu_top_left = ps_chrm_cu_buf_prms->pu1_cu_top_left;
   6467     WORD32 cu_left_stride = ps_chrm_cu_buf_prms->i4_cu_left_stride;
   6468     WORD32 cu_size = ps_cu_analyse->u1_cu_size;
   6469     WORD32 i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq;
   6470     WORD32 i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh;
   6471     UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
   6472 
   6473     ihevc_intra_pred_chroma_ref_substitution_fptr =
   6474         ps_ctxt->ps_func_selector->ihevc_intra_pred_chroma_ref_substitution_fptr;
   6475     i4_num_sub_tus = (u1_is_422 == 1) + 1;
   6476 
   6477 #if DISABLE_RDOQ_INTRA
   6478     i4_perform_rdoq = 0;
   6479 #endif
   6480 
   6481     if(TU_EQ_CU == tu_mode)
   6482     {
   6483         num_tus_in_cu = 1;
   6484         trans_size = cu_size >> 1;
   6485         num_4x4_luma_in_tu = trans_size >> 1; /*at luma level*/
   6486         ps_chr_intra_satd_ctxt = &ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[tu_mode];
   6487     }
   6488     else
   6489     {
   6490         num_tus_in_cu = 4;
   6491         trans_size = cu_size >> 2;
   6492         num_4x4_luma_in_tu = trans_size >> 1; /*at luma level*/
   6493 
   6494         /* For 8x8 CU only one TU */
   6495         if(MIN_TU_SIZE > trans_size)
   6496         {
   6497             trans_size = MIN_TU_SIZE;
   6498             num_tus_in_cu = 1;
   6499             /* chroma nbr avail. is derived based on luma.
   6500             for 4x4 chrm use 8x8 luma's size */
   6501             num_4x4_luma_in_tu = num_4x4_luma_in_tu << 1;
   6502         }
   6503 
   6504         ps_chr_intra_satd_ctxt = &ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[tu_mode];
   6505     }
   6506 
   6507     /* Can't be TU_EQ_SUBCU case */
   6508     ASSERT(TU_EQ_SUBCU != tu_mode);
   6509 
   6510     /* translate the transform size to index */
   6511     trans_idx = trans_size >> 2;
   6512 
   6513     pu1_pred = (UWORD8 *)ps_chr_intra_satd_ctxt->pv_pred_data;
   6514 
   6515     pred_strd = ps_chr_intra_satd_ctxt->i4_pred_stride;
   6516 
   6517     /* for 16x16 cases */
   6518     if(16 == trans_size)
   6519     {
   6520         trans_idx = 3;
   6521     }
   6522 
   6523     best_chrm_mode = ihevce_distortion_based_intra_chroma_mode_selector(
   6524         ps_cu_analyse,
   6525         ihevc_intra_pred_chroma_ref_substitution_fptr,
   6526         ps_ctxt->apf_chrm_ip,
   6527         ps_ctxt->apf_chrm_resd_trns_had,
   6528         pu1_chrm_src,
   6529         chrm_src_stride,
   6530         pu1_pred,
   6531         pred_strd,
   6532         ps_ctxt->pu1_ctb_nbr_map,
   6533         ps_ctxt->i4_nbr_map_strd,
   6534         (UWORD8 *)ps_ctxt->pv_ref_sub_out,
   6535         i4_alpha_stim_multiplier,
   6536         u1_is_cu_noisy,
   6537         trans_size,
   6538         trans_idx,
   6539         num_tus_in_cu,
   6540         num_4x4_luma_in_tu,
   6541         ps_ctxt->u1_enable_psyRDOPT,
   6542         u1_is_422);
   6543 
   6544     /* Store the best chroma mode */
   6545     ps_chr_intra_satd_ctxt->u1_best_cr_mode = best_chrm_mode;
   6546 
   6547     /* evaluate RDOPT cost for the Best mode */
   6548     {
   6549         WORD32 i4_subtu_pos_x;
   6550         WORD32 i4_subtu_pos_y;
   6551         UWORD8 u1_compute_spatial_ssd;
   6552 
   6553         WORD32 ai4_total_bytes_offset_cb[2] = { 0, 0 };
   6554         WORD32 ai4_total_bytes_offset_cr[2] = { 0, 0 };
   6555         /* State for prefix bin of chroma intra pred mode before CU encode */
   6556         UWORD8 u1_chroma_intra_mode_prefix_state =
   6557             ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_CHROMA_PRED_MODE];
   6558         WORD32 luma_trans_size = trans_size << 1;
   6559         WORD32 calc_recon = 0;
   6560         UWORD8 *pu1_left = pu1_cu_left;
   6561         UWORD8 *pu1_top = pu1_cu_top;
   6562         UWORD8 *pu1_top_left = pu1_cu_top_left;
   6563         WORD32 left_strd = cu_left_stride;
   6564 
   6565         if(ps_ctxt->i1_cu_qp_delta_enable)
   6566         {
   6567             WORD32 i4_act_counter = 0, i4_act_counter_lamda = 0;
   6568             if(ps_cu_analyse->u1_cu_size == 64)
   6569             {
   6570                 ASSERT(
   6571                     (luma_trans_size == 32) || (luma_trans_size == 16) || (luma_trans_size == 8) ||
   6572                     (luma_trans_size == 4));
   6573                 i4_act_counter = (luma_trans_size == 16) +
   6574                                  2 * ((luma_trans_size == 8) || (luma_trans_size == 4));
   6575                 i4_act_counter_lamda = 3;
   6576             }
   6577             else if(ps_cu_analyse->u1_cu_size == 32)
   6578             {
   6579                 ASSERT(
   6580                     (luma_trans_size == 32) || (luma_trans_size == 16) || (luma_trans_size == 8) ||
   6581                     (luma_trans_size == 4));
   6582                 i4_act_counter = (luma_trans_size == 16) +
   6583                                  2 * ((luma_trans_size == 8) || (luma_trans_size == 4));
   6584                 i4_act_counter_lamda = 0;
   6585             }
   6586             else if(ps_cu_analyse->u1_cu_size == 16)
   6587             {
   6588                 ASSERT((luma_trans_size == 16) || (luma_trans_size == 8) || (luma_trans_size == 4));
   6589                 i4_act_counter = (luma_trans_size == 8) || (luma_trans_size == 4);
   6590                 i4_act_counter_lamda = 0;
   6591             }
   6592             else if(ps_cu_analyse->u1_cu_size == 8)
   6593             {
   6594                 ASSERT((luma_trans_size == 8) || (luma_trans_size == 4));
   6595                 i4_act_counter = 1;
   6596                 i4_act_counter_lamda = 0;
   6597             }
   6598             else
   6599             {
   6600                 ASSERT(0);
   6601             }
   6602             /*assumption is that control comes here for intras*/
   6603             if(ps_ctxt->i4_use_ctb_level_lamda)
   6604             {
   6605                 ihevce_compute_cu_level_QP(
   6606                     ps_ctxt, ps_cu_analyse->i4_act_factor[i4_act_counter][1], -1, 0);
   6607             }
   6608             else
   6609             {
   6610                 ihevce_compute_cu_level_QP(
   6611                     ps_ctxt,
   6612                     ps_cu_analyse->i4_act_factor[i4_act_counter][1],
   6613                     ps_cu_analyse->i4_act_factor[i4_act_counter_lamda][1],
   6614                     0);
   6615             }
   6616 
   6617             ps_cu_analyse->i1_cu_qp = ps_ctxt->i4_cu_qp;
   6618         }
   6619 
   6620         u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
   6621                                  (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
   6622                                  CONVERT_SSDS_TO_SPATIAL_DOMAIN;
   6623 
   6624         if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
   6625         {
   6626             u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
   6627                                      CONVERT_SSDS_TO_SPATIAL_DOMAIN;
   6628         }
   6629 
   6630         /* get the 4x4 level position of current cu */
   6631         cu_pos_x = (ps_cu_analyse->b3_cu_pos_x << 1);
   6632         cu_pos_y = (ps_cu_analyse->b3_cu_pos_y << 1);
   6633 
   6634         calc_recon = !u1_compute_spatial_ssd && ((4 == num_tus_in_cu) || (u1_is_422 == 1));
   6635 
   6636         if(calc_recon || u1_compute_spatial_ssd)
   6637         {
   6638             aps_recon_datastore[0]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 1;
   6639             aps_recon_datastore[1]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 1;
   6640         }
   6641         else
   6642         {
   6643             aps_recon_datastore[0]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 0;
   6644             aps_recon_datastore[1]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 0;
   6645         }
   6646 
   6647         /* loop based on num tus in a cu */
   6648         for(ctr = 0; ctr < num_tus_in_cu; ctr++)
   6649         {
   6650             WORD16 *pi2_cur_deq_data_cb;
   6651             WORD16 *pi2_cur_deq_data_cr;
   6652 
   6653             WORD32 deq_data_strd = ps_chr_intra_satd_ctxt->i4_iq_buff_stride;
   6654             WORD32 luma_nbr_flags = 0;
   6655 
   6656             luma_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
   6657                 ps_ctxt->pu1_ctb_nbr_map,
   6658                 ps_ctxt->i4_nbr_map_strd,
   6659                 (ctr & 1) * (luma_trans_size >> 2) + cu_pos_x,
   6660                 (ctr > 1) * (luma_trans_size >> 2) + cu_pos_y,
   6661                 (luma_trans_size >> 2),
   6662                 (luma_trans_size >> 2));
   6663 
   6664             for(i4_subtu_idx = 0; i4_subtu_idx < i4_num_sub_tus; i4_subtu_idx++)
   6665             {
   6666                 WORD32 cbf, num_bytes;
   6667                 LWORD64 trans_ssd_u, trans_ssd_v;
   6668                 UWORD8 u1_is_recon_available;
   6669 
   6670                 WORD32 trans_size_m2 = trans_size << 1;
   6671                 UWORD8 *pu1_cur_src = pu1_chrm_src + ((ctr & 1) * trans_size_m2) +
   6672                                       (((ctr > 1) * trans_size * chrm_src_stride) << u1_is_422) +
   6673                                       (i4_subtu_idx * trans_size * chrm_src_stride);
   6674                 UWORD8 *pu1_cur_pred = pu1_pred + ((ctr & 1) * trans_size_m2) +
   6675                                        (((ctr > 1) * trans_size * pred_strd) << u1_is_422) +
   6676                                        (i4_subtu_idx * trans_size * pred_strd);
   6677                 WORD32 i4_recon_stride = aps_recon_datastore[0]->i4_chromaRecon_stride;
   6678                 UWORD8 *pu1_cur_recon = ((UWORD8 *)aps_recon_datastore[0]
   6679                                              ->apv_chroma_recon_bufs[1 + (num_tus_in_cu > 1)]) +
   6680                                         ((ctr & 1) * trans_size_m2) +
   6681                                         (((ctr > 1) * trans_size * i4_recon_stride) << u1_is_422) +
   6682                                         (i4_subtu_idx * trans_size * i4_recon_stride);
   6683 
   6684                 /* Use Chroma coeff/iq buf of the cur. intra cand. Not rememb.
   6685                 chroma coeff/iq for high quality intra SATD special modes. Will
   6686                 be over written by coeff of luma mode in chroma_rdopt call */
   6687                 UWORD8 *pu1_ecd_data_cb =
   6688                     &ps_chr_intra_satd_ctxt->au1_scan_coeff_cb[i4_subtu_idx][0];
   6689                 UWORD8 *pu1_ecd_data_cr =
   6690                     &ps_chr_intra_satd_ctxt->au1_scan_coeff_cr[i4_subtu_idx][0];
   6691 
   6692                 WORD32 chrm_pred_func_idx = 0;
   6693                 LWORD64 curr_cb_cod_cost = 0;
   6694                 LWORD64 curr_cr_cod_cost = 0;
   6695                 WORD32 nbr_flags = 0;
   6696 
   6697                 i4_subtu_pos_x = (((ctr & 1) * trans_size_m2) >> 2);
   6698                 i4_subtu_pos_y = (((ctr > 1) * trans_size) >> (!u1_is_422 + 1)) +
   6699                                  ((i4_subtu_idx * trans_size) >> 2);
   6700                 pi2_cur_deq_data_cb = &ps_chr_intra_satd_ctxt->ai2_iq_data_cb[0] +
   6701                                       ((ctr & 1) * trans_size) +
   6702                                       (((ctr > 1) * trans_size * deq_data_strd) << u1_is_422) +
   6703                                       (i4_subtu_idx * trans_size * deq_data_strd);
   6704                 pi2_cur_deq_data_cr = &ps_chr_intra_satd_ctxt->ai2_iq_data_cr[0] +
   6705                                       ((ctr & 1) * trans_size) +
   6706                                       (((ctr > 1) * trans_size * deq_data_strd) << u1_is_422) +
   6707                                       (i4_subtu_idx * trans_size * deq_data_strd);
   6708 
   6709                 /* left cu boundary */
   6710                 if(0 == i4_subtu_pos_x)
   6711                 {
   6712                     left_strd = cu_left_stride;
   6713                     pu1_left = pu1_cu_left + (i4_subtu_pos_y << 2) * left_strd;
   6714                 }
   6715                 else
   6716                 {
   6717                     pu1_left = pu1_cur_recon - 2;
   6718                     left_strd = i4_recon_stride;
   6719                 }
   6720 
   6721                 /* top cu boundary */
   6722                 if(0 == i4_subtu_pos_y)
   6723                 {
   6724                     pu1_top = pu1_cu_top + (i4_subtu_pos_x << 2);
   6725                 }
   6726                 else
   6727                 {
   6728                     pu1_top = pu1_cur_recon - i4_recon_stride;
   6729                 }
   6730 
   6731                 /* by default top left is set to cu top left */
   6732                 pu1_top_left = pu1_cu_top_left;
   6733 
   6734                 /* top left based on position */
   6735                 if((0 != i4_subtu_pos_y) && (0 == i4_subtu_pos_x))
   6736                 {
   6737                     pu1_top_left = pu1_left - left_strd;
   6738                 }
   6739                 else if(0 != i4_subtu_pos_x)
   6740                 {
   6741                     pu1_top_left = pu1_top - 2;
   6742                 }
   6743 
   6744                 /* populate the coeffs scan idx */
   6745                 scan_idx = SCAN_DIAG_UPRIGHT;
   6746 
   6747                 /* RDOPT copy States :  TU init (best until prev TU) to current */
   6748                 COPY_CABAC_STATES(
   6749                     &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
   6750                          .s_cabac_ctxt.au1_ctxt_models[0],
   6751                     &ps_ctxt->au1_rdopt_init_ctxt_models[0],
   6752                     IHEVC_CAB_CTXT_END);
   6753 
   6754                 /* for 4x4 transforms based on intra pred mode scan is choosen*/
   6755                 if(4 == trans_size)
   6756                 {
   6757                     /* for modes from 22 upto 30 horizontal scan is used */
   6758                     if((best_chrm_mode > 21) && (best_chrm_mode < 31))
   6759                     {
   6760                         scan_idx = SCAN_HORZ;
   6761                     }
   6762                     /* for modes from 6 upto 14 vertical scan is used */
   6763                     else if((best_chrm_mode > 5) && (best_chrm_mode < 15))
   6764                     {
   6765                         scan_idx = SCAN_VERT;
   6766                     }
   6767                 }
   6768 
   6769                 nbr_flags = ihevce_get_intra_chroma_tu_nbr(
   6770                     luma_nbr_flags, i4_subtu_idx, trans_size, u1_is_422);
   6771 
   6772                 /* call the chroma reference array substitution */
   6773                 ihevc_intra_pred_chroma_ref_substitution_fptr(
   6774                     pu1_top_left,
   6775                     pu1_top,
   6776                     pu1_left,
   6777                     left_strd,
   6778                     trans_size,
   6779                     nbr_flags,
   6780                     (UWORD8 *)ps_ctxt->pv_ref_sub_out,
   6781                     1);
   6782 
   6783                 /* use the look up to get the function idx */
   6784                 chrm_pred_func_idx = g_i4_ip_funcs[best_chrm_mode];
   6785 
   6786                 /* call the intra prediction function */
   6787                 ps_ctxt->apf_chrm_ip[chrm_pred_func_idx](
   6788                     (UWORD8 *)ps_ctxt->pv_ref_sub_out,
   6789                     1,
   6790                     pu1_cur_pred,
   6791                     pred_strd,
   6792                     trans_size,
   6793                     best_chrm_mode);
   6794 
   6795                 /* UPLANE RDOPT Loop */
   6796                 {
   6797                     WORD32 tu_bits;
   6798 
   6799                     cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
   6800                         ps_ctxt,
   6801                         pu1_cur_pred,
   6802                         pred_strd,
   6803                         pu1_cur_src,
   6804                         chrm_src_stride,
   6805                         pi2_cur_deq_data_cb,
   6806                         deq_data_strd,
   6807                         pu1_cur_recon,
   6808                         i4_recon_stride,
   6809                         pu1_ecd_data_cb + ai4_total_bytes_offset_cb[i4_subtu_idx],
   6810                         ps_ctxt->au1_cu_csbf,
   6811                         ps_ctxt->i4_cu_csbf_strd,
   6812                         trans_size,
   6813                         scan_idx,
   6814                         1,
   6815                         &num_bytes,
   6816                         &tu_bits,
   6817                         &ps_chr_intra_satd_ctxt->ai4_zero_col_cb[i4_subtu_idx][ctr],
   6818                         &ps_chr_intra_satd_ctxt->ai4_zero_row_cb[i4_subtu_idx][ctr],
   6819                         &u1_is_recon_available,
   6820                         i4_perform_sbh,
   6821                         i4_perform_rdoq,
   6822                         &trans_ssd_u,
   6823 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
   6824                         i4_alpha_stim_multiplier,
   6825                         u1_is_cu_noisy,
   6826 #endif
   6827                         0,
   6828                         u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
   6829                         U_PLANE);
   6830 
   6831 #if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS && COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL
   6832                     if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
   6833                     {
   6834 #if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
   6835                         trans_ssd_u = ihevce_inject_stim_into_distortion(
   6836                             pu1_cur_src,
   6837                             chrm_src_stride,
   6838                             pu1_cur_pred,
   6839                             pred_strd,
   6840                             trans_ssd_u,
   6841                             i4_alpha_stim_multiplier,
   6842                             trans_size,
   6843                             0,
   6844                             ps_ctxt->u1_enable_psyRDOPT,
   6845                             U_PLANE);
   6846 #else
   6847                         if(u1_compute_spatial_ssd && u1_is_recon_available)
   6848                         {
   6849                             trans_ssd_u = ihevce_inject_stim_into_distortion(
   6850                                 pu1_cur_src,
   6851                                 chrm_src_stride,
   6852                                 pu1_cur_recon,
   6853                                 i4_recon_stride,
   6854                                 trans_ssd_u,
   6855                                 i4_alpha_stim_multiplier,
   6856                                 trans_size,
   6857                                 0,
   6858                                 ps_ctxt->u1_enable_psyRDOPT,
   6859                                 U_PLANE);
   6860                         }
   6861                         else
   6862                         {
   6863                             trans_ssd_u = ihevce_inject_stim_into_distortion(
   6864                                 pu1_cur_src,
   6865                                 chrm_src_stride,
   6866                                 pu1_cur_pred,
   6867                                 pred_strd,
   6868                                 trans_ssd_u,
   6869                                 i4_alpha_stim_multiplier,
   6870                                 trans_size,
   6871                                 0,
   6872                                 ps_ctxt->u1_enable_psyRDOPT,
   6873                                 U_PLANE);
   6874                         }
   6875 #endif
   6876                     }
   6877 #endif
   6878 
   6879                     /* RDOPT copy States :  New updated after curr TU to TU init */
   6880                     if(0 != cbf)
   6881                     {
   6882                         memcpy(
   6883                             &ps_ctxt->au1_rdopt_init_ctxt_models[0],
   6884                             &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
   6885                                  .s_cabac_ctxt.au1_ctxt_models[0],
   6886                             IHEVC_CAB_CTXT_END);
   6887                     }
   6888                     /* RDOPT copy States :  Restoring back the Cb init state to Cr */
   6889                     else
   6890                     {
   6891                         memcpy(
   6892                             &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
   6893                                  .s_cabac_ctxt.au1_ctxt_models[0],
   6894                             &ps_ctxt->au1_rdopt_init_ctxt_models[0],
   6895                             IHEVC_CAB_CTXT_END);
   6896                     }
   6897 
   6898                     if(calc_recon || (!u1_is_recon_available && u1_compute_spatial_ssd))
   6899                     {
   6900                         ihevce_chroma_it_recon_fxn(
   6901                             ps_ctxt,
   6902                             pi2_cur_deq_data_cb,
   6903                             deq_data_strd,
   6904                             pu1_cur_pred,
   6905                             pred_strd,
   6906                             pu1_cur_recon,
   6907                             i4_recon_stride,
   6908                             (pu1_ecd_data_cb + ai4_total_bytes_offset_cb[i4_subtu_idx]),
   6909                             trans_size,
   6910                             cbf,
   6911                             ps_chr_intra_satd_ctxt->ai4_zero_col_cb[i4_subtu_idx][ctr],
   6912                             ps_chr_intra_satd_ctxt->ai4_zero_row_cb[i4_subtu_idx][ctr],
   6913                             U_PLANE);
   6914                     }
   6915 
   6916                     ps_chr_intra_satd_ctxt->au1_cbf_cb[i4_subtu_idx][ctr] = cbf;
   6917                     curr_cb_cod_cost =
   6918                         trans_ssd_u +
   6919                         COMPUTE_RATE_COST_CLIP30(
   6920                             tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
   6921                     chrm_tu_bits += tu_bits;
   6922                     ai4_total_bytes_offset_cb[i4_subtu_idx] += num_bytes;
   6923                     ps_chr_intra_satd_ctxt->ai4_num_bytes_scan_coeff_cb_per_tu[i4_subtu_idx][ctr] =
   6924                         num_bytes;
   6925                 }
   6926 
   6927                 /* VPLANE RDOPT Loop */
   6928                 {
   6929                     WORD32 tu_bits;
   6930 
   6931                     cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
   6932                         ps_ctxt,
   6933                         pu1_cur_pred,
   6934                         pred_strd,
   6935                         pu1_cur_src,
   6936                         chrm_src_stride,
   6937                         pi2_cur_deq_data_cr,
   6938                         deq_data_strd,
   6939                         pu1_cur_recon,
   6940                         i4_recon_stride,
   6941                         pu1_ecd_data_cr + ai4_total_bytes_offset_cr[i4_subtu_idx],
   6942                         ps_ctxt->au1_cu_csbf,
   6943                         ps_ctxt->i4_cu_csbf_strd,
   6944                         trans_size,
   6945                         scan_idx,
   6946                         1,
   6947                         &num_bytes,
   6948                         &tu_bits,
   6949                         &ps_chr_intra_satd_ctxt->ai4_zero_col_cr[i4_subtu_idx][ctr],
   6950                         &ps_chr_intra_satd_ctxt->ai4_zero_row_cr[i4_subtu_idx][ctr],
   6951                         &u1_is_recon_available,
   6952                         i4_perform_sbh,
   6953                         i4_perform_rdoq,
   6954                         &trans_ssd_v,
   6955 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
   6956                         i4_alpha_stim_multiplier,
   6957                         u1_is_cu_noisy,
   6958 #endif
   6959                         0,
   6960                         u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
   6961                         V_PLANE);
   6962 
   6963 #if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS && COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL
   6964                     if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
   6965                     {
   6966 #if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
   6967                         trans_ssd_v = ihevce_inject_stim_into_distortion(
   6968                             pu1_cur_src,
   6969                             chrm_src_stride,
   6970                             pu1_cur_pred,
   6971                             pred_strd,
   6972                             trans_ssd_v,
   6973                             i4_alpha_stim_multiplier,
   6974                             trans_size,
   6975                             0,
   6976                             ps_ctxt->u1_enable_psyRDOPT,
   6977                             V_PLANE);
   6978 #else
   6979                         if(u1_compute_spatial_ssd && u1_is_recon_available)
   6980                         {
   6981                             trans_ssd_v = ihevce_inject_stim_into_distortion(
   6982                                 pu1_cur_src,
   6983                                 chrm_src_stride,
   6984                                 pu1_cur_recon,
   6985                                 i4_recon_stride,
   6986                                 trans_ssd_v,
   6987                                 i4_alpha_stim_multiplier,
   6988                                 trans_size,
   6989                                 0,
   6990                                 ps_ctxt->u1_enable_psyRDOPT,
   6991                                 V_PLANE);
   6992                         }
   6993                         else
   6994                         {
   6995                             trans_ssd_v = ihevce_inject_stim_into_distortion(
   6996                                 pu1_cur_src,
   6997                                 chrm_src_stride,
   6998                                 pu1_cur_pred,
   6999                                 pred_strd,
   7000                                 trans_ssd_v,
   7001                                 i4_alpha_stim_multiplier,
   7002                                 trans_size,
   7003                                 0,
   7004                                 ps_ctxt->u1_enable_psyRDOPT,
   7005                                 V_PLANE);
   7006                         }
   7007 #endif
   7008                     }
   7009 #endif
   7010 
   7011                     /* RDOPT copy States :  New updated after curr TU to TU init */
   7012                     if(0 != cbf)
   7013                     {
   7014                         COPY_CABAC_STATES(
   7015                             &ps_ctxt->au1_rdopt_init_ctxt_models[0],
   7016                             &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
   7017                                  .s_cabac_ctxt.au1_ctxt_models[0],
   7018                             IHEVC_CAB_CTXT_END);
   7019                     }
   7020                     /* RDOPT copy States :  Restoring back the Cb init state to Cr */
   7021                     else
   7022                     {
   7023                         COPY_CABAC_STATES(
   7024                             &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
   7025                                  .s_cabac_ctxt.au1_ctxt_models[0],
   7026                             &ps_ctxt->au1_rdopt_init_ctxt_models[0],
   7027                             IHEVC_CAB_CTXT_END);
   7028                     }
   7029 
   7030                     if(calc_recon || (!u1_is_recon_available && u1_compute_spatial_ssd))
   7031                     {
   7032                         ihevce_chroma_it_recon_fxn(
   7033                             ps_ctxt,
   7034                             pi2_cur_deq_data_cr,
   7035                             deq_data_strd,
   7036                             pu1_cur_pred,
   7037                             pred_strd,
   7038                             pu1_cur_recon,
   7039                             i4_recon_stride,
   7040                             (pu1_ecd_data_cr + ai4_total_bytes_offset_cr[i4_subtu_idx]),
   7041                             trans_size,
   7042                             cbf,
   7043                             ps_chr_intra_satd_ctxt->ai4_zero_col_cr[i4_subtu_idx][ctr],
   7044                             ps_chr_intra_satd_ctxt->ai4_zero_row_cr[i4_subtu_idx][ctr],
   7045                             V_PLANE);
   7046                     }
   7047 
   7048                     ps_chr_intra_satd_ctxt->au1_cbf_cr[i4_subtu_idx][ctr] = cbf;
   7049                     curr_cr_cod_cost =
   7050                         trans_ssd_v +
   7051                         COMPUTE_RATE_COST_CLIP30(
   7052                             tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
   7053                     chrm_tu_bits += tu_bits;
   7054                     ai4_total_bytes_offset_cr[i4_subtu_idx] += num_bytes;
   7055                     ps_chr_intra_satd_ctxt->ai4_num_bytes_scan_coeff_cr_per_tu[i4_subtu_idx][ctr] =
   7056                         num_bytes;
   7057                 }
   7058 
   7059                 chrm_cod_cost += curr_cb_cod_cost;
   7060                 chrm_cod_cost += curr_cr_cod_cost;
   7061             }
   7062 
   7063             /* set the neighbour map to 1 */
   7064             ihevce_set_nbr_map(
   7065                 ps_ctxt->pu1_ctb_nbr_map,
   7066                 ps_ctxt->i4_nbr_map_strd,
   7067                 (ctr & 1) * (luma_trans_size >> 2) + cu_pos_x,
   7068                 (ctr > 1) * (luma_trans_size >> 2) + cu_pos_y,
   7069                 (luma_trans_size >> 2),
   7070                 1);
   7071         }
   7072 
   7073         /* set the neighbour map to 0 */
   7074         ihevce_set_nbr_map(
   7075             ps_ctxt->pu1_ctb_nbr_map,
   7076             ps_ctxt->i4_nbr_map_strd,
   7077             (ps_cu_analyse->b3_cu_pos_x << 1),
   7078             (ps_cu_analyse->b3_cu_pos_y << 1),
   7079             (ps_cu_analyse->u1_cu_size >> 2),
   7080             0);
   7081 
   7082         /* Account for coding b3_chroma_intra_pred_mode prefix and suffix bins */
   7083         /* This is done by adding the bits for signalling chroma mode (0-3)    */
   7084         /* and subtracting the bits for chroma mode same as luma mode (4)      */
   7085 #if CHROMA_RDOPT_ENABLE
   7086         {
   7087             /* Estimate bits to encode prefix bin as 1 for b3_chroma_intra_pred_mode */
   7088             WORD32 bits_frac_1 =
   7089                 gau2_ihevce_cabac_bin_to_bits[u1_chroma_intra_mode_prefix_state ^ 1];
   7090 
   7091             WORD32 bits_for_mode_0to3 = (2 << CABAC_FRAC_BITS_Q) + bits_frac_1;
   7092 
   7093             /* Estimate bits to encode prefix bin as 0 for b3_chroma_intra_pred_mode */
   7094             WORD32 bits_for_mode4 =
   7095                 gau2_ihevce_cabac_bin_to_bits[u1_chroma_intra_mode_prefix_state ^ 0];
   7096 
   7097             /* accumulate into final rd cost for chroma */
   7098             ps_chr_intra_satd_ctxt->i8_cost_to_encode_chroma_mode = COMPUTE_RATE_COST_CLIP30(
   7099                 (bits_for_mode_0to3 - bits_for_mode4),
   7100                 ps_ctxt->i8_cl_ssd_lambda_chroma_qf,
   7101                 (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
   7102 
   7103             chrm_cod_cost += ps_chr_intra_satd_ctxt->i8_cost_to_encode_chroma_mode;
   7104         }
   7105 #endif
   7106 
   7107         if(ps_ctxt->u1_enable_psyRDOPT)
   7108         {
   7109             UWORD8 *pu1_recon_cu;
   7110             WORD32 recon_stride;
   7111             WORD32 curr_pos_x;
   7112             WORD32 curr_pos_y;
   7113             WORD32 start_index;
   7114             WORD32 num_horz_cu_in_ctb;
   7115             WORD32 had_block_size;
   7116 
   7117             /* tODO: sreenivasa ctb size has to be used appropriately */
   7118             had_block_size = 8;
   7119             num_horz_cu_in_ctb = 2 * 64 / had_block_size;
   7120             curr_pos_x = ps_cu_analyse->b3_cu_pos_x << 3; /* pel units */
   7121             curr_pos_y = ps_cu_analyse->b3_cu_pos_x << 3; /* pel units */
   7122             recon_stride = aps_recon_datastore[0]->i4_chromaRecon_stride;
   7123             pu1_recon_cu =
   7124                 aps_recon_datastore[0]->apv_chroma_recon_bufs[1 + (num_tus_in_cu > 1)];  //
   7125 
   7126             /* start index to index the source satd of curr cu int he current ctb*/
   7127             start_index = 2 * (curr_pos_x / had_block_size) +
   7128                           (curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
   7129 
   7130             {
   7131                 chrm_cod_cost += ihevce_psy_rd_cost_croma(
   7132                     ps_ctxt->ai4_source_chroma_satd,
   7133                     pu1_recon_cu,
   7134                     recon_stride,
   7135                     1,  //
   7136                     cu_size,
   7137                     0,  // pic type
   7138                     0,  //layer id
   7139                     ps_ctxt->i4_satd_lamda,  // lambda
   7140                     start_index,
   7141                     ps_ctxt->u1_is_input_data_hbd,  // 8 bit
   7142                     ps_ctxt->u1_chroma_array_type,
   7143                     &ps_ctxt->s_cmn_opt_func
   7144 
   7145                 );  // chroma subsampling 420
   7146             }
   7147         }
   7148 
   7149         ps_chr_intra_satd_ctxt->i8_chroma_best_rdopt = chrm_cod_cost;
   7150         ps_chr_intra_satd_ctxt->i4_chrm_tu_bits = chrm_tu_bits;
   7151 
   7152         memcpy(
   7153             &ps_chr_intra_satd_ctxt->au1_chrm_satd_updated_ctxt_models[0],
   7154             &ps_ctxt->au1_rdopt_init_ctxt_models[0],
   7155             IHEVC_CAB_CTXT_END);
   7156     }
   7157 }
   7158 
   7159 /*!
   7160 ******************************************************************************
   7161 * \if Function name : ihevce_chroma_cu_prcs_rdopt \endif
   7162 *
   7163 * \brief
   7164 *    Coding unit processing function for chroma
   7165 *
   7166 * \param[in] ps_ctxt    enc_loop module ctxt pointer
   7167 * \param[in] rd_opt_curr_idx index in the array of RDopt params
   7168 * \param[in] func_proc_mode TU_EQ_CU or other case
   7169 * \param[in] pu1_chrm_src  pointer to source data buffer
   7170 * \param[in] chrm_src_stride   source buffer stride
   7171 * \param[in] pu1_cu_left pointer to left recon data buffer
   7172 * \param[in] pu1_cu_top  pointer to top recon data buffer
   7173 * \param[in] pu1_cu_top_left pointer to top left recon data buffer
   7174 * \param[in] left_stride left recon buffer stride
   7175 * \param[out] cu_pos_x position x of current CU in CTB
   7176 * \param[out] cu_pos_y position y of current CU in CTB
   7177 * \param[out] pi4_chrm_tu_bits pointer to store the totla chroma bits
   7178 *
   7179 * \return
   7180 *    Chroma coding cost (cb adn Cr included)
   7181 *
   7182 * \author
   7183 *  Ittiam
   7184 *
   7185 *****************************************************************************
   7186 */
   7187 LWORD64 ihevce_chroma_cu_prcs_rdopt(
   7188     ihevce_enc_loop_ctxt_t *ps_ctxt,
   7189     WORD32 rd_opt_curr_idx,
   7190     WORD32 func_proc_mode,
   7191     UWORD8 *pu1_chrm_src,
   7192     WORD32 chrm_src_stride,
   7193     UWORD8 *pu1_cu_left,
   7194     UWORD8 *pu1_cu_top,
   7195     UWORD8 *pu1_cu_top_left,
   7196     WORD32 cu_left_stride,
   7197     WORD32 cu_pos_x,
   7198     WORD32 cu_pos_y,
   7199     WORD32 *pi4_chrm_tu_bits,
   7200     WORD32 i4_alpha_stim_multiplier,
   7201     UWORD8 u1_is_cu_noisy)
   7202 {
   7203     tu_enc_loop_out_t *ps_tu;
   7204     tu_enc_loop_temp_prms_t *ps_tu_temp_prms;
   7205 
   7206     ihevc_intra_pred_chroma_ref_substitution_ft *ihevc_intra_pred_chroma_ref_substitution_fptr;
   7207 
   7208     UWORD8 *pu1_pred;
   7209     UWORD8 *pu1_recon;
   7210     WORD32 i4_recon_stride;
   7211     WORD32 cu_size, trans_size = 0;
   7212     WORD32 pred_strd;
   7213     WORD32 ctr, i4_subtu_idx;
   7214     WORD32 scan_idx;
   7215     WORD32 u1_is_cu_coded_old;
   7216     WORD32 init_bytes_offset;
   7217 
   7218     enc_loop_cu_final_prms_t *ps_best_cu_prms = &ps_ctxt->as_cu_prms[rd_opt_curr_idx];
   7219     recon_datastore_t *ps_recon_datastore = &ps_best_cu_prms->s_recon_datastore;
   7220 
   7221     WORD32 total_bytes_offset = 0;
   7222     LWORD64 chrm_cod_cost = 0;
   7223     WORD32 chrm_tu_bits = 0;
   7224     WORD32 chrm_pred_mode = DM_CHROMA_IDX, luma_pred_mode = 35;
   7225     LWORD64 i8_ssd_cb = 0;
   7226     WORD32 i4_bits_cb = 0;
   7227     LWORD64 i8_ssd_cr = 0;
   7228     WORD32 i4_bits_cr = 0;
   7229     UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
   7230     UWORD8 u1_num_tus =
   7231         /* NumChromaTU's = 1, if TUSize = 4 and CUSize = 8 */
   7232         (!ps_best_cu_prms->as_tu_enc_loop[0].s_tu.b3_size && ps_best_cu_prms->u1_intra_flag)
   7233             ? 1
   7234             : ps_best_cu_prms->u2_num_tus_in_cu;
   7235     UWORD8 u1_num_subtus_in_tu = u1_is_422 + 1;
   7236     UWORD8 u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
   7237                                     (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
   7238                                     CONVERT_SSDS_TO_SPATIAL_DOMAIN;
   7239     /* Get the RDOPT cost of the best CU mode for early_exit */
   7240     LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!rd_opt_curr_idx].i8_best_rdopt_cost;
   7241     /* Get the current running RDOPT (Luma RDOPT) for early_exit */
   7242     LWORD64 curr_rdopt_cost = ps_ctxt->as_cu_prms[rd_opt_curr_idx].i8_curr_rdopt_cost;
   7243     WORD32 i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq;
   7244     WORD32 i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh;
   7245 
   7246     ihevc_intra_pred_chroma_ref_substitution_fptr =
   7247         ps_ctxt->ps_func_selector->ihevc_intra_pred_chroma_ref_substitution_fptr;
   7248 
   7249     if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
   7250     {
   7251         u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
   7252                                  CONVERT_SSDS_TO_SPATIAL_DOMAIN;
   7253     }
   7254 
   7255     /* Store the init bytes offset from luma */
   7256     init_bytes_offset = ps_best_cu_prms->i4_num_bytes_ecd_data;
   7257 
   7258     /* Unused pred buffer in merge_skip_pred_data_t structure is used as
   7259     Chroma pred storage buf. for final_recon function.
   7260     The buffer is split into two and used as a ping-pong buffer */
   7261     pu1_pred = ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[CU_ME_INTRA_PRED_CHROMA_IDX] +
   7262                rd_opt_curr_idx * ((MAX_CTB_SIZE * MAX_CTB_SIZE >> 1) +
   7263                                   (u1_is_422 * (MAX_CTB_SIZE * MAX_CTB_SIZE >> 1)));
   7264 
   7265     pred_strd = ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[CU_ME_INTRA_PRED_CHROMA_IDX];
   7266 
   7267     pu1_recon = (UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs[0];
   7268     i4_recon_stride = ps_recon_datastore->i4_chromaRecon_stride;
   7269     cu_size = ps_best_cu_prms->u1_cu_size;
   7270     chrm_tu_bits = 0;
   7271 
   7272     /* get the first TU pointer */
   7273     ps_tu = &ps_best_cu_prms->as_tu_enc_loop[0];
   7274     /* get the first TU enc_loop temp prms pointer */
   7275     ps_tu_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
   7276 
   7277     if(PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag)
   7278     {
   7279         /* Mode signalled by intra prediction for luma */
   7280         luma_pred_mode = ps_best_cu_prms->au1_intra_pred_mode[0];
   7281 
   7282 #if DISABLE_RDOQ_INTRA
   7283         i4_perform_rdoq = 0;
   7284 #endif
   7285     }
   7286 
   7287     else
   7288     {
   7289         UWORD8 *pu1_pred_org = pu1_pred;
   7290 
   7291         /* ------ Motion Compensation for Chroma -------- */
   7292         for(ctr = 0; ctr < ps_best_cu_prms->u2_num_pus_in_cu; ctr++)
   7293         {
   7294             pu_t *ps_pu;
   7295             WORD32 inter_pu_wd;
   7296             WORD32 inter_pu_ht;
   7297 
   7298             ps_pu = &ps_best_cu_prms->as_pu_chrm_proc[ctr];
   7299 
   7300             inter_pu_wd = (ps_pu->b4_wd + 1) << 2; /* cb and cr pixel interleaved */
   7301             inter_pu_ht = ((ps_pu->b4_ht + 1) << 2) >> 1;
   7302             inter_pu_ht <<= u1_is_422;
   7303 
   7304             ihevce_chroma_inter_pred_pu(&ps_ctxt->s_mc_ctxt, ps_pu, pu1_pred, pred_strd);
   7305 
   7306             if(2 == ps_best_cu_prms->u2_num_pus_in_cu)
   7307             {
   7308                 /* 2Nx__ partion case */
   7309                 if(inter_pu_wd == cu_size)
   7310                 {
   7311                     pu1_pred += (inter_pu_ht * pred_strd);
   7312                 }
   7313 
   7314                 /* __x2N partion case */
   7315                 if(inter_pu_ht == (cu_size >> (u1_is_422 == 0)))
   7316                 {
   7317                     pu1_pred += inter_pu_wd;
   7318                 }
   7319             }
   7320         }
   7321 
   7322         /* restore the pred pointer to start for transform loop */
   7323         pu1_pred = pu1_pred_org;
   7324     }
   7325 
   7326     /* Used to store back only the luma based info. if SATD based chorma
   7327     mode also comes */
   7328     u1_is_cu_coded_old = ps_best_cu_prms->u1_is_cu_coded;
   7329 
   7330     /* evaluate chroma candidates (same as luma) and
   7331     if INTRA & HIGH_QUALITY compare with best SATD mode */
   7332     {
   7333         WORD32 calc_recon = 0, deq_data_strd;
   7334         WORD16 *pi2_deq_data;
   7335         UWORD8 *pu1_ecd_data;
   7336         UWORD8 u1_is_mode_eq_chroma_satd_mode = 0;
   7337 
   7338         pi2_deq_data = &ps_best_cu_prms->pi2_cu_deq_coeffs[0];
   7339         pi2_deq_data += ps_best_cu_prms->i4_chrm_deq_coeff_strt_idx;
   7340         deq_data_strd = cu_size;
   7341         /* update ecd buffer for storing coeff. */
   7342         pu1_ecd_data = &ps_best_cu_prms->pu1_cu_coeffs[0];
   7343         pu1_ecd_data += init_bytes_offset;
   7344         /* store chroma starting index */
   7345         ps_best_cu_prms->i4_chrm_cu_coeff_strt_idx = init_bytes_offset;
   7346 
   7347         /* get the first TU pointer */
   7348         ps_tu = &ps_best_cu_prms->as_tu_enc_loop[0];
   7349         ps_tu_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
   7350 
   7351         /* Reset total_bytes_offset for each candidate */
   7352         chrm_pred_mode = (u1_is_422 == 1) ? gau1_chroma422_intra_angle_mapping[luma_pred_mode]
   7353                                           : luma_pred_mode;
   7354 
   7355         total_bytes_offset = 0;
   7356 
   7357         if(TU_EQ_SUBCU == func_proc_mode)
   7358         {
   7359             func_proc_mode = TU_EQ_CU_DIV2;
   7360         }
   7361 
   7362         /* For cu_size=8 case, chroma cost will be same for TU_EQ_CU and
   7363         TU_EQ_CU_DIV2 and  TU_EQ_SUBCU case */
   7364         if(8 == cu_size)
   7365         {
   7366             func_proc_mode = TU_EQ_CU;
   7367         }
   7368 
   7369         /* loop based on num tus in a cu */
   7370         if(!ps_best_cu_prms->u1_intra_flag || !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd ||
   7371            (ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd &&
   7372             (chrm_pred_mode !=
   7373              ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[func_proc_mode].u1_best_cr_mode)))
   7374         {
   7375             /* loop based on num tus in a cu */
   7376             for(ctr = 0; ctr < u1_num_tus; ctr++)
   7377             {
   7378                 WORD32 num_bytes = 0;
   7379                 LWORD64 curr_cb_cod_cost = 0;
   7380                 LWORD64 curr_cr_cod_cost = 0;
   7381                 WORD32 chrm_pred_func_idx = 0;
   7382                 UWORD8 u1_is_early_exit_condition_satisfied = 0;
   7383 
   7384                 /* Default cb and cr offset initializatio for b3_chroma_intra_mode_idx=7   */
   7385                 /* FIX for TU tree shrinkage caused by ecd data copies in final mode recon */
   7386                 ps_tu->s_tu.b1_cb_cbf = ps_tu->s_tu.b1_cr_cbf = 0;
   7387                 ps_tu->s_tu.b1_cb_cbf_subtu1 = ps_tu->s_tu.b1_cr_cbf_subtu1 = 0;
   7388                 ps_tu->ai4_cb_coeff_offset[0] = total_bytes_offset + init_bytes_offset;
   7389                 ps_tu->ai4_cr_coeff_offset[0] = total_bytes_offset + init_bytes_offset;
   7390                 ps_tu->ai4_cb_coeff_offset[1] = total_bytes_offset + init_bytes_offset;
   7391                 ps_tu->ai4_cr_coeff_offset[1] = total_bytes_offset + init_bytes_offset;
   7392                 ps_tu_temp_prms->ai2_cb_bytes_consumed[0] = 0;
   7393                 ps_tu_temp_prms->ai2_cr_bytes_consumed[0] = 0;
   7394                 ps_tu_temp_prms->ai2_cb_bytes_consumed[1] = 0;
   7395                 ps_tu_temp_prms->ai2_cr_bytes_consumed[1] = 0;
   7396 
   7397                 /* TU level inits */
   7398                 /* check if chroma present flag is set */
   7399                 if(1 == ps_tu->s_tu.b3_chroma_intra_mode_idx)
   7400                 {
   7401                     /* RDOPT copy States :  TU init (best until prev TU) to current */
   7402                     COPY_CABAC_STATES(
   7403                         &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
   7404                              .s_cabac_ctxt.au1_ctxt_models[0],
   7405                         &ps_ctxt->au1_rdopt_init_ctxt_models[0],
   7406                         IHEVC_CAB_CTXT_END);
   7407 
   7408                     /* get the current transform size */
   7409                     trans_size = ps_tu->s_tu.b3_size;
   7410                     trans_size = (1 << (trans_size + 1)); /* in chroma units */
   7411 
   7412                     /* since 2x2 transform is not allowed for chroma*/
   7413                     if(2 == trans_size)
   7414                     {
   7415                         trans_size = 4;
   7416                     }
   7417                 }
   7418 
   7419                 for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus_in_tu; i4_subtu_idx++)
   7420                 {
   7421                     WORD32 cbf;
   7422                     UWORD8 u1_is_recon_available;
   7423 
   7424                     WORD32 nbr_flags = 0;
   7425                     WORD32 zero_cols = 0;
   7426                     WORD32 zero_rows = 0;
   7427 
   7428                     /* check if chroma present flag is set */
   7429                     if(1 == ps_tu->s_tu.b3_chroma_intra_mode_idx)
   7430                     {
   7431                         UWORD8 *pu1_cur_pred;
   7432                         UWORD8 *pu1_cur_recon;
   7433                         UWORD8 *pu1_cur_src;
   7434                         WORD16 *pi2_cur_deq_data;
   7435                         WORD32 curr_pos_x, curr_pos_y;
   7436                         LWORD64 trans_ssd_u, trans_ssd_v;
   7437 
   7438                         /* get the current sub-tu posx and posy w.r.t to cu */
   7439                         curr_pos_x = (ps_tu->s_tu.b4_pos_x << 2) - (cu_pos_x << 3);
   7440                         curr_pos_y = (ps_tu->s_tu.b4_pos_y << 2) - (cu_pos_y << 3) +
   7441                                      (i4_subtu_idx * trans_size);
   7442 
   7443                         /* 420sp case only vertical height will be half */
   7444                         if(u1_is_422 == 0)
   7445                         {
   7446                             curr_pos_y >>= 1;
   7447                         }
   7448 
   7449                         /* increment the pointers to start of current Sub-TU */
   7450                         pu1_cur_recon = (pu1_recon + curr_pos_x);
   7451                         pu1_cur_recon += (curr_pos_y * i4_recon_stride);
   7452                         pu1_cur_src = (pu1_chrm_src + curr_pos_x);
   7453                         pu1_cur_src += (curr_pos_y * chrm_src_stride);
   7454                         pu1_cur_pred = (pu1_pred + curr_pos_x);
   7455                         pu1_cur_pred += (curr_pos_y * pred_strd);
   7456                         pi2_cur_deq_data = pi2_deq_data + curr_pos_x;
   7457                         pi2_cur_deq_data += (curr_pos_y * deq_data_strd);
   7458 
   7459                         /* populate the coeffs scan idx */
   7460                         scan_idx = SCAN_DIAG_UPRIGHT;
   7461 
   7462                         /* perform intra prediction only for Intra case */
   7463                         if(PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag)
   7464                         {
   7465                             UWORD8 *pu1_top_left;
   7466                             UWORD8 *pu1_top;
   7467                             UWORD8 *pu1_left;
   7468                             WORD32 left_strd;
   7469 
   7470                             calc_recon = !u1_compute_spatial_ssd &&
   7471                                          ((4 == u1_num_tus) || (u1_is_422 == 1)) &&
   7472                                          (((u1_num_tus == 1) && (0 == i4_subtu_idx)) ||
   7473                                           ((ctr == 3) && (0 == i4_subtu_idx) && (u1_is_422 == 1)) ||
   7474                                           ((u1_num_tus == 4) && (ctr < 3)));
   7475 
   7476                             /* left cu boundary */
   7477                             if(0 == curr_pos_x)
   7478                             {
   7479                                 pu1_left = pu1_cu_left + curr_pos_y * cu_left_stride;
   7480                                 left_strd = cu_left_stride;
   7481                             }
   7482                             else
   7483                             {
   7484                                 pu1_left = pu1_cur_recon - 2;
   7485                                 left_strd = i4_recon_stride;
   7486                             }
   7487 
   7488                             /* top cu boundary */
   7489                             if(0 == curr_pos_y)
   7490                             {
   7491                                 pu1_top = pu1_cu_top + curr_pos_x;
   7492                             }
   7493                             else
   7494                             {
   7495                                 pu1_top = pu1_cur_recon - i4_recon_stride;
   7496                             }
   7497 
   7498                             /* by default top left is set to cu top left */
   7499                             pu1_top_left = pu1_cu_top_left;
   7500 
   7501                             /* top left based on position */
   7502                             if((0 != curr_pos_y) && (0 == curr_pos_x))
   7503                             {
   7504                                 pu1_top_left = pu1_left - cu_left_stride;
   7505                             }
   7506                             else if(0 != curr_pos_x)
   7507                             {
   7508                                 pu1_top_left = pu1_top - 2;
   7509                             }
   7510 
   7511                             /* for 4x4 transforms based on intra pred mode scan is choosen*/
   7512                             if(4 == trans_size)
   7513                             {
   7514                                 /* for modes from 22 upto 30 horizontal scan is used */
   7515                                 if((chrm_pred_mode > 21) && (chrm_pred_mode < 31))
   7516                                 {
   7517                                     scan_idx = SCAN_HORZ;
   7518                                 }
   7519                                 /* for modes from 6 upto 14 horizontal scan is used */
   7520                                 else if((chrm_pred_mode > 5) && (chrm_pred_mode < 15))
   7521                                 {
   7522                                     scan_idx = SCAN_VERT;
   7523                                 }
   7524                             }
   7525 
   7526                             nbr_flags = ihevce_get_intra_chroma_tu_nbr(
   7527                                 ps_best_cu_prms->au4_nbr_flags[ctr],
   7528                                 i4_subtu_idx,
   7529                                 trans_size,
   7530                                 u1_is_422);
   7531 
   7532                             /* call the chroma reference array substitution */
   7533                             ihevc_intra_pred_chroma_ref_substitution_fptr(
   7534                                 pu1_top_left,
   7535                                 pu1_top,
   7536                                 pu1_left,
   7537                                 left_strd,
   7538                                 trans_size,
   7539                                 nbr_flags,
   7540                                 (UWORD8 *)ps_ctxt->pv_ref_sub_out,
   7541                                 1);
   7542 
   7543                             /* use the look up to get the function idx */
   7544                             chrm_pred_func_idx = g_i4_ip_funcs[chrm_pred_mode];
   7545 
   7546                             /* call the intra prediction function */
   7547                             ps_ctxt->apf_chrm_ip[chrm_pred_func_idx](
   7548                                 (UWORD8 *)ps_ctxt->pv_ref_sub_out,
   7549                                 1,
   7550                                 pu1_cur_pred,
   7551                                 pred_strd,
   7552                                 trans_size,
   7553                                 chrm_pred_mode);
   7554                         }
   7555 
   7556                         if(!ctr && !i4_subtu_idx && (u1_compute_spatial_ssd || calc_recon))
   7557                         {
   7558                             ps_recon_datastore->au1_is_chromaRecon_available[0] =
   7559                                 !ps_best_cu_prms->u1_skip_flag;
   7560                         }
   7561                         else if(!ctr && !i4_subtu_idx)
   7562                         {
   7563                             ps_recon_datastore->au1_is_chromaRecon_available[0] = 0;
   7564                         }
   7565                         /************************************************************/
   7566                         /* recon loop is done for all cases including skip cu       */
   7567                         /* This is because skipping chroma reisdual based on luma   */
   7568                         /* skip decision can lead to chroma artifacts               */
   7569                         /************************************************************/
   7570                         /************************************************************/
   7571                         /*In the high quality and medium speed modes, wherein chroma*/
   7572                         /*and luma costs are included in the total cost calculation */
   7573                         /*the cost is just a ssd cost, and not that obtained through*/
   7574                         /*iq_it path                                                */
   7575                         /************************************************************/
   7576                         if(ps_best_cu_prms->u1_skip_flag == 0)
   7577                         {
   7578                             WORD32 tu_bits;
   7579 
   7580                             cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
   7581                                 ps_ctxt,
   7582                                 pu1_cur_pred,
   7583                                 pred_strd,
   7584                                 pu1_cur_src,
   7585                                 chrm_src_stride,
   7586                                 pi2_cur_deq_data,
   7587                                 deq_data_strd,
   7588                                 pu1_cur_recon,
   7589                                 i4_recon_stride,
   7590                                 pu1_ecd_data + total_bytes_offset,
   7591                                 ps_ctxt->au1_cu_csbf,
   7592                                 ps_ctxt->i4_cu_csbf_strd,
   7593                                 trans_size,
   7594                                 scan_idx,
   7595                                 PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag,
   7596                                 &num_bytes,
   7597                                 &tu_bits,
   7598                                 &zero_cols,
   7599                                 &zero_rows,
   7600                                 &u1_is_recon_available,
   7601                                 i4_perform_sbh,
   7602                                 i4_perform_rdoq,
   7603                                 &trans_ssd_u,
   7604 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
   7605                                 i4_alpha_stim_multiplier,
   7606                                 u1_is_cu_noisy,
   7607 #endif
   7608                                 ps_best_cu_prms->u1_skip_flag,
   7609                                 u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
   7610                                 U_PLANE);
   7611 
   7612                             if(u1_compute_spatial_ssd && u1_is_recon_available)
   7613                             {
   7614                                 ps_recon_datastore
   7615                                     ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
   7616                                                                         [i4_subtu_idx] = 0;
   7617                             }
   7618                             else
   7619                             {
   7620                                 ps_recon_datastore
   7621                                     ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
   7622                                                                         [i4_subtu_idx] = UCHAR_MAX;
   7623                             }
   7624 
   7625 #if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
   7626                             if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
   7627                             {
   7628 #if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
   7629                                 trans_ssd_u = ihevce_inject_stim_into_distortion(
   7630                                     pu1_cur_src,
   7631                                     chrm_src_stride,
   7632                                     pu1_cur_pred,
   7633                                     pred_strd,
   7634                                     trans_ssd_u,
   7635                                     i4_alpha_stim_multiplier,
   7636                                     trans_size,
   7637                                     0,
   7638                                     ps_ctxt->u1_enable_psyRDOPT,
   7639                                     U_PLANE);
   7640 #else
   7641                                 if(u1_compute_spatial_ssd && u1_is_recon_available)
   7642                                 {
   7643                                     trans_ssd_u = ihevce_inject_stim_into_distortion(
   7644                                         pu1_cur_src,
   7645                                         chrm_src_stride,
   7646                                         pu1_cur_recon,
   7647                                         i4_recon_stride,
   7648                                         trans_ssd_u,
   7649                                         i4_alpha_stim_multiplier,
   7650                                         trans_size,
   7651                                         0,
   7652                                         ps_ctxt->u1_enable_psyRDOPT,
   7653                                         U_PLANE);
   7654                                 }
   7655                                 else
   7656                                 {
   7657                                     trans_ssd_u = ihevce_inject_stim_into_distortion(
   7658                                         pu1_cur_src,
   7659                                         chrm_src_stride,
   7660                                         pu1_cur_pred,
   7661                                         pred_strd,
   7662                                         trans_ssd_u,
   7663                                         i4_alpha_stim_multiplier,
   7664                                         trans_size,
   7665                                         0,
   7666                                         ps_ctxt->u1_enable_psyRDOPT,
   7667                                         U_PLANE);
   7668                                 }
   7669 #endif
   7670                             }
   7671 #endif
   7672 
   7673                             curr_cb_cod_cost =
   7674                                 trans_ssd_u +
   7675                                 COMPUTE_RATE_COST_CLIP30(
   7676                                     tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
   7677 
   7678                             chrm_tu_bits += tu_bits;
   7679                             i4_bits_cb += tu_bits;
   7680 
   7681                             /* RDOPT copy States :  New updated after curr TU to TU init */
   7682                             if(0 != cbf)
   7683                             {
   7684                                 COPY_CABAC_STATES(
   7685                                     &ps_ctxt->au1_rdopt_init_ctxt_models[0],
   7686                                     &ps_ctxt->s_rdopt_entropy_ctxt
   7687                                          .as_cu_entropy_ctxt[rd_opt_curr_idx]
   7688                                          .s_cabac_ctxt.au1_ctxt_models[0],
   7689                                     IHEVC_CAB_CTXT_END);
   7690                             }
   7691                             /* RDOPT copy States :  Restoring back the Cb init state to Cr */
   7692                             else
   7693                             {
   7694                                 COPY_CABAC_STATES(
   7695                                     &ps_ctxt->s_rdopt_entropy_ctxt
   7696                                          .as_cu_entropy_ctxt[rd_opt_curr_idx]
   7697                                          .s_cabac_ctxt.au1_ctxt_models[0],
   7698                                     &ps_ctxt->au1_rdopt_init_ctxt_models[0],
   7699                                     IHEVC_CAB_CTXT_END);
   7700                             }
   7701 
   7702                             /* If Intra and TU=CU/2, need recon for next TUs */
   7703                             if(calc_recon)
   7704                             {
   7705                                 ihevce_chroma_it_recon_fxn(
   7706                                     ps_ctxt,
   7707                                     pi2_cur_deq_data,
   7708                                     deq_data_strd,
   7709                                     pu1_cur_pred,
   7710                                     pred_strd,
   7711                                     pu1_cur_recon,
   7712                                     i4_recon_stride,
   7713                                     (pu1_ecd_data + total_bytes_offset),
   7714                                     trans_size,
   7715                                     cbf,
   7716                                     zero_cols,
   7717                                     zero_rows,
   7718                                     U_PLANE);
   7719 
   7720                                 ps_recon_datastore
   7721                                     ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
   7722                                                                         [i4_subtu_idx] = 0;
   7723                             }
   7724                             else
   7725                             {
   7726                                 ps_recon_datastore
   7727                                     ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
   7728                                                                         [i4_subtu_idx] = UCHAR_MAX;
   7729                             }
   7730                         }
   7731                         else
   7732                         {
   7733                             /* num bytes is set to 0 */
   7734                             num_bytes = 0;
   7735 
   7736                             /* cbf is returned as 0 */
   7737                             cbf = 0;
   7738 
   7739                             curr_cb_cod_cost = trans_ssd_u =
   7740 
   7741                                 ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
   7742                                     pu1_cur_pred,
   7743                                     pu1_cur_src,
   7744                                     pred_strd,
   7745                                     chrm_src_stride,
   7746                                     trans_size,
   7747                                     trans_size);
   7748 
   7749                             if(u1_compute_spatial_ssd)
   7750                             {
   7751                                 /* buffer copy from pred to recon */
   7752 
   7753                                 ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
   7754                                     pu1_cur_pred,
   7755                                     pred_strd,
   7756                                     pu1_cur_recon,
   7757                                     i4_recon_stride,
   7758                                     trans_size,
   7759                                     trans_size,
   7760                                     U_PLANE);
   7761 
   7762                                 ps_recon_datastore
   7763                                     ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
   7764                                                                         [i4_subtu_idx] = 0;
   7765                             }
   7766 
   7767                             if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
   7768                             {
   7769                                 trans_ssd_u = ihevce_inject_stim_into_distortion(
   7770                                     pu1_cur_src,
   7771                                     chrm_src_stride,
   7772                                     pu1_cur_pred,
   7773                                     pred_strd,
   7774                                     trans_ssd_u,
   7775                                     i4_alpha_stim_multiplier,
   7776                                     trans_size,
   7777                                     0,
   7778                                     ps_ctxt->u1_enable_psyRDOPT,
   7779                                     U_PLANE);
   7780                             }
   7781 
   7782 #if ENABLE_INTER_ZCU_COST
   7783 #if !WEIGH_CHROMA_COST
   7784                             /* cbf = 0, accumulate cu not coded cost */
   7785                             ps_ctxt->i8_cu_not_coded_cost += curr_cb_cod_cost;
   7786 #else
   7787                             /* cbf = 0, accumulate cu not coded cost */
   7788 
   7789                             ps_ctxt->i8_cu_not_coded_cost += (LWORD64)(
   7790                                 (curr_cb_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
   7791                                  (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
   7792                                 CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
   7793 #endif
   7794 #endif
   7795                         }
   7796 
   7797 #if !WEIGH_CHROMA_COST
   7798                         curr_rdopt_cost += curr_cb_cod_cost;
   7799 #else
   7800                         curr_rdopt_cost +=
   7801                             ((curr_cb_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
   7802                               (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
   7803                              CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
   7804 #endif
   7805                         chrm_cod_cost += curr_cb_cod_cost;
   7806                         i8_ssd_cb += trans_ssd_u;
   7807 
   7808                         if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
   7809                         {
   7810                             /* Early exit : If the current running cost exceeds
   7811                             the prev. best mode cost, break */
   7812                             if(curr_rdopt_cost > prev_best_rdopt_cost)
   7813                             {
   7814                                 u1_is_early_exit_condition_satisfied = 1;
   7815                                 break;
   7816                             }
   7817                         }
   7818 
   7819                         /* inter cu is coded if any of the tu is coded in it */
   7820                         ps_best_cu_prms->u1_is_cu_coded |= cbf;
   7821 
   7822                         /* update CB related params */
   7823                         ps_tu->ai4_cb_coeff_offset[i4_subtu_idx] =
   7824                             total_bytes_offset + init_bytes_offset;
   7825 
   7826                         if(0 == i4_subtu_idx)
   7827                         {
   7828                             ps_tu->s_tu.b1_cb_cbf = cbf;
   7829                         }
   7830                         else
   7831                         {
   7832                             ps_tu->s_tu.b1_cb_cbf_subtu1 = cbf;
   7833                         }
   7834 
   7835                         total_bytes_offset += num_bytes;
   7836 
   7837                         ps_tu_temp_prms->au4_cb_zero_col[i4_subtu_idx] = zero_cols;
   7838                         ps_tu_temp_prms->au4_cb_zero_row[i4_subtu_idx] = zero_rows;
   7839                         ps_tu_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx] = num_bytes;
   7840 
   7841                         /* recon loop is done for non skip cases */
   7842                         if(ps_best_cu_prms->u1_skip_flag == 0)
   7843                         {
   7844                             WORD32 tu_bits;
   7845 
   7846                             cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
   7847                                 ps_ctxt,
   7848                                 pu1_cur_pred,
   7849                                 pred_strd,
   7850                                 pu1_cur_src,
   7851                                 chrm_src_stride,
   7852                                 pi2_cur_deq_data + trans_size,
   7853                                 deq_data_strd,
   7854                                 pu1_cur_recon,
   7855                                 i4_recon_stride,
   7856                                 pu1_ecd_data + total_bytes_offset,
   7857                                 ps_ctxt->au1_cu_csbf,
   7858                                 ps_ctxt->i4_cu_csbf_strd,
   7859                                 trans_size,
   7860                                 scan_idx,
   7861                                 PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag,
   7862                                 &num_bytes,
   7863                                 &tu_bits,
   7864                                 &zero_cols,
   7865                                 &zero_rows,
   7866                                 &u1_is_recon_available,
   7867                                 i4_perform_sbh,
   7868                                 i4_perform_rdoq,
   7869                                 &trans_ssd_v,
   7870 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
   7871                                 i4_alpha_stim_multiplier,
   7872                                 u1_is_cu_noisy,
   7873 #endif
   7874                                 ps_best_cu_prms->u1_skip_flag,
   7875                                 u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
   7876                                 V_PLANE);
   7877 
   7878                             if(u1_compute_spatial_ssd && u1_is_recon_available)
   7879                             {
   7880                                 ps_recon_datastore
   7881                                     ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
   7882                                                                         [i4_subtu_idx] = 0;
   7883                             }
   7884                             else
   7885                             {
   7886                                 ps_recon_datastore
   7887                                     ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
   7888                                                                         [i4_subtu_idx] = UCHAR_MAX;
   7889                             }
   7890 
   7891 #if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
   7892                             if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
   7893                             {
   7894 #if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
   7895                                 trans_ssd_v = ihevce_inject_stim_into_distortion(
   7896                                     pu1_cur_src,
   7897                                     chrm_src_stride,
   7898                                     pu1_cur_pred,
   7899                                     pred_strd,
   7900                                     trans_ssd_v,
   7901                                     i4_alpha_stim_multiplier,
   7902                                     trans_size,
   7903                                     0,
   7904                                     ps_ctxt->u1_enable_psyRDOPT,
   7905                                     V_PLANE);
   7906 #else
   7907                                 if(u1_compute_spatial_ssd && u1_is_recon_available)
   7908                                 {
   7909                                     trans_ssd_v = ihevce_inject_stim_into_distortion(
   7910                                         pu1_cur_src,
   7911                                         chrm_src_stride,
   7912                                         pu1_cur_recon,
   7913                                         i4_recon_stride,
   7914                                         trans_ssd_v,
   7915                                         i4_alpha_stim_multiplier,
   7916                                         trans_size,
   7917                                         0,
   7918                                         ps_ctxt->u1_enable_psyRDOPT,
   7919                                         V_PLANE);
   7920                                 }
   7921                                 else
   7922                                 {
   7923                                     trans_ssd_v = ihevce_inject_stim_into_distortion(
   7924                                         pu1_cur_src,
   7925                                         chrm_src_stride,
   7926                                         pu1_cur_pred,
   7927                                         pred_strd,
   7928                                         trans_ssd_v,
   7929                                         i4_alpha_stim_multiplier,
   7930                                         trans_size,
   7931                                         0,
   7932                                         ps_ctxt->u1_enable_psyRDOPT,
   7933                                         V_PLANE);
   7934                                 }
   7935 #endif
   7936                             }
   7937 #endif
   7938 
   7939                             curr_cr_cod_cost =
   7940                                 trans_ssd_v +
   7941                                 COMPUTE_RATE_COST_CLIP30(
   7942                                     tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
   7943                             chrm_tu_bits += tu_bits;
   7944                             i4_bits_cr += tu_bits;
   7945 
   7946                             /* RDOPT copy States :  New updated after curr TU to TU init */
   7947                             if(0 != cbf)
   7948                             {
   7949                                 COPY_CABAC_STATES(
   7950                                     &ps_ctxt->au1_rdopt_init_ctxt_models[0],
   7951                                     &ps_ctxt->s_rdopt_entropy_ctxt
   7952                                          .as_cu_entropy_ctxt[rd_opt_curr_idx]
   7953                                          .s_cabac_ctxt.au1_ctxt_models[0],
   7954                                     IHEVC_CAB_CTXT_END);
   7955                             }
   7956                             /* RDOPT copy States :  Restoring back the Cb init state to Cr */
   7957                             else
   7958                             {
   7959                                 COPY_CABAC_STATES(
   7960                                     &ps_ctxt->s_rdopt_entropy_ctxt
   7961                                          .as_cu_entropy_ctxt[rd_opt_curr_idx]
   7962                                          .s_cabac_ctxt.au1_ctxt_models[0],
   7963                                     &ps_ctxt->au1_rdopt_init_ctxt_models[0],
   7964                                     IHEVC_CAB_CTXT_END);
   7965                             }
   7966 
   7967                             /* If Intra and TU=CU/2, need recon for next TUs */
   7968                             if(calc_recon)
   7969                             {
   7970                                 ihevce_chroma_it_recon_fxn(
   7971                                     ps_ctxt,
   7972                                     (pi2_cur_deq_data + trans_size),
   7973                                     deq_data_strd,
   7974                                     pu1_cur_pred,
   7975                                     pred_strd,
   7976                                     pu1_cur_recon,
   7977                                     i4_recon_stride,
   7978                                     (pu1_ecd_data + total_bytes_offset),
   7979                                     trans_size,
   7980                                     cbf,
   7981                                     zero_cols,
   7982                                     zero_rows,
   7983                                     V_PLANE);
   7984 
   7985                                 ps_recon_datastore
   7986                                     ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
   7987                                                                         [i4_subtu_idx] = 0;
   7988                             }
   7989                             else
   7990                             {
   7991                                 ps_recon_datastore
   7992                                     ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
   7993                                                                         [i4_subtu_idx] = UCHAR_MAX;
   7994                             }
   7995                         }
   7996                         else
   7997                         {
   7998                             /* num bytes is set to 0 */
   7999                             num_bytes = 0;
   8000 
   8001                             /* cbf is returned as 0 */
   8002                             cbf = 0;
   8003 
   8004                             curr_cr_cod_cost = trans_ssd_v =
   8005 
   8006                                 ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
   8007                                     pu1_cur_pred + 1,
   8008                                     pu1_cur_src + 1,
   8009                                     pred_strd,
   8010                                     chrm_src_stride,
   8011                                     trans_size,
   8012                                     trans_size);
   8013 
   8014                             if(u1_compute_spatial_ssd)
   8015                             {
   8016                                 /* buffer copy from pred to recon */
   8017                                 ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
   8018                                     pu1_cur_pred,
   8019                                     pred_strd,
   8020                                     pu1_cur_recon,
   8021                                     i4_recon_stride,
   8022                                     trans_size,
   8023                                     trans_size,
   8024                                     V_PLANE);
   8025 
   8026                                 ps_recon_datastore
   8027                                     ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
   8028                                                                         [i4_subtu_idx] = 0;
   8029                             }
   8030 
   8031                             if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
   8032                             {
   8033                                 trans_ssd_v = ihevce_inject_stim_into_distortion(
   8034                                     pu1_cur_src,
   8035                                     chrm_src_stride,
   8036                                     pu1_cur_pred,
   8037                                     pred_strd,
   8038                                     trans_ssd_v,
   8039                                     i4_alpha_stim_multiplier,
   8040                                     trans_size,
   8041                                     0,
   8042                                     ps_ctxt->u1_enable_psyRDOPT,
   8043                                     V_PLANE);
   8044                             }
   8045 
   8046 #if ENABLE_INTER_ZCU_COST
   8047 #if !WEIGH_CHROMA_COST
   8048                             /* cbf = 0, accumulate cu not coded cost */
   8049                             ps_ctxt->i8_cu_not_coded_cost += curr_cr_cod_cost;
   8050 #else
   8051                             /* cbf = 0, accumulate cu not coded cost */
   8052 
   8053                             ps_ctxt->i8_cu_not_coded_cost += (LWORD64)(
   8054                                 (curr_cr_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
   8055                                  (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
   8056                                 CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
   8057 #endif
   8058 #endif
   8059                         }
   8060 
   8061 #if !WEIGH_CHROMA_COST
   8062                         curr_rdopt_cost += curr_cr_cod_cost;
   8063 #else
   8064                         curr_rdopt_cost +=
   8065                             ((curr_cr_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
   8066                               (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
   8067                              CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
   8068 #endif
   8069 
   8070                         chrm_cod_cost += curr_cr_cod_cost;
   8071                         i8_ssd_cr += trans_ssd_v;
   8072 
   8073                         if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
   8074                         {
   8075                             /* Early exit : If the current running cost exceeds
   8076                             the prev. best mode cost, break */
   8077                             if(curr_rdopt_cost > prev_best_rdopt_cost)
   8078                             {
   8079                                 u1_is_early_exit_condition_satisfied = 1;
   8080                                 break;
   8081                             }
   8082                         }
   8083 
   8084                         /* inter cu is coded if any of the tu is coded in it */
   8085                         ps_best_cu_prms->u1_is_cu_coded |= cbf;
   8086 
   8087                         /* update CR related params */
   8088                         ps_tu->ai4_cr_coeff_offset[i4_subtu_idx] =
   8089                             total_bytes_offset + init_bytes_offset;
   8090 
   8091                         if(0 == i4_subtu_idx)
   8092                         {
   8093                             ps_tu->s_tu.b1_cr_cbf = cbf;
   8094                         }
   8095                         else
   8096                         {
   8097                             ps_tu->s_tu.b1_cr_cbf_subtu1 = cbf;
   8098                         }
   8099 
   8100                         total_bytes_offset += num_bytes;
   8101 
   8102                         ps_tu_temp_prms->au4_cr_zero_col[i4_subtu_idx] = zero_cols;
   8103                         ps_tu_temp_prms->au4_cr_zero_row[i4_subtu_idx] = zero_rows;
   8104                         ps_tu_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx] = num_bytes;
   8105                     }
   8106                     else
   8107                     {
   8108                         ps_recon_datastore
   8109                             ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx] =
   8110                             UCHAR_MAX;
   8111                         ps_recon_datastore
   8112                             ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx] =
   8113                             UCHAR_MAX;
   8114                     }
   8115                 }
   8116 
   8117                 if(u1_is_early_exit_condition_satisfied)
   8118                 {
   8119                     break;
   8120                 }
   8121 
   8122                 /* loop increments */
   8123                 ps_tu++;
   8124                 ps_tu_temp_prms++;
   8125             }
   8126 
   8127             /* Signal as luma mode. HIGH_QUALITY may update it */
   8128             ps_best_cu_prms->u1_chroma_intra_pred_mode = 4;
   8129 
   8130             /* modify the cost chrm_cod_cost */
   8131             if(ps_ctxt->u1_enable_psyRDOPT)
   8132             {
   8133                 UWORD8 *pu1_recon_cu;
   8134                 WORD32 recon_stride;
   8135                 WORD32 curr_pos_x;
   8136                 WORD32 curr_pos_y;
   8137                 WORD32 start_index;
   8138                 WORD32 num_horz_cu_in_ctb;
   8139                 WORD32 had_block_size;
   8140                 /* TODO: sreenivasa ctb size has to be used appropriately */
   8141                 had_block_size = 8;
   8142                 num_horz_cu_in_ctb = 2 * 64 / had_block_size;
   8143 
   8144                 curr_pos_x = cu_pos_x << 3; /* pel units */
   8145                 curr_pos_y = cu_pos_y << 3; /* pel units */
   8146                 recon_stride = i4_recon_stride;
   8147                 pu1_recon_cu = pu1_recon;
   8148 
   8149                 /* start index to index the source satd of curr cu in the current ctb */
   8150                 start_index = 2 * (curr_pos_x / had_block_size) +
   8151                               (curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
   8152 
   8153                 {
   8154                     chrm_cod_cost += ihevce_psy_rd_cost_croma(
   8155                         ps_ctxt->ai4_source_chroma_satd,
   8156                         pu1_recon,
   8157                         recon_stride,
   8158                         1,  //
   8159                         cu_size,
   8160                         0,  // pic type
   8161                         0,  //layer id
   8162                         ps_ctxt->i4_satd_lamda,  // lambda
   8163                         start_index,
   8164                         ps_ctxt->u1_is_input_data_hbd,  // 8 bit
   8165                         ps_ctxt->u1_chroma_array_type,
   8166                         &ps_ctxt->s_cmn_opt_func
   8167 
   8168                     );  // chroma subsampling 420
   8169                 }
   8170             }
   8171         }
   8172         else
   8173         {
   8174             u1_is_mode_eq_chroma_satd_mode = 1;
   8175             chrm_cod_cost = MAX_COST_64;
   8176         }
   8177 
   8178         /* If Intra Block and preset is HIGH QUALITY, then compare with best SATD mode */
   8179         if((PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag) &&
   8180            (1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd))
   8181         {
   8182             if(64 == cu_size)
   8183             {
   8184                 ASSERT(TU_EQ_CU != func_proc_mode);
   8185             }
   8186 
   8187             if(ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[func_proc_mode]
   8188                    .i8_chroma_best_rdopt < chrm_cod_cost)
   8189             {
   8190                 UWORD8 *pu1_src;
   8191                 UWORD8 *pu1_ecd_data_src_cb;
   8192                 UWORD8 *pu1_ecd_data_src_cr;
   8193 
   8194                 chroma_intra_satd_ctxt_t *ps_chr_intra_satd_ctxt =
   8195                     &ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[func_proc_mode];
   8196 
   8197                 UWORD8 *pu1_dst = &ps_ctxt->au1_rdopt_init_ctxt_models[0];
   8198                 WORD32 ai4_ecd_data_cb_offset[2] = { 0, 0 };
   8199                 WORD32 ai4_ecd_data_cr_offset[2] = { 0, 0 };
   8200 
   8201                 pu1_src = &ps_chr_intra_satd_ctxt->au1_chrm_satd_updated_ctxt_models[0];
   8202                 chrm_cod_cost = ps_chr_intra_satd_ctxt->i8_chroma_best_rdopt;
   8203                 chrm_pred_mode = ps_chr_intra_satd_ctxt->u1_best_cr_mode;
   8204                 chrm_tu_bits = ps_chr_intra_satd_ctxt->i4_chrm_tu_bits;
   8205 
   8206                 if(u1_is_mode_eq_chroma_satd_mode)
   8207                 {
   8208                     chrm_cod_cost -= ps_chr_intra_satd_ctxt->i8_cost_to_encode_chroma_mode;
   8209                 }
   8210 
   8211                 /* Resetting total_bytes_offset to 0 */
   8212                 total_bytes_offset = 0;
   8213 
   8214                 /* Update the CABAC state corresponding to chroma only */
   8215                 /* Chroma Cbf */
   8216                 memcpy(pu1_dst + IHEVC_CAB_CBCR_IDX, pu1_src + IHEVC_CAB_CBCR_IDX, 2);
   8217                 /* Chroma transform skip */
   8218                 memcpy(pu1_dst + IHEVC_CAB_TFM_SKIP12, pu1_src + IHEVC_CAB_TFM_SKIP12, 1);
   8219                 /* Chroma last coeff x prefix */
   8220                 memcpy(
   8221                     pu1_dst + IHEVC_CAB_COEFFX_PREFIX + 15,
   8222                     pu1_src + IHEVC_CAB_COEFFX_PREFIX + 15,
   8223                     3);
   8224                 /* Chroma last coeff y prefix */
   8225                 memcpy(
   8226                     pu1_dst + IHEVC_CAB_COEFFY_PREFIX + 15,
   8227                     pu1_src + IHEVC_CAB_COEFFY_PREFIX + 15,
   8228                     3);
   8229                 /* Chroma csbf */
   8230                 memcpy(
   8231                     pu1_dst + IHEVC_CAB_CODED_SUBLK_IDX + 2,
   8232                     pu1_src + IHEVC_CAB_CODED_SUBLK_IDX + 2,
   8233                     2);
   8234                 /* Chroma sig coeff flags */
   8235                 memcpy(
   8236                     pu1_dst + IHEVC_CAB_COEFF_FLAG + 27, pu1_src + IHEVC_CAB_COEFF_FLAG + 27, 15);
   8237                 /* Chroma absgt1 flags */
   8238                 memcpy(
   8239                     pu1_dst + IHEVC_CAB_COEFABS_GRTR1_FLAG + 16,
   8240                     pu1_src + IHEVC_CAB_COEFABS_GRTR1_FLAG + 16,
   8241                     8);
   8242                 /* Chroma absgt2 flags */
   8243                 memcpy(
   8244                     pu1_dst + IHEVC_CAB_COEFABS_GRTR2_FLAG + 4,
   8245                     pu1_src + IHEVC_CAB_COEFABS_GRTR2_FLAG + 4,
   8246                     2);
   8247 
   8248                 ps_tu = &ps_best_cu_prms->as_tu_enc_loop[0];
   8249                 ps_tu_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
   8250 
   8251                 /* update to luma decision as we update chroma in final mode */
   8252                 ps_best_cu_prms->u1_is_cu_coded = u1_is_cu_coded_old;
   8253 
   8254                 for(ctr = 0; ctr < u1_num_tus; ctr++)
   8255                 {
   8256                     for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus_in_tu; i4_subtu_idx++)
   8257                     {
   8258                         WORD32 cbf;
   8259                         WORD32 num_bytes;
   8260 
   8261                         pu1_ecd_data_src_cb =
   8262                             &ps_chr_intra_satd_ctxt->au1_scan_coeff_cb[i4_subtu_idx][0];
   8263                         pu1_ecd_data_src_cr =
   8264                             &ps_chr_intra_satd_ctxt->au1_scan_coeff_cr[i4_subtu_idx][0];
   8265 
   8266                         /* check if chroma present flag is set */
   8267                         if(1 == ps_tu->s_tu.b3_chroma_intra_mode_idx)
   8268                         {
   8269                             UWORD8 *pu1_cur_pred_dest;
   8270                             UWORD8 *pu1_cur_pred_src;
   8271                             WORD32 pred_src_strd;
   8272                             WORD16 *pi2_cur_deq_data_dest;
   8273                             WORD16 *pi2_cur_deq_data_src_cb;
   8274                             WORD16 *pi2_cur_deq_data_src_cr;
   8275                             WORD32 deq_src_strd;
   8276 
   8277                             WORD32 curr_pos_x, curr_pos_y;
   8278 
   8279                             trans_size = ps_tu->s_tu.b3_size;
   8280                             trans_size = (1 << (trans_size + 1)); /* in chroma units */
   8281 
   8282                             /*Deriving stride values*/
   8283                             pred_src_strd = ps_chr_intra_satd_ctxt->i4_pred_stride;
   8284                             deq_src_strd = ps_chr_intra_satd_ctxt->i4_iq_buff_stride;
   8285 
   8286                             /* since 2x2 transform is not allowed for chroma*/
   8287                             if(2 == trans_size)
   8288                             {
   8289                                 trans_size = 4;
   8290                             }
   8291 
   8292                             /* get the current tu posx and posy w.r.t to cu */
   8293                             curr_pos_x = (ps_tu->s_tu.b4_pos_x << 2) - (cu_pos_x << 3);
   8294                             curr_pos_y = (ps_tu->s_tu.b4_pos_y << 2) - (cu_pos_y << 3) +
   8295                                          (i4_subtu_idx * trans_size);
   8296 
   8297                             /* 420sp case only vertical height will be half */
   8298                             if(0 == u1_is_422)
   8299                             {
   8300                                 curr_pos_y >>= 1;
   8301                             }
   8302 
   8303                             /* increment the pointers to start of current TU  */
   8304                             pu1_cur_pred_src =
   8305                                 ((UWORD8 *)ps_chr_intra_satd_ctxt->pv_pred_data + curr_pos_x);
   8306                             pu1_cur_pred_src += (curr_pos_y * pred_src_strd);
   8307                             pu1_cur_pred_dest = (pu1_pred + curr_pos_x);
   8308                             pu1_cur_pred_dest += (curr_pos_y * pred_strd);
   8309 
   8310                             pi2_cur_deq_data_src_cb =
   8311                                 &ps_chr_intra_satd_ctxt->ai2_iq_data_cb[0] + (curr_pos_x >> 1);
   8312                             pi2_cur_deq_data_src_cr =
   8313                                 &ps_chr_intra_satd_ctxt->ai2_iq_data_cr[0] + (curr_pos_x >> 1);
   8314                             pi2_cur_deq_data_src_cb += (curr_pos_y * deq_src_strd);
   8315                             pi2_cur_deq_data_src_cr += (curr_pos_y * deq_src_strd);
   8316                             pi2_cur_deq_data_dest = pi2_deq_data + curr_pos_x;
   8317                             pi2_cur_deq_data_dest += (curr_pos_y * deq_data_strd);
   8318 
   8319                             /*Overwriting deq data with that belonging to the winning special mode
   8320                             (luma mode !=  chroma mode)
   8321                             ihevce_copy_2d takes source and dest arguments as UWORD8 *. We have to
   8322                             correspondingly manipulate to copy WORD16 data*/
   8323 
   8324                             ps_ctxt->s_cmn_opt_func.pf_copy_2d(
   8325                                 (UWORD8 *)pi2_cur_deq_data_dest,
   8326                                 (deq_data_strd << 1),
   8327                                 (UWORD8 *)pi2_cur_deq_data_src_cb,
   8328                                 (deq_src_strd << 1),
   8329                                 (trans_size << 1),
   8330                                 trans_size);
   8331 
   8332                             ps_ctxt->s_cmn_opt_func.pf_copy_2d(
   8333                                 (UWORD8 *)(pi2_cur_deq_data_dest + trans_size),
   8334                                 (deq_data_strd << 1),
   8335                                 (UWORD8 *)pi2_cur_deq_data_src_cr,
   8336                                 (deq_src_strd << 1),
   8337                                 (trans_size << 1),
   8338                                 trans_size);
   8339 
   8340                             /*Overwriting pred data with that belonging to the winning special mode
   8341                             (luma mode !=  chroma mode)*/
   8342 
   8343                             ps_ctxt->s_cmn_opt_func.pf_copy_2d(
   8344                                 pu1_cur_pred_dest,
   8345                                 pred_strd,
   8346                                 pu1_cur_pred_src,
   8347                                 pred_src_strd,
   8348                                 (trans_size << 1),
   8349                                 trans_size);
   8350 
   8351                             num_bytes = ps_chr_intra_satd_ctxt
   8352                                             ->ai4_num_bytes_scan_coeff_cb_per_tu[i4_subtu_idx][ctr];
   8353                             cbf = ps_chr_intra_satd_ctxt->au1_cbf_cb[i4_subtu_idx][ctr];
   8354                             /* inter cu is coded if any of the tu is coded in it */
   8355                             ps_best_cu_prms->u1_is_cu_coded |= cbf;
   8356 
   8357                             /* update CB related params */
   8358                             ps_tu->ai4_cb_coeff_offset[i4_subtu_idx] =
   8359                                 total_bytes_offset + init_bytes_offset;
   8360 
   8361                             if(0 == i4_subtu_idx)
   8362                             {
   8363                                 ps_tu->s_tu.b1_cb_cbf = cbf;
   8364                             }
   8365                             else
   8366                             {
   8367                                 ps_tu->s_tu.b1_cb_cbf_subtu1 = cbf;
   8368                             }
   8369 
   8370                             /*Overwriting the cb ecd data corresponding to the special mode*/
   8371                             if(0 != num_bytes)
   8372                             {
   8373                                 memcpy(
   8374                                     (pu1_ecd_data + total_bytes_offset),
   8375                                     pu1_ecd_data_src_cb + ai4_ecd_data_cb_offset[i4_subtu_idx],
   8376                                     num_bytes);
   8377                             }
   8378 
   8379                             total_bytes_offset += num_bytes;
   8380                             ai4_ecd_data_cb_offset[i4_subtu_idx] += num_bytes;
   8381                             ps_tu_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx] = num_bytes;
   8382 
   8383                             num_bytes = ps_chr_intra_satd_ctxt
   8384                                             ->ai4_num_bytes_scan_coeff_cr_per_tu[i4_subtu_idx][ctr];
   8385                             cbf = ps_chr_intra_satd_ctxt->au1_cbf_cr[i4_subtu_idx][ctr];
   8386                             /* inter cu is coded if any of the tu is coded in it */
   8387                             ps_best_cu_prms->u1_is_cu_coded |= cbf;
   8388 
   8389                             /*Overwriting the cr ecd data corresponding to the special mode*/
   8390                             if(0 != num_bytes)
   8391                             {
   8392                                 memcpy(
   8393                                     (pu1_ecd_data + total_bytes_offset),
   8394                                     pu1_ecd_data_src_cr + ai4_ecd_data_cr_offset[i4_subtu_idx],
   8395                                     num_bytes);
   8396                             }
   8397 
   8398                             /* update CR related params */
   8399                             ps_tu->ai4_cr_coeff_offset[i4_subtu_idx] =
   8400                                 total_bytes_offset + init_bytes_offset;
   8401 
   8402                             if(0 == i4_subtu_idx)
   8403                             {
   8404                                 ps_tu->s_tu.b1_cr_cbf = cbf;
   8405                             }
   8406                             else
   8407                             {
   8408                                 ps_tu->s_tu.b1_cr_cbf_subtu1 = cbf;
   8409                             }
   8410 
   8411                             total_bytes_offset += num_bytes;
   8412                             ai4_ecd_data_cr_offset[i4_subtu_idx] += num_bytes;
   8413 
   8414                             /*Updating zero rows and zero cols*/
   8415                             ps_tu_temp_prms->au4_cb_zero_col[i4_subtu_idx] =
   8416                                 ps_chr_intra_satd_ctxt->ai4_zero_col_cb[i4_subtu_idx][ctr];
   8417                             ps_tu_temp_prms->au4_cb_zero_row[i4_subtu_idx] =
   8418                                 ps_chr_intra_satd_ctxt->ai4_zero_row_cb[i4_subtu_idx][ctr];
   8419                             ps_tu_temp_prms->au4_cr_zero_col[i4_subtu_idx] =
   8420                                 ps_chr_intra_satd_ctxt->ai4_zero_col_cr[i4_subtu_idx][ctr];
   8421                             ps_tu_temp_prms->au4_cr_zero_row[i4_subtu_idx] =
   8422                                 ps_chr_intra_satd_ctxt->ai4_zero_row_cr[i4_subtu_idx][ctr];
   8423 
   8424                             ps_tu_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx] = num_bytes;
   8425 
   8426                             if((u1_num_tus > 1) &&
   8427                                ps_recon_datastore->au1_is_chromaRecon_available[2])
   8428                             {
   8429                                 ps_recon_datastore
   8430                                     ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
   8431                                                                         [i4_subtu_idx] = 2;
   8432                                 ps_recon_datastore
   8433                                     ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
   8434                                                                         [i4_subtu_idx] = 2;
   8435                             }
   8436                             else if(
   8437                                 (1 == u1_num_tus) &&
   8438                                 ps_recon_datastore->au1_is_chromaRecon_available[1])
   8439                             {
   8440                                 ps_recon_datastore
   8441                                     ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
   8442                                                                         [i4_subtu_idx] = 1;
   8443                                 ps_recon_datastore
   8444                                     ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
   8445                                                                         [i4_subtu_idx] = 1;
   8446                             }
   8447                             else
   8448                             {
   8449                                 ps_recon_datastore
   8450                                     ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
   8451                                                                         [i4_subtu_idx] = UCHAR_MAX;
   8452                                 ps_recon_datastore
   8453                                     ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
   8454                                                                         [i4_subtu_idx] = UCHAR_MAX;
   8455                             }
   8456                         }
   8457                     }
   8458 
   8459                     /* loop increments */
   8460                     ps_tu++;
   8461                     ps_tu_temp_prms++;
   8462                 }
   8463             }
   8464 
   8465             if(!u1_is_422)
   8466             {
   8467                 if(chrm_pred_mode == luma_pred_mode)
   8468                 {
   8469                     ps_best_cu_prms->u1_chroma_intra_pred_mode = 4;
   8470                 }
   8471                 else if(chrm_pred_mode == 0)
   8472                 {
   8473                     ps_best_cu_prms->u1_chroma_intra_pred_mode = 0;
   8474                 }
   8475                 else if(chrm_pred_mode == 1)
   8476                 {
   8477                     ps_best_cu_prms->u1_chroma_intra_pred_mode = 3;
   8478                 }
   8479                 else if(chrm_pred_mode == 10)
   8480                 {
   8481                     ps_best_cu_prms->u1_chroma_intra_pred_mode = 2;
   8482                 }
   8483                 else if(chrm_pred_mode == 26)
   8484                 {
   8485                     ps_best_cu_prms->u1_chroma_intra_pred_mode = 1;
   8486                 }
   8487                 else
   8488                 {
   8489                     ASSERT(0); /*Should not come here*/
   8490                 }
   8491             }
   8492             else
   8493             {
   8494                 if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[luma_pred_mode])
   8495                 {
   8496                     ps_best_cu_prms->u1_chroma_intra_pred_mode = 4;
   8497                 }
   8498                 else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[0])
   8499                 {
   8500                     ps_best_cu_prms->u1_chroma_intra_pred_mode = 0;
   8501                 }
   8502                 else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[1])
   8503                 {
   8504                     ps_best_cu_prms->u1_chroma_intra_pred_mode = 3;
   8505                 }
   8506                 else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[10])
   8507                 {
   8508                     ps_best_cu_prms->u1_chroma_intra_pred_mode = 2;
   8509                 }
   8510                 else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[26])
   8511                 {
   8512                     ps_best_cu_prms->u1_chroma_intra_pred_mode = 1;
   8513                 }
   8514                 else
   8515                 {
   8516                     ASSERT(0); /*Should not come here*/
   8517                 }
   8518             }
   8519         }
   8520 
   8521         /* Store the actual chroma mode */
   8522         ps_best_cu_prms->u1_chroma_intra_pred_actual_mode = chrm_pred_mode;
   8523     }
   8524 
   8525     /* update the total bytes produced */
   8526     ps_best_cu_prms->i4_num_bytes_ecd_data = total_bytes_offset + init_bytes_offset;
   8527 
   8528     /* store the final chrm bits accumulated */
   8529     *pi4_chrm_tu_bits = chrm_tu_bits;
   8530 
   8531     return (chrm_cod_cost);
   8532 }
   8533 
   8534 /*!
   8535 ******************************************************************************
   8536 * \if Function name : ihevce_final_rdopt_mode_prcs \endif
   8537 *
   8538 * \brief
   8539 *    Final RDOPT mode process function. Performs Recon computation for the
   8540 *    final mode. Re-use or Compute pred, iq-data, coeff based on the flags.
   8541 *
   8542 * \param[in] ps_ctxt : pointer to enc_loop module
   8543 * \param[in] ps_prms : pointer to struct containing requisite parameters
   8544 *
   8545 * \return
   8546 *    None
   8547 *
   8548 * \author
   8549 *  Ittiam
   8550 *
   8551 *****************************************************************************
   8552 */
   8553 void ihevce_final_rdopt_mode_prcs(
   8554     ihevce_enc_loop_ctxt_t *ps_ctxt, final_mode_process_prms_t *ps_prms)
   8555 {
   8556     enc_loop_cu_final_prms_t *ps_best_cu_prms;
   8557     tu_enc_loop_out_t *ps_tu_enc_loop;
   8558     tu_enc_loop_temp_prms_t *ps_tu_enc_loop_temp_prms;
   8559     nbr_avail_flags_t s_nbr;
   8560     recon_datastore_t *ps_recon_datastore;
   8561 
   8562     ihevc_intra_pred_luma_ref_substitution_ft *ihevc_intra_pred_luma_ref_substitution_fptr;
   8563     ihevc_intra_pred_chroma_ref_substitution_ft *ihevc_intra_pred_chroma_ref_substitution_fptr;
   8564     ihevc_intra_pred_ref_filtering_ft *ihevc_intra_pred_ref_filtering_fptr;
   8565 
   8566     WORD32 num_tu_in_cu;
   8567     LWORD64 rd_opt_cost;
   8568     WORD32 ctr;
   8569     WORD32 i4_subtu_idx;
   8570     WORD32 cu_size;
   8571     WORD32 cu_pos_x, cu_pos_y;
   8572     WORD32 chrm_present_flag = 1;
   8573     WORD32 num_bytes, total_bytes = 0;
   8574     WORD32 chrm_ctr = 0;
   8575     WORD32 u1_is_cu_coded;
   8576     UWORD8 *pu1_old_ecd_data;
   8577     UWORD8 *pu1_chrm_old_ecd_data;
   8578     UWORD8 *pu1_cur_pred;
   8579     WORD16 *pi2_deq_data;
   8580     WORD16 *pi2_chrm_deq_data;
   8581     WORD16 *pi2_cur_deq_data;
   8582     WORD16 *pi2_cur_deq_data_chrm;
   8583     UWORD8 *pu1_cur_luma_recon;
   8584     UWORD8 *pu1_cur_chroma_recon;
   8585     UWORD8 *pu1_cur_src;
   8586     UWORD8 *pu1_cur_src_chrm;
   8587     UWORD8 *pu1_cur_pred_chrm;
   8588     UWORD8 *pu1_intra_pred_mode;
   8589     UWORD32 *pu4_nbr_flags;
   8590     LWORD64 i8_ssd;
   8591 
   8592     cu_nbr_prms_t *ps_cu_nbr_prms = ps_prms->ps_cu_nbr_prms;
   8593     cu_inter_cand_t *ps_best_inter_cand = ps_prms->ps_best_inter_cand;
   8594     enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms = ps_prms->ps_chrm_cu_buf_prms;
   8595 
   8596     WORD32 packed_pred_mode = ps_prms->packed_pred_mode;
   8597     WORD32 rd_opt_best_idx = ps_prms->rd_opt_best_idx;
   8598     UWORD8 *pu1_src = (UWORD8 *)ps_prms->pv_src;
   8599     WORD32 src_strd = ps_prms->src_strd;
   8600     UWORD8 *pu1_pred = (UWORD8 *)ps_prms->pv_pred;
   8601     WORD32 pred_strd = ps_prms->pred_strd;
   8602     UWORD8 *pu1_pred_chrm = (UWORD8 *)ps_prms->pv_pred_chrm;
   8603     WORD32 pred_chrm_strd = ps_prms->pred_chrm_strd;
   8604     UWORD8 *pu1_final_ecd_data = ps_prms->pu1_final_ecd_data;
   8605     UWORD8 *pu1_csbf_buf = ps_prms->pu1_csbf_buf;
   8606     WORD32 csbf_strd = ps_prms->csbf_strd;
   8607     UWORD8 *pu1_luma_recon = (UWORD8 *)ps_prms->pv_luma_recon;
   8608     WORD32 recon_luma_strd = ps_prms->recon_luma_strd;
   8609     UWORD8 *pu1_chrm_recon = (UWORD8 *)ps_prms->pv_chrm_recon;
   8610     WORD32 recon_chrma_strd = ps_prms->recon_chrma_strd;
   8611     UWORD8 u1_cu_pos_x = ps_prms->u1_cu_pos_x;
   8612     UWORD8 u1_cu_pos_y = ps_prms->u1_cu_pos_y;
   8613     UWORD8 u1_cu_size = ps_prms->u1_cu_size;
   8614     WORD8 i1_cu_qp = ps_prms->i1_cu_qp;
   8615     UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
   8616     UWORD8 u1_num_subtus = (u1_is_422 == 1) + 1;
   8617     /* Get the Chroma pointer and parameters */
   8618     UWORD8 *pu1_src_chrm = ps_chrm_cu_buf_prms->pu1_curr_src;
   8619     WORD32 src_chrm_strd = ps_chrm_cu_buf_prms->i4_chrm_src_stride;
   8620     UWORD8 u1_compute_spatial_ssd_luma = 0;
   8621     UWORD8 u1_compute_spatial_ssd_chroma = 0;
   8622     /* Get the pointer for function selector */
   8623     ihevc_intra_pred_luma_ref_substitution_fptr =
   8624         ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr;
   8625 
   8626     ihevc_intra_pred_ref_filtering_fptr =
   8627         ps_ctxt->ps_func_selector->ihevc_intra_pred_ref_filtering_fptr;
   8628 
   8629     ihevc_intra_pred_chroma_ref_substitution_fptr =
   8630         ps_ctxt->ps_func_selector->ihevc_intra_pred_chroma_ref_substitution_fptr;
   8631 
   8632     /* Get the best CU parameters */
   8633     ps_best_cu_prms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
   8634     num_tu_in_cu = ps_best_cu_prms->u2_num_tus_in_cu;
   8635     cu_size = ps_best_cu_prms->u1_cu_size;
   8636     cu_pos_x = u1_cu_pos_x;
   8637     cu_pos_y = u1_cu_pos_y;
   8638     pu1_intra_pred_mode = &ps_best_cu_prms->au1_intra_pred_mode[0];
   8639     pu4_nbr_flags = &ps_best_cu_prms->au4_nbr_flags[0];
   8640     ps_recon_datastore = &ps_best_cu_prms->s_recon_datastore;
   8641 
   8642     /* get the first TU pointer */
   8643     ps_tu_enc_loop = &ps_best_cu_prms->as_tu_enc_loop[0];
   8644     /* get the first TU only enc_loop prms pointer */
   8645     ps_tu_enc_loop_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
   8646     /*modify quant related param in ctxt based on current cu qp*/
   8647     if((ps_ctxt->i1_cu_qp_delta_enable))
   8648     {
   8649         /*recompute quant related param at every cu level*/
   8650         ihevce_compute_quant_rel_param(ps_ctxt, i1_cu_qp);
   8651 
   8652         /* get frame level lambda params */
   8653         ihevce_get_cl_cu_lambda_prms(
   8654             ps_ctxt, MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON ? i1_cu_qp : ps_ctxt->i4_frame_qp);
   8655     }
   8656 
   8657     ps_best_cu_prms->i8_cu_ssd = 0;
   8658     ps_best_cu_prms->u4_cu_open_intra_sad = 0;
   8659 
   8660     /* For skip case : Set TU_size = CU_size and make cbf = 0
   8661     so that same TU loop can be used for all modes */
   8662     if(PRED_MODE_SKIP == packed_pred_mode)
   8663     {
   8664         for(ctr = 0; ctr < num_tu_in_cu; ctr++)
   8665         {
   8666             ps_tu_enc_loop->s_tu.b1_y_cbf = 0;
   8667 
   8668             ps_tu_enc_loop_temp_prms->i2_luma_bytes_consumed = 0;
   8669 
   8670             ps_tu_enc_loop++;
   8671             ps_tu_enc_loop_temp_prms++;
   8672         }
   8673 
   8674         /* go back to the first TU pointer */
   8675         ps_tu_enc_loop = &ps_best_cu_prms->as_tu_enc_loop[0];
   8676         ps_tu_enc_loop_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
   8677     }
   8678     /**   For inter case, pred calculation is outside the loop     **/
   8679     if(PRED_MODE_INTRA != packed_pred_mode)
   8680     {
   8681         /**------------- Compute pred data if required --------------**/
   8682         if((1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data))
   8683         {
   8684             nbr_4x4_t *ps_topleft_nbr_4x4;
   8685             nbr_4x4_t *ps_left_nbr_4x4;
   8686             nbr_4x4_t *ps_top_nbr_4x4;
   8687             WORD32 nbr_4x4_left_strd;
   8688 
   8689             ps_best_inter_cand->pu1_pred_data = pu1_pred;
   8690             ps_best_inter_cand->i4_pred_data_stride = pred_strd;
   8691 
   8692             /* Get the CU nbr information */
   8693             ps_topleft_nbr_4x4 = ps_cu_nbr_prms->ps_topleft_nbr_4x4;
   8694             ps_left_nbr_4x4 = ps_cu_nbr_prms->ps_left_nbr_4x4;
   8695             ps_top_nbr_4x4 = ps_cu_nbr_prms->ps_top_nbr_4x4;
   8696             nbr_4x4_left_strd = ps_cu_nbr_prms->nbr_4x4_left_strd;
   8697 
   8698             /* MVP ,MVD calc and Motion compensation */
   8699             rd_opt_cost = ((pf_inter_rdopt_cu_mc_mvp)ps_ctxt->pv_inter_rdopt_cu_mc_mvp)(
   8700                 ps_ctxt,
   8701                 ps_best_inter_cand,
   8702                 u1_cu_size,
   8703                 cu_pos_x,
   8704                 cu_pos_y,
   8705                 ps_left_nbr_4x4,
   8706                 ps_top_nbr_4x4,
   8707                 ps_topleft_nbr_4x4,
   8708                 nbr_4x4_left_strd,
   8709                 rd_opt_best_idx);
   8710         }
   8711 
   8712         /** ------ Motion Compensation for Chroma -------- **/
   8713         if(1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data)
   8714         {
   8715             UWORD8 *pu1_cur_pred;
   8716             pu1_cur_pred = pu1_pred_chrm;
   8717 
   8718             /* run a loop over all the partitons in cu */
   8719             for(ctr = 0; ctr < ps_best_cu_prms->u2_num_pus_in_cu; ctr++)
   8720             {
   8721                 pu_t *ps_pu;
   8722                 WORD32 inter_pu_wd, inter_pu_ht;
   8723 
   8724                 ps_pu = &ps_best_cu_prms->as_pu_chrm_proc[ctr];
   8725 
   8726                 /* IF AMP then each partitions can have diff wd ht */
   8727                 inter_pu_wd = (ps_pu->b4_wd + 1) << 2; /* cb and cr pixel interleaved */
   8728                 inter_pu_ht = ((ps_pu->b4_ht + 1) << 2) >> 1;
   8729                 inter_pu_ht <<= u1_is_422;
   8730                 /* chroma mc func */
   8731                 ihevce_chroma_inter_pred_pu(
   8732                     &ps_ctxt->s_mc_ctxt, ps_pu, pu1_cur_pred, pred_chrm_strd);
   8733                 if(2 == ps_best_cu_prms->u2_num_pus_in_cu)
   8734                 {
   8735                     /* 2Nx__ partition case */
   8736                     if(inter_pu_wd == ps_best_cu_prms->u1_cu_size)
   8737                     {
   8738                         pu1_cur_pred += (inter_pu_ht * pred_chrm_strd);
   8739                     }
   8740                     /* __x2N partition case */
   8741                     if(inter_pu_ht == (ps_best_cu_prms->u1_cu_size >> (u1_is_422 == 0)))
   8742                     {
   8743                         pu1_cur_pred += inter_pu_wd;
   8744                     }
   8745                 }
   8746             }
   8747         }
   8748     }
   8749     pi2_deq_data = &ps_best_cu_prms->pi2_cu_deq_coeffs[0];
   8750     pi2_chrm_deq_data =
   8751         &ps_best_cu_prms->pi2_cu_deq_coeffs[0] + ps_best_cu_prms->i4_chrm_deq_coeff_strt_idx;
   8752     pu1_old_ecd_data = &ps_best_cu_prms->pu1_cu_coeffs[0];
   8753     pu1_chrm_old_ecd_data =
   8754         &ps_best_cu_prms->pu1_cu_coeffs[0] + ps_best_cu_prms->i4_chrm_cu_coeff_strt_idx;
   8755 
   8756     /* default value for cu coded flag */
   8757     u1_is_cu_coded = 0;
   8758 
   8759     /* If we are re-computing coeff, set sad to 0 and start accumulating */
   8760     /* else use the best cand. sad from RDOPT stage                    */
   8761     if(1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data)
   8762     {
   8763         /*init of sad of CU accumulated over all TU*/
   8764         ps_best_cu_prms->u4_cu_sad = 0;
   8765 
   8766         /* reset the luma residual bits */
   8767         ps_best_cu_prms->u4_cu_luma_res_bits = 0;
   8768     }
   8769 
   8770     if(1 == ps_tu_enc_loop_temp_prms->b1_eval_chroma_iq_and_coeff_data)
   8771     {
   8772         /* reset the chroma residual bits */
   8773         ps_best_cu_prms->u4_cu_chroma_res_bits = 0;
   8774     }
   8775 
   8776     if((1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data) ||
   8777        (1 == ps_tu_enc_loop_temp_prms->b1_eval_chroma_iq_and_coeff_data))
   8778     {
   8779         /*Header bits have to be reevaluated if luma and chroma reevaluation is done, as
   8780         the quantized coefficients might be changed.
   8781         We are copying only those states which correspond to the header from the cabac state
   8782         of the previous CU, because the header is going to be recomputed for this condition*/
   8783         ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 1;
   8784         memcpy(
   8785             &ps_ctxt->au1_rdopt_init_ctxt_models[0],
   8786             &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
   8787             IHEVC_CAB_COEFFX_PREFIX);
   8788 
   8789         if((1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data))
   8790         {
   8791             COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
   8792                 (&ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX),
   8793                 (&ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0] +
   8794                  IHEVC_CAB_COEFFX_PREFIX),
   8795                 (IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX));
   8796         }
   8797         else
   8798         {
   8799             COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
   8800                 (&ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX),
   8801                 (&ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
   8802                       .s_cabac_ctxt.au1_ctxt_models[0] +
   8803                  IHEVC_CAB_COEFFX_PREFIX),
   8804                 (IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX));
   8805         }
   8806         ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_best_idx;
   8807     }
   8808     else
   8809     {
   8810         ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 0;
   8811     }
   8812 
   8813     /* Zero cbf tool is disabled for intra CUs */
   8814     if(PRED_MODE_INTRA == packed_pred_mode)
   8815     {
   8816 #if ENABLE_ZERO_CBF_IN_INTRA
   8817         ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
   8818 #else
   8819         ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF;
   8820 #endif
   8821     }
   8822     else
   8823     {
   8824 #if DISABLE_ZERO_ZBF_IN_INTER
   8825         ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF;
   8826 #else
   8827         ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
   8828 #endif
   8829     }
   8830 
   8831     /** Loop for all tu blocks in current cu and do reconstruction **/
   8832     for(ctr = 0; ctr < num_tu_in_cu; ctr++)
   8833     {
   8834         tu_t *ps_tu;
   8835         WORD32 trans_size, num_4x4_in_tu;
   8836         WORD32 cbf, zero_rows, zero_cols;
   8837         WORD32 cu_pos_x_in_4x4, cu_pos_y_in_4x4;
   8838         WORD32 cu_pos_x_in_pix, cu_pos_y_in_pix;
   8839         WORD32 luma_pred_mode, chroma_pred_mode = 0;
   8840         UWORD8 au1_is_recon_available[2];
   8841 
   8842         ps_tu = &(ps_tu_enc_loop->s_tu); /* Points to the TU property ctxt */
   8843 
   8844         u1_compute_spatial_ssd_luma = 0;
   8845         u1_compute_spatial_ssd_chroma = 0;
   8846 
   8847         trans_size = 1 << (ps_tu->b3_size + 2);
   8848         num_4x4_in_tu = (trans_size >> 2);
   8849         cu_pos_x_in_4x4 = ps_tu->b4_pos_x;
   8850         cu_pos_y_in_4x4 = ps_tu->b4_pos_y;
   8851 
   8852         /* populate the coeffs scan idx */
   8853         ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
   8854 
   8855         /* get the current pos x and pos y in pixels */
   8856         cu_pos_x_in_pix = (cu_pos_x_in_4x4 << 2) - (cu_pos_x << 3);
   8857         cu_pos_y_in_pix = (cu_pos_y_in_4x4 << 2) - (cu_pos_y << 3);
   8858 
   8859         /* Update pointers based on the location */
   8860         pu1_cur_src = pu1_src + cu_pos_x_in_pix;
   8861         pu1_cur_src += (cu_pos_y_in_pix * src_strd);
   8862         pu1_cur_pred = pu1_pred + cu_pos_x_in_pix;
   8863         pu1_cur_pred += (cu_pos_y_in_pix * pred_strd);
   8864 
   8865         pu1_cur_luma_recon = pu1_luma_recon + cu_pos_x_in_pix;
   8866         pu1_cur_luma_recon += (cu_pos_y_in_pix * recon_luma_strd);
   8867 
   8868         pi2_cur_deq_data = pi2_deq_data + cu_pos_x_in_pix;
   8869         pi2_cur_deq_data += cu_pos_y_in_pix * cu_size;
   8870 
   8871         pu1_cur_src_chrm = pu1_src_chrm + cu_pos_x_in_pix;
   8872         pu1_cur_src_chrm += ((cu_pos_y_in_pix >> 1) * src_chrm_strd) +
   8873                             (u1_is_422 * ((cu_pos_y_in_pix >> 1) * src_chrm_strd));
   8874 
   8875         pu1_cur_pred_chrm = pu1_pred_chrm + cu_pos_x_in_pix;
   8876         pu1_cur_pred_chrm += ((cu_pos_y_in_pix >> 1) * pred_chrm_strd) +
   8877                              (u1_is_422 * ((cu_pos_y_in_pix >> 1) * pred_chrm_strd));
   8878 
   8879         pu1_cur_chroma_recon = pu1_chrm_recon + cu_pos_x_in_pix;
   8880         pu1_cur_chroma_recon += ((cu_pos_y_in_pix >> 1) * recon_chrma_strd) +
   8881                                 (u1_is_422 * ((cu_pos_y_in_pix >> 1) * recon_chrma_strd));
   8882 
   8883         pi2_cur_deq_data_chrm = pi2_chrm_deq_data + cu_pos_x_in_pix;
   8884         pi2_cur_deq_data_chrm +=
   8885             ((cu_pos_y_in_pix >> 1) * cu_size) + (u1_is_422 * ((cu_pos_y_in_pix >> 1) * cu_size));
   8886 
   8887         /* if transform size is 4x4 then only first luma 4x4 will have chroma*/
   8888         chrm_present_flag = 1; /* by default chroma present is set to 1*/
   8889 
   8890         if(4 == trans_size)
   8891         {
   8892             /* if tusize is 4x4 then only first luma 4x4 will have chroma*/
   8893             if(0 != chrm_ctr)
   8894             {
   8895                 chrm_present_flag = INTRA_PRED_CHROMA_IDX_NONE;
   8896             }
   8897 
   8898             /* increment the chrm ctr unconditionally */
   8899             chrm_ctr++;
   8900             /* after ctr reached 4 reset it */
   8901             if(4 == chrm_ctr)
   8902             {
   8903                 chrm_ctr = 0;
   8904             }
   8905         }
   8906 
   8907         /**------------- Compute pred data if required --------------**/
   8908         if(PRED_MODE_INTRA == packed_pred_mode) /* Inter pred calc. is done outside loop */
   8909         {
   8910             /* Get the pred mode for scan idx calculation, even if pred is not required */
   8911             luma_pred_mode = *pu1_intra_pred_mode;
   8912 
   8913             if((ps_ctxt->i4_rc_pass == 1) ||
   8914                (1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data))
   8915             {
   8916                 WORD32 nbr_flags;
   8917                 WORD32 luma_pred_func_idx;
   8918                 UWORD8 *pu1_left;
   8919                 UWORD8 *pu1_top;
   8920                 UWORD8 *pu1_top_left;
   8921                 WORD32 left_strd;
   8922 
   8923                 /* left cu boundary */
   8924                 if(0 == cu_pos_x_in_pix)
   8925                 {
   8926                     left_strd = ps_cu_nbr_prms->cu_left_stride;
   8927                     pu1_left = ps_cu_nbr_prms->pu1_cu_left + cu_pos_y_in_pix * left_strd;
   8928                 }
   8929                 else
   8930                 {
   8931                     pu1_left = pu1_cur_luma_recon - 1;
   8932                     left_strd = recon_luma_strd;
   8933                 }
   8934 
   8935                 /* top cu boundary */
   8936                 if(0 == cu_pos_y_in_pix)
   8937                 {
   8938                     pu1_top = ps_cu_nbr_prms->pu1_cu_top + cu_pos_x_in_pix;
   8939                 }
   8940                 else
   8941                 {
   8942                     pu1_top = pu1_cur_luma_recon - recon_luma_strd;
   8943                 }
   8944 
   8945                 /* by default top left is set to cu top left */
   8946                 pu1_top_left = ps_cu_nbr_prms->pu1_cu_top_left;
   8947 
   8948                 /* top left based on position */
   8949                 if((0 != cu_pos_y_in_pix) && (0 == cu_pos_x_in_pix))
   8950                 {
   8951                     pu1_top_left = pu1_left - left_strd;
   8952                 }
   8953                 else if(0 != cu_pos_x_in_pix)
   8954                 {
   8955                     pu1_top_left = pu1_top - 1;
   8956                 }
   8957 
   8958                 /* get the neighbour availability flags */
   8959                 nbr_flags = ihevce_get_nbr_intra(
   8960                     &s_nbr,
   8961                     ps_ctxt->pu1_ctb_nbr_map,
   8962                     ps_ctxt->i4_nbr_map_strd,
   8963                     cu_pos_x_in_4x4,
   8964                     cu_pos_y_in_4x4,
   8965                     num_4x4_in_tu);
   8966 
   8967                 if(1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data)
   8968                 {
   8969                     /* copy the nbr flags for chroma reuse */
   8970                     if(4 != trans_size)
   8971                     {
   8972                         *pu4_nbr_flags = nbr_flags;
   8973                     }
   8974                     else if(1 == chrm_present_flag)
   8975                     {
   8976                         /* compute the avail flags assuming luma trans is 8x8 */
   8977                         /* get the neighbour availability flags */
   8978                         *pu4_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
   8979                             ps_ctxt->pu1_ctb_nbr_map,
   8980                             ps_ctxt->i4_nbr_map_strd,
   8981                             cu_pos_x_in_4x4,
   8982                             cu_pos_y_in_4x4,
   8983                             (num_4x4_in_tu << 1),
   8984                             (num_4x4_in_tu << 1));
   8985                     }
   8986 
   8987                     /* call reference array substitution */
   8988                     ihevc_intra_pred_luma_ref_substitution_fptr(
   8989                         pu1_top_left,
   8990                         pu1_top,
   8991                         pu1_left,
   8992                         left_strd,
   8993                         trans_size,
   8994                         nbr_flags,
   8995                         (UWORD8 *)ps_ctxt->pv_ref_sub_out,
   8996                         1);
   8997 
   8998                     /* call reference filtering */
   8999                     ihevc_intra_pred_ref_filtering_fptr(
   9000                         (UWORD8 *)ps_ctxt->pv_ref_sub_out,
   9001                         trans_size,
   9002                         (UWORD8 *)ps_ctxt->pv_ref_filt_out,
   9003                         luma_pred_mode,
   9004                         ps_ctxt->i1_strong_intra_smoothing_enable_flag);
   9005 
   9006                     /* use the look up to get the function idx */
   9007                     luma_pred_func_idx = g_i4_ip_funcs[luma_pred_mode];
   9008 
   9009                     /* call the intra prediction function */
   9010                     ps_ctxt->apf_lum_ip[luma_pred_func_idx](
   9011                         (UWORD8 *)ps_ctxt->pv_ref_filt_out,
   9012                         1,
   9013                         pu1_cur_pred,
   9014                         pred_strd,
   9015                         trans_size,
   9016                         luma_pred_mode);
   9017                 }
   9018             }
   9019             else if(
   9020                 (1 == chrm_present_flag) &&
   9021                 (1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data))
   9022             {
   9023                 WORD32 temp_num_4x4_in_tu = num_4x4_in_tu;
   9024 
   9025                 if(4 == trans_size) /* compute the avail flags assuming luma trans is 8x8 */
   9026                 {
   9027                     temp_num_4x4_in_tu = num_4x4_in_tu << 1;
   9028                 }
   9029 
   9030                 *pu4_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
   9031                     ps_ctxt->pu1_ctb_nbr_map,
   9032                     ps_ctxt->i4_nbr_map_strd,
   9033                     cu_pos_x_in_4x4,
   9034                     cu_pos_y_in_4x4,
   9035                     temp_num_4x4_in_tu,
   9036                     temp_num_4x4_in_tu);
   9037             }
   9038 
   9039             /* Get the pred mode for scan idx calculation, even if pred is not required */
   9040             chroma_pred_mode = ps_best_cu_prms->u1_chroma_intra_pred_actual_mode;
   9041         }
   9042 
   9043         if(1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data)
   9044         {
   9045             WORD32 temp_bits;
   9046             LWORD64 temp_cost;
   9047             UWORD32 u4_tu_sad;
   9048             WORD32 perform_sbh, perform_rdoq;
   9049 
   9050             if(PRED_MODE_INTRA == packed_pred_mode)
   9051             {
   9052                 /* for luma 4x4 and 8x8 transforms based on intra pred mode scan is choosen*/
   9053                 if(trans_size < 16)
   9054                 {
   9055                     /* for modes from 22 upto 30 horizontal scan is used */
   9056                     if((luma_pred_mode > 21) && (luma_pred_mode < 31))
   9057                     {
   9058                         ps_ctxt->i4_scan_idx = SCAN_HORZ;
   9059                     }
   9060                     /* for modes from 6 upto 14 vertical scan is used */
   9061                     else if((luma_pred_mode > 5) && (luma_pred_mode < 15))
   9062                     {
   9063                         ps_ctxt->i4_scan_idx = SCAN_VERT;
   9064                     }
   9065                 }
   9066             }
   9067 
   9068             /* RDOPT copy States :  TU init (best until prev TU) to current */
   9069             COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
   9070                 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
   9071                         .s_cabac_ctxt.au1_ctxt_models[0] +
   9072                     IHEVC_CAB_COEFFX_PREFIX,
   9073                 &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
   9074                 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
   9075 
   9076             if(ps_prms->u1_recompute_sbh_and_rdoq)
   9077             {
   9078                 perform_sbh = (ps_ctxt->i4_sbh_level != NO_SBH);
   9079                 perform_rdoq = (ps_ctxt->i4_rdoq_level != NO_RDOQ);
   9080             }
   9081             else
   9082             {
   9083                 /* RDOQ will change the coefficients. If coefficients are changed, we will have to do sbh again*/
   9084                 perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh;
   9085                 /* To do SBH we need the quant and iquant data. This would mean we need to do quantization again, which would mean
   9086                 we would have to do RDOQ again.*/
   9087                 perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq;
   9088             }
   9089 
   9090 #if DISABLE_RDOQ_INTRA
   9091             if(PRED_MODE_INTRA == packed_pred_mode)
   9092             {
   9093                 perform_rdoq = 0;
   9094             }
   9095 #endif
   9096             /*If BEST candidate RDOQ is enabled, Eithe no coef level rdoq or CU level rdoq has to be enabled
   9097             so that all candidates and best candidate are quantized with same rounding factor  */
   9098             if(1 == perform_rdoq)
   9099             {
   9100                 ASSERT(ps_ctxt->i4_quant_rounding_level != TU_LEVEL_QUANT_ROUNDING);
   9101             }
   9102 
   9103             cbf = ihevce_t_q_iq_ssd_scan_fxn(
   9104                 ps_ctxt,
   9105                 pu1_cur_pred,
   9106                 pred_strd,
   9107                 pu1_cur_src,
   9108                 src_strd,
   9109                 pi2_cur_deq_data,
   9110                 cu_size, /*deq_data stride is cu_size*/
   9111                 pu1_cur_luma_recon,
   9112                 recon_luma_strd,
   9113                 pu1_final_ecd_data,
   9114                 pu1_csbf_buf,
   9115                 csbf_strd,
   9116                 trans_size,
   9117                 packed_pred_mode,
   9118                 &temp_cost,
   9119                 &num_bytes,
   9120                 &temp_bits,
   9121                 &u4_tu_sad,
   9122                 &zero_cols,
   9123                 &zero_rows,
   9124                 &au1_is_recon_available[0],
   9125                 perform_rdoq,  //(BEST_CAND_RDOQ == ps_ctxt->i4_rdoq_level),
   9126                 perform_sbh,
   9127 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
   9128                 !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
   9129                                        : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
   9130                                           (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
   9131                                              100.0,
   9132                 ps_prms->u1_is_cu_noisy,
   9133 #endif
   9134                 u1_compute_spatial_ssd_luma ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
   9135                 1 /*early cbf*/
   9136             );  //(BEST_CAND_SBH == ps_ctxt->i4_sbh_level));
   9137 
   9138             /* Accumulate luma residual bits */
   9139             ps_best_cu_prms->u4_cu_luma_res_bits += temp_bits;
   9140 
   9141             /* RDOPT copy States :  New updated after curr TU to TU init */
   9142             if(0 != cbf)
   9143             {
   9144                 /* update to new state only if CBF is non zero */
   9145                 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
   9146                     &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
   9147                     &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
   9148                             .s_cabac_ctxt.au1_ctxt_models[0] +
   9149                         IHEVC_CAB_COEFFX_PREFIX,
   9150                     IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
   9151             }
   9152 
   9153             /* accumulate the TU sad into cu sad */
   9154             ps_best_cu_prms->u4_cu_sad += u4_tu_sad;
   9155             ps_tu->b1_y_cbf = cbf;
   9156             ps_tu_enc_loop_temp_prms->i2_luma_bytes_consumed = num_bytes;
   9157 
   9158             /* If somebody updates cbf (RDOQ or SBH), update in nbr str. for BS */
   9159             if((ps_prms->u1_will_cabac_state_change) && (!ps_prms->u1_is_first_pass))
   9160             {
   9161                 WORD32 num_4x4_in_cu = u1_cu_size >> 2;
   9162                 nbr_4x4_t *ps_cur_nbr_4x4 = &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0];
   9163                 ps_cur_nbr_4x4 = (ps_cur_nbr_4x4 + (cu_pos_x_in_pix >> 2));
   9164                 ps_cur_nbr_4x4 += ((cu_pos_y_in_pix >> 2) * num_4x4_in_cu);
   9165                 /* repiclate the nbr 4x4 structure for all 4x4 blocks current TU */
   9166                 ps_cur_nbr_4x4->b1_y_cbf = cbf;
   9167                 /*copy the cu qp. This will be overwritten by qp calculated based on skip flag at final stage of cu mode decide*/
   9168                 ps_cur_nbr_4x4->b8_qp = ps_ctxt->i4_cu_qp;
   9169                 /* Qp and cbf are stored for the all 4x4 in TU */
   9170                 {
   9171                     WORD32 i, j;
   9172                     nbr_4x4_t *ps_tmp_4x4;
   9173                     ps_tmp_4x4 = ps_cur_nbr_4x4;
   9174 
   9175                     for(i = 0; i < num_4x4_in_tu; i++)
   9176                     {
   9177                         for(j = 0; j < num_4x4_in_tu; j++)
   9178                         {
   9179                             ps_tmp_4x4[j].b8_qp = ps_ctxt->i4_cu_qp;
   9180                             ps_tmp_4x4[j].b1_y_cbf = cbf;
   9181                         }
   9182                         /* row level update*/
   9183                         ps_tmp_4x4 += num_4x4_in_cu;
   9184                     }
   9185                 }
   9186             }
   9187         }
   9188         else
   9189         {
   9190             zero_cols = ps_tu_enc_loop_temp_prms->u4_luma_zero_col;
   9191             zero_rows = ps_tu_enc_loop_temp_prms->u4_luma_zero_row;
   9192 
   9193             if(ps_prms->u1_will_cabac_state_change)
   9194             {
   9195                 num_bytes = ps_tu_enc_loop_temp_prms->i2_luma_bytes_consumed;
   9196             }
   9197             else
   9198             {
   9199                 num_bytes = 0;
   9200             }
   9201 
   9202             /* copy luma ecd data to final buffer */
   9203             memcpy(pu1_final_ecd_data, pu1_old_ecd_data, num_bytes);
   9204 
   9205             pu1_old_ecd_data += num_bytes;
   9206 
   9207             au1_is_recon_available[0] = 0;
   9208         }
   9209 
   9210         /**-------- Compute Recon data (Do IT & Recon) : Luma  -----------**/
   9211         if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
   9212            (!u1_compute_spatial_ssd_luma ||
   9213             (!au1_is_recon_available[0] && u1_compute_spatial_ssd_luma)))
   9214         {
   9215             if(!ps_recon_datastore->u1_is_lumaRecon_available ||
   9216                (ps_recon_datastore->u1_is_lumaRecon_available &&
   9217                 (UCHAR_MAX == ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr])))
   9218             {
   9219                 ihevce_it_recon_fxn(
   9220                     ps_ctxt,
   9221                     pi2_cur_deq_data,
   9222                     cu_size,
   9223                     pu1_cur_pred,
   9224                     pred_strd,
   9225                     pu1_cur_luma_recon,
   9226                     recon_luma_strd,
   9227                     pu1_final_ecd_data,
   9228                     trans_size,
   9229                     packed_pred_mode,
   9230                     ps_tu->b1_y_cbf,
   9231                     zero_cols,
   9232                     zero_rows);
   9233             }
   9234             else if(
   9235                 ps_recon_datastore->u1_is_lumaRecon_available &&
   9236                 (UCHAR_MAX != ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr]))
   9237             {
   9238                 UWORD8 *pu1_recon_src =
   9239                     ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
   9240                          [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr]]) +
   9241                     cu_pos_x_in_pix + cu_pos_y_in_pix * ps_recon_datastore->i4_lumaRecon_stride;
   9242 
   9243                 ps_ctxt->s_cmn_opt_func.pf_copy_2d(
   9244                     pu1_cur_luma_recon,
   9245                     recon_luma_strd,
   9246                     pu1_recon_src,
   9247                     ps_recon_datastore->i4_lumaRecon_stride,
   9248                     trans_size,
   9249                     trans_size);
   9250             }
   9251         }
   9252 
   9253         if(ps_prms->u1_will_cabac_state_change)
   9254         {
   9255             ps_tu_enc_loop->i4_luma_coeff_offset = total_bytes;
   9256         }
   9257 
   9258         pu1_final_ecd_data += num_bytes;
   9259         /* update total bytes consumed */
   9260         total_bytes += num_bytes;
   9261 
   9262         u1_is_cu_coded |= ps_tu->b1_y_cbf;
   9263 
   9264         /***************** Compute T,Q,IQ,IT & Recon for Chroma ********************/
   9265         if(1 == chrm_present_flag)
   9266         {
   9267             pu1_cur_src_chrm = pu1_src_chrm + cu_pos_x_in_pix;
   9268             pu1_cur_src_chrm += ((cu_pos_y_in_pix >> 1) * src_chrm_strd) +
   9269                                 (u1_is_422 * ((cu_pos_y_in_pix >> 1) * src_chrm_strd));
   9270 
   9271             pu1_cur_pred_chrm = pu1_pred_chrm + cu_pos_x_in_pix;
   9272             pu1_cur_pred_chrm += ((cu_pos_y_in_pix >> 1) * pred_chrm_strd) +
   9273                                  (u1_is_422 * ((cu_pos_y_in_pix >> 1) * pred_chrm_strd));
   9274 
   9275             pu1_cur_chroma_recon = pu1_chrm_recon + cu_pos_x_in_pix;
   9276             pu1_cur_chroma_recon += ((cu_pos_y_in_pix >> 1) * recon_chrma_strd) +
   9277                                     (u1_is_422 * ((cu_pos_y_in_pix >> 1) * recon_chrma_strd));
   9278 
   9279             pi2_cur_deq_data_chrm = pi2_chrm_deq_data + cu_pos_x_in_pix;
   9280             pi2_cur_deq_data_chrm += ((cu_pos_y_in_pix >> 1) * cu_size) +
   9281                                      (u1_is_422 * ((cu_pos_y_in_pix >> 1) * cu_size));
   9282 
   9283             if(INCLUDE_CHROMA_DURING_TU_RECURSION &&
   9284                (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P0) &&
   9285                (PRED_MODE_INTRA != packed_pred_mode))
   9286             {
   9287                 WORD32 i4_num_bytes;
   9288                 UWORD8 *pu1_chroma_pred;
   9289                 UWORD8 *pu1_chroma_recon;
   9290                 WORD16 *pi2_chroma_deq;
   9291                 UWORD32 u4_zero_col;
   9292                 UWORD32 u4_zero_row;
   9293 
   9294                 for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus; i4_subtu_idx++)
   9295                 {
   9296                     WORD32 chroma_trans_size = MAX(4, trans_size >> 1);
   9297                     WORD32 i4_subtu_pos_x = cu_pos_x_in_pix;
   9298                     WORD32 i4_subtu_pos_y = cu_pos_y_in_pix + (i4_subtu_idx * chroma_trans_size);
   9299 
   9300                     if(0 == u1_is_422)
   9301                     {
   9302                         i4_subtu_pos_y >>= 1;
   9303                     }
   9304 
   9305                     pu1_chroma_pred =
   9306                         pu1_cur_pred_chrm + (i4_subtu_idx * chroma_trans_size * pred_chrm_strd);
   9307                     pu1_chroma_recon = pu1_cur_chroma_recon +
   9308                                        (i4_subtu_idx * chroma_trans_size * recon_chrma_strd);
   9309                     pi2_chroma_deq =
   9310                         pi2_cur_deq_data_chrm + (i4_subtu_idx * chroma_trans_size * cu_size);
   9311 
   9312                     u4_zero_col = ps_tu_enc_loop_temp_prms->au4_cb_zero_col[i4_subtu_idx];
   9313                     u4_zero_row = ps_tu_enc_loop_temp_prms->au4_cb_zero_row[i4_subtu_idx];
   9314 
   9315                     if(ps_prms->u1_will_cabac_state_change)
   9316                     {
   9317                         i4_num_bytes =
   9318                             ps_tu_enc_loop_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx];
   9319                     }
   9320                     else
   9321                     {
   9322                         i4_num_bytes = 0;
   9323                     }
   9324 
   9325                     memcpy(pu1_final_ecd_data, pu1_old_ecd_data, i4_num_bytes);
   9326 
   9327                     pu1_old_ecd_data += i4_num_bytes;
   9328 
   9329                     au1_is_recon_available[U_PLANE] = 0;
   9330 
   9331                     if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
   9332                        (!u1_compute_spatial_ssd_chroma ||
   9333                         (!au1_is_recon_available[U_PLANE] && u1_compute_spatial_ssd_chroma)))
   9334                     {
   9335                         if(!ps_recon_datastore->au1_is_chromaRecon_available[0] ||
   9336                            (ps_recon_datastore->au1_is_chromaRecon_available[0] &&
   9337                             (UCHAR_MAX ==
   9338                              ps_recon_datastore
   9339                                  ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx])))
   9340                         {
   9341                             ihevce_chroma_it_recon_fxn(
   9342                                 ps_ctxt,
   9343                                 pi2_chroma_deq,
   9344                                 cu_size,
   9345                                 pu1_chroma_pred,
   9346                                 pred_chrm_strd,
   9347                                 pu1_chroma_recon,
   9348                                 recon_chrma_strd,
   9349                                 pu1_final_ecd_data,
   9350                                 chroma_trans_size,
   9351                                 (i4_subtu_idx == 0) ? ps_tu->b1_cb_cbf : ps_tu->b1_cb_cbf_subtu1,
   9352                                 u4_zero_col,
   9353                                 u4_zero_row,
   9354                                 U_PLANE);
   9355                         }
   9356                         else if(
   9357                             ps_recon_datastore->au1_is_chromaRecon_available[0] &&
   9358                             (UCHAR_MAX !=
   9359                              ps_recon_datastore
   9360                                  ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx]))
   9361                         {
   9362                             UWORD8 *pu1_recon_src =
   9363                                 ((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs
   9364                                      [ps_recon_datastore->au1_bufId_with_winning_ChromaRecon
   9365                                           [U_PLANE][ctr][i4_subtu_idx]]) +
   9366                                 i4_subtu_pos_x +
   9367                                 i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride;
   9368 
   9369                             ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
   9370                                 pu1_recon_src,
   9371                                 ps_recon_datastore->i4_lumaRecon_stride,
   9372                                 pu1_chroma_recon,
   9373                                 recon_chrma_strd,
   9374                                 chroma_trans_size,
   9375                                 chroma_trans_size,
   9376                                 U_PLANE);
   9377                         }
   9378                     }
   9379 
   9380                     u1_is_cu_coded |=
   9381                         ((1 == i4_subtu_idx) ? ps_tu->b1_cb_cbf_subtu1 : ps_tu->b1_cb_cbf);
   9382 
   9383                     pu1_final_ecd_data += i4_num_bytes;
   9384                     total_bytes += i4_num_bytes;
   9385                 }
   9386 
   9387                 for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus; i4_subtu_idx++)
   9388                 {
   9389                     WORD32 chroma_trans_size = MAX(4, trans_size >> 1);
   9390                     WORD32 i4_subtu_pos_x = cu_pos_x_in_pix;
   9391                     WORD32 i4_subtu_pos_y = cu_pos_y_in_pix + (i4_subtu_idx * chroma_trans_size);
   9392 
   9393                     if(0 == u1_is_422)
   9394                     {
   9395                         i4_subtu_pos_y >>= 1;
   9396                     }
   9397 
   9398                     pu1_chroma_pred =
   9399                         pu1_cur_pred_chrm + (i4_subtu_idx * chroma_trans_size * pred_chrm_strd);
   9400                     pu1_chroma_recon = pu1_cur_chroma_recon +
   9401                                        (i4_subtu_idx * chroma_trans_size * recon_chrma_strd);
   9402                     pi2_chroma_deq = pi2_cur_deq_data_chrm +
   9403                                      (i4_subtu_idx * chroma_trans_size * cu_size) +
   9404                                      chroma_trans_size;
   9405 
   9406                     u4_zero_col = ps_tu_enc_loop_temp_prms->au4_cr_zero_col[i4_subtu_idx];
   9407                     u4_zero_row = ps_tu_enc_loop_temp_prms->au4_cr_zero_row[i4_subtu_idx];
   9408 
   9409                     if(ps_prms->u1_will_cabac_state_change)
   9410                     {
   9411                         i4_num_bytes =
   9412                             ps_tu_enc_loop_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx];
   9413                     }
   9414                     else
   9415                     {
   9416                         i4_num_bytes = 0;
   9417                     }
   9418 
   9419                     memcpy(pu1_final_ecd_data, pu1_old_ecd_data, i4_num_bytes);
   9420 
   9421                     pu1_old_ecd_data += i4_num_bytes;
   9422 
   9423                     au1_is_recon_available[V_PLANE] = 0;
   9424 
   9425                     if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
   9426                        (!u1_compute_spatial_ssd_chroma ||
   9427                         (!au1_is_recon_available[V_PLANE] && u1_compute_spatial_ssd_chroma)))
   9428                     {
   9429                         if(!ps_recon_datastore->au1_is_chromaRecon_available[0] ||
   9430                            (ps_recon_datastore->au1_is_chromaRecon_available[0] &&
   9431                             (UCHAR_MAX ==
   9432                              ps_recon_datastore
   9433                                  ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx])))
   9434                         {
   9435                             ihevce_chroma_it_recon_fxn(
   9436                                 ps_ctxt,
   9437                                 pi2_chroma_deq,
   9438                                 cu_size,
   9439                                 pu1_chroma_pred,
   9440                                 pred_chrm_strd,
   9441                                 pu1_chroma_recon,
   9442                                 recon_chrma_strd,
   9443                                 pu1_final_ecd_data,
   9444                                 chroma_trans_size,
   9445                                 (i4_subtu_idx == 0) ? ps_tu->b1_cr_cbf : ps_tu->b1_cr_cbf_subtu1,
   9446                                 u4_zero_col,
   9447                                 u4_zero_row,
   9448                                 V_PLANE);
   9449                         }
   9450                         else if(
   9451                             ps_recon_datastore->au1_is_chromaRecon_available[0] &&
   9452                             (UCHAR_MAX !=
   9453                              ps_recon_datastore
   9454                                  ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx]))
   9455                         {
   9456                             UWORD8 *pu1_recon_src =
   9457                                 ((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs
   9458                                      [ps_recon_datastore->au1_bufId_with_winning_ChromaRecon
   9459                                           [V_PLANE][ctr][i4_subtu_idx]]) +
   9460                                 i4_subtu_pos_x +
   9461                                 i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride;
   9462 
   9463                             ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
   9464                                 pu1_recon_src,
   9465                                 ps_recon_datastore->i4_lumaRecon_stride,
   9466                                 pu1_chroma_recon,
   9467                                 recon_chrma_strd,
   9468                                 chroma_trans_size,
   9469                                 chroma_trans_size,
   9470                                 V_PLANE);
   9471                         }
   9472                     }
   9473 
   9474                     u1_is_cu_coded |=
   9475                         ((1 == i4_subtu_idx) ? ps_tu->b1_cr_cbf_subtu1 : ps_tu->b1_cr_cbf);
   9476 
   9477                     pu1_final_ecd_data += i4_num_bytes;
   9478                     total_bytes += i4_num_bytes;
   9479                 }
   9480             }
   9481             else
   9482             {
   9483                 WORD32 cb_zero_col, cb_zero_row, cr_zero_col, cr_zero_row;
   9484 
   9485                 for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus; i4_subtu_idx++)
   9486                 {
   9487                     WORD32 cb_cbf, cr_cbf;
   9488                     WORD32 cb_num_bytes, cr_num_bytes;
   9489 
   9490                     WORD32 chroma_trans_size = MAX(4, trans_size >> 1);
   9491 
   9492                     WORD32 i4_subtu_pos_x = cu_pos_x_in_pix;
   9493                     WORD32 i4_subtu_pos_y = cu_pos_y_in_pix + (i4_subtu_idx * chroma_trans_size);
   9494 
   9495                     if(0 == u1_is_422)
   9496                     {
   9497                         i4_subtu_pos_y >>= 1;
   9498                     }
   9499 
   9500                     pu1_cur_src_chrm += (i4_subtu_idx * chroma_trans_size * src_chrm_strd);
   9501                     pu1_cur_pred_chrm += (i4_subtu_idx * chroma_trans_size * pred_chrm_strd);
   9502                     pu1_cur_chroma_recon += (i4_subtu_idx * chroma_trans_size * recon_chrma_strd);
   9503                     pi2_cur_deq_data_chrm += (i4_subtu_idx * chroma_trans_size * cu_size);
   9504 
   9505                     if((PRED_MODE_INTRA == packed_pred_mode) &&
   9506                        (1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data))
   9507                     {
   9508                         WORD32 nbr_flags, left_strd_chrm, chrm_pred_func_idx;
   9509                         UWORD8 *pu1_left_chrm;
   9510                         UWORD8 *pu1_top_chrm;
   9511                         UWORD8 *pu1_top_left_chrm;
   9512 
   9513                         nbr_flags = ihevce_get_intra_chroma_tu_nbr(
   9514                             *pu4_nbr_flags, i4_subtu_idx, chroma_trans_size, u1_is_422);
   9515 
   9516                         /* left cu boundary */
   9517                         if(0 == i4_subtu_pos_x)
   9518                         {
   9519                             left_strd_chrm = ps_chrm_cu_buf_prms->i4_cu_left_stride;
   9520                             pu1_left_chrm =
   9521                                 ps_chrm_cu_buf_prms->pu1_cu_left + i4_subtu_pos_y * left_strd_chrm;
   9522                         }
   9523                         else
   9524                         {
   9525                             pu1_left_chrm = pu1_cur_chroma_recon - 2;
   9526                             left_strd_chrm = recon_chrma_strd;
   9527                         }
   9528 
   9529                         /* top cu boundary */
   9530                         if(0 == i4_subtu_pos_y)
   9531                         {
   9532                             pu1_top_chrm = ps_chrm_cu_buf_prms->pu1_cu_top + i4_subtu_pos_x;
   9533                         }
   9534                         else
   9535                         {
   9536                             pu1_top_chrm = pu1_cur_chroma_recon - recon_chrma_strd;
   9537                         }
   9538 
   9539                         /* by default top left is set to cu top left */
   9540                         pu1_top_left_chrm = ps_chrm_cu_buf_prms->pu1_cu_top_left;
   9541 
   9542                         /* top left based on position */
   9543                         if((0 != i4_subtu_pos_y) && (0 == i4_subtu_pos_x))
   9544                         {
   9545                             pu1_top_left_chrm = pu1_left_chrm - left_strd_chrm;
   9546                         }
   9547                         else if(0 != i4_subtu_pos_x)
   9548                         {
   9549                             pu1_top_left_chrm = pu1_top_chrm - 2;
   9550                         }
   9551 
   9552                         /* call the chroma reference array substitution */
   9553                         ihevc_intra_pred_chroma_ref_substitution_fptr(
   9554                             pu1_top_left_chrm,
   9555                             pu1_top_chrm,
   9556                             pu1_left_chrm,
   9557                             left_strd_chrm,
   9558                             chroma_trans_size,
   9559                             nbr_flags,
   9560                             (UWORD8 *)ps_ctxt->pv_ref_sub_out,
   9561                             1);
   9562 
   9563                         /* use the look up to get the function idx */
   9564                         chrm_pred_func_idx = g_i4_ip_funcs[chroma_pred_mode];
   9565 
   9566                         /* call the intra prediction function */
   9567                         ps_ctxt->apf_chrm_ip[chrm_pred_func_idx](
   9568                             (UWORD8 *)ps_ctxt->pv_ref_sub_out,
   9569                             1,
   9570                             pu1_cur_pred_chrm,
   9571                             pred_chrm_strd,
   9572                             chroma_trans_size,
   9573                             chroma_pred_mode);
   9574                     }
   9575 
   9576                     /**---------- Compute iq&coeff data if required : Chroma ------------**/
   9577                     if(1 == ps_tu_enc_loop_temp_prms->b1_eval_chroma_iq_and_coeff_data)
   9578                     {
   9579                         WORD32 perform_sbh, perform_rdoq, temp_bits;
   9580 
   9581                         if(ps_prms->u1_recompute_sbh_and_rdoq)
   9582                         {
   9583                             perform_sbh = (ps_ctxt->i4_sbh_level != NO_SBH);
   9584                             perform_rdoq = (ps_ctxt->i4_rdoq_level != NO_RDOQ);
   9585                         }
   9586                         else
   9587                         {
   9588                             /* RDOQ will change the coefficients. If coefficients are changed, we will have to do sbh again*/
   9589                             perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh;
   9590                             /* To do SBH we need the quant and iquant data. This would mean we need to do quantization again, which would mean
   9591                         we would have to do RDOQ again.*/
   9592                             perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq;
   9593                         }
   9594 
   9595                         /* populate the coeffs scan idx */
   9596                         ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
   9597 
   9598                         if(PRED_MODE_INTRA == packed_pred_mode)
   9599                         {
   9600                             /* for 4x4 transforms the scan is chosen based on intra pred mode */
   9601                             if(4 == chroma_trans_size)
   9602                             {
   9603                                 /* for modes from 22 upto 30 horizontal scan is used */
   9604                                 if((chroma_pred_mode > 21) && (chroma_pred_mode < 31))
   9605                                 {
   9606                                     ps_ctxt->i4_scan_idx = SCAN_HORZ;
   9607                                 }
   9608                                 /* for modes from 6 upto 14 vertical scan is used */
   9609                                 else if((chroma_pred_mode > 5) && (chroma_pred_mode < 15))
   9610                                 {
   9611                                     ps_ctxt->i4_scan_idx = SCAN_VERT;
   9612                                 }
   9613                             }
   9614                         }
   9615 
   9616 #if DISABLE_RDOQ_INTRA
   9617                         if(PRED_MODE_INTRA == packed_pred_mode)
   9618                         {
   9619                             perform_rdoq = 0;
   9620                         }
   9621 #endif
   9622 
   9623                         /* RDOPT copy States :  TU init (best until prev TU) to current */
   9624                         COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
   9625                             &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
   9626                                     .s_cabac_ctxt.au1_ctxt_models[0] +
   9627                                 IHEVC_CAB_COEFFX_PREFIX,
   9628                             &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
   9629                             IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
   9630 
   9631                         ASSERT(rd_opt_best_idx == ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx);
   9632                         /* If BEST candidate RDOQ is enabled, either no coef level rdoq or CU level rdoq has to be enabled
   9633                     so that all candidates and best candidate are quantized with same rounding factor */
   9634                         if(1 == perform_rdoq)
   9635                         {
   9636                             ASSERT(ps_ctxt->i4_quant_rounding_level != TU_LEVEL_QUANT_ROUNDING);
   9637                         }
   9638 
   9639                         if(!ps_best_cu_prms->u1_skip_flag ||
   9640                            !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt)
   9641                         {
   9642                             /* Cb */
   9643                             cb_cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
   9644                                 ps_ctxt,
   9645                                 pu1_cur_pred_chrm,
   9646                                 pred_chrm_strd,
   9647                                 pu1_cur_src_chrm,
   9648                                 src_chrm_strd,
   9649                                 pi2_cur_deq_data_chrm,
   9650                                 cu_size,
   9651                                 pu1_chrm_recon,
   9652                                 recon_chrma_strd,
   9653                                 pu1_final_ecd_data,
   9654                                 pu1_csbf_buf,
   9655                                 csbf_strd,
   9656                                 chroma_trans_size,
   9657                                 ps_ctxt->i4_scan_idx,
   9658                                 (PRED_MODE_INTRA == packed_pred_mode),
   9659                                 &cb_num_bytes,
   9660                                 &temp_bits,
   9661                                 &cb_zero_col,
   9662                                 &cb_zero_row,
   9663                                 &au1_is_recon_available[U_PLANE],
   9664                                 perform_sbh,
   9665                                 perform_rdoq,
   9666                                 &i8_ssd,
   9667 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
   9668                                 !ps_ctxt->u1_is_refPic
   9669                                     ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
   9670                                     : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
   9671                                        (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
   9672                                           100.0,
   9673                                 ps_prms->u1_is_cu_noisy,
   9674 #endif
   9675                                 ps_best_cu_prms->u1_skip_flag &&
   9676                                     ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt,
   9677                                 u1_compute_spatial_ssd_chroma ? SPATIAL_DOMAIN_SSD
   9678                                                               : FREQUENCY_DOMAIN_SSD,
   9679                                 U_PLANE);
   9680                         }
   9681                         else
   9682                         {
   9683                             cb_cbf = 0;
   9684                             temp_bits = 0;
   9685                             cb_num_bytes = 0;
   9686                             au1_is_recon_available[U_PLANE] = 0;
   9687                             cb_zero_col = 0;
   9688                             cb_zero_row = 0;
   9689                         }
   9690 
   9691                         /* Accumulate chroma residual bits */
   9692                         ps_best_cu_prms->u4_cu_chroma_res_bits += temp_bits;
   9693 
   9694                         /* RDOPT copy States :  New updated after curr TU to TU init */
   9695                         if(0 != cb_cbf)
   9696                         {
   9697                             COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
   9698                                 &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
   9699                                 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
   9700                                         .s_cabac_ctxt.au1_ctxt_models[0] +
   9701                                     IHEVC_CAB_COEFFX_PREFIX,
   9702                                 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
   9703                         }
   9704                         /* RDOPT copy States :  Restoring back the Cb init state to Cr */
   9705                         else
   9706                         {
   9707                             COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
   9708                                 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
   9709                                         .s_cabac_ctxt.au1_ctxt_models[0] +
   9710                                     IHEVC_CAB_COEFFX_PREFIX,
   9711                                 &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
   9712                                 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
   9713                         }
   9714 
   9715                         if(!ps_best_cu_prms->u1_skip_flag ||
   9716                            !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt)
   9717                         {
   9718                             /* Cr */
   9719                             cr_cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
   9720                                 ps_ctxt,
   9721                                 pu1_cur_pred_chrm,
   9722                                 pred_chrm_strd,
   9723                                 pu1_cur_src_chrm,
   9724                                 src_chrm_strd,
   9725                                 pi2_cur_deq_data_chrm + chroma_trans_size,
   9726                                 cu_size,
   9727                                 pu1_chrm_recon,
   9728                                 recon_chrma_strd,
   9729                                 pu1_final_ecd_data + cb_num_bytes,
   9730                                 pu1_csbf_buf,
   9731                                 csbf_strd,
   9732                                 chroma_trans_size,
   9733                                 ps_ctxt->i4_scan_idx,
   9734                                 (PRED_MODE_INTRA == packed_pred_mode),
   9735                                 &cr_num_bytes,
   9736                                 &temp_bits,
   9737                                 &cr_zero_col,
   9738                                 &cr_zero_row,
   9739                                 &au1_is_recon_available[V_PLANE],
   9740                                 perform_sbh,
   9741                                 perform_rdoq,
   9742                                 &i8_ssd,
   9743 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
   9744                                 !ps_ctxt->u1_is_refPic
   9745                                     ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
   9746                                     : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
   9747                                        (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
   9748                                           100.0,
   9749                                 ps_prms->u1_is_cu_noisy,
   9750 #endif
   9751                                 ps_best_cu_prms->u1_skip_flag &&
   9752                                     ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt,
   9753                                 u1_compute_spatial_ssd_chroma ? SPATIAL_DOMAIN_SSD
   9754                                                               : FREQUENCY_DOMAIN_SSD,
   9755                                 V_PLANE);
   9756                         }
   9757                         else
   9758                         {
   9759                             cr_cbf = 0;
   9760                             temp_bits = 0;
   9761                             cr_num_bytes = 0;
   9762                             au1_is_recon_available[V_PLANE] = 0;
   9763                             cr_zero_col = 0;
   9764                             cr_zero_row = 0;
   9765                         }
   9766 
   9767                         /* Accumulate chroma residual bits */
   9768                         ps_best_cu_prms->u4_cu_chroma_res_bits += temp_bits;
   9769 
   9770                         /* RDOPT copy States :  New updated after curr TU to TU init */
   9771                         if(0 != cr_cbf)
   9772                         {
   9773                             COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
   9774                                 &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
   9775                                 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
   9776                                         .s_cabac_ctxt.au1_ctxt_models[0] +
   9777                                     IHEVC_CAB_COEFFX_PREFIX,
   9778                                 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
   9779                         }
   9780 
   9781                         if(0 == i4_subtu_idx)
   9782                         {
   9783                             ps_tu->b1_cb_cbf = cb_cbf;
   9784                             ps_tu->b1_cr_cbf = cr_cbf;
   9785                         }
   9786                         else
   9787                         {
   9788                             ps_tu->b1_cb_cbf_subtu1 = cb_cbf;
   9789                             ps_tu->b1_cr_cbf_subtu1 = cr_cbf;
   9790                         }
   9791                     }
   9792                     else
   9793                     {
   9794                         cb_zero_col = ps_tu_enc_loop_temp_prms->au4_cb_zero_col[i4_subtu_idx];
   9795                         cb_zero_row = ps_tu_enc_loop_temp_prms->au4_cb_zero_row[i4_subtu_idx];
   9796                         cr_zero_col = ps_tu_enc_loop_temp_prms->au4_cr_zero_col[i4_subtu_idx];
   9797                         cr_zero_row = ps_tu_enc_loop_temp_prms->au4_cr_zero_row[i4_subtu_idx];
   9798 
   9799                         if(ps_prms->u1_will_cabac_state_change)
   9800                         {
   9801                             cb_num_bytes =
   9802                                 ps_tu_enc_loop_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx];
   9803                         }
   9804                         else
   9805                         {
   9806                             cb_num_bytes = 0;
   9807                         }
   9808 
   9809                         if(ps_prms->u1_will_cabac_state_change)
   9810                         {
   9811                             cr_num_bytes =
   9812                                 ps_tu_enc_loop_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx];
   9813                         }
   9814                         else
   9815                         {
   9816                             cr_num_bytes = 0;
   9817                         }
   9818 
   9819                         /* copy cb ecd data to final buffer */
   9820                         memcpy(pu1_final_ecd_data, pu1_chrm_old_ecd_data, cb_num_bytes);
   9821 
   9822                         pu1_chrm_old_ecd_data += cb_num_bytes;
   9823 
   9824                         /* copy cr ecd data to final buffer */
   9825                         memcpy(
   9826                             (pu1_final_ecd_data + cb_num_bytes),
   9827                             pu1_chrm_old_ecd_data,
   9828                             cr_num_bytes);
   9829 
   9830                         pu1_chrm_old_ecd_data += cr_num_bytes;
   9831 
   9832                         au1_is_recon_available[U_PLANE] = 0;
   9833                         au1_is_recon_available[V_PLANE] = 0;
   9834                     }
   9835 
   9836                     /**-------- Compute Recon data (Do IT & Recon) : Chroma  -----------**/
   9837                     if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
   9838                        (!u1_compute_spatial_ssd_chroma ||
   9839                         (!au1_is_recon_available[U_PLANE] && u1_compute_spatial_ssd_chroma)))
   9840                     {
   9841                         if(!ps_recon_datastore->au1_is_chromaRecon_available[0] ||
   9842                            (ps_recon_datastore->au1_is_chromaRecon_available[0] &&
   9843                             (UCHAR_MAX ==
   9844                              ps_recon_datastore
   9845                                  ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx])))
   9846                         {
   9847                             ihevce_chroma_it_recon_fxn(
   9848                                 ps_ctxt,
   9849                                 pi2_cur_deq_data_chrm,
   9850                                 cu_size,
   9851                                 pu1_cur_pred_chrm,
   9852                                 pred_chrm_strd,
   9853                                 pu1_cur_chroma_recon,
   9854                                 recon_chrma_strd,
   9855                                 pu1_final_ecd_data,
   9856                                 chroma_trans_size,
   9857                                 (i4_subtu_idx == 0) ? ps_tu->b1_cb_cbf : ps_tu->b1_cb_cbf_subtu1,
   9858                                 cb_zero_col,
   9859                                 cb_zero_row,
   9860                                 U_PLANE);
   9861                         }
   9862                         else if(
   9863                             ps_recon_datastore->au1_is_chromaRecon_available[0] &&
   9864                             (UCHAR_MAX !=
   9865                              ps_recon_datastore
   9866                                  ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx]))
   9867                         {
   9868                             UWORD8 *pu1_recon_src =
   9869                                 ((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs
   9870                                      [ps_recon_datastore->au1_bufId_with_winning_ChromaRecon
   9871                                           [U_PLANE][ctr][i4_subtu_idx]]) +
   9872                                 i4_subtu_pos_x +
   9873                                 i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride;
   9874 
   9875                             ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
   9876                                 pu1_recon_src,
   9877                                 ps_recon_datastore->i4_lumaRecon_stride,
   9878                                 pu1_cur_chroma_recon,
   9879                                 recon_chrma_strd,
   9880                                 chroma_trans_size,
   9881                                 chroma_trans_size,
   9882                                 U_PLANE);
   9883                         }
   9884                     }
   9885 
   9886                     u1_is_cu_coded |=
   9887                         ((1 == i4_subtu_idx) ? ps_tu->b1_cb_cbf_subtu1 : ps_tu->b1_cb_cbf);
   9888 
   9889                     if(ps_prms->u1_will_cabac_state_change)
   9890                     {
   9891                         ps_tu_enc_loop->ai4_cb_coeff_offset[i4_subtu_idx] = total_bytes;
   9892                     }
   9893 
   9894                     pu1_final_ecd_data += cb_num_bytes;
   9895                     /* update total bytes consumed */
   9896                     total_bytes += cb_num_bytes;
   9897 
   9898                     if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
   9899                        (!u1_compute_spatial_ssd_chroma ||
   9900                         (!au1_is_recon_available[V_PLANE] && u1_compute_spatial_ssd_chroma)))
   9901                     {
   9902                         if(!ps_recon_datastore->au1_is_chromaRecon_available[0] ||
   9903                            (ps_recon_datastore->au1_is_chromaRecon_available[0] &&
   9904                             (UCHAR_MAX ==
   9905                              ps_recon_datastore
   9906                                  ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx])))
   9907                         {
   9908                             ihevce_chroma_it_recon_fxn(
   9909                                 ps_ctxt,
   9910                                 pi2_cur_deq_data_chrm + chroma_trans_size,
   9911                                 cu_size,
   9912                                 pu1_cur_pred_chrm,
   9913                                 pred_chrm_strd,
   9914                                 pu1_cur_chroma_recon,
   9915                                 recon_chrma_strd,
   9916                                 pu1_final_ecd_data,
   9917                                 chroma_trans_size,
   9918                                 (i4_subtu_idx == 0) ? ps_tu->b1_cr_cbf : ps_tu->b1_cr_cbf_subtu1,
   9919                                 cr_zero_col,
   9920                                 cr_zero_row,
   9921                                 V_PLANE);
   9922                         }
   9923                         else if(
   9924                             ps_recon_datastore->au1_is_chromaRecon_available[0] &&
   9925                             (UCHAR_MAX !=
   9926                              ps_recon_datastore
   9927                                  ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx]))
   9928                         {
   9929                             UWORD8 *pu1_recon_src =
   9930                                 ((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs
   9931                                      [ps_recon_datastore->au1_bufId_with_winning_ChromaRecon
   9932                                           [V_PLANE][ctr][i4_subtu_idx]]) +
   9933                                 i4_subtu_pos_x +
   9934                                 i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride;
   9935 
   9936                             ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
   9937                                 pu1_recon_src,
   9938                                 ps_recon_datastore->i4_lumaRecon_stride,
   9939                                 pu1_cur_chroma_recon,
   9940                                 recon_chrma_strd,
   9941                                 chroma_trans_size,
   9942                                 chroma_trans_size,
   9943                                 V_PLANE);
   9944                         }
   9945                     }
   9946 
   9947                     u1_is_cu_coded |=
   9948                         ((1 == i4_subtu_idx) ? ps_tu->b1_cr_cbf_subtu1 : ps_tu->b1_cr_cbf);
   9949 
   9950                     if(ps_prms->u1_will_cabac_state_change)
   9951                     {
   9952                         ps_tu_enc_loop->ai4_cr_coeff_offset[i4_subtu_idx] = total_bytes;
   9953                     }
   9954 
   9955                     pu1_final_ecd_data += cr_num_bytes;
   9956                     /* update total bytes consumed */
   9957                     total_bytes += cr_num_bytes;
   9958                 }
   9959             }
   9960         }
   9961         else
   9962         {
   9963             ps_tu_enc_loop->ai4_cb_coeff_offset[0] = total_bytes;
   9964             ps_tu_enc_loop->ai4_cr_coeff_offset[0] = total_bytes;
   9965             ps_tu_enc_loop->ai4_cb_coeff_offset[1] = total_bytes;
   9966             ps_tu_enc_loop->ai4_cr_coeff_offset[1] = total_bytes;
   9967             ps_tu->b1_cb_cbf = 0;
   9968             ps_tu->b1_cr_cbf = 0;
   9969             ps_tu->b1_cb_cbf_subtu1 = 0;
   9970             ps_tu->b1_cr_cbf_subtu1 = 0;
   9971         }
   9972 
   9973         /* Update to next TU */
   9974         ps_tu_enc_loop++;
   9975         ps_tu_enc_loop_temp_prms++;
   9976 
   9977         pu4_nbr_flags++;
   9978         pu1_intra_pred_mode++;
   9979 
   9980         /* Do not set the nbr map for the last TU in the CU */
   9981         if((num_tu_in_cu - 1) != ctr)
   9982         {
   9983             /* set the neighbour map to 1 */
   9984             ihevce_set_nbr_map(
   9985                 ps_ctxt->pu1_ctb_nbr_map,
   9986                 ps_ctxt->i4_nbr_map_strd,
   9987                 cu_pos_x_in_4x4,
   9988                 cu_pos_y_in_4x4,
   9989                 (trans_size >> 2),
   9990                 1);
   9991         }
   9992     }
   9993 
   9994     if(ps_prms->u1_will_cabac_state_change)
   9995     {
   9996         ps_best_cu_prms->u1_is_cu_coded = u1_is_cu_coded;
   9997 
   9998         /* Modify skip flag, if luma is skipped & Chroma is coded */
   9999         if((1 == u1_is_cu_coded) && (PRED_MODE_SKIP == packed_pred_mode))
   10000         {
   10001             ps_best_cu_prms->u1_skip_flag = 0;
   10002         }
   10003     }
   10004 
   10005     /* during chroma evaluation if skip decision was over written     */
   10006     /* then the current skip candidate is set to a non skip candidate */
   10007     if(PRED_MODE_INTRA != packed_pred_mode)
   10008     {
   10009         ps_best_inter_cand->b1_skip_flag = ps_best_cu_prms->u1_skip_flag;
   10010     }
   10011 
   10012     /**------------- Compute header data if required --------------**/
   10013     if(1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data)
   10014     {
   10015         WORD32 cbf_bits;
   10016         WORD32 cu_bits;
   10017         WORD32 unit_4x4_size = cu_size >> 2;
   10018 
   10019         /*Restoring the running reference into the best rdopt_ctxt cabac states which will then
   10020         be copied as the base reference for the next cu
   10021         Assumption : We are ensuring that the u1_eval_header_data flag is set to 1 only if either
   10022         luma and chroma are being reevaluated*/
   10023         COPY_CABAC_STATES(
   10024             &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
   10025                  .s_cabac_ctxt.au1_ctxt_models[0],
   10026             &ps_ctxt->au1_rdopt_init_ctxt_models[0],
   10027             IHEVC_CAB_CTXT_END);
   10028 
   10029         /* get the neighbour availability flags for current cu  */
   10030         ihevce_get_only_nbr_flag(
   10031             &s_nbr,
   10032             ps_ctxt->pu1_ctb_nbr_map,
   10033             ps_ctxt->i4_nbr_map_strd,
   10034             (cu_pos_x << 1),
   10035             (cu_pos_y << 1),
   10036             unit_4x4_size,
   10037             unit_4x4_size);
   10038 
   10039         cu_bits = ihevce_entropy_rdo_encode_cu(
   10040             &ps_ctxt->s_rdopt_entropy_ctxt,
   10041             ps_best_cu_prms,
   10042             cu_pos_x,
   10043             cu_pos_y,
   10044             cu_size,
   10045             ps_ctxt->u1_disable_intra_eval ? !DISABLE_TOP_SYNC && s_nbr.u1_top_avail
   10046                                            : s_nbr.u1_top_avail,
   10047             s_nbr.u1_left_avail,
   10048             (pu1_final_ecd_data - total_bytes),
   10049             &cbf_bits);
   10050 
   10051         /* cbf bits are excluded from header bits, instead considered as texture bits */
   10052         ps_best_cu_prms->u4_cu_hdr_bits = cu_bits - cbf_bits;
   10053         ps_best_cu_prms->u4_cu_cbf_bits = cbf_bits;
   10054     }
   10055 
   10056     if(ps_prms->u1_will_cabac_state_change)
   10057     {
   10058         ps_best_cu_prms->i4_num_bytes_ecd_data = total_bytes;
   10059     }
   10060 }
   10061 
   10062 /*!
   10063 ******************************************************************************
   10064 * \if Function name : ihevce_set_eval_flags \endif
   10065 *
   10066 * \brief
   10067 *    Function which decides which eval flags have to be set based on the
   10068 *    quality preset and the best-candidate RDOQ/SBH conditions
   10069 *
   10070 * \param[in,out] ps_ctxt : encoder ctxt pointer (final recon eval flags are updated)
   10071 * \param[in,out] ps_enc_loop_bestprms : pointer to final cu params (per-TU eval flags are updated)
   10072 *
   10073 * \return
   10074 *    None
   10075 *
   10076 * \author
   10077 *  Ittiam
   10078 *
   10079 *****************************************************************************
   10080 */
   10081 void ihevce_set_eval_flags(
   10082     ihevce_enc_loop_ctxt_t *ps_ctxt, enc_loop_cu_final_prms_t *ps_enc_loop_bestprms)
   10083 {
   10084     WORD32 count = 0;
   10085 
   10086     ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data = 0;
   10087 
   10088     ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data =
   10089         !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
   10090 
   10091     if(ps_ctxt->u1_disable_intra_eval && (!(ps_ctxt->i4_deblk_pad_hpel_cur_pic & 0x1)))
   10092     {
   10093         ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data = 0;
   10094     }
   10095     else
   10096     {
   10097         ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data = 1;
   10098     }
   10099 
   10100     if((1 == ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq) ||
   10101        (1 == ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh))
   10102     {
   10103         /* When rdoq is enabled only for the best candidate, in case of Intra nTU,
   10104         RDOQ might have altered the coeffs of the neighbour CU. As a result, the pred
   10105         for the current CU will change. Therefore, we need to reevaluate the pred data */
   10106         if((ps_enc_loop_bestprms->u2_num_tus_in_cu > 1) &&
   10107            (ps_enc_loop_bestprms->u1_intra_flag == 1))
   10108         {
   10109             ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data = 1;
   10110             ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data = 1;
   10111         }
   10112         if(ps_enc_loop_bestprms->u1_skip_flag == 1)
   10113         {
   10114             for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
   10115             {
   10116                 ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
   10117                     .b1_eval_luma_iq_and_coeff_data = 0;
   10118                 ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
   10119                     .b1_eval_chroma_iq_and_coeff_data = 0;
   10120             }
   10121         }
   10122         else
   10123         {
   10124             for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
   10125             {
   10126                 ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
   10127                     .b1_eval_luma_iq_and_coeff_data = 1;
   10128                 ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
   10129                     .b1_eval_chroma_iq_and_coeff_data = 1;
   10130             }
   10131         }
   10132     }
   10133     else
   10134     {
   10135         switch(ps_ctxt->i4_quality_preset)
   10136         {
   10137         case IHEVCE_QUALITY_P0:
   10138         case IHEVCE_QUALITY_P2:
   10139         case IHEVCE_QUALITY_P3:
   10140         {
   10141             for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
   10142             {
   10143                 ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
   10144                     .b1_eval_luma_iq_and_coeff_data = 0;
   10145                 ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
   10146                     .b1_eval_chroma_iq_and_coeff_data =
   10147                     !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
   10148             }
   10149 
   10150             break;
   10151         }
   10152         case IHEVCE_QUALITY_P4:
   10153         case IHEVCE_QUALITY_P5:
   10154         {
   10155             for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
   10156             {
   10157                 ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
   10158                     .b1_eval_luma_iq_and_coeff_data = 0;
   10159                 ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
   10160                     .b1_eval_chroma_iq_and_coeff_data =
   10161                     !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
   10162             }
   10163 
   10164             break;
   10165         }
   10166         case IHEVCE_QUALITY_P6:
   10167         {
   10168             for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
   10169             {
   10170                 ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
   10171                     .b1_eval_luma_iq_and_coeff_data = 0;
   10172 #if !ENABLE_CHROMA_TRACKING_OF_LUMA_CBF_IN_XS25
   10173                 ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
   10174                     .b1_eval_chroma_iq_and_coeff_data =
   10175                     !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
   10176 #else
   10177                 if((ps_ctxt->i1_slice_type == BSLICE) && (ps_ctxt->i4_temporal_layer_id > 1) &&
   10178                    (ps_enc_loop_bestprms->as_tu_enc_loop[count].s_tu.b3_size >= 2))
   10179                 {
   10180                     ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
   10181                         .b1_eval_chroma_iq_and_coeff_data =
   10182                         ps_enc_loop_bestprms->as_tu_enc_loop[count].s_tu.b1_y_cbf;
   10183                 }
   10184                 else
   10185                 {
   10186                     ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
   10187                         .b1_eval_chroma_iq_and_coeff_data =
   10188                         !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
   10189                 }
   10190 #endif
   10191             }
   10192 
   10193             break;
   10194         }
   10195         default:
   10196         {
   10197             break;
   10198         }
   10199         }
   10200     }
   10201 
   10202     /* Not recomputing Luma pred-data and header data for any preset now */
   10203     ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 1;
   10204 }
   10205 
   10206 /**
   10207 ******************************************************************************
   10208 *
   10209 *  @brief Shrink's TU tree of inter CUs by merging redundnant child nodes
   10210 *         (not coded children) into a parent node(not coded).
   10211 *
   10212 *  @par   Description
   10213 *         This is required post RDO evaluation as TU decisions are
   10214 *         pre-determined(pre RDO) based on recursive SATD,
   10215 *         while the quad children TU's can be skipped during RDO
   10216 *
   10217 *         The shrink process is applied iteratively till there are no
   10218 *         more modes to shrink
   10219 *
   10220 *  @param[inout]   ps_tu_enc_loop
   10221 *       pointer to tu enc loop params of inter cu
   10222 *
   10223 *  @param[inout]   ps_tu_enc_loop_temp_prms
   10224 *       pointer to temp tu enc loop params of inter cu
   10225 *
   10226 *  @param[in]   num_tu_in_cu
   10227 *       number of tus in cu
   10228 *
   10229 *  @return      modified number of tus in cu
   10230 *
   10231 ******************************************************************************
   10232 */
   10233 WORD32 ihevce_shrink_inter_tu_tree(
   10234     tu_enc_loop_out_t *ps_tu_enc_loop,
   10235     tu_enc_loop_temp_prms_t *ps_tu_enc_loop_temp_prms,
   10236     recon_datastore_t *ps_recon_datastore,
   10237     WORD32 num_tu_in_cu,
   10238     UWORD8 u1_is_422)
   10239 {
   10240     WORD32 recurse = 1;
   10241     WORD32 ctr;
   10242 
   10243     /* ------------- Quadtree TU Split Transform flag optimization ------------  */
   10244     /* Post RDO, if all 4 child nodes are not coded the overheads of split TU    */
   10245     /* flags and cbf flags are saved by merging to parent node and marking       */
   10246     /* parent TU as not coded                                                    */
   10247     /*                                                                           */
   10248     /*                               ParentTUSplit=1                             */
   10249     /*                                      |                                    */
   10250     /*       ---------------------------------------------------------           */
   10251     /*       |C0(Not coded) | C1(Not coded) | C2(Not coded) | C3(Not coded)      */
   10252     /*                                     ||                                    */
   10253     /*                                     \/                                    */
   10254     /*                                                                           */
   10255     /*                              ParentTUSplit=0 (Not Coded)                  */
   10256     /*                                                                           */
   10257     /* ------------- Quadtree TU Split Transform flag optimization ------------  */
   10258     while((num_tu_in_cu > 4) && recurse)
   10259     {
   10260         recurse = 0;
   10261 
   10262         /* Validate inter CU */
   10263         //ASSERT(ps_tu_enc_loop[0].s_tu.s_tu.b1_intra_flag == 0); /*b1_intra_flag no longer a member of tu structure */
   10264 
   10265         /* loop for all tu blocks in current cu */
   10266         for(ctr = 0; ctr < num_tu_in_cu;)
   10267         {
   10268             /* Get current tu posx, posy and size */
   10269             WORD32 curr_pos_x = ps_tu_enc_loop[ctr].s_tu.b4_pos_x << 2;
   10270             WORD32 curr_pos_y = ps_tu_enc_loop[ctr].s_tu.b4_pos_y << 2;
   10271             /* +1 is for parents size */
   10272             WORD32 parent_tu_size = 1 << (ps_tu_enc_loop[ctr].s_tu.b3_size + 2 + 1);
   10273 
   10274             /* eval merge if leaf nodes reached i.e all child tus are of same size and first tu pos is same as parent pos */
   10275             WORD32 eval_merge = ((curr_pos_x & (parent_tu_size - 1)) == 0);
   10276             eval_merge &= ((curr_pos_y & (parent_tu_size - 1)) == 0);
   10277 
   10278             /* As TUs are published in encode order (Z SCAN),                      */
   10279             /* Four consecutive TUS of same size implies we have hit leaf nodes.   */
   10280             if(((ps_tu_enc_loop[ctr].s_tu.b3_size) == (ps_tu_enc_loop[ctr + 1].s_tu.b3_size)) &&
   10281                ((ps_tu_enc_loop[ctr].s_tu.b3_size) == (ps_tu_enc_loop[ctr + 2].s_tu.b3_size)) &&
   10282                ((ps_tu_enc_loop[ctr].s_tu.b3_size) == (ps_tu_enc_loop[ctr + 3].s_tu.b3_size)) &&
   10283                eval_merge)
   10284             {
   10285                 WORD32 merge_parent = 1;
   10286 
   10287                 /* If any leaf noded is coded, it cannot be merged to parent */
   10288                 if((ps_tu_enc_loop[ctr].s_tu.b1_y_cbf) || (ps_tu_enc_loop[ctr].s_tu.b1_cb_cbf) ||
   10289                    (ps_tu_enc_loop[ctr].s_tu.b1_cr_cbf) ||
   10290 
   10291                    (ps_tu_enc_loop[ctr + 1].s_tu.b1_y_cbf) ||
   10292                    (ps_tu_enc_loop[ctr + 1].s_tu.b1_cb_cbf) ||
   10293                    (ps_tu_enc_loop[ctr + 1].s_tu.b1_cr_cbf) ||
   10294 
   10295                    (ps_tu_enc_loop[ctr + 2].s_tu.b1_y_cbf) ||
   10296                    (ps_tu_enc_loop[ctr + 2].s_tu.b1_cb_cbf) ||
   10297                    (ps_tu_enc_loop[ctr + 2].s_tu.b1_cr_cbf) ||
   10298 
   10299                    (ps_tu_enc_loop[ctr + 3].s_tu.b1_y_cbf) ||
   10300                    (ps_tu_enc_loop[ctr + 3].s_tu.b1_cb_cbf) ||
   10301                    (ps_tu_enc_loop[ctr + 3].s_tu.b1_cr_cbf))
   10302                 {
   10303                     merge_parent = 0;
   10304                 }
   10305 
   10306                 if(u1_is_422)
   10307                 {
   10308                     if((ps_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1) ||
   10309                        (ps_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1) ||
   10310 
   10311                        (ps_tu_enc_loop[ctr + 1].s_tu.b1_cb_cbf_subtu1) ||
   10312                        (ps_tu_enc_loop[ctr + 1].s_tu.b1_cr_cbf_subtu1) ||
   10313 
   10314                        (ps_tu_enc_loop[ctr + 2].s_tu.b1_cb_cbf_subtu1) ||
   10315                        (ps_tu_enc_loop[ctr + 2].s_tu.b1_cr_cbf_subtu1) ||
   10316 
   10317                        (ps_tu_enc_loop[ctr + 3].s_tu.b1_cb_cbf_subtu1) ||
   10318                        (ps_tu_enc_loop[ctr + 3].s_tu.b1_cr_cbf_subtu1))
   10319                     {
   10320                         merge_parent = 0;
   10321                     }
   10322                 }
   10323 
   10324                 if(merge_parent)
   10325                 {
   10326                     /* Merge all the children (ctr,ctr+1,ctr+2,ctr+3) to parent (ctr) */
   10327 
   10328                     if(ps_recon_datastore->u1_is_lumaRecon_available)
   10329                     {
   10330                         ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = UCHAR_MAX;
   10331 
   10332                         memmove(
   10333                             &ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr + 1],
   10334                             &ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr + 4],
   10335                             (num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
   10336                     }
   10337 
   10338                     if(ps_recon_datastore->au1_is_chromaRecon_available[0])
   10339                     {
   10340                         ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][0] =
   10341                             UCHAR_MAX;
   10342                         ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][0] =
   10343                             UCHAR_MAX;
   10344 
   10345                         memmove(
   10346                             &ps_recon_datastore
   10347                                  ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 1][0],
   10348                             &ps_recon_datastore
   10349                                  ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 4][0],
   10350                             (num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
   10351 
   10352                         memmove(
   10353                             &ps_recon_datastore
   10354                                  ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 1][0],
   10355                             &ps_recon_datastore
   10356                                  ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 4][0],
   10357                             (num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
   10358 
   10359                         if(u1_is_422)
   10360                         {
   10361                             ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][1] =
   10362                                 UCHAR_MAX;
   10363                             ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][1] =
   10364                                 UCHAR_MAX;
   10365 
   10366                             memmove(
   10367                                 &ps_recon_datastore
   10368                                      ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 1][1],
   10369                                 &ps_recon_datastore
   10370                                      ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 4][1],
   10371                                 (num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
   10372 
   10373                             memmove(
   10374                                 &ps_recon_datastore
   10375                                      ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 1][1],
   10376                                 &ps_recon_datastore
   10377                                      ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 4][1],
   10378                                 (num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
   10379                         }
   10380                     }
   10381 
   10382                     /* Parent node size is one more than that of child */
   10383                     ps_tu_enc_loop[ctr].s_tu.b3_size++;
   10384 
   10385                     ctr++;
   10386 
   10387                     /* move the subsequent TUs to next element */
   10388                     ASSERT(num_tu_in_cu >= (ctr + 3));
   10389                     memmove(
   10390                         (void *)(ps_tu_enc_loop + ctr),
   10391                         (void *)(ps_tu_enc_loop + ctr + 3),
   10392                         (num_tu_in_cu - ctr - 3) * sizeof(tu_enc_loop_out_t));
   10393 
   10394                     /* Also memmove the temp TU params */
   10395                     memmove(
   10396                         (void *)(ps_tu_enc_loop_temp_prms + ctr),
   10397                         (void *)(ps_tu_enc_loop_temp_prms + ctr + 3),
   10398                         (num_tu_in_cu - ctr - 3) * sizeof(tu_enc_loop_temp_prms_t));
   10399 
   10400                     /* Number of TUs in CU are now less by 3 */
   10401                     num_tu_in_cu -= 3;
   10402 
   10403                     /* Recurse again as new parent also be can be merged later */
   10404                     recurse = 1;
   10405                 }
   10406                 else
   10407                 {
   10408                     /* Go to next set of leaf nodes */
   10409                     ctr += 4;
   10410                 }
   10411             }
   10412             else
   10413             {
   10414                 ctr++;
   10415             }
   10416         }
   10417     }
   10418 
   10419     /* return the modified num TUs*/
   10420     ASSERT(num_tu_in_cu > 0);
   10421     return (num_tu_in_cu);
   10422 }
   10423 
   10424 UWORD8 ihevce_intra_mode_nxn_hash_updater(
   10425     UWORD8 *pu1_mode_array, UWORD8 *pu1_hash_table, UWORD8 u1_num_ipe_modes)
   10426 {
   10427     WORD32 i;
   10428     WORD32 i4_mode;
   10429 
   10430     for(i = 0; i < MAX_INTRA_CU_CANDIDATES; i++)
   10431     {
   10432         if(pu1_mode_array[i] < 35)
   10433         {
   10434             if(pu1_mode_array[i] != 0)
   10435             {
   10436                 i4_mode = pu1_mode_array[i] - 1;
   10437 
   10438                 if(!pu1_hash_table[i4_mode])
   10439                 {
   10440                     pu1_hash_table[i4_mode] = 1;
   10441                     pu1_mode_array[u1_num_ipe_modes] = i4_mode;
   10442                     u1_num_ipe_modes++;
   10443                 }
   10444             }
   10445 
   10446             if(pu1_mode_array[i] != 34)
   10447             {
   10448                 i4_mode = pu1_mode_array[i] + 1;
   10449 
   10450                 if((!pu1_hash_table[i4_mode]))
   10451                 {
   10452                     pu1_hash_table[i4_mode] = 1;
   10453                     pu1_mode_array[u1_num_ipe_modes] = i4_mode;
   10454                     u1_num_ipe_modes++;
   10455                 }
   10456             }
   10457         }
   10458     }
   10459 
   10460     if(!pu1_hash_table[INTRA_PLANAR])
   10461     {
   10462         pu1_hash_table[INTRA_PLANAR] = 1;
   10463         pu1_mode_array[u1_num_ipe_modes] = INTRA_PLANAR;
   10464         u1_num_ipe_modes++;
   10465     }
   10466 
   10467     if(!pu1_hash_table[INTRA_DC])
   10468     {
   10469         pu1_hash_table[INTRA_DC] = 1;
   10470         pu1_mode_array[u1_num_ipe_modes] = INTRA_DC;
   10471         u1_num_ipe_modes++;
   10472     }
   10473 
   10474     return u1_num_ipe_modes;
   10475 }
   10476 
   10477 #if ENABLE_TU_TREE_DETERMINATION_IN_RDOPT
   10478 WORD32 ihevce_determine_tu_tree_distribution(
   10479     cu_inter_cand_t *ps_cu_data,
   10480     me_func_selector_t *ps_func_selector,
   10481     WORD16 *pi2_scratch_mem,
   10482     UWORD8 *pu1_inp,
   10483     WORD32 i4_inp_stride,
   10484     WORD32 i4_lambda,
   10485     UWORD8 u1_lambda_q_shift,
   10486     UWORD8 u1_cu_size,
   10487     UWORD8 u1_max_tr_depth)
   10488 {
   10489     err_prms_t s_err_prms;
   10490 
   10491     PF_SAD_FXN_TU_REC pf_err_compute[4];
   10492 
   10493     WORD32 i4_satd;
   10494 
   10495     s_err_prms.pi4_sad_grid = &i4_satd;
   10496     s_err_prms.pi4_tu_split_flags = ps_cu_data->ai4_tu_split_flag;
   10497     s_err_prms.pu1_inp = pu1_inp;
   10498     s_err_prms.pu1_ref = ps_cu_data->pu1_pred_data;
   10499     s_err_prms.i4_inp_stride = i4_inp_stride;
   10500     s_err_prms.i4_ref_stride = ps_cu_data->i4_pred_data_stride;
   10501     s_err_prms.pu1_wkg_mem = (UWORD8 *)pi2_scratch_mem;
   10502 
   10503     if(u1_cu_size == 64)
   10504     {
   10505         s_err_prms.u1_max_tr_depth = MIN(1, u1_max_tr_depth);
   10506     }
   10507     else
   10508     {
   10509         s_err_prms.u1_max_tr_depth = u1_max_tr_depth;
   10510     }
   10511 
   10512     pf_err_compute[CU_64x64] = hme_evalsatd_pt_pu_64x64_tu_rec;
   10513     pf_err_compute[CU_32x32] = hme_evalsatd_pt_pu_32x32_tu_rec;
   10514     pf_err_compute[CU_16x16] = hme_evalsatd_pt_pu_16x16_tu_rec;
   10515     pf_err_compute[CU_8x8] = hme_evalsatd_pt_pu_8x8_tu_rec;
   10516 
   10517     i4_satd = pf_err_compute[hme_get_range(u1_cu_size) - 4](
   10518         &s_err_prms, i4_lambda, u1_lambda_q_shift, 0, ps_func_selector);
   10519 
   10520     if((0 == u1_max_tr_depth) && (ps_cu_data->b3_part_size != 0) && (u1_cu_size != 64))
   10521     {
   10522         ps_cu_data->ai4_tu_split_flag[0] = 1;
   10523     }
   10524 
   10525     return i4_satd;
   10526 }
   10527 #endif
   10528 
   10529 void ihevce_populate_nbr_4x4_with_pu_data(
   10530     nbr_4x4_t *ps_nbr_4x4, pu_t *ps_pu, WORD32 i4_nbr_buf_stride)
   10531 {
   10532     WORD32 i, j;
   10533 
   10534     nbr_4x4_t *ps_tmp_4x4 = ps_nbr_4x4;
   10535 
   10536     WORD32 ht = (ps_pu->b4_ht + 1);
   10537     WORD32 wd = (ps_pu->b4_wd + 1);
   10538 
   10539     ps_nbr_4x4->b1_intra_flag = 0;
   10540     ps_nbr_4x4->b1_pred_l0_flag = !(ps_pu->b2_pred_mode & 1);
   10541     ps_nbr_4x4->b1_pred_l1_flag = (ps_pu->b2_pred_mode > PRED_L0);
   10542     ps_nbr_4x4->mv = ps_pu->mv;
   10543 
   10544     for(i = 0; i < ht; i++)
   10545     {
   10546         for(j = 0; j < wd; j++)
   10547         {
   10548             ps_tmp_4x4[j] = *ps_nbr_4x4;
   10549         }
   10550 
   10551         ps_tmp_4x4 += i4_nbr_buf_stride;
   10552     }
   10553 }
   10554 
   10555 void ihevce_call_luma_inter_pred_rdopt_pass1(
   10556     ihevce_enc_loop_ctxt_t *ps_ctxt, cu_inter_cand_t *ps_inter_cand, WORD32 cu_size)
   10557 {
   10558     pu_t *ps_pu;
   10559     UWORD8 *pu1_pred;
   10560     WORD32 pred_stride, ctr, num_cu_part, skip_or_merge_flag = 0;
   10561     WORD32 inter_pu_wd, inter_pu_ht;
   10562 
   10563     pu1_pred = ps_inter_cand->pu1_pred_data_scr;
   10564     pred_stride = ps_inter_cand->i4_pred_data_stride;
   10565     num_cu_part = (SIZE_2Nx2N != ps_inter_cand->b3_part_size) + 1;
   10566 
   10567     for(ctr = 0; ctr < num_cu_part; ctr++)
   10568     {
   10569         ps_pu = &ps_inter_cand->as_inter_pu[ctr];
   10570 
   10571         /* IF AMP then each partitions can have diff wd ht */
   10572         inter_pu_wd = (ps_pu->b4_wd + 1) << 2;
   10573         inter_pu_ht = (ps_pu->b4_ht + 1) << 2;
   10574 
   10575         skip_or_merge_flag = ps_inter_cand->b1_skip_flag | ps_pu->b1_merge_flag;
   10576         //if(0 == skip_or_merge_flag)
   10577         {
   10578             ihevce_luma_inter_pred_pu(&ps_ctxt->s_mc_ctxt, ps_pu, pu1_pred, pred_stride, 1);
   10579         }
   10580         if((2 == num_cu_part) && (0 == ctr))
   10581         {
   10582             /* 2Nx__ partion case */
   10583             if(inter_pu_wd == cu_size)
   10584             {
   10585                 pu1_pred += (inter_pu_ht * pred_stride);
   10586             }
   10587 
   10588             /* __x2N partion case */
   10589             if(inter_pu_ht == cu_size)
   10590             {
   10591                 pu1_pred += inter_pu_wd;
   10592             }
   10593         }
   10594     }
   10595 }
   10596 
   10597 LWORD64 ihevce_it_recon_ssd(
   10598     ihevce_enc_loop_ctxt_t *ps_ctxt,
   10599     UWORD8 *pu1_src,
   10600     WORD32 i4_src_strd,
   10601     UWORD8 *pu1_pred,
   10602     WORD32 i4_pred_strd,
   10603     WORD16 *pi2_deq_data,
   10604     WORD32 i4_deq_data_strd,
   10605     UWORD8 *pu1_recon,
   10606     WORD32 i4_recon_stride,
   10607     UWORD8 *pu1_ecd_data,
   10608     UWORD8 u1_trans_size,
   10609     UWORD8 u1_pred_mode,
   10610     WORD32 i4_cbf,
   10611     WORD32 i4_zero_col,
   10612     WORD32 i4_zero_row,
   10613     CHROMA_PLANE_ID_T e_chroma_plane)
   10614 {
   10615     if(NULL_PLANE == e_chroma_plane)
   10616     {
   10617         ihevce_it_recon_fxn(
   10618             ps_ctxt,
   10619             pi2_deq_data,
   10620             i4_deq_data_strd,
   10621             pu1_pred,
   10622             i4_pred_strd,
   10623             pu1_recon,
   10624             i4_recon_stride,
   10625             pu1_ecd_data,
   10626             u1_trans_size,
   10627             u1_pred_mode,
   10628             i4_cbf,
   10629             i4_zero_col,
   10630             i4_zero_row);
   10631 
   10632         return ps_ctxt->s_cmn_opt_func.pf_ssd_calculator(
   10633             pu1_recon, pu1_src, i4_recon_stride, i4_src_strd, u1_trans_size, u1_trans_size);
   10634     }
   10635     else
   10636     {
   10637         ihevce_chroma_it_recon_fxn(
   10638             ps_ctxt,
   10639             pi2_deq_data,
   10640             i4_deq_data_strd,
   10641             pu1_pred,
   10642             i4_pred_strd,
   10643             pu1_recon,
   10644             i4_recon_stride,
   10645             pu1_ecd_data,
   10646             u1_trans_size,
   10647             i4_cbf,
   10648             i4_zero_col,
   10649             i4_zero_row,
   10650             e_chroma_plane);
   10651 
   10652         return ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
   10653             pu1_recon + (e_chroma_plane == V_PLANE),
   10654             pu1_src + (e_chroma_plane == V_PLANE),
   10655             i4_recon_stride,
   10656             i4_src_strd,
   10657             u1_trans_size,
   10658             u1_trans_size);
   10659     }
   10660 }
   10661 
   10662 /*!
   10663 ******************************************************************************
* \if Function name : ihevce_chroma_t_q_iq_ssd_scan_fxn \endif
   10665 *
   10666 * \brief
   10667 *    Transform unit level (Chroma) enc_loop function
   10668 *
   10669 * \param[in] ps_ctxt    enc_loop module ctxt pointer
   10670 * \param[in] pu1_pred       pointer to predicted data buffer
   10671 * \param[in] pred_strd      predicted buffer stride
   10672 * \param[in] pu1_src    pointer to source data buffer
   10673 * \param[in] src_strd   source buffer stride
   10674 * \param[in] pi2_deq_data   pointer to store iq data
   10675 * \param[in] deq_data_strd  iq data buffer stride
   10676 * \param[out] pu1_ecd_data  pointer coeff output buffer (input to ent cod)
   10677 * \param[out] pu1_csbf_buf  pointer to store the csbf for all 4x4 in a current
   10678 *                           block
   10679 * \param[out] csbf_strd     csbf buffer stride
   10680 * \param[in] trans_size     transform size (4, 8, 16)
   10681 * \param[in] intra_flag     0:Inter/Skip 1:Intra
   10682 * \param[out] pi4_coeff_off pointer to store the number of bytes produced in
   10683 *                           coeff buffer
   10684 the current TU in RDopt Mode
   10685 * \param[out] pi4_zero_col  pointer to store the zero_col info for the TU
   10686 * \param[out] pi4_zero_row  pointer to store the zero_row info for the TU
   10687 *
   10688 * \return
   10689 *    CBF of the current block
   10690 *
   10691 * \author
   10692 *  Ittiam
   10693 *
   10694 *****************************************************************************
   10695 */
   10696 WORD32 ihevce_chroma_t_q_iq_ssd_scan_fxn(
   10697     ihevce_enc_loop_ctxt_t *ps_ctxt,
   10698     UWORD8 *pu1_pred,
   10699     WORD32 pred_strd,
   10700     UWORD8 *pu1_src,
   10701     WORD32 src_strd,
   10702     WORD16 *pi2_deq_data,
   10703     WORD32 deq_data_strd,
   10704     UWORD8 *pu1_recon,
   10705     WORD32 i4_recon_stride,
   10706     UWORD8 *pu1_ecd_data,
   10707     UWORD8 *pu1_csbf_buf,
   10708     WORD32 csbf_strd,
   10709     WORD32 trans_size,
   10710     WORD32 i4_scan_idx,
   10711     WORD32 intra_flag,
   10712     WORD32 *pi4_coeff_off,
   10713     WORD32 *pi4_tu_bits,
   10714     WORD32 *pi4_zero_col,
   10715     WORD32 *pi4_zero_row,
   10716     UWORD8 *pu1_is_recon_available,
   10717     WORD32 i4_perform_sbh,
   10718     WORD32 i4_perform_rdoq,
   10719     LWORD64 *pi8_cost,
   10720 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
   10721     WORD32 i4_alpha_stim_multiplier,
   10722     UWORD8 u1_is_cu_noisy,
   10723 #endif
   10724     UWORD8 u1_is_skip,
   10725     SSD_TYPE_T e_ssd_type,
   10726     CHROMA_PLANE_ID_T e_chroma_plane)
   10727 {
   10728     WORD32 trans_idx, cbf, u4_blk_sad;
   10729     WORD16 *pi2_quant_coeffs;
   10730     WORD16 *pi2_trans_values;
   10731     WORD32 quant_scale_mat_offset;
   10732     WORD32 *pi4_trans_scratch;
   10733     WORD32 *pi4_subBlock2csbfId_map = NULL;
   10734 
   10735 #if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
   10736     WORD32 ai4_quant_rounding_factors[3][MAX_TU_SIZE * MAX_TU_SIZE], i;
   10737 #endif
   10738 
   10739     rdoq_sbh_ctxt_t *ps_rdoq_sbh_ctxt = &ps_ctxt->s_rdoq_sbh_ctxt;
   10740 
   10741     WORD32 i4_perform_zcbf = (ps_ctxt->i4_zcbf_rdo_level == ZCBF_ENABLE) ||
   10742                              (!intra_flag && ENABLE_INTER_ZCU_COST);
   10743     WORD32 i4_perform_coeff_level_rdoq =
   10744         (ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING) &&
   10745         (ps_ctxt->i4_chroma_quant_rounding_level == CHROMA_QUANT_ROUNDING);
   10746 
   10747     ASSERT((e_chroma_plane == U_PLANE) || (e_chroma_plane == V_PLANE));
   10748     ASSERT(csbf_strd == MAX_TU_IN_CTB_ROW);
   10749 
   10750     *pi4_coeff_off = 0;
   10751     *pi4_tu_bits = 0;
   10752     pu1_is_recon_available[0] = 0;
   10753 
   10754     pi4_trans_scratch = (WORD32 *)&ps_ctxt->ai2_scratch[0];
   10755     pi2_quant_coeffs = &ps_ctxt->ai2_scratch[0];
   10756     pi2_trans_values = &ps_ctxt->ai2_scratch[0] + (MAX_TRANS_SIZE * 2);
   10757 
   10758     if(2 == trans_size)
   10759     {
   10760         trans_size = 4;
   10761     }
   10762 
   10763     /* translate the transform size to index */
   10764     trans_idx = trans_size >> 2;
   10765 
   10766     if(16 == trans_size)
   10767     {
   10768         trans_idx = 3;
   10769     }
   10770 
   10771     if(u1_is_skip)
   10772     {
   10773         pi8_cost[0] = ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
   10774             pu1_pred + e_chroma_plane,
   10775             pu1_src + e_chroma_plane,
   10776             pred_strd,
   10777             src_strd,
   10778             trans_size,
   10779             trans_size);
   10780 
   10781         if(e_ssd_type == SPATIAL_DOMAIN_SSD)
   10782         {
   10783             /* buffer copy fromp pred to recon */
   10784             ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
   10785                 pu1_pred,
   10786                 pred_strd,
   10787                 pu1_recon,
   10788                 i4_recon_stride,
   10789                 trans_size,
   10790                 trans_size,
   10791                 e_chroma_plane);
   10792 
   10793             pu1_is_recon_available[0] = 1;
   10794         }
   10795 
   10796 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
   10797         if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
   10798         {
   10799             pi8_cost[0] = ihevce_inject_stim_into_distortion(
   10800                 pu1_src,
   10801                 src_strd,
   10802                 pu1_pred,
   10803                 pred_strd,
   10804                 pi8_cost[0],
   10805                 i4_alpha_stim_multiplier,
   10806                 trans_size,
   10807                 0,
   10808                 ps_ctxt->u1_enable_psyRDOPT,
   10809                 e_chroma_plane);
   10810         }
   10811 #endif
   10812 
   10813 #if ENABLE_INTER_ZCU_COST
   10814 #if !WEIGH_CHROMA_COST
   10815         /* cbf = 0, accumulate cu not coded cost */
   10816         ps_ctxt->i8_cu_not_coded_cost += pi8_cost[0];
   10817 #else
   10818         ps_ctxt->i8_cu_not_coded_cost += (pi8_cost[0] * ps_ctxt->u4_chroma_cost_weighing_factor +
   10819                                           (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
   10820                                          CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT;
   10821 #endif
   10822 #endif
   10823 
   10824         return 0;
   10825     }
   10826 
   10827     if(intra_flag == 1)
   10828     {
   10829         quant_scale_mat_offset = 0;
   10830 
   10831 #if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
   10832         ai4_quant_rounding_factors[0][0] =
   10833             MAX(ps_ctxt->i4_quant_rnd_factor[intra_flag], (1 << QUANT_ROUND_FACTOR_Q) / 3);
   10834 
   10835         for(i = 0; i < trans_size * trans_size; i++)
   10836         {
   10837             ai4_quant_rounding_factors[1][i] =
   10838                 MAX(ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[trans_size >> 3][i],
   10839                     (1 << QUANT_ROUND_FACTOR_Q) / 3);
   10840             ai4_quant_rounding_factors[2][i] =
   10841                 MAX(ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[trans_size >> 3][i],
   10842                     (1 << QUANT_ROUND_FACTOR_Q) / 3);
   10843         }
   10844 #endif
   10845     }
   10846     else
   10847     {
   10848         quant_scale_mat_offset = NUM_TRANS_TYPES;
   10849     }
   10850 
   10851     switch(trans_size)
   10852     {
   10853     case 4:
   10854     {
   10855         pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map4x4TU;
   10856 
   10857         break;
   10858     }
   10859     case 8:
   10860     {
   10861         pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map8x8TU;
   10862 
   10863         break;
   10864     }
   10865     case 16:
   10866     {
   10867         pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map16x16TU;
   10868 
   10869         break;
   10870     }
   10871     case 32:
   10872     {
   10873         pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map32x32TU;
   10874 
   10875         break;
   10876     }
   10877     }
   10878 
   10879     /* ---------- call residue and transform block ------- */
   10880     u4_blk_sad = ps_ctxt->apf_chrm_resd_trns[trans_idx - 1](
   10881         pu1_src + (e_chroma_plane == V_PLANE),
   10882         pu1_pred + (e_chroma_plane == V_PLANE),
   10883         pi4_trans_scratch,
   10884         pi2_trans_values,
   10885         src_strd,
   10886         pred_strd,
   10887         ((trans_size << 16) + 1)); /* dst strd and chroma flag are packed together */
   10888     (void)u4_blk_sad;
   10889     /* -------- calculate SSD calculation in Transform Domain ------ */
   10890 
   10891     cbf = ps_ctxt->apf_quant_iquant_ssd
   10892               [i4_perform_coeff_level_rdoq + (e_ssd_type != FREQUENCY_DOMAIN_SSD) * 2]
   10893 
   10894           (pi2_trans_values,
   10895            ps_ctxt->api2_rescal_mat[trans_idx + quant_scale_mat_offset],
   10896            pi2_quant_coeffs,
   10897            pi2_deq_data,
   10898            trans_size,
   10899            ps_ctxt->i4_chrm_cu_qp_div6,
   10900            ps_ctxt->i4_chrm_cu_qp_mod6,
   10901 #if !PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
   10902            ps_ctxt->i4_quant_rnd_factor[intra_flag],
   10903            ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[trans_size >> 3],
   10904            ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[trans_size >> 3],
   10905 #else
   10906            intra_flag ? ai4_quant_rounding_factors[0][0] : ps_ctxt->i4_quant_rnd_factor[intra_flag],
   10907            intra_flag ? ai4_quant_rounding_factors[1]
   10908                       : ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[trans_size >> 3],
   10909            intra_flag ? ai4_quant_rounding_factors[2]
   10910                       : ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[trans_size >> 3],
   10911 #endif
   10912            trans_size,
   10913            trans_size,
   10914            deq_data_strd,
   10915            pu1_csbf_buf,
   10916            csbf_strd,
   10917            pi4_zero_col,
   10918            pi4_zero_row,
   10919            ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset],
   10920            pi8_cost);
   10921 
   10922     if(e_ssd_type != FREQUENCY_DOMAIN_SSD)
   10923     {
   10924         pi8_cost[0] = UINT_MAX;
   10925     }
   10926 
   10927     if(0 != cbf)
   10928     {
   10929         if(i4_perform_sbh || i4_perform_rdoq)
   10930         {
   10931             ps_rdoq_sbh_ctxt->i4_iq_data_strd = deq_data_strd;
   10932             ps_rdoq_sbh_ctxt->i4_q_data_strd = trans_size;
   10933 
   10934             ps_rdoq_sbh_ctxt->i4_qp_div = ps_ctxt->i4_chrm_cu_qp_div6;
   10935             ps_rdoq_sbh_ctxt->i2_qp_rem = ps_ctxt->i4_chrm_cu_qp_mod6;
   10936             ps_rdoq_sbh_ctxt->i4_scan_idx = i4_scan_idx;
   10937             ps_rdoq_sbh_ctxt->i8_ssd_cost = *pi8_cost;
   10938             ps_rdoq_sbh_ctxt->i4_trans_size = trans_size;
   10939 
   10940             ps_rdoq_sbh_ctxt->pi2_dequant_coeff =
   10941                 ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset];
   10942             ps_rdoq_sbh_ctxt->pi2_iquant_coeffs = pi2_deq_data;
   10943             ps_rdoq_sbh_ctxt->pi2_quant_coeffs = pi2_quant_coeffs;
   10944             ps_rdoq_sbh_ctxt->pi2_trans_values = pi2_trans_values;
   10945             ps_rdoq_sbh_ctxt->pu1_csbf_buf = pu1_csbf_buf;
   10946             ps_rdoq_sbh_ctxt->pi4_subBlock2csbfId_map = pi4_subBlock2csbfId_map;
   10947 
   10948             if((!i4_perform_rdoq))
   10949             {
   10950                 ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt);
   10951 
   10952                 pi8_cost[0] = ps_rdoq_sbh_ctxt->i8_ssd_cost;
   10953             }
   10954         }
   10955 
   10956         /* ------- call coeffs scan function ------- */
   10957         *pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs(
   10958             pi2_quant_coeffs,
   10959             pi4_subBlock2csbfId_map,
   10960             i4_scan_idx,
   10961             trans_size,
   10962             pu1_ecd_data,
   10963             pu1_csbf_buf,
   10964             csbf_strd);
   10965     }
   10966 
   10967     /*  Normalize Cost. Note : trans_idx, not (trans_idx-1) */
   10968     pi8_cost[0] >>= ga_trans_shift[trans_idx];
   10969 
   10970 #if RDOPT_ZERO_CBF_ENABLE
   10971     if((0 != cbf))
   10972     {
   10973         WORD32 tu_bits;
   10974         LWORD64 zero_cbf_cost_u, curr_cb_cod_cost;
   10975 
   10976         zero_cbf_cost_u = 0;
   10977 
   10978         /* Populating the fields of rdoq_ctxt structure */
   10979         if(i4_perform_rdoq)
   10980         {
   10981             //memset(ps_rdoq_sbh_ctxt,0,sizeof(rdoq_sbh_ctxt_t));
   10982             /* transform size to log2transform size */
   10983             GETRANGE(ps_rdoq_sbh_ctxt->i4_log2_trans_size, trans_size);
   10984             ps_rdoq_sbh_ctxt->i4_log2_trans_size -= 1;
   10985 
   10986             ps_rdoq_sbh_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->i8_cl_ssd_lambda_chroma_qf;
   10987             ps_rdoq_sbh_ctxt->i4_is_luma = 0;
   10988             ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td = ga_trans_shift[trans_idx];
   10989             ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td =
   10990                 (1 << (ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td - 1));
   10991             ps_rdoq_sbh_ctxt->i1_tu_is_coded = 0;
   10992             ps_rdoq_sbh_ctxt->pi4_zero_col = pi4_zero_col;
   10993             ps_rdoq_sbh_ctxt->pi4_zero_row = pi4_zero_row;
   10994         }
   10995         else if(i4_perform_zcbf)
   10996         {
   10997             /* cost of zero cbf encoding */
   10998             zero_cbf_cost_u =
   10999 
   11000                 ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
   11001                     pu1_pred + (e_chroma_plane == V_PLANE),
   11002                     pu1_src + (e_chroma_plane == V_PLANE),
   11003                     pred_strd,
   11004                     src_strd,
   11005                     trans_size,
   11006                     trans_size);
   11007         }
   11008 
   11009         /************************************************************************/
   11010         /* call the entropy rdo encode to get the bit estimate for current tu   */
   11011         /* note that tu includes only residual coding bits and does not include */
   11012         /* tu split, cbf and qp delta encoding bits for a TU                    */
   11013         /************************************************************************/
   11014         if(i4_perform_rdoq)
   11015         {
   11016             tu_bits = ihevce_entropy_rdo_encode_tu_rdoq(
   11017                 &ps_ctxt->s_rdopt_entropy_ctxt,
   11018                 pu1_ecd_data,
   11019                 trans_size,
   11020                 0,
   11021                 ps_rdoq_sbh_ctxt,
   11022                 pi8_cost,
   11023                 &zero_cbf_cost_u,
   11024                 0);
   11025             //Currently, we are not accounting for sign bit in RDOPT bits calculation when RDOQ is turned on
   11026 
   11027             if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 0)
   11028             {
   11029                 cbf = 0;
   11030 
   11031                 /* num bytes is set to 0 */
   11032                 *pi4_coeff_off = 0;
   11033             }
   11034 
   11035             (*pi4_tu_bits) += tu_bits;
   11036 
   11037             if((i4_perform_sbh) && (0 != cbf))
   11038             {
   11039                 ps_rdoq_sbh_ctxt->i8_ssd_cost = pi8_cost[0];
   11040 
   11041                 ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt);
   11042 
   11043                 pi8_cost[0] = ps_rdoq_sbh_ctxt->i8_ssd_cost;
   11044             }
   11045 
   11046             /*Add round value before normalizing*/
   11047             pi8_cost[0] += ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td;
   11048             pi8_cost[0] >>= ga_trans_shift[trans_idx];
   11049 
   11050             if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 1)
   11051             {
   11052                 *pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs(
   11053                     pi2_quant_coeffs,
   11054                     pi4_subBlock2csbfId_map,
   11055                     i4_scan_idx,
   11056                     trans_size,
   11057                     pu1_ecd_data,
   11058                     ps_rdoq_sbh_ctxt->pu1_csbf_buf,
   11059                     csbf_strd);
   11060             }
   11061         }
   11062         else
   11063         {
   11064             /************************************************************************/
   11065             /* call the entropy rdo encode to get the bit estimate for current tu   */
   11066             /* note that tu includes only residual coding bits and does not include */
   11067             /* tu split, cbf and qp delta encoding bits for a TU                    */
   11068             /************************************************************************/
   11069             tu_bits = ihevce_entropy_rdo_encode_tu(
   11070                 &ps_ctxt->s_rdopt_entropy_ctxt, pu1_ecd_data, trans_size, 0, i4_perform_sbh);
   11071 
   11072             (*pi4_tu_bits) += tu_bits;
   11073         }
   11074 
   11075         if(e_ssd_type == SPATIAL_DOMAIN_SSD)
   11076         {
   11077             pi8_cost[0] = ihevce_it_recon_ssd(
   11078                 ps_ctxt,
   11079                 pu1_src,
   11080                 src_strd,
   11081                 pu1_pred,
   11082                 pred_strd,
   11083                 pi2_deq_data,
   11084                 deq_data_strd,
   11085                 pu1_recon,
   11086                 i4_recon_stride,
   11087                 pu1_ecd_data,
   11088                 trans_size,
   11089                 PRED_MODE_INTRA,
   11090                 cbf,
   11091                 pi4_zero_col[0],
   11092                 pi4_zero_row[0],
   11093                 e_chroma_plane);
   11094 
   11095             pu1_is_recon_available[0] = 1;
   11096         }
   11097 
   11098 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
   11099         if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier)
   11100         {
   11101             pi8_cost[0] = ihevce_inject_stim_into_distortion(
   11102                 pu1_src,
   11103                 src_strd,
   11104                 pu1_recon,
   11105                 i4_recon_stride,
   11106                 pi8_cost[0],
   11107                 i4_alpha_stim_multiplier,
   11108                 trans_size,
   11109                 0,
   11110                 ps_ctxt->u1_enable_psyRDOPT,
   11111                 e_chroma_plane);
   11112         }
   11113         else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier)
   11114         {
   11115             pi8_cost[0] = ihevce_inject_stim_into_distortion(
   11116                 pu1_src,
   11117                 src_strd,
   11118                 pu1_pred,
   11119                 pred_strd,
   11120                 pi8_cost[0],
   11121                 i4_alpha_stim_multiplier,
   11122                 trans_size,
   11123                 0,
   11124                 ps_ctxt->u1_enable_psyRDOPT,
   11125                 e_chroma_plane);
   11126         }
   11127 #endif
   11128 
   11129         curr_cb_cod_cost = pi8_cost[0];
   11130 
   11131         /* add the SSD cost to bits estimate given by ECD */
   11132         curr_cb_cod_cost +=
   11133             COMPUTE_RATE_COST_CLIP30(tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
   11134 
   11135         if(i4_perform_zcbf)
   11136         {
   11137 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
   11138             if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
   11139             {
   11140                 zero_cbf_cost_u = ihevce_inject_stim_into_distortion(
   11141                     pu1_src,
   11142                     src_strd,
   11143                     pu1_pred,
   11144                     pred_strd,
   11145                     zero_cbf_cost_u,
   11146                     !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
   11147                                            : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
   11148                                               (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
   11149                                                  100.0,
   11150                     trans_size,
   11151                     0,
   11152                     ps_ctxt->u1_enable_psyRDOPT,
   11153                     e_chroma_plane);
   11154             }
   11155 #endif
   11156             /* force the tu as zero cbf if zero_cbf_cost is lower */
   11157             if(zero_cbf_cost_u < curr_cb_cod_cost)
   11158             {
   11159                 *pi4_coeff_off = 0;
   11160                 cbf = 0;
   11161                 (*pi4_tu_bits) = 0;
   11162                 pi8_cost[0] = zero_cbf_cost_u;
   11163 
   11164                 pu1_is_recon_available[0] = 0;
   11165 
   11166                 if(e_ssd_type == SPATIAL_DOMAIN_SSD)
   11167                 {
   11168                     ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
   11169                         pu1_pred,
   11170                         pred_strd,
   11171                         pu1_recon,
   11172                         i4_recon_stride,
   11173                         trans_size,
   11174                         trans_size,
   11175                         e_chroma_plane);
   11176 
   11177                     pu1_is_recon_available[0] = 1;
   11178                 }
   11179             }
   11180 
   11181 #if ENABLE_INTER_ZCU_COST
   11182             if(!intra_flag)
   11183             {
   11184 #if !WEIGH_CHROMA_COST
   11185                 ps_ctxt->i8_cu_not_coded_cost += zero_cbf_cost_u;
   11186 #else
   11187                 ps_ctxt->i8_cu_not_coded_cost += (LWORD64)(
   11188                     (zero_cbf_cost_u * ps_ctxt->u4_chroma_cost_weighing_factor +
   11189                      (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
   11190                     CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
   11191 #endif
   11192             }
   11193 #endif
   11194         }
   11195     }
   11196     else
   11197     {
   11198         if(e_ssd_type == SPATIAL_DOMAIN_SSD)
   11199         {
   11200             pi8_cost[0] = ihevce_it_recon_ssd(
   11201                 ps_ctxt,
   11202                 pu1_src,
   11203                 src_strd,
   11204                 pu1_pred,
   11205                 pred_strd,
   11206                 pi2_deq_data,
   11207                 deq_data_strd,
   11208                 pu1_recon,
   11209                 i4_recon_stride,
   11210                 pu1_ecd_data,
   11211                 trans_size,
   11212                 PRED_MODE_INTRA,
   11213                 cbf,
   11214                 pi4_zero_col[0],
   11215                 pi4_zero_row[0],
   11216                 e_chroma_plane);
   11217 
   11218             pu1_is_recon_available[0] = 1;
   11219         }
   11220 
   11221 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
   11222         if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier)
   11223         {
   11224             pi8_cost[0] = ihevce_inject_stim_into_distortion(
   11225                 pu1_src,
   11226                 src_strd,
   11227                 pu1_recon,
   11228                 i4_recon_stride,
   11229                 pi8_cost[0],
   11230                 !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
   11231                                        : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
   11232                                           (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
   11233                                              100.0,
   11234                 trans_size,
   11235                 0,
   11236                 ps_ctxt->u1_enable_psyRDOPT,
   11237                 e_chroma_plane);
   11238         }
   11239         else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier)
   11240         {
   11241             pi8_cost[0] = ihevce_inject_stim_into_distortion(
   11242                 pu1_src,
   11243                 src_strd,
   11244                 pu1_pred,
   11245                 pred_strd,
   11246                 pi8_cost[0],
   11247                 !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
   11248                                        : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
   11249                                           (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
   11250                                              100.0,
   11251                 trans_size,
   11252                 0,
   11253                 ps_ctxt->u1_enable_psyRDOPT,
   11254                 e_chroma_plane);
   11255         }
   11256 #endif
   11257 
   11258 #if ENABLE_INTER_ZCU_COST
   11259         if(!intra_flag)
   11260         {
   11261 #if !WEIGH_CHROMA_COST
   11262             /* cbf = 0, accumulate cu not coded cost */
   11263             ps_ctxt->i8_cu_not_coded_cost += pi8_cost[0];
   11264 #else
   11265             /* cbf = 0, accumulate cu not coded cost */
   11266 
   11267             ps_ctxt->i8_cu_not_coded_cost += (LWORD64)(
   11268                 (pi8_cost[0] * ps_ctxt->u4_chroma_cost_weighing_factor +
   11269                  (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
   11270                 CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
   11271 #endif
   11272         }
   11273 #endif
   11274     }
   11275 #endif /* RDOPT_ZERO_CBF_ENABLE */
   11276 
   11277     return (cbf);
   11278 }
   11279