Home | History | Annotate | Download | only in encoder
      1 /******************************************************************************
      2  *
      3  * Copyright (C) 2018 The Android Open Source Project
      4  *
      5  * Licensed under the Apache License, Version 2.0 (the "License");
      6  * you may not use this file except in compliance with the License.
      7  * You may obtain a copy of the License at:
      8  *
      9  * http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  *
     17  *****************************************************************************
     18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
     19 */
     20 
     21 /**
     22 ******************************************************************************
     23 * \file ihevce_enc_cu_recursion.c
     24 *
     25 * \brief
     26 *    This file contains Encoder normative loop pass related functions
     27 *
     28 * \date
     29 *    18/09/2012
     30 *
     31 * \author
     32 *    Ittiam
     33 *
     34 *
     35 * List of Functions
     36 *
     37 *
     38 ******************************************************************************
     39 */
     40 
     41 /*****************************************************************************/
     42 /* File Includes                                                             */
     43 /*****************************************************************************/
     44 /* System include files */
     45 #include <stdio.h>
     46 #include <string.h>
     47 #include <stdlib.h>
     48 #include <assert.h>
     49 #include <stdarg.h>
     50 #include <math.h>
     51 
     52 /* User include files */
     53 #include "ihevc_typedefs.h"
     54 #include "itt_video_api.h"
     55 #include "ihevce_api.h"
     56 
     57 #include "rc_cntrl_param.h"
     58 #include "rc_frame_info_collector.h"
     59 #include "rc_look_ahead_params.h"
     60 
     61 #include "ihevc_defs.h"
     62 #include "ihevc_macros.h"
     63 #include "ihevc_debug.h"
     64 #include "ihevc_structs.h"
     65 #include "ihevc_platform_macros.h"
     66 #include "ihevc_deblk.h"
     67 #include "ihevc_itrans_recon.h"
     68 #include "ihevc_chroma_itrans_recon.h"
     69 #include "ihevc_chroma_intra_pred.h"
     70 #include "ihevc_intra_pred.h"
     71 #include "ihevc_inter_pred.h"
     72 #include "ihevc_mem_fns.h"
     73 #include "ihevc_padding.h"
     74 #include "ihevc_weighted_pred.h"
     75 #include "ihevc_sao.h"
     76 #include "ihevc_resi_trans.h"
     77 #include "ihevc_quant_iquant_ssd.h"
     78 #include "ihevc_cabac_tables.h"
     79 
     80 #include "ihevce_defs.h"
     81 #include "ihevce_hle_interface.h"
     82 #include "ihevce_lap_enc_structs.h"
     83 #include "ihevce_multi_thrd_structs.h"
     84 #include "ihevce_multi_thrd_funcs.h"
     85 #include "ihevce_me_common_defs.h"
     86 #include "ihevce_had_satd.h"
     87 #include "ihevce_error_codes.h"
     88 #include "ihevce_bitstream.h"
     89 #include "ihevce_cabac.h"
     90 #include "ihevce_rdoq_macros.h"
     91 #include "ihevce_function_selector.h"
     92 #include "ihevce_enc_structs.h"
     93 #include "ihevce_entropy_structs.h"
     94 #include "ihevce_cmn_utils_instr_set_router.h"
     95 #include "ihevce_ipe_instr_set_router.h"
     96 #include "ihevce_decomp_pre_intra_structs.h"
     97 #include "ihevce_decomp_pre_intra_pass.h"
     98 #include "ihevce_enc_loop_structs.h"
     99 #include "ihevce_global_tables.h"
    100 #include "ihevce_nbr_avail.h"
    101 #include "ihevce_enc_loop_utils.h"
    102 #include "ihevce_bs_compute_ctb.h"
    103 #include "ihevce_cabac_rdo.h"
    104 #include "ihevce_dep_mngr_interface.h"
    105 #include "ihevce_enc_loop_pass.h"
    106 #include "ihevce_rc_enc_structs.h"
    107 #include "ihevce_enc_cu_recursion.h"
    108 #include "ihevce_stasino_helpers.h"
    109 
    110 #include "cast_types.h"
    111 #include "osal.h"
    112 #include "osal_defaults.h"
    113 
    114 /*****************************************************************************/
    115 /* Macros                                                                    */
    116 /*****************************************************************************/
    117 #define NUM_CTB_QUANT_ROUNDING 6
    118 
    119 /*****************************************************************************/
    120 /* Function Definitions                                                      */
    121 /*****************************************************************************/
    122 
    123 /**
    124 *********************************************************************************
    125 * Function name : ihevce_store_cu_final
    126 *
    127 * \brief
    128 *    This function store cu info to the enc loop cu context
    129 *
    130 * \param[in] ps_ctxt : pointer to enc loop context structure
    131 * \param[in] ps_cu_final  : pointer to enc loop output CU structure
    132 * \param[in] pu1_ecd_data : ecd data pointer
    133 * \param[in] ps_enc_out_ctxt : pointer to CU information structure
    134 * \param[in] ps_cu_prms : pointer to  cu level parameters for SATD / RDOPT
    135 *
    136 * \return
    137 *    None
    138 *
    139 **********************************************************************************/
    140 void ihevce_store_cu_final(
    141     ihevce_enc_loop_ctxt_t *ps_ctxt,
    142     cu_enc_loop_out_t *ps_cu_final,
    143     UWORD8 *pu1_ecd_data,
    144     ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt,
    145     enc_loop_cu_prms_t *ps_cu_prms)
    146 {
    147     enc_loop_cu_final_prms_t *ps_enc_loop_bestprms;
    148     WORD32 i4_8x8_blks_in_cu;
    149     WORD32 i4_br_id, i4_enc_frm_id;
    150 
    151     WORD32 u4_tex_bits, u4_hdr_bits;
    152     WORD32 i4_qscale, i4_qscale_ctb;
    153     ps_enc_loop_bestprms = ps_enc_out_ctxt->ps_cu_prms;
    154     i4_qscale = ((ps_ctxt->ps_rc_quant_ctxt->pi4_qp_to_qscale
    155                       [ps_enc_out_ctxt->i1_cu_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]));
    156     i4_qscale_ctb = ((
    157         ps_ctxt->ps_rc_quant_ctxt
    158             ->pi4_qp_to_qscale[ps_ctxt->i4_frame_mod_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]));
    159 
    160     /* All texture bits accumulated */
    161     u4_tex_bits = ps_enc_loop_bestprms->u4_cu_luma_res_bits +
    162                   ps_enc_loop_bestprms->u4_cu_chroma_res_bits +
    163                   ps_enc_loop_bestprms->u4_cu_cbf_bits;
    164 
    165     u4_hdr_bits = ps_enc_loop_bestprms->u4_cu_hdr_bits;
    166 
    167     i4_br_id = ps_ctxt->i4_bitrate_instance_num;
    168     i4_enc_frm_id = ps_ctxt->i4_enc_frm_id;
    169 
    170     i4_8x8_blks_in_cu = ((ps_enc_out_ctxt->u1_cu_size >> 3) * (ps_enc_out_ctxt->u1_cu_size >> 3));
    171 
    172     ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_frame_open_loop_ssd +=
    173         ps_enc_loop_bestprms
    174             ->i8_cu_ssd;  // + (((float)(ps_ctxt->i8_cl_ssd_lambda_qf/ (1<< LAMBDA_Q_SHIFT))) * ps_enc_loop_bestprms->u4_cu_hdr_bits);
    175 
    176     ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_open_loop_intra_sad +=
    177         (UWORD32)(
    178             ps_enc_loop_bestprms->u4_cu_open_intra_sad +
    179             (((float)(ps_ctxt->i4_sad_lamda) / (1 << LAMBDA_Q_SHIFT)) *
    180              ps_enc_loop_bestprms->u4_cu_hdr_bits));
    181 
    182     if(1 == ps_enc_loop_bestprms->u1_intra_flag)
    183     {
    184         ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_intra_sad_acc +=
    185             ps_enc_loop_bestprms->u4_cu_sad;
    186         ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_frame_intra_cost_acc +=
    187             ps_enc_loop_bestprms->i8_best_rdopt_cost;
    188     }
    189     else
    190     {
    191         ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_inter_sad_acc +=
    192             ps_enc_loop_bestprms->u4_cu_sad;
    193         ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_frame_inter_cost_acc +=
    194             ps_enc_loop_bestprms->i8_best_rdopt_cost;
    195     }
    196     /*accumulating the frame level stats across frame*/
    197     ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_sad_acc +=
    198         ps_enc_loop_bestprms->u4_cu_sad;
    199 
    200     ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_frame_cost_acc +=
    201         ps_enc_loop_bestprms->i8_best_rdopt_cost;
    202 
    203     ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_rdopt_bits +=
    204         (u4_tex_bits + u4_hdr_bits);
    205 
    206     /*Total bits and header bits accumalted here for CTB*/
    207     ps_ctxt->u4_total_cu_bits += (u4_tex_bits + u4_hdr_bits);
    208     ps_ctxt->u4_total_cu_bits_mul_qs +=
    209         ((ULWORD64)((u4_tex_bits + u4_hdr_bits) * (i4_qscale_ctb)) + (1 << (QSCALE_Q_FAC_3 - 1))) >>
    210         QSCALE_Q_FAC_3;
    211     ps_ctxt->u4_total_cu_hdr_bits += u4_hdr_bits;
    212     ps_ctxt->u4_cu_tot_bits_into_qscale +=
    213         ((ULWORD64)((u4_tex_bits + u4_hdr_bits) * (i4_qscale)) + (1 << (QSCALE_Q_FAC_3 - 1))) >>
    214         QSCALE_Q_FAC_3;
    215     ps_ctxt->u4_cu_tot_bits += (u4_tex_bits + u4_hdr_bits);
    216 
    217     ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_rdopt_header_bits +=
    218         u4_hdr_bits;
    219 
    220     ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]
    221         ->i8_sad_by_qscale[ps_enc_loop_bestprms->u1_intra_flag] +=
    222         ((((LWORD64)ps_enc_loop_bestprms->u4_cu_sad) << SAD_BY_QSCALE_Q) / i4_qscale);
    223 
    224     ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]
    225         ->i4_qp_normalized_8x8_cu_sum[ps_enc_loop_bestprms->u1_intra_flag] +=
    226         (i4_8x8_blks_in_cu * i4_qscale);
    227 
    228     ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]
    229         ->i4_8x8_cu_sum[ps_enc_loop_bestprms->u1_intra_flag] += i4_8x8_blks_in_cu;
    230 
    231     /* PCM not supported */
    232     ps_cu_final->b1_pcm_flag = 0;
    233     ps_cu_final->b1_pred_mode_flag = ps_enc_loop_bestprms->u1_intra_flag;
    234 
    235     ps_cu_final->b1_skip_flag = ps_enc_loop_bestprms->u1_skip_flag;
    236     ps_cu_final->b1_tq_bypass_flag = 0;
    237     ps_cu_final->b3_part_mode = ps_enc_loop_bestprms->u1_part_mode;
    238 
    239     ps_cu_final->pv_coeff = pu1_ecd_data;
    240 
    241     ps_cu_final->i1_cu_qp = ps_enc_out_ctxt->i1_cu_qp;
    242     if(ps_enc_loop_bestprms->u1_is_cu_coded)
    243     {
    244         ps_ctxt->i4_last_cu_qp_from_prev_ctb = ps_enc_out_ctxt->i1_cu_qp;
    245     }
    246     else
    247     {
    248         ps_ctxt->i4_last_cu_qp_from_prev_ctb = ps_ctxt->i4_pred_qp;
    249     }
    250     ps_cu_final->b1_first_cu_in_qg = ps_enc_out_ctxt->b1_first_cu_in_qg;
    251 
    252     /* Update the no residue flag. Needed for inter cu. */
    253     /* Needed for deblocking inter/intra both           */
    254     //if(ps_cu_final->b1_pred_mode_flag == PRED_MODE_INTER)
    255     {
    256         ps_cu_final->b1_no_residual_syntax_flag = !ps_enc_loop_bestprms->u1_is_cu_coded;
    257     }
    258 
    259     /* store the number of TUs */
    260     ps_cu_final->u2_num_tus_in_cu = ps_enc_loop_bestprms->u2_num_tus_in_cu;
    261 
    262     /* ---- copy the TUs to final structure ----- */
    263     memcpy(
    264         ps_cu_final->ps_enc_tu,
    265         &ps_enc_loop_bestprms->as_tu_enc_loop[0],
    266         ps_enc_loop_bestprms->u2_num_tus_in_cu * sizeof(tu_enc_loop_out_t));
    267 
    268     /* ---- copy the PUs to final structure ----- */
    269     memcpy(
    270         ps_cu_final->ps_pu,
    271         &ps_enc_loop_bestprms->as_pu_enc_loop[0],
    272         ps_enc_loop_bestprms->u2_num_pus_in_cu * sizeof(pu_t));
    273 
    274     /* --- copy reminder and prev_flags ----- */
    275     /* only required for intra */
    276     if(PRED_MODE_INTRA == ps_cu_final->b1_pred_mode_flag)
    277     {
    278         memcpy(
    279             &ps_cu_final->as_prev_rem[0],
    280             &ps_enc_loop_bestprms->as_intra_prev_rem[0],
    281             ps_enc_loop_bestprms->u2_num_tus_in_cu * sizeof(intra_prev_rem_flags_t));
    282 
    283         ps_cu_final->b3_chroma_intra_pred_mode = ps_enc_loop_bestprms->u1_chroma_intra_pred_mode;
    284     }
    285 
    286     /* --------------------------------------------------- */
    287     /* ---- Boundary Strength Calculation at CU level ---- */
    288     /* --------------------------------------------------- */
    289     if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
    290     {
    291         WORD32 num_4x4_in_ctb;
    292         nbr_4x4_t *ps_left_nbr_4x4;
    293         nbr_4x4_t *ps_top_nbr_4x4;
    294         nbr_4x4_t *ps_curr_nbr_4x4;
    295         WORD32 nbr_4x4_left_strd;
    296 
    297         num_4x4_in_ctb = (ps_cu_prms->i4_ctb_size >> 2);
    298 
    299         ps_curr_nbr_4x4 = &ps_ctxt->as_ctb_nbr_arr[0];
    300         ps_curr_nbr_4x4 += (ps_enc_out_ctxt->b3_cu_pos_x << 1);
    301         ps_curr_nbr_4x4 += ((ps_enc_out_ctxt->b3_cu_pos_y << 1) * num_4x4_in_ctb);
    302 
    303         /* CU left */
    304         if(0 == ps_enc_out_ctxt->b3_cu_pos_x)
    305         {
    306             ps_left_nbr_4x4 = &ps_ctxt->as_left_col_nbr[0];
    307             ps_left_nbr_4x4 += ps_enc_out_ctxt->b3_cu_pos_y << 1;
    308             nbr_4x4_left_strd = 1;
    309         }
    310         else
    311         {
    312             /* inside CTB */
    313             ps_left_nbr_4x4 = ps_curr_nbr_4x4 - 1;
    314             nbr_4x4_left_strd = num_4x4_in_ctb;
    315         }
    316 
    317         /* CU top */
    318         if(0 == ps_enc_out_ctxt->b3_cu_pos_y)
    319         {
    320             /* CTB boundary */
    321             ps_top_nbr_4x4 = ps_ctxt->ps_top_row_nbr;
    322             ps_top_nbr_4x4 += (ps_cu_prms->i4_ctb_pos * (ps_cu_prms->i4_ctb_size >> 2));
    323             ps_top_nbr_4x4 += (ps_enc_out_ctxt->b3_cu_pos_x << 1);
    324         }
    325         else
    326         {
    327             /* inside CTB */
    328             ps_top_nbr_4x4 = ps_curr_nbr_4x4 - num_4x4_in_ctb;
    329         }
    330 
    331         ihevce_bs_compute_cu(
    332             ps_cu_final,
    333             ps_top_nbr_4x4,
    334             ps_left_nbr_4x4,
    335             ps_curr_nbr_4x4,
    336             nbr_4x4_left_strd,
    337             num_4x4_in_ctb,
    338             &ps_ctxt->s_deblk_bs_prms);
    339     }
    340 }
    341 
    342 /**
    343 *********************************************************************************
    344 * Function name : ihevce_store_cu_results
    345 *
    346 * \brief
    347 *    This function store cu result to cu info context
    348 *
    349 * \param[in] ps_ctxt : pointer to enc loop context structure
    350 * \param[out] ps_cu_prms : pointer to  cu level parameters for SATD / RDOPT
    351 *
    352 * \return
    353 *    None
    354 *
    355 **********************************************************************************/
    356 void ihevce_store_cu_results(
    357     ihevce_enc_loop_ctxt_t *ps_ctxt,
    358     enc_loop_cu_prms_t *ps_cu_prms,
    359     final_mode_state_t *ps_final_state)
    360 {
    361     ihevce_enc_cu_node_ctxt_t *ps_enc_tmp_out_ctxt;
    362     nbr_4x4_t *ps_nbr_4x4, *ps_tmp_nbr_4x4, *ps_curr_nbr_4x4;
    363 
    364     UWORD8 *pu1_recon, *pu1_final_recon;
    365     WORD32 num_4x4_in_ctb, ctr;
    366     WORD32 num_4x4_in_cu;
    367     UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
    368     WORD32 cu_depth, log2_ctb_size, log2_cu_size;
    369 
    370     ps_enc_tmp_out_ctxt = ps_ctxt->ps_enc_out_ctxt;
    371     (void)ps_final_state;
    372 #if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
    373     {
    374         /* ---- copy the child luma recon back to curr. recon -------- */
    375         pu1_recon = (UWORD8 *)ps_ctxt->pv_cu_luma_recon;
    376 
    377         /* based on CU position derive the luma pointers */
    378         pu1_final_recon = ps_cu_prms->pu1_luma_recon + (ps_enc_tmp_out_ctxt->b3_cu_pos_x << 3);
    379 
    380         pu1_final_recon +=
    381             ((ps_enc_tmp_out_ctxt->b3_cu_pos_y << 3) * ps_cu_prms->i4_luma_recon_stride);
    382 
    383         ps_ctxt->s_cmn_opt_func.pf_copy_2d(
    384             pu1_final_recon,
    385             ps_cu_prms->i4_luma_recon_stride,
    386             pu1_recon,
    387             ps_enc_tmp_out_ctxt->u1_cu_size,
    388             ps_enc_tmp_out_ctxt->u1_cu_size,
    389             ps_enc_tmp_out_ctxt->u1_cu_size);
    390 
    391         /* ---- copy the child chroma recon back to curr. recon -------- */
    392         pu1_recon = (UWORD8 *)ps_ctxt->pv_cu_chrma_recon;
    393 
    394         /* based on CU position derive the chroma pointers */
    395         pu1_final_recon = ps_cu_prms->pu1_chrm_recon + (ps_enc_tmp_out_ctxt->b3_cu_pos_x << 3);
    396 
    397         pu1_final_recon +=
    398             ((ps_enc_tmp_out_ctxt->b3_cu_pos_y << (u1_is_422 + 2)) *
    399              ps_cu_prms->i4_chrm_recon_stride);
    400 
    401         /* Cb and Cr pixel interleaved */
    402         ps_ctxt->s_cmn_opt_func.pf_copy_2d(
    403             pu1_final_recon,
    404             ps_cu_prms->i4_chrm_recon_stride,
    405             pu1_recon,
    406             ps_enc_tmp_out_ctxt->u1_cu_size,
    407             ps_enc_tmp_out_ctxt->u1_cu_size,
    408             (ps_enc_tmp_out_ctxt->u1_cu_size >> (0 == u1_is_422)));
    409     }
    410 #else
    411     if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
    412     {
    413         /* ---- copy the child luma recon back to curr. recon -------- */
    414         pu1_recon = (UWORD8 *)ps_ctxt->pv_cu_luma_recon;
    415 
    416         /* based on CU position derive the luma pointers */
    417         pu1_final_recon = ps_cu_prms->pu1_luma_recon + (ps_enc_tmp_out_ctxt->b3_cu_pos_x << 3);
    418 
    419         pu1_final_recon +=
    420             ((ps_enc_tmp_out_ctxt->b3_cu_pos_y << 3) * ps_cu_prms->i4_luma_recon_stride);
    421 
    422         ps_ctxt->s_cmn_opt_func.pf_copy_2d(
    423             pu1_final_recon,
    424             ps_cu_prms->i4_luma_recon_stride,
    425             pu1_recon,
    426             ps_enc_tmp_out_ctxt->u1_cu_size,
    427             ps_enc_tmp_out_ctxt->u1_cu_size,
    428             ps_enc_tmp_out_ctxt->u1_cu_size);
    429 
    430         /* ---- copy the child chroma recon back to curr. recon -------- */
    431         pu1_recon = (UWORD8 *)ps_ctxt->pv_cu_chrma_recon;
    432 
    433         /* based on CU position derive the chroma pointers */
    434         pu1_final_recon = ps_cu_prms->pu1_chrm_recon + (ps_enc_tmp_out_ctxt->b3_cu_pos_x << 3);
    435 
    436         pu1_final_recon +=
    437             ((ps_enc_tmp_out_ctxt->b3_cu_pos_y << (u1_is_422 + 2)) *
    438              ps_cu_prms->i4_chrm_recon_stride);
    439 
    440         ps_ctxt->s_cmn_opt_func.pf_copy_2d(
    441             pu1_final_recon,
    442             ps_cu_prms->i4_chrm_recon_stride,
    443             pu1_recon,
    444             ps_enc_tmp_out_ctxt->u1_cu_size,
    445             ps_enc_tmp_out_ctxt->u1_cu_size,
    446             (ps_enc_tmp_out_ctxt->u1_cu_size >> (0 == u1_is_422)));
    447     }
    448 #endif
    449     /*copy qp for qg*/
    450     {
    451         WORD32 i4_num_8x8, i4_x, i4_y;
    452         WORD32 i4_cu_pos_x, i4_cu_pox_y;
    453         i4_num_8x8 = ps_enc_tmp_out_ctxt->u1_cu_size >> 3;
    454         i4_cu_pos_x = ps_enc_tmp_out_ctxt->b3_cu_pos_x;
    455         i4_cu_pox_y = ps_enc_tmp_out_ctxt->b3_cu_pos_y;
    456         for(i4_y = 0; i4_y < i4_num_8x8; i4_y++)
    457         {
    458             for(i4_x = 0; i4_x < i4_num_8x8; i4_x++)
    459             {
    460                 if(ps_enc_tmp_out_ctxt->ps_cu_prms->u1_is_cu_coded)
    461                 {
    462                     ps_ctxt->ai4_qp_qg[((i4_cu_pox_y + i4_y) * 8) + (i4_cu_pos_x + i4_x)] =
    463                         ps_ctxt->i4_cu_qp;
    464                 }
    465                 else
    466                 {
    467                     ps_ctxt->ai4_qp_qg[((i4_cu_pox_y + i4_y) * 8) + (i4_cu_pos_x + i4_x)] =
    468                         ps_ctxt->i4_pred_qp;
    469                 }
    470             }
    471         }
    472     }
    473 
    474     /* ------ copy the nbr 4x4 to final output ------ */
    475     num_4x4_in_cu = ps_enc_tmp_out_ctxt->u1_cu_size >> 2;
    476     num_4x4_in_ctb = (ps_cu_prms->i4_ctb_size >> 2);
    477 
    478     ps_curr_nbr_4x4 = &ps_ctxt->as_ctb_nbr_arr[0];
    479     ps_curr_nbr_4x4 += (ps_enc_tmp_out_ctxt->b3_cu_pos_x << 1);
    480     ps_curr_nbr_4x4 += ((ps_enc_tmp_out_ctxt->b3_cu_pos_y << 1) * num_4x4_in_ctb);
    481     ps_tmp_nbr_4x4 = ps_curr_nbr_4x4;
    482 
    483     ps_nbr_4x4 = ps_ctxt->ps_cu_recur_nbr;
    484 
    485     GETRANGE(log2_ctb_size, ps_cu_prms->i4_ctb_size);
    486     GETRANGE(log2_cu_size, ps_enc_tmp_out_ctxt->u1_cu_size);
    487     cu_depth = log2_ctb_size - log2_cu_size;
    488 
    489     ASSERT(cu_depth <= 3);
    490     ASSERT(cu_depth >= 0);
    491 
    492     /*assign qp for all 4x4 nbr blocks*/
    493     for(ctr = 0; ctr < num_4x4_in_cu * num_4x4_in_cu; ctr++, ps_nbr_4x4++)
    494     {
    495         ps_nbr_4x4->b1_skip_flag = ps_enc_tmp_out_ctxt->s_cu_prms.u1_skip_flag;
    496         ps_nbr_4x4->b2_cu_depth = cu_depth;
    497         ps_nbr_4x4->b8_qp = ps_ctxt->i4_cu_qp;
    498     }
    499 
    500     ps_nbr_4x4 = ps_ctxt->ps_cu_recur_nbr;
    501 
    502     for(ctr = 0; ctr < num_4x4_in_cu; ctr++)
    503     {
    504         memcpy(ps_tmp_nbr_4x4, ps_nbr_4x4, num_4x4_in_cu * sizeof(nbr_4x4_t));
    505 
    506         ps_tmp_nbr_4x4 += num_4x4_in_ctb;
    507         ps_nbr_4x4 += num_4x4_in_cu;
    508     }
    509 }
    510 
    511 /**
    512 *********************************************************************************
    513 * Function name : ihevce_populate_cu_struct
    514 *
    515 * \brief
    516 *    This function populate cu struct
    517 *
    518 * \param[in] ps_ctxt : pointer to enc loop context structure
    519 * \param[in] ps_cur_ipe_ctb : pointer to  IPE L0 analyze structure
    520 * \param[in] ps_cu_tree_analyse : pointer to  Structure for CU recursion
    521 * \param[in] ps_best_results : pointer to structure containing the result for a partition type of the CU
    522 * \param[in] ps_cu_out : pointer to structure containing mode analysis info
    523 * \param[in] i4_32x32_id : noise estimation id
    524 * \param[in] u1_num_best_results : num best result value
    525 *
    526 * \return
    527 *    None
    528 *
    529 **********************************************************************************/
    530 void ihevce_populate_cu_struct(
    531     ihevce_enc_loop_ctxt_t *ps_ctxt,
    532     ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
    533     cur_ctb_cu_tree_t *ps_cu_tree_analyse,
    534     part_type_results_t *ps_best_results,
    535     cu_analyse_t *ps_cu_out,
    536     WORD32 i4_32x32_id,
    537 #if DISABLE_INTRA_WHEN_NOISY && USE_NOISE_TERM_IN_ENC_LOOP
    538     UWORD8 u1_is_cu_noisy,
    539 #endif
    540     UWORD8 u1_num_best_results)
    541 {
    542     cu_inter_cand_t *ps_cu_candt;
    543 
    544     WORD32 j;
    545     /* open loop intra cost by IPE */
    546     WORD32 intra_cost_ol;
    547     /* closed loop intra cost based on empirical coding noise estimate */
    548     WORD32 intra_cost_cl_est = 0;
    549     /* closed loop intra coding noise estimate */
    550     WORD32 intra_noise_cl_est;
    551     WORD32 num_results_to_copy = 0;
    552 
    553     WORD32 found_intra = 0;
    554     WORD32 quality_preset = ps_ctxt->i4_quality_preset;
    555     WORD32 frm_qp = ps_ctxt->i4_frame_qp;
    556     WORD32 frm_qstep_multiplier = gau4_frame_qstep_multiplier[frm_qp - 1];
    557     WORD32 frm_qstep = ps_ctxt->i4_frame_qstep;
    558     UWORD8 u1_cu_size = ps_cu_tree_analyse->u1_cu_size;
    559     UWORD8 u1_x_off = ps_cu_tree_analyse->b3_cu_pos_x << 3;
    560     UWORD8 u1_y_off = ps_cu_tree_analyse->b3_cu_pos_y << 3;
    561     UWORD8 u1_threshold_multi;
    562     switch(quality_preset)
    563     {
    564     case IHEVCE_QUALITY_P0:
    565     case IHEVCE_QUALITY_P2:
    566     {
    567         num_results_to_copy =
    568             MIN(MAX_NUMBER_OF_INTER_RDOPT_CANDS_IN_PQ_AND_HQ, u1_num_best_results);
    569         break;
    570     }
    571     case IHEVCE_QUALITY_P3:
    572     {
    573         num_results_to_copy = MIN(MAX_NUMBER_OF_INTER_RDOPT_CANDS_IN_MS, u1_num_best_results);
    574         break;
    575     }
    576     case IHEVCE_QUALITY_P4:
    577     case IHEVCE_QUALITY_P5:
    578     case IHEVCE_QUALITY_P6:
    579     {
    580         num_results_to_copy =
    581             MIN(MAX_NUMBER_OF_INTER_RDOPT_CANDS_IN_HS_AND_XS, u1_num_best_results);
    582         break;
    583     }
    584     }
    585 
    586     ps_cu_out->u1_num_inter_cands = 0;
    587 
    588     /***************************************************************/
    589     /* Depending CU size that has won in ME,                       */
    590     /*     Estimate the closed loop intra cost for enabling intra  */
    591     /*     evaluation in rdopt stage based on preset               */
    592     /***************************************************************/
    593     switch(u1_cu_size)
    594     {
    595     case 64:
    596     {
    597         /* coding noise estimate for intra closed loop cost */
    598         intra_cost_ol = ps_cur_ipe_ctb->i4_best64x64_intra_cost - frm_qstep * 256;
    599 
    600         intra_noise_cl_est = (frm_qstep * frm_qstep_multiplier) + (intra_cost_ol >> 4);
    601 
    602         intra_noise_cl_est = MIN(intra_noise_cl_est, (frm_qstep * 16)) * 16;
    603 
    604         intra_cost_cl_est = intra_cost_ol + intra_noise_cl_est;
    605         break;
    606     }
    607     case 32:
    608     {
    609         /* coding noise estimate for intra closed loop cost */
    610         intra_cost_ol = ps_cur_ipe_ctb->ai4_best32x32_intra_cost[i4_32x32_id] - frm_qstep * 64;
    611 
    612         intra_noise_cl_est = (frm_qstep * frm_qstep_multiplier) + (intra_cost_ol >> 4);
    613 
    614         intra_noise_cl_est = MIN(intra_noise_cl_est, (frm_qstep * 16)) * 4;
    615 
    616         intra_cost_cl_est = intra_cost_ol + intra_noise_cl_est;
    617         break;
    618     }
    619     case 16:
    620     {
    621         /* coding noise estimate for intra closed loop cost */
    622         intra_cost_ol =
    623             ps_cur_ipe_ctb->ai4_best16x16_intra_cost[(u1_x_off >> 4) + ((u1_y_off >> 4) << 2)] -
    624             frm_qstep * 16;
    625 
    626         intra_noise_cl_est = (frm_qstep * frm_qstep_multiplier) + (intra_cost_ol >> 4);
    627 
    628         intra_noise_cl_est = MIN(intra_noise_cl_est, (frm_qstep * 16));
    629 
    630         intra_cost_cl_est = intra_cost_ol + intra_noise_cl_est;
    631         break;
    632     }
    633     case 8:
    634     {
    635         /* coding noise estimate for intra closed loop cost */
    636         intra_cost_ol =
    637             ps_cur_ipe_ctb->ai4_best8x8_intra_cost[(u1_x_off >> 3) + u1_y_off] - frm_qstep * 4;
    638 
    639         intra_noise_cl_est = (frm_qstep * frm_qstep_multiplier) + (intra_cost_ol >> 4);
    640 
    641         intra_noise_cl_est = MIN(intra_noise_cl_est, (frm_qstep * 16)) >> 2;
    642 
    643         intra_cost_cl_est = intra_cost_ol + intra_noise_cl_est;
    644         break;
    645     }
    646     }
    647 #if DISABLE_INTER_CANDIDATES
    648     return;
    649 #endif
    650 
    651     u1_threshold_multi = 1;
    652 #if DISABLE_INTRA_WHEN_NOISY && USE_NOISE_TERM_IN_ENC_LOOP
    653     if(u1_is_cu_noisy)
    654     {
    655         intra_cost_cl_est = INT_MAX;
    656     }
    657 #endif
    658 
    659     ps_cu_candt = ps_cu_out->as_cu_inter_cand;
    660 
    661     /* Check if the first best candidate is inter or intra */
    662     if(ps_best_results[0].as_pu_results[0].pu.b1_intra_flag)
    663     {
    664         ps_cu_out->u1_best_is_intra = 1;
    665     }
    666     else
    667     {
    668         ps_cu_out->u1_best_is_intra = 0;
    669     }
    670 
    671     for(j = 0; j < u1_num_best_results; j++)
    672     {
    673         part_type_results_t *ps_best = &ps_best_results[j];
    674 
    675         if(ps_best->as_pu_results[0].pu.b1_intra_flag)
    676         {
    677             found_intra = 1;
    678         }
    679         else
    680         {
    681             /* populate the TU split flags, 4 flags copied as max cu can be 64 */
    682             memcpy(ps_cu_candt->ai4_tu_split_flag, ps_best->ai4_tu_split_flag, 4 * sizeof(WORD32));
    683 
    684             /* populate the TU early CBF flags, 4 flags copied as max cu can be 64 */
    685             memcpy(ps_cu_candt->ai4_tu_early_cbf, ps_best->ai4_tu_early_cbf, 4 * sizeof(WORD32));
    686 
    687             /* Note: the enums of part size and me part types shall match */
    688             ps_cu_candt->b3_part_size = ps_best->u1_part_type;
    689 
    690             /* ME will always set the skip flag to 0            */
    691             /* in closed loop skip will be added as a candidate */
    692             ps_cu_candt->b1_skip_flag = 0;
    693 
    694             /* copy the inter pus : Note: assuming NxN part type is not supported */
    695             ps_cu_candt->as_inter_pu[0] = ps_best->as_pu_results[0].pu;
    696 
    697             ps_cu_candt->as_inter_pu[0].b1_merge_flag = 0;
    698 
    699             /* Copy the total cost of the CU candt */
    700             ps_cu_candt->i4_total_cost = ps_best->i4_tot_cost;
    701 
    702             ps_cu_out->ai4_mv_cost[ps_cu_out->u1_num_inter_cands][0] =
    703                 ps_best->as_pu_results[0].i4_mv_cost;
    704 
    705 #if REUSE_ME_COMPUTED_ERROR_FOR_INTER_CAND_SIFTING
    706             ps_cu_out->ai4_err_metric[ps_cu_out->u1_num_inter_cands][0] =
    707                 ps_best->as_pu_results[0].i4_tot_cost - ps_best->as_pu_results[0].i4_mv_cost;
    708 #endif
    709 
    710             if(ps_best->u1_part_type)
    711             {
    712                 ps_cu_candt->as_inter_pu[1] = ps_best->as_pu_results[1].pu;
    713                 ps_cu_out->ai4_mv_cost[ps_cu_out->u1_num_inter_cands][1] =
    714                     ps_best->as_pu_results[1].i4_mv_cost;
    715 #if REUSE_ME_COMPUTED_ERROR_FOR_INTER_CAND_SIFTING
    716                 ps_cu_out->ai4_err_metric[ps_cu_out->u1_num_inter_cands][1] =
    717                     ps_best->as_pu_results[1].i4_tot_cost - ps_best->as_pu_results[1].i4_mv_cost;
    718 #endif
    719 
    720                 ps_cu_candt->as_inter_pu[1].b1_merge_flag = 0;
    721             }
    722 
    723             ps_cu_candt++;
    724             ps_cu_out->u1_num_inter_cands++;
    725             if(intra_cost_cl_est < ((ps_best->i4_tot_cost * u1_threshold_multi) >> 0))
    726             {
    727                 /* The rationale - */
    728                 /* Artefacts were being observed in some sequences, */
    729                 /* Brooklyn_1080p in particular - where it was readily */
    730                 /* apparent. The cause was coding of CU's as inter CU's */
    731                 /* when they actually needed to be coded as intra CU's. */
    732                 /* This was observed during either fade-outs aor flashes. */
    733                 /* After tinkering with the magnitude of the coding noise */
    734                 /* factor that was added to the intra cost to see when the */
    735                 /* artefacts in Brooklyn vanished, it was observed that the */
    736                 /* factor multiplied with the frame_qstep followed a pattern. */
    737                 /* When the pattern was subjected to a regression analysis, the */
    738                 /* formula seen below emerged. Also note the fact that the coding */
    739                 /* noise factor is the product of the frame_qstep and a constant */
    740                 /* multiplier */
    741 
    742                 /*UWORD32 frm_qstep_multiplier =
    743                 -3.346 * log((float)frm_qstep) + 15.925;*/
    744                 found_intra = 1;
    745             }
    746 
    747             if(ps_cu_out->u1_num_inter_cands >= num_results_to_copy)
    748             {
    749                 break;
    750             }
    751         }
    752     }
    753 
    754     if(quality_preset < IHEVCE_QUALITY_P4)
    755     {
    756         found_intra = 1;
    757     }
    758 
    759     if(!found_intra)
    760     {
    761         /* rdopt evaluation of intra disabled as inter is clear winner */
    762         ps_cu_out->u1_num_intra_rdopt_cands = 0;
    763 
    764         /* all the modes invalidated */
    765         ps_cu_out->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255;
    766         ps_cu_out->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = 255;
    767         ps_cu_out->s_cu_intra_cand.au1_intra_luma_modes_nxn[0][0] = 255;
    768         ps_cu_out->u1_chroma_intra_pred_mode = 255;
    769 
    770         /* no intra candt to verify */
    771         ps_cu_out->s_cu_intra_cand.b6_num_intra_cands = 0;
    772     }
    773 }
    774 
    775 /**
    776 *********************************************************************************
    777 * Function name : ihevce_create_child_nodes_cu_tree
    778 *
    779 * \brief
    780 *    This function create child node from cu tree
    781 *
    782 * \param[in] ps_cu_tree_root : pointer to Structure for CU recursion
    783 * \param[out] ps_cu_tree_cur_node : pointer to  Structure for CU recursion
    784 * \param[in] ai4_child_node_enable : child node enable flag
    785 * \param[in] nodes_already_created : already created node value
    786 * \return
    787 *    None
    788 *
    789 **********************************************************************************/
    790 WORD32 ihevce_create_child_nodes_cu_tree(
    791     cur_ctb_cu_tree_t *ps_cu_tree_root,
    792     cur_ctb_cu_tree_t *ps_cu_tree_cur_node,
    793     WORD32 *ai4_child_node_enable,
    794     WORD32 nodes_already_created)
    795 {
    796     cur_ctb_cu_tree_t *ps_tl;
    797     cur_ctb_cu_tree_t *ps_tr;
    798     cur_ctb_cu_tree_t *ps_bl;
    799     cur_ctb_cu_tree_t *ps_br;
    800 
    801     ps_tl = ps_cu_tree_root + nodes_already_created;
    802     ps_tr = ps_tl + 1;
    803     ps_bl = ps_tr + 1;
    804     ps_br = ps_bl + 1;
    805 
    806     if(1 == ps_cu_tree_cur_node->is_node_valid)
    807     {
    808         ps_tl = (ai4_child_node_enable[0]) ? ps_tl : NULL;
    809         ps_tr = (ai4_child_node_enable[1]) ? ps_tr : NULL;
    810         ps_bl = (ai4_child_node_enable[2]) ? ps_bl : NULL;
    811         ps_br = (ai4_child_node_enable[3]) ? ps_br : NULL;
    812 
    813         /* In incomplete CTB, if any of the child nodes are assigned to NULL */
    814         /* then parent node ceases to be valid */
    815         if((ps_tl == NULL) || (ps_tr == NULL) || (ps_br == NULL) || (ps_bl == NULL))
    816         {
    817             ps_cu_tree_cur_node->is_node_valid = 0;
    818         }
    819     }
    820     ps_cu_tree_cur_node->ps_child_node_tl = ps_tl;
    821     ps_cu_tree_cur_node->ps_child_node_tr = ps_tr;
    822     ps_cu_tree_cur_node->ps_child_node_bl = ps_bl;
    823     ps_cu_tree_cur_node->ps_child_node_br = ps_br;
    824 
    825     return 4;
    826 }
    827 
    828 /**
    829 *********************************************************************************
    830 * Function name : ihevce_populate_cu_tree
    831 *
    832 * \brief
    833 *    This function create child node from cu tree
    834 *
    835 * \param[in] ps_cur_ipe_ctb : pointer to Structure for CU recursion
    836 * \param[out] ps_cu_tree : pointer to  Structure for CU recursion
    837 * \param[in] tree_depth : child node enable flag
    838 * \param[in] e_quality_preset : already created node value
    839 * \param[in] e_grandparent_blk_pos : already created node value
    840 * \param[in] e_parent_blk_pos : already created node value
    841 * \param[in] e_cur_blk_pos : already created node value
    842 *
    843 * \return
    844 *    None
    845 *
    846 **********************************************************************************/
    847 void ihevce_populate_cu_tree(
    848     ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
    849     cur_ctb_cu_tree_t *ps_cu_tree,
    850     WORD32 tree_depth,
    851     IHEVCE_QUALITY_CONFIG_T e_quality_preset,
    852     CU_POS_T e_grandparent_blk_pos,
    853     CU_POS_T e_parent_blk_pos,
    854     CU_POS_T e_cur_blk_pos)
    855 {
    856     WORD32 ai4_child_enable[4];
    857     WORD32 children_nodes_required = 0;
    858     WORD32 cu_pos_x = 0;
    859     WORD32 cu_pos_y = 0;
    860     WORD32 cu_size = 0;
    861     WORD32 i;
    862     WORD32 node_validity = 0;
    863 
    864     if(NULL == ps_cu_tree)
    865     {
    866         return;
    867     }
    868 
    869     switch(tree_depth)
    870     {
    871     case 0:
    872     {
    873         /* 64x64 block */
    874         intra32_analyse_t *ps_intra32_analyse = ps_cur_ipe_ctb->as_intra32_analyse;
    875 
    876         children_nodes_required = 1;
    877         cu_size = 64;
    878         cu_pos_x = 0;
    879         cu_pos_y = 0;
    880 
    881         node_validity = !ps_cur_ipe_ctb->u1_split_flag;
    882 
    883         if(e_quality_preset >= IHEVCE_QUALITY_P2)
    884         {
    885             if(node_validity == 1)
    886             {
    887                 children_nodes_required = 0;
    888             }
    889         }
    890 
    891         for(i = 0; i < 4; i++)
    892         {
    893             ai4_child_enable[i] = ps_intra32_analyse[i].b1_valid_cu;
    894         }
    895 
    896         break;
    897     }
    898     case 1:
    899     {
    900         /* 32x32 block */
    901         WORD32 valid_flag_32 = (ps_cur_ipe_ctb->as_intra32_analyse[e_cur_blk_pos].b1_valid_cu);
    902 
    903         intra16_analyse_t *ps_intra16_analyse =
    904             ps_cur_ipe_ctb->as_intra32_analyse[e_cur_blk_pos].as_intra16_analyse;
    905 
    906         cu_size = 32;
    907 
    908         /* Explanation for logic below - */
    909         /* * pos_x and pos_y are in units of 8x8 CU's */
    910         /* * pos_x = 0 for TL and BL children */
    911         /* * pos_x = 4 for TR and BR children */
    912         /* * pos_y = 0 for TL and TR children */
    913         /* * pos_y = 4 for BL and BR children */
    914         cu_pos_x = (e_cur_blk_pos & 1) << 2;
    915         cu_pos_y = (e_cur_blk_pos & 2) << 1;
    916 
    917         {
    918             node_validity = (ps_cur_ipe_ctb->as_intra32_analyse[e_cur_blk_pos].b1_merge_flag);
    919 
    920             if(e_quality_preset >= IHEVCE_QUALITY_P2)
    921             {
    922                 node_validity = (!ps_cur_ipe_ctb->as_intra32_analyse[e_cur_blk_pos].b1_split_flag);
    923             }
    924 
    925             node_validity = node_validity && valid_flag_32;
    926             children_nodes_required = !node_validity || ps_cur_ipe_ctb->u1_split_flag;
    927         }
    928 
    929         if(e_quality_preset >= IHEVCE_QUALITY_P2)
    930         {
    931             if(node_validity == 1)
    932             {
    933                 children_nodes_required = 0;
    934             }
    935             else
    936             {
    937                 children_nodes_required =
    938                     (ps_cur_ipe_ctb->as_intra32_analyse[e_cur_blk_pos].b1_split_flag);
    939             }
    940         }
    941 
    942         for(i = 0; i < 4; i++)
    943         {
    944             ai4_child_enable[i] = ps_intra16_analyse[i].b1_valid_cu;
    945         }
    946 
    947         break;
    948     }
    949     case 2:
    950     {
    951         /* 16x16 block */
    952         WORD32 cu_pos_x_parent;
    953         WORD32 cu_pos_y_parent;
    954         WORD32 merge_flag_16;
    955         WORD32 merge_flag_32;
    956 
    957         intra8_analyse_t *ps_intra8_analyse = ps_cur_ipe_ctb->as_intra32_analyse[e_parent_blk_pos]
    958                                                   .as_intra16_analyse[e_cur_blk_pos]
    959                                                   .as_intra8_analyse;
    960 
    961         WORD32 valid_flag_16 = (ps_cur_ipe_ctb->as_intra32_analyse[e_parent_blk_pos]
    962                                     .as_intra16_analyse[e_cur_blk_pos]
    963                                     .b1_valid_cu);
    964 
    965         cu_size = 16;
    966 
    967         /* Explanation for logic below - */
    968         /* See similar explanation above */
    969         cu_pos_x_parent = (e_parent_blk_pos & 1) << 2;
    970         cu_pos_y_parent = (e_parent_blk_pos & 2) << 1;
    971         cu_pos_x = cu_pos_x_parent + ((e_cur_blk_pos & 1) << 1);
    972         cu_pos_y = cu_pos_y_parent + (e_cur_blk_pos & 2);
    973 
    974         merge_flag_16 = (ps_cur_ipe_ctb->as_intra32_analyse[e_parent_blk_pos]
    975                              .as_intra16_analyse[e_cur_blk_pos]
    976                              .b1_merge_flag);
    977         merge_flag_32 = (ps_cur_ipe_ctb->as_intra32_analyse[e_parent_blk_pos].b1_merge_flag);
    978 
    979 #if !ENABLE_UNIFORM_CU_SIZE_8x8
    980         node_validity = (merge_flag_16) || ((ps_cur_ipe_ctb->u1_split_flag) && (!merge_flag_32));
    981 #else
    982         node_validity = 0;
    983 #endif
    984 
    985         node_validity = (merge_flag_16) || ((ps_cur_ipe_ctb->u1_split_flag) && (!merge_flag_32));
    986 
    987         if(e_quality_preset >= IHEVCE_QUALITY_P2)
    988         {
    989             node_validity = (!ps_cur_ipe_ctb->as_intra32_analyse[e_parent_blk_pos]
    990                                   .as_intra16_analyse[e_cur_blk_pos]
    991                                   .b1_split_flag);
    992         }
    993 
    994         node_validity = node_validity && valid_flag_16;
    995 
    996         children_nodes_required = ((ps_cur_ipe_ctb->u1_split_flag) && (!merge_flag_32)) ||
    997                                   !merge_flag_16;
    998 
    999         if(e_quality_preset >= IHEVCE_QUALITY_P2)
   1000         {
   1001             children_nodes_required = !node_validity;
   1002         }
   1003 
   1004         for(i = 0; i < 4; i++)
   1005         {
   1006             ai4_child_enable[i] = ps_intra8_analyse[i].b1_valid_cu;
   1007         }
   1008         break;
   1009     }
   1010     case 3:
   1011     {
   1012         /* 8x8 block */
   1013         WORD32 cu_pos_x_grandparent;
   1014         WORD32 cu_pos_y_grandparent;
   1015 
   1016         WORD32 cu_pos_x_parent;
   1017         WORD32 cu_pos_y_parent;
   1018 
   1019         WORD32 valid_flag_8 = (ps_cur_ipe_ctb->as_intra32_analyse[e_grandparent_blk_pos]
   1020                                    .as_intra16_analyse[e_parent_blk_pos]
   1021                                    .as_intra8_analyse[e_cur_blk_pos]
   1022                                    .b1_valid_cu);
   1023 
   1024         cu_size = 8;
   1025 
   1026         cu_pos_x_grandparent = (e_grandparent_blk_pos & 1) << 2;
   1027         cu_pos_y_grandparent = (e_grandparent_blk_pos & 2) << 1;
   1028         cu_pos_x_parent = cu_pos_x_grandparent + ((e_parent_blk_pos & 1) << 1);
   1029         cu_pos_y_parent = cu_pos_y_grandparent + (e_parent_blk_pos & 2);
   1030         cu_pos_x = cu_pos_x_parent + (e_cur_blk_pos & 1);
   1031         cu_pos_y = cu_pos_y_parent + ((e_cur_blk_pos & 2) >> 1);
   1032 
   1033         node_validity = 1 && valid_flag_8;
   1034 
   1035         children_nodes_required = 0;
   1036 
   1037         break;
   1038     }
   1039     }
   1040 
   1041     /* Fill the current cu_tree node */
   1042     ps_cu_tree->is_node_valid = node_validity;
   1043     ps_cu_tree->u1_cu_size = cu_size;
   1044     ps_cu_tree->b3_cu_pos_x = cu_pos_x;
   1045     ps_cu_tree->b3_cu_pos_y = cu_pos_y;
   1046 
   1047     if(children_nodes_required)
   1048     {
   1049         tree_depth++;
   1050 
   1051         ps_cur_ipe_ctb->nodes_created_in_cu_tree += ihevce_create_child_nodes_cu_tree(
   1052             ps_cur_ipe_ctb->ps_cu_tree_root,
   1053             ps_cu_tree,
   1054             ai4_child_enable,
   1055             ps_cur_ipe_ctb->nodes_created_in_cu_tree);
   1056 
   1057         ihevce_populate_cu_tree(
   1058             ps_cur_ipe_ctb,
   1059             ps_cu_tree->ps_child_node_tl,
   1060             tree_depth,
   1061             e_quality_preset,
   1062             e_parent_blk_pos,
   1063             e_cur_blk_pos,
   1064             POS_TL);
   1065 
   1066         ihevce_populate_cu_tree(
   1067             ps_cur_ipe_ctb,
   1068             ps_cu_tree->ps_child_node_tr,
   1069             tree_depth,
   1070             e_quality_preset,
   1071             e_parent_blk_pos,
   1072             e_cur_blk_pos,
   1073             POS_TR);
   1074 
   1075         ihevce_populate_cu_tree(
   1076             ps_cur_ipe_ctb,
   1077             ps_cu_tree->ps_child_node_bl,
   1078             tree_depth,
   1079             e_quality_preset,
   1080             e_parent_blk_pos,
   1081             e_cur_blk_pos,
   1082             POS_BL);
   1083 
   1084         ihevce_populate_cu_tree(
   1085             ps_cur_ipe_ctb,
   1086             ps_cu_tree->ps_child_node_br,
   1087             tree_depth,
   1088             e_quality_preset,
   1089             e_parent_blk_pos,
   1090             e_cur_blk_pos,
   1091             POS_BR);
   1092     }
   1093     else
   1094     {
   1095         ps_cu_tree->ps_child_node_tl = NULL;
   1096         ps_cu_tree->ps_child_node_tr = NULL;
   1097         ps_cu_tree->ps_child_node_bl = NULL;
   1098         ps_cu_tree->ps_child_node_br = NULL;
   1099     }
   1100 }
   1101 
   1102 /**
   1103 *********************************************************************************
   1104 * Function name : ihevce_intra_mode_populator
   1105 *
   1106 * \brief
   1107 *    This function populates intra mode info into the struct
   1108 *
   1109 * \param[in] ps_cu_intra_cand : pointer to Structure contain cu intra candidate info
   1110 * \param[out] ps_ipe_data : pointer to  IPE L0 analyze structure
   1111 * \param[in] ps_cu_tree_data : pointer to cu recursive struct
   1112 * \param[in] i1_slice_type : contain slice type value
   1113 * \param[in] i4_quality_preset : contain quality preset value
   1114 *
   1115 * \return
   1116 *    None
   1117 *
   1118 **********************************************************************************/
   1119 static void ihevce_intra_mode_populator(
   1120     cu_intra_cand_t *ps_cu_intra_cand,
   1121     ipe_l0_ctb_analyse_for_me_t *ps_ipe_data,
   1122     cur_ctb_cu_tree_t *ps_cu_tree_data,
   1123     WORD8 i1_slice_type,
   1124     WORD32 i4_quality_preset)
   1125 {
   1126     WORD32 i4_32x32_id, i4_16x16_id, i4_8x8_id;
   1127 
   1128     UWORD8 u1_cu_pos_x = ps_cu_tree_data->b3_cu_pos_x;
   1129     UWORD8 u1_cu_pos_y = ps_cu_tree_data->b3_cu_pos_y;
   1130 
   1131     i4_32x32_id = ((u1_cu_pos_x & 4) >> 2) + ((u1_cu_pos_y & 4) >> 1);
   1132 
   1133     i4_16x16_id = ((u1_cu_pos_x & 2) >> 1) + ((u1_cu_pos_y & 2));
   1134 
   1135     i4_8x8_id = (u1_cu_pos_x & 1) + ((u1_cu_pos_y & 1) << 1);
   1136 
   1137     if(i4_quality_preset < IHEVCE_QUALITY_P3)
   1138     {
   1139         switch(ps_cu_tree_data->u1_cu_size)
   1140         {
   1141         case 64:
   1142         {
   1143             memcpy(
   1144                 ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
   1145                 ps_ipe_data->au1_best_modes_32x32_tu,
   1146                 MAX_INTRA_CU_CANDIDATES + 1);
   1147 
   1148             break;
   1149         }
   1150         case 32:
   1151         {
   1152             intra32_analyse_t *ps_32x32_ipe_analyze = &ps_ipe_data->as_intra32_analyse[i4_32x32_id];
   1153 
   1154             memcpy(
   1155                 ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu,
   1156                 ps_32x32_ipe_analyze->au1_best_modes_32x32_tu,
   1157                 MAX_INTRA_CU_CANDIDATES + 1);
   1158 
   1159             if((i1_slice_type != ISLICE) && (i4_quality_preset == IHEVCE_QUALITY_P0))
   1160             {
   1161                 ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = 255;
   1162             }
   1163             else if((i1_slice_type == ISLICE) && (i4_quality_preset == IHEVCE_QUALITY_P0))
   1164             {
   1165                 if((ps_cu_tree_data->ps_child_node_bl != NULL) &&
   1166                    (ps_cu_tree_data->ps_child_node_bl->is_node_valid))
   1167                 {
   1168                     ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = 255;
   1169                 }
   1170                 else
   1171                 {
   1172                     memcpy(
   1173                         ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
   1174                         ps_32x32_ipe_analyze->au1_best_modes_16x16_tu,
   1175                         MAX_INTRA_CU_CANDIDATES + 1);
   1176                 }
   1177             }
   1178             else
   1179             {
   1180                 memcpy(
   1181                     ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
   1182                     ps_32x32_ipe_analyze->au1_best_modes_16x16_tu,
   1183                     MAX_INTRA_CU_CANDIDATES + 1);
   1184             }
   1185 
   1186             break;
   1187         }
   1188         case 16:
   1189         {
   1190             /* Copy best 16x16 CU modes */
   1191             intra32_analyse_t *ps_32x32_ipe_analyze = &ps_ipe_data->as_intra32_analyse[i4_32x32_id];
   1192 
   1193             intra16_analyse_t *ps_16x16_ipe_analyze =
   1194                 &ps_32x32_ipe_analyze->as_intra16_analyse[i4_16x16_id];
   1195 
   1196             memcpy(
   1197                 ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu,
   1198                 ps_16x16_ipe_analyze->au1_best_modes_16x16_tu,
   1199                 MAX_INTRA_CU_CANDIDATES + 1);
   1200 
   1201             if((i1_slice_type != ISLICE) && (i4_quality_preset == IHEVCE_QUALITY_P0))
   1202             {
   1203                 ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = 255;
   1204             }
   1205             else if((i1_slice_type == ISLICE) && (i4_quality_preset == IHEVCE_QUALITY_P0))
   1206             {
   1207                 if((ps_cu_tree_data->ps_child_node_bl != NULL) &&
   1208                    (ps_cu_tree_data->ps_child_node_bl->is_node_valid))
   1209                 {
   1210                     ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = 255;
   1211                 }
   1212                 else
   1213                 {
   1214                     memcpy(
   1215                         ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
   1216                         ps_16x16_ipe_analyze->au1_best_modes_8x8_tu,
   1217                         MAX_INTRA_CU_CANDIDATES + 1);
   1218                 }
   1219             }
   1220             else
   1221             {
   1222                 memcpy(
   1223                     ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
   1224                     ps_16x16_ipe_analyze->au1_best_modes_8x8_tu,
   1225                     MAX_INTRA_CU_CANDIDATES + 1);
   1226             }
   1227 
   1228             break;
   1229         }
   1230         case 8:
   1231         {
   1232             intra32_analyse_t *ps_32x32_ipe_analyze = &ps_ipe_data->as_intra32_analyse[i4_32x32_id];
   1233 
   1234             intra16_analyse_t *ps_16x16_ipe_analyze =
   1235                 &ps_32x32_ipe_analyze->as_intra16_analyse[i4_16x16_id];
   1236 
   1237             intra8_analyse_t *ps_8x8_ipe_analyze =
   1238                 &ps_16x16_ipe_analyze->as_intra8_analyse[i4_8x8_id];
   1239 
   1240             memcpy(
   1241                 ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu,
   1242                 ps_8x8_ipe_analyze->au1_best_modes_8x8_tu,
   1243                 MAX_INTRA_CU_CANDIDATES + 1);
   1244 
   1245             ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = 255;
   1246 
   1247             /* Initialise the hash */
   1248             {
   1249                 WORD32 i, j;
   1250 
   1251                 for(i = 0; i < NUM_PU_PARTS; i++)
   1252                 {
   1253                     ps_cu_intra_cand->au1_num_modes_added[i] = 0;
   1254 
   1255                     for(j = 0; j < MAX_INTRA_CANDIDATES; j++)
   1256                     {
   1257                         ps_cu_intra_cand->au1_intra_luma_mode_nxn_hash[i][j] = 0;
   1258                     }
   1259                 }
   1260 
   1261                 for(i = 0; i < NUM_PU_PARTS; i++)
   1262                 {
   1263                     for(j = 0; j < MAX_INTRA_CU_CANDIDATES; j++)
   1264                     {
   1265                         if(ps_8x8_ipe_analyze->au1_4x4_best_modes[i][j] == 255)
   1266                         {
   1267                             ps_cu_intra_cand->au1_intra_luma_modes_nxn[i][j] = 255;
   1268                             break;
   1269                         }
   1270 
   1271                         ps_cu_intra_cand->au1_intra_luma_modes_nxn[i][j] =
   1272                             ps_8x8_ipe_analyze->au1_4x4_best_modes[i][j];
   1273 
   1274                         ps_cu_intra_cand->au1_intra_luma_mode_nxn_hash
   1275                             [i][ps_8x8_ipe_analyze->au1_4x4_best_modes[i][j]] = 1;
   1276 
   1277                         ps_cu_intra_cand->au1_num_modes_added[i]++;
   1278                     }
   1279 
   1280                     if(ps_cu_intra_cand->au1_num_modes_added[i] == MAX_INTRA_CU_CANDIDATES)
   1281                     {
   1282                         if(i1_slice_type != BSLICE)
   1283                         {
   1284                             ps_cu_intra_cand->au1_num_modes_added[i] =
   1285                                 ihevce_intra_mode_nxn_hash_updater(
   1286                                     ps_cu_intra_cand->au1_intra_luma_modes_nxn[i],
   1287                                     ps_cu_intra_cand->au1_intra_luma_mode_nxn_hash[i],
   1288                                     ps_cu_intra_cand->au1_num_modes_added[i]);
   1289                         }
   1290                     }
   1291                 }
   1292             }
   1293 
   1294             break;
   1295         }
   1296         }
   1297     }
   1298     else if(i4_quality_preset == IHEVCE_QUALITY_P6)
   1299     {
   1300         switch(ps_cu_tree_data->u1_cu_size)
   1301         {
   1302         case 64:
   1303         {
   1304             memcpy(
   1305                 ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
   1306                 ps_ipe_data->au1_best_modes_32x32_tu,
   1307                 (NUM_BEST_MODES + 1) * sizeof(UWORD8));
   1308 
   1309             ps_cu_intra_cand->b1_eval_tx_cusize = 0;
   1310             ps_cu_intra_cand->b1_eval_tx_cusize_by2 = 1;
   1311             ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255;
   1312 
   1313 #if ENABLE_INTRA_MODE_FILTERING_IN_XS25
   1314             ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2
   1315                 [MAX_NUM_INTRA_MODES_PER_TU_DISTRIBUTION_IN_XS25] = 255;
   1316 #endif
   1317 
   1318             break;
   1319         }
   1320         case 32:
   1321         {
   1322             intra32_analyse_t *ps_32x32_ipe_analyze = &ps_ipe_data->as_intra32_analyse[i4_32x32_id];
   1323 
   1324             memcpy(
   1325                 ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu,
   1326                 ps_32x32_ipe_analyze->au1_best_modes_32x32_tu,
   1327                 (NUM_BEST_MODES + 1) * sizeof(UWORD8));
   1328 
   1329             memcpy(
   1330                 ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
   1331                 ps_32x32_ipe_analyze->au1_best_modes_16x16_tu,
   1332                 (NUM_BEST_MODES + 1) * sizeof(UWORD8));
   1333 
   1334 #if ENABLE_INTRA_MODE_FILTERING_IN_XS25
   1335             ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu
   1336                 [MAX_NUM_INTRA_MODES_PER_TU_DISTRIBUTION_IN_XS25] = 255;
   1337             ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2
   1338                 [MAX_NUM_INTRA_MODES_PER_TU_DISTRIBUTION_IN_XS25] = 255;
   1339 #endif
   1340 
   1341             break;
   1342         }
   1343         case 16:
   1344         {
   1345             /* Copy best 16x16 CU modes */
   1346             intra32_analyse_t *ps_32x32_ipe_analyze = &ps_ipe_data->as_intra32_analyse[i4_32x32_id];
   1347 
   1348             intra16_analyse_t *ps_16x16_ipe_analyze =
   1349                 &ps_32x32_ipe_analyze->as_intra16_analyse[i4_16x16_id];
   1350 
   1351             memcpy(
   1352                 ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu,
   1353                 ps_16x16_ipe_analyze->au1_best_modes_16x16_tu,
   1354                 (NUM_BEST_MODES + 1) * sizeof(UWORD8));
   1355 
   1356             memcpy(
   1357                 ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
   1358                 ps_16x16_ipe_analyze->au1_best_modes_8x8_tu,
   1359                 (NUM_BEST_MODES + 1) * sizeof(UWORD8));
   1360 
   1361 #if ENABLE_INTRA_MODE_FILTERING_IN_XS25
   1362             ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu
   1363                 [MAX_NUM_INTRA_MODES_PER_TU_DISTRIBUTION_IN_XS25] = 255;
   1364             ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2
   1365                 [MAX_NUM_INTRA_MODES_PER_TU_DISTRIBUTION_IN_XS25] = 255;
   1366 #endif
   1367 
   1368             break;
   1369         }
   1370         case 8:
   1371         {
   1372             WORD32 i;
   1373 
   1374             intra32_analyse_t *ps_32x32_ipe_analyze = &ps_ipe_data->as_intra32_analyse[i4_32x32_id];
   1375 
   1376             intra16_analyse_t *ps_16x16_ipe_analyze =
   1377                 &ps_32x32_ipe_analyze->as_intra16_analyse[i4_16x16_id];
   1378 
   1379             intra8_analyse_t *ps_8x8_ipe_analyze =
   1380                 &ps_16x16_ipe_analyze->as_intra8_analyse[i4_8x8_id];
   1381 
   1382             memcpy(
   1383                 ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu,
   1384                 ps_8x8_ipe_analyze->au1_best_modes_8x8_tu,
   1385                 (NUM_BEST_MODES + 1) * sizeof(UWORD8));
   1386 
   1387 #if !ENABLE_INTRA_MODE_FILTERING_IN_XS25
   1388             memcpy(
   1389                 ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
   1390                 ps_8x8_ipe_analyze->au1_best_modes_4x4_tu,
   1391                 (NUM_BEST_MODES + 1) * sizeof(UWORD8));
   1392 
   1393             for(i = 0; i < 4; i++)
   1394             {
   1395                 memcpy(
   1396                     ps_cu_intra_cand->au1_intra_luma_modes_nxn[i],
   1397                     ps_8x8_ipe_analyze->au1_4x4_best_modes[i],
   1398                     (NUM_BEST_MODES + 1) * sizeof(UWORD8));
   1399 
   1400                 ps_cu_intra_cand->au1_intra_luma_modes_nxn[i][MAX_INTRA_CU_CANDIDATES] = 255;
   1401             }
   1402 #else
   1403             if(255 == ps_8x8_ipe_analyze->au1_4x4_best_modes[0][0])
   1404             {
   1405                 memcpy(
   1406                     ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
   1407                     ps_8x8_ipe_analyze->au1_best_modes_4x4_tu,
   1408                     (NUM_BEST_MODES + 1) * sizeof(UWORD8));
   1409 
   1410                 ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2
   1411                     [MAX_NUM_INTRA_MODES_PER_TU_DISTRIBUTION_IN_XS25] = 255;
   1412             }
   1413             else
   1414             {
   1415                 for(i = 0; i < 4; i++)
   1416                 {
   1417                     memcpy(
   1418                         ps_cu_intra_cand->au1_intra_luma_modes_nxn[i],
   1419                         ps_8x8_ipe_analyze->au1_4x4_best_modes[i],
   1420                         (NUM_BEST_MODES + 1) * sizeof(UWORD8));
   1421 
   1422                     ps_cu_intra_cand->au1_intra_luma_modes_nxn
   1423                         [i][MAX_NUM_INTRA_MODES_PER_TU_DISTRIBUTION_IN_XS25] = 255;
   1424                 }
   1425             }
   1426 
   1427             ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu
   1428                 [MAX_NUM_INTRA_MODES_PER_TU_DISTRIBUTION_IN_XS25] = 255;
   1429 #endif
   1430 
   1431 #if FORCE_NXN_MODE_BASED_ON_OL_IPE
   1432             if((i4_quality_preset == IHEVCE_QUALITY_P6) && (i1_slice_type != ISLICE))
   1433             {
   1434                 /*Evaluate nxn mode for 8x8 if ol ipe wins for nxn over cu=tu and cu=4tu.*/
   1435                 /*Disbale CU=TU and CU=4TU modes */
   1436                 if(ps_8x8_ipe_analyze->b1_enable_nxn == 1)
   1437                 {
   1438                     ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255;
   1439                     ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = 255;
   1440                     ps_cu_intra_cand->au1_intra_luma_modes_nxn[0][1] = 255;
   1441                     ps_cu_intra_cand->au1_intra_luma_modes_nxn[1][1] = 255;
   1442                     ps_cu_intra_cand->au1_intra_luma_modes_nxn[2][1] = 255;
   1443                     ps_cu_intra_cand->au1_intra_luma_modes_nxn[3][1] = 255;
   1444                 }
   1445             }
   1446 #endif
   1447 
   1448             break;
   1449         }
   1450         }
   1451     }
   1452     else
   1453     {
   1454         switch(ps_cu_tree_data->u1_cu_size)
   1455         {
   1456         case 64:
   1457         {
   1458             memcpy(
   1459                 ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
   1460                 ps_ipe_data->au1_best_modes_32x32_tu,
   1461                 (NUM_BEST_MODES + 1) * sizeof(UWORD8));
   1462 
   1463             ps_cu_intra_cand->b1_eval_tx_cusize = 0;
   1464             ps_cu_intra_cand->b1_eval_tx_cusize_by2 = 1;
   1465             ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255;
   1466 
   1467             break;
   1468         }
   1469         case 32:
   1470         {
   1471             intra32_analyse_t *ps_32x32_ipe_analyze = &ps_ipe_data->as_intra32_analyse[i4_32x32_id];
   1472 
   1473             memcpy(
   1474                 ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu,
   1475                 ps_32x32_ipe_analyze->au1_best_modes_32x32_tu,
   1476                 (NUM_BEST_MODES + 1) * sizeof(UWORD8));
   1477 
   1478             memcpy(
   1479                 ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
   1480                 ps_32x32_ipe_analyze->au1_best_modes_16x16_tu,
   1481                 (NUM_BEST_MODES + 1) * sizeof(UWORD8));
   1482 
   1483             break;
   1484         }
   1485         case 16:
   1486         {
   1487             /* Copy best 16x16 CU modes */
   1488             intra32_analyse_t *ps_32x32_ipe_analyze = &ps_ipe_data->as_intra32_analyse[i4_32x32_id];
   1489 
   1490             intra16_analyse_t *ps_16x16_ipe_analyze =
   1491                 &ps_32x32_ipe_analyze->as_intra16_analyse[i4_16x16_id];
   1492 
   1493             memcpy(
   1494                 ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu,
   1495                 ps_16x16_ipe_analyze->au1_best_modes_16x16_tu,
   1496                 (NUM_BEST_MODES + 1) * sizeof(UWORD8));
   1497 
   1498             memcpy(
   1499                 ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
   1500                 ps_16x16_ipe_analyze->au1_best_modes_8x8_tu,
   1501                 (NUM_BEST_MODES + 1) * sizeof(UWORD8));
   1502 
   1503             break;
   1504         }
   1505         case 8:
   1506         {
   1507             WORD32 i;
   1508 
   1509             intra32_analyse_t *ps_32x32_ipe_analyze = &ps_ipe_data->as_intra32_analyse[i4_32x32_id];
   1510 
   1511             intra16_analyse_t *ps_16x16_ipe_analyze =
   1512                 &ps_32x32_ipe_analyze->as_intra16_analyse[i4_16x16_id];
   1513 
   1514             intra8_analyse_t *ps_8x8_ipe_analyze =
   1515                 &ps_16x16_ipe_analyze->as_intra8_analyse[i4_8x8_id];
   1516 
   1517             memcpy(
   1518                 ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu,
   1519                 ps_8x8_ipe_analyze->au1_best_modes_8x8_tu,
   1520                 (NUM_BEST_MODES + 1) * sizeof(UWORD8));
   1521 
   1522             memcpy(
   1523                 ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
   1524                 ps_8x8_ipe_analyze->au1_best_modes_4x4_tu,
   1525                 (NUM_BEST_MODES + 1) * sizeof(UWORD8));
   1526 
   1527             for(i = 0; i < 4; i++)
   1528             {
   1529                 memcpy(
   1530                     ps_cu_intra_cand->au1_intra_luma_modes_nxn[i],
   1531                     ps_8x8_ipe_analyze->au1_4x4_best_modes[i],
   1532                     (NUM_BEST_MODES + 1) * sizeof(UWORD8));
   1533 
   1534                 ps_cu_intra_cand->au1_intra_luma_modes_nxn[i][MAX_INTRA_CU_CANDIDATES] = 255;
   1535             }
   1536 
   1537             break;
   1538         }
   1539         }
   1540     }
   1541 }
   1542 /**
   1543 ******************************************************************************
   1544 * \if Function name : ihevce_compute_rdo \endif
   1545 *
   1546 * \brief
   1547 *    Coding Unit mode decide function. Performs RD opt and decides the best mode
   1548 *
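   1549 * \param[in] ps_ctxt : pointer to enc_loop module context
   1550 * \param[in] ps_cu_prms  : pointer to coding unit params (position, buffer pointers)
   1551 * \param[in] ps_cu_tree_analyse : CU tree node to evaluate (supplies CU position, size, eval flags)
   1552 * \param[in] ps_cur_ipe_ctb,ps_cu_me_data : IPE and ME analysis data used to build the candidates
   1553 * \param[out] ps_final_mode_state,pu1_ecd_data : passed on to ihevce_cu_mode_decide (ECD coeff data)
   1554 * \param[out] ps_col_pu,pu1_col_pu_map : colocated pu buffer pointer and colocated pu map pointer
   1555 * \param[in] col_start_pu_idx : pu index start value
   1556 * \param[in] i4_ctb_x_off,i4_ctb_y_off : offsets of the current CTB; y offset 0 is treated as first row
   1557 *
   1558 * \return
   1559 *    RD-opt best cost, i.e. the value returned by ihevce_cu_mode_decide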
   1560 *
   1561 *
   1562 * \author
   1563 *  Ittiam
   1564 *
   1565 *****************************************************************************
   1566 */
   1567 LWORD64 ihevce_compute_rdo(
   1568     ihevce_enc_loop_ctxt_t *ps_ctxt,
   1569     enc_loop_cu_prms_t *ps_cu_prms,
   1570     cur_ctb_cu_tree_t *ps_cu_tree_analyse,
   1571     ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
   1572     me_ctb_data_t *ps_cu_me_data,
   1573     pu_col_mv_t *ps_col_pu,
   1574     final_mode_state_t *ps_final_mode_state,
   1575     UWORD8 *pu1_col_pu_map,
   1576     UWORD8 *pu1_ecd_data,
   1577     WORD32 col_start_pu_idx,
   1578     WORD32 i4_ctb_x_off,
   1579     WORD32 i4_ctb_y_off)
   1580 {
            /*
             * Overview: fills a stack-local cu_analyse_t for the CU described by
             * ps_cu_tree_analyse (activity factors, inter candidates taken from
             * ps_cu_me_data, intra candidates taken from ps_cur_ipe_ctb), performs
             * the top-row dependency wait / entropy state copy where applicable,
             * regenerates the quant rounding factors when the configured rounding
             * level asks for it, and finally returns the cost reported by
             * ihevce_cu_mode_decide().
             */
   1581     /* Populate the rdo candidates to the structure */
   1582     cu_analyse_t s_cu_analyse;
   1583     LWORD64 rdopt_best_cost;
   1584     /* Populate candidates of child nodes to CU analyse struct for further evaluation */
   1585     cu_analyse_t *ps_cu_analyse;
   1586     WORD32 curr_cu_pos_in_row;
   1587     WORD32 cu_top_right_offset, cu_top_right_dep_pos;
   1588     WORD32 is_first_cu_in_ctb, is_ctb_level_quant_rounding, is_nctb_level_quant_rounding;
   1589 
            /* CU position inside the CTB; it is shifted left by 3 further below  */
            /* when it is added to i4_ctb_x_off, i.e. it is in units of 8         */
   1590     WORD32 cu_pos_x = ps_cu_tree_analyse->b3_cu_pos_x;
   1591     WORD32 cu_pos_y = ps_cu_tree_analyse->b3_cu_pos_y;
   1592 
   1593     /*Derive the indices of 32*32, 16*16 and 8*8 blocks*/
            /* Bit 2 / bit 1 / bit 0 of the position select the 32x32 / 16x16 /   */
            /* 8x8 block. Each id is (x bit) + 2 * (y bit), i.e. a 0..3 index     */
            /* that is local to the parent block.                                 */
   1594     WORD32 i4_32x32_id = ((cu_pos_x & 4) >> 2) + ((cu_pos_y & 4) >> 1);
   1595 
   1596     WORD32 i4_16x16_id = ((cu_pos_x & 2) >> 1) + ((cu_pos_y & 2));
   1597 
   1598     WORD32 i4_8x8_id = (cu_pos_x & 1) + ((cu_pos_y & 1) << 1);
   1599     if(i4_ctb_y_off == 0)
   1600     {
   1601         /* No wait for 1st row */
                /* NOTE(review): this first assignment is a dead store, the value */
                /* is unconditionally overwritten inside the block that follows.  */
   1602         cu_top_right_offset = -(MAX_CTB_SIZE);
   1603         {
   1604             ihevce_tile_params_t *ps_col_tile_params =
   1605                 ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base + ps_ctxt->i4_tile_col_idx);
   1606 
                    /* Negative offset that also accounts for the x start of the  */
                    /* current tile column                                        */
   1607             cu_top_right_offset = -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE));
   1608         }
   1609 
   1610         cu_top_right_dep_pos = 0;
   1611     }
   1612     else
   1613     {
                /* Offset of twice the CU size; dependency position is the index  */
                /* of the row above (y offset / 64 - 1).                          */
                /* NOTE(review): the >> 6 presumably assumes 64 sample CTB rows   */
                /* - confirm against the caller.                                  */
   1614         cu_top_right_offset = ps_cu_tree_analyse->u1_cu_size << 1;
   1615         cu_top_right_dep_pos = (i4_ctb_y_off >> 6) - 1;
   1616     }
            /* All further accesses go through ps_cu_analyse, which points at the */
            /* stack-local s_cu_analyse (not zero-initialised).                   */
   1617     ps_cu_analyse = &s_cu_analyse;
   1618 
   1619     ps_cu_analyse->b3_cu_pos_x = cu_pos_x;
   1620     ps_cu_analyse->b3_cu_pos_y = cu_pos_y;
   1621     ps_cu_analyse->u1_cu_size = ps_cu_tree_analyse->u1_cu_size;
   1622 
   1623     /* Default initializations */
            /* 255 in entry 0 is the value this function also uses further below  */
            /* to mark a mode list as invalidated                                 */
   1624     ps_cu_analyse->u1_num_intra_rdopt_cands = MAX_INTRA_CU_CANDIDATES;
   1625     ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_nxn[0][0] = 255;
   1626     ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255;
   1627     ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = 255;
   1628 
   1629     ps_cu_analyse->s_cu_intra_cand.b1_eval_tx_cusize = 1;
   1630     ps_cu_analyse->s_cu_intra_cand.b1_eval_tx_cusize_by2 = 1;
   1631 
            /* Copy the activity factors that belong to this CU size. The number  */
            /* of WORD32 entries copied shrinks with the CU size (8, 6, 4, 4).    */
            /* There is no default case: for any other u1_cu_size value           */
            /* i4_act_factor is left as it was.                                   */
   1632     switch(ps_cu_tree_analyse->u1_cu_size)
   1633     {
   1634     case 64:
   1635     {
   1636         memcpy(
   1637             ps_cu_analyse[0].i4_act_factor,
   1638             ps_cur_ipe_ctb->i4_64x64_act_factor,
   1639             4 * 2 * sizeof(WORD32));
   1640 
                /* For a 64x64 CU only the TU = CU/2 intra option is evaluated;   */
                /* the TU = CU mode list is marked invalid                        */
   1641         ps_cu_analyse[0].s_cu_intra_cand.b1_eval_tx_cusize = 0;
   1642         ps_cu_analyse[0].s_cu_intra_cand.b1_eval_tx_cusize_by2 = 1;
   1643         ps_cu_analyse[0].s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255;
   1644 
   1645         break;
   1646     }
   1647     case 32:
   1648     {
   1649         memcpy(
   1650             ps_cu_analyse[0].i4_act_factor,
   1651             ps_cur_ipe_ctb->i4_32x32_act_factor[i4_32x32_id],
   1652             3 * 2 * sizeof(WORD32));
   1653 
   1654         break;
   1655     }
   1656     case 16:
   1657     {
                /* 16x16 entries are indexed CTB-wide: 4 per 32x32 block */
   1658         memcpy(
   1659             ps_cu_analyse[0].i4_act_factor,
   1660             ps_cur_ipe_ctb->i4_16x16_act_factor[(i4_32x32_id << 2) + i4_16x16_id],
   1661             2 * 2 * sizeof(WORD32));
   1662 
   1663         break;
   1664     }
   1665     case 8:
   1666     {
                /* An 8x8 CU reuses the factors of the 16x16 block containing it */
   1667         memcpy(
   1668             ps_cu_analyse[0].i4_act_factor,
   1669             ps_cur_ipe_ctb->i4_16x16_act_factor[(i4_32x32_id << 2) + i4_16x16_id],
   1670             2 * 2 * sizeof(WORD32));
   1671 
   1672         break;
   1673     }
   1674     }
   1675 
   1676     /* Populate the me data in cu_analyse struct */
   1677     /* For CU size 32 and 64, add me data to array of cu analyse struct */
   1678     if(ISLICE != ps_ctxt->i1_slice_type)
   1679     {
   1680         if((ps_cu_tree_analyse->u1_cu_size >= 32) && (ps_cu_tree_analyse->u1_inter_eval_enable))
   1681         {
   1682             if(32 == ps_cu_tree_analyse->u1_cu_size)
   1683             {
   1684                 ihevce_populate_cu_struct(
   1685                     ps_ctxt,
   1686                     ps_cur_ipe_ctb,
   1687                     ps_cu_tree_analyse,
   1688                     ps_cu_me_data->as_32x32_block_data[i4_32x32_id].as_best_results,
   1689                     ps_cu_analyse,
   1690                     i4_32x32_id,
   1691 #if DISABLE_INTRA_WHEN_NOISY && USE_NOISE_TERM_IN_ENC_LOOP
   1692                     ps_cu_prms->u1_is_cu_noisy,
   1693 #endif
   1694                     ps_cu_me_data->as_32x32_block_data[i4_32x32_id].num_best_results);
   1695             }
   1696             else
   1697             {
                        /* CU size above 32: use the 64x64 ME results */
   1698                 ihevce_populate_cu_struct(
   1699                     ps_ctxt,
   1700                     ps_cur_ipe_ctb,
   1701                     ps_cu_tree_analyse,
   1702                     ps_cu_me_data->s_64x64_block_data.as_best_results,
   1703                     ps_cu_analyse,
   1704                     i4_32x32_id,
   1705 #if DISABLE_INTRA_WHEN_NOISY && USE_NOISE_TERM_IN_ENC_LOOP
   1706                     ps_cu_prms->u1_is_cu_noisy,
   1707 #endif
   1708                     ps_cu_me_data->s_64x64_block_data.num_best_results);
   1709             }
   1710         }
   1711         else if(ps_cu_tree_analyse->u1_cu_size < 32)
   1712         {
                    /* Rebase the parent-local ids to CTB-wide indices (16 8x8    */
                    /* and 4 16x16 blocks per 32x32). The order matters: the 8x8  */
                    /* id must be computed while i4_16x16_id is still local.      */
   1713             i4_8x8_id += (i4_32x32_id << 4) + (i4_16x16_id << 2);
   1714             i4_16x16_id += (i4_32x32_id << 2);
   1715 
   1716             if(16 == ps_cu_tree_analyse->u1_cu_size)
   1717             {
   1718                 block_data_16x16_t *ps_data = &ps_cu_me_data->as_block_data[i4_16x16_id];
   1719 
   1720                 if(ps_cu_tree_analyse->u1_inter_eval_enable)
   1721                 {
   1722                     ihevce_populate_cu_struct(
   1723                         ps_ctxt,
   1724                         ps_cur_ipe_ctb,
   1725                         ps_cu_tree_analyse,
   1726                         ps_data->as_best_results,
   1727                         ps_cu_analyse,
   1728                         i4_32x32_id,
   1729 #if DISABLE_INTRA_WHEN_NOISY && USE_NOISE_TERM_IN_ENC_LOOP
   1730                         ps_cu_prms->u1_is_cu_noisy,
   1731 #endif
   1732                         ps_data->num_best_results);
   1733                 }
   1734                 else
   1735                 {
                            /* Inter evaluation disabled for this node: intra only */
   1736                     ps_cu_analyse->u1_num_inter_cands = 0;
   1737                     ps_cu_analyse->u1_best_is_intra = 1;
   1738                 }
   1739             }
   1740             else /* If CU size is 8 */
   1741             {
   1742                 block_data_8x8_t *ps_data = &ps_cu_me_data->as_8x8_block_data[i4_8x8_id];
   1743 
   1744                 if(ps_cu_tree_analyse->u1_inter_eval_enable)
   1745                 {
   1746                     ihevce_populate_cu_struct(
   1747                         ps_ctxt,
   1748                         ps_cur_ipe_ctb,
   1749                         ps_cu_tree_analyse,
   1750                         ps_data->as_best_results,
   1751                         ps_cu_analyse,
   1752                         i4_32x32_id,
   1753 #if DISABLE_INTRA_WHEN_NOISY && USE_NOISE_TERM_IN_ENC_LOOP
   1754                         ps_cu_prms->u1_is_cu_noisy,
   1755 #endif
   1756                         ps_data->num_best_results);
   1757                 }
   1758                 else
   1759                 {
   1760                     ps_cu_analyse->u1_num_inter_cands = 0;
   1761                     ps_cu_analyse->u1_best_is_intra = 1;
   1762                 }
   1763             }
   1764         }
   1765         else
   1766         {
                    /* Reached for CU size >= 32 with inter evaluation disabled */
   1767             ps_cu_analyse->u1_num_inter_cands = 0;
   1768             ps_cu_analyse->u1_best_is_intra = 1;
   1769         }
   1770     }
   1771     else
   1772     {
                /* I slice: no inter candidates at all */
   1773         ps_cu_analyse->u1_num_inter_cands = 0;
   1774         ps_cu_analyse->u1_best_is_intra = 1;
   1775     }
   1776 
   1777     if(!ps_ctxt->i1_cu_qp_delta_enable)
   1778     {
                /* Without CU level QP delta every CU uses the frame QP */
   1779         ps_cu_analyse->i1_cu_qp = ps_ctxt->i4_frame_qp;
   1780 
   1781         /*cu qp must be populated in cu_analyse_t struct*/
   1782         ps_ctxt->i4_cu_qp = ps_cu_analyse->i1_cu_qp;
   1783     }
   1784     else
   1785     {
                /* Only sanity checks on the copied activity factors here;        */
                /* i1_cu_qp is not assigned directly in this function on this     */
                /* path. NOTE(review): presumably it is derived elsewhere from    */
                /* the activity factors - confirm.                                */
   1786         ASSERT(ps_cu_analyse->i4_act_factor[0] > 0);
   1787         ASSERT(
   1788             ((ps_cu_analyse->i4_act_factor[1] > 0) && (ps_cu_analyse->u1_cu_size != 8)) ||
   1789             ((ps_cu_analyse->u1_cu_size == 8)));
   1790         ASSERT(
   1791             ((ps_cu_analyse->i4_act_factor[2] > 0) && (ps_cu_analyse->u1_cu_size == 32)) ||
   1792             ((ps_cu_analyse->u1_cu_size != 32)));
   1793     }
   1794 
   1795     if(ps_ctxt->u1_disable_intra_eval)
   1796     {
   1797         /* rdopt evaluation of intra disabled as inter is clear winner */
   1798         ps_cu_analyse->u1_num_intra_rdopt_cands = 0;
   1799 
   1800         /* all the modes invalidated */
   1801         ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255;
   1802         ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = 255;
   1803         ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_nxn[0][0] = 255;
   1804         ps_cu_analyse->u1_chroma_intra_pred_mode = 255;
   1805 
   1806         /* no intra candt to verify */
   1807         ps_cu_analyse->s_cu_intra_cand.b6_num_intra_cands = 0;
   1808     }
   1809 
   1810 #if DISABLE_L2_IPE_IN_PB_L1_IN_B
            /* Same invalidation as above, applied to 32x32 CUs of non-I slices   */
            /* in the P6 quality preset                                           */
   1811     if((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6) && (ps_cu_analyse->u1_cu_size == 32) &&
   1812        (ps_ctxt->i1_slice_type != ISLICE))
   1813     {
   1814         /* rdopt evaluation of intra disabled as inter is clear winner */
   1815         ps_cu_analyse->u1_num_intra_rdopt_cands = 0;
   1816 
   1817         /* all the modes invalidated */
   1818         ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255;
   1819         ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = 255;
   1820         ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_nxn[0][0] = 255;
   1821         ps_cu_analyse->u1_chroma_intra_pred_mode = 255;
   1822 
   1823         /* no intra candt to verify */
   1824         ps_cu_analyse->s_cu_intra_cand.b6_num_intra_cands = 0;
   1825     }
   1826 #endif
   1827 
            /* DISABLE_INTRA_WHEN_NOISY is used here as a run-time operand (and   */
            /* in #if above), so it has to expand to a constant expression        */
   1828     if(DISABLE_INTRA_WHEN_NOISY && ps_cu_prms->u1_is_cu_noisy)
   1829     {
   1830         ps_cu_analyse->u1_num_intra_rdopt_cands = 0;
   1831     }
   1832 
            /* NOTE(review): because of the "||", a set u1_intra_eval_enable in   */
            /* the CU tree repopulates the intra modes and forces the candidate   */
            /* count back to 1 even when one of the blocks above has just zeroed  */
            /* it - confirm this override is intended.                            */
   1833     if(ps_cu_analyse->u1_num_intra_rdopt_cands || ps_cu_tree_analyse->u1_intra_eval_enable)
   1834     {
   1835         ihevce_intra_mode_populator(
   1836             &ps_cu_analyse->s_cu_intra_cand,
   1837             ps_cur_ipe_ctb,
   1838             ps_cu_tree_analyse,
   1839             ps_ctxt->i1_slice_type,
   1840             ps_ctxt->i4_quality_preset);
   1841 
   1842         ps_cu_analyse->u1_num_intra_rdopt_cands = 1;
   1843     }
   1844 
            /* At least one of intra / inter must be left to evaluate */
   1845     ASSERT(!!ps_cu_analyse->u1_num_intra_rdopt_cands || ps_cu_analyse->u1_num_inter_cands);
   1846 
   1847     if(ps_ctxt->u1_use_top_at_ctb_boundary)
   1848     {
   1849         /* Wait till top data is ready          */
   1850         /* Currently checking till top right CU */
   1851         curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3);
   1852 
                /* Only CUs in the top row of the CTB perform the row-row sync */
   1853         if(0 == ps_cu_analyse->b3_cu_pos_y)
   1854         {
   1855             ihevce_dmgr_chk_row_row_sync(
   1856                 ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
   1857                 curr_cu_pos_in_row,
   1858                 cu_top_right_offset,
   1859                 cu_top_right_dep_pos,
   1860                 ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
   1861                 ps_ctxt->thrd_id);
   1862         }
   1863     }
   1864 
   1865 #if !DISABLE_TOP_SYNC
   1866     {
                /* Applies only to the CU at position (0,0) of a CTB with x       */
                /* offset 0 and non-zero y offset: the RDO entropy context is     */
                /* loaded from pu1_top_rt_cabac_state                             */
   1867         if(0 == ps_cu_analyse->b3_cu_pos_y)
   1868         {
   1869             if((0 == i4_ctb_x_off) && (i4_ctb_y_off != 0))
   1870             {
   1871                 if(ps_cu_analyse->b3_cu_pos_x == 0)
   1872                 {
   1873                     if(!ps_ctxt->u1_use_top_at_ctb_boundary)
   1874                     {
   1875                         /* Wait till top data is ready          */
   1876                         /* Currently checking till top right CU */
   1877                         curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3);
   1878 
                                /* NOTE(review): redundant, b3_cu_pos_y == 0 is   */
                                /* already guaranteed by the outermost check      */
   1879                         if(0 == ps_cu_analyse->b3_cu_pos_y)
   1880                         {
   1881                             ihevce_dmgr_chk_row_row_sync(
   1882                                 ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
   1883                                 curr_cu_pos_in_row,
   1884                                 cu_top_right_offset,
   1885                                 cu_top_right_dep_pos,
   1886                                 ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
   1887                                 ps_ctxt->thrd_id);
   1888                         }
   1889                     }
   1890 
   1891                     ihevce_entropy_rdo_copy_states(
   1892                         &ps_ctxt->s_rdopt_entropy_ctxt,
   1893                         ps_ctxt->pu1_top_rt_cabac_state,
   1894                         UPDATE_ENT_SYNC_RDO_STATE);
   1895                 }
   1896             }
   1897         }
   1898     }
   1899 #else
   1900     {
                /* Top sync disabled at build time: presets other than P6 still   */
                /* follow the same path as the branch above, P6 takes the "else   */
                /* if" below                                                      */
   1901         if((0 == ps_cu_analyse->b3_cu_pos_y) && (IHEVCE_QUALITY_P6 != ps_ctxt->i4_quality_preset))
   1902         {
   1903             if((0 == i4_ctb_x_off) && (i4_ctb_y_off != 0))
   1904             {
   1905                 if(ps_cu_analyse->b3_cu_pos_x == 0)
   1906                 {
   1907                     if(!ps_ctxt->u1_use_top_at_ctb_boundary)
   1908                     {
   1909                         /* Wait till top data is ready          */
   1910                         /* Currently checking till top right CU */
   1911                         curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3);
   1912 
                                /* NOTE(review): redundant, already checked above */
   1913                         if(0 == ps_cu_analyse->b3_cu_pos_y)
   1914                         {
   1915                             ihevce_dmgr_chk_row_row_sync(
   1916                                 ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
   1917                                 curr_cu_pos_in_row,
   1918                                 cu_top_right_offset,
   1919                                 cu_top_right_dep_pos,
   1920                                 ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
   1921                                 ps_ctxt->thrd_id);
   1922                         }
   1923                     }
   1924 
   1925                     ihevce_entropy_rdo_copy_states(
   1926                         &ps_ctxt->s_rdopt_entropy_ctxt,
   1927                         ps_ctxt->pu1_top_rt_cabac_state,
   1928                         UPDATE_ENT_SYNC_RDO_STATE);
   1929                 }
   1930             }
   1931         }
   1932         else if((0 == ps_cu_analyse->b3_cu_pos_y) && (IHEVCE_QUALITY_P6 == ps_ctxt->i4_quality_preset))
   1933         {
                    /* P6: for every CU in the top row of the CTB the RDO entropy */
                    /* context is loaded from the gau1_ihevc_cab_ctxts table      */
                    /* (indexed by init idc and frame QP) instead of from the     */
                    /* top-right CABAC state                                      */
   1934             UWORD8 u1_cabac_init_idc;
   1935             WORD8 i1_cabac_init_flag =
   1936                 ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt->ps_slice_hdr->i1_cabac_init_flag;
   1937 
                    /* I slice -> 0; P slice -> 1 (2 if flag set);                */
                    /* any other slice type -> 2 (1 if flag set)                  */
   1938             if(ps_ctxt->i1_slice_type == ISLICE)
   1939             {
   1940                 u1_cabac_init_idc = 0;
   1941             }
   1942             else if(ps_ctxt->i1_slice_type == PSLICE)
   1943             {
   1944                 u1_cabac_init_idc = i1_cabac_init_flag ? 2 : 1;
   1945             }
   1946             else
   1947             {
   1948                 u1_cabac_init_idc = i1_cabac_init_flag ? 1 : 2;
   1949             }
   1950 
   1951             ihevce_entropy_rdo_copy_states(
   1952                 &ps_ctxt->s_rdopt_entropy_ctxt,
   1953                 (UWORD8 *)gau1_ihevc_cab_ctxts[u1_cabac_init_idc][ps_ctxt->i4_frame_qp],
   1954                 UPDATE_ENT_SYNC_RDO_STATE);
   1955         }
   1956     }
   1957 #endif
   1958 
   1959     /*2 Multi- dimensional array based on trans size  of rounding factor to be added here */
   1960     /* arrays are for rounding factor corr. to 0-1 decision and 1-2 decision */
   1961     /* Currently the complete array will contain only single value*/
   1962     /*The rounding factor is calculated with the formula
   1963     Deadzone val = (((R1 - R0) * (2^(-8/3)) * lamMod) + 1)/2
   1964     rounding factor = (1 - DeadZone Val)
   1965 
   1966     Assumption: Cabac states of All the sub-blocks in the TU are considered independent
   1967     */
   1968 
   1969     /*As long as coef level rdoq is enabled perform this operation */
            /* CTB level rounding is refreshed only at the first CU of a CTB;     */
            /* NCTB level additionally requires the CTB column index              */
            /* (i4_ctb_x_off >> 6) to be a multiple of NUM_CTB_QUANT_ROUNDING     */
   1970     is_first_cu_in_ctb = ((0 == ps_cu_analyse->b3_cu_pos_x) && (0 == ps_cu_analyse->b3_cu_pos_y));
   1971     is_ctb_level_quant_rounding =
   1972         ((ps_ctxt->i4_quant_rounding_level == CTB_LEVEL_QUANT_ROUNDING) &&
   1973          (1 == is_first_cu_in_ctb));
   1974     is_nctb_level_quant_rounding =
   1975         ((ps_ctxt->i4_quant_rounding_level == NCTB_LEVEL_QUANT_ROUNDING) &&
   1976          (1 == is_first_cu_in_ctb) && (((i4_ctb_x_off >> 6) % NUM_CTB_QUANT_ROUNDING) == 0));
   1977 
   1978     if((ps_ctxt->i4_quant_rounding_level == CU_LEVEL_QUANT_ROUNDING) ||
   1979        (ps_ctxt->i4_quant_rounding_level == TU_LEVEL_QUANT_ROUNDING) ||
   1980        (1 == is_ctb_level_quant_rounding) || (1 == is_nctb_level_quant_rounding))
   1981     {
                /* Despite the i4_ prefix both modifiers are doubles */
   1982         double i4_lamda_modifier, i4_lamda_modifier_uv;
   1983         WORD32 trans_size, trans_size_cr;
   1984         trans_size = ps_cu_analyse->u1_cu_size;
   1985 
                /* Largest transform size to generate factors for: MAX_TU_SIZE    */
                /* for CTB / NCTB level, otherwise the CU size (halved for 64)    */
   1986         if((1 == is_ctb_level_quant_rounding) || (1 == is_nctb_level_quant_rounding))
   1987         {
   1988             trans_size = MAX_TU_SIZE;
   1989         }
   1990         else
   1991         {
   1992             if(ps_cu_analyse->u1_cu_size == 64)
   1993             {
   1994                 trans_size >>= 1;
   1995             }
   1996         }
   1997 
   1998         /*Chroma trans size = half of luma trans size */
   1999         trans_size_cr = trans_size >> 1;
   2000 
                /* B slices in a non-zero temporal layer scale the modifier by    */
                /* (qp - 12) / 6 clipped to [2.0, 4.0]                            */
                /* NOTE(review): ps_ctxt->i4_cu_qp is written in this function    */
                /* only when CU QP delta is disabled; otherwise the value read    */
                /* here comes from elsewhere - confirm it is current.             */
   2001         if((BSLICE == ps_ctxt->i1_slice_type) && (ps_ctxt->i4_temporal_layer_id))
   2002         {
   2003             i4_lamda_modifier = ps_ctxt->i4_lamda_modifier *
   2004                                 CLIP3((((double)(ps_ctxt->i4_cu_qp - 12)) / 6.0), 2.00, 4.00);
   2005             i4_lamda_modifier_uv =
   2006                 ps_ctxt->i4_uv_lamda_modifier *
   2007                 CLIP3((((double)(ps_ctxt->i4_chrm_cu_qp - 12)) / 6.0), 2.00, 4.00);
   2008         }
   2009         else
   2010         {
   2011             i4_lamda_modifier = ps_ctxt->i4_lamda_modifier;
   2012             i4_lamda_modifier_uv = ps_ctxt->i4_uv_lamda_modifier;
   2013         }
                /* A constant modifier, when enabled, overrides the values above */
   2014         if(ps_ctxt->i4_use_const_lamda_modifier)
   2015         {
   2016             if(ISLICE == ps_ctxt->i1_slice_type)
   2017             {
   2018                 i4_lamda_modifier = ps_ctxt->f_i_pic_lamda_modifier;
   2019                 i4_lamda_modifier_uv = ps_ctxt->f_i_pic_lamda_modifier;
   2020             }
   2021             else
   2022             {
   2023                 i4_lamda_modifier = CONST_LAMDA_MOD_VAL;
   2024                 i4_lamda_modifier_uv = CONST_LAMDA_MOD_VAL;
   2025             }
   2026         }
   2027 
                /* Luma: clear and regenerate both factor arrays for every        */
                /* transform size from trans_size down to 4. The buffers are      */
                /* indexed by trans_size >> 3 (4 -> 0, 8 -> 1, 16 -> 2, 32 -> 4). */
   2028         do
   2029         {
   2030             memset(
   2031                 ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[trans_size >> 3],
   2032                 0,
   2033                 trans_size * trans_size * sizeof(WORD32));
   2034             memset(
   2035                 ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[trans_size >> 3],
   2036                 0,
   2037                 trans_size * trans_size * sizeof(WORD32));
   2038 
   2039             /*ps_ctxt->i4_quant_rnd_factor[intra_flag], is currently not used */
   2040             ihevce_quant_rounding_factor_gen(
   2041                 trans_size,
   2042                 1,  //is_luma = 1
   2043                 &ps_ctxt->s_rdopt_entropy_ctxt,
   2044                 ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[trans_size >> 3],
   2045                 ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[trans_size >> 3],
   2046                 i4_lamda_modifier,
   2047                 0);  //is_tu_level_quant rounding = 0
   2048 
   2049             trans_size = trans_size >> 1;
   2050 
   2051         } while(trans_size >= 4);
   2052 
   2053         /*CHROMA Quant Rounding is to be enabled with CU/TU/CTB/NCTB Luma rounding */
   2054         /*Please note chroma is calculated only for 1st TU at TU level Rounding */
   2055         if(ps_ctxt->i4_chroma_quant_rounding_level == CHROMA_QUANT_ROUNDING)
   2056         {
                    /* Same loop as for luma, on the chroma buffers, starting at  */
                    /* half the luma start size and using the chroma modifier     */
   2057             do
   2058             {
   2059                 memset(
   2060                     ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[trans_size_cr >> 3],
   2061                     0,
   2062                     trans_size_cr * trans_size_cr * sizeof(WORD32));
   2063                 memset(
   2064                     ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[trans_size_cr >> 3],
   2065                     0,
   2066                     trans_size_cr * trans_size_cr * sizeof(WORD32));
   2067 
   2068                 ihevce_quant_rounding_factor_gen(
   2069                     trans_size_cr,
   2070                     0,  //is_luma = 0
   2071                     &ps_ctxt->s_rdopt_entropy_ctxt,
   2072                     ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[trans_size_cr >> 3],
   2073                     ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[trans_size_cr >> 3],
   2074                     i4_lamda_modifier_uv,
   2075                     0);  //is_tu_level_quant rounding = 0
   2076 
   2077                 trans_size_cr = trans_size_cr >> 1;
   2078 
   2079             } while(trans_size_cr >= 4);
   2080         }
   2081     }
   2082 
   2083 #if DISABLE_INTRAS_IN_BPIC
            /* In B slices intra evaluation is dropped whenever at least one      */
            /* inter candidate exists                                             */
   2084     if((ps_ctxt->i1_slice_type == BSLICE) && (ps_cu_analyse->u1_num_inter_cands))
   2085     {
   2086         ps_cu_analyse->u1_num_intra_rdopt_cands = 0;
   2087     }
   2088 #endif
   2089 
            /* Hand the prepared candidates to the mode decision; its return      */
            /* value is passed straight back to the caller                        */
   2090     rdopt_best_cost = ihevce_cu_mode_decide(
   2091         ps_ctxt,
   2092         ps_cu_prms,
   2093         ps_cu_analyse,
   2094         ps_final_mode_state,
   2095         pu1_ecd_data,
   2096         ps_col_pu,
   2097         pu1_col_pu_map,
   2098         col_start_pu_idx);
   2099 
   2100     return rdopt_best_cost;
   2101 }
   2102 
   2103 /**
   2104 ******************************************************************************
   2105 * \if Function name : ihevce_enc_loop_cu_bot_copy \endif
   2106 *
   2107 * \brief
   2108 *    This function copy the bottom data at CU level to row buffers
   2109 *
   2110 * \date
   2111 *    18/09/2012
   2112 *
   2113 * \author
   2114 *    Ittiam
   2115 *
   2116 * \return
   2117 *    None
   2118 * List of Functions
   2119 *
   2120 *
   2121 ******************************************************************************
   2122 */
   2123 void ihevce_enc_loop_cu_bot_copy(
   2124     ihevce_enc_loop_ctxt_t *ps_ctxt,
   2125     enc_loop_cu_prms_t *ps_cu_prms,
   2126     ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt,
   2127     WORD32 curr_cu_pos_in_row,
   2128     WORD32 curr_cu_pos_in_ctb)
   2129 {
   2130     /* ---------------------------------------------- */
   2131     /* copy the bottom row  data to the row buffers   */
   2132     /* ---------------------------------------------- */
   2133     nbr_4x4_t *ps_top_nbr;
   2134     UWORD8 *pu1_buff;
   2135     UWORD8 *pu1_luma_top, *pu1_chrm_top;
   2136     WORD32 nbr_strd;
   2137 
   2138     WORD32 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
   2139 
   2140     /* derive the appropraite pointers */
   2141     pu1_luma_top = (UWORD8 *)ps_ctxt->pv_bot_row_luma + curr_cu_pos_in_row;
   2142     pu1_chrm_top = (UWORD8 *)ps_ctxt->pv_bot_row_chroma + curr_cu_pos_in_row;
   2143     ps_top_nbr = ps_ctxt->ps_bot_row_nbr + (curr_cu_pos_in_row >> 2);
   2144     nbr_strd = ps_cu_prms->i4_ctb_size >> 2;
   2145 
   2146     /* copy bottom luma data */
   2147     pu1_buff = ps_cu_prms->pu1_luma_recon +
   2148                (ps_cu_prms->i4_luma_recon_stride * (ps_cu_prms->i4_ctb_size - 1));
   2149 
   2150     pu1_buff += curr_cu_pos_in_ctb;
   2151 
   2152     memcpy(pu1_luma_top, pu1_buff, ps_enc_out_ctxt->u1_cu_size);
   2153 
   2154     /* copy bottom chroma data cb and cr pixel interleaved */
   2155     pu1_buff = ps_cu_prms->pu1_chrm_recon + (ps_cu_prms->i4_chrm_recon_stride *
   2156                                              ((ps_cu_prms->i4_ctb_size >> (0 == u1_is_422)) - 1));
   2157 
   2158     pu1_buff += curr_cu_pos_in_ctb;
   2159 
   2160     memcpy(pu1_chrm_top, pu1_buff, ps_enc_out_ctxt->u1_cu_size);
   2161 
   2162     /* store the nbr 4x4 data at cu level */
   2163     {
   2164         nbr_4x4_t *ps_nbr;
   2165 
   2166         /* copy bottom nbr data */
   2167         ps_nbr = &ps_ctxt->as_ctb_nbr_arr[0];
   2168         ps_nbr += ((ps_cu_prms->i4_ctb_size >> 2) - 1) * nbr_strd;
   2169 
   2170         ps_nbr += (curr_cu_pos_in_ctb >> 2);
   2171 
   2172         memcpy(ps_top_nbr, ps_nbr, (ps_enc_out_ctxt->u1_cu_size >> 2) * sizeof(nbr_4x4_t));
   2173     }
   2174     return;
   2175 }
   2176 
   2177 /**
   2178 ******************************************************************************
   2179 * \if Function name : ihevce_update_final_cu_results \endif
   2180 *
   2181 * \brief
   2182 *
   2183 * \return
   2184 *    None
   2185 *
   2186 * \author
   2187 *  Ittiam
   2188 *
   2189 *****************************************************************************
   2190 */
   2191 void ihevce_update_final_cu_results(
   2192     ihevce_enc_loop_ctxt_t *ps_ctxt,
   2193     ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt,
   2194     enc_loop_cu_prms_t *ps_cu_prms,
   2195     pu_col_mv_t **pps_row_col_pu,
   2196     WORD32 *pi4_col_pu_map_idx,
   2197     cu_final_update_prms *ps_cu_update_prms,
   2198     WORD32 ctb_ctr,
   2199     WORD32 vert_ctb_ctr)
   2200 {
   2201     WORD32 curr_cu_pos_in_row;
   2202 
   2203     cu_enc_loop_out_t *ps_cu_final = *ps_cu_update_prms->pps_cu_final;
   2204     pu_t **pps_row_pu = ps_cu_update_prms->pps_row_pu;
   2205     tu_enc_loop_out_t **pps_row_tu = ps_cu_update_prms->pps_row_tu;
   2206     UWORD8 **ppu1_row_ecd_data = ps_cu_update_prms->ppu1_row_ecd_data;
   2207     WORD32 *pi4_num_pus_in_ctb = ps_cu_update_prms->pi4_num_pus_in_ctb;
   2208     UWORD32 u4_cu_size = ps_enc_out_ctxt->u1_cu_size;
   2209     ps_cu_final->b3_cu_pos_x = ps_enc_out_ctxt->b3_cu_pos_x;
   2210     ps_cu_final->b3_cu_pos_y = ps_enc_out_ctxt->b3_cu_pos_y;
   2211 
   2212     ps_cu_final->b4_cu_size = ps_enc_out_ctxt->u1_cu_size >> 3;
   2213 
   2214     /* store the current pu and tu pointes */
   2215     ps_cu_final->ps_pu = *pps_row_pu;
   2216     ps_cu_final->ps_enc_tu = *pps_row_tu;
   2217     curr_cu_pos_in_row = ctb_ctr * ps_cu_prms->i4_ctb_size + (ps_cu_final->b3_cu_pos_x << 3);
   2218 
   2219     ihevce_store_cu_final(ps_ctxt, ps_cu_final, *ppu1_row_ecd_data, ps_enc_out_ctxt, ps_cu_prms);
   2220 
   2221     if(NULL != pps_row_col_pu)
   2222     {
   2223         (*pps_row_col_pu) += ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
   2224     }
   2225     if(NULL != pi4_col_pu_map_idx)
   2226     {
   2227         (*pi4_col_pu_map_idx) += ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
   2228     }
   2229     (*pi4_num_pus_in_ctb) += ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
   2230     (*pps_row_tu) += ps_cu_final->u2_num_tus_in_cu;
   2231     (*pps_row_pu) += ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
   2232     (*ppu1_row_ecd_data) += ps_enc_out_ctxt->ps_cu_prms->i4_num_bytes_ecd_data;
   2233 
   2234     (*ps_cu_update_prms->pps_cu_final)++;
   2235     (*ps_cu_update_prms->pu1_num_cus_in_ctb_out)++;
   2236 
   2237     /* Updated for each CU in bottom row  of CTB */
   2238     if(((ps_cu_final->b3_cu_pos_y << 3) + u4_cu_size) == ps_ctxt->u4_cur_ctb_ht)
   2239     {
   2240         /* copy the bottom data to row buffers */
   2241         ((pf_enc_loop_cu_bot_copy)ps_ctxt->pv_enc_loop_cu_bot_copy)(
   2242             ps_ctxt,
   2243             ps_cu_prms,
   2244             ps_enc_out_ctxt,
   2245             curr_cu_pos_in_row,
   2246             (ps_enc_out_ctxt->b3_cu_pos_x << 3));
   2247 
   2248         /* Setting Dependency for CU TopRight */
   2249         ihevce_dmgr_set_row_row_sync(
   2250             ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
   2251             (curr_cu_pos_in_row + ps_enc_out_ctxt->u1_cu_size),
   2252             vert_ctb_ctr,
   2253             ps_ctxt->i4_tile_col_idx /* Col Tile No. */);
   2254 
   2255         /* Setting Dependency for Entropy to consume is made at CTB level */
   2256     }
   2257 }
   2258 
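/**
******************************************************************************
* \if Function name : ihevce_cu_recurse_decide \endif
*
* \brief
*    Coding Unit mode decide function. Performs RD opt and decides the best mode
*
* \param[in] ps_ctxt : pointer to enc_loop module context
* \param[in] ps_cu_prms : pointer to coding unit params (position, buffer pointers)
* \param[in,out] ps_cu_tree_analyse : pointer to the CU tree node being analysed
* \param[in] ps_cu_tree_analyse_parent : pointer to the parent CU tree node (unused)
* \param[in] ps_cur_ipe_ctb : IPE CTB analysis data, passed on to the RDO computation
* \param[in] ps_cu_me_data : ME CTB data, passed on to the RDO computation
* \param[in,out] pps_col_pu : collocated PU buffer pointer
* \param[in,out] ps_cu_update_prms : final CU update params (CU final, PU, TU and
*                ECD data pointers)
* \param[out] pu1_col_pu_map : collocated PU map buffer pointer
* \param[in,out] pi4_col_start_pu_idx : PU index start value
* \param[in] i4_tree_depth : depth of the current node in the CU tree
* \param[in] i4_ctb_x_off : x offset of the CTB within the frame
* \param[in] i4_ctb_y_off : y offset of the CTB within the frame
* \param[in] cur_ctb_ht : height of the current CTB
*
* \return
*    WORD32 count; recursive calls accumulate it into the number of children
*    encoded
*
*
* \author
*  Ittiam
*
*****************************************************************************
*/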
   2284 WORD32 ihevce_cu_recurse_decide(
   2285     ihevce_enc_loop_ctxt_t *ps_ctxt,
   2286     enc_loop_cu_prms_t *ps_cu_prms,
   2287     cur_ctb_cu_tree_t *ps_cu_tree_analyse,
   2288     cur_ctb_cu_tree_t *ps_cu_tree_analyse_parent,
   2289     ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
   2290     me_ctb_data_t *ps_cu_me_data,
   2291     pu_col_mv_t **pps_col_pu,
   2292     cu_final_update_prms *ps_cu_update_prms,
   2293     UWORD8 *pu1_col_pu_map,
   2294     WORD32 *pi4_col_start_pu_idx,
   2295     WORD32 i4_tree_depth,
   2296     WORD32 i4_ctb_x_off,
   2297     WORD32 i4_ctb_y_off,
   2298     WORD32 cur_ctb_ht)
   2299 {
   2300     cur_ctb_cu_tree_t *ps_cu_tree_analyse_child[4];
   2301     final_mode_state_t s_final_mode_state;
   2302 
   2303     WORD32 i;
   2304     WORD32 child_nodes_null;
   2305     LWORD64 i8_least_child_cost;
   2306 
   2307     WORD32 num_children_encoded = 0;
   2308 
   2309     /* Take backup of collocated start PU index for parent node rdo for PQ */
   2310     WORD32 i4_col_pu_idx_bkup = *pi4_col_start_pu_idx;
   2311     pu_col_mv_t *ps_col_mv_bkup = *pps_col_pu;
   2312 
   2313 #if ENABLE_CU_SPLIT_FLAG_RATE_ESTIMATION
   2314     WORD32 x0_frm = i4_ctb_x_off + (ps_cu_tree_analyse->b3_cu_pos_x << 3);
   2315     WORD32 y0_frm = i4_ctb_y_off + (ps_cu_tree_analyse->b3_cu_pos_y << 3);
   2316     WORD32 pic_wd = ps_ctxt->s_sao_ctxt_t.ps_sps->i2_pic_width_in_luma_samples;
   2317     WORD32 pic_ht = ps_ctxt->s_sao_ctxt_t.ps_sps->i2_pic_height_in_luma_samples;
   2318     WORD32 log2_min_cb_size = ps_ctxt->s_sao_ctxt_t.ps_sps->i1_log2_min_coding_block_size;
   2319     WORD32 cu_size = ps_cu_tree_analyse->u1_cu_size;
   2320 
   2321     /* bits for coding split_cu_flag = 1 */
   2322     WORD32 split_cu1_bits_q12 = 0;
   2323 
   2324     /* bits for coding split_cu_flag = 0 */
   2325     WORD32 split_cu0_bits_q12 = 0;
   2326 #endif
   2327 
   2328     UWORD8 u1_is_cu_noisy = ps_ctxt->u1_is_stasino_enabled
   2329                                 ? ihevce_determine_cu_noise_based_on_8x8Blk_data(
   2330                                       ps_cu_prms->pu1_is_8x8Blk_noisy,
   2331                                       ((ps_cu_tree_analyse->b3_cu_pos_x << 3) >> 4) << 4,
   2332                                       ((ps_cu_tree_analyse->b3_cu_pos_y << 3) >> 4) << 4,
   2333                                       MAX(16, ps_cu_tree_analyse->u1_cu_size))
   2334                                 : 0;
   2335 
   2336 #if ENABLE_CU_SPLIT_FLAG_RATE_ESTIMATION
   2337     LWORD64 i8_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
   2338 #endif
   2339 
   2340     (void)ps_cu_tree_analyse_parent;
   2341 
   2342 #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
   2343     if(!ps_ctxt->u1_enable_psyRDOPT && u1_is_cu_noisy)
   2344     {
   2345         ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
   2346         ps_ctxt->i8_cl_ssd_lambda_chroma_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
   2347     }
   2348 #endif
   2349 
   2350     if(u1_is_cu_noisy && !ps_ctxt->u1_enable_psyRDOPT)
   2351     {
   2352         i8_lambda_qf = ((float)i8_lambda_qf * (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
   2353     }
   2354 
   2355     ps_cu_tree_analyse_child[0] = ps_cu_tree_analyse->ps_child_node_tl;
   2356     ps_cu_tree_analyse_child[1] = ps_cu_tree_analyse->ps_child_node_tr;
   2357     ps_cu_tree_analyse_child[2] = ps_cu_tree_analyse->ps_child_node_bl;
   2358     ps_cu_tree_analyse_child[3] = ps_cu_tree_analyse->ps_child_node_br;
   2359 
   2360     child_nodes_null =
   2361         ((ps_cu_tree_analyse_child[0] == NULL) + (ps_cu_tree_analyse_child[1] == NULL) +
   2362          (ps_cu_tree_analyse_child[2] == NULL) + (ps_cu_tree_analyse_child[3] == NULL));
   2363 
   2364 #if ENABLE_CU_SPLIT_FLAG_RATE_ESTIMATION
   2365 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
   2366     if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
   2367 #endif
   2368     {
   2369         /*----------------------------------------------*/
   2370         /* ---------- CU Depth Bit Estimation --------- */
   2371         /*----------------------------------------------*/
   2372 
   2373         /* Encode cu split flags based on following conditions; See section 7.3.8*/
   2374         if(((x0_frm + cu_size) <= pic_wd) && ((y0_frm + cu_size) <= pic_ht) &&
   2375            (cu_size > (1 << log2_min_cb_size))) /* &&(ps_entropy_ctxt->i1_ctb_num_pcm_blks == 0)) */
   2376         {
   2377             WORD32 left_cu_depth = 0;
   2378             WORD32 top_cu_depth = 0;
   2379             WORD32 pos_x_4x4 = ps_cu_tree_analyse->b3_cu_pos_x << 1;
   2380             WORD32 pos_y_4x4 = ps_cu_tree_analyse->b3_cu_pos_y << 1;
   2381             WORD32 num_4x4_in_ctb = (ps_cu_prms->i4_ctb_size >> 2);
   2382             WORD32 cur_4x4_in_ctb = pos_x_4x4 + (pos_y_4x4 * num_4x4_in_ctb);
   2383             UWORD8 u1_split_cu_flag_cab_model;
   2384             WORD32 split_cu_ctxt_inc;
   2385 
   2386             /* Left and Top CU depth is required for cabac context */
   2387 
   2388             /* CU left */
   2389             if(0 == pos_x_4x4)
   2390             {
   2391                 /* CTB boundary */
   2392                 if(i4_ctb_x_off)
   2393                 {
   2394                     left_cu_depth = ps_ctxt->as_left_col_nbr[pos_y_4x4].b2_cu_depth;
   2395                 }
   2396             }
   2397             else
   2398             {
   2399                 /* inside CTB */
   2400                 left_cu_depth = ps_ctxt->as_ctb_nbr_arr[cur_4x4_in_ctb - 1].b2_cu_depth;
   2401             }
   2402 
   2403             /* CU top */
   2404             if(0 == pos_y_4x4)
   2405             {
   2406                 /* CTB boundary */
   2407                 if(i4_ctb_y_off)
   2408                 {
   2409                     /* Wait till top cu depth is available */
   2410                     ihevce_dmgr_chk_row_row_sync(
   2411                         ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
   2412                         (i4_ctb_x_off) + (pos_x_4x4 << 2),
   2413                         4,
   2414                         ((i4_ctb_y_off >> 6) - 1),
   2415                         ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
   2416                         ps_ctxt->thrd_id);
   2417 
   2418                     top_cu_depth =
   2419                         ps_ctxt->ps_top_row_nbr[(i4_ctb_x_off >> 2) + pos_x_4x4].b2_cu_depth;
   2420                 }
   2421             }
   2422             else
   2423             {
   2424                 /* inside CTB */
   2425                 top_cu_depth = ps_ctxt->as_ctb_nbr_arr[cur_4x4_in_ctb - num_4x4_in_ctb].b2_cu_depth;
   2426             }
   2427 
   2428             split_cu_ctxt_inc = IHEVC_CAB_SPLIT_CU_FLAG + (left_cu_depth > i4_tree_depth) +
   2429                                 (top_cu_depth > i4_tree_depth);
   2430 
   2431             u1_split_cu_flag_cab_model =
   2432                 ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][split_cu_ctxt_inc];
   2433 
   2434             /* bits for coding split_cu_flag = 1 */
   2435             split_cu1_bits_q12 = gau2_ihevce_cabac_bin_to_bits[u1_split_cu_flag_cab_model ^ 1];
   2436 
   2437             /* bits for coding split_cu_flag = 0 */
   2438             split_cu0_bits_q12 = gau2_ihevce_cabac_bin_to_bits[u1_split_cu_flag_cab_model ^ 0];
   2439 
   2440             /* update the cu split cabac context of all child nodes before evaluating child */
   2441             for(i = (i4_tree_depth + 1); i < 4; i++)
   2442             {
   2443                 ps_ctxt->au1_rdopt_recur_ctxt_models[i][split_cu_ctxt_inc] =
   2444                     gau1_ihevc_next_state[(u1_split_cu_flag_cab_model << 1) | 1];
   2445             }
   2446 
   2447             /* update the cu split cabac context of the parent node with split flag = 0 */
   2448             ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][split_cu_ctxt_inc] =
   2449                 gau1_ihevc_next_state[(u1_split_cu_flag_cab_model << 1) | 0];
   2450         }
   2451     }
   2452 #endif
   2453 
   2454     /* If all the child nodes are null, then do rdo for this node and return the cost */
   2455     if((1 == ps_cu_tree_analyse->is_node_valid) && (4 == child_nodes_null))
   2456     {
   2457         WORD32 i4_num_bytes_ecd_data;
   2458 
   2459 #if(PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS)
   2460         COPY_CABAC_STATES(
   2461             &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
   2462             &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][0],
   2463             IHEVC_CAB_CTXT_END * sizeof(UWORD8));
   2464 #else
   2465         if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
   2466         {
   2467             COPY_CABAC_STATES(
   2468                 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
   2469                 &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][0],
   2470                 IHEVC_CAB_CTXT_END * sizeof(UWORD8));
   2471         }
   2472 #endif
   2473 
   2474         ps_cu_prms->u1_is_cu_noisy = u1_is_cu_noisy;
   2475         ihevce_update_pred_qp(
   2476             ps_ctxt, ps_cu_tree_analyse->b3_cu_pos_x, ps_cu_tree_analyse->b3_cu_pos_y);
   2477         /* DO rdo for current node here */
   2478         /* return rdo cost for current node*/
   2479         ps_cu_tree_analyse->i8_best_rdopt_cost = ihevce_compute_rdo(
   2480             ps_ctxt,
   2481             ps_cu_prms,
   2482             ps_cu_tree_analyse,
   2483             ps_cur_ipe_ctb,
   2484             ps_cu_me_data,
   2485             *pps_col_pu,
   2486             &s_final_mode_state,
   2487             pu1_col_pu_map,
   2488             *ps_cu_update_prms->ppu1_row_ecd_data,
   2489             *pi4_col_start_pu_idx,
   2490             i4_ctb_x_off,
   2491             i4_ctb_y_off);
   2492 
   2493         if((((ps_cu_tree_analyse->b3_cu_pos_y << 3) + ps_cu_tree_analyse->u1_cu_size) ==
   2494             cur_ctb_ht) &&
   2495            (ps_cu_tree_analyse->b3_cu_pos_x == 0) && (i4_ctb_x_off == 0))
   2496         {
   2497             /* copy the state to row level context after 1st Cu, in the Last CU row of CTB */
   2498             /* copy current ctb CU states into a entropy sync state */
   2499             /* to be used for next row                              */
   2500             COPY_CABAC_STATES(
   2501                 ps_ctxt->pu1_curr_row_cabac_state,
   2502                 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
   2503                 IHEVC_CAB_CTXT_END * sizeof(UWORD8));
   2504         }
   2505 
   2506 #if(PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS)
   2507         {
   2508 #if ENABLE_CU_SPLIT_FLAG_RATE_ESTIMATION
   2509             /* Add parent split cu = 0 cost signalling */
   2510             ps_cu_tree_analyse->i8_best_rdopt_cost += COMPUTE_RATE_COST_CLIP30(
   2511                 split_cu0_bits_q12, i8_lambda_qf, (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
   2512 #endif
   2513             for(i = (i4_tree_depth); i < 4; i++)
   2514             {
   2515                 COPY_CABAC_STATES(
   2516                     &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
   2517                     &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
   2518                     IHEVC_CAB_CTXT_END * sizeof(UWORD8));
   2519             }
   2520         }
   2521 #else
   2522         if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
   2523         {
   2524 #if ENABLE_CU_SPLIT_FLAG_RATE_ESTIMATION
   2525             /* Add parent split cu = 0 cost signalling */
   2526             ps_cu_tree_analyse->i8_best_rdopt_cost += COMPUTE_RATE_COST_CLIP30(
   2527                 split_cu0_bits_q12, i8_lambda_qf, (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
   2528 #endif
   2529 
   2530             for(i = (i4_tree_depth); i < 4; i++)
   2531             {
   2532                 COPY_CABAC_STATES(
   2533                     &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
   2534                     &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
   2535                     IHEVC_CAB_CTXT_END * sizeof(UWORD8));
   2536             }
   2537         }
   2538 #endif
   2539 
   2540         ((pf_store_cu_results)ps_ctxt->pv_store_cu_results)(
   2541             ps_ctxt, ps_cu_prms, &s_final_mode_state);
   2542 
   2543 #if(!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS)
   2544         if(ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P2)
   2545         {
   2546             ihevce_update_final_cu_results(
   2547                 ps_ctxt,
   2548                 ps_ctxt->ps_enc_out_ctxt,
   2549                 ps_cu_prms,
   2550                 pps_col_pu,
   2551                 pi4_col_start_pu_idx,
   2552                 ps_cu_update_prms,
   2553                 i4_ctb_x_off >> 6,
   2554                 i4_ctb_y_off >> 6);
   2555         }
   2556         else
   2557         {
   2558             /* ---- copy the luma & chroma coeffs to final output -------- */
   2559             i4_num_bytes_ecd_data = ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->i4_num_bytes_ecd_data;
   2560 
   2561             if(0 != i4_num_bytes_ecd_data)
   2562             {
   2563                 memcpy(
   2564                     ps_ctxt->pu1_ecd_data,
   2565                     &ps_ctxt->pu1_cu_recur_coeffs[0],
   2566                     i4_num_bytes_ecd_data * sizeof(UWORD8));
   2567 
   2568                 ps_ctxt->pu1_ecd_data += i4_num_bytes_ecd_data;
   2569             }
   2570 
   2571             /* Collocated PU updates */
   2572             *pps_col_pu += ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
   2573             *pi4_col_start_pu_idx += ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
   2574         }
   2575 #else
   2576         /* ---- copy the luma & chroma coeffs to final output -------- */
   2577         i4_num_bytes_ecd_data = ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->i4_num_bytes_ecd_data;
   2578         if(0 != i4_num_bytes_ecd_data)
   2579         {
   2580             memcpy(
   2581                 ps_ctxt->pu1_ecd_data,
   2582                 &ps_ctxt->pu1_cu_recur_coeffs[0],
   2583                 i4_num_bytes_ecd_data * sizeof(UWORD8));
   2584 
   2585             ps_ctxt->pu1_ecd_data += i4_num_bytes_ecd_data;
   2586         }
   2587 
   2588         /* Collocated PU updates */
   2589         *pps_col_pu += ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
   2590         *pi4_col_start_pu_idx += ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
   2591 #endif
   2592 
   2593         ps_ctxt->ps_enc_out_ctxt++;
   2594         num_children_encoded++;
   2595     }
   2596     else
   2597     {
   2598         i8_least_child_cost = 0;
   2599 
   2600         for(i = 0; i < 4; i++)
   2601         {
   2602             if(ps_cu_tree_analyse_child[i] != NULL)
   2603             {
   2604                 num_children_encoded += ihevce_cu_recurse_decide(
   2605                     ps_ctxt,
   2606                     ps_cu_prms,
   2607                     ps_cu_tree_analyse_child[i],
   2608                     ps_cu_tree_analyse,
   2609                     ps_cur_ipe_ctb,
   2610                     ps_cu_me_data,
   2611                     pps_col_pu,
   2612                     ps_cu_update_prms,
   2613                     pu1_col_pu_map,
   2614                     pi4_col_start_pu_idx,
   2615                     i4_tree_depth + 1,
   2616                     i4_ctb_x_off,
   2617                     i4_ctb_y_off,
   2618                     cur_ctb_ht);
   2619 
   2620                 /* In case of incomplete ctb, */
   2621                 //if(MAX_COST != ps_cu_tree_analyse_child[i]->i4_best_rdopt_cost)
   2622                 if(((ULWORD64)(
   2623                        i8_least_child_cost + ps_cu_tree_analyse_child[i]->i8_best_rdopt_cost)) >
   2624                    MAX_COST_64)
   2625                 {
   2626                     i8_least_child_cost = MAX_COST_64;
   2627                 }
   2628                 else
   2629                 {
   2630                     i8_least_child_cost += ps_cu_tree_analyse_child[i]->i8_best_rdopt_cost;
   2631                 }
   2632             }
   2633             else
   2634             {
   2635                 /* If the child node is NULL, return MAX_COST*/
   2636                 i8_least_child_cost = MAX_COST_64;
   2637             }
   2638         }
   2639 
   2640         if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
   2641         {
   2642 #if !ENABLE_4CTB_EVALUATION
   2643             if((ps_cu_tree_analyse->u1_cu_size == 64) && (num_children_encoded > 10) &&
   2644                (ps_ctxt->i1_slice_type != ISLICE))
   2645             {
   2646                 ps_cu_tree_analyse->is_node_valid = 0;
   2647             }
   2648 #endif
   2649         }
   2650 
   2651         /* If current CU node is valid, do rdo for the node and decide btwn child nodes and parent nodes  */
   2652         if(ps_cu_tree_analyse->is_node_valid)
   2653         {
   2654             UWORD8 au1_cu_pu_map[(MAX_CTB_SIZE / MIN_PU_SIZE) * (MAX_CTB_SIZE / MIN_PU_SIZE)];
   2655             pu_col_mv_t as_col_mv[2]; /* Max of 2 PUs only per CU */
   2656 
   2657             WORD32 i4_col_pu_idx_start = i4_col_pu_idx_bkup;
   2658 
   2659             /* Copy the collocated PU map to the local array */
   2660             memcpy(
   2661                 au1_cu_pu_map,
   2662                 pu1_col_pu_map,
   2663                 (MAX_CTB_SIZE / MIN_PU_SIZE) * (MAX_CTB_SIZE / MIN_PU_SIZE));
   2664 
   2665 #if(PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS)
   2666             COPY_CABAC_STATES(
   2667                 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
   2668                 &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][0],
   2669                 IHEVC_CAB_CTXT_END * sizeof(UWORD8));
   2670 
   2671             /* Reset the nbr maps while computing Parent CU node ()*/
   2672             /* set the neighbour map to 0 */
   2673             ihevce_set_nbr_map(
   2674                 ps_ctxt->pu1_ctb_nbr_map,
   2675                 ps_ctxt->i4_nbr_map_strd,
   2676                 (ps_cu_tree_analyse->b3_cu_pos_x << 1),
   2677                 (ps_cu_tree_analyse->b3_cu_pos_y << 1),
   2678                 (ps_cu_tree_analyse->u1_cu_size >> 2),
   2679                 0);
   2680 #else
   2681             if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
   2682             {
   2683                 COPY_CABAC_STATES(
   2684                     &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
   2685                     &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][0],
   2686                     IHEVC_CAB_CTXT_END * sizeof(UWORD8));
   2687 
   2688                 /* Reset the nbr maps while computing Parent CU node ()*/
   2689                 /* set the neighbour map to 0 */
   2690                 ihevce_set_nbr_map(
   2691                     ps_ctxt->pu1_ctb_nbr_map,
   2692                     ps_ctxt->i4_nbr_map_strd,
   2693                     (ps_cu_tree_analyse->b3_cu_pos_x << 1),
   2694                     (ps_cu_tree_analyse->b3_cu_pos_y << 1),
   2695                     (ps_cu_tree_analyse->u1_cu_size >> 2),
   2696                     0);
   2697             }
   2698 #endif
   2699 
   2700             /* Do rdo for the parent node */
   2701             /* Compare parent node cost vs child node costs */
   2702             ps_ctxt->is_parent_cu_rdopt = 1;
   2703 
   2704             ps_cu_prms->u1_is_cu_noisy = u1_is_cu_noisy;
   2705 
   2706             ihevce_update_pred_qp(
   2707                 ps_ctxt, ps_cu_tree_analyse->b3_cu_pos_x, ps_cu_tree_analyse->b3_cu_pos_y);
   2708 
   2709             ps_cu_tree_analyse->i8_best_rdopt_cost = ihevce_compute_rdo(
   2710                 ps_ctxt,
   2711                 ps_cu_prms,
   2712                 ps_cu_tree_analyse,
   2713                 ps_cur_ipe_ctb,
   2714                 ps_cu_me_data,
   2715                 as_col_mv,
   2716                 &s_final_mode_state,
   2717                 au1_cu_pu_map,
   2718                 *ps_cu_update_prms->ppu1_row_ecd_data,
   2719                 i4_col_pu_idx_start,
   2720                 i4_ctb_x_off,
   2721                 i4_ctb_y_off);
   2722 
   2723             ps_ctxt->is_parent_cu_rdopt = 0;
   2724 
   2725 #if(PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS)
   2726             /* Add parent split cu cost signalling */
   2727             ps_cu_tree_analyse->i8_best_rdopt_cost += COMPUTE_RATE_COST_CLIP30(
   2728                 split_cu0_bits_q12, i8_lambda_qf, (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
   2729 
   2730             COPY_CABAC_STATES(
   2731                 &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][0],
   2732                 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
   2733                 IHEVC_CAB_CTXT_END * sizeof(UWORD8));
   2734 
   2735             /* i8_least_child_cost += (num_children_encoded * ps_ctxt->i4_sad_lamda\
   2736             + ((1 << (LAMBDA_Q_SHIFT)))) >> (LAMBDA_Q_SHIFT + 1) */
   2737             ;
   2738             /* bits for coding cu split flag as  1 */
   2739             i8_least_child_cost += COMPUTE_RATE_COST_CLIP30(
   2740                 split_cu1_bits_q12, i8_lambda_qf, (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
   2741 #else
   2742 #if ENABLE_CU_SPLIT_FLAG_RATE_ESTIMATION
   2743             if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
   2744             {
   2745                 /* Add parent split cu cost signalling */
   2746                 ps_cu_tree_analyse->i8_best_rdopt_cost += COMPUTE_RATE_COST_CLIP30(
   2747                     split_cu0_bits_q12, i8_lambda_qf, (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
   2748 
   2749                 COPY_CABAC_STATES(
   2750                     &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][0],
   2751                     &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
   2752                     IHEVC_CAB_CTXT_END * sizeof(UWORD8));
   2753 
   2754                 /* i8_least_child_cost += (num_children_encoded * ps_ctxt->i4_sad_lamda\
   2755                 + ((1 << (LAMBDA_Q_SHIFT)))) >> (LAMBDA_Q_SHIFT + 1) */
   2756                 ;
   2757                 /* bits for coding cu split flag as  1 */
   2758                 i8_least_child_cost += COMPUTE_RATE_COST_CLIP30(
   2759                     split_cu1_bits_q12, i8_lambda_qf, (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
   2760             }
   2761 #else
   2762             i8_least_child_cost +=
   2763                 (num_children_encoded * ps_ctxt->i4_sad_lamda + ((1 << (LAMBDA_Q_SHIFT)))) >>
   2764                 (LAMBDA_Q_SHIFT + 1);
   2765 #endif
   2766 #endif
   2767 
   2768             /* If child modes win over parent, discard parent enc ctxt */
   2769             /* else discard child ctxt */
   2770             if(ps_cu_tree_analyse->i8_best_rdopt_cost > i8_least_child_cost)
   2771             {
   2772 #if(PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS)
   2773                 /* Store child node Models for evalution of next CU */
   2774                 for(i = (i4_tree_depth); i < 4; i++)
   2775                 {
   2776                     COPY_CABAC_STATES(
   2777                         &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
   2778                         &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth + 1][0],
   2779                         IHEVC_CAB_CTXT_END * sizeof(UWORD8));
   2780                 }
   2781                 /* Reset cabac states if child has won */
   2782                 COPY_CABAC_STATES(
   2783                     &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
   2784                     &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth + 1][0],
   2785                     IHEVC_CAB_CTXT_END * sizeof(UWORD8));
   2786 #else
   2787                 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
   2788                 {
   2789                     for(i = i4_tree_depth; i < 4; i++)
   2790                     {
   2791                         COPY_CABAC_STATES(
   2792                             &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
   2793                             &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth + 1][0],
   2794                             IHEVC_CAB_CTXT_END * sizeof(UWORD8));
   2795                     }
   2796                     /* Reset cabac states if child has won */
   2797                     COPY_CABAC_STATES(
   2798                         &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
   2799                         &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth + 1][0],
   2800                         IHEVC_CAB_CTXT_END * sizeof(UWORD8));
   2801                 }
   2802 #endif
   2803                 ps_cu_tree_analyse->i8_best_rdopt_cost = i8_least_child_cost;
   2804                 ps_cu_tree_analyse->is_node_valid = 0;
   2805             }
   2806             else
   2807             {
   2808                 /* Parent node wins over child node */
   2809                 ihevce_enc_cu_node_ctxt_t *ps_enc_tmp_out_ctxt;
   2810                 WORD32 i4_num_bytes_ecd_data;
   2811                 WORD32 num_child_nodes = 0;
   2812                 WORD32 i4_num_pus_in_cu;
   2813 
   2814                 if((((ps_cu_tree_analyse->b3_cu_pos_y << 3) + ps_cu_tree_analyse->u1_cu_size) ==
   2815                     cur_ctb_ht) &&
   2816                    (ps_cu_tree_analyse->b3_cu_pos_x == 0) && (i4_ctb_x_off == 0))
   2817                 {
   2818                     /* copy the state to row level context after 1st Cu, in the Last CU row of CTB */
   2819                     /* copy current ctb CU states into a entropy sync state */
   2820                     /* to be used for next row                              */
   2821                     COPY_CABAC_STATES(
   2822                         ps_ctxt->pu1_curr_row_cabac_state,
   2823                         &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
   2824                         IHEVC_CAB_CTXT_END * sizeof(UWORD8));
   2825                 }
   2826 
   2827 #if(PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS)
   2828                 /* Store parent node Models for evalution of next CU */
   2829                 for(i = (i4_tree_depth + 1); i < 4; i++)
   2830                 {
   2831                     COPY_CABAC_STATES(
   2832                         &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
   2833                         &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][0],
   2834                         IHEVC_CAB_CTXT_END * sizeof(UWORD8));
   2835                 }
   2836 #else
   2837                 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
   2838                 {
   2839                     for(i = (i4_tree_depth + 1); i < 4; i++)
   2840                     {
   2841                         COPY_CABAC_STATES(
   2842                             &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
   2843                             &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][0],
   2844                             IHEVC_CAB_CTXT_END * sizeof(UWORD8));
   2845                     }
   2846                 }
   2847 #endif
   2848                 ((pf_store_cu_results)ps_ctxt->pv_store_cu_results)(
   2849                     ps_ctxt, ps_cu_prms, &s_final_mode_state);
   2850 
   2851 #if(!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS)
   2852                 if(ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P2)
   2853                 {
   2854                     ihevce_update_final_cu_results(
   2855                         ps_ctxt,
   2856                         ps_ctxt->ps_enc_out_ctxt,
   2857                         ps_cu_prms,
   2858                         pps_col_pu,
   2859                         pi4_col_start_pu_idx,
   2860                         ps_cu_update_prms,
   2861                         i4_ctb_x_off >> 6,
   2862                         i4_ctb_y_off >> 6);
   2863 
   2864                     ps_ctxt->ps_enc_out_ctxt++;
   2865                 }
   2866                 else
   2867                 {
   2868                     ps_enc_tmp_out_ctxt = ps_ctxt->ps_enc_out_ctxt;
   2869 
   2870                     num_child_nodes = num_children_encoded;
   2871 
   2872                     /* ---- copy the luma & chroma coeffs to final output -------- */
   2873                     for(i = 0; i < num_child_nodes; i++)
   2874                     {
   2875                         i4_num_bytes_ecd_data =
   2876                             (ps_ctxt->ps_enc_out_ctxt - i - 1)->ps_cu_prms->i4_num_bytes_ecd_data;
   2877                         ps_ctxt->pu1_ecd_data -= i4_num_bytes_ecd_data;
   2878                     }
   2879 
   2880                     i4_num_bytes_ecd_data =
   2881                         ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->i4_num_bytes_ecd_data;
   2882                     if(0 != i4_num_bytes_ecd_data)
   2883                     {
   2884                         memcpy(
   2885                             ps_ctxt->pu1_ecd_data,
   2886                             &ps_ctxt->pu1_cu_recur_coeffs[0],
   2887                             i4_num_bytes_ecd_data);
   2888 
   2889                         ps_ctxt->pu1_ecd_data += i4_num_bytes_ecd_data;
   2890                     }
   2891 
   2892                     ps_enc_tmp_out_ctxt = ps_ctxt->ps_enc_out_ctxt - num_child_nodes;
   2893 
   2894                     memcpy(
   2895                         ps_enc_tmp_out_ctxt,
   2896                         ps_ctxt->ps_enc_out_ctxt,
   2897                         sizeof(ihevce_enc_cu_node_ctxt_t));
   2898                     ps_enc_tmp_out_ctxt->ps_cu_prms = &ps_enc_tmp_out_ctxt->s_cu_prms;
   2899 
   2900                     /* Collocated PU updates */
   2901                     i4_num_pus_in_cu = ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
   2902                     /* Copy the collocated MVs and the PU map to frame buffers */
   2903                     memcpy(ps_col_mv_bkup, as_col_mv, sizeof(pu_col_mv_t) * i4_num_pus_in_cu);
   2904                     memcpy(
   2905                         pu1_col_pu_map,
   2906                         au1_cu_pu_map,
   2907                         (MAX_CTB_SIZE / MIN_PU_SIZE) * (MAX_CTB_SIZE / MIN_PU_SIZE));
   2908                     /* Update the frame buffer pointer and the map index */
   2909                     *pps_col_pu = ps_col_mv_bkup + i4_num_pus_in_cu;
   2910                     *pi4_col_start_pu_idx = i4_col_pu_idx_bkup + i4_num_pus_in_cu;
   2911 
   2912                     ps_ctxt->ps_enc_out_ctxt = ps_enc_tmp_out_ctxt + 1;
   2913                 }
   2914 #else
   2915 
   2916                 ps_enc_tmp_out_ctxt = ps_ctxt->ps_enc_out_ctxt;
   2917 
   2918                 num_child_nodes = num_children_encoded;
   2919 
   2920                 /* ---- copy the luma & chroma coeffs to final output -------- */
   2921                 for(i = 0; i < num_child_nodes; i++)
   2922                 {
   2923                     i4_num_bytes_ecd_data =
   2924                         (ps_ctxt->ps_enc_out_ctxt - i - 1)->ps_cu_prms->i4_num_bytes_ecd_data;
   2925                     ps_ctxt->pu1_ecd_data -= i4_num_bytes_ecd_data;
   2926                 }
   2927 
   2928                 i4_num_bytes_ecd_data = ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->i4_num_bytes_ecd_data;
   2929                 if(0 != i4_num_bytes_ecd_data)
   2930                 {
   2931                     memcpy(
   2932                         ps_ctxt->pu1_ecd_data,
   2933                         &ps_ctxt->pu1_cu_recur_coeffs[0],
   2934                         i4_num_bytes_ecd_data * sizeof(UWORD8));
   2935 
   2936                     ps_ctxt->pu1_ecd_data += i4_num_bytes_ecd_data;
   2937                 }
   2938 
   2939                 ps_enc_tmp_out_ctxt = ps_ctxt->ps_enc_out_ctxt - num_child_nodes;
   2940 
   2941                 memcpy(
   2942                     ps_enc_tmp_out_ctxt,
   2943                     ps_ctxt->ps_enc_out_ctxt,
   2944                     sizeof(ihevce_enc_cu_node_ctxt_t));
   2945 
   2946                 ps_enc_tmp_out_ctxt->ps_cu_prms = &ps_enc_tmp_out_ctxt->s_cu_prms;
   2947 
   2948                 /* Collocated PU updates */
   2949                 i4_num_pus_in_cu = ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
   2950                 /* Copy the collocated MVs and the PU map to frame buffers */
   2951                 memcpy(ps_col_mv_bkup, as_col_mv, sizeof(pu_col_mv_t) * i4_num_pus_in_cu);
   2952                 memcpy(
   2953                     pu1_col_pu_map,
   2954                     au1_cu_pu_map,
   2955                     (MAX_CTB_SIZE / MIN_PU_SIZE) * (MAX_CTB_SIZE / MIN_PU_SIZE));
   2956                 /* Update the frame buffer pointer and the map index */
   2957                 *pps_col_pu = ps_col_mv_bkup + i4_num_pus_in_cu;
   2958                 *pi4_col_start_pu_idx = i4_col_pu_idx_bkup + i4_num_pus_in_cu;
   2959 
   2960                 ps_ctxt->ps_enc_out_ctxt = ps_enc_tmp_out_ctxt + 1;
   2961 #endif
   2962 
   2963                 num_children_encoded = 1;
   2964                 DISABLE_THE_CHILDREN_NODES(ps_cu_tree_analyse);
   2965             }
   2966         }
   2967         else /* if(ps_cu_tree_analyse->is_node_valid) */
   2968         {
   2969             ps_cu_tree_analyse->i8_best_rdopt_cost = i8_least_child_cost;
   2970 
   2971             /* Tree depth of four will occur for Incomplete CTB */
   2972             if((i8_least_child_cost > 0) && (i4_tree_depth != 3))
   2973             {
   2974 #if(PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS)
   2975                 /* Store child node Models for evalution of next CU */
   2976                 for(i = i4_tree_depth; i < 4; i++)
   2977                 {
   2978                     COPY_CABAC_STATES(
   2979                         &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
   2980                         &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth + 1][0],
   2981                         IHEVC_CAB_CTXT_END * sizeof(UWORD8));
   2982                 }
   2983 #else
   2984                 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
   2985                 {
   2986                     for(i = (i4_tree_depth); i < 4; i++)
   2987                     {
   2988                         COPY_CABAC_STATES(
   2989                             &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
   2990                             &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth + 1][0],
   2991                             IHEVC_CAB_CTXT_END * sizeof(UWORD8));
   2992                     }
   2993                 }
   2994 #endif
   2995             }
   2996         }
   2997     }
   2998 
   2999     return num_children_encoded;
   3000 }
   3001 
   3002 static UWORD8 ihevce_intraData_availability_extractor(
   3003     WORD8 *pi1_8x8CULevel_intraData_availability_indicator,
   3004     UWORD8 u1_cu_size,
   3005     UWORD8 u1_x_8x8CU_units,
   3006     UWORD8 u1_y_8x8CU_units)
   3007 {
   3008     if(8 == u1_cu_size)
   3009     {
   3010         return (!pi1_8x8CULevel_intraData_availability_indicator
   3011                     [u1_x_8x8CU_units + MAX_CU_IN_CTB_ROW * u1_y_8x8CU_units]);
   3012     }
   3013     else
   3014     {
   3015         UWORD8 u1_data_availability = 0;
   3016         UWORD8 u1_child_cu_size = u1_cu_size / 2;
   3017 
   3018         u1_data_availability |= ihevce_intraData_availability_extractor(
   3019             pi1_8x8CULevel_intraData_availability_indicator,
   3020             u1_child_cu_size,
   3021             u1_x_8x8CU_units,
   3022             u1_y_8x8CU_units);
   3023 
   3024         u1_data_availability |= ihevce_intraData_availability_extractor(
   3025             pi1_8x8CULevel_intraData_availability_indicator,
   3026             u1_child_cu_size,
   3027             u1_x_8x8CU_units + u1_child_cu_size / 8,
   3028             u1_y_8x8CU_units);
   3029 
   3030         u1_data_availability |= ihevce_intraData_availability_extractor(
   3031             pi1_8x8CULevel_intraData_availability_indicator,
   3032             u1_child_cu_size,
   3033             u1_x_8x8CU_units,
   3034             u1_y_8x8CU_units + u1_child_cu_size / 8);
   3035 
   3036         u1_data_availability |= ihevce_intraData_availability_extractor(
   3037             pi1_8x8CULevel_intraData_availability_indicator,
   3038             u1_child_cu_size,
   3039             u1_x_8x8CU_units + u1_child_cu_size / 8,
   3040             u1_y_8x8CU_units + u1_child_cu_size / 8);
   3041 
   3042         return u1_data_availability;
   3043     }
   3044 }
   3045 
   3046 void ihevce_intra_and_inter_cuTree_merger(
   3047     cur_ctb_cu_tree_t *ps_merged_tree,
   3048     cur_ctb_cu_tree_t *ps_intra_tree,
   3049     cur_ctb_cu_tree_t *ps_inter_tree,
   3050     WORD8 *pi1_8x8CULevel_intraData_availability_indicator)
   3051 {
   3052     /* 0 => Intra and inter children valid */
   3053     /* 1 => Only Intra valid */
   3054     /* 2 => Only Inter valid */
   3055     /* 3 => Neither */
   3056     UWORD8 au1_children_recursive_call_type[4];
   3057 
   3058     if(NULL != ps_intra_tree)
   3059     {
   3060         ps_intra_tree->is_node_valid =
   3061             ps_intra_tree->is_node_valid &
   3062             ihevce_intraData_availability_extractor(
   3063                 pi1_8x8CULevel_intraData_availability_indicator,
   3064                 ps_intra_tree->u1_cu_size,
   3065                 ps_intra_tree->b3_cu_pos_x & ((8 == ps_intra_tree->u1_cu_size) ? 0xfe : 0xff),
   3066                 ps_intra_tree->b3_cu_pos_y & ((8 == ps_intra_tree->u1_cu_size) ? 0xfe : 0xff));
   3067     }
   3068 
   3069     switch(((NULL == ps_intra_tree) << 1) | (NULL == ps_inter_tree))
   3070     {
   3071     case 0:
   3072     {
   3073         ps_merged_tree->is_node_valid = ps_intra_tree->is_node_valid ||
   3074                                         ps_inter_tree->is_node_valid;
   3075         ps_merged_tree->u1_inter_eval_enable = ps_inter_tree->is_node_valid;
   3076         ps_merged_tree->u1_intra_eval_enable = ps_intra_tree->is_node_valid;
   3077 
   3078         au1_children_recursive_call_type[POS_TL] =
   3079             ((NULL == ps_intra_tree->ps_child_node_tl) << 1) |
   3080             (NULL == ps_inter_tree->ps_child_node_tl);
   3081         au1_children_recursive_call_type[POS_TR] =
   3082             ((NULL == ps_intra_tree->ps_child_node_tr) << 1) |
   3083             (NULL == ps_inter_tree->ps_child_node_tr);
   3084         au1_children_recursive_call_type[POS_BL] =
   3085             ((NULL == ps_intra_tree->ps_child_node_bl) << 1) |
   3086             (NULL == ps_inter_tree->ps_child_node_bl);
   3087         au1_children_recursive_call_type[POS_BR] =
   3088             ((NULL == ps_intra_tree->ps_child_node_br) << 1) |
   3089             (NULL == ps_inter_tree->ps_child_node_br);
   3090 
   3091         break;
   3092     }
   3093     case 1:
   3094     {
   3095         ps_merged_tree->is_node_valid = ps_intra_tree->is_node_valid;
   3096         ps_merged_tree->u1_inter_eval_enable = 0;
   3097         ps_merged_tree->u1_intra_eval_enable = ps_intra_tree->is_node_valid;
   3098 
   3099         au1_children_recursive_call_type[POS_TL] =
   3100             ((NULL == ps_intra_tree->ps_child_node_tl) << 1) + 1;
   3101         au1_children_recursive_call_type[POS_TR] =
   3102             ((NULL == ps_intra_tree->ps_child_node_tr) << 1) + 1;
   3103         au1_children_recursive_call_type[POS_BL] =
   3104             ((NULL == ps_intra_tree->ps_child_node_bl) << 1) + 1;
   3105         au1_children_recursive_call_type[POS_BR] =
   3106             ((NULL == ps_intra_tree->ps_child_node_br) << 1) + 1;
   3107 
   3108         break;
   3109     }
   3110     case 2:
   3111     {
   3112         ps_merged_tree->is_node_valid = ps_inter_tree->is_node_valid;
   3113         ps_merged_tree->u1_inter_eval_enable = ps_inter_tree->is_node_valid;
   3114         ps_merged_tree->u1_intra_eval_enable = 0;
   3115 
   3116         au1_children_recursive_call_type[POS_TL] = 2 + (NULL == ps_inter_tree->ps_child_node_tl);
   3117         au1_children_recursive_call_type[POS_TR] = 2 + (NULL == ps_inter_tree->ps_child_node_tr);
   3118         au1_children_recursive_call_type[POS_BL] = 2 + (NULL == ps_inter_tree->ps_child_node_bl);
   3119         au1_children_recursive_call_type[POS_BR] = 2 + (NULL == ps_inter_tree->ps_child_node_br);
   3120 
   3121         break;
   3122     }
   3123     case 3:
   3124     {
   3125         /* The swamps of Dagobah! */
   3126         ASSERT(0);
   3127 
   3128         break;
   3129     }
   3130     }
   3131 
   3132     switch(au1_children_recursive_call_type[POS_TL])
   3133     {
   3134     case 0:
   3135     {
   3136         ihevce_intra_and_inter_cuTree_merger(
   3137             ps_merged_tree->ps_child_node_tl,
   3138             ps_intra_tree->ps_child_node_tl,
   3139             ps_inter_tree->ps_child_node_tl,
   3140             pi1_8x8CULevel_intraData_availability_indicator);
   3141 
   3142         break;
   3143     }
   3144     case 2:
   3145     {
   3146         ihevce_intra_and_inter_cuTree_merger(
   3147             ps_merged_tree->ps_child_node_tl,
   3148             NULL,
   3149             ps_inter_tree->ps_child_node_tl,
   3150             pi1_8x8CULevel_intraData_availability_indicator);
   3151 
   3152         break;
   3153     }
   3154     case 1:
   3155     {
   3156         ihevce_intra_and_inter_cuTree_merger(
   3157             ps_merged_tree->ps_child_node_tl,
   3158             ps_intra_tree->ps_child_node_tl,
   3159             NULL,
   3160             pi1_8x8CULevel_intraData_availability_indicator);
   3161 
   3162         break;
   3163     }
   3164     }
   3165 
   3166     switch(au1_children_recursive_call_type[POS_TR])
   3167     {
   3168     case 0:
   3169     {
   3170         ihevce_intra_and_inter_cuTree_merger(
   3171             ps_merged_tree->ps_child_node_tr,
   3172             ps_intra_tree->ps_child_node_tr,
   3173             ps_inter_tree->ps_child_node_tr,
   3174             pi1_8x8CULevel_intraData_availability_indicator);
   3175 
   3176         break;
   3177     }
   3178     case 2:
   3179     {
   3180         ihevce_intra_and_inter_cuTree_merger(
   3181             ps_merged_tree->ps_child_node_tr,
   3182             NULL,
   3183             ps_inter_tree->ps_child_node_tr,
   3184             pi1_8x8CULevel_intraData_availability_indicator);
   3185 
   3186         break;
   3187     }
   3188     case 1:
   3189     {
   3190         ihevce_intra_and_inter_cuTree_merger(
   3191             ps_merged_tree->ps_child_node_tr,
   3192             ps_intra_tree->ps_child_node_tr,
   3193             NULL,
   3194             pi1_8x8CULevel_intraData_availability_indicator);
   3195 
   3196         break;
   3197     }
   3198     }
   3199 
   3200     switch(au1_children_recursive_call_type[POS_BL])
   3201     {
   3202     case 0:
   3203     {
   3204         ihevce_intra_and_inter_cuTree_merger(
   3205             ps_merged_tree->ps_child_node_bl,
   3206             ps_intra_tree->ps_child_node_bl,
   3207             ps_inter_tree->ps_child_node_bl,
   3208             pi1_8x8CULevel_intraData_availability_indicator);
   3209 
   3210         break;
   3211     }
   3212     case 2:
   3213     {
   3214         ihevce_intra_and_inter_cuTree_merger(
   3215             ps_merged_tree->ps_child_node_bl,
   3216             NULL,
   3217             ps_inter_tree->ps_child_node_bl,
   3218             pi1_8x8CULevel_intraData_availability_indicator);
   3219 
   3220         break;
   3221     }
   3222     case 1:
   3223     {
   3224         ihevce_intra_and_inter_cuTree_merger(
   3225             ps_merged_tree->ps_child_node_bl,
   3226             ps_intra_tree->ps_child_node_bl,
   3227             NULL,
   3228             pi1_8x8CULevel_intraData_availability_indicator);
   3229 
   3230         break;
   3231     }
   3232     }
   3233 
   3234     switch(au1_children_recursive_call_type[POS_BR])
   3235     {
   3236     case 0:
   3237     {
   3238         ihevce_intra_and_inter_cuTree_merger(
   3239             ps_merged_tree->ps_child_node_br,
   3240             ps_intra_tree->ps_child_node_br,
   3241             ps_inter_tree->ps_child_node_br,
   3242             pi1_8x8CULevel_intraData_availability_indicator);
   3243 
   3244         break;
   3245     }
   3246     case 2:
   3247     {
   3248         ihevce_intra_and_inter_cuTree_merger(
   3249             ps_merged_tree->ps_child_node_br,
   3250             NULL,
   3251             ps_inter_tree->ps_child_node_br,
   3252             pi1_8x8CULevel_intraData_availability_indicator);
   3253 
   3254         break;
   3255     }
   3256     case 1:
   3257     {
   3258         ihevce_intra_and_inter_cuTree_merger(
   3259             ps_merged_tree->ps_child_node_br,
   3260             ps_intra_tree->ps_child_node_br,
   3261             NULL,
   3262             pi1_8x8CULevel_intraData_availability_indicator);
   3263 
   3264         break;
   3265     }
   3266     }
   3267 }
   3268