Home | History | Annotate | Download | only in encoder
      1 /******************************************************************************
      2  *
      3  * Copyright (C) 2018 The Android Open Source Project
      4  *
      5  * Licensed under the Apache License, Version 2.0 (the "License");
      6  * you may not use this file except in compliance with the License.
      7  * You may obtain a copy of the License at:
      8  *
      9  * http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  *
     17  *****************************************************************************
     18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
     19 */
     20 /*!
     21 ******************************************************************************
     22 * \file ihevce_enc_loop_structs.h
     23 *
     24 * \brief
*    This file contains structures of enc_loop pass
     26 *
     27 * \date
     28 *    18/09/2012
     29 *
     30 * \author
     31 *    Ittiam
     32 *
     33 ******************************************************************************
     34 */
     35 
     36 #ifndef _IHEVCE_ENC_LOOP_STRUCTS_H_
     37 #define _IHEVCE_ENC_LOOP_STRUCTS_H_
     38 
     39 #include "ihevc_macros.h"
     40 
/* Table of CABAC bin costs, defined in another translation unit.            */
/* COMPUTE_MERGE_IDX_COST in this header indexes it with (context model ^ bin) */
/* and accumulates the entry as a fractional-bit (Q format) cost.            */
extern UWORD16 gau2_ihevce_cabac_bin_to_bits[64 * 2];
     42 
     43 /*****************************************************************************/
     44 /* Constant Macros                                                           */
     45 /*****************************************************************************/
/** \brief Number of transform types: 4x4 DST, 4x4, 8x8, 16x16, 32x32 */
#define NUM_TRANS_TYPES 5
/* Intra prediction mode indices for the planar and DC modes */
#define INTRA_PLANAR 0
#define INTRA_DC 1
#define NUM_POSSIBLE_TU_SIZES_CHR_INTRA_SATD 2
#define MAX_TU_IN_TU_EQ_DIV_2 4
#define MAX_MVP_LIST_CAND 2
/* NOTE(review): 7 hex digits, i.e. 2^27 - 1, not the largest 32-bit signed value - confirm intended */
#define MAX_COST 0x7ffffff
/* NOTE(review): 15 hex digits, i.e. 2^59 - 1, not the largest 64-bit signed value - confirm intended */
#define MAX_COST_64 0x7ffffffffffffff
#define NUM_32CU_AND_64CU_IN_CTB 5 /* 4 - 32x32 + 1 64x64*/
#define PING_PONG 2
#define MAX_SAO_RD_CAND 10
#define SCRATCH_BUF_STRIDE 80
     59 
     60 /*****************************************************************************/
     61 /* Function Macros                                                           */
     62 /*****************************************************************************/
/* Identity mapping: expands to its argument unchanged, so an angular intra */
/* mode number is used directly as the mode value                           */
#define INTRA_ANGULAR(x) (x)
     64 
     65 /** @breif max 30bit value */
     66 #define MAX30 ((1 << 30) - 1)
     67 
     68 /* @brief macro to clip a data to max of 30bits (assuming unsgined) */
     69 #define CLIP30(x) ((x) > MAX30 ? MAX30 : (x))
     70 
     71 /* @brief compute the (lambda * rate) with a qshift and clip result to 30bits */
     72 #define COMPUTE_RATE_COST_CLIP30(r, l, qshift) ((WORD32)CLIP30((ULWORD64)((r) * (l)) >> (qshift)))
     73 
     74 #define IHEVCE_INV_WT_PRED(inp, wt, off, shift)                                                    \
     75     (((((inp) - (off)) << (shift)) * wt + (1 << 14)) >> 15)
     76 
     77 #define POPULATE_PU_STRUCT(ps_pu, mvx, mvy, offset_x, offset_y, wd, ht, ref_idx, pred_lx)          \
     78     {                                                                                              \
     79         (ps_pu)->b4_pos_x = (offset_x) >> 2;                                                       \
     80         (ps_pu)->b4_pos_y = (offset_y) >> 2;                                                       \
     81         (ps_pu)->b4_wd = ((wd) >> 2) - 1;                                                          \
     82         (ps_pu)->b4_ht = ((ht) >> 2) - 1;                                                          \
     83         (ps_pu)->b1_intra_flag = 0;                                                                \
     84         (ps_pu)->b2_pred_mode = pred_lx;                                                           \
     85         if(pred_lx)                                                                                \
     86         {                                                                                          \
     87             (ps_pu)->mv.i1_l0_ref_idx = -1;                                                        \
     88             (ps_pu)->mv.i1_l1_ref_idx = ref_idx;                                                   \
     89             (ps_pu)->mv.s_l1_mv.i2_mvx = mvx;                                                      \
     90             (ps_pu)->mv.s_l1_mv.i2_mvy = mvy;                                                      \
     91         }                                                                                          \
     92         else                                                                                       \
     93         {                                                                                          \
     94             (ps_pu)->mv.i1_l0_ref_idx = ref_idx;                                                   \
     95             (ps_pu)->mv.i1_l1_ref_idx = -1;                                                        \
     96             (ps_pu)->mv.s_l0_mv.i2_mvx = mvx;                                                      \
     97             (ps_pu)->mv.s_l0_mv.i2_mvy = mvy;                                                      \
     98         }                                                                                          \
     99     }
    100 
    101 #define GET_FRAME_QSTEP_FROM_QP(frame_qp, frame_qstep)                                             \
    102     {                                                                                              \
    103         double q_steps[6] = { 0.625, 0.703, 0.79, 0.889, 1.0, 1.125 };                             \
    104                                                                                                    \
    105         frame_qstep = (WORD32)((1 << ((frame_qp) / 6)) * q_steps[(frame_qp) % 6]);                 \
    106     }
    107 
    108 #define INITIALISE_MERGE_RESULT_STRUCT(ps_merge_data, pas_pu_results)                              \
    109     {                                                                                              \
    110         WORD32 i, j, k;                                                                            \
    111                                                                                                    \
    112         for(i = 0; i < TOT_NUM_PARTS; i++)                                                         \
    113         {                                                                                          \
    114             (ps_merge_data)->s_pu_results.u1_num_results_per_part_l0[i] = 0;                       \
    115             (ps_merge_data)->s_pu_results.u1_num_results_per_part_l1[i] = 0;                       \
    116         }                                                                                          \
    117         for(i = 0; i < 2; i++)                                                                     \
    118         {                                                                                          \
    119             for(j = 0; j < TOT_NUM_PARTS; j++)                                                     \
    120             {                                                                                      \
    121                 (ps_merge_data)->s_pu_results.aps_pu_results[i][j] = pas_pu_results[i][j];         \
    122                 for(k = 0; k < MAX_NUM_RESULTS_PER_PART_LIST; k++)                                 \
    123                 {                                                                                  \
    124                     pas_pu_results[i][j][k].i4_tot_cost = MAX_COST;                                \
    125                     pas_pu_results[i][j][k].pu.mv.i1_l0_ref_idx = -1;                              \
    126                     pas_pu_results[i][j][k].pu.mv.i1_l1_ref_idx = -1;                              \
    127                 }                                                                                  \
    128             }                                                                                      \
    129         }                                                                                          \
    130     }
    131 
    132 #define POPULATE_CTB_PARAMS                                                                        \
    133     (ps_common_frm_prms,                                                                           \
    134      apu1_wt_inp,                                                                                  \
    135      i4_ctb_x_off,                                                                                 \
    136      i4_ctb_y_off,                                                                                 \
    137      ppu1_pred,                                                                                    \
    138      cu_size,                                                                                      \
    139      ref_stride,                                                                                   \
    140      bidir_enabled,                                                                                \
    141      num_refs,                                                                                     \
    142      pps_rec_list_l0,                                                                              \
    143      pps_rec_list_l1,                                                                              \
    144      pu1_non_wt_inp,                                                                               \
    145      lambda,                                                                                       \
    146      lambda_q_shift,                                                                               \
    147      wpred_log_wdc)                                                                                \
    148     {                                                                                              \
    149         WORD32 i, j;                                                                               \
    150         (ps_common_frm_prms)->i4_bidir_enabled = bidir_enabled;                                    \
    151         (ps_common_frm_prms)->i4_ctb_x_off = i4_ctb_x_off;                                         \
    152         (ps_common_frm_prms)->i4_ctb_y_off = i4_ctb_y_off;                                         \
    153         (ps_common_frm_prms)->i4_inp_stride = cu_size;                                             \
    154         (ps_common_frm_prms)->i4_lamda = lambda;                                                   \
    155         (ps_common_frm_prms)->i4_pred_stride = cu_size;                                            \
    156         (ps_common_frm_prms)->i4_rec_stride = ref_stride;                                          \
    157         (ps_common_frm_prms)->pps_rec_list_l0 = pps_rec_list_l0;                                   \
    158         (ps_common_frm_prms)->pps_rec_list_l1 = pps_rec_list_l1;                                   \
    159         (ps_common_frm_prms)->ppu1_pred = ppu1_pred;                                               \
    160         (ps_common_frm_prms)->pu1_non_wt_inp = pu1_non_wt_inp;                                     \
    161         (ps_common_frm_prms)->pu1_wkg_mem = NULL;                                                  \
    162         (ps_common_frm_prms)->u1_lamda_qshift = lambda_q_shift;                                    \
    163         (ps_common_frm_prms)->u1_num_ref = num_refs;                                               \
    164         (ps_common_frm_prms)->wpred_log_wdc = wpred_log_wdc;                                       \
    165         for(i = 0; i < 2; i++)                                                                     \
    166         {                                                                                          \
    167             for(j = 0; j < MAX_NUM_REF; j++)                                                       \
    168             {                                                                                      \
    169                 (ps_common_frm_prms)->apu1_wt_inp = (apu1_wt_inp)[i][j];                           \
    170             }                                                                                      \
    171         }                                                                                          \
    172     }
    173 
    174 #define COMPUTE_MERGE_IDX_COST(merge_idx_0_model, merge_idx, max_merge_cand, lambda, cost)         \
    175     {                                                                                              \
    176         WORD32 cab_bits_q12 = 0;                                                                   \
    177                                                                                                    \
    178         /* sanity checks */                                                                        \
    179         ASSERT((merge_idx >= 0) && (merge_idx < max_merge_cand));                                  \
    180                                                                                                    \
    181         /* encode the merge idx only if required */                                                \
    182         if(max_merge_cand > 1)                                                                     \
    183         {                                                                                          \
    184             WORD32 bin = (merge_idx > 0);                                                          \
    185                                                                                                    \
    186             /* bits for the context modelled first bin */                                          \
    187             cab_bits_q12 += gau2_ihevce_cabac_bin_to_bits[merge_idx_0_model ^ bin];                \
    188                                                                                                    \
    189             /* bits for larged merge idx coded as bypass tunary */                                 \
    190             if((max_merge_cand > 2) && (merge_idx > 0))                                            \
    191             {                                                                                      \
    192                 cab_bits_q12 += (MIN(merge_idx, (max_merge_cand - 2))) << CABAC_FRAC_BITS_Q;       \
    193             }                                                                                      \
    194                                                                                                    \
    195             cost = COMPUTE_RATE_COST_CLIP30(                                                       \
    196                 cab_bits_q12, lambda, (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));                       \
    197         }                                                                                          \
    198         else                                                                                       \
    199         {                                                                                          \
    200             cost = 0;                                                                              \
    201         }                                                                                          \
    202     }
    203 
    204 /*****************************************************************************/
    205 /* Typedefs                                                                  */
    206 /*****************************************************************************/
    207 
/** \brief pointer to a function of type FT_CALC_HAD_SATD_8BIT (declared elsewhere) */
typedef FT_CALC_HAD_SATD_8BIT *pf_res_trans_luma_had_chroma;
    209 
/** \brief function pointer prototype for residue and transform enc_loop
 *  (chroma). Takes source and prediction pixel pointers with their strides,
 *  a WORD32 scratch buffer and a WORD16 destination buffer; returns a UWORD32.
 *  NOTE(review): the last argument's name suggests it combines the destination
 *  stride with a chroma flag - confirm against the implementations. */
typedef UWORD32 (*pf_res_trans_chroma)(
    UWORD8 *pu1_src,
    UWORD8 *pu1_pred,
    WORD32 *pi4_tmp,
    WORD16 *pi2_dst,
    WORD32 src_strd,
    WORD32 pred_strd,
    WORD32 dst_strd_chr_flag);
    219 
/** \brief function pointer prototype for quantization and inv Quant for ssd
calc. for all transform sizes.
Unlike pf_quant_iquant_ssd_sbh this variant takes the two quant rounding
factor tables and reports its cost through a 64-bit pointer (pi8_cost). */
typedef WORD32 (*pf_quant_iquant_ssd)(
    WORD16 *pi2_coeffs,
    WORD16 *pi2_quant_coeff,
    WORD16 *pi2_q_dst,
    WORD16 *pi2_iq_dst,
    WORD32 trans_size,
    WORD32 qp_div, /* qpscaled / 6 */
    WORD32 qp_rem, /* qpscaled % 6 */
    WORD32 q_add,
    WORD32 *pi4_quant_round_factor_0_1,
    WORD32 *pi4_quant_round_factor_1_2,
    WORD32 src_strd,
    WORD32 dst_q_strd,
    WORD32 dst_iq_strd,
    UWORD8 *csbf,
    WORD32 csbf_strd,
    WORD32 *zero_col,
    WORD32 *zero_row,
    WORD16 *pi2_dequant_coeff,
    LWORD64 *pi8_cost);
    242 
/** \brief function pointer prototype for quantization and inv Quant for ssd
calc. for all transform sizes (in case of RDOQ + SBH).
Unlike pf_quant_iquant_ssd this variant has no rounding factor tables, reports
its cost through a 32-bit pointer (pi4_cost) and additionally takes the scan
index and a flag selecting whether RDOQ is performed. */
typedef WORD32 (*pf_quant_iquant_ssd_sbh)(
    WORD16 *pi2_coeffs,
    WORD16 *pi2_quant_coeff,
    WORD16 *pi2_q_dst,
    WORD16 *pi2_iq_dst,
    WORD32 trans_size,
    WORD32 qp_div, /* qpscaled / 6 */
    WORD32 qp_rem, /* qpscaled % 6 */
    WORD32 q_add,
    WORD32 src_strd,
    WORD32 dst_q_strd,
    WORD32 dst_iq_strd,
    UWORD8 *csbf,
    WORD32 csbf_strd,
    WORD32 *zero_col,
    WORD32 *zero_row,
    WORD16 *pi2_dequant_coeff,
    WORD32 *pi4_cost,
    WORD32 i4_scan_idx,
    WORD32 i4_perform_rdoq);
    265 
/** \brief function pointer prototype for inverse transform and recon
for all transform sizes : Luma.
Takes WORD16 source and scratch buffers, 8-bit prediction and destination
buffers with their strides, and the zero column / zero row information. */
typedef void (*pf_it_recon)(
    WORD16 *pi2_src,
    WORD16 *pi2_tmp,
    UWORD8 *pu1_pred,
    UWORD8 *pu1_dst,
    WORD32 src_strd,
    WORD32 pred_strd,
    WORD32 dst_strd,
    WORD32 zero_cols,
    WORD32 zero_rows);
    278 
/** \brief function pointer prototype for inverse transform and recon
for all transform sizes : Chroma.
The parameter list is identical to that of the luma variant pf_it_recon. */
typedef void (*pf_it_recon_chroma)(
    WORD16 *pi2_src,
    WORD16 *pi2_tmp,
    UWORD8 *pu1_pred,
    UWORD8 *pu1_dst,
    WORD32 src_strd,
    WORD32 pred_strd,
    WORD32 dst_strd,
    WORD32 zero_cols,
    WORD32 zero_rows);
    291 
/** \brief function pointer prototype for luma sao.
Takes the source block with its stride, pointers to the left, top, top-left,
top-right and bottom-left neighbour data, an availability array, a single
SAO offset table and the block width and height. */
typedef void (*pf_sao_luma)(
    UWORD8 *pu1_src,
    WORD32 src_strd,
    UWORD8 *pu1_src_left,
    UWORD8 *pu1_src_top,
    UWORD8 *pu1_src_top_left,
    UWORD8 *pu1_src_top_right,
    UWORD8 *pu1_src_bot_left,
    UWORD8 *pu1_avail,
    WORD8 *pi1_sao_offset,
    WORD32 wd,
    WORD32 ht);
    305 
/** \brief function pointer prototype for chroma sao.
Same arguments as pf_sao_luma except that separate offset tables are passed
for the U and V components. */
typedef void (*pf_sao_chroma)(
    UWORD8 *pu1_src,
    WORD32 src_strd,
    UWORD8 *pu1_src_left,
    UWORD8 *pu1_src_top,
    UWORD8 *pu1_src_top_left,
    UWORD8 *pu1_src_top_right,
    UWORD8 *pu1_src_bot_left,
    UWORD8 *pu1_avail,
    WORD8 *pi1_sao_offset_u,
    WORD8 *pi1_sao_offset_v,
    WORD32 wd,
    WORD32 ht);
    320 
    321 /*****************************************************************************/
    322 /* Enums                                                                     */
    323 /*****************************************************************************/
    324 
    325 typedef enum
    326 {
    327     IP_FUNC_MODE_0 = 0,
    328     IP_FUNC_MODE_1,
    329     IP_FUNC_MODE_2,
    330     IP_FUNC_MODE_3TO9,
    331     IP_FUNC_MODE_10,
    332     IP_FUNC_MODE_11TO17,
    333     IP_FUNC_MODE_18_34,
    334     IP_FUNC_MODE_19TO25,
    335     IP_FUNC_MODE_26,
    336     IP_FUNC_MODE_27TO33,
    337 
    338     NUM_IP_FUNCS
    339 
    340 } IP_FUNCS_T;
    341 
    342 typedef enum
    343 {
    344     /* currently only cu and cu/2 modes are supported */
    345     TU_EQ_CU = 0,
    346     TU_EQ_CU_DIV2,
    347     TU_EQ_SUBCU, /* only applicable for NXN mode at mincusize */
    348 
    349     /* support for below modes needs to be added */
    350     TU_EQ_CU_DIV4,
    351     TU_EQ_CU_DIV8,
    352     TU_EQ_CU_DIV16,
    353 
    354     NUM_TU_WRT_CU,
    355 
    356 } TU_SIZE_WRT_CU_T;
    357 
    358 typedef enum
    359 {
    360     RDOPT_MODE = 0,
    361     RDOPT_SKIP_MODE = 1,
    362 
    363     NUM_CORE_CALL_MODES,
    364 
    365 } CORE_FUNC_CALL_MODE_T;
    366 
    367 typedef enum
    368 {
    369     ENC_LOOP_CTXT = 0,
    370     ENC_LOOP_THRDS_CTXT,
    371     ENC_LOOP_SCALE_MAT,
    372     ENC_LOOP_RESCALE_MAT,
    373     ENC_LOOP_TOP_LUMA,
    374     ENC_LOOP_TOP_CHROMA,
    375     ENC_LOOP_TOP_NBR4X4,
    376     ENC_LOOP_RC_PARAMS, /* memory to dump rate control parameters by each thread for each bit-rate instance */
    377     ENC_LOOP_QP_TOP_4X4,
    378     ENC_LOOP_DEBLOCKING,
    379     ENC_LOOP_422_CHROMA_INTRA_PRED,
    380     ENC_LOOP_INTER_PRED,
    381     ENC_LOOP_CHROMA_PRED_INTRA,
    382     ENC_LOOP_REF_SUB_OUT,
    383     ENC_LOOP_REF_FILT_OUT,
    384     ENC_LOOP_CU_RECUR_LUMA_RECON,
    385     ENC_LOOP_CU_RECUR_CHROMA_RECON,
    386     ENC_LOOP_CU_RECUR_LUMA_PRED,
    387     ENC_LOOP_CU_RECUR_CHROMA_PRED,
    388     ENC_LOOP_LEFT_LUMA_DATA,
    389     ENC_LOOP_LEFT_CHROMA_DATA,
    390     ENC_LOOP_SAO,
    391     ENC_LOOP_CU_COEFF_DATA,
    392     ENC_LOOP_CU_RECUR_COEFF_DATA,
    393     ENC_LOOP_CU_DEQUANT_DATA,
    394     ENC_LOOP_RECON_DATA_STORE,
    395     /* should always be the last entry */
    396     NUM_ENC_LOOP_MEM_RECS
    397 
    398 } ENC_LOOP_MEM_TABS_T;
    399 
    400 /** This is for assigning the pred buiffers for luma (2 ping-pong) and
    401 chroma(1)   */
    402 typedef enum
    403 {
    404     CU_ME_INTRA_PRED_LUMA_IDX0 = 0,
    405     CU_ME_INTRA_PRED_LUMA_IDX1,
    406     CU_ME_INTRA_PRED_CHROMA_IDX,
    407 
    408     /* should be always the last entry */
    409     NUM_CU_ME_INTRA_PRED_IDX
    410 
    411 } CU_ME_INTRA_PRED_IDX_T;
    412 
    413 /*****************************************************************************/
    414 /* Structure                                                                 */
    415 /*****************************************************************************/
    416 
/**
******************************************************************************
*  @brief     Structure to store TU prms req. for enc_loop only
*
*  Holds, per TU, the zero row / column information of the luma and chroma
*  blocks, the number of bytes of ecd data consumed by each component and a
*  set of one-bit control flags. The chroma arrays have two entries each.
******************************************************************************
*/
typedef struct
{
    /** Zero_col info. for the current TU Luma */
    UWORD32 u4_luma_zero_col;
    /** Zero_row info. for the current TU Luma */
    UWORD32 u4_luma_zero_row;

    /** Zero_col info. for the current TU Chroma Cb */
    UWORD32 au4_cb_zero_col[2];
    /** Zero_row info. for the current TU Chroma Cb */
    UWORD32 au4_cb_zero_row[2];
    /** Zero_col info. for the current TU Chroma Cr */
    UWORD32 au4_cr_zero_col[2];
    /** Zero_row info. for the current TU Chroma Cr */
    UWORD32 au4_cr_zero_row[2];

    /** bytes consumed by the luma ecd data */
    WORD16 i2_luma_bytes_consumed;
    /** bytes consumed by the Cb ecd data */
    WORD16 ai2_cb_bytes_consumed[2];
    /** bytes consumed by the Cr ecd data */
    WORD16 ai2_cr_bytes_consumed[2];

    /** flag to re-evaluate IQ and Coeff data of luma in the final_recon
    function. If zero, uses the data from RDOPT cand.                   */
    UWORD16 b1_eval_luma_iq_and_coeff_data : 1;
    /** flag to re-evaluate IQ and Coeff data of chroma in the final_recon
    function. If zero, uses the data from RDOPT cand.                   */
    UWORD16 b1_eval_chroma_iq_and_coeff_data : 1;

    /* TO DO : No support now, need to add. Always compare ZERO_CBF cost */
    /** Luma ZERO_CBF cost is compared with residue coding cost only if this
    flag is enabled */
    UWORD16 b1_eval_luma_zero_cbf_cost : 1;
    /** Chroma ZERO_CBF cost is compared with residue coding cost only if this
    flag is enabled */
    UWORD16 b1_eval_chroma_zero_cbf_cost : 1;

    /** Reserved bits: pads the four one-bit flags above to a full 16 bits
    (to make WORD32 alignment) */
    UWORD16 b12_reserved : 12;

} tu_enc_loop_temp_prms_t;
    464 
/**
* Bookkeeping for the recon buffers: the luma and chroma buffer pointers,
* per-TU ids of the buffer that holds the winning recon, the buffer strides
* and availability flags.
*/
typedef struct recon_datastore_t
{
    /* 2 to store current and best */
    void *apv_luma_recon_bufs[2];

    /* 0 to store cur chroma mode recon */
    /* 1 to store winning independent chroma mode with a single TU's recon */
    /* 2 to store winning independent chroma mode with 4 TUs' recon */
    void *apv_chroma_recon_bufs[3];

    /* The following two arrays are used to store the ID's of the buffers */
    /* where the winning recon is being stored */
    /* For Luma buffers, the permissible values are 0, 1 and UCHAR_MAX */
    /* For Chroma buffers, the permissible values are 0, 1, 2 and UCHAR_MAX */
    /* The value 'UCHAR_MAX' indicates the absence of Recon for that particular TU */
    UWORD8 au1_bufId_with_winning_LumaRecon[MAX_TU_IN_CTB_ROW * MAX_TU_IN_CTB_ROW];

    /* 2 - 2 Chroma planes */
    /* 2 - 2 possible subTU's */
    UWORD8 au1_bufId_with_winning_ChromaRecon[2][MAX_TU_IN_CTB_ROW * MAX_TU_IN_CTB_ROW][2];

    /* stride of the luma recon buffers */
    WORD32 i4_lumaRecon_stride;

    /* stride of the chroma recon buffers */
    WORD32 i4_chromaRecon_stride;

    /* one availability flag per entry of apv_chroma_recon_bufs */
    UWORD8 au1_is_chromaRecon_available[3];

    /* availability flag for the luma recon */
    UWORD8 u1_is_lumaRecon_available;

} recon_datastore_t;
    495 
/**
* Final parameters of a CU as produced by enc_loop: mode decisions, PU / TU
* arrays, bit and distortion estimates, pointers to the coefficient buffers
* and the RDOPT costs, together with the recon buffer bookkeeping.
*/
typedef struct enc_loop_cu_final_prms_t
{
    /** recon buffer bookkeeping for this CU */
    recon_datastore_t s_recon_datastore;

    /**
    * Cu size of the current cu being processed
    */
    UWORD8 u1_cu_size;
    /**
    * flags to indicate the final cu prediction mode
    */
    UWORD8 u1_intra_flag;

    /**
    * flags to indicate Skip mode for CU
    */
    UWORD8 u1_skip_flag;

    /**
    * number of tu in current cu for a given mode
    * if skip then this value should be 1
    */
    UWORD16 u2_num_tus_in_cu;

    /**
    * number of pu in current cu for a given mode
    * if skip then this value should be 1
    */
    UWORD16 u2_num_pus_in_cu;

    /**
    * total bytes produced in ECD data buffer
    * if skip then this value should be 0
    */
    WORD32 i4_num_bytes_ecd_data;

    /**
    * Partition mode of the best candidate
    * if skip then this value should be SIZE_2Nx2N
    * @sa PART_SIZE_E
    */
    UWORD8 u1_part_mode;

    /**
    * indicates if inter cu has coded coeffs 1: coded, 0: not coded
    * if skip then this value should be ignored
    */
    UWORD8 u1_is_cu_coded;

    /**
    * Chroma pred mode as signalled in bitstream
    */
    UWORD8 u1_chroma_intra_pred_mode;

    /**
    * To store the best chroma mode for TU. Will be same for NxN case.
    * Actual Chroma pred
    */
    UWORD8 u1_chroma_intra_pred_actual_mode;

    /**
    * sad accumulated over all Tus of given CU
    */
    UWORD32 u4_cu_sad;

    /**
    * ssd accumulated over all Tus of given CU
    * NOTE(review): the comment previously said "sad" (copied from the field
    * above); "ssd" is taken from the field name - confirm
    */
    LWORD64 i8_cu_ssd;

    /**
    * open loop intra sad
    */
    UWORD32 u4_cu_open_intra_sad;

    /**
    * header bits of cu estimated during RDO evaluation.
    * Includes tu splits flags excludes cbf flags
    */
    UWORD32 u4_cu_hdr_bits;
    /**
    * luma residual bits of a cu estimated during RDO evaluation.
    */
    UWORD32 u4_cu_luma_res_bits;

    /**
    * chroma residual bits of a cu estimated during RDO evaluation.
    */
    UWORD32 u4_cu_chroma_res_bits;

    /**
    * cbf bits of a cu estimated during RDO evaluation (considered as part of texture bits later)
    */
    UWORD32 u4_cu_cbf_bits;

    /**
    * array of PU for current CU
    * For Inter PUs this will contain the following
    *   - merge flag
    *   - (MVD and reference indices) or (Merge Index)
    *   - (if Cu is skipped then Merge index for skip
    *      will be in 1st PU entry in array)
    * for intra PU only intra flag will be set to 1
    *
    */
    pu_t as_pu_enc_loop[NUM_PU_PARTS];

    /**
    * array of PU for chroma usage
    * in case of Merge, MVs and reference idx of the final candidate
    * used by luma need to be stored
    * for intra PU this will not be used
    */
    pu_t as_pu_chrm_proc[NUM_PU_PARTS];

    /**
    * array of colocated PU for current CU
    * MV and Ref pic id should be stored in this
    * for intra PU only intra flag will be set to 1
    */
    pu_col_mv_t as_col_pu_enc_loop[NUM_INTER_PU_PARTS];

    /** array to store the intra mode pred related params
    * if nxn mode then all 4 locations will be used
    */
    intra_prev_rem_flags_t as_intra_prev_rem[NUM_PU_PARTS];

    /**
    * array to store TU properties of each tu in a CU
    */
    tu_enc_loop_out_t as_tu_enc_loop[MAX_TU_IN_CTB_ROW * MAX_TU_IN_CTB_ROW];

    /**
    * array to store TU properties (req. for enc_loop only and not for
    * entropy) of each tu in a CU
    */
    tu_enc_loop_temp_prms_t as_tu_enc_loop_temp_prms[MAX_TU_IN_CTB_ROW * MAX_TU_IN_CTB_ROW];

    /**
    * Neighbour flags stored for chroma reuse
    */
    UWORD32 au4_nbr_flags[MAX_TU_IN_CTB_ROW * MAX_TU_IN_CTB_ROW];

    /**
    * intra pred modes stored for chroma reuse
    */
    UWORD8 au1_intra_pred_mode[4];

    /**
    * pointer to the buffer storing coeffs during RD opt stage at CU level.
    * Luma and chroma together
    */
    UWORD8 *pu1_cu_coeffs;

    /**
    * Chroma coeffs start point.
    * NOTE(review): presumably an offset into the pu1_cu_coeffs buffer; the
    * comment previously referred to an "ai2_cu_deq_coeffs" buffer that does
    * not exist in this structure - confirm
    */
    WORD32 i4_chrm_cu_coeff_strt_idx;

    /**
    * pointer to the buffer storing dequantized vals. during RD opt stage at
    * CU level. Luma and chroma together.
    * Stride is assumed to be cu_size
    * u-v interleaved storing is at TU level
    */
    WORD16 *pi2_cu_deq_coeffs;

    /**
    * Chroma deq_coeffs start point.
    * NOTE(review): presumably an offset into the pi2_cu_deq_coeffs buffer
    * (the comment previously named it "ai2_cu_deq_coeffs") - confirm
    */
    WORD32 i4_chrm_deq_coeff_strt_idx;

    /**
    * The total RDOPT cost of the CU for the best mode
    */
    LWORD64 i8_best_rdopt_cost;

    /**
    * The current running RDOPT cost for the current mode
    */
    LWORD64 i8_curr_rdopt_cost;

    /**
    * NOTE(review): presumably the distortion of the best mode, going by the
    * field name - confirm
    */
    LWORD64 i8_best_distortion;

} enc_loop_cu_final_prms_t;
    681 
    682 typedef struct
    683 { /* Chroma buffer pointers and strides used for the current CU */
    684     /** Current CU chroma recon pointer in pic buffer */
    685     UWORD8 *pu1_final_recon;
    686 
    687     UWORD16 *pu2_final_recon; /* UWORD16 variant of the recon pointer; presumably for >8-bit samples - TODO confirm */
    688 
    689     /** Current CU chroma source pointer in pic buffer */
    690     UWORD8 *pu1_curr_src;
    691 
    692     UWORD16 *pu2_curr_src; /* UWORD16 variant of the source pointer */
    693 
    694     /** Current CU chroma recon buffer stride */
    695     WORD32 i4_chrm_recon_stride;
    696 
    697     /** Current CU chroma source buffer stride */
    698     WORD32 i4_chrm_src_stride;
    699 
    700     /** Current CU chroma left pointer for intra pred */
    701     UWORD8 *pu1_cu_left;
    702 
    703     UWORD16 *pu2_cu_left; /* UWORD16 variant of the left pointer */
    704 
    705     /** Left buffer stride */
    706     WORD32 i4_cu_left_stride;
    707 
    708     /** Current CU chroma top pointer for intra pred */
    709     UWORD8 *pu1_cu_top;
    710 
    711     UWORD16 *pu2_cu_top; /* UWORD16 variant of the top pointer */
    712 
    713     /** Current CU chroma top-left pointer for intra pred */
    714     UWORD8 *pu1_cu_top_left;
    715 
    716     UWORD16 *pu2_cu_top_left; /* UWORD16 variant of the top-left pointer */
    717 
    718 } enc_loop_chrm_cu_buf_prms_t;
    719 
    720 typedef struct
    721 { /* Per-candidate output of the CU intra SATD stage */
    722     /** cost of the current satd cand */
    723     WORD32 i4_cost;
    724 
    725     /** tu size w.r.t. cu of the current satd cand
    726     * @sa TU_SIZE_WRT_CU_T
    727     */
    728     WORD8 i4_tu_depth; /* NOTE: declared WORD8 although the name carries the i4_ prefix */
    729 
    730     /**
    731     *  access valid number of entries in this array based on u1_part_mode
    732     */
    733     UWORD8 au1_intra_luma_modes[NUM_PU_PARTS];
    734 
    735     /** @remarks partition mode: 2Nx2N or NxN */
    736     UWORD8 u1_part_mode; /* @sa: PART_SIZE_E */
    737 
    738     /** Flag to indicate whether current candidate needs to be evaluated */
    739     UWORD8 u1_eval_flag;
    740 
    741 } cu_intra_satd_out_t;
    742 
    743 /** \brief cu level parameters for SATD / RDOPT function */
    744 
    745 typedef struct
    746 {
    747     /** pointer to luma source
    748     *  pointer will be pointing to CTB start location
    749     *  At CU level, based on the CU position, this pointer
    750     *  has to be appropriately incremented
    751     */
    752     UWORD8 *pu1_luma_src;
    753 
    754     UWORD16 *pu2_luma_src; /* UWORD16 variant of the luma source pointer; presumably for >8-bit samples - TODO confirm */
    755 
    756     /** pointer to chroma source
    757     *  pointer will be pointing to CTB start location
    758     *  At CU level, based on the CU position, this pointer
    759     *  has to be appropriately incremented
    760     */
    761     UWORD8 *pu1_chrm_src;
    762 
    763     UWORD16 *pu2_chrm_src; /* UWORD16 variant of the chroma source pointer */
    764 
    765     /** pointer to luma recon
    766     *  pointer will be pointing to CTB start location
    767     *  At CU level, based on the CU position, this pointer
    768     *  has to be appropriately incremented
    769     */
    770     UWORD8 *pu1_luma_recon;
    771 
    772     UWORD16 *pu2_luma_recon; /* UWORD16 variant of the luma recon pointer */
    773 
    774     /** pointer to chroma recon
    775     *  pointer will be pointing to CTB start location
    776     *  At CU level, based on the CU position, this pointer
    777     *  has to be appropriately incremented
    778     */
    779     UWORD8 *pu1_chrm_recon;
    780 
    781     UWORD16 *pu2_chrm_recon; /* UWORD16 variant of the chroma recon pointer */
    782 
    783     /* 1st pass parallel dpb buffer pointers, similar to the above */
    784     UWORD8 *pu1_luma_recon_src;
    785 
    786     UWORD16 *pu2_luma_recon_src;
    787 
    788     UWORD8 *pu1_chrm_recon_src;
    789 
    790     UWORD16 *pu2_chrm_recon_src;
    791 
    792     /** Pointer to Subpel Plane Buffer (hxfy plane) */
    793     UWORD8 *pu1_sbpel_hxfy;
    794 
    795     /** Pointer to Subpel Plane Buffer (fxhy plane) */
    796     UWORD8 *pu1_sbpel_fxhy;
    797 
    798     /** Pointer to Subpel Plane Buffer (hxhy plane) */
    799     UWORD8 *pu1_sbpel_hxhy;
    800 
    801     /** Luma source stride */
    802     WORD32 i4_luma_src_stride;
    803 
    804     /** Chroma source stride */
    805     WORD32 i4_chrm_src_stride;
    806 
    807     /** Luma recon stride */
    808     WORD32 i4_luma_recon_stride;
    809 
    810     /** Chroma recon stride */
    811     WORD32 i4_chrm_recon_stride;
    812 
    813     /** ctb size */
    814     WORD32 i4_ctb_size;
    815 
    816     /** current ctb position horz */
    817     WORD32 i4_ctb_pos;
    818 
    819     /** number of PUs finalized for curr CU  */
    820     WORD32 i4_num_pus_in_cu;
    821 
    822     /** number of bytes consumed for current in ecd data buf */
    823     WORD32 i4_num_bytes_cons;
    824 
    825     UWORD8 u1_is_cu_noisy; /* presumably non-zero when the CU is classified as noisy - TODO confirm */
    826 
    827     UWORD8 *pu1_is_8x8Blk_noisy; /* presumably per-8x8-block noise flags - TODO confirm layout */
    828 
    829 } enc_loop_cu_prms_t;
    830 
    831 /**
    832 ******************************************************************************
    833 *  @brief Pad inter pred recon context
    834 ******************************************************************************
    835 */
    836 typedef struct
    837 {
    838     /** Pointer to Subpel Plane Buffer (hxfy plane) */
    839     UWORD8 *pu1_sbpel_hxfy;
    840 
    841     /** Pointer to Subpel Plane Buffer (fxhy plane) */
    842     UWORD8 *pu1_sbpel_fxhy;
    843 
    844     /** Pointer to Subpel Plane Buffer (hxhy plane) */
    845     UWORD8 *pu1_sbpel_hxhy;
    846 
    847     /** pointer to luma recon
    848     *  pointer will be pointing to CTB start location
    849     *  At CU level, based on the CU position, this pointer
    850     *  has to be appropriately incremented
    851     */
    852     UWORD8 *pu1_luma_recon;
    853 
    854     /** pointer to chroma recon
    855     *  pointer will be pointing to CTB start location
    856     *  At CU level, based on the CU position, this pointer
    857     *  has to be appropriately incremented
    858     */
    859     UWORD8 *pu1_chrm_recon;
    860 
    861     /* For recon source: 1st pass starts */
    862 
    863     UWORD8 *pu1_luma_recon_src; /* luma recon source pointer (1st pass) */
    864 
    865     /** pointer to chroma recon source (1st pass)
    866     *  pointer will be pointing to CTB start location
    867     *  At CU level, based on the CU position, this pointer
    868     *  has to be appropriately incremented
    869     */
    870     UWORD8 *pu1_chrm_recon_src;
    871     /* For recon source: 1st pass ends */
    872     /** Luma recon stride */
    873     WORD32 i4_luma_recon_stride;
    874 
    875     /** Chroma recon stride */
    876     WORD32 i4_chrm_recon_stride;
    877 
    878     /** ctb size */
    879     WORD32 i4_ctb_size;
    880 
    881     /* Chroma format: 0 - 400; 1 - 420; 2 - 422; 3 - 444 */
    882     UWORD8 u1_chroma_array_type;
    883 
    884 } pad_interp_recon_frm_t;
    885 
    886 /**
    887 ******************************************************************************
    888 *  @brief inter prediction (MC) context for enc loop
    889 ******************************************************************************
    890 */
    891 /* IMPORTANT: please keep inter_pred_ctxt_t and inter_pred_me_ctxt_t identical */
    892 typedef struct
    893 {
    894     /** pointer to reference lists: points at array(s) of HEVCE_MAX_REF_PICS * 2 recon_pic_buf_t pointers */
    895     recon_pic_buf_t *(*ps_ref_list)[HEVCE_MAX_REF_PICS * 2];
    896 
    897     /** scratch buffer for horizontal interpolation destination; MAX_CTB_SIZE x (MAX_CTB_SIZE + 8) entries */
    898     WORD16 MEM_ALIGN16 ai2_horz_scratch[MAX_CTB_SIZE * (MAX_CTB_SIZE + 8)];
    899 
    900     /** scratch 16 bit buffer for interpolation in l0 direction */
    901     WORD16 MEM_ALIGN16 ai2_scratch_buf_l0[MAX_CTB_SIZE * MAX_CTB_SIZE];
    902 
    903     /** scratch 16 bit buffer for interpolation in l1 direction */
    904     WORD16 MEM_ALIGN16 ai2_scratch_buf_l1[MAX_CTB_SIZE * MAX_CTB_SIZE];
    905 
    906     /** Pointer to struct containing function pointers to
    907     functions in the 'common' library */
    908     func_selector_t *ps_func_selector;
    909 
    910     /** common denominator used for luma weights */
    911     WORD32 i4_log2_luma_wght_denom;
    912 
    913     /** common denominator used for chroma weights */
    914     WORD32 i4_log2_chroma_wght_denom;
    915 
    916     /**  offset w.r.t frame start in horz direction (pels) */
    917     WORD32 i4_ctb_frm_pos_x;
    918 
    919     /**  offset w.r.t frame start in vert direction (pels) */
    920     WORD32 i4_ctb_frm_pos_y;
    921 
    922     /* Bit depth of input */
    923     WORD32 i4_bit_depth;
    924 
    925     /* Chroma format: 0 - 400; 1 - 420; 2 - 422; 3 - 444 */
    926     UWORD8 u1_chroma_array_type;
    927 
    928     /** weighted_pred_flag      */
    929     WORD8 i1_weighted_pred_flag;
    930 
    931     /** weighted_bipred_flag    */
    932     WORD8 i1_weighted_bipred_flag;
    933 
    934     /** Structure to describe extra CTBs around frame due to search
    935     range associated with distributed-mode. Entries are top, left,
    936     right and bottom. NOTE(review): field name suggests units of pels, not CTBs - confirm */
    937     WORD32 ai4_tile_xtra_pel[4];
    938 
    939 } inter_pred_ctxt_t;
    940 /* IMPORTANT: please keep inter_pred_ctxt_t and inter_pred_me_ctxt_t identical */
    941 
    942 typedef IV_API_CALL_STATUS_T (*PF_LUMA_INTER_PRED_PU)( /* function pointer type; by its name, a per-PU luma inter prediction routine */
    943     void *pv_inter_pred_ctxt, /* opaque context; presumably an inter prediction context - TODO confirm */
    944     pu_t *ps_pu, /* PU descriptor */
    945     void *pv_dst_buf, /* destination buffer */
    946     WORD32 dst_stride, /* stride of pv_dst_buf */
    947     WORD32 i4_flag_inter_pred_source); /* NOTE(review): semantics not visible here; presumably selects the prediction source - confirm */
    948 
    949 /**
    950 ******************************************************************************
    951 *  @brief  Motion predictor context structure
    952 ******************************************************************************
    953 */
    954 typedef struct
    955 {
    956     /** pointer to reference lists: points at array(s) of HEVCE_MAX_REF_PICS * 2 recon_pic_buf_t pointers */
    957     recon_pic_buf_t *(*ps_ref_list)[HEVCE_MAX_REF_PICS * 2];
    958 
    959     /** pointer to the slice header */
    960     slice_header_t *ps_slice_hdr;
    961 
    962     /** pointer to SPS */
    963     sps_t *ps_sps;
    964 
    965     /** CTB x. In CTB units */
    966     WORD32 i4_ctb_x;
    967 
    968     /** CTB y. In CTB units */
    969     WORD32 i4_ctb_y;
    970 
    971     /** Log2 of the parallel merge level, minus 2 */
    972     WORD32 i4_log2_parallel_merge_level_minus2;
    973 
    974     /* Number of extra CTBs external to tile due to fetched search-range around Tile */
    975     /* Entries are top, left, right and bottom */
    976     WORD32 ai4_tile_xtra_ctb[4];
    977 
    978 } mv_pred_ctxt_t;
    979 
    980 /**
    981 ******************************************************************************
    982 *  @brief  Deblocking boundary strength (BS) CTB level structure
    983 ******************************************************************************
    984 */
    985 typedef struct
    986 {
    987     /** Array to store the packed BS values in horizontal direction; (MAX_CTB_SIZE >> 3) + 1 words */
    988     UWORD32 au4_horz_bs[(MAX_CTB_SIZE >> 3) + 1];
    989 
    990     /** Array to store the packed BS values in vertical direction; (MAX_CTB_SIZE >> 3) + 1 words */
    991     UWORD32 au4_vert_bs[(MAX_CTB_SIZE >> 3) + 1];
    992 
    993     /** CTB neighbour availability flags for deblocking (named "not first column" / "not first row" of the frame) */
    994     UWORD8 u1_not_first_ctb_col_of_frame;
    995     UWORD8 u1_not_first_ctb_row_of_frame;
    996 
    997 } deblk_bs_ctb_ctxt_t;
    998 
    999 /**
   1000 ******************************************************************************
   1001 *  @brief  Deblocking CTB level parameters structure
   1002 ******************************************************************************
   1003 */
   1004 typedef struct
   1005 {
   1006     /**
   1007     * BS of the last vertical 4x4 column of previous CTB
   1008     */
   1009     UWORD8 au1_prev_bs[MAX_CTB_SIZE >> 3];
   1010 
   1011     /**
   1012     * BS of the last vertical 4x4 column of previous CTB; presumably the chroma (uv) counterpart of au1_prev_bs - TODO confirm
   1013     */
   1014     UWORD8 au1_prev_bs_uv[MAX_CTB_SIZE >> 3];
   1015 
   1016     /** pointer to top 4x4 ctb nbr structure; for accessing qp  */
   1017     nbr_4x4_t *ps_top_ctb_nbr_4x4;
   1018 
   1019     /** pointer to left 4x4 ctb nbr structure; for accessing qp */
   1020     nbr_4x4_t *ps_left_ctb_nbr_4x4;
   1021 
   1022     /** pointer to current 4x4 ctb nbr structure; for accessing qp */
   1023     nbr_4x4_t *ps_cur_ctb_4x4;
   1024 
   1025     /** horizontal BS: max of 8 such contiguous bs to be computed for 64x64 ctb */
   1026     UWORD32 *pu4_bs_horz;
   1027 
   1028     /** vertical BS: max of 8 such contiguous bs to be computed for 64x64 ctb */
   1029     UWORD32 *pu4_bs_vert;
   1030 
   1031     /** ptr to current ctb luma pel in frame */
   1032     UWORD8 *pu1_ctb_y;
   1033 
   1034     UWORD16 *pu2_ctb_y; /* UWORD16 variant of the luma pointer; presumably for >8-bit samples - TODO confirm */
   1035 
   1036     /** ptr to current ctb sp interleaved chroma pel in frame */
   1037     UWORD8 *pu1_ctb_uv;
   1038 
   1039     UWORD16 *pu2_ctb_uv; /* UWORD16 variant of the chroma pointer */
   1040 
   1041     func_selector_t *ps_func_selector; /* function selector; NOTE(review): which entries are used is not visible here */
   1042 
   1043     /** left nbr buffer stride in terms of 4x4 units */
   1044     WORD32 i4_left_nbr_4x4_strd;
   1045 
   1046     /** current  buffer stride in terms of 4x4 units */
   1047     WORD32 i4_cur_4x4_strd;
   1048 
   1049     /** size in pels 16 / 32 /64 */
   1050     WORD32 i4_ctb_size;
   1051 
   1052     /** stride for luma       */
   1053     WORD32 i4_luma_pic_stride;
   1054 
   1055     /** stride for  chroma */
   1056     WORD32 i4_chroma_pic_stride;
   1057 
   1058     /** boolean indicating if left ctb edge is to be deblocked or not */
   1059     WORD32 i4_deblock_left_ctb_edge;
   1060 
   1061     /** boolean indicating if top ctb edge is to be deblocked or not */
   1062     WORD32 i4_deblock_top_ctb_edge;
   1063 
   1064     /** beta offset index */
   1065     WORD32 i4_beta_offset_div2;
   1066 
   1067     /** tc offset index */
   1068     WORD32 i4_tc_offset_div2;
   1069 
   1070     /** chroma cb qp offset index */
   1071     WORD32 i4_cb_qp_indx_offset;
   1072 
   1073     /** chroma cr qp offset index */
   1074     WORD32 i4_cr_qp_indx_offset;
   1075 
   1076     WORD32 i4_bit_depth; /* bit depth; NOTE(review): whether input or internal depth is not stated here */
   1077 
   1078     /* Chroma format: 0 - 400; 1 - 420; 2 - 422; 3 - 444 */
   1079     UWORD8 u1_chroma_array_type;
   1080 
   1081 } deblk_ctb_params_t;
   1082 
   1083 /**
   1084 ******************************************************************************
   1085 *  @brief  Stores the BS and Qp of a CTB row. For CTB-row level deblocking
   1086 ******************************************************************************
   1087 */
   1088 typedef struct deblk_ctbrow_prms
   1089 {
   1090     /**
   1091     * Refer to ihevce_enc_loop_get_mem_recs() and
   1092     * ihevce_enc_loop_init() for more info
   1093     * regarding memory allocation to each one below.
   1094     */
   1095 
   1096     /**
   1097     * Stores the vertical boundary strength of a CTB row.
   1098     */
   1099     UWORD32 *pu4_ctb_row_bs_vert;
   1100 
   1101     /**
   1102     * Storage is same as above. Contains horizontal BS.
   1103     */
   1104     UWORD32 *pu4_ctb_row_bs_horz;
   1105 
   1106     /**
   1107     * Pointer to the CTB row's Qp storage
   1108     */
   1109     WORD8 *pi1_ctb_row_qp;
   1110 
   1111     /**
   1112     * Stride of the Qp buffer in WORD32 unit (presumably pi1_ctb_row_qp; the earlier comment named pu1_ctb_row_qp_p - TODO confirm)
   1113     */
   1114     WORD32 u4_qp_buffer_stride; /* NOTE: declared WORD32 although the name carries the u4_ prefix */
   1115 
   1116     /*
   1117     *   Pointer to the  memory which contains the Qp of
   1118     *   top4x4 neighbour blocks for each CTB row.
   1119     *   This memory is at frame level.
   1120     */
   1121     WORD8 *api1_qp_top_4x4_ctb_row[MAX_NUM_ENC_LOOP_PARALLEL];
   1122 
   1123     /*
   1124     *   Stride of the above memory location.
   1125     *   Values in one stride correspond to one CTB row.
   1126     */
   1127     WORD32 u4_qp_top_4x4_buf_strd; /* NOTE: declared WORD32 although the name carries the u4_ prefix */
   1128 
   1129     /* size of frame level qp buffer */
   1130     WORD32 u4_qp_top_4x4_buf_size; /* NOTE: declared WORD32 although the name carries the u4_ prefix */
   1131 
   1132 } deblk_ctbrow_prms_t;
   1133 
   1134 /**
   1135 ******************************************************************************
   1136 *  @brief  Entropy rd opt context for cabac bit estimation and RDO
   1137 ******************************************************************************
   1138 */
   1139 typedef struct rdopt_entropy_ctxt
   1140 {
   1141     /**
   1142     * array of entropy contexts used during RD opt stage at CU level;
   1143     * two entries: one best and one current
   1144     */
   1145     entropy_context_t as_cu_entropy_ctxt[2];
   1146 
   1147     /**
   1148     * init state of entropy context models during CU RD opt stage,
   1149     * required for saving and restoring the cabac states
   1150     */
   1151     UWORD8 au1_init_cabac_ctxt_states[IHEVC_CAB_CTXT_END];
   1152 
   1153     /*
   1154     * ptr to top row cu skip flags (1 bit per 8x8 CU)
   1155     */
   1156     UWORD8 *pu1_cu_skip_top_row;
   1157 
   1158     /**
   1159     * Current entropy ctxt idx (presumably an index into as_cu_entropy_ctxt - TODO confirm)
   1160     */
   1161     WORD32 i4_curr_buf_idx;
   1162 
   1163 } rdopt_entropy_ctxt_t;
   1164 
   1165 /**
   1166 ******************************************************************************
   1167 *  @brief  structure to save predicted data from Inter SATD stage to Inter RD opt stage
   1168 ******************************************************************************
   1169 */
   1170 typedef struct
   1171 {
   1172     /* Buffers to store the predicted data after motion compensation for merge and
   1173     * skip candidates.
   1174     * Two buffers ([2]): motion compensation is done for 5 merge candidates.
   1175     *     The pred data after mc is stored for the first 2 candidates; from the 3rd
   1176     *     candidate onwards, the buffer holding the higher SATD cost is overwritten.
   1177     */
   1178     void *apv_pred_data[2];
   1179 
   1180     /** Stride of the stored predicted data
   1181     */
   1182     WORD32 i4_pred_data_stride;
   1183 
   1184 } merge_skip_pred_data_t;
   1185 /**
   1186 ******************************************************************************
   1187 *  @brief  Structure to hold Rate control related parameters
   1188 *          for each bit-rate instance and each thread
   1189 ******************************************************************************
   1190 */
   1191 typedef struct
   1192 {
   1193     /**
   1194     * frame level open loop ssd (field name indicates SSD, not SAD)
   1195     *
   1196     */
   1197     LWORD64 i8_frame_open_loop_ssd;
   1198 
   1199     /**
   1200     * frame level open loop intra sad
   1201     *
   1202     */
   1203     UWORD32 u4_frame_open_loop_intra_sad;
   1204     /**
   1205     * frame level intra sad accumulator
   1206     */
   1207     UWORD32 u4_frame_intra_sad;
   1208 
   1209     /**
   1210     *  frame level sad accumulator
   1211     */
   1212     UWORD32 u4_frame_sad_acc;
   1213 
   1214     /**
   1215     *  frame level inter sad accumulator
   1216     */
   1217     UWORD32 u4_frame_inter_sad_acc;
   1218 
   1219     /**
   1220     *  frame level intra sad accumulator
   1221     */
   1222     UWORD32 u4_frame_intra_sad_acc;
   1223 
   1224     /**
   1225     *  frame level cost accumulator
   1226     */
   1227     LWORD64 i8_frame_cost_acc;
   1228 
   1229     /**
   1230     *  frame level inter cost accumulator
   1231     */
   1232     LWORD64 i8_frame_inter_cost_acc;
   1233 
   1234     /**
   1235     *  frame level intra cost accumulator
   1236     */
   1237     LWORD64 i8_frame_intra_cost_acc;
   1238 
   1239     /**
   1240     * frame level rdopt bits accumulator
   1241     */
   1242     UWORD32 u4_frame_rdopt_bits;
   1243 
   1244     /**
   1245     * frame level rdopt header bits accumulator
   1246     */
   1247     UWORD32 u4_frame_rdopt_header_bits;
   1248 
   1249     /* Sum of the Qps of each 8x8 block in a CU.
   1250     * An 8x8 block is used because the minimum CU size allowed by the standard is 8.
   1251     * Index 0 corresponds to INTER and 1 corresponds to INTRA
   1252     */
   1253     WORD32 i4_qp_normalized_8x8_cu_sum[2];
   1254 
   1255     /* Count of the number of 8x8 blocks in each CU type (INTER/INTRA).
   1256     * Index 0 corresponds to INTER and 1 corresponds to INTRA
   1257     */
   1258     WORD32 i4_8x8_cu_sum[2];
   1259 
   1260     /* SAD/Qscale accumulated over all CUs. CU size is inherently
   1261     * taken care of in SAD. NOTE(review): meaning of the two entries is not stated here; presumably INTER/INTRA as above - confirm
   1262     */
   1263     LWORD64 i8_sad_by_qscale[2];
   1264 
   1265 } enc_loop_rc_params_t;
   1266 /**
   1267 ******************************************************************************
   1268 *  @brief  CU information structure. This is to store the
   1269 *  CU final output after recursion
   1270 ******************************************************************************
   1271 */
   1272 typedef struct ihevce_enc_cu_node_ctxt_t
   1273 {
   1274     /* CU params */
   1275     /** CU X position in terms of min CU (8x8) units; 3-bit field */
   1276     UWORD8 b3_cu_pos_x : 3;
   1277 
   1278     /** CU Y position in terms of min CU (8x8) units; 3-bit field */
   1279     UWORD8 b3_cu_pos_y : 3;
   1280 
   1281     /** reserved bits (2-bit field) */
   1282     UWORD8 b2_reserved : 2;
   1283 
   1284     /** CU size 2N (width or height) in pixels */
   1285     UWORD8 u1_cu_size;
   1286 
   1287     /**
   1288     * cu level final params for a given mode, stored by value (a single instance, not an array)
   1289     * NOTE(review): the earlier comment mentioned "one best and one current" entries - confirm intent
   1290     */
   1291     enc_loop_cu_final_prms_t s_cu_prms;
   1292 
   1293     /**
   1294     * pointer to cu level final params for a given mode
   1295     * NOTE(review): relationship to s_cu_prms is not visible here - confirm
   1296     */
   1297     enc_loop_cu_final_prms_t *ps_cu_prms;
   1298 
   1299     /* flag to indicate if current CU is the first
   1300     CU of the Quantisation group; 1-bit field */
   1301     UWORD32 b1_first_cu_in_qg : 1;
   1302 
   1303     /** qp used for the CU
   1304     * @remarks :
   1305     */
   1306     WORD8 i1_cu_qp;
   1307 
   1308 } ihevce_enc_cu_node_ctxt_t;
   1309 
   1310 typedef struct
   1311 { /* One block-merge result node: costs, reference index and motion vector */
   1312     WORD32 i4_sad; /* SAD of this node */
   1313 
   1314     WORD32 i4_mv_cost; /* MV cost of this node */
   1315 
   1316     WORD32 i4_tot_cost; /* total cost; presumably i4_sad + i4_mv_cost - TODO confirm */
   1317 
   1318     WORD8 i1_ref_idx; /* reference index */
   1319 
   1320     mv_t s_mv; /* motion vector */
   1321 
   1322 } block_merge_nodes_t;
   1323 
   1324 /**
   1325 ******************************************************************************
   1326 *  @brief  This struct is used for storing output of block merge
   1327 ******************************************************************************
   1328 */
   1329 typedef struct
   1330 {
   1331     block_merge_nodes_t *aps_best_results[MAX_NUM_PARTS]; /* one result-node pointer per entry; MAX_NUM_PARTS entries */
   1332 
   1333     /* Contains the best uni dir; NOTE(review): sized MAX_NUM_PARTS, so presumably indexed by partition - confirm */
   1334     WORD32 ai4_best_uni_dir[MAX_NUM_PARTS];
   1335 
   1336     /* Contains the best pred dir; NOTE(review): sized MAX_NUM_PARTS, so presumably indexed by partition - confirm */
   1337     WORD32 ai4_best_pred_dir[MAX_NUM_PARTS];
   1338 
   1339     WORD32 i4_tot_cost; /* total cost of this result */
   1340 
   1341     PART_TYPE_T e_part_type; /* partition type of this result */
   1342 } block_merge_results_t;
   1343 
   1344 /**
   1345 ******************************************************************************
   1346 *  @brief  This struct is used for storing output of block merge and also
   1347 *          all of the intermediate results required
   1348 ******************************************************************************
   1349 */
   1350 typedef struct
   1351 {
   1352     block_merge_results_t as_best_results[3 + 1][NUM_BEST_ME_OUTPUTS]; /* NOTE(review): meaning of the 3 + 1 first dimension is not visible here - confirm */
   1353 
   1354     block_merge_nodes_t as_nodes[3][TOT_NUM_PARTS][NUM_BEST_ME_OUTPUTS]; /* node storage; NOTE(review): meaning of the first dimension (3) is not visible here */
   1355 
   1356     WORD32 part_mask; /* presumably a bit mask of partitions/partition types in use - TODO confirm */
   1357 
   1358     WORD32 num_results_per_part; /* number of results per partition */
   1359 
   1360     WORD32 num_best_results; /* number of best results */
   1361 
   1362     /**
   1363     * Overall best CU cost. While other entries store CU costs
   1364     * in a single direction, this is the best CU cost where each
   1365     * partition cost is evaluated as best of uni/bi
   1366     */
   1367     WORD32 best_cu_cost;
   1368 
   1369 } block_merge_data_t;
   1370 /**
   1371 ******************************************************************************
   1372 *  @brief  CU nbr information structure. This is to store the
   1373 *  neighbour information for final reconstruction function
   1374 ******************************************************************************
   1375 */
   1376 typedef struct
   1377 {
   1378     /* Pointer to top-left nbr */
   1379     nbr_4x4_t *ps_topleft_nbr_4x4;
   1380     /* Pointer to left nbr */
   1381     nbr_4x4_t *ps_left_nbr_4x4;
   1382     /* Pointer to top nbr */
   1383     nbr_4x4_t *ps_top_nbr_4x4;
   1384     /* Stride of ps_left_nbr_4x4 */
   1385     WORD32 nbr_4x4_left_strd;
   1386 
   1387     /* Pointer to CU top */
   1388     UWORD8 *pu1_cu_top;
   1389 
   1390     UWORD16 *pu2_cu_top; /* UWORD16 variant of the CU top pointer; presumably for >8-bit samples - TODO confirm */
   1391 
   1392     /* Pointer to CU top-left */
   1393     UWORD8 *pu1_cu_top_left;
   1394 
   1395     UWORD16 *pu2_cu_top_left; /* UWORD16 variant of the CU top-left pointer */
   1396 
   1397     /* Pointer to CU left */
   1398     UWORD8 *pu1_cu_left;
   1399 
   1400     UWORD16 *pu2_cu_left; /* UWORD16 variant of the CU left pointer */
   1401 
   1402     /* Stride of the CU left pointer */
   1403     WORD32 cu_left_stride;
   1404 } cu_nbr_prms_t;
   1405 
   1406 /** Structure to save the flags required for the final mode reconstruction
   1407 function. These flags are set based on quality presets and
   1408 the bit-rate we are working on */
   1409 typedef struct
   1410 {
   1411     /** Flag to indicate whether luma pred data needs to be recomputed in the
   1412     final_recon function. Now disabled for all modes */
   1413     UWORD8 u1_eval_luma_pred_data;
   1414 
   1415     /** Flag to indicate whether chroma pred data needs to be recomputed in the
   1416     final_recon function. Now disabled for MedSpeed only */
   1417     UWORD8 u1_eval_chroma_pred_data;
   1418 
   1419     /** Flag to indicate whether header data needs to be recomputed in the
   1420     final_recon function. Now disabled for all modes */
   1421     UWORD8 u1_eval_header_data;
   1422 
   1423     UWORD8 u1_eval_recon_data; /* presumably: whether recon data needs to be (re)computed in final_recon - TODO confirm */
   1424 } cu_final_recon_flags_t;
   1425 
   1426 /**
   1427 ******************************************************************************
   1428 *  @brief  structure to save pred data of ME cand. 1 ping-pong to store
   1429 *  the best and current luma cand. 1 buffer to store the best chroma pred
   1430 ******************************************************************************
   1431 */
   1432 typedef struct
   1433 {
   1434     /** Pointers to store luma pred data of me/intra cand.(2) and chroma(1) */
   1435     UWORD8 *pu1_pred_data[NUM_CU_ME_INTRA_PRED_IDX];
   1436 
   1437     UWORD16 *pu2_pred_data[NUM_CU_ME_INTRA_PRED_IDX]; /* UWORD16 variants of the pred data pointers; presumably for >8-bit samples - TODO confirm */
   1438 
   1439     /** Strides of the predicted data of me/intra cand.(2) and chroma(1) */
   1440     WORD32 ai4_pred_data_stride[NUM_CU_ME_INTRA_PRED_IDX];
   1441     /** Counter saying how many pointers are assigned */
   1442     WORD32 i4_pointer_count;
   1443 
   1444 } cu_me_intra_pred_prms_t;
   1445 
   1446 /**
   1447 ******************************************************************************
   1448 *  @brief  Chroma intra SATD context structure
   1449 ******************************************************************************
   1450 */
   1451 typedef struct
   1452 {
   1453     /** Storing the inverse quantized data (cb) for the special modes */
   1454     WORD16 ai2_iq_data_cb[(MAX_TU_SIZE * MAX_TU_SIZE) << 1];
   1455 
   1456     /** Storing the inverse quantized data (cr) for the special modes */
   1457     WORD16 ai2_iq_data_cr[(MAX_TU_SIZE * MAX_TU_SIZE) << 1];
   1458 
   1459     /** Storing the scan coeffs (cb) for the special modes */
   1460     UWORD8 au1_scan_coeff_cb[2][(MAX_TU_IN_CTB >> 1) * MAX_SCAN_COEFFS_BYTES_4x4];
   1461 
   1462     /** Storing the scan coeffs (cr) for the special modes */
   1463     UWORD8 au1_scan_coeff_cr[2][(MAX_TU_IN_CTB >> 1) * MAX_SCAN_COEFFS_BYTES_4x4];
   1464 
   1465     /** Max number of bytes filled in scan coeff data (cb) per TU */
   1466     WORD32 ai4_num_bytes_scan_coeff_cb_per_tu[2][MAX_TU_IN_TU_EQ_DIV_2];
   1467 
   1468     /** Max number of bytes filled in scan coeff data (cr) per TU */
   1469     WORD32 ai4_num_bytes_scan_coeff_cr_per_tu[2][MAX_TU_IN_TU_EQ_DIV_2];
   1470 
   1471     /** Stride of the iq buffer */
   1472     WORD32 i4_iq_buff_stride;
   1473 
   1474     /** Storing the pred data
   1475     The predicted data is always interleaved. Therefore the size of the buffer pointed to will be
   1476     ((MAX_TU_SIZE * MAX_TU_SIZE) >> 2) * 2) */
   1477     void *pv_pred_data;
   1478 
   1479     /** Predicted data stride */
   1480     WORD32 i4_pred_stride;
   1481 
   1482     /** Storing the cbfs (cb) for each tu
   1483     For 1 tu case, only the 0th element will be valid */
   1484     UWORD8 au1_cbf_cb[2][MAX_TU_IN_TU_EQ_DIV_2];
   1485 
   1486     /** Storing the cbfs (cr) for each tu
   1487     For 1 tu case, only the 0th element will be valid */
   1488     UWORD8 au1_cbf_cr[2][MAX_TU_IN_TU_EQ_DIV_2];
   1489 
   1490     /** To store the cabac ctxt model updated by the RDOPT of best chroma mode.
   1491     NOTE(review): an earlier "[0] : 1 TU case, [1] : 4 TU case" note does not match this one-dimensional array - confirm */
   1492     UWORD8 au1_chrm_satd_updated_ctxt_models[IHEVC_CAB_CTXT_END];
   1493 
   1494     /** Best SATD chroma mode. Values : 0(PLANAR), 1(VERT), 2(HOR), 3(DC).
   1495     NOTE(review): an earlier "[0]/[1] per TU case" note does not match this scalar field - confirm */
   1496     UWORD8 u1_best_cr_mode;
   1497 
   1498     /** Best SATD chroma mode's RDOPT cost (scalar; the earlier "[0]/[1] per TU case" note does not apply to this field as declared) */
   1499     LWORD64 i8_chroma_best_rdopt;
   1500 
   1501     /* Account for coding b3_chroma_intra_pred_mode prefix and suffix bins */
   1502     /* This is done by adding the bits for signalling chroma mode (0-3)    */
   1503     /* and subtracting the bits for chroma mode same as luma mode (4)      */
   1504     LWORD64 i8_cost_to_encode_chroma_mode;
   1505 
   1506     /** Best SATD chroma mode's tu bits (scalar; the earlier "[0]/[1] per TU case" note does not apply to this field as declared) */
   1507     WORD32 i4_chrm_tu_bits;
   1508 
   1509     /** Storing the zero col values for each TU for cb */
   1510     WORD32 ai4_zero_col_cb[2][MAX_TU_IN_TU_EQ_DIV_2];
   1511 
   1512     /** Storing the zero col values for each TU for cr */
   1513     WORD32 ai4_zero_col_cr[2][MAX_TU_IN_TU_EQ_DIV_2];
   1514 
   1515     /** Storing the zero row values for each TU for cb */
   1516     WORD32 ai4_zero_row_cb[2][MAX_TU_IN_TU_EQ_DIV_2];
   1517 
   1518     /** Storing the zero row values for each TU for cr */
   1519     WORD32 ai4_zero_row_cr[2][MAX_TU_IN_TU_EQ_DIV_2];
   1520 } chroma_intra_satd_ctxt_t;
   1521 
   1522 /**
   1523 ******************************************************************************
   1524 *  @brief  Chroma RDOPT context structure
   1525 ******************************************************************************
   1526 */
   1527 typedef struct
   1528 {
   1529     /** Chroma SATD context structures. It is an array of two to account for the TU_EQ_CU candidate
   1530     and the TU_EQ_CU_DIV2 candidate */
   1531     chroma_intra_satd_ctxt_t as_chr_intra_satd_ctxt[NUM_POSSIBLE_TU_SIZES_CHR_INTRA_SATD];
   1532 
   1533     /** Chroma SATD has to be evaluated only for the HIGH QUALITY preset */
   1534     UWORD8 u1_eval_chrm_satd;
   1535 
   1536     /** Chroma RDOPT has to be evaluated only for the HIGH QUALITY / MEDIUM SPEED preset */
   1537     UWORD8 u1_eval_chrm_rdopt;
   1538 
   1539 } ihevce_chroma_rdopt_ctxt_t;
   1540 
   1541 typedef struct
   1542 {
            /* Pairs a CU-level inter result with a PU-level inter result. */
            /* NOTE(review): going by the type name this is the output of the */
            /* block merge stage - confirm against the code that fills it. */

            /* CU-level inter results */
   1543     inter_cu_results_t s_cu_results;
   1544 
            /* PU-level inter results */
   1545     inter_pu_results_t s_pu_results;
   1546 } block_merge_output_t;
   1547 
   1548 /**
   1549 ******************************************************************************
   1550 *  @brief  Structure to store the Merge/Skip Cand. for EncLoop
        *
        *  Holds the candidate list for one CU together with the candidate counts.
   1551 ******************************************************************************
   1552 */
   1553 typedef struct
   1554 {
   1555     /** List of all merge/skip candidates to be evaluated (SATD/RDOPT) for
   1556     * this CU
   1557     */
   1558     cu_inter_cand_t as_cu_inter_merge_skip_cand[MAX_NUM_CU_MERGE_SKIP_CAND];
   1559 
   1560     /** Number of merge candidates
            * NOTE(review): the original comment "number of merge/skip candidates"
            * sat above all three counters below; the split described here is
            * taken from the field names - confirm against the code that sets them.
   1561     */
   1562     UWORD8 u1_num_merge_cands;
   1563 
            /** Number of skip candidates (presumably; see note above) */
   1564     UWORD8 u1_num_skip_cands;
   1565 
            /** Number of merge/skip candidates (presumably the combined count;
            * see note above)
            */
   1566     UWORD8 u1_num_merge_skip_cands;
   1567 
   1568 } cu_inter_merge_skip_t;
   1569 
   1570 /** Structure to store the Mixed mode Cand. for EncLoop */
   1571 typedef struct
   1572 {
            /** Mixed mode inter candidates, at most
            *  MAX_NUM_MIXED_MODE_INTER_RDO_CANDS entries
            */
   1573     cu_inter_cand_t as_cu_data[MAX_NUM_MIXED_MODE_INTER_RDO_CANDS];
   1574 
            /** Number of "type0" mixed mode candidates.
            *  NOTE(review): what distinguishes type0 from type1 is not defined
            *  in this structure - confirm against the code that fills it.
            */
   1575     UWORD8 u1_num_mixed_mode_type0_cands;
   1576 
            /** Number of "type1" mixed mode candidates (see note above) */
   1577     UWORD8 u1_num_mixed_mode_type1_cands;
   1578 
   1579 } cu_mixed_mode_inter_t;
   1580 
   1581 typedef struct
   1582 {
            /* Set of inter prediction buffers, a bit field recording which of */
            /* them are in use, and the stride shared by all of them. */

   1583     /* Extra entries beyond MAX_NUM_INTER_RDO_CANDS: an additional buffer is */
   1584     /* required for storing both cur and best during merge eval. */
            /* NOTE(review): this comment originally said "+2" while the array is */
            /* sized with "+ 4" - confirm which value is intended. */
   1585     void *apv_inter_pred_data[MAX_NUM_INTER_RDO_CANDS + 4];
   1586 
   1587     /* Bit field used to determine the indices of free bufs in 'apv_inter_pred_data' buf array */
            /* NOTE(review): presumably one bit per array entry, which would need */
            /* MAX_NUM_INTER_RDO_CANDS + 4 <= 32 - confirm. */
   1588     UWORD32 u4_is_buf_in_use;
   1589 
   1590     /* Assumption is that the same stride is used for the */
   1591     /* entire set of buffers above and is equal to the */
   1592     /* CU size */
   1593     WORD32 i4_pred_stride;
   1594 
   1595 } ihevce_inter_pred_buf_data_t;
   1596 /** Structure to store the Inter Cand. info in EncLoop */
   1597 typedef struct
   1598 {
            /** Pointers to the inter candidates, at most MAX_NUM_INTER_RDO_CANDS */
   1599     cu_inter_cand_t *aps_cu_data[MAX_NUM_INTER_RDO_CANDS];
   1600 
            /** Cost per candidate.
            *  NOTE(review): presumably indexed in parallel with aps_cu_data - confirm.
            */
   1601     UWORD32 au4_cost[MAX_NUM_INTER_RDO_CANDS];
   1602 
            /** Prediction buffer index per candidate.
            *  NOTE(review): presumably an index into
            *  ihevce_inter_pred_buf_data_t::apv_inter_pred_data - confirm.
            */
   1603     UWORD8 au1_pred_buf_idx[MAX_NUM_INTER_RDO_CANDS];
   1604 
            /** Variance of the source (units and block extent not visible here) */
   1605     UWORD32 u4_src_variance;
   1606 
            /** Index of the worst-cost entry within the cost array */
   1607     UWORD8 u1_idx_of_worst_cost_in_cost_array;
   1608 
            /** Index of the worst-cost entry within the pred buf index array */
   1609     UWORD8 u1_idx_of_worst_cost_in_pred_buf_array;
   1610 
            /** Number of inter candidates held in the arrays above */
   1611     UWORD8 u1_num_inter_cands;
   1612 
   1613 } inter_cu_mode_info_t;
   1614 typedef struct
   1615 {
            /* SAO context structure. Groups the frame level and CTB level */
            /* src/recon buffer pointers and strides, the top/left boundary and */
            /* scratch buffers used for SAO RD optimisation, the SSD lambdas, */
            /* pointers to the PPS/SPS/slice header, the SAO parameters kept for */
            /* left/top merge, and the luma/chroma SAO function pointer tables. */

   1616     /*Frame level base pointer of buffers for each ctb row to store the top pixels
   1617     *and top left pixel for the next ctb row. These buffers are common across all threads
   1618     */
   1619     UWORD8 *apu1_sao_src_frm_top_luma[MAX_NUM_ENC_LOOP_PARALLEL];
   1620     /*Ctb level pointer to buffer to store the top pixels
   1621     *and top left pixel for the next ctb row. These buffers are common across all threads
   1622     */
   1623     UWORD8 *pu1_curr_sao_src_top_luma;
   1624     /*Buffer to store the left boundary before
   1625     * doing sao on current ctb for the next ctb in the current row
   1626     */
   1627     UWORD8 au1_sao_src_left_luma[MAX_CTB_SIZE];
   1628     /*Frame level base pointer of buffers for each ctb row to store the top pixels
   1629     *and top left pixel for the next ctb row. These buffers are common across all threads
   1630     */
   1631     UWORD8 *apu1_sao_src_frm_top_chroma[MAX_NUM_ENC_LOOP_PARALLEL];
   1632 
            /* Stride of the frame level top chroma buffers above */
   1633     WORD32 i4_frm_top_chroma_buf_stride;
   1634 
   1635     /*Ctb level pointer to buffer to store the top chroma pixels
   1636     *and top left pixel for the next ctb row. These buffers are common across all threads
   1637     */
   1638     UWORD8 *pu1_curr_sao_src_top_chroma;
   1639 
   1640     /*Scratch buffer to store the left boundary before
   1641     * doing sao on current ctb for the next ctb in the current row
   1642     */
   1643     UWORD8 au1_sao_src_left_chroma[MAX_CTB_SIZE * 2];
   1644 
   1645     /**
   1646     * Luma recon buffer
   1647     */
   1648     UWORD8 *pu1_frm_luma_recon_buf;
   1649     /**
   1650     * Chroma recon buffer
   1651     */
   1652     UWORD8 *pu1_frm_chroma_recon_buf;
   1653     /**
   1654     * Luma recon buffer for curr ctb
   1655     */
   1656     UWORD8 *pu1_cur_luma_recon_buf;
   1657     /**
   1658     * Chroma recon buffer for curr ctb
   1659     */
   1660     UWORD8 *pu1_cur_chroma_recon_buf;
   1661     /**
   1662     * Luma src buffer
   1663     */
   1664     UWORD8 *pu1_frm_luma_src_buf;
   1665     /**
   1666     * Chroma src buffer
   1667     */
   1668     UWORD8 *pu1_frm_chroma_src_buf;
   1669     /**
   1670     * Luma src(input yuv) buffer for curr ctb
   1671     */
   1672     UWORD8 *pu1_cur_luma_src_buf;
   1673     /**
   1674     * Chroma src buffer for curr ctb
   1675     */
   1676     UWORD8 *pu1_cur_chroma_src_buf;
   1677     /* Left luma scratch buffer required for sao RD optimisation*/
   1678     UWORD8 au1_left_luma_scratch[MAX_CTB_SIZE];
   1679 
   1680     /* Left chroma scratch buffer required for sao RD optimisation*/
   1681     /* Min size required= MAX_CTB_SIZE/2 * 2
   1682     * Multiplied by 2 because size required is MAX_CTB_SIZE/2 each for U and V
   1683     */
   1684     UWORD8 au1_left_chroma_scratch[MAX_CTB_SIZE * 2];
   1685 
   1686     /* Top luma scratch buffer required for sao RD optimisation*/
   1687     UWORD8 au1_top_luma_scratch[MAX_CTB_SIZE + 2];  // +1 for top left pixel and +1 for top right
   1688 
   1689     /* Top chroma scratch buffer required for sao RD optimisation*/
   1690     UWORD8 au1_top_chroma_scratch[MAX_CTB_SIZE + 4];  // +2 for top left pixel and +2 for top right
   1691 
   1692     /* Luma scratch buffer to store the sao'ed output during sao RD optimisation*/
   1693     /* One extra row(bot pixels) is copied to scratch buf but 2d buf copy func copies multiple of 4, hence
   1694     MAX_CTB_SIZE + 4*/
   1695     UWORD8 au1_sao_luma_scratch[PING_PONG][SCRATCH_BUF_STRIDE * (MAX_CTB_SIZE + 4)];
   1696 
   1697     /* Chroma scratch buffer to store the sao'ed output during sao RD optimisation*/
   1698     /* One extra row(bot pixels) is copied to scratch buf but 2d buf copy func copies multiple of 4, hence
   1699     MAX_CTB_SIZE + 4*/
   1700     UWORD8 au1_sao_chroma_scratch[PING_PONG][SCRATCH_BUF_STRIDE * (MAX_CTB_SIZE + 4)];
   1701 
   1702     /**
   1703     * CTB size
   1704     */
   1705     WORD32 i4_ctb_size;
   1706     /**
   1707     * Luma recon buffer stride
   1708     */
   1709     WORD32 i4_frm_luma_recon_stride;
   1710     /**
   1711     * Chroma recon buffer stride
   1712     */
   1713     WORD32 i4_frm_chroma_recon_stride;
   1714     /**
   1715     * Luma recon buffer stride for curr ctb
   1716     */
   1717     WORD32 i4_cur_luma_recon_stride;
   1718     /**
   1719     * Chroma recon buffer stride for curr ctb
   1720     */
   1721     WORD32 i4_cur_chroma_recon_stride;
   1722     /**
   1723     * Luma src buffer stride
   1724     */
   1725     WORD32 i4_frm_luma_src_stride;
   1726     /**
   1727     * Chroma src buffer stride
   1728     */
   1729     WORD32 i4_frm_chroma_src_stride;
   1730 
            /* Stride of the frame level top luma buffers */
   1731     WORD32 i4_frm_top_luma_buf_stride;
   1732     /**
   1733     * Luma src buffer stride for curr ctb
   1734     */
   1735     WORD32 i4_cur_luma_src_stride;
   1736     /**
   1737     * Chroma src buffer stride for curr ctb
   1738     */
   1739     WORD32 i4_cur_chroma_src_stride;
   1740 
   1741     /* Top luma buffer size */
   1742     WORD32 i4_top_luma_buf_size;
   1743 
   1744     /* Top Chroma buffer size */
   1745     WORD32 i4_top_chroma_buf_size;
   1746 
   1747     /*** Number of CTB units **/
   1748     WORD32 i4_num_ctb_units;
   1749 
   1750     /**
   1751     * CTB x pos
   1752     */
   1753     WORD32 i4_ctb_x;
   1754     /**
   1755     * CTB y pos
   1756     */
   1757     WORD32 i4_ctb_y;
   1758     /* SAO block width*/
   1759     WORD32 i4_sao_blk_wd;
   1760 
   1761     /* SAO block height*/
   1762     WORD32 i4_sao_blk_ht;
   1763 
   1764     /* Last ctb row flag*/
   1765     WORD32 i4_is_last_ctb_row;
   1766 
   1767     /* Last ctb col flag*/
   1768     WORD32 i4_is_last_ctb_col;
   1769 
   1770     /* CTB aligned width */
   1771     UWORD32 u4_ctb_aligned_wd;
   1772 
   1773     /* Number of ctbs in a row*/
   1774     UWORD32 u4_num_ctbs_horz;
   1775 
            /* Number of ctbs in a column */
   1776     UWORD32 u4_num_ctbs_vert;
   1777     /**
   1778     * Closed loop SSD Lambda
   1779     * This is multiplied with bits for RD cost computations in SSD mode
   1780     * This is represented in q format with shift of LAMBDA_Q_SHIFT
   1781     */
   1782     LWORD64 i8_cl_ssd_lambda_qf;
   1783 
   1784     /**
   1785     * Closed loop SSD Lambda for chroma (chroma qp is different from luma qp)
   1786     * This is multiplied with bits for RD cost computations in SSD mode
   1787     * This is represented in q format with shift of LAMBDA_Q_SHIFT
   1788     */
   1789     LWORD64 i8_cl_ssd_lambda_chroma_qf;
   1790     /**
   1791     * Pointer to current PPS
   1792     */
   1793     pps_t *ps_pps;  //not used currently
   1794     /**
   1795     * Pointer to current SPS
   1796     */
   1797     sps_t *ps_sps;
   1798 
   1799     /**
   1800     * Pointer to current slice header structure
   1801     */
   1802     slice_header_t *ps_slice_hdr;
   1803     /**
   1804     * Pointer to current frame ctb out array of structures
   1805     */
   1806     ctb_enc_loop_out_t *ps_ctb_out;
   1807     /**
   1808     *  context for cabac bit estimation used during rdopt stage
   1809     */
   1810     rdopt_entropy_ctxt_t *ps_rdopt_entropy_ctxt;
   1811     /**
   1812     * Pointer to sao_enc_t for the current ctb
   1813     */
   1814     sao_enc_t *ps_sao;
   1815     /*
   1816     * Pointer to an array to store the sao information of the top ctb
   1817     * This is required to decide top merge
   1818     */
   1819     sao_enc_t *aps_frm_top_ctb_sao[MAX_NUM_ENC_LOOP_PARALLEL];
   1820 
   1821     /*
   1822     * Pointer to structure to store the sao parameters of (x,y)th ctb
   1823     * for top merge of (x,y+1)th ctb
   1824     */
   1825     sao_enc_t *ps_top_ctb_sao;
   1826 
   1827     /* structure to store the sao parameters of (x,y)th ctb for
   1828     * the left merge of (x+1,y)th ctb
   1829     */
   1830     sao_enc_t s_left_ctb_sao;
   1831 
   1832     /* Array of structures for SAO RDO candidates*/
   1833     sao_enc_t as_sao_rd_cand[MAX_SAO_RD_CAND];
   1834 
   1835     /** array of function pointers for luma sao */
   1836     pf_sao_luma apf_sao_luma[4];
   1837 
   1838     /** array of function pointers for chroma sao */
   1839     pf_sao_chroma apf_sao_chroma[4];
   1840 
   1841     /* Flag to do SAO luma and chroma filtering*/
   1842     WORD8 i1_slice_sao_luma_flag;
   1843 
   1844     WORD8 i1_slice_sao_chroma_flag;
   1845 
   1846 #if DISABLE_SAO_WHEN_NOISY
            /* Only present when DISABLE_SAO_WHEN_NOISY is enabled: pointer to */
            /* ctb analyse data and its stride */
   1847     ctb_analyse_t *ps_ctb_data;
   1848 
   1849     WORD32 i4_ctb_data_stride;
   1850 #endif
   1851 
            /* Pointer to the list of optimised common utility functions */
   1852     ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list;
   1853 
   1854 } sao_ctxt_t;
   1855 
   1856 /**
   1857 ******************************************************************************
   1858 *  @brief  Encode loop module context structure
   1859 ******************************************************************************
   1860 */
   1861 typedef struct
   1862 {
   1863 #if ENABLE_TU_TREE_DETERMINATION_IN_RDOPT
   1864     void *pv_err_func_selector;
   1865 #endif
   1866 
   1867     /**
   1868     * Quality preset for comtrolling numbe of RD opt cand
   1869     * @sa : IHEVCE_QUALITY_CONFIG_T
   1870     */
   1871     WORD32 i4_quality_preset;
   1872     /**
   1873     *
   1874     *
   1875     */
   1876     WORD32 i4_rc_pass;
   1877     /**
   1878     * Lamda to be mulitplied with bits for SATD
   1879     * should be equal to Lamda*Qp
   1880     */
   1881     WORD32 i4_satd_lamda;
   1882 
   1883     /**
   1884     * Lamda to be mulitplied with bits for SAD
   1885     * should be equal to Lamda*Qp
   1886     */
   1887     WORD32 i4_sad_lamda;
   1888 
   1889     /**
   1890     * Closed loop SSD Lambda
   1891     * This is multiplied with bits for RD cost computations in SSD mode
   1892     * This is represented in q format with shift of LAMBDA_Q_SHIFT
   1893     */
   1894     LWORD64 i8_cl_ssd_lambda_qf;
   1895 
   1896     /**
   1897     * Closed loop SSD Lambda for chroma (chroma qp is different from luma qp)
   1898     * This is multiplied with bits for RD cost computations in SSD mode
   1899     * This is represented in q format with shift of LAMBDA_Q_SHIFT
   1900     */
   1901     LWORD64 i8_cl_ssd_lambda_chroma_qf;
   1902 
   1903     /**
   1904     * Ratio of Closed loop SSD Lambda and Closed loop SSD Lambda for chroma
   1905     * This is multiplied with (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT)
   1906     * to keep the precision of the ratio
   1907     */
   1908     UWORD32 u4_chroma_cost_weighing_factor;
   1909     /**
   1910     * Frame level QP to be used
   1911     */
   1912     WORD32 i4_frame_qp;
   1913 
   1914     WORD32 i4_frame_mod_qp;
   1915 
   1916     WORD32 i4_frame_qstep;
   1917 
   1918     UWORD8 u1_max_tr_depth;
   1919 
   1920     /**
   1921     * CU level Qp
   1922     */
   1923     WORD32 i4_cu_qp;
   1924 
   1925     /**
   1926     * CU level Qp / 6
   1927     */
   1928     WORD32 i4_cu_qp_div6;
   1929 
   1930     /**
   1931     * CU level Qp % 6
   1932     */
   1933     WORD32 i4_cu_qp_mod6;
   1934 
   1935     /**
   1936     *  CU level QP to be used
   1937     */
   1938     WORD32 i4_chrm_cu_qp;
   1939 
   1940     /**
   1941     * CU level Qp / 6
   1942     */
   1943     WORD32 i4_chrm_cu_qp_div6;
   1944 
   1945     /**
   1946     * CU level Qp % 6
   1947     */
   1948     WORD32 i4_chrm_cu_qp_mod6;
   1949 
   1950     /** previous cu qp
   1951     * @remarks : This needs to be remembered to handle skip cases in deblocking.
   1952     */
   1953     WORD32 i4_prev_cu_qp;
   1954 
   1955     /** chroma qp offset
   1956     * @remarks : Used to calculate chroma qp and other qp related parameter at CU level
   1957     */
   1958     WORD32 i4_chroma_qp_offset;
   1959 
   1960     /**
   1961     * Buffer Pointer to populate the scale matrix for all transform size
   1962     */
   1963     WORD16 *pi2_scal_mat;
   1964 
   1965     /**
   1966     * Buffer Pointer to populate the rescale matrix for all transform size
   1967     */
   1968     WORD16 *pi2_rescal_mat;
   1969 
   1970     /** array of pointer to store the scaling matrices for
   1971     *  all transform sizes and qp % 6 (pre computed)
   1972     */
   1973     WORD16 *api2_scal_mat[NUM_TRANS_TYPES * 2];
   1974 
   1975     /** array of pointer to store the re-scaling matrices for
   1976     *  all transform sizes and qp % 6 (pre computed)
   1977     */
   1978     WORD16 *api2_rescal_mat[NUM_TRANS_TYPES * 2];
   1979 
   1980     /** array of function pointers for residual and
   1981     *  forward transform for all transform sizes
   1982     */
   1983     pf_res_trans_luma apf_resd_trns[NUM_TRANS_TYPES];
   1984 
   1985     /** array of function pointers for residual and
   1986     *  forward HAD transform for all transform sizes
   1987     */
   1988     pf_res_trans_luma_had_chroma apf_chrm_resd_trns_had[NUM_TRANS_TYPES - 2];
   1989 
   1990     /** array of function pointers for residual and
   1991     *  forward transform for all transform sizes
   1992     *  for chroma
   1993     */
   1994     pf_res_trans_chroma apf_chrm_resd_trns[NUM_TRANS_TYPES - 2];
   1995 
   1996     /** array of function pointers for qunatization and
   1997     *  inv Quant for ssd calc. for all transform sizes
   1998     */
   1999     pf_quant_iquant_ssd apf_quant_iquant_ssd[4];
   2000 
   2001     /** array of function pointers for inv.transform and
   2002     *  recon for all transform sizes
   2003     */
   2004     pf_it_recon apf_it_recon[NUM_TRANS_TYPES];
   2005 
   2006     /** array of function pointers for inverse transform
   2007     * and recon for all transform sizes for chroma
   2008     */
   2009     pf_it_recon_chroma apf_chrm_it_recon[NUM_TRANS_TYPES - 2];
   2010 
   2011     /** array of luma intra prediction function pointers */
   2012     pf_intra_pred apf_lum_ip[NUM_IP_FUNCS];
   2013 
   2014     /** array of chroma intra prediction function pointers */
   2015     pf_intra_pred apf_chrm_ip[NUM_IP_FUNCS];
   2016 
   2017     /* - Function pointer to cu_mode_decide function */
   2018     /* - The 'void *' is used since one of the parameters of */
   2019     /* this class of functions is the current structure */
   2020     /* - This function pointer is used to choose the */
   2021     /* appropriate function depending on whether bit_depth is */
   2022     /* chosen as 8 bits or greater */
   2023     /* - This function pointer's type is defined at the end */
   2024     /* of this file */
   2025     void *pv_cu_mode_decide;
   2026 
   2027     /* Infer from the comment for the variable 'pv_cu_mode_decide' */
   2028     void *pv_inter_rdopt_cu_mc_mvp;
   2029 
   2030     /* Infer from the comment for the variable 'pv_cu_mode_decide' */
   2031     void *pv_inter_rdopt_cu_ntu;
   2032 
   2033     /* Infer from the comment for the variable 'pv_cu_mode_decide' */
   2034     void *pv_intra_chroma_pred_mode_selector;
   2035 
   2036     /* Infer from the comment for the variable 'pv_cu_mode_decide' */
   2037     void *pv_intra_rdopt_cu_ntu;
   2038 
   2039     /* Infer from the comment for the variable 'pv_cu_mode_decide' */
   2040     void *pv_final_rdopt_mode_prcs;
   2041 
   2042     /* Infer from the comment for the variable 'pv_cu_mode_decide' */
   2043     void *pv_store_cu_results;
   2044 
   2045     /* Infer from the comment for the variable 'pv_cu_mode_decide' */
   2046     void *pv_enc_loop_cu_bot_copy;
   2047 
   2048     /* Infer from the comment for the variable 'pv_cu_mode_decide' */
   2049     void *pv_final_mode_reevaluation_with_modified_cu_qp;
   2050 
   2051     /* Infer from the comment for the variable 'pv_cu_mode_decide' */
   2052     void *pv_enc_loop_ctb_left_copy;
   2053 
   2054     /** Qunatization rounding factor for inter and intra CUs */
   2055     WORD32 i4_quant_rnd_factor[2];
   2056 
   2057     /**
   2058     * Frame Buffer Pointer to store the top row luma data.
   2059     * one pixel row in every ctb row
   2060     */
   2061     void *apv_frm_top_row_luma[MAX_NUM_ENC_LOOP_PARALLEL];
   2062 
   2063     /**
   2064     * One CTB row size of Top row luma data buffer
   2065     */
   2066     WORD32 i4_top_row_luma_stride;
   2067 
   2068     /**
   2069     * One frm of Top row luma data buffer
   2070     */
   2071     WORD32 i4_frm_top_row_luma_size;
   2072 
   2073     /**
   2074     * Current luma row bottom data store pointer
   2075     */
   2076     void *pv_bot_row_luma;
   2077 
   2078     /**
   2079     * Top luma row top data access pointer
   2080     */
   2081     void *pv_top_row_luma;
   2082 
   2083     /**
   2084     * Frame Buffer Pointer to store the top row chroma data (Cb  Cr pixel interleaved )
   2085     * one pixel row in every ctb row
   2086     */
   2087     void *apv_frm_top_row_chroma[MAX_NUM_ENC_LOOP_PARALLEL];
   2088 
   2089     /**
   2090     * One CTB row size of Top row chroma data buffer (Cb  Cr pixel interleaved )
   2091     */
   2092     WORD32 i4_top_row_chroma_stride;
   2093 
   2094     /**
   2095     * One frm size of Top row chroma data buffer (Cb  Cr pixel interleaved )
   2096     */
   2097     WORD32 i4_frm_top_row_chroma_size;
   2098 
   2099     /**
   2100     * Current chroma row bottom data store pointer
   2101     */
   2102     void *pv_bot_row_chroma;
   2103 
   2104     /**
   2105     * Top chroma row top data access pointer
   2106     */
   2107     void *pv_top_row_chroma;
   2108 
   2109     /**
   2110     * Frame Buffer Pointer to store the top row neighbour modes stored at 4x4 level
   2111     * one 4x4 row in every ctb row
   2112     */
   2113     nbr_4x4_t *aps_frm_top_row_nbr[MAX_NUM_ENC_LOOP_PARALLEL];
   2114 
   2115     /**
   2116     * One CTB row size of Top row nbr 4x4 params buffer
   2117     */
   2118     WORD32 i4_top_row_nbr_stride;
   2119 
   2120     /**
   2121     * One frm size of Top row nbr 4x4 params buffer
   2122     */
   2123     WORD32 i4_frm_top_row_nbr_size;
   2124 
   2125     /**
   2126     * Current row nbr prms bottom data store pointer
   2127     */
   2128     nbr_4x4_t *ps_bot_row_nbr;
   2129 
   2130     /**
   2131     * Top row nbr prms top data access pointer
   2132     */
   2133     nbr_4x4_t *ps_top_row_nbr;
   2134 
   2135     /**
   2136     * Pointer to (1,1) location in au1_nbr_ctb_map
   2137     */
   2138     UWORD8 *pu1_ctb_nbr_map;
   2139 
   2140     /**
   2141     * neigbour map buffer stride;
   2142     */
   2143     WORD32 i4_nbr_map_strd;
   2144 
   2145     /**
   2146     * Array at ctb level to store the neighour map
   2147     * its size is 25x25 for ctb size of 64x64
   2148     */
   2149     UWORD8 au1_nbr_ctb_map[MAX_PU_IN_CTB_ROW + 1 + 8][MAX_PU_IN_CTB_ROW + 1 + 8];
   2150 
   2151     /**
   2152     * Array to store left ctb data for luma
   2153     * some padding is added to take care of unconditional access
   2154     */
   2155     void *pv_left_luma_data;
   2156 
   2157     /**
   2158     * Array to store left ctb data for chroma (cb abd cr pixel interleaved
   2159     * some padding is added to take care of unconditional access
   2160     */
   2161     void *pv_left_chrm_data;
   2162 
   2163     /**
   2164     * Array to store the left neighbour modes at 4x4 level
   2165     */
   2166     nbr_4x4_t as_left_col_nbr[MAX_PU_IN_CTB_ROW];
   2167 
   2168     /**
   2169     * Array to store currrent CTb pred modes at a 4x4 level
   2170     * used for prediction inside ctb
   2171     */
   2172     nbr_4x4_t as_ctb_nbr_arr[MAX_PU_IN_CTB_ROW * MAX_PU_IN_CTB_ROW];
   2173 
   2174     /**
   2175     * array for storing csbf during RD opt stage at CU level
   2176     * one best and one current is required
   2177     */
   2178     UWORD8 au1_cu_csbf[MAX_TU_IN_CTB_ROW * MAX_TU_IN_CTB_ROW];
   2179 
   2180     /**
   2181     * Stride of csbf buffer. will be useful for scanning access
   2182     * if stored in a 2D order. right now set to max tx size >> 4;
   2183     */
   2184     WORD32 i4_cu_csbf_strd;
   2185 
   2186     /**
   2187     * Array to store pred modes  during SATD and RD opt stage at CU level
   2188     * one best and one current is required
   2189     */
   2190     nbr_4x4_t as_cu_nbr[2][MAX_PU_IN_CTB_ROW * MAX_PU_IN_CTB_ROW];
   2191 
   2192     /**
   2193     * array to store the output of reference substitution process output
   2194     * for intra CUs
   2195     * TOP (32 x 2) + Left (32 x 2) + Top left (1) + Alignment (3)
   2196     */
   2197     void *pv_ref_sub_out;
   2198 
   2199     /**
   2200     * array to store the filtered reference samples for intra CUs
   2201     * TOP (32 x 2) + Left (32 x 2) + Top left (1) + Alignment (3)
   2202     */
   2203     void *pv_ref_filt_out;
   2204 
   2205     /**
   2206     * Used for 3 purposes
   2207     *
   2208     * 1. MC Intermediate buffer
   2209     * array for storing intermediate 16-bit value for hxhy subpel
   2210     * generation at CTB level (+ 16) for subpel planes boundary
   2211     * +4 is for horizontal 4pels
   2212     *
   2213     * 2. Temprory scratch buffer for transform and coeffs storage
   2214     * MAX_TRANS_SIZE *2 for trans_scratch(32bit) and MAX_TRANS_SIZE *1 for trans_values
   2215     * The first part i.e. from 0 to MAX_TRANS_SIZE is then reused for storing the quant coeffs
   2216     * Max of both are used
   2217     *
   2218     * 3. MC Intermediate buffer
   2219     * buffer for storing intermediate 16 bit values prior to conversion to 8bit in HBD
   2220     *
   2221     */
   2222     MEM_ALIGN16 WORD16 ai2_scratch[(MAX_CTB_SIZE + 8 + 8) * (MAX_CTB_SIZE + 8 + 8 + 8) * 2];
   2223 
   2224     /**
   2225     * array for storing cu level final params for a given mode
   2226     * one best and one current is required
   2227     */
   2228     enc_loop_cu_final_prms_t as_cu_prms[2];
   2229 
   2230     /**
   2231     * Scan index to be used for any gien transform
   2232     * this is a scartch variable used to communicate
   2233     * scan idx at every transform level
   2234     */
   2235     WORD32 i4_scan_idx;
   2236 
   2237     /**
   2238     * Buffer index in ping pong buffers
   2239     * to be used SATD mode evaluations
   2240     */
   2241     WORD32 i4_satd_buf_idx;
   2242 
   2243     /**
   2244     * Motion Compensation module context structre
   2245     */
   2246     inter_pred_ctxt_t s_mc_ctxt;
   2247 
   2248     /**
   2249     * MV pred module context structre
   2250     */
   2251     mv_pred_ctxt_t s_mv_pred_ctxt;
   2252 
   2253     /**
   2254     * Deblock BS ctb structure
   2255     */
   2256     deblk_bs_ctb_ctxt_t s_deblk_bs_prms;
   2257 
   2258     /**
   2259     * Deblocking ctb structure
   2260     */
   2261     deblk_ctb_params_t s_deblk_prms;
   2262 
   2263     /**
   2264     * Deblocking structure. For ctb-row level
   2265     */
   2266     deblk_ctbrow_prms_t s_deblk_ctbrow_prms;
   2267 
   2268     /**
   2269     * Deblocking enable flag
   2270     */
   2271     WORD32 i4_deblock_type;
   2272 
   2273     /**
   2274     *  context for cabac bit estimation used during rdopt stage
   2275     */
   2276     rdopt_entropy_ctxt_t s_rdopt_entropy_ctxt;
   2277 
   2278     /**
   2279     * Context models stored for RDopt store and restore purpose
   2280     */
   2281     UWORD8 au1_rdopt_init_ctxt_models[IHEVC_CAB_CTXT_END];
   2282 
   2283     /**
   2284     * current picture slice type
   2285     */
   2286     WORD8 i1_slice_type;
   2287 
   2288     /**
   2289     * strong_intra_smoothing_enable_flag
   2290     */
   2291     WORD8 i1_strong_intra_smoothing_enable_flag;
   2292 
   2293     /** Pointer to Dep Mngr for controlling Top-Right CU dependency */
   2294     void *pv_dep_mngr_enc_loop_cu_top_right;
   2295 
   2296     /** Pointer to Dep Mngr for controlling Deblocking Top dependency */
   2297     void *pv_dep_mngr_enc_loop_dblk;
   2298 
   2299     /** pointer to store the cabac states at end of second CTB in current row */
   2300     UWORD8 *pu1_curr_row_cabac_state;
   2301 
   2302     /** pointer to copy the cabac states at start of first CTB in current row */
   2303     UWORD8 *pu1_top_rt_cabac_state;
   2304     /** flag to indicate rate control mode.
   2305     * @remarks :  To enable CU level qp modulation only when required.
   2306     */
   2307     WORD8 i1_cu_qp_delta_enable;
   2308 
   2309     /** flag to indicate rate control mode.
   2310     * @remarks :  Entropy sync enable flag
   2311     */
   2312     WORD8 i1_entropy_coding_sync_enabled_flag;
   2313 
   2314     /** Use SATD or SAD for best merge candidate evaluation */
   2315     WORD32 i4_use_satd_for_merge_eval;
   2316 
   2317     UWORD8 u1_use_early_cbf_data;
   2318 
   2319     /** Use SATD or SAD for best CU merge candidate evaluation */
   2320     WORD32 i4_use_satd_for_cu_merge;
   2321 
   2322     /** Maximum number of merge candidates to be evaluated */
   2323     WORD32 i4_max_merge_candidates;
   2324 
   2325     /** Flag to indicate whether current pictute needs to be deblocked,
   2326     padded and hpel planes need to be generated.
   2327     These are turned off typically in non referecne pictures when psnr
   2328     and recon dump is disabled
   2329     */
   2330     WORD32 i4_deblk_pad_hpel_cur_pic;
   2331 
   2332     /* Array of structures for storing mc predicted data for
   2333     * merge and skip modes
   2334     */
   2335     merge_skip_pred_data_t as_merge_skip_pred_data[MAX_NUM_CU_MERGE_SKIP_CAND];
   2336 
   2337     /* Sum the Qps of each 8*8 block in CU
   2338     * 8*8 block is considered as Min CU size possible as per standard is 8
   2339     * 0 corresponds to INTER and 1 corresponds to INTRA
   2340     */
   2341     LWORD64 i8_cl_ssd_lambda_qf_array[MAX_HEVC_QP_12bit + 1];
   2342     UWORD32 au4_chroma_cost_weighing_factor_array[MAX_HEVC_QP_12bit + 1];
   2343     LWORD64 i8_cl_ssd_lambda_chroma_qf_array[MAX_HEVC_QP_12bit + 1];
   2344     WORD32 i4_satd_lamda_array[MAX_HEVC_QP_12bit + 1];
   2345     WORD32 i4_sad_lamda_array[MAX_HEVC_QP_12bit + 1];
   2346 
   2347     /************************************************************************/
   2348     /* The fields with the string 'type2' in their names are required */
   2349     /* when both 8bit and hbd lambdas are needed. The lambdas corresponding */
   2350     /* to the bit_depth != internal_bit_depth are stored in these fields */
   2351     /************************************************************************/
   2352     LWORD64 i8_cl_ssd_type2_lambda_qf_array[MAX_HEVC_QP_12bit + 1];
   2353     LWORD64 i8_cl_ssd_type2_lambda_chroma_qf_array[MAX_HEVC_QP_12bit + 1];
   2354     WORD32 i4_satd_type2_lamda_array[MAX_HEVC_QP_12bit + 1];
   2355     WORD32 i4_sad_type2_lamda_array[MAX_HEVC_QP_12bit + 1];
   2356 
   2357     /* Lokesh: Added to find if the CU is the first to be coded in the group */
   2358     WORD32 i4_is_first_cu_qg_coded;
   2359 
   2360     /* Chroma RDOPT related parameters */
   2361     ihevce_chroma_rdopt_ctxt_t s_chroma_rdopt_ctxt;
   2362 
   2363     /* Structure to save pred data of ME/Intra cand */
   2364     cu_me_intra_pred_prms_t s_cu_me_intra_pred_prms;
   2365 
   2366     /* Structure to save the flags required for Final mode Reconstruction
   2367     function. These flags are set based on quality presets and bit-rate
   2368     we are working on */
   2369     cu_final_recon_flags_t s_cu_final_recon_flags;
   2370 
   2371     /* Parameter to how at which level RDOQ will be implemented:
   2372     0 - RDOQ disbaled
   2373     1 - RDOQ enabled during RDOPT for all candidates
   2374     2 - RDOQ enabled only for the final candidate*/
   2375     WORD32 i4_rdoq_level;
   2376 
   2377     /* Parameter to how at which level Quant rounding factors are computed:
   2378     FIXED_QUANT_ROUNDING       : Fixed Quant rounding values are used
   2379     NCTB_LEVEL_QUANT_ROUNDING  : NCTB level Cmputed Quant rounding values are used
   2380     CTB_LEVEL_QUANT_ROUNDING   : CTB level Cmputed Quant rounding values are used
   2381     CU_LEVEL_QUANT_ROUNDING    : CU level Cmputed Quant rounding values are used
   2382     TU_LEVEL_QUANT_ROUNDING    : TU level Cmputed Quant rounding values are used*/
   2383     WORD32 i4_quant_rounding_level;
   2384 
   2385     /* Parameter to how at which level Quant rounding factors are computed:
   2386     CHROMA_QUANT_ROUNDING    : Chroma Quant rounding values are used for chroma */
   2387     WORD32 i4_chroma_quant_rounding_level;
   2388 
   2389     /* Parameter to select at which level SBH will be implemented:
   2390     0 - SBH disabled
   2391     1 - SBH enabled during RDOPT for all candidates
   2392     2 - SBH enabled only for the final candidate*/
   2393     WORD32 i4_sbh_level;
   2394 
   2395     /* Parameter to select at which level ZERO CBF RDO will be implemented:
   2396     0 - ZCBF disabled
   2397     1 - ZCBF enabled during RDOPT for all candidates
   2398     2 - ZCBF enabled only for the final candidate
   2399     */
   2400     WORD32 i4_zcbf_rdo_level;
   2401 
   2402     /*RDOQ-SBH context structure*/
   2403     rdoq_sbh_ctxt_t s_rdoq_sbh_ctxt;
   2404 
   2405     /** Structure to store the Merge/Skip Cand. for EncLoop */
   2406     cu_inter_merge_skip_t s_cu_inter_merge_skip;
   2407     /** Structure to store the Mixed mode Cand. for EncLoop */
   2408     cu_mixed_mode_inter_t s_mixed_mode_inter_cu;
   2409 
   2410     ihevce_inter_pred_buf_data_t s_pred_buf_data;
   2411 
   2412     void *pv_422_chroma_intra_pred_buf;
   2413 
   2414     WORD32 i4_max_num_inter_rdopt_cands;
   2415 
   2416     /* Output Struct per each CU during recursions */
   2417     ihevce_enc_cu_node_ctxt_t as_enc_cu_ctxt[MAX_CU_IN_CTB + 1];
   2418 
   2419     /* Used to store best inter candidate. Used only when */
   2420     /* 'CU modulated QP override' is enabled */
   2421     cu_inter_cand_t as_best_cand[MAX_CU_IN_CTB + 1];
   2422 
   2423     cu_inter_cand_t *ps_best_cand;
   2424 
   2425     UWORD8 au1_cu_init_cabac_state_a_priori[MAX_CU_IN_CTB + 1][IHEVC_CAB_CTXT_END];
   2426 
   2427     UWORD8 (*pau1_curr_cu_a_priori_cabac_state)[IHEVC_CAB_CTXT_END];
   2428 
   2429     /* Used to store pred data of each CU in the CTB. */
   2430     /* Used only when 'CU modulated QP override' is enabled */
   2431     void *pv_CTB_pred_luma;
   2432 
   2433     void *pv_CTB_pred_chroma;
   2434 
   2435     /**
   2436     * array for storing recon during SATD and RD opt stage at CU level
   2437     * one best and one current is required.Luma and chroma together
   2438     */
   2439     void *pv_cu_luma_recon;
   2440 
   2441     /**
   2442     * array for storing recon during SATD and RD opt stage at CU level
   2443     * one best and one current is required.Luma and chroma together
   2444     */
   2445     void *pv_cu_chrma_recon;
   2446 
   2447     /**
   2448     * Array to store pred modes  during SATD and RD opt stage at CU level
   2449     * one best and one current is required
   2450     */
   2451     nbr_4x4_t as_cu_recur_nbr[MAX_PU_IN_CTB_ROW * MAX_PU_IN_CTB_ROW];
   2452 
   2453     /**
   2454     * Pointer to Array to store pred modes  during SATD and RD opt stage at CU level
   2455     * one best and one current is required
   2456     */
   2457     nbr_4x4_t *ps_cu_recur_nbr;
   2458 
   2459     /**
   2460     * Context models stored for CU recursion parent evaluation
   2461     */
   2462     UWORD8 au1_rdopt_recur_ctxt_models[4][IHEVC_CAB_CTXT_END];
   2463 
   2464     ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt;
   2465 
   2466     /**
   2467     * array for storing coeffs during RD opt stage at CU level
   2468     * one best and one current is required. Luma and chroma together
   2469     */
   2470     /*UWORD8 au1_cu_recur_coeffs[MAX_LUMA_COEFFS_CTB + MAX_CHRM_COEFFS_CTB];*/
   2471 
   2472     UWORD8 *pu1_cu_recur_coeffs;
   2473 
   2474     UWORD8 *apu1_cu_level_pingpong_coeff_buf_addr[2];
   2475 
   2476     WORD16 *api2_cu_level_pingpong_deq_buf_addr[2];
   2477 
   2478     UWORD8 *pu1_ecd_data;
   2479 
   2480     /* OPT: flag to skip parent CU=4TU eval during recursion */
   2481     UWORD8 is_parent_cu_rdopt;
   2482 
   2483     /**
   2484     *   Array of structs containing block merge data for
   2485     *   4 32x32 CU's in indices 1 - 4 and 64x64 CU at 0
   2486     */
   2487     UWORD8 u1_cabac_states_next_row_copied_flag;
   2488 
   2489     UWORD8 u1_cabac_states_first_cu_copied_flag;
   2490 
   2491     UWORD32 u4_cur_ctb_wd;
   2492 
   2493     UWORD32 u4_cur_ctb_ht;
   2494 
   2495     /* thread id of the current context */
   2496     WORD32 thrd_id;
   2497 
   2498     /** Number of processing threads created run time */
   2499     WORD32 i4_num_proc_thrds;
   2500 
   2501     /* Instance number of bit-rate for multiple bit-rate encode */
   2502     WORD32 i4_bitrate_instance_num;
   2503 
   2504     WORD32 i4_num_bitrates;
   2505 
   2506     WORD32 i4_enc_frm_id;
   2507 
   2508     /* Flag to indicate if chroma needs to be considered for cost calculation */
   2509     WORD32 i4_consider_chroma_cost;
   2510 
   2511     /* Number of modes to be evaluated for intra */
   2512     WORD32 i4_num_modes_to_evaluate_intra;
   2513 
   2514     /* Number of modes to be evaluated for inter */
   2515     WORD32 i4_num_modes_to_evaluate_inter;
   2516     /*pointers for struct to hold RC parameters for each bit-rate instance */
   2517     enc_loop_rc_params_t
   2518         *aaps_enc_loop_rc_params[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
   2519 
   2520     /** Pointer to structure containing function pointers of common*/
   2521     func_selector_t *ps_func_selector;
   2522 
   2523     /* Flag to control Top Right Sync for during Merge */
   2524     UWORD8 u1_use_top_at_ctb_boundary;
   2525 
   2526     UWORD8 u1_is_input_data_hbd;
   2527 
   2528     UWORD8 u1_bit_depth;
   2529 
   2530     /* 0 - 400; 1 - 420; 2 - 422; 3 - 444 */
   2531     UWORD8 u1_chroma_array_type;
   2532 
   2533     rc_quant_t *ps_rc_quant_ctxt;
   2534 
   2535     sao_ctxt_t s_sao_ctxt_t;
   2536 
   2537     /* Offset to get the Qp for the last CU of upper CTB-row.
   2538     This offset is from the current tile top row QP map start.
   2539     This will only be consumed by the first CU of current CTB-row
   2540     iff [it is skip && entropy sync is off] */
   2541     WORD32 *pi4_offset_for_last_cu_qp;
   2542 
   2543     double i4_lamda_modifier;
   2544     double i4_uv_lamda_modifier;
   2545     WORD32 i4_temporal_layer_id;
   2546 
   2547     UWORD8 u1_disable_intra_eval;
   2548 
   2549     WORD32 i4_quant_round_tu[2][32 * 32];
   2550 
   2551     WORD32 *pi4_quant_round_factor_tu_0_1[5];
   2552     WORD32 *pi4_quant_round_factor_tu_1_2[5];
   2553 
   2554     WORD32 i4_quant_round_4x4[2][4 * 4];
   2555     WORD32 i4_quant_round_8x8[2][8 * 8];
   2556     WORD32 i4_quant_round_16x16[2][16 * 16];
   2557     WORD32 i4_quant_round_32x32[2][32 * 32];
   2558 
   2559     WORD32 *pi4_quant_round_factor_cu_ctb_0_1[5];
   2560     WORD32 *pi4_quant_round_factor_cu_ctb_1_2[5];
   2561 
   2562     WORD32 i4_quant_round_cr_4x4[2][4 * 4];
   2563     WORD32 i4_quant_round_cr_8x8[2][8 * 8];
   2564     WORD32 i4_quant_round_cr_16x16[2][16 * 16];
   2565 
   2566     WORD32 *pi4_quant_round_factor_cr_cu_ctb_0_1[3];
   2567     WORD32 *pi4_quant_round_factor_cr_cu_ctb_1_2[3];
   2568     /* cost for not coding cu residue i.e forcing no residue syntax as 1 */
   2569     LWORD64 i8_cu_not_coded_cost;
   2570 
   2571     /* dependency manager for forward ME  sync */
   2572     void *pv_dep_mngr_encloop_dep_me;
   2573 
   2574     LWORD64 ai4_source_satd_8x8[64];
   2575 
   2576     LWORD64 ai4_source_chroma_satd[256];
   2577 
   2578     UWORD8 u1_is_refPic;
   2579 
   2580     WORD32 i4_qp_mod;
   2581 
   2582     WORD32 i4_is_ref_pic;
   2583 
   2584     WORD32 i4_chroma_format;
   2585 
   2586     WORD32 i4_temporal_layer;
   2587 
   2588     WORD32 i4_use_const_lamda_modifier;
   2589 
   2590     double f_i_pic_lamda_modifier;
   2591 
   2592     LWORD64 i8_distortion;
   2593 
   2594     WORD32 i4_use_ctb_level_lamda;
   2595 
   2596     float f_str_ratio;
   2597 
   2598     /* Flag to indicate if current frame is to be shared with other clients.
   2599     Used only in distributed-encoding */
   2600     WORD32 i4_share_flag;
   2601 
   2602     /* Pointer to the current recon being processed.
   2603     Needed for enabling TMVP in dist-encoding */
   2604     void *pv_frm_recon;
   2605 
   2606     ihevce_cmn_opt_func_t s_cmn_opt_func;
   2607 
   2608     /* The ME analogue to the struct above was not included since */
   2609     /* that would have entailed inclusion of all ME specific */
   2610     /* header files */
   2611     /*FT_SAD_EVALUATOR **/
   2612 
   2613     /*FT_SAD_EVALUATOR **/
   2614     void *pv_evalsad_pt_npu_mxn_8bit;
   2615     UWORD8 u1_enable_psyRDOPT;
   2616 
   2617     UWORD8 u1_is_stasino_enabled;
   2618 
   2619     UWORD32 u4_psy_strength;
   2620     /*Sub PIC rc context */
   2621 
   2622     WORD32 i4_sub_pic_level_rc;
   2623     WORD32 i4_num_ctb_for_out_scale;
   2624 
   2625     /**
   2626      * Accumulated bits of all CUs for the required CTBs, estimated during RDO evaluation.
   2627      * Required for sub pic level RC. Reset when required CU/CTB count is reached.
   2628      */
   2629     UWORD32 u4_total_cu_bits;
   2630 
   2631     UWORD32 u4_total_cu_bits_mul_qs;
   2632 
   2633     UWORD32 u4_total_cu_hdr_bits;
   2634 
   2635     UWORD32 u4_cu_tot_bits_into_qscale;
   2636 
   2637     UWORD32 u4_cu_tot_bits;
   2638 
   2639     /*Scale added to the current qscale, output from sub pic rc*/
   2640     WORD32 i4_cu_qp_sub_pic_rc;
   2641 
   2642     /*Frame level L1 IPE sad*/
   2643     LWORD64 i8_frame_l1_ipe_sad;
   2644 
   2645     /*Frame level L0 IPE satd*/
   2646     LWORD64 i8_frame_l0_ipe_satd;
   2647 
   2648     /*Frame level L1 ME sad*/
   2649     LWORD64 i8_frame_l1_me_sad;
   2650 
   2651     /*Frame level L1 activity factor*/
   2652     LWORD64 i8_frame_l1_activity_fact;
   2653     /* bits estimated for the frame, calculated for sub pic rc bit control */
   2654     WORD32 ai4_frame_bits_estimated[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
   2655     /** I Scene cut */
   2656     WORD32 i4_is_I_scenecut;
   2657 
   2658     /** Non Scene cut */
   2659     WORD32 i4_is_non_I_scenecut;
   2660 
   2661     /** Frames for which online/offline model is not valid */
   2662     WORD32 i4_is_model_valid;
   2663 
   2664     /** Steady State Frame */
   2665     //WORD32 i4_is_steady_state;
   2666 
   2667     WORD32 i4_is_first_query;
   2668 
   2669     /* Pointer to Tile params base */
   2670     void *pv_tile_params_base;
   2671 
   2672     /** The index of column tile for which it is working */
   2673     WORD32 i4_tile_col_idx;
   2674 
   2675     WORD32 i4_max_search_range_horizontal;
   2676 
   2677     WORD32 i4_max_search_range_vertical;
   2678 
   2679     WORD32 i4_is_ctb_qp_modified;
   2680 
   2681     WORD32 i4_display_num;
   2682 
   2683     WORD32 i4_pred_qp;
   2684 
   2685     /*assumption of qg size is 8x8 block size*/
   2686     WORD32 ai4_qp_qg[8 * 8];
   2687 
   2688     WORD32 i4_last_cu_qp_from_prev_ctb;
   2689 
   2690     WORD32 i4_prev_QP;
   2691 
   2692     UWORD8 u1_max_inter_tr_depth;
   2693 
   2694     UWORD8 u1_max_intra_tr_depth;
   2695 
   2696 } ihevce_enc_loop_ctxt_t;
   2697 
   2698 /*****************************************************************************/
   2699 /* Enums                                                                     */
   2700 /*****************************************************************************/
   2701 
   2702 /** @brief RDOQ_LEVELS_T: This enumeration specifies the RDOQ mode of operation
   2703 *
   2704 *  NO_RDOQ        : RDOQ is not performed
   2705 *  BEST_CAND_RDOQ : RDOQ for final candidate only
   2706 *  ALL_CAND_RDOQ  : RDOQ for all candidates
        *
        *  No enumerator has an explicit value, so they are numbered 0, 1, 2 in
        *  declaration order.
        *  NOTE(review): the comment on i4_rdoq_level in ihevce_enc_loop_ctxt_t lists
        *  1 as "all candidates" and 2 as "final candidate only", the reverse of this
        *  order - confirm which description is intended.
   2707 */
   2708 typedef enum
   2709 {
   2710     NO_RDOQ,
   2711     BEST_CAND_RDOQ,
   2712     ALL_CAND_RDOQ,
   2713 } RDOQ_LEVELS_T;
   2714 
   2715 /** @brief QUANT_ROUNDING_COEFF_LEVELS_T: This enumeration specifies the level at which
        *  Quant rounding factors are computed
   2716 *
   2717 *  FIXED_QUANT_ROUNDING       : Fixed Quant rounding values are used
   2718 *  NCTB_LEVEL_QUANT_ROUNDING  : NCTB level Computed Quant rounding values are used
   2719 *  CTB_LEVEL_QUANT_ROUNDING   : CTB level Computed Quant rounding values are used
   2720 *  CU_LEVEL_QUANT_ROUNDING    : CU level Computed Quant rounding values are used
   2721 *  TU_LEVEL_QUANT_ROUNDING    : TU level Computed Quant rounding values are used
   2722 *               Default for all candidates, based on RDOQ_LEVELS_T choose to best candidate
        *  CHROMA_QUANT_ROUNDING      : Chroma Quant rounding values are used for chroma
        *
        *  No enumerator has an explicit value, so they are numbered 0 to 5 in
        *  declaration order.
   2723 */
   2724 typedef enum
   2725 {
   2726     FIXED_QUANT_ROUNDING,
   2727     NCTB_LEVEL_QUANT_ROUNDING,
   2728     CTB_LEVEL_QUANT_ROUNDING,
   2729     CU_LEVEL_QUANT_ROUNDING,
   2730     TU_LEVEL_QUANT_ROUNDING,
   2731     CHROMA_QUANT_ROUNDING
   2732 } QUANT_ROUNDING_COEFF_LEVELS_T;
   2733 
   2734 /*****************************************************************************/
   2735 /* Enums                                                                     */
   2736 /*****************************************************************************/
   2737 
   2738 /** @brief SBH_LEVELS_T: This enumeration specifies the SBH mode of operation
   2739 *
   2740 *  NO_SBH        : SBH is not performed
   2741 *  BEST_CAND_SBH : SBH for final candidate only
   2742 *  ALL_CAND_SBH  : SBH for all candidates
        *
        *  No enumerator has an explicit value, so they are numbered 0, 1, 2 in
        *  declaration order.
        *  NOTE(review): the comment on i4_sbh_level in ihevce_enc_loop_ctxt_t lists
        *  1 as "all candidates" and 2 as "final candidate only", the reverse of this
        *  order - confirm which description is intended.
   2743 */
   2744 typedef enum
   2745 {
   2746     NO_SBH,
   2747     BEST_CAND_SBH,
   2748     ALL_CAND_SBH,
   2749 } SBH_LEVELS_T;
   2750 
   2751 /** @brief ZCBF_LEVELS_T: This enumeration specifies the ZeroCBF RDO mode of operation
   2752 *
   2753 *  NO_ZCBF     : ZCBF RDO is not performed
   2754 *  ZCBF_ENABLE : ZCBF RDO for all candidates
        *
        *  No enumerator has an explicit value, so NO_ZCBF is 0 and ZCBF_ENABLE is 1.
        *  NOTE(review): the comment on i4_zcbf_rdo_level in ihevce_enc_loop_ctxt_t
        *  describes three levels (0, 1, 2) while only two enumerators exist here -
        *  confirm whether level 2 is still used.
   2755 */
   2756 typedef enum
   2757 {
   2758     NO_ZCBF,
   2759     ZCBF_ENABLE,
   2760 } ZCBF_LEVELS_T;
   2761 
   2762 /**
   2763 ******************************************************************************
   2764 *  @brief  Encode loop master context structure
        *
        *  Groups the per-thread encode loop context pointers with the top-row CU
        *  skip flags, the per-CTB-row context models, the dependency manager
        *  handles, the bit-rate / parallel-frame counts and the tile information.
   2765 ******************************************************************************
   2766 */
   2767 typedef struct
   2768 {
   2769     /** Array of pointers to encode loop context structures,
            *   MAX_NUM_FRM_PROC_THRDS_ENC entries
            */
   2770     ihevce_enc_loop_ctxt_t *aps_enc_loop_thrd_ctxt[MAX_NUM_FRM_PROC_THRDS_ENC];
   2771 
   2772     /** Number of processing threads created at run time */
   2773     WORD32 i4_num_proc_thrds;
   2774 
   2775     /**
   2776     *  Array of top row cu skip flags (1 bit per 8x8CU).
            *  HEVCE_MAX_WIDTH >> 6 bytes hold HEVCE_MAX_WIDTH >> 3 bits, i.e. one bit
            *  for every 8 pixels of the maximum width.
   2777     */
   2778     UWORD8 au1_cu_skip_top_row[HEVCE_MAX_WIDTH >> 6];
   2779 
   2780     /** Context models (stored at the end of second CTB in a row)
   2781     *  stored in packed form pState[bits6-1] | MPS[bit0]
   2782     *  for each CTB row
   2783     *  using entropy sync model in RD opt
   2784     */
   2785     UWORD8 au1_ctxt_models[MAX_NUM_CTB_ROWS_FRM][IHEVC_CAB_CTXT_END];
   2786 
   2787     /** Dependency manager for controlling EncLoop Top-Right CU dependency
   2788     * One per each bit-rate and one per each frame in parallel
   2789     */
   2790     void *aapv_dep_mngr_enc_loop_cu_top_right[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
   2791 
   2792     /** Dependency manager for controlling Deblocking Top dependency
   2793     * One per each bit-rate and one per each frame in parallel
   2794     */
   2795     void *aapv_dep_mngr_enc_loop_dblk[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
   2796 
   2797     /** number of bit-rate instances running */
   2798     WORD32 i4_num_bitrates;
   2799 
   2800     /** number of enc frames running in parallel */
   2801     WORD32 i4_num_enc_loop_frm_pllel;
   2802 
   2803     /* Pointer to Tile params base */
   2804     void *pv_tile_params_base;
   2805     /* Offset to get the Qp for the last CU of upper CTB-row.
   2806     This offset is from the current tile top row QP map start.
   2807 
   2808     This will only be consumed by the first CU of current CTB-row
   2809     iff [it is skip && entropy sync is off]
   2810     There is one entry for every tile-column because the offset remains constant
   2811     for all tiles lying in a tile-column */
   2812     WORD32 ai4_offset_for_last_cu_qp[MAX_TILE_COLUMNS];
   2813 } ihevce_enc_loop_master_ctxt_t;
   2814 
   2815 /**
   2816 ******************************************************************************
   2817 *  @brief  This struct is used for storing data required by the block merge
   2818 *          function
        *
        *  Every pointer member refers to storage declared outside this struct;
        *  the struct itself only carries the references plus scalar parameters.
   2819 ******************************************************************************
   2820 */
   2821 typedef struct
   2822 {
            /* One pointer per block-data type (8x8, 16x16, 32x32, 64x64) */
   2823     block_data_8x8_t *ps_8x8_data;
   2824 
   2825     block_data_16x16_t *ps_16x16_data;
   2826 
   2827     block_data_32x32_t *ps_32x32_data;
   2828 
   2829     block_data_64x64_t *ps_64x64_data;
   2830 
            /* Note: double pointer despite the 'ps_' prefix */
   2831     part_type_results_t **ps_32x32_results;
   2832 
   2833     cur_ctb_cu_tree_t *ps_cu_tree;
   2834 
   2835     ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb;
   2836 
   2837     mv_pred_ctxt_t *ps_mv_pred_ctxt;
   2838 
            /* Pointer to an array of (HEVCE_MAX_REF_PICS * 2) recon_pic_buf_t pointers */
   2839     recon_pic_buf_t *(*aps_ref_list)[HEVCE_MAX_REF_PICS * 2];
   2840 
            /* Top / left / current nbr_4x4_t pointers */
   2841     nbr_4x4_t *ps_top_nbr_4x4;
   2842 
   2843     nbr_4x4_t *ps_left_nbr_4x4;
   2844 
   2845     nbr_4x4_t *ps_curr_nbr_4x4;
   2846 
   2847     UWORD8 *pu1_inp;
   2848 
   2849     UWORD8 *pu1_ctb_nbr_map;
   2850 
            /* NOTE(review): presumably the stride of pu1_ctb_nbr_map - confirm */
   2851     WORD32 i4_nbr_map_strd;
   2852 
            /* NOTE(review): presumably the stride of pu1_inp - confirm */
   2853     WORD32 inp_stride;
   2854 
   2855     WORD32 i4_ctb_x_off;
   2856 
   2857     WORD32 i4_ctb_y_off;
   2858 
   2859     WORD32 use_satd_for_err_calc;
   2860 
   2861     WORD32 lambda;
   2862 
   2863     WORD32 lambda_q_shift;
   2864 
   2865     WORD32 frm_qstep;
   2866 
   2867     WORD32 num_4x4_in_ctb;
   2868 
   2869     UWORD8 *pu1_wkg_mem;
   2870 
   2871     UWORD8 **ppu1_pred;
   2872 
   2873     UWORD8 u1_bidir_enabled;
   2874 
   2875     UWORD8 u1_max_tr_depth;
   2876 
   2877     WORD32 i4_ctb_pos;
   2878 
   2879     WORD32 i4_ctb_size;
   2880 
            /* Array of (MAX_REFS_SEARCHABLE + 1) byte pointers */
   2881     UWORD8 *apu1_wt_inp[MAX_REFS_SEARCHABLE + 1];
   2882 
   2883     /** Pointer of Dep Mngr for EncLoop Top-Right CU dependency */
   2884     void *pv_dep_mngr_enc_loop_cu_top_right;
   2885     /** The current cu row no. for Dep Manager to Check */
   2886     WORD32 i4_dep_mngr_cur_cu_row_no;
   2887     /** The Top cu row no. for Dep Manager to Check */
   2888     WORD32 i4_dep_mngr_top_cu_row_no;
   2889 
   2890     WORD8 i1_quality_preset;
   2891 
   2892     /* Flag to control Top Right Sync during Merge */
   2893     UWORD8 u1_use_top_at_ctb_boundary;
   2894 
   2895 } block_merge_input_t;
   2896 
   2897 /* Structure which stores the info regarding the TUs present in the CU.
         * One instance carries a size, an x/y offset pair, a cost and an
         * early-cbf value.
         * NOTE(review): the units of the size and offsets (pixels vs 4x4 units)
         * are not visible in this header - confirm against the code that fills
         * the structure. */
   2898 typedef struct tu_prms_t
   2899 {
   2900     UWORD8 u1_tu_size;
   2901 
   2902     UWORD8 u1_x_off;
   2903 
   2904     UWORD8 u1_y_off;
   2905 
   2906     WORD32 i4_tu_cost;
   2907 
   2908     WORD32 i4_early_cbf;
   2909 
   2910 } tu_prms_t;
   2911 
        /* cu_final_update_prms: bundle of pointers only. The first four members
         * are pointers to pointers (CU output, PU, TU output and ecd data), the
         * remaining four are pointers to scalar counters / positions.
         * NOTE(review): the double indirection presumably lets the callee advance
         * the caller's write pointers - confirm against the users of this type. */
   2912 typedef struct
   2913 {
   2914     cu_enc_loop_out_t **pps_cu_final;
   2915 
   2916     pu_t **pps_row_pu;
   2917 
   2918     tu_enc_loop_out_t **pps_row_tu;
   2919 
   2920     UWORD8 **ppu1_row_ecd_data;
   2921 
   2922     WORD32 *pi4_num_pus_in_ctb;
   2923 
   2924     WORD32 *pi4_last_cu_pos_in_ctb;
   2925 
   2926     WORD32 *pi4_last_cu_size;
   2927 
   2928     UWORD8 *pu1_num_cus_in_ctb_out;
   2929 
   2930 } cu_final_update_prms;
   2931 
        /* final_mode_process_prms_t: parameter bundle taken (by pointer) by
         * functions of type pf_final_rdopt_mode_prcs. It carries buffer pointers,
         * each followed by its stride member, plus CU position / size / QP and
         * control flags.
         * The struct layout depends on the build: u1_is_cu_noisy exists only when
         * USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS evaluates to non-zero,
         * so every translation unit using this type must see the same value. */
   2932 typedef struct
   2933 {
   2934     cu_nbr_prms_t *ps_cu_nbr_prms;
   2935 
   2936     cu_inter_cand_t *ps_best_inter_cand;
   2937 
   2938     enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms;
   2939 
   2940     WORD32 packed_pred_mode;
   2941 
   2942     WORD32 rd_opt_best_idx;
   2943 
            /* Source buffer and its stride */
   2944     void *pv_src;
   2945 
   2946     WORD32 src_strd;
   2947 
            /* Prediction buffer and its stride */
   2948     void *pv_pred;
   2949 
   2950     WORD32 pred_strd;
   2951 
            /* Chroma prediction buffer and its stride */
   2952     void *pv_pred_chrm;
   2953 
   2954     WORD32 pred_chrm_strd;
   2955 
   2956     UWORD8 *pu1_final_ecd_data;
   2957 
            /* csbf buffer and its stride */
   2958     UWORD8 *pu1_csbf_buf;
   2959 
   2960     WORD32 csbf_strd;
   2961 
            /* Luma recon buffer and its stride */
   2962     void *pv_luma_recon;
   2963 
   2964     WORD32 recon_luma_strd;
   2965 
            /* Chroma recon buffer and its stride */
   2966     void *pv_chrm_recon;
   2967 
   2968     WORD32 recon_chrma_strd;
   2969 
   2970     UWORD8 u1_cu_pos_x;
   2971 
   2972     UWORD8 u1_cu_pos_y;
   2973 
   2974     UWORD8 u1_cu_size;
   2975 
            /* Signed 8-bit QP value */
   2976     WORD8 i1_cu_qp;
   2977 
   2978     UWORD8 u1_will_cabac_state_change;
   2979 
   2980     UWORD8 u1_recompute_sbh_and_rdoq;
   2981 
   2982     UWORD8 u1_is_first_pass;
   2983 
   2984 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
   2985     UWORD8 u1_is_cu_noisy;
   2986 #endif
   2987 
   2988 } final_mode_process_prms_t;
   2989 
        /* final_mode_state_t: one inter candidate stored by value together with
         * byte buffers for luma and chroma prediction data. Passed by pointer to
         * functions of type pf_cu_mode_decide and pf_store_cu_results. */
   2990 typedef struct
   2991 {
   2992     cu_inter_cand_t s_best_cand;
   2993 
   2994     /* The size is twice of what is required to ensure availability */
   2995     /* of adequate space for 'HBD' case */
   2996     UWORD8 au1_pred_luma[MAX_CU_SIZE * MAX_CU_SIZE * 2];
   2997 
   2998     /* The size is twice of what is required to ensure availability */
   2999     /* of adequate space for 422 case */
   3000     UWORD8 au1_pred_chroma[MAX_CU_SIZE * MAX_CU_SIZE * 2];
   3001 } final_mode_state_t;
   3002 
        /* ihevce_mixed_inter_modes_selector_prms_t: parameter bundle made of
         * candidate pointers (mixed-mode datastore, ME candidates, merge
         * candidates), context pointers, neighbour pointers, strides, CU
         * position / size and 8-bit counts and limits.
         * NOTE(review): judging by the name this is the input of the mixed inter
         * mode selection routine; that routine is not visible in this header -
         * confirm. */
   3003 typedef struct
   3004 {
   3005     cu_mixed_mode_inter_t *ps_mixed_modes_datastore;
   3006 
   3007     cu_inter_cand_t *ps_me_cands;
   3008 
   3009     cu_inter_cand_t *ps_merge_cands;
   3010 
   3011     mv_pred_ctxt_t *ps_mv_pred_ctxt;
   3012 
   3013     inter_pred_ctxt_t *ps_mc_ctxt;
   3014 
   3015     UWORD8 *pu1_ctb_nbr_map;
   3016 
   3017     void *pv_src;
   3018 
   3019     nbr_4x4_t *ps_cu_nbr_buf;
   3020 
   3021     nbr_4x4_t *ps_left_nbr_4x4;
   3022 
   3023     nbr_4x4_t *ps_top_nbr_4x4;
   3024 
   3025     nbr_4x4_t *ps_topleft_nbr_4x4;
   3026 
   3027     WORD32 i4_ctb_nbr_map_stride;
   3028 
   3029     WORD32 i4_src_strd;
   3030 
   3031     WORD32 i4_nbr_4x4_left_strd;
   3032 
   3033     UWORD8 u1_cu_size;
   3034 
   3035     UWORD8 u1_cu_pos_x;
   3036 
   3037     UWORD8 u1_cu_pos_y;
   3038 
            /* NOTE(review): presumably the entry counts of ps_me_cands and
               ps_merge_cands respectively - confirm */
   3039     UWORD8 u1_num_me_cands;
   3040 
   3041     UWORD8 u1_num_merge_cands;
   3042 
   3043     UWORD8 u1_max_num_mixed_mode_cands_to_select;
   3044 
   3045     UWORD8 u1_max_merge_candidates;
   3046 
   3047     UWORD8 u1_use_satd_for_merge_eval;
   3048 
   3049 } ihevce_mixed_inter_modes_selector_prms_t;
   3050 
        /* tu_node_data_t: per-node data record embedded in tu_tree_node_t (once
         * for luma, twice each for Cb and Cr). It holds 64-bit SSD / cost values,
         * SAD, bit and byte counts, zero row / column values, and 8-bit cbf,
         * recon buffer id, validity, size and position members.
         * The struct layout depends on the build: i8_not_coded_cost exists only
         * when ENABLE_INTER_ZCU_COST evaluates to non-zero, so every translation
         * unit using this type must see the same value. */
   3051 typedef struct
   3052 {
   3053     LWORD64 i8_ssd;
   3054 
   3055     LWORD64 i8_cost;
   3056 
   3057 #if ENABLE_INTER_ZCU_COST
   3058     LWORD64 i8_not_coded_cost;
   3059 #endif
   3060 
   3061     UWORD32 u4_sad;
   3062 
   3063     WORD32 i4_bits;
   3064 
   3065     WORD32 i4_num_bytes_used_for_ecd;
   3066 
   3067     WORD32 i4_zero_col;
   3068 
   3069     WORD32 i4_zero_row;
   3070 
   3071     UWORD8 u1_cbf;
   3072 
   3073     UWORD8 u1_reconBufId;
   3074 
   3075     UWORD8 u1_is_valid_node;
   3076 
   3077     UWORD8 u1_size;
   3078 
   3079     UWORD8 u1_posx;
   3080 
   3081     UWORD8 u1_posy;
   3082 } tu_node_data_t;
   3083 
        /* tu_tree_node_t: node of a four-way tree. Each node has four child
         * pointers (tl, tr, bl, br), one tu_node_data_t for luma, two each for Cb
         * and Cr, and a node-level validity flag that is separate from the
         * u1_is_valid_node member inside each tu_node_data_t.
         * NOTE(review): the child pointers are presumably NULL at leaf nodes -
         * confirm against the tree construction code. */
   3084 typedef struct tu_tree_node_t
   3085 {
   3086     struct tu_tree_node_t *ps_child_node_tl;
   3087 
   3088     struct tu_tree_node_t *ps_child_node_tr;
   3089 
   3090     struct tu_tree_node_t *ps_child_node_bl;
   3091 
   3092     struct tu_tree_node_t *ps_child_node_br;
   3093 
   3094     tu_node_data_t s_luma_data;
   3095 
   3096     /* 2 because of the 2 subTU's when input is 422 */
   3097     tu_node_data_t as_cb_data[2];
   3098 
   3099     tu_node_data_t as_cr_data[2];
   3100 
   3101     UWORD8 u1_is_valid_node;
   3102 
   3103 } tu_tree_node_t;
   3104 
   3105 /*****************************************************************************/
   3106 /* Extern Variable Declarations                                              */
   3107 /*****************************************************************************/
   3108 
   3109 /*****************************************************************************/
   3110 /* Extern Function Declarations                                              */
   3111 /*****************************************************************************/
   3112 
   3113 /*****************************************************************************/
   3114 /* Typedefs                                                                  */
   3115 /*****************************************************************************/
        /* pf_cu_mode_decide: pointer to a function that receives the encode loop
         * context, the CU parameters, the CU analysis data, a final_mode_state_t,
         * a byte buffer (pu1_ecd_data), the ps_col_pu / pu1_col_pu_map /
         * col_start_pu_idx inputs, and returns an LWORD64.
         * NOTE(review): the meaning of the return value is not visible in this
         * header - presumably a cost; confirm against the implementations. */
   3116 typedef LWORD64 (*pf_cu_mode_decide)(
   3117     ihevce_enc_loop_ctxt_t *ps_ctxt,
   3118     enc_loop_cu_prms_t *ps_cu_prms,
   3119     cu_analyse_t *ps_cu_analyse,
   3120     final_mode_state_t *ps_final_mode_state,
   3121     UWORD8 *pu1_ecd_data,
   3122     pu_col_mv_t *ps_col_pu,
   3123     UWORD8 *pu1_col_pu_map,
   3124     WORD32 col_start_pu_idx);
   3125 
        /* pf_inter_rdopt_cu_mc_mvp: pointer to a function that receives the encode
         * loop context, one inter candidate, the CU size and x/y position, the
         * left / top / top-left nbr_4x4_t pointers with the left stride, and a
         * buffer index, and returns an LWORD64.
         * NOTE(review): the meaning of the return value is not visible in this
         * header - confirm against the implementations. */
   3126 typedef LWORD64 (*pf_inter_rdopt_cu_mc_mvp)(
   3127     ihevce_enc_loop_ctxt_t *ps_ctxt,
   3128     cu_inter_cand_t *ps_inter_cand,
   3129     WORD32 cu_size,
   3130     WORD32 cu_pos_x,
   3131     WORD32 cu_pos_y,
   3132     nbr_4x4_t *ps_left_nbr_4x4,
   3133     nbr_4x4_t *ps_top_nbr_4x4,
   3134     nbr_4x4_t *ps_topleft_nbr_4x4,
   3135     WORD32 nbr_4x4_left_strd,
   3136     WORD32 curr_buf_idx);
   3137 
        /* pf_inter_rdopt_cu_ntu: pointer to a function that receives the encode
         * loop context, the CU parameters, a source pointer, the CU size and x/y
         * position, a buffer index, the chroma CU buffer parameters, one inter
         * candidate, the CU analysis data and i4_alpha_stim_multiplier, and
         * returns an LWORD64.
         * NOTE(review): the meaning of the return value is not visible in this
         * header - confirm against the implementations. */
   3138 typedef LWORD64 (*pf_inter_rdopt_cu_ntu)(
   3139     ihevce_enc_loop_ctxt_t *ps_ctxt,
   3140     enc_loop_cu_prms_t *ps_cu_prms,
   3141     void *pv_src,
   3142     WORD32 cu_size,
   3143     WORD32 cu_pos_x,
   3144     WORD32 cu_pos_y,
   3145     WORD32 curr_buf_idx,
   3146     enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
   3147     cu_inter_cand_t *ps_inter_cand,
   3148     cu_analyse_t *ps_cu_analyse,
   3149     WORD32 i4_alpha_stim_multiplier);
   3150 
        /* pf_intra_chroma_pred_mode_selector: pointer to a function returning void
         * that receives the encode loop context, the chroma CU buffer parameters,
         * the CU analysis data, an RD-opt index, a TU mode,
         * i4_alpha_stim_multiplier and u1_is_cu_noisy.
         * Unlike the u1_is_cu_noisy member of final_mode_process_prms_t, the
         * u1_is_cu_noisy parameter here is declared unconditionally. */
   3151 typedef void (*pf_intra_chroma_pred_mode_selector)(
   3152     ihevce_enc_loop_ctxt_t *ps_ctxt,
   3153     enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
   3154     cu_analyse_t *ps_cu_analyse,
   3155     WORD32 rd_opt_curr_idx,
   3156     WORD32 tu_mode,
   3157     WORD32 i4_alpha_stim_multiplier,
   3158     UWORD8 u1_is_cu_noisy);
   3159 
        /* pf_intra_rdopt_cu_ntu: pointer to a function that receives the encode
         * loop context, the CU parameters, a prediction pointer with its stride,
         * the chroma CU buffer parameters, a luma mode pointer, the CU analysis
         * data, the current source pointer, the left / top / top-left CU pointers,
         * the left / top nbr_4x4_t pointers with their strides, a buffer index,
         * a processing mode and i4_alpha_stim_multiplier, and returns an LWORD64.
         * NOTE(review): the meaning of the return value and the set of valid
         * func_proc_mode values are not visible in this header - confirm against
         * the implementations. */
   3160 typedef LWORD64 (*pf_intra_rdopt_cu_ntu)(
   3161     ihevce_enc_loop_ctxt_t *ps_ctxt,
   3162     enc_loop_cu_prms_t *ps_cu_prms,
   3163     void *pv_pred_org,
   3164     WORD32 pred_strd_org,
   3165     enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
   3166     UWORD8 *pu1_luma_mode,
   3167     cu_analyse_t *ps_cu_analyse,
   3168     void *pv_curr_src,
   3169     void *pv_cu_left,
   3170     void *pv_cu_top,
   3171     void *pv_cu_top_left,
   3172     nbr_4x4_t *ps_left_nbr_4x4,
   3173     nbr_4x4_t *ps_top_nbr_4x4,
   3174     WORD32 nbr_4x4_left_strd,
   3175     WORD32 cu_left_stride,
   3176     WORD32 curr_buf_idx,
   3177     WORD32 func_proc_mode,
   3178     WORD32 i4_alpha_stim_multiplier);
   3179 
        /* pf_final_rdopt_mode_prcs: pointer to a function returning void that
         * receives the encode loop context and a final_mode_process_prms_t
         * parameter bundle. */
   3180 typedef void (*pf_final_rdopt_mode_prcs)(
   3181     ihevce_enc_loop_ctxt_t *ps_ctxt, final_mode_process_prms_t *ps_prms);
   3182 
        /* pf_store_cu_results: pointer to a function returning void that receives
         * the encode loop context, the CU parameters and a final_mode_state_t. */
   3183 typedef void (*pf_store_cu_results)(
   3184     ihevce_enc_loop_ctxt_t *ps_ctxt,
   3185     enc_loop_cu_prms_t *ps_cu_prms,
   3186     final_mode_state_t *ps_final_state);
   3187 
        /* pf_enc_loop_cu_bot_copy: pointer to a function returning void that
         * receives the encode loop context, the CU parameters, a CU node output
         * context and the current CU position both in the row and in the CTB.
         * NOTE(review): the units of the two position arguments are not visible
         * in this header - confirm against the implementations. */
   3188 typedef void (*pf_enc_loop_cu_bot_copy)(
   3189     ihevce_enc_loop_ctxt_t *ps_ctxt,
   3190     enc_loop_cu_prms_t *ps_cu_prms,
   3191     ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt,
   3192     WORD32 curr_cu_pos_in_row,
   3193     WORD32 curr_cu_pos_in_ctb);
   3194 
        /* pf_enc_loop_ctb_left_copy: pointer to a function returning void that
         * receives the encode loop context and the CU parameters. */
   3195 typedef void (*pf_enc_loop_ctb_left_copy)(
   3196     ihevce_enc_loop_ctxt_t *ps_ctxt, enc_loop_cu_prms_t *ps_cu_prms);
   3197 
   3198 #endif /* _IHEVCE_ENC_LOOP_STRUCTS_H_ */
   3199