/******************************************************************************
 *
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *****************************************************************************
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/*!
******************************************************************************
* \file ihevce_enc_structs.h
*
* \brief
*    This file contains the structure definitions of the encoder
*
* \date
*    18/09/2012
*
* \author
*    Ittiam
*
******************************************************************************
*/
     35 
     36 #ifndef _IHEVCE_ENC_STRUCTS_H_
     37 #define _IHEVCE_ENC_STRUCTS_H_
     38 
     39 /*****************************************************************************/
     40 /* Constant Macros                                                           */
     41 /*****************************************************************************/
     42 #define HEVCE_MAX_WIDTH 1920
     43 #define HEVCE_MAX_HEIGHT 1088
     44 
     45 #define HEVCE_MIN_WIDTH 64
     46 #define HEVCE_MIN_HEIGHT 64
     47 
     48 #define MAX_CTBS_IN_FRAME (HEVCE_MAX_WIDTH * HEVCE_MAX_HEIGHT) / (MIN_CTB_SIZE * MIN_CTB_SIZE)
     49 #define MAX_NUM_CTB_ROWS_FRM (HEVCE_MAX_HEIGHT) / (MIN_CTB_SIZE)
     50 
     51 #define MIN_VERT_PROC_UNIT (8)
     52 #define MAX_NUM_VERT_UNITS_FRM (HEVCE_MAX_HEIGHT) / (MIN_VERT_PROC_UNIT)
     53 
     54 #define HEVCE_MAX_REF_PICS 8
     55 #define HEVCE_MAX_DPB_PICS (HEVCE_MAX_REF_PICS + 1)
     56 
     57 #define PAD_HORZ 80
     58 #define PAD_VERT 80
     59 
     60 #define DEFAULT_MAX_REFERENCE_PICS 4
     61 
     62 #define BLU_RAY_SUPPORT 231457
     63 
     64 /** @brief max number of parts in minCU : max 4 for NxN */
     65 #define NUM_PU_PARTS 4
     66 /** @brief max number of parts in Inter CU */
     67 #define NUM_INTER_PU_PARTS (MAX_NUM_INTER_PARTS)
     68 #define SEND_BI_RDOPT
     69 #ifdef SEND_BI_RDOPT
     70 /** @brief */
     71 #define MAX_INTER_CU_CANDIDATES 4
     72 #else
     73 /** @brief */
     74 #define MAX_INTER_CU_CANDIDATES 3
     75 #endif
     76 /** @brief */
     77 #define MAX_INTRA_CU_CANDIDATES 3
     78 
     79 #define MAX_INTRA_CANDIDATES 35
     80 
     81 /** For each resolution & bit-rate instance, one entropy thread is created */
     82 #define NUM_ENTROPY_THREADS (IHEVCE_MAX_NUM_RESOLUTIONS * IHEVCE_MAX_NUM_BITRATES)
     83 
     84 /* Number of buffers between Decomp and HME layers 1 : Seq mode >1 parallel mode */
     85 #define NUM_BUFS_DECOMP_HME 1
     86 
     87 /** Macro to indicate pre me and L0 ipe stagger in pre enc*/
     88 /** Implies MAX_PRE_ENC_STAGGER - 1 max stagger*/
     89 #define MAX_PRE_ENC_STAGGER (NUM_LAP2_LOOK_AHEAD + 1 + MIN_L1_L0_STAGGER_NON_SEQ)
     90 
     91 #define NUM_ME_ENC_BUFS (MAX_NUM_ENC_LOOP_PARALLEL)
     92 
     93 #define MIN_L0_IPE_ENC_STAGGER 1
     94 
     95 /*stagger between L0 IPE and enc*/
     96 #define MAX_L0_IPE_ENC_STAGGER (NUM_ME_ENC_BUFS + (MIN_L0_IPE_ENC_STAGGER))
     97 
     98 #define MAX_PRE_ENC_RC_DELAY (MAX_L0_IPE_ENC_STAGGER + 1 + NUM_BUFS_DECOMP_HME)
     99 
    100 #define MIN_PRE_ENC_RC_DELAY (MIN_L0_IPE_ENC_STAGGER + 1 + NUM_BUFS_DECOMP_HME)
    101 
    102 /** @brief number of contexts buffers maintained at frame level b/w pre-encode : encode */
    103 /*Explaination for minus 1: eg: MAX_PRE_ENC_STAGGER = 31 and MAX_L0_IPE_ENC_STAGGER = 5, In this case L1 produce 30 buffer,
    104   l0 will start off with 30th buffer and enc will work on 33nd and 34rd frame.*/
    105 /* NUM_BUFS_DECOMP_HME is added to take care of pipeline between Decomp-preintra and HME */
    106 #define MAX_NUM_PREENC_ENC_BUFS                                                                    \
    107     (MAX_PRE_ENC_STAGGER + MAX_L0_IPE_ENC_STAGGER + NUM_BUFS_DECOMP_HME - 1)  //22//5
    108 
    109 #define MIN_NUM_PREENC_ENC_BUFS                                                                    \
    110     (MAX_PRE_ENC_STAGGER + MIN_L0_IPE_ENC_STAGGER + NUM_BUFS_DECOMP_HME - 1)
    111 
    112 /** @brief number of ctb contexts maintained at frame level b/w encode : entropy */
    113 #define NUM_FRMPROC_ENTCOD_BUFS 8
    114 
    115 /** @brief number of extra recon buffs required for stagger design*/
    116 #define NUM_EXTRA_RECON_BUFS 0
    117 
    118 /** recon picture buffer size need to be increased to support EncLoop Parallelism **/
    119 #define NUM_EXTRA_RECON_BUFS_FOR_ELP 0
    120 
    121 /** @brief maximum number of bytes in 4x4 afetr scanning */
    122 #define MAX_SCAN_COEFFS_BYTES_4x4 (48)
    123 
    124 /** @brief maximum number of luma coeffs bytes after scan at CTB level  */
    125 #define MAX_LUMA_COEFFS_CTB ((MAX_SCAN_COEFFS_BYTES_4x4) * (MAX_TU_IN_CTB)*4)
    126 
    127 /** @brief maximum number of chroma coeffs bytes after scan at CTB level  */
    128 #define MAX_CHRM_COEFFS_CTB ((MAX_SCAN_COEFFS_BYTES_4x4) * ((MAX_TU_IN_CTB >> 1)) * 4)
    129 
    130 /** @brief maximum number of coeffs bytes after scan at CTB level  */
    131 #define MAX_SCAN_COEFFS_CTB ((MAX_LUMA_COEFFS_CTB) + (MAX_CHRM_COEFFS_CTB))
    132 
    133 /** @breif PU map CTB buffer buyes for neighbour availibility */
    134 #define MUN_PU_MAP_BYTES_PER_CTB (MAX_PU_IN_CTB_ROW * MAX_PU_IN_CTB_ROW)
    135 
    136 /** @brief tottal system memory records */
    137 #define TOTAL_SYSTEM_MEM_RECS 120
    138 
    139 /** @brief number of input async command buffers */
    140 #define NUM_AYSNC_CMD_BUFS 4
    141 
    142 /** @brief Comand buffers size */
    143 #define ENC_COMMAND_BUFF_SIZE 512 /* 512 bytes */
    144 
    145 /** @brief Number of output buffers */
    146 #define NUM_OUTPUT_BUFS 4
    147 
    148 /** @brief Lamda for SATD cost estimation */
    149 #define LAMDA_SATD 1
    150 
    151 /** @brief Maximum number of 1s in u2_sig_coeff_abs_gt1_flags */
    152 #define MAX_GT_ONE 8
    153 
    154 /** MAX num ipntra pred modes */
    155 #define MAX_NUM_IP_MODES 35
    156 
    157 /** Number of best intra modes used for intra mode refinement */
    158 #define NUM_BEST_MODES 3
    159 
    160 /** Maximim number of parallel frame processing threads in pre enocde group */
    161 #define MAX_NUM_FRM_PROC_THRDS_PRE_ENC MAX_NUM_CORES
    162 
    163 /** Maximim number of parallel frame processing threads in encode group */
    164 #define MAX_NUM_FRM_PROC_THRDS_ENC MAX_NUM_CORES
    165 
    166 /** Macro to indicate teh PING_PONG buffers for stagger*/
    167 #define PING_PONG_BUF 2
    168 
    169 /** Max number of layers in Motion estimation
    170  * should be greater than or equal to MAX_NUM_LAYERS defined in hme_interface.h
    171  */
    172 
    173 #define MAX_NUM_HME_LAYERS 5
    174 /**
    175 ******************************************************************************
    176  *  @brief      Maximum number of layers allowed
    177 ******************************************************************************
    178  */
    179 #define MAX_NUM_LAYERS 4
    180 
    181 #define NUM_RC_PIC_TYPE 9
    182 
    183 #define MAX_NUM_NODES_CU_TREE (85)
    184 
    185 /* macros to control Dynamic load balance */
    186 #define DYN_LOAD_BAL_UPPER_LIMIT 0.80
    187 
    188 #define DYN_LOAD_BAL_LOWER_LIMIT 0.20
    189 
    190 #define NUM_SUB_GOP_DYN_BAL 1
    191 
    192 #define MIN_NUM_FRMS_DYN_BAL 4
    193 
    194 #define CORES_SRES_OR_MRES 2
    195 
    196 #define HME_HIGH_SAD_BLK_THRESH 35
    197 
    198 /* Enable to compare cabac states of final entropy thread with enc loop states */
    199 #define VERIFY_ENCLOOP_CABAC_STATES 0
    200 
    201 #define MAX_NUM_BLKS_IN_MAX_CU 64 /* max cu size is 64x64 */
    202 
    203 /*****************************************************************************/
    204 /* Function Macros                                                           */
    205 /*****************************************************************************/
    206 
    207 /*****************************************************************************/
    208 /* Typedefs                                                                  */
    209 /*****************************************************************************/
    210 typedef void (*pf_iq_it_rec)(
    211     WORD16 *pi2_src,
    212     WORD16 *pi2_tmp,
    213     UWORD8 *pu1_pred,
    214     WORD16 *pi2_dequant_coeff,
    215     UWORD8 *pu1_dst,
    216     WORD32 qp_div, /* qpscaled / 6 */
    217     WORD32 qp_rem, /* qpscaled % 6 */
    218     WORD32 src_strd,
    219     WORD32 pred_strd,
    220     WORD32 dst_strd,
    221     WORD32 zero_cols,
    222     WORD32 zero_rows);
    223 
    224 typedef void (*pf_intra_pred)(
    225     UWORD8 *pu1_ref, WORD32 src_strd, UWORD8 *pu1_dst, WORD32 dst_strd, WORD32 nt, WORD32 mode);
    226 
    227 typedef UWORD32 (*pf_res_trans_luma)(
    228     UWORD8 *pu1_src,
    229     UWORD8 *pu1_pred,
    230     WORD32 *pi4_tmp,
    231     WORD16 *pi2_dst,
    232     WORD32 src_strd,
    233     WORD32 pred_strd,
    234     WORD32 dst_strd_chr_flag);
    235 
    236 typedef WORD32 (*pf_quant)(
    237     WORD16 *pi2_coeffs,
    238     WORD16 *pi2_quant_coeff,
    239     WORD16 *pi2_dst,
    240     WORD32 qp_div, /* qpscaled / 6 */
    241     WORD32 qp_rem, /* qpscaled % 6 */
    242     WORD32 q_add,
    243     WORD32 src_strd,
    244     WORD32 dst_strd,
    245     UWORD8 *pu1_csbf_buf,
    246     WORD32 csbf_strd,
    247     WORD32 *zero_cols,
    248     WORD32 *zero_row);
    249 
    250 /*****************************************************************************/
    251 /* Enums                                                                     */
    252 /*****************************************************************************/
    253 /// supported partition shape
    254 typedef enum
    255 {
    256     SIZE_2Nx2N = 0,  ///< symmetric motion partition,  2Nx2N
    257     SIZE_2NxN = 1,  ///< symmetric motion partition,  2Nx N
    258     SIZE_Nx2N = 2,  ///< symmetric motion partition,   Nx2N
    259     SIZE_NxN = 3,  ///< symmetric motion partition,   Nx N
    260     SIZE_2NxnU = 4,  ///< asymmetric motion partition, 2Nx( N/2) + 2Nx(3N/2)
    261     SIZE_2NxnD = 5,  ///< asymmetric motion partition, 2Nx(3N/2) + 2Nx( N/2)
    262     SIZE_nLx2N = 6,  ///< asymmetric motion partition, ( N/2)x2N + (3N/2)x2N
    263     SIZE_nRx2N = 7  ///< asymmetric motion partition, (3N/2)x2N + ( N/2)x2N
    264 } PART_SIZE_E;
    265 
    266 /** @brief  Interface level Queues of Encoder */
    267 
    268 typedef enum
    269 {
    270     IHEVCE_INPUT_DATA_CTRL_Q = 0,
    271     IHEVCE_ENC_INPUT_Q,
    272     IHEVCE_INPUT_ASYNCH_CTRL_Q,
    273     IHEVCE_OUTPUT_DATA_Q,
    274     IHEVCE_OUTPUT_STATUS_Q,
    275     IHEVCE_RECON_DATA_Q,  //   /*que for holding recon buffer */
    276 
    277     IHEVCE_FRM_PRS_ENT_COD_Q, /*que for holding output buffer of enc_loop |input buffer of entropy */
    278 
    279     IHEVCE_PRE_ENC_ME_Q, /*que for holding input buffer to ME | output of pre-enc */
    280 
    281     IHEVCE_ME_ENC_RDOPT_Q, /* que for holding output buffer of ME or input buffer of Enc-RDopt */
    282 
    283     IHEVCE_L0_IPE_ENC_Q, /* Queue for holding L0 ipe data to enc loop*/
    284 
    285     /* should be last entry */
    286     IHEVCE_MAX_NUM_QUEUES
    287 
    288 } IHEVCE_Q_DESC_T;
    289 
    290 /*****************************************************************************/
    291 /* Structure                                                                 */
    292 /*****************************************************************************/
    293 
    294 /**
    295 RC_QP_QSCALE conversion structures
    296 **/
    297 typedef struct
    298 {
    299     WORD16 i2_min_qp;
    300 
    301     WORD16 i2_max_qp;
    302 
    303     WORD16 i2_min_qscale;
    304 
    305     WORD16 i2_max_qscale;
    306 
    307     WORD32 *pi4_qscale_to_qp;
    308 
    309     WORD32 *pi4_qp_to_qscale_q_factor;
    310 
    311     WORD32 *pi4_qp_to_qscale;
    312 
    313     WORD8 i1_qp_offset;
    314 
    315 } rc_quant_t;
    316 
    317 /**
    318 ******************************************************************************
    319  *  @brief     4x4 level structure which contains all the parameters
    320  *             for neighbour prediction puopose
    321 ******************************************************************************
    322  */
    323 typedef struct
    324 {
    325     /** PU motion vectors */
    326     pu_mv_t mv;
    327     /** Intra or Inter flag for each partition - 0 or 1  */
    328     UWORD16 b1_intra_flag : 1;
    329     /** CU skip flag - 0 or 1  */
    330     UWORD16 b1_skip_flag : 1;
    331     /** CU depth in CTB tree (0-3)  */
    332     UWORD16 b2_cu_depth : 2;
    333 
    334     /** Y Qp  for loop filter */
    335     WORD16 b8_qp : 8;
    336 
    337     /** Luma Intra Mode 0 - 34   */
    338     UWORD16 b6_luma_intra_mode : 6;
    339 
    340     /** Y CBF  for BS compute */
    341     UWORD16 b1_y_cbf : 1;
    342     /** Pred L0 flag of current 4x4 */
    343     UWORD16 b1_pred_l0_flag : 1;
    344 
    345     /** Pred L0 flag of current 4x4 */
    346     UWORD16 b1_pred_l1_flag : 1;
    347 } nbr_4x4_t;
    348 
    349 typedef struct
    350 {
    351     /** Bottom Left availability flag */
    352     UWORD8 u1_bot_lt_avail;
    353 
    354     /** Left availability flag */
    355     UWORD8 u1_left_avail;
    356 
    357     /** Top availability flag */
    358     UWORD8 u1_top_avail;
    359 
    360     /** Top Right availability flag */
    361     UWORD8 u1_top_rt_avail;
    362 
    363     /** Top Left availability flag */
    364     UWORD8 u1_top_lt_avail;
    365 
    366 } nbr_avail_flags_t;
    367 
    368 typedef struct
    369 {
    370     /** prev intra flag*/
    371     UWORD8 b1_prev_intra_luma_pred_flag : 1;
    372 
    373     /** mpm_idx */
    374     UWORD8 b2_mpm_idx : 2;
    375 
    376     /** reminder pred mode */
    377     UWORD8 b5_rem_intra_pred_mode : 5;
    378 
    379 } intra_prev_rem_flags_t;
    380 
    381 /**
    382 ******************************************************************************
    383  *  @brief     calc (T+Q+RDOQ) output TU structure; entropy input TU structure
    384 ******************************************************************************
    385  */
    386 typedef struct
    387 {
    388     /** base tu structure */
    389     tu_t s_tu;
    390 
    391     /** offset of luma data in ecd buffer */
    392     WORD32 i4_luma_coeff_offset;
    393 
    394     /** offset of cb data in ecd buffer */
    395     WORD32 ai4_cb_coeff_offset[2];
    396 
    397     /** offset of cr data in ecd buffer */
    398     WORD32 ai4_cr_coeff_offset[2];
    399 
    400 } tu_enc_loop_out_t;
    401 
    402 typedef struct
    403 {
    404     /* L0 Motion Vector */
    405     mv_t s_l0_mv;
    406 
    407     /* L1 Motion Vector */
    408     mv_t s_l1_mv;
    409 
    410     /* L0 Ref index */
    411     WORD8 i1_l0_ref_idx;
    412 
    413     /*  L1 Ref index */
    414     WORD8 i1_l1_ref_idx;
    415 
    416     /* L0 Ref Pic Buf ID */
    417     WORD8 i1_l0_pic_buf_id;
    418 
    419     /* L1 Ref Pic Buf ID */
    420     WORD8 i1_l1_pic_buf_id;
    421 
    422     /** intra flag */
    423     UWORD8 b1_intra_flag : 1;
    424 
    425     /* Pred mode */
    426     UWORD8 b2_pred_mode : 2;
    427 
    428     /* reserved flag can be used for something later */
    429     UWORD8 u1_reserved;
    430 
    431 } pu_col_mv_t;
    432 
    433 /*****************************************************************************/
    434 /* Encoder uses same structure as pu_t for prediction unit                   */
    435 /*****************************************************************************/
    436 
    437 /**
    438 ******************************************************************************
    439  *  @brief     Encode loop (T+Q+RDOQ) output CU structure; entropy input CU structure
    440 ******************************************************************************
    441  */
    442 typedef struct
    443 {
    444     /* CU X position in terms of min CU (8x8) units */
    445     UWORD32 b3_cu_pos_x : 3;
    446 
    447     /* CU Y position in terms of min CU (8x8) units */
    448     UWORD32 b3_cu_pos_y : 3;
    449 
    450     /** CU size in terms of min CU (8x8) units */
    451     UWORD32 b4_cu_size : 4;
    452 
    453     /** transquant bypass flag ; 0 for this encoder */
    454     UWORD32 b1_tq_bypass_flag : 1;
    455 
    456     /** cu skip flag */
    457     UWORD32 b1_skip_flag : 1;
    458 
    459     /** intra / inter CU flag */
    460     UWORD32 b1_pred_mode_flag : 1;
    461 
    462     /** indicates partition information for CU
    463      *  For intra 0 : for 2Nx2N / 1 for NxN iff CU=minCBsize
    464      *  For inter 0 : @sa PART_SIZE_E
    465      */
    466     UWORD32 b3_part_mode : 3;
    467 
    468     /** 0 for this encoder */
    469     UWORD32 b1_pcm_flag : 1;
    470 
    471     /** only applicable for intra cu */
    472     UWORD32 b3_chroma_intra_pred_mode : 3;
    473 
    474     /** no residue flag for cu */
    475     UWORD32 b1_no_residual_syntax_flag : 1;
    476 
    477     /* flag to indicate if current CU is the first
    478     CU of the Quantisation group*/
    479     UWORD32 b1_first_cu_in_qg : 1;
    480 
    481     /** Intra prev and reminder flags
    482      * if part is NxN the tntries 1,2,3 will be valid
    483      * other wise only enry 0 will be set.
    484      */
    485     intra_prev_rem_flags_t as_prev_rem[NUM_PU_PARTS];
    486 
    487     /**
    488      *  Access valid  number of pus in this array based on u1_part_mode
    489      *  Moiton vector differentials and reference idx should be
    490      *  populated in this structure
    491      *  @remarks shall be accessed only for inter pus
    492      */
    493     pu_t *ps_pu;
    494 
    495     /**
    496      *  pointer to first tu of this cu. Each TU need to be populated
    497      *  in TU order by calc. Total TUs in CU is given by u2_num_tus_in_cu
    498      */
    499     tu_enc_loop_out_t *ps_enc_tu;
    500 
    501     /** total TUs in this CU; shall be 0 if b1_no_residual_syntax_flag = 1 */
    502     UWORD16 u2_num_tus_in_cu;
    503 
    504     /** Coeff bufer pointer */
    505     /* Pointer to transform coeff data */
    506     /*************************************************************************/
    507     /* Following format is repeated for every coded TU                       */
    508     /* Luma Block                                                            */
    509     /* num_coeffs      : 16 bits                                             */
    510     /* zero_cols       : 8 bits ( 1 bit per 4 columns)                       */
    511     /* sig_coeff_map   : ((TU Size * TU Size) + 31) >> 5 number of WORD32s   */
    512     /* coeff_data      : Non zero coefficients                               */
    513     /* Cb Block (only for last TU in 4x4 case else for every luma TU)        */
    514     /* num_coeffs      : 16 bits                                             */
    515     /* zero_cols       : 8 bits ( 1 bit per 4 columns)                       */
    516     /* sig_coeff_map   : ((TU Size * TU Size) + 31) >> 5 number of WORD32s   */
    517     /* coeff_data      : Non zero coefficients                               */
    518     /* Cr Block (only for last TU in 4x4 case else for every luma TU)        */
    519     /* num_coeffs      : 16 bits                                             */
    520     /* zero_cols       : 8 bits ( 1 bit per 4 columns)                       */
    521     /* sig_coeff_map   : ((TU Size * TU Size) + 31) >> 5 number of WORD32s   */
    522     /* coeff_data      : Non zero coefficients                               */
    523     /*************************************************************************/
    524     void *pv_coeff;
    525 
    526     /** qp used during for CU
    527       * @remarks :
    528       */
    529     WORD8 i1_cu_qp;
    530 
    531 } cu_enc_loop_out_t;
    532 
    533 /**
    534  * SAO
    535  */
    536 typedef struct
    537 {
    538     /**
    539      * sao_type_idx_luma
    540      */
    541     UWORD32 b3_y_type_idx : 3;
    542 
    543     /**
    544      * luma sao_band_position
    545      */
    546     UWORD32 b5_y_band_pos : 5;
    547 
    548     /**
    549      * sao_type_idx_chroma
    550      */
    551     UWORD32 b3_cb_type_idx : 3;
    552 
    553     /**
    554      * cb sao_band_position
    555      */
    556     UWORD32 b5_cb_band_pos : 5;
    557 
    558     /**
    559      * sao_type_idx_chroma
    560      */
    561     UWORD32 b3_cr_type_idx : 3;
    562 
    563     /**
    564      * cb sao_band_position
    565      */
    566     UWORD32 b5_cr_band_pos : 5;
    567 
    568     /*SAO Offsets
    569      * In all these offsets, 0th element is not used
    570      */
    571     /**
    572      * luma SaoOffsetVal[i]
    573      */
    574     WORD8 u1_y_offset[5];
    575 
    576     /**
    577      * chroma cb SaoOffsetVal[i]
    578      */
    579     WORD8 u1_cb_offset[5];
    580 
    581     /**
    582      * chroma cr SaoOffsetVal[i]
    583      */
    584     WORD8 u1_cr_offset[5];
    585 
    586     /**
    587      * sao_merge_left_flag common for y,cb,cr
    588      */
    589     UWORD32 b1_sao_merge_left_flag : 1;
    590 
    591     /**
    592      * sao_merge_up_flag common for y,cb,cr
    593      */
    594     UWORD32 b1_sao_merge_up_flag : 1;
    595 
    596 } sao_enc_t;
    597 
    598 /**
    599 ******************************************************************************
    600  *  @brief       ctb output structure; output of Encode loop, input to entropy
    601 ******************************************************************************
    602  */
    603 typedef struct
    604 {
    605     /**
    606      * bit0     :  depth0 split flag, (64x64 splits)
    607      * bits 1-3 :  not used
    608      * bits 4-7 :  depth1 split flags; valid iff depth0 split=1 (32x32 splits)
    609      * bits 8-23:  depth2 split flags; (if 0 16x16 is cu else 8x8 min cu)
    610 
    611      * if a split flag of n is set for depth 1, check the following split flags
    612      * of [(8 + 4*(n-4)): (8 + 4*(n-4)+ 3)] for depth 2:
    613      *
    614      */
    615     UWORD32 u4_cu_split_flags;
    616 
    617     /***************************************************************
    618      * For any given CU position CU_posx, CU_posy access
    619      *  au4_packed_tu_split_flags[(CU_posx >> 5)[(CU_posy >> 5)]
    620      * Note : For CTB size smaller than 64x64 only use u4_packed_tu_split_flags[0]
    621      ****************************************************************/
    622 
    623     /**
    624      * access bits corresponding to actual CU size till leaf nodes
    625      * bit0     :  (32x32 TU split flag)
    626      * bits 1-3 :  not used
    627      * bits 4-7 :  (16x16 TUsplit flags)
    628      * bits 8-23:  (8x8  TU split flags)
    629 
    630      * if a split flag of n is set for depth 1, check the following split flags
    631      * of [(8 + 4*(n-4)): (8 + 4*(n-4)+ 3)] for depth 2:
    632      *
    633      * @remarks     As tu sizes are relative to CU sizes the producer has to
    634      * make sure the correctness of u4_packed_tu_split_flags.
    635      *
    636      * @remarks     au4_packed_tu_split_flags_cu[1]/[2]/[3] to be used only
    637      *              for 64x64 ctb.
    638      */
    639     UWORD32 au4_packed_tu_split_flags_cu[4];
    640 
    641     /**
    642      *  pointer to first CU of CTB. Each CU need to be populated
    643      *  in CU order by calc. Total CUs in CTB is given by u1_num_cus_in_ctb
    644      */
    645     cu_enc_loop_out_t *ps_enc_cu;
    646 
    647     /** total TUs in this CU; shall be 0 if b1_no_residual_syntax_flag = 1 */
    648     UWORD8 u1_num_cus_in_ctb;
    649 
    650     /** CTB neighbour availability flags */
    651     nbr_avail_flags_t s_ctb_nbr_avail_flags;
    652 
    653     /* SAO parameters of the CTB */
    654     sao_enc_t s_sao;
    655 
    656 } ctb_enc_loop_out_t;
    657 
    658 /**
    659 ******************************************************************************
    660  *  @brief      cu inter candidate for encoder
    661 ******************************************************************************
    662  */
    663 typedef struct
    664 {
    665     /** base pu structure
    666      *  access valid  number of entries in this array based on u1_part_size
    667      */
    668     pu_t as_inter_pu[NUM_INTER_PU_PARTS];
    669 
    670     /* TU split flag : tu_split_flag[0] represents the transform splits
    671      *  for CU size <= 32, for 64x64 each ai4_tu_split_flag corresponds
    672      *  to respective 32x32  */
    673     /* For a 8x8 TU - 1 bit used to indicate split */
    674     /* For a 16x16 TU - LSB used to indicate winner between 16 and 8 TU's. 4 other bits used to indicate split in each 8x8 quadrant */
    675     /* For a 32x32 TU - See above */
    676     WORD32 ai4_tu_split_flag[4];
    677 
    678     /* TU split flag : tu_split_flag[0] represents the transform splits
    679      *  for CU size <= 32, for 64x64 each ai4_tu_split_flag corresponds
    680      *  to respective 32x32  */
    681     /* For a 8x8 TU - 1 bit used to indicate split */
    682     /* For a 16x16 TU - LSB used to indicate winner between 16 and 8 TU's. 4 other bits used to indicate split in each 8x8 quadrant */
    683     /* For a 32x32 TU - See above */
    684     WORD32 ai4_tu_early_cbf[4];
    685 
    686     /**Pointer to the buffer having predicted data after mc in SATD stage
    687      * Since we have 2 buffers for each candidate pred data for best merge candidate
    688      * can be in one of the 2 buffers.
    689      */
    690     UWORD8 *pu1_pred_data;
    691 
    692     UWORD16 *pu2_pred_data;
    693 
    694     UWORD8 *pu1_pred_data_scr;
    695 
    696     UWORD16 *pu2_pred_data_src;
    697 
    698     /* Total cost: SATD cost + MV cost */
    699     WORD32 i4_total_cost;
    700 
    701     /** Stride for predicted data*/
    702     WORD32 i4_pred_data_stride;
    703 
    704     /** @remarks u1_part_size can be non square only for  Inter   */
    705     UWORD8 b3_part_size : 3; /* @sa: PART_SIZE_E */
    706 
    707     /** evaluate transform for cusize iff this flag is 1 */
    708     /** this flag should be set 0 if CU is 64x64         */
    709     UWORD8 b1_eval_tx_cusize : 1;
    710 
    711     /** evaluate transform for cusize/2 iff this flag is 1 */
    712     UWORD8 b1_eval_tx_cusize_by2 : 1;
    713 
    714     /** Skip Flag : ME should always set this 0 for the candidates */
    715     UWORD8 b1_skip_flag : 1;
    716 
    717     UWORD8 b1_intra_has_won : 1;
    718 
    719     /* used to mark if this mode needs to be evaluated in auxiliary mode */
    720     /* if 1, this mode will be evaluated otherwise not.*/
    721     UWORD8 b1_eval_mark : 1;
    722 
    723 } cu_inter_cand_t;
    724 
    725 /**
    726 ******************************************************************************
    727  *  @brief      cu intra candidate for encoder
    728 ******************************************************************************
    729  */
    730 typedef struct
    731 {
    732     UWORD8 au1_intra_luma_mode_nxn_hash[NUM_PU_PARTS][MAX_INTRA_CANDIDATES];
    733 
    734     /**
    735      *  List of NxN PU candidates in CU  for each partition
    736      *  valid only of if current cusize = mincusize
    737      * +1 to signal the last flag invalid value of 255 needs to be stored
    738      */
    739     UWORD8 au1_intra_luma_modes_nxn[NUM_PU_PARTS][(MAX_INTRA_CU_CANDIDATES * (4)) + 2 + 1];
    740 
    741     /* used to mark if this mode needs to be evaluated in auxiliary mode */
    742     /* if 1, this mode will be evaluated otherwise not.*/
    743     UWORD8 au1_nxn_eval_mark[NUM_PU_PARTS][MAX_INTRA_CU_CANDIDATES + 1];
    744 
    745     /**
    746      *  List of 2Nx2N PU candidates in CU
    747      * +1 to signal the last flag invalid value of 255 needs to be stored
    748      */
    749     UWORD8 au1_intra_luma_modes_2nx2n_tu_eq_cu[MAX_INTRA_CU_CANDIDATES + 1];
    750 
    751     /**
    752      *  List of 2Nx2N PU candidates in CU
    753      * +1 to signal the last flag invalid value of 255 needs to be stored
    754      */
    755     UWORD8 au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[MAX_INTRA_CU_CANDIDATES + 1];
    756 
    757     /* used to mark if this mode needs to be evaluated in auxiliary mode */
    758     /* if 1, this mode will be evaluated otherwise not.*/
    759     UWORD8 au1_2nx2n_tu_eq_cu_eval_mark[MAX_INTRA_CU_CANDIDATES + 1];
    760 
    761     /* used to mark if this mode needs to be evaluated in auxiliary mode */
    762     /* if 1, this mode will be evaluated otherwise not.*/
    763     UWORD8 au1_2nx2n_tu_eq_cu_by_2_eval_mark[MAX_INTRA_CU_CANDIDATES + 1];
    764 
    765     UWORD8 au1_num_modes_added[NUM_PU_PARTS];
    766 
    767     /** evaluate transform for cusize iff this flag is 1 */
    768     /** this flag should be set 0 if CU is 64x64         */
    769     UWORD8 b1_eval_tx_cusize : 1;
    770 
    771     /** evaluate transform for cusize/2 iff this flag is 1 */
    772     UWORD8 b1_eval_tx_cusize_by2 : 1;
    773 
    774     /** number of intra candidates for SATD evaluation in */
    775     UWORD8 b6_num_intra_cands : 6;
    776 
    777 } cu_intra_cand_t;
    778 
    779 /**
    780 ******************************************************************************
    781  *  @brief      cu structure for mode analysis/evaluation
    782 ******************************************************************************
    783  */
    784 typedef struct
    785 {
    786     /** CU X position in terms of min CU (8x8) units */
    787     UWORD8 b3_cu_pos_x : 3;
    788
    789     /** CU Y position in terms of min CU (8x8) units */
    790     UWORD8 b3_cu_pos_y : 3;
    791
    792     /** reserved bits (3 + 3 + 2 bits make up one 8-bit unit) */
    793     UWORD8 b2_reserved : 2;
    794
    795     /** CU size 2N (width or height) in pixels */
    796     UWORD8 u1_cu_size;
    797
    798     /** Intra CU candidates after FAST CU decision (output of IPE)
    799      *  8421 algo along with transform size evaluation will
    800      *  be done for these modes in Encode loop pass.
    801      */
    802     cu_intra_cand_t s_cu_intra_cand;
    803
    804     /** indicates the angular mode (0 - 34) for chroma,
    805      *  Note : No provision currently to take chroma through RDOPT or SATD
    806      */
    807     UWORD8 u1_chroma_intra_pred_mode;
    808
    809     /** number of inter candidates in as_cu_inter_cand[];
    810       * shall be 0 for intra frames.
    811       * These inter candidates are evaluated for RDOPT apart from merge/skip candidates
    812       */
    813     UWORD8 u1_num_inter_cands;
    814
    815     /** List of candidates to be evaluated (SATD/RDOPT) for this CU
    816       * @remarks : merge/skip candidates are not a part of this list
    817       */
    818     cu_inter_cand_t as_cu_inter_cand[MAX_INTER_CU_CANDIDATES];
    819
    820     WORD32 ai4_mv_cost[MAX_INTER_CU_CANDIDATES][NUM_INTER_PU_PARTS]; /* indexed [inter candidate][PU part]; presumably the MV cost per part - TODO confirm */
    821
    822 #if REUSE_ME_COMPUTED_ERROR_FOR_INTER_CAND_SIFTING
    823     WORD32 ai4_err_metric[MAX_INTER_CU_CANDIDATES][NUM_INTER_PU_PARTS]; /* same indexing as ai4_mv_cost; compiled in only under the enclosing #if */
    824 #endif
    825
    826     /* Flag to convey whether Intra or Inter is the best candidate among the
    827     candidates populated
    828      0: if Inter is the winner and 1: if Intra is the winner */
    829     UWORD8 u1_best_is_intra;
    830
    831     /** number of intra rdopt candidates
    832       * @remarks : shall be <= u1_num_intra_cands (NOTE(review): no field of that name exists here; presumably s_cu_intra_cand.b6_num_intra_cands - confirm)
    833       */
    834     UWORD8 u1_num_intra_rdopt_cands;
    835     /** qp used for this CU
    836       * @remarks :
    837       */
    838     WORD8 i1_cu_qp;
    839     /** Activity factor used in pre enc thread for deriving the Qp
    840       * @remarks : This is in Q format (NOTE(review): meaning of the [4][2] indices is not stated here - confirm against the writer)
    841       */
    842     WORD32 i4_act_factor[4][2];
    843
    844 } cu_analyse_t;
    845 
    846 /**
    847 ******************************************************************************
    848  *  @brief      Structure for CU recursion
    849 ******************************************************************************
    850  */
    851 typedef struct cur_ctb_cu_tree_t
    852 {
    853     /** CU X position in terms of min CU (8x8) units */
    854     UWORD8 b3_cu_pos_x : 3;
    855
    856     /** CU Y position in terms of min CU (8x8) units */
    857     UWORD8 b3_cu_pos_y : 3;
    858
    859     /** reserved bits */
    860     UWORD8 b2_reserved : 2;
    861
    862     UWORD8 u1_cu_size; /* presumably CU size in pixels, as in cu_analyse_t - TODO confirm */
    863
    864     UWORD8 u1_intra_eval_enable; /* presumably non-zero => evaluate intra for this node - TODO confirm */
    865
    866     UWORD8 u1_inter_eval_enable; /* presumably non-zero => evaluate inter for this node - TODO confirm */
    867
    868     /* Flag that indicates whether to evaluate this node */
    869     /* during RDOPT evaluation. Clearing it does not mean that */
    870     /* evaluation of the children needs to be abandoned */
    871     UWORD8 is_node_valid;
    872
    873     LWORD64 i8_best_rdopt_cost; /* best RDOPT cost stored for this node */
    874
    875     struct cur_ctb_cu_tree_t *ps_child_node_tl; /* child node; "tl" presumably top-left quadrant - TODO confirm */
    876
    877     struct cur_ctb_cu_tree_t *ps_child_node_tr; /* child node; "tr" presumably top-right quadrant */
    878
    879     struct cur_ctb_cu_tree_t *ps_child_node_bl; /* child node; "bl" presumably bottom-left quadrant */
    880
    881     struct cur_ctb_cu_tree_t *ps_child_node_br; /* child node; "br" presumably bottom-right quadrant */
    882
    883 } cur_ctb_cu_tree_t;
    884 
    885 typedef struct /* per-32x32-block results; used in me_ctb_data_t and aliased as block_data_64x64_t */
    886 {
    887     WORD32 num_best_results; /* presumably the number of valid entries in as_best_results[] - TODO confirm */
    888
    889     part_type_results_t as_best_results[NUM_BEST_ME_OUTPUTS]; /* up to NUM_BEST_ME_OUTPUTS result entries */
    890
    891 } block_data_32x32_t;
    892 
    893 /**
    894 ******************************************************************************
    895  *  @brief      Structure for storing data about the single 64x64
    896  *              block in a 64x64 CTB (same layout as block_data_32x32_t)
    897 ******************************************************************************
    898  */
    899 typedef block_data_32x32_t block_data_64x64_t;
    900 
    901 /**
    902 ******************************************************************************
    903  *  @brief      Structure for storing data about all 16 16x16
    904  *              blocks in a 64x64 CTB and each of their partitions
    905 ******************************************************************************
    906  */
    907 typedef struct
    908 {
    909     WORD32 num_best_results; /* presumably the number of valid entries in as_best_results[] - TODO confirm */
    910
    911     /**
    912      * (Describes the disabled field below, which is commented out.)
    913      * mask of active partitions, Totally 17 bits. For a given partition
    914      * id, as per PART_ID_T enum the corresponding bit position is 1/0
    915      * indicating that partition is active or inactive */
    916     /*WORD32 i4_part_mask;*/
    917
    918     part_type_results_t as_best_results[NUM_BEST_ME_OUTPUTS]; /* up to NUM_BEST_ME_OUTPUTS result entries */
    919
    920 } block_data_16x16_t;
    921 
    922 typedef struct /* per-8x8-block results; same members as block_data_32x32_t */
    923 {
    924     WORD32 num_best_results; /* presumably the number of valid entries in as_best_results[] - TODO confirm */
    925
    926     part_type_results_t as_best_results[NUM_BEST_ME_OUTPUTS]; /* up to NUM_BEST_ME_OUTPUTS result entries */
    927 } block_data_8x8_t;
    928 
    929 /**
    930 ******************************************************************************
    931  *  @brief      Structure for data export from ME to Enc_Loop
    932 ******************************************************************************
    933  */
    934 typedef struct
    935 {
    936     block_data_8x8_t as_8x8_block_data[64]; /* 64 entries: one per 8x8 block of a 64x64 CTB (entry order not stated here) */
    937
    938     block_data_16x16_t as_block_data[16]; /* 16 entries: one per 16x16 block of a 64x64 CTB */
    939
    940     block_data_32x32_t as_32x32_block_data[4]; /* 4 entries: one per 32x32 block of a 64x64 CTB */
    941
    942     block_data_64x64_t s_64x64_block_data; /* the single 64x64 block */
    943
    944 } me_ctb_data_t;
    945 
    946 /**
    947 ******************************************************************************
    948  *  @brief   noise detection related structure
    949  *
    950 ******************************************************************************
    951  */
    952 
    953 typedef struct
    954 {
    955     WORD32 i4_noise_present; /* presumably non-zero when noise was detected in this CTB - TODO confirm */
    956
    957     UWORD8 au1_is_8x8Blk_noisy[MAX_CU_IN_CTB]; /* per-8x8-block noise flag, MAX_CU_IN_CTB entries */
    958
    959     UWORD32 au4_variance_src_16x16[MAX_CU_IN_CTB]; /* source variance; NOTE(review): named 16x16 but sized MAX_CU_IN_CTB like the 8x8 array - confirm granularity */
    960 } ihevce_ctb_noise_params;
    961 
    962 /**
    963 ******************************************************************************
    964  *  @brief      ctb structure for mode analysis/evaluation
    965 ******************************************************************************
    966  */
    967 typedef struct
    968 {
    969     /**
    970      * CU decision in a ctb is frozen by ME/IPE and populated in
    971      * u4_cu_split_flags (packed split flags).
    972      * @remarks
    973      * TODO:review comment
    974      * bit0     :  64x64 split flag,  (depth0 flag for 64x64 ctb unused for smaller ctb)
    975      * bits 1-3 :  not used
    976      * bits 4-7 :  32x32 split flags; (depth1 flags for 64x64ctb / only bit4 used for 32x32ctb)
    977      * bits 8-23:  16x16 split flags; (depth2 flags for 64x64 / depth1[bits8-11] for 32x32 [bit8 for ctb 16x16] )
    978
    979      * if the depth 1 split flag at bit n (n = 4..7) is set, check the following
    980      * split flags [(8 + 4*(n-4)): (8 + 4*(n-4)+ 3)] for depth 2:
    981      *
    982      */
    983     UWORD32 u4_cu_split_flags;
    984
    985     UWORD8 u1_num_cus_in_ctb; /* number of CUs in this CTB */
    986
    987     cur_ctb_cu_tree_t *ps_cu_tree; /* pointer to a CU tree node (presumably the root for this CTB - TODO confirm) */
    988
    989     me_ctb_data_t *ps_me_ctb_data; /* pointer to the ME result data for this CTB */
    990
    991     ihevce_ctb_noise_params s_ctb_noise_params; /* noise detection parameters, stored by value */
    992
    993 } ctb_analyse_t;
    994 /**
    995 ******************************************************************************
    996  *  @brief Structures for tapping ssd and bit-estimate information for all CUs
    997 ******************************************************************************
    998  */
    999 
   1000 typedef struct /* pairs a 64-bit cost with an index */
   1001 {
   1002     LWORD64 i8_cost; /* cost value */
   1003     WORD32 i4_idx; /* index associated with i8_cost (what it indexes is not stated here) */
   1004 } cost_idx_t;
   1005 
   1006 /**
   1007 ******************************************************************************
   1008  *  @brief      reference/non reference pic context for encoder
   1009 ******************************************************************************
   1010  */
   1011 typedef struct
   1012
   1013 {
   1014     /**
   1015      * YUV buffer descriptor for the recon
   1016      * Allocation per frame for Y = ((ALIGN(frame width, MAX_CTB_SIZE)) +  2 * PAD_HORZ)*
   1017      *                              ((ALIGN(frame height, MAX_CTB_SIZE)) + 2 * PAD_VERT)
   1018      */
   1019     iv_enc_yuv_buf_t s_yuv_buf_desc;
   1020
   1021     iv_enc_yuv_buf_src_t s_yuv_buf_desc_src; /* NOTE(review): undocumented; presumably the descriptor of the matching source picture - confirm */
   1022
   1023     /* Pointer to Luma (Y) sub plane buffers Horz/ Vert / HV grid            */
   1024     /* When (L0ME_IN_OPENLOOP_MODE == 1), additional buffer required to store */
   1025     /* the fullpel plane for use as reference */
   1026     UWORD8 *apu1_y_sub_pel_planes[3 + L0ME_IN_OPENLOOP_MODE];
   1027
   1028     /**
   1029      * frm level pointer to pu bank for colocated  mv access
   1030      * Allocation per frame = (ALIGN(frame width, MAX_CTB_SIZE) / MIN_PU_SIZE) *
   1031      *                         (ALIGN(frame height, MAX_CTB_SIZE) / MIN_PU_SIZE)
   1032      */
   1033     pu_col_mv_t *ps_frm_col_mv;
   1034     /**
   1035      ************************************************************************
   1036      * Pointer to a PU map stored at frame level,
   1037      * It contains a 7 bit pu index in encoder order w.r.t to a ctb at a min
   1038      * granularity of MIN_PU_SIZE size.
   1039      ************************************************************************
   1040      */
   1041     UWORD8 *pu1_frm_pu_map;
   1042
   1043     /** CTB level frame buffer to store the accumulated sum of
   1044      * number of PUs for every row */
   1045     UWORD16 *pu2_num_pu_map;
   1046
   1047     /** Offsets in the PU buffer at every CTB level */
   1048     UWORD32 *pu4_pu_off;
   1049
   1050     /**  Collocated POC for reference list 0
   1051      * ToDo: Change the array size when multiple slices are to be supported */
   1052     WORD32 ai4_col_l0_poc[HEVCE_MAX_REF_PICS];
   1053
   1054     /** Collocated POC for reference list 1 */
   1055     WORD32 ai4_col_l1_poc[HEVCE_MAX_REF_PICS];
   1056
   1057     /** 0 = top field,  1 = bottom field  */
   1058     WORD32 i4_bottom_field;
   1059
   1060     /** top field first input in case of interlaced case */
   1061     WORD32 i4_topfield_first;
   1062
   1063     /** POC of this picture (presumably picture order count; the earlier comment was a copy of the one on i4_topfield_first) */
   1064     WORD32 i4_poc;
   1065
   1066     /** unique buffer id */
   1067     WORD32 i4_buf_id;
   1068
   1069     /** is this reference frame or not */
   1070     WORD32 i4_is_reference;
   1071
   1072     /** Picture type of current picture */
   1073     WORD32 i4_pic_type;
   1074
   1075     /** Flag to indicate whether current picture is free or in use */
   1076     WORD32 i4_is_free;
   1077
   1078     /** Bit0 - of this flag indicates whether current picture needs to be deblocked,
   1079         padded and hpel planes need to be generated.
   1080         These are turned off typically in non reference pictures when psnr
   1081         and recon dump is disabled.
   1082
   1083         Bit1 - of this flag set to 1 if sao is enabled. This is to enable deblocking when sao is enabled
   1084      */
   1085     WORD32 i4_deblk_pad_hpel_cur_pic;
   1086
   1087     /**
   1088      * weight and offset for this ref pic. To be initialized for every pic
   1089      * based on the lap output
   1090      */
   1091     ihevce_wght_offst_t s_weight_offset;
   1092
   1093     /**
   1094      * Reciprocal of the luma weight in q15 format
   1095      */
   1096     WORD32 i4_inv_luma_wt;
   1097
   1098     /**
   1099      * Log to base 2 of the common denominator used for luma weights across all ref pics
   1100      */
   1101     WORD32 i4_log2_wt_denom;
   1102
   1103     /**
   1104      * Flag: this picture is used as a reference for encoding the current picture
   1105      */
   1106     WORD32 i4_used_by_cur_pic_flag;
   1107
   1108 #if ADAPT_COLOCATED_FROM_L0_FLAG
   1109     WORD32 i4_frame_qp; /* frame QP; compiled in only under the enclosing #if */
   1110 #endif
   1111     /*
   1112     * IDR GOP number
   1113     */
   1114
   1115     WORD32 i4_idr_gop_num;
   1116
   1117     /*
   1118     * non-ref-free_flag (NOTE(review): semantics not documented here - confirm)
   1119     */
   1120     WORD32 i4_non_ref_free_flag;
   1121     /**
   1122       * Dependency manager instance for ME - Prev recon dep
   1123       */
   1124     void *pv_dep_mngr_recon;
   1125
   1126     /* display number of this picture (presumably its position in display order - TODO confirm) */
   1127     WORD32 i4_display_num;
   1128 } recon_pic_buf_t;
   1129 
   1130 /**
   1131 ******************************************************************************
   1132  *  @brief  Lambda values used for various cost computations
   1133 ******************************************************************************
   1134  */
   1135 typedef struct
   1136 {
   1137     /************************************************************************/
   1138     /* The fields with the string 'type2' in their names are required */
   1139     /* when both 8bit and hbd lambdas are needed. The lambdas corresponding */
   1140     /* to the bit_depth != internal_bit_depth are stored in these fields */
   1141     /************************************************************************/
   1142
   1143     /**
   1144      * Closed loop SSD Lambda
   1145      * This is multiplied with bits for RD cost computations in SSD mode
   1146      * This is represented in q format with shift of LAMBDA_Q_SHIFT
   1147      */
   1148     LWORD64 i8_cl_ssd_lambda_qf;
   1149
   1150     LWORD64 i8_cl_ssd_type2_lambda_qf; /* 'type2' counterpart of i8_cl_ssd_lambda_qf */
   1151
   1152     /**
   1153      * Closed loop SSD Lambda for chroma residue (chroma qp is different from luma qp)
   1154      * This is multiplied with bits for RD cost computations in SSD mode
   1155      * This is represented in q format with shift of LAMBDA_Q_SHIFT
   1156      */
   1157     LWORD64 i8_cl_ssd_lambda_chroma_qf;
   1158
   1159     LWORD64 i8_cl_ssd_type2_lambda_chroma_qf; /* 'type2' counterpart of i8_cl_ssd_lambda_chroma_qf */
   1160
   1161     /**
   1162      * Closed loop SAD Lambda
   1163      * This is multiplied with bits for RD cost computations in SAD mode
   1164      * This is represented in q format with shift of LAMBDA_Q_SHIFT
   1165      */
   1166     WORD32 i4_cl_sad_lambda_qf;
   1167
   1168     WORD32 i4_cl_sad_type2_lambda_qf; /* 'type2' counterpart of i4_cl_sad_lambda_qf */
   1169
   1170     /**
   1171      * Open loop SAD Lambda
   1172      * This is multiplied with bits for RD cost computations in SAD mode
   1173      * This is represented in q format with shift of LAMBDA_Q_SHIFT
   1174      */
   1175     WORD32 i4_ol_sad_lambda_qf;
   1176
   1177     WORD32 i4_ol_sad_type2_lambda_qf; /* 'type2' counterpart of i4_ol_sad_lambda_qf */
   1178
   1179     /**
   1180      * Closed loop SATD Lambda
   1181      * This is multiplied with bits for RD cost computations in SATD mode
   1182      * This is represented in q format with shift of LAMBDA_Q_SHIFT
   1183      */
   1184     WORD32 i4_cl_satd_lambda_qf;
   1185
   1186     WORD32 i4_cl_satd_type2_lambda_qf; /* 'type2' counterpart of i4_cl_satd_lambda_qf */
   1187
   1188     /**
   1189      * Open loop SATD Lambda
   1190      * This is multiplied with bits for RD cost computations in SATD mode
   1191      * This is represented in q format with shift of LAMBDA_Q_SHIFT
   1192      */
   1193     WORD32 i4_ol_satd_lambda_qf;
   1194
   1195     WORD32 i4_ol_satd_type2_lambda_qf; /* 'type2' counterpart of i4_ol_satd_lambda_qf */
   1196
   1197     double lambda_modifier; /* NOTE(review): undocumented; presumably a scale factor used when deriving the luma lambdas - confirm */
   1198
   1199     double lambda_uv_modifier; /* NOTE(review): undocumented; presumably the chroma (UV) counterpart of lambda_modifier - confirm */
   1200
   1201     UWORD32 u4_chroma_cost_weighing_factor; /* NOTE(review): undocumented; presumably weights chroma cost against luma cost - confirm */
   1202
   1203 } frm_lambda_ctxt_t;
   1204 /**
   1205 ******************************************************************************
   1206 *  @brief  Mode attributes for 4x4 block populated by early decision
   1207 ******************************************************************************
   1208  */
   1209 typedef struct
   1210 {
   1211     /** Flag: whether a best mode is present for this 4x4 block (presumably non-zero = present) */
   1212     UWORD8 mode_present;
   1213
   1214     /** Best mode for the current 4x4 prediction block */
   1215     UWORD8 best_mode;
   1216
   1217     /** SAD of the best mode for the current 4x4 prediction block */
   1218     UWORD16 sad;
   1219
   1220     /** cost of the best mode for the current 4x4 prediction block */
   1221     UWORD16 sad_cost;
   1222
   1223 } ihevce_ed_mode_attr_t;  //early decision
   1224 /**
   1225 ******************************************************************************
   1226  *  @brief  Structure at 8x8 block level which has parameters such as cur satd
   1227  * for QP mod @ L0 level
   1228 ******************************************************************************
   1229  */
   1230 typedef struct
   1231 {
   1232     /* SATD of the current data at 8x8 level for the current layer (L0) */
   1233     WORD32 i4_8x8_cur_satd;
   1234 } ihevce_8x8_L0_satd_t;
   1235 /**
   1236 ******************************************************************************
   1237  *  @brief  Structure at 8x8 block level mean for MEAN based QP mod
   1238 ******************************************************************************
   1239  */
   1240 typedef struct
   1241 {
   1242     /* Mean of the current data at 8x8 level for the current layer (L0); the earlier comment said SATD, copied from ihevce_8x8_L0_satd_t */
   1243     WORD16 i2_8x8_cur_mean;
   1244 } ihevce_8x8_L0_mean_t;
   1245 
   1246 //#define DEBUG_ED_CTB_POS
   1247 /**
   1248 ******************************************************************************
   1249  *  @brief  Structure at 4x4 block level which has parameters about early
   1250  *          intra or inter decision
   1251 ******************************************************************************
   1252  */
   1253 typedef struct
   1254 {
   1255     /**
   1256      * Final parameter of Intra-Inter early decision for the current 4x4.
   1257      * 0 - invalid decision
   1258      * 1 - eval intra only
   1259      * 2 - eval inter only
   1260      * 3 - eval both intra and inter
   1261      */
   1262     UWORD8 intra_or_inter : 2;
   1263
   1264     UWORD8 merge_success : 1; /* NOTE(review): undocumented 1-bit flag; presumably set when merging of 4x4 blocks succeeded - confirm */
   1265
   1266     /** Best mode for the current 4x4 prediction block */
   1267     UWORD8 best_mode;
   1268
   1269     /* sad cost for the best prediction mode (field disabled below) */
   1270     //UWORD16 best_sad_cost;
   1271
   1272     /** Best merge mode for the current 4x4 prediction block (presumably the mode chosen when merge_success is set - TODO confirm) */
   1273     UWORD8 best_merge_mode;
   1274
   1275     /* SATD at 4x4 level for the current layer (L1) */
   1276     WORD32 i4_4x4_satd;
   1277
   1278     /* SATD of the current data at 4x4 level for the current layer (L1) */
   1279     WORD32 i4_4x4_cur_satd;
   1280
   1281 } ihevce_ed_blk_t;  //early decision
   1282 
   1283 /* l1 ipe ctb analyze structure */
   1284 /* Contains cu level qp mod related information for all possible cu
   1285 sizes (16,32,64 in L0) in a CTB*/
   1286 typedef struct
   1287 {
   1288     WORD32 i4_sum_4x4_satd[16]; /* presumably sum of the L1_4x4 SATDs per L1_8x8 block (16 per L1_32x32) - TODO confirm */
   1289     WORD32 i4_min_4x4_satd[16]; /* presumably min of the L1_4x4 SATDs per L1_8x8 block - TODO confirm */
   1290
   1291     /*satd for L1_8x8 blocks in L1_32x32
   1292     16 - num L1_8x8 in L1_32x32
   1293     2 =>
   1294         0 - sum of L1_4x4 @ L1_8x8
   1295           - equivalent to transform size of 16x16 @ L0
   1296         1 - min/median of L1_4x4 @ L1_8x8
   1297           - equivalent to transform size of 8x8 @ L0
   1298     */
   1299     WORD32 i4_8x8_satd[16][2];
   1300
   1301     /*satd for L1_16x16 blocks in L1_32x32
   1302     4 - num L1_16x16 in L1_32x32
   1303     3 =>
   1304         0 - sum of (sum of L1_4x4 @ L1_8x8) @ L1_16x16
   1305           - equivalent to transform size of 32x32 @ L0
   1306         1 - min/median of (sum of L1_4x4 @ L1_8x8) @ L1_16x16
   1307           - equivalent to transform size of 16x16 @ L0
   1308         2 - min/median of (min/median of L1_4x4 @ L1_8x8) @ L1_16x16
   1309           - equivalent to transform size of 8x8 @ L0
   1310     */
   1311     WORD32 i4_16x16_satd[4][3];
   1312
   1313     /*satd for 32x32 block in L1*/
   1314     /*Please note that i4_32x32_satd[0][3] contains sum of all 32x32 */
   1315     /*satd for L1_32x32 blocks in L1_32x32
   1316     1 - num L1_32x32 in L1_32x32
   1317     4 =>
   1318         0 - min/median of (sum of (sum of L1_4x4 @ L1_8x8) @ L1_16x16) @ L1_32x32
   1319           - equivalent to transform size of 32x32 @ L0
   1320         1 - min/median of (sum of L1_4x4 @ L1_8x8) @ L1_32x32
   1321           - equivalent to transform size of 16x16 @ L0
   1322         2 - min/median of (min/median of L1_4x4 @ L1_8x8) @ L1_32x32
   1323           - equivalent to transform size of 8x8 @ L0
   1324         3 - sum of (sum of (sum of L1_4x4 @ L1_8x8) @ L1_16x16) @ L1_32x32
   1325     */
   1326     WORD32 i4_32x32_satd[1][4];
   1327
   1328     /*Store SATD at 8x8 level for current layer (L1)*/
   1329     WORD32 i4_best_satd_8x8[16];
   1330
   1331     /* EIID: This will be used for early inter intra decisions */
   1332     /* (the SAD itself is kept in i4_best_sad_8x8_l1_ipe below) */
   1333     /*Cost based on sad at 8x8 level for current layer (l1) */
   1334     WORD32 i4_best_sad_cost_8x8_l1_ipe[16];
   1335
   1336     WORD32 i4_best_sad_8x8_l1_ipe[16]; /* SAD at 8x8 level for current layer (l1), IPE */
   1337     /* SAD cost at 8x8 level for ME. All other cost are IPE cost */
   1338     WORD32 i4_best_sad_cost_8x8_l1_me[16];
   1339
   1340     /* SAD cost at 8x8 level for ME, for given reference */
   1341     WORD32 i4_sad_cost_me_for_ref[16];
   1342
   1343     /* SAD at 8x8 level for ME. for given reference */
   1344     WORD32 i4_sad_me_for_ref[16];
   1345
   1346     /* SAD at 8x8 level for ME. All other cost are IPE cost */
   1347     WORD32 i4_best_sad_8x8_l1_me[16];
   1348
   1349     WORD32 i4_best_sad_8x8_l1_me_for_decide[16]; /* NOTE(review): undocumented; presumably the ME SAD copy used for the intra/inter decision - confirm */
   1350
   1351     /*Mean @ L0 16x16*/
   1352     WORD32 ai4_16x16_mean[16];
   1353
   1354     /*Mean @ L0 32x32*/
   1355     WORD32 ai4_32x32_mean[4];
   1356
   1357     /*Mean @ L0 64x64*/
   1358     WORD32 i4_64x64_mean;
   1359
   1360 } ihevce_ed_ctb_l1_t;  //early decision
   1361 
   1362 /**
   1363 ******************************************************************************
   1364  *  @brief   8x8 Intra analyze structure
   1365 ******************************************************************************
   1366  */
   1367 typedef struct
   1368 {
   1369     /** Best intra modes for 8x8 transform.
   1370      *  The list is terminated by inserting 255 after the last valid mode
   1371      */
   1372     UWORD8 au1_best_modes_8x8_tu[MAX_INTRA_CU_CANDIDATES + 1];
   1373
   1374     /** Best 8x8 intra modes for 4x4 transform
   1375      *  The list is terminated by inserting 255 after the last valid mode
   1376      */
   1377     UWORD8 au1_best_modes_4x4_tu[MAX_INTRA_CU_CANDIDATES + 1];
   1378
   1379     /** Best 4x4 intra modes, one list per 4x4 block (4 lists)
   1380      *  Each list is terminated by inserting 255 after the last valid mode
   1381      */
   1382     UWORD8 au1_4x4_best_modes[4][MAX_INTRA_CU_CANDIDATES + 1];
   1383
   1384     /** best 8x8 intra sad/SATD cost */
   1385     WORD32 i4_best_intra_cost;
   1386
   1387     /** flag to indicate if nxn pu mode (different pu at 4x4 level) is enabled */
   1388     UWORD8 b1_enable_nxn : 1;
   1389
   1390     /** valid cu flag : required for incomplete ctbs at frame boundaries */
   1391     UWORD8 b1_valid_cu : 1;
   1392
   1393     /** dummy bits (1 + 1 + 6 bits make up one 8-bit unit) */
   1394     UWORD8 b6_reserved : 6;
   1395
   1396 } intra8_analyse_t;
   1397 
   1398 /**
   1399 ******************************************************************************
   1400  *  @brief   16x16 Intra analyze structure
   1401 ******************************************************************************
   1402  */
   1403 typedef struct
   1404 {
   1405     /** Best intra modes for 16x16 transform.
   1406      *  The list is terminated by inserting 255 after the last valid mode
   1407      */
   1408     UWORD8 au1_best_modes_16x16_tu[MAX_INTRA_CU_CANDIDATES + 1];
   1409
   1410     /** Best 16x16 intra modes for 8x8 transform
   1411      *  The list is terminated by inserting 255 after the last valid mode
   1412      */
   1413     UWORD8 au1_best_modes_8x8_tu[MAX_INTRA_CU_CANDIDATES + 1];
   1414
   1415     /** 8x8 children intra analyze for this 16x16 */
   1416     intra8_analyse_t as_intra8_analyse[4];
   1417
   1418     /* best 16x16 intra sad/SATD cost */
   1419     WORD32 i4_best_intra_cost;
   1420
   1421     /* indicates if 16x16 is best cu or 8x8 cu */
   1422     UWORD8 b1_split_flag : 1;
   1423
   1424     /* indicates if 8x8 vs 16x16 rdo evaluation needed */
   1425     /* or only 8x8's rdo evaluation needed */
   1426     UWORD8 b1_merge_flag : 1;
   1427
   1428     /**
   1429      * valid cu flag : required for incomplete ctbs at frame boundaries
   1430      * or if CTB size is lower than 32
   1431      */
   1432     UWORD8 b1_valid_cu : 1;
   1433
   1434     /** dummy bits; the field keeps the b6_ prefix but is 5 bits wide (1 + 1 + 1 + 5 = 8) */
   1435     UWORD8 b6_reserved : 5;
   1436
   1437 } intra16_analyse_t;
   1438 
   1439 /**
   1440 ******************************************************************************
   1441  *  @brief   32x32 Intra analyze structure
   1442 ******************************************************************************
   1443  */
   1444 typedef struct
   1445 {
   1446     /** Best intra modes for 32x32 transform.
   1447      *  The list is terminated by inserting 255 after the last valid mode
   1448      */
   1449     UWORD8 au1_best_modes_32x32_tu[MAX_INTRA_CU_CANDIDATES + 1];
   1450
   1451     /** Best 32x32 intra modes for 16x16 transform
   1452      *  The list is terminated by inserting 255 after the last valid mode
   1453      */
   1454     UWORD8 au1_best_modes_16x16_tu[MAX_INTRA_CU_CANDIDATES + 1];
   1455
   1456     /** 16x16 children intra analyze for this 32x32 */
   1457     intra16_analyse_t as_intra16_analyse[4];
   1458
   1459     /* best 32x32 intra sad/SATD cost               */
   1460     WORD32 i4_best_intra_cost;
   1461
   1462     /* indicates if 32x32 is best cu or 16x16 cu    */
   1463     UWORD8 b1_split_flag : 1;
   1464
   1465     /* indicates if 32x32 vs 16x16 rdo evaluation needed */
   1466     /* or 16x16 vs 8x8 evaluation is needed */
   1467     UWORD8 b1_merge_flag : 1;
   1468
   1469     /**
   1470      * valid cu flag : required for incomplete ctbs at frame boundaries
   1471      * or if CTB size is lower than 64
   1472      */
   1473     UWORD8 b1_valid_cu : 1;
   1474
   1475     /** dummy bits; the field keeps the b6_ prefix but is 5 bits wide (1 + 1 + 1 + 5 = 8) */
   1476     UWORD8 b6_reserved : 5;
   1477
   1478 } intra32_analyse_t;
   1479 
   1480 /**
   1481 ******************************************************************************
   1482  *  @brief  IPE L0 analyze structure for L0 ME to do intra/inter CU decisions
   1483  *          This is a CTB level structure encapsulating IPE modes, cost at all
   1484  *          level. IPE also recommends max intra CU sizes, which are required
   1485  *          by ME for CU size determination in intra dominant CTB
   1486 ******************************************************************************
   1487  */
   1488 typedef struct
   1489 {
   1490     /** Best 64x64 intra modes for 32x32 transform.
   1491      *  The list is terminated by inserting 255 after the last valid mode
   1492      */
   1493     UWORD8 au1_best_modes_32x32_tu[MAX_INTRA_CU_CANDIDATES + 1];
   1494
   1495     /** 32x32 children intra analyze for this 64x64    */
   1496     intra32_analyse_t as_intra32_analyse[4];
   1497
   1498     /* indicates if 64x64 is best CUs or 32x32 CUs      */
   1499     UWORD8 u1_split_flag;
   1500
   1501     /* CTB level best 8x8 intra costs  */
   1502     WORD32 ai4_best8x8_intra_cost[MAX_CU_IN_CTB];
   1503
   1504     /* CTB level best 16x16 intra costs */
   1505     WORD32 ai4_best16x16_intra_cost[MAX_CU_IN_CTB >> 2];
   1506
   1507     /* CTB level best 32x32 intra costs */
   1508     WORD32 ai4_best32x32_intra_cost[MAX_CU_IN_CTB >> 4];
   1509
   1510     /* best 64x64 intra cost */
   1511     WORD32 i4_best64x64_intra_cost;
   1512
   1513     /**
   1514      * CTB level early intra / inter decision at 8x8 block level
   1515      * 0 - invalid decision
   1516      * 1 - eval intra only
   1517      * 2 - eval inter only
   1518      * 3 - eval both intra and inter
   1519      */
   1520     /* Z scan format */
   1521     WORD8 ai1_early_intra_inter_decision[MAX_CU_IN_CTB];
   1522
   1523     /*
   1524     @L0 level
   1525     4 => 0 - 32x32 TU in 64x64 CU
   1526          1 - 16x16 TU in 64x64 CU
   1527          2 - 8x8  TU in 64x64 CU
   1528          3 - 64x64 CU
   1529     2 => Intra/Inter */
   1530     WORD32 i4_64x64_act_factor[4][2];
   1531
   1532     /*
   1533     @L0 level  (NOTE(review): the "in 64x64 CU" wording below looks copied from i4_64x64_act_factor; presumably means the 32x32 CU - confirm)
   1534     4 => num 32x32 in CTB
   1535     3 => 0 - 32x32 TU in 64x64 CU
   1536          1 - 16x16 TU in 64x64 CU
   1537          2 - 8x8  TU in 64x64 CU
   1538     2 => Intra/Inter */
   1539     WORD32 i4_32x32_act_factor[4][3][2];
   1540
   1541     /*
   1542     @L0 level  (NOTE(review): the "in 64x64 CU" wording below looks copied from i4_64x64_act_factor; presumably means the 16x16 CU - confirm)
   1543     16 => num 16x16 in CTB
   1544     2 => 0 - 16x16 TU in 64x64 CU
   1545          1 - 8x8  TU in 64x64 CU
   1546     2 => Intra/Inter */
   1547     WORD32 i4_16x16_act_factor[16][2][2];
   1548
   1549     WORD32 nodes_created_in_cu_tree; /* presumably the count of CU tree nodes created under ps_cu_tree_root - TODO confirm */
   1550
   1551     cur_ctb_cu_tree_t *ps_cu_tree_root; /* root node of the CU tree for this CTB */
   1552
   1553     WORD32 ai4_8x8_act_factor[16]; /* NOTE(review): sized 16, whereas the other 8x8-level arrays here use MAX_CU_IN_CTB - confirm intended granularity */
   1554     WORD32 ai4_best_sad_8x8_l1_me[MAX_CU_IN_CTB]; /* the four arrays below share names with fields of ihevce_ed_ctb_l1_t; presumably copies of them - TODO confirm */
   1555     WORD32 ai4_best_sad_8x8_l1_ipe[MAX_CU_IN_CTB];
   1556     WORD32 ai4_best_sad_cost_8x8_l1_me[MAX_CU_IN_CTB];
   1557     WORD32 ai4_best_sad_cost_8x8_l1_ipe[MAX_CU_IN_CTB];
   1558
   1559     /* CTB level accumulated SATD */
   1560     WORD32 i4_ctb_acc_satd;
   1561
   1562     /* CTB level accumulated mpm bits */
   1563     WORD32 i4_ctb_acc_mpm_bits;
   1564
   1565 } ipe_l0_ctb_analyse_for_me_t;
   1566 
   1567 typedef struct /* global motion vector: one 16-bit component per axis (units not stated here) */
   1568 {
   1569     WORD16 i2_mv_x; /* x component */
   1570     WORD16 i2_mv_y; /* y component */
   1571 } global_mv_t;
   1572 
   1573 /**
   1574 ******************************************************************************
   1575  *  @brief  Pre Encode pass and ME pass shared variables and buffers
   1576 ******************************************************************************
   1577  */
   1578 typedef struct
   1579 {
   1580     /**
   1581      * Buffer id
   1582      */
   1583     WORD32 i4_buf_id;
   1584 
   1585     /**
   1586     * Flag will be set to 1 by frame processing thread after receiving flush
   1587     * command from application
   1588     */
   1589     WORD32 i4_end_flag;
   1590 
   1591     /** frame leve ctb analyse  buffer pointer */
   1592     ctb_analyse_t *ps_ctb_analyse;
   1593 
   1594     /** frame level cu analyse  buffer pointer for IPE */
   1595     //cu_analyse_t       *ps_cu_analyse;
   1596 
   1597     /** current input pointer */
   1598     ihevce_lap_enc_buf_t *ps_curr_inp;
   1599 
   1600     /** current inp buffer id */
   1601     WORD32 curr_inp_buf_id;
   1602 
   1603     /** Slice header parameters   */
   1604     slice_header_t s_slice_hdr;
   1605 
   1606     /** sps parameters activated by current slice  */
   1607     sps_t *ps_sps;
   1608 
   1609     /** pps parameters activated by current slice  */
   1610     pps_t *ps_pps;
   1611 
   1612     /** vps parameters activated by current slice  */
   1613     vps_t *ps_vps;
   1614     /**  Pointer to Penultilate Layer context memory internally has MV bank buff and related params */
   1615     void *pv_me_lyr_ctxt;
   1616 
   1617     /**  Pointer to Penultilate Layer  NV bank context memory */
   1618     void *pv_me_lyr_bnk_ctxt;
   1619 
   1620     /**  Pointer to Penultilate Layer MV bank buff */
   1621     void *pv_me_mv_bank;
   1622 
   1623     /**  Pointer to Penultilate Layer reference idx buffer */
   1624     void *pv_me_ref_idx;
   1625     /**
   1626      * Array to store 8x8 cost (partial 8x8 sad + level adjusted cost)
   1627      * The order of storing is raster scan order within CTB and
   1628      * CTB order is raster scan within frame.
   1629      */
   1630     double *plf_intra_8x8_cost;
   1631 
   1632     /**
   1633      * L0 layer ctb anaylse frame level buffer.
   1634      * IPE wil populate the cost and best modes at all levels in this buffer
   1635      *  for every CTB in a frame
   1636      */
   1637     // moved to shorter buffer queue
   1638     //ipe_l0_ctb_analyse_for_me_t *ps_ipe_analyse_ctb;
   1639 
   1640     /** Layer L1 buffer pointer */
   1641     ihevce_ed_blk_t *ps_layer1_buf;
   1642 
   1643     /** Layer L2 buffer pointer */
   1644     ihevce_ed_blk_t *ps_layer2_buf;
   1645 
   1646     /*ME reverse map info*/
   1647     UWORD8 *pu1_me_reverse_map_info;
   1648 
   1649     /** Buffer pointer for CTB level information in pre intra pass*/
   1650     ihevce_ed_ctb_l1_t *ps_ed_ctb_l1;
   1651 
   1652     /* L0 cur 8x8 satd for QP mod*/
   1653     ihevce_8x8_L0_satd_t *ps_layer0_cur_satd;
   1654 
   1655     /* L0 cur 8x8 mean for QP mod*/
   1656     ihevce_8x8_L0_mean_t *ps_layer0_cur_mean;
   1657 
   1658     /** vps parameters activated by current slice  */
   1659     sei_params_t s_sei;
   1660 
   1661     /** nal_type for the slice to be encoded  */
   1662     WORD32 i4_slice_nal_type;
   1663 
   1664     /** input time stamp in terms of ticks: lower 32  */
   1665     WORD32 i4_inp_timestamp_low;
   1666 
   1667     /** input time stamp in terms of ticks: higher 32 */
   1668     WORD32 i4_inp_timestamp_high;
   1669 
   1670     /** input frame ctxt of app to be retured in output buffer */
   1671     void *pv_app_frm_ctxt;
   1672 
   1673     /** current frm valid flag :
   1674      * will be 1 if valid input was processed by frame proc thrd
   1675      */
   1676     WORD32 i4_frm_proc_valid_flag;
   1677 
   1678     /**
   1679      * Qp to be used for current frame
   1680      */
   1681     WORD32 i4_curr_frm_qp;
   1682 
   1683     /**
   1684      * Frame level Lambda parameters
   1685      */
   1686     frm_lambda_ctxt_t as_lambda_prms[IHEVCE_MAX_NUM_BITRATES];
   1687 
   1688     /** Frame-levelSATDcost accumalator */
   1689     LWORD64 i8_frame_acc_satd_cost;
   1690 
   1691     /** Frame - L1 coarse me cost accumulated */
   1692     LWORD64 i8_acc_frame_coarse_me_cost;
   1693     /** Frame - L1 coarse me cost accumulated */
   1694     //LWORD64 i8_acc_frame_coarse_me_cost_for_ref;
   1695 
   1696     /** Frame - L1 coarse me sad accumulated */
   1697     LWORD64 i8_acc_frame_coarse_me_sad;
   1698 
   1699     /* Averge activity of 4x4 blocks from previous frame
   1700     *  If L1, maps to 8*8 in L0
   1701     */
   1702     WORD32 i4_curr_frame_4x4_avg_act;
   1703 
   1704     WORD32 ai4_mod_factor_derived_by_variance[2];
   1705 
   1706     float f_strength;
   1707 
   1708     /* Averge activity of 8x8 blocks from previous frame
   1709     *  If L1, maps to 16*16 in L0
   1710     */
   1711 
   1712     long double ld_curr_frame_8x8_log_avg[2];
   1713 
   1714     LWORD64 i8_curr_frame_8x8_avg_act[2];
   1715 
   1716     LWORD64 i8_curr_frame_8x8_sum_act[2];
   1717 
   1718     WORD32 i4_curr_frame_8x8_sum_act_for_strength[2];
   1719 
   1720     ULWORD64 u8_curr_frame_8x8_sum_act_sqr;
   1721 
   1722     WORD32 i4_curr_frame_8x8_num_blks[2];
   1723 
   1724     LWORD64 i8_acc_frame_8x8_sum_act[2];
   1725     LWORD64 i8_acc_frame_8x8_sum_act_sqr;
   1726     WORD32 i4_acc_frame_8x8_num_blks[2];
   1727     LWORD64 i8_acc_frame_8x8_sum_act_for_strength;
   1728     LWORD64 i8_curr_frame_8x8_sum_act_for_strength;
   1729 
   1730     /* Averge activity of 16x16 blocks from previous frame
   1731     *  If L1, maps to 32*32 in L0
   1732     */
   1733 
   1734     long double ld_curr_frame_16x16_log_avg[3];
   1735 
   1736     LWORD64 i8_curr_frame_16x16_avg_act[3];
   1737 
   1738     LWORD64 i8_curr_frame_16x16_sum_act[3];
   1739 
   1740     WORD32 i4_curr_frame_16x16_num_blks[3];
   1741 
   1742     LWORD64 i8_acc_frame_16x16_sum_act[3];
   1743     WORD32 i4_acc_frame_16x16_num_blks[3];
   1744 
   1745     /* Averge activity of 32x32 blocks from previous frame
   1746     *  If L1, maps to 64*64 in L0
   1747     */
   1748 
   1749     long double ld_curr_frame_32x32_log_avg[3];
   1750 
   1751     LWORD64 i8_curr_frame_32x32_avg_act[3];
   1752 
   1753     global_mv_t s_global_mv[MAX_NUM_REF];
   1754     LWORD64 i8_curr_frame_32x32_sum_act[3];
   1755 
   1756     WORD32 i4_curr_frame_32x32_num_blks[3];
   1757 
   1758     LWORD64 i8_acc_frame_32x32_sum_act[3];
   1759     WORD32 i4_acc_frame_32x32_num_blks[3];
   1760 
   1761     LWORD64 i8_acc_num_blks_high_sad;
   1762 
   1763     LWORD64 i8_total_blks;
   1764 
   1765     WORD32 i4_complexity_percentage;
   1766 
   1767     WORD32 i4_is_high_complex_region;
   1768 
   1769     WORD32 i4_avg_noise_thrshld_4x4;
   1770 
   1771     LWORD64 i8_curr_frame_mean_sum;
   1772     WORD32 i4_curr_frame_mean_num_blks;
   1773     LWORD64 i8_curr_frame_avg_mean_act;
   1774 
   1775 } pre_enc_me_ctxt_t;
   1776 
   1777 /**
   1778 ******************************************************************************
   1779  *  @brief  Buffers passed from L0 IPE to ME and enc loop
   1780 ******************************************************************************
   1781  */
   1782 typedef struct
   1783 {
   1784     WORD32 i4_size; /**< NOTE(review): presumably the size of this structure - confirm */
   1785     /** L0 layer ctb analyse buffer pointer (ipe_l0_ctb_analyse_for_me_t entries) */
   1786     ipe_l0_ctb_analyse_for_me_t *ps_ipe_analyse_ctb;
   1787 } pre_enc_L0_ipe_encloop_ctxt_t;
   1788 /**
   1789 ******************************************************************************
   1790  *  @brief  Picture level accumulated info: CU/PU/TU counts, QP stats, reference info, bit estimates, SAD and cost
   1791 ******************************************************************************
   1792  */
   1793 /* NOTE(review): name prefixes do not always match types here (i8_ on ULWORD64, i2_ on DOUBLE, i1_ on UWORD8) */
   1794 typedef struct
   1795 {
   1796     /* PIC level Info: CU counts */
   1797     ULWORD64 i8_total_cu;
   1798     ULWORD64 i8_total_cu_min_8x8;
   1799     ULWORD64 i8_total_pu;
   1800     ULWORD64 i8_total_intra_cu;
   1801     ULWORD64 i8_total_inter_cu;
   1802     ULWORD64 i8_total_skip_cu;
   1803     ULWORD64 i8_total_cu_based_on_size[4];
   1804     /* PU counts */
   1805     ULWORD64 i8_total_intra_pu;
   1806     ULWORD64 i8_total_merge_pu;
   1807     ULWORD64 i8_total_non_skipped_inter_pu;
   1808     /* PU counts per partition type; NOTE(review): array index presumably selects block size - confirm */
   1809     ULWORD64 i8_total_2nx2n_intra_pu[4];
   1810     ULWORD64 i8_total_nxn_intra_pu;
   1811     ULWORD64 i8_total_2nx2n_inter_pu[4];
   1812     ULWORD64 i8_total_smp_inter_pu[4];
   1813     ULWORD64 i8_total_amp_inter_pu[3];
   1814     ULWORD64 i8_total_nxn_inter_pu[3];
   1815     /* Counts per prediction mode (L0 / L1 / BI) */
   1816     ULWORD64 i8_total_L0_mode;
   1817     ULWORD64 i8_total_L1_mode;
   1818     ULWORD64 i8_total_BI_mode;
   1819     /* Counts per reference index, one array per list */
   1820     ULWORD64 i8_total_L0_ref_idx[MAX_DPB_SIZE];
   1821     ULWORD64 i8_total_L1_ref_idx[MAX_DPB_SIZE];
   1822     /* TU counts */
   1823     ULWORD64 i8_total_tu;
   1824     ULWORD64 i8_total_non_coded_tu;
   1825     ULWORD64 i8_total_inter_coded_tu;
   1826     ULWORD64 i8_total_intra_coded_tu;
   1827     /* TU counts by TU size and by enclosing CU size (as per field names) */
   1828     ULWORD64 i8_total_tu_based_on_size[4];
   1829     ULWORD64 i8_total_tu_cu64[4];
   1830     ULWORD64 i8_total_tu_cu32[4];
   1831     ULWORD64 i8_total_tu_cu16[3];
   1832     ULWORD64 i8_total_tu_cu8[2];
   1833     /* QP statistics and buffer underflow / overflow statistics */
   1834     LWORD64 i8_total_qp;
   1835     LWORD64 i8_total_qp_min_cu;
   1836     WORD32 i4_min_qp;
   1837     WORD32 i4_max_qp;
   1838     LWORD64 i8_sum_squared_frame_qp;
   1839     LWORD64 i8_total_frame_qp;
   1840     WORD32 i4_max_frame_qp;
   1841     float f_total_buffer_underflow;
   1842     float f_total_buffer_overflow;
   1843     float f_max_buffer_underflow;
   1844     float f_max_buffer_overflow;
   1845     /* Number of active reference indices for list 0 / list 1 */
   1846     UWORD8 i1_num_ref_idx_l0_active;
   1847     UWORD8 i1_num_ref_idx_l1_active;
   1848     /* Reference POCs, one array per list */
   1849     WORD32 i4_ref_poc_l0[MAX_DPB_SIZE];
   1850     WORD32 i4_ref_poc_l1[MAX_DPB_SIZE];
   1851     /* Per list: list entry, luma weight and luma offset arrays */
   1852     WORD8 i1_list_entry_l0[MAX_DPB_SIZE];
   1853     DOUBLE i2_luma_weight_l0[MAX_DPB_SIZE];
   1854     WORD16 i2_luma_offset_l0[MAX_DPB_SIZE];
   1855     WORD8 i1_list_entry_l1[MAX_DPB_SIZE];
   1856     DOUBLE i2_luma_weight_l1[MAX_DPB_SIZE];
   1857     WORD16 i2_luma_offset_l1[MAX_DPB_SIZE];
   1858     /* Estimated bits, split by category */
   1859     ULWORD64 u8_bits_estimated_intra;
   1860     ULWORD64 u8_bits_estimated_inter;
   1861     ULWORD64 u8_bits_estimated_slice_header;
   1862     ULWORD64 u8_bits_estimated_sao;
   1863     ULWORD64 u8_bits_estimated_split_cu_flag;
   1864     ULWORD64 u8_bits_estimated_cu_hdr_bits;
   1865     ULWORD64 u8_bits_estimated_split_tu_flag;
   1866     ULWORD64 u8_bits_estimated_qp_delta_bits;
   1867     ULWORD64 u8_bits_estimated_cbf_luma_bits;
   1868     ULWORD64 u8_bits_estimated_cbf_chroma_bits;
   1869 
   1870     ULWORD64 u8_bits_estimated_res_luma_bits;
   1871     ULWORD64 u8_bits_estimated_res_chroma_bits;
   1872 
   1873     ULWORD64 u8_bits_estimated_ref_id;
   1874     ULWORD64 u8_bits_estimated_mvd;
   1875     ULWORD64 u8_bits_estimated_merge_flag;
   1876     ULWORD64 u8_bits_estimated_mpm_luma;
   1877     ULWORD64 u8_bits_estimated_mpm_chroma;
   1878     /* Generated bits (total and vbv) */
   1879     ULWORD64 u8_total_bits_generated;
   1880     ULWORD64 u8_total_bits_vbv;
   1881     /* Generated bits split by picture type (I / P / B) */
   1882     ULWORD64 u8_total_I_bits_generated;
   1883     ULWORD64 u8_total_P_bits_generated;
   1884     ULWORD64 u8_total_B_bits_generated;
   1885     /* Frame level SAD (total, intra, inter) */
   1886     UWORD32 u4_frame_sad;
   1887     UWORD32 u4_frame_intra_sad;
   1888     UWORD32 u4_frame_inter_sad;
   1889     /* Frame level cost (total, intra, inter) */
   1890     ULWORD64 i8_frame_cost;
   1891     ULWORD64 i8_frame_intra_cost;
   1892     ULWORD64 i8_frame_inter_cost;
   1893 } s_pic_level_acc_info_t;
   1894 /** Picture level SEI related values (target bit rate, buffer size, dbf) - meanings as per field names */
   1895 typedef struct
   1896 {
   1897     UWORD32 u4_target_bit_rate_sei_entropy;
   1898     UWORD32 u4_buffer_size_sei_entropy;
   1899     UWORD32 u4_dbf_entropy;
   1900 
   1901 } s_pic_level_sei_info_t;
   1902 /**
   1903 ******************************************************************************
   1904 *  @brief  ME pass and Main encode pass shared variables and buffers
   1905 ******************************************************************************
   1906 */
   1907 typedef struct
   1908 {
   1909     /**
   1910     * Buffer id
   1911     */
   1912     WORD32 i4_buf_id;
   1913 
   1914     /**
   1915     * Flag will be set to 1 by frame processing thread after receiving flush
   1916     * command from application
   1917     */
   1918     WORD32 i4_end_flag;
   1919 
   1920     /** current input pointer */
   1921     ihevce_lap_enc_buf_t *ps_curr_inp;
   1922 
   1923     /** current inp buffer id */
   1924     WORD32 curr_inp_buf_id;
   1925 
   1926     /** current input buffers from ME (pre_enc_me_ctxt_t) */
   1927     pre_enc_me_ctxt_t *ps_curr_inp_from_me_prms;
   1928 
   1929     /** current inp buffer id from ME */
   1930     WORD32 curr_inp_from_me_buf_id;
   1931 
   1932     /** current input buffers from L0 IPE (pre_enc_L0_ipe_encloop_ctxt_t) */
   1933     pre_enc_L0_ipe_encloop_ctxt_t *ps_curr_inp_from_l0_ipe_prms;
   1934 
   1935     /** current inp buffer id from L0 IPE */
   1936     WORD32 curr_inp_from_l0_ipe_buf_id;
   1937 
   1938     /** Slice header parameters   */
   1939     slice_header_t s_slice_hdr;
   1940 
   1941     /** current frm valid flag :
   1942      * will be 1 if valid input was processed by frame proc thrd
   1943      */
   1944     WORD32 i4_frm_proc_valid_flag;
   1945 
   1946     /**
   1947      * Array of reference picture list (recon_pic_buf_t structures) for ping instance
   1948      * 2=> ref_pic_list0 and ref_pic_list1; first index is per bit-rate instance
   1949      */
   1950     recon_pic_buf_t as_ref_list[IHEVCE_MAX_NUM_BITRATES][2][HEVCE_MAX_REF_PICS * 2];
   1951 
   1952     /**
   1953      * Array of reference picture list (pointers to recon_pic_buf_t)
   1954      * 2=> ref_pic_list0 and ref_pic_list1; first index is per bit-rate instance
   1955      */
   1956     recon_pic_buf_t *aps_ref_list[IHEVCE_MAX_NUM_BITRATES][2][HEVCE_MAX_REF_PICS * 2];
   1957 
   1958     /**  Job Queue Memory encode */
   1959     job_queue_t *ps_job_q_enc;
   1960 
   1961     /** Array of Job Queue handles of enc group (NOTE(review): comment said "for ping and pong instance"; this array has no such dimension) */
   1962     job_queue_handle_t as_job_que_enc_hdls[NUM_ENC_JOBS_QUES];
   1963 
   1964     /** Array of Job Queue handles of enc group for re-encode*/
   1965     job_queue_handle_t as_job_que_enc_hdls_reenc[NUM_ENC_JOBS_QUES];
   1966     /** frame level me_ctb_data_t buffer pointer
   1967       */
   1968     me_ctb_data_t *ps_cur_ctb_me_data;
   1969 
   1970     /** frame level cur_ctb_cu_tree_t buffer pointer for ME
   1971       */
   1972     cur_ctb_cu_tree_t *ps_cur_ctb_cu_tree;
   1973 
   1974     /** Pointer to Dep. Mngr for CTBs processed in every row of a frame.
   1975      * ME is producer, EncLoop is the consumer
   1976      */
   1977     void *pv_dep_mngr_encloop_dep_me;
   1978 
   1979 } me_enc_rdopt_ctxt_t;
   1980 /** Single SEI payload descriptor: payload type, payload length and pointer to the payload bytes */
   1981 typedef struct
   1982 {
   1983     UWORD32 u4_payload_type;
   1984     UWORD32 u4_payload_length; /**< NOTE(review): presumably in bytes - confirm */
   1985     UWORD8 *pu1_sei_payload;
   1986 } sei_payload_t;
   1987 /** Frame process and entropy coding pass shared variables and buffers */
   1988 typedef struct
   1989 {
   1990     /**
   1991     * Flag will be set to 1 by frame processing thread after receiving flush
   1992     * command from application
   1993     */
   1994     WORD32 i4_end_flag;
   1995 
   1996     /** frame level ctb allocation for ctb after aligning to max cu size */
   1997     ctb_enc_loop_out_t *ps_frm_ctb_data;
   1998 
   1999     /** frame level cu allocation for ctb after aligning to max cu size  */
   2000     cu_enc_loop_out_t *ps_frm_cu_data;
   2001 
   2002     /** frame level tu allocation for ctb after aligning to max cu size  */
   2003     tu_enc_loop_out_t *ps_frm_tu_data;
   2004 
   2005     /** frame level pu allocation for ctb after aligning to max cu size  */
   2006     pu_t *ps_frm_pu_data;
   2007 
   2008     /**  frame level coeff allocation for ctb after aligning to max cu size */
   2009     void *pv_coeff_data;
   2010 
   2011     /** Slice header parameters   */
   2012     slice_header_t s_slice_hdr;
   2013 
   2014     /** sps parameters activated by current slice  */
   2015     sps_t *ps_sps;
   2016 
   2017     /** pps parameters activated by current slice  */
   2018     pps_t *ps_pps;
   2019 
   2020     /** vps parameters activated by current slice  */
   2021     vps_t *ps_vps;
   2022 
   2023     /** SEI parameters (sei_params_t); NOTE(review): comment previously read "vps parameters" */
   2024     sei_params_t s_sei;
   2025 
   2026     /* Flag to indicate if AUD NAL is present */
   2027     WORD8 i1_aud_present_flag;
   2028 
   2029     /* Flag to indicate if EOS NAL is present */
   2030     WORD8 i1_eos_present_flag;
   2031 
   2032     /** nal_type for the slice to be encoded  */
   2033     WORD32 i4_slice_nal_type;
   2034 
   2035     /** input time stamp in terms of ticks: lower 32  */
   2036     WORD32 i4_inp_timestamp_low;
   2037 
   2038     /** input time stamp in terms of ticks: higher 32 */
   2039     WORD32 i4_inp_timestamp_high;
   2040 
   2041     /** input frame ctxt of app to be returned in output buffer */
   2042     void *pv_app_frm_ctxt;
   2043 
   2044     /** current frm valid flag :
   2045      * will be 1 if valid input was processed by frame proc thrd
   2046      */
   2047     WORD32 i4_frm_proc_valid_flag;
   2048 
   2049     /** To support entropy sync the bitstream offset of each CTB row
   2050      * is populated in this array and put in slice header in the end
   2051      */
   2052     WORD32 ai4_entry_point_offset[MAX_NUM_CTB_ROWS_FRM];
   2053 
   2054     /** RDopt estimate of the amount generated, based on which rc update happens
   2055      * NOTE(review): comment said "bytes" while the field name says bits - confirm unit
   2056      */
   2057     WORD32 i4_rdopt_bits_generated_estimate;
   2058 
   2059     /* These params are passed from enc-threads to entropy thread for
   2060         passing params needed for PSNR calculation and encoding
   2061         summary prints */
   2062     DOUBLE lf_luma_mse;
   2063     DOUBLE lf_cb_mse;
   2064     DOUBLE lf_cr_mse;
   2065 
   2066     DOUBLE lf_luma_ssim;
   2067     DOUBLE lf_cb_ssim;
   2068     DOUBLE lf_cr_ssim;
   2069 
   2070     WORD32 i4_qp;
   2071     WORD32 i4_poc;
   2072     WORD32 i4_display_num;
   2073     WORD32 i4_pic_type;
   2074 
   2075     /** I-only SCD */
   2076     WORD32 i4_is_I_scenecut;
   2077 
   2078     WORD32 i4_is_non_I_scenecut;
   2079     WORD32 i4_sub_pic_level_rc;
   2080     /* NOTE(review): ai4_ prefix suggests an array, but this is a single WORD32 */
   2081     WORD32 ai4_frame_bits_estimated;
   2082     s_pic_level_acc_info_t s_pic_level_info;
   2083 
   2084     LWORD64 i8_buf_level_bitrate_change;
   2085 
   2086     WORD32 i4_is_end_of_idr_gop;
   2087     /* SEI payloads; u4_num_sei_payload is presumably the number of valid entries - confirm */
   2088     sei_payload_t as_sei_payload[MAX_NUMBER_OF_SEI_PAYLOAD];
   2089 
   2090     UWORD32 u4_num_sei_payload;
   2091     /* Flag used only in mres single output case to flush out one res and start with next */
   2092     WORD32 i4_out_flush_flag;
   2093 
   2094 } frm_proc_ent_cod_ctxt_t;
   2095 
   2096 /**
   2097 ******************************************************************************
   2098 *  @brief  Sub-pic RC context: bits / SAD / SATD / activity accumulated at frame level and till NCTB
   2099 ******************************************************************************
   2100 */
   2101 typedef struct
   2102 {
   2103     /*BitRate ID*/
   2104     WORD32 i4_br_id;
   2105 
   2106     /*Frame ID*/
   2107     WORD32 i4_frm_id;
   2108 
   2109     /*Number of CTB, after which data is populated*/
   2110     WORD32 i4_ctb_count_in_data;
   2111 
   2112     /*Number of CTB, after which scale is computed*/
   2113     WORD32 i4_ctb_count_out_scale;
   2114 
   2115     /*Bits estimated for the frame */
   2116     /* For NON-I SCD max buf bits*/
   2117     LWORD64 i8_frame_bits_estimated;
   2118 
   2119     /* Bits consumed till the nctb*/
   2120     LWORD64 i8_nctb_bits_consumed;
   2121 
   2122     /* Accumulated bits consumed; NOTE(review): comment previously duplicated "Bits consumed till the nctb" - confirm */
   2123     LWORD64 i8_acc_bits_consumed;
   2124 
   2125     /*Frame level Best of Ipe and ME sad*/
   2126     LWORD64 i8_frame_l1_me_sad;
   2127 
   2128     /*SAD accumulated till NCTB*/
   2129     LWORD64 i8_nctb_l1_me_sad;
   2130 
   2131     /*Frame level IPE sad*/
   2132     LWORD64 i8_frame_l1_ipe_sad;
   2133 
   2134     /*IPE SAD accumulated till NCTB*/
   2135     LWORD64 i8_nctb_l1_ipe_sad;
   2136 
   2137     /*Frame level L0 IPE satd*/
   2138     LWORD64 i8_frame_l0_ipe_satd;
   2139 
   2140     /*L0 SATD accumulated till NCTB*/
   2141     LWORD64 i8_nctb_l0_ipe_satd;
   2142 
   2143     /*Frame level Activity factor acc at 8x8 level */
   2144     LWORD64 i8_frame_l1_activity_fact;
   2145 
   2146     /*NCTB Activity factor acc at 8x8 level */
   2147     LWORD64 i8_nctb_l1_activity_fact;
   2148 
   2149     /*L0 MPM bits accumulated till NCTB*/
   2150     LWORD64 i8_nctb_l0_mpm_bits;
   2151 
   2152     /*Encoder hdr bits accumulated till NCTB*/
   2153     LWORD64 i8_nctb_hdr_bits_consumed;
   2154 
   2155 } ihevce_sub_pic_rc_ctxt_t;
   2156 
   2157 /**
   2158 ******************************************************************************
   2159  *  @brief  Memory manager context (stores the memory tables allocated)
   2160 ******************************************************************************
   2161  */
   2162 typedef struct
   2163 {
   2164     /**
   2165     * Total number of memtabs (Modules and system)
   2166     * during create time
   2167     */
   2168     WORD32 i4_num_create_memtabs;
   2169 
   2170     /**
   2171     * Pointer to the mem tabs
   2172     * of create time
   2173     */
   2174     iv_mem_rec_t *ps_create_memtab;
   2175 
   2176     /**
   2177     * Total number of memtabs of Data and control Ques
   2178     * during Ques create time
   2179     */
   2180     WORD32 i4_num_q_memtabs;
   2181 
   2182     /**
   2183     * Pointer to the mem tabs
   2184     * of Ques create time
   2185     */
   2186     iv_mem_rec_t *ps_q_memtab;
   2187 
   2188 } enc_mem_mngr_ctxt;
   2189 
   2190 /**
   2191 ******************************************************************************
   2192  *  @brief  Encoder Interface Queues Context
   2193 ******************************************************************************
   2194  */
   2195 typedef struct
   2196 {
   2197     /** Number of Queues at interface context level */
   2198     WORD32 i4_num_queues;
   2199 
   2200     /** Array of Queue handles (up to IHEVCE_MAX_NUM_QUEUES entries) */
   2201     void *apv_q_hdl[IHEVCE_MAX_NUM_QUEUES];
   2202 
   2203     /** Mutex for ensuring thread safety of the access of the queues */
   2204     void *pv_q_mutex_hdl;
   2205 
   2206 } enc_q_ctxt_t;
   2207 
   2208 /**
   2209 ******************************************************************************
   2210  *  @brief  Module context of different modules in encoder (opaque void pointers)
   2211 ******************************************************************************
   2212  */
   2213 
   2214 typedef struct
   2215 {
   2216     /** Motion estimation context pointer */
   2217     void *pv_me_ctxt;
   2218     /** Coarse Motion estimation context pointer */
   2219     void *pv_coarse_me_ctxt;
   2220 
   2221     /** Intra Prediction context pointer */
   2222     void *pv_ipe_ctxt;
   2223 
   2224     /** Encode Loop context pointer */
   2225     void *pv_enc_loop_ctxt;
   2226 
   2227     /** Entropy Coding context pointers (array sized IHEVCE_MAX_NUM_BITRATES) */
   2228     void *apv_ent_cod_ctxt[IHEVCE_MAX_NUM_BITRATES];
   2229 
   2230     /** Look Ahead Processing context pointer */
   2231     void *pv_lap_ctxt;
   2232     /** Rate control context pointers (array sized IHEVCE_MAX_NUM_BITRATES) */
   2233     void *apv_rc_ctxt[IHEVCE_MAX_NUM_BITRATES];
   2234     /** Decomposition pre intra context pointer */
   2235     void *pv_decomp_pre_intra_ctxt;
   2236 
   2237 } module_ctxt_t;
   2238 
   2239 /**
   2240 ******************************************************************************
   2241  *  @brief  Threads semaphore handles
   2242 ******************************************************************************
   2243  */
   2244 typedef struct
   2245 {
   2246     /** LAP semaphore handle */
   2247     void *pv_lap_sem_handle;
   2248 
   2249     /** Encode frame Process semaphore handle */
   2250     void *pv_enc_frm_proc_sem_handle;
   2251 
   2252     /** Pre Encode frame Process semaphore handle */
   2253     void *pv_pre_enc_frm_proc_sem_handle;
   2254 
   2255     /** Entropy coding semaphore handle
   2256         One semaphore for each entropy thread, i.e. for each bit-rate instance*/
   2257     void *apv_ent_cod_sem_handle[IHEVCE_MAX_NUM_BITRATES];
   2258 
   2259     /**
   2260      *  Semaphore handle corresponding to get free inp frame buff
   2261      *  function call from app if called in blocking mode
   2262      */
   2263     void *pv_inp_data_sem_handle;
   2264 
   2265     /**
   2266      *  Semaphore handle corresponding to get free inp control command buff
   2267      *  function call from app if called in blocking mode
   2268      */
   2269     void *pv_inp_ctrl_sem_handle;
   2270 
   2271     /**
   2272      *  Semaphore handles (one per bit-rate instance) corresponding to get filled
   2273      *  out bitstream buff function call from app if called in blocking mode
   2274      */
   2275     void *apv_out_strm_sem_handle[IHEVCE_MAX_NUM_BITRATES];
   2276 
   2277     /**
   2278      *  Semaphore handles (one per bit-rate instance) corresponding to get filled
   2279      *  out recon buff function call from app if called in blocking mode
   2280      */
   2281     void *apv_out_recon_sem_handle[IHEVCE_MAX_NUM_BITRATES];
   2282 
   2283     /**
   2284      *  Semaphore handle corresponding to get filled out control status buff
   2285      *  function call from app if called in blocking mode
   2286      */
   2287     void *pv_out_ctrl_sem_handle;
   2288 
   2289     /**
   2290      *  NOTE(review): the comment here was a copy of pv_out_ctrl_sem_handle's.
   2291      *  By its name this is presumably the semaphore for LAP input data - confirm
   2292      */
   2293     void *pv_lap_inp_data_sem_hdl;
   2294 
   2295     /**
   2296      *  NOTE(review): the comment here was a copy of pv_out_ctrl_sem_handle's.
   2297      *  By its name this is presumably the semaphore for pre-enc input data - confirm
   2298      */
   2299     void *pv_preenc_inp_data_sem_hdl;
   2300 
   2301     /**
   2302      *  Semaphore handles corresponding to Multi Res Single output case
   2303      */
   2304     void *pv_ent_common_mres_sem_hdl;
   2305     void *pv_out_common_mres_sem_hdl;
   2306 
   2307 } thrd_que_sem_hdl_t;
   2308 
   2309 /**
   2310 ******************************************************************************
   2311  *  @brief  Frame level structure which has parameters about CTBs
   2312 ******************************************************************************
   2313  */
   2314 typedef struct
   2315 {
   2316     /** CTB size of all CTB in a frame in pixels
   2317      *  this will be create time value,
   2318      *  run time change in this value is not supported
   2319      */
   2320     WORD32 i4_ctb_size;
   2321 
   2322     /** Minimum CU size of CTB in a frame in pixels
   2323      *  this will be create time value,
   2324      *  run time change in this value is not supported
   2325      */
   2326     WORD32 i4_min_cu_size;
   2327 
   2328     /** Worst case num CUs in CTB based on i4_ctb_size */
   2329     WORD32 i4_num_cus_in_ctb;
   2330 
   2331     /** Worst case num PUs in CTB based on i4_ctb_size */
   2332     WORD32 i4_num_pus_in_ctb;
   2333 
   2334     /** Worst case num TUs in CTB based on i4_ctb_size */
   2335     WORD32 i4_num_tus_in_ctb;
   2336 
   2337     /** Number of CTBs in horizontal direction
   2338       * this is based on run time source width and i4_ctb_size
   2339       */
   2340     WORD32 i4_num_ctbs_horz;
   2341 
   2342     /** Number of CTBs in vertical direction
   2343      *  this is based on run time source height and i4_ctb_size
   2344      */
   2345     WORD32 i4_num_ctbs_vert;
   2346 
   2347     /** MAX CUs in horizontal direction
   2348      * this is based on run time source width, i4_ctb_size and  i4_num_cus_in_ctb
   2349      */
   2350     WORD32 i4_max_cus_in_row;
   2351 
   2352     /** MAX PUs in horizontal direction
   2353      * this is based on run time source width, i4_ctb_size and  i4_num_pus_in_ctb
   2354      */
   2355     WORD32 i4_max_pus_in_row;
   2356 
   2357     /** MAX TUs in horizontal direction
   2358      * this is based on run time source width, i4_ctb_size and  i4_num_tus_in_ctb
   2359      */
   2360     WORD32 i4_max_tus_in_row;
   2361 
   2362     /**
   2363      * CU aligned picture width (currently aligned to MAX CU size)
   2364      * should be modified to be aligned to MIN CU size
   2365      */
   2366 
   2367     WORD32 i4_cu_aligned_pic_wd;
   2368 
   2369     /**
   2370      * CU aligned picture height (currently aligned to MAX CU size)
   2371      * should be modified to be aligned to MIN CU size
   2372      */
   2373 
   2374     WORD32 i4_cu_aligned_pic_ht;
   2375 
   2376     /* Pointer to a frame level memory,
   2377     Stride is = 1 + (num ctbs in a ctb-row) + 1
   2378     Height is = 1 + (num ctbs in a ctb-col)
   2379     Contains tile-id of each ctb */
   2380     WORD32 *pi4_tile_id_map;
   2381 
   2382     /* stride of the tile-id map, in units of ctb */
   2383     WORD32 i4_tile_id_ctb_map_stride;
   2384 
   2385 } frm_ctb_ctxt_t;
   2386 
   2387 /**
   2388 ******************************************************************************
   2389  *  @brief  ME Job Queue descriptor (input / output dependencies of one job)
   2390 ******************************************************************************
   2391  */
   2392 typedef struct
   2393 {
   2394     /** Number of output dependencies which need to be set after
   2395      *  current job is complete,
   2396      *  should be less than or equal to MAX_OUT_DEP defined in
   2397      *  ihevce_multi_thrd_structs.h
   2398      */
   2399     WORD32 i4_num_output_dep;
   2400 
   2401     /** Array of offsets from the start of output dependent layer's Job Ques
   2402      *  which are dependent on current Job being complete
   2403      */
   2404     WORD32 ai4_out_dep_unit_off[MAX_OUT_DEP];
   2405 
   2406     /** Number of input dependencies to be resolved for current job to start;
   2407      *  this many jobs in lower layer should be complete to
   2408      *  start the current JOB
   2409      */
   2410     WORD32 i4_num_inp_dep;
   2411 
   2412 } multi_thrd_me_job_q_prms_t;
   2413 
   2414 /**
   2415  *  @brief  Structure in which recon data
   2416  *          and related parameters are sent from Encoder
   2417  */
   2418 typedef struct
   2419 {
   2420     /** Kept for maintaining backwards compatibility in future */
   2421     WORD32 i4_size;
   2422 
   2423     /** Buffer id for the current buffer */
   2424     WORD32 i4_buf_id;
   2425 
   2426     /** POC of the current buffer */
   2427     WORD32 i4_poc;
   2428 
   2429     /** End flag to communicate this is last frame output from encoder */
   2430     WORD32 i4_end_flag;
   2431 
   2432     /** End flag to communicate to encoder that this is the last buffer from application
   2433         1 - Last buf, 0 - Not last buffer. No other values are supported.
   2434         Application has to set the appropriate value before queuing in encoder queue */
   2435 
   2436     WORD32 i4_is_last_buf;
   2437 
   2438     /** Recon luma buffer pointer */
   2439     void *pv_y_buf;
   2440 
   2441     /** Recon cb buffer pointer */
   2442     void *pv_cb_buf;
   2443 
   2444     /** Recon cr buffer pointer */
   2445     void *pv_cr_buf;
   2446 
   2447     /** Luma size (in pixels, as per field name) **/
   2448     WORD32 i4_y_pixels;
   2449 
   2450     /** Chroma size (in pixels, as per field name) **/
   2451     WORD32 i4_uv_pixels;
   2452 
   2453 } iv_enc_recon_data_buffs_t;
   2454 
   2455 /**
   2456 ******************************************************************************
   2457  *  @brief  Multi Thread context structure
   2458 ******************************************************************************
   2459  */
   2460 typedef struct
   2461 {
   2462     /* Flag to indicate to enc and pre-enc thrds that app has sent force end cmd*/
   2463     WORD32 i4_force_end_flag;
   2464 
   2465     /** Force all active threads flag
   2466       * This flag will be set to 1 if all Number of cores givento the encoder
   2467       * is less than or Equal to MAX_NUM_CORES_SEQ_EXEC. In this mode
   2468       * All pre enc threads and enc threads will run of the same cores with
   2469       * time sharing ar frame level
   2470       */
   2471     WORD32 i4_all_thrds_active_flag;
   2472 
   2473     /** Flag to indicate that core manager has been configured to enable
   2474      * sequential execution
   2475      */
   2476     WORD32 i4_seq_mode_enabled_flag;
   2477     /*-----------------------------------------------------------------------*/
   2478     /*--------- Params related to encode group  -----------------------------*/
   2479     /*-----------------------------------------------------------------------*/
   2480 
   2481     /** Number of processing threads created runtime in encode group */
   2482     WORD32 i4_num_enc_proc_thrds;
   2483 
   2484     /** Number of processing threads active for a given frame
   2485      * This value will be monitored at frame level, so as to
   2486      * have provsion for increasing / decreasing threads
   2487      * based on Load balance b/w stage in encoder
   2488      */
   2489     WORD32 i4_num_active_enc_thrds;
   2490     /**  Job Queue Memory encode */
   2491     job_queue_t *ps_job_q_enc[PING_PONG_BUF];
   2492 
   2493     /** Array of Job Queue handles of enc group for ping and pong instance*/
   2494     job_queue_handle_t as_job_que_enc_hdls[NUM_ENC_JOBS_QUES][PING_PONG_BUF];
   2495 
   2496     /** Mutex for ensuring thread safety of the access of Job queues in encode group */
   2497     void *pv_job_q_mutex_hdl_enc_grp_me;
   2498 
   2499     /** Mutex for ensuring thread safety of the access of Job queues in encode group */
   2500     void *pv_job_q_mutex_hdl_enc_grp_enc_loop;
   2501 
   2502     /** Array of Semaphore handles (for each frame processing threads ) */
   2503     void *apv_enc_thrd_sem_handle[MAX_NUM_FRM_PROC_THRDS_ENC];
   2504 
   2505     /** Array for communcating start processing from master thread to indivisual
   2506     *   threads in Enocde group of threads
   2507     *  till 0 : wait
   2508     *  1  : start
   2509     * After reading the start signal, corresponding thread hould reset it to 0
   2510     */
   2511     WORD32 ai4_enc_frm_proc_start[MAX_NUM_FRM_PROC_THRDS_ENC];
   2512 
   2513     /** Note: For Enc loop pass similar memory is used which is part of frm_proc_ent_cod_ctxt_t
   2514     *  for Row level Sync hence not explicitly declared here
   2515     */
   2516 
   2517     /** Array for ME to export the Job que dependency for all layers */
   2518     multi_thrd_me_job_q_prms_t as_me_job_q_prms[MAX_NUM_HME_LAYERS][MAX_NUM_VERT_UNITS_FRM];
   2519 
   2520     /* pointer to the mutex handle*/
   2521     void *apv_mutex_handle[MAX_NUM_ME_PARALLEL];
   2522 
   2523     /* pointer to the mutex handle for frame init*/
   2524     void *apv_mutex_handle_me_end[MAX_NUM_ME_PARALLEL];
   2525 
   2526     /* pointer to the mutex handle for frame init*/
   2527     void *apv_mutex_handle_frame_init[MAX_NUM_ENC_LOOP_PARALLEL];
   2528 
   2529     /*pointer to the mutex handle*/
   2530     void *apv_post_enc_mutex_handle[MAX_NUM_ENC_LOOP_PARALLEL];
   2531 
   2532     /* Flag to indicate that master has done ME init*/
   2533     WORD32 ai4_me_master_done_flag[MAX_NUM_ME_PARALLEL];
   2534 
   2535     /* Counter to keep track of me num of thrds exiting critical section*/
   2536     WORD32 me_num_thrds_exited[MAX_NUM_ME_PARALLEL];
   2537 
   2538     /* Flag to indicate that master has done the frame init*/
   2539     WORD32 enc_master_done_frame_init[MAX_NUM_ENC_LOOP_PARALLEL];
   2540 
   2541     /* Counter to keep track of num of thrds exiting critical section*/
   2542     WORD32 num_thrds_exited[MAX_NUM_ENC_LOOP_PARALLEL];
   2543 
   2544     /* Counter to keep track of num of thrds exiting critical section for re-encode*/
   2545     WORD32 num_thrds_exited_for_reenc;
   2546 
   2547     /* Array to store the curr qp for ping and pong instance*/
   2548     WORD32 cur_qp[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
   2549 
   2550     /* Pointers to store output buffers for ping and pong instance*/
   2551     frm_proc_ent_cod_ctxt_t *ps_curr_out_enc_grp[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
   2552 
   2553     /* Pointer to store input buffers for me*/
   2554     pre_enc_me_ctxt_t *aps_cur_inp_me_prms[MAX_NUM_ME_PARALLEL];
   2555 
   2556     /*pointers to store output buffers from me */
   2557     me_enc_rdopt_ctxt_t *aps_cur_out_me_prms[NUM_ME_ENC_BUFS];
   2558 
   2559     /*pointers to store input buffers to enc-rdopt */
   2560     me_enc_rdopt_ctxt_t *aps_cur_inp_enc_prms[NUM_ME_ENC_BUFS];
   2561 
   2562     /*Shared memory for Sub Pic rc */
   2563     /*Qscale calulated by sub pic rc bit control for Intra Pic*/
   2564     WORD32 ai4_curr_qp_estimated[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
   2565 
   2566     /*Header bits error by sub pic rc bit control*/
   2567     float af_acc_hdr_bits_scale_err[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
   2568 
   2569     /*Accumalated ME SAD for NCTB*/
   2570     LWORD64 ai8_nctb_me_sad[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
   2571                            [MAX_NUM_FRM_PROC_THRDS_ENC];
   2572 
   2573     /*Accumalated IPE SAD for NCTB*/
   2574     LWORD64 ai8_nctb_ipe_sad[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
   2575                             [MAX_NUM_FRM_PROC_THRDS_ENC];
   2576 
   2577     /*Accumalated L0 IPE SAD for NCTB*/
   2578     LWORD64 ai8_nctb_l0_ipe_sad[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
   2579                                [MAX_NUM_FRM_PROC_THRDS_ENC];
   2580 
   2581     /*Accumalated Activity Factor for NCTB*/
   2582     LWORD64 ai8_nctb_act_factor[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
   2583                                [MAX_NUM_FRM_PROC_THRDS_ENC];
   2584 
   2585     /*Accumalated Ctb counter across all threads*/
   2586     WORD32 ai4_ctb_ctr[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
   2587 
   2588     /*Bits threshold reached for across all threads*/
   2589     WORD32 ai4_threshold_reached[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
   2590 
   2591     /*To hold the Previous In-frame RC chunk QP*/
   2592     WORD32 ai4_prev_chunk_qp[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
   2593 
   2594     /*Accumalated Ctb counter across all threads*/
   2595     WORD32 ai4_acc_ctb_ctr[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
   2596 
   2597     /*Flag to check if thread is initialized */
   2598     WORD32 ai4_thrd_id_valid_flag[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
   2599                                  [MAX_NUM_FRM_PROC_THRDS_ENC];
   2600 
   2601     /*Accumalated Ctb counter across all threads*/
   2602     //WORD32 ai4_acc_qp[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES][MAX_NUM_FRM_PROC_THRDS_ENC];
   2603 
   2604     /*Accumalated bits consumed for nctbs across all threads*/
   2605     LWORD64 ai8_nctb_bits_consumed[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
   2606                                   [MAX_NUM_FRM_PROC_THRDS_ENC];
   2607 
   2608     /*Accumalated hdr bits consumed for nctbs across all threads*/
   2609     LWORD64 ai8_nctb_hdr_bits_consumed[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
   2610                                       [MAX_NUM_FRM_PROC_THRDS_ENC];
   2611 
   2612     /*Accumalated l0 mpm bits consumed for nctbs across all threads*/
   2613     LWORD64 ai8_nctb_mpm_bits_consumed[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
   2614                                       [MAX_NUM_FRM_PROC_THRDS_ENC];
   2615 
   2616     /*Accumalated bits consumed for total ctbs across all threads*/
   2617     LWORD64 ai8_acc_bits_consumed[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
   2618                                  [MAX_NUM_FRM_PROC_THRDS_ENC];
   2619 
   2620     /*Accumalated bits consumed for total ctbs across all threads*/
   2621     LWORD64 ai8_acc_bits_mul_qs_consumed[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
   2622                                         [MAX_NUM_FRM_PROC_THRDS_ENC];
   2623 
   2624     /*Qscale calulated by sub pic rc bit control */
   2625     WORD32 ai4_curr_qp_acc[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
   2626     /* End of Sub pic rc variables */
   2627 
   2628     /* Pointers to store input (only L0 IPE)*/
   2629     pre_enc_L0_ipe_encloop_ctxt_t *aps_cur_L0_ipe_inp_prms[MAX_NUM_ME_PARALLEL];
   2630 
   2631     /** Slice header parameters   */
   2632     /** temporarily store the slice header parameters in enc-loop thread
   2633     which will be copied to curr_out when buffer is aquired */
   2634     //slice_header_t      as_slice_hdr[PING_PONG_BUF];
   2635 
   2636     /* Array to store input buffer ids for ping and pong instances*/
   2637     //WORD32 in_buf_id[PING_PONG_BUF];
   2638 
   2639     /* Array to store L0 IPE input buf ids*/
   2640     WORD32 ai4_in_frm_l0_ipe_id[MAX_NUM_ME_PARALLEL];
   2641 
   2642     /* Array to store output buffer ids for ping and pong instances*/
   2643     WORD32 out_buf_id[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];  //[PING_PONG_BUF];
   2644 
   2645     /* Variable to indicate ping and pong instance for each thread*/
   2646     WORD32 ping_pong[MAX_NUM_FRM_PROC_THRDS_ENC];
   2647 
   2648     /* Array of pointers to store the recon buf pointers*/
   2649     iv_enc_recon_data_buffs_t
   2650         *ps_recon_out[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];  //[PING_PONG_BUF];
   2651 
   2652     /* Array of pointers to frame recon for ping and pong instances*/
   2653     recon_pic_buf_t *ps_frm_recon[NUM_ME_ENC_BUFS][IHEVCE_MAX_NUM_BITRATES];
   2654 
   2655     /* Array of recon buffer ids for ping and pong instance*/
   2656     WORD32 recon_buf_id[NUM_ME_ENC_BUFS][IHEVCE_MAX_NUM_BITRATES];  //[PING_PONG_BUF];
   2657 
   2658     /* End flag to signal end of all the frames in me*/
   2659     WORD32 me_end_flag;
   2660 
   2661     /* End flag to signal end of all the frames in enc*/
   2662     WORD32 enc_end_flag;
   2663 
   2664     /* Counter to keep track of num thrds done*/
   2665     WORD32 num_thrds_done;
   2666 
   2667     /* Flags to keep track of dumped ping pong recon buffer*/
   2668     WORD32 is_recon_dumped[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];  //[PING_PONG_BUF];
   2669 
   2670     /* Flags to keep track of dumped ping pong output buffer*/
   2671     WORD32 is_out_buf_freed[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];  //[PING_PONG_BUF];
   2672 
   2673     /* flag to produce output buffer by the thread who ever is finishing
   2674     enc-loop processing first, so that the entropy thread can start processing */
   2675     WORD32 ai4_produce_outbuf[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
   2676 
   2677     /* Flags to keep track of dumped ping pong input buffer*/
   2678     WORD32 is_in_buf_freed[MAX_NUM_ENC_LOOP_PARALLEL];
   2679 
   2680     /* Flags to keep track of dumped ping pong L0 IPE to enc buffer*/
   2681     WORD32 is_L0_ipe_in_buf_freed[MAX_NUM_ENC_LOOP_PARALLEL];
   2682 
   2683     /** Dependency manager for checking whether prev. EncLoop done before
   2684         current frame EncLoop starts */
   2685     void *apv_dep_mngr_prev_frame_done[MAX_NUM_ENC_LOOP_PARALLEL];
   2686 
   2687     /** Dependency manager for checking whether prev. EncLoop done before
   2688         re-encode of the current frame */
   2689     void *pv_dep_mngr_prev_frame_enc_done_for_reenc;
   2690 
   2691     /** Dependency manager for checking whether prev. me done before
   2692         current frame me starts */
   2693     void *apv_dep_mngr_prev_frame_me_done[MAX_NUM_ME_PARALLEL];
   2694 
   2695     /** ME coarsest layer JOB queue type */
   2696     WORD32 i4_me_coarsest_lyr_type;
   2697 
   2698     /** number of encloop frames running in parallel */
   2699     WORD32 i4_num_enc_loop_frm_pllel;
   2700 
   2701     /** number of me frames running in parallel */
   2702     WORD32 i4_num_me_frm_pllel;
   2703 
   2704     /*-----------------------------------------------------------------------*/
   2705     /*--------- Params related to pre-enc stage -----------------------------*/
   2706     /*-----------------------------------------------------------------------*/
   2707 
   2708     /** Number of processing threads created runtime in pre encode group */
   2709     WORD32 i4_num_pre_enc_proc_thrds;
   2710 
   2711     /** Number of processing threads active for a given frame
   2712      * This value will be monitored at frame level, so as to
   2713      * have provision for increasing / decreasing threads
   2714      * based on Load balance b/w stage in encoder
   2715      */
   2716     WORD32 i4_num_active_pre_enc_thrds;
   2717     /** number of threads that have done processing the current frame
   2718         Use to find out the last thread that is coming out of pre-enc processing
   2719         so that the last thread can do de-init of pre-enc stage */
   2720     WORD32 ai4_num_thrds_processed_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
   2721 
   2722     /** number of threads that have done processing the current frame
   2723         Used to find out the first thread and last in order to get qp query. As the query
   2724         is not read only, the query should be done only once by the thread that comes first
   2725         and other threads should get same value*/
   2726     WORD32 ai4_num_thrds_processed_L0_ipe_qp_init[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
   2727 
   2728     /** number of threads that have done processing decomp_intra
   2729         Used to find out the last thread that is coming out so that
   2730         the last thread can set flag for decomp_pre_intra_finish */
   2731     WORD32 ai4_num_thrds_processed_decomp[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
   2732 
   2733     /** number of threads that have done processing coarse_me
   2734         Used to find out the last thread that is coming out so that
   2735         the last thread can set flag for coarse_me_finish */
   2736     WORD32 ai4_num_thrds_processed_coarse_me[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
   2737 
   2738     /*Flag to indicate if current instance (frame)'s Decomp_pre_intra and Coarse_ME is done.
   2739       Used to check if previous frame is done processing decomp_pre_intra and coarse_me */
   2740     WORD32 ai4_decomp_coarse_me_complete_flag[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
   2741 
   2742     /** Dependency manager for checking whether prev. frame decomp_intra
   2743         done before current frame  decomp_intra starts */
   2744     void *pv_dep_mngr_prev_frame_pre_enc_l1;
   2745 
   2746     /** Dependency manager for checking whether prev. frame L0 IPE done before
   2747         current frame L0 IPE starts */
   2748     void *pv_dep_mngr_prev_frame_pre_enc_l0;
   2749 
   2750     /** Dependency manager for checking whether prev. frame coarse_me done before
   2751         current frame coarse_me starts */
   2752     void *pv_dep_mngr_prev_frame_pre_enc_coarse_me;
   2753 
   2754     /** flag to indicate if pre_enc_init is done for current frame */
   2755     WORD32 ai4_pre_enc_init_done[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
   2756 
   2757     /** flag to indicate if pre_enc_hme_init is done for current frame */
   2758     WORD32 ai4_pre_enc_hme_init_done[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
   2759 
   2760     /** flag to indicate if pre_enc_deinit is done for current frame */
   2761     WORD32 ai4_pre_enc_deinit_done[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
   2762 
   2763     /** Flag to indicate the end of processing when all the frames are done processing */
   2764     WORD32 ai4_end_flag_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
   2765 
   2766     /** Flag to indicate the control blocking mode indicating input command to pre-enc
   2767     group should be blocking or unblocking */
   2768     WORD32 i4_ctrl_blocking_mode;
   2769 
   2770     /** Current input pointer */
   2771     ihevce_lap_enc_buf_t *aps_curr_inp_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
   2772 
   2773     WORD32 i4_last_inp_buf;
   2774 
   2775     /* buffer id for input buffer */
   2776     WORD32 ai4_in_buf_id_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
   2777 
   2778     /** Current output pointer */
   2779     pre_enc_me_ctxt_t *aps_curr_out_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
   2780 
   2781     /*Current L0 IPE to enc output pointer */
   2782     pre_enc_L0_ipe_encloop_ctxt_t *ps_L0_IPE_curr_out_pre_enc;
   2783 
   2784     /** buffer id for output buffer */
   2785     WORD32 ai4_out_buf_id_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
   2786 
   2787     /** buffer id for L0 IPE enc buffer*/
   2788     WORD32 i4_L0_IPE_out_buf_id;
   2789 
   2790     /** current frame recon pointer */
   2791     recon_pic_buf_t *aps_frm_recon_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
   2792 
   2793     /** Current picture Qp */
   2794     WORD32 ai4_cur_frame_qp_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
   2795 
   2796     /** Decomp layer buffers indices */
   2797     WORD32 ai4_decomp_lyr_buf_idx[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
   2798 
   2799     /*since it is guaranteed that cur frame ipe will not start unless prev frame ipe is completely done,
   2800       an array of MAX_PRE_ENC_STAGGER might not be required*/
   2801     WORD32 i4_qp_update_l0_ipe;
   2802 
   2803     /** Current picture encoded is the last picture to be encoded flag */
   2804     WORD32 i4_last_pic_flag;
   2805 
   2806     /** Mutex for ensuring thread safety of the access of Job queues in decomp stage */
   2807     void *pv_job_q_mutex_hdl_pre_enc_decomp;
   2808 
   2809     /** Mutex for ensuring thread safety of the access of Job queues in HME group */
   2810     void *pv_job_q_mutex_hdl_pre_enc_hme;
   2811 
   2812     /** Mutex for ensuring thread safety of the access of Job queues in l0 ipe stage */
   2813     void *pv_job_q_mutex_hdl_pre_enc_l0ipe;
   2814 
   2815     /** mutex handle for pre-enc init */
   2816     void *pv_mutex_hdl_pre_enc_init;
   2817 
   2818     /** mutex handle for pre-enc decomp deinit */
   2819     void *pv_mutex_hdl_pre_enc_decomp_deinit;
   2820 
   2821     /** mutex handle for pre enc hme init */
   2822     void *pv_mutex_hdl_pre_enc_hme_init;
   2823 
   2824     /** mutex handle for pre-enc hme deinit */
   2825     void *pv_mutex_hdl_pre_enc_hme_deinit;
   2826 
   2827     /*qp query before l0 ipe is done by multiple frame*/
   2828     /** mutex handle for L0 ipe(pre-enc init)*/
   2829     void *pv_mutex_hdl_l0_ipe_init;
   2830 
   2831     /** mutex handle for pre-enc deinit */
   2832     void *pv_mutex_hdl_pre_enc_deinit;
   2833 
   2834     /** Array of Semaphore handles (for each frame processing threads ) */
   2835     void *apv_pre_enc_thrd_sem_handle[MAX_NUM_FRM_PROC_THRDS_ENC];
   2836     /** array which will tell the number of CTB processed in each row,
   2837     *   used for Row level sync in IPE pass
   2838     */
   2839     WORD32 ai4_ctbs_in_row_proc_ipe_pass[MAX_NUM_CTB_ROWS_FRM];
   2840 
   2841     /**  Job Queue Memory pre encode */
   2842     job_queue_t *aps_job_q_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
   2843 
   2844     /** Array of Job Queue handles enc group */
   2845     job_queue_handle_t as_job_que_preenc_hdls[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]
   2846                                              [NUM_PRE_ENC_JOBS_QUES];
   2847 
   2848     /* accumulate intra sad across all thread to get qp before L0 IPE*/
   2849     WORD32 ai4_intra_satd_acc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]
   2850                              [MAX_NUM_FRM_PROC_THRDS_PRE_ENC];
   2851 
   2852     WORD32 i4_delay_pre_me_btw_l0_ipe;
   2853 
   2854     /*** This variable has the maximum delay between hme and l0ipe ***/
   2855     /*** This is used for wrapping around L0IPE index ***/
   2856     WORD32 i4_max_delay_pre_me_btw_l0_ipe;
   2857 
   2858     /* This is to register the handles of Dep Mngr b/w EncLoop and ME */
   2859     /* This is used to delete the Mngr at the end                          */
   2860     void *apv_dep_mngr_encloop_dep_me[NUM_ME_ENC_BUFS];
   2861     /*flag to track buffer in me/enc que is produced or not*/
   2862     WORD32 ai4_me_enc_buff_prod_flag[NUM_ME_ENC_BUFS];
   2863 
   2864     /*out buf que id for me */
   2865     WORD32 ai4_me_out_buf_id[NUM_ME_ENC_BUFS];
   2866 
   2867     /*in buf que id for enc from me*/
   2868     WORD32 i4_enc_in_buf_id[NUM_ME_ENC_BUFS];
   2869 
   2870     /* This is used to tell whether the free of recon buffers are done or not */
   2871     WORD32 i4_is_recon_free_done;
   2872 
   2873     /* index for DVSR population */
   2874     WORD32 i4_idx_dvsr_p;
   2875     WORD32 aai4_l1_pre_intra_done[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]
   2876                                  [(HEVCE_MAX_HEIGHT >> 1) / 8];
   2877 
   2878     WORD32 i4_rc_l0_qp;
   2879 
   2880     /* Used for mres single out cases. Checks whether a particular resolution is active or passive */
   2881     /* Only one resolution should be active for mres_single_out case */
   2882     WORD32 *pi4_active_res_id;
   2883 
   2884     /**
   2885      * Sub Pic bit control mutex lock handle
   2886      */
   2887     void *pv_sub_pic_rc_mutex_lock_hdl;
   2888 
   2889     void *pv_sub_pic_rc_for_qp_update_mutex_lock_hdl;
   2890 
   2891     WORD32 i4_encode;
   2892     WORD32 i4_in_frame_rc_enabled;
   2893     WORD32 i4_num_re_enc;
   2894 
   2895 } multi_thrd_ctxt_t;
   2896 
   2897 /**
   2898  *  @brief    Structure to describe tile params (frame-level tile layout and current tile geometry)
   2899  */
   2900 typedef struct
   2901 {
   2902     /* flag to indicate whether tile encoding is enabled or disabled */
   2903     WORD32 i4_tiles_enabled_flag;
   2904 
   2905     /* flag to indicate uniform spacing of tiles */
   2906     WORD32 i4_uniform_spacing_flag;
   2907 
   2908     /* num tiles in a tile-row (tile columns), num tiles in a tile-col (tile rows) */
   2909     WORD32 i4_num_tile_cols;
   2910     WORD32 i4_num_tile_rows;
   2911 
   2912     /* Curr tile width and height */
   2913     WORD32 i4_curr_tile_width;
   2914     WORD32 i4_curr_tile_height;
   2915 
   2916     /* Curr tile width and height in CTB units */
   2917     WORD32 i4_curr_tile_wd_in_ctb_unit;
   2918     WORD32 i4_curr_tile_ht_in_ctb_unit;
   2919 
   2920     /* frame resolution */
   2921     //WORD32  i4_frame_width;  /* encode-width  */
   2922     //WORD32  i4_frame_height; /* encode-height */
   2923 
   2924     /* total num of tiles "in frame" */
   2925     WORD32 i4_num_tiles;
   2926 
   2927     /* Curr tile id. Assigned by raster scan order in a frame */
   2928     WORD32 i4_curr_tile_id;
   2929 
   2930     /* x-pos of first ctb of the slice, in ctb units */
   2931     /* y-pos of first ctb of the slice, in ctb units */
   2932     WORD32 i4_first_ctb_x;
   2933     WORD32 i4_first_ctb_y;
   2934 
   2935     /* x-pos of first ctb of the slice, in samples */
   2936     /* y-pos of first ctb of the slice, in samples */
   2937     WORD32 i4_first_sample_x;
   2938     WORD32 i4_first_sample_y;
   2939 
   2940 } ihevce_tile_params_t;
   2941 
   2942 /**
   2943 ******************************************************************************
   2944  *  @brief  Encoder context structure
   2945 ******************************************************************************
   2946  */
   2947 
   2948 typedef struct
   2949 {
   2950     /**
   2951      *  vps parameters (one entry per bitrate instance)
   2952      */
   2953     vps_t as_vps[IHEVCE_MAX_NUM_BITRATES];
   2954 
   2955     /**
   2956      *  sps parameters (one entry per bitrate instance)
   2957      */
   2958     sps_t as_sps[IHEVCE_MAX_NUM_BITRATES];
   2959 
   2960     /**
   2961      *  pps parameters
   2962      *  Required for each bitrate separately, mainly because
   2963      *  init qp etc. parameters need to be different for each instance
   2964      */
   2965     pps_t as_pps[IHEVCE_MAX_NUM_BITRATES];
   2966 
   2967     /**
   2968      * Rate control mutex lock handle
   2969      */
   2970     void *pv_rc_mutex_lock_hdl;
   2971 
   2972     /** frame level cu analyse  buffer pointer for ME
   2973      * ME will get ps_ctb_analyse structure populated with ps_cu pointers
   2974      * pointing to ps_cu_analyse buffer from IPE.
   2975       */
   2976     //cu_analyse_t       *ps_cu_analyse_inter[PING_PONG_BUF];
   2977 
   2978     /**
   2979       *  CTB frame context between encoder (producer) and entropy (consumer)
   2980       */
   2981     enc_q_ctxt_t s_enc_ques;
   2982 
   2983     /**
   2984      *  Encoder memory manager ctxt
   2985      */
   2986     enc_mem_mngr_ctxt s_mem_mngr;
   2987 
   2988     /**
   2989      * Semaphores of all the threads created in HLE
   2990      * and Que handle for buffers b/w frame process and entropy
   2991      */
   2992     thrd_que_sem_hdl_t s_thrd_sem_ctxt;
   2993 
   2994     /**
   2995      *  Reference / recon buffer Que pointer (one per bitrate instance)
   2996      */
   2997     recon_pic_buf_t **pps_recon_buf_q[IHEVCE_MAX_NUM_BITRATES];
   2998 
   2999     /**
   3000      * Number of buffers in Recon buffer queue (one count per bitrate instance)
   3001      */
   3002     WORD32 ai4_num_buf_recon_q[IHEVCE_MAX_NUM_BITRATES];
   3003 
   3004     /**
   3005      * Reference / recon buffer Que pointer for Pre Encode group
   3006      * this will be just a container and no buffers will be allocated
   3007      */
   3008     recon_pic_buf_t **pps_pre_enc_recon_buf_q;
   3009 
   3010     /**
   3011      * Number of buffers in Recon buffer queue of the Pre Encode group
   3012      */
   3013     WORD32 i4_pre_enc_num_buf_recon_q;
   3014 
   3015     /**
   3016       * frame level CTB parameters and worst PU CU and TU in a CTB row
   3017       */
   3018     frm_ctb_ctxt_t s_frm_ctb_prms;
   3019 
   3020     /*
   3021      * Module ctxt pointers of all modules
   3022      */
   3023     module_ctxt_t s_module_ctxt;
   3024 
   3025     /*
   3026      * LAP static parameters
   3027      */
   3028     ihevce_lap_static_params_t s_lap_stat_prms;
   3029 
   3030     /*
   3031      * Run time dynamic source params
   3032      */
   3033 
   3034     ihevce_src_params_t s_runtime_src_prms;
   3035 
   3036     /*
   3037      * Run time target params
   3038      */
   3039     ihevce_tgt_params_t s_runtime_tgt_params;
   3040 
   3041     /*
   3042      *  Run time dynamic coding params
   3043      */
   3044     ihevce_coding_params_t s_runtime_coding_prms;
   3045 
   3046     /**
   3047      * Pointer to static config params
   3048      */
   3049     ihevce_static_cfg_params_t *ps_stat_prms;
   3050 
   3051     /**
   3052      * the following structure members are used for copying recon buf info
   3053      * in case of duplicate pics
   3054      */
   3055 
   3056     /**
   3057      * Array of reference picture list for pre enc group
   3058      * Separate list for ping_pong instances
   3059      * 2=> ref_pic_list0 and ref_pic_list1
   3060      */
   3061     recon_pic_buf_t as_pre_enc_ref_lists[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME][2]
   3062                                         [HEVCE_MAX_REF_PICS * 2];
   3063 
   3064     /**
   3065      * Array of reference picture list pointers for pre enc group
   3066      * Separate list for ping_pong instances
   3067      * 2=> ref_pic_list0 and ref_pic_list1
   3068      */
   3069     recon_pic_buf_t *aps_pre_enc_ref_lists[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME][2]
   3070                                           [HEVCE_MAX_REF_PICS * 2];
   3071 
   3072     /**
   3073      *  Number of input frames per input queue
   3074      */
   3075     WORD32 i4_num_input_buf_per_queue;
   3076 
   3077     /**
   3078      *  poc of the Clean Random Access (CRA) I pic
   3079      */
   3080     WORD32 i4_cra_poc;
   3081 
   3082     /** Number of ref pics in list 0 for any given frame */
   3083     WORD32 i4_num_ref_l0;
   3084 
   3085     /** Number of ref pics in list 1 for any given frame */
   3086     WORD32 i4_num_ref_l1;
   3087 
   3088     /** Number of active ref pics in list 0 for cur frame */
   3089     WORD32 i4_num_ref_l0_active;
   3090 
   3091     /** Number of active ref pics in list 1 for cur frame */
   3092     WORD32 i4_num_ref_l1_active;
   3093 
   3094     /** Number of ref pics in list 0 for any given frame pre encode stage */
   3095     WORD32 i4_pre_enc_num_ref_l0;
   3096 
   3097     /** Number of ref pics in list 1 for any given frame  pre encode stage */
   3098     WORD32 i4_pre_enc_num_ref_l1;
   3099 
   3100     /** Number of active ref pics in list 0 for cur frame  pre encode stage */
   3101     WORD32 i4_pre_enc_num_ref_l0_active;
   3102 
   3103     /** Number of active ref pics in list 1 for cur frame  pre encode stage */
   3104     WORD32 i4_pre_enc_num_ref_l1_active;
   3105 
   3106     /**
   3107      *  working mem to be used for frm level activities
   3108      * One example is interpolation at frame level. This requires memory
   3109      * of (max width + 16) * (max_height + 7 + 16 ) * 2 bytes.
   3110      * This is so since we generate interp output for max_width + 16 x
   3111      * max_height + 16, and then the intermediate output is 16 bit and
   3112      * is max_height + 16 + 7 rows
   3113      */
   3114     UWORD8 *pu1_frm_lvl_wkg_mem;
   3115 
   3116     /**
   3117      * Multi thread processing context
   3118      * This memory contains the variables and pointers shared across threads
   3119      * in enc-group and pre-enc-group
   3120      */
   3121     multi_thrd_ctxt_t s_multi_thrd;
   3122 
   3123     /** I/O Queues created status */
   3124     WORD32 i4_io_queues_created;
   3125 
   3126     WORD32 i4_end_flag;
   3127 
   3128     /** number of bit-rate instances running */
   3129     WORD32 i4_num_bitrates;
   3130 
   3131     /** number of enc frames running in parallel */
   3132     WORD32 i4_num_enc_loop_frm_pllel;
   3133 
   3134     /*ref bitrate id*/
   3135     WORD32 i4_ref_mbr_id;
   3136 
   3137     /* Flag to indicate to the app that end of processing has been reached */
   3138     WORD32 i4_frame_limit_reached;
   3139 
   3140     /*Structure to store the function selector
   3141      * pointers for common and encoder */
   3142     func_selector_t s_func_selector;
   3143 
   3144     /*ref resolution id*/
   3145     WORD32 i4_resolution_id;
   3146 
   3147     /*hle context*/
   3148     void *pv_hle_ctxt;
   3149 
   3150     rc_quant_t s_rc_quant;
   3151     /*ME cost of P pic stored for the next ref B pic*/
   3152     //LWORD64 i8_acc_me_cost_of_p_pic_for_b_pic[2];
   3153 
   3154     UWORD32 u4_cur_pic_encode_cnt;
   3155     UWORD32 u4_cur_pic_encode_cnt_dbp;
   3156     /*past 2 p pics high complexity status*/
   3157     WORD32 ai4_is_past_pic_complex[2];
   3158 
   3159     WORD32 i4_is_I_reset_done;
   3160     WORD32 i4_past_RC_reset_count;
   3161 
   3162     WORD32 i4_future_RC_reset;
   3163 
   3164     WORD32 i4_past_RC_scd_reset_count;
   3165 
   3166     WORD32 i4_future_RC_scd_reset;
   3167     WORD32 i4_poc_reset_values;
   3168 
   3169     /*Place holder to store the length of LAP in first pass*/
   3170     /** Number of frames to look-ahead for RC by -
   3171      * counts 2 fields as one frame for interlaced
   3172      */
   3173     WORD32 i4_look_ahead_frames_in_first_pass;
   3174 
   3175     WORD32 ai4_mod_factor_derived_by_variance[2];
   3176     float f_strength;
   3177 
   3178     /*for B frames use the avg activity
   3179     from the layer 0 (I or P) which is the average over
   3180     Lap2 window*/
   3181     LWORD64 ai8_lap2_8x8_avg_act_from_T0[2];
   3182 
   3183     LWORD64 ai8_lap2_16x16_avg_act_from_T0[3];
   3184 
   3185     LWORD64 ai8_lap2_32x32_avg_act_from_T0[3];
   3186 
   3187     /*for B frames use the log of avg activity
   3188     from the layer 0 (I or P) which is the average over
   3189     Lap2 window*/
   3190     long double ald_lap2_8x8_log_avg_act_from_T0[2];
   3191 
   3192     long double ald_lap2_16x16_log_avg_act_from_T0[3];
   3193 
   3194     long double ald_lap2_32x32_log_avg_act_from_T0[3];
   3195 
   3196     ihevce_tile_params_t *ps_tile_params_base;
   3197 
   3198     WORD32 ai4_column_width_array[MAX_TILE_COLUMNS];
   3199 
   3200     WORD32 ai4_row_height_array[MAX_TILE_ROWS];
   3201 
   3202     /* Architecture */
   3203     IV_ARCH_T e_arch_type;
   3204 
   3205     UWORD8 u1_is_popcnt_available;
   3206 
   3207     WORD32 i4_active_scene_num;
   3208 
   3209     WORD32 i4_max_fr_enc_loop_parallel_rc;
   3210     WORD32 ai4_rc_query[IHEVCE_MAX_NUM_BITRATES];
   3211     WORD32 i4_active_enc_frame_id;
   3212 
   3213     /**
   3214     * LAP interface ctxt pointer
   3215     */
   3216     void *pv_lap_interface_ctxt;
   3217 
   3218     /* If enabled, enables blu ray compatibility of op*/
   3219     WORD32 i4_blu_ray_spec;
   3220 
   3221 } enc_ctxt_t;
   3222 
   3223 /**
   3224 ******************************************************************************
   3225 *  @brief  This struct contains the inter CTB params needed for the decision
   3226 *   of the best inter CU results
   3227 ******************************************************************************
   3228 */
   3229 typedef struct
   3230 {
   3231     hme_pred_buf_mngr_t s_pred_buf_mngr;
   3232 
   3233     /** X and y offset of ctb w.r.t. start of pic */
   3234     WORD32 i4_ctb_x_off;
   3235     WORD32 i4_ctb_y_off;
   3236 
   3237     /**
   3238      * Pred buffer ptr, updated inside subpel refinement process. This
   3239      * location passed to the leaf fxn for copying the winner pred buf
   3240      */
   3241     UWORD8 **ppu1_pred;
   3242 
   3243     /** Working mem passed to leaf fxns */
   3244     UWORD8 *pu1_wkg_mem;
   3245 
   3246     /** prediction buffer stride for leaf fxns to copy the pred winner buf */
   3247     WORD32 i4_pred_stride;
   3248 
   3249     /** Stride of input buf, updated inside subpel fxn */
   3250     WORD32 i4_inp_stride;
   3251 
   3252     /** stride of recon buffer */
   3253     WORD32 i4_rec_stride;
   3254 
   3255     /** Indicates if bi dir is enabled or not */
   3256     WORD32 i4_bidir_enabled;
   3257 
   3258     /**
   3259      * Total number of references of current picture which is enocded
   3260      */
   3261     UWORD8 u1_num_ref;
   3262 
   3263     /** Recon Pic buffer pointers for L0 list */
   3264     recon_pic_buf_t **pps_rec_list_l0;
   3265 
   3266     /** Recon Pic buffer pointers for L1 list */
   3267     recon_pic_buf_t **pps_rec_list_l1;
   3268 
   3269     /**
   3270      * These pointers point to modified input, one each for one ref idx.
   3271      * Instead of weighting the reference, we weight the input with inverse
   3272      * wt and offset for list 0 and list 1.
   3273      */
   3274     UWORD8 *apu1_wt_inp[2][MAX_NUM_REF];
   3275 
   3276     /* Since ME uses weighted inputs, we use reciprocal of the actual weights */
   3277     /* that are signaled in the bitstream */
   3278     WORD32 *pi4_inv_wt;
   3279     WORD32 *pi4_inv_wt_shift_val;
   3280 
   3281     /* Map between L0 Reference indices and LC indices */
   3282     WORD8 *pi1_past_list;
   3283 
   3284     /* Map between L1 Reference indices and LC indices */
   3285     WORD8 *pi1_future_list;
   3286 
   3287     /**
   3288      * Points to the non-weighted input data for the current CTB
   3289      */
   3290     UWORD8 *pu1_non_wt_inp;
   3291 
   3292     /**
   3293      * Store the pred lambda and lamda_qshifts for all the reference indices
   3294      */
   3295     WORD32 i4_lamda;
   3296 
   3297     UWORD8 u1_lamda_qshift;
   3298 
   3299     WORD32 wpred_log_wdc;
   3300 
   3301     /**
   3302      * Number of active references in l0
   3303      */
   3304     UWORD8 u1_num_active_ref_l0;
   3305 
   3306     /**
   3307      * Number of active references in l1
   3308      */
   3309     UWORD8 u1_num_active_ref_l1;
   3310 
   3311     /** The max_depth for inter tu_tree */
   3312     UWORD8 u1_max_tr_depth;
   3313 
   3314     /** Quality Preset */
   3315     WORD8 i1_quality_preset;
   3316 
   3317     /** SATD or SAD */
   3318     UWORD8 u1_use_satd;
   3319 
   3320     /* Frame level QP */
   3321     WORD32 i4_qstep_ls8;
   3322 
   3323     /* Pointer to an array of PU level src variances */
   3324     UWORD32 *pu4_src_variance;
   3325 
   3326     WORD32 i4_alpha_stim_multiplier;
   3327 
   3328     UWORD8 u1_is_cu_noisy;
   3329 
   3330     ULWORD64 *pu8_part_src_sigmaX;
   3331 
   3332     ULWORD64 *pu8_part_src_sigmaXSquared;
   3333 
   3334     UWORD8 u1_max_2nx2n_tu_recur_cands;
   3335 
   3336 } inter_ctb_prms_t;
   3337 
   3338 /*****************************************************************************/
   3339 /* Extern Variable Declarations                                              */
   3340 /*****************************************************************************/
   3341 extern const double lamda_modifier_for_I_pic[8]; /* 8-entry const table defined outside this header; by its name, lambda modifiers for I pictures (index meaning not visible here) */
   3342 
   3343 /*****************************************************************************/
   3344 /* Extern Function Declarations                                              */
   3345 /*****************************************************************************/
   3346 
   3347 #endif /* _IHEVCE_ENC_STRUCTS_H_ */
   3348