Home | History | Annotate | Download | only in encoder
      1 /******************************************************************************
      2  *
      3  * Copyright (C) 2018 The Android Open Source Project
      4  *
      5  * Licensed under the Apache License, Version 2.0 (the "License");
      6  * you may not use this file except in compliance with the License.
      7  * You may obtain a copy of the License at:
      8  *
      9  * http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  *
     17  *****************************************************************************
     18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
     19 */
     20 /**
     21 ******************************************************************************
     22 * @file ihevce_cabac_tu.c
     23 *
     24 * @brief
     25 *  This file contains function definitions for cabac entropy coding of
     26 *  transform units of HEVC syntax
     27 *
     28 * @author
     29 *  ittiam
     30 *
     31 * @List of Functions
     32 *  ihevce_cabac_encode_qp_delta()
     33 *  ihevce_cabac_encode_last_coeff_x_y()
     34 *  ihevce_encode_transform_tree()
     35 *  ihevce_cabac_residue_encode()
     36 *  ihevce_cabac_residue_encode_rdopt()
     37 *  ihevce_cabac_residue_encode_rdoq()
     38 *  ihevce_code_all_sig_coeffs_as_0_explicitly()
     39 *  ihevce_find_new_last_csb()
     40 *  ihevce_copy_backup_ctxt()
     41 *  ihevce_estimate_num_bits_till_next_non_zero_coeff()
     42 *
     43 ******************************************************************************
     44 */
     45 
     46 /*****************************************************************************/
     47 /* File Includes                                                             */
     48 /*****************************************************************************/
     49 
     50 /* System include files */
     51 #include <stdio.h>
     52 #include <string.h>
     53 #include <stdlib.h>
     54 #include <assert.h>
     55 #include <stdarg.h>
     56 #include <math.h>
     57 
     58 /* User include files */
     59 #include "ihevc_typedefs.h"
     60 #include "itt_video_api.h"
     61 #include "ihevce_api.h"
     62 
     63 #include "rc_cntrl_param.h"
     64 #include "rc_frame_info_collector.h"
     65 #include "rc_look_ahead_params.h"
     66 
     67 #include "ihevc_defs.h"
     68 #include "ihevc_structs.h"
     69 #include "ihevc_platform_macros.h"
     70 #include "ihevc_deblk.h"
     71 #include "ihevc_itrans_recon.h"
     72 #include "ihevc_chroma_itrans_recon.h"
     73 #include "ihevc_chroma_intra_pred.h"
     74 #include "ihevc_intra_pred.h"
     75 #include "ihevc_inter_pred.h"
     76 #include "ihevc_mem_fns.h"
     77 #include "ihevc_padding.h"
     78 #include "ihevc_weighted_pred.h"
     79 #include "ihevc_sao.h"
     80 #include "ihevc_resi_trans.h"
     81 #include "ihevc_quant_iquant_ssd.h"
     82 #include "ihevc_cabac_tables.h"
     83 #include "ihevc_trans_macros.h"
     84 #include "ihevc_trans_tables.h"
     85 
     86 #include "ihevce_defs.h"
     87 #include "ihevce_lap_enc_structs.h"
     88 #include "ihevce_multi_thrd_structs.h"
     89 #include "ihevce_me_common_defs.h"
     90 #include "ihevce_had_satd.h"
     91 #include "ihevce_error_codes.h"
     92 #include "ihevce_bitstream.h"
     93 #include "ihevce_cabac.h"
     94 #include "ihevce_rdoq_macros.h"
     95 #include "ihevce_function_selector.h"
     96 #include "ihevce_enc_structs.h"
     97 #include "ihevce_entropy_structs.h"
     98 #include "ihevce_cmn_utils_instr_set_router.h"
     99 #include "ihevce_enc_loop_structs.h"
    100 #include "ihevce_bs_compute_ctb.h"
    101 #include "ihevce_global_tables.h"
    102 #include "ihevce_common_utils.h"
    103 #include "ihevce_trace.h"
    104 
    105 /*****************************************************************************/
    106 /* Globals                                                                   */
    107 /*****************************************************************************/
    108 extern UWORD16 gau2_ihevce_cabac_bin_to_bits[64 * 2];
    109 
    110 /**
    111 ******************************************************************************
    112 * @brief  LUT for deriving of last significant coeff prefix.
    113 *
    114 * @input   : last_significant_coeff
    115 *
    116 * @output  : last_significant_prefix (does not include the
    117 *
    118 * @remarks Look up tables taken frm HM-8.0-dev
    119 ******************************************************************************
    120 */
    121 const UWORD8 gu1_hevce_last_coeff_prefix[32] = { 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
    122                                                  8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9 };
    123 
    124 /**
    125 *****************************************************************************
    126 * @brief  LUT for deriving of last significant coeff suffix
    127 *
    128 * @input   : last significant prefix
    129 *
    130 * @output  : prefix code that needs to be subtracted from last_pos to get
    131 *           suffix as per equation 7-55 in section 7.4.12.
    132 *
    133 *           It returns the following code for last_significant_prefix > 3
    134 *            ((1 << ((last_significant_coeff_x_prefix >> 1) - 1))  *
    135 *            (2 + (last_significant_coeff_x_prefix & 1))
    136 *
    137 *
    138 * @remarks Look up tables taken frm HM-8.0-dev
    139 *****************************************************************************
    140 */
    141 const UWORD8 gu1_hevce_last_coeff_prefix_code[10] = { 0, 1, 2, 3, 4, 6, 8, 12, 16, 24 };
    142 
    143 /**
    144 *****************************************************************************
    145 * @brief  returns raster index of 4x4 block for diag up-right/horz/vert scans
    146 *
    147 * @input   : scan type and scan idx
    148 *
    149 * @output  : packed y pos(msb 4bit) and x pos(lsb 2bit)
    150 *
    151 *****************************************************************************
    152 */
    153 const UWORD8 gu1_hevce_scan4x4[3][16] = {
    154     /* diag up right */
    155     { 0, 4, 1, 8, 5, 2, 12, 9, 6, 3, 13, 10, 7, 14, 11, 15 },
    156 
    157     /* horz */
    158     { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
    159 
    160     /* vert */
    161     { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 }
    162 };
    163 
    164 /**
    165 *****************************************************************************
    166 * @brief  returns context increment for sig coeff based on csbf neigbour
    167 *         flags (bottom and right) and current coeff postion in 4x4 block
    168 *         See section 9.3.3.1.4 for details on this context increment
    169 *
    170 * @input   : neigbour csbf flags(bit0:rightcsbf, bit1:bottom csbf)
    171 *           coeff idx in raster order (0-15)
    172 *
    173 * @output  : context increment for sig coeff flag
    174 *
    175 *****************************************************************************
    176 */
    177 const UWORD8 gu1_hevce_sigcoeff_ctxtinc[4][16] = {
    178     /* nbr csbf = 0:  sigCtx = (xP+yP == 0) ? 2 : (xP+yP < 3) ? 1: 0 */
    179     { 2, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 },
    180 
    181     /* nbr csbf = 1:  sigCtx = (yP == 0) ? 2 : (yP == 1) ? 1: 0      */
    182     { 2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
    183 
    184     /* nbr csbf = 2:  sigCtx = (xP == 0) ? 2 : (xP == 1) ? 1: 0      */
    185     { 2, 1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0 },
    186 
    187     /* nbr csbf = 3:  sigCtx = 2                                     */
    188     { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 }
    189 };
    190 
    191 const UWORD8 gu1_hevce_sigcoeff_ctxtinc_00[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
    192 
    193 /**
    194 *****************************************************************************
    195 * @brief  returns context increment for sig coeff for 4x4 tranform size as
    196 *         per Table 9-39 in section 9.3.3.1.4
    197 *
    198 * @input   : coeff idx in raster order (0-15)
    199 *
    200 * @output  : context increment for sig coeff flag
    201 *
    202 *****************************************************************************
    203 */
    204 const UWORD8 gu1_hevce_sigcoeff_ctxtinc_tr4[16] = { 0, 1, 4, 5, 2, 3, 4, 5, 6, 6, 8, 8, 7, 7, 8, 0 };
    205 
/* Compile-time switch, set to 0 here; its uses are not in this part of the  */
/* file. NOTE(review): presumably disables zero coded-sub-block-flag         */
/* handling when set to 1 - confirm against the code that tests it.          */
#define DISABLE_ZCSBF 0

/* Compile-time switch, set to 0 here; its uses are not in this part of the  */
/* file. NOTE(review): presumably enables test code for the cabac bit        */
/* estimation when set to 1 - confirm against the code that tests it.        */
#define TEST_CABAC_BITESTIMATE 0
    209 
    210 /*****************************************************************************/
    211 /* Function Definitions                                                      */
    212 /*****************************************************************************/
    213 /**
    214 ******************************************************************************
    215 *
    216 *  @brief Entropy encoding of qp_delta in a tu as per sec 9.3.2 Table 9-32
    217 *
    218 *  @par   Description
    219 *  trunacted unary binarization is done based upto abs_delta of 5 and the rest
    220 *  is coded as 0th order Exponential Golomb code
    221 *
    222 *  @param[inout]   ps_cabac
    223 *  pointer to cabac encoding context (handle)
    224 *
    225 *  @param[in]      qp_delta
    226 *  delta qp that needs to be encoded
    227 *
    228 *  @return      success or failure error code
    229 *
    230 ******************************************************************************
    231 */
    232 WORD32 ihevce_cabac_encode_qp_delta(cab_ctxt_t *ps_cabac, WORD32 qp_delta)
    233 {
    234     WORD32 qp_delta_abs = ABS(qp_delta);
    235     WORD32 c_max = TU_MAX_QP_DELTA_ABS;
    236     WORD32 ctxt_inc = IHEVC_CAB_QP_DELTA_ABS;
    237     WORD32 ctxt_inc_max = CTXT_MAX_QP_DELTA_ABS;
    238     WORD32 ret = IHEVCE_SUCCESS;
    239 
    240     /* qp_delta_abs is coded as combination of tunary and eg0 code  */
    241     /* See Table 9-32 and Table 9-37 for details on cu_qp_delta_abs */
    242     ret |= ihevce_cabac_encode_tunary(
    243         ps_cabac, MIN(qp_delta_abs, c_max), c_max, ctxt_inc, 0, ctxt_inc_max);
    244     if(qp_delta_abs >= c_max)
    245     {
    246         ret |= ihevce_cabac_encode_egk(ps_cabac, qp_delta_abs - c_max, 0);
    247     }
    248     AEV_TRACE("cu_qp_delta_abs", qp_delta_abs, ps_cabac->u4_range);
    249 
    250     /* code the qp delta sign flag */
    251     if(qp_delta_abs)
    252     {
    253         WORD32 sign = (qp_delta < 0) ? 1 : 0;
    254         ret |= ihevce_cabac_encode_bypass_bin(ps_cabac, sign);
    255         AEV_TRACE("cu_qp_delta_sign", sign, ps_cabac->u4_range);
    256     }
    257 
    258     return (ret);
    259 }
    260 
    261 /**
    262 ******************************************************************************
    263 *
    264 *  @brief Encodes position of the last coded coeff (in scan order) of TU
    265 *
    266 *  @par   Description
    267 *  Entropy encode of last coded coeff of a TU as per section:7.3.13
    268 *
    269 *  @param[inout]   ps_cabac
    270 *  pointer to cabac context (handle)
    271 *
    272 *  @param[in]      last_coeff_x
    273 *  x co-ordinate of the last coded coeff of TU(in scan order)
    274 *
    275 *  @param[in]      last_coeff_y
    276 *  x co-ordinate of the last coded coeff of TU (in scan order
    277 *
    278 *  @param[in]      log2_tr_size
    279 *  transform block size corresponding to this node in quad tree
    280 *
    281 *  @param[in]      is_luma
    282 *  indicates if residual block corresponds to luma or chroma block
    283 *
    284 *  @return      success or failure error code
    285 *
    286 ******************************************************************************
    287 */
    288 WORD32 ihevce_cabac_encode_last_coeff_x_y(
    289     cab_ctxt_t *ps_cabac,
    290     WORD32 last_coeff_x,
    291     WORD32 last_coeff_y,
    292     WORD32 log2_tr_size,
    293     WORD32 is_luma)
    294 {
    295     WORD32 ret = IHEVCE_SUCCESS;
    296 
    297     WORD32 last_coeff_x_prefix;
    298     WORD32 last_coeff_y_prefix;
    299     WORD32 suffix, suf_length;
    300     WORD32 c_max;
    301     WORD32 ctxt_idx_x, ctxt_idx_y, ctx_shift;
    302 
    303     /* derive the prefix code */
    304     last_coeff_x_prefix = gu1_hevce_last_coeff_prefix[last_coeff_x];
    305     last_coeff_y_prefix = gu1_hevce_last_coeff_prefix[last_coeff_y];
    306 
    307     c_max = gu1_hevce_last_coeff_prefix[(1 << log2_tr_size) - 1];
    308 
    309     /* context increment as per section 9.3.3.1.2 */
    310     if(is_luma)
    311     {
    312         WORD32 ctx_offset = (3 * (log2_tr_size - 2)) + ((log2_tr_size - 1) >> 2);
    313 
    314         ctxt_idx_x = IHEVC_CAB_COEFFX_PREFIX + ctx_offset;
    315         ctxt_idx_y = IHEVC_CAB_COEFFY_PREFIX + ctx_offset;
    316         ctx_shift = (log2_tr_size + 1) >> 2;
    317     }
    318     else
    319     {
    320         ctxt_idx_x = IHEVC_CAB_COEFFX_PREFIX + 15;
    321         ctxt_idx_y = IHEVC_CAB_COEFFY_PREFIX + 15;
    322         ctx_shift = log2_tr_size - 2;
    323     }
    324 
    325     /* code the last_coeff_x_prefix as tunary binarized code */
    326     ret |= ihevce_cabac_encode_tunary(
    327         ps_cabac, last_coeff_x_prefix, c_max, ctxt_idx_x, ctx_shift, c_max);
    328 
    329     AEV_TRACE("last_coeff_x_prefix", last_coeff_x_prefix, ps_cabac->u4_range);
    330 
    331     /* code the last_coeff_y_prefix as tunary binarized code */
    332     ret |= ihevce_cabac_encode_tunary(
    333         ps_cabac, last_coeff_y_prefix, c_max, ctxt_idx_y, ctx_shift, c_max);
    334 
    335     AEV_TRACE("last_coeff_y_prefix", last_coeff_y_prefix, ps_cabac->u4_range);
    336 
    337     if(last_coeff_x_prefix > 3)
    338     {
    339         /* code the last_coeff_x_suffix as FLC bypass code */
    340         suffix = last_coeff_x - gu1_hevce_last_coeff_prefix_code[last_coeff_x_prefix];
    341 
    342         suf_length = ((last_coeff_x_prefix - 2) >> 1);
    343 
    344         ret |= ihevce_cabac_encode_bypass_bins(ps_cabac, suffix, suf_length);
    345 
    346         AEV_TRACE("last_coeff_x_suffix", suffix, ps_cabac->u4_range);
    347     }
    348 
    349     if(last_coeff_y_prefix > 3)
    350     {
    351         /* code the last_coeff_y_suffix as FLC bypass code */
    352         suffix = last_coeff_y - gu1_hevce_last_coeff_prefix_code[last_coeff_y_prefix];
    353 
    354         suf_length = ((last_coeff_y_prefix - 2) >> 1);
    355 
    356         ret |= ihevce_cabac_encode_bypass_bins(ps_cabac, suffix, suf_length);
    357 
    358         AEV_TRACE("last_coeff_y_suffix", suffix, ps_cabac->u4_range);
    359     }
    360 
    361     return (ret);
    362 }
    363 
    364 /**
    365 ******************************************************************************
    366 *
    367 *  @brief Encodes a transform tree as per section 7.3.11
    368 *
    369 *  @par   Description
    370 *  Uses recursion till a leaf node is reached where a transform unit
    371 *  is coded. While recursing split_transform_flag and parent chroma cbf flags
    372 *  are coded before recursing to leaf node
    373 *
    374 *  @param[inout]   ps_entropy_ctxt
    375 *  pointer to entropy context (handle)
    376 *
    377 *  @param[in]      x0_ctb
    378 *  x co-ordinate w.r.t ctb start of current tu node of coding tree
    379 *
    380 *  @param[in]      y0_ctb
     381 *  y co-ordinate w.r.t ctb start of current tu node of coding tree
    382 *
    383 *  @param[in]      log2_tr_size
    384 *  transform block size corresponding to this node in quad tree
    385 *
    386 *  @param[in]      tr_depth
    387 *  current depth of the tree
    388 *
     389 *  @param[in]      blk_num
     390 *  current block number in the quad tree (required for chroma 4x4 coding)
     391 *
     392 *  @param[in]      ps_enc_cu
     393 *  pointer to enc loop output of the current cu (tu array, pred mode, part mode)
    394 *
    395 *  @return      success or failure error code
    396 *
    397 ******************************************************************************
    398 */
    399 WORD32 ihevce_encode_transform_tree(
    400     entropy_context_t *ps_entropy_ctxt,
    401     WORD32 x0_ctb,
    402     WORD32 y0_ctb,
    403     WORD32 log2_tr_size,
    404     WORD32 tr_depth,
    405     WORD32 blk_num,
    406     cu_enc_loop_out_t *ps_enc_cu)
    407 {
    408     WORD32 ret = IHEVCE_SUCCESS;
    409     sps_t *ps_sps = ps_entropy_ctxt->ps_sps;
    410     WORD32 split_tr_flag;
    411 
    412     WORD32 tu_idx = ps_entropy_ctxt->i4_tu_idx;
    413     tu_enc_loop_out_t *ps_enc_tu = ps_enc_cu->ps_enc_tu + tu_idx;
    414 
    415     /* TU size in pels */
    416     WORD32 tu_size = 4 << ps_enc_tu->s_tu.b3_size;
    417 
    418     cab_ctxt_t *ps_cabac = &ps_entropy_ctxt->s_cabac_ctxt;
    419 
    420     WORD32 max_tr_depth;
    421     WORD32 is_intra = (ps_enc_cu->b1_pred_mode_flag == PRED_MODE_INTRA);
    422     WORD32 log2_min_trafo_size, log2_max_trafo_size;
    423     UWORD32 u4_bits_estimated_prev;
    424 
    425     WORD32 intra_nxn_pu = 0;
    426     WORD32 ctxt_inc;
    427     WORD32 cbf_luma = 0;
    428     WORD32 ai4_cbf_cb[2] = { 0, 0 };
    429     WORD32 ai4_cbf_cr[2] = { 0, 0 };
    430     UWORD32 tu_split_bits = 0;
    431     UWORD8 u1_is_422 = (ps_sps->i1_chroma_format_idc == 2);
    432 
    433     tu_split_bits = ps_cabac->u4_bits_estimated_q12;
    434     /* intialize min / max transform sizes based on sps */
    435     log2_min_trafo_size = ps_sps->i1_log2_min_transform_block_size;
    436 
    437     log2_max_trafo_size = log2_min_trafo_size + ps_sps->i1_log2_diff_max_min_transform_block_size;
    438 
    439     /* intialize max transform depth for intra / inter signalled in sps */
    440     if(is_intra)
    441     {
    442         max_tr_depth = ps_sps->i1_max_transform_hierarchy_depth_intra;
    443         intra_nxn_pu = ps_enc_cu->b3_part_mode == PART_NxN;
    444     }
    445     else
    446     {
    447         max_tr_depth = ps_sps->i1_max_transform_hierarchy_depth_inter;
    448     }
    449 
    450     /* Sanity checks */
    451     ASSERT(tr_depth <= 4);
    452     ASSERT(log2_min_trafo_size >= 2);
    453     ASSERT(log2_max_trafo_size <= 5);
    454     ASSERT((tu_idx >= 0) && (tu_idx < ps_enc_cu->u2_num_tus_in_cu));
    455     ASSERT((tu_size >= 4) && (tu_size <= (1 << log2_tr_size)));
    456 
    457     /* Encode split transform flag based on following conditions; sec 7.3.11 */
    458     if((log2_tr_size <= log2_max_trafo_size) && (log2_tr_size > log2_min_trafo_size) &&
    459        (tr_depth < max_tr_depth) && (!(intra_nxn_pu && (tr_depth == 0))))
    460     {
    461         /* encode the split transform flag, context derived as per Table9-37 */
    462         ctxt_inc = IHEVC_CAB_SPLIT_TFM + (5 - log2_tr_size);
    463 
    464         /* split if actual tu size is smaller than target tu size */
    465         split_tr_flag = tu_size < (1 << log2_tr_size);
    466         u4_bits_estimated_prev = ps_cabac->u4_bits_estimated_q12;
    467         ret |= ihevce_cabac_encode_bin(ps_cabac, split_tr_flag, ctxt_inc);
    468 
    469         if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
    470         {  // clang-format off
    471             /*PIC INFO : populate cu split flag*/
    472             ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_split_tu_flag +=
    473                 (ps_cabac->u4_bits_estimated_q12 - u4_bits_estimated_prev);
    474         }  // clang-format on
    475 
    476         AEV_TRACE("split_transform_flag", split_tr_flag, ps_cabac->u4_range);
    477     }
    478     else
    479     {
    480         WORD32 inter_split;
    481         /*********************************************************************/
    482         /*                                                                   */
    483         /* split tr is implicitly derived as 1 if  (see section 7.4.10)      */
    484         /*  a. log2_tr_size > log2_max_trafo_size                            */
    485         /*  b. intra cu has NXN pu                                           */
    486         /*  c. inter cu is not 2Nx2N && max_transform_hierarchy_depth_inter=0*/
    487         /*                                                                   */
    488         /* split tu is implicitly derived as 0 otherwise                     */
    489         /*********************************************************************/
    490         inter_split = (!is_intra) && (max_tr_depth == 0) && (tr_depth == 0) &&
    491                       (ps_enc_cu->b3_part_mode != PART_2Nx2N);
    492 
    493         if((log2_tr_size > log2_max_trafo_size) || (intra_nxn_pu && (tr_depth == 0)) ||
    494            (inter_split))
    495         {
    496             split_tr_flag = 1;
    497         }
    498         else
    499         {
    500             split_tr_flag = 0;
    501         }
    502     }
    503     /*accumulate only tu tree bits*/
    504     ps_cabac->u4_true_tu_split_flag_q12 += ps_cabac->u4_bits_estimated_q12 - tu_split_bits;
    505 
    506     /* Encode the cbf flags for chroma before the split as per sec 7.3.11   */
    507     if(log2_tr_size > 2)
    508     {
    509         /* encode the cbf cb, context derived as per Table 9-37 */
    510         ctxt_inc = IHEVC_CAB_CBCR_IDX + tr_depth;
    511 
    512         /* Note chroma cbf is coded for depth=0 or if parent cbf was coded */
    513         if((tr_depth == 0) || (ps_entropy_ctxt->apu1_cbf_cb[0][tr_depth - 1]) ||
    514            (ps_entropy_ctxt->apu1_cbf_cb[1][tr_depth - 1]))
    515         {
    516 #if CABAC_BIT_EFFICIENT_CHROMA_PARENT_CBF
    517             /*************************************************************/
    518             /* Bit-Efficient chroma cbf signalling                       */
    519             /* if children nodes have 0 cbf parent cbf can be coded as 0 */
    520             /* peeking through all the child nodes for cb to check if    */
    521             /* parent can be coded as 0                                  */
    522             /*************************************************************/
    523             WORD32 tu_cnt = 0;
    524             while(1)
    525             {
    526                 WORD32 trans_size = 1 << (ps_enc_tu[tu_cnt].s_tu.b3_size + 2);
    527                 WORD32 tu_x = (ps_enc_tu[tu_cnt].s_tu.b4_pos_x << 2);
    528                 WORD32 tu_y = (ps_enc_tu[tu_cnt].s_tu.b4_pos_y << 2);
    529 
    530                 ASSERT(tu_cnt < ps_enc_cu->u2_num_tus_in_cu);
    531 
    532                 if((ps_enc_tu[tu_cnt].s_tu.b1_cb_cbf) || (ps_enc_tu[tu_cnt].s_tu.b1_cb_cbf_subtu1))
    533                 {
    534                     ai4_cbf_cb[0] = ps_enc_tu[tu_cnt].s_tu.b1_cb_cbf;
    535                     ai4_cbf_cb[1] = ps_enc_tu[tu_cnt].s_tu.b1_cb_cbf_subtu1;
    536                     break;
    537                 }
    538 
    539                 /* 8x8 parent has only one 4x4 valid chroma block for 420 */
    540                 if(3 == log2_tr_size)
    541                     break;
    542 
    543                 if((tu_x + trans_size == (x0_ctb + (1 << log2_tr_size))) &&
    544                    (tu_y + trans_size == (y0_ctb + (1 << log2_tr_size))))
    545                 {
    546                     ai4_cbf_cb[0] = ps_enc_tu[tu_cnt].s_tu.b1_cb_cbf;
    547                     ai4_cbf_cb[1] = ps_enc_tu[tu_cnt].s_tu.b1_cb_cbf_subtu1;
    548                     ASSERT(
    549                         (0 == ps_enc_tu[tu_cnt].s_tu.b1_cb_cbf) &&
    550                         (0 == ps_enc_tu[tu_cnt].s_tu.b1_cb_cbf_subtu1));
    551                     break;
    552                 }
    553 
    554                 tu_cnt++;
    555             }
    556 #else
    557             /* read cbf only when split is 0 (child node) else force cbf=1 */
    558             ai4_cbf_cb[0] = (split_tr_flag && (log2_tr_size > 3)) ? 1 : ps_enc_tu->s_tu.b1_cb_cbf;
    559             ai4_cbf_cb[1] =
    560                 (split_tr_flag && (log2_tr_size > 3)) ? 1 : ps_enc_tu->s_tu.b1_cb_cbf_subtu1;
    561 
    562 #endif
    563             if((u1_is_422) && ((!split_tr_flag) || (3 == log2_tr_size)))
    564             {
    565                 u4_bits_estimated_prev = ps_cabac->u4_bits_estimated_q12;
    566                 ret |= ihevce_cabac_encode_bin(ps_cabac, ai4_cbf_cb[0], ctxt_inc);
    567 
    568                 if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
    569                 {  // clang-format off
    570                     /*PIC INFO : Populate CBF cr bits*/
    571                     ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_cbf_chroma_bits +=
    572                         (ps_cabac->u4_bits_estimated_q12 -
    573                             u4_bits_estimated_prev);
    574                 }  // clang-format on
    575 
    576                 AEV_TRACE("cbf_cb", ai4_cbf_cb[0], ps_cabac->u4_range);
    577 
    578                 u4_bits_estimated_prev = ps_cabac->u4_bits_estimated_q12;
    579                 ret |= ihevce_cabac_encode_bin(ps_cabac, ai4_cbf_cb[1], ctxt_inc);
    580 
    581                 if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
    582                 {  // clang-format off
    583                     /*PIC INFO : Populate CBF cr bits*/
    584                     ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_cbf_chroma_bits +=
    585                         (ps_cabac->u4_bits_estimated_q12 -
    586                             u4_bits_estimated_prev);
    587                 }  // clang-format on
    588 
    589                 AEV_TRACE("cbf_cb", ai4_cbf_cb[1], ps_cabac->u4_range);
    590             }
    591             else
    592             {
    593                 u4_bits_estimated_prev = ps_cabac->u4_bits_estimated_q12;
    594                 ret |= ihevce_cabac_encode_bin(ps_cabac, ai4_cbf_cb[0] || ai4_cbf_cb[1], ctxt_inc);
    595 
    596                 if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
    597                 {  // clang-format off
    598                     /*PIC INFO : Populate CBF cr bits*/
    599                     ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_cbf_chroma_bits +=
    600                         (ps_cabac->u4_bits_estimated_q12 -
    601                             u4_bits_estimated_prev);
    602                 }  // clang-format on
    603 
    604                 AEV_TRACE("cbf_cb", ai4_cbf_cb[0] || ai4_cbf_cb[1], ps_cabac->u4_range);
    605             }
    606         }
    607         else
    608         {
    609             ai4_cbf_cb[0] = ps_entropy_ctxt->apu1_cbf_cb[0][tr_depth - 1];
    610             ai4_cbf_cb[1] = ps_entropy_ctxt->apu1_cbf_cb[1][tr_depth - 1];
    611         }
    612 
    613         if((tr_depth == 0) || (ps_entropy_ctxt->apu1_cbf_cr[0][tr_depth - 1]) ||
    614            (ps_entropy_ctxt->apu1_cbf_cr[1][tr_depth - 1]))
    615         {
    616 #if CABAC_BIT_EFFICIENT_CHROMA_PARENT_CBF
    617             /*************************************************************/
    618             /* Bit-Efficient chroma cbf signalling                       */
    619             /* if children nodes have 0 cbf parent cbf can be coded as 0 */
    620             /* peeking through all the child nodes for cr to check if    */
    621             /* parent can be coded as 0                                  */
    622             /*************************************************************/
    623             WORD32 tu_cnt = 0;
    624             while(1)
    625             {
    626                 WORD32 trans_size = 1 << (ps_enc_tu[tu_cnt].s_tu.b3_size + 2);
    627                 WORD32 tu_x = (ps_enc_tu[tu_cnt].s_tu.b4_pos_x << 2);
    628                 WORD32 tu_y = (ps_enc_tu[tu_cnt].s_tu.b4_pos_y << 2);
    629 
    630                 ASSERT(tu_cnt < ps_enc_cu->u2_num_tus_in_cu);
    631 
    632                 if((ps_enc_tu[tu_cnt].s_tu.b1_cr_cbf) || (ps_enc_tu[tu_cnt].s_tu.b1_cr_cbf_subtu1))
    633                 {
    634                     ai4_cbf_cr[0] = ps_enc_tu[tu_cnt].s_tu.b1_cr_cbf;
    635                     ai4_cbf_cr[1] = ps_enc_tu[tu_cnt].s_tu.b1_cr_cbf_subtu1;
    636                     break;
    637                 }
    638 
    639                 /* 8x8 parent has only one 4x4 valid chroma block for 420 */
    640                 if(3 == log2_tr_size)
    641                     break;
    642 
    643                 if((tu_x + trans_size == (x0_ctb + (1 << log2_tr_size))) &&
    644                    (tu_y + trans_size == (y0_ctb + (1 << log2_tr_size))))
    645                 {
    646                     ai4_cbf_cr[0] = ps_enc_tu[tu_cnt].s_tu.b1_cr_cbf;
    647                     ai4_cbf_cr[1] = ps_enc_tu[tu_cnt].s_tu.b1_cr_cbf_subtu1;
    648                     ASSERT(
    649                         (0 == ps_enc_tu[tu_cnt].s_tu.b1_cr_cbf) &&
    650                         (0 == ps_enc_tu[tu_cnt].s_tu.b1_cr_cbf_subtu1));
    651                     break;
    652                 }
    653 
    654                 tu_cnt++;
    655             }
    656 #else
    657             /* read cbf only when split is 0 (child node) else force cbf=1 */
    658             ai4_cbf_cr[0] = (split_tr_flag && (log2_tr_size > 3)) ? 1 : ps_enc_tu->s_tu.b1_cr_cbf;
    659             ai4_cbf_cr[1] =
    660                 (split_tr_flag && (log2_tr_size > 3)) ? 1 : ps_enc_tu->s_tu.b1_cr_cbf_subtu1;
    661 #endif
    662 
    663             if((u1_is_422) && ((!split_tr_flag) || (3 == log2_tr_size)))
    664             {
    665                 u4_bits_estimated_prev = ps_cabac->u4_bits_estimated_q12;
    666                 ret |= ihevce_cabac_encode_bin(ps_cabac, ai4_cbf_cr[0], ctxt_inc);
    667 
    668                 if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
    669                 {  // clang-format off
    670                     /*PIC INFO : Populate CBF cr bits*/
    671                     ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_cbf_chroma_bits +=
    672                         (ps_cabac->u4_bits_estimated_q12 -
    673                             u4_bits_estimated_prev);
    674                 }  // clang-format on
    675 
    676                 AEV_TRACE("cbf_cr", ai4_cbf_cr[0], ps_cabac->u4_range);
    677 
    678                 u4_bits_estimated_prev = ps_cabac->u4_bits_estimated_q12;
    679                 ret |= ihevce_cabac_encode_bin(ps_cabac, ai4_cbf_cr[1], ctxt_inc);
    680 
    681                 if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
    682                 {  // clang-format off
    683                     /*PIC INFO : Populate CBF cr bits*/
    684                     ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_cbf_chroma_bits +=
    685                         (ps_cabac->u4_bits_estimated_q12 -
    686                             u4_bits_estimated_prev);
    687                 }  // clang-format on
    688 
    689                 AEV_TRACE("cbf_cr", ai4_cbf_cr[1], ps_cabac->u4_range);
    690             }
    691             else
    692             {
    693                 u4_bits_estimated_prev = ps_cabac->u4_bits_estimated_q12;
    694                 ret |= ihevce_cabac_encode_bin(ps_cabac, ai4_cbf_cr[0] || ai4_cbf_cr[1], ctxt_inc);
    695 
    696                 if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
    697                 {  // clang-format off
    698                     /*PIC INFO : Populate CBF cr bits*/
    699                     ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_cbf_chroma_bits +=
    700                         (ps_cabac->u4_bits_estimated_q12 -
    701                             u4_bits_estimated_prev);
    702                 }  // clang-format on
    703 
    704                 AEV_TRACE("cbf_cr", ai4_cbf_cr[0] || ai4_cbf_cr[1], ps_cabac->u4_range);
    705             }
    706         }
    707         else
    708         {
    709             ai4_cbf_cr[0] = ps_entropy_ctxt->apu1_cbf_cr[0][tr_depth - 1];
    710             ai4_cbf_cr[1] = ps_entropy_ctxt->apu1_cbf_cr[1][tr_depth - 1];
    711         }
    712 
    713         ps_entropy_ctxt->apu1_cbf_cb[0][tr_depth] = ai4_cbf_cb[0];
    714         ps_entropy_ctxt->apu1_cbf_cr[0][tr_depth] = ai4_cbf_cr[0];
    715         ps_entropy_ctxt->apu1_cbf_cb[1][tr_depth] = ai4_cbf_cb[1];
    716         ps_entropy_ctxt->apu1_cbf_cr[1][tr_depth] = ai4_cbf_cr[1];
    717     }
    718     else
    719     {
    720         ai4_cbf_cb[0] = ps_entropy_ctxt->apu1_cbf_cb[0][tr_depth - 1];
    721         ai4_cbf_cr[0] = ps_entropy_ctxt->apu1_cbf_cr[0][tr_depth - 1];
    722         ai4_cbf_cb[1] = ps_entropy_ctxt->apu1_cbf_cb[1][tr_depth - 1];
    723         ai4_cbf_cr[1] = ps_entropy_ctxt->apu1_cbf_cr[1][tr_depth - 1];
    724     }
    725 
    726     if(split_tr_flag)
    727     {
    728         /* recurse into quad child nodes till a leaf node is reached */
    729         WORD32 x1_ctb = x0_ctb + ((1 << log2_tr_size) >> 1);
    730         WORD32 y1_ctb = y0_ctb + ((1 << log2_tr_size) >> 1);
    731 
    732         /* node0 of quad tree */
    733         ret |= ihevce_encode_transform_tree(
    734             ps_entropy_ctxt,
    735             x0_ctb,
    736             y0_ctb,
    737             log2_tr_size - 1,
    738             tr_depth + 1,
    739             0, /* block 0 */
    740             ps_enc_cu);
    741 
    742         /* node1 of quad tree */
    743         ret |= ihevce_encode_transform_tree(
    744             ps_entropy_ctxt,
    745             x1_ctb,
    746             y0_ctb,
    747             log2_tr_size - 1,
    748             tr_depth + 1,
    749             1, /* block 1 */
    750             ps_enc_cu);
    751 
    752         /* node2 of quad tree */
    753         ret |= ihevce_encode_transform_tree(
    754             ps_entropy_ctxt,
    755             x0_ctb,
    756             y1_ctb,
    757             log2_tr_size - 1,
    758             tr_depth + 1,
    759             2, /* block 2 */
    760             ps_enc_cu);
    761 
    762         /* node3 of quad tree */
    763         ret |= ihevce_encode_transform_tree(
    764             ps_entropy_ctxt,
    765             x1_ctb,
    766             y1_ctb,
    767             log2_tr_size - 1,
    768             tr_depth + 1,
    769             3, /* block 3 */
    770             ps_enc_cu);
    771     }
    772     else
    773     {
    774         /* leaf node is reached! Encode the TU */
    775         WORD32 encode_delta_qp;
    776         void *pv_coeff;
    777         void *pv_cu_coeff = ps_enc_cu->pv_coeff;
    778 
    779         /* condition to encode qp of cu in first coded tu */
    780         encode_delta_qp = ps_entropy_ctxt->i1_encode_qp_delta &&
    781                           (ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS);
    782 
    783         if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
    784         {  // clang-format off
    785             /*PIC INFO : Tota TUs based on size*/
    786             if(32 == tu_size)
    787             {
    788                 ps_entropy_ctxt->ps_pic_level_info->i8_total_tu_based_on_size[3]++;
    789             }
    790             else
    791             {
    792                 ps_entropy_ctxt->ps_pic_level_info->i8_total_tu_based_on_size[tu_size >> 3]++;
    793             }
    794         }  // clang-format on
    795 
    796         /* sanity checks */
    797         ASSERT(ps_entropy_ctxt->i1_ctb_num_pcm_blks == 0);
    798         ASSERT((ps_enc_tu->s_tu.b4_pos_x << 2) == x0_ctb);
    799         ASSERT((ps_enc_tu->s_tu.b4_pos_y << 2) == y0_ctb);
    800         ASSERT(tu_size == (1 << log2_tr_size));
    801 
    802         /********************************************************************/
    803         /* encode luma cbf if any of following conditions are true          */
    804         /* intra cu | transform depth > 0 | any of chroma cbfs are coded    */
    805         /*                                                                  */
    806         /* Note that these conditions mean that cbf_luma need not be        */
    807         /* signalled and implicitly derived as 1 for inter cu whose tfr size*/
    808         /* is same as cu size and cbf for cb+cr are zero as no_residue_flag */
    809         /* at cu level = 1 indicated cbf luma is coded                      */
    810         /********************************************************************/
    811         if(is_intra || (tr_depth != 0) || ai4_cbf_cb[0] || ai4_cbf_cr[0] ||
    812            ((u1_is_422) && (ai4_cbf_cb[1] || ai4_cbf_cr[1])))
    813         {
    814             /* encode  cbf luma, context derived as per Table 9-37 */
    815             cbf_luma = ps_enc_tu->s_tu.b1_y_cbf;
    816 
    817             ctxt_inc = IHEVC_CAB_CBF_LUMA_IDX;
    818             ctxt_inc += (tr_depth == 0) ? 1 : 0;
    819 
    820             if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
    821             {
    822                 if(1 == cbf_luma)
    823                 {
    824                     // clang-format off
    825                     /*PIC INFO: Populated coded Intra/Inter TUs in CU*/
    826                     if(1 == is_intra)
    827                         ps_entropy_ctxt->ps_pic_level_info->i8_total_intra_coded_tu++;
    828                     else
    829                         ps_entropy_ctxt->ps_pic_level_info->i8_total_inter_coded_tu++;
    830                     // clang-format on
    831                 }
    832                 else
    833                 { /*PIC INFO: Populated coded non-coded TUs in CU*/
    834                     ps_entropy_ctxt->ps_pic_level_info->i8_total_non_coded_tu++;
    835                 }
    836             }
    837             u4_bits_estimated_prev = ps_cabac->u4_bits_estimated_q12;
    838             ret |= ihevce_cabac_encode_bin(ps_cabac, cbf_luma, ctxt_inc);
    839 
    840             if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
    841             {  // clang-format off
    842                 /*PIC INFO : Populate CBF luma bits*/
    843                 ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_cbf_luma_bits +=
    844                     (ps_cabac->u4_bits_estimated_q12 - u4_bits_estimated_prev);
    845             }  // clang-format on
    846             AEV_TRACE("cbf_luma", cbf_luma, ps_cabac->u4_range);
    847         }
    848         else
    849         {
    850             if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
    851             {
    852                 /*PIC INFO: Populated coded Inter TUs in CU*/
    853                 ps_entropy_ctxt->ps_pic_level_info->i8_total_inter_coded_tu++;
    854             }
    855 
    856             /* shall be 1 as no_residue_flag was encoded as 1 in inter cu */
    857             ASSERT(1 == ps_enc_tu->s_tu.b1_y_cbf);
    858             cbf_luma = ps_enc_tu->s_tu.b1_y_cbf;
    859         }
    860 
    861         /*******************************************************************/
    862         /* code qp delta conditionally if following conditions are true    */
    863         /* any cbf coded (luma/cb/cr) and qp_delta_coded is 0 for this cu  */
    864         /* see section 7.3.12 Transform unit Syntax                        */
    865         /*******************************************************************/
    866         {
    867             WORD32 cbf_chroma = (ai4_cbf_cb[0] || ai4_cbf_cr[0]) ||
    868                                 (u1_is_422 && (ai4_cbf_cb[1] || ai4_cbf_cr[1]));
    869 
    870             if((cbf_luma || cbf_chroma) && encode_delta_qp)
    871             {
    872                 WORD32 tu_qp = ps_enc_tu->s_tu.b7_qp;
    873                 WORD32 qp_pred, qp_left, qp_top;
    874                 WORD32 qp_delta = tu_qp - ps_entropy_ctxt->i1_cur_qp;
    875                 WORD32 x_nbr_indx, y_nbr_indx;
    876 
    877                 /* Added code for handling the QP neighbour population depending
    878                    on the diff_cu_qp_delta_depth: Lokesh  */
    879                 /* minus 2 becoz the pos_x and pos_y are given in the order of
    880                  * 8x8 blocks rather than pixels */
    881                 WORD32 log2_min_cu_qp_delta_size =
    882                     ps_entropy_ctxt->i1_log2_ctb_size -
    883                     ps_entropy_ctxt->ps_pps->i1_diff_cu_qp_delta_depth;
    884                 //WORD32 min_cu_qp_delta_size = 1 << log2_min_cu_qp_delta_size;
    885 
    886                 //WORD32 curr_pos_x = ps_enc_cu->b3_cu_pos_x << 3;
    887                 //WORD32 curr_pos_y = ps_enc_cu->b3_cu_pos_y << 3;
    888 
    889                 WORD32 block_addr_align = 15 << (log2_min_cu_qp_delta_size - 3);
    890 
    891                 ps_entropy_ctxt->i4_qg_pos_x = ps_enc_cu->b3_cu_pos_x & block_addr_align;
    892                 ps_entropy_ctxt->i4_qg_pos_y = ps_enc_cu->b3_cu_pos_y & block_addr_align;
    893 
    894                 x_nbr_indx = ps_entropy_ctxt->i4_qg_pos_x - 1;
    895                 y_nbr_indx = ps_entropy_ctxt->i4_qg_pos_y - 1;
    896 
    897                 if(ps_entropy_ctxt->i4_qg_pos_x > 0)
    898                 {
    899                     // clang-format off
    900                     qp_left =
    901                         ps_entropy_ctxt->ai4_8x8_cu_qp[x_nbr_indx +
    902                                             (ps_entropy_ctxt->i4_qg_pos_y * 8)];
    903                     // clang-format on
    904                 }
    905                 if(ps_entropy_ctxt->i4_qg_pos_y > 0)
    906                 {
    907                     // clang-format off
    908                     qp_top = ps_entropy_ctxt->ai4_8x8_cu_qp[ps_entropy_ctxt->i4_qg_pos_x +
    909                                                  y_nbr_indx * 8];
    910                     // clang-format on
    911                 }
    912                 if(ps_entropy_ctxt->i4_qg_pos_x == 0)
    913                 {
    914                     /*previous coded Qp*/
    915                     qp_left = ps_entropy_ctxt->i1_cur_qp;
    916                 }
    917                 if(ps_entropy_ctxt->i4_qg_pos_y == 0)
    918                 {
    919                     /*previous coded Qp*/
    920                     qp_top = ps_entropy_ctxt->i1_cur_qp;
    921                 }
    922 
    923                 qp_pred = (qp_left + qp_top + 1) >> 1;
    924                 // clang-format off
    925                 /* start of every frame encode qp delta wrt slice qp when entrop
    926                  * sync is enabled */
    927                 if(ps_entropy_ctxt->i4_ctb_x == 0 &&
    928                     ps_entropy_ctxt->i4_qg_pos_x == 0 &&
    929                     ps_entropy_ctxt->i4_qg_pos_y == 0 &&
    930                     ps_entropy_ctxt->s_cabac_ctxt.i1_entropy_coding_sync_enabled_flag)
    931                 // clang-format on
    932                 {
    933                     qp_pred = ps_entropy_ctxt->ps_slice_hdr->i1_slice_qp_delta +
    934                               ps_entropy_ctxt->ps_pps->i1_pic_init_qp;
    935                 }
    936                 qp_delta = tu_qp - qp_pred;
    937 
    938                 /*PIC INFO : Populate QP delta bits*/
    939                 u4_bits_estimated_prev = ps_cabac->u4_bits_estimated_q12;
    940 
    941                 /* code the qp delta */
    942                 ret |= ihevce_cabac_encode_qp_delta(ps_cabac, qp_delta);
    943 
    944                 if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
    945                 {
    946                     // clang-format off
    947                     ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_qp_delta_bits +=
    948                         (ps_cabac->u4_bits_estimated_q12 -
    949                             u4_bits_estimated_prev);
    950                     // clang-format on
    951                 }
    952 
    953                 ps_entropy_ctxt->i1_cur_qp = tu_qp;
    954                 //ps_entropy_ctxt->i1_cur_qp = Qp_pred;
    955                 ps_entropy_ctxt->i1_encode_qp_delta = 0;
    956                 //ps_entropy_ctxt->i4_is_cu_cbf_zero = 0;
    957             }
    958 
    959             if(cbf_luma || cbf_chroma)
    960             {
    961                 ps_entropy_ctxt->i4_is_cu_cbf_zero = 0;
    962             }
    963 
    964             /* code the residue of for luma and chroma tu based on cbf */
    965             if((cbf_luma) && (1 == ps_entropy_ctxt->i4_enable_res_encode))
    966             {
    967                 u4_bits_estimated_prev = ps_entropy_ctxt->s_cabac_ctxt.u4_bits_estimated_q12;
    968                 /* code the luma residue */
    969                 pv_coeff = (void *)((UWORD8 *)pv_cu_coeff + ps_enc_tu->i4_luma_coeff_offset);
    970 
    971                 ret |= ihevce_cabac_residue_encode(ps_entropy_ctxt, pv_coeff, log2_tr_size, 1);
    972 
    973                 if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
    974                 {  // clang-format off
    975                     /*PIC INFO : Populate Residue Luma Bits*/
    976                     ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_res_luma_bits +=
    977                         (ps_entropy_ctxt->s_cabac_ctxt.u4_bits_estimated_q12 -
    978                             u4_bits_estimated_prev);
    979                 }  // clang-format on
    980             }
    981 
    982             /* code chroma residue based on tranform size                  */
    983             /* For Inta 4x4 pu chroma is coded after all 4 luma blks coded */
    984             /* Note: chroma not encoded in rdopt mode                      */
    985             if(((log2_tr_size > 2) || (3 == blk_num)) /* &&
    986                 (CABAC_MODE_ENCODE_BITS == ps_cabac->e_cabac_op_mode) */
    987             )
    988             {
    989                 WORD32 log2_chroma_tr_size;
    990                 WORD32 i4_subtu_idx;
    991                 void *pv_coeff_cb, *pv_coeff_cr;
    992 
    993                 WORD32 i4_num_subtus = u1_is_422 + 1;
    994 
    995                 if(1 == ps_entropy_ctxt->i4_enable_res_encode)
    996                 {
    997                     for(i4_subtu_idx = 0; i4_subtu_idx < i4_num_subtus; i4_subtu_idx++)
    998                     {
    999                         if(ai4_cbf_cb[i4_subtu_idx])
   1000                         {
   1001                             /* initailize chroma transform size and coeff based
   1002                              * on luma size */
   1003                             if(2 == log2_tr_size)
   1004                             {
   1005                                 /*********************************************************/
   1006                                 /* For Intra 4x4, chroma transform size is 4 and chroma  */
   1007                                 /* coeff offset is present  in the first Luma block      */
   1008                                 /*********************************************************/
   1009                                 log2_chroma_tr_size = 2;
   1010 
   1011                                 /* -3 is for going to first luma tu of the 4 TUs in min CU */
   1012                                 pv_coeff_cb =
   1013                                     (void
   1014                                          *)((UWORD8 *)pv_cu_coeff + ps_enc_tu[-3].ai4_cb_coeff_offset[i4_subtu_idx]);
   1015                             }
   1016                             else
   1017                             {
   1018                                 log2_chroma_tr_size = (log2_tr_size - 1);
   1019 
   1020                                 pv_coeff_cb =
   1021                                     (void
   1022                                          *)((UWORD8 *)pv_cu_coeff + ps_enc_tu->ai4_cb_coeff_offset[i4_subtu_idx]);
   1023                             }
   1024                             // clang-format off
   1025                             u4_bits_estimated_prev =
   1026                                 ps_entropy_ctxt->s_cabac_ctxt.u4_bits_estimated_q12;
   1027                             // clang-format on
   1028                             /* code the cb residue */
   1029                             ret |= ihevce_cabac_residue_encode(
   1030                                 ps_entropy_ctxt, pv_coeff_cb, log2_chroma_tr_size, 0);
   1031 
   1032                             if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
   1033                             {  // clang-format off
   1034                                 /*PIC INFO : Populate Residue Chroma cr Bits*/
   1035                                 ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_res_chroma_bits +=
   1036                                     (ps_entropy_ctxt->s_cabac_ctxt.u4_bits_estimated_q12 -
   1037                                         u4_bits_estimated_prev);
   1038                             }  // clang-format on
   1039                         }
   1040                     }
   1041                 }
   1042 
   1043                 if(1 == ps_entropy_ctxt->i4_enable_res_encode)
   1044                 {
   1045                     for(i4_subtu_idx = 0; i4_subtu_idx < i4_num_subtus; i4_subtu_idx++)
   1046                     {
   1047                         if(ai4_cbf_cr[i4_subtu_idx])
   1048                         {
   1049                             /* initailize chroma transform size and coeff based on luma size */
   1050                             if(2 == log2_tr_size)
   1051                             {
   1052                                 /*********************************************************/
   1053                                 /* For Intra 4x4, chroma transform size is 4 and chroma  */
   1054                                 /* coeff offset is present  in the first Luma block      */
   1055                                 /*********************************************************/
   1056                                 log2_chroma_tr_size = 2;
   1057 
   1058                                 pv_coeff_cr =
   1059                                     (void
   1060                                          *)((UWORD8 *)pv_cu_coeff + ps_enc_tu[-3].ai4_cr_coeff_offset[i4_subtu_idx]);
   1061                             }
   1062                             else
   1063                             {
   1064                                 log2_chroma_tr_size = (log2_tr_size - 1);
   1065 
   1066                                 pv_coeff_cr =
   1067                                     (void
   1068                                          *)((UWORD8 *)pv_cu_coeff + ps_enc_tu->ai4_cr_coeff_offset[i4_subtu_idx]);
   1069                             }
   1070                             // clang-format off
   1071                             u4_bits_estimated_prev =
   1072                                 ps_entropy_ctxt->s_cabac_ctxt.u4_bits_estimated_q12;
   1073                             // clang-format on
   1074                             /* code the cb residue */
   1075                             ret |= ihevce_cabac_residue_encode(
   1076                                 ps_entropy_ctxt, pv_coeff_cr, log2_chroma_tr_size, 0);
   1077                             if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
   1078                             {  // clang-format off
   1079                                 /*PIC INFO : Populate Residue Chroma cr Bits*/
   1080                                 ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_res_chroma_bits +=
   1081                                     (ps_entropy_ctxt->s_cabac_ctxt.u4_bits_estimated_q12 -
   1082                                         u4_bits_estimated_prev);
   1083                             }  // clang-format on
   1084                         }
   1085                     }
   1086                 }
   1087             }
   1088         }
   1089 
   1090         /* update tu_idx after encoding current tu */
   1091         ps_entropy_ctxt->i4_tu_idx++;
   1092     }
   1093 
   1094     return ret;
   1095 }
   1096 
   1097 /**
   1098 ******************************************************************************
   1099 *
   1100 *  @brief Encodes a transform residual block as per section 7.3.13
   1101 *
   1102 *  @par   Description
   1103 *   The residual block is read from a compressed coeff buffer populated during
   1104 *   the scanning of the quantized coeffs. The contents of the buffer are
   1105 *   briefly explained in param description of pv_coeff
   1106 *
   1107 *  @remarks Does not support sign data hiding and transform skip flag currently
   1108 *
   1109 *  @remarks Need to resolve the differences between JVT-J1003_d7 spec and
   1110 *           HM.8.0-dev for related abs_greater_than_1 context initialization
   1111 *           and rice_max parameter used for coeff abs level remaining
   1112 *
   1113 *  @param[inout]   ps_entropy_ctxt
   1114 *  pointer to entropy context (handle)
   1115 *
   1116 *  @param[in]      pv_coeff
   1117 *  Compressed residue buffer containing following information:
   1118 *
   1119 *  HEADER(4 bytes) : last_coeff_x, last_coeff_y, scantype, last_subblock_num
   1120 *
   1121 *  For each 4x4 subblock starting from last_subblock_num (in scan order)
   1122 *     Read 2 bytes  : MSB 12bits (0xBAD marker), bit0 cur_csbf, bit1-2 nbr csbf
   1123 *
   1124 *     If cur_csbf
   1125 *      Read 2 bytes : sig_coeff_map (16bits in scan_order 1:coded, 0:not coded)
   1126 *      Read 2 bytes : abs_gt1_flags (max of 8 only)
   1127 *      Read 2 bytes : coeff_sign_flags
   1128 *
   1129 *      Based on abs_gt1_flags and sig_coeff_map read remaining abs levels
   1130 *      Read 2 bytes : remaining_abs_coeffs_minus1 (this is in a loop)
   1131 *
   1132 *  @param[in]      log2_tr_size
   1133 *  transform size of the current TU
   1134 *
   1135 *  @param[in]      is_luma
   1136 *  boolean indicating if the texture type is luma / chroma
   1137 *
   1138 *
   1139 *  @return      success or failure error code
   1140 *
   1141 ******************************************************************************
   1142 */
   1143 WORD32 ihevce_cabac_residue_encode(
   1144     entropy_context_t *ps_entropy_ctxt, void *pv_coeff, WORD32 log2_tr_size, WORD32 is_luma)
   1145 {
   1146     WORD32 ret = IHEVCE_SUCCESS;
   1147     cab_ctxt_t *ps_cabac = &ps_entropy_ctxt->s_cabac_ctxt;
   1148     WORD32 i4_sign_data_hiding_flag, cu_tq_bypass_flag;
   1149 
   1150     UWORD8 *pu1_coeff_buf_hdr = (UWORD8 *)pv_coeff;
   1151     UWORD16 *pu2_sig_coeff_buf = (UWORD16 *)pv_coeff;
   1152 
   1153     /* last sig coeff indices in scan order */
   1154     WORD32 last_sig_coeff_x = pu1_coeff_buf_hdr[0];
   1155     WORD32 last_sig_coeff_y = pu1_coeff_buf_hdr[1];
   1156 
   1157     /* read the scan type : upright diag / horz / vert */
   1158     WORD32 scan_type = pu1_coeff_buf_hdr[2];
   1159 
   1160     /************************************************************************/
   1161     /* position of the last coded sub block. This sub block contains coeff  */
   1162     /* corresponding to last_sig_coeff_x, last_sig_coeff_y. Althoug this can*/
   1163     /* be derived here it better to be populated by scanning module         */
   1164     /************************************************************************/
   1165     WORD32 last_csb = pu1_coeff_buf_hdr[3];
   1166 
   1167     WORD32 cur_csbf = 0, nbr_csbf;
   1168     WORD32 sig_coeff_base_ctxt; /* cabac context for sig coeff flag    */
   1169     WORD32 abs_gt1_base_ctxt; /* cabac context for abslevel > 1 flag */
   1170 
   1171     WORD32 gt1_ctxt = 1; /* required for abs_gt1_ctxt modelling */
   1172 
   1173     WORD32 i;
   1174 
   1175     /* sanity checks */
   1176     /* transform skip not supported */
   1177     ASSERT(0 == ps_entropy_ctxt->ps_pps->i1_transform_skip_enabled_flag);
   1178 
   1179     cu_tq_bypass_flag = ps_entropy_ctxt->ps_pps->i1_transform_skip_enabled_flag;
   1180 
   1181     i4_sign_data_hiding_flag = ps_entropy_ctxt->ps_pps->i1_sign_data_hiding_flag;
   1182 
   1183     if(SCAN_VERT == scan_type)
   1184     {
   1185         /* last coeff x and y are swapped for vertical scan */
   1186         SWAP(last_sig_coeff_x, last_sig_coeff_y);
   1187     }
   1188 
   1189     /* Encode the last_sig_coeff_x and last_sig_coeff_y */
   1190     ret |= ihevce_cabac_encode_last_coeff_x_y(
   1191         ps_cabac, last_sig_coeff_x, last_sig_coeff_y, log2_tr_size, is_luma);
   1192 
   1193     /*************************************************************************/
   1194     /* derive base context index for sig coeff as per section 9.3.3.1.4      */
   1195     /* TODO; convert to look up based on luma/chroma, scan type and tfr size */
   1196     /*************************************************************************/
   1197     if(is_luma)
   1198     {
   1199         sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG;
   1200         abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG;
   1201 
   1202         if(3 == log2_tr_size)
   1203         {
   1204             /* 8x8 transform size */
   1205             sig_coeff_base_ctxt += (scan_type == SCAN_DIAG_UPRIGHT) ? 9 : 15;
   1206         }
   1207         else if(3 < log2_tr_size)
   1208         {
   1209             /* larger transform sizes */
   1210             sig_coeff_base_ctxt += 21;
   1211         }
   1212     }
   1213     else
   1214     {
   1215         /* chroma context initializations */
   1216         sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG + 27;
   1217         abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG + 16;
   1218 
   1219         if(3 == log2_tr_size)
   1220         {
   1221             /* 8x8 transform size */
   1222             sig_coeff_base_ctxt += 9;
   1223         }
   1224         else if(3 < log2_tr_size)
   1225         {
   1226             /* larger transform sizes */
   1227             sig_coeff_base_ctxt += 12;
   1228         }
   1229     }
   1230 
   1231     /* go to csbf flags */
   1232     pu2_sig_coeff_buf = (UWORD16 *)(pu1_coeff_buf_hdr + COEFF_BUF_HEADER_LEN);
   1233 
   1234     /************************************************************************/
   1235     /* encode the csbf, sig_coeff_map, abs_grt1_flags, abs_grt2_flag, sign  */
   1236     /* and abs_coeff_remaining for each 4x4 starting from last scan to first*/
   1237     /************************************************************************/
   1238     for(i = last_csb; i >= 0; i--)
   1239     {
   1240         UWORD16 u2_marker_csbf;
   1241         WORD32 ctxt_idx;
   1242 
   1243         u2_marker_csbf = *pu2_sig_coeff_buf;
   1244         pu2_sig_coeff_buf++;
   1245 
   1246         /* sanity checks for marker present in every csbf flag */
   1247         ASSERT((u2_marker_csbf >> 4) == 0xBAD);
   1248 
   1249         /* extract the current and neigbour csbf flags */
   1250         cur_csbf = u2_marker_csbf & 0x1;
   1251         nbr_csbf = (u2_marker_csbf >> 1) & 0x3;
   1252 
   1253         /*********************************************************************/
   1254         /* code the csbf flags; last and first csb not sent as it is derived */
   1255         /*********************************************************************/
   1256         if((i < last_csb) && (i > 0))
   1257         {
   1258             ctxt_idx = IHEVC_CAB_CODED_SUBLK_IDX;
   1259 
   1260             /* ctxt based on right / bottom avail csbf, section 9.3.3.1.3 */
   1261             ctxt_idx += nbr_csbf ? 1 : 0;
   1262             ctxt_idx += is_luma ? 0 : 2;
   1263 
   1264             ret |= ihevce_cabac_encode_bin(ps_cabac, cur_csbf, ctxt_idx);
   1265             AEV_TRACE("coded_sub_block_flag", cur_csbf, ps_cabac->u4_range);
   1266         }
   1267         else
   1268         {
   1269             /* sanity check, this csb contains the last_sig_coeff */
   1270             if(i == last_csb)
   1271             {
   1272                 ASSERT(cur_csbf == 1);
   1273             }
   1274         }
   1275 
   1276         if(cur_csbf)
   1277         {
   1278             /*****************************************************************/
   1279             /* encode the sig coeff map as per section 7.3.13                */
   1280             /* significant_coeff_flags: msb=coeff15-lsb=coeff0 in scan order */
   1281             /*****************************************************************/
   1282 
   1283             /* Added for Sign bit data hiding*/
   1284             WORD32 first_scan_pos = 16;
   1285             WORD32 last_scan_pos = -1;
   1286             WORD32 sign_hidden = 0;
   1287 
   1288             UWORD16 u2_gt0_flags = *pu2_sig_coeff_buf;
   1289             WORD32 gt1_flags = *(pu2_sig_coeff_buf + 1);
   1290             WORD32 sign_flags = *(pu2_sig_coeff_buf + 2);
   1291 
   1292             WORD32 sig_coeff_map = u2_gt0_flags;
   1293 
   1294             WORD32 gt1_bins = 0; /* bins for coeffs with abslevel > 1 */
   1295 
   1296             WORD32 sign_bins = 0; /* bins for sign flags of coded coeffs  */
   1297             WORD32 num_coded = 0; /* total coeffs coded in 4x4            */
   1298 
   1299             WORD32 infer_coeff; /* infer when 0,0 is the only coded coeff */
   1300             WORD32 bit; /* temp boolean */
   1301 
   1302             /* total count of coeffs to be coded as abs level remaining */
   1303             WORD32 num_coeffs_remaining = 0;
   1304 
   1305             /* count of coeffs to be coded as  abslevel-1 */
   1306             WORD32 num_coeffs_base1 = 0;
   1307             WORD32 scan_pos;
   1308             WORD32 first_gt1_coeff = 0;
   1309 
   1310             if((i != 0) || (0 == last_csb))
   1311             {
   1312                 /* sanity check, atleast one coeff is coded as csbf is set */
   1313                 ASSERT(sig_coeff_map != 0);
   1314             }
   1315 
   1316             pu2_sig_coeff_buf += 3;
   1317 
   1318             scan_pos = 15;
   1319             if(i == last_csb)
   1320             {
   1321                 /*************************************************************/
   1322                 /* clear last_scan_pos for last block in scan order as this  */
   1323                 /* is communicated  throught last_coeff_x and last_coeff_y   */
   1324                 /*************************************************************/
   1325                 WORD32 next_sig = CLZ(sig_coeff_map) + 1;
   1326 
   1327                 scan_pos = WORD_SIZE - next_sig;
   1328 
   1329                 /* prepare the bins for gt1 flags */
   1330                 EXTRACT_BIT(bit, gt1_flags, scan_pos);
   1331 
   1332                 /* insert gt1 bin in lsb */
   1333                 gt1_bins |= bit;
   1334 
   1335                 /* prepare the bins for sign flags */
   1336                 EXTRACT_BIT(bit, sign_flags, scan_pos);
   1337 
   1338                 /* insert sign bin in lsb */
   1339                 sign_bins |= bit;
   1340 
   1341                 sig_coeff_map = CLEAR_BIT(sig_coeff_map, scan_pos);
   1342 
   1343                 if(-1 == last_scan_pos)
   1344                     last_scan_pos = scan_pos;
   1345 
   1346                 scan_pos--;
   1347                 num_coded++;
   1348             }
   1349 
   1350             /* infer 0,0 coeff for all 4x4 blocks except fitst and last */
   1351             infer_coeff = (i < last_csb) && (i > 0);
   1352 
   1353             /* encode the required sigcoeff flags (abslevel > 0)   */
   1354             while(scan_pos >= 0)
   1355             {
   1356                 WORD32 y_pos_x_pos;
   1357                 WORD32 sig_ctxinc = 0; /* 0 is default inc for DC coeff */
   1358 
   1359                 WORD32 sig_coeff;
   1360 
   1361                 EXTRACT_BIT(sig_coeff, sig_coeff_map, scan_pos);
   1362 
   1363                 /* derive the x,y pos */
   1364                 y_pos_x_pos = gu1_hevce_scan4x4[scan_type][scan_pos];
   1365 
   1366                 /* derive the context inc as per section 9.3.3.1.4 */
   1367                 if(2 == log2_tr_size)
   1368                 {
   1369                     /* 4x4 transform size increment uses lookup */
   1370                     sig_ctxinc = gu1_hevce_sigcoeff_ctxtinc_tr4[y_pos_x_pos];
   1371                 }
   1372                 else if(scan_pos || i)
   1373                 {
   1374                     /* ctxt for AC coeff depends on curpos and neigbour csbf */
   1375                     sig_ctxinc = gu1_hevce_sigcoeff_ctxtinc[nbr_csbf][y_pos_x_pos];
   1376 
   1377                     /* based on luma subblock pos */
   1378                     sig_ctxinc += (i && is_luma) ? 3 : 0;
   1379                 }
   1380                 else
   1381                 {
   1382                     /* DC coeff has fixed context for luma and chroma */
   1383                     sig_coeff_base_ctxt = is_luma ? IHEVC_CAB_COEFF_FLAG
   1384                                                   : IHEVC_CAB_COEFF_FLAG + 27;
   1385                 }
   1386 
   1387                 /*************************************************************/
   1388                 /* encode sig coeff only if required                         */
   1389                 /* decoder infers 0,0 coeff when all the other coeffs are 0  */
   1390                 /*************************************************************/
   1391                 if(scan_pos || (!infer_coeff))
   1392                 {
   1393                     ctxt_idx = sig_ctxinc + sig_coeff_base_ctxt;
   1394                     ret |= ihevce_cabac_encode_bin(ps_cabac, sig_coeff, ctxt_idx);
   1395                     AEV_TRACE("significant_coeff_flag", sig_coeff, ps_cabac->u4_range);
   1396                 }
   1397 
   1398                 if(sig_coeff)
   1399                 {
   1400                     /* prepare the bins for gt1 flags */
   1401                     EXTRACT_BIT(bit, gt1_flags, scan_pos);
   1402 
   1403                     /* shift and insert gt1 bin in lsb */
   1404                     gt1_bins <<= 1;
   1405                     gt1_bins |= bit;
   1406 
   1407                     /* prepare the bins for sign flags */
   1408                     EXTRACT_BIT(bit, sign_flags, scan_pos);
   1409 
   1410                     /* shift and insert sign bin in lsb */
   1411                     sign_bins <<= 1;
   1412                     sign_bins |= bit;
   1413 
   1414                     num_coded++;
   1415 
   1416                     /* 0,0 coeff can no more be inferred :( */
   1417                     infer_coeff = 0;
   1418 
   1419                     if(-1 == last_scan_pos)
   1420                         last_scan_pos = scan_pos;
   1421 
   1422                     first_scan_pos = scan_pos;
   1423                 }
   1424 
   1425                 scan_pos--;
   1426             }
   1427 
   1428             /* Added for sign bit hiding*/
   1429             sign_hidden = ((last_scan_pos - first_scan_pos) > 3 && !cu_tq_bypass_flag);
   1430 
   1431             /****************************************************************/
   1432             /* encode the abs level greater than 1 bins; Section 7.3.13     */
   1433             /* These have already been prepared during sig_coeff_map encode */
   1434             /* Context modelling done as per section 9.3.3.1.5              */
   1435             /****************************************************************/
   1436             {
   1437                 WORD32 j;
   1438 
   1439                 /* context set based on luma subblock pos */
   1440                 WORD32 ctxt_set = (i && is_luma) ? 2 : 0;
   1441 
   1442                 /* count of coeffs with abslevel > 1; max of 8 to be coded */
   1443                 WORD32 num_gt1_bins = MIN(8, num_coded);
   1444 
   1445                 if(num_coded > 8)
   1446                 {
   1447                     /* pull back the bins to required number */
   1448                     gt1_bins >>= (num_coded - 8);
   1449 
   1450                     num_coeffs_remaining += (num_coded - 8);
   1451                     num_coeffs_base1 = (num_coded - 8);
   1452                 }
   1453 
   1454                 /* See section 9.3.3.1.5           */
   1455                 ctxt_set += (0 == gt1_ctxt) ? 1 : 0;
   1456 
   1457                 gt1_ctxt = 1;
   1458 
   1459                 for(j = num_gt1_bins - 1; j >= 0; j--)
   1460                 {
   1461                     /* Encodet the abs level gt1 bins */
   1462                     ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
   1463 
   1464                     EXTRACT_BIT(bit, gt1_bins, j);
   1465 
   1466                     ret |= ihevce_cabac_encode_bin(ps_cabac, bit, ctxt_idx);
   1467 
   1468                     AEV_TRACE("coeff_abs_level_greater1_flag", bit, ps_cabac->u4_range);
   1469 
   1470                     if(bit)
   1471                     {
   1472                         gt1_ctxt = 0;
   1473                         num_coeffs_remaining++;
   1474                     }
   1475                     else if(gt1_ctxt && (gt1_ctxt < 3))
   1476                     {
   1477                         gt1_ctxt++;
   1478                     }
   1479                 }
   1480 
   1481                 /*************************************************************/
   1482                 /* encode abs level greater than 2 bin; Section 7.3.13       */
   1483                 /*************************************************************/
   1484                 if(gt1_bins)
   1485                 {
   1486                     WORD32 gt2_bin;
   1487 
   1488                     first_gt1_coeff = pu2_sig_coeff_buf[0] + 1;
   1489                     gt2_bin = (first_gt1_coeff > 2);
   1490 
   1491                     /* atleast one level > 2 */
   1492                     ctxt_idx = IHEVC_CAB_COEFABS_GRTR2_FLAG;
   1493 
   1494                     ctxt_idx += (is_luma) ? ctxt_set : (ctxt_set + 4);
   1495 
   1496                     ret |= ihevce_cabac_encode_bin(ps_cabac, gt2_bin, ctxt_idx);
   1497 
   1498                     if(!gt2_bin)
   1499                     {
   1500                         /* sanity check */
   1501                         ASSERT(first_gt1_coeff == 2);
   1502 
   1503                         /* no need to send this coeff as bypass bins */
   1504                         pu2_sig_coeff_buf++;
   1505                         num_coeffs_remaining--;
   1506                     }
   1507 
   1508                     AEV_TRACE("coeff_abs_level_greater2_flag", gt2_bin, ps_cabac->u4_range);
   1509                 }
   1510             }
   1511 
   1512             /*************************************************************/
   1513             /* encode the coeff signs and abs remaing levels             */
   1514             /*************************************************************/
   1515             if(num_coded)
   1516             {
   1517                 WORD32 base_level;
   1518                 WORD32 rice_param = 0;
   1519                 WORD32 j;
   1520 
   1521                 /*************************************************************/
   1522                 /* encode the coeff signs populated in sign_bins             */
   1523                 /*************************************************************/
   1524 
   1525                 if(sign_hidden && i4_sign_data_hiding_flag)
   1526                 {
   1527                     sign_bins >>= 1;
   1528                     num_coded--;
   1529                 }
   1530 
   1531                 if(num_coded > 0)
   1532                 {
   1533                     ret |= ihevce_cabac_encode_bypass_bins(ps_cabac, sign_bins, num_coded);
   1534                 }
   1535 
   1536                 AEV_TRACE("sign_flags", sign_bins, ps_cabac->u4_range);
   1537 
   1538                 /*************************************************************/
   1539                 /* encode the coeff_abs_level_remaining as TR / EGK bins     */
   1540                 /* See section 9.3.2.7 for details                           */
   1541                 /*************************************************************/
   1542 
   1543                 /* first remaining coeff baselevel */
   1544                 if(first_gt1_coeff > 2)
   1545                 {
   1546                     base_level = 3;
   1547                 }
   1548                 else if(num_coeffs_remaining > num_coeffs_base1)
   1549                 {
   1550                     /* atleast one coeff in first 8 is gt > 1 */
   1551                     base_level = 2;
   1552                 }
   1553                 else
   1554                 {
   1555                     /* all coeffs have base of 1 */
   1556                     base_level = 1;
   1557                 }
   1558 
   1559                 for(j = 0; j < num_coeffs_remaining; j++)
   1560                 {
   1561                     WORD32 abs_coeff = pu2_sig_coeff_buf[0] + 1;
   1562                     WORD32 abs_coeff_rem;
   1563                     WORD32 rice_max = (4 << rice_param);
   1564 
   1565                     pu2_sig_coeff_buf++;
   1566 
   1567                     /* sanity check */
   1568                     ASSERT(abs_coeff >= base_level);
   1569 
   1570                     abs_coeff_rem = (abs_coeff - base_level);
   1571 
   1572                     /* TODO://HM-8.0-dev uses (3 << rice_param) as rice_max */
   1573                     /* TODO://HM-8.0-dev does either TR or EGK but not both */
   1574                     if(abs_coeff_rem >= rice_max)
   1575                     {
   1576                         UWORD32 u4_suffix = (abs_coeff_rem - rice_max);
   1577 
   1578                         /* coeff exceeds max rice limit                    */
   1579                         /* encode the TR prefix as tunary code             */
   1580                         /* prefix = 1111 as (rice_max >> rice_praram) = 4  */
   1581                         ret |= ihevce_cabac_encode_bypass_bins(ps_cabac, 0xF, 4);
   1582 
   1583                         /* encode the exponential golomb code suffix */
   1584                         ret |= ihevce_cabac_encode_egk(ps_cabac, u4_suffix, (rice_param + 1));
   1585                     }
   1586                     else
   1587                     {
   1588                         /* code coeff as truncated rice code  */
   1589                         ret |= ihevce_cabac_encode_trunc_rice(
   1590                             ps_cabac, abs_coeff_rem, rice_param, rice_max);
   1591                     }
   1592 
   1593                     AEV_TRACE("coeff_abs_level_remaining", abs_coeff_rem, ps_cabac->u4_range);
   1594 
   1595                     /* update the rice param based on coeff level */
   1596                     if((abs_coeff > (3 << rice_param)) && (rice_param < 4))
   1597                     {
   1598                         rice_param++;
   1599                     }
   1600 
   1601                     /* change base level to 1 if more than 8 coded coeffs */
   1602                     if((j + 1) < (num_coeffs_remaining - num_coeffs_base1))
   1603                     {
   1604                         base_level = 2;
   1605                     }
   1606                     else
   1607                     {
   1608                         base_level = 1;
   1609                     }
   1610                 }
   1611             }
   1612         }
   1613     }
   1614     /*tap texture bits*/
   1615     if(ps_cabac->e_cabac_op_mode == CABAC_MODE_COMPUTE_BITS)
   1616     {  // clang-format off
   1617         ps_cabac->u4_texture_bits_estimated_q12 +=
   1618             (ps_cabac->u4_bits_estimated_q12 -
   1619                 ps_cabac->u4_header_bits_estimated_q12);  //(ps_cabac->u4_bits_estimated_q12 - temp_tex_bits_q12);
   1620     }  // clang-format on
   1621 
   1622     return (ret);
   1623 }
   1624 
   1625 /**
   1626 ******************************************************************************
   1627 *
   1628 *  @brief Get the bits estimate for a transform residual block as per section
   1629 *   7.3.13
   1630 *
   1631 *  @par   Description
   1632 *   The residual block is read from a compressed coeff buffer populated during
   1633 *   the scanning of the quantized coeffs. The contents of the buffer are
   1634 *   briefly explained in param description of pv_coeff
   1635 *
   1636 *  @remarks Does not support sign data hiding and transform skip flag currently
   1637 *
   1638 *  @remarks Need to resolve the differences between JVT-J1003_d7 spec and
   1639 *           HM.8.0-dev for related abs_greater_than_1 context initialization
   1640 *           and rice_max parameter used for coeff abs level remaining
   1641 *
   1642 *  @param[inout]   ps_entropy_ctxt
   1643 *  pointer to entropy context (handle)
   1644 *
   1645 *  @param[in]      pv_coeff
   1646 *  Compressed residue buffer containing following information:
   1647 *
   1648 *  HEADER(4 bytes) : last_coeff_x, last_coeff_y, scantype, last_subblock_num
   1649 *
   1650 *  For each 4x4 subblock starting from last_subblock_num (in scan order)
   1651 *     Read 2 bytes  : MSB 12bits (0xBAD marker), bit0 cur_csbf, bit1-2 nbr csbf
   1652 *
   1653 *     If cur_csbf
   1654 *      Read 2 bytes : sig_coeff_map (16bits in scan_order 1:coded, 0:not coded)
   1655 *      Read 2 bytes : abs_gt1_flags (max of 8 only)
   1656 *      Read 2 bytes : coeff_sign_flags
   1657 *
   1658 *      Based on abs_gt1_flags and sig_coeff_map read remaining abs levels
   1659 *      Read 2 bytes : remaining_abs_coeffs_minus1 (this is in a loop)
   1660 *
   1661 *  @param[in]      log2_tr_size
   1662 *  transform size of the current TU
   1663 *
   1664 *  @param[in]      is_luma
   1665 *  boolean indicating if the texture type is luma / chroma
   1666 *
   1667 *
   1668 *  @return      success or failure error code
   1669 *
   1670 ******************************************************************************
   1671 */
   1672 WORD32 ihevce_cabac_residue_encode_rdopt(
   1673     entropy_context_t *ps_entropy_ctxt,
   1674     void *pv_coeff,
   1675     WORD32 log2_tr_size,
   1676     WORD32 is_luma,
   1677     WORD32 perform_sbh)
   1678 {
   1679     WORD32 ret = IHEVCE_SUCCESS;
   1680     cab_ctxt_t *ps_cabac = &ps_entropy_ctxt->s_cabac_ctxt;
   1681     UWORD32 temp_tex_bits_q12;
   1682     WORD32 i4_sign_data_hiding_flag, cu_tq_bypass_flag;
   1683 
   1684     UWORD8 *pu1_coeff_buf_hdr = (UWORD8 *)pv_coeff;
   1685     UWORD16 *pu2_sig_coeff_buf = (UWORD16 *)pv_coeff;
   1686 
   1687     /* last sig coeff indices in scan order */
   1688     WORD32 last_sig_coeff_x = pu1_coeff_buf_hdr[0];
   1689     WORD32 last_sig_coeff_y = pu1_coeff_buf_hdr[1];
   1690 
   1691     /* read the scan type : upright diag / horz / vert */
   1692     WORD32 scan_type = pu1_coeff_buf_hdr[2];
   1693 
   1694     /************************************************************************/
   1695     /* position of the last coded sub block. This sub block contains coeff  */
   1696     /* corresponding to last_sig_coeff_x, last_sig_coeff_y. Althoug this can*/
   1697     /* be derived here it better to be populated by scanning module         */
   1698     /************************************************************************/
   1699     WORD32 last_csb = pu1_coeff_buf_hdr[3];
   1700 
   1701     WORD32 cur_csbf = 0, nbr_csbf;
   1702     WORD32 sig_coeff_base_ctxt; /* cabac context for sig coeff flag    */
   1703     WORD32 abs_gt1_base_ctxt; /* cabac context for abslevel > 1 flag */
   1704 
   1705     WORD32 gt1_ctxt = 1; /* required for abs_gt1_ctxt modelling */
   1706 
   1707     WORD32 i;
   1708 
   1709     UWORD8 *pu1_ctxt_model = &ps_cabac->au1_ctxt_models[0];
   1710 
   1711     /* sanity checks */
   1712     /* transform skip not supported */
   1713     ASSERT(0 == ps_entropy_ctxt->ps_pps->i1_transform_skip_enabled_flag);
   1714 
   1715     cu_tq_bypass_flag = ps_entropy_ctxt->ps_pps->i1_transform_skip_enabled_flag;
   1716 
   1717     i4_sign_data_hiding_flag = ps_entropy_ctxt->ps_pps->i1_sign_data_hiding_flag;
   1718 
   1719     {
   1720         temp_tex_bits_q12 = ps_cabac->u4_bits_estimated_q12;
   1721     }
   1722 
   1723     if(SCAN_VERT == scan_type)
   1724     {
   1725         /* last coeff x and y are swapped for vertical scan */
   1726         SWAP(last_sig_coeff_x, last_sig_coeff_y);
   1727     }
   1728 
   1729     /* Encode the last_sig_coeff_x and last_sig_coeff_y */
   1730     ret |= ihevce_cabac_encode_last_coeff_x_y(
   1731         ps_cabac, last_sig_coeff_x, last_sig_coeff_y, log2_tr_size, is_luma);
   1732 
   1733     /*************************************************************************/
   1734     /* derive base context index for sig coeff as per section 9.3.3.1.4      */
   1735     /* TODO; convert to look up based on luma/chroma, scan type and tfr size */
   1736     /*************************************************************************/
   1737     if(is_luma)
   1738     {
   1739         sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG;
   1740         abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG;
   1741 
   1742         if(3 == log2_tr_size)
   1743         {
   1744             /* 8x8 transform size */
   1745             sig_coeff_base_ctxt += (scan_type == SCAN_DIAG_UPRIGHT) ? 9 : 15;
   1746         }
   1747         else if(3 < log2_tr_size)
   1748         {
   1749             /* larger transform sizes */
   1750             sig_coeff_base_ctxt += 21;
   1751         }
   1752     }
   1753     else
   1754     {
   1755         /* chroma context initializations */
   1756         sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG + 27;
   1757         abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG + 16;
   1758 
   1759         if(3 == log2_tr_size)
   1760         {
   1761             /* 8x8 transform size */
   1762             sig_coeff_base_ctxt += 9;
   1763         }
   1764         else if(3 < log2_tr_size)
   1765         {
   1766             /* larger transform sizes */
   1767             sig_coeff_base_ctxt += 12;
   1768         }
   1769     }
   1770 
   1771     /* go to csbf flags */
   1772     pu2_sig_coeff_buf = (UWORD16 *)(pu1_coeff_buf_hdr + COEFF_BUF_HEADER_LEN);
   1773 
   1774     /************************************************************************/
   1775     /* encode the csbf, sig_coeff_map, abs_grt1_flags, abs_grt2_flag, sign  */
   1776     /* and abs_coeff_remaining for each 4x4 starting from last scan to first*/
   1777     /************************************************************************/
   1778     for(i = last_csb; i >= 0; i--)
   1779     {
   1780         UWORD16 u2_marker_csbf;
   1781         WORD32 ctxt_idx;
   1782 
   1783         u2_marker_csbf = *pu2_sig_coeff_buf;
   1784         pu2_sig_coeff_buf++;
   1785 
   1786         /* sanity checks for marker present in every csbf flag */
   1787         ASSERT((u2_marker_csbf >> 4) == 0xBAD);
   1788 
   1789         /* extract the current and neigbour csbf flags */
   1790         cur_csbf = u2_marker_csbf & 0x1;
   1791         nbr_csbf = (u2_marker_csbf >> 1) & 0x3;
   1792 
   1793         /*********************************************************************/
   1794         /* code the csbf flags; last and first csb not sent as it is derived */
   1795         /*********************************************************************/
   1796         if((i < last_csb) && (i > 0))
   1797         {
   1798             ctxt_idx = IHEVC_CAB_CODED_SUBLK_IDX;
   1799 
   1800             /* ctxt based on right / bottom avail csbf, section 9.3.3.1.3 */
   1801             ctxt_idx += nbr_csbf ? 1 : 0;
   1802             ctxt_idx += is_luma ? 0 : 2;
   1803 
   1804             {
   1805                 WORD32 state_mps = pu1_ctxt_model[ctxt_idx];
   1806 
   1807                 /* increment bits generated based on state and bin encoded */
   1808                 ps_cabac->u4_bits_estimated_q12 +=
   1809                     gau2_ihevce_cabac_bin_to_bits[state_mps ^ cur_csbf];
   1810 
   1811                 /* update the context model from state transition LUT */
   1812                 pu1_ctxt_model[ctxt_idx] = gau1_ihevc_next_state[(state_mps << 1) | cur_csbf];
   1813             }
   1814         }
   1815         else
   1816         {
   1817             /* sanity check, this csb contains the last_sig_coeff */
   1818             if(i == last_csb)
   1819             {
   1820                 ASSERT(cur_csbf == 1);
   1821             }
   1822         }
   1823 
   1824         if(cur_csbf)
   1825         {
   1826             /*****************************************************************/
   1827             /* encode the sig coeff map as per section 7.3.13                */
   1828             /* significant_coeff_flags: msb=coeff15-lsb=coeff0 in scan order */
   1829             /*****************************************************************/
   1830 
   1831             /* Added for Sign bit data hiding*/
   1832             WORD32 first_scan_pos = 16;
   1833             WORD32 last_scan_pos = -1;
   1834             WORD32 sign_hidden;
   1835 
   1836             UWORD16 u2_gt0_flags = *pu2_sig_coeff_buf;
   1837             WORD32 gt1_flags = *(pu2_sig_coeff_buf + 1);
   1838             WORD32 sign_flags = *(pu2_sig_coeff_buf + 2);
   1839 
   1840             WORD32 sig_coeff_map = u2_gt0_flags;
   1841 
   1842             WORD32 gt1_bins = 0; /* bins for coeffs with abslevel > 1 */
   1843 
   1844             WORD32 sign_bins = 0; /* bins for sign flags of coded coeffs  */
   1845             WORD32 num_coded = 0; /* total coeffs coded in 4x4            */
   1846 
   1847             WORD32 infer_coeff; /* infer when 0,0 is the only coded coeff */
   1848             WORD32 bit; /* temp boolean */
   1849 
   1850             /* total count of coeffs to be coded as abs level remaining */
   1851             WORD32 num_coeffs_remaining = 0;
   1852 
   1853             /* count of coeffs to be coded as  abslevel-1 */
   1854             WORD32 num_coeffs_base1 = 0;
   1855             WORD32 scan_pos;
   1856             WORD32 first_gt1_coeff = 0;
   1857 
   1858             if((i != 0) || (0 == last_csb))
   1859             {
   1860                 /* sanity check, atleast one coeff is coded as csbf is set */
   1861                 ASSERT(sig_coeff_map != 0);
   1862             }
   1863 
   1864             pu2_sig_coeff_buf += 3;
   1865 
   1866             scan_pos = 15;
   1867             if(i == last_csb)
   1868             {
   1869                 /*************************************************************/
   1870                 /* clear last_scan_pos for last block in scan order as this  */
   1871                 /* is communicated  throught last_coeff_x and last_coeff_y   */
   1872                 /*************************************************************/
   1873                 WORD32 next_sig = CLZ(sig_coeff_map) + 1;
   1874 
   1875                 scan_pos = WORD_SIZE - next_sig;
   1876 
   1877                 /* prepare the bins for gt1 flags */
   1878                 EXTRACT_BIT(bit, gt1_flags, scan_pos);
   1879 
   1880                 /* insert gt1 bin in lsb */
   1881                 gt1_bins |= bit;
   1882 
   1883                 /* prepare the bins for sign flags */
   1884                 EXTRACT_BIT(bit, sign_flags, scan_pos);
   1885 
   1886                 /* insert sign bin in lsb */
   1887                 sign_bins |= bit;
   1888 
   1889                 sig_coeff_map = CLEAR_BIT(sig_coeff_map, scan_pos);
   1890 
   1891                 if(-1 == last_scan_pos)
   1892                     last_scan_pos = scan_pos;
   1893 
   1894                 scan_pos--;
   1895                 num_coded++;
   1896             }
   1897 
   1898             /* infer 0,0 coeff for all 4x4 blocks except fitst and last */
   1899             infer_coeff = (i < last_csb) && (i > 0);
   1900 
   1901             /* encode the required sigcoeff flags (abslevel > 0)   */
   1902             while(scan_pos >= 0)
   1903             {
   1904                 WORD32 y_pos_x_pos;
   1905                 WORD32 sig_ctxinc = 0; /* 0 is default inc for DC coeff */
   1906 
   1907                 WORD32 sig_coeff;
   1908 
   1909                 EXTRACT_BIT(sig_coeff, sig_coeff_map, scan_pos);
   1910 
   1911                 /* derive the x,y pos */
   1912                 y_pos_x_pos = gu1_hevce_scan4x4[scan_type][scan_pos];
   1913 
   1914                 /* derive the context inc as per section 9.3.3.1.4 */
   1915                 if(2 == log2_tr_size)
   1916                 {
   1917                     /* 4x4 transform size increment uses lookup */
   1918                     sig_ctxinc = gu1_hevce_sigcoeff_ctxtinc_tr4[y_pos_x_pos];
   1919                 }
   1920                 else if(scan_pos || i)
   1921                 {
   1922                     /* ctxt for AC coeff depends on curpos and neigbour csbf */
   1923                     sig_ctxinc = gu1_hevce_sigcoeff_ctxtinc[nbr_csbf][y_pos_x_pos];
   1924 
   1925                     /* based on luma subblock pos */
   1926                     sig_ctxinc += (i && is_luma) ? 3 : 0;
   1927                 }
   1928                 else
   1929                 {
   1930                     /* DC coeff has fixed context for luma and chroma */
   1931                     sig_coeff_base_ctxt = is_luma ? IHEVC_CAB_COEFF_FLAG
   1932                                                   : IHEVC_CAB_COEFF_FLAG + 27;
   1933                 }
   1934 
   1935                 /*************************************************************/
   1936                 /* encode sig coeff only if required                         */
   1937                 /* decoder infers 0,0 coeff when all the other coeffs are 0  */
   1938                 /*************************************************************/
   1939                 if(scan_pos || (!infer_coeff))
   1940                 {
   1941                     ctxt_idx = sig_ctxinc + sig_coeff_base_ctxt;
   1942 
   1943                     //ret |= ihevce_cabac_encode_bin(ps_cabac, sig_coeff, ctxt_idx);
   1944                     {
   1945                         WORD32 state_mps = pu1_ctxt_model[ctxt_idx];
   1946 
   1947                         /* increment bits generated based on state and bin encoded */
   1948                         ps_cabac->u4_bits_estimated_q12 +=
   1949                             gau2_ihevce_cabac_bin_to_bits[state_mps ^ sig_coeff];
   1950 
   1951                         /* update the context model from state transition LUT */
   1952                         pu1_ctxt_model[ctxt_idx] =
   1953                             gau1_ihevc_next_state[(state_mps << 1) | sig_coeff];
   1954                     }
   1955                 }
   1956 
   1957                 if(sig_coeff)
   1958                 {
   1959                     /* prepare the bins for gt1 flags */
   1960                     EXTRACT_BIT(bit, gt1_flags, scan_pos);
   1961 
   1962                     /* shift and insert gt1 bin in lsb */
   1963                     gt1_bins <<= 1;
   1964                     gt1_bins |= bit;
   1965 
   1966                     /* prepare the bins for sign flags */
   1967                     EXTRACT_BIT(bit, sign_flags, scan_pos);
   1968 
   1969                     /* shift and insert sign bin in lsb */
   1970                     sign_bins <<= 1;
   1971                     sign_bins |= bit;
   1972 
   1973                     num_coded++;
   1974 
   1975                     /* 0,0 coeff can no more be inferred :( */
   1976                     infer_coeff = 0;
   1977 
   1978                     if(-1 == last_scan_pos)
   1979                         last_scan_pos = scan_pos;
   1980 
   1981                     first_scan_pos = scan_pos;
   1982                 }
   1983 
   1984                 scan_pos--;
   1985             }
   1986 
   1987             /* Added for sign bit hiding*/
   1988             sign_hidden =
   1989                 (((last_scan_pos - first_scan_pos) > 3 && !cu_tq_bypass_flag) && (perform_sbh));
   1990 
   1991             /****************************************************************/
   1992             /* encode the abs level greater than 1 bins; Section 7.3.13     */
   1993             /* These have already been prepared during sig_coeff_map encode */
   1994             /* Context modelling done as per section 9.3.3.1.5              */
   1995             /****************************************************************/
   1996             {
   1997                 WORD32 j;
   1998 
   1999                 /* context set based on luma subblock pos */
   2000                 WORD32 ctxt_set = (i && is_luma) ? 2 : 0;
   2001 
   2002                 /* count of coeffs with abslevel > 1; max of 8 to be coded */
   2003                 WORD32 num_gt1_bins = MIN(8, num_coded);
   2004 
   2005                 if(num_coded > 8)
   2006                 {
   2007                     /* pull back the bins to required number */
   2008                     gt1_bins >>= (num_coded - 8);
   2009 
   2010                     num_coeffs_remaining += (num_coded - 8);
   2011                     num_coeffs_base1 = (num_coded - 8);
   2012                 }
   2013 
   2014                 /* See section 9.3.3.1.5           */
   2015                 ctxt_set += (0 == gt1_ctxt) ? 1 : 0;
   2016 
   2017                 gt1_ctxt = 1;
   2018 
   2019                 for(j = num_gt1_bins - 1; j >= 0; j--)
   2020                 {
   2021                     /* Encodet the abs level gt1 bins */
   2022                     ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
   2023 
   2024                     EXTRACT_BIT(bit, gt1_bins, j);
   2025 
   2026                     //ret |= ihevce_cabac_encode_bin(ps_cabac, bit, ctxt_idx);
   2027                     {
   2028                         WORD32 state_mps = pu1_ctxt_model[ctxt_idx];
   2029 
   2030                         /* increment bits generated based on state and bin encoded */
   2031                         ps_cabac->u4_bits_estimated_q12 +=
   2032                             gau2_ihevce_cabac_bin_to_bits[state_mps ^ bit];
   2033 
   2034                         /* update the context model from state transition LUT */
   2035                         pu1_ctxt_model[ctxt_idx] = gau1_ihevc_next_state[(state_mps << 1) | bit];
   2036                     }
   2037 
   2038                     if(bit)
   2039                     {
   2040                         gt1_ctxt = 0;
   2041                         num_coeffs_remaining++;
   2042                     }
   2043                     else if(gt1_ctxt && (gt1_ctxt < 3))
   2044                     {
   2045                         gt1_ctxt++;
   2046                     }
   2047                 }
   2048 
   2049                 /*************************************************************/
   2050                 /* encode abs level greater than 2 bin; Section 7.3.13       */
   2051                 /*************************************************************/
   2052                 if(gt1_bins)
   2053                 {
   2054                     WORD32 gt2_bin;
   2055 
   2056                     first_gt1_coeff = pu2_sig_coeff_buf[0] + 1;
   2057                     gt2_bin = (first_gt1_coeff > 2);
   2058 
   2059                     /* atleast one level > 2 */
   2060                     ctxt_idx = IHEVC_CAB_COEFABS_GRTR2_FLAG;
   2061 
   2062                     ctxt_idx += (is_luma) ? ctxt_set : (ctxt_set + 4);
   2063 
   2064                     //ret |= ihevce_cabac_encode_bin(ps_cabac, gt2_bin, ctxt_idx);
   2065                     {
   2066                         WORD32 state_mps = pu1_ctxt_model[ctxt_idx];
   2067 
   2068                         /* increment bits generated based on state and bin encoded */
   2069                         ps_cabac->u4_bits_estimated_q12 +=
   2070                             gau2_ihevce_cabac_bin_to_bits[state_mps ^ gt2_bin];
   2071 
   2072                         /* update the context model from state transition LUT */
   2073                         pu1_ctxt_model[ctxt_idx] =
   2074                             gau1_ihevc_next_state[(state_mps << 1) | gt2_bin];
   2075                     }
   2076 
   2077                     if(!gt2_bin)
   2078                     {
   2079                         /* sanity check */
   2080                         ASSERT(first_gt1_coeff == 2);
   2081 
   2082                         /* no need to send this coeff as bypass bins */
   2083                         pu2_sig_coeff_buf++;
   2084                         num_coeffs_remaining--;
   2085                     }
   2086                 }
   2087             }
   2088 
   2089             /*************************************************************/
   2090             /* encode the coeff signs and abs remaing levels             */
   2091             /*************************************************************/
   2092             if(num_coded)
   2093             {
   2094                 WORD32 base_level;
   2095                 WORD32 rice_param = 0;
   2096                 WORD32 j;
   2097 
   2098                 /*************************************************************/
   2099                 /* encode the coeff signs populated in sign_bins             */
   2100                 /*************************************************************/
   2101                 if(sign_hidden && i4_sign_data_hiding_flag)
   2102                 {
   2103                     sign_bins >>= 1;
   2104                     num_coded--;
   2105                 }
   2106 
   2107                 if(num_coded > 0)
   2108                 {
   2109                     /* ret |= ihevce_cabac_encode_bypass_bins(ps_cabac,
   2110                                                        sign_bins,
   2111                                                        num_coded);
   2112                     */
   2113 
   2114                     /* increment bits generated based on num bypass bins */
   2115                     ps_cabac->u4_bits_estimated_q12 += (num_coded << CABAC_FRAC_BITS_Q);
   2116                 }
   2117 
   2118                 /*************************************************************/
   2119                 /* encode the coeff_abs_level_remaining as TR / EGK bins     */
   2120                 /* See section 9.3.2.7 for details                           */
   2121                 /*************************************************************/
   2122 
   2123                 /* first remaining coeff baselevel */
   2124                 if(first_gt1_coeff > 2)
   2125                 {
   2126                     base_level = 3;
   2127                 }
   2128                 else if(num_coeffs_remaining > num_coeffs_base1)
   2129                 {
   2130                     /* atleast one coeff in first 8 is gt > 1 */
   2131                     base_level = 2;
   2132                 }
   2133                 else
   2134                 {
   2135                     /* all coeffs have base of 1 */
   2136                     base_level = 1;
   2137                 }
   2138 
   2139                 for(j = 0; j < num_coeffs_remaining; j++)
   2140                 {
   2141                     WORD32 abs_coeff = pu2_sig_coeff_buf[0] + 1;
   2142                     WORD32 abs_coeff_rem;
   2143                     WORD32 rice_max = (4 << rice_param);
   2144                     WORD32 num_bins, unary_length;
   2145                     UWORD32 u4_sym_shiftk_plus1;
   2146 
   2147                     pu2_sig_coeff_buf++;
   2148 
   2149                     /* sanity check */
   2150                     ASSERT(abs_coeff >= base_level);
   2151 
   2152                     abs_coeff_rem = (abs_coeff - base_level);
   2153 
   2154                     /* TODO://HM-8.0-dev uses (3 << rice_param) as rice_max */
   2155                     /* TODO://HM-8.0-dev does either TR or EGK but not both */
   2156                     if(abs_coeff_rem >= rice_max)
   2157                     {
   2158                         UWORD32 u4_suffix = (abs_coeff_rem - rice_max);
   2159 
   2160                         /* coeff exceeds max rice limit                    */
   2161                         /* encode the TR prefix as tunary code             */
   2162                         /* prefix = 1111 as (rice_max >> rice_praram) = 4  */
   2163                         /* ret |= ihevce_cabac_encode_bypass_bins(ps_cabac, 0xF, 4); */
   2164 
   2165                         /* increment bits generated based on num bypass bins */
   2166                         ps_cabac->u4_bits_estimated_q12 += (4 << CABAC_FRAC_BITS_Q);
   2167 
   2168                         /* encode the exponential golomb code suffix */
   2169                         /*ret |= ihevce_cabac_encode_egk(ps_cabac,
   2170                                                        u4_suffix,
   2171                                                        (rice_param+1)
   2172                                                       ); */
   2173 
   2174                         /* k = rice_param+1 */
   2175                         /************************************************************************/
   2176                         /* shift symbol by k bits to find unary code prefix (111110)            */
   2177                         /* Use GETRANGE to elminate the while loop in sec 9.3.2.4 of HEVC spec  */
   2178                         /************************************************************************/
   2179                         u4_sym_shiftk_plus1 = (u4_suffix >> (rice_param + 1)) + 1;
   2180 
   2181                         /* GETRANGE(unary_length, (u4_sym_shiftk_plus1 + 1)); */
   2182                         GETRANGE(unary_length, u4_sym_shiftk_plus1);
   2183 
   2184                         /* length of the code = 2 *(unary_length - 1) + 1 + k */
   2185                         num_bins = (2 * unary_length) + rice_param;
   2186 
   2187                         /* increment bits generated based on num bypass bins */
   2188                         ps_cabac->u4_bits_estimated_q12 += (num_bins << CABAC_FRAC_BITS_Q);
   2189                     }
   2190                     else
   2191                     {
   2192                         /* code coeff as truncated rice code  */
   2193                         /* ret |= ihevce_cabac_encode_trunc_rice(ps_cabac,
   2194                                                               abs_coeff_rem,
   2195                                                               rice_param,
   2196                                                               rice_max);
   2197                                                               */
   2198 
   2199                         /************************************************************************/
   2200                         /* shift symbol by c_rice_param bits to find unary code prefix (111.10) */
   2201                         /************************************************************************/
   2202                         unary_length = (abs_coeff_rem >> rice_param) + 1;
   2203 
   2204                         /* length of the code */
   2205                         num_bins = unary_length + rice_param;
   2206 
   2207                         /* increment bits generated based on num bypass bins */
   2208                         ps_cabac->u4_bits_estimated_q12 += (num_bins << CABAC_FRAC_BITS_Q);
   2209                     }
   2210 
   2211                     /* update the rice param based on coeff level */
   2212                     if((abs_coeff > (3 << rice_param)) && (rice_param < 4))
   2213                     {
   2214                         rice_param++;
   2215                     }
   2216 
   2217                     /* change base level to 1 if more than 8 coded coeffs */
   2218                     if((j + 1) < (num_coeffs_remaining - num_coeffs_base1))
   2219                     {
   2220                         base_level = 2;
   2221                     }
   2222                     else
   2223                     {
   2224                         base_level = 1;
   2225                     }
   2226                 }
   2227             }
   2228         }
   2229     }
   2230     /*tap texture bits*/
   2231     {
   2232         ps_cabac->u4_texture_bits_estimated_q12 +=
   2233             (ps_cabac->u4_bits_estimated_q12 - temp_tex_bits_q12);
   2234     }
   2235 
   2236     return (ret);
   2237 }
   2238 
   2239 /**
   2240 ******************************************************************************
   2241 *
   2242 *  @brief Encodes a transform residual block as per section 7.3.13
   2243 *
   2244 *  @par   Description
   2245 *  RDOQ optimization is carried out here. When sub-blk RDOQ is turned on, we calculate
   2246 *  the distortion (D) and bits (R) for when the sub-blk is coded and when it is not coded. We
   2247 *  then use the D+lambda*R metric to decide whether the sub-blk should be coded or not, and
   2248 *  appropriately signal it. When coeff RDOQ is turned on, we traverse through the TU to
   2249 *  find all non-zero coeffs. If the non-zero coeff is a 1, then we make a decision (based on the
   2250 *  D+lambda*R metric) as to whether to code it as a 0 or 1. In case the coeff is > 1 (say L where
   2251 *  L > 1) we choose between L and L+1.
   2252 *
   2253 *  @remarks Does not support sign data hiding and transform skip flag currently
   2254 *
   2255 *  @remarks Need to resolve the differences between JVT-J1003_d7 spec and
   2256 *           HM.8.0-dev for related abs_greater_than_1 context initialization
   2257 *           and rice_max parameter used for coeff abs level remaining
   2258 *
   2259 *  @param[inout]   ps_entropy_ctxt
   2260 *  pointer to entropy context (handle)
   2261 *
   2262 *  @param[in]      pv_coeff
   2263 *  Compressed residue buffer containing following information:
   2264 *
   2265 *
   2266 *  HEADER(4 bytes) : last_coeff_x, last_coeff_y, scantype, last_subblock_num
   2267 *
   2268 *  For each 4x4 subblock starting from last_subblock_num (in scan order)
   2269 *     Read 2 bytes  : MSB 12bits (0xBAD marker), bit0 cur_csbf, bit1-2 nbr csbf
   2270 *
   2271 *    If cur_csbf
   2272 *      Read 2 bytes : sig_coeff_map (16bits in scan_order 1:coded, 0:not coded)
   2273 *      Read 2 bytes : abs_gt1_flags (max of 8 only)
   2274 *      Read 2 bytes : coeff_sign_flags
   2275 *
   2276 *      Based on abs_gt1_flags and sig_coeff_map read remaining abs levels
   2277 *      Read 2 bytes : remaining_abs_coeffs_minus1 (this is in a loop)
   2278 *
   2279 *  @param[in]      log2_tr_size
   2280 *  transform size of the current TU
   2281 *
   2282 *  @param[in]      is_luma
   2283 *  boolean indicating if the texture type is luma / chroma
   2284 *
   2285 *  @param[out]    pi8_tu_coded_dist
   2286 *  The distortion when the TU is coded (not all coeffs are set to 0) is stored here
   2287 *
   2288 *  @param[out]    pi8_tu_not_coded_dist
   2289 *  The distortion when the entire TU is not coded (all coeffs are set to 0) is stored here
   2290 *
   2291 *
   2292 *  @return      success or failure error code
   2293 *
   2294 ******************************************************************************
   2295 */
   2296 
   2297 WORD32 ihevce_cabac_residue_encode_rdoq(
   2298     entropy_context_t *ps_entropy_ctxt,
   2299     void *pv_coeff,
   2300     WORD32 log2_tr_size,
   2301     WORD32 is_luma,
   2302     void *pv_rdoq_ctxt,
   2303     LWORD64 *pi8_tu_coded_dist,
   2304     LWORD64 *pi8_tu_not_coded_dist,
   2305     WORD32 perform_sbh)
   2306 {
   2307     WORD32 *pi4_subBlock2csbfId_map;
   2308 
   2309     WORD32 ret = IHEVCE_SUCCESS;
   2310 
   2311     cab_ctxt_t *ps_cabac = &ps_entropy_ctxt->s_cabac_ctxt;
   2312     cab_ctxt_t s_sub_blk_not_coded_cabac_ctxt;
   2313     backup_ctxt_t s_backup_ctxt;
   2314     backup_ctxt_t s_backup_ctxt_sub_blk_not_coded;
   2315 
   2316     UWORD32 temp_tex_bits_q12;
   2317 
   2318     UWORD8 *pu1_coeff_buf_hdr = (UWORD8 *)pv_coeff;
   2319     UWORD16 *pu2_sig_coeff_buf = (UWORD16 *)pv_coeff;
   2320 
   2321     LWORD64 i8_sub_blk_not_coded_dist = 0, i8_sub_blk_coded_dist = 0;
   2322     WORD32 i4_sub_blk_not_coded_bits = 0, i4_sub_blk_coded_bits = 0;
   2323     LWORD64 i8_sub_blk_not_coded_metric, i8_sub_blk_coded_metric;
   2324     LWORD64 i8_tu_not_coded_dist = 0, i8_tu_coded_dist = 0;
   2325     WORD32 i4_tu_coded_bits = 0;
   2326     WORD32 temp_zero_col = 0, temp_zero_row = 0;
   2327 
   2328     UWORD8 *pu1_last_sig_coeff_x;
   2329     UWORD8 *pu1_last_sig_coeff_y;
   2330     WORD32 scan_type;
   2331     WORD32 last_csb;
   2332 
   2333     WORD32 cur_csbf = 0, nbr_csbf;
   2334     // WORD32 i4_temp_bits;
   2335 
   2336     WORD32 sig_coeff_base_ctxt; /* cabac context for sig coeff flag    */
   2337     WORD32 abs_gt1_base_ctxt; /* cabac context for abslevel > 1 flag */
   2338 
   2339     UWORD8 *pu1_ctxt_model = &ps_cabac->au1_ctxt_models[0];
   2340 
   2341     rdoq_sbh_ctxt_t *ps_rdoq_ctxt = (rdoq_sbh_ctxt_t *)pv_rdoq_ctxt;
   2342     WORD16 *pi2_coeffs = ps_rdoq_ctxt->pi2_quant_coeffs;
   2343     WORD16 *pi2_tr_coeffs = ps_rdoq_ctxt->pi2_trans_values;
   2344     WORD32 trans_size = ps_rdoq_ctxt->i4_trans_size;
   2345     WORD32 i4_round_val = ps_rdoq_ctxt->i4_round_val_ssd_in_td;
   2346     WORD32 i4_shift_val = ps_rdoq_ctxt->i4_shift_val_ssd_in_td;
   2347     WORD32 scan_idx = ps_rdoq_ctxt->i4_scan_idx;
   2348 
   2349     UWORD8 *pu1_csb_table, *pu1_trans_table;
   2350     WORD32 shift_value, mask_value;
   2351 
   2352     WORD32 gt1_ctxt = 1; /* required for abs_gt1_ctxt modelling */
   2353     WORD32 temp_gt1_ctxt = gt1_ctxt;
   2354 
   2355     WORD32 i;
   2356 #if DISABLE_ZCSBF
   2357     WORD32 i4_skip_zero_cbf = 0;
   2358     WORD32 i4_skip_zero_csbf = 0;
   2359     WORD32 i4_num_abs_1_coeffs = 0;
   2360 #endif
   2361     (void)perform_sbh;
   2362     pi4_subBlock2csbfId_map = ps_rdoq_ctxt->pi4_subBlock2csbfId_map;
   2363 
   2364     /* scan order inside a csb */
   2365     pu1_csb_table = (UWORD8 *)&(g_u1_scan_table_4x4[scan_idx][0]);
   2366     /*Initializing the backup_ctxt structures*/
   2367     s_backup_ctxt.i4_num_bits = 0;
   2368     s_backup_ctxt_sub_blk_not_coded.i4_num_bits = 0;
   2369 
   2370     memset(&s_backup_ctxt.au1_ctxt_to_backup, 0, MAX_NUM_CONTEXT_ELEMENTS);
   2371     memset(&s_backup_ctxt_sub_blk_not_coded.au1_ctxt_to_backup, 0, MAX_NUM_CONTEXT_ELEMENTS);
   2372 
   2373     pu1_coeff_buf_hdr = (UWORD8 *)pv_coeff;
   2374     pu2_sig_coeff_buf = (UWORD16 *)pv_coeff;
   2375 
   2376     /* last sig coeff indices in scan order */
   2377     pu1_last_sig_coeff_x = &pu1_coeff_buf_hdr[0];
   2378     pu1_last_sig_coeff_y = &pu1_coeff_buf_hdr[1];
   2379 
   2380     /* read the scan type : upright diag / horz / vert */
   2381     scan_type = pu1_coeff_buf_hdr[2];
   2382 
   2383     /************************************************************************/
   2384     /* position of the last coded sub block. This sub block contains coeff  */
   2385     /* corresponding to last_sig_coeff_x, last_sig_coeff_y. Althoug this can*/
   2386     /* be derived here it better to be populated by scanning module         */
   2387     /************************************************************************/
   2388     last_csb = pu1_coeff_buf_hdr[3];
   2389 
   2390     shift_value = ps_rdoq_ctxt->i4_log2_trans_size + 1;
   2391     /* for finding. row no. from scan index */
   2392     shift_value = shift_value - 3;
   2393     /*for finding the col. no. from scan index*/
   2394     mask_value = (ps_rdoq_ctxt->i4_trans_size / 4) - 1;
   2395 
   2396     switch(ps_rdoq_ctxt->i4_trans_size)
   2397     {
   2398     case 32:
   2399         pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_8x8[scan_idx][0]);
   2400         break;
   2401     case 16:
   2402         pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_4x4[scan_idx][0]);
   2403         break;
   2404     case 8:
   2405         pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_2x2[scan_idx][0]);
   2406         break;
   2407     case 4:
   2408         pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_1x1[0]);
   2409         break;
   2410     default:
   2411         DBG_PRINTF("Invalid Trans Size\n");
   2412         return -1;
   2413         break;
   2414     }
   2415 
   2416     /* sanity checks */
   2417     /* transform skip not supported */
   2418     ASSERT(0 == ps_entropy_ctxt->ps_pps->i1_transform_skip_enabled_flag);
   2419     {
   2420         temp_tex_bits_q12 = ps_cabac->u4_bits_estimated_q12;
   2421     }
   2422     /*************************************************************************/
   2423     /* derive base context index for sig coeff as per section 9.3.3.1.4      */
   2424     /* TODO; convert to look up based on luma/chroma, scan type and tfr size */
   2425     /*************************************************************************/
   2426     if(is_luma)
   2427     {
   2428         sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG;
   2429         abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG;
   2430 
   2431         if(3 == log2_tr_size)
   2432         {
   2433             /* 8x8 transform size */
   2434             sig_coeff_base_ctxt += (scan_type == SCAN_DIAG_UPRIGHT) ? 9 : 15;
   2435         }
   2436         else if(3 < log2_tr_size)
   2437         {
   2438             /* larger transform sizes */
   2439             sig_coeff_base_ctxt += 21;
   2440         }
   2441     }
   2442     else
   2443     {
   2444         /* chroma context initializations */
   2445         sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG + 27;
   2446         abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG + 16;
   2447 
   2448         if(3 == log2_tr_size)
   2449         {
   2450             /* 8x8 transform size */
   2451             sig_coeff_base_ctxt += 9;
   2452         }
   2453         else if(3 < log2_tr_size)
   2454         {
   2455             /* larger transform sizes */
   2456             sig_coeff_base_ctxt += 12;
   2457         }
   2458     }
   2459 
   2460     /* go to csbf flags */
   2461     pu2_sig_coeff_buf = (UWORD16 *)(pu1_coeff_buf_hdr + COEFF_BUF_HEADER_LEN);
   2462 
   2463     /*Calculating the distortion produced by all the zero coeffs in the TU*/
   2464     for(i = (trans_size * trans_size) - 1; i >= 0; i--)
   2465     {
   2466         WORD32 i4_dist;
   2467         WORD16 *pi2_orig_coeff = ps_rdoq_ctxt->pi2_trans_values;
   2468 
   2469         if(pi2_coeffs[i] == 0)
   2470         {
   2471             i4_dist = CALC_SSD_IN_TRANS_DOMAIN(pi2_orig_coeff[i], 0, 0, 0);
   2472             i8_tu_not_coded_dist += i4_dist;
   2473             i8_tu_coded_dist += i4_dist;
   2474         }
   2475     }
   2476 
   2477     /*Backup of the various cabac ctxts*/
   2478     memcpy(&s_sub_blk_not_coded_cabac_ctxt, ps_cabac, sizeof(cab_ctxt_t));
   2479     /************************************************************************/
   2480     /* encode the csbf, sig_coeff_map, abs_grt1_flags, abs_grt2_flag, sign  */
   2481     /* and abs_coeff_remaining for each 4x4 starting from last scan to first*/
   2482     /************************************************************************/
   2483 
   2484     for(i = last_csb; i >= 0; i--)
   2485     {
   2486         UWORD16 u2_marker_csbf;
   2487         WORD32 ctxt_idx;
   2488         WORD32 i4_sub_blk_is_coded = 0;
   2489         WORD32 blk_row, blk_col;
   2490         WORD32 scaled_blk_row;
   2491         WORD32 scaled_blk_col;
   2492         WORD32 infer_coeff;
   2493 
   2494         gt1_ctxt = temp_gt1_ctxt;
   2495 #if DISABLE_ZCSBF
   2496         /*Initialize skip zero cbf flag to 0*/
   2497         i4_skip_zero_csbf = 0;
   2498         i4_num_abs_1_coeffs = 0;
   2499 #endif
   2500 
   2501 #if OPT_MEMCPY
   2502         ihevce_copy_backup_ctxt(
   2503             (void *)&s_sub_blk_not_coded_cabac_ctxt,
   2504             (void *)ps_cabac,
   2505             (void *)&s_backup_ctxt_sub_blk_not_coded,
   2506             (void *)&s_backup_ctxt);
   2507         memset(s_backup_ctxt_sub_blk_not_coded.au1_ctxt_to_backup, 0, 5);
   2508         memset(s_backup_ctxt.au1_ctxt_to_backup, 0, 5);
   2509 #else
   2510         memcpy(&s_sub_blk_not_coded_cabac_ctxt, ps_cabac, sizeof(cab_ctxt_t));
   2511 #endif
   2512         // i4_temp_bits = s_sub_blk_not_coded_cabac_ctxt.u4_bits_estimated_q12;
   2513 
   2514         blk_row = pu1_trans_table[i] >> shift_value; /*row of csb*/
   2515         blk_col = pu1_trans_table[i] & mask_value; /*col of csb*/
   2516 
   2517         scaled_blk_row = blk_row << 2;
   2518         scaled_blk_col = blk_col << 2;
   2519 
   2520         infer_coeff = (i < last_csb) && (i > 0);
   2521         u2_marker_csbf = *pu2_sig_coeff_buf;
   2522 
   2523         if((blk_col + 1 < trans_size / 4)) /* checking right boundary */
   2524         {
   2525             if(!ps_rdoq_ctxt
   2526                     ->pu1_csbf_buf[pi4_subBlock2csbfId_map[blk_row * trans_size / 4 + blk_col + 1]])
   2527             {
   2528                 /* clear the 2nd bit if the right csb is 0 */
   2529                 u2_marker_csbf = u2_marker_csbf & (~(1 << 1));
   2530             }
   2531         }
   2532         if((blk_row + 1 < trans_size / 4)) /* checking bottom boundary */
   2533         {
   2534             if(!ps_rdoq_ctxt
   2535                     ->pu1_csbf_buf[pi4_subBlock2csbfId_map[(blk_row + 1) * trans_size / 4 + blk_col]])
   2536             {
   2537                 /* clear the 3rd bit if the bottom csb is 0*/
   2538                 u2_marker_csbf = u2_marker_csbf & (~(1 << 2));
   2539             }
   2540         }
   2541         pu2_sig_coeff_buf++;
   2542 
   2543         /* sanity checks for marker present in every csbf flag */
   2544         ASSERT((u2_marker_csbf >> 4) == 0xBAD);
   2545 
   2546         /* extract the current and neigbour csbf flags */
   2547         cur_csbf = u2_marker_csbf & 0x1;
   2548         nbr_csbf = (u2_marker_csbf >> 1) & 0x3;
   2549 
   2550         if((i < last_csb) && (i > 0))
   2551         {
   2552             ctxt_idx = IHEVC_CAB_CODED_SUBLK_IDX;
   2553 
   2554             /* ctxt based on right / bottom avail csbf, section 9.3.3.1.3 */
   2555             ctxt_idx += nbr_csbf ? 1 : 0;
   2556             ctxt_idx += is_luma ? 0 : 2;
   2557 
   2558             ret |= ihevce_cabac_encode_bin(ps_cabac, cur_csbf, ctxt_idx);
   2559 
   2560             s_backup_ctxt.au1_ctxt_to_backup[SUB_BLK_CODED_FLAG] = 1;
   2561 
   2562             if(cur_csbf)
   2563             {
   2564                 ret |= ihevce_cabac_encode_bin(&s_sub_blk_not_coded_cabac_ctxt, 0, ctxt_idx);
   2565                 // clang-format off
   2566                 i4_sub_blk_not_coded_bits =
   2567                     s_sub_blk_not_coded_cabac_ctxt.u4_bits_estimated_q12;  // - i4_temp_bits;
   2568                 s_backup_ctxt_sub_blk_not_coded.au1_ctxt_to_backup[SUB_BLK_CODED_FLAG] = 1;
   2569                 // clang-format on
   2570             }
   2571         }
   2572         else
   2573         {
   2574             /* sanity check, this csb contains the last_sig_coeff */
   2575             if(i == last_csb)
   2576             {
   2577                 ASSERT(cur_csbf == 1);
   2578             }
   2579         }
   2580         /*If any block in the TU is coded and the 0th block is not coded, the 0th
   2581           block is still signalled as csbf = 1, and with all sig_coeffs sent as
   2582           0(HEVC requirement)*/
   2583         if((ps_rdoq_ctxt->i1_tu_is_coded == 1) && (i == 0))
   2584         {
   2585             i4_sub_blk_not_coded_bits = ihevce_code_all_sig_coeffs_as_0_explicitly(
   2586                 (void *)ps_rdoq_ctxt,
   2587                 i,
   2588                 pu1_trans_table,
   2589                 is_luma,
   2590                 scan_type,
   2591                 infer_coeff,
   2592                 nbr_csbf,
   2593                 &s_sub_blk_not_coded_cabac_ctxt);
   2594         }
   2595 
   2596         if(i == last_csb)
   2597         {
   2598             WORD32 i4_last_x = *pu1_last_sig_coeff_x;
   2599             WORD32 i4_last_y = *pu1_last_sig_coeff_y;
   2600             if(SCAN_VERT == scan_type)
   2601             {
   2602                 /* last coeff x and y are swapped for vertical scan */
   2603                 SWAP(i4_last_x, i4_last_y);
   2604             }
   2605             /* Encode the last_sig_coeff_x and last_sig_coeff_y */
   2606             ret |= ihevce_cabac_encode_last_coeff_x_y(
   2607                 ps_cabac, i4_last_x, i4_last_y, log2_tr_size, is_luma);
   2608             s_backup_ctxt.au1_ctxt_to_backup[LASTXY] = 1;
   2609         }
   2610 
   2611         if(cur_csbf)
   2612         {
   2613             /*****************************************************************/
   2614             /* encode the sig coeff map as per section 7.3.13                */
   2615             /* significant_coeff_flags: msb=coeff15-lsb=coeff0 in scan order */
   2616             /*****************************************************************/
   2617 
   2618             WORD32 i4_bit_depth;
   2619             WORD32 i4_shift_iq;
   2620             WORD32 i4_dequant_val;
   2621             WORD32 bit; /* temp boolean */
   2622 
   2623             UWORD16 u2_gt0_flags = *pu2_sig_coeff_buf;
   2624             WORD32 sig_coeff_map = u2_gt0_flags;
   2625             WORD32 gt1_flags = *(pu2_sig_coeff_buf + 1);
   2626             WORD32 sign_flags = *(pu2_sig_coeff_buf + 2);
   2627 
   2628             WORD32 gt1_bins = 0; /* bins for coeffs with abslevel > 1 */
   2629 
   2630             WORD16 *pi2_dequant_coeff = ps_rdoq_ctxt->pi2_dequant_coeff;
   2631             WORD16 i2_qp_rem = ps_rdoq_ctxt->i2_qp_rem;
   2632             WORD32 i4_qp_div = ps_rdoq_ctxt->i4_qp_div;
   2633 
   2634             WORD32 sign_bins = 0; /* bins for sign flags of coded coeffs  */
   2635             WORD32 num_coded = 0; /* total coeffs coded in 4x4            */
   2636 
   2637             /* total count of coeffs to be coded as abs level remaining */
   2638             WORD32 num_coeffs_remaining = 0;
   2639 
   2640             /* count of coeffs to be coded as  abslevel-1 */
   2641             WORD32 num_coeffs_base1 = 0;
   2642             WORD32 scan_pos;
   2643             WORD32 first_gt1_coeff = 0;
   2644 
   2645             i4_bit_depth = ps_entropy_ctxt->ps_sps->i1_bit_depth_luma_minus8 + 8;
   2646             i4_shift_iq = i4_bit_depth + ps_rdoq_ctxt->i4_log2_trans_size - 5;
   2647 
   2648             i4_sub_blk_is_coded = 1;
   2649 
   2650             if((i != 0) || (0 == last_csb))
   2651             {
   2652                 /* sanity check, atleast one coeff is coded as csbf is set */
   2653                 ASSERT(sig_coeff_map != 0);
   2654             }
   2655             /*Calculating the distortions produced*/
   2656             {
   2657                 WORD32 k, j;
   2658                 WORD16 *pi2_temp_coeff =
   2659                     &pi2_coeffs[scaled_blk_col + (scaled_blk_row * trans_size)];
   2660                 WORD16 *pi2_temp_tr_coeff =
   2661                     &pi2_tr_coeffs[scaled_blk_col + (scaled_blk_row * trans_size)];
   2662                 WORD16 *pi2_temp_dequant_coeff =
   2663                     &pi2_dequant_coeff[scaled_blk_col + (scaled_blk_row * trans_size)];
   2664 
   2665                 for(k = 0; k < 4; k++)
   2666                 {
   2667                     for(j = 0; j < 4; j++)
   2668                     {
   2669                         if(*pi2_temp_coeff)
   2670                         {
   2671                             /*Inverse quantizing for distortion calculation*/
   2672                             if(ps_rdoq_ctxt->i4_trans_size != 4)
   2673                             {
   2674                                 IQUANT(
   2675                                     i4_dequant_val,
   2676                                     *pi2_temp_coeff,
   2677                                     *pi2_temp_dequant_coeff * g_ihevc_iquant_scales[i2_qp_rem],
   2678                                     i4_shift_iq,
   2679                                     i4_qp_div);
   2680                             }
   2681                             else
   2682                             {
   2683                                 IQUANT_4x4(
   2684                                     i4_dequant_val,
   2685                                     *pi2_temp_coeff,
   2686                                     *pi2_temp_dequant_coeff * g_ihevc_iquant_scales[i2_qp_rem],
   2687                                     i4_shift_iq,
   2688                                     i4_qp_div);
   2689                             }
   2690 
   2691                             i8_sub_blk_coded_dist +=
   2692                                 CALC_SSD_IN_TRANS_DOMAIN(*pi2_temp_tr_coeff, i4_dequant_val, 0, 0);
   2693 
   2694                             i8_sub_blk_not_coded_dist +=
   2695                                 CALC_SSD_IN_TRANS_DOMAIN(*pi2_temp_tr_coeff, 0, 0, 0);
   2696                         }
   2697 #if DISABLE_ZCSBF
   2698                         if(abs(*pi2_temp_coeff) > 1)
   2699                         {
   2700                             i4_skip_zero_csbf = 1;
   2701                         }
   2702                         else if(abs(*pi2_temp_coeff) == 1)
   2703                         {
   2704                             i4_num_abs_1_coeffs++;
   2705                         }
   2706 #endif
   2707                         pi2_temp_coeff++;
   2708                         pi2_temp_tr_coeff++;
   2709                         pi2_temp_dequant_coeff++;
   2710                     }
   2711                     pi2_temp_tr_coeff += ps_rdoq_ctxt->i4_trans_size - 4;
   2712                     pi2_temp_coeff += ps_rdoq_ctxt->i4_q_data_strd - 4;
   2713                     pi2_dequant_coeff += ps_rdoq_ctxt->i4_trans_size - 4;
   2714                 }
   2715             }
   2716 
   2717 #if DISABLE_ZCSBF
   2718             i4_skip_zero_csbf = i4_skip_zero_csbf || (i4_num_abs_1_coeffs > 3);
   2719 #endif
   2720             pu2_sig_coeff_buf += 3;
   2721 
   2722             scan_pos = 15;
   2723             if(i == last_csb)
   2724             {
   2725                 /*************************************************************/
   2726                 /* clear last_scan_pos for last block in scan order as this  */
   2727                 /* is communicated  throught last_coeff_x and last_coeff_y   */
   2728                 /*************************************************************/
   2729                 WORD32 next_sig = CLZ(sig_coeff_map) + 1;
   2730 
   2731                 scan_pos = WORD_SIZE - next_sig;
   2732 
   2733                 /* prepare the bins for gt1 flags */
   2734                 EXTRACT_BIT(bit, gt1_flags, scan_pos);
   2735 
   2736                 /* insert gt1 bin in lsb */
   2737                 gt1_bins |= bit;
   2738 
   2739                 /* prepare the bins for sign flags */
   2740                 EXTRACT_BIT(bit, sign_flags, scan_pos);
   2741 
   2742                 /* insert sign bin in lsb */
   2743                 sign_bins |= bit;
   2744 
   2745                 sig_coeff_map = CLEAR_BIT(sig_coeff_map, scan_pos);
   2746 
   2747                 scan_pos--;
   2748                 num_coded++;
   2749             }
   2750 
   2751             /* encode the required sigcoeff flags (abslevel > 0)   */
   2752             while(scan_pos >= 0)
   2753             {
   2754                 WORD32 y_pos_x_pos;
   2755                 WORD32 sig_ctxinc = 0; /* 0 is default inc for DC coeff */
   2756 
   2757                 WORD32 sig_coeff;
   2758 
   2759                 EXTRACT_BIT(sig_coeff, sig_coeff_map, scan_pos);
   2760 
   2761                 /* derive the x,y pos */
   2762                 y_pos_x_pos = gu1_hevce_scan4x4[scan_type][scan_pos];
   2763 
   2764                 /* derive the context inc as per section 9.3.3.1.4 */
   2765                 if(2 == log2_tr_size)
   2766                 {
   2767                     /* 4x4 transform size increment uses lookup */
   2768                     sig_ctxinc = gu1_hevce_sigcoeff_ctxtinc_tr4[y_pos_x_pos];
   2769                 }
   2770                 else if(scan_pos || i)
   2771                 {
   2772                     /* ctxt for AC coeff depends on curpos and neigbour csbf */
   2773                     sig_ctxinc = gu1_hevce_sigcoeff_ctxtinc[nbr_csbf][y_pos_x_pos];
   2774 
   2775                     /* based on luma subblock pos */
   2776                     sig_ctxinc += (i && is_luma) ? 3 : 0;
   2777                 }
   2778                 else
   2779                 {
   2780                     /* DC coeff has fixed context for luma and chroma */
   2781                     sig_coeff_base_ctxt = is_luma ? IHEVC_CAB_COEFF_FLAG
   2782                                                   : IHEVC_CAB_COEFF_FLAG + 27;
   2783                 }
   2784 
   2785                 /*************************************************************/
   2786                 /* encode sig coeff only if required                         */
   2787                 /* decoder infers 0,0 coeff when all the other coeffs are 0  */
   2788                 /*************************************************************/
   2789                 if(scan_pos || (!infer_coeff))
   2790                 {
   2791                     ctxt_idx = sig_ctxinc + sig_coeff_base_ctxt;
   2792                     //ret |= ihevce_cabac_encode_bin(ps_cabac, sig_coeff, ctxt_idx);
   2793                     {
   2794                         WORD32 state_mps = pu1_ctxt_model[ctxt_idx];
   2795 
   2796                         /* increment bits generated based on state and bin encoded */
   2797                         ps_cabac->u4_bits_estimated_q12 +=
   2798                             gau2_ihevce_cabac_bin_to_bits[state_mps ^ sig_coeff];
   2799 
   2800                         /* update the context model from state transition LUT */
   2801                         pu1_ctxt_model[ctxt_idx] =
   2802                             gau1_ihevc_next_state[(state_mps << 1) | sig_coeff];
   2803                     }
   2804                 }
   2805 
   2806                 if(sig_coeff)
   2807                 {
   2808                     /* prepare the bins for gt1 flags */
   2809                     EXTRACT_BIT(bit, gt1_flags, scan_pos);
   2810 
   2811                     /* shift and insert gt1 bin in lsb */
   2812                     gt1_bins <<= 1;
   2813                     gt1_bins |= bit;
   2814 
   2815                     /* prepare the bins for sign flags */
   2816                     EXTRACT_BIT(bit, sign_flags, scan_pos);
   2817 
   2818                     /* shift and insert sign bin in lsb */
   2819                     sign_bins <<= 1;
   2820                     sign_bins |= bit;
   2821 
   2822                     num_coded++;
   2823 
   2824                     /* 0,0 coeff can no more be inferred :( */
   2825                     infer_coeff = 0;
   2826                 }
   2827 
   2828                 scan_pos--;
   2829             }
   2830 
   2831             s_backup_ctxt.au1_ctxt_to_backup[SIG_COEFF] = 1;
   2832 
   2833             /****************************************************************/
   2834             /* encode the abs level greater than 1 bins; Section 7.3.13     */
   2835             /* These have already been prepared during sig_coeff_map encode */
   2836             /* Context modelling done as per section 9.3.3.1.5              */
   2837             /****************************************************************/
   2838             {
   2839                 WORD32 j;
   2840 
   2841                 /* context set based on luma subblock pos */
   2842                 WORD32 ctxt_set = (i && is_luma) ? 2 : 0;
   2843 
   2844                 /* count of coeffs with abslevel > 1; max of 8 to be coded */
   2845                 WORD32 num_gt1_bins = MIN(8, num_coded);
   2846 
   2847                 if(num_coded > 8)
   2848                 {
   2849                     /* pull back the bins to required number */
   2850                     gt1_bins >>= (num_coded - 8);
   2851 
   2852                     num_coeffs_remaining += (num_coded - 8);
   2853                     num_coeffs_base1 = (num_coded - 8);
   2854                 }
   2855 
   2856                 /* See section 9.3.3.1.5           */
   2857                 ctxt_set += (0 == gt1_ctxt) ? 1 : 0;
   2858 
   2859                 gt1_ctxt = 1;
   2860 
   2861                 for(j = num_gt1_bins - 1; j >= 0; j--)
   2862                 {
   2863                     /* Encodet the abs level gt1 bins */
   2864                     ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
   2865 
   2866                     EXTRACT_BIT(bit, gt1_bins, j);
   2867 
   2868                     //ret |= ihevce_cabac_encode_bin(ps_cabac, bit, ctxt_idx);
   2869                     {
   2870                         WORD32 state_mps = pu1_ctxt_model[ctxt_idx];
   2871 
   2872                         /* increment bits generated based on state and bin encoded */
   2873                         ps_cabac->u4_bits_estimated_q12 +=
   2874                             gau2_ihevce_cabac_bin_to_bits[state_mps ^ bit];
   2875 
   2876                         /* update the context model from state transition LUT */
   2877                         pu1_ctxt_model[ctxt_idx] = gau1_ihevc_next_state[(state_mps << 1) | bit];
   2878                     }
   2879 
   2880                     if(bit)
   2881                     {
   2882                         gt1_ctxt = 0;
   2883                         num_coeffs_remaining++;
   2884                     }
   2885                     else if(gt1_ctxt && (gt1_ctxt < 3))
   2886                     {
   2887                         gt1_ctxt++;
   2888                     }
   2889                 }
   2890                 s_backup_ctxt.au1_ctxt_to_backup[GRTR_THAN_1] = 1;
   2891                 /*************************************************************/
   2892                 /* encode abs level greater than 2 bin; Section 7.3.13       */
   2893                 /*************************************************************/
   2894                 if(gt1_bins)
   2895                 {
   2896                     WORD32 gt2_bin;
   2897 
   2898                     first_gt1_coeff = pu2_sig_coeff_buf[0] + 1;
   2899                     gt2_bin = (first_gt1_coeff > 2);
   2900 
   2901                     /* atleast one level > 2 */
   2902                     ctxt_idx = IHEVC_CAB_COEFABS_GRTR2_FLAG;
   2903 
   2904                     ctxt_idx += (is_luma) ? ctxt_set : (ctxt_set + 4);
   2905 
   2906                     //ret |= ihevce_cabac_encode_bin(ps_cabac, gt2_bin, ctxt_idx);
   2907                     {
   2908                         WORD32 state_mps = pu1_ctxt_model[ctxt_idx];
   2909 
   2910                         /* increment bits generated based on state and bin encoded */
   2911                         ps_cabac->u4_bits_estimated_q12 +=
   2912                             gau2_ihevce_cabac_bin_to_bits[state_mps ^ gt2_bin];
   2913 
   2914                         /* update the context model from state transition LUT */
   2915                         pu1_ctxt_model[ctxt_idx] =
   2916                             gau1_ihevc_next_state[(state_mps << 1) | gt2_bin];
   2917                     }
   2918 
   2919                     if(!gt2_bin)
   2920                     {
   2921                         /* sanity check */
   2922                         ASSERT(first_gt1_coeff == 2);
   2923 
   2924                         /* no need to send this coeff as bypass bins */
   2925                         pu2_sig_coeff_buf++;
   2926                         num_coeffs_remaining--;
   2927                     }
   2928                     s_backup_ctxt.au1_ctxt_to_backup[GRTR_THAN_2] = 1;
   2929                 }
   2930             }
   2931 
   2932             /*************************************************************/
   2933             /* encode the coeff signs and abs remaing levels             */
   2934             /*************************************************************/
   2935             if(num_coded)
   2936             {
   2937                 WORD32 base_level;
   2938                 WORD32 rice_param = 0;
   2939                 WORD32 j;
   2940 
   2941                 /*************************************************************/
   2942                 /* encode the coeff signs populated in sign_bins             */
   2943                 /*************************************************************/
   2944                 if(num_coded > 0)
   2945                 {
   2946                     ret |= ihevce_cabac_encode_bypass_bins(ps_cabac, sign_bins, num_coded);
   2947                 }
   2948                 /*************************************************************/
   2949                 /* encode the coeff_abs_level_remaining as TR / EGK bins     */
   2950                 /* See section 9.3.2.7 for details                           */
   2951                 /*************************************************************/
   2952 
   2953                 /* first remaining coeff baselevel */
   2954                 if(first_gt1_coeff > 2)
   2955                 {
   2956                     base_level = 3;
   2957                 }
   2958                 else if(num_coeffs_remaining > num_coeffs_base1)
   2959                 {
   2960                     /* atleast one coeff in first 8 is gt > 1 */
   2961                     base_level = 2;
   2962                 }
   2963                 else
   2964                 {
   2965                     /* all coeffs have base of 1 */
   2966                     base_level = 1;
   2967                 }
   2968 
   2969                 for(j = 0; j < num_coeffs_remaining; j++)
   2970                 {
   2971                     WORD32 abs_coeff = pu2_sig_coeff_buf[0] + 1;
   2972                     WORD32 abs_coeff_rem;
   2973                     WORD32 rice_max = (4 << rice_param);
   2974 
   2975                     pu2_sig_coeff_buf++;
   2976 
   2977                     /* sanity check */
   2978                     ASSERT(abs_coeff >= base_level);
   2979 
   2980                     abs_coeff_rem = (abs_coeff - base_level);
   2981 
   2982                     /* TODO://HM-8.0-dev uses (3 << rice_param) as rice_max */
   2983                     /* TODO://HM-8.0-dev does either TR or EGK but not both */
   2984                     if(abs_coeff_rem >= rice_max)
   2985                     {
   2986                         UWORD32 u4_suffix = (abs_coeff_rem - rice_max);
   2987 
   2988                         /* coeff exceeds max rice limit                    */
   2989                         /* encode the TR prefix as tunary code             */
   2990                         /* prefix = 1111 as (rice_max >> rice_praram) = 4  */
   2991                         ret |= ihevce_cabac_encode_bypass_bins(ps_cabac, 0xF, 4);
   2992 
   2993                         /* encode the exponential golomb code suffix */
   2994                         ret |= ihevce_cabac_encode_egk(ps_cabac, u4_suffix, (rice_param + 1));
   2995                     }
   2996                     else
   2997                     {
   2998                         /* code coeff as truncated rice code  */
   2999                         ret |= ihevce_cabac_encode_trunc_rice(
   3000                             ps_cabac, abs_coeff_rem, rice_param, rice_max);
   3001                     }
   3002 
   3003                     /* update the rice param based on coeff level */
   3004                     if((abs_coeff > (3 << rice_param)) && (rice_param < 4))
   3005                     {
   3006                         rice_param++;
   3007                     }
   3008 
   3009                     /* change base level to 1 if more than 8 coded coeffs */
   3010                     if((j + 1) < (num_coeffs_remaining - num_coeffs_base1))
   3011                     {
   3012                         base_level = 2;
   3013                     }
   3014                     else
   3015                     {
   3016                         base_level = 1;
   3017                     }
   3018                 }
   3019             }
   3020 
   3021             i4_sub_blk_coded_bits = ps_cabac->u4_bits_estimated_q12;
   3022             /**********************************************************/
   3023             /**********************************************************/
   3024             /**********************************************************/
   3025             /*Decide whether sub block should be coded or not*/
   3026             /**********************************************************/
   3027             /**********************************************************/
   3028             /**********************************************************/
   3029             i8_sub_blk_coded_metric = CALC_CUMMUL_SSD_IN_TRANS_DOMAIN(
   3030                                           i8_sub_blk_coded_dist, 0, i4_round_val, i4_shift_val) +
   3031                                       COMPUTE_RATE_COST_CLIP30_RDOQ(
   3032                                           i4_sub_blk_coded_bits,
   3033                                           ps_rdoq_ctxt->i8_cl_ssd_lambda_qf,
   3034                                           (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
   3035             i8_sub_blk_not_coded_metric =
   3036                 CALC_CUMMUL_SSD_IN_TRANS_DOMAIN(
   3037                     i8_sub_blk_not_coded_dist, 0, i4_round_val, i4_shift_val) +
   3038                 COMPUTE_RATE_COST_CLIP30_RDOQ(
   3039                     i4_sub_blk_not_coded_bits,
   3040                     ps_rdoq_ctxt->i8_cl_ssd_lambda_qf,
   3041                     (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
   3042 
   3043 #if DISABLE_ZCSBF
   3044             if(((i8_sub_blk_not_coded_metric < i8_sub_blk_coded_metric) ||
   3045                 (i4_sub_blk_is_coded == 0)) &&
   3046                (i4_skip_zero_csbf == 0))
   3047 #else
   3048             if((i8_sub_blk_not_coded_metric < i8_sub_blk_coded_metric) ||
   3049                (i4_sub_blk_is_coded == 0))
   3050 #endif
   3051             {
   3052 #if OPT_MEMCPY
   3053                 ihevce_copy_backup_ctxt(
   3054                     (void *)ps_cabac,
   3055                     (void *)&s_sub_blk_not_coded_cabac_ctxt,
   3056                     (void *)&s_backup_ctxt,
   3057                     (void *)&s_backup_ctxt_sub_blk_not_coded);
   3058 #else
   3059                 memcpy(ps_cabac, &s_sub_blk_not_coded_cabac_ctxt, sizeof(cab_ctxt_t));
   3060 #endif
   3061                 scan_pos = 15;
   3062                 i4_sub_blk_is_coded = 0;
   3063 
   3064                 {
   3065                     WORD32 k, j;
   3066                     WORD16 *pi2_temp_coeff =
   3067                         &pi2_coeffs[scaled_blk_col + (scaled_blk_row * ps_rdoq_ctxt->i4_q_data_strd)];
   3068                     WORD16 *pi2_temp_iquant_coeff =
   3069                         &ps_rdoq_ctxt->pi2_iquant_coeffs
   3070                              [scaled_blk_col + (scaled_blk_row * ps_rdoq_ctxt->i4_iq_data_strd)];
   3071                     for(k = 0; k < 4; k++)
   3072                     {
   3073                         for(j = 0; j < 4; j++)
   3074                         {
   3075                             *pi2_temp_coeff = 0;
   3076                             *pi2_temp_iquant_coeff = 0;
   3077 
   3078                             pi2_temp_coeff++;
   3079                             pi2_temp_iquant_coeff++;
   3080                         }
   3081                         pi2_temp_coeff += ps_rdoq_ctxt->i4_q_data_strd - 4;
   3082                         pi2_temp_iquant_coeff += ps_rdoq_ctxt->i4_iq_data_strd - 4;
   3083                     }
   3084                 }
   3085 
   3086                 /* If the csb to be masked is the last csb, then we should
   3087                  * signal last x and last y from the next coded sub_blk */
   3088                 if(i == last_csb)
   3089                 {
   3090                     pu1_coeff_buf_hdr = (UWORD8 *)pu2_sig_coeff_buf;
   3091 
   3092                     ps_rdoq_ctxt->pu1_csbf_buf[pi4_subBlock2csbfId_map[pu1_trans_table[i]]] = 0;
   3093                     last_csb = ihevce_find_new_last_csb(
   3094                         pi4_subBlock2csbfId_map,
   3095                         i,
   3096                         (void *)ps_rdoq_ctxt,
   3097                         pu1_trans_table,
   3098                         pu1_csb_table,
   3099                         pi2_coeffs,
   3100                         shift_value,
   3101                         mask_value,
   3102                         &pu1_coeff_buf_hdr);
   3103                     /*We are in a for loop. This means that the decrement to i happens immediately right
   3104                       at the end of the for loop. This would decrement the value of i to (last_csb - 1).
   3105                       Hence we increment i by 1, so that after the decrement i becomes last_csb.*/
   3106                     i = last_csb + 1;
   3107                     pu1_last_sig_coeff_x = &pu1_coeff_buf_hdr[0];
   3108                     pu1_last_sig_coeff_y = &pu1_coeff_buf_hdr[1];
   3109                     scan_type = pu1_coeff_buf_hdr[2];
   3110                     pu2_sig_coeff_buf = (UWORD16 *)(pu1_coeff_buf_hdr + 4);
   3111                 }
   3112                 i8_tu_coded_dist += i8_sub_blk_not_coded_dist;
   3113                 i4_tu_coded_bits += i4_sub_blk_not_coded_bits;
   3114             }
   3115             else
   3116             {
   3117                 ps_rdoq_ctxt->i1_tu_is_coded = 1;
   3118                 temp_gt1_ctxt = gt1_ctxt;
   3119 
   3120                 i8_tu_coded_dist += i8_sub_blk_coded_dist;
   3121                 i4_tu_coded_bits += i4_sub_blk_coded_bits;
   3122             }
   3123 #if DISABLE_ZCSBF
   3124             i4_skip_zero_cbf = i4_skip_zero_cbf || i4_skip_zero_csbf;
   3125 #endif
   3126             /*Cumulating the distortion for the entire TU*/
   3127             i8_tu_not_coded_dist += i8_sub_blk_not_coded_dist;
   3128             //i4_tu_coded_dist                += i4_sub_blk_coded_dist;
   3129             //i4_tu_coded_bits                += i4_sub_blk_coded_bits;
   3130             i8_sub_blk_not_coded_dist = 0;
   3131             i4_sub_blk_not_coded_bits = 0;
   3132             i8_sub_blk_coded_dist = 0;
   3133             i4_sub_blk_coded_bits = 0;
   3134 
   3135             if(i4_sub_blk_is_coded)
   3136             {
   3137                 ps_rdoq_ctxt->pu1_csbf_buf[pi4_subBlock2csbfId_map[pu1_trans_table[i]]] = 1;
   3138                 temp_zero_col = (temp_zero_col) | (0xF << scaled_blk_col);
   3139                 temp_zero_row = (temp_zero_row) | (0xF << scaled_blk_row);
   3140             }
   3141             else
   3142             {
   3143                 if(!((ps_rdoq_ctxt->i1_tu_is_coded == 1) && (i == 0)))
   3144                 {
   3145                     ps_rdoq_ctxt->pu1_csbf_buf[pi4_subBlock2csbfId_map[pu1_trans_table[i]]] = 0;
   3146                 }
   3147             }
   3148         }
   3149     }
   3150 
   3151     /*tap texture bits*/
   3152     {
   3153         ps_cabac->u4_texture_bits_estimated_q12 +=
   3154             (ps_cabac->u4_bits_estimated_q12 - temp_tex_bits_q12);
   3155     }
   3156 
   3157     i8_tu_not_coded_dist =
   3158         CALC_CUMMUL_SSD_IN_TRANS_DOMAIN(i8_tu_not_coded_dist, 0, i4_round_val, i4_shift_val);
   3159 
   3160     /* i4_tu_coded_dist = CALC_CUMMUL_SSD_IN_TRANS_DOMAIN(
   3161         i4_tu_coded_dist, 0, i4_round_val, i4_shift_val); */
   3162     *pi8_tu_coded_dist = i8_tu_coded_dist;
   3163     *pi8_tu_not_coded_dist = i8_tu_not_coded_dist;
   3164 #if DISABLE_ZCSBF
   3165     if(i4_skip_zero_cbf == 1)
   3166     {
   3167         *pi8_tu_not_coded_dist = 0x7FFFFFFF;
   3168     }
   3169 #endif
   3170 
   3171     *ps_rdoq_ctxt->pi4_zero_col = ~temp_zero_col;
   3172     *ps_rdoq_ctxt->pi4_zero_row = ~temp_zero_row;
   3173 
   3174     return (ret);
   3175 }
   3176 
   3177 /**
   3178 ******************************************************************************
   3179 *
   3180 *  @brief Codes all the sig coeffs as 0
   3181 *
   3182 *  @param[in]   i
   3183 *  Index of the current csb
   3184 *
   3185 *  @param[in]   pu1_trans_table
   3186 *  Pointer to the trans table
   3187 *
   3188 *  @param[in]  scan_type
   3189 *  Determines the scan order
   3190 *
   3191 *  @param[in]  infer_coeff
   3192 *  Indicates whether the 0,0 coeff can be inferred or not
   3193 *
   3194 *  @param[in]   nbr_csbf
   3195 *  Talks about if the neighboour csbs(right and bottom) are coded or not
   3196 *
   3197 *  @param[in]    ps_cabac
   3198 *  Cabac state
   3199 *
   3200 *  @param[out]    pi4_tu_not_coded_dist
   3201 *  The distortion when the entire TU is not coded(all coeffs are set to 0) is stored here
   3202 *
   3203 *  @return    The number of bits generated when the 0th sub blk is coded as all 0s
   3204 *             This is the cumulate bits(i.e. for all blocks in the TU), and not only
   3205 *             the bits generated for this block
   3206 *
   3207 ******************************************************************************
   3208 */
   3209 WORD32 ihevce_code_all_sig_coeffs_as_0_explicitly(
   3210     void *pv_rdoq_ctxt,
   3211     WORD32 i,
   3212     UWORD8 *pu1_trans_table,
   3213     WORD32 is_luma,
   3214     WORD32 scan_type,
   3215     WORD32 infer_coeff,
   3216     WORD32 nbr_csbf,
   3217     cab_ctxt_t *ps_cabac)
   3218 {
   3219     WORD32 sig_coeff_base_ctxt;
   3220     WORD32 scan_pos = 15;
   3221     WORD32 ctxt_idx;
   3222     WORD32 ret = 0;
   3223 
   3224     rdoq_sbh_ctxt_t *ps_rdoq_ctxt = (rdoq_sbh_ctxt_t *)pv_rdoq_ctxt;
   3225 
   3226     WORD32 log2_tr_size = ps_rdoq_ctxt->i4_log2_trans_size;
   3227 
   3228     (void)pu1_trans_table;
   3229     if(is_luma)
   3230     {
   3231         sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG;
   3232         if(3 == log2_tr_size)
   3233         {
   3234             /* 8x8 transform size */
   3235             sig_coeff_base_ctxt += (scan_type == SCAN_DIAG_UPRIGHT) ? 9 : 15;
   3236         }
   3237         else if(3 < log2_tr_size)
   3238         {
   3239             /* larger transform sizes */
   3240             sig_coeff_base_ctxt += 21;
   3241         }
   3242     }
   3243     else
   3244     {
   3245         /* chroma context initializations */
   3246         sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG + 27;
   3247 
   3248         if(3 == log2_tr_size)
   3249         {
   3250             /* 8x8 transform size */
   3251             sig_coeff_base_ctxt += 9;
   3252         }
   3253         else if(3 < log2_tr_size)
   3254         {
   3255             /* larger transform sizes */
   3256             sig_coeff_base_ctxt += 12;
   3257         }
   3258     }
   3259     while(scan_pos >= 0)
   3260     {
   3261         WORD32 sig_ctxinc = 0; /* 0 is default inc for DC coeff */
   3262         WORD32 sig_coeff = 0;
   3263         /* derive the x,y pos */
   3264         WORD32 y_pos_x_pos = gu1_hevce_scan4x4[scan_type][scan_pos];
   3265 
   3266         /* derive the context inc as per section 9.3.3.1.4 */
   3267         if(2 == log2_tr_size)
   3268         {
   3269             /* 4x4 transform size increment uses lookup */
   3270             sig_ctxinc = gu1_hevce_sigcoeff_ctxtinc_tr4[y_pos_x_pos];
   3271         }
   3272         else if(scan_pos || i)
   3273         {
   3274             /* ctxt for AC coeff depends on curpos and neigbour csbf */
   3275             sig_ctxinc = gu1_hevce_sigcoeff_ctxtinc[nbr_csbf][y_pos_x_pos];
   3276 
   3277             /* based on luma subblock pos */
   3278             sig_ctxinc += (i && is_luma) ? 3 : 0;
   3279         }
   3280         else
   3281         {
   3282             /* DC coeff has fixed context for luma and chroma */
   3283             sig_coeff_base_ctxt = is_luma ? IHEVC_CAB_COEFF_FLAG : IHEVC_CAB_COEFF_FLAG + 27;
   3284         }
   3285 
   3286         if(scan_pos || (!infer_coeff))
   3287         {
   3288             ctxt_idx = sig_ctxinc + sig_coeff_base_ctxt;
   3289             ret |= ihevce_cabac_encode_bin(ps_cabac, sig_coeff, ctxt_idx);
   3290             AEV_TRACE("significant_coeff_flag", sig_coeff, ps_cabac->u4_range);
   3291         }
   3292         scan_pos--;
   3293     }
   3294     return (ps_cabac->u4_bits_estimated_q12);  // - i4_temp_bits);
   3295 }
   3296 
   3297 /**
   3298 ******************************************************************************
   3299 *
   3300 *  @brief Finds the next csb with a non-zero coeff
   3301 *
   3302 *  @paramp[in]  cur_last_csb_pos
   3303 *  The index of the current csb with a non-zero coeff
   3304 *
   3305 *  @param[inout]   pv_rdoq_ctxt
   3306 *  RODQ context structure
   3307 *
   3308 *  @param[in]   pu1_trans_table
   3309 *  Pointer to the trans table
   3310 *
   3311 *  @param[in]   pi2_coeffs
   3312 *  Pointer to all the quantized coefficients
   3313 *
   3314 *  @param[in]  shift_value
   3315 *  Determines the shifting value for determining appropriate position of coeff
   3316 *
   3317 *  @param[in]  mask_value
   3318 *  Determines the masking value for determining appropriate position of coeff
   3319 *
   3320 *  @param[in]   nbr_csbf
   3321 *  Talks about if the neighboour csbs(right and bottom) are coded or not
   3322 *
   3323 *  @param[in]    ps_cabac
   3324 *  Cabac state
   3325 *
   3326 *  @param[inout] ppu1_addr
   3327 *  Pointer to the header(i.e. pointer used for traversing the ecd data generated
   3328 *  in ihevce_scan_coeffs)
   3329 *
   3330 *  @return    The index of the csb with the next non-zero coeff
   3331 *
   3332 ******************************************************************************
   3333 */
   3334 WORD32 ihevce_find_new_last_csb(
   3335     WORD32 *pi4_subBlock2csbfId_map,
   3336     WORD32 cur_last_csb_pos,
   3337     void *pv_rdoq_ctxt,
   3338     UWORD8 *pu1_trans_table,
   3339     UWORD8 *pu1_csb_table,
   3340     WORD16 *pi2_coeffs,
   3341     WORD32 shift_value,
   3342     WORD32 mask_value,
   3343     UWORD8 **ppu1_addr)
   3344 {
   3345     WORD32 blk_row;
   3346     WORD32 blk_col;
   3347     WORD32 x_pos;
   3348     WORD32 y_pos;
   3349     WORD32 i;
   3350     WORD32 j;
   3351     UWORD16 *pu2_out_data_coeff;
   3352     rdoq_sbh_ctxt_t *ps_rdoq_ctxt = (rdoq_sbh_ctxt_t *)pv_rdoq_ctxt;
   3353     WORD32 trans_size = ps_rdoq_ctxt->i4_trans_size;
   3354     UWORD8 *pu1_out_data_header = *ppu1_addr;
   3355 
   3356     for(i = cur_last_csb_pos - 1; i >= 0; i--)
   3357     {
   3358         /* check for the first csb flag in our scan order */
   3359         if(ps_rdoq_ctxt->pu1_csbf_buf[pi4_subBlock2csbfId_map[pu1_trans_table[i]]])
   3360         {
   3361             UWORD8 u1_last_x, u1_last_y;
   3362             WORD32 quant_coeff;
   3363 
   3364             pu1_out_data_header -= 4;  //To move the pointer back to the appropriate position
   3365             /* row of csb */
   3366             blk_row = pu1_trans_table[i] >> shift_value;
   3367             /* col of csb */
   3368             blk_col = pu1_trans_table[i] & mask_value;
   3369 
   3370             /*check for the 1st non-0 values inside the csb in our scan order*/
   3371             for(j = 15; j >= 0; j--)
   3372             {
   3373                 x_pos = (pu1_csb_table[j] & 0x3) + blk_col * 4;
   3374                 y_pos = (pu1_csb_table[j] >> 2) + blk_row * 4;
   3375 
   3376                 quant_coeff = pi2_coeffs[x_pos + (y_pos * trans_size)];
   3377 
   3378                 if(quant_coeff != 0)
   3379                     break;
   3380             }
   3381 
   3382             ASSERT(j >= 0);
   3383 
   3384             u1_last_x = x_pos;
   3385             u1_last_y = y_pos;
   3386 
   3387             /* storing last_x and last_y */
   3388             *(pu1_out_data_header) = u1_last_x;
   3389             *(pu1_out_data_header + 1) = u1_last_y;
   3390 
   3391             /* storing the scan order */
   3392             *(pu1_out_data_header + 2) = ps_rdoq_ctxt->i4_scan_idx;
   3393 
   3394             /* storing last_sub_block pos. in scan order count */
   3395             *(pu1_out_data_header + 3) = i;
   3396 
   3397             /*stored the first 4 bytes, now all are word16. So word16 pointer*/
   3398             pu2_out_data_coeff = (UWORD16 *)(pu1_out_data_header + 4);
   3399 
   3400             *pu2_out_data_coeff = 0xBAD0 | 1; /*since right&bottom csbf is 0*/
   3401             *ppu1_addr = pu1_out_data_header;
   3402 
   3403             break; /*We just need this loop for finding 1st non-zero csb only*/
   3404         }
   3405         else
   3406             pu1_out_data_header += 2;
   3407     }
   3408     return i;
   3409 }
   3410 
   3411 /**
   3412 ******************************************************************************
   3413 *
   3414 *  @brief Used to optimize the memcpy of cabac states. It copies only those
   3415 *  states in the cabac context which have been altered.
   3416 *
   3417 *  @paramp[inout]  pv_dest
   3418 *  Pointer to desitination cabac state.
   3419 *
   3420 *  @param[inout]   pv_backup_ctxt_dest
   3421 *  Pointer to destination backup context
   3422 *
   3423 *  @param[inout]   pv_backup_ctxt_src
   3424 *  Pointer to source backup context
   3425 *
   3426 *  @Desc:
   3427 *  We go through each element in the backup_ctxt structure which will tell us
   3428 *  if the states corresponding to lastxlasty, sigcoeffs, grtr_than_1_bins,
   3429 *  grtr_than_2_bins and sub_blk_coded_flag(i.e. 0xBAD0) context elements
   3430 *  have been altered. If they have been altered, we will memcpy the states
   3431 *  corresponding to these context elements alone
   3432 *
   3433 *  @return  Nothing
   3434 *
   3435 ******************************************************************************
   3436 */
   3437 void ihevce_copy_backup_ctxt(
   3438     void *pv_dest, void *pv_src, void *pv_backup_ctxt_dest, void *pv_backup_ctxt_src)
   3439 {
   3440     UWORD8 *pu1_dest = (UWORD8 *)(((cab_ctxt_t *)pv_dest)->au1_ctxt_models);
   3441     UWORD8 *pu1_src = (UWORD8 *)(((cab_ctxt_t *)pv_src)->au1_ctxt_models);
   3442     backup_ctxt_t *ps_backup_dest_ctxt = ((backup_ctxt_t *)pv_backup_ctxt_dest);
   3443     backup_ctxt_t *ps_backup_src_ctxt = ((backup_ctxt_t *)pv_backup_ctxt_src);
   3444     WORD32 i4_i;
   3445 
   3446     /*
   3447     0       IHEVC_CAB_COEFFX_PREFIX         lastx last y has been coded
   3448     1       IHEVC_CAB_CODED_SUBLK_IDX       sub-blk coded or not flag has been coded
   3449     2       IHEVC_CAB_COEFF_FLAG            sigcoeff has been coded
   3450     3       IHEVC_CAB_COEFABS_GRTR1_FLAG    greater than 1 bin has been coded
   3451     4       IHEVC_CAB_COEFABS_GRTR2_FLAG    greater than 2 bin has been coded*/
   3452     assert(MAX_NUM_CONTEXT_ELEMENTS == 5);
   3453     for(i4_i = 0; i4_i < MAX_NUM_CONTEXT_ELEMENTS; i4_i++)
   3454     {
   3455         if((ps_backup_src_ctxt->au1_ctxt_to_backup[SIG_COEFF]) ||
   3456            (ps_backup_dest_ctxt->au1_ctxt_to_backup[SIG_COEFF]))
   3457         {
   3458             memcpy(&pu1_dest[IHEVC_CAB_COEFF_FLAG], &pu1_src[IHEVC_CAB_COEFF_FLAG], 42);
   3459             ps_backup_dest_ctxt->au1_ctxt_to_backup[SIG_COEFF] = 0;
   3460             ps_backup_src_ctxt->au1_ctxt_to_backup[SIG_COEFF] = 0;
   3461         }
   3462         if((ps_backup_src_ctxt->au1_ctxt_to_backup[GRTR_THAN_1]) ||
   3463            (ps_backup_dest_ctxt->au1_ctxt_to_backup[GRTR_THAN_1]))
   3464         {
   3465             memcpy(
   3466                 &pu1_dest[IHEVC_CAB_COEFABS_GRTR1_FLAG],
   3467                 &pu1_src[IHEVC_CAB_COEFABS_GRTR1_FLAG],
   3468                 24);
   3469             ps_backup_dest_ctxt->au1_ctxt_to_backup[GRTR_THAN_1] = 0;
   3470             ps_backup_src_ctxt->au1_ctxt_to_backup[GRTR_THAN_1] = 0;
   3471         }
   3472         if((ps_backup_src_ctxt->au1_ctxt_to_backup[GRTR_THAN_2]) ||
   3473            (ps_backup_dest_ctxt->au1_ctxt_to_backup[GRTR_THAN_2]))
   3474         {
   3475             memcpy(
   3476                 &pu1_dest[IHEVC_CAB_COEFABS_GRTR2_FLAG], &pu1_src[IHEVC_CAB_COEFABS_GRTR2_FLAG], 6);
   3477             ps_backup_dest_ctxt->au1_ctxt_to_backup[GRTR_THAN_2] = 0;
   3478             ps_backup_src_ctxt->au1_ctxt_to_backup[GRTR_THAN_2] = 0;
   3479         }
   3480         if((ps_backup_src_ctxt->au1_ctxt_to_backup[SUB_BLK_CODED_FLAG]) ||
   3481            (ps_backup_dest_ctxt->au1_ctxt_to_backup[SUB_BLK_CODED_FLAG]))
   3482         {
   3483             memcpy(&pu1_dest[IHEVC_CAB_CODED_SUBLK_IDX], &pu1_src[IHEVC_CAB_CODED_SUBLK_IDX], 4);
   3484             ps_backup_dest_ctxt->au1_ctxt_to_backup[SUB_BLK_CODED_FLAG] = 0;
   3485             ps_backup_src_ctxt->au1_ctxt_to_backup[SUB_BLK_CODED_FLAG] = 0;
   3486         }
   3487         if((ps_backup_src_ctxt->au1_ctxt_to_backup[LASTXY]) ||
   3488            (ps_backup_dest_ctxt->au1_ctxt_to_backup[LASTXY]))
   3489         {
   3490             memcpy(&pu1_dest[IHEVC_CAB_COEFFX_PREFIX], &pu1_src[IHEVC_CAB_COEFFX_PREFIX], 36);
   3491             ps_backup_dest_ctxt->au1_ctxt_to_backup[LASTXY] = 0;
   3492             ps_backup_src_ctxt->au1_ctxt_to_backup[LASTXY] = 0;
   3493         }
   3494     }
   3495     ((cab_ctxt_t *)pv_dest)->u4_bits_estimated_q12 = ((cab_ctxt_t *)pv_src)->u4_bits_estimated_q12;
   3496 }
   3497