Home | History | Annotate | Download | only in encoder
      1 /*
      2  * Copyright (c) 2017, Alliance for Open Media. All rights reserved
      3  *
      4  * This source code is subject to the terms of the BSD 2 Clause License and
      5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6  * was not distributed with this source code in the LICENSE file, you can
      7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8  * Media Patent License 1.0 was not distributed with this source code in the
      9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10  */
     11 
     12 #include "av1/encoder/encodetxb.h"
     13 
     14 #include "aom_ports/mem.h"
     15 #include "av1/common/blockd.h"
     16 #include "av1/common/idct.h"
     17 #include "av1/common/pred_common.h"
     18 #include "av1/common/scan.h"
     19 #include "av1/encoder/bitstream.h"
     20 #include "av1/encoder/cost.h"
     21 #include "av1/encoder/encodeframe.h"
     22 #include "av1/encoder/hash.h"
     23 #include "av1/encoder/rdopt.h"
     24 #include "av1/encoder/tokenize.h"
     25 
     26 static int hbt_needs_init = 1;
     27 static CRC32C crc_calculator;
     28 static const int HBT_EOB = 16;            // also the length in opt_qcoeff
     29 static const int HBT_TABLE_SIZE = 65536;  // 16 bit: holds 65536 'arrays'
     30 static const int HBT_ARRAY_LENGTH = 256;  // 8 bit: 256 entries
     31 // If removed in hbt_create_hashes or increased beyond int8_t, widen deltas type
     32 static const int HBT_KICKOUT = 3;
     33 
     34 typedef struct OptTxbQcoeff {
     35   // Use larger type if larger/no kickout value is used in hbt_create_hashes
     36   int8_t deltas[16];
     37   uint32_t hbt_qc_hash;
     38   uint32_t hbt_ctx_hash;
     39   int init;
     40   int rate_cost;
     41 } OptTxbQcoeff;
     42 
     43 OptTxbQcoeff *hbt_hash_table;
     44 
     45 typedef struct LevelDownStats {
     46   int update;
     47   tran_low_t low_qc;
     48   tran_low_t low_dqc;
     49   int64_t dist0;
     50   int rate;
     51   int rate_low;
     52   int64_t dist;
     53   int64_t dist_low;
     54   int64_t rd;
     55   int64_t rd_low;
     56   int64_t nz_rd;
     57   int64_t rd_diff;
     58   int cost_diff;
     59   int64_t dist_diff;
     60   int new_eob;
     61 } LevelDownStats;
     62 
     63 void av1_alloc_txb_buf(AV1_COMP *cpi) {
     64   AV1_COMMON *cm = &cpi->common;
     65   int size = ((cm->mi_rows >> cm->seq_params.mib_size_log2) + 1) *
     66              ((cm->mi_cols >> cm->seq_params.mib_size_log2) + 1);
     67 
     68   av1_free_txb_buf(cpi);
     69   // TODO(jingning): This should be further reduced.
     70   CHECK_MEM_ERROR(cm, cpi->coeff_buffer_base,
     71                   aom_memalign(32, sizeof(*cpi->coeff_buffer_base) * size));
     72 }
     73 
     74 void av1_free_txb_buf(AV1_COMP *cpi) { aom_free(cpi->coeff_buffer_base); }
     75 
     76 void av1_set_coeff_buffer(const AV1_COMP *const cpi, MACROBLOCK *const x,
     77                           int mi_row, int mi_col) {
     78   const AV1_COMMON *const cm = &cpi->common;
     79   int mib_size_log2 = cm->seq_params.mib_size_log2;
     80   int stride = (cm->mi_cols >> mib_size_log2) + 1;
     81   int offset = (mi_row >> mib_size_log2) * stride + (mi_col >> mib_size_log2);
     82   x->mbmi_ext->cb_coef_buff = &cpi->coeff_buffer_base[offset];
     83   x->mbmi_ext->cb_offset = x->cb_offset;
     84   assert(x->cb_offset < (1 << num_pels_log2_lookup[cm->seq_params.sb_size]));
     85 }
     86 
     87 static void write_golomb(aom_writer *w, int level) {
     88   int x = level + 1;
     89   int i = x;
     90   int length = 0;
     91 
     92   while (i) {
     93     i >>= 1;
     94     ++length;
     95   }
     96   assert(length > 0);
     97 
     98   for (i = 0; i < length - 1; ++i) aom_write_bit(w, 0);
     99 
    100   for (i = length - 1; i >= 0; --i) aom_write_bit(w, (x >> i) & 0x01);
    101 }
    102 
    103 static INLINE tran_low_t get_lower_coeff(tran_low_t qc) {
    104   if (qc == 0) {
    105     return 0;
    106   }
    107   return qc > 0 ? qc - 1 : qc + 1;
    108 }
    109 
    110 static INLINE tran_low_t qcoeff_to_dqcoeff(tran_low_t qc, int coeff_idx,
    111                                            int dqv, int shift,
    112                                            const qm_val_t *iqmatrix) {
    113   int sign = qc < 0 ? -1 : 1;
    114   if (iqmatrix != NULL)
    115     dqv =
    116         ((iqmatrix[coeff_idx] * dqv) + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
    117   return sign * ((abs(qc) * dqv) >> shift);
    118 }
    119 
    120 static INLINE int64_t get_coeff_dist(tran_low_t tcoeff, tran_low_t dqcoeff,
    121                                      int shift) {
    122   const int64_t diff = (tcoeff - dqcoeff) * (1 << shift);
    123   const int64_t error = diff * diff;
    124   return error;
    125 }
    126 
    127 static const int8_t eob_to_pos_small[33] = {
    128   0, 1, 2,                                        // 0-2
    129   3, 3,                                           // 3-4
    130   4, 4, 4, 4,                                     // 5-8
    131   5, 5, 5, 5, 5, 5, 5, 5,                         // 9-16
    132   6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6  // 17-32
    133 };
    134 
    135 static const int8_t eob_to_pos_large[17] = {
    136   6,                               // place holder
    137   7,                               // 33-64
    138   8,  8,                           // 65-128
    139   9,  9,  9,  9,                   // 129-256
    140   10, 10, 10, 10, 10, 10, 10, 10,  // 257-512
    141   11                               // 513-
    142 };
    143 
    144 static INLINE int get_eob_pos_token(const int eob, int *const extra) {
    145   int t;
    146 
    147   if (eob < 33) {
    148     t = eob_to_pos_small[eob];
    149   } else {
    150     const int e = AOMMIN((eob - 1) >> 5, 16);
    151     t = eob_to_pos_large[e];
    152   }
    153 
    154   *extra = eob - k_eob_group_start[t];
    155 
    156   return t;
    157 }
    158 
    159 #if CONFIG_ENTROPY_STATS
    160 void av1_update_eob_context(int cdf_idx, int eob, TX_SIZE tx_size,
    161                             TX_CLASS tx_class, PLANE_TYPE plane,
    162                             FRAME_CONTEXT *ec_ctx, FRAME_COUNTS *counts,
    163                             uint8_t allow_update_cdf) {
    164 #else
    165 void av1_update_eob_context(int eob, TX_SIZE tx_size, TX_CLASS tx_class,
    166                             PLANE_TYPE plane, FRAME_CONTEXT *ec_ctx,
    167                             uint8_t allow_update_cdf) {
    168 #endif
    169   int eob_extra;
    170   const int eob_pt = get_eob_pos_token(eob, &eob_extra);
    171   TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
    172 
    173   const int eob_multi_size = txsize_log2_minus4[tx_size];
    174   const int eob_multi_ctx = (tx_class == TX_CLASS_2D) ? 0 : 1;
    175 
    176   switch (eob_multi_size) {
    177     case 0:
    178 #if CONFIG_ENTROPY_STATS
    179       ++counts->eob_multi16[cdf_idx][plane][eob_multi_ctx][eob_pt - 1];
    180 #endif
    181       if (allow_update_cdf)
    182         update_cdf(ec_ctx->eob_flag_cdf16[plane][eob_multi_ctx], eob_pt - 1, 5);
    183       break;
    184     case 1:
    185 #if CONFIG_ENTROPY_STATS
    186       ++counts->eob_multi32[cdf_idx][plane][eob_multi_ctx][eob_pt - 1];
    187 #endif
    188       if (allow_update_cdf)
    189         update_cdf(ec_ctx->eob_flag_cdf32[plane][eob_multi_ctx], eob_pt - 1, 6);
    190       break;
    191     case 2:
    192 #if CONFIG_ENTROPY_STATS
    193       ++counts->eob_multi64[cdf_idx][plane][eob_multi_ctx][eob_pt - 1];
    194 #endif
    195       if (allow_update_cdf)
    196         update_cdf(ec_ctx->eob_flag_cdf64[plane][eob_multi_ctx], eob_pt - 1, 7);
    197       break;
    198     case 3:
    199 #if CONFIG_ENTROPY_STATS
    200       ++counts->eob_multi128[cdf_idx][plane][eob_multi_ctx][eob_pt - 1];
    201 #endif
    202       if (allow_update_cdf) {
    203         update_cdf(ec_ctx->eob_flag_cdf128[plane][eob_multi_ctx], eob_pt - 1,
    204                    8);
    205       }
    206       break;
    207     case 4:
    208 #if CONFIG_ENTROPY_STATS
    209       ++counts->eob_multi256[cdf_idx][plane][eob_multi_ctx][eob_pt - 1];
    210 #endif
    211       if (allow_update_cdf) {
    212         update_cdf(ec_ctx->eob_flag_cdf256[plane][eob_multi_ctx], eob_pt - 1,
    213                    9);
    214       }
    215       break;
    216     case 5:
    217 #if CONFIG_ENTROPY_STATS
    218       ++counts->eob_multi512[cdf_idx][plane][eob_multi_ctx][eob_pt - 1];
    219 #endif
    220       if (allow_update_cdf) {
    221         update_cdf(ec_ctx->eob_flag_cdf512[plane][eob_multi_ctx], eob_pt - 1,
    222                    10);
    223       }
    224       break;
    225     case 6:
    226     default:
    227 #if CONFIG_ENTROPY_STATS
    228       ++counts->eob_multi1024[cdf_idx][plane][eob_multi_ctx][eob_pt - 1];
    229 #endif
    230       if (allow_update_cdf) {
    231         update_cdf(ec_ctx->eob_flag_cdf1024[plane][eob_multi_ctx], eob_pt - 1,
    232                    11);
    233       }
    234       break;
    235   }
    236 
    237   if (k_eob_offset_bits[eob_pt] > 0) {
    238     int eob_ctx = eob_pt - 3;
    239     int eob_shift = k_eob_offset_bits[eob_pt] - 1;
    240     int bit = (eob_extra & (1 << eob_shift)) ? 1 : 0;
    241 #if CONFIG_ENTROPY_STATS
    242     counts->eob_extra[cdf_idx][txs_ctx][plane][eob_pt][bit]++;
    243 #endif  // CONFIG_ENTROPY_STATS
    244     if (allow_update_cdf)
    245       update_cdf(ec_ctx->eob_extra_cdf[txs_ctx][plane][eob_ctx], bit, 2);
    246   }
    247 }
    248 
    249 static int get_eob_cost(int eob, const LV_MAP_EOB_COST *txb_eob_costs,
    250                         const LV_MAP_COEFF_COST *txb_costs, TX_CLASS tx_class) {
    251   int eob_extra;
    252   const int eob_pt = get_eob_pos_token(eob, &eob_extra);
    253   int eob_cost = 0;
    254   const int eob_multi_ctx = (tx_class == TX_CLASS_2D) ? 0 : 1;
    255   eob_cost = txb_eob_costs->eob_cost[eob_multi_ctx][eob_pt - 1];
    256 
    257   if (k_eob_offset_bits[eob_pt] > 0) {
    258     const int eob_ctx = eob_pt - 3;
    259     const int eob_shift = k_eob_offset_bits[eob_pt] - 1;
    260     const int bit = (eob_extra & (1 << eob_shift)) ? 1 : 0;
    261     eob_cost += txb_costs->eob_extra_cost[eob_ctx][bit];
    262     const int offset_bits = k_eob_offset_bits[eob_pt];
    263     if (offset_bits > 1) eob_cost += av1_cost_literal(offset_bits - 1);
    264   }
    265   return eob_cost;
    266 }
    267 
    268 static INLINE int get_sign_bit_cost(tran_low_t qc, int coeff_idx,
    269                                     const int (*dc_sign_cost)[2],
    270                                     int dc_sign_ctx) {
    271   if (coeff_idx == 0) {
    272     const int sign = (qc < 0) ? 1 : 0;
    273     return dc_sign_cost[dc_sign_ctx][sign];
    274   }
    275   return av1_cost_literal(1);
    276 }
    277 
    278 static const int golomb_bits_cost[32] = {
    279   0,       512,     512 * 3, 512 * 3, 512 * 5, 512 * 5, 512 * 5, 512 * 5,
    280   512 * 7, 512 * 7, 512 * 7, 512 * 7, 512 * 7, 512 * 7, 512 * 7, 512 * 7,
    281   512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9,
    282   512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9
    283 };
    284 static const int golomb_cost_diff[32] = {
    285   0,       512, 512 * 2, 0, 512 * 2, 0, 0, 0, 512 * 2, 0, 0, 0, 0, 0, 0, 0,
    286   512 * 2, 0,   0,       0, 0,       0, 0, 0, 0,       0, 0, 0, 0, 0, 0, 0
    287 };
    288 
    289 static INLINE int get_golomb_cost(int abs_qc) {
    290   if (abs_qc >= 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE) {
    291     const int r = abs_qc - COEFF_BASE_RANGE - NUM_BASE_LEVELS;
    292     const int length = get_msb(r) + 1;
    293     return av1_cost_literal(2 * length - 1);
    294   }
    295   return 0;
    296 }
    297 
    298 static INLINE int get_br_cost_with_diff(tran_low_t level, const int *coeff_lps,
    299                                         int *diff) {
    300   const int base_range = AOMMIN(level - 1 - NUM_BASE_LEVELS, COEFF_BASE_RANGE);
    301   int golomb_bits = 0;
    302   if (level <= COEFF_BASE_RANGE + 1 + NUM_BASE_LEVELS)
    303     *diff += coeff_lps[base_range + COEFF_BASE_RANGE + 1];
    304 
    305   if (level >= COEFF_BASE_RANGE + 1 + NUM_BASE_LEVELS) {
    306     int r = level - COEFF_BASE_RANGE - NUM_BASE_LEVELS;
    307     if (r < 32) {
    308       golomb_bits = golomb_bits_cost[r];
    309       *diff += golomb_cost_diff[r];
    310     } else {
    311       golomb_bits = get_golomb_cost(level);
    312       *diff += (r & (r - 1)) == 0 ? 1024 : 0;
    313     }
    314   }
    315 
    316   return coeff_lps[base_range] + golomb_bits;
    317 }
    318 
    319 static INLINE int get_br_cost(tran_low_t level, const int *coeff_lps) {
    320   const int base_range = AOMMIN(level - 1 - NUM_BASE_LEVELS, COEFF_BASE_RANGE);
    321   return coeff_lps[base_range] + get_golomb_cost(level);
    322 }
    323 
    324 static int get_coeff_cost(const tran_low_t qc, const int scan_idx,
    325                           const int is_eob, const TxbInfo *const txb_info,
    326                           const LV_MAP_COEFF_COST *const txb_costs,
    327                           const int coeff_ctx, const TX_CLASS tx_class) {
    328   const TXB_CTX *const txb_ctx = txb_info->txb_ctx;
    329   const int is_nz = (qc != 0);
    330   const tran_low_t abs_qc = abs(qc);
    331   int cost = 0;
    332   const int16_t *const scan = txb_info->scan_order->scan;
    333   const int pos = scan[scan_idx];
    334 
    335   if (is_eob) {
    336     cost += txb_costs->base_eob_cost[coeff_ctx][AOMMIN(abs_qc, 3) - 1];
    337   } else {
    338     cost += txb_costs->base_cost[coeff_ctx][AOMMIN(abs_qc, 3)];
    339   }
    340   if (is_nz) {
    341     cost += get_sign_bit_cost(qc, scan_idx, txb_costs->dc_sign_cost,
    342                               txb_ctx->dc_sign_ctx);
    343 
    344     if (abs_qc > NUM_BASE_LEVELS) {
    345       const int ctx =
    346           get_br_ctx(txb_info->levels, pos, txb_info->bwl, tx_class);
    347       cost += get_br_cost(abs_qc, txb_costs->lps_cost[ctx]);
    348     }
    349   }
    350   return cost;
    351 }
    352 
    353 static INLINE int get_nz_map_ctx(const uint8_t *const levels,
    354                                  const int coeff_idx, const int bwl,
    355                                  const int height, const int scan_idx,
    356                                  const int is_eob, const TX_SIZE tx_size,
    357                                  const TX_CLASS tx_class) {
    358   if (is_eob) {
    359     if (scan_idx == 0) return 0;
    360     if (scan_idx <= (height << bwl) / 8) return 1;
    361     if (scan_idx <= (height << bwl) / 4) return 2;
    362     return 3;
    363   }
    364   const int stats =
    365       get_nz_mag(levels + get_padded_idx(coeff_idx, bwl), bwl, tx_class);
    366   return get_nz_map_ctx_from_stats(stats, coeff_idx, bwl, tx_size, tx_class);
    367 }
    368 
    369 static void get_dist_cost_stats(LevelDownStats *const stats, const int scan_idx,
    370                                 const int is_eob,
    371                                 const LV_MAP_COEFF_COST *const txb_costs,
    372                                 const TxbInfo *const txb_info,
    373                                 const TX_CLASS tx_class) {
    374   const int16_t *const scan = txb_info->scan_order->scan;
    375   const int coeff_idx = scan[scan_idx];
    376   const tran_low_t qc = txb_info->qcoeff[coeff_idx];
    377   const uint8_t *const levels = txb_info->levels;
    378   stats->new_eob = -1;
    379   stats->update = 0;
    380   stats->rd_low = 0;
    381   stats->rd = 0;
    382   stats->nz_rd = 0;
    383   stats->dist_low = 0;
    384   stats->rate_low = 0;
    385   stats->low_qc = 0;
    386 
    387   const tran_low_t tqc = txb_info->tcoeff[coeff_idx];
    388   const int dqv = txb_info->dequant[coeff_idx != 0];
    389   const int coeff_ctx =
    390       get_nz_map_ctx(levels, coeff_idx, txb_info->bwl, txb_info->height,
    391                      scan_idx, is_eob, txb_info->tx_size, tx_class);
    392   const int qc_cost = get_coeff_cost(qc, scan_idx, is_eob, txb_info, txb_costs,
    393                                      coeff_ctx, tx_class);
    394   assert(qc != 0);
    395   const tran_low_t dqc = qcoeff_to_dqcoeff(qc, coeff_idx, dqv, txb_info->shift,
    396                                            txb_info->iqmatrix);
    397   const int64_t dqc_dist = get_coeff_dist(tqc, dqc, txb_info->shift);
    398 
    399   // distortion difference when coefficient is quantized to 0
    400   const tran_low_t dqc0 =
    401       qcoeff_to_dqcoeff(0, coeff_idx, dqv, txb_info->shift, txb_info->iqmatrix);
    402 
    403   stats->dist0 = get_coeff_dist(tqc, dqc0, txb_info->shift);
    404   stats->dist = dqc_dist - stats->dist0;
    405   stats->rate = qc_cost;
    406 
    407   stats->rd = RDCOST(txb_info->rdmult, stats->rate, stats->dist);
    408 
    409   stats->low_qc = get_lower_coeff(qc);
    410 
    411   if (is_eob && stats->low_qc == 0) {
    412     stats->rd_low = stats->rd;  // disable selection of low_qc in this case.
    413   } else {
    414     if (stats->low_qc == 0) {
    415       stats->dist_low = 0;
    416     } else {
    417       stats->low_dqc = qcoeff_to_dqcoeff(stats->low_qc, coeff_idx, dqv,
    418                                          txb_info->shift, txb_info->iqmatrix);
    419       const int64_t low_dqc_dist =
    420           get_coeff_dist(tqc, stats->low_dqc, txb_info->shift);
    421       stats->dist_low = low_dqc_dist - stats->dist0;
    422     }
    423     const int low_qc_cost =
    424         get_coeff_cost(stats->low_qc, scan_idx, is_eob, txb_info, txb_costs,
    425                        coeff_ctx, tx_class);
    426     stats->rate_low = low_qc_cost;
    427     stats->rd_low = RDCOST(txb_info->rdmult, stats->rate_low, stats->dist_low);
    428   }
    429 }
    430 
    431 static void get_dist_cost_stats_with_eob(
    432     LevelDownStats *const stats, const int scan_idx,
    433     const LV_MAP_COEFF_COST *const txb_costs, const TxbInfo *const txb_info,
    434     const TX_CLASS tx_class) {
    435   const int is_eob = 0;
    436   get_dist_cost_stats(stats, scan_idx, is_eob, txb_costs, txb_info, tx_class);
    437 
    438   const int16_t *const scan = txb_info->scan_order->scan;
    439   const int coeff_idx = scan[scan_idx];
    440   const tran_low_t qc = txb_info->qcoeff[coeff_idx];
    441   const int coeff_ctx_temp = get_nz_map_ctx(
    442       txb_info->levels, coeff_idx, txb_info->bwl, txb_info->height, scan_idx, 1,
    443       txb_info->tx_size, tx_class);
    444   const int qc_eob_cost = get_coeff_cost(qc, scan_idx, 1, txb_info, txb_costs,
    445                                          coeff_ctx_temp, tx_class);
    446   int64_t rd_eob = RDCOST(txb_info->rdmult, qc_eob_cost, stats->dist);
    447   if (stats->low_qc != 0) {
    448     const int low_qc_eob_cost =
    449         get_coeff_cost(stats->low_qc, scan_idx, 1, txb_info, txb_costs,
    450                        coeff_ctx_temp, tx_class);
    451     int64_t rd_eob_low =
    452         RDCOST(txb_info->rdmult, low_qc_eob_cost, stats->dist_low);
    453     rd_eob = (rd_eob > rd_eob_low) ? rd_eob_low : rd_eob;
    454   }
    455 
    456   stats->nz_rd = AOMMIN(stats->rd_low, stats->rd) - rd_eob;
    457 }
    458 
    459 static INLINE void update_qcoeff(const int coeff_idx, const tran_low_t qc,
    460                                  const TxbInfo *const txb_info) {
    461   txb_info->qcoeff[coeff_idx] = qc;
    462   txb_info->levels[get_padded_idx(coeff_idx, txb_info->bwl)] =
    463       (uint8_t)clamp(abs(qc), 0, INT8_MAX);
    464 }
    465 
    466 static INLINE void update_coeff(const int coeff_idx, const tran_low_t qc,
    467                                 const TxbInfo *const txb_info) {
    468   update_qcoeff(coeff_idx, qc, txb_info);
    469   const int dqv = txb_info->dequant[coeff_idx != 0];
    470   txb_info->dqcoeff[coeff_idx] = qcoeff_to_dqcoeff(
    471       qc, coeff_idx, dqv, txb_info->shift, txb_info->iqmatrix);
    472 }
    473 
    474 void av1_txb_init_levels_c(const tran_low_t *const coeff, const int width,
    475                            const int height, uint8_t *const levels) {
    476   const int stride = width + TX_PAD_HOR;
    477   uint8_t *ls = levels;
    478 
    479   memset(levels + stride * height, 0,
    480          sizeof(*levels) * (TX_PAD_BOTTOM * stride + TX_PAD_END));
    481 
    482   for (int i = 0; i < height; i++) {
    483     for (int j = 0; j < width; j++) {
    484       *ls++ = (uint8_t)clamp(abs(coeff[i * width + j]), 0, INT8_MAX);
    485     }
    486     for (int j = 0; j < TX_PAD_HOR; j++) {
    487       *ls++ = 0;
    488     }
    489   }
    490 }
    491 
    492 void av1_get_nz_map_contexts_c(const uint8_t *const levels,
    493                                const int16_t *const scan, const uint16_t eob,
    494                                const TX_SIZE tx_size, const TX_CLASS tx_class,
    495                                int8_t *const coeff_contexts) {
    496   const int bwl = get_txb_bwl(tx_size);
    497   const int height = get_txb_high(tx_size);
    498   for (int i = 0; i < eob; ++i) {
    499     const int pos = scan[i];
    500     coeff_contexts[pos] = get_nz_map_ctx(levels, pos, bwl, height, i,
    501                                          i == eob - 1, tx_size, tx_class);
    502   }
    503 }
    504 
    505 void av1_write_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *xd,
    506                           aom_writer *w, int blk_row, int blk_col, int plane,
    507                           TX_SIZE tx_size, const tran_low_t *tcoeff,
    508                           uint16_t eob, TXB_CTX *txb_ctx) {
    509   const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
    510   FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
    511   aom_write_symbol(w, eob == 0,
    512                    ec_ctx->txb_skip_cdf[txs_ctx][txb_ctx->txb_skip_ctx], 2);
    513   if (eob == 0) return;
    514   const PLANE_TYPE plane_type = get_plane_type(plane);
    515   const TX_TYPE tx_type = av1_get_tx_type(plane_type, xd, blk_row, blk_col,
    516                                           tx_size, cm->reduced_tx_set_used);
    517   const TX_CLASS tx_class = tx_type_to_class[tx_type];
    518   const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type);
    519   const int16_t *const scan = scan_order->scan;
    520   int c;
    521   const int bwl = get_txb_bwl(tx_size);
    522   const int width = get_txb_wide(tx_size);
    523   const int height = get_txb_high(tx_size);
    524 
    525   uint8_t levels_buf[TX_PAD_2D];
    526   uint8_t *const levels = set_levels(levels_buf, width);
    527   DECLARE_ALIGNED(16, int8_t, coeff_contexts[MAX_TX_SQUARE]);
    528   av1_txb_init_levels(tcoeff, width, height, levels);
    529 
    530   av1_write_tx_type(cm, xd, blk_row, blk_col, plane, tx_size, w);
    531 
    532   int eob_extra;
    533   const int eob_pt = get_eob_pos_token(eob, &eob_extra);
    534   const int eob_multi_size = txsize_log2_minus4[tx_size];
    535   const int eob_multi_ctx = (tx_class == TX_CLASS_2D) ? 0 : 1;
    536   switch (eob_multi_size) {
    537     case 0:
    538       aom_write_symbol(w, eob_pt - 1,
    539                        ec_ctx->eob_flag_cdf16[plane_type][eob_multi_ctx], 5);
    540       break;
    541     case 1:
    542       aom_write_symbol(w, eob_pt - 1,
    543                        ec_ctx->eob_flag_cdf32[plane_type][eob_multi_ctx], 6);
    544       break;
    545     case 2:
    546       aom_write_symbol(w, eob_pt - 1,
    547                        ec_ctx->eob_flag_cdf64[plane_type][eob_multi_ctx], 7);
    548       break;
    549     case 3:
    550       aom_write_symbol(w, eob_pt - 1,
    551                        ec_ctx->eob_flag_cdf128[plane_type][eob_multi_ctx], 8);
    552       break;
    553     case 4:
    554       aom_write_symbol(w, eob_pt - 1,
    555                        ec_ctx->eob_flag_cdf256[plane_type][eob_multi_ctx], 9);
    556       break;
    557     case 5:
    558       aom_write_symbol(w, eob_pt - 1,
    559                        ec_ctx->eob_flag_cdf512[plane_type][eob_multi_ctx], 10);
    560       break;
    561     default:
    562       aom_write_symbol(w, eob_pt - 1,
    563                        ec_ctx->eob_flag_cdf1024[plane_type][eob_multi_ctx], 11);
    564       break;
    565   }
    566 
    567   const int eob_offset_bits = k_eob_offset_bits[eob_pt];
    568   if (eob_offset_bits > 0) {
    569     const int eob_ctx = eob_pt - 3;
    570     int eob_shift = eob_offset_bits - 1;
    571     int bit = (eob_extra & (1 << eob_shift)) ? 1 : 0;
    572     aom_write_symbol(w, bit,
    573                      ec_ctx->eob_extra_cdf[txs_ctx][plane_type][eob_ctx], 2);
    574     for (int i = 1; i < eob_offset_bits; i++) {
    575       eob_shift = eob_offset_bits - 1 - i;
    576       bit = (eob_extra & (1 << eob_shift)) ? 1 : 0;
    577       aom_write_bit(w, bit);
    578     }
    579   }
    580 
    581   av1_get_nz_map_contexts(levels, scan, eob, tx_size, tx_class, coeff_contexts);
    582 
    583   for (c = eob - 1; c >= 0; --c) {
    584     const int pos = scan[c];
    585     const int coeff_ctx = coeff_contexts[pos];
    586     const tran_low_t v = tcoeff[pos];
    587     const tran_low_t level = abs(v);
    588 
    589     if (c == eob - 1) {
    590       aom_write_symbol(
    591           w, AOMMIN(level, 3) - 1,
    592           ec_ctx->coeff_base_eob_cdf[txs_ctx][plane_type][coeff_ctx], 3);
    593     } else {
    594       aom_write_symbol(w, AOMMIN(level, 3),
    595                        ec_ctx->coeff_base_cdf[txs_ctx][plane_type][coeff_ctx],
    596                        4);
    597     }
    598     if (level > NUM_BASE_LEVELS) {
    599       // level is above 1.
    600       const int base_range = level - 1 - NUM_BASE_LEVELS;
    601       const int br_ctx = get_br_ctx(levels, pos, bwl, tx_class);
    602       aom_cdf_prob *cdf =
    603           ec_ctx->coeff_br_cdf[AOMMIN(txs_ctx, TX_32X32)][plane_type][br_ctx];
    604       for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) {
    605         const int k = AOMMIN(base_range - idx, BR_CDF_SIZE - 1);
    606         aom_write_symbol(w, k, cdf, BR_CDF_SIZE);
    607         if (k < BR_CDF_SIZE - 1) break;
    608       }
    609     }
    610   }
    611 
    612   // Loop to code all signs in the transform block,
    613   // starting with the sign of DC (if applicable)
    614   for (c = 0; c < eob; ++c) {
    615     const tran_low_t v = tcoeff[scan[c]];
    616     const tran_low_t level = abs(v);
    617     const int sign = (v < 0) ? 1 : 0;
    618     if (level) {
    619       if (c == 0) {
    620         aom_write_symbol(
    621             w, sign, ec_ctx->dc_sign_cdf[plane_type][txb_ctx->dc_sign_ctx], 2);
    622       } else {
    623         aom_write_bit(w, sign);
    624       }
    625       if (level > COEFF_BASE_RANGE + NUM_BASE_LEVELS)
    626         write_golomb(w, level - COEFF_BASE_RANGE - 1 - NUM_BASE_LEVELS);
    627     }
    628   }
    629 }
    630 
    631 typedef struct encode_txb_args {
    632   const AV1_COMMON *cm;
    633   MACROBLOCK *x;
    634   aom_writer *w;
    635 } ENCODE_TXB_ARGS;
    636 
    637 static void write_coeffs_txb_wrap(const AV1_COMMON *cm, MACROBLOCK *x,
    638                                   aom_writer *w, int plane, int block,
    639                                   int blk_row, int blk_col, TX_SIZE tx_size) {
    640   MACROBLOCKD *xd = &x->e_mbd;
    641   const int txb_offset =
    642       x->mbmi_ext->cb_offset / (TX_SIZE_W_MIN * TX_SIZE_H_MIN);
    643   tran_low_t *tcoeff_txb =
    644       x->mbmi_ext->cb_coef_buff->tcoeff[plane] + x->mbmi_ext->cb_offset;
    645   uint16_t *eob_txb = x->mbmi_ext->cb_coef_buff->eobs[plane] + txb_offset;
    646   uint8_t *txb_skip_ctx_txb =
    647       x->mbmi_ext->cb_coef_buff->txb_skip_ctx[plane] + txb_offset;
    648   int *dc_sign_ctx_txb =
    649       x->mbmi_ext->cb_coef_buff->dc_sign_ctx[plane] + txb_offset;
    650   tran_low_t *tcoeff = BLOCK_OFFSET(tcoeff_txb, block);
    651   uint16_t eob = eob_txb[block];
    652   TXB_CTX txb_ctx = { txb_skip_ctx_txb[block], dc_sign_ctx_txb[block] };
    653   av1_write_coeffs_txb(cm, xd, w, blk_row, blk_col, plane, tx_size, tcoeff, eob,
    654                        &txb_ctx);
    655 }
    656 
    657 void av1_write_coeffs_mb(const AV1_COMMON *const cm, MACROBLOCK *x, int mi_row,
    658                          int mi_col, aom_writer *w, BLOCK_SIZE bsize) {
    659   MACROBLOCKD *xd = &x->e_mbd;
    660   const int num_planes = av1_num_planes(cm);
    661   int block[MAX_MB_PLANE] = { 0 };
    662   int row, col;
    663   assert(bsize == get_plane_block_size(bsize, xd->plane[0].subsampling_x,
    664                                        xd->plane[0].subsampling_y));
    665   const int max_blocks_wide = max_block_wide(xd, bsize, 0);
    666   const int max_blocks_high = max_block_high(xd, bsize, 0);
    667   const BLOCK_SIZE max_unit_bsize = BLOCK_64X64;
    668   int mu_blocks_wide = block_size_wide[max_unit_bsize] >> tx_size_wide_log2[0];
    669   int mu_blocks_high = block_size_high[max_unit_bsize] >> tx_size_high_log2[0];
    670   mu_blocks_wide = AOMMIN(max_blocks_wide, mu_blocks_wide);
    671   mu_blocks_high = AOMMIN(max_blocks_high, mu_blocks_high);
    672 
    673   for (row = 0; row < max_blocks_high; row += mu_blocks_high) {
    674     for (col = 0; col < max_blocks_wide; col += mu_blocks_wide) {
    675       for (int plane = 0; plane < num_planes; ++plane) {
    676         const struct macroblockd_plane *const pd = &xd->plane[plane];
    677         if (!is_chroma_reference(mi_row, mi_col, bsize, pd->subsampling_x,
    678                                  pd->subsampling_y))
    679           continue;
    680         const TX_SIZE tx_size = av1_get_tx_size(plane, xd);
    681         const int stepr = tx_size_high_unit[tx_size];
    682         const int stepc = tx_size_wide_unit[tx_size];
    683         const int step = stepr * stepc;
    684 
    685         const int unit_height = ROUND_POWER_OF_TWO(
    686             AOMMIN(mu_blocks_high + row, max_blocks_high), pd->subsampling_y);
    687         const int unit_width = ROUND_POWER_OF_TWO(
    688             AOMMIN(mu_blocks_wide + col, max_blocks_wide), pd->subsampling_x);
    689         for (int blk_row = row >> pd->subsampling_y; blk_row < unit_height;
    690              blk_row += stepr) {
    691           for (int blk_col = col >> pd->subsampling_x; blk_col < unit_width;
    692                blk_col += stepc) {
    693             write_coeffs_txb_wrap(cm, x, w, plane, block[plane], blk_row,
    694                                   blk_col, tx_size);
    695             block[plane] += step;
    696           }
    697         }
    698       }
    699     }
    700   }
    701 }
    702 
    703 // TODO(angiebird): use this function whenever it's possible
    704 static int get_tx_type_cost(const AV1_COMMON *cm, const MACROBLOCK *x,
    705                             const MACROBLOCKD *xd, int plane, TX_SIZE tx_size,
    706                             TX_TYPE tx_type) {
    707   if (plane > 0) return 0;
    708 
    709   const TX_SIZE square_tx_size = txsize_sqr_map[tx_size];
    710 
    711   const MB_MODE_INFO *mbmi = xd->mi[0];
    712   const int is_inter = is_inter_block(mbmi);
    713   if (get_ext_tx_types(tx_size, is_inter, cm->reduced_tx_set_used) > 1 &&
    714       !xd->lossless[xd->mi[0]->segment_id]) {
    715     const int ext_tx_set =
    716         get_ext_tx_set(tx_size, is_inter, cm->reduced_tx_set_used);
    717     if (is_inter) {
    718       if (ext_tx_set > 0)
    719         return x->inter_tx_type_costs[ext_tx_set][square_tx_size][tx_type];
    720     } else {
    721       if (ext_tx_set > 0) {
    722         PREDICTION_MODE intra_dir;
    723         if (mbmi->filter_intra_mode_info.use_filter_intra)
    724           intra_dir = fimode_to_intradir[mbmi->filter_intra_mode_info
    725                                              .filter_intra_mode];
    726         else
    727           intra_dir = mbmi->mode;
    728         return x->intra_tx_type_costs[ext_tx_set][square_tx_size][intra_dir]
    729                                      [tx_type];
    730       }
    731     }
    732   }
    733   return 0;
    734 }
    735 
    736 static AOM_FORCE_INLINE int warehouse_efficients_txb(
    737     const AV1_COMMON *const cm, const MACROBLOCK *x, const int plane,
    738     const int block, const TX_SIZE tx_size, const TXB_CTX *const txb_ctx,
    739     const struct macroblock_plane *p, const int eob,
    740     const PLANE_TYPE plane_type, const LV_MAP_COEFF_COST *const coeff_costs,
    741     const MACROBLOCKD *const xd, const TX_TYPE tx_type,
    742     const TX_CLASS tx_class) {
    743   const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
    744   const int txb_skip_ctx = txb_ctx->txb_skip_ctx;
    745   const int bwl = get_txb_bwl(tx_size);
    746   const int width = get_txb_wide(tx_size);
    747   const int height = get_txb_high(tx_size);
    748   const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type);
    749   const int16_t *const scan = scan_order->scan;
    750   uint8_t levels_buf[TX_PAD_2D];
    751   uint8_t *const levels = set_levels(levels_buf, width);
    752   DECLARE_ALIGNED(16, int8_t, coeff_contexts[MAX_TX_SQUARE]);
    753   const int eob_multi_size = txsize_log2_minus4[tx_size];
    754   const LV_MAP_EOB_COST *const eob_costs =
    755       &x->eob_costs[eob_multi_size][plane_type];
    756   int cost = coeff_costs->txb_skip_cost[txb_skip_ctx][0];
    757 
    758   av1_txb_init_levels(qcoeff, width, height, levels);
    759 
    760   cost += get_tx_type_cost(cm, x, xd, plane, tx_size, tx_type);
    761 
    762   cost += get_eob_cost(eob, eob_costs, coeff_costs, tx_class);
    763 
    764   av1_get_nz_map_contexts(levels, scan, eob, tx_size, tx_class, coeff_contexts);
    765 
    766   const int(*lps_cost)[COEFF_BASE_RANGE + 1 + COEFF_BASE_RANGE + 1] =
    767       coeff_costs->lps_cost;
    768   int c = eob - 1;
    769   {
    770     const int pos = scan[c];
    771     const tran_low_t v = qcoeff[pos];
    772     const int sign = v >> 31;
    773     const int level = (v ^ sign) - sign;
    774     const int coeff_ctx = coeff_contexts[pos];
    775     cost += coeff_costs->base_eob_cost[coeff_ctx][AOMMIN(level, 3) - 1];
    776 
    777     if (v) {
    778       // sign bit cost
    779       if (level > NUM_BASE_LEVELS) {
    780         const int ctx = get_br_ctx_eob(pos, bwl, tx_class);
    781         cost += get_br_cost(level, lps_cost[ctx]);
    782       }
    783       if (c) {
    784         cost += av1_cost_literal(1);
    785       } else {
    786         const int sign01 = (sign ^ sign) - sign;
    787         const int dc_sign_ctx = txb_ctx->dc_sign_ctx;
    788         cost += coeff_costs->dc_sign_cost[dc_sign_ctx][sign01];
    789         return cost;
    790       }
    791     }
    792   }
    793   const int(*base_cost)[8] = coeff_costs->base_cost;
    794   for (c = eob - 2; c >= 1; --c) {
    795     const int pos = scan[c];
    796     const int coeff_ctx = coeff_contexts[pos];
    797     const tran_low_t v = qcoeff[pos];
    798     const int level = abs(v);
    799     const int cost0 = base_cost[coeff_ctx][AOMMIN(level, 3)];
    800     if (v) {
    801       // sign bit cost
    802       cost += av1_cost_literal(1);
    803       if (level > NUM_BASE_LEVELS) {
    804         const int ctx = get_br_ctx(levels, pos, bwl, tx_class);
    805         cost += get_br_cost(level, lps_cost[ctx]);
    806       }
    807     }
    808     cost += cost0;
    809   }
    810   if (c == 0) {
    811     const int pos = scan[c];
    812     const tran_low_t v = qcoeff[pos];
    813     const int coeff_ctx = coeff_contexts[pos];
    814     const int sign = v >> 31;
    815     const int level = (v ^ sign) - sign;
    816     cost += base_cost[coeff_ctx][AOMMIN(level, 3)];
    817 
    818     if (v) {
    819       // sign bit cost
    820       const int sign01 = (sign ^ sign) - sign;
    821       const int dc_sign_ctx = txb_ctx->dc_sign_ctx;
    822       cost += coeff_costs->dc_sign_cost[dc_sign_ctx][sign01];
    823       if (level > NUM_BASE_LEVELS) {
    824         const int ctx = get_br_ctx(levels, pos, bwl, tx_class);
    825         cost += get_br_cost(level, lps_cost[ctx]);
    826       }
    827     }
    828   }
    829   return cost;
    830 }
    831 
    832 int av1_cost_coeffs_txb(const AV1_COMMON *const cm, const MACROBLOCK *x,
    833                         const int plane, const int block, const TX_SIZE tx_size,
    834                         const TX_TYPE tx_type, const TXB_CTX *const txb_ctx) {
    835   const struct macroblock_plane *p = &x->plane[plane];
    836   const int eob = p->eobs[block];
    837   const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
    838   const PLANE_TYPE plane_type = get_plane_type(plane);
    839   const LV_MAP_COEFF_COST *const coeff_costs =
    840       &x->coeff_costs[txs_ctx][plane_type];
    841   if (eob == 0) {
    842     return coeff_costs->txb_skip_cost[txb_ctx->txb_skip_ctx][1];
    843   }
    844 
    845   const MACROBLOCKD *const xd = &x->e_mbd;
    846   const TX_CLASS tx_class = tx_type_to_class[tx_type];
    847 
    848 #define WAREHOUSE_EFFICIENTS_TXB_CASE(tx_class_literal)                        \
    849   case tx_class_literal:                                                       \
    850     return warehouse_efficients_txb(cm, x, plane, block, tx_size, txb_ctx, p,  \
    851                                     eob, plane_type, coeff_costs, xd, tx_type, \
    852                                     tx_class_literal);
    853   switch (tx_class) {
    854     WAREHOUSE_EFFICIENTS_TXB_CASE(TX_CLASS_2D);
    855     WAREHOUSE_EFFICIENTS_TXB_CASE(TX_CLASS_HORIZ);
    856     WAREHOUSE_EFFICIENTS_TXB_CASE(TX_CLASS_VERT);
    857 #undef WAREHOUSE_EFFICIENTS_TXB_CASE
    858     default: assert(false); return 0;
    859   }
    860 }
    861 
    862 static int optimize_txb(TxbInfo *txb_info, const LV_MAP_COEFF_COST *txb_costs,
    863                         const LV_MAP_EOB_COST *txb_eob_costs, int *rate_cost) {
    864   int update = 0;
    865   if (txb_info->eob == 0) return update;
    866   const int16_t *const scan = txb_info->scan_order->scan;
    867   // forward optimize the nz_map`
    868   const int init_eob = txb_info->eob;
    869   const TX_CLASS tx_class = tx_type_to_class[txb_info->tx_type];
    870   const int eob_cost =
    871       get_eob_cost(init_eob, txb_eob_costs, txb_costs, tx_class);
    872 
    873   // backward optimize the level-k map
    874   int accu_rate = eob_cost;
    875   int64_t accu_dist = 0;
    876   int64_t prev_eob_rd_cost = INT64_MAX;
    877   int64_t cur_eob_rd_cost = 0;
    878 
    879   {
    880     const int si = init_eob - 1;
    881     const int coeff_idx = scan[si];
    882     LevelDownStats stats;
    883     get_dist_cost_stats(&stats, si, si == init_eob - 1, txb_costs, txb_info,
    884                         tx_class);
    885     if ((stats.rd_low < stats.rd) && (stats.low_qc != 0)) {
    886       update = 1;
    887       update_coeff(coeff_idx, stats.low_qc, txb_info);
    888       accu_rate += stats.rate_low;
    889       accu_dist += stats.dist_low;
    890     } else {
    891       accu_rate += stats.rate;
    892       accu_dist += stats.dist;
    893     }
    894   }
    895 
    896   int si = init_eob - 2;
    897   int8_t has_nz_tail = 0;
    898   // eob is not fixed
    899   for (; si >= 0 && has_nz_tail < 2; --si) {
    900     assert(si != init_eob - 1);
    901     const int coeff_idx = scan[si];
    902     tran_low_t qc = txb_info->qcoeff[coeff_idx];
    903 
    904     if (qc == 0) {
    905       const int coeff_ctx =
    906           get_lower_levels_ctx(txb_info->levels, coeff_idx, txb_info->bwl,
    907                                txb_info->tx_size, tx_class);
    908       accu_rate += txb_costs->base_cost[coeff_ctx][0];
    909     } else {
    910       LevelDownStats stats;
    911       get_dist_cost_stats_with_eob(&stats, si, txb_costs, txb_info, tx_class);
    912       // check if it is better to make this the last significant coefficient
    913       int cur_eob_rate =
    914           get_eob_cost(si + 1, txb_eob_costs, txb_costs, tx_class);
    915       cur_eob_rd_cost = RDCOST(txb_info->rdmult, cur_eob_rate, 0);
    916       prev_eob_rd_cost =
    917           RDCOST(txb_info->rdmult, accu_rate, accu_dist) + stats.nz_rd;
    918       if (cur_eob_rd_cost <= prev_eob_rd_cost) {
    919         update = 1;
    920         for (int j = si + 1; j < txb_info->eob; j++) {
    921           const int coeff_pos_j = scan[j];
    922           update_coeff(coeff_pos_j, 0, txb_info);
    923         }
    924         txb_info->eob = si + 1;
    925 
    926         // rerun cost calculation due to change of eob
    927         accu_rate = cur_eob_rate;
    928         accu_dist = 0;
    929         get_dist_cost_stats(&stats, si, 1, txb_costs, txb_info, tx_class);
    930         if ((stats.rd_low < stats.rd) && (stats.low_qc != 0)) {
    931           update = 1;
    932           update_coeff(coeff_idx, stats.low_qc, txb_info);
    933           accu_rate += stats.rate_low;
    934           accu_dist += stats.dist_low;
    935         } else {
    936           accu_rate += stats.rate;
    937           accu_dist += stats.dist;
    938         }
    939 
    940         // reset non zero tail when new eob is found
    941         has_nz_tail = 0;
    942       } else {
    943         int bUpdCoeff = 0;
    944         if (stats.rd_low < stats.rd) {
    945           if ((si < txb_info->eob - 1)) {
    946             bUpdCoeff = 1;
    947             update = 1;
    948           }
    949         } else {
    950           ++has_nz_tail;
    951         }
    952 
    953         if (bUpdCoeff) {
    954           update_coeff(coeff_idx, stats.low_qc, txb_info);
    955           accu_rate += stats.rate_low;
    956           accu_dist += stats.dist_low;
    957         } else {
    958           accu_rate += stats.rate;
    959           accu_dist += stats.dist;
    960         }
    961       }
    962     }
    963   }  // for (si)
    964 
    965   // eob is fixed
    966   for (; si >= 0; --si) {
    967     assert(si != init_eob - 1);
    968     const int coeff_idx = scan[si];
    969     tran_low_t qc = txb_info->qcoeff[coeff_idx];
    970 
    971     if (qc == 0) {
    972       const int coeff_ctx =
    973           get_lower_levels_ctx(txb_info->levels, coeff_idx, txb_info->bwl,
    974                                txb_info->tx_size, tx_class);
    975       accu_rate += txb_costs->base_cost[coeff_ctx][0];
    976     } else {
    977       LevelDownStats stats;
    978       get_dist_cost_stats(&stats, si, 0, txb_costs, txb_info, tx_class);
    979 
    980       int bUpdCoeff = 0;
    981       if (stats.rd_low < stats.rd) {
    982         if ((si < txb_info->eob - 1)) {
    983           bUpdCoeff = 1;
    984           update = 1;
    985         }
    986       }
    987       if (bUpdCoeff) {
    988         update_coeff(coeff_idx, stats.low_qc, txb_info);
    989         accu_rate += stats.rate_low;
    990         accu_dist += stats.dist_low;
    991       } else {
    992         accu_rate += stats.rate;
    993         accu_dist += stats.dist;
    994       }
    995     }
    996   }  // for (si)
    997 
    998   int non_zero_blk_rate =
    999       txb_costs->txb_skip_cost[txb_info->txb_ctx->txb_skip_ctx][0];
   1000   prev_eob_rd_cost =
   1001       RDCOST(txb_info->rdmult, accu_rate + non_zero_blk_rate, accu_dist);
   1002 
   1003   int zero_blk_rate =
   1004       txb_costs->txb_skip_cost[txb_info->txb_ctx->txb_skip_ctx][1];
   1005   int64_t zero_blk_rd_cost = RDCOST(txb_info->rdmult, zero_blk_rate, 0);
   1006   if (zero_blk_rd_cost <= prev_eob_rd_cost) {
   1007     update = 1;
   1008     for (int j = 0; j < txb_info->eob; j++) {
   1009       const int coeff_pos_j = scan[j];
   1010       update_coeff(coeff_pos_j, 0, txb_info);
   1011     }
   1012     txb_info->eob = 0;
   1013   }
   1014 
   1015   // record total rate cost
   1016   *rate_cost = zero_blk_rd_cost <= prev_eob_rd_cost
   1017                    ? zero_blk_rate
   1018                    : accu_rate + non_zero_blk_rate;
   1019 
   1020   if (txb_info->eob > 0) {
   1021     *rate_cost += txb_info->tx_type_cost;
   1022   }
   1023 
   1024   return update;
   1025 }
   1026 
   1027 static void hbt_init() {
   1028   hbt_hash_table =
   1029       aom_malloc(sizeof(OptTxbQcoeff) * HBT_TABLE_SIZE * HBT_ARRAY_LENGTH);
   1030   memset(hbt_hash_table, 0,
   1031          sizeof(OptTxbQcoeff) * HBT_TABLE_SIZE * HBT_ARRAY_LENGTH);
   1032   av1_crc32c_calculator_init(&crc_calculator);  // 31 bit: qc & ctx
   1033 
   1034   hbt_needs_init = 0;
   1035 }
   1036 
   1037 void hbt_destroy() { aom_free(hbt_hash_table); }
   1038 
   1039 static int hbt_hash_miss(uint32_t hbt_ctx_hash, uint32_t hbt_qc_hash,
   1040                          TxbInfo *txb_info, const LV_MAP_COEFF_COST *txb_costs,
   1041                          const LV_MAP_EOB_COST *txb_eob_costs,
   1042                          const struct macroblock_plane *p, int block,
   1043                          int fast_mode, int *rate_cost) {
   1044   (void)fast_mode;
   1045   const int16_t *scan = txb_info->scan_order->scan;
   1046   int prev_eob = txb_info->eob;
   1047   assert(HBT_EOB <= 16);  // Lengthen array if allowing longer eob.
   1048   int32_t prev_coeff[16];
   1049   for (int i = 0; i < prev_eob; i++) {
   1050     prev_coeff[i] = txb_info->qcoeff[scan[i]];
   1051   }
   1052   for (int i = prev_eob; i < HBT_EOB; i++) {
   1053     prev_coeff[i] = 0;  // For compiler piece of mind.
   1054   }
   1055 
   1056   av1_txb_init_levels(txb_info->qcoeff, txb_info->width, txb_info->height,
   1057                       txb_info->levels);
   1058 
   1059   const int update =
   1060       optimize_txb(txb_info, txb_costs, txb_eob_costs, rate_cost);
   1061 
   1062   // Overwrite old entry
   1063   uint16_t hbt_table_index = hbt_ctx_hash % HBT_TABLE_SIZE;
   1064   uint16_t hbt_array_index = hbt_qc_hash % HBT_ARRAY_LENGTH;
   1065   hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index]
   1066       .rate_cost = *rate_cost;
   1067   hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index].init = 1;
   1068   hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index]
   1069       .hbt_qc_hash = hbt_qc_hash;
   1070   hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index]
   1071       .hbt_ctx_hash = hbt_ctx_hash;
   1072   assert(prev_eob >= txb_info->eob);  // eob can't get longer
   1073   for (int i = 0; i < txb_info->eob; i++) {
   1074     // Record how coeff changed. Convention: towards zero is negative.
   1075     if (txb_info->qcoeff[scan[i]] > 0)
   1076       hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index]
   1077           .deltas[i] = txb_info->qcoeff[scan[i]] - prev_coeff[i];
   1078     else
   1079       hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index]
   1080           .deltas[i] = prev_coeff[i] - txb_info->qcoeff[scan[i]];
   1081   }
   1082   for (int i = txb_info->eob; i < prev_eob; i++) {
   1083     // If eob got shorter, record that all after it changed to zero.
   1084     if (prev_coeff[i] > 0)
   1085       hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index]
   1086           .deltas[i] = -prev_coeff[i];
   1087     else
   1088       hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index]
   1089           .deltas[i] = prev_coeff[i];
   1090   }
   1091   for (int i = prev_eob; i < HBT_EOB; i++) {
   1092     // Record 'no change' after optimized coefficients run out.
   1093     hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index]
   1094         .deltas[i] = 0;
   1095   }
   1096 
   1097   if (update) {
   1098     p->eobs[block] = txb_info->eob;
   1099     p->txb_entropy_ctx[block] = av1_get_txb_entropy_context(
   1100         txb_info->qcoeff, txb_info->scan_order, txb_info->eob);
   1101   }
   1102   return txb_info->eob;
   1103 }
   1104 
   1105 static int hbt_hash_hit(uint32_t hbt_table_index, int hbt_array_index,
   1106                         TxbInfo *txb_info, const struct macroblock_plane *p,
   1107                         int block, int *rate_cost) {
   1108   const int16_t *scan = txb_info->scan_order->scan;
   1109   int new_eob = 0;
   1110   int update = 0;
   1111 
   1112   for (int i = 0; i < txb_info->eob; i++) {
   1113     // Delta convention is negatives go towards zero, so only apply those ones.
   1114     if (hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index]
   1115             .deltas[i] < 0) {
   1116       if (txb_info->qcoeff[scan[i]] > 0)
   1117         txb_info->qcoeff[scan[i]] +=
   1118             hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index]
   1119                 .deltas[i];
   1120       else
   1121         txb_info->qcoeff[scan[i]] -=
   1122             hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index]
   1123                 .deltas[i];
   1124 
   1125       update = 1;
   1126       update_coeff(scan[i], txb_info->qcoeff[scan[i]], txb_info);
   1127     }
   1128     if (txb_info->qcoeff[scan[i]]) new_eob = i + 1;
   1129   }
   1130 
   1131   // Rate_cost can be calculated here instead (av1_cost_coeffs_txb), but
   1132   // it is expensive and gives little benefit as long as qc_hash is high bit
   1133   *rate_cost =
   1134       hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index]
   1135           .rate_cost;
   1136 
   1137   if (update) {
   1138     txb_info->eob = new_eob;
   1139     p->eobs[block] = txb_info->eob;
   1140     p->txb_entropy_ctx[block] = av1_get_txb_entropy_context(
   1141         txb_info->qcoeff, txb_info->scan_order, txb_info->eob);
   1142   }
   1143 
   1144   return txb_info->eob;
   1145 }
   1146 
   1147 static int hbt_search_match(uint32_t hbt_ctx_hash, uint32_t hbt_qc_hash,
   1148                             TxbInfo *txb_info,
   1149                             const LV_MAP_COEFF_COST *txb_costs,
   1150                             const LV_MAP_EOB_COST *txb_eob_costs,
   1151                             const struct macroblock_plane *p, int block,
   1152                             int fast_mode, int *rate_cost) {
   1153   // Check for qcoeff match
   1154   int hbt_array_index = hbt_qc_hash % HBT_ARRAY_LENGTH;
   1155   int hbt_table_index = hbt_ctx_hash % HBT_TABLE_SIZE;
   1156 
   1157   if (hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index]
   1158               .hbt_qc_hash == hbt_qc_hash &&
   1159       hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index]
   1160               .hbt_ctx_hash == hbt_ctx_hash &&
   1161       hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index]
   1162           .init) {
   1163     return hbt_hash_hit(hbt_table_index, hbt_array_index, txb_info, p, block,
   1164                         rate_cost);
   1165   } else {
   1166     return hbt_hash_miss(hbt_ctx_hash, hbt_qc_hash, txb_info, txb_costs,
   1167                          txb_eob_costs, p, block, fast_mode, rate_cost);
   1168   }
   1169 }
   1170 
   1171 static int hbt_create_hashes(TxbInfo *txb_info,
   1172                              const LV_MAP_COEFF_COST *txb_costs,
   1173                              const LV_MAP_EOB_COST *txb_eob_costs,
   1174                              const struct macroblock_plane *p, int block,
   1175                              int fast_mode, int *rate_cost) {
   1176   // Initialize hash table if needed.
   1177   if (hbt_needs_init) {
   1178     hbt_init();
   1179   }
   1180 
   1181   //// Hash creation
   1182   uint8_t txb_hash_data[256];  // Asserts below to ensure enough space.
   1183   const int16_t *scan = txb_info->scan_order->scan;
   1184   uint8_t chunk = 0;
   1185   int hash_data_index = 0;
   1186 
   1187   // Make qc_hash.
   1188   int packing_index = 0;  // needed for packing.
   1189   for (int i = 0; i < txb_info->eob; i++) {
   1190     tran_low_t prechunk = txb_info->qcoeff[scan[i]];
   1191 
   1192     // Softening: Improves speed. Aligns with signed deltas.
   1193     if (prechunk < 0) prechunk *= -1;
   1194 
   1195     // Early kick out: Don't apply feature if there are large coeffs:
   1196     // If this kickout value is removed or raised beyond int8_t,
   1197     // widen deltas type in OptTxbQcoeff struct.
   1198     assert((int8_t)HBT_KICKOUT == HBT_KICKOUT);  // If not, widen types.
   1199     if (prechunk > HBT_KICKOUT) {
   1200       av1_txb_init_levels(txb_info->qcoeff, txb_info->width, txb_info->height,
   1201                           txb_info->levels);
   1202 
   1203       const int update =
   1204           optimize_txb(txb_info, txb_costs, txb_eob_costs, rate_cost);
   1205 
   1206       if (update) {
   1207         p->eobs[block] = txb_info->eob;
   1208         p->txb_entropy_ctx[block] = av1_get_txb_entropy_context(
   1209             txb_info->qcoeff, txb_info->scan_order, txb_info->eob);
   1210       }
   1211       return txb_info->eob;
   1212     }
   1213 
   1214     // Since coeffs are 0 to 3, only 2 bits are needed: pack into bytes
   1215     if (packing_index == 0) txb_hash_data[hash_data_index] = 0;
   1216     chunk = prechunk << packing_index;
   1217     packing_index += 2;
   1218     txb_hash_data[hash_data_index] |= chunk;
   1219 
   1220     // Full byte:
   1221     if (packing_index == 8) {
   1222       packing_index = 0;
   1223       hash_data_index++;
   1224     }
   1225   }
   1226   // Needed when packing_index != 0, to include final byte.
   1227   hash_data_index++;
   1228   assert(hash_data_index <= 64);
   1229   // 31 bit qc_hash: index to array
   1230   uint32_t hbt_qc_hash =
   1231       av1_get_crc32c_value(&crc_calculator, txb_hash_data, hash_data_index);
   1232 
   1233   // Make ctx_hash.
   1234   hash_data_index = 0;
   1235   tran_low_t prechunk;
   1236 
   1237   for (int i = 0; i < txb_info->eob; i++) {
   1238     // Save as magnitudes towards or away from zero.
   1239     if (txb_info->tcoeff[scan[i]] >= 0)
   1240       prechunk = txb_info->tcoeff[scan[i]] - txb_info->dqcoeff[scan[i]];
   1241     else
   1242       prechunk = txb_info->dqcoeff[scan[i]] - txb_info->tcoeff[scan[i]];
   1243 
   1244     chunk = prechunk & 0xff;
   1245     txb_hash_data[hash_data_index++] = chunk;
   1246   }
   1247 
   1248   // Extra ctx data:
   1249   // Include dequants.
   1250   txb_hash_data[hash_data_index++] = txb_info->dequant[0] & 0xff;
   1251   txb_hash_data[hash_data_index++] = txb_info->dequant[1] & 0xff;
   1252   chunk = txb_info->txb_ctx->txb_skip_ctx & 0xff;
   1253   txb_hash_data[hash_data_index++] = chunk;
   1254   chunk = txb_info->txb_ctx->dc_sign_ctx & 0xff;
   1255   txb_hash_data[hash_data_index++] = chunk;
   1256   // eob
   1257   chunk = txb_info->eob & 0xff;
   1258   txb_hash_data[hash_data_index++] = chunk;
   1259   // rdmult (int64)
   1260   chunk = txb_info->rdmult & 0xff;
   1261   txb_hash_data[hash_data_index++] = chunk;
   1262   // tx_type
   1263   chunk = txb_info->tx_type & 0xff;
   1264   txb_hash_data[hash_data_index++] = chunk;
   1265   // base_eob_cost
   1266   for (int i = 1; i < 3; i++) {  // i = 0 are softened away
   1267     for (int j = 0; j < SIG_COEF_CONTEXTS_EOB; j++) {
   1268       chunk = (txb_costs->base_eob_cost[j][i] & 0xff00) >> 8;
   1269       txb_hash_data[hash_data_index++] = chunk;
   1270     }
   1271   }
   1272   // eob_cost
   1273   for (int i = 0; i < 11; i++) {
   1274     for (int j = 0; j < 2; j++) {
   1275       chunk = (txb_eob_costs->eob_cost[j][i] & 0xff00) >> 8;
   1276       txb_hash_data[hash_data_index++] = chunk;
   1277     }
   1278   }
   1279   // dc_sign_cost
   1280   for (int i = 0; i < 2; i++) {
   1281     for (int j = 0; j < DC_SIGN_CONTEXTS; j++) {
   1282       chunk = (txb_costs->dc_sign_cost[j][i] & 0xff00) >> 8;
   1283       txb_hash_data[hash_data_index++] = chunk;
   1284     }
   1285   }
   1286 
   1287   assert(hash_data_index <= 256);
   1288   // 31 bit ctx_hash: used to index table
   1289   uint32_t hbt_ctx_hash =
   1290       av1_get_crc32c_value(&crc_calculator, txb_hash_data, hash_data_index);
   1291   //// End hash creation
   1292 
   1293   return hbt_search_match(hbt_ctx_hash, hbt_qc_hash, txb_info, txb_costs,
   1294                           txb_eob_costs, p, block, fast_mode, rate_cost);
   1295 }
   1296 
   1297 static AOM_FORCE_INLINE int get_two_coeff_cost_simple(
   1298     int ci, tran_low_t abs_qc, int coeff_ctx,
   1299     const LV_MAP_COEFF_COST *txb_costs, int bwl, TX_CLASS tx_class,
   1300     const uint8_t *levels, int *cost_low) {
   1301   // this simple version assumes the coeff's scan_idx is not DC (scan_idx != 0)
   1302   // and not the last (scan_idx != eob - 1)
   1303   assert(ci > 0);
   1304   int cost = txb_costs->base_cost[coeff_ctx][AOMMIN(abs_qc, 3)];
   1305   int diff = 0;
   1306   if (abs_qc <= 3) diff = txb_costs->base_cost[coeff_ctx][abs_qc + 4];
   1307   if (abs_qc) {
   1308     cost += av1_cost_literal(1);
   1309     if (abs_qc > NUM_BASE_LEVELS) {
   1310       const int br_ctx = get_br_ctx(levels, ci, bwl, tx_class);
   1311       int brcost_diff = 0;
   1312       cost += get_br_cost_with_diff(abs_qc, txb_costs->lps_cost[br_ctx],
   1313                                     &brcost_diff);
   1314       diff += brcost_diff;
   1315     }
   1316   }
   1317   *cost_low = cost - diff;
   1318 
   1319   return cost;
   1320 }
   1321 
   1322 static INLINE int get_coeff_cost_eob(int ci, tran_low_t abs_qc, int sign,
   1323                                      int coeff_ctx, int dc_sign_ctx,
   1324                                      const LV_MAP_COEFF_COST *txb_costs,
   1325                                      int bwl, TX_CLASS tx_class) {
   1326   int cost = 0;
   1327   cost += txb_costs->base_eob_cost[coeff_ctx][AOMMIN(abs_qc, 3) - 1];
   1328   if (abs_qc != 0) {
   1329     if (ci == 0) {
   1330       cost += txb_costs->dc_sign_cost[dc_sign_ctx][sign];
   1331     } else {
   1332       cost += av1_cost_literal(1);
   1333     }
   1334     if (abs_qc > NUM_BASE_LEVELS) {
   1335       int br_ctx;
   1336       br_ctx = get_br_ctx_eob(ci, bwl, tx_class);
   1337       cost += get_br_cost(abs_qc, txb_costs->lps_cost[br_ctx]);
   1338     }
   1339   }
   1340   return cost;
   1341 }
   1342 
   1343 static INLINE int get_coeff_cost_general(int is_last, int ci, tran_low_t abs_qc,
   1344                                          int sign, int coeff_ctx,
   1345                                          int dc_sign_ctx,
   1346                                          const LV_MAP_COEFF_COST *txb_costs,
   1347                                          int bwl, TX_CLASS tx_class,
   1348                                          const uint8_t *levels) {
   1349   int cost = 0;
   1350   if (is_last) {
   1351     cost += txb_costs->base_eob_cost[coeff_ctx][AOMMIN(abs_qc, 3) - 1];
   1352   } else {
   1353     cost += txb_costs->base_cost[coeff_ctx][AOMMIN(abs_qc, 3)];
   1354   }
   1355   if (abs_qc != 0) {
   1356     if (ci == 0) {
   1357       cost += txb_costs->dc_sign_cost[dc_sign_ctx][sign];
   1358     } else {
   1359       cost += av1_cost_literal(1);
   1360     }
   1361     if (abs_qc > NUM_BASE_LEVELS) {
   1362       int br_ctx;
   1363       if (is_last)
   1364         br_ctx = get_br_ctx_eob(ci, bwl, tx_class);
   1365       else
   1366         br_ctx = get_br_ctx(levels, ci, bwl, tx_class);
   1367       cost += get_br_cost(abs_qc, txb_costs->lps_cost[br_ctx]);
   1368     }
   1369   }
   1370   return cost;
   1371 }
   1372 
   1373 static INLINE void get_qc_dqc_low(tran_low_t abs_qc, int sign, int dqv,
   1374                                   int shift, tran_low_t *qc_low,
   1375                                   tran_low_t *dqc_low) {
   1376   tran_low_t abs_qc_low = abs_qc - 1;
   1377   *qc_low = (-sign ^ abs_qc_low) + sign;
   1378   assert((sign ? -abs_qc_low : abs_qc_low) == *qc_low);
   1379   tran_low_t abs_dqc_low = (abs_qc_low * dqv) >> shift;
   1380   *dqc_low = (-sign ^ abs_dqc_low) + sign;
   1381   assert((sign ? -abs_dqc_low : abs_dqc_low) == *dqc_low);
   1382 }
   1383 
   1384 static INLINE void update_coeff_general(
   1385     int *accu_rate, int64_t *accu_dist, int si, int eob, TX_SIZE tx_size,
   1386     TX_CLASS tx_class, int bwl, int height, int64_t rdmult, int shift,
   1387     int dc_sign_ctx, const int16_t *dequant, const int16_t *scan,
   1388     const LV_MAP_COEFF_COST *txb_costs, const tran_low_t *tcoeff,
   1389     tran_low_t *qcoeff, tran_low_t *dqcoeff, uint8_t *levels) {
   1390   const int dqv = dequant[si != 0];
   1391   const int ci = scan[si];
   1392   const tran_low_t qc = qcoeff[ci];
   1393   const int is_last = si == (eob - 1);
   1394   const int coeff_ctx = get_lower_levels_ctx_general(
   1395       is_last, si, bwl, height, levels, ci, tx_size, tx_class);
   1396   if (qc == 0) {
   1397     *accu_rate += txb_costs->base_cost[coeff_ctx][0];
   1398   } else {
   1399     const int sign = (qc < 0) ? 1 : 0;
   1400     const tran_low_t abs_qc = abs(qc);
   1401     const tran_low_t tqc = tcoeff[ci];
   1402     const tran_low_t dqc = dqcoeff[ci];
   1403     const int64_t dist = get_coeff_dist(tqc, dqc, shift);
   1404     const int64_t dist0 = get_coeff_dist(tqc, 0, shift);
   1405     const int rate =
   1406         get_coeff_cost_general(is_last, ci, abs_qc, sign, coeff_ctx,
   1407                                dc_sign_ctx, txb_costs, bwl, tx_class, levels);
   1408     const int64_t rd = RDCOST(rdmult, rate, dist);
   1409 
   1410     tran_low_t qc_low, dqc_low;
   1411     tran_low_t abs_qc_low;
   1412     int64_t dist_low, rd_low;
   1413     int rate_low;
   1414     if (abs_qc == 1) {
   1415       abs_qc_low = qc_low = dqc_low = 0;
   1416       dist_low = dist0;
   1417       rate_low = txb_costs->base_cost[coeff_ctx][0];
   1418     } else {
   1419       get_qc_dqc_low(abs_qc, sign, dqv, shift, &qc_low, &dqc_low);
   1420       abs_qc_low = abs_qc - 1;
   1421       dist_low = get_coeff_dist(tqc, dqc_low, shift);
   1422       rate_low =
   1423           get_coeff_cost_general(is_last, ci, abs_qc_low, sign, coeff_ctx,
   1424                                  dc_sign_ctx, txb_costs, bwl, tx_class, levels);
   1425     }
   1426 
   1427     rd_low = RDCOST(rdmult, rate_low, dist_low);
   1428     if (rd_low < rd) {
   1429       qcoeff[ci] = qc_low;
   1430       dqcoeff[ci] = dqc_low;
   1431       levels[get_padded_idx(ci, bwl)] = AOMMIN(abs_qc_low, INT8_MAX);
   1432       *accu_rate += rate_low;
   1433       *accu_dist += dist_low - dist0;
   1434     } else {
   1435       *accu_rate += rate;
   1436       *accu_dist += dist - dist0;
   1437     }
   1438   }
   1439 }
   1440 
   1441 static AOM_FORCE_INLINE void update_coeff_simple(
   1442     int *accu_rate, int si, int eob, TX_SIZE tx_size, TX_CLASS tx_class,
   1443     int bwl, int64_t rdmult, int shift, const int16_t *dequant,
   1444     const int16_t *scan, const LV_MAP_COEFF_COST *txb_costs,
   1445     const tran_low_t *tcoeff, tran_low_t *qcoeff, tran_low_t *dqcoeff,
   1446     uint8_t *levels) {
   1447   const int dqv = dequant[1];
   1448   (void)eob;
   1449   // this simple version assumes the coeff's scan_idx is not DC (scan_idx != 0)
   1450   // and not the last (scan_idx != eob - 1)
   1451   assert(si != eob - 1);
   1452   assert(si > 0);
   1453   const int ci = scan[si];
   1454   const tran_low_t qc = qcoeff[ci];
   1455   const int coeff_ctx =
   1456       get_lower_levels_ctx(levels, ci, bwl, tx_size, tx_class);
   1457   if (qc == 0) {
   1458     *accu_rate += txb_costs->base_cost[coeff_ctx][0];
   1459   } else {
   1460     const tran_low_t abs_qc = abs(qc);
   1461     const tran_low_t abs_tqc = abs(tcoeff[ci]);
   1462     const tran_low_t abs_dqc = abs(dqcoeff[ci]);
   1463     int rate_low = 0;
   1464     const int rate = get_two_coeff_cost_simple(
   1465         ci, abs_qc, coeff_ctx, txb_costs, bwl, tx_class, levels, &rate_low);
   1466     if (abs_dqc < abs_tqc) {
   1467       *accu_rate += rate;
   1468       return;
   1469     }
   1470 
   1471     const int64_t dist = get_coeff_dist(abs_tqc, abs_dqc, shift);
   1472     const int64_t rd = RDCOST(rdmult, rate, dist);
   1473 
   1474     const tran_low_t abs_qc_low = abs_qc - 1;
   1475     const tran_low_t abs_dqc_low = (abs_qc_low * dqv) >> shift;
   1476     const int64_t dist_low = get_coeff_dist(abs_tqc, abs_dqc_low, shift);
   1477     const int64_t rd_low = RDCOST(rdmult, rate_low, dist_low);
   1478 
   1479     if (rd_low < rd) {
   1480       const int sign = (qc < 0) ? 1 : 0;
   1481       qcoeff[ci] = (-sign ^ abs_qc_low) + sign;
   1482       dqcoeff[ci] = (-sign ^ abs_dqc_low) + sign;
   1483       levels[get_padded_idx(ci, bwl)] = AOMMIN(abs_qc_low, INT8_MAX);
   1484       *accu_rate += rate_low;
   1485     } else {
   1486       *accu_rate += rate;
   1487     }
   1488   }
   1489 }
   1490 
   1491 static INLINE void update_coeff_eob_fast(int *eob, int shift,
   1492                                          const int16_t *dequant_ptr,
   1493                                          const int16_t *scan,
   1494                                          const tran_low_t *coeff_ptr,
   1495                                          tran_low_t *qcoeff_ptr,
   1496                                          tran_low_t *dqcoeff_ptr) {
   1497   // TODO(sarahparker) make this work for aomqm
   1498   int eob_out = *eob;
   1499   int zbin[2] = { dequant_ptr[0] + ROUND_POWER_OF_TWO(dequant_ptr[0] * 70, 7),
   1500                   dequant_ptr[1] + ROUND_POWER_OF_TWO(dequant_ptr[1] * 70, 7) };
   1501 
   1502   for (int i = *eob - 1; i >= 0; i--) {
   1503     const int rc = scan[i];
   1504     const int qcoeff = qcoeff_ptr[rc];
   1505     const int coeff = coeff_ptr[rc];
   1506     const int coeff_sign = (coeff >> 31);
   1507     int64_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
   1508 
   1509     if (((abs_coeff << (1 + shift)) < zbin[rc != 0]) || (qcoeff == 0)) {
   1510       eob_out--;
   1511       qcoeff_ptr[rc] = 0;
   1512       dqcoeff_ptr[rc] = 0;
   1513     } else {
   1514       break;
   1515     }
   1516   }
   1517 
   1518   *eob = eob_out;
   1519 }
   1520 
   1521 static AOM_FORCE_INLINE void update_coeff_eob(
   1522     int *accu_rate, int64_t *accu_dist, int *eob, int *nz_num, int *nz_ci,
   1523     int si, TX_SIZE tx_size, TX_CLASS tx_class, int bwl, int height,
   1524     int dc_sign_ctx, int64_t rdmult, int shift, const int16_t *dequant,
   1525     const int16_t *scan, const LV_MAP_EOB_COST *txb_eob_costs,
   1526     const LV_MAP_COEFF_COST *txb_costs, const tran_low_t *tcoeff,
   1527     tran_low_t *qcoeff, tran_low_t *dqcoeff, uint8_t *levels, int sharpness) {
   1528   const int dqv = dequant[si != 0];
   1529   assert(si != *eob - 1);
   1530   const int ci = scan[si];
   1531   const tran_low_t qc = qcoeff[ci];
   1532   const int coeff_ctx =
   1533       get_lower_levels_ctx(levels, ci, bwl, tx_size, tx_class);
   1534   if (qc == 0) {
   1535     *accu_rate += txb_costs->base_cost[coeff_ctx][0];
   1536   } else {
   1537     int lower_level = 0;
   1538     const tran_low_t abs_qc = abs(qc);
   1539     const tran_low_t tqc = tcoeff[ci];
   1540     const tran_low_t dqc = dqcoeff[ci];
   1541     const int sign = (qc < 0) ? 1 : 0;
   1542     const int64_t dist0 = get_coeff_dist(tqc, 0, shift);
   1543     int64_t dist = get_coeff_dist(tqc, dqc, shift) - dist0;
   1544     int rate =
   1545         get_coeff_cost_general(0, ci, abs_qc, sign, coeff_ctx, dc_sign_ctx,
   1546                                txb_costs, bwl, tx_class, levels);
   1547     int64_t rd = RDCOST(rdmult, *accu_rate + rate, *accu_dist + dist);
   1548 
   1549     tran_low_t qc_low, dqc_low;
   1550     tran_low_t abs_qc_low;
   1551     int64_t dist_low, rd_low;
   1552     int rate_low;
   1553     if (abs_qc == 1) {
   1554       abs_qc_low = 0;
   1555       dqc_low = qc_low = 0;
   1556       dist_low = 0;
   1557       rate_low = txb_costs->base_cost[coeff_ctx][0];
   1558       rd_low = RDCOST(rdmult, *accu_rate + rate_low, *accu_dist);
   1559     } else {
   1560       get_qc_dqc_low(abs_qc, sign, dqv, shift, &qc_low, &dqc_low);
   1561       abs_qc_low = abs_qc - 1;
   1562       dist_low = get_coeff_dist(tqc, dqc_low, shift) - dist0;
   1563       rate_low =
   1564           get_coeff_cost_general(0, ci, abs_qc_low, sign, coeff_ctx,
   1565                                  dc_sign_ctx, txb_costs, bwl, tx_class, levels);
   1566       rd_low = RDCOST(rdmult, *accu_rate + rate_low, *accu_dist + dist_low);
   1567     }
   1568 
   1569     int lower_level_new_eob = 0;
   1570     const int new_eob = si + 1;
   1571     const int coeff_ctx_new_eob = get_lower_levels_ctx_eob(bwl, height, si);
   1572     const int new_eob_cost =
   1573         get_eob_cost(new_eob, txb_eob_costs, txb_costs, tx_class);
   1574     int rate_coeff_eob =
   1575         new_eob_cost + get_coeff_cost_eob(ci, abs_qc, sign, coeff_ctx_new_eob,
   1576                                           dc_sign_ctx, txb_costs, bwl,
   1577                                           tx_class);
   1578     int64_t dist_new_eob = dist;
   1579     int64_t rd_new_eob = RDCOST(rdmult, rate_coeff_eob, dist_new_eob);
   1580 
   1581     if (abs_qc_low > 0) {
   1582       const int rate_coeff_eob_low =
   1583           new_eob_cost + get_coeff_cost_eob(ci, abs_qc_low, sign,
   1584                                             coeff_ctx_new_eob, dc_sign_ctx,
   1585                                             txb_costs, bwl, tx_class);
   1586       const int64_t dist_new_eob_low = dist_low;
   1587       const int64_t rd_new_eob_low =
   1588           RDCOST(rdmult, rate_coeff_eob_low, dist_new_eob_low);
   1589       if (rd_new_eob_low < rd_new_eob) {
   1590         lower_level_new_eob = 1;
   1591         rd_new_eob = rd_new_eob_low;
   1592         rate_coeff_eob = rate_coeff_eob_low;
   1593         dist_new_eob = dist_new_eob_low;
   1594       }
   1595     }
   1596 
   1597     if (rd_low < rd) {
   1598       lower_level = 1;
   1599       rd = rd_low;
   1600       rate = rate_low;
   1601       dist = dist_low;
   1602     }
   1603 
   1604     if (sharpness == 0 && rd_new_eob < rd) {
   1605       for (int ni = 0; ni < *nz_num; ++ni) {
   1606         int last_ci = nz_ci[ni];
   1607         levels[get_padded_idx(last_ci, bwl)] = 0;
   1608         qcoeff[last_ci] = 0;
   1609         dqcoeff[last_ci] = 0;
   1610       }
   1611       *eob = new_eob;
   1612       *nz_num = 0;
   1613       *accu_rate = rate_coeff_eob;
   1614       *accu_dist = dist_new_eob;
   1615       lower_level = lower_level_new_eob;
   1616     } else {
   1617       *accu_rate += rate;
   1618       *accu_dist += dist;
   1619     }
   1620 
   1621     if (lower_level) {
   1622       qcoeff[ci] = qc_low;
   1623       dqcoeff[ci] = dqc_low;
   1624       levels[get_padded_idx(ci, bwl)] = AOMMIN(abs_qc_low, INT8_MAX);
   1625     }
   1626     if (qcoeff[ci]) {
   1627       nz_ci[*nz_num] = ci;
   1628       ++*nz_num;
   1629     }
   1630   }
   1631 }
   1632 
   1633 static INLINE void update_skip(int *accu_rate, int64_t accu_dist, int *eob,
   1634                                int nz_num, int *nz_ci, int64_t rdmult,
   1635                                int skip_cost, int non_skip_cost,
   1636                                tran_low_t *qcoeff, tran_low_t *dqcoeff,
   1637                                int sharpness) {
   1638   const int64_t rd = RDCOST(rdmult, *accu_rate + non_skip_cost, accu_dist);
   1639   const int64_t rd_new_eob = RDCOST(rdmult, skip_cost, 0);
   1640   if (sharpness == 0 && rd_new_eob < rd) {
   1641     for (int i = 0; i < nz_num; ++i) {
   1642       const int ci = nz_ci[i];
   1643       qcoeff[ci] = 0;
   1644       dqcoeff[ci] = 0;
   1645       // no need to set up levels because this is the last step
   1646       // levels[get_padded_idx(ci, bwl)] = 0;
   1647     }
   1648     *accu_rate = 0;
   1649     *eob = 0;
   1650   }
   1651 }
   1652 
   1653 int av1_optimize_txb_new(const struct AV1_COMP *cpi, MACROBLOCK *x, int plane,
   1654                          int block, TX_SIZE tx_size, TX_TYPE tx_type,
   1655                          const TXB_CTX *const txb_ctx, int *rate_cost,
   1656                          int sharpness, int fast_mode) {
   1657   MACROBLOCKD *xd = &x->e_mbd;
   1658   struct macroblockd_plane *pd = &xd->plane[plane];
   1659   const struct macroblock_plane *p = &x->plane[plane];
   1660   const SCAN_ORDER *scan_order = get_scan(tx_size, tx_type);
   1661   const int16_t *scan = scan_order->scan;
   1662   const int shift = av1_get_tx_scale(tx_size);
   1663   int eob = p->eobs[block];
   1664   const int16_t *dequant = p->dequant_QTX;
   1665   tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
   1666   tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
   1667   const tran_low_t *tcoeff = BLOCK_OFFSET(p->coeff, block);
   1668 
   1669   if (fast_mode) {
   1670     update_coeff_eob_fast(&eob, shift, dequant, scan, tcoeff, qcoeff, dqcoeff);
   1671     p->eobs[block] = eob;
   1672     if (eob == 0) {
   1673       *rate_cost = av1_cost_skip_txb(x, txb_ctx, plane, tx_size);
   1674       return eob;
   1675     }
   1676   }
   1677 
   1678   const AV1_COMMON *cm = &cpi->common;
   1679   const PLANE_TYPE plane_type = get_plane_type(plane);
   1680   const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
   1681   const TX_CLASS tx_class = tx_type_to_class[tx_type];
   1682   const MB_MODE_INFO *mbmi = xd->mi[0];
   1683   const int bwl = get_txb_bwl(tx_size);
   1684   const int width = get_txb_wide(tx_size);
   1685   const int height = get_txb_high(tx_size);
   1686   assert(width == (1 << bwl));
   1687   const int is_inter = is_inter_block(mbmi);
   1688   const LV_MAP_COEFF_COST *txb_costs = &x->coeff_costs[txs_ctx][plane_type];
   1689   const int eob_multi_size = txsize_log2_minus4[tx_size];
   1690   const LV_MAP_EOB_COST *txb_eob_costs =
   1691       &x->eob_costs[eob_multi_size][plane_type];
   1692 
   1693   const int rshift =
   1694       (sharpness +
   1695        (cpi->oxcf.aq_mode == VARIANCE_AQ && mbmi->segment_id < 4
   1696             ? 7 - mbmi->segment_id
   1697             : 2) +
   1698        (cpi->oxcf.aq_mode != VARIANCE_AQ &&
   1699                 cpi->oxcf.deltaq_mode > NO_DELTA_Q && x->sb_energy_level < 0
   1700             ? (3 - x->sb_energy_level)
   1701             : 0));
   1702   const int64_t rdmult =
   1703       (((int64_t)x->rdmult *
   1704         (plane_rd_mult[is_inter][plane_type] << (2 * (xd->bd - 8)))) +
   1705        2) >>
   1706       rshift;
   1707 
   1708   uint8_t levels_buf[TX_PAD_2D];
   1709   uint8_t *const levels = set_levels(levels_buf, width);
   1710 
   1711   if (eob > 1) av1_txb_init_levels(qcoeff, width, height, levels);
   1712 
   1713   // TODO(angirbird): check iqmatrix
   1714 
   1715   const int non_skip_cost = txb_costs->txb_skip_cost[txb_ctx->txb_skip_ctx][0];
   1716   const int skip_cost = txb_costs->txb_skip_cost[txb_ctx->txb_skip_ctx][1];
   1717   const int eob_cost = get_eob_cost(eob, txb_eob_costs, txb_costs, tx_class);
   1718   int accu_rate = eob_cost;
   1719   int64_t accu_dist = 0;
   1720   int si = eob - 1;
   1721   const int ci = scan[si];
   1722   const tran_low_t qc = qcoeff[ci];
   1723   const tran_low_t abs_qc = abs(qc);
   1724   const int sign = qc < 0;
   1725   const int max_nz_num = 2;
   1726   int nz_num = 1;
   1727   int nz_ci[3] = { ci, 0, 0 };
   1728   if (abs_qc >= 2) {
   1729     update_coeff_general(&accu_rate, &accu_dist, si, eob, tx_size, tx_class,
   1730                          bwl, height, rdmult, shift, txb_ctx->dc_sign_ctx,
   1731                          dequant, scan, txb_costs, tcoeff, qcoeff, dqcoeff,
   1732                          levels);
   1733     --si;
   1734   } else {
   1735     assert(abs_qc == 1);
   1736     const int coeff_ctx = get_lower_levels_ctx_eob(bwl, height, si);
   1737     accu_rate +=
   1738         get_coeff_cost_eob(ci, abs_qc, sign, coeff_ctx, txb_ctx->dc_sign_ctx,
   1739                            txb_costs, bwl, tx_class);
   1740     const tran_low_t tqc = tcoeff[ci];
   1741     const tran_low_t dqc = dqcoeff[ci];
   1742     const int64_t dist = get_coeff_dist(tqc, dqc, shift);
   1743     const int64_t dist0 = get_coeff_dist(tqc, 0, shift);
   1744     accu_dist += dist - dist0;
   1745     --si;
   1746   }
   1747 
   1748 #define UPDATE_COEFF_EOB_CASE(tx_class_literal)                            \
   1749   case tx_class_literal:                                                   \
   1750     for (; si >= 0 && nz_num <= max_nz_num && !fast_mode; --si) {          \
   1751       update_coeff_eob(&accu_rate, &accu_dist, &eob, &nz_num, nz_ci, si,   \
   1752                        tx_size, tx_class_literal, bwl, height,             \
   1753                        txb_ctx->dc_sign_ctx, rdmult, shift, dequant, scan, \
   1754                        txb_eob_costs, txb_costs, tcoeff, qcoeff, dqcoeff,  \
   1755                        levels, sharpness);                                 \
   1756     }                                                                      \
   1757     break;
   1758   switch (tx_class) {
   1759     UPDATE_COEFF_EOB_CASE(TX_CLASS_2D);
   1760     UPDATE_COEFF_EOB_CASE(TX_CLASS_HORIZ);
   1761     UPDATE_COEFF_EOB_CASE(TX_CLASS_VERT);
   1762 #undef UPDATE_COEFF_EOB_CASE
   1763     default: assert(false);
   1764   }
   1765 
   1766   if (si == -1 && nz_num <= max_nz_num) {
   1767     update_skip(&accu_rate, accu_dist, &eob, nz_num, nz_ci, rdmult, skip_cost,
   1768                 non_skip_cost, qcoeff, dqcoeff, sharpness);
   1769   }
   1770 
   1771 #define UPDATE_COEFF_SIMPLE_CASE(tx_class_literal)                             \
   1772   case tx_class_literal:                                                       \
   1773     for (; si >= 1; --si) {                                                    \
   1774       update_coeff_simple(&accu_rate, si, eob, tx_size, tx_class_literal, bwl, \
   1775                           rdmult, shift, dequant, scan, txb_costs, tcoeff,     \
   1776                           qcoeff, dqcoeff, levels);                            \
   1777     }                                                                          \
   1778     break;
   1779   switch (tx_class) {
   1780     UPDATE_COEFF_SIMPLE_CASE(TX_CLASS_2D);
   1781     UPDATE_COEFF_SIMPLE_CASE(TX_CLASS_HORIZ);
   1782     UPDATE_COEFF_SIMPLE_CASE(TX_CLASS_VERT);
   1783 #undef UPDATE_COEFF_SIMPLE_CASE
   1784     default: assert(false);
   1785   }
   1786 
   1787   // DC position
   1788   if (si == 0) {
   1789     // no need to update accu_dist because it's not used after this point
   1790     int64_t dummy_dist = 0;
   1791     update_coeff_general(&accu_rate, &dummy_dist, si, eob, tx_size, tx_class,
   1792                          bwl, height, rdmult, shift, txb_ctx->dc_sign_ctx,
   1793                          dequant, scan, txb_costs, tcoeff, qcoeff, dqcoeff,
   1794                          levels);
   1795   }
   1796 
   1797   const int tx_type_cost = get_tx_type_cost(cm, x, xd, plane, tx_size, tx_type);
   1798   if (eob == 0)
   1799     accu_rate += skip_cost;
   1800   else
   1801     accu_rate += non_skip_cost + tx_type_cost;
   1802 
   1803   p->eobs[block] = eob;
   1804   p->txb_entropy_ctx[block] =
   1805       av1_get_txb_entropy_context(qcoeff, scan_order, p->eobs[block]);
   1806 
   1807   *rate_cost = accu_rate;
   1808   return eob;
   1809 }
   1810 
   1811 // This function is deprecated, but we keep it here because hash trellis
   1812 // is not integrated with av1_optimize_txb_new yet
   1813 int av1_optimize_txb(const struct AV1_COMP *cpi, MACROBLOCK *x, int plane,
   1814                      int blk_row, int blk_col, int block, TX_SIZE tx_size,
   1815                      TXB_CTX *txb_ctx, int fast_mode, int *rate_cost) {
   1816   const AV1_COMMON *cm = &cpi->common;
   1817   MACROBLOCKD *const xd = &x->e_mbd;
   1818   const PLANE_TYPE plane_type = get_plane_type(plane);
   1819   const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
   1820   const TX_TYPE tx_type = av1_get_tx_type(plane_type, xd, blk_row, blk_col,
   1821                                           tx_size, cm->reduced_tx_set_used);
   1822   const MB_MODE_INFO *mbmi = xd->mi[0];
   1823   const struct macroblock_plane *p = &x->plane[plane];
   1824   struct macroblockd_plane *pd = &xd->plane[plane];
   1825   const int eob = p->eobs[block];
   1826   tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
   1827   tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
   1828   const tran_low_t *tcoeff = BLOCK_OFFSET(p->coeff, block);
   1829   const int16_t *dequant = p->dequant_QTX;
   1830   const int seg_eob = av1_get_max_eob(tx_size);
   1831   const int bwl = get_txb_bwl(tx_size);
   1832   const int width = get_txb_wide(tx_size);
   1833   const int height = get_txb_high(tx_size);
   1834   const int is_inter = is_inter_block(mbmi);
   1835   const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type);
   1836   const LV_MAP_COEFF_COST *txb_costs = &x->coeff_costs[txs_ctx][plane_type];
   1837   const int eob_multi_size = txsize_log2_minus4[tx_size];
   1838   const LV_MAP_EOB_COST txb_eob_costs =
   1839       x->eob_costs[eob_multi_size][plane_type];
   1840 
   1841   const int shift = av1_get_tx_scale(tx_size);
   1842   const int64_t rdmult =
   1843       (((int64_t)x->rdmult * plane_rd_mult[is_inter][plane_type]
   1844         << (2 * (xd->bd - 8))) +
   1845        2) >>
   1846       2;
   1847   uint8_t levels_buf[TX_PAD_2D];
   1848   uint8_t *const levels = set_levels(levels_buf, width);
   1849   const TX_SIZE qm_tx_size = av1_get_adjusted_tx_size(tx_size);
   1850   const qm_val_t *iqmatrix =
   1851       IS_2D_TRANSFORM(tx_type)
   1852           ? pd->seg_iqmatrix[mbmi->segment_id][qm_tx_size]
   1853           : cm->giqmatrix[NUM_QM_LEVELS - 1][0][qm_tx_size];
   1854   assert(width == (1 << bwl));
   1855   const int tx_type_cost = get_tx_type_cost(cm, x, xd, plane, tx_size, tx_type);
   1856   TxbInfo txb_info = {
   1857     qcoeff,     levels,  dqcoeff, tcoeff,   dequant,      shift, tx_size,
   1858     txs_ctx,    tx_type, bwl,     width,    height,       eob,   seg_eob,
   1859     scan_order, txb_ctx, rdmult,  iqmatrix, tx_type_cost,
   1860   };
   1861 
   1862   // Hash based trellis (hbt) speed feature: avoid expensive optimize_txb calls
   1863   // by storing the coefficient deltas in a hash table.
   1864   // Currently disabled in speedfeatures.c
   1865   if (eob <= HBT_EOB && eob > 0 && cpi->sf.use_hash_based_trellis) {
   1866     return hbt_create_hashes(&txb_info, txb_costs, &txb_eob_costs, p, block,
   1867                              fast_mode, rate_cost);
   1868   }
   1869 
   1870   av1_txb_init_levels(qcoeff, width, height, levels);
   1871 
   1872   const int update =
   1873       optimize_txb(&txb_info, txb_costs, &txb_eob_costs, rate_cost);
   1874 
   1875   if (update) {
   1876     p->eobs[block] = txb_info.eob;
   1877     p->txb_entropy_ctx[block] =
   1878         av1_get_txb_entropy_context(qcoeff, scan_order, txb_info.eob);
   1879   }
   1880   return txb_info.eob;
   1881 }
   1882 
   1883 int av1_get_txb_entropy_context(const tran_low_t *qcoeff,
   1884                                 const SCAN_ORDER *scan_order, int eob) {
   1885   const int16_t *const scan = scan_order->scan;
   1886   int cul_level = 0;
   1887   int c;
   1888 
   1889   if (eob == 0) return 0;
   1890   for (c = 0; c < eob; ++c) {
   1891     cul_level += abs(qcoeff[scan[c]]);
   1892     if (cul_level > COEFF_CONTEXT_MASK) break;
   1893   }
   1894 
   1895   cul_level = AOMMIN(COEFF_CONTEXT_MASK, cul_level);
   1896   set_dc_sign(&cul_level, qcoeff[0]);
   1897 
   1898   return cul_level;
   1899 }
   1900 
   1901 void av1_update_txb_context_b(int plane, int block, int blk_row, int blk_col,
   1902                               BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
   1903                               void *arg) {
   1904   struct tokenize_b_args *const args = arg;
   1905   const AV1_COMP *cpi = args->cpi;
   1906   const AV1_COMMON *cm = &cpi->common;
   1907   ThreadData *const td = args->td;
   1908   MACROBLOCK *const x = &td->mb;
   1909   MACROBLOCKD *const xd = &x->e_mbd;
   1910   struct macroblock_plane *p = &x->plane[plane];
   1911   struct macroblockd_plane *pd = &xd->plane[plane];
   1912   const uint16_t eob = p->eobs[block];
   1913   const tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
   1914   const PLANE_TYPE plane_type = pd->plane_type;
   1915   const TX_TYPE tx_type = av1_get_tx_type(plane_type, xd, blk_row, blk_col,
   1916                                           tx_size, cm->reduced_tx_set_used);
   1917   const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type);
   1918   const int cul_level = av1_get_txb_entropy_context(qcoeff, scan_order, eob);
   1919   av1_set_contexts(xd, pd, plane, plane_bsize, tx_size, cul_level, blk_col,
   1920                    blk_row);
   1921 }
   1922 
   1923 static void update_tx_type_count(const AV1_COMMON *cm, MACROBLOCKD *xd,
   1924                                  int blk_row, int blk_col, int plane,
   1925                                  TX_SIZE tx_size, FRAME_COUNTS *counts,
   1926                                  uint8_t allow_update_cdf) {
   1927   MB_MODE_INFO *mbmi = xd->mi[0];
   1928   int is_inter = is_inter_block(mbmi);
   1929   FRAME_CONTEXT *fc = xd->tile_ctx;
   1930 #if !CONFIG_ENTROPY_STATS
   1931   (void)counts;
   1932 #endif  // !CONFIG_ENTROPY_STATS
   1933 
   1934   // Only y plane's tx_type is updated
   1935   if (plane > 0) return;
   1936   TX_TYPE tx_type = av1_get_tx_type(PLANE_TYPE_Y, xd, blk_row, blk_col, tx_size,
   1937                                     cm->reduced_tx_set_used);
   1938   if (get_ext_tx_types(tx_size, is_inter, cm->reduced_tx_set_used) > 1 &&
   1939       cm->base_qindex > 0 && !mbmi->skip &&
   1940       !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
   1941     const int eset = get_ext_tx_set(tx_size, is_inter, cm->reduced_tx_set_used);
   1942     if (eset > 0) {
   1943       const TxSetType tx_set_type =
   1944           av1_get_ext_tx_set_type(tx_size, is_inter, cm->reduced_tx_set_used);
   1945       if (is_inter) {
   1946         if (allow_update_cdf) {
   1947           update_cdf(fc->inter_ext_tx_cdf[eset][txsize_sqr_map[tx_size]],
   1948                      av1_ext_tx_ind[tx_set_type][tx_type],
   1949                      av1_num_ext_tx_set[tx_set_type]);
   1950         }
   1951 #if CONFIG_ENTROPY_STATS
   1952         ++counts->inter_ext_tx[eset][txsize_sqr_map[tx_size]]
   1953                               [av1_ext_tx_ind[tx_set_type][tx_type]];
   1954 #endif  // CONFIG_ENTROPY_STATS
   1955       } else {
   1956         PREDICTION_MODE intra_dir;
   1957         if (mbmi->filter_intra_mode_info.use_filter_intra)
   1958           intra_dir = fimode_to_intradir[mbmi->filter_intra_mode_info
   1959                                              .filter_intra_mode];
   1960         else
   1961           intra_dir = mbmi->mode;
   1962 #if CONFIG_ENTROPY_STATS
   1963         ++counts->intra_ext_tx[eset][txsize_sqr_map[tx_size]][intra_dir]
   1964                               [av1_ext_tx_ind[tx_set_type][tx_type]];
   1965 #endif  // CONFIG_ENTROPY_STATS
   1966         if (allow_update_cdf) {
   1967           update_cdf(
   1968               fc->intra_ext_tx_cdf[eset][txsize_sqr_map[tx_size]][intra_dir],
   1969               av1_ext_tx_ind[tx_set_type][tx_type],
   1970               av1_num_ext_tx_set[tx_set_type]);
   1971         }
   1972       }
   1973     }
   1974   }
   1975 }
   1976 
   1977 void av1_update_and_record_txb_context(int plane, int block, int blk_row,
   1978                                        int blk_col, BLOCK_SIZE plane_bsize,
   1979                                        TX_SIZE tx_size, void *arg) {
   1980   struct tokenize_b_args *const args = arg;
   1981   const AV1_COMP *cpi = args->cpi;
   1982   const AV1_COMMON *cm = &cpi->common;
   1983   ThreadData *const td = args->td;
   1984   MACROBLOCK *const x = &td->mb;
   1985   MACROBLOCKD *const xd = &x->e_mbd;
   1986   struct macroblock_plane *p = &x->plane[plane];
   1987   struct macroblockd_plane *pd = &xd->plane[plane];
   1988   MB_MODE_INFO *mbmi = xd->mi[0];
   1989   const int eob = p->eobs[block];
   1990   TXB_CTX txb_ctx;
   1991   get_txb_ctx(plane_bsize, tx_size, plane, pd->above_context + blk_col,
   1992               pd->left_context + blk_row, &txb_ctx);
   1993   const int bwl = get_txb_bwl(tx_size);
   1994   const int width = get_txb_wide(tx_size);
   1995   const int height = get_txb_high(tx_size);
   1996   const uint8_t allow_update_cdf = args->allow_update_cdf;
   1997   const TX_SIZE txsize_ctx = get_txsize_entropy_ctx(tx_size);
   1998   FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
   1999 #if CONFIG_ENTROPY_STATS
   2000   int cdf_idx = cm->coef_cdf_category;
   2001 #endif  // CONFIG_ENTROPY_STATS
   2002 
   2003 #if CONFIG_ENTROPY_STATS
   2004   ++td->counts->txb_skip[cdf_idx][txsize_ctx][txb_ctx.txb_skip_ctx][eob == 0];
   2005 #endif  // CONFIG_ENTROPY_STATS
   2006   if (allow_update_cdf) {
   2007     update_cdf(ec_ctx->txb_skip_cdf[txsize_ctx][txb_ctx.txb_skip_ctx], eob == 0,
   2008                2);
   2009   }
   2010 
   2011   const int txb_offset =
   2012       x->mbmi_ext->cb_offset / (TX_SIZE_W_MIN * TX_SIZE_H_MIN);
   2013   uint16_t *eob_txb = x->mbmi_ext->cb_coef_buff->eobs[plane] + txb_offset;
   2014   uint8_t *txb_skip_ctx_txb =
   2015       x->mbmi_ext->cb_coef_buff->txb_skip_ctx[plane] + txb_offset;
   2016   txb_skip_ctx_txb[block] = txb_ctx.txb_skip_ctx;
   2017   eob_txb[block] = eob;
   2018 
   2019   if (eob == 0) {
   2020     av1_set_contexts(xd, pd, plane, plane_bsize, tx_size, 0, blk_col, blk_row);
   2021     return;
   2022   }
   2023 
   2024   tran_low_t *tcoeff_txb =
   2025       x->mbmi_ext->cb_coef_buff->tcoeff[plane] + x->mbmi_ext->cb_offset;
   2026   tran_low_t *tcoeff = BLOCK_OFFSET(tcoeff_txb, block);
   2027   const int segment_id = mbmi->segment_id;
   2028   const int seg_eob = av1_get_tx_eob(&cpi->common.seg, segment_id, tx_size);
   2029   const tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
   2030   memcpy(tcoeff, qcoeff, sizeof(*tcoeff) * seg_eob);
   2031 
   2032   uint8_t levels_buf[TX_PAD_2D];
   2033   uint8_t *const levels = set_levels(levels_buf, width);
   2034   av1_txb_init_levels(tcoeff, width, height, levels);
   2035   update_tx_type_count(cm, xd, blk_row, blk_col, plane, tx_size, td->counts,
   2036                        allow_update_cdf);
   2037 
   2038   const PLANE_TYPE plane_type = pd->plane_type;
   2039   const TX_TYPE tx_type = av1_get_tx_type(plane_type, xd, blk_row, blk_col,
   2040                                           tx_size, cm->reduced_tx_set_used);
   2041   const TX_CLASS tx_class = tx_type_to_class[tx_type];
   2042   const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type);
   2043   const int16_t *const scan = scan_order->scan;
   2044 #if CONFIG_ENTROPY_STATS
   2045   av1_update_eob_context(cdf_idx, eob, tx_size, tx_class, plane_type, ec_ctx,
   2046                          td->counts, allow_update_cdf);
   2047 #else
   2048   av1_update_eob_context(eob, tx_size, tx_class, plane_type, ec_ctx,
   2049                          allow_update_cdf);
   2050 #endif
   2051 
   2052   DECLARE_ALIGNED(16, int8_t, coeff_contexts[MAX_TX_SQUARE]);
   2053   av1_get_nz_map_contexts(levels, scan, eob, tx_size, tx_class, coeff_contexts);
   2054 
   2055   for (int c = eob - 1; c >= 0; --c) {
   2056     const int pos = scan[c];
   2057     const int coeff_ctx = coeff_contexts[pos];
   2058     const tran_low_t v = qcoeff[pos];
   2059     const tran_low_t level = abs(v);
   2060 
   2061     if (allow_update_cdf) {
   2062       if (c == eob - 1) {
   2063         assert(coeff_ctx < 4);
   2064         update_cdf(
   2065             ec_ctx->coeff_base_eob_cdf[txsize_ctx][plane_type][coeff_ctx],
   2066             AOMMIN(level, 3) - 1, 3);
   2067       } else {
   2068         update_cdf(ec_ctx->coeff_base_cdf[txsize_ctx][plane_type][coeff_ctx],
   2069                    AOMMIN(level, 3), 4);
   2070       }
   2071     }
   2072     {
   2073       if (c == eob - 1) {
   2074         assert(coeff_ctx < 4);
   2075 #if CONFIG_ENTROPY_STATS
   2076         ++td->counts->coeff_base_eob_multi[cdf_idx][txsize_ctx][plane_type]
   2077                                           [coeff_ctx][AOMMIN(level, 3) - 1];
   2078       } else {
   2079         ++td->counts->coeff_base_multi[cdf_idx][txsize_ctx][plane_type]
   2080                                       [coeff_ctx][AOMMIN(level, 3)];
   2081 #endif
   2082       }
   2083     }
   2084     if (level > NUM_BASE_LEVELS) {
   2085       const int base_range = level - 1 - NUM_BASE_LEVELS;
   2086       const int br_ctx = get_br_ctx(levels, pos, bwl, tx_class);
   2087       for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) {
   2088         const int k = AOMMIN(base_range - idx, BR_CDF_SIZE - 1);
   2089         if (allow_update_cdf) {
   2090           update_cdf(ec_ctx->coeff_br_cdf[AOMMIN(txsize_ctx, TX_32X32)]
   2091                                          [plane_type][br_ctx],
   2092                      k, BR_CDF_SIZE);
   2093         }
   2094         for (int lps = 0; lps < BR_CDF_SIZE - 1; lps++) {
   2095 #if CONFIG_ENTROPY_STATS
   2096           ++td->counts->coeff_lps[AOMMIN(txsize_ctx, TX_32X32)][plane_type][lps]
   2097                                  [br_ctx][lps == k];
   2098 #endif  // CONFIG_ENTROPY_STATS
   2099           if (lps == k) break;
   2100         }
   2101 #if CONFIG_ENTROPY_STATS
   2102         ++td->counts->coeff_lps_multi[cdf_idx][AOMMIN(txsize_ctx, TX_32X32)]
   2103                                      [plane_type][br_ctx][k];
   2104 #endif
   2105         if (k < BR_CDF_SIZE - 1) break;
   2106       }
   2107     }
   2108   }
   2109 
   2110   // Update the context needed to code the DC sign (if applicable)
   2111   if (tcoeff[0] != 0) {
   2112     const int dc_sign = (tcoeff[0] < 0) ? 1 : 0;
   2113     const int dc_sign_ctx = txb_ctx.dc_sign_ctx;
   2114 #if CONFIG_ENTROPY_STATS
   2115     ++td->counts->dc_sign[plane_type][dc_sign_ctx][dc_sign];
   2116 #endif  // CONFIG_ENTROPY_STATS
   2117     if (allow_update_cdf)
   2118       update_cdf(ec_ctx->dc_sign_cdf[plane_type][dc_sign_ctx], dc_sign, 2);
   2119     int *dc_sign_ctx_txb =
   2120         x->mbmi_ext->cb_coef_buff->dc_sign_ctx[plane] + txb_offset;
   2121     dc_sign_ctx_txb[block] = dc_sign_ctx;
   2122   }
   2123 
   2124   const int cul_level = av1_get_txb_entropy_context(tcoeff, scan_order, eob);
   2125   av1_set_contexts(xd, pd, plane, plane_bsize, tx_size, cul_level, blk_col,
   2126                    blk_row);
   2127 }
   2128 
   2129 void av1_update_txb_context(const AV1_COMP *cpi, ThreadData *td,
   2130                             RUN_TYPE dry_run, BLOCK_SIZE bsize, int *rate,
   2131                             int mi_row, int mi_col, uint8_t allow_update_cdf) {
   2132   const AV1_COMMON *const cm = &cpi->common;
   2133   const int num_planes = av1_num_planes(cm);
   2134   MACROBLOCK *const x = &td->mb;
   2135   MACROBLOCKD *const xd = &x->e_mbd;
   2136   MB_MODE_INFO *const mbmi = xd->mi[0];
   2137   struct tokenize_b_args arg = { cpi, td, NULL, 0, allow_update_cdf };
   2138   (void)rate;
   2139   (void)mi_row;
   2140   (void)mi_col;
   2141   if (mbmi->skip) {
   2142     av1_reset_skip_context(xd, mi_row, mi_col, bsize, num_planes);
   2143     return;
   2144   }
   2145 
   2146   if (!dry_run) {
   2147     av1_foreach_transformed_block(xd, bsize, mi_row, mi_col,
   2148                                   av1_update_and_record_txb_context, &arg,
   2149                                   num_planes);
   2150   } else if (dry_run == DRY_RUN_NORMAL) {
   2151     av1_foreach_transformed_block(xd, bsize, mi_row, mi_col,
   2152                                   av1_update_txb_context_b, &arg, num_planes);
   2153   } else {
   2154     printf("DRY_RUN_COSTCOEFFS is not supported yet\n");
   2155     assert(0);
   2156   }
   2157 }
   2158