Home | History | Annotate | Download | only in encoder
      1 /*
      2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include "./vp9_rtcd.h"
     12 #include "./vpx_config.h"
     13 #include "./vpx_dsp_rtcd.h"
     14 
     15 #include "vpx_dsp/quantize.h"
     16 #include "vpx_mem/vpx_mem.h"
     17 #include "vpx_ports/mem.h"
     18 
     19 #include "vp9/common/vp9_idct.h"
     20 #include "vp9/common/vp9_reconinter.h"
     21 #include "vp9/common/vp9_reconintra.h"
     22 #include "vp9/common/vp9_scan.h"
     23 
     24 #include "vp9/encoder/vp9_encodemb.h"
     25 #include "vp9/encoder/vp9_rd.h"
     26 #include "vp9/encoder/vp9_tokenize.h"
     27 
// Per-plane entropy context storage: two arrays holding 16 ENTROPY_CONTEXT
// entries for each of the MAX_MB_PLANE planes.
// NOTE(review): 'ta' / 'tl' presumably stand for the "above" and "left"
// contexts respectively -- confirm against the code that fills them.
struct optimize_ctx {
  ENTROPY_CONTEXT ta[MAX_MB_PLANE][16];
  ENTROPY_CONTEXT tl[MAX_MB_PLANE][16];
};
     32 
     33 void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
     34   struct macroblock_plane *const p = &x->plane[plane];
     35   const struct macroblockd_plane *const pd = &x->e_mbd.plane[plane];
     36   const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
     37   const int bw = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
     38   const int bh = 4 * num_4x4_blocks_high_lookup[plane_bsize];
     39 
     40 #if CONFIG_VP9_HIGHBITDEPTH
     41   if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
     42     vpx_highbd_subtract_block(bh, bw, p->src_diff, bw, p->src.buf,
     43                               p->src.stride, pd->dst.buf, pd->dst.stride,
     44                               x->e_mbd.bd);
     45     return;
     46   }
     47 #endif  // CONFIG_VP9_HIGHBITDEPTH
     48   vpx_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, p->src.stride,
     49                      pd->dst.buf, pd->dst.stride);
     50 }
     51 
// Weights indexed by [reference type][plane type]. vp9_optimize_b() multiplies
// mb->rdmult by the selected entry and halves the product to obtain the RD
// multiplier used for its cost comparisons.
static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = {
  { 10, 6 }, { 8, 5 },
};
     55 
     56 // 'num' can be negative, but 'shift' must be non-negative.
     57 #define RIGHT_SHIFT_POSSIBLY_NEGATIVE(num, shift) \
     58   ((num) >= 0) ? (num) >> (shift) : -((-(num)) >> (shift))
     59 
// Greedy rate-distortion optimization of the quantized coefficients of one
// transform block.
//
// Walks the coefficients in scan order up to the current eob. For every
// non-zero quantized value it compares keeping the value against reducing its
// magnitude by one, and commits whichever has the lower RD cost. Alongside
// that it tracks the scan position at which ending the block gives the lowest
// accumulated cost; coefficients from that position up to the old eob are
// zeroed at the end.
//
// mb      - macroblock whose plane buffers (coeff, qcoeff, eobs) are used.
// plane   - plane index; selects mb->plane[] and mb->e_mbd.plane[].
// block   - block offset used to locate the coefficient buffers and the eob.
// tx_size - transform size; selects the scan order and band table, and for
//           TX_32X32 enables the extra shift of 1.
// ctx     - entropy context used for the first coefficient in scan order.
//
// Updates qcoeff and dqcoeff in place, stores the new eob in
// mb->plane[plane].eobs[block] and returns it.
int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
                   int ctx) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  struct macroblock_plane *const p = &mb->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const int ref = is_inter_block(xd->mi[0]);
  uint8_t token_cache[1024];
  const tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  const int eob = p->eobs[block];
  const PLANE_TYPE plane_type = get_plane_type(plane);
  // Number of coefficients in a full block: 16 for the smallest transform
  // size, quadrupling with each size step.
  const int default_eob = 16 << (tx_size << 1);
  const int shift = (tx_size == TX_32X32);
  const int16_t *const dequant_ptr = pd->dequant;
  const uint8_t *const band_translate = get_band_translate(tx_size);
  const scan_order *const so = get_scan(xd, tx_size, plane_type, block);
  const int16_t *const scan = so->scan;
  const int16_t *const nb = so->neighbors;
  const int64_t rdmult =
      ((int64_t)mb->rdmult * plane_rd_mult[ref][plane_type]) >> 1;
  const int64_t rddiv = mb->rddiv;
  int64_t rd_cost0, rd_cost1;
  int64_t rate0, rate1;
  int16_t t0, t1;
  int i, final_eob;
#if CONFIG_VP9_HIGHBITDEPTH
  const uint16_t *cat6_high_cost = vp9_get_high_cost_table(xd->bd);
#else
  const uint16_t *cat6_high_cost = vp9_get_high_cost_table(8);
#endif
  unsigned int(*const token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
      mb->token_costs[tx_size][plane_type][ref];
  unsigned int(*token_costs_cur)[2][COEFF_CONTEXTS][ENTROPY_TOKENS];
  int64_t eob_cost0, eob_cost1;
  const int ctx0 = ctx;
  int64_t accu_rate = 0;
  // Initialized to the worst possible error for the largest transform size.
  // This ensures that it never goes negative.
  int64_t accu_error = ((int64_t)1) << 50;
  int64_t best_block_rd_cost = INT64_MAX;
  int x_prev = 1;
  // Quantized / de-quantized value of the coefficient immediately before the
  // best eob position found so far; written back after the main loop.
  tran_low_t before_best_eob_qc = 0;
  tran_low_t before_best_eob_dqc = 0;

  assert((!plane_type && !plane) || (plane_type && plane));
  assert(eob <= default_eob);

  // Seed the token cache with the energy class of every current token so that
  // get_coef_context() can be evaluated during the search.
  for (i = 0; i < eob; i++) {
    const int rc = scan[i];
    token_cache[rc] = vp9_pt_energy_class[vp9_get_token(qcoeff[rc])];
  }
  final_eob = 0;

  // Initial RD cost.
  // This is the cost of signalling EOB at position 0, i.e. an empty block.
  token_costs_cur = token_costs + band_translate[0];
  rate0 = (*token_costs_cur)[0][ctx0][EOB_TOKEN];
  best_block_rd_cost = RDCOST(rdmult, rddiv, rate0, accu_error);

  // For each token, pick one of two choices greedily:
  // (i) First candidate: Keep current quantized value, OR
  // (ii) Second candidate: Reduce quantized value by 1.
  for (i = 0; i < eob; i++) {
    const int rc = scan[i];
    const int x = qcoeff[rc];
    const int band_cur = band_translate[i];
    const int ctx_cur = (i == 0) ? ctx : get_coef_context(nb, token_cache, i);
    // Second index into the token cost table: 1 when the previously selected
    // quantized value was zero, 0 otherwise.
    const int token_tree_sel_cur = (x_prev == 0);
    token_costs_cur = token_costs + band_cur;
    if (x == 0) {  // No need to search
      const int token = vp9_get_token(x);
      rate0 = (*token_costs_cur)[token_tree_sel_cur][ctx_cur][token];
      accu_rate += rate0;
      x_prev = 0;
      // Note: accu_error does not change.
    } else {
      // dequant_ptr[0] is used for the first (rc == 0) coefficient and
      // dequant_ptr[1] for all others.
      const int dqv = dequant_ptr[rc != 0];
      // Compute the distortion for quantizing to 0.
      const int diff_for_zero_raw = (0 - coeff[rc]) * (1 << shift);
      const int diff_for_zero =
#if CONFIG_VP9_HIGHBITDEPTH
          (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
              ? RIGHT_SHIFT_POSSIBLY_NEGATIVE(diff_for_zero_raw, xd->bd - 8)
              :
#endif
              diff_for_zero_raw;
      const int64_t distortion_for_zero =
          (int64_t)diff_for_zero * diff_for_zero;

      // Compute the distortion for the first candidate
      const int diff0_raw = (dqcoeff[rc] - coeff[rc]) * (1 << shift);
      const int diff0 =
#if CONFIG_VP9_HIGHBITDEPTH
          (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
              ? RIGHT_SHIFT_POSSIBLY_NEGATIVE(diff0_raw, xd->bd - 8)
              :
#endif  // CONFIG_VP9_HIGHBITDEPTH
              diff0_raw;
      const int64_t distortion0 = (int64_t)diff0 * diff0;

      // Compute the distortion for the second candidate
      const int sign = -(x < 0);        // -1 if x is negative and 0 otherwise.
      const int x1 = x - 2 * sign - 1;  // abs(x1) = abs(x) - 1.
      int64_t distortion1;
      if (x1 != 0) {
        const int dqv_step =
#if CONFIG_VP9_HIGHBITDEPTH
            (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? dqv >> (xd->bd - 8)
                                                          :
#endif  // CONFIG_VP9_HIGHBITDEPTH
                                                          dqv;
        // (v + sign) ^ sign negates v when sign == -1 and leaves it unchanged
        // when sign == 0, so diff_step carries the sign of x.
        const int diff_step = (dqv_step + sign) ^ sign;
        const int diff1 = diff0 - diff_step;
        assert(dqv > 0);  // We aren't right shifting a negative number above.
        distortion1 = (int64_t)diff1 * diff1;
      } else {
        distortion1 = distortion_for_zero;
      }
      {
        // Calculate RDCost for current coeff for the two candidates.
        const int64_t base_bits0 = vp9_get_token_cost(x, &t0, cat6_high_cost);
        const int64_t base_bits1 = vp9_get_token_cost(x1, &t1, cat6_high_cost);
        rate0 =
            base_bits0 + (*token_costs_cur)[token_tree_sel_cur][ctx_cur][t0];
        rate1 =
            base_bits1 + (*token_costs_cur)[token_tree_sel_cur][ctx_cur][t1];
      }
      {
        int rdcost_better_for_x1, eob_rdcost_better_for_x1;
        int dqc0, dqc1;
        int64_t best_eob_cost_cur;
        int use_x1;

        // Calculate RD Cost effect on the next coeff for the two candidates.
        int64_t next_bits0 = 0;
        int64_t next_bits1 = 0;
        int64_t next_eob_bits0 = 0;
        int64_t next_eob_bits1 = 0;
        if (i < default_eob - 1) {
          int ctx_next, token_tree_sel_next;
          const int band_next = band_translate[i + 1];
          const int token_next =
              (i + 1 != eob) ? vp9_get_token(qcoeff[scan[i + 1]]) : EOB_TOKEN;
          unsigned int(
              *const token_costs_next)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
              token_costs + band_next;
          // token_cache[rc] is temporarily set to each candidate's energy
          // class so the next position's context reflects that candidate; the
          // final value is stored once the choice is made below.
          token_cache[rc] = vp9_pt_energy_class[t0];
          ctx_next = get_coef_context(nb, token_cache, i + 1);
          token_tree_sel_next = (x == 0);
          next_bits0 =
              (*token_costs_next)[token_tree_sel_next][ctx_next][token_next];
          next_eob_bits0 =
              (*token_costs_next)[token_tree_sel_next][ctx_next][EOB_TOKEN];
          token_cache[rc] = vp9_pt_energy_class[t1];
          ctx_next = get_coef_context(nb, token_cache, i + 1);
          token_tree_sel_next = (x1 == 0);
          next_bits1 =
              (*token_costs_next)[token_tree_sel_next][ctx_next][token_next];
          if (x1 != 0) {
            next_eob_bits1 =
                (*token_costs_next)[token_tree_sel_next][ctx_next][EOB_TOKEN];
          }
        }

        // Compare the total RD costs for two candidates.
        rd_cost0 = RDCOST(rdmult, rddiv, (rate0 + next_bits0), distortion0);
        rd_cost1 = RDCOST(rdmult, rddiv, (rate1 + next_bits1), distortion1);
        rdcost_better_for_x1 = (rd_cost1 < rd_cost0);
        // Cost of ending the block right after this coefficient, for each
        // candidate. A candidate of zero cannot be the last coefficient, so in
        // that case only the first candidate is considered.
        eob_cost0 = RDCOST(rdmult, rddiv, (accu_rate + rate0 + next_eob_bits0),
                           (accu_error + distortion0 - distortion_for_zero));
        eob_cost1 = eob_cost0;
        if (x1 != 0) {
          eob_cost1 =
              RDCOST(rdmult, rddiv, (accu_rate + rate1 + next_eob_bits1),
                     (accu_error + distortion1 - distortion_for_zero));
          eob_rdcost_better_for_x1 = (eob_cost1 < eob_cost0);
        } else {
          eob_rdcost_better_for_x1 = 0;
        }

        // Calculate the two candidate de-quantized values.
        dqc0 = dqcoeff[rc];
        dqc1 = 0;
        if (rdcost_better_for_x1 + eob_rdcost_better_for_x1) {
          if (x1 != 0) {
            dqc1 = RIGHT_SHIFT_POSSIBLY_NEGATIVE(x1 * dqv, shift);
          } else {
            dqc1 = 0;
          }
        }

        // Pick and record the better quantized and de-quantized values.
        if (rdcost_better_for_x1) {
          qcoeff[rc] = x1;
          dqcoeff[rc] = dqc1;
          accu_rate += rate1;
          accu_error += distortion1 - distortion_for_zero;
          assert(distortion1 <= distortion_for_zero);
          token_cache[rc] = vp9_pt_energy_class[t1];
        } else {
          accu_rate += rate0;
          accu_error += distortion0 - distortion_for_zero;
          assert(distortion0 <= distortion_for_zero);
          token_cache[rc] = vp9_pt_energy_class[t0];
        }
        assert(accu_error >= 0);
        x_prev = qcoeff[rc];  // Update based on selected quantized value.

        use_x1 = (x1 != 0) && eob_rdcost_better_for_x1;
        best_eob_cost_cur = use_x1 ? eob_cost1 : eob_cost0;

        // Determine whether to move the eob position to i+1
        if (best_eob_cost_cur < best_block_rd_cost) {
          best_block_rd_cost = best_eob_cost_cur;
          final_eob = i + 1;
          if (use_x1) {
            before_best_eob_qc = x1;
            before_best_eob_dqc = dqc1;
          } else {
            before_best_eob_qc = x;
            before_best_eob_dqc = dqc0;
          }
        }
      }
    }
  }
  assert(final_eob <= eob);
  // The last coefficient before the chosen eob takes the value that produced
  // the best eob cost, which may differ from the value picked greedily above.
  if (final_eob > 0) {
    int rc;
    assert(before_best_eob_qc != 0);
    i = final_eob - 1;
    rc = scan[i];
    qcoeff[rc] = before_best_eob_qc;
    dqcoeff[rc] = before_best_eob_dqc;
  }
  // Clear every coefficient between the new eob and the old one.
  for (i = final_eob; i < eob; i++) {
    int rc = scan[i];
    qcoeff[rc] = 0;
    dqcoeff[rc] = 0;
  }
  mb->plane[plane].eobs[block] = final_eob;
  return final_eob;
}
#undef RIGHT_SHIFT_POSSIBLY_NEGATIVE
    304 
    305 static INLINE void fdct32x32(int rd_transform, const int16_t *src,
    306                              tran_low_t *dst, int src_stride) {
    307   if (rd_transform)
    308     vpx_fdct32x32_rd(src, dst, src_stride);
    309   else
    310     vpx_fdct32x32(src, dst, src_stride);
    311 }
    312 
    313 #if CONFIG_VP9_HIGHBITDEPTH
    314 static INLINE void highbd_fdct32x32(int rd_transform, const int16_t *src,
    315                                     tran_low_t *dst, int src_stride) {
    316   if (rd_transform)
    317     vpx_highbd_fdct32x32_rd(src, dst, src_stride);
    318   else
    319     vpx_highbd_fdct32x32(src, dst, src_stride);
    320 }
    321 #endif  // CONFIG_VP9_HIGHBITDEPTH
    322 
    323 void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, int row, int col,
    324                         BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
    325   MACROBLOCKD *const xd = &x->e_mbd;
    326   const struct macroblock_plane *const p = &x->plane[plane];
    327   const struct macroblockd_plane *const pd = &xd->plane[plane];
    328   const scan_order *const scan_order = &vp9_default_scan_orders[tx_size];
    329   tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
    330   tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
    331   tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
    332   uint16_t *const eob = &p->eobs[block];
    333   const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
    334   const int16_t *src_diff;
    335   src_diff = &p->src_diff[4 * (row * diff_stride + col)];
    336   // skip block condition should be handled before this is called.
    337   assert(!x->skip_block);
    338 
    339 #if CONFIG_VP9_HIGHBITDEPTH
    340   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    341     switch (tx_size) {
    342       case TX_32X32:
    343         highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
    344         vp9_highbd_quantize_fp_32x32(coeff, 1024, x->skip_block, p->round_fp,
    345                                      p->quant_fp, qcoeff, dqcoeff, pd->dequant,
    346                                      eob, scan_order->scan, scan_order->iscan);
    347         break;
    348       case TX_16X16:
    349         vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
    350         vp9_highbd_quantize_fp(coeff, 256, x->skip_block, p->round_fp,
    351                                p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob,
    352                                scan_order->scan, scan_order->iscan);
    353         break;
    354       case TX_8X8:
    355         vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
    356         vp9_highbd_quantize_fp(coeff, 64, x->skip_block, p->round_fp,
    357                                p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob,
    358                                scan_order->scan, scan_order->iscan);
    359         break;
    360       case TX_4X4:
    361         x->fwd_txfm4x4(src_diff, coeff, diff_stride);
    362         vp9_highbd_quantize_fp(coeff, 16, x->skip_block, p->round_fp,
    363                                p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob,
    364                                scan_order->scan, scan_order->iscan);
    365         break;
    366       default: assert(0);
    367     }
    368     return;
    369   }
    370 #endif  // CONFIG_VP9_HIGHBITDEPTH
    371 
    372   switch (tx_size) {
    373     case TX_32X32:
    374       fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
    375       vp9_quantize_fp_32x32(coeff, 1024, x->skip_block, p->round_fp,
    376                             p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob,
    377                             scan_order->scan, scan_order->iscan);
    378       break;
    379     case TX_16X16:
    380       vpx_fdct16x16(src_diff, coeff, diff_stride);
    381       vp9_quantize_fp(coeff, 256, x->skip_block, p->round_fp, p->quant_fp,
    382                       qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan,
    383                       scan_order->iscan);
    384       break;
    385     case TX_8X8:
    386       vp9_fdct8x8_quant(src_diff, diff_stride, coeff, 64, x->skip_block,
    387                         p->round_fp, p->quant_fp, qcoeff, dqcoeff, pd->dequant,
    388                         eob, scan_order->scan, scan_order->iscan);
    389       break;
    390     case TX_4X4:
    391       x->fwd_txfm4x4(src_diff, coeff, diff_stride);
    392       vp9_quantize_fp(coeff, 16, x->skip_block, p->round_fp, p->quant_fp,
    393                       qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan,
    394                       scan_order->iscan);
    395       break;
    396     default: assert(0); break;
    397   }
    398 }
    399 
    400 void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block, int row, int col,
    401                         BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
    402   MACROBLOCKD *const xd = &x->e_mbd;
    403   const struct macroblock_plane *const p = &x->plane[plane];
    404   const struct macroblockd_plane *const pd = &xd->plane[plane];
    405   tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
    406   tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
    407   tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
    408   uint16_t *const eob = &p->eobs[block];
    409   const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
    410   const int16_t *src_diff;
    411   src_diff = &p->src_diff[4 * (row * diff_stride + col)];
    412   // skip block condition should be handled before this is called.
    413   assert(!x->skip_block);
    414 
    415 #if CONFIG_VP9_HIGHBITDEPTH
    416   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    417     switch (tx_size) {
    418       case TX_32X32:
    419         vpx_highbd_fdct32x32_1(src_diff, coeff, diff_stride);
    420         vpx_highbd_quantize_dc_32x32(coeff, x->skip_block, p->round,
    421                                      p->quant_fp[0], qcoeff, dqcoeff,
    422                                      pd->dequant[0], eob);
    423         break;
    424       case TX_16X16:
    425         vpx_highbd_fdct16x16_1(src_diff, coeff, diff_stride);
    426         vpx_highbd_quantize_dc(coeff, 256, x->skip_block, p->round,
    427                                p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0],
    428                                eob);
    429         break;
    430       case TX_8X8:
    431         vpx_highbd_fdct8x8_1(src_diff, coeff, diff_stride);
    432         vpx_highbd_quantize_dc(coeff, 64, x->skip_block, p->round,
    433                                p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0],
    434                                eob);
    435         break;
    436       case TX_4X4:
    437         x->fwd_txfm4x4(src_diff, coeff, diff_stride);
    438         vpx_highbd_quantize_dc(coeff, 16, x->skip_block, p->round,
    439                                p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0],
    440                                eob);
    441         break;
    442       default: assert(0);
    443     }
    444     return;
    445   }
    446 #endif  // CONFIG_VP9_HIGHBITDEPTH
    447 
    448   switch (tx_size) {
    449     case TX_32X32:
    450       vpx_fdct32x32_1(src_diff, coeff, diff_stride);
    451       vpx_quantize_dc_32x32(coeff, x->skip_block, p->round, p->quant_fp[0],
    452                             qcoeff, dqcoeff, pd->dequant[0], eob);
    453       break;
    454     case TX_16X16:
    455       vpx_fdct16x16_1(src_diff, coeff, diff_stride);
    456       vpx_quantize_dc(coeff, 256, x->skip_block, p->round, p->quant_fp[0],
    457                       qcoeff, dqcoeff, pd->dequant[0], eob);
    458       break;
    459     case TX_8X8:
    460       vpx_fdct8x8_1(src_diff, coeff, diff_stride);
    461       vpx_quantize_dc(coeff, 64, x->skip_block, p->round, p->quant_fp[0],
    462                       qcoeff, dqcoeff, pd->dequant[0], eob);
    463       break;
    464     case TX_4X4:
    465       x->fwd_txfm4x4(src_diff, coeff, diff_stride);
    466       vpx_quantize_dc(coeff, 16, x->skip_block, p->round, p->quant_fp[0],
    467                       qcoeff, dqcoeff, pd->dequant[0], eob);
    468       break;
    469     default: assert(0); break;
    470   }
    471 }
    472 
    473 void vp9_xform_quant(MACROBLOCK *x, int plane, int block, int row, int col,
    474                      BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
    475   MACROBLOCKD *const xd = &x->e_mbd;
    476   const struct macroblock_plane *const p = &x->plane[plane];
    477   const struct macroblockd_plane *const pd = &xd->plane[plane];
    478   const scan_order *const scan_order = &vp9_default_scan_orders[tx_size];
    479   tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
    480   tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
    481   tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
    482   uint16_t *const eob = &p->eobs[block];
    483   const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
    484   const int16_t *src_diff;
    485   src_diff = &p->src_diff[4 * (row * diff_stride + col)];
    486   // skip block condition should be handled before this is called.
    487   assert(!x->skip_block);
    488 
    489 #if CONFIG_VP9_HIGHBITDEPTH
    490   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    491     switch (tx_size) {
    492       case TX_32X32:
    493         highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
    494         vpx_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin,
    495                                     p->round, p->quant, p->quant_shift, qcoeff,
    496                                     dqcoeff, pd->dequant, eob, scan_order->scan,
    497                                     scan_order->iscan);
    498         break;
    499       case TX_16X16:
    500         vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
    501         vpx_highbd_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
    502                               p->quant, p->quant_shift, qcoeff, dqcoeff,
    503                               pd->dequant, eob, scan_order->scan,
    504                               scan_order->iscan);
    505         break;
    506       case TX_8X8:
    507         vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
    508         vpx_highbd_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
    509                               p->quant, p->quant_shift, qcoeff, dqcoeff,
    510                               pd->dequant, eob, scan_order->scan,
    511                               scan_order->iscan);
    512         break;
    513       case TX_4X4:
    514         x->fwd_txfm4x4(src_diff, coeff, diff_stride);
    515         vpx_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
    516                               p->quant, p->quant_shift, qcoeff, dqcoeff,
    517                               pd->dequant, eob, scan_order->scan,
    518                               scan_order->iscan);
    519         break;
    520       default: assert(0);
    521     }
    522     return;
    523   }
    524 #endif  // CONFIG_VP9_HIGHBITDEPTH
    525 
    526   switch (tx_size) {
    527     case TX_32X32:
    528       fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
    529       vpx_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
    530                            p->quant, p->quant_shift, qcoeff, dqcoeff,
    531                            pd->dequant, eob, scan_order->scan,
    532                            scan_order->iscan);
    533       break;
    534     case TX_16X16:
    535       vpx_fdct16x16(src_diff, coeff, diff_stride);
    536       vpx_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, p->quant,
    537                      p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
    538                      scan_order->scan, scan_order->iscan);
    539       break;
    540     case TX_8X8:
    541       vpx_fdct8x8(src_diff, coeff, diff_stride);
    542       vpx_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
    543                      p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
    544                      scan_order->scan, scan_order->iscan);
    545       break;
    546     case TX_4X4:
    547       x->fwd_txfm4x4(src_diff, coeff, diff_stride);
    548       vpx_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
    549                      p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
    550                      scan_order->scan, scan_order->iscan);
    551       break;
    552     default: assert(0); break;
    553   }
    554 }
    555 
    556 static void encode_block(int plane, int block, int row, int col,
    557                          BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
    558   struct encode_b_args *const args = arg;
    559   MACROBLOCK *const x = args->x;
    560   MACROBLOCKD *const xd = &x->e_mbd;
    561   struct macroblock_plane *const p = &x->plane[plane];
    562   struct macroblockd_plane *const pd = &xd->plane[plane];
    563   tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
    564   uint8_t *dst;
    565   ENTROPY_CONTEXT *a, *l;
    566   dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col];
    567   a = &args->ta[col];
    568   l = &args->tl[row];
    569 
    570   // TODO(jingning): per transformed block zero forcing only enabled for
    571   // luma component. will integrate chroma components as well.
    572   if (x->zcoeff_blk[tx_size][block] && plane == 0) {
    573     p->eobs[block] = 0;
    574     *a = *l = 0;
    575     return;
    576   }
    577 
    578   if (!x->skip_recode) {
    579     if (x->quant_fp) {
    580       // Encoding process for rtc mode
    581       if (x->skip_txfm[0] == SKIP_TXFM_AC_DC && plane == 0) {
    582         // skip forward transform
    583         p->eobs[block] = 0;
    584         *a = *l = 0;
    585         return;
    586       } else {
    587         vp9_xform_quant_fp(x, plane, block, row, col, plane_bsize, tx_size);
    588       }
    589     } else {
    590       if (max_txsize_lookup[plane_bsize] == tx_size) {
    591         int txfm_blk_index = (plane << 2) + (block >> (tx_size << 1));
    592         if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_NONE) {
    593           // full forward transform and quantization
    594           vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);
    595         } else if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_AC_ONLY) {
    596           // fast path forward transform and quantization
    597           vp9_xform_quant_dc(x, plane, block, row, col, plane_bsize, tx_size);
    598         } else {
    599           // skip forward transform
    600           p->eobs[block] = 0;
    601           *a = *l = 0;
    602           return;
    603         }
    604       } else {
    605         vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);
    606       }
    607     }
    608   }
    609 
    610   if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
    611     const int ctx = combine_entropy_contexts(*a, *l);
    612     *a = *l = vp9_optimize_b(x, plane, block, tx_size, ctx) > 0;
    613   } else {
    614     *a = *l = p->eobs[block] > 0;
    615   }
    616 
    617   if (p->eobs[block]) *(args->skip) = 0;
    618 
    619   if (x->skip_encode || p->eobs[block] == 0) return;
    620 #if CONFIG_VP9_HIGHBITDEPTH
    621   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    622     uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst);
    623     switch (tx_size) {
    624       case TX_32X32:
    625         vp9_highbd_idct32x32_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
    626                                  xd->bd);
    627         break;
    628       case TX_16X16:
    629         vp9_highbd_idct16x16_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
    630                                  xd->bd);
    631         break;
    632       case TX_8X8:
    633         vp9_highbd_idct8x8_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
    634                                xd->bd);
    635         break;
    636       case TX_4X4:
    637         // this is like vp9_short_idct4x4 but has a special case around eob<=1
    638         // which is significant (not just an optimization) for the lossless
    639         // case.
    640         x->highbd_inv_txfm_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
    641                                xd->bd);
    642         break;
    643       default: assert(0 && "Invalid transform size");
    644     }
    645     return;
    646   }
    647 #endif  // CONFIG_VP9_HIGHBITDEPTH
    648 
    649   switch (tx_size) {
    650     case TX_32X32:
    651       vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
    652       break;
    653     case TX_16X16:
    654       vp9_idct16x16_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
    655       break;
    656     case TX_8X8:
    657       vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
    658       break;
    659     case TX_4X4:
    660       // this is like vp9_short_idct4x4 but has a special case around eob<=1
    661       // which is significant (not just an optimization) for the lossless
    662       // case.
    663       x->inv_txfm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
    664       break;
    665     default: assert(0 && "Invalid transform size"); break;
    666   }
    667 }
    668 
    669 static void encode_block_pass1(int plane, int block, int row, int col,
    670                                BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
    671                                void *arg) {
    672   MACROBLOCK *const x = (MACROBLOCK *)arg;
    673   MACROBLOCKD *const xd = &x->e_mbd;
    674   struct macroblock_plane *const p = &x->plane[plane];
    675   struct macroblockd_plane *const pd = &xd->plane[plane];
    676   tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
    677   uint8_t *dst;
    678   dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col];
    679 
    680   vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);
    681 
    682   if (p->eobs[block] > 0) {
    683 #if CONFIG_VP9_HIGHBITDEPTH
    684     if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    685       x->highbd_inv_txfm_add(dqcoeff, CONVERT_TO_SHORTPTR(dst), pd->dst.stride,
    686                              p->eobs[block], xd->bd);
    687       return;
    688     }
    689 #endif  // CONFIG_VP9_HIGHBITDEPTH
    690     x->inv_txfm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
    691   }
    692 }
    693 
    694 void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) {
    695   vp9_subtract_plane(x, bsize, 0);
    696   vp9_foreach_transformed_block_in_plane(&x->e_mbd, bsize, 0,
    697                                          encode_block_pass1, x);
    698 }
    699 
    700 void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
    701   MACROBLOCKD *const xd = &x->e_mbd;
    702   struct optimize_ctx ctx;
    703   MODE_INFO *mi = xd->mi[0];
    704   struct encode_b_args arg = { x, 1, NULL, NULL, &mi->skip };
    705   int plane;
    706 
    707   mi->skip = 1;
    708 
    709   if (x->skip) return;
    710 
    711   for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
    712     if (!x->skip_recode) vp9_subtract_plane(x, bsize, plane);
    713 
    714     if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
    715       const struct macroblockd_plane *const pd = &xd->plane[plane];
    716       const TX_SIZE tx_size = plane ? get_uv_tx_size(mi, pd) : mi->tx_size;
    717       vp9_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane],
    718                                ctx.tl[plane]);
    719       arg.enable_coeff_opt = 1;
    720     } else {
    721       arg.enable_coeff_opt = 0;
    722     }
    723     arg.ta = ctx.ta[plane];
    724     arg.tl = ctx.tl[plane];
    725 
    726     vp9_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block,
    727                                            &arg);
    728   }
    729 }
    730 
    731 void vp9_encode_block_intra(int plane, int block, int row, int col,
    732                             BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
    733                             void *arg) {
    734   struct encode_b_args *const args = arg;
    735   MACROBLOCK *const x = args->x;
    736   MACROBLOCKD *const xd = &x->e_mbd;
    737   MODE_INFO *mi = xd->mi[0];
    738   struct macroblock_plane *const p = &x->plane[plane];
    739   struct macroblockd_plane *const pd = &xd->plane[plane];
    740   tran_low_t *coeff = BLOCK_OFFSET(p->coeff, block);
    741   tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
    742   tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
    743   const scan_order *scan_order;
    744   TX_TYPE tx_type = DCT_DCT;
    745   PREDICTION_MODE mode;
    746   const int bwl = b_width_log2_lookup[plane_bsize];
    747   const int diff_stride = 4 * (1 << bwl);
    748   uint8_t *src, *dst;
    749   int16_t *src_diff;
    750   uint16_t *eob = &p->eobs[block];
    751   const int src_stride = p->src.stride;
    752   const int dst_stride = pd->dst.stride;
    753   ENTROPY_CONTEXT *a = NULL;
    754   ENTROPY_CONTEXT *l = NULL;
    755   int entropy_ctx = 0;
    756   dst = &pd->dst.buf[4 * (row * dst_stride + col)];
    757   src = &p->src.buf[4 * (row * src_stride + col)];
    758   src_diff = &p->src_diff[4 * (row * diff_stride + col)];
    759   if (args->enable_coeff_opt) {
    760     a = &args->ta[col];
    761     l = &args->tl[row];
    762     entropy_ctx = combine_entropy_contexts(*a, *l);
    763   }
    764 
    765   if (tx_size == TX_4X4) {
    766     tx_type = get_tx_type_4x4(get_plane_type(plane), xd, block);
    767     scan_order = &vp9_scan_orders[TX_4X4][tx_type];
    768     mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mi->uv_mode;
    769   } else {
    770     mode = plane == 0 ? mi->mode : mi->uv_mode;
    771     if (tx_size == TX_32X32) {
    772       scan_order = &vp9_default_scan_orders[TX_32X32];
    773     } else {
    774       tx_type = get_tx_type(get_plane_type(plane), xd);
    775       scan_order = &vp9_scan_orders[tx_size][tx_type];
    776     }
    777   }
    778 
    779   vp9_predict_intra_block(
    780       xd, bwl, tx_size, mode, (x->skip_encode || x->fp_src_pred) ? src : dst,
    781       (x->skip_encode || x->fp_src_pred) ? src_stride : dst_stride, dst,
    782       dst_stride, col, row, plane);
    783 
    784   // skip block condition should be handled before this is called.
    785   assert(!x->skip_block);
    786 
    787 #if CONFIG_VP9_HIGHBITDEPTH
    788   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    789     uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst);
    790     switch (tx_size) {
    791       case TX_32X32:
    792         if (!x->skip_recode) {
    793           vpx_highbd_subtract_block(32, 32, src_diff, diff_stride, src,
    794                                     src_stride, dst, dst_stride, xd->bd);
    795           highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
    796           vpx_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin,
    797                                       p->round, p->quant, p->quant_shift,
    798                                       qcoeff, dqcoeff, pd->dequant, eob,
    799                                       scan_order->scan, scan_order->iscan);
    800         }
    801         if (args->enable_coeff_opt && !x->skip_recode) {
    802           *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
    803         }
    804         if (!x->skip_encode && *eob) {
    805           vp9_highbd_idct32x32_add(dqcoeff, dst16, dst_stride, *eob, xd->bd);
    806         }
    807         break;
    808       case TX_16X16:
    809         if (!x->skip_recode) {
    810           vpx_highbd_subtract_block(16, 16, src_diff, diff_stride, src,
    811                                     src_stride, dst, dst_stride, xd->bd);
    812           if (tx_type == DCT_DCT)
    813             vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
    814           else
    815             vp9_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type);
    816           vpx_highbd_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
    817                                 p->quant, p->quant_shift, qcoeff, dqcoeff,
    818                                 pd->dequant, eob, scan_order->scan,
    819                                 scan_order->iscan);
    820         }
    821         if (args->enable_coeff_opt && !x->skip_recode) {
    822           *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
    823         }
    824         if (!x->skip_encode && *eob) {
    825           vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst16, dst_stride, *eob,
    826                                   xd->bd);
    827         }
    828         break;
    829       case TX_8X8:
    830         if (!x->skip_recode) {
    831           vpx_highbd_subtract_block(8, 8, src_diff, diff_stride, src,
    832                                     src_stride, dst, dst_stride, xd->bd);
    833           if (tx_type == DCT_DCT)
    834             vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
    835           else
    836             vp9_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type);
    837           vpx_highbd_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
    838                                 p->quant, p->quant_shift, qcoeff, dqcoeff,
    839                                 pd->dequant, eob, scan_order->scan,
    840                                 scan_order->iscan);
    841         }
    842         if (args->enable_coeff_opt && !x->skip_recode) {
    843           *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
    844         }
    845         if (!x->skip_encode && *eob) {
    846           vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst16, dst_stride, *eob,
    847                                 xd->bd);
    848         }
    849         break;
    850       case TX_4X4:
    851         if (!x->skip_recode) {
    852           vpx_highbd_subtract_block(4, 4, src_diff, diff_stride, src,
    853                                     src_stride, dst, dst_stride, xd->bd);
    854           if (tx_type != DCT_DCT)
    855             vp9_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type);
    856           else
    857             x->fwd_txfm4x4(src_diff, coeff, diff_stride);
    858           vpx_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
    859                                 p->quant, p->quant_shift, qcoeff, dqcoeff,
    860                                 pd->dequant, eob, scan_order->scan,
    861                                 scan_order->iscan);
    862         }
    863         if (args->enable_coeff_opt && !x->skip_recode) {
    864           *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
    865         }
    866         if (!x->skip_encode && *eob) {
    867           if (tx_type == DCT_DCT) {
    868             // this is like vp9_short_idct4x4 but has a special case around
    869             // eob<=1 which is significant (not just an optimization) for the
    870             // lossless case.
    871             x->highbd_inv_txfm_add(dqcoeff, dst16, dst_stride, *eob, xd->bd);
    872           } else {
    873             vp9_highbd_iht4x4_16_add(dqcoeff, dst16, dst_stride, tx_type,
    874                                      xd->bd);
    875           }
    876         }
    877         break;
    878       default: assert(0); return;
    879     }
    880     if (*eob) *(args->skip) = 0;
    881     return;
    882   }
    883 #endif  // CONFIG_VP9_HIGHBITDEPTH
    884 
    885   switch (tx_size) {
    886     case TX_32X32:
    887       if (!x->skip_recode) {
    888         vpx_subtract_block(32, 32, src_diff, diff_stride, src, src_stride, dst,
    889                            dst_stride);
    890         fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
    891         vpx_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
    892                              p->quant, p->quant_shift, qcoeff, dqcoeff,
    893                              pd->dequant, eob, scan_order->scan,
    894                              scan_order->iscan);
    895       }
    896       if (args->enable_coeff_opt && !x->skip_recode) {
    897         *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
    898       }
    899       if (!x->skip_encode && *eob)
    900         vp9_idct32x32_add(dqcoeff, dst, dst_stride, *eob);
    901       break;
    902     case TX_16X16:
    903       if (!x->skip_recode) {
    904         vpx_subtract_block(16, 16, src_diff, diff_stride, src, src_stride, dst,
    905                            dst_stride);
    906         vp9_fht16x16(src_diff, coeff, diff_stride, tx_type);
    907         vpx_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, p->quant,
    908                        p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
    909                        scan_order->scan, scan_order->iscan);
    910       }
    911       if (args->enable_coeff_opt && !x->skip_recode) {
    912         *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
    913       }
    914       if (!x->skip_encode && *eob)
    915         vp9_iht16x16_add(tx_type, dqcoeff, dst, dst_stride, *eob);
    916       break;
    917     case TX_8X8:
    918       if (!x->skip_recode) {
    919         vpx_subtract_block(8, 8, src_diff, diff_stride, src, src_stride, dst,
    920                            dst_stride);
    921         vp9_fht8x8(src_diff, coeff, diff_stride, tx_type);
    922         vpx_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
    923                        p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
    924                        scan_order->scan, scan_order->iscan);
    925       }
    926       if (args->enable_coeff_opt && !x->skip_recode) {
    927         *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
    928       }
    929       if (!x->skip_encode && *eob)
    930         vp9_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob);
    931       break;
    932     case TX_4X4:
    933       if (!x->skip_recode) {
    934         vpx_subtract_block(4, 4, src_diff, diff_stride, src, src_stride, dst,
    935                            dst_stride);
    936         if (tx_type != DCT_DCT)
    937           vp9_fht4x4(src_diff, coeff, diff_stride, tx_type);
    938         else
    939           x->fwd_txfm4x4(src_diff, coeff, diff_stride);
    940         vpx_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
    941                        p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
    942                        scan_order->scan, scan_order->iscan);
    943       }
    944       if (args->enable_coeff_opt && !x->skip_recode) {
    945         *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
    946       }
    947       if (!x->skip_encode && *eob) {
    948         if (tx_type == DCT_DCT)
    949           // this is like vp9_short_idct4x4 but has a special case around eob<=1
    950           // which is significant (not just an optimization) for the lossless
    951           // case.
    952           x->inv_txfm_add(dqcoeff, dst, dst_stride, *eob);
    953         else
    954           vp9_iht4x4_16_add(dqcoeff, dst, dst_stride, tx_type);
    955       }
    956       break;
    957     default: assert(0); break;
    958   }
    959   if (*eob) *(args->skip) = 0;
    960 }
    961 
    962 void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane,
    963                                   int enable_optimize_b) {
    964   const MACROBLOCKD *const xd = &x->e_mbd;
    965   struct optimize_ctx ctx;
    966   struct encode_b_args arg = { x, enable_optimize_b, ctx.ta[plane],
    967                                ctx.tl[plane], &xd->mi[0]->skip };
    968 
    969   if (enable_optimize_b && x->optimize &&
    970       (!x->skip_recode || !x->skip_optimize)) {
    971     const struct macroblockd_plane *const pd = &xd->plane[plane];
    972     const TX_SIZE tx_size =
    973         plane ? get_uv_tx_size(xd->mi[0], pd) : xd->mi[0]->tx_size;
    974     vp9_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane], ctx.tl[plane]);
    975   } else {
    976     arg.enable_coeff_opt = 0;
    977   }
    978 
    979   vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
    980                                          vp9_encode_block_intra, &arg);
    981 }
    982