/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>
#include <math.h>

#include "./vp9_rtcd.h"
#include "./vpx_dsp_rtcd.h"

#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/system_state.h"

#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_idct.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_quant_common.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_scan.h"
#include "vp9/common/vp9_seg_common.h"

#include "vp9/encoder/vp9_cost.h"
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/encoder/vp9_encoder.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/encoder/vp9_rd.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_aq_variance.h"

#define LAST_FRAME_MODE_MASK    ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | \
                                 (1 << INTRA_FRAME))
#define GOLDEN_FRAME_MODE_MASK  ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | \
                                 (1 << INTRA_FRAME))
#define ALT_REF_MODE_MASK       ((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | \
                                 (1 << INTRA_FRAME))

#define SECOND_REF_FRAME_MASK   ((1 << ALTREF_FRAME) | 0x01)

#define MIN_EARLY_TERM_INDEX    3
#define NEW_MV_DISCOUNT_FACTOR  8

typedef struct {
  PREDICTION_MODE mode;
  MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;

typedef struct {
  MV_REFERENCE_FRAME ref_frame[2];
} REF_DEFINITION;

struct rdcost_block_args {
  MACROBLOCK *x;
  ENTROPY_CONTEXT t_above[16];
  ENTROPY_CONTEXT t_left[16];
  int this_rate;
  int64_t this_dist;
  int64_t this_sse;
  int64_t this_rd;
  int64_t best_rd;
  int exit_early;
  int use_fast_coef_costing;
  const scan_order *so;
  uint8_t skippable;
};

#define LAST_NEW_MV_INDEX 6
static const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
  {NEARESTMV, {LAST_FRAME,   NONE}},
  {NEARESTMV, {ALTREF_FRAME, NONE}},
  {NEARESTMV, {GOLDEN_FRAME, NONE}},

  {DC_PRED,   {INTRA_FRAME,  NONE}},

  {NEWMV,     {LAST_FRAME,   NONE}},
  {NEWMV,     {ALTREF_FRAME, NONE}},
  {NEWMV,     {GOLDEN_FRAME, NONE}},

  {NEARMV,    {LAST_FRAME,   NONE}},
  {NEARMV,    {ALTREF_FRAME, NONE}},
  {NEARMV,    {GOLDEN_FRAME, NONE}},

  {ZEROMV,    {LAST_FRAME,   NONE}},
  {ZEROMV,    {GOLDEN_FRAME, NONE}},
  {ZEROMV,    {ALTREF_FRAME, NONE}},

  {NEARESTMV, {LAST_FRAME,   ALTREF_FRAME}},
  {NEARESTMV, {GOLDEN_FRAME, ALTREF_FRAME}},

  {TM_PRED,   {INTRA_FRAME,  NONE}},

  {NEARMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {NEWMV,     {LAST_FRAME,   ALTREF_FRAME}},
  {NEARMV,    {GOLDEN_FRAME, ALTREF_FRAME}},
  {NEWMV,     {GOLDEN_FRAME, ALTREF_FRAME}},

  {ZEROMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {ZEROMV,    {GOLDEN_FRAME, ALTREF_FRAME}},

  {H_PRED,    {INTRA_FRAME,  NONE}},
  {V_PRED,    {INTRA_FRAME,  NONE}},
  {D135_PRED, {INTRA_FRAME,  NONE}},
  {D207_PRED, {INTRA_FRAME,  NONE}},
  {D153_PRED, {INTRA_FRAME,  NONE}},
  {D63_PRED,  {INTRA_FRAME,  NONE}},
  {D117_PRED, {INTRA_FRAME,  NONE}},
  {D45_PRED,  {INTRA_FRAME,  NONE}},
};

static const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
  {{LAST_FRAME,   NONE}},
  {{GOLDEN_FRAME, NONE}},
  {{ALTREF_FRAME, NONE}},
  {{LAST_FRAME,   ALTREF_FRAME}},
  {{GOLDEN_FRAME, ALTREF_FRAME}},
  {{INTRA_FRAME,  NONE}},
};

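/* Rotate the coefficient buffer pointers: the macroblock planes take over
 * buffer set m from the PICK_MODE_CONTEXT, while sets m and n swap places.
 * Only pointers move; no coefficient data is copied. */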
static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
                           int m, int n, int min_plane, int max_plane) {
  int i;

  for (i = min_plane; i < max_plane; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &x->e_mbd.plane[i];

    p->coeff    = ctx->coeff_pbuf[i][m];
    p->qcoeff   = ctx->qcoeff_pbuf[i][m];
    pd->dqcoeff = ctx->dqcoeff_pbuf[i][m];
    p->eobs     = ctx->eobs_pbuf[i][m];

    ctx->coeff_pbuf[i][m]   = ctx->coeff_pbuf[i][n];
    ctx->qcoeff_pbuf[i][m]  = ctx->qcoeff_pbuf[i][n];
    ctx->dqcoeff_pbuf[i][m] = ctx->dqcoeff_pbuf[i][n];
    ctx->eobs_pbuf[i][m]    = ctx->eobs_pbuf[i][n];

    ctx->coeff_pbuf[i][n]   = p->coeff;
    ctx->qcoeff_pbuf[i][n]  = p->qcoeff;
    ctx->dqcoeff_pbuf[i][n] = pd->dqcoeff;
    ctx->eobs_pbuf[i][n]    = p->eobs;
  }
}

static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
                            MACROBLOCK *x, MACROBLOCKD *xd,
                            int *out_rate_sum, int64_t *out_dist_sum,
                            int *skip_txfm_sb, int64_t *skip_sse_sb) {
  // Note that our transform coefficients are 8 times those of an orthogonal
  // transform, so the quantizer step is also scaled by 8. To get the
  // effective quantizer we need to divide by 8 before calling the modeling
  // function.
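  // For example, in the 8-bit path dequant_shift below is 3, so
  // pd->dequant[1] >> 3 yields the effective AC quantizer step.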
  int i;
  int64_t rate_sum = 0;
  int64_t dist_sum = 0;
  const int ref = xd->mi[0]->mbmi.ref_frame[0];
  unsigned int sse;
  unsigned int var = 0;
  unsigned int sum_sse = 0;
  int64_t total_sse = 0;
  int skip_flag = 1;
  const int shift = 6;
  int rate;
  int64_t dist;
  const int dequant_shift =
#if CONFIG_VP9_HIGHBITDEPTH
      (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ?
          xd->bd - 5 :
#endif  // CONFIG_VP9_HIGHBITDEPTH
          3;

  x->pred_sse[ref] = 0;

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
    const TX_SIZE max_tx_size = max_txsize_lookup[bs];
    const BLOCK_SIZE unit_size = txsize_to_bsize[max_tx_size];
    const int64_t dc_thr = p->quant_thred[0] >> shift;
    const int64_t ac_thr = p->quant_thred[1] >> shift;
    // The low thresholds are used to measure whether the prediction errors
    // are low enough that we can skip the mode search.
    const int64_t low_dc_thr = VPXMIN(50, dc_thr >> 2);
    const int64_t low_ac_thr = VPXMIN(80, ac_thr >> 2);
    int bw = 1 << (b_width_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
    int bh = 1 << (b_height_log2_lookup[bs] - b_height_log2_lookup[unit_size]);
    int idx, idy;
    int lw = b_width_log2_lookup[unit_size] + 2;
    int lh = b_height_log2_lookup[unit_size] + 2;

    sum_sse = 0;

    for (idy = 0; idy < bh; ++idy) {
      for (idx = 0; idx < bw; ++idx) {
        uint8_t *src = p->src.buf + (idy * p->src.stride << lh) + (idx << lw);
        uint8_t *dst = pd->dst.buf + (idy * pd->dst.stride << lh) + (idx << lw);
        int block_idx = (idy << 1) + idx;
        int low_err_skip = 0;

        var = cpi->fn_ptr[unit_size].vf(src, p->src.stride,
                                        dst, pd->dst.stride, &sse);
        x->bsse[(i << 2) + block_idx] = sse;
        sum_sse += sse;

        x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_NONE;
        if (!x->select_tx_size) {
          // Check if all ac coefficients can be quantized to zero.
          if (var < ac_thr || var == 0) {
            x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_AC_ONLY;

            // Check if the dc coefficient can be quantized to zero.
            if (sse - var < dc_thr || sse == var) {
              x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_AC_DC;

              if (!sse || (var < low_ac_thr && sse - var < low_dc_thr))
                low_err_skip = 1;
            }
          }
        }

        if (skip_flag && !low_err_skip)
          skip_flag = 0;

        if (i == 0)
          x->pred_sse[ref] += sse;
      }
    }

    total_sse += sum_sse;

    // Fast approximation of the modeling function.
    if (cpi->sf.simple_model_rd_from_var) {
      int64_t rate;
      const int64_t square_error = sum_sse;
      int quantizer = (pd->dequant[1] >> dequant_shift);

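      // A rough linear fit: e.g. with an effective quantizer step of 40,
      // rate ~= square_error * (280 - 40) / 256 and
      // dist ~= square_error * 40 / 256 (model units).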
      if (quantizer < 120)
        rate = (square_error * (280 - quantizer)) >> 8;
      else
        rate = 0;
      dist = (square_error * quantizer) >> 8;
      rate_sum += rate;
      dist_sum += dist;
    } else {
      vp9_model_rd_from_var_lapndz(sum_sse, num_pels_log2_lookup[bs],
                                   pd->dequant[1] >> dequant_shift,
                                   &rate, &dist);
      rate_sum += rate;
      dist_sum += dist;
    }
  }

  *skip_txfm_sb = skip_flag;
  *skip_sse_sb = total_sse << 4;
  *out_rate_sum = (int)rate_sum;
  *out_dist_sum = dist_sum << 4;
}

#if CONFIG_VP9_HIGHBITDEPTH
int64_t vp9_highbd_block_error_c(const tran_low_t *coeff,
                                 const tran_low_t *dqcoeff,
                                 intptr_t block_size,
                                 int64_t *ssz, int bd) {
  int i;
  int64_t error = 0, sqcoeff = 0;
  int shift = 2 * (bd - 8);
  int rounding = shift > 0 ? 1 << (shift - 1) : 0;

  for (i = 0; i < block_size; i++) {
    const int64_t diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
  }
  assert(error >= 0 && sqcoeff >= 0);
  error = (error + rounding) >> shift;
  sqcoeff = (sqcoeff + rounding) >> shift;

  *ssz = sqcoeff;
  return error;
}

int64_t vp9_highbd_block_error_8bit_c(const tran_low_t *coeff,
                                      const tran_low_t *dqcoeff,
                                      intptr_t block_size,
                                      int64_t *ssz) {
  // Note that the C versions of these 2 functions (vp9_block_error and
  // vp9_highbd_block_error_8bit) are the same, but the optimized assembly
  // routines are not compatible in the non-high-bitdepth configuration, so
  // they still cannot share the same name.
  return vp9_block_error_c(coeff, dqcoeff, block_size, ssz);
}

static int64_t vp9_highbd_block_error_dispatch(const tran_low_t *coeff,
                                               const tran_low_t *dqcoeff,
                                               intptr_t block_size,
                                               int64_t *ssz, int bd) {
  if (bd == 8) {
    return vp9_highbd_block_error_8bit(coeff, dqcoeff, block_size, ssz);
  } else {
    return vp9_highbd_block_error(coeff, dqcoeff, block_size, ssz, bd);
  }
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    const int diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += coeff[i] * coeff[i];
  }

  *ssz = sqcoeff;
  return error;
}

int64_t vp9_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff,
                             int block_size) {
  int i;
  int64_t error = 0;

  for (i = 0; i < block_size; i++) {
    const int diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
  }

  return error;
}

/* The trailing '0' is a terminator which is used inside cost_coeffs() to
 * decide whether to include the cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in an 8x8 block,
 * is non-zero). */
static const int16_t band_counts[TX_SIZES][8] = {
  { 1, 2, 3, 4,  3,   16 - 13, 0 },
  { 1, 2, 3, 4, 11,   64 - 21, 0 },
  { 1, 2, 3, 4, 11,  256 - 21, 0 },
  { 1, 2, 3, 4, 11, 1024 - 21, 0 },
};
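/* Dropping the terminator, each row sums to the block's coefficient count,
 * e.g. 1 + 2 + 3 + 4 + 3 + (16 - 13) == 16 for 4x4 and
 * 1 + 2 + 3 + 4 + 11 + (64 - 21) == 64 for 8x8. */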
static int cost_coeffs(MACROBLOCK *x,
                       int plane, int block,
                       ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
                       TX_SIZE tx_size,
                       const int16_t *scan, const int16_t *nb,
                       int use_fast_coef_costing) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const struct macroblock_plane *p = &x->plane[plane];
  const PLANE_TYPE type = get_plane_type(plane);
  const int16_t *band_count = &band_counts[tx_size][1];
  const int eob = p->eobs[block];
  const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
                   x->token_costs[tx_size][type][is_inter_block(mbmi)];
  uint8_t token_cache[32 * 32];
  int pt = combine_entropy_contexts(*A, *L);
  int c, cost;
#if CONFIG_VP9_HIGHBITDEPTH
  const int16_t *cat6_high_cost = vp9_get_high_cost_table(xd->bd);
#else
  const int16_t *cat6_high_cost = vp9_get_high_cost_table(8);
#endif

  // Check for consistency of tx_size with mode info
  assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size :
         get_uv_tx_size(mbmi, &xd->plane[plane]) == tx_size);

  if (eob == 0) {
    // single eob token
    cost = token_costs[0][0][pt][EOB_TOKEN];
    c = 0;
  } else {
    int band_left = *band_count++;

    // dc token
    int v = qcoeff[0];
    int16_t prev_t;
    EXTRABIT e;
    vp9_get_token_extra(v, &prev_t, &e);
    cost = (*token_costs)[0][pt][prev_t] +
        vp9_get_cost(prev_t, e, cat6_high_cost);

    token_cache[0] = vp9_pt_energy_class[prev_t];
    ++token_costs;

    // ac tokens
    for (c = 1; c < eob; c++) {
      const int rc = scan[c];
      int16_t t;

      v = qcoeff[rc];
      vp9_get_token_extra(v, &t, &e);
      if (use_fast_coef_costing) {
        cost += (*token_costs)[!prev_t][!prev_t][t] +
            vp9_get_cost(t, e, cat6_high_cost);
      } else {
        pt = get_coef_context(nb, token_cache, c);
        cost += (*token_costs)[!prev_t][pt][t] +
            vp9_get_cost(t, e, cat6_high_cost);
        token_cache[rc] = vp9_pt_energy_class[t];
      }
      prev_t = t;
      if (!--band_left) {
        band_left = *band_count++;
        ++token_costs;
      }
    }

    // eob token
    if (band_left) {
      if (use_fast_coef_costing) {
        cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
      } else {
        pt = get_coef_context(nb, token_cache, c);
        cost += (*token_costs)[0][pt][EOB_TOKEN];
      }
    }
  }

  // Update the entropy contexts: nonzero iff the block had any coefficients.
  *A = *L = (c > 0);

  return cost;
}

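/* Transform-domain distortion for one block: the returned error and sse are
 * shifted to compensate for the forward transform's fixed-point scaling
 * (TX_32X32 is scaled differently from the smaller sizes), putting the
 * result on roughly the same scale as a pixel-domain SSE. */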
static void dist_block(MACROBLOCK *x, int plane, int block, TX_SIZE tx_size,
                       int64_t *out_dist, int64_t *out_sse) {
  const int ss_txfrm_size = tx_size << 1;
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  int64_t this_sse;
  int shift = tx_size == TX_32X32 ? 0 : 2;
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
#if CONFIG_VP9_HIGHBITDEPTH
  const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8;
  *out_dist = vp9_highbd_block_error_dispatch(coeff, dqcoeff,
                                              16 << ss_txfrm_size,
                                              &this_sse, bd) >> shift;
#else
  *out_dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                              &this_sse) >> shift;
#endif  // CONFIG_VP9_HIGHBITDEPTH
  *out_sse = this_sse >> shift;

  if (x->skip_encode && !is_inter_block(&xd->mi[0]->mbmi)) {
    // TODO(jingning): tune the model to better capture the distortion.
    int64_t p = (pd->dequant[1] * pd->dequant[1] *
                    (1 << ss_txfrm_size)) >>
#if CONFIG_VP9_HIGHBITDEPTH
                        (shift + 2 + (bd - 8) * 2);
#else
                        (shift + 2);
#endif  // CONFIG_VP9_HIGHBITDEPTH
    *out_dist += (p >> 4);
    *out_sse  += p;
  }
}

static int rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
                      TX_SIZE tx_size, struct rdcost_block_args *args) {
  int x_idx, y_idx;
  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x_idx, &y_idx);

  return cost_coeffs(args->x, plane, block, args->t_above + x_idx,
                     args->t_left + y_idx, tx_size,
                     args->so->scan, args->so->neighbors,
                     args->use_fast_coef_costing);
}

static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
                          TX_SIZE tx_size, void *arg) {
  struct rdcost_block_args *args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  int64_t rd1, rd2, rd;
  int rate;
  int64_t dist;
  int64_t sse;

  if (args->exit_early)
    return;

  if (!is_inter_block(mbmi)) {
    struct encode_b_args arg = {x, NULL, &mbmi->skip};
    vp9_encode_block_intra(plane, block, plane_bsize, tx_size, &arg);
    dist_block(x, plane, block, tx_size, &dist, &sse);
  } else if (max_txsize_lookup[plane_bsize] == tx_size) {
    if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] ==
        SKIP_TXFM_NONE) {
      // full forward transform and quantization
      vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
      dist_block(x, plane, block, tx_size, &dist, &sse);
    } else if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] ==
               SKIP_TXFM_AC_ONLY) {
      // compute DC coefficient
      tran_low_t *const coeff   = BLOCK_OFFSET(x->plane[plane].coeff, block);
      tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
      vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size);
      sse  = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
      dist = sse;
      if (x->plane[plane].eobs[block]) {
        const int64_t orig_sse = (int64_t)coeff[0] * coeff[0];
        const int64_t resd_sse = coeff[0] - dqcoeff[0];
        int64_t dc_correct = orig_sse - resd_sse * resd_sse;
#if CONFIG_VP9_HIGHBITDEPTH
        dc_correct >>= ((xd->bd - 8) * 2);
#endif
        if (tx_size != TX_32X32)
          dc_correct >>= 2;

        dist = VPXMAX(0, sse - dc_correct);
      }
    } else {
      // SKIP_TXFM_AC_DC
      // skip forward transform
      x->plane[plane].eobs[block] = 0;
      sse  = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
      dist = sse;
    }
  } else {
    // full forward transform and quantization
    vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
    dist_block(x, plane, block, tx_size, &dist, &sse);
  }

  rd = RDCOST(x->rdmult, x->rddiv, 0, dist);
  if (args->this_rd + rd > args->best_rd) {
    args->exit_early = 1;
    return;
  }

  rate = rate_block(plane, block, plane_bsize, tx_size, args);
  rd1 = RDCOST(x->rdmult, x->rddiv, rate, dist);
  rd2 = RDCOST(x->rdmult, x->rddiv, 0, sse);

  // TODO(jingning): temporarily enabled only for luma component
  rd = VPXMIN(rd1, rd2);
  if (plane == 0)
    x->zcoeff_blk[tx_size][block] = !x->plane[plane].eobs[block] ||
                                    (rd1 > rd2 && !xd->lossless);

  args->this_rate += rate;
  args->this_dist += dist;
  args->this_sse += sse;
  args->this_rd += rd;

  if (args->this_rd > args->best_rd) {
    args->exit_early = 1;
    return;
  }

  args->skippable &= !x->plane[plane].eobs[block];
}

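/* Walk every transform block in one plane via
 * vp9_foreach_transformed_block_in_plane(), using block_rd_txfm() above as
 * the visitor; rate/distortion/sse accumulate in args, and exit_early is set
 * as soon as the running rd cost exceeds ref_best_rd. */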
static void txfm_rd_in_plane(MACROBLOCK *x,
                             int *rate, int64_t *distortion,
                             int *skippable, int64_t *sse,
                             int64_t ref_best_rd, int plane,
                             BLOCK_SIZE bsize, TX_SIZE tx_size,
                             int use_fast_coef_costing) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  struct rdcost_block_args args;
  vp9_zero(args);
  args.x = x;
  args.best_rd = ref_best_rd;
  args.use_fast_coef_costing = use_fast_coef_costing;
  args.skippable = 1;

  if (plane == 0)
    xd->mi[0]->mbmi.tx_size = tx_size;

  vp9_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);

  args.so = get_scan(xd, tx_size, get_plane_type(plane), 0);

  vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
                                         block_rd_txfm, &args);
  if (args.exit_early) {
    *rate       = INT_MAX;
    *distortion = INT64_MAX;
    *sse        = INT64_MAX;
    *skippable  = 0;
  } else {
    *distortion = args.this_dist;
    *rate       = args.this_rate;
    *sse        = args.this_sse;
    *skippable  = args.skippable;
  }
}

static void choose_largest_tx_size(VP9_COMP *cpi, MACROBLOCK *x,
                                   int *rate, int64_t *distortion,
                                   int *skip, int64_t *sse,
                                   int64_t ref_best_rd,
                                   BLOCK_SIZE bs) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  VP9_COMMON *const cm = &cpi->common;
  const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;

  mbmi->tx_size = VPXMIN(max_tx_size, largest_tx_size);

  txfm_rd_in_plane(x, rate, distortion, skip,
                   sse, ref_best_rd, 0, bs,
                   mbmi->tx_size, cpi->sf.use_fast_coef_costing);
}

static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
                                   int *rate,
                                   int64_t *distortion,
                                   int *skip,
                                   int64_t *psse,
                                   int64_t ref_best_rd,
                                   BLOCK_SIZE bs) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  vpx_prob skip_prob = vp9_get_skip_prob(cm, xd);
  int r[TX_SIZES][2], s[TX_SIZES];
  int64_t d[TX_SIZES], sse[TX_SIZES];
  int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX}};
  int n, m;
  int s0, s1;
  int64_t best_rd = INT64_MAX;
  TX_SIZE best_tx = max_tx_size;
  int start_tx, end_tx;

  const vpx_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs);
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);

  if (cm->tx_mode == TX_MODE_SELECT) {
    start_tx = max_tx_size;
    end_tx = 0;
  } else {
    TX_SIZE chosen_tx_size = VPXMIN(max_tx_size,
                                    tx_mode_to_biggest_tx_size[cm->tx_mode]);
    start_tx = chosen_tx_size;
    end_tx = chosen_tx_size;
  }

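  // Search transform sizes from start_tx down to end_tx. r_tx_size is the
  // cost of signaling size n with the tx_probs tree: one vp9_cost_one() bit
  // per size skipped, plus a terminating vp9_cost_zero() unless n is the
  // largest size allowed for this block.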
  for (n = start_tx; n >= end_tx; n--) {
    int r_tx_size = 0;
    for (m = 0; m <= n - (n == (int) max_tx_size); m++) {
      if (m == n)
        r_tx_size += vp9_cost_zero(tx_probs[m]);
      else
        r_tx_size += vp9_cost_one(tx_probs[m]);
    }
    txfm_rd_in_plane(x, &r[n][0], &d[n], &s[n],
                     &sse[n], ref_best_rd, 0, bs, n,
                     cpi->sf.use_fast_coef_costing);
    r[n][1] = r[n][0];
    if (r[n][0] < INT_MAX) {
      r[n][1] += r_tx_size;
    }
    if (d[n] == INT64_MAX || r[n][0] == INT_MAX) {
      rd[n][0] = rd[n][1] = INT64_MAX;
    } else if (s[n]) {
      if (is_inter_block(mbmi)) {
        rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, sse[n]);
        r[n][1] -= r_tx_size;
      } else {
        rd[n][0] = RDCOST(x->rdmult, x->rddiv, s1, sse[n]);
        rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1 + r_tx_size, sse[n]);
      }
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
    }

    if (is_inter_block(mbmi) && !xd->lossless && !s[n] && sse[n] != INT64_MAX) {
      rd[n][0] = VPXMIN(rd[n][0], RDCOST(x->rdmult, x->rddiv, s1, sse[n]));
      rd[n][1] = VPXMIN(rd[n][1], RDCOST(x->rdmult, x->rddiv, s1, sse[n]));
    }

    // Early termination in transform size search.
    if (cpi->sf.tx_size_search_breakout &&
        (rd[n][1] == INT64_MAX ||
        (n < (int) max_tx_size && rd[n][1] > rd[n + 1][1]) ||
        s[n] == 1))
      break;

    if (rd[n][1] < best_rd) {
      best_tx = n;
      best_rd = rd[n][1];
    }
  }
  mbmi->tx_size = best_tx;

  *distortion = d[mbmi->tx_size];
  *rate       = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT];
  *skip       = s[mbmi->tx_size];
  *psse       = sse[mbmi->tx_size];
}

static void super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                            int64_t *distortion, int *skip,
                            int64_t *psse, BLOCK_SIZE bs,
                            int64_t ref_best_rd) {
  MACROBLOCKD *xd = &x->e_mbd;
  int64_t sse;
  int64_t *ret_sse = psse ? psse : &sse;

  assert(bs == xd->mi[0]->mbmi.sb_type);

  if (cpi->sf.tx_size_search_method == USE_LARGESTALL || xd->lossless) {
    choose_largest_tx_size(cpi, x, rate, distortion, skip, ret_sse, ref_best_rd,
                           bs);
  } else {
    choose_tx_size_from_rd(cpi, x, rate, distortion, skip, ret_sse,
                           ref_best_rd, bs);
  }
}

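/* Heuristic for FLAG_SKIP_INTRA_DIRMISMATCH: an oblique (diagonal) mode is
 * worth searching only when the best mode so far is one of the two
 * directional modes adjacent to it in prediction angle; e.g. D117 lies
 * between V and D135. Returns 1 if the mode can be skipped. */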
static int conditional_skipintra(PREDICTION_MODE mode,
                                 PREDICTION_MODE best_intra_mode) {
  if (mode == D117_PRED &&
      best_intra_mode != V_PRED &&
      best_intra_mode != D135_PRED)
    return 1;
  if (mode == D63_PRED &&
      best_intra_mode != V_PRED &&
      best_intra_mode != D45_PRED)
    return 1;
  if (mode == D207_PRED &&
      best_intra_mode != H_PRED &&
      best_intra_mode != D45_PRED)
    return 1;
  if (mode == D153_PRED &&
      best_intra_mode != H_PRED &&
      best_intra_mode != D135_PRED)
    return 1;
  return 0;
}

static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x,
                                     int row, int col,
                                     PREDICTION_MODE *best_mode,
                                     const int *bmode_costs,
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                     int *bestrate, int *bestratey,
                                     int64_t *bestdistortion,
                                     BLOCK_SIZE bsize, int64_t rd_thresh) {
  PREDICTION_MODE mode;
  MACROBLOCKD *const xd = &x->e_mbd;
  int64_t best_rd = rd_thresh;
  struct macroblock_plane *p = &x->plane[0];
  struct macroblockd_plane *pd = &xd->plane[0];
  const int src_stride = p->src.stride;
  const int dst_stride = pd->dst.stride;
  const uint8_t *src_init = &p->src.buf[row * 4 * src_stride + col * 4];
  uint8_t *dst_init = &pd->dst.buf[row * 4 * dst_stride + col * 4];
  ENTROPY_CONTEXT ta[2], tempa[2];
  ENTROPY_CONTEXT tl[2], templ[2];
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  int idx, idy;
  uint8_t best_dst[8 * 8];
#if CONFIG_VP9_HIGHBITDEPTH
  uint16_t best_dst16[8 * 8];
#endif

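  // The best mode's reconstruction is kept in best_dst (best_dst16 in the
  // high-bit-depth path) and copied back to the destination buffer once the
  // mode loop below finishes.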
  memcpy(ta, a, sizeof(ta));
  memcpy(tl, l, sizeof(tl));
  xd->mi[0]->mbmi.tx_size = TX_4X4;

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
      int64_t this_rd;
      int ratey = 0;
      int64_t distortion = 0;
      int rate = bmode_costs[mode];

      if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
        continue;

      // Only do the oblique modes if the best so far is
      // one of the neighboring directional modes
      if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
        if (conditional_skipintra(mode, *best_mode))
          continue;
      }

      memcpy(tempa, ta, sizeof(ta));
      memcpy(templ, tl, sizeof(tl));

      for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
        for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
          const int block = (row + idy) * 2 + (col + idx);
          const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
          uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
          int16_t *const src_diff = vp9_raster_block_offset_int16(BLOCK_8X8,
                                                                  block,
                                                                  p->src_diff);
          tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
          xd->mi[0]->bmi[block].as_mode = mode;
          vp9_predict_intra_block(xd, 1, TX_4X4, mode,
                                  x->skip_encode ? src : dst,
                                  x->skip_encode ? src_stride : dst_stride,
                                  dst, dst_stride,
                                  col + idx, row + idy, 0);
          vpx_highbd_subtract_block(4, 4, src_diff, 8, src, src_stride,
                                    dst, dst_stride, xd->bd);
          if (xd->lossless) {
            const scan_order *so = &vp9_default_scan_orders[TX_4X4];
            vp9_highbd_fwht4x4(src_diff, coeff, 8);
            vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
            ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                                 so->scan, so->neighbors,
                                 cpi->sf.use_fast_coef_costing);
            if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
              goto next_highbd;
            vp9_highbd_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block),
                                   dst, dst_stride,
                                   p->eobs[block], xd->bd);
          } else {
            int64_t unused;
            const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block);
            const scan_order *so = &vp9_scan_orders[TX_4X4][tx_type];
            if (tx_type == DCT_DCT)
              vpx_highbd_fdct4x4(src_diff, coeff, 8);
            else
              vp9_highbd_fht4x4(src_diff, coeff, 8, tx_type);
            vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
            ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                                 so->scan, so->neighbors,
                                 cpi->sf.use_fast_coef_costing);
            distortion += vp9_highbd_block_error_dispatch(
                coeff, BLOCK_OFFSET(pd->dqcoeff, block),
                16, &unused, xd->bd) >> 2;
            if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
              goto next_highbd;
            vp9_highbd_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block),
                                  dst, dst_stride, p->eobs[block], xd->bd);
          }
        }
      }

      rate += ratey;
      this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

      if (this_rd < best_rd) {
        *bestrate = rate;
        *bestratey = ratey;
        *bestdistortion = distortion;
        best_rd = this_rd;
        *best_mode = mode;
        memcpy(a, tempa, sizeof(tempa));
        memcpy(l, templ, sizeof(templ));
        for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
          memcpy(best_dst16 + idy * 8,
                 CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
                 num_4x4_blocks_wide * 4 * sizeof(uint16_t));
        }
      }
    next_highbd:
      {}
    }
    if (best_rd >= rd_thresh || x->skip_encode)
      return best_rd;

    for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
      memcpy(CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
             best_dst16 + idy * 8,
             num_4x4_blocks_wide * 4 * sizeof(uint16_t));
    }

    return best_rd;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    int64_t this_rd;
    int ratey = 0;
    int64_t distortion = 0;
    int rate = bmode_costs[mode];

    if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
      continue;

    // Only do the oblique modes if the best so far is
    // one of the neighboring directional modes
    if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
      if (conditional_skipintra(mode, *best_mode))
        continue;
    }

    memcpy(tempa, ta, sizeof(ta));
    memcpy(templ, tl, sizeof(tl));

    for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
      for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
        const int block = (row + idy) * 2 + (col + idx);
        const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
        uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
        int16_t *const src_diff =
            vp9_raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
        tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
        xd->mi[0]->bmi[block].as_mode = mode;
        vp9_predict_intra_block(xd, 1, TX_4X4, mode,
                                x->skip_encode ? src : dst,
                                x->skip_encode ? src_stride : dst_stride,
                                dst, dst_stride, col + idx, row + idy, 0);
        vpx_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride);

        if (xd->lossless) {
          const scan_order *so = &vp9_default_scan_orders[TX_4X4];
          vp9_fwht4x4(src_diff, coeff, 8);
          vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
          ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                               so->scan, so->neighbors,
                               cpi->sf.use_fast_coef_costing);
          if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
            goto next;
          vp9_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block), dst, dst_stride,
                          p->eobs[block]);
        } else {
          int64_t unused;
          const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block);
          const scan_order *so = &vp9_scan_orders[TX_4X4][tx_type];
          vp9_fht4x4(src_diff, coeff, 8, tx_type);
          vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
          ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                               so->scan, so->neighbors,
                               cpi->sf.use_fast_coef_costing);
#if CONFIG_VP9_HIGHBITDEPTH
          distortion += vp9_highbd_block_error_8bit(
              coeff, BLOCK_OFFSET(pd->dqcoeff, block), 16, &unused) >> 2;
#else
          distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block),
                                        16, &unused) >> 2;
#endif
          if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
            goto next;
          vp9_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block),
                         dst, dst_stride, p->eobs[block]);
        }
      }
    }

    rate += ratey;
    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

    if (this_rd < best_rd) {
      *bestrate = rate;
      *bestratey = ratey;
      *bestdistortion = distortion;
      best_rd = this_rd;
      *best_mode = mode;
      memcpy(a, tempa, sizeof(tempa));
      memcpy(l, templ, sizeof(templ));
      for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
        memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
               num_4x4_blocks_wide * 4);
    }
  next:
    {}
  }

  if (best_rd >= rd_thresh || x->skip_encode)
    return best_rd;

  for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
    memcpy(dst_init + idy * dst_stride, best_dst + idy * 8,
           num_4x4_blocks_wide * 4);

  return best_rd;
}

static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP *cpi, MACROBLOCK *mb,
                                            int *rate, int *rate_y,
                                            int64_t *distortion,
                                            int64_t best_rd) {
  int i, j;
  const MACROBLOCKD *const xd = &mb->e_mbd;
  MODE_INFO *const mic = xd->mi[0];
  const MODE_INFO *above_mi = xd->above_mi;
  const MODE_INFO *left_mi = xd->left_mi;
  const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  int idx, idy;
  int cost = 0;
  int64_t total_distortion = 0;
  int tot_rate_y = 0;
  int64_t total_rd = 0;
  ENTROPY_CONTEXT t_above[4], t_left[4];
  const int *bmode_costs = cpi->mbmode_cost;

  memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
  memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));

  // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
  for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
    for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
      PREDICTION_MODE best_mode = DC_PRED;
      int r = INT_MAX, ry = INT_MAX;
      int64_t d = INT64_MAX, this_rd = INT64_MAX;
      i = idy * 2 + idx;
      if (cpi->common.frame_type == KEY_FRAME) {
        const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, i);
        const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, i);

        bmode_costs  = cpi->y_mode_costs[A][L];
      }

      this_rd = rd_pick_intra4x4block(cpi, mb, idy, idx, &best_mode,
                                      bmode_costs, t_above + idx, t_left + idy,
                                      &r, &ry, &d, bsize, best_rd - total_rd);
      if (this_rd >= best_rd - total_rd)
        return INT64_MAX;

      total_rd += this_rd;
      cost += r;
      total_distortion += d;
      tot_rate_y += ry;

      mic->bmi[i].as_mode = best_mode;
      for (j = 1; j < num_4x4_blocks_high; ++j)
        mic->bmi[i + j * 2].as_mode = best_mode;
      for (j = 1; j < num_4x4_blocks_wide; ++j)
        mic->bmi[i + j].as_mode = best_mode;

      if (total_rd >= best_rd)
        return INT64_MAX;
    }
  }

  *rate = cost;
  *rate_y = tot_rate_y;
  *distortion = total_distortion;
  mic->mbmi.mode = mic->bmi[3].as_mode;

  return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion);
}

// This function is used only for intra_only frames.
static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                      int *rate, int *rate_tokenonly,
                                      int64_t *distortion, int *skippable,
                                      BLOCK_SIZE bsize,
                                      int64_t best_rd) {
  PREDICTION_MODE mode;
  PREDICTION_MODE mode_selected = DC_PRED;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mic = xd->mi[0];
  int this_rate, this_rate_tokenonly, s;
  int64_t this_distortion, this_rd;
  TX_SIZE best_tx = TX_4X4;
  int *bmode_costs;
  const MODE_INFO *above_mi = xd->above_mi;
  const MODE_INFO *left_mi = xd->left_mi;
  const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0);
  const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0);
  bmode_costs = cpi->y_mode_costs[A][L];

  memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
  /* Y Search for intra prediction mode */
  for (mode = DC_PRED; mode <= TM_PRED; mode++) {
    if (cpi->sf.use_nonrd_pick_mode) {
      // These speed features are turned on in hybrid non-RD and RD mode
      // for key frame coding in the context of real-time settings.
      if (conditional_skipintra(mode, mode_selected))
        continue;
      if (*skippable)
        break;
    }

    mic->mbmi.mode = mode;

    super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
                    &s, NULL, bsize, best_rd);

    if (this_rate_tokenonly == INT_MAX)
      continue;

    this_rate = this_rate_tokenonly + bmode_costs[mode];
    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

    if (this_rd < best_rd) {
      mode_selected   = mode;
      best_rd         = this_rd;
      best_tx         = mic->mbmi.tx_size;
      *rate           = this_rate;
      *rate_tokenonly = this_rate_tokenonly;
      *distortion     = this_distortion;
      *skippable      = s;
    }
  }

  mic->mbmi.mode = mode_selected;
  mic->mbmi.tx_size = best_tx;

  return best_rd;
}

// Return value 0: early termination triggered, no valid rd cost available;
//              1: rd cost values are valid.
static int super_block_uvrd(const VP9_COMP *cpi, MACROBLOCK *x,
                            int *rate, int64_t *distortion, int *skippable,
                            int64_t *sse, BLOCK_SIZE bsize,
                            int64_t ref_best_rd) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  const TX_SIZE uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]);
  int plane;
  int pnrate = 0, pnskip = 1;
  int64_t pndist = 0, pnsse = 0;
  int is_cost_valid = 1;

  if (ref_best_rd < 0)
    is_cost_valid = 0;

  if (is_inter_block(mbmi) && is_cost_valid) {
    for (plane = 1; plane < MAX_MB_PLANE; ++plane)
      vp9_subtract_plane(x, bsize, plane);
  }

  *rate = 0;
  *distortion = 0;
  *sse = 0;
  *skippable = 1;

  for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
    txfm_rd_in_plane(x, &pnrate, &pndist, &pnskip, &pnsse,
                     ref_best_rd, plane, bsize, uv_tx_size,
                     cpi->sf.use_fast_coef_costing);
    if (pnrate == INT_MAX) {
      is_cost_valid = 0;
      break;
    }
    *rate += pnrate;
    *distortion += pndist;
    *sse += pnsse;
    *skippable &= pnskip;
  }

  if (!is_cost_valid) {
    // reset cost value
    *rate = INT_MAX;
    *distortion = INT64_MAX;
    *sse = INT64_MAX;
    *skippable = 0;
  }

  return is_cost_valid;
}

static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                       PICK_MODE_CONTEXT *ctx,
                                       int *rate, int *rate_tokenonly,
                                       int64_t *distortion, int *skippable,
                                       BLOCK_SIZE bsize, TX_SIZE max_tx_size) {
  MACROBLOCKD *xd = &x->e_mbd;
  PREDICTION_MODE mode;
  PREDICTION_MODE mode_selected = DC_PRED;
  int64_t best_rd = INT64_MAX, this_rd;
  int this_rate_tokenonly, this_rate, s;
  int64_t this_distortion, this_sse;

  memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode)))
      continue;

    xd->mi[0]->mbmi.uv_mode = mode;

    if (!super_block_uvrd(cpi, x, &this_rate_tokenonly,
                          &this_distortion, &s, &this_sse, bsize, best_rd))
      continue;
    this_rate = this_rate_tokenonly +
                cpi->intra_uv_mode_cost[cpi->common.frame_type][mode];
    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

    if (this_rd < best_rd) {
      mode_selected   = mode;
      best_rd         = this_rd;
      *rate           = this_rate;
      *rate_tokenonly = this_rate_tokenonly;
      *distortion     = this_distortion;
      *skippable      = s;
      if (!x->select_tx_size)
        swap_block_ptr(x, ctx, 2, 0, 1, MAX_MB_PLANE);
    }
  }

  xd->mi[0]->mbmi.uv_mode = mode_selected;
  return best_rd;
}

static int64_t rd_sbuv_dcpred(const VP9_COMP *cpi, MACROBLOCK *x,
                              int *rate, int *rate_tokenonly,
                              int64_t *distortion, int *skippable,
                              BLOCK_SIZE bsize) {
  const VP9_COMMON *cm = &cpi->common;
  int64_t unused;

  x->e_mbd.mi[0]->mbmi.uv_mode = DC_PRED;
  memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
  super_block_uvrd(cpi, x, rate_tokenonly, distortion,
                   skippable, &unused, bsize, INT64_MAX);
  *rate = *rate_tokenonly + cpi->intra_uv_mode_cost[cm->frame_type][DC_PRED];
  return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
}

static void choose_intra_uv_mode(VP9_COMP *cpi, MACROBLOCK *const x,
                                 PICK_MODE_CONTEXT *ctx,
                                 BLOCK_SIZE bsize, TX_SIZE max_tx_size,
                                 int *rate_uv, int *rate_uv_tokenonly,
                                 int64_t *dist_uv, int *skip_uv,
                                 PREDICTION_MODE *mode_uv) {
  // Use an estimated rd for uv_intra based on DC_PRED if the
  // appropriate speed flag is set.
  if (cpi->sf.use_uv_intra_rd_estimate) {
    rd_sbuv_dcpred(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv,
                   skip_uv, bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
  } else {
    // Otherwise, do a proper rd search for each possible transform size that
    // may be considered in the main rd loop.
    rd_pick_intra_sbuv_mode(cpi, x, ctx,
                            rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
                            bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize, max_tx_size);
  }
  *mode_uv = x->e_mbd.mi[0]->mbmi.uv_mode;
}

static int cost_mv_ref(const VP9_COMP *cpi, PREDICTION_MODE mode,
                       int mode_context) {
  assert(is_inter_mode(mode));
  return cpi->inter_mode_cost[mode_context][INTER_OFFSET(mode)];
}

static int set_and_cost_bmi_mvs(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
                                int i,
                                PREDICTION_MODE mode, int_mv this_mv[2],
                                int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
                                int_mv seg_mvs[MAX_REF_FRAMES],
                                int_mv *best_ref_mv[2], const int *mvjcost,
                                int *mvcost[2]) {
  MODE_INFO *const mic = xd->mi[0];
  const MB_MODE_INFO *const mbmi = &mic->mbmi;
  const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
  int thismvcost = 0;
  int idx, idy;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
  const int is_compound = has_second_ref(mbmi);

  switch (mode) {
    case NEWMV:
      this_mv[0].as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
      thismvcost += vp9_mv_bit_cost(&this_mv[0].as_mv, &best_ref_mv[0]->as_mv,
                                    mvjcost, mvcost, MV_COST_WEIGHT_SUB);
      if (is_compound) {
        this_mv[1].as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
        thismvcost += vp9_mv_bit_cost(&this_mv[1].as_mv, &best_ref_mv[1]->as_mv,
                                      mvjcost, mvcost, MV_COST_WEIGHT_SUB);
      }
      break;
    case NEARMV:
    case NEARESTMV:
      this_mv[0].as_int = frame_mv[mode][mbmi->ref_frame[0]].as_int;
      if (is_compound)
        this_mv[1].as_int = frame_mv[mode][mbmi->ref_frame[1]].as_int;
      break;
    case ZEROMV:
      this_mv[0].as_int = 0;
      if (is_compound)
        this_mv[1].as_int = 0;
      break;
    default:
      break;
  }

  mic->bmi[i].as_mv[0].as_int = this_mv[0].as_int;
  if (is_compound)
    mic->bmi[i].as_mv[1].as_int = this_mv[1].as_int;

  mic->bmi[i].as_mode = mode;

   1311   for (idy = 0; idy < num_4x4_blocks_high; ++idy)
   1312     for (idx = 0; idx < num_4x4_blocks_wide; ++idx)
   1313       memmove(&mic->bmi[i + idy * 2 + idx], &mic->bmi[i], sizeof(mic->bmi[i]));
   1314 
   1315   return cost_mv_ref(cpi, mode, mbmi_ext->mode_context[mbmi->ref_frame[0]]) +
   1316             thismvcost;
   1317 }
   1318 
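        // Builds the (possibly compound) inter predictor for sub-block i, then
        // forward-transforms, quantizes and costs the 4x4 residuals. rd2 below
        // is the cost of skipping the coefficients entirely (zero rate, SSE as
        // distortion), so the early-out compares min(code, skip) with best_yrd.
        // The >> 2 applied to distortion/SSE rescales transform-domain error to
        // pixel units, since the sub-32x32 forward transforms scale energy up
        // by a factor of 4.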
   1319 static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
   1320                                        MACROBLOCK *x,
   1321                                        int64_t best_yrd,
   1322                                        int i,
   1323                                        int *labelyrate,
   1324                                        int64_t *distortion, int64_t *sse,
   1325                                        ENTROPY_CONTEXT *ta,
   1326                                        ENTROPY_CONTEXT *tl,
   1327                                        int mi_row, int mi_col) {
   1328   int k;
   1329   MACROBLOCKD *xd = &x->e_mbd;
   1330   struct macroblockd_plane *const pd = &xd->plane[0];
   1331   struct macroblock_plane *const p = &x->plane[0];
   1332   MODE_INFO *const mi = xd->mi[0];
   1333   const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->mbmi.sb_type, pd);
   1334   const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
   1335   const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize];
   1336   int idx, idy;
   1337 
   1338   const uint8_t *const src =
   1339       &p->src.buf[vp9_raster_block_offset(BLOCK_8X8, i, p->src.stride)];
   1340   uint8_t *const dst = &pd->dst.buf[vp9_raster_block_offset(BLOCK_8X8, i,
   1341                                                             pd->dst.stride)];
   1342   int64_t thisdistortion = 0, thissse = 0;
   1343   int thisrate = 0, ref;
   1344   const scan_order *so = &vp9_default_scan_orders[TX_4X4];
   1345   const int is_compound = has_second_ref(&mi->mbmi);
   1346   const InterpKernel *kernel = vp9_filter_kernels[mi->mbmi.interp_filter];
   1347 
   1348   for (ref = 0; ref < 1 + is_compound; ++ref) {
   1349     const uint8_t *pre = &pd->pre[ref].buf[vp9_raster_block_offset(BLOCK_8X8, i,
   1350                                                pd->pre[ref].stride)];
   1351 #if CONFIG_VP9_HIGHBITDEPTH
   1352     if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
   1353       vp9_highbd_build_inter_predictor(pre, pd->pre[ref].stride,
   1354                                        dst, pd->dst.stride,
   1355                                        &mi->bmi[i].as_mv[ref].as_mv,
   1356                                        &xd->block_refs[ref]->sf, width,
   1357                                        height, ref, kernel, MV_PRECISION_Q3,
   1358                                        mi_col * MI_SIZE + 4 * (i % 2),
   1359                                        mi_row * MI_SIZE + 4 * (i / 2), xd->bd);
   1360     } else {
   1361       vp9_build_inter_predictor(pre, pd->pre[ref].stride,
   1362                                 dst, pd->dst.stride,
   1363                                 &mi->bmi[i].as_mv[ref].as_mv,
   1364                                 &xd->block_refs[ref]->sf, width, height, ref,
   1365                                 kernel, MV_PRECISION_Q3,
   1366                                 mi_col * MI_SIZE + 4 * (i % 2),
   1367                                 mi_row * MI_SIZE + 4 * (i / 2));
   1368     }
   1369 #else
   1370     vp9_build_inter_predictor(pre, pd->pre[ref].stride,
   1371                               dst, pd->dst.stride,
   1372                               &mi->bmi[i].as_mv[ref].as_mv,
   1373                               &xd->block_refs[ref]->sf, width, height, ref,
   1374                               kernel, MV_PRECISION_Q3,
   1375                               mi_col * MI_SIZE + 4 * (i % 2),
   1376                               mi_row * MI_SIZE + 4 * (i / 2));
   1377 #endif  // CONFIG_VP9_HIGHBITDEPTH
   1378   }
   1379 
   1380 #if CONFIG_VP9_HIGHBITDEPTH
   1381   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
   1382     vpx_highbd_subtract_block(
   1383         height, width, vp9_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
   1384         8, src, p->src.stride, dst, pd->dst.stride, xd->bd);
   1385   } else {
   1386     vpx_subtract_block(
   1387         height, width, vp9_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
   1388         8, src, p->src.stride, dst, pd->dst.stride);
   1389   }
   1390 #else
   1391   vpx_subtract_block(height, width,
   1392                      vp9_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
   1393                      8, src, p->src.stride, dst, pd->dst.stride);
   1394 #endif  // CONFIG_VP9_HIGHBITDEPTH
   1395 
   1396   k = i;
   1397   for (idy = 0; idy < height / 4; ++idy) {
   1398     for (idx = 0; idx < width / 4; ++idx) {
   1399 #if CONFIG_VP9_HIGHBITDEPTH
   1400       const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8;
   1401 #endif
   1402       int64_t ssz, rd, rd1, rd2;
   1403       tran_low_t* coeff;
   1404 
   1405       k += (idy * 2 + idx);
   1406       coeff = BLOCK_OFFSET(p->coeff, k);
   1407       x->fwd_txm4x4(vp9_raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
   1408                     coeff, 8);
   1409       vp9_regular_quantize_b_4x4(x, 0, k, so->scan, so->iscan);
   1410 #if CONFIG_VP9_HIGHBITDEPTH
   1411       thisdistortion += vp9_highbd_block_error_dispatch(
   1412           coeff, BLOCK_OFFSET(pd->dqcoeff, k), 16, &ssz, bd);
   1413 #else
   1414       thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
   1415                                         16, &ssz);
   1416 #endif  // CONFIG_VP9_HIGHBITDEPTH
   1417       thissse += ssz;
   1418       thisrate += cost_coeffs(x, 0, k, ta + (k & 1), tl + (k >> 1), TX_4X4,
   1419                               so->scan, so->neighbors,
   1420                               cpi->sf.use_fast_coef_costing);
   1421       rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2);
   1422       rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2);
   1423       rd = VPXMIN(rd1, rd2);
   1424       if (rd >= best_yrd)
   1425         return INT64_MAX;
   1426     }
   1427   }
   1428 
   1429   *distortion = thisdistortion >> 2;
   1430   *labelyrate = thisrate;
   1431   *sse = thissse >> 2;
   1432 
   1433   return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
   1434 }
   1435 
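        // Per-(sub-block, mode) RD statistics gathered during the sub-8x8 mode
        // search: rate (brate ends up holding the mode+mv+coefficient rate,
        // byrate the coefficient rate alone), distortion, SSE, total RD cost,
        // the motion vectors tried, and the entropy contexts left behind so
        // the winning mode's contexts can be re-applied.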
   1436 typedef struct {
   1437   int eobs;
   1438   int brate;
   1439   int byrate;
   1440   int64_t bdist;
   1441   int64_t bsse;
   1442   int64_t brdcost;
   1443   int_mv mvs[2];
   1444   ENTROPY_CONTEXT ta[2];
   1445   ENTROPY_CONTEXT tl[2];
   1446 } SEG_RDSTAT;
   1447 
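        // Running state for rd_pick_best_sub8x8_mode(): rdstat is indexed by
        // sub-block label (0..3) and INTER_OFFSET(mode). The caller keeps one
        // BEST_SEG_INFO per interpolation filter tried (bsi_buf + filter_idx),
        // which lets later filters reuse earlier results when the full-pel
        // motion vectors match.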
   1448 typedef struct {
   1449   int_mv *ref_mv[2];
   1450   int_mv mvp;
   1451 
   1452   int64_t segment_rd;
   1453   int r;
   1454   int64_t d;
   1455   int64_t sse;
   1456   int segment_yrate;
   1457   PREDICTION_MODE modes[4];
   1458   SEG_RDSTAT rdstat[4][INTER_MODES];
   1459   int mvthresh;
   1460 } BEST_SEG_INFO;
   1461 
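        // Motion vectors are held in 1/8-pel units, so >> 3 converts to full
        // pel before comparing against the full-pel search bounds. Returns
        // nonzero if the mv lies outside the allowed range.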
   1462 static INLINE int mv_check_bounds(const MACROBLOCK *x, const MV *mv) {
   1463   return (mv->row >> 3) < x->mv_row_min ||
   1464          (mv->row >> 3) > x->mv_row_max ||
   1465          (mv->col >> 3) < x->mv_col_min ||
   1466          (mv->col >> 3) > x->mv_col_max;
   1467 }
   1468 
   1469 static INLINE void mi_buf_shift(MACROBLOCK *x, int i) {
   1470   MB_MODE_INFO *const mbmi = &x->e_mbd.mi[0]->mbmi;
   1471   struct macroblock_plane *const p = &x->plane[0];
   1472   struct macroblockd_plane *const pd = &x->e_mbd.plane[0];
   1473 
   1474   p->src.buf = &p->src.buf[vp9_raster_block_offset(BLOCK_8X8, i,
   1475                                                    p->src.stride)];
   1476   assert(((intptr_t)pd->pre[0].buf & 0x7) == 0);
   1477   pd->pre[0].buf = &pd->pre[0].buf[vp9_raster_block_offset(BLOCK_8X8, i,
   1478                                                            pd->pre[0].stride)];
   1479   if (has_second_ref(mbmi))
   1480     pd->pre[1].buf = &pd->pre[1].buf[vp9_raster_block_offset(BLOCK_8X8, i,
   1481                                                            pd->pre[1].stride)];
   1482 }
   1483 
   1484 static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src,
   1485                                   struct buf_2d orig_pre[2]) {
   1486   MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi;
   1487   x->plane[0].src = orig_src;
   1488   x->e_mbd.plane[0].pre[0] = orig_pre[0];
   1489   if (has_second_ref(mbmi))
   1490     x->e_mbd.plane[0].pre[1] = orig_pre[1];
   1491 }
   1492 
   1493 static INLINE int mv_has_subpel(const MV *mv) {
   1494   return (mv->row & 0x0F) || (mv->col & 0x0F);
   1495 }
   1496 
   1497 // Check if NEARESTMV/NEARMV/ZEROMV is the cheapest way to encode zero motion.
   1498 // TODO(aconverse): Find out if this is still productive, then clean up or remove.
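        // For example, NEARMV is pruned when its zero mv costs more to signal
        // than ZEROMV does (c1 > c3), and ZEROMV is pruned when NEARESTMV or
        // NEARMV also describes zero motion at no greater cost.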
   1499 static int check_best_zero_mv(
   1500     const VP9_COMP *cpi, const uint8_t mode_context[MAX_REF_FRAMES],
   1501     int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES], int this_mode,
   1502     const MV_REFERENCE_FRAME ref_frames[2]) {
   1503   if ((this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) &&
   1504       frame_mv[this_mode][ref_frames[0]].as_int == 0 &&
   1505       (ref_frames[1] == NONE ||
   1506        frame_mv[this_mode][ref_frames[1]].as_int == 0)) {
   1507     int rfc = mode_context[ref_frames[0]];
   1508     int c1 = cost_mv_ref(cpi, NEARMV, rfc);
   1509     int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
   1510     int c3 = cost_mv_ref(cpi, ZEROMV, rfc);
   1511 
   1512     if (this_mode == NEARMV) {
   1513       if (c1 > c3) return 0;
   1514     } else if (this_mode == NEARESTMV) {
   1515       if (c2 > c3) return 0;
   1516     } else {
   1517       assert(this_mode == ZEROMV);
   1518       if (ref_frames[1] == NONE) {
   1519         if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0) ||
   1520             (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0))
   1521           return 0;
   1522       } else {
   1523         if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0 &&
   1524              frame_mv[NEARESTMV][ref_frames[1]].as_int == 0) ||
   1525             (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0 &&
   1526              frame_mv[NEARMV][ref_frames[1]].as_int == 0))
   1527           return 0;
   1528       }
   1529     }
   1530   }
   1531   return 1;
   1532 }
   1533 
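        // Iterative refinement of the two motion vectors of a compound
        // prediction: each iteration fixes one reference, builds the predictor
        // from the other, and runs a small-range full-pel search plus sub-pel
        // refinement against that second predictor. Up to four iterations are
        // run, alternating references, stopping once no further gain is found.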
   1534 static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
   1535                                 BLOCK_SIZE bsize,
   1536                                 int_mv *frame_mv,
   1537                                 int mi_row, int mi_col,
   1538                                 int_mv single_newmv[MAX_REF_FRAMES],
   1539                                 int *rate_mv) {
   1540   const VP9_COMMON *const cm = &cpi->common;
   1541   const int pw = 4 * num_4x4_blocks_wide_lookup[bsize];
   1542   const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
   1543   MACROBLOCKD *xd = &x->e_mbd;
   1544   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
   1545   const int refs[2] = {mbmi->ref_frame[0],
   1546                        mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]};
   1547   int_mv ref_mv[2];
   1548   int ite, ref;
   1549   const InterpKernel *kernel = vp9_filter_kernels[mbmi->interp_filter];
   1550   struct scale_factors sf;
   1551 
   1552   // Do joint motion search in compound mode to get more accurate mv.
   1553   struct buf_2d backup_yv12[2][MAX_MB_PLANE];
   1554   int last_besterr[2] = {INT_MAX, INT_MAX};
   1555   const YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = {
   1556     vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[0]),
   1557     vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[1])
   1558   };
   1559 
   1560   // Prediction buffer from second frame.
   1561 #if CONFIG_VP9_HIGHBITDEPTH
   1562   DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[64 * 64]);
   1563   uint8_t *second_pred;
   1564 #else
   1565   DECLARE_ALIGNED(16, uint8_t, second_pred[64 * 64]);
   1566 #endif  // CONFIG_VP9_HIGHBITDEPTH
   1567 
   1568   for (ref = 0; ref < 2; ++ref) {
   1569     ref_mv[ref] = x->mbmi_ext->ref_mvs[refs[ref]][0];
   1570 
   1571     if (scaled_ref_frame[ref]) {
   1572       int i;
   1573       // Swap out the reference frame for a version that's been scaled to
   1574       // match the resolution of the current frame, allowing the existing
   1575       // motion search code to be used without additional modifications.
   1576       for (i = 0; i < MAX_MB_PLANE; i++)
   1577         backup_yv12[ref][i] = xd->plane[i].pre[ref];
   1578       vp9_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col,
   1579                            NULL);
   1580     }
   1581 
   1582     frame_mv[refs[ref]].as_int = single_newmv[refs[ref]].as_int;
   1583   }
   1584 
   1585   // Since we have scaled the reference frames to match the size of the current
   1586   // frame, we must use a unit scaling factor during mode selection.
   1587 #if CONFIG_VP9_HIGHBITDEPTH
   1588   vp9_setup_scale_factors_for_frame(&sf, cm->width, cm->height,
   1589                                     cm->width, cm->height,
   1590                                     cm->use_highbitdepth);
   1591 #else
   1592   vp9_setup_scale_factors_for_frame(&sf, cm->width, cm->height,
   1593                                     cm->width, cm->height);
   1594 #endif  // CONFIG_VP9_HIGHBITDEPTH
   1595 
   1596   // Allow the joint search to run several iterations for each reference
   1597   // frame, and break out of the loop early if it cannot find a better mv.
   1598   for (ite = 0; ite < 4; ite++) {
   1599     struct buf_2d ref_yv12[2];
   1600     int bestsme = INT_MAX;
   1601     int sadpb = x->sadperbit16;
   1602     MV tmp_mv;
   1603     int search_range = 3;
   1604 
   1605     int tmp_col_min = x->mv_col_min;
   1606     int tmp_col_max = x->mv_col_max;
   1607     int tmp_row_min = x->mv_row_min;
   1608     int tmp_row_max = x->mv_row_max;
   1609     int id = ite % 2;  // Even iterations search in the first reference frame,
   1610                        // odd iterations search in the second. The predictor
   1611                        // found for the 'other' reference frame is factored in.
   1612 
   1613     // Initialized here because of a compiler problem in Visual Studio.
   1614     ref_yv12[0] = xd->plane[0].pre[0];
   1615     ref_yv12[1] = xd->plane[0].pre[1];
   1616 
   1617     // Get the prediction block from the 'other' reference frame.
   1618 #if CONFIG_VP9_HIGHBITDEPTH
   1619     if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
   1620       second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc_16);
   1621       vp9_highbd_build_inter_predictor(ref_yv12[!id].buf,
   1622                                        ref_yv12[!id].stride,
   1623                                        second_pred, pw,
   1624                                        &frame_mv[refs[!id]].as_mv,
   1625                                        &sf, pw, ph, 0,
   1626                                        kernel, MV_PRECISION_Q3,
   1627                                        mi_col * MI_SIZE, mi_row * MI_SIZE,
   1628                                        xd->bd);
   1629     } else {
   1630       second_pred = (uint8_t *)second_pred_alloc_16;
   1631       vp9_build_inter_predictor(ref_yv12[!id].buf,
   1632                                 ref_yv12[!id].stride,
   1633                                 second_pred, pw,
   1634                                 &frame_mv[refs[!id]].as_mv,
   1635                                 &sf, pw, ph, 0,
   1636                                 kernel, MV_PRECISION_Q3,
   1637                                 mi_col * MI_SIZE, mi_row * MI_SIZE);
   1638     }
   1639 #else
   1640     vp9_build_inter_predictor(ref_yv12[!id].buf,
   1641                               ref_yv12[!id].stride,
   1642                               second_pred, pw,
   1643                               &frame_mv[refs[!id]].as_mv,
   1644                               &sf, pw, ph, 0,
   1645                               kernel, MV_PRECISION_Q3,
   1646                               mi_col * MI_SIZE, mi_row * MI_SIZE);
   1647 #endif  // CONFIG_VP9_HIGHBITDEPTH
   1648 
   1649     // Do compound motion search on the current reference frame.
   1650     if (id)
   1651       xd->plane[0].pre[0] = ref_yv12[id];
   1652     vp9_set_mv_search_range(x, &ref_mv[id].as_mv);
   1653 
   1654     // Use the mv result from the single mode as mv predictor.
   1655     tmp_mv = frame_mv[refs[id]].as_mv;
   1656 
   1657     tmp_mv.col >>= 3;
   1658     tmp_mv.row >>= 3;
   1659 
   1660     // Small-range full-pixel motion search.
   1661     bestsme = vp9_refining_search_8p_c(x, &tmp_mv, sadpb,
   1662                                        search_range,
   1663                                        &cpi->fn_ptr[bsize],
   1664                                        &ref_mv[id].as_mv, second_pred);
   1665     if (bestsme < INT_MAX)
   1666       bestsme = vp9_get_mvpred_av_var(x, &tmp_mv, &ref_mv[id].as_mv,
   1667                                       second_pred, &cpi->fn_ptr[bsize], 1);
   1668 
   1669     x->mv_col_min = tmp_col_min;
   1670     x->mv_col_max = tmp_col_max;
   1671     x->mv_row_min = tmp_row_min;
   1672     x->mv_row_max = tmp_row_max;
   1673 
   1674     if (bestsme < INT_MAX) {
   1675       int dis; /* TODO: use dis in distortion calculation later. */
   1676       unsigned int sse;
   1677       bestsme = cpi->find_fractional_mv_step(
   1678           x, &tmp_mv,
   1679           &ref_mv[id].as_mv,
   1680           cpi->common.allow_high_precision_mv,
   1681           x->errorperbit,
   1682           &cpi->fn_ptr[bsize],
   1683           0, cpi->sf.mv.subpel_iters_per_step,
   1684           NULL,
   1685           x->nmvjointcost, x->mvcost,
   1686           &dis, &sse, second_pred,
   1687           pw, ph);
   1688     }
   1689 
   1690     // Restore the pointer to the first (possibly scaled) prediction buffer.
   1691     if (id)
   1692       xd->plane[0].pre[0] = ref_yv12[0];
   1693 
   1694     if (bestsme < last_besterr[id]) {
   1695       frame_mv[refs[id]].as_mv = tmp_mv;
   1696       last_besterr[id] = bestsme;
   1697     } else {
   1698       break;
   1699     }
   1700   }
   1701 
   1702   *rate_mv = 0;
   1703 
   1704   for (ref = 0; ref < 2; ++ref) {
   1705     if (scaled_ref_frame[ref]) {
   1706       // Restore the prediction frame pointers to their unscaled versions.
   1707       int i;
   1708       for (i = 0; i < MAX_MB_PLANE; i++)
   1709         xd->plane[i].pre[ref] = backup_yv12[ref][i];
   1710     }
   1711 
   1712     *rate_mv += vp9_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
   1713                                 &x->mbmi_ext->ref_mvs[refs[ref]][0].as_mv,
   1714                                 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
   1715   }
   1716 }
   1717 
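        // RD mode search for the sub-8x8 partitions (labels) of an 8x8 block.
        // For each of the up-to-four labels, every allowed inter mode is
        // evaluated (running a new motion search for NEWMV), entropy contexts
        // are carried from label to label, and, where the motion is full-pel
        // and matches an earlier filter's result in bsi_buf, the stored RD
        // statistics are reused instead of re-encoding the segment.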
   1718 static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
   1719                                         int_mv *best_ref_mv,
   1720                                         int_mv *second_best_ref_mv,
   1721                                         int64_t best_rd, int *returntotrate,
   1722                                         int *returnyrate,
   1723                                         int64_t *returndistortion,
   1724                                         int *skippable, int64_t *psse,
   1725                                         int mvthresh,
   1726                                         int_mv seg_mvs[4][MAX_REF_FRAMES],
   1727                                         BEST_SEG_INFO *bsi_buf, int filter_idx,
   1728                                         int mi_row, int mi_col) {
   1729   int i;
   1730   BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
   1731   MACROBLOCKD *xd = &x->e_mbd;
   1732   MODE_INFO *mi = xd->mi[0];
   1733   MB_MODE_INFO *mbmi = &mi->mbmi;
   1734   int mode_idx;
   1735   int k, br = 0, idx, idy;
   1736   int64_t bd = 0, block_sse = 0;
   1737   PREDICTION_MODE this_mode;
   1738   VP9_COMMON *cm = &cpi->common;
   1739   struct macroblock_plane *const p = &x->plane[0];
   1740   struct macroblockd_plane *const pd = &xd->plane[0];
   1741   const int label_count = 4;
   1742   int64_t this_segment_rd = 0;
   1743   int label_mv_thresh;
   1744   int segmentyrate = 0;
   1745   const BLOCK_SIZE bsize = mbmi->sb_type;
   1746   const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
   1747   const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
   1748   ENTROPY_CONTEXT t_above[2], t_left[2];
   1749   int subpelmv = 1, have_ref = 0;
   1750   const int has_second_rf = has_second_ref(mbmi);
   1751   const int inter_mode_mask = cpi->sf.inter_mode_mask[bsize];
   1752   MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
   1753 
   1754   vp9_zero(*bsi);
   1755 
   1756   bsi->segment_rd = best_rd;
   1757   bsi->ref_mv[0] = best_ref_mv;
   1758   bsi->ref_mv[1] = second_best_ref_mv;
   1759   bsi->mvp.as_int = best_ref_mv->as_int;
   1760   bsi->mvthresh = mvthresh;
   1761 
   1762   for (i = 0; i < 4; i++)
   1763     bsi->modes[i] = ZEROMV;
   1764 
   1765   memcpy(t_above, pd->above_context, sizeof(t_above));
   1766   memcpy(t_left, pd->left_context, sizeof(t_left));
   1767 
   1768   // A multiplier of 64 would make this threshold effectively huge, so
   1769   // that mvs on segments would very rarely be checked. The multiplier of
   1770   // 1 used here makes the mv threshold roughly equal to what it is for
   1771   // macroblocks.
   1772   label_mv_thresh = 1 * bsi->mvthresh / label_count;
   1773 
   1774   // Segmentation method overheads
   1775   for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
   1776     for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
   1777       // TODO(jingning,rbultje): rewrite the rate-distortion optimization
   1778       // loop for 4x4/4x8/8x4 block coding. to be replaced with new rd loop
   1779       int_mv mode_mv[MB_MODE_COUNT][2];
   1780       int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
   1781       PREDICTION_MODE mode_selected = ZEROMV;
   1782       int64_t best_rd = INT64_MAX;
   1783       const int i = idy * 2 + idx;
   1784       int ref;
   1785 
   1786       for (ref = 0; ref < 1 + has_second_rf; ++ref) {
   1787         const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
   1788         frame_mv[ZEROMV][frame].as_int = 0;
   1789         vp9_append_sub8x8_mvs_for_idx(cm, xd, i, ref, mi_row, mi_col,
   1790                                       &frame_mv[NEARESTMV][frame],
   1791                                       &frame_mv[NEARMV][frame],
   1792                                       mbmi_ext->mode_context);
   1793       }
   1794 
   1795       // search for the best motion vector on this segment
   1796       for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
   1797         const struct buf_2d orig_src = x->plane[0].src;
   1798         struct buf_2d orig_pre[2];
   1799 
   1800         mode_idx = INTER_OFFSET(this_mode);
   1801         bsi->rdstat[i][mode_idx].brdcost = INT64_MAX;
   1802         if (!(inter_mode_mask & (1 << this_mode)))
   1803           continue;
   1804 
   1805         if (!check_best_zero_mv(cpi, mbmi_ext->mode_context, frame_mv,
   1806                                 this_mode, mbmi->ref_frame))
   1807           continue;
   1808 
   1809         memcpy(orig_pre, pd->pre, sizeof(orig_pre));
   1810         memcpy(bsi->rdstat[i][mode_idx].ta, t_above,
   1811                sizeof(bsi->rdstat[i][mode_idx].ta));
   1812         memcpy(bsi->rdstat[i][mode_idx].tl, t_left,
   1813                sizeof(bsi->rdstat[i][mode_idx].tl));
   1814 
   1815         // motion search for newmv (single predictor case only)
   1816         if (!has_second_rf && this_mode == NEWMV &&
   1817             seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) {
   1818           MV *const new_mv = &mode_mv[NEWMV][0].as_mv;
   1819           int step_param = 0;
   1820           int thissme, bestsme = INT_MAX;
   1821           int sadpb = x->sadperbit4;
   1822           MV mvp_full;
   1823           int max_mv;
   1824           int cost_list[5];
   1825 
   1826           /* Is the best so far sufficiently good that we can't justify
   1827            * doing a new motion search? */
   1828           if (best_rd < label_mv_thresh)
   1829             break;
   1830 
   1831           if (cpi->oxcf.mode != BEST) {
   1832             // use previous block's result as next block's MV predictor.
   1833             if (i > 0) {
   1834               bsi->mvp.as_int = mi->bmi[i - 1].as_mv[0].as_int;
   1835               if (i == 2)
   1836                 bsi->mvp.as_int = mi->bmi[i - 2].as_mv[0].as_int;
   1837             }
   1838           }
   1839           if (i == 0)
   1840             max_mv = x->max_mv_context[mbmi->ref_frame[0]];
   1841           else
   1842             max_mv =
   1843                 VPXMAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3;
   1844 
   1845           if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
   1846             // Take wtd average of the step_params based on the last frame's
   1847             // max mv magnitude and the best ref mvs of the current block for
   1848             // the given reference.
   1849             step_param = (vp9_init_search_range(max_mv) +
   1850                               cpi->mv_step_param) / 2;
   1851           } else {
   1852             step_param = cpi->mv_step_param;
   1853           }
   1854 
   1855           mvp_full.row = bsi->mvp.as_mv.row >> 3;
   1856           mvp_full.col = bsi->mvp.as_mv.col >> 3;
   1857 
   1858           if (cpi->sf.adaptive_motion_search) {
   1859             mvp_full.row = x->pred_mv[mbmi->ref_frame[0]].row >> 3;
   1860             mvp_full.col = x->pred_mv[mbmi->ref_frame[0]].col >> 3;
   1861             step_param = VPXMAX(step_param, 8);
   1862           }
   1863 
   1864           // adjust src pointer for this block
   1865           mi_buf_shift(x, i);
   1866 
   1867           vp9_set_mv_search_range(x, &bsi->ref_mv[0]->as_mv);
   1868 
   1869           bestsme = vp9_full_pixel_search(
   1870               cpi, x, bsize, &mvp_full, step_param, sadpb,
   1871               cpi->sf.mv.subpel_search_method != SUBPEL_TREE ? cost_list : NULL,
   1872               &bsi->ref_mv[0]->as_mv, new_mv,
   1873               INT_MAX, 1);
   1874 
   1875           // Should we do a full search (best quality only)
   1876           if (cpi->oxcf.mode == BEST) {
   1877             int_mv *const best_mv = &mi->bmi[i].as_mv[0];
   1878             /* Check if mvp_full is within the range. */
   1879             clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max,
   1880                      x->mv_row_min, x->mv_row_max);
   1881             thissme = cpi->full_search_sad(x, &mvp_full,
   1882                                            sadpb, 16, &cpi->fn_ptr[bsize],
   1883                                            &bsi->ref_mv[0]->as_mv,
   1884                                            &best_mv->as_mv);
   1885             cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX;
   1886             if (thissme < bestsme) {
   1887               bestsme = thissme;
   1888               *new_mv = best_mv->as_mv;
   1889             } else {
   1890               // The full search result is actually worse so re-instate the
   1891               // previous best vector
   1892               best_mv->as_mv = *new_mv;
   1893             }
   1894           }
   1895 
   1896           if (bestsme < INT_MAX) {
   1897             int distortion;
   1898             cpi->find_fractional_mv_step(
   1899                 x,
   1900                 new_mv,
   1901                 &bsi->ref_mv[0]->as_mv,
   1902                 cm->allow_high_precision_mv,
   1903                 x->errorperbit, &cpi->fn_ptr[bsize],
   1904                 cpi->sf.mv.subpel_force_stop,
   1905                 cpi->sf.mv.subpel_iters_per_step,
   1906                 cond_cost_list(cpi, cost_list),
   1907                 x->nmvjointcost, x->mvcost,
   1908                 &distortion,
   1909                 &x->pred_sse[mbmi->ref_frame[0]],
   1910                 NULL, 0, 0);
   1911 
   1912             // save motion search result for use in compound prediction
   1913             seg_mvs[i][mbmi->ref_frame[0]].as_mv = *new_mv;
   1914           }
   1915 
   1916           if (cpi->sf.adaptive_motion_search)
   1917             x->pred_mv[mbmi->ref_frame[0]] = *new_mv;
   1918 
   1919           // restore src pointers
   1920           mi_buf_restore(x, orig_src, orig_pre);
   1921         }
   1922 
   1923         if (has_second_rf) {
   1924           if (seg_mvs[i][mbmi->ref_frame[1]].as_int == INVALID_MV ||
   1925               seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV)
   1926             continue;
   1927         }
   1928 
   1929         if (has_second_rf && this_mode == NEWMV &&
   1930             mbmi->interp_filter == EIGHTTAP) {
   1931           // adjust src pointers
   1932           mi_buf_shift(x, i);
   1933           if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
   1934             int rate_mv;
   1935             joint_motion_search(cpi, x, bsize, frame_mv[this_mode],
   1936                                 mi_row, mi_col, seg_mvs[i],
   1937                                 &rate_mv);
   1938             seg_mvs[i][mbmi->ref_frame[0]].as_int =
   1939                 frame_mv[this_mode][mbmi->ref_frame[0]].as_int;
   1940             seg_mvs[i][mbmi->ref_frame[1]].as_int =
   1941                 frame_mv[this_mode][mbmi->ref_frame[1]].as_int;
   1942           }
   1943           // restore src pointers
   1944           mi_buf_restore(x, orig_src, orig_pre);
   1945         }
   1946 
   1947         bsi->rdstat[i][mode_idx].brate =
   1948             set_and_cost_bmi_mvs(cpi, x, xd, i, this_mode, mode_mv[this_mode],
   1949                                  frame_mv, seg_mvs[i], bsi->ref_mv,
   1950                                  x->nmvjointcost, x->mvcost);
   1951 
   1952         for (ref = 0; ref < 1 + has_second_rf; ++ref) {
   1953           bsi->rdstat[i][mode_idx].mvs[ref].as_int =
   1954               mode_mv[this_mode][ref].as_int;
   1955           if (num_4x4_blocks_wide > 1)
   1956             bsi->rdstat[i + 1][mode_idx].mvs[ref].as_int =
   1957                 mode_mv[this_mode][ref].as_int;
   1958           if (num_4x4_blocks_high > 1)
   1959             bsi->rdstat[i + 2][mode_idx].mvs[ref].as_int =
   1960                 mode_mv[this_mode][ref].as_int;
   1961         }
   1962 
   1963         // Trap vectors that reach beyond the UMV borders
   1964         if (mv_check_bounds(x, &mode_mv[this_mode][0].as_mv) ||
   1965             (has_second_rf &&
   1966              mv_check_bounds(x, &mode_mv[this_mode][1].as_mv)))
   1967           continue;
   1968 
   1969         if (filter_idx > 0) {
   1970           BEST_SEG_INFO *ref_bsi = bsi_buf;
   1971           subpelmv = 0;
   1972           have_ref = 1;
   1973 
   1974           for (ref = 0; ref < 1 + has_second_rf; ++ref) {
   1975             subpelmv |= mv_has_subpel(&mode_mv[this_mode][ref].as_mv);
   1976             have_ref &= mode_mv[this_mode][ref].as_int ==
   1977                 ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
   1978           }
   1979 
   1980           if (filter_idx > 1 && !subpelmv && !have_ref) {
   1981             ref_bsi = bsi_buf + 1;
   1982             have_ref = 1;
   1983             for (ref = 0; ref < 1 + has_second_rf; ++ref)
   1984               have_ref &= mode_mv[this_mode][ref].as_int ==
   1985                   ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
   1986           }
   1987 
   1988           if (!subpelmv && have_ref &&
   1989               ref_bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
   1990             memcpy(&bsi->rdstat[i][mode_idx], &ref_bsi->rdstat[i][mode_idx],
   1991                    sizeof(SEG_RDSTAT));
   1992             if (num_4x4_blocks_wide > 1)
   1993               bsi->rdstat[i + 1][mode_idx].eobs =
   1994                   ref_bsi->rdstat[i + 1][mode_idx].eobs;
   1995             if (num_4x4_blocks_high > 1)
   1996               bsi->rdstat[i + 2][mode_idx].eobs =
   1997                   ref_bsi->rdstat[i + 2][mode_idx].eobs;
   1998 
   1999             if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
   2000               mode_selected = this_mode;
   2001               best_rd = bsi->rdstat[i][mode_idx].brdcost;
   2002             }
   2003             continue;
   2004           }
   2005         }
   2006 
   2007         bsi->rdstat[i][mode_idx].brdcost =
   2008             encode_inter_mb_segment(cpi, x,
   2009                                     bsi->segment_rd - this_segment_rd, i,
   2010                                     &bsi->rdstat[i][mode_idx].byrate,
   2011                                     &bsi->rdstat[i][mode_idx].bdist,
   2012                                     &bsi->rdstat[i][mode_idx].bsse,
   2013                                     bsi->rdstat[i][mode_idx].ta,
   2014                                     bsi->rdstat[i][mode_idx].tl,
   2015                                     mi_row, mi_col);
   2016         if (bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
   2017           bsi->rdstat[i][mode_idx].brdcost += RDCOST(x->rdmult, x->rddiv,
   2018                                             bsi->rdstat[i][mode_idx].brate, 0);
   2019           bsi->rdstat[i][mode_idx].brate += bsi->rdstat[i][mode_idx].byrate;
   2020           bsi->rdstat[i][mode_idx].eobs = p->eobs[i];
   2021           if (num_4x4_blocks_wide > 1)
   2022             bsi->rdstat[i + 1][mode_idx].eobs = p->eobs[i + 1];
   2023           if (num_4x4_blocks_high > 1)
   2024             bsi->rdstat[i + 2][mode_idx].eobs = p->eobs[i + 2];
   2025         }
   2026 
   2027         if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
   2028           mode_selected = this_mode;
   2029           best_rd = bsi->rdstat[i][mode_idx].brdcost;
   2030         }
   2031       } /*for each 4x4 mode*/
   2032 
   2033       if (best_rd == INT64_MAX) {
   2034         int iy, midx;
   2035         for (iy = i + 1; iy < 4; ++iy)
   2036           for (midx = 0; midx < INTER_MODES; ++midx)
   2037             bsi->rdstat[iy][midx].brdcost = INT64_MAX;
   2038         bsi->segment_rd = INT64_MAX;
   2039         return INT64_MAX;
   2040       }
   2041 
   2042       mode_idx = INTER_OFFSET(mode_selected);
   2043       memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above));
   2044       memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left));
   2045 
   2046       set_and_cost_bmi_mvs(cpi, x, xd, i, mode_selected, mode_mv[mode_selected],
   2047                            frame_mv, seg_mvs[i], bsi->ref_mv, x->nmvjointcost,
   2048                            x->mvcost);
   2049 
   2050       br += bsi->rdstat[i][mode_idx].brate;
   2051       bd += bsi->rdstat[i][mode_idx].bdist;
   2052       block_sse += bsi->rdstat[i][mode_idx].bsse;
   2053       segmentyrate += bsi->rdstat[i][mode_idx].byrate;
   2054       this_segment_rd += bsi->rdstat[i][mode_idx].brdcost;
   2055 
   2056       if (this_segment_rd > bsi->segment_rd) {
   2057         int iy, midx;
   2058         for (iy = i + 1; iy < 4; ++iy)
   2059           for (midx = 0; midx < INTER_MODES; ++midx)
   2060             bsi->rdstat[iy][midx].brdcost = INT64_MAX;
   2061         bsi->segment_rd = INT64_MAX;
   2062         return INT64_MAX;
   2063       }
   2064     }
   2065   } /* for each label */
   2066 
   2067   bsi->r = br;
   2068   bsi->d = bd;
   2069   bsi->segment_yrate = segmentyrate;
   2070   bsi->segment_rd = this_segment_rd;
   2071   bsi->sse = block_sse;
   2072 
   2073   // update the coding decisions
   2074   for (k = 0; k < 4; ++k)
   2075     bsi->modes[k] = mi->bmi[k].as_mode;
   2076 
   2077   if (bsi->segment_rd > best_rd)
   2078     return INT64_MAX;
   2079   /* set it to the best */
   2080   for (i = 0; i < 4; i++) {
   2081     mode_idx = INTER_OFFSET(bsi->modes[i]);
   2082     mi->bmi[i].as_mv[0].as_int = bsi->rdstat[i][mode_idx].mvs[0].as_int;
   2083     if (has_second_ref(mbmi))
   2084       mi->bmi[i].as_mv[1].as_int = bsi->rdstat[i][mode_idx].mvs[1].as_int;
   2085     x->plane[0].eobs[i] = bsi->rdstat[i][mode_idx].eobs;
   2086     mi->bmi[i].as_mode = bsi->modes[i];
   2087   }
   2088 
   2089   /*
   2090    * Copy out the results; mbmi->mode is used later to set mbmi->mv.as_int.
   2091    */
   2092   *returntotrate = bsi->r;
   2093   *returndistortion = bsi->d;
   2094   *returnyrate = bsi->segment_yrate;
   2095   *skippable = vp9_is_skippable_in_plane(x, BLOCK_8X8, 0);
   2096   *psse = bsi->sse;
   2097   mbmi->mode = bsi->modes[3];
   2098 
   2099   return bsi->segment_rd;
   2100 }
   2101 
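        // Computes the bit costs of signalling each single and compound
        // reference frame under the current prediction context. If the
        // segment pins the reference frame, no reference bits are coded and
        // all costs are zero. The 512 fallbacks are the cost of one bit at
        // even probability (the cost tables are scaled so one bit == 512).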
   2102 static void estimate_ref_frame_costs(const VP9_COMMON *cm,
   2103                                      const MACROBLOCKD *xd,
   2104                                      int segment_id,
   2105                                      unsigned int *ref_costs_single,
   2106                                      unsigned int *ref_costs_comp,
   2107                                      vpx_prob *comp_mode_p) {
   2108   int seg_ref_active = segfeature_active(&cm->seg, segment_id,
   2109                                          SEG_LVL_REF_FRAME);
   2110   if (seg_ref_active) {
   2111     memset(ref_costs_single, 0, MAX_REF_FRAMES * sizeof(*ref_costs_single));
   2112     memset(ref_costs_comp,   0, MAX_REF_FRAMES * sizeof(*ref_costs_comp));
   2113     *comp_mode_p = 128;
   2114   } else {
   2115     vpx_prob intra_inter_p = vp9_get_intra_inter_prob(cm, xd);
   2116     vpx_prob comp_inter_p = 128;
   2117 
   2118     if (cm->reference_mode == REFERENCE_MODE_SELECT) {
   2119       comp_inter_p = vp9_get_reference_mode_prob(cm, xd);
   2120       *comp_mode_p = comp_inter_p;
   2121     } else {
   2122       *comp_mode_p = 128;
   2123     }
   2124 
   2125     ref_costs_single[INTRA_FRAME] = vp9_cost_bit(intra_inter_p, 0);
   2126 
   2127     if (cm->reference_mode != COMPOUND_REFERENCE) {
   2128       vpx_prob ref_single_p1 = vp9_get_pred_prob_single_ref_p1(cm, xd);
   2129       vpx_prob ref_single_p2 = vp9_get_pred_prob_single_ref_p2(cm, xd);
   2130       unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);
   2131 
   2132       if (cm->reference_mode == REFERENCE_MODE_SELECT)
   2133         base_cost += vp9_cost_bit(comp_inter_p, 0);
   2134 
   2135       ref_costs_single[LAST_FRAME] = ref_costs_single[GOLDEN_FRAME] =
   2136           ref_costs_single[ALTREF_FRAME] = base_cost;
   2137       ref_costs_single[LAST_FRAME]   += vp9_cost_bit(ref_single_p1, 0);
   2138       ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p1, 1);
   2139       ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p1, 1);
   2140       ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p2, 0);
   2141       ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p2, 1);
   2142     } else {
   2143       ref_costs_single[LAST_FRAME]   = 512;
   2144       ref_costs_single[GOLDEN_FRAME] = 512;
   2145       ref_costs_single[ALTREF_FRAME] = 512;
   2146     }
   2147     if (cm->reference_mode != SINGLE_REFERENCE) {
   2148       vpx_prob ref_comp_p = vp9_get_pred_prob_comp_ref_p(cm, xd);
   2149       unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);
   2150 
   2151       if (cm->reference_mode == REFERENCE_MODE_SELECT)
   2152         base_cost += vp9_cost_bit(comp_inter_p, 1);
   2153 
   2154       ref_costs_comp[LAST_FRAME]   = base_cost + vp9_cost_bit(ref_comp_p, 0);
   2155       ref_costs_comp[GOLDEN_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 1);
   2156     } else {
   2157       ref_costs_comp[LAST_FRAME]   = 512;
   2158       ref_costs_comp[GOLDEN_FRAME] = 512;
   2159     }
   2160   }
   2161 }
   2162 
   2163 static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
   2164                          int mode_index,
   2165                          int64_t comp_pred_diff[REFERENCE_MODES],
   2166                          int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS],
   2167                          int skippable) {
   2168   MACROBLOCKD *const xd = &x->e_mbd;
   2169 
   2170   // Take a snapshot of the coding context so it can be
   2171   // restored if we decide to encode this way
   2172   ctx->skip = x->skip;
   2173   ctx->skippable = skippable;
   2174   ctx->best_mode_index = mode_index;
   2175   ctx->mic = *xd->mi[0];
   2176   ctx->mbmi_ext = *x->mbmi_ext;
   2177   ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE];
   2178   ctx->comp_pred_diff   = (int)comp_pred_diff[COMPOUND_REFERENCE];
   2179   ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT];
   2180 
   2181   memcpy(ctx->best_filter_diff, best_filter_diff,
   2182          sizeof(*best_filter_diff) * SWITCHABLE_FILTER_CONTEXTS);
   2183 }
   2184 
   2185 static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
   2186                                MV_REFERENCE_FRAME ref_frame,
   2187                                BLOCK_SIZE block_size,
   2188                                int mi_row, int mi_col,
   2189                                int_mv frame_nearest_mv[MAX_REF_FRAMES],
   2190                                int_mv frame_near_mv[MAX_REF_FRAMES],
   2191                                struct buf_2d yv12_mb[4][MAX_MB_PLANE]) {
   2192   const VP9_COMMON *cm = &cpi->common;
   2193   const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
   2194   MACROBLOCKD *const xd = &x->e_mbd;
   2195   MODE_INFO *const mi = xd->mi[0];
   2196   int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
   2197   const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
   2198   MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
   2199 
   2200   assert(yv12 != NULL);
   2201 
   2202   // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
   2203   // use the UV scaling factors.
   2204   vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf);
   2205 
   2206   // Gets an initial list of candidate vectors from neighbours and orders them
   2207   vp9_find_mv_refs(cm, xd, mi, ref_frame, candidates, mi_row, mi_col,
   2208                    NULL, NULL, mbmi_ext->mode_context);
   2209 
   2210   // Candidate refinement carried out at encoder and decoder
   2211   vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates,
   2212                         &frame_nearest_mv[ref_frame],
   2213                         &frame_near_mv[ref_frame]);
   2214 
   2215   // Further refinement that is encode side only to test the top few candidates
   2216   // in full and choose the best as the centre point for subsequent searches.
   2217   // The current implementation doesn't support scaling.
   2218   if (!vp9_is_scaled(sf) && block_size >= BLOCK_8X8)
   2219     vp9_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride,
   2220                 ref_frame, block_size);
   2221 }
   2222 
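        // Full-pel plus sub-pel motion search for a single reference. The
        // starting point is chosen from up to three predictors (the two
        // candidate reference mvs and the previous pred_mv) via
        // x->mv_best_ref_index; with adaptive_motion_search the search can
        // bail out early, returning INVALID_MV, when another reference has
        // already produced a much smaller predicted SAD.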
   2223 static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
   2224                                  BLOCK_SIZE bsize,
   2225                                  int mi_row, int mi_col,
   2226                                  int_mv *tmp_mv, int *rate_mv) {
   2227   MACROBLOCKD *xd = &x->e_mbd;
   2228   const VP9_COMMON *cm = &cpi->common;
   2229   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
   2230   struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}};
   2231   int bestsme = INT_MAX;
   2232   int step_param;
   2233   int sadpb = x->sadperbit16;
   2234   MV mvp_full;
   2235   int ref = mbmi->ref_frame[0];
   2236   MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv;
   2237 
   2238   int tmp_col_min = x->mv_col_min;
   2239   int tmp_col_max = x->mv_col_max;
   2240   int tmp_row_min = x->mv_row_min;
   2241   int tmp_row_max = x->mv_row_max;
   2242   int cost_list[5];
   2243 
   2244   const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi,
   2245                                                                         ref);
   2246 
   2247   MV pred_mv[3];
   2248   pred_mv[0] = x->mbmi_ext->ref_mvs[ref][0].as_mv;
   2249   pred_mv[1] = x->mbmi_ext->ref_mvs[ref][1].as_mv;
   2250   pred_mv[2] = x->pred_mv[ref];
   2251 
   2252   if (scaled_ref_frame) {
   2253     int i;
   2254     // Swap out the reference frame for a version that's been scaled to
   2255     // match the resolution of the current frame, allowing the existing
   2256     // motion search code to be used without additional modifications.
   2257     for (i = 0; i < MAX_MB_PLANE; i++)
   2258       backup_yv12[i] = xd->plane[i].pre[0];
   2259 
   2260     vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
   2261   }
   2262 
   2263   vp9_set_mv_search_range(x, &ref_mv);
   2264 
   2265   // Work out the size of the first step in the mv step search.
   2266   // 0 here is the maximum-length first step; 1 is the maximum >> 1, etc.
   2267   if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
   2268     // Take a weighted average of the step_params based on the last frame's
   2269     // max mv magnitude and that based on the best ref mvs of the current
   2270     // block for the given reference.
   2271     step_param = (vp9_init_search_range(x->max_mv_context[ref]) +
   2272                     cpi->mv_step_param) / 2;
   2273   } else {
   2274     step_param = cpi->mv_step_param;
   2275   }
   2276 
   2277   if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64) {
   2278     int boffset =
   2279         2 * (b_width_log2_lookup[BLOCK_64X64] -
   2280              VPXMIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize]));
   2281     step_param = VPXMAX(step_param, boffset);
   2282   }
   2283 
   2284   if (cpi->sf.adaptive_motion_search) {
   2285     int bwl = b_width_log2_lookup[bsize];
   2286     int bhl = b_height_log2_lookup[bsize];
   2287     int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4);
   2288 
   2289     if (tlevel < 5)
   2290       step_param += 2;
   2291 
   2292     // prev_mv_sad is not setup for dynamically scaled frames.
   2293     if (cpi->oxcf.resize_mode != RESIZE_DYNAMIC) {
   2294       int i;
   2295       for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) {
   2296         if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
   2297           x->pred_mv[ref].row = 0;
   2298           x->pred_mv[ref].col = 0;
   2299           tmp_mv->as_int = INVALID_MV;
   2300 
   2301           if (scaled_ref_frame) {
   2302             int i;
   2303             for (i = 0; i < MAX_MB_PLANE; ++i)
   2304               xd->plane[i].pre[0] = backup_yv12[i];
   2305           }
   2306           return;
   2307         }
   2308       }
   2309     }
   2310   }
   2311 
   2312   mvp_full = pred_mv[x->mv_best_ref_index[ref]];
   2313 
   2314   mvp_full.col >>= 3;
   2315   mvp_full.row >>= 3;
   2316 
   2317   bestsme = vp9_full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb,
   2318                                   cond_cost_list(cpi, cost_list),
   2319                                   &ref_mv, &tmp_mv->as_mv, INT_MAX, 1);
   2320 
   2321   x->mv_col_min = tmp_col_min;
   2322   x->mv_col_max = tmp_col_max;
   2323   x->mv_row_min = tmp_row_min;
   2324   x->mv_row_max = tmp_row_max;
   2325 
   2326   if (bestsme < INT_MAX) {
   2327     int dis;  /* TODO: use dis in distortion calculation later. */
   2328     cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv,
   2329                                  cm->allow_high_precision_mv,
   2330                                  x->errorperbit,
   2331                                  &cpi->fn_ptr[bsize],
   2332                                  cpi->sf.mv.subpel_force_stop,
   2333                                  cpi->sf.mv.subpel_iters_per_step,
   2334                                  cond_cost_list(cpi, cost_list),
   2335                                  x->nmvjointcost, x->mvcost,
   2336                                  &dis, &x->pred_sse[ref], NULL, 0, 0);
   2337   }
   2338   *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv,
   2339                              x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
   2340 
   2341   if (cpi->sf.adaptive_motion_search)
   2342     x->pred_mv[ref] = tmp_mv->as_mv;
   2343 
   2344   if (scaled_ref_frame) {
   2345     int i;
   2346     for (i = 0; i < MAX_MB_PLANE; i++)
   2347       xd->plane[i].pre[0] = backup_yv12[i];
   2348   }
   2349 }
   2350 
   2351 
   2352 
   2353 static INLINE void restore_dst_buf(MACROBLOCKD *xd,
   2354                                    uint8_t *orig_dst[MAX_MB_PLANE],
   2355                                    int orig_dst_stride[MAX_MB_PLANE]) {
   2356   int i;
   2357   for (i = 0; i < MAX_MB_PLANE; i++) {
   2358     xd->plane[i].dst.buf = orig_dst[i];
   2359     xd->plane[i].dst.stride = orig_dst_stride[i];
   2360   }
   2361 }
   2362 
   2363 // In some situations we want to discount the apparent cost of a new motion
   2364 // vector. Where there is a subtle motion field, and especially where there is
   2365 // low spatial complexity, it can be hard to cover the cost of a new motion
   2366 // vector in a single block, even if that motion vector reduces distortion.
   2367 // However, once established, that vector may be usable through the nearest and
   2368 // near mv modes to reduce distortion in subsequent blocks and also improve
   2369 // visual quality.
   2370 static int discount_newmv_test(const VP9_COMP *cpi,
   2371                                int this_mode,
   2372                                int_mv this_mv,
   2373                                int_mv (*mode_mv)[MAX_REF_FRAMES],
   2374                                int ref_frame) {
   2375   return (!cpi->rc.is_src_frame_alt_ref &&
   2376           (this_mode == NEWMV) &&
   2377           (this_mv.as_int != 0) &&
   2378           ((mode_mv[NEARESTMV][ref_frame].as_int == 0) ||
   2379            (mode_mv[NEARESTMV][ref_frame].as_int == INVALID_MV)) &&
   2380           ((mode_mv[NEARMV][ref_frame].as_int == 0) ||
   2381            (mode_mv[NEARMV][ref_frame].as_int == INVALID_MV)));
   2382 }
   2383 
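        // Evaluates one inter mode for the whole block: obtains motion vectors
        // (joint or single search for NEWMV, with the new-mv rate optionally
        // discounted to help seed a motion field), clamps and bounds-checks
        // them, then searches the switchable interpolation filters, caching
        // each filter's RD cost in filter_cache so later stages can reuse it.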
   2384 static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
   2385                                  BLOCK_SIZE bsize,
   2386                                  int *rate2, int64_t *distortion,
   2387                                  int *skippable,
   2388                                  int *rate_y, int *rate_uv,
   2389                                  int *disable_skip,
   2390                                  int_mv (*mode_mv)[MAX_REF_FRAMES],
   2391                                  int mi_row, int mi_col,
   2392                                  int_mv single_newmv[MAX_REF_FRAMES],
   2393                                  INTERP_FILTER (*single_filter)[MAX_REF_FRAMES],
   2394                                  int (*single_skippable)[MAX_REF_FRAMES],
   2395                                  int64_t *psse,
   2396                                  const int64_t ref_best_rd,
   2397                                  int64_t *mask_filter,
   2398                                  int64_t filter_cache[]) {
   2399   VP9_COMMON *cm = &cpi->common;
   2400   MACROBLOCKD *xd = &x->e_mbd;
   2401   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
   2402   MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
   2403   const int is_comp_pred = has_second_ref(mbmi);
   2404   const int this_mode = mbmi->mode;
   2405   int_mv *frame_mv = mode_mv[this_mode];
   2406   int i;
   2407   int refs[2] = { mbmi->ref_frame[0],
   2408     (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
   2409   int_mv cur_mv[2];
   2410 #if CONFIG_VP9_HIGHBITDEPTH
   2411   DECLARE_ALIGNED(16, uint16_t, tmp_buf16[MAX_MB_PLANE * 64 * 64]);
   2412   uint8_t *tmp_buf;
   2413 #else
   2414   DECLARE_ALIGNED(16, uint8_t, tmp_buf[MAX_MB_PLANE * 64 * 64]);
   2415 #endif  // CONFIG_VP9_HIGHBITDEPTH
   2416   int pred_exists = 0;
   2417   int intpel_mv;
   2418   int64_t rd, tmp_rd, best_rd = INT64_MAX;
   2419   int best_needs_copy = 0;
   2420   uint8_t *orig_dst[MAX_MB_PLANE];
   2421   int orig_dst_stride[MAX_MB_PLANE];
   2422   int rs = 0;
   2423   INTERP_FILTER best_filter = SWITCHABLE;
   2424   uint8_t skip_txfm[MAX_MB_PLANE << 2] = {0};
   2425   int64_t bsse[MAX_MB_PLANE << 2] = {0};
   2426 
   2427   int bsl = mi_width_log2_lookup[bsize];
   2428   int pred_filter_search = cpi->sf.cb_pred_filter_search ?
   2429       (((mi_row + mi_col) >> bsl) +
   2430        get_chessboard_index(cm->current_video_frame)) & 0x1 : 0;
   2431 
   2432   int skip_txfm_sb = 0;
   2433   int64_t skip_sse_sb = INT64_MAX;
   2434   int64_t distortion_y = 0, distortion_uv = 0;
   2435 
   2436 #if CONFIG_VP9_HIGHBITDEPTH
   2437   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
   2438     tmp_buf = CONVERT_TO_BYTEPTR(tmp_buf16);
   2439   } else {
   2440     tmp_buf = (uint8_t *)tmp_buf16;
   2441   }
   2442 #endif  // CONFIG_VP9_HIGHBITDEPTH
   2443 
   2444   if (pred_filter_search) {
   2445     INTERP_FILTER af = SWITCHABLE, lf = SWITCHABLE;
   2446     if (xd->up_available)
   2447       af = xd->mi[-xd->mi_stride]->mbmi.interp_filter;
   2448     if (xd->left_available)
   2449       lf = xd->mi[-1]->mbmi.interp_filter;
   2450 
   2451     if ((this_mode != NEWMV) || (af == lf))
   2452       best_filter = af;
   2453   }
   2454 
   2455   if (is_comp_pred) {
   2456     if (frame_mv[refs[0]].as_int == INVALID_MV ||
   2457         frame_mv[refs[1]].as_int == INVALID_MV)
   2458       return INT64_MAX;
   2459 
   2460     if (cpi->sf.adaptive_mode_search) {
   2461       if (single_filter[this_mode][refs[0]] ==
   2462           single_filter[this_mode][refs[1]])
   2463         best_filter = single_filter[this_mode][refs[0]];
   2464     }
   2465   }
   2466 
   2467   if (this_mode == NEWMV) {
   2468     int rate_mv;
   2469     if (is_comp_pred) {
   2470       // Initialize mv using single prediction mode result.
   2471       frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
   2472       frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
   2473 
   2474       if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
   2475         joint_motion_search(cpi, x, bsize, frame_mv,
   2476                             mi_row, mi_col, single_newmv, &rate_mv);
   2477       } else {
   2478         rate_mv  = vp9_mv_bit_cost(&frame_mv[refs[0]].as_mv,
   2479                                    &x->mbmi_ext->ref_mvs[refs[0]][0].as_mv,
   2480                                    x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
   2481         rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]].as_mv,
   2482                                    &x->mbmi_ext->ref_mvs[refs[1]][0].as_mv,
   2483                                    x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
   2484       }
   2485       *rate2 += rate_mv;
   2486     } else {
   2487       int_mv tmp_mv;
   2488       single_motion_search(cpi, x, bsize, mi_row, mi_col,
   2489                            &tmp_mv, &rate_mv);
   2490       if (tmp_mv.as_int == INVALID_MV)
   2491         return INT64_MAX;
   2492 
   2493       frame_mv[refs[0]].as_int =
   2494           xd->mi[0]->bmi[0].as_mv[0].as_int = tmp_mv.as_int;
   2495       single_newmv[refs[0]].as_int = tmp_mv.as_int;
   2496 
   2497       // Estimate the rate implications of a new mv but discount this
   2498       // under certain circumstances where we want to help initiate a weak
   2499       // motion field, where the distortion gain for a single block may not
   2500       // be enough to overcome the cost of a new mv.
   2501       if (discount_newmv_test(cpi, this_mode, tmp_mv, mode_mv, refs[0])) {
   2502         *rate2 += VPXMAX((rate_mv / NEW_MV_DISCOUNT_FACTOR), 1);
   2503       } else {
   2504         *rate2 += rate_mv;
   2505       }
   2506     }
   2507   }
   2508 
   2509   for (i = 0; i < is_comp_pred + 1; ++i) {
   2510     cur_mv[i] = frame_mv[refs[i]];
   2511     // Clip "next_nearest" so that it doesn't extend too far out of the image.
   2512     if (this_mode != NEWMV)
   2513       clamp_mv2(&cur_mv[i].as_mv, xd);
   2514 
   2515     if (mv_check_bounds(x, &cur_mv[i].as_mv))
   2516       return INT64_MAX;
   2517     mbmi->mv[i].as_int = cur_mv[i].as_int;
   2518   }
   2519 
   2520   // Do the first prediction into the destination buffer, and the next
   2521   // prediction into a temporary buffer. Then keep track of which one
   2522   // of these currently holds the best predictor, and use the other
   2523   // one for future predictions. In the end, copy from tmp_buf to
   2524   // dst if necessary.
   2525   for (i = 0; i < MAX_MB_PLANE; i++) {
   2526     orig_dst[i] = xd->plane[i].dst.buf;
   2527     orig_dst_stride[i] = xd->plane[i].dst.stride;
   2528   }
   2529 
   2530   // We don't include the cost of the second reference here, because there
   2531   // are only two compound options: Last/ARF and Golden/ARF; the second
   2532   // reference is therefore always known to be ARF.
   2533   //
   2534   // Under some circumstances we discount the cost of new mv mode to encourage
   2535   // initiation of a motion field.
   2536   if (discount_newmv_test(cpi, this_mode, frame_mv[refs[0]],
   2537                           mode_mv, refs[0])) {
   2538     *rate2 += VPXMIN(cost_mv_ref(cpi, this_mode,
   2539                                  mbmi_ext->mode_context[refs[0]]),
   2540                      cost_mv_ref(cpi, NEARESTMV,
   2541                                  mbmi_ext->mode_context[refs[0]]));
   2542   } else {
   2543     *rate2 += cost_mv_ref(cpi, this_mode, mbmi_ext->mode_context[refs[0]]);
   2544   }
   2545 
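          // RDCOST folds rate and distortion into one Lagrangian cost (roughly
          // rate * rdmult plus scaled distortion). If the rate terms alone
          // already exceed the best rd seen so far, bail out early; NEARESTMV
          // is exempted from this early exit.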
   2546   if (RDCOST(x->rdmult, x->rddiv, *rate2, 0) > ref_best_rd &&
   2547       mbmi->mode != NEARESTMV)
   2548     return INT64_MAX;
   2549 
   2550   pred_exists = 0;
   2551   // Are all MVs integer pel for both Y and UV?
   2552   intpel_mv = !mv_has_subpel(&mbmi->mv[0].as_mv);
   2553   if (is_comp_pred)
   2554     intpel_mv &= !mv_has_subpel(&mbmi->mv[1].as_mv);
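          // With integer pel MVs there is no subpel interpolation, so every
          // filter produces the same prediction; the modeled rate/distortion
          // of the first filter can then be reused for the rest below.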
   2555 
   2556   // Search for the best switchable filter by checking the variance of the
   2557   // prediction error, irrespective of whether the filter will be used.
   2558   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
   2559     filter_cache[i] = INT64_MAX;
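          // filter_cache[0 .. SWITCHABLE_FILTERS - 1] holds the modeled rd per
          // filter excluding its signalling cost; the extra slot at index
          // SWITCHABLE_FILTERS tracks the minimum rd including that cost.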
   2560 
   2561   if (cm->interp_filter != BILINEAR) {
   2562     if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {
   2563       best_filter = EIGHTTAP;
   2564     } else if (best_filter == SWITCHABLE) {
   2565       int newbest;
   2566       int tmp_rate_sum = 0;
   2567       int64_t tmp_dist_sum = 0;
   2568 
   2569       for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
   2570         int j;
   2571         int64_t rs_rd;
   2572         int tmp_skip_sb = 0;
   2573         int64_t tmp_skip_sse = INT64_MAX;
   2574 
   2575         mbmi->interp_filter = i;
   2576         rs = vp9_get_switchable_rate(cpi, xd);
   2577         rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
   2578 
   2579         if (i > 0 && intpel_mv) {
   2580           rd = RDCOST(x->rdmult, x->rddiv, tmp_rate_sum, tmp_dist_sum);
   2581           filter_cache[i] = rd;
   2582           filter_cache[SWITCHABLE_FILTERS] =
   2583               VPXMIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
   2584           if (cm->interp_filter == SWITCHABLE)
   2585             rd += rs_rd;
   2586           *mask_filter = VPXMAX(*mask_filter, rd);
   2587         } else {
   2588           int rate_sum = 0;
   2589           int64_t dist_sum = 0;
   2590           if (i > 0 && cpi->sf.adaptive_interp_filter_search &&
   2591               (cpi->sf.interp_filter_search_mask & (1 << i))) {
   2592             rate_sum = INT_MAX;
   2593             dist_sum = INT64_MAX;
   2594             continue;
   2595           }
   2596 
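                  // Predict into dst when it does not currently hold the best
                  // predictor; otherwise use the temporary 64x64 planes so the
                  // best prediction so far is not overwritten.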
   2597           if ((cm->interp_filter == SWITCHABLE &&
   2598                (!i || best_needs_copy)) ||
   2599               (cm->interp_filter != SWITCHABLE &&
   2600                (cm->interp_filter == mbmi->interp_filter ||
   2601                 (i == 0 && intpel_mv)))) {
   2602             restore_dst_buf(xd, orig_dst, orig_dst_stride);
   2603           } else {
   2604             for (j = 0; j < MAX_MB_PLANE; j++) {
   2605               xd->plane[j].dst.buf = tmp_buf + j * 64 * 64;
   2606               xd->plane[j].dst.stride = 64;
   2607             }
   2608           }
   2609           vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
   2610           model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum,
   2611                           &tmp_skip_sb, &tmp_skip_sse);
   2612 
   2613           rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum);
   2614           filter_cache[i] = rd;
   2615           filter_cache[SWITCHABLE_FILTERS] =
   2616               VPXMIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
   2617           if (cm->interp_filter == SWITCHABLE)
   2618             rd += rs_rd;
   2619           *mask_filter = VPXMAX(*mask_filter, rd);
   2620 
   2621           if (i == 0 && intpel_mv) {
   2622             tmp_rate_sum = rate_sum;
   2623             tmp_dist_sum = dist_sum;
   2624           }
   2625         }
   2626 
   2627         if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
   2628           if (rd / 2 > ref_best_rd) {
   2629             restore_dst_buf(xd, orig_dst, orig_dst_stride);
   2630             return INT64_MAX;
   2631           }
   2632         }
   2633         newbest = i == 0 || rd < best_rd;
   2634 
   2635         if (newbest) {
   2636           best_rd = rd;
   2637           best_filter = mbmi->interp_filter;
   2638           if (cm->interp_filter == SWITCHABLE && i && !intpel_mv)
   2639             best_needs_copy = !best_needs_copy;
   2640         }
   2641 
   2642         if ((cm->interp_filter == SWITCHABLE && newbest) ||
   2643             (cm->interp_filter != SWITCHABLE &&
   2644              cm->interp_filter == mbmi->interp_filter)) {
   2645           pred_exists = 1;
   2646           tmp_rd = best_rd;
   2647 
   2648           skip_txfm_sb = tmp_skip_sb;
   2649           skip_sse_sb = tmp_skip_sse;
   2650           memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));
   2651           memcpy(bsse, x->bsse, sizeof(bsse));
   2652         }
   2653       }
   2654       restore_dst_buf(xd, orig_dst, orig_dst_stride);
   2655     }
   2656   }
   2657   // Set the appropriate filter
   2658   mbmi->interp_filter = cm->interp_filter != SWITCHABLE ?
   2659       cm->interp_filter : best_filter;
   2660   rs = cm->interp_filter == SWITCHABLE ? vp9_get_switchable_rate(cpi, xd) : 0;
   2661 
   2662   if (pred_exists) {
   2663     if (best_needs_copy) {
   2664       // Again temporarily point the buffers at local memory to avoid a memcpy.
   2665       for (i = 0; i < MAX_MB_PLANE; i++) {
   2666         xd->plane[i].dst.buf = tmp_buf + i * 64 * 64;
   2667         xd->plane[i].dst.stride = 64;
   2668       }
   2669     }
   2670     rd = tmp_rd + RDCOST(x->rdmult, x->rddiv, rs, 0);
   2671   } else {
   2672     int tmp_rate;
   2673     int64_t tmp_dist;
   2674     // Handles the special case where a filter that is not in the
   2675     // switchable list (e.g. bilinear) is indicated at the frame level, or
   2676     // the skip condition holds.
   2677     vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
   2678     model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist,
   2679                     &skip_txfm_sb, &skip_sse_sb);
   2680     rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
   2681     memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));
   2682     memcpy(bsse, x->bsse, sizeof(bsse));
   2683   }
   2684 
   2685   if (!is_comp_pred)
   2686     single_filter[this_mode][refs[0]] = mbmi->interp_filter;
   2687 
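          // If both single-reference predictions were fully skippable, assume
          // the compound prediction is too and mark all planes SKIP_TXFM_AC_DC.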
   2688   if (cpi->sf.adaptive_mode_search)
   2689     if (is_comp_pred)
   2690       if (single_skippable[this_mode][refs[0]] &&
   2691           single_skippable[this_mode][refs[1]])
   2692         memset(skip_txfm, SKIP_TXFM_AC_DC, sizeof(skip_txfm));
   2693 
   2694   if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
   2695     // If the modeled rd of the current pred_error is substantially higher
   2696     // than the best so far, do not bother doing the full rd search.
   2697     if (rd / 2 > ref_best_rd) {
   2698       restore_dst_buf(xd, orig_dst, orig_dst_stride);
   2699       return INT64_MAX;
   2700     }
   2701   }
   2702 
   2703   if (cm->interp_filter == SWITCHABLE)
   2704     *rate2 += rs;
   2705 
   2706   memcpy(x->skip_txfm, skip_txfm, sizeof(skip_txfm));
   2707   memcpy(x->bsse, bsse, sizeof(bsse));
   2708 
   2709   if (!skip_txfm_sb) {
   2710     int skippable_y, skippable_uv;
   2711     int64_t sseuv = INT64_MAX;
   2712     int64_t rdcosty = INT64_MAX;
   2713 
   2714     // Y cost and distortion
   2715     vp9_subtract_plane(x, bsize, 0);
   2716     super_block_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse,
   2717                     bsize, ref_best_rd);
   2718 
   2719     if (*rate_y == INT_MAX) {
   2720       *rate2 = INT_MAX;
   2721       *distortion = INT64_MAX;
   2722       restore_dst_buf(xd, orig_dst, orig_dst_stride);
   2723       return INT64_MAX;
   2724     }
   2725 
   2726     *rate2 += *rate_y;
   2727     *distortion += distortion_y;
   2728 
   2729     rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
   2730     rdcosty = VPXMIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse));
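            // rdcosty is the budget already spent on Y: the cheaper of coding
            // the residual or skipping it (zero rate, full sse). The uv search
            // below only gets what is left of ref_best_rd after this.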
   2731 
   2732     if (!super_block_uvrd(cpi, x, rate_uv, &distortion_uv, &skippable_uv,
   2733                           &sseuv, bsize, ref_best_rd - rdcosty)) {
   2734       *rate2 = INT_MAX;
   2735       *distortion = INT64_MAX;
   2736       restore_dst_buf(xd, orig_dst, orig_dst_stride);
   2737       return INT64_MAX;
   2738     }
   2739 
   2740     *psse += sseuv;
   2741     *rate2 += *rate_uv;
   2742     *distortion += distortion_uv;
   2743     *skippable = skippable_y && skippable_uv;
   2744   } else {
   2745     x->skip = 1;
   2746     *disable_skip = 1;
   2747 
   2748     // The cost of the skip bit needs to be added.
   2749     *rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
   2750 
   2751     *distortion = skip_sse_sb;
   2752   }
   2753 
   2754   if (!is_comp_pred)
   2755     single_skippable[this_mode][refs[0]] = *skippable;
   2756 
   2757   restore_dst_buf(xd, orig_dst, orig_dst_stride);
   2758   return 0;  // The rate-distortion cost will be re-calculated by caller.
   2759 }
   2760 
   2761 void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
   2762                                RD_COST *rd_cost, BLOCK_SIZE bsize,
   2763                                PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
   2764   VP9_COMMON *const cm = &cpi->common;
   2765   MACROBLOCKD *const xd = &x->e_mbd;
   2766   struct macroblockd_plane *const pd = xd->plane;
   2767   int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
   2768   int y_skip = 0, uv_skip = 0;
   2769   int64_t dist_y = 0, dist_uv = 0;
   2770   TX_SIZE max_uv_tx_size;
   2771   x->skip_encode = 0;
   2772   ctx->skip = 0;
   2773   xd->mi[0]->mbmi.ref_frame[0] = INTRA_FRAME;
   2774   xd->mi[0]->mbmi.ref_frame[1] = NONE;
   2775 
   2776   if (bsize >= BLOCK_8X8) {
   2777     if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
   2778                                &dist_y, &y_skip, bsize,
   2779                                best_rd) >= best_rd) {
   2780       rd_cost->rate = INT_MAX;
   2781       return;
   2782     }
   2783   } else {
   2784     y_skip = 0;
   2785     if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly,
   2786                                      &dist_y, best_rd) >= best_rd) {
   2787       rd_cost->rate = INT_MAX;
   2788       return;
   2789     }
   2790   }
   2791   max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0]->mbmi.tx_size, bsize,
   2792                                        pd[1].subsampling_x,
   2793                                        pd[1].subsampling_y);
   2794   rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
   2795                           &dist_uv, &uv_skip, VPXMAX(BLOCK_8X8, bsize),
   2796                           max_uv_tx_size);
   2797 
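          // When both planes are skippable the token costs drop out: only the
          // mode signalling rate (total minus the token-only rates) plus the
          // cost of the skip flag itself remains.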
   2798   if (y_skip && uv_skip) {
   2799     rd_cost->rate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
   2800                     vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
   2801     rd_cost->dist = dist_y + dist_uv;
   2802   } else {
   2803     rd_cost->rate = rate_y + rate_uv +
   2804                       vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
   2805     rd_cost->dist = dist_y + dist_uv;
   2806   }
   2807 
   2808   ctx->mic = *xd->mi[0];
   2809   ctx->mbmi_ext = *x->mbmi_ext;
   2810   rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
   2811 }
   2812 
   2813 // This function is designed to apply a bias or adjustment to an rd value based
   2814 // on the relative variance of the source and reconstruction.
   2815 #define LOW_VAR_THRESH 16
   2816 #define VLOW_ADJ_MAX 25
   2817 #define VHIGH_ADJ_MAX 8
   2818 static void rd_variance_adjustment(VP9_COMP *cpi,
   2819                                    MACROBLOCK *x,
   2820                                    BLOCK_SIZE bsize,
   2821                                    int64_t *this_rd,
   2822                                    MV_REFERENCE_FRAME ref_frame,
   2823                                    unsigned int source_variance) {
   2824   MACROBLOCKD *const xd = &x->e_mbd;
   2825   unsigned int recon_variance;
   2826   unsigned int absvar_diff = 0;
   2827   int64_t var_error = 0;
   2828   int64_t var_factor = 0;
   2829 
   2830   if (*this_rd == INT64_MAX)
   2831     return;
   2832 
   2833 #if CONFIG_VP9_HIGHBITDEPTH
   2834   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
   2835     recon_variance =
   2836       vp9_high_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, bsize, xd->bd);
   2837   } else {
   2838     recon_variance =
   2839       vp9_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, bsize);
   2840   }
   2841 #else
   2842   recon_variance =
   2843     vp9_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, bsize);
   2844 #endif  // CONFIG_VP9_HIGHBITDEPTH
   2845 
   2846   if ((source_variance + recon_variance) > LOW_VAR_THRESH) {
   2847     absvar_diff = (source_variance > recon_variance)
   2848       ? (source_variance - recon_variance)
   2849       : (recon_variance - source_variance);
   2850 
   2851     var_error = (200 * source_variance * recon_variance) /
   2852       ((source_variance * source_variance) +
   2853        (recon_variance * recon_variance));
   2854     var_error = 100 - var_error;
   2855   }
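          // Note: var_error is 0 when source and recon variance match, since
          // 200*s*r / (s*s + r*r) == 100 for s == r, and tends to 100 as they
          // diverge. E.g. s = 100, r = 25 gives var_error = 100 - 47 = 53.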
   2856 
   2857   // First case: source variance is above a threshold and the ref frame is
   2858   // intra. This is targeted mainly at discouraging intra modes that give
   2859   // rise to a predictor with a low spatial complexity compared to the source.
   2860   if ((source_variance > LOW_VAR_THRESH) && (ref_frame == INTRA_FRAME) &&
   2861       (source_variance > recon_variance)) {
   2862     var_factor = VPXMIN(absvar_diff, VPXMIN(VLOW_ADJ_MAX, var_error));
   2863   // A second possible case of interest is where the source variance
   2864   // is very low and we wish to discourage false texture or motion trails.
   2865   } else if ((source_variance < (LOW_VAR_THRESH >> 1)) &&
   2866              (recon_variance > source_variance)) {
   2867     var_factor = VPXMIN(absvar_diff, VPXMIN(VHIGH_ADJ_MAX, var_error));
   2868   }
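          // var_factor is a percentage, so the rd cost is inflated by at most
          // VLOW_ADJ_MAX (25%) in the intra case and VHIGH_ADJ_MAX (8%) in the
          // low source variance case.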
   2869   *this_rd += (*this_rd * var_factor) / 100;
   2870 }
   2871 
   2872 
   2873 // Do we have an internal image edge (e.g. formatting bars)?
   2874 int vp9_internal_image_edge(VP9_COMP *cpi) {
   2875   return (cpi->oxcf.pass == 2) &&
   2876     ((cpi->twopass.this_frame_stats.inactive_zone_rows > 0) ||
   2877     (cpi->twopass.this_frame_stats.inactive_zone_cols > 0));
   2878 }
   2879 
   2880 // Checks to see if a super block is on a horizontal image edge.
   2881 // In most cases this is the "real" edge unless there are formatting
   2882 // bars embedded in the stream.
   2883 int vp9_active_h_edge(VP9_COMP *cpi, int mi_row, int mi_step) {
   2884   int top_edge = 0;
   2885   int b