      1 /*
      2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include <assert.h>
     12 #include <stdio.h>
     13 #include <math.h>
     14 #include <limits.h>
     16 #include "vpx_config.h"
     17 #include "vp8_rtcd.h"
     18 #include "./vpx_dsp_rtcd.h"
     19 #include "encodeframe.h"
     20 #include "tokenize.h"
     21 #include "treewriter.h"
     22 #include "onyx_int.h"
     23 #include "modecosts.h"
     24 #include "encodeintra.h"
     25 #include "pickinter.h"
     26 #include "vp8/common/common.h"
     27 #include "vp8/common/entropymode.h"
     28 #include "vp8/common/reconinter.h"
     29 #include "vp8/common/reconintra.h"
     30 #include "vp8/common/reconintra4x4.h"
     31 #include "vp8/common/findnearmv.h"
     32 #include "vp8/common/quant_common.h"
     33 #include "encodemb.h"
     34 #include "vp8/encoder/quantize.h"
     35 #include "vpx_dsp/variance.h"
     36 #include "vpx_ports/system_state.h"
     37 #include "mcomp.h"
     38 #include "rdopt.h"
     39 #include "vpx_mem/vpx_mem.h"
     40 #include "vp8/common/systemdependent.h"
     41 #if CONFIG_TEMPORAL_DENOISING
     42 #include "denoising.h"
     43 #endif
     44 extern void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x);
     45 
     46 #define MAXF(a, b) (((a) > (b)) ? (a) : (b))
     47 
     48 typedef struct rate_distortion_struct {
     49   int rate2;
     50   int rate_y;
     51   int rate_uv;
     52   int distortion2;
     53   int distortion_uv;
     54 } RATE_DISTORTION;
     55 
     56 typedef struct best_mode_struct {
     57   int yrd;
     58   int rd;
     59   int intra_rd;
     60   MB_MODE_INFO mbmode;
     61   union b_mode_info bmodes[16];
     62   PARTITION_INFO partition;
     63 } BEST_MODE;
     64 
     65 static const int auto_speed_thresh[17] = { 1000, 200, 150, 130, 150, 125,
     66                                            120,  115, 115, 115, 115, 115,
     67                                            115,  115, 115, 115, 105 };
     68 
     69 const MB_PREDICTION_MODE vp8_mode_order[MAX_MODES] = {
     70   ZEROMV,    DC_PRED,
     71 
     72   NEARESTMV, NEARMV,
     73 
     74   ZEROMV,    NEARESTMV,
     75 
     76   ZEROMV,    NEARESTMV,
     77 
     78   NEARMV,    NEARMV,
     79 
     80   V_PRED,    H_PRED,    TM_PRED,
     81 
     82   NEWMV,     NEWMV,     NEWMV,
     83 
     84   SPLITMV,   SPLITMV,   SPLITMV,
     85 
     86   B_PRED,
     87 };
     88 
      89 /* This table gives the reference frame used by each entry of vp8_mode_order,
      90  * in search priority order, which does not necessarily match
      91  * INTRA, LAST, GOLDEN, ARF. */
     92 const int vp8_ref_frame_order[MAX_MODES] = {
     93   1, 0,
     94 
     95   1, 1,
     96 
     97   2, 2,
     98 
     99   3, 3,
    100 
    101   2, 3,
    102 
    103   0, 0, 0,
    104 
    105   1, 2, 3,
    106 
    107   1, 2, 3,
    108 
    109   0,
    110 };
    111 
    112 static void fill_token_costs(
    113     int c[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS],
    114     const vp8_prob p[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS]
    115                     [ENTROPY_NODES]) {
    116   int i, j, k;
    117 
    118   for (i = 0; i < BLOCK_TYPES; ++i) {
    119     for (j = 0; j < COEF_BANDS; ++j) {
    120       for (k = 0; k < PREV_COEF_CONTEXTS; ++k) {
     121         /* When the previous-token context is 0, the EOB branch cannot
     122          * occur past the first coded position (band > 1 for block type
     123          * 0, band > 0 otherwise), so skip it in the cost tree. */
    124         if (k == 0 && j > (i == 0)) {
    125           vp8_cost_tokens2(c[i][j][k], p[i][j][k], vp8_coef_tree, 2);
    126         } else {
    127           vp8_cost_tokens(c[i][j][k], p[i][j][k], vp8_coef_tree);
    128         }
    129       }
    130     }
    131   }
    132 }
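/* Illustrative note: the table filled above is consumed one token at a time
 * by cost_coeffs() further down, indexed as
 * [plane type][coefficient band][previous-token context][token].  A single
 * lookup could look like the hypothetical (not compiled) helper below.
 */
#if 0
static int example_token_cost(const MACROBLOCK *mb) {
  /* cost of coding a ONE_TOKEN in a Y block with DC, band 1, context 0 */
  return mb->token_costs[PLANE_TYPE_Y_WITH_DC][1][0][ONE_TOKEN];
}
#endif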
    133 
    134 static const int rd_iifactor[32] = { 4, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0,
    135                                      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    136                                      0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
    137 
    138 /* values are now correlated to quantizer */
    139 static const int sad_per_bit16lut[QINDEX_RANGE] = {
    140   2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  3,  3,  3,
    141   3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  4,  4,  4,  4,  4,  4,  4,  4,
    142   4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  6,  6,  6,
    143   6,  6,  6,  6,  6,  6,  6,  6,  6,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
    144   7,  7,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  9,  9,  9,  9,  9,
    145   9,  9,  9,  9,  9,  9,  9,  10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11,
    146   11, 11, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 14, 14
    147 };
    148 static const int sad_per_bit4lut[QINDEX_RANGE] = {
    149   2,  2,  2,  2,  2,  2,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
    150   3,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  6,  6,
    151   6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  7,  7,  7,  7,  7,  7,  7,  7,  7,
    152   7,  7,  7,  7,  8,  8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  10, 10, 10, 10,
    153   10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12,
    154   12, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16,
    155   16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 19, 20, 20, 20,
    156 };
    157 
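/* The two lookup tables above are, roughly speaking, the multipliers used by
 * the motion search to convert a motion vector's bit cost into SAD units
 * (sadperbit16 for the 16x16 searches, sadperbit4 for the 4x4 sub-block
 * searches); higher Q indexes weight the MV rate more heavily relative to
 * prediction error.
 */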
    158 void vp8cx_initialize_me_consts(VP8_COMP *cpi, int QIndex) {
    159   cpi->mb.sadperbit16 = sad_per_bit16lut[QIndex];
    160   cpi->mb.sadperbit4 = sad_per_bit4lut[QIndex];
    161 }
    162 
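/* A rough sketch of how the constants set up below interact, assuming RDCOST
 * is the ((128 + rate * RDMULT) >> 8) + RDDIV * distortion macro from
 * rdopt.h: for Qvalue == 64, RDMULT = (int)(2.80 * 64 * 64) = 11468, which
 * exceeds 1000, so RDDIV becomes 1 and RDMULT is rescaled to 114; a candidate
 * with rate 300 and distortion 2000 would then cost
 * ((128 + 300 * 114) >> 8) + 2000 = 134 + 2000 = 2134.
 */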
    163 void vp8_initialize_rd_consts(VP8_COMP *cpi, MACROBLOCK *x, int Qvalue) {
    164   int q;
    165   int i;
    166   double capped_q = (Qvalue < 160) ? (double)Qvalue : 160.0;
    167   double rdconst = 2.80;
    168 
    169   vpx_clear_system_state();
    170 
    171   /* Further tests required to see if optimum is different
    172    * for key frames, golden frames and arf frames.
    173    */
    174   cpi->RDMULT = (int)(rdconst * (capped_q * capped_q));
    175 
     176   /* Extend rate multiplier alongside quantizer zbin increases */
    177   if (cpi->mb.zbin_over_quant > 0) {
    178     double oq_factor;
    179     double modq;
    180 
    181     /* Experimental code using the same basic equation as used for Q above
    182      * The units of cpi->mb.zbin_over_quant are 1/128 of Q bin size
    183      */
    184     oq_factor = 1.0 + ((double)0.0015625 * cpi->mb.zbin_over_quant);
    185     modq = (int)((double)capped_q * oq_factor);
    186     cpi->RDMULT = (int)(rdconst * (modq * modq));
    187   }
    188 
    189   if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    190     if (cpi->twopass.next_iiratio > 31) {
    191       cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
    192     } else {
    193       cpi->RDMULT +=
    194           (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
    195     }
    196   }
    197 
    198   cpi->mb.errorperbit = (cpi->RDMULT / 110);
    199   cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);
    200 
    201   vp8_set_speed_features(cpi);
    202 
    203   for (i = 0; i < MAX_MODES; ++i) {
    204     x->mode_test_hit_counts[i] = 0;
    205   }
    206 
    207   q = (int)pow(Qvalue, 1.25);
    208 
    209   if (q < 8) q = 8;
    210 
    211   if (cpi->RDMULT > 1000) {
    212     cpi->RDDIV = 1;
    213     cpi->RDMULT /= 100;
    214 
    215     for (i = 0; i < MAX_MODES; ++i) {
    216       if (cpi->sf.thresh_mult[i] < INT_MAX) {
    217         x->rd_threshes[i] = cpi->sf.thresh_mult[i] * q / 100;
    218       } else {
    219         x->rd_threshes[i] = INT_MAX;
    220       }
    221 
    222       cpi->rd_baseline_thresh[i] = x->rd_threshes[i];
    223     }
    224   } else {
    225     cpi->RDDIV = 100;
    226 
    227     for (i = 0; i < MAX_MODES; ++i) {
    228       if (cpi->sf.thresh_mult[i] < (INT_MAX / q)) {
    229         x->rd_threshes[i] = cpi->sf.thresh_mult[i] * q;
    230       } else {
    231         x->rd_threshes[i] = INT_MAX;
    232       }
    233 
    234       cpi->rd_baseline_thresh[i] = x->rd_threshes[i];
    235     }
    236   }
    237 
    238   {
    239     /* build token cost array for the type of frame we have now */
    240     FRAME_CONTEXT *l = &cpi->lfc_n;
    241 
    242     if (cpi->common.refresh_alt_ref_frame) {
    243       l = &cpi->lfc_a;
    244     } else if (cpi->common.refresh_golden_frame) {
    245       l = &cpi->lfc_g;
    246     }
    247 
    248     fill_token_costs(cpi->mb.token_costs,
    249                      (const vp8_prob(*)[8][3][11])l->coef_probs);
    250     /*
    251     fill_token_costs(
    252         cpi->mb.token_costs,
    253         (const vp8_prob( *)[8][3][11]) cpi->common.fc.coef_probs);
    254     */
    255 
    256     /* TODO make these mode costs depend on last,alt or gold too.  (jbb) */
    257     vp8_init_mode_costs(cpi);
    258   }
    259 }
    260 
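/* Worked example of the adaptation below: the speed is raised by 2 when the
 * average encode time exceeds roughly 105% of the per-frame budget
 * (budget * 100 < avg_encode_time * 95), and lowered by 1 when it drops below
 * budget * 100 / auto_speed_thresh[Speed]; at Speed 8 that threshold is 115,
 * i.e. about 87% of the budget.
 */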
    261 void vp8_auto_select_speed(VP8_COMP *cpi) {
    262   int milliseconds_for_compress = (int)(1000000 / cpi->framerate);
    263 
    264   milliseconds_for_compress =
    265       milliseconds_for_compress * (16 - cpi->oxcf.cpu_used) / 16;
    266 
    267 #if 0
    268 
    269     if (0)
    270     {
    271         FILE *f;
    272 
    273         f = fopen("speed.stt", "a");
    274         fprintf(f, " %8ld %10ld %10ld %10ld\n",
    275                 cpi->common.current_video_frame, cpi->Speed, milliseconds_for_compress, cpi->avg_pick_mode_time);
    276         fclose(f);
    277     }
    278 
    279 #endif
    280 
    281   if (cpi->avg_pick_mode_time < milliseconds_for_compress &&
    282       (cpi->avg_encode_time - cpi->avg_pick_mode_time) <
    283           milliseconds_for_compress) {
    284     if (cpi->avg_pick_mode_time == 0) {
    285       cpi->Speed = 4;
    286     } else {
    287       if (milliseconds_for_compress * 100 < cpi->avg_encode_time * 95) {
    288         cpi->Speed += 2;
    289         cpi->avg_pick_mode_time = 0;
    290         cpi->avg_encode_time = 0;
    291 
    292         if (cpi->Speed > 16) {
    293           cpi->Speed = 16;
    294         }
    295       }
    296 
    297       if (milliseconds_for_compress * 100 >
    298           cpi->avg_encode_time * auto_speed_thresh[cpi->Speed]) {
    299         cpi->Speed -= 1;
    300         cpi->avg_pick_mode_time = 0;
    301         cpi->avg_encode_time = 0;
    302 
     303         /* In real-time mode, cpi->Speed is in [4, 16]. */
    304         if (cpi->Speed < 4) {
    305           cpi->Speed = 4;
    306         }
    307       }
    308     }
    309   } else {
    310     cpi->Speed += 4;
    311 
    312     if (cpi->Speed > 16) cpi->Speed = 16;
    313 
    314     cpi->avg_pick_mode_time = 0;
    315     cpi->avg_encode_time = 0;
    316   }
    317 }
    318 
    319 int vp8_block_error_c(short *coeff, short *dqcoeff) {
    320   int i;
    321   int error = 0;
    322 
    323   for (i = 0; i < 16; ++i) {
    324     int this_diff = coeff[i] - dqcoeff[i];
    325     error += this_diff * this_diff;
    326   }
    327 
    328   return error;
    329 }
    330 
    331 int vp8_mbblock_error_c(MACROBLOCK *mb, int dc) {
    332   BLOCK *be;
    333   BLOCKD *bd;
    334   int i, j;
    335   int berror, error = 0;
    336 
    337   for (i = 0; i < 16; ++i) {
    338     be = &mb->block[i];
    339     bd = &mb->e_mbd.block[i];
    340 
    341     berror = 0;
    342 
    343     for (j = dc; j < 16; ++j) {
    344       int this_diff = be->coeff[j] - bd->dqcoeff[j];
    345       berror += this_diff * this_diff;
    346     }
    347 
    348     error += berror;
    349   }
    350 
    351   return error;
    352 }
    353 
    354 int vp8_mbuverror_c(MACROBLOCK *mb) {
    355   BLOCK *be;
    356   BLOCKD *bd;
    357 
    358   int i;
    359   int error = 0;
    360 
    361   for (i = 16; i < 24; ++i) {
    362     be = &mb->block[i];
    363     bd = &mb->e_mbd.block[i];
    364 
    365     error += vp8_block_error_c(be->coeff, bd->dqcoeff);
    366   }
    367 
    368   return error;
    369 }
    370 
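/* VP8_UVSSE estimates the chroma SSE for the current 16x16 motion vector.
 * The luma MV (stored in 1/8-pel units) is rounded away from zero and halved
 * to form the chroma MV, which is then split into an integer offset into the
 * reference plane (>> 3) and a fractional part (& 7) passed to the sub-pixel
 * variance function when either component is non-integral.
 */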
    371 int VP8_UVSSE(MACROBLOCK *x) {
    372   unsigned char *uptr, *vptr;
    373   unsigned char *upred_ptr = (*(x->block[16].base_src) + x->block[16].src);
    374   unsigned char *vpred_ptr = (*(x->block[20].base_src) + x->block[20].src);
    375   int uv_stride = x->block[16].src_stride;
    376 
    377   unsigned int sse1 = 0;
    378   unsigned int sse2 = 0;
    379   int mv_row = x->e_mbd.mode_info_context->mbmi.mv.as_mv.row;
    380   int mv_col = x->e_mbd.mode_info_context->mbmi.mv.as_mv.col;
    381   int offset;
    382   int pre_stride = x->e_mbd.pre.uv_stride;
    383 
    384   if (mv_row < 0) {
    385     mv_row -= 1;
    386   } else {
    387     mv_row += 1;
    388   }
    389 
    390   if (mv_col < 0) {
    391     mv_col -= 1;
    392   } else {
    393     mv_col += 1;
    394   }
    395 
    396   mv_row /= 2;
    397   mv_col /= 2;
    398 
    399   offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
    400   uptr = x->e_mbd.pre.u_buffer + offset;
    401   vptr = x->e_mbd.pre.v_buffer + offset;
    402 
    403   if ((mv_row | mv_col) & 7) {
    404     vpx_sub_pixel_variance8x8(uptr, pre_stride, mv_col & 7, mv_row & 7,
    405                               upred_ptr, uv_stride, &sse2);
    406     vpx_sub_pixel_variance8x8(vptr, pre_stride, mv_col & 7, mv_row & 7,
    407                               vpred_ptr, uv_stride, &sse1);
    408     sse2 += sse1;
    409   } else {
    410     vpx_variance8x8(uptr, pre_stride, upred_ptr, uv_stride, &sse2);
    411     vpx_variance8x8(vptr, pre_stride, vpred_ptr, uv_stride, &sse1);
    412     sse2 += sse1;
    413   }
    414   return sse2;
    415 }
    416 
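/* cost_coeffs() walks the quantized coefficients of one 4x4 block in zig-zag
 * order, summing the context-dependent token cost plus the per-value cost
 * from vp8_dct_value_cost_ptr, and adds an EOB token cost if the block ends
 * before position 16.  The previous-token class (pt) is threaded through the
 * loop and written back to the above/left entropy contexts so neighbouring
 * blocks see whether this block had any coded coefficients.
 */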
    417 static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, int type, ENTROPY_CONTEXT *a,
    418                        ENTROPY_CONTEXT *l) {
    419   int c = !type; /* start at coef 0, unless Y with Y2 */
    420   int eob = (int)(*b->eob);
    421   int pt; /* surrounding block/prev coef predictor */
    422   int cost = 0;
    423   short *qcoeff_ptr = b->qcoeff;
    424 
    425   VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
    426 
    427   assert(eob <= 16);
    428   for (; c < eob; ++c) {
    429     const int v = qcoeff_ptr[vp8_default_zig_zag1d[c]];
    430     const int t = vp8_dct_value_tokens_ptr[v].Token;
    431     cost += mb->token_costs[type][vp8_coef_bands[c]][pt][t];
    432     cost += vp8_dct_value_cost_ptr[v];
    433     pt = vp8_prev_token_class[t];
    434   }
    435 
    436   if (c < 16) {
    437     cost += mb->token_costs[type][vp8_coef_bands[c]][pt][DCT_EOB_TOKEN];
    438   }
    439 
     440   pt = (c != !type); /* 0 if the block is empty (eob at first position), 1 otherwise */
    441   *a = *l = pt;
    442 
    443   return cost;
    444 }
    445 
    446 static int vp8_rdcost_mby(MACROBLOCK *mb) {
    447   int cost = 0;
    448   int b;
    449   MACROBLOCKD *x = &mb->e_mbd;
    450   ENTROPY_CONTEXT_PLANES t_above, t_left;
    451   ENTROPY_CONTEXT *ta;
    452   ENTROPY_CONTEXT *tl;
    453 
    454   memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    455   memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
    456 
    457   ta = (ENTROPY_CONTEXT *)&t_above;
    458   tl = (ENTROPY_CONTEXT *)&t_left;
    459 
    460   for (b = 0; b < 16; ++b) {
    461     cost += cost_coeffs(mb, x->block + b, PLANE_TYPE_Y_NO_DC,
    462                         ta + vp8_block2above[b], tl + vp8_block2left[b]);
    463   }
    464 
    465   cost += cost_coeffs(mb, x->block + 24, PLANE_TYPE_Y2,
    466                       ta + vp8_block2above[24], tl + vp8_block2left[24]);
    467 
    468   return cost;
    469 }
    470 
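/* macro_block_yrd() computes the luma rate/distortion of the current
 * prediction: it forward-transforms the 16 4x4 residual blocks, collects
 * their DC terms into the 2nd order (Walsh-Hadamard) block, quantizes
 * everything, and returns the transform-domain SSD (the first-order error
 * skips DC, which is carried by the Y2 block; the sum is shifted to a common
 * scale) together with the token cost from vp8_rdcost_mby().
 */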
    471 static void macro_block_yrd(MACROBLOCK *mb, int *Rate, int *Distortion) {
    472   int b;
    473   MACROBLOCKD *const x = &mb->e_mbd;
    474   BLOCK *const mb_y2 = mb->block + 24;
    475   BLOCKD *const x_y2 = x->block + 24;
    476   short *Y2DCPtr = mb_y2->src_diff;
    477   BLOCK *beptr;
    478   int d;
    479 
    480   vp8_subtract_mby(mb->src_diff, *(mb->block[0].base_src),
    481                    mb->block[0].src_stride, mb->e_mbd.predictor, 16);
    482 
    483   /* Fdct and building the 2nd order block */
    484   for (beptr = mb->block; beptr < mb->block + 16; beptr += 2) {
    485     mb->short_fdct8x4(beptr->src_diff, beptr->coeff, 32);
    486     *Y2DCPtr++ = beptr->coeff[0];
    487     *Y2DCPtr++ = beptr->coeff[16];
    488   }
    489 
    490   /* 2nd order fdct */
    491   mb->short_walsh4x4(mb_y2->src_diff, mb_y2->coeff, 8);
    492 
    493   /* Quantization */
    494   for (b = 0; b < 16; ++b) {
    495     mb->quantize_b(&mb->block[b], &mb->e_mbd.block[b]);
    496   }
    497 
     498   /* DC prediction and Quantization of 2nd Order block */
    499   mb->quantize_b(mb_y2, x_y2);
    500 
    501   /* Distortion */
    502   d = vp8_mbblock_error(mb, 1) << 2;
    503   d += vp8_block_error(mb_y2->coeff, x_y2->dqcoeff);
    504 
    505   *Distortion = (d >> 4);
    506 
    507   /* rate */
    508   *Rate = vp8_rdcost_mby(mb);
    509 }
    510 
    511 static void copy_predictor(unsigned char *dst, const unsigned char *predictor) {
    512   const unsigned int *p = (const unsigned int *)predictor;
    513   unsigned int *d = (unsigned int *)dst;
    514   d[0] = p[0];
    515   d[4] = p[4];
    516   d[8] = p[8];
    517   d[12] = p[12];
    518 }
    519 static int rd_pick_intra4x4block(MACROBLOCK *x, BLOCK *be, BLOCKD *b,
    520                                  B_PREDICTION_MODE *best_mode,
    521                                  const int *bmode_costs, ENTROPY_CONTEXT *a,
    522                                  ENTROPY_CONTEXT *l,
    523 
    524                                  int *bestrate, int *bestratey,
    525                                  int *bestdistortion) {
    526   B_PREDICTION_MODE mode;
    527   int best_rd = INT_MAX;
    528   int rate = 0;
    529   int distortion;
    530 
    531   ENTROPY_CONTEXT ta = *a, tempa = *a;
    532   ENTROPY_CONTEXT tl = *l, templ = *l;
     533   /*
     534    * The predictor buffer is a 2d buffer with a stride of 16.  Create
     535    * a temp buffer that meets the stride requirements, but we are only
     536    * interested in the left 4x4 block.
     537    */
    538   DECLARE_ALIGNED(16, unsigned char, best_predictor[16 * 4]);
    539   DECLARE_ALIGNED(16, short, best_dqcoeff[16]);
    540   int dst_stride = x->e_mbd.dst.y_stride;
    541   unsigned char *dst = x->e_mbd.dst.y_buffer + b->offset;
    542 
    543   unsigned char *Above = dst - dst_stride;
    544   unsigned char *yleft = dst - 1;
    545   unsigned char top_left = Above[-1];
    546 
    547   for (mode = B_DC_PRED; mode <= B_HU_PRED; ++mode) {
    548     int this_rd;
    549     int ratey;
    550 
    551     rate = bmode_costs[mode];
    552 
    553     vp8_intra4x4_predict(Above, yleft, dst_stride, mode, b->predictor, 16,
    554                          top_left);
    555     vp8_subtract_b(be, b, 16);
    556     x->short_fdct4x4(be->src_diff, be->coeff, 32);
    557     x->quantize_b(be, b);
    558 
    559     tempa = ta;
    560     templ = tl;
    561 
    562     ratey = cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC, &tempa, &templ);
    563     rate += ratey;
    564     distortion = vp8_block_error(be->coeff, b->dqcoeff) >> 2;
    565 
    566     this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
    567 
    568     if (this_rd < best_rd) {
    569       *bestrate = rate;
    570       *bestratey = ratey;
    571       *bestdistortion = distortion;
    572       best_rd = this_rd;
    573       *best_mode = mode;
    574       *a = tempa;
    575       *l = templ;
    576       copy_predictor(best_predictor, b->predictor);
    577       memcpy(best_dqcoeff, b->dqcoeff, 32);
    578     }
    579   }
    580   b->bmi.as_mode = *best_mode;
    581 
    582   vp8_short_idct4x4llm(best_dqcoeff, best_predictor, 16, dst, dst_stride);
    583 
    584   return best_rd;
    585 }
    586 
    587 static int rd_pick_intra4x4mby_modes(MACROBLOCK *mb, int *Rate, int *rate_y,
    588                                      int *Distortion, int best_rd) {
    589   MACROBLOCKD *const xd = &mb->e_mbd;
    590   int i;
    591   int cost = mb->mbmode_cost[xd->frame_type][B_PRED];
    592   int distortion = 0;
    593   int tot_rate_y = 0;
    594   int64_t total_rd = 0;
    595   ENTROPY_CONTEXT_PLANES t_above, t_left;
    596   ENTROPY_CONTEXT *ta;
    597   ENTROPY_CONTEXT *tl;
    598   const int *bmode_costs;
    599 
    600   memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    601   memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
    602 
    603   ta = (ENTROPY_CONTEXT *)&t_above;
    604   tl = (ENTROPY_CONTEXT *)&t_left;
    605 
    606   intra_prediction_down_copy(xd, xd->dst.y_buffer - xd->dst.y_stride + 16);
    607 
    608   bmode_costs = mb->inter_bmode_costs;
    609 
    610   for (i = 0; i < 16; ++i) {
    611     MODE_INFO *const mic = xd->mode_info_context;
    612     const int mis = xd->mode_info_stride;
    613     B_PREDICTION_MODE best_mode = B_MODE_COUNT;
    614     int r = 0, ry = 0, d = 0;
    615 
    616     if (mb->e_mbd.frame_type == KEY_FRAME) {
    617       const B_PREDICTION_MODE A = above_block_mode(mic, i, mis);
    618       const B_PREDICTION_MODE L = left_block_mode(mic, i);
    619 
    620       bmode_costs = mb->bmode_costs[A][L];
    621     }
    622 
    623     total_rd += rd_pick_intra4x4block(
    624         mb, mb->block + i, xd->block + i, &best_mode, bmode_costs,
    625         ta + vp8_block2above[i], tl + vp8_block2left[i], &r, &ry, &d);
    626 
    627     cost += r;
    628     distortion += d;
    629     tot_rate_y += ry;
    630 
    631     assert(best_mode != B_MODE_COUNT);
    632     mic->bmi[i].as_mode = best_mode;
    633 
    634     if (total_rd >= (int64_t)best_rd) break;
    635   }
    636 
    637   if (total_rd >= (int64_t)best_rd) return INT_MAX;
    638 
    639   *Rate = cost;
    640   *rate_y = tot_rate_y;
    641   *Distortion = distortion;
    642 
    643   return RDCOST(mb->rdmult, mb->rddiv, cost, distortion);
    644 }
    645 
    646 static int rd_pick_intra16x16mby_mode(MACROBLOCK *x, int *Rate, int *rate_y,
    647                                       int *Distortion) {
    648   MB_PREDICTION_MODE mode;
    649   MB_PREDICTION_MODE mode_selected = MB_MODE_COUNT;
    650   int rate, ratey;
    651   int distortion;
    652   int best_rd = INT_MAX;
    653   int this_rd;
    654   MACROBLOCKD *xd = &x->e_mbd;
    655 
    656   /* Y Search for 16x16 intra prediction mode */
    657   for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    658     xd->mode_info_context->mbmi.mode = mode;
    659 
    660     vp8_build_intra_predictors_mby_s(xd, xd->dst.y_buffer - xd->dst.y_stride,
    661                                      xd->dst.y_buffer - 1, xd->dst.y_stride,
    662                                      xd->predictor, 16);
    663 
    664     macro_block_yrd(x, &ratey, &distortion);
    665     rate = ratey +
    666            x->mbmode_cost[xd->frame_type][xd->mode_info_context->mbmi.mode];
    667 
    668     this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
    669 
    670     if (this_rd < best_rd) {
    671       mode_selected = mode;
    672       best_rd = this_rd;
    673       *Rate = rate;
    674       *rate_y = ratey;
    675       *Distortion = distortion;
    676     }
    677   }
    678 
    679   assert(mode_selected != MB_MODE_COUNT);
    680   xd->mode_info_context->mbmi.mode = mode_selected;
    681   return best_rd;
    682 }
    683 
    684 static int rd_cost_mbuv(MACROBLOCK *mb) {
    685   int b;
    686   int cost = 0;
    687   MACROBLOCKD *x = &mb->e_mbd;
    688   ENTROPY_CONTEXT_PLANES t_above, t_left;
    689   ENTROPY_CONTEXT *ta;
    690   ENTROPY_CONTEXT *tl;
    691 
    692   memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    693   memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
    694 
    695   ta = (ENTROPY_CONTEXT *)&t_above;
    696   tl = (ENTROPY_CONTEXT *)&t_left;
    697 
    698   for (b = 16; b < 24; ++b) {
    699     cost += cost_coeffs(mb, x->block + b, PLANE_TYPE_UV,
    700                         ta + vp8_block2above[b], tl + vp8_block2left[b]);
    701   }
    702 
    703   return cost;
    704 }
    705 
    706 static int rd_inter16x16_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate,
    707                             int *distortion, int fullpixel) {
    708   (void)cpi;
    709   (void)fullpixel;
    710 
    711   vp8_build_inter16x16_predictors_mbuv(&x->e_mbd);
    712   vp8_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
    713                     x->src.uv_stride, &x->e_mbd.predictor[256],
    714                     &x->e_mbd.predictor[320], 8);
    715 
    716   vp8_transform_mbuv(x);
    717   vp8_quantize_mbuv(x);
    718 
    719   *rate = rd_cost_mbuv(x);
    720   *distortion = vp8_mbuverror(x) / 4;
    721 
    722   return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
    723 }
    724 
    725 static int rd_inter4x4_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate,
    726                           int *distortion, int fullpixel) {
    727   (void)cpi;
    728   (void)fullpixel;
    729 
    730   vp8_build_inter4x4_predictors_mbuv(&x->e_mbd);
    731   vp8_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
    732                     x->src.uv_stride, &x->e_mbd.predictor[256],
    733                     &x->e_mbd.predictor[320], 8);
    734 
    735   vp8_transform_mbuv(x);
    736   vp8_quantize_mbuv(x);
    737 
    738   *rate = rd_cost_mbuv(x);
    739   *distortion = vp8_mbuverror(x) / 4;
    740 
    741   return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
    742 }
    743 
    744 static void rd_pick_intra_mbuv_mode(MACROBLOCK *x, int *rate,
    745                                     int *rate_tokenonly, int *distortion) {
    746   MB_PREDICTION_MODE mode;
    747   MB_PREDICTION_MODE mode_selected = MB_MODE_COUNT;
    748   int best_rd = INT_MAX;
    749   int d = 0, r = 0;
    750   int rate_to;
    751   MACROBLOCKD *xd = &x->e_mbd;
    752 
    753   for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    754     int this_rate;
    755     int this_distortion;
    756     int this_rd;
    757 
    758     xd->mode_info_context->mbmi.uv_mode = mode;
    759 
    760     vp8_build_intra_predictors_mbuv_s(
    761         xd, xd->dst.u_buffer - xd->dst.uv_stride,
    762         xd->dst.v_buffer - xd->dst.uv_stride, xd->dst.u_buffer - 1,
    763         xd->dst.v_buffer - 1, xd->dst.uv_stride, &xd->predictor[256],
    764         &xd->predictor[320], 8);
    765 
    766     vp8_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
    767                       x->src.uv_stride, &xd->predictor[256],
    768                       &xd->predictor[320], 8);
    769     vp8_transform_mbuv(x);
    770     vp8_quantize_mbuv(x);
    771 
    772     rate_to = rd_cost_mbuv(x);
    773     this_rate =
    774         rate_to + x->intra_uv_mode_cost[xd->frame_type]
    775                                        [xd->mode_info_context->mbmi.uv_mode];
    776 
    777     this_distortion = vp8_mbuverror(x) / 4;
    778 
    779     this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
    780 
    781     if (this_rd < best_rd) {
    782       best_rd = this_rd;
    783       d = this_distortion;
    784       r = this_rate;
    785       *rate_tokenonly = rate_to;
    786       mode_selected = mode;
    787     }
    788   }
    789 
    790   *rate = r;
    791   *distortion = d;
    792 
    793   assert(mode_selected != MB_MODE_COUNT);
    794   xd->mode_info_context->mbmi.uv_mode = mode_selected;
    795 }
    796 
    797 int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4]) {
    798   vp8_prob p[VP8_MVREFS - 1];
    799   assert(NEARESTMV <= m && m <= SPLITMV);
    800   vp8_mv_ref_probs(p, near_mv_ref_ct);
    801   return vp8_cost_token(vp8_mv_ref_tree, p,
    802                         vp8_mv_ref_encoding_array + (m - NEARESTMV));
    803 }
    804 
    805 void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv) {
    806   x->e_mbd.mode_info_context->mbmi.mode = mb;
    807   x->e_mbd.mode_info_context->mbmi.mv.as_int = mv->as_int;
    808 }
    809 
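/* labels2mode() assigns the chosen sub-block mode and motion vector to every
 * 4x4 block carrying the given label.  Only the first block reached for a
 * label pays the mode cost (and, for NEW4X4, the MV bit cost); the remaining
 * blocks of that label are marked LEFT4X4/ABOVE4X4 and copy the neighbouring
 * vector at no extra cost.
 */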
    810 static int labels2mode(MACROBLOCK *x, int const *labelings, int which_label,
    811                        B_PREDICTION_MODE this_mode, int_mv *this_mv,
    812                        int_mv *best_ref_mv, int *mvcost[2]) {
    813   MACROBLOCKD *const xd = &x->e_mbd;
    814   MODE_INFO *const mic = xd->mode_info_context;
    815   const int mis = xd->mode_info_stride;
    816 
    817   int cost = 0;
    818   int thismvcost = 0;
    819 
    820   /* We have to be careful retrieving previously-encoded motion vectors.
    821      Ones from this macroblock have to be pulled from the BLOCKD array
    822      as they have not yet made it to the bmi array in our MB_MODE_INFO. */
    823 
    824   int i = 0;
    825 
    826   do {
    827     BLOCKD *const d = xd->block + i;
    828     const int row = i >> 2, col = i & 3;
    829 
    830     B_PREDICTION_MODE m;
    831 
    832     if (labelings[i] != which_label) continue;
    833 
    834     if (col && labelings[i] == labelings[i - 1]) {
    835       m = LEFT4X4;
    836     } else if (row && labelings[i] == labelings[i - 4]) {
    837       m = ABOVE4X4;
    838     } else {
     839       /* the only time we should do costing for a new motion vector
     840        * or mode is when we are on a new label  (jbb May 08, 2007)
     841        */
    842       switch (m = this_mode) {
    843         case NEW4X4:
    844           thismvcost = vp8_mv_bit_cost(this_mv, best_ref_mv, mvcost, 102);
    845           break;
    846         case LEFT4X4:
    847           this_mv->as_int = col ? d[-1].bmi.mv.as_int : left_block_mv(mic, i);
    848           break;
    849         case ABOVE4X4:
    850           this_mv->as_int =
    851               row ? d[-4].bmi.mv.as_int : above_block_mv(mic, i, mis);
    852           break;
    853         case ZERO4X4: this_mv->as_int = 0; break;
    854         default: break;
    855       }
    856 
    857       if (m == ABOVE4X4) { /* replace above with left if same */
    858         int_mv left_mv;
    859 
    860         left_mv.as_int = col ? d[-1].bmi.mv.as_int : left_block_mv(mic, i);
    861 
    862         if (left_mv.as_int == this_mv->as_int) m = LEFT4X4;
    863       }
    864 
    865       cost = x->inter_bmode_costs[m];
    866     }
    867 
    868     d->bmi.mv.as_int = this_mv->as_int;
    869 
    870     x->partition_info->bmi[i].mode = m;
    871     x->partition_info->bmi[i].mv.as_int = this_mv->as_int;
    872 
    873   } while (++i < 16);
    874 
    875   cost += thismvcost;
    876   return cost;
    877 }
    878 
    879 static int rdcost_mbsegment_y(MACROBLOCK *mb, const int *labels,
    880                               int which_label, ENTROPY_CONTEXT *ta,
    881                               ENTROPY_CONTEXT *tl) {
    882   int cost = 0;
    883   int b;
    884   MACROBLOCKD *x = &mb->e_mbd;
    885 
    886   for (b = 0; b < 16; ++b) {
    887     if (labels[b] == which_label) {
    888       cost += cost_coeffs(mb, x->block + b, PLANE_TYPE_Y_WITH_DC,
    889                           ta + vp8_block2above[b], tl + vp8_block2left[b]);
    890     }
    891   }
    892 
    893   return cost;
    894 }
    895 static unsigned int vp8_encode_inter_mb_segment(MACROBLOCK *x,
    896                                                 int const *labels,
    897                                                 int which_label) {
    898   int i;
    899   unsigned int distortion = 0;
    900   int pre_stride = x->e_mbd.pre.y_stride;
    901   unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    902 
    903   for (i = 0; i < 16; ++i) {
    904     if (labels[i] == which_label) {
    905       BLOCKD *bd = &x->e_mbd.block[i];
    906       BLOCK *be = &x->block[i];
    907 
    908       vp8_build_inter_predictors_b(bd, 16, base_pre, pre_stride,
    909                                    x->e_mbd.subpixel_predict);
    910       vp8_subtract_b(be, bd, 16);
    911       x->short_fdct4x4(be->src_diff, be->coeff, 32);
    912       x->quantize_b(be, bd);
    913 
    914       distortion += vp8_block_error(be->coeff, bd->dqcoeff);
    915     }
    916   }
    917 
    918   return distortion;
    919 }
    920 
    921 static const unsigned int segmentation_to_sseshift[4] = { 3, 3, 2, 0 };
    922 
    923 typedef struct {
    924   int_mv *ref_mv;
    925   int_mv mvp;
    926 
    927   int segment_rd;
    928   int segment_num;
    929   int r;
    930   int d;
    931   int segment_yrate;
    932   B_PREDICTION_MODE modes[16];
    933   int_mv mvs[16];
    934   unsigned char eobs[16];
    935 
    936   int mvthresh;
    937   int *mdcounts;
    938 
    939   int_mv sv_mvp[4]; /* save 4 mvp from 8x8 */
    940   int sv_istep[2];  /* save 2 initial step_param for 16x8/8x16 */
    941 
    942 } BEST_SEG_INFO;
    943 
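/* rd_check_segment() evaluates one SPLITMV partitioning (16x8, 8x16, 8x8 or
 * 4x4).  For each label it tries LEFT4X4, ABOVE4X4, ZERO4X4 and, unless the
 * best so far is already below the MV threshold, a NEW4X4 motion search,
 * keeping the per-label winner together with its entropy contexts; the
 * accumulated rate, distortion and RD cost replace the contents of the
 * BEST_SEG_INFO above only if they beat the best segmentation seen so far.
 */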
    944 static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, BEST_SEG_INFO *bsi,
    945                              unsigned int segmentation) {
    946   int i;
    947   int const *labels;
    948   int br = 0;
    949   int bd = 0;
    950   B_PREDICTION_MODE this_mode;
    951 
    952   int label_count;
    953   int this_segment_rd = 0;
    954   int label_mv_thresh;
    955   int rate = 0;
    956   int sbr = 0;
    957   int sbd = 0;
    958   int segmentyrate = 0;
    959 
    960   vp8_variance_fn_ptr_t *v_fn_ptr;
    961 
    962   ENTROPY_CONTEXT_PLANES t_above, t_left;
    963   ENTROPY_CONTEXT_PLANES t_above_b, t_left_b;
    964 
    965   memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    966   memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
    967 
    968   vp8_zero(t_above_b);
    969   vp8_zero(t_left_b);
    970 
    971   br = 0;
    972   bd = 0;
    973 
    974   v_fn_ptr = &cpi->fn_ptr[segmentation];
    975   labels = vp8_mbsplits[segmentation];
    976   label_count = vp8_mbsplit_count[segmentation];
    977 
     978   /* A multiplier of 64 would make this threshold so large that MVs on
     979    * segments are almost never checked; the multiplier of 1 used here keeps
     980    * the per-label threshold roughly equal to the macroblock threshold.
     981    */
    982   label_mv_thresh = 1 * bsi->mvthresh / label_count;
    983 
    984   /* Segmentation method overheads */
    985   rate = vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs,
    986                         vp8_mbsplit_encodings + segmentation);
    987   rate += vp8_cost_mv_ref(SPLITMV, bsi->mdcounts);
    988   this_segment_rd += RDCOST(x->rdmult, x->rddiv, rate, 0);
    989   br += rate;
    990 
    991   for (i = 0; i < label_count; ++i) {
    992     int_mv mode_mv[B_MODE_COUNT];
    993     int best_label_rd = INT_MAX;
    994     B_PREDICTION_MODE mode_selected = ZERO4X4;
    995     int bestlabelyrate = 0;
    996 
    997     /* search for the best motion vector on this segment */
    998     for (this_mode = LEFT4X4; this_mode <= NEW4X4; ++this_mode) {
    999       int this_rd;
   1000       int distortion;
   1001       int labelyrate;
   1002       ENTROPY_CONTEXT_PLANES t_above_s, t_left_s;
   1003       ENTROPY_CONTEXT *ta_s;
   1004       ENTROPY_CONTEXT *tl_s;
   1005 
   1006       memcpy(&t_above_s, &t_above, sizeof(ENTROPY_CONTEXT_PLANES));
   1007       memcpy(&t_left_s, &t_left, sizeof(ENTROPY_CONTEXT_PLANES));
   1008 
   1009       ta_s = (ENTROPY_CONTEXT *)&t_above_s;
   1010       tl_s = (ENTROPY_CONTEXT *)&t_left_s;
   1011 
   1012       if (this_mode == NEW4X4) {
   1013         int sseshift;
   1014         int num00;
   1015         int step_param = 0;
   1016         int further_steps;
   1017         int n;
   1018         int thissme;
   1019         int bestsme = INT_MAX;
   1020         int_mv temp_mv;
   1021         BLOCK *c;
   1022         BLOCKD *e;
   1023 
    1024         /* Is the best so far sufficiently good that we can't justify
    1025          * doing a new motion search?
    1026          */
   1027         if (best_label_rd < label_mv_thresh) break;
   1028 
   1029         if (cpi->compressor_speed) {
   1030           if (segmentation == BLOCK_8X16 || segmentation == BLOCK_16X8) {
   1031             bsi->mvp.as_int = bsi->sv_mvp[i].as_int;
   1032             if (i == 1 && segmentation == BLOCK_16X8) {
   1033               bsi->mvp.as_int = bsi->sv_mvp[2].as_int;
   1034             }
   1035 
   1036             step_param = bsi->sv_istep[i];
   1037           }
   1038 
   1039           /* use previous block's result as next block's MV
   1040            * predictor.
   1041            */
   1042           if (segmentation == BLOCK_4X4 && i > 0) {
   1043             bsi->mvp.as_int = x->e_mbd.block[i - 1].bmi.mv.as_int;
   1044             if (i == 4 || i == 8 || i == 12) {
   1045               bsi->mvp.as_int = x->e_mbd.block[i - 4].bmi.mv.as_int;
   1046             }
   1047             step_param = 2;
   1048           }
   1049         }
   1050 
   1051         further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
   1052 
   1053         {
   1054           int sadpb = x->sadperbit4;
   1055           int_mv mvp_full;
   1056 
   1057           mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3;
   1058           mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3;
   1059 
   1060           /* find first label */
   1061           n = vp8_mbsplit_offset[segmentation][i];
   1062 
   1063           c = &x->block[n];
   1064           e = &x->e_mbd.block[n];
   1065 
   1066           {
   1067             bestsme = cpi->diamond_search_sad(
   1068                 x, c, e, &mvp_full, &mode_mv[NEW4X4], step_param, sadpb, &num00,
   1069                 v_fn_ptr, x->mvcost, bsi->ref_mv);
   1070 
   1071             n = num00;
   1072             num00 = 0;
   1073 
   1074             while (n < further_steps) {
   1075               n++;
   1076 
   1077               if (num00) {
   1078                 num00--;
   1079               } else {
   1080                 thissme = cpi->diamond_search_sad(
   1081                     x, c, e, &mvp_full, &temp_mv, step_param + n, sadpb, &num00,
   1082                     v_fn_ptr, x->mvcost, bsi->ref_mv);
   1083 
   1084                 if (thissme < bestsme) {
   1085                   bestsme = thissme;
   1086                   mode_mv[NEW4X4].as_int = temp_mv.as_int;
   1087                 }
   1088               }
   1089             }
   1090           }
   1091 
   1092           sseshift = segmentation_to_sseshift[segmentation];
   1093 
   1094           /* Should we do a full search (best quality only) */
   1095           if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000) {
   1096             /* Check if mvp_full is within the range. */
   1097             vp8_clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max, x->mv_row_min,
   1098                          x->mv_row_max);
   1099 
   1100             thissme = cpi->full_search_sad(x, c, e, &mvp_full, sadpb, 16,
   1101                                            v_fn_ptr, x->mvcost, bsi->ref_mv);
   1102 
   1103             if (thissme < bestsme) {
   1104               bestsme = thissme;
   1105               mode_mv[NEW4X4].as_int = e->bmi.mv.as_int;
   1106             } else {
   1107               /* The full search result is actually worse so
    1108                * reinstate the previous best vector
   1109                */
   1110               e->bmi.mv.as_int = mode_mv[NEW4X4].as_int;
   1111             }
   1112           }
   1113         }
   1114 
   1115         if (bestsme < INT_MAX) {
   1116           int disto;
   1117           unsigned int sse;
   1118           cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4], bsi->ref_mv,
   1119                                        x->errorperbit, v_fn_ptr, x->mvcost,
   1120                                        &disto, &sse);
   1121         }
   1122       } /* NEW4X4 */
   1123 
   1124       rate = labels2mode(x, labels, i, this_mode, &mode_mv[this_mode],
   1125                          bsi->ref_mv, x->mvcost);
   1126 
   1127       /* Trap vectors that reach beyond the UMV borders */
   1128       if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) ||
   1129           ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) ||
   1130           ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) ||
   1131           ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max)) {
   1132         continue;
   1133       }
   1134 
   1135       distortion = vp8_encode_inter_mb_segment(x, labels, i) / 4;
   1136 
   1137       labelyrate = rdcost_mbsegment_y(x, labels, i, ta_s, tl_s);
   1138       rate += labelyrate;
   1139 
   1140       this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
   1141 
   1142       if (this_rd < best_label_rd) {
   1143         sbr = rate;
   1144         sbd = distortion;
   1145         bestlabelyrate = labelyrate;
   1146         mode_selected = this_mode;
   1147         best_label_rd = this_rd;
   1148 
   1149         memcpy(&t_above_b, &t_above_s, sizeof(ENTROPY_CONTEXT_PLANES));
   1150         memcpy(&t_left_b, &t_left_s, sizeof(ENTROPY_CONTEXT_PLANES));
   1151       }
   1152     } /*for each 4x4 mode*/
   1153 
   1154     memcpy(&t_above, &t_above_b, sizeof(ENTROPY_CONTEXT_PLANES));
   1155     memcpy(&t_left, &t_left_b, sizeof(ENTROPY_CONTEXT_PLANES));
   1156 
   1157     labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected],
   1158                 bsi->ref_mv, x->mvcost);
   1159 
   1160     br += sbr;
   1161     bd += sbd;
   1162     segmentyrate += bestlabelyrate;
   1163     this_segment_rd += best_label_rd;
   1164 
   1165     if (this_segment_rd >= bsi->segment_rd) break;
   1166 
   1167   } /* for each label */
   1168 
   1169   if (this_segment_rd < bsi->segment_rd) {
   1170     bsi->r = br;
   1171     bsi->d = bd;
   1172     bsi->segment_yrate = segmentyrate;
   1173     bsi->segment_rd = this_segment_rd;
   1174     bsi->segment_num = segmentation;
   1175 
   1176     /* store everything needed to come back to this!! */
   1177     for (i = 0; i < 16; ++i) {
   1178       bsi->mvs[i].as_mv = x->partition_info->bmi[i].mv.as_mv;
   1179       bsi->modes[i] = x->partition_info->bmi[i].mode;
   1180       bsi->eobs[i] = x->e_mbd.eobs[i];
   1181     }
   1182   }
   1183 }
   1184 
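/* vp8_cal_step_param() converts a search range sr (in full pels) into the
 * initial diamond-search step: sr is clamped to [1, MAX_FIRST_STEP],
 * step = floor(log2(sr)), and *sp = MAX_MVSEARCH_STEPS - 1 - step.  For
 * example, sr == 8 gives step == 3 and, assuming MAX_MVSEARCH_STEPS == 8 as
 * in mcomp.h, *sp == 4.
 */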
   1185 static void vp8_cal_step_param(int sr, int *sp) {
   1186   int step = 0;
   1187 
   1188   if (sr > MAX_FIRST_STEP) {
   1189     sr = MAX_FIRST_STEP;
   1190   } else if (sr < 1) {
   1191     sr = 1;
   1192   }
   1193 
   1194   while (sr >>= 1) step++;
   1195 
   1196   *sp = MAX_MVSEARCH_STEPS - 1 - step;
   1197 }
   1198 
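/* Partition search strategy: at compressor_speed 0 all four segmentations are
 * checked in a fixed order; otherwise 8x8 is checked first, its MVs seed the
 * 16x8 and 8x16 searches (with the MV search window clamped around the
 * reference MV), and the 4x4 split is only tried when 8x8 is still the best
 * segmentation or the no_skip_block4x4_search speed feature forces it.
 */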
   1199 static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
   1200                                            int_mv *best_ref_mv, int best_rd,
   1201                                            int *mdcounts, int *returntotrate,
   1202                                            int *returnyrate,
   1203                                            int *returndistortion,
   1204                                            int mvthresh) {
   1205   int i;
   1206   BEST_SEG_INFO bsi;
   1207 
   1208   memset(&bsi, 0, sizeof(bsi));
   1209 
   1210   bsi.segment_rd = best_rd;
   1211   bsi.ref_mv = best_ref_mv;
   1212   bsi.mvp.as_int = best_ref_mv->as_int;
   1213   bsi.mvthresh = mvthresh;
   1214   bsi.mdcounts = mdcounts;
   1215 
   1216   for (i = 0; i < 16; ++i) {
   1217     bsi.modes[i] = ZERO4X4;
   1218   }
   1219 
   1220   if (cpi->compressor_speed == 0) {
   1221     /* for now, we will keep the original segmentation order
   1222        when in best quality mode */
   1223     rd_check_segment(cpi, x, &bsi, BLOCK_16X8);
   1224     rd_check_segment(cpi, x, &bsi, BLOCK_8X16);
   1225     rd_check_segment(cpi, x, &bsi, BLOCK_8X8);
   1226     rd_check_segment(cpi, x, &bsi, BLOCK_4X4);
   1227   } else {
   1228     int sr;
   1229 
   1230     rd_check_segment(cpi, x, &bsi, BLOCK_8X8);
   1231 
   1232     if (bsi.segment_rd < best_rd) {
   1233       int col_min = ((best_ref_mv->as_mv.col + 7) >> 3) - MAX_FULL_PEL_VAL;
   1234       int row_min = ((best_ref_mv->as_mv.row + 7) >> 3) - MAX_FULL_PEL_VAL;
   1235       int col_max = (best_ref_mv->as_mv.col >> 3) + MAX_FULL_PEL_VAL;
   1236       int row_max = (best_ref_mv->as_mv.row >> 3) + MAX_FULL_PEL_VAL;
   1237 
   1238       int tmp_col_min = x->mv_col_min;
   1239       int tmp_col_max = x->mv_col_max;
   1240       int tmp_row_min = x->mv_row_min;
   1241       int tmp_row_max = x->mv_row_max;
   1242 
   1243       /* Get intersection of UMV window and valid MV window to reduce # of
   1244        * checks in diamond search. */
   1245       if (x->mv_col_min < col_min) x->mv_col_min = col_min;
   1246       if (x->mv_col_max > col_max) x->mv_col_max = col_max;
   1247       if (x->mv_row_min < row_min) x->mv_row_min = row_min;
   1248       if (x->mv_row_max > row_max) x->mv_row_max = row_max;
   1249 
   1250       /* Get 8x8 result */
   1251       bsi.sv_mvp[0].as_int = bsi.mvs[0].as_int;
   1252       bsi.sv_mvp[1].as_int = bsi.mvs[2].as_int;
   1253       bsi.sv_mvp[2].as_int = bsi.mvs[8].as_int;
   1254       bsi.sv_mvp[3].as_int = bsi.mvs[10].as_int;
   1255 
    1256       /* Use the 8x8 result as the 16x8/8x16 predictor MV. Adjust the search
    1257        * range according to how close the two MVs are. */
   1258       /* block 8X16 */
   1259       {
   1260         sr =
   1261             MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[2].as_mv.row)) >> 3,
   1262                  (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[2].as_mv.col)) >> 3);
   1263         vp8_cal_step_param(sr, &bsi.sv_istep[0]);
   1264 
   1265         sr =
   1266             MAXF((abs(bsi.sv_mvp[1].as_mv.row - bsi.sv_mvp[3].as_mv.row)) >> 3,
   1267                  (abs(bsi.sv_mvp[1].as_mv.col - bsi.sv_mvp[3].as_mv.col)) >> 3);
   1268         vp8_cal_step_param(sr, &bsi.sv_istep[1]);
   1269 
   1270         rd_check_segment(cpi, x, &bsi, BLOCK_8X16);
   1271       }
   1272 
   1273       /* block 16X8 */
   1274       {
   1275         sr =
   1276             MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[1].as_mv.row)) >> 3,
   1277                  (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[1].as_mv.col)) >> 3);
   1278         vp8_cal_step_param(sr, &bsi.sv_istep[0]);
   1279 
   1280         sr =
   1281             MAXF((abs(bsi.sv_mvp[2].as_mv.row - bsi.sv_mvp[3].as_mv.row)) >> 3,
   1282                  (abs(bsi.sv_mvp[2].as_mv.col - bsi.sv_mvp[3].as_mv.col)) >> 3);
   1283         vp8_cal_step_param(sr, &bsi.sv_istep[1]);
   1284 
   1285         rd_check_segment(cpi, x, &bsi, BLOCK_16X8);
   1286       }
   1287 
    1288       /* If 8x8 is better than 16x8/8x16, then do the 4x4 search. */
    1289       /* Do not skip 4x4 if speed == 0 (good quality). */
   1290       if (cpi->sf.no_skip_block4x4_search || bsi.segment_num == BLOCK_8X8)
   1291       /* || (sv_segment_rd8x8-bsi.segment_rd) < sv_segment_rd8x8>>5) */
   1292       {
   1293         bsi.mvp.as_int = bsi.sv_mvp[0].as_int;
   1294         rd_check_segment(cpi, x, &bsi, BLOCK_4X4);
   1295       }
   1296 
   1297       /* restore UMV window */
   1298       x->mv_col_min = tmp_col_min;
   1299       x->mv_col_max = tmp_col_max;
   1300       x->mv_row_min = tmp_row_min;
   1301       x->mv_row_max = tmp_row_max;
   1302     }
   1303   }
   1304 
   1305   /* set it to the best */
   1306   for (i = 0; i < 16; ++i) {
   1307     BLOCKD *bd = &x->e_mbd.block[i];
   1308 
   1309     bd->bmi.mv.as_int = bsi.mvs[i].as_int;
   1310     *bd->eob = bsi.eobs[i];
   1311   }
   1312 
   1313   *returntotrate = bsi.r;
   1314   *returndistortion = bsi.d;
   1315   *returnyrate = bsi.segment_yrate;
   1316 
   1317   /* save partitions */
   1318   x->e_mbd.mode_info_context->mbmi.partitioning = bsi.segment_num;
   1319   x->partition_info->count = vp8_mbsplit_count[bsi.segment_num];
   1320 
   1321   for (i = 0; i < x->partition_info->count; ++i) {
   1322     int j;
   1323 
   1324     j = vp8_mbsplit_offset[bsi.segment_num][i];
   1325 
   1326     x->partition_info->bmi[i].mode = bsi.modes[j];
   1327     x->partition_info->bmi[i].mv.as_mv = bsi.mvs[j].as_mv;
   1328   }
   1329   /*
   1330    * used to set x->e_mbd.mode_info_context->mbmi.mv.as_int
   1331    */
   1332   x->partition_info->bmi[15].mv.as_int = bsi.mvs[15].as_int;
   1333 
   1334   return bsi.segment_rd;
   1335 }
   1336 
   1337 /* The improved MV prediction */
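/* Up to eight candidate MVs are gathered: three from the above, left and
 * above-left neighbours in the current frame, and five from the co-located
 * and surrounding positions in the last frame (when it was not a key frame).
 * The prediction is the first candidate, in SAD-sorted order, whose reference
 * frame matches the current block's; failing that, the component-wise median
 * of the candidates.  *sr returns a search-range hint (3 or 2 on a reference
 * match, 0 to let the caller decide).
 */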
   1338 void vp8_mv_pred(VP8_COMP *cpi, MACROBLOCKD *xd, const MODE_INFO *here,
   1339                  int_mv *mvp, int refframe, int *ref_frame_sign_bias, int *sr,
   1340                  int near_sadidx[]) {
   1341   const MODE_INFO *above = here - xd->mode_info_stride;
   1342   const MODE_INFO *left = here - 1;
   1343   const MODE_INFO *aboveleft = above - 1;
   1344   int_mv near_mvs[8];
   1345   int near_ref[8];
   1346   int_mv mv;
   1347   int vcnt = 0;
   1348   int find = 0;
   1349   int mb_offset;
   1350 
   1351   int mvx[8];
   1352   int mvy[8];
   1353   int i;
   1354 
   1355   mv.as_int = 0;
   1356 
   1357   if (here->mbmi.ref_frame != INTRA_FRAME) {
   1358     near_mvs[0].as_int = near_mvs[1].as_int = near_mvs[2].as_int =
   1359         near_mvs[3].as_int = near_mvs[4].as_int = near_mvs[5].as_int =
   1360             near_mvs[6].as_int = near_mvs[7].as_int = 0;
   1361     near_ref[0] = near_ref[1] = near_ref[2] = near_ref[3] = near_ref[4] =
   1362         near_ref[5] = near_ref[6] = near_ref[7] = 0;
   1363 
    1364     /* read in the MVs of 3 nearby blocks from the current frame as
    1365      * prediction candidates.
    1366      */
   1367     if (above->mbmi.ref_frame != INTRA_FRAME) {
   1368       near_mvs[vcnt].as_int = above->mbmi.mv.as_int;
   1369       mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame], refframe,
   1370               &near_mvs[vcnt], ref_frame_sign_bias);
   1371       near_ref[vcnt] = above->mbmi.ref_frame;
   1372     }
   1373     vcnt++;
   1374     if (left->mbmi.ref_frame != INTRA_FRAME) {
   1375       near_mvs[vcnt].as_int = left->mbmi.mv.as_int;
   1376       mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame], refframe,
   1377               &near_mvs[vcnt], ref_frame_sign_bias);
   1378       near_ref[vcnt] = left->mbmi.ref_frame;
   1379     }
   1380     vcnt++;
   1381     if (aboveleft->mbmi.ref_frame != INTRA_FRAME) {
   1382       near_mvs[vcnt].as_int = aboveleft->mbmi.mv.as_int;
   1383       mv_bias(ref_frame_sign_bias[aboveleft->mbmi.ref_frame], refframe,
   1384               &near_mvs[vcnt], ref_frame_sign_bias);
   1385       near_ref[vcnt] = aboveleft->mbmi.ref_frame;
   1386     }
   1387     vcnt++;
   1388 
    1389     /* read in the MVs of 5 nearby blocks from the last frame. */
   1390     if (cpi->common.last_frame_type != KEY_FRAME) {
   1391       mb_offset = (-xd->mb_to_top_edge / 128 + 1) * (xd->mode_info_stride + 1) +
   1392                   (-xd->mb_to_left_edge / 128 + 1);
   1393 
   1394       /* current in last frame */
   1395       if (cpi->lf_ref_frame[mb_offset] != INTRA_FRAME) {
   1396         near_mvs[vcnt].as_int = cpi->lfmv[mb_offset].as_int;
   1397         mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset], refframe,
   1398                 &near_mvs[vcnt], ref_frame_sign_bias);
   1399         near_ref[vcnt] = cpi->lf_ref_frame[mb_offset];
   1400       }
   1401       vcnt++;
   1402 
   1403       /* above in last frame */
   1404       if (cpi->lf_ref_frame[mb_offset - xd->mode_info_stride - 1] !=
   1405           INTRA_FRAME) {
   1406         near_mvs[vcnt].as_int =
   1407             cpi->lfmv[mb_offset - xd->mode_info_stride - 1].as_int;
   1408         mv_bias(
   1409             cpi->lf_ref_frame_sign_bias[mb_offset - xd->mode_info_stride - 1],
   1410             refframe, &near_mvs[vcnt], ref_frame_sign_bias);
   1411         near_ref[vcnt] =
   1412             cpi->lf_ref_frame[mb_offset - xd->mode_info_stride - 1];
   1413       }
   1414       vcnt++;
   1415 
   1416       /* left in last frame */
   1417       if (cpi->lf_ref_frame[mb_offset - 1] != INTRA_FRAME) {
   1418         near_mvs[vcnt].as_int = cpi->lfmv[mb_offset - 1].as_int;
   1419         mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset - 1], refframe,
   1420                 &near_mvs[vcnt], ref_frame_sign_bias);
   1421         near_ref[vcnt] = cpi->lf_ref_frame[mb_offset - 1];
   1422       }
   1423       vcnt++;
   1424 
   1425       /* right in last frame */
   1426       if (cpi->lf_ref_frame[mb_offset + 1] != INTRA_FRAME) {
   1427         near_mvs[vcnt].as_int = cpi->lfmv[mb_offset + 1].as_int;
   1428         mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset + 1], refframe,
   1429                 &near_mvs[vcnt], ref_frame_sign_bias);
   1430         near_ref[vcnt] = cpi->lf_ref_frame[mb_offset + 1];
   1431       }
   1432       vcnt++;
   1433 
   1434       /* below in last frame */
   1435       if (cpi->lf_ref_frame[mb_offset + xd->mode_info_stride + 1] !=
   1436           INTRA_FRAME) {
   1437         near_mvs[vcnt].as_int =
   1438             cpi->lfmv[mb_offset + xd->mode_info_stride + 1].as_int;
   1439         mv_bias(
   1440             cpi->lf_ref_frame_sign_bias[mb_offset + xd->mode_info_stride + 1],
   1441             refframe, &near_mvs[vcnt], ref_frame_sign_bias);
   1442         near_ref[vcnt] =
   1443             cpi->lf_ref_frame[mb_offset + xd->mode_info_stride + 1];
   1444       }
   1445       vcnt++;
   1446     }
   1447 
   1448     for (i = 0; i < vcnt; ++i) {
   1449       if (near_ref[near_sadidx[i]] != INTRA_FRAME) {
   1450         if (here->mbmi.ref_frame == near_ref[near_sadidx[i]]) {
   1451           mv.as_int = near_mvs[near_sadidx[i]].as_int;
   1452           find = 1;
   1453           if (i < 3) {
   1454             *sr = 3;
   1455           } else {
   1456             *sr = 2;
   1457           }
   1458           break;
   1459         }
   1460       }
   1461     }
   1462 
   1463     if (!find) {
   1464       for (i = 0; i < vcnt; ++i) {
   1465         mvx[i] = near_mvs[i].as_mv.row;
   1466         mvy[i] = near_mvs[i].as_mv.col;
   1467       }
   1468 
   1469       insertsortmv(mvx, vcnt);
   1470       insertsortmv(mvy, vcnt);
   1471       mv.as_mv.row = mvx[vcnt / 2];
   1472       mv.as_mv.col = mvy[vcnt / 2];
   1473 
    1474       /* sr is set to 0 to allow the calling function to decide the search
    1475        * range.
    1476        */
   1477       *sr = 0;
   1478     }
   1479   }
   1480 
   1481   /* Set up return values */
   1482   mvp->as_int = mv.as_int;
   1483   vp8_clamp_mv2(mvp, xd);
   1484 }
   1485 
   1486 void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x,
   1487                  int recon_yoffset, int near_sadidx[]) {
   1488   /* near_sad indexes:
   1489    *   0-cf above, 1-cf left, 2-cf aboveleft,
   1490    *   3-lf current, 4-lf above, 5-lf left, 6-lf right, 7-lf below
   1491    */
   1492   int near_sad[8] = { 0 };
   1493   BLOCK *b = &x->block[0];
   1494   unsigned char *src_y_ptr = *(b->base_src);
   1495 
   1496   /* calculate sad for current frame 3 nearby MBs. */
   1497   if (xd->mb_to_top_edge == 0 && xd->mb_to_left_edge == 0) {
   1498     near_sad[0] = near_sad[1] = near_sad[2] = INT_MAX;
   1499   } else if (xd->mb_to_top_edge ==
   1500              0) { /* only has left MB for sad calculation. */
   1501     near_sad[0] = near_sad[2] = INT_MAX;
   1502     near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(
   1503         src_y_ptr, b->src_stride, xd->dst.y_buffer - 16, xd->dst.y_stride);
   1504   } else if (xd->mb_to_left_edge ==
    1505              0) { /* only has above MB for sad calculation. */
   1506     near_sad[1] = near_sad[2] = INT_MAX;
   1507     near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(
   1508         src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride * 16,
   1509         xd->dst.y_stride);
   1510   } else {
   1511     near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(
   1512         src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride * 16,
   1513         xd->dst.y_stride);
   1514     near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(
   1515         src_y_ptr, b->src_stride, xd->dst.y_buffer - 16, xd->dst.y_stride);
   1516     near_sad[2] = cpi->fn_ptr[BLOCK_16X16].sdf(
   1517         src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride * 16 - 16,
   1518         xd->dst.y_stride);
   1519   }
   1520 
   1521   if (cpi->common.last_frame_type != KEY_FRAME) {
    1522     /* calculate SAD for the 5 nearby MBs in the last frame. */
   1523     unsigned char *pre_y_buffer =
   1524         cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_buffer + recon_yoffset;
   1525     int pre_y_stride = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_stride;
   1526 
   1527     if (xd->mb_to_top_edge == 0) near_sad[4] = INT_MAX;
   1528     if (xd->mb_to_left_edge == 0) near_sad[5] = INT_MAX;
   1529     if (xd->mb_to_right_edge == 0) near_sad[6] = INT_MAX;
   1530     if (xd->mb_to_bottom_edge == 0) near_sad[7] = INT_MAX;
   1531 
   1532     if (near_sad[4] != INT_MAX) {
   1533       near_sad[4] = cpi->fn_ptr[BLOCK_16X16].sdf(
   1534           src_y_ptr, b->src_stride, pre_y_buffer - pre_y_stride * 16,
   1535           pre_y_stride);
   1536     }
   1537     if (near_sad[5] != INT_MAX) {
   1538       near_sad[5] = cpi->fn_ptr[BLOCK_16X16].sdf(
   1539           src_y_ptr, b->src_stride, pre_y_buffer - 16, pre_y_stride);
   1540     }
   1541     near_sad[3] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride,
   1542                                                pre_y_buffer, pre_y_stride);
   1543     if (near_sad[6] != INT_MAX) {
   1544       near_sad[6] = cpi->fn_ptr[BLOCK_16X16].sdf(
   1545           src_y_ptr, b->src_stride, pre_y_buffer + 16, pre_y_stride);
   1546     }
   1547     if (near_sad[7] != INT_MAX) {
   1548       near_sad[7] = cpi->fn_ptr[BLOCK_16X16].sdf(
   1549           src_y_ptr, b->src_stride, pre_y_buffer + pre_y_stride * 16,
   1550           pre_y_stride);
   1551     }
   1552   }
   1553 
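          /* Order the neighbour indices by ascending SAD; neighbours that were
           * unavailable keep INT_MAX and therefore sort to the end of near_sadidx. */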
   1554   if (cpi->common.last_frame_type != KEY_FRAME) {
   1555     insertsortsad(near_sad, near_sadidx, 8);
   1556   } else {
   1557     insertsortsad(near_sad, near_sadidx, 3);
   1558   }
   1559 }
   1560 
   1561 static void rd_update_mvcount(MACROBLOCK *x, int_mv *best_ref_mv) {
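          /* MVs are stored at 1/8-pel resolution but coded at quarter-pel, so the
           * difference from the reference MV is halved before indexing MVcount;
           * mv_max re-centres the index so it is non-negative. */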
   1562   if (x->e_mbd.mode_info_context->mbmi.mode == SPLITMV) {
   1563     int i;
   1564 
   1565     for (i = 0; i < x->partition_info->count; ++i) {
   1566       if (x->partition_info->bmi[i].mode == NEW4X4) {
   1567         x->MVcount[0][mv_max + ((x->partition_info->bmi[i].mv.as_mv.row -
   1568                                  best_ref_mv->as_mv.row) >>
   1569                                 1)]++;
   1570         x->MVcount[1][mv_max + ((x->partition_info->bmi[i].mv.as_mv.col -
   1571                                  best_ref_mv->as_mv.col) >>
   1572                                 1)]++;
   1573       }
   1574     }
   1575   } else if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV) {
   1576     x->MVcount[0][mv_max + ((x->e_mbd.mode_info_context->mbmi.mv.as_mv.row -
   1577                              best_ref_mv->as_mv.row) >>
   1578                             1)]++;
   1579     x->MVcount[1][mv_max + ((x->e_mbd.mode_info_context->mbmi.mv.as_mv.col -
   1580                              best_ref_mv->as_mv.col) >>
   1581                             1)]++;
   1582   }
   1583 }
   1584 
   1585 static int evaluate_inter_mode_rd(int mdcounts[4], RATE_DISTORTION *rd,
   1586                                   int *disable_skip, VP8_COMP *cpi,
   1587                                   MACROBLOCK *x) {
   1588   MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode;
   1589   BLOCK *b = &x->block[0];
   1590   MACROBLOCKD *xd = &x->e_mbd;
   1591   int distortion;
   1592   vp8_build_inter16x16_predictors_mby(&x->e_mbd, x->e_mbd.predictor, 16);
   1593 
   1594   if (cpi->active_map_enabled && x->active_ptr[0] == 0) {
   1595     x->skip = 1;
   1596   } else if (x->encode_breakout) {
   1597     unsigned int sse;
   1598     unsigned int var;
   1599     unsigned int threshold =
   1600         (xd->block[0].dequant[1] * xd->block[0].dequant[1] >> 4);
   1601 
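              /* The breakout threshold is derived from the square of the Y AC
               * dequantizer, but never allowed to fall below the user-configured
               * encode_breakout. */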
   1602     if (threshold < x->encode_breakout) threshold = x->encode_breakout;
   1603 
   1604     var = vpx_variance16x16(*(b->base_src), b->src_stride, x->e_mbd.predictor,
   1605                             16, &sse);
   1606 
   1607     if (sse < threshold) {
   1608       unsigned int q2dc = xd->block[24].dequant[0];
    1609       /* If there is no codeable 2nd order dc
    1610          or a very small uniform pixel change */
    1611       if ((sse - var < ((q2dc * q2dc) >> 4)) || (sse / 2 > var && sse - var < 64)) {
   1612         /* Check u and v to make sure skip is ok */
   1613         unsigned int sse2 = VP8_UVSSE(x);
   1614         if (sse2 * 2 < threshold) {
   1615           x->skip = 1;
   1616           rd->distortion2 = sse + sse2;
   1617           rd->rate2 = 500;
   1618 
   1619           /* for best_yrd calculation */
   1620           rd->rate_uv = 0;
   1621           rd->distortion_uv = sse2;
   1622 
   1623           *disable_skip = 1;
   1624           return RDCOST(x->rdmult, x->rddiv, rd->rate2, rd->distortion2);
   1625         }
   1626       }
   1627     }
   1628   }
   1629 
   1630   /* Add in the Mv/mode cost */
   1631   rd->rate2 += vp8_cost_mv_ref(this_mode, mdcounts);
   1632 
   1633   /* Y cost and distortion */
   1634   macro_block_yrd(x, &rd->rate_y, &distortion);
   1635   rd->rate2 += rd->rate_y;
   1636   rd->distortion2 += distortion;
   1637 
   1638   /* UV cost and distortion */
   1639   rd_inter16x16_uv(cpi, x, &rd->rate_uv, &rd->distortion_uv,
   1640                    cpi->common.full_pixel);
   1641   rd->rate2 += rd->rate_uv;
   1642   rd->distortion2 += rd->distortion_uv;
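            /* No early breakout was taken: the final RD cost for this mode is computed
             * later in calculate_final_rd_costs(), so signal that with INT_MAX here. */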
   1643   return INT_MAX;
   1644 }
   1645 
   1646 static int calculate_final_rd_costs(int this_rd, RATE_DISTORTION *rd,
   1647                                     int *other_cost, int disable_skip,
   1648                                     int uv_intra_tteob, int intra_rd_penalty,
   1649                                     VP8_COMP *cpi, MACROBLOCK *x) {
   1650   MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode;
   1651 
    1652   /* Where skip is allowable, add in the default per-mb cost for the
    1653    * no-skip case. Where we then decide to skip, we have to delete this
    1654    * and replace it with the cost of signalling a skip.
    1655    */
   1656   if (cpi->common.mb_no_coeff_skip) {
   1657     *other_cost += vp8_cost_bit(cpi->prob_skip_false, 0);
   1658     rd->rate2 += *other_cost;
   1659   }
   1660 
   1661   /* Estimate the reference frame signaling cost and add it
   1662    * to the rolling cost variable.
   1663    */
   1664   rd->rate2 += x->ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
   1665 
   1666   if (!disable_skip) {
   1667     /* Test for the condition where skip block will be activated
   1668      * because there are no non zero coefficients and make any
   1669      * necessary adjustment for rate
   1670      */
   1671     if (cpi->common.mb_no_coeff_skip) {
   1672       int i;
   1673       int tteob;
   1674       int has_y2_block = (this_mode != SPLITMV && this_mode != B_PRED);
   1675 
   1676       tteob = 0;
   1677       if (has_y2_block) tteob += x->e_mbd.eobs[24];
   1678 
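                /* When a Y2 block is present, the luma DCs are signalled via block 24,
                 * so a Y eob of 1 (DC only) does not count as a coded coefficient here. */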
   1679       for (i = 0; i < 16; ++i) tteob += (x->e_mbd.eobs[i] > has_y2_block);
   1680 
   1681       if (x->e_mbd.mode_info_context->mbmi.ref_frame) {
   1682         for (i = 16; i < 24; ++i) tteob += x->e_mbd.eobs[i];
   1683       } else {
   1684         tteob += uv_intra_tteob;
   1685       }
   1686 
   1687       if (tteob == 0) {
   1688         rd->rate2 -= (rd->rate_y + rd->rate_uv);
   1689         /* for best_yrd calculation */
   1690         rd->rate_uv = 0;
   1691 
   1692         /* Back out no skip flag costing and add in skip flag costing */
   1693         if (cpi->prob_skip_false) {
   1694           int prob_skip_cost;
   1695 
   1696           prob_skip_cost = vp8_cost_bit(cpi->prob_skip_false, 1);
   1697           prob_skip_cost -= (int)vp8_cost_bit(cpi->prob_skip_false, 0);
   1698           rd->rate2 += prob_skip_cost;
   1699           *other_cost += prob_skip_cost;
   1700         }
   1701       }
   1702     }
   1703     /* Calculate the final RD estimate for this mode */
   1704     this_rd = RDCOST(x->rdmult, x->rddiv, rd->rate2, rd->distortion2);
   1705     if (this_rd < INT_MAX &&
   1706         x->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
   1707       this_rd += intra_rd_penalty;
   1708     }
   1709   }
   1710   return this_rd;
   1711 }
   1712 
   1713 static void update_best_mode(BEST_MODE *best_mode, int this_rd,
   1714                              RATE_DISTORTION *rd, int other_cost,
   1715                              MACROBLOCK *x) {
   1716   MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode;
   1717 
   1718   other_cost += x->ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
   1719 
   1720   /* Calculate the final y RD estimate for this mode */
   1721   best_mode->yrd =
   1722       RDCOST(x->rdmult, x->rddiv, (rd->rate2 - rd->rate_uv - other_cost),
   1723              (rd->distortion2 - rd->distortion_uv));
   1724 
   1725   best_mode->rd = this_rd;
   1726   memcpy(&best_mode->mbmode, &x->e_mbd.mode_info_context->mbmi,
   1727          sizeof(MB_MODE_INFO));
   1728   memcpy(&best_mode->partition, x->partition_info, sizeof(PARTITION_INFO));
   1729 
   1730   if ((this_mode == B_PRED) || (this_mode == SPLITMV)) {
   1731     int i;
   1732     for (i = 0; i < 16; ++i) {
   1733       best_mode->bmodes[i] = x->e_mbd.block[i].bmi;
   1734     }
   1735   }
   1736 }
   1737 
   1738 void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
   1739                             int recon_uvoffset, int *returnrate,
   1740                             int *returndistortion, int *returnintra, int mb_row,
   1741                             int mb_col) {
   1742   BLOCK *b = &x->block[0];
   1743   BLOCKD *d = &x->e_mbd.block[0];
   1744   MACROBLOCKD *xd = &x->e_mbd;
   1745   int_mv best_ref_mv_sb[2];
   1746   int_mv mode_mv_sb[2][MB_MODE_COUNT];
   1747   int_mv best_ref_mv;
   1748   int_mv *mode_mv;
   1749   MB_PREDICTION_MODE this_mode;
   1750   int num00;
   1751   int best_mode_index = 0;
   1752   BEST_MODE best_mode;
   1753 
   1754   int i;
   1755   int mode_index;
   1756   int mdcounts[4];
   1757   int rate;
   1758   RATE_DISTORTION rd;
   1759   int uv_intra_rate, uv_intra_distortion, uv_intra_rate_tokenonly;
   1760   int uv_intra_tteob = 0;
   1761   int uv_intra_done = 0;
   1762 
   1763   MB_PREDICTION_MODE uv_intra_mode = 0;
   1764   int_mv mvp;
   1765   int near_sadidx[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
   1766   int saddone = 0;
    1767   /* search range obtained from mv_pred(). It uses step_param levels (0-7). */
   1768   int sr = 0;
   1769 
   1770   unsigned char *plane[4][3];
   1771   int ref_frame_map[4];
   1772   int sign_bias = 0;
   1773 
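            /* Flat penalty added to intra-mode RD costs, scaled with the frame's DC
             * quantizer, to bias the search toward inter modes as Q rises. */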
   1774   int intra_rd_penalty =
   1775       10 * vp8_dc_quant(cpi->common.base_qindex, cpi->common.y1dc_delta_q);
   1776 
   1777 #if CONFIG_TEMPORAL_DENOISING
   1778   unsigned int zero_mv_sse = UINT_MAX, best_sse = UINT_MAX,
   1779                best_rd_sse = UINT_MAX;
   1780 #endif
   1781 
    1782   // _uv variables are not set consistently before calling update_best_mode.
   1783   rd.rate_uv = 0;
   1784   rd.distortion_uv = 0;
   1785 
   1786   mode_mv = mode_mv_sb[sign_bias];
   1787   best_ref_mv.as_int = 0;
   1788   best_mode.rd = INT_MAX;
   1789   best_mode.yrd = INT_MAX;
   1790   best_mode.intra_rd = INT_MAX;
   1791   memset(mode_mv_sb, 0, sizeof(mode_mv_sb));
   1792   memset(&best_mode.mbmode, 0, sizeof(best_mode.mbmode));
   1793   memset(&best_mode.bmodes, 0, sizeof(best_mode.bmodes));
   1794 
   1795   /* Setup search priorities */
   1796   get_reference_search_order(cpi, ref_frame_map);
   1797 
    1798   /* Check to see if there is at least 1 valid reference frame for which we
    1799    * need to calculate near_mvs.
   1800    */
   1801   if (ref_frame_map[1] > 0) {
   1802     sign_bias = vp8_find_near_mvs_bias(
   1803         &x->e_mbd, x->e_mbd.mode_info_context, mode_mv_sb, best_ref_mv_sb,
   1804         mdcounts, ref_frame_map[1], cpi->common.ref_frame_sign_bias);
   1805 
   1806     mode_mv = mode_mv_sb[sign_bias];
   1807     best_ref_mv.as_int = best_ref_mv_sb[sign_bias].as_int;
   1808   }
   1809 
   1810   get_predictor_pointers(cpi, plane, recon_yoffset, recon_uvoffset);
   1811 
   1812   *returnintra = INT_MAX;
   1813   /* Count of the number of MBs tested so far this frame */
   1814   x->mbs_tested_so_far++;
   1815 
   1816   x->skip = 0;
   1817 
   1818   for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
   1819     int this_rd = INT_MAX;
   1820     int disable_skip = 0;
   1821     int other_cost = 0;
   1822     int this_ref_frame = ref_frame_map[vp8_ref_frame_order[mode_index]];
   1823 
   1824     /* Test best rd so far against threshold for trying this mode. */
   1825     if (best_mode.rd <= x->rd_threshes[mode_index]) continue;
   1826 
   1827     if (this_ref_frame < 0) continue;
   1828 
    1829     /* These variables hold the rolling total cost and distortion for
    1830      * this mode.
   1831      */
   1832     rd.rate2 = 0;
   1833     rd.distortion2 = 0;
   1834 
   1835     this_mode = vp8_mode_order[mode_index];
   1836 
   1837     x->e_mbd.mode_info_context->mbmi.mode = this_mode;
   1838     x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame;
   1839 
    1840     /* Only consider ZEROMV/ALTREF_FRAME for an alt ref frame,
    1841      * unless ARNR filtering is enabled, in which case we want
    1842      * an unfiltered alternative.
   1843      */
   1844     if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
   1845       if (this_mode != ZEROMV ||
   1846           x->e_mbd.mode_info_context->mbmi.ref_frame != ALTREF_FRAME) {
   1847         continue;
   1848       }
   1849     }
   1850 
   1851     /* everything but intra */
   1852     if (x->e_mbd.mode_info_context->mbmi.ref_frame) {
   1853       x->e_mbd.pre.y_buffer = plane[this_ref_frame][0];
   1854       x->e_mbd.pre.u_buffer = plane[this_ref_frame][1];
   1855       x->e_mbd.pre.v_buffer = plane[this_ref_frame][2];
   1856 
   1857       if (sign_bias != cpi->common.ref_frame_sign_bias[this_ref_frame]) {
   1858         sign_bias = cpi->common.ref_frame_sign_bias[this_ref_frame];
   1859         mode_mv = mode_mv_sb[sign_bias];
   1860         best_ref_mv.as_int = best_ref_mv_sb[sign_bias].as_int;
   1861       }
   1862     }
   1863 
   1864     /* Check to see if the testing frequency for this mode is at its
    1865      * max. If so, prevent it from being tested and increase the
    1866      * threshold for its testing.
   1867      */
   1868     if (x->mode_test_hit_counts[mode_index] &&
   1869         (cpi->mode_check_freq[mode_index] > 1)) {
   1870       if (x->mbs_tested_so_far <= cpi->mode_check_freq[mode_index] *
   1871                                       x->mode_test_hit_counts[mode_index]) {
   1872         /* Increase the threshold for coding this mode to make it
   1873          * less likely to be chosen
   1874          */
   1875         x->rd_thresh_mult[mode_index] += 4;
   1876 
   1877         if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT) {
   1878           x->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
   1879         }
   1880 
   1881         x->rd_threshes[mode_index] =
   1882             (cpi->rd_baseline_thresh[mode_index] >> 7) *
   1883             x->rd_thresh_mult[mode_index];
   1884 
   1885         continue;
   1886       }
   1887     }
   1888 
   1889     /* We have now reached the point where we are going to test the
    1890      * current mode, so increment the counter for the number of times
    1891      * it has been tested.
   1892      */
   1893     x->mode_test_hit_counts[mode_index]++;
   1894 
   1895     /* Experimental code. Special case for gf and arf zeromv modes.
    1896      * Increase zbin size to suppress noise.
   1897      */
   1898     if (x->zbin_mode_boost_enabled) {
   1899       if (this_ref_frame == INTRA_FRAME) {
   1900         x->zbin_mode_boost = 0;
   1901       } else {
   1902         if (vp8_mode_order[mode_index] == ZEROMV) {
   1903           if (this_ref_frame != LAST_FRAME) {
   1904             x->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
   1905           } else {
   1906             x->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
   1907           }
   1908         } else if (vp8_mode_order[mode_index] == SPLITMV) {
   1909           x->zbin_mode_boost = 0;
   1910         } else {
   1911           x->zbin_mode_boost = MV_ZBIN_BOOST;
   1912         }
   1913       }
   1914 
   1915       vp8_update_zbin_extra(cpi, x);
   1916     }
   1917 
   1918     if (!uv_intra_done && this_ref_frame == INTRA_FRAME) {
   1919       rd_pick_intra_mbuv_mode(x, &uv_intra_rate, &uv_intra_rate_tokenonly,
   1920                               &uv_intra_distortion);
   1921       uv_intra_mode = x->e_mbd.mode_info_context->mbmi.uv_mode;
   1922 
   1923       /*
    1924        * The total of the eobs is used later to further adjust rate2. Since the
    1925        * uv blocks' intra eobs will be overwritten when we check inter modes,
   1926        * we need to save uv_intra_tteob here.
   1927        */
   1928       for (i = 16; i < 24; ++i) uv_intra_tteob += x->e_mbd.eobs[i];
   1929 
   1930       uv_intra_done = 1;
   1931     }
   1932 
   1933     switch (this_mode) {
   1934       case B_PRED: {
   1935         int tmp_rd;
   1936 
   1937         /* Note the rate value returned here includes the cost of
   1938          * coding the BPRED mode: x->mbmode_cost[x->e_mbd.frame_type][BPRED]
   1939          */
   1940         int distortion;
   1941         tmp_rd = rd_pick_intra4x4mby_modes(x, &rate, &rd.rate_y, &distortion,
   1942                                            best_mode.yrd);
   1943         rd.rate2 += rate;
   1944         rd.distortion2 += distortion;
   1945 
   1946         if (tmp_rd < best_mode.yrd) {
   1947           rd.rate2 += uv_intra_rate;
   1948           rd.rate_uv = uv_intra_rate_tokenonly;
   1949           rd.distortion2 += uv_intra_distortion;
   1950           rd.distortion_uv = uv_intra_distortion;
   1951         } else {
   1952           this_rd = INT_MAX;
   1953           disable_skip = 1;
   1954         }
   1955         break;
   1956       }
   1957 
   1958       case SPLITMV: {
   1959         int tmp_rd;
   1960         int this_rd_thresh;
   1961         int distortion;
   1962 
   1963         this_rd_thresh = (vp8_ref_frame_order[mode_index] == 1)
   1964                              ? x->rd_threshes[THR_NEW1]
   1965                              : x->rd_threshes[THR_NEW3];
   1966         this_rd_thresh = (vp8_ref_frame_order[mode_index] == 2)
   1967                              ? x->rd_threshes[THR_NEW2]
   1968                              : this_rd_thresh;
   1969 
   1970         tmp_rd = vp8_rd_pick_best_mbsegmentation(
   1971             cpi, x, &best_ref_mv, best_mode.yrd, mdcounts, &rate, &rd.rate_y,
   1972             &distortion, this_rd_thresh);
   1973 
   1974         rd.rate2 += rate;
   1975         rd.distortion2 += distortion;
   1976 
   1977         /* If even the 'Y' rd value of split is higher than best so far
    1978          * then don't bother looking at UV.
   1979          */
   1980         if (tmp_rd < best_mode.yrd) {
   1981           /* Now work out UV cost and add it in */
   1982           rd_inter4x4_uv(cpi, x, &rd.rate_uv, &rd.distortion_uv,
   1983                          cpi->common.full_pixel);
   1984           rd.rate2 += rd.rate_uv;
   1985           rd.distortion2 += rd.distortion_uv;
   1986         } else {
   1987           this_rd = INT_MAX;
   1988           disable_skip = 1;
   1989         }
   1990         break;
   1991       }
   1992       case DC_PRED:
   1993       case V_PRED:
   1994       case H_PRED:
   1995       case TM_PRED: {
   1996         int distortion;
   1997         x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
   1998 
   1999         vp8_build_intra_predictors_mby_s(
   2000             xd, xd->dst.y_buffer - xd->dst.y_stride, xd->dst.y_buffer - 1,
   2001             xd->dst.y_stride, xd->predictor, 16);
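                  /* The 16x16 intra prediction is built from the reconstructed row above
                   * and the column to the left, writing the result into xd->predictor. */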
   2002         macro_block_yrd(x, &rd.rate_y, &distortion);
   2003         rd.rate2 += rd.rate_y;
   2004         rd.distortion2 += distortion;
   2005         rd.rate2 += x->mbmode_cost[x->e_mbd.frame_type]
   2006                                   [x->e_mbd.mode_info_context->mbmi.mode];
   2007         rd.rate2 += uv_intra_rate;
   2008         rd.rate_uv = uv_intra_rate_tokenonly;
   2009         rd.distortion2 += uv_intra_distortion;
   2010         rd.distortion_uv = uv_intra_distortion;
   2011         break;
   2012       }
   2013 
   2014       case NEWMV: {
   2015         int thissme;
   2016         int bestsme = INT_MAX;
   2017         int step_param = cpi->sf.first_step;
   2018         int further_steps;
   2019         int n;
   2020         /* If last step (1-away) of n-step search doesn't pick the center point
   2021            as the best match, we will do a final 1-away diamond refining search
   2022         */
   2023         int do_refine = 1;
   2024 
   2025         int sadpb = x->sadperbit16;
   2026         int_mv mvp_full;
   2027 
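                  /* Full-pel search window: best_ref_mv is in 1/8-pel units, so >> 3
                   * converts to full pel; the window spans MAX_FULL_PEL_VAL either side
                   * (the +7 rounds the lower bound up). */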
   2028         int col_min = ((best_ref_mv.as_mv.col + 7) >> 3) - MAX_FULL_PEL_VAL;
   2029         int row_min = ((best_ref_mv.as_mv.row + 7) >> 3) - MAX_FULL_PEL_VAL;
   2030         int col_max = (best_ref_mv.as_mv.col >> 3) + MAX_FULL_PEL_VAL;
   2031         int row_max = (best_ref_mv.as_mv.row >> 3) + MAX_FULL_PEL_VAL;
   2032 
   2033         int tmp_col_min = x->mv_col_min;
   2034         int tmp_col_max = x->mv_col_max;
   2035         int tmp_row_min = x->mv_row_min;
   2036         int tmp_row_max = x->mv_row_max;
   2037 
   2038         if (!saddone) {
   2039           vp8_cal_sad(cpi, xd, x, recon_yoffset, &near_sadidx[0]);
   2040           saddone = 1;
   2041         }
   2042 
   2043         vp8_mv_pred(cpi, &x->e_mbd, x->e_mbd.mode_info_context, &mvp,
   2044                     x->e_mbd.mode_info_context->mbmi.ref_frame,
   2045                     cpi->common.ref_frame_sign_bias, &sr, &near_sadidx[0]);
   2046 
   2047         mvp_full.as_mv.col = mvp.as_mv.col >> 3;
   2048         mvp_full.as_mv.row = mvp.as_mv.row >> 3;
   2049 
   2050         /* Get intersection of UMV window and valid MV window to
   2051          * reduce # of checks in diamond search.
   2052          */
   2053         if (x->mv_col_min < col_min) x->mv_col_min = col_min;
   2054         if (x->mv_col_max > col_max) x->mv_col_max = col_max;
   2055         if (x->mv_row_min < row_min) x->mv_row_min = row_min;
   2056         if (x->mv_row_max > row_max) x->mv_row_max = row_max;
   2057 
   2058         /* adjust search range according to sr from mv prediction */
   2059         if (sr > step_param) step_param = sr;
   2060 
   2061         /* Initial step/diamond search */
   2062         {
   2063           bestsme = cpi->diamond_search_sad(
   2064               x, b, d, &mvp_full, &d->bmi.mv, step_param, sadpb, &num00,
   2065               &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv);
   2066           mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
   2067 
   2068           /* Further step/diamond searches as necessary */
   2069           further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
   2070 
   2071           n = num00;
   2072           num00 = 0;
   2073 
    2074           /* If there won't be any more n-step searches, check whether a
    2075            * refining search is needed. */
   2076           if (n > further_steps) do_refine = 0;
   2077 
   2078           while (n < further_steps) {
   2079             n++;
   2080 
   2081             if (num00) {
   2082               num00--;
   2083             } else {
   2084               thissme = cpi->diamond_search_sad(
   2085                   x, b, d, &mvp_full, &d->bmi.mv, step_param + n, sadpb, &num00,
   2086                   &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv);
   2087 
   2088               /* check to see if refining search is needed. */
   2089               if (num00 > (further_steps - n)) do_refine = 0;
   2090 
   2091               if (thissme < bestsme) {
   2092                 bestsme = thissme;
   2093                 mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
   2094               } else {
   2095                 d->bmi.mv.as_int = mode_mv[NEWMV].as_int;
   2096               }
   2097             }
   2098           }
   2099         }
   2100 
   2101         /* final 1-away diamond refining search */
   2102         if (do_refine == 1) {
   2103           int search_range;
   2104 
   2105           search_range = 8;
   2106 
   2107           thissme = cpi->refining_search_sad(
   2108               x, b, d, &d->bmi.mv, sadpb, search_range,
   2109               &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv);
   2110 
   2111           if (thissme < bestsme) {
   2112             bestsme = thissme;
   2113             mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
   2114           } else {
   2115             d->bmi.mv.as_int = mode_mv[NEWMV].as_int;
   2116           }
   2117         }
   2118 
   2119         x->mv_col_min = tmp_col_min;
   2120         x->mv_col_max = tmp_col_max;
   2121         x->mv_row_min = tmp_row_min;
   2122         x->mv_row_max = tmp_row_max;
   2123 
   2124         if (bestsme < INT_MAX) {
   2125           int dis; /* TODO: use dis in distortion calculation later. */
   2126           unsigned int sse;
   2127           cpi->find_fractional_mv_step(
   2128               x, b, d, &d->bmi.mv, &best_ref_mv, x->errorperbit,
   2129               &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &dis, &sse);
   2130         }
   2131 
   2132         mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
   2133 
   2134         /* Add the new motion vector cost to our rolling cost variable */
   2135         rd.rate2 +=
   2136             vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv, x->mvcost, 96);
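                  /* The final argument (96) weights the MV signalling cost;
                   * vp8_mv_bit_cost scales by Weight / 128, so the estimate is
                   * deliberately a little below the full bit cost. */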
   2137       }
   2138         // fall through
   2139 
   2140       case NEARESTMV:
   2141       case NEARMV:
    2142         /* Clip "next_nearest" so that it does not extend too far out
    2143          * of the image.
   2144          */
   2145         vp8_clamp_mv2(&mode_mv[this_mode], xd);
   2146 
   2147         /* Do not bother proceeding if the vector (from newmv, nearest
   2148          * or near) is 0,0 as this should then be coded using the zeromv
   2149          * mode.
   2150          */
   2151         if (((this_mode == NEARMV) || (this_mode == NEARESTMV)) &&
   2152             (mode_mv[this_mode].as_int == 0)) {
   2153           continue;
   2154         }
   2155         // fall through
   2156 
   2157       case ZEROMV:
   2158 
   2159         /* Trap vectors that reach beyond the UMV borders
    2160          * Note that ALL New MV, Nearest MV, Near MV and Zero MV code
   2161          * drops through to this point because of the lack of break
   2162          * statements in the previous two cases.
   2163          */
   2164         if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) ||
   2165             ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) ||
   2166             ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) ||
   2167             ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max)) {
   2168           continue;
   2169         }
   2170 
   2171         vp8_set_mbmode_and_mvs(x, this_mode, &mode_mv[this_mode]);
   2172         this_rd = evaluate_inter_mode_rd(mdcounts, &rd, &disable_skip, cpi, x);
   2173         break;
   2174 
   2175       default: break;
   2176     }
   2177 
   2178     this_rd =
   2179         calculate_final_rd_costs(this_rd, &rd, &other_cost, disable_skip,
   2180                                  uv_intra_tteob, intra_rd_penalty, cpi, x);
   2181 
    2182     /* Keep a record of the best intra distortion. */
   2183     if ((x->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME) &&
   2184         (this_rd < best_mode.intra_rd)) {
   2185       best_mode.intra_rd = this_rd;
   2186       *returnintra = rd.distortion2;
   2187     }
   2188 #if CONFIG_TEMPORAL_DENOISING
   2189     if (cpi->oxcf.noise_sensitivity) {
   2190       unsigned int sse;
   2191       vp8_get_inter_mbpred_error(x, &cpi->fn_ptr[BLOCK_16X16], &sse,
   2192                                  mode_mv[this_mode]);
   2193 
   2194       if (sse < best_rd_sse) best_rd_sse = sse;
   2195 
   2196       /* Store for later use by denoiser. */
   2197       if (this_mode == ZEROMV && sse < zero_mv_sse) {
   2198         zero_mv_sse = sse;
   2199         x->best_zeromv_reference_frame =
   2200             x->e_mbd.mode_info_context->mbmi.ref_frame;
   2201       }
   2202 
   2203       /* Store the best NEWMV in x for later use in the denoiser. */
   2204       if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV && sse < best_sse) {
   2205         best_sse = sse;
   2206         vp8_get_inter_mbpred_error(x, &cpi->fn_ptr[BLOCK_16X16], &best_sse,
   2207                                    mode_mv[this_mode]);
   2208         x->best_sse_inter_mode = NEWMV;
   2209         x->best_sse_mv = x->e_mbd.mode_info_context->mbmi.mv;
   2210         x->need_to_clamp_best_mvs =
   2211             x->e_mbd.mode_info_context->mbmi.need_to_clamp_mvs;
   2212         x->best_reference_frame = x->e_mbd.mode_info_context->mbmi.ref_frame;
   2213       }
   2214     }
   2215 #endif
   2216 
    2217     /* Did this mode help, i.e. is it the new best mode? */
   2218     if (this_rd < best_mode.rd || x->skip) {
   2219       /* Note index of best mode so far */
   2220       best_mode_index = mode_index;
   2221       *returnrate = rd.rate2;
   2222       *returndistortion = rd.distortion2;
   2223       if (this_mode <= B_PRED) {
   2224         x->e_mbd.mode_info_context->mbmi.uv_mode = uv_intra_mode;
   2225         /* required for left and above block mv */
   2226         x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
   2227       }
   2228       update_best_mode(&best_mode, this_rd, &rd, other_cost, x);
   2229 
   2230       /* Testing this mode gave rise to an improvement in best error
   2231        * score. Lower threshold a bit for next time
   2232        */
   2233       x->rd_thresh_mult[mode_index] =
   2234           (x->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2))
   2235               ? x->rd_thresh_mult[mode_index] - 2
   2236               : MIN_THRESHMULT;
   2237     }
   2238 
   2239     /* If the mode did not help improve the best error case then raise
   2240      * the threshold for testing that mode next time around.
   2241      */
   2242     else {
   2243       x->rd_thresh_mult[mode_index] += 4;
   2244 
   2245       if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT) {
   2246         x->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
   2247       }
   2248     }
   2249     x->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) *
   2250                                  x->rd_thresh_mult[mode_index];
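            /* rd_thresh_mult acts in units of 1/128 of the per-mode baseline threshold
             * (hence the >> 7 above). */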
   2251 
   2252     if (x->skip) break;
   2253   }
   2254 
   2255   /* Reduce the activation RD thresholds for the best choice mode */
   2256   if ((cpi->rd_baseline_thresh[best_mode_index] > 0) &&
   2257       (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2))) {
   2258     int best_adjustment = (x->rd_thresh_mult[best_mode_index] >> 2);
   2259 
   2260     x->rd_thresh_mult[best_mode_index] =
   2261         (x->rd_thresh_mult[best_mode_index] >=
   2262          (MIN_THRESHMULT + best_adjustment))
   2263             ? x->rd_thresh_mult[best_mode_index] - best_adjustment
   2264             : MIN_THRESHMULT;
   2265     x->rd_threshes[best_mode_index] =
   2266         (cpi->rd_baseline_thresh[best_mode_index] >> 7) *
   2267         x->rd_thresh_mult[best_mode_index];
   2268   }
   2269 
   2270 #if CONFIG_TEMPORAL_DENOISING
   2271   if (cpi->oxcf.noise_sensitivity) {
   2272     int block_index = mb_row * cpi->common.mb_cols + mb_col;
   2273     if (x->best_sse_inter_mode == DC_PRED) {
   2274       /* No best MV found. */
   2275       x->best_sse_inter_mode = best_mode.mbmode.mode;
   2276       x->best_sse_mv = best_mode.mbmode.mv;
   2277       x->need_to_clamp_best_mvs = best_mode.mbmode.need_to_clamp_mvs;
   2278       x->best_reference_frame = best_mode.mbmode.ref_frame;
   2279       best_sse = best_rd_sse;
   2280     }
   2281     vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse,
   2282                             recon_yoffset, recon_uvoffset, &cpi->common.lf_info,
   2283                             mb_row, mb_col, block_index, 0);
   2284 
   2285     /* Reevaluate ZEROMV after denoising. */
   2286     if (best_mode.mbmode.ref_frame == INTRA_FRAME &&
   2287         x->best_zeromv_reference_frame != INTRA_FRAME) {
   2288       int this_rd = INT_MAX;
   2289       int disable_skip = 0;
   2290       int other_cost = 0;
   2291       int this_ref_frame = x->best_zeromv_reference_frame;
   2292       rd.rate2 =
   2293           x->ref_frame_cost[this_ref_frame] + vp8_cost_mv_ref(ZEROMV, mdcounts);
   2294       rd.distortion2 = 0;
   2295 
   2296       /* set up the proper prediction buffers for the frame */
   2297       x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame;
   2298       x->e_mbd.pre.y_buffer = plane[this_ref_frame][0];
   2299       x->e_mbd.pre.u_buffer = plane[this_ref_frame][1];
   2300       x->e_mbd.pre.v_buffer = plane[this_ref_frame][2];
   2301 
   2302       x->e_mbd.mode_info_context->mbmi.mode = ZEROMV;
   2303       x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
   2304       x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
   2305 
   2306       this_rd = evaluate_inter_mode_rd(mdcounts, &rd, &disable_skip, cpi, x);
   2307       this_rd =
   2308           calculate_final_rd_costs(this_rd, &rd, &other_cost, disable_skip,
   2309                                    uv_intra_tteob, intra_rd_penalty, cpi, x);
   2310       if (this_rd < best_mode.rd || x->skip) {
   2311         *returnrate = rd.rate2;
   2312         *returndistortion = rd.distortion2;
   2313         update_best_mode(&best_mode, this_rd, &rd, other_cost, x);
   2314       }
   2315     }
   2316   }
   2317 #endif
   2318 
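            /* When the source frame is the alt-ref itself, force a skippable
             * ZEROMV/ALTREF_FRAME result regardless of the search outcome. */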
   2319   if (cpi->is_src_frame_alt_ref &&
   2320       (best_mode.mbmode.mode != ZEROMV ||
   2321        best_mode.mbmode.ref_frame != ALTREF_FRAME)) {
   2322     x->e_mbd.mode_info_context->mbmi.mode = ZEROMV;
   2323     x->e_mbd.mode_info_context->mbmi.ref_frame = ALTREF_FRAME;
   2324     x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
   2325     x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
   2326     x->e_mbd.mode_info_context->mbmi.mb_skip_coeff =
   2327         (cpi->common.mb_no_coeff_skip);
   2328     x->e_mbd.mode_info_context->mbmi.partitioning = 0;
   2329     return;
   2330   }
   2331 
   2332   /* macroblock modes */
   2333   memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mode.mbmode,
   2334          sizeof(MB_MODE_INFO));
   2335 
   2336   if (best_mode.mbmode.mode == B_PRED) {
   2337     for (i = 0; i < 16; ++i) {
   2338       xd->mode_info_context->bmi[i].as_mode = best_mode.bmodes[i].as_mode;
   2339     }
   2340   }
   2341 
   2342   if (best_mode.mbmode.mode == SPLITMV) {
   2343     for (i = 0; i < 16; ++i) {
   2344       xd->mode_info_context->bmi[i].mv.as_int = best_mode.bmodes[i].mv.as_int;
   2345     }
   2346 
   2347     memcpy(x->partition_info, &best_mode.partition, sizeof(PARTITION_INFO));
   2348 
   2349     x->e_mbd.mode_info_context->mbmi.mv.as_int =
   2350         x->partition_info->bmi[15].mv.as_int;
   2351   }
   2352 
   2353   if (sign_bias !=
   2354       cpi->common.ref_frame_sign_bias[xd->mode_info_context->mbmi.ref_frame]) {
   2355     best_ref_mv.as_int = best_ref_mv_sb[!sign_bias].as_int;
   2356   }
   2357 
   2358   rd_update_mvcount(x, &best_ref_mv);
   2359 }
   2360 
   2361 void vp8_rd_pick_intra_mode(MACROBLOCK *x, int *rate) {
   2362   int error4x4, error16x16;
   2363   int rate4x4, rate16x16 = 0, rateuv;
   2364   int dist4x4, dist16x16, distuv;
   2365   int rate_;
   2366   int rate4x4_tokenonly = 0;
   2367   int rate16x16_tokenonly = 0;
   2368   int rateuv_tokenonly = 0;
   2369 
   2370   x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
   2371 
   2372   rd_pick_intra_mbuv_mode(x, &rateuv, &rateuv_tokenonly, &distuv);
   2373   rate_ = rateuv;
   2374 
   2375   error16x16 = rd_pick_intra16x16mby_mode(x, &rate16x16, &rate16x16_tokenonly,
   2376                                           &dist16x16);
   2377 
   2378   error4x4 = rd_pick_intra4x4mby_modes(x, &rate4x4, &rate4x4_tokenonly,
   2379                                        &dist4x4, error16x16);
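            /* error16x16 is passed in as an RD bound so the 4x4 search can stop early
             * once it cannot beat the 16x16 mode. */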
   2380 
   2381   if (error4x4 < error16x16) {
   2382     x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
   2383     rate_ += rate4x4;
   2384   } else {
   2385     rate_ += rate16x16;
   2386   }
   2387 
   2388   *rate = rate_;
   2389 }
   2390