Home | History | Annotate | Download | only in encoder
      1 /*
      2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 
     12 #include <stdio.h>
     13 #include <math.h>
     14 #include <limits.h>
     15 #include <assert.h>
     16 #include "vpx_config.h"
     17 #include "vpx_rtcd.h"
     18 #include "vp8/common/pragmas.h"
     19 #include "tokenize.h"
     20 #include "treewriter.h"
     21 #include "onyx_int.h"
     22 #include "modecosts.h"
     23 #include "encodeintra.h"
     24 #include "pickinter.h"
     25 #include "vp8/common/entropymode.h"
     26 #include "vp8/common/reconinter.h"
     27 #include "vp8/common/reconintra4x4.h"
     28 #include "vp8/common/findnearmv.h"
     29 #include "vp8/common/quant_common.h"
     30 #include "encodemb.h"
     31 #include "quantize.h"
     32 #include "vp8/common/variance.h"
     33 #include "mcomp.h"
     34 #include "rdopt.h"
     35 #include "vpx_mem/vpx_mem.h"
     36 #include "vp8/common/systemdependent.h"
     37 #if CONFIG_TEMPORAL_DENOISING
     38 #include "denoising.h"
     39 #endif
     40 extern void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x);
     41 
     42 #define MAXF(a,b)            (((a) > (b)) ? (a) : (b))
     43 
     44 typedef struct rate_distortion_struct
     45 {
     46     int rate2;
     47     int rate_y;
     48     int rate_uv;
     49     int distortion2;
     50     int distortion_uv;
     51 } RATE_DISTORTION;
     52 
     53 typedef struct best_mode_struct
     54 {
     55   int yrd;
     56   int rd;
     57   int intra_rd;
     58   MB_MODE_INFO mbmode;
     59   union b_mode_info bmodes[16];
     60   PARTITION_INFO partition;
     61 } BEST_MODE;
     62 
     63 static const int auto_speed_thresh[17] =
     64 {
     65     1000,
     66     200,
     67     150,
     68     130,
     69     150,
     70     125,
     71     120,
     72     115,
     73     115,
     74     115,
     75     115,
     76     115,
     77     115,
     78     115,
     79     115,
     80     115,
     81     105
     82 };
     83 
     84 const MB_PREDICTION_MODE vp8_mode_order[MAX_MODES] =
     85 {
     86     ZEROMV,
     87     DC_PRED,
     88 
     89     NEARESTMV,
     90     NEARMV,
     91 
     92     ZEROMV,
     93     NEARESTMV,
     94 
     95     ZEROMV,
     96     NEARESTMV,
     97 
     98     NEARMV,
     99     NEARMV,
    100 
    101     V_PRED,
    102     H_PRED,
    103     TM_PRED,
    104 
    105     NEWMV,
    106     NEWMV,
    107     NEWMV,
    108 
    109     SPLITMV,
    110     SPLITMV,
    111     SPLITMV,
    112 
    113     B_PRED,
    114 };
    115 
    116 /* This table determines the search order in reference frame priority order,
    117  * which may not necessarily match INTRA,LAST,GOLDEN,ARF
    118  */
    119 const int vp8_ref_frame_order[MAX_MODES] =
    120 {
    121     1,
    122     0,
    123 
    124     1,
    125     1,
    126 
    127     2,
    128     2,
    129 
    130     3,
    131     3,
    132 
    133     2,
    134     3,
    135 
    136     0,
    137     0,
    138     0,
    139 
    140     1,
    141     2,
    142     3,
    143 
    144     1,
    145     2,
    146     3,
    147 
    148     0,
    149 };
    150 
    151 static void fill_token_costs(
    152     int c[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS],
    153     const vp8_prob p[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]
    154 )
    155 {
    156     int i, j, k;
    157 
    158 
    159     for (i = 0; i < BLOCK_TYPES; i++)
    160         for (j = 0; j < COEF_BANDS; j++)
    161             for (k = 0; k < PREV_COEF_CONTEXTS; k++)
    162 
    163                 /* check for pt=0 and band > 1 if block type 0
    164                  * and 0 if blocktype 1
    165                  */
    166                 if (k == 0 && j > (i == 0))
    167                     vp8_cost_tokens2(c[i][j][k], p [i][j][k], vp8_coef_tree, 2);
    168                 else
    169                     vp8_cost_tokens(c[i][j][k], p [i][j][k], vp8_coef_tree);
    170 }
    171 
    172 static const int rd_iifactor[32] =
    173 {
    174     4, 4, 3, 2, 1, 0, 0, 0,
    175     0, 0, 0, 0, 0, 0, 0, 0,
    176     0, 0, 0, 0, 0, 0, 0, 0,
    177     0, 0, 0, 0, 0, 0, 0, 0
    178 };
    179 
    180 /* values are now correlated to quantizer */
    181 static const int sad_per_bit16lut[QINDEX_RANGE] =
    182 {
    183     2,  2,  2,  2,  2,  2,  2,  2,
    184     2,  2,  2,  2,  2,  2,  2,  2,
    185     3,  3,  3,  3,  3,  3,  3,  3,
    186     3,  3,  3,  3,  3,  3,  4,  4,
    187     4,  4,  4,  4,  4,  4,  4,  4,
    188     4,  4,  5,  5,  5,  5,  5,  5,
    189     5,  5,  5,  5,  5,  5,  6,  6,
    190     6,  6,  6,  6,  6,  6,  6,  6,
    191     6,  6,  7,  7,  7,  7,  7,  7,
    192     7,  7,  7,  7,  7,  7,  8,  8,
    193     8,  8,  8,  8,  8,  8,  8,  8,
    194     8,  8,  9,  9,  9,  9,  9,  9,
    195     9,  9,  9,  9,  9,  9,  10, 10,
    196     10, 10, 10, 10, 10, 10, 11, 11,
    197     11, 11, 11, 11, 12, 12, 12, 12,
    198     12, 12, 13, 13, 13, 13, 14, 14
    199 };
    200 static const int sad_per_bit4lut[QINDEX_RANGE] =
    201 {
    202     2,  2,  2,  2,  2,  2,  3,  3,
    203     3,  3,  3,  3,  3,  3,  3,  3,
    204     3,  3,  3,  3,  4,  4,  4,  4,
    205     4,  4,  4,  4,  4,  4,  5,  5,
    206     5,  5,  5,  5,  6,  6,  6,  6,
    207     6,  6,  6,  6,  6,  6,  6,  6,
    208     7,  7,  7,  7,  7,  7,  7,  7,
    209     7,  7,  7,  7,  7,  8,  8,  8,
    210     8,  8,  9,  9,  9,  9,  9,  9,
    211     10, 10, 10, 10, 10, 10, 10, 10,
    212     11, 11, 11, 11, 11, 11, 11, 11,
    213     12, 12, 12, 12, 12, 12, 12, 12,
    214     13, 13, 13, 13, 13, 13, 13, 14,
    215     14, 14, 14, 14, 15, 15, 15, 15,
    216     16, 16, 16, 16, 17, 17, 17, 18,
    217     18, 18, 19, 19, 19, 20, 20, 20,
    218 };
    219 
    220 void vp8cx_initialize_me_consts(VP8_COMP *cpi, int QIndex)
    221 {
    222     cpi->mb.sadperbit16 =  sad_per_bit16lut[QIndex];
    223     cpi->mb.sadperbit4  =  sad_per_bit4lut[QIndex];
    224 }
    225 
    226 void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue)
    227 {
    228     int q;
    229     int i;
    230     double capped_q = (Qvalue < 160) ? (double)Qvalue : 160.0;
    231     double rdconst = 2.80;
    232 
    233     vp8_clear_system_state();
    234 
    235     /* Further tests required to see if optimum is different
    236      * for key frames, golden frames and arf frames.
    237      */
    238     cpi->RDMULT = (int)(rdconst * (capped_q * capped_q));
    239 
    240     /* Extend rate multiplier along side quantizer zbin increases */
    241     if (cpi->zbin_over_quant  > 0)
    242     {
    243         double oq_factor;
    244         double modq;
    245 
    246         /* Experimental code using the same basic equation as used for Q above
    247          * The units of cpi->zbin_over_quant are 1/128 of Q bin size
    248          */
    249         oq_factor = 1.0 + ((double)0.0015625 * cpi->zbin_over_quant);
    250         modq = (int)((double)capped_q * oq_factor);
    251         cpi->RDMULT = (int)(rdconst * (modq * modq));
    252     }
    253 
    254     if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME))
    255     {
    256         if (cpi->twopass.next_iiratio > 31)
    257             cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
    258         else
    259             cpi->RDMULT +=
    260                 (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
    261     }
    262 
    263     cpi->mb.errorperbit = (cpi->RDMULT / 110);
    264     cpi->mb.errorperbit += (cpi->mb.errorperbit==0);
    265 
    266     vp8_set_speed_features(cpi);
    267 
    268     q = (int)pow(Qvalue, 1.25);
    269 
    270     if (q < 8)
    271         q = 8;
    272 
    273     if (cpi->RDMULT > 1000)
    274     {
    275         cpi->RDDIV = 1;
    276         cpi->RDMULT /= 100;
    277 
    278         for (i = 0; i < MAX_MODES; i++)
    279         {
    280             if (cpi->sf.thresh_mult[i] < INT_MAX)
    281             {
    282                 cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q / 100;
    283             }
    284             else
    285             {
    286                 cpi->rd_threshes[i] = INT_MAX;
    287             }
    288 
    289             cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
    290         }
    291     }
    292     else
    293     {
    294         cpi->RDDIV = 100;
    295 
    296         for (i = 0; i < MAX_MODES; i++)
    297         {
    298             if (cpi->sf.thresh_mult[i] < (INT_MAX / q))
    299             {
    300                 cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q;
    301             }
    302             else
    303             {
    304                 cpi->rd_threshes[i] = INT_MAX;
    305             }
    306 
    307             cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
    308         }
    309     }
    310 
    311     {
    312       /* build token cost array for the type of frame we have now */
    313       FRAME_CONTEXT *l = &cpi->lfc_n;
    314 
    315       if(cpi->common.refresh_alt_ref_frame)
    316           l = &cpi->lfc_a;
    317       else if(cpi->common.refresh_golden_frame)
    318           l = &cpi->lfc_g;
    319 
    320       fill_token_costs(
    321           cpi->mb.token_costs,
    322           (const vp8_prob( *)[8][3][11]) l->coef_probs
    323       );
    324       /*
    325       fill_token_costs(
    326           cpi->mb.token_costs,
    327           (const vp8_prob( *)[8][3][11]) cpi->common.fc.coef_probs);
    328       */
    329 
    330 
    331       /* TODO make these mode costs depend on last,alt or gold too.  (jbb) */
    332       vp8_init_mode_costs(cpi);
    333     }
    334 
    335 }
    336 
    337 void vp8_auto_select_speed(VP8_COMP *cpi)
    338 {
    339     int milliseconds_for_compress = (int)(1000000 / cpi->frame_rate);
    340 
    341     milliseconds_for_compress = milliseconds_for_compress * (16 - cpi->oxcf.cpu_used) / 16;
    342 
    343 #if 0
    344 
    345     if (0)
    346     {
    347         FILE *f;
    348 
    349         f = fopen("speed.stt", "a");
    350         fprintf(f, " %8ld %10ld %10ld %10ld\n",
    351                 cpi->common.current_video_frame, cpi->Speed, milliseconds_for_compress, cpi->avg_pick_mode_time);
    352         fclose(f);
    353     }
    354 
    355 #endif
    356 
    357     if (cpi->avg_pick_mode_time < milliseconds_for_compress && (cpi->avg_encode_time - cpi->avg_pick_mode_time) < milliseconds_for_compress)
    358     {
    359         if (cpi->avg_pick_mode_time == 0)
    360         {
    361             cpi->Speed = 4;
    362         }
    363         else
    364         {
    365             if (milliseconds_for_compress * 100 < cpi->avg_encode_time * 95)
    366             {
    367                 cpi->Speed          += 2;
    368                 cpi->avg_pick_mode_time = 0;
    369                 cpi->avg_encode_time = 0;
    370 
    371                 if (cpi->Speed > 16)
    372                 {
    373                     cpi->Speed = 16;
    374                 }
    375             }
    376 
    377             if (milliseconds_for_compress * 100 > cpi->avg_encode_time * auto_speed_thresh[cpi->Speed])
    378             {
    379                 cpi->Speed          -= 1;
    380                 cpi->avg_pick_mode_time = 0;
    381                 cpi->avg_encode_time = 0;
    382 
    383                 /* In real-time mode, cpi->speed is in [4, 16]. */
    384                 if (cpi->Speed < 4)
    385                 {
    386                     cpi->Speed = 4;
    387                 }
    388             }
    389         }
    390     }
    391     else
    392     {
    393         cpi->Speed += 4;
    394 
    395         if (cpi->Speed > 16)
    396             cpi->Speed = 16;
    397 
    398 
    399         cpi->avg_pick_mode_time = 0;
    400         cpi->avg_encode_time = 0;
    401     }
    402 }
    403 
    404 int vp8_block_error_c(short *coeff, short *dqcoeff)
    405 {
    406     int i;
    407     int error = 0;
    408 
    409     for (i = 0; i < 16; i++)
    410     {
    411         int this_diff = coeff[i] - dqcoeff[i];
    412         error += this_diff * this_diff;
    413     }
    414 
    415     return error;
    416 }
    417 
    418 int vp8_mbblock_error_c(MACROBLOCK *mb, int dc)
    419 {
    420     BLOCK  *be;
    421     BLOCKD *bd;
    422     int i, j;
    423     int berror, error = 0;
    424 
    425     for (i = 0; i < 16; i++)
    426     {
    427         be = &mb->block[i];
    428         bd = &mb->e_mbd.block[i];
    429 
    430         berror = 0;
    431 
    432         for (j = dc; j < 16; j++)
    433         {
    434             int this_diff = be->coeff[j] - bd->dqcoeff[j];
    435             berror += this_diff * this_diff;
    436         }
    437 
    438         error += berror;
    439     }
    440 
    441     return error;
    442 }
    443 
    444 int vp8_mbuverror_c(MACROBLOCK *mb)
    445 {
    446 
    447     BLOCK  *be;
    448     BLOCKD *bd;
    449 
    450 
    451     int i;
    452     int error = 0;
    453 
    454     for (i = 16; i < 24; i++)
    455     {
    456         be = &mb->block[i];
    457         bd = &mb->e_mbd.block[i];
    458 
    459         error += vp8_block_error_c(be->coeff, bd->dqcoeff);
    460     }
    461 
    462     return error;
    463 }
    464 
    465 int VP8_UVSSE(MACROBLOCK *x)
    466 {
    467     unsigned char *uptr, *vptr;
    468     unsigned char *upred_ptr = (*(x->block[16].base_src) + x->block[16].src);
    469     unsigned char *vpred_ptr = (*(x->block[20].base_src) + x->block[20].src);
    470     int uv_stride = x->block[16].src_stride;
    471 
    472     unsigned int sse1 = 0;
    473     unsigned int sse2 = 0;
    474     int mv_row = x->e_mbd.mode_info_context->mbmi.mv.as_mv.row;
    475     int mv_col = x->e_mbd.mode_info_context->mbmi.mv.as_mv.col;
    476     int offset;
    477     int pre_stride = x->e_mbd.pre.uv_stride;
    478 
    479     if (mv_row < 0)
    480         mv_row -= 1;
    481     else
    482         mv_row += 1;
    483 
    484     if (mv_col < 0)
    485         mv_col -= 1;
    486     else
    487         mv_col += 1;
    488 
    489     mv_row /= 2;
    490     mv_col /= 2;
    491 
    492     offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
    493     uptr = x->e_mbd.pre.u_buffer + offset;
    494     vptr = x->e_mbd.pre.v_buffer + offset;
    495 
    496     if ((mv_row | mv_col) & 7)
    497     {
    498         vp8_sub_pixel_variance8x8(uptr, pre_stride,
    499             mv_col & 7, mv_row & 7, upred_ptr, uv_stride, &sse2);
    500         vp8_sub_pixel_variance8x8(vptr, pre_stride,
    501             mv_col & 7, mv_row & 7, vpred_ptr, uv_stride, &sse1);
    502         sse2 += sse1;
    503     }
    504     else
    505     {
    506         vp8_variance8x8(uptr, pre_stride,
    507             upred_ptr, uv_stride, &sse2);
    508         vp8_variance8x8(vptr, pre_stride,
    509             vpred_ptr, uv_stride, &sse1);
    510         sse2 += sse1;
    511     }
    512     return sse2;
    513 
    514 }
    515 
    516 static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, int type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l)
    517 {
    518     int c = !type;              /* start at coef 0, unless Y with Y2 */
    519     int eob = (int)(*b->eob);
    520     int pt ;    /* surrounding block/prev coef predictor */
    521     int cost = 0;
    522     short *qcoeff_ptr = b->qcoeff;
    523 
    524     VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
    525 
    526 # define QC( I)  ( qcoeff_ptr [vp8_default_zig_zag1d[I]] )
    527 
    528     for (; c < eob; c++)
    529     {
    530         int v = QC(c);
    531         int t = vp8_dct_value_tokens_ptr[v].Token;
    532         cost += mb->token_costs [type] [vp8_coef_bands[c]] [pt] [t];
    533         cost += vp8_dct_value_cost_ptr[v];
    534         pt = vp8_prev_token_class[t];
    535     }
    536 
    537 # undef QC
    538 
    539     if (c < 16)
    540         cost += mb->token_costs [type] [vp8_coef_bands[c]] [pt] [DCT_EOB_TOKEN];
    541 
    542     pt = (c != !type); /* is eob first coefficient; */
    543     *a = *l = pt;
    544 
    545     return cost;
    546 }
    547 
    548 static int vp8_rdcost_mby(MACROBLOCK *mb)
    549 {
    550     int cost = 0;
    551     int b;
    552     MACROBLOCKD *x = &mb->e_mbd;
    553     ENTROPY_CONTEXT_PLANES t_above, t_left;
    554     ENTROPY_CONTEXT *ta;
    555     ENTROPY_CONTEXT *tl;
    556 
    557     vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    558     vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
    559 
    560     ta = (ENTROPY_CONTEXT *)&t_above;
    561     tl = (ENTROPY_CONTEXT *)&t_left;
    562 
    563     for (b = 0; b < 16; b++)
    564         cost += cost_coeffs(mb, x->block + b, PLANE_TYPE_Y_NO_DC,
    565                     ta + vp8_block2above[b], tl + vp8_block2left[b]);
    566 
    567     cost += cost_coeffs(mb, x->block + 24, PLANE_TYPE_Y2,
    568                 ta + vp8_block2above[24], tl + vp8_block2left[24]);
    569 
    570     return cost;
    571 }
    572 
    573 static void macro_block_yrd( MACROBLOCK *mb,
    574                              int *Rate,
    575                              int *Distortion)
    576 {
    577     int b;
    578     MACROBLOCKD *const x = &mb->e_mbd;
    579     BLOCK   *const mb_y2 = mb->block + 24;
    580     BLOCKD *const x_y2  = x->block + 24;
    581     short *Y2DCPtr = mb_y2->src_diff;
    582     BLOCK *beptr;
    583     int d;
    584 
    585     vp8_subtract_mby( mb->src_diff, *(mb->block[0].base_src),
    586         mb->block[0].src_stride,  mb->e_mbd.predictor, 16);
    587 
    588     /* Fdct and building the 2nd order block */
    589     for (beptr = mb->block; beptr < mb->block + 16; beptr += 2)
    590     {
    591         mb->short_fdct8x4(beptr->src_diff, beptr->coeff, 32);
    592         *Y2DCPtr++ = beptr->coeff[0];
    593         *Y2DCPtr++ = beptr->coeff[16];
    594     }
    595 
    596     /* 2nd order fdct */
    597     mb->short_walsh4x4(mb_y2->src_diff, mb_y2->coeff, 8);
    598 
    599     /* Quantization */
    600     for (b = 0; b < 16; b++)
    601     {
    602         mb->quantize_b(&mb->block[b], &mb->e_mbd.block[b]);
    603     }
    604 
    605     /* DC predication and Quantization of 2nd Order block */
    606     mb->quantize_b(mb_y2, x_y2);
    607 
    608     /* Distortion */
    609     d = vp8_mbblock_error(mb, 1) << 2;
    610     d += vp8_block_error(mb_y2->coeff, x_y2->dqcoeff);
    611 
    612     *Distortion = (d >> 4);
    613 
    614     /* rate */
    615     *Rate = vp8_rdcost_mby(mb);
    616 }
    617 
    618 static void copy_predictor(unsigned char *dst, const unsigned char *predictor)
    619 {
    620     const unsigned int *p = (const unsigned int *)predictor;
    621     unsigned int *d = (unsigned int *)dst;
    622     d[0] = p[0];
    623     d[4] = p[4];
    624     d[8] = p[8];
    625     d[12] = p[12];
    626 }
    627 static int rd_pick_intra4x4block(
    628     MACROBLOCK *x,
    629     BLOCK *be,
    630     BLOCKD *b,
    631     B_PREDICTION_MODE *best_mode,
    632     const int *bmode_costs,
    633     ENTROPY_CONTEXT *a,
    634     ENTROPY_CONTEXT *l,
    635 
    636     int *bestrate,
    637     int *bestratey,
    638     int *bestdistortion)
    639 {
    640     B_PREDICTION_MODE mode;
    641     int best_rd = INT_MAX;
    642     int rate = 0;
    643     int distortion;
    644 
    645     ENTROPY_CONTEXT ta = *a, tempa = *a;
    646     ENTROPY_CONTEXT tl = *l, templ = *l;
    647     /*
    648      * The predictor buffer is a 2d buffer with a stride of 16.  Create
    649      * a temp buffer that meets the stride requirements, but we are only
    650      * interested in the left 4x4 block
    651      * */
    652     DECLARE_ALIGNED_ARRAY(16, unsigned char,  best_predictor, 16*4);
    653     DECLARE_ALIGNED_ARRAY(16, short, best_dqcoeff, 16);
    654     int dst_stride = x->e_mbd.dst.y_stride;
    655     unsigned char *dst = x->e_mbd.dst.y_buffer + b->offset;
    656 
    657     unsigned char *Above = dst - dst_stride;
    658     unsigned char *yleft = dst - 1;
    659     unsigned char top_left = Above[-1];
    660 
    661     for (mode = B_DC_PRED; mode <= B_HU_PRED; mode++)
    662     {
    663         int this_rd;
    664         int ratey;
    665 
    666         rate = bmode_costs[mode];
    667 
    668         vp8_intra4x4_predict(Above, yleft, dst_stride, mode,
    669                              b->predictor, 16, top_left);
    670         vp8_subtract_b(be, b, 16);
    671         x->short_fdct4x4(be->src_diff, be->coeff, 32);
    672         x->quantize_b(be, b);
    673 
    674         tempa = ta;
    675         templ = tl;
    676 
    677         ratey = cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC, &tempa, &templ);
    678         rate += ratey;
    679         distortion = vp8_block_error(be->coeff, b->dqcoeff) >> 2;
    680 
    681         this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
    682 
    683         if (this_rd < best_rd)
    684         {
    685             *bestrate = rate;
    686             *bestratey = ratey;
    687             *bestdistortion = distortion;
    688             best_rd = this_rd;
    689             *best_mode = mode;
    690             *a = tempa;
    691             *l = templ;
    692             copy_predictor(best_predictor, b->predictor);
    693             vpx_memcpy(best_dqcoeff, b->dqcoeff, 32);
    694         }
    695     }
    696     b->bmi.as_mode = *best_mode;
    697 
    698     vp8_short_idct4x4llm(best_dqcoeff, best_predictor, 16, dst, dst_stride);
    699 
    700     return best_rd;
    701 }
    702 
    703 static int rd_pick_intra4x4mby_modes(MACROBLOCK *mb, int *Rate,
    704                                      int *rate_y, int *Distortion, int best_rd)
    705 {
    706     MACROBLOCKD *const xd = &mb->e_mbd;
    707     int i;
    708     int cost = mb->mbmode_cost [xd->frame_type] [B_PRED];
    709     int distortion = 0;
    710     int tot_rate_y = 0;
    711     int64_t total_rd = 0;
    712     ENTROPY_CONTEXT_PLANES t_above, t_left;
    713     ENTROPY_CONTEXT *ta;
    714     ENTROPY_CONTEXT *tl;
    715     const int *bmode_costs;
    716 
    717     vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    718     vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
    719 
    720     ta = (ENTROPY_CONTEXT *)&t_above;
    721     tl = (ENTROPY_CONTEXT *)&t_left;
    722 
    723     intra_prediction_down_copy(xd, xd->dst.y_buffer - xd->dst.y_stride + 16);
    724 
    725     bmode_costs = mb->inter_bmode_costs;
    726 
    727     for (i = 0; i < 16; i++)
    728     {
    729         MODE_INFO *const mic = xd->mode_info_context;
    730         const int mis = xd->mode_info_stride;
    731         B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode);
    732         int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry), UNINITIALIZED_IS_SAFE(d);
    733 
    734         if (mb->e_mbd.frame_type == KEY_FRAME)
    735         {
    736             const B_PREDICTION_MODE A = above_block_mode(mic, i, mis);
    737             const B_PREDICTION_MODE L = left_block_mode(mic, i);
    738 
    739             bmode_costs  = mb->bmode_costs[A][L];
    740         }
    741 
    742         total_rd += rd_pick_intra4x4block(
    743             mb, mb->block + i, xd->block + i, &best_mode, bmode_costs,
    744             ta + vp8_block2above[i],
    745             tl + vp8_block2left[i], &r, &ry, &d);
    746 
    747         cost += r;
    748         distortion += d;
    749         tot_rate_y += ry;
    750 
    751         mic->bmi[i].as_mode = best_mode;
    752 
    753         if(total_rd >= (int64_t)best_rd)
    754             break;
    755     }
    756 
    757     if(total_rd >= (int64_t)best_rd)
    758         return INT_MAX;
    759 
    760     *Rate = cost;
    761     *rate_y = tot_rate_y;
    762     *Distortion = distortion;
    763 
    764     return RDCOST(mb->rdmult, mb->rddiv, cost, distortion);
    765 }
    766 
    767 
    768 static int rd_pick_intra16x16mby_mode(MACROBLOCK *x,
    769                                       int *Rate,
    770                                       int *rate_y,
    771                                       int *Distortion)
    772 {
    773     MB_PREDICTION_MODE mode;
    774     MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
    775     int rate, ratey;
    776     int distortion;
    777     int best_rd = INT_MAX;
    778     int this_rd;
    779     MACROBLOCKD *xd = &x->e_mbd;
    780 
    781     /* Y Search for 16x16 intra prediction mode */
    782     for (mode = DC_PRED; mode <= TM_PRED; mode++)
    783     {
    784         xd->mode_info_context->mbmi.mode = mode;
    785 
    786         vp8_build_intra_predictors_mby_s(xd,
    787                                          xd->dst.y_buffer - xd->dst.y_stride,
    788                                          xd->dst.y_buffer - 1,
    789                                          xd->dst.y_stride,
    790                                          xd->predictor,
    791                                          16);
    792 
    793         macro_block_yrd(x, &ratey, &distortion);
    794         rate = ratey + x->mbmode_cost[xd->frame_type]
    795                                      [xd->mode_info_context->mbmi.mode];
    796 
    797         this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
    798 
    799         if (this_rd < best_rd)
    800         {
    801             mode_selected = mode;
    802             best_rd = this_rd;
    803             *Rate = rate;
    804             *rate_y = ratey;
    805             *Distortion = distortion;
    806         }
    807     }
    808 
    809     xd->mode_info_context->mbmi.mode = mode_selected;
    810     return best_rd;
    811 }
    812 
    813 static int rd_cost_mbuv(MACROBLOCK *mb)
    814 {
    815     int b;
    816     int cost = 0;
    817     MACROBLOCKD *x = &mb->e_mbd;
    818     ENTROPY_CONTEXT_PLANES t_above, t_left;
    819     ENTROPY_CONTEXT *ta;
    820     ENTROPY_CONTEXT *tl;
    821 
    822     vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    823     vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
    824 
    825     ta = (ENTROPY_CONTEXT *)&t_above;
    826     tl = (ENTROPY_CONTEXT *)&t_left;
    827 
    828     for (b = 16; b < 24; b++)
    829         cost += cost_coeffs(mb, x->block + b, PLANE_TYPE_UV,
    830                     ta + vp8_block2above[b], tl + vp8_block2left[b]);
    831 
    832     return cost;
    833 }
    834 
    835 
    836 static int rd_inter16x16_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate,
    837                             int *distortion, int fullpixel)
    838 {
    839     vp8_build_inter16x16_predictors_mbuv(&x->e_mbd);
    840     vp8_subtract_mbuv(x->src_diff,
    841         x->src.u_buffer, x->src.v_buffer, x->src.uv_stride,
    842         &x->e_mbd.predictor[256], &x->e_mbd.predictor[320], 8);
    843 
    844     vp8_transform_mbuv(x);
    845     vp8_quantize_mbuv(x);
    846 
    847     *rate       = rd_cost_mbuv(x);
    848     *distortion = vp8_mbuverror(x) / 4;
    849 
    850     return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
    851 }
    852 
    853 static int rd_inter4x4_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate,
    854                           int *distortion, int fullpixel)
    855 {
    856     vp8_build_inter4x4_predictors_mbuv(&x->e_mbd);
    857     vp8_subtract_mbuv(x->src_diff,
    858         x->src.u_buffer, x->src.v_buffer, x->src.uv_stride,
    859         &x->e_mbd.predictor[256], &x->e_mbd.predictor[320], 8);
    860 
    861     vp8_transform_mbuv(x);
    862     vp8_quantize_mbuv(x);
    863 
    864     *rate       = rd_cost_mbuv(x);
    865     *distortion = vp8_mbuverror(x) / 4;
    866 
    867     return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
    868 }
    869 
    870 static void rd_pick_intra_mbuv_mode(MACROBLOCK *x, int *rate,
    871                                     int *rate_tokenonly, int *distortion)
    872 {
    873     MB_PREDICTION_MODE mode;
    874     MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
    875     int best_rd = INT_MAX;
    876     int UNINITIALIZED_IS_SAFE(d), UNINITIALIZED_IS_SAFE(r);
    877     int rate_to;
    878     MACROBLOCKD *xd = &x->e_mbd;
    879 
    880     for (mode = DC_PRED; mode <= TM_PRED; mode++)
    881     {
    882         int rate;
    883         int distortion;
    884         int this_rd;
    885 
    886         xd->mode_info_context->mbmi.uv_mode = mode;
    887 
    888         vp8_build_intra_predictors_mbuv_s(xd,
    889                                           xd->dst.u_buffer - xd->dst.uv_stride,
    890                                           xd->dst.v_buffer - xd->dst.uv_stride,
    891                                           xd->dst.u_buffer - 1,
    892                                           xd->dst.v_buffer - 1,
    893                                           xd->dst.uv_stride,
    894                                           &xd->predictor[256], &xd->predictor[320],
    895                                           8);
    896 
    897 
    898         vp8_subtract_mbuv(x->src_diff,
    899                       x->src.u_buffer, x->src.v_buffer, x->src.uv_stride,
    900                       &xd->predictor[256], &xd->predictor[320], 8);
    901         vp8_transform_mbuv(x);
    902         vp8_quantize_mbuv(x);
    903 
    904         rate_to = rd_cost_mbuv(x);
    905         rate = rate_to + x->intra_uv_mode_cost[xd->frame_type][xd->mode_info_context->mbmi.uv_mode];
    906 
    907         distortion = vp8_mbuverror(x) / 4;
    908 
    909         this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
    910 
    911         if (this_rd < best_rd)
    912         {
    913             best_rd = this_rd;
    914             d = distortion;
    915             r = rate;
    916             *rate_tokenonly = rate_to;
    917             mode_selected = mode;
    918         }
    919     }
    920 
    921     *rate = r;
    922     *distortion = d;
    923 
    924     xd->mode_info_context->mbmi.uv_mode = mode_selected;
    925 }
    926 
    927 int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4])
    928 {
    929     vp8_prob p [VP8_MVREFS-1];
    930     assert(NEARESTMV <= m  &&  m <= SPLITMV);
    931     vp8_mv_ref_probs(p, near_mv_ref_ct);
    932     return vp8_cost_token(vp8_mv_ref_tree, p,
    933                           vp8_mv_ref_encoding_array - NEARESTMV + m);
    934 }
    935 
    936 void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv)
    937 {
    938     x->e_mbd.mode_info_context->mbmi.mode = mb;
    939     x->e_mbd.mode_info_context->mbmi.mv.as_int = mv->as_int;
    940 }
    941 
    /* Applies sub-block mode `this_mode` to every 4x4 block of one label and
     * returns the cost of signalling it.
     *
     *   labelings   - 16 entries, one label per 4x4 block in raster order.
     *   which_label - the label to process; other blocks are left untouched.
     *   this_mode   - mode to use for the first block of the label.
     *   this_mv     - in/out: read for NEW4X4, overwritten for LEFT4X4,
     *                 ABOVE4X4 and ZERO4X4 with the vector that mode implies.
     *   best_ref_mv - reference vector used to price a NEW4X4 vector.
     *   mvcost      - motion vector cost tables passed to vp8_mv_bit_cost().
     *
     * Side effects: for each block of the label, writes the vector into the
     * BLOCKD (d->bmi.mv) and the mode and vector into x->partition_info->bmi[].
     *
     * Returns x->inter_bmode_costs[] for the mode actually used, plus the
     * vector bit cost when that mode is NEW4X4.
     */
    942 static int labels2mode(
    943     MACROBLOCK *x,
    944     int const *labelings, int which_label,
    945     B_PREDICTION_MODE this_mode,
    946     int_mv *this_mv, int_mv *best_ref_mv,
    947     int *mvcost[2]
    948 )
    949 {
    950     MACROBLOCKD *const xd = & x->e_mbd;
    951     MODE_INFO *const mic = xd->mode_info_context;
    952     const int mis = xd->mode_info_stride;
    953 
    954     int cost = 0;
    955     int thismvcost = 0;
    956 
    957     /* We have to be careful retrieving previously-encoded motion vectors.
    958        Ones from this macroblock have to be pulled from the BLOCKD array
    959        as they have not yet made it to the bmi array in our MB_MODE_INFO. */
    960 
    961     int i = 0;
    962 
    963     do
    964     {
    965         BLOCKD *const d = xd->block + i;
    966         const int row = i >> 2,  col = i & 3;
    967 
    968         B_PREDICTION_MODE m;
    969 
                /* `continue` in a do/while jumps to the `++i < 16` test below. */
    970         if (labelings[i] != which_label)
    971             continue;
    972 
                /* A block whose left or above neighbour inside this macroblock
                 * carries the same label is recorded as LEFT4X4 / ABOVE4X4 and
                 * adds no cost; this_mv still holds the label's vector.
                 */
    973         if (col  &&  labelings[i] == labelings[i-1])
    974             m = LEFT4X4;
    975         else if (row  &&  labelings[i] == labelings[i-4])
    976             m = ABOVE4X4;
    977         else
    978         {
    979             /* the only time we should do costing for new motion vector
    980              * or mode is when we are on a new label  (jbb May 08, 2007)
    981              */
    982             switch (m = this_mode)
    983             {
    984             case NEW4X4 :
    985                 thismvcost  = vp8_mv_bit_cost(this_mv, best_ref_mv, mvcost, 102);
    986                 break;
    987             case LEFT4X4:
                        /* In column 0 the left vector comes from the neighbouring
                         * macroblock via left_block_mv(). */
    988                 this_mv->as_int = col ? d[-1].bmi.mv.as_int : left_block_mv(mic, i);
    989                 break;
    990             case ABOVE4X4:
                        /* In row 0 the above vector comes from the macroblock
                         * above via above_block_mv(). */
    991                 this_mv->as_int = row ? d[-4].bmi.mv.as_int : above_block_mv(mic, i, mis);
    992                 break;
    993             case ZERO4X4:
    994                 this_mv->as_int = 0;
    995                 break;
    996             default:
    997                 break;
    998             }
    999 
   1000             if (m == ABOVE4X4)  /* replace above with left if same */
   1001             {
   1002                 int_mv left_mv;
   1003 
   1004                 left_mv.as_int = col ? d[-1].bmi.mv.as_int :
   1005                                         left_block_mv(mic, i);
   1006 
   1007                 if (left_mv.as_int == this_mv->as_int)
   1008                     m = LEFT4X4;
   1009             }
   1010 
   1011             cost = x->inter_bmode_costs[ m];
   1012         }
   1013 
   1014         d->bmi.mv.as_int = this_mv->as_int;
   1015 
   1016         x->partition_info->bmi[i].mode = m;
   1017         x->partition_info->bmi[i].mv.as_int = this_mv->as_int;
   1018 
   1019     }
   1020     while (++i < 16);
   1021 
   1022     cost += thismvcost ;
   1023     return cost;
   1024 }
   1025 
   /* Sums the coefficient cost of the luma 4x4 blocks that belong to one label.
    *
    *   labels      - 16 entries, one label per 4x4 block.
    *   which_label - label whose blocks are costed.
    *   ta, tl      - above / left entropy contexts, indexed through
    *                 vp8_block2above[] and vp8_block2left[]; they are passed
    *                 to cost_coeffs() by pointer (presumably updated there -
    *                 confirm against cost_coeffs()).
    *
    * Blocks are costed with plane type PLANE_TYPE_Y_WITH_DC.
    */
   1026 static int rdcost_mbsegment_y(MACROBLOCK *mb, const int *labels,
   1027                               int which_label, ENTROPY_CONTEXT *ta,
   1028                               ENTROPY_CONTEXT *tl)
   1029 {
   1030     int cost = 0;
   1031     int b;
   1032     MACROBLOCKD *x = &mb->e_mbd;
   1033 
   1034     for (b = 0; b < 16; b++)
   1035         if (labels[ b] == which_label)
   1036             cost += cost_coeffs(mb, x->block + b, PLANE_TYPE_Y_WITH_DC,
   1037                                 ta + vp8_block2above[b],
   1038                                 tl + vp8_block2left[b]);
   1039 
   1040     return cost;
   1041 
   1042 }
   /* Trial-encodes the luma 4x4 blocks of one label and returns their summed
    * coefficient-domain error.
    *
    * For every block i with labels[i] == which_label the inter predictor is
    * built from x->e_mbd.pre, subtracted from the source, transformed with
    * x->short_fdct4x4 and quantized with x->quantize_b. The error added is
    * vp8_block_error() between the transform coefficients and the dequantized
    * coefficients. The block's residual / coefficient buffers are overwritten.
    */
   1043 static unsigned int vp8_encode_inter_mb_segment(MACROBLOCK *x, int const *labels, int which_label)
   1044 {
   1045     int i;
   1046     unsigned int distortion = 0;
   1047     int pre_stride = x->e_mbd.pre.y_stride;
   1048     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1049 
   1050 
   1051     for (i = 0; i < 16; i++)
   1052     {
   1053         if (labels[i] == which_label)
   1054         {
   1055             BLOCKD *bd = &x->e_mbd.block[i];
   1056             BLOCK *be = &x->block[i];
   1057 
   1058             vp8_build_inter_predictors_b(bd, 16, base_pre, pre_stride, x->e_mbd.subpixel_predict);
   1059             vp8_subtract_b(be, bd, 16);
   1060             x->short_fdct4x4(be->src_diff, be->coeff, 32);
   1061             x->quantize_b(be, bd);
   1062 
   1063             distortion += vp8_block_error(be->coeff, bd->dqcoeff);
   1064         }
   1065     }
   1066 
   1067     return distortion;
   1068 }
   1069 
   1070 
   /* Right shift applied to the best motion-search score before it is compared
    * with the full-search threshold in rd_check_segment(); indexed by the
    * segmentation value passed to that function.
    */
   1071 static const unsigned int segmentation_to_sseshift[4] = {3, 3, 2, 0};
   1072 
   1073 
   /* Running state of the split-mode search: the inputs shared by every
    * rd_check_segment() call plus the best segmentation found so far.
    */
   1074 typedef struct
   1075 {
   1076   int_mv *ref_mv;   /* reference vector used to cost candidate vectors */
   1077   int_mv mvp;       /* start point for the NEW4X4 motion search */
   1078 
   1079   int segment_rd;   /* best RD cost so far; starts as the caller's bound */
   1080   int segment_num;  /* segmentation that produced segment_rd */
   1081   int r;            /* total rate of the best segmentation */
   1082   int d;            /* total distortion of the best segmentation */
   1083   int segment_yrate; /* coefficient rate part of r */
   1084   B_PREDICTION_MODE modes[16]; /* per-4x4-block modes of the best result */
   1085   int_mv mvs[16];              /* per-4x4-block vectors of the best result */
   1086   unsigned char eobs[16];      /* per-4x4-block eobs of the best result */
   1087 
   1088   int mvthresh;     /* RD threshold below which no new mv search is done */
   1089   int *mdcounts;    /* neighbour mode counts passed to vp8_cost_mv_ref() */
   1090 
   1091   int_mv sv_mvp[4]; /* save 4 mvp from 8x8 */
   1092   int sv_istep[2];  /* save 2 initial step_param for 16x8/8x16 */
   1093 
   1094 } BEST_SEG_INFO;
   1095 
   1096 
   /* Evaluates one macroblock segmentation and, if it beats the best result
    * recorded in `bsi`, stores it there.
    *
    *   cpi          - encoder instance (search functions, speed setting).
    *   x            - macroblock being coded; its block vectors, coefficient
    *                  buffers and partition_info are used as scratch space.
    *   bsi          - in/out search state, see BEST_SEG_INFO.
    *   segmentation - index into vp8_mbsplits[] / vp8_mbsplit_count[].
    *
    * For each label of the segmentation every sub-block mode from LEFT4X4 to
    * NEW4X4 is tried; for NEW4X4 a motion search is run first. The mode with
    * the lowest RD cost is kept and its cost accumulated. The label loop stops
    * early as soon as the running total reaches bsi->segment_rd.
    *
    * NOTE(review): if every mode of the first label is rejected by the UMV
    * border test, best_label_rd stays INT_MAX and t_above_b / t_left_b are
    * copied without having been written - confirm this cannot happen.
    */
   1097 static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
   1098                              BEST_SEG_INFO *bsi, unsigned int segmentation)
   1099 {
   1100     int i;
   1101     int const *labels;
   1102     int br = 0;
   1103     int bd = 0;
   1104     B_PREDICTION_MODE this_mode;
   1105 
   1106 
   1107     int label_count;
   1108     int this_segment_rd = 0;
   1109     int label_mv_thresh;
   1110     int rate = 0;
   1111     int sbr = 0;
   1112     int sbd = 0;
   1113     int segmentyrate = 0;
   1114 
   1115     vp8_variance_fn_ptr_t *v_fn_ptr;
   1116 
   1117     ENTROPY_CONTEXT_PLANES t_above, t_left;
   1118     ENTROPY_CONTEXT *ta;
   1119     ENTROPY_CONTEXT *tl;
   1120     ENTROPY_CONTEXT_PLANES t_above_b, t_left_b;
   1121     ENTROPY_CONTEXT *ta_b;
   1122     ENTROPY_CONTEXT *tl_b;
   1123 
            /* Work on private copies of the entropy contexts so the trial
             * encodes do not disturb the macroblock's real contexts. */
   1124     vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
   1125     vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
   1126 
   1127     ta = (ENTROPY_CONTEXT *)&t_above;
   1128     tl = (ENTROPY_CONTEXT *)&t_left;
   1129     ta_b = (ENTROPY_CONTEXT *)&t_above_b;
   1130     tl_b = (ENTROPY_CONTEXT *)&t_left_b;
   1131 
   1132     br = 0;
   1133     bd = 0;
   1134 
   1135     v_fn_ptr = &cpi->fn_ptr[segmentation];
   1136     labels = vp8_mbsplits[segmentation];
   1137     label_count = vp8_mbsplit_count[segmentation];
   1138 
   1139     /* 64 makes this threshold really big effectively making it so that we
   1140      * very rarely check mvs on segments.   setting this to 1 would make mv
   1141      * thresh roughly equal to what it is for macroblocks
   1142      */
   1143     label_mv_thresh = 1 * bsi->mvthresh / label_count ;
   1144 
   1145     /* Segmentation method overheads */
   1146     rate = vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs, vp8_mbsplit_encodings + segmentation);
   1147     rate += vp8_cost_mv_ref(SPLITMV, bsi->mdcounts);
   1148     this_segment_rd += RDCOST(x->rdmult, x->rddiv, rate, 0);
   1149     br += rate;
   1150 
   1151     for (i = 0; i < label_count; i++)
   1152     {
   1153         int_mv mode_mv[B_MODE_COUNT];
   1154         int best_label_rd = INT_MAX;
   1155         B_PREDICTION_MODE mode_selected = ZERO4X4;
   1156         int bestlabelyrate = 0;
   1157 
   1158         /* search for the best motion vector on this segment */
   1159         for (this_mode = LEFT4X4; this_mode <= NEW4X4 ; this_mode ++)
   1160         {
   1161             int this_rd;
   1162             int distortion;
   1163             int labelyrate;
   1164             ENTROPY_CONTEXT_PLANES t_above_s, t_left_s;
   1165             ENTROPY_CONTEXT *ta_s;
   1166             ENTROPY_CONTEXT *tl_s;
   1167 
                    /* Each mode trial starts from the contexts left by the
                     * previously committed labels. */
   1168             vpx_memcpy(&t_above_s, &t_above, sizeof(ENTROPY_CONTEXT_PLANES));
   1169             vpx_memcpy(&t_left_s, &t_left, sizeof(ENTROPY_CONTEXT_PLANES));
   1170 
   1171             ta_s = (ENTROPY_CONTEXT *)&t_above_s;
   1172             tl_s = (ENTROPY_CONTEXT *)&t_left_s;
   1173 
   1174             if (this_mode == NEW4X4)
   1175             {
   1176                 int sseshift;
   1177                 int num00;
   1178                 int step_param = 0;
   1179                 int further_steps;
   1180                 int n;
   1181                 int thissme;
   1182                 int bestsme = INT_MAX;
   1183                 int_mv  temp_mv;
   1184                 BLOCK *c;
   1185                 BLOCKD *e;
   1186 
   1187                 /* Is the best so far sufficiently good that we cant justify
   1188                  * doing a new motion search.
   1189                  */
   1190                 if (best_label_rd < label_mv_thresh)
   1191                     break;
   1192 
   1193                 if(cpi->compressor_speed)
   1194                 {
                            /* Seed the search from the saved 8x8 vectors and
                             * initial steps prepared by the caller. */
   1195                     if (segmentation == BLOCK_8X16 || segmentation == BLOCK_16X8)
   1196                     {
   1197                         bsi->mvp.as_int = bsi->sv_mvp[i].as_int;
   1198                         if (i==1 && segmentation == BLOCK_16X8)
   1199                           bsi->mvp.as_int = bsi->sv_mvp[2].as_int;
   1200 
   1201                         step_param = bsi->sv_istep[i];
   1202                     }
   1203 
   1204                     /* use previous block's result as next block's MV
   1205                      * predictor.
   1206                      */
   1207                     if (segmentation == BLOCK_4X4 && i>0)
   1208                     {
   1209                         bsi->mvp.as_int = x->e_mbd.block[i-1].bmi.mv.as_int;
                                /* First block of a row: use the block above. */
   1210                         if (i==4 || i==8 || i==12)
   1211                             bsi->mvp.as_int = x->e_mbd.block[i-4].bmi.mv.as_int;
   1212                         step_param = 2;
   1213                     }
   1214                 }
   1215 
   1216                 further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
   1217 
   1218                 {
   1219                     int sadpb = x->sadperbit4;
   1220                     int_mv mvp_full;
   1221 
                            /* Drop the three fractional bits of the predictor. */
   1222                     mvp_full.as_mv.row = bsi->mvp.as_mv.row >>3;
   1223                     mvp_full.as_mv.col = bsi->mvp.as_mv.col >>3;
   1224 
   1225                     /* find first label */
   1226                     n = vp8_mbsplit_offset[segmentation][i];
   1227 
   1228                     c = &x->block[n];
   1229                     e = &x->e_mbd.block[n];
   1230 
   1231                     {
   1232                         bestsme = cpi->diamond_search_sad(x, c, e, &mvp_full,
   1233                                                 &mode_mv[NEW4X4], step_param,
   1234                                                 sadpb, &num00, v_fn_ptr,
   1235                                                 x->mvcost, bsi->ref_mv);
   1236 
   1237                         n = num00;
   1238                         num00 = 0;
   1239 
                                /* Repeat the diamond search with increasing step
                                 * parameter, keeping the best result; num00 is
                                 * used to skip iterations (its exact meaning is
                                 * defined by diamond_search_sad - confirm). */
   1240                         while (n < further_steps)
   1241                         {
   1242                             n++;
   1243 
   1244                             if (num00)
   1245                                 num00--;
   1246                             else
   1247                             {
   1248                                 thissme = cpi->diamond_search_sad(x, c, e,
   1249                                                     &mvp_full, &temp_mv,
   1250                                                     step_param + n, sadpb,
   1251                                                     &num00, v_fn_ptr,
   1252                                                     x->mvcost, bsi->ref_mv);
   1253 
   1254                                 if (thissme < bestsme)
   1255                                 {
   1256                                     bestsme = thissme;
   1257                                     mode_mv[NEW4X4].as_int = temp_mv.as_int;
   1258                                 }
   1259                             }
   1260                         }
   1261                     }
   1262 
   1263                     sseshift = segmentation_to_sseshift[segmentation];
   1264 
   1265                     /* Should we do a full search (best quality only) */
   1266                     if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000)
   1267                     {
   1268                         /* Check if mvp_full is within the range. */
   1269                         vp8_clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
   1270 
   1271                         thissme = cpi->full_search_sad(x, c, e, &mvp_full,
   1272                                                        sadpb, 16, v_fn_ptr,
   1273                                                        x->mvcost, bsi->ref_mv);
   1274 
   1275                         if (thissme < bestsme)
   1276                         {
   1277                             bestsme = thissme;
   1278                             mode_mv[NEW4X4].as_int = e->bmi.mv.as_int;
   1279                         }
   1280                         else
   1281                         {
   1282                             /* The full search result is actually worse so
   1283                              * re-instate the previous best vector
   1284                              */
   1285                             e->bmi.mv.as_int = mode_mv[NEW4X4].as_int;
   1286                         }
   1287                     }
   1288                 }
   1289 
   1290                 if (bestsme < INT_MAX)
   1291                 {
                            /* Only the refined vector in mode_mv[NEW4X4] is
                             * used; the distortion and sse outputs are
                             * discarded. */
   1292                     int distortion;
   1293                     unsigned int sse;
   1294                     cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4],
   1295                         bsi->ref_mv, x->errorperbit, v_fn_ptr, x->mvcost,
   1296                         &distortion, &sse);
   1297 
   1298                 }
   1299             } /* NEW4X4 */
   1300 
                    /* Install this mode and vector on every block of the label
                     * and get the mode + vector signalling cost. */
   1301             rate = labels2mode(x, labels, i, this_mode, &mode_mv[this_mode],
   1302                                bsi->ref_mv, x->mvcost);
   1303 
   1304             /* Trap vectors that reach beyond the UMV borders */
   1305             if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) || ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) ||
   1306                 ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) || ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max))
   1307             {
   1308                 continue;
   1309             }
   1310 
   1311             distortion = vp8_encode_inter_mb_segment(x, labels, i) / 4;
   1312 
   1313             labelyrate = rdcost_mbsegment_y(x, labels, i, ta_s, tl_s);
   1314             rate += labelyrate;
   1315 
   1316             this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
   1317 
   1318             if (this_rd < best_label_rd)
   1319             {
   1320                 sbr = rate;
   1321                 sbd = distortion;
   1322                 bestlabelyrate = labelyrate;
   1323                 mode_selected = this_mode;
   1324                 best_label_rd = this_rd;
   1325 
                        /* Remember the contexts produced by the winning mode. */
   1326                 vpx_memcpy(ta_b, ta_s, sizeof(ENTROPY_CONTEXT_PLANES));
   1327                 vpx_memcpy(tl_b, tl_s, sizeof(ENTROPY_CONTEXT_PLANES));
   1328 
   1329             }
   1330         } /*for each 4x4 mode*/
   1331 
                /* Commit the winner's contexts for the following labels. */
   1332         vpx_memcpy(ta, ta_b, sizeof(ENTROPY_CONTEXT_PLANES));
   1333         vpx_memcpy(tl, tl_b, sizeof(ENTROPY_CONTEXT_PLANES));
   1334 
                /* Later trials overwrote the block vectors and partition_info,
                 * so re-apply the selected mode; the returned cost is unused. */
   1335         labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected],
   1336                     bsi->ref_mv, x->mvcost);
   1337 
   1338         br += sbr;
   1339         bd += sbd;
   1340         segmentyrate += bestlabelyrate;
   1341         this_segment_rd += best_label_rd;
   1342 
                /* Already no better than the best known result: give up. */
   1343         if (this_segment_rd >= bsi->segment_rd)
   1344             break;
   1345 
   1346     } /* for each label */
   1347 
   1348     if (this_segment_rd < bsi->segment_rd)
   1349     {
   1350         bsi->r = br;
   1351         bsi->d = bd;
   1352         bsi->segment_yrate = segmentyrate;
   1353         bsi->segment_rd = this_segment_rd;
   1354         bsi->segment_num = segmentation;
   1355 
   1356         /* store everything needed to come back to this!! */
   1357         for (i = 0; i < 16; i++)
   1358         {
   1359             bsi->mvs[i].as_mv = x->partition_info->bmi[i].mv.as_mv;
   1360             bsi->modes[i] = x->partition_info->bmi[i].mode;
   1361             bsi->eobs[i] = x->e_mbd.eobs[i];
   1362         }
   1363     }
   1364 }
   1365 
   /* Converts a search range into an initial step parameter for the motion
    * search.
    *
    *   sr - search range; clamped to [1, MAX_FIRST_STEP].
    *   sp - out: MAX_MVSEARCH_STEPS - 1 - floor(log2(sr)).
    */
   1366 static
   1367 void vp8_cal_step_param(int sr, int *sp)
   1368 {
   1369     int step = 0;
   1370 
   1371     if (sr > MAX_FIRST_STEP) sr = MAX_FIRST_STEP;
   1372     else if (sr < 1) sr = 1;
   1373 
            /* Count how many times sr can be halved before reaching zero,
             * i.e. the position of its highest set bit. */
   1374     while (sr>>=1)
   1375         step++;
   1376 
   1377     *sp = MAX_MVSEARCH_STEPS - 1 - step;
   1378 }
   1379 
   /* Searches the macroblock segmentations for the one with the lowest RD cost
    * below `best_rd` and installs it in `x`.
    *
    *   best_ref_mv      - reference vector for costing and search start.
    *   best_rd          - RD bound; only segmentations that beat it are kept.
    *   mdcounts         - neighbour mode counts used to cost SPLITMV.
    *   returntotrate    - out: total rate of the chosen segmentation.
    *   returnyrate      - out: coefficient rate part of that total.
    *   returndistortion - out: distortion of the chosen segmentation.
    *   mvthresh         - threshold controlling when new mv searches are run.
    *
    * Returns the best RD cost found, or best_rd unchanged if nothing beat it.
    * In that case the outputs are the zero / ZERO4X4 defaults set up below.
    *
    * Side effects: block vectors and eobs in x->e_mbd.block[], the
    * partitioning in the mode info and x->partition_info are overwritten.
    */
   1380 static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
   1381                                            int_mv *best_ref_mv, int best_rd,
   1382                                            int *mdcounts, int *returntotrate,
   1383                                            int *returnyrate, int *returndistortion,
   1384                                            int mvthresh)
   1385 {
   1386     int i;
   1387     BEST_SEG_INFO bsi;
   1388 
   1389     vpx_memset(&bsi, 0, sizeof(bsi));
   1390 
   1391     bsi.segment_rd = best_rd;
   1392     bsi.ref_mv = best_ref_mv;
   1393     bsi.mvp.as_int = best_ref_mv->as_int;
   1394     bsi.mvthresh = mvthresh;
   1395     bsi.mdcounts = mdcounts;
   1396 
   1397     for(i = 0; i < 16; i++)
   1398     {
   1399         bsi.modes[i] = ZERO4X4;
   1400     }
   1401 
   1402     if(cpi->compressor_speed == 0)
   1403     {
   1404         /* for now, we will keep the original segmentation order
   1405            when in best quality mode */
   1406         rd_check_segment(cpi, x, &bsi, BLOCK_16X8);
   1407         rd_check_segment(cpi, x, &bsi, BLOCK_8X16);
   1408         rd_check_segment(cpi, x, &bsi, BLOCK_8X8);
   1409         rd_check_segment(cpi, x, &bsi, BLOCK_4X4);
   1410     }
   1411     else
   1412     {
   1413         int sr;
   1414 
   1415         rd_check_segment(cpi, x, &bsi, BLOCK_8X8);
   1416 
                /* The other segmentations are only tried when 8x8 already
                 * improved on the caller's bound. */
   1417         if (bsi.segment_rd < best_rd)
   1418         {
   1419             int col_min = ((best_ref_mv->as_mv.col+7)>>3) - MAX_FULL_PEL_VAL;
   1420             int row_min = ((best_ref_mv->as_mv.row+7)>>3) - MAX_FULL_PEL_VAL;
   1421             int col_max = (best_ref_mv->as_mv.col>>3) + MAX_FULL_PEL_VAL;
   1422             int row_max = (best_ref_mv->as_mv.row>>3) + MAX_FULL_PEL_VAL;
   1423 
   1424             int tmp_col_min = x->mv_col_min;
   1425             int tmp_col_max = x->mv_col_max;
   1426             int tmp_row_min = x->mv_row_min;
   1427             int tmp_row_max = x->mv_row_max;
   1428 
   1429             /* Get intersection of UMV window and valid MV window to reduce # of checks in diamond search. */
   1430             if (x->mv_col_min < col_min )
   1431                 x->mv_col_min = col_min;
   1432             if (x->mv_col_max > col_max )
   1433                 x->mv_col_max = col_max;
   1434             if (x->mv_row_min < row_min )
   1435                 x->mv_row_min = row_min;
   1436             if (x->mv_row_max > row_max )
   1437                 x->mv_row_max = row_max;
   1438 
   1439             /* Get 8x8 result */
                    /* Blocks 0, 2, 8 and 10 are the top-left 4x4 block of each
                     * 8x8 quadrant. */
   1440             bsi.sv_mvp[0].as_int = bsi.mvs[0].as_int;
   1441             bsi.sv_mvp[1].as_int = bsi.mvs[2].as_int;
   1442             bsi.sv_mvp[2].as_int = bsi.mvs[8].as_int;
   1443             bsi.sv_mvp[3].as_int = bsi.mvs[10].as_int;
   1444 
   1445             /* Use 8x8 result as 16x8/8x16's predictor MV. Adjust search range according to the closeness of 2 MV. */
   1446             /* block 8X16 */
   1447             {
                        /* Search range = larger component difference, in full
                         * pels, between the two 8x8 vectors of each half. */
   1448                 sr = MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[2].as_mv.row))>>3, (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[2].as_mv.col))>>3);
   1449                 vp8_cal_step_param(sr, &bsi.sv_istep[0]);
   1450 
   1451                 sr = MAXF((abs(bsi.sv_mvp[1].as_mv.row - bsi.sv_mvp[3].as_mv.row))>>3, (abs(bsi.sv_mvp[1].as_mv.col - bsi.sv_mvp[3].as_mv.col))>>3);
   1452                 vp8_cal_step_param(sr, &bsi.sv_istep[1]);
   1453 
   1454                 rd_check_segment(cpi, x, &bsi, BLOCK_8X16);
   1455             }
   1456 
   1457             /* block 16X8 */
   1458             {
   1459                 sr = MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[1].as_mv.row))>>3, (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[1].as_mv.col))>>3);
   1460                 vp8_cal_step_param(sr, &bsi.sv_istep[0]);
   1461 
   1462                 sr = MAXF((abs(bsi.sv_mvp[2].as_mv.row - bsi.sv_mvp[3].as_mv.row))>>3, (abs(bsi.sv_mvp[2].as_mv.col - bsi.sv_mvp[3].as_mv.col))>>3);
   1463                 vp8_cal_step_param(sr, &bsi.sv_istep[1]);
   1464 
   1465                 rd_check_segment(cpi, x, &bsi, BLOCK_16X8);
   1466             }
   1467 
   1468             /* If 8x8 is better than 16x8/8x16, then do 4x4 search */
   1469             /* Not skip 4x4 if speed=0 (good quality) */
   1470             if (cpi->sf.no_skip_block4x4_search || bsi.segment_num == BLOCK_8X8)  /* || (sv_segment_rd8x8-bsi.segment_rd) < sv_segment_rd8x8>>5) */
   1471             {
   1472                 bsi.mvp.as_int = bsi.sv_mvp[0].as_int;
   1473                 rd_check_segment(cpi, x, &bsi, BLOCK_4X4);
   1474             }
   1475 
   1476             /* restore UMV window */
   1477             x->mv_col_min = tmp_col_min;
   1478             x->mv_col_max = tmp_col_max;
   1479             x->mv_row_min = tmp_row_min;
   1480             x->mv_row_max = tmp_row_max;
   1481         }
   1482     }
   1483 
   1484     /* set it to the best */
   1485     for (i = 0; i < 16; i++)
   1486     {
   1487         BLOCKD *bd = &x->e_mbd.block[i];
   1488 
   1489         bd->bmi.mv.as_int = bsi.mvs[i].as_int;
   1490         *bd->eob = bsi.eobs[i];
   1491     }
   1492 
   1493     *returntotrate = bsi.r;
   1494     *returndistortion = bsi.d;
   1495     *returnyrate = bsi.segment_yrate;
   1496 
   1497     /* save partitions */
   1498     x->e_mbd.mode_info_context->mbmi.partitioning = bsi.segment_num;
   1499     x->partition_info->count = vp8_mbsplit_count[bsi.segment_num];
   1500 
            /* partition_info->bmi[] is rewritten here with one entry per
             * partition, taken from the first 4x4 block of that partition. */
   1501     for (i = 0; i < x->partition_info->count; i++)
   1502     {
   1503         int j;
   1504 
   1505         j = vp8_mbsplit_offset[bsi.segment_num][i];
   1506 
   1507         x->partition_info->bmi[i].mode = bsi.modes[j];
   1508         x->partition_info->bmi[i].mv.as_mv = bsi.mvs[j].as_mv;
   1509     }
   1510     /*
   1511      * used to set x->e_mbd.mode_info_context->mbmi.mv.as_int
   1512      */
   1513     x->partition_info->bmi[15].mv.as_int = bsi.mvs[15].as_int;
   1514 
   1515     return bsi.segment_rd;
   1516 }
   1517 
   1518 /* The improved MV prediction */
        /* Predicts a motion vector for macroblock `here` from its neighbours.
         *
         *   cpi                 - encoder instance; supplies last-frame vectors
         *                         (lfmv, lf_ref_frame, lf_ref_frame_sign_bias).
         *   xd                  - macroblock descriptor (edge distances, stride).
         *   here                - mode info of the current macroblock.
         *   mvp                 - out: predicted vector, clamped by vp8_clamp_mv2().
         *   refframe            - reference frame the prediction is made for.
         *   ref_frame_sign_bias - sign bias table passed to mv_bias().
         *   sr                  - out: 3 or 2 when a neighbour with the same
         *                         reference frame was found (3 if it was among
         *                         the first three entries of near_sadidx),
         *                         0 when the median fallback was used. Left
         *                         untouched when `here` is intra.
         *   near_sadidx         - candidate indices in the order they are tried.
         *
         * Candidates 0-2 come from the current frame (above, left, above-left);
         * candidates 3-7 come from the last frame and are only gathered when
         * the last frame was not a key frame. When `here` is intra the
         * prediction is the zero vector.
         */
   1519 void vp8_mv_pred
   1520 (
   1521     VP8_COMP *cpi,
   1522     MACROBLOCKD *xd,
   1523     const MODE_INFO *here,
   1524     int_mv *mvp,
   1525     int refframe,
   1526     int *ref_frame_sign_bias,
   1527     int *sr,
   1528     int near_sadidx[]
   1529 )
   1530 {
   1531     const MODE_INFO *above = here - xd->mode_info_stride;
   1532     const MODE_INFO *left = here - 1;
   1533     const MODE_INFO *aboveleft = above - 1;
   1534     int_mv           near_mvs[8];
   1535     int              near_ref[8];
   1536     int_mv           mv;
   1537     int              vcnt=0;
   1538     int              find=0;
   1539     int              mb_offset;
   1540 
   1541     int              mvx[8];
   1542     int              mvy[8];
   1543     int              i;
   1544 
   1545     mv.as_int = 0;
   1546 
   1547     if(here->mbmi.ref_frame != INTRA_FRAME)
   1548     {
                /* Zero vector / reference 0 marks a candidate slot as unused;
                 * vcnt still advances past intra neighbours so slot numbers
                 * stay fixed. */
   1549         near_mvs[0].as_int = near_mvs[1].as_int = near_mvs[2].as_int = near_mvs[3].as_int = near_mvs[4].as_int = near_mvs[5].as_int = near_mvs[6].as_int = near_mvs[7].as_int = 0;
   1550         near_ref[0] = near_ref[1] = near_ref[2] = near_ref[3] = near_ref[4] = near_ref[5] = near_ref[6] = near_ref[7] = 0;
   1551 
   1552         /* read in 3 nearby block's MVs from current frame as prediction
   1553          * candidates.
   1554          */
   1555         if (above->mbmi.ref_frame != INTRA_FRAME)
   1556         {
   1557             near_mvs[vcnt].as_int = above->mbmi.mv.as_int;
   1558             mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
   1559             near_ref[vcnt] =  above->mbmi.ref_frame;
   1560         }
   1561         vcnt++;
   1562         if (left->mbmi.ref_frame != INTRA_FRAME)
   1563         {
   1564             near_mvs[vcnt].as_int = left->mbmi.mv.as_int;
   1565             mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
   1566             near_ref[vcnt] =  left->mbmi.ref_frame;
   1567         }
   1568         vcnt++;
   1569         if (aboveleft->mbmi.ref_frame != INTRA_FRAME)
   1570         {
   1571             near_mvs[vcnt].as_int = aboveleft->mbmi.mv.as_int;
   1572             mv_bias(ref_frame_sign_bias[aboveleft->mbmi.ref_frame], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
   1573             near_ref[vcnt] =  aboveleft->mbmi.ref_frame;
   1574         }
   1575         vcnt++;
   1576 
   1577         /* read in 5 nearby block's MVs from last frame. */
   1578         if(cpi->common.last_frame_type != KEY_FRAME)
   1579         {
                    /* Index of the current macroblock in the last-frame arrays;
                     * the +1 terms suggest a one-entry border around them
                     * (TODO confirm against where lfmv is allocated). */
   1580             mb_offset = (-xd->mb_to_top_edge/128 + 1) * (xd->mode_info_stride +1) + (-xd->mb_to_left_edge/128 +1) ;
   1581 
   1582             /* current in last frame */
   1583             if (cpi->lf_ref_frame[mb_offset] != INTRA_FRAME)
   1584             {
   1585                 near_mvs[vcnt].as_int = cpi->lfmv[mb_offset].as_int;
   1586                 mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
   1587                 near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset];
   1588             }
   1589             vcnt++;
   1590 
   1591             /* above in last frame */
   1592             if (cpi->lf_ref_frame[mb_offset - xd->mode_info_stride-1] != INTRA_FRAME)
   1593             {
   1594                 near_mvs[vcnt].as_int = cpi->lfmv[mb_offset - xd->mode_info_stride-1].as_int;
   1595                 mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset - xd->mode_info_stride-1], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
   1596                 near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset - xd->mode_info_stride-1];
   1597             }
   1598             vcnt++;
   1599 
   1600             /* left in last frame */
   1601             if (cpi->lf_ref_frame[mb_offset-1] != INTRA_FRAME)
   1602             {
   1603                 near_mvs[vcnt].as_int = cpi->lfmv[mb_offset -1].as_int;
   1604                 mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset -1], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
   1605                 near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset - 1];
   1606             }
   1607             vcnt++;
   1608 
   1609             /* right in last frame */
   1610             if (cpi->lf_ref_frame[mb_offset +1] != INTRA_FRAME)
   1611             {
   1612                 near_mvs[vcnt].as_int = cpi->lfmv[mb_offset +1].as_int;
   1613                 mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset +1], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
   1614                 near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset +1];
   1615             }
   1616             vcnt++;
   1617 
   1618             /* below in last frame */
   1619             if (cpi->lf_ref_frame[mb_offset + xd->mode_info_stride +1] != INTRA_FRAME)
   1620             {
   1621                 near_mvs[vcnt].as_int = cpi->lfmv[mb_offset + xd->mode_info_stride +1].as_int;
   1622                 mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset + xd->mode_info_stride +1], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
   1623                 near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset + xd->mode_info_stride +1];
   1624             }
   1625             vcnt++;
   1626         }
   1627 
                /* Take the first candidate, in near_sadidx order, that uses the
                 * same reference frame as the current macroblock. */
   1628         for(i=0; i< vcnt; i++)
   1629         {
   1630             if(near_ref[near_sadidx[i]] != INTRA_FRAME)
   1631             {
   1632                 if(here->mbmi.ref_frame == near_ref[near_sadidx[i]])
   1633                 {
   1634                     mv.as_int = near_mvs[near_sadidx[i]].as_int;
   1635                     find = 1;
   1636                     if (i < 3)
   1637                         *sr = 3;
   1638                     else
   1639                         *sr = 2;
   1640                     break;
   1641                 }
   1642             }
   1643         }
   1644 
                /* Fallback: component-wise median of all candidate slots. */
   1645         if(!find)
   1646         {
                    /* Note: mvx holds the row components and mvy the column
                     * components. */
   1647             for(i=0; i<vcnt; i++)
   1648             {
   1649                 mvx[i] = near_mvs[i].as_mv.row;
   1650                 mvy[i] = near_mvs[i].as_mv.col;
   1651             }
   1652 
   1653             insertsortmv(mvx, vcnt);
   1654             insertsortmv(mvy, vcnt);
   1655             mv.as_mv.row = mvx[vcnt/2];
   1656             mv.as_mv.col = mvy[vcnt/2];
   1657 
   1658             find = 1;
   1659             /* sr is set to 0 to allow calling function to decide the search
   1660              * range.
   1661              */
   1662             *sr = 0;
   1663         }
   1664     }
   1665 
   1666     /* Set up return values */
   1667     mvp->as_int = mv.as_int;
   1668     vp8_clamp_mv2(mvp, xd);
   1669 }
   1670 
   /* Computes the 16x16 SAD between the current source macroblock and each of
    * up to eight neighbouring macroblocks, then orders the candidates.
    *
    *   cpi           - encoder instance (SAD function, last-frame buffer).
    *   xd            - macroblock descriptor (edge distances, dst buffer).
    *   x             - macroblock being coded; block 0 supplies the source.
    *   recon_yoffset - offset of the current macroblock in the last frame's
    *                   luma buffer.
    *   near_sadidx   - in/out index array handed to insertsortsad() together
    *                   with the SADs (presumably reordered to follow the
    *                   sorted SADs - confirm against insertsortsad()).
    *
    * Neighbours that fall outside the frame get INT_MAX instead of a SAD.
    * Only the three current-frame entries are sorted when the last frame was
    * a key frame.
    */
   1671 void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffset, int near_sadidx[])
   1672 {
   1673     /* near_sad indexes:
   1674      *   0-cf above, 1-cf left, 2-cf aboveleft,
   1675      *   3-lf current, 4-lf above, 5-lf left, 6-lf right, 7-lf below
   1676      */
   1677     int near_sad[8] = {0};
   1678     BLOCK *b = &x->block[0];
   1679     unsigned char *src_y_ptr = *(b->base_src);
   1680 
   1681     /* calculate sad for current frame 3 nearby MBs. */
   1682     if( xd->mb_to_top_edge==0 && xd->mb_to_left_edge ==0)
   1683     {
   1684         near_sad[0] = near_sad[1] = near_sad[2] = INT_MAX;
   1685     }else if(xd->mb_to_top_edge==0)
   1686     {   /* only has left MB for sad calculation. */
   1687         near_sad[0] = near_sad[2] = INT_MAX;
   1688         near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, UINT_MAX);
   1689     }else if(xd->mb_to_left_edge ==0)
   1690     {   /* only has above MB for sad calculation. */
   1691         near_sad[1] = near_sad[2] = INT_MAX;
   1692         near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, UINT_MAX);
   1693     }else
   1694     {
   1695         near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, UINT_MAX);
   1696         near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, UINT_MAX);
   1697         near_sad[2] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16 -16,xd->dst.y_stride, UINT_MAX);
   1698     }
   1699 
   1700     if(cpi->common.last_frame_type != KEY_FRAME)
   1701     {
   1702         /* calculate sad for last frame 5 nearby MBs. */
   1703         unsigned char *pre_y_buffer = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_buffer + recon_yoffset;
   1704         int pre_y_stride = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_stride;
   1705 
                /* Mark out-of-frame neighbours first; entries still at their
                 * initial 0 are then replaced by a real SAD below. */
   1706         if(xd->mb_to_top_edge==0) near_sad[4] = INT_MAX;
   1707         if(xd->mb_to_left_edge ==0) near_sad[5] = INT_MAX;
   1708         if(xd->mb_to_right_edge ==0) near_sad[6] = INT_MAX;
   1709         if(xd->mb_to_bottom_edge==0) near_sad[7] = INT_MAX;
   1710 
   1711         if(near_sad[4] != INT_MAX)
   1712             near_sad[4] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer - pre_y_stride *16, pre_y_stride, UINT_MAX);
   1713         if(near_sad[5] != INT_MAX)
   1714             near_sad[5] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer - 16, pre_y_stride, UINT_MAX);
   1715         near_sad[3] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer, pre_y_stride, UINT_MAX);
   1716         if(near_sad[6] != INT_MAX)
   1717             near_sad[6] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer + 16, pre_y_stride, UINT_MAX);
   1718         if(near_sad[7] != INT_MAX)
   1719             near_sad[7] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer + pre_y_stride *16, pre_y_stride, UINT_MAX);
   1720     }
   1721 
   1722     if(cpi->common.last_frame_type != KEY_FRAME)
   1723     {
   1724         insertsortsad(near_sad, near_sadidx, 8);
   1725     }else
   1726     {
   1727         insertsortsad(near_sad, near_sadidx, 3);
   1728     }
   1729 }
   1730 
   1731 static void rd_update_mvcount(VP8_COMP *cpi, MACROBLOCK *x, int_mv *best_ref_mv)
   1732 {
   1733     if (x->e_mbd.mode_info_context->mbmi.mode == SPLITMV)
   1734     {
   1735         int i;
   1736 
   1737         for (i = 0; i < x->partition_info->count; i++)
   1738         {
   1739             if (x->partition_info->bmi[i].mode == NEW4X4)
   1740             {
   1741                 x->MVcount[0][mv_max+((x->partition_info->bmi[i].mv.as_mv.row
   1742                                           - best_ref_mv->as_mv.row) >> 1)]++;
   1743                 x->MVcount[1][mv_max+((x->partition_info->bmi[i].mv.as_mv.col
   1744                                           - best_ref_mv->as_mv.col) >> 1)]++;
   1745             }
   1746         }
   1747     }
   1748     else if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV)
   1749     {
   1750         x->MVcount[0][mv_max+((x->e_mbd.mode_info_context->mbmi.mv.as_mv.row
   1751                                           - best_ref_mv->as_mv.row) >> 1)]++;
   1752         x->MVcount[1][mv_max+((x->e_mbd.mode_info_context->mbmi.mv.as_mv.col
   1753                                           - best_ref_mv->as_mv.col) >> 1)]++;
   1754     }
   1755 }
   1756 
   1757 static int evaluate_inter_mode_rd(int mdcounts[4],
   1758                                   RATE_DISTORTION* rd,
   1759                                   int* disable_skip,
   1760                                   VP8_COMP *cpi, MACROBLOCK *x)
   1761 {
   1762     MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode;
   1763     BLOCK *b = &x->block[0];
   1764     MACROBLOCKD *xd = &x->e_mbd;
   1765     int distortion;
   1766     vp8_build_inter16x16_predictors_mby(&x->e_mbd, x->e_mbd.predictor, 16);
   1767 
   1768     if (cpi->active_map_enabled && x->active_ptr[0] == 0) {
   1769         x->skip = 1;
   1770     }
   1771     else if (x->encode_breakout)
   1772     {
   1773         unsigned int sse;
   1774         unsigned int var;
   1775         unsigned int threshold = (xd->block[0].dequant[1]
   1776                     * xd->block[0].dequant[1] >>4);
   1777 
   1778         if(threshold < x->encode_breakout)
   1779             threshold = x->encode_breakout;
   1780 
   1781         var = vp8_variance16x16
   1782                 (*(b->base_src), b->src_stride,
   1783                 x->e_mbd.predictor, 16, &sse);
   1784 
   1785         if (sse < threshold)
   1786         {
   1787              unsigned int q2dc = xd->block[24].dequant[0];
   1788             /* If theres is no codeable 2nd order dc
   1789                or a very small uniform pixel change change */
   1790             if ((sse - var < q2dc * q2dc >>4) ||
   1791                 (sse /2 > var && sse-var < 64))
   1792             {
   1793                 /* Check u and v to make sure skip is ok */
   1794                 unsigned int sse2 = VP8_UVSSE(x);
   1795                 if (sse2 * 2 < threshold)
   1796                 {
   1797                     x->skip = 1;
   1798                     rd->distortion2 = sse + sse2;
   1799                     rd->rate2 = 500;
   1800 
   1801                     /* for best_yrd calculation */
   1802                     rd->rate_uv = 0;
   1803                     rd->distortion_uv = sse2;
   1804 
   1805                     *disable_skip = 1;
   1806                     return RDCOST(x->rdmult, x->rddiv, rd->rate2,
   1807                                   rd->distortion2);
   1808                 }
   1809             }
   1810         }
   1811     }
   1812 
   1813 
   1814     /* Add in the Mv/mode cost */
   1815     rd->rate2 += vp8_cost_mv_ref(this_mode, mdcounts);
   1816 
   1817     /* Y cost and distortion */
   1818     macro_block_yrd(x, &rd->rate_y, &distortion);
   1819     rd->rate2 += rd->rate_y;
   1820     rd->distortion2 += distortion;
   1821 
   1822     /* UV cost and distortion */
   1823     rd_inter16x16_uv(cpi, x, &rd->rate_uv, &rd->distortion_uv,
   1824                      cpi->common.full_pixel);
   1825     rd->rate2 += rd->rate_uv;
   1826     rd->distortion2 += rd->distortion_uv;
   1827     return INT_MAX;
   1828 }
   1829 
   1830 static int calculate_final_rd_costs(int this_rd,
   1831                                     RATE_DISTORTION* rd,
   1832                                     int* other_cost,
   1833                                     int disable_skip,
   1834                                     int uv_intra_tteob,
   1835                                     int intra_rd_penalty,
   1836                                     VP8_COMP *cpi, MACROBLOCK *x)
   1837 {
   1838     MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode;
   1839 
   1840     /* Where skip is allowable add in the default per mb cost for the no
   1841      * skip case. where we then decide to skip we have to delete this and
   1842      * replace it with the cost of signalling a skip
   1843      */
   1844     if (cpi->common.mb_no_coeff_skip)
   1845     {
   1846         *other_cost += vp8_cost_bit(cpi->prob_skip_false, 0);
   1847         rd->rate2 += *other_cost;
   1848     }
   1849 
   1850     /* Estimate the reference frame signaling cost and add it
   1851      * to the rolling cost variable.
   1852      */
   1853     rd->rate2 +=
   1854         x->ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
   1855 
   1856     if (!disable_skip)
   1857     {
   1858         /* Test for the condition where skip block will be activated
   1859          * because there are no non zero coefficients and make any
   1860          * necessary adjustment for rate
   1861          */
   1862         if (cpi->common.mb_no_coeff_skip)
   1863         {
   1864             int i;
   1865             int tteob;
   1866             int has_y2_block = (this_mode!=SPLITMV && this_mode!=B_PRED);
   1867 
   1868             tteob = 0;
   1869             if(has_y2_block)
   1870                 tteob += x->e_mbd.eobs[24];
   1871 
   1872             for (i = 0; i < 16; i++)
   1873                 tteob += (x->e_mbd.eobs[i] > has_y2_block);
   1874 
   1875             if (x->e_mbd.mode_info_context->mbmi.ref_frame)
   1876             {
   1877                 for (i = 16; i < 24; i++)
   1878                     tteob += x->e_mbd.eobs[i];
   1879             }
   1880             else
   1881                 tteob += uv_intra_tteob;
   1882 
   1883             if (tteob == 0)
   1884             {
   1885                 rd->rate2 -= (rd->rate_y + rd->rate_uv);
   1886                 /* for best_yrd calculation */
   1887                 rd->rate_uv = 0;
   1888 
   1889                 /* Back out no skip flag costing and add in skip flag costing */
   1890                 if (cpi->prob_skip_false)
   1891                 {
   1892                     int prob_skip_cost;
   1893 
   1894                     prob_skip_cost = vp8_cost_bit(cpi->prob_skip_false, 1);
   1895                     prob_skip_cost -= vp8_cost_bit(cpi->prob_skip_false, 0);
   1896                     rd->rate2 += prob_skip_cost;
   1897                     *other_cost += prob_skip_cost;
   1898                 }
   1899             }
   1900         }
   1901         /* Calculate the final RD estimate for this mode */
   1902         this_rd = RDCOST(x->rdmult, x->rddiv, rd->rate2, rd->distortion2);
   1903         if (this_rd < INT_MAX && x->e_mbd.mode_info_context->mbmi.ref_frame
   1904                                  == INTRA_FRAME)
   1905             this_rd += intra_rd_penalty;
   1906     }
   1907     return this_rd;
   1908 }
   1909 
   1910 static void update_best_mode(BEST_MODE* best_mode, int this_rd,
   1911                              RATE_DISTORTION* rd, int other_cost, MACROBLOCK *x)
   1912 {
   1913     MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode;
   1914 
   1915     other_cost +=
   1916     x->ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
   1917 
   1918     /* Calculate the final y RD estimate for this mode */
   1919     best_mode->yrd = RDCOST(x->rdmult, x->rddiv, (rd->rate2-rd->rate_uv-other_cost),
   1920                       (rd->distortion2-rd->distortion_uv));
   1921 
   1922     best_mode->rd = this_rd;
   1923     vpx_memcpy(&best_mode->mbmode, &x->e_mbd.mode_info_context->mbmi, sizeof(MB_MODE_INFO));
   1924     vpx_memcpy(&best_mode->partition, x->partition_info, sizeof(PARTITION_INFO));
   1925 
   1926     if ((this_mode == B_PRED) || (this_mode == SPLITMV))
   1927     {
   1928         int i;
   1929         for (i = 0; i < 16; i++)
   1930         {
   1931             best_mode->bmodes[i] = x->e_mbd.block[i].bmi;
   1932         }
   1933     }
   1934 }
   1935 
   1936 void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
   1937                             int recon_uvoffset, int *returnrate,
   1938                             int *returndistortion, int *returnintra)
   1939 {
   1940     BLOCK *b = &x->block[0];
   1941     BLOCKD *d = &x->e_mbd.block[0];
   1942     MACROBLOCKD *xd = &x->e_mbd;
   1943     int_mv best_ref_mv_sb[2];
   1944     int_mv mode_mv_sb[2][MB_MODE_COUNT];
   1945     int_mv best_ref_mv;
   1946     int_mv *mode_mv;
   1947     MB_PREDICTION_MODE this_mode;
   1948     int num00;
   1949     int best_mode_index = 0;
   1950     BEST_MODE best_mode;
   1951 
   1952     int i;
   1953     int mode_index;
   1954     int mdcounts[4];
   1955     int rate;
   1956     RATE_DISTORTION rd;
   1957     int uv_intra_rate, uv_intra_distortion, uv_intra_rate_tokenonly;
   1958     int uv_intra_tteob = 0;
   1959     int uv_intra_done = 0;
   1960 
   1961     MB_PREDICTION_MODE uv_intra_mode = 0;
   1962     int_mv mvp;
   1963     int near_sadidx[8] = {0, 1, 2, 3, 4, 5, 6, 7};
   1964     int saddone=0;
   1965     /* search range got from mv_pred(). It uses step_param levels. (0-7) */
   1966     int sr=0;
   1967 
   1968     unsigned char *plane[4][3];
   1969     int ref_frame_map[4];
   1970     int sign_bias = 0;
   1971 
   1972     int intra_rd_penalty =  10* vp8_dc_quant(cpi->common.base_qindex,
   1973                                              cpi->common.y1dc_delta_q);
   1974 
   1975 #if CONFIG_TEMPORAL_DENOISING
   1976     unsigned int zero_mv_sse = INT_MAX, best_sse = INT_MAX,
   1977             best_rd_sse = INT_MAX;
   1978 #endif
   1979 
   1980     mode_mv = mode_mv_sb[sign_bias];
   1981     best_ref_mv.as_int = 0;
   1982     best_mode.rd = INT_MAX;
   1983     best_mode.yrd = INT_MAX;
   1984     best_mode.intra_rd = INT_MAX;
   1985     vpx_memset(mode_mv_sb, 0, sizeof(mode_mv_sb));
   1986     vpx_memset(&best_mode.mbmode, 0, sizeof(best_mode.mbmode));
   1987     vpx_memset(&best_mode.bmodes, 0, sizeof(best_mode.bmodes));
   1988 
   1989     /* Setup search priorities */
   1990     get_reference_search_order(cpi, ref_frame_map);
   1991 
   1992     /* Check to see if there is at least 1 valid reference frame that we need
   1993      * to calculate near_mvs.
   1994      */
   1995     if (ref_frame_map[1] > 0)
   1996     {
   1997         sign_bias = vp8_find_near_mvs_bias(&x->e_mbd,
   1998                                            x->e_mbd.mode_info_context,
   1999                                            mode_mv_sb,
   2000                                            best_ref_mv_sb,
   2001                                            mdcounts,
   2002                                            ref_frame_map[1],
   2003                                            cpi->common.ref_frame_sign_bias);
   2004 
   2005         mode_mv = mode_mv_sb[sign_bias];
   2006         best_ref_mv.as_int = best_ref_mv_sb[sign_bias].as_int;
   2007     }
   2008 
   2009     get_predictor_pointers(cpi, plane, recon_yoffset, recon_uvoffset);
   2010 
   2011     *returnintra = INT_MAX;
   2012     /* Count of the number of MBs tested so far this frame */
   2013     cpi->mbs_tested_so_far++;
   2014 
   2015     x->skip = 0;
   2016 
   2017     for (mode_index = 0; mode_index < MAX_MODES; mode_index++)
   2018     {
   2019         int this_rd = INT_MAX;
   2020         int disable_skip = 0;
   2021         int other_cost = 0;
   2022         int this_ref_frame = ref_frame_map[vp8_ref_frame_order[mode_index]];
   2023 
   2024         /* Test best rd so far against threshold for trying this mode. */
   2025         if (best_mode.rd <= cpi->rd_threshes[mode_index])
   2026             continue;
   2027 
   2028         if (this_ref_frame < 0)
   2029             continue;
   2030 
   2031         /* These variables hold are rolling total cost and distortion for
   2032          * this mode
   2033          */
   2034         rd.rate2 = 0;
   2035         rd.distortion2 = 0;
   2036 
   2037         this_mode = vp8_mode_order[mode_index];
   2038 
   2039         x->e_mbd.mode_info_context->mbmi.mode = this_mode;
   2040         x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame;
   2041 
   2042         /* Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
   2043          * unless ARNR filtering is enabled in which case we want
   2044          * an unfiltered alternative
   2045          */
   2046         if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0))
   2047         {
   2048             if (this_mode != ZEROMV || x->e_mbd.mode_info_context->mbmi.ref_frame != ALTREF_FRAME)
   2049                 continue;
   2050         }
   2051 
   2052         /* everything but intra */
   2053         if (x->e_mbd.mode_info_context->mbmi.ref_frame)
   2054         {
   2055             x->e_mbd.pre.y_buffer = plane[this_ref_frame][0];
   2056             x->e_mbd.pre.u_buffer = plane[this_ref_frame][1];
   2057             x->e_mbd.pre.v_buffer = plane[this_ref_frame][2];
   2058 
   2059             if (sign_bias != cpi->common.ref_frame_sign_bias[this_ref_frame])
   2060             {
   2061                 sign_bias = cpi->common.ref_frame_sign_bias[this_ref_frame];
   2062                 mode_mv = mode_mv_sb[sign_bias];
   2063                 best_ref_mv.as_int = best_ref_mv_sb[sign_bias].as_int;
   2064             }
   2065         }
   2066 
   2067         /* Check to see if the testing frequency for this mode is at its
   2068          * max If so then prevent it from being tested and increase the
   2069          * threshold for its testing
   2070          */
   2071         if (cpi->mode_test_hit_counts[mode_index] && (cpi->mode_check_freq[mode_index] > 1))
   2072         {
   2073             if (cpi->mbs_tested_so_far  <= cpi->mode_check_freq[mode_index] * cpi->mode_test_hit_counts[mode_index])
   2074             {
   2075                 /* Increase the threshold for coding this mode to make it
   2076                  * less likely to be chosen
   2077                  */
   2078                 cpi->rd_thresh_mult[mode_index] += 4;
   2079 
   2080                 if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT)
   2081                     cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
   2082 
   2083                 cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index];
   2084 
   2085                 continue;
   2086             }
   2087         }
   2088 
   2089         /* We have now reached the point where we are going to test the
   2090          * current mode so increment the counter for the number of times
   2091          * it has been tested
   2092          */
   2093         cpi->mode_test_hit_counts[mode_index] ++;
   2094 
   2095         /* Experimental code. Special case for gf and arf zeromv modes.
   2096          * Increase zbin size to supress noise
   2097          */
   2098         if (cpi->zbin_mode_boost_enabled)
   2099         {
   2100             if ( this_ref_frame == INTRA_FRAME )
   2101                 cpi->zbin_mode_boost = 0;
   2102             else
   2103             {
   2104                 if (vp8_mode_order[mode_index] == ZEROMV)
   2105                 {
   2106                     if (this_ref_frame != LAST_FRAME)
   2107                         cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
   2108                     else
   2109                         cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
   2110                 }
   2111                 else if (vp8_mode_order[mode_index] == SPLITMV)
   2112                     cpi->zbin_mode_boost = 0;
   2113                 else
   2114                     cpi->zbin_mode_boost = MV_ZBIN_BOOST;
   2115             }
   2116 
   2117             vp8_update_zbin_extra(cpi, x);
   2118         }
   2119 
   2120         if(!uv_intra_done && this_ref_frame == INTRA_FRAME)
   2121         {
   2122             rd_pick_intra_mbuv_mode(x, &uv_intra_rate,
   2123                                     &uv_intra_rate_tokenonly,
   2124                                     &uv_intra_distortion);
   2125             uv_intra_mode = x->e_mbd.mode_info_context->mbmi.uv_mode;
   2126 
   2127             /*
   2128              * Total of the eobs is used later to further adjust rate2. Since uv
   2129              * block's intra eobs will be overwritten when we check inter modes,
   2130              * we need to save uv_intra_tteob here.
   2131              */
   2132             for (i = 16; i < 24; i++)
   2133                 uv_intra_tteob += x->e_mbd.eobs[i];
   2134 
   2135             uv_intra_done = 1;
   2136         }
   2137 
   2138         switch (this_mode)
   2139         {
   2140         case B_PRED:
   2141         {
   2142             int tmp_rd;
   2143 
   2144             /* Note the rate value returned here includes the cost of
   2145              * coding the BPRED mode: x->mbmode_cost[x->e_mbd.frame_type][BPRED]
   2146              */
   2147             int distortion;
   2148             tmp_rd = rd_pick_intra4x4mby_modes(x, &rate, &rd.rate_y, &distortion, best_mode.yrd);
   2149             rd.rate2 += rate;
   2150             rd.distortion2 += distortion;
   2151 
   2152             if(tmp_rd < best_mode.yrd)
   2153             {
   2154                 rd.rate2 += uv_intra_rate;
   2155                 rd.rate_uv = uv_intra_rate_tokenonly;
   2156                 rd.distortion2 += uv_intra_distortion;
   2157                 rd.distortion_uv = uv_intra_distortion;
   2158             }
   2159             else
   2160             {
   2161                 this_rd = INT_MAX;
   2162                 disable_skip = 1;
   2163             }
   2164         }
   2165         break;
   2166 
   2167         case SPLITMV:
   2168         {
   2169             int tmp_rd;
   2170             int this_rd_thresh;
   2171             int distortion;
   2172 
   2173             this_rd_thresh = (vp8_ref_frame_order[mode_index] == 1) ? cpi->rd_threshes[THR_NEW1] : cpi->rd_threshes[THR_NEW3];
   2174             this_rd_thresh = (vp8_ref_frame_order[mode_index] == 2) ? cpi->rd_threshes[THR_NEW2] : this_rd_thresh;
   2175 
   2176             tmp_rd = vp8_rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv,
   2177                                                      best_mode.yrd, mdcounts,
   2178                                                      &rate, &rd.rate_y, &distortion, this_rd_thresh) ;
   2179 
   2180             rd.rate2 += rate;
   2181             rd.distortion2 += distortion;
   2182 
   2183             /* If even the 'Y' rd value of split is higher than best so far
   2184              * then dont bother looking at UV
   2185              */
   2186             if (tmp_rd < best_mode.yrd)
   2187             {
   2188                 /* Now work out UV cost and add it in */
   2189                 rd_inter4x4_uv(cpi, x, &rd.rate_uv, &rd.distortion_uv, cpi->common.full_pixel);
   2190                 rd.rate2 += rd.rate_uv;
   2191                 rd.distortion2 += rd.distortion_uv;
   2192             }
   2193             else
   2194             {
   2195                 this_rd = INT_MAX;
   2196                 disable_skip = 1;
   2197             }
   2198         }
   2199         break;
   2200         case DC_PRED:
   2201         case V_PRED:
   2202         case H_PRED:
   2203         case TM_PRED:
   2204         {
   2205             int distortion;
   2206             x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
   2207 
   2208             vp8_build_intra_predictors_mby_s(xd,
   2209                                              xd->dst.y_buffer - xd->dst.y_stride,
   2210                                              xd->dst.y_buffer - 1,
   2211                                              xd->dst.y_stride,
   2212                                              xd->predictor,
   2213                                              16);
   2214             macro_block_yrd(x, &rd.rate_y, &distortion) ;
   2215             rd.rate2 += rd.rate_y;
   2216             rd.distortion2 += distortion;
   2217             rd.rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode];
   2218             rd.rate2 += uv_intra_rate;
   2219             rd.rate_uv = uv_intra_rate_tokenonly;
   2220             rd.distortion2 += uv_intra_distortion;
   2221             rd.distortion_uv = uv_intra_distortion;
   2222         }
   2223         break;
   2224 
   2225         case NEWMV:
   2226         {
   2227             int thissme;
   2228             int bestsme = INT_MAX;
   2229             int step_param = cpi->sf.first_step;
   2230             int further_steps;
   2231             int n;
   2232             int do_refine=1;   /* If last step (1-away) of n-step search doesn't pick the center point as the best match,
   2233                                   we will do a final 1-away diamond refining search  */
   2234 
   2235             int sadpb = x->sadperbit16;
   2236             int_mv mvp_full;
   2237 
   2238             int col_min = ((best_ref_mv.as_mv.col+7)>>3) - MAX_FULL_PEL_VAL;
   2239             int row_min = ((best_ref_mv.as_mv.row+7)>>3) - MAX_FULL_PEL_VAL;
   2240             int col_max = (best_ref_mv.as_mv.col>>3) + MAX_FULL_PEL_VAL;
   2241             int row_max = (best_ref_mv.as_mv.row>>3) + MAX_FULL_PEL_VAL;
   2242 
   2243             int tmp_col_min = x->mv_col_min;
   2244             int tmp_col_max = x->mv_col_max;
   2245             int tmp_row_min = x->mv_row_min;
   2246             int tmp_row_max = x->mv_row_max;
   2247 
   2248             if(!saddone)
   2249             {
   2250                 vp8_cal_sad(cpi,xd,x, recon_yoffset ,&near_sadidx[0] );
   2251                 saddone = 1;
   2252             }
   2253 
   2254             vp8_mv_pred(cpi, &x->e_mbd, x->e_mbd.mode_info_context, &mvp,
   2255                         x->e_mbd.mode_info_context->mbmi.ref_frame, cpi->common.ref_frame_sign_bias, &sr, &near_sadidx[0]);
   2256 
   2257             mvp_full.as_mv.col = mvp.as_mv.col>>3;
   2258             mvp_full.as_mv.row = mvp.as_mv.row>>3;
   2259 
   2260             /* Get intersection of UMV window and valid MV window to
   2261              * reduce # of checks in diamond search.
   2262              */
   2263             if (x->mv_col_min < col_min )
   2264                 x->mv_col_min = col_min;
   2265             if (x->mv_col_max > col_max )
   2266                 x->mv_col_max = col_max;
   2267             if (x->mv_row_min < row_min )
   2268                 x->mv_row_min = row_min;
   2269             if (x->mv_row_max > row_max )
   2270                 x->mv_row_max = row_max;
   2271 
   2272             /* adjust search range according to sr from mv prediction */
   2273             if(sr > step_param)
   2274                 step_param = sr;
   2275 
   2276             /* Initial step/diamond search */
   2277             {
   2278                 bestsme = cpi->diamond_search_sad(x, b, d, &mvp_full, &d->bmi.mv,
   2279                                         step_param, sadpb, &num00,
   2280                                         &cpi->fn_ptr[BLOCK_16X16],
   2281                                         x->mvcost, &best_ref_mv);
   2282                 mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
   2283 
   2284                 /* Further step/diamond searches as necessary */
   2285                 n = 0;
   2286                 further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
   2287 
   2288                 n = num00;
   2289                 num00 = 0;
   2290 
   2291                 /* If there won't be more n-step search, check to see if refining search is needed. */
   2292                 if (n > further_steps)
   2293                     do_refine = 0;
   2294 
   2295                 while (n < further_steps)
   2296                 {
   2297                     n++;
   2298 
   2299                     if (num00)
   2300                         num00--;
   2301                     else
   2302                     {
   2303                         thissme = cpi->diamond_search_sad(x, b, d, &mvp_full,
   2304                                     &d->bmi.mv, step_param + n, sadpb, &num00,
   2305                                     &cpi->fn_ptr[BLOCK_16X16], x->mvcost,
   2306                                     &best_ref_mv);
   2307 
   2308                         /* check to see if refining search is needed. */
   2309                         if (num00 > (further_steps-n))
   2310                             do_refine = 0;
   2311 
   2312                         if (thissme < bestsme)
   2313                         {
   2314                             bestsme = thissme;
   2315                             mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
   2316                         }
   2317                         else
   2318                         {
   2319                             d->bmi.mv.as_int = mode_mv[NEWMV].as_int;
   2320                         }
   2321                     }
   2322                 }
   2323             }
   2324 
   2325             /* final 1-away diamond refining search */
   2326             if (do_refine == 1)
   2327             {
   2328                 int search_range;
   2329 
   2330                 search_range = 8;
   2331 
   2332                 thissme = cpi->refining_search_sad(x, b, d, &d->bmi.mv, sadpb,
   2333                                        search_range, &cpi->fn_ptr[BLOCK_16X16],
   2334                                        x->mvcost, &best_ref_mv);
   2335 
   2336                 if (thissme < bestsme)
   2337                 {
   2338                     bestsme = thissme;
   2339                     mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
   2340                 }
   2341                 else
   2342                 {
   2343                     d->bmi.mv.as_int = mode_mv[NEWMV].as_int;
   2344                 }
   2345             }
   2346 
   2347             x->mv_col_min = tmp_col_min;
   2348             x->mv_col_max = tmp_col_max;
   2349             x->mv_row_min = tmp_row_min;
   2350             x->mv_row_max = tmp_row_max;
   2351 
   2352             if (bestsme < INT_MAX)
   2353             {
   2354                 int dis; /* TODO: use dis in distortion calculation later. */
   2355                 unsigned int sse;
   2356                 cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv, &best_ref_mv,
   2357                                              x->errorperbit,
   2358                                              &cpi->fn_ptr[BLOCK_16X16],
   2359                                              x->mvcost, &dis, &sse);
   2360             }
   2361 
   2362             mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
   2363 
   2364             /* Add the new motion vector cost to our rolling cost variable */
   2365             rd.rate2 += vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv, x->mvcost, 96);
   2366         }
   2367 
   2368         case NEARESTMV:
   2369         case NEARMV:
   2370             /* Clip "next_nearest" so that it does not extend to far out
   2371              * of image
   2372              */
   2373             vp8_clamp_mv2(&mode_mv[this_mode], xd);
   2374 
   2375             /* Do not bother proceeding if the vector (from newmv, nearest
   2376              * or near) is 0,0 as this should then be coded using the zeromv
   2377              * mode.
   2378              */
   2379             if (((this_mode == NEARMV) || (this_mode == NEARESTMV)) && (mode_mv[this_mode].as_int == 0))
   2380                 continue;
   2381 
   2382         case ZEROMV:
   2383 
   2384             /* Trap vectors that reach beyond the UMV borders
   2385              * Note that ALL New MV, Nearest MV Near MV and Zero MV code
   2386              * drops through to this point because of the lack of break
   2387              * statements in the previous two cases.
   2388              */
   2389             if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) || ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) ||
   2390                 ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) || ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max))
   2391                 continue;
   2392 
   2393             vp8_set_mbmode_and_mvs(x, this_mode, &mode_mv[this_mode]);
   2394             this_rd = evaluate_inter_mode_rd(mdcounts, &rd,
   2395                                              &disable_skip, cpi, x);
   2396             break;
   2397 
   2398         default:
   2399             break;
   2400         }
   2401 
   2402         this_rd = calculate_final_rd_costs(this_rd, &rd, &other_cost,
   2403                                            disable_skip, uv_intra_tteob,
   2404                                            intra_rd_penalty, cpi, x);
   2405 
   2406         /* Keep record of best intra distortion */
   2407         if ((x->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME) &&
   2408             (this_rd < best_mode.intra_rd) )
   2409         {
   2410           best_mode.intra_rd = this_rd;
   2411             *returnintra = rd.distortion2 ;
   2412         }
   2413 #if CONFIG_TEMPORAL_DENOISING
   2414         if (cpi->oxcf.noise_sensitivity)
   2415         {
   2416             unsigned int sse;
   2417             vp8_get_inter_mbpred_error(x,&cpi->fn_ptr[BLOCK_16X16],&sse,
   2418                                    mode_mv[this_mode]);
   2419 
   2420             if (sse < best_rd_sse)
   2421                 best_rd_sse = sse;
   2422 
   2423             /* Store for later use by denoiser. */
   2424             if (this_mode == ZEROMV && sse < zero_mv_sse )
   2425             {
   2426                 zero_mv_sse = sse;
   2427                 x->best_zeromv_reference_frame =
   2428                         x->e_mbd.mode_info_context->mbmi.ref_frame;
   2429             }
   2430 
   2431             /* Store the best NEWMV in x for later use in the denoiser. */
   2432             if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV &&
   2433                     sse < best_sse)
   2434             {
   2435                 best_sse = sse;
   2436                 vp8_get_inter_mbpred_error(x,&cpi->fn_ptr[BLOCK_16X16],&best_sse,
   2437                                        mode_mv[this_mode]);
   2438                 x->best_sse_inter_mode = NEWMV;
   2439                 x->best_sse_mv = x->e_mbd.mode_info_context->mbmi.mv;
   2440                 x->need_to_clamp_best_mvs =
   2441                     x->e_mbd.mode_info_context->mbmi.need_to_clamp_mvs;
   2442                 x->best_reference_frame =
   2443                     x->e_mbd.mode_info_context->mbmi.ref_frame;
   2444             }
   2445         }
   2446 #endif
   2447 
   2448         /* Did this mode help.. i.i is it the new best mode */
   2449         if (this_rd < best_mode.rd || x->skip)
   2450         {
   2451             /* Note index of best mode so far */
   2452             best_mode_index = mode_index;
   2453             *returnrate = rd.rate2;
   2454             *returndistortion = rd.distortion2;
   2455             if (this_mode <= B_PRED)
   2456             {
   2457                 x->e_mbd.mode_info_context->mbmi.uv_mode = uv_intra_mode;
   2458                 /* required for left and above block mv */
   2459                 x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
   2460             }
   2461             update_best_mode(&best_mode, this_rd, &rd, other_cost, x);
   2462 
   2463 
   2464             /* Testing this mode gave rise to an improvement in best error
   2465              * score. Lower threshold a bit for next time
   2466              */
   2467             cpi->rd_thresh_mult[mode_index] = (cpi->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ? cpi->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT;
   2468             cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index];
   2469         }
   2470 
   2471         /* If the mode did not help improve the best error case then raise
   2472          * the threshold for testing that mode next time around.
   2473          */
   2474         else
   2475         {
   2476             cpi->rd_thresh_mult[mode_index] += 4;
   2477 
   2478             if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT)
   2479                 cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
   2480 
   2481             cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index];
   2482         }
   2483 
   2484         if (x->skip)
   2485             break;
   2486 
   2487     }
   2488 
   2489     /* Reduce the activation RD thresholds for the best choice mode */
   2490     if ((cpi->rd_baseline_thresh[best_mode_index] > 0) && (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2)))
   2491     {
   2492         int best_adjustment = (cpi->rd_thresh_mult[best_mode_index] >> 2);
   2493 
   2494         cpi->rd_thresh_mult[best_mode_index] = (cpi->rd_thresh_mult[best_mode_index] >= (MIN_THRESHMULT + best_adjustment)) ? cpi->rd_thresh_mult[best_mode_index] - best_adjustment : MIN_THRESHMULT;
   2495         cpi->rd_threshes[best_mode_index] = (cpi->rd_baseline_thresh[best_mode_index] >> 7) * cpi->rd_thresh_mult[best_mode_index];
   2496     }
   2497 
   2498     /* Note how often each mode chosen as best */
   2499     cpi->mode_chosen_counts[best_mode_index] ++;
   2500 
   2501 #if CONFIG_TEMPORAL_DENOISING
   2502     if (cpi->oxcf.noise_sensitivity)
   2503     {
   2504         if (x->best_sse_inter_mode == DC_PRED)
   2505         {
   2506             /* No best MV found. */
   2507             x->best_sse_inter_mode = best_mode.mbmode.mode;
   2508             x->best_sse_mv = best_mode.mbmode.mv;
   2509             x->need_to_clamp_best_mvs = best_mode.mbmode.need_to_clamp_mvs;
   2510             x->best_reference_frame = best_mode.mbmode.ref_frame;
   2511             best_sse = best_rd_sse;
   2512         }
   2513         vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse,
   2514                                 recon_yoffset, recon_uvoffset);
   2515 
   2516 
   2517         /* Reevaluate ZEROMV after denoising. */
   2518         if (best_mode.mbmode.ref_frame == INTRA_FRAME &&
   2519             x->best_zeromv_reference_frame != INTRA_FRAME)
   2520         {
   2521             int this_rd = INT_MAX;
   2522             int disable_skip = 0;
   2523             int other_cost = 0;
   2524             int this_ref_frame = x->best_zeromv_reference_frame;
   2525             rd.rate2 = x->ref_frame_cost[this_ref_frame] +
   2526                     vp8_cost_mv_ref(ZEROMV, mdcounts);
   2527             rd.distortion2 = 0;
   2528 
   2529             /* set up the proper prediction buffers for the frame */
   2530             x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame;
   2531             x->e_mbd.pre.y_buffer = plane[this_ref_frame][0];
   2532             x->e_mbd.pre.u_buffer = plane[this_ref_frame][1];
   2533             x->e_mbd.pre.v_buffer = plane[this_ref_frame][2];
   2534 
   2535             x->e_mbd.mode_info_context->mbmi.mode = ZEROMV;
   2536             x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
   2537             x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
   2538 
   2539             this_rd = evaluate_inter_mode_rd(mdcounts, &rd, &disable_skip, cpi, x);
   2540             this_rd = calculate_final_rd_costs(this_rd, &rd, &other_cost,
   2541                                                disable_skip, uv_intra_tteob,
   2542                                                intra_rd_penalty, cpi, x);
   2543             if (this_rd < best_mode.rd || x->skip)
   2544             {
   2545                 /* Note index of best mode so far */
   2546                 best_mode_index = mode_index;
   2547                 *returnrate = rd.rate2;
   2548                 *returndistortion = rd.distortion2;
   2549                 update_best_mode(&best_mode, this_rd, &rd, other_cost, x);
   2550             }
   2551         }
   2552 
   2553     }
   2554 #endif
   2555 
   2556     if (cpi->is_src_frame_alt_ref &&
   2557         (best_mode.mbmode.mode != ZEROMV || best_mode.mbmode.ref_frame != ALTREF_FRAME))
   2558     {
   2559         x->e_mbd.mode_info_context->mbmi.mode = ZEROMV;
   2560         x->e_mbd.mode_info_context->mbmi.ref_frame = ALTREF_FRAME;
   2561         x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
   2562         x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
   2563         x->e_mbd.mode_info_context->mbmi.mb_skip_coeff =
   2564                                         (cpi->common.mb_no_coeff_skip);
   2565         x->e_mbd.mode_info_context->mbmi.partitioning = 0;
   2566         return;
   2567     }
   2568 
   2569 
   2570     /* macroblock modes */
   2571     vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mode.mbmode, sizeof(MB_MODE_INFO));
   2572 
   2573     if (best_mode.mbmode.mode == B_PRED)
   2574     {
   2575         for (i = 0; i < 16; i++)
   2576             xd->mode_info_context->bmi[i].as_mode = best_mode.bmodes[i].as_mode;
   2577     }
   2578 
   2579     if (best_mode.mbmode.mode == SPLITMV)
   2580     {
   2581         for (i = 0; i < 16; i++)
   2582             xd->mode_info_context->bmi[i].mv.as_int = best_mode.bmodes[i].mv.as_int;
   2583 
   2584         vpx_memcpy(x->partition_info, &best_mode.partition, sizeof(PARTITION_INFO));
   2585 
   2586         x->e_mbd.mode_info_context->mbmi.mv.as_int =
   2587                                       x->partition_info->bmi[15].mv.as_int;
   2588     }
   2589 
   2590     if (sign_bias
   2591         != cpi->common.ref_frame_sign_bias[xd->mode_info_context->mbmi.ref_frame])
   2592         best_ref_mv.as_int = best_ref_mv_sb[!sign_bias].as_int;
   2593 
   2594     rd_update_mvcount(cpi, x, &best_ref_mv);
   2595 }
   2596 
   2597 void vp8_rd_pick_intra_mode(MACROBLOCK *x, int *rate_)
   2598 {
   2599     int error4x4, error16x16;
   2600     int rate4x4, rate16x16 = 0, rateuv;
   2601     int dist4x4, dist16x16, distuv;
   2602     int rate;
   2603     int rate4x4_tokenonly = 0;
   2604     int rate16x16_tokenonly = 0;
   2605     int rateuv_tokenonly = 0;
   2606 
   2607     x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
   2608 
   2609     rd_pick_intra_mbuv_mode(x, &rateuv, &rateuv_tokenonly, &distuv);
   2610     rate = rateuv;
   2611 
   2612     error16x16 = rd_pick_intra16x16mby_mode(x, &rate16x16, &rate16x16_tokenonly,
   2613                                             &dist16x16);
   2614 
   2615     error4x4 = rd_pick_intra4x4mby_modes(x, &rate4x4, &rate4x4_tokenonly,
   2616                                          &dist4x4, error16x16);
   2617 
   2618     if (error4x4 < error16x16)
   2619     {
   2620         x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
   2621         rate += rate4x4;
   2622     }
   2623     else
   2624     {
   2625         rate += rate16x16;
   2626     }
   2627 
   2628     *rate_ = rate;
   2629 }
   2630