Home | History | Annotate | Download | only in encoder
      1 /*
      2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 
     12 #include <stdio.h>
     13 #include <math.h>
     14 #include <limits.h>
     15 #include <assert.h>
     16 #include "vpx_config.h"
     17 #include "vp8_rtcd.h"
     18 #include "vp8/common/pragmas.h"
     19 #include "tokenize.h"
     20 #include "treewriter.h"
     21 #include "onyx_int.h"
     22 #include "modecosts.h"
     23 #include "encodeintra.h"
     24 #include "pickinter.h"
     25 #include "vp8/common/entropymode.h"
     26 #include "vp8/common/reconinter.h"
     27 #include "vp8/common/reconintra4x4.h"
     28 #include "vp8/common/findnearmv.h"
     29 #include "vp8/common/quant_common.h"
     30 #include "encodemb.h"
     31 #include "quantize.h"
     32 #include "vp8/common/variance.h"
     33 #include "mcomp.h"
     34 #include "rdopt.h"
     35 #include "vpx_mem/vpx_mem.h"
     36 #include "vp8/common/systemdependent.h"
     37 #if CONFIG_TEMPORAL_DENOISING
     38 #include "denoising.h"
     39 #endif
     40 extern void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x);
     41 
     /* Larger of two values.  Both arguments are expanded twice, so do not
      * pass expressions that have side effects. */
     #define MAXF(a,b)            (((b) < (a)) ? (a) : (b))
     43 
     /* Rate and distortion figures carried together.  The field names suggest
      * total / luma / chroma rate and total / chroma distortion -- confirm
      * against the code that fills this struct in. */
     typedef struct rate_distortion_struct
     {
         int rate2, rate_y, rate_uv;
         int distortion2, distortion_uv;
     } RATE_DISTORTION;
     52 
     53 typedef struct best_mode_struct
     54 {
     55   int yrd;
     56   int rd;
     57   int intra_rd;
     58   MB_MODE_INFO mbmode;
     59   union b_mode_info bmodes[16];
     60   PARTITION_INFO partition;
     61 } BEST_MODE;
     62 
     /* Per-speed thresholds, indexed by cpi->Speed (0..16) in
      * vp8_auto_select_speed(): the speed is lowered when
      * budget * 100 exceeds avg_encode_time * auto_speed_thresh[Speed]. */
     static const int auto_speed_thresh[17] =
     {
         /*  0.. 3 */ 1000, 200, 150, 130,
         /*  4.. 7 */ 150, 125, 120, 115,
         /*  8..11 */ 115, 115, 115, 115,
         /* 12..15 */ 115, 115, 115, 115,
         /* 16     */ 105
     };
     83 
     84 const MB_PREDICTION_MODE vp8_mode_order[MAX_MODES] =
     85 {
     86     ZEROMV,
     87     DC_PRED,
     88 
     89     NEARESTMV,
     90     NEARMV,
     91 
     92     ZEROMV,
     93     NEARESTMV,
     94 
     95     ZEROMV,
     96     NEARESTMV,
     97 
     98     NEARMV,
     99     NEARMV,
    100 
    101     V_PRED,
    102     H_PRED,
    103     TM_PRED,
    104 
    105     NEWMV,
    106     NEWMV,
    107     NEWMV,
    108 
    109     SPLITMV,
    110     SPLITMV,
    111     SPLITMV,
    112 
    113     B_PRED,
    114 };
    115 
    116 /* This table determines the search order in reference frame priority order,
    117  * which may not necessarily match INTRA,LAST,GOLDEN,ARF
    118  */
    119 const int vp8_ref_frame_order[MAX_MODES] =
    120 {
    121     1,
    122     0,
    123 
    124     1,
    125     1,
    126 
    127     2,
    128     2,
    129 
    130     3,
    131     3,
    132 
    133     2,
    134     3,
    135 
    136     0,
    137     0,
    138     0,
    139 
    140     1,
    141     2,
    142     3,
    143 
    144     1,
    145     2,
    146     3,
    147 
    148     0,
    149 };
    150 
    151 static void fill_token_costs(
    152     int c[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS],
    153     const vp8_prob p[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]
    154 )
    155 {
    156     int i, j, k;
    157 
    158 
    159     for (i = 0; i < BLOCK_TYPES; i++)
    160         for (j = 0; j < COEF_BANDS; j++)
    161             for (k = 0; k < PREV_COEF_CONTEXTS; k++)
    162 
    163                 /* check for pt=0 and band > 1 if block type 0
    164                  * and 0 if blocktype 1
    165                  */
    166                 if (k == 0 && j > (i == 0))
    167                     vp8_cost_tokens2(c[i][j][k], p [i][j][k], vp8_coef_tree, 2);
    168                 else
    169                     vp8_cost_tokens(c[i][j][k], p [i][j][k], vp8_coef_tree);
    170 }
    171 
    /* Sixteenths of RDMULT added in vp8_initialize_rd_consts(), indexed by
     * cpi->twopass.next_iiratio (values above 31 use entry 31). */
    static const int rd_iifactor[32] =
    {
        /*  0..15 */ 4, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 16..31 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    };
    179 
    180 /* values are now correlated to quantizer */
    181 static const int sad_per_bit16lut[QINDEX_RANGE] =
    182 {
    183     2,  2,  2,  2,  2,  2,  2,  2,
    184     2,  2,  2,  2,  2,  2,  2,  2,
    185     3,  3,  3,  3,  3,  3,  3,  3,
    186     3,  3,  3,  3,  3,  3,  4,  4,
    187     4,  4,  4,  4,  4,  4,  4,  4,
    188     4,  4,  5,  5,  5,  5,  5,  5,
    189     5,  5,  5,  5,  5,  5,  6,  6,
    190     6,  6,  6,  6,  6,  6,  6,  6,
    191     6,  6,  7,  7,  7,  7,  7,  7,
    192     7,  7,  7,  7,  7,  7,  8,  8,
    193     8,  8,  8,  8,  8,  8,  8,  8,
    194     8,  8,  9,  9,  9,  9,  9,  9,
    195     9,  9,  9,  9,  9,  9,  10, 10,
    196     10, 10, 10, 10, 10, 10, 11, 11,
    197     11, 11, 11, 11, 12, 12, 12, 12,
    198     12, 12, 13, 13, 13, 13, 14, 14
    199 };
    200 static const int sad_per_bit4lut[QINDEX_RANGE] =
    201 {
    202     2,  2,  2,  2,  2,  2,  3,  3,
    203     3,  3,  3,  3,  3,  3,  3,  3,
    204     3,  3,  3,  3,  4,  4,  4,  4,
    205     4,  4,  4,  4,  4,  4,  5,  5,
    206     5,  5,  5,  5,  6,  6,  6,  6,
    207     6,  6,  6,  6,  6,  6,  6,  6,
    208     7,  7,  7,  7,  7,  7,  7,  7,
    209     7,  7,  7,  7,  7,  8,  8,  8,
    210     8,  8,  9,  9,  9,  9,  9,  9,
    211     10, 10, 10, 10, 10, 10, 10, 10,
    212     11, 11, 11, 11, 11, 11, 11, 11,
    213     12, 12, 12, 12, 12, 12, 12, 12,
    214     13, 13, 13, 13, 13, 13, 13, 14,
    215     14, 14, 14, 14, 15, 15, 15, 15,
    216     16, 16, 16, 16, 17, 17, 17, 18,
    217     18, 18, 19, 19, 19, 20, 20, 20,
    218 };
    219 
    220 void vp8cx_initialize_me_consts(VP8_COMP *cpi, int QIndex)
    221 {
    222     cpi->mb.sadperbit16 =  sad_per_bit16lut[QIndex];
    223     cpi->mb.sadperbit4  =  sad_per_bit4lut[QIndex];
    224 }
    225 
    226 void vp8_initialize_rd_consts(VP8_COMP *cpi, MACROBLOCK *x, int Qvalue)
    227 {
    228     int q;
    229     int i;
    230     double capped_q = (Qvalue < 160) ? (double)Qvalue : 160.0;
    231     double rdconst = 2.80;
    232 
    233     vp8_clear_system_state();
    234 
    235     /* Further tests required to see if optimum is different
    236      * for key frames, golden frames and arf frames.
    237      */
    238     cpi->RDMULT = (int)(rdconst * (capped_q * capped_q));
    239 
    240     /* Extend rate multiplier along side quantizer zbin increases */
    241     if (cpi->mb.zbin_over_quant  > 0)
    242     {
    243         double oq_factor;
    244         double modq;
    245 
    246         /* Experimental code using the same basic equation as used for Q above
    247          * The units of cpi->mb.zbin_over_quant are 1/128 of Q bin size
    248          */
    249         oq_factor = 1.0 + ((double)0.0015625 * cpi->mb.zbin_over_quant);
    250         modq = (int)((double)capped_q * oq_factor);
    251         cpi->RDMULT = (int)(rdconst * (modq * modq));
    252     }
    253 
    254     if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME))
    255     {
    256         if (cpi->twopass.next_iiratio > 31)
    257             cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
    258         else
    259             cpi->RDMULT +=
    260                 (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
    261     }
    262 
    263     cpi->mb.errorperbit = (cpi->RDMULT / 110);
    264     cpi->mb.errorperbit += (cpi->mb.errorperbit==0);
    265 
    266     vp8_set_speed_features(cpi);
    267 
    268     for (i = 0; i < MAX_MODES; i++)
    269     {
    270         x->mode_test_hit_counts[i] = 0;
    271     }
    272 
    273     q = (int)pow(Qvalue, 1.25);
    274 
    275     if (q < 8)
    276         q = 8;
    277 
    278     if (cpi->RDMULT > 1000)
    279     {
    280         cpi->RDDIV = 1;
    281         cpi->RDMULT /= 100;
    282 
    283         for (i = 0; i < MAX_MODES; i++)
    284         {
    285             if (cpi->sf.thresh_mult[i] < INT_MAX)
    286             {
    287                 x->rd_threshes[i] = cpi->sf.thresh_mult[i] * q / 100;
    288             }
    289             else
    290             {
    291                 x->rd_threshes[i] = INT_MAX;
    292             }
    293 
    294             cpi->rd_baseline_thresh[i] = x->rd_threshes[i];
    295         }
    296     }
    297     else
    298     {
    299         cpi->RDDIV = 100;
    300 
    301         for (i = 0; i < MAX_MODES; i++)
    302         {
    303             if (cpi->sf.thresh_mult[i] < (INT_MAX / q))
    304             {
    305                 x->rd_threshes[i] = cpi->sf.thresh_mult[i] * q;
    306             }
    307             else
    308             {
    309                 x->rd_threshes[i] = INT_MAX;
    310             }
    311 
    312             cpi->rd_baseline_thresh[i] = x->rd_threshes[i];
    313         }
    314     }
    315 
    316     {
    317       /* build token cost array for the type of frame we have now */
    318       FRAME_CONTEXT *l = &cpi->lfc_n;
    319 
    320       if(cpi->common.refresh_alt_ref_frame)
    321           l = &cpi->lfc_a;
    322       else if(cpi->common.refresh_golden_frame)
    323           l = &cpi->lfc_g;
    324 
    325       fill_token_costs(
    326           cpi->mb.token_costs,
    327           (const vp8_prob( *)[8][3][11]) l->coef_probs
    328       );
    329       /*
    330       fill_token_costs(
    331           cpi->mb.token_costs,
    332           (const vp8_prob( *)[8][3][11]) cpi->common.fc.coef_probs);
    333       */
    334 
    335 
    336       /* TODO make these mode costs depend on last,alt or gold too.  (jbb) */
    337       vp8_init_mode_costs(cpi);
    338     }
    339 
    340 }
    341 
    342 void vp8_auto_select_speed(VP8_COMP *cpi)
    343 {
    344     int milliseconds_for_compress = (int)(1000000 / cpi->framerate);
    345 
    346     milliseconds_for_compress = milliseconds_for_compress * (16 - cpi->oxcf.cpu_used) / 16;
    347 
    348 #if 0
    349 
    350     if (0)
    351     {
    352         FILE *f;
    353 
    354         f = fopen("speed.stt", "a");
    355         fprintf(f, " %8ld %10ld %10ld %10ld\n",
    356                 cpi->common.current_video_frame, cpi->Speed, milliseconds_for_compress, cpi->avg_pick_mode_time);
    357         fclose(f);
    358     }
    359 
    360 #endif
    361 
    362     if (cpi->avg_pick_mode_time < milliseconds_for_compress && (cpi->avg_encode_time - cpi->avg_pick_mode_time) < milliseconds_for_compress)
    363     {
    364         if (cpi->avg_pick_mode_time == 0)
    365         {
    366             cpi->Speed = 4;
    367         }
    368         else
    369         {
    370             if (milliseconds_for_compress * 100 < cpi->avg_encode_time * 95)
    371             {
    372                 cpi->Speed          += 2;
    373                 cpi->avg_pick_mode_time = 0;
    374                 cpi->avg_encode_time = 0;
    375 
    376                 if (cpi->Speed > 16)
    377                 {
    378                     cpi->Speed = 16;
    379                 }
    380             }
    381 
    382             if (milliseconds_for_compress * 100 > cpi->avg_encode_time * auto_speed_thresh[cpi->Speed])
    383             {
    384                 cpi->Speed          -= 1;
    385                 cpi->avg_pick_mode_time = 0;
    386                 cpi->avg_encode_time = 0;
    387 
    388                 /* In real-time mode, cpi->speed is in [4, 16]. */
    389                 if (cpi->Speed < 4)
    390                 {
    391                     cpi->Speed = 4;
    392                 }
    393             }
    394         }
    395     }
    396     else
    397     {
    398         cpi->Speed += 4;
    399 
    400         if (cpi->Speed > 16)
    401             cpi->Speed = 16;
    402 
    403 
    404         cpi->avg_pick_mode_time = 0;
    405         cpi->avg_encode_time = 0;
    406     }
    407 }
    408 
    /* Sum of squared differences between the 16 entries of coeff and the
     * 16 entries of dqcoeff. */
    int vp8_block_error_c(short *coeff, short *dqcoeff)
    {
        const short *const coeff_end = coeff + 16;
        int sum_sq = 0;

        for (; coeff < coeff_end; ++coeff, ++dqcoeff)
        {
            const int delta = *coeff - *dqcoeff;

            sum_sq += delta * delta;
        }

        return sum_sq;
    }
    422 
    423 int vp8_mbblock_error_c(MACROBLOCK *mb, int dc)
    424 {
    425     BLOCK  *be;
    426     BLOCKD *bd;
    427     int i, j;
    428     int berror, error = 0;
    429 
    430     for (i = 0; i < 16; i++)
    431     {
    432         be = &mb->block[i];
    433         bd = &mb->e_mbd.block[i];
    434 
    435         berror = 0;
    436 
    437         for (j = dc; j < 16; j++)
    438         {
    439             int this_diff = be->coeff[j] - bd->dqcoeff[j];
    440             berror += this_diff * this_diff;
    441         }
    442 
    443         error += berror;
    444     }
    445 
    446     return error;
    447 }
    448 
    449 int vp8_mbuverror_c(MACROBLOCK *mb)
    450 {
    451 
    452     BLOCK  *be;
    453     BLOCKD *bd;
    454 
    455 
    456     int i;
    457     int error = 0;
    458 
    459     for (i = 16; i < 24; i++)
    460     {
    461         be = &mb->block[i];
    462         bd = &mb->e_mbd.block[i];
    463 
    464         error += vp8_block_error_c(be->coeff, bd->dqcoeff);
    465     }
    466 
    467     return error;
    468 }
    469 
    470 int VP8_UVSSE(MACROBLOCK *x)
    471 {
    472     unsigned char *uptr, *vptr;
    473     unsigned char *upred_ptr = (*(x->block[16].base_src) + x->block[16].src);
    474     unsigned char *vpred_ptr = (*(x->block[20].base_src) + x->block[20].src);
    475     int uv_stride = x->block[16].src_stride;
    476 
    477     unsigned int sse1 = 0;
    478     unsigned int sse2 = 0;
    479     int mv_row = x->e_mbd.mode_info_context->mbmi.mv.as_mv.row;
    480     int mv_col = x->e_mbd.mode_info_context->mbmi.mv.as_mv.col;
    481     int offset;
    482     int pre_stride = x->e_mbd.pre.uv_stride;
    483 
    484     if (mv_row < 0)
    485         mv_row -= 1;
    486     else
    487         mv_row += 1;
    488 
    489     if (mv_col < 0)
    490         mv_col -= 1;
    491     else
    492         mv_col += 1;
    493 
    494     mv_row /= 2;
    495     mv_col /= 2;
    496 
    497     offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
    498     uptr = x->e_mbd.pre.u_buffer + offset;
    499     vptr = x->e_mbd.pre.v_buffer + offset;
    500 
    501     if ((mv_row | mv_col) & 7)
    502     {
    503         vp8_sub_pixel_variance8x8(uptr, pre_stride,
    504             mv_col & 7, mv_row & 7, upred_ptr, uv_stride, &sse2);
    505         vp8_sub_pixel_variance8x8(vptr, pre_stride,
    506             mv_col & 7, mv_row & 7, vpred_ptr, uv_stride, &sse1);
    507         sse2 += sse1;
    508     }
    509     else
    510     {
    511         vp8_variance8x8(uptr, pre_stride,
    512             upred_ptr, uv_stride, &sse2);
    513         vp8_variance8x8(vptr, pre_stride,
    514             vpred_ptr, uv_stride, &sse1);
    515         sse2 += sse1;
    516     }
    517     return sse2;
    518 
    519 }
    520 
    521 static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, int type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l)
    522 {
    523     int c = !type;              /* start at coef 0, unless Y with Y2 */
    524     int eob = (int)(*b->eob);
    525     int pt ;    /* surrounding block/prev coef predictor */
    526     int cost = 0;
    527     short *qcoeff_ptr = b->qcoeff;
    528 
    529     VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
    530 
    531 # define QC( I)  ( qcoeff_ptr [vp8_default_zig_zag1d[I]] )
    532 
    533     for (; c < eob; c++)
    534     {
    535         int v = QC(c);
    536         int t = vp8_dct_value_tokens_ptr[v].Token;
    537         cost += mb->token_costs [type] [vp8_coef_bands[c]] [pt] [t];
    538         cost += vp8_dct_value_cost_ptr[v];
    539         pt = vp8_prev_token_class[t];
    540     }
    541 
    542 # undef QC
    543 
    544     if (c < 16)
    545         cost += mb->token_costs [type] [vp8_coef_bands[c]] [pt] [DCT_EOB_TOKEN];
    546 
    547     pt = (c != !type); /* is eob first coefficient; */
    548     *a = *l = pt;
    549 
    550     return cost;
    551 }
    552 
    553 static int vp8_rdcost_mby(MACROBLOCK *mb)
    554 {
    555     int cost = 0;
    556     int b;
    557     MACROBLOCKD *x = &mb->e_mbd;
    558     ENTROPY_CONTEXT_PLANES t_above, t_left;
    559     ENTROPY_CONTEXT *ta;
    560     ENTROPY_CONTEXT *tl;
    561 
    562     vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    563     vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
    564 
    565     ta = (ENTROPY_CONTEXT *)&t_above;
    566     tl = (ENTROPY_CONTEXT *)&t_left;
    567 
    568     for (b = 0; b < 16; b++)
    569         cost += cost_coeffs(mb, x->block + b, PLANE_TYPE_Y_NO_DC,
    570                     ta + vp8_block2above[b], tl + vp8_block2left[b]);
    571 
    572     cost += cost_coeffs(mb, x->block + 24, PLANE_TYPE_Y2,
    573                 ta + vp8_block2above[24], tl + vp8_block2left[24]);
    574 
    575     return cost;
    576 }
    577 
    578 static void macro_block_yrd( MACROBLOCK *mb,
    579                              int *Rate,
    580                              int *Distortion)
    581 {
    582     int b;
    583     MACROBLOCKD *const x = &mb->e_mbd;
    584     BLOCK   *const mb_y2 = mb->block + 24;
    585     BLOCKD *const x_y2  = x->block + 24;
    586     short *Y2DCPtr = mb_y2->src_diff;
    587     BLOCK *beptr;
    588     int d;
    589 
    590     vp8_subtract_mby( mb->src_diff, *(mb->block[0].base_src),
    591         mb->block[0].src_stride,  mb->e_mbd.predictor, 16);
    592 
    593     /* Fdct and building the 2nd order block */
    594     for (beptr = mb->block; beptr < mb->block + 16; beptr += 2)
    595     {
    596         mb->short_fdct8x4(beptr->src_diff, beptr->coeff, 32);
    597         *Y2DCPtr++ = beptr->coeff[0];
    598         *Y2DCPtr++ = beptr->coeff[16];
    599     }
    600 
    601     /* 2nd order fdct */
    602     mb->short_walsh4x4(mb_y2->src_diff, mb_y2->coeff, 8);
    603 
    604     /* Quantization */
    605     for (b = 0; b < 16; b++)
    606     {
    607         mb->quantize_b(&mb->block[b], &mb->e_mbd.block[b]);
    608     }
    609 
    610     /* DC predication and Quantization of 2nd Order block */
    611     mb->quantize_b(mb_y2, x_y2);
    612 
    613     /* Distortion */
    614     d = vp8_mbblock_error(mb, 1) << 2;
    615     d += vp8_block_error(mb_y2->coeff, x_y2->dqcoeff);
    616 
    617     *Distortion = (d >> 4);
    618 
    619     /* rate */
    620     *Rate = vp8_rdcost_mby(mb);
    621 }
    622 
    /* Copy a 4x4 block of bytes between two buffers that both use a row
     * stride of 16 bytes: bytes 0..3 of each of the first four rows.
     *
     * The copy is done byte by byte, so it does not depend on the alignment
     * of either pointer or on the size of unsigned int, and it does not
     * access the byte buffers through an incompatible pointer type.
     */
    static void copy_predictor(unsigned char *dst, const unsigned char *predictor)
    {
        int row;

        for (row = 0; row < 4; row++)
        {
            const int base = row * 16;
            int col;

            for (col = 0; col < 4; col++)
            {
                dst[base + col] = predictor[base + col];
            }
        }
    }
    632 static int rd_pick_intra4x4block(
    633     MACROBLOCK *x,
    634     BLOCK *be,
    635     BLOCKD *b,
    636     B_PREDICTION_MODE *best_mode,
    637     const int *bmode_costs,
    638     ENTROPY_CONTEXT *a,
    639     ENTROPY_CONTEXT *l,
    640 
    641     int *bestrate,
    642     int *bestratey,
    643     int *bestdistortion)
    644 {
    645     B_PREDICTION_MODE mode;
    646     int best_rd = INT_MAX;
    647     int rate = 0;
    648     int distortion;
    649 
    650     ENTROPY_CONTEXT ta = *a, tempa = *a;
    651     ENTROPY_CONTEXT tl = *l, templ = *l;
    652     /*
    653      * The predictor buffer is a 2d buffer with a stride of 16.  Create
    654      * a temp buffer that meets the stride requirements, but we are only
    655      * interested in the left 4x4 block
    656      * */
    657     DECLARE_ALIGNED_ARRAY(16, unsigned char,  best_predictor, 16*4);
    658     DECLARE_ALIGNED_ARRAY(16, short, best_dqcoeff, 16);
    659     int dst_stride = x->e_mbd.dst.y_stride;
    660     unsigned char *dst = x->e_mbd.dst.y_buffer + b->offset;
    661 
    662     unsigned char *Above = dst - dst_stride;
    663     unsigned char *yleft = dst - 1;
    664     unsigned char top_left = Above[-1];
    665 
    666     for (mode = B_DC_PRED; mode <= B_HU_PRED; mode++)
    667     {
    668         int this_rd;
    669         int ratey;
    670 
    671         rate = bmode_costs[mode];
    672 
    673         vp8_intra4x4_predict(Above, yleft, dst_stride, mode,
    674                              b->predictor, 16, top_left);
    675         vp8_subtract_b(be, b, 16);
    676         x->short_fdct4x4(be->src_diff, be->coeff, 32);
    677         x->quantize_b(be, b);
    678 
    679         tempa = ta;
    680         templ = tl;
    681 
    682         ratey = cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC, &tempa, &templ);
    683         rate += ratey;
    684         distortion = vp8_block_error(be->coeff, b->dqcoeff) >> 2;
    685 
    686         this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
    687 
    688         if (this_rd < best_rd)
    689         {
    690             *bestrate = rate;
    691             *bestratey = ratey;
    692             *bestdistortion = distortion;
    693             best_rd = this_rd;
    694             *best_mode = mode;
    695             *a = tempa;
    696             *l = templ;
    697             copy_predictor(best_predictor, b->predictor);
    698             vpx_memcpy(best_dqcoeff, b->dqcoeff, 32);
    699         }
    700     }
    701     b->bmi.as_mode = *best_mode;
    702 
    703     vp8_short_idct4x4llm(best_dqcoeff, best_predictor, 16, dst, dst_stride);
    704 
    705     return best_rd;
    706 }
    707 
    708 static int rd_pick_intra4x4mby_modes(MACROBLOCK *mb, int *Rate,
    709                                      int *rate_y, int *Distortion, int best_rd)
    710 {
    711     MACROBLOCKD *const xd = &mb->e_mbd;
    712     int i;
    713     int cost = mb->mbmode_cost [xd->frame_type] [B_PRED];
    714     int distortion = 0;
    715     int tot_rate_y = 0;
    716     int64_t total_rd = 0;
    717     ENTROPY_CONTEXT_PLANES t_above, t_left;
    718     ENTROPY_CONTEXT *ta;
    719     ENTROPY_CONTEXT *tl;
    720     const int *bmode_costs;
    721 
    722     vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    723     vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
    724 
    725     ta = (ENTROPY_CONTEXT *)&t_above;
    726     tl = (ENTROPY_CONTEXT *)&t_left;
    727 
    728     intra_prediction_down_copy(xd, xd->dst.y_buffer - xd->dst.y_stride + 16);
    729 
    730     bmode_costs = mb->inter_bmode_costs;
    731 
    732     for (i = 0; i < 16; i++)
    733     {
    734         MODE_INFO *const mic = xd->mode_info_context;
    735         const int mis = xd->mode_info_stride;
    736         B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode);
    737         int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry), UNINITIALIZED_IS_SAFE(d);
    738 
    739         if (mb->e_mbd.frame_type == KEY_FRAME)
    740         {
    741             const B_PREDICTION_MODE A = above_block_mode(mic, i, mis);
    742             const B_PREDICTION_MODE L = left_block_mode(mic, i);
    743 
    744             bmode_costs  = mb->bmode_costs[A][L];
    745         }
    746 
    747         total_rd += rd_pick_intra4x4block(
    748             mb, mb->block + i, xd->block + i, &best_mode, bmode_costs,
    749             ta + vp8_block2above[i],
    750             tl + vp8_block2left[i], &r, &ry, &d);
    751 
    752         cost += r;
    753         distortion += d;
    754         tot_rate_y += ry;
    755 
    756         mic->bmi[i].as_mode = best_mode;
    757 
    758         if(total_rd >= (int64_t)best_rd)
    759             break;
    760     }
    761 
    762     if(total_rd >= (int64_t)best_rd)
    763         return INT_MAX;
    764 
    765     *Rate = cost;
    766     *rate_y = tot_rate_y;
    767     *Distortion = distortion;
    768 
    769     return RDCOST(mb->rdmult, mb->rddiv, cost, distortion);
    770 }
    771 
    772 
    773 static int rd_pick_intra16x16mby_mode(MACROBLOCK *x,
    774                                       int *Rate,
    775                                       int *rate_y,
    776                                       int *Distortion)
    777 {
    778     MB_PREDICTION_MODE mode;
    779     MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
    780     int rate, ratey;
    781     int distortion;
    782     int best_rd = INT_MAX;
    783     int this_rd;
    784     MACROBLOCKD *xd = &x->e_mbd;
    785 
    786     /* Y Search for 16x16 intra prediction mode */
    787     for (mode = DC_PRED; mode <= TM_PRED; mode++)
    788     {
    789         xd->mode_info_context->mbmi.mode = mode;
    790 
    791         vp8_build_intra_predictors_mby_s(xd,
    792                                          xd->dst.y_buffer - xd->dst.y_stride,
    793                                          xd->dst.y_buffer - 1,
    794                                          xd->dst.y_stride,
    795                                          xd->predictor,
    796                                          16);
    797 
    798         macro_block_yrd(x, &ratey, &distortion);
    799         rate = ratey + x->mbmode_cost[xd->frame_type]
    800                                      [xd->mode_info_context->mbmi.mode];
    801 
    802         this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
    803 
    804         if (this_rd < best_rd)
    805         {
    806             mode_selected = mode;
    807             best_rd = this_rd;
    808             *Rate = rate;
    809             *rate_y = ratey;
    810             *Distortion = distortion;
    811         }
    812     }
    813 
    814     xd->mode_info_context->mbmi.mode = mode_selected;
    815     return best_rd;
    816 }
    817 
    818 static int rd_cost_mbuv(MACROBLOCK *mb)
    819 {
    820     int b;
    821     int cost = 0;
    822     MACROBLOCKD *x = &mb->e_mbd;
    823     ENTROPY_CONTEXT_PLANES t_above, t_left;
    824     ENTROPY_CONTEXT *ta;
    825     ENTROPY_CONTEXT *tl;
    826 
    827     vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    828     vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
    829 
    830     ta = (ENTROPY_CONTEXT *)&t_above;
    831     tl = (ENTROPY_CONTEXT *)&t_left;
    832 
    833     for (b = 16; b < 24; b++)
    834         cost += cost_coeffs(mb, x->block + b, PLANE_TYPE_UV,
    835                     ta + vp8_block2above[b], tl + vp8_block2left[b]);
    836 
    837     return cost;
    838 }
    839 
    840 
    841 static int rd_inter16x16_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate,
    842                             int *distortion, int fullpixel)
    843 {
    844     vp8_build_inter16x16_predictors_mbuv(&x->e_mbd);
    845     vp8_subtract_mbuv(x->src_diff,
    846         x->src.u_buffer, x->src.v_buffer, x->src.uv_stride,
    847         &x->e_mbd.predictor[256], &x->e_mbd.predictor[320], 8);
    848 
    849     vp8_transform_mbuv(x);
    850     vp8_quantize_mbuv(x);
    851 
    852     *rate       = rd_cost_mbuv(x);
    853     *distortion = vp8_mbuverror(x) / 4;
    854 
    855     return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
    856 }
    857 
    858 static int rd_inter4x4_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate,
    859                           int *distortion, int fullpixel)
    860 {
    861     vp8_build_inter4x4_predictors_mbuv(&x->e_mbd);
    862     vp8_subtract_mbuv(x->src_diff,
    863         x->src.u_buffer, x->src.v_buffer, x->src.uv_stride,
    864         &x->e_mbd.predictor[256], &x->e_mbd.predictor[320], 8);
    865 
    866     vp8_transform_mbuv(x);
    867     vp8_quantize_mbuv(x);
    868 
    869     *rate       = rd_cost_mbuv(x);
    870     *distortion = vp8_mbuverror(x) / 4;
    871 
    872     return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
    873 }
    874 
    875 static void rd_pick_intra_mbuv_mode(MACROBLOCK *x, int *rate,
    876                                     int *rate_tokenonly, int *distortion)
    877 {
    878     MB_PREDICTION_MODE mode;
    879     MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
    880     int best_rd = INT_MAX;
    881     int UNINITIALIZED_IS_SAFE(d), UNINITIALIZED_IS_SAFE(r);
    882     int rate_to;
    883     MACROBLOCKD *xd = &x->e_mbd;
    884 
    885     for (mode = DC_PRED; mode <= TM_PRED; mode++)
    886     {
    887         int this_rate;
    888         int this_distortion;
    889         int this_rd;
    890 
    891         xd->mode_info_context->mbmi.uv_mode = mode;
    892 
    893         vp8_build_intra_predictors_mbuv_s(xd,
    894                                           xd->dst.u_buffer - xd->dst.uv_stride,
    895                                           xd->dst.v_buffer - xd->dst.uv_stride,
    896                                           xd->dst.u_buffer - 1,
    897                                           xd->dst.v_buffer - 1,
    898                                           xd->dst.uv_stride,
    899                                           &xd->predictor[256], &xd->predictor[320],
    900                                           8);
    901 
    902 
    903         vp8_subtract_mbuv(x->src_diff,
    904                       x->src.u_buffer, x->src.v_buffer, x->src.uv_stride,
    905                       &xd->predictor[256], &xd->predictor[320], 8);
    906         vp8_transform_mbuv(x);
    907         vp8_quantize_mbuv(x);
    908 
    909         rate_to = rd_cost_mbuv(x);
    910         this_rate = rate_to + x->intra_uv_mode_cost[xd->frame_type][xd->mode_info_context->mbmi.uv_mode];
    911 
    912         this_distortion = vp8_mbuverror(x) / 4;
    913 
    914         this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
    915 
    916         if (this_rd < best_rd)
    917         {
    918             best_rd = this_rd;
    919             d = this_distortion;
    920             r = this_rate;
    921             *rate_tokenonly = rate_to;
    922             mode_selected = mode;
    923         }
    924     }
    925 
    926     *rate = r;
    927     *distortion = d;
    928 
    929     xd->mode_info_context->mbmi.uv_mode = mode_selected;
    930 }
    931 
    932 int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4])
    933 {
    934     vp8_prob p [VP8_MVREFS-1];
    935     assert(NEARESTMV <= m  &&  m <= SPLITMV);
    936     vp8_mv_ref_probs(p, near_mv_ref_ct);
    937     return vp8_cost_token(vp8_mv_ref_tree, p,
    938                           vp8_mv_ref_encoding_array - NEARESTMV + m);
    939 }
    940 
    941 void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv)
    942 {
    943     x->e_mbd.mode_info_context->mbmi.mode = mb;
    944     x->e_mbd.mode_info_context->mbmi.mv.as_int = mv->as_int;
    945 }
    946 
    947 static int labels2mode(
    948     MACROBLOCK *x,
    949     int const *labelings, int which_label,
    950     B_PREDICTION_MODE this_mode,
    951     int_mv *this_mv, int_mv *best_ref_mv,
    952     int *mvcost[2]
    953 )
    954 {
    955     MACROBLOCKD *const xd = & x->e_mbd;
    956     MODE_INFO *const mic = xd->mode_info_context;
    957     const int mis = xd->mode_info_stride;
    958 
    959     int cost = 0;
    960     int thismvcost = 0;
    961 
    962     /* We have to be careful retrieving previously-encoded motion vectors.
    963        Ones from this macroblock have to be pulled from the BLOCKD array
    964        as they have not yet made it to the bmi array in our MB_MODE_INFO. */
    965 
    966     int i = 0;
    967 
    968     do
    969     {
    970         BLOCKD *const d = xd->block + i;
    971         const int row = i >> 2,  col = i & 3;
    972 
    973         B_PREDICTION_MODE m;
    974 
    975         if (labelings[i] != which_label)
    976             continue;
    977 
    978         if (col  &&  labelings[i] == labelings[i-1])
    979             m = LEFT4X4;
    980         else if (row  &&  labelings[i] == labelings[i-4])
    981             m = ABOVE4X4;
    982         else
    983         {
    984             /* the only time we should do costing for new motion vector
    985              * or mode is when we are on a new label  (jbb May 08, 2007)
    986              */
    987             switch (m = this_mode)
    988             {
    989             case NEW4X4 :
    990                 thismvcost  = vp8_mv_bit_cost(this_mv, best_ref_mv, mvcost, 102);
    991                 break;
    992             case LEFT4X4:
    993                 this_mv->as_int = col ? d[-1].bmi.mv.as_int : left_block_mv(mic, i);
    994                 break;
    995             case ABOVE4X4:
    996                 this_mv->as_int = row ? d[-4].bmi.mv.as_int : above_block_mv(mic, i, mis);
    997                 break;
    998             case ZERO4X4:
    999                 this_mv->as_int = 0;
   1000                 break;
   1001             default:
   1002                 break;
   1003             }
   1004 
   1005             if (m == ABOVE4X4)  /* replace above with left if same */
   1006             {
   1007                 int_mv left_mv;
   1008 
   1009                 left_mv.as_int = col ? d[-1].bmi.mv.as_int :
   1010                                         left_block_mv(mic, i);
   1011 
   1012                 if (left_mv.as_int == this_mv->as_int)
   1013                     m = LEFT4X4;
   1014             }
   1015 
   1016             cost = x->inter_bmode_costs[ m];
   1017         }
   1018 
   1019         d->bmi.mv.as_int = this_mv->as_int;
   1020 
   1021         x->partition_info->bmi[i].mode = m;
   1022         x->partition_info->bmi[i].mv.as_int = this_mv->as_int;
   1023 
   1024     }
   1025     while (++i < 16);
   1026 
   1027     cost += thismvcost ;
   1028     return cost;
   1029 }
   1030 
   1031 static int rdcost_mbsegment_y(MACROBLOCK *mb, const int *labels,
   1032                               int which_label, ENTROPY_CONTEXT *ta,
   1033                               ENTROPY_CONTEXT *tl)
   1034 {
   1035     int cost = 0;
   1036     int b;
   1037     MACROBLOCKD *x = &mb->e_mbd;
   1038 
   1039     for (b = 0; b < 16; b++)
   1040         if (labels[ b] == which_label)
   1041             cost += cost_coeffs(mb, x->block + b, PLANE_TYPE_Y_WITH_DC,
   1042                                 ta + vp8_block2above[b],
   1043                                 tl + vp8_block2left[b]);
   1044 
   1045     return cost;
   1046 
   1047 }
   1048 static unsigned int vp8_encode_inter_mb_segment(MACROBLOCK *x, int const *labels, int which_label)
   1049 {
   1050     int i;
   1051     unsigned int distortion = 0;
   1052     int pre_stride = x->e_mbd.pre.y_stride;
   1053     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1054 
   1055 
   1056     for (i = 0; i < 16; i++)
   1057     {
   1058         if (labels[i] == which_label)
   1059         {
   1060             BLOCKD *bd = &x->e_mbd.block[i];
   1061             BLOCK *be = &x->block[i];
   1062 
   1063             vp8_build_inter_predictors_b(bd, 16, base_pre, pre_stride, x->e_mbd.subpixel_predict);
   1064             vp8_subtract_b(be, bd, 16);
   1065             x->short_fdct4x4(be->src_diff, be->coeff, 32);
   1066             x->quantize_b(be, bd);
   1067 
   1068             distortion += vp8_block_error(be->coeff, bd->dqcoeff);
   1069         }
   1070     }
   1071 
   1072     return distortion;
   1073 }
   1074 
   1075 
   1076 static const unsigned int segmentation_to_sseshift[4] = {3, 3, 2, 0};
   1077 
   1078 
   1079 typedef struct
   1080 {
   1081   int_mv *ref_mv;
   1082   int_mv mvp;
   1083 
   1084   int segment_rd;
   1085   int segment_num;
   1086   int r;
   1087   int d;
   1088   int segment_yrate;
   1089   B_PREDICTION_MODE modes[16];
   1090   int_mv mvs[16];
   1091   unsigned char eobs[16];
   1092 
   1093   int mvthresh;
   1094   int *mdcounts;
   1095 
   1096   int_mv sv_mvp[4]; /* save 4 mvp from 8x8 */
   1097   int sv_istep[2];  /* save 2 initial step_param for 16x8/8x16 */
   1098 
   1099 } BEST_SEG_INFO;
   1100 
   1101 
   1102 static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
   1103                              BEST_SEG_INFO *bsi, unsigned int segmentation)
   1104 {
   1105     int i;
   1106     int const *labels;
   1107     int br = 0;
   1108     int bd = 0;
   1109     B_PREDICTION_MODE this_mode;
   1110 
   1111 
   1112     int label_count;
   1113     int this_segment_rd = 0;
   1114     int label_mv_thresh;
   1115     int rate = 0;
   1116     int sbr = 0;
   1117     int sbd = 0;
   1118     int segmentyrate = 0;
   1119 
   1120     vp8_variance_fn_ptr_t *v_fn_ptr;
   1121 
   1122     ENTROPY_CONTEXT_PLANES t_above, t_left;
   1123     ENTROPY_CONTEXT *ta;
   1124     ENTROPY_CONTEXT *tl;
   1125     ENTROPY_CONTEXT_PLANES t_above_b, t_left_b;
   1126     ENTROPY_CONTEXT *ta_b;
   1127     ENTROPY_CONTEXT *tl_b;
   1128 
   1129     vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
   1130     vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
   1131 
   1132     ta = (ENTROPY_CONTEXT *)&t_above;
   1133     tl = (ENTROPY_CONTEXT *)&t_left;
   1134     ta_b = (ENTROPY_CONTEXT *)&t_above_b;
   1135     tl_b = (ENTROPY_CONTEXT *)&t_left_b;
   1136 
   1137     br = 0;
   1138     bd = 0;
   1139 
   1140     v_fn_ptr = &cpi->fn_ptr[segmentation];
   1141     labels = vp8_mbsplits[segmentation];
   1142     label_count = vp8_mbsplit_count[segmentation];
   1143 
   1144     /* 64 makes this threshold really big effectively making it so that we
   1145      * very rarely check mvs on segments.   setting this to 1 would make mv
   1146      * thresh roughly equal to what it is for macroblocks
   1147      */
   1148     label_mv_thresh = 1 * bsi->mvthresh / label_count ;
   1149 
   1150     /* Segmentation method overheads */
   1151     rate = vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs, vp8_mbsplit_encodings + segmentation);
   1152     rate += vp8_cost_mv_ref(SPLITMV, bsi->mdcounts);
   1153     this_segment_rd += RDCOST(x->rdmult, x->rddiv, rate, 0);
   1154     br += rate;
   1155 
   1156     for (i = 0; i < label_count; i++)
   1157     {
   1158         int_mv mode_mv[B_MODE_COUNT];
   1159         int best_label_rd = INT_MAX;
   1160         B_PREDICTION_MODE mode_selected = ZERO4X4;
   1161         int bestlabelyrate = 0;
   1162 
   1163         /* search for the best motion vector on this segment */
   1164         for (this_mode = LEFT4X4; this_mode <= NEW4X4 ; this_mode ++)
   1165         {
   1166             int this_rd;
   1167             int distortion;
   1168             int labelyrate;
   1169             ENTROPY_CONTEXT_PLANES t_above_s, t_left_s;
   1170             ENTROPY_CONTEXT *ta_s;
   1171             ENTROPY_CONTEXT *tl_s;
   1172 
   1173             vpx_memcpy(&t_above_s, &t_above, sizeof(ENTROPY_CONTEXT_PLANES));
   1174             vpx_memcpy(&t_left_s, &t_left, sizeof(ENTROPY_CONTEXT_PLANES));
   1175 
   1176             ta_s = (ENTROPY_CONTEXT *)&t_above_s;
   1177             tl_s = (ENTROPY_CONTEXT *)&t_left_s;
   1178 
   1179             if (this_mode == NEW4X4)
   1180             {
   1181                 int sseshift;
   1182                 int num00;
   1183                 int step_param = 0;
   1184                 int further_steps;
   1185                 int n;
   1186                 int thissme;
   1187                 int bestsme = INT_MAX;
   1188                 int_mv  temp_mv;
   1189                 BLOCK *c;
   1190                 BLOCKD *e;
   1191 
   1192                 /* Is the best so far sufficiently good that we cant justify
   1193                  * doing a new motion search.
   1194                  */
   1195                 if (best_label_rd < label_mv_thresh)
   1196                     break;
   1197 
   1198                 if(cpi->compressor_speed)
   1199                 {
   1200                     if (segmentation == BLOCK_8X16 || segmentation == BLOCK_16X8)
   1201                     {
   1202                         bsi->mvp.as_int = bsi->sv_mvp[i].as_int;
   1203                         if (i==1 && segmentation == BLOCK_16X8)
   1204                           bsi->mvp.as_int = bsi->sv_mvp[2].as_int;
   1205 
   1206                         step_param = bsi->sv_istep[i];
   1207                     }
   1208 
   1209                     /* use previous block's result as next block's MV
   1210                      * predictor.
   1211                      */
   1212                     if (segmentation == BLOCK_4X4 && i>0)
   1213                     {
   1214                         bsi->mvp.as_int = x->e_mbd.block[i-1].bmi.mv.as_int;
   1215                         if (i==4 || i==8 || i==12)
   1216                             bsi->mvp.as_int = x->e_mbd.block[i-4].bmi.mv.as_int;
   1217                         step_param = 2;
   1218                     }
   1219                 }
   1220 
   1221                 further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
   1222 
   1223                 {
   1224                     int sadpb = x->sadperbit4;
   1225                     int_mv mvp_full;
   1226 
   1227                     mvp_full.as_mv.row = bsi->mvp.as_mv.row >>3;
   1228                     mvp_full.as_mv.col = bsi->mvp.as_mv.col >>3;
   1229 
   1230                     /* find first label */
   1231                     n = vp8_mbsplit_offset[segmentation][i];
   1232 
   1233                     c = &x->block[n];
   1234                     e = &x->e_mbd.block[n];
   1235 
   1236                     {
   1237                         bestsme = cpi->diamond_search_sad(x, c, e, &mvp_full,
   1238                                                 &mode_mv[NEW4X4], step_param,
   1239                                                 sadpb, &num00, v_fn_ptr,
   1240                                                 x->mvcost, bsi->ref_mv);
   1241 
   1242                         n = num00;
   1243                         num00 = 0;
   1244 
   1245                         while (n < further_steps)
   1246                         {
   1247                             n++;
   1248 
   1249                             if (num00)
   1250                                 num00--;
   1251                             else
   1252                             {
   1253                                 thissme = cpi->diamond_search_sad(x, c, e,
   1254                                                     &mvp_full, &temp_mv,
   1255                                                     step_param + n, sadpb,
   1256                                                     &num00, v_fn_ptr,
   1257                                                     x->mvcost, bsi->ref_mv);
   1258 
   1259                                 if (thissme < bestsme)
   1260                                 {
   1261                                     bestsme = thissme;
   1262                                     mode_mv[NEW4X4].as_int = temp_mv.as_int;
   1263                                 }
   1264                             }
   1265                         }
   1266                     }
   1267 
   1268                     sseshift = segmentation_to_sseshift[segmentation];
   1269 
   1270                     /* Should we do a full search (best quality only) */
   1271                     if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000)
   1272                     {
   1273                         /* Check if mvp_full is within the range. */
   1274                         vp8_clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
   1275 
   1276                         thissme = cpi->full_search_sad(x, c, e, &mvp_full,
   1277                                                        sadpb, 16, v_fn_ptr,
   1278                                                        x->mvcost, bsi->ref_mv);
   1279 
   1280                         if (thissme < bestsme)
   1281                         {
   1282                             bestsme = thissme;
   1283                             mode_mv[NEW4X4].as_int = e->bmi.mv.as_int;
   1284                         }
   1285                         else
   1286                         {
   1287                             /* The full search result is actually worse so
   1288                              * re-instate the previous best vector
   1289                              */
   1290                             e->bmi.mv.as_int = mode_mv[NEW4X4].as_int;
   1291                         }
   1292                     }
   1293                 }
   1294 
   1295                 if (bestsme < INT_MAX)
   1296                 {
   1297                     int disto;
   1298                     unsigned int sse;
   1299                     cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4],
   1300                         bsi->ref_mv, x->errorperbit, v_fn_ptr, x->mvcost,
   1301                         &disto, &sse);
   1302                 }
   1303             } /* NEW4X4 */
   1304 
   1305             rate = labels2mode(x, labels, i, this_mode, &mode_mv[this_mode],
   1306                                bsi->ref_mv, x->mvcost);
   1307 
   1308             /* Trap vectors that reach beyond the UMV borders */
   1309             if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) || ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) ||
   1310                 ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) || ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max))
   1311             {
   1312                 continue;
   1313             }
   1314 
   1315             distortion = vp8_encode_inter_mb_segment(x, labels, i) / 4;
   1316 
   1317             labelyrate = rdcost_mbsegment_y(x, labels, i, ta_s, tl_s);
   1318             rate += labelyrate;
   1319 
   1320             this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
   1321 
   1322             if (this_rd < best_label_rd)
   1323             {
   1324                 sbr = rate;
   1325                 sbd = distortion;
   1326                 bestlabelyrate = labelyrate;
   1327                 mode_selected = this_mode;
   1328                 best_label_rd = this_rd;
   1329 
   1330                 vpx_memcpy(ta_b, ta_s, sizeof(ENTROPY_CONTEXT_PLANES));
   1331                 vpx_memcpy(tl_b, tl_s, sizeof(ENTROPY_CONTEXT_PLANES));
   1332 
   1333             }
   1334         } /*for each 4x4 mode*/
   1335 
   1336         vpx_memcpy(ta, ta_b, sizeof(ENTROPY_CONTEXT_PLANES));
   1337         vpx_memcpy(tl, tl_b, sizeof(ENTROPY_CONTEXT_PLANES));
   1338 
   1339         labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected],
   1340                     bsi->ref_mv, x->mvcost);
   1341 
   1342         br += sbr;
   1343         bd += sbd;
   1344         segmentyrate += bestlabelyrate;
   1345         this_segment_rd += best_label_rd;
   1346 
   1347         if (this_segment_rd >= bsi->segment_rd)
   1348             break;
   1349 
   1350     } /* for each label */
   1351 
   1352     if (this_segment_rd < bsi->segment_rd)
   1353     {
   1354         bsi->r = br;
   1355         bsi->d = bd;
   1356         bsi->segment_yrate = segmentyrate;
   1357         bsi->segment_rd = this_segment_rd;
   1358         bsi->segment_num = segmentation;
   1359 
   1360         /* store everything needed to come back to this!! */
   1361         for (i = 0; i < 16; i++)
   1362         {
   1363             bsi->mvs[i].as_mv = x->partition_info->bmi[i].mv.as_mv;
   1364             bsi->modes[i] = x->partition_info->bmi[i].mode;
   1365             bsi->eobs[i] = x->e_mbd.eobs[i];
   1366         }
   1367     }
   1368 }
   1369 
   1370 static
   1371 void vp8_cal_step_param(int sr, int *sp)
   1372 {
   1373     int step = 0;
   1374 
   1375     if (sr > MAX_FIRST_STEP) sr = MAX_FIRST_STEP;
   1376     else if (sr < 1) sr = 1;
   1377 
   1378     while (sr>>=1)
   1379         step++;
   1380 
   1381     *sp = MAX_MVSEARCH_STEPS - 1 - step;
   1382 }
   1383 
   1384 static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
   1385                                            int_mv *best_ref_mv, int best_rd,
   1386                                            int *mdcounts, int *returntotrate,
   1387                                            int *returnyrate, int *returndistortion,
   1388                                            int mvthresh)
   1389 {
   1390     int i;
   1391     BEST_SEG_INFO bsi;
   1392 
   1393     vpx_memset(&bsi, 0, sizeof(bsi));
   1394 
   1395     bsi.segment_rd = best_rd;
   1396     bsi.ref_mv = best_ref_mv;
   1397     bsi.mvp.as_int = best_ref_mv->as_int;
   1398     bsi.mvthresh = mvthresh;
   1399     bsi.mdcounts = mdcounts;
   1400 
   1401     for(i = 0; i < 16; i++)
   1402     {
   1403         bsi.modes[i] = ZERO4X4;
   1404     }
   1405 
   1406     if(cpi->compressor_speed == 0)
   1407     {
   1408         /* for now, we will keep the original segmentation order
   1409            when in best quality mode */
   1410         rd_check_segment(cpi, x, &bsi, BLOCK_16X8);
   1411         rd_check_segment(cpi, x, &bsi, BLOCK_8X16);
   1412         rd_check_segment(cpi, x, &bsi, BLOCK_8X8);
   1413         rd_check_segment(cpi, x, &bsi, BLOCK_4X4);
   1414     }
   1415     else
   1416     {
   1417         int sr;
   1418 
   1419         rd_check_segment(cpi, x, &bsi, BLOCK_8X8);
   1420 
   1421         if (bsi.segment_rd < best_rd)
   1422         {
   1423             int col_min = ((best_ref_mv->as_mv.col+7)>>3) - MAX_FULL_PEL_VAL;
   1424             int row_min = ((best_ref_mv->as_mv.row+7)>>3) - MAX_FULL_PEL_VAL;
   1425             int col_max = (best_ref_mv->as_mv.col>>3) + MAX_FULL_PEL_VAL;
   1426             int row_max = (best_ref_mv->as_mv.row>>3) + MAX_FULL_PEL_VAL;
   1427 
   1428             int tmp_col_min = x->mv_col_min;
   1429             int tmp_col_max = x->mv_col_max;
   1430             int tmp_row_min = x->mv_row_min;
   1431             int tmp_row_max = x->mv_row_max;
   1432 
   1433             /* Get intersection of UMV window and valid MV window to reduce # of checks in diamond search. */
   1434             if (x->mv_col_min < col_min )
   1435                 x->mv_col_min = col_min;
   1436             if (x->mv_col_max > col_max )
   1437                 x->mv_col_max = col_max;
   1438             if (x->mv_row_min < row_min )
   1439                 x->mv_row_min = row_min;
   1440             if (x->mv_row_max > row_max )
   1441                 x->mv_row_max = row_max;
   1442 
   1443             /* Get 8x8 result */
   1444             bsi.sv_mvp[0].as_int = bsi.mvs[0].as_int;
   1445             bsi.sv_mvp[1].as_int = bsi.mvs[2].as_int;
   1446             bsi.sv_mvp[2].as_int = bsi.mvs[8].as_int;
   1447             bsi.sv_mvp[3].as_int = bsi.mvs[10].as_int;
   1448 
   1449             /* Use 8x8 result as 16x8/8x16's predictor MV. Adjust search range according to the closeness of 2 MV. */
   1450             /* block 8X16 */
   1451             {
   1452                 sr = MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[2].as_mv.row))>>3, (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[2].as_mv.col))>>3);
   1453                 vp8_cal_step_param(sr, &bsi.sv_istep[0]);
   1454 
   1455                 sr = MAXF((abs(bsi.sv_mvp[1].as_mv.row - bsi.sv_mvp[3].as_mv.row))>>3, (abs(bsi.sv_mvp[1].as_mv.col - bsi.sv_mvp[3].as_mv.col))>>3);
   1456                 vp8_cal_step_param(sr, &bsi.sv_istep[1]);
   1457 
   1458                 rd_check_segment(cpi, x, &bsi, BLOCK_8X16);
   1459             }
   1460 
   1461             /* block 16X8 */
   1462             {
   1463                 sr = MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[1].as_mv.row))>>3, (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[1].as_mv.col))>>3);
   1464                 vp8_cal_step_param(sr, &bsi.sv_istep[0]);
   1465 
   1466                 sr = MAXF((abs(bsi.sv_mvp[2].as_mv.row - bsi.sv_mvp[3].as_mv.row))>>3, (abs(bsi.sv_mvp[2].as_mv.col - bsi.sv_mvp[3].as_mv.col))>>3);
   1467                 vp8_cal_step_param(sr, &bsi.sv_istep[1]);
   1468 
   1469                 rd_check_segment(cpi, x, &bsi, BLOCK_16X8);
   1470             }
   1471 
   1472             /* If 8x8 is better than 16x8/8x16, then do 4x4 search */
   1473             /* Not skip 4x4 if speed=0 (good quality) */
   1474             if (cpi->sf.no_skip_block4x4_search || bsi.segment_num == BLOCK_8X8)  /* || (sv_segment_rd8x8-bsi.segment_rd) < sv_segment_rd8x8>>5) */
   1475             {
   1476                 bsi.mvp.as_int = bsi.sv_mvp[0].as_int;
   1477                 rd_check_segment(cpi, x, &bsi, BLOCK_4X4);
   1478             }
   1479 
   1480             /* restore UMV window */
   1481             x->mv_col_min = tmp_col_min;
   1482             x->mv_col_max = tmp_col_max;
   1483             x->mv_row_min = tmp_row_min;
   1484             x->mv_row_max = tmp_row_max;
   1485         }
   1486     }
   1487 
   1488     /* set it to the best */
   1489     for (i = 0; i < 16; i++)
   1490     {
   1491         BLOCKD *bd = &x->e_mbd.block[i];
   1492 
   1493         bd->bmi.mv.as_int = bsi.mvs[i].as_int;
   1494         *bd->eob = bsi.eobs[i];
   1495     }
   1496 
   1497     *returntotrate = bsi.r;
   1498     *returndistortion = bsi.d;
   1499     *returnyrate = bsi.segment_yrate;
   1500 
   1501     /* save partitions */
   1502     x->e_mbd.mode_info_context->mbmi.partitioning = bsi.segment_num;
   1503     x->partition_info->count = vp8_mbsplit_count[bsi.segment_num];
   1504 
   1505     for (i = 0; i < x->partition_info->count; i++)
   1506     {
   1507         int j;
   1508 
   1509         j = vp8_mbsplit_offset[bsi.segment_num][i];
   1510 
   1511         x->partition_info->bmi[i].mode = bsi.modes[j];
   1512         x->partition_info->bmi[i].mv.as_mv = bsi.mvs[j].as_mv;
   1513     }
   1514     /*
   1515      * used to set x->e_mbd.mode_info_context->mbmi.mv.as_int
   1516      */
   1517     x->partition_info->bmi[15].mv.as_int = bsi.mvs[15].as_int;
   1518 
   1519     return bsi.segment_rd;
   1520 }
   1521 
   1522 /* The improved MV prediction */
   1523 void vp8_mv_pred
   1524 (
   1525     VP8_COMP *cpi,
   1526     MACROBLOCKD *xd,
   1527     const MODE_INFO *here,
   1528     int_mv *mvp,
   1529     int refframe,
   1530     int *ref_frame_sign_bias,
   1531     int *sr,
   1532     int near_sadidx[]
   1533 )
   1534 {
   1535     const MODE_INFO *above = here - xd->mode_info_stride;
   1536     const MODE_INFO *left = here - 1;
   1537     const MODE_INFO *aboveleft = above - 1;
   1538     int_mv           near_mvs[8];
   1539     int              near_ref[8];
   1540     int_mv           mv;
   1541     int              vcnt=0;
   1542     int              find=0;
   1543     int              mb_offset;
   1544 
   1545     int              mvx[8];
   1546     int              mvy[8];
   1547     int              i;
   1548 
   1549     mv.as_int = 0;
   1550 
   1551     if(here->mbmi.ref_frame != INTRA_FRAME)
   1552     {
   1553         near_mvs[0].as_int = near_mvs[1].as_int = near_mvs[2].as_int = near_mvs[3].as_int = near_mvs[4].as_int = near_mvs[5].as_int = near_mvs[6].as_int = near_mvs[7].as_int = 0;
   1554         near_ref[0] = near_ref[1] = near_ref[2] = near_ref[3] = near_ref[4] = near_ref[5] = near_ref[6] = near_ref[7] = 0;
   1555 
   1556         /* read in 3 nearby block's MVs from current frame as prediction
   1557          * candidates.
   1558          */
   1559         if (above->mbmi.ref_frame != INTRA_FRAME)
   1560         {
   1561             near_mvs[vcnt].as_int = above->mbmi.mv.as_int;
   1562             mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
   1563             near_ref[vcnt] =  above->mbmi.ref_frame;
   1564         }
   1565         vcnt++;
   1566         if (left->mbmi.ref_frame != INTRA_FRAME)
   1567         {
   1568             near_mvs[vcnt].as_int = left->mbmi.mv.as_int;
   1569             mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
   1570             near_ref[vcnt] =  left->mbmi.ref_frame;
   1571         }
   1572         vcnt++;
   1573         if (aboveleft->mbmi.ref_frame != INTRA_FRAME)
   1574         {
   1575             near_mvs[vcnt].as_int = aboveleft->mbmi.mv.as_int;
   1576             mv_bias(ref_frame_sign_bias[aboveleft->mbmi.ref_frame], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
   1577             near_ref[vcnt] =  aboveleft->mbmi.ref_frame;
   1578         }
   1579         vcnt++;
   1580 
   1581         /* read in 5 nearby block's MVs from last frame. */
   1582         if(cpi->common.last_frame_type != KEY_FRAME)
   1583         {
   1584             mb_offset = (-xd->mb_to_top_edge/128 + 1) * (xd->mode_info_stride +1) + (-xd->mb_to_left_edge/128 +1) ;
   1585 
   1586             /* current in last frame */
   1587             if (cpi->lf_ref_frame[mb_offset] != INTRA_FRAME)
   1588             {
   1589                 near_mvs[vcnt].as_int = cpi->lfmv[mb_offset].as_int;
   1590                 mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
   1591                 near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset];
   1592             }
   1593             vcnt++;
   1594 
   1595             /* above in last frame */
   1596             if (cpi->lf_ref_frame[mb_offset - xd->mode_info_stride-1] != INTRA_FRAME)
   1597             {
   1598                 near_mvs[vcnt].as_int = cpi->lfmv[mb_offset - xd->mode_info_stride-1].as_int;
   1599                 mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset - xd->mode_info_stride-1], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
   1600                 near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset - xd->mode_info_stride-1];
   1601             }
   1602             vcnt++;
   1603 
   1604             /* left in last frame */
   1605             if (cpi->lf_ref_frame[mb_offset-1] != INTRA_FRAME)
   1606             {
   1607                 near_mvs[vcnt].as_int = cpi->lfmv[mb_offset -1].as_int;
   1608                 mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset -1], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
   1609                 near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset - 1];
   1610             }
   1611             vcnt++;
   1612 
   1613             /* right in last frame */
   1614             if (cpi->lf_ref_frame[mb_offset +1] != INTRA_FRAME)
   1615             {
   1616                 near_mvs[vcnt].as_int = cpi->lfmv[mb_offset +1].as_int;
   1617                 mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset +1], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
   1618                 near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset +1];
   1619             }
   1620             vcnt++;
   1621 
   1622             /* below in last frame */
   1623             if (cpi->lf_ref_frame[mb_offset + xd->mode_info_stride +1] != INTRA_FRAME)
   1624             {
   1625                 near_mvs[vcnt].as_int = cpi->lfmv[mb_offset + xd->mode_info_stride +1].as_int;
   1626                 mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset + xd->mode_info_stride +1], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
   1627                 near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset + xd->mode_info_stride +1];
   1628             }
   1629             vcnt++;
   1630         }
   1631 
   1632         for(i=0; i< vcnt; i++)
   1633         {
   1634             if(near_ref[near_sadidx[i]] != INTRA_FRAME)
   1635             {
   1636                 if(here->mbmi.ref_frame == near_ref[near_sadidx[i]])
   1637                 {
   1638                     mv.as_int = near_mvs[near_sadidx[i]].as_int;
   1639                     find = 1;
   1640                     if (i < 3)
   1641                         *sr = 3;
   1642                     else
   1643                         *sr = 2;
   1644                     break;
   1645                 }
   1646             }
   1647         }
   1648 
   1649         if(!find)
   1650         {
   1651             for(i=0; i<vcnt; i++)
   1652             {
   1653                 mvx[i] = near_mvs[i].as_mv.row;
   1654                 mvy[i] = near_mvs[i].as_mv.col;
   1655             }
   1656 
   1657             insertsortmv(mvx, vcnt);
   1658             insertsortmv(mvy, vcnt);
   1659             mv.as_mv.row = mvx[vcnt/2];
   1660             mv.as_mv.col = mvy[vcnt/2];
   1661 
   1662             find = 1;
   1663             /* sr is set to 0 to allow calling function to decide the search
   1664              * range.
   1665              */
   1666             *sr = 0;
   1667         }
   1668     }
   1669 
   1670     /* Set up return values */
   1671     mvp->as_int = mv.as_int;
   1672     vp8_clamp_mv2(mvp, xd);
   1673 }
   1674 
   1675 void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffset, int near_sadidx[])
   1676 {
   1677     /* near_sad indexes:
   1678      *   0-cf above, 1-cf left, 2-cf aboveleft,
   1679      *   3-lf current, 4-lf above, 5-lf left, 6-lf right, 7-lf below
   1680      */
   1681     int near_sad[8] = {0};
   1682     BLOCK *b = &x->block[0];
   1683     unsigned char *src_y_ptr = *(b->base_src);
   1684 
   1685     /* calculate sad for current frame 3 nearby MBs. */
   1686     if( xd->mb_to_top_edge==0 && xd->mb_to_left_edge ==0)
   1687     {
   1688         near_sad[0] = near_sad[1] = near_sad[2] = INT_MAX;
   1689     }else if(xd->mb_to_top_edge==0)
   1690     {   /* only has left MB for sad calculation. */
   1691         near_sad[0] = near_sad[2] = INT_MAX;
   1692         near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, UINT_MAX);
   1693     }else if(xd->mb_to_left_edge ==0)
   1694     {   /* only has left MB for sad calculation. */
   1695         near_sad[1] = near_sad[2] = INT_MAX;
   1696         near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, UINT_MAX);
   1697     }else
   1698     {
   1699         near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, UINT_MAX);
   1700         near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, UINT_MAX);
   1701         near_sad[2] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16 -16,xd->dst.y_stride, UINT_MAX);
   1702     }
   1703 
   1704     if(cpi->common.last_frame_type != KEY_FRAME)
   1705     {
   1706         /* calculate sad for last frame 5 nearby MBs. */
   1707         unsigned char *pre_y_buffer = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_buffer + recon_yoffset;
   1708         int pre_y_stride = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_stride;
   1709 
   1710         if(xd->mb_to_top_edge==0) near_sad[4] = INT_MAX;
   1711         if(xd->mb_to_left_edge ==0) near_sad[5] = INT_MAX;
   1712         if(xd->mb_to_right_edge ==0) near_sad[6] = INT_MAX;
   1713         if(xd->mb_to_bottom_edge==0) near_sad[7] = INT_MAX;
   1714 
   1715         if(near_sad[4] != INT_MAX)
   1716             near_sad[4] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer - pre_y_stride *16, pre_y_stride, UINT_MAX);
   1717         if(near_sad[5] != INT_MAX)
   1718             near_sad[5] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer - 16, pre_y_stride, UINT_MAX);
   1719         near_sad[3] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer, pre_y_stride, UINT_MAX);
   1720         if(near_sad[6] != INT_MAX)
   1721             near_sad[6] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer + 16, pre_y_stride, UINT_MAX);
   1722         if(near_sad[7] != INT_MAX)
   1723             near_sad[7] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer + pre_y_stride *16, pre_y_stride, UINT_MAX);
   1724     }
   1725 
   1726     if(cpi->common.last_frame_type != KEY_FRAME)
   1727     {
   1728         insertsortsad(near_sad, near_sadidx, 8);
   1729     }else
   1730     {
   1731         insertsortsad(near_sad, near_sadidx, 3);
   1732     }
   1733 }
   1734 
   1735 static void rd_update_mvcount(MACROBLOCK *x, int_mv *best_ref_mv)
   1736 {
   1737     if (x->e_mbd.mode_info_context->mbmi.mode == SPLITMV)
   1738     {
   1739         int i;
   1740 
   1741         for (i = 0; i < x->partition_info->count; i++)
   1742         {
   1743             if (x->partition_info->bmi[i].mode == NEW4X4)
   1744             {
   1745                 x->MVcount[0][mv_max+((x->partition_info->bmi[i].mv.as_mv.row
   1746                                           - best_ref_mv->as_mv.row) >> 1)]++;
   1747                 x->MVcount[1][mv_max+((x->partition_info->bmi[i].mv.as_mv.col
   1748                                           - best_ref_mv->as_mv.col) >> 1)]++;
   1749             }
   1750         }
   1751     }
   1752     else if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV)
   1753     {
   1754         x->MVcount[0][mv_max+((x->e_mbd.mode_info_context->mbmi.mv.as_mv.row
   1755                                           - best_ref_mv->as_mv.row) >> 1)]++;
   1756         x->MVcount[1][mv_max+((x->e_mbd.mode_info_context->mbmi.mv.as_mv.col
   1757                                           - best_ref_mv->as_mv.col) >> 1)]++;
   1758     }
   1759 }
   1760 
   1761 static int evaluate_inter_mode_rd(int mdcounts[4],
   1762                                   RATE_DISTORTION* rd,
   1763                                   int* disable_skip,
   1764                                   VP8_COMP *cpi, MACROBLOCK *x)
   1765 {
   1766     MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode;
   1767     BLOCK *b = &x->block[0];
   1768     MACROBLOCKD *xd = &x->e_mbd;
   1769     int distortion;
   1770     vp8_build_inter16x16_predictors_mby(&x->e_mbd, x->e_mbd.predictor, 16);
   1771 
   1772     if (cpi->active_map_enabled && x->active_ptr[0] == 0) {
   1773         x->skip = 1;
   1774     }
   1775     else if (x->encode_breakout)
   1776     {
   1777         unsigned int sse;
   1778         unsigned int var;
   1779         unsigned int threshold = (xd->block[0].dequant[1]
   1780                     * xd->block[0].dequant[1] >>4);
   1781 
   1782         if(threshold < x->encode_breakout)
   1783             threshold = x->encode_breakout;
   1784 
   1785         var = vp8_variance16x16
   1786                 (*(b->base_src), b->src_stride,
   1787                 x->e_mbd.predictor, 16, &sse);
   1788 
   1789         if (sse < threshold)
   1790         {
   1791              unsigned int q2dc = xd->block[24].dequant[0];
   1792             /* If theres is no codeable 2nd order dc
   1793                or a very small uniform pixel change change */
   1794             if ((sse - var < q2dc * q2dc >>4) ||
   1795                 (sse /2 > var && sse-var < 64))
   1796             {
   1797                 /* Check u and v to make sure skip is ok */
   1798                 unsigned int sse2 = VP8_UVSSE(x);
   1799                 if (sse2 * 2 < threshold)
   1800                 {
   1801                     x->skip = 1;
   1802                     rd->distortion2 = sse + sse2;
   1803                     rd->rate2 = 500;
   1804 
   1805                     /* for best_yrd calculation */
   1806                     rd->rate_uv = 0;
   1807                     rd->distortion_uv = sse2;
   1808 
   1809                     *disable_skip = 1;
   1810                     return RDCOST(x->rdmult, x->rddiv, rd->rate2,
   1811                                   rd->distortion2);
   1812                 }
   1813             }
   1814         }
   1815     }
   1816 
   1817 
   1818     /* Add in the Mv/mode cost */
   1819     rd->rate2 += vp8_cost_mv_ref(this_mode, mdcounts);
   1820 
   1821     /* Y cost and distortion */
   1822     macro_block_yrd(x, &rd->rate_y, &distortion);
   1823     rd->rate2 += rd->rate_y;
   1824     rd->distortion2 += distortion;
   1825 
   1826     /* UV cost and distortion */
   1827     rd_inter16x16_uv(cpi, x, &rd->rate_uv, &rd->distortion_uv,
   1828                      cpi->common.full_pixel);
   1829     rd->rate2 += rd->rate_uv;
   1830     rd->distortion2 += rd->distortion_uv;
   1831     return INT_MAX;
   1832 }
   1833 
   1834 static int calculate_final_rd_costs(int this_rd,
   1835                                     RATE_DISTORTION* rd,
   1836                                     int* other_cost,
   1837                                     int disable_skip,
   1838                                     int uv_intra_tteob,
   1839                                     int intra_rd_penalty,
   1840                                     VP8_COMP *cpi, MACROBLOCK *x)
   1841 {
   1842     MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode;
   1843 
   1844     /* Where skip is allowable add in the default per mb cost for the no
   1845      * skip case. where we then decide to skip we have to delete this and
   1846      * replace it with the cost of signalling a skip
   1847      */
   1848     if (cpi->common.mb_no_coeff_skip)
   1849     {
   1850         *other_cost += vp8_cost_bit(cpi->prob_skip_false, 0);
   1851         rd->rate2 += *other_cost;
   1852     }
   1853 
   1854     /* Estimate the reference frame signaling cost and add it
   1855      * to the rolling cost variable.
   1856      */
   1857     rd->rate2 +=
   1858         x->ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
   1859 
   1860     if (!disable_skip)
   1861     {
   1862         /* Test for the condition where skip block will be activated
   1863          * because there are no non zero coefficients and make any
   1864          * necessary adjustment for rate
   1865          */
   1866         if (cpi->common.mb_no_coeff_skip)
   1867         {
   1868             int i;
   1869             int tteob;
   1870             int has_y2_block = (this_mode!=SPLITMV && this_mode!=B_PRED);
   1871 
   1872             tteob = 0;
   1873             if(has_y2_block)
   1874                 tteob += x->e_mbd.eobs[24];
   1875 
   1876             for (i = 0; i < 16; i++)
   1877                 tteob += (x->e_mbd.eobs[i] > has_y2_block);
   1878 
   1879             if (x->e_mbd.mode_info_context->mbmi.ref_frame)
   1880             {
   1881                 for (i = 16; i < 24; i++)
   1882                     tteob += x->e_mbd.eobs[i];
   1883             }
   1884             else
   1885                 tteob += uv_intra_tteob;
   1886 
   1887             if (tteob == 0)
   1888             {
   1889                 rd->rate2 -= (rd->rate_y + rd->rate_uv);
   1890                 /* for best_yrd calculation */
   1891                 rd->rate_uv = 0;
   1892 
   1893                 /* Back out no skip flag costing and add in skip flag costing */
   1894                 if (cpi->prob_skip_false)
   1895                 {
   1896                     int prob_skip_cost;
   1897 
   1898                     prob_skip_cost = vp8_cost_bit(cpi->prob_skip_false, 1);
   1899                     prob_skip_cost -= vp8_cost_bit(cpi->prob_skip_false, 0);
   1900                     rd->rate2 += prob_skip_cost;
   1901                     *other_cost += prob_skip_cost;
   1902                 }
   1903             }
   1904         }
   1905         /* Calculate the final RD estimate for this mode */
   1906         this_rd = RDCOST(x->rdmult, x->rddiv, rd->rate2, rd->distortion2);
   1907         if (this_rd < INT_MAX && x->e_mbd.mode_info_context->mbmi.ref_frame
   1908                                  == INTRA_FRAME)
   1909             this_rd += intra_rd_penalty;
   1910     }
   1911     return this_rd;
   1912 }
   1913 
   1914 static void update_best_mode(BEST_MODE* best_mode, int this_rd,
   1915                              RATE_DISTORTION* rd, int other_cost, MACROBLOCK *x)
   1916 {
   1917     MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode;
   1918 
   1919     other_cost +=
   1920     x->ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
   1921 
   1922     /* Calculate the final y RD estimate for this mode */
   1923     best_mode->yrd = RDCOST(x->rdmult, x->rddiv, (rd->rate2-rd->rate_uv-other_cost),
   1924                       (rd->distortion2-rd->distortion_uv));
   1925 
   1926     best_mode->rd = this_rd;
   1927     vpx_memcpy(&best_mode->mbmode, &x->e_mbd.mode_info_context->mbmi, sizeof(MB_MODE_INFO));
   1928     vpx_memcpy(&best_mode->partition, x->partition_info, sizeof(PARTITION_INFO));
   1929 
   1930     if ((this_mode == B_PRED) || (this_mode == SPLITMV))
   1931     {
   1932         int i;
   1933         for (i = 0; i < 16; i++)
   1934         {
   1935             best_mode->bmodes[i] = x->e_mbd.block[i].bmi;
   1936         }
   1937     }
   1938 }
   1939 
   1940 void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
   1941                             int recon_uvoffset, int *returnrate,
   1942                             int *returndistortion, int *returnintra)
   1943 {
   1944     BLOCK *b = &x->block[0];
   1945     BLOCKD *d = &x->e_mbd.block[0];
   1946     MACROBLOCKD *xd = &x->e_mbd;
   1947     int_mv best_ref_mv_sb[2];
   1948     int_mv mode_mv_sb[2][MB_MODE_COUNT];
   1949     int_mv best_ref_mv;
   1950     int_mv *mode_mv;
   1951     MB_PREDICTION_MODE this_mode;
   1952     int num00;
   1953     int best_mode_index = 0;
   1954     BEST_MODE best_mode;
   1955 
   1956     int i;
   1957     int mode_index;
   1958     int mdcounts[4];
   1959     int rate;
   1960     RATE_DISTORTION rd;
   1961     int uv_intra_rate, uv_intra_distortion, uv_intra_rate_tokenonly;
   1962     int uv_intra_tteob = 0;
   1963     int uv_intra_done = 0;
   1964 
   1965     MB_PREDICTION_MODE uv_intra_mode = 0;
   1966     int_mv mvp;
   1967     int near_sadidx[8] = {0, 1, 2, 3, 4, 5, 6, 7};
   1968     int saddone=0;
   1969     /* search range got from mv_pred(). It uses step_param levels. (0-7) */
   1970     int sr=0;
   1971 
   1972     unsigned char *plane[4][3];
   1973     int ref_frame_map[4];
   1974     int sign_bias = 0;
   1975 
   1976     int intra_rd_penalty =  10* vp8_dc_quant(cpi->common.base_qindex,
   1977                                              cpi->common.y1dc_delta_q);
   1978 
   1979 #if CONFIG_TEMPORAL_DENOISING
   1980     unsigned int zero_mv_sse = INT_MAX, best_sse = INT_MAX,
   1981             best_rd_sse = INT_MAX;
   1982 #endif
   1983 
   1984     mode_mv = mode_mv_sb[sign_bias];
   1985     best_ref_mv.as_int = 0;
   1986     best_mode.rd = INT_MAX;
   1987     best_mode.yrd = INT_MAX;
   1988     best_mode.intra_rd = INT_MAX;
   1989     vpx_memset(mode_mv_sb, 0, sizeof(mode_mv_sb));
   1990     vpx_memset(&best_mode.mbmode, 0, sizeof(best_mode.mbmode));
   1991     vpx_memset(&best_mode.bmodes, 0, sizeof(best_mode.bmodes));
   1992 
   1993     /* Setup search priorities */
   1994     get_reference_search_order(cpi, ref_frame_map);
   1995 
   1996     /* Check to see if there is at least 1 valid reference frame that we need
   1997      * to calculate near_mvs.
   1998      */
   1999     if (ref_frame_map[1] > 0)
   2000     {
   2001         sign_bias = vp8_find_near_mvs_bias(&x->e_mbd,
   2002                                            x->e_mbd.mode_info_context,
   2003                                            mode_mv_sb,
   2004                                            best_ref_mv_sb,
   2005                                            mdcounts,
   2006                                            ref_frame_map[1],
   2007                                            cpi->common.ref_frame_sign_bias);
   2008 
   2009         mode_mv = mode_mv_sb[sign_bias];
   2010         best_ref_mv.as_int = best_ref_mv_sb[sign_bias].as_int;
   2011     }
   2012 
   2013     get_predictor_pointers(cpi, plane, recon_yoffset, recon_uvoffset);
   2014 
   2015     *returnintra = INT_MAX;
   2016     /* Count of the number of MBs tested so far this frame */
   2017     x->mbs_tested_so_far++;
   2018 
   2019     x->skip = 0;
   2020 
   2021     for (mode_index = 0; mode_index < MAX_MODES; mode_index++)
   2022     {
   2023         int this_rd = INT_MAX;
   2024         int disable_skip = 0;
   2025         int other_cost = 0;
   2026         int this_ref_frame = ref_frame_map[vp8_ref_frame_order[mode_index]];
   2027 
   2028         /* Test best rd so far against threshold for trying this mode. */
   2029         if (best_mode.rd <= x->rd_threshes[mode_index])
   2030             continue;
   2031 
   2032         if (this_ref_frame < 0)
   2033             continue;
   2034 
   2035         /* These variables hold the rolling total cost and distortion for
   2036          * this mode
   2037          */
   2038         rd.rate2 = 0;
   2039         rd.distortion2 = 0;
   2040 
   2041         this_mode = vp8_mode_order[mode_index];
   2042 
   2043         x->e_mbd.mode_info_context->mbmi.mode = this_mode;
   2044         x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame;
   2045 
   2046         /* Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
   2047          * unless ARNR filtering is enabled in which case we want
   2048          * an unfiltered alternative
   2049          */
   2050         if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0))
   2051         {
   2052             if (this_mode != ZEROMV || x->e_mbd.mode_info_context->mbmi.ref_frame != ALTREF_FRAME)
   2053                 continue;
   2054         }
   2055 
   2056         /* everything but intra */
   2057         if (x->e_mbd.mode_info_context->mbmi.ref_frame)
   2058         {
   2059             x->e_mbd.pre.y_buffer = plane[this_ref_frame][0];
   2060             x->e_mbd.pre.u_buffer = plane[this_ref_frame][1];
   2061             x->e_mbd.pre.v_buffer = plane[this_ref_frame][2];
   2062 
   2063             if (sign_bias != cpi->common.ref_frame_sign_bias[this_ref_frame])
   2064             {
   2065                 sign_bias = cpi->common.ref_frame_sign_bias[this_ref_frame];
   2066                 mode_mv = mode_mv_sb[sign_bias];
   2067                 best_ref_mv.as_int = best_ref_mv_sb[sign_bias].as_int;
   2068             }
   2069         }
   2070 
   2071         /* Check to see if the testing frequency for this mode is at its
   2072          * max If so then prevent it from being tested and increase the
   2073          * threshold for its testing
   2074          */
   2075         if (x->mode_test_hit_counts[mode_index] && (cpi->mode_check_freq[mode_index] > 1))
   2076         {
   2077             if (x->mbs_tested_so_far  <= cpi->mode_check_freq[mode_index] * x->mode_test_hit_counts[mode_index])
   2078             {
   2079                 /* Increase the threshold for coding this mode to make it
   2080                  * less likely to be chosen
   2081                  */
   2082                 x->rd_thresh_mult[mode_index] += 4;
   2083 
   2084                 if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT)
   2085                     x->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
   2086 
   2087                 x->rd_threshes[mode_index] =
   2088                     (cpi->rd_baseline_thresh[mode_index] >> 7) *
   2089                     x->rd_thresh_mult[mode_index];
   2090 
   2091                 continue;
   2092             }
   2093         }
   2094 
   2095         /* We have now reached the point where we are going to test the
   2096          * current mode so increment the counter for the number of times
   2097          * it has been tested
   2098          */
   2099         x->mode_test_hit_counts[mode_index] ++;
   2100 
   2101         /* Experimental code. Special case for gf and arf zeromv modes.
   2102          * Increase zbin size to suppress noise
   2103          */
   2104         if (x->zbin_mode_boost_enabled)
   2105         {
   2106             if ( this_ref_frame == INTRA_FRAME )
   2107                 x->zbin_mode_boost = 0;
   2108             else
   2109             {
   2110                 if (vp8_mode_order[mode_index] == ZEROMV)
   2111                 {
   2112                     if (this_ref_frame != LAST_FRAME)
   2113                         x->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
   2114                     else
   2115                         x->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
   2116                 }
   2117                 else if (vp8_mode_order[mode_index] == SPLITMV)
   2118                     x->zbin_mode_boost = 0;
   2119                 else
   2120                     x->zbin_mode_boost = MV_ZBIN_BOOST;
   2121             }
   2122 
   2123             vp8_update_zbin_extra(cpi, x);
   2124         }
   2125 
   2126         if(!uv_intra_done && this_ref_frame == INTRA_FRAME)
   2127         {
   2128             rd_pick_intra_mbuv_mode(x, &uv_intra_rate,
   2129                                     &uv_intra_rate_tokenonly,
   2130                                     &uv_intra_distortion);
   2131             uv_intra_mode = x->e_mbd.mode_info_context->mbmi.uv_mode;
   2132 
   2133             /*
   2134              * Total of the eobs is used later to further adjust rate2. Since uv
   2135              * block's intra eobs will be overwritten when we check inter modes,
   2136              * we need to save uv_intra_tteob here.
   2137              */
   2138             for (i = 16; i < 24; i++)
   2139                 uv_intra_tteob += x->e_mbd.eobs[i];
   2140 
   2141             uv_intra_done = 1;
   2142         }
   2143 
   2144         switch (this_mode)
   2145         {
   2146         case B_PRED:
   2147         {
   2148             int tmp_rd;
   2149 
   2150             /* Note the rate value returned here includes the cost of
   2151              * coding the BPRED mode: x->mbmode_cost[x->e_mbd.frame_type][BPRED]
   2152              */
   2153             int distortion;
   2154             tmp_rd = rd_pick_intra4x4mby_modes(x, &rate, &rd.rate_y, &distortion, best_mode.yrd);
   2155             rd.rate2 += rate;
   2156             rd.distortion2 += distortion;
   2157 
   2158             if(tmp_rd < best_mode.yrd)
   2159             {
   2160                 rd.rate2 += uv_intra_rate;
   2161                 rd.rate_uv = uv_intra_rate_tokenonly;
   2162                 rd.distortion2 += uv_intra_distortion;
   2163                 rd.distortion_uv = uv_intra_distortion;
   2164             }
   2165             else
   2166             {
   2167                 this_rd = INT_MAX;
   2168                 disable_skip = 1;
   2169             }
   2170         }
   2171         break;
   2172 
   2173         case SPLITMV:
   2174         {
   2175             int tmp_rd;
   2176             int this_rd_thresh;
   2177             int distortion;
   2178 
   2179             this_rd_thresh = (vp8_ref_frame_order[mode_index] == 1) ?
   2180                 x->rd_threshes[THR_NEW1] : x->rd_threshes[THR_NEW3];
   2181             this_rd_thresh = (vp8_ref_frame_order[mode_index] == 2) ?
   2182                 x->rd_threshes[THR_NEW2] : this_rd_thresh;
   2183 
   2184             tmp_rd = vp8_rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv,
   2185                                                      best_mode.yrd, mdcounts,
   2186                                                      &rate, &rd.rate_y, &distortion, this_rd_thresh) ;
   2187 
   2188             rd.rate2 += rate;
   2189             rd.distortion2 += distortion;
   2190 
   2191             /* If even the 'Y' rd value of split is higher than best so far
   2192              * then dont bother looking at UV
   2193              */
   2194             if (tmp_rd < best_mode.yrd)
   2195             {
   2196                 /* Now work out UV cost and add it in */
   2197                 rd_inter4x4_uv(cpi, x, &rd.rate_uv, &rd.distortion_uv, cpi->common.full_pixel);
   2198                 rd.rate2 += rd.rate_uv;
   2199                 rd.distortion2 += rd.distortion_uv;
   2200             }
   2201             else
   2202             {
   2203                 this_rd = INT_MAX;
   2204                 disable_skip = 1;
   2205             }
   2206         }
   2207         break;
   2208         case DC_PRED:
   2209         case V_PRED:
   2210         case H_PRED:
   2211         case TM_PRED:
   2212         {
   2213             int distortion;
   2214             x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
   2215 
   2216             vp8_build_intra_predictors_mby_s(xd,
   2217                                              xd->dst.y_buffer - xd->dst.y_stride,
   2218                                              xd->dst.y_buffer - 1,
   2219                                              xd->dst.y_stride,
   2220                                              xd->predictor,
   2221                                              16);
   2222             macro_block_yrd(x, &rd.rate_y, &distortion) ;
   2223             rd.rate2 += rd.rate_y;
   2224             rd.distortion2 += distortion;
   2225             rd.rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode];
   2226             rd.rate2 += uv_intra_rate;
   2227             rd.rate_uv = uv_intra_rate_tokenonly;
   2228             rd.distortion2 += uv_intra_distortion;
   2229             rd.distortion_uv = uv_intra_distortion;
   2230         }
   2231         break;
   2232 
   2233         case NEWMV:
   2234         {
   2235             int thissme;
   2236             int bestsme = INT_MAX;
   2237             int step_param = cpi->sf.first_step;
   2238             int further_steps;
   2239             int n;
   2240             int do_refine=1;   /* If last step (1-away) of n-step search doesn't pick the center point as the best match,
   2241                                   we will do a final 1-away diamond refining search  */
   2242 
   2243             int sadpb = x->sadperbit16;
   2244             int_mv mvp_full;
   2245 
   2246             int col_min = ((best_ref_mv.as_mv.col+7)>>3) - MAX_FULL_PEL_VAL;
   2247             int row_min = ((best_ref_mv.as_mv.row+7)>>3) - MAX_FULL_PEL_VAL;
   2248             int col_max = (best_ref_mv.as_mv.col>>3) + MAX_FULL_PEL_VAL;
   2249             int row_max = (best_ref_mv.as_mv.row>>3) + MAX_FULL_PEL_VAL;
   2250 
   2251             int tmp_col_min = x->mv_col_min;
   2252             int tmp_col_max = x->mv_col_max;
   2253             int tmp_row_min = x->mv_row_min;
   2254             int tmp_row_max = x->mv_row_max;
   2255 
   2256             if(!saddone)
   2257             {
   2258                 vp8_cal_sad(cpi,xd,x, recon_yoffset ,&near_sadidx[0] );
   2259                 saddone = 1;
   2260             }
   2261 
   2262             vp8_mv_pred(cpi, &x->e_mbd, x->e_mbd.mode_info_context, &mvp,
   2263                         x->e_mbd.mode_info_context->mbmi.ref_frame, cpi->common.ref_frame_sign_bias, &sr, &near_sadidx[0]);
   2264 
   2265             mvp_full.as_mv.col = mvp.as_mv.col>>3;
   2266             mvp_full.as_mv.row = mvp.as_mv.row>>3;
   2267 
   2268             /* Get intersection of UMV window and valid MV window to
   2269              * reduce # of checks in diamond search.
   2270              */
   2271             if (x->mv_col_min < col_min )
   2272                 x->mv_col_min = col_min;
   2273             if (x->mv_col_max > col_max )
   2274                 x->mv_col_max = col_max;
   2275             if (x->mv_row_min < row_min )
   2276                 x->mv_row_min = row_min;
   2277             if (x->mv_row_max > row_max )
   2278                 x->mv_row_max = row_max;
   2279 
   2280             /* adjust search range according to sr from mv prediction */
   2281             if(sr > step_param)
   2282                 step_param = sr;
   2283 
   2284             /* Initial step/diamond search */
   2285             {
   2286                 bestsme = cpi->diamond_search_sad(x, b, d, &mvp_full, &d->bmi.mv,
   2287                                         step_param, sadpb, &num00,
   2288                                         &cpi->fn_ptr[BLOCK_16X16],
   2289                                         x->mvcost, &best_ref_mv);
   2290                 mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
   2291 
   2292                 /* Further step/diamond searches as necessary */
   2293                 n = 0;
   2294                 further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
   2295 
   2296                 n = num00;
   2297                 num00 = 0;
   2298 
   2299                 /* If there won't be more n-step search, check to see if refining search is needed. */
   2300                 if (n > further_steps)
   2301                     do_refine = 0;
   2302 
   2303                 while (n < further_steps)
   2304                 {
   2305                     n++;
   2306 
   2307                     if (num00)
   2308                         num00--;
   2309                     else
   2310                     {
   2311                         thissme = cpi->diamond_search_sad(x, b, d, &mvp_full,
   2312                                     &d->bmi.mv, step_param + n, sadpb, &num00,
   2313                                     &cpi->fn_ptr[BLOCK_16X16], x->mvcost,
   2314                                     &best_ref_mv);
   2315 
   2316                         /* check to see if refining search is needed. */
   2317                         if (num00 > (further_steps-n))
   2318                             do_refine = 0;
   2319 
   2320                         if (thissme < bestsme)
   2321                         {
   2322                             bestsme = thissme;
   2323                             mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
   2324                         }
   2325                         else
   2326                         {
   2327                             d->bmi.mv.as_int = mode_mv[NEWMV].as_int;
   2328                         }
   2329                     }
   2330                 }
   2331             }
   2332 
   2333             /* final 1-away diamond refining search */
   2334             if (do_refine == 1)
   2335             {
   2336                 int search_range;
   2337 
   2338                 search_range = 8;
   2339 
   2340                 thissme = cpi->refining_search_sad(x, b, d, &d->bmi.mv, sadpb,
   2341                                        search_range, &cpi->fn_ptr[BLOCK_16X16],
   2342                                        x->mvcost, &best_ref_mv);
   2343 
   2344                 if (thissme < bestsme)
   2345                 {
   2346                     bestsme = thissme;
   2347                     mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
   2348                 }
   2349                 else
   2350                 {
   2351                     d->bmi.mv.as_int = mode_mv[NEWMV].as_int;
   2352                 }
   2353             }
   2354 
   2355             x->mv_col_min = tmp_col_min;
   2356             x->mv_col_max = tmp_col_max;
   2357             x->mv_row_min = tmp_row_min;
   2358             x->mv_row_max = tmp_row_max;
   2359 
   2360             if (bestsme < INT_MAX)
   2361             {
   2362                 int dis; /* TODO: use dis in distortion calculation later. */
   2363                 unsigned int sse;
   2364                 cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv, &best_ref_mv,
   2365                                              x->errorperbit,
   2366                                              &cpi->fn_ptr[BLOCK_16X16],
   2367                                              x->mvcost, &dis, &sse);
   2368             }
   2369 
   2370             mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
   2371 
   2372             /* Add the new motion vector cost to our rolling cost variable */
   2373             rd.rate2 += vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv, x->mvcost, 96);
   2374         }
   2375 
   2376         case NEARESTMV:
   2377         case NEARMV:
   2378             /* Clip "next_nearest" so that it does not extend too far out
   2379              * of image
   2380              */
   2381             vp8_clamp_mv2(&mode_mv[this_mode], xd);
   2382 
   2383             /* Do not bother proceeding if the vector (from newmv, nearest
   2384              * or near) is 0,0 as this should then be coded using the zeromv
   2385              * mode.
   2386              */
   2387             if (((this_mode == NEARMV) || (this_mode == NEARESTMV)) && (mode_mv[this_mode].as_int == 0))
   2388                 continue;
   2389 
   2390         case ZEROMV:
   2391 
   2392             /* Trap vectors that reach beyond the UMV borders
   2393              * Note that ALL New MV, Nearest MV Near MV and Zero MV code
   2394              * drops through to this point because of the lack of break
   2395              * statements in the previous two cases.
   2396              */
   2397             if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) || ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) ||
   2398                 ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) || ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max))
   2399                 continue;
   2400 
   2401             vp8_set_mbmode_and_mvs(x, this_mode, &mode_mv[this_mode]);
   2402             this_rd = evaluate_inter_mode_rd(mdcounts, &rd,
   2403                                              &disable_skip, cpi, x);
   2404             break;
   2405 
   2406         default:
   2407             break;
   2408         }
   2409 
   2410         this_rd = calculate_final_rd_costs(this_rd, &rd, &other_cost,
   2411                                            disable_skip, uv_intra_tteob,
   2412                                            intra_rd_penalty, cpi, x);
   2413 
   2414         /* Keep record of best intra distortion */
   2415         if ((x->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME) &&
   2416             (this_rd < best_mode.intra_rd) )
   2417         {
   2418           best_mode.intra_rd = this_rd;
   2419             *returnintra = rd.distortion2 ;
   2420         }
   2421 #if CONFIG_TEMPORAL_DENOISING
   2422         if (cpi->oxcf.noise_sensitivity)
   2423         {
   2424             unsigned int sse;
   2425             vp8_get_inter_mbpred_error(x,&cpi->fn_ptr[BLOCK_16X16],&sse,
   2426                                    mode_mv[this_mode]);
   2427 
   2428             if (sse < best_rd_sse)
   2429                 best_rd_sse = sse;
   2430 
   2431             /* Store for later use by denoiser. */
   2432             if (this_mode == ZEROMV && sse < zero_mv_sse )
   2433             {
   2434                 zero_mv_sse = sse;
   2435                 x->best_zeromv_reference_frame =
   2436                         x->e_mbd.mode_info_context->mbmi.ref_frame;
   2437             }
   2438 
   2439             /* Store the best NEWMV in x for later use in the denoiser. */
   2440             if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV &&
   2441                     sse < best_sse)
   2442             {
   2443                 best_sse = sse;
   2444                 vp8_get_inter_mbpred_error(x,&cpi->fn_ptr[BLOCK_16X16],&best_sse,
   2445                                        mode_mv[this_mode]);
   2446                 x->best_sse_inter_mode = NEWMV;
   2447                 x->best_sse_mv = x->e_mbd.mode_info_context->mbmi.mv;
   2448                 x->need_to_clamp_best_mvs =
   2449                     x->e_mbd.mode_info_context->mbmi.need_to_clamp_mvs;
   2450                 x->best_reference_frame =
   2451                     x->e_mbd.mode_info_context->mbmi.ref_frame;
   2452             }
   2453         }
   2454 #endif
   2455 
   2456         /* Did this mode help, i.e. is it the new best mode? */
   2457         if (this_rd < best_mode.rd || x->skip)
   2458         {
   2459             /* Note index of best mode so far */
   2460             best_mode_index = mode_index;
   2461             *returnrate = rd.rate2;
   2462             *returndistortion = rd.distortion2;
   2463             if (this_mode <= B_PRED)
   2464             {
   2465                 x->e_mbd.mode_info_context->mbmi.uv_mode = uv_intra_mode;
   2466                 /* required for left and above block mv */
   2467                 x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
   2468             }
   2469             update_best_mode(&best_mode, this_rd, &rd, other_cost, x);
   2470 
   2471 
   2472             /* Testing this mode gave rise to an improvement in best error
   2473              * score. Lower threshold a bit for next time
   2474              */
   2475             x->rd_thresh_mult[mode_index] =
   2476                 (x->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ?
   2477                     x->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT;
   2478         }
   2479 
   2480         /* If the mode did not help improve the best error case then raise
   2481          * the threshold for testing that mode next time around.
   2482          */
   2483         else
   2484         {
   2485             x->rd_thresh_mult[mode_index] += 4;
   2486 
   2487             if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT)
   2488                 x->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
   2489         }
   2490         x->rd_threshes[mode_index] =
   2491             (cpi->rd_baseline_thresh[mode_index] >> 7) *
   2492                 x->rd_thresh_mult[mode_index];
   2493 
   2494         if (x->skip)
   2495             break;
   2496 
   2497     }
   2498 
   2499     /* Reduce the activation RD thresholds for the best choice mode */
   2500     if ((cpi->rd_baseline_thresh[best_mode_index] > 0) && (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2)))
   2501     {
   2502         int best_adjustment = (x->rd_thresh_mult[best_mode_index] >> 2);
   2503 
   2504         x->rd_thresh_mult[best_mode_index] =
   2505             (x->rd_thresh_mult[best_mode_index] >=
   2506                 (MIN_THRESHMULT + best_adjustment)) ?
   2507                     x->rd_thresh_mult[best_mode_index] - best_adjustment :
   2508                     MIN_THRESHMULT;
   2509         x->rd_threshes[best_mode_index] =
   2510             (cpi->rd_baseline_thresh[best_mode_index] >> 7) *
   2511                 x->rd_thresh_mult[best_mode_index];
   2512     }
   2513 
   2514 #if CONFIG_TEMPORAL_DENOISING
   2515     if (cpi->oxcf.noise_sensitivity)
   2516     {
   2517         if (x->best_sse_inter_mode == DC_PRED)
   2518         {
   2519             /* No best MV found. */
   2520             x->best_sse_inter_mode = best_mode.mbmode.mode;
   2521             x->best_sse_mv = best_mode.mbmode.mv;
   2522             x->need_to_clamp_best_mvs = best_mode.mbmode.need_to_clamp_mvs;
   2523             x->best_reference_frame = best_mode.mbmode.ref_frame;
   2524             best_sse = best_rd_sse;
   2525         }
   2526         vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse,
   2527                                 recon_yoffset, recon_uvoffset);
   2528 
   2529 
   2530         /* Reevaluate ZEROMV after denoising. */
   2531         if (best_mode.mbmode.ref_frame == INTRA_FRAME &&
   2532             x->best_zeromv_reference_frame != INTRA_FRAME)
   2533         {
   2534             int this_rd = INT_MAX;
   2535             int disable_skip = 0;
   2536             int other_cost = 0;
   2537             int this_ref_frame = x->best_zeromv_reference_frame;
   2538             rd.rate2 = x->ref_frame_cost[this_ref_frame] +
   2539                     vp8_cost_mv_ref(ZEROMV, mdcounts);
   2540             rd.distortion2 = 0;
   2541 
   2542             /* set up the proper prediction buffers for the frame */
   2543             x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame;
   2544             x->e_mbd.pre.y_buffer = plane[this_ref_frame][0];
   2545             x->e_mbd.pre.u_buffer = plane[this_ref_frame][1];
   2546             x->e_mbd.pre.v_buffer = plane[this_ref_frame][2];
   2547 
   2548             x->e_mbd.mode_info_context->mbmi.mode = ZEROMV;
   2549             x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
   2550             x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
   2551 
   2552             this_rd = evaluate_inter_mode_rd(mdcounts, &rd, &disable_skip, cpi, x);
   2553             this_rd = calculate_final_rd_costs(this_rd, &rd, &other_cost,
   2554                                                disable_skip, uv_intra_tteob,
   2555                                                intra_rd_penalty, cpi, x);
   2556             if (this_rd < best_mode.rd || x->skip)
   2557             {
   2558                 /* Note index of best mode so far */
   2559                 best_mode_index = mode_index;
   2560                 *returnrate = rd.rate2;
   2561                 *returndistortion = rd.distortion2;
   2562                 update_best_mode(&best_mode, this_rd, &rd, other_cost, x);
   2563             }
   2564         }
   2565 
   2566     }
   2567 #endif
   2568 
   2569     if (cpi->is_src_frame_alt_ref &&
   2570         (best_mode.mbmode.mode != ZEROMV || best_mode.mbmode.ref_frame != ALTREF_FRAME))
   2571     {
   2572         x->e_mbd.mode_info_context->mbmi.mode = ZEROMV;
   2573         x->e_mbd.mode_info_context->mbmi.ref_frame = ALTREF_FRAME;
   2574         x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
   2575         x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
   2576         x->e_mbd.mode_info_context->mbmi.mb_skip_coeff =
   2577                                         (cpi->common.mb_no_coeff_skip);
   2578         x->e_mbd.mode_info_context->mbmi.partitioning = 0;
   2579         return;
   2580     }
   2581 
   2582 
   2583     /* macroblock modes */
   2584     vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mode.mbmode, sizeof(MB_MODE_INFO));
   2585 
   2586     if (best_mode.mbmode.mode == B_PRED)
   2587     {
   2588         for (i = 0; i < 16; i++)
   2589             xd->mode_info_context->bmi[i].as_mode = best_mode.bmodes[i].as_mode;
   2590     }
   2591 
   2592     if (best_mode.mbmode.mode == SPLITMV)
   2593     {
   2594         for (i = 0; i < 16; i++)
   2595             xd->mode_info_context->bmi[i].mv.as_int = best_mode.bmodes[i].mv.as_int;
   2596 
   2597         vpx_memcpy(x->partition_info, &best_mode.partition, sizeof(PARTITION_INFO));
   2598 
   2599         x->e_mbd.mode_info_context->mbmi.mv.as_int =
   2600                                       x->partition_info->bmi[15].mv.as_int;
   2601     }
   2602 
   2603     if (sign_bias
   2604         != cpi->common.ref_frame_sign_bias[xd->mode_info_context->mbmi.ref_frame])
   2605         best_ref_mv.as_int = best_ref_mv_sb[!sign_bias].as_int;
   2606 
   2607     rd_update_mvcount(x, &best_ref_mv);
   2608 }
   2609 
   2610 void vp8_rd_pick_intra_mode(MACROBLOCK *x, int *rate_)
   2611 {
   2612     int error4x4, error16x16;
   2613     int rate4x4, rate16x16 = 0, rateuv;
   2614     int dist4x4, dist16x16, distuv;
   2615     int rate;
   2616     int rate4x4_tokenonly = 0;
   2617     int rate16x16_tokenonly = 0;
   2618     int rateuv_tokenonly = 0;
   2619 
   2620     x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
   2621 
   2622     rd_pick_intra_mbuv_mode(x, &rateuv, &rateuv_tokenonly, &distuv);
   2623     rate = rateuv;
   2624 
   2625     error16x16 = rd_pick_intra16x16mby_mode(x, &rate16x16, &rate16x16_tokenonly,
   2626                                             &dist16x16);
   2627 
   2628     error4x4 = rd_pick_intra4x4mby_modes(x, &rate4x4, &rate4x4_tokenonly,
   2629                                          &dist4x4, error16x16);
   2630 
   2631     if (error4x4 < error16x16)
   2632     {
   2633         x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
   2634         rate += rate4x4;
   2635     }
   2636     else
   2637     {
   2638         rate += rate16x16;
   2639     }
   2640 
   2641     *rate_ = rate;
   2642 }
   2643