Home | History | Annotate | Download | only in encoder
      1 /*
      2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 
     12 #include <stdio.h>
     13 #include <math.h>
     14 #include <limits.h>
     15 #include <assert.h>
     16 #include "vpx_config.h"
     17 #include "vp8_rtcd.h"
     18 #include "tokenize.h"
     19 #include "treewriter.h"
     20 #include "onyx_int.h"
     21 #include "modecosts.h"
     22 #include "encodeintra.h"
     23 #include "pickinter.h"
     24 #include "vp8/common/entropymode.h"
     25 #include "vp8/common/reconinter.h"
     26 #include "vp8/common/reconintra4x4.h"
     27 #include "vp8/common/findnearmv.h"
     28 #include "vp8/common/quant_common.h"
     29 #include "encodemb.h"
     30 #include "quantize.h"
     31 #include "vp8/common/variance.h"
     32 #include "mcomp.h"
     33 #include "rdopt.h"
     34 #include "vpx_mem/vpx_mem.h"
     35 #include "vp8/common/systemdependent.h"
     36 #if CONFIG_TEMPORAL_DENOISING
     37 #include "denoising.h"
     38 #endif
     39 extern void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x);
     40 
     41 #define MAXF(a,b)            (((a) > (b)) ? (a) : (b))
     42 
     43 typedef struct rate_distortion_struct
     44 {
     45     int rate2;
     46     int rate_y;
     47     int rate_uv;
     48     int distortion2;
     49     int distortion_uv;
     50 } RATE_DISTORTION;
     51 
     52 typedef struct best_mode_struct
     53 {
     54   int yrd;
     55   int rd;
     56   int intra_rd;
     57   MB_MODE_INFO mbmode;
     58   union b_mode_info bmodes[16];
     59   PARTITION_INFO partition;
     60 } BEST_MODE;
     61 
     62 static const int auto_speed_thresh[17] =
     63 {
     64     1000,
     65     200,
     66     150,
     67     130,
     68     150,
     69     125,
     70     120,
     71     115,
     72     115,
     73     115,
     74     115,
     75     115,
     76     115,
     77     115,
     78     115,
     79     115,
     80     105
     81 };
     82 
     83 const MB_PREDICTION_MODE vp8_mode_order[MAX_MODES] =
     84 {
     85     ZEROMV,
     86     DC_PRED,
     87 
     88     NEARESTMV,
     89     NEARMV,
     90 
     91     ZEROMV,
     92     NEARESTMV,
     93 
     94     ZEROMV,
     95     NEARESTMV,
     96 
     97     NEARMV,
     98     NEARMV,
     99 
    100     V_PRED,
    101     H_PRED,
    102     TM_PRED,
    103 
    104     NEWMV,
    105     NEWMV,
    106     NEWMV,
    107 
    108     SPLITMV,
    109     SPLITMV,
    110     SPLITMV,
    111 
    112     B_PRED,
    113 };
    114 
    115 /* This table determines the search order in reference frame priority order,
    116  * which may not necessarily match INTRA,LAST,GOLDEN,ARF
    117  */
    118 const int vp8_ref_frame_order[MAX_MODES] =
    119 {
    120     1,
    121     0,
    122 
    123     1,
    124     1,
    125 
    126     2,
    127     2,
    128 
    129     3,
    130     3,
    131 
    132     2,
    133     3,
    134 
    135     0,
    136     0,
    137     0,
    138 
    139     1,
    140     2,
    141     3,
    142 
    143     1,
    144     2,
    145     3,
    146 
    147     0,
    148 };
    149 
    150 static void fill_token_costs(
    151     int c[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS],
    152     const vp8_prob p[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]
    153 )
    154 {
    155     int i, j, k;
    156 
    157 
    158     for (i = 0; i < BLOCK_TYPES; i++)
    159         for (j = 0; j < COEF_BANDS; j++)
    160             for (k = 0; k < PREV_COEF_CONTEXTS; k++)
    161 
    162                 /* check for pt=0 and band > 1 if block type 0
    163                  * and 0 if blocktype 1
    164                  */
    165                 if (k == 0 && j > (i == 0))
    166                     vp8_cost_tokens2(c[i][j][k], p [i][j][k], vp8_coef_tree, 2);
    167                 else
    168                     vp8_cost_tokens(c[i][j][k], p [i][j][k], vp8_coef_tree);
    169 }
    170 
    171 static const int rd_iifactor[32] =
    172 {
    173     4, 4, 3, 2, 1, 0, 0, 0,
    174     0, 0, 0, 0, 0, 0, 0, 0,
    175     0, 0, 0, 0, 0, 0, 0, 0,
    176     0, 0, 0, 0, 0, 0, 0, 0
    177 };
    178 
    179 /* values are now correlated to quantizer */
    180 static const int sad_per_bit16lut[QINDEX_RANGE] =
    181 {
    182     2,  2,  2,  2,  2,  2,  2,  2,
    183     2,  2,  2,  2,  2,  2,  2,  2,
    184     3,  3,  3,  3,  3,  3,  3,  3,
    185     3,  3,  3,  3,  3,  3,  4,  4,
    186     4,  4,  4,  4,  4,  4,  4,  4,
    187     4,  4,  5,  5,  5,  5,  5,  5,
    188     5,  5,  5,  5,  5,  5,  6,  6,
    189     6,  6,  6,  6,  6,  6,  6,  6,
    190     6,  6,  7,  7,  7,  7,  7,  7,
    191     7,  7,  7,  7,  7,  7,  8,  8,
    192     8,  8,  8,  8,  8,  8,  8,  8,
    193     8,  8,  9,  9,  9,  9,  9,  9,
    194     9,  9,  9,  9,  9,  9,  10, 10,
    195     10, 10, 10, 10, 10, 10, 11, 11,
    196     11, 11, 11, 11, 12, 12, 12, 12,
    197     12, 12, 13, 13, 13, 13, 14, 14
    198 };
    199 static const int sad_per_bit4lut[QINDEX_RANGE] =
    200 {
    201     2,  2,  2,  2,  2,  2,  3,  3,
    202     3,  3,  3,  3,  3,  3,  3,  3,
    203     3,  3,  3,  3,  4,  4,  4,  4,
    204     4,  4,  4,  4,  4,  4,  5,  5,
    205     5,  5,  5,  5,  6,  6,  6,  6,
    206     6,  6,  6,  6,  6,  6,  6,  6,
    207     7,  7,  7,  7,  7,  7,  7,  7,
    208     7,  7,  7,  7,  7,  8,  8,  8,
    209     8,  8,  9,  9,  9,  9,  9,  9,
    210     10, 10, 10, 10, 10, 10, 10, 10,
    211     11, 11, 11, 11, 11, 11, 11, 11,
    212     12, 12, 12, 12, 12, 12, 12, 12,
    213     13, 13, 13, 13, 13, 13, 13, 14,
    214     14, 14, 14, 14, 15, 15, 15, 15,
    215     16, 16, 16, 16, 17, 17, 17, 18,
    216     18, 18, 19, 19, 19, 20, 20, 20,
    217 };
    218 
    219 void vp8cx_initialize_me_consts(VP8_COMP *cpi, int QIndex)
    220 {
    221     cpi->mb.sadperbit16 =  sad_per_bit16lut[QIndex];
    222     cpi->mb.sadperbit4  =  sad_per_bit4lut[QIndex];
    223 }
    224 
    225 void vp8_initialize_rd_consts(VP8_COMP *cpi, MACROBLOCK *x, int Qvalue)
    226 {
    227     int q;
    228     int i;
    229     double capped_q = (Qvalue < 160) ? (double)Qvalue : 160.0;
    230     double rdconst = 2.80;
    231 
    232     vp8_clear_system_state();
    233 
    234     /* Further tests required to see if optimum is different
    235      * for key frames, golden frames and arf frames.
    236      */
    237     cpi->RDMULT = (int)(rdconst * (capped_q * capped_q));
    238 
    239     /* Extend rate multiplier along side quantizer zbin increases */
    240     if (cpi->mb.zbin_over_quant  > 0)
    241     {
    242         double oq_factor;
    243         double modq;
    244 
    245         /* Experimental code using the same basic equation as used for Q above
    246          * The units of cpi->mb.zbin_over_quant are 1/128 of Q bin size
    247          */
    248         oq_factor = 1.0 + ((double)0.0015625 * cpi->mb.zbin_over_quant);
    249         modq = (int)((double)capped_q * oq_factor);
    250         cpi->RDMULT = (int)(rdconst * (modq * modq));
    251     }
    252 
    253     if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME))
    254     {
    255         if (cpi->twopass.next_iiratio > 31)
    256             cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
    257         else
    258             cpi->RDMULT +=
    259                 (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
    260     }
    261 
    262     cpi->mb.errorperbit = (cpi->RDMULT / 110);
    263     cpi->mb.errorperbit += (cpi->mb.errorperbit==0);
    264 
    265     vp8_set_speed_features(cpi);
    266 
    267     for (i = 0; i < MAX_MODES; i++)
    268     {
    269         x->mode_test_hit_counts[i] = 0;
    270     }
    271 
    272     q = (int)pow(Qvalue, 1.25);
    273 
    274     if (q < 8)
    275         q = 8;
    276 
    277     if (cpi->RDMULT > 1000)
    278     {
    279         cpi->RDDIV = 1;
    280         cpi->RDMULT /= 100;
    281 
    282         for (i = 0; i < MAX_MODES; i++)
    283         {
    284             if (cpi->sf.thresh_mult[i] < INT_MAX)
    285             {
    286                 x->rd_threshes[i] = cpi->sf.thresh_mult[i] * q / 100;
    287             }
    288             else
    289             {
    290                 x->rd_threshes[i] = INT_MAX;
    291             }
    292 
    293             cpi->rd_baseline_thresh[i] = x->rd_threshes[i];
    294         }
    295     }
    296     else
    297     {
    298         cpi->RDDIV = 100;
    299 
    300         for (i = 0; i < MAX_MODES; i++)
    301         {
    302             if (cpi->sf.thresh_mult[i] < (INT_MAX / q))
    303             {
    304                 x->rd_threshes[i] = cpi->sf.thresh_mult[i] * q;
    305             }
    306             else
    307             {
    308                 x->rd_threshes[i] = INT_MAX;
    309             }
    310 
    311             cpi->rd_baseline_thresh[i] = x->rd_threshes[i];
    312         }
    313     }
    314 
    315     {
    316       /* build token cost array for the type of frame we have now */
    317       FRAME_CONTEXT *l = &cpi->lfc_n;
    318 
    319       if(cpi->common.refresh_alt_ref_frame)
    320           l = &cpi->lfc_a;
    321       else if(cpi->common.refresh_golden_frame)
    322           l = &cpi->lfc_g;
    323 
    324       fill_token_costs(
    325           cpi->mb.token_costs,
    326           (const vp8_prob( *)[8][3][11]) l->coef_probs
    327       );
    328       /*
    329       fill_token_costs(
    330           cpi->mb.token_costs,
    331           (const vp8_prob( *)[8][3][11]) cpi->common.fc.coef_probs);
    332       */
    333 
    334 
    335       /* TODO make these mode costs depend on last,alt or gold too.  (jbb) */
    336       vp8_init_mode_costs(cpi);
    337     }
    338 
    339 }
    340 
    341 void vp8_auto_select_speed(VP8_COMP *cpi)
    342 {
    343     int milliseconds_for_compress = (int)(1000000 / cpi->framerate);
    344 
    345     milliseconds_for_compress = milliseconds_for_compress * (16 - cpi->oxcf.cpu_used) / 16;
    346 
    347 #if 0
    348 
    349     if (0)
    350     {
    351         FILE *f;
    352 
    353         f = fopen("speed.stt", "a");
    354         fprintf(f, " %8ld %10ld %10ld %10ld\n",
    355                 cpi->common.current_video_frame, cpi->Speed, milliseconds_for_compress, cpi->avg_pick_mode_time);
    356         fclose(f);
    357     }
    358 
    359 #endif
    360 
    361     if (cpi->avg_pick_mode_time < milliseconds_for_compress && (cpi->avg_encode_time - cpi->avg_pick_mode_time) < milliseconds_for_compress)
    362     {
    363         if (cpi->avg_pick_mode_time == 0)
    364         {
    365             cpi->Speed = 4;
    366         }
    367         else
    368         {
    369             if (milliseconds_for_compress * 100 < cpi->avg_encode_time * 95)
    370             {
    371                 cpi->Speed          += 2;
    372                 cpi->avg_pick_mode_time = 0;
    373                 cpi->avg_encode_time = 0;
    374 
    375                 if (cpi->Speed > 16)
    376                 {
    377                     cpi->Speed = 16;
    378                 }
    379             }
    380 
    381             if (milliseconds_for_compress * 100 > cpi->avg_encode_time * auto_speed_thresh[cpi->Speed])
    382             {
    383                 cpi->Speed          -= 1;
    384                 cpi->avg_pick_mode_time = 0;
    385                 cpi->avg_encode_time = 0;
    386 
    387                 /* In real-time mode, cpi->speed is in [4, 16]. */
    388                 if (cpi->Speed < 4)
    389                 {
    390                     cpi->Speed = 4;
    391                 }
    392             }
    393         }
    394     }
    395     else
    396     {
    397         cpi->Speed += 4;
    398 
    399         if (cpi->Speed > 16)
    400             cpi->Speed = 16;
    401 
    402 
    403         cpi->avg_pick_mode_time = 0;
    404         cpi->avg_encode_time = 0;
    405     }
    406 }
    407 
    408 int vp8_block_error_c(short *coeff, short *dqcoeff)
    409 {
    410     int i;
    411     int error = 0;
    412 
    413     for (i = 0; i < 16; i++)
    414     {
    415         int this_diff = coeff[i] - dqcoeff[i];
    416         error += this_diff * this_diff;
    417     }
    418 
    419     return error;
    420 }
    421 
    422 int vp8_mbblock_error_c(MACROBLOCK *mb, int dc)
    423 {
    424     BLOCK  *be;
    425     BLOCKD *bd;
    426     int i, j;
    427     int berror, error = 0;
    428 
    429     for (i = 0; i < 16; i++)
    430     {
    431         be = &mb->block[i];
    432         bd = &mb->e_mbd.block[i];
    433 
    434         berror = 0;
    435 
    436         for (j = dc; j < 16; j++)
    437         {
    438             int this_diff = be->coeff[j] - bd->dqcoeff[j];
    439             berror += this_diff * this_diff;
    440         }
    441 
    442         error += berror;
    443     }
    444 
    445     return error;
    446 }
    447 
    448 int vp8_mbuverror_c(MACROBLOCK *mb)
    449 {
    450 
    451     BLOCK  *be;
    452     BLOCKD *bd;
    453 
    454 
    455     int i;
    456     int error = 0;
    457 
    458     for (i = 16; i < 24; i++)
    459     {
    460         be = &mb->block[i];
    461         bd = &mb->e_mbd.block[i];
    462 
    463         error += vp8_block_error_c(be->coeff, bd->dqcoeff);
    464     }
    465 
    466     return error;
    467 }
    468 
    469 int VP8_UVSSE(MACROBLOCK *x)
    470 {
    471     unsigned char *uptr, *vptr;
    472     unsigned char *upred_ptr = (*(x->block[16].base_src) + x->block[16].src);
    473     unsigned char *vpred_ptr = (*(x->block[20].base_src) + x->block[20].src);
    474     int uv_stride = x->block[16].src_stride;
    475 
    476     unsigned int sse1 = 0;
    477     unsigned int sse2 = 0;
    478     int mv_row = x->e_mbd.mode_info_context->mbmi.mv.as_mv.row;
    479     int mv_col = x->e_mbd.mode_info_context->mbmi.mv.as_mv.col;
    480     int offset;
    481     int pre_stride = x->e_mbd.pre.uv_stride;
    482 
    483     if (mv_row < 0)
    484         mv_row -= 1;
    485     else
    486         mv_row += 1;
    487 
    488     if (mv_col < 0)
    489         mv_col -= 1;
    490     else
    491         mv_col += 1;
    492 
    493     mv_row /= 2;
    494     mv_col /= 2;
    495 
    496     offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
    497     uptr = x->e_mbd.pre.u_buffer + offset;
    498     vptr = x->e_mbd.pre.v_buffer + offset;
    499 
    500     if ((mv_row | mv_col) & 7)
    501     {
    502         vp8_sub_pixel_variance8x8(uptr, pre_stride,
    503             mv_col & 7, mv_row & 7, upred_ptr, uv_stride, &sse2);
    504         vp8_sub_pixel_variance8x8(vptr, pre_stride,
    505             mv_col & 7, mv_row & 7, vpred_ptr, uv_stride, &sse1);
    506         sse2 += sse1;
    507     }
    508     else
    509     {
    510         vp8_variance8x8(uptr, pre_stride,
    511             upred_ptr, uv_stride, &sse2);
    512         vp8_variance8x8(vptr, pre_stride,
    513             vpred_ptr, uv_stride, &sse1);
    514         sse2 += sse1;
    515     }
    516     return sse2;
    517 
    518 }
    519 
    520 static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, int type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l)
    521 {
    522     int c = !type;              /* start at coef 0, unless Y with Y2 */
    523     int eob = (int)(*b->eob);
    524     int pt ;    /* surrounding block/prev coef predictor */
    525     int cost = 0;
    526     short *qcoeff_ptr = b->qcoeff;
    527 
    528     VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
    529 
    530     assert(eob <= 16);
    531     for (; c < eob; c++)
    532     {
    533         const int v = qcoeff_ptr[vp8_default_zig_zag1d[c]];
    534         const int t = vp8_dct_value_tokens_ptr[v].Token;
    535         cost += mb->token_costs [type] [vp8_coef_bands[c]] [pt] [t];
    536         cost += vp8_dct_value_cost_ptr[v];
    537         pt = vp8_prev_token_class[t];
    538     }
    539 
    540     if (c < 16)
    541         cost += mb->token_costs [type] [vp8_coef_bands[c]] [pt] [DCT_EOB_TOKEN];
    542 
    543     pt = (c != !type); /* is eob first coefficient; */
    544     *a = *l = pt;
    545 
    546     return cost;
    547 }
    548 
    549 static int vp8_rdcost_mby(MACROBLOCK *mb)
    550 {
    551     int cost = 0;
    552     int b;
    553     MACROBLOCKD *x = &mb->e_mbd;
    554     ENTROPY_CONTEXT_PLANES t_above, t_left;
    555     ENTROPY_CONTEXT *ta;
    556     ENTROPY_CONTEXT *tl;
    557 
    558     vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    559     vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
    560 
    561     ta = (ENTROPY_CONTEXT *)&t_above;
    562     tl = (ENTROPY_CONTEXT *)&t_left;
    563 
    564     for (b = 0; b < 16; b++)
    565         cost += cost_coeffs(mb, x->block + b, PLANE_TYPE_Y_NO_DC,
    566                     ta + vp8_block2above[b], tl + vp8_block2left[b]);
    567 
    568     cost += cost_coeffs(mb, x->block + 24, PLANE_TYPE_Y2,
    569                 ta + vp8_block2above[24], tl + vp8_block2left[24]);
    570 
    571     return cost;
    572 }
    573 
    574 static void macro_block_yrd( MACROBLOCK *mb,
    575                              int *Rate,
    576                              int *Distortion)
    577 {
    578     int b;
    579     MACROBLOCKD *const x = &mb->e_mbd;
    580     BLOCK   *const mb_y2 = mb->block + 24;
    581     BLOCKD *const x_y2  = x->block + 24;
    582     short *Y2DCPtr = mb_y2->src_diff;
    583     BLOCK *beptr;
    584     int d;
    585 
    586     vp8_subtract_mby( mb->src_diff, *(mb->block[0].base_src),
    587         mb->block[0].src_stride,  mb->e_mbd.predictor, 16);
    588 
    589     /* Fdct and building the 2nd order block */
    590     for (beptr = mb->block; beptr < mb->block + 16; beptr += 2)
    591     {
    592         mb->short_fdct8x4(beptr->src_diff, beptr->coeff, 32);
    593         *Y2DCPtr++ = beptr->coeff[0];
    594         *Y2DCPtr++ = beptr->coeff[16];
    595     }
    596 
    597     /* 2nd order fdct */
    598     mb->short_walsh4x4(mb_y2->src_diff, mb_y2->coeff, 8);
    599 
    600     /* Quantization */
    601     for (b = 0; b < 16; b++)
    602     {
    603         mb->quantize_b(&mb->block[b], &mb->e_mbd.block[b]);
    604     }
    605 
    606     /* DC predication and Quantization of 2nd Order block */
    607     mb->quantize_b(mb_y2, x_y2);
    608 
    609     /* Distortion */
    610     d = vp8_mbblock_error(mb, 1) << 2;
    611     d += vp8_block_error(mb_y2->coeff, x_y2->dqcoeff);
    612 
    613     *Distortion = (d >> 4);
    614 
    615     /* rate */
    616     *Rate = vp8_rdcost_mby(mb);
    617 }
    618 
    619 static void copy_predictor(unsigned char *dst, const unsigned char *predictor)
    620 {
    621     const unsigned int *p = (const unsigned int *)predictor;
    622     unsigned int *d = (unsigned int *)dst;
    623     d[0] = p[0];
    624     d[4] = p[4];
    625     d[8] = p[8];
    626     d[12] = p[12];
    627 }
    628 static int rd_pick_intra4x4block(
    629     MACROBLOCK *x,
    630     BLOCK *be,
    631     BLOCKD *b,
    632     B_PREDICTION_MODE *best_mode,
    633     const int *bmode_costs,
    634     ENTROPY_CONTEXT *a,
    635     ENTROPY_CONTEXT *l,
    636 
    637     int *bestrate,
    638     int *bestratey,
    639     int *bestdistortion)
    640 {
    641     B_PREDICTION_MODE mode;
    642     int best_rd = INT_MAX;
    643     int rate = 0;
    644     int distortion;
    645 
    646     ENTROPY_CONTEXT ta = *a, tempa = *a;
    647     ENTROPY_CONTEXT tl = *l, templ = *l;
    648     /*
    649      * The predictor buffer is a 2d buffer with a stride of 16.  Create
    650      * a temp buffer that meets the stride requirements, but we are only
    651      * interested in the left 4x4 block
    652      * */
    653     DECLARE_ALIGNED_ARRAY(16, unsigned char,  best_predictor, 16*4);
    654     DECLARE_ALIGNED_ARRAY(16, short, best_dqcoeff, 16);
    655     int dst_stride = x->e_mbd.dst.y_stride;
    656     unsigned char *dst = x->e_mbd.dst.y_buffer + b->offset;
    657 
    658     unsigned char *Above = dst - dst_stride;
    659     unsigned char *yleft = dst - 1;
    660     unsigned char top_left = Above[-1];
    661 
    662     for (mode = B_DC_PRED; mode <= B_HU_PRED; mode++)
    663     {
    664         int this_rd;
    665         int ratey;
    666 
    667         rate = bmode_costs[mode];
    668 
    669         vp8_intra4x4_predict(Above, yleft, dst_stride, mode,
    670                              b->predictor, 16, top_left);
    671         vp8_subtract_b(be, b, 16);
    672         x->short_fdct4x4(be->src_diff, be->coeff, 32);
    673         x->quantize_b(be, b);
    674 
    675         tempa = ta;
    676         templ = tl;
    677 
    678         ratey = cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC, &tempa, &templ);
    679         rate += ratey;
    680         distortion = vp8_block_error(be->coeff, b->dqcoeff) >> 2;
    681 
    682         this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
    683 
    684         if (this_rd < best_rd)
    685         {
    686             *bestrate = rate;
    687             *bestratey = ratey;
    688             *bestdistortion = distortion;
    689             best_rd = this_rd;
    690             *best_mode = mode;
    691             *a = tempa;
    692             *l = templ;
    693             copy_predictor(best_predictor, b->predictor);
    694             vpx_memcpy(best_dqcoeff, b->dqcoeff, 32);
    695         }
    696     }
    697     b->bmi.as_mode = *best_mode;
    698 
    699     vp8_short_idct4x4llm(best_dqcoeff, best_predictor, 16, dst, dst_stride);
    700 
    701     return best_rd;
    702 }
    703 
    704 static int rd_pick_intra4x4mby_modes(MACROBLOCK *mb, int *Rate,
    705                                      int *rate_y, int *Distortion, int best_rd)
    706 {
    707     MACROBLOCKD *const xd = &mb->e_mbd;
    708     int i;
    709     int cost = mb->mbmode_cost [xd->frame_type] [B_PRED];
    710     int distortion = 0;
    711     int tot_rate_y = 0;
    712     int64_t total_rd = 0;
    713     ENTROPY_CONTEXT_PLANES t_above, t_left;
    714     ENTROPY_CONTEXT *ta;
    715     ENTROPY_CONTEXT *tl;
    716     const int *bmode_costs;
    717 
    718     vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    719     vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
    720 
    721     ta = (ENTROPY_CONTEXT *)&t_above;
    722     tl = (ENTROPY_CONTEXT *)&t_left;
    723 
    724     intra_prediction_down_copy(xd, xd->dst.y_buffer - xd->dst.y_stride + 16);
    725 
    726     bmode_costs = mb->inter_bmode_costs;
    727 
    728     for (i = 0; i < 16; i++)
    729     {
    730         MODE_INFO *const mic = xd->mode_info_context;
    731         const int mis = xd->mode_info_stride;
    732         B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode);
    733         int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry), UNINITIALIZED_IS_SAFE(d);
    734 
    735         if (mb->e_mbd.frame_type == KEY_FRAME)
    736         {
    737             const B_PREDICTION_MODE A = above_block_mode(mic, i, mis);
    738             const B_PREDICTION_MODE L = left_block_mode(mic, i);
    739 
    740             bmode_costs  = mb->bmode_costs[A][L];
    741         }
    742 
    743         total_rd += rd_pick_intra4x4block(
    744             mb, mb->block + i, xd->block + i, &best_mode, bmode_costs,
    745             ta + vp8_block2above[i],
    746             tl + vp8_block2left[i], &r, &ry, &d);
    747 
    748         cost += r;
    749         distortion += d;
    750         tot_rate_y += ry;
    751 
    752         mic->bmi[i].as_mode = best_mode;
    753 
    754         if(total_rd >= (int64_t)best_rd)
    755             break;
    756     }
    757 
    758     if(total_rd >= (int64_t)best_rd)
    759         return INT_MAX;
    760 
    761     *Rate = cost;
    762     *rate_y = tot_rate_y;
    763     *Distortion = distortion;
    764 
    765     return RDCOST(mb->rdmult, mb->rddiv, cost, distortion);
    766 }
    767 
    768 
    769 static int rd_pick_intra16x16mby_mode(MACROBLOCK *x,
    770                                       int *Rate,
    771                                       int *rate_y,
    772                                       int *Distortion)
    773 {
    774     MB_PREDICTION_MODE mode;
    775     MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
    776     int rate, ratey;
    777     int distortion;
    778     int best_rd = INT_MAX;
    779     int this_rd;
    780     MACROBLOCKD *xd = &x->e_mbd;
    781 
    782     /* Y Search for 16x16 intra prediction mode */
    783     for (mode = DC_PRED; mode <= TM_PRED; mode++)
    784     {
    785         xd->mode_info_context->mbmi.mode = mode;
    786 
    787         vp8_build_intra_predictors_mby_s(xd,
    788                                          xd->dst.y_buffer - xd->dst.y_stride,
    789                                          xd->dst.y_buffer - 1,
    790                                          xd->dst.y_stride,
    791                                          xd->predictor,
    792                                          16);
    793 
    794         macro_block_yrd(x, &ratey, &distortion);
    795         rate = ratey + x->mbmode_cost[xd->frame_type]
    796                                      [xd->mode_info_context->mbmi.mode];
    797 
    798         this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
    799 
    800         if (this_rd < best_rd)
    801         {
    802             mode_selected = mode;
    803             best_rd = this_rd;
    804             *Rate = rate;
    805             *rate_y = ratey;
    806             *Distortion = distortion;
    807         }
    808     }
    809 
    810     xd->mode_info_context->mbmi.mode = mode_selected;
    811     return best_rd;
    812 }
    813 
    814 static int rd_cost_mbuv(MACROBLOCK *mb)
    815 {
    816     int b;
    817     int cost = 0;
    818     MACROBLOCKD *x = &mb->e_mbd;
    819     ENTROPY_CONTEXT_PLANES t_above, t_left;
    820     ENTROPY_CONTEXT *ta;
    821     ENTROPY_CONTEXT *tl;
    822 
    823     vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    824     vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
    825 
    826     ta = (ENTROPY_CONTEXT *)&t_above;
    827     tl = (ENTROPY_CONTEXT *)&t_left;
    828 
    829     for (b = 16; b < 24; b++)
    830         cost += cost_coeffs(mb, x->block + b, PLANE_TYPE_UV,
    831                     ta + vp8_block2above[b], tl + vp8_block2left[b]);
    832 
    833     return cost;
    834 }
    835 
    836 
    837 static int rd_inter16x16_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate,
    838                             int *distortion, int fullpixel)
    839 {
    840     vp8_build_inter16x16_predictors_mbuv(&x->e_mbd);
    841     vp8_subtract_mbuv(x->src_diff,
    842         x->src.u_buffer, x->src.v_buffer, x->src.uv_stride,
    843         &x->e_mbd.predictor[256], &x->e_mbd.predictor[320], 8);
    844 
    845     vp8_transform_mbuv(x);
    846     vp8_quantize_mbuv(x);
    847 
    848     *rate       = rd_cost_mbuv(x);
    849     *distortion = vp8_mbuverror(x) / 4;
    850 
    851     return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
    852 }
    853 
    854 static int rd_inter4x4_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate,
    855                           int *distortion, int fullpixel)
    856 {
    857     vp8_build_inter4x4_predictors_mbuv(&x->e_mbd);
    858     vp8_subtract_mbuv(x->src_diff,
    859         x->src.u_buffer, x->src.v_buffer, x->src.uv_stride,
    860         &x->e_mbd.predictor[256], &x->e_mbd.predictor[320], 8);
    861 
    862     vp8_transform_mbuv(x);
    863     vp8_quantize_mbuv(x);
    864 
    865     *rate       = rd_cost_mbuv(x);
    866     *distortion = vp8_mbuverror(x) / 4;
    867 
    868     return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
    869 }
    870 
    871 static void rd_pick_intra_mbuv_mode(MACROBLOCK *x, int *rate,
    872                                     int *rate_tokenonly, int *distortion)
    873 {
    874     MB_PREDICTION_MODE mode;
    875     MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
    876     int best_rd = INT_MAX;
    877     int UNINITIALIZED_IS_SAFE(d), UNINITIALIZED_IS_SAFE(r);
    878     int rate_to;
    879     MACROBLOCKD *xd = &x->e_mbd;
    880 
    881     for (mode = DC_PRED; mode <= TM_PRED; mode++)
    882     {
    883         int this_rate;
    884         int this_distortion;
    885         int this_rd;
    886 
    887         xd->mode_info_context->mbmi.uv_mode = mode;
    888 
    889         vp8_build_intra_predictors_mbuv_s(xd,
    890                                           xd->dst.u_buffer - xd->dst.uv_stride,
    891                                           xd->dst.v_buffer - xd->dst.uv_stride,
    892                                           xd->dst.u_buffer - 1,
    893                                           xd->dst.v_buffer - 1,
    894                                           xd->dst.uv_stride,
    895                                           &xd->predictor[256], &xd->predictor[320],
    896                                           8);
    897 
    898 
    899         vp8_subtract_mbuv(x->src_diff,
    900                       x->src.u_buffer, x->src.v_buffer, x->src.uv_stride,
    901                       &xd->predictor[256], &xd->predictor[320], 8);
    902         vp8_transform_mbuv(x);
    903         vp8_quantize_mbuv(x);
    904 
    905         rate_to = rd_cost_mbuv(x);
    906         this_rate = rate_to + x->intra_uv_mode_cost[xd->frame_type][xd->mode_info_context->mbmi.uv_mode];
    907 
    908         this_distortion = vp8_mbuverror(x) / 4;
    909 
    910         this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
    911 
    912         if (this_rd < best_rd)
    913         {
    914             best_rd = this_rd;
    915             d = this_distortion;
    916             r = this_rate;
    917             *rate_tokenonly = rate_to;
    918             mode_selected = mode;
    919         }
    920     }
    921 
    922     *rate = r;
    923     *distortion = d;
    924 
    925     xd->mode_info_context->mbmi.uv_mode = mode_selected;
    926 }
    927 
    928 int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4])
    929 {
    930     vp8_prob p [VP8_MVREFS-1];
    931     assert(NEARESTMV <= m  &&  m <= SPLITMV);
    932     vp8_mv_ref_probs(p, near_mv_ref_ct);
    933     return vp8_cost_token(vp8_mv_ref_tree, p,
    934                           vp8_mv_ref_encoding_array + (m - NEARESTMV));
    935 }
    936 
    937 void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv)
    938 {
    939     x->e_mbd.mode_info_context->mbmi.mode = mb;
    940     x->e_mbd.mode_info_context->mbmi.mv.as_int = mv->as_int;
    941 }
    942 
    943 static int labels2mode(
    944     MACROBLOCK *x,
    945     int const *labelings, int which_label,
    946     B_PREDICTION_MODE this_mode,
    947     int_mv *this_mv, int_mv *best_ref_mv,
    948     int *mvcost[2]
    949 )
    950 {
    951     MACROBLOCKD *const xd = & x->e_mbd;
    952     MODE_INFO *const mic = xd->mode_info_context;
    953     const int mis = xd->mode_info_stride;
    954 
    955     int cost = 0;
    956     int thismvcost = 0;
    957 
    958     /* We have to be careful retrieving previously-encoded motion vectors.
    959        Ones from this macroblock have to be pulled from the BLOCKD array
    960        as they have not yet made it to the bmi array in our MB_MODE_INFO. */
    961 
    962     int i = 0;
    963 
    964     do
    965     {
    966         BLOCKD *const d = xd->block + i;
    967         const int row = i >> 2,  col = i & 3;
    968 
    969         B_PREDICTION_MODE m;
    970 
    971         if (labelings[i] != which_label)
    972             continue;
    973 
    974         if (col  &&  labelings[i] == labelings[i-1])
    975             m = LEFT4X4;
    976         else if (row  &&  labelings[i] == labelings[i-4])
    977             m = ABOVE4X4;
    978         else
    979         {
    980             /* the only time we should do costing for new motion vector
    981              * or mode is when we are on a new label  (jbb May 08, 2007)
    982              */
    983             switch (m = this_mode)
    984             {
    985             case NEW4X4 :
    986                 thismvcost  = vp8_mv_bit_cost(this_mv, best_ref_mv, mvcost, 102);
    987                 break;
    988             case LEFT4X4:
    989                 this_mv->as_int = col ? d[-1].bmi.mv.as_int : left_block_mv(mic, i);
    990                 break;
    991             case ABOVE4X4:
    992                 this_mv->as_int = row ? d[-4].bmi.mv.as_int : above_block_mv(mic, i, mis);
    993                 break;
    994             case ZERO4X4:
    995                 this_mv->as_int = 0;
    996                 break;
    997             default:
    998                 break;
    999             }
   1000 
   1001             if (m == ABOVE4X4)  /* replace above with left if same */
   1002             {
   1003                 int_mv left_mv;
   1004 
   1005                 left_mv.as_int = col ? d[-1].bmi.mv.as_int :
   1006                                         left_block_mv(mic, i);
   1007 
   1008                 if (left_mv.as_int == this_mv->as_int)
   1009                     m = LEFT4X4;
   1010             }
   1011 
   1012             cost = x->inter_bmode_costs[ m];
   1013         }
   1014 
   1015         d->bmi.mv.as_int = this_mv->as_int;
   1016 
   1017         x->partition_info->bmi[i].mode = m;
   1018         x->partition_info->bmi[i].mv.as_int = this_mv->as_int;
   1019 
   1020     }
   1021     while (++i < 16);
   1022 
   1023     cost += thismvcost ;
   1024     return cost;
   1025 }
   1026 
   1027 static int rdcost_mbsegment_y(MACROBLOCK *mb, const int *labels,
   1028                               int which_label, ENTROPY_CONTEXT *ta,
   1029                               ENTROPY_CONTEXT *tl)
   1030 {
   1031     int cost = 0;
   1032     int b;
   1033     MACROBLOCKD *x = &mb->e_mbd;
   1034 
   1035     for (b = 0; b < 16; b++)
   1036         if (labels[ b] == which_label)
   1037             cost += cost_coeffs(mb, x->block + b, PLANE_TYPE_Y_WITH_DC,
   1038                                 ta + vp8_block2above[b],
   1039                                 tl + vp8_block2left[b]);
   1040 
   1041     return cost;
   1042 
   1043 }
   1044 static unsigned int vp8_encode_inter_mb_segment(MACROBLOCK *x, int const *labels, int which_label)
   1045 {
   1046     int i;
   1047     unsigned int distortion = 0;
   1048     int pre_stride = x->e_mbd.pre.y_stride;
   1049     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1050 
   1051 
   1052     for (i = 0; i < 16; i++)
   1053     {
   1054         if (labels[i] == which_label)
   1055         {
   1056             BLOCKD *bd = &x->e_mbd.block[i];
   1057             BLOCK *be = &x->block[i];
   1058 
   1059             vp8_build_inter_predictors_b(bd, 16, base_pre, pre_stride, x->e_mbd.subpixel_predict);
   1060             vp8_subtract_b(be, bd, 16);
   1061             x->short_fdct4x4(be->src_diff, be->coeff, 32);
   1062             x->quantize_b(be, bd);
   1063 
   1064             distortion += vp8_block_error(be->coeff, bd->dqcoeff);
   1065         }
   1066     }
   1067 
   1068     return distortion;
   1069 }
   1070 
   1071 
   1072 static const unsigned int segmentation_to_sseshift[4] = {3, 3, 2, 0};
   1073 
   1074 
   1075 typedef struct
   1076 {
   1077   int_mv *ref_mv;
   1078   int_mv mvp;
   1079 
   1080   int segment_rd;
   1081   int segment_num;
   1082   int r;
   1083   int d;
   1084   int segment_yrate;
   1085   B_PREDICTION_MODE modes[16];
   1086   int_mv mvs[16];
   1087   unsigned char eobs[16];
   1088 
   1089   int mvthresh;
   1090   int *mdcounts;
   1091 
   1092   int_mv sv_mvp[4]; /* save 4 mvp from 8x8 */
   1093   int sv_istep[2];  /* save 2 initial step_param for 16x8/8x16 */
   1094 
   1095 } BEST_SEG_INFO;
   1096 
   1097 
   1098 static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
   1099                              BEST_SEG_INFO *bsi, unsigned int segmentation)
   1100 {
   1101     int i;
   1102     int const *labels;
   1103     int br = 0;
   1104     int bd = 0;
   1105     B_PREDICTION_MODE this_mode;
   1106 
   1107 
   1108     int label_count;
   1109     int this_segment_rd = 0;
   1110     int label_mv_thresh;
   1111     int rate = 0;
   1112     int sbr = 0;
   1113     int sbd = 0;
   1114     int segmentyrate = 0;
   1115 
   1116     vp8_variance_fn_ptr_t *v_fn_ptr;
   1117 
   1118     ENTROPY_CONTEXT_PLANES t_above, t_left;
   1119     ENTROPY_CONTEXT *ta;
   1120     ENTROPY_CONTEXT *tl;
   1121     ENTROPY_CONTEXT_PLANES t_above_b, t_left_b;
   1122     ENTROPY_CONTEXT *ta_b;
   1123     ENTROPY_CONTEXT *tl_b;
   1124 
   1125     vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
   1126     vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
   1127 
   1128     ta = (ENTROPY_CONTEXT *)&t_above;
   1129     tl = (ENTROPY_CONTEXT *)&t_left;
   1130     ta_b = (ENTROPY_CONTEXT *)&t_above_b;
   1131     tl_b = (ENTROPY_CONTEXT *)&t_left_b;
   1132 
   1133     br = 0;
   1134     bd = 0;
   1135 
   1136     v_fn_ptr = &cpi->fn_ptr[segmentation];
   1137     labels = vp8_mbsplits[segmentation];
   1138     label_count = vp8_mbsplit_count[segmentation];
   1139 
   1140     /* 64 makes this threshold really big effectively making it so that we
   1141      * very rarely check mvs on segments.   setting this to 1 would make mv
   1142      * thresh roughly equal to what it is for macroblocks
   1143      */
   1144     label_mv_thresh = 1 * bsi->mvthresh / label_count ;
   1145 
   1146     /* Segmentation method overheads */
   1147     rate = vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs, vp8_mbsplit_encodings + segmentation);
   1148     rate += vp8_cost_mv_ref(SPLITMV, bsi->mdcounts);
   1149     this_segment_rd += RDCOST(x->rdmult, x->rddiv, rate, 0);
   1150     br += rate;
   1151 
   1152     for (i = 0; i < label_count; i++)
   1153     {
   1154         int_mv mode_mv[B_MODE_COUNT];
   1155         int best_label_rd = INT_MAX;
   1156         B_PREDICTION_MODE mode_selected = ZERO4X4;
   1157         int bestlabelyrate = 0;
   1158 
   1159         /* search for the best motion vector on this segment */
   1160         for (this_mode = LEFT4X4; this_mode <= NEW4X4 ; this_mode ++)
   1161         {
   1162             int this_rd;
   1163             int distortion;
   1164             int labelyrate;
   1165             ENTROPY_CONTEXT_PLANES t_above_s, t_left_s;
   1166             ENTROPY_CONTEXT *ta_s;
   1167             ENTROPY_CONTEXT *tl_s;
   1168 
   1169             vpx_memcpy(&t_above_s, &t_above, sizeof(ENTROPY_CONTEXT_PLANES));
   1170             vpx_memcpy(&t_left_s, &t_left, sizeof(ENTROPY_CONTEXT_PLANES));
   1171 
   1172             ta_s = (ENTROPY_CONTEXT *)&t_above_s;
   1173             tl_s = (ENTROPY_CONTEXT *)&t_left_s;
   1174 
   1175             if (this_mode == NEW4X4)
   1176             {
   1177                 int sseshift;
   1178                 int num00;
   1179                 int step_param = 0;
   1180                 int further_steps;
   1181                 int n;
   1182                 int thissme;
   1183                 int bestsme = INT_MAX;
   1184                 int_mv  temp_mv;
   1185                 BLOCK *c;
   1186                 BLOCKD *e;
   1187 
   1188                 /* Is the best so far sufficiently good that we cant justify
   1189                  * doing a new motion search.
   1190                  */
   1191                 if (best_label_rd < label_mv_thresh)
   1192                     break;
   1193 
   1194                 if(cpi->compressor_speed)
   1195                 {
   1196                     if (segmentation == BLOCK_8X16 || segmentation == BLOCK_16X8)
   1197                     {
   1198                         bsi->mvp.as_int = bsi->sv_mvp[i].as_int;
   1199                         if (i==1 && segmentation == BLOCK_16X8)
   1200                           bsi->mvp.as_int = bsi->sv_mvp[2].as_int;
   1201 
   1202                         step_param = bsi->sv_istep[i];
   1203                     }
   1204 
   1205                     /* use previous block's result as next block's MV
   1206                      * predictor.
   1207                      */
   1208                     if (segmentation == BLOCK_4X4 && i>0)
   1209                     {
   1210                         bsi->mvp.as_int = x->e_mbd.block[i-1].bmi.mv.as_int;
   1211                         if (i==4 || i==8 || i==12)
   1212                             bsi->mvp.as_int = x->e_mbd.block[i-4].bmi.mv.as_int;
   1213                         step_param = 2;
   1214                     }
   1215                 }
   1216 
   1217                 further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
   1218 
   1219                 {
   1220                     int sadpb = x->sadperbit4;
   1221                     int_mv mvp_full;
   1222 
   1223                     mvp_full.as_mv.row = bsi->mvp.as_mv.row >>3;
   1224                     mvp_full.as_mv.col = bsi->mvp.as_mv.col >>3;
   1225 
   1226                     /* find first label */
   1227                     n = vp8_mbsplit_offset[segmentation][i];
   1228 
   1229                     c = &x->block[n];
   1230                     e = &x->e_mbd.block[n];
   1231 
   1232                     {
   1233                         bestsme = cpi->diamond_search_sad(x, c, e, &mvp_full,
   1234                                                 &mode_mv[NEW4X4], step_param,
   1235                                                 sadpb, &num00, v_fn_ptr,
   1236                                                 x->mvcost, bsi->ref_mv);
   1237 
   1238                         n = num00;
   1239                         num00 = 0;
   1240 
   1241                         while (n < further_steps)
   1242                         {
   1243                             n++;
   1244 
   1245                             if (num00)
   1246                                 num00--;
   1247                             else
   1248                             {
   1249                                 thissme = cpi->diamond_search_sad(x, c, e,
   1250                                                     &mvp_full, &temp_mv,
   1251                                                     step_param + n, sadpb,
   1252                                                     &num00, v_fn_ptr,
   1253                                                     x->mvcost, bsi->ref_mv);
   1254 
   1255                                 if (thissme < bestsme)
   1256                                 {
   1257                                     bestsme = thissme;
   1258                                     mode_mv[NEW4X4].as_int = temp_mv.as_int;
   1259                                 }
   1260                             }
   1261                         }
   1262                     }
   1263 
   1264                     sseshift = segmentation_to_sseshift[segmentation];
   1265 
   1266                     /* Should we do a full search (best quality only) */
   1267                     if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000)
   1268                     {
   1269                         /* Check if mvp_full is within the range. */
   1270                         vp8_clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
   1271 
   1272                         thissme = cpi->full_search_sad(x, c, e, &mvp_full,
   1273                                                        sadpb, 16, v_fn_ptr,
   1274                                                        x->mvcost, bsi->ref_mv);
   1275 
   1276                         if (thissme < bestsme)
   1277                         {
   1278                             bestsme = thissme;
   1279                             mode_mv[NEW4X4].as_int = e->bmi.mv.as_int;
   1280                         }
   1281                         else
   1282                         {
   1283                             /* The full search result is actually worse so
   1284                              * re-instate the previous best vector
   1285                              */
   1286                             e->bmi.mv.as_int = mode_mv[NEW4X4].as_int;
   1287                         }
   1288                     }
   1289                 }
   1290 
   1291                 if (bestsme < INT_MAX)
   1292                 {
   1293                     int disto;
   1294                     unsigned int sse;
   1295                     cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4],
   1296                         bsi->ref_mv, x->errorperbit, v_fn_ptr, x->mvcost,
   1297                         &disto, &sse);
   1298                 }
   1299             } /* NEW4X4 */
   1300 
   1301             rate = labels2mode(x, labels, i, this_mode, &mode_mv[this_mode],
   1302                                bsi->ref_mv, x->mvcost);
   1303 
   1304             /* Trap vectors that reach beyond the UMV borders */
   1305             if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) || ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) ||
   1306                 ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) || ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max))
   1307             {
   1308                 continue;
   1309             }
   1310 
   1311             distortion = vp8_encode_inter_mb_segment(x, labels, i) / 4;
   1312 
   1313             labelyrate = rdcost_mbsegment_y(x, labels, i, ta_s, tl_s);
   1314             rate += labelyrate;
   1315 
   1316             this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
   1317 
   1318             if (this_rd < best_label_rd)
   1319             {
   1320                 sbr = rate;
   1321                 sbd = distortion;
   1322                 bestlabelyrate = labelyrate;
   1323                 mode_selected = this_mode;
   1324                 best_label_rd = this_rd;
   1325 
   1326                 vpx_memcpy(ta_b, ta_s, sizeof(ENTROPY_CONTEXT_PLANES));
   1327                 vpx_memcpy(tl_b, tl_s, sizeof(ENTROPY_CONTEXT_PLANES));
   1328 
   1329             }
   1330         } /*for each 4x4 mode*/
   1331 
   1332         vpx_memcpy(ta, ta_b, sizeof(ENTROPY_CONTEXT_PLANES));
   1333         vpx_memcpy(tl, tl_b, sizeof(ENTROPY_CONTEXT_PLANES));
   1334 
   1335         labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected],
   1336                     bsi->ref_mv, x->mvcost);
   1337 
   1338         br += sbr;
   1339         bd += sbd;
   1340         segmentyrate += bestlabelyrate;
   1341         this_segment_rd += best_label_rd;
   1342 
   1343         if (this_segment_rd >= bsi->segment_rd)
   1344             break;
   1345 
   1346     } /* for each label */
   1347 
   1348     if (this_segment_rd < bsi->segment_rd)
   1349     {
   1350         bsi->r = br;
   1351         bsi->d = bd;
   1352         bsi->segment_yrate = segmentyrate;
   1353         bsi->segment_rd = this_segment_rd;
   1354         bsi->segment_num = segmentation;
   1355 
   1356         /* store everything needed to come back to this!! */
   1357         for (i = 0; i < 16; i++)
   1358         {
   1359             bsi->mvs[i].as_mv = x->partition_info->bmi[i].mv.as_mv;
   1360             bsi->modes[i] = x->partition_info->bmi[i].mode;
   1361             bsi->eobs[i] = x->e_mbd.eobs[i];
   1362         }
   1363     }
   1364 }
   1365 
   1366 static
   1367 void vp8_cal_step_param(int sr, int *sp)
   1368 {
   1369     int step = 0;
   1370 
   1371     if (sr > MAX_FIRST_STEP) sr = MAX_FIRST_STEP;
   1372     else if (sr < 1) sr = 1;
   1373 
   1374     while (sr>>=1)
   1375         step++;
   1376 
   1377     *sp = MAX_MVSEARCH_STEPS - 1 - step;
   1378 }
   1379 
   1380 static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
   1381                                            int_mv *best_ref_mv, int best_rd,
   1382                                            int *mdcounts, int *returntotrate,
   1383                                            int *returnyrate, int *returndistortion,
   1384                                            int mvthresh)
   1385 {
   1386     int i;
   1387     BEST_SEG_INFO bsi;
   1388 
   1389     vpx_memset(&bsi, 0, sizeof(bsi));
   1390 
   1391     bsi.segment_rd = best_rd;
   1392     bsi.ref_mv = best_ref_mv;
   1393     bsi.mvp.as_int = best_ref_mv->as_int;
   1394     bsi.mvthresh = mvthresh;
   1395     bsi.mdcounts = mdcounts;
   1396 
   1397     for(i = 0; i < 16; i++)
   1398     {
   1399         bsi.modes[i] = ZERO4X4;
   1400     }
   1401 
   1402     if(cpi->compressor_speed == 0)
   1403     {
   1404         /* for now, we will keep the original segmentation order
   1405            when in best quality mode */
   1406         rd_check_segment(cpi, x, &bsi, BLOCK_16X8);
   1407         rd_check_segment(cpi, x, &bsi, BLOCK_8X16);
   1408         rd_check_segment(cpi, x, &bsi, BLOCK_8X8);
   1409         rd_check_segment(cpi, x, &bsi, BLOCK_4X4);
   1410     }
   1411     else
   1412     {
   1413         int sr;
   1414 
   1415         rd_check_segment(cpi, x, &bsi, BLOCK_8X8);
   1416 
   1417         if (bsi.segment_rd < best_rd)
   1418         {
   1419             int col_min = ((best_ref_mv->as_mv.col+7)>>3) - MAX_FULL_PEL_VAL;
   1420             int row_min = ((best_ref_mv->as_mv.row+7)>>3) - MAX_FULL_PEL_VAL;
   1421             int col_max = (best_ref_mv->as_mv.col>>3) + MAX_FULL_PEL_VAL;
   1422             int row_max = (best_ref_mv->as_mv.row>>3) + MAX_FULL_PEL_VAL;
   1423 
   1424             int tmp_col_min = x->mv_col_min;
   1425             int tmp_col_max = x->mv_col_max;
   1426             int tmp_row_min = x->mv_row_min;
   1427             int tmp_row_max = x->mv_row_max;
   1428 
   1429             /* Get intersection of UMV window and valid MV window to reduce # of checks in diamond search. */
   1430             if (x->mv_col_min < col_min )
   1431                 x->mv_col_min = col_min;
   1432             if (x->mv_col_max > col_max )
   1433                 x->mv_col_max = col_max;
   1434             if (x->mv_row_min < row_min )
   1435                 x->mv_row_min = row_min;
   1436             if (x->mv_row_max > row_max )
   1437                 x->mv_row_max = row_max;
   1438 
   1439             /* Get 8x8 result */
   1440             bsi.sv_mvp[0].as_int = bsi.mvs[0].as_int;
   1441             bsi.sv_mvp[1].as_int = bsi.mvs[2].as_int;
   1442             bsi.sv_mvp[2].as_int = bsi.mvs[8].as_int;
   1443             bsi.sv_mvp[3].as_int = bsi.mvs[10].as_int;
   1444 
   1445             /* Use 8x8 result as 16x8/8x16's predictor MV. Adjust search range according to the closeness of 2 MV. */
   1446             /* block 8X16 */
   1447             {
   1448                 sr = MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[2].as_mv.row))>>3, (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[2].as_mv.col))>>3);
   1449                 vp8_cal_step_param(sr, &bsi.sv_istep[0]);
   1450 
   1451                 sr = MAXF((abs(bsi.sv_mvp[1].as_mv.row - bsi.sv_mvp[3].as_mv.row))>>3, (abs(bsi.sv_mvp[1].as_mv.col - bsi.sv_mvp[3].as_mv.col))>>3);
   1452                 vp8_cal_step_param(sr, &bsi.sv_istep[1]);
   1453 
   1454                 rd_check_segment(cpi, x, &bsi, BLOCK_8X16);
   1455             }
   1456 
   1457             /* block 16X8 */
   1458             {
   1459                 sr = MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[1].as_mv.row))>>3, (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[1].as_mv.col))>>3);
   1460                 vp8_cal_step_param(sr, &bsi.sv_istep[0]);
   1461 
   1462                 sr = MAXF((abs(bsi.sv_mvp[2].as_mv.row - bsi.sv_mvp[3].as_mv.row))>>3, (abs(bsi.sv_mvp[2].as_mv.col - bsi.sv_mvp[3].as_mv.col))>>3);
   1463                 vp8_cal_step_param(sr, &bsi.sv_istep[1]);
   1464 
   1465                 rd_check_segment(cpi, x, &bsi, BLOCK_16X8);
   1466             }
   1467 
   1468             /* If 8x8 is better than 16x8/8x16, then do 4x4 search */
   1469             /* Not skip 4x4 if speed=0 (good quality) */
   1470             if (cpi->sf.no_skip_block4x4_search || bsi.segment_num == BLOCK_8X8)  /* || (sv_segment_rd8x8-bsi.segment_rd) < sv_segment_rd8x8>>5) */
   1471             {
   1472                 bsi.mvp.as_int = bsi.sv_mvp[0].as_int;
   1473                 rd_check_segment(cpi, x, &bsi, BLOCK_4X4);
   1474             }
   1475 
   1476             /* restore UMV window */
   1477             x->mv_col_min = tmp_col_min;
   1478             x->mv_col_max = tmp_col_max;
   1479             x->mv_row_min = tmp_row_min;
   1480             x->mv_row_max = tmp_row_max;
   1481         }
   1482     }
   1483 
   1484     /* set it to the best */
   1485     for (i = 0; i < 16; i++)
   1486     {
   1487         BLOCKD *bd = &x->e_mbd.block[i];
   1488 
   1489         bd->bmi.mv.as_int = bsi.mvs[i].as_int;
   1490         *bd->eob = bsi.eobs[i];
   1491     }
   1492 
   1493     *returntotrate = bsi.r;
   1494     *returndistortion = bsi.d;
   1495     *returnyrate = bsi.segment_yrate;
   1496 
   1497     /* save partitions */
   1498     x->e_mbd.mode_info_context->mbmi.partitioning = bsi.segment_num;
   1499     x->partition_info->count = vp8_mbsplit_count[bsi.segment_num];
   1500 
   1501     for (i = 0; i < x->partition_info->count; i++)
   1502     {
   1503         int j;
   1504 
   1505         j = vp8_mbsplit_offset[bsi.segment_num][i];
   1506 
   1507         x->partition_info->bmi[i].mode = bsi.modes[j];
   1508         x->partition_info->bmi[i].mv.as_mv = bsi.mvs[j].as_mv;
   1509     }
   1510     /*
   1511      * used to set x->e_mbd.mode_info_context->mbmi.mv.as_int
   1512      */
   1513     x->partition_info->bmi[15].mv.as_int = bsi.mvs[15].as_int;
   1514 
   1515     return bsi.segment_rd;
   1516 }
   1517 
   1518 /* The improved MV prediction */
   1519 void vp8_mv_pred
   1520 (
   1521     VP8_COMP *cpi,
   1522     MACROBLOCKD *xd,
   1523     const MODE_INFO *here,
   1524     int_mv *mvp,
   1525     int refframe,
   1526     int *ref_frame_sign_bias,
   1527     int *sr,
   1528     int near_sadidx[]
   1529 )
   1530 {
   1531     const MODE_INFO *above = here - xd->mode_info_stride;
   1532     const MODE_INFO *left = here - 1;
   1533     const MODE_INFO *aboveleft = above - 1;
   1534     int_mv           near_mvs[8];
   1535     int              near_ref[8];
   1536     int_mv           mv;
   1537     int              vcnt=0;
   1538     int              find=0;
   1539     int              mb_offset;
   1540 
   1541     int              mvx[8];
   1542     int              mvy[8];
   1543     int              i;
   1544 
   1545     mv.as_int = 0;
   1546 
   1547     if(here->mbmi.ref_frame != INTRA_FRAME)
   1548     {
   1549         near_mvs[0].as_int = near_mvs[1].as_int = near_mvs[2].as_int = near_mvs[3].as_int = near_mvs[4].as_int = near_mvs[5].as_int = near_mvs[6].as_int = near_mvs[7].as_int = 0;
   1550         near_ref[0] = near_ref[1] = near_ref[2] = near_ref[3] = near_ref[4] = near_ref[5] = near_ref[6] = near_ref[7] = 0;
   1551 
   1552         /* read in 3 nearby block's MVs from current frame as prediction
   1553          * candidates.
   1554          */
   1555         if (above->mbmi.ref_frame != INTRA_FRAME)
   1556         {
   1557             near_mvs[vcnt].as_int = above->mbmi.mv.as_int;
   1558             mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
   1559             near_ref[vcnt] =  above->mbmi.ref_frame;
   1560         }
   1561         vcnt++;
   1562         if (left->mbmi.ref_frame != INTRA_FRAME)
   1563         {
   1564             near_mvs[vcnt].as_int = left->mbmi.mv.as_int;
   1565             mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
   1566             near_ref[vcnt] =  left->mbmi.ref_frame;
   1567         }
   1568         vcnt++;
   1569         if (aboveleft->mbmi.ref_frame != INTRA_FRAME)
   1570         {
   1571             near_mvs[vcnt].as_int = aboveleft->mbmi.mv.as_int;
   1572             mv_bias(ref_frame_sign_bias[aboveleft->mbmi.ref_frame], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
   1573             near_ref[vcnt] =  aboveleft->mbmi.ref_frame;
   1574         }
   1575         vcnt++;
   1576 
   1577         /* read in 5 nearby block's MVs from last frame. */
   1578         if(cpi->common.last_frame_type != KEY_FRAME)
   1579         {
   1580             mb_offset = (-xd->mb_to_top_edge/128 + 1) * (xd->mode_info_stride +1) + (-xd->mb_to_left_edge/128 +1) ;
   1581 
   1582             /* current in last frame */
   1583             if (cpi->lf_ref_frame[mb_offset] != INTRA_FRAME)
   1584             {
   1585                 near_mvs[vcnt].as_int = cpi->lfmv[mb_offset].as_int;
   1586                 mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
   1587                 near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset];
   1588             }
   1589             vcnt++;
   1590 
   1591             /* above in last frame */
   1592             if (cpi->lf_ref_frame[mb_offset - xd->mode_info_stride-1] != INTRA_FRAME)
   1593             {
   1594                 near_mvs[vcnt].as_int = cpi->lfmv[mb_offset - xd->mode_info_stride-1].as_int;
   1595                 mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset - xd->mode_info_stride-1], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
   1596                 near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset - xd->mode_info_stride-1];
   1597             }
   1598             vcnt++;
   1599 
   1600             /* left in last frame */
   1601             if (cpi->lf_ref_frame[mb_offset-1] != INTRA_FRAME)
   1602             {
   1603                 near_mvs[vcnt].as_int = cpi->lfmv[mb_offset -1].as_int;
   1604                 mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset -1], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
   1605                 near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset - 1];
   1606             }
   1607             vcnt++;
   1608 
   1609             /* right in last frame */
   1610             if (cpi->lf_ref_frame[mb_offset +1] != INTRA_FRAME)
   1611             {
   1612                 near_mvs[vcnt].as_int = cpi->lfmv[mb_offset +1].as_int;
   1613                 mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset +1], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
   1614                 near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset +1];
   1615             }
   1616             vcnt++;
   1617 
   1618             /* below in last frame */
   1619             if (cpi->lf_ref_frame[mb_offset + xd->mode_info_stride +1] != INTRA_FRAME)
   1620             {
   1621                 near_mvs[vcnt].as_int = cpi->lfmv[mb_offset + xd->mode_info_stride +1].as_int;
   1622                 mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset + xd->mode_info_stride +1], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
   1623                 near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset + xd->mode_info_stride +1];
   1624             }
   1625             vcnt++;
   1626         }
   1627 
   1628         for(i=0; i< vcnt; i++)
   1629         {
   1630             if(near_ref[near_sadidx[i]] != INTRA_FRAME)
   1631             {
   1632                 if(here->mbmi.ref_frame == near_ref[near_sadidx[i]])
   1633                 {
   1634                     mv.as_int = near_mvs[near_sadidx[i]].as_int;
   1635                     find = 1;
   1636                     if (i < 3)
   1637                         *sr = 3;
   1638                     else
   1639                         *sr = 2;
   1640                     break;
   1641                 }
   1642             }
   1643         }
   1644 
   1645         if(!find)
   1646         {
   1647             for(i=0; i<vcnt; i++)
   1648             {
   1649                 mvx[i] = near_mvs[i].as_mv.row;
   1650                 mvy[i] = near_mvs[i].as_mv.col;
   1651             }
   1652 
   1653             insertsortmv(mvx, vcnt);
   1654             insertsortmv(mvy, vcnt);
   1655             mv.as_mv.row = mvx[vcnt/2];
   1656             mv.as_mv.col = mvy[vcnt/2];
   1657 
   1658             find = 1;
   1659             /* sr is set to 0 to allow calling function to decide the search
   1660              * range.
   1661              */
   1662             *sr = 0;
   1663         }
   1664     }
   1665 
   1666     /* Set up return values */
   1667     mvp->as_int = mv.as_int;
   1668     vp8_clamp_mv2(mvp, xd);
   1669 }
   1670 
   1671 void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffset, int near_sadidx[])
   1672 {
   1673     /* near_sad indexes:
   1674      *   0-cf above, 1-cf left, 2-cf aboveleft,
   1675      *   3-lf current, 4-lf above, 5-lf left, 6-lf right, 7-lf below
   1676      */
   1677     int near_sad[8] = {0};
   1678     BLOCK *b = &x->block[0];
   1679     unsigned char *src_y_ptr = *(b->base_src);
   1680 
   1681     /* calculate sad for current frame 3 nearby MBs. */
   1682     if( xd->mb_to_top_edge==0 && xd->mb_to_left_edge ==0)
   1683     {
   1684         near_sad[0] = near_sad[1] = near_sad[2] = INT_MAX;
   1685     }else if(xd->mb_to_top_edge==0)
   1686     {   /* only has left MB for sad calculation. */
   1687         near_sad[0] = near_sad[2] = INT_MAX;
   1688         near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, UINT_MAX);
   1689     }else if(xd->mb_to_left_edge ==0)
   1690     {   /* only has left MB for sad calculation. */
   1691         near_sad[1] = near_sad[2] = INT_MAX;
   1692         near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, UINT_MAX);
   1693     }else
   1694     {
   1695         near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, UINT_MAX);
   1696         near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, UINT_MAX);
   1697         near_sad[2] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16 -16,xd->dst.y_stride, UINT_MAX);
   1698     }
   1699 
   1700     if(cpi->common.last_frame_type != KEY_FRAME)
   1701     {
   1702         /* calculate sad for last frame 5 nearby MBs. */
   1703         unsigned char *pre_y_buffer = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_buffer + recon_yoffset;
   1704         int pre_y_stride = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_stride;
   1705 
   1706         if(xd->mb_to_top_edge==0) near_sad[4] = INT_MAX;
   1707         if(xd->mb_to_left_edge ==0) near_sad[5] = INT_MAX;
   1708         if(xd->mb_to_right_edge ==0) near_sad[6] = INT_MAX;
   1709         if(xd->mb_to_bottom_edge==0) near_sad[7] = INT_MAX;
   1710 
   1711         if(near_sad[4] != INT_MAX)
   1712             near_sad[4] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer - pre_y_stride *16, pre_y_stride, UINT_MAX);
   1713         if(near_sad[5] != INT_MAX)
   1714             near_sad[5] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer - 16, pre_y_stride, UINT_MAX);
   1715         near_sad[3] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer, pre_y_stride, UINT_MAX);
   1716         if(near_sad[6] != INT_MAX)
   1717             near_sad[6] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer + 16, pre_y_stride, UINT_MAX);
   1718         if(near_sad[7] != INT_MAX)
   1719             near_sad[7] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer + pre_y_stride *16, pre_y_stride, UINT_MAX);
   1720     }
   1721 
   1722     if(cpi->common.last_frame_type != KEY_FRAME)
   1723     {
   1724         insertsortsad(near_sad, near_sadidx, 8);
   1725     }else
   1726     {
   1727         insertsortsad(near_sad, near_sadidx, 3);
   1728     }
   1729 }
   1730 
   1731 static void rd_update_mvcount(MACROBLOCK *x, int_mv *best_ref_mv)
   1732 {
   1733     if (x->e_mbd.mode_info_context->mbmi.mode == SPLITMV)
   1734     {
   1735         int i;
   1736 
   1737         for (i = 0; i < x->partition_info->count; i++)
   1738         {
   1739             if (x->partition_info->bmi[i].mode == NEW4X4)
   1740             {
   1741                 x->MVcount[0][mv_max+((x->partition_info->bmi[i].mv.as_mv.row
   1742                                           - best_ref_mv->as_mv.row) >> 1)]++;
   1743                 x->MVcount[1][mv_max+((x->partition_info->bmi[i].mv.as_mv.col
   1744                                           - best_ref_mv->as_mv.col) >> 1)]++;
   1745             }
   1746         }
   1747     }
   1748     else if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV)
   1749     {
   1750         x->MVcount[0][mv_max+((x->e_mbd.mode_info_context->mbmi.mv.as_mv.row
   1751                                           - best_ref_mv->as_mv.row) >> 1)]++;
   1752         x->MVcount[1][mv_max+((x->e_mbd.mode_info_context->mbmi.mv.as_mv.col
   1753                                           - best_ref_mv->as_mv.col) >> 1)]++;
   1754     }
   1755 }
   1756 
   1757 static int evaluate_inter_mode_rd(int mdcounts[4],
   1758                                   RATE_DISTORTION* rd,
   1759                                   int* disable_skip,
   1760                                   VP8_COMP *cpi, MACROBLOCK *x)
   1761 {
   1762     MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode;
   1763     BLOCK *b = &x->block[0];
   1764     MACROBLOCKD *xd = &x->e_mbd;
   1765     int distortion;
   1766     vp8_build_inter16x16_predictors_mby(&x->e_mbd, x->e_mbd.predictor, 16);
   1767 
   1768     if (cpi->active_map_enabled && x->active_ptr[0] == 0) {
   1769         x->skip = 1;
   1770     }
   1771     else if (x->encode_breakout)
   1772     {
   1773         unsigned int sse;
   1774         unsigned int var;
   1775         unsigned int threshold = (xd->block[0].dequant[1]
   1776                     * xd->block[0].dequant[1] >>4);
   1777 
   1778         if(threshold < x->encode_breakout)
   1779             threshold = x->encode_breakout;
   1780 
   1781         var = vp8_variance16x16
   1782                 (*(b->base_src), b->src_stride,
   1783                 x->e_mbd.predictor, 16, &sse);
   1784 
   1785         if (sse < threshold)
   1786         {
   1787              unsigned int q2dc = xd->block[24].dequant[0];
   1788             /* If theres is no codeable 2nd order dc
   1789                or a very small uniform pixel change change */
   1790             if ((sse - var < q2dc * q2dc >>4) ||
   1791                 (sse /2 > var && sse-var < 64))
   1792             {
   1793                 /* Check u and v to make sure skip is ok */
   1794                 unsigned int sse2 = VP8_UVSSE(x);
   1795                 if (sse2 * 2 < threshold)
   1796                 {
   1797                     x->skip = 1;
   1798                     rd->distortion2 = sse + sse2;
   1799                     rd->rate2 = 500;
   1800 
   1801                     /* for best_yrd calculation */
   1802                     rd->rate_uv = 0;
   1803                     rd->distortion_uv = sse2;
   1804 
   1805                     *disable_skip = 1;
   1806                     return RDCOST(x->rdmult, x->rddiv, rd->rate2,
   1807                                   rd->distortion2);
   1808                 }
   1809             }
   1810         }
   1811     }
   1812 
   1813 
   1814     /* Add in the Mv/mode cost */
   1815     rd->rate2 += vp8_cost_mv_ref(this_mode, mdcounts);
   1816 
   1817     /* Y cost and distortion */
   1818     macro_block_yrd(x, &rd->rate_y, &distortion);
   1819     rd->rate2 += rd->rate_y;
   1820     rd->distortion2 += distortion;
   1821 
   1822     /* UV cost and distortion */
   1823     rd_inter16x16_uv(cpi, x, &rd->rate_uv, &rd->distortion_uv,
   1824                      cpi->common.full_pixel);
   1825     rd->rate2 += rd->rate_uv;
   1826     rd->distortion2 += rd->distortion_uv;
   1827     return INT_MAX;
   1828 }
   1829 
   1830 static int calculate_final_rd_costs(int this_rd,
   1831                                     RATE_DISTORTION* rd,
   1832                                     int* other_cost,
   1833                                     int disable_skip,
   1834                                     int uv_intra_tteob,
   1835                                     int intra_rd_penalty,
   1836                                     VP8_COMP *cpi, MACROBLOCK *x)
   1837 {
   1838     MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode;
   1839 
   1840     /* Where skip is allowable add in the default per mb cost for the no
   1841      * skip case. where we then decide to skip we have to delete this and
   1842      * replace it with the cost of signalling a skip
   1843      */
   1844     if (cpi->common.mb_no_coeff_skip)
   1845     {
   1846         *other_cost += vp8_cost_bit(cpi->prob_skip_false, 0);
   1847         rd->rate2 += *other_cost;
   1848     }
   1849 
   1850     /* Estimate the reference frame signaling cost and add it
   1851      * to the rolling cost variable.
   1852      */
   1853     rd->rate2 +=
   1854         x->ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
   1855 
   1856     if (!disable_skip)
   1857     {
   1858         /* Test for the condition where skip block will be activated
   1859          * because there are no non zero coefficients and make any
   1860          * necessary adjustment for rate
   1861          */
   1862         if (cpi->common.mb_no_coeff_skip)
   1863         {
   1864             int i;
   1865             int tteob;
   1866             int has_y2_block = (this_mode!=SPLITMV && this_mode!=B_PRED);
   1867 
   1868             tteob = 0;
   1869             if(has_y2_block)
   1870                 tteob += x->e_mbd.eobs[24];
   1871 
   1872             for (i = 0; i < 16; i++)
   1873                 tteob += (x->e_mbd.eobs[i] > has_y2_block);
   1874 
   1875             if (x->e_mbd.mode_info_context->mbmi.ref_frame)
   1876             {
   1877                 for (i = 16; i < 24; i++)
   1878                     tteob += x->e_mbd.eobs[i];
   1879             }
   1880             else
   1881                 tteob += uv_intra_tteob;
   1882 
   1883             if (tteob == 0)
   1884             {
   1885                 rd->rate2 -= (rd->rate_y + rd->rate_uv);
   1886                 /* for best_yrd calculation */
   1887                 rd->rate_uv = 0;
   1888 
   1889                 /* Back out no skip flag costing and add in skip flag costing */
   1890                 if (cpi->prob_skip_false)
   1891                 {
   1892                     int prob_skip_cost;
   1893 
   1894                     prob_skip_cost = vp8_cost_bit(cpi->prob_skip_false, 1);
   1895                     prob_skip_cost -= vp8_cost_bit(cpi->prob_skip_false, 0);
   1896                     rd->rate2 += prob_skip_cost;
   1897                     *other_cost += prob_skip_cost;
   1898                 }
   1899             }
   1900         }
   1901         /* Calculate the final RD estimate for this mode */
   1902         this_rd = RDCOST(x->rdmult, x->rddiv, rd->rate2, rd->distortion2);
   1903         if (this_rd < INT_MAX && x->e_mbd.mode_info_context->mbmi.ref_frame
   1904                                  == INTRA_FRAME)
   1905             this_rd += intra_rd_penalty;
   1906     }
   1907     return this_rd;
   1908 }
   1909 
   1910 static void update_best_mode(BEST_MODE* best_mode, int this_rd,
   1911                              RATE_DISTORTION* rd, int other_cost, MACROBLOCK *x)
   1912 {
   1913     MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode;
   1914 
   1915     other_cost +=
   1916     x->ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
   1917 
   1918     /* Calculate the final y RD estimate for this mode */
   1919     best_mode->yrd = RDCOST(x->rdmult, x->rddiv, (rd->rate2-rd->rate_uv-other_cost),
   1920                       (rd->distortion2-rd->distortion_uv));
   1921 
   1922     best_mode->rd = this_rd;
   1923     vpx_memcpy(&best_mode->mbmode, &x->e_mbd.mode_info_context->mbmi, sizeof(MB_MODE_INFO));
   1924     vpx_memcpy(&best_mode->partition, x->partition_info, sizeof(PARTITION_INFO));
   1925 
   1926     if ((this_mode == B_PRED) || (this_mode == SPLITMV))
   1927     {
   1928         int i;
   1929         for (i = 0; i < 16; i++)
   1930         {
   1931             best_mode->bmodes[i] = x->e_mbd.block[i].bmi;
   1932         }
   1933     }
   1934 }
   1935 
   1936 void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
   1937                             int recon_uvoffset, int *returnrate,
   1938                             int *returndistortion, int *returnintra,
   1939                             int mb_row, int mb_col)
   1940 {
   1941     BLOCK *b = &x->block[0];
   1942     BLOCKD *d = &x->e_mbd.block[0];
   1943     MACROBLOCKD *xd = &x->e_mbd;
   1944     int_mv best_ref_mv_sb[2];
   1945     int_mv mode_mv_sb[2][MB_MODE_COUNT];
   1946     int_mv best_ref_mv;
   1947     int_mv *mode_mv;
   1948     MB_PREDICTION_MODE this_mode;
   1949     int num00;
   1950     int best_mode_index = 0;
   1951     BEST_MODE best_mode;
   1952 
   1953     int i;
   1954     int mode_index;
   1955     int mdcounts[4];
   1956     int rate;
   1957     RATE_DISTORTION rd;
   1958     int uv_intra_rate, uv_intra_distortion, uv_intra_rate_tokenonly;
   1959     int uv_intra_tteob = 0;
   1960     int uv_intra_done = 0;
   1961 
   1962     MB_PREDICTION_MODE uv_intra_mode = 0;
   1963     int_mv mvp;
   1964     int near_sadidx[8] = {0, 1, 2, 3, 4, 5, 6, 7};
   1965     int saddone=0;
   1966     /* search range got from mv_pred(). It uses step_param levels. (0-7) */
   1967     int sr=0;
   1968 
   1969     unsigned char *plane[4][3];
   1970     int ref_frame_map[4];
   1971     int sign_bias = 0;
   1972 
   1973     int intra_rd_penalty =  10* vp8_dc_quant(cpi->common.base_qindex,
   1974                                              cpi->common.y1dc_delta_q);
   1975 
   1976 #if CONFIG_TEMPORAL_DENOISING
   1977     unsigned int zero_mv_sse = UINT_MAX, best_sse = UINT_MAX,
   1978             best_rd_sse = UINT_MAX;
   1979 #endif
   1980 
   1981     mode_mv = mode_mv_sb[sign_bias];
   1982     best_ref_mv.as_int = 0;
   1983     best_mode.rd = INT_MAX;
   1984     best_mode.yrd = INT_MAX;
   1985     best_mode.intra_rd = INT_MAX;
   1986     vpx_memset(mode_mv_sb, 0, sizeof(mode_mv_sb));
   1987     vpx_memset(&best_mode.mbmode, 0, sizeof(best_mode.mbmode));
   1988     vpx_memset(&best_mode.bmodes, 0, sizeof(best_mode.bmodes));
   1989 
   1990     /* Setup search priorities */
   1991     get_reference_search_order(cpi, ref_frame_map);
   1992 
   1993     /* Check to see if there is at least 1 valid reference frame that we need
   1994      * to calculate near_mvs.
   1995      */
   1996     if (ref_frame_map[1] > 0)
   1997     {
   1998         sign_bias = vp8_find_near_mvs_bias(&x->e_mbd,
   1999                                            x->e_mbd.mode_info_context,
   2000                                            mode_mv_sb,
   2001                                            best_ref_mv_sb,
   2002                                            mdcounts,
   2003                                            ref_frame_map[1],
   2004                                            cpi->common.ref_frame_sign_bias);
   2005 
   2006         mode_mv = mode_mv_sb[sign_bias];
   2007         best_ref_mv.as_int = best_ref_mv_sb[sign_bias].as_int;
   2008     }
   2009 
   2010     get_predictor_pointers(cpi, plane, recon_yoffset, recon_uvoffset);
   2011 
   2012     *returnintra = INT_MAX;
   2013     /* Count of the number of MBs tested so far this frame */
   2014     x->mbs_tested_so_far++;
   2015 
   2016     x->skip = 0;
   2017 
   2018     for (mode_index = 0; mode_index < MAX_MODES; mode_index++)
   2019     {
   2020         int this_rd = INT_MAX;
   2021         int disable_skip = 0;
   2022         int other_cost = 0;
   2023         int this_ref_frame = ref_frame_map[vp8_ref_frame_order[mode_index]];
   2024 
   2025         /* Test best rd so far against threshold for trying this mode. */
   2026         if (best_mode.rd <= x->rd_threshes[mode_index])
   2027             continue;
   2028 
   2029         if (this_ref_frame < 0)
   2030             continue;
   2031 
   2032         /* These variables hold are rolling total cost and distortion for
   2033          * this mode
   2034          */
   2035         rd.rate2 = 0;
   2036         rd.distortion2 = 0;
   2037 
   2038         this_mode = vp8_mode_order[mode_index];
   2039 
   2040         x->e_mbd.mode_info_context->mbmi.mode = this_mode;
   2041         x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame;
   2042 
   2043         /* Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
   2044          * unless ARNR filtering is enabled in which case we want
   2045          * an unfiltered alternative
   2046          */
   2047         if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0))
   2048         {
   2049             if (this_mode != ZEROMV || x->e_mbd.mode_info_context->mbmi.ref_frame != ALTREF_FRAME)
   2050                 continue;
   2051         }
   2052 
   2053         /* everything but intra */
   2054         if (x->e_mbd.mode_info_context->mbmi.ref_frame)
   2055         {
   2056             x->e_mbd.pre.y_buffer = plane[this_ref_frame][0];
   2057             x->e_mbd.pre.u_buffer = plane[this_ref_frame][1];
   2058             x->e_mbd.pre.v_buffer = plane[this_ref_frame][2];
   2059 
   2060             if (sign_bias != cpi->common.ref_frame_sign_bias[this_ref_frame])
   2061             {
   2062                 sign_bias = cpi->common.ref_frame_sign_bias[this_ref_frame];
   2063                 mode_mv = mode_mv_sb[sign_bias];
   2064                 best_ref_mv.as_int = best_ref_mv_sb[sign_bias].as_int;
   2065             }
   2066         }
   2067 
   2068         /* Check to see if the testing frequency for this mode is at its
   2069          * max If so then prevent it from being tested and increase the
   2070          * threshold for its testing
   2071          */
   2072         if (x->mode_test_hit_counts[mode_index] && (cpi->mode_check_freq[mode_index] > 1))
   2073         {
   2074             if (x->mbs_tested_so_far  <= cpi->mode_check_freq[mode_index] * x->mode_test_hit_counts[mode_index])
   2075             {
   2076                 /* Increase the threshold for coding this mode to make it
   2077                  * less likely to be chosen
   2078                  */
   2079                 x->rd_thresh_mult[mode_index] += 4;
   2080 
   2081                 if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT)
   2082                     x->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
   2083 
   2084                 x->rd_threshes[mode_index] =
   2085                     (cpi->rd_baseline_thresh[mode_index] >> 7) *
   2086                     x->rd_thresh_mult[mode_index];
   2087 
   2088                 continue;
   2089             }
   2090         }
   2091 
   2092         /* We have now reached the point where we are going to test the
   2093          * current mode so increment the counter for the number of times
   2094          * it has been tested
   2095          */
   2096         x->mode_test_hit_counts[mode_index] ++;
   2097 
   2098         /* Experimental code. Special case for gf and arf zeromv modes.
   2099          * Increase zbin size to supress noise
   2100          */
   2101         if (x->zbin_mode_boost_enabled)
   2102         {
   2103             if ( this_ref_frame == INTRA_FRAME )
   2104                 x->zbin_mode_boost = 0;
   2105             else
   2106             {
   2107                 if (vp8_mode_order[mode_index] == ZEROMV)
   2108                 {
   2109                     if (this_ref_frame != LAST_FRAME)
   2110                         x->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
   2111                     else
   2112                         x->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
   2113                 }
   2114                 else if (vp8_mode_order[mode_index] == SPLITMV)
   2115                     x->zbin_mode_boost = 0;
   2116                 else
   2117                     x->zbin_mode_boost = MV_ZBIN_BOOST;
   2118             }
   2119 
   2120             vp8_update_zbin_extra(cpi, x);
   2121         }
   2122 
   2123         if(!uv_intra_done && this_ref_frame == INTRA_FRAME)
   2124         {
   2125             rd_pick_intra_mbuv_mode(x, &uv_intra_rate,
   2126                                     &uv_intra_rate_tokenonly,
   2127                                     &uv_intra_distortion);
   2128             uv_intra_mode = x->e_mbd.mode_info_context->mbmi.uv_mode;
   2129 
   2130             /*
   2131              * Total of the eobs is used later to further adjust rate2. Since uv
   2132              * block's intra eobs will be overwritten when we check inter modes,
   2133              * we need to save uv_intra_tteob here.
   2134              */
   2135             for (i = 16; i < 24; i++)
   2136                 uv_intra_tteob += x->e_mbd.eobs[i];
   2137 
   2138             uv_intra_done = 1;
   2139         }
   2140 
   2141         switch (this_mode)
   2142         {
   2143         case B_PRED:
   2144         {
   2145             int tmp_rd;
   2146 
   2147             /* Note the rate value returned here includes the cost of
   2148              * coding the BPRED mode: x->mbmode_cost[x->e_mbd.frame_type][BPRED]
   2149              */
   2150             int distortion;
   2151             tmp_rd = rd_pick_intra4x4mby_modes(x, &rate, &rd.rate_y, &distortion, best_mode.yrd);
   2152             rd.rate2 += rate;
   2153             rd.distortion2 += distortion;
   2154 
   2155             if(tmp_rd < best_mode.yrd)
   2156             {
   2157                 rd.rate2 += uv_intra_rate;
   2158                 rd.rate_uv = uv_intra_rate_tokenonly;
   2159                 rd.distortion2 += uv_intra_distortion;
   2160                 rd.distortion_uv = uv_intra_distortion;
   2161             }
   2162             else
   2163             {
   2164                 this_rd = INT_MAX;
   2165                 disable_skip = 1;
   2166             }
   2167         }
   2168         break;
   2169 
   2170         case SPLITMV:
   2171         {
   2172             int tmp_rd;
   2173             int this_rd_thresh;
   2174             int distortion;
   2175 
   2176             this_rd_thresh = (vp8_ref_frame_order[mode_index] == 1) ?
   2177                 x->rd_threshes[THR_NEW1] : x->rd_threshes[THR_NEW3];
   2178             this_rd_thresh = (vp8_ref_frame_order[mode_index] == 2) ?
   2179                 x->rd_threshes[THR_NEW2] : this_rd_thresh;
   2180 
   2181             tmp_rd = vp8_rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv,
   2182                                                      best_mode.yrd, mdcounts,
   2183                                                      &rate, &rd.rate_y, &distortion, this_rd_thresh) ;
   2184 
   2185             rd.rate2 += rate;
   2186             rd.distortion2 += distortion;
   2187 
   2188             /* If even the 'Y' rd value of split is higher than best so far
   2189              * then dont bother looking at UV
   2190              */
   2191             if (tmp_rd < best_mode.yrd)
   2192             {
   2193                 /* Now work out UV cost and add it in */
   2194                 rd_inter4x4_uv(cpi, x, &rd.rate_uv, &rd.distortion_uv, cpi->common.full_pixel);
   2195                 rd.rate2 += rd.rate_uv;
   2196                 rd.distortion2 += rd.distortion_uv;
   2197             }
   2198             else
   2199             {
   2200                 this_rd = INT_MAX;
   2201                 disable_skip = 1;
   2202             }
   2203         }
   2204         break;
   2205         case DC_PRED:
   2206         case V_PRED:
   2207         case H_PRED:
   2208         case TM_PRED:
   2209         {
   2210             int distortion;
   2211             x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
   2212 
   2213             vp8_build_intra_predictors_mby_s(xd,
   2214                                              xd->dst.y_buffer - xd->dst.y_stride,
   2215                                              xd->dst.y_buffer - 1,
   2216                                              xd->dst.y_stride,
   2217                                              xd->predictor,
   2218                                              16);
   2219             macro_block_yrd(x, &rd.rate_y, &distortion) ;
   2220             rd.rate2 += rd.rate_y;
   2221             rd.distortion2 += distortion;
   2222             rd.rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode];
   2223             rd.rate2 += uv_intra_rate;
   2224             rd.rate_uv = uv_intra_rate_tokenonly;
   2225             rd.distortion2 += uv_intra_distortion;
   2226             rd.distortion_uv = uv_intra_distortion;
   2227         }
   2228         break;
   2229 
   2230         case NEWMV:
   2231         {
   2232             int thissme;
   2233             int bestsme = INT_MAX;
   2234             int step_param = cpi->sf.first_step;
   2235             int further_steps;
   2236             int n;
   2237             int do_refine=1;   /* If last step (1-away) of n-step search doesn't pick the center point as the best match,
   2238                                   we will do a final 1-away diamond refining search  */
   2239 
   2240             int sadpb = x->sadperbit16;
   2241             int_mv mvp_full;
   2242 
   2243             int col_min = ((best_ref_mv.as_mv.col+7)>>3) - MAX_FULL_PEL_VAL;
   2244             int row_min = ((best_ref_mv.as_mv.row+7)>>3) - MAX_FULL_PEL_VAL;
   2245             int col_max = (best_ref_mv.as_mv.col>>3) + MAX_FULL_PEL_VAL;
   2246             int row_max = (best_ref_mv.as_mv.row>>3) + MAX_FULL_PEL_VAL;
   2247 
   2248             int tmp_col_min = x->mv_col_min;
   2249             int tmp_col_max = x->mv_col_max;
   2250             int tmp_row_min = x->mv_row_min;
   2251             int tmp_row_max = x->mv_row_max;
   2252 
   2253             if(!saddone)
   2254             {
   2255                 vp8_cal_sad(cpi,xd,x, recon_yoffset ,&near_sadidx[0] );
   2256                 saddone = 1;
   2257             }
   2258 
   2259             vp8_mv_pred(cpi, &x->e_mbd, x->e_mbd.mode_info_context, &mvp,
   2260                         x->e_mbd.mode_info_context->mbmi.ref_frame, cpi->common.ref_frame_sign_bias, &sr, &near_sadidx[0]);
   2261 
   2262             mvp_full.as_mv.col = mvp.as_mv.col>>3;
   2263             mvp_full.as_mv.row = mvp.as_mv.row>>3;
   2264 
   2265             /* Get intersection of UMV window and valid MV window to
   2266              * reduce # of checks in diamond search.
   2267              */
   2268             if (x->mv_col_min < col_min )
   2269                 x->mv_col_min = col_min;
   2270             if (x->mv_col_max > col_max )
   2271                 x->mv_col_max = col_max;
   2272             if (x->mv_row_min < row_min )
   2273                 x->mv_row_min = row_min;
   2274             if (x->mv_row_max > row_max )
   2275                 x->mv_row_max = row_max;
   2276 
   2277             /* adjust search range according to sr from mv prediction */
   2278             if(sr > step_param)
   2279                 step_param = sr;
   2280 
   2281             /* Initial step/diamond search */
   2282             {
   2283                 bestsme = cpi->diamond_search_sad(x, b, d, &mvp_full, &d->bmi.mv,
   2284                                         step_param, sadpb, &num00,
   2285                                         &cpi->fn_ptr[BLOCK_16X16],
   2286                                         x->mvcost, &best_ref_mv);
   2287                 mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
   2288 
   2289                 /* Further step/diamond searches as necessary */
   2290                 n = 0;
   2291                 further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
   2292 
   2293                 n = num00;
   2294                 num00 = 0;
   2295 
   2296                 /* If there won't be more n-step search, check to see if refining search is needed. */
   2297                 if (n > further_steps)
   2298                     do_refine = 0;
   2299 
   2300                 while (n < further_steps)
   2301                 {
   2302                     n++;
   2303 
   2304                     if (num00)
   2305                         num00--;
   2306                     else
   2307                     {
   2308                         thissme = cpi->diamond_search_sad(x, b, d, &mvp_full,
   2309                                     &d->bmi.mv, step_param + n, sadpb, &num00,
   2310                                     &cpi->fn_ptr[BLOCK_16X16], x->mvcost,
   2311                                     &best_ref_mv);
   2312 
   2313                         /* check to see if refining search is needed. */
   2314                         if (num00 > (further_steps-n))
   2315                             do_refine = 0;
   2316 
   2317                         if (thissme < bestsme)
   2318                         {
   2319                             bestsme = thissme;
   2320                             mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
   2321                         }
   2322                         else
   2323                         {
   2324                             d->bmi.mv.as_int = mode_mv[NEWMV].as_int;
   2325                         }
   2326                     }
   2327                 }
   2328             }
   2329 
   2330             /* final 1-away diamond refining search */
   2331             if (do_refine == 1)
   2332             {
   2333                 int search_range;
   2334 
   2335                 search_range = 8;
   2336 
   2337                 thissme = cpi->refining_search_sad(x, b, d, &d->bmi.mv, sadpb,
   2338                                        search_range, &cpi->fn_ptr[BLOCK_16X16],
   2339                                        x->mvcost, &best_ref_mv);
   2340 
   2341                 if (thissme < bestsme)
   2342                 {
   2343                     bestsme = thissme;
   2344                     mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
   2345                 }
   2346                 else
   2347                 {
   2348                     d->bmi.mv.as_int = mode_mv[NEWMV].as_int;
   2349                 }
   2350             }
   2351 
   2352             x->mv_col_min = tmp_col_min;
   2353             x->mv_col_max = tmp_col_max;
   2354             x->mv_row_min = tmp_row_min;
   2355             x->mv_row_max = tmp_row_max;
   2356 
   2357             if (bestsme < INT_MAX)
   2358             {
   2359                 int dis; /* TODO: use dis in distortion calculation later. */
   2360                 unsigned int sse;
   2361                 cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv, &best_ref_mv,
   2362                                              x->errorperbit,
   2363                                              &cpi->fn_ptr[BLOCK_16X16],
   2364                                              x->mvcost, &dis, &sse);
   2365             }
   2366 
   2367             mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
   2368 
   2369             /* Add the new motion vector cost to our rolling cost variable */
   2370             rd.rate2 += vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv, x->mvcost, 96);
   2371         }
   2372 
   2373         case NEARESTMV:
   2374         case NEARMV:
   2375             /* Clip "next_nearest" so that it does not extend to far out
   2376              * of image
   2377              */
   2378             vp8_clamp_mv2(&mode_mv[this_mode], xd);
   2379 
   2380             /* Do not bother proceeding if the vector (from newmv, nearest
   2381              * or near) is 0,0 as this should then be coded using the zeromv
   2382              * mode.
   2383              */
   2384             if (((this_mode == NEARMV) || (this_mode == NEARESTMV)) && (mode_mv[this_mode].as_int == 0))
   2385                 continue;
   2386 
   2387         case ZEROMV:
   2388 
   2389             /* Trap vectors that reach beyond the UMV borders
   2390              * Note that ALL New MV, Nearest MV Near MV and Zero MV code
   2391              * drops through to this point because of the lack of break
   2392              * statements in the previous two cases.
   2393              */
   2394             if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) || ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) ||
   2395                 ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) || ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max))
   2396                 continue;
   2397 
   2398             vp8_set_mbmode_and_mvs(x, this_mode, &mode_mv[this_mode]);
   2399             this_rd = evaluate_inter_mode_rd(mdcounts, &rd,
   2400                                              &disable_skip, cpi, x);
   2401             break;
   2402 
   2403         default:
   2404             break;
   2405         }
   2406 
   2407         this_rd = calculate_final_rd_costs(this_rd, &rd, &other_cost,
   2408                                            disable_skip, uv_intra_tteob,
   2409                                            intra_rd_penalty, cpi, x);
   2410 
   2411         /* Keep record of best intra distortion */
   2412         if ((x->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME) &&
   2413             (this_rd < best_mode.intra_rd) )
   2414         {
   2415           best_mode.intra_rd = this_rd;
   2416             *returnintra = rd.distortion2 ;
   2417         }
   2418 #if CONFIG_TEMPORAL_DENOISING
   2419         if (cpi->oxcf.noise_sensitivity)
   2420         {
   2421             unsigned int sse;
   2422             vp8_get_inter_mbpred_error(x,&cpi->fn_ptr[BLOCK_16X16],&sse,
   2423                                    mode_mv[this_mode]);
   2424 
   2425             if (sse < best_rd_sse)
   2426                 best_rd_sse = sse;
   2427 
   2428             /* Store for later use by denoiser. */
   2429             if (this_mode == ZEROMV && sse < zero_mv_sse )
   2430             {
   2431                 zero_mv_sse = sse;
   2432                 x->best_zeromv_reference_frame =
   2433                         x->e_mbd.mode_info_context->mbmi.ref_frame;
   2434             }
   2435 
   2436             /* Store the best NEWMV in x for later use in the denoiser. */
   2437             if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV &&
   2438                     sse < best_sse)
   2439             {
   2440                 best_sse = sse;
   2441                 vp8_get_inter_mbpred_error(x,&cpi->fn_ptr[BLOCK_16X16],&best_sse,
   2442                                        mode_mv[this_mode]);
   2443                 x->best_sse_inter_mode = NEWMV;
   2444                 x->best_sse_mv = x->e_mbd.mode_info_context->mbmi.mv;
   2445                 x->need_to_clamp_best_mvs =
   2446                     x->e_mbd.mode_info_context->mbmi.need_to_clamp_mvs;
   2447                 x->best_reference_frame =
   2448                     x->e_mbd.mode_info_context->mbmi.ref_frame;
   2449             }
   2450         }
   2451 #endif
   2452 
   2453         /* Did this mode help, i.e. is it the new best mode */
   2454         if (this_rd < best_mode.rd || x->skip)
   2455         {
   2456             /* Note index of best mode so far */
   2457             best_mode_index = mode_index;
   2458             *returnrate = rd.rate2;
   2459             *returndistortion = rd.distortion2;
   2460             if (this_mode <= B_PRED)
   2461             {
   2462                 x->e_mbd.mode_info_context->mbmi.uv_mode = uv_intra_mode;
   2463                 /* required for left and above block mv */
   2464                 x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
   2465             }
   2466             update_best_mode(&best_mode, this_rd, &rd, other_cost, x);
   2467 
   2468 
   2469             /* Testing this mode gave rise to an improvement in best error
   2470              * score. Lower threshold a bit for next time
   2471              */
   2472             x->rd_thresh_mult[mode_index] =
   2473                 (x->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ?
   2474                     x->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT;
   2475         }
   2476 
   2477         /* If the mode did not help improve the best error case then raise
   2478          * the threshold for testing that mode next time around.
   2479          */
   2480         else
   2481         {
   2482             x->rd_thresh_mult[mode_index] += 4;
   2483 
   2484             if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT)
   2485                 x->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
   2486         }
   2487         x->rd_threshes[mode_index] =
   2488             (cpi->rd_baseline_thresh[mode_index] >> 7) *
   2489                 x->rd_thresh_mult[mode_index];
   2490 
   2491         if (x->skip)
   2492             break;
   2493 
   2494     }
   2495 
   2496     /* Reduce the activation RD thresholds for the best choice mode */
   2497     if ((cpi->rd_baseline_thresh[best_mode_index] > 0) && (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2)))
   2498     {
   2499         int best_adjustment = (x->rd_thresh_mult[best_mode_index] >> 2);
   2500 
   2501         x->rd_thresh_mult[best_mode_index] =
   2502             (x->rd_thresh_mult[best_mode_index] >=
   2503                 (MIN_THRESHMULT + best_adjustment)) ?
   2504                     x->rd_thresh_mult[best_mode_index] - best_adjustment :
   2505                     MIN_THRESHMULT;
   2506         x->rd_threshes[best_mode_index] =
   2507             (cpi->rd_baseline_thresh[best_mode_index] >> 7) *
   2508                 x->rd_thresh_mult[best_mode_index];
   2509     }
   2510 
   2511 #if CONFIG_TEMPORAL_DENOISING
   2512     if (cpi->oxcf.noise_sensitivity)
   2513     {
   2514         int block_index = mb_row * cpi->common.mb_cols + mb_col;
   2515         if (x->best_sse_inter_mode == DC_PRED)
   2516         {
   2517             /* No best MV found. */
   2518             x->best_sse_inter_mode = best_mode.mbmode.mode;
   2519             x->best_sse_mv = best_mode.mbmode.mv;
   2520             x->need_to_clamp_best_mvs = best_mode.mbmode.need_to_clamp_mvs;
   2521             x->best_reference_frame = best_mode.mbmode.ref_frame;
   2522             best_sse = best_rd_sse;
   2523         }
   2524         vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse,
   2525                                 recon_yoffset, recon_uvoffset,
   2526                                 &cpi->common.lf_info, mb_row, mb_col,
   2527                                 block_index);
   2528 
   2529         /* Reevaluate ZEROMV after denoising. */
   2530         if (best_mode.mbmode.ref_frame == INTRA_FRAME &&
   2531             x->best_zeromv_reference_frame != INTRA_FRAME)
   2532         {
   2533             int this_rd = INT_MAX;
   2534             int disable_skip = 0;
   2535             int other_cost = 0;
   2536             int this_ref_frame = x->best_zeromv_reference_frame;
   2537             rd.rate2 = x->ref_frame_cost[this_ref_frame] +
   2538                     vp8_cost_mv_ref(ZEROMV, mdcounts);
   2539             rd.distortion2 = 0;
   2540 
   2541             /* set up the proper prediction buffers for the frame */
   2542             x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame;
   2543             x->e_mbd.pre.y_buffer = plane[this_ref_frame][0];
   2544             x->e_mbd.pre.u_buffer = plane[this_ref_frame][1];
   2545             x->e_mbd.pre.v_buffer = plane[this_ref_frame][2];
   2546 
   2547             x->e_mbd.mode_info_context->mbmi.mode = ZEROMV;
   2548             x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
   2549             x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
   2550 
   2551             this_rd = evaluate_inter_mode_rd(mdcounts, &rd, &disable_skip, cpi, x);
   2552             this_rd = calculate_final_rd_costs(this_rd, &rd, &other_cost,
   2553                                                disable_skip, uv_intra_tteob,
   2554                                                intra_rd_penalty, cpi, x);
   2555             if (this_rd < best_mode.rd || x->skip)
   2556             {
   2557                 /* Note index of best mode so far */
   2558                 best_mode_index = mode_index;
   2559                 *returnrate = rd.rate2;
   2560                 *returndistortion = rd.distortion2;
   2561                 update_best_mode(&best_mode, this_rd, &rd, other_cost, x);
   2562             }
   2563         }
   2564 
   2565     }
   2566 #endif
   2567 
   2568     if (cpi->is_src_frame_alt_ref &&
   2569         (best_mode.mbmode.mode != ZEROMV || best_mode.mbmode.ref_frame != ALTREF_FRAME))
   2570     {
   2571         x->e_mbd.mode_info_context->mbmi.mode = ZEROMV;
   2572         x->e_mbd.mode_info_context->mbmi.ref_frame = ALTREF_FRAME;
   2573         x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
   2574         x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
   2575         x->e_mbd.mode_info_context->mbmi.mb_skip_coeff =
   2576                                         (cpi->common.mb_no_coeff_skip);
   2577         x->e_mbd.mode_info_context->mbmi.partitioning = 0;
   2578         return;
   2579     }
   2580 
   2581 
   2582     /* macroblock modes */
   2583     vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mode.mbmode, sizeof(MB_MODE_INFO));
   2584 
   2585     if (best_mode.mbmode.mode == B_PRED)
   2586     {
   2587         for (i = 0; i < 16; i++)
   2588             xd->mode_info_context->bmi[i].as_mode = best_mode.bmodes[i].as_mode;
   2589     }
   2590 
   2591     if (best_mode.mbmode.mode == SPLITMV)
   2592     {
   2593         for (i = 0; i < 16; i++)
   2594             xd->mode_info_context->bmi[i].mv.as_int = best_mode.bmodes[i].mv.as_int;
   2595 
   2596         vpx_memcpy(x->partition_info, &best_mode.partition, sizeof(PARTITION_INFO));
   2597 
   2598         x->e_mbd.mode_info_context->mbmi.mv.as_int =
   2599                                       x->partition_info->bmi[15].mv.as_int;
   2600     }
   2601 
   2602     if (sign_bias
   2603         != cpi->common.ref_frame_sign_bias[xd->mode_info_context->mbmi.ref_frame])
   2604         best_ref_mv.as_int = best_ref_mv_sb[!sign_bias].as_int;
   2605 
   2606     rd_update_mvcount(x, &best_ref_mv);
   2607 }
   2608 
   2609 void vp8_rd_pick_intra_mode(MACROBLOCK *x, int *rate_)
   2610 {
   2611     int error4x4, error16x16;
   2612     int rate4x4, rate16x16 = 0, rateuv;
   2613     int dist4x4, dist16x16, distuv;
   2614     int rate;
   2615     int rate4x4_tokenonly = 0;
   2616     int rate16x16_tokenonly = 0;
   2617     int rateuv_tokenonly = 0;
   2618 
   2619     x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
   2620 
   2621     rd_pick_intra_mbuv_mode(x, &rateuv, &rateuv_tokenonly, &distuv);
   2622     rate = rateuv;
   2623 
   2624     error16x16 = rd_pick_intra16x16mby_mode(x, &rate16x16, &rate16x16_tokenonly,
   2625                                             &dist16x16);
   2626 
   2627     error4x4 = rd_pick_intra4x4mby_modes(x, &rate4x4, &rate4x4_tokenonly,
   2628                                          &dist4x4, error16x16);
   2629 
   2630     if (error4x4 < error16x16)
   2631     {
   2632         x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
   2633         rate += rate4x4;
   2634     }
   2635     else
   2636     {
   2637         rate += rate16x16;
   2638     }
   2639 
   2640     *rate_ = rate;
   2641 }
   2642