Home | History | Annotate | Download | only in encoder
      1 /*
      2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 
     12 #include <stdio.h>
     13 #include <math.h>
     14 #include <limits.h>
     15 #include <assert.h>
     16 #include "vpx_config.h"
     17 #include "vp8_rtcd.h"
     18 #include "vp8/common/pragmas.h"
     19 #include "tokenize.h"
     20 #include "treewriter.h"
     21 #include "onyx_int.h"
     22 #include "modecosts.h"
     23 #include "encodeintra.h"
     24 #include "pickinter.h"
     25 #include "vp8/common/entropymode.h"
     26 #include "vp8/common/reconinter.h"
     27 #include "vp8/common/reconintra4x4.h"
     28 #include "vp8/common/findnearmv.h"
     29 #include "vp8/common/quant_common.h"
     30 #include "encodemb.h"
     31 #include "quantize.h"
     32 #include "vp8/common/variance.h"
     33 #include "mcomp.h"
     34 #include "rdopt.h"
     35 #include "vpx_mem/vpx_mem.h"
     36 #include "vp8/common/systemdependent.h"
     37 #if CONFIG_TEMPORAL_DENOISING
     38 #include "denoising.h"
     39 #endif
/* Prototype declared locally rather than pulled in through a header. */
extern void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x);

/* Evaluates to the larger of a and b.  Each argument can be evaluated
 * twice, so do not pass expressions with side effects.
 */
#define MAXF(a,b)            (((a) > (b)) ? (a) : (b))
     43 
/* Bundle of rate and distortion figures.
 * NOTE(review): no use of this type is visible in this part of the file;
 * the per-field notes below are read off the names only - confirm against
 * the code that fills them in.
 */
typedef struct rate_distortion_struct
{
    int rate2;          /* presumably the total rate */
    int rate_y;         /* presumably the luma part of the rate */
    int rate_uv;        /* presumably the chroma part of the rate */
    int distortion2;    /* presumably the total distortion */
    int distortion_uv;  /* presumably the chroma part of the distortion */
} RATE_DISTORTION;
     52 
/* Snapshot of a candidate mode decision together with its costs.
 * NOTE(review): no use of this type is visible in this part of the file;
 * the per-field notes are read off the names and types only - confirm
 * against the mode search that populates them.
 */
typedef struct best_mode_struct
{
  int yrd;                       /* presumably a luma-only RD cost */
  int rd;                        /* presumably the overall RD cost */
  int intra_rd;                  /* presumably the best intra RD cost */
  MB_MODE_INFO mbmode;           /* macroblock-level mode information */
  union b_mode_info bmodes[16];  /* one entry per 4x4 block */
  PARTITION_INFO partition;      /* partition information */
} BEST_MODE;
     62 
     63 static const int auto_speed_thresh[17] =
     64 {
     65     1000,
     66     200,
     67     150,
     68     130,
     69     150,
     70     125,
     71     120,
     72     115,
     73     115,
     74     115,
     75     115,
     76     115,
     77     115,
     78     115,
     79     115,
     80     115,
     81     105
     82 };
     83 
     84 const MB_PREDICTION_MODE vp8_mode_order[MAX_MODES] =
     85 {
     86     ZEROMV,
     87     DC_PRED,
     88 
     89     NEARESTMV,
     90     NEARMV,
     91 
     92     ZEROMV,
     93     NEARESTMV,
     94 
     95     ZEROMV,
     96     NEARESTMV,
     97 
     98     NEARMV,
     99     NEARMV,
    100 
    101     V_PRED,
    102     H_PRED,
    103     TM_PRED,
    104 
    105     NEWMV,
    106     NEWMV,
    107     NEWMV,
    108 
    109     SPLITMV,
    110     SPLITMV,
    111     SPLITMV,
    112 
    113     B_PRED,
    114 };
    115 
    116 /* This table determines the search order in reference frame priority order,
    117  * which may not necessarily match INTRA,LAST,GOLDEN,ARF
    118  */
    119 const int vp8_ref_frame_order[MAX_MODES] =
    120 {
    121     1,
    122     0,
    123 
    124     1,
    125     1,
    126 
    127     2,
    128     2,
    129 
    130     3,
    131     3,
    132 
    133     2,
    134     3,
    135 
    136     0,
    137     0,
    138     0,
    139 
    140     1,
    141     2,
    142     3,
    143 
    144     1,
    145     2,
    146     3,
    147 
    148     0,
    149 };
    150 
    151 static void fill_token_costs(
    152     int c[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS],
    153     const vp8_prob p[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]
    154 )
    155 {
    156     int i, j, k;
    157 
    158 
    159     for (i = 0; i < BLOCK_TYPES; i++)
    160         for (j = 0; j < COEF_BANDS; j++)
    161             for (k = 0; k < PREV_COEF_CONTEXTS; k++)
    162 
    163                 /* check for pt=0 and band > 1 if block type 0
    164                  * and 0 if blocktype 1
    165                  */
    166                 if (k == 0 && j > (i == 0))
    167                     vp8_cost_tokens2(c[i][j][k], p [i][j][k], vp8_coef_tree, 2);
    168                 else
    169                     vp8_cost_tokens(c[i][j][k], p [i][j][k], vp8_coef_tree);
    170 }
    171 
    172 static const int rd_iifactor[32] =
    173 {
    174     4, 4, 3, 2, 1, 0, 0, 0,
    175     0, 0, 0, 0, 0, 0, 0, 0,
    176     0, 0, 0, 0, 0, 0, 0, 0,
    177     0, 0, 0, 0, 0, 0, 0, 0
    178 };
    179 
/* SAD-per-bit weights indexed by quantizer index (QINDEX_RANGE entries).
 * vp8cx_initialize_me_consts() copies entry [QIndex] into
 * cpi->mb.sadperbit16.  Values are correlated to the quantizer: they
 * never decrease as the index grows.
 */
static const int sad_per_bit16lut[QINDEX_RANGE] =
{
    2,  2,  2,  2,  2,  2,  2,  2,
    2,  2,  2,  2,  2,  2,  2,  2,
    3,  3,  3,  3,  3,  3,  3,  3,
    3,  3,  3,  3,  3,  3,  4,  4,
    4,  4,  4,  4,  4,  4,  4,  4,
    4,  4,  5,  5,  5,  5,  5,  5,
    5,  5,  5,  5,  5,  5,  6,  6,
    6,  6,  6,  6,  6,  6,  6,  6,
    6,  6,  7,  7,  7,  7,  7,  7,
    7,  7,  7,  7,  7,  7,  8,  8,
    8,  8,  8,  8,  8,  8,  8,  8,
    8,  8,  9,  9,  9,  9,  9,  9,
    9,  9,  9,  9,  9,  9,  10, 10,
    10, 10, 10, 10, 10, 10, 11, 11,
    11, 11, 11, 11, 12, 12, 12, 12,
    12, 12, 13, 13, 13, 13, 14, 14
};
/* SAD-per-bit weights indexed by quantizer index (QINDEX_RANGE entries).
 * vp8cx_initialize_me_consts() copies entry [QIndex] into
 * cpi->mb.sadperbit4.
 */
static const int sad_per_bit4lut[QINDEX_RANGE] =
{
    2,  2,  2,  2,  2,  2,  3,  3,
    3,  3,  3,  3,  3,  3,  3,  3,
    3,  3,  3,  3,  4,  4,  4,  4,
    4,  4,  4,  4,  4,  4,  5,  5,
    5,  5,  5,  5,  6,  6,  6,  6,
    6,  6,  6,  6,  6,  6,  6,  6,
    7,  7,  7,  7,  7,  7,  7,  7,
    7,  7,  7,  7,  7,  8,  8,  8,
    8,  8,  9,  9,  9,  9,  9,  9,
    10, 10, 10, 10, 10, 10, 10, 10,
    11, 11, 11, 11, 11, 11, 11, 11,
    12, 12, 12, 12, 12, 12, 12, 12,
    13, 13, 13, 13, 13, 13, 13, 14,
    14, 14, 14, 14, 15, 15, 15, 15,
    16, 16, 16, 16, 17, 17, 17, 18,
    18, 18, 19, 19, 19, 20, 20, 20,
};
    219 
    220 void vp8cx_initialize_me_consts(VP8_COMP *cpi, int QIndex)
    221 {
    222     cpi->mb.sadperbit16 =  sad_per_bit16lut[QIndex];
    223     cpi->mb.sadperbit4  =  sad_per_bit4lut[QIndex];
    224 }
    225 
    226 void vp8_initialize_rd_consts(VP8_COMP *cpi, MACROBLOCK *x, int Qvalue)
    227 {
    228     int q;
    229     int i;
    230     double capped_q = (Qvalue < 160) ? (double)Qvalue : 160.0;
    231     double rdconst = 2.80;
    232 
    233     vp8_clear_system_state();
    234 
    235     /* Further tests required to see if optimum is different
    236      * for key frames, golden frames and arf frames.
    237      */
    238     cpi->RDMULT = (int)(rdconst * (capped_q * capped_q));
    239 
    240     /* Extend rate multiplier along side quantizer zbin increases */
    241     if (cpi->mb.zbin_over_quant  > 0)
    242     {
    243         double oq_factor;
    244         double modq;
    245 
    246         /* Experimental code using the same basic equation as used for Q above
    247          * The units of cpi->mb.zbin_over_quant are 1/128 of Q bin size
    248          */
    249         oq_factor = 1.0 + ((double)0.0015625 * cpi->mb.zbin_over_quant);
    250         modq = (int)((double)capped_q * oq_factor);
    251         cpi->RDMULT = (int)(rdconst * (modq * modq));
    252     }
    253 
    254     if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME))
    255     {
    256         if (cpi->twopass.next_iiratio > 31)
    257             cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
    258         else
    259             cpi->RDMULT +=
    260                 (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
    261     }
    262 
    263     cpi->mb.errorperbit = (cpi->RDMULT / 110);
    264     cpi->mb.errorperbit += (cpi->mb.errorperbit==0);
    265 
    266     vp8_set_speed_features(cpi);
    267 
    268     for (i = 0; i < MAX_MODES; i++)
    269     {
    270         x->mode_test_hit_counts[i] = 0;
    271     }
    272 
    273     q = (int)pow(Qvalue, 1.25);
    274 
    275     if (q < 8)
    276         q = 8;
    277 
    278     if (cpi->RDMULT > 1000)
    279     {
    280         cpi->RDDIV = 1;
    281         cpi->RDMULT /= 100;
    282 
    283         for (i = 0; i < MAX_MODES; i++)
    284         {
    285             if (cpi->sf.thresh_mult[i] < INT_MAX)
    286             {
    287                 x->rd_threshes[i] = cpi->sf.thresh_mult[i] * q / 100;
    288             }
    289             else
    290             {
    291                 x->rd_threshes[i] = INT_MAX;
    292             }
    293 
    294             cpi->rd_baseline_thresh[i] = x->rd_threshes[i];
    295         }
    296     }
    297     else
    298     {
    299         cpi->RDDIV = 100;
    300 
    301         for (i = 0; i < MAX_MODES; i++)
    302         {
    303             if (cpi->sf.thresh_mult[i] < (INT_MAX / q))
    304             {
    305                 x->rd_threshes[i] = cpi->sf.thresh_mult[i] * q;
    306             }
    307             else
    308             {
    309                 x->rd_threshes[i] = INT_MAX;
    310             }
    311 
    312             cpi->rd_baseline_thresh[i] = x->rd_threshes[i];
    313         }
    314     }
    315 
    316     {
    317       /* build token cost array for the type of frame we have now */
    318       FRAME_CONTEXT *l = &cpi->lfc_n;
    319 
    320       if(cpi->common.refresh_alt_ref_frame)
    321           l = &cpi->lfc_a;
    322       else if(cpi->common.refresh_golden_frame)
    323           l = &cpi->lfc_g;
    324 
    325       fill_token_costs(
    326           cpi->mb.token_costs,
    327           (const vp8_prob( *)[8][3][11]) l->coef_probs
    328       );
    329       /*
    330       fill_token_costs(
    331           cpi->mb.token_costs,
    332           (const vp8_prob( *)[8][3][11]) cpi->common.fc.coef_probs);
    333       */
    334 
    335 
    336       /* TODO make these mode costs depend on last,alt or gold too.  (jbb) */
    337       vp8_init_mode_costs(cpi);
    338     }
    339 
    340 }
    341 
    342 void vp8_auto_select_speed(VP8_COMP *cpi)
    343 {
    344     int milliseconds_for_compress = (int)(1000000 / cpi->framerate);
    345 
    346     milliseconds_for_compress = milliseconds_for_compress * (16 - cpi->oxcf.cpu_used) / 16;
    347 
    348 #if 0
    349 
    350     if (0)
    351     {
    352         FILE *f;
    353 
    354         f = fopen("speed.stt", "a");
    355         fprintf(f, " %8ld %10ld %10ld %10ld\n",
    356                 cpi->common.current_video_frame, cpi->Speed, milliseconds_for_compress, cpi->avg_pick_mode_time);
    357         fclose(f);
    358     }
    359 
    360 #endif
    361 
    362     if (cpi->avg_pick_mode_time < milliseconds_for_compress && (cpi->avg_encode_time - cpi->avg_pick_mode_time) < milliseconds_for_compress)
    363     {
    364         if (cpi->avg_pick_mode_time == 0)
    365         {
    366             cpi->Speed = 4;
    367         }
    368         else
    369         {
    370             if (milliseconds_for_compress * 100 < cpi->avg_encode_time * 95)
    371             {
    372                 cpi->Speed          += 2;
    373                 cpi->avg_pick_mode_time = 0;
    374                 cpi->avg_encode_time = 0;
    375 
    376                 if (cpi->Speed > 16)
    377                 {
    378                     cpi->Speed = 16;
    379                 }
    380             }
    381 
    382             if (milliseconds_for_compress * 100 > cpi->avg_encode_time * auto_speed_thresh[cpi->Speed])
    383             {
    384                 cpi->Speed          -= 1;
    385                 cpi->avg_pick_mode_time = 0;
    386                 cpi->avg_encode_time = 0;
    387 
    388                 /* In real-time mode, cpi->speed is in [4, 16]. */
    389                 if (cpi->Speed < 4)
    390                 {
    391                     cpi->Speed = 4;
    392                 }
    393             }
    394         }
    395     }
    396     else
    397     {
    398         cpi->Speed += 4;
    399 
    400         if (cpi->Speed > 16)
    401             cpi->Speed = 16;
    402 
    403 
    404         cpi->avg_pick_mode_time = 0;
    405         cpi->avg_encode_time = 0;
    406     }
    407 }
    408 
    409 int vp8_block_error_c(short *coeff, short *dqcoeff)
    410 {
    411     int i;
    412     int error = 0;
    413 
    414     for (i = 0; i < 16; i++)
    415     {
    416         int this_diff = coeff[i] - dqcoeff[i];
    417         error += this_diff * this_diff;
    418     }
    419 
    420     return error;
    421 }
    422 
    423 int vp8_mbblock_error_c(MACROBLOCK *mb, int dc)
    424 {
    425     BLOCK  *be;
    426     BLOCKD *bd;
    427     int i, j;
    428     int berror, error = 0;
    429 
    430     for (i = 0; i < 16; i++)
    431     {
    432         be = &mb->block[i];
    433         bd = &mb->e_mbd.block[i];
    434 
    435         berror = 0;
    436 
    437         for (j = dc; j < 16; j++)
    438         {
    439             int this_diff = be->coeff[j] - bd->dqcoeff[j];
    440             berror += this_diff * this_diff;
    441         }
    442 
    443         error += berror;
    444     }
    445 
    446     return error;
    447 }
    448 
    449 int vp8_mbuverror_c(MACROBLOCK *mb)
    450 {
    451 
    452     BLOCK  *be;
    453     BLOCKD *bd;
    454 
    455 
    456     int i;
    457     int error = 0;
    458 
    459     for (i = 16; i < 24; i++)
    460     {
    461         be = &mb->block[i];
    462         bd = &mb->e_mbd.block[i];
    463 
    464         error += vp8_block_error_c(be->coeff, bd->dqcoeff);
    465     }
    466 
    467     return error;
    468 }
    469 
    470 int VP8_UVSSE(MACROBLOCK *x)
    471 {
    472     unsigned char *uptr, *vptr;
    473     unsigned char *upred_ptr = (*(x->block[16].base_src) + x->block[16].src);
    474     unsigned char *vpred_ptr = (*(x->block[20].base_src) + x->block[20].src);
    475     int uv_stride = x->block[16].src_stride;
    476 
    477     unsigned int sse1 = 0;
    478     unsigned int sse2 = 0;
    479     int mv_row = x->e_mbd.mode_info_context->mbmi.mv.as_mv.row;
    480     int mv_col = x->e_mbd.mode_info_context->mbmi.mv.as_mv.col;
    481     int offset;
    482     int pre_stride = x->e_mbd.pre.uv_stride;
    483 
    484     if (mv_row < 0)
    485         mv_row -= 1;
    486     else
    487         mv_row += 1;
    488 
    489     if (mv_col < 0)
    490         mv_col -= 1;
    491     else
    492         mv_col += 1;
    493 
    494     mv_row /= 2;
    495     mv_col /= 2;
    496 
    497     offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
    498     uptr = x->e_mbd.pre.u_buffer + offset;
    499     vptr = x->e_mbd.pre.v_buffer + offset;
    500 
    501     if ((mv_row | mv_col) & 7)
    502     {
    503         vp8_sub_pixel_variance8x8(uptr, pre_stride,
    504             mv_col & 7, mv_row & 7, upred_ptr, uv_stride, &sse2);
    505         vp8_sub_pixel_variance8x8(vptr, pre_stride,
    506             mv_col & 7, mv_row & 7, vpred_ptr, uv_stride, &sse1);
    507         sse2 += sse1;
    508     }
    509     else
    510     {
    511         vp8_variance8x8(uptr, pre_stride,
    512             upred_ptr, uv_stride, &sse2);
    513         vp8_variance8x8(vptr, pre_stride,
    514             vpred_ptr, uv_stride, &sse1);
    515         sse2 += sse1;
    516     }
    517     return sse2;
    518 
    519 }
    520 
    521 static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, int type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l)
    522 {
    523     int c = !type;              /* start at coef 0, unless Y with Y2 */
    524     int eob = (int)(*b->eob);
    525     int pt ;    /* surrounding block/prev coef predictor */
    526     int cost = 0;
    527     short *qcoeff_ptr = b->qcoeff;
    528 
    529     VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
    530 
    531     assert(eob <= 16);
    532     for (; c < eob; c++)
    533     {
    534         const int v = qcoeff_ptr[vp8_default_zig_zag1d[c]];
    535         const int t = vp8_dct_value_tokens_ptr[v].Token;
    536         cost += mb->token_costs [type] [vp8_coef_bands[c]] [pt] [t];
    537         cost += vp8_dct_value_cost_ptr[v];
    538         pt = vp8_prev_token_class[t];
    539     }
    540 
    541     if (c < 16)
    542         cost += mb->token_costs [type] [vp8_coef_bands[c]] [pt] [DCT_EOB_TOKEN];
    543 
    544     pt = (c != !type); /* is eob first coefficient; */
    545     *a = *l = pt;
    546 
    547     return cost;
    548 }
    549 
    550 static int vp8_rdcost_mby(MACROBLOCK *mb)
    551 {
    552     int cost = 0;
    553     int b;
    554     MACROBLOCKD *x = &mb->e_mbd;
    555     ENTROPY_CONTEXT_PLANES t_above, t_left;
    556     ENTROPY_CONTEXT *ta;
    557     ENTROPY_CONTEXT *tl;
    558 
    559     vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    560     vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
    561 
    562     ta = (ENTROPY_CONTEXT *)&t_above;
    563     tl = (ENTROPY_CONTEXT *)&t_left;
    564 
    565     for (b = 0; b < 16; b++)
    566         cost += cost_coeffs(mb, x->block + b, PLANE_TYPE_Y_NO_DC,
    567                     ta + vp8_block2above[b], tl + vp8_block2left[b]);
    568 
    569     cost += cost_coeffs(mb, x->block + 24, PLANE_TYPE_Y2,
    570                 ta + vp8_block2above[24], tl + vp8_block2left[24]);
    571 
    572     return cost;
    573 }
    574 
    575 static void macro_block_yrd( MACROBLOCK *mb,
    576                              int *Rate,
    577                              int *Distortion)
    578 {
    579     int b;
    580     MACROBLOCKD *const x = &mb->e_mbd;
    581     BLOCK   *const mb_y2 = mb->block + 24;
    582     BLOCKD *const x_y2  = x->block + 24;
    583     short *Y2DCPtr = mb_y2->src_diff;
    584     BLOCK *beptr;
    585     int d;
    586 
    587     vp8_subtract_mby( mb->src_diff, *(mb->block[0].base_src),
    588         mb->block[0].src_stride,  mb->e_mbd.predictor, 16);
    589 
    590     /* Fdct and building the 2nd order block */
    591     for (beptr = mb->block; beptr < mb->block + 16; beptr += 2)
    592     {
    593         mb->short_fdct8x4(beptr->src_diff, beptr->coeff, 32);
    594         *Y2DCPtr++ = beptr->coeff[0];
    595         *Y2DCPtr++ = beptr->coeff[16];
    596     }
    597 
    598     /* 2nd order fdct */
    599     mb->short_walsh4x4(mb_y2->src_diff, mb_y2->coeff, 8);
    600 
    601     /* Quantization */
    602     for (b = 0; b < 16; b++)
    603     {
    604         mb->quantize_b(&mb->block[b], &mb->e_mbd.block[b]);
    605     }
    606 
    607     /* DC predication and Quantization of 2nd Order block */
    608     mb->quantize_b(mb_y2, x_y2);
    609 
    610     /* Distortion */
    611     d = vp8_mbblock_error(mb, 1) << 2;
    612     d += vp8_block_error(mb_y2->coeff, x_y2->dqcoeff);
    613 
    614     *Distortion = (d >> 4);
    615 
    616     /* rate */
    617     *Rate = vp8_rdcost_mby(mb);
    618 }
    619 
    620 static void copy_predictor(unsigned char *dst, const unsigned char *predictor)
    621 {
    622     const unsigned int *p = (const unsigned int *)predictor;
    623     unsigned int *d = (unsigned int *)dst;
    624     d[0] = p[0];
    625     d[4] = p[4];
    626     d[8] = p[8];
    627     d[12] = p[12];
    628 }
    629 static int rd_pick_intra4x4block(
    630     MACROBLOCK *x,
    631     BLOCK *be,
    632     BLOCKD *b,
    633     B_PREDICTION_MODE *best_mode,
    634     const int *bmode_costs,
    635     ENTROPY_CONTEXT *a,
    636     ENTROPY_CONTEXT *l,
    637 
    638     int *bestrate,
    639     int *bestratey,
    640     int *bestdistortion)
    641 {
    642     B_PREDICTION_MODE mode;
    643     int best_rd = INT_MAX;
    644     int rate = 0;
    645     int distortion;
    646 
    647     ENTROPY_CONTEXT ta = *a, tempa = *a;
    648     ENTROPY_CONTEXT tl = *l, templ = *l;
    649     /*
    650      * The predictor buffer is a 2d buffer with a stride of 16.  Create
    651      * a temp buffer that meets the stride requirements, but we are only
    652      * interested in the left 4x4 block
    653      * */
    654     DECLARE_ALIGNED_ARRAY(16, unsigned char,  best_predictor, 16*4);
    655     DECLARE_ALIGNED_ARRAY(16, short, best_dqcoeff, 16);
    656     int dst_stride = x->e_mbd.dst.y_stride;
    657     unsigned char *dst = x->e_mbd.dst.y_buffer + b->offset;
    658 
    659     unsigned char *Above = dst - dst_stride;
    660     unsigned char *yleft = dst - 1;
    661     unsigned char top_left = Above[-1];
    662 
    663     for (mode = B_DC_PRED; mode <= B_HU_PRED; mode++)
    664     {
    665         int this_rd;
    666         int ratey;
    667 
    668         rate = bmode_costs[mode];
    669 
    670         vp8_intra4x4_predict(Above, yleft, dst_stride, mode,
    671                              b->predictor, 16, top_left);
    672         vp8_subtract_b(be, b, 16);
    673         x->short_fdct4x4(be->src_diff, be->coeff, 32);
    674         x->quantize_b(be, b);
    675 
    676         tempa = ta;
    677         templ = tl;
    678 
    679         ratey = cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC, &tempa, &templ);
    680         rate += ratey;
    681         distortion = vp8_block_error(be->coeff, b->dqcoeff) >> 2;
    682 
    683         this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
    684 
    685         if (this_rd < best_rd)
    686         {
    687             *bestrate = rate;
    688             *bestratey = ratey;
    689             *bestdistortion = distortion;
    690             best_rd = this_rd;
    691             *best_mode = mode;
    692             *a = tempa;
    693             *l = templ;
    694             copy_predictor(best_predictor, b->predictor);
    695             vpx_memcpy(best_dqcoeff, b->dqcoeff, 32);
    696         }
    697     }
    698     b->bmi.as_mode = *best_mode;
    699 
    700     vp8_short_idct4x4llm(best_dqcoeff, best_predictor, 16, dst, dst_stride);
    701 
    702     return best_rd;
    703 }
    704 
    705 static int rd_pick_intra4x4mby_modes(MACROBLOCK *mb, int *Rate,
    706                                      int *rate_y, int *Distortion, int best_rd)
    707 {
    708     MACROBLOCKD *const xd = &mb->e_mbd;
    709     int i;
    710     int cost = mb->mbmode_cost [xd->frame_type] [B_PRED];
    711     int distortion = 0;
    712     int tot_rate_y = 0;
    713     int64_t total_rd = 0;
    714     ENTROPY_CONTEXT_PLANES t_above, t_left;
    715     ENTROPY_CONTEXT *ta;
    716     ENTROPY_CONTEXT *tl;
    717     const int *bmode_costs;
    718 
    719     vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    720     vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
    721 
    722     ta = (ENTROPY_CONTEXT *)&t_above;
    723     tl = (ENTROPY_CONTEXT *)&t_left;
    724 
    725     intra_prediction_down_copy(xd, xd->dst.y_buffer - xd->dst.y_stride + 16);
    726 
    727     bmode_costs = mb->inter_bmode_costs;
    728 
    729     for (i = 0; i < 16; i++)
    730     {
    731         MODE_INFO *const mic = xd->mode_info_context;
    732         const int mis = xd->mode_info_stride;
    733         B_PREDICTION_MODE best_mode = 0;
    734         int r = 0, ry = 0, d = 0;
    735 
    736         if (mb->e_mbd.frame_type == KEY_FRAME)
    737         {
    738             const B_PREDICTION_MODE A = above_block_mode(mic, i, mis);
    739             const B_PREDICTION_MODE L = left_block_mode(mic, i);
    740 
    741             bmode_costs  = mb->bmode_costs[A][L];
    742         }
    743 
    744         total_rd += rd_pick_intra4x4block(
    745             mb, mb->block + i, xd->block + i, &best_mode, bmode_costs,
    746             ta + vp8_block2above[i],
    747             tl + vp8_block2left[i], &r, &ry, &d);
    748 
    749         cost += r;
    750         distortion += d;
    751         tot_rate_y += ry;
    752 
    753         mic->bmi[i].as_mode = best_mode;
    754 
    755         if(total_rd >= (int64_t)best_rd)
    756             break;
    757     }
    758 
    759     if(total_rd >= (int64_t)best_rd)
    760         return INT_MAX;
    761 
    762     *Rate = cost;
    763     *rate_y = tot_rate_y;
    764     *Distortion = distortion;
    765 
    766     return RDCOST(mb->rdmult, mb->rddiv, cost, distortion);
    767 }
    768 
    769 
    770 static int rd_pick_intra16x16mby_mode(MACROBLOCK *x,
    771                                       int *Rate,
    772                                       int *rate_y,
    773                                       int *Distortion)
    774 {
    775     MB_PREDICTION_MODE mode;
    776     MB_PREDICTION_MODE mode_selected;
    777     int rate, ratey;
    778     int distortion;
    779     int best_rd = INT_MAX;
    780     int this_rd;
    781     MACROBLOCKD *xd = &x->e_mbd;
    782     mode_selected = 0;
    783 
    784     /* Y Search for 16x16 intra prediction mode */
    785     for (mode = DC_PRED; mode <= TM_PRED; mode++)
    786     {
    787         xd->mode_info_context->mbmi.mode = mode;
    788 
    789         vp8_build_intra_predictors_mby_s(xd,
    790                                          xd->dst.y_buffer - xd->dst.y_stride,
    791                                          xd->dst.y_buffer - 1,
    792                                          xd->dst.y_stride,
    793                                          xd->predictor,
    794                                          16);
    795 
    796         macro_block_yrd(x, &ratey, &distortion);
    797         rate = ratey + x->mbmode_cost[xd->frame_type]
    798                                      [xd->mode_info_context->mbmi.mode];
    799 
    800         this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
    801 
    802         if (this_rd < best_rd)
    803         {
    804             mode_selected = mode;
    805             best_rd = this_rd;
    806             *Rate = rate;
    807             *rate_y = ratey;
    808             *Distortion = distortion;
    809         }
    810     }
    811 
    812     xd->mode_info_context->mbmi.mode = mode_selected;
    813     return best_rd;
    814 }
    815 
    816 static int rd_cost_mbuv(MACROBLOCK *mb)
    817 {
    818     int b;
    819     int cost = 0;
    820     MACROBLOCKD *x = &mb->e_mbd;
    821     ENTROPY_CONTEXT_PLANES t_above, t_left;
    822     ENTROPY_CONTEXT *ta;
    823     ENTROPY_CONTEXT *tl;
    824 
    825     vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    826     vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
    827 
    828     ta = (ENTROPY_CONTEXT *)&t_above;
    829     tl = (ENTROPY_CONTEXT *)&t_left;
    830 
    831     for (b = 16; b < 24; b++)
    832         cost += cost_coeffs(mb, x->block + b, PLANE_TYPE_UV,
    833                     ta + vp8_block2above[b], tl + vp8_block2left[b]);
    834 
    835     return cost;
    836 }
    837 
    838 
    839 static int rd_inter16x16_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate,
    840                             int *distortion, int fullpixel)
    841 {
    842     UNINITIALIZED_IS_SAFE(cpi);
    843     UNINITIALIZED_IS_SAFE(fullpixel);
    844     vp8_build_inter16x16_predictors_mbuv(&x->e_mbd);
    845     vp8_subtract_mbuv(x->src_diff,
    846         x->src.u_buffer, x->src.v_buffer, x->src.uv_stride,
    847         &x->e_mbd.predictor[256], &x->e_mbd.predictor[320], 8);
    848 
    849     vp8_transform_mbuv(x);
    850     vp8_quantize_mbuv(x);
    851 
    852     *rate       = rd_cost_mbuv(x);
    853     *distortion = vp8_mbuverror(x) / 4;
    854 
    855     return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
    856 }
    857 
    858 static int rd_inter4x4_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate,
    859                           int *distortion, int fullpixel)
    860 {
    861     UNINITIALIZED_IS_SAFE(cpi);
    862     UNINITIALIZED_IS_SAFE(fullpixel);
    863     vp8_build_inter4x4_predictors_mbuv(&x->e_mbd);
    864     vp8_subtract_mbuv(x->src_diff,
    865         x->src.u_buffer, x->src.v_buffer, x->src.uv_stride,
    866         &x->e_mbd.predictor[256], &x->e_mbd.predictor[320], 8);
    867 
    868     vp8_transform_mbuv(x);
    869     vp8_quantize_mbuv(x);
    870 
    871     *rate       = rd_cost_mbuv(x);
    872     *distortion = vp8_mbuverror(x) / 4;
    873 
    874     return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
    875 }
    876 
    877 static void rd_pick_intra_mbuv_mode(MACROBLOCK *x, int *rate,
    878                                     int *rate_tokenonly, int *distortion)
    879 {
    880     MB_PREDICTION_MODE mode;
    881     MB_PREDICTION_MODE mode_selected;
    882     int best_rd = INT_MAX;
    883     int d = 0, r = 0;
    884     int rate_to;
    885     MACROBLOCKD *xd = &x->e_mbd;
    886     mode_selected = 0;
    887     for (mode = DC_PRED; mode <= TM_PRED; mode++)
    888     {
    889         int this_rate;
    890         int this_distortion;
    891         int this_rd;
    892 
    893         xd->mode_info_context->mbmi.uv_mode = mode;
    894 
    895         vp8_build_intra_predictors_mbuv_s(xd,
    896                                           xd->dst.u_buffer - xd->dst.uv_stride,
    897                                           xd->dst.v_buffer - xd->dst.uv_stride,
    898                                           xd->dst.u_buffer - 1,
    899                                           xd->dst.v_buffer - 1,
    900                                           xd->dst.uv_stride,
    901                                           &xd->predictor[256], &xd->predictor[320],
    902                                           8);
    903 
    904 
    905         vp8_subtract_mbuv(x->src_diff,
    906                       x->src.u_buffer, x->src.v_buffer, x->src.uv_stride,
    907                       &xd->predictor[256], &xd->predictor[320], 8);
    908         vp8_transform_mbuv(x);
    909         vp8_quantize_mbuv(x);
    910 
    911         rate_to = rd_cost_mbuv(x);
    912         this_rate = rate_to + x->intra_uv_mode_cost[xd->frame_type][xd->mode_info_context->mbmi.uv_mode];
    913 
    914         this_distortion = vp8_mbuverror(x) / 4;
    915 
    916         this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
    917 
    918         if (this_rd < best_rd)
    919         {
    920             best_rd = this_rd;
    921             d = this_distortion;
    922             r = this_rate;
    923             *rate_tokenonly = rate_to;
    924             mode_selected = mode;
    925         }
    926     }
    927 
    928     *rate = r;
    929     *distortion = d;
    930 
    931     xd->mode_info_context->mbmi.uv_mode = mode_selected;
    932 }
    933 
    934 int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4])
    935 {
    936     vp8_prob p [VP8_MVREFS-1];
    937     assert(NEARESTMV <= m  &&  m <= SPLITMV);
    938     vp8_mv_ref_probs(p, near_mv_ref_ct);
    939     return vp8_cost_token(vp8_mv_ref_tree, p,
    940                           vp8_mv_ref_encoding_array + (m - NEARESTMV));
    941 }
    942 
    943 void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv)
    944 {
    945     x->e_mbd.mode_info_context->mbmi.mode = mb;
    946     x->e_mbd.mode_info_context->mbmi.mv.as_int = mv->as_int;
    947 }
    948 
    949 static int labels2mode(
    950     MACROBLOCK *x,
    951     int const *labelings, int which_label,
    952     B_PREDICTION_MODE this_mode,
    953     int_mv *this_mv, int_mv *best_ref_mv,
    954     int *mvcost[2]
    955 )
    956 {
    957     MACROBLOCKD *const xd = & x->e_mbd;
    958     MODE_INFO *const mic = xd->mode_info_context;
    959     const int mis = xd->mode_info_stride;
    960 
    961     int cost = 0;
    962     int thismvcost = 0;
    963 
    964     /* We have to be careful retrieving previously-encoded motion vectors.
    965        Ones from this macroblock have to be pulled from the BLOCKD array
    966        as they have not yet made it to the bmi array in our MB_MODE_INFO. */
    967 
    968     int i = 0;
    969 
    970     do
    971     {
    972         BLOCKD *const d = xd->block + i;
    973         const int row = i >> 2,  col = i & 3;
    974 
    975         B_PREDICTION_MODE m;
    976 
    977         if (labelings[i] != which_label)
    978             continue;
    979 
    980         if (col  &&  labelings[i] == labelings[i-1])
    981             m = LEFT4X4;
    982         else if (row  &&  labelings[i] == labelings[i-4])
    983             m = ABOVE4X4;
    984         else
    985         {
    986             /* the only time we should do costing for new motion vector
    987              * or mode is when we are on a new label  (jbb May 08, 2007)
    988              */
    989             switch (m = this_mode)
    990             {
    991             case NEW4X4 :
    992                 thismvcost  = vp8_mv_bit_cost(this_mv, best_ref_mv, mvcost, 102);
    993                 break;
    994             case LEFT4X4:
    995                 this_mv->as_int = col ? d[-1].bmi.mv.as_int : (uint32_t)left_block_mv(mic, i);
    996                 break;
    997             case ABOVE4X4:
    998                 this_mv->as_int = row ? d[-4].bmi.mv.as_int : (uint32_t)above_block_mv(mic, i, mis);
    999                 break;
   1000             case ZERO4X4:
   1001                 this_mv->as_int = 0;
   1002                 break;
   1003             default:
   1004                 break;
   1005             }
   1006 
   1007             if (m == ABOVE4X4)  /* replace above with left if same */
   1008             {
   1009                 int_mv left_mv;
   1010 
   1011                 left_mv.as_int = col ? d[-1].bmi.mv.as_int :
   1012                                         (uint32_t)left_block_mv(mic, i);
   1013 
   1014                 if (left_mv.as_int == this_mv->as_int)
   1015                     m = LEFT4X4;
   1016             }
   1017 
   1018             cost = x->inter_bmode_costs[ m];
   1019         }
   1020 
   1021         d->bmi.mv.as_int = this_mv->as_int;
   1022 
   1023         x->partition_info->bmi[i].mode = m;
   1024         x->partition_info->bmi[i].mv.as_int = this_mv->as_int;
   1025 
   1026     }
   1027     while (++i < 16);
   1028 
   1029     cost += thismvcost ;
   1030     return cost;
   1031 }
   1032 
   1033 static int rdcost_mbsegment_y(MACROBLOCK *mb, const int *labels,
   1034                               int which_label, ENTROPY_CONTEXT *ta,
   1035                               ENTROPY_CONTEXT *tl)
   1036 {
   1037     int cost = 0;
   1038     int b;
   1039     MACROBLOCKD *x = &mb->e_mbd;
   1040 
   1041     for (b = 0; b < 16; b++)
   1042         if (labels[ b] == which_label)
   1043             cost += cost_coeffs(mb, x->block + b, PLANE_TYPE_Y_WITH_DC,
   1044                                 ta + vp8_block2above[b],
   1045                                 tl + vp8_block2left[b]);
   1046 
   1047     return cost;
   1048 
   1049 }
   1050 static unsigned int vp8_encode_inter_mb_segment(MACROBLOCK *x, int const *labels, int which_label)
   1051 {
   1052     int i;
   1053     unsigned int distortion = 0;
   1054     int pre_stride = x->e_mbd.pre.y_stride;
   1055     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1056 
   1057 
   1058     for (i = 0; i < 16; i++)
   1059     {
   1060         if (labels[i] == which_label)
   1061         {
   1062             BLOCKD *bd = &x->e_mbd.block[i];
   1063             BLOCK *be = &x->block[i];
   1064 
   1065             vp8_build_inter_predictors_b(bd, 16, base_pre, pre_stride, x->e_mbd.subpixel_predict);
   1066             vp8_subtract_b(be, bd, 16);
   1067             x->short_fdct4x4(be->src_diff, be->coeff, 32);
   1068             x->quantize_b(be, bd);
   1069 
   1070             distortion += vp8_block_error(be->coeff, bd->dqcoeff);
   1071         }
   1072     }
   1073 
   1074     return distortion;
   1075 }
   1076 
   1077 
/* Right shift, indexed by segmentation type, that rd_check_segment() applies
 * to the best diamond-search result before comparing it against the
 * full-search threshold.
 * NOTE(review): presumably this scales the error for the differing partition
 * sizes - confirm against the order of the segmentation enum.
 */
static const unsigned int segmentation_to_sseshift[4] = {3, 3, 2, 0};
   1079 
   1080 
/* Working state carried across rd_check_segment() calls while
 * vp8_rd_pick_best_mbsegmentation() searches for the best split of one
 * macroblock.
 */
typedef struct
{
  int_mv *ref_mv;   /* reference MV passed to the searches and to MV costing */
  int_mv mvp;       /* MV predictor: start point of the NEW4X4 search */

  int segment_rd;   /* best RD cost so far; a segmentation must beat it */
  int segment_num;  /* segmentation that produced segment_rd */
  int r;            /* total rate of the best segmentation */
  int d;            /* total distortion of the best segmentation */
  int segment_yrate;/* summed per-label luma coefficient rate of the best */
  B_PREDICTION_MODE modes[16]; /* per-4x4 modes of the best segmentation */
  int_mv mvs[16];              /* per-4x4 vectors of the best segmentation */
  unsigned char eobs[16];      /* per-4x4 eob values of the best segmentation */

  int mvthresh;     /* RD threshold used to skip the NEW4X4 search */
  int *mdcounts;    /* neighbour mode counts passed to vp8_cost_mv_ref() */

  int_mv sv_mvp[4]; /* save 4 mvp from 8x8 */
  int sv_istep[2];  /* save 2 initial step_param for 16x8/8x16 */

} BEST_SEG_INFO;
   1102 
   1103 
   1104 static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
   1105                              BEST_SEG_INFO *bsi, unsigned int segmentation)
   1106 {
   1107     int i;
   1108     int const *labels;
   1109     int br = 0;
   1110     int bd = 0;
   1111     B_PREDICTION_MODE this_mode;
   1112 
   1113 
   1114     int label_count;
   1115     int this_segment_rd = 0;
   1116     int label_mv_thresh;
   1117     int rate = 0;
   1118     int sbr = 0;
   1119     int sbd = 0;
   1120     int segmentyrate = 0;
   1121 
   1122     vp8_variance_fn_ptr_t *v_fn_ptr;
   1123 
   1124     ENTROPY_CONTEXT_PLANES t_above, t_left;
   1125     ENTROPY_CONTEXT *ta;
   1126     ENTROPY_CONTEXT *tl;
   1127     ENTROPY_CONTEXT_PLANES t_above_b, t_left_b;
   1128     ENTROPY_CONTEXT *ta_b;
   1129     ENTROPY_CONTEXT *tl_b;
   1130 
   1131     vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
   1132     vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
   1133 
   1134     ta = (ENTROPY_CONTEXT *)&t_above;
   1135     tl = (ENTROPY_CONTEXT *)&t_left;
   1136     ta_b = (ENTROPY_CONTEXT *)&t_above_b;
   1137     tl_b = (ENTROPY_CONTEXT *)&t_left_b;
   1138 
   1139     br = 0;
   1140     bd = 0;
   1141 
   1142     v_fn_ptr = &cpi->fn_ptr[segmentation];
   1143     labels = vp8_mbsplits[segmentation];
   1144     label_count = vp8_mbsplit_count[segmentation];
   1145 
   1146     /* 64 makes this threshold really big effectively making it so that we
   1147      * very rarely check mvs on segments.   setting this to 1 would make mv
   1148      * thresh roughly equal to what it is for macroblocks
   1149      */
   1150     label_mv_thresh = 1 * bsi->mvthresh / label_count ;
   1151 
   1152     /* Segmentation method overheads */
   1153     rate = vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs, vp8_mbsplit_encodings + segmentation);
   1154     rate += vp8_cost_mv_ref(SPLITMV, bsi->mdcounts);
   1155     this_segment_rd += RDCOST(x->rdmult, x->rddiv, rate, 0);
   1156     br += rate;
   1157 
   1158     for (i = 0; i < label_count; i++)
   1159     {
   1160         int_mv mode_mv[B_MODE_COUNT];
   1161         int best_label_rd = INT_MAX;
   1162         B_PREDICTION_MODE mode_selected = ZERO4X4;
   1163         int bestlabelyrate = 0;
   1164 
   1165         /* search for the best motion vector on this segment */
   1166         for (this_mode = LEFT4X4; this_mode <= NEW4X4 ; this_mode ++)
   1167         {
   1168             int this_rd;
   1169             int distortion;
   1170             int labelyrate;
   1171             ENTROPY_CONTEXT_PLANES t_above_s, t_left_s;
   1172             ENTROPY_CONTEXT *ta_s;
   1173             ENTROPY_CONTEXT *tl_s;
   1174 
   1175             vpx_memcpy(&t_above_s, &t_above, sizeof(ENTROPY_CONTEXT_PLANES));
   1176             vpx_memcpy(&t_left_s, &t_left, sizeof(ENTROPY_CONTEXT_PLANES));
   1177 
   1178             ta_s = (ENTROPY_CONTEXT *)&t_above_s;
   1179             tl_s = (ENTROPY_CONTEXT *)&t_left_s;
   1180 
   1181             if (this_mode == NEW4X4)
   1182             {
   1183                 int sseshift;
   1184                 int num00;
   1185                 int step_param = 0;
   1186                 int further_steps;
   1187                 int n;
   1188                 int thissme;
   1189                 int bestsme = INT_MAX;
   1190                 int_mv  temp_mv;
   1191                 BLOCK *c;
   1192                 BLOCKD *e;
   1193 
   1194                 /* Is the best so far sufficiently good that we cant justify
   1195                  * doing a new motion search.
   1196                  */
   1197                 if (best_label_rd < label_mv_thresh)
   1198                     break;
   1199 
   1200                 if(cpi->compressor_speed)
   1201                 {
   1202                     if (segmentation == BLOCK_8X16 || segmentation == BLOCK_16X8)
   1203                     {
   1204                         bsi->mvp.as_int = bsi->sv_mvp[i].as_int;
   1205                         if (i==1 && segmentation == BLOCK_16X8)
   1206                           bsi->mvp.as_int = bsi->sv_mvp[2].as_int;
   1207 
   1208                         step_param = bsi->sv_istep[i];
   1209                     }
   1210 
   1211                     /* use previous block's result as next block's MV
   1212                      * predictor.
   1213                      */
   1214                     if (segmentation == BLOCK_4X4 && i>0)
   1215                     {
   1216                         bsi->mvp.as_int = x->e_mbd.block[i-1].bmi.mv.as_int;
   1217                         if (i==4 || i==8 || i==12)
   1218                             bsi->mvp.as_int = x->e_mbd.block[i-4].bmi.mv.as_int;
   1219                         step_param = 2;
   1220                     }
   1221                 }
   1222 
   1223                 further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
   1224 
   1225                 {
   1226                     int sadpb = x->sadperbit4;
   1227                     int_mv mvp_full;
   1228 
   1229                     mvp_full.as_mv.row = bsi->mvp.as_mv.row >>3;
   1230                     mvp_full.as_mv.col = bsi->mvp.as_mv.col >>3;
   1231 
   1232                     /* find first label */
   1233                     n = vp8_mbsplit_offset[segmentation][i];
   1234 
   1235                     c = &x->block[n];
   1236                     e = &x->e_mbd.block[n];
   1237 
   1238                     {
   1239                         bestsme = cpi->diamond_search_sad(x, c, e, &mvp_full,
   1240                                                 &mode_mv[NEW4X4], step_param,
   1241                                                 sadpb, &num00, v_fn_ptr,
   1242                                                 x->mvcost, bsi->ref_mv);
   1243 
   1244                         n = num00;
   1245                         num00 = 0;
   1246 
   1247                         while (n < further_steps)
   1248                         {
   1249                             n++;
   1250 
   1251                             if (num00)
   1252                                 num00--;
   1253                             else
   1254                             {
   1255                                 thissme = cpi->diamond_search_sad(x, c, e,
   1256                                                     &mvp_full, &temp_mv,
   1257                                                     step_param + n, sadpb,
   1258                                                     &num00, v_fn_ptr,
   1259                                                     x->mvcost, bsi->ref_mv);
   1260 
   1261                                 if (thissme < bestsme)
   1262                                 {
   1263                                     bestsme = thissme;
   1264                                     mode_mv[NEW4X4].as_int = temp_mv.as_int;
   1265                                 }
   1266                             }
   1267                         }
   1268                     }
   1269 
   1270                     sseshift = segmentation_to_sseshift[segmentation];
   1271 
   1272                     /* Should we do a full search (best quality only) */
   1273                     if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000)
   1274                     {
   1275                         /* Check if mvp_full is within the range. */
   1276                         vp8_clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
   1277 
   1278                         thissme = cpi->full_search_sad(x, c, e, &mvp_full,
   1279                                                        sadpb, 16, v_fn_ptr,
   1280                                                        x->mvcost, bsi->ref_mv);
   1281 
   1282                         if (thissme < bestsme)
   1283                         {
   1284                             bestsme = thissme;
   1285                             mode_mv[NEW4X4].as_int = e->bmi.mv.as_int;
   1286                         }
   1287                         else
   1288                         {
   1289                             /* The full search result is actually worse so
   1290                              * re-instate the previous best vector
   1291                              */
   1292                             e->bmi.mv.as_int = mode_mv[NEW4X4].as_int;
   1293                         }
   1294                     }
   1295                 }
   1296 
   1297                 if (bestsme < INT_MAX)
   1298                 {
   1299                     int disto;
   1300                     unsigned int sse;
   1301                     cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4],
   1302                         bsi->ref_mv, x->errorperbit, v_fn_ptr, x->mvcost,
   1303                         &disto, &sse);
   1304                 }
   1305             } /* NEW4X4 */
   1306 
   1307             rate = labels2mode(x, labels, i, this_mode, &mode_mv[this_mode],
   1308                                bsi->ref_mv, x->mvcost);
   1309 
   1310             /* Trap vectors that reach beyond the UMV borders */
   1311             if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) || ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) ||
   1312                 ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) || ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max))
   1313             {
   1314                 continue;
   1315             }
   1316 
   1317             distortion = vp8_encode_inter_mb_segment(x, labels, i) / 4;
   1318 
   1319             labelyrate = rdcost_mbsegment_y(x, labels, i, ta_s, tl_s);
   1320             rate += labelyrate;
   1321 
   1322             this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
   1323 
   1324             if (this_rd < best_label_rd)
   1325             {
   1326                 sbr = rate;
   1327                 sbd = distortion;
   1328                 bestlabelyrate = labelyrate;
   1329                 mode_selected = this_mode;
   1330                 best_label_rd = this_rd;
   1331 
   1332                 vpx_memcpy(ta_b, ta_s, sizeof(ENTROPY_CONTEXT_PLANES));
   1333                 vpx_memcpy(tl_b, tl_s, sizeof(ENTROPY_CONTEXT_PLANES));
   1334 
   1335             }
   1336         } /*for each 4x4 mode*/
   1337 
   1338         vpx_memcpy(ta, ta_b, sizeof(ENTROPY_CONTEXT_PLANES));
   1339         vpx_memcpy(tl, tl_b, sizeof(ENTROPY_CONTEXT_PLANES));
   1340 
   1341         labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected],
   1342                     bsi->ref_mv, x->mvcost);
   1343 
   1344         br += sbr;
   1345         bd += sbd;
   1346         segmentyrate += bestlabelyrate;
   1347         this_segment_rd += best_label_rd;
   1348 
   1349         if (this_segment_rd >= bsi->segment_rd)
   1350             break;
   1351 
   1352     } /* for each label */
   1353 
   1354     if (this_segment_rd < bsi->segment_rd)
   1355     {
   1356         bsi->r = br;
   1357         bsi->d = bd;
   1358         bsi->segment_yrate = segmentyrate;
   1359         bsi->segment_rd = this_segment_rd;
   1360         bsi->segment_num = segmentation;
   1361 
   1362         /* store everything needed to come back to this!! */
   1363         for (i = 0; i < 16; i++)
   1364         {
   1365             bsi->mvs[i].as_mv = x->partition_info->bmi[i].mv.as_mv;
   1366             bsi->modes[i] = x->partition_info->bmi[i].mode;
   1367             bsi->eobs[i] = x->e_mbd.eobs[i];
   1368         }
   1369     }
   1370 }
   1371 
   1372 static
   1373 void vp8_cal_step_param(int sr, int *sp)
   1374 {
   1375     int step = 0;
   1376 
   1377     if (sr > MAX_FIRST_STEP) sr = MAX_FIRST_STEP;
   1378     else if (sr < 1) sr = 1;
   1379 
   1380     while (sr>>=1)
   1381         step++;
   1382 
   1383     *sp = MAX_MVSEARCH_STEPS - 1 - step;
   1384 }
   1385 
   1386 static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
   1387                                            int_mv *best_ref_mv, int best_rd,
   1388                                            int *mdcounts, int *returntotrate,
   1389                                            int *returnyrate, int *returndistortion,
   1390                                            int mvthresh)
   1391 {
   1392     int i;
   1393     BEST_SEG_INFO bsi;
   1394 
   1395     vpx_memset(&bsi, 0, sizeof(bsi));
   1396 
   1397     bsi.segment_rd = best_rd;
   1398     bsi.ref_mv = best_ref_mv;
   1399     bsi.mvp.as_int = best_ref_mv->as_int;
   1400     bsi.mvthresh = mvthresh;
   1401     bsi.mdcounts = mdcounts;
   1402 
   1403     for(i = 0; i < 16; i++)
   1404     {
   1405         bsi.modes[i] = ZERO4X4;
   1406     }
   1407 
   1408     if(cpi->compressor_speed == 0)
   1409     {
   1410         /* for now, we will keep the original segmentation order
   1411            when in best quality mode */
   1412         rd_check_segment(cpi, x, &bsi, BLOCK_16X8);
   1413         rd_check_segment(cpi, x, &bsi, BLOCK_8X16);
   1414         rd_check_segment(cpi, x, &bsi, BLOCK_8X8);
   1415         rd_check_segment(cpi, x, &bsi, BLOCK_4X4);
   1416     }
   1417     else
   1418     {
   1419         int sr;
   1420 
   1421         rd_check_segment(cpi, x, &bsi, BLOCK_8X8);
   1422 
   1423         if (bsi.segment_rd < best_rd)
   1424         {
   1425             int col_min = ((best_ref_mv->as_mv.col+7)>>3) - MAX_FULL_PEL_VAL;
   1426             int row_min = ((best_ref_mv->as_mv.row+7)>>3) - MAX_FULL_PEL_VAL;
   1427             int col_max = (best_ref_mv->as_mv.col>>3) + MAX_FULL_PEL_VAL;
   1428             int row_max = (best_ref_mv->as_mv.row>>3) + MAX_FULL_PEL_VAL;
   1429 
   1430             int tmp_col_min = x->mv_col_min;
   1431             int tmp_col_max = x->mv_col_max;
   1432             int tmp_row_min = x->mv_row_min;
   1433             int tmp_row_max = x->mv_row_max;
   1434 
   1435             /* Get intersection of UMV window and valid MV window to reduce # of checks in diamond search. */
   1436             if (x->mv_col_min < col_min )
   1437                 x->mv_col_min = col_min;
   1438             if (x->mv_col_max > col_max )
   1439                 x->mv_col_max = col_max;
   1440             if (x->mv_row_min < row_min )
   1441                 x->mv_row_min = row_min;
   1442             if (x->mv_row_max > row_max )
   1443                 x->mv_row_max = row_max;
   1444 
   1445             /* Get 8x8 result */
   1446             bsi.sv_mvp[0].as_int = bsi.mvs[0].as_int;
   1447             bsi.sv_mvp[1].as_int = bsi.mvs[2].as_int;
   1448             bsi.sv_mvp[2].as_int = bsi.mvs[8].as_int;
   1449             bsi.sv_mvp[3].as_int = bsi.mvs[10].as_int;
   1450 
   1451             /* Use 8x8 result as 16x8/8x16's predictor MV. Adjust search range according to the closeness of 2 MV. */
   1452             /* block 8X16 */
   1453             {
   1454                 sr = MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[2].as_mv.row))>>3, (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[2].as_mv.col))>>3);
   1455                 vp8_cal_step_param(sr, &bsi.sv_istep[0]);
   1456 
   1457                 sr = MAXF((abs(bsi.sv_mvp[1].as_mv.row - bsi.sv_mvp[3].as_mv.row))>>3, (abs(bsi.sv_mvp[1].as_mv.col - bsi.sv_mvp[3].as_mv.col))>>3);
   1458                 vp8_cal_step_param(sr, &bsi.sv_istep[1]);
   1459 
   1460                 rd_check_segment(cpi, x, &bsi, BLOCK_8X16);
   1461             }
   1462 
   1463             /* block 16X8 */
   1464             {
   1465                 sr = MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[1].as_mv.row))>>3, (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[1].as_mv.col))>>3);
   1466                 vp8_cal_step_param(sr, &bsi.sv_istep[0]);
   1467 
   1468                 sr = MAXF((abs(bsi.sv_mvp[2].as_mv.row - bsi.sv_mvp[3].as_mv.row))>>3, (abs(bsi.sv_mvp[2].as_mv.col - bsi.sv_mvp[3].as_mv.col))>>3);
   1469                 vp8_cal_step_param(sr, &bsi.sv_istep[1]);
   1470 
   1471                 rd_check_segment(cpi, x, &bsi, BLOCK_16X8);
   1472             }
   1473 
   1474             /* If 8x8 is better than 16x8/8x16, then do 4x4 search */
   1475             /* Not skip 4x4 if speed=0 (good quality) */
   1476             if (cpi->sf.no_skip_block4x4_search || bsi.segment_num == BLOCK_8X8)  /* || (sv_segment_rd8x8-bsi.segment_rd) < sv_segment_rd8x8>>5) */
   1477             {
   1478                 bsi.mvp.as_int = bsi.sv_mvp[0].as_int;
   1479                 rd_check_segment(cpi, x, &bsi, BLOCK_4X4);
   1480             }
   1481 
   1482             /* restore UMV window */
   1483             x->mv_col_min = tmp_col_min;
   1484             x->mv_col_max = tmp_col_max;
   1485             x->mv_row_min = tmp_row_min;
   1486             x->mv_row_max = tmp_row_max;
   1487         }
   1488     }
   1489 
   1490     /* set it to the best */
   1491     for (i = 0; i < 16; i++)
   1492     {
   1493         BLOCKD *bd = &x->e_mbd.block[i];
   1494 
   1495         bd->bmi.mv.as_int = bsi.mvs[i].as_int;
   1496         *bd->eob = bsi.eobs[i];
   1497     }
   1498 
   1499     *returntotrate = bsi.r;
   1500     *returndistortion = bsi.d;
   1501     *returnyrate = bsi.segment_yrate;
   1502 
   1503     /* save partitions */
   1504     x->e_mbd.mode_info_context->mbmi.partitioning = bsi.segment_num;
   1505     x->partition_info->count = vp8_mbsplit_count[bsi.segment_num];
   1506 
   1507     for (i = 0; i < x->partition_info->count; i++)
   1508     {
   1509         int j;
   1510 
   1511         j = vp8_mbsplit_offset[bsi.segment_num][i];
   1512 
   1513         x->partition_info->bmi[i].mode = bsi.modes[j];
   1514         x->partition_info->bmi[i].mv.as_mv = bsi.mvs[j].as_mv;
   1515     }
   1516     /*
   1517      * used to set x->e_mbd.mode_info_context->mbmi.mv.as_int
   1518      */
   1519     x->partition_info->bmi[15].mv.as_int = bsi.mvs[15].as_int;
   1520 
   1521     return bsi.segment_rd;
   1522 }
   1523 
   1524 /* The improved MV prediction */
   1525 void vp8_mv_pred
   1526 (
   1527     VP8_COMP *cpi,
   1528     MACROBLOCKD *xd,
   1529     const MODE_INFO *here,
   1530     int_mv *mvp,
   1531     int refframe,
   1532     int *ref_frame_sign_bias,
   1533     int *sr,
   1534     int near_sadidx[]
   1535 )
   1536 {
   1537     const MODE_INFO *above = here - xd->mode_info_stride;
   1538     const MODE_INFO *left = here - 1;
   1539     const MODE_INFO *aboveleft = above - 1;
   1540     int_mv           near_mvs[8];
   1541     int              near_ref[8];
   1542     int_mv           mv;
   1543     int              vcnt=0;
   1544     int              find=0;
   1545     int              mb_offset;
   1546 
   1547     int              mvx[8];
   1548     int              mvy[8];
   1549     int              i;
   1550 
   1551     mv.as_int = 0;
   1552 
   1553     if(here->mbmi.ref_frame != INTRA_FRAME)
   1554     {
   1555         near_mvs[0].as_int = near_mvs[1].as_int = near_mvs[2].as_int = near_mvs[3].as_int = near_mvs[4].as_int = near_mvs[5].as_int = near_mvs[6].as_int = near_mvs[7].as_int = 0;
   1556         near_ref[0] = near_ref[1] = near_ref[2] = near_ref[3] = near_ref[4] = near_ref[5] = near_ref[6] = near_ref[7] = 0;
   1557 
   1558         /* read in 3 nearby block's MVs from current frame as prediction
   1559          * candidates.
   1560          */
   1561         if (above->mbmi.ref_frame != INTRA_FRAME)
   1562         {
   1563             near_mvs[vcnt].as_int = above->mbmi.mv.as_int;
   1564             mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
   1565             near_ref[vcnt] =  above->mbmi.ref_frame;
   1566         }
   1567         vcnt++;
   1568         if (left->mbmi.ref_frame != INTRA_FRAME)
   1569         {
   1570             near_mvs[vcnt].as_int = left->mbmi.mv.as_int;
   1571             mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
   1572             near_ref[vcnt] =  left->mbmi.ref_frame;
   1573         }
   1574         vcnt++;
   1575         if (aboveleft->mbmi.ref_frame != INTRA_FRAME)
   1576         {
   1577             near_mvs[vcnt].as_int = aboveleft->mbmi.mv.as_int;
   1578             mv_bias(ref_frame_sign_bias[aboveleft->mbmi.ref_frame], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
   1579             near_ref[vcnt] =  aboveleft->mbmi.ref_frame;
   1580         }
   1581         vcnt++;
   1582 
   1583         /* read in 5 nearby block's MVs from last frame. */
   1584         if(cpi->common.last_frame_type != KEY_FRAME)
   1585         {
   1586             mb_offset = (-xd->mb_to_top_edge/128 + 1) * (xd->mode_info_stride +1) + (-xd->mb_to_left_edge/128 +1) ;
   1587 
   1588             /* current in last frame */
   1589             if (cpi->lf_ref_frame[mb_offset] != INTRA_FRAME)
   1590             {
   1591                 near_mvs[vcnt].as_int = cpi->lfmv[mb_offset].as_int;
   1592                 mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
   1593                 near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset];
   1594             }
   1595             vcnt++;
   1596 
   1597             /* above in last frame */
   1598             if (cpi->lf_ref_frame[mb_offset - xd->mode_info_stride-1] != INTRA_FRAME)
   1599             {
   1600                 near_mvs[vcnt].as_int = cpi->lfmv[mb_offset - xd->mode_info_stride-1].as_int;
   1601                 mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset - xd->mode_info_stride-1], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
   1602                 near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset - xd->mode_info_stride-1];
   1603             }
   1604             vcnt++;
   1605 
   1606             /* left in last frame */
   1607             if (cpi->lf_ref_frame[mb_offset-1] != INTRA_FRAME)
   1608             {
   1609                 near_mvs[vcnt].as_int = cpi->lfmv[mb_offset -1].as_int;
   1610                 mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset -1], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
   1611                 near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset - 1];
   1612             }
   1613             vcnt++;
   1614 
   1615             /* right in last frame */
   1616             if (cpi->lf_ref_frame[mb_offset +1] != INTRA_FRAME)
   1617             {
   1618                 near_mvs[vcnt].as_int = cpi->lfmv[mb_offset +1].as_int;
   1619                 mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset +1], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
   1620                 near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset +1];
   1621             }
   1622             vcnt++;
   1623 
   1624             /* below in last frame */
   1625             if (cpi->lf_ref_frame[mb_offset + xd->mode_info_stride +1] != INTRA_FRAME)
   1626             {
   1627                 near_mvs[vcnt].as_int = cpi->lfmv[mb_offset + xd->mode_info_stride +1].as_int;
   1628                 mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset + xd->mode_info_stride +1], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
   1629                 near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset + xd->mode_info_stride +1];
   1630             }
   1631             vcnt++;
   1632         }
   1633 
   1634         for(i=0; i< vcnt; i++)
   1635         {
   1636             if(near_ref[near_sadidx[i]] != INTRA_FRAME)
   1637             {
   1638                 if(here->mbmi.ref_frame == near_ref[near_sadidx[i]])
   1639                 {
   1640                     mv.as_int = near_mvs[near_sadidx[i]].as_int;
   1641                     find = 1;
   1642                     if (i < 3)
   1643                         *sr = 3;
   1644                     else
   1645                         *sr = 2;
   1646                     break;
   1647                 }
   1648             }
   1649         }
   1650 
   1651         if(!find)
   1652         {
   1653             for(i=0; i<vcnt; i++)
   1654             {
   1655                 mvx[i] = near_mvs[i].as_mv.row;
   1656                 mvy[i] = near_mvs[i].as_mv.col;
   1657             }
   1658 
   1659             insertsortmv(mvx, vcnt);
   1660             insertsortmv(mvy, vcnt);
   1661             mv.as_mv.row = mvx[vcnt/2];
   1662             mv.as_mv.col = mvy[vcnt/2];
   1663 
   1664             find = 1;
   1665             /* sr is set to 0 to allow calling function to decide the search
   1666              * range.
   1667              */
   1668             *sr = 0;
   1669         }
   1670     }
   1671 
   1672     /* Set up return values */
   1673     mvp->as_int = mv.as_int;
   1674     vp8_clamp_mv2(mvp, xd);
   1675 }
   1676 
   1677 void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffset, int near_sadidx[])
   1678 {
   1679     /* near_sad indexes:
   1680      *   0-cf above, 1-cf left, 2-cf aboveleft,
   1681      *   3-lf current, 4-lf above, 5-lf left, 6-lf right, 7-lf below
   1682      */
   1683     int near_sad[8] = {0};
   1684     BLOCK *b = &x->block[0];
   1685     unsigned char *src_y_ptr = *(b->base_src);
   1686 
   1687     /* calculate sad for current frame 3 nearby MBs. */
   1688     if( xd->mb_to_top_edge==0 && xd->mb_to_left_edge ==0)
   1689     {
   1690         near_sad[0] = near_sad[1] = near_sad[2] = INT_MAX;
   1691     }else if(xd->mb_to_top_edge==0)
   1692     {   /* only has left MB for sad calculation. */
   1693         near_sad[0] = near_sad[2] = INT_MAX;
   1694         near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, UINT_MAX);
   1695     }else if(xd->mb_to_left_edge ==0)
   1696     {   /* only has left MB for sad calculation. */
   1697         near_sad[1] = near_sad[2] = INT_MAX;
   1698         near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, UINT_MAX);
   1699     }else
   1700     {
   1701         near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, UINT_MAX);
   1702         near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, UINT_MAX);
   1703         near_sad[2] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16 -16,xd->dst.y_stride, UINT_MAX);
   1704     }
   1705 
   1706     if(cpi->common.last_frame_type != KEY_FRAME)
   1707     {
   1708         /* calculate sad for last frame 5 nearby MBs. */
   1709         unsigned char *pre_y_buffer = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_buffer + recon_yoffset;
   1710         int pre_y_stride = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_stride;
   1711 
   1712         if(xd->mb_to_top_edge==0) near_sad[4] = INT_MAX;
   1713         if(xd->mb_to_left_edge ==0) near_sad[5] = INT_MAX;
   1714         if(xd->mb_to_right_edge ==0) near_sad[6] = INT_MAX;
   1715         if(xd->mb_to_bottom_edge==0) near_sad[7] = INT_MAX;
   1716 
   1717         if(near_sad[4] != INT_MAX)
   1718             near_sad[4] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer - pre_y_stride *16, pre_y_stride, UINT_MAX);
   1719         if(near_sad[5] != INT_MAX)
   1720             near_sad[5] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer - 16, pre_y_stride, UINT_MAX);
   1721         near_sad[3] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer, pre_y_stride, UINT_MAX);
   1722         if(near_sad[6] != INT_MAX)
   1723             near_sad[6] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer + 16, pre_y_stride, UINT_MAX);
   1724         if(near_sad[7] != INT_MAX)
   1725             near_sad[7] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer + pre_y_stride *16, pre_y_stride, UINT_MAX);
   1726     }
   1727 
   1728     if(cpi->common.last_frame_type != KEY_FRAME)
   1729     {
   1730         insertsortsad(near_sad, near_sadidx, 8);
   1731     }else
   1732     {
   1733         insertsortsad(near_sad, near_sadidx, 3);
   1734     }
   1735 }
   1736 
   1737 static void rd_update_mvcount(MACROBLOCK *x, int_mv *best_ref_mv)
   1738 {
   1739     if (x->e_mbd.mode_info_context->mbmi.mode == SPLITMV)
   1740     {
   1741         int i;
   1742 
   1743         for (i = 0; i < x->partition_info->count; i++)
   1744         {
   1745             if (x->partition_info->bmi[i].mode == NEW4X4)
   1746             {
   1747                 x->MVcount[0][mv_max+((x->partition_info->bmi[i].mv.as_mv.row
   1748                                           - best_ref_mv->as_mv.row) >> 1)]++;
   1749                 x->MVcount[1][mv_max+((x->partition_info->bmi[i].mv.as_mv.col
   1750                                           - best_ref_mv->as_mv.col) >> 1)]++;
   1751             }
   1752         }
   1753     }
   1754     else if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV)
   1755     {
   1756         x->MVcount[0][mv_max+((x->e_mbd.mode_info_context->mbmi.mv.as_mv.row
   1757                                           - best_ref_mv->as_mv.row) >> 1)]++;
   1758         x->MVcount[1][mv_max+((x->e_mbd.mode_info_context->mbmi.mv.as_mv.col
   1759                                           - best_ref_mv->as_mv.col) >> 1)]++;
   1760     }
   1761 }
   1762 
   1763 static int evaluate_inter_mode_rd(int mdcounts[4],
   1764                                   RATE_DISTORTION* rd,
   1765                                   int* disable_skip,
   1766                                   VP8_COMP *cpi, MACROBLOCK *x)
   1767 {
   1768     MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode;
   1769     BLOCK *b = &x->block[0];
   1770     MACROBLOCKD *xd = &x->e_mbd;
   1771     int distortion;
   1772     vp8_build_inter16x16_predictors_mby(&x->e_mbd, x->e_mbd.predictor, 16);
   1773 
   1774     if (cpi->active_map_enabled && x->active_ptr[0] == 0) {
   1775         x->skip = 1;
   1776     }
   1777     else if (x->encode_breakout)
   1778     {
   1779         unsigned int sse;
   1780         unsigned int var;
   1781         unsigned int threshold = (xd->block[0].dequant[1]
   1782                     * xd->block[0].dequant[1] >>4);
   1783 
   1784         if(threshold < x->encode_breakout)
   1785             threshold = x->encode_breakout;
   1786 
   1787         var = vp8_variance16x16
   1788                 (*(b->base_src), b->src_stride,
   1789                 x->e_mbd.predictor, 16, &sse);
   1790 
   1791         if (sse < threshold)
   1792         {
   1793              unsigned int q2dc = xd->block[24].dequant[0];
   1794             /* If theres is no codeable 2nd order dc
   1795                or a very small uniform pixel change change */
   1796             if ((sse - var < q2dc * q2dc >>4) ||
   1797                 (sse /2 > var && sse-var < 64))
   1798             {
   1799                 /* Check u and v to make sure skip is ok */
   1800                 unsigned int sse2 = VP8_UVSSE(x);
   1801                 if (sse2 * 2 < threshold)
   1802                 {
   1803                     x->skip = 1;
   1804                     rd->distortion2 = sse + sse2;
   1805                     rd->rate2 = 500;
   1806 
   1807                     /* for best_yrd calculation */
   1808                     rd->rate_uv = 0;
   1809                     rd->distortion_uv = sse2;
   1810 
   1811                     *disable_skip = 1;
   1812                     return RDCOST(x->rdmult, x->rddiv, rd->rate2,
   1813                                   rd->distortion2);
   1814                 }
   1815             }
   1816         }
   1817     }
   1818 
   1819 
   1820     /* Add in the Mv/mode cost */
   1821     rd->rate2 += vp8_cost_mv_ref(this_mode, mdcounts);
   1822 
   1823     /* Y cost and distortion */
   1824     macro_block_yrd(x, &rd->rate_y, &distortion);
   1825     rd->rate2 += rd->rate_y;
   1826     rd->distortion2 += distortion;
   1827 
   1828     /* UV cost and distortion */
   1829     rd_inter16x16_uv(cpi, x, &rd->rate_uv, &rd->distortion_uv,
   1830                      cpi->common.full_pixel);
   1831     rd->rate2 += rd->rate_uv;
   1832     rd->distortion2 += rd->distortion_uv;
   1833     return INT_MAX;
   1834 }
   1835 
   1836 static int calculate_final_rd_costs(int this_rd,
   1837                                     RATE_DISTORTION* rd,
   1838                                     int* other_cost,
   1839                                     int disable_skip,
   1840                                     int uv_intra_tteob,
   1841                                     int intra_rd_penalty,
   1842                                     VP8_COMP *cpi, MACROBLOCK *x)
   1843 {
   1844     MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode;
   1845 
   1846     /* Where skip is allowable add in the default per mb cost for the no
   1847      * skip case. where we then decide to skip we have to delete this and
   1848      * replace it with the cost of signalling a skip
   1849      */
   1850     if (cpi->common.mb_no_coeff_skip)
   1851     {
   1852         *other_cost += vp8_cost_bit(cpi->prob_skip_false, 0);
   1853         rd->rate2 += *other_cost;
   1854     }
   1855 
   1856     /* Estimate the reference frame signaling cost and add it
   1857      * to the rolling cost variable.
   1858      */
   1859     rd->rate2 +=
   1860         x->ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
   1861 
   1862     if (!disable_skip)
   1863     {
   1864         /* Test for the condition where skip block will be activated
   1865          * because there are no non zero coefficients and make any
   1866          * necessary adjustment for rate
   1867          */
   1868         if (cpi->common.mb_no_coeff_skip)
   1869         {
   1870             int i;
   1871             int tteob;
   1872             int has_y2_block = (this_mode!=SPLITMV && this_mode!=B_PRED);
   1873 
   1874             tteob = 0;
   1875             if(has_y2_block)
   1876                 tteob += x->e_mbd.eobs[24];
   1877 
   1878             for (i = 0; i < 16; i++)
   1879                 tteob += (x->e_mbd.eobs[i] > has_y2_block);
   1880 
   1881             if (x->e_mbd.mode_info_context->mbmi.ref_frame)
   1882             {
   1883                 for (i = 16; i < 24; i++)
   1884                     tteob += x->e_mbd.eobs[i];
   1885             }
   1886             else
   1887                 tteob += uv_intra_tteob;
   1888 
   1889             if (tteob == 0)
   1890             {
   1891                 rd->rate2 -= (rd->rate_y + rd->rate_uv);
   1892                 /* for best_yrd calculation */
   1893                 rd->rate_uv = 0;
   1894 
   1895                 /* Back out no skip flag costing and add in skip flag costing */
   1896                 if (cpi->prob_skip_false)
   1897                 {
   1898                     int prob_skip_cost;
   1899 
   1900                     prob_skip_cost = vp8_cost_bit(cpi->prob_skip_false, 1);
   1901                     prob_skip_cost -= vp8_cost_bit(cpi->prob_skip_false, 0);
   1902                     rd->rate2 += prob_skip_cost;
   1903                     *other_cost += prob_skip_cost;
   1904                 }
   1905             }
   1906         }
   1907         /* Calculate the final RD estimate for this mode */
   1908         this_rd = RDCOST(x->rdmult, x->rddiv, rd->rate2, rd->distortion2);
   1909         if (this_rd < INT_MAX && x->e_mbd.mode_info_context->mbmi.ref_frame
   1910                                  == INTRA_FRAME)
   1911             this_rd += intra_rd_penalty;
   1912     }
   1913     return this_rd;
   1914 }
   1915 
   1916 static void update_best_mode(BEST_MODE* best_mode, int this_rd,
   1917                              RATE_DISTORTION* rd, int other_cost, MACROBLOCK *x)
   1918 {
   1919     MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode;
   1920 
   1921     other_cost +=
   1922     x->ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
   1923 
   1924     /* Calculate the final y RD estimate for this mode */
   1925     best_mode->yrd = RDCOST(x->rdmult, x->rddiv, (rd->rate2-rd->rate_uv-other_cost),
   1926                       (rd->distortion2-rd->distortion_uv));
   1927 
   1928     best_mode->rd = this_rd;
   1929     vpx_memcpy(&best_mode->mbmode, &x->e_mbd.mode_info_context->mbmi, sizeof(MB_MODE_INFO));
   1930     vpx_memcpy(&best_mode->partition, x->partition_info, sizeof(PARTITION_INFO));
   1931 
   1932     if ((this_mode == B_PRED) || (this_mode == SPLITMV))
   1933     {
   1934         int i;
   1935         for (i = 0; i < 16; i++)
   1936         {
   1937             best_mode->bmodes[i] = x->e_mbd.block[i].bmi;
   1938         }
   1939     }
   1940 }
   1941 
   1942 void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
   1943                             int recon_uvoffset, int *returnrate,
   1944                             int *returndistortion, int *returnintra)
   1945 {
   1946     BLOCK *b = &x->block[0];
   1947     BLOCKD *d = &x->e_mbd.block[0];
   1948     MACROBLOCKD *xd = &x->e_mbd;
   1949     int_mv best_ref_mv_sb[2];
   1950     int_mv mode_mv_sb[2][MB_MODE_COUNT];
   1951     int_mv best_ref_mv;
   1952     int_mv *mode_mv;
   1953     MB_PREDICTION_MODE this_mode;
   1954     int num00;
   1955     int best_mode_index = 0;
   1956     BEST_MODE best_mode;
   1957 
   1958     int i;
   1959     int mode_index;
   1960     int mdcounts[4];
   1961     int rate;
   1962     RATE_DISTORTION rd;
   1963     int uv_intra_rate = 0, uv_intra_distortion = 0, uv_intra_rate_tokenonly = 0;
   1964     int uv_intra_tteob = 0;
   1965     int uv_intra_done = 0;
   1966 
   1967     MB_PREDICTION_MODE uv_intra_mode = 0;
   1968     int_mv mvp;
   1969     int near_sadidx[8] = {0, 1, 2, 3, 4, 5, 6, 7};
   1970     int saddone=0;
   1971     /* search range got from mv_pred(). It uses step_param levels. (0-7) */
   1972     int sr=0;
   1973 
   1974     unsigned char *plane[4][3];
   1975     int ref_frame_map[4];
   1976     int sign_bias = 0;
   1977 
   1978     int intra_rd_penalty =  10* vp8_dc_quant(cpi->common.base_qindex,
   1979                                              cpi->common.y1dc_delta_q);
   1980 
   1981 #if CONFIG_TEMPORAL_DENOISING
   1982     unsigned int zero_mv_sse = INT_MAX, best_sse = INT_MAX,
   1983             best_rd_sse = INT_MAX;
   1984 #endif
   1985 
   1986     mode_mv = mode_mv_sb[sign_bias];
   1987     best_ref_mv.as_int = 0;
   1988     best_mode.rd = INT_MAX;
   1989     best_mode.yrd = INT_MAX;
   1990     best_mode.intra_rd = INT_MAX;
   1991     vpx_memset(mode_mv_sb, 0, sizeof(mode_mv_sb));
   1992     vpx_memset(&best_mode.mbmode, 0, sizeof(best_mode.mbmode));
   1993     vpx_memset(&best_mode.bmodes, 0, sizeof(best_mode.bmodes));
   1994 
   1995     /* Setup search priorities */
   1996     get_reference_search_order(cpi, ref_frame_map);
   1997 
   1998     /* Check to see if there is at least 1 valid reference frame that we need
   1999      * to calculate near_mvs.
   2000      */
   2001     if (ref_frame_map[1] > 0)
   2002     {
   2003         sign_bias = vp8_find_near_mvs_bias(&x->e_mbd,
   2004                                            x->e_mbd.mode_info_context,
   2005                                            mode_mv_sb,
   2006                                            best_ref_mv_sb,
   2007                                            mdcounts,
   2008                                            ref_frame_map[1],
   2009                                            cpi->common.ref_frame_sign_bias);
   2010 
   2011         mode_mv = mode_mv_sb[sign_bias];
   2012         best_ref_mv.as_int = best_ref_mv_sb[sign_bias].as_int;
   2013     }
   2014 
   2015     get_predictor_pointers(cpi, plane, recon_yoffset, recon_uvoffset);
   2016 
   2017     *returnintra = INT_MAX;
   2018     /* Count of the number of MBs tested so far this frame */
   2019     x->mbs_tested_so_far++;
   2020 
   2021     x->skip = 0;
   2022 
   2023     for (mode_index = 0; mode_index < MAX_MODES; mode_index++)
   2024     {
   2025         int this_rd = INT_MAX;
   2026         int disable_skip = 0;
   2027         int other_cost = 0;
   2028         int this_ref_frame = ref_frame_map[vp8_ref_frame_order[mode_index]];
   2029 
   2030         /* Test best rd so far against threshold for trying this mode. */
   2031         if (best_mode.rd <= x->rd_threshes[mode_index])
   2032             continue;
   2033 
   2034         if (this_ref_frame < 0)
   2035             continue;
   2036 
   2037         /* These variables hold the rolling total cost and distortion for
   2038          * this mode
   2039          */
   2040         rd.rate2 = 0;
   2041         rd.distortion2 = 0;
   2042 
   2043         this_mode = vp8_mode_order[mode_index];
   2044 
   2045         x->e_mbd.mode_info_context->mbmi.mode = this_mode;
   2046         x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame;
   2047 
   2048         /* Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
   2049          * unless ARNR filtering is enabled in which case we want
   2050          * an unfiltered alternative
   2051          */
   2052         if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0))
   2053         {
   2054             if (this_mode != ZEROMV || x->e_mbd.mode_info_context->mbmi.ref_frame != ALTREF_FRAME)
   2055                 continue;
   2056         }
   2057 
   2058         /* everything but intra */
   2059         if (x->e_mbd.mode_info_context->mbmi.ref_frame)
   2060         {
   2061             x->e_mbd.pre.y_buffer = plane[this_ref_frame][0];
   2062             x->e_mbd.pre.u_buffer = plane[this_ref_frame][1];
   2063             x->e_mbd.pre.v_buffer = plane[this_ref_frame][2];
   2064 
   2065             if (sign_bias != cpi->common.ref_frame_sign_bias[this_ref_frame])
   2066             {
   2067                 sign_bias = cpi->common.ref_frame_sign_bias[this_ref_frame];
   2068                 mode_mv = mode_mv_sb[sign_bias];
   2069                 best_ref_mv.as_int = best_ref_mv_sb[sign_bias].as_int;
   2070             }
   2071         }
   2072 
   2073         /* Check to see if the testing frequency for this mode is at its
   2074          * max. If so, then prevent it from being tested and increase the
   2075          * threshold for its testing
   2076          */
   2077         if (x->mode_test_hit_counts[mode_index] && (cpi->mode_check_freq[mode_index] > 1))
   2078         {
   2079             if (x->mbs_tested_so_far  <= cpi->mode_check_freq[mode_index] * x->mode_test_hit_counts[mode_index])
   2080             {
   2081                 /* Increase the threshold for coding this mode to make it
   2082                  * less likely to be chosen
   2083                  */
   2084                 x->rd_thresh_mult[mode_index] += 4;
   2085 
   2086                 if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT)
   2087                     x->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
   2088 
   2089                 x->rd_threshes[mode_index] =
   2090                     (cpi->rd_baseline_thresh[mode_index] >> 7) *
   2091                     x->rd_thresh_mult[mode_index];
   2092 
   2093                 continue;
   2094             }
   2095         }
   2096 
   2097         /* We have now reached the point where we are going to test the
   2098          * current mode so increment the counter for the number of times
   2099          * it has been tested
   2100          */
   2101         x->mode_test_hit_counts[mode_index] ++;
   2102 
   2103         /* Experimental code. Special case for gf and arf zeromv modes.
   2104          * Increase zbin size to supress noise
   2105          */
   2106         if (x->zbin_mode_boost_enabled)
   2107         {
   2108             if ( this_ref_frame == INTRA_FRAME )
   2109                 x->zbin_mode_boost = 0;
   2110             else
   2111             {
   2112                 if (vp8_mode_order[mode_index] == ZEROMV)
   2113                 {
   2114                     if (this_ref_frame != LAST_FRAME)
   2115                         x->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
   2116                     else
   2117                         x->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
   2118                 }
   2119                 else if (vp8_mode_order[mode_index] == SPLITMV)
   2120                     x->zbin_mode_boost = 0;
   2121                 else
   2122                     x->zbin_mode_boost = MV_ZBIN_BOOST;
   2123             }
   2124 
   2125             vp8_update_zbin_extra(cpi, x);
   2126         }
   2127 
   2128         if(!uv_intra_done && this_ref_frame == INTRA_FRAME)
   2129         {
   2130             rd_pick_intra_mbuv_mode(x, &uv_intra_rate,
   2131                                     &uv_intra_rate_tokenonly,
   2132                                     &uv_intra_distortion);
   2133             uv_intra_mode = x->e_mbd.mode_info_context->mbmi.uv_mode;
   2134 
   2135             /*
   2136              * Total of the eobs is used later to further adjust rate2. Since uv
   2137              * block's intra eobs will be overwritten when we check inter modes,
   2138              * we need to save uv_intra_tteob here.
   2139              */
   2140             for (i = 16; i < 24; i++)
   2141                 uv_intra_tteob += x->e_mbd.eobs[i];
   2142 
   2143             uv_intra_done = 1;
   2144         }
   2145 
   2146         switch (this_mode)
   2147         {
   2148         case B_PRED:
   2149         {
   2150             int tmp_rd;
   2151 
   2152             /* Note the rate value returned here includes the cost of
   2153              * coding the BPRED mode: x->mbmode_cost[x->e_mbd.frame_type][BPRED]
   2154              */
   2155             int distortion;
   2156             tmp_rd = rd_pick_intra4x4mby_modes(x, &rate, &rd.rate_y, &distortion, best_mode.yrd);
   2157             rd.rate2 += rate;
   2158             rd.distortion2 += distortion;
   2159 
   2160             if(tmp_rd < best_mode.yrd)
   2161             {
   2162                 rd.rate2 += uv_intra_rate;
   2163                 rd.rate_uv = uv_intra_rate_tokenonly;
   2164                 rd.distortion2 += uv_intra_distortion;
   2165                 rd.distortion_uv = uv_intra_distortion;
   2166             }
   2167             else
   2168             {
   2169                 this_rd = INT_MAX;
   2170                 disable_skip = 1;
   2171             }
   2172         }
   2173         break;
   2174 
   2175         case SPLITMV:
   2176         {
   2177             int tmp_rd;
   2178             int this_rd_thresh;
   2179             int distortion;
   2180 
   2181             this_rd_thresh = (vp8_ref_frame_order[mode_index] == 1) ?
   2182                 x->rd_threshes[THR_NEW1] : x->rd_threshes[THR_NEW3];
   2183             this_rd_thresh = (vp8_ref_frame_order[mode_index] == 2) ?
   2184                 x->rd_threshes[THR_NEW2] : this_rd_thresh;
   2185 
   2186             tmp_rd = vp8_rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv,
   2187                                                      best_mode.yrd, mdcounts,
   2188                                                      &rate, &rd.rate_y, &distortion, this_rd_thresh) ;
   2189 
   2190             rd.rate2 += rate;
   2191             rd.distortion2 += distortion;
   2192 
   2193             /* If even the 'Y' rd value of split is higher than best so far
   2194              * then dont bother looking at UV
   2195              */
   2196             if (tmp_rd < best_mode.yrd)
   2197             {
   2198                 /* Now work out UV cost and add it in */
   2199                 rd_inter4x4_uv(cpi, x, &rd.rate_uv, &rd.distortion_uv, cpi->common.full_pixel);
   2200                 rd.rate2 += rd.rate_uv;
   2201                 rd.distortion2 += rd.distortion_uv;
   2202             }
   2203             else
   2204             {
   2205                 this_rd = INT_MAX;
   2206                 disable_skip = 1;
   2207             }
   2208         }
   2209         break;
   2210         case DC_PRED:
   2211         case V_PRED:
   2212         case H_PRED:
   2213         case TM_PRED:
   2214         {
   2215             int distortion;
   2216             x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
   2217 
   2218             vp8_build_intra_predictors_mby_s(xd,
   2219                                              xd->dst.y_buffer - xd->dst.y_stride,
   2220                                              xd->dst.y_buffer - 1,
   2221                                              xd->dst.y_stride,
   2222                                              xd->predictor,
   2223                                              16);
   2224             macro_block_yrd(x, &rd.rate_y, &distortion) ;
   2225             rd.rate2 += rd.rate_y;
   2226             rd.distortion2 += distortion;
   2227             rd.rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode];
   2228             rd.rate2 += uv_intra_rate;
   2229             rd.rate_uv = uv_intra_rate_tokenonly;
   2230             rd.distortion2 += uv_intra_distortion;
   2231             rd.distortion_uv = uv_intra_distortion;
   2232         }
   2233         break;
   2234 
   2235         case NEWMV:
   2236         {
   2237             int thissme;
   2238             int bestsme = INT_MAX;
   2239             int step_param = cpi->sf.first_step;
   2240             int further_steps;
   2241             int n;
   2242             int do_refine=1;   /* If last step (1-away) of n-step search doesn't pick the center point as the best match,
   2243                                   we will do a final 1-away diamond refining search  */
   2244 
   2245             int sadpb = x->sadperbit16;
   2246             int_mv mvp_full;
   2247 
   2248             int col_min = ((best_ref_mv.as_mv.col+7)>>3) - MAX_FULL_PEL_VAL;
   2249             int row_min = ((best_ref_mv.as_mv.row+7)>>3) - MAX_FULL_PEL_VAL;
   2250             int col_max = (best_ref_mv.as_mv.col>>3) + MAX_FULL_PEL_VAL;
   2251             int row_max = (best_ref_mv.as_mv.row>>3) + MAX_FULL_PEL_VAL;
   2252 
   2253             int tmp_col_min = x->mv_col_min;
   2254             int tmp_col_max = x->mv_col_max;
   2255             int tmp_row_min = x->mv_row_min;
   2256             int tmp_row_max = x->mv_row_max;
   2257 
   2258             if(!saddone)
   2259             {
   2260                 vp8_cal_sad(cpi,xd,x, recon_yoffset ,&near_sadidx[0] );
   2261                 saddone = 1;
   2262             }
   2263 
   2264             vp8_mv_pred(cpi, &x->e_mbd, x->e_mbd.mode_info_context, &mvp,
   2265                         x->e_mbd.mode_info_context->mbmi.ref_frame, cpi->common.ref_frame_sign_bias, &sr, &near_sadidx[0]);
   2266 
   2267             mvp_full.as_mv.col = mvp.as_mv.col>>3;
   2268             mvp_full.as_mv.row = mvp.as_mv.row>>3;
   2269 
   2270             /* Get intersection of UMV window and valid MV window to
   2271              * reduce # of checks in diamond search.
   2272              */
   2273             if (x->mv_col_min < col_min )
   2274                 x->mv_col_min = col_min;
   2275             if (x->mv_col_max > col_max )
   2276                 x->mv_col_max = col_max;
   2277             if (x->mv_row_min < row_min )
   2278                 x->mv_row_min = row_min;
   2279             if (x->mv_row_max > row_max )
   2280                 x->mv_row_max = row_max;
   2281 
   2282             /* adjust search range according to sr from mv prediction */
   2283             if(sr > step_param)
   2284                 step_param = sr;
   2285 
   2286             /* Initial step/diamond search */
   2287             {
   2288                 bestsme = cpi->diamond_search_sad(x, b, d, &mvp_full, &d->bmi.mv,
   2289                                         step_param, sadpb, &num00,
   2290                                         &cpi->fn_ptr[BLOCK_16X16],
   2291                                         x->mvcost, &best_ref_mv);
   2292                 mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
   2293 
   2294                 /* Further step/diamond searches as necessary */
   2295                 n = 0;
   2296                 further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
   2297 
   2298                 n = num00;
   2299                 num00 = 0;
   2300 
   2301                 /* If there won't be more n-step search, check to see if refining search is needed. */
   2302                 if (n > further_steps)
   2303                     do_refine = 0;
   2304 
   2305                 while (n < further_steps)
   2306                 {
   2307                     n++;
   2308 
   2309                     if (num00)
   2310                         num00--;
   2311                     else
   2312                     {
   2313                         thissme = cpi->diamond_search_sad(x, b, d, &mvp_full,
   2314                                     &d->bmi.mv, step_param + n, sadpb, &num00,
   2315                                     &cpi->fn_ptr[BLOCK_16X16], x->mvcost,
   2316                                     &best_ref_mv);
   2317 
   2318                         /* check to see if refining search is needed. */
   2319                         if (num00 > (further_steps-n))
   2320                             do_refine = 0;
   2321 
   2322                         if (thissme < bestsme)
   2323                         {
   2324                             bestsme = thissme;
   2325                             mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
   2326                         }
   2327                         else
   2328                         {
   2329                             d->bmi.mv.as_int = mode_mv[NEWMV].as_int;
   2330                         }
   2331                     }
   2332                 }
   2333             }
   2334 
   2335             /* final 1-away diamond refining search */
   2336             if (do_refine == 1)
   2337             {
   2338                 int search_range;
   2339 
   2340                 search_range = 8;
   2341 
   2342                 thissme = cpi->refining_search_sad(x, b, d, &d->bmi.mv, sadpb,
   2343                                        search_range, &cpi->fn_ptr[BLOCK_16X16],
   2344                                        x->mvcost, &best_ref_mv);
   2345 
   2346                 if (thissme < bestsme)
   2347                 {
   2348                     bestsme = thissme;
   2349                     mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
   2350                 }
   2351                 else
   2352                 {
   2353                     d->bmi.mv.as_int = mode_mv[NEWMV].as_int;
   2354                 }
   2355             }
   2356 
   2357             x->mv_col_min = tmp_col_min;
   2358             x->mv_col_max = tmp_col_max;
   2359             x->mv_row_min = tmp_row_min;
   2360             x->mv_row_max = tmp_row_max;
   2361 
   2362             if (bestsme < INT_MAX)
   2363             {
   2364                 int dis; /* TODO: use dis in distortion calculation later. */
   2365                 unsigned int sse;
   2366                 cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv, &best_ref_mv,
   2367                                              x->errorperbit,
   2368                                              &cpi->fn_ptr[BLOCK_16X16],
   2369                                              x->mvcost, &dis, &sse);
   2370             }
   2371 
   2372             mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
   2373 
   2374             /* Add the new motion vector cost to our rolling cost variable */
   2375             rd.rate2 += vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv, x->mvcost, 96);
   2376         }
   2377 
   2378         case NEARESTMV:
   2379         case NEARMV:
   2380             /* Clip "next_nearest" so that it does not extend too far out
   2381              * of image
   2382              */
   2383             vp8_clamp_mv2(&mode_mv[this_mode], xd);
   2384 
   2385             /* Do not bother proceeding if the vector (from newmv, nearest
   2386              * or near) is 0,0 as this should then be coded using the zeromv
   2387              * mode.
   2388              */
   2389             if (((this_mode == NEARMV) || (this_mode == NEARESTMV)) && (mode_mv[this_mode].as_int == 0))
   2390                 continue;
   2391 
   2392         case ZEROMV:
   2393 
   2394             /* Trap vectors that reach beyond the UMV borders
   2395              * Note that ALL New MV, Nearest MV Near MV and Zero MV code
   2396              * drops through to this point because of the lack of break
   2397              * statements in the previous two cases.
   2398              */
   2399             if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) || ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) ||
   2400                 ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) || ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max))
   2401                 continue;
   2402 
   2403             vp8_set_mbmode_and_mvs(x, this_mode, &mode_mv[this_mode]);
   2404             this_rd = evaluate_inter_mode_rd(mdcounts, &rd,
   2405                                              &disable_skip, cpi, x);
   2406             break;
   2407 
   2408         default:
   2409             break;
   2410         }
   2411 
   2412         this_rd = calculate_final_rd_costs(this_rd, &rd, &other_cost,
   2413                                            disable_skip, uv_intra_tteob,
   2414                                            intra_rd_penalty, cpi, x);
   2415 
   2416         /* Keep record of best intra distortion */
   2417         if ((x->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME) &&
   2418             (this_rd < best_mode.intra_rd) )
   2419         {
   2420           best_mode.intra_rd = this_rd;
   2421             *returnintra = rd.distortion2 ;
   2422         }
   2423 #if CONFIG_TEMPORAL_DENOISING
   2424         if (cpi->oxcf.noise_sensitivity)
   2425         {
   2426             unsigned int sse;
   2427             vp8_get_inter_mbpred_error(x,&cpi->fn_ptr[BLOCK_16X16],&sse,
   2428                                    mode_mv[this_mode]);
   2429 
   2430             if (sse < best_rd_sse)
   2431                 best_rd_sse = sse;
   2432 
   2433             /* Store for later use by denoiser. */
   2434             if (this_mode == ZEROMV && sse < zero_mv_sse )
   2435             {
   2436                 zero_mv_sse = sse;
   2437                 x->best_zeromv_reference_frame =
   2438                         x->e_mbd.mode_info_context->mbmi.ref_frame;
   2439             }
   2440 
   2441             /* Store the best NEWMV in x for later use in the denoiser. */
   2442             if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV &&
   2443                     sse < best_sse)
   2444             {
   2445                 best_sse = sse;
   2446                 vp8_get_inter_mbpred_error(x,&cpi->fn_ptr[BLOCK_16X16],&best_sse,
   2447                                        mode_mv[this_mode]);
   2448                 x->best_sse_inter_mode = NEWMV;
   2449                 x->best_sse_mv = x->e_mbd.mode_info_context->mbmi.mv;
   2450                 x->need_to_clamp_best_mvs =
   2451                     x->e_mbd.mode_info_context->mbmi.need_to_clamp_mvs;
   2452                 x->best_reference_frame =
   2453                     x->e_mbd.mode_info_context->mbmi.ref_frame;
   2454             }
   2455         }
   2456 #endif
   2457 
   2458         /* Did this mode help, i.e. is it the new best mode? */
   2459         if (this_rd < best_mode.rd || x->skip)
   2460         {
   2461             /* Note index of best mode so far */
   2462             best_mode_index = mode_index;
   2463             *returnrate = rd.rate2;
   2464             *returndistortion = rd.distortion2;
   2465             if (this_mode <= B_PRED)
   2466             {
   2467                 x->e_mbd.mode_info_context->mbmi.uv_mode = uv_intra_mode;
   2468                 /* required for left and above block mv */
   2469                 x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
   2470             }
   2471             update_best_mode(&best_mode, this_rd, &rd, other_cost, x);
   2472 
   2473 
   2474             /* Testing this mode gave rise to an improvement in best error
   2475              * score. Lower threshold a bit for next time
   2476              */
   2477             x->rd_thresh_mult[mode_index] =
   2478                 (x->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ?
   2479                     x->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT;
   2480         }
   2481 
   2482         /* If the mode did not help improve the best error case then raise
   2483          * the threshold for testing that mode next time around.
   2484          */
   2485         else
   2486         {
   2487             x->rd_thresh_mult[mode_index] += 4;
   2488 
   2489             if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT)
   2490                 x->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
   2491         }
   2492         x->rd_threshes[mode_index] =
   2493             (cpi->rd_baseline_thresh[mode_index] >> 7) *
   2494                 x->rd_thresh_mult[mode_index];
   2495 
   2496         if (x->skip)
   2497             break;
   2498 
   2499     }
   2500 
   2501     /* Reduce the activation RD thresholds for the best choice mode */
   2502     if ((cpi->rd_baseline_thresh[best_mode_index] > 0) && (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2)))
   2503     {
   2504         int best_adjustment = (x->rd_thresh_mult[best_mode_index] >> 2);
   2505 
   2506         x->rd_thresh_mult[best_mode_index] =
   2507             (x->rd_thresh_mult[best_mode_index] >=
   2508                 (MIN_THRESHMULT + best_adjustment)) ?
   2509                     x->rd_thresh_mult[best_mode_index] - best_adjustment :
   2510                     MIN_THRESHMULT;
   2511         x->rd_threshes[best_mode_index] =
   2512             (cpi->rd_baseline_thresh[best_mode_index] >> 7) *
   2513                 x->rd_thresh_mult[best_mode_index];
   2514     }
   2515 
   2516 #if CONFIG_TEMPORAL_DENOISING
   2517     if (cpi->oxcf.noise_sensitivity)
   2518     {
   2519         if (x->best_sse_inter_mode == DC_PRED)
   2520         {
   2521             /* No best MV found. */
   2522             x->best_sse_inter_mode = best_mode.mbmode.mode;
   2523             x->best_sse_mv = best_mode.mbmode.mv;
   2524             x->need_to_clamp_best_mvs = best_mode.mbmode.need_to_clamp_mvs;
   2525             x->best_reference_frame = best_mode.mbmode.ref_frame;
   2526             best_sse = best_rd_sse;
   2527         }
   2528         vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse,
   2529                                 recon_yoffset, recon_uvoffset);
   2530 
   2531 
   2532         /* Reevaluate ZEROMV after denoising. */
   2533         if (best_mode.mbmode.ref_frame == INTRA_FRAME &&
   2534             x->best_zeromv_reference_frame != INTRA_FRAME)
   2535         {
   2536             int this_rd = INT_MAX;
   2537             int disable_skip = 0;
   2538             int other_cost = 0;
   2539             int this_ref_frame = x->best_zeromv_reference_frame;
   2540             rd.rate2 = x->ref_frame_cost[this_ref_frame] +
   2541                     vp8_cost_mv_ref(ZEROMV, mdcounts);
   2542             rd.distortion2 = 0;
   2543 
   2544             /* set up the proper prediction buffers for the frame */
   2545             x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame;
   2546             x->e_mbd.pre.y_buffer = plane[this_ref_frame][0];
   2547             x->e_mbd.pre.u_buffer = plane[this_ref_frame][1];
   2548             x->e_mbd.pre.v_buffer = plane[this_ref_frame][2];
   2549 
   2550             x->e_mbd.mode_info_context->mbmi.mode = ZEROMV;
   2551             x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
   2552             x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
   2553 
   2554             this_rd = evaluate_inter_mode_rd(mdcounts, &rd, &disable_skip, cpi, x);
   2555             this_rd = calculate_final_rd_costs(this_rd, &rd, &other_cost,
   2556                                                disable_skip, uv_intra_tteob,
   2557                                                intra_rd_penalty, cpi, x);
   2558             if (this_rd < best_mode.rd || x->skip)
   2559             {
   2560                 /* Note index of best mode so far */
   2561                 best_mode_index = mode_index;
   2562                 *returnrate = rd.rate2;
   2563                 *returndistortion = rd.distortion2;
   2564                 update_best_mode(&best_mode, this_rd, &rd, other_cost, x);
   2565             }
   2566         }
   2567 
   2568     }
   2569 #endif
   2570 
   2571     if (cpi->is_src_frame_alt_ref &&
   2572         (best_mode.mbmode.mode != ZEROMV || best_mode.mbmode.ref_frame != ALTREF_FRAME))
   2573     {
   2574         x->e_mbd.mode_info_context->mbmi.mode = ZEROMV;
   2575         x->e_mbd.mode_info_context->mbmi.ref_frame = ALTREF_FRAME;
   2576         x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
   2577         x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
   2578         x->e_mbd.mode_info_context->mbmi.mb_skip_coeff =
   2579                                         (cpi->common.mb_no_coeff_skip);
   2580         x->e_mbd.mode_info_context->mbmi.partitioning = 0;
   2581         return;
   2582     }
   2583 
   2584 
   2585     /* macroblock modes */
   2586     vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mode.mbmode, sizeof(MB_MODE_INFO));
   2587 
   2588     if (best_mode.mbmode.mode == B_PRED)
   2589     {
   2590         for (i = 0; i < 16; i++)
   2591             xd->mode_info_context->bmi[i].as_mode = best_mode.bmodes[i].as_mode;
   2592     }
   2593 
   2594     if (best_mode.mbmode.mode == SPLITMV)
   2595     {
   2596         for (i = 0; i < 16; i++)
   2597             xd->mode_info_context->bmi[i].mv.as_int = best_mode.bmodes[i].mv.as_int;
   2598 
   2599         vpx_memcpy(x->partition_info, &best_mode.partition, sizeof(PARTITION_INFO));
   2600 
   2601         x->e_mbd.mode_info_context->mbmi.mv.as_int =
   2602                                       x->partition_info->bmi[15].mv.as_int;
   2603     }
   2604 
   2605     if (sign_bias
   2606         != cpi->common.ref_frame_sign_bias[xd->mode_info_context->mbmi.ref_frame])
   2607         best_ref_mv.as_int = best_ref_mv_sb[!sign_bias].as_int;
   2608 
   2609     rd_update_mvcount(x, &best_ref_mv);
   2610 }
   2611 
   2612 /* Choose the intra prediction mode for one macroblock.
   2613  *
   2614  * The macroblock is tagged INTRA_FRAME, then the UV search, the 16x16 Y
   2615  * search and the 4x4 Y search run in that order; the 4x4 search is handed
   2616  * the value returned by the 16x16 search. mbmi.mode is written here only
   2617  * when the 4x4 result is strictly lower, in which case it becomes B_PRED.
   2618  *
   2619  * x:     macroblock being coded
   2620  * rate_: out, UV rate plus the rate of the winning Y search
   2621  */
   2622 void vp8_rd_pick_intra_mode(MACROBLOCK *x, int *rate_)
   2623 {
   2624     /* Token-only rates and distortions are passed on but never read here. */
   2625     int uv_tok = 0, y16_tok = 0, y4_tok = 0;
   2626     int uv_dist, y16_dist, y4_dist;
   2627     int uv_rate, y16_rate = 0, y4_rate;
   2628     int y16_err, y4_err;
   2629 
   2630     x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
   2631     rd_pick_intra_mbuv_mode(x, &uv_rate, &uv_tok, &uv_dist);
   2632     y16_err = rd_pick_intra16x16mby_mode(x, &y16_rate, &y16_tok, &y16_dist);
   2633     y4_err = rd_pick_intra4x4mby_modes(x, &y4_rate, &y4_tok, &y4_dist,
   2634                                        y16_err);
   2635 
   2636     if (y4_err >= y16_err)
   2637     {
   2638         *rate_ = uv_rate + y16_rate;
   2639         return;
   2640     }
   2641 
   2642     x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
   2643     *rate_ = uv_rate + y4_rate;
   2644 }
   2645