Home | History | Annotate | Download | only in encoder
      1 /*
      2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 
     12 #include <stdio.h>
     13 #include <math.h>
     14 #include <limits.h>
     15 #include <assert.h>
     16 #include "vp8/common/pragmas.h"
     17 
     18 #include "tokenize.h"
     19 #include "treewriter.h"
     20 #include "onyx_int.h"
     21 #include "modecosts.h"
     22 #include "encodeintra.h"
     23 #include "vp8/common/entropymode.h"
     24 #include "vp8/common/reconinter.h"
     25 #include "vp8/common/reconintra.h"
     26 #include "vp8/common/reconintra4x4.h"
     27 #include "vp8/common/findnearmv.h"
     28 #include "encodemb.h"
     29 #include "quantize.h"
     30 #include "vp8/common/idct.h"
     31 #include "vp8/common/g_common.h"
     32 #include "variance.h"
     33 #include "mcomp.h"
     34 
     35 #include "vpx_mem/vpx_mem.h"
     36 #include "dct.h"
     37 #include "vp8/common/systemdependent.h"
     38 
     39 #if CONFIG_RUNTIME_CPU_DETECT
     40 #define IF_RTCD(x)  (x)
     41 #else
     42 #define IF_RTCD(x)  NULL
     43 #endif
     44 
     45 
     46 extern void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x);
     47 extern void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x);
     48 
     49 
     50 #define RDCOST(RM,DM,R,D) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) )
     51 
     52 #define MAXF(a,b)            (((a) > (b)) ? (a) : (b))
     53 
     54 
     55 
     56 static const int auto_speed_thresh[17] =
     57 {
     58     1000,
     59     200,
     60     150,
     61     130,
     62     150,
     63     125,
     64     120,
     65     115,
     66     115,
     67     115,
     68     115,
     69     115,
     70     115,
     71     115,
     72     115,
     73     115,
     74     105
     75 };
     76 
     77 const MB_PREDICTION_MODE vp8_mode_order[MAX_MODES] =
     78 {
     79     ZEROMV,
     80     DC_PRED,
     81 
     82     NEARESTMV,
     83     NEARMV,
     84 
     85     ZEROMV,
     86     NEARESTMV,
     87 
     88     ZEROMV,
     89     NEARESTMV,
     90 
     91     NEARMV,
     92     NEARMV,
     93 
     94     V_PRED,
     95     H_PRED,
     96     TM_PRED,
     97 
     98     NEWMV,
     99     NEWMV,
    100     NEWMV,
    101 
    102     SPLITMV,
    103     SPLITMV,
    104     SPLITMV,
    105 
    106     B_PRED,
    107 };
    108 
    109 const MV_REFERENCE_FRAME vp8_ref_frame_order[MAX_MODES] =
    110 {
    111     LAST_FRAME,
    112     INTRA_FRAME,
    113 
    114     LAST_FRAME,
    115     LAST_FRAME,
    116 
    117     GOLDEN_FRAME,
    118     GOLDEN_FRAME,
    119 
    120     ALTREF_FRAME,
    121     ALTREF_FRAME,
    122 
    123     GOLDEN_FRAME,
    124     ALTREF_FRAME,
    125 
    126     INTRA_FRAME,
    127     INTRA_FRAME,
    128     INTRA_FRAME,
    129 
    130     LAST_FRAME,
    131     GOLDEN_FRAME,
    132     ALTREF_FRAME,
    133 
    134     LAST_FRAME,
    135     GOLDEN_FRAME,
    136     ALTREF_FRAME,
    137 
    138     INTRA_FRAME,
    139 };
    140 
    141 static void fill_token_costs(
    142     unsigned int c      [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens],
    143     const vp8_prob p    [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens-1]
    144 )
    145 {
    146     int i, j, k;
    147 
    148 
    149     for (i = 0; i < BLOCK_TYPES; i++)
    150         for (j = 0; j < COEF_BANDS; j++)
    151             for (k = 0; k < PREV_COEF_CONTEXTS; k++)
    152 
    153                 vp8_cost_tokens((int *)(c [i][j][k]), p [i][j][k], vp8_coef_tree);
    154 
    155 }
    156 
    157 static int rd_iifactor [ 32 ] =  {    4,   4,   3,   2,   1,   0,   0,   0,
    158                                       0,   0,   0,   0,   0,   0,   0,   0,
    159                                       0,   0,   0,   0,   0,   0,   0,   0,
    160                                       0,   0,   0,   0,   0,   0,   0,   0,
    161                                  };
    162 
    163 
    164 /* values are now correlated to quantizer */
    165 static int sad_per_bit16lut[QINDEX_RANGE] =
    166 {
    167     5,  5,  5,  5,  5,  5,  6,  6,
    168     6,  6,  6,  6,  6,  7,  7,  7,
    169     7,  7,  7,  7,  8,  8,  8,  8,
    170     8,  8,  8,  8,  8,  8,  9,  9,
    171     9,  9,  9,  9, 10, 10, 10, 10,
    172     10, 10, 11, 11, 11, 11, 11, 11,
    173     12, 12, 12, 12, 12, 12, 12, 13,
    174     13, 13, 13, 13, 13, 14, 14, 14,
    175     14, 14, 15, 15, 15, 15, 15, 15,
    176     16, 16, 16, 16, 16, 16, 17, 17,
    177     17, 17, 17, 17, 17, 18, 18, 18,
    178     18, 18, 19, 19, 19, 19, 19, 19,
    179     20, 20, 20, 21, 21, 21, 21, 22,
    180     22, 22, 23, 23, 23, 24, 24, 24,
    181     25, 25, 26, 26, 27, 27, 27, 28,
    182     28, 28, 29, 29, 30, 30, 31, 31
    183 };
    184 static int sad_per_bit4lut[QINDEX_RANGE] =
    185 {
    186     5,  5,  5,  5,  5,  5,  7,  7,
    187     7,  7,  7,  7,  7,  8,  8,  8,
    188     8,  8,  8,  8,  10, 10, 10, 10,
    189     10, 10, 10, 10, 10, 10, 11, 11,
    190     11, 11, 11, 11, 13, 13, 13, 13,
    191     13, 13, 14, 14, 14, 14, 14, 14,
    192     16, 16, 16, 16, 16, 16, 16, 17,
    193     17, 17, 17, 17, 17, 19, 19, 19,
    194     19, 19, 20, 20, 20, 20, 20, 20,
    195     22, 22, 22, 22, 22, 22, 23, 23,
    196     23, 23, 23, 23, 23, 25, 25, 25,
    197     25, 25, 26, 26, 26, 26, 26, 26,
    198     28, 28, 28, 29, 29, 29, 29, 31,
    199     31, 31, 32, 32, 32, 34, 34, 34,
    200     35, 35, 37, 37, 38, 38, 38, 40,
    201     40, 40, 41, 41, 43, 43, 44, 44,
    202 };
    203 
    204 void vp8cx_initialize_me_consts(VP8_COMP *cpi, int QIndex)
    205 {
    206     cpi->mb.sadperbit16 =  sad_per_bit16lut[QIndex];
    207     cpi->mb.sadperbit4  =  sad_per_bit4lut[QIndex];
    208 }
    209 
    210 void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue)
    211 {
    212     int q;
    213     int i;
    214     double capped_q = (Qvalue < 160) ? (double)Qvalue : 160.0;
    215     double rdconst = 3.00;
    216 
    217     vp8_clear_system_state();  //__asm emms;
    218 
    219     // Further tests required to see if optimum is different
    220     // for key frames, golden frames and arf frames.
    221     // if (cpi->common.refresh_golden_frame ||
    222     //     cpi->common.refresh_alt_ref_frame)
    223     cpi->RDMULT = (int)(rdconst * (capped_q * capped_q));
    224 
    225     // Extend rate multiplier along side quantizer zbin increases
    226     if (cpi->zbin_over_quant  > 0)
    227     {
    228         double oq_factor;
    229         double modq;
    230 
    231         // Experimental code using the same basic equation as used for Q above
    232         // The units of cpi->zbin_over_quant are 1/128 of Q bin size
    233         oq_factor = 1.0 + ((double)0.0015625 * cpi->zbin_over_quant);
    234         modq = (int)((double)capped_q * oq_factor);
    235         cpi->RDMULT = (int)(rdconst * (modq * modq));
    236     }
    237 
    238     if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME))
    239     {
    240         if (cpi->next_iiratio > 31)
    241             cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
    242         else
    243             cpi->RDMULT += (cpi->RDMULT * rd_iifactor[cpi->next_iiratio]) >> 4;
    244     }
    245 
    246     cpi->mb.errorperbit = (cpi->RDMULT / 100);
    247     cpi->mb.errorperbit += (cpi->mb.errorperbit==0);
    248 
    249     vp8_set_speed_features(cpi);
    250 
    251     if (cpi->common.simpler_lpf)
    252         cpi->common.filter_type = SIMPLE_LOOPFILTER;
    253 
    254     q = (int)pow(Qvalue, 1.25);
    255 
    256     if (q < 8)
    257         q = 8;
    258 
    259     if (cpi->RDMULT > 1000)
    260     {
    261         cpi->RDDIV = 1;
    262         cpi->RDMULT /= 100;
    263 
    264         for (i = 0; i < MAX_MODES; i++)
    265         {
    266             if (cpi->sf.thresh_mult[i] < INT_MAX)
    267             {
    268                 cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q / 100;
    269             }
    270             else
    271             {
    272                 cpi->rd_threshes[i] = INT_MAX;
    273             }
    274 
    275             cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
    276         }
    277     }
    278     else
    279     {
    280         cpi->RDDIV = 100;
    281 
    282         for (i = 0; i < MAX_MODES; i++)
    283         {
    284             if (cpi->sf.thresh_mult[i] < (INT_MAX / q))
    285             {
    286                 cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q;
    287             }
    288             else
    289             {
    290                 cpi->rd_threshes[i] = INT_MAX;
    291             }
    292 
    293             cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
    294         }
    295     }
    296 
    297     fill_token_costs(
    298         cpi->mb.token_costs,
    299         (const vp8_prob( *)[8][3][11]) cpi->common.fc.coef_probs
    300     );
    301 
    302     vp8_init_mode_costs(cpi);
    303 
    304 }
    305 
    306 void vp8_auto_select_speed(VP8_COMP *cpi)
    307 {
    308     int used = cpi->oxcf.cpu_used;
    309 
    310     int milliseconds_for_compress = (int)(1000000 / cpi->oxcf.frame_rate);
    311 
    312     milliseconds_for_compress = milliseconds_for_compress * (16 - cpi->oxcf.cpu_used) / 16;
    313 
    314 #if 0
    315 
    316     if (0)
    317     {
    318         FILE *f;
    319 
    320         f = fopen("speed.stt", "a");
    321         fprintf(f, " %8ld %10ld %10ld %10ld\n",
    322                 cpi->common.current_video_frame, cpi->Speed, milliseconds_for_compress, cpi->avg_pick_mode_time);
    323         fclose(f);
    324     }
    325 
    326 #endif
    327 
    328     /*
    329     // this is done during parameter valid check
    330     if( used > 16)
    331         used = 16;
    332     if( used < -16)
    333         used = -16;
    334     */
    335 
    336     if (cpi->avg_pick_mode_time < milliseconds_for_compress && (cpi->avg_encode_time - cpi->avg_pick_mode_time) < milliseconds_for_compress)
    337     {
    338         if (cpi->avg_pick_mode_time == 0)
    339         {
    340             cpi->Speed = 4;
    341         }
    342         else
    343         {
    344             if (milliseconds_for_compress * 100 < cpi->avg_encode_time * 95)
    345             {
    346                 cpi->Speed          += 2;
    347                 cpi->avg_pick_mode_time = 0;
    348                 cpi->avg_encode_time = 0;
    349 
    350                 if (cpi->Speed > 16)
    351                 {
    352                     cpi->Speed = 16;
    353                 }
    354             }
    355 
    356             if (milliseconds_for_compress * 100 > cpi->avg_encode_time * auto_speed_thresh[cpi->Speed])
    357             {
    358                 cpi->Speed          -= 1;
    359                 cpi->avg_pick_mode_time = 0;
    360                 cpi->avg_encode_time = 0;
    361 
    362                 // In real-time mode, cpi->speed is in [4, 16].
    363                 if (cpi->Speed < 4)        //if ( cpi->Speed < 0 )
    364                 {
    365                     cpi->Speed = 4;        //cpi->Speed = 0;
    366                 }
    367             }
    368         }
    369     }
    370     else
    371     {
    372         cpi->Speed += 4;
    373 
    374         if (cpi->Speed > 16)
    375             cpi->Speed = 16;
    376 
    377 
    378         cpi->avg_pick_mode_time = 0;
    379         cpi->avg_encode_time = 0;
    380     }
    381 }
    382 
    383 int vp8_block_error_c(short *coeff, short *dqcoeff)
    384 {
    385     int i;
    386     int error = 0;
    387 
    388     for (i = 0; i < 16; i++)
    389     {
    390         int this_diff = coeff[i] - dqcoeff[i];
    391         error += this_diff * this_diff;
    392     }
    393 
    394     return error;
    395 }
    396 
    397 int vp8_mbblock_error_c(MACROBLOCK *mb, int dc)
    398 {
    399     BLOCK  *be;
    400     BLOCKD *bd;
    401     int i, j;
    402     int berror, error = 0;
    403 
    404     for (i = 0; i < 16; i++)
    405     {
    406         be = &mb->block[i];
    407         bd = &mb->e_mbd.block[i];
    408 
    409         berror = 0;
    410 
    411         for (j = dc; j < 16; j++)
    412         {
    413             int this_diff = be->coeff[j] - bd->dqcoeff[j];
    414             berror += this_diff * this_diff;
    415         }
    416 
    417         error += berror;
    418     }
    419 
    420     return error;
    421 }
    422 
    423 int vp8_mbuverror_c(MACROBLOCK *mb)
    424 {
    425 
    426     BLOCK  *be;
    427     BLOCKD *bd;
    428 
    429 
    430     int i;
    431     int error = 0;
    432 
    433     for (i = 16; i < 24; i++)
    434     {
    435         be = &mb->block[i];
    436         bd = &mb->e_mbd.block[i];
    437 
    438         error += vp8_block_error_c(be->coeff, bd->dqcoeff);
    439     }
    440 
    441     return error;
    442 }
    443 
    444 int VP8_UVSSE(MACROBLOCK *x, const vp8_variance_rtcd_vtable_t *rtcd)
    445 {
    446     unsigned char *uptr, *vptr;
    447     unsigned char *upred_ptr = (*(x->block[16].base_src) + x->block[16].src);
    448     unsigned char *vpred_ptr = (*(x->block[20].base_src) + x->block[20].src);
    449     int uv_stride = x->block[16].src_stride;
    450 
    451     unsigned int sse1 = 0;
    452     unsigned int sse2 = 0;
    453     int mv_row;
    454     int mv_col;
    455     int offset;
    456     int pre_stride = x->e_mbd.block[16].pre_stride;
    457 
    458     vp8_build_uvmvs(&x->e_mbd, 0);
    459     mv_row = x->e_mbd.block[16].bmi.mv.as_mv.row;
    460     mv_col = x->e_mbd.block[16].bmi.mv.as_mv.col;
    461 
    462     offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
    463     uptr = x->e_mbd.pre.u_buffer + offset;
    464     vptr = x->e_mbd.pre.v_buffer + offset;
    465 
    466     if ((mv_row | mv_col) & 7)
    467     {
    468         VARIANCE_INVOKE(rtcd, subpixvar8x8)(uptr, pre_stride, mv_col & 7, mv_row & 7, upred_ptr, uv_stride, &sse2);
    469         VARIANCE_INVOKE(rtcd, subpixvar8x8)(vptr, pre_stride, mv_col & 7, mv_row & 7, vpred_ptr, uv_stride, &sse1);
    470         sse2 += sse1;
    471     }
    472     else
    473     {
    474         VARIANCE_INVOKE(rtcd, subpixvar8x8)(uptr, pre_stride, mv_col & 7, mv_row & 7, upred_ptr, uv_stride, &sse2);
    475         VARIANCE_INVOKE(rtcd, subpixvar8x8)(vptr, pre_stride, mv_col & 7, mv_row & 7, vpred_ptr, uv_stride, &sse1);
    476         sse2 += sse1;
    477     }
    478 
    479     return sse2;
    480 
    481 }
    482 
    483 #if !(CONFIG_REALTIME_ONLY)
    484 static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, int type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l)
    485 {
    486     int c = !type;              /* start at coef 0, unless Y with Y2 */
    487     int eob = b->eob;
    488     int pt ;    /* surrounding block/prev coef predictor */
    489     int cost = 0;
    490     short *qcoeff_ptr = b->qcoeff;
    491 
    492     VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
    493 
    494 # define QC( I)  ( qcoeff_ptr [vp8_default_zig_zag1d[I]] )
    495 
    496     for (; c < eob; c++)
    497     {
    498         int v = QC(c);
    499         int t = vp8_dct_value_tokens_ptr[v].Token;
    500         cost += mb->token_costs [type] [vp8_coef_bands[c]] [pt] [t];
    501         cost += vp8_dct_value_cost_ptr[v];
    502         pt = vp8_prev_token_class[t];
    503     }
    504 
    505 # undef QC
    506 
    507     if (c < 16)
    508         cost += mb->token_costs [type] [vp8_coef_bands[c]] [pt] [DCT_EOB_TOKEN];
    509 
    510     pt = (c != !type); // is eob first coefficient;
    511     *a = *l = pt;
    512 
    513     return cost;
    514 }
    515 
    516 static int vp8_rdcost_mby(MACROBLOCK *mb)
    517 {
    518     int cost = 0;
    519     int b;
    520     MACROBLOCKD *x = &mb->e_mbd;
    521     ENTROPY_CONTEXT_PLANES t_above, t_left;
    522     ENTROPY_CONTEXT *ta;
    523     ENTROPY_CONTEXT *tl;
    524 
    525     vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    526     vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
    527 
    528     ta = (ENTROPY_CONTEXT *)&t_above;
    529     tl = (ENTROPY_CONTEXT *)&t_left;
    530 
    531     for (b = 0; b < 16; b++)
    532         cost += cost_coeffs(mb, x->block + b, PLANE_TYPE_Y_NO_DC,
    533                     ta + vp8_block2above[b], tl + vp8_block2left[b]);
    534 
    535     cost += cost_coeffs(mb, x->block + 24, PLANE_TYPE_Y2,
    536                 ta + vp8_block2above[24], tl + vp8_block2left[24]);
    537 
    538     return cost;
    539 }
    540 
    541 static void macro_block_yrd( MACROBLOCK *mb,
    542                              int *Rate,
    543                              int *Distortion,
    544                              const vp8_encodemb_rtcd_vtable_t *rtcd)
    545 {
    546     int b;
    547     MACROBLOCKD *const x = &mb->e_mbd;
    548     BLOCK   *const mb_y2 = mb->block + 24;
    549     BLOCKD *const x_y2  = x->block + 24;
    550     short *Y2DCPtr = mb_y2->src_diff;
    551     BLOCK *beptr;
    552     int d;
    553 
    554     ENCODEMB_INVOKE(rtcd, submby)( mb->src_diff, mb->src.y_buffer,
    555                                    mb->e_mbd.predictor, mb->src.y_stride );
    556 
    557     // Fdct and building the 2nd order block
    558     for (beptr = mb->block; beptr < mb->block + 16; beptr += 2)
    559     {
    560         mb->vp8_short_fdct8x4(beptr->src_diff, beptr->coeff, 32);
    561         *Y2DCPtr++ = beptr->coeff[0];
    562         *Y2DCPtr++ = beptr->coeff[16];
    563     }
    564 
    565     // 2nd order fdct
    566     mb->short_walsh4x4(mb_y2->src_diff, mb_y2->coeff, 8);
    567 
    568     // Quantization
    569     for (b = 0; b < 16; b++)
    570     {
    571         mb->quantize_b(&mb->block[b], &mb->e_mbd.block[b]);
    572     }
    573 
    574     // DC predication and Quantization of 2nd Order block
    575     mb->quantize_b(mb_y2, x_y2);
    576 
    577     // Distortion
    578     d = ENCODEMB_INVOKE(rtcd, mberr)(mb, 1) << 2;
    579     d += ENCODEMB_INVOKE(rtcd, berr)(mb_y2->coeff, x_y2->dqcoeff);
    580 
    581     *Distortion = (d >> 4);
    582 
    583     // rate
    584     *Rate = vp8_rdcost_mby(mb);
    585 }
    586 
    587 static void copy_predictor(unsigned char *dst, const unsigned char *predictor)
    588 {
    589     const unsigned int *p = (const unsigned int *)predictor;
    590     unsigned int *d = (unsigned int *)dst;
    591     d[0] = p[0];
    592     d[4] = p[4];
    593     d[8] = p[8];
    594     d[12] = p[12];
    595 }
    596 static int rd_pick_intra4x4block(
    597     VP8_COMP *cpi,
    598     MACROBLOCK *x,
    599     BLOCK *be,
    600     BLOCKD *b,
    601     B_PREDICTION_MODE *best_mode,
    602     unsigned int *bmode_costs,
    603     ENTROPY_CONTEXT *a,
    604     ENTROPY_CONTEXT *l,
    605 
    606     int *bestrate,
    607     int *bestratey,
    608     int *bestdistortion)
    609 {
    610     B_PREDICTION_MODE mode;
    611     int best_rd = INT_MAX;
    612     int rate = 0;
    613     int distortion;
    614 
    615     ENTROPY_CONTEXT ta = *a, tempa = *a;
    616     ENTROPY_CONTEXT tl = *l, templ = *l;
    617     /*
    618      * The predictor buffer is a 2d buffer with a stride of 16.  Create
    619      * a temp buffer that meets the stride requirements, but we are only
    620      * interested in the left 4x4 block
    621      * */
    622     DECLARE_ALIGNED_ARRAY(16, unsigned char,  best_predictor, 16*4);
    623     DECLARE_ALIGNED_ARRAY(16, short, best_dqcoeff, 16);
    624 
    625     for (mode = B_DC_PRED; mode <= B_HU_PRED; mode++)
    626     {
    627         int this_rd;
    628         int ratey;
    629 
    630         rate = bmode_costs[mode];
    631 
    632         vp8_predict_intra4x4(b, mode, b->predictor);
    633         ENCODEMB_INVOKE(IF_RTCD(&cpi->rtcd.encodemb), subb)(be, b, 16);
    634         x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32);
    635         x->quantize_b(be, b);
    636 
    637         tempa = ta;
    638         templ = tl;
    639 
    640         ratey = cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC, &tempa, &templ);
    641         rate += ratey;
    642         distortion = ENCODEMB_INVOKE(IF_RTCD(&cpi->rtcd.encodemb), berr)(be->coeff, b->dqcoeff) >> 2;
    643 
    644         this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
    645 
    646         if (this_rd < best_rd)
    647         {
    648             *bestrate = rate;
    649             *bestratey = ratey;
    650             *bestdistortion = distortion;
    651             best_rd = this_rd;
    652             *best_mode = mode;
    653             *a = tempa;
    654             *l = templ;
    655             copy_predictor(best_predictor, b->predictor);
    656             vpx_memcpy(best_dqcoeff, b->dqcoeff, 32);
    657         }
    658     }
    659 
    660     b->bmi.mode = (B_PREDICTION_MODE)(*best_mode);
    661 
    662     IDCT_INVOKE(IF_RTCD(&cpi->rtcd.common->idct), idct16)(best_dqcoeff, b->diff, 32);
    663     RECON_INVOKE(IF_RTCD(&cpi->rtcd.common->recon), recon)(best_predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
    664 
    665     return best_rd;
    666 }
    667 
    668 int vp8_rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate,
    669                                   int *rate_y, int *Distortion, int best_rd)
    670 {
    671     MACROBLOCKD *const xd = &mb->e_mbd;
    672     int i;
    673     int cost = mb->mbmode_cost [xd->frame_type] [B_PRED];
    674     int distortion = 0;
    675     int tot_rate_y = 0;
    676     long long total_rd = 0;
    677     ENTROPY_CONTEXT_PLANES t_above, t_left;
    678     ENTROPY_CONTEXT *ta;
    679     ENTROPY_CONTEXT *tl;
    680     unsigned int *bmode_costs;
    681 
    682     vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    683     vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
    684 
    685     ta = (ENTROPY_CONTEXT *)&t_above;
    686     tl = (ENTROPY_CONTEXT *)&t_left;
    687 
    688     vp8_intra_prediction_down_copy(xd);
    689 
    690     bmode_costs = mb->inter_bmode_costs;
    691 
    692     for (i = 0; i < 16; i++)
    693     {
    694         MODE_INFO *const mic = xd->mode_info_context;
    695         const int mis = xd->mode_info_stride;
    696         B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode);
    697         int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry), UNINITIALIZED_IS_SAFE(d);
    698 
    699         if (mb->e_mbd.frame_type == KEY_FRAME)
    700         {
    701             const B_PREDICTION_MODE A = vp8_above_bmi(mic, i, mis)->mode;
    702             const B_PREDICTION_MODE L = vp8_left_bmi(mic, i)->mode;
    703 
    704             bmode_costs  = mb->bmode_costs[A][L];
    705         }
    706 
    707         total_rd += rd_pick_intra4x4block(
    708             cpi, mb, mb->block + i, xd->block + i, &best_mode, bmode_costs,
    709             ta + vp8_block2above[i],
    710             tl + vp8_block2left[i], &r, &ry, &d);
    711 
    712         cost += r;
    713         distortion += d;
    714         tot_rate_y += ry;
    715         mic->bmi[i].mode = xd->block[i].bmi.mode = best_mode;
    716 
    717         if(total_rd >= (long long)best_rd)
    718             break;
    719     }
    720 
    721     if(total_rd >= (long long)best_rd)
    722         return INT_MAX;
    723 
    724     *Rate = cost;
    725     *rate_y += tot_rate_y;
    726     *Distortion = distortion;
    727 
    728     return RDCOST(mb->rdmult, mb->rddiv, cost, distortion);
    729 }
    730 int vp8_rd_pick_intra16x16mby_mode(VP8_COMP *cpi,
    731                                    MACROBLOCK *x,
    732                                    int *Rate,
    733                                    int *rate_y,
    734                                    int *Distortion)
    735 {
    736     MB_PREDICTION_MODE mode;
    737     MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
    738     int rate, ratey;
    739     int distortion;
    740     int best_rd = INT_MAX;
    741     int this_rd;
    742 
    743     //Y Search for 16x16 intra prediction mode
    744     for (mode = DC_PRED; mode <= TM_PRED; mode++)
    745     {
    746         x->e_mbd.mode_info_context->mbmi.mode = mode;
    747 
    748         RECON_INVOKE(&cpi->common.rtcd.recon, build_intra_predictors_mby)
    749             (&x->e_mbd);
    750 
    751         macro_block_yrd(x, &ratey, &distortion, IF_RTCD(&cpi->rtcd.encodemb));
    752         rate = ratey + x->mbmode_cost[x->e_mbd.frame_type]
    753                                      [x->e_mbd.mode_info_context->mbmi.mode];
    754 
    755         this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
    756 
    757         if (this_rd < best_rd)
    758         {
    759             mode_selected = mode;
    760             best_rd = this_rd;
    761             *Rate = rate;
    762             *rate_y = ratey;
    763             *Distortion = distortion;
    764         }
    765     }
    766 
    767     x->e_mbd.mode_info_context->mbmi.mode = mode_selected;
    768     return best_rd;
    769 }
    770 
    771 static int rd_cost_mbuv(MACROBLOCK *mb)
    772 {
    773     int b;
    774     int cost = 0;
    775     MACROBLOCKD *x = &mb->e_mbd;
    776     ENTROPY_CONTEXT_PLANES t_above, t_left;
    777     ENTROPY_CONTEXT *ta;
    778     ENTROPY_CONTEXT *tl;
    779 
    780     vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    781     vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
    782 
    783     ta = (ENTROPY_CONTEXT *)&t_above;
    784     tl = (ENTROPY_CONTEXT *)&t_left;
    785 
    786     for (b = 16; b < 24; b++)
    787         cost += cost_coeffs(mb, x->block + b, PLANE_TYPE_UV,
    788                     ta + vp8_block2above[b], tl + vp8_block2left[b]);
    789 
    790     return cost;
    791 }
    792 
    793 
    794 static int vp8_rd_inter_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *distortion, int fullpixel)
    795 {
    796     vp8_build_uvmvs(&x->e_mbd, fullpixel);
    797     vp8_encode_inter16x16uvrd(IF_RTCD(&cpi->rtcd), x);
    798 
    799 
    800     *rate       = rd_cost_mbuv(x);
    801     *distortion = ENCODEMB_INVOKE(&cpi->rtcd.encodemb, mbuverr)(x) / 4;
    802 
    803     return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
    804 }
    805 
    806 int vp8_rd_pick_intra_mbuv_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *rate_tokenonly, int *distortion)
    807 {
    808     MB_PREDICTION_MODE mode;
    809     MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
    810     int best_rd = INT_MAX;
    811     int UNINITIALIZED_IS_SAFE(d), UNINITIALIZED_IS_SAFE(r);
    812     int rate_to;
    813 
    814     for (mode = DC_PRED; mode <= TM_PRED; mode++)
    815     {
    816         int rate;
    817         int distortion;
    818         int this_rd;
    819 
    820         x->e_mbd.mode_info_context->mbmi.uv_mode = mode;
    821         vp8_build_intra_predictors_mbuv(&x->e_mbd);
    822         ENCODEMB_INVOKE(IF_RTCD(&cpi->rtcd.encodemb), submbuv)(x->src_diff,
    823                       x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor,
    824                       x->src.uv_stride);
    825         vp8_transform_mbuv(x);
    826         vp8_quantize_mbuv(x);
    827 
    828         rate_to = rd_cost_mbuv(x);
    829         rate = rate_to + x->intra_uv_mode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.uv_mode];
    830 
    831         distortion = ENCODEMB_INVOKE(&cpi->rtcd.encodemb, mbuverr)(x) / 4;
    832 
    833         this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
    834 
    835         if (this_rd < best_rd)
    836         {
    837             best_rd = this_rd;
    838             d = distortion;
    839             r = rate;
    840             *rate_tokenonly = rate_to;
    841             mode_selected = mode;
    842         }
    843     }
    844 
    845     *rate = r;
    846     *distortion = d;
    847 
    848     x->e_mbd.mode_info_context->mbmi.uv_mode = mode_selected;
    849     return best_rd;
    850 }
    851 #endif
    852 
    853 int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4])
    854 {
    855     vp8_prob p [VP8_MVREFS-1];
    856     assert(NEARESTMV <= m  &&  m <= SPLITMV);
    857     vp8_mv_ref_probs(p, near_mv_ref_ct);
    858     return vp8_cost_token(vp8_mv_ref_tree, p,
    859                           vp8_mv_ref_encoding_array - NEARESTMV + m);
    860 }
    861 
    862 void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, MV *mv)
    863 {
    864     int i;
    865 
    866     x->e_mbd.mode_info_context->mbmi.mode = mb;
    867     x->e_mbd.mode_info_context->mbmi.mv.as_mv.row = mv->row;
    868     x->e_mbd.mode_info_context->mbmi.mv.as_mv.col = mv->col;
    869 
    870     for (i = 0; i < 16; i++)
    871     {
    872         B_MODE_INFO *bmi = &x->e_mbd.block[i].bmi;
    873         bmi->mode = (B_PREDICTION_MODE) mb;
    874         bmi->mv.as_mv.row = mv->row;
    875         bmi->mv.as_mv.col = mv->col;
    876     }
    877 }
    878 
    879 #if !(CONFIG_REALTIME_ONLY)
    880 static int labels2mode(
    881     MACROBLOCK *x,
    882     int const *labelings, int which_label,
    883     B_PREDICTION_MODE this_mode,
    884     MV *this_mv, MV *best_ref_mv,
    885     int *mvcost[2]
    886 )
    887 {
    888     MACROBLOCKD *const xd = & x->e_mbd;
    889     MODE_INFO *const mic = xd->mode_info_context;
    890     const int mis = xd->mode_info_stride;
    891 
    892     int cost = 0;
    893     int thismvcost = 0;
    894 
    895     /* We have to be careful retrieving previously-encoded motion vectors.
    896        Ones from this macroblock have to be pulled from the BLOCKD array
    897        as they have not yet made it to the bmi array in our MB_MODE_INFO. */
    898 
    899     int i = 0;
    900 
    901     do
    902     {
    903         BLOCKD *const d = xd->block + i;
    904         const int row = i >> 2,  col = i & 3;
    905 
    906         B_PREDICTION_MODE m;
    907 
    908         if (labelings[i] != which_label)
    909             continue;
    910 
    911         if (col  &&  labelings[i] == labelings[i-1])
    912             m = LEFT4X4;
    913         else if (row  &&  labelings[i] == labelings[i-4])
    914             m = ABOVE4X4;
    915         else
    916         {
    917             // the only time we should do costing for new motion vector or mode
    918             // is when we are on a new label  (jbb May 08, 2007)
    919             switch (m = this_mode)
    920             {
    921             case NEW4X4 :
    922                 thismvcost  = vp8_mv_bit_cost(this_mv, best_ref_mv, mvcost, 102);
    923                 break;
    924             case LEFT4X4:
    925                 *this_mv = col ? d[-1].bmi.mv.as_mv : vp8_left_bmi(mic, i)->mv.as_mv;
    926                 break;
    927             case ABOVE4X4:
    928                 *this_mv = row ? d[-4].bmi.mv.as_mv : vp8_above_bmi(mic, i, mis)->mv.as_mv;
    929                 break;
    930             case ZERO4X4:
    931                 this_mv->row = this_mv->col = 0;
    932                 break;
    933             default:
    934                 break;
    935             }
    936 
    937             if (m == ABOVE4X4)  // replace above with left if same
    938             {
    939                 const MV mv = col ? d[-1].bmi.mv.as_mv : vp8_left_bmi(mic, i)->mv.as_mv;
    940 
    941                 if (mv.row == this_mv->row  &&  mv.col == this_mv->col)
    942                     m = LEFT4X4;
    943             }
    944 
    945             cost = x->inter_bmode_costs[ m];
    946         }
    947 
    948         d->bmi.mode = m;
    949         d->bmi.mv.as_mv = *this_mv;
    950 
    951     }
    952     while (++i < 16);
    953 
    954     cost += thismvcost ;
    955     return cost;
    956 }
    957 
    958 static int rdcost_mbsegment_y(MACROBLOCK *mb, const int *labels,
    959                               int which_label, ENTROPY_CONTEXT *ta,
    960                               ENTROPY_CONTEXT *tl)
    961 {
    962     int cost = 0;
    963     int b;
    964     MACROBLOCKD *x = &mb->e_mbd;
    965 
    966     for (b = 0; b < 16; b++)
    967         if (labels[ b] == which_label)
    968             cost += cost_coeffs(mb, x->block + b, PLANE_TYPE_Y_WITH_DC,
    969                                 ta + vp8_block2above[b],
    970                                 tl + vp8_block2left[b]);
    971 
    972     return cost;
    973 
    974 }
    975 static unsigned int vp8_encode_inter_mb_segment(MACROBLOCK *x, int const *labels, int which_label, const vp8_encodemb_rtcd_vtable_t *rtcd)
    976 {
    977     int i;
    978     unsigned int distortion = 0;
    979 
    980     for (i = 0; i < 16; i++)
    981     {
    982         if (labels[i] == which_label)
    983         {
    984             BLOCKD *bd = &x->e_mbd.block[i];
    985             BLOCK *be = &x->block[i];
    986 
    987 
    988             vp8_build_inter_predictors_b(bd, 16, x->e_mbd.subpixel_predict);
    989             ENCODEMB_INVOKE(rtcd, subb)(be, bd, 16);
    990             x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32);
    991 
    992             // set to 0 no way to account for 2nd order DC so discount
    993             //be->coeff[0] = 0;
    994             x->quantize_b(be, bd);
    995 
    996             distortion += ENCODEMB_INVOKE(rtcd, berr)(be->coeff, bd->dqcoeff);
    997         }
    998     }
    999 
   1000     return distortion;
   1001 }
   1002 
   1003 
   1004 static const unsigned int segmentation_to_sseshift[4] = {3, 3, 2, 0};
   1005 
   1006 
   1007 typedef struct
   1008 {
   1009   MV *ref_mv;
   1010   MV *mvp;
   1011 
   1012   int segment_rd;
   1013   int segment_num;
   1014   int r;
   1015   int d;
   1016   int segment_yrate;
   1017   B_PREDICTION_MODE modes[16];
   1018   int_mv mvs[16];
   1019   unsigned char eobs[16];
   1020 
   1021   int mvthresh;
   1022   int *mdcounts;
   1023 
   1024   MV sv_mvp[4];     // save 4 mvp from 8x8
   1025   int sv_istep[2];  // save 2 initial step_param for 16x8/8x16
   1026 
   1027 } BEST_SEG_INFO;
   1028 
   1029 
   1030 static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
   1031                              BEST_SEG_INFO *bsi, unsigned int segmentation)
   1032 {
   1033     int i;
   1034     int const *labels;
   1035     int br = 0;
   1036     int bd = 0;
   1037     B_PREDICTION_MODE this_mode;
   1038 
   1039 
   1040     int label_count;
   1041     int this_segment_rd = 0;
   1042     int label_mv_thresh;
   1043     int rate = 0;
   1044     int sbr = 0;
   1045     int sbd = 0;
   1046     int segmentyrate = 0;
   1047 
   1048     vp8_variance_fn_ptr_t *v_fn_ptr;
   1049 
   1050     ENTROPY_CONTEXT_PLANES t_above, t_left;
   1051     ENTROPY_CONTEXT *ta;
   1052     ENTROPY_CONTEXT *tl;
   1053     ENTROPY_CONTEXT_PLANES t_above_b, t_left_b;
   1054     ENTROPY_CONTEXT *ta_b;
   1055     ENTROPY_CONTEXT *tl_b;
   1056 
   1057     vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
   1058     vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
   1059 
   1060     ta = (ENTROPY_CONTEXT *)&t_above;
   1061     tl = (ENTROPY_CONTEXT *)&t_left;
   1062     ta_b = (ENTROPY_CONTEXT *)&t_above_b;
   1063     tl_b = (ENTROPY_CONTEXT *)&t_left_b;
   1064 
   1065     br = 0;
   1066     bd = 0;
   1067 
   1068     v_fn_ptr = &cpi->fn_ptr[segmentation];
   1069     labels = vp8_mbsplits[segmentation];
   1070     label_count = vp8_mbsplit_count[segmentation];
   1071 
   1072     // 64 makes this threshold really big effectively
   1073     // making it so that we very rarely check mvs on
   1074     // segments.   setting this to 1 would make mv thresh
   1075     // roughly equal to what it is for macroblocks
   1076     label_mv_thresh = 1 * bsi->mvthresh / label_count ;
   1077 
   1078     // Segmentation method overheads
   1079     rate = vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs, vp8_mbsplit_encodings + segmentation);
   1080     rate += vp8_cost_mv_ref(SPLITMV, bsi->mdcounts);
   1081     this_segment_rd += RDCOST(x->rdmult, x->rddiv, rate, 0);
   1082     br += rate;
   1083 
   1084     for (i = 0; i < label_count; i++)
   1085     {
   1086         MV mode_mv[B_MODE_COUNT];
   1087         int best_label_rd = INT_MAX;
   1088         B_PREDICTION_MODE mode_selected = ZERO4X4;
   1089         int bestlabelyrate = 0;
   1090 
   1091         // search for the best motion vector on this segment
   1092         for (this_mode = LEFT4X4; this_mode <= NEW4X4 ; this_mode ++)
   1093         {
   1094             int this_rd;
   1095             int distortion;
   1096             int labelyrate;
   1097             ENTROPY_CONTEXT_PLANES t_above_s, t_left_s;
   1098             ENTROPY_CONTEXT *ta_s;
   1099             ENTROPY_CONTEXT *tl_s;
   1100 
   1101             vpx_memcpy(&t_above_s, &t_above, sizeof(ENTROPY_CONTEXT_PLANES));
   1102             vpx_memcpy(&t_left_s, &t_left, sizeof(ENTROPY_CONTEXT_PLANES));
   1103 
   1104             ta_s = (ENTROPY_CONTEXT *)&t_above_s;
   1105             tl_s = (ENTROPY_CONTEXT *)&t_left_s;
   1106 
   1107             if (this_mode == NEW4X4)
   1108             {
   1109                 int sseshift;
   1110                 int num00;
   1111                 int step_param = 0;
   1112                 int further_steps;
   1113                 int n;
   1114                 int thissme;
   1115                 int bestsme = INT_MAX;
   1116                 MV  temp_mv;
   1117                 BLOCK *c;
   1118                 BLOCKD *e;
   1119 
   1120                 // Is the best so far sufficiently good that we cant justify doing and new motion search.
   1121                 if (best_label_rd < label_mv_thresh)
   1122                     break;
   1123 
   1124                 if(cpi->compressor_speed)
   1125                 {
   1126                     if (segmentation == BLOCK_8X16 || segmentation == BLOCK_16X8)
   1127                     {
   1128                         bsi->mvp = &bsi->sv_mvp[i];
   1129                         if (i==1 && segmentation == BLOCK_16X8) bsi->mvp = &bsi->sv_mvp[2];
   1130 
   1131                         step_param = bsi->sv_istep[i];
   1132                     }
   1133 
   1134                     // use previous block's result as next block's MV predictor.
   1135                     if (segmentation == BLOCK_4X4 && i>0)
   1136                     {
   1137                         bsi->mvp = &(x->e_mbd.block[i-1].bmi.mv.as_mv);
   1138                         if (i==4 || i==8 || i==12) bsi->mvp = &(x->e_mbd.block[i-4].bmi.mv.as_mv);
   1139                         step_param = 2;
   1140                     }
   1141                 }
   1142 
   1143                 further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
   1144 
   1145                 {
   1146                     int sadpb = x->sadperbit4;
   1147 
   1148                     // find first label
   1149                     n = vp8_mbsplit_offset[segmentation][i];
   1150 
   1151                     c = &x->block[n];
   1152                     e = &x->e_mbd.block[n];
   1153 
   1154                     if (cpi->sf.search_method == HEX)
   1155                         bestsme = vp8_hex_search(x, c, e, bsi->ref_mv,
   1156                                                  &mode_mv[NEW4X4], step_param, sadpb, &num00, v_fn_ptr, x->mvsadcost, x->mvcost, bsi->ref_mv);
   1157 
   1158                     else
   1159                     {
   1160                         bestsme = cpi->diamond_search_sad(x, c, e, bsi->mvp,
   1161                                                           &mode_mv[NEW4X4], step_param,
   1162                                                           sadpb / 2, &num00, v_fn_ptr, x->mvsadcost, x->mvcost, bsi->ref_mv);
   1163 
   1164                         n = num00;
   1165                         num00 = 0;
   1166 
   1167                         while (n < further_steps)
   1168                         {
   1169                             n++;
   1170 
   1171                             if (num00)
   1172                                 num00--;
   1173                             else
   1174                             {
   1175                                 thissme = cpi->diamond_search_sad(x, c, e, bsi->mvp,
   1176                                                                   &temp_mv, step_param + n,
   1177                                                                   sadpb / 2, &num00, v_fn_ptr, x->mvsadcost, x->mvcost, bsi->ref_mv);
   1178 
   1179                                 if (thissme < bestsme)
   1180                                 {
   1181                                     bestsme = thissme;
   1182                                     mode_mv[NEW4X4].row = temp_mv.row;
   1183                                     mode_mv[NEW4X4].col = temp_mv.col;
   1184                                 }
   1185                             }
   1186                         }
   1187                     }
   1188 
   1189                     sseshift = segmentation_to_sseshift[segmentation];
   1190 
   1191                     // Should we do a full search (best quality only)
   1192                     if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000)
   1193                     {
   1194                         thissme = cpi->full_search_sad(x, c, e, bsi->mvp,
   1195                                                        sadpb / 4, 16, v_fn_ptr, x->mvcost, x->mvsadcost,bsi->ref_mv);
   1196 
   1197                         if (thissme < bestsme)
   1198                         {
   1199                             bestsme = thissme;
   1200                             mode_mv[NEW4X4] = e->bmi.mv.as_mv;
   1201                         }
   1202                         else
   1203                         {
   1204                             // The full search result is actually worse so re-instate the previous best vector
   1205                             e->bmi.mv.as_mv = mode_mv[NEW4X4];
   1206                         }
   1207                     }
   1208                 }
   1209 
   1210                 if (bestsme < INT_MAX)
   1211                 {
   1212                     if (!cpi->common.full_pixel)
   1213                         cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4],
   1214                                                      bsi->ref_mv, x->errorperbit / 2, v_fn_ptr, x->mvcost);
   1215                     else
   1216                         vp8_skip_fractional_mv_step(x, c, e, &mode_mv[NEW4X4],
   1217                                                     bsi->ref_mv, x->errorperbit, v_fn_ptr, x->mvcost);
   1218                 }
   1219             } /* NEW4X4 */
   1220 
   1221             rate = labels2mode(x, labels, i, this_mode, &mode_mv[this_mode],
   1222                                bsi->ref_mv, x->mvcost);
   1223 
   1224             // Trap vectors that reach beyond the UMV borders
   1225             if (((mode_mv[this_mode].row >> 3) < x->mv_row_min) || ((mode_mv[this_mode].row >> 3) > x->mv_row_max) ||
   1226                 ((mode_mv[this_mode].col >> 3) < x->mv_col_min) || ((mode_mv[this_mode].col >> 3) > x->mv_col_max))
   1227             {
   1228                 continue;
   1229             }
   1230 
   1231             distortion = vp8_encode_inter_mb_segment(x, labels, i, IF_RTCD(&cpi->rtcd.encodemb)) / 4;
   1232 
   1233             labelyrate = rdcost_mbsegment_y(x, labels, i, ta_s, tl_s);
   1234             rate += labelyrate;
   1235 
   1236             this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
   1237 
   1238             if (this_rd < best_label_rd)
   1239             {
   1240                 sbr = rate;
   1241                 sbd = distortion;
   1242                 bestlabelyrate = labelyrate;
   1243                 mode_selected = this_mode;
   1244                 best_label_rd = this_rd;
   1245 
   1246                 vpx_memcpy(ta_b, ta_s, sizeof(ENTROPY_CONTEXT_PLANES));
   1247                 vpx_memcpy(tl_b, tl_s, sizeof(ENTROPY_CONTEXT_PLANES));
   1248 
   1249             }
   1250         } /*for each 4x4 mode*/
   1251 
   1252         vpx_memcpy(ta, ta_b, sizeof(ENTROPY_CONTEXT_PLANES));
   1253         vpx_memcpy(tl, tl_b, sizeof(ENTROPY_CONTEXT_PLANES));
   1254 
   1255         labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected],
   1256                     bsi->ref_mv, x->mvcost);
   1257 
   1258         br += sbr;
   1259         bd += sbd;
   1260         segmentyrate += bestlabelyrate;
   1261         this_segment_rd += best_label_rd;
   1262 
   1263         if (this_segment_rd >= bsi->segment_rd)
   1264             break;
   1265 
   1266     } /* for each label */
   1267 
   1268     if (this_segment_rd < bsi->segment_rd)
   1269     {
   1270         bsi->r = br;
   1271         bsi->d = bd;
   1272         bsi->segment_yrate = segmentyrate;
   1273         bsi->segment_rd = this_segment_rd;
   1274         bsi->segment_num = segmentation;
   1275 
   1276         // store everything needed to come back to this!!
   1277         for (i = 0; i < 16; i++)
   1278         {
   1279             BLOCKD *bd = &x->e_mbd.block[i];
   1280 
   1281             bsi->mvs[i].as_mv = bd->bmi.mv.as_mv;
   1282             bsi->modes[i] = bd->bmi.mode;
   1283             bsi->eobs[i] = bd->eob;
   1284         }
   1285     }
   1286 }
   1287 
   1288 static __inline
   1289 void vp8_cal_step_param(int sr, int *sp)
   1290 {
   1291     int step = 0;
   1292 
   1293     if (sr > MAX_FIRST_STEP) sr = MAX_FIRST_STEP;
   1294     else if (sr < 1) sr = 1;
   1295 
   1296     while (sr>>=1)
   1297         step++;
   1298 
   1299     *sp = MAX_MVSEARCH_STEPS - 1 - step;
   1300 }
   1301 
   1302 static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
   1303                                            MV *best_ref_mv, int best_rd,
   1304                                            int *mdcounts, int *returntotrate,
   1305                                            int *returnyrate, int *returndistortion,
   1306                                            int mvthresh)
   1307 {
   1308     int i;
   1309     BEST_SEG_INFO bsi;
   1310 
   1311     vpx_memset(&bsi, 0, sizeof(bsi));
   1312 
   1313     bsi.segment_rd = best_rd;
   1314     bsi.ref_mv = best_ref_mv;
   1315     bsi.mvp = best_ref_mv;
   1316     bsi.mvthresh = mvthresh;
   1317     bsi.mdcounts = mdcounts;
   1318 
   1319     for(i = 0; i < 16; i++)
   1320     {
   1321         bsi.modes[i] = ZERO4X4;
   1322     }
   1323 
   1324     if(cpi->compressor_speed == 0)
   1325     {
   1326         /* for now, we will keep the original segmentation order
   1327            when in best quality mode */
   1328         rd_check_segment(cpi, x, &bsi, BLOCK_16X8);
   1329         rd_check_segment(cpi, x, &bsi, BLOCK_8X16);
   1330         rd_check_segment(cpi, x, &bsi, BLOCK_8X8);
   1331         rd_check_segment(cpi, x, &bsi, BLOCK_4X4);
   1332     }
   1333     else
   1334     {
   1335         int sr;
   1336 
   1337         rd_check_segment(cpi, x, &bsi, BLOCK_8X8);
   1338 
   1339         if (bsi.segment_rd < best_rd)
   1340         {
   1341             int col_min = (best_ref_mv->col - MAX_FULL_PEL_VAL) >>3;
   1342             int col_max = (best_ref_mv->col + MAX_FULL_PEL_VAL) >>3;
   1343             int row_min = (best_ref_mv->row - MAX_FULL_PEL_VAL) >>3;
   1344             int row_max = (best_ref_mv->row + MAX_FULL_PEL_VAL) >>3;
   1345 
   1346             int tmp_col_min = x->mv_col_min;
   1347             int tmp_col_max = x->mv_col_max;
   1348             int tmp_row_min = x->mv_row_min;
   1349             int tmp_row_max = x->mv_row_max;
   1350 
   1351             /* Get intersection of UMV window and valid MV window to reduce # of checks in diamond search. */
   1352             if (x->mv_col_min < col_min )
   1353                 x->mv_col_min = col_min;
   1354             if (x->mv_col_max > col_max )
   1355                 x->mv_col_max = col_max;
   1356             if (x->mv_row_min < row_min )
   1357                 x->mv_row_min = row_min;
   1358             if (x->mv_row_max > row_max )
   1359                 x->mv_row_max = row_max;
   1360 
   1361             /* Get 8x8 result */
   1362             bsi.sv_mvp[0] = bsi.mvs[0].as_mv;
   1363             bsi.sv_mvp[1] = bsi.mvs[2].as_mv;
   1364             bsi.sv_mvp[2] = bsi.mvs[8].as_mv;
   1365             bsi.sv_mvp[3] = bsi.mvs[10].as_mv;
   1366 
   1367             /* Use 8x8 result as 16x8/8x16's predictor MV. Adjust search range according to the closeness of 2 MV. */
   1368             /* block 8X16 */
   1369             {
   1370                 sr = MAXF((abs(bsi.sv_mvp[0].row - bsi.sv_mvp[2].row))>>3, (abs(bsi.sv_mvp[0].col - bsi.sv_mvp[2].col))>>3);
   1371                 vp8_cal_step_param(sr, &bsi.sv_istep[0]);
   1372 
   1373                 sr = MAXF((abs(bsi.sv_mvp[1].row - bsi.sv_mvp[3].row))>>3, (abs(bsi.sv_mvp[1].col - bsi.sv_mvp[3].col))>>3);
   1374                 vp8_cal_step_param(sr, &bsi.sv_istep[1]);
   1375 
   1376                 rd_check_segment(cpi, x, &bsi, BLOCK_8X16);
   1377             }
   1378 
   1379             /* block 16X8 */
   1380             {
   1381                 sr = MAXF((abs(bsi.sv_mvp[0].row - bsi.sv_mvp[1].row))>>3, (abs(bsi.sv_mvp[0].col - bsi.sv_mvp[1].col))>>3);
   1382                 vp8_cal_step_param(sr, &bsi.sv_istep[0]);
   1383 
   1384                 sr = MAXF((abs(bsi.sv_mvp[2].row - bsi.sv_mvp[3].row))>>3, (abs(bsi.sv_mvp[2].col - bsi.sv_mvp[3].col))>>3);
   1385                 vp8_cal_step_param(sr, &bsi.sv_istep[1]);
   1386 
   1387                 rd_check_segment(cpi, x, &bsi, BLOCK_16X8);
   1388             }
   1389 
   1390             /* If 8x8 is better than 16x8/8x16, then do 4x4 search */
   1391             /* Not skip 4x4 if speed=0 (good quality) */
   1392             if (cpi->sf.no_skip_block4x4_search || bsi.segment_num == BLOCK_8X8)  /* || (sv_segment_rd8x8-bsi.segment_rd) < sv_segment_rd8x8>>5) */
   1393             {
   1394                 bsi.mvp = &bsi.sv_mvp[0];
   1395                 rd_check_segment(cpi, x, &bsi, BLOCK_4X4);
   1396             }
   1397 
   1398             /* restore UMV window */
   1399             x->mv_col_min = tmp_col_min;
   1400             x->mv_col_max = tmp_col_max;
   1401             x->mv_row_min = tmp_row_min;
   1402             x->mv_row_max = tmp_row_max;
   1403         }
   1404     }
   1405 
   1406     /* set it to the best */
   1407     for (i = 0; i < 16; i++)
   1408     {
   1409         BLOCKD *bd = &x->e_mbd.block[i];
   1410 
   1411         bd->bmi.mv.as_mv = bsi.mvs[i].as_mv;
   1412         bd->bmi.mode = bsi.modes[i];
   1413         bd->eob = bsi.eobs[i];
   1414     }
   1415 
   1416     *returntotrate = bsi.r;
   1417     *returndistortion = bsi.d;
   1418     *returnyrate = bsi.segment_yrate;
   1419 
   1420     /* save partitions */
   1421     x->e_mbd.mode_info_context->mbmi.partitioning = bsi.segment_num;
   1422     x->partition_info->count = vp8_mbsplit_count[bsi.segment_num];
   1423 
   1424     for (i = 0; i < x->partition_info->count; i++)
   1425     {
   1426         int j;
   1427 
   1428         j = vp8_mbsplit_offset[bsi.segment_num][i];
   1429 
   1430         x->partition_info->bmi[i].mode = x->e_mbd.block[j].bmi.mode;
   1431         x->partition_info->bmi[i].mv.as_mv = x->e_mbd.block[j].bmi.mv.as_mv;
   1432     }
   1433 
   1434     return bsi.segment_rd;
   1435 }
   1436 #endif
   1437 
   1438 static void swap(int *x,int *y)
   1439 {
   1440    int tmp;
   1441 
   1442    tmp = *x;
   1443    *x = *y;
   1444    *y = tmp;
   1445 }
   1446 
   1447 static void quicksortmv(int arr[],int left, int right)
   1448 {
   1449    int lidx,ridx,pivot;
   1450 
   1451    lidx = left;
   1452    ridx = right;
   1453 
   1454    if( left < right)
   1455    {
   1456       pivot = (left + right)/2;
   1457 
   1458       while(lidx <=pivot && ridx >=pivot)
   1459       {
   1460           while(arr[lidx] < arr[pivot] && lidx <= pivot)
   1461               lidx++;
   1462           while(arr[ridx] > arr[pivot] && ridx >= pivot)
   1463               ridx--;
   1464           swap(&arr[lidx], &arr[ridx]);
   1465           lidx++;
   1466           ridx--;
   1467           if(lidx-1 == pivot)
   1468           {
   1469               ridx++;
   1470               pivot = ridx;
   1471           }
   1472           else if(ridx+1 == pivot)
   1473           {
   1474               lidx--;
   1475               pivot = lidx;
   1476           }
   1477       }
   1478       quicksortmv(arr, left, pivot - 1);
   1479       quicksortmv(arr, pivot + 1, right);
   1480    }
   1481 }
   1482 
   1483 static void quicksortsad(int arr[],int idx[], int left, int right)
   1484 {
   1485    int lidx,ridx,pivot;
   1486 
   1487    lidx = left;
   1488    ridx = right;
   1489 
   1490    if( left < right)
   1491    {
   1492       pivot = (left + right)/2;
   1493 
   1494       while(lidx <=pivot && ridx >=pivot)
   1495       {
   1496           while(arr[lidx] < arr[pivot] && lidx <= pivot)
   1497               lidx++;
   1498           while(arr[ridx] > arr[pivot] && ridx >= pivot)
   1499               ridx--;
   1500           swap(&arr[lidx], &arr[ridx]);
   1501           swap(&idx[lidx], &idx[ridx]);
   1502           lidx++;
   1503           ridx--;
   1504           if(lidx-1 == pivot)
   1505           {
   1506               ridx++;
   1507               pivot = ridx;
   1508           }
   1509           else if(ridx+1 == pivot)
   1510           {
   1511               lidx--;
   1512               pivot = lidx;
   1513           }
   1514       }
   1515       quicksortsad(arr, idx, left, pivot - 1);
   1516       quicksortsad(arr, idx, pivot + 1, right);
   1517    }
   1518 }
   1519 
   1520 //The improved MV prediction
   1521 void vp8_mv_pred
   1522 (
   1523     VP8_COMP *cpi,
   1524     MACROBLOCKD *xd,
   1525     const MODE_INFO *here,
   1526     MV *mvp,
   1527     int refframe,
   1528     int *ref_frame_sign_bias,
   1529     int *sr,
   1530     int near_sadidx[]
   1531 )
   1532 {
   1533     const MODE_INFO *above = here - xd->mode_info_stride;
   1534     const MODE_INFO *left = here - 1;
   1535     const MODE_INFO *aboveleft = above - 1;
   1536     int_mv           near_mvs[8];
   1537     int              near_ref[8];
   1538     int_mv           mv;
   1539     int              vcnt=0;
   1540     int              find=0;
   1541     int              mb_offset;
   1542 
   1543     int              mvx[8];
   1544     int              mvy[8];
   1545     int              i;
   1546 
   1547     mv.as_int = 0;
   1548 
   1549     if(here->mbmi.ref_frame != INTRA_FRAME)
   1550     {
   1551         near_mvs[0].as_int = near_mvs[1].as_int = near_mvs[2].as_int = near_mvs[3].as_int = near_mvs[4].as_int = near_mvs[5].as_int = near_mvs[6].as_int = near_mvs[7].as_int = 0;
   1552         near_ref[0] = near_ref[1] = near_ref[2] = near_ref[3] = near_ref[4] = near_ref[5] = near_ref[6] = near_ref[7] = 0;
   1553 
   1554         // read in 3 nearby block's MVs from current frame as prediction candidates.
   1555         if (above->mbmi.ref_frame != INTRA_FRAME)
   1556         {
   1557             near_mvs[vcnt].as_int = above->mbmi.mv.as_int;
   1558             mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
   1559             near_ref[vcnt] =  above->mbmi.ref_frame;
   1560         }
   1561         vcnt++;
   1562         if (left->mbmi.ref_frame != INTRA_FRAME)
   1563         {
   1564             near_mvs[vcnt].as_int = left->mbmi.mv.as_int;
   1565             mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
   1566             near_ref[vcnt] =  left->mbmi.ref_frame;
   1567         }
   1568         vcnt++;
   1569         if (aboveleft->mbmi.ref_frame != INTRA_FRAME)
   1570         {
   1571             near_mvs[vcnt].as_int = aboveleft->mbmi.mv.as_int;
   1572             mv_bias(ref_frame_sign_bias[aboveleft->mbmi.ref_frame], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
   1573             near_ref[vcnt] =  aboveleft->mbmi.ref_frame;
   1574         }
   1575         vcnt++;
   1576 
   1577         // read in 5 nearby block's MVs from last frame.
   1578         if(cpi->common.last_frame_type != KEY_FRAME)
   1579         {
   1580             mb_offset = (-xd->mb_to_top_edge/128 + 1) * (xd->mode_info_stride +1) + (-xd->mb_to_left_edge/128 +1) ;
   1581 
   1582             // current in last frame
   1583             if (cpi->lf_ref_frame[mb_offset] != INTRA_FRAME)
   1584             {
   1585                 near_mvs[vcnt].as_int = cpi->lfmv[mb_offset].as_int;
   1586                 mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
   1587                 near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset];
   1588             }
   1589             vcnt++;
   1590 
   1591             // above in last frame
   1592             if (cpi->lf_ref_frame[mb_offset - xd->mode_info_stride-1] != INTRA_FRAME)
   1593             {
   1594                 near_mvs[vcnt].as_int = cpi->lfmv[mb_offset - xd->mode_info_stride-1].as_int;
   1595                 mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset - xd->mode_info_stride-1], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
   1596                 near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset - xd->mode_info_stride-1];
   1597             }
   1598             vcnt++;
   1599 
   1600             // left in last frame
   1601             if (cpi->lf_ref_frame[mb_offset-1] != INTRA_FRAME)
   1602             {
   1603                 near_mvs[vcnt].as_int = cpi->lfmv[mb_offset -1].as_int;
   1604                 mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset -1], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
   1605                 near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset - 1];
   1606             }
   1607             vcnt++;
   1608 
   1609             // right in last frame
   1610             if (cpi->lf_ref_frame[mb_offset +1] != INTRA_FRAME)
   1611             {
   1612                 near_mvs[vcnt].as_int = cpi->lfmv[mb_offset +1].as_int;
   1613                 mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset +1], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
   1614                 near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset +1];
   1615             }
   1616             vcnt++;
   1617 
   1618             // below in last frame
   1619             if (cpi->lf_ref_frame[mb_offset + xd->mode_info_stride +1] != INTRA_FRAME)
   1620             {
   1621                 near_mvs[vcnt].as_int = cpi->lfmv[mb_offset + xd->mode_info_stride +1].as_int;
   1622                 mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset + xd->mode_info_stride +1], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
   1623                 near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset + xd->mode_info_stride +1];
   1624             }
   1625             vcnt++;
   1626         }
   1627 
   1628         for(i=0; i< vcnt; i++)
   1629         {
   1630             if(near_ref[near_sadidx[i]] != INTRA_FRAME)
   1631             {
   1632                 if(here->mbmi.ref_frame == near_ref[near_sadidx[i]])
   1633                 {
   1634                     mv.as_int = near_mvs[near_sadidx[i]].as_int;
   1635                     find = 1;
   1636                     if (i < 3)
   1637                         *sr = 3;
   1638                     else
   1639                         *sr = 2;
   1640                     break;
   1641                 }
   1642             }
   1643         }
   1644 
   1645         if(!find)
   1646         {
   1647             for(i=0; i<vcnt; i++)
   1648             {
   1649                 mvx[i] = near_mvs[i].as_mv.row;
   1650                 mvy[i] = near_mvs[i].as_mv.col;
   1651             }
   1652 
   1653             quicksortmv (mvx, 0, vcnt-1);
   1654             quicksortmv (mvy, 0, vcnt-1);
   1655             mv.as_mv.row = mvx[vcnt/2];
   1656             mv.as_mv.col = mvy[vcnt/2];
   1657 
   1658             find = 1;
   1659             //sr is set to 0 to allow calling function to decide the search range.
   1660             *sr = 0;
   1661         }
   1662     }
   1663 
   1664     /* Set up return values */
   1665     *mvp = mv.as_mv;
   1666     vp8_clamp_mv(mvp, xd);
   1667 }
   1668 
   1669 void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffset, int near_sadidx[])
   1670 {
   1671 
   1672     int near_sad[8] = {0}; // 0-cf above, 1-cf left, 2-cf aboveleft, 3-lf current, 4-lf above, 5-lf left, 6-lf right, 7-lf below
   1673 
   1674     //calculate sad for current frame 3 nearby MBs.
   1675     if( xd->mb_to_top_edge==0 && xd->mb_to_left_edge ==0)
   1676     {
   1677         near_sad[0] = near_sad[1] = near_sad[2] = INT_MAX;
   1678     }else if(xd->mb_to_top_edge==0)
   1679     {   //only has left MB for sad calculation.
   1680         near_sad[0] = near_sad[2] = INT_MAX;
   1681         near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, 0x7fffffff);
   1682     }else if(xd->mb_to_left_edge ==0)
   1683     {   //only has left MB for sad calculation.
   1684         near_sad[1] = near_sad[2] = INT_MAX;
   1685         near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, 0x7fffffff);
   1686     }else
   1687     {
   1688         near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, 0x7fffffff);
   1689         near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, 0x7fffffff);
   1690         near_sad[2] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, xd->dst.y_buffer - xd->dst.y_stride *16 -16,xd->dst.y_stride, 0x7fffffff);
   1691     }
   1692 
   1693     if(cpi->common.last_frame_type != KEY_FRAME)
   1694     {
   1695         //calculate sad for last frame 5 nearby MBs.
   1696         unsigned char *pre_y_buffer = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_buffer + recon_yoffset;
   1697         int pre_y_stride = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_stride;
   1698 
   1699         if(xd->mb_to_top_edge==0) near_sad[4] = INT_MAX;
   1700         if(xd->mb_to_left_edge ==0) near_sad[5] = INT_MAX;
   1701         if(xd->mb_to_right_edge ==0) near_sad[6] = INT_MAX;
   1702         if(xd->mb_to_bottom_edge==0) near_sad[7] = INT_MAX;
   1703 
   1704         if(near_sad[4] != INT_MAX)
   1705             near_sad[4] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer - pre_y_stride *16, pre_y_stride, 0x7fffffff);
   1706         if(near_sad[5] != INT_MAX)
   1707             near_sad[5] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer - 16, pre_y_stride, 0x7fffffff);
   1708         near_sad[3] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer, pre_y_stride, 0x7fffffff);
   1709         if(near_sad[6] != INT_MAX)
   1710             near_sad[6] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer + 16, pre_y_stride, 0x7fffffff);
   1711         if(near_sad[7] != INT_MAX)
   1712             near_sad[7] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer + pre_y_stride *16, pre_y_stride, 0x7fffffff);
   1713     }
   1714 
   1715     if(cpi->common.last_frame_type != KEY_FRAME)
   1716     {
   1717         quicksortsad(near_sad, near_sadidx, 0, 7);
   1718     }else
   1719     {
   1720         quicksortsad(near_sad, near_sadidx, 0, 2);
   1721     }
   1722 }
   1723 
   1724 #if !(CONFIG_REALTIME_ONLY)
   1725 int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int *returnrate, int *returndistortion, int *returnintra)
   1726 {
   1727     BLOCK *b = &x->block[0];
   1728     BLOCKD *d = &x->e_mbd.block[0];
   1729     MACROBLOCKD *xd = &x->e_mbd;
   1730     B_MODE_INFO best_bmodes[16];
   1731     MB_MODE_INFO best_mbmode;
   1732     PARTITION_INFO best_partition;
   1733     MV best_ref_mv;
   1734     MV mode_mv[MB_MODE_COUNT];
   1735     MB_PREDICTION_MODE this_mode;
   1736     int num00;
   1737     int best_mode_index = 0;
   1738 
   1739     int i;
   1740     int mode_index;
   1741     int mdcounts[4];
   1742     int rate;
   1743     int distortion;
   1744     int best_rd = INT_MAX; // 1 << 30;
   1745     int ref_frame_cost[MAX_REF_FRAMES];
   1746     int rate2, distortion2;
   1747     int uv_intra_rate, uv_intra_distortion, uv_intra_rate_tokenonly;
   1748     int rate_y, UNINITIALIZED_IS_SAFE(rate_uv);
   1749     int distortion_uv;
   1750     int best_yrd = INT_MAX;
   1751 
   1752     //int all_rds[MAX_MODES];        // Experimental debug code.
   1753     //int all_rates[MAX_MODES];
   1754     //int all_dist[MAX_MODES];
   1755     //int intermodecost[MAX_MODES];
   1756 
   1757     MB_PREDICTION_MODE uv_intra_mode;
   1758 
   1759     int force_no_skip = 0;
   1760 
   1761     MV mvp;
   1762     int near_sadidx[8] = {0, 1, 2, 3, 4, 5, 6, 7};
   1763     int saddone=0;
   1764     int sr=0;    //search range got from mv_pred(). It uses step_param levels. (0-7)
   1765 
   1766     MV frame_nearest_mv[4];
   1767     MV frame_near_mv[4];
   1768     MV frame_best_ref_mv[4];
   1769     int frame_mdcounts[4][4];
   1770     int frame_lf_or_gf[4];
   1771     unsigned char *y_buffer[4];
   1772     unsigned char *u_buffer[4];
   1773     unsigned char *v_buffer[4];
   1774 
   1775     vpx_memset(&best_mbmode, 0, sizeof(best_mbmode));
   1776 
   1777     if (cpi->ref_frame_flags & VP8_LAST_FLAG)
   1778     {
   1779         YV12_BUFFER_CONFIG *lst_yv12 = &cpi->common.yv12_fb[cpi->common.lst_fb_idx];
   1780 
   1781         vp8_find_near_mvs(&x->e_mbd, x->e_mbd.mode_info_context, &frame_nearest_mv[LAST_FRAME], &frame_near_mv[LAST_FRAME],
   1782                           &frame_best_ref_mv[LAST_FRAME], frame_mdcounts[LAST_FRAME], LAST_FRAME, cpi->common.ref_frame_sign_bias);
   1783 
   1784         y_buffer[LAST_FRAME] = lst_yv12->y_buffer + recon_yoffset;
   1785         u_buffer[LAST_FRAME] = lst_yv12->u_buffer + recon_uvoffset;
   1786         v_buffer[LAST_FRAME] = lst_yv12->v_buffer + recon_uvoffset;
   1787 
   1788         frame_lf_or_gf[LAST_FRAME] = 0;
   1789     }
   1790 
   1791     if (cpi->ref_frame_flags & VP8_GOLD_FLAG)
   1792     {
   1793         YV12_BUFFER_CONFIG *gld_yv12 = &cpi->common.yv12_fb[cpi->common.gld_fb_idx];
   1794 
   1795         vp8_find_near_mvs(&x->e_mbd, x->e_mbd.mode_info_context, &frame_nearest_mv[GOLDEN_FRAME], &frame_near_mv[GOLDEN_FRAME],
   1796                           &frame_best_ref_mv[GOLDEN_FRAME], frame_mdcounts[GOLDEN_FRAME], GOLDEN_FRAME, cpi->common.ref_frame_sign_bias);
   1797 
   1798         y_buffer[GOLDEN_FRAME] = gld_yv12->y_buffer + recon_yoffset;
   1799         u_buffer[GOLDEN_FRAME] = gld_yv12->u_buffer + recon_uvoffset;
   1800         v_buffer[GOLDEN_FRAME] = gld_yv12->v_buffer + recon_uvoffset;
   1801 
   1802         frame_lf_or_gf[GOLDEN_FRAME] = 1;
   1803     }
   1804 
   1805     if (cpi->ref_frame_flags & VP8_ALT_FLAG)
   1806     {
   1807         YV12_BUFFER_CONFIG *alt_yv12 = &cpi->common.yv12_fb[cpi->common.alt_fb_idx];
   1808 
   1809         vp8_find_near_mvs(&x->e_mbd, x->e_mbd.mode_info_context, &frame_nearest_mv[ALTREF_FRAME], &frame_near_mv[ALTREF_FRAME],
   1810                           &frame_best_ref_mv[ALTREF_FRAME], frame_mdcounts[ALTREF_FRAME], ALTREF_FRAME, cpi->common.ref_frame_sign_bias);
   1811 
   1812         y_buffer[ALTREF_FRAME] = alt_yv12->y_buffer + recon_yoffset;
   1813         u_buffer[ALTREF_FRAME] = alt_yv12->u_buffer + recon_uvoffset;
   1814         v_buffer[ALTREF_FRAME] = alt_yv12->v_buffer + recon_uvoffset;
   1815 
   1816         frame_lf_or_gf[ALTREF_FRAME] = 1;
   1817     }
   1818 
   1819     *returnintra = INT_MAX;
   1820     cpi->mbs_tested_so_far++;          // Count of the number of MBs tested so far this frame
   1821 
   1822     x->skip = 0;
   1823 
   1824     ref_frame_cost[INTRA_FRAME]   = vp8_cost_zero(cpi->prob_intra_coded);
   1825 
   1826     // Special case treatment when GF and ARF are not sensible options for reference
   1827     if (cpi->ref_frame_flags == VP8_LAST_FLAG)
   1828     {
   1829         ref_frame_cost[LAST_FRAME]    = vp8_cost_one(cpi->prob_intra_coded)
   1830                                         + vp8_cost_zero(255);
   1831         ref_frame_cost[GOLDEN_FRAME]  = vp8_cost_one(cpi->prob_intra_coded)
   1832                                         + vp8_cost_one(255)
   1833                                         + vp8_cost_zero(128);
   1834         ref_frame_cost[ALTREF_FRAME]  = vp8_cost_one(cpi->prob_intra_coded)
   1835                                         + vp8_cost_one(255)
   1836                                         + vp8_cost_one(128);
   1837     }
   1838     else
   1839     {
   1840         ref_frame_cost[LAST_FRAME]    = vp8_cost_one(cpi->prob_intra_coded)
   1841                                         + vp8_cost_zero(cpi->prob_last_coded);
   1842         ref_frame_cost[GOLDEN_FRAME]  = vp8_cost_one(cpi->prob_intra_coded)
   1843                                         + vp8_cost_one(cpi->prob_last_coded)
   1844                                         + vp8_cost_zero(cpi->prob_gf_coded);
   1845         ref_frame_cost[ALTREF_FRAME]  = vp8_cost_one(cpi->prob_intra_coded)
   1846                                         + vp8_cost_one(cpi->prob_last_coded)
   1847                                         + vp8_cost_one(cpi->prob_gf_coded);
   1848     }
   1849 
   1850     vpx_memset(mode_mv, 0, sizeof(mode_mv));
   1851 
   1852     x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
   1853     vp8_rd_pick_intra_mbuv_mode(cpi, x, &uv_intra_rate, &uv_intra_rate_tokenonly, &uv_intra_distortion);
   1854     uv_intra_mode = x->e_mbd.mode_info_context->mbmi.uv_mode;
   1855 
   1856     for (mode_index = 0; mode_index < MAX_MODES; mode_index++)
   1857     {
   1858         int this_rd = INT_MAX;
   1859         int lf_or_gf = 0;           // Lat Frame (01) or gf/arf (1)
   1860         int disable_skip = 0;
   1861         int other_cost = 0;
   1862 
   1863         force_no_skip = 0;
   1864 
   1865         // Experimental debug code.
   1866         // Record of rd values recorded for this MB. -1 indicates not measured
   1867         //all_rds[mode_index] = -1;
   1868         //all_rates[mode_index] = -1;
   1869         //all_dist[mode_index] = -1;
   1870         //intermodecost[mode_index] = -1;
   1871 
   1872         // Test best rd so far against threshold for trying this mode.
   1873         if (best_rd <= cpi->rd_threshes[mode_index])
   1874             continue;
   1875 
   1876         // These variables hold are rolling total cost and distortion for this mode
   1877         rate2 = 0;
   1878         distortion2 = 0;
   1879 
   1880         this_mode = vp8_mode_order[mode_index];
   1881 
   1882         x->e_mbd.mode_info_context->mbmi.mode = this_mode;
   1883         x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
   1884         x->e_mbd.mode_info_context->mbmi.ref_frame = vp8_ref_frame_order[mode_index];
   1885 
   1886         // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
   1887         // unless ARNR filtering is enabled in which case we want
   1888         // an unfiltered alternative
   1889         if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0))
   1890         {
   1891             if (this_mode != ZEROMV || x->e_mbd.mode_info_context->mbmi.ref_frame != ALTREF_FRAME)
   1892                 continue;
   1893         }
   1894 
   1895         /* everything but intra */
   1896         if (x->e_mbd.mode_info_context->mbmi.ref_frame)
   1897         {
   1898             x->e_mbd.pre.y_buffer = y_buffer[x->e_mbd.mode_info_context->mbmi.ref_frame];
   1899             x->e_mbd.pre.u_buffer = u_buffer[x->e_mbd.mode_info_context->mbmi.ref_frame];
   1900             x->e_mbd.pre.v_buffer = v_buffer[x->e_mbd.mode_info_context->mbmi.ref_frame];
   1901             mode_mv[NEARESTMV] = frame_nearest_mv[x->e_mbd.mode_info_context->mbmi.ref_frame];
   1902             mode_mv[NEARMV] = frame_near_mv[x->e_mbd.mode_info_context->mbmi.ref_frame];
   1903             best_ref_mv = frame_best_ref_mv[x->e_mbd.mode_info_context->mbmi.ref_frame];
   1904             vpx_memcpy(mdcounts, frame_mdcounts[x->e_mbd.mode_info_context->mbmi.ref_frame], sizeof(mdcounts));
   1905             lf_or_gf = frame_lf_or_gf[x->e_mbd.mode_info_context->mbmi.ref_frame];
   1906         }
   1907 
   1908         if(x->e_mbd.mode_info_context->mbmi.mode == NEWMV)
   1909         {
   1910             if(!saddone)
   1911             {
   1912                 vp8_cal_sad(cpi,xd,x, recon_yoffset ,&near_sadidx[0] );
   1913                 saddone = 1;
   1914             }
   1915 
   1916             vp8_mv_pred(cpi, &x->e_mbd, x->e_mbd.mode_info_context, &mvp,
   1917                         x->e_mbd.mode_info_context->mbmi.ref_frame, cpi->common.ref_frame_sign_bias, &sr, &near_sadidx[0]);
   1918 
   1919             /* adjust mvp to make sure it is within MV range */
   1920             if(mvp.row > best_ref_mv.row + MAX_FULL_PEL_VAL)
   1921                 mvp.row = best_ref_mv.row + MAX_FULL_PEL_VAL;
   1922             else if(mvp.row < best_ref_mv.row - MAX_FULL_PEL_VAL)
   1923                 mvp.row = best_ref_mv.row - MAX_FULL_PEL_VAL;
   1924             if(mvp.col > best_ref_mv.col + MAX_FULL_PEL_VAL)
   1925                 mvp.col = best_ref_mv.col + MAX_FULL_PEL_VAL;
   1926             else if(mvp.col < best_ref_mv.col - MAX_FULL_PEL_VAL)
   1927                 mvp.col = best_ref_mv.col - MAX_FULL_PEL_VAL;
   1928         }
   1929 
   1930         // Check to see if the testing frequency for this mode is at its max
   1931         // If so then prevent it from being tested and increase the threshold for its testing
   1932         if (cpi->mode_test_hit_counts[mode_index] && (cpi->mode_check_freq[mode_index] > 1))
   1933         {
   1934             if (cpi->mbs_tested_so_far  <= cpi->mode_check_freq[mode_index] * cpi->mode_test_hit_counts[mode_index])
   1935             {
   1936                 // Increase the threshold for coding this mode to make it less likely to be chosen
   1937                 cpi->rd_thresh_mult[mode_index] += 4;
   1938 
   1939                 if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT)
   1940                     cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
   1941 
   1942                 cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index];
   1943 
   1944                 continue;
   1945             }
   1946         }
   1947 
   1948         // We have now reached the point where we are going to test the current mode so increment the counter for the number of times it has been tested
   1949         cpi->mode_test_hit_counts[mode_index] ++;
   1950 
   1951         // Experimental code. Special case for gf and arf zeromv modes. Increase zbin size to supress noise
   1952         if (cpi->zbin_mode_boost_enabled)
   1953         {
   1954             if ( vp8_ref_frame_order[mode_index] == INTRA_FRAME )
   1955                 cpi->zbin_mode_boost = 0;
   1956             else
   1957             {
   1958                 if (vp8_mode_order[mode_index] == ZEROMV)
   1959                 {
   1960                     if (vp8_ref_frame_order[mode_index] != LAST_FRAME)
   1961                         cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
   1962                     else
   1963                         cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
   1964                 }
   1965                 else if (vp8_mode_order[mode_index] == SPLITMV)
   1966                     cpi->zbin_mode_boost = 0;
   1967                 else
   1968                     cpi->zbin_mode_boost = MV_ZBIN_BOOST;
   1969             }
   1970 
   1971             vp8_update_zbin_extra(cpi, x);
   1972         }
   1973 
   1974         switch (this_mode)
   1975         {
   1976         case B_PRED:
   1977         {
   1978             int tmp_rd;
   1979 
   1980             // Note the rate value returned here includes the cost of coding the BPRED mode : x->mbmode_cost[x->e_mbd.frame_type][BPRED];
   1981             tmp_rd = vp8_rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y, &distortion, best_yrd);
   1982             rate2 += rate;
   1983             distortion2 += distortion;
   1984 
   1985             if(tmp_rd < best_yrd)
   1986             {
   1987                 rate2 += uv_intra_rate;
   1988                 rate_uv = uv_intra_rate_tokenonly;
   1989                 distortion2 += uv_intra_distortion;
   1990                 distortion_uv = uv_intra_distortion;
   1991             }
   1992             else
   1993             {
   1994                 this_rd = INT_MAX;
   1995                 disable_skip = 1;
   1996             }
   1997         }
   1998         break;
   1999 
   2000         case SPLITMV:
   2001         {
   2002             int tmp_rd;
   2003             int this_rd_thresh;
   2004 
   2005             this_rd_thresh = (x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME) ? cpi->rd_threshes[THR_NEWMV] : cpi->rd_threshes[THR_NEWA];
   2006             this_rd_thresh = (x->e_mbd.mode_info_context->mbmi.ref_frame == GOLDEN_FRAME) ? cpi->rd_threshes[THR_NEWG]: this_rd_thresh;
   2007 
   2008             tmp_rd = vp8_rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv,
   2009                                                      best_yrd, mdcounts,
   2010                                                      &rate, &rate_y, &distortion, this_rd_thresh) ;
   2011 
   2012             rate2 += rate;
   2013             distortion2 += distortion;
   2014 
   2015             // If even the 'Y' rd value of split is higher than best so far then dont bother looking at UV
   2016             if (tmp_rd < best_yrd)
   2017             {
   2018                 // Now work out UV cost and add it in
   2019                 vp8_rd_inter_uv(cpi, x, &rate_uv, &distortion_uv, cpi->common.full_pixel);
   2020                 rate2 += rate_uv;
   2021                 distortion2 += distortion_uv;
   2022             }
   2023             else
   2024             {
   2025                 this_rd = INT_MAX;
   2026                 disable_skip = 1;
   2027             }
   2028         }
   2029         break;
   2030         case DC_PRED:
   2031         case V_PRED:
   2032         case H_PRED:
   2033         case TM_PRED:
   2034             x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
   2035             RECON_INVOKE(&cpi->common.rtcd.recon, build_intra_predictors_mby)
   2036                 (&x->e_mbd);
   2037             macro_block_yrd(x, &rate_y, &distortion, IF_RTCD(&cpi->rtcd.encodemb)) ;
   2038             rate2 += rate_y;
   2039             distortion2 += distortion;
   2040             rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode];
   2041             rate2 += uv_intra_rate;
   2042             rate_uv = uv_intra_rate_tokenonly;
   2043             distortion2 += uv_intra_distortion;
   2044             distortion_uv = uv_intra_distortion;
   2045             break;
   2046 
   2047         case NEWMV:
   2048 
   2049             // Decrement full search counter
   2050             if (cpi->check_freq[lf_or_gf] > 0)
   2051                 cpi->check_freq[lf_or_gf] --;
   2052 
   2053             {
   2054                 int thissme;
   2055                 int bestsme = INT_MAX;
   2056                 int step_param = cpi->sf.first_step;
   2057                 int search_range;
   2058                 int further_steps;
   2059                 int n;
   2060 
   2061                 int col_min = (best_ref_mv.col - MAX_FULL_PEL_VAL) >>3;
   2062                 int col_max = (best_ref_mv.col + MAX_FULL_PEL_VAL) >>3;
   2063                 int row_min = (best_ref_mv.row - MAX_FULL_PEL_VAL) >>3;
   2064                 int row_max = (best_ref_mv.row + MAX_FULL_PEL_VAL) >>3;
   2065 
   2066                 int tmp_col_min = x->mv_col_min;
   2067                 int tmp_col_max = x->mv_col_max;
   2068                 int tmp_row_min = x->mv_row_min;
   2069                 int tmp_row_max = x->mv_row_max;
   2070 
   2071                 // Get intersection of UMV window and valid MV window to reduce # of checks in diamond search.
   2072                 if (x->mv_col_min < col_min )
   2073                     x->mv_col_min = col_min;
   2074                 if (x->mv_col_max > col_max )
   2075                     x->mv_col_max = col_max;
   2076                 if (x->mv_row_min < row_min )
   2077                     x->mv_row_min = row_min;
   2078                 if (x->mv_row_max > row_max )
   2079                     x->mv_row_max = row_max;
   2080 
   2081                 //adjust search range according to sr from mv prediction
   2082                 if(sr > step_param)
   2083                     step_param = sr;
   2084 
   2085                 // Work out how long a search we should do
   2086                 search_range = MAXF(abs(best_ref_mv.col), abs(best_ref_mv.row)) >> 3;
   2087 
   2088                 if (search_range >= x->vector_range)
   2089                     x->vector_range = search_range;
   2090                 else if (x->vector_range > cpi->sf.min_fs_radius)
   2091                     x->vector_range--;
   2092 
   2093                 // Initial step/diamond search
   2094                 {
   2095                     int sadpb = x->sadperbit16;
   2096 
   2097                     if (cpi->sf.search_method == HEX)
   2098                     {
   2099                         bestsme = vp8_hex_search(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param, sadpb/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost, &best_ref_mv);
   2100                         mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
   2101                         mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
   2102                     }
   2103                     else
   2104                     {
   2105                         bestsme = cpi->diamond_search_sad(x, b, d, &mvp, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost, &best_ref_mv); //sadpb < 9
   2106                         mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
   2107                         mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
   2108 
   2109                         // Further step/diamond searches as necessary
   2110                         n = 0;
   2111                         further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
   2112 
   2113                         n = num00;
   2114                         num00 = 0;
   2115 
   2116                         while (n < further_steps)
   2117                         {
   2118                             n++;
   2119 
   2120                             if (num00)
   2121                                 num00--;
   2122                             else
   2123                             {
   2124                                 thissme = cpi->diamond_search_sad(x, b, d, &mvp, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost, &best_ref_mv); //sadpb = 9
   2125 
   2126                                 if (thissme < bestsme)
   2127                                 {
   2128                                     bestsme = thissme;
   2129                                     mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
   2130                                     mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
   2131                                 }
   2132                                 else
   2133                                 {
   2134                                     d->bmi.mv.as_mv.row = mode_mv[NEWMV].row;
   2135                                     d->bmi.mv.as_mv.col = mode_mv[NEWMV].col;
   2136                                 }
   2137                             }
   2138                         }
   2139                     }
   2140 
   2141                 }
   2142 
   2143                 // Should we do a full search
   2144                 if (!cpi->check_freq[lf_or_gf] || cpi->do_full[lf_or_gf])
   2145                 {
   2146                     int thissme;
   2147                     int full_flag_thresh = 0;
   2148                     MV full_mvp;
   2149 
   2150                     full_mvp.row = d->bmi.mv.as_mv.row <<3;    // use diamond search result as full search staring point
   2151                     full_mvp.col = d->bmi.mv.as_mv.col <<3;
   2152 
   2153                     // Update x->vector_range based on best vector found in step search
   2154                     search_range = MAXF(abs((mvp.row>>3) - d->bmi.mv.as_mv.row), abs((mvp.col>>3) - d->bmi.mv.as_mv.col));
   2155                     //search_range *= 1.4;  //didn't improve PSNR
   2156 
   2157                     if (search_range > x->vector_range)
   2158                         x->vector_range = search_range;
   2159                     else
   2160                         search_range = x->vector_range;
   2161 
   2162                     // Apply limits
   2163                     search_range = (search_range > cpi->sf.max_fs_radius) ? cpi->sf.max_fs_radius : search_range;
   2164 
   2165                     //add this to reduce full search range.
   2166                     if(sr<=3 && search_range > 8) search_range = 8;
   2167 
   2168                     {
   2169                         int sadpb = x->sadperbit16 >> 2;
   2170                         thissme = cpi->full_search_sad(x, b, d, &full_mvp, sadpb, search_range, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, x->mvsadcost,&best_ref_mv);
   2171                     }
   2172 
   2173                     // Barrier threshold to initiating full search
   2174                     // full_flag_thresh = 10 + (thissme >> 7);
   2175                     if ((thissme + full_flag_thresh) < bestsme)
   2176                     {
   2177                         cpi->do_full[lf_or_gf] ++;
   2178                         bestsme = thissme;
   2179                     }
   2180                     else if (thissme < bestsme)
   2181                         bestsme = thissme;
   2182                     else
   2183                     {
   2184                         cpi->do_full[lf_or_gf] = cpi->do_full[lf_or_gf] >> 1;
   2185                         cpi->check_freq[lf_or_gf] = cpi->sf.full_freq[lf_or_gf];
   2186 
   2187                         // The full search result is actually worse so re-instate the previous best vector
   2188                         d->bmi.mv.as_mv.row = mode_mv[NEWMV].row;
   2189                         d->bmi.mv.as_mv.col = mode_mv[NEWMV].col;
   2190                     }
   2191                 }
   2192 
   2193                 x->mv_col_min = tmp_col_min;
   2194                 x->mv_col_max = tmp_col_max;
   2195                 x->mv_row_min = tmp_row_min;
   2196                 x->mv_row_max = tmp_row_max;
   2197 
   2198                 if (bestsme < INT_MAX)
   2199                     // cpi->find_fractional_mv_step(x,b,d,&d->bmi.mv.as_mv,&best_ref_mv,x->errorperbit/2,cpi->fn_ptr.svf,cpi->fn_ptr.vf,x->mvcost);  // normal mvc=11
   2200                     cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv, x->errorperbit / 4, &cpi->fn_ptr[BLOCK_16X16], x->mvcost);
   2201 
   2202                 mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
   2203                 mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
   2204 
   2205                 // Add the new motion vector cost to our rolling cost variable
   2206                 rate2 += vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv, x->mvcost, 96);
   2207 
   2208             }
   2209 
   2210         case NEARESTMV:
   2211         case NEARMV:
   2212 
   2213             // Clip "next_nearest" so that it does not extend to far out of image
   2214             if (mode_mv[this_mode].col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN))
   2215                 mode_mv[this_mode].col = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
   2216             else if (mode_mv[this_mode].col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN)
   2217                 mode_mv[this_mode].col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN;
   2218 
   2219             if (mode_mv[this_mode].row < (xd->mb_to_top_edge - LEFT_TOP_MARGIN))
   2220                 mode_mv[this_mode].row = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
   2221             else if (mode_mv[this_mode].row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN)
   2222                 mode_mv[this_mode].row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;
   2223 
   2224             // Do not bother proceeding if the vector (from newmv,nearest or near) is 0,0 as this should then be coded using the zeromv mode.
   2225             if (((this_mode == NEARMV) || (this_mode == NEARESTMV)) &&
   2226                 ((mode_mv[this_mode].row == 0) && (mode_mv[this_mode].col == 0)))
   2227                 continue;
   2228 
   2229         case ZEROMV:
   2230 
   2231         mv_selected:
   2232 
   2233             // Trap vectors that reach beyond the UMV borders
   2234             // Note that ALL New MV, Nearest MV Near MV and Zero MV code drops through to this point
   2235             // because of the lack of break statements in the previous two cases.
   2236             if (((mode_mv[this_mode].row >> 3) < x->mv_row_min) || ((mode_mv[this_mode].row >> 3) > x->mv_row_max) ||
   2237                 ((mode_mv[this_mode].col >> 3) < x->mv_col_min) || ((mode_mv[this_mode].col >> 3) > x->mv_col_max))
   2238                 continue;
   2239 
   2240             vp8_set_mbmode_and_mvs(x, this_mode, &mode_mv[this_mode]);
   2241             vp8_build_inter_predictors_mby(&x->e_mbd);
   2242 
   2243             if (cpi->active_map_enabled && x->active_ptr[0] == 0) {
   2244                 x->skip = 1;
   2245             }
   2246             else if (x->encode_breakout)
   2247             {
   2248                 int sum, sse;
   2249                 int threshold = (xd->block[0].dequant[1]
   2250                             * xd->block[0].dequant[1] >>4);
   2251 
   2252                 if(threshold < x->encode_breakout)
   2253                     threshold = x->encode_breakout;
   2254 
   2255                 VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16var)
   2256                     (x->src.y_buffer, x->src.y_stride,
   2257                      x->e_mbd.predictor, 16, (unsigned int *)(&sse), &sum);
   2258 
   2259                 if (sse < threshold)
   2260                 {
   2261                     // Check u and v to make sure skip is ok
   2262                     int sse2 = 0;
   2263                     /* If theres is no codeable 2nd order dc
   2264                        or a very small uniform pixel change change */
   2265                     if (abs(sum) < (xd->block[24].dequant[0]<<2)||
   2266                         ((sum * sum>>8) > sse && abs(sum) <128))
   2267                     {
   2268                         sse2 = VP8_UVSSE(x, IF_RTCD(&cpi->rtcd.variance));
   2269 
   2270                         if (sse2 * 2 < threshold)
   2271                         {
   2272                             x->skip = 1;
   2273                             distortion2 = sse + sse2;
   2274                             rate2 = 500;
   2275 
   2276                             /* for best_yrd calculation */
   2277                             rate_uv = 0;
   2278                             distortion_uv = sse2;
   2279 
   2280                             disable_skip = 1;
   2281                             this_rd = RDCOST(x->rdmult, x->rddiv, rate2,
   2282                                              distortion2);
   2283 
   2284                             break;
   2285                         }
   2286                     }
   2287                 }
   2288             }
   2289 
   2290 
   2291             //intermodecost[mode_index] = vp8_cost_mv_ref(this_mode, mdcounts);   // Experimental debug code
   2292 
   2293             // Add in the Mv/mode cost
   2294             rate2 += vp8_cost_mv_ref(this_mode, mdcounts);
   2295 
   2296             // Y cost and distortion
   2297             macro_block_yrd(x, &rate_y, &distortion, IF_RTCD(&cpi->rtcd.encodemb));
   2298             rate2 += rate_y;
   2299             distortion2 += distortion;
   2300 
   2301             // UV cost and distortion
   2302             vp8_rd_inter_uv(cpi, x, &rate_uv, &distortion_uv, cpi->common.full_pixel);
   2303             rate2 += rate_uv;
   2304             distortion2 += distortion_uv;
   2305             break;
   2306 
   2307         default:
   2308             break;
   2309         }
   2310 
   2311         // Where skip is allowable add in the default per mb cost for the no skip case.
   2312         // where we then decide to skip we have to delete this and replace it with the
   2313         // cost of signallying a skip
   2314         if (cpi->common.mb_no_coeff_skip)
   2315         {
   2316             other_cost += vp8_cost_bit(cpi->prob_skip_false, 0);
   2317             rate2 += other_cost;
   2318         }
   2319 
   2320         // Estimate the reference frame signaling cost and add it to the rolling cost variable.
   2321         rate2 += ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
   2322 
   2323         if (!disable_skip)
   2324         {
   2325             // Test for the condition where skip block will be activated because there are no non zero coefficients and make any necessary adjustment for rate
   2326             if (cpi->common.mb_no_coeff_skip)
   2327             {
   2328                 int tteob;
   2329 
   2330                 tteob = 0;
   2331 
   2332                 for (i = 0; i <= 24; i++)
   2333                 {
   2334                     tteob += x->e_mbd.block[i].eob;
   2335                 }
   2336 
   2337                 if (tteob == 0)
   2338                 {
   2339                     rate2 -= (rate_y + rate_uv);
   2340                     //for best_yrd calculation
   2341                     rate_uv = 0;
   2342 
   2343                     // Back out no skip flag costing and add in skip flag costing
   2344                     if (cpi->prob_skip_false)
   2345                     {
   2346                         int prob_skip_cost;
   2347 
   2348                         prob_skip_cost = vp8_cost_bit(cpi->prob_skip_false, 1);
   2349                         prob_skip_cost -= vp8_cost_bit(cpi->prob_skip_false, 0);
   2350                         rate2 += prob_skip_cost;
   2351                         other_cost += prob_skip_cost;
   2352                     }
   2353                 }
   2354             }
   2355             // Calculate the final RD estimate for this mode
   2356             this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
   2357         }
   2358 
   2359         // Experimental debug code.
   2360         //all_rds[mode_index] = this_rd;
   2361         //all_rates[mode_index] = rate2;
   2362         //all_dist[mode_index] = distortion2;
   2363 
   2364         if ((x->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME)  && (this_rd < *returnintra))
   2365         {
   2366             *returnintra = this_rd ;
   2367         }
   2368 
   2369         // Did this mode help, i.e. is it the new best mode?
   2370         if (this_rd < best_rd || x->skip)
   2371         {
   2372             // Note index of best mode so far
   2373             best_mode_index = mode_index;
   2374             x->e_mbd.mode_info_context->mbmi.force_no_skip = force_no_skip;
   2375 
   2376             if (this_mode <= B_PRED)
   2377             {
   2378                 x->e_mbd.mode_info_context->mbmi.uv_mode = uv_intra_mode;
   2379             }
   2380 
   2381             other_cost += ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
   2382 
   2383             /* Calculate the final y RD estimate for this mode */
   2384             best_yrd = RDCOST(x->rdmult, x->rddiv, (rate2-rate_uv-other_cost),
   2385                               (distortion2-distortion_uv));
   2386 
   2387             *returnrate = rate2;
   2388             *returndistortion = distortion2;
   2389             best_rd = this_rd;
   2390             vpx_memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi, sizeof(MB_MODE_INFO));
   2391             vpx_memcpy(&best_partition, x->partition_info, sizeof(PARTITION_INFO));
   2392 
   2393             for (i = 0; i < 16; i++)
   2394             {
   2395                 vpx_memcpy(&best_bmodes[i], &x->e_mbd.block[i].bmi, sizeof(B_MODE_INFO));
   2396             }
   2397 
   2398             // Testing this mode gave rise to an improvement in best error score. Lower threshold a bit for next time
   2399             cpi->rd_thresh_mult[mode_index] = (cpi->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ? cpi->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT;
   2400             cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index];
   2401         }
   2402 
   2403         // If the mode did not help improve the best error case then raise the threshold for testing that mode next time around.
   2404         else
   2405         {
   2406             cpi->rd_thresh_mult[mode_index] += 4;
   2407 
   2408             if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT)
   2409                 cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
   2410 
   2411             cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index];
   2412         }
   2413 
   2414         if (x->skip)
   2415             break;
   2416 
   2417     }
   2418 
   2419     // Reduce the activation RD thresholds for the best choice mode
   2420     if ((cpi->rd_baseline_thresh[best_mode_index] > 0) && (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2)))
   2421     {
   2422         int best_adjustment = (cpi->rd_thresh_mult[best_mode_index] >> 2);
   2423 
   2424         cpi->rd_thresh_mult[best_mode_index] = (cpi->rd_thresh_mult[best_mode_index] >= (MIN_THRESHMULT + best_adjustment)) ? cpi->rd_thresh_mult[best_mode_index] - best_adjustment : MIN_THRESHMULT;
   2425         cpi->rd_threshes[best_mode_index] = (cpi->rd_baseline_thresh[best_mode_index] >> 7) * cpi->rd_thresh_mult[best_mode_index];
   2426 
   2427         // If we chose a split mode then reset the new MV thresholds as well
   2428         /*if ( vp8_mode_order[best_mode_index] == SPLITMV )
   2429         {
   2430             best_adjustment = 4; //(cpi->rd_thresh_mult[THR_NEWMV] >> 4);
   2431             cpi->rd_thresh_mult[THR_NEWMV] = (cpi->rd_thresh_mult[THR_NEWMV] >= (MIN_THRESHMULT+best_adjustment)) ? cpi->rd_thresh_mult[THR_NEWMV]-best_adjustment: MIN_THRESHMULT;
   2432             cpi->rd_threshes[THR_NEWMV] = (cpi->rd_baseline_thresh[THR_NEWMV] >> 7) * cpi->rd_thresh_mult[THR_NEWMV];
   2433 
   2434             best_adjustment = 4; //(cpi->rd_thresh_mult[THR_NEWG] >> 4);
   2435             cpi->rd_thresh_mult[THR_NEWG] = (cpi->rd_thresh_mult[THR_NEWG] >= (MIN_THRESHMULT+best_adjustment)) ? cpi->rd_thresh_mult[THR_NEWG]-best_adjustment: MIN_THRESHMULT;
   2436             cpi->rd_threshes[THR_NEWG] = (cpi->rd_baseline_thresh[THR_NEWG] >> 7) * cpi->rd_thresh_mult[THR_NEWG];
   2437 
   2438             best_adjustment = 4; //(cpi->rd_thresh_mult[THR_NEWA] >> 4);
   2439             cpi->rd_thresh_mult[THR_NEWA] = (cpi->rd_thresh_mult[THR_NEWA] >= (MIN_THRESHMULT+best_adjustment)) ? cpi->rd_thresh_mult[THR_NEWA]-best_adjustment: MIN_THRESHMULT;
   2440             cpi->rd_threshes[THR_NEWA] = (cpi->rd_baseline_thresh[THR_NEWA] >> 7) * cpi->rd_thresh_mult[THR_NEWA];
   2441         }*/
   2442 
   2443     }
   2444 
   2445     // If we have chosen new mv or split then decay the full search check count more quickly.
   2446     if ((vp8_mode_order[best_mode_index] == NEWMV) || (vp8_mode_order[best_mode_index] == SPLITMV))
   2447     {
   2448         int lf_or_gf = (vp8_ref_frame_order[best_mode_index] == LAST_FRAME) ? 0 : 1;
   2449 
   2450         if (cpi->check_freq[lf_or_gf] && !cpi->do_full[lf_or_gf])
   2451         {
   2452             cpi->check_freq[lf_or_gf] --;
   2453         }
   2454     }
   2455 
   2456     // Keep a record of best mode index that we chose
   2457     cpi->last_best_mode_index = best_mode_index;
   2458 
   2459     // Note how often each mode chosen as best
   2460     cpi->mode_chosen_counts[best_mode_index] ++;
   2461 
   2462 
   2463     if (cpi->is_src_frame_alt_ref && (best_mbmode.mode != ZEROMV || best_mbmode.ref_frame != ALTREF_FRAME))
   2464     {
   2465         best_mbmode.mode = ZEROMV;
   2466         best_mbmode.ref_frame = ALTREF_FRAME;
   2467         best_mbmode.mv.as_int = 0;
   2468         best_mbmode.uv_mode = 0;
   2469         best_mbmode.mb_skip_coeff = (cpi->common.mb_no_coeff_skip) ? 1 : 0;
   2470         best_mbmode.partitioning = 0;
   2471         best_mbmode.dc_diff = 0;
   2472 
   2473         vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
   2474         vpx_memcpy(x->partition_info, &best_partition, sizeof(PARTITION_INFO));
   2475 
   2476         for (i = 0; i < 16; i++)
   2477         {
   2478             vpx_memset(&x->e_mbd.block[i].bmi, 0, sizeof(B_MODE_INFO));
   2479         }
   2480 
   2481         x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
   2482 
   2483         return best_rd;
   2484     }
   2485 
   2486 
   2487     if(best_mbmode.mode <= B_PRED)
   2488     {
   2489         int i;
   2490         for (i = 0; i < 16; i++)
   2491         {
   2492             best_bmodes[i].mv.as_int = 0;
   2493         }
   2494     }
   2495 
   2496     // macroblock modes
   2497     vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
   2498     vpx_memcpy(x->partition_info, &best_partition, sizeof(PARTITION_INFO));
   2499 
   2500     for (i = 0; i < 16; i++)
   2501     {
   2502         vpx_memcpy(&x->e_mbd.block[i].bmi, &best_bmodes[i], sizeof(B_MODE_INFO));
   2503     }
   2504 
   2505     x->e_mbd.mode_info_context->mbmi.mv.as_mv = x->e_mbd.block[15].bmi.mv.as_mv;
   2506 
   2507     return best_rd;
   2508 }
   2509 #endif
   2510