Home | History | Annotate | Download | only in encoder
      1 /*
      2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 
     12 #include <stdio.h>
     13 #include <math.h>
     14 #include <limits.h>
     15 #include <assert.h>
     16 #include "pragmas.h"
     17 
     18 #include "tokenize.h"
     19 #include "treewriter.h"
     20 #include "onyx_int.h"
     21 #include "modecosts.h"
     22 #include "encodeintra.h"
     23 #include "entropymode.h"
     24 #include "reconinter.h"
     25 #include "reconintra.h"
     26 #include "reconintra4x4.h"
     27 #include "findnearmv.h"
     28 #include "encodemb.h"
     29 #include "quantize.h"
     30 #include "idct.h"
     31 #include "g_common.h"
     32 #include "variance.h"
     33 #include "mcomp.h"
     34 
     35 #include "vpx_mem/vpx_mem.h"
     36 #include "dct.h"
     37 #include "systemdependent.h"
     38 
     39 #define DIAMONDSEARCH 1
     40 #if CONFIG_RUNTIME_CPU_DETECT
     41 #define IF_RTCD(x)  (x)
     42 #else
     43 #define IF_RTCD(x)  NULL
     44 #endif
     45 
     46 
     47 void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x);
     48 
     49 
     50 #define RDFUNC(RM,DM,R,D,target_rd) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) )
     51 /*int  RDFUNC( int RM,int DM, int R, int D, int target_r )
     52 {
     53     int rd_value;
     54 
     55     rd_value =  ( ((128+(R)*(RM)) >> 8) + (DM)*(D) );
     56 
     57     return rd_value;
     58 }*/
     59 
     60 #define UVRDFUNC(RM,DM,R,D,target_r)  RDFUNC(RM,DM,R,D,target_r)
     61 
     62 #define RDCOST(RM,DM,R,D) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) )
     63 
     64 #define MAXF(a,b)            (((a) > (b)) ? (a) : (b))
     65 
     66 
     67 
     68 const int vp8_auto_speed_thresh[17] =
     69 {
     70     1000,
     71     200,
     72     150,
     73     130,
     74     150,
     75     125,
     76     120,
     77     115,
     78     115,
     79     115,
     80     115,
     81     115,
     82     115,
     83     115,
     84     115,
     85     115,
     86     105
     87 };
     88 
     89 const MB_PREDICTION_MODE vp8_mode_order[MAX_MODES] =
     90 {
     91     ZEROMV,
     92     DC_PRED,
     93 
     94     NEARESTMV,
     95     NEARMV,
     96 
     97     ZEROMV,
     98     NEARESTMV,
     99 
    100     ZEROMV,
    101     NEARESTMV,
    102 
    103     NEARMV,
    104     NEARMV,
    105 
    106     V_PRED,
    107     H_PRED,
    108     TM_PRED,
    109 
    110     NEWMV,
    111     NEWMV,
    112     NEWMV,
    113 
    114     SPLITMV,
    115     SPLITMV,
    116     SPLITMV,
    117 
    118     B_PRED,
    119 };
    120 
    121 const MV_REFERENCE_FRAME vp8_ref_frame_order[MAX_MODES] =
    122 {
    123     LAST_FRAME,
    124     INTRA_FRAME,
    125 
    126     LAST_FRAME,
    127     LAST_FRAME,
    128 
    129     GOLDEN_FRAME,
    130     GOLDEN_FRAME,
    131 
    132     ALTREF_FRAME,
    133     ALTREF_FRAME,
    134 
    135     GOLDEN_FRAME,
    136     ALTREF_FRAME,
    137 
    138     INTRA_FRAME,
    139     INTRA_FRAME,
    140     INTRA_FRAME,
    141 
    142     LAST_FRAME,
    143     GOLDEN_FRAME,
    144     ALTREF_FRAME,
    145 
    146     LAST_FRAME,
    147     GOLDEN_FRAME,
    148     ALTREF_FRAME,
    149 
    150     INTRA_FRAME,
    151 };
    152 
    153 static void fill_token_costs(
    154     unsigned int c      [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens],
    155     const vp8_prob p    [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens-1]
    156 )
    157 {
    158     int i, j, k;
    159 
    160 
    161     for (i = 0; i < BLOCK_TYPES; i++)
    162         for (j = 0; j < COEF_BANDS; j++)
    163             for (k = 0; k < PREV_COEF_CONTEXTS; k++)
    164 
    165                 vp8_cost_tokens((int *)(c [i][j][k]), p [i][j][k], vp8_coef_tree);
    166 
    167 }
    168 
    169 static int rd_iifactor [ 32 ] =  {    4,   4,   3,   2,   1,   0,   0,   0,
    170                                       0,   0,   0,   0,   0,   0,   0,   0,
    171                                       0,   0,   0,   0,   0,   0,   0,   0,
    172                                       0,   0,   0,   0,   0,   0,   0,   0,
    173                                  };
    174 
    175 
    176 // The values in this table should be reviewed
    177 static int sad_per_bit16lut[128] =
    178 {
    179     4,  4, 4, 4,  4, 4, 4, 4,   // 4
    180     4,  4, 4, 4,  4, 4, 4, 4,   // 1
    181     4,  4, 4, 4,  4, 4, 4, 4,   // 2
    182     4,  4, 4, 4,  4, 4, 4, 4,   // 3
    183     4,  4, 4, 4,  4, 4, 4, 4,   // 4
    184     4,  4, 12, 12, 13, 13, 14, 14, // 5
    185     14, 14, 14, 15, 15, 15, 15, 15, // 6
    186     15, 15, 15, 15, 15, 15, 15, 15, // 7
    187     15, 15, 15, 15, 15, 16, 16, 16, // 8
    188     16, 16, 18, 18, 18, 18, 19, 19, // 9
    189     19, 19, 19, 19, 19, 19, 19, 19, // 10
    190     20, 20, 22, 22, 22, 22, 21, 21, // 11
    191     22, 22, 22, 22, 22, 22, 22, 22, // 12
    192     22, 22, 22, 22, 22, 22, 22, 22, // 13
    193     22, 22, 22, 22, 22, 22, 22, 22, // 14
    194     22, 22, 22, 22, 22, 22, 22, 22, // 15
    195 };
    196 
    197 static int sad_per_bit4lut[128] =
    198 {
    199     4,  4, 4, 4,  4, 4, 4, 4,   // 4
    200     4,  4, 4, 4,  4, 4, 4, 4,   // 1
    201     4,  4, 4, 4,  4, 4, 4, 4,   // 2
    202     4,  4, 4, 4,  4, 4, 4, 4,   // 3
    203     4,  4, 4, 4,  4, 4, 4, 4,   // 4
    204     4,  4, 15, 15, 15, 15, 16, 16, // 5
    205     16, 17, 17, 17, 17, 17, 17, 17, // 6
    206     17, 17, 19, 19, 22, 22, 21, 21, // 7
    207     23, 23, 23, 23, 23, 24, 24, 24, // 8
    208     25, 25, 27, 27, 27, 27, 28, 28, // 9
    209     28, 28, 29, 29, 29, 29, 29, 29, // 10
    210     30, 30, 31, 31, 31, 31, 32, 32, // 11
    211     34, 34, 34, 34, 34, 34, 34, 34, // 12
    212     34, 34, 34, 34, 34, 34, 34, 34, // 13
    213     34, 34, 34, 34, 34, 34, 34, 34, // 14
    214     34, 34, 34, 34, 34, 34, 34, 34, // 15
    215 };
    216 
    217 void vp8cx_initialize_me_consts(VP8_COMP *cpi, int QIndex)
    218 {
    219     cpi->mb.sadperbit16 =  sad_per_bit16lut[QIndex];
    220     cpi->mb.sadperbit4  =  sad_per_bit4lut[QIndex];
    221 }
    222 
    223 void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue)
    224 {
    225     int q;
    226     int i;
    227     int *thresh;
    228     int threshmult;
    229     double capped_q = (Qvalue < 160) ? (double)Qvalue : 160.0;
    230     double rdconst = 3.00;
    231 
    232     vp8_clear_system_state();  //__asm emms;
    233 
    234     // Further tests required to see if optimum is different
    235     // for key frames, golden frames and arf frames.
    236     // if (cpi->common.refresh_golden_frame ||
    237     //     cpi->common.refresh_alt_ref_frame)
    238     cpi->RDMULT = (int)(rdconst * (capped_q * capped_q));
    239 
    240     // Extend rate multiplier along side quantizer zbin increases
    241     if (cpi->zbin_over_quant  > 0)
    242     {
    243         double oq_factor;
    244         double modq;
    245 
    246         // Experimental code using the same basic equation as used for Q above
    247         // The units of cpi->zbin_over_quant are 1/128 of Q bin size
    248         oq_factor = 1.0 + ((double)0.0015625 * cpi->zbin_over_quant);
    249         modq = (int)((double)capped_q * oq_factor);
    250         cpi->RDMULT = (int)(rdconst * (modq * modq));
    251     }
    252 
    253     if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME))
    254     {
    255         if (cpi->next_iiratio > 31)
    256             cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
    257         else
    258             cpi->RDMULT += (cpi->RDMULT * rd_iifactor[cpi->next_iiratio]) >> 4;
    259     }
    260 
    261     if (cpi->RDMULT < 125)
    262         cpi->RDMULT = 125;
    263 
    264     cpi->mb.errorperbit = (cpi->RDMULT / 100);
    265 
    266     if (cpi->mb.errorperbit < 1)
    267         cpi->mb.errorperbit = 1;
    268 
    269     vp8_set_speed_features(cpi);
    270 
    271     if (cpi->common.simpler_lpf)
    272         cpi->common.filter_type = SIMPLE_LOOPFILTER;
    273 
    274     q = (int)pow(Qvalue, 1.25);
    275 
    276     if (q < 8)
    277         q = 8;
    278 
    279     if (cpi->ref_frame_flags == VP8_ALT_FLAG)
    280     {
    281         thresh      = &cpi->rd_threshes[THR_NEWA];
    282         threshmult  = cpi->sf.thresh_mult[THR_NEWA];
    283     }
    284     else if (cpi->ref_frame_flags == VP8_GOLD_FLAG)
    285     {
    286         thresh      = &cpi->rd_threshes[THR_NEWG];
    287         threshmult  = cpi->sf.thresh_mult[THR_NEWG];
    288     }
    289     else
    290     {
    291         thresh      = &cpi->rd_threshes[THR_NEWMV];
    292         threshmult  = cpi->sf.thresh_mult[THR_NEWMV];
    293     }
    294 
    295     if (cpi->RDMULT > 1000)
    296     {
    297         cpi->RDDIV = 1;
    298         cpi->RDMULT /= 100;
    299 
    300         for (i = 0; i < MAX_MODES; i++)
    301         {
    302             if (cpi->sf.thresh_mult[i] < INT_MAX)
    303             {
    304                 cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q / 100;
    305             }
    306             else
    307             {
    308                 cpi->rd_threshes[i] = INT_MAX;
    309             }
    310 
    311             cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
    312         }
    313     }
    314     else
    315     {
    316         cpi->RDDIV = 100;
    317 
    318         for (i = 0; i < MAX_MODES; i++)
    319         {
    320             if (cpi->sf.thresh_mult[i] < (INT_MAX / q))
    321             {
    322                 cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q;
    323             }
    324             else
    325             {
    326                 cpi->rd_threshes[i] = INT_MAX;
    327             }
    328 
    329             cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
    330         }
    331     }
    332 
    333     fill_token_costs(
    334         cpi->mb.token_costs,
    335         (const vp8_prob( *)[8][3][11]) cpi->common.fc.coef_probs
    336     );
    337 
    338     vp8_init_mode_costs(cpi);
    339 
    340 }
    341 
    342 void vp8_auto_select_speed(VP8_COMP *cpi)
    343 {
    344     int used = cpi->oxcf.cpu_used;
    345 
    346     int milliseconds_for_compress = (int)(1000000 / cpi->oxcf.frame_rate);
    347 
    348     milliseconds_for_compress = milliseconds_for_compress * (16 - cpi->oxcf.cpu_used) / 16;
    349 
    350 #if 0
    351 
    352     if (0)
    353     {
    354         FILE *f;
    355 
    356         f = fopen("speed.stt", "a");
    357         fprintf(f, " %8ld %10ld %10ld %10ld\n",
    358                 cpi->common.current_video_frame, cpi->Speed, milliseconds_for_compress, cpi->avg_pick_mode_time);
    359         fclose(f);
    360     }
    361 
    362 #endif
    363 
    364     /*
    365     // this is done during parameter valid check
    366     if( used > 16)
    367         used = 16;
    368     if( used < -16)
    369         used = -16;
    370     */
    371 
    372     if (cpi->avg_pick_mode_time < milliseconds_for_compress && (cpi->avg_encode_time - cpi->avg_pick_mode_time) < milliseconds_for_compress)
    373     {
    374         if (cpi->avg_pick_mode_time == 0)
    375         {
    376             cpi->Speed = 4;
    377         }
    378         else
    379         {
    380             if (milliseconds_for_compress * 100 < cpi->avg_encode_time * 95)
    381             {
    382                 cpi->Speed          += 2;
    383                 cpi->avg_pick_mode_time = 0;
    384                 cpi->avg_encode_time = 0;
    385 
    386                 if (cpi->Speed > 16)
    387                 {
    388                     cpi->Speed = 16;
    389                 }
    390             }
    391 
    392             if (milliseconds_for_compress * 100 > cpi->avg_encode_time * vp8_auto_speed_thresh[cpi->Speed])
    393             {
    394                 cpi->Speed          -= 1;
    395                 cpi->avg_pick_mode_time = 0;
    396                 cpi->avg_encode_time = 0;
    397 
    398                 // In real-time mode, cpi->speed is in [4, 16].
    399                 if (cpi->Speed < 4)        //if ( cpi->Speed < 0 )
    400                 {
    401                     cpi->Speed = 4;        //cpi->Speed = 0;
    402                 }
    403             }
    404         }
    405     }
    406     else
    407     {
    408         cpi->Speed += 4;
    409 
    410         if (cpi->Speed > 16)
    411             cpi->Speed = 16;
    412 
    413 
    414         cpi->avg_pick_mode_time = 0;
    415         cpi->avg_encode_time = 0;
    416     }
    417 }
    418 
    419 int vp8_block_error_c(short *coeff, short *dqcoeff)
    420 {
    421     int i;
    422     int error = 0;
    423 
    424     for (i = 0; i < 16; i++)
    425     {
    426         int this_diff = coeff[i] - dqcoeff[i];
    427         error += this_diff * this_diff;
    428     }
    429 
    430     return error;
    431 }
    432 
    433 int vp8_mbblock_error_c(MACROBLOCK *mb, int dc)
    434 {
    435     BLOCK  *be;
    436     BLOCKD *bd;
    437     int i, j;
    438     int berror, error = 0;
    439 
    440     for (i = 0; i < 16; i++)
    441     {
    442         be = &mb->block[i];
    443         bd = &mb->e_mbd.block[i];
    444 
    445         berror = 0;
    446 
    447         for (j = dc; j < 16; j++)
    448         {
    449             int this_diff = be->coeff[j] - bd->dqcoeff[j];
    450             berror += this_diff * this_diff;
    451         }
    452 
    453         error += berror;
    454     }
    455 
    456     return error;
    457 }
    458 
    459 int vp8_mbuverror_c(MACROBLOCK *mb)
    460 {
    461 
    462     BLOCK  *be;
    463     BLOCKD *bd;
    464 
    465 
    466     int i;
    467     int error = 0;
    468 
    469     for (i = 16; i < 24; i++)
    470     {
    471         be = &mb->block[i];
    472         bd = &mb->e_mbd.block[i];
    473 
    474         error += vp8_block_error_c(be->coeff, bd->dqcoeff);
    475     }
    476 
    477     return error;
    478 }
    479 
    480 #if !(CONFIG_REALTIME_ONLY)
    481 static int macro_block_max_error(MACROBLOCK *mb)
    482 {
    483     int error = 0;
    484     int dc = 0;
    485     BLOCK  *be;
    486     int i, j;
    487     int berror;
    488 
    489     dc = !(mb->e_mbd.mode_info_context->mbmi.mode == B_PRED || mb->e_mbd.mode_info_context->mbmi.mode == SPLITMV);
    490 
    491     for (i = 0; i < 16; i++)
    492     {
    493         be = &mb->block[i];
    494 
    495         berror = 0;
    496 
    497         for (j = dc; j < 16; j++)
    498         {
    499             int this_diff = be->coeff[j];
    500             berror += this_diff * this_diff;
    501         }
    502 
    503         error += berror;
    504     }
    505 
    506     for (i = 16; i < 24; i++)
    507     {
    508         be = &mb->block[i];
    509         berror = 0;
    510 
    511         for (j = 0; j < 16; j++)
    512         {
    513             int this_diff = be->coeff[j];
    514             berror += this_diff * this_diff;
    515         }
    516 
    517         error += berror;
    518     }
    519 
    520     error <<= 2;
    521 
    522     if (dc)
    523     {
    524         be = &mb->block[24];
    525         berror = 0;
    526 
    527         for (j = 0; j < 16; j++)
    528         {
    529             int this_diff = be->coeff[j];
    530             berror += this_diff * this_diff;
    531         }
    532 
    533         error += berror;
    534     }
    535 
    536     error >>= 4;
    537     return error;
    538 }
    539 #endif
    540 
    541 int VP8_UVSSE(MACROBLOCK *x, const vp8_variance_rtcd_vtable_t *rtcd)
    542 {
    543     unsigned char *uptr, *vptr;
    544     unsigned char *upred_ptr = (*(x->block[16].base_src) + x->block[16].src);
    545     unsigned char *vpred_ptr = (*(x->block[20].base_src) + x->block[20].src);
    546     int uv_stride = x->block[16].src_stride;
    547 
    548     unsigned int sse1 = 0;
    549     unsigned int sse2 = 0;
    550     int mv_row;
    551     int mv_col;
    552     int offset;
    553     int pre_stride = x->e_mbd.block[16].pre_stride;
    554 
    555     vp8_build_uvmvs(&x->e_mbd, 0);
    556     mv_row = x->e_mbd.block[16].bmi.mv.as_mv.row;
    557     mv_col = x->e_mbd.block[16].bmi.mv.as_mv.col;
    558 
    559     offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
    560     uptr = x->e_mbd.pre.u_buffer + offset;
    561     vptr = x->e_mbd.pre.v_buffer + offset;
    562 
    563     if ((mv_row | mv_col) & 7)
    564     {
    565         VARIANCE_INVOKE(rtcd, subpixvar8x8)(uptr, pre_stride, mv_col & 7, mv_row & 7, upred_ptr, uv_stride, &sse2);
    566         VARIANCE_INVOKE(rtcd, subpixvar8x8)(vptr, pre_stride, mv_col & 7, mv_row & 7, vpred_ptr, uv_stride, &sse1);
    567         sse2 += sse1;
    568     }
    569     else
    570     {
    571         VARIANCE_INVOKE(rtcd, subpixvar8x8)(uptr, pre_stride, mv_col & 7, mv_row & 7, upred_ptr, uv_stride, &sse2);
    572         VARIANCE_INVOKE(rtcd, subpixvar8x8)(vptr, pre_stride, mv_col & 7, mv_row & 7, vpred_ptr, uv_stride, &sse1);
    573         sse2 += sse1;
    574     }
    575 
    576     return sse2;
    577 
    578 }
    579 
    580 #if !(CONFIG_REALTIME_ONLY)
    581 static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, int type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l)
    582 {
    583     int c = !type;              /* start at coef 0, unless Y with Y2 */
    584     int eob = b->eob;
    585     int pt ;    /* surrounding block/prev coef predictor */
    586     int cost = 0;
    587     short *qcoeff_ptr = b->qcoeff;
    588 
    589     VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
    590 
    591 # define QC( I)  ( qcoeff_ptr [vp8_default_zig_zag1d[I]] )
    592 
    593     for (; c < eob; c++)
    594     {
    595         int v = QC(c);
    596         int t = vp8_dct_value_tokens_ptr[v].Token;
    597         cost += mb->token_costs [type] [vp8_coef_bands[c]] [pt] [t];
    598         cost += vp8_dct_value_cost_ptr[v];
    599         pt = vp8_prev_token_class[t];
    600     }
    601 
    602 # undef QC
    603 
    604     if (c < 16)
    605         cost += mb->token_costs [type] [vp8_coef_bands[c]] [pt] [DCT_EOB_TOKEN];
    606 
    607     pt = (c != !type); // is eob first coefficient;
    608     *a = *l = pt;
    609 
    610     return cost;
    611 }
    612 
    613 int vp8_rdcost_mby(MACROBLOCK *mb)
    614 {
    615     int cost = 0;
    616     int b;
    617     int type = 0;
    618     MACROBLOCKD *x = &mb->e_mbd;
    619     ENTROPY_CONTEXT_PLANES t_above, t_left;
    620     ENTROPY_CONTEXT *ta;
    621     ENTROPY_CONTEXT *tl;
    622 
    623     vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    624     vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
    625 
    626     ta = (ENTROPY_CONTEXT *)&t_above;
    627     tl = (ENTROPY_CONTEXT *)&t_left;
    628 
    629     if (x->mode_info_context->mbmi.mode == SPLITMV)
    630         type = 3;
    631 
    632     for (b = 0; b < 16; b++)
    633         cost += cost_coeffs(mb, x->block + b, type,
    634                     ta + vp8_block2above[b], tl + vp8_block2left[b]);
    635 
    636     if (x->mode_info_context->mbmi.mode != SPLITMV)
    637         cost += cost_coeffs(mb, x->block + 24, 1,
    638                     ta + vp8_block2above[24], tl + vp8_block2left[24]);
    639 
    640     return cost;
    641 }
    642 
    643 
    644 static void rd_pick_intra4x4block(
    645     VP8_COMP *cpi,
    646     MACROBLOCK *x,
    647     BLOCK *be,
    648     BLOCKD *b,
    649     B_PREDICTION_MODE *best_mode,
    650     B_PREDICTION_MODE above,
    651     B_PREDICTION_MODE left,
    652     ENTROPY_CONTEXT *a,
    653     ENTROPY_CONTEXT *l,
    654 
    655     int *bestrate,
    656     int *bestratey,
    657     int *bestdistortion)
    658 {
    659     B_PREDICTION_MODE mode;
    660     int best_rd = INT_MAX;       // 1<<30
    661     int rate = 0;
    662     int distortion;
    663     unsigned int *mode_costs;
    664 
    665     ENTROPY_CONTEXT ta = *a, tempa = *a;
    666     ENTROPY_CONTEXT tl = *l, templ = *l;
    667 
    668 
    669     if (x->e_mbd.frame_type == KEY_FRAME)
    670     {
    671         mode_costs  = x->bmode_costs[above][left];
    672     }
    673     else
    674     {
    675         mode_costs = x->inter_bmode_costs;
    676     }
    677 
    678     for (mode = B_DC_PRED; mode <= B_HU_PRED; mode++)
    679     {
    680         int this_rd;
    681         int ratey;
    682 
    683         rate = mode_costs[mode];
    684         vp8_encode_intra4x4block_rd(IF_RTCD(&cpi->rtcd), x, be, b, mode);
    685 
    686         tempa = ta;
    687         templ = tl;
    688 
    689         ratey = cost_coeffs(x, b, 3, &tempa, &templ);
    690         rate += ratey;
    691         distortion = ENCODEMB_INVOKE(IF_RTCD(&cpi->rtcd.encodemb), berr)(be->coeff, b->dqcoeff) >> 2;
    692 
    693         this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
    694 
    695         if (this_rd < best_rd)
    696         {
    697             *bestrate = rate;
    698             *bestratey = ratey;
    699             *bestdistortion = distortion;
    700             best_rd = this_rd;
    701             *best_mode = mode;
    702             *a = tempa;
    703             *l = templ;
    704         }
    705     }
    706 
    707     b->bmi.mode = (B_PREDICTION_MODE)(*best_mode);
    708     vp8_encode_intra4x4block_rd(IF_RTCD(&cpi->rtcd), x, be, b, b->bmi.mode);
    709 
    710 }
    711 
    712 
    713 int vp8_rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate, int *rate_y, int *Distortion)
    714 {
    715     MACROBLOCKD *const xd = &mb->e_mbd;
    716     int i;
    717     int cost = mb->mbmode_cost [xd->frame_type] [B_PRED];
    718     int distortion = 0;
    719     int tot_rate_y = 0;
    720     ENTROPY_CONTEXT_PLANES t_above, t_left;
    721     ENTROPY_CONTEXT *ta;
    722     ENTROPY_CONTEXT *tl;
    723 
    724     vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    725     vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
    726 
    727     ta = (ENTROPY_CONTEXT *)&t_above;
    728     tl = (ENTROPY_CONTEXT *)&t_left;
    729 
    730     vp8_intra_prediction_down_copy(xd);
    731 
    732     for (i = 0; i < 16; i++)
    733     {
    734         MODE_INFO *const mic = xd->mode_info_context;
    735         const int mis = xd->mode_info_stride;
    736         const B_PREDICTION_MODE A = vp8_above_bmi(mic, i, mis)->mode;
    737         const B_PREDICTION_MODE L = vp8_left_bmi(mic, i)->mode;
    738         B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode);
    739         int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry), UNINITIALIZED_IS_SAFE(d);
    740 
    741         rd_pick_intra4x4block(
    742             cpi, mb, mb->block + i, xd->block + i, &best_mode, A, L,
    743             ta + vp8_block2above[i],
    744             tl + vp8_block2left[i], &r, &ry, &d);
    745 
    746         cost += r;
    747         distortion += d;
    748         tot_rate_y += ry;
    749         mic->bmi[i].mode = xd->block[i].bmi.mode = best_mode;
    750     }
    751 
    752     *Rate = cost;
    753     *rate_y += tot_rate_y;
    754     *Distortion = distortion;
    755 
    756     return RDCOST(mb->rdmult, mb->rddiv, cost, distortion);
    757 }
    758 
    759 int vp8_rd_pick_intra16x16mby_mode(VP8_COMP *cpi, MACROBLOCK *x, int *Rate, int *rate_y, int *Distortion)
    760 {
    761 
    762     MB_PREDICTION_MODE mode;
    763     MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
    764     int rate, ratey;
    765     unsigned int distortion;
    766     int best_rd = INT_MAX;
    767 
    768     //Y Search for 16x16 intra prediction mode
    769     for (mode = DC_PRED; mode <= TM_PRED; mode++)
    770     {
    771         int this_rd;
    772         int dummy;
    773         rate = 0;
    774 
    775         x->e_mbd.mode_info_context->mbmi.mode = mode;
    776 
    777         rate += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode];
    778 
    779         vp8_encode_intra16x16mbyrd(IF_RTCD(&cpi->rtcd), x);
    780 
    781         ratey = vp8_rdcost_mby(x);
    782 
    783         rate += ratey;
    784 
    785         VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16var)(x->src.y_buffer, x->src.y_stride, x->e_mbd.dst.y_buffer, x->e_mbd.dst.y_stride, &distortion, &dummy);
    786 
    787         this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
    788 
    789         if (this_rd < best_rd)
    790         {
    791             mode_selected = mode;
    792             best_rd = this_rd;
    793             *Rate = rate;
    794             *rate_y = ratey;
    795             *Distortion = (int)distortion;
    796         }
    797     }
    798 
    799     x->e_mbd.mode_info_context->mbmi.mode = mode_selected;
    800     return best_rd;
    801 }
    802 
    803 
    804 static int rd_cost_mbuv(MACROBLOCK *mb)
    805 {
    806     int b;
    807     int cost = 0;
    808     MACROBLOCKD *x = &mb->e_mbd;
    809     ENTROPY_CONTEXT_PLANES t_above, t_left;
    810     ENTROPY_CONTEXT *ta;
    811     ENTROPY_CONTEXT *tl;
    812 
    813     vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    814     vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
    815 
    816     ta = (ENTROPY_CONTEXT *)&t_above;
    817     tl = (ENTROPY_CONTEXT *)&t_left;
    818 
    819     for (b = 16; b < 20; b++)
    820         cost += cost_coeffs(mb, x->block + b, vp8_block2type[b],
    821                     ta + vp8_block2above[b], tl + vp8_block2left[b]);
    822 
    823     for (b = 20; b < 24; b++)
    824         cost += cost_coeffs(mb, x->block + b, vp8_block2type[b],
    825                     ta + vp8_block2above[b], tl + vp8_block2left[b]);
    826 
    827     return cost;
    828 }
    829 
    830 
    831 unsigned int vp8_get_mbuvrecon_error(const vp8_variance_rtcd_vtable_t *rtcd, const MACROBLOCK *x) // sum of squares
    832 {
    833     unsigned int sse0, sse1;
    834     int sum0, sum1;
    835     VARIANCE_INVOKE(rtcd, get8x8var)(x->src.u_buffer, x->src.uv_stride, x->e_mbd.dst.u_buffer, x->e_mbd.dst.uv_stride, &sse0, &sum0);
    836     VARIANCE_INVOKE(rtcd, get8x8var)(x->src.v_buffer, x->src.uv_stride, x->e_mbd.dst.v_buffer, x->e_mbd.dst.uv_stride, &sse1, &sum1);
    837     return (sse0 + sse1);
    838 }
    839 
    840 static int vp8_rd_inter_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *distortion, int fullpixel)
    841 {
    842     vp8_build_uvmvs(&x->e_mbd, fullpixel);
    843     vp8_encode_inter16x16uvrd(IF_RTCD(&cpi->rtcd), x);
    844 
    845 
    846     *rate       = rd_cost_mbuv(x);
    847     *distortion = ENCODEMB_INVOKE(&cpi->rtcd.encodemb, mbuverr)(x) / 4;
    848 
    849     return UVRDFUNC(x->rdmult, x->rddiv, *rate, *distortion, cpi->target_bits_per_mb);
    850 }
    851 
    852 int vp8_rd_pick_intra_mbuv_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *rate_tokenonly, int *distortion)
    853 {
    854     MB_PREDICTION_MODE mode;
    855     MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
    856     int best_rd = INT_MAX;
    857     int UNINITIALIZED_IS_SAFE(d), UNINITIALIZED_IS_SAFE(r);
    858     int rate_to;
    859 
    860     for (mode = DC_PRED; mode <= TM_PRED; mode++)
    861     {
    862         int rate;
    863         int distortion;
    864         int this_rd;
    865 
    866         x->e_mbd.mode_info_context->mbmi.uv_mode = mode;
    867         vp8_encode_intra16x16mbuvrd(IF_RTCD(&cpi->rtcd), x);
    868 
    869         rate_to = rd_cost_mbuv(x);
    870         rate = rate_to + x->intra_uv_mode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.uv_mode];
    871 
    872         distortion = vp8_get_mbuvrecon_error(IF_RTCD(&cpi->rtcd.variance), x);
    873 
    874         this_rd = UVRDFUNC(x->rdmult, x->rddiv, rate, distortion, cpi->target_bits_per_mb);
    875 
    876         if (this_rd < best_rd)
    877         {
    878             best_rd = this_rd;
    879             d = distortion;
    880             r = rate;
    881             *rate_tokenonly = rate_to;
    882             mode_selected = mode;
    883         }
    884     }
    885 
    886     *rate = r;
    887     *distortion = d;
    888 
    889     x->e_mbd.mode_info_context->mbmi.uv_mode = mode_selected;
    890     return best_rd;
    891 }
    892 #endif
    893 
    894 int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4])
    895 {
    896     vp8_prob p [VP8_MVREFS-1];
    897     assert(NEARESTMV <= m  &&  m <= SPLITMV);
    898     vp8_mv_ref_probs(p, near_mv_ref_ct);
    899     return vp8_cost_token(vp8_mv_ref_tree, p,
    900                           vp8_mv_ref_encoding_array - NEARESTMV + m);
    901 }
    902 
    903 void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, MV *mv)
    904 {
    905     int i;
    906 
    907     x->e_mbd.mode_info_context->mbmi.mode = mb;
    908     x->e_mbd.mode_info_context->mbmi.mv.as_mv.row = mv->row;
    909     x->e_mbd.mode_info_context->mbmi.mv.as_mv.col = mv->col;
    910 
    911     for (i = 0; i < 16; i++)
    912     {
    913         B_MODE_INFO *bmi = &x->e_mbd.block[i].bmi;
    914         bmi->mode = (B_PREDICTION_MODE) mb;
    915         bmi->mv.as_mv.row = mv->row;
    916         bmi->mv.as_mv.col = mv->col;
    917     }
    918 }
    919 
    920 #if !(CONFIG_REALTIME_ONLY)
    921 static int labels2mode(
    922     MACROBLOCK *x,
    923     int const *labelings, int which_label,
    924     B_PREDICTION_MODE this_mode,
    925     MV *this_mv, MV *best_ref_mv,
    926     int *mvcost[2]
    927 )
    928 {
    929     MACROBLOCKD *const xd = & x->e_mbd;
    930     MODE_INFO *const mic = xd->mode_info_context;
    931     const int mis = xd->mode_info_stride;
    932 
    933     int cost = 0;
    934     int thismvcost = 0;
    935 
    936     /* We have to be careful retrieving previously-encoded motion vectors.
    937        Ones from this macroblock have to be pulled from the BLOCKD array
    938        as they have not yet made it to the bmi array in our MB_MODE_INFO. */
    939 
    940     int i = 0;
    941 
    942     do
    943     {
    944         BLOCKD *const d = xd->block + i;
    945         const int row = i >> 2,  col = i & 3;
    946 
    947         B_PREDICTION_MODE m;
    948 
    949         if (labelings[i] != which_label)
    950             continue;
    951 
    952         if (col  &&  labelings[i] == labelings[i-1])
    953             m = LEFT4X4;
    954         else if (row  &&  labelings[i] == labelings[i-4])
    955             m = ABOVE4X4;
    956         else
    957         {
    958             // the only time we should do costing for new motion vector or mode
    959             // is when we are on a new label  (jbb May 08, 2007)
    960             switch (m = this_mode)
    961             {
    962             case NEW4X4 :
    963                 thismvcost  = vp8_mv_bit_cost(this_mv, best_ref_mv, mvcost, 102);
    964                 break;
    965             case LEFT4X4:
    966                 *this_mv = col ? d[-1].bmi.mv.as_mv : vp8_left_bmi(mic, i)->mv.as_mv;
    967                 break;
    968             case ABOVE4X4:
    969                 *this_mv = row ? d[-4].bmi.mv.as_mv : vp8_above_bmi(mic, i, mis)->mv.as_mv;
    970                 break;
    971             case ZERO4X4:
    972                 this_mv->row = this_mv->col = 0;
    973                 break;
    974             default:
    975                 break;
    976             }
    977 
    978             if (m == ABOVE4X4)  // replace above with left if same
    979             {
    980                 const MV mv = col ? d[-1].bmi.mv.as_mv : vp8_left_bmi(mic, i)->mv.as_mv;
    981 
    982                 if (mv.row == this_mv->row  &&  mv.col == this_mv->col)
    983                     m = LEFT4X4;
    984             }
    985 
    986             cost = x->inter_bmode_costs[ m];
    987         }
    988 
    989         d->bmi.mode = m;
    990         d->bmi.mv.as_mv = *this_mv;
    991 
    992     }
    993     while (++i < 16);
    994 
    995     cost += thismvcost ;
    996     return cost;
    997 }
    998 
    999 static int rdcost_mbsegment_y(MACROBLOCK *mb, const int *labels,
   1000                               int which_label, ENTROPY_CONTEXT *ta,
   1001                               ENTROPY_CONTEXT *tl)
   1002 {
   1003     int cost = 0;
   1004     int b;
   1005     MACROBLOCKD *x = &mb->e_mbd;
   1006 
   1007     for (b = 0; b < 16; b++)
   1008         if (labels[ b] == which_label)
   1009             cost += cost_coeffs(mb, x->block + b, 3,
   1010                                 ta + vp8_block2above[b],
   1011                                 tl + vp8_block2left[b]);
   1012 
   1013     return cost;
   1014 
   1015 }
   1016 static unsigned int vp8_encode_inter_mb_segment(MACROBLOCK *x, int const *labels, int which_label, const vp8_encodemb_rtcd_vtable_t *rtcd)
   1017 {
   1018     int i;
   1019     unsigned int distortion = 0;
   1020 
   1021     for (i = 0; i < 16; i++)
   1022     {
   1023         if (labels[i] == which_label)
   1024         {
   1025             BLOCKD *bd = &x->e_mbd.block[i];
   1026             BLOCK *be = &x->block[i];
   1027 
   1028 
   1029             vp8_build_inter_predictors_b(bd, 16, x->e_mbd.subpixel_predict);
   1030             ENCODEMB_INVOKE(rtcd, subb)(be, bd, 16);
   1031             x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32);
   1032 
   1033             // set to 0 no way to account for 2nd order DC so discount
   1034             //be->coeff[0] = 0;
   1035             x->quantize_b(be, bd);
   1036 
   1037             distortion += ENCODEMB_INVOKE(rtcd, berr)(be->coeff, bd->dqcoeff);
   1038         }
   1039     }
   1040 
   1041     return distortion;
   1042 }
   1043 
   1044 static void macro_block_yrd(MACROBLOCK *mb, int *Rate, int *Distortion, const vp8_encodemb_rtcd_vtable_t *rtcd)
   1045 {
   1046     int b;
   1047     MACROBLOCKD *const x = &mb->e_mbd;
   1048     BLOCK   *const mb_y2 = mb->block + 24;
   1049     BLOCKD *const x_y2  = x->block + 24;
   1050     short *Y2DCPtr = mb_y2->src_diff;
   1051     BLOCK *beptr;
   1052     int d;
   1053 
   1054     ENCODEMB_INVOKE(rtcd, submby)(mb->src_diff, mb->src.y_buffer, mb->e_mbd.predictor, mb->src.y_stride);
   1055 
   1056     // Fdct and building the 2nd order block
   1057     for (beptr = mb->block; beptr < mb->block + 16; beptr += 2)
   1058     {
   1059         mb->vp8_short_fdct8x4(beptr->src_diff, beptr->coeff, 32);
   1060         *Y2DCPtr++ = beptr->coeff[0];
   1061         *Y2DCPtr++ = beptr->coeff[16];
   1062     }
   1063 
   1064     // 2nd order fdct
   1065     if (x->mode_info_context->mbmi.mode != SPLITMV)
   1066     {
   1067         mb->short_walsh4x4(mb_y2->src_diff, mb_y2->coeff, 8);
   1068     }
   1069 
   1070     // Quantization
   1071     for (b = 0; b < 16; b++)
   1072     {
   1073         mb->quantize_b(&mb->block[b], &mb->e_mbd.block[b]);
   1074     }
   1075 
   1076     // DC predication and Quantization of 2nd Order block
   1077     if (x->mode_info_context->mbmi.mode != SPLITMV)
   1078     {
   1079 
   1080         {
   1081             mb->quantize_b(mb_y2, x_y2);
   1082         }
   1083     }
   1084 
   1085     // Distortion
   1086     if (x->mode_info_context->mbmi.mode == SPLITMV)
   1087         d = ENCODEMB_INVOKE(rtcd, mberr)(mb, 0) << 2;
   1088     else
   1089     {
   1090         d = ENCODEMB_INVOKE(rtcd, mberr)(mb, 1) << 2;
   1091         d += ENCODEMB_INVOKE(rtcd, berr)(mb_y2->coeff, x_y2->dqcoeff);
   1092     }
   1093 
   1094     *Distortion = (d >> 4);
   1095 
   1096     // rate
   1097     *Rate = vp8_rdcost_mby(mb);
   1098 }
   1099 
   /* Index of the first 4x4 block of each label, per split type.
    * Indexed as vp8_mbsplit_offset2[segmentation][label]; entries beyond the
    * ones listed explicitly for a row are zero.
    */
   unsigned char vp8_mbsplit_offset2[4][16] = {
       { 0,  8 },
       { 0,  2 },
       { 0,  2,  8, 10 },
       { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 }
   };
   1106 static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *best_ref_mv, int best_rd, int *mdcounts, int *returntotrate, int *returnyrate, int *returndistortion, int compressor_speed, int *mvcost[2], int mvthresh, int fullpixel)
   1107 {
   1108     int i, segmentation;
   1109     B_PREDICTION_MODE this_mode;
   1110     MACROBLOCKD *xc = &x->e_mbd;
   1111     BLOCK *c;
   1112     BLOCKD *e;
   1113     int const *labels;
   1114     int best_segment_rd = INT_MAX;
   1115     int best_seg = 0;
   1116     int br = 0;
   1117     int bd = 0;
   1118     int bsr = 0;
   1119     int bsd = 0;
   1120     int bestsegmentyrate = 0;
   1121 
   1122     static const int segmentation_to_sseshift[4] = {3, 3, 2, 0};
   1123 
   1124     // FIX TO Rd error outrange bug PGW 9 june 2004
   1125     B_PREDICTION_MODE bmodes[16] = {ZERO4X4, ZERO4X4, ZERO4X4, ZERO4X4,
   1126                                     ZERO4X4, ZERO4X4, ZERO4X4, ZERO4X4,
   1127                                     ZERO4X4, ZERO4X4, ZERO4X4, ZERO4X4,
   1128                                     ZERO4X4, ZERO4X4, ZERO4X4, ZERO4X4
   1129                                    };
   1130 
   1131     MV bmvs[16];
   1132     int beobs[16];
   1133 
   1134     vpx_memset(beobs, 0, sizeof(beobs));
   1135 
   1136 
   1137     for (segmentation = 0; segmentation < VP8_NUMMBSPLITS; segmentation++)
   1138     {
   1139         int label_count;
   1140         int this_segment_rd = 0;
   1141         int label_mv_thresh;
   1142         int rate = 0;
   1143         int sbr = 0;
   1144         int sbd = 0;
   1145         int sseshift;
   1146         int segmentyrate = 0;
   1147 
   1148         vp8_variance_fn_ptr_t *v_fn_ptr;
   1149 
   1150         ENTROPY_CONTEXT_PLANES t_above, t_left;
   1151         ENTROPY_CONTEXT *ta;
   1152         ENTROPY_CONTEXT *tl;
   1153         ENTROPY_CONTEXT_PLANES t_above_b, t_left_b;
   1154         ENTROPY_CONTEXT *ta_b;
   1155         ENTROPY_CONTEXT *tl_b;
   1156 
   1157         vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
   1158         vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
   1159 
   1160         ta = (ENTROPY_CONTEXT *)&t_above;
   1161         tl = (ENTROPY_CONTEXT *)&t_left;
   1162         ta_b = (ENTROPY_CONTEXT *)&t_above_b;
   1163         tl_b = (ENTROPY_CONTEXT *)&t_left_b;
   1164 
   1165         br = 0;
   1166         bd = 0;
   1167 
   1168         v_fn_ptr = &cpi->fn_ptr[segmentation];
   1169         sseshift = segmentation_to_sseshift[segmentation];
   1170         labels = vp8_mbsplits[segmentation];
   1171         label_count = vp8_mbsplit_count[segmentation];
   1172 
   1173         // 64 makes this threshold really big effectively
   1174         // making it so that we very rarely check mvs on
   1175         // segments.   setting this to 1 would make mv thresh
   1176         // roughly equal to what it is for macroblocks
   1177         label_mv_thresh = 1 * mvthresh / label_count ;
   1178 
   1179         // Segmentation method overheads
   1180         rate = vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs, vp8_mbsplit_encodings + segmentation);
   1181 
   1182         rate += vp8_cost_mv_ref(SPLITMV, mdcounts);
   1183 
   1184         this_segment_rd += RDFUNC(x->rdmult, x->rddiv, rate, 0, cpi->target_bits_per_mb);
   1185         br += rate;
   1186 
   1187         for (i = 0; i < label_count; i++)
   1188         {
   1189             MV mode_mv[B_MODE_COUNT];
   1190             int best_label_rd = INT_MAX;
   1191             B_PREDICTION_MODE mode_selected = ZERO4X4;
   1192             int j;
   1193             int bestlabelyrate = 0;
   1194 
   1195 
   1196             // find first label
   1197             j = vp8_mbsplit_offset2[segmentation][i];
   1198 
   1199             c = &x->block[j];
   1200             e = &x->e_mbd.block[j];
   1201 
   1202             // search for the best motion vector on this segment
   1203             for (this_mode = LEFT4X4; this_mode <= NEW4X4 ; this_mode ++)
   1204             {
   1205                 int distortion;
   1206                 int this_rd;
   1207                 int num00;
   1208                 int labelyrate;
   1209                 ENTROPY_CONTEXT_PLANES t_above_s, t_left_s;
   1210                 ENTROPY_CONTEXT *ta_s;
   1211                 ENTROPY_CONTEXT *tl_s;
   1212 
   1213                 vpx_memcpy(&t_above_s, &t_above, sizeof(ENTROPY_CONTEXT_PLANES));
   1214                 vpx_memcpy(&t_left_s, &t_left, sizeof(ENTROPY_CONTEXT_PLANES));
   1215 
   1216                 ta_s = (ENTROPY_CONTEXT *)&t_above_s;
   1217                 tl_s = (ENTROPY_CONTEXT *)&t_left_s;
   1218 
   1219                 if (this_mode == NEW4X4)
   1220                 {
   1221                     int step_param = 0;
   1222                     int further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
   1223                     int n;
   1224                     int thissme;
   1225                     int bestsme = INT_MAX;
   1226                     MV  temp_mv;
   1227 
   1228                     // Is the best so far sufficiently good that we cant justify doing and new motion search.
   1229                     if (best_label_rd < label_mv_thresh)
   1230                         break;
   1231 
   1232                     {
   1233                         int sadpb = x->sadperbit4;
   1234 
   1235                         if (cpi->sf.search_method == HEX)
   1236                             bestsme = vp8_hex_search(x, c, e, best_ref_mv, &mode_mv[NEW4X4], step_param, sadpb/*x->errorperbit*/, &num00, v_fn_ptr, x->mvsadcost, mvcost);
   1237                         else
   1238                         {
   1239                             bestsme = cpi->diamond_search_sad(x, c, e, best_ref_mv, &mode_mv[NEW4X4], step_param, sadpb / 2/*x->errorperbit*/, &num00, v_fn_ptr, x->mvsadcost, mvcost);
   1240 
   1241                             n = num00;
   1242                             num00 = 0;
   1243 
   1244                             while (n < further_steps)
   1245                             {
   1246                                 n++;
   1247 
   1248                                 if (num00)
   1249                                     num00--;
   1250                                 else
   1251                                 {
   1252                                     thissme = cpi->diamond_search_sad(x, c, e, best_ref_mv, &temp_mv, step_param + n, sadpb / 2/*x->errorperbit*/, &num00, v_fn_ptr, x->mvsadcost, mvcost);
   1253 
   1254                                     if (thissme < bestsme)
   1255                                     {
   1256                                         bestsme = thissme;
   1257                                         mode_mv[NEW4X4].row = temp_mv.row;
   1258                                         mode_mv[NEW4X4].col = temp_mv.col;
   1259                                     }
   1260                                 }
   1261                             }
   1262                         }
   1263 
   1264                         // Should we do a full search (best quality only)
   1265                         if ((compressor_speed == 0) && (bestsme >> sseshift) > 4000)
   1266                         {
   1267                             thissme = cpi->full_search_sad(x, c, e, best_ref_mv, sadpb / 4, 16, v_fn_ptr, x->mvcost, x->mvsadcost);
   1268 
   1269                             if (thissme < bestsme)
   1270                             {
   1271                                 bestsme = thissme;
   1272                                 mode_mv[NEW4X4] = e->bmi.mv.as_mv;
   1273                             }
   1274                             else
   1275                             {
   1276                                 // The full search result is actually worse so re-instate the previous best vector
   1277                                 e->bmi.mv.as_mv = mode_mv[NEW4X4];
   1278                             }
   1279                         }
   1280                     }
   1281 
   1282                     if (bestsme < INT_MAX)
   1283                     {
   1284                         if (!fullpixel)
   1285                             cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4], best_ref_mv, x->errorperbit / 2, v_fn_ptr, mvcost);
   1286                         else
   1287                             vp8_skip_fractional_mv_step(x, c, e, &mode_mv[NEW4X4], best_ref_mv, x->errorperbit, v_fn_ptr, mvcost);
   1288                     }
   1289                 }
   1290 
   1291                 rate = labels2mode(x, labels, i, this_mode, &mode_mv[this_mode], best_ref_mv, mvcost);
   1292 
   1293                 // Trap vectors that reach beyond the UMV borders
   1294                 if (((mode_mv[this_mode].row >> 3) < x->mv_row_min) || ((mode_mv[this_mode].row >> 3) > x->mv_row_max) ||
   1295                     ((mode_mv[this_mode].col >> 3) < x->mv_col_min) || ((mode_mv[this_mode].col >> 3) > x->mv_col_max))
   1296                 {
   1297                     continue;
   1298                 }
   1299 
   1300                 distortion = vp8_encode_inter_mb_segment(x, labels, i, IF_RTCD(&cpi->rtcd.encodemb)) / 4;
   1301 
   1302                 labelyrate = rdcost_mbsegment_y(x, labels, i, ta_s, tl_s);
   1303                 rate += labelyrate;
   1304 
   1305                 this_rd = RDFUNC(x->rdmult, x->rddiv, rate, distortion, cpi->target_bits_per_mb);
   1306 
   1307                 if (this_rd < best_label_rd)
   1308                 {
   1309                     sbr = rate;
   1310                     sbd = distortion;
   1311                     bestlabelyrate = labelyrate;
   1312                     mode_selected = this_mode;
   1313                     best_label_rd = this_rd;
   1314 
   1315                     vpx_memcpy(ta_b, ta_s, sizeof(ENTROPY_CONTEXT_PLANES));
   1316                     vpx_memcpy(tl_b, tl_s, sizeof(ENTROPY_CONTEXT_PLANES));
   1317 
   1318                 }
   1319             }
   1320 
   1321             vpx_memcpy(ta, ta_b, sizeof(ENTROPY_CONTEXT_PLANES));
   1322             vpx_memcpy(tl, tl_b, sizeof(ENTROPY_CONTEXT_PLANES));
   1323 
   1324             labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected], best_ref_mv, mvcost);
   1325 
   1326             br += sbr;
   1327             bd += sbd;
   1328             segmentyrate += bestlabelyrate;
   1329             this_segment_rd += best_label_rd;
   1330 
   1331             if ((this_segment_rd > best_rd) || (this_segment_rd > best_segment_rd))
   1332                 break;
   1333         }
   1334 
   1335         if ((this_segment_rd <= best_rd) && (this_segment_rd < best_segment_rd))
   1336         {
   1337             bsr = br;
   1338             bsd = bd;
   1339             bestsegmentyrate = segmentyrate;
   1340             best_segment_rd = this_segment_rd;
   1341             best_seg = segmentation;
   1342 
   1343             // store everything needed to come back to this!!
   1344             for (i = 0; i < 16; i++)
   1345             {
   1346                 BLOCKD *bd = &x->e_mbd.block[i];
   1347 
   1348                 bmvs[i] = bd->bmi.mv.as_mv;
   1349                 bmodes[i] = bd->bmi.mode;
   1350                 beobs[i] = bd->eob;
   1351             }
   1352         }
   1353     }
   1354 
   1355     // set it to the best
   1356     for (i = 0; i < 16; i++)
   1357     {
   1358         BLOCKD *bd = &x->e_mbd.block[i];
   1359 
   1360         bd->bmi.mv.as_mv = bmvs[i];
   1361         bd->bmi.mode = bmodes[i];
   1362         bd->eob = beobs[i];
   1363     }
   1364 
   1365     *returntotrate = bsr;
   1366     *returndistortion = bsd;
   1367     *returnyrate = bestsegmentyrate;
   1368 
   1369     // save partitions
   1370     labels = vp8_mbsplits[best_seg];
   1371     x->e_mbd.mode_info_context->mbmi.partitioning = best_seg;
   1372     x->partition_info->count = vp8_mbsplit_count[best_seg];
   1373 
   1374     for (i = 0; i < x->partition_info->count; i++)
   1375     {
   1376         int j;
   1377 
   1378         j = vp8_mbsplit_offset2[best_seg][i];
   1379 
   1380         x->partition_info->bmi[i].mode = x->e_mbd.block[j].bmi.mode;
   1381         x->partition_info->bmi[i].mv.as_mv = x->e_mbd.block[j].bmi.mv.as_mv;
   1382     }
   1383 
   1384     return best_segment_rd;
   1385 }
   1386 
   1387 
   1388 int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int *returnrate, int *returndistortion, int *returnintra)
   1389 {
   1390     BLOCK *b = &x->block[0];
   1391     BLOCKD *d = &x->e_mbd.block[0];
   1392     MACROBLOCKD *xd = &x->e_mbd;
   1393     B_MODE_INFO best_bmodes[16];
   1394     MB_MODE_INFO best_mbmode;
   1395     PARTITION_INFO best_partition;
   1396     MV best_ref_mv;
   1397     MV mode_mv[MB_MODE_COUNT];
   1398     MB_PREDICTION_MODE this_mode;
   1399     int num00;
   1400     int best_mode_index = 0;
   1401 
   1402     int i;
   1403     int mode_index;
   1404     int mdcounts[4];
   1405     int rate;
   1406     int distortion;
   1407     int best_rd = INT_MAX; // 1 << 30;
   1408     int ref_frame_cost[MAX_REF_FRAMES];
   1409     int rate2, distortion2;
   1410     int uv_intra_rate, uv_intra_distortion, uv_intra_rate_tokenonly;
   1411     int rate_y, UNINITIALIZED_IS_SAFE(rate_uv);
   1412 
   1413     //int all_rds[MAX_MODES];        // Experimental debug code.
   1414     //int all_rates[MAX_MODES];
   1415     //int all_dist[MAX_MODES];
   1416     //int intermodecost[MAX_MODES];
   1417 
   1418     MB_PREDICTION_MODE uv_intra_mode;
   1419     int sse;
   1420     int sum;
   1421     int uvintra_eob = 0;
   1422     int tteob = 0;
   1423     int force_no_skip = 0;
   1424 
   1425     *returnintra = INT_MAX;
   1426 
   1427     vpx_memset(&best_mbmode, 0, sizeof(best_mbmode)); // clean
   1428 
   1429     cpi->mbs_tested_so_far++;          // Count of the number of MBs tested so far this frame
   1430 
   1431     x->skip = 0;
   1432 
   1433     ref_frame_cost[INTRA_FRAME]   = vp8_cost_zero(cpi->prob_intra_coded);
   1434 
   1435     // Experimental code
   1436     // Adjust the RD multiplier based on the best case distortion we saw in the most recently coded mb
   1437     //if ( (cpi->last_mb_distortion) > 0 && (cpi->target_bits_per_mb > 0) )
   1438     /*{
   1439         int tmprdmult;
   1440 
   1441         //tmprdmult = (cpi->last_mb_distortion * 256) / ((cpi->av_per_frame_bandwidth*256)/cpi->common.MBs);
   1442         tmprdmult = (cpi->last_mb_distortion * 256) / cpi->target_bits_per_mb;
   1443         //tmprdmult = tmprdmult;
   1444 
   1445         //if ( tmprdmult > cpi->RDMULT * 2 )
   1446         //  tmprdmult = cpi->RDMULT * 2;
   1447         //else if ( tmprdmult < cpi->RDMULT / 2 )
   1448         //  tmprdmult = cpi->RDMULT / 2;
   1449 
   1450         //tmprdmult = (tmprdmult < 25) ? 25 : tmprdmult;
   1451 
   1452         //x->rdmult = tmprdmult;
   1453 
   1454     }*/
   1455 
   1456     // Special case treatment when GF and ARF are not sensible options for reference
   1457     if (cpi->ref_frame_flags == VP8_LAST_FLAG)
   1458     {
   1459         ref_frame_cost[LAST_FRAME]    = vp8_cost_one(cpi->prob_intra_coded)
   1460                                         + vp8_cost_zero(255);
   1461         ref_frame_cost[GOLDEN_FRAME]  = vp8_cost_one(cpi->prob_intra_coded)
   1462                                         + vp8_cost_one(255)
   1463                                         + vp8_cost_zero(128);
   1464         ref_frame_cost[ALTREF_FRAME]  = vp8_cost_one(cpi->prob_intra_coded)
   1465                                         + vp8_cost_one(255)
   1466                                         + vp8_cost_one(128);
   1467     }
   1468     else
   1469     {
   1470         ref_frame_cost[LAST_FRAME]    = vp8_cost_one(cpi->prob_intra_coded)
   1471                                         + vp8_cost_zero(cpi->prob_last_coded);
   1472         ref_frame_cost[GOLDEN_FRAME]  = vp8_cost_one(cpi->prob_intra_coded)
   1473                                         + vp8_cost_one(cpi->prob_last_coded)
   1474                                         + vp8_cost_zero(cpi->prob_gf_coded);
   1475         ref_frame_cost[ALTREF_FRAME]  = vp8_cost_one(cpi->prob_intra_coded)
   1476                                         + vp8_cost_one(cpi->prob_last_coded)
   1477                                         + vp8_cost_one(cpi->prob_gf_coded);
   1478     }
   1479 
   1480     vpx_memset(mode_mv, 0, sizeof(mode_mv));
   1481 
   1482     x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
   1483     vp8_rd_pick_intra_mbuv_mode(cpi, x, &uv_intra_rate, &uv_intra_rate_tokenonly, &uv_intra_distortion);
   1484     uv_intra_mode = x->e_mbd.mode_info_context->mbmi.uv_mode;
   1485     {
   1486         uvintra_eob = 0;
   1487 
   1488         for (i = 16; i < 24; i++)
   1489             uvintra_eob += x->e_mbd.block[i].eob;
   1490     }
   1491 
   1492     for (mode_index = 0; mode_index < MAX_MODES; mode_index++)
   1493     {
   1494         int frame_cost;
   1495         int this_rd = INT_MAX;
   1496         int lf_or_gf = 0;           // Lat Frame (01) or gf/arf (1)
   1497         int disable_skip = 0;
   1498 
   1499         force_no_skip = 0;
   1500 
   1501         // Experimental debug code.
   1502         // Record of rd values recorded for this MB. -1 indicates not measured
   1503         //all_rds[mode_index] = -1;
   1504         //all_rates[mode_index] = -1;
   1505         //all_dist[mode_index] = -1;
   1506         //intermodecost[mode_index] = -1;
   1507 
   1508         // Test best rd so far against threshold for trying this mode.
   1509         if (best_rd <= cpi->rd_threshes[mode_index])
   1510             continue;
   1511 
   1512 
   1513 
   1514         // These variables hold are rolling total cost and distortion for this mode
   1515         rate2 = 0;
   1516         distortion2 = 0;
   1517 
   1518         // Where skip is allowable add in the default per mb cost for the no skip case.
   1519         // where we then decide to skip we have to delete this and replace it with the
   1520         // cost of signallying a skip
   1521         if (cpi->common.mb_no_coeff_skip)
   1522         {
   1523             rate2 += vp8_cost_bit(cpi->prob_skip_false, 0);
   1524         }
   1525 
   1526         this_mode = vp8_mode_order[mode_index];
   1527 
   1528         x->e_mbd.mode_info_context->mbmi.mode = this_mode;
   1529         x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
   1530         x->e_mbd.mode_info_context->mbmi.ref_frame = vp8_ref_frame_order[mode_index];
   1531 
   1532         //Only consider ZEROMV/ALTREF_FRAME for alt ref frame.
   1533         if (cpi->is_src_frame_alt_ref)
   1534         {
   1535             if (this_mode != ZEROMV || x->e_mbd.mode_info_context->mbmi.ref_frame != ALTREF_FRAME)
   1536                 continue;
   1537         }
   1538 
   1539         if (x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME)
   1540         {
   1541             YV12_BUFFER_CONFIG *lst_yv12 = &cpi->common.yv12_fb[cpi->common.lst_fb_idx];
   1542 
   1543             if (!(cpi->ref_frame_flags & VP8_LAST_FLAG))
   1544                 continue;
   1545 
   1546             lf_or_gf = 0;  // Local last frame vs Golden frame flag
   1547 
   1548             // Set up pointers for this macro block into the previous frame recon buffer
   1549             x->e_mbd.pre.y_buffer = lst_yv12->y_buffer + recon_yoffset;
   1550             x->e_mbd.pre.u_buffer = lst_yv12->u_buffer + recon_uvoffset;
   1551             x->e_mbd.pre.v_buffer = lst_yv12->v_buffer + recon_uvoffset;
   1552         }
   1553         else if (x->e_mbd.mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
   1554         {
   1555             YV12_BUFFER_CONFIG *gld_yv12 = &cpi->common.yv12_fb[cpi->common.gld_fb_idx];
   1556 
   1557             // not supposed to reference gold frame
   1558             if (!(cpi->ref_frame_flags & VP8_GOLD_FLAG))
   1559                 continue;
   1560 
   1561             lf_or_gf = 1;  // Local last frame vs Golden frame flag
   1562 
   1563             // Set up pointers for this macro block into the previous frame recon buffer
   1564             x->e_mbd.pre.y_buffer = gld_yv12->y_buffer + recon_yoffset;
   1565             x->e_mbd.pre.u_buffer = gld_yv12->u_buffer + recon_uvoffset;
   1566             x->e_mbd.pre.v_buffer = gld_yv12->v_buffer + recon_uvoffset;
   1567         }
   1568         else if (x->e_mbd.mode_info_context->mbmi.ref_frame == ALTREF_FRAME)
   1569         {
   1570             YV12_BUFFER_CONFIG *alt_yv12 = &cpi->common.yv12_fb[cpi->common.alt_fb_idx];
   1571 
   1572             // not supposed to reference alt ref frame
   1573             if (!(cpi->ref_frame_flags & VP8_ALT_FLAG))
   1574                 continue;
   1575 
   1576             //if ( !cpi->source_alt_ref_active )
   1577             //  continue;
   1578 
   1579             lf_or_gf = 1;  // Local last frame vs Golden frame flag
   1580 
   1581             // Set up pointers for this macro block into the previous frame recon buffer
   1582             x->e_mbd.pre.y_buffer = alt_yv12->y_buffer + recon_yoffset;
   1583             x->e_mbd.pre.u_buffer = alt_yv12->u_buffer + recon_uvoffset;
   1584             x->e_mbd.pre.v_buffer = alt_yv12->v_buffer + recon_uvoffset;
   1585         }
   1586 
   1587         vp8_find_near_mvs(&x->e_mbd,
   1588                           x->e_mbd.mode_info_context,
   1589                           &mode_mv[NEARESTMV], &mode_mv[NEARMV], &best_ref_mv,
   1590                           mdcounts, x->e_mbd.mode_info_context->mbmi.ref_frame, cpi->common.ref_frame_sign_bias);
   1591 
   1592 
   1593         // Estimate the reference frame signaling cost and add it to the rolling cost variable.
   1594         frame_cost = ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
   1595         rate2 += frame_cost;
   1596 
   1597         if (this_mode <= B_PRED)
   1598         {
   1599             for (i = 0; i < 16; i++)
   1600             {
   1601                 vpx_memset(&x->e_mbd.block[i].bmi, 0, sizeof(B_MODE_INFO));
   1602             }
   1603         }
   1604 
   1605         // Check to see if the testing frequency for this mode is at its max
   1606         // If so then prevent it from being tested and increase the threshold for its testing
   1607         if (cpi->mode_test_hit_counts[mode_index] && (cpi->mode_check_freq[mode_index] > 1))
   1608         {
   1609             if (cpi->mbs_tested_so_far  <= cpi->mode_check_freq[mode_index] * cpi->mode_test_hit_counts[mode_index])
   1610             {
   1611                 // Increase the threshold for coding this mode to make it less likely to be chosen
   1612                 cpi->rd_thresh_mult[mode_index] += 4;
   1613 
   1614                 if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT)
   1615                     cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
   1616 
   1617                 cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index];
   1618 
   1619                 continue;
   1620             }
   1621         }
   1622 
   1623         // We have now reached the point where we are going to test the current mode so increment the counter for the number of times it has been tested
   1624         cpi->mode_test_hit_counts[mode_index] ++;
   1625 
   1626         // Experimental code. Special case for gf and arf zeromv modes. Increase zbin size to supress noise
   1627         if (cpi->zbin_mode_boost_enabled)
   1628         {
   1629             if ((vp8_mode_order[mode_index] == ZEROMV) && (vp8_ref_frame_order[mode_index] != LAST_FRAME))
   1630                 cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
   1631             else
   1632                 cpi->zbin_mode_boost = 0;
   1633 
   1634             vp8cx_mb_init_quantizer(cpi, x);
   1635         }
   1636 
   1637         switch (this_mode)
   1638         {
   1639         case B_PRED:
   1640 
   1641             // Note the rate value returned here includes the cost of coding the BPRED mode : x->mbmode_cost[x->e_mbd.frame_type][BPRED];
   1642             vp8_rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y, &distortion);
   1643             rate2 += rate;
   1644             //rate_y = rate;
   1645             distortion2 += distortion;
   1646             rate2 += uv_intra_rate;
   1647             rate_uv = uv_intra_rate_tokenonly;
   1648             distortion2 += uv_intra_distortion;
   1649             break;
   1650 
   1651         case SPLITMV:
   1652         {
   1653             int frame_cost_rd = RDFUNC(x->rdmult, x->rddiv, frame_cost, 0, cpi->target_bits_per_mb);
   1654             int saved_rate = rate2;
   1655 
   1656             // vp8_rd_pick_best_mbsegmentation looks only at Y and does not account for frame_cost.
   1657             // (best_rd - frame_cost_rd) is thus a conservative breakout number.
   1658             int breakout_rd = best_rd - frame_cost_rd;
   1659             int tmp_rd;
   1660 
   1661             if (x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME)
   1662                 tmp_rd = vp8_rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv, breakout_rd, mdcounts, &rate, &rate_y, &distortion, cpi->compressor_speed, x->mvcost, cpi->rd_threshes[THR_NEWMV], cpi->common.full_pixel) ;
   1663             else if (x->e_mbd.mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
   1664                 tmp_rd = vp8_rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv, breakout_rd, mdcounts, &rate, &rate_y, &distortion, cpi->compressor_speed, x->mvcost, cpi->rd_threshes[THR_NEWG], cpi->common.full_pixel) ;
   1665             else
   1666                 tmp_rd = vp8_rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv, breakout_rd, mdcounts, &rate, &rate_y, &distortion, cpi->compressor_speed, x->mvcost, cpi->rd_threshes[THR_NEWA], cpi->common.full_pixel) ;
   1667 
   1668             rate2 += rate;
   1669             distortion2 += distortion;
   1670 
   1671             // If even the 'Y' rd value of split is higher than best so far then dont bother looking at UV
   1672             if (tmp_rd < breakout_rd)
   1673             {
   1674                 // Now work out UV cost and add it in
   1675                 vp8_rd_inter_uv(cpi, x, &rate, &distortion, cpi->common.full_pixel);
   1676                 rate2 += rate;
   1677                 rate_uv = rate;
   1678                 distortion2 += distortion;
   1679 
   1680             }
   1681             else
   1682             {
   1683                 this_rd = INT_MAX;
   1684                 disable_skip = 1;
   1685             }
   1686 
   1687             // Trap cases where the best split mode has all vectors coded 0,0 (or all the same)
   1688             if (0)
   1689             {
   1690                 int allsame = 1;
   1691 
   1692                 for (i = 1; i < 16; i++)
   1693                 {
   1694                     BLOCKD *bd = &x->e_mbd.block[i];
   1695 
   1696                     if (bd->bmi.mv.as_int != x->e_mbd.block[0].bmi.mv.as_int)   //(bmvs[i].col != bmvs[i-1].col) || (bmvs[i].row != bmvs[i-1].row ) )
   1697                     {
   1698                         allsame = 0;
   1699                         break;
   1700                     }
   1701                 }
   1702 
   1703                 if (allsame)
   1704                 {
   1705                     // reset mode and mv and jump to newmv
   1706                     this_mode = NEWMV;
   1707                     distortion2 = 0;
   1708                     rate2 = saved_rate;
   1709                     mode_mv[NEWMV].row = x->e_mbd.block[0].bmi.mv.as_mv.row;
   1710                     mode_mv[NEWMV].col = x->e_mbd.block[0].bmi.mv.as_mv.col;
   1711                     rate2 += vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv, x->mvcost, 96);
   1712                     goto mv_selected;
   1713                 }
   1714             }
   1715 
   1716             // trap cases where the 8x8s can be promoted to 8x16s or 16x8s
   1717             if (0)//x->partition_info->count == 4)
   1718             {
   1719 
   1720                 if (x->partition_info->bmi[0].mv.as_int == x->partition_info->bmi[1].mv.as_int
   1721                     && x->partition_info->bmi[2].mv.as_int == x->partition_info->bmi[3].mv.as_int)
   1722                 {
   1723                     const int *labels = vp8_mbsplits[2];
   1724                     x->e_mbd.mode_info_context->mbmi.partitioning = 0;
   1725                     rate -= vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs, vp8_mbsplit_encodings + 2);
   1726                     rate += vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs, vp8_mbsplit_encodings);
   1727                     //rate -=  x->inter_bmode_costs[  x->partition_info->bmi[1]];
   1728                     //rate -=  x->inter_bmode_costs[  x->partition_info->bmi[3]];
   1729                     x->partition_info->bmi[1] = x->partition_info->bmi[2];
   1730                 }
   1731             }
   1732 
   1733         }
   1734         break;
   1735         case DC_PRED:
   1736         case V_PRED:
   1737         case H_PRED:
   1738         case TM_PRED:
   1739             x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
   1740             vp8_build_intra_predictors_mby_ptr(&x->e_mbd);
   1741             {
   1742                 macro_block_yrd(x, &rate, &distortion, IF_RTCD(&cpi->rtcd.encodemb)) ;
   1743                 rate2 += rate;
   1744                 rate_y = rate;
   1745                 distortion2 += distortion;
   1746                 rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode];
   1747                 rate2 += uv_intra_rate;
   1748                 rate_uv = uv_intra_rate_tokenonly;
   1749                 distortion2 += uv_intra_distortion;
   1750             }
   1751             break;
   1752 
   1753         case NEWMV:
   1754 
   1755             // Decrement full search counter
   1756             if (cpi->check_freq[lf_or_gf] > 0)
   1757                 cpi->check_freq[lf_or_gf] --;
   1758 
   1759             {
   1760                 int thissme;
   1761                 int bestsme = INT_MAX;
   1762                 int step_param = cpi->sf.first_step;
   1763                 int search_range;
   1764                 int further_steps;
   1765                 int n;
   1766 
   1767                 // Work out how long a search we should do
   1768                 search_range = MAXF(abs(best_ref_mv.col), abs(best_ref_mv.row)) >> 3;
   1769 
   1770                 if (search_range >= x->vector_range)
   1771                     x->vector_range = search_range;
   1772                 else if (x->vector_range > cpi->sf.min_fs_radius)
   1773                     x->vector_range--;
   1774 
   1775                 // Initial step/diamond search
   1776                 {
   1777                     int sadpb = x->sadperbit16;
   1778 
   1779                     if (cpi->sf.search_method == HEX)
   1780                     {
   1781                         bestsme = vp8_hex_search(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param, sadpb/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost);
   1782                         mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
   1783                         mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
   1784                     }
   1785                     else
   1786                     {
   1787                         bestsme = cpi->diamond_search_sad(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost); //sadpb < 9
   1788                         mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
   1789                         mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
   1790 
   1791                         // Further step/diamond searches as necessary
   1792                         n = 0;
   1793                         further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
   1794 
   1795                         n = num00;
   1796                         num00 = 0;
   1797 
   1798                         while (n < further_steps)
   1799                         {
   1800                             n++;
   1801 
   1802                             if (num00)
   1803                                 num00--;
   1804                             else
   1805                             {
   1806                                 thissme = cpi->diamond_search_sad(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost); //sadpb = 9
   1807 
   1808                                 if (thissme < bestsme)
   1809                                 {
   1810                                     bestsme = thissme;
   1811                                     mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
   1812                                     mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
   1813                                 }
   1814                                 else
   1815                                 {
   1816                                     d->bmi.mv.as_mv.row = mode_mv[NEWMV].row;
   1817                                     d->bmi.mv.as_mv.col = mode_mv[NEWMV].col;
   1818                                 }
   1819                             }
   1820                         }
   1821                     }
   1822 
   1823                 }
   1824 
   1825                 // Should we do a full search
   1826                 if (!cpi->check_freq[lf_or_gf] || cpi->do_full[lf_or_gf])
   1827                 {
   1828                     int thissme;
   1829                     int full_flag_thresh = 0;
   1830 
   1831                     // Update x->vector_range based on best vector found in step search
   1832                     search_range = MAXF(abs(d->bmi.mv.as_mv.row), abs(d->bmi.mv.as_mv.col));
   1833 
   1834                     if (search_range > x->vector_range)
   1835                         x->vector_range = search_range;
   1836                     else
   1837                         search_range = x->vector_range;
   1838 
   1839                     // Apply limits
   1840                     search_range = (search_range > cpi->sf.max_fs_radius) ? cpi->sf.max_fs_radius : search_range;
   1841                     {
   1842                         int sadpb = x->sadperbit16 >> 2;
   1843                         thissme = cpi->full_search_sad(x, b, d, &best_ref_mv, sadpb, search_range, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, x->mvsadcost);
   1844                     }
   1845 
   1846                     // Barrier threshold to initiating full search
   1847                     // full_flag_thresh = 10 + (thissme >> 7);
   1848                     if ((thissme + full_flag_thresh) < bestsme)
   1849                     {
   1850                         cpi->do_full[lf_or_gf] ++;
   1851                         bestsme = thissme;
   1852                     }
   1853                     else if (thissme < bestsme)
   1854                         bestsme = thissme;
   1855                     else
   1856                     {
   1857                         cpi->do_full[lf_or_gf] = cpi->do_full[lf_or_gf] >> 1;
   1858                         cpi->check_freq[lf_or_gf] = cpi->sf.full_freq[lf_or_gf];
   1859 
   1860                         // The full search result is actually worse so re-instate the previous best vector
   1861                         d->bmi.mv.as_mv.row = mode_mv[NEWMV].row;
   1862                         d->bmi.mv.as_mv.col = mode_mv[NEWMV].col;
   1863                     }
   1864                 }
   1865 
   1866                 if (bestsme < INT_MAX)
   1867                     // cpi->find_fractional_mv_step(x,b,d,&d->bmi.mv.as_mv,&best_ref_mv,x->errorperbit/2,cpi->fn_ptr.svf,cpi->fn_ptr.vf,x->mvcost);  // normal mvc=11
   1868                     cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv, x->errorperbit / 4, &cpi->fn_ptr[BLOCK_16X16], x->mvcost);
   1869 
   1870                 mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
   1871                 mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
   1872 
   1873                 // Add the new motion vector cost to our rolling cost variable
   1874                 rate2 += vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv, x->mvcost, 96);
   1875 
   1876             }
   1877 
   1878         case NEARESTMV:
   1879         case NEARMV:
   1880 
   1881             // Clip "next_nearest" so that it does not extend too far out of image
   1882             if (mode_mv[this_mode].col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN))
   1883                 mode_mv[this_mode].col = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
   1884             else if (mode_mv[this_mode].col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN)
   1885                 mode_mv[this_mode].col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN;
   1886 
   1887             if (mode_mv[this_mode].row < (xd->mb_to_top_edge - LEFT_TOP_MARGIN))
   1888                 mode_mv[this_mode].row = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
   1889             else if (mode_mv[this_mode].row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN)
   1890                 mode_mv[this_mode].row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;
   1891 
   1892             // Do not bother proceeding if the vector (from newmv,nearest or near) is 0,0 as this should then be coded using the zeromv mode.
   1893             if (((this_mode == NEARMV) || (this_mode == NEARESTMV)) &&
   1894                 ((mode_mv[this_mode].row == 0) && (mode_mv[this_mode].col == 0)))
   1895                 continue;
   1896 
   1897         case ZEROMV:
   1898 
   1899         mv_selected:
   1900 
   1901             // Trap vectors that reach beyond the UMV borders
   1902             // Note that ALL New MV, Nearest MV Near MV and Zero MV code drops through to this point
   1903             // because of the lack of break statements in the previous two cases.
   1904             if (((mode_mv[this_mode].row >> 3) < x->mv_row_min) || ((mode_mv[this_mode].row >> 3) > x->mv_row_max) ||
   1905                 ((mode_mv[this_mode].col >> 3) < x->mv_col_min) || ((mode_mv[this_mode].col >> 3) > x->mv_col_max))
   1906                 continue;
   1907 
   1908             vp8_set_mbmode_and_mvs(x, this_mode, &mode_mv[this_mode]);
   1909             vp8_build_inter_predictors_mby(&x->e_mbd);
   1910             VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16var)(x->src.y_buffer, x->src.y_stride, x->e_mbd.predictor, 16, (unsigned int *)(&sse), &sum);
   1911 
   1912             if (cpi->active_map_enabled && x->active_ptr[0] == 0)
   1913             {
   1914                 x->skip = 1;
   1915             }
   1916             else if (sse < x->encode_breakout)
   1917             {
   1918                 // Check u and v to make sure skip is ok
   1919                 int sse2 = 0;
   1920 
   1921                 sse2 = VP8_UVSSE(x, IF_RTCD(&cpi->rtcd.variance));
   1922 
   1923                 if (sse2 * 2 < x->encode_breakout)
   1924                 {
   1925                     x->skip = 1;
   1926                     distortion2 = sse;
   1927                     rate2 = 500;
   1928 
   1929                     disable_skip = 1;    // We have no real rate data so trying to adjust for rate_y and rate_uv below will cause problems.
   1930                     this_rd = RDFUNC(x->rdmult, x->rddiv, rate2, distortion2, cpi->target_bits_per_mb);
   1931 
   1932                     break;              // (PGW) Move break here from below - for now at least
   1933                 }
   1934                 else
   1935                     x->skip = 0;
   1936             }
   1937 
   1938             //intermodecost[mode_index] = vp8_cost_mv_ref(this_mode, mdcounts);   // Experimental debug code
   1939 
   1940             // Add in the Mv/mode cost
   1941             rate2 += vp8_cost_mv_ref(this_mode, mdcounts);
   1942 
   1943             // Y cost and distortion
   1944             macro_block_yrd(x, &rate, &distortion, IF_RTCD(&cpi->rtcd.encodemb));
   1945             rate2 += rate;
   1946             rate_y = rate;
   1947             distortion2 += distortion;
   1948 
   1949             // UV cost and distortion
   1950             vp8_rd_inter_uv(cpi, x, &rate, &distortion, cpi->common.full_pixel);
   1951             rate2 += rate;
   1952             rate_uv = rate;
   1953             distortion2 += distortion;
   1954             break;
   1955 
   1956         default:
   1957             break;
   1958         }
   1959 
   1960         if (!disable_skip)
   1961         {
   1962             // Test for the condition where skip block will be activated because there are no non zero coefficients and make any necessary adjustment for rate
   1963             if (cpi->common.mb_no_coeff_skip)
   1964             {
   1965                 tteob = 0;
   1966 
   1967                 for (i = 0; i <= 24; i++)
   1968                 {
   1969                     tteob += x->e_mbd.block[i].eob;
   1970                 }
   1971 
   1972                 if (tteob == 0)
   1973                 {
   1974 #if 1
   1975                     rate2 -= (rate_y + rate_uv);
   1976 
   1977                     // Back out no skip flag costing and add in skip flag costing
   1978                     if (cpi->prob_skip_false)
   1979                     {
   1980                         rate2 += vp8_cost_bit(cpi->prob_skip_false, 1);
   1981                         rate2 -= vp8_cost_bit(cpi->prob_skip_false, 0);
   1982                     }
   1983 
   1984 #else
   1985                     int rateuseskip;
   1986                     int ratenotuseskip;
   1987 
   1988 
   1989 
   1990                     ratenotuseskip = rate_y + rate_uv + vp8_cost_bit(cpi->prob_skip_false, 0);
   1991                     rateuseskip    = vp8_cost_bit(cpi->prob_skip_false, 1);
   1992 
   1993                     if (1) // rateuseskip<ratenotuseskip)
   1994                     {
   1995                         rate2 -= ratenotuseskip;
   1996                         rate2 += rateuseskip;
   1997                         force_no_skip = 0;
   1998                     }
   1999                     else
   2000                     {
   2001                         force_no_skip = 1;
   2002                     }
   2003 
   2004 #endif
   2005                 }
   2006 
   2007 #if             0
   2008                 else
   2009                 {
   2010                     int rateuseskip;
   2011                     int ratenotuseskip;
   2012                     int maxdistortion;
   2013                     int minrate;
   2014                     int skip_rd;
   2015 
   2016                     // distortion when no coeff is encoded
   2017                     maxdistortion = macro_block_max_error(x);
   2018 
   2019                     ratenotuseskip = rate_y + rate_uv + vp8_cost_bit(cpi->prob_skip_false, 0);
   2020                     rateuseskip    = vp8_cost_bit(cpi->prob_skip_false, 1);
   2021 
   2022                     minrate         = rateuseskip - ratenotuseskip;
   2023 
   2024                     skip_rd = RDFUNC(x->rdmult, x->rddiv, minrate, maxdistortion - distortion2, cpi->target_bits_per_mb);
   2025 
   2026                     if (skip_rd + 50 < 0 && x->e_mbd.mbmi.ref_frame != INTRA_FRAME && rate_y + rate_uv < 4000)
   2027                     {
   2028                         force_no_skip = 1;
   2029                         rate2       = rate2 + rateuseskip - ratenotuseskip;
   2030                         distortion2 =  maxdistortion;
   2031                     }
   2032                     else
   2033                     {
   2034                         force_no_skip = 0;
   2035                     }
   2036 
   2037                 }
   2038 
   2039 #endif
   2040 
   2041             }
   2042 
   2043             // Calculate the final RD estimate for this mode
   2044             this_rd = RDFUNC(x->rdmult, x->rddiv, rate2, distortion2, cpi->target_bits_per_mb);
   2045         }
   2046 
   2047         // Experimental debug code.
   2048         //all_rds[mode_index] = this_rd;
   2049         //all_rates[mode_index] = rate2;
   2050         //all_dist[mode_index] = distortion2;
   2051 
   2052         if ((x->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME)  && (this_rd < *returnintra))
   2053         {
   2054             *returnintra = this_rd ;
   2055         }
   2056 
   2057         // Did this mode help, i.e. is it the new best mode?
   2058         if (this_rd < best_rd || x->skip)
   2059         {
   2060             // Note index of best mode so far
   2061             best_mode_index = mode_index;
   2062             x->e_mbd.mode_info_context->mbmi.force_no_skip = force_no_skip;
   2063 
   2064             if (this_mode <= B_PRED)
   2065             {
   2066                 x->e_mbd.mode_info_context->mbmi.uv_mode = uv_intra_mode;
   2067             }
   2068 
   2069             *returnrate = rate2;
   2070             *returndistortion = distortion2;
   2071             best_rd = this_rd;
   2072             vpx_memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi, sizeof(MB_MODE_INFO));
   2073             vpx_memcpy(&best_partition, x->partition_info, sizeof(PARTITION_INFO));
   2074 
   2075             for (i = 0; i < 16; i++)
   2076             {
   2077                 vpx_memcpy(&best_bmodes[i], &x->e_mbd.block[i].bmi, sizeof(B_MODE_INFO));
   2078             }
   2079 
   2080             // Testing this mode gave rise to an improvement in best error score. Lower threshold a bit for next time
   2081             cpi->rd_thresh_mult[mode_index] = (cpi->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ? cpi->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT;
   2082             cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index];
   2083         }
   2084 
   2085         // If the mode did not help improve the best error case then raise the threshold for testing that mode next time around.
   2086         else
   2087         {
   2088             cpi->rd_thresh_mult[mode_index] += 4;
   2089 
   2090             if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT)
   2091                 cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
   2092 
   2093             cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index];
   2094         }
   2095 
   2096         if (x->skip)
   2097             break;
   2098     }
   2099 
   2100     // Reduce the activation RD thresholds for the best choice mode
   2101     if ((cpi->rd_baseline_thresh[best_mode_index] > 0) && (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2)))
   2102     {
   2103         int best_adjustment = (cpi->rd_thresh_mult[best_mode_index] >> 2);
   2104 
   2105         cpi->rd_thresh_mult[best_mode_index] = (cpi->rd_thresh_mult[best_mode_index] >= (MIN_THRESHMULT + best_adjustment)) ? cpi->rd_thresh_mult[best_mode_index] - best_adjustment : MIN_THRESHMULT;
   2106         cpi->rd_threshes[best_mode_index] = (cpi->rd_baseline_thresh[best_mode_index] >> 7) * cpi->rd_thresh_mult[best_mode_index];
   2107 
   2108         // If we chose a split mode then reset the new MV thresholds as well
   2109         /*if ( vp8_mode_order[best_mode_index] == SPLITMV )
   2110         {
   2111             best_adjustment = 4; //(cpi->rd_thresh_mult[THR_NEWMV] >> 4);
   2112             cpi->rd_thresh_mult[THR_NEWMV] = (cpi->rd_thresh_mult[THR_NEWMV] >= (MIN_THRESHMULT+best_adjustment)) ? cpi->rd_thresh_mult[THR_NEWMV]-best_adjustment: MIN_THRESHMULT;
   2113             cpi->rd_threshes[THR_NEWMV] = (cpi->rd_baseline_thresh[THR_NEWMV] >> 7) * cpi->rd_thresh_mult[THR_NEWMV];
   2114 
   2115             best_adjustment = 4; //(cpi->rd_thresh_mult[THR_NEWG] >> 4);
   2116             cpi->rd_thresh_mult[THR_NEWG] = (cpi->rd_thresh_mult[THR_NEWG] >= (MIN_THRESHMULT+best_adjustment)) ? cpi->rd_thresh_mult[THR_NEWG]-best_adjustment: MIN_THRESHMULT;
   2117             cpi->rd_threshes[THR_NEWG] = (cpi->rd_baseline_thresh[THR_NEWG] >> 7) * cpi->rd_thresh_mult[THR_NEWG];
   2118 
   2119             best_adjustment = 4; //(cpi->rd_thresh_mult[THR_NEWA] >> 4);
   2120             cpi->rd_thresh_mult[THR_NEWA] = (cpi->rd_thresh_mult[THR_NEWA] >= (MIN_THRESHMULT+best_adjustment)) ? cpi->rd_thresh_mult[THR_NEWA]-best_adjustment: MIN_THRESHMULT;
   2121             cpi->rd_threshes[THR_NEWA] = (cpi->rd_baseline_thresh[THR_NEWA] >> 7) * cpi->rd_thresh_mult[THR_NEWA];
   2122         }*/
   2123 
   2124     }
   2125 
   2126     // If we have chosen new mv or split then decay the full search check count more quickly.
   2127     if ((vp8_mode_order[best_mode_index] == NEWMV) || (vp8_mode_order[best_mode_index] == SPLITMV))
   2128     {
   2129         int lf_or_gf = (vp8_ref_frame_order[best_mode_index] == LAST_FRAME) ? 0 : 1;
   2130 
   2131         if (cpi->check_freq[lf_or_gf] && !cpi->do_full[lf_or_gf])
   2132         {
   2133             cpi->check_freq[lf_or_gf] --;
   2134         }
   2135     }
   2136 
   2137     // Keep a record of best mode index that we chose
   2138     cpi->last_best_mode_index = best_mode_index;
   2139 
   2140     // Note how often each mode chosen as best
   2141     cpi->mode_chosen_counts[best_mode_index] ++;
   2142 
   2143 
   2144     if (cpi->is_src_frame_alt_ref && (best_mbmode.mode != ZEROMV || best_mbmode.ref_frame != ALTREF_FRAME))
   2145     {
   2146         best_mbmode.mode = ZEROMV;
   2147         best_mbmode.ref_frame = ALTREF_FRAME;
   2148         best_mbmode.mv.as_int = 0;
   2149         best_mbmode.uv_mode = 0;
   2150         best_mbmode.mb_skip_coeff = (cpi->common.mb_no_coeff_skip) ? 1 : 0;
   2151         best_mbmode.partitioning = 0;
   2152         best_mbmode.dc_diff = 0;
   2153 
   2154         vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
   2155         vpx_memcpy(x->partition_info, &best_partition, sizeof(PARTITION_INFO));
   2156 
   2157         for (i = 0; i < 16; i++)
   2158         {
   2159             vpx_memset(&x->e_mbd.block[i].bmi, 0, sizeof(B_MODE_INFO));
   2160         }
   2161 
   2162         x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
   2163 
   2164         return best_rd;
   2165     }
   2166 
   2167 
   2168     // macroblock modes
   2169     vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
   2170     vpx_memcpy(x->partition_info, &best_partition, sizeof(PARTITION_INFO));
   2171 
   2172     for (i = 0; i < 16; i++)
   2173     {
   2174         vpx_memcpy(&x->e_mbd.block[i].bmi, &best_bmodes[i], sizeof(B_MODE_INFO));
   2175     }
   2176 
   2177     x->e_mbd.mode_info_context->mbmi.mv.as_mv = x->e_mbd.block[15].bmi.mv.as_mv;
   2178 
   2179     return best_rd;
   2180 }
   2181 #endif
   2182 
   2183