Home | History | Annotate | Download | only in encoder
      1 /*
      2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include <limits.h>
     12 #include <math.h>
     13 #include <stdio.h>
     14 
     15 #include "./vp9_rtcd.h"
     16 #include "./vpx_config.h"
     17 
     18 #include "vpx_ports/vpx_timer.h"
     19 
     20 #include "vp9/common/vp9_common.h"
     21 #include "vp9/common/vp9_entropy.h"
     22 #include "vp9/common/vp9_entropymode.h"
     23 #include "vp9/common/vp9_extend.h"
     24 #include "vp9/common/vp9_findnearmv.h"
     25 #include "vp9/common/vp9_idct.h"
     26 #include "vp9/common/vp9_mvref_common.h"
     27 #include "vp9/common/vp9_pred_common.h"
     28 #include "vp9/common/vp9_quant_common.h"
     29 #include "vp9/common/vp9_reconintra.h"
     30 #include "vp9/common/vp9_reconinter.h"
     31 #include "vp9/common/vp9_seg_common.h"
     32 #include "vp9/common/vp9_tile_common.h"
     33 #include "vp9/encoder/vp9_encodeframe.h"
     34 #include "vp9/encoder/vp9_encodeintra.h"
     35 #include "vp9/encoder/vp9_encodemb.h"
     36 #include "vp9/encoder/vp9_encodemv.h"
     37 #include "vp9/encoder/vp9_onyx_int.h"
     38 #include "vp9/encoder/vp9_rdopt.h"
     39 #include "vp9/encoder/vp9_segmentation.h"
     40 #include "vp9/common/vp9_systemdependent.h"
     41 #include "vp9/encoder/vp9_tokenize.h"
     42 #include "vp9/encoder/vp9_vaq.h"
     43 
     44 
     45 #define DBG_PRNT_SEGMAP 0
     46 
     47 
     48 // #define ENC_DEBUG
     49 #ifdef ENC_DEBUG
     50 int enc_debug = 0;
     51 #endif
     52 
     53 static INLINE uint8_t *get_sb_index(MACROBLOCK *x, BLOCK_SIZE subsize) {
     54   switch (subsize) {
     55     case BLOCK_64X64:
     56     case BLOCK_64X32:
     57     case BLOCK_32X64:
     58     case BLOCK_32X32:
     59       return &x->sb_index;
     60     case BLOCK_32X16:
     61     case BLOCK_16X32:
     62     case BLOCK_16X16:
     63       return &x->mb_index;
     64     case BLOCK_16X8:
     65     case BLOCK_8X16:
     66     case BLOCK_8X8:
     67       return &x->b_index;
     68     case BLOCK_8X4:
     69     case BLOCK_4X8:
     70     case BLOCK_4X4:
     71       return &x->ab_index;
     72     default:
     73       assert(0);
     74       return NULL;
     75   }
     76 }
     77 
     78 static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
     79                               int mi_row, int mi_col, BLOCK_SIZE bsize);
     80 
     81 static void adjust_act_zbin(VP9_COMP *cpi, MACROBLOCK *x);
     82 
     83 /* activity_avg must be positive, or flat regions could get a zero weight
     84  *  (infinite lambda), which confounds analysis.
     85  * This also avoids the need for divide by zero checks in
     86  *  vp9_activity_masking().
     87  */
     88 #define ACTIVITY_AVG_MIN (64)
     89 
     90 /* Motion vector component magnitude threshold for defining fast motion. */
     91 #define FAST_MOTION_MV_THRESH (24)
     92 
/* This is used as a reference when computing the source variance for the
 *  purposes of activity masking.
 * Eventually this should be replaced by custom no-reference routines,
 *  which will be faster.
 * Callers pass this buffer with a reference stride of 0, so a single
 *  64-byte row of mid-gray (128) samples covers every supported block
 *  size.
 */
static const uint8_t VP9_VAR_OFFS[64] = {
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128
};
    108 
    109 static unsigned int get_sby_perpixel_variance(VP9_COMP *cpi, MACROBLOCK *x,
    110                                               BLOCK_SIZE bs) {
    111   unsigned int var, sse;
    112   var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf,
    113                            x->plane[0].src.stride,
    114                            VP9_VAR_OFFS, 0, &sse);
    115   return (var + (1 << (num_pels_log2_lookup[bs] - 1))) >>
    116       num_pels_log2_lookup[bs];
    117 }
    118 
    119 // Original activity measure from Tim T's code.
    120 static unsigned int tt_activity_measure(MACROBLOCK *x) {
    121   unsigned int act;
    122   unsigned int sse;
    123   /* TODO: This could also be done over smaller areas (8x8), but that would
    124    *  require extensive changes elsewhere, as lambda is assumed to be fixed
    125    *  over an entire MB in most of the code.
    126    * Another option is to compute four 8x8 variances, and pick a single
    127    *  lambda using a non-linear combination (e.g., the smallest, or second
    128    *  smallest, etc.).
    129    */
    130   act = vp9_variance16x16(x->plane[0].src.buf, x->plane[0].src.stride,
    131                           VP9_VAR_OFFS, 0, &sse);
    132   act <<= 4;
    133 
    134   /* If the region is flat, lower the activity some more. */
    135   if (act < 8 << 12)
    136     act = act < 5 << 12 ? act : 5 << 12;
    137 
    138   return act;
    139 }
    140 
// Stub for alternative experimental activity measures.
// Currently delegates to vp9_encode_intra(); |use_dc_pred| selects DC
// prediction (see mb_activity_measure for how it is chosen).
// NOTE(review): assumed to be an intra-coding-cost based activity proxy
// -- confirm against vp9_encodeintra.c.
static unsigned int alt_activity_measure(MACROBLOCK *x, int use_dc_pred) {
  return vp9_encode_intra(x, use_dc_pred);
}
    145 
    146 // Measure the activity of the current macroblock
    147 // What we measure here is TBD so abstracted to this function
    148 #define ALT_ACT_MEASURE 1
    149 static unsigned int mb_activity_measure(MACROBLOCK *x, int mb_row, int mb_col) {
    150   unsigned int mb_activity;
    151 
    152   if (ALT_ACT_MEASURE) {
    153     int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row);
    154 
    155     // Or use and alternative.
    156     mb_activity = alt_activity_measure(x, use_dc_pred);
    157   } else {
    158     // Original activity measure from Tim T's code.
    159     mb_activity = tt_activity_measure(x);
    160   }
    161 
    162   if (mb_activity < ACTIVITY_AVG_MIN)
    163     mb_activity = ACTIVITY_AVG_MIN;
    164 
    165   return mb_activity;
    166 }
    167 
// Calculate an "average" mb activity value for the frame.
// NOTE: with ALT_ACT_MEASURE enabled (it is, above), the computed value
// is overridden by a fixed constant at the end of this function.
#define ACT_MEDIAN 0
static void calc_av_activity(VP9_COMP *cpi, int64_t activity_sum) {
#if ACT_MEDIAN
  // Find median: Simple n^2 algorithm for experimentation
  {
    unsigned int median;
    unsigned int i, j;
    unsigned int *sortlist;
    unsigned int tmp;

    // Create a list to sort to
    CHECK_MEM_ERROR(&cpi->common, sortlist, vpx_calloc(sizeof(unsigned int),
                    cpi->common.MBs));

    // Copy map to sort list
    vpx_memcpy(sortlist, cpi->mb_activity_map,
        sizeof(unsigned int) * cpi->common.MBs);

    // Ripple each value down to its correct position (insertion sort)
    for (i = 1; i < cpi->common.MBs; i ++) {
      for (j = i; j > 0; j --) {
        if (sortlist[j] < sortlist[j - 1]) {
          // Swap values
          tmp = sortlist[j - 1];
          sortlist[j - 1] = sortlist[j];
          sortlist[j] = tmp;
        } else {
          break;
        }
      }
    }

    // Even number MBs so estimate median as mean of two either side.
    median = (1 + sortlist[cpi->common.MBs >> 1] +
        sortlist[(cpi->common.MBs >> 1) + 1]) >> 1;

    cpi->activity_avg = median;

    vpx_free(sortlist);
  }
#else
  // Simple mean for now
  cpi->activity_avg = (unsigned int) (activity_sum / cpi->common.MBs);
#endif  // ACT_MEDIAN

  // Apply the same floor used per-MB to avoid zero weights downstream.
  if (cpi->activity_avg < ACTIVITY_AVG_MIN)
    cpi->activity_avg = ACTIVITY_AVG_MIN;

  // Experimental code: return fixed value normalized for several clips
  if (ALT_ACT_MEASURE)
    cpi->activity_avg = 100000;
}
    221 
    222 #define USE_ACT_INDEX   0
    223 #define OUTPUT_NORM_ACT_STATS   0
    224 
#if USE_ACT_INDEX
// Calculate an activity index for each mb.
// (Compiled out by default: USE_ACT_INDEX is 0 above.)
static void calc_activity_index(VP9_COMP *cpi, MACROBLOCK *x) {
  VP9_COMMON *const cm = &cpi->common;
  int mb_row, mb_col;

  int64_t act;
  int64_t a;
  int64_t b;

#if OUTPUT_NORM_ACT_STATS
  FILE *f = fopen("norm_act.stt", "a");
  fprintf(f, "\n%12d\n", cpi->activity_avg);
#endif

  // Reset pointers to start of activity map
  x->mb_activity_ptr = cpi->mb_activity_map;

  // Calculate normalized mb activity number.
  for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) {
    // for each macroblock col in image
    for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
      // Read activity from the map
      act = *(x->mb_activity_ptr);

      // Calculate a normalized activity number
      a = act + 4 * cpi->activity_avg;
      b = 4 * act + cpi->activity_avg;

      // Rounded signed ratio: positive when this MB is more active than
      // the frame average, negative when less active.
      if (b >= a)
      *(x->activity_ptr) = (int)((b + (a >> 1)) / a) - 1;
      else
      *(x->activity_ptr) = 1 - (int)((a + (b >> 1)) / b);

#if OUTPUT_NORM_ACT_STATS
      fprintf(f, " %6d", *(x->mb_activity_ptr));
#endif
      // Increment activity map pointers
      x->mb_activity_ptr++;
    }

#if OUTPUT_NORM_ACT_STATS
    fprintf(f, "\n");
#endif
  }

#if OUTPUT_NORM_ACT_STATS
  fclose(f);
#endif
}
#endif  // USE_ACT_INDEX
    276 
// Loop through all MBs. Note activity of each, average activity and
// calculate a normalized activity for each
static void build_activity_map(VP9_COMP *cpi) {
  MACROBLOCK * const x = &cpi->mb;
  MACROBLOCKD *xd = &x->e_mbd;
  VP9_COMMON * const cm = &cpi->common;

#if ALT_ACT_MEASURE
  // The alternative measure encodes an intra prediction, so it needs the
  // reconstruction buffer and above/left availability flags set up below.
  YV12_BUFFER_CONFIG *new_yv12 = get_frame_new_buffer(cm);
  int recon_yoffset;
  int recon_y_stride = new_yv12->y_stride;
#endif

  int mb_row, mb_col;
  unsigned int mb_activity;
  int64_t activity_sum = 0;

  // Walk the per-MB activity map in raster order.
  x->mb_activity_ptr = cpi->mb_activity_map;

  // for each macroblock row in image
  for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) {
#if ALT_ACT_MEASURE
    // reset above block coeffs
    xd->up_available = (mb_row != 0);
    recon_yoffset = (mb_row * recon_y_stride * 16);
#endif
    // for each macroblock col in image
    for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
#if ALT_ACT_MEASURE
      xd->plane[0].dst.buf = new_yv12->y_buffer + recon_yoffset;
      xd->left_available = (mb_col != 0);
      recon_yoffset += 16;
#endif

      // measure activity
      mb_activity = mb_activity_measure(x, mb_row, mb_col);

      // Keep frame sum
      activity_sum += mb_activity;

      // Store MB level activity details.
      *x->mb_activity_ptr = mb_activity;

      // Increment activity map pointer
      x->mb_activity_ptr++;

      // adjust to the next column of source macroblocks
      x->plane[0].src.buf += 16;
    }

    // adjust to the next row of mbs: undo the per-column advances, then
    // step down 16 source rows
    x->plane[0].src.buf += 16 * x->plane[0].src.stride - 16 * cm->mb_cols;
  }

  // Calculate an "average" MB activity
  calc_av_activity(cpi, activity_sum);

#if USE_ACT_INDEX
  // Calculate an activity index number of each mb
  calc_activity_index(cpi, x);
#endif
}
    339 
// Macroblock activity masking: scale the RD multiplier (and the
// dependent errorperbit) of the current MB according to how its
// activity compares with the frame average, then adjust the quantizer
// zero-bin.
void vp9_activity_masking(VP9_COMP *cpi, MACROBLOCK *x) {
#if USE_ACT_INDEX
  // Pre-computed index path: each index unit adds/removes 25% of rdmult.
  x->rdmult += *(x->mb_activity_ptr) * (x->rdmult >> 2);
  x->errorperbit = x->rdmult * 100 / (110 * x->rddiv);
  x->errorperbit += (x->errorperbit == 0);
#else
  int64_t a;
  int64_t b;
  int64_t act = *(x->mb_activity_ptr);

  // Apply the masking to the RD multiplier.
  a = act + (2 * cpi->activity_avg);
  b = (2 * act) + cpi->activity_avg;

  // rdmult *= b/a (rounded): >1 for above-average activity, <1 below.
  x->rdmult = (unsigned int) (((int64_t) x->rdmult * b + (a >> 1)) / a);
  x->errorperbit = x->rdmult * 100 / (110 * x->rddiv);
  // Guard against errorperbit collapsing to zero.
  x->errorperbit += (x->errorperbit == 0);
#endif

  // Activity based Zbin adjustment
  adjust_act_zbin(cpi, x);
}
    363 
// Commit the mode decision stored in |ctx| for the current block:
// copy the chosen mode info into the frame-wide mode-info grid, point
// the coefficient buffers at the chosen candidate's storage, and (when
// |output_enabled|) accumulate rate-control and entropy statistics.
static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
                         BLOCK_SIZE bsize, int output_enabled) {
  int i, x_idx, y;
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &cpi->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const p = x->plane;
  struct macroblockd_plane *const pd = xd->plane;
  MODE_INFO *mi = &ctx->mic;
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
  MODE_INFO *mi_addr = xd->mi_8x8[0];

  int mb_mode_index = ctx->best_mode_index;
  const int mis = cm->mode_info_stride;
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  int max_plane;

  assert(mi->mbmi.mode < MB_MODE_COUNT);
  assert(mi->mbmi.ref_frame[0] < MAX_REF_FRAMES);
  assert(mi->mbmi.ref_frame[1] < MAX_REF_FRAMES);
  assert(mi->mbmi.sb_type == bsize);

  // Copy the picked mode info into the grid slot for this block.
  *mi_addr = *mi;

  // Inter blocks keep candidate buffers (index 1) for all planes;
  // intra blocks only for the Y plane, the rest use spare buffers (2).
  max_plane = is_inter_block(mbmi) ? MAX_MB_PLANE : 1;
  for (i = 0; i < max_plane; ++i) {
    p[i].coeff = ctx->coeff_pbuf[i][1];
    pd[i].qcoeff = ctx->qcoeff_pbuf[i][1];
    pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1];
    pd[i].eobs = ctx->eobs_pbuf[i][1];
  }

  for (i = max_plane; i < MAX_MB_PLANE; ++i) {
    p[i].coeff = ctx->coeff_pbuf[i][2];
    pd[i].qcoeff = ctx->qcoeff_pbuf[i][2];
    pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2];
    pd[i].eobs = ctx->eobs_pbuf[i][2];
  }

  // Restore the coding context of the MB to that that was in place
  // when the mode was picked for it; every 8x8 grid cell covered by
  // this block (and inside the frame) points at the same mode info.
  for (y = 0; y < mi_height; y++)
    for (x_idx = 0; x_idx < mi_width; x_idx++)
      if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > x_idx
          && (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height > y)
        xd->mi_8x8[x_idx + y * mis] = mi_addr;

  if (cpi->sf.variance_adaptive_quantization) {
    vp9_mb_init_quantizer(cpi, x);
  }

  // FIXME(rbultje) I'm pretty sure this should go to the end of this block
  // (i.e. after the output_enabled)
  if (bsize < BLOCK_32X32) {
    if (bsize < BLOCK_16X16)
      ctx->tx_rd_diff[ALLOW_16X16] = ctx->tx_rd_diff[ALLOW_8X8];
    ctx->tx_rd_diff[ALLOW_32X32] = ctx->tx_rd_diff[ALLOW_16X16];
  }

  // For sub-8x8 inter blocks, promote the last sub-block's MVs to the
  // block level.
  if (is_inter_block(mbmi) && mbmi->sb_type < BLOCK_8X8) {
    mbmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int;
    mbmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int;
  }

  x->skip = ctx->skip;
  vpx_memcpy(x->zcoeff_blk[mbmi->tx_size], ctx->zcoeff_blk,
             sizeof(uint8_t) * ctx->num_4x4_blk);

  // Everything below is statistics gathering for blocks that are output.
  if (!output_enabled)
    return;

  if (!vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
    for (i = 0; i < TX_MODES; i++)
      cpi->rd_tx_select_diff[i] += ctx->tx_rd_diff[i];
  }

  if (frame_is_intra_only(cm)) {
#if CONFIG_INTERNAL_STATS
    // Map intra prediction modes to mode-threshold indices for counting.
    static const int kf_mode_index[] = {
      THR_DC /*DC_PRED*/,
      THR_V_PRED /*V_PRED*/,
      THR_H_PRED /*H_PRED*/,
      THR_D45_PRED /*D45_PRED*/,
      THR_D135_PRED /*D135_PRED*/,
      THR_D117_PRED /*D117_PRED*/,
      THR_D153_PRED /*D153_PRED*/,
      THR_D207_PRED /*D207_PRED*/,
      THR_D63_PRED /*D63_PRED*/,
      THR_TM /*TM_PRED*/,
    };
    cpi->mode_chosen_counts[kf_mode_index[mi->mbmi.mode]]++;
#endif
  } else {
    // Note how often each mode chosen as best
    cpi->mode_chosen_counts[mb_mode_index]++;
    if (is_inter_block(mbmi)
        && (mbmi->sb_type < BLOCK_8X8 || mbmi->mode == NEWMV)) {
      int_mv best_mv[2];
      const MV_REFERENCE_FRAME rf1 = mbmi->ref_frame[0];
      const MV_REFERENCE_FRAME rf2 = mbmi->ref_frame[1];
      best_mv[0].as_int = ctx->best_ref_mv.as_int;
      best_mv[1].as_int = ctx->second_best_ref_mv.as_int;
      // NEWMV blocks use the first reference MV from each ref frame.
      if (mbmi->mode == NEWMV) {
        best_mv[0].as_int = mbmi->ref_mvs[rf1][0].as_int;
        if (rf2 > 0)
          best_mv[1].as_int = mbmi->ref_mvs[rf2][0].as_int;
      }
      mbmi->best_mv[0].as_int = best_mv[0].as_int;
      mbmi->best_mv[1].as_int = best_mv[1].as_int;
      vp9_update_mv_count(cpi, x, best_mv);
    }

    // Count the chosen interpolation filter for entropy adaptation.
    if (cm->mcomp_filter_type == SWITCHABLE && is_inter_mode(mbmi->mode)) {
      const int ctx = vp9_get_pred_context_switchable_interp(xd);
      ++cm->counts.switchable_interp[ctx][mbmi->interp_filter];
    }

    cpi->rd_comp_pred_diff[SINGLE_PREDICTION_ONLY] += ctx->single_pred_diff;
    cpi->rd_comp_pred_diff[COMP_PREDICTION_ONLY] += ctx->comp_pred_diff;
    cpi->rd_comp_pred_diff[HYBRID_PREDICTION] += ctx->hybrid_pred_diff;

    for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
      cpi->rd_filter_diff[i] += ctx->best_filter_diff[i];
  }
}
    490 
    491 void vp9_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src,
    492                           int mi_row, int mi_col) {
    493   uint8_t *const buffers[4] = {src->y_buffer, src->u_buffer, src->v_buffer,
    494                                src->alpha_buffer};
    495   const int strides[4] = {src->y_stride, src->uv_stride, src->uv_stride,
    496                           src->alpha_stride};
    497   int i;
    498 
    499   for (i = 0; i < MAX_MB_PLANE; i++)
    500     setup_pred_plane(&x->plane[i].src, buffers[i], strides[i], mi_row, mi_col,
    501                      NULL, x->e_mbd.plane[i].subsampling_x,
    502                      x->e_mbd.plane[i].subsampling_y);
    503 }
    504 
// Position the encoder state at the block of size |bsize| whose
// top-left 8x8 unit is (mi_row, mi_col): mode-info pointers,
// destination buffers, MV search limits, source planes, RD constants,
// and segment-derived settings.
static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile,
                        int mi_row, int mi_col, BLOCK_SIZE bsize) {
  MACROBLOCK *const x = &cpi->mb;
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi;
  const int dst_fb_idx = cm->new_fb_idx;
  const int idx_str = xd->mode_info_stride * mi_row + mi_col;
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  // Macroblock (16x16) coordinates: two 8x8 mi units per MB.
  const int mb_row = mi_row >> 1;
  const int mb_col = mi_col >> 1;
  const int idx_map = mb_row * cm->mb_cols + mb_col;
  const struct segmentation *const seg = &cm->seg;

  set_skip_context(xd, cpi->above_context, cpi->left_context, mi_row, mi_col);

  // Activity map pointer
  x->mb_activity_ptr = &cpi->mb_activity_map[idx_map];
  x->active_ptr = cpi->active_map + idx_map;

  xd->mi_8x8 = cm->mi_grid_visible + idx_str;
  xd->prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str;

  // Special case: if prev_mi is NULL, the previous mode info context
  // cannot be used.
  xd->last_mi = cm->prev_mi ? xd->prev_mi_8x8[0] : NULL;

  xd->mi_8x8[0] = cm->mi + idx_str;

  mbmi = &xd->mi_8x8[0]->mbmi;

  // Set up destination pointers
  setup_dst_planes(xd, &cm->yv12_fb[dst_fb_idx], mi_row, mi_col);

  // Set up limit values for MV components
  // mv beyond the range do not produce new/different prediction block
  x->mv_row_min = -(((mi_row + mi_height) * MI_SIZE) + VP9_INTERP_EXTEND);
  x->mv_col_min = -(((mi_col + mi_width) * MI_SIZE) + VP9_INTERP_EXTEND);
  x->mv_row_max = (cm->mi_rows - mi_row) * MI_SIZE + VP9_INTERP_EXTEND;
  x->mv_col_max = (cm->mi_cols - mi_col) * MI_SIZE + VP9_INTERP_EXTEND;

  // Set up distance of MB to edge of frame in 1/8th pel units
  assert(!(mi_col & (mi_width - 1)) && !(mi_row & (mi_height - 1)));
  set_mi_row_col(xd, tile, mi_row, mi_height, mi_col, mi_width,
                 cm->mi_rows, cm->mi_cols);

  /* set up source buffers */
  vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);

  /* R/D setup */
  x->rddiv = cpi->RDDIV;
  x->rdmult = cpi->RDMULT;

  /* segment ID */
  if (seg->enabled) {
    // With VAQ the segment id is assigned later from block energy, so
    // only read it from the map here in the non-VAQ case.
    if (!cpi->sf.variance_adaptive_quantization) {
      uint8_t *map = seg->update_map ? cpi->segmentation_map
          : cm->last_frame_seg_map;
      mbmi->segment_id = vp9_get_segment_id(cm, map, bsize, mi_row, mi_col);
    }
    vp9_mb_init_quantizer(cpi, x);

    if (seg->enabled && cpi->seg0_cnt > 0
        && !vp9_segfeature_active(seg, 0, SEG_LVL_REF_FRAME)
        && vp9_segfeature_active(seg, 1, SEG_LVL_REF_FRAME)) {
      cpi->seg0_progress = (cpi->seg0_idx << 16) / cpi->seg0_cnt;
    } else {
      // Q16 fixed-point estimate of progress through the frame/tile.
      const int y = mb_row & ~3;
      const int x = mb_col & ~3;
      const int p16 = ((mb_row & 1) << 1) + (mb_col & 1);
      const int p32 = ((mb_row & 2) << 2) + ((mb_col & 2) << 1);
      const int tile_progress = tile->mi_col_start * cm->mb_rows >> 1;
      const int mb_cols = (tile->mi_col_end - tile->mi_col_start) >> 1;

      cpi->seg0_progress = ((y * mb_cols + x * 4 + p32 + p16 + tile_progress)
          << 16) / cm->MBs;
    }

    x->encode_breakout = cpi->segment_encode_breakout[mbmi->segment_id];
  } else {
    mbmi->segment_id = 0;
    x->encode_breakout = cpi->oxcf.encode_breakout;
  }
}
    590 
// Run the RD mode search for the block of size |bsize| at
// (mi_row, mi_col), returning the chosen rate/distortion through
// |totalrate|/|totaldist|. |best_rd| bounds the search for early exit.
static void pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
                          int mi_row, int mi_col,
                          int *totalrate, int64_t *totaldist,
                          BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
                          int64_t best_rd) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &cpi->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const p = x->plane;
  struct macroblockd_plane *const pd = xd->plane;
  int i;
  int orig_rdmult = x->rdmult;
  double rdmult_ratio;

  vp9_clear_system_state();  // __asm emms;
  rdmult_ratio = 1.0;  // avoid uninitialized warnings

  // Use the lower precision, but faster, 32x32 fdct for mode selection.
  x->use_lp32x32fdct = 1;

  if (bsize < BLOCK_8X8) {
    // When ab_index = 0 all sub-blocks are handled, so for ab_index != 0
    // there is nothing to be done.
    if (x->ab_index != 0) {
      *totalrate = 0;
      *totaldist = 0;
      return;
    }
  }

  set_offsets(cpi, tile, mi_row, mi_col, bsize);
  xd->mi_8x8[0]->mbmi.sb_type = bsize;

  // Point the coefficient buffers at this context's candidate-0 storage.
  for (i = 0; i < MAX_MB_PLANE; ++i) {
    p[i].coeff = ctx->coeff_pbuf[i][0];
    pd[i].qcoeff = ctx->qcoeff_pbuf[i][0];
    pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][0];
    pd[i].eobs = ctx->eobs_pbuf[i][0];
  }
  ctx->is_coded = 0;
  x->skip_recode = 0;

  // Set to zero to make sure we do not use the previous encoded frame stats
  xd->mi_8x8[0]->mbmi.skip_coeff = 0;

  x->source_variance = get_sby_perpixel_variance(cpi, x, bsize);

  if (cpi->sf.variance_adaptive_quantization) {
    int energy;
    if (bsize <= BLOCK_16X16) {
      energy = x->mb_energy;
    } else {
      energy = vp9_block_energy(cpi, x, bsize);
    }

    // Variance-adaptive quantization: derive segment id and an rdmult
    // scale factor from the block energy.
    xd->mi_8x8[0]->mbmi.segment_id = vp9_vaq_segment_id(energy);
    rdmult_ratio = vp9_vaq_rdmult_ratio(energy);
    vp9_mb_init_quantizer(cpi, x);
  }

  if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
    vp9_activity_masking(cpi, x);

  if (cpi->sf.variance_adaptive_quantization) {
    vp9_clear_system_state();  // __asm emms;
    x->rdmult = round(x->rdmult * rdmult_ratio);
  }

  // Find best coding mode & reconstruct the MB so it is available
  // as a predictor for MBs that follow in the SB
  if (frame_is_intra_only(cm)) {
    vp9_rd_pick_intra_mode_sb(cpi, x, totalrate, totaldist, bsize, ctx,
                              best_rd);
  } else {
    if (bsize >= BLOCK_8X8)
      vp9_rd_pick_inter_mode_sb(cpi, x, tile, mi_row, mi_col,
                                totalrate, totaldist, bsize, ctx, best_rd);
    else
      vp9_rd_pick_inter_mode_sub8x8(cpi, x, tile, mi_row, mi_col, totalrate,
                                    totaldist, bsize, ctx, best_rd);
  }

  if (cpi->sf.variance_adaptive_quantization) {
    // Restore the un-scaled rdmult and rescale the returned rate so it
    // is comparable across blocks with different VAQ scale factors.
    x->rdmult = orig_rdmult;
    if (*totalrate != INT_MAX) {
      vp9_clear_system_state();  // __asm emms;
      *totalrate = round(*totalrate * rdmult_ratio);
    }
  }
}
    681 
    682 static void update_stats(VP9_COMP *cpi) {
    683   VP9_COMMON *const cm = &cpi->common;
    684   MACROBLOCK *const x = &cpi->mb;
    685   MACROBLOCKD *const xd = &x->e_mbd;
    686   MODE_INFO *mi = xd->mi_8x8[0];
    687   MB_MODE_INFO *const mbmi = &mi->mbmi;
    688 
    689   if (!frame_is_intra_only(cm)) {
    690     const int seg_ref_active = vp9_segfeature_active(&cm->seg, mbmi->segment_id,
    691                                                      SEG_LVL_REF_FRAME);
    692 
    693     if (!seg_ref_active)
    694       cpi->intra_inter_count[vp9_get_pred_context_intra_inter(xd)]
    695                             [is_inter_block(mbmi)]++;
    696 
    697     // If the segment reference feature is enabled we have only a single
    698     // reference frame allowed for the segment so exclude it from
    699     // the reference frame counts used to work out probabilities.
    700     if (is_inter_block(mbmi) && !seg_ref_active) {
    701       if (cm->comp_pred_mode == HYBRID_PREDICTION)
    702         cpi->comp_inter_count[vp9_get_pred_context_comp_inter_inter(cm, xd)]
    703                              [has_second_ref(mbmi)]++;
    704 
    705       if (has_second_ref(mbmi)) {
    706         cpi->comp_ref_count[vp9_get_pred_context_comp_ref_p(cm, xd)]
    707                            [mbmi->ref_frame[0] == GOLDEN_FRAME]++;
    708       } else {
    709         cpi->single_ref_count[vp9_get_pred_context_single_ref_p1(xd)][0]
    710                              [mbmi->ref_frame[0] != LAST_FRAME]++;
    711         if (mbmi->ref_frame[0] != LAST_FRAME)
    712           cpi->single_ref_count[vp9_get_pred_context_single_ref_p2(xd)][1]
    713                                [mbmi->ref_frame[0] != GOLDEN_FRAME]++;
    714       }
    715     }
    716   }
    717 }
    718 
    719 static BLOCK_SIZE *get_sb_partitioning(MACROBLOCK *x, BLOCK_SIZE bsize) {
    720   switch (bsize) {
    721     case BLOCK_64X64:
    722       return &x->sb64_partitioning;
    723     case BLOCK_32X32:
    724       return &x->sb_partitioning[x->sb_index];
    725     case BLOCK_16X16:
    726       return &x->mb_partitioning[x->sb_index][x->mb_index];
    727     case BLOCK_8X8:
    728       return &x->b_partitioning[x->sb_index][x->mb_index][x->b_index];
    729     default:
    730       assert(0);
    731       return NULL;
    732   }
    733 }
    734 
// Restore the above/left entropy and partition contexts previously
// captured by save_context(), undoing any changes made while searching
// partitions beneath (mi_row, mi_col).
static void restore_context(VP9_COMP *cpi, int mi_row, int mi_col,
                            ENTROPY_CONTEXT a[16 * MAX_MB_PLANE],
                            ENTROPY_CONTEXT l[16 * MAX_MB_PLANE],
                            PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8],
                            BLOCK_SIZE bsize) {
  MACROBLOCK *const x = &cpi->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  int p;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  int mi_width = num_8x8_blocks_wide_lookup[bsize];
  int mi_height = num_8x8_blocks_high_lookup[bsize];
  // Copy the buffered per-plane contexts back; offsets and lengths are
  // scaled by each plane's chroma subsampling.
  for (p = 0; p < MAX_MB_PLANE; p++) {
    vpx_memcpy(
        cpi->above_context[p] + ((mi_col * 2) >> xd->plane[p].subsampling_x),
        a + num_4x4_blocks_wide * p,
        (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >>
        xd->plane[p].subsampling_x);
    vpx_memcpy(
        cpi->left_context[p]
            + ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y),
        l + num_4x4_blocks_high * p,
        (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >>
        xd->plane[p].subsampling_y);
  }
  // Partition contexts for the covered mi columns/rows.
  vpx_memcpy(cpi->above_seg_context + mi_col, sa,
             sizeof(*cpi->above_seg_context) * mi_width);
  vpx_memcpy(cpi->left_seg_context + (mi_row & MI_MASK), sl,
             sizeof(cpi->left_seg_context[0]) * mi_height);
}
    765 static void save_context(VP9_COMP *cpi, int mi_row, int mi_col,
    766                          ENTROPY_CONTEXT a[16 * MAX_MB_PLANE],
    767                          ENTROPY_CONTEXT l[16 * MAX_MB_PLANE],
    768                          PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8],
    769                          BLOCK_SIZE bsize) {
    770   const MACROBLOCK *const x = &cpi->mb;
    771   const MACROBLOCKD *const xd = &x->e_mbd;
    772   int p;
    773   const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
    774   const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
    775   int mi_width = num_8x8_blocks_wide_lookup[bsize];
    776   int mi_height = num_8x8_blocks_high_lookup[bsize];
    777 
    778   // buffer the above/left context information of the block in search.
    779   for (p = 0; p < MAX_MB_PLANE; ++p) {
    780     vpx_memcpy(
    781         a + num_4x4_blocks_wide * p,
    782         cpi->above_context[p] + (mi_col * 2 >> xd->plane[p].subsampling_x),
    783         (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >>
    784         xd->plane[p].subsampling_x);
    785     vpx_memcpy(
    786         l + num_4x4_blocks_high * p,
    787         cpi->left_context[p]
    788             + ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y),
    789         (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >>
    790         xd->plane[p].subsampling_y);
    791   }
    792   vpx_memcpy(sa, cpi->above_seg_context + mi_col,
    793              sizeof(*cpi->above_seg_context) * mi_width);
    794   vpx_memcpy(sl, cpi->left_seg_context + (mi_row & MI_MASK),
    795              sizeof(cpi->left_seg_context[0]) * mi_height);
    796 }
    797 
    798 static void encode_b(VP9_COMP *cpi, const TileInfo *const tile,
    799                      TOKENEXTRA **tp, int mi_row, int mi_col,
    800                      int output_enabled, BLOCK_SIZE bsize, int sub_index) {
    801   VP9_COMMON *const cm = &cpi->common;
    802   MACROBLOCK *const x = &cpi->mb;
    803 
    804   if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
    805     return;
    806 
    807   if (sub_index != -1)
    808     *get_sb_index(x, bsize) = sub_index;
    809 
    810   if (bsize < BLOCK_8X8) {
    811     // When ab_index = 0 all sub-blocks are handled, so for ab_index != 0
    812     // there is nothing to be done.
    813     if (x->ab_index > 0)
    814       return;
    815   }
    816   set_offsets(cpi, tile, mi_row, mi_col, bsize);
    817   update_state(cpi, get_block_context(x, bsize), bsize, output_enabled);
    818   encode_superblock(cpi, tp, output_enabled, mi_row, mi_col, bsize);
    819 
    820   if (output_enabled) {
    821     update_stats(cpi);
    822 
    823     (*tp)->token = EOSB_TOKEN;
    824     (*tp)++;
    825   }
    826 }
    827 
    828 static void encode_sb(VP9_COMP *cpi, const TileInfo *const tile,
    829                       TOKENEXTRA **tp, int mi_row, int mi_col,
    830                       int output_enabled, BLOCK_SIZE bsize) {
    831   VP9_COMMON *const cm = &cpi->common;
    832   MACROBLOCK *const x = &cpi->mb;
    833   BLOCK_SIZE c1 = BLOCK_8X8;
    834   const int bsl = b_width_log2(bsize), bs = (1 << bsl) / 4;
    835   int pl = 0;
    836   PARTITION_TYPE partition;
    837   BLOCK_SIZE subsize;
    838   int i;
    839 
    840   if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
    841     return;
    842 
    843   c1 = BLOCK_4X4;
    844   if (bsize >= BLOCK_8X8) {
    845     pl = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context,
    846                                  mi_row, mi_col, bsize);
    847     c1 = *(get_sb_partitioning(x, bsize));
    848   }
    849   partition = partition_lookup[bsl][c1];
    850 
    851   switch (partition) {
    852     case PARTITION_NONE:
    853       if (output_enabled && bsize >= BLOCK_8X8)
    854         cpi->partition_count[pl][PARTITION_NONE]++;
    855       encode_b(cpi, tile, tp, mi_row, mi_col, output_enabled, c1, -1);
    856       break;
    857     case PARTITION_VERT:
    858       if (output_enabled)
    859         cpi->partition_count[pl][PARTITION_VERT]++;
    860       encode_b(cpi, tile, tp, mi_row, mi_col, output_enabled, c1, 0);
    861       encode_b(cpi, tile, tp, mi_row, mi_col + bs, output_enabled, c1, 1);
    862       break;
    863     case PARTITION_HORZ:
    864       if (output_enabled)
    865         cpi->partition_count[pl][PARTITION_HORZ]++;
    866       encode_b(cpi, tile, tp, mi_row, mi_col, output_enabled, c1, 0);
    867       encode_b(cpi, tile, tp, mi_row + bs, mi_col, output_enabled, c1, 1);
    868       break;
    869     case PARTITION_SPLIT:
    870       subsize = get_subsize(bsize, PARTITION_SPLIT);
    871 
    872       if (output_enabled)
    873         cpi->partition_count[pl][PARTITION_SPLIT]++;
    874 
    875       for (i = 0; i < 4; i++) {
    876         const int x_idx = i & 1, y_idx = i >> 1;
    877 
    878         *get_sb_index(x, subsize) = i;
    879         encode_sb(cpi, tile, tp, mi_row + y_idx * bs, mi_col + x_idx * bs,
    880                   output_enabled, subsize);
    881       }
    882       break;
    883     default:
    884       assert(0);
    885       break;
    886   }
    887 
    888   if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)
    889     update_partition_context(cpi->above_seg_context, cpi->left_seg_context,
    890                              mi_row, mi_col, c1, bsize);
    891 }
    892 
    893 // Check to see if the given partition size is allowed for a specified number
    894 // of 8x8 block rows and columns remaining in the image.
    895 // If not then return the largest allowed partition size
    896 static BLOCK_SIZE find_partition_size(BLOCK_SIZE bsize,
    897                                       int rows_left, int cols_left,
    898                                       int *bh, int *bw) {
    899   if ((rows_left <= 0) || (cols_left <= 0)) {
    900     return MIN(bsize, BLOCK_8X8);
    901   } else {
    902     for (; bsize > 0; --bsize) {
    903       *bh = num_8x8_blocks_high_lookup[bsize];
    904       *bw = num_8x8_blocks_wide_lookup[bsize];
    905       if ((*bh <= rows_left) && (*bw <= cols_left)) {
    906         break;
    907       }
    908     }
    909   }
    910   return bsize;
    911 }
    912 
    913 // This function attempts to set all mode info entries in a given SB64
    914 // to the same block partition size.
    915 // However, at the bottom and right borders of the image the requested size
    916 // may not be allowed in which case this code attempts to choose the largest
    917 // allowable partition.
    918 static void set_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
    919                              MODE_INFO **mi_8x8, int mi_row, int mi_col) {
    920   VP9_COMMON *const cm = &cpi->common;
    921   BLOCK_SIZE bsize = cpi->sf.always_this_block_size;
    922   const int mis = cm->mode_info_stride;
    923   int row8x8_remaining = tile->mi_row_end - mi_row;
    924   int col8x8_remaining = tile->mi_col_end - mi_col;
    925   int block_row, block_col;
    926   MODE_INFO * mi_upper_left = cm->mi + mi_row * mis + mi_col;
    927   int bh = num_8x8_blocks_high_lookup[bsize];
    928   int bw = num_8x8_blocks_wide_lookup[bsize];
    929 
    930   assert((row8x8_remaining > 0) && (col8x8_remaining > 0));
    931 
    932   // Apply the requested partition size to the SB64 if it is all "in image"
    933   if ((col8x8_remaining >= MI_BLOCK_SIZE) &&
    934       (row8x8_remaining >= MI_BLOCK_SIZE)) {
    935     for (block_row = 0; block_row < MI_BLOCK_SIZE; block_row += bh) {
    936       for (block_col = 0; block_col < MI_BLOCK_SIZE; block_col += bw) {
    937         int index = block_row * mis + block_col;
    938         mi_8x8[index] = mi_upper_left + index;
    939         mi_8x8[index]->mbmi.sb_type = bsize;
    940       }
    941     }
    942   } else {
    943     // Else this is a partial SB64.
    944     for (block_row = 0; block_row < MI_BLOCK_SIZE; block_row += bh) {
    945       for (block_col = 0; block_col < MI_BLOCK_SIZE; block_col += bw) {
    946         int index = block_row * mis + block_col;
    947         // Find a partition size that fits
    948         bsize = find_partition_size(cpi->sf.always_this_block_size,
    949                                     (row8x8_remaining - block_row),
    950                                     (col8x8_remaining - block_col), &bh, &bw);
    951         mi_8x8[index] = mi_upper_left + index;
    952         mi_8x8[index]->mbmi.sb_type = bsize;
    953       }
    954     }
    955   }
    956 }
    957 
    958 static void copy_partitioning(VP9_COMP *cpi, MODE_INFO **mi_8x8,
    959                               MODE_INFO **prev_mi_8x8) {
    960   VP9_COMMON *const cm = &cpi->common;
    961   const int mis = cm->mode_info_stride;
    962   int block_row, block_col;
    963 
    964   for (block_row = 0; block_row < 8; ++block_row) {
    965     for (block_col = 0; block_col < 8; ++block_col) {
    966       MODE_INFO * prev_mi = prev_mi_8x8[block_row * mis + block_col];
    967       BLOCK_SIZE sb_type = prev_mi ? prev_mi->mbmi.sb_type : 0;
    968       ptrdiff_t offset;
    969 
    970       if (prev_mi) {
    971         offset = prev_mi - cm->prev_mi;
    972         mi_8x8[block_row * mis + block_col] = cm->mi + offset;
    973         mi_8x8[block_row * mis + block_col]->mbmi.sb_type = sb_type;
    974       }
    975     }
    976   }
    977 }
    978 
    979 static int sb_has_motion(VP9_COMP *cpi, MODE_INFO **prev_mi_8x8) {
    980   VP9_COMMON *const cm = &cpi->common;
    981   const int mis = cm->mode_info_stride;
    982   int block_row, block_col;
    983 
    984   if (cm->prev_mi) {
    985     for (block_row = 0; block_row < 8; ++block_row) {
    986       for (block_col = 0; block_col < 8; ++block_col) {
    987         MODE_INFO * prev_mi = prev_mi_8x8[block_row * mis + block_col];
    988         if (prev_mi) {
    989           if (abs(prev_mi->mbmi.mv[0].as_mv.row) >= 8 ||
    990               abs(prev_mi->mbmi.mv[0].as_mv.col) >= 8)
    991             return 1;
    992         }
    993       }
    994     }
    995   }
    996   return 0;
    997 }
    998 
// Rate-distortion search using the partitioning already stored in mi_8x8
// (e.g. copied from the previous frame or forced by set_partitioning) as
// the starting point. Depending on speed features it may also evaluate
// PARTITION_NONE and a one-level PARTITION_SPLIT alternative, pick the
// cheapest of the candidates, and (when do_recon) encode the winner.
// Outputs the chosen rate/distortion through *rate / *dist.
static void rd_use_partition(VP9_COMP *cpi,
                             const TileInfo *const tile,
                             MODE_INFO **mi_8x8,
                             TOKENEXTRA **tp, int mi_row, int mi_col,
                             BLOCK_SIZE bsize, int *rate, int64_t *dist,
                             int do_recon) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &cpi->mb;
  const int mis = cm->mode_info_stride;
  int bsl = b_width_log2(bsize);
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  // ms / mh: half of the block's 4x4 width / height; (ms >> 1) is used
  // below as the half-block step in mi (8x8) units.
  int ms = num_4x4_blocks_wide / 2;
  int mh = num_4x4_blocks_high / 2;
  int bss = (1 << bsl) / 4;
  int i, pl;
  PARTITION_TYPE partition = PARTITION_NONE;
  BLOCK_SIZE subsize;
  // Scratch buffers used to snapshot and restore the entropy/partition
  // contexts around each trial encode.
  ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
  PARTITION_CONTEXT sl[8], sa[8];
  // Rate/distortion for each candidate; INT_MAX marks "not evaluated /
  // invalid" throughout.
  int last_part_rate = INT_MAX;
  int64_t last_part_dist = INT_MAX;
  int split_rate = INT_MAX;
  int64_t split_dist = INT_MAX;
  int none_rate = INT_MAX;
  int64_t none_dist = INT_MAX;
  int chosen_rate = INT_MAX;
  int64_t chosen_dist = INT_MAX;
  BLOCK_SIZE sub_subsize = BLOCK_4X4;
  int splits_below = 0;
  // Partitioning inherited from mi_8x8; restored if we trial other options.
  BLOCK_SIZE bs_type = mi_8x8[0]->mbmi.sb_type;

  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
    return;

  partition = partition_lookup[bsl][bs_type];

  subsize = get_subsize(bsize, partition);

  if (bsize < BLOCK_8X8) {
    // When ab_index = 0 all sub-blocks are handled, so for ab_index != 0
    // there is nothing to be done.
    if (x->ab_index != 0) {
      *rate = 0;
      *dist = 0;
      return;
    }
  } else {
    *(get_sb_partitioning(x, bsize)) = subsize;
  }
  // Snapshot contexts so each trial below starts from the same state.
  save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);

  if (bsize == BLOCK_16X16) {
    set_offsets(cpi, tile, mi_row, mi_col, bsize);
    x->mb_energy = vp9_block_energy(cpi, x, bsize);
  }

  x->fast_ms = 0;
  x->subblock_ref = 0;

  if (cpi->sf.adjust_partitioning_from_last_frame) {
    // Check if any of the sub blocks are further split.
    if (partition == PARTITION_SPLIT && subsize > BLOCK_8X8) {
      sub_subsize = get_subsize(subsize, PARTITION_SPLIT);
      splits_below = 1;
      for (i = 0; i < 4; i++) {
        int jj = i >> 1, ii = i & 0x01;
        MODE_INFO * this_mi = mi_8x8[jj * bss * mis + ii * bss];
        if (this_mi && this_mi->mbmi.sb_type >= sub_subsize) {
          splits_below = 0;
        }
      }
    }

    // If partition is not none try none unless each of the 4 splits are split
    // even further..
    if (partition != PARTITION_NONE && !splits_below &&
        mi_row + (ms >> 1) < cm->mi_rows &&
        mi_col + (ms >> 1) < cm->mi_cols) {
      *(get_sb_partitioning(x, bsize)) = bsize;
      pick_sb_modes(cpi, tile, mi_row, mi_col, &none_rate, &none_dist, bsize,
                    get_block_context(x, bsize), INT64_MAX);

      pl = partition_plane_context(cpi->above_seg_context,
                                   cpi->left_seg_context,
                                   mi_row, mi_col, bsize);
      none_rate += x->partition_cost[pl][PARTITION_NONE];

      // Undo the trial's context changes and restore the inherited
      // partitioning before evaluating the "last frame" choice.
      restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
      mi_8x8[0]->mbmi.sb_type = bs_type;
      *(get_sb_partitioning(x, bsize)) = subsize;
    }
  }

  // Evaluate the inherited ("last part") partitioning.
  switch (partition) {
    case PARTITION_NONE:
      pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rate, &last_part_dist,
                    bsize, get_block_context(x, bsize), INT64_MAX);
      break;
    case PARTITION_HORZ:
      *get_sb_index(x, subsize) = 0;
      pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rate, &last_part_dist,
                    subsize, get_block_context(x, subsize), INT64_MAX);
      // Second half is only coded when it lies inside the image.
      if (last_part_rate != INT_MAX &&
          bsize >= BLOCK_8X8 && mi_row + (mh >> 1) < cm->mi_rows) {
        int rt = 0;
        int64_t dt = 0;
        update_state(cpi, get_block_context(x, subsize), subsize, 0);
        encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
        *get_sb_index(x, subsize) = 1;
        pick_sb_modes(cpi, tile, mi_row + (ms >> 1), mi_col, &rt, &dt, subsize,
                      get_block_context(x, subsize), INT64_MAX);
        if (rt == INT_MAX || dt == INT_MAX) {
          last_part_rate = INT_MAX;
          last_part_dist = INT_MAX;
          break;
        }

        last_part_rate += rt;
        last_part_dist += dt;
      }
      break;
    case PARTITION_VERT:
      *get_sb_index(x, subsize) = 0;
      pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rate, &last_part_dist,
                    subsize, get_block_context(x, subsize), INT64_MAX);
      // Second half is only coded when it lies inside the image.
      if (last_part_rate != INT_MAX &&
          bsize >= BLOCK_8X8 && mi_col + (ms >> 1) < cm->mi_cols) {
        int rt = 0;
        int64_t dt = 0;
        update_state(cpi, get_block_context(x, subsize), subsize, 0);
        encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
        *get_sb_index(x, subsize) = 1;
        pick_sb_modes(cpi, tile, mi_row, mi_col + (ms >> 1), &rt, &dt, subsize,
                      get_block_context(x, subsize), INT64_MAX);
        if (rt == INT_MAX || dt == INT_MAX) {
          last_part_rate = INT_MAX;
          last_part_dist = INT_MAX;
          break;
        }
        last_part_rate += rt;
        last_part_dist += dt;
      }
      break;
    case PARTITION_SPLIT:
      // Split partition.
      last_part_rate = 0;
      last_part_dist = 0;
      for (i = 0; i < 4; i++) {
        int x_idx = (i & 1) * (ms >> 1);
        int y_idx = (i >> 1) * (ms >> 1);
        int jj = i >> 1, ii = i & 0x01;
        int rt;
        int64_t dt;

        if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
          continue;

        *get_sb_index(x, subsize) = i;

        // Recurse into each quadrant with its own slice of mi_8x8.
        rd_use_partition(cpi, tile, mi_8x8 + jj * bss * mis + ii * bss, tp,
                         mi_row + y_idx, mi_col + x_idx, subsize, &rt, &dt,
                         i != 3);
        if (rt == INT_MAX || dt == INT_MAX) {
          last_part_rate = INT_MAX;
          last_part_dist = INT_MAX;
          break;
        }
        last_part_rate += rt;
        last_part_dist += dt;
      }
      break;
    default:
      assert(0);
  }

  // Add the cost of signalling this partition type.
  pl = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context,
                               mi_row, mi_col, bsize);
  if (last_part_rate < INT_MAX)
    last_part_rate += x->partition_cost[pl][partition];

  // Optionally trial a one-level 4-way split of this block (only when the
  // block, or at least its first half, fits in the image).
  if (cpi->sf.adjust_partitioning_from_last_frame
      && partition != PARTITION_SPLIT && bsize > BLOCK_8X8
      && (mi_row + ms < cm->mi_rows || mi_row + (ms >> 1) == cm->mi_rows)
      && (mi_col + ms < cm->mi_cols || mi_col + (ms >> 1) == cm->mi_cols)) {
    BLOCK_SIZE split_subsize = get_subsize(bsize, PARTITION_SPLIT);
    split_rate = 0;
    split_dist = 0;
    restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);

    // Split partition.
    for (i = 0; i < 4; i++) {
      int x_idx = (i & 1) * (num_4x4_blocks_wide >> 2);
      int y_idx = (i >> 1) * (num_4x4_blocks_wide >> 2);
      int rt = 0;
      int64_t dt = 0;
      // Inner scratch contexts shadow the outer ones on purpose: each
      // quadrant trial is bracketed by its own save/restore pair.
      ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
      PARTITION_CONTEXT sl[8], sa[8];

      if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
        continue;

      *get_sb_index(x, split_subsize) = i;
      *get_sb_partitioning(x, bsize) = split_subsize;
      *get_sb_partitioning(x, split_subsize) = split_subsize;

      save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);

      pick_sb_modes(cpi, tile, mi_row + y_idx, mi_col + x_idx, &rt, &dt,
                    split_subsize, get_block_context(x, split_subsize),
                    INT64_MAX);

      restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);

      if (rt == INT_MAX || dt == INT_MAX) {
        split_rate = INT_MAX;
        split_dist = INT_MAX;
        break;
      }

      // Encode (without output) so later quadrants see correct context;
      // the last quadrant needs no encode since nothing follows it.
      if (i != 3)
        encode_sb(cpi, tile, tp,  mi_row + y_idx, mi_col + x_idx, 0,
                  split_subsize);

      split_rate += rt;
      split_dist += dt;
      pl = partition_plane_context(cpi->above_seg_context,
                                   cpi->left_seg_context,
                                   mi_row + y_idx, mi_col + x_idx, bsize);
      split_rate += x->partition_cost[pl][PARTITION_NONE];
    }
    pl = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context,
                                 mi_row, mi_col, bsize);
    if (split_rate < INT_MAX) {
      split_rate += x->partition_cost[pl][PARTITION_SPLIT];

      chosen_rate = split_rate;
      chosen_dist = split_dist;
    }
  }

  // If last_part is better set the partitioning to that...
  if (RDCOST(x->rdmult, x->rddiv, last_part_rate, last_part_dist)
      < RDCOST(x->rdmult, x->rddiv, chosen_rate, chosen_dist)) {
    mi_8x8[0]->mbmi.sb_type = bsize;
    if (bsize >= BLOCK_8X8)
      *(get_sb_partitioning(x, bsize)) = subsize;
    chosen_rate = last_part_rate;
    chosen_dist = last_part_dist;
  }
  // If none was better set the partitioning to that...
  if (RDCOST(x->rdmult, x->rddiv, chosen_rate, chosen_dist)
      > RDCOST(x->rdmult, x->rddiv, none_rate, none_dist)) {
    if (bsize >= BLOCK_8X8)
      *(get_sb_partitioning(x, bsize)) = bsize;
    chosen_rate = none_rate;
    chosen_dist = none_dist;
  }

  restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);

  // We must have chosen a partitioning and encoding or we'll fail later on.
  // No other opportunities for success.
  if ( bsize == BLOCK_64X64)
    assert(chosen_rate < INT_MAX && chosen_dist < INT_MAX);

  // Final encode of the winning choice; output is only produced at the
  // top (64x64) level of the recursion.
  if (do_recon)
    encode_sb(cpi, tile, tp, mi_row, mi_col, bsize == BLOCK_64X64, bsize);

  *rate = chosen_rate;
  *dist = chosen_dist;
}
   1271 
// Indexed by an observed minimum sb_type: the smallest partition size the
// auto range search should still consider (gives some leeway below the
// observed minimum).
static const BLOCK_SIZE min_partition_size[BLOCK_SIZES] = {
  BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, BLOCK_4X4,
  BLOCK_4X4, BLOCK_4X4, BLOCK_8X8, BLOCK_8X8,
  BLOCK_8X8, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16
};

// Indexed by an observed maximum sb_type: the largest partition size the
// auto range search should still consider (gives some leeway above the
// observed maximum).
static const BLOCK_SIZE max_partition_size[BLOCK_SIZES] = {
  BLOCK_8X8, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16,
  BLOCK_32X32, BLOCK_32X32, BLOCK_32X32, BLOCK_64X64,
  BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, BLOCK_64X64
};
   1283 
   1284 // Look at all the mode_info entries for blocks that are part of this
   1285 // partition and find the min and max values for sb_type.
   1286 // At the moment this is designed to work on a 64x64 SB but could be
   1287 // adjusted to use a size parameter.
   1288 //
   1289 // The min and max are assumed to have been initialized prior to calling this
   1290 // function so repeat calls can accumulate a min and max of more than one sb64.
   1291 static void get_sb_partition_size_range(VP9_COMP *cpi, MODE_INFO ** mi_8x8,
   1292                                         BLOCK_SIZE * min_block_size,
   1293                                         BLOCK_SIZE * max_block_size ) {
   1294   MACROBLOCKD *const xd = &cpi->mb.e_mbd;
   1295   int sb_width_in_blocks = MI_BLOCK_SIZE;
   1296   int sb_height_in_blocks  = MI_BLOCK_SIZE;
   1297   int i, j;
   1298   int index = 0;
   1299 
   1300   // Check the sb_type for each block that belongs to this region.
   1301   for (i = 0; i < sb_height_in_blocks; ++i) {
   1302     for (j = 0; j < sb_width_in_blocks; ++j) {
   1303       MODE_INFO * mi = mi_8x8[index+j];
   1304       BLOCK_SIZE sb_type = mi ? mi->mbmi.sb_type : 0;
   1305       *min_block_size = MIN(*min_block_size, sb_type);
   1306       *max_block_size = MAX(*max_block_size, sb_type);
   1307     }
   1308     index += xd->mode_info_stride;
   1309   }
   1310 }
   1311 
   1312 // Look at neighboring blocks and set a min and max partition size based on
   1313 // what they chose.
   1314 static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile,
   1315                                     int row, int col,
   1316                                     BLOCK_SIZE *min_block_size,
   1317                                     BLOCK_SIZE *max_block_size) {
   1318   VP9_COMMON * const cm = &cpi->common;
   1319   MACROBLOCKD *const xd = &cpi->mb.e_mbd;
   1320   MODE_INFO ** mi_8x8 = xd->mi_8x8;
   1321   MODE_INFO ** prev_mi_8x8 = xd->prev_mi_8x8;
   1322 
   1323   const int left_in_image = xd->left_available && mi_8x8[-1];
   1324   const int above_in_image = xd->up_available &&
   1325                              mi_8x8[-xd->mode_info_stride];
   1326   MODE_INFO ** above_sb64_mi_8x8;
   1327   MODE_INFO ** left_sb64_mi_8x8;
   1328 
   1329   int row8x8_remaining = tile->mi_row_end - row;
   1330   int col8x8_remaining = tile->mi_col_end - col;
   1331   int bh, bw;
   1332 
   1333   // Trap case where we do not have a prediction.
   1334   if (!left_in_image && !above_in_image &&
   1335       ((cm->frame_type == KEY_FRAME) || !cm->prev_mi)) {
   1336     *min_block_size = BLOCK_4X4;
   1337     *max_block_size = BLOCK_64X64;
   1338   } else {
   1339     // Default "min to max" and "max to min"
   1340     *min_block_size = BLOCK_64X64;
   1341     *max_block_size = BLOCK_4X4;
   1342 
   1343     // NOTE: each call to get_sb_partition_size_range() uses the previous
   1344     // passed in values for min and max as a starting point.
   1345     //
   1346     // Find the min and max partition used in previous frame at this location
   1347     if (cm->prev_mi && (cm->frame_type != KEY_FRAME)) {
   1348       get_sb_partition_size_range(cpi, prev_mi_8x8,
   1349                                   min_block_size, max_block_size);
   1350     }
   1351 
   1352     // Find the min and max partition sizes used in the left SB64
   1353     if (left_in_image) {
   1354       left_sb64_mi_8x8 = &mi_8x8[-MI_BLOCK_SIZE];
   1355       get_sb_partition_size_range(cpi, left_sb64_mi_8x8,
   1356                                   min_block_size, max_block_size);
   1357     }
   1358 
   1359     // Find the min and max partition sizes used in the above SB64.
   1360     if (above_in_image) {
   1361       above_sb64_mi_8x8 = &mi_8x8[-xd->mode_info_stride * MI_BLOCK_SIZE];
   1362       get_sb_partition_size_range(cpi, above_sb64_mi_8x8,
   1363                                   min_block_size, max_block_size);
   1364     }
   1365   }
   1366 
   1367   // Give a bit of leaway either side of the observed min and max
   1368   *min_block_size = min_partition_size[*min_block_size];
   1369   *max_block_size = max_partition_size[*max_block_size];
   1370 
   1371   // Check border cases where max and min from neighbours may not be legal.
   1372   *max_block_size = find_partition_size(*max_block_size,
   1373                                         row8x8_remaining, col8x8_remaining,
   1374                                         &bh, &bw);
   1375   *min_block_size = MIN(*min_block_size, *max_block_size);
   1376 }
   1377 
   1378 static void compute_fast_motion_search_level(VP9_COMP *cpi, BLOCK_SIZE bsize) {
   1379   VP9_COMMON *const cm = &cpi->common;
   1380   MACROBLOCK *const x = &cpi->mb;
   1381 
   1382   // Only use 8x8 result for non HD videos.
   1383   // int use_8x8 = (MIN(cpi->common.width, cpi->common.height) < 720) ? 1 : 0;
   1384   int use_8x8 = 1;
   1385 
   1386   if (cm->frame_type && !cpi->is_src_frame_alt_ref &&
   1387       ((use_8x8 && bsize == BLOCK_16X16) ||
   1388       bsize == BLOCK_32X32 || bsize == BLOCK_64X64)) {
   1389     int ref0 = 0, ref1 = 0, ref2 = 0, ref3 = 0;
   1390     PICK_MODE_CONTEXT *block_context = NULL;
   1391 
   1392     if (bsize == BLOCK_16X16) {
   1393       block_context = x->sb8x8_context[x->sb_index][x->mb_index];
   1394     } else if (bsize == BLOCK_32X32) {
   1395       block_context = x->mb_context[x->sb_index];
   1396     } else if (bsize == BLOCK_64X64) {
   1397       block_context = x->sb32_context;
   1398     }
   1399 
   1400     if (block_context) {
   1401       ref0 = block_context[0].mic.mbmi.ref_frame[0];
   1402       ref1 = block_context[1].mic.mbmi.ref_frame[0];
   1403       ref2 = block_context[2].mic.mbmi.ref_frame[0];
   1404       ref3 = block_context[3].mic.mbmi.ref_frame[0];
   1405     }
   1406 
   1407     // Currently, only consider 4 inter reference frames.
   1408     if (ref0 && ref1 && ref2 && ref3) {
   1409       int d01, d23, d02, d13;
   1410 
   1411       // Motion vectors for the four subblocks.
   1412       int16_t mvr0 = block_context[0].mic.mbmi.mv[0].as_mv.row;
   1413       int16_t mvc0 = block_context[0].mic.mbmi.mv[0].as_mv.col;
   1414       int16_t mvr1 = block_context[1].mic.mbmi.mv[0].as_mv.row;
   1415       int16_t mvc1 = block_context[1].mic.mbmi.mv[0].as_mv.col;
   1416       int16_t mvr2 = block_context[2].mic.mbmi.mv[0].as_mv.row;
   1417       int16_t mvc2 = block_context[2].mic.mbmi.mv[0].as_mv.col;
   1418       int16_t mvr3 = block_context[3].mic.mbmi.mv[0].as_mv.row;
   1419       int16_t mvc3 = block_context[3].mic.mbmi.mv[0].as_mv.col;
   1420 
   1421       // Adjust sign if ref is alt_ref.
   1422       if (cm->ref_frame_sign_bias[ref0]) {
   1423         mvr0 *= -1;
   1424         mvc0 *= -1;
   1425       }
   1426 
   1427       if (cm->ref_frame_sign_bias[ref1]) {
   1428         mvr1 *= -1;
   1429         mvc1 *= -1;
   1430       }
   1431 
   1432       if (cm->ref_frame_sign_bias[ref2]) {
   1433         mvr2 *= -1;
   1434         mvc2 *= -1;
   1435       }
   1436 
   1437       if (cm->ref_frame_sign_bias[ref3]) {
   1438         mvr3 *= -1;
   1439         mvc3 *= -1;
   1440       }
   1441 
   1442       // Calculate mv distances.
   1443       d01 = MAX(abs(mvr0 - mvr1), abs(mvc0 - mvc1));
   1444       d23 = MAX(abs(mvr2 - mvr3), abs(mvc2 - mvc3));
   1445       d02 = MAX(abs(mvr0 - mvr2), abs(mvc0 - mvc2));
   1446       d13 = MAX(abs(mvr1 - mvr3), abs(mvc1 - mvc3));
   1447 
   1448       if (d01 < FAST_MOTION_MV_THRESH && d23 < FAST_MOTION_MV_THRESH &&
   1449           d02 < FAST_MOTION_MV_THRESH && d13 < FAST_MOTION_MV_THRESH) {
   1450         // Set fast motion search level.
   1451         x->fast_ms = 1;
   1452 
   1453         if (ref0 == ref1 && ref1 == ref2 && ref2 == ref3 &&
   1454             d01 < 2 && d23 < 2 && d02 < 2 && d13 < 2) {
   1455           // Set fast motion search level.
   1456           x->fast_ms = 2;
   1457 
   1458           if (!d01 && !d23 && !d02 && !d13) {
   1459             x->fast_ms = 3;
   1460             x->subblock_ref = ref0;
   1461           }
   1462         }
   1463       }
   1464     }
   1465   }
   1466 }
   1467 
// Save the current prediction motion vectors into the mode context so they
// can be restored later with load_pred_mv().
static INLINE void store_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
  vpx_memcpy(ctx->pred_mv, x->pred_mv, sizeof(x->pred_mv));
}
   1471 
// Restore the prediction motion vectors previously saved into the mode
// context by store_pred_mv().
static INLINE void load_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
  vpx_memcpy(x->pred_mv, ctx->pred_mv, sizeof(x->pred_mv));
}
   1475 
   1476 // TODO(jingning,jimbankoski,rbultje): properly skip partition types that are
   1477 // unlikely to be selected depending on previous rate-distortion optimization
   1478 // results, for encoding speed-up.
   1479 static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
   1480                               TOKENEXTRA **tp, int mi_row,
   1481                               int mi_col, BLOCK_SIZE bsize, int *rate,
   1482                               int64_t *dist, int do_recon, int64_t best_rd) {
   1483   VP9_COMMON *const cm = &cpi->common;
   1484   MACROBLOCK *const x = &cpi->mb;
   1485   const int ms = num_8x8_blocks_wide_lookup[bsize] / 2;
   1486   ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
   1487   PARTITION_CONTEXT sl[8], sa[8];
   1488   TOKENEXTRA *tp_orig = *tp;
   1489   int i, pl;
   1490   BLOCK_SIZE subsize;
   1491   int this_rate, sum_rate = 0, best_rate = INT_MAX;
   1492   int64_t this_dist, sum_dist = 0, best_dist = INT64_MAX;
   1493   int64_t sum_rd = 0;
   1494   int do_split = bsize >= BLOCK_8X8;
   1495   int do_rect = 1;
   1496   // Override skipping rectangular partition operations for edge blocks
   1497   const int force_horz_split = (mi_row + ms >= cm->mi_rows);
   1498   const int force_vert_split = (mi_col + ms >= cm->mi_cols);
   1499 
   1500   int partition_none_allowed = !force_horz_split && !force_vert_split;
   1501   int partition_horz_allowed = !force_vert_split && bsize >= BLOCK_8X8;
   1502   int partition_vert_allowed = !force_horz_split && bsize >= BLOCK_8X8;
   1503 
   1504   int partition_split_done = 0;
   1505   (void) *tp_orig;
   1506 
   1507   if (bsize < BLOCK_8X8) {
   1508     // When ab_index = 0 all sub-blocks are handled, so for ab_index != 0
   1509     // there is nothing to be done.
   1510     if (x->ab_index != 0) {
   1511       *rate = 0;
   1512       *dist = 0;
   1513       return;
   1514     }
   1515   }
   1516   assert(num_8x8_blocks_wide_lookup[bsize] ==
   1517              num_8x8_blocks_high_lookup[bsize]);
   1518 
   1519   if (bsize == BLOCK_16X16) {
   1520     set_offsets(cpi, tile, mi_row, mi_col, bsize);
   1521     x->mb_energy = vp9_block_energy(cpi, x, bsize);
   1522   }
   1523 
   1524   // Determine partition types in search according to the speed features.
   1525   // The threshold set here has to be of square block size.
   1526   if (cpi->sf.auto_min_max_partition_size) {
   1527     partition_none_allowed &= (bsize <= cpi->sf.max_partition_size &&
   1528                                bsize >= cpi->sf.min_partition_size);
   1529     partition_horz_allowed &= ((bsize <= cpi->sf.max_partition_size &&
   1530                                 bsize >  cpi->sf.min_partition_size) ||
   1531                                 force_horz_split);
   1532     partition_vert_allowed &= ((bsize <= cpi->sf.max_partition_size &&
   1533                                 bsize >  cpi->sf.min_partition_size) ||
   1534                                 force_vert_split);
   1535     do_split &= bsize > cpi->sf.min_partition_size;
   1536   }
   1537   if (cpi->sf.use_square_partition_only) {
   1538     partition_horz_allowed &= force_horz_split;
   1539     partition_vert_allowed &= force_vert_split;
   1540   }
   1541 
   1542   save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
   1543 
   1544   if (cpi->sf.disable_split_var_thresh && partition_none_allowed) {
   1545     unsigned int source_variancey;
   1546     vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);
   1547     source_variancey = get_sby_perpixel_variance(cpi, x, bsize);
   1548     if (source_variancey < cpi->sf.disable_split_var_thresh) {
   1549       do_split = 0;
   1550       if (source_variancey < cpi->sf.disable_split_var_thresh / 2)
   1551         do_rect = 0;
   1552     }
   1553   }
   1554 
   1555   // PARTITION_NONE
   1556   if (partition_none_allowed) {
   1557     pick_sb_modes(cpi, tile, mi_row, mi_col, &this_rate, &this_dist, bsize,
   1558                   get_block_context(x, bsize), best_rd);
   1559     if (this_rate != INT_MAX) {
   1560       if (bsize >= BLOCK_8X8) {
   1561         pl = partition_plane_context(cpi->above_seg_context,
   1562                                      cpi->left_seg_context,
   1563                                      mi_row, mi_col, bsize);
   1564         this_rate += x->partition_cost[pl][PARTITION_NONE];
   1565       }
   1566       sum_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_dist);
   1567       if (sum_rd < best_rd) {
   1568         int64_t stop_thresh = 2048;
   1569 
   1570         best_rate = this_rate;
   1571         best_dist = this_dist;
   1572         best_rd = sum_rd;
   1573         if (bsize >= BLOCK_8X8)
   1574           *(get_sb_partitioning(x, bsize)) = bsize;
   1575 
   1576         // Adjust threshold according to partition size.
   1577         stop_thresh >>= 8 - (b_width_log2_lookup[bsize] +
   1578             b_height_log2_lookup[bsize]);
   1579 
   1580         // If obtained distortion is very small, choose current partition
   1581         // and stop splitting.
   1582         if (this_dist < stop_thresh) {
   1583           do_split = 0;
   1584           do_rect = 0;
   1585         }
   1586       }
   1587     }
   1588     restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
   1589   }
   1590 
   1591   // store estimated motion vector
   1592   if (cpi->sf.adaptive_motion_search)
   1593     store_pred_mv(x, get_block_context(x, bsize));
   1594 
   1595   // PARTITION_SPLIT
   1596   sum_rd = 0;
   1597   // TODO(jingning): use the motion vectors given by the above search as
   1598   // the starting point of motion search in the following partition type check.
   1599   if (do_split) {
   1600     subsize = get_subsize(bsize, PARTITION_SPLIT);
   1601     for (i = 0; i < 4 && sum_rd < best_rd; ++i) {
   1602       const int x_idx = (i & 1) * ms;
   1603       const int y_idx = (i >> 1) * ms;
   1604 
   1605       if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
   1606         continue;
   1607 
   1608       *get_sb_index(x, subsize) = i;
   1609       if (cpi->sf.adaptive_motion_search)
   1610         load_pred_mv(x, get_block_context(x, bsize));
   1611       rd_pick_partition(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx, subsize,
   1612                         &this_rate, &this_dist, i != 3, best_rd - sum_rd);
   1613 
   1614       if (this_rate == INT_MAX) {
   1615         sum_rd = INT64_MAX;
   1616       } else {
   1617         sum_rate += this_rate;
   1618         sum_dist += this_dist;
   1619         sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
   1620       }
   1621     }
   1622     if (sum_rd < best_rd && i == 4) {
   1623       pl = partition_plane_context(cpi->above_seg_context,
   1624                                    cpi->left_seg_context,
   1625                                    mi_row, mi_col, bsize);
   1626       sum_rate += x->partition_cost[pl][PARTITION_SPLIT];
   1627       sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
   1628       if (sum_rd < best_rd) {
   1629         best_rate = sum_rate;
   1630         best_dist = sum_dist;
   1631         best_rd = sum_rd;
   1632         *(get_sb_partitioning(x, bsize)) = subsize;
   1633       }
   1634     } else {
   1635       // skip rectangular partition test when larger block size
   1636       // gives better rd cost
   1637       if (cpi->sf.less_rectangular_check)
   1638         do_rect &= !partition_none_allowed;
   1639     }
   1640     partition_split_done = 1;
   1641     restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
   1642   }
   1643 
   1644   x->fast_ms = 0;
   1645   x->subblock_ref = 0;
   1646 
   1647   if (partition_split_done &&
   1648       cpi->sf.using_small_partition_info) {
   1649     compute_fast_motion_search_level(cpi, bsize);
   1650   }
   1651 
   1652   // PARTITION_HORZ
   1653   if (partition_horz_allowed && do_rect) {
   1654     subsize = get_subsize(bsize, PARTITION_HORZ);
   1655     *get_sb_index(x, subsize) = 0;
   1656     if (cpi->sf.adaptive_motion_search)
   1657       load_pred_mv(x, get_block_context(x, bsize));
   1658     pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize,
   1659                   get_block_context(x, subsize), best_rd);
   1660     sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
   1661 
   1662     if (sum_rd < best_rd && mi_row + ms < cm->mi_rows) {
   1663       update_state(cpi, get_block_context(x, subsize), subsize, 0);
   1664       encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
   1665 
   1666       *get_sb_index(x, subsize) = 1;
   1667       if (cpi->sf.adaptive_motion_search)
   1668         load_pred_mv(x, get_block_context(x, bsize));
   1669       pick_sb_modes(cpi, tile, mi_row + ms, mi_col, &this_rate,
   1670                     &this_dist, subsize, get_block_context(x, subsize),
   1671                     best_rd - sum_rd);
   1672       if (this_rate == INT_MAX) {
   1673         sum_rd = INT64_MAX;
   1674       } else {
   1675         sum_rate += this_rate;
   1676         sum_dist += this_dist;
   1677         sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
   1678       }
   1679     }
   1680     if (sum_rd < best_rd) {
   1681       pl = partition_plane_context(cpi->above_seg_context,
   1682                                    cpi->left_seg_context,
   1683                                    mi_row, mi_col, bsize);
   1684       sum_rate += x->partition_cost[pl][PARTITION_HORZ];
   1685       sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
   1686       if (sum_rd < best_rd) {
   1687         best_rd = sum_rd;
   1688         best_rate = sum_rate;
   1689         best_dist = sum_dist;
   1690         *(get_sb_partitioning(x, bsize)) = subsize;
   1691       }
   1692     }
   1693     restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
   1694   }
   1695 
   1696   // PARTITION_VERT
   1697   if (partition_vert_allowed && do_rect) {
   1698     subsize = get_subsize(bsize, PARTITION_VERT);
   1699 
   1700     *get_sb_index(x, subsize) = 0;
   1701     if (cpi->sf.adaptive_motion_search)
   1702       load_pred_mv(x, get_block_context(x, bsize));
   1703     pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize,
   1704                   get_block_context(x, subsize), best_rd);
   1705     sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
   1706     if (sum_rd < best_rd && mi_col + ms < cm->mi_cols) {
   1707       update_state(cpi, get_block_context(x, subsize), subsize, 0);
   1708       encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
   1709 
   1710       *get_sb_index(x, subsize) = 1;
   1711       if (cpi->sf.adaptive_motion_search)
   1712         load_pred_mv(x, get_block_context(x, bsize));
   1713       pick_sb_modes(cpi, tile, mi_row, mi_col + ms, &this_rate,
   1714                     &this_dist, subsize, get_block_context(x, subsize),
   1715                     best_rd - sum_rd);
   1716       if (this_rate == INT_MAX) {
   1717         sum_rd = INT64_MAX;
   1718       } else {
   1719         sum_rate += this_rate;
   1720         sum_dist += this_dist;
   1721         sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
   1722       }
   1723     }
   1724     if (sum_rd < best_rd) {
   1725       pl = partition_plane_context(cpi->above_seg_context,
   1726                                    cpi->left_seg_context,
   1727                                    mi_row, mi_col, bsize);
   1728       sum_rate += x->partition_cost[pl][PARTITION_VERT];
   1729       sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
   1730       if (sum_rd < best_rd) {
   1731         best_rate = sum_rate;
   1732         best_dist = sum_dist;
   1733         best_rd = sum_rd;
   1734         *(get_sb_partitioning(x, bsize)) = subsize;
   1735       }
   1736     }
   1737     restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
   1738   }
   1739 
   1740 
   1741   *rate = best_rate;
   1742   *dist = best_dist;
   1743 
   1744   if (best_rate < INT_MAX && best_dist < INT64_MAX && do_recon)
   1745     encode_sb(cpi, tile, tp, mi_row, mi_col, bsize == BLOCK_64X64, bsize);
   1746   if (bsize == BLOCK_64X64) {
   1747     assert(tp_orig < *tp);
   1748     assert(best_rate < INT_MAX);
   1749     assert(best_dist < INT_MAX);
   1750   } else {
   1751     assert(tp_orig == *tp);
   1752   }
   1753 }
   1754 
   1755 // Examines 64x64 block and chooses a best reference frame
static void rd_pick_reference_frame(VP9_COMP *cpi, const TileInfo *const tile,
                                    int mi_row, int mi_col) {
  VP9_COMMON * const cm = &cpi->common;
  MACROBLOCK * const x = &cpi->mb;
  int bsl = b_width_log2(BLOCK_64X64), bs = 1 << bsl;
  int ms = bs / 2;  // half the 64x64 width, in 8x8 mode-info units
  ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
  PARTITION_CONTEXT sl[8], sa[8];
  int pl;
  int r;
  int64_t d;

  save_context(cpi, mi_row, mi_col, a, l, sa, sl, BLOCK_64X64);

  // Default is no mask (all reference frames allowed).
  cpi->ref_frame_mask = 0;

  // Do RD search for 64x64 only when at least half the block lies inside
  // the frame; set_ref_frame_mask makes pick_sb_modes record which
  // reference frames are worth keeping in the mask.
  if ((mi_row + (ms >> 1) < cm->mi_rows) &&
      (mi_col + (ms >> 1) < cm->mi_cols)) {
    cpi->set_ref_frame_mask = 1;
    pick_sb_modes(cpi, tile, mi_row, mi_col, &r, &d, BLOCK_64X64,
                  get_block_context(x, BLOCK_64X64), INT64_MAX);
    pl = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context,
                                 mi_row, mi_col, BLOCK_64X64);
    r += x->partition_cost[pl][PARTITION_NONE];

    *(get_sb_partitioning(x, BLOCK_64X64)) = BLOCK_64X64;
    cpi->set_ref_frame_mask = 0;
  }

  // The search above was only a probe for reference-frame statistics;
  // undo any entropy/partition context changes it made.
  restore_context(cpi, mi_row, mi_col, a, l, sa, sl, BLOCK_64X64);
}
   1789 
// Encodes one 64x64-superblock row of the given tile, choosing the
// partitioning strategy for each superblock per the active speed features.
static void encode_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
                          int mi_row, TOKENEXTRA **tp) {
  VP9_COMMON * const cm = &cpi->common;
  int mi_col;

  // Initialize the left context for the new SB row
  vpx_memset(&cpi->left_context, 0, sizeof(cpi->left_context));
  vpx_memset(cpi->left_seg_context, 0, sizeof(cpi->left_seg_context));

  // Code each SB in the row
  for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
       mi_col += MI_BLOCK_SIZE) {
    int dummy_rate;
    int64_t dummy_dist;

    // Clear motion-vector predictions carried over from the previous SB.
    vp9_zero(cpi->mb.pred_mv);

    if (cpi->sf.reference_masking)
      rd_pick_reference_frame(cpi, tile, mi_row, mi_col);

    if (cpi->sf.use_lastframe_partitioning ||
        cpi->sf.use_one_partition_size_always ) {
      const int idx_str = cm->mode_info_stride * mi_row + mi_col;
      MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str;
      MODE_INFO **prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str;

      cpi->mb.source_variance = UINT_MAX;
      if (cpi->sf.use_one_partition_size_always) {
        // Fixed partition size: no RD partition search at all.
        set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
        set_partitioning(cpi, tile, mi_8x8, mi_row, mi_col);
        rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
                         &dummy_rate, &dummy_dist, 1);
      } else {
        // Reuse last frame's partitioning unless it is stale: periodic
        // redo, no previous mode info, non-shown frame, key frame,
        // alt-ref overlay, or (in low-motion mode) detected motion.
        if ((cpi->common.current_video_frame
            % cpi->sf.last_partitioning_redo_frequency) == 0
            || cm->prev_mi == 0
            || cpi->common.show_frame == 0
            || cpi->common.frame_type == KEY_FRAME
            || cpi->is_src_frame_alt_ref
            || ((cpi->sf.use_lastframe_partitioning ==
                 LAST_FRAME_PARTITION_LOW_MOTION) &&
                 sb_has_motion(cpi, prev_mi_8x8))) {
          // If required set upper and lower partition size limits
          if (cpi->sf.auto_min_max_partition_size) {
            set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
            rd_auto_partition_range(cpi, tile, mi_row, mi_col,
                                    &cpi->sf.min_partition_size,
                                    &cpi->sf.max_partition_size);
          }
          rd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64,
                            &dummy_rate, &dummy_dist, 1, INT64_MAX);
        } else {
          copy_partitioning(cpi, mi_8x8, prev_mi_8x8);
          rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
                           &dummy_rate, &dummy_dist, 1);
        }
      }
    } else {
      // Full RD partition search.
      // If required set upper and lower partition size limits
      if (cpi->sf.auto_min_max_partition_size) {
        set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
        rd_auto_partition_range(cpi, tile, mi_row, mi_col,
                                &cpi->sf.min_partition_size,
                                &cpi->sf.max_partition_size);
      }
      rd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64,
                        &dummy_rate, &dummy_dist, 1, INT64_MAX);
    }
  }
}
   1860 
// Resets per-frame macroblock encoding state: source/reference plane
// pointers, default intra modes, mode/partition counters, and the above
// entropy/segment contexts.
static void init_encode_frame_mb_context(VP9_COMP *cpi) {
  MACROBLOCK *const x = &cpi->mb;
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);

  x->act_zbin_adj = 0;
  cpi->seg0_idx = 0;

  xd->mode_info_stride = cm->mode_info_stride;

  // reset intra mode contexts
  if (frame_is_intra_only(cm))
    vp9_init_mbmode_probs(cm);

  // Copy data over into macro block data structures.
  vp9_setup_src_planes(x, cpi->Source, 0, 0);

  // TODO(jkoleszar): are these initializations required?
  setup_pre_planes(xd, 0, &cm->yv12_fb[cm->ref_frame_map[cpi->lst_fb_idx]],
                   0, 0, NULL);
  setup_dst_planes(xd, get_frame_new_buffer(cm), 0, 0);

  setup_block_dptrs(&x->e_mbd, cm->subsampling_x, cm->subsampling_y);

  // Start from DC prediction until real modes are chosen per block.
  xd->mi_8x8[0]->mbmi.mode = DC_PRED;
  xd->mi_8x8[0]->mbmi.uv_mode = DC_PRED;

  // Zero all per-frame mode/reference/partition statistics counters.
  vp9_zero(cpi->y_mode_count);
  vp9_zero(cpi->y_uv_mode_count);
  vp9_zero(cm->counts.inter_mode);
  vp9_zero(cpi->partition_count);
  vp9_zero(cpi->intra_inter_count);
  vp9_zero(cpi->comp_inter_count);
  vp9_zero(cpi->single_ref_count);
  vp9_zero(cpi->comp_ref_count);
  vp9_zero(cm->counts.tx);
  vp9_zero(cm->counts.mbskip);

  // Note: this memset assumes above_context[0], [1] and [2]
  // are allocated as part of the same buffer.
  vpx_memset(cpi->above_context[0], 0,
             sizeof(*cpi->above_context[0]) *
             2 * aligned_mi_cols * MAX_MB_PLANE);
  vpx_memset(cpi->above_seg_context, 0,
             sizeof(*cpi->above_seg_context) * aligned_mi_cols);
}
   1908 
   1909 static void switch_lossless_mode(VP9_COMP *cpi, int lossless) {
   1910   if (lossless) {
   1911     // printf("Switching to lossless\n");
   1912     cpi->mb.fwd_txm4x4 = vp9_fwht4x4;
   1913     cpi->mb.e_mbd.itxm_add = vp9_iwht4x4_add;
   1914     cpi->mb.optimize = 0;
   1915     cpi->common.lf.filter_level = 0;
   1916     cpi->zbin_mode_boost_enabled = 0;
   1917     cpi->common.tx_mode = ONLY_4X4;
   1918   } else {
   1919     // printf("Not lossless\n");
   1920     cpi->mb.fwd_txm4x4 = vp9_fdct4x4;
   1921     cpi->mb.e_mbd.itxm_add = vp9_idct4x4_add;
   1922   }
   1923 }
   1924 
   1925 static void switch_tx_mode(VP9_COMP *cpi) {
   1926   if (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
   1927       cpi->common.tx_mode >= ALLOW_32X32)
   1928     cpi->common.tx_mode = ALLOW_32X32;
   1929 }
   1930 
// Frame-level encode driver: initializes quantizer/RD constants and contexts,
// then walks every tile and superblock row producing tokens, and finally
// derives the skip-encode decision for subsequent frames.
static void encode_frame_internal(VP9_COMP *cpi) {
  int mi_row;
  MACROBLOCK * const x = &cpi->mb;
  VP9_COMMON * const cm = &cpi->common;
  MACROBLOCKD * const xd = &x->e_mbd;

//  fprintf(stderr, "encode_frame_internal frame %d (%d) type %d\n",
//           cpi->common.current_video_frame, cpi->common.show_frame,
//           cm->frame_type);

// debug output
#if DBG_PRNT_SEGMAP
  {
    FILE *statsfile;
    statsfile = fopen("segmap2.stt", "a");
    fprintf(statsfile, "\n");
    fclose(statsfile);
  }
#endif

  vp9_zero(cm->counts.switchable_interp);
  vp9_zero(cpi->tx_stepdown_count);

  xd->mi_8x8 = cm->mi_grid_visible;
  // required for vp9_frame_init_quantizer
  xd->mi_8x8[0] = cm->mi;

  xd->last_mi = cm->prev_mi;

  vp9_zero(cpi->NMVcount);
  vp9_zero(cpi->coef_counts);
  vp9_zero(cm->counts.eob_branch);

  // Lossless only when the base quantizer and all delta-Q values are zero.
  cpi->mb.e_mbd.lossless = cm->base_qindex == 0 && cm->y_dc_delta_q == 0
      && cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0;
  switch_lossless_mode(cpi, cpi->mb.e_mbd.lossless);

  vp9_frame_init_quantizer(cpi);

  vp9_initialize_rd_consts(cpi);
  vp9_initialize_me_consts(cpi, cm->base_qindex);
  switch_tx_mode(cpi);

  if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
    // Initialize encode frame context.
    init_encode_frame_mb_context(cpi);

    // Build a frame level activity map
    build_activity_map(cpi);
  }

  // Re-initialize encode frame context.
  init_encode_frame_mb_context(cpi);

  vp9_zero(cpi->rd_comp_pred_diff);
  vp9_zero(cpi->rd_filter_diff);
  vp9_zero(cpi->rd_tx_select_diff);
  vp9_zero(cpi->rd_tx_select_threshes);

  set_prev_mi(cm);

  {
    struct vpx_usec_timer emr_timer;
    vpx_usec_timer_start(&emr_timer);

    {
      // Take tiles into account and give start/end MB
      int tile_col, tile_row;
      TOKENEXTRA *tp = cpi->tok;
      const int tile_cols = 1 << cm->log2_tile_cols;
      const int tile_rows = 1 << cm->log2_tile_rows;

      for (tile_row = 0; tile_row < tile_rows; tile_row++) {
        for (tile_col = 0; tile_col < tile_cols; tile_col++) {
          TileInfo tile;
          TOKENEXTRA *tp_old = tp;

          // For each row of SBs in the frame
          vp9_tile_init(&tile, cm, tile_row, tile_col);
          for (mi_row = tile.mi_row_start;
               mi_row < tile.mi_row_end; mi_row += 8)
            encode_sb_row(cpi, &tile, mi_row, &tp);

          // Record how many tokens this tile produced for the bitstream
          // packer, and verify we stayed inside the token buffer.
          cpi->tok_count[tile_row][tile_col] = (unsigned int)(tp - tp_old);
          assert(tp - cpi->tok <= get_token_alloc(cm->mb_rows, cm->mb_cols));
        }
      }
    }

    vpx_usec_timer_mark(&emr_timer);
    cpi->time_encode_sb_row += vpx_usec_timer_elapsed(&emr_timer);
  }

  // Decide whether the NEXT frame's encode may skip the final encode pass:
  // only for mostly-inter, shown, non-key frames.
  if (cpi->sf.skip_encode_sb) {
    int j;
    unsigned int intra_count = 0, inter_count = 0;
    for (j = 0; j < INTRA_INTER_CONTEXTS; ++j) {
      intra_count += cpi->intra_inter_count[j][0];
      inter_count += cpi->intra_inter_count[j][1];
    }
    cpi->sf.skip_encode_frame = ((intra_count << 2) < inter_count);
    cpi->sf.skip_encode_frame &= (cm->frame_type != KEY_FRAME);
    cpi->sf.skip_encode_frame &= cm->show_frame;
  } else {
    cpi->sf.skip_encode_frame = 0;
  }

#if 0
  // Keep record of the total distortion this time around for future use
  cpi->last_frame_distortion = cpi->frame_distortion;
#endif
}
   2043 
   2044 static int check_dual_ref_flags(VP9_COMP *cpi) {
   2045   const int ref_flags = cpi->ref_frame_flags;
   2046 
   2047   if (vp9_segfeature_active(&cpi->common.seg, 1, SEG_LVL_REF_FRAME)) {
   2048     return 0;
   2049   } else {
   2050     return (!!(ref_flags & VP9_GOLD_FLAG) + !!(ref_flags & VP9_LAST_FLAG)
   2051         + !!(ref_flags & VP9_ALT_FLAG)) >= 2;
   2052   }
   2053 }
   2054 
   2055 static int get_skip_flag(MODE_INFO **mi_8x8, int mis, int ymbs, int xmbs) {
   2056   int x, y;
   2057 
   2058   for (y = 0; y < ymbs; y++) {
   2059     for (x = 0; x < xmbs; x++) {
   2060       if (!mi_8x8[y * mis + x]->mbmi.skip_coeff)
   2061         return 0;
   2062     }
   2063   }
   2064 
   2065   return 1;
   2066 }
   2067 
   2068 static void set_txfm_flag(MODE_INFO **mi_8x8, int mis, int ymbs, int xmbs,
   2069                           TX_SIZE tx_size) {
   2070   int x, y;
   2071 
   2072   for (y = 0; y < ymbs; y++) {
   2073     for (x = 0; x < xmbs; x++)
   2074       mi_8x8[y * mis + x]->mbmi.tx_size = tx_size;
   2075   }
   2076 }
   2077 
   2078 static void reset_skip_txfm_size_b(VP9_COMP *cpi, MODE_INFO **mi_8x8,
   2079                                    int mis, TX_SIZE max_tx_size, int bw, int bh,
   2080                                    int mi_row, int mi_col, BLOCK_SIZE bsize) {
   2081   VP9_COMMON * const cm = &cpi->common;
   2082 
   2083   if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) {
   2084     return;
   2085   } else {
   2086     MB_MODE_INFO * const mbmi = &mi_8x8[0]->mbmi;
   2087     if (mbmi->tx_size > max_tx_size) {
   2088       const int ymbs = MIN(bh, cm->mi_rows - mi_row);
   2089       const int xmbs = MIN(bw, cm->mi_cols - mi_col);
   2090 
   2091       assert(vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) ||
   2092              get_skip_flag(mi_8x8, mis, ymbs, xmbs));
   2093       set_txfm_flag(mi_8x8, mis, ymbs, xmbs, max_tx_size);
   2094     }
   2095   }
   2096 }
   2097 
// Recursively clamps transform sizes over a (possibly partitioned) block:
// infers the partitioning at this node from the stored sb_type and applies
// reset_skip_txfm_size_b() to each coded region, recursing on splits.
static void reset_skip_txfm_size_sb(VP9_COMP *cpi, MODE_INFO **mi_8x8,
                                    TX_SIZE max_tx_size, int mi_row, int mi_col,
                                    BLOCK_SIZE bsize) {
  VP9_COMMON * const cm = &cpi->common;
  const int mis = cm->mode_info_stride;
  int bw, bh;
  const int bs = num_8x8_blocks_wide_lookup[bsize], hbs = bs / 2;

  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
    return;

  // Dimensions actually coded at this position; anything smaller than bs
  // means the block was partitioned further.
  bw = num_8x8_blocks_wide_lookup[mi_8x8[0]->mbmi.sb_type];
  bh = num_8x8_blocks_high_lookup[mi_8x8[0]->mbmi.sb_type];

  if (bw == bs && bh == bs) {
    // PARTITION_NONE: a single leaf covers the whole block.
    reset_skip_txfm_size_b(cpi, mi_8x8, mis, max_tx_size, bs, bs, mi_row,
                           mi_col, bsize);
  } else if (bw == bs && bh < bs) {
    // PARTITION_HORZ: top and bottom bs x hbs halves.
    reset_skip_txfm_size_b(cpi, mi_8x8, mis, max_tx_size, bs, hbs, mi_row,
                           mi_col, bsize);
    reset_skip_txfm_size_b(cpi, mi_8x8 + hbs * mis, mis, max_tx_size, bs, hbs,
                           mi_row + hbs, mi_col, bsize);
  } else if (bw < bs && bh == bs) {
    // PARTITION_VERT: left and right hbs x bs halves.
    reset_skip_txfm_size_b(cpi, mi_8x8, mis, max_tx_size, hbs, bs, mi_row,
                           mi_col, bsize);
    reset_skip_txfm_size_b(cpi, mi_8x8 + hbs, mis, max_tx_size, hbs, bs, mi_row,
                           mi_col + hbs, bsize);

  } else {
    // PARTITION_SPLIT: recurse into the four quadrants.
    const BLOCK_SIZE subsize = subsize_lookup[PARTITION_SPLIT][bsize];
    int n;

    assert(bw < bs && bh < bs);

    for (n = 0; n < 4; n++) {
      const int mi_dc = hbs * (n & 1);
      const int mi_dr = hbs * (n >> 1);

      reset_skip_txfm_size_sb(cpi, &mi_8x8[mi_dr * mis + mi_dc], max_tx_size,
                              mi_row + mi_dr, mi_col + mi_dc, subsize);
    }
  }
}
   2141 
   2142 static void reset_skip_txfm_size(VP9_COMP *cpi, TX_SIZE txfm_max) {
   2143   VP9_COMMON * const cm = &cpi->common;
   2144   int mi_row, mi_col;
   2145   const int mis = cm->mode_info_stride;
   2146 //  MODE_INFO *mi, *mi_ptr = cm->mi;
   2147   MODE_INFO **mi_8x8, **mi_ptr = cm->mi_grid_visible;
   2148 
   2149   for (mi_row = 0; mi_row < cm->mi_rows; mi_row += 8, mi_ptr += 8 * mis) {
   2150     mi_8x8 = mi_ptr;
   2151     for (mi_col = 0; mi_col < cm->mi_cols; mi_col += 8, mi_8x8 += 8) {
   2152       reset_skip_txfm_size_sb(cpi, mi_8x8, txfm_max, mi_row, mi_col,
   2153                               BLOCK_64X64);
   2154     }
   2155   }
   2156 }
   2157 
   2158 static int get_frame_type(VP9_COMP *cpi) {
   2159   int frame_type;
   2160   if (frame_is_intra_only(&cpi->common))
   2161     frame_type = 0;
   2162   else if (cpi->is_src_frame_alt_ref && cpi->refresh_golden_frame)
   2163     frame_type = 3;
   2164   else if (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)
   2165     frame_type = 1;
   2166   else
   2167     frame_type = 2;
   2168   return frame_type;
   2169 }
   2170 
   2171 static void select_tx_mode(VP9_COMP *cpi) {
   2172   if (cpi->oxcf.lossless) {
   2173     cpi->common.tx_mode = ONLY_4X4;
   2174   } else if (cpi->common.current_video_frame == 0) {
   2175     cpi->common.tx_mode = TX_MODE_SELECT;
   2176   } else {
   2177     if (cpi->sf.tx_size_search_method == USE_LARGESTALL) {
   2178       cpi->common.tx_mode = ALLOW_32X32;
   2179     } else if (cpi->sf.tx_size_search_method == USE_FULL_RD) {
   2180       int frame_type = get_frame_type(cpi);
   2181       cpi->common.tx_mode =
   2182           cpi->rd_tx_select_threshes[frame_type][ALLOW_32X32]
   2183           > cpi->rd_tx_select_threshes[frame_type][TX_MODE_SELECT] ?
   2184           ALLOW_32X32 : TX_MODE_SELECT;
   2185     } else {
   2186       unsigned int total = 0;
   2187       int i;
   2188       for (i = 0; i < TX_SIZES; ++i)
   2189         total += cpi->tx_stepdown_count[i];
   2190       if (total) {
   2191         double fraction = (double)cpi->tx_stepdown_count[0] / total;
   2192         cpi->common.tx_mode = fraction > 0.90 ? ALLOW_32X32 : TX_MODE_SELECT;
   2193         // printf("fraction = %f\n", fraction);
   2194       }  // else keep unchanged
   2195     }
   2196   }
   2197 }
   2198 
   2199 void vp9_encode_frame(VP9_COMP *cpi) {
   2200   VP9_COMMON * const cm = &cpi->common;
   2201 
   2202   // In the longer term the encoder should be generalized to match the
   2203   // decoder such that we allow compound where one of the 3 buffers has a
   2204   // different sign bias and that buffer is then the fixed ref. However, this
   2205   // requires further work in the rd loop. For now the only supported encoder
   2206   // side behavior is where the ALT ref buffer has opposite sign bias to
   2207   // the other two.
   2208   if (!frame_is_intra_only(cm)) {
   2209     if ((cm->ref_frame_sign_bias[ALTREF_FRAME]
   2210          == cm->ref_frame_sign_bias[GOLDEN_FRAME])
   2211         || (cm->ref_frame_sign_bias[ALTREF_FRAME]
   2212             == cm->ref_frame_sign_bias[LAST_FRAME])) {
   2213       cm->allow_comp_inter_inter = 0;
   2214     } else {
   2215       cm->allow_comp_inter_inter = 1;
   2216       cm->comp_fixed_ref = ALTREF_FRAME;
   2217       cm->comp_var_ref[0] = LAST_FRAME;
   2218       cm->comp_var_ref[1] = GOLDEN_FRAME;
   2219     }
   2220   }
   2221 
   2222   if (cpi->sf.RD) {
   2223     int i, pred_type;
   2224     INTERPOLATION_TYPE filter_type;
   2225     /*
   2226      * This code does a single RD pass over the whole frame assuming
   2227      * either compound, single or hybrid prediction as per whatever has
   2228      * worked best for that type of frame in the past.
   2229      * It also predicts whether another coding mode would have worked
   2230      * better that this coding mode. If that is the case, it remembers
   2231      * that for subsequent frames.
   2232      * It does the same analysis for transform size selection also.
   2233      */
   2234     int frame_type = get_frame_type(cpi);
   2235 
   2236     /* prediction (compound, single or hybrid) mode selection */
   2237     if (frame_type == 3 || !cm->allow_comp_inter_inter)
   2238       pred_type = SINGLE_PREDICTION_ONLY;
   2239     else if (cpi->rd_prediction_type_threshes[frame_type][1]
   2240              > cpi->rd_prediction_type_threshes[frame_type][0]
   2241              && cpi->rd_prediction_type_threshes[frame_type][1]
   2242              > cpi->rd_prediction_type_threshes[frame_type][2]
   2243              && check_dual_ref_flags(cpi) && cpi->static_mb_pct == 100)
   2244       pred_type = COMP_PREDICTION_ONLY;
   2245     else if (cpi->rd_prediction_type_threshes[frame_type][0]
   2246              > cpi->rd_prediction_type_threshes[frame_type][2])
   2247       pred_type = SINGLE_PREDICTION_ONLY;
   2248     else
   2249       pred_type = HYBRID_PREDICTION;
   2250 
   2251     /* filter type selection */
   2252     // FIXME(rbultje) for some odd reason, we often select smooth_filter
   2253     // as default filter for ARF overlay frames. This is a REALLY BAD
   2254     // IDEA so we explicitly disable it here.
   2255     if (frame_type != 3 &&
   2256         cpi->rd_filter_threshes[frame_type][1] >
   2257             cpi->rd_filter_threshes[frame_type][0] &&
   2258         cpi->rd_filter_threshes[frame_type][1] >
   2259             cpi->rd_filter_threshes[frame_type][2] &&
   2260         cpi->rd_filter_threshes[frame_type][1] >
   2261             cpi->rd_filter_threshes[frame_type][SWITCHABLE_FILTERS]) {
   2262       filter_type = EIGHTTAP_SMOOTH;
   2263     } else if (cpi->rd_filter_threshes[frame_type][2] >
   2264             cpi->rd_filter_threshes[frame_type][0] &&
   2265         cpi->rd_filter_threshes[frame_type][2] >
   2266             cpi->rd_filter_threshes[frame_type][SWITCHABLE_FILTERS]) {
   2267       filter_type = EIGHTTAP_SHARP;
   2268     } else if (cpi->rd_filter_threshes[frame_type][0] >
   2269                   cpi->rd_filter_threshes[frame_type][SWITCHABLE_FILTERS]) {
   2270       filter_type = EIGHTTAP;
   2271     } else {
   2272       filter_type = SWITCHABLE;
   2273     }
   2274 
   2275     cpi->mb.e_mbd.lossless = 0;
   2276     if (cpi->oxcf.lossless) {
   2277       cpi->mb.e_mbd.lossless = 1;
   2278     }
   2279 
   2280     /* transform size selection (4x4, 8x8, 16x16 or select-per-mb) */
   2281     select_tx_mode(cpi);
   2282     cpi->common.comp_pred_mode = pred_type;
   2283     cpi->common.mcomp_filter_type = filter_type;
   2284     encode_frame_internal(cpi);
   2285 
   2286     for (i = 0; i < NB_PREDICTION_TYPES; ++i) {
   2287       const int diff = (int) (cpi->rd_comp_pred_diff[i] / cpi->common.MBs);
   2288       cpi->rd_prediction_type_threshes[frame_type][i] += diff;
   2289       cpi->rd_prediction_type_threshes[frame_type][i] >>= 1;
   2290     }
   2291 
   2292     for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
   2293       const int64_t diff = cpi->rd_filter_diff[i] / cpi->common.MBs;
   2294       cpi->rd_filter_threshes[frame_type][i] =
   2295           (cpi->rd_filter_threshes[frame_type][i] + diff) / 2;
   2296     }
   2297 
   2298     for (i = 0; i < TX_MODES; ++i) {
   2299       int64_t pd = cpi->rd_tx_select_diff[i];
   2300       int diff;
   2301       if (i == TX_MODE_SELECT)
   2302         pd -= RDCOST(cpi->mb.rdmult, cpi->mb.rddiv,
   2303                      2048 * (TX_SIZES - 1), 0);
   2304       diff = (int) (pd / cpi->common.MBs);
   2305       cpi->rd_tx_select_threshes[frame_type][i] += diff;
   2306       cpi->rd_tx_select_threshes[frame_type][i] /= 2;
   2307     }
   2308 
   2309     if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) {
   2310       int single_count_zero = 0;
   2311       int comp_count_zero = 0;
   2312 
   2313       for (i = 0; i < COMP_INTER_CONTEXTS; i++) {
   2314         single_count_zero += cpi->comp_inter_count[i][0];
   2315         comp_count_zero += cpi->comp_inter_count[i][1];
   2316       }
   2317 
   2318       if (comp_count_zero == 0) {
   2319         cpi->common.comp_pred_mode = SINGLE_PREDICTION_ONLY;
   2320         vp9_zero(cpi->comp_inter_count);
   2321       } else if (single_count_zero == 0) {
   2322         cpi->common.comp_pred_mode = COMP_PREDICTION_ONLY;
   2323         vp9_zero(cpi->comp_inter_count);
   2324       }
   2325     }
   2326 
   2327     if (cpi->common.tx_mode == TX_MODE_SELECT) {
   2328       int count4x4 = 0;
   2329       int count8x8_lp = 0, count8x8_8x8p = 0;
   2330       int count16x16_16x16p = 0, count16x16_lp = 0;
   2331       int count32x32 = 0;
   2332 
   2333       for (i = 0; i < TX_SIZE_CONTEXTS; ++i) {
   2334         count4x4 += cm->counts.tx.p32x32[i][TX_4X4];
   2335         count4x4 += cm->counts.tx.p16x16[i][TX_4X4];
   2336         count4x4 += cm->counts.tx.p8x8[i][TX_4X4];
   2337 
   2338         count8x8_lp += cm->counts.tx.p32x32[i][TX_8X8];
   2339         count8x8_lp += cm->counts.tx.p16x16[i][TX_8X8];
   2340         count8x8_8x8p += cm->counts.tx.p8x8[i][TX_8X8];
   2341 
   2342         count16x16_16x16p += cm->counts.tx.p16x16[i][TX_16X16];
   2343         count16x16_lp += cm->counts.tx.p32x32[i][TX_16X16];
   2344         count32x32 += cm->counts.tx.p32x32[i][TX_32X32];
   2345       }
   2346 
   2347       if (count4x4 == 0 && count16x16_lp == 0 && count16x16_16x16p == 0
   2348           && count32x32 == 0) {
   2349         cpi->common.tx_mode = ALLOW_8X8;
   2350         reset_skip_txfm_size(cpi, TX_8X8);
   2351       } else if (count8x8_8x8p == 0 && count16x16_16x16p == 0
   2352                  && count8x8_lp == 0 && count16x16_lp == 0 && count32x32 == 0) {
   2353         cpi->common.tx_mode = ONLY_4X4;
   2354         reset_skip_txfm_size(cpi, TX_4X4);
   2355       } else if (count8x8_lp == 0 && count16x16_lp == 0 && count4x4 == 0) {
   2356         cpi->common.tx_mode = ALLOW_32X32;
   2357       } else if (count32x32 == 0 && count8x8_lp == 0 && count4x4 == 0) {
   2358         cpi->common.tx_mode = ALLOW_16X16;
   2359         reset_skip_txfm_size(cpi, TX_16X16);
   2360       }
   2361     }
   2362   } else {
   2363     encode_frame_internal(cpi);
   2364   }
   2365 }
   2366 
   2367 static void sum_intra_stats(VP9_COMP *cpi, const MODE_INFO *mi) {
   2368   const MB_PREDICTION_MODE y_mode = mi->mbmi.mode;
   2369   const MB_PREDICTION_MODE uv_mode = mi->mbmi.uv_mode;
   2370   const BLOCK_SIZE bsize = mi->mbmi.sb_type;
   2371 
   2372   ++cpi->y_uv_mode_count[y_mode][uv_mode];
   2373 
   2374   if (bsize < BLOCK_8X8) {
   2375     int idx, idy;
   2376     const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
   2377     const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
   2378     for (idy = 0; idy < 2; idy += num_4x4_blocks_high)
   2379       for (idx = 0; idx < 2; idx += num_4x4_blocks_wide)
   2380         ++cpi->y_mode_count[0][mi->bmi[idy * 2 + idx].as_mode];
   2381   } else {
   2382     ++cpi->y_mode_count[size_group_lookup[bsize]][y_mode];
   2383   }
   2384 }
   2385 
   2386 // Experimental stub function to create a per MB zbin adjustment based on
   2387 // some previously calculated measure of MB activity.
   2388 static void adjust_act_zbin(VP9_COMP *cpi, MACROBLOCK *x) {
   2389 #if USE_ACT_INDEX
   2390   x->act_zbin_adj = *(x->mb_activity_ptr);
   2391 #else
   2392   int64_t a;
   2393   int64_t b;
   2394   int64_t act = *(x->mb_activity_ptr);
   2395 
   2396   // Apply the masking to the RD multiplier.
   2397   a = act + 4 * cpi->activity_avg;
   2398   b = 4 * act + cpi->activity_avg;
   2399 
   2400   if (act > cpi->activity_avg)
   2401     x->act_zbin_adj = (int) (((int64_t) b + (a >> 1)) / a) - 1;
   2402   else
   2403     x->act_zbin_adj = 1 - (int) (((int64_t) a + (b >> 1)) / b);
   2404 #endif
   2405 }
// Encode one superblock (or sub-partition) whose mode decisions have already
// been made: run the intra/inter prediction, residual transform/quantization
// and tokenization for the block at (mi_row, mi_col) of size |bsize|, and —
// when |output_enabled| is set — update the per-frame counts used for
// probability adaptation.  Tokens are appended through |*t|.
static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
                              int mi_row, int mi_col, BLOCK_SIZE bsize) {
  VP9_COMMON * const cm = &cpi->common;
  MACROBLOCK * const x = &cpi->mb;
  MACROBLOCKD * const xd = &x->e_mbd;
  MODE_INFO **mi_8x8 = xd->mi_8x8;
  MODE_INFO *mi = mi_8x8[0];
  MB_MODE_INFO *mbmi = &mi->mbmi;
  PICK_MODE_CONTEXT *ctx = get_block_context(x, bsize);
  unsigned int segment_id = mbmi->segment_id;
  const int mis = cm->mode_info_stride;
  // Block dimensions in 8x8 mode-info units, used to backfill tx_size below.
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  x->skip_recode = !x->select_txfm_size && mbmi->sb_type >= BLOCK_8X8;
  // If this context was already coded in an earlier pass, skip the
  // coefficient optimization; then mark it coded for subsequent passes.
  x->skip_optimize = ctx->is_coded;
  ctx->is_coded = 1;
  x->use_lp32x32fdct = cpi->sf.use_lp32x32fdct;
  // Speed feature: skip encoding entirely on non-output (stats-only) passes
  // at low enough Q.
  x->skip_encode = (!output_enabled && cpi->sf.skip_encode_frame &&
                    x->q_index < QIDX_SKIP_THRESH);
  if (x->skip_encode)
    return;

  if (cm->frame_type == KEY_FRAME) {
    if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
      // SSIM tuning: per-MB activity-based zbin adjustment.
      adjust_act_zbin(cpi, x);
      vp9_update_zbin_extra(cpi, x);
    }
  } else {
    vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);

    if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
      // Adjust the zbin based on this MB rate.
      adjust_act_zbin(cpi, x);
    }

    // Experimental code. Special case for gf and arf zeromv modes.
    // Increase zbin size to suppress noise
    cpi->zbin_mode_boost = 0;
    if (cpi->zbin_mode_boost_enabled) {
      if (is_inter_block(mbmi)) {
        if (mbmi->mode == ZEROMV) {
          // Stronger boost for zero-motion blocks predicted from GF/ARF.
          if (mbmi->ref_frame[0] != LAST_FRAME)
            cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
          else
            cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
        } else if (mbmi->sb_type < BLOCK_8X8) {
          cpi->zbin_mode_boost = SPLIT_MV_ZBIN_BOOST;
        } else {
          cpi->zbin_mode_boost = MV_ZBIN_BOOST;
        }
      } else {
        cpi->zbin_mode_boost = INTRA_ZBIN_BOOST;
      }
    }

    // Must follow the zbin_mode_boost update so the boost takes effect.
    vp9_update_zbin_extra(cpi, x);
  }

  if (!is_inter_block(mbmi)) {
    // Intra path: predict + encode luma and chroma directly.  Sub-8x8
    // partitions are handled internally, hence the MAX(bsize, BLOCK_8X8).
    vp9_encode_intra_block_y(x, MAX(bsize, BLOCK_8X8));
    vp9_encode_intra_block_uv(x, MAX(bsize, BLOCK_8X8));
    if (output_enabled)
      sum_intra_stats(cpi, mi);
  } else {
    // Inter path: locate the reference frame buffer(s) and build the
    // motion-compensated prediction.
    int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, mbmi->ref_frame[0])];
    YV12_BUFFER_CONFIG *ref_fb = &cm->yv12_fb[idx];
    YV12_BUFFER_CONFIG *second_ref_fb = NULL;
    if (has_second_ref(mbmi)) {
      idx = cm->ref_frame_map[get_ref_frame_idx(cpi, mbmi->ref_frame[1])];
      second_ref_fb = &cm->yv12_fb[idx];
    }

    assert(cm->frame_type != KEY_FRAME);

    setup_pre_planes(xd, 0, ref_fb, mi_row, mi_col,
                     &xd->scale_factor[0]);
    setup_pre_planes(xd, 1, second_ref_fb, mi_row, mi_col,
                     &xd->scale_factor[1]);

    vp9_build_inter_predictors_sb(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8));
  }

  if (!is_inter_block(mbmi)) {
    // Intra blocks were already transformed/quantized above; just tokenize.
    vp9_tokenize_sb(cpi, t, !output_enabled, MAX(bsize, BLOCK_8X8));
  } else if (!x->skip) {
    // Inter, not skipped: transform/quantize the residual, then tokenize.
    vp9_encode_sb(x, MAX(bsize, BLOCK_8X8));
    vp9_tokenize_sb(cpi, t, !output_enabled, MAX(bsize, BLOCK_8X8));
  } else {
    // Inter skip: no residual is coded.  Derive the mbskip context from the
    // left and above neighbours and bump the "skipped" count.
    // NOTE(review): the above-neighbour term tests the mi_8x8[-mis] pointer
    // itself, while the left term tests xd->left_available — confirm this
    // asymmetry is intentional (pointer null iff no row above?).
    int mb_skip_context = xd->left_available ? mi_8x8[-1]->mbmi.skip_coeff : 0;
    mb_skip_context += mi_8x8[-mis] ? mi_8x8[-mis]->mbmi.skip_coeff : 0;

    mbmi->skip_coeff = 1;
    if (output_enabled)
      cm->counts.mbskip[mb_skip_context][1]++;
    reset_skip_context(xd, MAX(bsize, BLOCK_8X8));
  }

  if (output_enabled) {
    // Transform-size bookkeeping.  When the frame signals per-block tx-size
    // selection and this block actually coded one, count it for probability
    // adaptation; otherwise backfill a derived tx_size over the whole block's
    // mode-info grid so later context derivation sees consistent values.
    if (cm->tx_mode == TX_MODE_SELECT &&
        mbmi->sb_type >= BLOCK_8X8  &&
        !(is_inter_block(mbmi) &&
            (mbmi->skip_coeff ||
             vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)))) {
      const uint8_t context = vp9_get_pred_context_tx_size(xd);
      ++get_tx_counts(max_txsize_lookup[bsize],
                      context, &cm->counts.tx)[mbmi->tx_size];
    } else {
      int x, y;
      // Start from the largest tx size the frame's tx_mode allows, then
      // clamp it to what fits inside this block size (inter only).
      TX_SIZE sz = tx_mode_to_biggest_tx_size[cm->tx_mode];
      assert(sizeof(tx_mode_to_biggest_tx_size) /
             sizeof(tx_mode_to_biggest_tx_size[0]) == TX_MODES);
      // The new intra coding scheme requires no change of transform size
      if (is_inter_block(&mi->mbmi)) {
        if (sz == TX_32X32 && bsize < BLOCK_32X32)
          sz = TX_16X16;
        if (sz == TX_16X16 && bsize < BLOCK_16X16)
          sz = TX_8X8;
        if (sz == TX_8X8 && bsize < BLOCK_8X8)
          sz = TX_4X4;
      } else if (bsize >= BLOCK_8X8) {
        sz = mbmi->tx_size;
      } else {
        sz = TX_4X4;
      }

      // Write the derived size into every 8x8 mode-info cell covered by this
      // block, clipped to the frame boundary.
      for (y = 0; y < mi_height; y++)
        for (x = 0; x < mi_width; x++)
          if (mi_col + x < cm->mi_cols && mi_row + y < cm->mi_rows)
            mi_8x8[mis * y + x]->mbmi.tx_size = sz;
    }
  }
}
   2538