Home | History | Annotate | Download | only in encoder
      1 /*
      2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include <limits.h>
     12 #include <math.h>
     13 #include <stdio.h>
     14 
     15 #include "./vp9_rtcd.h"
     16 #include "./vpx_dsp_rtcd.h"
     17 #include "./vpx_config.h"
     18 
     19 #include "vpx_dsp/vpx_dsp_common.h"
     20 #include "vpx_ports/mem.h"
     21 #include "vpx_ports/vpx_timer.h"
     22 #include "vpx_ports/system_state.h"
     23 
     24 #include "vp9/common/vp9_common.h"
     25 #include "vp9/common/vp9_entropy.h"
     26 #include "vp9/common/vp9_entropymode.h"
     27 #include "vp9/common/vp9_idct.h"
     28 #include "vp9/common/vp9_mvref_common.h"
     29 #include "vp9/common/vp9_pred_common.h"
     30 #include "vp9/common/vp9_quant_common.h"
     31 #include "vp9/common/vp9_reconintra.h"
     32 #include "vp9/common/vp9_reconinter.h"
     33 #include "vp9/common/vp9_seg_common.h"
     34 #include "vp9/common/vp9_tile_common.h"
     35 
     36 #include "vp9/encoder/vp9_aq_360.h"
     37 #include "vp9/encoder/vp9_aq_complexity.h"
     38 #include "vp9/encoder/vp9_aq_cyclicrefresh.h"
     39 #include "vp9/encoder/vp9_aq_variance.h"
     40 #include "vp9/encoder/vp9_encodeframe.h"
     41 #include "vp9/encoder/vp9_encodemb.h"
     42 #include "vp9/encoder/vp9_encodemv.h"
     43 #include "vp9/encoder/vp9_ethread.h"
     44 #include "vp9/encoder/vp9_extend.h"
     45 #include "vp9/encoder/vp9_multi_thread.h"
     46 #include "vp9/encoder/vp9_partition_models.h"
     47 #include "vp9/encoder/vp9_pickmode.h"
     48 #include "vp9/encoder/vp9_rd.h"
     49 #include "vp9/encoder/vp9_rdopt.h"
     50 #include "vp9/encoder/vp9_segmentation.h"
     51 #include "vp9/encoder/vp9_tokenize.h"
     52 
// Forward declaration of the file-local encode_superblock() so that code
// placed ahead of its definition can call it.
static void encode_superblock(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
                              int output_enabled, int mi_row, int mi_col,
                              BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx);
     56 
     57 // This is used as a reference when computing the source variance for the
     58 //  purpose of activity masking.
     59 // Eventually this should be replaced by custom no-reference routines,
     60 //  which will be faster.
     61 static const uint8_t VP9_VAR_OFFS[64] = {
     62   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
     63   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
     64   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
     65   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
     66   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128
     67 };
     68 
     69 #if CONFIG_VP9_HIGHBITDEPTH
     70 static const uint16_t VP9_HIGH_VAR_OFFS_8[64] = {
     71   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
     72   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
     73   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
     74   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
     75   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128
     76 };
     77 
     78 static const uint16_t VP9_HIGH_VAR_OFFS_10[64] = {
     79   128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
     80   128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
     81   128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
     82   128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
     83   128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
     84   128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
     85   128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
     86   128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4
     87 };
     88 
     89 static const uint16_t VP9_HIGH_VAR_OFFS_12[64] = {
     90   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
     91   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
     92   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
     93   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
     94   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
     95   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
     96   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
     97   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
     98   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
     99   128 * 16
    100 };
    101 #endif  // CONFIG_VP9_HIGHBITDEPTH
    102 
    103 unsigned int vp9_get_sby_variance(VP9_COMP *cpi, const struct buf_2d *ref,
    104                                   BLOCK_SIZE bs) {
    105   unsigned int sse;
    106   const unsigned int var =
    107       cpi->fn_ptr[bs].vf(ref->buf, ref->stride, VP9_VAR_OFFS, 0, &sse);
    108   return var;
    109 }
    110 
    111 #if CONFIG_VP9_HIGHBITDEPTH
    112 unsigned int vp9_high_get_sby_variance(VP9_COMP *cpi, const struct buf_2d *ref,
    113                                        BLOCK_SIZE bs, int bd) {
    114   unsigned int var, sse;
    115   switch (bd) {
    116     case 10:
    117       var =
    118           cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
    119                              CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_10), 0, &sse);
    120       break;
    121     case 12:
    122       var =
    123           cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
    124                              CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_12), 0, &sse);
    125       break;
    126     case 8:
    127     default:
    128       var =
    129           cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
    130                              CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_8), 0, &sse);
    131       break;
    132   }
    133   return var;
    134 }
    135 #endif  // CONFIG_VP9_HIGHBITDEPTH
    136 
    137 unsigned int vp9_get_sby_perpixel_variance(VP9_COMP *cpi,
    138                                            const struct buf_2d *ref,
    139                                            BLOCK_SIZE bs) {
    140   return ROUND_POWER_OF_TWO(vp9_get_sby_variance(cpi, ref, bs),
    141                             num_pels_log2_lookup[bs]);
    142 }
    143 
    144 #if CONFIG_VP9_HIGHBITDEPTH
    145 unsigned int vp9_high_get_sby_perpixel_variance(VP9_COMP *cpi,
    146                                                 const struct buf_2d *ref,
    147                                                 BLOCK_SIZE bs, int bd) {
    148   return (unsigned int)ROUND64_POWER_OF_TWO(
    149       (int64_t)vp9_high_get_sby_variance(cpi, ref, bs, bd),
    150       num_pels_log2_lookup[bs]);
    151 }
    152 #endif  // CONFIG_VP9_HIGHBITDEPTH
    153 
    154 static unsigned int get_sby_perpixel_diff_variance(VP9_COMP *cpi,
    155                                                    const struct buf_2d *ref,
    156                                                    int mi_row, int mi_col,
    157                                                    BLOCK_SIZE bs) {
    158   unsigned int sse, var;
    159   uint8_t *last_y;
    160   const YV12_BUFFER_CONFIG *last = get_ref_frame_buffer(cpi, LAST_FRAME);
    161 
    162   assert(last != NULL);
    163   last_y =
    164       &last->y_buffer[mi_row * MI_SIZE * last->y_stride + mi_col * MI_SIZE];
    165   var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, last_y, last->y_stride, &sse);
    166   return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
    167 }
    168 
    169 static BLOCK_SIZE get_rd_var_based_fixed_partition(VP9_COMP *cpi, MACROBLOCK *x,
    170                                                    int mi_row, int mi_col) {
    171   unsigned int var = get_sby_perpixel_diff_variance(
    172       cpi, &x->plane[0].src, mi_row, mi_col, BLOCK_64X64);
    173   if (var < 8)
    174     return BLOCK_64X64;
    175   else if (var < 128)
    176     return BLOCK_32X32;
    177   else if (var < 2048)
    178     return BLOCK_16X16;
    179   else
    180     return BLOCK_8X8;
    181 }
    182 
    183 static void set_segment_index(VP9_COMP *cpi, MACROBLOCK *const x, int mi_row,
    184                               int mi_col, BLOCK_SIZE bsize, int segment_index) {
    185   VP9_COMMON *const cm = &cpi->common;
    186   const struct segmentation *const seg = &cm->seg;
    187   MACROBLOCKD *const xd = &x->e_mbd;
    188   MODE_INFO *mi = xd->mi[0];
    189 
    190   const AQ_MODE aq_mode = cpi->oxcf.aq_mode;
    191   const uint8_t *const map =
    192       seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
    193 
    194   // Initialize the segmentation index as 0.
    195   mi->segment_id = 0;
    196 
    197   // Skip the rest if AQ mode is disabled.
    198   if (!seg->enabled) return;
    199 
    200   switch (aq_mode) {
    201     case CYCLIC_REFRESH_AQ:
    202       mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
    203       break;
    204     case VARIANCE_AQ:
    205       if (cm->frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame ||
    206           cpi->force_update_segmentation ||
    207           (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) {
    208         int min_energy;
    209         int max_energy;
    210         // Get sub block energy range
    211         if (bsize >= BLOCK_32X32) {
    212           vp9_get_sub_block_energy(cpi, x, mi_row, mi_col, bsize, &min_energy,
    213                                    &max_energy);
    214         } else {
    215           min_energy = bsize <= BLOCK_16X16 ? x->mb_energy
    216                                             : vp9_block_energy(cpi, x, bsize);
    217         }
    218         mi->segment_id = vp9_vaq_segment_id(min_energy);
    219       } else {
    220         mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
    221       }
    222       break;
    223     case LOOKAHEAD_AQ:
    224       mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
    225       break;
    226     case EQUATOR360_AQ:
    227       if (cm->frame_type == KEY_FRAME || cpi->force_update_segmentation)
    228         mi->segment_id = vp9_360aq_segment_id(mi_row, cm->mi_rows);
    229       else
    230         mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
    231       break;
    232     case PSNR_AQ: mi->segment_id = segment_index; break;
    233     default:
    234       // NO_AQ or PSNR_AQ
    235       break;
    236   }
    237 
    238   vp9_init_plane_quantizers(cpi, x);
    239 }
    240 
    241 // Lighter version of set_offsets that only sets the mode info
    242 // pointers.
    243 static INLINE void set_mode_info_offsets(VP9_COMMON *const cm,
    244                                          MACROBLOCK *const x,
    245                                          MACROBLOCKD *const xd, int mi_row,
    246                                          int mi_col) {
    247   const int idx_str = xd->mi_stride * mi_row + mi_col;
    248   xd->mi = cm->mi_grid_visible + idx_str;
    249   xd->mi[0] = cm->mi + idx_str;
    250   x->mbmi_ext = x->mbmi_ext_base + (mi_row * cm->mi_cols + mi_col);
    251 }
    252 
    253 static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile,
    254                         MACROBLOCK *const x, int mi_row, int mi_col,
    255                         BLOCK_SIZE bsize) {
    256   VP9_COMMON *const cm = &cpi->common;
    257   MACROBLOCKD *const xd = &x->e_mbd;
    258   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
    259   const int mi_height = num_8x8_blocks_high_lookup[bsize];
    260   MvLimits *const mv_limits = &x->mv_limits;
    261 
    262   set_skip_context(xd, mi_row, mi_col);
    263 
    264   set_mode_info_offsets(cm, x, xd, mi_row, mi_col);
    265 
    266   // Set up destination pointers.
    267   vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col);
    268 
    269   // Set up limit values for MV components.
    270   // Mv beyond the range do not produce new/different prediction block.
    271   mv_limits->row_min = -(((mi_row + mi_height) * MI_SIZE) + VP9_INTERP_EXTEND);
    272   mv_limits->col_min = -(((mi_col + mi_width) * MI_SIZE) + VP9_INTERP_EXTEND);
    273   mv_limits->row_max = (cm->mi_rows - mi_row) * MI_SIZE + VP9_INTERP_EXTEND;
    274   mv_limits->col_max = (cm->mi_cols - mi_col) * MI_SIZE + VP9_INTERP_EXTEND;
    275 
    276   // Set up distance of MB to edge of frame in 1/8th pel units.
    277   assert(!(mi_col & (mi_width - 1)) && !(mi_row & (mi_height - 1)));
    278   set_mi_row_col(xd, tile, mi_row, mi_height, mi_col, mi_width, cm->mi_rows,
    279                  cm->mi_cols);
    280 
    281   // Set up source buffers.
    282   vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);
    283 
    284   // R/D setup.
    285   x->rddiv = cpi->rd.RDDIV;
    286   x->rdmult = cpi->rd.RDMULT;
    287 
    288   // required by vp9_append_sub8x8_mvs_for_idx() and vp9_find_best_ref_mvs()
    289   xd->tile = *tile;
    290 }
    291 
    292 static void duplicate_mode_info_in_sb(VP9_COMMON *cm, MACROBLOCKD *xd,
    293                                       int mi_row, int mi_col,
    294                                       BLOCK_SIZE bsize) {
    295   const int block_width =
    296       VPXMIN(num_8x8_blocks_wide_lookup[bsize], cm->mi_cols - mi_col);
    297   const int block_height =
    298       VPXMIN(num_8x8_blocks_high_lookup[bsize], cm->mi_rows - mi_row);
    299   const int mi_stride = xd->mi_stride;
    300   MODE_INFO *const src_mi = xd->mi[0];
    301   int i, j;
    302 
    303   for (j = 0; j < block_height; ++j)
    304     for (i = 0; i < block_width; ++i) xd->mi[j * mi_stride + i] = src_mi;
    305 }
    306 
    307 static void set_block_size(VP9_COMP *const cpi, MACROBLOCK *const x,
    308                            MACROBLOCKD *const xd, int mi_row, int mi_col,
    309                            BLOCK_SIZE bsize) {
    310   if (cpi->common.mi_cols > mi_col && cpi->common.mi_rows > mi_row) {
    311     set_mode_info_offsets(&cpi->common, x, xd, mi_row, mi_col);
    312     xd->mi[0]->sb_type = bsize;
    313   }
    314 }
    315 
    316 typedef struct {
    317   // This struct is used for computing variance in choose_partitioning(), where
    318   // the max number of samples within a superblock is 16x16 (with 4x4 avg). Even
    319   // in high bitdepth, uint32_t is enough for sum_square_error (2^12 * 2^12 * 16
    320   // * 16 = 2^32).
    321   uint32_t sum_square_error;
    322   int32_t sum_error;
    323   int log2_count;
    324   int variance;
    325 } var;
    326 
    327 typedef struct {
    328   var none;
    329   var horz[2];
    330   var vert[2];
    331 } partition_variance;
    332 
    333 typedef struct {
    334   partition_variance part_variances;
    335   var split[4];
    336 } v4x4;
    337 
    338 typedef struct {
    339   partition_variance part_variances;
    340   v4x4 split[4];
    341 } v8x8;
    342 
    343 typedef struct {
    344   partition_variance part_variances;
    345   v8x8 split[4];
    346 } v16x16;
    347 
    348 typedef struct {
    349   partition_variance part_variances;
    350   v16x16 split[4];
    351 } v32x32;
    352 
    353 typedef struct {
    354   partition_variance part_variances;
    355   v32x32 split[4];
    356 } v64x64;
    357 
    358 typedef struct {
    359   partition_variance *part_variances;
    360   var *split[4];
    361 } variance_node;
    362 
    363 typedef enum {
    364   V16X16,
    365   V32X32,
    366   V64X64,
    367 } TREE_LEVEL;
    368 
    369 static void tree_to_node(void *data, BLOCK_SIZE bsize, variance_node *node) {
    370   int i;
    371   node->part_variances = NULL;
    372   switch (bsize) {
    373     case BLOCK_64X64: {
    374       v64x64 *vt = (v64x64 *)data;
    375       node->part_variances = &vt->part_variances;
    376       for (i = 0; i < 4; i++)
    377         node->split[i] = &vt->split[i].part_variances.none;
    378       break;
    379     }
    380     case BLOCK_32X32: {
    381       v32x32 *vt = (v32x32 *)data;
    382       node->part_variances = &vt->part_variances;
    383       for (i = 0; i < 4; i++)
    384         node->split[i] = &vt->split[i].part_variances.none;
    385       break;
    386     }
    387     case BLOCK_16X16: {
    388       v16x16 *vt = (v16x16 *)data;
    389       node->part_variances = &vt->part_variances;
    390       for (i = 0; i < 4; i++)
    391         node->split[i] = &vt->split[i].part_variances.none;
    392       break;
    393     }
    394     case BLOCK_8X8: {
    395       v8x8 *vt = (v8x8 *)data;
    396       node->part_variances = &vt->part_variances;
    397       for (i = 0; i < 4; i++)
    398         node->split[i] = &vt->split[i].part_variances.none;
    399       break;
    400     }
    401     default: {
    402       v4x4 *vt = (v4x4 *)data;
    403       assert(bsize == BLOCK_4X4);
    404       node->part_variances = &vt->part_variances;
    405       for (i = 0; i < 4; i++) node->split[i] = &vt->split[i];
    406       break;
    407     }
    408   }
    409 }
    410 
    411 // Set variance values given sum square error, sum error, count.
    412 static void fill_variance(uint32_t s2, int32_t s, int c, var *v) {
    413   v->sum_square_error = s2;
    414   v->sum_error = s;
    415   v->log2_count = c;
    416 }
    417 
    418 static void get_variance(var *v) {
    419   v->variance =
    420       (int)(256 * (v->sum_square_error -
    421                    (uint32_t)(((int64_t)v->sum_error * v->sum_error) >>
    422                               v->log2_count)) >>
    423             v->log2_count);
    424 }
    425 
    426 static void sum_2_variances(const var *a, const var *b, var *r) {
    427   assert(a->log2_count == b->log2_count);
    428   fill_variance(a->sum_square_error + b->sum_square_error,
    429                 a->sum_error + b->sum_error, a->log2_count + 1, r);
    430 }
    431 
    432 static void fill_variance_tree(void *data, BLOCK_SIZE bsize) {
    433   variance_node node;
    434   memset(&node, 0, sizeof(node));
    435   tree_to_node(data, bsize, &node);
    436   sum_2_variances(node.split[0], node.split[1], &node.part_variances->horz[0]);
    437   sum_2_variances(node.split[2], node.split[3], &node.part_variances->horz[1]);
    438   sum_2_variances(node.split[0], node.split[2], &node.part_variances->vert[0]);
    439   sum_2_variances(node.split[1], node.split[3], &node.part_variances->vert[1]);
    440   sum_2_variances(&node.part_variances->vert[0], &node.part_variances->vert[1],
    441                   &node.part_variances->none);
    442 }
    443 
    444 static int set_vt_partitioning(VP9_COMP *cpi, MACROBLOCK *const x,
    445                                MACROBLOCKD *const xd, void *data,
    446                                BLOCK_SIZE bsize, int mi_row, int mi_col,
    447                                int64_t threshold, BLOCK_SIZE bsize_min,
    448                                int force_split) {
    449   VP9_COMMON *const cm = &cpi->common;
    450   variance_node vt;
    451   const int block_width = num_8x8_blocks_wide_lookup[bsize];
    452   const int block_height = num_8x8_blocks_high_lookup[bsize];
    453 
    454   assert(block_height == block_width);
    455   tree_to_node(data, bsize, &vt);
    456 
    457   if (force_split == 1) return 0;
    458 
    459   // For bsize=bsize_min (16x16/8x8 for 8x8/4x4 downsampling), select if
    460   // variance is below threshold, otherwise split will be selected.
    461   // No check for vert/horiz split as too few samples for variance.
    462   if (bsize == bsize_min) {
    463     // Variance already computed to set the force_split.
    464     if (frame_is_intra_only(cm)) get_variance(&vt.part_variances->none);
    465     if (mi_col + block_width / 2 < cm->mi_cols &&
    466         mi_row + block_height / 2 < cm->mi_rows &&
    467         vt.part_variances->none.variance < threshold) {
    468       set_block_size(cpi, x, xd, mi_row, mi_col, bsize);
    469       return 1;
    470     }
    471     return 0;
    472   } else if (bsize > bsize_min) {
    473     // Variance already computed to set the force_split.
    474     if (frame_is_intra_only(cm)) get_variance(&vt.part_variances->none);
    475     // For key frame: take split for bsize above 32X32 or very high variance.
    476     if (frame_is_intra_only(cm) &&
    477         (bsize > BLOCK_32X32 ||
    478          vt.part_variances->none.variance > (threshold << 4))) {
    479       return 0;
    480     }
    481     // If variance is low, take the bsize (no split).
    482     if (mi_col + block_width / 2 < cm->mi_cols &&
    483         mi_row + block_height / 2 < cm->mi_rows &&
    484         vt.part_variances->none.variance < threshold) {
    485       set_block_size(cpi, x, xd, mi_row, mi_col, bsize);
    486       return 1;
    487     }
    488 
    489     // Check vertical split.
    490     if (mi_row + block_height / 2 < cm->mi_rows) {
    491       BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_VERT);
    492       get_variance(&vt.part_variances->vert[0]);
    493       get_variance(&vt.part_variances->vert[1]);
    494       if (vt.part_variances->vert[0].variance < threshold &&
    495           vt.part_variances->vert[1].variance < threshold &&
    496           get_plane_block_size(subsize, &xd->plane[1]) < BLOCK_INVALID) {
    497         set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
    498         set_block_size(cpi, x, xd, mi_row, mi_col + block_width / 2, subsize);
    499         return 1;
    500       }
    501     }
    502     // Check horizontal split.
    503     if (mi_col + block_width / 2 < cm->mi_cols) {
    504       BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_HORZ);
    505       get_variance(&vt.part_variances->horz[0]);
    506       get_variance(&vt.part_variances->horz[1]);
    507       if (vt.part_variances->horz[0].variance < threshold &&
    508           vt.part_variances->horz[1].variance < threshold &&
    509           get_plane_block_size(subsize, &xd->plane[1]) < BLOCK_INVALID) {
    510         set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
    511         set_block_size(cpi, x, xd, mi_row + block_height / 2, mi_col, subsize);
    512         return 1;
    513       }
    514     }
    515 
    516     return 0;
    517   }
    518   return 0;
    519 }
    520 
    521 static int64_t scale_part_thresh_sumdiff(int64_t threshold_base, int speed,
    522                                          int width, int height,
    523                                          int content_state) {
    524   if (speed >= 8) {
    525     if (width <= 640 && height <= 480)
    526       return (5 * threshold_base) >> 2;
    527     else if ((content_state == kLowSadLowSumdiff) ||
    528              (content_state == kHighSadLowSumdiff) ||
    529              (content_state == kLowVarHighSumdiff))
    530       return (5 * threshold_base) >> 2;
    531   } else if (speed == 7) {
    532     if ((content_state == kLowSadLowSumdiff) ||
    533         (content_state == kHighSadLowSumdiff) ||
    534         (content_state == kLowVarHighSumdiff)) {
    535       return (5 * threshold_base) >> 2;
    536     }
    537   }
    538   return threshold_base;
    539 }
    540 
    541 // Set the variance split thresholds for following the block sizes:
    542 // 0 - threshold_64x64, 1 - threshold_32x32, 2 - threshold_16x16,
    543 // 3 - vbp_threshold_8x8. vbp_threshold_8x8 (to split to 4x4 partition) is
    544 // currently only used on key frame.
    545 static void set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q,
    546                                int content_state) {
    547   VP9_COMMON *const cm = &cpi->common;
    548   const int is_key_frame = frame_is_intra_only(cm);
    549   const int threshold_multiplier = is_key_frame ? 20 : 1;
    550   int64_t threshold_base =
    551       (int64_t)(threshold_multiplier * cpi->y_dequant[q][1]);
    552 
    553   if (is_key_frame) {
    554     thresholds[0] = threshold_base;
    555     thresholds[1] = threshold_base >> 2;
    556     thresholds[2] = threshold_base >> 2;
    557     thresholds[3] = threshold_base << 2;
    558   } else {
    559     // Increase base variance threshold based on estimated noise level.
    560     if (cpi->noise_estimate.enabled && cm->width >= 640 && cm->height >= 480) {
    561       NOISE_LEVEL noise_level =
    562           vp9_noise_estimate_extract_level(&cpi->noise_estimate);
    563       if (noise_level == kHigh)
    564         threshold_base = 3 * threshold_base;
    565       else if (noise_level == kMedium)
    566         threshold_base = threshold_base << 1;
    567       else if (noise_level < kLow)
    568         threshold_base = (7 * threshold_base) >> 3;
    569     }
    570 #if CONFIG_VP9_TEMPORAL_DENOISING
    571     if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) &&
    572         cpi->oxcf.speed > 5 && cpi->denoiser.denoising_level >= kDenLow)
    573       threshold_base =
    574           vp9_scale_part_thresh(threshold_base, cpi->denoiser.denoising_level,
    575                                 content_state, cpi->svc.temporal_layer_id);
    576     else
    577       threshold_base =
    578           scale_part_thresh_sumdiff(threshold_base, cpi->oxcf.speed, cm->width,
    579                                     cm->height, content_state);
    580 #else
    581     // Increase base variance threshold based on content_state/sum_diff level.
    582     threshold_base = scale_part_thresh_sumdiff(
    583         threshold_base, cpi->oxcf.speed, cm->width, cm->height, content_state);
    584 #endif
    585     thresholds[0] = threshold_base;
    586     thresholds[2] = threshold_base << cpi->oxcf.speed;
    587     if (cm->width >= 1280 && cm->height >= 720 && cpi->oxcf.speed < 7)
    588       thresholds[2] = thresholds[2] << 1;
    589     if (cm->width <= 352 && cm->height <= 288) {
    590       thresholds[0] = threshold_base >> 3;
    591       thresholds[1] = threshold_base >> 1;
    592       thresholds[2] = threshold_base << 3;
    593     } else if (cm->width < 1280 && cm->height < 720) {
    594       thresholds[1] = (5 * threshold_base) >> 2;
    595     } else if (cm->width < 1920 && cm->height < 1080) {
    596       thresholds[1] = threshold_base << 1;
    597     } else {
    598       thresholds[1] = (5 * threshold_base) >> 1;
    599     }
    600     if (cpi->sf.disable_16x16part_nonkey) thresholds[2] = INT64_MAX;
    601   }
    602 }
    603 
    604 void vp9_set_variance_partition_thresholds(VP9_COMP *cpi, int q,
    605                                            int content_state) {
    606   VP9_COMMON *const cm = &cpi->common;
    607   SPEED_FEATURES *const sf = &cpi->sf;
    608   const int is_key_frame = frame_is_intra_only(cm);
    609   if (sf->partition_search_type != VAR_BASED_PARTITION &&
    610       sf->partition_search_type != REFERENCE_PARTITION) {
    611     return;
    612   } else {
    613     set_vbp_thresholds(cpi, cpi->vbp_thresholds, q, content_state);
    614     // The thresholds below are not changed locally.
    615     if (is_key_frame) {
    616       cpi->vbp_threshold_sad = 0;
    617       cpi->vbp_threshold_copy = 0;
    618       cpi->vbp_bsize_min = BLOCK_8X8;
    619     } else {
    620       if (cm->width <= 352 && cm->height <= 288)
    621         cpi->vbp_threshold_sad = 10;
    622       else
    623         cpi->vbp_threshold_sad = (cpi->y_dequant[q][1] << 1) > 1000
    624                                      ? (cpi->y_dequant[q][1] << 1)
    625                                      : 1000;
    626       cpi->vbp_bsize_min = BLOCK_16X16;
    627       if (cm->width <= 352 && cm->height <= 288)
    628         cpi->vbp_threshold_copy = 4000;
    629       else if (cm->width <= 640 && cm->height <= 360)
    630         cpi->vbp_threshold_copy = 8000;
    631       else
    632         cpi->vbp_threshold_copy = (cpi->y_dequant[q][1] << 3) > 8000
    633                                       ? (cpi->y_dequant[q][1] << 3)
    634                                       : 8000;
    635       if (cpi->rc.high_source_sad ||
    636           (cpi->use_svc && cpi->svc.high_source_sad_superframe)) {
    637         cpi->vbp_threshold_sad = 0;
    638         cpi->vbp_threshold_copy = 0;
    639       }
    640     }
    641     cpi->vbp_threshold_minmax = 15 + (q >> 3);
    642   }
    643 }
    644 
    645 // Compute the minmax over the 8x8 subblocks.
    646 static int compute_minmax_8x8(const uint8_t *s, int sp, const uint8_t *d,
    647                               int dp, int x16_idx, int y16_idx,
    648 #if CONFIG_VP9_HIGHBITDEPTH
    649                               int highbd_flag,
    650 #endif
    651                               int pixels_wide, int pixels_high) {
    652   int k;
    653   int minmax_max = 0;
    654   int minmax_min = 255;
    655   // Loop over the 4 8x8 subblocks.
    656   for (k = 0; k < 4; k++) {
    657     int x8_idx = x16_idx + ((k & 1) << 3);
    658     int y8_idx = y16_idx + ((k >> 1) << 3);
    659     int min = 0;
    660     int max = 0;
    661     if (x8_idx < pixels_wide && y8_idx < pixels_high) {
    662 #if CONFIG_VP9_HIGHBITDEPTH
    663       if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
    664         vpx_highbd_minmax_8x8(s + y8_idx * sp + x8_idx, sp,
    665                               d + y8_idx * dp + x8_idx, dp, &min, &max);
    666       } else {
    667         vpx_minmax_8x8(s + y8_idx * sp + x8_idx, sp, d + y8_idx * dp + x8_idx,
    668                        dp, &min, &max);
    669       }
    670 #else
    671       vpx_minmax_8x8(s + y8_idx * sp + x8_idx, sp, d + y8_idx * dp + x8_idx, dp,
    672                      &min, &max);
    673 #endif
    674       if ((max - min) > minmax_max) minmax_max = (max - min);
    675       if ((max - min) < minmax_min) minmax_min = (max - min);
    676     }
    677   }
    678   return (minmax_max - minmax_min);
    679 }
    680 
    681 static void fill_variance_4x4avg(const uint8_t *s, int sp, const uint8_t *d,
    682                                  int dp, int x8_idx, int y8_idx, v8x8 *vst,
    683 #if CONFIG_VP9_HIGHBITDEPTH
    684                                  int highbd_flag,
    685 #endif
    686                                  int pixels_wide, int pixels_high,
    687                                  int is_key_frame) {
    688   int k;
    689   for (k = 0; k < 4; k++) {
    690     int x4_idx = x8_idx + ((k & 1) << 2);
    691     int y4_idx = y8_idx + ((k >> 1) << 2);
    692     unsigned int sse = 0;
    693     int sum = 0;
    694     if (x4_idx < pixels_wide && y4_idx < pixels_high) {
    695       int s_avg;
    696       int d_avg = 128;
    697 #if CONFIG_VP9_HIGHBITDEPTH
    698       if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
    699         s_avg = vpx_highbd_avg_4x4(s + y4_idx * sp + x4_idx, sp);
    700         if (!is_key_frame)
    701           d_avg = vpx_highbd_avg_4x4(d + y4_idx * dp + x4_idx, dp);
    702       } else {
    703         s_avg = vpx_avg_4x4(s + y4_idx * sp + x4_idx, sp);
    704         if (!is_key_frame) d_avg = vpx_avg_4x4(d + y4_idx * dp + x4_idx, dp);
    705       }
    706 #else
    707       s_avg = vpx_avg_4x4(s + y4_idx * sp + x4_idx, sp);
    708       if (!is_key_frame) d_avg = vpx_avg_4x4(d + y4_idx * dp + x4_idx, dp);
    709 #endif
    710       sum = s_avg - d_avg;
    711       sse = sum * sum;
    712     }
    713     fill_variance(sse, sum, 0, &vst->split[k].part_variances.none);
    714   }
    715 }
    716 
    717 static void fill_variance_8x8avg(const uint8_t *s, int sp, const uint8_t *d,
    718                                  int dp, int x16_idx, int y16_idx, v16x16 *vst,
    719 #if CONFIG_VP9_HIGHBITDEPTH
    720                                  int highbd_flag,
    721 #endif
    722                                  int pixels_wide, int pixels_high,
    723                                  int is_key_frame) {
    724   int k;
    725   for (k = 0; k < 4; k++) {
    726     int x8_idx = x16_idx + ((k & 1) << 3);
    727     int y8_idx = y16_idx + ((k >> 1) << 3);
    728     unsigned int sse = 0;
    729     int sum = 0;
    730     if (x8_idx < pixels_wide && y8_idx < pixels_high) {
    731       int s_avg;
    732       int d_avg = 128;
    733 #if CONFIG_VP9_HIGHBITDEPTH
    734       if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
    735         s_avg = vpx_highbd_avg_8x8(s + y8_idx * sp + x8_idx, sp);
    736         if (!is_key_frame)
    737           d_avg = vpx_highbd_avg_8x8(d + y8_idx * dp + x8_idx, dp);
    738       } else {
    739         s_avg = vpx_avg_8x8(s + y8_idx * sp + x8_idx, sp);
    740         if (!is_key_frame) d_avg = vpx_avg_8x8(d + y8_idx * dp + x8_idx, dp);
    741       }
    742 #else
    743       s_avg = vpx_avg_8x8(s + y8_idx * sp + x8_idx, sp);
    744       if (!is_key_frame) d_avg = vpx_avg_8x8(d + y8_idx * dp + x8_idx, dp);
    745 #endif
    746       sum = s_avg - d_avg;
    747       sse = sum * sum;
    748     }
    749     fill_variance(sse, sum, 0, &vst->split[k].part_variances.none);
    750   }
    751 }
    752 
    753 // Check if most of the superblock is skin content, and if so, force split to
    754 // 32x32, and set x->sb_is_skin for use in mode selection.
    755 static int skin_sb_split(VP9_COMP *cpi, MACROBLOCK *x, const int low_res,
    756                          int mi_row, int mi_col, int *force_split) {
    757   VP9_COMMON *const cm = &cpi->common;
    758 #if CONFIG_VP9_HIGHBITDEPTH
    759   if (cm->use_highbitdepth) return 0;
    760 #endif
    761   // Avoid checking superblocks on/near boundary and avoid low resolutions.
    762   // Note superblock may still pick 64X64 if y_sad is very small
    763   // (i.e., y_sad < cpi->vbp_threshold_sad) below. For now leave this as is.
    764   if (!low_res && (mi_col >= 8 && mi_col + 8 < cm->mi_cols && mi_row >= 8 &&
    765                    mi_row + 8 < cm->mi_rows)) {
    766     int num_16x16_skin = 0;
    767     int num_16x16_nonskin = 0;
    768     uint8_t *ysignal = x->plane[0].src.buf;
    769     uint8_t *usignal = x->plane[1].src.buf;
    770     uint8_t *vsignal = x->plane[2].src.buf;
    771     int sp = x->plane[0].src.stride;
    772     int spuv = x->plane[1].src.stride;
    773     const int block_index = mi_row * cm->mi_cols + mi_col;
    774     const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64];
    775     const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64];
    776     const int xmis = VPXMIN(cm->mi_cols - mi_col, bw);
    777     const int ymis = VPXMIN(cm->mi_rows - mi_row, bh);
    778     // Loop through the 16x16 sub-blocks.
    779     int i, j;
    780     for (i = 0; i < ymis; i += 2) {
    781       for (j = 0; j < xmis; j += 2) {
    782         int bl_index = block_index + i * cm->mi_cols + j;
    783         int is_skin = cpi->skin_map[bl_index];
    784         num_16x16_skin += is_skin;
    785         num_16x16_nonskin += (1 - is_skin);
    786         if (num_16x16_nonskin > 3) {
    787           // Exit loop if at least 4 of the 16x16 blocks are not skin.
    788           i = ymis;
    789           break;
    790         }
    791         ysignal += 16;
    792         usignal += 8;
    793         vsignal += 8;
    794       }
    795       ysignal += (sp << 4) - 64;
    796       usignal += (spuv << 3) - 32;
    797       vsignal += (spuv << 3) - 32;
    798     }
    799     if (num_16x16_skin > 12) {
    800       *force_split = 1;
    801       return 1;
    802     }
    803   }
    804   return 0;
    805 }
    806 
    807 static void set_low_temp_var_flag(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
    808                                   v64x64 *vt, int64_t thresholds[],
    809                                   MV_REFERENCE_FRAME ref_frame_partition,
    810                                   int mi_col, int mi_row) {
    811   int i, j;
    812   VP9_COMMON *const cm = &cpi->common;
    813   const int mv_thr = cm->width > 640 ? 8 : 4;
    814   // Check temporal variance for bsize >= 16x16, if LAST_FRAME was selected and
    815   // int_pro mv is small. If the temporal variance is small set the flag
    816   // variance_low for the block. The variance threshold can be adjusted, the
    817   // higher the more aggressive.
    818   if (ref_frame_partition == LAST_FRAME &&
    819       (cpi->sf.short_circuit_low_temp_var == 1 ||
    820        (xd->mi[0]->mv[0].as_mv.col < mv_thr &&
    821         xd->mi[0]->mv[0].as_mv.col > -mv_thr &&
    822         xd->mi[0]->mv[0].as_mv.row < mv_thr &&
    823         xd->mi[0]->mv[0].as_mv.row > -mv_thr))) {
    824     if (xd->mi[0]->sb_type == BLOCK_64X64) {
    825       if ((vt->part_variances).none.variance < (thresholds[0] >> 1))
    826         x->variance_low[0] = 1;
    827     } else if (xd->mi[0]->sb_type == BLOCK_64X32) {
    828       for (i = 0; i < 2; i++) {
    829         if (vt->part_variances.horz[i].variance < (thresholds[0] >> 2))
    830           x->variance_low[i + 1] = 1;
    831       }
    832     } else if (xd->mi[0]->sb_type == BLOCK_32X64) {
    833       for (i = 0; i < 2; i++) {
    834         if (vt->part_variances.vert[i].variance < (thresholds[0] >> 2))
    835           x->variance_low[i + 3] = 1;
    836       }
    837     } else {
    838       for (i = 0; i < 4; i++) {
    839         const int idx[4][2] = { { 0, 0 }, { 0, 4 }, { 4, 0 }, { 4, 4 } };
    840         const int idx_str =
    841             cm->mi_stride * (mi_row + idx[i][0]) + mi_col + idx[i][1];
    842         MODE_INFO **this_mi = cm->mi_grid_visible + idx_str;
    843 
    844         if (cm->mi_cols <= mi_col + idx[i][1] ||
    845             cm->mi_rows <= mi_row + idx[i][0])
    846           continue;
    847 
    848         if ((*this_mi)->sb_type == BLOCK_32X32) {
    849           int64_t threshold_32x32 = (cpi->sf.short_circuit_low_temp_var == 1 ||
    850                                      cpi->sf.short_circuit_low_temp_var == 3)
    851                                         ? ((5 * thresholds[1]) >> 3)
    852                                         : (thresholds[1] >> 1);
    853           if (vt->split[i].part_variances.none.variance < threshold_32x32)
    854             x->variance_low[i + 5] = 1;
    855         } else if (cpi->sf.short_circuit_low_temp_var >= 2) {
    856           // For 32x16 and 16x32 blocks, the flag is set on each 16x16 block
    857           // inside.
    858           if ((*this_mi)->sb_type == BLOCK_16X16 ||
    859               (*this_mi)->sb_type == BLOCK_32X16 ||
    860               (*this_mi)->sb_type == BLOCK_16X32) {
    861             for (j = 0; j < 4; j++) {
    862               if (vt->split[i].split[j].part_variances.none.variance <
    863                   (thresholds[2] >> 8))
    864                 x->variance_low[(i << 2) + j + 9] = 1;
    865             }
    866           }
    867         }
    868       }
    869     }
    870   }
    871 }
    872 
    873 static void copy_partitioning_helper(VP9_COMP *cpi, MACROBLOCK *x,
    874                                      MACROBLOCKD *xd, BLOCK_SIZE bsize,
    875                                      int mi_row, int mi_col) {
    876   VP9_COMMON *const cm = &cpi->common;
    877   BLOCK_SIZE *prev_part = cpi->prev_partition;
    878   int start_pos = mi_row * cm->mi_stride + mi_col;
    879 
    880   const int bsl = b_width_log2_lookup[bsize];
    881   const int bs = (1 << bsl) >> 2;
    882   BLOCK_SIZE subsize;
    883   PARTITION_TYPE partition;
    884 
    885   if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
    886 
    887   partition = partition_lookup[bsl][prev_part[start_pos]];
    888   subsize = get_subsize(bsize, partition);
    889 
    890   if (subsize < BLOCK_8X8) {
    891     set_block_size(cpi, x, xd, mi_row, mi_col, bsize);
    892   } else {
    893     switch (partition) {
    894       case PARTITION_NONE:
    895         set_block_size(cpi, x, xd, mi_row, mi_col, bsize);
    896         break;
    897       case PARTITION_HORZ:
    898         set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
    899         set_block_size(cpi, x, xd, mi_row + bs, mi_col, subsize);
    900         break;
    901       case PARTITION_VERT:
    902         set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
    903         set_block_size(cpi, x, xd, mi_row, mi_col + bs, subsize);
    904         break;
    905       default:
    906         assert(partition == PARTITION_SPLIT);
    907         copy_partitioning_helper(cpi, x, xd, subsize, mi_row, mi_col);
    908         copy_partitioning_helper(cpi, x, xd, subsize, mi_row + bs, mi_col);
    909         copy_partitioning_helper(cpi, x, xd, subsize, mi_row, mi_col + bs);
    910         copy_partitioning_helper(cpi, x, xd, subsize, mi_row + bs, mi_col + bs);
    911         break;
    912     }
    913   }
    914 }
    915 
    916 static int copy_partitioning(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
    917                              int mi_row, int mi_col, int segment_id,
    918                              int sb_offset) {
    919   int svc_copy_allowed = 1;
    920   int frames_since_key_thresh = 1;
    921   if (cpi->use_svc) {
    922     // For SVC, don't allow copy if base spatial layer is key frame, or if
    923     // frame is not a temporal enhancement layer frame.
    924     int layer = LAYER_IDS_TO_IDX(0, cpi->svc.temporal_layer_id,
    925                                  cpi->svc.number_temporal_layers);
    926     const LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer];
    927     if (lc->is_key_frame || !cpi->svc.non_reference_frame) svc_copy_allowed = 0;
    928     frames_since_key_thresh = cpi->svc.number_spatial_layers << 1;
    929   }
    930   if (cpi->rc.frames_since_key > frames_since_key_thresh && svc_copy_allowed &&
    931       !cpi->resize_pending && segment_id == CR_SEGMENT_ID_BASE &&
    932       cpi->prev_segment_id[sb_offset] == CR_SEGMENT_ID_BASE &&
    933       cpi->copied_frame_cnt[sb_offset] < cpi->max_copied_frame) {
    934     if (cpi->prev_partition != NULL) {
    935       copy_partitioning_helper(cpi, x, xd, BLOCK_64X64, mi_row, mi_col);
    936       cpi->copied_frame_cnt[sb_offset] += 1;
    937       memcpy(x->variance_low, &(cpi->prev_variance_low[sb_offset * 25]),
    938              sizeof(x->variance_low));
    939       return 1;
    940     }
    941   }
    942 
    943   return 0;
    944 }
    945 
    946 static int scale_partitioning_svc(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
    947                                   BLOCK_SIZE bsize, int mi_row, int mi_col,
    948                                   int mi_row_high, int mi_col_high) {
    949   VP9_COMMON *const cm = &cpi->common;
    950   SVC *const svc = &cpi->svc;
    951   BLOCK_SIZE *prev_part = svc->prev_partition_svc;
    952   // Variables with _high are for higher resolution.
    953   int bsize_high = 0;
    954   int subsize_high = 0;
    955   const int bsl_high = b_width_log2_lookup[bsize];
    956   const int bs_high = (1 << bsl_high) >> 2;
    957   const int has_rows = (mi_row_high + bs_high) < cm->mi_rows;
    958   const int has_cols = (mi_col_high + bs_high) < cm->mi_cols;
    959 
    960   const int row_boundary_block_scale_factor[BLOCK_SIZES] = {
    961     13, 13, 13, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0
    962   };
    963   const int col_boundary_block_scale_factor[BLOCK_SIZES] = {
    964     13, 13, 13, 2, 2, 0, 2, 2, 0, 2, 2, 0, 0
    965   };
    966   int start_pos;
    967   BLOCK_SIZE bsize_low;
    968   PARTITION_TYPE partition_high;
    969 
    970   if (mi_row_high >= cm->mi_rows || mi_col_high >= cm->mi_cols) return 0;
    971   if (mi_row >= svc->mi_rows[svc->spatial_layer_id - 1] ||
    972       mi_col >= svc->mi_cols[svc->spatial_layer_id - 1])
    973     return 0;
    974 
    975   // Find corresponding (mi_col/mi_row) block down-scaled by 2x2.
    976   start_pos = mi_row * (svc->mi_stride[svc->spatial_layer_id - 1]) + mi_col;
    977   bsize_low = prev_part[start_pos];
    978   // The block size is too big for boundaries. Do variance based partitioning.
    979   if ((!has_rows || !has_cols) && bsize_low > BLOCK_16X16) return 1;
    980 
    981   // For reference frames: return 1 (do variance-based partitioning) if the
    982   // superblock is not low source sad and lower-resoln bsize is below 32x32.
    983   if (!cpi->svc.non_reference_frame && !x->skip_low_source_sad &&
    984       bsize_low < BLOCK_32X32)
    985     return 1;
    986 
    987   // Scale up block size by 2x2. Force 64x64 for size larger than 32x32.
    988   if (bsize_low < BLOCK_32X32) {
    989     bsize_high = bsize_low + 3;
    990   } else if (bsize_low >= BLOCK_32X32) {
    991     bsize_high = BLOCK_64X64;
    992   }
    993   // Scale up blocks on boundary.
    994   if (!has_cols && has_rows) {
    995     bsize_high = bsize_low + row_boundary_block_scale_factor[bsize_low];
    996   } else if (has_cols && !has_rows) {
    997     bsize_high = bsize_low + col_boundary_block_scale_factor[bsize_low];
    998   } else if (!has_cols && !has_rows) {
    999     bsize_high = bsize_low;
   1000   }
   1001 
   1002   partition_high = partition_lookup[bsl_high][bsize_high];
   1003   subsize_high = get_subsize(bsize, partition_high);
   1004 
   1005   if (subsize_high < BLOCK_8X8) {
   1006     set_block_size(cpi, x, xd, mi_row_high, mi_col_high, bsize_high);
   1007   } else {
   1008     const int bsl = b_width_log2_lookup[bsize];
   1009     const int bs = (1 << bsl) >> 2;
   1010     switch (partition_high) {
   1011       case PARTITION_NONE:
   1012         set_block_size(cpi, x, xd, mi_row_high, mi_col_high, bsize_high);
   1013         break;
   1014       case PARTITION_HORZ:
   1015         set_block_size(cpi, x, xd, mi_row_high, mi_col_high, subsize_high);
   1016         if (subsize_high < BLOCK_64X64)
   1017           set_block_size(cpi, x, xd, mi_row_high + bs_high, mi_col_high,
   1018                          subsize_high);
   1019         break;
   1020       case PARTITION_VERT:
   1021         set_block_size(cpi, x, xd, mi_row_high, mi_col_high, subsize_high);
   1022         if (subsize_high < BLOCK_64X64)
   1023           set_block_size(cpi, x, xd, mi_row_high, mi_col_high + bs_high,
   1024                          subsize_high);
   1025         break;
   1026       default:
   1027         assert(partition_high == PARTITION_SPLIT);
   1028         if (scale_partitioning_svc(cpi, x, xd, subsize_high, mi_row, mi_col,
   1029                                    mi_row_high, mi_col_high))
   1030           return 1;
   1031         if (scale_partitioning_svc(cpi, x, xd, subsize_high, mi_row + (bs >> 1),
   1032                                    mi_col, mi_row_high + bs_high, mi_col_high))
   1033           return 1;
   1034         if (scale_partitioning_svc(cpi, x, xd, subsize_high, mi_row,
   1035                                    mi_col + (bs >> 1), mi_row_high,
   1036                                    mi_col_high + bs_high))
   1037           return 1;
   1038         if (scale_partitioning_svc(cpi, x, xd, subsize_high, mi_row + (bs >> 1),
   1039                                    mi_col + (bs >> 1), mi_row_high + bs_high,
   1040                                    mi_col_high + bs_high))
   1041           return 1;
   1042         break;
   1043     }
   1044   }
   1045 
   1046   return 0;
   1047 }
   1048 
   1049 static void update_partition_svc(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row,
   1050                                  int mi_col) {
   1051   VP9_COMMON *const cm = &cpi->common;
   1052   BLOCK_SIZE *prev_part = cpi->svc.prev_partition_svc;
   1053   int start_pos = mi_row * cm->mi_stride + mi_col;
   1054   const int bsl = b_width_log2_lookup[bsize];
   1055   const int bs = (1 << bsl) >> 2;
   1056   BLOCK_SIZE subsize;
   1057   PARTITION_TYPE partition;
   1058   const MODE_INFO *mi = NULL;
   1059   int xx, yy;
   1060 
   1061   if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
   1062 
   1063   mi = cm->mi_grid_visible[start_pos];
   1064   partition = partition_lookup[bsl][mi->sb_type];
   1065   subsize = get_subsize(bsize, partition);
   1066   if (subsize < BLOCK_8X8) {
   1067     prev_part[start_pos] = bsize;
   1068   } else {
   1069     switch (partition) {
   1070       case PARTITION_NONE:
   1071         prev_part[start_pos] = bsize;
   1072         if (bsize == BLOCK_64X64) {
   1073           for (xx = 0; xx < 8; xx += 4)
   1074             for (yy = 0; yy < 8; yy += 4) {
   1075               if ((mi_row + xx < cm->mi_rows) && (mi_col + yy < cm->mi_cols))
   1076                 prev_part[start_pos + xx * cm->mi_stride + yy] = bsize;
   1077             }
   1078         }
   1079         break;
   1080       case PARTITION_HORZ:
   1081         prev_part[start_pos] = subsize;
   1082         if (mi_row + bs < cm->mi_rows)
   1083           prev_part[start_pos + bs * cm->mi_stride] = subsize;
   1084         break;
   1085       case PARTITION_VERT:
   1086         prev_part[start_pos] = subsize;
   1087         if (mi_col + bs < cm->mi_cols) prev_part[start_pos + bs] = subsize;
   1088         break;
   1089       default:
   1090         assert(partition == PARTITION_SPLIT);
   1091         update_partition_svc(cpi, subsize, mi_row, mi_col);
   1092         update_partition_svc(cpi, subsize, mi_row + bs, mi_col);
   1093         update_partition_svc(cpi, subsize, mi_row, mi_col + bs);
   1094         update_partition_svc(cpi, subsize, mi_row + bs, mi_col + bs);
   1095         break;
   1096     }
   1097   }
   1098 }
   1099 
   1100 static void update_prev_partition_helper(VP9_COMP *cpi, BLOCK_SIZE bsize,
   1101                                          int mi_row, int mi_col) {
   1102   VP9_COMMON *const cm = &cpi->common;
   1103   BLOCK_SIZE *prev_part = cpi->prev_partition;
   1104   int start_pos = mi_row * cm->mi_stride + mi_col;
   1105   const int bsl = b_width_log2_lookup[bsize];
   1106   const int bs = (1 << bsl) >> 2;
   1107   BLOCK_SIZE subsize;
   1108   PARTITION_TYPE partition;
   1109   const MODE_INFO *mi = NULL;
   1110 
   1111   if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
   1112 
   1113   mi = cm->mi_grid_visible[start_pos];
   1114   partition = partition_lookup[bsl][mi->sb_type];
   1115   subsize = get_subsize(bsize, partition);
   1116   if (subsize < BLOCK_8X8) {
   1117     prev_part[start_pos] = bsize;
   1118   } else {
   1119     switch (partition) {
   1120       case PARTITION_NONE: prev_part[start_pos] = bsize; break;
   1121       case PARTITION_HORZ:
   1122         prev_part[start_pos] = subsize;
   1123         if (mi_row + bs < cm->mi_rows)
   1124           prev_part[start_pos + bs * cm->mi_stride] = subsize;
   1125         break;
   1126       case PARTITION_VERT:
   1127         prev_part[start_pos] = subsize;
   1128         if (mi_col + bs < cm->mi_cols) prev_part[start_pos + bs] = subsize;
   1129         break;
   1130       default:
   1131         assert(partition == PARTITION_SPLIT);
   1132         update_prev_partition_helper(cpi, subsize, mi_row, mi_col);
   1133         update_prev_partition_helper(cpi, subsize, mi_row + bs, mi_col);
   1134         update_prev_partition_helper(cpi, subsize, mi_row, mi_col + bs);
   1135         update_prev_partition_helper(cpi, subsize, mi_row + bs, mi_col + bs);
   1136         break;
   1137     }
   1138   }
   1139 }
   1140 
   1141 static void update_prev_partition(VP9_COMP *cpi, MACROBLOCK *x, int segment_id,
   1142                                   int mi_row, int mi_col, int sb_offset) {
   1143   update_prev_partition_helper(cpi, BLOCK_64X64, mi_row, mi_col);
   1144   cpi->prev_segment_id[sb_offset] = segment_id;
   1145   memcpy(&(cpi->prev_variance_low[sb_offset * 25]), x->variance_low,
   1146          sizeof(x->variance_low));
   1147   // Reset the counter for copy partitioning
   1148   cpi->copied_frame_cnt[sb_offset] = 0;
   1149 }
   1150 
   1151 static void chroma_check(VP9_COMP *cpi, MACROBLOCK *x, int bsize,
   1152                          unsigned int y_sad, int is_key_frame) {
   1153   int i;
   1154   MACROBLOCKD *xd = &x->e_mbd;
   1155 
   1156   if (is_key_frame) return;
   1157 
   1158   // For speed >= 8, avoid the chroma check if y_sad is above threshold.
   1159   if (cpi->oxcf.speed >= 8) {
   1160     if (y_sad > cpi->vbp_thresholds[1] &&
   1161         (!cpi->noise_estimate.enabled ||
   1162          vp9_noise_estimate_extract_level(&cpi->noise_estimate) < kMedium))
   1163       return;
   1164   }
   1165 
   1166   for (i = 1; i <= 2; ++i) {
   1167     unsigned int uv_sad = UINT_MAX;
   1168     struct macroblock_plane *p = &x->plane[i];
   1169     struct macroblockd_plane *pd = &xd->plane[i];
   1170     const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
   1171 
   1172     if (bs != BLOCK_INVALID)
   1173       uv_sad = cpi->fn_ptr[bs].sdf(p->src.buf, p->src.stride, pd->dst.buf,
   1174                                    pd->dst.stride);
   1175 
   1176     // TODO(marpan): Investigate if we should lower this threshold if
   1177     // superblock is detected as skin.
   1178     x->color_sensitivity[i - 1] = uv_sad > (y_sad >> 2);
   1179   }
   1180 }
   1181 
   1182 static uint64_t avg_source_sad(VP9_COMP *cpi, MACROBLOCK *x, int shift,
   1183                                int sb_offset) {
   1184   unsigned int tmp_sse;
   1185   uint64_t tmp_sad;
   1186   unsigned int tmp_variance;
   1187   const BLOCK_SIZE bsize = BLOCK_64X64;
   1188   uint8_t *src_y = cpi->Source->y_buffer;
   1189   int src_ystride = cpi->Source->y_stride;
   1190   uint8_t *last_src_y = cpi->Last_Source->y_buffer;
   1191   int last_src_ystride = cpi->Last_Source->y_stride;
   1192   uint64_t avg_source_sad_threshold = 10000;
   1193   uint64_t avg_source_sad_threshold2 = 12000;
   1194 #if CONFIG_VP9_HIGHBITDEPTH
   1195   if (cpi->common.use_highbitdepth) return 0;
   1196 #endif
   1197   src_y += shift;
   1198   last_src_y += shift;
   1199   tmp_sad =
   1200       cpi->fn_ptr[bsize].sdf(src_y, src_ystride, last_src_y, last_src_ystride);
   1201   tmp_variance = vpx_variance64x64(src_y, src_ystride, last_src_y,
   1202                                    last_src_ystride, &tmp_sse);
   1203   // Note: tmp_sse - tmp_variance = ((sum * sum) >> 12)
   1204   if (tmp_sad < avg_source_sad_threshold)
   1205     x->content_state_sb = ((tmp_sse - tmp_variance) < 25) ? kLowSadLowSumdiff
   1206                                                           : kLowSadHighSumdiff;
   1207   else
   1208     x->content_state_sb = ((tmp_sse - tmp_variance) < 25) ? kHighSadLowSumdiff
   1209                                                           : kHighSadHighSumdiff;
   1210 
   1211   // Detect large lighting change.
   1212   if (cpi->oxcf.content != VP9E_CONTENT_SCREEN &&
   1213       cpi->oxcf.rc_mode == VPX_CBR && tmp_variance < (tmp_sse >> 3) &&
   1214       (tmp_sse - tmp_variance) > 10000)
   1215     x->content_state_sb = kLowVarHighSumdiff;
   1216   else if (tmp_sad > (avg_source_sad_threshold << 1))
   1217     x->content_state_sb = kVeryHighSad;
   1218 
   1219   if (cpi->content_state_sb_fd != NULL) {
   1220     if (tmp_sad < avg_source_sad_threshold2) {
   1221       // Cap the increment to 255.
   1222       if (cpi->content_state_sb_fd[sb_offset] < 255)
   1223         cpi->content_state_sb_fd[sb_offset]++;
   1224     } else {
   1225       cpi->content_state_sb_fd[sb_offset] = 0;
   1226     }
   1227   }
   1228   if (tmp_sad == 0) x->zero_temp_sad_source = 1;
   1229   return tmp_sad;
   1230 }
   1231 
   1232 // This function chooses partitioning based on the variance between source and
   1233 // reconstructed last, where variance is computed for down-sampled inputs.
   1234 static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
   1235                                MACROBLOCK *x, int mi_row, int mi_col) {
   1236   VP9_COMMON *const cm = &cpi->common;
   1237   MACROBLOCKD *xd = &x->e_mbd;
   1238   int i, j, k, m;
   1239   v64x64 vt;
   1240   v16x16 *vt2 = NULL;
   1241   int force_split[21];
   1242   int avg_32x32;
   1243   int max_var_32x32 = 0;
   1244   int min_var_32x32 = INT_MAX;
   1245   int var_32x32;
   1246   int avg_16x16[4];
   1247   int maxvar_16x16[4];
   1248   int minvar_16x16[4];
   1249   int64_t threshold_4x4avg;
   1250   NOISE_LEVEL noise_level = kLow;
   1251   int content_state = 0;
   1252   uint8_t *s;
   1253   const uint8_t *d;
   1254   int sp;
   1255   int dp;
   1256   int compute_minmax_variance = 1;
   1257   unsigned int y_sad = UINT_MAX;
   1258   BLOCK_SIZE bsize = BLOCK_64X64;
   1259   // Ref frame used in partitioning.
   1260   MV_REFERENCE_FRAME ref_frame_partition = LAST_FRAME;
   1261   int pixels_wide = 64, pixels_high = 64;
   1262   int64_t thresholds[4] = { cpi->vbp_thresholds[0], cpi->vbp_thresholds[1],
   1263                             cpi->vbp_thresholds[2], cpi->vbp_thresholds[3] };
   1264   int scene_change_detected =
   1265       cpi->rc.high_source_sad ||
   1266       (cpi->use_svc && cpi->svc.high_source_sad_superframe);
   1267 
   1268   // For the variance computation under SVC mode, we treat the frame as key if
   1269   // the reference (base layer frame) is key frame (i.e., is_key_frame == 1).
   1270   int is_key_frame =
   1271       (frame_is_intra_only(cm) ||
   1272        (is_one_pass_cbr_svc(cpi) &&
   1273         cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame));
   1274   // Always use 4x4 partition for key frame.
   1275   const int use_4x4_partition = frame_is_intra_only(cm);
   1276   const int low_res = (cm->width <= 352 && cm->height <= 288);
   1277   int variance4x4downsample[16];
   1278   int segment_id;
   1279   int sb_offset = (cm->mi_stride >> 3) * (mi_row >> 3) + (mi_col >> 3);
   1280 
   1281   // For SVC: check if LAST frame is NULL or if the resolution of LAST is
   1282   // different than the current frame resolution, and if so, treat this frame
   1283   // as a key frame, for the purpose of the superblock partitioning.
   1284   // LAST == NULL can happen in some cases where enhancement spatial layers are
   1285   // enabled dyanmically in the stream and the only reference is the spatial
   1286   // reference (GOLDEN).
   1287   if (cpi->use_svc) {
   1288     const YV12_BUFFER_CONFIG *const ref = get_ref_frame_buffer(cpi, LAST_FRAME);
   1289     if (ref == NULL || ref->y_crop_height != cm->height ||
   1290         ref->y_crop_width != cm->width)
   1291       is_key_frame = 1;
   1292   }
   1293 
   1294   set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64);
   1295   set_segment_index(cpi, x, mi_row, mi_col, BLOCK_64X64, 0);
   1296   segment_id = xd->mi[0]->segment_id;
   1297 
   1298   if (cpi->oxcf.speed >= 8 || (cpi->use_svc && cpi->svc.non_reference_frame))
   1299     compute_minmax_variance = 0;
   1300 
   1301   memset(x->variance_low, 0, sizeof(x->variance_low));
   1302 
   1303   if (cpi->sf.use_source_sad && !is_key_frame) {
   1304     int sb_offset2 = ((cm->mi_cols + 7) >> 3) * (mi_row >> 3) + (mi_col >> 3);
   1305     content_state = x->content_state_sb;
   1306     x->skip_low_source_sad = (content_state == kLowSadLowSumdiff ||
   1307                               content_state == kLowSadHighSumdiff)
   1308                                  ? 1
   1309                                  : 0;
   1310     x->lowvar_highsumdiff = (content_state == kLowVarHighSumdiff) ? 1 : 0;
   1311     if (cpi->content_state_sb_fd != NULL)
   1312       x->last_sb_high_content = cpi->content_state_sb_fd[sb_offset2];
   1313 
   1314     // For SVC on top spatial layer: use/scale the partition from
   1315     // the lower spatial resolution if svc_use_lowres_part is enabled.
   1316     if (cpi->sf.svc_use_lowres_part &&
   1317         cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1 &&
   1318         cpi->svc.prev_partition_svc != NULL && content_state != kVeryHighSad) {
   1319       if (!scale_partitioning_svc(cpi, x, xd, BLOCK_64X64, mi_row >> 1,
   1320                                   mi_col >> 1, mi_row, mi_col)) {
   1321         if (cpi->sf.copy_partition_flag) {
   1322           update_prev_partition(cpi, x, segment_id, mi_row, mi_col, sb_offset);
   1323         }
   1324         return 0;
   1325       }
   1326     }
   1327     // If source_sad is low copy the partition without computing the y_sad.
   1328     if (x->skip_low_source_sad && cpi->sf.copy_partition_flag &&
   1329         !scene_change_detected &&
   1330         copy_partitioning(cpi, x, xd, mi_row, mi_col, segment_id, sb_offset)) {
   1331       x->sb_use_mv_part = 1;
   1332       if (cpi->sf.svc_use_lowres_part &&
   1333           cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 2)
   1334         update_partition_svc(cpi, BLOCK_64X64, mi_row, mi_col);
   1335       return 0;
   1336     }
   1337   }
   1338 
   1339   if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled &&
   1340       cyclic_refresh_segment_id_boosted(segment_id)) {
   1341     int q = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex);
   1342     set_vbp_thresholds(cpi, thresholds, q, content_state);
   1343   } else {
   1344     set_vbp_thresholds(cpi, thresholds, cm->base_qindex, content_state);
   1345   }
   1346 
   1347   // For non keyframes, disable 4x4 average for low resolution when speed = 8
   1348   threshold_4x4avg = (cpi->oxcf.speed < 8) ? thresholds[1] << 1 : INT64_MAX;
   1349 
   1350   if (xd->mb_to_right_edge < 0) pixels_wide += (xd->mb_to_right_edge >> 3);
   1351   if (xd->mb_to_bottom_edge < 0) pixels_high += (xd->mb_to_bottom_edge >> 3);
   1352 
   1353   s = x->plane[0].src.buf;
   1354   sp = x->plane[0].src.stride;
   1355 
   1356   // Index for force_split: 0 for 64x64, 1-4 for 32x32 blocks,
   1357   // 5-20 for the 16x16 blocks.
   1358   force_split[0] = scene_change_detected;
   1359 
   1360   if (!is_key_frame) {
   1361     // In the case of spatial/temporal scalable coding, the assumption here is
   1362     // that the temporal reference frame will always be of type LAST_FRAME.
   1363     // TODO(marpan): If that assumption is broken, we need to revisit this code.
   1364     MODE_INFO *mi = xd->mi[0];
   1365     YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);
   1366 
   1367     const YV12_BUFFER_CONFIG *yv12_g = NULL;
   1368     unsigned int y_sad_g, y_sad_thr, y_sad_last;
   1369     bsize = BLOCK_32X32 + (mi_col + 4 < cm->mi_cols) * 2 +
   1370             (mi_row + 4 < cm->mi_rows);
   1371 
   1372     assert(yv12 != NULL);
   1373 
   1374     if (!(is_one_pass_cbr_svc(cpi) && cpi->svc.spatial_layer_id) ||
   1375         cpi->svc.use_gf_temporal_ref_current_layer) {
   1376       // For now, GOLDEN will not be used for non-zero spatial layers, since
   1377       // it may not be a temporal reference.
   1378       yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
   1379     }
   1380 
   1381     // Only compute y_sad_g (sad for golden reference) for speed < 8.
   1382     if (cpi->oxcf.speed < 8 && yv12_g && yv12_g != yv12 &&
   1383         (cpi->ref_frame_flags & VP9_GOLD_FLAG)) {
   1384       vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
   1385                            &cm->frame_refs[GOLDEN_FRAME - 1].sf);
   1386       y_sad_g = cpi->fn_ptr[bsize].sdf(
   1387           x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf,
   1388           xd->plane[0].pre[0].stride);
   1389     } else {
   1390       y_sad_g = UINT_MAX;
   1391     }
   1392 
   1393     if (cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR &&
   1394         cpi->rc.is_src_frame_alt_ref) {
   1395       yv12 = get_ref_frame_buffer(cpi, ALTREF_FRAME);
   1396       vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
   1397                            &cm->frame_refs[ALTREF_FRAME - 1].sf);
   1398       mi->ref_frame[0] = ALTREF_FRAME;
   1399       y_sad_g = UINT_MAX;
   1400     } else {
   1401       vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
   1402                            &cm->frame_refs[LAST_FRAME - 1].sf);
   1403       mi->ref_frame[0] = LAST_FRAME;
   1404     }
   1405     mi->ref_frame[1] = NONE;
   1406     mi->sb_type = BLOCK_64X64;
   1407     mi->mv[0].as_int = 0;
   1408     mi->interp_filter = BILINEAR;
   1409 
   1410     if (cpi->oxcf.speed >= 8 && !low_res &&
   1411         x->content_state_sb != kVeryHighSad) {
   1412       y_sad = cpi->fn_ptr[bsize].sdf(
   1413           x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf,
   1414           xd->plane[0].pre[0].stride);
   1415     } else {
   1416       const MV dummy_mv = { 0, 0 };
   1417       y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col,
   1418                                             &dummy_mv);
   1419       x->sb_use_mv_part = 1;
   1420       x->sb_mvcol_part = mi->mv[0].as_mv.col;
   1421       x->sb_mvrow_part = mi->mv[0].as_mv.row;
   1422       if (cpi->oxcf.content == VP9E_CONTENT_SCREEN &&
   1423           cpi->svc.spatial_layer_id == 0 &&
   1424           cpi->svc.high_num_blocks_with_motion && !x->zero_temp_sad_source &&
   1425           cm->width > 640 && cm->height > 480) {
   1426         // Disable split below 16x16 block size when scroll motion is detected.
   1427         // TODO(marpan/jianj): Improve this condition: issue is that search
   1428         // range is hard-coded/limited in vp9_int_pro_motion_estimation() so
   1429         // scroll motion may not be detected here.
   1430         if ((abs(x->sb_mvrow_part) >= 48 && abs(x->sb_mvcol_part) <= 8) ||
   1431             y_sad < 100000) {
   1432           compute_minmax_variance = 0;
   1433           thresholds[2] = INT64_MAX;
   1434         }
   1435       }
   1436     }
   1437 
   1438     y_sad_last = y_sad;
   1439     // Pick ref frame for partitioning, bias last frame when y_sad_g and y_sad
   1440     // are close if short_circuit_low_temp_var is on.
   1441     y_sad_thr = cpi->sf.short_circuit_low_temp_var ? (y_sad * 7) >> 3 : y_sad;
   1442     if (y_sad_g < y_sad_thr) {
   1443       vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
   1444                            &cm->frame_refs[GOLDEN_FRAME - 1].sf);
   1445       mi->ref_frame[0] = GOLDEN_FRAME;
   1446       mi->mv[0].as_int = 0;
   1447       y_sad = y_sad_g;
   1448       ref_frame_partition = GOLDEN_FRAME;
   1449     } else {
   1450       x->pred_mv[LAST_FRAME] = mi->mv[0].as_mv;
   1451       ref_frame_partition = LAST_FRAME;
   1452     }
   1453 
   1454     set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
   1455     vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_64X64);
   1456 
   1457     if (cpi->use_skin_detection)
   1458       x->sb_is_skin =
   1459           skin_sb_split(cpi, x, low_res, mi_row, mi_col, force_split);
   1460 
   1461     d = xd->plane[0].dst.buf;
   1462     dp = xd->plane[0].dst.stride;
   1463 
   1464     // If the y_sad is very small, take 64x64 as partition and exit.
   1465     // Don't check on boosted segment for now, as 64x64 is suppressed there.
   1466     if (segment_id == CR_SEGMENT_ID_BASE && y_sad < cpi->vbp_threshold_sad) {
   1467       const int block_width = num_8x8_blocks_wide_lookup[BLOCK_64X64];
   1468       const int block_height = num_8x8_blocks_high_lookup[BLOCK_64X64];
   1469       if (mi_col + block_width / 2 < cm->mi_cols &&
   1470           mi_row + block_height / 2 < cm->mi_rows) {
   1471         set_block_size(cpi, x, xd, mi_row, mi_col, BLOCK_64X64);
   1472         x->variance_low[0] = 1;
   1473         chroma_check(cpi, x, bsize, y_sad, is_key_frame);
   1474         if (cpi->sf.svc_use_lowres_part &&
   1475             cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 2)
   1476           update_partition_svc(cpi, BLOCK_64X64, mi_row, mi_col);
   1477         if (cpi->sf.copy_partition_flag) {
   1478           update_prev_partition(cpi, x, segment_id, mi_row, mi_col, sb_offset);
   1479         }
   1480         return 0;
   1481       }
   1482     }
   1483 
   1484     // If the y_sad is small enough, copy the partition of the superblock in the
   1485     // last frame to current frame only if the last frame is not a keyframe.
   1486     // Stop the copy every cpi->max_copied_frame to refresh the partition.
   1487     // TODO(jianj) : tune the threshold.
   1488     if (cpi->sf.copy_partition_flag && y_sad_last < cpi->vbp_threshold_copy &&
   1489         copy_partitioning(cpi, x, xd, mi_row, mi_col, segment_id, sb_offset)) {
   1490       chroma_check(cpi, x, bsize, y_sad, is_key_frame);
   1491       if (cpi->sf.svc_use_lowres_part &&
   1492           cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 2)
   1493         update_partition_svc(cpi, BLOCK_64X64, mi_row, mi_col);
   1494       return 0;
   1495     }
   1496   } else {
   1497     d = VP9_VAR_OFFS;
   1498     dp = 0;
   1499 #if CONFIG_VP9_HIGHBITDEPTH
   1500     if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
   1501       switch (xd->bd) {
   1502         case 10: d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_10); break;
   1503         case 12: d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_12); break;
   1504         case 8:
   1505         default: d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_8); break;
   1506       }
   1507     }
   1508 #endif  // CONFIG_VP9_HIGHBITDEPTH
   1509   }
   1510 
   1511   if (low_res && threshold_4x4avg < INT64_MAX)
   1512     CHECK_MEM_ERROR(cm, vt2, vpx_calloc(16, sizeof(*vt2)));
   1513   // Fill in the entire tree of 8x8 (or 4x4 under some conditions) variances
   1514   // for splits.
   1515   for (i = 0; i < 4; i++) {
   1516     const int x32_idx = ((i & 1) << 5);
   1517     const int y32_idx = ((i >> 1) << 5);
   1518     const int i2 = i << 2;
   1519     force_split[i + 1] = 0;
   1520     avg_16x16[i] = 0;
   1521     maxvar_16x16[i] = 0;
   1522     minvar_16x16[i] = INT_MAX;
   1523     for (j = 0; j < 4; j++) {
   1524       const int x16_idx = x32_idx + ((j & 1) << 4);
   1525       const int y16_idx = y32_idx + ((j >> 1) << 4);
   1526       const int split_index = 5 + i2 + j;
   1527       v16x16 *vst = &vt.split[i].split[j];
   1528       force_split[split_index] = 0;
   1529       variance4x4downsample[i2 + j] = 0;
   1530       if (!is_key_frame) {
   1531         fill_variance_8x8avg(s, sp, d, dp, x16_idx, y16_idx, vst,
   1532 #if CONFIG_VP9_HIGHBITDEPTH
   1533                              xd->cur_buf->flags,
   1534 #endif
   1535                              pixels_wide, pixels_high, is_key_frame);
   1536         fill_variance_tree(&vt.split[i].split[j], BLOCK_16X16);
   1537         get_variance(&vt.split[i].split[j].part_variances.none);
   1538         avg_16x16[i] += vt.split[i].split[j].part_variances.none.variance;
   1539         if (vt.split[i].split[j].part_variances.none.variance < minvar_16x16[i])
   1540           minvar_16x16[i] = vt.split[i].split[j].part_variances.none.variance;
   1541         if (vt.split[i].split[j].part_variances.none.variance > maxvar_16x16[i])
   1542           maxvar_16x16[i] = vt.split[i].split[j].part_variances.none.variance;
   1543         if (vt.split[i].split[j].part_variances.none.variance > thresholds[2]) {
   1544           // 16X16 variance is above threshold for split, so force split to 8x8
   1545           // for this 16x16 block (this also forces splits for upper levels).
   1546           force_split[split_index] = 1;
   1547           force_split[i + 1] = 1;
   1548           force_split[0] = 1;
   1549         } else if (compute_minmax_variance &&
   1550                    vt.split[i].split[j].part_variances.none.variance >
   1551                        thresholds[1] &&
   1552                    !cyclic_refresh_segment_id_boosted(segment_id)) {
   1553           // We have some nominal amount of 16x16 variance (based on average),
   1554           // compute the minmax over the 8x8 sub-blocks, and if above threshold,
   1555           // force split to 8x8 block for this 16x16 block.
   1556           int minmax = compute_minmax_8x8(s, sp, d, dp, x16_idx, y16_idx,
   1557 #if CONFIG_VP9_HIGHBITDEPTH
   1558                                           xd->cur_buf->flags,
   1559 #endif
   1560                                           pixels_wide, pixels_high);
   1561           int thresh_minmax = (int)cpi->vbp_threshold_minmax;
   1562           if (x->content_state_sb == kVeryHighSad)
   1563             thresh_minmax = thresh_minmax << 1;
   1564           if (minmax > thresh_minmax) {
   1565             force_split[split_index] = 1;
   1566             force_split[i + 1] = 1;
   1567             force_split[0] = 1;
   1568           }
   1569         }
   1570       }
   1571       if (is_key_frame ||
   1572           (low_res && vt.split[i].split[j].part_variances.none.variance >
   1573                           threshold_4x4avg)) {
   1574         force_split[split_index] = 0;
   1575         // Go down to 4x4 down-sampling for variance.
   1576         variance4x4downsample[i2 + j] = 1;
   1577         for (k = 0; k < 4; k++) {
   1578           int x8_idx = x16_idx + ((k & 1) << 3);
   1579           int y8_idx = y16_idx + ((k >> 1) << 3);
   1580           v8x8 *vst2 = is_key_frame ? &vst->split[k] : &vt2[i2 + j].split[k];
   1581           fill_variance_4x4avg(s, sp, d, dp, x8_idx, y8_idx, vst2,
   1582 #if CONFIG_VP9_HIGHBITDEPTH
   1583                                xd->cur_buf->flags,
   1584 #endif
   1585                                pixels_wide, pixels_high, is_key_frame);
   1586         }
   1587       }
   1588     }
   1589   }
   1590   if (cpi->noise_estimate.enabled)
   1591     noise_level = vp9_noise_estimate_extract_level(&cpi->noise_estimate);
   1592   // Fill the rest of the variance tree by summing split partition values.
   1593   avg_32x32 = 0;
   1594   for (i = 0; i < 4; i++) {
   1595     const int i2 = i << 2;
   1596     for (j = 0; j < 4; j++) {
   1597       if (variance4x4downsample[i2 + j] == 1) {
   1598         v16x16 *vtemp = (!is_key_frame) ? &vt2[i2 + j] : &vt.split[i].split[j];
   1599         for (m = 0; m < 4; m++) fill_variance_tree(&vtemp->split[m], BLOCK_8X8);
   1600         fill_variance_tree(vtemp, BLOCK_16X16);
   1601         // If variance of this 16x16 block is above the threshold, force block
   1602         // to split. This also forces a split on the upper levels.
   1603         get_variance(&vtemp->part_variances.none);
   1604         if (vtemp->part_variances.none.variance > thresholds[2]) {
   1605           force_split[5 + i2 + j] = 1;
   1606           force_split[i + 1] = 1;
   1607           force_split[0] = 1;
   1608         }
   1609       }
   1610     }
   1611     fill_variance_tree(&vt.split[i], BLOCK_32X32);
   1612     // If variance of this 32x32 block is above the threshold, or if it's above
   1613     // (some threshold of) the average variance over the sub-16x16 blocks, then
   1614     // force this block to split. This also forces a split on the upper
   1615     // (64x64) level.
   1616     if (!force_split[i + 1]) {
   1617       get_variance(&vt.split[i].part_variances.none);
   1618       var_32x32 = vt.split[i].part_variances.none.variance;
   1619       max_var_32x32 = VPXMAX(var_32x32, max_var_32x32);
   1620       min_var_32x32 = VPXMIN(var_32x32, min_var_32x32);
   1621       if (vt.split[i].part_variances.none.variance > thresholds[1] ||
   1622           (!is_key_frame &&
   1623            vt.split[i].part_variances.none.variance > (thresholds[1] >> 1) &&
   1624            vt.split[i].part_variances.none.variance > (avg_16x16[i] >> 1))) {
   1625         force_split[i + 1] = 1;
   1626         force_split[0] = 1;
   1627       } else if (!is_key_frame && noise_level < kLow && cm->height <= 360 &&
   1628                  (maxvar_16x16[i] - minvar_16x16[i]) > (thresholds[1] >> 1) &&
   1629                  maxvar_16x16[i] > thresholds[1]) {
   1630         force_split[i + 1] = 1;
   1631         force_split[0] = 1;
   1632       }
   1633       avg_32x32 += var_32x32;
   1634     }
   1635   }
   1636   if (!force_split[0]) {
   1637     fill_variance_tree(&vt, BLOCK_64X64);
   1638     get_variance(&vt.part_variances.none);
   1639     // If variance of this 64x64 block is above (some threshold of) the average
   1640     // variance over the sub-32x32 blocks, then force this block to split.
   1641     // Only checking this for noise level >= medium for now.
   1642     if (!is_key_frame && noise_level >= kMedium &&
   1643         vt.part_variances.none.variance > (9 * avg_32x32) >> 5)
   1644       force_split[0] = 1;
   1645     // Else if the maximum 32x32 variance minus the minimum 32x32 variance in
   1646     // a 64x64 block is greater than threshold and the maximum 32x32 variance is
   1647     // above a minimum threshold, then force the split of a 64x64 block.
   1648     // Only check this for low noise.
   1649     else if (!is_key_frame && noise_level < kMedium &&
   1650              (max_var_32x32 - min_var_32x32) > 3 * (thresholds[0] >> 3) &&
   1651              max_var_32x32 > thresholds[0] >> 1)
   1652       force_split[0] = 1;
   1653   }
   1654 
   1655   // Now go through the entire structure, splitting every block size until
   1656   // we get to one that's got a variance lower than our threshold.
   1657   if (mi_col + 8 > cm->mi_cols || mi_row + 8 > cm->mi_rows ||
   1658       !set_vt_partitioning(cpi, x, xd, &vt, BLOCK_64X64, mi_row, mi_col,
   1659                            thresholds[0], BLOCK_16X16, force_split[0])) {
   1660     for (i = 0; i < 4; ++i) {
   1661       const int x32_idx = ((i & 1) << 2);
   1662       const int y32_idx = ((i >> 1) << 2);
   1663       const int i2 = i << 2;
   1664       if (!set_vt_partitioning(cpi, x, xd, &vt.split[i], BLOCK_32X32,
   1665                                (mi_row + y32_idx), (mi_col + x32_idx),
   1666                                thresholds[1], BLOCK_16X16,
   1667                                force_split[i + 1])) {
   1668         for (j = 0; j < 4; ++j) {
   1669           const int x16_idx = ((j & 1) << 1);
   1670           const int y16_idx = ((j >> 1) << 1);
   1671           // For inter frames: if variance4x4downsample[] == 1 for this 16x16
   1672           // block, then the variance is based on 4x4 down-sampling, so use vt2
   1673           // in set_vt_partitioning(), otherwise use vt.
   1674           v16x16 *vtemp = (!is_key_frame && variance4x4downsample[i2 + j] == 1)
   1675                               ? &vt2[i2 + j]
   1676                               : &vt.split[i].split[j];
   1677           if (!set_vt_partitioning(
   1678                   cpi, x, xd, vtemp, BLOCK_16X16, mi_row + y32_idx + y16_idx,
   1679                   mi_col + x32_idx + x16_idx, thresholds[2], cpi->vbp_bsize_min,
   1680                   force_split[5 + i2 + j])) {
   1681             for (k = 0; k < 4; ++k) {
   1682               const int x8_idx = (k & 1);
   1683               const int y8_idx = (k >> 1);
   1684               if (use_4x4_partition) {
   1685                 if (!set_vt_partitioning(cpi, x, xd, &vtemp->split[k],
   1686                                          BLOCK_8X8,
   1687                                          mi_row + y32_idx + y16_idx + y8_idx,
   1688                                          mi_col + x32_idx + x16_idx + x8_idx,
   1689                                          thresholds[3], BLOCK_8X8, 0)) {
   1690                   set_block_size(
   1691                       cpi, x, xd, (mi_row + y32_idx + y16_idx + y8_idx),
   1692                       (mi_col + x32_idx + x16_idx + x8_idx), BLOCK_4X4);
   1693                 }
   1694               } else {
   1695                 set_block_size(
   1696                     cpi, x, xd, (mi_row + y32_idx + y16_idx + y8_idx),
   1697                     (mi_col + x32_idx + x16_idx + x8_idx), BLOCK_8X8);
   1698               }
   1699             }
   1700           }
   1701         }
   1702       }
   1703     }
   1704   }
   1705 
   1706   if (!frame_is_intra_only(cm) && cpi->sf.copy_partition_flag) {
   1707     update_prev_partition(cpi, x, segment_id, mi_row, mi_col, sb_offset);
   1708   }
   1709 
   1710   if (!frame_is_intra_only(cm) && cpi->sf.svc_use_lowres_part &&
   1711       cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 2)
   1712     update_partition_svc(cpi, BLOCK_64X64, mi_row, mi_col);
   1713 
   1714   if (cpi->sf.short_circuit_low_temp_var) {
   1715     set_low_temp_var_flag(cpi, x, xd, &vt, thresholds, ref_frame_partition,
   1716                           mi_col, mi_row);
   1717   }
   1718 
   1719   chroma_check(cpi, x, bsize, y_sad, is_key_frame);
   1720   if (vt2) vpx_free(vt2);
   1721   return 0;
   1722 }
   1723 
   1724 static void update_state(VP9_COMP *cpi, ThreadData *td, PICK_MODE_CONTEXT *ctx,
   1725                          int mi_row, int mi_col, BLOCK_SIZE bsize,
   1726                          int output_enabled) {
   1727   int i, x_idx, y;
   1728   VP9_COMMON *const cm = &cpi->common;
   1729   RD_COUNTS *const rdc = &td->rd_counts;
   1730   MACROBLOCK *const x = &td->mb;
   1731   MACROBLOCKD *const xd = &x->e_mbd;
   1732   struct macroblock_plane *const p = x->plane;
   1733   struct macroblockd_plane *const pd = xd->plane;
   1734   MODE_INFO *mi = &ctx->mic;
   1735   MODE_INFO *const xdmi = xd->mi[0];
   1736   MODE_INFO *mi_addr = xd->mi[0];
   1737   const struct segmentation *const seg = &cm->seg;
   1738   const int bw = num_8x8_blocks_wide_lookup[mi->sb_type];
   1739   const int bh = num_8x8_blocks_high_lookup[mi->sb_type];
   1740   const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col);
   1741   const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row);
   1742   MV_REF *const frame_mvs = cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col;
   1743   int w, h;
   1744 
   1745   const int mis = cm->mi_stride;
   1746   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
   1747   const int mi_height = num_8x8_blocks_high_lookup[bsize];
   1748   int max_plane;
   1749 
   1750   assert(mi->sb_type == bsize);
   1751 
   1752   *mi_addr = *mi;
   1753   *x->mbmi_ext = ctx->mbmi_ext;
   1754 
   1755   // If segmentation in use
   1756   if (seg->enabled) {
   1757     // For in frame complexity AQ copy the segment id from the segment map.
   1758     if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) {
   1759       const uint8_t *const map =
   1760           seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
   1761       mi_addr->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
   1762     }
   1763     // Else for cyclic refresh mode update the segment map, set the segment id
   1764     // and then update the quantizer.
   1765     if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
   1766       vp9_cyclic_refresh_update_segment(cpi, xd->mi[0], mi_row, mi_col, bsize,
   1767                                         ctx->rate, ctx->dist, x->skip, p);
   1768     }
   1769   }
   1770 
   1771   max_plane = is_inter_block(xdmi) ? MAX_MB_PLANE : 1;
   1772   for (i = 0; i < max_plane; ++i) {
   1773     p[i].coeff = ctx->coeff_pbuf[i][1];
   1774     p[i].qcoeff = ctx->qcoeff_pbuf[i][1];
   1775     pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1];
   1776     p[i].eobs = ctx->eobs_pbuf[i][1];
   1777   }
   1778 
   1779   for (i = max_plane; i < MAX_MB_PLANE; ++i) {
   1780     p[i].coeff = ctx->coeff_pbuf[i][2];
   1781     p[i].qcoeff = ctx->qcoeff_pbuf[i][2];
   1782     pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2];
   1783     p[i].eobs = ctx->eobs_pbuf[i][2];
   1784   }
   1785 
   1786   // Restore the coding context of the MB to that that was in place
   1787   // when the mode was picked for it
   1788   for (y = 0; y < mi_height; y++)
   1789     for (x_idx = 0; x_idx < mi_width; x_idx++)
   1790       if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > x_idx &&
   1791           (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height > y) {
   1792         xd->mi[x_idx + y * mis] = mi_addr;
   1793       }
   1794 
   1795   if (cpi->oxcf.aq_mode != NO_AQ) vp9_init_plane_quantizers(cpi, x);
   1796 
   1797   if (is_inter_block(xdmi) && xdmi->sb_type < BLOCK_8X8) {
   1798     xdmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int;
   1799     xdmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int;
   1800   }
   1801 
   1802   x->skip = ctx->skip;
   1803   memcpy(x->zcoeff_blk[xdmi->tx_size], ctx->zcoeff_blk,
   1804          sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk);
   1805 
   1806   if (!output_enabled) return;
   1807 
   1808 #if CONFIG_INTERNAL_STATS
   1809   if (frame_is_intra_only(cm)) {
   1810     static const int kf_mode_index[] = {
   1811       THR_DC /*DC_PRED*/,          THR_V_PRED /*V_PRED*/,
   1812       THR_H_PRED /*H_PRED*/,       THR_D45_PRED /*D45_PRED*/,
   1813       THR_D135_PRED /*D135_PRED*/, THR_D117_PRED /*D117_PRED*/,
   1814       THR_D153_PRED /*D153_PRED*/, THR_D207_PRED /*D207_PRED*/,
   1815       THR_D63_PRED /*D63_PRED*/,   THR_TM /*TM_PRED*/,
   1816     };
   1817     ++cpi->mode_chosen_counts[kf_mode_index[xdmi->mode]];
   1818   } else {
   1819     // Note how often each mode chosen as best
   1820     ++cpi->mode_chosen_counts[ctx->best_mode_index];
   1821   }
   1822 #endif
   1823   if (!frame_is_intra_only(cm)) {
   1824     if (is_inter_block(xdmi)) {
   1825       vp9_update_mv_count(td);
   1826 
   1827       if (cm->interp_filter == SWITCHABLE) {
   1828         const int ctx = get_pred_context_switchable_interp(xd);
   1829         ++td->counts->switchable_interp[ctx][xdmi->interp_filter];
   1830       }
   1831     }
   1832 
   1833     rdc->comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff;
   1834     rdc->comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff;
   1835     rdc->comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff;
   1836 
   1837     for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
   1838       rdc->filter_diff[i] += ctx->best_filter_diff[i];
   1839   }
   1840 
   1841   for (h = 0; h < y_mis; ++h) {
   1842     MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols;
   1843     for (w = 0; w < x_mis; ++w) {
   1844       MV_REF *const mv = frame_mv + w;
   1845       mv->ref_frame[0] = mi->ref_frame[0];
   1846       mv->ref_frame[1] = mi->ref_frame[1];
   1847       mv->mv[0].as_int = mi->mv[0].as_int;
   1848       mv->mv[1].as_int = mi->mv[1].as_int;
   1849     }
   1850   }
   1851 }
   1852 
   1853 void vp9_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src,
   1854                           int mi_row, int mi_col) {
   1855   uint8_t *const buffers[3] = { src->y_buffer, src->u_buffer, src->v_buffer };
   1856   const int strides[3] = { src->y_stride, src->uv_stride, src->uv_stride };
   1857   int i;
   1858 
   1859   // Set current frame pointer.
   1860   x->e_mbd.cur_buf = src;
   1861 
   1862   for (i = 0; i < MAX_MB_PLANE; i++)
   1863     setup_pred_plane(&x->plane[i].src, buffers[i], strides[i], mi_row, mi_col,
   1864                      NULL, x->e_mbd.plane[i].subsampling_x,
   1865                      x->e_mbd.plane[i].subsampling_y);
   1866 }
   1867 
   1868 static void set_mode_info_seg_skip(MACROBLOCK *x, TX_MODE tx_mode,
   1869                                    RD_COST *rd_cost, BLOCK_SIZE bsize) {
   1870   MACROBLOCKD *const xd = &x->e_mbd;
   1871   MODE_INFO *const mi = xd->mi[0];
   1872   INTERP_FILTER filter_ref;
   1873 
   1874   filter_ref = get_pred_context_switchable_interp(xd);
   1875   if (filter_ref == SWITCHABLE_FILTERS) filter_ref = EIGHTTAP;
   1876 
   1877   mi->sb_type = bsize;
   1878   mi->mode = ZEROMV;
   1879   mi->tx_size =
   1880       VPXMIN(max_txsize_lookup[bsize], tx_mode_to_biggest_tx_size[tx_mode]);
   1881   mi->skip = 1;
   1882   mi->uv_mode = DC_PRED;
   1883   mi->ref_frame[0] = LAST_FRAME;
   1884   mi->ref_frame[1] = NONE;
   1885   mi->mv[0].as_int = 0;
   1886   mi->interp_filter = filter_ref;
   1887 
   1888   xd->mi[0]->bmi[0].as_mv[0].as_int = 0;
   1889   x->skip = 1;
   1890 
   1891   vp9_rd_cost_init(rd_cost);
   1892 }
   1893 
   1894 static void set_segment_rdmult(VP9_COMP *const cpi, MACROBLOCK *const x,
   1895                                int mi_row, int mi_col, BLOCK_SIZE bsize,
   1896                                AQ_MODE aq_mode) {
   1897   int segment_qindex;
   1898   VP9_COMMON *const cm = &cpi->common;
   1899   const uint8_t *const map =
   1900       cm->seg.update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
   1901 
   1902   vp9_init_plane_quantizers(cpi, x);
   1903   vpx_clear_system_state();
   1904   segment_qindex =
   1905       vp9_get_qindex(&cm->seg, x->e_mbd.mi[0]->segment_id, cm->base_qindex);
   1906 
   1907   if (aq_mode == NO_AQ || aq_mode == PSNR_AQ) {
   1908     if (cpi->sf.enable_tpl_model) x->rdmult = x->cb_rdmult;
   1909     return;
   1910   }
   1911 
   1912   if (aq_mode == CYCLIC_REFRESH_AQ) {
   1913     // If segment is boosted, use rdmult for that segment.
   1914     if (cyclic_refresh_segment_id_boosted(
   1915             get_segment_id(cm, map, bsize, mi_row, mi_col)))
   1916       x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);
   1917     return;
   1918   }
   1919 
   1920   x->rdmult = vp9_compute_rd_mult(cpi, segment_qindex + cm->y_dc_delta_q);
   1921 }
   1922 
   1923 static void rd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data,
   1924                              MACROBLOCK *const x, int mi_row, int mi_col,
   1925                              RD_COST *rd_cost, BLOCK_SIZE bsize,
   1926                              PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
   1927   VP9_COMMON *const cm = &cpi->common;
   1928   TileInfo *const tile_info = &tile_data->tile_info;
   1929   MACROBLOCKD *const xd = &x->e_mbd;
   1930   MODE_INFO *mi;
   1931   struct macroblock_plane *const p = x->plane;
   1932   struct macroblockd_plane *const pd = xd->plane;
   1933   const AQ_MODE aq_mode = cpi->oxcf.aq_mode;
   1934   int i, orig_rdmult;
   1935 
   1936   vpx_clear_system_state();
   1937 
   1938   // Use the lower precision, but faster, 32x32 fdct for mode selection.
   1939   x->use_lp32x32fdct = 1;
   1940 
   1941   set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
   1942   mi = xd->mi[0];
   1943   mi->sb_type = bsize;
   1944 
   1945   for (i = 0; i < MAX_MB_PLANE; ++i) {
   1946     p[i].coeff = ctx->coeff_pbuf[i][0];
   1947     p[i].qcoeff = ctx->qcoeff_pbuf[i][0];
   1948     pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][0];
   1949     p[i].eobs = ctx->eobs_pbuf[i][0];
   1950   }
   1951   ctx->is_coded = 0;
   1952   ctx->skippable = 0;
   1953   ctx->pred_pixel_ready = 0;
   1954   x->skip_recode = 0;
   1955 
   1956   // Set to zero to make sure we do not use the previous encoded frame stats
   1957   mi->skip = 0;
   1958 
   1959 #if CONFIG_VP9_HIGHBITDEPTH
   1960   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
   1961     x->source_variance = vp9_high_get_sby_perpixel_variance(
   1962         cpi, &x->plane[0].src, bsize, xd->bd);
   1963   } else {
   1964     x->source_variance =
   1965         vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
   1966   }
   1967 #else
   1968   x->source_variance =
   1969       vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
   1970 #endif  // CONFIG_VP9_HIGHBITDEPTH
   1971 
   1972   // Save rdmult before it might be changed, so it can be restored later.
   1973   orig_rdmult = x->rdmult;
   1974 
   1975   if ((cpi->sf.tx_domain_thresh > 0.0) || (cpi->sf.quant_opt_thresh > 0.0)) {
   1976     double logvar = vp9_log_block_var(cpi, x, bsize);
   1977     // Check block complexity as part of descision on using pixel or transform
   1978     // domain distortion in rd tests.
   1979     x->block_tx_domain = cpi->sf.allow_txfm_domain_distortion &&
   1980                          (logvar >= cpi->sf.tx_domain_thresh);
   1981 
   1982     // Check block complexity as part of descision on using quantized
   1983     // coefficient optimisation inside the rd loop.
   1984     x->block_qcoeff_opt =
   1985         cpi->sf.allow_quant_coeff_opt && (logvar <= cpi->sf.quant_opt_thresh);
   1986   } else {
   1987     x->block_tx_domain = cpi->sf.allow_txfm_domain_distortion;
   1988     x->block_qcoeff_opt = cpi->sf.allow_quant_coeff_opt;
   1989   }
   1990 
   1991   set_segment_index(cpi, x, mi_row, mi_col, bsize, 0);
   1992   set_segment_rdmult(cpi, x, mi_row, mi_col, bsize, aq_mode);
   1993 
   1994   // Find best coding mode & reconstruct the MB so it is available
   1995   // as a predictor for MBs that follow in the SB
   1996   if (frame_is_intra_only(cm)) {
   1997     vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, best_rd);
   1998   } else {
   1999     if (bsize >= BLOCK_8X8) {
   2000       if (segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP))
   2001         vp9_rd_pick_inter_mode_sb_seg_skip(cpi, tile_data, x, rd_cost, bsize,
   2002                                            ctx, best_rd);
   2003       else
   2004         vp9_rd_pick_inter_mode_sb(cpi, tile_data, x, mi_row, mi_col, rd_cost,
   2005                                   bsize, ctx, best_rd);
   2006     } else {
   2007       vp9_rd_pick_inter_mode_sub8x8(cpi, tile_data, x, mi_row, mi_col, rd_cost,
   2008                                     bsize, ctx, best_rd);
   2009     }
   2010   }
   2011 
   2012   // Examine the resulting rate and for AQ mode 2 make a segment choice.
   2013   if ((rd_cost->rate != INT_MAX) && (aq_mode == COMPLEXITY_AQ) &&
   2014       (bsize >= BLOCK_16X16) &&
   2015       (cm->frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame ||
   2016        (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref))) {
   2017     vp9_caq_select_segment(cpi, x, bsize, mi_row, mi_col, rd_cost->rate);
   2018   }
   2019 
   2020   // TODO(jingning) The rate-distortion optimization flow needs to be
   2021   // refactored to provide proper exit/return handle.
   2022   if (rd_cost->rate == INT_MAX)
   2023     rd_cost->rdcost = INT64_MAX;
   2024   else
   2025     rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
   2026 
   2027   x->rdmult = orig_rdmult;
   2028 
   2029   ctx->rate = rd_cost->rate;
   2030   ctx->dist = rd_cost->dist;
   2031 }
   2032 
   2033 static void update_stats(VP9_COMMON *cm, ThreadData *td) {
   2034   const MACROBLOCK *x = &td->mb;
   2035   const MACROBLOCKD *const xd = &x->e_mbd;
   2036   const MODE_INFO *const mi = xd->mi[0];
   2037   const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
   2038   const BLOCK_SIZE bsize = mi->sb_type;
   2039 
   2040   if (!frame_is_intra_only(cm)) {
   2041     FRAME_COUNTS *const counts = td->counts;
   2042     const int inter_block = is_inter_block(mi);
   2043     const int seg_ref_active =
   2044         segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_REF_FRAME);
   2045     if (!seg_ref_active) {
   2046       counts->intra_inter[get_intra_inter_context(xd)][inter_block]++;
   2047       // If the segment reference feature is enabled we have only a single
   2048       // reference frame allowed for the segment so exclude it from
   2049       // the reference frame counts used to work out probabilities.
   2050       if (inter_block) {
   2051         const MV_REFERENCE_FRAME ref0 = mi->ref_frame[0];
   2052         if (cm->reference_mode == REFERENCE_MODE_SELECT)
   2053           counts->comp_inter[vp9_get_reference_mode_context(cm, xd)]
   2054                             [has_second_ref(mi)]++;
   2055 
   2056         if (has_second_ref(mi)) {
   2057           const int idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref];
   2058           const int ctx = vp9_get_pred_context_comp_ref_p(cm, xd);
   2059           const int bit = mi->ref_frame[!idx] == cm->comp_var_ref[1];
   2060           counts->comp_ref[ctx][bit]++;
   2061         } else {
   2062           counts->single_ref[vp9_get_pred_context_single_ref_p1(xd)][0]
   2063                             [ref0 != LAST_FRAME]++;
   2064           if (ref0 != LAST_FRAME)
   2065             counts->single_ref[vp9_get_pred_context_single_ref_p2(xd)][1]
   2066                               [ref0 != GOLDEN_FRAME]++;
   2067         }
   2068       }
   2069     }
   2070     if (inter_block &&
   2071         !segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP)) {
   2072       const int mode_ctx = mbmi_ext->mode_context[mi->ref_frame[0]];
   2073       if (bsize >= BLOCK_8X8) {
   2074         const PREDICTION_MODE mode = mi->mode;
   2075         ++counts->inter_mode[mode_ctx][INTER_OFFSET(mode)];
   2076       } else {
   2077         const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
   2078         const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
   2079         int idx, idy;
   2080         for (idy = 0; idy < 2; idy += num_4x4_h) {
   2081           for (idx = 0; idx < 2; idx += num_4x4_w) {
   2082             const int j = idy * 2 + idx;
   2083             const PREDICTION_MODE b_mode = mi->bmi[j].as_mode;
   2084             ++counts->inter_mode[mode_ctx][INTER_OFFSET(b_mode)];
   2085           }
   2086         }
   2087       }
   2088     }
   2089   }
   2090 }
   2091 
   2092 static void restore_context(MACROBLOCK *const x, int mi_row, int mi_col,
   2093                             ENTROPY_CONTEXT a[16 * MAX_MB_PLANE],
   2094                             ENTROPY_CONTEXT l[16 * MAX_MB_PLANE],
   2095                             PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8],
   2096                             BLOCK_SIZE bsize) {
   2097   MACROBLOCKD *const xd = &x->e_mbd;
   2098   int p;
   2099   const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
   2100   const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
   2101   int mi_width = num_8x8_blocks_wide_lookup[bsize];
   2102   int mi_height = num_8x8_blocks_high_lookup[bsize];
   2103   for (p = 0; p < MAX_MB_PLANE; p++) {
   2104     memcpy(xd->above_context[p] + ((mi_col * 2) >> xd->plane[p].subsampling_x),
   2105            a + num_4x4_blocks_wide * p,
   2106            (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >>
   2107                xd->plane[p].subsampling_x);
   2108     memcpy(xd->left_context[p] +
   2109                ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y),
   2110            l + num_4x4_blocks_high * p,
   2111            (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >>
   2112                xd->plane[p].subsampling_y);
   2113   }
   2114   memcpy(xd->above_seg_context + mi_col, sa,
   2115          sizeof(*xd->above_seg_context) * mi_width);
   2116   memcpy(xd->left_seg_context + (mi_row & MI_MASK), sl,
   2117          sizeof(xd->left_seg_context[0]) * mi_height);
   2118 }
   2119 
   2120 static void save_context(MACROBLOCK *const x, int mi_row, int mi_col,
   2121                          ENTROPY_CONTEXT a[16 * MAX_MB_PLANE],
   2122                          ENTROPY_CONTEXT l[16 * MAX_MB_PLANE],
   2123                          PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8],
   2124                          BLOCK_SIZE bsize) {
   2125   const MACROBLOCKD *const xd = &x->e_mbd;
   2126   int p;
   2127   const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
   2128   const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
   2129   int mi_width = num_8x8_blocks_wide_lookup[bsize];
   2130   int mi_height = num_8x8_blocks_high_lookup[bsize];
   2131 
   2132   // buffer the above/left context information of the block in search.
   2133   for (p = 0; p < MAX_MB_PLANE; ++p) {
   2134     memcpy(a + num_4x4_blocks_wide * p,
   2135            xd->above_context[p] + (mi_col * 2 >> xd->plane[p].subsampling_x),
   2136            (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >>
   2137                xd->plane[p].subsampling_x);
   2138     memcpy(l + num_4x4_blocks_high * p,
   2139            xd->left_context[p] +
   2140                ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y),
   2141            (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >>
   2142                xd->plane[p].subsampling_y);
   2143   }
   2144   memcpy(sa, xd->above_seg_context + mi_col,
   2145          sizeof(*xd->above_seg_context) * mi_width);
   2146   memcpy(sl, xd->left_seg_context + (mi_row & MI_MASK),
   2147          sizeof(xd->left_seg_context[0]) * mi_height);
   2148 }
   2149 
   2150 static void encode_b(VP9_COMP *cpi, const TileInfo *const tile, ThreadData *td,
   2151                      TOKENEXTRA **tp, int mi_row, int mi_col,
   2152                      int output_enabled, BLOCK_SIZE bsize,
   2153                      PICK_MODE_CONTEXT *ctx) {
   2154   MACROBLOCK *const x = &td->mb;
   2155   set_offsets(cpi, tile, x, mi_row, mi_col, bsize);
   2156 
   2157   if (cpi->sf.enable_tpl_model && cpi->oxcf.aq_mode == NO_AQ)
   2158     x->rdmult = x->cb_rdmult;
   2159 
   2160   update_state(cpi, td, ctx, mi_row, mi_col, bsize, output_enabled);
   2161   encode_superblock(cpi, td, tp, output_enabled, mi_row, mi_col, bsize, ctx);
   2162 
   2163   if (output_enabled) {
   2164     update_stats(&cpi->common, td);
   2165 
   2166     (*tp)->token = EOSB_TOKEN;
   2167     (*tp)++;
   2168   }
   2169 }
   2170 
   2171 static void encode_sb(VP9_COMP *cpi, ThreadData *td, const TileInfo *const tile,
   2172                       TOKENEXTRA **tp, int mi_row, int mi_col,
   2173                       int output_enabled, BLOCK_SIZE bsize, PC_TREE *pc_tree) {
   2174   VP9_COMMON *const cm = &cpi->common;
   2175   MACROBLOCK *const x = &td->mb;
   2176   MACROBLOCKD *const xd = &x->e_mbd;
   2177 
   2178   const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
   2179   int ctx;
   2180   PARTITION_TYPE partition;
   2181   BLOCK_SIZE subsize = bsize;
   2182 
   2183   if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
   2184 
   2185   if (bsize >= BLOCK_8X8) {
   2186     ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
   2187     subsize = get_subsize(bsize, pc_tree->partitioning);
   2188   } else {
   2189     ctx = 0;
   2190     subsize = BLOCK_4X4;
   2191   }
   2192 
   2193   partition = partition_lookup[bsl][subsize];
   2194   if (output_enabled && bsize != BLOCK_4X4)
   2195     td->counts->partition[ctx][partition]++;
   2196 
   2197   switch (partition) {
   2198     case PARTITION_NONE:
   2199       encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize,
   2200                &pc_tree->none);
   2201       break;
   2202     case PARTITION_VERT:
   2203       encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize,
   2204                &pc_tree->vertical[0]);
   2205       if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) {
   2206         encode_b(cpi, tile, td, tp, mi_row, mi_col + hbs, output_enabled,
   2207                  subsize, &pc_tree->vertical[1]);
   2208       }
   2209       break;
   2210     case PARTITION_HORZ:
   2211       encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize,
   2212                &pc_tree->horizontal[0]);
   2213       if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) {
   2214         encode_b(cpi, tile, td, tp, mi_row + hbs, mi_col, output_enabled,
   2215                  subsize, &pc_tree->horizontal[1]);
   2216       }
   2217       break;
   2218     default:
   2219       assert(partition == PARTITION_SPLIT);
   2220       if (bsize == BLOCK_8X8) {
   2221         encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize,
   2222                  pc_tree->leaf_split[0]);
   2223       } else {
   2224         encode_sb(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize,
   2225                   pc_tree->split[0]);
   2226         encode_sb(cpi, td, tile, tp, mi_row, mi_col + hbs, output_enabled,
   2227                   subsize, pc_tree->split[1]);
   2228         encode_sb(cpi, td, tile, tp, mi_row + hbs, mi_col, output_enabled,
   2229                   subsize, pc_tree->split[2]);
   2230         encode_sb(cpi, td, tile, tp, mi_row + hbs, mi_col + hbs, output_enabled,
   2231                   subsize, pc_tree->split[3]);
   2232       }
   2233       break;
   2234   }
   2235 
   2236   if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)
   2237     update_partition_context(xd, mi_row, mi_col, subsize, bsize);
   2238 }
   2239 
   2240 // Check to see if the given partition size is allowed for a specified number
   2241 // of 8x8 block rows and columns remaining in the image.
   2242 // If not then return the largest allowed partition size
   2243 static BLOCK_SIZE find_partition_size(BLOCK_SIZE bsize, int rows_left,
   2244                                       int cols_left, int *bh, int *bw) {
   2245   if (rows_left <= 0 || cols_left <= 0) {
   2246     return VPXMIN(bsize, BLOCK_8X8);
   2247   } else {
   2248     for (; bsize > 0; bsize -= 3) {
   2249       *bh = num_8x8_blocks_high_lookup[bsize];
   2250       *bw = num_8x8_blocks_wide_lookup[bsize];
   2251       if ((*bh <= rows_left) && (*bw <= cols_left)) {
   2252         break;
   2253       }
   2254     }
   2255   }
   2256   return bsize;
   2257 }
   2258 
   2259 static void set_partial_b64x64_partition(MODE_INFO *mi, int mis, int bh_in,
   2260                                          int bw_in, int row8x8_remaining,
   2261                                          int col8x8_remaining, BLOCK_SIZE bsize,
   2262                                          MODE_INFO **mi_8x8) {
   2263   int bh = bh_in;
   2264   int r, c;
   2265   for (r = 0; r < MI_BLOCK_SIZE; r += bh) {
   2266     int bw = bw_in;
   2267     for (c = 0; c < MI_BLOCK_SIZE; c += bw) {
   2268       const int index = r * mis + c;
   2269       mi_8x8[index] = mi + index;
   2270       mi_8x8[index]->sb_type = find_partition_size(
   2271           bsize, row8x8_remaining - r, col8x8_remaining - c, &bh, &bw);
   2272     }
   2273   }
   2274 }
   2275 
   2276 // This function attempts to set all mode info entries in a given SB64
   2277 // to the same block partition size.
   2278 // However, at the bottom and right borders of the image the requested size
   2279 // may not be allowed in which case this code attempts to choose the largest
   2280 // allowable partition.
   2281 static void set_fixed_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
   2282                                    MODE_INFO **mi_8x8, int mi_row, int mi_col,
   2283                                    BLOCK_SIZE bsize) {
   2284   VP9_COMMON *const cm = &cpi->common;
   2285   const int mis = cm->mi_stride;
   2286   const int row8x8_remaining = tile->mi_row_end - mi_row;
   2287   const int col8x8_remaining = tile->mi_col_end - mi_col;
   2288   int block_row, block_col;
   2289   MODE_INFO *mi_upper_left = cm->mi + mi_row * mis + mi_col;
   2290   int bh = num_8x8_blocks_high_lookup[bsize];
   2291   int bw = num_8x8_blocks_wide_lookup[bsize];
   2292 
   2293   assert((row8x8_remaining > 0) && (col8x8_remaining > 0));
   2294 
   2295   // Apply the requested partition size to the SB64 if it is all "in image"
   2296   if ((col8x8_remaining >= MI_BLOCK_SIZE) &&
   2297       (row8x8_remaining >= MI_BLOCK_SIZE)) {
   2298     for (block_row = 0; block_row < MI_BLOCK_SIZE; block_row += bh) {
   2299       for (block_col = 0; block_col < MI_BLOCK_SIZE; block_col += bw) {
   2300         int index = block_row * mis + block_col;
   2301         mi_8x8[index] = mi_upper_left + index;
   2302         mi_8x8[index]->sb_type = bsize;
   2303       }
   2304     }
   2305   } else {
   2306     // Else this is a partial SB64.
   2307     set_partial_b64x64_partition(mi_upper_left, mis, bh, bw, row8x8_remaining,
   2308                                  col8x8_remaining, bsize, mi_8x8);
   2309   }
   2310 }
   2311 
   // (row, col) offsets, in 8x8 (mi) units, of the sixteen 16x16 blocks of an
   // SB64. Entries are grouped four at a time by the 32x32 quadrant they
   // belong to, so entry [4 * q] is the top-left corner of quadrant q.
   static const struct {
     int row;
     int col;
   } coord_lookup[16] = {
     { 0, 0 }, { 0, 2 }, { 2, 0 }, { 2, 2 },  // 32x32 index = 0
     { 0, 4 }, { 0, 6 }, { 2, 4 }, { 2, 6 },  // 32x32 index = 1
     { 4, 0 }, { 4, 2 }, { 6, 0 }, { 6, 2 },  // 32x32 index = 2
     { 4, 4 }, { 4, 6 }, { 6, 4 }, { 6, 6 },  // 32x32 index = 3
   };
   2337 
   2338 static void set_source_var_based_partition(VP9_COMP *cpi,
   2339                                            const TileInfo *const tile,
   2340                                            MACROBLOCK *const x,
   2341                                            MODE_INFO **mi_8x8, int mi_row,
   2342                                            int mi_col) {
   2343   VP9_COMMON *const cm = &cpi->common;
   2344   const int mis = cm->mi_stride;
   2345   const int row8x8_remaining = tile->mi_row_end - mi_row;
   2346   const int col8x8_remaining = tile->mi_col_end - mi_col;
   2347   MODE_INFO *mi_upper_left = cm->mi + mi_row * mis + mi_col;
   2348 
   2349   vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);
   2350 
   2351   assert((row8x8_remaining > 0) && (col8x8_remaining > 0));
   2352 
   2353   // In-image SB64
   2354   if ((col8x8_remaining >= MI_BLOCK_SIZE) &&
   2355       (row8x8_remaining >= MI_BLOCK_SIZE)) {
   2356     int i, j;
   2357     int index;
   2358     diff d32[4];
   2359     const int offset = (mi_row >> 1) * cm->mb_cols + (mi_col >> 1);
   2360     int is_larger_better = 0;
   2361     int use32x32 = 0;
   2362     unsigned int thr = cpi->source_var_thresh;
   2363 
   2364     memset(d32, 0, 4 * sizeof(diff));
   2365 
   2366     for (i = 0; i < 4; i++) {
   2367       diff *d16[4];
   2368 
   2369       for (j = 0; j < 4; j++) {
   2370         int b_mi_row = coord_lookup[i * 4 + j].row;
   2371         int b_mi_col = coord_lookup[i * 4 + j].col;
   2372         int boffset = b_mi_row / 2 * cm->mb_cols + b_mi_col / 2;
   2373 
   2374         d16[j] = cpi->source_diff_var + offset + boffset;
   2375 
   2376         index = b_mi_row * mis + b_mi_col;
   2377         mi_8x8[index] = mi_upper_left + index;
   2378         mi_8x8[index]->sb_type = BLOCK_16X16;
   2379 
   2380         // TODO(yunqingwang): If d16[j].var is very large, use 8x8 partition
   2381         // size to further improve quality.
   2382       }
   2383 
   2384       is_larger_better = (d16[0]->var < thr) && (d16[1]->var < thr) &&
   2385                          (d16[2]->var < thr) && (d16[3]->var < thr);
   2386 
   2387       // Use 32x32 partition
   2388       if (is_larger_better) {
   2389         use32x32 += 1;
   2390 
   2391         for (j = 0; j < 4; j++) {
   2392           d32[i].sse += d16[j]->sse;
   2393           d32[i].sum += d16[j]->sum;
   2394         }
   2395 
   2396         d32[i].var =
   2397             (unsigned int)(d32[i].sse -
   2398                            (unsigned int)(((int64_t)d32[i].sum * d32[i].sum) >>
   2399                                           10));
   2400 
   2401         index = coord_lookup[i * 4].row * mis + coord_lookup[i * 4].col;
   2402         mi_8x8[index] = mi_upper_left + index;
   2403         mi_8x8[index]->sb_type = BLOCK_32X32;
   2404       }
   2405     }
   2406 
   2407     if (use32x32 == 4) {
   2408       thr <<= 1;
   2409       is_larger_better = (d32[0].var < thr) && (d32[1].var < thr) &&
   2410                          (d32[2].var < thr) && (d32[3].var < thr);
   2411 
   2412       // Use 64x64 partition
   2413       if (is_larger_better) {
   2414         mi_8x8[0] = mi_upper_left;
   2415         mi_8x8[0]->sb_type = BLOCK_64X64;
   2416       }
   2417     }
   2418   } else {  // partial in-image SB64
   2419     int bh = num_8x8_blocks_high_lookup[BLOCK_16X16];
   2420     int bw = num_8x8_blocks_wide_lookup[BLOCK_16X16];
   2421     set_partial_b64x64_partition(mi_upper_left, mis, bh, bw, row8x8_remaining,
   2422                                  col8x8_remaining, BLOCK_16X16, mi_8x8);
   2423   }
   2424 }
   2425 
   2426 static void update_state_rt(VP9_COMP *cpi, ThreadData *td,
   2427                             PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col,
   2428                             int bsize) {
   2429   VP9_COMMON *const cm = &cpi->common;
   2430   MACROBLOCK *const x = &td->mb;
   2431   MACROBLOCKD *const xd = &x->e_mbd;
   2432   MODE_INFO *const mi = xd->mi[0];
   2433   struct macroblock_plane *const p = x->plane;
   2434   const struct segmentation *const seg = &cm->seg;
   2435   const int bw = num_8x8_blocks_wide_lookup[mi->sb_type];
   2436   const int bh = num_8x8_blocks_high_lookup[mi->sb_type];
   2437   const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col);
   2438   const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row);
   2439 
   2440   *(xd->mi[0]) = ctx->mic;
   2441   *(x->mbmi_ext) = ctx->mbmi_ext;
   2442 
   2443   if (seg->enabled && cpi->oxcf.aq_mode != NO_AQ) {
   2444     // For in frame complexity AQ or variance AQ, copy segment_id from
   2445     // segmentation_map.
   2446     if (cpi->oxcf.aq_mode != CYCLIC_REFRESH_AQ) {
   2447       const uint8_t *const map =
   2448           seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
   2449       mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
   2450     } else {
   2451       // Setting segmentation map for cyclic_refresh.
   2452       vp9_cyclic_refresh_update_segment(cpi, mi, mi_row, mi_col, bsize,
   2453                                         ctx->rate, ctx->dist, x->skip, p);
   2454     }
   2455     vp9_init_plane_quantizers(cpi, x);
   2456   }
   2457 
   2458   if (is_inter_block(mi)) {
   2459     vp9_update_mv_count(td);
   2460     if (cm->interp_filter == SWITCHABLE) {
   2461       const int pred_ctx = get_pred_context_switchable_interp(xd);
   2462       ++td->counts->switchable_interp[pred_ctx][mi->interp_filter];
   2463     }
   2464 
   2465     if (mi->sb_type < BLOCK_8X8) {
   2466       mi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int;
   2467       mi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int;
   2468     }
   2469   }
   2470 
   2471   if (cm->use_prev_frame_mvs || !cm->error_resilient_mode ||
   2472       (cpi->svc.use_base_mv && cpi->svc.number_spatial_layers > 1 &&
   2473        cpi->svc.spatial_layer_id != cpi->svc.number_spatial_layers - 1)) {
   2474     MV_REF *const frame_mvs =
   2475         cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col;
   2476     int w, h;
   2477 
   2478     for (h = 0; h < y_mis; ++h) {
   2479       MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols;
   2480       for (w = 0; w < x_mis; ++w) {
   2481         MV_REF *const mv = frame_mv + w;
   2482         mv->ref_frame[0] = mi->ref_frame[0];
   2483         mv->ref_frame[1] = mi->ref_frame[1];
   2484         mv->mv[0].as_int = mi->mv[0].as_int;
   2485         mv->mv[1].as_int = mi->mv[1].as_int;
   2486       }
   2487     }
   2488   }
   2489 
   2490   x->skip = ctx->skip;
   2491   x->skip_txfm[0] = (mi->segment_id || xd->lossless) ? 0 : ctx->skip_txfm[0];
   2492 }
   2493 
   2494 static void encode_b_rt(VP9_COMP *cpi, ThreadData *td,
   2495                         const TileInfo *const tile, TOKENEXTRA **tp, int mi_row,
   2496                         int mi_col, int output_enabled, BLOCK_SIZE bsize,
   2497                         PICK_MODE_CONTEXT *ctx) {
   2498   MACROBLOCK *const x = &td->mb;
   2499   set_offsets(cpi, tile, x, mi_row, mi_col, bsize);
   2500   update_state_rt(cpi, td, ctx, mi_row, mi_col, bsize);
   2501 
   2502   encode_superblock(cpi, td, tp, output_enabled, mi_row, mi_col, bsize, ctx);
   2503   update_stats(&cpi->common, td);
   2504 
   2505   (*tp)->token = EOSB_TOKEN;
   2506   (*tp)++;
   2507 }
   2508 
   2509 static void encode_sb_rt(VP9_COMP *cpi, ThreadData *td,
   2510                          const TileInfo *const tile, TOKENEXTRA **tp,
   2511                          int mi_row, int mi_col, int output_enabled,
   2512                          BLOCK_SIZE bsize, PC_TREE *pc_tree) {
   2513   VP9_COMMON *const cm = &cpi->common;
   2514   MACROBLOCK *const x = &td->mb;
   2515   MACROBLOCKD *const xd = &x->e_mbd;
   2516 
   2517   const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
   2518   int ctx;
   2519   PARTITION_TYPE partition;
   2520   BLOCK_SIZE subsize;
   2521 
   2522   if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
   2523 
   2524   if (bsize >= BLOCK_8X8) {
   2525     const int idx_str = xd->mi_stride * mi_row + mi_col;
   2526     MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str;
   2527     ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
   2528     subsize = mi_8x8[0]->sb_type;
   2529   } else {
   2530     ctx = 0;
   2531     subsize = BLOCK_4X4;
   2532   }
   2533 
   2534   partition = partition_lookup[bsl][subsize];
   2535   if (output_enabled && bsize != BLOCK_4X4)
   2536     td->counts->partition[ctx][partition]++;
   2537 
   2538   switch (partition) {
   2539     case PARTITION_NONE:
   2540       encode_b_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize,
   2541                   &pc_tree->none);
   2542       break;
   2543     case PARTITION_VERT:
   2544       encode_b_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize,
   2545                   &pc_tree->vertical[0]);
   2546       if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) {
   2547         encode_b_rt(cpi, td, tile, tp, mi_row, mi_col + hbs, output_enabled,
   2548                     subsize, &pc_tree->vertical[1]);
   2549       }
   2550       break;
   2551     case PARTITION_HORZ:
   2552       encode_b_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize,
   2553                   &pc_tree->horizontal[0]);
   2554       if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) {
   2555         encode_b_rt(cpi, td, tile, tp, mi_row + hbs, mi_col, output_enabled,
   2556                     subsize, &pc_tree->horizontal[1]);
   2557       }
   2558       break;
   2559     default:
   2560       assert(partition == PARTITION_SPLIT);
   2561       subsize = get_subsize(bsize, PARTITION_SPLIT);
   2562       encode_sb_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize,
   2563                    pc_tree->split[0]);
   2564       encode_sb_rt(cpi, td, tile, tp, mi_row, mi_col + hbs, output_enabled,
   2565                    subsize, pc_tree->split[1]);
   2566       encode_sb_rt(cpi, td, tile, tp, mi_row + hbs, mi_col, output_enabled,
   2567                    subsize, pc_tree->split[2]);
   2568       encode_sb_rt(cpi, td, tile, tp, mi_row + hbs, mi_col + hbs,
   2569                    output_enabled, subsize, pc_tree->split[3]);
   2570       break;
   2571   }
   2572 
   2573   if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)
   2574     update_partition_context(xd, mi_row, mi_col, subsize, bsize);
   2575 }
   2576 
   2577 static void rd_use_partition(VP9_COMP *cpi, ThreadData *td,
   2578                              TileDataEnc *tile_data, MODE_INFO **mi_8x8,
   2579                              TOKENEXTRA **tp, int mi_row, int mi_col,
   2580                              BLOCK_SIZE bsize, int *rate, int64_t *dist,
   2581                              int do_recon, PC_TREE *pc_tree) {
   2582   VP9_COMMON *const cm = &cpi->common;
   2583   TileInfo *const tile_info = &tile_data->tile_info;
   2584   MACROBLOCK *const x = &td->mb;
   2585   MACROBLOCKD *const xd = &x->e_mbd;
   2586   const int mis = cm->mi_stride;
   2587   const int bsl = b_width_log2_lookup[bsize];
   2588   const int mi_step = num_4x4_blocks_wide_lookup[bsize] / 2;
   2589   const int bss = (1 << bsl) / 4;
   2590   int i, pl;
   2591   PARTITION_TYPE partition = PARTITION_NONE;
   2592   BLOCK_SIZE subsize;
   2593   ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
   2594   PARTITION_CONTEXT sl[8], sa[8];
   2595   RD_COST last_part_rdc, none_rdc, chosen_rdc;
   2596   BLOCK_SIZE sub_subsize = BLOCK_4X4;
   2597   int splits_below = 0;
   2598   BLOCK_SIZE bs_type = mi_8x8[0]->sb_type;
   2599   int do_partition_search = 1;
   2600   PICK_MODE_CONTEXT *ctx = &pc_tree->none;
   2601 
   2602   if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
   2603 
   2604   assert(num_4x4_blocks_wide_lookup[bsize] ==
   2605          num_4x4_blocks_high_lookup[bsize]);
   2606 
   2607   vp9_rd_cost_reset(&last_part_rdc);
   2608   vp9_rd_cost_reset(&none_rdc);
   2609   vp9_rd_cost_reset(&chosen_rdc);
   2610 
   2611   partition = partition_lookup[bsl][bs_type];
   2612   subsize = get_subsize(bsize, partition);
   2613 
   2614   pc_tree->partitioning = partition;
   2615   save_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
   2616 
   2617   if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode != NO_AQ) {
   2618     set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
   2619     x->mb_energy = vp9_block_energy(cpi, x, bsize);
   2620   }
   2621 
   2622   if (do_partition_search &&
   2623       cpi->sf.partition_search_type == SEARCH_PARTITION &&
   2624       cpi->sf.adjust_partitioning_from_last_frame) {
   2625     // Check if any of the sub blocks are further split.
   2626     if (partition == PARTITION_SPLIT && subsize > BLOCK_8X8) {
   2627       sub_subsize = get_subsize(subsize, PARTITION_SPLIT);
   2628       splits_below = 1;
   2629       for (i = 0; i < 4; i++) {
   2630         int jj = i >> 1, ii = i & 0x01;
   2631         MODE_INFO *this_mi = mi_8x8[jj * bss * mis + ii * bss];
   2632         if (this_mi && this_mi->sb_type >= sub_subsize) {
   2633           splits_below = 0;
   2634         }
   2635       }
   2636     }
   2637 
   2638     // If partition is not none try none unless each of the 4 splits are split
   2639     // even further..
   2640     if (partition != PARTITION_NONE && !splits_below &&
   2641         mi_row + (mi_step >> 1) < cm->mi_rows &&
   2642         mi_col + (mi_step >> 1) < cm->mi_cols) {
   2643       pc_tree->partitioning = PARTITION_NONE;
   2644       rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &none_rdc, bsize, ctx,
   2645                        INT64_MAX);
   2646 
   2647       pl = partition_plane_context(xd, mi_row, mi_col, bsize);
   2648 
   2649       if (none_rdc.rate < INT_MAX) {
   2650         none_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
   2651         none_rdc.rdcost =
   2652             RDCOST(x->rdmult, x->rddiv, none_rdc.rate, none_rdc.dist);
   2653       }
   2654 
   2655       restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
   2656       mi_8x8[0]->sb_type = bs_type;
   2657       pc_tree->partitioning = partition;
   2658     }
   2659   }
   2660 
   2661   switch (partition) {
   2662     case PARTITION_NONE:
   2663       rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, bsize,
   2664                        ctx, INT64_MAX);
   2665       break;
   2666     case PARTITION_HORZ:
   2667       pc_tree->horizontal[0].skip_ref_frame_mask = 0;
   2668       rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
   2669                        subsize, &pc_tree->horizontal[0], INT64_MAX);
   2670       if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
   2671           mi_row + (mi_step >> 1) < cm->mi_rows) {
   2672         RD_COST tmp_rdc;
   2673         PICK_MODE_CONTEXT *ctx = &pc_tree->horizontal[0];
   2674         vp9_rd_cost_init(&tmp_rdc);
   2675         update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0);
   2676         encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx);
   2677         pc_tree->horizontal[1].skip_ref_frame_mask = 0;
   2678         rd_pick_sb_modes(cpi, tile_data, x, mi_row + (mi_step >> 1), mi_col,
   2679                          &tmp_rdc, subsize, &pc_tree->horizontal[1], INT64_MAX);
   2680         if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
   2681           vp9_rd_cost_reset(&last_part_rdc);
   2682           break;
   2683         }
   2684         last_part_rdc.rate += tmp_rdc.rate;
   2685         last_part_rdc.dist += tmp_rdc.dist;
   2686         last_part_rdc.rdcost += tmp_rdc.rdcost;
   2687       }
   2688       break;
   2689     case PARTITION_VERT:
   2690       pc_tree->vertical[0].skip_ref_frame_mask = 0;
   2691       rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
   2692                        subsize, &pc_tree->vertical[0], INT64_MAX);
   2693       if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
   2694           mi_col + (mi_step >> 1) < cm->mi_cols) {
   2695         RD_COST tmp_rdc;
   2696         PICK_MODE_CONTEXT *ctx = &pc_tree->vertical[0];
   2697         vp9_rd_cost_init(&tmp_rdc);
   2698         update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0);
   2699         encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx);
   2700         pc_tree->vertical[bsize > BLOCK_8X8].skip_ref_frame_mask = 0;
   2701         rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + (mi_step >> 1),
   2702                          &tmp_rdc, subsize,
   2703                          &pc_tree->vertical[bsize > BLOCK_8X8], INT64_MAX);
   2704         if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
   2705           vp9_rd_cost_reset(&last_part_rdc);
   2706           break;
   2707         }
   2708         last_part_rdc.rate += tmp_rdc.rate;
   2709         last_part_rdc.dist += tmp_rdc.dist;
   2710         last_part_rdc.rdcost += tmp_rdc.rdcost;
   2711       }
   2712       break;
   2713     default:
   2714       assert(partition == PARTITION_SPLIT);
   2715       if (bsize == BLOCK_8X8) {
   2716         rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
   2717                          subsize, pc_tree->leaf_split[0], INT64_MAX);
   2718         break;
   2719       }
   2720       last_part_rdc.rate = 0;
   2721       last_part_rdc.dist = 0;
   2722       last_part_rdc.rdcost = 0;
   2723       for (i = 0; i < 4; i++) {
   2724         int x_idx = (i & 1) * (mi_step >> 1);
   2725         int y_idx = (i >> 1) * (mi_step >> 1);
   2726         int jj = i >> 1, ii = i & 0x01;
   2727         RD_COST tmp_rdc;
   2728         if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
   2729           continue;
   2730 
   2731         vp9_rd_cost_init(&tmp_rdc);
   2732         rd_use_partition(cpi, td, tile_data, mi_8x8 + jj * bss * mis + ii * bss,
   2733                          tp, mi_row + y_idx, mi_col + x_idx, subsize,
   2734                          &tmp_rdc.rate, &tmp_rdc.dist, i != 3,
   2735                          pc_tree->split[i]);
   2736         if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
   2737           vp9_rd_cost_reset(&last_part_rdc);
   2738           break;
   2739         }
   2740         last_part_rdc.rate += tmp_rdc.rate;
   2741         last_part_rdc.dist += tmp_rdc.dist;
   2742       }
   2743       break;
   2744   }
   2745 
   2746   pl = partition_plane_context(xd, mi_row, mi_col, bsize);
   2747   if (last_part_rdc.rate < INT_MAX) {
   2748     last_part_rdc.rate += cpi->partition_cost[pl][partition];
   2749     last_part_rdc.rdcost =
   2750         RDCOST(x->rdmult, x->rddiv, last_part_rdc.rate, last_part_rdc.dist);
   2751   }
   2752 
   2753   if (do_partition_search && cpi->sf.adjust_partitioning_from_last_frame &&
   2754       cpi->sf.partition_search_type == SEARCH_PARTITION &&
   2755       partition != PARTITION_SPLIT && bsize > BLOCK_8X8 &&
   2756       (mi_row + mi_step < cm->mi_rows ||
   2757        mi_row + (mi_step >> 1) == cm->mi_rows) &&
   2758       (mi_col + mi_step < cm->mi_cols ||
   2759        mi_col + (mi_step >> 1) == cm->mi_cols)) {
   2760     BLOCK_SIZE split_subsize = get_subsize(bsize, PARTITION_SPLIT);
   2761     chosen_rdc.rate = 0;
   2762     chosen_rdc.dist = 0;
   2763     restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
   2764     pc_tree->partitioning = PARTITION_SPLIT;
   2765 
   2766     // Split partition.
   2767     for (i = 0; i < 4; i++) {
   2768       int x_idx = (i & 1) * (mi_step >> 1);
   2769       int y_idx = (i >> 1) * (mi_step >> 1);
   2770       RD_COST tmp_rdc;
   2771       ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
   2772       PARTITION_CONTEXT sl[8], sa[8];
   2773 
   2774       if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
   2775         continue;
   2776 
   2777       save_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
   2778       pc_tree->split[i]->partitioning = PARTITION_NONE;
   2779       rd_pick_sb_modes(cpi, tile_data, x, mi_row + y_idx, mi_col + x_idx,
   2780                        &tmp_rdc, split_subsize, &pc_tree->split[i]->none,
   2781                        INT64_MAX);
   2782 
   2783       restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
   2784 
   2785       if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
   2786         vp9_rd_cost_reset(&chosen_rdc);
   2787         break;
   2788       }
   2789 
   2790       chosen_rdc.rate += tmp_rdc.rate;
   2791       chosen_rdc.dist += tmp_rdc.dist;
   2792 
   2793       if (i != 3)
   2794         encode_sb(cpi, td, tile_info, tp, mi_row + y_idx, mi_col + x_idx, 0,
   2795                   split_subsize, pc_tree->split[i]);
   2796 
   2797       pl = partition_plane_context(xd, mi_row + y_idx, mi_col + x_idx,
   2798                                    split_subsize);
   2799       chosen_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
   2800     }
   2801     pl = partition_plane_context(xd, mi_row, mi_col, bsize);
   2802     if (chosen_rdc.rate < INT_MAX) {
   2803       chosen_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT];
   2804       chosen_rdc.rdcost =
   2805           RDCOST(x->rdmult, x->rddiv, chosen_rdc.rate, chosen_rdc.dist);
   2806     }
   2807   }
   2808 
   2809   // If last_part is better set the partitioning to that.
   2810   if (last_part_rdc.rdcost < chosen_rdc.rdcost) {
   2811     mi_8x8[0]->sb_type = bsize;
   2812     if (bsize >= BLOCK_8X8) pc_tree->partitioning = partition;
   2813     chosen_rdc = last_part_rdc;
   2814   }
   2815   // If none was better set the partitioning to that.
   2816   if (none_rdc.rdcost < chosen_rdc.rdcost) {
   2817     if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE;
   2818     chosen_rdc = none_rdc;
   2819   }
   2820 
   2821   restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
   2822 
   2823   // We must have chosen a partitioning and encoding or we'll fail later on.
   2824   // No other opportunities for success.
   2825   if (bsize == BLOCK_64X64)
   2826     assert(chosen_rdc.rate < INT_MAX && chosen_rdc.dist < INT64_MAX);
   2827 
   2828   if (do_recon) {
   2829     int output_enabled = (bsize == BLOCK_64X64);
   2830     encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize,
   2831               pc_tree);
   2832   }
   2833 
   2834   *rate = chosen_rdc.rate;
   2835   *dist = chosen_rdc.dist;
   2836 }
   2837 
   2838 static const BLOCK_SIZE min_partition_size[BLOCK_SIZES] = {
   2839   BLOCK_4X4,   BLOCK_4X4,   BLOCK_4X4,  BLOCK_4X4, BLOCK_4X4,
   2840   BLOCK_4X4,   BLOCK_8X8,   BLOCK_8X8,  BLOCK_8X8, BLOCK_16X16,
   2841   BLOCK_16X16, BLOCK_16X16, BLOCK_16X16
   2842 };
   2843 
   2844 static const BLOCK_SIZE max_partition_size[BLOCK_SIZES] = {
   2845   BLOCK_8X8,   BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, BLOCK_32X32,
   2846   BLOCK_32X32, BLOCK_32X32, BLOCK_64X64, BLOCK_64X64, BLOCK_64X64,
   2847   BLOCK_64X64, BLOCK_64X64, BLOCK_64X64
   2848 };
   2849 
   2850 // Look at all the mode_info entries for blocks that are part of this
   2851 // partition and find the min and max values for sb_type.
   2852 // At the moment this is designed to work on a 64x64 SB but could be
   2853 // adjusted to use a size parameter.
   2854 //
   2855 // The min and max are assumed to have been initialized prior to calling this
   2856 // function so repeat calls can accumulate a min and max of more than one sb64.
   2857 static void get_sb_partition_size_range(MACROBLOCKD *xd, MODE_INFO **mi_8x8,
   2858                                         BLOCK_SIZE *min_block_size,
   2859                                         BLOCK_SIZE *max_block_size,
   2860                                         int bs_hist[BLOCK_SIZES]) {
   2861   int sb_width_in_blocks = MI_BLOCK_SIZE;
   2862   int sb_height_in_blocks = MI_BLOCK_SIZE;
   2863   int i, j;
   2864   int index = 0;
   2865 
   2866   // Check the sb_type for each block that belongs to this region.
   2867   for (i = 0; i < sb_height_in_blocks; ++i) {
   2868     for (j = 0; j < sb_width_in_blocks; ++j) {
   2869       MODE_INFO *mi = mi_8x8[index + j];
   2870       BLOCK_SIZE sb_type = mi ? mi->sb_type : 0;
   2871       bs_hist[sb_type]++;
   2872       *min_block_size = VPXMIN(*min_block_size, sb_type);
   2873       *max_block_size = VPXMAX(*max_block_size, sb_type);
   2874     }
   2875     index += xd->mi_stride;
   2876   }
   2877 }
   2878 
   2879 // Next square block size less or equal than current block size.
   2880 static const BLOCK_SIZE next_square_size[BLOCK_SIZES] = {
   2881   BLOCK_4X4,   BLOCK_4X4,   BLOCK_4X4,   BLOCK_8X8,   BLOCK_8X8,
   2882   BLOCK_8X8,   BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, BLOCK_32X32,
   2883   BLOCK_32X32, BLOCK_32X32, BLOCK_64X64
   2884 };
   2885 
   2886 // Look at neighboring blocks and set a min and max partition size based on
   2887 // what they chose.
   2888 static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile,
   2889                                     MACROBLOCKD *const xd, int mi_row,
   2890                                     int mi_col, BLOCK_SIZE *min_block_size,
   2891                                     BLOCK_SIZE *max_block_size) {
   2892   VP9_COMMON *const cm = &cpi->common;
   2893   MODE_INFO **mi = xd->mi;
   2894   const int left_in_image = !!xd->left_mi;
   2895   const int above_in_image = !!xd->above_mi;
   2896   const int row8x8_remaining = tile->mi_row_end - mi_row;
   2897   const int col8x8_remaining = tile->mi_col_end - mi_col;
   2898   int bh, bw;
   2899   BLOCK_SIZE min_size = BLOCK_4X4;
   2900   BLOCK_SIZE max_size = BLOCK_64X64;
   2901   int bs_hist[BLOCK_SIZES] = { 0 };
   2902 
   2903   // Trap case where we do not have a prediction.
   2904   if (left_in_image || above_in_image || cm->frame_type != KEY_FRAME) {
   2905     // Default "min to max" and "max to min"
   2906     min_size = BLOCK_64X64;
   2907     max_size = BLOCK_4X4;
   2908 
   2909     // NOTE: each call to get_sb_partition_size_range() uses the previous
   2910     // passed in values for min and max as a starting point.
   2911     // Find the min and max partition used in previous frame at this location
   2912     if (cm->frame_type != KEY_FRAME) {
   2913       MODE_INFO **prev_mi =
   2914           &cm->prev_mi_grid_visible[mi_row * xd->mi_stride + mi_col];
   2915       get_sb_partition_size_range(xd, prev_mi, &min_size, &max_size, bs_hist);
   2916     }
   2917     // Find the min and max partition sizes used in the left SB64
   2918     if (left_in_image) {
   2919       MODE_INFO **left_sb64_mi = &mi[-MI_BLOCK_SIZE];
   2920       get_sb_partition_size_range(xd, left_sb64_mi, &min_size, &max_size,
   2921                                   bs_hist);
   2922     }
   2923     // Find the min and max partition sizes used in the above SB64.
   2924     if (above_in_image) {
   2925       MODE_INFO **above_sb64_mi = &mi[-xd->mi_stride * MI_BLOCK_SIZE];
   2926       get_sb_partition_size_range(xd, above_sb64_mi, &min_size, &max_size,
   2927                                   bs_hist);
   2928     }
   2929 
   2930     // Adjust observed min and max for "relaxed" auto partition case.
   2931     if (cpi->sf.auto_min_max_partition_size == RELAXED_NEIGHBORING_MIN_MAX) {
   2932       min_size = min_partition_size[min_size];
   2933       max_size = max_partition_size[max_size];
   2934     }
   2935   }
   2936 
   2937   // Check border cases where max and min from neighbors may not be legal.
   2938   max_size = find_partition_size(max_size, row8x8_remaining, col8x8_remaining,
   2939                                  &bh, &bw);
   2940   // Test for blocks at the edge of the active image.
   2941   // This may be the actual edge of the image or where there are formatting
   2942   // bars.
   2943   if (vp9_active_edge_sb(cpi, mi_row, mi_col)) {
   2944     min_size = BLOCK_4X4;
   2945   } else {
   2946     min_size =
   2947         VPXMIN(cpi->sf.rd_auto_partition_min_limit, VPXMIN(min_size, max_size));
   2948   }
   2949 
   2950   // When use_square_partition_only is true, make sure at least one square
   2951   // partition is allowed by selecting the next smaller square size as
   2952   // *min_block_size.
   2953   if (cpi->sf.use_square_partition_only &&
   2954       next_square_size[max_size] < min_size) {
   2955     min_size = next_square_size[max_size];
   2956   }
   2957 
   2958   *min_block_size = min_size;
   2959   *max_block_size = max_size;
   2960 }
   2961 
   2962 // TODO(jingning) refactor functions setting partition search range
   2963 static void set_partition_range(VP9_COMMON *cm, MACROBLOCKD *xd, int mi_row,
   2964                                 int mi_col, BLOCK_SIZE bsize,
   2965                                 BLOCK_SIZE *min_bs, BLOCK_SIZE *max_bs) {
   2966   int mi_width = num_8x8_blocks_wide_lookup[bsize];
   2967   int mi_height = num_8x8_blocks_high_lookup[bsize];
   2968   int idx, idy;
   2969 
   2970   MODE_INFO *mi;
   2971   const int idx_str = cm->mi_stride * mi_row + mi_col;
   2972   MODE_INFO **prev_mi = &cm->prev_mi_grid_visible[idx_str];
   2973   BLOCK_SIZE bs, min_size, max_size;
   2974 
   2975   min_size = BLOCK_64X64;
   2976   max_size = BLOCK_4X4;
   2977 
   2978   if (prev_mi) {
   2979     for (idy = 0; idy < mi_height; ++idy) {
   2980       for (idx = 0; idx < mi_width; ++idx) {
   2981         mi = prev_mi[idy * cm->mi_stride + idx];
   2982         bs = mi ? mi->sb_type : bsize;
   2983         min_size = VPXMIN(min_size, bs);
   2984         max_size = VPXMAX(max_size, bs);
   2985       }
   2986     }
   2987   }
   2988 
   2989   if (xd->left_mi) {
   2990     for (idy = 0; idy < mi_height; ++idy) {
   2991       mi = xd->mi[idy * cm->mi_stride - 1];
   2992       bs = mi ? mi->sb_type : bsize;
   2993       min_size = VPXMIN(min_size, bs);
   2994       max_size = VPXMAX(max_size, bs);
   2995     }
   2996   }
   2997 
   2998   if (xd->above_mi) {
   2999     for (idx = 0; idx < mi_width; ++idx) {
   3000       mi = xd->mi[idx - cm->mi_stride];
   3001       bs = mi ? mi->sb_type : bsize;
   3002       min_size = VPXMIN(min_size, bs);
   3003       max_size = VPXMAX(max_size, bs);
   3004     }
   3005   }
   3006 
   3007   if (min_size == max_size) {
   3008     min_size = min_partition_size[min_size];
   3009     max_size = max_partition_size[max_size];
   3010   }
   3011 
   3012   *min_bs = min_size;
   3013   *max_bs = max_size;
   3014 }
   3015 
   3016 static INLINE void store_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
   3017   memcpy(ctx->pred_mv, x->pred_mv, sizeof(x->pred_mv));
   3018 }
   3019 
   3020 static INLINE void load_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
   3021   memcpy(x->pred_mv, ctx->pred_mv, sizeof(x->pred_mv));
   3022 }
   3023 
   3024 #if CONFIG_FP_MB_STATS
   3025 const int num_16x16_blocks_wide_lookup[BLOCK_SIZES] = { 1, 1, 1, 1, 1, 1, 1,
   3026                                                         1, 2, 2, 2, 4, 4 };
   3027 const int num_16x16_blocks_high_lookup[BLOCK_SIZES] = { 1, 1, 1, 1, 1, 1, 1,
   3028                                                         2, 1, 2, 4, 2, 4 };
   3029 const int qindex_skip_threshold_lookup[BLOCK_SIZES] = {
   3030   0, 10, 10, 30, 40, 40, 60, 80, 80, 90, 100, 100, 120
   3031 };
   3032 const int qindex_split_threshold_lookup[BLOCK_SIZES] = {
   3033   0, 3, 3, 7, 15, 15, 30, 40, 40, 60, 80, 80, 120
   3034 };
   3035 const int complexity_16x16_blocks_threshold[BLOCK_SIZES] = {
   3036   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 6
   3037 };
   3038 
   3039 typedef enum {
   3040   MV_ZERO = 0,
   3041   MV_LEFT = 1,
   3042   MV_UP = 2,
   3043   MV_RIGHT = 3,
   3044   MV_DOWN = 4,
   3045   MV_INVALID
   3046 } MOTION_DIRECTION;
   3047 
   3048 static INLINE MOTION_DIRECTION get_motion_direction_fp(uint8_t fp_byte) {
   3049   if (fp_byte & FPMB_MOTION_ZERO_MASK) {
   3050     return MV_ZERO;
   3051   } else if (fp_byte & FPMB_MOTION_LEFT_MASK) {
   3052     return MV_LEFT;
   3053   } else if (fp_byte & FPMB_MOTION_RIGHT_MASK) {
   3054     return MV_RIGHT;
   3055   } else if (fp_byte & FPMB_MOTION_UP_MASK) {
   3056     return MV_UP;
   3057   } else {
   3058     return MV_DOWN;
   3059   }
   3060 }
   3061 
   3062 static INLINE int get_motion_inconsistency(MOTION_DIRECTION this_mv,
   3063                                            MOTION_DIRECTION that_mv) {
   3064   if (this_mv == that_mv) {
   3065     return 0;
   3066   } else {
   3067     return abs(this_mv - that_mv) == 2 ? 2 : 1;
   3068   }
   3069 }
   3070 #endif
   3071 
   3072 // Calculate prediction based on the given input features and neural net config.
   3073 // Assume there are no more than NN_MAX_NODES_PER_LAYER nodes in each hidden
   3074 // layer.
   3075 static void nn_predict(const float *features, const NN_CONFIG *nn_config,
   3076                        float *output) {
   3077   int num_input_nodes = nn_config->num_inputs;
   3078   int buf_index = 0;
   3079   float buf[2][NN_MAX_NODES_PER_LAYER];
   3080   const float *input_nodes = features;
   3081 
   3082   // Propagate hidden layers.
   3083   const int num_layers = nn_config->num_hidden_layers;
   3084   int layer, node, i;
   3085   assert(num_layers <= NN_MAX_HIDDEN_LAYERS);
   3086   for (layer = 0; layer < num_layers; ++layer) {
   3087     const float *weights = nn_config->weights[layer];
   3088     const float *bias = nn_config->bias[layer];
   3089     float *output_nodes = buf[buf_index];
   3090     const int num_output_nodes = nn_config->num_hidden_nodes[layer];
   3091     assert(num_output_nodes < NN_MAX_NODES_PER_LAYER);
   3092     for (node = 0; node < num_output_nodes; ++node) {
   3093       float val = 0.0f;
   3094       for (i = 0; i < num_input_nodes; ++i) val += weights[i] * input_nodes[i];
   3095       val += bias[node];
   3096       // ReLU as activation function.
   3097       val = VPXMAX(val, 0.0f);
   3098       output_nodes[node] = val;
   3099       weights += num_input_nodes;
   3100     }
   3101     num_input_nodes = num_output_nodes;
   3102     input_nodes = output_nodes;
   3103     buf_index = 1 - buf_index;
   3104   }
   3105 
   3106   // Final output layer.
   3107   {
   3108     const float *weights = nn_config->weights[num_layers];
   3109     for (node = 0; node < nn_config->num_outputs; ++node) {
   3110       const float *bias = nn_config->bias[num_layers];
   3111       float val = 0.0f;
   3112       for (i = 0; i < num_input_nodes; ++i) val += weights[i] * input_nodes[i];
   3113       output[node] = val + bias[node];
   3114       weights += num_input_nodes;
   3115     }
   3116   }
   3117 }
   3118 
   3119 #define FEATURES 7
   3120 // Machine-learning based partition search early termination.
   3121 // Return 1 to skip split and rect partitions.
   3122 static int ml_pruning_partition(VP9_COMMON *const cm, MACROBLOCKD *const xd,
   3123                                 PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col,
   3124                                 BLOCK_SIZE bsize) {
   3125   const int mag_mv =
   3126       abs(ctx->mic.mv[0].as_mv.col) + abs(ctx->mic.mv[0].as_mv.row);
   3127   const int left_in_image = !!xd->left_mi;
   3128   const int above_in_image = !!xd->above_mi;
   3129   MODE_INFO **prev_mi =
   3130       &cm->prev_mi_grid_visible[mi_col + cm->mi_stride * mi_row];
   3131   int above_par = 0;  // above_partitioning
   3132   int left_par = 0;   // left_partitioning
   3133   int last_par = 0;   // last_partitioning
   3134   int offset = 0;
   3135   int i;
   3136   BLOCK_SIZE context_size;
   3137   const NN_CONFIG *nn_config = NULL;
   3138   const float *mean, *sd, *linear_weights;
   3139   float nn_score, linear_score;
   3140   float features[FEATURES];
   3141 
   3142   assert(b_width_log2_lookup[bsize] == b_height_log2_lookup[bsize]);
   3143   vpx_clear_system_state();
   3144 
   3145   switch (bsize) {
   3146     case BLOCK_64X64:
   3147       offset = 0;
   3148       nn_config = &vp9_partition_nnconfig_64x64;
   3149       break;
   3150     case BLOCK_32X32:
   3151       offset = 8;
   3152       nn_config = &vp9_partition_nnconfig_32x32;
   3153       break;
   3154     case BLOCK_16X16:
   3155       offset = 16;
   3156       nn_config = &vp9_partition_nnconfig_16x16;
   3157       break;
   3158     default: assert(0 && "Unexpected block size."); return 0;
   3159   }
   3160 
   3161   if (above_in_image) {
   3162     context_size = xd->above_mi->sb_type;
   3163     if (context_size < bsize)
   3164       above_par = 2;
   3165     else if (context_size == bsize)
   3166       above_par = 1;
   3167   }
   3168 
   3169   if (left_in_image) {
   3170     context_size = xd->left_mi->sb_type;
   3171     if (context_size < bsize)
   3172       left_par = 2;
   3173     else if (context_size == bsize)
   3174       left_par = 1;
   3175   }
   3176 
   3177   if (prev_mi) {
   3178     context_size = prev_mi[0]->sb_type;
   3179     if (context_size < bsize)
   3180       last_par = 2;
   3181     else if (context_size == bsize)
   3182       last_par = 1;
   3183   }
   3184 
   3185   mean = &vp9_partition_feature_mean[offset];
   3186   sd = &vp9_partition_feature_std[offset];
   3187   features[0] = ((float)ctx->rate - mean[0]) / sd[0];
   3188   features[1] = ((float)ctx->dist - mean[1]) / sd[1];
   3189   features[2] = ((float)mag_mv / 2 - mean[2]) * sd[2];
   3190   features[3] = ((float)(left_par + above_par) / 2 - mean[3]) * sd[3];
   3191   features[4] = ((float)ctx->sum_y_eobs - mean[4]) / sd[4];
   3192   features[5] = ((float)cm->base_qindex - mean[5]) * sd[5];
   3193   features[6] = ((float)last_par - mean[6]) * sd[6];
   3194 
   3195   // Predict using linear model.
   3196   linear_weights = &vp9_partition_linear_weights[offset];
   3197   linear_score = linear_weights[FEATURES];
   3198   for (i = 0; i < FEATURES; ++i)
   3199     linear_score += linear_weights[i] * features[i];
   3200   if (linear_score > 0.1f) return 0;
   3201 
   3202   // Predict using neural net model.
   3203   nn_predict(features, nn_config, &nn_score);
   3204 
   3205   if (linear_score < -0.0f && nn_score < 0.1f) return 1;
   3206   if (nn_score < -0.0f && linear_score < 0.1f) return 1;
   3207   return 0;
   3208 }
   3209 #undef FEATURES
   3210 
   3211 #define FEATURES 4
   3212 // ML-based partition search breakout.
   3213 static int ml_predict_breakout(VP9_COMP *const cpi, BLOCK_SIZE bsize,
   3214                                const MACROBLOCK *const x,
   3215                                const RD_COST *const rd_cost) {
   3216   DECLARE_ALIGNED(16, static const uint8_t, vp9_64_zeros[64]) = { 0 };
   3217   const VP9_COMMON *const cm = &cpi->common;
   3218   float features[FEATURES];
   3219   const float *linear_weights = NULL;  // Linear model weights.
   3220   float linear_score = 0.0f;
   3221   const int qindex = cm->base_qindex;
   3222   const int q_ctx = qindex >= 200 ? 0 : (qindex >= 150 ? 1 : 2);
   3223   const int is_720p_or_larger = VPXMIN(cm->width, cm->height) >= 720;
   3224   const int resolution_ctx = is_720p_or_larger ? 1 : 0;
   3225 
   3226   switch (bsize) {
   3227     case BLOCK_64X64:
   3228       linear_weights = vp9_partition_breakout_weights_64[resolution_ctx][q_ctx];
   3229       break;
   3230     case BLOCK_32X32:
   3231       linear_weights = vp9_partition_breakout_weights_32[resolution_ctx][q_ctx];
   3232       break;
   3233     case BLOCK_16X16:
   3234       linear_weights = vp9_partition_breakout_weights_16[resolution_ctx][q_ctx];
   3235       break;
   3236     case BLOCK_8X8:
   3237       linear_weights = vp9_partition_breakout_weights_8[resolution_ctx][q_ctx];
   3238       break;
   3239     default: assert(0 && "Unexpected block size."); return 0;
   3240   }
   3241   if (!linear_weights) return 0;
   3242 
   3243   {  // Generate feature values.
   3244 #if CONFIG_VP9_HIGHBITDEPTH
   3245     const int ac_q =
   3246         vp9_ac_quant(cm->base_qindex, 0, cm->bit_depth) >> (x->e_mbd.bd - 8);
   3247 #else
   3248     const int ac_q = vp9_ac_quant(qindex, 0, cm->bit_depth);
   3249 #endif  // CONFIG_VP9_HIGHBITDEPTH
   3250     const int num_pels_log2 = num_pels_log2_lookup[bsize];
   3251     int feature_index = 0;
   3252     unsigned int var, sse;
   3253     float rate_f, dist_f;
   3254 
   3255 #if CONFIG_VP9_HIGHBITDEPTH
   3256     if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
   3257       var =
   3258           vp9_high_get_sby_variance(cpi, &x->plane[0].src, bsize, x->e_mbd.bd);
   3259     } else {
   3260       var = cpi->fn_ptr[bsize].vf(x->plane[0].src.buf, x->plane[0].src.stride,
   3261                                   vp9_64_zeros, 0, &sse);
   3262     }
   3263 #else
   3264     var = cpi->fn_ptr[bsize].vf(x->plane[0].src.buf, x->plane[0].src.stride,
   3265                                 vp9_64_zeros, 0, &sse);
   3266 #endif
   3267     var = var >> num_pels_log2;
   3268 
   3269     vpx_clear_system_state();
   3270 
   3271     rate_f = (float)VPXMIN(rd_cost->rate, INT_MAX);
   3272     dist_f = (float)(VPXMIN(rd_cost->dist, INT_MAX) >> num_pels_log2);
   3273     rate_f =
   3274         ((float)x->rdmult / 128.0f / 512.0f / (float)(1 << num_pels_log2)) *
   3275         rate_f;
   3276 
   3277     features[feature_index++] = rate_f;
   3278     features[feature_index++] = dist_f;
   3279     features[feature_index++] = (float)var;
   3280     features[feature_index++] = (float)ac_q;
   3281     assert(feature_index == FEATURES);
   3282   }
   3283 
   3284   {  // Calculate the output score.
   3285     int i;
   3286     linear_score = linear_weights[FEATURES];
   3287     for (i = 0; i < FEATURES; ++i)
   3288       linear_score += linear_weights[i] * features[i];
   3289   }
   3290 
   3291   return linear_score >= cpi->sf.ml_partition_search_breakout_thresh[q_ctx];
   3292 }
   3293 #undef FEATURES
   3294 
   3295 #define FEATURES 17
   3296 #define LABELS 4
   3297 static void ml_prune_rect_partition(VP9_COMP *const cpi, MACROBLOCK *const x,
   3298                                     BLOCK_SIZE bsize,
   3299                                     const PC_TREE *const pc_tree,
   3300                                     int *allow_horz, int *allow_vert,
   3301                                     int64_t ref_rd, int mi_row, int mi_col) {
   3302   const NN_CONFIG *nn_config = NULL;
   3303   float score[LABELS] = {
   3304     0.0f,
   3305   };
   3306   int thresh = -1;
   3307   int i;
   3308 
   3309   if (ref_rd <= 0 || ref_rd > 1000000000) return;
   3310 
   3311   switch (bsize) {
   3312     case BLOCK_8X8: break;
   3313     case BLOCK_16X16:
   3314       nn_config = &vp9_rect_part_nnconfig_16;
   3315       thresh = cpi->sf.ml_prune_rect_partition_threhold[1];
   3316       break;
   3317     case BLOCK_32X32:
   3318       nn_config = &vp9_rect_part_nnconfig_32;
   3319       thresh = cpi->sf.ml_prune_rect_partition_threhold[2];
   3320       break;
   3321     case BLOCK_64X64:
   3322       nn_config = &vp9_rect_part_nnconfig_64;
   3323       thresh = cpi->sf.ml_prune_rect_partition_threhold[3];
   3324       break;
   3325     default: assert(0 && "Unexpected block size."); return;
   3326   }
   3327   if (!nn_config || thresh < 0) return;
   3328 
   3329   // Feature extraction and model score calculation.
   3330   {
   3331     const int64_t none_rdcost = pc_tree->none.rdcost;
   3332     const VP9_COMMON *const cm = &cpi->common;
   3333 #if CONFIG_VP9_HIGHBITDEPTH
   3334     const int dc_q =
   3335         vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth) >> (x->e_mbd.bd - 8);
   3336 #else
   3337     const int dc_q = vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth);
   3338 #endif  // CONFIG_VP9_HIGHBITDEPTH
   3339     int feature_index = 0;
   3340     unsigned int block_var = 0;
   3341     unsigned int sub_block_var[4] = { 0 };
   3342     float features[FEATURES];
   3343 
   3344     features[feature_index++] =
   3345         (float)(pc_tree->partitioning == PARTITION_NONE);
   3346     features[feature_index++] = logf((float)(dc_q * dc_q) / 256.0f + 1.0f);
   3347 
   3348     // Calculate source pixel variance.
   3349     {
   3350       struct buf_2d buf;
   3351       const BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT);
   3352       const int bs = 4 * num_4x4_blocks_wide_lookup[bsize];
   3353       const MACROBLOCKD *const xd = &x->e_mbd;
   3354       vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);
   3355 
   3356       (void)xd;
   3357 #if CONFIG_VP9_HIGHBITDEPTH
   3358       if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
   3359         block_var = vp9_high_get_sby_perpixel_variance(cpi, &x->plane[0].src,
   3360                                                        bsize, xd->bd);
   3361       } else {
   3362         block_var = vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
   3363       }
   3364 #else
   3365       block_var = vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
   3366 #endif  // CONFIG_VP9_HIGHBITDEPTH
   3367 
   3368       buf.stride = x->plane[0].src.stride;
   3369       for (i = 0; i < 4; ++i) {
   3370         const int x_idx = (i & 1) * bs / 2;
   3371         const int y_idx = (i >> 1) * bs / 2;
   3372         buf.buf = x->plane[0].src.buf + x_idx + y_idx * buf.stride;
   3373 #if CONFIG_VP9_HIGHBITDEPTH
   3374         if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
   3375           sub_block_var[i] =
   3376               vp9_high_get_sby_perpixel_variance(cpi, &buf, subsize, xd->bd);
   3377         } else {
   3378           sub_block_var[i] = vp9_get_sby_perpixel_variance(cpi, &buf, subsize);
   3379         }
   3380 #else
   3381         sub_block_var[i] = vp9_get_sby_perpixel_variance(cpi, &buf, subsize);
   3382 #endif  // CONFIG_VP9_HIGHBITDEPTH
   3383       }
   3384     }
   3385 
   3386     features[feature_index++] = logf((float)block_var + 1.0f);
   3387     features[feature_index++] = logf((float)ref_rd + 1.0f);
   3388     features[feature_index++] = (none_rdcost > 0 && none_rdcost < 1000000000)
   3389                                     ? (float)pc_tree->none.skippable
   3390                                     : 0.0f;
   3391 
   3392     for (i = 0; i < 4; ++i) {
   3393       const int64_t this_rd = pc_tree->split[i]->none.rdcost;
   3394       const int rd_valid = this_rd > 0 && this_rd < 1000000000;
   3395       // Ratio between sub-block RD and whole block RD.
   3396       features[feature_index++] =
   3397           rd_valid ? ((float)this_rd / (float)ref_rd) : 1.0f;
   3398       // Sub-block skippable.
   3399       features[feature_index++] =
   3400           rd_valid ? ((float)pc_tree->split[i]->none.skippable) : 0.0f;
   3401     }
   3402 
   3403     {
   3404       const float denom = (float)(block_var + 1);
   3405       const float low_b = 0.1f;
   3406       const float high_b = 10.0f;
   3407       for (i = 0; i < 4; ++i) {
   3408         // Ratio between the quarter sub-block variance and the
   3409         // whole-block variance.
   3410         float var_ratio = (float)(sub_block_var[i] + 1) / denom;
   3411         if (var_ratio < low_b) var_ratio = low_b;
   3412         if (var_ratio > high_b) var_ratio = high_b;
   3413         features[feature_index++] = var_ratio;
   3414       }
   3415     }
   3416     assert(feature_index == FEATURES);
   3417     nn_predict(features, nn_config, score);
   3418   }
   3419 
   3420   // Make decisions based on the model score.
   3421   {
   3422     int max_score = -1000;
   3423     int horz = 0, vert = 0;
   3424     int int_score[LABELS];
   3425     for (i = 0; i < LABELS; ++i) {
   3426       int_score[i] = (int)(100 * score[i]);
   3427       max_score = VPXMAX(int_score[i], max_score);
   3428     }
   3429     thresh = max_score - thresh;
   3430     for (i = 0; i < LABELS; ++i) {
   3431       if (int_score[i] >= thresh) {
   3432         if ((i >> 0) & 1) horz = 1;
   3433         if ((i >> 1) & 1) vert = 1;
   3434       }
   3435     }
   3436     *allow_horz = *allow_horz && horz;
   3437     *allow_vert = *allow_vert && vert;
   3438   }
   3439 }
   3440 #undef FEATURES
   3441 #undef LABELS
   3442 
   3443 // Use a neural net model to prune partition-none and partition-split search.
   3444 // The model uses prediction residue variance and quantization step size as
   3445 // input features.
   3446 #define FEATURES 6
   3447 static void ml_predict_var_rd_paritioning(VP9_COMP *cpi, MACROBLOCK *x,
   3448                                           BLOCK_SIZE bsize, int mi_row,
   3449                                           int mi_col, int *none, int *split) {
   3450   VP9_COMMON *const cm = &cpi->common;
   3451   MACROBLOCKD *xd = &x->e_mbd;
   3452   MODE_INFO *mi = xd->mi[0];
   3453   const NN_CONFIG *nn_config = NULL;
   3454 #if CONFIG_VP9_HIGHBITDEPTH
   3455   DECLARE_ALIGNED(16, uint8_t, pred_buffer[64 * 64 * 2]);
   3456   uint8_t *const pred_buf = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
   3457                                 ? (CONVERT_TO_BYTEPTR(pred_buffer))
   3458                                 : pred_buffer;
   3459 #else
   3460   DECLARE_ALIGNED(16, uint8_t, pred_buffer[64 * 64]);
   3461   uint8_t *const pred_buf = pred_buffer;
   3462 #endif  // CONFIG_VP9_HIGHBITDEPTH
   3463   const int speed = cpi->oxcf.speed;
   3464   int i;
   3465   float thresh = 0.0f;
   3466 
   3467   switch (bsize) {
   3468     case BLOCK_64X64:
   3469       nn_config = &vp9_var_rd_part_nnconfig_64;
   3470       thresh = speed > 0 ? 3.5f : 3.0f;
   3471       break;
   3472     case BLOCK_32X32:
   3473       nn_config = &vp9_var_rd_part_nnconfig_32;
   3474       thresh = speed > 0 ? 3.5f : 3.0f;
   3475       break;
   3476     case BLOCK_16X16:
   3477       nn_config = &vp9_var_rd_part_nnconfig_16;
   3478       thresh = speed > 0 ? 3.5f : 4.0f;
   3479       break;
   3480     case BLOCK_8X8:
   3481       nn_config = &vp9_var_rd_part_nnconfig_8;
   3482       if (cm->width >= 720 && cm->height >= 720)
   3483         thresh = speed > 0 ? 2.5f : 2.0f;
   3484       else
   3485         thresh = speed > 0 ? 3.5f : 2.0f;
   3486       break;
   3487     default: assert(0 && "Unexpected block size."); return;
   3488   }
   3489 
   3490   if (!nn_config) return;
   3491 
   3492   mi->ref_frame[1] = NONE;
   3493   mi->sb_type = bsize;
   3494   // Do a simple single motion search to find a prediction for current block.
   3495   // The variance of the residue will be used as input features.
   3496   {
   3497     const MV_REFERENCE_FRAME ref =
   3498         cpi->rc.is_src_frame_alt_ref ? ALTREF_FRAME : LAST_FRAME;
   3499     YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref);
   3500     MV ref_mv = { 0, 0 };
   3501     MV ref_mv_full = { 0, 0 };
   3502     const int step_param = 1;
   3503     const MvLimits tmp_mv_limits = x->mv_limits;
   3504     const SEARCH_METHODS search_method = NSTEP;
   3505     const int sadpb = x->sadperbit16;
   3506     MV best_mv = { 0, 0 };
   3507     int cost_list[5];
   3508 
   3509     assert(yv12 != NULL);
   3510     if (!yv12) return;
   3511     vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
   3512                          &cm->frame_refs[ref - 1].sf);
   3513     mi->ref_frame[0] = ref;
   3514     vp9_set_mv_search_range(&x->mv_limits, &ref_mv);
   3515     vp9_full_pixel_search(cpi, x, bsize, &ref_mv_full, step_param,
   3516                           search_method, sadpb, cond_cost_list(cpi, cost_list),
   3517                           &ref_mv, &best_mv, 0, 0);
   3518     best_mv.row *= 8;
   3519     best_mv.col *= 8;
   3520     x->mv_limits = tmp_mv_limits;
   3521     mi->mv[0].as_mv = best_mv;
   3522 
   3523     set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
   3524     xd->plane[0].dst.buf = pred_buf;
   3525     xd->plane[0].dst.stride = 64;
   3526     vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
   3527   }
   3528 
   3529   vpx_clear_system_state();
   3530 
   3531   {
   3532     float features[FEATURES] = { 0.0f };
   3533 #if CONFIG_VP9_HIGHBITDEPTH
   3534     const int dc_q =
   3535         vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth) >> (xd->bd - 8);
   3536 #else
   3537     const int dc_q = vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth);
   3538 #endif  // CONFIG_VP9_HIGHBITDEPTH
   3539     int feature_idx = 0;
   3540     float score;
   3541 
   3542     // Generate model input features.
   3543     features[feature_idx++] = logf((float)(dc_q * dc_q) / 256.0f + 1.0f);
   3544     vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);
   3545     // Get the variance of the residue as input features.
   3546     {
   3547       const int bs = 4 * num_4x4_blocks_wide_lookup[bsize];
   3548       const BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT);
   3549       const uint8_t *pred = pred_buf;
   3550       const uint8_t *src = x->plane[0].src.buf;
   3551       const int src_stride = x->plane[0].src.stride;
   3552       const int pred_stride = 64;
   3553       unsigned int sse;
   3554       // Variance of whole block.
   3555       const unsigned int var =
   3556           cpi->fn_ptr[bsize].vf(src, src_stride, pred, pred_stride, &sse);
   3557       const float factor = (var == 0) ? 1.0f : (1.0f / (float)var);
   3558 
   3559       features[feature_idx++] = logf((float)var + 1.0f);
   3560       for (i = 0; i < 4; ++i) {
   3561         const int x_idx = (i & 1) * bs / 2;
   3562         const int y_idx = (i >> 1) * bs / 2;
   3563         const int src_offset = y_idx * src_stride + x_idx;
   3564         const int pred_offset = y_idx * pred_stride + x_idx;
   3565         // Variance of quarter block.
   3566         const unsigned int sub_var =
   3567             cpi->fn_ptr[subsize].vf(src + src_offset, src_stride,
   3568                                     pred + pred_offset, pred_stride, &sse);
   3569         const float var_ratio = (var == 0) ? 1.0f : factor * (float)sub_var;
   3570         features[feature_idx++] = var_ratio;
   3571       }
   3572     }
   3573     assert(feature_idx == FEATURES);
   3574 
   3575     // Feed the features into the model to get the confidence score.
   3576     nn_predict(features, nn_config, &score);
   3577 
   3578     // Higher score means that the model has higher confidence that the split
   3579     // partition is better than the non-split partition. So if the score is
   3580     // high enough, we skip the none-split partition search; if the score is
   3581     // low enough, we skip the split partition search.
   3582     if (score > thresh) *none = 0;
   3583     if (score < -thresh) *split = 0;
   3584   }
   3585 }
   3586 #undef FEATURES
   3587 #undef LABELS
   3588 
   3589 static int get_rdmult_delta(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row,
   3590                             int mi_col, int orig_rdmult) {
   3591   const int gf_group_index = cpi->twopass.gf_group.index;
   3592   TplDepFrame *tpl_frame = &cpi->tpl_stats[gf_group_index];
   3593   TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
   3594   int tpl_stride = tpl_frame->stride;
   3595   int64_t intra_cost = 0;
   3596   int64_t mc_dep_cost = 0;
   3597   int mi_wide = num_8x8_blocks_wide_lookup[bsize];
   3598   int mi_high = num_8x8_blocks_high_lookup[bsize];
   3599   int row, col;
   3600 
   3601   int dr = 0;
   3602   int count = 0;
   3603   double r0, rk, beta;
   3604 
   3605   if (tpl_frame->is_valid == 0) return orig_rdmult;
   3606 
   3607   if (cpi->twopass.gf_group.layer_depth[gf_group_index] > 1) return orig_rdmult;
   3608 
   3609   if (gf_group_index >= MAX_ARF_GOP_SIZE) return orig_rdmult;
   3610 
   3611   for (row = mi_row; row < mi_row + mi_high; ++row) {
   3612     for (col = mi_col; col < mi_col + mi_wide; ++col) {
   3613       TplDepStats *this_stats = &tpl_stats[row * tpl_stride + col];
   3614 
   3615       if (row >= cpi->common.mi_rows || col >= cpi->common.mi_cols) continue;
   3616 
   3617       intra_cost += this_stats->intra_cost;
   3618       mc_dep_cost += this_stats->mc_dep_cost;
   3619 
   3620       ++count;
   3621     }
   3622   }
   3623 
   3624   vpx_clear_system_state();
   3625 
   3626   r0 = cpi->rd.r0;
   3627   rk = (double)intra_cost / mc_dep_cost;
   3628   beta = r0 / rk;
   3629   dr = vp9_get_adaptive_rdmult(cpi, beta);
   3630 
   3631   dr = VPXMIN(dr, orig_rdmult * 3 / 2);
   3632   dr = VPXMAX(dr, orig_rdmult * 1 / 2);
   3633 
   3634   dr = VPXMAX(1, dr);
   3635 
   3636   return dr;
   3637 }
   3638 
   3639 // TODO(jingning,jimbankoski,rbultje): properly skip partition types that are
   3640 // unlikely to be selected depending on previous rate-distortion optimization
   3641 // results, for encoding speed-up.
   3642 static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
   3643                               TileDataEnc *tile_data, TOKENEXTRA **tp,
   3644                               int mi_row, int mi_col, BLOCK_SIZE bsize,
   3645                               RD_COST *rd_cost, int64_t best_rd,
   3646                               PC_TREE *pc_tree) {
   3647   VP9_COMMON *const cm = &cpi->common;
   3648   TileInfo *const tile_info = &tile_data->tile_info;
   3649   MACROBLOCK *const x = &td->mb;
   3650   MACROBLOCKD *const xd = &x->e_mbd;
   3651   const int mi_step = num_8x8_blocks_wide_lookup[bsize] / 2;
   3652   ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
   3653   PARTITION_CONTEXT sl[8], sa[8];
   3654   TOKENEXTRA *tp_orig = *tp;
   3655   PICK_MODE_CONTEXT *const ctx = &pc_tree->none;
   3656   int i;
   3657   const int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
   3658   BLOCK_SIZE subsize;
   3659   RD_COST this_rdc, sum_rdc, best_rdc;
   3660   int do_split = bsize >= BLOCK_8X8;
   3661   int do_rect = 1;
   3662   INTERP_FILTER pred_interp_filter;
   3663 
   3664   // Override skipping rectangular partition operations for edge blocks
   3665   const int force_horz_split = (mi_row + mi_step >= cm->mi_rows);
   3666   const int force_vert_split = (mi_col + mi_step >= cm->mi_cols);
   3667   const int xss = x->e_mbd.plane[1].subsampling_x;
   3668   const int yss = x->e_mbd.plane[1].subsampling_y;
   3669 
   3670   BLOCK_SIZE min_size = x->min_partition_size;
   3671   BLOCK_SIZE max_size = x->max_partition_size;
   3672 
   3673 #if CONFIG_FP_MB_STATS
   3674   unsigned int src_diff_var = UINT_MAX;
   3675   int none_complexity = 0;
   3676 #endif
   3677 
   3678   int partition_none_allowed = !force_horz_split && !force_vert_split;
   3679   int partition_horz_allowed =
   3680       !force_vert_split && yss <= xss && bsize >= BLOCK_8X8;
   3681   int partition_vert_allowed =
   3682       !force_horz_split && xss <= yss && bsize >= BLOCK_8X8;
   3683 
   3684   int64_t dist_breakout_thr = cpi->sf.partition_search_breakout_thr.dist;
   3685   int rate_breakout_thr = cpi->sf.partition_search_breakout_thr.rate;
   3686   int must_split = 0;
   3687   int partition_mul = cpi->sf.enable_tpl_model && cpi->oxcf.aq_mode == NO_AQ
   3688                           ? x->cb_rdmult
   3689                           : cpi->rd.RDMULT;
   3690   // Ref frames picked in the [i_th] quarter subblock during square partition
   3691   // RD search. It may be used to prune ref frame selection of rect partitions.
   3692   uint8_t ref_frames_used[4] = { 0, 0, 0, 0 };
   3693 
   3694   (void)*tp_orig;
   3695 
   3696   assert(num_8x8_blocks_wide_lookup[bsize] ==
   3697          num_8x8_blocks_high_lookup[bsize]);
   3698 
   3699   dist_breakout_thr >>=
   3700       8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
   3701 
   3702   rate_breakout_thr *= num_pels_log2_lookup[bsize];
   3703 
   3704   vp9_rd_cost_init(&this_rdc);
   3705   vp9_rd_cost_init(&sum_rdc);
   3706   vp9_rd_cost_reset(&best_rdc);
   3707   best_rdc.rdcost = best_rd;
   3708 
   3709   set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
   3710 
   3711   if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode != NO_AQ &&
   3712       cpi->oxcf.aq_mode != LOOKAHEAD_AQ)
   3713     x->mb_energy = vp9_block_energy(cpi, x, bsize);
   3714 
   3715   if (cpi->sf.cb_partition_search && bsize == BLOCK_16X16) {
   3716     int cb_partition_search_ctrl =
   3717         ((pc_tree->index == 0 || pc_tree->index == 3) +
   3718          get_chessboard_index(cm->current_video_frame)) &
   3719         0x1;
   3720 
   3721     if (cb_partition_search_ctrl && bsize > min_size && bsize < max_size)
   3722       set_partition_range(cm, xd, mi_row, mi_col, bsize, &min_size, &max_size);
   3723   }
   3724 
   3725   // Get sub block energy range
   3726   if (bsize >= BLOCK_16X16) {
   3727     int min_energy, max_energy;
   3728     vp9_get_sub_block_energy(cpi, x, mi_row, mi_col, bsize, &min_energy,
   3729                              &max_energy);
   3730     must_split = (min_energy < -3) && (max_energy - min_energy > 2);
   3731   }
   3732 
   3733   // Determine partition types in search according to the speed features.
   3734   // The threshold set here has to be of square block size.
   3735   if (cpi->sf.auto_min_max_partition_size) {
   3736     partition_none_allowed &= (bsize <= max_size);
   3737     partition_horz_allowed &=
   3738         ((bsize <= max_size && bsize > min_size) || force_horz_split);
   3739     partition_vert_allowed &=
   3740         ((bsize <= max_size && bsize > min_size) || force_vert_split);
   3741     do_split &= bsize > min_size;
   3742   }
   3743 
   3744   if (cpi->sf.use_square_partition_only &&
   3745       (bsize > cpi->sf.use_square_only_thresh_high ||
   3746        bsize < cpi->sf.use_square_only_thresh_low)) {
   3747     if (cpi->use_svc) {
   3748       if (!vp9_active_h_edge(cpi, mi_row, mi_step) || x->e_mbd.lossless)
   3749         partition_horz_allowed &= force_horz_split;
   3750       if (!vp9_active_v_edge(cpi, mi_row, mi_step) || x->e_mbd.lossless)
   3751         partition_vert_allowed &= force_vert_split;
   3752     } else {
   3753       partition_horz_allowed &= force_horz_split;
   3754       partition_vert_allowed &= force_vert_split;
   3755     }
   3756   }
   3757 
   3758   save_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
   3759 
   3760 #if CONFIG_FP_MB_STATS
   3761   if (cpi->use_fp_mb_stats) {
   3762     set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
   3763     src_diff_var = get_sby_perpixel_diff_variance(cpi, &x->plane[0].src, mi_row,
   3764                                                   mi_col, bsize);
   3765   }
   3766 #endif
   3767 
   3768 #if CONFIG_FP_MB_STATS
   3769   // Decide whether we shall split directly and skip searching NONE by using
   3770   // the first pass block statistics
   3771   if (cpi->use_fp_mb_stats && bsize >= BLOCK_32X32 && do_split &&
   3772       partition_none_allowed && src_diff_var > 4 &&
   3773       cm->base_qindex < qindex_split_threshold_lookup[bsize]) {
   3774     int mb_row = mi_row >> 1;
   3775     int mb_col = mi_col >> 1;
   3776     int mb_row_end =
   3777         VPXMIN(mb_row + num_16x16_blocks_high_lookup[bsize], cm->mb_rows);
   3778     int mb_col_end =
   3779         VPXMIN(mb_col + num_16x16_blocks_wide_lookup[bsize], cm->mb_cols);
   3780     int r, c;
   3781 
   3782     // compute a complexity measure, basically measure inconsistency of motion
   3783     // vectors obtained from the first pass in the current block
   3784     for (r = mb_row; r < mb_row_end; r++) {
   3785       for (c = mb_col; c < mb_col_end; c++) {
   3786         const int mb_index = r * cm->mb_cols + c;
   3787 
   3788         MOTION_DIRECTION this_mv;
   3789         MOTION_DIRECTION right_mv;
   3790         MOTION_DIRECTION bottom_mv;
   3791 
   3792         this_mv =
   3793             get_motion_direction_fp(cpi->twopass.this_frame_mb_stats[mb_index]);
   3794 
   3795         // to its right
   3796         if (c != mb_col_end - 1) {
   3797           right_mv = get_motion_direction_fp(
   3798               cpi->twopass.this_frame_mb_stats[mb_index + 1]);
   3799           none_complexity += get_motion_inconsistency(this_mv, right_mv);
   3800         }
   3801 
   3802         // to its bottom
   3803         if (r != mb_row_end - 1) {
   3804           bottom_mv = get_motion_direction_fp(
   3805               cpi->twopass.this_frame_mb_stats[mb_index + cm->mb_cols]);
   3806           none_complexity += get_motion_inconsistency(this_mv, bottom_mv);
   3807         }
   3808 
   3809         // do not count its left and top neighbors to avoid double counting
   3810       }
   3811     }
   3812 
   3813     if (none_complexity > complexity_16x16_blocks_threshold[bsize]) {
   3814       partition_none_allowed = 0;
   3815     }
   3816   }
   3817 #endif
   3818 
   3819   pc_tree->partitioning = PARTITION_NONE;
   3820 
   3821   if (cpi->sf.ml_var_partition_pruning) {
   3822     const int do_ml_var_partition_pruning =
   3823         !frame_is_intra_only(cm) && partition_none_allowed && do_split &&
   3824         mi_row + num_8x8_blocks_high_lookup[bsize] <= cm->mi_rows &&
   3825         mi_col + num_8x8_blocks_wide_lookup[bsize] <= cm->mi_cols;
   3826     if (do_ml_var_partition_pruning) {
   3827       ml_predict_var_rd_paritioning(cpi, x, bsize, mi_row, mi_col,
   3828                                     &partition_none_allowed, &do_split);
   3829     }
   3830   }
   3831 
   3832   // PARTITION_NONE
   3833   if (partition_none_allowed) {
   3834     rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, bsize, ctx,
   3835                      best_rdc.rdcost);
   3836     ctx->rdcost = this_rdc.rdcost;
   3837     if (this_rdc.rate != INT_MAX) {
   3838       if (cpi->sf.prune_ref_frame_for_rect_partitions) {
   3839         const int ref1 = ctx->mic.ref_frame[0];
   3840         const int ref2 = ctx->mic.ref_frame[1];
   3841         for (i = 0; i < 4; ++i) {
   3842           ref_frames_used[i] |= (1 << ref1);
   3843           if (ref2 > 0) ref_frames_used[i] |= (1 << ref2);
   3844         }
   3845       }
   3846       if (bsize >= BLOCK_8X8) {
   3847         this_rdc.rdcost += RDCOST(partition_mul, x->rddiv,
   3848                                   cpi->partition_cost[pl][PARTITION_NONE], 0);
   3849         this_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
   3850       }
   3851 
   3852       if (this_rdc.rdcost < best_rdc.rdcost) {
   3853         MODE_INFO *mi = xd->mi[0];
   3854 
   3855         best_rdc = this_rdc;
   3856         if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE;
   3857 
   3858         if (cpi->sf.ml_partition_search_early_termination) {
   3859           // Currently, the machine-learning based partition search early
   3860           // termination is only used while bsize is 16x16, 32x32 or 64x64,
   3861           // VPXMIN(cm->width, cm->height) >= 480, and speed = 0.
   3862           if (!x->e_mbd.lossless &&
   3863               !segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP) &&
   3864               ctx->mic.mode >= INTRA_MODES && bsize >= BLOCK_16X16) {
   3865             if (ml_pruning_partition(cm, xd, ctx, mi_row, mi_col, bsize)) {
   3866               do_split = 0;
   3867               do_rect = 0;
   3868             }
   3869           }
   3870         }
   3871 
   3872         if ((do_split || do_rect) && !x->e_mbd.lossless && ctx->skippable) {
   3873           const int use_ml_based_breakout =
   3874               cpi->sf.use_ml_partition_search_breakout &&
   3875               cm->base_qindex >= 100;
   3876           if (use_ml_based_breakout) {
   3877             if (ml_predict_breakout(cpi, bsize, x, &this_rdc)) {
   3878               do_split = 0;
   3879               do_rect = 0;
   3880             }
   3881           } else {
   3882             if (!cpi->sf.ml_partition_search_early_termination) {
   3883               if ((best_rdc.dist < (dist_breakout_thr >> 2)) ||
   3884                   (best_rdc.dist < dist_breakout_thr &&
   3885                    best_rdc.rate < rate_breakout_thr)) {
   3886                 do_split = 0;
   3887                 do_rect = 0;
   3888               }
   3889             }
   3890           }
   3891         }
   3892 
   3893 #if CONFIG_FP_MB_STATS
   3894         // Check if every 16x16 first pass block statistics has zero
   3895         // motion and the corresponding first pass residue is small enough.
   3896         // If that is the case, check the difference variance between the
   3897         // current frame and the last frame. If the variance is small enough,
   3898         // stop further splitting in RD optimization
   3899         if (cpi->use_fp_mb_stats && do_split != 0 &&
   3900             cm->base_qindex > qindex_skip_threshold_lookup[bsize]) {
   3901           int mb_row = mi_row >> 1;
   3902           int mb_col = mi_col >> 1;
   3903           int mb_row_end =
   3904               VPXMIN(mb_row + num_16x16_blocks_high_lookup[bsize], cm->mb_rows);
   3905           int mb_col_end =
   3906               VPXMIN(mb_col + num_16x16_blocks_wide_lookup[bsize], cm->mb_cols);
   3907           int r, c;
   3908 
   3909           int skip = 1;
   3910           for (r = mb_row; r < mb_row_end; r++) {
   3911             for (c = mb_col; c < mb_col_end; c++) {
   3912               const int mb_index = r * cm->mb_cols + c;
   3913               if (!(cpi->twopass.this_frame_mb_stats[mb_index] &
   3914                     FPMB_MOTION_ZERO_MASK) ||
   3915                   !(cpi->twopass.this_frame_mb_stats[mb_index] &
   3916                     FPMB_ERROR_SMALL_MASK)) {
   3917                 skip = 0;
   3918                 break;
   3919               }
   3920             }
   3921             if (skip == 0) {
   3922               break;
   3923             }
   3924           }
   3925 
   3926           if (skip) {
   3927             if (src_diff_var == UINT_MAX) {
   3928               set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
   3929               src_diff_var = get_sby_perpixel_diff_variance(
   3930                   cpi, &x->plane[0].src, mi_row, mi_col, bsize);
   3931             }
   3932             if (src_diff_var < 8) {
   3933               do_split = 0;
   3934               do_rect = 0;
   3935             }
   3936           }
   3937         }
   3938 #endif
   3939       }
   3940     }
   3941     restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
   3942   } else {
   3943     vp9_zero(ctx->pred_mv);
   3944     ctx->mic.interp_filter = EIGHTTAP;
   3945   }
   3946 
   3947   // store estimated motion vector
   3948   store_pred_mv(x, ctx);
   3949 
   3950   // If the interp_filter is marked as SWITCHABLE_FILTERS, it was for an
   3951   // intra block and used for context purposes.
   3952   if (ctx->mic.interp_filter == SWITCHABLE_FILTERS) {
   3953     pred_interp_filter = EIGHTTAP;
   3954   } else {
   3955     pred_interp_filter = ctx->mic.interp_filter;
   3956   }
   3957 
   3958   // PARTITION_SPLIT
   3959   // TODO(jingning): use the motion vectors given by the above search as
   3960   // the starting point of motion search in the following partition type check.
   3961   pc_tree->split[0]->none.rdcost = 0;
   3962   pc_tree->split[1]->none.rdcost = 0;
   3963   pc_tree->split[2]->none.rdcost = 0;
   3964   pc_tree->split[3]->none.rdcost = 0;
   3965   if (do_split || must_split) {
   3966     subsize = get_subsize(bsize, PARTITION_SPLIT);
   3967     load_pred_mv(x, ctx);
   3968     if (bsize == BLOCK_8X8) {
   3969       i = 4;
   3970       if (cpi->sf.adaptive_pred_interp_filter && partition_none_allowed)
   3971         pc_tree->leaf_split[0]->pred_interp_filter = pred_interp_filter;
   3972       rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
   3973                        pc_tree->leaf_split[0], best_rdc.rdcost);
   3974       if (sum_rdc.rate == INT_MAX) {
   3975         sum_rdc.rdcost = INT64_MAX;
   3976       } else {
   3977         if (cpi->sf.prune_ref_frame_for_rect_partitions) {
   3978           const int ref1 = pc_tree->leaf_split[0]->mic.ref_frame[0];
   3979           const int ref2 = pc_tree->leaf_split[0]->mic.ref_frame[1];
   3980           for (i = 0; i < 4; ++i) {
   3981             ref_frames_used[i] |= (1 << ref1);
   3982             if (ref2 > 0) ref_frames_used[i] |= (1 << ref2);
   3983           }
   3984         }
   3985       }
   3986     } else {
   3987       for (i = 0; (i < 4) && ((sum_rdc.rdcost < best_rdc.rdcost) || must_split);
   3988            ++i) {
   3989         const int x_idx = (i & 1) * mi_step;
   3990         const int y_idx = (i >> 1) * mi_step;
   3991 
   3992         if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
   3993           continue;
   3994 
   3995         pc_tree->split[i]->index = i;
   3996         if (cpi->sf.prune_ref_frame_for_rect_partitions)
   3997           pc_tree->split[i]->none.rate = INT_MAX;
   3998         rd_pick_partition(cpi, td, tile_data, tp, mi_row + y_idx,
   3999                           mi_col + x_idx, subsize, &this_rdc,
   4000                           // A must split test here increases the number of sub
   4001                           // partitions but hurts metrics results quite a bit,
   4002                           // so this extra test is commented out pending
   4003                           // further tests on whether it adds much in terms of
   4004                           // visual quality.
   4005                           // (must_split) ? best_rdc.rdcost
   4006                           //              : best_rdc.rdcost - sum_rdc.rdcost,
   4007                           best_rdc.rdcost - sum_rdc.rdcost, pc_tree->split[i]);
   4008 
   4009         if (this_rdc.rate == INT_MAX) {
   4010           sum_rdc.rdcost = INT64_MAX;
   4011           break;
   4012         } else {
   4013           if (cpi->sf.prune_ref_frame_for_rect_partitions &&
   4014               pc_tree->split[i]->none.rate != INT_MAX) {
   4015             const int ref1 = pc_tree->split[i]->none.mic.ref_frame[0];
   4016             const int ref2 = pc_tree->split[i]->none.mic.ref_frame[1];
   4017             ref_frames_used[i] |= (1 << ref1);
   4018             if (ref2 > 0) ref_frames_used[i] |= (1 << ref2);
   4019           }
   4020           sum_rdc.rate += this_rdc.rate;
   4021           sum_rdc.dist += this_rdc.dist;
   4022           sum_rdc.rdcost += this_rdc.rdcost;
   4023         }
   4024       }
   4025     }
   4026 
   4027     if (((sum_rdc.rdcost < best_rdc.rdcost) || must_split) && i == 4) {
   4028       sum_rdc.rdcost += RDCOST(partition_mul, x->rddiv,
   4029                                cpi->partition_cost[pl][PARTITION_SPLIT], 0);
   4030       sum_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT];
   4031 
   4032       if ((sum_rdc.rdcost < best_rdc.rdcost) ||
   4033           (must_split && (sum_rdc.dist < best_rdc.dist))) {
   4034         best_rdc = sum_rdc;
   4035         pc_tree->partitioning = PARTITION_SPLIT;
   4036 
   4037         // Rate and distortion based partition search termination clause.
   4038         if (!cpi->sf.ml_partition_search_early_termination &&
   4039             !x->e_mbd.lossless &&
   4040             ((best_rdc.dist < (dist_breakout_thr >> 2)) ||
   4041              (best_rdc.dist < dist_breakout_thr &&
   4042               best_rdc.rate < rate_breakout_thr))) {
   4043           do_rect = 0;
   4044         }
   4045       }
   4046     } else {
   4047       // skip rectangular partition test when larger block size
   4048       // gives better rd cost
   4049       if (cpi->sf.less_rectangular_check &&
   4050           (bsize > cpi->sf.use_square_only_thresh_high ||
   4051            best_rdc.dist < dist_breakout_thr))
   4052         do_rect &= !partition_none_allowed;
   4053     }
   4054     restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
   4055   }
   4056 
   4057   pc_tree->horizontal[0].skip_ref_frame_mask = 0;
   4058   pc_tree->horizontal[1].skip_ref_frame_mask = 0;
   4059   pc_tree->vertical[0].skip_ref_frame_mask = 0;
   4060   pc_tree->vertical[1].skip_ref_frame_mask = 0;
   4061   if (cpi->sf.prune_ref_frame_for_rect_partitions) {
   4062     uint8_t used_frames;
   4063     used_frames = ref_frames_used[0] | ref_frames_used[1];
   4064     if (used_frames) pc_tree->horizontal[0].skip_ref_frame_mask = ~used_frames;
   4065     used_frames = ref_frames_used[2] | ref_frames_used[3];
   4066     if (used_frames) pc_tree->horizontal[1].skip_ref_frame_mask = ~used_frames;
   4067     used_frames = ref_frames_used[0] | ref_frames_used[2];
   4068     if (used_frames) pc_tree->vertical[0].skip_ref_frame_mask = ~used_frames;
   4069     used_frames = ref_frames_used[1] | ref_frames_used[3];
   4070     if (used_frames) pc_tree->vertical[1].skip_ref_frame_mask = ~used_frames;
   4071   }
   4072 
   4073   {
   4074     const int do_ml_rect_partition_pruning =
   4075         !frame_is_intra_only(cm) && !force_horz_split && !force_vert_split &&
   4076         (partition_horz_allowed || partition_vert_allowed) && bsize > BLOCK_8X8;
   4077     if (do_ml_rect_partition_pruning) {
   4078       ml_prune_rect_partition(cpi, x, bsize, pc_tree, &partition_horz_allowed,
   4079                               &partition_vert_allowed, best_rdc.rdcost, mi_row,
   4080                               mi_col);
   4081     }
   4082   }
   4083 
   4084   // PARTITION_HORZ
   4085   if (partition_horz_allowed &&
   4086       (do_rect || vp9_active_h_edge(cpi, mi_row, mi_step))) {
   4087     const int part_mode_rate = cpi->partition_cost[pl][PARTITION_HORZ];
   4088     const int64_t part_mode_rdcost =
   4089         RDCOST(partition_mul, x->rddiv, part_mode_rate, 0);
   4090     subsize = get_subsize(bsize, PARTITION_HORZ);
   4091     load_pred_mv(x, ctx);
   4092     if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
   4093         partition_none_allowed)
   4094       pc_tree->horizontal[0].pred_interp_filter = pred_interp_filter;
   4095     rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
   4096                      &pc_tree->horizontal[0],
   4097                      best_rdc.rdcost - part_mode_rdcost);
   4098     if (sum_rdc.rdcost < INT64_MAX) {
   4099       sum_rdc.rdcost += part_mode_rdcost;
   4100       sum_rdc.rate += part_mode_rate;
   4101     }
   4102 
   4103     if (sum_rdc.rdcost < best_rdc.rdcost && mi_row + mi_step < cm->mi_rows &&
   4104         bsize > BLOCK_8X8) {
   4105       PICK_MODE_CONTEXT *ctx = &pc_tree->horizontal[0];
   4106       update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0);
   4107       encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx);
   4108       if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
   4109           partition_none_allowed)
   4110         pc_tree->horizontal[1].pred_interp_filter = pred_interp_filter;
   4111       rd_pick_sb_modes(cpi, tile_data, x, mi_row + mi_step, mi_col, &this_rdc,
   4112                        subsize, &pc_tree->horizontal[1],
   4113                        best_rdc.rdcost - sum_rdc.rdcost);
   4114       if (this_rdc.rate == INT_MAX) {
   4115         sum_rdc.rdcost = INT64_MAX;
   4116       } else {
   4117         sum_rdc.rate += this_rdc.rate;
   4118         sum_rdc.dist += this_rdc.dist;
   4119         sum_rdc.rdcost += this_rdc.rdcost;
   4120       }
   4121     }
   4122 
   4123     if (sum_rdc.rdcost < best_rdc.rdcost) {
   4124       best_rdc = sum_rdc;
   4125       pc_tree->partitioning = PARTITION_HORZ;
   4126 
   4127       if (cpi->sf.less_rectangular_check &&
   4128           bsize > cpi->sf.use_square_only_thresh_high)
   4129         do_rect = 0;
   4130     }
   4131     restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
   4132   }
   4133 
   4134   // PARTITION_VERT
   4135   if (partition_vert_allowed &&
   4136       (do_rect || vp9_active_v_edge(cpi, mi_col, mi_step))) {
   4137     const int part_mode_rate = cpi->partition_cost[pl][PARTITION_VERT];
   4138     const int64_t part_mode_rdcost =
   4139         RDCOST(partition_mul, x->rddiv, part_mode_rate, 0);
   4140     subsize = get_subsize(bsize, PARTITION_VERT);
   4141     load_pred_mv(x, ctx);
   4142     if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
   4143         partition_none_allowed)
   4144       pc_tree->vertical[0].pred_interp_filter = pred_interp_filter;
   4145     rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
   4146                      &pc_tree->vertical[0], best_rdc.rdcost - part_mode_rdcost);
   4147     if (sum_rdc.rdcost < INT64_MAX) {
   4148       sum_rdc.rdcost += part_mode_rdcost;
   4149       sum_rdc.rate += part_mode_rate;
   4150     }
   4151 
   4152     if (sum_rdc.rdcost < best_rdc.rdcost && mi_col + mi_step < cm->mi_cols &&
   4153         bsize > BLOCK_8X8) {
   4154       update_state(cpi, td, &pc_tree->vertical[0], mi_row, mi_col, subsize, 0);
   4155       encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize,
   4156                         &pc_tree->vertical[0]);
   4157       if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
   4158           partition_none_allowed)
   4159         pc_tree->vertical[1].pred_interp_filter = pred_interp_filter;
   4160       rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + mi_step, &this_rdc,
   4161                        subsize, &pc_tree->vertical[1],
   4162                        best_rdc.rdcost - sum_rdc.rdcost);
   4163       if (this_rdc.rate == INT_MAX) {
   4164         sum_rdc.rdcost = INT64_MAX;
   4165       } else {
   4166         sum_rdc.rate += this_rdc.rate;
   4167         sum_rdc.dist += this_rdc.dist;
   4168         sum_rdc.rdcost += this_rdc.rdcost;
   4169       }
   4170     }
   4171 
   4172     if (sum_rdc.rdcost < best_rdc.rdcost) {
   4173       best_rdc = sum_rdc;
   4174       pc_tree->partitioning = PARTITION_VERT;
   4175     }
   4176     restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
   4177   }
   4178 
   4179   // TODO(jbb): This code added so that we avoid static analysis
   4180   // warning related to the fact that best_rd isn't used after this
   4181   // point.  This code should be refactored so that the duplicate
   4182   // checks occur in some sub function and thus are used...
   4183   (void)best_rd;
   4184   *rd_cost = best_rdc;
   4185 
   4186   if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX &&
   4187       pc_tree->index != 3) {
   4188     int output_enabled = (bsize == BLOCK_64X64);
   4189     encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize,
   4190               pc_tree);
   4191   }
   4192 
   4193   if (bsize == BLOCK_64X64) {
   4194     assert(tp_orig < *tp);
   4195     assert(best_rdc.rate < INT_MAX);
   4196     assert(best_rdc.dist < INT64_MAX);
   4197   } else {
   4198     assert(tp_orig == *tp);
   4199   }
   4200 }
   4201 
   4202 static void encode_rd_sb_row(VP9_COMP *cpi, ThreadData *td,
   4203                              TileDataEnc *tile_data, int mi_row,
   4204                              TOKENEXTRA **tp) {
   4205   VP9_COMMON *const cm = &cpi->common;
   4206   TileInfo *const tile_info = &tile_data->tile_info;
   4207   MACROBLOCK *const x = &td->mb;
   4208   MACROBLOCKD *const xd = &x->e_mbd;
   4209   SPEED_FEATURES *const sf = &cpi->sf;
   4210   const int mi_col_start = tile_info->mi_col_start;
   4211   const int mi_col_end = tile_info->mi_col_end;
   4212   int mi_col;
   4213   const int sb_row = mi_row >> MI_BLOCK_SIZE_LOG2;
   4214   const int num_sb_cols =
   4215       get_num_cols(tile_data->tile_info, MI_BLOCK_SIZE_LOG2);
   4216   int sb_col_in_tile;
   4217 
   4218   // Initialize the left context for the new SB row
   4219   memset(&xd->left_context, 0, sizeof(xd->left_context));
   4220   memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context));
   4221 
   4222   // Code each SB in the row
   4223   for (mi_col = mi_col_start, sb_col_in_tile = 0; mi_col < mi_col_end;
   4224        mi_col += MI_BLOCK_SIZE, sb_col_in_tile++) {
   4225     const struct segmentation *const seg = &cm->seg;
   4226     int dummy_rate;
   4227     int64_t dummy_dist;
   4228     RD_COST dummy_rdc;
   4229     int i;
   4230     int seg_skip = 0;
   4231 
   4232     const int idx_str = cm->mi_stride * mi_row + mi_col;
   4233     MODE_INFO **mi = cm->mi_grid_visible + idx_str;
   4234 
   4235     (*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, sb_row,
   4236                                    sb_col_in_tile);
   4237 
   4238     if (sf->adaptive_pred_interp_filter) {
   4239       for (i = 0; i < 64; ++i) td->leaf_tree[i].pred_interp_filter = SWITCHABLE;
   4240 
   4241       for (i = 0; i < 64; ++i) {
   4242         td->pc_tree[i].vertical[0].pred_interp_filter = SWITCHABLE;
   4243         td->pc_tree[i].vertical[1].pred_interp_filter = SWITCHABLE;
   4244         td->pc_tree[i].horizontal[0].pred_interp_filter = SWITCHABLE;
   4245         td->pc_tree[i].horizontal[1].pred_interp_filter = SWITCHABLE;
   4246       }
   4247     }
   4248 
   4249     for (i = 0; i < MAX_REF_FRAMES; ++i) {
   4250       x->pred_mv[i].row = INT16_MAX;
   4251       x->pred_mv[i].col = INT16_MAX;
   4252     }
   4253     td->pc_root->index = 0;
   4254 
   4255     if (seg->enabled) {
   4256       const uint8_t *const map =
   4257           seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
   4258       int segment_id = get_segment_id(cm, map, BLOCK_64X64, mi_row, mi_col);
   4259       seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
   4260     }
   4261 
   4262     x->source_variance = UINT_MAX;
   4263     if (sf->partition_search_type == FIXED_PARTITION || seg_skip) {
   4264       const BLOCK_SIZE bsize =
   4265           seg_skip ? BLOCK_64X64 : sf->always_this_block_size;
   4266       set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
   4267       set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
   4268       rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64,
   4269                        &dummy_rate, &dummy_dist, 1, td->pc_root);
   4270     } else if (cpi->partition_search_skippable_frame) {
   4271       BLOCK_SIZE bsize;
   4272       set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
   4273       bsize = get_rd_var_based_fixed_partition(cpi, x, mi_row, mi_col);
   4274       set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
   4275       rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64,
   4276                        &dummy_rate, &dummy_dist, 1, td->pc_root);
   4277     } else if (sf->partition_search_type == VAR_BASED_PARTITION &&
   4278                cm->frame_type != KEY_FRAME) {
   4279       choose_partitioning(cpi, tile_info, x, mi_row, mi_col);
   4280       rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64,
   4281                        &dummy_rate, &dummy_dist, 1, td->pc_root);
   4282     } else {
   4283       int orig_rdmult = cpi->rd.RDMULT;
   4284       x->cb_rdmult = orig_rdmult;
   4285       if (cpi->twopass.gf_group.index > 0 && cpi->sf.enable_tpl_model) {
   4286         int dr =
   4287             get_rdmult_delta(cpi, BLOCK_64X64, mi_row, mi_col, orig_rdmult);
   4288         x->cb_rdmult = dr;
   4289       }
   4290 
   4291       // If required set upper and lower partition size limits
   4292       if (sf->auto_min_max_partition_size) {
   4293         set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
   4294         rd_auto_partition_range(cpi, tile_info, xd, mi_row, mi_col,
   4295                                 &x->min_partition_size, &x->max_partition_size);
   4296       }
   4297       td->pc_root->none.rdcost = 0;
   4298       rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, BLOCK_64X64,
   4299                         &dummy_rdc, INT64_MAX, td->pc_root);
   4300     }
   4301     (*(cpi->row_mt_sync_write_ptr))(&tile_data->row_mt_sync, sb_row,
   4302                                     sb_col_in_tile, num_sb_cols);
   4303   }
   4304 }
   4305 
   4306 static void init_encode_frame_mb_context(VP9_COMP *cpi) {
   4307   MACROBLOCK *const x = &cpi->td.mb;
   4308   VP9_COMMON *const cm = &cpi->common;
   4309   MACROBLOCKD *const xd = &x->e_mbd;
   4310   const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
   4311 
   4312   // Copy data over into macro block data structures.
   4313   vp9_setup_src_planes(x, cpi->Source, 0, 0);
   4314 
   4315   vp9_setup_block_planes(&x->e_mbd, cm->subsampling_x, cm->subsampling_y);
   4316 
   4317   // Note: this memset assumes above_context[0], [1] and [2]
   4318   // are allocated as part of the same buffer.
   4319   memset(xd->above_context[0], 0,
   4320          sizeof(*xd->above_context[0]) * 2 * aligned_mi_cols * MAX_MB_PLANE);
   4321   memset(xd->above_seg_context, 0,
   4322          sizeof(*xd->above_seg_context) * aligned_mi_cols);
   4323 }
   4324 
   4325 static int check_dual_ref_flags(VP9_COMP *cpi) {
   4326   const int ref_flags = cpi->ref_frame_flags;
   4327 
   4328   if (segfeature_active(&cpi->common.seg, 1, SEG_LVL_REF_FRAME)) {
   4329     return 0;
   4330   } else {
   4331     return (!!(ref_flags & VP9_GOLD_FLAG) + !!(ref_flags & VP9_LAST_FLAG) +
   4332             !!(ref_flags & VP9_ALT_FLAG)) >= 2;
   4333   }
   4334 }
   4335 
   4336 static void reset_skip_tx_size(VP9_COMMON *cm, TX_SIZE max_tx_size) {
   4337   int mi_row, mi_col;
   4338   const int mis = cm->mi_stride;
   4339   MODE_INFO **mi_ptr = cm->mi_grid_visible;
   4340 
   4341   for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row, mi_ptr += mis) {
   4342     for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) {
   4343       if (mi_ptr[mi_col]->tx_size > max_tx_size)
   4344         mi_ptr[mi_col]->tx_size = max_tx_size;
   4345     }
   4346   }
   4347 }
   4348 
   4349 static MV_REFERENCE_FRAME get_frame_type(const VP9_COMP *cpi) {
   4350   if (frame_is_intra_only(&cpi->common))
   4351     return INTRA_FRAME;
   4352   else if (cpi->rc.is_src_frame_alt_ref && cpi->refresh_golden_frame)
   4353     return ALTREF_FRAME;
   4354   else if (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)
   4355     return GOLDEN_FRAME;
   4356   else
   4357     return LAST_FRAME;
   4358 }
   4359 
   4360 static TX_MODE select_tx_mode(const VP9_COMP *cpi, MACROBLOCKD *const xd) {
   4361   if (xd->lossless) return ONLY_4X4;
   4362   if (cpi->common.frame_type == KEY_FRAME && cpi->sf.use_nonrd_pick_mode)
   4363     return ALLOW_16X16;
   4364   if (cpi->sf.tx_size_search_method == USE_LARGESTALL)
   4365     return ALLOW_32X32;
   4366   else if (cpi->sf.tx_size_search_method == USE_FULL_RD ||
   4367            cpi->sf.tx_size_search_method == USE_TX_8X8)
   4368     return TX_MODE_SELECT;
   4369   else
   4370     return cpi->common.tx_mode;
   4371 }
   4372 
   4373 static void hybrid_intra_mode_search(VP9_COMP *cpi, MACROBLOCK *const x,
   4374                                      RD_COST *rd_cost, BLOCK_SIZE bsize,
   4375                                      PICK_MODE_CONTEXT *ctx) {
   4376   if (!cpi->sf.nonrd_keyframe && bsize < BLOCK_16X16)
   4377     vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, INT64_MAX);
   4378   else
   4379     vp9_pick_intra_mode(cpi, x, rd_cost, bsize, ctx);
   4380 }
   4381 
   4382 static void hybrid_search_svc_baseiskey(VP9_COMP *cpi, MACROBLOCK *const x,
   4383                                         RD_COST *rd_cost, BLOCK_SIZE bsize,
   4384                                         PICK_MODE_CONTEXT *ctx,
   4385                                         TileDataEnc *tile_data, int mi_row,
   4386                                         int mi_col) {
   4387   if (!cpi->sf.nonrd_keyframe && bsize <= BLOCK_8X8) {
   4388     vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, INT64_MAX);
   4389   } else {
   4390     if (cpi->svc.disable_inter_layer_pred == INTER_LAYER_PRED_OFF)
   4391       vp9_pick_intra_mode(cpi, x, rd_cost, bsize, ctx);
   4392     else if (bsize >= BLOCK_8X8)
   4393       vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col, rd_cost, bsize,
   4394                           ctx);
   4395     else
   4396       vp9_pick_inter_mode_sub8x8(cpi, x, mi_row, mi_col, rd_cost, bsize, ctx);
   4397   }
   4398 }
   4399 
   4400 static void hybrid_search_scene_change(VP9_COMP *cpi, MACROBLOCK *const x,
   4401                                        RD_COST *rd_cost, BLOCK_SIZE bsize,
   4402                                        PICK_MODE_CONTEXT *ctx,
   4403                                        TileDataEnc *tile_data, int mi_row,
   4404                                        int mi_col) {
   4405   if (!cpi->sf.nonrd_keyframe && bsize <= BLOCK_8X8) {
   4406     vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, INT64_MAX);
   4407   } else {
   4408     vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col, rd_cost, bsize, ctx);
   4409   }
   4410 }
   4411 
   4412 static void nonrd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data,
   4413                                 MACROBLOCK *const x, int mi_row, int mi_col,
   4414                                 RD_COST *rd_cost, BLOCK_SIZE bsize,
   4415                                 PICK_MODE_CONTEXT *ctx) {
   4416   VP9_COMMON *const cm = &cpi->common;
   4417   TileInfo *const tile_info = &tile_data->tile_info;
   4418   MACROBLOCKD *const xd = &x->e_mbd;
   4419   MODE_INFO *mi;
   4420   ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
   4421   BLOCK_SIZE bs = VPXMAX(bsize, BLOCK_8X8);  // processing unit block size
   4422   const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bs];
   4423   const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bs];
   4424   int plane;
   4425 
   4426   set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
   4427 
   4428   set_segment_index(cpi, x, mi_row, mi_col, bsize, 0);
   4429 
   4430   mi = xd->mi[0];
   4431   mi->sb_type = bsize;
   4432 
   4433   for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
   4434     struct macroblockd_plane *pd = &xd->plane[plane];
   4435     memcpy(a + num_4x4_blocks_wide * plane, pd->above_context,
   4436            (sizeof(a[0]) * num_4x4_blocks_wide) >> pd->subsampling_x);
   4437     memcpy(l + num_4x4_blocks_high * plane, pd->left_context,
   4438            (sizeof(l[0]) * num_4x4_blocks_high) >> pd->subsampling_y);
   4439   }
   4440 
   4441   if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled)
   4442     if (cyclic_refresh_segment_id_boosted(mi->segment_id))
   4443       x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);
   4444 
   4445   if (frame_is_intra_only(cm))
   4446     hybrid_intra_mode_search(cpi, x, rd_cost, bsize, ctx);
   4447   else if (cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)
   4448     hybrid_search_svc_baseiskey(cpi, x, rd_cost, bsize, ctx, tile_data, mi_row,
   4449                                 mi_col);
   4450   else if (segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP))
   4451     set_mode_info_seg_skip(x, cm->tx_mode, rd_cost, bsize);
   4452   else if (bsize >= BLOCK_8X8) {
   4453     if (cpi->rc.hybrid_intra_scene_change)
   4454       hybrid_search_scene_change(cpi, x, rd_cost, bsize, ctx, tile_data, mi_row,
   4455                                  mi_col);
   4456     else
   4457       vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col, rd_cost, bsize,
   4458                           ctx);
   4459   } else {
   4460     vp9_pick_inter_mode_sub8x8(cpi, x, mi_row, mi_col, rd_cost, bsize, ctx);
   4461   }
   4462 
   4463   duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize);
   4464 
   4465   for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
   4466     struct macroblockd_plane *pd = &xd->plane[plane];
   4467     memcpy(pd->above_context, a + num_4x4_blocks_wide * plane,
   4468            (sizeof(a[0]) * num_4x4_blocks_wide) >> pd->subsampling_x);
   4469     memcpy(pd->left_context, l + num_4x4_blocks_high * plane,
   4470            (sizeof(l[0]) * num_4x4_blocks_high) >> pd->subsampling_y);
   4471   }
   4472 
   4473   if (rd_cost->rate == INT_MAX) vp9_rd_cost_reset(rd_cost);
   4474 
   4475   ctx->rate = rd_cost->rate;
   4476   ctx->dist = rd_cost->dist;
   4477 }
   4478 
   4479 static void fill_mode_info_sb(VP9_COMMON *cm, MACROBLOCK *x, int mi_row,
   4480                               int mi_col, BLOCK_SIZE bsize, PC_TREE *pc_tree) {
   4481   MACROBLOCKD *xd = &x->e_mbd;
   4482   int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
   4483   PARTITION_TYPE partition = pc_tree->partitioning;
   4484   BLOCK_SIZE subsize = get_subsize(bsize, partition);
   4485 
   4486   assert(bsize >= BLOCK_8X8);
   4487 
   4488   if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
   4489 
   4490   switch (partition) {
   4491     case PARTITION_NONE:
   4492       set_mode_info_offsets(cm, x, xd, mi_row, mi_col);
   4493       *(xd->mi[0]) = pc_tree->none.mic;
   4494       *(x->mbmi_ext) = pc_tree->none.mbmi_ext;
   4495       duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize);
   4496       break;
   4497     case PARTITION_VERT:
   4498       set_mode_info_offsets(cm, x, xd, mi_row, mi_col);
   4499       *(xd->mi[0]) = pc_tree->vertical[0].mic;
   4500       *(x->mbmi_ext) = pc_tree->vertical[0].mbmi_ext;
   4501       duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, subsize);
   4502 
   4503       if (mi_col + hbs < cm->mi_cols) {
   4504         set_mode_info_offsets(cm, x, xd, mi_row, mi_col + hbs);
   4505         *(xd->mi[0]) = pc_tree->vertical[1].mic;
   4506         *(x->mbmi_ext) = pc_tree->vertical[1].mbmi_ext;
   4507         duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col + hbs, subsize);
   4508       }
   4509       break;
   4510     case PARTITION_HORZ:
   4511       set_mode_info_offsets(cm, x, xd, mi_row, mi_col);
   4512       *(xd->mi[0]) = pc_tree->horizontal[0].mic;
   4513       *(x->mbmi_ext) = pc_tree->horizontal[0].mbmi_ext;
   4514       duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, subsize);
   4515       if (mi_row + hbs < cm->mi_rows) {
   4516         set_mode_info_offsets(cm, x, xd, mi_row + hbs, mi_col);
   4517         *(xd->mi[0]) = pc_tree->horizontal[1].mic;
   4518         *(x->mbmi_ext) = pc_tree->horizontal[1].mbmi_ext;
   4519         duplicate_mode_info_in_sb(cm, xd, mi_row + hbs, mi_col, subsize);
   4520       }
   4521       break;
   4522     case PARTITION_SPLIT: {
   4523       fill_mode_info_sb(cm, x, mi_row, mi_col, subsize, pc_tree->split[0]);
   4524       fill_mode_info_sb(cm, x, mi_row, mi_col + hbs, subsize,
   4525                         pc_tree->split[1]);
   4526       fill_mode_info_sb(cm, x, mi_row + hbs, mi_col, subsize,
   4527                         pc_tree->split[2]);
   4528       fill_mode_info_sb(cm, x, mi_row + hbs, mi_col + hbs, subsize,
   4529                         pc_tree->split[3]);
   4530       break;
   4531     }
   4532     default: break;
   4533   }
   4534 }
   4535 
   4536 // Reset the prediction pixel ready flag recursively.
   4537 static void pred_pixel_ready_reset(PC_TREE *pc_tree, BLOCK_SIZE bsize) {
   4538   pc_tree->none.pred_pixel_ready = 0;
   4539   pc_tree->horizontal[0].pred_pixel_ready = 0;
   4540   pc_tree->horizontal[1].pred_pixel_ready = 0;
   4541   pc_tree->vertical[0].pred_pixel_ready = 0;
   4542   pc_tree->vertical[1].pred_pixel_ready = 0;
   4543 
   4544   if (bsize > BLOCK_8X8) {
   4545     BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT);
   4546     int i;
   4547     for (i = 0; i < 4; ++i) pred_pixel_ready_reset(pc_tree->split[i], subsize);
   4548   }
   4549 }
   4550 
   4551 #if CONFIG_ML_VAR_PARTITION
   4552 #define FEATURES 6
   4553 #define LABELS 2
   4554 static int ml_predict_var_paritioning(VP9_COMP *cpi, MACROBLOCK *x,
   4555                                       BLOCK_SIZE bsize, int mi_row,
   4556                                       int mi_col) {
   4557   VP9_COMMON *const cm = &cpi->common;
   4558   const NN_CONFIG *nn_config = NULL;
   4559 
   4560   switch (bsize) {
   4561     case BLOCK_64X64: nn_config = &vp9_var_part_nnconfig_64; break;
   4562     case BLOCK_32X32: nn_config = &vp9_var_part_nnconfig_32; break;
   4563     case BLOCK_16X16: nn_config = &vp9_var_part_nnconfig_16; break;
   4564     case BLOCK_8X8: break;
   4565     default: assert(0 && "Unexpected block size."); return -1;
   4566   }
   4567 
   4568   if (!nn_config) return -1;
   4569 
   4570   vpx_clear_system_state();
   4571 
   4572   {
   4573     const float thresh = cpi->oxcf.speed <= 5 ? 1.25f : 0.0f;
   4574     float features[FEATURES] = { 0.0f };
   4575     const int dc_q = vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth);
   4576     int feature_idx = 0;
   4577     float score[LABELS];
   4578 
   4579     features[feature_idx++] = logf((float)(dc_q * dc_q) / 256.0f + 1.0f);
   4580     vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);
   4581     {
   4582       const int bs = 4 * num_4x4_blocks_wide_lookup[bsize];
   4583       const BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT);
   4584       const int sb_offset_row = 8 * (mi_row & 7);
   4585       const int sb_offset_col = 8 * (mi_col & 7);
   4586       const uint8_t *pred = x->est_pred + sb_offset_row * 64 + sb_offset_col;
   4587       const uint8_t *src = x->plane[0].src.buf;
   4588       const int src_stride = x->plane[0].src.stride;
   4589       const int pred_stride = 64;
   4590       unsigned int sse;
   4591       int i;
   4592       // Variance of whole block.
   4593       const unsigned int var =
   4594           cpi->fn_ptr[bsize].vf(src, src_stride, pred, pred_stride, &sse);
   4595       const float factor = (var == 0) ? 1.0f : (1.0f / (float)var);
   4596 
   4597       features[feature_idx++] = logf((float)var + 1.0f);
   4598       for (i = 0; i < 4; ++i) {
   4599         const int x_idx = (i & 1) * bs / 2;
   4600         const int y_idx = (i >> 1) * bs / 2;
   4601         const int src_offset = y_idx * src_stride + x_idx;
   4602         const int pred_offset = y_idx * pred_stride + x_idx;
   4603         // Variance of quarter block.
   4604         const unsigned int sub_var =
   4605             cpi->fn_ptr[subsize].vf(src + src_offset, src_stride,
   4606                                     pred + pred_offset, pred_stride, &sse);
   4607         const float var_ratio = (var == 0) ? 1.0f : factor * (float)sub_var;
   4608         features[feature_idx++] = var_ratio;
   4609       }
   4610     }
   4611 
   4612     assert(feature_idx == FEATURES);
   4613     nn_predict(features, nn_config, score);
   4614     if (score[0] > thresh) return PARTITION_SPLIT;
   4615     if (score[0] < -thresh) return PARTITION_NONE;
   4616     return -1;
   4617   }
   4618 }
   4619 #undef FEATURES
   4620 #undef LABELS
   4621 #endif  // CONFIG_ML_VAR_PARTITION
   4622 
        // Searches the partitioning of one square block on the non-RD path.
        // Candidates are tried in the order PARTITION_NONE, PARTITION_SPLIT
        // (recursively), PARTITION_HORZ and PARTITION_VERT, each gated by the
        // flags computed below. The cheapest candidate is recorded in
        // pc_tree->partitioning and its cost is returned in *rd_cost; best_rd
        // seeds the cost a candidate has to beat. If no candidate produced a
        // valid rate, *rd_cost is reset and the function returns before
        // fill_mode_info_sb(). When do_recon is nonzero the chosen partitioning
        // is encoded with encode_sb_rt().
   4623 static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td,
   4624                                  TileDataEnc *tile_data, TOKENEXTRA **tp,
   4625                                  int mi_row, int mi_col, BLOCK_SIZE bsize,
   4626                                  RD_COST *rd_cost, int do_recon,
   4627                                  int64_t best_rd, PC_TREE *pc_tree) {
   4628   const SPEED_FEATURES *const sf = &cpi->sf;
   4629   VP9_COMMON *const cm = &cpi->common;
   4630   TileInfo *const tile_info = &tile_data->tile_info;
   4631   MACROBLOCK *const x = &td->mb;
   4632   MACROBLOCKD *const xd = &x->e_mbd;
          // Offset, in mi units, from the block origin to its second half.
   4633   const int ms = num_8x8_blocks_wide_lookup[bsize] / 2;
   4634   TOKENEXTRA *tp_orig = *tp;
   4635   PICK_MODE_CONTEXT *ctx = &pc_tree->none;
   4636   int i;
   4637   BLOCK_SIZE subsize = bsize;
   4638   RD_COST this_rdc, sum_rdc, best_rdc;
   4639   int do_split = bsize >= BLOCK_8X8;
   4640   int do_rect = 1;
   4641   // Override skipping rectangular partition operations for edge blocks
   4642   const int force_horz_split = (mi_row + ms >= cm->mi_rows);
   4643   const int force_vert_split = (mi_col + ms >= cm->mi_cols);
   4644   const int xss = x->e_mbd.plane[1].subsampling_x;
   4645   const int yss = x->e_mbd.plane[1].subsampling_y;
   4646 
   4647   int partition_none_allowed = !force_horz_split && !force_vert_split;
   4648   int partition_horz_allowed =
   4649       !force_vert_split && yss <= xss && bsize >= BLOCK_8X8;
   4650   int partition_vert_allowed =
   4651       !force_horz_split && xss <= yss && bsize >= BLOCK_8X8;
   4652 #if CONFIG_ML_VAR_PARTITION
   4653   const int use_ml_based_partitioning =
   4654       sf->partition_search_type == ML_BASED_PARTITION;
   4655 #endif  // CONFIG_ML_VAR_PARTITION
   4656 
          // NOTE(review): tp_orig is otherwise only read by the asserts at the end
          // of the function; presumably this keeps it referenced when asserts are
          // compiled out.
   4657   (void)*tp_orig;
   4658 
   4659   // Avoid checking for rectangular partitions for speed >= 6.
   4660   if (cpi->oxcf.speed >= 6) do_rect = 0;
   4661 
          // Only square block sizes are expected here.
   4662   assert(num_8x8_blocks_wide_lookup[bsize] ==
   4663          num_8x8_blocks_high_lookup[bsize]);
   4664 
          // best_rdc starts out reset, with only its rdcost seeded from the
          // caller's best_rd.
   4665   vp9_rd_cost_init(&sum_rdc);
   4666   vp9_rd_cost_reset(&best_rdc);
   4667   best_rdc.rdcost = best_rd;
   4668 
   4669   // Determine partition types in search according to the speed features.
   4670   // The threshold set here has to be of square block size.
   4671   if (sf->auto_min_max_partition_size) {
   4672     partition_none_allowed &=
   4673         (bsize <= x->max_partition_size && bsize >= x->min_partition_size);
   4674     partition_horz_allowed &=
   4675         ((bsize <= x->max_partition_size && bsize > x->min_partition_size) ||
   4676          force_horz_split);
   4677     partition_vert_allowed &=
   4678         ((bsize <= x->max_partition_size && bsize > x->min_partition_size) ||
   4679          force_vert_split);
   4680     do_split &= bsize > x->min_partition_size;
   4681   }
          // With square-only search, rectangular partitions remain allowed only
          // where the frame edge forces them.
   4682   if (sf->use_square_partition_only) {
   4683     partition_horz_allowed &= force_horz_split;
   4684     partition_vert_allowed &= force_vert_split;
   4685   }
   4686 
   4687 #if CONFIG_ML_VAR_PARTITION
          // ML-based search: rectangular partitions are dropped whenever NONE or
          // SPLIT is available, and the model may rule out one of those two.
   4688   if (use_ml_based_partitioning) {
   4689     if (partition_none_allowed || do_split) do_rect = 0;
   4690     if (partition_none_allowed && do_split) {
   4691       const int ml_predicted_partition =
   4692           ml_predict_var_paritioning(cpi, x, bsize, mi_row, mi_col);
   4693       if (ml_predicted_partition == PARTITION_NONE) do_split = 0;
   4694       if (ml_predicted_partition == PARTITION_SPLIT) partition_none_allowed = 0;
   4695     }
   4696   }
   4697 #endif  // CONFIG_ML_VAR_PARTITION
   4698 
          // Neither NONE nor SPLIT can be tried, so the rectangular partitions
          // must be searched regardless of the settings above.
   4699   if (!partition_none_allowed && !do_split) do_rect = 1;
   4700 
          // The flag is set only when no split or rectangular candidate will be
          // searched for this block.
   4701   ctx->pred_pixel_ready =
   4702       !(partition_vert_allowed || partition_horz_allowed || do_split);
   4703 
   4704   // PARTITION_NONE
   4705   if (partition_none_allowed) {
   4706     nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, bsize,
   4707                         ctx);
            // Snapshot the search result into the context.
   4708     ctx->mic = *xd->mi[0];
   4709     ctx->mbmi_ext = *x->mbmi_ext;
   4710     ctx->skip_txfm[0] = x->skip_txfm[0];
   4711     ctx->skip = x->skip;
   4712 
   4713     if (this_rdc.rate != INT_MAX) {
   4714       const int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
   4715       this_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
   4716       this_rdc.rdcost =
   4717           RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist);
   4718       if (this_rdc.rdcost < best_rdc.rdcost) {
   4719         best_rdc = this_rdc;
   4720         if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE;
   4721 
   4722 #if CONFIG_ML_VAR_PARTITION
   4723         if (!use_ml_based_partitioning)
   4724 #endif  // CONFIG_ML_VAR_PARTITION
   4725         {
                  // Early breakout: when both rate and distortion are below the
                  // block-size-scaled thresholds (and coding is not lossless),
                  // the split and rectangular candidates are not searched.
   4726           int64_t dist_breakout_thr = sf->partition_search_breakout_thr.dist;
   4727           int64_t rate_breakout_thr = sf->partition_search_breakout_thr.rate;
   4728           dist_breakout_thr >>=
   4729               8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
   4730           rate_breakout_thr *= num_pels_log2_lookup[bsize];
   4731           if (!x->e_mbd.lossless && this_rdc.rate < rate_breakout_thr &&
   4732               this_rdc.dist < dist_breakout_thr) {
   4733             do_split = 0;
   4734             do_rect = 0;
   4735           }
   4736         }
   4737       }
   4738     }
   4739   }
   4740 
   4741   // store estimated motion vector
   4742   store_pred_mv(x, ctx);
   4743 
   4744   // PARTITION_SPLIT
   4745   if (do_split) {
   4746     int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
   4747     sum_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT];
   4748     sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
   4749     subsize = get_subsize(bsize, PARTITION_SPLIT);
            // The loop stops as soon as the accumulated cost reaches the best
            // cost found so far.
   4750     for (i = 0; i < 4 && sum_rdc.rdcost < best_rdc.rdcost; ++i) {
   4751       const int x_idx = (i & 1) * ms;
   4752       const int y_idx = (i >> 1) * ms;
   4753 
              // Sub-blocks that start outside the frame are skipped.
   4754       if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
   4755         continue;
   4756       load_pred_mv(x, ctx);
              // The child is searched without reconstruction (do_recon = 0) and
              // with the remaining cost budget as its best_rd.
   4757       nonrd_pick_partition(cpi, td, tile_data, tp, mi_row + y_idx,
   4758                            mi_col + x_idx, subsize, &this_rdc, 0,
   4759                            best_rdc.rdcost - sum_rdc.rdcost, pc_tree->split[i]);
   4760 
   4761       if (this_rdc.rate == INT_MAX) {
   4762         vp9_rd_cost_reset(&sum_rdc);
   4763       } else {
   4764         sum_rdc.rate += this_rdc.rate;
   4765         sum_rdc.dist += this_rdc.dist;
   4766         sum_rdc.rdcost += this_rdc.rdcost;
   4767       }
   4768     }
   4769 
   4770     if (sum_rdc.rdcost < best_rdc.rdcost) {
   4771       best_rdc = sum_rdc;
   4772       pc_tree->partitioning = PARTITION_SPLIT;
   4773     } else {
   4774       // skip rectangular partition test when larger block size
   4775       // gives better rd cost
   4776       if (sf->less_rectangular_check) do_rect &= !partition_none_allowed;
   4777     }
   4778   }
   4779 
   4780   // PARTITION_HORZ
   4781   if (partition_horz_allowed && do_rect) {
   4782     subsize = get_subsize(bsize, PARTITION_HORZ);
   4783     load_pred_mv(x, ctx);
   4784     pc_tree->horizontal[0].pred_pixel_ready = 1;
            // Top half: its cost is written straight into sum_rdc.
   4785     nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
   4786                         &pc_tree->horizontal[0]);
   4787 
   4788     pc_tree->horizontal[0].mic = *xd->mi[0];
   4789     pc_tree->horizontal[0].mbmi_ext = *x->mbmi_ext;
   4790     pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0];
   4791     pc_tree->horizontal[0].skip = x->skip;
   4792 
            // Bottom half: searched only while the top half alone is still
            // cheaper than the best cost and the half starts inside the frame.
   4793     if (sum_rdc.rdcost < best_rdc.rdcost && mi_row + ms < cm->mi_rows) {
   4794       load_pred_mv(x, ctx);
   4795       pc_tree->horizontal[1].pred_pixel_ready = 1;
   4796       nonrd_pick_sb_modes(cpi, tile_data, x, mi_row + ms, mi_col, &this_rdc,
   4797                           subsize, &pc_tree->horizontal[1]);
   4798 
   4799       pc_tree->horizontal[1].mic = *xd->mi[0];
   4800       pc_tree->horizontal[1].mbmi_ext = *x->mbmi_ext;
   4801       pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0];
   4802       pc_tree->horizontal[1].skip = x->skip;
   4803 
   4804       if (this_rdc.rate == INT_MAX) {
   4805         vp9_rd_cost_reset(&sum_rdc);
   4806       } else {
                // The partition cost is added to this_rdc.rate here, which is
                // then folded into sum_rdc.rate on the next line.
   4807         int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
   4808         this_rdc.rate += cpi->partition_cost[pl][PARTITION_HORZ];
   4809         sum_rdc.rate += this_rdc.rate;
   4810         sum_rdc.dist += this_rdc.dist;
   4811         sum_rdc.rdcost =
   4812             RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
   4813       }
   4814     }
   4815 
   4816     if (sum_rdc.rdcost < best_rdc.rdcost) {
   4817       best_rdc = sum_rdc;
   4818       pc_tree->partitioning = PARTITION_HORZ;
   4819     } else {
              // HORZ lost: clear the pred_pixel_ready flags of the whole subtree.
   4820       pred_pixel_ready_reset(pc_tree, bsize);
   4821     }
   4822   }
   4823 
   4824   // PARTITION_VERT
   4825   if (partition_vert_allowed && do_rect) {
   4826     subsize = get_subsize(bsize, PARTITION_VERT);
   4827     load_pred_mv(x, ctx);
   4828     pc_tree->vertical[0].pred_pixel_ready = 1;
            // Left half: its cost is written straight into sum_rdc.
   4829     nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
   4830                         &pc_tree->vertical[0]);
   4831     pc_tree->vertical[0].mic = *xd->mi[0];
   4832     pc_tree->vertical[0].mbmi_ext = *x->mbmi_ext;
   4833     pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0];
   4834     pc_tree->vertical[0].skip = x->skip;
   4835 
            // Right half: searched only while the left half alone is still
            // cheaper than the best cost and the half starts inside the frame.
   4836     if (sum_rdc.rdcost < best_rdc.rdcost && mi_col + ms < cm->mi_cols) {
   4837       load_pred_mv(x, ctx);
   4838       pc_tree->vertical[1].pred_pixel_ready = 1;
   4839       nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + ms, &this_rdc,
   4840                           subsize, &pc_tree->vertical[1]);
   4841       pc_tree->vertical[1].mic = *xd->mi[0];
   4842       pc_tree->vertical[1].mbmi_ext = *x->mbmi_ext;
   4843       pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0];
   4844       pc_tree->vertical[1].skip = x->skip;
   4845 
   4846       if (this_rdc.rate == INT_MAX) {
   4847         vp9_rd_cost_reset(&sum_rdc);
   4848       } else {
   4849         int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
   4850         sum_rdc.rate += cpi->partition_cost[pl][PARTITION_VERT];
   4851         sum_rdc.rate += this_rdc.rate;
   4852         sum_rdc.dist += this_rdc.dist;
   4853         sum_rdc.rdcost =
   4854             RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
   4855       }
   4856     }
   4857 
   4858     if (sum_rdc.rdcost < best_rdc.rdcost) {
   4859       best_rdc = sum_rdc;
   4860       pc_tree->partitioning = PARTITION_VERT;
   4861     } else {
              // VERT lost: clear the pred_pixel_ready flags of the whole subtree.
   4862       pred_pixel_ready_reset(pc_tree, bsize);
   4863     }
   4864   }
   4865 
   4866   *rd_cost = best_rdc;
   4867 
          // No candidate produced a valid rate: report a reset cost and stop.
   4868   if (best_rdc.rate == INT_MAX) {
   4869     vp9_rd_cost_reset(rd_cost);
   4870     return;
   4871   }
   4872 
   4873   // update mode info array
   4874   fill_mode_info_sb(cm, x, mi_row, mi_col, bsize, pc_tree);
   4875 
          // output_enabled is set only for a full 64x64 superblock.
   4876   if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX && do_recon) {
   4877     int output_enabled = (bsize == BLOCK_64X64);
   4878     encode_sb_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize,
   4879                  pc_tree);
   4880   }
   4881 
          // Sanity checks: tokens are expected to have been written only for a
          // reconstructed 64x64 superblock.
   4882   if (bsize == BLOCK_64X64 && do_recon) {
   4883     assert(tp_orig < *tp);
   4884     assert(best_rdc.rate < INT_MAX);
   4885     assert(best_rdc.dist < INT64_MAX);
   4886   } else {
   4887     assert(tp_orig == *tp);
   4888   }
   4889 }
   4890 
   4891 static void nonrd_select_partition(VP9_COMP *cpi, ThreadData *td,
   4892                                    TileDataEnc *tile_data, MODE_INFO **mi,
   4893                                    TOKENEXTRA **tp, int mi_row, int mi_col,
   4894                                    BLOCK_SIZE bsize, int output_enabled,
   4895                                    RD_COST *rd_cost, PC_TREE *pc_tree) {
   4896   VP9_COMMON *const cm = &cpi->common;
   4897   TileInfo *const tile_info = &tile_data->tile_info;
   4898   MACROBLOCK *const x = &td->mb;
   4899   MACROBLOCKD *const xd = &x->e_mbd;
   4900   const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
   4901   const int mis = cm->mi_stride;
   4902   PARTITION_TYPE partition;
   4903   BLOCK_SIZE subsize;
   4904   RD_COST this_rdc;
   4905   BLOCK_SIZE subsize_ref =
   4906       (cpi->sf.adapt_partition_source_sad) ? BLOCK_8X8 : BLOCK_16X16;
   4907 
   4908   vp9_rd_cost_reset(&this_rdc);
   4909   if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
   4910 
   4911   subsize = (bsize >= BLOCK_8X8) ? mi[0]->sb_type : BLOCK_4X4;
   4912   partition = partition_lookup[bsl][subsize];
   4913 
   4914   if (bsize == BLOCK_32X32 && subsize == BLOCK_32X32) {
   4915     x->max_partition_size = BLOCK_32X32;
   4916     x->min_partition_size = BLOCK_16X16;
   4917     nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize, rd_cost,
   4918                          0, INT64_MAX, pc_tree);
   4919   } else if (bsize == BLOCK_32X32 && partition != PARTITION_NONE &&
   4920              subsize >= subsize_ref) {
   4921     x->max_partition_size = BLOCK_32X32;
   4922     x->min_partition_size = BLOCK_8X8;
   4923     nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize, rd_cost,
   4924                          0, INT64_MAX, pc_tree);
   4925   } else if (bsize == BLOCK_16X16 && partition != PARTITION_NONE) {
   4926     x->max_partition_size = BLOCK_16X16;
   4927     x->min_partition_size = BLOCK_8X8;
   4928     nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize, rd_cost,
   4929                          0, INT64_MAX, pc_tree);
   4930   } else {
   4931     switch (partition) {
   4932       case PARTITION_NONE:
   4933         pc_tree->none.pred_pixel_ready = 1;
   4934         nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost, subsize,
   4935                             &pc_tree->none);
   4936         pc_tree->none.mic = *xd->mi[0];
   4937         pc_tree->none.mbmi_ext = *x->mbmi_ext;
   4938         pc_tree->none.skip_txfm[0] = x->skip_txfm[0];
   4939         pc_tree->none.skip = x->skip;
   4940         break;
   4941       case PARTITION_VERT:
   4942         pc_tree->vertical[0].pred_pixel_ready = 1;
   4943         nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost, subsize,
   4944                             &pc_tree->vertical[0]);
   4945         pc_tree->vertical[0].mic = *xd->mi[0];
   4946         pc_tree->vertical[0].mbmi_ext = *x->mbmi_ext;
   4947         pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0];
   4948         pc_tree->vertical[0].skip = x->skip;
   4949         if (mi_col + hbs < cm->mi_cols) {
   4950           pc_tree->vertical[1].pred_pixel_ready = 1;
   4951           nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs,
   4952                               &this_rdc, subsize, &pc_tree->vertical[1]);
   4953           pc_tree->vertical[1].mic = *xd->mi[0];
   4954           pc_tree->vertical[1].mbmi_ext = *x->mbmi_ext;
   4955           pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0];
   4956           pc_tree->vertical[1].skip = x->skip;
   4957           if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
   4958               rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
   4959             rd_cost->rate += this_rdc.rate;
   4960             rd_cost->dist += this_rdc.dist;
   4961           }
   4962         }
   4963         break;
   4964       case PARTITION_HORZ:
   4965         pc_tree->horizontal[0].pred_pixel_ready = 1;
   4966         nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost, subsize,
   4967                             &pc_tree->horizontal[0]);
   4968         pc_tree->horizontal[0].mic = *xd->mi[0];
   4969         pc_tree->horizontal[0].mbmi_ext = *x->mbmi_ext;
   4970         pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0];
   4971         pc_tree->horizontal[0].skip = x->skip;
   4972         if (mi_row + hbs < cm->mi_rows) {
   4973           pc_tree->horizontal[1].pred_pixel_ready = 1;
   4974           nonrd_pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col,
   4975                               &this_rdc, subsize, &pc_tree->horizontal[1]);
   4976           pc_tree->horizontal[1].mic = *xd->mi[0];
   4977           pc_tree->horizontal[1].mbmi_ext = *x->mbmi_ext;
   4978           pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0];
   4979           pc_tree->horizontal[1].skip = x->skip;
   4980           if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
   4981               rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
   4982             rd_cost->rate += this_rdc.rate;
   4983             rd_cost->dist += this_rdc.dist;
   4984           }
   4985         }
   4986         break;
   4987       default:
   4988         assert(partition == PARTITION_SPLIT);
   4989         subsize = get_subsize(bsize, PARTITION_SPLIT);
   4990         nonrd_select_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
   4991                                subsize, output_enabled, rd_cost,
   4992                                pc_tree->split[0]);
   4993         nonrd_select_partition(cpi, td, tile_data, mi + hbs, tp, mi_row,
   4994                                mi_col + hbs, subsize, output_enabled, &this_rdc,
   4995                                pc_tree->split[1]);
   4996         if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
   4997             rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
   4998           rd_cost->rate += this_rdc.rate;
   4999           rd_cost->dist += this_rdc.dist;
   5000         }
   5001         nonrd_select_partition(cpi, td, tile_data, mi + hbs * mis, tp,
   5002                                mi_row + hbs, mi_col, subsize, output_enabled,
   5003                                &this_rdc, pc_tree->split[2]);
   5004         if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
   5005             rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
   5006           rd_cost->rate += this_rdc.rate;
   5007           rd_cost->dist += this_rdc.dist;
   5008         }
   5009         nonrd_select_partition(cpi, td, tile_data, mi + hbs * mis + hbs, tp,
   5010                                mi_row + hbs, mi_col + hbs, subsize,
   5011                                output_enabled, &this_rdc, pc_tree->split[3]);
   5012         if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
   5013             rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
   5014           rd_cost->rate += this_rdc.rate;
   5015           rd_cost->dist += this_rdc.dist;
   5016         }
   5017         break;
   5018     }
   5019   }
   5020 
   5021   if (bsize == BLOCK_64X64 && output_enabled)
   5022     encode_sb_rt(cpi, td, tile_info, tp, mi_row, mi_col, 1, bsize, pc_tree);
   5023 }
   5024 
   5025 static void nonrd_use_partition(VP9_COMP *cpi, ThreadData *td,
   5026                                 TileDataEnc *tile_data, MODE_INFO **mi,
   5027                                 TOKENEXTRA **tp, int mi_row, int mi_col,
   5028                                 BLOCK_SIZE bsize, int output_enabled,
   5029                                 RD_COST *dummy_cost, PC_TREE *pc_tree) {
   5030   VP9_COMMON *const cm = &cpi->common;
   5031   TileInfo *tile_info = &tile_data->tile_info;
   5032   MACROBLOCK *const x = &td->mb;
   5033   MACROBLOCKD *const xd = &x->e_mbd;
   5034   const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
   5035   const int mis = cm->mi_stride;
   5036   PARTITION_TYPE partition;
   5037   BLOCK_SIZE subsize;
   5038 
   5039   if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
   5040 
   5041   subsize = (bsize >= BLOCK_8X8) ? mi[0]->sb_type : BLOCK_4X4;
   5042   partition = partition_lookup[bsl][subsize];
   5043 
   5044   if (output_enabled && bsize != BLOCK_4X4) {
   5045     int ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
   5046     td->counts->partition[ctx][partition]++;
   5047   }
   5048 
   5049   switch (partition) {
   5050     case PARTITION_NONE:
   5051       pc_tree->none.pred_pixel_ready = 1;
   5052       nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost,
   5053                           subsize, &pc_tree->none);
   5054       pc_tree->none.mic = *xd->mi[0];
   5055       pc_tree->none.mbmi_ext = *x->mbmi_ext;
   5056       pc_tree->none.skip_txfm[0] = x->skip_txfm[0];
   5057       pc_tree->none.skip = x->skip;
   5058       encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled,
   5059                   subsize, &pc_tree->none);
   5060       break;
   5061     case PARTITION_VERT:
   5062       pc_tree->vertical[0].pred_pixel_ready = 1;
   5063       nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost,
   5064                           subsize, &pc_tree->vertical[0]);
   5065       pc_tree->vertical[0].mic = *xd->mi[0];
   5066       pc_tree->vertical[0].mbmi_ext = *x->mbmi_ext;
   5067       pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0];
   5068       pc_tree->vertical[0].skip = x->skip;
   5069       encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled,
   5070                   subsize, &pc_tree->vertical[0]);
   5071       if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) {
   5072         pc_tree->vertical[1].pred_pixel_ready = 1;
   5073         nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, dummy_cost,
   5074                             subsize, &pc_tree->vertical[1]);
   5075         pc_tree->vertical[1].mic = *xd->mi[0];
   5076         pc_tree->vertical[1].mbmi_ext = *x->mbmi_ext;
   5077         pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0];
   5078         pc_tree->vertical[1].skip = x->skip;
   5079         encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col + hbs,
   5080                     output_enabled, subsize, &pc_tree->vertical[1]);
   5081       }
   5082       break;
   5083     case PARTITION_HORZ:
   5084       pc_tree->horizontal[0].pred_pixel_ready = 1;
   5085       nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost,
   5086                           subsize, &pc_tree->horizontal[0]);
   5087       pc_tree->horizontal[0].mic = *xd->mi[0];
   5088       pc_tree->horizontal[0].mbmi_ext = *x->mbmi_ext;
   5089       pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0];
   5090       pc_tree->horizontal[0].skip = x->skip;
   5091       encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled,
   5092                   subsize, &pc_tree->horizontal[0]);
   5093 
   5094       if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) {
   5095         pc_tree->horizontal[1].pred_pixel_ready = 1;
   5096         nonrd_pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, dummy_cost,
   5097                             subsize, &pc_tree->horizontal[1]);
   5098         pc_tree->horizontal[1].mic = *xd->mi[0];
   5099         pc_tree->horizontal[1].mbmi_ext = *x->mbmi_ext;
   5100         pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0];
   5101         pc_tree->horizontal[1].skip = x->skip;
   5102         encode_b_rt(cpi, td, tile_info, tp, mi_row + hbs, mi_col,
   5103                     output_enabled, subsize, &pc_tree->horizontal[1]);
   5104       }
   5105       break;
   5106     default:
   5107       assert(partition == PARTITION_SPLIT);
   5108       subsize = get_subsize(bsize, PARTITION_SPLIT);
   5109       if (bsize == BLOCK_8X8) {
   5110         nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost,
   5111                             subsize, pc_tree->leaf_split[0]);
   5112         encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled,
   5113                     subsize, pc_tree->leaf_split[0]);
   5114       } else {
   5115         nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, subsize,
   5116                             output_enabled, dummy_cost, pc_tree->split[0]);
   5117         nonrd_use_partition(cpi, td, tile_data, mi + hbs, tp, mi_row,
   5118                             mi_col + hbs, subsize, output_enabled, dummy_cost,
   5119                             pc_tree->split[1]);
   5120         nonrd_use_partition(cpi, td, tile_data, mi + hbs * mis, tp,
   5121                             mi_row + hbs, mi_col, subsize, output_enabled,
   5122                             dummy_cost, pc_tree->split[2]);
   5123         nonrd_use_partition(cpi, td, tile_data, mi + hbs * mis + hbs, tp,
   5124                             mi_row + hbs, mi_col + hbs, subsize, output_enabled,
   5125                             dummy_cost, pc_tree->split[3]);
   5126       }
   5127       break;
   5128   }
   5129 
   5130   if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)
   5131     update_partition_context(xd, mi_row, mi_col, subsize, bsize);
   5132 }
   5133 
   #if CONFIG_ML_VAR_PARTITION
   // Get a prediction (stored in x->est_pred, stride 64) for the whole 64x64
   // superblock at (mi_row, mi_col).
   //
   // Inter frames:
   //   1. The reference starts as LAST_FRAME, or ALTREF_FRAME when
   //      lag_in_frames > 0, the rate control mode is VPX_VBR and the source
   //      frame is an alt-ref.
   //   2. vp9_int_pro_motion_estimation() is run against that reference and the
   //      resulting motion vector is recorded in x->sb_mvcol_part /
   //      x->sb_mvrow_part (x->sb_use_mv_part is set to 1).
   //   3. If a zero-motion SAD against GOLDEN_FRAME was computed and is below
   //      the threshold, GOLDEN_FRAME with a zero motion vector is used instead.
   //   4. The inter predictor for the selected reference is built into
   //      x->est_pred.
   // Intra-only frames: x->est_pred is filled with a constant via memset().
   //
   // Side effects: overwrites the mode info at xd->mi[0] (ref_frame, sb_type,
   // mv, interp_filter) and redirects xd->plane[0].dst to x->est_pred.
   static void get_estimated_pred(VP9_COMP *cpi, const TileInfo *const tile,
                                  MACROBLOCK *x, int mi_row, int mi_col) {
     VP9_COMMON *const cm = &cpi->common;
     // Declared at function scope: the intra-only branch below reads xd->bd when
     // CONFIG_VP9_HIGHBITDEPTH is enabled.
     MACROBLOCKD *const xd = &x->e_mbd;
     const int is_key_frame = frame_is_intra_only(cm);

     set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64);

     if (!is_key_frame) {
       MODE_INFO *mi = xd->mi[0];
       YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);
       const YV12_BUFFER_CONFIG *yv12_g = NULL;
       // Block size used for the SAD / motion search: starts from BLOCK_32X32
       // and is enlarged when 4 more mi units fit to the right and/or below.
       const BLOCK_SIZE bsize = BLOCK_32X32 + (mi_col + 4 < cm->mi_cols) * 2 +
                                (mi_row + 4 < cm->mi_rows);
       unsigned int y_sad_g, y_sad_thr;
       unsigned int y_sad = UINT_MAX;

       assert(yv12 != NULL);

       if (!(is_one_pass_cbr_svc(cpi) && cpi->svc.spatial_layer_id) ||
           cpi->svc.use_gf_temporal_ref_current_layer) {
         // For now, GOLDEN will not be used for non-zero spatial layers, since
         // it may not be a temporal reference.
         yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
       }

       // Only compute y_sad_g (sad for golden reference) for speed < 8.
       if (cpi->oxcf.speed < 8 && yv12_g && yv12_g != yv12 &&
           (cpi->ref_frame_flags & VP9_GOLD_FLAG)) {
         vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
                              &cm->frame_refs[GOLDEN_FRAME - 1].sf);
         y_sad_g = cpi->fn_ptr[bsize].sdf(
             x->plane[0].src.buf, x->plane[0].src.stride,
             xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride);
       } else {
         // UINT_MAX can never be below the threshold, so golden is not picked.
         y_sad_g = UINT_MAX;
       }

       if (cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR &&
           cpi->rc.is_src_frame_alt_ref) {
         yv12 = get_ref_frame_buffer(cpi, ALTREF_FRAME);
         vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
                              &cm->frame_refs[ALTREF_FRAME - 1].sf);
         mi->ref_frame[0] = ALTREF_FRAME;
         y_sad_g = UINT_MAX;
       } else {
         vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
                              &cm->frame_refs[LAST_FRAME - 1].sf);
         mi->ref_frame[0] = LAST_FRAME;
       }
       mi->ref_frame[1] = NONE;
       mi->sb_type = BLOCK_64X64;
       mi->mv[0].as_int = 0;
       mi->interp_filter = BILINEAR;

       {
         const MV dummy_mv = { 0, 0 };
         y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col,
                                               &dummy_mv);
         x->sb_use_mv_part = 1;
         x->sb_mvcol_part = mi->mv[0].as_mv.col;
         x->sb_mvrow_part = mi->mv[0].as_mv.row;
       }

       // Pick ref frame for partitioning, bias last frame when y_sad_g and y_sad
       // are close if short_circuit_low_temp_var is on.
       y_sad_thr = cpi->sf.short_circuit_low_temp_var ? (y_sad * 7) >> 3 : y_sad;
       if (y_sad_g < y_sad_thr) {
         vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
                              &cm->frame_refs[GOLDEN_FRAME - 1].sf);
         mi->ref_frame[0] = GOLDEN_FRAME;
         mi->mv[0].as_int = 0;
         y_sad = y_sad_g;
       } else {
         x->pred_mv[LAST_FRAME] = mi->mv[0].as_mv;
       }

       set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
       xd->plane[0].dst.buf = x->est_pred;
       xd->plane[0].dst.stride = 64;
       vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_64X64);
     } else {
   #if CONFIG_VP9_HIGHBITDEPTH
       // NOTE(review): memset() converts its value argument to unsigned char, so
       // 128 * 4 and 128 * 16 both store 0x00 bytes. Confirm the intended fill
       // value for 10- and 12-bit input.
       switch (xd->bd) {
         case 8:
           memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0]));
           break;
         case 10:
           memset(x->est_pred, 128 * 4, 64 * 64 * sizeof(x->est_pred[0]));
           break;
         case 12:
           memset(x->est_pred, 128 * 16, 64 * 64 * sizeof(x->est_pred[0]));
           break;
       }
   #else
       memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0]));
   #endif  // CONFIG_VP9_HIGHBITDEPTH
     }
   }
   #endif  // CONFIG_ML_VAR_PARTITION
   5238 
   5239 static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td,
   5240                                 TileDataEnc *tile_data, int mi_row,
   5241                                 TOKENEXTRA **tp) {
   5242   SPEED_FEATURES *const sf = &cpi->sf;
   5243   VP9_COMMON *const cm = &cpi->common;
   5244   TileInfo *const tile_info = &tile_data->tile_info;
   5245   MACROBLOCK *const x = &td->mb;
   5246   MACROBLOCKD *const xd = &x->e_mbd;
   5247   const int mi_col_start = tile_info->mi_col_start;
   5248   const int mi_col_end = tile_info->mi_col_end;
   5249   int mi_col;
   5250   const int sb_row = mi_row >> MI_BLOCK_SIZE_LOG2;
   5251   const int num_sb_cols =
   5252       get_num_cols(tile_data->tile_info, MI_BLOCK_SIZE_LOG2);
   5253   int sb_col_in_tile;
   5254 
   5255   // Initialize the left context for the new SB row
   5256   memset(&xd->left_context, 0, sizeof(xd->left_context));
   5257   memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context));
   5258 
   5259   // Code each SB in the row
   5260   for (mi_col = mi_col_start, sb_col_in_tile = 0; mi_col < mi_col_end;
   5261        mi_col += MI_BLOCK_SIZE, ++sb_col_in_tile) {
   5262     const struct segmentation *const seg = &cm->seg;
   5263     RD_COST dummy_rdc;
   5264     const int idx_str = cm->mi_stride * mi_row + mi_col;
   5265     MODE_INFO **mi = cm->mi_grid_visible + idx_str;
   5266     PARTITION_SEARCH_TYPE partition_search_type = sf->partition_search_type;
   5267     BLOCK_SIZE bsize = BLOCK_64X64;
   5268     int seg_skip = 0;
   5269     int i;
   5270 
   5271     (*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, sb_row,
   5272                                    sb_col_in_tile);
   5273 
   5274     if (cpi->use_skin_detection) {
   5275       vp9_compute_skin_sb(cpi, BLOCK_16X16, mi_row, mi_col);
   5276     }
   5277 
   5278     x->source_variance = UINT_MAX;
   5279     for (i = 0; i < MAX_REF_FRAMES; ++i) {
   5280       x->pred_mv[i].row = INT16_MAX;
   5281       x->pred_mv[i].col = INT16_MAX;
   5282     }
   5283     vp9_rd_cost_init(&dummy_rdc);
   5284     x->color_sensitivity[0] = 0;
   5285     x->color_sensitivity[1] = 0;
   5286     x->sb_is_skin = 0;
   5287     x->skip_low_source_sad = 0;
   5288     x->lowvar_highsumdiff = 0;
   5289     x->content_state_sb = 0;
   5290     x->zero_temp_sad_source = 0;
   5291     x->sb_use_mv_part = 0;
   5292     x->sb_mvcol_part = 0;
   5293     x->sb_mvrow_part = 0;
   5294     x->sb_pickmode_part = 0;
   5295     x->arf_frame_usage = 0;
   5296     x->lastgolden_frame_usage = 0;
   5297 
   5298     if (seg->enabled) {
   5299       const uint8_t *const map =
   5300           seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
   5301       int segment_id = get_segment_id(cm, map, BLOCK_64X64, mi_row, mi_col);
   5302       seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
   5303       if (seg_skip) {
   5304         partition_search_type = FIXED_PARTITION;
   5305       }
   5306     }
   5307 
   5308     if (cpi->compute_source_sad_onepass && cpi->sf.use_source_sad) {
   5309       int shift = cpi->Source->y_stride * (mi_row << 3) + (mi_col << 3);
   5310       int sb_offset2 = ((cm->mi_cols + 7) >> 3) * (mi_row >> 3) + (mi_col >> 3);
   5311       int64_t source_sad = avg_source_sad(cpi, x, shift, sb_offset2);
   5312       if (sf->adapt_partition_source_sad &&
   5313           (cpi->oxcf.rc_mode == VPX_VBR && !cpi->rc.is_src_frame_alt_ref &&
   5314            source_sad > sf->adapt_partition_thresh &&
   5315            (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)))
   5316         partition_search_type = REFERENCE_PARTITION;
   5317     }
   5318 
   5319     // Set the partition type of the 64X64 block
   5320     switch (partition_search_type) {
   5321       case VAR_BASED_PARTITION:
   5322         // TODO(jingning, marpan): The mode decision and encoding process
   5323         // support both intra and inter sub8x8 block coding for RTC mode.
   5324         // Tune the thresholds accordingly to use sub8x8 block coding for
   5325         // coding performance improvement.
   5326         choose_partitioning(cpi, tile_info, x, mi_row, mi_col);
   5327         nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
   5328                             BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
   5329         break;
   5330 #if CONFIG_ML_VAR_PARTITION
   5331       case ML_BASED_PARTITION:
   5332         get_estimated_pred(cpi, tile_info, x, mi_row, mi_col);
   5333         x->max_partition_size = BLOCK_64X64;
   5334         x->min_partition_size = BLOCK_8X8;
   5335         x->sb_pickmode_part = 1;
   5336         nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col,
   5337                              BLOCK_64X64, &dummy_rdc, 1, INT64_MAX,
   5338                              td->pc_root);
   5339         break;
   5340 #endif  // CONFIG_ML_VAR_PARTITION
   5341       case SOURCE_VAR_BASED_PARTITION:
   5342         set_source_var_based_partition(cpi, tile_info, x, mi, mi_row, mi_col);
   5343         nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
   5344                             BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
   5345         break;
   5346       case FIXED_PARTITION:
   5347         if (!seg_skip) bsize = sf->always_this_block_size;
   5348         set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
   5349         nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
   5350                             BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
   5351         break;
   5352       default:
   5353         assert(partition_search_type == REFERENCE_PARTITION);
   5354         x->sb_pickmode_part = 1;
   5355         set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
   5356         // Use nonrd_pick_partition on scene-cut for VBR mode.
   5357         // nonrd_pick_partition does not support 4x4 partition, so avoid it
   5358         // on key frame for now.
   5359         if ((cpi->oxcf.rc_mode == VPX_VBR && cpi->rc.high_source_sad &&
   5360              cpi->oxcf.speed < 6 && !frame_is_intra_only(cm) &&
   5361              (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame))) {
   5362           // Use lower max_partition_size for low resoultions.
   5363           if (cm->width <= 352 && cm->height <= 288)
   5364             x->max_partition_size = BLOCK_32X32;
   5365           else
   5366             x->max_partition_size = BLOCK_64X64;
   5367           x->min_partition_size = BLOCK_8X8;
   5368           nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col,
   5369                                BLOCK_64X64, &dummy_rdc, 1, INT64_MAX,
   5370                                td->pc_root);
   5371         } else {
   5372           choose_partitioning(cpi, tile_info, x, mi_row, mi_col);
   5373           // TODO(marpan): Seems like nonrd_select_partition does not support
   5374           // 4x4 partition. Since 4x4 is used on key frame, use this switch
   5375           // for now.
   5376           if (frame_is_intra_only(cm))
   5377             nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
   5378                                 BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
   5379           else
   5380             nonrd_select_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
   5381                                    BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
   5382         }
   5383 
   5384         break;
   5385     }
   5386 
   5387     // Update ref_frame usage for inter frame if this group is ARF group.
   5388     if (!cpi->rc.is_src_frame_alt_ref && !cpi->refresh_golden_frame &&
   5389         !cpi->refresh_alt_ref_frame && cpi->rc.alt_ref_gf_group &&
   5390         cpi->sf.use_altref_onepass) {
   5391       int sboffset = ((cm->mi_cols + 7) >> 3) * (mi_row >> 3) + (mi_col >> 3);
   5392       if (cpi->count_arf_frame_usage != NULL)
   5393         cpi->count_arf_frame_usage[sboffset] = x->arf_frame_usage;
   5394       if (cpi->count_lastgolden_frame_usage != NULL)
   5395         cpi->count_lastgolden_frame_usage[sboffset] = x->lastgolden_frame_usage;
   5396     }
   5397 
   5398     (*(cpi->row_mt_sync_write_ptr))(&tile_data->row_mt_sync, sb_row,
   5399                                     sb_col_in_tile, num_sb_cols);
   5400   }
   5401 }
   5402 // end RTC play code
   5403 
   5404 static INLINE uint32_t variance(const diff *const d) {
   5405   return d->sse - (uint32_t)(((int64_t)d->sum * d->sum) >> 8);
   5406 }
   5407 
   #if CONFIG_VP9_HIGHBITDEPTH
   // Same formula as variance(), but evaluated in 64-bit signed arithmetic and
   // clamped to 0 when the result is negative.
   static INLINE uint32_t variance_highbd(diff *const d) {
     const int64_t sum_sq = (int64_t)d->sum * d->sum;
     const int64_t var = (int64_t)d->sse - (sum_sq >> 8);
     if (var < 0) return 0;
     return (uint32_t)var;
   }
   #endif  // CONFIG_VP9_HIGHBITDEPTH
   5414 
   5415 static int set_var_thresh_from_histogram(VP9_COMP *cpi) {
   5416   const SPEED_FEATURES *const sf = &cpi->sf;
   5417   const VP9_COMMON *const cm = &cpi->common;
   5418 
   5419   const uint8_t *src = cpi->Source->y_buffer;
   5420   const uint8_t *last_src = cpi->Last_Source->y_buffer;
   5421   const int src_stride = cpi->Source->y_stride;
   5422   const int last_stride = cpi->Last_Source->y_stride;
   5423 
   5424   // Pick cutoff threshold
   5425   const int cutoff = (VPXMIN(cm->width, cm->height) >= 720)
   5426                          ? (cm->MBs * VAR_HIST_LARGE_CUT_OFF / 100)
   5427                          : (cm->MBs * VAR_HIST_SMALL_CUT_OFF / 100);
   5428   DECLARE_ALIGNED(16, int, hist[VAR_HIST_BINS]);
   5429   diff *var16 = cpi->source_diff_var;
   5430 
   5431   int sum = 0;
   5432   int i, j;
   5433 
   5434   memset(hist, 0, VAR_HIST_BINS * sizeof(hist[0]));
   5435 
   5436   for (i = 0; i < cm->mb_rows; i++) {
   5437     for (j = 0; j < cm->mb_cols; j++) {
   5438 #if CONFIG_VP9_HIGHBITDEPTH
   5439       if (cm->use_highbitdepth) {
   5440         switch (cm->bit_depth) {
   5441           case VPX_BITS_8:
   5442             vpx_highbd_8_get16x16var(src, src_stride, last_src, last_stride,
   5443                                      &var16->sse, &var16->sum);
   5444             var16->var = variance(var16);
   5445             break;
   5446           case VPX_BITS_10:
   5447             vpx_highbd_10_get16x16var(src, src_stride, last_src, last_stride,
   5448                                       &var16->sse, &var16->sum);
   5449             var16->var = variance_highbd(var16);
   5450             break;
   5451           default:
   5452             assert(cm->bit_depth == VPX_BITS_12);
   5453             vpx_highbd_12_get16x16var(src, src_stride, last_src, last_stride,
   5454                                       &var16->sse, &var16->sum);
   5455             var16->var = variance_highbd(var16);
   5456             break;
   5457         }
   5458       } else {
   5459         vpx_get16x16var(src, src_stride, last_src, last_stride, &var16->sse,
   5460                         &var16->sum);
   5461         var16->var = variance(var16);
   5462       }
   5463 #else
   5464       vpx_get16x16var(src, src_stride, last_src, last_stride, &var16->sse,
   5465                       &var16->sum);
   5466       var16->var = variance(var16);
   5467 #endif  // CONFIG_VP9_HIGHBITDEPTH
   5468 
   5469       if (var16->var >= VAR_HIST_MAX_BG_VAR)
   5470         hist[VAR_HIST_BINS - 1]++;
   5471       else
   5472         hist[var16->var / VAR_HIST_FACTOR]++;
   5473 
   5474       src += 16;
   5475       last_src += 16;
   5476       var16++;
   5477     }
   5478 
   5479     src = src - cm->mb_cols * 16 + 16 * src_stride;
   5480     last_src = last_src - cm->mb_cols * 16 + 16 * last_stride;
   5481   }
   5482 
   5483   cpi->source_var_thresh = 0;
   5484 
   5485   if (hist[VAR_HIST_BINS - 1] < cutoff) {
   5486     for (i = 0; i < VAR_HIST_BINS - 1; i++) {
   5487       sum += hist[i];
   5488 
   5489       if (sum > cutoff) {
   5490         cpi->source_var_thresh = (i + 1) * VAR_HIST_FACTOR;
   5491         return 0;
   5492       }
   5493     }
   5494   }
   5495 
   5496   return sf->search_type_check_frequency;
   5497 }
   5498 
   5499 static void source_var_based_partition_search_method(VP9_COMP *cpi) {
   5500   VP9_COMMON *const cm = &cpi->common;
   5501   SPEED_FEATURES *const sf = &cpi->sf;
   5502 
   5503   if (cm->frame_type == KEY_FRAME) {
   5504     // For key frame, use SEARCH_PARTITION.
   5505     sf->partition_search_type = SEARCH_PARTITION;
   5506   } else if (cm->intra_only) {
   5507     sf->partition_search_type = FIXED_PARTITION;
   5508   } else {
   5509     if (cm->last_width != cm->width || cm->last_height != cm->height) {
   5510       if (cpi->source_diff_var) vpx_free(cpi->source_diff_var);
   5511 
   5512       CHECK_MEM_ERROR(cm, cpi->source_diff_var,
   5513                       vpx_calloc(cm->MBs, sizeof(diff)));
   5514     }
   5515 
   5516     if (!cpi->frames_till_next_var_check)
   5517       cpi->frames_till_next_var_check = set_var_thresh_from_histogram(cpi);
   5518 
   5519     if (cpi->frames_till_next_var_check > 0) {
   5520       sf->partition_search_type = FIXED_PARTITION;
   5521       cpi->frames_till_next_var_check--;
   5522     }
   5523   }
   5524 }
   5525 
   5526 static int get_skip_encode_frame(const VP9_COMMON *cm, ThreadData *const td) {
   5527   unsigned int intra_count = 0, inter_count = 0;
   5528   int j;
   5529 
   5530   for (j = 0; j < INTRA_INTER_CONTEXTS; ++j) {
   5531     intra_count += td->counts->intra_inter[j][0];
   5532     inter_count += td->counts->intra_inter[j][1];
   5533   }
   5534 
   5535   return (intra_count << 2) < inter_count && cm->frame_type != KEY_FRAME &&
   5536          cm->show_frame;
   5537 }
   5538 
   5539 void vp9_init_tile_data(VP9_COMP *cpi) {
   5540   VP9_COMMON *const cm = &cpi->common;
   5541   const int tile_cols = 1 << cm->log2_tile_cols;
   5542   const int tile_rows = 1 << cm->log2_tile_rows;
   5543   int tile_col, tile_row;
   5544   TOKENEXTRA *pre_tok = cpi->tile_tok[0][0];
   5545   TOKENLIST *tplist = cpi->tplist[0][0];
   5546   int tile_tok = 0;
   5547   int tplist_count = 0;
   5548 
   5549   if (cpi->tile_data == NULL || cpi->allocated_tiles < tile_cols * tile_rows) {
   5550     if (cpi->tile_data != NULL) vpx_free(cpi->tile_data);
   5551     CHECK_MEM_ERROR(
   5552         cm, cpi->tile_data,
   5553         vpx_malloc(tile_cols * tile_rows * sizeof(*cpi->tile_data)));
   5554     cpi->allocated_tiles = tile_cols * tile_rows;
   5555 
   5556     for (tile_row = 0; tile_row < tile_rows; ++tile_row)
   5557       for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
   5558         TileDataEnc *tile_data =
   5559             &cpi->tile_data[tile_row * tile_cols + tile_col];
   5560         int i, j;
   5561         for (i = 0; i < BLOCK_SIZES; ++i) {
   5562           for (j = 0; j < MAX_MODES; ++j) {
   5563             tile_data->thresh_freq_fact[i][j] = RD_THRESH_INIT_FACT;
   5564 #if CONFIG_CONSISTENT_RECODE
   5565             tile_data->thresh_freq_fact_prev[i][j] = RD_THRESH_INIT_FACT;
   5566 #endif
   5567             tile_data->mode_map[i][j] = j;
   5568           }
   5569         }
   5570 #if CONFIG_MULTITHREAD
   5571         tile_data->row_base_thresh_freq_fact = NULL;
   5572 #endif
   5573       }
   5574   }
   5575 
   5576   for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
   5577     for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
   5578       TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
   5579       TileInfo *tile_info = &this_tile->tile_info;
   5580       if (cpi->sf.adaptive_rd_thresh_row_mt &&
   5581           this_tile->row_base_thresh_freq_fact == NULL)
   5582         vp9_row_mt_alloc_rd_thresh(cpi, this_tile);
   5583       vp9_tile_init(tile_info, cm, tile_row, tile_col);
   5584 
   5585       cpi->tile_tok[tile_row][tile_col] = pre_tok + tile_tok;
   5586       pre_tok = cpi->tile_tok[tile_row][tile_col];
   5587       tile_tok = allocated_tokens(*tile_info);
   5588 
   5589       cpi->tplist[tile_row][tile_col] = tplist + tplist_count;
   5590       tplist = cpi->tplist[tile_row][tile_col];
   5591       tplist_count = get_num_vert_units(*tile_info, MI_BLOCK_SIZE_LOG2);
   5592     }
   5593   }
   5594 }
   5595 
   5596 void vp9_encode_sb_row(VP9_COMP *cpi, ThreadData *td, int tile_row,
   5597                        int tile_col, int mi_row) {
   5598   VP9_COMMON *const cm = &cpi->common;
   5599   const int tile_cols = 1 << cm->log2_tile_cols;
   5600   TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
   5601   const TileInfo *const tile_info = &this_tile->tile_info;
   5602   TOKENEXTRA *tok = NULL;
   5603   int tile_sb_row;
   5604   int tile_mb_cols = (tile_info->mi_col_end - tile_info->mi_col_start + 1) >> 1;
   5605 
   5606   tile_sb_row = mi_cols_aligned_to_sb(mi_row - tile_info->mi_row_start) >>
   5607                 MI_BLOCK_SIZE_LOG2;
   5608   get_start_tok(cpi, tile_row, tile_col, mi_row, &tok);
   5609   cpi->tplist[tile_row][tile_col][tile_sb_row].start = tok;
   5610 
   5611   if (cpi->sf.use_nonrd_pick_mode)
   5612     encode_nonrd_sb_row(cpi, td, this_tile, mi_row, &tok);
   5613   else
   5614     encode_rd_sb_row(cpi, td, this_tile, mi_row, &tok);
   5615 
   5616   cpi->tplist[tile_row][tile_col][tile_sb_row].stop = tok;
   5617   cpi->tplist[tile_row][tile_col][tile_sb_row].count =
   5618       (unsigned int)(cpi->tplist[tile_row][tile_col][tile_sb_row].stop -
   5619                      cpi->tplist[tile_row][tile_col][tile_sb_row].start);
   5620   assert(tok - cpi->tplist[tile_row][tile_col][tile_sb_row].start <=
   5621          get_token_alloc(MI_BLOCK_SIZE >> 1, tile_mb_cols));
   5622 
   5623   (void)tile_mb_cols;
   5624 }
   5625 
   5626 void vp9_encode_tile(VP9_COMP *cpi, ThreadData *td, int tile_row,
   5627                      int tile_col) {
   5628   VP9_COMMON *const cm = &cpi->common;
   5629   const int tile_cols = 1 << cm->log2_tile_cols;
   5630   TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
   5631   const TileInfo *const tile_info = &this_tile->tile_info;
   5632   const int mi_row_start = tile_info->mi_row_start;
   5633   const int mi_row_end = tile_info->mi_row_end;
   5634   int mi_row;
   5635 
   5636   for (mi_row = mi_row_start; mi_row < mi_row_end; mi_row += MI_BLOCK_SIZE)
   5637     vp9_encode_sb_row(cpi, td, tile_row, tile_col, mi_row);
   5638 }
   5639 
   5640 static void encode_tiles(VP9_COMP *cpi) {
   5641   VP9_COMMON *const cm = &cpi->common;
   5642   const int tile_cols = 1 << cm->log2_tile_cols;
   5643   const int tile_rows = 1 << cm->log2_tile_rows;
   5644   int tile_col, tile_row;
   5645 
   5646   vp9_init_tile_data(cpi);
   5647 
   5648   for (tile_row = 0; tile_row < tile_rows; ++tile_row)
   5649     for (tile_col = 0; tile_col < tile_cols; ++tile_col)
   5650       vp9_encode_tile(cpi, &cpi->td, tile_row, tile_col);
   5651 }
   5652 
   5653 #if CONFIG_FP_MB_STATS
   5654 static int input_fpmb_stats(FIRSTPASS_MB_STATS *firstpass_mb_stats,
   5655                             VP9_COMMON *cm, uint8_t **this_frame_mb_stats) {
   5656   uint8_t *mb_stats_in = firstpass_mb_stats->mb_stats_start +
   5657                          cm->current_video_frame * cm->MBs * sizeof(uint8_t);
   5658 
   5659   if (mb_stats_in > firstpass_mb_stats->mb_stats_end) return EOF;
   5660 
   5661   *this_frame_mb_stats = mb_stats_in;
   5662 
   5663   return 1;
   5664 }
   5665 #endif
   5666 
   5667 static void encode_frame_internal(VP9_COMP *cpi) {
   5668   SPEED_FEATURES *const sf = &cpi->sf;
   5669   ThreadData *const td = &cpi->td;
   5670   MACROBLOCK *const x = &td->mb;
   5671   VP9_COMMON *const cm = &cpi->common;
   5672   MACROBLOCKD *const xd = &x->e_mbd;
   5673   const int gf_group_index = cpi->twopass.gf_group.index;
   5674 
   5675   xd->mi = cm->mi_grid_visible;
   5676   xd->mi[0] = cm->mi;
   5677   vp9_zero(*td->counts);
   5678   vp9_zero(cpi->td.rd_counts);
   5679 
   5680   xd->lossless = cm->base_qindex == 0 && cm->y_dc_delta_q == 0 &&
   5681                  cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0;
   5682 
   5683 #if CONFIG_VP9_HIGHBITDEPTH
   5684   if (cm->use_highbitdepth)
   5685     x->fwd_txfm4x4 = xd->lossless ? vp9_highbd_fwht4x4 : vpx_highbd_fdct4x4;
   5686   else
   5687     x->fwd_txfm4x4 = xd->lossless ? vp9_fwht4x4 : vpx_fdct4x4;
   5688   x->highbd_inv_txfm_add =
   5689       xd->lossless ? vp9_highbd_iwht4x4_add : vp9_highbd_idct4x4_add;
   5690 #else
   5691   x->fwd_txfm4x4 = xd->lossless ? vp9_fwht4x4 : vpx_fdct4x4;
   5692 #endif  // CONFIG_VP9_HIGHBITDEPTH
   5693   x->inv_txfm_add = xd->lossless ? vp9_iwht4x4_add : vp9_idct4x4_add;
   5694 #if CONFIG_CONSISTENT_RECODE
   5695   x->optimize = sf->optimize_coefficients == 1 && cpi->oxcf.pass != 1;
   5696 #endif
   5697   if (xd->lossless) x->optimize = 0;
   5698   x->sharpness = cpi->oxcf.sharpness;
   5699   x->adjust_rdmult_by_segment = (cpi->oxcf.aq_mode == VARIANCE_AQ);
   5700 
   5701   cm->tx_mode = select_tx_mode(cpi, xd);
   5702 
   5703   vp9_frame_init_quantizer(cpi);
   5704 
   5705   vp9_initialize_rd_consts(cpi);
   5706   vp9_initialize_me_consts(cpi, x, cm->base_qindex);
   5707   init_encode_frame_mb_context(cpi);
   5708   cm->use_prev_frame_mvs =
   5709       !cm->error_resilient_mode && cm->width == cm->last_width &&
   5710       cm->height == cm->last_height && !cm->intra_only && cm->last_show_frame;
   5711   // Special case: set prev_mi to NULL when the previous mode info
   5712   // context cannot be used.
   5713   cm->prev_mi =
   5714       cm->use_prev_frame_mvs ? cm->prev_mip + cm->mi_stride + 1 : NULL;
   5715 
   5716   x->quant_fp = cpi->sf.use_quant_fp;
   5717   vp9_zero(x->skip_txfm);
   5718   if (sf->use_nonrd_pick_mode) {
   5719     // Initialize internal buffer pointers for rtc coding, where non-RD
   5720     // mode decision is used and hence no buffer pointer swap needed.
   5721     int i;
   5722     struct macroblock_plane *const p = x->plane;
   5723     struct macroblockd_plane *const pd = xd->plane;
   5724     PICK_MODE_CONTEXT *ctx = &cpi->td.pc_root->none;
   5725 
   5726     for (i = 0; i < MAX_MB_PLANE; ++i) {
   5727       p[i].coeff = ctx->coeff_pbuf[i][0];
   5728       p[i].qcoeff = ctx->qcoeff_pbuf[i][0];
   5729       pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][0];
   5730       p[i].eobs = ctx->eobs_pbuf[i][0];
   5731     }
   5732     vp9_zero(x->zcoeff_blk);
   5733 
   5734     if (cm->frame_type != KEY_FRAME && cpi->rc.frames_since_golden == 0 &&
   5735         !(cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR) &&
   5736         !cpi->use_svc)
   5737       cpi->ref_frame_flags &= (~VP9_GOLD_FLAG);
   5738 
   5739     if (sf->partition_search_type == SOURCE_VAR_BASED_PARTITION)
   5740       source_var_based_partition_search_method(cpi);
   5741   } else if (gf_group_index && gf_group_index < MAX_ARF_GOP_SIZE &&
   5742              cpi->sf.enable_tpl_model) {
   5743     TplDepFrame *tpl_frame = &cpi->tpl_stats[cpi->twopass.gf_group.index];
   5744     TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
   5745 
   5746     int tpl_stride = tpl_frame->stride;
   5747     int64_t intra_cost_base = 0;
   5748     int64_t mc_dep_cost_base = 0;
   5749     int row, col;
   5750 
   5751     for (row = 0; row < cm->mi_rows && tpl_frame->is_valid; ++row) {
   5752       for (col = 0; col < cm->mi_cols; ++col) {
   5753         TplDepStats *this_stats = &tpl_stats[row * tpl_stride + col];
   5754         intra_cost_base += this_stats->intra_cost;
   5755         mc_dep_cost_base += this_stats->mc_dep_cost;
   5756       }
   5757     }
   5758 
   5759     vpx_clear_system_state();
   5760 
   5761     if (tpl_frame->is_valid)
   5762       cpi->rd.r0 = (double)intra_cost_base / mc_dep_cost_base;
   5763   }
   5764 
   5765   {
   5766     struct vpx_usec_timer emr_timer;
   5767     vpx_usec_timer_start(&emr_timer);
   5768 
   5769 #if CONFIG_FP_MB_STATS
   5770     if (cpi->use_fp_mb_stats) {
   5771       input_fpmb_stats(&cpi->twopass.firstpass_mb_stats, cm,
   5772                        &cpi->twopass.this_frame_mb_stats);
   5773     }
   5774 #endif
   5775 
   5776     if (!cpi->row_mt) {
   5777       cpi->row_mt_sync_read_ptr = vp9_row_mt_sync_read_dummy;
   5778       cpi->row_mt_sync_write_ptr = vp9_row_mt_sync_write_dummy;
   5779       // If allowed, encoding tiles in parallel with one thread handling one
   5780       // tile when row based multi-threading is disabled.
   5781       if (VPXMIN(cpi->oxcf.max_threads, 1 << cm->log2_tile_cols) > 1)
   5782         vp9_encode_tiles_mt(cpi);
   5783       else
   5784         encode_tiles(cpi);
   5785     } else {
   5786       cpi->row_mt_sync_read_ptr = vp9_row_mt_sync_read;
   5787       cpi->row_mt_sync_write_ptr = vp9_row_mt_sync_write;
   5788       vp9_encode_tiles_row_mt(cpi);
   5789     }
   5790 
   5791     vpx_usec_timer_mark(&emr_timer);
   5792     cpi->time_encode_sb_row += vpx_usec_timer_elapsed(&emr_timer);
   5793   }
   5794 
   5795   sf->skip_encode_frame =
   5796       sf->skip_encode_sb ? get_skip_encode_frame(cm, td) : 0;
   5797 
   5798 #if 0
   5799   // Keep record of the total distortion this time around for future use
   5800   cpi->last_frame_distortion = cpi->frame_distortion;
   5801 #endif
   5802 }
   5803 
   5804 static INTERP_FILTER get_interp_filter(
   5805     const int64_t threshes[SWITCHABLE_FILTER_CONTEXTS], int is_alt_ref) {
   5806   if (!is_alt_ref && threshes[EIGHTTAP_SMOOTH] > threshes[EIGHTTAP] &&
   5807       threshes[EIGHTTAP_SMOOTH] > threshes[EIGHTTAP_SHARP] &&
   5808       threshes[EIGHTTAP_SMOOTH] > threshes[SWITCHABLE - 1]) {
   5809     return EIGHTTAP_SMOOTH;
   5810   } else if (threshes[EIGHTTAP_SHARP] > threshes[EIGHTTAP] &&
   5811              threshes[EIGHTTAP_SHARP] > threshes[SWITCHABLE - 1]) {
   5812     return EIGHTTAP_SHARP;
   5813   } else if (threshes[EIGHTTAP] > threshes[SWITCHABLE - 1]) {
   5814     return EIGHTTAP;
   5815   } else {
   5816     return SWITCHABLE;
   5817   }
   5818 }
   5819 
   5820 static int compute_frame_aq_offset(struct VP9_COMP *cpi) {
   5821   VP9_COMMON *const cm = &cpi->common;
   5822   MODE_INFO **mi_8x8_ptr = cm->mi_grid_visible;
   5823   struct segmentation *const seg = &cm->seg;
   5824 
   5825   int mi_row, mi_col;
   5826   int sum_delta = 0;
   5827   int map_index = 0;
   5828   int qdelta_index;
   5829   int segment_id;
   5830 
   5831   for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) {
   5832     MODE_INFO **mi_8x8 = mi_8x8_ptr;
   5833     for (mi_col = 0; mi_col < cm->mi_cols; mi_col++, mi_8x8++) {
   5834       segment_id = mi_8x8[0]->segment_id;
   5835       qdelta_index = get_segdata(seg, segment_id, SEG_LVL_ALT_Q);
   5836       sum_delta += qdelta_index;
   5837       map_index++;
   5838     }
   5839     mi_8x8_ptr += cm->mi_stride;
   5840   }
   5841 
   5842   return sum_delta / (cm->mi_rows * cm->mi_cols);
   5843 }
   5844 
   5845 #if CONFIG_CONSISTENT_RECODE
   5846 static void restore_encode_params(VP9_COMP *cpi) {
   5847   VP9_COMMON *const cm = &cpi->common;
   5848   const int tile_cols = 1 << cm->log2_tile_cols;
   5849   const int tile_rows = 1 << cm->log2_tile_rows;
   5850   int tile_col, tile_row;
   5851   int i, j;
   5852   RD_OPT *rd_opt = &cpi->rd;
   5853   for (i = 0; i < MAX_REF_FRAMES; i++) {
   5854     for (j = 0; j < REFERENCE_MODES; j++)
   5855       rd_opt->prediction_type_threshes[i][j] =
   5856           rd_opt->prediction_type_threshes_prev[i][j];
   5857 
   5858     for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; j++)
   5859       rd_opt->filter_threshes[i][j] = rd_opt->filter_threshes_prev[i][j];
   5860   }
   5861 
   5862   if (cpi->tile_data != NULL) {
   5863     for (tile_row = 0; tile_row < tile_rows; ++tile_row)
   5864       for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
   5865         TileDataEnc *tile_data =
   5866             &cpi->tile_data[tile_row * tile_cols + tile_col];
   5867         for (i = 0; i < BLOCK_SIZES; ++i) {
   5868           for (j = 0; j < MAX_MODES; ++j) {
   5869             tile_data->thresh_freq_fact[i][j] =
   5870                 tile_data->thresh_freq_fact_prev[i][j];
   5871           }
   5872         }
   5873       }
   5874   }
   5875 
   5876   cm->interp_filter = cpi->sf.default_interp_filter;
   5877 }
   5878 #endif
   5879 
   5880 void vp9_encode_frame(VP9_COMP *cpi) {
   5881   VP9_COMMON *const cm = &cpi->common;
   5882 
   5883 #if CONFIG_CONSISTENT_RECODE
   5884   restore_encode_params(cpi);
   5885 #endif
   5886 
   5887   // In the longer term the encoder should be generalized to match the
   5888   // decoder such that we allow compound where one of the 3 buffers has a
   5889   // different sign bias and that buffer is then the fixed ref. However, this
   5890   // requires further work in the rd loop. For now the only supported encoder
   5891   // side behavior is where the ALT ref buffer has opposite sign bias to
   5892   // the other two.
   5893   if (!frame_is_intra_only(cm)) {
   5894     if (vp9_compound_reference_allowed(cm)) {
   5895       cpi->allow_comp_inter_inter = 1;
   5896       vp9_setup_compound_reference_mode(cm);
   5897     } else {
   5898       cpi->allow_comp_inter_inter = 0;
   5899     }
   5900   }
   5901 
   5902   if (cpi->sf.frame_parameter_update) {
   5903     int i;
   5904     RD_OPT *const rd_opt = &cpi->rd;
   5905     FRAME_COUNTS *counts = cpi->td.counts;
   5906     RD_COUNTS *const rdc = &cpi->td.rd_counts;
   5907 
   5908     // This code does a single RD pass over the whole frame assuming
   5909     // either compound, single or hybrid prediction as per whatever has
   5910     // worked best for that type of frame in the past.
   5911     // It also predicts whether another coding mode would have worked
   5912     // better than this coding mode. If that is the case, it remembers
   5913     // that for subsequent frames.
   5914     // It also does the same analysis for transform size selection.
   5915     const MV_REFERENCE_FRAME frame_type = get_frame_type(cpi);
   5916     int64_t *const mode_thrs = rd_opt->prediction_type_threshes[frame_type];
   5917     int64_t *const filter_thrs = rd_opt->filter_threshes[frame_type];
   5918     const int is_alt_ref = frame_type == ALTREF_FRAME;
   5919 
   5920     /* prediction (compound, single or hybrid) mode selection */
   5921     if (is_alt_ref || !cpi->allow_comp_inter_inter)
   5922       cm->reference_mode = SINGLE_REFERENCE;
   5923     else if (mode_thrs[COMPOUND_REFERENCE] > mode_thrs[SINGLE_REFERENCE] &&
   5924              mode_thrs[COMPOUND_REFERENCE] > mode_thrs[REFERENCE_MODE_SELECT] &&
   5925              check_dual_ref_flags(cpi) && cpi->static_mb_pct == 100)
   5926       cm->reference_mode = COMPOUND_REFERENCE;
   5927     else if (mode_thrs[SINGLE_REFERENCE] > mode_thrs[REFERENCE_MODE_SELECT])
   5928       cm->reference_mode = SINGLE_REFERENCE;
   5929     else
   5930       cm->reference_mode = REFERENCE_MODE_SELECT;
   5931 
   5932     if (cm->interp_filter == SWITCHABLE)
   5933       cm->interp_filter = get_interp_filter(filter_thrs, is_alt_ref);
   5934 
   5935     encode_frame_internal(cpi);
   5936 
   5937     for (i = 0; i < REFERENCE_MODES; ++i)
   5938       mode_thrs[i] = (mode_thrs[i] + rdc->comp_pred_diff[i] / cm->MBs) / 2;
   5939 
   5940     for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
   5941       filter_thrs[i] = (filter_thrs[i] + rdc->filter_diff[i] / cm->MBs) / 2;
   5942 
   5943     if (cm->reference_mode == REFERENCE_MODE_SELECT) {
   5944       int single_count_zero = 0;
   5945       int comp_count_zero = 0;
   5946 
   5947       for (i = 0; i < COMP_INTER_CONTEXTS; i++) {
   5948         single_count_zero += counts->comp_inter[i][0];
   5949         comp_count_zero += counts->comp_inter[i][1];
   5950       }
   5951 
   5952       if (comp_count_zero == 0) {
   5953         cm->reference_mode = SINGLE_REFERENCE;
   5954         vp9_zero(counts->comp_inter);
   5955       } else if (single_count_zero == 0) {
   5956         cm->reference_mode = COMPOUND_REFERENCE;
   5957         vp9_zero(counts->comp_inter);
   5958       }
   5959     }
   5960 
   5961     if (cm->tx_mode == TX_MODE_SELECT) {
   5962       int count4x4 = 0;
   5963       int count8x8_lp = 0, count8x8_8x8p = 0;
   5964       int count16x16_16x16p = 0, count16x16_lp = 0;
   5965       int count32x32 = 0;
   5966 
   5967       for (i = 0; i < TX_SIZE_CONTEXTS; ++i) {
   5968         count4x4 += counts->tx.p32x32[i][TX_4X4];
   5969         count4x4 += counts->tx.p16x16[i][TX_4X4];
   5970         count4x4 += counts->tx.p8x8[i][TX_4X4];
   5971 
   5972         count8x8_lp += counts->tx.p32x32[i][TX_8X8];
   5973         count8x8_lp += counts->tx.p16x16[i][TX_8X8];
   5974         count8x8_8x8p += counts->tx.p8x8[i][TX_8X8];
   5975 
   5976         count16x16_16x16p += counts->tx.p16x16[i][TX_16X16];
   5977         count16x16_lp += counts->tx.p32x32[i][TX_16X16];
   5978         count32x32 += counts->tx.p32x32[i][TX_32X32];
   5979       }
   5980       if (count4x4 == 0 && count16x16_lp == 0 && count16x16_16x16p == 0 &&
   5981           count32x32 == 0) {
   5982         cm->tx_mode = ALLOW_8X8;
   5983         reset_skip_tx_size(cm, TX_8X8);
   5984       } else if (count8x8_8x8p == 0 && count16x16_16x16p == 0 &&
   5985                  count8x8_lp == 0 && count16x16_lp == 0 && count32x32 == 0) {
   5986         cm->tx_mode = ONLY_4X4;
   5987         reset_skip_tx_size(cm, TX_4X4);
   5988       } else if (count8x8_lp == 0 && count16x16_lp == 0 && count4x4 == 0) {
   5989         cm->tx_mode = ALLOW_32X32;
   5990       } else if (count32x32 == 0 && count8x8_lp == 0 && count4x4 == 0) {
   5991         cm->tx_mode = ALLOW_16X16;
   5992         reset_skip_tx_size(cm, TX_16X16);
   5993       }
   5994     }
   5995   } else {
   5996     FRAME_COUNTS *counts = cpi->td.counts;
   5997     cm->reference_mode = SINGLE_REFERENCE;
   5998     if (cpi->allow_comp_inter_inter && cpi->sf.use_compound_nonrd_pickmode &&
   5999         cpi->rc.alt_ref_gf_group && !cpi->rc.is_src_frame_alt_ref &&
   6000         cm->frame_type != KEY_FRAME)
   6001       cm->reference_mode = REFERENCE_MODE_SELECT;
   6002 
   6003     encode_frame_internal(cpi);
   6004 
   6005     if (cm->reference_mode == REFERENCE_MODE_SELECT) {
   6006       int single_count_zero = 0;
   6007       int comp_count_zero = 0;
   6008       int i;
   6009       for (i = 0; i < COMP_INTER_CONTEXTS; i++) {
   6010         single_count_zero += counts->comp_inter[i][0];
   6011         comp_count_zero += counts->comp_inter[i][1];
   6012       }
   6013       if (comp_count_zero == 0) {
   6014         cm->reference_mode = SINGLE_REFERENCE;
   6015         vp9_zero(counts->comp_inter);
   6016       } else if (single_count_zero == 0) {
   6017         cm->reference_mode = COMPOUND_REFERENCE;
   6018         vp9_zero(counts->comp_inter);
   6019       }
   6020     }
   6021   }
   6022 
   6023   // If segmented AQ is enabled compute the average AQ weighting.
   6024   if (cm->seg.enabled && (cpi->oxcf.aq_mode != NO_AQ) &&
   6025       (cm->seg.update_map || cm->seg.update_data)) {
   6026     cm->seg.aq_av_offset = compute_frame_aq_offset(cpi);
   6027   }
   6028 }
   6029 
   6030 static void sum_intra_stats(FRAME_COUNTS *counts, const MODE_INFO *mi) {
   6031   const PREDICTION_MODE y_mode = mi->mode;
   6032   const PREDICTION_MODE uv_mode = mi->uv_mode;
   6033   const BLOCK_SIZE bsize = mi->sb_type;
   6034 
   6035   if (bsize < BLOCK_8X8) {
   6036     int idx, idy;
   6037     const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
   6038     const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
   6039     for (idy = 0; idy < 2; idy += num_4x4_h)
   6040       for (idx = 0; idx < 2; idx += num_4x4_w)
   6041         ++counts->y_mode[0][mi->bmi[idy * 2 + idx].as_mode];
   6042   } else {
   6043     ++counts->y_mode[size_group_lookup[bsize]][y_mode];
   6044   }
   6045 
   6046   ++counts->uv_mode[y_mode][uv_mode];
   6047 }
   6048 
   6049 static void update_zeromv_cnt(VP9_COMP *const cpi, const MODE_INFO *const mi,
   6050                               int mi_row, int mi_col, BLOCK_SIZE bsize) {
   6051   const VP9_COMMON *const cm = &cpi->common;
   6052   MV mv = mi->mv[0].as_mv;
   6053   const int bw = num_8x8_blocks_wide_lookup[bsize];
   6054   const int bh = num_8x8_blocks_high_lookup[bsize];
   6055   const int xmis = VPXMIN(cm->mi_cols - mi_col, bw);
   6056   const int ymis = VPXMIN(cm->mi_rows - mi_row, bh);
   6057   const int block_index = mi_row * cm->mi_cols + mi_col;
   6058   int x, y;
   6059   for (y = 0; y < ymis; y++)
   6060     for (x = 0; x < xmis; x++) {
   6061       int map_offset = block_index + y * cm->mi_cols + x;
   6062       if (mi->ref_frame[0] == LAST_FRAME && is_inter_block(mi) &&
   6063           mi->segment_id <= CR_SEGMENT_ID_BOOST2) {
   6064         if (abs(mv.row) < 8 && abs(mv.col) < 8) {
   6065           if (cpi->consec_zero_mv[map_offset] < 255)
   6066             cpi->consec_zero_mv[map_offset]++;
   6067         } else {
   6068           cpi->consec_zero_mv[map_offset] = 0;
   6069         }
   6070       }
   6071     }
   6072 }
   6073 
   6074 static void encode_superblock(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
   6075                               int output_enabled, int mi_row, int mi_col,
   6076                               BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) {
   6077   VP9_COMMON *const cm = &cpi->common;
   6078   MACROBLOCK *const x = &td->mb;
   6079   MACROBLOCKD *const xd = &x->e_mbd;
   6080   MODE_INFO *mi = xd->mi[0];
   6081   const int seg_skip =
   6082       segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP);
   6083   x->skip_recode = !x->select_tx_size && mi->sb_type >= BLOCK_8X8 &&
   6084                    cpi->oxcf.aq_mode != COMPLEXITY_AQ &&
   6085                    cpi->oxcf.aq_mode != CYCLIC_REFRESH_AQ &&
   6086                    cpi->sf.allow_skip_recode;
   6087 
   6088   if (!x->skip_recode && !cpi->sf.use_nonrd_pick_mode)
   6089     memset(x->skip_txfm, 0, sizeof(x->skip_txfm));
   6090 
   6091   x->skip_optimize = ctx->is_coded;
   6092   ctx->is_coded = 1;
   6093   x->use_lp32x32fdct = cpi->sf.use_lp32x32fdct;
   6094   x->skip_encode = (!output_enabled && cpi->sf.skip_encode_frame &&
   6095                     x->q_index < QIDX_SKIP_THRESH);
   6096 
   6097   if (x->skip_encode) return;
   6098 
   6099   if (!is_inter_block(mi)) {
   6100     int plane;
   6101 #if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
   6102     if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) &&
   6103         (xd->above_mi == NULL || xd->left_mi == NULL) &&
   6104         need_top_left[mi->uv_mode])
   6105       assert(0);
   6106 #endif  // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
   6107     mi->skip = 1;
   6108     for (plane = 0; plane < MAX_MB_PLANE; ++plane)
   6109       vp9_encode_intra_block_plane(x, VPXMAX(bsize, BLOCK_8X8), plane, 1);
   6110     if (output_enabled) sum_intra_stats(td->counts, mi);
   6111     vp9_tokenize_sb(cpi, td, t, !output_enabled, seg_skip,
   6112                     VPXMAX(bsize, BLOCK_8X8));
   6113   } else {
   6114     int ref;
   6115     const int is_compound = has_second_ref(mi);
   6116     set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
   6117     for (ref = 0; ref < 1 + is_compound; ++ref) {
   6118       YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, mi->ref_frame[ref]);
   6119       assert(cfg != NULL);
   6120       vp9_setup_pre_planes(xd, ref, cfg, mi_row, mi_col,
   6121                            &xd->block_refs[ref]->sf);
   6122     }
   6123     if (!(cpi->sf.reuse_inter_pred_sby && ctx->pred_pixel_ready) || seg_skip)
   6124       vp9_build_inter_predictors_sby(xd, mi_row, mi_col,
   6125                                      VPXMAX(bsize, BLOCK_8X8));
   6126 
   6127     vp9_build_inter_predictors_sbuv(xd, mi_row, mi_col,
   6128                                     VPXMAX(bsize, BLOCK_8X8));
   6129 
   6130     vp9_encode_sb(x, VPXMAX(bsize, BLOCK_8X8));
   6131     vp9_tokenize_sb(cpi, td, t, !output_enabled, seg_skip,
   6132                     VPXMAX(bsize, BLOCK_8X8));
   6133   }
   6134 
   6135   if (seg_skip) {
   6136     assert(mi->skip);
   6137   }
   6138 
   6139   if (output_enabled) {
   6140     if (cm->tx_mode == TX_MODE_SELECT && mi->sb_type >= BLOCK_8X8 &&
   6141         !(is_inter_block(mi) && mi->skip)) {
   6142       ++get_tx_counts(max_txsize_lookup[bsize], get_tx_size_context(xd),
   6143                       &td->counts->tx)[mi->tx_size];
   6144     } else {
   6145       // The new intra coding scheme requires no change of transform size
   6146       if (is_inter_block(mi)) {
   6147         mi->tx_size = VPXMIN(tx_mode_to_biggest_tx_size[cm->tx_mode],
   6148                              max_txsize_lookup[bsize]);
   6149       } else {
   6150         mi->tx_size = (bsize >= BLOCK_8X8) ? mi->tx_size : TX_4X4;
   6151       }
   6152     }
   6153 
   6154     ++td->counts->tx.tx_totals[mi->tx_size];
   6155     ++td->counts->tx.tx_totals[get_uv_tx_size(mi, &xd->plane[1])];
   6156     if (cm->seg.enabled && cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
   6157       vp9_cyclic_refresh_update_sb_postencode(cpi, mi, mi_row, mi_col, bsize);
   6158     if (cpi->oxcf.pass == 0 && cpi->svc.temporal_layer_id == 0 &&
   6159         (!cpi->use_svc ||
   6160          (cpi->use_svc &&
   6161           !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame &&
   6162           cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)))
   6163       update_zeromv_cnt(cpi, mi, mi_row, mi_col, bsize);
   6164   }
   6165 }
   6166