/*
 * Copyright (c) 2019, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <limits.h>
#include <math.h>
#include <stdbool.h>
#include <stdio.h>

#include "config/aom_config.h"
#include "config/aom_dsp_rtcd.h"
#include "config/av1_rtcd.h"

#include "aom_dsp/aom_dsp_common.h"
#include "aom_dsp/binary_codes_writer.h"
#include "aom_ports/mem.h"
#include "aom_ports/aom_timer.h"
#include "aom_ports/system_state.h"

#include "av1/common/reconinter.h"
#include "av1/common/blockd.h"

#include "av1/encoder/encodeframe.h"
#include "av1/encoder/var_based_part.h"
#include "av1/encoder/reconinter_enc.h"

extern const uint8_t AV1_VAR_OFFS[];

typedef struct {
  // TODO(kyslov): consider changing to 64bit

  // This struct is used for computing variance in choose_partitioning(), where
  // the max number of samples within a superblock is 32x32 (with 4x4 avg).
  // With 8bit bitdepth, uint32_t is enough for sum_square_error (2^8 * 2^8 * 32
  // * 32 = 2^26). For high bitdepth we need to consider changing this to 64 bit
  uint32_t sum_square_error;
  int32_t sum_error;
  int log2_count;
  int variance;
} var;

typedef struct {
  var none;
  var horz[2];
  var vert[2];
} partition_variance;

typedef struct {
  partition_variance part_variances;
  var split[4];
} v4x4;

typedef struct {
  partition_variance part_variances;
  v4x4 split[4];
} v8x8;

typedef struct {
  partition_variance part_variances;
  v8x8 split[4];
} v16x16;

typedef struct {
  partition_variance part_variances;
  v16x16 split[4];
} v32x32;

typedef struct {
  partition_variance part_variances;
  v32x32 split[4];
} v64x64;

typedef struct {
  partition_variance part_variances;
  v64x64 split[4];
} v128x128;

typedef struct {
  partition_variance *part_variances;
  var *split[4];
} variance_node;

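// tree_to_node() exposes one level of the size-specific variance tree
// (v4x4 ... v128x128) through the generic variance_node view: part_variances
// points at the current block's partition variances, and split[] points at the
// "none" variance of each of its four quadrants (or directly at the 4x4 leaf
// variances at the bottom level).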
static void tree_to_node(void *data, BLOCK_SIZE bsize, variance_node *node) {
  int i;
  node->part_variances = NULL;
  switch (bsize) {
    case BLOCK_128X128: {
      v128x128 *vt = (v128x128 *)data;
      node->part_variances = &vt->part_variances;
      for (i = 0; i < 4; i++)
        node->split[i] = &vt->split[i].part_variances.none;
      break;
    }
    case BLOCK_64X64: {
      v64x64 *vt = (v64x64 *)data;
      node->part_variances = &vt->part_variances;
      for (i = 0; i < 4; i++)
        node->split[i] = &vt->split[i].part_variances.none;
      break;
    }
    case BLOCK_32X32: {
      v32x32 *vt = (v32x32 *)data;
      node->part_variances = &vt->part_variances;
      for (i = 0; i < 4; i++)
        node->split[i] = &vt->split[i].part_variances.none;
      break;
    }
    case BLOCK_16X16: {
      v16x16 *vt = (v16x16 *)data;
      node->part_variances = &vt->part_variances;
      for (i = 0; i < 4; i++)
        node->split[i] = &vt->split[i].part_variances.none;
      break;
    }
    case BLOCK_8X8: {
      v8x8 *vt = (v8x8 *)data;
      node->part_variances = &vt->part_variances;
      for (i = 0; i < 4; i++)
        node->split[i] = &vt->split[i].part_variances.none;
      break;
    }
    default: {
      v4x4 *vt = (v4x4 *)data;
      assert(bsize == BLOCK_4X4);
      node->part_variances = &vt->part_variances;
      for (i = 0; i < 4; i++) node->split[i] = &vt->split[i];
      break;
    }
  }
}

// Set variance values given sum square error, sum error, count.
static void fill_variance(uint32_t s2, int32_t s, int c, var *v) {
  v->sum_square_error = s2;
  v->sum_error = s;
  v->log2_count = c;
}

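// Convert the accumulated sums into a scaled variance:
//   variance = 256 * (sum_square_error - sum_error^2 / n) / n,
// where n = 2^log2_count is the number of (down-sampled) samples. The 256
// scaling keeps fractional precision in the integer arithmetic.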
static void get_variance(var *v) {
  v->variance =
      (int)(256 * (v->sum_square_error -
                   (uint32_t)(((int64_t)v->sum_error * v->sum_error) >>
                              v->log2_count)) >>
            v->log2_count);
}

static void sum_2_variances(const var *a, const var *b, var *r) {
  assert(a->log2_count == b->log2_count);
  fill_variance(a->sum_square_error + b->sum_square_error,
                a->sum_error + b->sum_error, a->log2_count + 1, r);
}

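// Aggregate the four quadrant ("split") variances of a block into its
// horizontal, vertical and whole-block ("none") partition variances:
// horz[0]/horz[1] cover the top/bottom halves, vert[0]/vert[1] the left/right
// halves.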
static void fill_variance_tree(void *data, BLOCK_SIZE bsize) {
  variance_node node;
  memset(&node, 0, sizeof(node));
  tree_to_node(data, bsize, &node);
  sum_2_variances(node.split[0], node.split[1], &node.part_variances->horz[0]);
  sum_2_variances(node.split[2], node.split[3], &node.part_variances->horz[1]);
  sum_2_variances(node.split[0], node.split[2], &node.part_variances->vert[0]);
  sum_2_variances(node.split[1], node.split[3], &node.part_variances->vert[1]);
  sum_2_variances(&node.part_variances->vert[0], &node.part_variances->vert[1],
                  &node.part_variances->none);
}

static void set_block_size(AV1_COMP *const cpi, MACROBLOCK *const x,
                           MACROBLOCKD *const xd, int mi_row, int mi_col,
                           BLOCK_SIZE bsize) {
  if (cpi->common.mi_cols > mi_col && cpi->common.mi_rows > mi_row) {
    set_mode_info_offsets(cpi, x, xd, mi_row, mi_col);
    xd->mi[0]->sb_type = bsize;
  }
}

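// Returns 1 if a partition was selected at this level (bsize itself, or a
// horizontal/vertical split of it) and the block sizes were written; returns 0
// if the caller should recurse into the four square sub-blocks. force_split
// skips the variance checks and forces recursion.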
static int set_vt_partitioning(AV1_COMP *cpi, MACROBLOCK *const x,
                               MACROBLOCKD *const xd,
                               const TileInfo *const tile, void *data,
                               BLOCK_SIZE bsize, int mi_row, int mi_col,
                               int64_t threshold, BLOCK_SIZE bsize_min,
                               int force_split) {
  AV1_COMMON *const cm = &cpi->common;
  variance_node vt;
  const int block_width = mi_size_wide[bsize];
  const int block_height = mi_size_high[bsize];

  assert(block_height == block_width);
  tree_to_node(data, bsize, &vt);

  if (force_split == 1) return 0;

  if (mi_col + block_width > tile->mi_col_end ||
      mi_row + block_height > tile->mi_row_end)
    return 0;

  // For bsize == bsize_min (16x16/8x8 when 8x8/4x4 downsampling is used),
  // select this block size if the variance is below the threshold; otherwise
  // a split is selected. Vertical/horizontal splits are not checked here,
  // since there are too few samples for a reliable variance.
  if (bsize == bsize_min) {
    // Variance already computed to set the force_split.
    if (frame_is_intra_only(cm)) get_variance(&vt.part_variances->none);
    if (mi_col + block_width / 2 < cm->mi_cols &&
        mi_row + block_height / 2 < cm->mi_rows &&
        vt.part_variances->none.variance < threshold) {
      set_block_size(cpi, x, xd, mi_row, mi_col, bsize);
      return 1;
    }
    return 0;
  } else if (bsize > bsize_min) {
    // Variance already computed to set the force_split.
    if (frame_is_intra_only(cm)) get_variance(&vt.part_variances->none);
    // For key frame: take split for bsize above 32X32 or very high variance.
    if (frame_is_intra_only(cm) &&
        (bsize > BLOCK_32X32 ||
         vt.part_variances->none.variance > (threshold << 4))) {
      return 0;
    }
    // If variance is low, take the bsize (no split).
    if (mi_col + block_width / 2 < cm->mi_cols &&
        mi_row + block_height / 2 < cm->mi_rows &&
        vt.part_variances->none.variance < threshold) {
      set_block_size(cpi, x, xd, mi_row, mi_col, bsize);
      return 1;
    }

    // Check vertical split.
    if (mi_row + block_height / 2 < cm->mi_rows) {
      BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_VERT);
      get_variance(&vt.part_variances->vert[0]);
      get_variance(&vt.part_variances->vert[1]);
      if (vt.part_variances->vert[0].variance < threshold &&
          vt.part_variances->vert[1].variance < threshold &&
          get_plane_block_size(subsize, xd->plane[1].subsampling_x,
                               xd->plane[1].subsampling_y) < BLOCK_INVALID) {
        set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
        set_block_size(cpi, x, xd, mi_row, mi_col + block_width / 2, subsize);
        return 1;
      }
    }
    // Check horizontal split.
    if (mi_col + block_width / 2 < cm->mi_cols) {
      BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_HORZ);
      get_variance(&vt.part_variances->horz[0]);
      get_variance(&vt.part_variances->horz[1]);
      if (vt.part_variances->horz[0].variance < threshold &&
          vt.part_variances->horz[1].variance < threshold &&
          get_plane_block_size(subsize, xd->plane[1].subsampling_x,
                               xd->plane[1].subsampling_y) < BLOCK_INVALID) {
        set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
        set_block_size(cpi, x, xd, mi_row + block_height / 2, mi_col, subsize);
        return 1;
      }
    }

    return 0;
  }
  return 0;
}

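// For each of the four 8x8 sub-blocks of the 16x16 block at (x16_idx,
// y16_idx), compute the difference between the 8x8 averages of the source and
// of the predictor, and store it as sum / sum-of-squares for the variance
// tree. On key frames a flat predictor average of 128 is used instead.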
static void fill_variance_8x8avg(const uint8_t *s, int sp, const uint8_t *d,
                                 int dp, int x16_idx, int y16_idx, v16x16 *vst,
                                 int pixels_wide, int pixels_high,
                                 int is_key_frame) {
  int k;
  for (k = 0; k < 4; k++) {
    int x8_idx = x16_idx + ((k & 1) << 3);
    int y8_idx = y16_idx + ((k >> 1) << 3);
    unsigned int sse = 0;
    int sum = 0;
    if (x8_idx < pixels_wide && y8_idx < pixels_high) {
      int s_avg;
      int d_avg = 128;
      s_avg = aom_avg_8x8(s + y8_idx * sp + x8_idx, sp);
      if (!is_key_frame) d_avg = aom_avg_8x8(d + y8_idx * dp + x8_idx, dp);

      sum = s_avg - d_avg;
      sse = sum * sum;
    }
    fill_variance(sse, sum, 0, &vst->split[k].part_variances.none);
  }
}

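// Returns the spread, across the four 8x8 sub-blocks of a 16x16 block, of the
// per-sub-block (max - min) range reported by aom_minmax_8x8() for the
// source/predictor pair. A large value suggests an inhomogeneous block and is
// used by the caller to force a split to 8x8.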
static int compute_minmax_8x8(const uint8_t *s, int sp, const uint8_t *d,
                              int dp, int x16_idx, int y16_idx, int pixels_wide,
                              int pixels_high) {
  int k;
  int minmax_max = 0;
  int minmax_min = 255;
  // Loop over the 4 8x8 subblocks.
  for (k = 0; k < 4; k++) {
    int x8_idx = x16_idx + ((k & 1) << 3);
    int y8_idx = y16_idx + ((k >> 1) << 3);
    int min = 0;
    int max = 0;
    if (x8_idx < pixels_wide && y8_idx < pixels_high) {
      aom_minmax_8x8(s + y8_idx * sp + x8_idx, sp, d + y8_idx * dp + x8_idx, dp,
                     &min, &max);
      if ((max - min) > minmax_max) minmax_max = (max - min);
      if ((max - min) < minmax_min) minmax_min = (max - min);
    }
  }
  return (minmax_max - minmax_min);
}

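// Same as fill_variance_8x8avg(), but over the four 4x4 sub-blocks of an 8x8
// block using 4x4 averages. Used when the variance is computed with 4x4
// down-sampling (key frames).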
static void fill_variance_4x4avg(const uint8_t *s, int sp, const uint8_t *d,
                                 int dp, int x8_idx, int y8_idx, v8x8 *vst,
                                 int pixels_wide, int pixels_high,
                                 int is_key_frame) {
  int k;
  for (k = 0; k < 4; k++) {
    int x4_idx = x8_idx + ((k & 1) << 2);
    int y4_idx = y8_idx + ((k >> 1) << 2);
    unsigned int sse = 0;
    int sum = 0;
    if (x4_idx < pixels_wide && y4_idx < pixels_high) {
      int s_avg;
      int d_avg = 128;
      s_avg = aom_avg_4x4(s + y4_idx * sp + x4_idx, sp);
      if (!is_key_frame) d_avg = aom_avg_4x4(d + y4_idx * dp + x4_idx, dp);
      sum = s_avg - d_avg;
      sse = sum * sum;
    }
    fill_variance(sse, sum, 0, &vst->split[k].part_variances.none);
  }
}

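// Scale the base partition threshold up by 5/4 for small resolutions at
// speed >= 8, and for content classified as low-SAD/low-sumdiff at
// speed >= 7; otherwise return it unchanged.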
static int64_t scale_part_thresh_sumdiff(int64_t threshold_base, int speed,
                                         int width, int height,
                                         int content_state) {
  if (speed >= 8) {
    if (width <= 640 && height <= 480)
      return (5 * threshold_base) >> 2;
    else if ((content_state == kLowSadLowSumdiff) ||
             (content_state == kHighSadLowSumdiff) ||
             (content_state == kLowVarHighSumdiff))
      return (5 * threshold_base) >> 2;
  } else if (speed == 7) {
    if ((content_state == kLowSadLowSumdiff) ||
        (content_state == kHighSadLowSumdiff) ||
        (content_state == kLowVarHighSumdiff)) {
      return (5 * threshold_base) >> 2;
    }
  }
  return threshold_base;
}

// Set the variance split thresholds for the following block sizes:
// 0 - threshold_128x128, 1 - threshold_64x64, 2 - threshold_32x32,
// 3 - vbp_threshold_16x16. 4 - vbp_threshold_8x8 (to split to 4x4 partition)
// is currently only used on key frames.
static void set_vbp_thresholds(AV1_COMP *cpi, int64_t thresholds[], int q,
                               int content_state) {
  AV1_COMMON *const cm = &cpi->common;
  const int is_key_frame = frame_is_intra_only(cm);
  const int threshold_multiplier = is_key_frame ? 40 : 1;
  int64_t threshold_base =
      (int64_t)(threshold_multiplier * cpi->dequants.y_dequant_QTX[q][1]);

  if (is_key_frame) {
    thresholds[0] = threshold_base;
    thresholds[1] = threshold_base;
    thresholds[2] = threshold_base >> 2;
    thresholds[3] = threshold_base >> 2;
    thresholds[4] = threshold_base << 2;
  } else {
    // Increase base variance threshold based on content_state/sum_diff level.
    threshold_base = scale_part_thresh_sumdiff(
        threshold_base, cpi->oxcf.speed, cm->width, cm->height, content_state);

    thresholds[1] = threshold_base;
    thresholds[3] = threshold_base << cpi->oxcf.speed;
    if (cm->width >= 1280 && cm->height >= 720)
      thresholds[3] = thresholds[3] << 1;
    if (cm->width <= 352 && cm->height <= 288) {
      thresholds[1] = threshold_base >> 3;
      thresholds[2] = threshold_base >> 1;
      thresholds[3] = threshold_base << 3;
    } else if (cm->width < 1280 && cm->height < 720) {
      thresholds[2] = (5 * threshold_base) >> 2;
    } else if (cm->width < 1920 && cm->height < 1080) {
      thresholds[2] = threshold_base << 1;
      thresholds[3] <<= 2;
    } else {
      thresholds[2] = (5 * threshold_base) >> 1;
    }
  }
}

void av1_set_variance_partition_thresholds(AV1_COMP *cpi, int q,
                                           int content_state) {
  AV1_COMMON *const cm = &cpi->common;
  SPEED_FEATURES *const sf = &cpi->sf;
  const int is_key_frame = frame_is_intra_only(cm);
  if (sf->partition_search_type != VAR_BASED_PARTITION) {
    return;
  } else {
    set_vbp_thresholds(cpi, cpi->vbp_thresholds, q, content_state);
    // The thresholds below are not changed locally.
    if (is_key_frame) {
      cpi->vbp_threshold_sad = 0;
      cpi->vbp_threshold_copy = 0;
      cpi->vbp_bsize_min = BLOCK_8X8;
    } else {
      if (cm->width <= 352 && cm->height <= 288)
        cpi->vbp_threshold_sad = 10;
      else
        cpi->vbp_threshold_sad = (cpi->dequants.y_dequant_QTX[q][1] << 1) > 1000
                                     ? (cpi->dequants.y_dequant_QTX[q][1] << 1)
                                     : 1000;
      cpi->vbp_bsize_min = BLOCK_16X16;
      if (cm->width <= 352 && cm->height <= 288)
        cpi->vbp_threshold_copy = 4000;
      else if (cm->width <= 640 && cm->height <= 360)
        cpi->vbp_threshold_copy = 8000;
      else
        cpi->vbp_threshold_copy =
            (cpi->dequants.y_dequant_QTX[q][1] << 3) > 8000
                ? (cpi->dequants.y_dequant_QTX[q][1] << 3)
                : 8000;
    }
    cpi->vbp_threshold_minmax = 15 + (q >> 3);
  }
}

// This function chooses partitioning based on the variance between the source
// and the reconstructed last frame, where variance is computed for
// down-sampled inputs.
// TODO(kyslov): a lot of things. Bring back noise estimation, brush up
// partition selection and, most of all, retune the thresholds.
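// Rough flow: the variance tree is first filled bottom-up from 8x8 averages of
// the source/prediction difference (4x4 averages of the source on key frames),
// forcing splits where the 16x16/32x32/64x64 variances exceed their
// thresholds; partition sizes are then assigned top-down, starting from
// 128x128, via set_vt_partitioning().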
int av1_choose_var_based_partitioning(AV1_COMP *cpi, const TileInfo *const tile,
                                      MACROBLOCK *x, int mi_row, int mi_col) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;

  int i, j, k, m;
  v128x128 *vt;
  v16x16 *vt2 = NULL;
  unsigned char force_split[85];
  int avg_32x32;
  int max_var_32x32 = 0;
  int min_var_32x32 = INT_MAX;
  int var_32x32;
  int var_64x64;
  int min_var_64x64 = INT_MAX;
  int max_var_64x64 = 0;
  int avg_16x16[4];
  int maxvar_16x16[4];
  int minvar_16x16[4];
  int64_t threshold_4x4avg;
  int content_state = 0;
  uint8_t *s;
  const uint8_t *d;
  int sp;
  int dp;
  int compute_minmax_variance = 1;
  int is_key_frame = frame_is_intra_only(cm);
  int pixels_wide = 128, pixels_high = 128;
  assert(cm->seq_params.sb_size == BLOCK_64X64 ||
         cm->seq_params.sb_size == BLOCK_128X128);
  const int is_small_sb = (cm->seq_params.sb_size == BLOCK_64X64);
  const int num_64x64_blocks = is_small_sb ? 1 : 4;

  CHECK_MEM_ERROR(cm, vt, aom_calloc(1, sizeof(*vt)));

  int64_t thresholds[5] = { cpi->vbp_thresholds[0], cpi->vbp_thresholds[1],
                            cpi->vbp_thresholds[2], cpi->vbp_thresholds[3],
                            cpi->vbp_thresholds[4] };

  const int low_res = (cm->width <= 352 && cm->height <= 288);
  int variance4x4downsample[64];
  int segment_id;
  const int num_planes = av1_num_planes(cm);

  segment_id = xd->mi[0]->segment_id;

  set_vbp_thresholds(cpi, thresholds, cm->base_qindex, content_state);

  if (is_small_sb) {
    pixels_wide = 64;
    pixels_high = 64;
  }

  // 4x4 averaging is currently disabled for non-key frames: with the threshold
  // at INT64_MAX, the low-resolution 4x4 path below is never taken.
  threshold_4x4avg = INT64_MAX;

  if (xd->mb_to_right_edge < 0) pixels_wide += (xd->mb_to_right_edge >> 3);
  if (xd->mb_to_bottom_edge < 0) pixels_high += (xd->mb_to_bottom_edge >> 3);

  s = x->plane[0].src.buf;
  sp = x->plane[0].src.stride;

  // Index for force_split: 0 for the 128x128 superblock, 1-4 for the 64x64
  // blocks, 5-20 for the 32x32 blocks, 21-84 for the 16x16 blocks.
  force_split[0] = 0;

  if (!is_key_frame) {
    // TODO(kyslov): we are assuming that the ref is LAST_FRAME! Check if it
    // is!!
    MB_MODE_INFO *mi = xd->mi[0];
    const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, LAST_FRAME);

    assert(yv12 != NULL);

    av1_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
                         get_ref_scale_factors(cm, LAST_FRAME), num_planes);
    mi->ref_frame[0] = LAST_FRAME;
    mi->ref_frame[1] = NONE_FRAME;
    mi->sb_type = cm->seq_params.sb_size;
    mi->mv[0].as_int = 0;
    mi->interp_filters = av1_make_interp_filters(BILINEAR, BILINEAR);
    if (xd->mb_to_right_edge >= 0 && xd->mb_to_bottom_edge >= 0) {
      const MV dummy_mv = { 0, 0 };
      av1_int_pro_motion_estimation(cpi, x, cm->seq_params.sb_size, mi_row,
                                    mi_col, &dummy_mv);
    }

// TODO(kyslov): bring the small SAD functionality back
#if 0
    y_sad = cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf, x->plane[0].src.stride,
                                   xd->plane[0].pre[0].buf,
                                   xd->plane[0].pre[0].stride);
#endif
    x->pred_mv[LAST_FRAME] = mi->mv[0].as_mv;

    set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL,
                                  cm->seq_params.sb_size, AOM_PLANE_Y,
                                  AOM_PLANE_Y);

    d = xd->plane[0].dst.buf;
    dp = xd->plane[0].dst.stride;

    // If the y_sad is very small, take 64x64 as partition and exit.
    // Don't check on boosted segment for now, as 64x64 is suppressed there.
#if 0
    if (segment_id == CR_SEGMENT_ID_BASE && y_sad < cpi->vbp_threshold_sad) {
      const int block_width = num_8x8_blocks_wide_lookup[BLOCK_64X64];
      const int block_height = num_8x8_blocks_high_lookup[BLOCK_64X64];
      if (mi_col + block_width / 2 < cm->mi_cols &&
          mi_row + block_height / 2 < cm->mi_rows) {
        set_block_size(cpi, x, xd, mi_row, mi_col, BLOCK_128X128);
        x->variance_low[0] = 1;
        return 0;
      }
    }
#endif
  } else {
    d = AV1_VAR_OFFS;
    dp = 0;
  }

  if (low_res && threshold_4x4avg < INT64_MAX)
    CHECK_MEM_ERROR(cm, vt2, aom_calloc(64, sizeof(*vt2)));
  // Fill in the entire tree of 8x8 (or 4x4 under some conditions) variances
  // for splits.
  for (m = 0; m < num_64x64_blocks; m++) {
    const int x64_idx = ((m & 1) << 6);
    const int y64_idx = ((m >> 1) << 6);
    const int m2 = m << 2;
    force_split[m + 1] = 0;
    for (i = 0; i < 4; i++) {
      const int x32_idx = x64_idx + ((i & 1) << 5);
      const int y32_idx = y64_idx + ((i >> 1) << 5);
      const int i2 = (m2 + i) << 2;
      force_split[5 + m2 + i] = 0;
      avg_16x16[i] = 0;
      maxvar_16x16[i] = 0;
      minvar_16x16[i] = INT_MAX;
      for (j = 0; j < 4; j++) {
        const int x16_idx = x32_idx + ((j & 1) << 4);
        const int y16_idx = y32_idx + ((j >> 1) << 4);
        const int split_index = 21 + i2 + j;
        v16x16 *vst = &vt->split[m].split[i].split[j];
        force_split[split_index] = 0;
        variance4x4downsample[i2 + j] = 0;
        if (!is_key_frame) {
          fill_variance_8x8avg(s, sp, d, dp, x16_idx, y16_idx, vst, pixels_wide,
                               pixels_high, is_key_frame);
          fill_variance_tree(&vt->split[m].split[i].split[j], BLOCK_16X16);
          get_variance(&vt->split[m].split[i].split[j].part_variances.none);
          avg_16x16[i] +=
              vt->split[m].split[i].split[j].part_variances.none.variance;
          if (vt->split[m].split[i].split[j].part_variances.none.variance <
              minvar_16x16[i])
            minvar_16x16[i] =
                vt->split[m].split[i].split[j].part_variances.none.variance;
          if (vt->split[m].split[i].split[j].part_variances.none.variance >
              maxvar_16x16[i])
            maxvar_16x16[i] =
                vt->split[m].split[i].split[j].part_variances.none.variance;
          if (vt->split[m].split[i].split[j].part_variances.none.variance >
              thresholds[3]) {
            // 16X16 variance is above threshold for split, so force split to
            // 8x8 for this 16x16 block (this also forces splits for upper
            // levels).
            force_split[split_index] = 1;
            force_split[5 + m2 + i] = 1;
            force_split[m + 1] = 1;
            force_split[0] = 1;
          } else if (compute_minmax_variance &&
                     vt->split[m]
                             .split[i]
                             .split[j]
                             .part_variances.none.variance > thresholds[2] &&
                     !cyclic_refresh_segment_id_boosted(segment_id)) {
            // We have some nominal amount of 16x16 variance (based on average),
            // compute the minmax over the 8x8 sub-blocks, and if above
            // threshold, force split to 8x8 block for this 16x16 block.
            int minmax = compute_minmax_8x8(s, sp, d, dp, x16_idx, y16_idx,
                                            pixels_wide, pixels_high);
            int thresh_minmax = (int)cpi->vbp_threshold_minmax;
            if (minmax > thresh_minmax) {
              force_split[split_index] = 1;
              force_split[5 + m2 + i] = 1;
              force_split[m + 1] = 1;
              force_split[0] = 1;
            }
          }
        }
        if (is_key_frame) {
          force_split[split_index] = 0;
          // Go down to 4x4 down-sampling for variance.
          variance4x4downsample[i2 + j] = 1;
          for (k = 0; k < 4; k++) {
            int x8_idx = x16_idx + ((k & 1) << 3);
            int y8_idx = y16_idx + ((k >> 1) << 3);
            v8x8 *vst2 = is_key_frame ? &vst->split[k] : &vt2[i2 + j].split[k];
            fill_variance_4x4avg(s, sp, d, dp, x8_idx, y8_idx, vst2,
                                 pixels_wide, pixels_high, is_key_frame);
          }
        }
      }
    }
  }

  // Fill the rest of the variance tree by summing split partition values.
  for (m = 0; m < num_64x64_blocks; ++m) {
    avg_32x32 = 0;
    const int m2 = m << 2;
    for (i = 0; i < 4; i++) {
      const int i2 = (m2 + i) << 2;
      for (j = 0; j < 4; j++) {
        const int split_index = 21 + i2 + j;
        if (variance4x4downsample[i2 + j] == 1) {
          v16x16 *vtemp =
              (!is_key_frame) ? &vt2[i2 + j] : &vt->split[m].split[i].split[j];
          for (k = 0; k < 4; k++)
            fill_variance_tree(&vtemp->split[k], BLOCK_8X8);
          fill_variance_tree(vtemp, BLOCK_16X16);
          // If variance of this 16x16 block is above the threshold, force block
          // to split. This also forces a split on the upper levels.
          get_variance(&vtemp->part_variances.none);
          if (vtemp->part_variances.none.variance > thresholds[3]) {
            force_split[split_index] = 1;
            force_split[5 + m2 + i] = 1;
            force_split[m + 1] = 1;
            force_split[0] = 1;
          }
        }
      }
      fill_variance_tree(&vt->split[m].split[i], BLOCK_32X32);
      // If the variance of this 32x32 block is above the threshold, or if it's
      // above (some threshold of) the average variance over the sub-16x16
      // blocks, force this block to split. This also forces a split on the
      // upper (64x64) level.
      if (!force_split[5 + m2 + i]) {
        get_variance(&vt->split[m].split[i].part_variances.none);
        var_32x32 = vt->split[m].split[i].part_variances.none.variance;
        max_var_32x32 = AOMMAX(var_32x32, max_var_32x32);
        min_var_32x32 = AOMMIN(var_32x32, min_var_32x32);
        if (vt->split[m].split[i].part_variances.none.variance >
                thresholds[2] ||
            (!is_key_frame &&
             vt->split[m].split[i].part_variances.none.variance >
                 (thresholds[2] >> 1) &&
             vt->split[m].split[i].part_variances.none.variance >
                 (avg_16x16[i] >> 1))) {
          force_split[5 + m2 + i] = 1;
          force_split[m + 1] = 1;
          force_split[0] = 1;
        } else if (!is_key_frame && cm->height <= 360 &&
                   (maxvar_16x16[i] - minvar_16x16[i]) > (thresholds[2] >> 1) &&
                   maxvar_16x16[i] > thresholds[2]) {
          force_split[5 + m2 + i] = 1;
          force_split[m + 1] = 1;
          force_split[0] = 1;
        }
        avg_32x32 += var_32x32;
      }
    }
    if (!force_split[1 + m]) {
      fill_variance_tree(&vt->split[m], BLOCK_64X64);
      get_variance(&vt->split[m].part_variances.none);
      var_64x64 = vt->split[m].part_variances.none.variance;
      max_var_64x64 = AOMMAX(var_64x64, max_var_64x64);
      min_var_64x64 = AOMMIN(var_64x64, min_var_64x64);
      // If the spread between the min and max variance of the sub-32x32 blocks
      // is large and the max is above half the 64x64 threshold, force this
      // block to split. Only checked for non-key frames for now.

      if (!is_key_frame &&
          (max_var_32x32 - min_var_32x32) > 3 * (thresholds[1] >> 3) &&
          max_var_32x32 > thresholds[1] >> 1)
        force_split[1 + m] = 1;
    }
    if (is_small_sb) force_split[0] = 1;
  }

  if (!force_split[0]) {
    fill_variance_tree(vt, BLOCK_128X128);
    get_variance(&vt->part_variances.none);
    if (!is_key_frame &&
        (max_var_64x64 - min_var_64x64) > 3 * (thresholds[0] >> 3) &&
        max_var_64x64 > thresholds[0] >> 1)
      force_split[0] = 1;
  }

  if (!set_vt_partitioning(cpi, x, xd, tile, vt, BLOCK_128X128, mi_row, mi_col,
                           thresholds[0], BLOCK_16X16, force_split[0])) {
    for (m = 0; m < num_64x64_blocks; ++m) {
      const int x64_idx = ((m & 1) << 4);
      const int y64_idx = ((m >> 1) << 4);
      const int m2 = m << 2;

      // Now go through the entire structure, splitting every block size until
      // we get to one that's got a variance lower than our threshold.
      if (!set_vt_partitioning(cpi, x, xd, tile, &vt->split[m], BLOCK_64X64,
                               mi_row + y64_idx, mi_col + x64_idx,
                               thresholds[1], BLOCK_16X16,
                               force_split[1 + m])) {
        for (i = 0; i < 4; ++i) {
          const int x32_idx = ((i & 1) << 3);
          const int y32_idx = ((i >> 1) << 3);
          const int i2 = (m2 + i) << 2;
          if (!set_vt_partitioning(cpi, x, xd, tile, &vt->split[m].split[i],
                                   BLOCK_32X32, (mi_row + y64_idx + y32_idx),
                                   (mi_col + x64_idx + x32_idx), thresholds[2],
                                   BLOCK_16X16, force_split[5 + m2 + i])) {
            for (j = 0; j < 4; ++j) {
              const int x16_idx = ((j & 1) << 2);
              const int y16_idx = ((j >> 1) << 2);
              const int split_index = 21 + i2 + j;
              // For inter frames: if variance4x4downsample[] == 1 for this
              // 16x16 block, then the variance is based on 4x4 down-sampling,
              // so use vt2 in set_vt_partitioning(), otherwise use vt.
              v16x16 *vtemp =
                  (!is_key_frame && variance4x4downsample[i2 + j] == 1)
                      ? &vt2[i2 + j]
                      : &vt->split[m].split[i].split[j];
              if (!set_vt_partitioning(cpi, x, xd, tile, vtemp, BLOCK_16X16,
                                       mi_row + y64_idx + y32_idx + y16_idx,
                                       mi_col + x64_idx + x32_idx + x16_idx,
                                       thresholds[3], BLOCK_8X8,
                                       force_split[split_index])) {
                for (k = 0; k < 4; ++k) {
                  const int x8_idx = (k & 1) << 1;
                  const int y8_idx = (k >> 1) << 1;
                  set_block_size(
                      cpi, x, xd,
                      (mi_row + y64_idx + y32_idx + y16_idx + y8_idx),
                      (mi_col + x64_idx + x32_idx + x16_idx + x8_idx),
                      BLOCK_8X8);
                }
              }
            }
          }
        }
      }
    }
  }

  if (vt2) aom_free(vt2);
  if (vt) aom_free(vt);
  return 0;
}
    779