1 /* 2 * Copyright (c) 2019, Alliance for Open Media. All rights reserved 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #include <limits.h> 13 #include <math.h> 14 #include <stdbool.h> 15 #include <stdio.h> 16 17 #include "config/aom_config.h" 18 #include "config/aom_dsp_rtcd.h" 19 #include "config/av1_rtcd.h" 20 21 #include "aom_dsp/aom_dsp_common.h" 22 #include "aom_dsp/binary_codes_writer.h" 23 #include "aom_ports/mem.h" 24 #include "aom_ports/aom_timer.h" 25 #include "aom_ports/system_state.h" 26 27 #include "av1/common/reconinter.h" 28 #include "av1/common/blockd.h" 29 30 #include "av1/encoder/encodeframe.h" 31 #include "av1/encoder/var_based_part.h" 32 #include "av1/encoder/reconinter_enc.h" 33 34 extern const uint8_t AV1_VAR_OFFS[]; 35 36 typedef struct { 37 // TODO(kyslov): consider changing to 64bit 38 39 // This struct is used for computing variance in choose_partitioning(), where 40 // the max number of samples within a superblock is 32x32 (with 4x4 avg). 41 // With 8bit bitdepth, uint32_t is enough for sum_square_error (2^8 * 2^8 * 32 42 // * 32 = 2^26). For high bitdepth we need to consider changing this to 64 bit 43 uint32_t sum_square_error; 44 int32_t sum_error; 45 int log2_count; 46 int variance; 47 } var; 48 49 typedef struct { 50 var none; 51 var horz[2]; 52 var vert[2]; 53 } partition_variance; 54 55 typedef struct { 56 partition_variance part_variances; 57 var split[4]; 58 } v4x4; 59 60 typedef struct { 61 partition_variance part_variances; 62 v4x4 split[4]; 63 } v8x8; 64 65 typedef struct { 66 partition_variance part_variances; 67 v8x8 split[4]; 68 } v16x16; 69 70 typedef struct { 71 partition_variance part_variances; 72 v16x16 split[4]; 73 } v32x32; 74 75 typedef struct { 76 partition_variance part_variances; 77 v32x32 split[4]; 78 } v64x64; 79 80 typedef struct { 81 partition_variance part_variances; 82 v64x64 split[4]; 83 } v128x128; 84 85 typedef struct { 86 partition_variance *part_variances; 87 var *split[4]; 88 } variance_node; 89 90 static void tree_to_node(void *data, BLOCK_SIZE bsize, variance_node *node) { 91 int i; 92 node->part_variances = NULL; 93 switch (bsize) { 94 case BLOCK_128X128: { 95 v128x128 *vt = (v128x128 *)data; 96 node->part_variances = &vt->part_variances; 97 for (i = 0; i < 4; i++) 98 node->split[i] = &vt->split[i].part_variances.none; 99 break; 100 } 101 case BLOCK_64X64: { 102 v64x64 *vt = (v64x64 *)data; 103 node->part_variances = &vt->part_variances; 104 for (i = 0; i < 4; i++) 105 node->split[i] = &vt->split[i].part_variances.none; 106 break; 107 } 108 case BLOCK_32X32: { 109 v32x32 *vt = (v32x32 *)data; 110 node->part_variances = &vt->part_variances; 111 for (i = 0; i < 4; i++) 112 node->split[i] = &vt->split[i].part_variances.none; 113 break; 114 } 115 case BLOCK_16X16: { 116 v16x16 *vt = (v16x16 *)data; 117 node->part_variances = &vt->part_variances; 118 for (i = 0; i < 4; i++) 119 node->split[i] = &vt->split[i].part_variances.none; 120 break; 121 } 122 case BLOCK_8X8: { 123 v8x8 *vt = (v8x8 *)data; 124 node->part_variances = &vt->part_variances; 125 for (i = 0; i < 4; i++) 126 node->split[i] = &vt->split[i].part_variances.none; 127 break; 128 } 129 default: { 130 v4x4 *vt = (v4x4 *)data; 131 assert(bsize == BLOCK_4X4); 132 node->part_variances = &vt->part_variances; 133 for (i = 0; i < 4; i++) node->split[i] = &vt->split[i]; 134 break; 135 } 136 } 137 } 138 139 // Set variance values given sum square error, sum error, count. 140 static void fill_variance(uint32_t s2, int32_t s, int c, var *v) { 141 v->sum_square_error = s2; 142 v->sum_error = s; 143 v->log2_count = c; 144 } 145 146 static void get_variance(var *v) { 147 v->variance = 148 (int)(256 * (v->sum_square_error - 149 (uint32_t)(((int64_t)v->sum_error * v->sum_error) >> 150 v->log2_count)) >> 151 v->log2_count); 152 } 153 154 static void sum_2_variances(const var *a, const var *b, var *r) { 155 assert(a->log2_count == b->log2_count); 156 fill_variance(a->sum_square_error + b->sum_square_error, 157 a->sum_error + b->sum_error, a->log2_count + 1, r); 158 } 159 160 static void fill_variance_tree(void *data, BLOCK_SIZE bsize) { 161 variance_node node; 162 memset(&node, 0, sizeof(node)); 163 tree_to_node(data, bsize, &node); 164 sum_2_variances(node.split[0], node.split[1], &node.part_variances->horz[0]); 165 sum_2_variances(node.split[2], node.split[3], &node.part_variances->horz[1]); 166 sum_2_variances(node.split[0], node.split[2], &node.part_variances->vert[0]); 167 sum_2_variances(node.split[1], node.split[3], &node.part_variances->vert[1]); 168 sum_2_variances(&node.part_variances->vert[0], &node.part_variances->vert[1], 169 &node.part_variances->none); 170 } 171 172 static void set_block_size(AV1_COMP *const cpi, MACROBLOCK *const x, 173 MACROBLOCKD *const xd, int mi_row, int mi_col, 174 BLOCK_SIZE bsize) { 175 if (cpi->common.mi_cols > mi_col && cpi->common.mi_rows > mi_row) { 176 set_mode_info_offsets(cpi, x, xd, mi_row, mi_col); 177 xd->mi[0]->sb_type = bsize; 178 } 179 } 180 181 static int set_vt_partitioning(AV1_COMP *cpi, MACROBLOCK *const x, 182 MACROBLOCKD *const xd, 183 const TileInfo *const tile, void *data, 184 BLOCK_SIZE bsize, int mi_row, int mi_col, 185 int64_t threshold, BLOCK_SIZE bsize_min, 186 int force_split) { 187 AV1_COMMON *const cm = &cpi->common; 188 variance_node vt; 189 const int block_width = mi_size_wide[bsize]; 190 const int block_height = mi_size_high[bsize]; 191 192 assert(block_height == block_width); 193 tree_to_node(data, bsize, &vt); 194 195 if (force_split == 1) return 0; 196 197 if (mi_col + block_width > tile->mi_col_end || 198 mi_row + block_height > tile->mi_row_end) 199 return 0; 200 201 // For bsize=bsize_min (16x16/8x8 for 8x8/4x4 downsampling), select if 202 // variance is below threshold, otherwise split will be selected. 203 // No check for vert/horiz split as too few samples for variance. 204 if (bsize == bsize_min) { 205 // Variance already computed to set the force_split. 206 if (frame_is_intra_only(cm)) get_variance(&vt.part_variances->none); 207 if (mi_col + block_width / 2 < cm->mi_cols && 208 mi_row + block_height / 2 < cm->mi_rows && 209 vt.part_variances->none.variance < threshold) { 210 set_block_size(cpi, x, xd, mi_row, mi_col, bsize); 211 return 1; 212 } 213 return 0; 214 } else if (bsize > bsize_min) { 215 // Variance already computed to set the force_split. 216 if (frame_is_intra_only(cm)) get_variance(&vt.part_variances->none); 217 // For key frame: take split for bsize above 32X32 or very high variance. 218 if (frame_is_intra_only(cm) && 219 (bsize > BLOCK_32X32 || 220 vt.part_variances->none.variance > (threshold << 4))) { 221 return 0; 222 } 223 // If variance is low, take the bsize (no split). 224 if (mi_col + block_width / 2 < cm->mi_cols && 225 mi_row + block_height / 2 < cm->mi_rows && 226 vt.part_variances->none.variance < threshold) { 227 set_block_size(cpi, x, xd, mi_row, mi_col, bsize); 228 return 1; 229 } 230 231 // Check vertical split. 232 if (mi_row + block_height / 2 < cm->mi_rows) { 233 BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_VERT); 234 get_variance(&vt.part_variances->vert[0]); 235 get_variance(&vt.part_variances->vert[1]); 236 if (vt.part_variances->vert[0].variance < threshold && 237 vt.part_variances->vert[1].variance < threshold && 238 get_plane_block_size(subsize, xd->plane[1].subsampling_x, 239 xd->plane[1].subsampling_y) < BLOCK_INVALID) { 240 set_block_size(cpi, x, xd, mi_row, mi_col, subsize); 241 set_block_size(cpi, x, xd, mi_row, mi_col + block_width / 2, subsize); 242 return 1; 243 } 244 } 245 // Check horizontal split. 246 if (mi_col + block_width / 2 < cm->mi_cols) { 247 BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_HORZ); 248 get_variance(&vt.part_variances->horz[0]); 249 get_variance(&vt.part_variances->horz[1]); 250 if (vt.part_variances->horz[0].variance < threshold && 251 vt.part_variances->horz[1].variance < threshold && 252 get_plane_block_size(subsize, xd->plane[1].subsampling_x, 253 xd->plane[1].subsampling_y) < BLOCK_INVALID) { 254 set_block_size(cpi, x, xd, mi_row, mi_col, subsize); 255 set_block_size(cpi, x, xd, mi_row + block_height / 2, mi_col, subsize); 256 return 1; 257 } 258 } 259 260 return 0; 261 } 262 return 0; 263 } 264 265 static void fill_variance_8x8avg(const uint8_t *s, int sp, const uint8_t *d, 266 int dp, int x16_idx, int y16_idx, v16x16 *vst, 267 int pixels_wide, int pixels_high, 268 int is_key_frame) { 269 int k; 270 for (k = 0; k < 4; k++) { 271 int x8_idx = x16_idx + ((k & 1) << 3); 272 int y8_idx = y16_idx + ((k >> 1) << 3); 273 unsigned int sse = 0; 274 int sum = 0; 275 if (x8_idx < pixels_wide && y8_idx < pixels_high) { 276 int s_avg; 277 int d_avg = 128; 278 s_avg = aom_avg_8x8(s + y8_idx * sp + x8_idx, sp); 279 if (!is_key_frame) d_avg = aom_avg_8x8(d + y8_idx * dp + x8_idx, dp); 280 281 sum = s_avg - d_avg; 282 sse = sum * sum; 283 } 284 fill_variance(sse, sum, 0, &vst->split[k].part_variances.none); 285 } 286 } 287 288 static int compute_minmax_8x8(const uint8_t *s, int sp, const uint8_t *d, 289 int dp, int x16_idx, int y16_idx, int pixels_wide, 290 int pixels_high) { 291 int k; 292 int minmax_max = 0; 293 int minmax_min = 255; 294 // Loop over the 4 8x8 subblocks. 295 for (k = 0; k < 4; k++) { 296 int x8_idx = x16_idx + ((k & 1) << 3); 297 int y8_idx = y16_idx + ((k >> 1) << 3); 298 int min = 0; 299 int max = 0; 300 if (x8_idx < pixels_wide && y8_idx < pixels_high) { 301 aom_minmax_8x8(s + y8_idx * sp + x8_idx, sp, d + y8_idx * dp + x8_idx, dp, 302 &min, &max); 303 if ((max - min) > minmax_max) minmax_max = (max - min); 304 if ((max - min) < minmax_min) minmax_min = (max - min); 305 } 306 } 307 return (minmax_max - minmax_min); 308 } 309 310 static void fill_variance_4x4avg(const uint8_t *s, int sp, const uint8_t *d, 311 int dp, int x8_idx, int y8_idx, v8x8 *vst, 312 int pixels_wide, int pixels_high, 313 int is_key_frame) { 314 int k; 315 for (k = 0; k < 4; k++) { 316 int x4_idx = x8_idx + ((k & 1) << 2); 317 int y4_idx = y8_idx + ((k >> 1) << 2); 318 unsigned int sse = 0; 319 int sum = 0; 320 if (x4_idx < pixels_wide && y4_idx < pixels_high) { 321 int s_avg; 322 int d_avg = 128; 323 s_avg = aom_avg_4x4(s + y4_idx * sp + x4_idx, sp); 324 if (!is_key_frame) d_avg = aom_avg_4x4(d + y4_idx * dp + x4_idx, dp); 325 sum = s_avg - d_avg; 326 sse = sum * sum; 327 } 328 fill_variance(sse, sum, 0, &vst->split[k].part_variances.none); 329 } 330 } 331 332 static int64_t scale_part_thresh_sumdiff(int64_t threshold_base, int speed, 333 int width, int height, 334 int content_state) { 335 if (speed >= 8) { 336 if (width <= 640 && height <= 480) 337 return (5 * threshold_base) >> 2; 338 else if ((content_state == kLowSadLowSumdiff) || 339 (content_state == kHighSadLowSumdiff) || 340 (content_state == kLowVarHighSumdiff)) 341 return (5 * threshold_base) >> 2; 342 } else if (speed == 7) { 343 if ((content_state == kLowSadLowSumdiff) || 344 (content_state == kHighSadLowSumdiff) || 345 (content_state == kLowVarHighSumdiff)) { 346 return (5 * threshold_base) >> 2; 347 } 348 } 349 return threshold_base; 350 } 351 352 // Set the variance split thresholds for following the block sizes: 353 // 0 - threshold_128x128, 1 - threshold_64x64, 2 - threshold_32x32, 354 // 3 - vbp_threshold_16x16. 4 - vbp_threshold_8x8 (to split to 4x4 partition) is 355 // currently only used on key frame. 356 static void set_vbp_thresholds(AV1_COMP *cpi, int64_t thresholds[], int q, 357 int content_state) { 358 AV1_COMMON *const cm = &cpi->common; 359 const int is_key_frame = frame_is_intra_only(cm); 360 const int threshold_multiplier = is_key_frame ? 40 : 1; 361 int64_t threshold_base = 362 (int64_t)(threshold_multiplier * cpi->dequants.y_dequant_QTX[q][1]); 363 364 if (is_key_frame) { 365 thresholds[0] = threshold_base; 366 thresholds[1] = threshold_base; 367 thresholds[2] = threshold_base >> 2; 368 thresholds[3] = threshold_base >> 2; 369 thresholds[4] = threshold_base << 2; 370 } else { 371 // Increase base variance threshold based on content_state/sum_diff level. 372 threshold_base = scale_part_thresh_sumdiff( 373 threshold_base, cpi->oxcf.speed, cm->width, cm->height, content_state); 374 375 thresholds[1] = threshold_base; 376 thresholds[3] = threshold_base << cpi->oxcf.speed; 377 if (cm->width >= 1280 && cm->height >= 720) 378 thresholds[3] = thresholds[3] << 1; 379 if (cm->width <= 352 && cm->height <= 288) { 380 thresholds[1] = threshold_base >> 3; 381 thresholds[2] = threshold_base >> 1; 382 thresholds[3] = threshold_base << 3; 383 } else if (cm->width < 1280 && cm->height < 720) { 384 thresholds[2] = (5 * threshold_base) >> 2; 385 } else if (cm->width < 1920 && cm->height < 1080) { 386 thresholds[2] = threshold_base << 1; 387 thresholds[3] <<= 2; 388 } else { 389 thresholds[2] = (5 * threshold_base) >> 1; 390 } 391 } 392 } 393 394 void av1_set_variance_partition_thresholds(AV1_COMP *cpi, int q, 395 int content_state) { 396 AV1_COMMON *const cm = &cpi->common; 397 SPEED_FEATURES *const sf = &cpi->sf; 398 const int is_key_frame = frame_is_intra_only(cm); 399 if (sf->partition_search_type != VAR_BASED_PARTITION) { 400 return; 401 } else { 402 set_vbp_thresholds(cpi, cpi->vbp_thresholds, q, content_state); 403 // The thresholds below are not changed locally. 404 if (is_key_frame) { 405 cpi->vbp_threshold_sad = 0; 406 cpi->vbp_threshold_copy = 0; 407 cpi->vbp_bsize_min = BLOCK_8X8; 408 } else { 409 if (cm->width <= 352 && cm->height <= 288) 410 cpi->vbp_threshold_sad = 10; 411 else 412 cpi->vbp_threshold_sad = (cpi->dequants.y_dequant_QTX[q][1] << 1) > 1000 413 ? (cpi->dequants.y_dequant_QTX[q][1] << 1) 414 : 1000; 415 cpi->vbp_bsize_min = BLOCK_16X16; 416 if (cm->width <= 352 && cm->height <= 288) 417 cpi->vbp_threshold_copy = 4000; 418 else if (cm->width <= 640 && cm->height <= 360) 419 cpi->vbp_threshold_copy = 8000; 420 else 421 cpi->vbp_threshold_copy = 422 (cpi->dequants.y_dequant_QTX[q][1] << 3) > 8000 423 ? (cpi->dequants.y_dequant_QTX[q][1] << 3) 424 : 8000; 425 } 426 cpi->vbp_threshold_minmax = 15 + (q >> 3); 427 } 428 } 429 430 // This function chooses partitioning based on the variance between source and 431 // reconstructed last, where variance is computed for down-sampled inputs. 432 // TODO(kyslov): lot of things. Bring back noise estimation, brush up partition 433 // selection and most of all - retune the thresholds 434 int av1_choose_var_based_partitioning(AV1_COMP *cpi, const TileInfo *const tile, 435 MACROBLOCK *x, int mi_row, int mi_col) { 436 AV1_COMMON *const cm = &cpi->common; 437 MACROBLOCKD *xd = &x->e_mbd; 438 439 int i, j, k, m; 440 v128x128 *vt; 441 v16x16 *vt2 = NULL; 442 unsigned char force_split[85]; 443 int avg_32x32; 444 int max_var_32x32 = 0; 445 int min_var_32x32 = INT_MAX; 446 int var_32x32; 447 int var_64x64; 448 int min_var_64x64 = INT_MAX; 449 int max_var_64x64 = 0; 450 int avg_16x16[4]; 451 int maxvar_16x16[4]; 452 int minvar_16x16[4]; 453 int64_t threshold_4x4avg; 454 int content_state = 0; 455 uint8_t *s; 456 const uint8_t *d; 457 int sp; 458 int dp; 459 int compute_minmax_variance = 1; 460 int is_key_frame = frame_is_intra_only(cm); 461 int pixels_wide = 128, pixels_high = 128; 462 assert(cm->seq_params.sb_size == BLOCK_64X64 || 463 cm->seq_params.sb_size == BLOCK_128X128); 464 const int is_small_sb = (cm->seq_params.sb_size == BLOCK_64X64); 465 const int num_64x64_blocks = is_small_sb ? 1 : 4; 466 467 CHECK_MEM_ERROR(cm, vt, aom_calloc(1, sizeof(*vt))); 468 469 int64_t thresholds[5] = { cpi->vbp_thresholds[0], cpi->vbp_thresholds[1], 470 cpi->vbp_thresholds[2], cpi->vbp_thresholds[3], 471 cpi->vbp_thresholds[4] }; 472 473 const int low_res = (cm->width <= 352 && cm->height <= 288); 474 int variance4x4downsample[64]; 475 int segment_id; 476 const int num_planes = av1_num_planes(cm); 477 478 segment_id = xd->mi[0]->segment_id; 479 480 set_vbp_thresholds(cpi, thresholds, cm->base_qindex, content_state); 481 482 if (is_small_sb) { 483 pixels_wide = 64; 484 pixels_high = 64; 485 } 486 487 // For non keyframes, disable 4x4 average for low resolution when speed = 8 488 threshold_4x4avg = INT64_MAX; 489 490 if (xd->mb_to_right_edge < 0) pixels_wide += (xd->mb_to_right_edge >> 3); 491 if (xd->mb_to_bottom_edge < 0) pixels_high += (xd->mb_to_bottom_edge >> 3); 492 493 s = x->plane[0].src.buf; 494 sp = x->plane[0].src.stride; 495 496 // Index for force_split: 0 for 64x64, 1-4 for 32x32 blocks, 497 // 5-20 for the 16x16 blocks. 498 force_split[0] = 0; 499 500 if (!is_key_frame) { 501 // TODO(kyslov): we are assuming that the ref is LAST_FRAME! Check if it 502 // is!! 503 MB_MODE_INFO *mi = xd->mi[0]; 504 const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, LAST_FRAME); 505 506 assert(yv12 != NULL); 507 508 av1_setup_pre_planes(xd, 0, yv12, mi_row, mi_col, 509 get_ref_scale_factors(cm, LAST_FRAME), num_planes); 510 mi->ref_frame[0] = LAST_FRAME; 511 mi->ref_frame[1] = NONE_FRAME; 512 mi->sb_type = cm->seq_params.sb_size; 513 mi->mv[0].as_int = 0; 514 mi->interp_filters = av1_make_interp_filters(BILINEAR, BILINEAR); 515 if (xd->mb_to_right_edge >= 0 && xd->mb_to_bottom_edge >= 0) { 516 const MV dummy_mv = { 0, 0 }; 517 av1_int_pro_motion_estimation(cpi, x, cm->seq_params.sb_size, mi_row, 518 mi_col, &dummy_mv); 519 } 520 521 // TODO(kyslov): bring the small SAD functionality back 522 #if 0 523 y_sad = cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf, x->plane[0].src.stride, 524 xd->plane[0].pre[0].buf, 525 xd->plane[0].pre[0].stride); 526 #endif 527 x->pred_mv[LAST_FRAME] = mi->mv[0].as_mv; 528 529 set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]); 530 av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, 531 cm->seq_params.sb_size, AOM_PLANE_Y, 532 AOM_PLANE_Y); 533 534 d = xd->plane[0].dst.buf; 535 dp = xd->plane[0].dst.stride; 536 537 // If the y_sad is very small, take 64x64 as partition and exit. 538 // Don't check on boosted segment for now, as 64x64 is suppressed there. 539 #if 0 540 if (segment_id == CR_SEGMENT_ID_BASE && y_sad < cpi->vbp_threshold_sad) 541 { const int block_width = num_8x8_blocks_wide_lookup[BLOCK_64X64]; const 542 int block_height = num_8x8_blocks_high_lookup[BLOCK_64X64]; if (mi_col + 543 block_width / 2 < cm->mi_cols && mi_row + block_height / 2 < cm->mi_rows) 544 { set_block_size(cpi, x, xd, mi_row, mi_col, BLOCK_128X128); 545 x->variance_low[0] = 1; 546 return 0; 547 } 548 } 549 #endif 550 } else { 551 d = AV1_VAR_OFFS; 552 dp = 0; 553 } 554 555 if (low_res && threshold_4x4avg < INT64_MAX) 556 CHECK_MEM_ERROR(cm, vt2, aom_calloc(64, sizeof(*vt2))); 557 // Fill in the entire tree of 8x8 (or 4x4 under some conditions) variances 558 // for splits. 559 for (m = 0; m < num_64x64_blocks; m++) { 560 const int x64_idx = ((m & 1) << 6); 561 const int y64_idx = ((m >> 1) << 6); 562 const int m2 = m << 2; 563 force_split[m + 1] = 0; 564 for (i = 0; i < 4; i++) { 565 const int x32_idx = x64_idx + ((i & 1) << 5); 566 const int y32_idx = y64_idx + ((i >> 1) << 5); 567 const int i2 = (m2 + i) << 2; 568 force_split[5 + m2 + i] = 0; 569 avg_16x16[i] = 0; 570 maxvar_16x16[i] = 0; 571 minvar_16x16[i] = INT_MAX; 572 for (j = 0; j < 4; j++) { 573 const int x16_idx = x32_idx + ((j & 1) << 4); 574 const int y16_idx = y32_idx + ((j >> 1) << 4); 575 const int split_index = 21 + i2 + j; 576 v16x16 *vst = &vt->split[m].split[i].split[j]; 577 force_split[split_index] = 0; 578 variance4x4downsample[i2 + j] = 0; 579 if (!is_key_frame) { 580 fill_variance_8x8avg(s, sp, d, dp, x16_idx, y16_idx, vst, pixels_wide, 581 pixels_high, is_key_frame); 582 fill_variance_tree(&vt->split[m].split[i].split[j], BLOCK_16X16); 583 get_variance(&vt->split[m].split[i].split[j].part_variances.none); 584 avg_16x16[i] += 585 vt->split[m].split[i].split[j].part_variances.none.variance; 586 if (vt->split[m].split[i].split[j].part_variances.none.variance < 587 minvar_16x16[i]) 588 minvar_16x16[i] = 589 vt->split[m].split[i].split[j].part_variances.none.variance; 590 if (vt->split[m].split[i].split[j].part_variances.none.variance > 591 maxvar_16x16[i]) 592 maxvar_16x16[i] = 593 vt->split[m].split[i].split[j].part_variances.none.variance; 594 if (vt->split[m].split[i].split[j].part_variances.none.variance > 595 thresholds[3]) { 596 // 16X16 variance is above threshold for split, so force split to 597 // 8x8 for this 16x16 block (this also forces splits for upper 598 // levels). 599 force_split[split_index] = 1; 600 force_split[5 + m2 + i] = 1; 601 force_split[m + 1] = 1; 602 force_split[0] = 1; 603 } else if (compute_minmax_variance && 604 vt->split[m] 605 .split[i] 606 .split[j] 607 .part_variances.none.variance > thresholds[2] && 608 !cyclic_refresh_segment_id_boosted(segment_id)) { 609 // We have some nominal amount of 16x16 variance (based on average), 610 // compute the minmax over the 8x8 sub-blocks, and if above 611 // threshold, force split to 8x8 block for this 16x16 block. 612 int minmax = compute_minmax_8x8(s, sp, d, dp, x16_idx, y16_idx, 613 pixels_wide, pixels_high); 614 int thresh_minmax = (int)cpi->vbp_threshold_minmax; 615 if (minmax > thresh_minmax) { 616 force_split[split_index] = 1; 617 force_split[5 + m2 + i] = 1; 618 force_split[m + 1] = 1; 619 force_split[0] = 1; 620 } 621 } 622 } 623 if (is_key_frame) { 624 force_split[split_index] = 0; 625 // Go down to 4x4 down-sampling for variance. 626 variance4x4downsample[i2 + j] = 1; 627 for (k = 0; k < 4; k++) { 628 int x8_idx = x16_idx + ((k & 1) << 3); 629 int y8_idx = y16_idx + ((k >> 1) << 3); 630 v8x8 *vst2 = is_key_frame ? &vst->split[k] : &vt2[i2 + j].split[k]; 631 fill_variance_4x4avg(s, sp, d, dp, x8_idx, y8_idx, vst2, 632 pixels_wide, pixels_high, is_key_frame); 633 } 634 } 635 } 636 } 637 } 638 639 // Fill the rest of the variance tree by summing split partition values. 640 for (m = 0; m < num_64x64_blocks; ++m) { 641 avg_32x32 = 0; 642 const int m2 = m << 2; 643 for (i = 0; i < 4; i++) { 644 const int i2 = (m2 + i) << 2; 645 for (j = 0; j < 4; j++) { 646 const int split_index = 21 + i2 + j; 647 if (variance4x4downsample[i2 + j] == 1) { 648 v16x16 *vtemp = 649 (!is_key_frame) ? &vt2[i2 + j] : &vt->split[m].split[i].split[j]; 650 for (k = 0; k < 4; k++) 651 fill_variance_tree(&vtemp->split[k], BLOCK_8X8); 652 fill_variance_tree(vtemp, BLOCK_16X16); 653 // If variance of this 16x16 block is above the threshold, force block 654 // to split. This also forces a split on the upper levels. 655 get_variance(&vtemp->part_variances.none); 656 if (vtemp->part_variances.none.variance > thresholds[3]) { 657 force_split[split_index] = 1; 658 force_split[5 + m2 + i] = 1; 659 force_split[m + 1] = 1; 660 force_split[0] = 1; 661 } 662 } 663 } 664 fill_variance_tree(&vt->split[m].split[i], BLOCK_32X32); 665 // If variance of this 32x32 block is above the threshold, or if its above 666 // (some threshold of) the average variance over the sub-16x16 blocks, 667 // then force this block to split. This also forces a split on the upper 668 // (64x64) level. 669 if (!force_split[5 + m2 + i]) { 670 get_variance(&vt->split[m].split[i].part_variances.none); 671 var_32x32 = vt->split[m].split[i].part_variances.none.variance; 672 max_var_32x32 = AOMMAX(var_32x32, max_var_32x32); 673 min_var_32x32 = AOMMIN(var_32x32, min_var_32x32); 674 if (vt->split[m].split[i].part_variances.none.variance > 675 thresholds[2] || 676 (!is_key_frame && 677 vt->split[m].split[i].part_variances.none.variance > 678 (thresholds[2] >> 1) && 679 vt->split[m].split[i].part_variances.none.variance > 680 (avg_16x16[i] >> 1))) { 681 force_split[5 + m2 + i] = 1; 682 force_split[m + 1] = 1; 683 force_split[0] = 1; 684 } else if (!is_key_frame && cm->height <= 360 && 685 (maxvar_16x16[i] - minvar_16x16[i]) > (thresholds[2] >> 1) && 686 maxvar_16x16[i] > thresholds[2]) { 687 force_split[5 + m2 + i] = 1; 688 force_split[m + 1] = 1; 689 force_split[0] = 1; 690 } 691 avg_32x32 += var_32x32; 692 } 693 } 694 if (!force_split[1 + m]) { 695 fill_variance_tree(&vt->split[m], BLOCK_64X64); 696 get_variance(&vt->split[m].part_variances.none); 697 var_64x64 = vt->split[m].part_variances.none.variance; 698 max_var_64x64 = AOMMAX(var_64x64, max_var_64x64); 699 min_var_64x64 = AOMMIN(var_64x64, min_var_64x64); 700 // If variance of this 64x64 block is above (some threshold of) the 701 // average variance over the sub-32x32 blocks, then force this block to 702 // split. Only checking this for noise level >= medium for now. 703 704 if (!is_key_frame && 705 (max_var_32x32 - min_var_32x32) > 3 * (thresholds[1] >> 3) && 706 max_var_32x32 > thresholds[1] >> 1) 707 force_split[1 + m] = 1; 708 } 709 if (is_small_sb) force_split[0] = 1; 710 } 711 712 if (!force_split[0]) { 713 fill_variance_tree(vt, BLOCK_128X128); 714 get_variance(&vt->part_variances.none); 715 if (!is_key_frame && 716 (max_var_64x64 - min_var_64x64) > 3 * (thresholds[0] >> 3) && 717 max_var_64x64 > thresholds[0] >> 1) 718 force_split[0] = 1; 719 } 720 721 if (!set_vt_partitioning(cpi, x, xd, tile, vt, BLOCK_128X128, mi_row, mi_col, 722 thresholds[0], BLOCK_16X16, force_split[0])) { 723 for (m = 0; m < num_64x64_blocks; ++m) { 724 const int x64_idx = ((m & 1) << 4); 725 const int y64_idx = ((m >> 1) << 4); 726 const int m2 = m << 2; 727 728 // Now go through the entire structure, splitting every block size until 729 // we get to one that's got a variance lower than our threshold. 730 if (!set_vt_partitioning(cpi, x, xd, tile, &vt->split[m], BLOCK_64X64, 731 mi_row + y64_idx, mi_col + x64_idx, 732 thresholds[1], BLOCK_16X16, 733 force_split[1 + m])) { 734 for (i = 0; i < 4; ++i) { 735 const int x32_idx = ((i & 1) << 3); 736 const int y32_idx = ((i >> 1) << 3); 737 const int i2 = (m2 + i) << 2; 738 if (!set_vt_partitioning(cpi, x, xd, tile, &vt->split[m].split[i], 739 BLOCK_32X32, (mi_row + y64_idx + y32_idx), 740 (mi_col + x64_idx + x32_idx), thresholds[2], 741 BLOCK_16X16, force_split[5 + m2 + i])) { 742 for (j = 0; j < 4; ++j) { 743 const int x16_idx = ((j & 1) << 2); 744 const int y16_idx = ((j >> 1) << 2); 745 const int split_index = 21 + i2 + j; 746 // For inter frames: if variance4x4downsample[] == 1 for this 747 // 16x16 block, then the variance is based on 4x4 down-sampling, 748 // so use vt2 in set_vt_partioning(), otherwise use vt. 749 v16x16 *vtemp = 750 (!is_key_frame && variance4x4downsample[i2 + j] == 1) 751 ? &vt2[i2 + j] 752 : &vt->split[m].split[i].split[j]; 753 if (!set_vt_partitioning(cpi, x, xd, tile, vtemp, BLOCK_16X16, 754 mi_row + y64_idx + y32_idx + y16_idx, 755 mi_col + x64_idx + x32_idx + x16_idx, 756 thresholds[3], BLOCK_8X8, 757 force_split[split_index])) { 758 for (k = 0; k < 4; ++k) { 759 const int x8_idx = (k & 1) << 1; 760 const int y8_idx = (k >> 1) << 1; 761 set_block_size( 762 cpi, x, xd, 763 (mi_row + y64_idx + y32_idx + y16_idx + y8_idx), 764 (mi_col + x64_idx + x32_idx + x16_idx + x8_idx), 765 BLOCK_8X8); 766 } 767 } 768 } 769 } 770 } 771 } 772 } 773 } 774 775 if (vt2) aom_free(vt2); 776 if (vt) aom_free(vt); 777 return 0; 778 } 779