/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <limits.h>
#include <math.h>
#include <stdio.h>

#include "./vp9_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "./vpx_config.h"

#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/vpx_timer.h"
#include "vpx_ports/system_state.h"

#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_idct.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_quant_common.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_tile_common.h"

#include "vp9/encoder/vp9_aq_360.h"
#include "vp9/encoder/vp9_aq_complexity.h"
#include "vp9/encoder/vp9_aq_cyclicrefresh.h"
#include "vp9/encoder/vp9_aq_variance.h"
#include "vp9/encoder/vp9_encodeframe.h"
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/encoder/vp9_ethread.h"
#include "vp9/encoder/vp9_extend.h"
#include "vp9/encoder/vp9_multi_thread.h"
#include "vp9/encoder/vp9_partition_models.h"
#include "vp9/encoder/vp9_pickmode.h"
#include "vp9/encoder/vp9_rd.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_segmentation.h"
#include "vp9/encoder/vp9_tokenize.h"

static void encode_superblock(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
                              int output_enabled, int mi_row, int mi_col,
                              BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx);

// This is used as a reference when computing the source variance for the
// purpose of activity masking.
// Eventually this should be replaced by custom no-reference routines,
// which will be faster.
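// Note: the variance functions below are called with one of these constant
// 128-valued blocks as the "reference", so the value they return is
// effectively a measure of the source block's own pixel variance around
// mid-grey. The high-bitdepth tables scale 128 by 4 (10-bit) and 16 (12-bit)
// to keep the offset at mid-range for the wider pixel depth.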
static const uint8_t VP9_VAR_OFFS[64] = {
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128
};

#if CONFIG_VP9_HIGHBITDEPTH
static const uint16_t VP9_HIGH_VAR_OFFS_8[64] = {
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128
};

static const uint16_t VP9_HIGH_VAR_OFFS_10[64] = {
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4
};

static const uint16_t VP9_HIGH_VAR_OFFS_12[64] = {
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16
};
#endif  // CONFIG_VP9_HIGHBITDEPTH

unsigned int vp9_get_sby_variance(VP9_COMP *cpi, const struct buf_2d *ref,
                                  BLOCK_SIZE bs) {
  unsigned int sse;
  const unsigned int var =
      cpi->fn_ptr[bs].vf(ref->buf, ref->stride, VP9_VAR_OFFS, 0, &sse);
  return var;
}

#if CONFIG_VP9_HIGHBITDEPTH
unsigned int vp9_high_get_sby_variance(VP9_COMP *cpi, const struct buf_2d *ref,
                                       BLOCK_SIZE bs, int bd) {
  unsigned int var, sse;
  switch (bd) {
    case 10:
      var =
          cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
                             CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_10), 0, &sse);
      break;
    case 12:
      var =
          cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
                             CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_12), 0, &sse);
      break;
    case 8:
    default:
      var =
          cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
                             CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_8), 0, &sse);
      break;
  }
  return var;
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

unsigned int vp9_get_sby_perpixel_variance(VP9_COMP *cpi,
                                           const struct buf_2d *ref,
                                           BLOCK_SIZE bs) {
  return ROUND_POWER_OF_TWO(vp9_get_sby_variance(cpi, ref, bs),
                            num_pels_log2_lookup[bs]);
}

#if CONFIG_VP9_HIGHBITDEPTH
unsigned int vp9_high_get_sby_perpixel_variance(VP9_COMP *cpi,
                                                const struct buf_2d *ref,
                                                BLOCK_SIZE bs, int bd) {
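  // As in the 8-bit path above, normalize the superblock variance to a
  // per-pixel value by dividing (with rounding) by the pixel count,
  // 1 << num_pels_log2_lookup[bs]; the 64-bit rounding macro is used here
  // since high-bitdepth variances can be large.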
  return (unsigned int)ROUND64_POWER_OF_TWO(
      (int64_t)vp9_high_get_sby_variance(cpi, ref, bs, bd),
      num_pels_log2_lookup[bs]);
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

static unsigned int get_sby_perpixel_diff_variance(VP9_COMP *cpi,
                                                   const struct buf_2d *ref,
                                                   int mi_row, int mi_col,
                                                   BLOCK_SIZE bs) {
  unsigned int sse, var;
  uint8_t *last_y;
  const YV12_BUFFER_CONFIG *last = get_ref_frame_buffer(cpi, LAST_FRAME);

  assert(last != NULL);
  last_y =
      &last->y_buffer[mi_row * MI_SIZE * last->y_stride + mi_col * MI_SIZE];
  var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, last_y, last->y_stride, &sse);
  return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
}

static BLOCK_SIZE get_rd_var_based_fixed_partition(VP9_COMP *cpi, MACROBLOCK *x,
                                                   int mi_row, int mi_col) {
  unsigned int var = get_sby_perpixel_diff_variance(
      cpi, &x->plane[0].src, mi_row, mi_col, BLOCK_64X64);
  if (var < 8)
    return BLOCK_64X64;
  else if (var < 128)
    return BLOCK_32X32;
  else if (var < 2048)
    return BLOCK_16X16;
  else
    return BLOCK_8X8;
}

static void set_segment_index(VP9_COMP *cpi, MACROBLOCK *const x, int mi_row,
                              int mi_col, BLOCK_SIZE bsize, int segment_index) {
  VP9_COMMON *const cm = &cpi->common;
  const struct segmentation *const seg = &cm->seg;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *mi = xd->mi[0];

  const AQ_MODE aq_mode = cpi->oxcf.aq_mode;
  const uint8_t *const map =
      seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;

  // Initialize the segmentation index as 0.
  mi->segment_id = 0;

  // Skip the rest if AQ mode is disabled.
  if (!seg->enabled) return;

  switch (aq_mode) {
    case CYCLIC_REFRESH_AQ:
      mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
      break;
    case VARIANCE_AQ:
      if (cm->frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame ||
          cpi->force_update_segmentation ||
          (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) {
        int min_energy;
        int max_energy;
        // Get sub block energy range
        if (bsize >= BLOCK_32X32) {
          vp9_get_sub_block_energy(cpi, x, mi_row, mi_col, bsize, &min_energy,
                                   &max_energy);
        } else {
          min_energy = bsize <= BLOCK_16X16 ? x->mb_energy
                                            : vp9_block_energy(cpi, x, bsize);
        }
        mi->segment_id = vp9_vaq_segment_id(min_energy);
      } else {
        mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
      }
      break;
    case LOOKAHEAD_AQ:
      mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
      break;
    case EQUATOR360_AQ:
      if (cm->frame_type == KEY_FRAME || cpi->force_update_segmentation)
        mi->segment_id = vp9_360aq_segment_id(mi_row, cm->mi_rows);
      else
        mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
      break;
    case PSNR_AQ: mi->segment_id = segment_index; break;
    default:
      // NO_AQ or PSNR_AQ
      break;
  }

  vp9_init_plane_quantizers(cpi, x);
}

// Lighter version of set_offsets that only sets the mode info
// pointers.
static INLINE void set_mode_info_offsets(VP9_COMMON *const cm,
                                         MACROBLOCK *const x,
                                         MACROBLOCKD *const xd, int mi_row,
                                         int mi_col) {
  const int idx_str = xd->mi_stride * mi_row + mi_col;
  xd->mi = cm->mi_grid_visible + idx_str;
  xd->mi[0] = cm->mi + idx_str;
  x->mbmi_ext = x->mbmi_ext_base + (mi_row * cm->mi_cols + mi_col);
}

static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile,
                        MACROBLOCK *const x, int mi_row, int mi_col,
                        BLOCK_SIZE bsize) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  MvLimits *const mv_limits = &x->mv_limits;

  set_skip_context(xd, mi_row, mi_col);

  set_mode_info_offsets(cm, x, xd, mi_row, mi_col);

  // Set up destination pointers.
  vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col);

  // Set up limit values for MV components.
  // MVs beyond this range do not produce a new/different prediction block.
  mv_limits->row_min = -(((mi_row + mi_height) * MI_SIZE) + VP9_INTERP_EXTEND);
  mv_limits->col_min = -(((mi_col + mi_width) * MI_SIZE) + VP9_INTERP_EXTEND);
  mv_limits->row_max = (cm->mi_rows - mi_row) * MI_SIZE + VP9_INTERP_EXTEND;
  mv_limits->col_max = (cm->mi_cols - mi_col) * MI_SIZE + VP9_INTERP_EXTEND;

  // Set up distance of MB to edge of frame in 1/8th pel units.
  assert(!(mi_col & (mi_width - 1)) && !(mi_row & (mi_height - 1)));
  set_mi_row_col(xd, tile, mi_row, mi_height, mi_col, mi_width, cm->mi_rows,
                 cm->mi_cols);

  // Set up source buffers.
  vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);

  // R/D setup.
  x->rddiv = cpi->rd.RDDIV;
  x->rdmult = cpi->rd.RDMULT;

  // required by vp9_append_sub8x8_mvs_for_idx() and vp9_find_best_ref_mvs()
  xd->tile = *tile;
}

static void duplicate_mode_info_in_sb(VP9_COMMON *cm, MACROBLOCKD *xd,
                                      int mi_row, int mi_col,
                                      BLOCK_SIZE bsize) {
  const int block_width =
      VPXMIN(num_8x8_blocks_wide_lookup[bsize], cm->mi_cols - mi_col);
  const int block_height =
      VPXMIN(num_8x8_blocks_high_lookup[bsize], cm->mi_rows - mi_row);
  const int mi_stride = xd->mi_stride;
  MODE_INFO *const src_mi = xd->mi[0];
  int i, j;

  for (j = 0; j < block_height; ++j)
    for (i = 0; i < block_width; ++i) xd->mi[j * mi_stride + i] = src_mi;
}

static void set_block_size(VP9_COMP *const cpi, MACROBLOCK *const x,
                           MACROBLOCKD *const xd, int mi_row, int mi_col,
                           BLOCK_SIZE bsize) {
  if (cpi->common.mi_cols > mi_col && cpi->common.mi_rows > mi_row) {
    set_mode_info_offsets(&cpi->common, x, xd, mi_row, mi_col);
    xd->mi[0]->sb_type = bsize;
  }
}

typedef struct {
  // This struct is used for computing variance in choose_partitioning(), where
  // the max number of samples within a superblock is 16x16 (with 4x4 avg). Even
  // in high bitdepth, uint32_t is enough for sum_square_error (2^12 * 2^12 *
  // 16 * 16 = 2^32).
  uint32_t sum_square_error;
  int32_t sum_error;
  int log2_count;
  int variance;
} var;

typedef struct {
  var none;
  var horz[2];
  var vert[2];
} partition_variance;

typedef struct {
  partition_variance part_variances;
  var split[4];
} v4x4;

typedef struct {
  partition_variance part_variances;
  v4x4 split[4];
} v8x8;

typedef struct {
  partition_variance part_variances;
  v8x8 split[4];
} v16x16;

typedef struct {
  partition_variance part_variances;
  v16x16 split[4];
} v32x32;

typedef struct {
  partition_variance part_variances;
  v32x32 split[4];
} v64x64;

typedef struct {
  partition_variance *part_variances;
  var *split[4];
} variance_node;

typedef enum {
  V16X16,
  V32X32,
  V64X64,
} TREE_LEVEL;

static void tree_to_node(void *data, BLOCK_SIZE bsize, variance_node *node) {
  int i;
  node->part_variances = NULL;
  switch (bsize) {
    case BLOCK_64X64: {
      v64x64 *vt = (v64x64 *)data;
      node->part_variances = &vt->part_variances;
      for (i = 0; i < 4; i++)
        node->split[i] = &vt->split[i].part_variances.none;
      break;
    }
    case BLOCK_32X32: {
      v32x32 *vt = (v32x32 *)data;
      node->part_variances = &vt->part_variances;
      for (i = 0; i < 4; i++)
        node->split[i] = &vt->split[i].part_variances.none;
      break;
    }
    case BLOCK_16X16: {
      v16x16 *vt = (v16x16 *)data;
      node->part_variances = &vt->part_variances;
      for (i = 0; i < 4; i++)
        node->split[i] = &vt->split[i].part_variances.none;
      break;
    }
    case BLOCK_8X8: {
      v8x8 *vt = (v8x8 *)data;
      node->part_variances = &vt->part_variances;
      for (i = 0; i < 4; i++)
        node->split[i] = &vt->split[i].part_variances.none;
      break;
    }
    default: {
      v4x4 *vt = (v4x4 *)data;
      assert(bsize == BLOCK_4X4);
      node->part_variances = &vt->part_variances;
      for (i = 0; i < 4; i++) node->split[i] = &vt->split[i];
      break;
    }
  }
}

// Set variance values given sum square error, sum error, count.
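// Note on the arithmetic used by get_variance() below: with n = 2^log2_count
// samples it computes roughly
//   variance = 256 * (sum_square_error - sum_error^2 / n) / n,
// i.e. 256 times the population variance of the accumulated samples. Each
// sum_2_variances() merge adds the two partial sums and bumps log2_count by
// one, so the normalization stays consistent as the tree is filled in.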
static void fill_variance(uint32_t s2, int32_t s, int c, var *v) {
  v->sum_square_error = s2;
  v->sum_error = s;
  v->log2_count = c;
}

static void get_variance(var *v) {
  v->variance =
      (int)(256 * (v->sum_square_error -
                   (uint32_t)(((int64_t)v->sum_error * v->sum_error) >>
                              v->log2_count)) >>
            v->log2_count);
}

static void sum_2_variances(const var *a, const var *b, var *r) {
  assert(a->log2_count == b->log2_count);
  fill_variance(a->sum_square_error + b->sum_square_error,
                a->sum_error + b->sum_error, a->log2_count + 1, r);
}

static void fill_variance_tree(void *data, BLOCK_SIZE bsize) {
  variance_node node;
  memset(&node, 0, sizeof(node));
  tree_to_node(data, bsize, &node);
  sum_2_variances(node.split[0], node.split[1], &node.part_variances->horz[0]);
  sum_2_variances(node.split[2], node.split[3], &node.part_variances->horz[1]);
  sum_2_variances(node.split[0], node.split[2], &node.part_variances->vert[0]);
  sum_2_variances(node.split[1], node.split[3], &node.part_variances->vert[1]);
  sum_2_variances(&node.part_variances->vert[0], &node.part_variances->vert[1],
                  &node.part_variances->none);
}

static int set_vt_partitioning(VP9_COMP *cpi, MACROBLOCK *const x,
                               MACROBLOCKD *const xd, void *data,
                               BLOCK_SIZE bsize, int mi_row, int mi_col,
                               int64_t threshold, BLOCK_SIZE bsize_min,
                               int force_split) {
  VP9_COMMON *const cm = &cpi->common;
  variance_node vt;
  const int block_width = num_8x8_blocks_wide_lookup[bsize];
  const int block_height = num_8x8_blocks_high_lookup[bsize];

  assert(block_height == block_width);
  tree_to_node(data, bsize, &vt);

  if (force_split == 1) return 0;

  // For bsize=bsize_min (16x16/8x8 for 8x8/4x4 downsampling), select if
  // variance is below threshold, otherwise split will be selected.
  // No check for vert/horiz split as too few samples for variance.
  if (bsize == bsize_min) {
    // Variance already computed to set the force_split.
    if (frame_is_intra_only(cm)) get_variance(&vt.part_variances->none);
    if (mi_col + block_width / 2 < cm->mi_cols &&
        mi_row + block_height / 2 < cm->mi_rows &&
        vt.part_variances->none.variance < threshold) {
      set_block_size(cpi, x, xd, mi_row, mi_col, bsize);
      return 1;
    }
    return 0;
  } else if (bsize > bsize_min) {
    // Variance already computed to set the force_split.
    if (frame_is_intra_only(cm)) get_variance(&vt.part_variances->none);
    // For key frame: take split for bsize above 32X32 or very high variance.
    if (frame_is_intra_only(cm) &&
        (bsize > BLOCK_32X32 ||
         vt.part_variances->none.variance > (threshold << 4))) {
      return 0;
    }
    // If variance is low, take the bsize (no split).
    if (mi_col + block_width / 2 < cm->mi_cols &&
        mi_row + block_height / 2 < cm->mi_rows &&
        vt.part_variances->none.variance < threshold) {
      set_block_size(cpi, x, xd, mi_row, mi_col, bsize);
      return 1;
    }

    // Check vertical split.
    if (mi_row + block_height / 2 < cm->mi_rows) {
      BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_VERT);
      get_variance(&vt.part_variances->vert[0]);
      get_variance(&vt.part_variances->vert[1]);
      if (vt.part_variances->vert[0].variance < threshold &&
          vt.part_variances->vert[1].variance < threshold &&
          get_plane_block_size(subsize, &xd->plane[1]) < BLOCK_INVALID) {
        set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
        set_block_size(cpi, x, xd, mi_row, mi_col + block_width / 2, subsize);
        return 1;
      }
    }
    // Check horizontal split.
    if (mi_col + block_width / 2 < cm->mi_cols) {
      BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_HORZ);
      get_variance(&vt.part_variances->horz[0]);
      get_variance(&vt.part_variances->horz[1]);
      if (vt.part_variances->horz[0].variance < threshold &&
          vt.part_variances->horz[1].variance < threshold &&
          get_plane_block_size(subsize, &xd->plane[1]) < BLOCK_INVALID) {
        set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
        set_block_size(cpi, x, xd, mi_row + block_height / 2, mi_col, subsize);
        return 1;
      }
    }

    return 0;
  }
  return 0;
}

static int64_t scale_part_thresh_sumdiff(int64_t threshold_base, int speed,
                                         int width, int height,
                                         int content_state) {
  if (speed >= 8) {
    if (width <= 640 && height <= 480)
      return (5 * threshold_base) >> 2;
    else if ((content_state == kLowSadLowSumdiff) ||
             (content_state == kHighSadLowSumdiff) ||
             (content_state == kLowVarHighSumdiff))
      return (5 * threshold_base) >> 2;
  } else if (speed == 7) {
    if ((content_state == kLowSadLowSumdiff) ||
        (content_state == kHighSadLowSumdiff) ||
        (content_state == kLowVarHighSumdiff)) {
      return (5 * threshold_base) >> 2;
    }
  }
  return threshold_base;
}

// Set the variance split thresholds for the following block sizes:
// 0 - threshold_64x64, 1 - threshold_32x32, 2 - threshold_16x16,
// 3 - vbp_threshold_8x8. vbp_threshold_8x8 (to split to 4x4 partition) is
// currently only used on key frame.
static void set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q,
                               int content_state) {
  VP9_COMMON *const cm = &cpi->common;
  const int is_key_frame = frame_is_intra_only(cm);
  const int threshold_multiplier = is_key_frame ? 20 : 1;
  int64_t threshold_base =
      (int64_t)(threshold_multiplier * cpi->y_dequant[q][1]);

  if (is_key_frame) {
    thresholds[0] = threshold_base;
    thresholds[1] = threshold_base >> 2;
    thresholds[2] = threshold_base >> 2;
    thresholds[3] = threshold_base << 2;
  } else {
    // Increase base variance threshold based on estimated noise level.
    if (cpi->noise_estimate.enabled && cm->width >= 640 && cm->height >= 480) {
      NOISE_LEVEL noise_level =
          vp9_noise_estimate_extract_level(&cpi->noise_estimate);
      if (noise_level == kHigh)
        threshold_base = 3 * threshold_base;
      else if (noise_level == kMedium)
        threshold_base = threshold_base << 1;
      else if (noise_level < kLow)
        threshold_base = (7 * threshold_base) >> 3;
    }
#if CONFIG_VP9_TEMPORAL_DENOISING
    if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) &&
        cpi->oxcf.speed > 5 && cpi->denoiser.denoising_level >= kDenLow)
      threshold_base =
          vp9_scale_part_thresh(threshold_base, cpi->denoiser.denoising_level,
                                content_state, cpi->svc.temporal_layer_id);
    else
      threshold_base =
          scale_part_thresh_sumdiff(threshold_base, cpi->oxcf.speed, cm->width,
                                    cm->height, content_state);
#else
    // Increase base variance threshold based on content_state/sum_diff level.
    threshold_base = scale_part_thresh_sumdiff(
        threshold_base, cpi->oxcf.speed, cm->width, cm->height, content_state);
#endif
    thresholds[0] = threshold_base;
    thresholds[2] = threshold_base << cpi->oxcf.speed;
    if (cm->width >= 1280 && cm->height >= 720 && cpi->oxcf.speed < 7)
      thresholds[2] = thresholds[2] << 1;
    if (cm->width <= 352 && cm->height <= 288) {
      thresholds[0] = threshold_base >> 3;
      thresholds[1] = threshold_base >> 1;
      thresholds[2] = threshold_base << 3;
    } else if (cm->width < 1280 && cm->height < 720) {
      thresholds[1] = (5 * threshold_base) >> 2;
    } else if (cm->width < 1920 && cm->height < 1080) {
      thresholds[1] = threshold_base << 1;
    } else {
      thresholds[1] = (5 * threshold_base) >> 1;
    }
    if (cpi->sf.disable_16x16part_nonkey) thresholds[2] = INT64_MAX;
  }
}

void vp9_set_variance_partition_thresholds(VP9_COMP *cpi, int q,
                                           int content_state) {
  VP9_COMMON *const cm = &cpi->common;
  SPEED_FEATURES *const sf = &cpi->sf;
  const int is_key_frame = frame_is_intra_only(cm);
  if (sf->partition_search_type != VAR_BASED_PARTITION &&
      sf->partition_search_type != REFERENCE_PARTITION) {
    return;
  } else {
    set_vbp_thresholds(cpi, cpi->vbp_thresholds, q, content_state);
    // The thresholds below are not changed locally.
    if (is_key_frame) {
      cpi->vbp_threshold_sad = 0;
      cpi->vbp_threshold_copy = 0;
      cpi->vbp_bsize_min = BLOCK_8X8;
    } else {
      if (cm->width <= 352 && cm->height <= 288)
        cpi->vbp_threshold_sad = 10;
      else
        cpi->vbp_threshold_sad = (cpi->y_dequant[q][1] << 1) > 1000
                                     ? (cpi->y_dequant[q][1] << 1)
                                     : 1000;
      cpi->vbp_bsize_min = BLOCK_16X16;
      if (cm->width <= 352 && cm->height <= 288)
        cpi->vbp_threshold_copy = 4000;
      else if (cm->width <= 640 && cm->height <= 360)
        cpi->vbp_threshold_copy = 8000;
      else
        cpi->vbp_threshold_copy = (cpi->y_dequant[q][1] << 3) > 8000
                                      ? (cpi->y_dequant[q][1] << 3)
                                      : 8000;
      if (cpi->rc.high_source_sad ||
          (cpi->use_svc && cpi->svc.high_source_sad_superframe)) {
        cpi->vbp_threshold_sad = 0;
        cpi->vbp_threshold_copy = 0;
      }
    }
    cpi->vbp_threshold_minmax = 15 + (q >> 3);
  }
}

// Compute the minmax over the 8x8 subblocks.
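// The value returned below is the spread between the largest and the smallest
// (max - min) pixel range found among the four 8x8 subblocks of a 16x16 area.
// A large spread suggests the 16x16 block mixes flat and detailed content;
// the caller (choose_partitioning()) compares it against
// cpi->vbp_threshold_minmax to force a split down to 8x8.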
static int compute_minmax_8x8(const uint8_t *s, int sp, const uint8_t *d,
                              int dp, int x16_idx, int y16_idx,
#if CONFIG_VP9_HIGHBITDEPTH
                              int highbd_flag,
#endif
                              int pixels_wide, int pixels_high) {
  int k;
  int minmax_max = 0;
  int minmax_min = 255;
  // Loop over the 4 8x8 subblocks.
  for (k = 0; k < 4; k++) {
    int x8_idx = x16_idx + ((k & 1) << 3);
    int y8_idx = y16_idx + ((k >> 1) << 3);
    int min = 0;
    int max = 0;
    if (x8_idx < pixels_wide && y8_idx < pixels_high) {
#if CONFIG_VP9_HIGHBITDEPTH
      if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
        vpx_highbd_minmax_8x8(s + y8_idx * sp + x8_idx, sp,
                              d + y8_idx * dp + x8_idx, dp, &min, &max);
      } else {
        vpx_minmax_8x8(s + y8_idx * sp + x8_idx, sp, d + y8_idx * dp + x8_idx,
                       dp, &min, &max);
      }
#else
      vpx_minmax_8x8(s + y8_idx * sp + x8_idx, sp, d + y8_idx * dp + x8_idx, dp,
                     &min, &max);
#endif
      if ((max - min) > minmax_max) minmax_max = (max - min);
      if ((max - min) < minmax_min) minmax_min = (max - min);
    }
  }
  return (minmax_max - minmax_min);
}

static void fill_variance_4x4avg(const uint8_t *s, int sp, const uint8_t *d,
                                 int dp, int x8_idx, int y8_idx, v8x8 *vst,
#if CONFIG_VP9_HIGHBITDEPTH
                                 int highbd_flag,
#endif
                                 int pixels_wide, int pixels_high,
                                 int is_key_frame) {
  int k;
  for (k = 0; k < 4; k++) {
    int x4_idx = x8_idx + ((k & 1) << 2);
    int y4_idx = y8_idx + ((k >> 1) << 2);
    unsigned int sse = 0;
    int sum = 0;
    if (x4_idx < pixels_wide && y4_idx < pixels_high) {
      int s_avg;
      int d_avg = 128;
#if CONFIG_VP9_HIGHBITDEPTH
      if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
        s_avg = vpx_highbd_avg_4x4(s + y4_idx * sp + x4_idx, sp);
        if (!is_key_frame)
          d_avg = vpx_highbd_avg_4x4(d + y4_idx * dp + x4_idx, dp);
      } else {
        s_avg = vpx_avg_4x4(s + y4_idx * sp + x4_idx, sp);
        if (!is_key_frame) d_avg = vpx_avg_4x4(d + y4_idx * dp + x4_idx, dp);
      }
#else
      s_avg = vpx_avg_4x4(s + y4_idx * sp + x4_idx, sp);
      if (!is_key_frame) d_avg = vpx_avg_4x4(d + y4_idx * dp + x4_idx, dp);
#endif
      sum = s_avg - d_avg;
      sse = sum * sum;
    }
    fill_variance(sse, sum, 0, &vst->split[k].part_variances.none);
  }
}

static void fill_variance_8x8avg(const uint8_t *s, int sp, const uint8_t *d,
                                 int dp, int x16_idx, int y16_idx, v16x16 *vst,
#if CONFIG_VP9_HIGHBITDEPTH
                                 int highbd_flag,
#endif
                                 int pixels_wide, int pixels_high,
                                 int is_key_frame) {
  int k;
  for (k = 0; k < 4; k++) {
    int x8_idx = x16_idx + ((k & 1) << 3);
    int y8_idx = y16_idx + ((k >> 1) << 3);
    unsigned int sse = 0;
    int sum = 0;
    if (x8_idx < pixels_wide && y8_idx < pixels_high) {
      int s_avg;
      int d_avg = 128;
#if CONFIG_VP9_HIGHBITDEPTH
      if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
        s_avg = vpx_highbd_avg_8x8(s + y8_idx * sp + x8_idx, sp);
        if (!is_key_frame)
          d_avg = vpx_highbd_avg_8x8(d + y8_idx * dp + x8_idx, dp);
      } else {
        s_avg = vpx_avg_8x8(s + y8_idx * sp + x8_idx, sp);
        if (!is_key_frame) d_avg = vpx_avg_8x8(d + y8_idx * dp + x8_idx, dp);
      }
#else
      s_avg = vpx_avg_8x8(s + y8_idx * sp + x8_idx, sp);
      if (!is_key_frame) d_avg = vpx_avg_8x8(d + y8_idx * dp + x8_idx, dp);
#endif
      sum = s_avg - d_avg;
      sse = sum * sum;
    }
    fill_variance(sse, sum, 0, &vst->split[k].part_variances.none);
  }
}

// Check if most of the superblock is skin content, and if so, force split to
// 32x32, and set x->sb_is_skin for use in mode selection.
static int skin_sb_split(VP9_COMP *cpi, MACROBLOCK *x, const int low_res,
                         int mi_row, int mi_col, int *force_split) {
  VP9_COMMON *const cm = &cpi->common;
#if CONFIG_VP9_HIGHBITDEPTH
  if (cm->use_highbitdepth) return 0;
#endif
  // Avoid checking superblocks on/near boundary and avoid low resolutions.
  // Note superblock may still pick 64X64 if y_sad is very small
  // (i.e., y_sad < cpi->vbp_threshold_sad) below. For now leave this as is.
  if (!low_res && (mi_col >= 8 && mi_col + 8 < cm->mi_cols && mi_row >= 8 &&
                   mi_row + 8 < cm->mi_rows)) {
    int num_16x16_skin = 0;
    int num_16x16_nonskin = 0;
    uint8_t *ysignal = x->plane[0].src.buf;
    uint8_t *usignal = x->plane[1].src.buf;
    uint8_t *vsignal = x->plane[2].src.buf;
    int sp = x->plane[0].src.stride;
    int spuv = x->plane[1].src.stride;
    const int block_index = mi_row * cm->mi_cols + mi_col;
    const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64];
    const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64];
    const int xmis = VPXMIN(cm->mi_cols - mi_col, bw);
    const int ymis = VPXMIN(cm->mi_rows - mi_row, bh);
    // Loop through the 16x16 sub-blocks.
    int i, j;
    for (i = 0; i < ymis; i += 2) {
      for (j = 0; j < xmis; j += 2) {
        int bl_index = block_index + i * cm->mi_cols + j;
        int is_skin = cpi->skin_map[bl_index];
        num_16x16_skin += is_skin;
        num_16x16_nonskin += (1 - is_skin);
        if (num_16x16_nonskin > 3) {
          // Exit loop if at least 4 of the 16x16 blocks are not skin.
          i = ymis;
          break;
        }
        ysignal += 16;
        usignal += 8;
        vsignal += 8;
      }
      ysignal += (sp << 4) - 64;
      usignal += (spuv << 3) - 32;
      vsignal += (spuv << 3) - 32;
    }
    if (num_16x16_skin > 12) {
      *force_split = 1;
      return 1;
    }
  }
  return 0;
}

static void set_low_temp_var_flag(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
                                  v64x64 *vt, int64_t thresholds[],
                                  MV_REFERENCE_FRAME ref_frame_partition,
                                  int mi_col, int mi_row) {
  int i, j;
  VP9_COMMON *const cm = &cpi->common;
  const int mv_thr = cm->width > 640 ? 8 : 4;
  // Check temporal variance for bsize >= 16x16, if LAST_FRAME was selected and
  // int_pro mv is small. If the temporal variance is small, set the flag
  // variance_low for the block. The variance threshold can be adjusted; the
  // higher it is, the more aggressively the flag is set.
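  // Layout of x->variance_low[] as used below (25 entries per superblock,
  // matching the per-superblock copies in copy_partitioning() and
  // update_prev_partition()): index 0 is the 64x64 block, 1-2 the two 64x32
  // halves, 3-4 the two 32x64 halves, 5-8 the four 32x32 quadrants, and 9-24
  // the sixteen 16x16 blocks.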
  if (ref_frame_partition == LAST_FRAME &&
      (cpi->sf.short_circuit_low_temp_var == 1 ||
       (xd->mi[0]->mv[0].as_mv.col < mv_thr &&
        xd->mi[0]->mv[0].as_mv.col > -mv_thr &&
        xd->mi[0]->mv[0].as_mv.row < mv_thr &&
        xd->mi[0]->mv[0].as_mv.row > -mv_thr))) {
    if (xd->mi[0]->sb_type == BLOCK_64X64) {
      if ((vt->part_variances).none.variance < (thresholds[0] >> 1))
        x->variance_low[0] = 1;
    } else if (xd->mi[0]->sb_type == BLOCK_64X32) {
      for (i = 0; i < 2; i++) {
        if (vt->part_variances.horz[i].variance < (thresholds[0] >> 2))
          x->variance_low[i + 1] = 1;
      }
    } else if (xd->mi[0]->sb_type == BLOCK_32X64) {
      for (i = 0; i < 2; i++) {
        if (vt->part_variances.vert[i].variance < (thresholds[0] >> 2))
          x->variance_low[i + 3] = 1;
      }
    } else {
      for (i = 0; i < 4; i++) {
        const int idx[4][2] = { { 0, 0 }, { 0, 4 }, { 4, 0 }, { 4, 4 } };
        const int idx_str =
            cm->mi_stride * (mi_row + idx[i][0]) + mi_col + idx[i][1];
        MODE_INFO **this_mi = cm->mi_grid_visible + idx_str;

        if (cm->mi_cols <= mi_col + idx[i][1] ||
            cm->mi_rows <= mi_row + idx[i][0])
          continue;

        if ((*this_mi)->sb_type == BLOCK_32X32) {
          int64_t threshold_32x32 = (cpi->sf.short_circuit_low_temp_var == 1 ||
                                     cpi->sf.short_circuit_low_temp_var == 3)
                                        ? ((5 * thresholds[1]) >> 3)
                                        : (thresholds[1] >> 1);
          if (vt->split[i].part_variances.none.variance < threshold_32x32)
            x->variance_low[i + 5] = 1;
        } else if (cpi->sf.short_circuit_low_temp_var >= 2) {
          // For 32x16 and 16x32 blocks, the flag is set on each 16x16 block
          // inside.
          if ((*this_mi)->sb_type == BLOCK_16X16 ||
              (*this_mi)->sb_type == BLOCK_32X16 ||
              (*this_mi)->sb_type == BLOCK_16X32) {
            for (j = 0; j < 4; j++) {
              if (vt->split[i].split[j].part_variances.none.variance <
                  (thresholds[2] >> 8))
                x->variance_low[(i << 2) + j + 9] = 1;
            }
          }
        }
      }
    }
  }
}

static void copy_partitioning_helper(VP9_COMP *cpi, MACROBLOCK *x,
                                     MACROBLOCKD *xd, BLOCK_SIZE bsize,
                                     int mi_row, int mi_col) {
  VP9_COMMON *const cm = &cpi->common;
  BLOCK_SIZE *prev_part = cpi->prev_partition;
  int start_pos = mi_row * cm->mi_stride + mi_col;

  const int bsl = b_width_log2_lookup[bsize];
  const int bs = (1 << bsl) >> 2;
  BLOCK_SIZE subsize;
  PARTITION_TYPE partition;

  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;

  partition = partition_lookup[bsl][prev_part[start_pos]];
  subsize = get_subsize(bsize, partition);

  if (subsize < BLOCK_8X8) {
    set_block_size(cpi, x, xd, mi_row, mi_col, bsize);
  } else {
    switch (partition) {
      case PARTITION_NONE:
        set_block_size(cpi, x, xd, mi_row, mi_col, bsize);
        break;
      case PARTITION_HORZ:
        set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
        set_block_size(cpi, x, xd, mi_row + bs, mi_col, subsize);
        break;
      case PARTITION_VERT:
        set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
        set_block_size(cpi, x, xd, mi_row, mi_col + bs, subsize);
        break;
      default:
        assert(partition == PARTITION_SPLIT);
        copy_partitioning_helper(cpi, x, xd, subsize, mi_row, mi_col);
        copy_partitioning_helper(cpi, x, xd, subsize, mi_row + bs, mi_col);
        copy_partitioning_helper(cpi, x, xd, subsize, mi_row, mi_col + bs);
        copy_partitioning_helper(cpi, x, xd, subsize, mi_row + bs, mi_col + bs);
        break;
    }
  }
}

static int copy_partitioning(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
                             int mi_row, int mi_col, int segment_id,
                             int sb_offset) {
  int svc_copy_allowed = 1;
  int frames_since_key_thresh = 1;
  if (cpi->use_svc) {
    // For SVC, don't allow copy if base spatial layer is key frame, or if
    // frame is not a temporal enhancement layer frame.
    int layer = LAYER_IDS_TO_IDX(0, cpi->svc.temporal_layer_id,
                                 cpi->svc.number_temporal_layers);
    const LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer];
    if (lc->is_key_frame || !cpi->svc.non_reference_frame) svc_copy_allowed = 0;
    frames_since_key_thresh = cpi->svc.number_spatial_layers << 1;
  }
  if (cpi->rc.frames_since_key > frames_since_key_thresh && svc_copy_allowed &&
      !cpi->resize_pending && segment_id == CR_SEGMENT_ID_BASE &&
      cpi->prev_segment_id[sb_offset] == CR_SEGMENT_ID_BASE &&
      cpi->copied_frame_cnt[sb_offset] < cpi->max_copied_frame) {
    if (cpi->prev_partition != NULL) {
      copy_partitioning_helper(cpi, x, xd, BLOCK_64X64, mi_row, mi_col);
      cpi->copied_frame_cnt[sb_offset] += 1;
      memcpy(x->variance_low, &(cpi->prev_variance_low[sb_offset * 25]),
             sizeof(x->variance_low));
      return 1;
    }
  }

  return 0;
}

static int scale_partitioning_svc(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
                                  BLOCK_SIZE bsize, int mi_row, int mi_col,
                                  int mi_row_high, int mi_col_high) {
  VP9_COMMON *const cm = &cpi->common;
  SVC *const svc = &cpi->svc;
  BLOCK_SIZE *prev_part = svc->prev_partition_svc;
  // Variables with _high are for higher resolution.
  int bsize_high = 0;
  int subsize_high = 0;
  const int bsl_high = b_width_log2_lookup[bsize];
  const int bs_high = (1 << bsl_high) >> 2;
  const int has_rows = (mi_row_high + bs_high) < cm->mi_rows;
  const int has_cols = (mi_col_high + bs_high) < cm->mi_cols;

  const int row_boundary_block_scale_factor[BLOCK_SIZES] = {
    13, 13, 13, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0
  };
  const int col_boundary_block_scale_factor[BLOCK_SIZES] = {
    13, 13, 13, 2, 2, 0, 2, 2, 0, 2, 2, 0, 0
  };
  int start_pos;
  BLOCK_SIZE bsize_low;
  PARTITION_TYPE partition_high;

  if (mi_row_high >= cm->mi_rows || mi_col_high >= cm->mi_cols) return 0;
  if (mi_row >= svc->mi_rows[svc->spatial_layer_id - 1] ||
      mi_col >= svc->mi_cols[svc->spatial_layer_id - 1])
    return 0;

  // Find corresponding (mi_col/mi_row) block down-scaled by 2x2.
  start_pos = mi_row * (svc->mi_stride[svc->spatial_layer_id - 1]) + mi_col;
  bsize_low = prev_part[start_pos];
  // The block size is too big for boundaries. Do variance based partitioning.
  if ((!has_rows || !has_cols) && bsize_low > BLOCK_16X16) return 1;

  // For reference frames: return 1 (do variance-based partitioning) if the
  // superblock is not low source sad and the lower-resolution bsize is below
  // 32x32.
  if (!cpi->svc.non_reference_frame && !x->skip_low_source_sad &&
      bsize_low < BLOCK_32X32)
    return 1;

  // Scale up block size by 2x2. Force 64x64 for size larger than 32x32.
  if (bsize_low < BLOCK_32X32) {
    bsize_high = bsize_low + 3;
  } else if (bsize_low >= BLOCK_32X32) {
    bsize_high = BLOCK_64X64;
  }
  // Scale up blocks on boundary.
  if (!has_cols && has_rows) {
    bsize_high = bsize_low + row_boundary_block_scale_factor[bsize_low];
  } else if (has_cols && !has_rows) {
    bsize_high = bsize_low + col_boundary_block_scale_factor[bsize_low];
  } else if (!has_cols && !has_rows) {
    bsize_high = bsize_low;
  }

  partition_high = partition_lookup[bsl_high][bsize_high];
  subsize_high = get_subsize(bsize, partition_high);

  if (subsize_high < BLOCK_8X8) {
    set_block_size(cpi, x, xd, mi_row_high, mi_col_high, bsize_high);
  } else {
    const int bsl = b_width_log2_lookup[bsize];
    const int bs = (1 << bsl) >> 2;
    switch (partition_high) {
      case PARTITION_NONE:
        set_block_size(cpi, x, xd, mi_row_high, mi_col_high, bsize_high);
        break;
      case PARTITION_HORZ:
        set_block_size(cpi, x, xd, mi_row_high, mi_col_high, subsize_high);
        if (subsize_high < BLOCK_64X64)
          set_block_size(cpi, x, xd, mi_row_high + bs_high, mi_col_high,
                         subsize_high);
        break;
      case PARTITION_VERT:
        set_block_size(cpi, x, xd, mi_row_high, mi_col_high, subsize_high);
        if (subsize_high < BLOCK_64X64)
          set_block_size(cpi, x, xd, mi_row_high, mi_col_high + bs_high,
                         subsize_high);
        break;
      default:
        assert(partition_high == PARTITION_SPLIT);
        if (scale_partitioning_svc(cpi, x, xd, subsize_high, mi_row, mi_col,
                                   mi_row_high, mi_col_high))
          return 1;
        if (scale_partitioning_svc(cpi, x, xd, subsize_high, mi_row + (bs >> 1),
                                   mi_col, mi_row_high + bs_high, mi_col_high))
          return 1;
        if (scale_partitioning_svc(cpi, x, xd, subsize_high, mi_row,
                                   mi_col + (bs >> 1), mi_row_high,
                                   mi_col_high + bs_high))
          return 1;
        if (scale_partitioning_svc(cpi, x, xd, subsize_high, mi_row + (bs >> 1),
                                   mi_col + (bs >> 1), mi_row_high + bs_high,
                                   mi_col_high + bs_high))
          return 1;
        break;
    }
  }

  return 0;
}

static void update_partition_svc(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row,
                                 int mi_col) {
  VP9_COMMON *const cm = &cpi->common;
  BLOCK_SIZE *prev_part = cpi->svc.prev_partition_svc;
  int start_pos = mi_row * cm->mi_stride + mi_col;
  const int bsl = b_width_log2_lookup[bsize];
  const int bs = (1 << bsl) >> 2;
  BLOCK_SIZE subsize;
  PARTITION_TYPE partition;
  const MODE_INFO *mi = NULL;
  int xx, yy;

  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;

  mi = cm->mi_grid_visible[start_pos];
  partition = partition_lookup[bsl][mi->sb_type];
  subsize = get_subsize(bsize, partition);
  if (subsize < BLOCK_8X8) {
    prev_part[start_pos] = bsize;
  } else {
    switch (partition) {
      case PARTITION_NONE:
        prev_part[start_pos] = bsize;
        if (bsize == BLOCK_64X64) {
          for (xx = 0; xx < 8; xx += 4)
            for (yy = 0; yy < 8; yy += 4) {
              if ((mi_row + xx < cm->mi_rows) && (mi_col + yy < cm->mi_cols))
                prev_part[start_pos + xx * cm->mi_stride + yy] = bsize;
            }
        }
        break;
      case PARTITION_HORZ:
        prev_part[start_pos] = subsize;
        if (mi_row + bs < cm->mi_rows)
          prev_part[start_pos + bs * cm->mi_stride] = subsize;
        break;
      case PARTITION_VERT:
        prev_part[start_pos] = subsize;
        if (mi_col + bs < cm->mi_cols) prev_part[start_pos + bs] = subsize;
        break;
      default:
        assert(partition == PARTITION_SPLIT);
        update_partition_svc(cpi, subsize, mi_row, mi_col);
        update_partition_svc(cpi, subsize, mi_row + bs, mi_col);
        update_partition_svc(cpi, subsize, mi_row, mi_col + bs);
        update_partition_svc(cpi, subsize, mi_row + bs, mi_col + bs);
        break;
    }
  }
}

static void update_prev_partition_helper(VP9_COMP *cpi, BLOCK_SIZE bsize,
                                         int mi_row, int mi_col) {
  VP9_COMMON *const cm = &cpi->common;
  BLOCK_SIZE *prev_part = cpi->prev_partition;
  int start_pos = mi_row * cm->mi_stride + mi_col;
  const int bsl = b_width_log2_lookup[bsize];
  const int bs = (1 << bsl) >> 2;
  BLOCK_SIZE subsize;
  PARTITION_TYPE partition;
  const MODE_INFO *mi = NULL;

  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;

  mi = cm->mi_grid_visible[start_pos];
  partition = partition_lookup[bsl][mi->sb_type];
  subsize = get_subsize(bsize, partition);
  if (subsize < BLOCK_8X8) {
    prev_part[start_pos] = bsize;
  } else {
    switch (partition) {
      case PARTITION_NONE: prev_part[start_pos] = bsize; break;
      case PARTITION_HORZ:
        prev_part[start_pos] = subsize;
        if (mi_row + bs < cm->mi_rows)
          prev_part[start_pos + bs * cm->mi_stride] = subsize;
        break;
      case PARTITION_VERT:
        prev_part[start_pos] = subsize;
        if (mi_col + bs < cm->mi_cols) prev_part[start_pos + bs] = subsize;
        break;
      default:
        assert(partition == PARTITION_SPLIT);
        update_prev_partition_helper(cpi, subsize, mi_row, mi_col);
        update_prev_partition_helper(cpi, subsize, mi_row + bs, mi_col);
        update_prev_partition_helper(cpi, subsize, mi_row, mi_col + bs);
        update_prev_partition_helper(cpi, subsize, mi_row + bs, mi_col + bs);
        break;
    }
  }
}

static void update_prev_partition(VP9_COMP *cpi, MACROBLOCK *x, int segment_id,
                                  int mi_row, int mi_col, int sb_offset) {
  update_prev_partition_helper(cpi, BLOCK_64X64, mi_row, mi_col);
  cpi->prev_segment_id[sb_offset] = segment_id;
  memcpy(&(cpi->prev_variance_low[sb_offset * 25]), x->variance_low,
         sizeof(x->variance_low));
  // Reset the counter for copy partitioning
  cpi->copied_frame_cnt[sb_offset] = 0;
}

static void chroma_check(VP9_COMP *cpi, MACROBLOCK *x, int bsize,
                         unsigned int y_sad, int is_key_frame) {
  int i;
  MACROBLOCKD *xd = &x->e_mbd;

  if (is_key_frame) return;

  // For speed >= 8, avoid the chroma check if y_sad is above threshold.
  if (cpi->oxcf.speed >= 8) {
    if (y_sad > cpi->vbp_thresholds[1] &&
        (!cpi->noise_estimate.enabled ||
         vp9_noise_estimate_extract_level(&cpi->noise_estimate) < kMedium))
      return;
  }

  for (i = 1; i <= 2; ++i) {
    unsigned int uv_sad = UINT_MAX;
    struct macroblock_plane *p = &x->plane[i];
    struct macroblockd_plane *pd = &xd->plane[i];
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);

    if (bs != BLOCK_INVALID)
      uv_sad = cpi->fn_ptr[bs].sdf(p->src.buf, p->src.stride, pd->dst.buf,
                                   pd->dst.stride);

    // TODO(marpan): Investigate if we should lower this threshold if
    // superblock is detected as skin.
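    // Mark the chroma plane as "color sensitive" when its SAD against the
    // prediction exceeds a quarter of the luma SAD, i.e. chroma changes are
    // relatively large compared to luma, so they are not ignored in mode
    // selection.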
    x->color_sensitivity[i - 1] = uv_sad > (y_sad >> 2);
  }
}

static uint64_t avg_source_sad(VP9_COMP *cpi, MACROBLOCK *x, int shift,
                               int sb_offset) {
  unsigned int tmp_sse;
  uint64_t tmp_sad;
  unsigned int tmp_variance;
  const BLOCK_SIZE bsize = BLOCK_64X64;
  uint8_t *src_y = cpi->Source->y_buffer;
  int src_ystride = cpi->Source->y_stride;
  uint8_t *last_src_y = cpi->Last_Source->y_buffer;
  int last_src_ystride = cpi->Last_Source->y_stride;
  uint64_t avg_source_sad_threshold = 10000;
  uint64_t avg_source_sad_threshold2 = 12000;
#if CONFIG_VP9_HIGHBITDEPTH
  if (cpi->common.use_highbitdepth) return 0;
#endif
  src_y += shift;
  last_src_y += shift;
  tmp_sad =
      cpi->fn_ptr[bsize].sdf(src_y, src_ystride, last_src_y, last_src_ystride);
  tmp_variance = vpx_variance64x64(src_y, src_ystride, last_src_y,
                                   last_src_ystride, &tmp_sse);
  // Note: tmp_sse - tmp_variance = ((sum * sum) >> 12)
  if (tmp_sad < avg_source_sad_threshold)
    x->content_state_sb = ((tmp_sse - tmp_variance) < 25) ? kLowSadLowSumdiff
                                                          : kLowSadHighSumdiff;
  else
    x->content_state_sb = ((tmp_sse - tmp_variance) < 25) ? kHighSadLowSumdiff
                                                          : kHighSadHighSumdiff;

  // Detect large lighting change.
  if (cpi->oxcf.content != VP9E_CONTENT_SCREEN &&
      cpi->oxcf.rc_mode == VPX_CBR && tmp_variance < (tmp_sse >> 3) &&
      (tmp_sse - tmp_variance) > 10000)
    x->content_state_sb = kLowVarHighSumdiff;
  else if (tmp_sad > (avg_source_sad_threshold << 1))
    x->content_state_sb = kVeryHighSad;

  if (cpi->content_state_sb_fd != NULL) {
    if (tmp_sad < avg_source_sad_threshold2) {
      // Cap the increment to 255.
      if (cpi->content_state_sb_fd[sb_offset] < 255)
        cpi->content_state_sb_fd[sb_offset]++;
    } else {
      cpi->content_state_sb_fd[sb_offset] = 0;
    }
  }
  if (tmp_sad == 0) x->zero_temp_sad_source = 1;
  return tmp_sad;
}

// This function chooses partitioning based on the variance between source and
// reconstructed last frame, where variance is computed for down-sampled inputs.
static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
                               MACROBLOCK *x, int mi_row, int mi_col) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  int i, j, k, m;
  v64x64 vt;
  v16x16 *vt2 = NULL;
  int force_split[21];
  int avg_32x32;
  int max_var_32x32 = 0;
  int min_var_32x32 = INT_MAX;
  int var_32x32;
  int avg_16x16[4];
  int maxvar_16x16[4];
  int minvar_16x16[4];
  int64_t threshold_4x4avg;
  NOISE_LEVEL noise_level = kLow;
  int content_state = 0;
  uint8_t *s;
  const uint8_t *d;
  int sp;
  int dp;
  int compute_minmax_variance = 1;
  unsigned int y_sad = UINT_MAX;
  BLOCK_SIZE bsize = BLOCK_64X64;
  // Ref frame used in partitioning.
  MV_REFERENCE_FRAME ref_frame_partition = LAST_FRAME;
  int pixels_wide = 64, pixels_high = 64;
  int64_t thresholds[4] = { cpi->vbp_thresholds[0], cpi->vbp_thresholds[1],
                            cpi->vbp_thresholds[2], cpi->vbp_thresholds[3] };
  int scene_change_detected =
      cpi->rc.high_source_sad ||
      (cpi->use_svc && cpi->svc.high_source_sad_superframe);

  // For the variance computation under SVC mode, we treat the frame as key if
  // the reference (base layer frame) is a key frame (i.e., is_key_frame == 1).
  int is_key_frame =
      (frame_is_intra_only(cm) ||
       (is_one_pass_cbr_svc(cpi) &&
        cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame));
  // Always use 4x4 partition for key frame.
  const int use_4x4_partition = frame_is_intra_only(cm);
  const int low_res = (cm->width <= 352 && cm->height <= 288);
  int variance4x4downsample[16];
  int segment_id;
  int sb_offset = (cm->mi_stride >> 3) * (mi_row >> 3) + (mi_col >> 3);

  // For SVC: check if LAST frame is NULL or if the resolution of LAST is
  // different than the current frame resolution, and if so, treat this frame
  // as a key frame, for the purpose of the superblock partitioning.
  // LAST == NULL can happen in some cases where enhancement spatial layers are
  // enabled dynamically in the stream and the only reference is the spatial
  // reference (GOLDEN).
  if (cpi->use_svc) {
    const YV12_BUFFER_CONFIG *const ref = get_ref_frame_buffer(cpi, LAST_FRAME);
    if (ref == NULL || ref->y_crop_height != cm->height ||
        ref->y_crop_width != cm->width)
      is_key_frame = 1;
  }

  set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64);
  set_segment_index(cpi, x, mi_row, mi_col, BLOCK_64X64, 0);
  segment_id = xd->mi[0]->segment_id;

  if (cpi->oxcf.speed >= 8 || (cpi->use_svc && cpi->svc.non_reference_frame))
    compute_minmax_variance = 0;

  memset(x->variance_low, 0, sizeof(x->variance_low));

  if (cpi->sf.use_source_sad && !is_key_frame) {
    int sb_offset2 = ((cm->mi_cols + 7) >> 3) * (mi_row >> 3) + (mi_col >> 3);
    content_state = x->content_state_sb;
    x->skip_low_source_sad = (content_state == kLowSadLowSumdiff ||
                              content_state == kLowSadHighSumdiff)
                                 ? 1
                                 : 0;
    x->lowvar_highsumdiff = (content_state == kLowVarHighSumdiff) ? 1 : 0;
    if (cpi->content_state_sb_fd != NULL)
      x->last_sb_high_content = cpi->content_state_sb_fd[sb_offset2];

    // For SVC on top spatial layer: use/scale the partition from
    // the lower spatial resolution if svc_use_lowres_part is enabled.
    if (cpi->sf.svc_use_lowres_part &&
        cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1 &&
        cpi->svc.prev_partition_svc != NULL && content_state != kVeryHighSad) {
      if (!scale_partitioning_svc(cpi, x, xd, BLOCK_64X64, mi_row >> 1,
                                  mi_col >> 1, mi_row, mi_col)) {
        if (cpi->sf.copy_partition_flag) {
          update_prev_partition(cpi, x, segment_id, mi_row, mi_col, sb_offset);
        }
        return 0;
      }
    }
    // If source_sad is low, copy the partition without computing the y_sad.
    if (x->skip_low_source_sad && cpi->sf.copy_partition_flag &&
        !scene_change_detected &&
        copy_partitioning(cpi, x, xd, mi_row, mi_col, segment_id, sb_offset)) {
      x->sb_use_mv_part = 1;
      if (cpi->sf.svc_use_lowres_part &&
          cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 2)
        update_partition_svc(cpi, BLOCK_64X64, mi_row, mi_col);
      return 0;
    }
  }

  if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled &&
      cyclic_refresh_segment_id_boosted(segment_id)) {
    int q = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex);
    set_vbp_thresholds(cpi, thresholds, q, content_state);
  } else {
    set_vbp_thresholds(cpi, thresholds, cm->base_qindex, content_state);
  }

  // For non-key frames, disable 4x4 average for low resolution when speed = 8.
  threshold_4x4avg = (cpi->oxcf.speed < 8) ? thresholds[1] << 1 : INT64_MAX;

  if (xd->mb_to_right_edge < 0) pixels_wide += (xd->mb_to_right_edge >> 3);
  if (xd->mb_to_bottom_edge < 0) pixels_high += (xd->mb_to_bottom_edge >> 3);

  s = x->plane[0].src.buf;
  sp = x->plane[0].src.stride;

  // Index for force_split: 0 for 64x64, 1-4 for 32x32 blocks,
  // 5-20 for the 16x16 blocks.
  force_split[0] = scene_change_detected;

  if (!is_key_frame) {
    // In the case of spatial/temporal scalable coding, the assumption here is
    // that the temporal reference frame will always be of type LAST_FRAME.
    // TODO(marpan): If that assumption is broken, we need to revisit this code.
    MODE_INFO *mi = xd->mi[0];
    YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);

    const YV12_BUFFER_CONFIG *yv12_g = NULL;
    unsigned int y_sad_g, y_sad_thr, y_sad_last;
    bsize = BLOCK_32X32 + (mi_col + 4 < cm->mi_cols) * 2 +
            (mi_row + 4 < cm->mi_rows);

    assert(yv12 != NULL);

    if (!(is_one_pass_cbr_svc(cpi) && cpi->svc.spatial_layer_id) ||
        cpi->svc.use_gf_temporal_ref_current_layer) {
      // For now, GOLDEN will not be used for non-zero spatial layers, since
      // it may not be a temporal reference.
      yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
    }

    // Only compute y_sad_g (sad for golden reference) for speed < 8.
    if (cpi->oxcf.speed < 8 && yv12_g && yv12_g != yv12 &&
        (cpi->ref_frame_flags & VP9_GOLD_FLAG)) {
      vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
                           &cm->frame_refs[GOLDEN_FRAME - 1].sf);
      y_sad_g = cpi->fn_ptr[bsize].sdf(
          x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf,
          xd->plane[0].pre[0].stride);
    } else {
      y_sad_g = UINT_MAX;
    }

    if (cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR &&
        cpi->rc.is_src_frame_alt_ref) {
      yv12 = get_ref_frame_buffer(cpi, ALTREF_FRAME);
      vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
                           &cm->frame_refs[ALTREF_FRAME - 1].sf);
      mi->ref_frame[0] = ALTREF_FRAME;
      y_sad_g = UINT_MAX;
    } else {
      vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
                           &cm->frame_refs[LAST_FRAME - 1].sf);
      mi->ref_frame[0] = LAST_FRAME;
    }
    mi->ref_frame[1] = NONE;
    mi->sb_type = BLOCK_64X64;
    mi->mv[0].as_int = 0;
    mi->interp_filter = BILINEAR;

    if (cpi->oxcf.speed >= 8 && !low_res &&
        x->content_state_sb != kVeryHighSad) {
      y_sad = cpi->fn_ptr[bsize].sdf(
          x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf,
          xd->plane[0].pre[0].stride);
    } else {
      const MV dummy_mv = { 0, 0 };
      y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col,
                                            &dummy_mv);
      x->sb_use_mv_part = 1;
      x->sb_mvcol_part = mi->mv[0].as_mv.col;
      x->sb_mvrow_part = mi->mv[0].as_mv.row;
      if (cpi->oxcf.content == VP9E_CONTENT_SCREEN &&
          cpi->svc.spatial_layer_id == 0 &&
          cpi->svc.high_num_blocks_with_motion && !x->zero_temp_sad_source &&
          cm->width > 640 && cm->height > 480) {
        // Disable split below 16x16 block size when scroll motion is detected.
        // TODO(marpan/jianj): Improve this condition: issue is that search
        // range is hard-coded/limited in vp9_int_pro_motion_estimation() so
        // scroll motion may not be detected here.
        if ((abs(x->sb_mvrow_part) >= 48 && abs(x->sb_mvcol_part) <= 8) ||
            y_sad < 100000) {
          compute_minmax_variance = 0;
          thresholds[2] = INT64_MAX;
        }
      }
    }

    y_sad_last = y_sad;
    // Pick ref frame for partitioning, bias last frame when y_sad_g and y_sad
    // are close if short_circuit_low_temp_var is on.
    y_sad_thr = cpi->sf.short_circuit_low_temp_var ? (y_sad * 7) >> 3 : y_sad;
    if (y_sad_g < y_sad_thr) {
      vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
                           &cm->frame_refs[GOLDEN_FRAME - 1].sf);
      mi->ref_frame[0] = GOLDEN_FRAME;
      mi->mv[0].as_int = 0;
      y_sad = y_sad_g;
      ref_frame_partition = GOLDEN_FRAME;
    } else {
      x->pred_mv[LAST_FRAME] = mi->mv[0].as_mv;
      ref_frame_partition = LAST_FRAME;
    }

    set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
    vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_64X64);

    if (cpi->use_skin_detection)
      x->sb_is_skin =
          skin_sb_split(cpi, x, low_res, mi_row, mi_col, force_split);

    d = xd->plane[0].dst.buf;
    dp = xd->plane[0].dst.stride;

    // If the y_sad is very small, take 64x64 as partition and exit.
    // Don't check on boosted segment for now, as 64x64 is suppressed there.
    if (segment_id == CR_SEGMENT_ID_BASE && y_sad < cpi->vbp_threshold_sad) {
      const int block_width = num_8x8_blocks_wide_lookup[BLOCK_64X64];
      const int block_height = num_8x8_blocks_high_lookup[BLOCK_64X64];
      if (mi_col + block_width / 2 < cm->mi_cols &&
          mi_row + block_height / 2 < cm->mi_rows) {
        set_block_size(cpi, x, xd, mi_row, mi_col, BLOCK_64X64);
        x->variance_low[0] = 1;
        chroma_check(cpi, x, bsize, y_sad, is_key_frame);
        if (cpi->sf.svc_use_lowres_part &&
            cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 2)
          update_partition_svc(cpi, BLOCK_64X64, mi_row, mi_col);
        if (cpi->sf.copy_partition_flag) {
          update_prev_partition(cpi, x, segment_id, mi_row, mi_col, sb_offset);
        }
        return 0;
      }
    }

    // If the y_sad is small enough, copy the partition of the superblock in the
    // last frame to the current frame, only if the last frame is not a keyframe.
    // Stop the copy every cpi->max_copied_frame to refresh the partition.
    // TODO(jianj): tune the threshold.
    if (cpi->sf.copy_partition_flag && y_sad_last < cpi->vbp_threshold_copy &&
        copy_partitioning(cpi, x, xd, mi_row, mi_col, segment_id, sb_offset)) {
      chroma_check(cpi, x, bsize, y_sad, is_key_frame);
      if (cpi->sf.svc_use_lowres_part &&
          cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 2)
        update_partition_svc(cpi, BLOCK_64X64, mi_row, mi_col);
      return 0;
    }
  } else {
    d = VP9_VAR_OFFS;
    dp = 0;
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      switch (xd->bd) {
        case 10: d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_10); break;
        case 12: d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_12); break;
        case 8:
        default: d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_8); break;
      }
    }
#endif  // CONFIG_VP9_HIGHBITDEPTH
  }

  if (low_res && threshold_4x4avg < INT64_MAX)
    CHECK_MEM_ERROR(cm, vt2, vpx_calloc(16, sizeof(*vt2)));
  // Fill in the entire tree of 8x8 (or 4x4 under some conditions) variances
  // for splits.
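  // In the loop below, i walks the four 32x32 quadrants of the superblock
  // (x32_idx/y32_idx are pixel offsets) and j the four 16x16 blocks within
  // each quadrant; split_index = 5 + i2 + j matches the force_split[] layout
  // described above.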
  for (i = 0; i < 4; i++) {
    const int x32_idx = ((i & 1) << 5);
    const int y32_idx = ((i >> 1) << 5);
    const int i2 = i << 2;
    force_split[i + 1] = 0;
    avg_16x16[i] = 0;
    maxvar_16x16[i] = 0;
    minvar_16x16[i] = INT_MAX;
    for (j = 0; j < 4; j++) {
      const int x16_idx = x32_idx + ((j & 1) << 4);
      const int y16_idx = y32_idx + ((j >> 1) << 4);
      const int split_index = 5 + i2 + j;
      v16x16 *vst = &vt.split[i].split[j];
      force_split[split_index] = 0;
      variance4x4downsample[i2 + j] = 0;
      if (!is_key_frame) {
        fill_variance_8x8avg(s, sp, d, dp, x16_idx, y16_idx, vst,
#if CONFIG_VP9_HIGHBITDEPTH
                             xd->cur_buf->flags,
#endif
                             pixels_wide, pixels_high, is_key_frame);
        fill_variance_tree(&vt.split[i].split[j], BLOCK_16X16);
        get_variance(&vt.split[i].split[j].part_variances.none);
        avg_16x16[i] += vt.split[i].split[j].part_variances.none.variance;
        if (vt.split[i].split[j].part_variances.none.variance < minvar_16x16[i])
          minvar_16x16[i] = vt.split[i].split[j].part_variances.none.variance;
        if (vt.split[i].split[j].part_variances.none.variance > maxvar_16x16[i])
          maxvar_16x16[i] = vt.split[i].split[j].part_variances.none.variance;
        if (vt.split[i].split[j].part_variances.none.variance > thresholds[2]) {
          // 16X16 variance is above threshold for split, so force split to 8x8
          // for this 16x16 block (this also forces splits for upper levels).
          force_split[split_index] = 1;
          force_split[i + 1] = 1;
          force_split[0] = 1;
        } else if (compute_minmax_variance &&
                   vt.split[i].split[j].part_variances.none.variance >
                       thresholds[1] &&
                   !cyclic_refresh_segment_id_boosted(segment_id)) {
          // We have some nominal amount of 16x16 variance (based on average),
          // compute the minmax over the 8x8 sub-blocks, and if above threshold,
          // force split to 8x8 block for this 16x16 block.
          int minmax = compute_minmax_8x8(s, sp, d, dp, x16_idx, y16_idx,
#if CONFIG_VP9_HIGHBITDEPTH
                                          xd->cur_buf->flags,
#endif
                                          pixels_wide, pixels_high);
          int thresh_minmax = (int)cpi->vbp_threshold_minmax;
          if (x->content_state_sb == kVeryHighSad)
            thresh_minmax = thresh_minmax << 1;
          if (minmax > thresh_minmax) {
            force_split[split_index] = 1;
            force_split[i + 1] = 1;
            force_split[0] = 1;
          }
        }
      }
      if (is_key_frame ||
          (low_res && vt.split[i].split[j].part_variances.none.variance >
                          threshold_4x4avg)) {
        force_split[split_index] = 0;
        // Go down to 4x4 down-sampling for variance.
        variance4x4downsample[i2 + j] = 1;
        for (k = 0; k < 4; k++) {
          int x8_idx = x16_idx + ((k & 1) << 3);
          int y8_idx = y16_idx + ((k >> 1) << 3);
          v8x8 *vst2 = is_key_frame ? &vst->split[k] : &vt2[i2 + j].split[k];
          fill_variance_4x4avg(s, sp, d, dp, x8_idx, y8_idx, vst2,
#if CONFIG_VP9_HIGHBITDEPTH
                               xd->cur_buf->flags,
#endif
                               pixels_wide, pixels_high, is_key_frame);
        }
      }
    }
  }
  if (cpi->noise_estimate.enabled)
    noise_level = vp9_noise_estimate_extract_level(&cpi->noise_estimate);
  // Fill the rest of the variance tree by summing split partition values.
  avg_32x32 = 0;
  for (i = 0; i < 4; i++) {
    const int i2 = i << 2;
    for (j = 0; j < 4; j++) {
      if (variance4x4downsample[i2 + j] == 1) {
            &vt2[i2 + j] : &vt.split[i].split[j];
        for (m = 0; m < 4; m++) fill_variance_tree(&vtemp->split[m], BLOCK_8X8);
        fill_variance_tree(vtemp, BLOCK_16X16);
        // If variance of this 16x16 block is above the threshold, force block
        // to split. This also forces a split on the upper levels.
        get_variance(&vtemp->part_variances.none);
        if (vtemp->part_variances.none.variance > thresholds[2]) {
          force_split[5 + i2 + j] = 1;
          force_split[i + 1] = 1;
          force_split[0] = 1;
        }
      }
    }
    fill_variance_tree(&vt.split[i], BLOCK_32X32);
    // If variance of this 32x32 block is above the threshold, or if it's above
    // (some threshold of) the average variance over the sub-16x16 blocks, then
    // force this block to split. This also forces a split on the upper
    // (64x64) level.
    if (!force_split[i + 1]) {
      get_variance(&vt.split[i].part_variances.none);
      var_32x32 = vt.split[i].part_variances.none.variance;
      max_var_32x32 = VPXMAX(var_32x32, max_var_32x32);
      min_var_32x32 = VPXMIN(var_32x32, min_var_32x32);
      if (vt.split[i].part_variances.none.variance > thresholds[1] ||
          (!is_key_frame &&
           vt.split[i].part_variances.none.variance > (thresholds[1] >> 1) &&
           vt.split[i].part_variances.none.variance > (avg_16x16[i] >> 1))) {
        force_split[i + 1] = 1;
        force_split[0] = 1;
      } else if (!is_key_frame && noise_level < kLow && cm->height <= 360 &&
                 (maxvar_16x16[i] - minvar_16x16[i]) > (thresholds[1] >> 1) &&
                 maxvar_16x16[i] > thresholds[1]) {
        force_split[i + 1] = 1;
        force_split[0] = 1;
      }
      avg_32x32 += var_32x32;
    }
  }
  if (!force_split[0]) {
    fill_variance_tree(&vt, BLOCK_64X64);
    get_variance(&vt.part_variances.none);
    // If variance of this 64x64 block is above (some threshold of) the average
    // variance over the sub-32x32 blocks, then force this block to split.
    // Only checking this for noise level >= medium for now.
    if (!is_key_frame && noise_level >= kMedium &&
        vt.part_variances.none.variance > (9 * avg_32x32) >> 5)
      force_split[0] = 1;
    // Else if the maximum 32x32 variance minus the minimum 32x32 variance in
    // a 64x64 block is greater than threshold and the maximum 32x32 variance
    // is above a minimum threshold, then force the split of a 64x64 block.
    // Only check this for low noise.
    else if (!is_key_frame && noise_level < kMedium &&
             (max_var_32x32 - min_var_32x32) > 3 * (thresholds[0] >> 3) &&
             max_var_32x32 > thresholds[0] >> 1)
      force_split[0] = 1;
  }

  // Now go through the entire structure, splitting every block size until
  // we get to one that's got a variance lower than our threshold.
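  // Note: set_vt_partitioning() returns nonzero when it commits a partition at
  // the given level (broadly, when the force_split flag is clear and the
  // variance is under the threshold passed in), so each nested level below is
  // only visited when the larger size was rejected. The 64x64 check is
  // bypassed entirely when the superblock crosses the right or bottom frame
  // border (mi_col + 8 > cm->mi_cols || mi_row + 8 > cm->mi_rows).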
1657 if (mi_col + 8 > cm->mi_cols || mi_row + 8 > cm->mi_rows || 1658 !set_vt_partitioning(cpi, x, xd, &vt, BLOCK_64X64, mi_row, mi_col, 1659 thresholds[0], BLOCK_16X16, force_split[0])) { 1660 for (i = 0; i < 4; ++i) { 1661 const int x32_idx = ((i & 1) << 2); 1662 const int y32_idx = ((i >> 1) << 2); 1663 const int i2 = i << 2; 1664 if (!set_vt_partitioning(cpi, x, xd, &vt.split[i], BLOCK_32X32, 1665 (mi_row + y32_idx), (mi_col + x32_idx), 1666 thresholds[1], BLOCK_16X16, 1667 force_split[i + 1])) { 1668 for (j = 0; j < 4; ++j) { 1669 const int x16_idx = ((j & 1) << 1); 1670 const int y16_idx = ((j >> 1) << 1); 1671 // For inter frames: if variance4x4downsample[] == 1 for this 16x16 1672 // block, then the variance is based on 4x4 down-sampling, so use vt2 1673 // in set_vt_partioning(), otherwise use vt. 1674 v16x16 *vtemp = (!is_key_frame && variance4x4downsample[i2 + j] == 1) 1675 ? &vt2[i2 + j] 1676 : &vt.split[i].split[j]; 1677 if (!set_vt_partitioning( 1678 cpi, x, xd, vtemp, BLOCK_16X16, mi_row + y32_idx + y16_idx, 1679 mi_col + x32_idx + x16_idx, thresholds[2], cpi->vbp_bsize_min, 1680 force_split[5 + i2 + j])) { 1681 for (k = 0; k < 4; ++k) { 1682 const int x8_idx = (k & 1); 1683 const int y8_idx = (k >> 1); 1684 if (use_4x4_partition) { 1685 if (!set_vt_partitioning(cpi, x, xd, &vtemp->split[k], 1686 BLOCK_8X8, 1687 mi_row + y32_idx + y16_idx + y8_idx, 1688 mi_col + x32_idx + x16_idx + x8_idx, 1689 thresholds[3], BLOCK_8X8, 0)) { 1690 set_block_size( 1691 cpi, x, xd, (mi_row + y32_idx + y16_idx + y8_idx), 1692 (mi_col + x32_idx + x16_idx + x8_idx), BLOCK_4X4); 1693 } 1694 } else { 1695 set_block_size( 1696 cpi, x, xd, (mi_row + y32_idx + y16_idx + y8_idx), 1697 (mi_col + x32_idx + x16_idx + x8_idx), BLOCK_8X8); 1698 } 1699 } 1700 } 1701 } 1702 } 1703 } 1704 } 1705 1706 if (!frame_is_intra_only(cm) && cpi->sf.copy_partition_flag) { 1707 update_prev_partition(cpi, x, segment_id, mi_row, mi_col, sb_offset); 1708 } 1709 1710 if (!frame_is_intra_only(cm) && cpi->sf.svc_use_lowres_part && 1711 cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 2) 1712 update_partition_svc(cpi, BLOCK_64X64, mi_row, mi_col); 1713 1714 if (cpi->sf.short_circuit_low_temp_var) { 1715 set_low_temp_var_flag(cpi, x, xd, &vt, thresholds, ref_frame_partition, 1716 mi_col, mi_row); 1717 } 1718 1719 chroma_check(cpi, x, bsize, y_sad, is_key_frame); 1720 if (vt2) vpx_free(vt2); 1721 return 0; 1722 } 1723 1724 static void update_state(VP9_COMP *cpi, ThreadData *td, PICK_MODE_CONTEXT *ctx, 1725 int mi_row, int mi_col, BLOCK_SIZE bsize, 1726 int output_enabled) { 1727 int i, x_idx, y; 1728 VP9_COMMON *const cm = &cpi->common; 1729 RD_COUNTS *const rdc = &td->rd_counts; 1730 MACROBLOCK *const x = &td->mb; 1731 MACROBLOCKD *const xd = &x->e_mbd; 1732 struct macroblock_plane *const p = x->plane; 1733 struct macroblockd_plane *const pd = xd->plane; 1734 MODE_INFO *mi = &ctx->mic; 1735 MODE_INFO *const xdmi = xd->mi[0]; 1736 MODE_INFO *mi_addr = xd->mi[0]; 1737 const struct segmentation *const seg = &cm->seg; 1738 const int bw = num_8x8_blocks_wide_lookup[mi->sb_type]; 1739 const int bh = num_8x8_blocks_high_lookup[mi->sb_type]; 1740 const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col); 1741 const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row); 1742 MV_REF *const frame_mvs = cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col; 1743 int w, h; 1744 1745 const int mis = cm->mi_stride; 1746 const int mi_width = num_8x8_blocks_wide_lookup[bsize]; 1747 const int mi_height = num_8x8_blocks_high_lookup[bsize]; 1748 int 
max_plane; 1749 1750 assert(mi->sb_type == bsize); 1751 1752 *mi_addr = *mi; 1753 *x->mbmi_ext = ctx->mbmi_ext; 1754 1755 // If segmentation in use 1756 if (seg->enabled) { 1757 // For in frame complexity AQ copy the segment id from the segment map. 1758 if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) { 1759 const uint8_t *const map = 1760 seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map; 1761 mi_addr->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); 1762 } 1763 // Else for cyclic refresh mode update the segment map, set the segment id 1764 // and then update the quantizer. 1765 if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { 1766 vp9_cyclic_refresh_update_segment(cpi, xd->mi[0], mi_row, mi_col, bsize, 1767 ctx->rate, ctx->dist, x->skip, p); 1768 } 1769 } 1770 1771 max_plane = is_inter_block(xdmi) ? MAX_MB_PLANE : 1; 1772 for (i = 0; i < max_plane; ++i) { 1773 p[i].coeff = ctx->coeff_pbuf[i][1]; 1774 p[i].qcoeff = ctx->qcoeff_pbuf[i][1]; 1775 pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1]; 1776 p[i].eobs = ctx->eobs_pbuf[i][1]; 1777 } 1778 1779 for (i = max_plane; i < MAX_MB_PLANE; ++i) { 1780 p[i].coeff = ctx->coeff_pbuf[i][2]; 1781 p[i].qcoeff = ctx->qcoeff_pbuf[i][2]; 1782 pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2]; 1783 p[i].eobs = ctx->eobs_pbuf[i][2]; 1784 } 1785 1786 // Restore the coding context of the MB to that that was in place 1787 // when the mode was picked for it 1788 for (y = 0; y < mi_height; y++) 1789 for (x_idx = 0; x_idx < mi_width; x_idx++) 1790 if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > x_idx && 1791 (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height > y) { 1792 xd->mi[x_idx + y * mis] = mi_addr; 1793 } 1794 1795 if (cpi->oxcf.aq_mode != NO_AQ) vp9_init_plane_quantizers(cpi, x); 1796 1797 if (is_inter_block(xdmi) && xdmi->sb_type < BLOCK_8X8) { 1798 xdmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int; 1799 xdmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int; 1800 } 1801 1802 x->skip = ctx->skip; 1803 memcpy(x->zcoeff_blk[xdmi->tx_size], ctx->zcoeff_blk, 1804 sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk); 1805 1806 if (!output_enabled) return; 1807 1808 #if CONFIG_INTERNAL_STATS 1809 if (frame_is_intra_only(cm)) { 1810 static const int kf_mode_index[] = { 1811 THR_DC /*DC_PRED*/, THR_V_PRED /*V_PRED*/, 1812 THR_H_PRED /*H_PRED*/, THR_D45_PRED /*D45_PRED*/, 1813 THR_D135_PRED /*D135_PRED*/, THR_D117_PRED /*D117_PRED*/, 1814 THR_D153_PRED /*D153_PRED*/, THR_D207_PRED /*D207_PRED*/, 1815 THR_D63_PRED /*D63_PRED*/, THR_TM /*TM_PRED*/, 1816 }; 1817 ++cpi->mode_chosen_counts[kf_mode_index[xdmi->mode]]; 1818 } else { 1819 // Note how often each mode chosen as best 1820 ++cpi->mode_chosen_counts[ctx->best_mode_index]; 1821 } 1822 #endif 1823 if (!frame_is_intra_only(cm)) { 1824 if (is_inter_block(xdmi)) { 1825 vp9_update_mv_count(td); 1826 1827 if (cm->interp_filter == SWITCHABLE) { 1828 const int ctx = get_pred_context_switchable_interp(xd); 1829 ++td->counts->switchable_interp[ctx][xdmi->interp_filter]; 1830 } 1831 } 1832 1833 rdc->comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff; 1834 rdc->comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff; 1835 rdc->comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff; 1836 1837 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) 1838 rdc->filter_diff[i] += ctx->best_filter_diff[i]; 1839 } 1840 1841 for (h = 0; h < y_mis; ++h) { 1842 MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols; 1843 for (w = 0; w < x_mis; ++w) { 1844 MV_REF *const mv = frame_mv + w; 1845 mv->ref_frame[0] = 
mi->ref_frame[0]; 1846 mv->ref_frame[1] = mi->ref_frame[1]; 1847 mv->mv[0].as_int = mi->mv[0].as_int; 1848 mv->mv[1].as_int = mi->mv[1].as_int; 1849 } 1850 } 1851 } 1852 1853 void vp9_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src, 1854 int mi_row, int mi_col) { 1855 uint8_t *const buffers[3] = { src->y_buffer, src->u_buffer, src->v_buffer }; 1856 const int strides[3] = { src->y_stride, src->uv_stride, src->uv_stride }; 1857 int i; 1858 1859 // Set current frame pointer. 1860 x->e_mbd.cur_buf = src; 1861 1862 for (i = 0; i < MAX_MB_PLANE; i++) 1863 setup_pred_plane(&x->plane[i].src, buffers[i], strides[i], mi_row, mi_col, 1864 NULL, x->e_mbd.plane[i].subsampling_x, 1865 x->e_mbd.plane[i].subsampling_y); 1866 } 1867 1868 static void set_mode_info_seg_skip(MACROBLOCK *x, TX_MODE tx_mode, 1869 RD_COST *rd_cost, BLOCK_SIZE bsize) { 1870 MACROBLOCKD *const xd = &x->e_mbd; 1871 MODE_INFO *const mi = xd->mi[0]; 1872 INTERP_FILTER filter_ref; 1873 1874 filter_ref = get_pred_context_switchable_interp(xd); 1875 if (filter_ref == SWITCHABLE_FILTERS) filter_ref = EIGHTTAP; 1876 1877 mi->sb_type = bsize; 1878 mi->mode = ZEROMV; 1879 mi->tx_size = 1880 VPXMIN(max_txsize_lookup[bsize], tx_mode_to_biggest_tx_size[tx_mode]); 1881 mi->skip = 1; 1882 mi->uv_mode = DC_PRED; 1883 mi->ref_frame[0] = LAST_FRAME; 1884 mi->ref_frame[1] = NONE; 1885 mi->mv[0].as_int = 0; 1886 mi->interp_filter = filter_ref; 1887 1888 xd->mi[0]->bmi[0].as_mv[0].as_int = 0; 1889 x->skip = 1; 1890 1891 vp9_rd_cost_init(rd_cost); 1892 } 1893 1894 static void set_segment_rdmult(VP9_COMP *const cpi, MACROBLOCK *const x, 1895 int mi_row, int mi_col, BLOCK_SIZE bsize, 1896 AQ_MODE aq_mode) { 1897 int segment_qindex; 1898 VP9_COMMON *const cm = &cpi->common; 1899 const uint8_t *const map = 1900 cm->seg.update_map ? cpi->segmentation_map : cm->last_frame_seg_map; 1901 1902 vp9_init_plane_quantizers(cpi, x); 1903 vpx_clear_system_state(); 1904 segment_qindex = 1905 vp9_get_qindex(&cm->seg, x->e_mbd.mi[0]->segment_id, cm->base_qindex); 1906 1907 if (aq_mode == NO_AQ || aq_mode == PSNR_AQ) { 1908 if (cpi->sf.enable_tpl_model) x->rdmult = x->cb_rdmult; 1909 return; 1910 } 1911 1912 if (aq_mode == CYCLIC_REFRESH_AQ) { 1913 // If segment is boosted, use rdmult for that segment. 1914 if (cyclic_refresh_segment_id_boosted( 1915 get_segment_id(cm, map, bsize, mi_row, mi_col))) 1916 x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh); 1917 return; 1918 } 1919 1920 x->rdmult = vp9_compute_rd_mult(cpi, segment_qindex + cm->y_dc_delta_q); 1921 } 1922 1923 static void rd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data, 1924 MACROBLOCK *const x, int mi_row, int mi_col, 1925 RD_COST *rd_cost, BLOCK_SIZE bsize, 1926 PICK_MODE_CONTEXT *ctx, int64_t best_rd) { 1927 VP9_COMMON *const cm = &cpi->common; 1928 TileInfo *const tile_info = &tile_data->tile_info; 1929 MACROBLOCKD *const xd = &x->e_mbd; 1930 MODE_INFO *mi; 1931 struct macroblock_plane *const p = x->plane; 1932 struct macroblockd_plane *const pd = xd->plane; 1933 const AQ_MODE aq_mode = cpi->oxcf.aq_mode; 1934 int i, orig_rdmult; 1935 1936 vpx_clear_system_state(); 1937 1938 // Use the lower precision, but faster, 32x32 fdct for mode selection. 
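  // In addition to the transform precision trade-off noted above, the code a
  // few lines below points the macroblock coefficient buffers at the
  // ctx->*_pbuf[...][0] scratch entries; update_state() later switches the
  // committed planes to the [1]/[2] entries once a mode has been chosen.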
  x->use_lp32x32fdct = 1;

  set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
  mi = xd->mi[0];
  mi->sb_type = bsize;

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    p[i].coeff = ctx->coeff_pbuf[i][0];
    p[i].qcoeff = ctx->qcoeff_pbuf[i][0];
    pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][0];
    p[i].eobs = ctx->eobs_pbuf[i][0];
  }
  ctx->is_coded = 0;
  ctx->skippable = 0;
  ctx->pred_pixel_ready = 0;
  x->skip_recode = 0;

  // Set to zero to make sure we do not use the previous encoded frame stats
  mi->skip = 0;

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    x->source_variance = vp9_high_get_sby_perpixel_variance(
        cpi, &x->plane[0].src, bsize, xd->bd);
  } else {
    x->source_variance =
        vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
  }
#else
  x->source_variance =
      vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
#endif  // CONFIG_VP9_HIGHBITDEPTH

  // Save rdmult before it might be changed, so it can be restored later.
  orig_rdmult = x->rdmult;

  if ((cpi->sf.tx_domain_thresh > 0.0) || (cpi->sf.quant_opt_thresh > 0.0)) {
    double logvar = vp9_log_block_var(cpi, x, bsize);
    // Check block complexity as part of decision on using pixel or transform
    // domain distortion in rd tests.
    x->block_tx_domain = cpi->sf.allow_txfm_domain_distortion &&
                         (logvar >= cpi->sf.tx_domain_thresh);

    // Check block complexity as part of decision on using quantized
    // coefficient optimisation inside the rd loop.
    x->block_qcoeff_opt =
        cpi->sf.allow_quant_coeff_opt && (logvar <= cpi->sf.quant_opt_thresh);
  } else {
    x->block_tx_domain = cpi->sf.allow_txfm_domain_distortion;
    x->block_qcoeff_opt = cpi->sf.allow_quant_coeff_opt;
  }

  set_segment_index(cpi, x, mi_row, mi_col, bsize, 0);
  set_segment_rdmult(cpi, x, mi_row, mi_col, bsize, aq_mode);

  // Find best coding mode & reconstruct the MB so it is available
  // as a predictor for MBs that follow in the SB
  if (frame_is_intra_only(cm)) {
    vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, best_rd);
  } else {
    if (bsize >= BLOCK_8X8) {
      if (segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP))
        vp9_rd_pick_inter_mode_sb_seg_skip(cpi, tile_data, x, rd_cost, bsize,
                                           ctx, best_rd);
      else
        vp9_rd_pick_inter_mode_sb(cpi, tile_data, x, mi_row, mi_col, rd_cost,
                                  bsize, ctx, best_rd);
    } else {
      vp9_rd_pick_inter_mode_sub8x8(cpi, tile_data, x, mi_row, mi_col, rd_cost,
                                    bsize, ctx, best_rd);
    }
  }

  // Examine the resulting rate and for AQ mode 2 make a segment choice.
  if ((rd_cost->rate != INT_MAX) && (aq_mode == COMPLEXITY_AQ) &&
      (bsize >= BLOCK_16X16) &&
      (cm->frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame ||
       (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref))) {
    vp9_caq_select_segment(cpi, x, bsize, mi_row, mi_col, rd_cost->rate);
  }

  // TODO(jingning) The rate-distortion optimization flow needs to be
  // refactored to provide proper exit/return handle.
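  // A rate of INT_MAX means no usable mode was found, so the cost is forced
  // to INT64_MAX and this candidate loses any later comparison; otherwise
  // rate and distortion are folded into a single cost by the RDCOST()
  // Lagrangian using x->rdmult / x->rddiv.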
2022 if (rd_cost->rate == INT_MAX) 2023 rd_cost->rdcost = INT64_MAX; 2024 else 2025 rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist); 2026 2027 x->rdmult = orig_rdmult; 2028 2029 ctx->rate = rd_cost->rate; 2030 ctx->dist = rd_cost->dist; 2031 } 2032 2033 static void update_stats(VP9_COMMON *cm, ThreadData *td) { 2034 const MACROBLOCK *x = &td->mb; 2035 const MACROBLOCKD *const xd = &x->e_mbd; 2036 const MODE_INFO *const mi = xd->mi[0]; 2037 const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; 2038 const BLOCK_SIZE bsize = mi->sb_type; 2039 2040 if (!frame_is_intra_only(cm)) { 2041 FRAME_COUNTS *const counts = td->counts; 2042 const int inter_block = is_inter_block(mi); 2043 const int seg_ref_active = 2044 segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_REF_FRAME); 2045 if (!seg_ref_active) { 2046 counts->intra_inter[get_intra_inter_context(xd)][inter_block]++; 2047 // If the segment reference feature is enabled we have only a single 2048 // reference frame allowed for the segment so exclude it from 2049 // the reference frame counts used to work out probabilities. 2050 if (inter_block) { 2051 const MV_REFERENCE_FRAME ref0 = mi->ref_frame[0]; 2052 if (cm->reference_mode == REFERENCE_MODE_SELECT) 2053 counts->comp_inter[vp9_get_reference_mode_context(cm, xd)] 2054 [has_second_ref(mi)]++; 2055 2056 if (has_second_ref(mi)) { 2057 const int idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref]; 2058 const int ctx = vp9_get_pred_context_comp_ref_p(cm, xd); 2059 const int bit = mi->ref_frame[!idx] == cm->comp_var_ref[1]; 2060 counts->comp_ref[ctx][bit]++; 2061 } else { 2062 counts->single_ref[vp9_get_pred_context_single_ref_p1(xd)][0] 2063 [ref0 != LAST_FRAME]++; 2064 if (ref0 != LAST_FRAME) 2065 counts->single_ref[vp9_get_pred_context_single_ref_p2(xd)][1] 2066 [ref0 != GOLDEN_FRAME]++; 2067 } 2068 } 2069 } 2070 if (inter_block && 2071 !segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP)) { 2072 const int mode_ctx = mbmi_ext->mode_context[mi->ref_frame[0]]; 2073 if (bsize >= BLOCK_8X8) { 2074 const PREDICTION_MODE mode = mi->mode; 2075 ++counts->inter_mode[mode_ctx][INTER_OFFSET(mode)]; 2076 } else { 2077 const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; 2078 const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; 2079 int idx, idy; 2080 for (idy = 0; idy < 2; idy += num_4x4_h) { 2081 for (idx = 0; idx < 2; idx += num_4x4_w) { 2082 const int j = idy * 2 + idx; 2083 const PREDICTION_MODE b_mode = mi->bmi[j].as_mode; 2084 ++counts->inter_mode[mode_ctx][INTER_OFFSET(b_mode)]; 2085 } 2086 } 2087 } 2088 } 2089 } 2090 } 2091 2092 static void restore_context(MACROBLOCK *const x, int mi_row, int mi_col, 2093 ENTROPY_CONTEXT a[16 * MAX_MB_PLANE], 2094 ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], 2095 PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8], 2096 BLOCK_SIZE bsize) { 2097 MACROBLOCKD *const xd = &x->e_mbd; 2098 int p; 2099 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; 2100 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; 2101 int mi_width = num_8x8_blocks_wide_lookup[bsize]; 2102 int mi_height = num_8x8_blocks_high_lookup[bsize]; 2103 for (p = 0; p < MAX_MB_PLANE; p++) { 2104 memcpy(xd->above_context[p] + ((mi_col * 2) >> xd->plane[p].subsampling_x), 2105 a + num_4x4_blocks_wide * p, 2106 (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >> 2107 xd->plane[p].subsampling_x); 2108 memcpy(xd->left_context[p] + 2109 ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y), 2110 l + num_4x4_blocks_high * p, 2111 
(sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >> 2112 xd->plane[p].subsampling_y); 2113 } 2114 memcpy(xd->above_seg_context + mi_col, sa, 2115 sizeof(*xd->above_seg_context) * mi_width); 2116 memcpy(xd->left_seg_context + (mi_row & MI_MASK), sl, 2117 sizeof(xd->left_seg_context[0]) * mi_height); 2118 } 2119 2120 static void save_context(MACROBLOCK *const x, int mi_row, int mi_col, 2121 ENTROPY_CONTEXT a[16 * MAX_MB_PLANE], 2122 ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], 2123 PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8], 2124 BLOCK_SIZE bsize) { 2125 const MACROBLOCKD *const xd = &x->e_mbd; 2126 int p; 2127 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; 2128 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; 2129 int mi_width = num_8x8_blocks_wide_lookup[bsize]; 2130 int mi_height = num_8x8_blocks_high_lookup[bsize]; 2131 2132 // buffer the above/left context information of the block in search. 2133 for (p = 0; p < MAX_MB_PLANE; ++p) { 2134 memcpy(a + num_4x4_blocks_wide * p, 2135 xd->above_context[p] + (mi_col * 2 >> xd->plane[p].subsampling_x), 2136 (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >> 2137 xd->plane[p].subsampling_x); 2138 memcpy(l + num_4x4_blocks_high * p, 2139 xd->left_context[p] + 2140 ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y), 2141 (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >> 2142 xd->plane[p].subsampling_y); 2143 } 2144 memcpy(sa, xd->above_seg_context + mi_col, 2145 sizeof(*xd->above_seg_context) * mi_width); 2146 memcpy(sl, xd->left_seg_context + (mi_row & MI_MASK), 2147 sizeof(xd->left_seg_context[0]) * mi_height); 2148 } 2149 2150 static void encode_b(VP9_COMP *cpi, const TileInfo *const tile, ThreadData *td, 2151 TOKENEXTRA **tp, int mi_row, int mi_col, 2152 int output_enabled, BLOCK_SIZE bsize, 2153 PICK_MODE_CONTEXT *ctx) { 2154 MACROBLOCK *const x = &td->mb; 2155 set_offsets(cpi, tile, x, mi_row, mi_col, bsize); 2156 2157 if (cpi->sf.enable_tpl_model && cpi->oxcf.aq_mode == NO_AQ) 2158 x->rdmult = x->cb_rdmult; 2159 2160 update_state(cpi, td, ctx, mi_row, mi_col, bsize, output_enabled); 2161 encode_superblock(cpi, td, tp, output_enabled, mi_row, mi_col, bsize, ctx); 2162 2163 if (output_enabled) { 2164 update_stats(&cpi->common, td); 2165 2166 (*tp)->token = EOSB_TOKEN; 2167 (*tp)++; 2168 } 2169 } 2170 2171 static void encode_sb(VP9_COMP *cpi, ThreadData *td, const TileInfo *const tile, 2172 TOKENEXTRA **tp, int mi_row, int mi_col, 2173 int output_enabled, BLOCK_SIZE bsize, PC_TREE *pc_tree) { 2174 VP9_COMMON *const cm = &cpi->common; 2175 MACROBLOCK *const x = &td->mb; 2176 MACROBLOCKD *const xd = &x->e_mbd; 2177 2178 const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4; 2179 int ctx; 2180 PARTITION_TYPE partition; 2181 BLOCK_SIZE subsize = bsize; 2182 2183 if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; 2184 2185 if (bsize >= BLOCK_8X8) { 2186 ctx = partition_plane_context(xd, mi_row, mi_col, bsize); 2187 subsize = get_subsize(bsize, pc_tree->partitioning); 2188 } else { 2189 ctx = 0; 2190 subsize = BLOCK_4X4; 2191 } 2192 2193 partition = partition_lookup[bsl][subsize]; 2194 if (output_enabled && bsize != BLOCK_4X4) 2195 td->counts->partition[ctx][partition]++; 2196 2197 switch (partition) { 2198 case PARTITION_NONE: 2199 encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize, 2200 &pc_tree->none); 2201 break; 2202 case PARTITION_VERT: 2203 encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize, 2204 &pc_tree->vertical[0]); 2205 if (mi_col + hbs < 
cm->mi_cols && bsize > BLOCK_8X8) { 2206 encode_b(cpi, tile, td, tp, mi_row, mi_col + hbs, output_enabled, 2207 subsize, &pc_tree->vertical[1]); 2208 } 2209 break; 2210 case PARTITION_HORZ: 2211 encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize, 2212 &pc_tree->horizontal[0]); 2213 if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) { 2214 encode_b(cpi, tile, td, tp, mi_row + hbs, mi_col, output_enabled, 2215 subsize, &pc_tree->horizontal[1]); 2216 } 2217 break; 2218 default: 2219 assert(partition == PARTITION_SPLIT); 2220 if (bsize == BLOCK_8X8) { 2221 encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize, 2222 pc_tree->leaf_split[0]); 2223 } else { 2224 encode_sb(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize, 2225 pc_tree->split[0]); 2226 encode_sb(cpi, td, tile, tp, mi_row, mi_col + hbs, output_enabled, 2227 subsize, pc_tree->split[1]); 2228 encode_sb(cpi, td, tile, tp, mi_row + hbs, mi_col, output_enabled, 2229 subsize, pc_tree->split[2]); 2230 encode_sb(cpi, td, tile, tp, mi_row + hbs, mi_col + hbs, output_enabled, 2231 subsize, pc_tree->split[3]); 2232 } 2233 break; 2234 } 2235 2236 if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8) 2237 update_partition_context(xd, mi_row, mi_col, subsize, bsize); 2238 } 2239 2240 // Check to see if the given partition size is allowed for a specified number 2241 // of 8x8 block rows and columns remaining in the image. 2242 // If not then return the largest allowed partition size 2243 static BLOCK_SIZE find_partition_size(BLOCK_SIZE bsize, int rows_left, 2244 int cols_left, int *bh, int *bw) { 2245 if (rows_left <= 0 || cols_left <= 0) { 2246 return VPXMIN(bsize, BLOCK_8X8); 2247 } else { 2248 for (; bsize > 0; bsize -= 3) { 2249 *bh = num_8x8_blocks_high_lookup[bsize]; 2250 *bw = num_8x8_blocks_wide_lookup[bsize]; 2251 if ((*bh <= rows_left) && (*bw <= cols_left)) { 2252 break; 2253 } 2254 } 2255 } 2256 return bsize; 2257 } 2258 2259 static void set_partial_b64x64_partition(MODE_INFO *mi, int mis, int bh_in, 2260 int bw_in, int row8x8_remaining, 2261 int col8x8_remaining, BLOCK_SIZE bsize, 2262 MODE_INFO **mi_8x8) { 2263 int bh = bh_in; 2264 int r, c; 2265 for (r = 0; r < MI_BLOCK_SIZE; r += bh) { 2266 int bw = bw_in; 2267 for (c = 0; c < MI_BLOCK_SIZE; c += bw) { 2268 const int index = r * mis + c; 2269 mi_8x8[index] = mi + index; 2270 mi_8x8[index]->sb_type = find_partition_size( 2271 bsize, row8x8_remaining - r, col8x8_remaining - c, &bh, &bw); 2272 } 2273 } 2274 } 2275 2276 // This function attempts to set all mode info entries in a given SB64 2277 // to the same block partition size. 2278 // However, at the bottom and right borders of the image the requested size 2279 // may not be allowed in which case this code attempts to choose the largest 2280 // allowable partition. 
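// For example, with bsize == BLOCK_16X16 the lookups below give bh == bw == 2,
// so a fully visible SB64 is stamped as a 4x4 grid of 16x16 blocks, one
// mode-info entry every two 8x8 rows and columns.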
2281 static void set_fixed_partitioning(VP9_COMP *cpi, const TileInfo *const tile, 2282 MODE_INFO **mi_8x8, int mi_row, int mi_col, 2283 BLOCK_SIZE bsize) { 2284 VP9_COMMON *const cm = &cpi->common; 2285 const int mis = cm->mi_stride; 2286 const int row8x8_remaining = tile->mi_row_end - mi_row; 2287 const int col8x8_remaining = tile->mi_col_end - mi_col; 2288 int block_row, block_col; 2289 MODE_INFO *mi_upper_left = cm->mi + mi_row * mis + mi_col; 2290 int bh = num_8x8_blocks_high_lookup[bsize]; 2291 int bw = num_8x8_blocks_wide_lookup[bsize]; 2292 2293 assert((row8x8_remaining > 0) && (col8x8_remaining > 0)); 2294 2295 // Apply the requested partition size to the SB64 if it is all "in image" 2296 if ((col8x8_remaining >= MI_BLOCK_SIZE) && 2297 (row8x8_remaining >= MI_BLOCK_SIZE)) { 2298 for (block_row = 0; block_row < MI_BLOCK_SIZE; block_row += bh) { 2299 for (block_col = 0; block_col < MI_BLOCK_SIZE; block_col += bw) { 2300 int index = block_row * mis + block_col; 2301 mi_8x8[index] = mi_upper_left + index; 2302 mi_8x8[index]->sb_type = bsize; 2303 } 2304 } 2305 } else { 2306 // Else this is a partial SB64. 2307 set_partial_b64x64_partition(mi_upper_left, mis, bh, bw, row8x8_remaining, 2308 col8x8_remaining, bsize, mi_8x8); 2309 } 2310 } 2311 2312 static const struct { 2313 int row; 2314 int col; 2315 } coord_lookup[16] = { 2316 // 32x32 index = 0 2317 { 0, 0 }, 2318 { 0, 2 }, 2319 { 2, 0 }, 2320 { 2, 2 }, 2321 // 32x32 index = 1 2322 { 0, 4 }, 2323 { 0, 6 }, 2324 { 2, 4 }, 2325 { 2, 6 }, 2326 // 32x32 index = 2 2327 { 4, 0 }, 2328 { 4, 2 }, 2329 { 6, 0 }, 2330 { 6, 2 }, 2331 // 32x32 index = 3 2332 { 4, 4 }, 2333 { 4, 6 }, 2334 { 6, 4 }, 2335 { 6, 6 }, 2336 }; 2337 2338 static void set_source_var_based_partition(VP9_COMP *cpi, 2339 const TileInfo *const tile, 2340 MACROBLOCK *const x, 2341 MODE_INFO **mi_8x8, int mi_row, 2342 int mi_col) { 2343 VP9_COMMON *const cm = &cpi->common; 2344 const int mis = cm->mi_stride; 2345 const int row8x8_remaining = tile->mi_row_end - mi_row; 2346 const int col8x8_remaining = tile->mi_col_end - mi_col; 2347 MODE_INFO *mi_upper_left = cm->mi + mi_row * mis + mi_col; 2348 2349 vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col); 2350 2351 assert((row8x8_remaining > 0) && (col8x8_remaining > 0)); 2352 2353 // In-image SB64 2354 if ((col8x8_remaining >= MI_BLOCK_SIZE) && 2355 (row8x8_remaining >= MI_BLOCK_SIZE)) { 2356 int i, j; 2357 int index; 2358 diff d32[4]; 2359 const int offset = (mi_row >> 1) * cm->mb_cols + (mi_col >> 1); 2360 int is_larger_better = 0; 2361 int use32x32 = 0; 2362 unsigned int thr = cpi->source_var_thresh; 2363 2364 memset(d32, 0, 4 * sizeof(diff)); 2365 2366 for (i = 0; i < 4; i++) { 2367 diff *d16[4]; 2368 2369 for (j = 0; j < 4; j++) { 2370 int b_mi_row = coord_lookup[i * 4 + j].row; 2371 int b_mi_col = coord_lookup[i * 4 + j].col; 2372 int boffset = b_mi_row / 2 * cm->mb_cols + b_mi_col / 2; 2373 2374 d16[j] = cpi->source_diff_var + offset + boffset; 2375 2376 index = b_mi_row * mis + b_mi_col; 2377 mi_8x8[index] = mi_upper_left + index; 2378 mi_8x8[index]->sb_type = BLOCK_16X16; 2379 2380 // TODO(yunqingwang): If d16[j].var is very large, use 8x8 partition 2381 // size to further improve quality. 
2382 } 2383 2384 is_larger_better = (d16[0]->var < thr) && (d16[1]->var < thr) && 2385 (d16[2]->var < thr) && (d16[3]->var < thr); 2386 2387 // Use 32x32 partition 2388 if (is_larger_better) { 2389 use32x32 += 1; 2390 2391 for (j = 0; j < 4; j++) { 2392 d32[i].sse += d16[j]->sse; 2393 d32[i].sum += d16[j]->sum; 2394 } 2395 2396 d32[i].var = 2397 (unsigned int)(d32[i].sse - 2398 (unsigned int)(((int64_t)d32[i].sum * d32[i].sum) >> 2399 10)); 2400 2401 index = coord_lookup[i * 4].row * mis + coord_lookup[i * 4].col; 2402 mi_8x8[index] = mi_upper_left + index; 2403 mi_8x8[index]->sb_type = BLOCK_32X32; 2404 } 2405 } 2406 2407 if (use32x32 == 4) { 2408 thr <<= 1; 2409 is_larger_better = (d32[0].var < thr) && (d32[1].var < thr) && 2410 (d32[2].var < thr) && (d32[3].var < thr); 2411 2412 // Use 64x64 partition 2413 if (is_larger_better) { 2414 mi_8x8[0] = mi_upper_left; 2415 mi_8x8[0]->sb_type = BLOCK_64X64; 2416 } 2417 } 2418 } else { // partial in-image SB64 2419 int bh = num_8x8_blocks_high_lookup[BLOCK_16X16]; 2420 int bw = num_8x8_blocks_wide_lookup[BLOCK_16X16]; 2421 set_partial_b64x64_partition(mi_upper_left, mis, bh, bw, row8x8_remaining, 2422 col8x8_remaining, BLOCK_16X16, mi_8x8); 2423 } 2424 } 2425 2426 static void update_state_rt(VP9_COMP *cpi, ThreadData *td, 2427 PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col, 2428 int bsize) { 2429 VP9_COMMON *const cm = &cpi->common; 2430 MACROBLOCK *const x = &td->mb; 2431 MACROBLOCKD *const xd = &x->e_mbd; 2432 MODE_INFO *const mi = xd->mi[0]; 2433 struct macroblock_plane *const p = x->plane; 2434 const struct segmentation *const seg = &cm->seg; 2435 const int bw = num_8x8_blocks_wide_lookup[mi->sb_type]; 2436 const int bh = num_8x8_blocks_high_lookup[mi->sb_type]; 2437 const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col); 2438 const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row); 2439 2440 *(xd->mi[0]) = ctx->mic; 2441 *(x->mbmi_ext) = ctx->mbmi_ext; 2442 2443 if (seg->enabled && cpi->oxcf.aq_mode != NO_AQ) { 2444 // For in frame complexity AQ or variance AQ, copy segment_id from 2445 // segmentation_map. 2446 if (cpi->oxcf.aq_mode != CYCLIC_REFRESH_AQ) { 2447 const uint8_t *const map = 2448 seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map; 2449 mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); 2450 } else { 2451 // Setting segmentation map for cyclic_refresh. 
2452 vp9_cyclic_refresh_update_segment(cpi, mi, mi_row, mi_col, bsize, 2453 ctx->rate, ctx->dist, x->skip, p); 2454 } 2455 vp9_init_plane_quantizers(cpi, x); 2456 } 2457 2458 if (is_inter_block(mi)) { 2459 vp9_update_mv_count(td); 2460 if (cm->interp_filter == SWITCHABLE) { 2461 const int pred_ctx = get_pred_context_switchable_interp(xd); 2462 ++td->counts->switchable_interp[pred_ctx][mi->interp_filter]; 2463 } 2464 2465 if (mi->sb_type < BLOCK_8X8) { 2466 mi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int; 2467 mi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int; 2468 } 2469 } 2470 2471 if (cm->use_prev_frame_mvs || !cm->error_resilient_mode || 2472 (cpi->svc.use_base_mv && cpi->svc.number_spatial_layers > 1 && 2473 cpi->svc.spatial_layer_id != cpi->svc.number_spatial_layers - 1)) { 2474 MV_REF *const frame_mvs = 2475 cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col; 2476 int w, h; 2477 2478 for (h = 0; h < y_mis; ++h) { 2479 MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols; 2480 for (w = 0; w < x_mis; ++w) { 2481 MV_REF *const mv = frame_mv + w; 2482 mv->ref_frame[0] = mi->ref_frame[0]; 2483 mv->ref_frame[1] = mi->ref_frame[1]; 2484 mv->mv[0].as_int = mi->mv[0].as_int; 2485 mv->mv[1].as_int = mi->mv[1].as_int; 2486 } 2487 } 2488 } 2489 2490 x->skip = ctx->skip; 2491 x->skip_txfm[0] = (mi->segment_id || xd->lossless) ? 0 : ctx->skip_txfm[0]; 2492 } 2493 2494 static void encode_b_rt(VP9_COMP *cpi, ThreadData *td, 2495 const TileInfo *const tile, TOKENEXTRA **tp, int mi_row, 2496 int mi_col, int output_enabled, BLOCK_SIZE bsize, 2497 PICK_MODE_CONTEXT *ctx) { 2498 MACROBLOCK *const x = &td->mb; 2499 set_offsets(cpi, tile, x, mi_row, mi_col, bsize); 2500 update_state_rt(cpi, td, ctx, mi_row, mi_col, bsize); 2501 2502 encode_superblock(cpi, td, tp, output_enabled, mi_row, mi_col, bsize, ctx); 2503 update_stats(&cpi->common, td); 2504 2505 (*tp)->token = EOSB_TOKEN; 2506 (*tp)++; 2507 } 2508 2509 static void encode_sb_rt(VP9_COMP *cpi, ThreadData *td, 2510 const TileInfo *const tile, TOKENEXTRA **tp, 2511 int mi_row, int mi_col, int output_enabled, 2512 BLOCK_SIZE bsize, PC_TREE *pc_tree) { 2513 VP9_COMMON *const cm = &cpi->common; 2514 MACROBLOCK *const x = &td->mb; 2515 MACROBLOCKD *const xd = &x->e_mbd; 2516 2517 const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4; 2518 int ctx; 2519 PARTITION_TYPE partition; 2520 BLOCK_SIZE subsize; 2521 2522 if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; 2523 2524 if (bsize >= BLOCK_8X8) { 2525 const int idx_str = xd->mi_stride * mi_row + mi_col; 2526 MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str; 2527 ctx = partition_plane_context(xd, mi_row, mi_col, bsize); 2528 subsize = mi_8x8[0]->sb_type; 2529 } else { 2530 ctx = 0; 2531 subsize = BLOCK_4X4; 2532 } 2533 2534 partition = partition_lookup[bsl][subsize]; 2535 if (output_enabled && bsize != BLOCK_4X4) 2536 td->counts->partition[ctx][partition]++; 2537 2538 switch (partition) { 2539 case PARTITION_NONE: 2540 encode_b_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize, 2541 &pc_tree->none); 2542 break; 2543 case PARTITION_VERT: 2544 encode_b_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize, 2545 &pc_tree->vertical[0]); 2546 if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) { 2547 encode_b_rt(cpi, td, tile, tp, mi_row, mi_col + hbs, output_enabled, 2548 subsize, &pc_tree->vertical[1]); 2549 } 2550 break; 2551 case PARTITION_HORZ: 2552 encode_b_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize, 2553 &pc_tree->horizontal[0]); 2554 if (mi_row + 
hbs < cm->mi_rows && bsize > BLOCK_8X8) { 2555 encode_b_rt(cpi, td, tile, tp, mi_row + hbs, mi_col, output_enabled, 2556 subsize, &pc_tree->horizontal[1]); 2557 } 2558 break; 2559 default: 2560 assert(partition == PARTITION_SPLIT); 2561 subsize = get_subsize(bsize, PARTITION_SPLIT); 2562 encode_sb_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize, 2563 pc_tree->split[0]); 2564 encode_sb_rt(cpi, td, tile, tp, mi_row, mi_col + hbs, output_enabled, 2565 subsize, pc_tree->split[1]); 2566 encode_sb_rt(cpi, td, tile, tp, mi_row + hbs, mi_col, output_enabled, 2567 subsize, pc_tree->split[2]); 2568 encode_sb_rt(cpi, td, tile, tp, mi_row + hbs, mi_col + hbs, 2569 output_enabled, subsize, pc_tree->split[3]); 2570 break; 2571 } 2572 2573 if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8) 2574 update_partition_context(xd, mi_row, mi_col, subsize, bsize); 2575 } 2576 2577 static void rd_use_partition(VP9_COMP *cpi, ThreadData *td, 2578 TileDataEnc *tile_data, MODE_INFO **mi_8x8, 2579 TOKENEXTRA **tp, int mi_row, int mi_col, 2580 BLOCK_SIZE bsize, int *rate, int64_t *dist, 2581 int do_recon, PC_TREE *pc_tree) { 2582 VP9_COMMON *const cm = &cpi->common; 2583 TileInfo *const tile_info = &tile_data->tile_info; 2584 MACROBLOCK *const x = &td->mb; 2585 MACROBLOCKD *const xd = &x->e_mbd; 2586 const int mis = cm->mi_stride; 2587 const int bsl = b_width_log2_lookup[bsize]; 2588 const int mi_step = num_4x4_blocks_wide_lookup[bsize] / 2; 2589 const int bss = (1 << bsl) / 4; 2590 int i, pl; 2591 PARTITION_TYPE partition = PARTITION_NONE; 2592 BLOCK_SIZE subsize; 2593 ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; 2594 PARTITION_CONTEXT sl[8], sa[8]; 2595 RD_COST last_part_rdc, none_rdc, chosen_rdc; 2596 BLOCK_SIZE sub_subsize = BLOCK_4X4; 2597 int splits_below = 0; 2598 BLOCK_SIZE bs_type = mi_8x8[0]->sb_type; 2599 int do_partition_search = 1; 2600 PICK_MODE_CONTEXT *ctx = &pc_tree->none; 2601 2602 if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; 2603 2604 assert(num_4x4_blocks_wide_lookup[bsize] == 2605 num_4x4_blocks_high_lookup[bsize]); 2606 2607 vp9_rd_cost_reset(&last_part_rdc); 2608 vp9_rd_cost_reset(&none_rdc); 2609 vp9_rd_cost_reset(&chosen_rdc); 2610 2611 partition = partition_lookup[bsl][bs_type]; 2612 subsize = get_subsize(bsize, partition); 2613 2614 pc_tree->partitioning = partition; 2615 save_context(x, mi_row, mi_col, a, l, sa, sl, bsize); 2616 2617 if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode != NO_AQ) { 2618 set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); 2619 x->mb_energy = vp9_block_energy(cpi, x, bsize); 2620 } 2621 2622 if (do_partition_search && 2623 cpi->sf.partition_search_type == SEARCH_PARTITION && 2624 cpi->sf.adjust_partitioning_from_last_frame) { 2625 // Check if any of the sub blocks are further split. 2626 if (partition == PARTITION_SPLIT && subsize > BLOCK_8X8) { 2627 sub_subsize = get_subsize(subsize, PARTITION_SPLIT); 2628 splits_below = 1; 2629 for (i = 0; i < 4; i++) { 2630 int jj = i >> 1, ii = i & 0x01; 2631 MODE_INFO *this_mi = mi_8x8[jj * bss * mis + ii * bss]; 2632 if (this_mi && this_mi->sb_type >= sub_subsize) { 2633 splits_below = 0; 2634 } 2635 } 2636 } 2637 2638 // If partition is not none try none unless each of the 4 splits are split 2639 // even further.. 
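    // The probe below re-evaluates the block as PARTITION_NONE, adds the
    // partition signalling cost to none_rdc, and then restores the saved
    // entropy/segment context and the original sb_type so the normal
    // last-frame-partition path that follows starts from a clean slate.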
2640 if (partition != PARTITION_NONE && !splits_below && 2641 mi_row + (mi_step >> 1) < cm->mi_rows && 2642 mi_col + (mi_step >> 1) < cm->mi_cols) { 2643 pc_tree->partitioning = PARTITION_NONE; 2644 rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &none_rdc, bsize, ctx, 2645 INT64_MAX); 2646 2647 pl = partition_plane_context(xd, mi_row, mi_col, bsize); 2648 2649 if (none_rdc.rate < INT_MAX) { 2650 none_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE]; 2651 none_rdc.rdcost = 2652 RDCOST(x->rdmult, x->rddiv, none_rdc.rate, none_rdc.dist); 2653 } 2654 2655 restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); 2656 mi_8x8[0]->sb_type = bs_type; 2657 pc_tree->partitioning = partition; 2658 } 2659 } 2660 2661 switch (partition) { 2662 case PARTITION_NONE: 2663 rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, bsize, 2664 ctx, INT64_MAX); 2665 break; 2666 case PARTITION_HORZ: 2667 pc_tree->horizontal[0].skip_ref_frame_mask = 0; 2668 rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, 2669 subsize, &pc_tree->horizontal[0], INT64_MAX); 2670 if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 && 2671 mi_row + (mi_step >> 1) < cm->mi_rows) { 2672 RD_COST tmp_rdc; 2673 PICK_MODE_CONTEXT *ctx = &pc_tree->horizontal[0]; 2674 vp9_rd_cost_init(&tmp_rdc); 2675 update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0); 2676 encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx); 2677 pc_tree->horizontal[1].skip_ref_frame_mask = 0; 2678 rd_pick_sb_modes(cpi, tile_data, x, mi_row + (mi_step >> 1), mi_col, 2679 &tmp_rdc, subsize, &pc_tree->horizontal[1], INT64_MAX); 2680 if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { 2681 vp9_rd_cost_reset(&last_part_rdc); 2682 break; 2683 } 2684 last_part_rdc.rate += tmp_rdc.rate; 2685 last_part_rdc.dist += tmp_rdc.dist; 2686 last_part_rdc.rdcost += tmp_rdc.rdcost; 2687 } 2688 break; 2689 case PARTITION_VERT: 2690 pc_tree->vertical[0].skip_ref_frame_mask = 0; 2691 rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, 2692 subsize, &pc_tree->vertical[0], INT64_MAX); 2693 if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 && 2694 mi_col + (mi_step >> 1) < cm->mi_cols) { 2695 RD_COST tmp_rdc; 2696 PICK_MODE_CONTEXT *ctx = &pc_tree->vertical[0]; 2697 vp9_rd_cost_init(&tmp_rdc); 2698 update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0); 2699 encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx); 2700 pc_tree->vertical[bsize > BLOCK_8X8].skip_ref_frame_mask = 0; 2701 rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + (mi_step >> 1), 2702 &tmp_rdc, subsize, 2703 &pc_tree->vertical[bsize > BLOCK_8X8], INT64_MAX); 2704 if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { 2705 vp9_rd_cost_reset(&last_part_rdc); 2706 break; 2707 } 2708 last_part_rdc.rate += tmp_rdc.rate; 2709 last_part_rdc.dist += tmp_rdc.dist; 2710 last_part_rdc.rdcost += tmp_rdc.rdcost; 2711 } 2712 break; 2713 default: 2714 assert(partition == PARTITION_SPLIT); 2715 if (bsize == BLOCK_8X8) { 2716 rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, 2717 subsize, pc_tree->leaf_split[0], INT64_MAX); 2718 break; 2719 } 2720 last_part_rdc.rate = 0; 2721 last_part_rdc.dist = 0; 2722 last_part_rdc.rdcost = 0; 2723 for (i = 0; i < 4; i++) { 2724 int x_idx = (i & 1) * (mi_step >> 1); 2725 int y_idx = (i >> 1) * (mi_step >> 1); 2726 int jj = i >> 1, ii = i & 0x01; 2727 RD_COST tmp_rdc; 2728 if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols)) 2729 continue; 2730 2731 
vp9_rd_cost_init(&tmp_rdc); 2732 rd_use_partition(cpi, td, tile_data, mi_8x8 + jj * bss * mis + ii * bss, 2733 tp, mi_row + y_idx, mi_col + x_idx, subsize, 2734 &tmp_rdc.rate, &tmp_rdc.dist, i != 3, 2735 pc_tree->split[i]); 2736 if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { 2737 vp9_rd_cost_reset(&last_part_rdc); 2738 break; 2739 } 2740 last_part_rdc.rate += tmp_rdc.rate; 2741 last_part_rdc.dist += tmp_rdc.dist; 2742 } 2743 break; 2744 } 2745 2746 pl = partition_plane_context(xd, mi_row, mi_col, bsize); 2747 if (last_part_rdc.rate < INT_MAX) { 2748 last_part_rdc.rate += cpi->partition_cost[pl][partition]; 2749 last_part_rdc.rdcost = 2750 RDCOST(x->rdmult, x->rddiv, last_part_rdc.rate, last_part_rdc.dist); 2751 } 2752 2753 if (do_partition_search && cpi->sf.adjust_partitioning_from_last_frame && 2754 cpi->sf.partition_search_type == SEARCH_PARTITION && 2755 partition != PARTITION_SPLIT && bsize > BLOCK_8X8 && 2756 (mi_row + mi_step < cm->mi_rows || 2757 mi_row + (mi_step >> 1) == cm->mi_rows) && 2758 (mi_col + mi_step < cm->mi_cols || 2759 mi_col + (mi_step >> 1) == cm->mi_cols)) { 2760 BLOCK_SIZE split_subsize = get_subsize(bsize, PARTITION_SPLIT); 2761 chosen_rdc.rate = 0; 2762 chosen_rdc.dist = 0; 2763 restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); 2764 pc_tree->partitioning = PARTITION_SPLIT; 2765 2766 // Split partition. 2767 for (i = 0; i < 4; i++) { 2768 int x_idx = (i & 1) * (mi_step >> 1); 2769 int y_idx = (i >> 1) * (mi_step >> 1); 2770 RD_COST tmp_rdc; 2771 ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; 2772 PARTITION_CONTEXT sl[8], sa[8]; 2773 2774 if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols)) 2775 continue; 2776 2777 save_context(x, mi_row, mi_col, a, l, sa, sl, bsize); 2778 pc_tree->split[i]->partitioning = PARTITION_NONE; 2779 rd_pick_sb_modes(cpi, tile_data, x, mi_row + y_idx, mi_col + x_idx, 2780 &tmp_rdc, split_subsize, &pc_tree->split[i]->none, 2781 INT64_MAX); 2782 2783 restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); 2784 2785 if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { 2786 vp9_rd_cost_reset(&chosen_rdc); 2787 break; 2788 } 2789 2790 chosen_rdc.rate += tmp_rdc.rate; 2791 chosen_rdc.dist += tmp_rdc.dist; 2792 2793 if (i != 3) 2794 encode_sb(cpi, td, tile_info, tp, mi_row + y_idx, mi_col + x_idx, 0, 2795 split_subsize, pc_tree->split[i]); 2796 2797 pl = partition_plane_context(xd, mi_row + y_idx, mi_col + x_idx, 2798 split_subsize); 2799 chosen_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE]; 2800 } 2801 pl = partition_plane_context(xd, mi_row, mi_col, bsize); 2802 if (chosen_rdc.rate < INT_MAX) { 2803 chosen_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT]; 2804 chosen_rdc.rdcost = 2805 RDCOST(x->rdmult, x->rddiv, chosen_rdc.rate, chosen_rdc.dist); 2806 } 2807 } 2808 2809 // If last_part is better set the partitioning to that. 2810 if (last_part_rdc.rdcost < chosen_rdc.rdcost) { 2811 mi_8x8[0]->sb_type = bsize; 2812 if (bsize >= BLOCK_8X8) pc_tree->partitioning = partition; 2813 chosen_rdc = last_part_rdc; 2814 } 2815 // If none was better set the partitioning to that. 2816 if (none_rdc.rdcost < chosen_rdc.rdcost) { 2817 if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE; 2818 chosen_rdc = none_rdc; 2819 } 2820 2821 restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); 2822 2823 // We must have chosen a partitioning and encoding or we'll fail later on. 2824 // No other opportunities for success. 
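  // At this point chosen_rdc holds the best of the re-used partition, the
  // PARTITION_NONE probe and the forced-split re-search above; the final
  // reconstruction below is only emitted (output_enabled) at the 64x64 level.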
2825 if (bsize == BLOCK_64X64) 2826 assert(chosen_rdc.rate < INT_MAX && chosen_rdc.dist < INT64_MAX); 2827 2828 if (do_recon) { 2829 int output_enabled = (bsize == BLOCK_64X64); 2830 encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize, 2831 pc_tree); 2832 } 2833 2834 *rate = chosen_rdc.rate; 2835 *dist = chosen_rdc.dist; 2836 } 2837 2838 static const BLOCK_SIZE min_partition_size[BLOCK_SIZES] = { 2839 BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, 2840 BLOCK_4X4, BLOCK_8X8, BLOCK_8X8, BLOCK_8X8, BLOCK_16X16, 2841 BLOCK_16X16, BLOCK_16X16, BLOCK_16X16 2842 }; 2843 2844 static const BLOCK_SIZE max_partition_size[BLOCK_SIZES] = { 2845 BLOCK_8X8, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, BLOCK_32X32, 2846 BLOCK_32X32, BLOCK_32X32, BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, 2847 BLOCK_64X64, BLOCK_64X64, BLOCK_64X64 2848 }; 2849 2850 // Look at all the mode_info entries for blocks that are part of this 2851 // partition and find the min and max values for sb_type. 2852 // At the moment this is designed to work on a 64x64 SB but could be 2853 // adjusted to use a size parameter. 2854 // 2855 // The min and max are assumed to have been initialized prior to calling this 2856 // function so repeat calls can accumulate a min and max of more than one sb64. 2857 static void get_sb_partition_size_range(MACROBLOCKD *xd, MODE_INFO **mi_8x8, 2858 BLOCK_SIZE *min_block_size, 2859 BLOCK_SIZE *max_block_size, 2860 int bs_hist[BLOCK_SIZES]) { 2861 int sb_width_in_blocks = MI_BLOCK_SIZE; 2862 int sb_height_in_blocks = MI_BLOCK_SIZE; 2863 int i, j; 2864 int index = 0; 2865 2866 // Check the sb_type for each block that belongs to this region. 2867 for (i = 0; i < sb_height_in_blocks; ++i) { 2868 for (j = 0; j < sb_width_in_blocks; ++j) { 2869 MODE_INFO *mi = mi_8x8[index + j]; 2870 BLOCK_SIZE sb_type = mi ? mi->sb_type : 0; 2871 bs_hist[sb_type]++; 2872 *min_block_size = VPXMIN(*min_block_size, sb_type); 2873 *max_block_size = VPXMAX(*max_block_size, sb_type); 2874 } 2875 index += xd->mi_stride; 2876 } 2877 } 2878 2879 // Next square block size less or equal than current block size. 2880 static const BLOCK_SIZE next_square_size[BLOCK_SIZES] = { 2881 BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, BLOCK_8X8, BLOCK_8X8, 2882 BLOCK_8X8, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, BLOCK_32X32, 2883 BLOCK_32X32, BLOCK_32X32, BLOCK_64X64 2884 }; 2885 2886 // Look at neighboring blocks and set a min and max partition size based on 2887 // what they chose. 2888 static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile, 2889 MACROBLOCKD *const xd, int mi_row, 2890 int mi_col, BLOCK_SIZE *min_block_size, 2891 BLOCK_SIZE *max_block_size) { 2892 VP9_COMMON *const cm = &cpi->common; 2893 MODE_INFO **mi = xd->mi; 2894 const int left_in_image = !!xd->left_mi; 2895 const int above_in_image = !!xd->above_mi; 2896 const int row8x8_remaining = tile->mi_row_end - mi_row; 2897 const int col8x8_remaining = tile->mi_col_end - mi_col; 2898 int bh, bw; 2899 BLOCK_SIZE min_size = BLOCK_4X4; 2900 BLOCK_SIZE max_size = BLOCK_64X64; 2901 int bs_hist[BLOCK_SIZES] = { 0 }; 2902 2903 // Trap case where we do not have a prediction. 2904 if (left_in_image || above_in_image || cm->frame_type != KEY_FRAME) { 2905 // Default "min to max" and "max to min" 2906 min_size = BLOCK_64X64; 2907 max_size = BLOCK_4X4; 2908 2909 // NOTE: each call to get_sb_partition_size_range() uses the previous 2910 // passed in values for min and max as a starting point. 
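    // (That is why min_size/max_size were seeded "backwards" above: the first
    // sb_type observed re-initializes both bounds, and later calls can only
    // widen them.)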
2911 // Find the min and max partition used in previous frame at this location 2912 if (cm->frame_type != KEY_FRAME) { 2913 MODE_INFO **prev_mi = 2914 &cm->prev_mi_grid_visible[mi_row * xd->mi_stride + mi_col]; 2915 get_sb_partition_size_range(xd, prev_mi, &min_size, &max_size, bs_hist); 2916 } 2917 // Find the min and max partition sizes used in the left SB64 2918 if (left_in_image) { 2919 MODE_INFO **left_sb64_mi = &mi[-MI_BLOCK_SIZE]; 2920 get_sb_partition_size_range(xd, left_sb64_mi, &min_size, &max_size, 2921 bs_hist); 2922 } 2923 // Find the min and max partition sizes used in the above SB64. 2924 if (above_in_image) { 2925 MODE_INFO **above_sb64_mi = &mi[-xd->mi_stride * MI_BLOCK_SIZE]; 2926 get_sb_partition_size_range(xd, above_sb64_mi, &min_size, &max_size, 2927 bs_hist); 2928 } 2929 2930 // Adjust observed min and max for "relaxed" auto partition case. 2931 if (cpi->sf.auto_min_max_partition_size == RELAXED_NEIGHBORING_MIN_MAX) { 2932 min_size = min_partition_size[min_size]; 2933 max_size = max_partition_size[max_size]; 2934 } 2935 } 2936 2937 // Check border cases where max and min from neighbors may not be legal. 2938 max_size = find_partition_size(max_size, row8x8_remaining, col8x8_remaining, 2939 &bh, &bw); 2940 // Test for blocks at the edge of the active image. 2941 // This may be the actual edge of the image or where there are formatting 2942 // bars. 2943 if (vp9_active_edge_sb(cpi, mi_row, mi_col)) { 2944 min_size = BLOCK_4X4; 2945 } else { 2946 min_size = 2947 VPXMIN(cpi->sf.rd_auto_partition_min_limit, VPXMIN(min_size, max_size)); 2948 } 2949 2950 // When use_square_partition_only is true, make sure at least one square 2951 // partition is allowed by selecting the next smaller square size as 2952 // *min_block_size. 2953 if (cpi->sf.use_square_partition_only && 2954 next_square_size[max_size] < min_size) { 2955 min_size = next_square_size[max_size]; 2956 } 2957 2958 *min_block_size = min_size; 2959 *max_block_size = max_size; 2960 } 2961 2962 // TODO(jingning) refactor functions setting partition search range 2963 static void set_partition_range(VP9_COMMON *cm, MACROBLOCKD *xd, int mi_row, 2964 int mi_col, BLOCK_SIZE bsize, 2965 BLOCK_SIZE *min_bs, BLOCK_SIZE *max_bs) { 2966 int mi_width = num_8x8_blocks_wide_lookup[bsize]; 2967 int mi_height = num_8x8_blocks_high_lookup[bsize]; 2968 int idx, idy; 2969 2970 MODE_INFO *mi; 2971 const int idx_str = cm->mi_stride * mi_row + mi_col; 2972 MODE_INFO **prev_mi = &cm->prev_mi_grid_visible[idx_str]; 2973 BLOCK_SIZE bs, min_size, max_size; 2974 2975 min_size = BLOCK_64X64; 2976 max_size = BLOCK_4X4; 2977 2978 if (prev_mi) { 2979 for (idy = 0; idy < mi_height; ++idy) { 2980 for (idx = 0; idx < mi_width; ++idx) { 2981 mi = prev_mi[idy * cm->mi_stride + idx]; 2982 bs = mi ? mi->sb_type : bsize; 2983 min_size = VPXMIN(min_size, bs); 2984 max_size = VPXMAX(max_size, bs); 2985 } 2986 } 2987 } 2988 2989 if (xd->left_mi) { 2990 for (idy = 0; idy < mi_height; ++idy) { 2991 mi = xd->mi[idy * cm->mi_stride - 1]; 2992 bs = mi ? mi->sb_type : bsize; 2993 min_size = VPXMIN(min_size, bs); 2994 max_size = VPXMAX(max_size, bs); 2995 } 2996 } 2997 2998 if (xd->above_mi) { 2999 for (idx = 0; idx < mi_width; ++idx) { 3000 mi = xd->mi[idx - cm->mi_stride]; 3001 bs = mi ? 
mi->sb_type : bsize; 3002 min_size = VPXMIN(min_size, bs); 3003 max_size = VPXMAX(max_size, bs); 3004 } 3005 } 3006 3007 if (min_size == max_size) { 3008 min_size = min_partition_size[min_size]; 3009 max_size = max_partition_size[max_size]; 3010 } 3011 3012 *min_bs = min_size; 3013 *max_bs = max_size; 3014 } 3015 3016 static INLINE void store_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) { 3017 memcpy(ctx->pred_mv, x->pred_mv, sizeof(x->pred_mv)); 3018 } 3019 3020 static INLINE void load_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) { 3021 memcpy(x->pred_mv, ctx->pred_mv, sizeof(x->pred_mv)); 3022 } 3023 3024 #if CONFIG_FP_MB_STATS 3025 const int num_16x16_blocks_wide_lookup[BLOCK_SIZES] = { 1, 1, 1, 1, 1, 1, 1, 3026 1, 2, 2, 2, 4, 4 }; 3027 const int num_16x16_blocks_high_lookup[BLOCK_SIZES] = { 1, 1, 1, 1, 1, 1, 1, 3028 2, 1, 2, 4, 2, 4 }; 3029 const int qindex_skip_threshold_lookup[BLOCK_SIZES] = { 3030 0, 10, 10, 30, 40, 40, 60, 80, 80, 90, 100, 100, 120 3031 }; 3032 const int qindex_split_threshold_lookup[BLOCK_SIZES] = { 3033 0, 3, 3, 7, 15, 15, 30, 40, 40, 60, 80, 80, 120 3034 }; 3035 const int complexity_16x16_blocks_threshold[BLOCK_SIZES] = { 3036 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 6 3037 }; 3038 3039 typedef enum { 3040 MV_ZERO = 0, 3041 MV_LEFT = 1, 3042 MV_UP = 2, 3043 MV_RIGHT = 3, 3044 MV_DOWN = 4, 3045 MV_INVALID 3046 } MOTION_DIRECTION; 3047 3048 static INLINE MOTION_DIRECTION get_motion_direction_fp(uint8_t fp_byte) { 3049 if (fp_byte & FPMB_MOTION_ZERO_MASK) { 3050 return MV_ZERO; 3051 } else if (fp_byte & FPMB_MOTION_LEFT_MASK) { 3052 return MV_LEFT; 3053 } else if (fp_byte & FPMB_MOTION_RIGHT_MASK) { 3054 return MV_RIGHT; 3055 } else if (fp_byte & FPMB_MOTION_UP_MASK) { 3056 return MV_UP; 3057 } else { 3058 return MV_DOWN; 3059 } 3060 } 3061 3062 static INLINE int get_motion_inconsistency(MOTION_DIRECTION this_mv, 3063 MOTION_DIRECTION that_mv) { 3064 if (this_mv == that_mv) { 3065 return 0; 3066 } else { 3067 return abs(this_mv - that_mv) == 2 ? 2 : 1; 3068 } 3069 } 3070 #endif 3071 3072 // Calculate prediction based on the given input features and neural net config. 3073 // Assume there are no more than NN_MAX_NODES_PER_LAYER nodes in each hidden 3074 // layer. 3075 static void nn_predict(const float *features, const NN_CONFIG *nn_config, 3076 float *output) { 3077 int num_input_nodes = nn_config->num_inputs; 3078 int buf_index = 0; 3079 float buf[2][NN_MAX_NODES_PER_LAYER]; 3080 const float *input_nodes = features; 3081 3082 // Propagate hidden layers. 3083 const int num_layers = nn_config->num_hidden_layers; 3084 int layer, node, i; 3085 assert(num_layers <= NN_MAX_HIDDEN_LAYERS); 3086 for (layer = 0; layer < num_layers; ++layer) { 3087 const float *weights = nn_config->weights[layer]; 3088 const float *bias = nn_config->bias[layer]; 3089 float *output_nodes = buf[buf_index]; 3090 const int num_output_nodes = nn_config->num_hidden_nodes[layer]; 3091 assert(num_output_nodes < NN_MAX_NODES_PER_LAYER); 3092 for (node = 0; node < num_output_nodes; ++node) { 3093 float val = 0.0f; 3094 for (i = 0; i < num_input_nodes; ++i) val += weights[i] * input_nodes[i]; 3095 val += bias[node]; 3096 // ReLU as activation function. 3097 val = VPXMAX(val, 0.0f); 3098 output_nodes[node] = val; 3099 weights += num_input_nodes; 3100 } 3101 num_input_nodes = num_output_nodes; 3102 input_nodes = output_nodes; 3103 buf_index = 1 - buf_index; 3104 } 3105 3106 // Final output layer. 
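  // Unlike the hidden layers above, the output layer applies only weights and
  // bias with no ReLU clamp, so the scores passed back to the caller can be
  // negative.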
3107 { 3108 const float *weights = nn_config->weights[num_layers]; 3109 for (node = 0; node < nn_config->num_outputs; ++node) { 3110 const float *bias = nn_config->bias[num_layers]; 3111 float val = 0.0f; 3112 for (i = 0; i < num_input_nodes; ++i) val += weights[i] * input_nodes[i]; 3113 output[node] = val + bias[node]; 3114 weights += num_input_nodes; 3115 } 3116 } 3117 } 3118 3119 #define FEATURES 7 3120 // Machine-learning based partition search early termination. 3121 // Return 1 to skip split and rect partitions. 3122 static int ml_pruning_partition(VP9_COMMON *const cm, MACROBLOCKD *const xd, 3123 PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col, 3124 BLOCK_SIZE bsize) { 3125 const int mag_mv = 3126 abs(ctx->mic.mv[0].as_mv.col) + abs(ctx->mic.mv[0].as_mv.row); 3127 const int left_in_image = !!xd->left_mi; 3128 const int above_in_image = !!xd->above_mi; 3129 MODE_INFO **prev_mi = 3130 &cm->prev_mi_grid_visible[mi_col + cm->mi_stride * mi_row]; 3131 int above_par = 0; // above_partitioning 3132 int left_par = 0; // left_partitioning 3133 int last_par = 0; // last_partitioning 3134 int offset = 0; 3135 int i; 3136 BLOCK_SIZE context_size; 3137 const NN_CONFIG *nn_config = NULL; 3138 const float *mean, *sd, *linear_weights; 3139 float nn_score, linear_score; 3140 float features[FEATURES]; 3141 3142 assert(b_width_log2_lookup[bsize] == b_height_log2_lookup[bsize]); 3143 vpx_clear_system_state(); 3144 3145 switch (bsize) { 3146 case BLOCK_64X64: 3147 offset = 0; 3148 nn_config = &vp9_partition_nnconfig_64x64; 3149 break; 3150 case BLOCK_32X32: 3151 offset = 8; 3152 nn_config = &vp9_partition_nnconfig_32x32; 3153 break; 3154 case BLOCK_16X16: 3155 offset = 16; 3156 nn_config = &vp9_partition_nnconfig_16x16; 3157 break; 3158 default: assert(0 && "Unexpected block size."); return 0; 3159 } 3160 3161 if (above_in_image) { 3162 context_size = xd->above_mi->sb_type; 3163 if (context_size < bsize) 3164 above_par = 2; 3165 else if (context_size == bsize) 3166 above_par = 1; 3167 } 3168 3169 if (left_in_image) { 3170 context_size = xd->left_mi->sb_type; 3171 if (context_size < bsize) 3172 left_par = 2; 3173 else if (context_size == bsize) 3174 left_par = 1; 3175 } 3176 3177 if (prev_mi) { 3178 context_size = prev_mi[0]->sb_type; 3179 if (context_size < bsize) 3180 last_par = 2; 3181 else if (context_size == bsize) 3182 last_par = 1; 3183 } 3184 3185 mean = &vp9_partition_feature_mean[offset]; 3186 sd = &vp9_partition_feature_std[offset]; 3187 features[0] = ((float)ctx->rate - mean[0]) / sd[0]; 3188 features[1] = ((float)ctx->dist - mean[1]) / sd[1]; 3189 features[2] = ((float)mag_mv / 2 - mean[2]) * sd[2]; 3190 features[3] = ((float)(left_par + above_par) / 2 - mean[3]) * sd[3]; 3191 features[4] = ((float)ctx->sum_y_eobs - mean[4]) / sd[4]; 3192 features[5] = ((float)cm->base_qindex - mean[5]) * sd[5]; 3193 features[6] = ((float)last_par - mean[6]) * sd[6]; 3194 3195 // Predict using linear model. 3196 linear_weights = &vp9_partition_linear_weights[offset]; 3197 linear_score = linear_weights[FEATURES]; 3198 for (i = 0; i < FEATURES; ++i) 3199 linear_score += linear_weights[i] * features[i]; 3200 if (linear_score > 0.1f) return 0; 3201 3202 // Predict using neural net model. 3203 nn_predict(features, nn_config, &nn_score); 3204 3205 if (linear_score < -0.0f && nn_score < 0.1f) return 1; 3206 if (nn_score < -0.0f && linear_score < 0.1f) return 1; 3207 return 0; 3208 } 3209 #undef FEATURES 3210 3211 #define FEATURES 4 3212 // ML-based partition search breakout. 
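// The model below scores four features (scaled rate, per-pixel distortion,
// per-pixel source variance and the AC quantizer step) against linear
// weights chosen by resolution and base Q, and reports a breakout when the
// score reaches sf.ml_partition_search_breakout_thresh for that Q context.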
3213 static int ml_predict_breakout(VP9_COMP *const cpi, BLOCK_SIZE bsize, 3214 const MACROBLOCK *const x, 3215 const RD_COST *const rd_cost) { 3216 DECLARE_ALIGNED(16, static const uint8_t, vp9_64_zeros[64]) = { 0 }; 3217 const VP9_COMMON *const cm = &cpi->common; 3218 float features[FEATURES]; 3219 const float *linear_weights = NULL; // Linear model weights. 3220 float linear_score = 0.0f; 3221 const int qindex = cm->base_qindex; 3222 const int q_ctx = qindex >= 200 ? 0 : (qindex >= 150 ? 1 : 2); 3223 const int is_720p_or_larger = VPXMIN(cm->width, cm->height) >= 720; 3224 const int resolution_ctx = is_720p_or_larger ? 1 : 0; 3225 3226 switch (bsize) { 3227 case BLOCK_64X64: 3228 linear_weights = vp9_partition_breakout_weights_64[resolution_ctx][q_ctx]; 3229 break; 3230 case BLOCK_32X32: 3231 linear_weights = vp9_partition_breakout_weights_32[resolution_ctx][q_ctx]; 3232 break; 3233 case BLOCK_16X16: 3234 linear_weights = vp9_partition_breakout_weights_16[resolution_ctx][q_ctx]; 3235 break; 3236 case BLOCK_8X8: 3237 linear_weights = vp9_partition_breakout_weights_8[resolution_ctx][q_ctx]; 3238 break; 3239 default: assert(0 && "Unexpected block size."); return 0; 3240 } 3241 if (!linear_weights) return 0; 3242 3243 { // Generate feature values. 3244 #if CONFIG_VP9_HIGHBITDEPTH 3245 const int ac_q = 3246 vp9_ac_quant(cm->base_qindex, 0, cm->bit_depth) >> (x->e_mbd.bd - 8); 3247 #else 3248 const int ac_q = vp9_ac_quant(qindex, 0, cm->bit_depth); 3249 #endif // CONFIG_VP9_HIGHBITDEPTH 3250 const int num_pels_log2 = num_pels_log2_lookup[bsize]; 3251 int feature_index = 0; 3252 unsigned int var, sse; 3253 float rate_f, dist_f; 3254 3255 #if CONFIG_VP9_HIGHBITDEPTH 3256 if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { 3257 var = 3258 vp9_high_get_sby_variance(cpi, &x->plane[0].src, bsize, x->e_mbd.bd); 3259 } else { 3260 var = cpi->fn_ptr[bsize].vf(x->plane[0].src.buf, x->plane[0].src.stride, 3261 vp9_64_zeros, 0, &sse); 3262 } 3263 #else 3264 var = cpi->fn_ptr[bsize].vf(x->plane[0].src.buf, x->plane[0].src.stride, 3265 vp9_64_zeros, 0, &sse); 3266 #endif 3267 var = var >> num_pels_log2; 3268 3269 vpx_clear_system_state(); 3270 3271 rate_f = (float)VPXMIN(rd_cost->rate, INT_MAX); 3272 dist_f = (float)(VPXMIN(rd_cost->dist, INT_MAX) >> num_pels_log2); 3273 rate_f = 3274 ((float)x->rdmult / 128.0f / 512.0f / (float)(1 << num_pels_log2)) * 3275 rate_f; 3276 3277 features[feature_index++] = rate_f; 3278 features[feature_index++] = dist_f; 3279 features[feature_index++] = (float)var; 3280 features[feature_index++] = (float)ac_q; 3281 assert(feature_index == FEATURES); 3282 } 3283 3284 { // Calculate the output score. 
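// The first FEATURES entries of linear_weights are per-feature weights; the
// entry at index FEATURES is the bias term.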
3285 int i; 3286 linear_score = linear_weights[FEATURES]; 3287 for (i = 0; i < FEATURES; ++i) 3288 linear_score += linear_weights[i] * features[i]; 3289 } 3290 3291 return linear_score >= cpi->sf.ml_partition_search_breakout_thresh[q_ctx]; 3292 } 3293 #undef FEATURES 3294 3295 #define FEATURES 17 3296 #define LABELS 4 3297 static void ml_prune_rect_partition(VP9_COMP *const cpi, MACROBLOCK *const x, 3298 BLOCK_SIZE bsize, 3299 const PC_TREE *const pc_tree, 3300 int *allow_horz, int *allow_vert, 3301 int64_t ref_rd, int mi_row, int mi_col) { 3302 const NN_CONFIG *nn_config = NULL; 3303 float score[LABELS] = { 3304 0.0f, 3305 }; 3306 int thresh = -1; 3307 int i; 3308 3309 if (ref_rd <= 0 || ref_rd > 1000000000) return; 3310 3311 switch (bsize) { 3312 case BLOCK_8X8: break; 3313 case BLOCK_16X16: 3314 nn_config = &vp9_rect_part_nnconfig_16; 3315 thresh = cpi->sf.ml_prune_rect_partition_threhold[1]; 3316 break; 3317 case BLOCK_32X32: 3318 nn_config = &vp9_rect_part_nnconfig_32; 3319 thresh = cpi->sf.ml_prune_rect_partition_threhold[2]; 3320 break; 3321 case BLOCK_64X64: 3322 nn_config = &vp9_rect_part_nnconfig_64; 3323 thresh = cpi->sf.ml_prune_rect_partition_threhold[3]; 3324 break; 3325 default: assert(0 && "Unexpected block size."); return; 3326 } 3327 if (!nn_config || thresh < 0) return; 3328 3329 // Feature extraction and model score calculation. 3330 { 3331 const int64_t none_rdcost = pc_tree->none.rdcost; 3332 const VP9_COMMON *const cm = &cpi->common; 3333 #if CONFIG_VP9_HIGHBITDEPTH 3334 const int dc_q = 3335 vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth) >> (x->e_mbd.bd - 8); 3336 #else 3337 const int dc_q = vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth); 3338 #endif // CONFIG_VP9_HIGHBITDEPTH 3339 int feature_index = 0; 3340 unsigned int block_var = 0; 3341 unsigned int sub_block_var[4] = { 0 }; 3342 float features[FEATURES]; 3343 3344 features[feature_index++] = 3345 (float)(pc_tree->partitioning == PARTITION_NONE); 3346 features[feature_index++] = logf((float)(dc_q * dc_q) / 256.0f + 1.0f); 3347 3348 // Calculate source pixel variance. 
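// Both the whole-block variance and the four quarter sub-block variances are
// computed here; they feed the log-variance and variance-ratio features below.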
3349 { 3350 struct buf_2d buf; 3351 const BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT); 3352 const int bs = 4 * num_4x4_blocks_wide_lookup[bsize]; 3353 const MACROBLOCKD *const xd = &x->e_mbd; 3354 vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col); 3355 3356 (void)xd; 3357 #if CONFIG_VP9_HIGHBITDEPTH 3358 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { 3359 block_var = vp9_high_get_sby_perpixel_variance(cpi, &x->plane[0].src, 3360 bsize, xd->bd); 3361 } else { 3362 block_var = vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize); 3363 } 3364 #else 3365 block_var = vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize); 3366 #endif // CONFIG_VP9_HIGHBITDEPTH 3367 3368 buf.stride = x->plane[0].src.stride; 3369 for (i = 0; i < 4; ++i) { 3370 const int x_idx = (i & 1) * bs / 2; 3371 const int y_idx = (i >> 1) * bs / 2; 3372 buf.buf = x->plane[0].src.buf + x_idx + y_idx * buf.stride; 3373 #if CONFIG_VP9_HIGHBITDEPTH 3374 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { 3375 sub_block_var[i] = 3376 vp9_high_get_sby_perpixel_variance(cpi, &buf, subsize, xd->bd); 3377 } else { 3378 sub_block_var[i] = vp9_get_sby_perpixel_variance(cpi, &buf, subsize); 3379 } 3380 #else 3381 sub_block_var[i] = vp9_get_sby_perpixel_variance(cpi, &buf, subsize); 3382 #endif // CONFIG_VP9_HIGHBITDEPTH 3383 } 3384 } 3385 3386 features[feature_index++] = logf((float)block_var + 1.0f); 3387 features[feature_index++] = logf((float)ref_rd + 1.0f); 3388 features[feature_index++] = (none_rdcost > 0 && none_rdcost < 1000000000) 3389 ? (float)pc_tree->none.skippable 3390 : 0.0f; 3391 3392 for (i = 0; i < 4; ++i) { 3393 const int64_t this_rd = pc_tree->split[i]->none.rdcost; 3394 const int rd_valid = this_rd > 0 && this_rd < 1000000000; 3395 // Ratio between sub-block RD and whole block RD. 3396 features[feature_index++] = 3397 rd_valid ? ((float)this_rd / (float)ref_rd) : 1.0f; 3398 // Sub-block skippable. 3399 features[feature_index++] = 3400 rd_valid ? ((float)pc_tree->split[i]->none.skippable) : 0.0f; 3401 } 3402 3403 { 3404 const float denom = (float)(block_var + 1); 3405 const float low_b = 0.1f; 3406 const float high_b = 10.0f; 3407 for (i = 0; i < 4; ++i) { 3408 // Ratio between the quarter sub-block variance and the 3409 // whole-block variance. 3410 float var_ratio = (float)(sub_block_var[i] + 1) / denom; 3411 if (var_ratio < low_b) var_ratio = low_b; 3412 if (var_ratio > high_b) var_ratio = high_b; 3413 features[feature_index++] = var_ratio; 3414 } 3415 } 3416 assert(feature_index == FEATURES); 3417 nn_predict(features, nn_config, score); 3418 } 3419 3420 // Make decisions based on the model score. 3421 { 3422 int max_score = -1000; 3423 int horz = 0, vert = 0; 3424 int int_score[LABELS]; 3425 for (i = 0; i < LABELS; ++i) { 3426 int_score[i] = (int)(100 * score[i]); 3427 max_score = VPXMAX(int_score[i], max_score); 3428 } 3429 thresh = max_score - thresh; 3430 for (i = 0; i < LABELS; ++i) { 3431 if (int_score[i] >= thresh) { 3432 if ((i >> 0) & 1) horz = 1; 3433 if ((i >> 1) & 1) vert = 1; 3434 } 3435 } 3436 *allow_horz = *allow_horz && horz; 3437 *allow_vert = *allow_vert && vert; 3438 } 3439 } 3440 #undef FEATURES 3441 #undef LABELS 3442 3443 // Use a neural net model to prune partition-none and partition-split search. 3444 // The model uses prediction residue variance and quantization step size as 3445 // input features. 
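// A score above the threshold disables the PARTITION_NONE search (*none = 0),
// while a score below the negative threshold disables the split search
// (*split = 0).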
3446 #define FEATURES 6 3447 static void ml_predict_var_rd_paritioning(VP9_COMP *cpi, MACROBLOCK *x, 3448 BLOCK_SIZE bsize, int mi_row, 3449 int mi_col, int *none, int *split) { 3450 VP9_COMMON *const cm = &cpi->common; 3451 MACROBLOCKD *xd = &x->e_mbd; 3452 MODE_INFO *mi = xd->mi[0]; 3453 const NN_CONFIG *nn_config = NULL; 3454 #if CONFIG_VP9_HIGHBITDEPTH 3455 DECLARE_ALIGNED(16, uint8_t, pred_buffer[64 * 64 * 2]); 3456 uint8_t *const pred_buf = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) 3457 ? (CONVERT_TO_BYTEPTR(pred_buffer)) 3458 : pred_buffer; 3459 #else 3460 DECLARE_ALIGNED(16, uint8_t, pred_buffer[64 * 64]); 3461 uint8_t *const pred_buf = pred_buffer; 3462 #endif // CONFIG_VP9_HIGHBITDEPTH 3463 const int speed = cpi->oxcf.speed; 3464 int i; 3465 float thresh = 0.0f; 3466 3467 switch (bsize) { 3468 case BLOCK_64X64: 3469 nn_config = &vp9_var_rd_part_nnconfig_64; 3470 thresh = speed > 0 ? 3.5f : 3.0f; 3471 break; 3472 case BLOCK_32X32: 3473 nn_config = &vp9_var_rd_part_nnconfig_32; 3474 thresh = speed > 0 ? 3.5f : 3.0f; 3475 break; 3476 case BLOCK_16X16: 3477 nn_config = &vp9_var_rd_part_nnconfig_16; 3478 thresh = speed > 0 ? 3.5f : 4.0f; 3479 break; 3480 case BLOCK_8X8: 3481 nn_config = &vp9_var_rd_part_nnconfig_8; 3482 if (cm->width >= 720 && cm->height >= 720) 3483 thresh = speed > 0 ? 2.5f : 2.0f; 3484 else 3485 thresh = speed > 0 ? 3.5f : 2.0f; 3486 break; 3487 default: assert(0 && "Unexpected block size."); return; 3488 } 3489 3490 if (!nn_config) return; 3491 3492 mi->ref_frame[1] = NONE; 3493 mi->sb_type = bsize; 3494 // Do a simple single motion search to find a prediction for current block. 3495 // The variance of the residue will be used as input features. 3496 { 3497 const MV_REFERENCE_FRAME ref = 3498 cpi->rc.is_src_frame_alt_ref ? ALTREF_FRAME : LAST_FRAME; 3499 YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref); 3500 MV ref_mv = { 0, 0 }; 3501 MV ref_mv_full = { 0, 0 }; 3502 const int step_param = 1; 3503 const MvLimits tmp_mv_limits = x->mv_limits; 3504 const SEARCH_METHODS search_method = NSTEP; 3505 const int sadpb = x->sadperbit16; 3506 MV best_mv = { 0, 0 }; 3507 int cost_list[5]; 3508 3509 assert(yv12 != NULL); 3510 if (!yv12) return; 3511 vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col, 3512 &cm->frame_refs[ref - 1].sf); 3513 mi->ref_frame[0] = ref; 3514 vp9_set_mv_search_range(&x->mv_limits, &ref_mv); 3515 vp9_full_pixel_search(cpi, x, bsize, &ref_mv_full, step_param, 3516 search_method, sadpb, cond_cost_list(cpi, cost_list), 3517 &ref_mv, &best_mv, 0, 0); 3518 best_mv.row *= 8; 3519 best_mv.col *= 8; 3520 x->mv_limits = tmp_mv_limits; 3521 mi->mv[0].as_mv = best_mv; 3522 3523 set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]); 3524 xd->plane[0].dst.buf = pred_buf; 3525 xd->plane[0].dst.stride = 64; 3526 vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); 3527 } 3528 3529 vpx_clear_system_state(); 3530 3531 { 3532 float features[FEATURES] = { 0.0f }; 3533 #if CONFIG_VP9_HIGHBITDEPTH 3534 const int dc_q = 3535 vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth) >> (xd->bd - 8); 3536 #else 3537 const int dc_q = vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth); 3538 #endif // CONFIG_VP9_HIGHBITDEPTH 3539 int feature_idx = 0; 3540 float score; 3541 3542 // Generate model input features. 3543 features[feature_idx++] = logf((float)(dc_q * dc_q) / 256.0f + 1.0f); 3544 vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col); 3545 // Get the variance of the residue as input features. 
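// One feature is the log of the whole-block residue variance; the remaining
// four are the quarter sub-block residue variances normalized by the
// whole-block variance.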
3546 { 3547 const int bs = 4 * num_4x4_blocks_wide_lookup[bsize]; 3548 const BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT); 3549 const uint8_t *pred = pred_buf; 3550 const uint8_t *src = x->plane[0].src.buf; 3551 const int src_stride = x->plane[0].src.stride; 3552 const int pred_stride = 64; 3553 unsigned int sse; 3554 // Variance of whole block. 3555 const unsigned int var = 3556 cpi->fn_ptr[bsize].vf(src, src_stride, pred, pred_stride, &sse); 3557 const float factor = (var == 0) ? 1.0f : (1.0f / (float)var); 3558 3559 features[feature_idx++] = logf((float)var + 1.0f); 3560 for (i = 0; i < 4; ++i) { 3561 const int x_idx = (i & 1) * bs / 2; 3562 const int y_idx = (i >> 1) * bs / 2; 3563 const int src_offset = y_idx * src_stride + x_idx; 3564 const int pred_offset = y_idx * pred_stride + x_idx; 3565 // Variance of quarter block. 3566 const unsigned int sub_var = 3567 cpi->fn_ptr[subsize].vf(src + src_offset, src_stride, 3568 pred + pred_offset, pred_stride, &sse); 3569 const float var_ratio = (var == 0) ? 1.0f : factor * (float)sub_var; 3570 features[feature_idx++] = var_ratio; 3571 } 3572 } 3573 assert(feature_idx == FEATURES); 3574 3575 // Feed the features into the model to get the confidence score. 3576 nn_predict(features, nn_config, &score); 3577 3578 // Higher score means that the model has higher confidence that the split 3579 // partition is better than the non-split partition. So if the score is 3580 // high enough, we skip the none-split partition search; if the score is 3581 // low enough, we skip the split partition search. 3582 if (score > thresh) *none = 0; 3583 if (score < -thresh) *split = 0; 3584 } 3585 } 3586 #undef FEATURES 3587 #undef LABELS 3588 3589 static int get_rdmult_delta(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row, 3590 int mi_col, int orig_rdmult) { 3591 const int gf_group_index = cpi->twopass.gf_group.index; 3592 TplDepFrame *tpl_frame = &cpi->tpl_stats[gf_group_index]; 3593 TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr; 3594 int tpl_stride = tpl_frame->stride; 3595 int64_t intra_cost = 0; 3596 int64_t mc_dep_cost = 0; 3597 int mi_wide = num_8x8_blocks_wide_lookup[bsize]; 3598 int mi_high = num_8x8_blocks_high_lookup[bsize]; 3599 int row, col; 3600 3601 int dr = 0; 3602 int count = 0; 3603 double r0, rk, beta; 3604 3605 if (tpl_frame->is_valid == 0) return orig_rdmult; 3606 3607 if (cpi->twopass.gf_group.layer_depth[gf_group_index] > 1) return orig_rdmult; 3608 3609 if (gf_group_index >= MAX_ARF_GOP_SIZE) return orig_rdmult; 3610 3611 for (row = mi_row; row < mi_row + mi_high; ++row) { 3612 for (col = mi_col; col < mi_col + mi_wide; ++col) { 3613 TplDepStats *this_stats = &tpl_stats[row * tpl_stride + col]; 3614 3615 if (row >= cpi->common.mi_rows || col >= cpi->common.mi_cols) continue; 3616 3617 intra_cost += this_stats->intra_cost; 3618 mc_dep_cost += this_stats->mc_dep_cost; 3619 3620 ++count; 3621 } 3622 } 3623 3624 vpx_clear_system_state(); 3625 3626 r0 = cpi->rd.r0; 3627 rk = (double)intra_cost / mc_dep_cost; 3628 beta = r0 / rk; 3629 dr = vp9_get_adaptive_rdmult(cpi, beta); 3630 3631 dr = VPXMIN(dr, orig_rdmult * 3 / 2); 3632 dr = VPXMAX(dr, orig_rdmult * 1 / 2); 3633 3634 dr = VPXMAX(1, dr); 3635 3636 return dr; 3637 } 3638 3639 // TODO(jingning,jimbankoski,rbultje): properly skip partition types that are 3640 // unlikely to be selected depending on previous rate-distortion optimization 3641 // results, for encoding speed-up. 
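// rd_pick_partition() recursively evaluates PARTITION_NONE, PARTITION_SPLIT,
// PARTITION_HORZ and PARTITION_VERT for the block at (mi_row, mi_col), records
// the best partitioning found in pc_tree and returns its RD cost via rd_cost.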
3642 static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, 3643 TileDataEnc *tile_data, TOKENEXTRA **tp, 3644 int mi_row, int mi_col, BLOCK_SIZE bsize, 3645 RD_COST *rd_cost, int64_t best_rd, 3646 PC_TREE *pc_tree) { 3647 VP9_COMMON *const cm = &cpi->common; 3648 TileInfo *const tile_info = &tile_data->tile_info; 3649 MACROBLOCK *const x = &td->mb; 3650 MACROBLOCKD *const xd = &x->e_mbd; 3651 const int mi_step = num_8x8_blocks_wide_lookup[bsize] / 2; 3652 ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; 3653 PARTITION_CONTEXT sl[8], sa[8]; 3654 TOKENEXTRA *tp_orig = *tp; 3655 PICK_MODE_CONTEXT *const ctx = &pc_tree->none; 3656 int i; 3657 const int pl = partition_plane_context(xd, mi_row, mi_col, bsize); 3658 BLOCK_SIZE subsize; 3659 RD_COST this_rdc, sum_rdc, best_rdc; 3660 int do_split = bsize >= BLOCK_8X8; 3661 int do_rect = 1; 3662 INTERP_FILTER pred_interp_filter; 3663 3664 // Override skipping rectangular partition operations for edge blocks 3665 const int force_horz_split = (mi_row + mi_step >= cm->mi_rows); 3666 const int force_vert_split = (mi_col + mi_step >= cm->mi_cols); 3667 const int xss = x->e_mbd.plane[1].subsampling_x; 3668 const int yss = x->e_mbd.plane[1].subsampling_y; 3669 3670 BLOCK_SIZE min_size = x->min_partition_size; 3671 BLOCK_SIZE max_size = x->max_partition_size; 3672 3673 #if CONFIG_FP_MB_STATS 3674 unsigned int src_diff_var = UINT_MAX; 3675 int none_complexity = 0; 3676 #endif 3677 3678 int partition_none_allowed = !force_horz_split && !force_vert_split; 3679 int partition_horz_allowed = 3680 !force_vert_split && yss <= xss && bsize >= BLOCK_8X8; 3681 int partition_vert_allowed = 3682 !force_horz_split && xss <= yss && bsize >= BLOCK_8X8; 3683 3684 int64_t dist_breakout_thr = cpi->sf.partition_search_breakout_thr.dist; 3685 int rate_breakout_thr = cpi->sf.partition_search_breakout_thr.rate; 3686 int must_split = 0; 3687 int partition_mul = cpi->sf.enable_tpl_model && cpi->oxcf.aq_mode == NO_AQ 3688 ? x->cb_rdmult 3689 : cpi->rd.RDMULT; 3690 // Ref frames picked in the [i_th] quarter subblock during square partition 3691 // RD search. It may be used to prune ref frame selection of rect partitions. 
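// Each entry is a bitmask with bit (1 << ref_frame) set for every reference
// frame selected in the corresponding quarter sub-block.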
3692 uint8_t ref_frames_used[4] = { 0, 0, 0, 0 }; 3693 3694 (void)*tp_orig; 3695 3696 assert(num_8x8_blocks_wide_lookup[bsize] == 3697 num_8x8_blocks_high_lookup[bsize]); 3698 3699 dist_breakout_thr >>= 3700 8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]); 3701 3702 rate_breakout_thr *= num_pels_log2_lookup[bsize]; 3703 3704 vp9_rd_cost_init(&this_rdc); 3705 vp9_rd_cost_init(&sum_rdc); 3706 vp9_rd_cost_reset(&best_rdc); 3707 best_rdc.rdcost = best_rd; 3708 3709 set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); 3710 3711 if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode != NO_AQ && 3712 cpi->oxcf.aq_mode != LOOKAHEAD_AQ) 3713 x->mb_energy = vp9_block_energy(cpi, x, bsize); 3714 3715 if (cpi->sf.cb_partition_search && bsize == BLOCK_16X16) { 3716 int cb_partition_search_ctrl = 3717 ((pc_tree->index == 0 || pc_tree->index == 3) + 3718 get_chessboard_index(cm->current_video_frame)) & 3719 0x1; 3720 3721 if (cb_partition_search_ctrl && bsize > min_size && bsize < max_size) 3722 set_partition_range(cm, xd, mi_row, mi_col, bsize, &min_size, &max_size); 3723 } 3724 3725 // Get sub block energy range 3726 if (bsize >= BLOCK_16X16) { 3727 int min_energy, max_energy; 3728 vp9_get_sub_block_energy(cpi, x, mi_row, mi_col, bsize, &min_energy, 3729 &max_energy); 3730 must_split = (min_energy < -3) && (max_energy - min_energy > 2); 3731 } 3732 3733 // Determine partition types in search according to the speed features. 3734 // The threshold set here has to be of square block size. 3735 if (cpi->sf.auto_min_max_partition_size) { 3736 partition_none_allowed &= (bsize <= max_size); 3737 partition_horz_allowed &= 3738 ((bsize <= max_size && bsize > min_size) || force_horz_split); 3739 partition_vert_allowed &= 3740 ((bsize <= max_size && bsize > min_size) || force_vert_split); 3741 do_split &= bsize > min_size; 3742 } 3743 3744 if (cpi->sf.use_square_partition_only && 3745 (bsize > cpi->sf.use_square_only_thresh_high || 3746 bsize < cpi->sf.use_square_only_thresh_low)) { 3747 if (cpi->use_svc) { 3748 if (!vp9_active_h_edge(cpi, mi_row, mi_step) || x->e_mbd.lossless) 3749 partition_horz_allowed &= force_horz_split; 3750 if (!vp9_active_v_edge(cpi, mi_row, mi_step) || x->e_mbd.lossless) 3751 partition_vert_allowed &= force_vert_split; 3752 } else { 3753 partition_horz_allowed &= force_horz_split; 3754 partition_vert_allowed &= force_vert_split; 3755 } 3756 } 3757 3758 save_context(x, mi_row, mi_col, a, l, sa, sl, bsize); 3759 3760 #if CONFIG_FP_MB_STATS 3761 if (cpi->use_fp_mb_stats) { 3762 set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); 3763 src_diff_var = get_sby_perpixel_diff_variance(cpi, &x->plane[0].src, mi_row, 3764 mi_col, bsize); 3765 } 3766 #endif 3767 3768 #if CONFIG_FP_MB_STATS 3769 // Decide whether we shall split directly and skip searching NONE by using 3770 // the first pass block statistics 3771 if (cpi->use_fp_mb_stats && bsize >= BLOCK_32X32 && do_split && 3772 partition_none_allowed && src_diff_var > 4 && 3773 cm->base_qindex < qindex_split_threshold_lookup[bsize]) { 3774 int mb_row = mi_row >> 1; 3775 int mb_col = mi_col >> 1; 3776 int mb_row_end = 3777 VPXMIN(mb_row + num_16x16_blocks_high_lookup[bsize], cm->mb_rows); 3778 int mb_col_end = 3779 VPXMIN(mb_col + num_16x16_blocks_wide_lookup[bsize], cm->mb_cols); 3780 int r, c; 3781 3782 // compute a complexity measure, basically measure inconsistency of motion 3783 // vectors obtained from the first pass in the current block 3784 for (r = mb_row; r < mb_row_end; r++) { 3785 for (c = mb_col; c < mb_col_end; c++) { 3786 
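// Accumulate the motion-direction inconsistency between this 16x16 block and
// its right and bottom neighbors.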
const int mb_index = r * cm->mb_cols + c; 3787 3788 MOTION_DIRECTION this_mv; 3789 MOTION_DIRECTION right_mv; 3790 MOTION_DIRECTION bottom_mv; 3791 3792 this_mv = 3793 get_motion_direction_fp(cpi->twopass.this_frame_mb_stats[mb_index]); 3794 3795 // to its right 3796 if (c != mb_col_end - 1) { 3797 right_mv = get_motion_direction_fp( 3798 cpi->twopass.this_frame_mb_stats[mb_index + 1]); 3799 none_complexity += get_motion_inconsistency(this_mv, right_mv); 3800 } 3801 3802 // to its bottom 3803 if (r != mb_row_end - 1) { 3804 bottom_mv = get_motion_direction_fp( 3805 cpi->twopass.this_frame_mb_stats[mb_index + cm->mb_cols]); 3806 none_complexity += get_motion_inconsistency(this_mv, bottom_mv); 3807 } 3808 3809 // do not count its left and top neighbors to avoid double counting 3810 } 3811 } 3812 3813 if (none_complexity > complexity_16x16_blocks_threshold[bsize]) { 3814 partition_none_allowed = 0; 3815 } 3816 } 3817 #endif 3818 3819 pc_tree->partitioning = PARTITION_NONE; 3820 3821 if (cpi->sf.ml_var_partition_pruning) { 3822 const int do_ml_var_partition_pruning = 3823 !frame_is_intra_only(cm) && partition_none_allowed && do_split && 3824 mi_row + num_8x8_blocks_high_lookup[bsize] <= cm->mi_rows && 3825 mi_col + num_8x8_blocks_wide_lookup[bsize] <= cm->mi_cols; 3826 if (do_ml_var_partition_pruning) { 3827 ml_predict_var_rd_paritioning(cpi, x, bsize, mi_row, mi_col, 3828 &partition_none_allowed, &do_split); 3829 } 3830 } 3831 3832 // PARTITION_NONE 3833 if (partition_none_allowed) { 3834 rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, bsize, ctx, 3835 best_rdc.rdcost); 3836 ctx->rdcost = this_rdc.rdcost; 3837 if (this_rdc.rate != INT_MAX) { 3838 if (cpi->sf.prune_ref_frame_for_rect_partitions) { 3839 const int ref1 = ctx->mic.ref_frame[0]; 3840 const int ref2 = ctx->mic.ref_frame[1]; 3841 for (i = 0; i < 4; ++i) { 3842 ref_frames_used[i] |= (1 << ref1); 3843 if (ref2 > 0) ref_frames_used[i] |= (1 << ref2); 3844 } 3845 } 3846 if (bsize >= BLOCK_8X8) { 3847 this_rdc.rdcost += RDCOST(partition_mul, x->rddiv, 3848 cpi->partition_cost[pl][PARTITION_NONE], 0); 3849 this_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE]; 3850 } 3851 3852 if (this_rdc.rdcost < best_rdc.rdcost) { 3853 MODE_INFO *mi = xd->mi[0]; 3854 3855 best_rdc = this_rdc; 3856 if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE; 3857 3858 if (cpi->sf.ml_partition_search_early_termination) { 3859 // Currently, the machine-learning based partition search early 3860 // termination is only used while bsize is 16x16, 32x32 or 64x64, 3861 // VPXMIN(cm->width, cm->height) >= 480, and speed = 0. 
3862 if (!x->e_mbd.lossless && 3863 !segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP) && 3864 ctx->mic.mode >= INTRA_MODES && bsize >= BLOCK_16X16) { 3865 if (ml_pruning_partition(cm, xd, ctx, mi_row, mi_col, bsize)) { 3866 do_split = 0; 3867 do_rect = 0; 3868 } 3869 } 3870 } 3871 3872 if ((do_split || do_rect) && !x->e_mbd.lossless && ctx->skippable) { 3873 const int use_ml_based_breakout = 3874 cpi->sf.use_ml_partition_search_breakout && 3875 cm->base_qindex >= 100; 3876 if (use_ml_based_breakout) { 3877 if (ml_predict_breakout(cpi, bsize, x, &this_rdc)) { 3878 do_split = 0; 3879 do_rect = 0; 3880 } 3881 } else { 3882 if (!cpi->sf.ml_partition_search_early_termination) { 3883 if ((best_rdc.dist < (dist_breakout_thr >> 2)) || 3884 (best_rdc.dist < dist_breakout_thr && 3885 best_rdc.rate < rate_breakout_thr)) { 3886 do_split = 0; 3887 do_rect = 0; 3888 } 3889 } 3890 } 3891 } 3892 3893 #if CONFIG_FP_MB_STATS 3894 // Check if every 16x16 first pass block statistics has zero 3895 // motion and the corresponding first pass residue is small enough. 3896 // If that is the case, check the difference variance between the 3897 // current frame and the last frame. If the variance is small enough, 3898 // stop further splitting in RD optimization 3899 if (cpi->use_fp_mb_stats && do_split != 0 && 3900 cm->base_qindex > qindex_skip_threshold_lookup[bsize]) { 3901 int mb_row = mi_row >> 1; 3902 int mb_col = mi_col >> 1; 3903 int mb_row_end = 3904 VPXMIN(mb_row + num_16x16_blocks_high_lookup[bsize], cm->mb_rows); 3905 int mb_col_end = 3906 VPXMIN(mb_col + num_16x16_blocks_wide_lookup[bsize], cm->mb_cols); 3907 int r, c; 3908 3909 int skip = 1; 3910 for (r = mb_row; r < mb_row_end; r++) { 3911 for (c = mb_col; c < mb_col_end; c++) { 3912 const int mb_index = r * cm->mb_cols + c; 3913 if (!(cpi->twopass.this_frame_mb_stats[mb_index] & 3914 FPMB_MOTION_ZERO_MASK) || 3915 !(cpi->twopass.this_frame_mb_stats[mb_index] & 3916 FPMB_ERROR_SMALL_MASK)) { 3917 skip = 0; 3918 break; 3919 } 3920 } 3921 if (skip == 0) { 3922 break; 3923 } 3924 } 3925 3926 if (skip) { 3927 if (src_diff_var == UINT_MAX) { 3928 set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); 3929 src_diff_var = get_sby_perpixel_diff_variance( 3930 cpi, &x->plane[0].src, mi_row, mi_col, bsize); 3931 } 3932 if (src_diff_var < 8) { 3933 do_split = 0; 3934 do_rect = 0; 3935 } 3936 } 3937 } 3938 #endif 3939 } 3940 } 3941 restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); 3942 } else { 3943 vp9_zero(ctx->pred_mv); 3944 ctx->mic.interp_filter = EIGHTTAP; 3945 } 3946 3947 // store estimated motion vector 3948 store_pred_mv(x, ctx); 3949 3950 // If the interp_filter is marked as SWITCHABLE_FILTERS, it was for an 3951 // intra block and used for context purposes. 3952 if (ctx->mic.interp_filter == SWITCHABLE_FILTERS) { 3953 pred_interp_filter = EIGHTTAP; 3954 } else { 3955 pred_interp_filter = ctx->mic.interp_filter; 3956 } 3957 3958 // PARTITION_SPLIT 3959 // TODO(jingning): use the motion vectors given by the above search as 3960 // the starting point of motion search in the following partition type check. 
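// The sub-block rdcosts are cleared up front so that ml_prune_rect_partition()
// can later ignore sub-blocks that were never searched (their rdcost stays 0).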
3961 pc_tree->split[0]->none.rdcost = 0; 3962 pc_tree->split[1]->none.rdcost = 0; 3963 pc_tree->split[2]->none.rdcost = 0; 3964 pc_tree->split[3]->none.rdcost = 0; 3965 if (do_split || must_split) { 3966 subsize = get_subsize(bsize, PARTITION_SPLIT); 3967 load_pred_mv(x, ctx); 3968 if (bsize == BLOCK_8X8) { 3969 i = 4; 3970 if (cpi->sf.adaptive_pred_interp_filter && partition_none_allowed) 3971 pc_tree->leaf_split[0]->pred_interp_filter = pred_interp_filter; 3972 rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize, 3973 pc_tree->leaf_split[0], best_rdc.rdcost); 3974 if (sum_rdc.rate == INT_MAX) { 3975 sum_rdc.rdcost = INT64_MAX; 3976 } else { 3977 if (cpi->sf.prune_ref_frame_for_rect_partitions) { 3978 const int ref1 = pc_tree->leaf_split[0]->mic.ref_frame[0]; 3979 const int ref2 = pc_tree->leaf_split[0]->mic.ref_frame[1]; 3980 for (i = 0; i < 4; ++i) { 3981 ref_frames_used[i] |= (1 << ref1); 3982 if (ref2 > 0) ref_frames_used[i] |= (1 << ref2); 3983 } 3984 } 3985 } 3986 } else { 3987 for (i = 0; (i < 4) && ((sum_rdc.rdcost < best_rdc.rdcost) || must_split); 3988 ++i) { 3989 const int x_idx = (i & 1) * mi_step; 3990 const int y_idx = (i >> 1) * mi_step; 3991 3992 if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols) 3993 continue; 3994 3995 pc_tree->split[i]->index = i; 3996 if (cpi->sf.prune_ref_frame_for_rect_partitions) 3997 pc_tree->split[i]->none.rate = INT_MAX; 3998 rd_pick_partition(cpi, td, tile_data, tp, mi_row + y_idx, 3999 mi_col + x_idx, subsize, &this_rdc, 4000 // A must split test here increases the number of sub 4001 // partitions but hurts metrics results quite a bit, 4002 // so this extra test is commented out pending 4003 // further tests on whether it adds much in terms of 4004 // visual quality. 4005 // (must_split) ? best_rdc.rdcost 4006 // : best_rdc.rdcost - sum_rdc.rdcost, 4007 best_rdc.rdcost - sum_rdc.rdcost, pc_tree->split[i]); 4008 4009 if (this_rdc.rate == INT_MAX) { 4010 sum_rdc.rdcost = INT64_MAX; 4011 break; 4012 } else { 4013 if (cpi->sf.prune_ref_frame_for_rect_partitions && 4014 pc_tree->split[i]->none.rate != INT_MAX) { 4015 const int ref1 = pc_tree->split[i]->none.mic.ref_frame[0]; 4016 const int ref2 = pc_tree->split[i]->none.mic.ref_frame[1]; 4017 ref_frames_used[i] |= (1 << ref1); 4018 if (ref2 > 0) ref_frames_used[i] |= (1 << ref2); 4019 } 4020 sum_rdc.rate += this_rdc.rate; 4021 sum_rdc.dist += this_rdc.dist; 4022 sum_rdc.rdcost += this_rdc.rdcost; 4023 } 4024 } 4025 } 4026 4027 if (((sum_rdc.rdcost < best_rdc.rdcost) || must_split) && i == 4) { 4028 sum_rdc.rdcost += RDCOST(partition_mul, x->rddiv, 4029 cpi->partition_cost[pl][PARTITION_SPLIT], 0); 4030 sum_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT]; 4031 4032 if ((sum_rdc.rdcost < best_rdc.rdcost) || 4033 (must_split && (sum_rdc.dist < best_rdc.dist))) { 4034 best_rdc = sum_rdc; 4035 pc_tree->partitioning = PARTITION_SPLIT; 4036 4037 // Rate and distortion based partition search termination clause. 
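// If the split result is already good enough (distortion and rate below the
// scaled breakout thresholds), skip the rectangular partition search.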
4038 if (!cpi->sf.ml_partition_search_early_termination && 4039 !x->e_mbd.lossless && 4040 ((best_rdc.dist < (dist_breakout_thr >> 2)) || 4041 (best_rdc.dist < dist_breakout_thr && 4042 best_rdc.rate < rate_breakout_thr))) { 4043 do_rect = 0; 4044 } 4045 } 4046 } else { 4047 // skip rectangular partition test when larger block size 4048 // gives better rd cost 4049 if (cpi->sf.less_rectangular_check && 4050 (bsize > cpi->sf.use_square_only_thresh_high || 4051 best_rdc.dist < dist_breakout_thr)) 4052 do_rect &= !partition_none_allowed; 4053 } 4054 restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); 4055 } 4056 4057 pc_tree->horizontal[0].skip_ref_frame_mask = 0; 4058 pc_tree->horizontal[1].skip_ref_frame_mask = 0; 4059 pc_tree->vertical[0].skip_ref_frame_mask = 0; 4060 pc_tree->vertical[1].skip_ref_frame_mask = 0; 4061 if (cpi->sf.prune_ref_frame_for_rect_partitions) { 4062 uint8_t used_frames; 4063 used_frames = ref_frames_used[0] | ref_frames_used[1]; 4064 if (used_frames) pc_tree->horizontal[0].skip_ref_frame_mask = ~used_frames; 4065 used_frames = ref_frames_used[2] | ref_frames_used[3]; 4066 if (used_frames) pc_tree->horizontal[1].skip_ref_frame_mask = ~used_frames; 4067 used_frames = ref_frames_used[0] | ref_frames_used[2]; 4068 if (used_frames) pc_tree->vertical[0].skip_ref_frame_mask = ~used_frames; 4069 used_frames = ref_frames_used[1] | ref_frames_used[3]; 4070 if (used_frames) pc_tree->vertical[1].skip_ref_frame_mask = ~used_frames; 4071 } 4072 4073 { 4074 const int do_ml_rect_partition_pruning = 4075 !frame_is_intra_only(cm) && !force_horz_split && !force_vert_split && 4076 (partition_horz_allowed || partition_vert_allowed) && bsize > BLOCK_8X8; 4077 if (do_ml_rect_partition_pruning) { 4078 ml_prune_rect_partition(cpi, x, bsize, pc_tree, &partition_horz_allowed, 4079 &partition_vert_allowed, best_rdc.rdcost, mi_row, 4080 mi_col); 4081 } 4082 } 4083 4084 // PARTITION_HORZ 4085 if (partition_horz_allowed && 4086 (do_rect || vp9_active_h_edge(cpi, mi_row, mi_step))) { 4087 const int part_mode_rate = cpi->partition_cost[pl][PARTITION_HORZ]; 4088 const int64_t part_mode_rdcost = 4089 RDCOST(partition_mul, x->rddiv, part_mode_rate, 0); 4090 subsize = get_subsize(bsize, PARTITION_HORZ); 4091 load_pred_mv(x, ctx); 4092 if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && 4093 partition_none_allowed) 4094 pc_tree->horizontal[0].pred_interp_filter = pred_interp_filter; 4095 rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize, 4096 &pc_tree->horizontal[0], 4097 best_rdc.rdcost - part_mode_rdcost); 4098 if (sum_rdc.rdcost < INT64_MAX) { 4099 sum_rdc.rdcost += part_mode_rdcost; 4100 sum_rdc.rate += part_mode_rate; 4101 } 4102 4103 if (sum_rdc.rdcost < best_rdc.rdcost && mi_row + mi_step < cm->mi_rows && 4104 bsize > BLOCK_8X8) { 4105 PICK_MODE_CONTEXT *ctx = &pc_tree->horizontal[0]; 4106 update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0); 4107 encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx); 4108 if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && 4109 partition_none_allowed) 4110 pc_tree->horizontal[1].pred_interp_filter = pred_interp_filter; 4111 rd_pick_sb_modes(cpi, tile_data, x, mi_row + mi_step, mi_col, &this_rdc, 4112 subsize, &pc_tree->horizontal[1], 4113 best_rdc.rdcost - sum_rdc.rdcost); 4114 if (this_rdc.rate == INT_MAX) { 4115 sum_rdc.rdcost = INT64_MAX; 4116 } else { 4117 sum_rdc.rate += this_rdc.rate; 4118 sum_rdc.dist += this_rdc.dist; 4119 sum_rdc.rdcost += this_rdc.rdcost; 4120 } 4121 } 4122 4123 if 
(sum_rdc.rdcost < best_rdc.rdcost) { 4124 best_rdc = sum_rdc; 4125 pc_tree->partitioning = PARTITION_HORZ; 4126 4127 if (cpi->sf.less_rectangular_check && 4128 bsize > cpi->sf.use_square_only_thresh_high) 4129 do_rect = 0; 4130 } 4131 restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); 4132 } 4133 4134 // PARTITION_VERT 4135 if (partition_vert_allowed && 4136 (do_rect || vp9_active_v_edge(cpi, mi_col, mi_step))) { 4137 const int part_mode_rate = cpi->partition_cost[pl][PARTITION_VERT]; 4138 const int64_t part_mode_rdcost = 4139 RDCOST(partition_mul, x->rddiv, part_mode_rate, 0); 4140 subsize = get_subsize(bsize, PARTITION_VERT); 4141 load_pred_mv(x, ctx); 4142 if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && 4143 partition_none_allowed) 4144 pc_tree->vertical[0].pred_interp_filter = pred_interp_filter; 4145 rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize, 4146 &pc_tree->vertical[0], best_rdc.rdcost - part_mode_rdcost); 4147 if (sum_rdc.rdcost < INT64_MAX) { 4148 sum_rdc.rdcost += part_mode_rdcost; 4149 sum_rdc.rate += part_mode_rate; 4150 } 4151 4152 if (sum_rdc.rdcost < best_rdc.rdcost && mi_col + mi_step < cm->mi_cols && 4153 bsize > BLOCK_8X8) { 4154 update_state(cpi, td, &pc_tree->vertical[0], mi_row, mi_col, subsize, 0); 4155 encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, 4156 &pc_tree->vertical[0]); 4157 if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && 4158 partition_none_allowed) 4159 pc_tree->vertical[1].pred_interp_filter = pred_interp_filter; 4160 rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + mi_step, &this_rdc, 4161 subsize, &pc_tree->vertical[1], 4162 best_rdc.rdcost - sum_rdc.rdcost); 4163 if (this_rdc.rate == INT_MAX) { 4164 sum_rdc.rdcost = INT64_MAX; 4165 } else { 4166 sum_rdc.rate += this_rdc.rate; 4167 sum_rdc.dist += this_rdc.dist; 4168 sum_rdc.rdcost += this_rdc.rdcost; 4169 } 4170 } 4171 4172 if (sum_rdc.rdcost < best_rdc.rdcost) { 4173 best_rdc = sum_rdc; 4174 pc_tree->partitioning = PARTITION_VERT; 4175 } 4176 restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); 4177 } 4178 4179 // TODO(jbb): This code added so that we avoid static analysis 4180 // warning related to the fact that best_rd isn't used after this 4181 // point. This code should be refactored so that the duplicate 4182 // checks occur in some sub function and thus are used... 
4183 (void)best_rd; 4184 *rd_cost = best_rdc; 4185 4186 if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX && 4187 pc_tree->index != 3) { 4188 int output_enabled = (bsize == BLOCK_64X64); 4189 encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize, 4190 pc_tree); 4191 } 4192 4193 if (bsize == BLOCK_64X64) { 4194 assert(tp_orig < *tp); 4195 assert(best_rdc.rate < INT_MAX); 4196 assert(best_rdc.dist < INT64_MAX); 4197 } else { 4198 assert(tp_orig == *tp); 4199 } 4200 } 4201 4202 static void encode_rd_sb_row(VP9_COMP *cpi, ThreadData *td, 4203 TileDataEnc *tile_data, int mi_row, 4204 TOKENEXTRA **tp) { 4205 VP9_COMMON *const cm = &cpi->common; 4206 TileInfo *const tile_info = &tile_data->tile_info; 4207 MACROBLOCK *const x = &td->mb; 4208 MACROBLOCKD *const xd = &x->e_mbd; 4209 SPEED_FEATURES *const sf = &cpi->sf; 4210 const int mi_col_start = tile_info->mi_col_start; 4211 const int mi_col_end = tile_info->mi_col_end; 4212 int mi_col; 4213 const int sb_row = mi_row >> MI_BLOCK_SIZE_LOG2; 4214 const int num_sb_cols = 4215 get_num_cols(tile_data->tile_info, MI_BLOCK_SIZE_LOG2); 4216 int sb_col_in_tile; 4217 4218 // Initialize the left context for the new SB row 4219 memset(&xd->left_context, 0, sizeof(xd->left_context)); 4220 memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context)); 4221 4222 // Code each SB in the row 4223 for (mi_col = mi_col_start, sb_col_in_tile = 0; mi_col < mi_col_end; 4224 mi_col += MI_BLOCK_SIZE, sb_col_in_tile++) { 4225 const struct segmentation *const seg = &cm->seg; 4226 int dummy_rate; 4227 int64_t dummy_dist; 4228 RD_COST dummy_rdc; 4229 int i; 4230 int seg_skip = 0; 4231 4232 const int idx_str = cm->mi_stride * mi_row + mi_col; 4233 MODE_INFO **mi = cm->mi_grid_visible + idx_str; 4234 4235 (*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, sb_row, 4236 sb_col_in_tile); 4237 4238 if (sf->adaptive_pred_interp_filter) { 4239 for (i = 0; i < 64; ++i) td->leaf_tree[i].pred_interp_filter = SWITCHABLE; 4240 4241 for (i = 0; i < 64; ++i) { 4242 td->pc_tree[i].vertical[0].pred_interp_filter = SWITCHABLE; 4243 td->pc_tree[i].vertical[1].pred_interp_filter = SWITCHABLE; 4244 td->pc_tree[i].horizontal[0].pred_interp_filter = SWITCHABLE; 4245 td->pc_tree[i].horizontal[1].pred_interp_filter = SWITCHABLE; 4246 } 4247 } 4248 4249 for (i = 0; i < MAX_REF_FRAMES; ++i) { 4250 x->pred_mv[i].row = INT16_MAX; 4251 x->pred_mv[i].col = INT16_MAX; 4252 } 4253 td->pc_root->index = 0; 4254 4255 if (seg->enabled) { 4256 const uint8_t *const map = 4257 seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map; 4258 int segment_id = get_segment_id(cm, map, BLOCK_64X64, mi_row, mi_col); 4259 seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP); 4260 } 4261 4262 x->source_variance = UINT_MAX; 4263 if (sf->partition_search_type == FIXED_PARTITION || seg_skip) { 4264 const BLOCK_SIZE bsize = 4265 seg_skip ? 
BLOCK_64X64 : sf->always_this_block_size; 4266 set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64); 4267 set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize); 4268 rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64, 4269 &dummy_rate, &dummy_dist, 1, td->pc_root); 4270 } else if (cpi->partition_search_skippable_frame) { 4271 BLOCK_SIZE bsize; 4272 set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64); 4273 bsize = get_rd_var_based_fixed_partition(cpi, x, mi_row, mi_col); 4274 set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize); 4275 rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64, 4276 &dummy_rate, &dummy_dist, 1, td->pc_root); 4277 } else if (sf->partition_search_type == VAR_BASED_PARTITION && 4278 cm->frame_type != KEY_FRAME) { 4279 choose_partitioning(cpi, tile_info, x, mi_row, mi_col); 4280 rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64, 4281 &dummy_rate, &dummy_dist, 1, td->pc_root); 4282 } else { 4283 int orig_rdmult = cpi->rd.RDMULT; 4284 x->cb_rdmult = orig_rdmult; 4285 if (cpi->twopass.gf_group.index > 0 && cpi->sf.enable_tpl_model) { 4286 int dr = 4287 get_rdmult_delta(cpi, BLOCK_64X64, mi_row, mi_col, orig_rdmult); 4288 x->cb_rdmult = dr; 4289 } 4290 4291 // If required set upper and lower partition size limits 4292 if (sf->auto_min_max_partition_size) { 4293 set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64); 4294 rd_auto_partition_range(cpi, tile_info, xd, mi_row, mi_col, 4295 &x->min_partition_size, &x->max_partition_size); 4296 } 4297 td->pc_root->none.rdcost = 0; 4298 rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, BLOCK_64X64, 4299 &dummy_rdc, INT64_MAX, td->pc_root); 4300 } 4301 (*(cpi->row_mt_sync_write_ptr))(&tile_data->row_mt_sync, sb_row, 4302 sb_col_in_tile, num_sb_cols); 4303 } 4304 } 4305 4306 static void init_encode_frame_mb_context(VP9_COMP *cpi) { 4307 MACROBLOCK *const x = &cpi->td.mb; 4308 VP9_COMMON *const cm = &cpi->common; 4309 MACROBLOCKD *const xd = &x->e_mbd; 4310 const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); 4311 4312 // Copy data over into macro block data structures. 4313 vp9_setup_src_planes(x, cpi->Source, 0, 0); 4314 4315 vp9_setup_block_planes(&x->e_mbd, cm->subsampling_x, cm->subsampling_y); 4316 4317 // Note: this memset assumes above_context[0], [1] and [2] 4318 // are allocated as part of the same buffer. 
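// Clear the above entropy and segmentation contexts for the full frame width
// before encoding starts.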
4319 memset(xd->above_context[0], 0, 4320 sizeof(*xd->above_context[0]) * 2 * aligned_mi_cols * MAX_MB_PLANE); 4321 memset(xd->above_seg_context, 0, 4322 sizeof(*xd->above_seg_context) * aligned_mi_cols); 4323 } 4324 4325 static int check_dual_ref_flags(VP9_COMP *cpi) { 4326 const int ref_flags = cpi->ref_frame_flags; 4327 4328 if (segfeature_active(&cpi->common.seg, 1, SEG_LVL_REF_FRAME)) { 4329 return 0; 4330 } else { 4331 return (!!(ref_flags & VP9_GOLD_FLAG) + !!(ref_flags & VP9_LAST_FLAG) + 4332 !!(ref_flags & VP9_ALT_FLAG)) >= 2; 4333 } 4334 } 4335 4336 static void reset_skip_tx_size(VP9_COMMON *cm, TX_SIZE max_tx_size) { 4337 int mi_row, mi_col; 4338 const int mis = cm->mi_stride; 4339 MODE_INFO **mi_ptr = cm->mi_grid_visible; 4340 4341 for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row, mi_ptr += mis) { 4342 for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) { 4343 if (mi_ptr[mi_col]->tx_size > max_tx_size) 4344 mi_ptr[mi_col]->tx_size = max_tx_size; 4345 } 4346 } 4347 } 4348 4349 static MV_REFERENCE_FRAME get_frame_type(const VP9_COMP *cpi) { 4350 if (frame_is_intra_only(&cpi->common)) 4351 return INTRA_FRAME; 4352 else if (cpi->rc.is_src_frame_alt_ref && cpi->refresh_golden_frame) 4353 return ALTREF_FRAME; 4354 else if (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) 4355 return GOLDEN_FRAME; 4356 else 4357 return LAST_FRAME; 4358 } 4359 4360 static TX_MODE select_tx_mode(const VP9_COMP *cpi, MACROBLOCKD *const xd) { 4361 if (xd->lossless) return ONLY_4X4; 4362 if (cpi->common.frame_type == KEY_FRAME && cpi->sf.use_nonrd_pick_mode) 4363 return ALLOW_16X16; 4364 if (cpi->sf.tx_size_search_method == USE_LARGESTALL) 4365 return ALLOW_32X32; 4366 else if (cpi->sf.tx_size_search_method == USE_FULL_RD || 4367 cpi->sf.tx_size_search_method == USE_TX_8X8) 4368 return TX_MODE_SELECT; 4369 else 4370 return cpi->common.tx_mode; 4371 } 4372 4373 static void hybrid_intra_mode_search(VP9_COMP *cpi, MACROBLOCK *const x, 4374 RD_COST *rd_cost, BLOCK_SIZE bsize, 4375 PICK_MODE_CONTEXT *ctx) { 4376 if (!cpi->sf.nonrd_keyframe && bsize < BLOCK_16X16) 4377 vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, INT64_MAX); 4378 else 4379 vp9_pick_intra_mode(cpi, x, rd_cost, bsize, ctx); 4380 } 4381 4382 static void hybrid_search_svc_baseiskey(VP9_COMP *cpi, MACROBLOCK *const x, 4383 RD_COST *rd_cost, BLOCK_SIZE bsize, 4384 PICK_MODE_CONTEXT *ctx, 4385 TileDataEnc *tile_data, int mi_row, 4386 int mi_col) { 4387 if (!cpi->sf.nonrd_keyframe && bsize <= BLOCK_8X8) { 4388 vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, INT64_MAX); 4389 } else { 4390 if (cpi->svc.disable_inter_layer_pred == INTER_LAYER_PRED_OFF) 4391 vp9_pick_intra_mode(cpi, x, rd_cost, bsize, ctx); 4392 else if (bsize >= BLOCK_8X8) 4393 vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col, rd_cost, bsize, 4394 ctx); 4395 else 4396 vp9_pick_inter_mode_sub8x8(cpi, x, mi_row, mi_col, rd_cost, bsize, ctx); 4397 } 4398 } 4399 4400 static void hybrid_search_scene_change(VP9_COMP *cpi, MACROBLOCK *const x, 4401 RD_COST *rd_cost, BLOCK_SIZE bsize, 4402 PICK_MODE_CONTEXT *ctx, 4403 TileDataEnc *tile_data, int mi_row, 4404 int mi_col) { 4405 if (!cpi->sf.nonrd_keyframe && bsize <= BLOCK_8X8) { 4406 vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, INT64_MAX); 4407 } else { 4408 vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col, rd_cost, bsize, ctx); 4409 } 4410 } 4411 4412 static void nonrd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data, 4413 MACROBLOCK *const x, int mi_row, int mi_col, 4414 RD_COST *rd_cost, BLOCK_SIZE 
bsize, 4415 PICK_MODE_CONTEXT *ctx) { 4416 VP9_COMMON *const cm = &cpi->common; 4417 TileInfo *const tile_info = &tile_data->tile_info; 4418 MACROBLOCKD *const xd = &x->e_mbd; 4419 MODE_INFO *mi; 4420 ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; 4421 BLOCK_SIZE bs = VPXMAX(bsize, BLOCK_8X8); // processing unit block size 4422 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bs]; 4423 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bs]; 4424 int plane; 4425 4426 set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); 4427 4428 set_segment_index(cpi, x, mi_row, mi_col, bsize, 0); 4429 4430 mi = xd->mi[0]; 4431 mi->sb_type = bsize; 4432 4433 for (plane = 0; plane < MAX_MB_PLANE; ++plane) { 4434 struct macroblockd_plane *pd = &xd->plane[plane]; 4435 memcpy(a + num_4x4_blocks_wide * plane, pd->above_context, 4436 (sizeof(a[0]) * num_4x4_blocks_wide) >> pd->subsampling_x); 4437 memcpy(l + num_4x4_blocks_high * plane, pd->left_context, 4438 (sizeof(l[0]) * num_4x4_blocks_high) >> pd->subsampling_y); 4439 } 4440 4441 if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) 4442 if (cyclic_refresh_segment_id_boosted(mi->segment_id)) 4443 x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh); 4444 4445 if (frame_is_intra_only(cm)) 4446 hybrid_intra_mode_search(cpi, x, rd_cost, bsize, ctx); 4447 else if (cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame) 4448 hybrid_search_svc_baseiskey(cpi, x, rd_cost, bsize, ctx, tile_data, mi_row, 4449 mi_col); 4450 else if (segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP)) 4451 set_mode_info_seg_skip(x, cm->tx_mode, rd_cost, bsize); 4452 else if (bsize >= BLOCK_8X8) { 4453 if (cpi->rc.hybrid_intra_scene_change) 4454 hybrid_search_scene_change(cpi, x, rd_cost, bsize, ctx, tile_data, mi_row, 4455 mi_col); 4456 else 4457 vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col, rd_cost, bsize, 4458 ctx); 4459 } else { 4460 vp9_pick_inter_mode_sub8x8(cpi, x, mi_row, mi_col, rd_cost, bsize, ctx); 4461 } 4462 4463 duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize); 4464 4465 for (plane = 0; plane < MAX_MB_PLANE; ++plane) { 4466 struct macroblockd_plane *pd = &xd->plane[plane]; 4467 memcpy(pd->above_context, a + num_4x4_blocks_wide * plane, 4468 (sizeof(a[0]) * num_4x4_blocks_wide) >> pd->subsampling_x); 4469 memcpy(pd->left_context, l + num_4x4_blocks_high * plane, 4470 (sizeof(l[0]) * num_4x4_blocks_high) >> pd->subsampling_y); 4471 } 4472 4473 if (rd_cost->rate == INT_MAX) vp9_rd_cost_reset(rd_cost); 4474 4475 ctx->rate = rd_cost->rate; 4476 ctx->dist = rd_cost->dist; 4477 } 4478 4479 static void fill_mode_info_sb(VP9_COMMON *cm, MACROBLOCK *x, int mi_row, 4480 int mi_col, BLOCK_SIZE bsize, PC_TREE *pc_tree) { 4481 MACROBLOCKD *xd = &x->e_mbd; 4482 int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4; 4483 PARTITION_TYPE partition = pc_tree->partitioning; 4484 BLOCK_SIZE subsize = get_subsize(bsize, partition); 4485 4486 assert(bsize >= BLOCK_8X8); 4487 4488 if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; 4489 4490 switch (partition) { 4491 case PARTITION_NONE: 4492 set_mode_info_offsets(cm, x, xd, mi_row, mi_col); 4493 *(xd->mi[0]) = pc_tree->none.mic; 4494 *(x->mbmi_ext) = pc_tree->none.mbmi_ext; 4495 duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize); 4496 break; 4497 case PARTITION_VERT: 4498 set_mode_info_offsets(cm, x, xd, mi_row, mi_col); 4499 *(xd->mi[0]) = pc_tree->vertical[0].mic; 4500 *(x->mbmi_ext) = pc_tree->vertical[0].mbmi_ext; 4501 
duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, subsize); 4502 4503 if (mi_col + hbs < cm->mi_cols) { 4504 set_mode_info_offsets(cm, x, xd, mi_row, mi_col + hbs); 4505 *(xd->mi[0]) = pc_tree->vertical[1].mic; 4506 *(x->mbmi_ext) = pc_tree->vertical[1].mbmi_ext; 4507 duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col + hbs, subsize); 4508 } 4509 break; 4510 case PARTITION_HORZ: 4511 set_mode_info_offsets(cm, x, xd, mi_row, mi_col); 4512 *(xd->mi[0]) = pc_tree->horizontal[0].mic; 4513 *(x->mbmi_ext) = pc_tree->horizontal[0].mbmi_ext; 4514 duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, subsize); 4515 if (mi_row + hbs < cm->mi_rows) { 4516 set_mode_info_offsets(cm, x, xd, mi_row + hbs, mi_col); 4517 *(xd->mi[0]) = pc_tree->horizontal[1].mic; 4518 *(x->mbmi_ext) = pc_tree->horizontal[1].mbmi_ext; 4519 duplicate_mode_info_in_sb(cm, xd, mi_row + hbs, mi_col, subsize); 4520 } 4521 break; 4522 case PARTITION_SPLIT: { 4523 fill_mode_info_sb(cm, x, mi_row, mi_col, subsize, pc_tree->split[0]); 4524 fill_mode_info_sb(cm, x, mi_row, mi_col + hbs, subsize, 4525 pc_tree->split[1]); 4526 fill_mode_info_sb(cm, x, mi_row + hbs, mi_col, subsize, 4527 pc_tree->split[2]); 4528 fill_mode_info_sb(cm, x, mi_row + hbs, mi_col + hbs, subsize, 4529 pc_tree->split[3]); 4530 break; 4531 } 4532 default: break; 4533 } 4534 } 4535 4536 // Reset the prediction pixel ready flag recursively. 4537 static void pred_pixel_ready_reset(PC_TREE *pc_tree, BLOCK_SIZE bsize) { 4538 pc_tree->none.pred_pixel_ready = 0; 4539 pc_tree->horizontal[0].pred_pixel_ready = 0; 4540 pc_tree->horizontal[1].pred_pixel_ready = 0; 4541 pc_tree->vertical[0].pred_pixel_ready = 0; 4542 pc_tree->vertical[1].pred_pixel_ready = 0; 4543 4544 if (bsize > BLOCK_8X8) { 4545 BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT); 4546 int i; 4547 for (i = 0; i < 4; ++i) pred_pixel_ready_reset(pc_tree->split[i], subsize); 4548 } 4549 } 4550 4551 #if CONFIG_ML_VAR_PARTITION 4552 #define FEATURES 6 4553 #define LABELS 2 4554 static int ml_predict_var_paritioning(VP9_COMP *cpi, MACROBLOCK *x, 4555 BLOCK_SIZE bsize, int mi_row, 4556 int mi_col) { 4557 VP9_COMMON *const cm = &cpi->common; 4558 const NN_CONFIG *nn_config = NULL; 4559 4560 switch (bsize) { 4561 case BLOCK_64X64: nn_config = &vp9_var_part_nnconfig_64; break; 4562 case BLOCK_32X32: nn_config = &vp9_var_part_nnconfig_32; break; 4563 case BLOCK_16X16: nn_config = &vp9_var_part_nnconfig_16; break; 4564 case BLOCK_8X8: break; 4565 default: assert(0 && "Unexpected block size."); return -1; 4566 } 4567 4568 if (!nn_config) return -1; 4569 4570 vpx_clear_system_state(); 4571 4572 { 4573 const float thresh = cpi->oxcf.speed <= 5 ? 1.25f : 0.0f; 4574 float features[FEATURES] = { 0.0f }; 4575 const int dc_q = vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth); 4576 int feature_idx = 0; 4577 float score[LABELS]; 4578 4579 features[feature_idx++] = logf((float)(dc_q * dc_q) / 256.0f + 1.0f); 4580 vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col); 4581 { 4582 const int bs = 4 * num_4x4_blocks_wide_lookup[bsize]; 4583 const BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT); 4584 const int sb_offset_row = 8 * (mi_row & 7); 4585 const int sb_offset_col = 8 * (mi_col & 7); 4586 const uint8_t *pred = x->est_pred + sb_offset_row * 64 + sb_offset_col; 4587 const uint8_t *src = x->plane[0].src.buf; 4588 const int src_stride = x->plane[0].src.stride; 4589 const int pred_stride = 64; 4590 unsigned int sse; 4591 int i; 4592 // Variance of whole block. 
4593 const unsigned int var = 4594 cpi->fn_ptr[bsize].vf(src, src_stride, pred, pred_stride, &sse); 4595 const float factor = (var == 0) ? 1.0f : (1.0f / (float)var); 4596 4597 features[feature_idx++] = logf((float)var + 1.0f); 4598 for (i = 0; i < 4; ++i) { 4599 const int x_idx = (i & 1) * bs / 2; 4600 const int y_idx = (i >> 1) * bs / 2; 4601 const int src_offset = y_idx * src_stride + x_idx; 4602 const int pred_offset = y_idx * pred_stride + x_idx; 4603 // Variance of quarter block. 4604 const unsigned int sub_var = 4605 cpi->fn_ptr[subsize].vf(src + src_offset, src_stride, 4606 pred + pred_offset, pred_stride, &sse); 4607 const float var_ratio = (var == 0) ? 1.0f : factor * (float)sub_var; 4608 features[feature_idx++] = var_ratio; 4609 } 4610 } 4611 4612 assert(feature_idx == FEATURES); 4613 nn_predict(features, nn_config, score); 4614 if (score[0] > thresh) return PARTITION_SPLIT; 4615 if (score[0] < -thresh) return PARTITION_NONE; 4616 return -1; 4617 } 4618 } 4619 #undef FEATURES 4620 #undef LABELS 4621 #endif // CONFIG_ML_VAR_PARTITION 4622 4623 static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td, 4624 TileDataEnc *tile_data, TOKENEXTRA **tp, 4625 int mi_row, int mi_col, BLOCK_SIZE bsize, 4626 RD_COST *rd_cost, int do_recon, 4627 int64_t best_rd, PC_TREE *pc_tree) { 4628 const SPEED_FEATURES *const sf = &cpi->sf; 4629 VP9_COMMON *const cm = &cpi->common; 4630 TileInfo *const tile_info = &tile_data->tile_info; 4631 MACROBLOCK *const x = &td->mb; 4632 MACROBLOCKD *const xd = &x->e_mbd; 4633 const int ms = num_8x8_blocks_wide_lookup[bsize] / 2; 4634 TOKENEXTRA *tp_orig = *tp; 4635 PICK_MODE_CONTEXT *ctx = &pc_tree->none; 4636 int i; 4637 BLOCK_SIZE subsize = bsize; 4638 RD_COST this_rdc, sum_rdc, best_rdc; 4639 int do_split = bsize >= BLOCK_8X8; 4640 int do_rect = 1; 4641 // Override skipping rectangular partition operations for edge blocks 4642 const int force_horz_split = (mi_row + ms >= cm->mi_rows); 4643 const int force_vert_split = (mi_col + ms >= cm->mi_cols); 4644 const int xss = x->e_mbd.plane[1].subsampling_x; 4645 const int yss = x->e_mbd.plane[1].subsampling_y; 4646 4647 int partition_none_allowed = !force_horz_split && !force_vert_split; 4648 int partition_horz_allowed = 4649 !force_vert_split && yss <= xss && bsize >= BLOCK_8X8; 4650 int partition_vert_allowed = 4651 !force_horz_split && xss <= yss && bsize >= BLOCK_8X8; 4652 #if CONFIG_ML_VAR_PARTITION 4653 const int use_ml_based_partitioning = 4654 sf->partition_search_type == ML_BASED_PARTITION; 4655 #endif // CONFIG_ML_VAR_PARTITION 4656 4657 (void)*tp_orig; 4658 4659 // Avoid checking for rectangular partitions for speed >= 6. 4660 if (cpi->oxcf.speed >= 6) do_rect = 0; 4661 4662 assert(num_8x8_blocks_wide_lookup[bsize] == 4663 num_8x8_blocks_high_lookup[bsize]); 4664 4665 vp9_rd_cost_init(&sum_rdc); 4666 vp9_rd_cost_reset(&best_rdc); 4667 best_rdc.rdcost = best_rd; 4668 4669 // Determine partition types in search according to the speed features. 4670 // The threshold set here has to be of square block size. 
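// Edge blocks keep their forced horizontal/vertical split even when the
// min/max partition size range would otherwise rule the rectangular
// partition out.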
4671 if (sf->auto_min_max_partition_size) { 4672 partition_none_allowed &= 4673 (bsize <= x->max_partition_size && bsize >= x->min_partition_size); 4674 partition_horz_allowed &= 4675 ((bsize <= x->max_partition_size && bsize > x->min_partition_size) || 4676 force_horz_split); 4677 partition_vert_allowed &= 4678 ((bsize <= x->max_partition_size && bsize > x->min_partition_size) || 4679 force_vert_split); 4680 do_split &= bsize > x->min_partition_size; 4681 } 4682 if (sf->use_square_partition_only) { 4683 partition_horz_allowed &= force_horz_split; 4684 partition_vert_allowed &= force_vert_split; 4685 } 4686 4687 #if CONFIG_ML_VAR_PARTITION 4688 if (use_ml_based_partitioning) { 4689 if (partition_none_allowed || do_split) do_rect = 0; 4690 if (partition_none_allowed && do_split) { 4691 const int ml_predicted_partition = 4692 ml_predict_var_paritioning(cpi, x, bsize, mi_row, mi_col); 4693 if (ml_predicted_partition == PARTITION_NONE) do_split = 0; 4694 if (ml_predicted_partition == PARTITION_SPLIT) partition_none_allowed = 0; 4695 } 4696 } 4697 #endif // CONFIG_ML_VAR_PARTITION 4698 4699 if (!partition_none_allowed && !do_split) do_rect = 1; 4700 4701 ctx->pred_pixel_ready = 4702 !(partition_vert_allowed || partition_horz_allowed || do_split); 4703 4704 // PARTITION_NONE 4705 if (partition_none_allowed) { 4706 nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, bsize, 4707 ctx); 4708 ctx->mic = *xd->mi[0]; 4709 ctx->mbmi_ext = *x->mbmi_ext; 4710 ctx->skip_txfm[0] = x->skip_txfm[0]; 4711 ctx->skip = x->skip; 4712 4713 if (this_rdc.rate != INT_MAX) { 4714 const int pl = partition_plane_context(xd, mi_row, mi_col, bsize); 4715 this_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE]; 4716 this_rdc.rdcost = 4717 RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist); 4718 if (this_rdc.rdcost < best_rdc.rdcost) { 4719 best_rdc = this_rdc; 4720 if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE; 4721 4722 #if CONFIG_ML_VAR_PARTITION 4723 if (!use_ml_based_partitioning) 4724 #endif // CONFIG_ML_VAR_PARTITION 4725 { 4726 int64_t dist_breakout_thr = sf->partition_search_breakout_thr.dist; 4727 int64_t rate_breakout_thr = sf->partition_search_breakout_thr.rate; 4728 dist_breakout_thr >>= 4729 8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]); 4730 rate_breakout_thr *= num_pels_log2_lookup[bsize]; 4731 if (!x->e_mbd.lossless && this_rdc.rate < rate_breakout_thr && 4732 this_rdc.dist < dist_breakout_thr) { 4733 do_split = 0; 4734 do_rect = 0; 4735 } 4736 } 4737 } 4738 } 4739 } 4740 4741 // store estimated motion vector 4742 store_pred_mv(x, ctx); 4743 4744 // PARTITION_SPLIT 4745 if (do_split) { 4746 int pl = partition_plane_context(xd, mi_row, mi_col, bsize); 4747 sum_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT]; 4748 sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); 4749 subsize = get_subsize(bsize, PARTITION_SPLIT); 4750 for (i = 0; i < 4 && sum_rdc.rdcost < best_rdc.rdcost; ++i) { 4751 const int x_idx = (i & 1) * ms; 4752 const int y_idx = (i >> 1) * ms; 4753 4754 if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols) 4755 continue; 4756 load_pred_mv(x, ctx); 4757 nonrd_pick_partition(cpi, td, tile_data, tp, mi_row + y_idx, 4758 mi_col + x_idx, subsize, &this_rdc, 0, 4759 best_rdc.rdcost - sum_rdc.rdcost, pc_tree->split[i]); 4760 4761 if (this_rdc.rate == INT_MAX) { 4762 vp9_rd_cost_reset(&sum_rdc); 4763 } else { 4764 sum_rdc.rate += this_rdc.rate; 4765 sum_rdc.dist += this_rdc.dist; 4766 sum_rdc.rdcost += 
this_rdc.rdcost; 4767 } 4768 } 4769 4770 if (sum_rdc.rdcost < best_rdc.rdcost) { 4771 best_rdc = sum_rdc; 4772 pc_tree->partitioning = PARTITION_SPLIT; 4773 } else { 4774 // skip rectangular partition test when larger block size 4775 // gives better rd cost 4776 if (sf->less_rectangular_check) do_rect &= !partition_none_allowed; 4777 } 4778 } 4779 4780 // PARTITION_HORZ 4781 if (partition_horz_allowed && do_rect) { 4782 subsize = get_subsize(bsize, PARTITION_HORZ); 4783 load_pred_mv(x, ctx); 4784 pc_tree->horizontal[0].pred_pixel_ready = 1; 4785 nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize, 4786 &pc_tree->horizontal[0]); 4787 4788 pc_tree->horizontal[0].mic = *xd->mi[0]; 4789 pc_tree->horizontal[0].mbmi_ext = *x->mbmi_ext; 4790 pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0]; 4791 pc_tree->horizontal[0].skip = x->skip; 4792 4793 if (sum_rdc.rdcost < best_rdc.rdcost && mi_row + ms < cm->mi_rows) { 4794 load_pred_mv(x, ctx); 4795 pc_tree->horizontal[1].pred_pixel_ready = 1; 4796 nonrd_pick_sb_modes(cpi, tile_data, x, mi_row + ms, mi_col, &this_rdc, 4797 subsize, &pc_tree->horizontal[1]); 4798 4799 pc_tree->horizontal[1].mic = *xd->mi[0]; 4800 pc_tree->horizontal[1].mbmi_ext = *x->mbmi_ext; 4801 pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0]; 4802 pc_tree->horizontal[1].skip = x->skip; 4803 4804 if (this_rdc.rate == INT_MAX) { 4805 vp9_rd_cost_reset(&sum_rdc); 4806 } else { 4807 int pl = partition_plane_context(xd, mi_row, mi_col, bsize); 4808 this_rdc.rate += cpi->partition_cost[pl][PARTITION_HORZ]; 4809 sum_rdc.rate += this_rdc.rate; 4810 sum_rdc.dist += this_rdc.dist; 4811 sum_rdc.rdcost = 4812 RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); 4813 } 4814 } 4815 4816 if (sum_rdc.rdcost < best_rdc.rdcost) { 4817 best_rdc = sum_rdc; 4818 pc_tree->partitioning = PARTITION_HORZ; 4819 } else { 4820 pred_pixel_ready_reset(pc_tree, bsize); 4821 } 4822 } 4823 4824 // PARTITION_VERT 4825 if (partition_vert_allowed && do_rect) { 4826 subsize = get_subsize(bsize, PARTITION_VERT); 4827 load_pred_mv(x, ctx); 4828 pc_tree->vertical[0].pred_pixel_ready = 1; 4829 nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize, 4830 &pc_tree->vertical[0]); 4831 pc_tree->vertical[0].mic = *xd->mi[0]; 4832 pc_tree->vertical[0].mbmi_ext = *x->mbmi_ext; 4833 pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0]; 4834 pc_tree->vertical[0].skip = x->skip; 4835 4836 if (sum_rdc.rdcost < best_rdc.rdcost && mi_col + ms < cm->mi_cols) { 4837 load_pred_mv(x, ctx); 4838 pc_tree->vertical[1].pred_pixel_ready = 1; 4839 nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + ms, &this_rdc, 4840 subsize, &pc_tree->vertical[1]); 4841 pc_tree->vertical[1].mic = *xd->mi[0]; 4842 pc_tree->vertical[1].mbmi_ext = *x->mbmi_ext; 4843 pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0]; 4844 pc_tree->vertical[1].skip = x->skip; 4845 4846 if (this_rdc.rate == INT_MAX) { 4847 vp9_rd_cost_reset(&sum_rdc); 4848 } else { 4849 int pl = partition_plane_context(xd, mi_row, mi_col, bsize); 4850 sum_rdc.rate += cpi->partition_cost[pl][PARTITION_VERT]; 4851 sum_rdc.rate += this_rdc.rate; 4852 sum_rdc.dist += this_rdc.dist; 4853 sum_rdc.rdcost = 4854 RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); 4855 } 4856 } 4857 4858 if (sum_rdc.rdcost < best_rdc.rdcost) { 4859 best_rdc = sum_rdc; 4860 pc_tree->partitioning = PARTITION_VERT; 4861 } else { 4862 pred_pixel_ready_reset(pc_tree, bsize); 4863 } 4864 } 4865 4866 *rd_cost = best_rdc; 4867 4868 if (best_rdc.rate == INT_MAX) { 4869 
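// No partitioning produced a valid rate under the given rd budget; reset the
// output cost so the caller treats this sub-block search as failed.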
vp9_rd_cost_reset(rd_cost); 4870 return; 4871 } 4872 4873 // update mode info array 4874 fill_mode_info_sb(cm, x, mi_row, mi_col, bsize, pc_tree); 4875 4876 if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX && do_recon) { 4877 int output_enabled = (bsize == BLOCK_64X64); 4878 encode_sb_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize, 4879 pc_tree); 4880 } 4881 4882 if (bsize == BLOCK_64X64 && do_recon) { 4883 assert(tp_orig < *tp); 4884 assert(best_rdc.rate < INT_MAX); 4885 assert(best_rdc.dist < INT64_MAX); 4886 } else { 4887 assert(tp_orig == *tp); 4888 } 4889 } 4890 4891 static void nonrd_select_partition(VP9_COMP *cpi, ThreadData *td, 4892 TileDataEnc *tile_data, MODE_INFO **mi, 4893 TOKENEXTRA **tp, int mi_row, int mi_col, 4894 BLOCK_SIZE bsize, int output_enabled, 4895 RD_COST *rd_cost, PC_TREE *pc_tree) { 4896 VP9_COMMON *const cm = &cpi->common; 4897 TileInfo *const tile_info = &tile_data->tile_info; 4898 MACROBLOCK *const x = &td->mb; 4899 MACROBLOCKD *const xd = &x->e_mbd; 4900 const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4; 4901 const int mis = cm->mi_stride; 4902 PARTITION_TYPE partition; 4903 BLOCK_SIZE subsize; 4904 RD_COST this_rdc; 4905 BLOCK_SIZE subsize_ref = 4906 (cpi->sf.adapt_partition_source_sad) ? BLOCK_8X8 : BLOCK_16X16; 4907 4908 vp9_rd_cost_reset(&this_rdc); 4909 if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; 4910 4911 subsize = (bsize >= BLOCK_8X8) ? mi[0]->sb_type : BLOCK_4X4; 4912 partition = partition_lookup[bsl][subsize]; 4913 4914 if (bsize == BLOCK_32X32 && subsize == BLOCK_32X32) { 4915 x->max_partition_size = BLOCK_32X32; 4916 x->min_partition_size = BLOCK_16X16; 4917 nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize, rd_cost, 4918 0, INT64_MAX, pc_tree); 4919 } else if (bsize == BLOCK_32X32 && partition != PARTITION_NONE && 4920 subsize >= subsize_ref) { 4921 x->max_partition_size = BLOCK_32X32; 4922 x->min_partition_size = BLOCK_8X8; 4923 nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize, rd_cost, 4924 0, INT64_MAX, pc_tree); 4925 } else if (bsize == BLOCK_16X16 && partition != PARTITION_NONE) { 4926 x->max_partition_size = BLOCK_16X16; 4927 x->min_partition_size = BLOCK_8X8; 4928 nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize, rd_cost, 4929 0, INT64_MAX, pc_tree); 4930 } else { 4931 switch (partition) { 4932 case PARTITION_NONE: 4933 pc_tree->none.pred_pixel_ready = 1; 4934 nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost, subsize, 4935 &pc_tree->none); 4936 pc_tree->none.mic = *xd->mi[0]; 4937 pc_tree->none.mbmi_ext = *x->mbmi_ext; 4938 pc_tree->none.skip_txfm[0] = x->skip_txfm[0]; 4939 pc_tree->none.skip = x->skip; 4940 break; 4941 case PARTITION_VERT: 4942 pc_tree->vertical[0].pred_pixel_ready = 1; 4943 nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost, subsize, 4944 &pc_tree->vertical[0]); 4945 pc_tree->vertical[0].mic = *xd->mi[0]; 4946 pc_tree->vertical[0].mbmi_ext = *x->mbmi_ext; 4947 pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0]; 4948 pc_tree->vertical[0].skip = x->skip; 4949 if (mi_col + hbs < cm->mi_cols) { 4950 pc_tree->vertical[1].pred_pixel_ready = 1; 4951 nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, 4952 &this_rdc, subsize, &pc_tree->vertical[1]); 4953 pc_tree->vertical[1].mic = *xd->mi[0]; 4954 pc_tree->vertical[1].mbmi_ext = *x->mbmi_ext; 4955 pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0]; 4956 pc_tree->vertical[1].skip = x->skip; 4957 if (this_rdc.rate != INT_MAX && 
this_rdc.dist != INT64_MAX && 4958 rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) { 4959 rd_cost->rate += this_rdc.rate; 4960 rd_cost->dist += this_rdc.dist; 4961 } 4962 } 4963 break; 4964 case PARTITION_HORZ: 4965 pc_tree->horizontal[0].pred_pixel_ready = 1; 4966 nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost, subsize, 4967 &pc_tree->horizontal[0]); 4968 pc_tree->horizontal[0].mic = *xd->mi[0]; 4969 pc_tree->horizontal[0].mbmi_ext = *x->mbmi_ext; 4970 pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0]; 4971 pc_tree->horizontal[0].skip = x->skip; 4972 if (mi_row + hbs < cm->mi_rows) { 4973 pc_tree->horizontal[1].pred_pixel_ready = 1; 4974 nonrd_pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, 4975 &this_rdc, subsize, &pc_tree->horizontal[1]); 4976 pc_tree->horizontal[1].mic = *xd->mi[0]; 4977 pc_tree->horizontal[1].mbmi_ext = *x->mbmi_ext; 4978 pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0]; 4979 pc_tree->horizontal[1].skip = x->skip; 4980 if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX && 4981 rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) { 4982 rd_cost->rate += this_rdc.rate; 4983 rd_cost->dist += this_rdc.dist; 4984 } 4985 } 4986 break; 4987 default: 4988 assert(partition == PARTITION_SPLIT); 4989 subsize = get_subsize(bsize, PARTITION_SPLIT); 4990 nonrd_select_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, 4991 subsize, output_enabled, rd_cost, 4992 pc_tree->split[0]); 4993 nonrd_select_partition(cpi, td, tile_data, mi + hbs, tp, mi_row, 4994 mi_col + hbs, subsize, output_enabled, &this_rdc, 4995 pc_tree->split[1]); 4996 if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX && 4997 rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) { 4998 rd_cost->rate += this_rdc.rate; 4999 rd_cost->dist += this_rdc.dist; 5000 } 5001 nonrd_select_partition(cpi, td, tile_data, mi + hbs * mis, tp, 5002 mi_row + hbs, mi_col, subsize, output_enabled, 5003 &this_rdc, pc_tree->split[2]); 5004 if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX && 5005 rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) { 5006 rd_cost->rate += this_rdc.rate; 5007 rd_cost->dist += this_rdc.dist; 5008 } 5009 nonrd_select_partition(cpi, td, tile_data, mi + hbs * mis + hbs, tp, 5010 mi_row + hbs, mi_col + hbs, subsize, 5011 output_enabled, &this_rdc, pc_tree->split[3]); 5012 if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX && 5013 rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) { 5014 rd_cost->rate += this_rdc.rate; 5015 rd_cost->dist += this_rdc.dist; 5016 } 5017 break; 5018 } 5019 } 5020 5021 if (bsize == BLOCK_64X64 && output_enabled) 5022 encode_sb_rt(cpi, td, tile_info, tp, mi_row, mi_col, 1, bsize, pc_tree); 5023 } 5024 5025 static void nonrd_use_partition(VP9_COMP *cpi, ThreadData *td, 5026 TileDataEnc *tile_data, MODE_INFO **mi, 5027 TOKENEXTRA **tp, int mi_row, int mi_col, 5028 BLOCK_SIZE bsize, int output_enabled, 5029 RD_COST *dummy_cost, PC_TREE *pc_tree) { 5030 VP9_COMMON *const cm = &cpi->common; 5031 TileInfo *tile_info = &tile_data->tile_info; 5032 MACROBLOCK *const x = &td->mb; 5033 MACROBLOCKD *const xd = &x->e_mbd; 5034 const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4; 5035 const int mis = cm->mi_stride; 5036 PARTITION_TYPE partition; 5037 BLOCK_SIZE subsize; 5038 5039 if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; 5040 5041 subsize = (bsize >= BLOCK_8X8) ? 
mi[0]->sb_type : BLOCK_4X4; 5042 partition = partition_lookup[bsl][subsize]; 5043 5044 if (output_enabled && bsize != BLOCK_4X4) { 5045 int ctx = partition_plane_context(xd, mi_row, mi_col, bsize); 5046 td->counts->partition[ctx][partition]++; 5047 } 5048 5049 switch (partition) { 5050 case PARTITION_NONE: 5051 pc_tree->none.pred_pixel_ready = 1; 5052 nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost, 5053 subsize, &pc_tree->none); 5054 pc_tree->none.mic = *xd->mi[0]; 5055 pc_tree->none.mbmi_ext = *x->mbmi_ext; 5056 pc_tree->none.skip_txfm[0] = x->skip_txfm[0]; 5057 pc_tree->none.skip = x->skip; 5058 encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, 5059 subsize, &pc_tree->none); 5060 break; 5061 case PARTITION_VERT: 5062 pc_tree->vertical[0].pred_pixel_ready = 1; 5063 nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost, 5064 subsize, &pc_tree->vertical[0]); 5065 pc_tree->vertical[0].mic = *xd->mi[0]; 5066 pc_tree->vertical[0].mbmi_ext = *x->mbmi_ext; 5067 pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0]; 5068 pc_tree->vertical[0].skip = x->skip; 5069 encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, 5070 subsize, &pc_tree->vertical[0]); 5071 if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) { 5072 pc_tree->vertical[1].pred_pixel_ready = 1; 5073 nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, dummy_cost, 5074 subsize, &pc_tree->vertical[1]); 5075 pc_tree->vertical[1].mic = *xd->mi[0]; 5076 pc_tree->vertical[1].mbmi_ext = *x->mbmi_ext; 5077 pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0]; 5078 pc_tree->vertical[1].skip = x->skip; 5079 encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col + hbs, 5080 output_enabled, subsize, &pc_tree->vertical[1]); 5081 } 5082 break; 5083 case PARTITION_HORZ: 5084 pc_tree->horizontal[0].pred_pixel_ready = 1; 5085 nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost, 5086 subsize, &pc_tree->horizontal[0]); 5087 pc_tree->horizontal[0].mic = *xd->mi[0]; 5088 pc_tree->horizontal[0].mbmi_ext = *x->mbmi_ext; 5089 pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0]; 5090 pc_tree->horizontal[0].skip = x->skip; 5091 encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, 5092 subsize, &pc_tree->horizontal[0]); 5093 5094 if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) { 5095 pc_tree->horizontal[1].pred_pixel_ready = 1; 5096 nonrd_pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, dummy_cost, 5097 subsize, &pc_tree->horizontal[1]); 5098 pc_tree->horizontal[1].mic = *xd->mi[0]; 5099 pc_tree->horizontal[1].mbmi_ext = *x->mbmi_ext; 5100 pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0]; 5101 pc_tree->horizontal[1].skip = x->skip; 5102 encode_b_rt(cpi, td, tile_info, tp, mi_row + hbs, mi_col, 5103 output_enabled, subsize, &pc_tree->horizontal[1]); 5104 } 5105 break; 5106 default: 5107 assert(partition == PARTITION_SPLIT); 5108 subsize = get_subsize(bsize, PARTITION_SPLIT); 5109 if (bsize == BLOCK_8X8) { 5110 nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost, 5111 subsize, pc_tree->leaf_split[0]); 5112 encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, 5113 subsize, pc_tree->leaf_split[0]); 5114 } else { 5115 nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, subsize, 5116 output_enabled, dummy_cost, pc_tree->split[0]); 5117 nonrd_use_partition(cpi, td, tile_data, mi + hbs, tp, mi_row, 5118 mi_col + hbs, subsize, output_enabled, dummy_cost, 5119 pc_tree->split[1]); 5120 nonrd_use_partition(cpi, td, 
tile_data, mi + hbs * mis, tp, 5121 mi_row + hbs, mi_col, subsize, output_enabled, 5122 dummy_cost, pc_tree->split[2]); 5123 nonrd_use_partition(cpi, td, tile_data, mi + hbs * mis + hbs, tp, 5124 mi_row + hbs, mi_col + hbs, subsize, output_enabled, 5125 dummy_cost, pc_tree->split[3]); 5126 } 5127 break; 5128 } 5129 5130 if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8) 5131 update_partition_context(xd, mi_row, mi_col, subsize, bsize); 5132 } 5133 5134 #if CONFIG_ML_VAR_PARTITION 5135 // Get a prediction(stored in x->est_pred) for the whole 64x64 superblock. 5136 static void get_estimated_pred(VP9_COMP *cpi, const TileInfo *const tile, 5137 MACROBLOCK *x, int mi_row, int mi_col) { 5138 VP9_COMMON *const cm = &cpi->common; 5139 const int is_key_frame = frame_is_intra_only(cm); 5140 5141 set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64); 5142 5143 if (!is_key_frame) { 5144 MACROBLOCKD *xd = &x->e_mbd; 5145 MODE_INFO *mi = xd->mi[0]; 5146 YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME); 5147 const YV12_BUFFER_CONFIG *yv12_g = NULL; 5148 const BLOCK_SIZE bsize = BLOCK_32X32 + (mi_col + 4 < cm->mi_cols) * 2 + 5149 (mi_row + 4 < cm->mi_rows); 5150 int pixels_wide = 64, pixels_high = 64; 5151 unsigned int y_sad_g, y_sad_thr; 5152 unsigned int y_sad = UINT_MAX; 5153 5154 assert(yv12 != NULL); 5155 5156 if (xd->mb_to_right_edge < 0) pixels_wide += (xd->mb_to_right_edge >> 3); 5157 if (xd->mb_to_bottom_edge < 0) pixels_high += (xd->mb_to_bottom_edge >> 3); 5158 5159 if (!(is_one_pass_cbr_svc(cpi) && cpi->svc.spatial_layer_id) || 5160 cpi->svc.use_gf_temporal_ref_current_layer) { 5161 // For now, GOLDEN will not be used for non-zero spatial layers, since 5162 // it may not be a temporal reference. 5163 yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME); 5164 } 5165 5166 // Only compute y_sad_g (sad for golden reference) for speed < 8. 5167 if (cpi->oxcf.speed < 8 && yv12_g && yv12_g != yv12 && 5168 (cpi->ref_frame_flags & VP9_GOLD_FLAG)) { 5169 vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col, 5170 &cm->frame_refs[GOLDEN_FRAME - 1].sf); 5171 y_sad_g = cpi->fn_ptr[bsize].sdf( 5172 x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf, 5173 xd->plane[0].pre[0].stride); 5174 } else { 5175 y_sad_g = UINT_MAX; 5176 } 5177 5178 if (cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR && 5179 cpi->rc.is_src_frame_alt_ref) { 5180 yv12 = get_ref_frame_buffer(cpi, ALTREF_FRAME); 5181 vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col, 5182 &cm->frame_refs[ALTREF_FRAME - 1].sf); 5183 mi->ref_frame[0] = ALTREF_FRAME; 5184 y_sad_g = UINT_MAX; 5185 } else { 5186 vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col, 5187 &cm->frame_refs[LAST_FRAME - 1].sf); 5188 mi->ref_frame[0] = LAST_FRAME; 5189 } 5190 mi->ref_frame[1] = NONE; 5191 mi->sb_type = BLOCK_64X64; 5192 mi->mv[0].as_int = 0; 5193 mi->interp_filter = BILINEAR; 5194 5195 { 5196 const MV dummy_mv = { 0, 0 }; 5197 y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col, 5198 &dummy_mv); 5199 x->sb_use_mv_part = 1; 5200 x->sb_mvcol_part = mi->mv[0].as_mv.col; 5201 x->sb_mvrow_part = mi->mv[0].as_mv.row; 5202 } 5203 5204 // Pick ref frame for partitioning, bias last frame when y_sad_g and y_sad 5205 // are close if short_circuit_low_temp_var is on. 5206 y_sad_thr = cpi->sf.short_circuit_low_temp_var ? 
(y_sad * 7) >> 3 : y_sad; 5207 if (y_sad_g < y_sad_thr) { 5208 vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col, 5209 &cm->frame_refs[GOLDEN_FRAME - 1].sf); 5210 mi->ref_frame[0] = GOLDEN_FRAME; 5211 mi->mv[0].as_int = 0; 5212 y_sad = y_sad_g; 5213 } else { 5214 x->pred_mv[LAST_FRAME] = mi->mv[0].as_mv; 5215 } 5216 5217 set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]); 5218 xd->plane[0].dst.buf = x->est_pred; 5219 xd->plane[0].dst.stride = 64; 5220 vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_64X64); 5221 } else { 5222 #if CONFIG_VP9_HIGHBITDEPTH 5223 switch (xd->bd) { 5224 case 8: memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0])); break; 5225 case 10: 5226 memset(x->est_pred, 128 * 4, 64 * 64 * sizeof(x->est_pred[0])); 5227 break; 5228 case 12: 5229 memset(x->est_pred, 128 * 16, 64 * 64 * sizeof(x->est_pred[0])); 5230 break; 5231 } 5232 #else 5233 memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0])); 5234 #endif // CONFIG_VP9_HIGHBITDEPTH 5235 } 5236 } 5237 #endif // CONFIG_ML_VAR_PARTITION 5238 5239 static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td, 5240 TileDataEnc *tile_data, int mi_row, 5241 TOKENEXTRA **tp) { 5242 SPEED_FEATURES *const sf = &cpi->sf; 5243 VP9_COMMON *const cm = &cpi->common; 5244 TileInfo *const tile_info = &tile_data->tile_info; 5245 MACROBLOCK *const x = &td->mb; 5246 MACROBLOCKD *const xd = &x->e_mbd; 5247 const int mi_col_start = tile_info->mi_col_start; 5248 const int mi_col_end = tile_info->mi_col_end; 5249 int mi_col; 5250 const int sb_row = mi_row >> MI_BLOCK_SIZE_LOG2; 5251 const int num_sb_cols = 5252 get_num_cols(tile_data->tile_info, MI_BLOCK_SIZE_LOG2); 5253 int sb_col_in_tile; 5254 5255 // Initialize the left context for the new SB row 5256 memset(&xd->left_context, 0, sizeof(xd->left_context)); 5257 memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context)); 5258 5259 // Code each SB in the row 5260 for (mi_col = mi_col_start, sb_col_in_tile = 0; mi_col < mi_col_end; 5261 mi_col += MI_BLOCK_SIZE, ++sb_col_in_tile) { 5262 const struct segmentation *const seg = &cm->seg; 5263 RD_COST dummy_rdc; 5264 const int idx_str = cm->mi_stride * mi_row + mi_col; 5265 MODE_INFO **mi = cm->mi_grid_visible + idx_str; 5266 PARTITION_SEARCH_TYPE partition_search_type = sf->partition_search_type; 5267 BLOCK_SIZE bsize = BLOCK_64X64; 5268 int seg_skip = 0; 5269 int i; 5270 5271 (*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, sb_row, 5272 sb_col_in_tile); 5273 5274 if (cpi->use_skin_detection) { 5275 vp9_compute_skin_sb(cpi, BLOCK_16X16, mi_row, mi_col); 5276 } 5277 5278 x->source_variance = UINT_MAX; 5279 for (i = 0; i < MAX_REF_FRAMES; ++i) { 5280 x->pred_mv[i].row = INT16_MAX; 5281 x->pred_mv[i].col = INT16_MAX; 5282 } 5283 vp9_rd_cost_init(&dummy_rdc); 5284 x->color_sensitivity[0] = 0; 5285 x->color_sensitivity[1] = 0; 5286 x->sb_is_skin = 0; 5287 x->skip_low_source_sad = 0; 5288 x->lowvar_highsumdiff = 0; 5289 x->content_state_sb = 0; 5290 x->zero_temp_sad_source = 0; 5291 x->sb_use_mv_part = 0; 5292 x->sb_mvcol_part = 0; 5293 x->sb_mvrow_part = 0; 5294 x->sb_pickmode_part = 0; 5295 x->arf_frame_usage = 0; 5296 x->lastgolden_frame_usage = 0; 5297 5298 if (seg->enabled) { 5299 const uint8_t *const map = 5300 seg->update_map ? 
cpi->segmentation_map : cm->last_frame_seg_map; 5301 int segment_id = get_segment_id(cm, map, BLOCK_64X64, mi_row, mi_col); 5302 seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP); 5303 if (seg_skip) { 5304 partition_search_type = FIXED_PARTITION; 5305 } 5306 } 5307 5308 if (cpi->compute_source_sad_onepass && cpi->sf.use_source_sad) { 5309 int shift = cpi->Source->y_stride * (mi_row << 3) + (mi_col << 3); 5310 int sb_offset2 = ((cm->mi_cols + 7) >> 3) * (mi_row >> 3) + (mi_col >> 3); 5311 int64_t source_sad = avg_source_sad(cpi, x, shift, sb_offset2); 5312 if (sf->adapt_partition_source_sad && 5313 (cpi->oxcf.rc_mode == VPX_VBR && !cpi->rc.is_src_frame_alt_ref && 5314 source_sad > sf->adapt_partition_thresh && 5315 (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame))) 5316 partition_search_type = REFERENCE_PARTITION; 5317 } 5318 5319 // Set the partition type of the 64X64 block 5320 switch (partition_search_type) { 5321 case VAR_BASED_PARTITION: 5322 // TODO(jingning, marpan): The mode decision and encoding process 5323 // support both intra and inter sub8x8 block coding for RTC mode. 5324 // Tune the thresholds accordingly to use sub8x8 block coding for 5325 // coding performance improvement. 5326 choose_partitioning(cpi, tile_info, x, mi_row, mi_col); 5327 nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, 5328 BLOCK_64X64, 1, &dummy_rdc, td->pc_root); 5329 break; 5330 #if CONFIG_ML_VAR_PARTITION 5331 case ML_BASED_PARTITION: 5332 get_estimated_pred(cpi, tile_info, x, mi_row, mi_col); 5333 x->max_partition_size = BLOCK_64X64; 5334 x->min_partition_size = BLOCK_8X8; 5335 x->sb_pickmode_part = 1; 5336 nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, 5337 BLOCK_64X64, &dummy_rdc, 1, INT64_MAX, 5338 td->pc_root); 5339 break; 5340 #endif // CONFIG_ML_VAR_PARTITION 5341 case SOURCE_VAR_BASED_PARTITION: 5342 set_source_var_based_partition(cpi, tile_info, x, mi, mi_row, mi_col); 5343 nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, 5344 BLOCK_64X64, 1, &dummy_rdc, td->pc_root); 5345 break; 5346 case FIXED_PARTITION: 5347 if (!seg_skip) bsize = sf->always_this_block_size; 5348 set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize); 5349 nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, 5350 BLOCK_64X64, 1, &dummy_rdc, td->pc_root); 5351 break; 5352 default: 5353 assert(partition_search_type == REFERENCE_PARTITION); 5354 x->sb_pickmode_part = 1; 5355 set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64); 5356 // Use nonrd_pick_partition on scene-cut for VBR mode. 5357 // nonrd_pick_partition does not support 4x4 partition, so avoid it 5358 // on key frame for now. 5359 if ((cpi->oxcf.rc_mode == VPX_VBR && cpi->rc.high_source_sad && 5360 cpi->oxcf.speed < 6 && !frame_is_intra_only(cm) && 5361 (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame))) { 5362 // Use lower max_partition_size for low resolutions. 5363 if (cm->width <= 352 && cm->height <= 288) 5364 x->max_partition_size = BLOCK_32X32; 5365 else 5366 x->max_partition_size = BLOCK_64X64; 5367 x->min_partition_size = BLOCK_8X8; 5368 nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, 5369 BLOCK_64X64, &dummy_rdc, 1, INT64_MAX, 5370 td->pc_root); 5371 } else { 5372 choose_partitioning(cpi, tile_info, x, mi_row, mi_col); 5373 // TODO(marpan): Seems like nonrd_select_partition does not support 5374 // 4x4 partition. Since 4x4 is used on key frame, use this switch 5375 // for now.
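// nonrd_use_partition() encodes the superblock with the partitioning chosen
// by choose_partitioning() as-is, while nonrd_select_partition() may further
// refine the 16x16/32x32 decisions with a limited nonrd_pick_partition()
// search.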
5376 if (frame_is_intra_only(cm)) 5377 nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, 5378 BLOCK_64X64, 1, &dummy_rdc, td->pc_root); 5379 else 5380 nonrd_select_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, 5381 BLOCK_64X64, 1, &dummy_rdc, td->pc_root); 5382 } 5383 5384 break; 5385 } 5386 5387 // Update ref_frame usage for inter frames when this group is an ARF group. 5388 if (!cpi->rc.is_src_frame_alt_ref && !cpi->refresh_golden_frame && 5389 !cpi->refresh_alt_ref_frame && cpi->rc.alt_ref_gf_group && 5390 cpi->sf.use_altref_onepass) { 5391 int sboffset = ((cm->mi_cols + 7) >> 3) * (mi_row >> 3) + (mi_col >> 3); 5392 if (cpi->count_arf_frame_usage != NULL) 5393 cpi->count_arf_frame_usage[sboffset] = x->arf_frame_usage; 5394 if (cpi->count_lastgolden_frame_usage != NULL) 5395 cpi->count_lastgolden_frame_usage[sboffset] = x->lastgolden_frame_usage; 5396 } 5397 5398 (*(cpi->row_mt_sync_write_ptr))(&tile_data->row_mt_sync, sb_row, 5399 sb_col_in_tile, num_sb_cols); 5400 } 5401 } 5402 // End of the RTC (non-RD) coding path. 5403 // Variance of a 16x16 block of differences: sse - sum^2 / 256 (256 = 16 * 16 samples). 5404 static INLINE uint32_t variance(const diff *const d) { 5405 return d->sse - (uint32_t)(((int64_t)d->sum * d->sum) >> 8); 5406 } 5407 5408 #if CONFIG_VP9_HIGHBITDEPTH // Same computation, with the result clamped at zero. 5409 static INLINE uint32_t variance_highbd(diff *const d) { 5410 const int64_t var = (int64_t)d->sse - (((int64_t)d->sum * d->sum) >> 8); 5411 return (var >= 0) ? (uint32_t)var : 0; 5412 } 5413 #endif // CONFIG_VP9_HIGHBITDEPTH 5414 5415 static int set_var_thresh_from_histogram(VP9_COMP *cpi) { 5416 const SPEED_FEATURES *const sf = &cpi->sf; 5417 const VP9_COMMON *const cm = &cpi->common; 5418 5419 const uint8_t *src = cpi->Source->y_buffer; 5420 const uint8_t *last_src = cpi->Last_Source->y_buffer; 5421 const int src_stride = cpi->Source->y_stride; 5422 const int last_stride = cpi->Last_Source->y_stride; 5423 5424 // Pick cutoff threshold 5425 const int cutoff = (VPXMIN(cm->width, cm->height) >= 720) 5426 ?
(cm->MBs * VAR_HIST_LARGE_CUT_OFF / 100) 5427 : (cm->MBs * VAR_HIST_SMALL_CUT_OFF / 100); 5428 DECLARE_ALIGNED(16, int, hist[VAR_HIST_BINS]); 5429 diff *var16 = cpi->source_diff_var; 5430 5431 int sum = 0; 5432 int i, j; 5433 5434 memset(hist, 0, VAR_HIST_BINS * sizeof(hist[0])); 5435 5436 for (i = 0; i < cm->mb_rows; i++) { 5437 for (j = 0; j < cm->mb_cols; j++) { 5438 #if CONFIG_VP9_HIGHBITDEPTH 5439 if (cm->use_highbitdepth) { 5440 switch (cm->bit_depth) { 5441 case VPX_BITS_8: 5442 vpx_highbd_8_get16x16var(src, src_stride, last_src, last_stride, 5443 &var16->sse, &var16->sum); 5444 var16->var = variance(var16); 5445 break; 5446 case VPX_BITS_10: 5447 vpx_highbd_10_get16x16var(src, src_stride, last_src, last_stride, 5448 &var16->sse, &var16->sum); 5449 var16->var = variance_highbd(var16); 5450 break; 5451 default: 5452 assert(cm->bit_depth == VPX_BITS_12); 5453 vpx_highbd_12_get16x16var(src, src_stride, last_src, last_stride, 5454 &var16->sse, &var16->sum); 5455 var16->var = variance_highbd(var16); 5456 break; 5457 } 5458 } else { 5459 vpx_get16x16var(src, src_stride, last_src, last_stride, &var16->sse, 5460 &var16->sum); 5461 var16->var = variance(var16); 5462 } 5463 #else 5464 vpx_get16x16var(src, src_stride, last_src, last_stride, &var16->sse, 5465 &var16->sum); 5466 var16->var = variance(var16); 5467 #endif // CONFIG_VP9_HIGHBITDEPTH 5468 5469 if (var16->var >= VAR_HIST_MAX_BG_VAR) 5470 hist[VAR_HIST_BINS - 1]++; 5471 else 5472 hist[var16->var / VAR_HIST_FACTOR]++; 5473 5474 src += 16; 5475 last_src += 16; 5476 var16++; 5477 } 5478 5479 src = src - cm->mb_cols * 16 + 16 * src_stride; 5480 last_src = last_src - cm->mb_cols * 16 + 16 * last_stride; 5481 } 5482 5483 cpi->source_var_thresh = 0; 5484 5485 if (hist[VAR_HIST_BINS - 1] < cutoff) { 5486 for (i = 0; i < VAR_HIST_BINS - 1; i++) { 5487 sum += hist[i]; 5488 5489 if (sum > cutoff) { 5490 cpi->source_var_thresh = (i + 1) * VAR_HIST_FACTOR; 5491 return 0; 5492 } 5493 } 5494 } 5495 5496 return sf->search_type_check_frequency; 5497 } 5498 5499 static void source_var_based_partition_search_method(VP9_COMP *cpi) { 5500 VP9_COMMON *const cm = &cpi->common; 5501 SPEED_FEATURES *const sf = &cpi->sf; 5502 5503 if (cm->frame_type == KEY_FRAME) { 5504 // For key frame, use SEARCH_PARTITION. 
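// (SEARCH_PARTITION is the full partition search; the branches below may
// instead fall back to FIXED_PARTITION, which codes each 64x64 block with the
// single size given by sf->always_this_block_size.)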
5505 sf->partition_search_type = SEARCH_PARTITION; 5506 } else if (cm->intra_only) { 5507 sf->partition_search_type = FIXED_PARTITION; 5508 } else { 5509 if (cm->last_width != cm->width || cm->last_height != cm->height) { 5510 if (cpi->source_diff_var) vpx_free(cpi->source_diff_var); 5511 5512 CHECK_MEM_ERROR(cm, cpi->source_diff_var, 5513 vpx_calloc(cm->MBs, sizeof(diff))); 5514 } 5515 5516 if (!cpi->frames_till_next_var_check) 5517 cpi->frames_till_next_var_check = set_var_thresh_from_histogram(cpi); 5518 5519 if (cpi->frames_till_next_var_check > 0) { 5520 sf->partition_search_type = FIXED_PARTITION; 5521 cpi->frames_till_next_var_check--; 5522 } 5523 } 5524 } 5525 5526 static int get_skip_encode_frame(const VP9_COMMON *cm, ThreadData *const td) { 5527 unsigned int intra_count = 0, inter_count = 0; 5528 int j; 5529 5530 for (j = 0; j < INTRA_INTER_CONTEXTS; ++j) { 5531 intra_count += td->counts->intra_inter[j][0]; 5532 inter_count += td->counts->intra_inter[j][1]; 5533 } 5534 5535 return (intra_count << 2) < inter_count && cm->frame_type != KEY_FRAME && 5536 cm->show_frame; 5537 } 5538 5539 void vp9_init_tile_data(VP9_COMP *cpi) { 5540 VP9_COMMON *const cm = &cpi->common; 5541 const int tile_cols = 1 << cm->log2_tile_cols; 5542 const int tile_rows = 1 << cm->log2_tile_rows; 5543 int tile_col, tile_row; 5544 TOKENEXTRA *pre_tok = cpi->tile_tok[0][0]; 5545 TOKENLIST *tplist = cpi->tplist[0][0]; 5546 int tile_tok = 0; 5547 int tplist_count = 0; 5548 5549 if (cpi->tile_data == NULL || cpi->allocated_tiles < tile_cols * tile_rows) { 5550 if (cpi->tile_data != NULL) vpx_free(cpi->tile_data); 5551 CHECK_MEM_ERROR( 5552 cm, cpi->tile_data, 5553 vpx_malloc(tile_cols * tile_rows * sizeof(*cpi->tile_data))); 5554 cpi->allocated_tiles = tile_cols * tile_rows; 5555 5556 for (tile_row = 0; tile_row < tile_rows; ++tile_row) 5557 for (tile_col = 0; tile_col < tile_cols; ++tile_col) { 5558 TileDataEnc *tile_data = 5559 &cpi->tile_data[tile_row * tile_cols + tile_col]; 5560 int i, j; 5561 for (i = 0; i < BLOCK_SIZES; ++i) { 5562 for (j = 0; j < MAX_MODES; ++j) { 5563 tile_data->thresh_freq_fact[i][j] = RD_THRESH_INIT_FACT; 5564 #if CONFIG_CONSISTENT_RECODE 5565 tile_data->thresh_freq_fact_prev[i][j] = RD_THRESH_INIT_FACT; 5566 #endif 5567 tile_data->mode_map[i][j] = j; 5568 } 5569 } 5570 #if CONFIG_MULTITHREAD 5571 tile_data->row_base_thresh_freq_fact = NULL; 5572 #endif 5573 } 5574 } 5575 5576 for (tile_row = 0; tile_row < tile_rows; ++tile_row) { 5577 for (tile_col = 0; tile_col < tile_cols; ++tile_col) { 5578 TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col]; 5579 TileInfo *tile_info = &this_tile->tile_info; 5580 if (cpi->sf.adaptive_rd_thresh_row_mt && 5581 this_tile->row_base_thresh_freq_fact == NULL) 5582 vp9_row_mt_alloc_rd_thresh(cpi, this_tile); 5583 vp9_tile_init(tile_info, cm, tile_row, tile_col); 5584 5585 cpi->tile_tok[tile_row][tile_col] = pre_tok + tile_tok; 5586 pre_tok = cpi->tile_tok[tile_row][tile_col]; 5587 tile_tok = allocated_tokens(*tile_info); 5588 5589 cpi->tplist[tile_row][tile_col] = tplist + tplist_count; 5590 tplist = cpi->tplist[tile_row][tile_col]; 5591 tplist_count = get_num_vert_units(*tile_info, MI_BLOCK_SIZE_LOG2); 5592 } 5593 } 5594 } 5595 5596 void vp9_encode_sb_row(VP9_COMP *cpi, ThreadData *td, int tile_row, 5597 int tile_col, int mi_row) { 5598 VP9_COMMON *const cm = &cpi->common; 5599 const int tile_cols = 1 << cm->log2_tile_cols; 5600 TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col]; 5601 const TileInfo *const 
tile_info = &this_tile->tile_info; 5602 TOKENEXTRA *tok = NULL; 5603 int tile_sb_row; 5604 int tile_mb_cols = (tile_info->mi_col_end - tile_info->mi_col_start + 1) >> 1; 5605 5606 tile_sb_row = mi_cols_aligned_to_sb(mi_row - tile_info->mi_row_start) >> 5607 MI_BLOCK_SIZE_LOG2; 5608 get_start_tok(cpi, tile_row, tile_col, mi_row, &tok); 5609 cpi->tplist[tile_row][tile_col][tile_sb_row].start = tok; 5610 5611 if (cpi->sf.use_nonrd_pick_mode) 5612 encode_nonrd_sb_row(cpi, td, this_tile, mi_row, &tok); 5613 else 5614 encode_rd_sb_row(cpi, td, this_tile, mi_row, &tok); 5615 5616 cpi->tplist[tile_row][tile_col][tile_sb_row].stop = tok; 5617 cpi->tplist[tile_row][tile_col][tile_sb_row].count = 5618 (unsigned int)(cpi->tplist[tile_row][tile_col][tile_sb_row].stop - 5619 cpi->tplist[tile_row][tile_col][tile_sb_row].start); 5620 assert(tok - cpi->tplist[tile_row][tile_col][tile_sb_row].start <= 5621 get_token_alloc(MI_BLOCK_SIZE >> 1, tile_mb_cols)); 5622 5623 (void)tile_mb_cols; 5624 } 5625 5626 void vp9_encode_tile(VP9_COMP *cpi, ThreadData *td, int tile_row, 5627 int tile_col) { 5628 VP9_COMMON *const cm = &cpi->common; 5629 const int tile_cols = 1 << cm->log2_tile_cols; 5630 TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col]; 5631 const TileInfo *const tile_info = &this_tile->tile_info; 5632 const int mi_row_start = tile_info->mi_row_start; 5633 const int mi_row_end = tile_info->mi_row_end; 5634 int mi_row; 5635 5636 for (mi_row = mi_row_start; mi_row < mi_row_end; mi_row += MI_BLOCK_SIZE) 5637 vp9_encode_sb_row(cpi, td, tile_row, tile_col, mi_row); 5638 } 5639 5640 static void encode_tiles(VP9_COMP *cpi) { 5641 VP9_COMMON *const cm = &cpi->common; 5642 const int tile_cols = 1 << cm->log2_tile_cols; 5643 const int tile_rows = 1 << cm->log2_tile_rows; 5644 int tile_col, tile_row; 5645 5646 vp9_init_tile_data(cpi); 5647 5648 for (tile_row = 0; tile_row < tile_rows; ++tile_row) 5649 for (tile_col = 0; tile_col < tile_cols; ++tile_col) 5650 vp9_encode_tile(cpi, &cpi->td, tile_row, tile_col); 5651 } 5652 5653 #if CONFIG_FP_MB_STATS 5654 static int input_fpmb_stats(FIRSTPASS_MB_STATS *firstpass_mb_stats, 5655 VP9_COMMON *cm, uint8_t **this_frame_mb_stats) { 5656 uint8_t *mb_stats_in = firstpass_mb_stats->mb_stats_start + 5657 cm->current_video_frame * cm->MBs * sizeof(uint8_t); 5658 5659 if (mb_stats_in > firstpass_mb_stats->mb_stats_end) return EOF; 5660 5661 *this_frame_mb_stats = mb_stats_in; 5662 5663 return 1; 5664 } 5665 #endif 5666 5667 static void encode_frame_internal(VP9_COMP *cpi) { 5668 SPEED_FEATURES *const sf = &cpi->sf; 5669 ThreadData *const td = &cpi->td; 5670 MACROBLOCK *const x = &td->mb; 5671 VP9_COMMON *const cm = &cpi->common; 5672 MACROBLOCKD *const xd = &x->e_mbd; 5673 const int gf_group_index = cpi->twopass.gf_group.index; 5674 5675 xd->mi = cm->mi_grid_visible; 5676 xd->mi[0] = cm->mi; 5677 vp9_zero(*td->counts); 5678 vp9_zero(cpi->td.rd_counts); 5679 5680 xd->lossless = cm->base_qindex == 0 && cm->y_dc_delta_q == 0 && 5681 cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0; 5682 5683 #if CONFIG_VP9_HIGHBITDEPTH 5684 if (cm->use_highbitdepth) 5685 x->fwd_txfm4x4 = xd->lossless ? vp9_highbd_fwht4x4 : vpx_highbd_fdct4x4; 5686 else 5687 x->fwd_txfm4x4 = xd->lossless ? vp9_fwht4x4 : vpx_fdct4x4; 5688 x->highbd_inv_txfm_add = 5689 xd->lossless ? vp9_highbd_iwht4x4_add : vp9_highbd_idct4x4_add; 5690 #else 5691 x->fwd_txfm4x4 = xd->lossless ? vp9_fwht4x4 : vpx_fdct4x4; 5692 #endif // CONFIG_VP9_HIGHBITDEPTH 5693 x->inv_txfm_add = xd->lossless ? 
vp9_iwht4x4_add : vp9_idct4x4_add; 5694 #if CONFIG_CONSISTENT_RECODE 5695 x->optimize = sf->optimize_coefficients == 1 && cpi->oxcf.pass != 1; 5696 #endif 5697 if (xd->lossless) x->optimize = 0; 5698 x->sharpness = cpi->oxcf.sharpness; 5699 x->adjust_rdmult_by_segment = (cpi->oxcf.aq_mode == VARIANCE_AQ); 5700 5701 cm->tx_mode = select_tx_mode(cpi, xd); 5702 5703 vp9_frame_init_quantizer(cpi); 5704 5705 vp9_initialize_rd_consts(cpi); 5706 vp9_initialize_me_consts(cpi, x, cm->base_qindex); 5707 init_encode_frame_mb_context(cpi); 5708 cm->use_prev_frame_mvs = 5709 !cm->error_resilient_mode && cm->width == cm->last_width && 5710 cm->height == cm->last_height && !cm->intra_only && cm->last_show_frame; 5711 // Special case: set prev_mi to NULL when the previous mode info 5712 // context cannot be used. 5713 cm->prev_mi = 5714 cm->use_prev_frame_mvs ? cm->prev_mip + cm->mi_stride + 1 : NULL; 5715 5716 x->quant_fp = cpi->sf.use_quant_fp; 5717 vp9_zero(x->skip_txfm); 5718 if (sf->use_nonrd_pick_mode) { 5719 // Initialize internal buffer pointers for rtc coding, where non-RD 5720 // mode decision is used and hence no buffer pointer swap needed. 5721 int i; 5722 struct macroblock_plane *const p = x->plane; 5723 struct macroblockd_plane *const pd = xd->plane; 5724 PICK_MODE_CONTEXT *ctx = &cpi->td.pc_root->none; 5725 5726 for (i = 0; i < MAX_MB_PLANE; ++i) { 5727 p[i].coeff = ctx->coeff_pbuf[i][0]; 5728 p[i].qcoeff = ctx->qcoeff_pbuf[i][0]; 5729 pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][0]; 5730 p[i].eobs = ctx->eobs_pbuf[i][0]; 5731 } 5732 vp9_zero(x->zcoeff_blk); 5733 5734 if (cm->frame_type != KEY_FRAME && cpi->rc.frames_since_golden == 0 && 5735 !(cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR) && 5736 !cpi->use_svc) 5737 cpi->ref_frame_flags &= (~VP9_GOLD_FLAG); 5738 5739 if (sf->partition_search_type == SOURCE_VAR_BASED_PARTITION) 5740 source_var_based_partition_search_method(cpi); 5741 } else if (gf_group_index && gf_group_index < MAX_ARF_GOP_SIZE && 5742 cpi->sf.enable_tpl_model) { 5743 TplDepFrame *tpl_frame = &cpi->tpl_stats[cpi->twopass.gf_group.index]; 5744 TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr; 5745 5746 int tpl_stride = tpl_frame->stride; 5747 int64_t intra_cost_base = 0; 5748 int64_t mc_dep_cost_base = 0; 5749 int row, col; 5750 5751 for (row = 0; row < cm->mi_rows && tpl_frame->is_valid; ++row) { 5752 for (col = 0; col < cm->mi_cols; ++col) { 5753 TplDepStats *this_stats = &tpl_stats[row * tpl_stride + col]; 5754 intra_cost_base += this_stats->intra_cost; 5755 mc_dep_cost_base += this_stats->mc_dep_cost; 5756 } 5757 } 5758 5759 vpx_clear_system_state(); 5760 5761 if (tpl_frame->is_valid) 5762 cpi->rd.r0 = (double)intra_cost_base / mc_dep_cost_base; 5763 } 5764 5765 { 5766 struct vpx_usec_timer emr_timer; 5767 vpx_usec_timer_start(&emr_timer); 5768 5769 #if CONFIG_FP_MB_STATS 5770 if (cpi->use_fp_mb_stats) { 5771 input_fpmb_stats(&cpi->twopass.firstpass_mb_stats, cm, 5772 &cpi->twopass.this_frame_mb_stats); 5773 } 5774 #endif 5775 5776 if (!cpi->row_mt) { 5777 cpi->row_mt_sync_read_ptr = vp9_row_mt_sync_read_dummy; 5778 cpi->row_mt_sync_write_ptr = vp9_row_mt_sync_write_dummy; 5779 // If allowed, encoding tiles in parallel with one thread handling one 5780 // tile when row based multi-threading is disabled. 
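// The effective thread count is capped at the number of tile columns
// (1 << cm->log2_tile_cols); with a single tile column, or max_threads <= 1,
// the frame is encoded on the calling thread via encode_tiles().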
5781 if (VPXMIN(cpi->oxcf.max_threads, 1 << cm->log2_tile_cols) > 1) 5782 vp9_encode_tiles_mt(cpi); 5783 else 5784 encode_tiles(cpi); 5785 } else { 5786 cpi->row_mt_sync_read_ptr = vp9_row_mt_sync_read; 5787 cpi->row_mt_sync_write_ptr = vp9_row_mt_sync_write; 5788 vp9_encode_tiles_row_mt(cpi); 5789 } 5790 5791 vpx_usec_timer_mark(&emr_timer); 5792 cpi->time_encode_sb_row += vpx_usec_timer_elapsed(&emr_timer); 5793 } 5794 5795 sf->skip_encode_frame = 5796 sf->skip_encode_sb ? get_skip_encode_frame(cm, td) : 0; 5797 5798 #if 0 5799 // Keep record of the total distortion this time around for future use 5800 cpi->last_frame_distortion = cpi->frame_distortion; 5801 #endif 5802 } 5803 5804 static INTERP_FILTER get_interp_filter( 5805 const int64_t threshes[SWITCHABLE_FILTER_CONTEXTS], int is_alt_ref) { 5806 if (!is_alt_ref && threshes[EIGHTTAP_SMOOTH] > threshes[EIGHTTAP] && 5807 threshes[EIGHTTAP_SMOOTH] > threshes[EIGHTTAP_SHARP] && 5808 threshes[EIGHTTAP_SMOOTH] > threshes[SWITCHABLE - 1]) { 5809 return EIGHTTAP_SMOOTH; 5810 } else if (threshes[EIGHTTAP_SHARP] > threshes[EIGHTTAP] && 5811 threshes[EIGHTTAP_SHARP] > threshes[SWITCHABLE - 1]) { 5812 return EIGHTTAP_SHARP; 5813 } else if (threshes[EIGHTTAP] > threshes[SWITCHABLE - 1]) { 5814 return EIGHTTAP; 5815 } else { 5816 return SWITCHABLE; 5817 } 5818 } 5819 5820 static int compute_frame_aq_offset(struct VP9_COMP *cpi) { 5821 VP9_COMMON *const cm = &cpi->common; 5822 MODE_INFO **mi_8x8_ptr = cm->mi_grid_visible; 5823 struct segmentation *const seg = &cm->seg; 5824 5825 int mi_row, mi_col; 5826 int sum_delta = 0; 5827 int map_index = 0; 5828 int qdelta_index; 5829 int segment_id; 5830 5831 for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) { 5832 MODE_INFO **mi_8x8 = mi_8x8_ptr; 5833 for (mi_col = 0; mi_col < cm->mi_cols; mi_col++, mi_8x8++) { 5834 segment_id = mi_8x8[0]->segment_id; 5835 qdelta_index = get_segdata(seg, segment_id, SEG_LVL_ALT_Q); 5836 sum_delta += qdelta_index; 5837 map_index++; 5838 } 5839 mi_8x8_ptr += cm->mi_stride; 5840 } 5841 5842 return sum_delta / (cm->mi_rows * cm->mi_cols); 5843 } 5844 5845 #if CONFIG_CONSISTENT_RECODE 5846 static void restore_encode_params(VP9_COMP *cpi) { 5847 VP9_COMMON *const cm = &cpi->common; 5848 const int tile_cols = 1 << cm->log2_tile_cols; 5849 const int tile_rows = 1 << cm->log2_tile_rows; 5850 int tile_col, tile_row; 5851 int i, j; 5852 RD_OPT *rd_opt = &cpi->rd; 5853 for (i = 0; i < MAX_REF_FRAMES; i++) { 5854 for (j = 0; j < REFERENCE_MODES; j++) 5855 rd_opt->prediction_type_threshes[i][j] = 5856 rd_opt->prediction_type_threshes_prev[i][j]; 5857 5858 for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; j++) 5859 rd_opt->filter_threshes[i][j] = rd_opt->filter_threshes_prev[i][j]; 5860 } 5861 5862 if (cpi->tile_data != NULL) { 5863 for (tile_row = 0; tile_row < tile_rows; ++tile_row) 5864 for (tile_col = 0; tile_col < tile_cols; ++tile_col) { 5865 TileDataEnc *tile_data = 5866 &cpi->tile_data[tile_row * tile_cols + tile_col]; 5867 for (i = 0; i < BLOCK_SIZES; ++i) { 5868 for (j = 0; j < MAX_MODES; ++j) { 5869 tile_data->thresh_freq_fact[i][j] = 5870 tile_data->thresh_freq_fact_prev[i][j]; 5871 } 5872 } 5873 } 5874 } 5875 5876 cm->interp_filter = cpi->sf.default_interp_filter; 5877 } 5878 #endif 5879 5880 void vp9_encode_frame(VP9_COMP *cpi) { 5881 VP9_COMMON *const cm = &cpi->common; 5882 5883 #if CONFIG_CONSISTENT_RECODE 5884 restore_encode_params(cpi); 5885 #endif 5886 5887 // In the longer term the encoder should be generalized to match the 5888 // decoder such that we allow compound where one 
of the 3 buffers has a 5889 // different sign bias and that buffer is then the fixed ref. However, this 5890 // requires further work in the rd loop. For now the only supported encoder 5891 // side behavior is where the ALT ref buffer has opposite sign bias to 5892 // the other two. 5893 if (!frame_is_intra_only(cm)) { 5894 if (vp9_compound_reference_allowed(cm)) { 5895 cpi->allow_comp_inter_inter = 1; 5896 vp9_setup_compound_reference_mode(cm); 5897 } else { 5898 cpi->allow_comp_inter_inter = 0; 5899 } 5900 } 5901 5902 if (cpi->sf.frame_parameter_update) { 5903 int i; 5904 RD_OPT *const rd_opt = &cpi->rd; 5905 FRAME_COUNTS *counts = cpi->td.counts; 5906 RD_COUNTS *const rdc = &cpi->td.rd_counts; 5907 5908 // This code does a single RD pass over the whole frame assuming 5909 // either compound, single or hybrid prediction as per whatever has 5910 // worked best for that type of frame in the past. 5911 // It also predicts whether another coding mode would have worked 5912 // better than this coding mode. If that is the case, it remembers 5913 // that for subsequent frames. 5914 // It also does the same analysis for transform size selection. 5915 const MV_REFERENCE_FRAME frame_type = get_frame_type(cpi); 5916 int64_t *const mode_thrs = rd_opt->prediction_type_threshes[frame_type]; 5917 int64_t *const filter_thrs = rd_opt->filter_threshes[frame_type]; 5918 const int is_alt_ref = frame_type == ALTREF_FRAME; 5919 5920 /* prediction (compound, single or hybrid) mode selection */ 5921 if (is_alt_ref || !cpi->allow_comp_inter_inter) 5922 cm->reference_mode = SINGLE_REFERENCE; 5923 else if (mode_thrs[COMPOUND_REFERENCE] > mode_thrs[SINGLE_REFERENCE] && 5924 mode_thrs[COMPOUND_REFERENCE] > mode_thrs[REFERENCE_MODE_SELECT] && 5925 check_dual_ref_flags(cpi) && cpi->static_mb_pct == 100) 5926 cm->reference_mode = COMPOUND_REFERENCE; 5927 else if (mode_thrs[SINGLE_REFERENCE] > mode_thrs[REFERENCE_MODE_SELECT]) 5928 cm->reference_mode = SINGLE_REFERENCE; 5929 else 5930 cm->reference_mode = REFERENCE_MODE_SELECT; 5931 5932 if (cm->interp_filter == SWITCHABLE) 5933 cm->interp_filter = get_interp_filter(filter_thrs, is_alt_ref); 5934 5935 encode_frame_internal(cpi); 5936 5937 for (i = 0; i < REFERENCE_MODES; ++i) 5938 mode_thrs[i] = (mode_thrs[i] + rdc->comp_pred_diff[i] / cm->MBs) / 2; 5939 5940 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) 5941 filter_thrs[i] = (filter_thrs[i] + rdc->filter_diff[i] / cm->MBs) / 2; 5942 5943 if (cm->reference_mode == REFERENCE_MODE_SELECT) { 5944 int single_count_zero = 0; 5945 int comp_count_zero = 0; 5946 5947 for (i = 0; i < COMP_INTER_CONTEXTS; i++) { 5948 single_count_zero += counts->comp_inter[i][0]; 5949 comp_count_zero += counts->comp_inter[i][1]; 5950 } 5951 5952 if (comp_count_zero == 0) { 5953 cm->reference_mode = SINGLE_REFERENCE; 5954 vp9_zero(counts->comp_inter); 5955 } else if (single_count_zero == 0) { 5956 cm->reference_mode = COMPOUND_REFERENCE; 5957 vp9_zero(counts->comp_inter); 5958 } 5959 } 5960 5961 if (cm->tx_mode == TX_MODE_SELECT) { 5962 int count4x4 = 0; 5963 int count8x8_lp = 0, count8x8_8x8p = 0; 5964 int count16x16_16x16p = 0, count16x16_lp = 0; 5965 int count32x32 = 0; 5966 5967 for (i = 0; i < TX_SIZE_CONTEXTS; ++i) { 5968 count4x4 += counts->tx.p32x32[i][TX_4X4]; 5969 count4x4 += counts->tx.p16x16[i][TX_4X4]; 5970 count4x4 += counts->tx.p8x8[i][TX_4X4]; 5971 5972 count8x8_lp += counts->tx.p32x32[i][TX_8X8]; 5973 count8x8_lp += counts->tx.p16x16[i][TX_8X8]; 5974 count8x8_8x8p += counts->tx.p8x8[i][TX_8X8]; 5975 5976 count16x16_16x16p += 
counts->tx.p16x16[i][TX_16X16]; 5977 count16x16_lp += counts->tx.p32x32[i][TX_16X16]; 5978 count32x32 += counts->tx.p32x32[i][TX_32X32]; 5979 } 5980 if (count4x4 == 0 && count16x16_lp == 0 && count16x16_16x16p == 0 && 5981 count32x32 == 0) { 5982 cm->tx_mode = ALLOW_8X8; 5983 reset_skip_tx_size(cm, TX_8X8); 5984 } else if (count8x8_8x8p == 0 && count16x16_16x16p == 0 && 5985 count8x8_lp == 0 && count16x16_lp == 0 && count32x32 == 0) { 5986 cm->tx_mode = ONLY_4X4; 5987 reset_skip_tx_size(cm, TX_4X4); 5988 } else if (count8x8_lp == 0 && count16x16_lp == 0 && count4x4 == 0) { 5989 cm->tx_mode = ALLOW_32X32; 5990 } else if (count32x32 == 0 && count8x8_lp == 0 && count4x4 == 0) { 5991 cm->tx_mode = ALLOW_16X16; 5992 reset_skip_tx_size(cm, TX_16X16); 5993 } 5994 } 5995 } else { 5996 FRAME_COUNTS *counts = cpi->td.counts; 5997 cm->reference_mode = SINGLE_REFERENCE; 5998 if (cpi->allow_comp_inter_inter && cpi->sf.use_compound_nonrd_pickmode && 5999 cpi->rc.alt_ref_gf_group && !cpi->rc.is_src_frame_alt_ref && 6000 cm->frame_type != KEY_FRAME) 6001 cm->reference_mode = REFERENCE_MODE_SELECT; 6002 6003 encode_frame_internal(cpi); 6004 6005 if (cm->reference_mode == REFERENCE_MODE_SELECT) { 6006 int single_count_zero = 0; 6007 int comp_count_zero = 0; 6008 int i; 6009 for (i = 0; i < COMP_INTER_CONTEXTS; i++) { 6010 single_count_zero += counts->comp_inter[i][0]; 6011 comp_count_zero += counts->comp_inter[i][1]; 6012 } 6013 if (comp_count_zero == 0) { 6014 cm->reference_mode = SINGLE_REFERENCE; 6015 vp9_zero(counts->comp_inter); 6016 } else if (single_count_zero == 0) { 6017 cm->reference_mode = COMPOUND_REFERENCE; 6018 vp9_zero(counts->comp_inter); 6019 } 6020 } 6021 } 6022 6023 // If segmented AQ is enabled compute the average AQ weighting. 6024 if (cm->seg.enabled && (cpi->oxcf.aq_mode != NO_AQ) && 6025 (cm->seg.update_map || cm->seg.update_data)) { 6026 cm->seg.aq_av_offset = compute_frame_aq_offset(cpi); 6027 } 6028 } 6029 6030 static void sum_intra_stats(FRAME_COUNTS *counts, const MODE_INFO *mi) { 6031 const PREDICTION_MODE y_mode = mi->mode; 6032 const PREDICTION_MODE uv_mode = mi->uv_mode; 6033 const BLOCK_SIZE bsize = mi->sb_type; 6034 6035 if (bsize < BLOCK_8X8) { 6036 int idx, idy; 6037 const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; 6038 const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; 6039 for (idy = 0; idy < 2; idy += num_4x4_h) 6040 for (idx = 0; idx < 2; idx += num_4x4_w) 6041 ++counts->y_mode[0][mi->bmi[idy * 2 + idx].as_mode]; 6042 } else { 6043 ++counts->y_mode[size_group_lookup[bsize]][y_mode]; 6044 } 6045 6046 ++counts->uv_mode[y_mode][uv_mode]; 6047 } 6048 6049 static void update_zeromv_cnt(VP9_COMP *const cpi, const MODE_INFO *const mi, 6050 int mi_row, int mi_col, BLOCK_SIZE bsize) { 6051 const VP9_COMMON *const cm = &cpi->common; 6052 MV mv = mi->mv[0].as_mv; 6053 const int bw = num_8x8_blocks_wide_lookup[bsize]; 6054 const int bh = num_8x8_blocks_high_lookup[bsize]; 6055 const int xmis = VPXMIN(cm->mi_cols - mi_col, bw); 6056 const int ymis = VPXMIN(cm->mi_rows - mi_row, bh); 6057 const int block_index = mi_row * cm->mi_cols + mi_col; 6058 int x, y; 6059 for (y = 0; y < ymis; y++) 6060 for (x = 0; x < xmis; x++) { 6061 int map_offset = block_index + y * cm->mi_cols + x; 6062 if (mi->ref_frame[0] == LAST_FRAME && is_inter_block(mi) && 6063 mi->segment_id <= CR_SEGMENT_ID_BOOST2) { 6064 if (abs(mv.row) < 8 && abs(mv.col) < 8) { 6065 if (cpi->consec_zero_mv[map_offset] < 255) 6066 cpi->consec_zero_mv[map_offset]++; 6067 } else { 6068 
cpi->consec_zero_mv[map_offset] = 0; 6069 } 6070 } 6071 } 6072 } 6073 6074 static void encode_superblock(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t, 6075 int output_enabled, int mi_row, int mi_col, 6076 BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) { 6077 VP9_COMMON *const cm = &cpi->common; 6078 MACROBLOCK *const x = &td->mb; 6079 MACROBLOCKD *const xd = &x->e_mbd; 6080 MODE_INFO *mi = xd->mi[0]; 6081 const int seg_skip = 6082 segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP); 6083 x->skip_recode = !x->select_tx_size && mi->sb_type >= BLOCK_8X8 && 6084 cpi->oxcf.aq_mode != COMPLEXITY_AQ && 6085 cpi->oxcf.aq_mode != CYCLIC_REFRESH_AQ && 6086 cpi->sf.allow_skip_recode; 6087 6088 if (!x->skip_recode && !cpi->sf.use_nonrd_pick_mode) 6089 memset(x->skip_txfm, 0, sizeof(x->skip_txfm)); 6090 6091 x->skip_optimize = ctx->is_coded; 6092 ctx->is_coded = 1; 6093 x->use_lp32x32fdct = cpi->sf.use_lp32x32fdct; 6094 x->skip_encode = (!output_enabled && cpi->sf.skip_encode_frame && 6095 x->q_index < QIDX_SKIP_THRESH); 6096 6097 if (x->skip_encode) return; 6098 6099 if (!is_inter_block(mi)) { 6100 int plane; 6101 #if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH 6102 if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) && 6103 (xd->above_mi == NULL || xd->left_mi == NULL) && 6104 need_top_left[mi->uv_mode]) 6105 assert(0); 6106 #endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH 6107 mi->skip = 1; 6108 for (plane = 0; plane < MAX_MB_PLANE; ++plane) 6109 vp9_encode_intra_block_plane(x, VPXMAX(bsize, BLOCK_8X8), plane, 1); 6110 if (output_enabled) sum_intra_stats(td->counts, mi); 6111 vp9_tokenize_sb(cpi, td, t, !output_enabled, seg_skip, 6112 VPXMAX(bsize, BLOCK_8X8)); 6113 } else { 6114 int ref; 6115 const int is_compound = has_second_ref(mi); 6116 set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]); 6117 for (ref = 0; ref < 1 + is_compound; ++ref) { 6118 YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, mi->ref_frame[ref]); 6119 assert(cfg != NULL); 6120 vp9_setup_pre_planes(xd, ref, cfg, mi_row, mi_col, 6121 &xd->block_refs[ref]->sf); 6122 } 6123 if (!(cpi->sf.reuse_inter_pred_sby && ctx->pred_pixel_ready) || seg_skip) 6124 vp9_build_inter_predictors_sby(xd, mi_row, mi_col, 6125 VPXMAX(bsize, BLOCK_8X8)); 6126 6127 vp9_build_inter_predictors_sbuv(xd, mi_row, mi_col, 6128 VPXMAX(bsize, BLOCK_8X8)); 6129 6130 vp9_encode_sb(x, VPXMAX(bsize, BLOCK_8X8)); 6131 vp9_tokenize_sb(cpi, td, t, !output_enabled, seg_skip, 6132 VPXMAX(bsize, BLOCK_8X8)); 6133 } 6134 6135 if (seg_skip) { 6136 assert(mi->skip); 6137 } 6138 6139 if (output_enabled) { 6140 if (cm->tx_mode == TX_MODE_SELECT && mi->sb_type >= BLOCK_8X8 && 6141 !(is_inter_block(mi) && mi->skip)) { 6142 ++get_tx_counts(max_txsize_lookup[bsize], get_tx_size_context(xd), 6143 &td->counts->tx)[mi->tx_size]; 6144 } else { 6145 // The new intra coding scheme requires no change of transform size 6146 if (is_inter_block(mi)) { 6147 mi->tx_size = VPXMIN(tx_mode_to_biggest_tx_size[cm->tx_mode], 6148 max_txsize_lookup[bsize]); 6149 } else { 6150 mi->tx_size = (bsize >= BLOCK_8X8) ? 
mi->tx_size : TX_4X4; 6151 } 6152 } 6153 6154 ++td->counts->tx.tx_totals[mi->tx_size]; 6155 ++td->counts->tx.tx_totals[get_uv_tx_size(mi, &xd->plane[1])]; 6156 if (cm->seg.enabled && cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) 6157 vp9_cyclic_refresh_update_sb_postencode(cpi, mi, mi_row, mi_col, bsize); 6158 if (cpi->oxcf.pass == 0 && cpi->svc.temporal_layer_id == 0 && 6159 (!cpi->use_svc || 6160 (cpi->use_svc && 6161 !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame && 6162 cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1))) 6163 update_zeromv_cnt(cpi, mi, mi_row, mi_col, bsize); 6164 } 6165 } 6166