1 /* 2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #include <limits.h> 13 14 #include "av1/encoder/encoder.h" 15 #include "av1/encoder/speed_features.h" 16 #include "av1/encoder/rdopt.h" 17 18 #include "aom_dsp/aom_dsp_common.h" 19 20 #define MAX_MESH_SPEED 5 // Max speed setting for mesh motion method 21 // Max speed setting for tx domain evaluation 22 #define MAX_TX_DOMAIN_EVAL_SPEED 5 23 static MESH_PATTERN 24 good_quality_mesh_patterns[MAX_MESH_SPEED + 1][MAX_MESH_STEP] = { 25 { { 64, 8 }, { 28, 4 }, { 15, 1 }, { 7, 1 } }, 26 { { 64, 8 }, { 28, 4 }, { 15, 1 }, { 7, 1 } }, 27 { { 64, 8 }, { 14, 2 }, { 7, 1 }, { 7, 1 } }, 28 { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } }, 29 { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } }, 30 { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } }, 31 }; 32 static unsigned char good_quality_max_mesh_pct[MAX_MESH_SPEED + 1] = { 33 50, 50, 25, 15, 5, 1 34 }; 35 36 // TODO(huisu (at) google.com): These settings are pretty relaxed, tune them for 37 // each speed setting 38 static MESH_PATTERN intrabc_mesh_patterns[MAX_MESH_SPEED + 1][MAX_MESH_STEP] = { 39 { { 256, 1 }, { 256, 1 }, { 0, 0 }, { 0, 0 } }, 40 { { 256, 1 }, { 256, 1 }, { 0, 0 }, { 0, 0 } }, 41 { { 64, 1 }, { 64, 1 }, { 0, 0 }, { 0, 0 } }, 42 { { 64, 1 }, { 64, 1 }, { 0, 0 }, { 0, 0 } }, 43 { { 64, 4 }, { 16, 1 }, { 0, 0 }, { 0, 0 } }, 44 { { 64, 4 }, { 16, 1 }, { 0, 0 }, { 0, 0 } }, 45 }; 46 static uint8_t intrabc_max_mesh_pct[MAX_MESH_SPEED + 1] = { 100, 100, 100, 47 25, 25, 10 }; 48 49 // Threshold values to be used for pruning the txfm_domain_distortion 50 // based on block MSE 51 // TODO(any): Experiment the threshold logic based on variance metric 52 static unsigned int tx_domain_dist_thresholds[MAX_TX_DOMAIN_EVAL_SPEED + 1] = { 53 UINT_MAX, 162754, 22026, 22026, 22026, 0 54 }; 55 // Threshold values to be used for disabling coeff RD-optimization 56 // based on block MSE 57 // TODO(any): Experiment the threshold logic based on variance metric 58 static unsigned int coeff_opt_dist_thresholds[5] = { UINT_MAX, 162754, 162754, 59 22026, 22026 }; 60 // scaling values to be used for gating wedge/compound segment based on best 61 // approximate rd 62 static int comp_type_rd_threshold_mul[3] = { 1, 11, 12 }; 63 static int comp_type_rd_threshold_div[3] = { 3, 16, 16 }; 64 65 // Intra only frames, golden frames (except alt ref overlays) and 66 // alt ref frames tend to be coded at a higher than ambient quality 67 static int frame_is_boosted(const AV1_COMP *cpi) { 68 return frame_is_kf_gf_arf(cpi); 69 } 70 71 // Sets a partition size down to which the auto partition code will always 72 // search (can go lower), based on the image dimensions. The logic here 73 // is that the extent to which ringing artefacts are offensive, depends 74 // partly on the screen area that over which they propogate. Propogation is 75 // limited by transform block size but the screen area take up by a given block 76 // size will be larger for a small image format stretched to full screen. 77 static BLOCK_SIZE set_partition_min_limit(const AV1_COMMON *const cm) { 78 unsigned int screen_area = (cm->width * cm->height); 79 80 // Select block size based on image format size. 81 if (screen_area < 1280 * 720) { 82 // Formats smaller in area than 720P 83 return BLOCK_4X4; 84 } else if (screen_area < 1920 * 1080) { 85 // Format >= 720P and < 1080P 86 return BLOCK_8X8; 87 } else { 88 // Formats 1080P and up 89 return BLOCK_16X16; 90 } 91 } 92 93 static void set_good_speed_feature_framesize_dependent( 94 const AV1_COMP *const cpi, SPEED_FEATURES *const sf, int speed) { 95 const AV1_COMMON *const cm = &cpi->common; 96 const int is_720p_or_larger = AOMMIN(cm->width, cm->height) >= 720; 97 const int is_480p_or_larger = AOMMIN(cm->width, cm->height) >= 480; 98 99 if (is_480p_or_larger) { 100 sf->use_square_partition_only_threshold = BLOCK_128X128; 101 if (is_720p_or_larger) 102 sf->auto_max_partition_based_on_simple_motion = ADAPT_PRED; 103 else 104 sf->auto_max_partition_based_on_simple_motion = RELAXED_PRED; 105 } else { 106 sf->use_square_partition_only_threshold = BLOCK_64X64; 107 sf->auto_max_partition_based_on_simple_motion = DIRECT_PRED; 108 } 109 110 // TODO(huisu (at) google.com): train models for 720P and above. 111 if (!is_720p_or_larger) { 112 sf->ml_partition_search_breakout_thresh[0] = 200; // BLOCK_8X8 113 sf->ml_partition_search_breakout_thresh[1] = 250; // BLOCK_16X16 114 sf->ml_partition_search_breakout_thresh[2] = 300; // BLOCK_32X32 115 sf->ml_partition_search_breakout_thresh[3] = 500; // BLOCK_64X64 116 sf->ml_partition_search_breakout_thresh[4] = -1; // BLOCK_128X128 117 } 118 119 if (is_720p_or_larger && speed >= CONFIG_2PASS_PARTITION_SEARCH_LVL_START && 120 speed < CONFIG_2PASS_PARTITION_SEARCH_LVL_END) { 121 sf->two_pass_partition_search = 1; 122 } 123 124 if (speed >= 1) { 125 if (is_720p_or_larger) { 126 sf->use_square_partition_only_threshold = BLOCK_128X128; 127 } else if (is_480p_or_larger) { 128 sf->use_square_partition_only_threshold = BLOCK_64X64; 129 } else { 130 sf->use_square_partition_only_threshold = BLOCK_32X32; 131 } 132 133 if (!is_720p_or_larger) { 134 sf->ml_partition_search_breakout_thresh[0] = 200; // BLOCK_8X8 135 sf->ml_partition_search_breakout_thresh[1] = 250; // BLOCK_16X16 136 sf->ml_partition_search_breakout_thresh[2] = 300; // BLOCK_32X32 137 sf->ml_partition_search_breakout_thresh[3] = 300; // BLOCK_64X64 138 sf->ml_partition_search_breakout_thresh[4] = -1; // BLOCK_128X128 139 140 sf->firstpass_simple_motion_search_early_term = 1; 141 } 142 } 143 144 if (speed >= 2) { 145 if (is_720p_or_larger) { 146 sf->use_square_partition_only_threshold = BLOCK_64X64; 147 } else if (is_480p_or_larger) { 148 sf->use_square_partition_only_threshold = BLOCK_32X32; 149 } else { 150 // TODO(chiyotsai (at) google.com): Setting the threshold to BLOCK_16X16 incurs 151 // a large loss (about 0.584%). Try increasing the threshold on boosted 152 // frame and see if it improves the performance. 153 sf->use_square_partition_only_threshold = BLOCK_32X32; 154 } 155 156 if (is_720p_or_larger) { 157 sf->adaptive_pred_interp_filter = 0; 158 sf->partition_search_breakout_dist_thr = (1 << 24); 159 sf->partition_search_breakout_rate_thr = 120; 160 } else { 161 sf->partition_search_breakout_dist_thr = (1 << 22); 162 sf->partition_search_breakout_rate_thr = 100; 163 } 164 sf->rd_auto_partition_min_limit = set_partition_min_limit(cm); 165 } 166 167 if (speed >= 3) { 168 if (is_720p_or_larger) { 169 sf->partition_search_breakout_dist_thr = (1 << 25); 170 sf->partition_search_breakout_rate_thr = 200; 171 } else { 172 sf->max_intra_bsize = BLOCK_32X32; 173 sf->partition_search_breakout_dist_thr = (1 << 23); 174 sf->partition_search_breakout_rate_thr = 120; 175 } 176 sf->use_first_partition_pass_interintra_stats = 177 sf->two_pass_partition_search; 178 } 179 180 if (speed >= 4) { 181 if (is_720p_or_larger) { 182 sf->partition_search_breakout_dist_thr = (1 << 26); 183 } else { 184 sf->partition_search_breakout_dist_thr = (1 << 24); 185 } 186 } 187 } 188 189 static void set_good_speed_features_framesize_independent( 190 const AV1_COMP *const cpi, SPEED_FEATURES *const sf, int speed) { 191 const AV1_COMMON *const cm = &cpi->common; 192 const int boosted = frame_is_boosted(cpi); 193 const int is_boosted_arf2_bwd_type = 194 boosted || cpi->refresh_bwd_ref_frame || cpi->refresh_alt2_ref_frame; 195 196 // Speed 0 for all speed features that give neutral coding performance change. 197 sf->reduce_inter_modes = 1; 198 sf->prune_ext_partition_types_search_level = 1; 199 sf->ml_prune_rect_partition = 1; 200 sf->ml_prune_ab_partition = 1; 201 sf->ml_prune_4_partition = 1; 202 sf->simple_motion_search_prune_rect = 1; 203 sf->adaptive_txb_search_level = 1; 204 sf->use_dist_wtd_comp_flag = DIST_WTD_COMP_SKIP_MV_SEARCH; 205 sf->model_based_prune_tx_search_level = 1; 206 sf->model_based_post_interp_filter_breakout = 1; 207 sf->model_based_motion_mode_rd_breakout = 1; 208 209 // TODO(debargha): Test, tweak and turn on either 1 or 2 210 sf->inter_mode_rd_model_estimation = 1; 211 sf->inter_mode_rd_model_estimation_adaptive = 0; 212 213 sf->two_loop_comp_search = 0; 214 sf->prune_ref_frame_for_rect_partitions = 215 boosted ? 0 : (is_boosted_arf2_bwd_type ? 1 : 2); 216 sf->less_rectangular_check_level = 1; 217 sf->gm_search_type = GM_REDUCED_REF_SEARCH_SKIP_L2_L3; 218 sf->gm_disable_recode = 1; 219 sf->use_fast_interpolation_filter_search = 1; 220 sf->intra_tx_size_search_init_depth_sqr = 1; 221 sf->intra_angle_estimation = 1; 222 sf->selective_ref_frame = 1; 223 sf->prune_wedge_pred_diff_based = 1; 224 sf->disable_wedge_search_var_thresh = 0; 225 sf->disable_wedge_search_edge_thresh = 0; 226 sf->prune_motion_mode_level = 1; 227 sf->cb_pred_filter_search = 0; 228 sf->use_nonrd_pick_mode = 0; 229 sf->use_real_time_ref_set = 0; 230 231 if (speed >= 1) { 232 sf->gm_erroradv_type = GM_ERRORADV_TR_1; 233 sf->selective_ref_frame = 2; 234 235 sf->intra_tx_size_search_init_depth_rect = 1; 236 sf->tx_size_search_lgr_block = 1; 237 238 sf->prune_ext_partition_types_search_level = 2; 239 sf->skip_repeat_interpolation_filter_search = 1; 240 sf->tx_type_search.skip_tx_search = 1; 241 sf->tx_type_search.ml_tx_split_thresh = 40; 242 sf->model_based_prune_tx_search_level = 0; 243 sf->adaptive_txb_search_level = 2; 244 sf->use_intra_txb_hash = 1; 245 sf->optimize_b_precheck = 1; 246 sf->dual_sgr_penalty_level = 1; 247 sf->use_accurate_subpel_search = USE_4_TAPS; 248 sf->reuse_inter_intra_mode = 1; 249 sf->prune_comp_search_by_single_result = 1; 250 sf->skip_repeated_newmv = 1; 251 sf->obmc_full_pixel_search_level = 1; 252 // TODO(anyone): Following speed feature will be further explored to 253 // identify the appropriate tradeoff between encoder performance and its 254 // speed. 255 sf->prune_single_motion_modes_by_simple_trans = 1; 256 257 sf->simple_motion_search_split_only = 1; 258 sf->simple_motion_search_early_term_none = 1; 259 260 sf->disable_wedge_search_var_thresh = 0; 261 sf->disable_wedge_search_edge_thresh = 0; 262 sf->disable_interinter_wedge_newmv_search = boosted ? 0 : 1; 263 sf->prune_comp_type_by_comp_avg = 1; 264 sf->prune_motion_mode_level = 2; 265 sf->gm_search_type = GM_REDUCED_REF_SEARCH_SKIP_L2_L3_ARF2; 266 sf->cb_pred_filter_search = 1; 267 sf->use_transform_domain_distortion = boosted ? 0 : 1; 268 sf->perform_coeff_opt = boosted ? 0 : 1; 269 sf->use_inter_txb_hash = 0; 270 } 271 272 if (speed >= 2) { 273 sf->gm_erroradv_type = GM_ERRORADV_TR_2; 274 275 sf->selective_ref_frame = 3; 276 sf->inter_tx_size_search_init_depth_rect = 1; 277 sf->inter_tx_size_search_init_depth_sqr = 1; 278 279 sf->fast_cdef_search = 1; 280 281 sf->adaptive_rd_thresh = 1; 282 sf->mv.auto_mv_step_size = 1; 283 sf->mv.subpel_iters_per_step = 1; 284 sf->disable_filter_search_var_thresh = 100; 285 sf->comp_inter_joint_search_thresh = BLOCK_SIZES_ALL; 286 287 sf->partition_search_breakout_rate_thr = 80; 288 sf->allow_partition_search_skip = 1; 289 sf->disable_wedge_search_var_thresh = 100; 290 sf->disable_wedge_search_edge_thresh = 0; 291 sf->disable_interinter_wedge_newmv_search = 1; 292 sf->fast_wedge_sign_estimate = 1; 293 sf->disable_dual_filter = 1; 294 sf->use_dist_wtd_comp_flag = DIST_WTD_COMP_DISABLED; 295 sf->prune_comp_type_by_comp_avg = 2; 296 // TODO(Sachin): Enable/Enhance this speed feature for speed 2 & 3 297 sf->cb_pred_filter_search = 0; 298 sf->adaptive_interp_filter_search = 1; 299 sf->perform_coeff_opt = boosted ? 0 : 2; 300 } 301 302 if (speed >= 3) { 303 sf->tx_size_search_method = boosted ? USE_FULL_RD : USE_LARGESTALL; 304 sf->less_rectangular_check_level = 2; 305 sf->adaptive_pred_interp_filter = 1; 306 // adaptive_motion_search breaks encoder multi-thread tests. 307 // The values in x->pred_mv[] differ for single and multi-thread cases. 308 // See aomedia:1778. 309 // sf->adaptive_motion_search = 1; 310 sf->recode_loop = ALLOW_RECODE_KFARFGF; 311 sf->use_transform_domain_distortion = boosted ? 1 : 2; 312 sf->use_accurate_subpel_search = USE_2_TAPS; 313 sf->adaptive_rd_thresh = 2; 314 if (cpi->oxcf.enable_smooth_interintra) { 315 sf->disable_smooth_interintra = 316 (boosted || cpi->refresh_bwd_ref_frame || cpi->refresh_alt2_ref_frame) 317 ? 0 318 : 1; 319 } 320 sf->tx_type_search.prune_mode = PRUNE_2D_FAST; 321 sf->gm_search_type = GM_DISABLE_SEARCH; 322 sf->prune_comp_search_by_single_result = 2; 323 sf->prune_motion_mode_level = boosted ? 2 : 3; 324 sf->prune_warp_using_wmtype = 1; 325 // TODO(yunqing): evaluate this speed feature for speed 1 & 2, and combine 326 // it with cpi->sf.disable_wedge_search_var_thresh. 327 sf->disable_wedge_interintra_search = 1; 328 // TODO(any): Experiment with the early exit mechanism for speeds 0, 1 and 2 329 // and clean-up the speed feature 330 sf->perform_best_rd_based_gating_for_chroma = 1; 331 sf->prune_ref_frame_for_rect_partitions = 332 frame_is_intra_only(&cpi->common) ? 0 : (boosted ? 1 : 2); 333 sf->perform_coeff_opt = is_boosted_arf2_bwd_type ? 2 : 3; 334 sf->prune_comp_type_by_model_rd = boosted ? 0 : 1; 335 // TODO(Venkat): Clean-up frame type dependency for 336 // simple_motion_search_split_only in partition search function and set the 337 // speed feature accordingly 338 // TODO(Venkat): Evaluate this speed feature for speed 1 & 2 339 sf->simple_motion_search_split_only = 340 cm->allow_screen_content_tools ? 1 : 2; 341 sf->disable_smooth_intra = 342 !frame_is_intra_only(&cpi->common) || (cpi->rc.frames_to_key != 1); 343 } 344 345 if (speed >= 4) { 346 sf->use_intra_txb_hash = 0; 347 sf->tx_type_search.fast_intra_tx_type_search = 1; 348 sf->disable_loop_restoration_chroma = 349 (boosted || cm->allow_screen_content_tools) ? 0 : 1; 350 sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED; 351 sf->adaptive_pred_interp_filter = 0; 352 sf->cb_pred_filter_search = 1; 353 sf->adaptive_mode_search = 1; 354 sf->alt_ref_search_fp = 1; 355 sf->skip_sharp_interp_filter_search = 1; 356 sf->perform_coeff_opt = is_boosted_arf2_bwd_type ? 2 : 4; 357 sf->adaptive_txb_search_level = boosted ? 2 : 3; 358 } 359 360 if (speed >= 5) { 361 sf->recode_loop = ALLOW_RECODE_KFMAXBW; 362 sf->intra_y_mode_mask[TX_64X64] = INTRA_DC_H_V; 363 sf->intra_uv_mode_mask[TX_64X64] = UV_INTRA_DC_H_V_CFL; 364 sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; 365 sf->intra_uv_mode_mask[TX_32X32] = UV_INTRA_DC_H_V_CFL; 366 sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V; 367 sf->intra_uv_mode_mask[TX_16X16] = UV_INTRA_DC_H_V_CFL; 368 sf->tx_size_search_method = USE_LARGESTALL; 369 sf->mv.search_method = BIGDIA; 370 sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED_MORE; 371 sf->adaptive_rd_thresh = 4; 372 sf->mode_search_skip_flags = 373 (cm->current_frame.frame_type == KEY_FRAME) 374 ? 0 375 : FLAG_SKIP_INTRA_DIRMISMATCH | FLAG_SKIP_INTRA_BESTINTER | 376 FLAG_SKIP_COMP_BESTINTRA | FLAG_SKIP_INTRA_LOWVAR | 377 FLAG_EARLY_TERMINATE; 378 sf->disable_filter_search_var_thresh = 200; 379 sf->use_fast_coef_costing = 1; 380 sf->partition_search_breakout_rate_thr = 300; 381 sf->use_transform_domain_distortion = 2; 382 } 383 384 if (speed >= 6) { 385 int i; 386 sf->optimize_coefficients = NO_TRELLIS_OPT; 387 sf->mv.search_method = HEX; 388 sf->disable_filter_search_var_thresh = 500; 389 for (i = 0; i < TX_SIZES; ++i) { 390 sf->intra_y_mode_mask[i] = INTRA_DC; 391 sf->intra_uv_mode_mask[i] = UV_INTRA_DC_CFL; 392 } 393 sf->partition_search_breakout_rate_thr = 500; 394 sf->mv.reduce_first_step_size = 1; 395 sf->simple_model_rd_from_var = 1; 396 } 397 if (speed >= 7) { 398 sf->default_max_partition_size = BLOCK_32X32; 399 sf->default_min_partition_size = BLOCK_8X8; 400 sf->intra_y_mode_mask[TX_64X64] = INTRA_DC; 401 sf->intra_y_mode_mask[TX_32X32] = INTRA_DC; 402 sf->frame_parameter_update = 0; 403 sf->mv.search_method = FAST_HEX; 404 sf->partition_search_type = REFERENCE_PARTITION; 405 sf->mode_search_skip_flags |= FLAG_SKIP_INTRA_DIRMISMATCH; 406 // TODO(any): evaluate adaptive_mode_search=1 for speed 7 & 8 407 sf->adaptive_mode_search = 2; 408 } 409 if (speed >= 8) { 410 sf->mv.search_method = FAST_DIAMOND; 411 sf->mv.subpel_force_stop = HALF_PEL; 412 sf->lpf_pick = LPF_PICK_FROM_Q; 413 } 414 } 415 416 // TODO(kyslov): now this is very similar to 417 // set_good_speed_features_framesize_independent 418 // except it sets non-rd flag on speed8. This function will likely 419 // be modified in the future with RT-specific speed features 420 static void set_rt_speed_features_framesize_independent(AV1_COMP *cpi, 421 SPEED_FEATURES *sf, 422 int speed) { 423 AV1_COMMON *const cm = &cpi->common; 424 const int boosted = frame_is_boosted(cpi); 425 426 // Speed 0 for all speed features that give neutral coding performance change. 427 sf->reduce_inter_modes = 1; 428 sf->prune_ext_partition_types_search_level = 1; 429 sf->ml_prune_rect_partition = 1; 430 sf->ml_prune_ab_partition = 1; 431 sf->ml_prune_4_partition = 1; 432 sf->adaptive_txb_search_level = 1; 433 sf->use_dist_wtd_comp_flag = DIST_WTD_COMP_SKIP_MV_SEARCH; 434 sf->model_based_prune_tx_search_level = 1; 435 sf->model_based_post_interp_filter_breakout = 1; 436 sf->model_based_motion_mode_rd_breakout = 1; 437 438 // TODO(debargha): Test, tweak and turn on either 1 or 2 439 sf->inter_mode_rd_model_estimation = 0; 440 sf->inter_mode_rd_model_estimation_adaptive = 0; 441 sf->two_loop_comp_search = 0; 442 443 sf->prune_ref_frame_for_rect_partitions = !boosted; 444 sf->less_rectangular_check_level = 1; 445 sf->gm_search_type = GM_REDUCED_REF_SEARCH_SKIP_L2_L3; 446 sf->gm_disable_recode = 1; 447 sf->use_fast_interpolation_filter_search = 1; 448 sf->intra_tx_size_search_init_depth_sqr = 1; 449 sf->intra_angle_estimation = 1; 450 sf->selective_ref_frame = 1; 451 sf->prune_wedge_pred_diff_based = 1; 452 sf->disable_wedge_search_var_thresh = 0; 453 sf->disable_wedge_search_edge_thresh = 0; 454 sf->prune_motion_mode_level = 1; 455 sf->cb_pred_filter_search = 0; 456 sf->use_nonrd_pick_mode = 0; 457 sf->use_real_time_ref_set = 0; 458 459 if (speed >= 1) { 460 sf->gm_erroradv_type = GM_ERRORADV_TR_1; 461 sf->selective_ref_frame = 2; 462 463 sf->intra_tx_size_search_init_depth_rect = 1; 464 sf->tx_size_search_lgr_block = 1; 465 sf->prune_ext_partition_types_search_level = 2; 466 sf->skip_repeat_interpolation_filter_search = 1; 467 sf->tx_type_search.skip_tx_search = 1; 468 sf->tx_type_search.ml_tx_split_thresh = 40; 469 sf->model_based_prune_tx_search_level = 0; 470 sf->adaptive_txb_search_level = 2; 471 sf->use_intra_txb_hash = 1; 472 sf->optimize_b_precheck = 1; 473 sf->dual_sgr_penalty_level = 1; 474 sf->use_accurate_subpel_search = USE_4_TAPS; 475 sf->reuse_inter_intra_mode = 1; 476 sf->prune_comp_search_by_single_result = 1; 477 sf->skip_repeated_newmv = 1; 478 sf->obmc_full_pixel_search_level = 1; 479 // TODO(anyone): Following speed feature will be further explored to 480 // identify the appropriate tradeoff between encoder performance and its 481 // speed. 482 sf->prune_single_motion_modes_by_simple_trans = 1; 483 484 sf->simple_motion_search_prune_rect = 1; 485 486 sf->disable_wedge_search_var_thresh = 0; 487 sf->disable_wedge_search_edge_thresh = 0; 488 sf->prune_comp_type_by_comp_avg = 1; 489 sf->prune_motion_mode_level = 2; 490 sf->gm_search_type = GM_REDUCED_REF_SEARCH_SKIP_L2_L3_ARF2; 491 sf->cb_pred_filter_search = 1; 492 sf->use_transform_domain_distortion = boosted ? 0 : 1; 493 } 494 495 if (speed >= 2) { 496 sf->gm_erroradv_type = GM_ERRORADV_TR_2; 497 498 sf->selective_ref_frame = 3; 499 sf->inter_tx_size_search_init_depth_rect = 1; 500 sf->inter_tx_size_search_init_depth_sqr = 1; 501 sf->fast_cdef_search = 1; 502 503 sf->adaptive_rd_thresh = 1; 504 sf->mv.auto_mv_step_size = 1; 505 sf->mv.subpel_iters_per_step = 1; 506 sf->disable_filter_search_var_thresh = 100; 507 sf->comp_inter_joint_search_thresh = BLOCK_SIZES_ALL; 508 509 sf->partition_search_breakout_rate_thr = 80; 510 sf->allow_partition_search_skip = 1; 511 sf->disable_wedge_search_var_thresh = 100; 512 sf->disable_wedge_search_edge_thresh = 0; 513 sf->fast_wedge_sign_estimate = 1; 514 sf->disable_dual_filter = 1; 515 sf->use_dist_wtd_comp_flag = DIST_WTD_COMP_DISABLED; 516 sf->prune_comp_type_by_comp_avg = 2; 517 sf->cb_pred_filter_search = 0; 518 sf->adaptive_interp_filter_search = 1; 519 } 520 521 if (speed >= 3) { 522 sf->selective_ref_frame = 4; 523 sf->tx_size_search_method = boosted ? USE_FULL_RD : USE_LARGESTALL; 524 sf->less_rectangular_check_level = 2; 525 sf->adaptive_pred_interp_filter = 1; 526 // adaptive_motion_search breaks encoder multi-thread tests. 527 // The values in x->pred_mv[] differ for single and multi-thread cases. 528 // See aomedia:1778. 529 // sf->adaptive_motion_search = 1; 530 sf->recode_loop = ALLOW_RECODE_KFARFGF; 531 sf->use_transform_domain_distortion = 1; 532 sf->use_accurate_subpel_search = USE_2_TAPS; 533 sf->adaptive_rd_thresh = 2; 534 sf->tx_type_search.prune_mode = PRUNE_2D_FAST; 535 sf->gm_search_type = GM_DISABLE_SEARCH; 536 sf->prune_comp_search_by_single_result = 2; 537 sf->prune_motion_mode_level = boosted ? 2 : 3; 538 sf->prune_warp_using_wmtype = 1; 539 // TODO(yunqing): evaluate this speed feature for speed 1 & 2, and combine 540 // it with cpi->sf.disable_wedge_search_var_thresh. 541 sf->disable_wedge_interintra_search = 1; 542 } 543 544 if (speed >= 4) { 545 sf->use_intra_txb_hash = 0; 546 sf->use_mb_rd_hash = 0; 547 sf->tx_type_search.fast_intra_tx_type_search = 1; 548 sf->tx_type_search.fast_inter_tx_type_search = 1; 549 sf->tx_size_search_method = 550 frame_is_intra_only(cm) ? USE_FULL_RD : USE_LARGESTALL; 551 sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED; 552 sf->adaptive_pred_interp_filter = 0; 553 sf->adaptive_mode_search = 1; 554 sf->alt_ref_search_fp = 1; 555 sf->skip_sharp_interp_filter_search = 1; 556 } 557 558 if (speed >= 5) { 559 sf->recode_loop = ALLOW_RECODE_KFMAXBW; 560 sf->intra_y_mode_mask[TX_64X64] = INTRA_DC_H_V; 561 sf->intra_uv_mode_mask[TX_64X64] = UV_INTRA_DC_H_V_CFL; 562 sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; 563 sf->intra_uv_mode_mask[TX_32X32] = UV_INTRA_DC_H_V_CFL; 564 sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V; 565 sf->intra_uv_mode_mask[TX_16X16] = UV_INTRA_DC_H_V_CFL; 566 sf->tx_size_search_method = USE_LARGESTALL; 567 sf->mv.search_method = BIGDIA; 568 sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED_MORE; 569 sf->adaptive_rd_thresh = 4; 570 sf->mode_search_skip_flags = 571 (cm->current_frame.frame_type == KEY_FRAME) 572 ? 0 573 : FLAG_SKIP_INTRA_DIRMISMATCH | FLAG_SKIP_INTRA_BESTINTER | 574 FLAG_SKIP_COMP_BESTINTRA | FLAG_SKIP_INTRA_LOWVAR | 575 FLAG_EARLY_TERMINATE; 576 sf->disable_filter_search_var_thresh = 200; 577 sf->use_fast_coef_costing = 1; 578 sf->partition_search_breakout_rate_thr = 300; 579 sf->use_transform_domain_distortion = 2; 580 } 581 582 if (speed >= 6) { 583 int i; 584 sf->optimize_coefficients = NO_TRELLIS_OPT; 585 sf->mv.search_method = HEX; 586 sf->disable_filter_search_var_thresh = 500; 587 for (i = 0; i < TX_SIZES; ++i) { 588 sf->intra_y_mode_mask[i] = INTRA_DC; 589 sf->intra_uv_mode_mask[i] = UV_INTRA_DC_CFL; 590 } 591 sf->partition_search_breakout_rate_thr = 500; 592 sf->mv.reduce_first_step_size = 1; 593 sf->simple_model_rd_from_var = 1; 594 } 595 if (speed >= 7) { 596 sf->default_max_partition_size = BLOCK_32X32; 597 sf->default_min_partition_size = BLOCK_8X8; 598 sf->intra_y_mode_mask[TX_64X64] = INTRA_DC; 599 sf->intra_y_mode_mask[TX_32X32] = INTRA_DC; 600 sf->frame_parameter_update = 0; 601 sf->mv.search_method = FAST_HEX; 602 sf->partition_search_type = REFERENCE_PARTITION; 603 sf->mode_search_skip_flags |= FLAG_SKIP_INTRA_DIRMISMATCH; 604 } 605 if (speed >= 8) { 606 sf->mv.search_method = FAST_DIAMOND; 607 sf->lpf_pick = LPF_PICK_FROM_Q; 608 sf->default_max_partition_size = BLOCK_128X128; 609 sf->default_min_partition_size = BLOCK_8X8; 610 sf->partition_search_type = VAR_BASED_PARTITION; 611 sf->use_real_time_ref_set = 1; 612 // Can't use LARGEST TX mode with pre-calculated partition 613 // and disabled TX64 614 if (!cpi->oxcf.enable_tx64) sf->tx_size_search_method = USE_FAST_RD; 615 sf->use_nonrd_pick_mode = 1; 616 sf->inter_mode_rd_model_estimation = 2; 617 } 618 } 619 620 void av1_set_speed_features_framesize_dependent(AV1_COMP *cpi, int speed) { 621 SPEED_FEATURES *const sf = &cpi->sf; 622 const AV1EncoderConfig *const oxcf = &cpi->oxcf; 623 624 if (oxcf->mode == GOOD) { 625 set_good_speed_feature_framesize_dependent(cpi, sf, speed); 626 } 627 628 // This is only used in motion vector unit test. 629 if (cpi->oxcf.motion_vector_unit_test == 1) 630 cpi->find_fractional_mv_step = av1_return_max_sub_pixel_mv; 631 else if (cpi->oxcf.motion_vector_unit_test == 2) 632 cpi->find_fractional_mv_step = av1_return_min_sub_pixel_mv; 633 } 634 635 void av1_set_speed_features_framesize_independent(AV1_COMP *cpi, int speed) { 636 AV1_COMMON *const cm = &cpi->common; 637 SPEED_FEATURES *const sf = &cpi->sf; 638 MACROBLOCK *const x = &cpi->td.mb; 639 const AV1EncoderConfig *const oxcf = &cpi->oxcf; 640 int i; 641 642 // best quality defaults 643 sf->frame_parameter_update = 1; 644 sf->mv.search_method = NSTEP; 645 sf->recode_loop = ALLOW_RECODE; 646 sf->mv.subpel_search_method = SUBPEL_TREE; 647 sf->mv.subpel_iters_per_step = 2; 648 sf->mv.subpel_force_stop = EIGHTH_PEL; 649 if (cpi->oxcf.disable_trellis_quant == 3) { 650 sf->optimize_coefficients = !is_lossless_requested(&cpi->oxcf) 651 ? NO_ESTIMATE_YRD_TRELLIS_OPT 652 : NO_TRELLIS_OPT; 653 } else if (cpi->oxcf.disable_trellis_quant == 2) { 654 sf->optimize_coefficients = !is_lossless_requested(&cpi->oxcf) 655 ? FINAL_PASS_TRELLIS_OPT 656 : NO_TRELLIS_OPT; 657 } else if (cpi->oxcf.disable_trellis_quant == 0) { 658 if (is_lossless_requested(&cpi->oxcf)) 659 sf->optimize_coefficients = NO_TRELLIS_OPT; 660 else 661 sf->optimize_coefficients = FULL_TRELLIS_OPT; 662 } else if (cpi->oxcf.disable_trellis_quant == 1) { 663 sf->optimize_coefficients = NO_TRELLIS_OPT; 664 } else { 665 assert(0 && "Invalid disable_trellis_quant value"); 666 } 667 sf->gm_erroradv_type = GM_ERRORADV_TR_0; 668 sf->mv.reduce_first_step_size = 0; 669 sf->mv.auto_mv_step_size = 0; 670 sf->comp_inter_joint_search_thresh = BLOCK_4X4; 671 sf->adaptive_rd_thresh = 0; 672 // TODO(sarahparker) Pair this with a speed setting once experiments are done 673 sf->trellis_eob_fast = 0; 674 sf->tx_size_search_method = cpi->oxcf.tx_size_search_method; 675 sf->inter_tx_size_search_init_depth_sqr = 0; 676 sf->inter_tx_size_search_init_depth_rect = 0; 677 sf->intra_tx_size_search_init_depth_rect = 0; 678 sf->intra_tx_size_search_init_depth_sqr = 0; 679 sf->tx_size_search_lgr_block = 0; 680 sf->model_based_prune_tx_search_level = 0; 681 sf->model_based_post_interp_filter_breakout = 0; 682 sf->model_based_motion_mode_rd_breakout = 0; 683 sf->reduce_inter_modes = 0; 684 sf->selective_ref_gm = 1; 685 sf->adaptive_motion_search = 0; 686 sf->adaptive_pred_interp_filter = 0; 687 sf->adaptive_mode_search = 0; 688 sf->alt_ref_search_fp = 0; 689 sf->partition_search_type = SEARCH_PARTITION; 690 sf->tx_type_search.prune_mode = PRUNE_2D_ACCURATE; 691 sf->tx_type_search.ml_tx_split_thresh = 30; 692 sf->tx_type_search.use_skip_flag_prediction = 1; 693 sf->tx_type_search.fast_intra_tx_type_search = 0; 694 sf->tx_type_search.fast_inter_tx_type_search = 0; 695 sf->tx_type_search.skip_tx_search = 0; 696 sf->selective_ref_frame = 0; 697 sf->less_rectangular_check_level = 0; 698 sf->use_square_partition_only_threshold = BLOCK_128X128; 699 sf->prune_ref_frame_for_rect_partitions = 0; 700 sf->auto_max_partition_based_on_simple_motion = NOT_IN_USE; 701 sf->auto_min_partition_based_on_simple_motion = 0; 702 sf->rd_auto_partition_min_limit = BLOCK_4X4; 703 sf->default_max_partition_size = BLOCK_LARGEST; 704 sf->default_min_partition_size = BLOCK_4X4; 705 sf->adjust_partitioning_from_last_frame = 0; 706 sf->mode_search_skip_flags = 0; 707 sf->disable_filter_search_var_thresh = 0; 708 sf->allow_partition_search_skip = 0; 709 sf->use_accurate_subpel_search = USE_8_TAPS; 710 sf->disable_wedge_search_edge_thresh = 0; 711 sf->use_first_partition_pass_interintra_stats = 0; 712 sf->disable_wedge_search_var_thresh = 0; 713 sf->disable_loop_restoration_chroma = 0; 714 sf->fast_wedge_sign_estimate = 0; 715 sf->prune_wedge_pred_diff_based = 0; 716 sf->drop_ref = 0; 717 sf->skip_intra_in_interframe = 1; 718 sf->txb_split_cap = 1; 719 sf->adaptive_txb_search_level = 0; 720 sf->two_pass_partition_search = 0; 721 sf->firstpass_simple_motion_search_early_term = 0; 722 sf->use_intra_txb_hash = 0; 723 sf->use_inter_txb_hash = 1; 724 sf->use_mb_rd_hash = 1; 725 sf->optimize_b_precheck = 0; 726 sf->two_loop_comp_search = 1; 727 sf->second_loop_comp_fast_tx_search = 0; 728 sf->use_dist_wtd_comp_flag = DIST_WTD_COMP_ENABLED; 729 sf->reuse_inter_intra_mode = 0; 730 sf->intra_angle_estimation = 0; 731 sf->skip_obmc_in_uniform_mv_field = 0; 732 sf->skip_wm_in_uniform_mv_field = 0; 733 sf->adaptive_interp_filter_search = 0; 734 735 for (i = 0; i < TX_SIZES; i++) { 736 sf->intra_y_mode_mask[i] = INTRA_ALL; 737 sf->intra_uv_mode_mask[i] = UV_INTRA_ALL; 738 } 739 sf->lpf_pick = LPF_PICK_FROM_FULL_IMAGE; 740 sf->use_fast_coef_costing = 0; 741 sf->max_intra_bsize = BLOCK_LARGEST; 742 // This setting only takes effect when partition_search_type is set 743 // to FIXED_PARTITION. 744 sf->always_this_block_size = BLOCK_16X16; 745 // Recode loop tolerance %. 746 sf->recode_tolerance = 25; 747 sf->partition_search_breakout_dist_thr = 0; 748 sf->partition_search_breakout_rate_thr = 0; 749 sf->simple_model_rd_from_var = 0; 750 sf->prune_ext_partition_types_search_level = 0; 751 sf->ml_prune_rect_partition = 0; 752 sf->ml_prune_ab_partition = 0; 753 sf->ml_prune_4_partition = 0; 754 sf->fast_cdef_search = 0; 755 for (i = 0; i < PARTITION_BLOCK_SIZES; ++i) { 756 sf->ml_partition_search_breakout_thresh[i] = -1; // -1 means not enabled. 757 } 758 sf->simple_motion_search_split_only = 0; 759 sf->simple_motion_search_prune_rect = 0; 760 sf->simple_motion_search_early_term_none = 0; 761 762 // Set this at the appropriate speed levels 763 sf->use_transform_domain_distortion = 0; 764 sf->gm_search_type = GM_FULL_SEARCH; 765 sf->gm_disable_recode = 0; 766 sf->use_fast_interpolation_filter_search = 0; 767 sf->disable_dual_filter = 0; 768 sf->skip_repeat_interpolation_filter_search = 0; 769 sf->use_hash_based_trellis = 0; 770 sf->prune_comp_search_by_single_result = 0; 771 sf->skip_repeated_newmv = 0; 772 sf->prune_single_motion_modes_by_simple_trans = 0; 773 774 // Set decoder side speed feature to use less dual sgr modes 775 sf->dual_sgr_penalty_level = 0; 776 777 // TODO(angiebird, debargha): Re-evaluate the impact of 778 // inter_mode_rd_model_estimation in conjunction with 779 // model_based_motion_mode_rd_breakout 780 sf->inter_mode_rd_model_estimation = 0; 781 sf->inter_mode_rd_model_estimation_adaptive = 0; 782 783 sf->obmc_full_pixel_search_level = 0; 784 sf->skip_sharp_interp_filter_search = 0; 785 sf->prune_comp_type_by_comp_avg = 0; 786 sf->disable_interinter_wedge_newmv_search = 0; 787 sf->disable_smooth_interintra = 0; 788 sf->prune_motion_mode_level = 0; 789 sf->prune_warp_using_wmtype = 0; 790 sf->disable_wedge_interintra_search = 0; 791 sf->perform_coeff_opt = 0; 792 sf->prune_comp_type_by_model_rd = 0; 793 sf->disable_smooth_intra = 0; 794 sf->perform_best_rd_based_gating_for_chroma = 0; 795 796 if (oxcf->mode == GOOD) 797 set_good_speed_features_framesize_independent(cpi, sf, speed); 798 else if (oxcf->mode == REALTIME) 799 set_rt_speed_features_framesize_independent(cpi, sf, speed); 800 801 if (!cpi->seq_params_locked) { 802 cpi->common.seq_params.enable_dual_filter &= !sf->disable_dual_filter; 803 } 804 805 // sf->partition_search_breakout_dist_thr is set assuming max 64x64 806 // blocks. Normalise this if the blocks are bigger. 807 if (MAX_SB_SIZE_LOG2 > 6) { 808 sf->partition_search_breakout_dist_thr <<= 2 * (MAX_SB_SIZE_LOG2 - 6); 809 } 810 811 cpi->diamond_search_sad = av1_diamond_search_sad; 812 813 sf->allow_exhaustive_searches = 1; 814 815 const int mesh_speed = AOMMIN(speed, MAX_MESH_SPEED); 816 if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) 817 sf->exhaustive_searches_thresh = (1 << 24); 818 else 819 sf->exhaustive_searches_thresh = (1 << 25); 820 sf->max_exaustive_pct = good_quality_max_mesh_pct[mesh_speed]; 821 if (mesh_speed > 0) 822 sf->exhaustive_searches_thresh = sf->exhaustive_searches_thresh << 1; 823 824 for (i = 0; i < MAX_MESH_STEP; ++i) { 825 sf->mesh_patterns[i].range = 826 good_quality_mesh_patterns[mesh_speed][i].range; 827 sf->mesh_patterns[i].interval = 828 good_quality_mesh_patterns[mesh_speed][i].interval; 829 } 830 if ((frame_is_intra_only(cm) && cm->allow_screen_content_tools) && 831 (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION || 832 cpi->oxcf.content == AOM_CONTENT_SCREEN)) { 833 for (i = 0; i < MAX_MESH_STEP; ++i) { 834 sf->mesh_patterns[i].range = intrabc_mesh_patterns[mesh_speed][i].range; 835 sf->mesh_patterns[i].interval = 836 intrabc_mesh_patterns[mesh_speed][i].interval; 837 } 838 sf->max_exaustive_pct = intrabc_max_mesh_pct[mesh_speed]; 839 } 840 841 // Slow quant, dct and trellis not worthwhile for first pass 842 // so make sure they are always turned off. 843 if (oxcf->pass == 1) sf->optimize_coefficients = NO_TRELLIS_OPT; 844 845 // No recode or trellis for 1 pass. 846 if (oxcf->pass == 0) { 847 sf->recode_loop = DISALLOW_RECODE; 848 sf->optimize_coefficients = NO_TRELLIS_OPT; 849 } 850 // FIXME: trellis not very efficient for quantization matrices 851 if (oxcf->using_qm) sf->optimize_coefficients = NO_TRELLIS_OPT; 852 853 if (sf->mv.subpel_search_method == SUBPEL_TREE) { 854 cpi->find_fractional_mv_step = av1_find_best_sub_pixel_tree; 855 } else if (sf->mv.subpel_search_method == SUBPEL_TREE_PRUNED) { 856 cpi->find_fractional_mv_step = av1_find_best_sub_pixel_tree_pruned; 857 } else if (sf->mv.subpel_search_method == SUBPEL_TREE_PRUNED_MORE) { 858 cpi->find_fractional_mv_step = av1_find_best_sub_pixel_tree_pruned_more; 859 } else if (sf->mv.subpel_search_method == SUBPEL_TREE_PRUNED_EVENMORE) { 860 cpi->find_fractional_mv_step = av1_find_best_sub_pixel_tree_pruned_evenmore; 861 } 862 863 x->min_partition_size = sf->default_min_partition_size; 864 x->max_partition_size = sf->default_max_partition_size; 865 866 // This is only used in motion vector unit test. 867 if (cpi->oxcf.motion_vector_unit_test == 1) 868 cpi->find_fractional_mv_step = av1_return_max_sub_pixel_mv; 869 else if (cpi->oxcf.motion_vector_unit_test == 2) 870 cpi->find_fractional_mv_step = av1_return_min_sub_pixel_mv; 871 cpi->max_comp_type_rd_threshold_mul = 872 comp_type_rd_threshold_mul[sf->prune_comp_type_by_comp_avg]; 873 cpi->max_comp_type_rd_threshold_div = 874 comp_type_rd_threshold_div[sf->prune_comp_type_by_comp_avg]; 875 const int tx_domain_speed = AOMMIN(speed, MAX_TX_DOMAIN_EVAL_SPEED); 876 cpi->tx_domain_dist_threshold = tx_domain_dist_thresholds[tx_domain_speed]; 877 878 // assert ensures that coeff_opt_dist_thresholds is accessed correctly 879 assert(cpi->sf.perform_coeff_opt >= 0 && cpi->sf.perform_coeff_opt < 5); 880 cpi->coeff_opt_dist_threshold = 881 coeff_opt_dist_thresholds[cpi->sf.perform_coeff_opt]; 882 883 #if CONFIG_DIST_8X8 884 if (sf->use_transform_domain_distortion > 0) cpi->oxcf.using_dist_8x8 = 0; 885 886 if (cpi->oxcf.using_dist_8x8) x->min_partition_size = BLOCK_8X8; 887 #endif // CONFIG_DIST_8X8 888 if (cpi->oxcf.row_mt == 1 && (cpi->oxcf.max_threads > 1)) { 889 sf->adaptive_rd_thresh = 0; 890 if (sf->inter_mode_rd_model_estimation == 1) { 891 sf->inter_mode_rd_model_estimation = 0; 892 sf->inter_mode_rd_model_estimation_adaptive = 0; 893 } 894 } 895 } 896