/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <assert.h>
#include <math.h>
#include <stdbool.h>

#include "config/aom_dsp_rtcd.h"
#include "config/av1_rtcd.h"

#include "aom_dsp/aom_dsp_common.h"
#include "aom_dsp/blend.h"
#include "aom_mem/aom_mem.h"
#include "aom_ports/aom_timer.h"
#include "aom_ports/mem.h"
#include "aom_ports/system_state.h"

#include "av1/common/cfl.h"
#include "av1/common/common.h"
#include "av1/common/common_data.h"
#include "av1/common/entropy.h"
#include "av1/common/entropymode.h"
#include "av1/common/idct.h"
#include "av1/common/mvref_common.h"
#include "av1/common/obmc.h"
#include "av1/common/onyxc_int.h"
#include "av1/common/pred_common.h"
#include "av1/common/quant_common.h"
#include "av1/common/reconinter.h"
#include "av1/common/reconintra.h"
#include "av1/common/scan.h"
#include "av1/common/seg_common.h"
#include "av1/common/txb_common.h"
#include "av1/common/warped_motion.h"

#include "av1/encoder/aq_variance.h"
#include "av1/encoder/av1_quantize.h"
#include "av1/encoder/cost.h"
#include "av1/encoder/encodemb.h"
#include "av1/encoder/encodemv.h"
#include "av1/encoder/encoder.h"
#include "av1/encoder/encodetxb.h"
#include "av1/encoder/hybrid_fwd_txfm.h"
#include "av1/encoder/mcomp.h"
#include "av1/encoder/ml.h"
#include "av1/encoder/palette.h"
#include "av1/encoder/pustats.h"
#include "av1/encoder/random.h"
#include "av1/encoder/ratectrl.h"
#include "av1/encoder/rd.h"
#include "av1/encoder/rdopt.h"
#include "av1/encoder/reconinter_enc.h"
#include "av1/encoder/tokenize.h"
#include "av1/encoder/tx_prune_model_weights.h"

// Set this macro as 1 to collect data about tx size selection.
#define COLLECT_TX_SIZE_DATA 0

#if COLLECT_TX_SIZE_DATA
static const char av1_tx_size_data_output_file[] = "tx_size_data.txt";
#endif

typedef void (*model_rd_for_sb_type)(
    const AV1_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd,
    int plane_from, int plane_to, int mi_row, int mi_col, int *out_rate_sum,
    int64_t *out_dist_sum, int *skip_txfm_sb, int64_t *skip_sse_sb,
    int *plane_rate, int64_t *plane_sse, int64_t *plane_dist);
typedef void (*model_rd_from_sse_type)(const AV1_COMP *const cpi,
                                       const MACROBLOCK *const x,
                                       BLOCK_SIZE plane_bsize, int plane,
                                       int64_t sse, int num_samples, int *rate,
                                       int64_t *dist);

static void model_rd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
                            MACROBLOCK *x, MACROBLOCKD *xd, int plane_from,
                            int plane_to, int mi_row, int mi_col,
                            int *out_rate_sum, int64_t *out_dist_sum,
                            int *skip_txfm_sb, int64_t *skip_sse_sb,
                            int *plane_rate, int64_t *plane_sse,
                            int64_t *plane_dist);
static void model_rd_for_sb_with_curvfit(
    const AV1_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd,
    int plane_from, int plane_to, int mi_row, int mi_col, int *out_rate_sum,
    int64_t *out_dist_sum, int *skip_txfm_sb, int64_t *skip_sse_sb,
    int *plane_rate, int64_t *plane_sse, int64_t *plane_dist);
static void model_rd_for_sb_with_surffit(
    const AV1_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd,
    int plane_from, int plane_to, int mi_row, int mi_col, int *out_rate_sum,
    int64_t *out_dist_sum, int *skip_txfm_sb, int64_t *skip_sse_sb,
    int *plane_rate, int64_t *plane_sse, int64_t *plane_dist);
static void model_rd_for_sb_with_dnn(
    const AV1_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd,
    int plane_from, int plane_to, int mi_row, int mi_col, int *out_rate_sum,
    int64_t *out_dist_sum, int *skip_txfm_sb, int64_t *skip_sse_sb,
    int *plane_rate, int64_t *plane_sse, int64_t *plane_dist);
static void model_rd_for_sb_with_fullrdy(
    const AV1_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd,
    int plane_from, int plane_to, int mi_row, int mi_col, int *out_rate_sum,
    int64_t *out_dist_sum, int *skip_txfm_sb, int64_t *skip_sse_sb,
    int *plane_rate, int64_t *plane_sse, int64_t *plane_dist);
static void model_rd_from_sse(const AV1_COMP *const cpi,
                              const MACROBLOCK *const x, BLOCK_SIZE plane_bsize,
                              int plane, int64_t sse, int num_samples,
                              int *rate, int64_t *dist);
static void model_rd_with_dnn(const AV1_COMP *const cpi,
                              const MACROBLOCK *const x, BLOCK_SIZE plane_bsize,
                              int plane, int64_t sse, int num_samples,
                              int *rate, int64_t *dist);
static void model_rd_with_curvfit(const AV1_COMP *const cpi,
                                  const MACROBLOCK *const x,
                                  BLOCK_SIZE plane_bsize, int plane,
                                  int64_t sse, int num_samples, int *rate,
                                  int64_t *dist);
static void model_rd_with_surffit(const AV1_COMP *const cpi,
                                  const MACROBLOCK *const x,
                                  BLOCK_SIZE plane_bsize, int plane,
                                  int64_t sse, int num_samples, int *rate,
                                  int64_t *dist);

enum {
  MODELRD_LEGACY,
  MODELRD_CURVFIT,
  MODELRD_SUFFIT,
  MODELRD_DNN,
  MODELRD_FULLRDY,
  MODELRD_TYPES
} UENUM1BYTE(ModelRdType);

static model_rd_for_sb_type model_rd_sb_fn[MODELRD_TYPES] = {
  model_rd_for_sb, model_rd_for_sb_with_curvfit, model_rd_for_sb_with_surffit,
  model_rd_for_sb_with_dnn, model_rd_for_sb_with_fullrdy
};

static model_rd_from_sse_type model_rd_sse_fn[MODELRD_TYPES] = {
  model_rd_from_sse, model_rd_with_curvfit, model_rd_with_surffit,
  model_rd_with_dnn, NULL
};

// 0: Legacy model
// 1: Curve fit model
// 2: Surface fit model
// 3: DNN regression model
// 4: Full rd model
#define MODELRD_TYPE_INTERP_FILTER 1
#define MODELRD_TYPE_TX_SEARCH_PRUNE 1
#define MODELRD_TYPE_MASKED_COMPOUND 1
#define MODELRD_TYPE_INTERINTRA 1
#define MODELRD_TYPE_INTRA 1
#define MODELRD_TYPE_DIST_WTD_COMPOUND 1
#define MODELRD_TYPE_MOTION_MODE_RD 1
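
// Illustrative dispatch (a sketch, not code from this file): the
// MODELRD_TYPE_* selectors above index the function tables, so a caller
// modeling rate/distortion for, say, interpolation filter search would do
// roughly
//   model_rd_sb_fn[MODELRD_TYPE_INTERP_FILTER](
//       cpi, bsize, x, xd, 0, num_planes - 1, mi_row, mi_col, &rate_sum,
//       &dist_sum, &skip_txfm_sb, &skip_sse_sb, NULL, NULL, NULL);
// where the local variable names are hypothetical.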

#define DUAL_FILTER_SET_SIZE (SWITCHABLE_FILTERS * SWITCHABLE_FILTERS)
static const InterpFilters filter_sets[DUAL_FILTER_SET_SIZE] = {
  0x00000000, 0x00010000, 0x00020000,  // y = 0
  0x00000001, 0x00010001, 0x00020001,  // y = 1
  0x00000002, 0x00010002, 0x00020002,  // y = 2
};
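
// As the row labels above suggest, each packed InterpFilters value keeps the
// vertical (y) filter in its low 16 bits and the horizontal (x) filter in its
// high 16 bits; each row varies only the high half.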

static const double ADST_FLIP_SVM[8] = {
  /* vertical */
  -6.6623, -2.8062, -3.2531, 3.1671,
  /* horizontal */
  -7.7051, -3.2234, -3.6193, 3.4533
};

typedef struct {
  PREDICTION_MODE mode;
  MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;

enum {
  FTXS_NONE = 0,
  FTXS_DCT_AND_1D_DCT_ONLY = 1 << 0,
  FTXS_DISABLE_TRELLIS_OPT = 1 << 1,
  FTXS_USE_TRANSFORM_DOMAIN = 1 << 2
} UENUM1BYTE(FAST_TX_SEARCH_MODE);

struct rdcost_block_args {
  const AV1_COMP *cpi;
  MACROBLOCK *x;
  ENTROPY_CONTEXT t_above[MAX_MIB_SIZE];
  ENTROPY_CONTEXT t_left[MAX_MIB_SIZE];
  RD_STATS rd_stats;
  int64_t this_rd;
  int64_t best_rd;
  int exit_early;
  int incomplete_exit;
  int use_fast_coef_costing;
  FAST_TX_SEARCH_MODE ftxs_mode;
  int skip_trellis;
};

#define LAST_NEW_MV_INDEX 6
static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
  { NEARESTMV, { LAST_FRAME, NONE_FRAME } },
  { NEARESTMV, { LAST2_FRAME, NONE_FRAME } },
  { NEARESTMV, { LAST3_FRAME, NONE_FRAME } },
  { NEARESTMV, { BWDREF_FRAME, NONE_FRAME } },
  { NEARESTMV, { ALTREF2_FRAME, NONE_FRAME } },
  { NEARESTMV, { ALTREF_FRAME, NONE_FRAME } },
  { NEARESTMV, { GOLDEN_FRAME, NONE_FRAME } },

  { NEWMV, { LAST_FRAME, NONE_FRAME } },
  { NEWMV, { LAST2_FRAME, NONE_FRAME } },
  { NEWMV, { LAST3_FRAME, NONE_FRAME } },
  { NEWMV, { BWDREF_FRAME, NONE_FRAME } },
  { NEWMV, { ALTREF2_FRAME, NONE_FRAME } },
  { NEWMV, { ALTREF_FRAME, NONE_FRAME } },
  { NEWMV, { GOLDEN_FRAME, NONE_FRAME } },

  { NEARMV, { LAST_FRAME, NONE_FRAME } },
  { NEARMV, { LAST2_FRAME, NONE_FRAME } },
  { NEARMV, { LAST3_FRAME, NONE_FRAME } },
  { NEARMV, { BWDREF_FRAME, NONE_FRAME } },
  { NEARMV, { ALTREF2_FRAME, NONE_FRAME } },
  { NEARMV, { ALTREF_FRAME, NONE_FRAME } },
  { NEARMV, { GOLDEN_FRAME, NONE_FRAME } },

  { GLOBALMV, { LAST_FRAME, NONE_FRAME } },
  { GLOBALMV, { LAST2_FRAME, NONE_FRAME } },
  { GLOBALMV, { LAST3_FRAME, NONE_FRAME } },
  { GLOBALMV, { BWDREF_FRAME, NONE_FRAME } },
  { GLOBALMV, { ALTREF2_FRAME, NONE_FRAME } },
  { GLOBALMV, { GOLDEN_FRAME, NONE_FRAME } },
  { GLOBALMV, { ALTREF_FRAME, NONE_FRAME } },

  // TODO(zoeliu): May need to reconsider the order on the modes to check

  { NEAREST_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST_FRAME, ALTREF2_FRAME } },
  { NEAREST_NEARESTMV, { LAST2_FRAME, ALTREF2_FRAME } },
  { NEAREST_NEARESTMV, { LAST3_FRAME, ALTREF2_FRAME } },
  { NEAREST_NEARESTMV, { GOLDEN_FRAME, ALTREF2_FRAME } },

  { NEAREST_NEARESTMV, { LAST_FRAME, LAST2_FRAME } },
  { NEAREST_NEARESTMV, { LAST_FRAME, LAST3_FRAME } },
  { NEAREST_NEARESTMV, { LAST_FRAME, GOLDEN_FRAME } },
  { NEAREST_NEARESTMV, { BWDREF_FRAME, ALTREF_FRAME } },

  { NEAR_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
  { GLOBAL_GLOBALMV, { LAST_FRAME, ALTREF_FRAME } },

  { NEAR_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { GLOBAL_GLOBALMV, { LAST2_FRAME, ALTREF_FRAME } },

  { NEAR_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
  { GLOBAL_GLOBALMV, { LAST3_FRAME, ALTREF_FRAME } },

  { NEAR_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { GLOBAL_GLOBALMV, { GOLDEN_FRAME, ALTREF_FRAME } },

  { NEAR_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { GLOBAL_GLOBALMV, { LAST_FRAME, BWDREF_FRAME } },

  { NEAR_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { GLOBAL_GLOBALMV, { LAST2_FRAME, BWDREF_FRAME } },

  { NEAR_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { GLOBAL_GLOBALMV, { LAST3_FRAME, BWDREF_FRAME } },

  { NEAR_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { GLOBAL_GLOBALMV, { GOLDEN_FRAME, BWDREF_FRAME } },

  { NEAR_NEARMV, { LAST_FRAME, ALTREF2_FRAME } },
  { NEW_NEARESTMV, { LAST_FRAME, ALTREF2_FRAME } },
  { NEAREST_NEWMV, { LAST_FRAME, ALTREF2_FRAME } },
  { NEW_NEARMV, { LAST_FRAME, ALTREF2_FRAME } },
  { NEAR_NEWMV, { LAST_FRAME, ALTREF2_FRAME } },
  { NEW_NEWMV, { LAST_FRAME, ALTREF2_FRAME } },
  { GLOBAL_GLOBALMV, { LAST_FRAME, ALTREF2_FRAME } },

  { NEAR_NEARMV, { LAST2_FRAME, ALTREF2_FRAME } },
  { NEW_NEARESTMV, { LAST2_FRAME, ALTREF2_FRAME } },
  { NEAREST_NEWMV, { LAST2_FRAME, ALTREF2_FRAME } },
  { NEW_NEARMV, { LAST2_FRAME, ALTREF2_FRAME } },
  { NEAR_NEWMV, { LAST2_FRAME, ALTREF2_FRAME } },
  { NEW_NEWMV, { LAST2_FRAME, ALTREF2_FRAME } },
  { GLOBAL_GLOBALMV, { LAST2_FRAME, ALTREF2_FRAME } },

  { NEAR_NEARMV, { LAST3_FRAME, ALTREF2_FRAME } },
  { NEW_NEARESTMV, { LAST3_FRAME, ALTREF2_FRAME } },
  { NEAREST_NEWMV, { LAST3_FRAME, ALTREF2_FRAME } },
  { NEW_NEARMV, { LAST3_FRAME, ALTREF2_FRAME } },
  { NEAR_NEWMV, { LAST3_FRAME, ALTREF2_FRAME } },
  { NEW_NEWMV, { LAST3_FRAME, ALTREF2_FRAME } },
  { GLOBAL_GLOBALMV, { LAST3_FRAME, ALTREF2_FRAME } },

  { NEAR_NEARMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
  { NEW_NEARESTMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
  { NEAREST_NEWMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
  { NEW_NEARMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
  { NEAR_NEWMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
  { NEW_NEWMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
  { GLOBAL_GLOBALMV, { GOLDEN_FRAME, ALTREF2_FRAME } },

  { NEAR_NEARMV, { LAST_FRAME, LAST2_FRAME } },
  { NEW_NEARESTMV, { LAST_FRAME, LAST2_FRAME } },
  { NEAREST_NEWMV, { LAST_FRAME, LAST2_FRAME } },
  { NEW_NEARMV, { LAST_FRAME, LAST2_FRAME } },
  { NEAR_NEWMV, { LAST_FRAME, LAST2_FRAME } },
  { NEW_NEWMV, { LAST_FRAME, LAST2_FRAME } },
  { GLOBAL_GLOBALMV, { LAST_FRAME, LAST2_FRAME } },

  { NEAR_NEARMV, { LAST_FRAME, LAST3_FRAME } },
  { NEW_NEARESTMV, { LAST_FRAME, LAST3_FRAME } },
  { NEAREST_NEWMV, { LAST_FRAME, LAST3_FRAME } },
  { NEW_NEARMV, { LAST_FRAME, LAST3_FRAME } },
  { NEAR_NEWMV, { LAST_FRAME, LAST3_FRAME } },
  { NEW_NEWMV, { LAST_FRAME, LAST3_FRAME } },
  { GLOBAL_GLOBALMV, { LAST_FRAME, LAST3_FRAME } },

  { NEAR_NEARMV, { LAST_FRAME, GOLDEN_FRAME } },
  { NEW_NEARESTMV, { LAST_FRAME, GOLDEN_FRAME } },
  { NEAREST_NEWMV, { LAST_FRAME, GOLDEN_FRAME } },
  { NEW_NEARMV, { LAST_FRAME, GOLDEN_FRAME } },
  { NEAR_NEWMV, { LAST_FRAME, GOLDEN_FRAME } },
  { NEW_NEWMV, { LAST_FRAME, GOLDEN_FRAME } },
  { GLOBAL_GLOBALMV, { LAST_FRAME, GOLDEN_FRAME } },

  { NEAR_NEARMV, { BWDREF_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { BWDREF_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { BWDREF_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { BWDREF_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { BWDREF_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { BWDREF_FRAME, ALTREF_FRAME } },
  { GLOBAL_GLOBALMV, { BWDREF_FRAME, ALTREF_FRAME } },

  // intra modes
  { DC_PRED, { INTRA_FRAME, NONE_FRAME } },
  { PAETH_PRED, { INTRA_FRAME, NONE_FRAME } },
  { SMOOTH_PRED, { INTRA_FRAME, NONE_FRAME } },
  { SMOOTH_V_PRED, { INTRA_FRAME, NONE_FRAME } },
  { SMOOTH_H_PRED, { INTRA_FRAME, NONE_FRAME } },
  { H_PRED, { INTRA_FRAME, NONE_FRAME } },
  { V_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D135_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D203_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D157_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D67_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D113_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D45_PRED, { INTRA_FRAME, NONE_FRAME } },
};

static const int16_t intra_to_mode_idx[INTRA_MODE_NUM] = {
  THR_DC,         // DC_PRED,
  THR_V_PRED,     // V_PRED,
  THR_H_PRED,     // H_PRED,
  THR_D45_PRED,   // D45_PRED,
  THR_D135_PRED,  // D135_PRED,
  THR_D113_PRED,  // D113_PRED,
  THR_D157_PRED,  // D157_PRED,
  THR_D203_PRED,  // D203_PRED,
  THR_D67_PRED,   // D67_PRED,
  THR_SMOOTH,     // SMOOTH_PRED,
  THR_SMOOTH_V,   // SMOOTH_V_PRED,
  THR_SMOOTH_H,   // SMOOTH_H_PRED,
  THR_PAETH,      // PAETH_PRED,
};

/* clang-format off */
static const int16_t single_inter_to_mode_idx[SINGLE_INTER_MODE_NUM]
                                             [REF_FRAMES] = {
  // NEARESTMV,
  { -1, THR_NEARESTMV, THR_NEARESTL2, THR_NEARESTL3,
    THR_NEARESTG, THR_NEARESTB, THR_NEARESTA2, THR_NEARESTA, },
  // NEARMV,
  { -1, THR_NEARMV, THR_NEARL2, THR_NEARL3,
    THR_NEARG, THR_NEARB, THR_NEARA2, THR_NEARA, },
  // GLOBALMV,
  { -1, THR_GLOBALMV, THR_GLOBALL2, THR_GLOBALL3,
    THR_GLOBALG, THR_GLOBALB, THR_GLOBALA2, THR_GLOBALA, },
  // NEWMV,
  { -1, THR_NEWMV, THR_NEWL2, THR_NEWL3,
    THR_NEWG, THR_NEWB, THR_NEWA2, THR_NEWA, },
};
/* clang-format on */

/* clang-format off */
static const int16_t comp_inter_to_mode_idx[COMP_INTER_MODE_NUM][REF_FRAMES]
                                           [REF_FRAMES] = {
  // NEAREST_NEARESTMV,
  {
    { -1, -1, -1, -1, -1, -1, -1, -1, },
    { -1, -1,
      THR_COMP_NEAREST_NEARESTLL2, THR_COMP_NEAREST_NEARESTLL3,
      THR_COMP_NEAREST_NEARESTLG, THR_COMP_NEAREST_NEARESTLB,
      THR_COMP_NEAREST_NEARESTLA2, THR_COMP_NEAREST_NEARESTLA, },
    { -1, -1,
      -1, -1,
      -1, THR_COMP_NEAREST_NEARESTL2B,
      THR_COMP_NEAREST_NEARESTL2A2, THR_COMP_NEAREST_NEARESTL2A, },
    { -1, -1,
      -1, -1,
      -1, THR_COMP_NEAREST_NEARESTL3B,
      THR_COMP_NEAREST_NEARESTL3A2, THR_COMP_NEAREST_NEARESTL3A, },
    { -1, -1,
      -1, -1,
      -1, THR_COMP_NEAREST_NEARESTGB,
      THR_COMP_NEAREST_NEARESTGA2, THR_COMP_NEAREST_NEARESTGA, },
    { -1, -1,
      -1, -1,
      -1, -1,
      -1, THR_COMP_NEAREST_NEARESTBA, },
    { -1, -1, -1, -1, -1, -1, -1, -1, },
    { -1, -1, -1, -1, -1, -1, -1, -1, },
  },
  // NEAR_NEARMV,
  {
    { -1, -1, -1, -1, -1, -1, -1, -1, },
    { -1, -1,
      THR_COMP_NEAR_NEARLL2, THR_COMP_NEAR_NEARLL3,
      THR_COMP_NEAR_NEARLG, THR_COMP_NEAR_NEARLB,
      THR_COMP_NEAR_NEARLA2, THR_COMP_NEAR_NEARLA, },
    { -1, -1,
      -1, -1,
      -1, THR_COMP_NEAR_NEARL2B,
      THR_COMP_NEAR_NEARL2A2, THR_COMP_NEAR_NEARL2A, },
    { -1, -1,
      -1, -1,
      -1, THR_COMP_NEAR_NEARL3B,
      THR_COMP_NEAR_NEARL3A2, THR_COMP_NEAR_NEARL3A, },
    { -1, -1,
      -1, -1,
      -1, THR_COMP_NEAR_NEARGB,
      THR_COMP_NEAR_NEARGA2, THR_COMP_NEAR_NEARGA, },
    { -1, -1,
      -1, -1,
      -1, -1,
      -1, THR_COMP_NEAR_NEARBA, },
    { -1, -1, -1, -1, -1, -1, -1, -1, },
    { -1, -1, -1, -1, -1, -1, -1, -1, },
  },
  // NEAREST_NEWMV,
  {
    { -1, -1, -1, -1, -1, -1, -1, -1, },
    { -1, -1,
      THR_COMP_NEAREST_NEWLL2, THR_COMP_NEAREST_NEWLL3,
      THR_COMP_NEAREST_NEWLG, THR_COMP_NEAREST_NEWLB,
      THR_COMP_NEAREST_NEWLA2, THR_COMP_NEAREST_NEWLA, },
    { -1, -1,
      -1, -1,
      -1, THR_COMP_NEAREST_NEWL2B,
      THR_COMP_NEAREST_NEWL2A2, THR_COMP_NEAREST_NEWL2A, },
    { -1, -1,
      -1, -1,
      -1, THR_COMP_NEAREST_NEWL3B,
      THR_COMP_NEAREST_NEWL3A2, THR_COMP_NEAREST_NEWL3A, },
    { -1, -1,
      -1, -1,
      -1, THR_COMP_NEAREST_NEWGB,
      THR_COMP_NEAREST_NEWGA2, THR_COMP_NEAREST_NEWGA, },
    { -1, -1,
      -1, -1,
      -1, -1,
      -1, THR_COMP_NEAREST_NEWBA, },
    { -1, -1, -1, -1, -1, -1, -1, -1, },
    { -1, -1, -1, -1, -1, -1, -1, -1, },
  },
  // NEW_NEARESTMV,
  {
    { -1, -1, -1, -1, -1, -1, -1, -1, },
    { -1, -1,
      THR_COMP_NEW_NEARESTLL2, THR_COMP_NEW_NEARESTLL3,
      THR_COMP_NEW_NEARESTLG, THR_COMP_NEW_NEARESTLB,
      THR_COMP_NEW_NEARESTLA2, THR_COMP_NEW_NEARESTLA, },
    { -1, -1,
      -1, -1,
      -1, THR_COMP_NEW_NEARESTL2B,
      THR_COMP_NEW_NEARESTL2A2, THR_COMP_NEW_NEARESTL2A, },
    { -1, -1,
      -1, -1,
      -1, THR_COMP_NEW_NEARESTL3B,
      THR_COMP_NEW_NEARESTL3A2, THR_COMP_NEW_NEARESTL3A, },
    { -1, -1,
      -1, -1,
      -1, THR_COMP_NEW_NEARESTGB,
      THR_COMP_NEW_NEARESTGA2, THR_COMP_NEW_NEARESTGA, },
    { -1, -1,
      -1, -1,
      -1, -1,
      -1, THR_COMP_NEW_NEARESTBA, },
    { -1, -1, -1, -1, -1, -1, -1, -1, },
    { -1, -1, -1, -1, -1, -1, -1, -1, },
  },
  // NEAR_NEWMV,
  {
    { -1, -1, -1, -1, -1, -1, -1, -1, },
    { -1, -1,
      THR_COMP_NEAR_NEWLL2, THR_COMP_NEAR_NEWLL3,
      THR_COMP_NEAR_NEWLG, THR_COMP_NEAR_NEWLB,
      THR_COMP_NEAR_NEWLA2, THR_COMP_NEAR_NEWLA, },
    { -1, -1,
      -1, -1,
      -1, THR_COMP_NEAR_NEWL2B,
      THR_COMP_NEAR_NEWL2A2, THR_COMP_NEAR_NEWL2A, },
    { -1, -1,
      -1, -1,
      -1, THR_COMP_NEAR_NEWL3B,
      THR_COMP_NEAR_NEWL3A2, THR_COMP_NEAR_NEWL3A, },
    { -1, -1,
      -1, -1,
      -1, THR_COMP_NEAR_NEWGB,
      THR_COMP_NEAR_NEWGA2, THR_COMP_NEAR_NEWGA, },
    { -1, -1,
      -1, -1,
      -1, -1,
      -1, THR_COMP_NEAR_NEWBA, },
    { -1, -1, -1, -1, -1, -1, -1, -1, },
    { -1, -1, -1, -1, -1, -1, -1, -1, },
  },
  // NEW_NEARMV,
  {
    { -1, -1, -1, -1, -1, -1, -1, -1, },
    { -1, -1,
      THR_COMP_NEW_NEARLL2, THR_COMP_NEW_NEARLL3,
      THR_COMP_NEW_NEARLG, THR_COMP_NEW_NEARLB,
      THR_COMP_NEW_NEARLA2, THR_COMP_NEW_NEARLA, },
    { -1, -1,
      -1, -1,
      -1, THR_COMP_NEW_NEARL2B,
      THR_COMP_NEW_NEARL2A2, THR_COMP_NEW_NEARL2A, },
    { -1, -1,
      -1, -1,
      -1, THR_COMP_NEW_NEARL3B,
      THR_COMP_NEW_NEARL3A2, THR_COMP_NEW_NEARL3A, },
    { -1, -1,
      -1, -1,
      -1, THR_COMP_NEW_NEARGB,
      THR_COMP_NEW_NEARGA2, THR_COMP_NEW_NEARGA, },
    { -1, -1,
      -1, -1,
      -1, -1,
      -1, THR_COMP_NEW_NEARBA, },
    { -1, -1, -1, -1, -1, -1, -1, -1, },
    { -1, -1, -1, -1, -1, -1, -1, -1, },
  },
  // GLOBAL_GLOBALMV,
  {
    { -1, -1, -1, -1, -1, -1, -1, -1, },
    { -1, -1,
      THR_COMP_GLOBAL_GLOBALLL2, THR_COMP_GLOBAL_GLOBALLL3,
      THR_COMP_GLOBAL_GLOBALLG, THR_COMP_GLOBAL_GLOBALLB,
      THR_COMP_GLOBAL_GLOBALLA2, THR_COMP_GLOBAL_GLOBALLA, },
    { -1, -1,
      -1, -1,
      -1, THR_COMP_GLOBAL_GLOBALL2B,
      THR_COMP_GLOBAL_GLOBALL2A2, THR_COMP_GLOBAL_GLOBALL2A, },
    { -1, -1,
      -1, -1,
      -1, THR_COMP_GLOBAL_GLOBALL3B,
      THR_COMP_GLOBAL_GLOBALL3A2, THR_COMP_GLOBAL_GLOBALL3A, },
    { -1, -1,
      -1, -1,
      -1, THR_COMP_GLOBAL_GLOBALGB,
      THR_COMP_GLOBAL_GLOBALGA2, THR_COMP_GLOBAL_GLOBALGA, },
    { -1, -1,
      -1, -1,
      -1, -1,
      -1, THR_COMP_GLOBAL_GLOBALBA, },
    { -1, -1, -1, -1, -1, -1, -1, -1, },
    { -1, -1, -1, -1, -1, -1, -1, -1, },
  },
  // NEW_NEWMV,
  {
    { -1, -1, -1, -1, -1, -1, -1, -1, },
    { -1, -1,
      THR_COMP_NEW_NEWLL2, THR_COMP_NEW_NEWLL3,
      THR_COMP_NEW_NEWLG, THR_COMP_NEW_NEWLB,
      THR_COMP_NEW_NEWLA2, THR_COMP_NEW_NEWLA, },
    { -1, -1,
      -1, -1,
      -1, THR_COMP_NEW_NEWL2B,
      THR_COMP_NEW_NEWL2A2, THR_COMP_NEW_NEWL2A, },
    { -1, -1,
      -1, -1,
      -1, THR_COMP_NEW_NEWL3B,
      THR_COMP_NEW_NEWL3A2, THR_COMP_NEW_NEWL3A, },
    { -1, -1,
      -1, -1,
      -1, THR_COMP_NEW_NEWGB,
      THR_COMP_NEW_NEWGA2, THR_COMP_NEW_NEWGA, },
    { -1, -1,
      -1, -1,
      -1, -1,
      -1, THR_COMP_NEW_NEWBA, },
    { -1, -1, -1, -1, -1, -1, -1, -1, },
    { -1, -1, -1, -1, -1, -1, -1, -1, },
  },
};
/* clang-format on */

static int get_prediction_mode_idx(PREDICTION_MODE this_mode,
                                   MV_REFERENCE_FRAME ref_frame,
                                   MV_REFERENCE_FRAME second_ref_frame) {
  if (this_mode < INTRA_MODE_END) {
    assert(ref_frame == INTRA_FRAME);
    assert(second_ref_frame == NONE_FRAME);
    return intra_to_mode_idx[this_mode - INTRA_MODE_START];
  }
  if (this_mode >= SINGLE_INTER_MODE_START &&
      this_mode < SINGLE_INTER_MODE_END) {
    assert((ref_frame > INTRA_FRAME) && (ref_frame <= ALTREF_FRAME));
    return single_inter_to_mode_idx[this_mode - SINGLE_INTER_MODE_START]
                                   [ref_frame];
  }
  if (this_mode >= COMP_INTER_MODE_START && this_mode < COMP_INTER_MODE_END) {
    assert((ref_frame > INTRA_FRAME) && (ref_frame <= ALTREF_FRAME));
    assert((second_ref_frame > INTRA_FRAME) &&
           (second_ref_frame <= ALTREF_FRAME));
    return comp_inter_to_mode_idx[this_mode - COMP_INTER_MODE_START][ref_frame]
                                 [second_ref_frame];
  }
  assert(0);
  return -1;
}
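
// Example (a sketch with hypothetical caller context): looking up the
// mode-order slot of a single-reference inter mode,
//   const int mode_index =
//       get_prediction_mode_idx(NEARESTMV, LAST_FRAME, NONE_FRAME);
// resolves through single_inter_to_mode_idx and yields THR_NEARESTMV.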

static const PREDICTION_MODE intra_rd_search_mode_order[INTRA_MODES] = {
  DC_PRED,       H_PRED,        V_PRED,    SMOOTH_PRED, PAETH_PRED,
  SMOOTH_V_PRED, SMOOTH_H_PRED, D135_PRED, D203_PRED,   D157_PRED,
  D67_PRED,      D113_PRED,     D45_PRED,
};

static const UV_PREDICTION_MODE uv_rd_search_mode_order[UV_INTRA_MODES] = {
  UV_DC_PRED,     UV_CFL_PRED,   UV_H_PRED,        UV_V_PRED,
  UV_SMOOTH_PRED, UV_PAETH_PRED, UV_SMOOTH_V_PRED, UV_SMOOTH_H_PRED,
  UV_D135_PRED,   UV_D203_PRED,  UV_D157_PRED,     UV_D67_PRED,
  UV_D113_PRED,   UV_D45_PRED,
};

typedef struct SingleInterModeState {
  int64_t rd;
  MV_REFERENCE_FRAME ref_frame;
  int valid;
} SingleInterModeState;

typedef struct InterModeSearchState {
  int64_t best_rd;
  MB_MODE_INFO best_mbmode;
  int best_rate_y;
  int best_rate_uv;
  int best_mode_skippable;
  int best_skip2;
  int best_mode_index;
  int skip_intra_modes;
  int num_available_refs;
  int64_t dist_refs[REF_FRAMES];
  int dist_order_refs[REF_FRAMES];
  int64_t mode_threshold[MAX_MODES];
  PREDICTION_MODE best_intra_mode;
  int64_t best_intra_rd;
  int angle_stats_ready;
  uint8_t directional_mode_skip_mask[INTRA_MODES];
  unsigned int best_pred_sse;
  int rate_uv_intra[TX_SIZES_ALL];
  int rate_uv_tokenonly[TX_SIZES_ALL];
  int64_t dist_uvs[TX_SIZES_ALL];
  int skip_uvs[TX_SIZES_ALL];
  UV_PREDICTION_MODE mode_uv[TX_SIZES_ALL];
  PALETTE_MODE_INFO pmi_uv[TX_SIZES_ALL];
  int8_t uv_angle_delta[TX_SIZES_ALL];
  int64_t best_pred_rd[REFERENCE_MODES];
  int64_t best_pred_diff[REFERENCE_MODES];
  // Save a set of single_newmv for each checked ref_mv.
  int_mv single_newmv[MAX_REF_MV_SERCH][REF_FRAMES];
  int single_newmv_rate[MAX_REF_MV_SERCH][REF_FRAMES];
  int single_newmv_valid[MAX_REF_MV_SERCH][REF_FRAMES];
  int64_t modelled_rd[MB_MODE_COUNT][MAX_REF_MV_SERCH][REF_FRAMES];
  // The rd of simple translation in single inter modes
  int64_t simple_rd[MB_MODE_COUNT][MAX_REF_MV_SERCH][REF_FRAMES];

  // Single search results by [directions][modes][reference frames]
  SingleInterModeState single_state[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
  int single_state_cnt[2][SINGLE_INTER_MODE_NUM];
  SingleInterModeState single_state_modelled[2][SINGLE_INTER_MODE_NUM]
                                            [FWD_REFS];
  int single_state_modelled_cnt[2][SINGLE_INTER_MODE_NUM];

  MV_REFERENCE_FRAME single_rd_order[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
} InterModeSearchState;

static int inter_mode_data_block_idx(BLOCK_SIZE bsize) {
  if (bsize == BLOCK_4X4 || bsize == BLOCK_4X8 || bsize == BLOCK_8X4 ||
      bsize == BLOCK_4X16 || bsize == BLOCK_16X4) {
    return -1;
  }
  return 1;
}

void av1_inter_mode_data_init(TileDataEnc *tile_data) {
  for (int i = 0; i < BLOCK_SIZES_ALL; ++i) {
    InterModeRdModel *md = &tile_data->inter_mode_rd_models[i];
    md->ready = 0;
    md->num = 0;
    md->dist_sum = 0;
    md->ld_sum = 0;
    md->sse_sum = 0;
    md->sse_sse_sum = 0;
    md->sse_ld_sum = 0;
  }
}
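
// Estimates rate and distortion from the fitted per-block-size linear model
// (a sketch of the relation implemented below): the estimated distortion is
// the running mean dist_mean and, with est_ld = a * sse + b,
//   est_residue_cost ~= (sse - dist_mean) / est_ld,
// clamped to the range [0, INT_MAX / 2].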
static int get_est_rate_dist(const TileDataEnc *tile_data, BLOCK_SIZE bsize,
                             int64_t sse, int *est_residue_cost,
                             int64_t *est_dist) {
  aom_clear_system_state();
  const InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
  if (md->ready) {
    if (sse < md->dist_mean) {
      *est_residue_cost = 0;
      *est_dist = sse;
    } else {
      *est_dist = (int64_t)round(md->dist_mean);
      const double est_ld = md->a * sse + md->b;
      // Clamp estimated rate cost by INT_MAX / 2.
      // TODO(angiebird (at) google.com): find better solution than clamping.
      if (fabs(est_ld) < 1e-2) {
        *est_residue_cost = INT_MAX / 2;
      } else {
        double est_residue_cost_dbl = ((sse - md->dist_mean) / est_ld);
        if (est_residue_cost_dbl < 0) {
          *est_residue_cost = 0;
        } else {
          *est_residue_cost =
              (int)AOMMIN((int64_t)round(est_residue_cost_dbl), INT_MAX / 2);
        }
      }
      if (*est_residue_cost <= 0) {
        *est_residue_cost = 0;
        *est_dist = sse;
      }
    }
    return 1;
  }
  return 0;
}

void av1_inter_mode_data_fit(TileDataEnc *tile_data, int rdmult) {
  aom_clear_system_state();
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    const int block_idx = inter_mode_data_block_idx(bsize);
    InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
    if (block_idx == -1) continue;
    if ((md->ready == 0 && md->num < 200) || (md->ready == 1 && md->num < 64)) {
      continue;
    } else {
      if (md->ready == 0) {
        md->dist_mean = md->dist_sum / md->num;
        md->ld_mean = md->ld_sum / md->num;
        md->sse_mean = md->sse_sum / md->num;
        md->sse_sse_mean = md->sse_sse_sum / md->num;
        md->sse_ld_mean = md->sse_ld_sum / md->num;
      } else {
        const double factor = 3;
        md->dist_mean =
            (md->dist_mean * factor + (md->dist_sum / md->num)) / (factor + 1);
        md->ld_mean =
            (md->ld_mean * factor + (md->ld_sum / md->num)) / (factor + 1);
        md->sse_mean =
            (md->sse_mean * factor + (md->sse_sum / md->num)) / (factor + 1);
        md->sse_sse_mean =
            (md->sse_sse_mean * factor + (md->sse_sse_sum / md->num)) /
            (factor + 1);
        md->sse_ld_mean =
            (md->sse_ld_mean * factor + (md->sse_ld_sum / md->num)) /
            (factor + 1);
      }

      const double my = md->ld_mean;
      const double mx = md->sse_mean;
      const double dx = sqrt(md->sse_sse_mean);
      const double dxy = md->sse_ld_mean;

      md->a = (dxy - mx * my) / (dx * dx - mx * mx);
      md->b = my - md->a * mx;
      md->ready = 1;

      md->num = 0;
      md->dist_sum = 0;
      md->ld_sum = 0;
      md->sse_sum = 0;
      md->sse_sse_sum = 0;
      md->sse_ld_sum = 0;
    }
    (void)rdmult;
  }
}
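
// Accumulates one observation for the fit above. Each sample contributes
// ld = (sse - dist) / residue_cost, the distortion reduced per unit of rate;
// av1_inter_mode_data_fit() then solves the standard least-squares system
//   a = (E[sse * ld] - E[sse] * E[ld]) / (E[sse^2] - E[sse]^2)
//   b = E[ld] - a * E[sse]
// over the accumulated running sums.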
static void inter_mode_data_push(TileDataEnc *tile_data, BLOCK_SIZE bsize,
                                 int64_t sse, int64_t dist, int residue_cost) {
  if (residue_cost == 0 || sse == dist) return;
  const int block_idx = inter_mode_data_block_idx(bsize);
  if (block_idx == -1) return;
  InterModeRdModel *rd_model = &tile_data->inter_mode_rd_models[bsize];
  if (rd_model->num < INTER_MODE_RD_DATA_OVERALL_SIZE) {
    aom_clear_system_state();
    const double ld = (sse - dist) * 1. / residue_cost;
    ++rd_model->num;
    rd_model->dist_sum += dist;
    rd_model->ld_sum += ld;
    rd_model->sse_sum += sse;
    rd_model->sse_sse_sum += (double)sse * (double)sse;
    rd_model->sse_ld_sum += sse * ld;
  }
}

static void inter_modes_info_push(InterModesInfo *inter_modes_info,
                                  int mode_rate, int64_t sse, int64_t rd,
                                  bool true_rd, uint8_t *blk_skip,
                                  RD_STATS *rd_cost, RD_STATS *rd_cost_y,
                                  RD_STATS *rd_cost_uv,
                                  const MB_MODE_INFO *mbmi) {
  const int num = inter_modes_info->num;
  assert(num < MAX_INTER_MODES);
  inter_modes_info->mbmi_arr[num] = *mbmi;
  inter_modes_info->mode_rate_arr[num] = mode_rate;
  inter_modes_info->sse_arr[num] = sse;
  inter_modes_info->est_rd_arr[num] = rd;
  inter_modes_info->true_rd_arr[num] = true_rd;
  if (blk_skip != NULL) {
    memcpy(inter_modes_info->blk_skip_arr[num], blk_skip,
           sizeof(blk_skip[0]) * MAX_MIB_SIZE * MAX_MIB_SIZE);
  }
  inter_modes_info->rd_cost_arr[num] = *rd_cost;
  inter_modes_info->rd_cost_y_arr[num] = *rd_cost_y;
  inter_modes_info->rd_cost_uv_arr[num] = *rd_cost_uv;
  ++inter_modes_info->num;
}

static int compare_rd_idx_pair(const void *a, const void *b) {
  if (((const RdIdxPair *)a)->rd == ((const RdIdxPair *)b)->rd) {
    return 0;
  } else if (((const RdIdxPair *)a)->rd > ((const RdIdxPair *)b)->rd) {
    return 1;
  } else {
    return -1;
  }
}

static void inter_modes_info_sort(const InterModesInfo *inter_modes_info,
                                  RdIdxPair *rd_idx_pair_arr) {
  if (inter_modes_info->num == 0) {
    return;
  }
  for (int i = 0; i < inter_modes_info->num; ++i) {
    rd_idx_pair_arr[i].idx = i;
    rd_idx_pair_arr[i].rd = inter_modes_info->est_rd_arr[i];
  }
  qsort(rd_idx_pair_arr, inter_modes_info->num, sizeof(rd_idx_pair_arr[0]),
        compare_rd_idx_pair);
}

static INLINE int write_uniform_cost(int n, int v) {
  const int l = get_unsigned_bits(n);
  const int m = (1 << l) - n;
  if (l == 0) return 0;
  if (v < m)
    return av1_cost_literal(l - 1);
  else
    return av1_cost_literal(l);
}
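
// Worked example (an illustration, assuming get_unsigned_bits() semantics):
// write_uniform_cost(13, v) prices a truncated binary code with
// l = get_unsigned_bits(13) = 4 and m = (1 << 4) - 13 = 3, so symbols 0..2
// cost 3 bits and symbols 3..12 cost 4 bits (scaled by av1_cost_literal).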

// Similar to store_cfl_required(), but for use during the RDO process,
// where we haven't yet determined whether this block uses CfL.
static INLINE CFL_ALLOWED_TYPE store_cfl_required_rdo(const AV1_COMMON *cm,
                                                      const MACROBLOCK *x) {
  const MACROBLOCKD *xd = &x->e_mbd;

  if (cm->seq_params.monochrome || x->skip_chroma_rd) return CFL_DISALLOWED;

  if (!xd->cfl.is_chroma_reference) {
    // For non-chroma-reference blocks, we should always store the luma pixels,
    // in case the corresponding chroma-reference block uses CfL.
    // Note that this can only happen for block sizes which are <8 on
    // their shortest side, as otherwise they would be chroma reference
    // blocks.
    return CFL_ALLOWED;
  }

  // For chroma reference blocks, we should store data in the encoder iff we're
  // allowed to try out CfL.
  return is_cfl_allowed(xd);
}

// constants for prune 1 and prune 2 decision boundaries
#define FAST_EXT_TX_CORR_MID 0.0
#define FAST_EXT_TX_EDST_MID 0.1
#define FAST_EXT_TX_CORR_MARGIN 0.5
#define FAST_EXT_TX_EDST_MARGIN 0.3

static int inter_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
                           RD_STATS *rd_stats, BLOCK_SIZE bsize,
                           int64_t ref_best_rd, FAST_TX_SEARCH_MODE ftxs_mode);

static unsigned pixel_dist_visible_only(
    const AV1_COMP *const cpi, const MACROBLOCK *x, const uint8_t *src,
    const int src_stride, const uint8_t *dst, const int dst_stride,
    const BLOCK_SIZE tx_bsize, int txb_rows, int txb_cols, int visible_rows,
    int visible_cols) {
  unsigned sse;

  if (txb_rows == visible_rows && txb_cols == visible_cols) {
    cpi->fn_ptr[tx_bsize].vf(src, src_stride, dst, dst_stride, &sse);
    return sse;
  }
  const MACROBLOCKD *xd = &x->e_mbd;

  if (is_cur_buf_hbd(xd)) {
    uint64_t sse64 = aom_highbd_sse_odd_size(src, src_stride, dst, dst_stride,
                                             visible_cols, visible_rows);
    return (unsigned int)ROUND_POWER_OF_TWO(sse64, (xd->bd - 8) * 2);
  }
  sse = aom_sse_odd_size(src, src_stride, dst, dst_stride, visible_cols,
                         visible_rows);
  return sse;
}

#if CONFIG_DIST_8X8
static uint64_t cdef_dist_8x8_16bit(uint16_t *dst, int dstride, uint16_t *src,
                                    int sstride, int coeff_shift) {
  uint64_t svar = 0;
  uint64_t dvar = 0;
  uint64_t sum_s = 0;
  uint64_t sum_d = 0;
  uint64_t sum_s2 = 0;
  uint64_t sum_d2 = 0;
  uint64_t sum_sd = 0;
  uint64_t dist = 0;

  int i, j;
  for (i = 0; i < 8; i++) {
    for (j = 0; j < 8; j++) {
      sum_s += src[i * sstride + j];
      sum_d += dst[i * dstride + j];
      sum_s2 += src[i * sstride + j] * src[i * sstride + j];
      sum_d2 += dst[i * dstride + j] * dst[i * dstride + j];
      sum_sd += src[i * sstride + j] * dst[i * dstride + j];
    }
  }
  /* Compute the variance -- the calculation cannot go negative. */
  svar = sum_s2 - ((sum_s * sum_s + 32) >> 6);
  dvar = sum_d2 - ((sum_d * sum_d + 32) >> 6);

  // Tuning of jm's original dering distortion metric used in CDEF tool,
  // suggested by jm
  const uint64_t a = 4;
  const uint64_t b = 2;
  const uint64_t c1 = (400 * a << 2 * coeff_shift);
  const uint64_t c2 = (b * 20000 * a * a << 4 * coeff_shift);

  dist = (uint64_t)floor(.5 + (sum_d2 + sum_s2 - 2 * sum_sd) * .5 *
                                  (svar + dvar + c1) /
                                  (sqrt(svar * (double)dvar + c2)));

  // Calibrate dist to have similar rate for the same QP with MSE only
  // distortion (as in master branch)
  dist = (uint64_t)((float)dist * 0.75);

  return dist;
}

static int od_compute_var_4x4(uint16_t *x, int stride) {
  int sum;
  int s2;
  int i;
  sum = 0;
  s2 = 0;
  for (i = 0; i < 4; i++) {
    int j;
    for (j = 0; j < 4; j++) {
      int t;

      t = x[i * stride + j];
      sum += t;
      s2 += t * t;
    }
  }

  return (s2 - (sum * sum >> 4)) >> 4;
}
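
// Note: od_compute_var_4x4() above returns (s2 - sum * sum / 16) / 16 with
// integer truncation, i.e. the population variance of the 16 pixels.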

/* OD_DIST_LP_MID controls the frequency weighting filter used for computing
   the distortion. For a value X, the filter is [1 X 1]/(X + 2) and
   is applied both horizontally and vertically. For X=5, the filter is
   a good approximation for the OD_QM8_Q4_HVS quantization matrix. */
#define OD_DIST_LP_MID (5)
#define OD_DIST_LP_NORM (OD_DIST_LP_MID + 2)

static double od_compute_dist_8x8(int use_activity_masking, uint16_t *x,
                                  uint16_t *y, od_coeff *e_lp, int stride) {
  double sum;
  int min_var;
  double mean_var;
  double var_stat;
  double activity;
  double calibration;
  int i;
  int j;
  double vardist;

  vardist = 0;

#if 1
  min_var = INT_MAX;
  mean_var = 0;
  for (i = 0; i < 3; i++) {
    for (j = 0; j < 3; j++) {
      int varx;
      int vary;
      varx = od_compute_var_4x4(x + 2 * i * stride + 2 * j, stride);
      vary = od_compute_var_4x4(y + 2 * i * stride + 2 * j, stride);
      min_var = OD_MINI(min_var, varx);
      mean_var += 1. / (1 + varx);
      /* The cast to (double) is to avoid an overflow before the sqrt.*/
      vardist += varx - 2 * sqrt(varx * (double)vary) + vary;
    }
  }
  /* We use a different variance statistic depending on whether activity
     masking is used, since the harmonic mean appeared slightly worse with
     masking off. The calibration constant just ensures that we preserve the
     rate compared to activity=1. */
  if (use_activity_masking) {
    calibration = 1.95;
    var_stat = 9. / mean_var;
  } else {
    calibration = 1.62;
    var_stat = min_var;
  }
  /* 1.62 is a calibration constant, 0.25 is a noise floor and 1/6 is the
     activity masking constant. */
  activity = calibration * pow(.25 + var_stat, -1. / 6);
#else
  activity = 1;
#endif  // 1
  sum = 0;
  for (i = 0; i < 8; i++) {
    for (j = 0; j < 8; j++)
      sum += e_lp[i * stride + j] * (double)e_lp[i * stride + j];
  }
  /* Normalize the filter to unit DC response. */
  sum *= 1. / (OD_DIST_LP_NORM * OD_DIST_LP_NORM * OD_DIST_LP_NORM *
               OD_DIST_LP_NORM);
  return activity * activity * (sum + vardist);
}

// Note: inputs x and y are in the pixel domain
static double od_compute_dist_common(int activity_masking, uint16_t *x,
                                     uint16_t *y, int bsize_w, int bsize_h,
                                     int qindex, od_coeff *tmp,
                                     od_coeff *e_lp) {
  int i, j;
  double sum = 0;
  const int mid = OD_DIST_LP_MID;

  for (j = 0; j < bsize_w; j++) {
    e_lp[j] = mid * tmp[j] + 2 * tmp[bsize_w + j];
    e_lp[(bsize_h - 1) * bsize_w + j] = mid * tmp[(bsize_h - 1) * bsize_w + j] +
                                        2 * tmp[(bsize_h - 2) * bsize_w + j];
  }
  for (i = 1; i < bsize_h - 1; i++) {
    for (j = 0; j < bsize_w; j++) {
      e_lp[i * bsize_w + j] = mid * tmp[i * bsize_w + j] +
                              tmp[(i - 1) * bsize_w + j] +
                              tmp[(i + 1) * bsize_w + j];
    }
  }
  for (i = 0; i < bsize_h; i += 8) {
    for (j = 0; j < bsize_w; j += 8) {
      sum += od_compute_dist_8x8(activity_masking, &x[i * bsize_w + j],
                                 &y[i * bsize_w + j], &e_lp[i * bsize_w + j],
                                 bsize_w);
    }
  }
  /* Scale according to linear regression against SSE, for 8x8 blocks. */
  if (activity_masking) {
    sum *= 2.2 + (1.7 - 2.2) * (qindex - 99) / (210 - 99) +
           (qindex < 99 ? 2.5 * (qindex - 99) / 99 * (qindex - 99) / 99 : 0);
  } else {
    sum *= qindex >= 128
               ? 1.4 + (0.9 - 1.4) * (qindex - 128) / (209 - 128)
               : qindex <= 43 ? 1.5 + (2.0 - 1.5) * (qindex - 43) / (16 - 43)
                              : 1.5 + (1.4 - 1.5) * (qindex - 43) / (128 - 43);
  }

  return sum;
}
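
// The two entry points below split the [1 X 1] low-pass between themselves
// and od_compute_dist_common(): each computes the error image and applies the
// horizontal pass (e -> tmp), and od_compute_dist_common() applies the
// vertical pass (tmp -> e_lp) before accumulating 8x8 sub-block distortions.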
static double od_compute_dist(uint16_t *x, uint16_t *y, int bsize_w,
                              int bsize_h, int qindex) {
  assert(bsize_w >= 8 && bsize_h >= 8);

  int activity_masking = 0;

  int i, j;
  DECLARE_ALIGNED(16, od_coeff, e[MAX_SB_SQUARE]);
  DECLARE_ALIGNED(16, od_coeff, tmp[MAX_SB_SQUARE]);
  DECLARE_ALIGNED(16, od_coeff, e_lp[MAX_SB_SQUARE]);
  for (i = 0; i < bsize_h; i++) {
    for (j = 0; j < bsize_w; j++) {
      e[i * bsize_w + j] = x[i * bsize_w + j] - y[i * bsize_w + j];
    }
  }
  int mid = OD_DIST_LP_MID;
  for (i = 0; i < bsize_h; i++) {
    tmp[i * bsize_w] = mid * e[i * bsize_w] + 2 * e[i * bsize_w + 1];
    tmp[i * bsize_w + bsize_w - 1] =
        mid * e[i * bsize_w + bsize_w - 1] + 2 * e[i * bsize_w + bsize_w - 2];
    for (j = 1; j < bsize_w - 1; j++) {
      tmp[i * bsize_w + j] = mid * e[i * bsize_w + j] + e[i * bsize_w + j - 1] +
                             e[i * bsize_w + j + 1];
    }
  }
  return od_compute_dist_common(activity_masking, x, y, bsize_w, bsize_h,
                                qindex, tmp, e_lp);
}

static double od_compute_dist_diff(uint16_t *x, int16_t *e, int bsize_w,
                                   int bsize_h, int qindex) {
  assert(bsize_w >= 8 && bsize_h >= 8);

  int activity_masking = 0;

  DECLARE_ALIGNED(16, uint16_t, y[MAX_SB_SQUARE]);
  DECLARE_ALIGNED(16, od_coeff, tmp[MAX_SB_SQUARE]);
  DECLARE_ALIGNED(16, od_coeff, e_lp[MAX_SB_SQUARE]);
  int i, j;
  for (i = 0; i < bsize_h; i++) {
    for (j = 0; j < bsize_w; j++) {
      y[i * bsize_w + j] = x[i * bsize_w + j] - e[i * bsize_w + j];
    }
  }
  int mid = OD_DIST_LP_MID;
  for (i = 0; i < bsize_h; i++) {
    tmp[i * bsize_w] = mid * e[i * bsize_w] + 2 * e[i * bsize_w + 1];
    tmp[i * bsize_w + bsize_w - 1] =
        mid * e[i * bsize_w + bsize_w - 1] + 2 * e[i * bsize_w + bsize_w - 2];
    for (j = 1; j < bsize_w - 1; j++) {
      tmp[i * bsize_w + j] = mid * e[i * bsize_w + j] + e[i * bsize_w + j - 1] +
                             e[i * bsize_w + j + 1];
    }
  }
  return od_compute_dist_common(activity_masking, x, y, bsize_w, bsize_h,
                                qindex, tmp, e_lp);
}

int64_t av1_dist_8x8(const AV1_COMP *const cpi, const MACROBLOCK *x,
                     const uint8_t *src, int src_stride, const uint8_t *dst,
                     int dst_stride, const BLOCK_SIZE tx_bsize, int bsw,
                     int bsh, int visible_w, int visible_h, int qindex) {
  int64_t d = 0;
  int i, j;
  const MACROBLOCKD *xd = &x->e_mbd;

  DECLARE_ALIGNED(16, uint16_t, orig[MAX_SB_SQUARE]);
  DECLARE_ALIGNED(16, uint16_t, rec[MAX_SB_SQUARE]);

  assert(bsw >= 8);
  assert(bsh >= 8);
  assert((bsw & 0x07) == 0);
  assert((bsh & 0x07) == 0);

  if (x->tune_metric == AOM_TUNE_CDEF_DIST ||
      x->tune_metric == AOM_TUNE_DAALA_DIST) {
    if (is_cur_buf_hbd(xd)) {
      for (j = 0; j < bsh; j++)
        for (i = 0; i < bsw; i++)
          orig[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];

      if ((bsw == visible_w) && (bsh == visible_h)) {
        for (j = 0; j < bsh; j++)
          for (i = 0; i < bsw; i++)
            rec[j * bsw + i] = CONVERT_TO_SHORTPTR(dst)[j * dst_stride + i];
      } else {
        for (j = 0; j < visible_h; j++)
          for (i = 0; i < visible_w; i++)
            rec[j * bsw + i] = CONVERT_TO_SHORTPTR(dst)[j * dst_stride + i];

        if (visible_w < bsw) {
          for (j = 0; j < bsh; j++)
            for (i = visible_w; i < bsw; i++)
              rec[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
        }

        if (visible_h < bsh) {
          for (j = visible_h; j < bsh; j++)
            for (i = 0; i < bsw; i++)
              rec[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
        }
      }
    } else {
      for (j = 0; j < bsh; j++)
        for (i = 0; i < bsw; i++) orig[j * bsw + i] = src[j * src_stride + i];

      if ((bsw == visible_w) && (bsh == visible_h)) {
        for (j = 0; j < bsh; j++)
          for (i = 0; i < bsw; i++) rec[j * bsw + i] = dst[j * dst_stride + i];
      } else {
        for (j = 0; j < visible_h; j++)
          for (i = 0; i < visible_w; i++)
            rec[j * bsw + i] = dst[j * dst_stride + i];

        if (visible_w < bsw) {
          for (j = 0; j < bsh; j++)
            for (i = visible_w; i < bsw; i++)
              rec[j * bsw + i] = src[j * src_stride + i];
        }

        if (visible_h < bsh) {
          for (j = visible_h; j < bsh; j++)
            for (i = 0; i < bsw; i++)
              rec[j * bsw + i] = src[j * src_stride + i];
        }
      }
    }
  }

  if (x->tune_metric == AOM_TUNE_DAALA_DIST) {
    d = (int64_t)od_compute_dist(orig, rec, bsw, bsh, qindex);
  } else if (x->tune_metric == AOM_TUNE_CDEF_DIST) {
    int coeff_shift = AOMMAX(xd->bd - 8, 0);

    for (i = 0; i < bsh; i += 8) {
      for (j = 0; j < bsw; j += 8) {
        d += cdef_dist_8x8_16bit(&rec[i * bsw + j], bsw, &orig[i * bsw + j],
                                 bsw, coeff_shift);
      }
    }
    if (is_cur_buf_hbd(xd)) d = ((uint64_t)d) >> 2 * coeff_shift;
  } else {
    // Otherwise, MSE by default
    d = pixel_dist_visible_only(cpi, x, src, src_stride, dst, dst_stride,
                                tx_bsize, bsh, bsw, visible_h, visible_w);
  }

  return d;
}
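
// dist_8x8_diff() below mirrors av1_dist_8x8() but takes the residual instead
// of the reconstruction: where a metric needs pixel values, the distorted
// block is rebuilt as src - diff.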
static int64_t dist_8x8_diff(const MACROBLOCK *x, const uint8_t *src,
                             int src_stride, const int16_t *diff,
                             int diff_stride, int bsw, int bsh, int visible_w,
                             int visible_h, int qindex) {
  int64_t d = 0;
  int i, j;
  const MACROBLOCKD *xd = &x->e_mbd;

  DECLARE_ALIGNED(16, uint16_t, orig[MAX_SB_SQUARE]);
  DECLARE_ALIGNED(16, int16_t, diff16[MAX_SB_SQUARE]);

  assert(bsw >= 8);
  assert(bsh >= 8);
  assert((bsw & 0x07) == 0);
  assert((bsh & 0x07) == 0);

  if (x->tune_metric == AOM_TUNE_CDEF_DIST ||
      x->tune_metric == AOM_TUNE_DAALA_DIST) {
    if (is_cur_buf_hbd(xd)) {
      for (j = 0; j < bsh; j++)
        for (i = 0; i < bsw; i++)
          orig[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
    } else {
      for (j = 0; j < bsh; j++)
        for (i = 0; i < bsw; i++) orig[j * bsw + i] = src[j * src_stride + i];
    }

    if ((bsw == visible_w) && (bsh == visible_h)) {
      for (j = 0; j < bsh; j++)
        for (i = 0; i < bsw; i++)
          diff16[j * bsw + i] = diff[j * diff_stride + i];
    } else {
      for (j = 0; j < visible_h; j++)
        for (i = 0; i < visible_w; i++)
          diff16[j * bsw + i] = diff[j * diff_stride + i];

      if (visible_w < bsw) {
        for (j = 0; j < bsh; j++)
          for (i = visible_w; i < bsw; i++) diff16[j * bsw + i] = 0;
      }

      if (visible_h < bsh) {
        for (j = visible_h; j < bsh; j++)
          for (i = 0; i < bsw; i++) diff16[j * bsw + i] = 0;
      }
    }
  }

  if (x->tune_metric == AOM_TUNE_DAALA_DIST) {
    d = (int64_t)od_compute_dist_diff(orig, diff16, bsw, bsh, qindex);
  } else if (x->tune_metric == AOM_TUNE_CDEF_DIST) {
    int coeff_shift = AOMMAX(xd->bd - 8, 0);
    DECLARE_ALIGNED(16, uint16_t, dst16[MAX_SB_SQUARE]);

    for (i = 0; i < bsh; i++) {
      for (j = 0; j < bsw; j++) {
        dst16[i * bsw + j] = orig[i * bsw + j] - diff16[i * bsw + j];
      }
    }

    for (i = 0; i < bsh; i += 8) {
      for (j = 0; j < bsw; j += 8) {
        d += cdef_dist_8x8_16bit(&dst16[i * bsw + j], bsw, &orig[i * bsw + j],
                                 bsw, coeff_shift);
      }
    }
    // Don't scale 'd' for HBD since it will be done by caller side for diff
    // input
  } else {
    // Otherwise, MSE by default
    d = aom_sum_squares_2d_i16(diff, diff_stride, visible_w, visible_h);
  }

  return d;
}
#endif  // CONFIG_DIST_8X8
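
// get_energy_distribution_fine() below partitions the block into a 4x4 grid
// of sub-blocks and normalizes their SSE into horizontal/vertical profiles.
// Worked example implied by the shifts below: a 16x8 block takes the manual
// branch with w_shift = 2 and h_shift = 1, so each of the 16 esq cells covers
// a 4x2 patch of pixels.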
static void get_energy_distribution_fine(const AV1_COMP *cpi, BLOCK_SIZE bsize,
                                         const uint8_t *src, int src_stride,
                                         const uint8_t *dst, int dst_stride,
                                         int need_4th, double *hordist,
                                         double *verdist) {
  const int bw = block_size_wide[bsize];
  const int bh = block_size_high[bsize];
  unsigned int esq[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };

  if (bsize < BLOCK_16X16 || (bsize >= BLOCK_4X16 && bsize <= BLOCK_32X8)) {
    // Special cases: calculate 'esq' values manually, as we don't have 'vf'
    // functions for the 16 (very small) sub-blocks of this block.
    const int w_shift = (bw == 4) ? 0 : (bw == 8) ? 1 : (bw == 16) ? 2 : 3;
    const int h_shift = (bh == 4) ? 0 : (bh == 8) ? 1 : (bh == 16) ? 2 : 3;
    assert(bw <= 32);
    assert(bh <= 32);
    assert(((bw - 1) >> w_shift) + (((bh - 1) >> h_shift) << 2) == 15);
    if (cpi->common.seq_params.use_highbitdepth) {
      const uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
      const uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
      for (int i = 0; i < bh; ++i)
        for (int j = 0; j < bw; ++j) {
          const int index = (j >> w_shift) + ((i >> h_shift) << 2);
          esq[index] +=
              (src16[j + i * src_stride] - dst16[j + i * dst_stride]) *
              (src16[j + i * src_stride] - dst16[j + i * dst_stride]);
        }
    } else {
      for (int i = 0; i < bh; ++i)
        for (int j = 0; j < bw; ++j) {
          const int index = (j >> w_shift) + ((i >> h_shift) << 2);
          esq[index] += (src[j + i * src_stride] - dst[j + i * dst_stride]) *
                        (src[j + i * src_stride] - dst[j + i * dst_stride]);
        }
    }
  } else {  // Calculate 'esq' values using 'vf' functions on the 16 sub-blocks.
    const int f_index =
        (bsize < BLOCK_SIZES) ? bsize - BLOCK_16X16 : bsize - BLOCK_8X16;
    assert(f_index >= 0 && f_index < BLOCK_SIZES_ALL);
    const BLOCK_SIZE subsize = (BLOCK_SIZE)f_index;
    assert(block_size_wide[bsize] == 4 * block_size_wide[subsize]);
    assert(block_size_high[bsize] == 4 * block_size_high[subsize]);
    cpi->fn_ptr[subsize].vf(src, src_stride, dst, dst_stride, &esq[0]);
    cpi->fn_ptr[subsize].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
                            &esq[1]);
    cpi->fn_ptr[subsize].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
                            &esq[2]);
    cpi->fn_ptr[subsize].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
                            dst_stride, &esq[3]);
    src += bh / 4 * src_stride;
    dst += bh / 4 * dst_stride;

    cpi->fn_ptr[subsize].vf(src, src_stride, dst, dst_stride, &esq[4]);
    cpi->fn_ptr[subsize].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
                            &esq[5]);
    cpi->fn_ptr[subsize].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
                            &esq[6]);
    cpi->fn_ptr[subsize].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
                            dst_stride, &esq[7]);
    src += bh / 4 * src_stride;
    dst += bh / 4 * dst_stride;

    cpi->fn_ptr[subsize].vf(src, src_stride, dst, dst_stride, &esq[8]);
    cpi->fn_ptr[subsize].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
                            &esq[9]);
    cpi->fn_ptr[subsize].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
                            &esq[10]);
    cpi->fn_ptr[subsize].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
                            dst_stride, &esq[11]);
    src += bh / 4 * src_stride;
    dst += bh / 4 * dst_stride;

    cpi->fn_ptr[subsize].vf(src, src_stride, dst, dst_stride, &esq[12]);
    cpi->fn_ptr[subsize].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
                            &esq[13]);
    cpi->fn_ptr[subsize].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
                            &esq[14]);
    cpi->fn_ptr[subsize].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
                            dst_stride, &esq[15]);
  }

  double total = (double)esq[0] + esq[1] + esq[2] + esq[3] + esq[4] + esq[5] +
                 esq[6] + esq[7] + esq[8] + esq[9] + esq[10] + esq[11] +
                 esq[12] + esq[13] + esq[14] + esq[15];
  if (total > 0) {
    const double e_recip = 1.0 / total;
    hordist[0] = ((double)esq[0] + esq[4] + esq[8] + esq[12]) * e_recip;
    hordist[1] = ((double)esq[1] + esq[5] + esq[9] + esq[13]) * e_recip;
    hordist[2] = ((double)esq[2] + esq[6] + esq[10] + esq[14]) * e_recip;
    if (need_4th) {
      hordist[3] = ((double)esq[3] + esq[7] + esq[11] + esq[15]) * e_recip;
    }
    verdist[0] = ((double)esq[0] + esq[1] + esq[2] + esq[3]) * e_recip;
    verdist[1] = ((double)esq[4] + esq[5] + esq[6] + esq[7]) * e_recip;
    verdist[2] = ((double)esq[8] + esq[9] + esq[10] + esq[11]) * e_recip;
    if (need_4th) {
      verdist[3] = ((double)esq[12] + esq[13] + esq[14] + esq[15]) * e_recip;
    }
  } else {
    hordist[0] = verdist[0] = 0.25;
    hordist[1] = verdist[1] = 0.25;
    hordist[2] = verdist[2] = 0.25;
    if (need_4th) {
      hordist[3] = verdist[3] = 0.25;
    }
  }
}
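
// adst_vs_flipadst() below projects the vertical/horizontal energy profiles
// onto the linear SVM weights in ADST_FLIP_SVM (three weights plus a bias per
// direction); projections beyond FAST_EXT_TX_EDST_MID +/- the margin prune
// FLIPADST or ADST for that direction.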
static int adst_vs_flipadst(const AV1_COMP *cpi, BLOCK_SIZE bsize,
                            const uint8_t *src, int src_stride,
                            const uint8_t *dst, int dst_stride) {
  int prune_bitmask = 0;
  double svm_proj_h = 0, svm_proj_v = 0;
  double hdist[3] = { 0, 0, 0 }, vdist[3] = { 0, 0, 0 };
  get_energy_distribution_fine(cpi, bsize, src, src_stride, dst, dst_stride, 0,
                               hdist, vdist);

  svm_proj_v = vdist[0] * ADST_FLIP_SVM[0] + vdist[1] * ADST_FLIP_SVM[1] +
               vdist[2] * ADST_FLIP_SVM[2] + ADST_FLIP_SVM[3];
  svm_proj_h = hdist[0] * ADST_FLIP_SVM[4] + hdist[1] * ADST_FLIP_SVM[5] +
               hdist[2] * ADST_FLIP_SVM[6] + ADST_FLIP_SVM[7];
  if (svm_proj_v > FAST_EXT_TX_EDST_MID + FAST_EXT_TX_EDST_MARGIN)
    prune_bitmask |= 1 << FLIPADST_1D;
  else if (svm_proj_v < FAST_EXT_TX_EDST_MID - FAST_EXT_TX_EDST_MARGIN)
    prune_bitmask |= 1 << ADST_1D;

  if (svm_proj_h > FAST_EXT_TX_EDST_MID + FAST_EXT_TX_EDST_MARGIN)
    prune_bitmask |= 1 << (FLIPADST_1D + 8);
  else if (svm_proj_h < FAST_EXT_TX_EDST_MID - FAST_EXT_TX_EDST_MARGIN)
    prune_bitmask |= 1 << (ADST_1D + 8);

  return prune_bitmask;
}

static int dct_vs_idtx(const int16_t *diff, int stride, int w, int h) {
  float hcorr, vcorr;
  int prune_bitmask = 0;
  av1_get_horver_correlation_full(diff, stride, w, h, &hcorr, &vcorr);

  if (vcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN)
    prune_bitmask |= 1 << IDTX_1D;
  else if (vcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN)
    prune_bitmask |= 1 << DCT_1D;

  if (hcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN)
    prune_bitmask |= 1 << (IDTX_1D + 8);
  else if (hcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN)
    prune_bitmask |= 1 << (DCT_1D + 8);
  return prune_bitmask;
}
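
// Both helpers above return a 16-bit prune mask: bits 0..7 flag vertical 1D
// transforms to drop and bits 8..15 flag horizontal ones (hence the "+ 8"
// shifts).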

// Performance drop: 0.5%, Speed improvement: 24%
static int prune_two_for_sby(const AV1_COMP *cpi, BLOCK_SIZE bsize,
                             MACROBLOCK *x, const MACROBLOCKD *xd,
                             int adst_flipadst, int dct_idtx) {
  int prune = 0;

  if (adst_flipadst) {
    const struct macroblock_plane *const p = &x->plane[0];
    const struct macroblockd_plane *const pd = &xd->plane[0];
    prune |= adst_vs_flipadst(cpi, bsize, p->src.buf, p->src.stride,
                              pd->dst.buf, pd->dst.stride);
  }
  if (dct_idtx) {
    av1_subtract_plane(x, bsize, 0);
    const struct macroblock_plane *const p = &x->plane[0];
    const int bw = block_size_wide[bsize];
    const int bh = block_size_high[bsize];
    prune |= dct_vs_idtx(p->src_diff, bw, bw, bh);
  }

  return prune;
}

// Performance drop: 0.3%, Speed improvement: 5%
static int prune_one_for_sby(const AV1_COMP *cpi, BLOCK_SIZE bsize,
                             const MACROBLOCK *x, const MACROBLOCKD *xd) {
  const struct macroblock_plane *const p = &x->plane[0];
  const struct macroblockd_plane *const pd = &xd->plane[0];
  return adst_vs_flipadst(cpi, bsize, p->src.buf, p->src.stride, pd->dst.buf,
                          pd->dst.stride);
}

// 1D transforms used in the inter set; this needs to be changed if
// ext_tx_used_inter is changed.
static const int ext_tx_used_inter_1D[EXT_TX_SETS_INTER][TX_TYPES_1D] = {
  { 1, 0, 0, 0 },
  { 1, 1, 1, 1 },
  { 1, 1, 1, 1 },
  { 1, 0, 0, 1 },
};
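
// get_energy_distribution_finer() below works on the residual at a finer
// granularity than the 4x4 grid above: each esq cell sums squared residuals
// over a (1 << w_shift) x (1 << h_shift) patch, e.g. a 32x16 block yields a
// 16x8 grid of 2x2 patches.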
static void get_energy_distribution_finer(const int16_t *diff, int stride,
                                          int bw, int bh, float *hordist,
                                          float *verdist) {
  // First compute downscaled block energy values (esq); downscale factors
  // are defined by w_shift and h_shift.
  unsigned int esq[256];
  const int w_shift = bw <= 8 ? 0 : 1;
  const int h_shift = bh <= 8 ? 0 : 1;
  const int esq_w = bw >> w_shift;
  const int esq_h = bh >> h_shift;
  const int esq_sz = esq_w * esq_h;
  int i, j;
  memset(esq, 0, esq_sz * sizeof(esq[0]));
  if (w_shift) {
    for (i = 0; i < bh; i++) {
      unsigned int *cur_esq_row = esq + (i >> h_shift) * esq_w;
      const int16_t *cur_diff_row = diff + i * stride;
      for (j = 0; j < bw; j += 2) {
        cur_esq_row[j >> 1] += (cur_diff_row[j] * cur_diff_row[j] +
                                cur_diff_row[j + 1] * cur_diff_row[j + 1]);
      }
    }
  } else {
    for (i = 0; i < bh; i++) {
      unsigned int *cur_esq_row = esq + (i >> h_shift) * esq_w;
      const int16_t *cur_diff_row = diff + i * stride;
      for (j = 0; j < bw; j++) {
        cur_esq_row[j] += cur_diff_row[j] * cur_diff_row[j];
      }
    }
  }

  uint64_t total = 0;
  for (i = 0; i < esq_sz; i++) total += esq[i];

  // Output hordist and verdist arrays are normalized 1D projections of esq
  if (total == 0) {
    float hor_val = 1.0f / esq_w;
    for (j = 0; j < esq_w - 1; j++) hordist[j] = hor_val;
    float ver_val = 1.0f / esq_h;
    for (i = 0; i < esq_h - 1; i++) verdist[i] = ver_val;
    return;
  }

  const float e_recip = 1.0f / (float)total;
  memset(hordist, 0, (esq_w - 1) * sizeof(hordist[0]));
  memset(verdist, 0, (esq_h - 1) * sizeof(verdist[0]));
  const unsigned int *cur_esq_row;
  for (i = 0; i < esq_h - 1; i++) {
    cur_esq_row = esq + i * esq_w;
    for (j = 0; j < esq_w - 1; j++) {
      hordist[j] += (float)cur_esq_row[j];
      verdist[i] += (float)cur_esq_row[j];
    }
    verdist[i] += (float)cur_esq_row[j];
  }
  cur_esq_row = esq + i * esq_w;
  for (j = 0; j < esq_w - 1; j++) hordist[j] += (float)cur_esq_row[j];

  for (j = 0; j < esq_w - 1; j++) hordist[j] *= e_recip;
  for (i = 0; i < esq_h - 1; i++) verdist[i] *= e_recip;
}

// Similar to get_horver_correlation, but also takes into account the first
// row/column when computing horizontal/vertical correlation.
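// In each direction this is the Pearson correlation between a pixel x and its
// left (y) or top (z) neighbor:
//   corr = (S_xy - S_x * S_y / n) /
//          sqrt((S_x2 - S_x * S_x / n) * (S_y2 - S_y * S_y / n))
// with the first/final row/column sums subtracted so the statistics cover
// exactly the valid neighbor pairs.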
void av1_get_horver_correlation_full_c(const int16_t *diff, int stride,
                                       int width, int height, float *hcorr,
                                       float *vcorr) {
  // The following notation is used:
  // x - current pixel
  // y - left neighbor pixel
  // z - top neighbor pixel
  int64_t x_sum = 0, x2_sum = 0, xy_sum = 0, xz_sum = 0;
  int64_t x_firstrow = 0, x_finalrow = 0, x_firstcol = 0, x_finalcol = 0;
  int64_t x2_firstrow = 0, x2_finalrow = 0, x2_firstcol = 0, x2_finalcol = 0;

  // First, process horizontal correlation on just the first row
  x_sum += diff[0];
  x2_sum += diff[0] * diff[0];
  x_firstrow += diff[0];
  x2_firstrow += diff[0] * diff[0];
  for (int j = 1; j < width; ++j) {
    const int16_t x = diff[j];
    const int16_t y = diff[j - 1];
    x_sum += x;
    x_firstrow += x;
    x2_sum += x * x;
    x2_firstrow += x * x;
    xy_sum += x * y;
  }

  // Process vertical correlation in the first column
  x_firstcol += diff[0];
  x2_firstcol += diff[0] * diff[0];
  for (int i = 1; i < height; ++i) {
    const int16_t x = diff[i * stride];
    const int16_t z = diff[(i - 1) * stride];
    x_sum += x;
    x_firstcol += x;
    x2_sum += x * x;
    x2_firstcol += x * x;
    xz_sum += x * z;
  }

  // Now process horizontal and vertical correlation through the rest of the
  // block
  for (int i = 1; i < height; ++i) {
    for (int j = 1; j < width; ++j) {
      const int16_t x = diff[i * stride + j];
      const int16_t y = diff[i * stride + j - 1];
      const int16_t z = diff[(i - 1) * stride + j];
      x_sum += x;
      x2_sum += x * x;
      xy_sum += x * y;
      xz_sum += x * z;
    }
  }

  for (int j = 0; j < width; ++j) {
    x_finalrow += diff[(height - 1) * stride + j];
    x2_finalrow +=
        diff[(height - 1) * stride + j] * diff[(height - 1) * stride + j];
  }
  for (int i = 0; i < height; ++i) {
    x_finalcol += diff[i * stride + width - 1];
    x2_finalcol += diff[i * stride + width - 1] * diff[i * stride + width - 1];
  }

  int64_t xhor_sum = x_sum - x_finalcol;
  int64_t xver_sum = x_sum - x_finalrow;
  int64_t y_sum = x_sum - x_firstcol;
  int64_t z_sum = x_sum - x_firstrow;
  int64_t x2hor_sum = x2_sum - x2_finalcol;
  int64_t x2ver_sum = x2_sum - x2_finalrow;
  int64_t y2_sum = x2_sum - x2_firstcol;
  int64_t z2_sum = x2_sum - x2_firstrow;

  const float num_hor = (float)(height * (width - 1));
  const float num_ver = (float)((height - 1) * width);

  const float xhor_var_n = x2hor_sum - (xhor_sum * xhor_sum) / num_hor;
  const float xver_var_n = x2ver_sum - (xver_sum * xver_sum) / num_ver;

  const float y_var_n = y2_sum - (y_sum * y_sum) / num_hor;
  const float z_var_n = z2_sum - (z_sum * z_sum) / num_ver;

  const float xy_var_n = xy_sum - (xhor_sum * y_sum) / num_hor;
  const float xz_var_n = xz_sum - (xver_sum * z_sum) / num_ver;

  if (xhor_var_n > 0 && y_var_n > 0) {
    *hcorr = xy_var_n / sqrtf(xhor_var_n * y_var_n);
    *hcorr = *hcorr < 0 ? 0 : *hcorr;
  } else {
    *hcorr = 1.0;
  }
  if (xver_var_n > 0 && z_var_n > 0) {
    *vcorr = xz_var_n / sqrtf(xver_var_n * z_var_n);
    *vcorr = *vcorr < 0 ?
0 : *vcorr; 1718 } else { 1719 *vcorr = 1.0; 1720 } 1721 } 1722 1723 // Transforms raw scores into a probability distribution across 16 TX types 1724 static void score_2D_transform_pow8(float *scores_2D, float shift) { 1725 float sum = 0.0f; 1726 int i; 1727 for (i = 0; i < 16; i++) { 1728 const float v = AOMMIN(AOMMAX(scores_2D[i] + shift, 0.0f), 100.0f); 1729 const float v2 = v * v; 1730 const float v4 = v2 * v2; 1731 scores_2D[i] = v4 * v4; 1732 sum += scores_2D[i]; 1733 } 1734 for (i = 0; i < 16; i++) { 1735 if (scores_2D[i] < sum * 1e-4) 1736 scores_2D[i] = 0.0f; 1737 else 1738 scores_2D[i] /= sum; 1739 } 1740 } 1741 1742 // These thresholds were calibrated to provide a certain number of TX types 1743 // pruned by the model on average, i.e. selecting a threshold with index i 1744 // will lead to pruning i+1 TX types on average 1745 static const float *prune_2D_adaptive_thresholds[] = { 1746 // TX_4X4 1747 (float[]){ 0.00549f, 0.01306f, 0.02039f, 0.02747f, 0.03406f, 0.04065f, 1748 0.04724f, 0.05383f, 0.06067f, 0.06799f, 0.07605f, 0.08533f, 1749 0.09778f, 0.11780f }, 1750 // TX_8X8 1751 (float[]){ 0.00037f, 0.00183f, 0.00525f, 0.01038f, 0.01697f, 0.02502f, 1752 0.03381f, 0.04333f, 0.05286f, 0.06287f, 0.07434f, 0.08850f, 1753 0.10803f, 0.14124f }, 1754 // TX_16X16 1755 (float[]){ 0.01404f, 0.02820f, 0.04211f, 0.05164f, 0.05798f, 0.06335f, 1756 0.06897f, 0.07629f, 0.08875f, 0.11169f }, 1757 // TX_32X32 1758 NULL, 1759 // TX_64X64 1760 NULL, 1761 // TX_4X8 1762 (float[]){ 0.00183f, 0.00745f, 0.01428f, 0.02185f, 0.02966f, 0.03723f, 1763 0.04456f, 0.05188f, 0.05920f, 0.06702f, 0.07605f, 0.08704f, 1764 0.10168f, 0.12585f }, 1765 // TX_8X4 1766 (float[]){ 0.00085f, 0.00476f, 0.01135f, 0.01892f, 0.02698f, 0.03528f, 1767 0.04358f, 0.05164f, 0.05994f, 0.06848f, 0.07849f, 0.09021f, 1768 0.10583f, 0.13123f }, 1769 // TX_8X16 1770 (float[]){ 0.00037f, 0.00232f, 0.00671f, 0.01257f, 0.01965f, 0.02722f, 1771 0.03552f, 0.04382f, 0.05237f, 0.06189f, 0.07336f, 0.08728f, 1772 0.10730f, 0.14221f }, 1773 // TX_16X8 1774 (float[]){ 0.00061f, 0.00330f, 0.00818f, 0.01453f, 0.02185f, 0.02966f, 1775 0.03772f, 0.04578f, 0.05383f, 0.06262f, 0.07288f, 0.08582f, 1776 0.10339f, 0.13464f }, 1777 // TX_16X32 1778 NULL, 1779 // TX_32X16 1780 NULL, 1781 // TX_32X64 1782 NULL, 1783 // TX_64X32 1784 NULL, 1785 // TX_4X16 1786 (float[]){ 0.00232f, 0.00671f, 0.01257f, 0.01941f, 0.02673f, 0.03430f, 1787 0.04211f, 0.04968f, 0.05750f, 0.06580f, 0.07507f, 0.08655f, 1788 0.10242f, 0.12878f }, 1789 // TX_16X4 1790 (float[]){ 0.00110f, 0.00525f, 0.01208f, 0.01990f, 0.02795f, 0.03601f, 1791 0.04358f, 0.05115f, 0.05896f, 0.06702f, 0.07629f, 0.08752f, 1792 0.10217f, 0.12610f }, 1793 // TX_8X32 1794 NULL, 1795 // TX_32X8 1796 NULL, 1797 // TX_16X64 1798 NULL, 1799 // TX_64X16 1800 NULL, 1801 }; 1802 1803 static uint16_t prune_tx_2D(MACROBLOCK *x, BLOCK_SIZE bsize, TX_SIZE tx_size, 1804 int blk_row, int blk_col, TxSetType tx_set_type, 1805 TX_TYPE_PRUNE_MODE prune_mode) { 1806 static const int tx_type_table_2D[16] = { 1807 DCT_DCT, DCT_ADST, DCT_FLIPADST, V_DCT, 1808 ADST_DCT, ADST_ADST, ADST_FLIPADST, V_ADST, 1809 FLIPADST_DCT, FLIPADST_ADST, FLIPADST_FLIPADST, V_FLIPADST, 1810 H_DCT, H_ADST, H_FLIPADST, IDTX 1811 }; 1812 if (tx_set_type != EXT_TX_SET_ALL16 && 1813 tx_set_type != EXT_TX_SET_DTT9_IDTX_1DDCT) 1814 return 0; 1815 const NN_CONFIG *nn_config_hor = av1_tx_type_nnconfig_map_hor[tx_size]; 1816 const NN_CONFIG *nn_config_ver = av1_tx_type_nnconfig_map_ver[tx_size]; 1817 if (!nn_config_hor || !nn_config_ver) return 0; // Model 
not established yet.

  aom_clear_system_state();
  float hfeatures[16], vfeatures[16];
  float hscores[4], vscores[4];
  float scores_2D[16];
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];
  const int hfeatures_num = bw <= 8 ? bw : bw / 2;
  const int vfeatures_num = bh <= 8 ? bh : bh / 2;
  assert(hfeatures_num <= 16);
  assert(vfeatures_num <= 16);

  const struct macroblock_plane *const p = &x->plane[0];
  const int diff_stride = block_size_wide[bsize];
  const int16_t *diff = p->src_diff + 4 * blk_row * diff_stride + 4 * blk_col;
  get_energy_distribution_finer(diff, diff_stride, bw, bh, hfeatures,
                                vfeatures);
  av1_get_horver_correlation_full(diff, diff_stride, bw, bh,
                                  &hfeatures[hfeatures_num - 1],
                                  &vfeatures[vfeatures_num - 1]);
  av1_nn_predict(hfeatures, nn_config_hor, hscores);
  av1_nn_predict(vfeatures, nn_config_ver, vscores);
  aom_clear_system_state();

  float score_2D_average = 0.0f;
  for (int i = 0; i < 4; i++) {
    float *cur_scores_2D = scores_2D + i * 4;
    cur_scores_2D[0] = vscores[i] * hscores[0];
    cur_scores_2D[1] = vscores[i] * hscores[1];
    cur_scores_2D[2] = vscores[i] * hscores[2];
    cur_scores_2D[3] = vscores[i] * hscores[3];
    score_2D_average += cur_scores_2D[0] + cur_scores_2D[1] + cur_scores_2D[2] +
                        cur_scores_2D[3];
  }
  score_2D_average /= 16;

  const int prune_aggr_table[2][2] = { { 6, 4 }, { 10, 7 } };
  int pruning_aggressiveness = 1;
  if (tx_set_type == EXT_TX_SET_ALL16) {
    score_2D_transform_pow8(scores_2D, (10 - score_2D_average));
    pruning_aggressiveness =
        prune_aggr_table[prune_mode - PRUNE_2D_ACCURATE][0];
  } else if (tx_set_type == EXT_TX_SET_DTT9_IDTX_1DDCT) {
    score_2D_transform_pow8(scores_2D, (20 - score_2D_average));
    pruning_aggressiveness =
        prune_aggr_table[prune_mode - PRUNE_2D_ACCURATE][1];
  }

  // Always keep the TX type with the highest score; prune all others whose
  // score falls below score_thresh.
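// For instance (worked example from the tables above): with tx_set_type ==
// EXT_TX_SET_ALL16, prune_mode == PRUNE_2D_ACCURATE and tx_size == TX_8X8,
// pruning_aggressiveness is prune_aggr_table[0][0] == 6, so score_thresh
// becomes prune_2D_adaptive_thresholds[TX_8X8][5] == 0.02502f; every TX type
// scoring below that value is pruned, except the single best-scoring one.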
  int max_score_i = 0;
  float max_score = 0.0f;
  for (int i = 0; i < 16; i++) {
    if (scores_2D[i] > max_score &&
        av1_ext_tx_used[tx_set_type][tx_type_table_2D[i]]) {
      max_score = scores_2D[i];
      max_score_i = i;
    }
  }

  const float score_thresh =
      prune_2D_adaptive_thresholds[tx_size][pruning_aggressiveness - 1];

  uint16_t prune_bitmask = 0;
  for (int i = 0; i < 16; i++) {
    if (scores_2D[i] < score_thresh && i != max_score_i)
      prune_bitmask |= (1 << tx_type_table_2D[i]);
  }
  return prune_bitmask;
}

// ((prune >> vtx_tab[tx_type]) & 1)
static const uint16_t prune_v_mask[] = {
  0x0000, 0x0425, 0x108a, 0x14af, 0x4150, 0x4575, 0x51da, 0x55ff,
  0xaa00, 0xae25, 0xba8a, 0xbeaf, 0xeb50, 0xef75, 0xfbda, 0xffff,
};

// ((prune >> (htx_tab[tx_type] + 8)) & 1)
static const uint16_t prune_h_mask[] = {
  0x0000, 0x0813, 0x210c, 0x291f, 0x80e0, 0x88f3, 0xa1ec, 0xa9ff,
  0x5600, 0x5e13, 0x770c, 0x7f1f, 0xd6e0, 0xdef3, 0xf7ec, 0xffff,
};

static INLINE uint16_t gen_tx_search_prune_mask(int tx_search_prune) {
  uint8_t prune_v = tx_search_prune & 0x0F;
  uint8_t prune_h = (tx_search_prune >> 8) & 0x0F;
  return (prune_v_mask[prune_v] & prune_h_mask[prune_h]);
}

static void prune_tx(const AV1_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x,
                     const MACROBLOCKD *const xd, int tx_set_type) {
  x->tx_search_prune[tx_set_type] = 0;
  x->tx_split_prune_flag = 0;
  const MB_MODE_INFO *mbmi = xd->mi[0];
  const int is_inter = is_inter_block(mbmi);
  if ((is_inter && cpi->oxcf.use_inter_dct_only) ||
      (!is_inter && cpi->oxcf.use_intra_dct_only)) {
    x->tx_search_prune[tx_set_type] = ~(1 << DCT_DCT);
    return;
  }
  if (!is_inter || cpi->sf.tx_type_search.prune_mode == NO_PRUNE ||
      x->use_default_inter_tx_type || xd->lossless[mbmi->segment_id] ||
      x->cb_partition_scan)
    return;
  int tx_set = ext_tx_set_index[1][tx_set_type];
  assert(tx_set >= 0);
  const int *tx_set_1D = ext_tx_used_inter_1D[tx_set];
  int prune = 0;
  switch (cpi->sf.tx_type_search.prune_mode) {
    case NO_PRUNE: return;
    case PRUNE_ONE:
      if (!(tx_set_1D[FLIPADST_1D] & tx_set_1D[ADST_1D])) return;
      prune = prune_one_for_sby(cpi, bsize, x, xd);
      x->tx_search_prune[tx_set_type] = gen_tx_search_prune_mask(prune);
      break;
    case PRUNE_TWO:
      if (!(tx_set_1D[FLIPADST_1D] & tx_set_1D[ADST_1D])) {
        if (!(tx_set_1D[DCT_1D] & tx_set_1D[IDTX_1D])) return;
        prune = prune_two_for_sby(cpi, bsize, x, xd, 0, 1);
      } else if (!(tx_set_1D[DCT_1D] & tx_set_1D[IDTX_1D])) {
        prune = prune_two_for_sby(cpi, bsize, x, xd, 1, 0);
      } else {
        prune = prune_two_for_sby(cpi, bsize, x, xd, 1, 1);
      }
      x->tx_search_prune[tx_set_type] = gen_tx_search_prune_mask(prune);
      break;
    case PRUNE_2D_ACCURATE:
    case PRUNE_2D_FAST: break;
    default: assert(0);
  }
}

static void model_rd_from_sse(const AV1_COMP *const cpi,
                              const MACROBLOCK *const x, BLOCK_SIZE plane_bsize,
                              int plane, int64_t sse, int num_samples,
                              int *rate, int64_t *dist) {
  (void)num_samples;
  const MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const int dequant_shift = (is_cur_buf_hbd(xd)) ? xd->bd - 5 : 3;

  // Fast approximation of the modeling function.
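// The fast path below amounts to a linear rate fit in the effective
// quantizer q: for q < 120, rate ~ (sse * (280 - q)) >> (16 -
// AV1_PROB_COST_SHIFT) and dist ~ (sse * q) >> 8; for q >= 120 the predicted
// rate collapses to 0. The constants are empirical fits taken from the code
// that follows.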
1960 if (cpi->sf.simple_model_rd_from_var) { 1961 const int64_t square_error = sse; 1962 int quantizer = pd->dequant_Q3[1] >> dequant_shift; 1963 if (quantizer < 120) 1964 *rate = (int)AOMMIN( 1965 (square_error * (280 - quantizer)) >> (16 - AV1_PROB_COST_SHIFT), 1966 INT_MAX); 1967 else 1968 *rate = 0; 1969 assert(*rate >= 0); 1970 *dist = (square_error * quantizer) >> 8; 1971 } else { 1972 av1_model_rd_from_var_lapndz(sse, num_pels_log2_lookup[plane_bsize], 1973 pd->dequant_Q3[1] >> dequant_shift, rate, 1974 dist); 1975 } 1976 *dist <<= 4; 1977 } 1978 1979 static int64_t get_sse(const AV1_COMP *cpi, const MACROBLOCK *x) { 1980 const AV1_COMMON *cm = &cpi->common; 1981 const int num_planes = av1_num_planes(cm); 1982 const MACROBLOCKD *xd = &x->e_mbd; 1983 const MB_MODE_INFO *mbmi = xd->mi[0]; 1984 int64_t total_sse = 0; 1985 for (int plane = 0; plane < num_planes; ++plane) { 1986 const struct macroblock_plane *const p = &x->plane[plane]; 1987 const struct macroblockd_plane *const pd = &xd->plane[plane]; 1988 const BLOCK_SIZE bs = get_plane_block_size(mbmi->sb_type, pd->subsampling_x, 1989 pd->subsampling_y); 1990 unsigned int sse; 1991 1992 if (x->skip_chroma_rd && plane) continue; 1993 1994 cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, 1995 &sse); 1996 total_sse += sse; 1997 } 1998 total_sse <<= 4; 1999 return total_sse; 2000 } 2001 2002 static void model_rd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bsize, 2003 MACROBLOCK *x, MACROBLOCKD *xd, int plane_from, 2004 int plane_to, int mi_row, int mi_col, 2005 int *out_rate_sum, int64_t *out_dist_sum, 2006 int *skip_txfm_sb, int64_t *skip_sse_sb, 2007 int *plane_rate, int64_t *plane_sse, 2008 int64_t *plane_dist) { 2009 // Note our transform coeffs are 8 times an orthogonal transform. 2010 // Hence quantizer step is also 8 times. To get effective quantizer 2011 // we need to divide by 8 before sending to modeling function. 
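// For example: dequant_Q3[] stores the quantizer step in Q3 precision (the
// 8x scale noted above), so with 8-bit input dequant_shift == 3 and
// pd->dequant_Q3[1] >> 3 recovers the effective AC step. For high bit depth
// the shift grows to xd->bd - 5 (5 at 10-bit, 7 at 12-bit), which is assumed
// here to also absorb the extra coefficient scaling at those bit depths.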
  int plane;
  (void)mi_row;
  (void)mi_col;
  const int ref = xd->mi[0]->ref_frame[0];

  int64_t rate_sum = 0;
  int64_t dist_sum = 0;
  int64_t total_sse = 0;

  for (plane = plane_from; plane <= plane_to; ++plane) {
    struct macroblock_plane *const p = &x->plane[plane];
    struct macroblockd_plane *const pd = &xd->plane[plane];
    const BLOCK_SIZE plane_bsize =
        get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
    const int bw = block_size_wide[plane_bsize];
    const int bh = block_size_high[plane_bsize];
    int64_t sse;
    int rate;
    int64_t dist;

    if (x->skip_chroma_rd && plane) continue;

    if (is_cur_buf_hbd(xd)) {
      sse = aom_highbd_sse(p->src.buf, p->src.stride, pd->dst.buf,
                           pd->dst.stride, bw, bh);
    } else {
      sse = aom_sse(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, bw,
                    bh);
    }
    sse = ROUND_POWER_OF_TWO(sse, (xd->bd - 8) * 2);

    model_rd_from_sse(cpi, x, plane_bsize, plane, sse, bw * bh, &rate, &dist);

    if (plane == 0) x->pred_sse[ref] = (unsigned int)AOMMIN(sse, UINT_MAX);

    total_sse += sse;
    rate_sum += rate;
    dist_sum += dist;
    if (plane_rate) plane_rate[plane] = rate;
    if (plane_sse) plane_sse[plane] = sse;
    if (plane_dist) plane_dist[plane] = dist;
    assert(rate_sum >= 0);
  }

  if (skip_txfm_sb) *skip_txfm_sb = total_sse == 0;
  if (skip_sse_sb) *skip_sse_sb = total_sse << 4;
  rate_sum = AOMMIN(rate_sum, INT_MAX);
  *out_rate_sum = (int)rate_sum;
  *out_dist_sum = dist_sum;
}

int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    const int diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += coeff[i] * coeff[i];
  }

  *ssz = sqcoeff;
  return error;
}

int64_t av1_highbd_block_error_c(const tran_low_t *coeff,
                                 const tran_low_t *dqcoeff, intptr_t block_size,
                                 int64_t *ssz, int bd) {
  int i;
  int64_t error = 0, sqcoeff = 0;
  int shift = 2 * (bd - 8);
  int rounding = shift > 0 ? 1 << (shift - 1) : 0;

  for (i = 0; i < block_size; i++) {
    const int64_t diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
  }
  assert(error >= 0 && sqcoeff >= 0);
  error = (error + rounding) >> shift;
  sqcoeff = (sqcoeff + rounding) >> shift;

  *ssz = sqcoeff;
  return error;
}

// Get transform block visible dimensions cropped to the MI units.
static void get_txb_dimensions(const MACROBLOCKD *xd, int plane,
                               BLOCK_SIZE plane_bsize, int blk_row, int blk_col,
                               BLOCK_SIZE tx_bsize, int *width, int *height,
                               int *visible_width, int *visible_height) {
  assert(tx_bsize <= plane_bsize);
  int txb_height = block_size_high[tx_bsize];
  int txb_width = block_size_wide[tx_bsize];
  const int block_height = block_size_high[plane_bsize];
  const int block_width = block_size_wide[plane_bsize];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  // TODO(aconverse@google.com): Investigate using crop_width/height here
  // rather than the MI size.
  const int block_rows =
      (xd->mb_to_bottom_edge >= 0)
          ? block_height
          : (xd->mb_to_bottom_edge >> (3 + pd->subsampling_y)) + block_height;
  const int block_cols =
      (xd->mb_to_right_edge >= 0)
          ? block_width
          : (xd->mb_to_right_edge >> (3 + pd->subsampling_x)) + block_width;
  const int tx_unit_size = tx_size_wide_log2[0];
  if (width) *width = txb_width;
  if (height) *height = txb_height;
  *visible_width = clamp(block_cols - (blk_col << tx_unit_size), 0, txb_width);
  *visible_height =
      clamp(block_rows - (blk_row << tx_unit_size), 0, txb_height);
}

// Compute the pixel-domain distortion from src and dst on all visible 4x4s
// in the transform block.
static unsigned pixel_dist(const AV1_COMP *const cpi, const MACROBLOCK *x,
                           int plane, const uint8_t *src, const int src_stride,
                           const uint8_t *dst, const int dst_stride,
                           int blk_row, int blk_col,
                           const BLOCK_SIZE plane_bsize,
                           const BLOCK_SIZE tx_bsize) {
  int txb_rows, txb_cols, visible_rows, visible_cols;
  const MACROBLOCKD *xd = &x->e_mbd;

  get_txb_dimensions(xd, plane, plane_bsize, blk_row, blk_col, tx_bsize,
                     &txb_cols, &txb_rows, &visible_cols, &visible_rows);
  assert(visible_rows > 0);
  assert(visible_cols > 0);

#if CONFIG_DIST_8X8
  if (x->using_dist_8x8 && plane == 0)
    return (unsigned)av1_dist_8x8(cpi, x, src, src_stride, dst, dst_stride,
                                  tx_bsize, txb_cols, txb_rows, visible_cols,
                                  visible_rows, x->qindex);
#endif  // CONFIG_DIST_8X8

  unsigned sse = pixel_dist_visible_only(cpi, x, src, src_stride, dst,
                                         dst_stride, tx_bsize, txb_rows,
                                         txb_cols, visible_rows, visible_cols);

  return sse;
}

// Compute the pixel-domain distortion from diff on all visible 4x4s in the
// transform block.
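// Only the visible part of the transform block contributes to the sum. As an
// illustrative case: a 16x16 transform block whose bottom half lies below the
// frame boundary yields visible_cols == 16 and visible_rows == 8, so the
// squared differences are accumulated over that 16x8 region only.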
2161 static INLINE int64_t pixel_diff_dist(const MACROBLOCK *x, int plane, 2162 int blk_row, int blk_col, 2163 const BLOCK_SIZE plane_bsize, 2164 const BLOCK_SIZE tx_bsize, 2165 unsigned int *block_mse_q8) { 2166 int visible_rows, visible_cols; 2167 const MACROBLOCKD *xd = &x->e_mbd; 2168 get_txb_dimensions(xd, plane, plane_bsize, blk_row, blk_col, tx_bsize, NULL, 2169 NULL, &visible_cols, &visible_rows); 2170 const int diff_stride = block_size_wide[plane_bsize]; 2171 const int16_t *diff = x->plane[plane].src_diff; 2172 #if CONFIG_DIST_8X8 2173 int txb_height = block_size_high[tx_bsize]; 2174 int txb_width = block_size_wide[tx_bsize]; 2175 if (x->using_dist_8x8 && plane == 0) { 2176 const int src_stride = x->plane[plane].src.stride; 2177 const int src_idx = (blk_row * src_stride + blk_col) 2178 << tx_size_wide_log2[0]; 2179 const int diff_idx = (blk_row * diff_stride + blk_col) 2180 << tx_size_wide_log2[0]; 2181 const uint8_t *src = &x->plane[plane].src.buf[src_idx]; 2182 return dist_8x8_diff(x, src, src_stride, diff + diff_idx, diff_stride, 2183 txb_width, txb_height, visible_cols, visible_rows, 2184 x->qindex); 2185 } 2186 #endif 2187 diff += ((blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]); 2188 uint64_t sse = 2189 aom_sum_squares_2d_i16(diff, diff_stride, visible_cols, visible_rows); 2190 if (block_mse_q8 != NULL) 2191 *block_mse_q8 = (unsigned int)((256 * sse) / (visible_cols * visible_rows)); 2192 return sse; 2193 } 2194 2195 int av1_count_colors(const uint8_t *src, int stride, int rows, int cols, 2196 int *val_count) { 2197 const int max_pix_val = 1 << 8; 2198 memset(val_count, 0, max_pix_val * sizeof(val_count[0])); 2199 for (int r = 0; r < rows; ++r) { 2200 for (int c = 0; c < cols; ++c) { 2201 const int this_val = src[r * stride + c]; 2202 assert(this_val < max_pix_val); 2203 ++val_count[this_val]; 2204 } 2205 } 2206 int n = 0; 2207 for (int i = 0; i < max_pix_val; ++i) { 2208 if (val_count[i]) ++n; 2209 } 2210 return n; 2211 } 2212 2213 int av1_count_colors_highbd(const uint8_t *src8, int stride, int rows, int cols, 2214 int bit_depth, int *val_count) { 2215 assert(bit_depth <= 12); 2216 const int max_pix_val = 1 << bit_depth; 2217 const uint16_t *src = CONVERT_TO_SHORTPTR(src8); 2218 memset(val_count, 0, max_pix_val * sizeof(val_count[0])); 2219 for (int r = 0; r < rows; ++r) { 2220 for (int c = 0; c < cols; ++c) { 2221 const int this_val = src[r * stride + c]; 2222 assert(this_val < max_pix_val); 2223 if (this_val >= max_pix_val) return 0; 2224 ++val_count[this_val]; 2225 } 2226 } 2227 int n = 0; 2228 for (int i = 0; i < max_pix_val; ++i) { 2229 if (val_count[i]) ++n; 2230 } 2231 return n; 2232 } 2233 2234 static void inverse_transform_block_facade(MACROBLOCKD *xd, int plane, 2235 int block, int blk_row, int blk_col, 2236 int eob, int reduced_tx_set) { 2237 struct macroblockd_plane *const pd = &xd->plane[plane]; 2238 tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); 2239 const PLANE_TYPE plane_type = get_plane_type(plane); 2240 const TX_SIZE tx_size = av1_get_tx_size(plane, xd); 2241 const TX_TYPE tx_type = av1_get_tx_type(plane_type, xd, blk_row, blk_col, 2242 tx_size, reduced_tx_set); 2243 const int dst_stride = pd->dst.stride; 2244 uint8_t *dst = 2245 &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]]; 2246 av1_inverse_transform_block(xd, dqcoeff, plane, tx_type, tx_size, dst, 2247 dst_stride, eob, reduced_tx_set); 2248 } 2249 2250 static int find_tx_size_rd_info(TXB_RD_RECORD *cur_record, const uint32_t hash); 2251 2252 static uint32_t 
get_intra_txb_hash(MACROBLOCK *x, int plane, int blk_row, 2253 int blk_col, BLOCK_SIZE plane_bsize, 2254 TX_SIZE tx_size) { 2255 int16_t tmp_data[64 * 64]; 2256 const int diff_stride = block_size_wide[plane_bsize]; 2257 const int16_t *diff = x->plane[plane].src_diff; 2258 const int16_t *cur_diff_row = diff + 4 * blk_row * diff_stride + 4 * blk_col; 2259 const int txb_w = tx_size_wide[tx_size]; 2260 const int txb_h = tx_size_high[tx_size]; 2261 uint8_t *hash_data = (uint8_t *)cur_diff_row; 2262 if (txb_w != diff_stride) { 2263 int16_t *cur_hash_row = tmp_data; 2264 for (int i = 0; i < txb_h; i++) { 2265 memcpy(cur_hash_row, cur_diff_row, sizeof(*diff) * txb_w); 2266 cur_hash_row += txb_w; 2267 cur_diff_row += diff_stride; 2268 } 2269 hash_data = (uint8_t *)tmp_data; 2270 } 2271 CRC32C *crc = &x->mb_rd_record.crc_calculator; 2272 const uint32_t hash = av1_get_crc32c_value(crc, hash_data, 2 * txb_w * txb_h); 2273 return (hash << 5) + tx_size; 2274 } 2275 2276 static INLINE void dist_block_tx_domain(MACROBLOCK *x, int plane, int block, 2277 TX_SIZE tx_size, int64_t *out_dist, 2278 int64_t *out_sse) { 2279 MACROBLOCKD *const xd = &x->e_mbd; 2280 const struct macroblock_plane *const p = &x->plane[plane]; 2281 const struct macroblockd_plane *const pd = &xd->plane[plane]; 2282 // Transform domain distortion computation is more efficient as it does 2283 // not involve an inverse transform, but it is less accurate. 2284 const int buffer_length = av1_get_max_eob(tx_size); 2285 int64_t this_sse; 2286 // TX-domain results need to shift down to Q2/D10 to match pixel 2287 // domain distortion values which are in Q2^2 2288 int shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2; 2289 tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); 2290 tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); 2291 2292 if (is_cur_buf_hbd(xd)) 2293 *out_dist = av1_highbd_block_error(coeff, dqcoeff, buffer_length, &this_sse, 2294 xd->bd); 2295 else 2296 *out_dist = av1_block_error(coeff, dqcoeff, buffer_length, &this_sse); 2297 2298 *out_dist = RIGHT_SIGNED_SHIFT(*out_dist, shift); 2299 *out_sse = RIGHT_SIGNED_SHIFT(this_sse, shift); 2300 } 2301 2302 static INLINE int64_t dist_block_px_domain(const AV1_COMP *cpi, MACROBLOCK *x, 2303 int plane, BLOCK_SIZE plane_bsize, 2304 int block, int blk_row, int blk_col, 2305 TX_SIZE tx_size) { 2306 MACROBLOCKD *const xd = &x->e_mbd; 2307 const struct macroblock_plane *const p = &x->plane[plane]; 2308 const struct macroblockd_plane *const pd = &xd->plane[plane]; 2309 const uint16_t eob = p->eobs[block]; 2310 const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size]; 2311 const int bsw = block_size_wide[tx_bsize]; 2312 const int bsh = block_size_high[tx_bsize]; 2313 const int src_stride = x->plane[plane].src.stride; 2314 const int dst_stride = xd->plane[plane].dst.stride; 2315 // Scale the transform block index to pixel unit. 
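// blk_row and blk_col count 4x4 units, so shifting the linear index by
// tx_size_wide_log2[0] (== 2) converts it to pixels; e.g. blk_row == 2 and
// blk_col == 3 address the pixel at row 8, column 12 of the plane buffers
// (illustrative values).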
2316 const int src_idx = (blk_row * src_stride + blk_col) << tx_size_wide_log2[0]; 2317 const int dst_idx = (blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]; 2318 const uint8_t *src = &x->plane[plane].src.buf[src_idx]; 2319 const uint8_t *dst = &xd->plane[plane].dst.buf[dst_idx]; 2320 const tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); 2321 2322 assert(cpi != NULL); 2323 assert(tx_size_wide_log2[0] == tx_size_high_log2[0]); 2324 2325 uint8_t *recon; 2326 DECLARE_ALIGNED(16, uint16_t, recon16[MAX_TX_SQUARE]); 2327 2328 if (is_cur_buf_hbd(xd)) { 2329 recon = CONVERT_TO_BYTEPTR(recon16); 2330 av1_highbd_convolve_2d_copy_sr(CONVERT_TO_SHORTPTR(dst), dst_stride, 2331 CONVERT_TO_SHORTPTR(recon), MAX_TX_SIZE, bsw, 2332 bsh, NULL, NULL, 0, 0, NULL, xd->bd); 2333 } else { 2334 recon = (uint8_t *)recon16; 2335 av1_convolve_2d_copy_sr(dst, dst_stride, recon, MAX_TX_SIZE, bsw, bsh, NULL, 2336 NULL, 0, 0, NULL); 2337 } 2338 2339 const PLANE_TYPE plane_type = get_plane_type(plane); 2340 TX_TYPE tx_type = av1_get_tx_type(plane_type, xd, blk_row, blk_col, tx_size, 2341 cpi->common.reduced_tx_set_used); 2342 av1_inverse_transform_block(xd, dqcoeff, plane, tx_type, tx_size, recon, 2343 MAX_TX_SIZE, eob, 2344 cpi->common.reduced_tx_set_used); 2345 2346 return 16 * pixel_dist(cpi, x, plane, src, src_stride, recon, MAX_TX_SIZE, 2347 blk_row, blk_col, plane_bsize, tx_bsize); 2348 } 2349 2350 static double get_diff_mean(const uint8_t *src, int src_stride, 2351 const uint8_t *dst, int dst_stride, int w, int h) { 2352 double sum = 0.0; 2353 for (int j = 0; j < h; ++j) { 2354 for (int i = 0; i < w; ++i) { 2355 const int diff = src[j * src_stride + i] - dst[j * dst_stride + i]; 2356 sum += diff; 2357 } 2358 } 2359 assert(w > 0 && h > 0); 2360 return sum / (w * h); 2361 } 2362 2363 static double get_highbd_diff_mean(const uint8_t *src8, int src_stride, 2364 const uint8_t *dst8, int dst_stride, int w, 2365 int h) { 2366 const uint16_t *src = CONVERT_TO_SHORTPTR(src8); 2367 const uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); 2368 double sum = 0.0; 2369 for (int j = 0; j < h; ++j) { 2370 for (int i = 0; i < w; ++i) { 2371 const int diff = src[j * src_stride + i] - dst[j * dst_stride + i]; 2372 sum += diff; 2373 } 2374 } 2375 assert(w > 0 && h > 0); 2376 return sum / (w * h); 2377 } 2378 2379 static double get_sse_norm(const int16_t *diff, int stride, int w, int h) { 2380 double sum = 0.0; 2381 for (int j = 0; j < h; ++j) { 2382 for (int i = 0; i < w; ++i) { 2383 const int err = diff[j * stride + i]; 2384 sum += err * err; 2385 } 2386 } 2387 assert(w > 0 && h > 0); 2388 return sum / (w * h); 2389 } 2390 2391 static double get_sad_norm(const int16_t *diff, int stride, int w, int h) { 2392 double sum = 0.0; 2393 for (int j = 0; j < h; ++j) { 2394 for (int i = 0; i < w; ++i) { 2395 sum += abs(diff[j * stride + i]); 2396 } 2397 } 2398 assert(w > 0 && h > 0); 2399 return sum / (w * h); 2400 } 2401 2402 static void get_2x2_normalized_sses_and_sads( 2403 const AV1_COMP *const cpi, BLOCK_SIZE tx_bsize, const uint8_t *const src, 2404 int src_stride, const uint8_t *const dst, int dst_stride, 2405 const int16_t *const src_diff, int diff_stride, double *const sse_norm_arr, 2406 double *const sad_norm_arr) { 2407 const BLOCK_SIZE tx_bsize_half = 2408 get_partition_subsize(tx_bsize, PARTITION_SPLIT); 2409 if (tx_bsize_half == BLOCK_INVALID) { // manually calculate stats 2410 const int half_width = block_size_wide[tx_bsize] / 2; 2411 const int half_height = block_size_high[tx_bsize] / 2; 2412 for (int row = 0; row < 2; 
++row) { 2413 for (int col = 0; col < 2; ++col) { 2414 const int16_t *const this_src_diff = 2415 src_diff + row * half_height * diff_stride + col * half_width; 2416 if (sse_norm_arr) { 2417 sse_norm_arr[row * 2 + col] = 2418 get_sse_norm(this_src_diff, diff_stride, half_width, half_height); 2419 } 2420 if (sad_norm_arr) { 2421 sad_norm_arr[row * 2 + col] = 2422 get_sad_norm(this_src_diff, diff_stride, half_width, half_height); 2423 } 2424 } 2425 } 2426 } else { // use function pointers to calculate stats 2427 const int half_width = block_size_wide[tx_bsize_half]; 2428 const int half_height = block_size_high[tx_bsize_half]; 2429 const int num_samples_half = half_width * half_height; 2430 for (int row = 0; row < 2; ++row) { 2431 for (int col = 0; col < 2; ++col) { 2432 const uint8_t *const this_src = 2433 src + row * half_height * src_stride + col * half_width; 2434 const uint8_t *const this_dst = 2435 dst + row * half_height * dst_stride + col * half_width; 2436 2437 if (sse_norm_arr) { 2438 unsigned int this_sse; 2439 cpi->fn_ptr[tx_bsize_half].vf(this_src, src_stride, this_dst, 2440 dst_stride, &this_sse); 2441 sse_norm_arr[row * 2 + col] = (double)this_sse / num_samples_half; 2442 } 2443 2444 if (sad_norm_arr) { 2445 const unsigned int this_sad = cpi->fn_ptr[tx_bsize_half].sdf( 2446 this_src, src_stride, this_dst, dst_stride); 2447 sad_norm_arr[row * 2 + col] = (double)this_sad / num_samples_half; 2448 } 2449 } 2450 } 2451 } 2452 } 2453 2454 // NOTE: CONFIG_COLLECT_RD_STATS has 3 possible values 2455 // 0: Do not collect any RD stats 2456 // 1: Collect RD stats for transform units 2457 // 2: Collect RD stats for partition units 2458 #if CONFIG_COLLECT_RD_STATS 2459 2460 #if CONFIG_COLLECT_RD_STATS == 1 2461 static double get_mean(const int16_t *diff, int stride, int w, int h) { 2462 double sum = 0.0; 2463 for (int j = 0; j < h; ++j) { 2464 for (int i = 0; i < w; ++i) { 2465 sum += diff[j * stride + i]; 2466 } 2467 } 2468 assert(w > 0 && h > 0); 2469 return sum / (w * h); 2470 } 2471 2472 static void PrintTransformUnitStats(const AV1_COMP *const cpi, MACROBLOCK *x, 2473 const RD_STATS *const rd_stats, int blk_row, 2474 int blk_col, BLOCK_SIZE plane_bsize, 2475 TX_SIZE tx_size, TX_TYPE tx_type, 2476 int64_t rd) { 2477 if (rd_stats->rate == INT_MAX || rd_stats->dist == INT64_MAX) return; 2478 2479 // Generate small sample to restrict output size. 2480 static unsigned int seed = 21743; 2481 if (lcg_rand16(&seed) % 256 > 0) return; 2482 2483 const char output_file[] = "tu_stats.txt"; 2484 FILE *fout = fopen(output_file, "a"); 2485 if (!fout) return; 2486 2487 const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size]; 2488 const MACROBLOCKD *const xd = &x->e_mbd; 2489 const int plane = 0; 2490 struct macroblock_plane *const p = &x->plane[plane]; 2491 const struct macroblockd_plane *const pd = &xd->plane[plane]; 2492 const int txw = tx_size_wide[tx_size]; 2493 const int txh = tx_size_high[tx_size]; 2494 const int dequant_shift = (is_cur_buf_hbd(xd)) ? 
xd->bd - 5 : 3; 2495 const int q_step = pd->dequant_Q3[1] >> dequant_shift; 2496 const int num_samples = txw * txh; 2497 2498 const double rate_norm = (double)rd_stats->rate / num_samples; 2499 const double dist_norm = (double)rd_stats->dist / num_samples; 2500 2501 fprintf(fout, "%g %g", rate_norm, dist_norm); 2502 2503 const int src_stride = p->src.stride; 2504 const uint8_t *const src = 2505 &p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]]; 2506 const int dst_stride = pd->dst.stride; 2507 const uint8_t *const dst = 2508 &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]]; 2509 unsigned int sse; 2510 cpi->fn_ptr[tx_bsize].vf(src, src_stride, dst, dst_stride, &sse); 2511 const double sse_norm = (double)sse / num_samples; 2512 2513 const unsigned int sad = 2514 cpi->fn_ptr[tx_bsize].sdf(src, src_stride, dst, dst_stride); 2515 const double sad_norm = (double)sad / num_samples; 2516 2517 fprintf(fout, " %g %g", sse_norm, sad_norm); 2518 2519 const int diff_stride = block_size_wide[plane_bsize]; 2520 const int16_t *const src_diff = 2521 &p->src_diff[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]]; 2522 2523 double sse_norm_arr[4], sad_norm_arr[4]; 2524 get_2x2_normalized_sses_and_sads(cpi, tx_bsize, src, src_stride, dst, 2525 dst_stride, src_diff, diff_stride, 2526 sse_norm_arr, sad_norm_arr); 2527 for (int i = 0; i < 4; ++i) { 2528 fprintf(fout, " %g", sse_norm_arr[i]); 2529 } 2530 for (int i = 0; i < 4; ++i) { 2531 fprintf(fout, " %g", sad_norm_arr[i]); 2532 } 2533 2534 const TX_TYPE_1D tx_type_1d_row = htx_tab[tx_type]; 2535 const TX_TYPE_1D tx_type_1d_col = vtx_tab[tx_type]; 2536 2537 fprintf(fout, " %d %d %d %d %d", q_step, tx_size_wide[tx_size], 2538 tx_size_high[tx_size], tx_type_1d_row, tx_type_1d_col); 2539 2540 int model_rate; 2541 int64_t model_dist; 2542 model_rd_sse_fn[MODELRD_CURVFIT](cpi, x, tx_bsize, plane, sse, num_samples, 2543 &model_rate, &model_dist); 2544 const double model_rate_norm = (double)model_rate / num_samples; 2545 const double model_dist_norm = (double)model_dist / num_samples; 2546 fprintf(fout, " %g %g", model_rate_norm, model_dist_norm); 2547 2548 const double mean = get_mean(src_diff, diff_stride, txw, txh); 2549 float hor_corr, vert_corr; 2550 av1_get_horver_correlation_full(src_diff, diff_stride, txw, txh, &hor_corr, 2551 &vert_corr); 2552 fprintf(fout, " %g %g %g", mean, hor_corr, vert_corr); 2553 2554 double hdist[4] = { 0 }, vdist[4] = { 0 }; 2555 get_energy_distribution_fine(cpi, tx_bsize, src, src_stride, dst, dst_stride, 2556 1, hdist, vdist); 2557 fprintf(fout, " %g %g %g %g %g %g %g %g", hdist[0], hdist[1], hdist[2], 2558 hdist[3], vdist[0], vdist[1], vdist[2], vdist[3]); 2559 2560 fprintf(fout, " %d %" PRId64, x->rdmult, rd); 2561 2562 fprintf(fout, "\n"); 2563 fclose(fout); 2564 } 2565 #endif // CONFIG_COLLECT_RD_STATS == 1 2566 2567 #if CONFIG_COLLECT_RD_STATS >= 2 2568 static void PrintPredictionUnitStats(const AV1_COMP *const cpi, 2569 const TileDataEnc *tile_data, 2570 MACROBLOCK *x, 2571 const RD_STATS *const rd_stats, 2572 BLOCK_SIZE plane_bsize) { 2573 if (rd_stats->invalid_rate) return; 2574 if (rd_stats->rate == INT_MAX || rd_stats->dist == INT64_MAX) return; 2575 2576 if (cpi->sf.inter_mode_rd_model_estimation == 1 && 2577 (tile_data == NULL || 2578 !tile_data->inter_mode_rd_models[plane_bsize].ready)) 2579 return; 2580 (void)tile_data; 2581 // Generate small sample to restrict output size. 
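// The sampling probability scales with block size so that roughly the same
// amount of pixel data is logged for every block size: a block with 2^p
// pixels is kept with probability 1 / 2^(14 - p), e.g. about 1 in 64 calls
// for 16x16 (p == 8) and 1 in 4 for 64x64 (p == 12). Worked example only;
// the constants come from the check below.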
2582 static unsigned int seed = 95014; 2583 2584 if ((lcg_rand16(&seed) % (1 << (14 - num_pels_log2_lookup[plane_bsize]))) != 2585 1) 2586 return; 2587 2588 const char output_file[] = "pu_stats.txt"; 2589 FILE *fout = fopen(output_file, "a"); 2590 if (!fout) return; 2591 2592 const MACROBLOCKD *const xd = &x->e_mbd; 2593 const int plane = 0; 2594 struct macroblock_plane *const p = &x->plane[plane]; 2595 const struct macroblockd_plane *const pd = &xd->plane[plane]; 2596 const int diff_stride = block_size_wide[plane_bsize]; 2597 int bw, bh; 2598 get_txb_dimensions(xd, plane, plane_bsize, 0, 0, plane_bsize, NULL, NULL, &bw, 2599 &bh); 2600 const int num_samples = bw * bh; 2601 const int dequant_shift = (is_cur_buf_hbd(xd)) ? xd->bd - 5 : 3; 2602 const int q_step = pd->dequant_Q3[1] >> dequant_shift; 2603 2604 const double rate_norm = (double)rd_stats->rate / num_samples; 2605 const double dist_norm = (double)rd_stats->dist / num_samples; 2606 const double rdcost_norm = 2607 (double)RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist) / num_samples; 2608 2609 fprintf(fout, "%g %g %g", rate_norm, dist_norm, rdcost_norm); 2610 2611 const int src_stride = p->src.stride; 2612 const uint8_t *const src = p->src.buf; 2613 const int dst_stride = pd->dst.stride; 2614 const uint8_t *const dst = pd->dst.buf; 2615 const int16_t *const src_diff = p->src_diff; 2616 const int shift = (xd->bd - 8); 2617 2618 int64_t sse; 2619 if (is_cur_buf_hbd(xd)) { 2620 sse = aom_highbd_sse(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, 2621 bw, bh); 2622 } else { 2623 sse = 2624 aom_sse(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, bw, bh); 2625 } 2626 sse = ROUND_POWER_OF_TWO(sse, shift * 2); 2627 const double sse_norm = (double)sse / num_samples; 2628 2629 const unsigned int sad = 2630 cpi->fn_ptr[plane_bsize].sdf(src, src_stride, dst, dst_stride); 2631 const double sad_norm = 2632 (double)sad / (1 << num_pels_log2_lookup[plane_bsize]); 2633 2634 fprintf(fout, " %g %g", sse_norm, sad_norm); 2635 2636 double sse_norm_arr[4], sad_norm_arr[4]; 2637 get_2x2_normalized_sses_and_sads(cpi, plane_bsize, src, src_stride, dst, 2638 dst_stride, src_diff, diff_stride, 2639 sse_norm_arr, sad_norm_arr); 2640 if (shift) { 2641 for (int k = 0; k < 4; ++k) sse_norm_arr[k] /= (1 << (2 * shift)); 2642 for (int k = 0; k < 4; ++k) sad_norm_arr[k] /= (1 << shift); 2643 } 2644 for (int i = 0; i < 4; ++i) { 2645 fprintf(fout, " %g", sse_norm_arr[i]); 2646 } 2647 for (int i = 0; i < 4; ++i) { 2648 fprintf(fout, " %g", sad_norm_arr[i]); 2649 } 2650 2651 fprintf(fout, " %d %d %d %d", q_step, x->rdmult, bw, bh); 2652 2653 int model_rate; 2654 int64_t model_dist; 2655 model_rd_sse_fn[MODELRD_CURVFIT](cpi, x, plane_bsize, plane, sse, num_samples, 2656 &model_rate, &model_dist); 2657 const double model_rdcost_norm = 2658 (double)RDCOST(x->rdmult, model_rate, model_dist) / num_samples; 2659 const double model_rate_norm = (double)model_rate / num_samples; 2660 const double model_dist_norm = (double)model_dist / num_samples; 2661 fprintf(fout, " %g %g %g", model_rate_norm, model_dist_norm, 2662 model_rdcost_norm); 2663 2664 double mean; 2665 if (is_cur_buf_hbd(xd)) { 2666 mean = get_highbd_diff_mean(p->src.buf, p->src.stride, pd->dst.buf, 2667 pd->dst.stride, bw, bh); 2668 } else { 2669 mean = get_diff_mean(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, 2670 bw, bh); 2671 } 2672 mean /= (1 << shift); 2673 float hor_corr, vert_corr; 2674 av1_get_horver_correlation_full(src_diff, diff_stride, bw, bh, &hor_corr, 2675 &vert_corr); 
2676 fprintf(fout, " %g %g %g", mean, hor_corr, vert_corr); 2677 2678 double hdist[4] = { 0 }, vdist[4] = { 0 }; 2679 get_energy_distribution_fine(cpi, plane_bsize, src, src_stride, dst, 2680 dst_stride, 1, hdist, vdist); 2681 fprintf(fout, " %g %g %g %g %g %g %g %g", hdist[0], hdist[1], hdist[2], 2682 hdist[3], vdist[0], vdist[1], vdist[2], vdist[3]); 2683 2684 if (cpi->sf.inter_mode_rd_model_estimation == 1) { 2685 assert(tile_data->inter_mode_rd_models[plane_bsize].ready); 2686 const int64_t overall_sse = get_sse(cpi, x); 2687 int est_residue_cost = 0; 2688 int64_t est_dist = 0; 2689 get_est_rate_dist(tile_data, plane_bsize, overall_sse, &est_residue_cost, 2690 &est_dist); 2691 const double est_residue_cost_norm = (double)est_residue_cost / num_samples; 2692 const double est_dist_norm = (double)est_dist / num_samples; 2693 const double est_rdcost_norm = 2694 (double)RDCOST(x->rdmult, est_residue_cost, est_dist) / num_samples; 2695 fprintf(fout, " %g %g %g", est_residue_cost_norm, est_dist_norm, 2696 est_rdcost_norm); 2697 } 2698 2699 fprintf(fout, "\n"); 2700 fclose(fout); 2701 } 2702 #endif // CONFIG_COLLECT_RD_STATS >= 2 2703 #endif // CONFIG_COLLECT_RD_STATS 2704 2705 static void model_rd_with_dnn(const AV1_COMP *const cpi, 2706 const MACROBLOCK *const x, BLOCK_SIZE plane_bsize, 2707 int plane, int64_t sse, int num_samples, 2708 int *rate, int64_t *dist) { 2709 const MACROBLOCKD *const xd = &x->e_mbd; 2710 const struct macroblockd_plane *const pd = &xd->plane[plane]; 2711 const int log_numpels = num_pels_log2_lookup[plane_bsize]; 2712 2713 const int dequant_shift = (is_cur_buf_hbd(xd)) ? xd->bd - 5 : 3; 2714 const int q_step = AOMMAX(pd->dequant_Q3[1] >> dequant_shift, 1); 2715 2716 const struct macroblock_plane *const p = &x->plane[plane]; 2717 int bw, bh; 2718 get_txb_dimensions(xd, plane, plane_bsize, 0, 0, plane_bsize, NULL, NULL, &bw, 2719 &bh); 2720 const int src_stride = p->src.stride; 2721 const uint8_t *const src = p->src.buf; 2722 const int dst_stride = pd->dst.stride; 2723 const uint8_t *const dst = pd->dst.buf; 2724 const int16_t *const src_diff = p->src_diff; 2725 const int diff_stride = block_size_wide[plane_bsize]; 2726 const int shift = (xd->bd - 8); 2727 2728 if (sse == 0) { 2729 if (rate) *rate = 0; 2730 if (dist) *dist = 0; 2731 return; 2732 } 2733 if (plane) { 2734 int model_rate; 2735 int64_t model_dist; 2736 model_rd_with_curvfit(cpi, x, plane_bsize, plane, sse, num_samples, 2737 &model_rate, &model_dist); 2738 if (rate) *rate = model_rate; 2739 if (dist) *dist = model_dist; 2740 return; 2741 } 2742 2743 aom_clear_system_state(); 2744 const double sse_norm = (double)sse / num_samples; 2745 2746 double sse_norm_arr[4]; 2747 get_2x2_normalized_sses_and_sads(cpi, plane_bsize, src, src_stride, dst, 2748 dst_stride, src_diff, diff_stride, 2749 sse_norm_arr, NULL); 2750 double mean; 2751 if (is_cur_buf_hbd(xd)) { 2752 mean = get_highbd_diff_mean(src, src_stride, dst, dst_stride, bw, bh); 2753 } else { 2754 mean = get_diff_mean(src, src_stride, dst, dst_stride, bw, bh); 2755 } 2756 if (shift) { 2757 for (int k = 0; k < 4; ++k) sse_norm_arr[k] /= (1 << (2 * shift)); 2758 mean /= (1 << shift); 2759 } 2760 double sse_norm_sum = 0.0, sse_frac_arr[3]; 2761 for (int k = 0; k < 4; ++k) sse_norm_sum += sse_norm_arr[k]; 2762 for (int k = 0; k < 3; ++k) 2763 sse_frac_arr[k] = 2764 sse_norm_sum > 0.0 ? 
sse_norm_arr[k] / sse_norm_sum : 0.25; 2765 const double q_sqr = (double)(q_step * q_step); 2766 const double q_sqr_by_sse_norm = q_sqr / (sse_norm + 1.0); 2767 const double mean_sqr_by_sse_norm = mean * mean / (sse_norm + 1.0); 2768 float hor_corr, vert_corr; 2769 av1_get_horver_correlation_full(src_diff, diff_stride, bw, bh, &hor_corr, 2770 &vert_corr); 2771 2772 float features[NUM_FEATURES_PUSTATS]; 2773 features[0] = (float)hor_corr; 2774 features[1] = (float)log_numpels; 2775 features[2] = (float)mean_sqr_by_sse_norm; 2776 features[3] = (float)q_sqr_by_sse_norm; 2777 features[4] = (float)sse_frac_arr[0]; 2778 features[5] = (float)sse_frac_arr[1]; 2779 features[6] = (float)sse_frac_arr[2]; 2780 features[7] = (float)vert_corr; 2781 2782 float rate_f, dist_by_sse_norm_f; 2783 av1_nn_predict(features, &av1_pustats_dist_nnconfig, &dist_by_sse_norm_f); 2784 av1_nn_predict(features, &av1_pustats_rate_nnconfig, &rate_f); 2785 aom_clear_system_state(); 2786 const float dist_f = (float)((double)dist_by_sse_norm_f * (1.0 + sse_norm)); 2787 int rate_i = (int)(AOMMAX(0.0, rate_f * num_samples) + 0.5); 2788 int64_t dist_i = (int64_t)(AOMMAX(0.0, dist_f * num_samples) + 0.5); 2789 2790 // Check if skip is better 2791 if (rate_i == 0) { 2792 dist_i = sse << 4; 2793 } else if (RDCOST(x->rdmult, rate_i, dist_i) >= 2794 RDCOST(x->rdmult, 0, sse << 4)) { 2795 rate_i = 0; 2796 dist_i = sse << 4; 2797 } 2798 2799 if (rate) *rate = rate_i; 2800 if (dist) *dist = dist_i; 2801 return; 2802 } 2803 2804 static void model_rd_for_sb_with_dnn( 2805 const AV1_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd, 2806 int plane_from, int plane_to, int mi_row, int mi_col, int *out_rate_sum, 2807 int64_t *out_dist_sum, int *skip_txfm_sb, int64_t *skip_sse_sb, 2808 int *plane_rate, int64_t *plane_sse, int64_t *plane_dist) { 2809 (void)mi_row; 2810 (void)mi_col; 2811 // Note our transform coeffs are 8 times an orthogonal transform. 2812 // Hence quantizer step is also 8 times. To get effective quantizer 2813 // we need to divide by 8 before sending to modeling function. 
2814 const int ref = xd->mi[0]->ref_frame[0]; 2815 2816 int64_t rate_sum = 0; 2817 int64_t dist_sum = 0; 2818 int64_t total_sse = 0; 2819 2820 for (int plane = plane_from; plane <= plane_to; ++plane) { 2821 struct macroblockd_plane *const pd = &xd->plane[plane]; 2822 const BLOCK_SIZE plane_bsize = 2823 get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y); 2824 int64_t dist, sse; 2825 int rate; 2826 2827 if (x->skip_chroma_rd && plane) continue; 2828 2829 const struct macroblock_plane *const p = &x->plane[plane]; 2830 const int shift = (xd->bd - 8); 2831 int bw, bh; 2832 get_txb_dimensions(xd, plane, plane_bsize, 0, 0, plane_bsize, NULL, NULL, 2833 &bw, &bh); 2834 if (is_cur_buf_hbd(xd)) { 2835 sse = aom_highbd_sse(p->src.buf, p->src.stride, pd->dst.buf, 2836 pd->dst.stride, bw, bh); 2837 } else { 2838 sse = aom_sse(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, bw, 2839 bh); 2840 } 2841 sse = ROUND_POWER_OF_TWO(sse, shift * 2); 2842 2843 model_rd_with_dnn(cpi, x, plane_bsize, plane, sse, bw * bh, &rate, &dist); 2844 2845 if (plane == 0) x->pred_sse[ref] = (unsigned int)AOMMIN(sse, UINT_MAX); 2846 2847 total_sse += sse; 2848 rate_sum += rate; 2849 dist_sum += dist; 2850 2851 if (plane_rate) plane_rate[plane] = rate; 2852 if (plane_sse) plane_sse[plane] = sse; 2853 if (plane_dist) plane_dist[plane] = dist; 2854 } 2855 2856 if (skip_txfm_sb) *skip_txfm_sb = total_sse == 0; 2857 if (skip_sse_sb) *skip_sse_sb = total_sse << 4; 2858 *out_rate_sum = (int)rate_sum; 2859 *out_dist_sum = dist_sum; 2860 } 2861 2862 // Fits a surface for rate and distortion using as features: 2863 // log2(sse_norm + 1) and log2(sse_norm/qstep^2) 2864 static void model_rd_with_surffit(const AV1_COMP *const cpi, 2865 const MACROBLOCK *const x, 2866 BLOCK_SIZE plane_bsize, int plane, 2867 int64_t sse, int num_samples, int *rate, 2868 int64_t *dist) { 2869 (void)cpi; 2870 (void)plane_bsize; 2871 const MACROBLOCKD *const xd = &x->e_mbd; 2872 const struct macroblockd_plane *const pd = &xd->plane[plane]; 2873 const int dequant_shift = (is_cur_buf_hbd(xd)) ? 
xd->bd - 5 : 3; 2874 const int qstep = AOMMAX(pd->dequant_Q3[1] >> dequant_shift, 1); 2875 if (sse == 0) { 2876 if (rate) *rate = 0; 2877 if (dist) *dist = 0; 2878 return; 2879 } 2880 aom_clear_system_state(); 2881 const double sse_norm = (double)sse / num_samples; 2882 const double qstepsqr = (double)qstep * qstep; 2883 const double xm = log(sse_norm + 1.0) / log(2.0); 2884 const double yl = log(sse_norm / qstepsqr) / log(2.0); 2885 double rate_f, dist_by_sse_norm_f; 2886 2887 av1_model_rd_surffit(plane_bsize, sse_norm, xm, yl, &rate_f, 2888 &dist_by_sse_norm_f); 2889 2890 const double dist_f = dist_by_sse_norm_f * sse_norm; 2891 int rate_i = (int)(AOMMAX(0.0, rate_f * num_samples) + 0.5); 2892 int64_t dist_i = (int64_t)(AOMMAX(0.0, dist_f * num_samples) + 0.5); 2893 aom_clear_system_state(); 2894 2895 // Check if skip is better 2896 if (rate_i == 0) { 2897 dist_i = sse << 4; 2898 } else if (RDCOST(x->rdmult, rate_i, dist_i) >= 2899 RDCOST(x->rdmult, 0, sse << 4)) { 2900 rate_i = 0; 2901 dist_i = sse << 4; 2902 } 2903 2904 if (rate) *rate = rate_i; 2905 if (dist) *dist = dist_i; 2906 } 2907 2908 static void model_rd_for_sb_with_surffit( 2909 const AV1_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd, 2910 int plane_from, int plane_to, int mi_row, int mi_col, int *out_rate_sum, 2911 int64_t *out_dist_sum, int *skip_txfm_sb, int64_t *skip_sse_sb, 2912 int *plane_rate, int64_t *plane_sse, int64_t *plane_dist) { 2913 (void)mi_row; 2914 (void)mi_col; 2915 // Note our transform coeffs are 8 times an orthogonal transform. 2916 // Hence quantizer step is also 8 times. To get effective quantizer 2917 // we need to divide by 8 before sending to modeling function. 2918 const int ref = xd->mi[0]->ref_frame[0]; 2919 2920 int64_t rate_sum = 0; 2921 int64_t dist_sum = 0; 2922 int64_t total_sse = 0; 2923 2924 for (int plane = plane_from; plane <= plane_to; ++plane) { 2925 struct macroblockd_plane *const pd = &xd->plane[plane]; 2926 const BLOCK_SIZE plane_bsize = 2927 get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y); 2928 int64_t dist, sse; 2929 int rate; 2930 2931 if (x->skip_chroma_rd && plane) continue; 2932 2933 int bw, bh; 2934 const struct macroblock_plane *const p = &x->plane[plane]; 2935 const int shift = (xd->bd - 8); 2936 get_txb_dimensions(xd, plane, plane_bsize, 0, 0, plane_bsize, NULL, NULL, 2937 &bw, &bh); 2938 if (is_cur_buf_hbd(xd)) { 2939 sse = aom_highbd_sse(p->src.buf, p->src.stride, pd->dst.buf, 2940 pd->dst.stride, bw, bh); 2941 } else { 2942 sse = aom_sse(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, bw, 2943 bh); 2944 } 2945 sse = ROUND_POWER_OF_TWO(sse, shift * 2); 2946 2947 model_rd_with_surffit(cpi, x, plane_bsize, plane, sse, bw * bh, &rate, 2948 &dist); 2949 2950 if (plane == 0) x->pred_sse[ref] = (unsigned int)AOMMIN(sse, UINT_MAX); 2951 2952 total_sse += sse; 2953 rate_sum += rate; 2954 dist_sum += dist; 2955 2956 if (plane_rate) plane_rate[plane] = rate; 2957 if (plane_sse) plane_sse[plane] = sse; 2958 if (plane_dist) plane_dist[plane] = dist; 2959 } 2960 2961 if (skip_txfm_sb) *skip_txfm_sb = total_sse == 0; 2962 if (skip_sse_sb) *skip_sse_sb = total_sse << 4; 2963 *out_rate_sum = (int)rate_sum; 2964 *out_dist_sum = dist_sum; 2965 } 2966 2967 // Fits a curve for rate and distortion using as feature: 2968 // log2(sse_norm/qstep^2) 2969 static void model_rd_with_curvfit(const AV1_COMP *const cpi, 2970 const MACROBLOCK *const x, 2971 BLOCK_SIZE plane_bsize, int plane, 2972 int64_t sse, int num_samples, int *rate, 2973 int64_t *dist) 
{ 2974 (void)cpi; 2975 (void)plane_bsize; 2976 const MACROBLOCKD *const xd = &x->e_mbd; 2977 const struct macroblockd_plane *const pd = &xd->plane[plane]; 2978 const int dequant_shift = (is_cur_buf_hbd(xd)) ? xd->bd - 5 : 3; 2979 const int qstep = AOMMAX(pd->dequant_Q3[1] >> dequant_shift, 1); 2980 2981 if (sse == 0) { 2982 if (rate) *rate = 0; 2983 if (dist) *dist = 0; 2984 return; 2985 } 2986 aom_clear_system_state(); 2987 const double sse_norm = (double)sse / num_samples; 2988 const double qstepsqr = (double)qstep * qstep; 2989 const double xqr = log2(sse_norm / qstepsqr); 2990 2991 double rate_f, dist_by_sse_norm_f; 2992 av1_model_rd_curvfit(plane_bsize, sse_norm, xqr, &rate_f, 2993 &dist_by_sse_norm_f); 2994 2995 const double dist_f = dist_by_sse_norm_f * sse_norm; 2996 int rate_i = (int)(AOMMAX(0.0, rate_f * num_samples) + 0.5); 2997 int64_t dist_i = (int64_t)(AOMMAX(0.0, dist_f * num_samples) + 0.5); 2998 aom_clear_system_state(); 2999 3000 // Check if skip is better 3001 if (rate_i == 0) { 3002 dist_i = sse << 4; 3003 } else if (RDCOST(x->rdmult, rate_i, dist_i) >= 3004 RDCOST(x->rdmult, 0, sse << 4)) { 3005 rate_i = 0; 3006 dist_i = sse << 4; 3007 } 3008 3009 if (rate) *rate = rate_i; 3010 if (dist) *dist = dist_i; 3011 } 3012 3013 static void model_rd_for_sb_with_curvfit( 3014 const AV1_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd, 3015 int plane_from, int plane_to, int mi_row, int mi_col, int *out_rate_sum, 3016 int64_t *out_dist_sum, int *skip_txfm_sb, int64_t *skip_sse_sb, 3017 int *plane_rate, int64_t *plane_sse, int64_t *plane_dist) { 3018 (void)mi_row; 3019 (void)mi_col; 3020 // Note our transform coeffs are 8 times an orthogonal transform. 3021 // Hence quantizer step is also 8 times. To get effective quantizer 3022 // we need to divide by 8 before sending to modeling function. 
3023 const int ref = xd->mi[0]->ref_frame[0]; 3024 3025 int64_t rate_sum = 0; 3026 int64_t dist_sum = 0; 3027 int64_t total_sse = 0; 3028 3029 for (int plane = plane_from; plane <= plane_to; ++plane) { 3030 struct macroblockd_plane *const pd = &xd->plane[plane]; 3031 const BLOCK_SIZE plane_bsize = 3032 get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y); 3033 int64_t dist, sse; 3034 int rate; 3035 3036 if (x->skip_chroma_rd && plane) continue; 3037 3038 int bw, bh; 3039 const struct macroblock_plane *const p = &x->plane[plane]; 3040 const int shift = (xd->bd - 8); 3041 get_txb_dimensions(xd, plane, plane_bsize, 0, 0, plane_bsize, NULL, NULL, 3042 &bw, &bh); 3043 3044 if (is_cur_buf_hbd(xd)) { 3045 sse = aom_highbd_sse(p->src.buf, p->src.stride, pd->dst.buf, 3046 pd->dst.stride, bw, bh); 3047 } else { 3048 sse = aom_sse(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, bw, 3049 bh); 3050 } 3051 3052 sse = ROUND_POWER_OF_TWO(sse, shift * 2); 3053 model_rd_with_curvfit(cpi, x, plane_bsize, plane, sse, bw * bh, &rate, 3054 &dist); 3055 3056 if (plane == 0) x->pred_sse[ref] = (unsigned int)AOMMIN(sse, UINT_MAX); 3057 3058 total_sse += sse; 3059 rate_sum += rate; 3060 dist_sum += dist; 3061 3062 if (plane_rate) plane_rate[plane] = rate; 3063 if (plane_sse) plane_sse[plane] = sse; 3064 if (plane_dist) plane_dist[plane] = dist; 3065 } 3066 3067 if (skip_txfm_sb) *skip_txfm_sb = total_sse == 0; 3068 if (skip_sse_sb) *skip_sse_sb = total_sse << 4; 3069 *out_rate_sum = (int)rate_sum; 3070 *out_dist_sum = dist_sum; 3071 } 3072 3073 static int64_t search_txk_type(const AV1_COMP *cpi, MACROBLOCK *x, int plane, 3074 int block, int blk_row, int blk_col, 3075 BLOCK_SIZE plane_bsize, TX_SIZE tx_size, 3076 const TXB_CTX *const txb_ctx, 3077 FAST_TX_SEARCH_MODE ftxs_mode, 3078 int use_fast_coef_costing, int skip_trellis, 3079 int64_t ref_best_rd, RD_STATS *best_rd_stats) { 3080 const AV1_COMMON *cm = &cpi->common; 3081 MACROBLOCKD *xd = &x->e_mbd; 3082 struct macroblockd_plane *const pd = &xd->plane[plane]; 3083 MB_MODE_INFO *mbmi = xd->mi[0]; 3084 const int is_inter = is_inter_block(mbmi); 3085 int64_t best_rd = INT64_MAX; 3086 uint16_t best_eob = 0; 3087 TX_TYPE best_tx_type = DCT_DCT; 3088 TX_TYPE last_tx_type = TX_TYPES; 3089 const int fast_tx_search = ftxs_mode & FTXS_DCT_AND_1D_DCT_ONLY; 3090 // The buffer used to swap dqcoeff in macroblockd_plane so we can keep dqcoeff 3091 // of the best tx_type 3092 DECLARE_ALIGNED(32, tran_low_t, this_dqcoeff[MAX_SB_SQUARE]); 3093 tran_low_t *orig_dqcoeff = pd->dqcoeff; 3094 tran_low_t *best_dqcoeff = this_dqcoeff; 3095 const int txk_type_idx = 3096 av1_get_txk_type_index(plane_bsize, blk_row, blk_col); 3097 int perform_block_coeff_opt; 3098 av1_invalid_rd_stats(best_rd_stats); 3099 3100 TXB_RD_INFO *intra_txb_rd_info = NULL; 3101 uint16_t cur_joint_ctx = 0; 3102 const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2); 3103 const int mi_col = -xd->mb_to_left_edge >> (3 + MI_SIZE_LOG2); 3104 const int within_border = 3105 mi_row >= xd->tile.mi_row_start && 3106 (mi_row + mi_size_high[plane_bsize] < xd->tile.mi_row_end) && 3107 mi_col >= xd->tile.mi_col_start && 3108 (mi_col + mi_size_wide[plane_bsize] < xd->tile.mi_col_end); 3109 skip_trellis |= 3110 cpi->optimize_seg_arr[mbmi->segment_id] == NO_TRELLIS_OPT || 3111 cpi->optimize_seg_arr[mbmi->segment_id] == FINAL_PASS_TRELLIS_OPT; 3112 if (within_border && cpi->sf.use_intra_txb_hash && frame_is_intra_only(cm) && 3113 !is_inter && plane == 0 && 3114 tx_size_wide[tx_size] == 
tx_size_high[tx_size]) { 3115 const uint32_t intra_hash = 3116 get_intra_txb_hash(x, plane, blk_row, blk_col, plane_bsize, tx_size); 3117 const int intra_hash_idx = 3118 find_tx_size_rd_info(&x->txb_rd_record_intra, intra_hash); 3119 intra_txb_rd_info = &x->txb_rd_record_intra.tx_rd_info[intra_hash_idx]; 3120 3121 cur_joint_ctx = (txb_ctx->dc_sign_ctx << 8) + txb_ctx->txb_skip_ctx; 3122 if (intra_txb_rd_info->entropy_context == cur_joint_ctx && 3123 x->txb_rd_record_intra.tx_rd_info[intra_hash_idx].valid) { 3124 mbmi->txk_type[txk_type_idx] = intra_txb_rd_info->tx_type; 3125 const TX_TYPE ref_tx_type = 3126 av1_get_tx_type(get_plane_type(plane), &x->e_mbd, blk_row, blk_col, 3127 tx_size, cpi->common.reduced_tx_set_used); 3128 if (ref_tx_type == intra_txb_rd_info->tx_type) { 3129 best_rd_stats->rate = intra_txb_rd_info->rate; 3130 best_rd_stats->dist = intra_txb_rd_info->dist; 3131 best_rd_stats->sse = intra_txb_rd_info->sse; 3132 best_rd_stats->skip = intra_txb_rd_info->eob == 0; 3133 x->plane[plane].eobs[block] = intra_txb_rd_info->eob; 3134 x->plane[plane].txb_entropy_ctx[block] = 3135 intra_txb_rd_info->txb_entropy_ctx; 3136 best_rd = RDCOST(x->rdmult, best_rd_stats->rate, best_rd_stats->dist); 3137 best_eob = intra_txb_rd_info->eob; 3138 best_tx_type = intra_txb_rd_info->tx_type; 3139 update_txk_array(mbmi->txk_type, plane_bsize, blk_row, blk_col, tx_size, 3140 best_tx_type); 3141 goto RECON_INTRA; 3142 } 3143 } 3144 } 3145 3146 int rate_cost = 0; 3147 TX_TYPE txk_start = DCT_DCT; 3148 TX_TYPE txk_end = TX_TYPES - 1; 3149 if ((!is_inter && x->use_default_intra_tx_type) || 3150 (is_inter && x->use_default_inter_tx_type)) { 3151 txk_start = txk_end = 3152 get_default_tx_type(0, xd, tx_size, cpi->is_screen_content_type); 3153 } else if (x->rd_model == LOW_TXFM_RD || x->cb_partition_scan) { 3154 if (plane == 0) txk_end = DCT_DCT; 3155 } 3156 3157 uint8_t best_txb_ctx = 0; 3158 const TxSetType tx_set_type = 3159 av1_get_ext_tx_set_type(tx_size, is_inter, cm->reduced_tx_set_used); 3160 3161 TX_TYPE uv_tx_type = DCT_DCT; 3162 if (plane) { 3163 // tx_type of PLANE_TYPE_UV should be the same as PLANE_TYPE_Y 3164 uv_tx_type = txk_start = txk_end = 3165 av1_get_tx_type(get_plane_type(plane), xd, blk_row, blk_col, tx_size, 3166 cm->reduced_tx_set_used); 3167 } 3168 const uint16_t ext_tx_used_flag = av1_ext_tx_used_flag[tx_set_type]; 3169 if (xd->lossless[mbmi->segment_id] || txsize_sqr_up_map[tx_size] > TX_32X32 || 3170 ext_tx_used_flag == 0x0001 || 3171 (is_inter && cpi->oxcf.use_inter_dct_only) || 3172 (!is_inter && cpi->oxcf.use_intra_dct_only)) { 3173 txk_start = txk_end = DCT_DCT; 3174 } 3175 uint16_t allowed_tx_mask = 0; // 1: allow; 0: skip. 
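// Bit i of allowed_tx_mask corresponds to TX_TYPE i; e.g. the fast-search
// mask 0x0c01 used below has bits 0, 10 and 11 set, i.e. DCT_DCT, V_DCT and
// H_DCT under the usual TX_TYPE enum ordering.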
3176   if (txk_start == txk_end) {
3177     allowed_tx_mask = 1 << txk_start;
3178     allowed_tx_mask &= ext_tx_used_flag;
3179   } else if (fast_tx_search) {
3180     allowed_tx_mask = 0x0c01;  // V_DCT, H_DCT, DCT_DCT
3181     allowed_tx_mask &= ext_tx_used_flag;
3182   } else {
3183     assert(plane == 0);
3184     allowed_tx_mask = ext_tx_used_flag;
3185     // !fast_tx_search && txk_end != txk_start && plane == 0
3186     const int do_prune = cpi->sf.tx_type_search.prune_mode > NO_PRUNE;
3187     if (do_prune && is_inter) {
3188       if (cpi->sf.tx_type_search.prune_mode >= PRUNE_2D_ACCURATE) {
3189         const uint16_t prune =
3190             prune_tx_2D(x, plane_bsize, tx_size, blk_row, blk_col, tx_set_type,
3191                         cpi->sf.tx_type_search.prune_mode);
3192         allowed_tx_mask &= (~prune);
3193       } else {
3194         allowed_tx_mask &= (~x->tx_search_prune[tx_set_type]);
3195       }
3196     }
3197   }
3198
3199   if (cpi->oxcf.enable_flip_idtx == 0) {
3200     for (TX_TYPE tx_type = FLIPADST_DCT; tx_type <= H_FLIPADST; ++tx_type) {
3201       allowed_tx_mask &= ~(1 << tx_type);
3202     }
3203   }
3204
3205   // Need to have at least one transform type allowed.
3206   if (allowed_tx_mask == 0) {
3207     txk_start = txk_end = (plane ? uv_tx_type : DCT_DCT);
3208     allowed_tx_mask = (1 << txk_start);
3209   }
3210
3211   const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
3212   int64_t block_sse = 0;
3213   unsigned int block_mse_q8 = UINT_MAX;
3214   block_sse = pixel_diff_dist(x, plane, blk_row, blk_col, plane_bsize, tx_bsize,
3215                               &block_mse_q8);
3216   assert(block_mse_q8 != UINT_MAX);
3217   if (is_cur_buf_hbd(xd)) {
3218     block_sse = ROUND_POWER_OF_TWO(block_sse, (xd->bd - 8) * 2);
3219     block_mse_q8 = ROUND_POWER_OF_TWO(block_mse_q8, (xd->bd - 8) * 2);
3220   }
3221   block_sse *= 16;
3222   // Transform domain distortion is accurate for higher residuals.
3223   // TODO(any): Experiment with variance and mean based thresholds
3224   int use_transform_domain_distortion =
3225       (cpi->sf.use_transform_domain_distortion > 0) &&
3226       (block_mse_q8 >= cpi->tx_domain_dist_threshold) &&
3227       // Any 64-pt transform only preserves half the coefficients.
3228       // Therefore transform domain distortion is not valid for these
3229       // transform sizes.
3230       txsize_sqr_up_map[tx_size] != TX_64X64;
3231 #if CONFIG_DIST_8X8
3232   if (x->using_dist_8x8) use_transform_domain_distortion = 0;
3233 #endif
3234   int calc_pixel_domain_distortion_final =
3235       cpi->sf.use_transform_domain_distortion == 1 &&
3236       use_transform_domain_distortion && x->rd_model != LOW_TXFM_RD &&
3237       !x->cb_partition_scan;
3238   if (calc_pixel_domain_distortion_final &&
3239       (txk_start == txk_end || allowed_tx_mask == 0x0001))
3240     calc_pixel_domain_distortion_final = use_transform_domain_distortion = 0;
3241
3242   const uint16_t *eobs_ptr = x->plane[plane].eobs;
3243
3244   // Use MSE-based threshold logic to decide whether to run R-D optimization
3245   // of the coeffs. For smaller residuals, coeff optimization tends to help;
3246   // for larger residuals, R-D optimization may not be effective.
3247   // TODO(any): Experiment with variance and mean based thresholds
3248   perform_block_coeff_opt = (block_mse_q8 <= cpi->coeff_opt_dist_threshold);
3249
3250   for (TX_TYPE tx_type = txk_start; tx_type <= txk_end; ++tx_type) {
3251     if (!(allowed_tx_mask & (1 << tx_type))) continue;
3252     if (plane == 0) mbmi->txk_type[txk_type_idx] = tx_type;
3253     RD_STATS this_rd_stats;
3254     av1_invalid_rd_stats(&this_rd_stats);
3255     if (skip_trellis || (!perform_block_coeff_opt)) {
3256       av1_xform_quant(
3257           cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size, tx_type,
3258           USE_B_QUANT_NO_TRELLIS ? AV1_XFORM_QUANT_B : AV1_XFORM_QUANT_FP);
3259       rate_cost = av1_cost_coeffs(cm, x, plane, block, tx_size, tx_type,
3260                                   txb_ctx, use_fast_coef_costing);
3261     } else {
3262       av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize,
3263                       tx_size, tx_type, AV1_XFORM_QUANT_FP);
3264       if (cpi->sf.optimize_b_precheck && best_rd < INT64_MAX &&
3265           eobs_ptr[block] >= 4) {
3266         // Calculate distortion quickly in transform domain.
3267         dist_block_tx_domain(x, plane, block, tx_size, &this_rd_stats.dist,
3268                              &this_rd_stats.sse);
3269
3270         const int64_t best_rd_ = AOMMIN(best_rd, ref_best_rd);
3271         const int64_t dist_cost_estimate =
3272             RDCOST(x->rdmult, 0, AOMMIN(this_rd_stats.dist, this_rd_stats.sse));
3273         if (dist_cost_estimate - (dist_cost_estimate >> 3) > best_rd_) continue;
3274       }
3275       av1_optimize_b(cpi, x, plane, block, tx_size, tx_type, txb_ctx,
3276                      cpi->sf.trellis_eob_fast, &rate_cost);
3277     }
3278     if (eobs_ptr[block] == 0) {
3279       // When eob is 0, pixel domain distortion is more efficient and accurate.
3280       this_rd_stats.dist = this_rd_stats.sse = block_sse;
3281     } else if (use_transform_domain_distortion) {
3282       dist_block_tx_domain(x, plane, block, tx_size, &this_rd_stats.dist,
3283                            &this_rd_stats.sse);
3284     } else {
3285       int64_t sse_diff = INT64_MAX;
3286       // The high_energy threshold assumes that every pixel within a txfm block
3287       // has a residue energy of at least 25% of the maximum, i.e. 128 * 128
3288       // for 8-bit input; the threshold is then scaled by the input bit depth.
3289       const int64_t high_energy_thresh =
3290           ((int64_t)128 * 128 * tx_size_2d[tx_size]) << ((xd->bd - 8) * 2);
3291       const int is_high_energy = (block_sse >= high_energy_thresh);
3292       if (tx_size == TX_64X64 || is_high_energy) {
3293         // Because 3 out of 4 quadrants of the transform coefficients are
3294         // forced to zero, the inverse transform has a tendency to overflow.
3295         // sse_diff is effectively the energy of those 3 quadrants; here we use
3296         // it to decide whether to compute pixel domain distortion. If the
3297         // energy is mostly in the first quadrant, an overflow in the inverse
3298         // transform is unlikely.
3299         dist_block_tx_domain(x, plane, block, tx_size, &this_rd_stats.dist,
3300                              &this_rd_stats.sse);
3301         sse_diff = block_sse - this_rd_stats.sse;
3302       }
3303       if (tx_size != TX_64X64 || !is_high_energy ||
3304           (sse_diff * 2) < this_rd_stats.sse) {
3305         const int64_t tx_domain_dist = this_rd_stats.dist;
3306         this_rd_stats.dist = dist_block_px_domain(
3307             cpi, x, plane, plane_bsize, block, blk_row, blk_col, tx_size);
3308         // For high energy blocks, the pixel domain distortion can occasionally
3309         // be artificially low due to clamping at the reconstruction stage,
3310         // even when the inverse transform output is hugely different from the
3311         // actual residue.
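        // In that case, fall back to the (larger) transform domain distortion
        // as the more conservative estimate.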
3312         if (is_high_energy && this_rd_stats.dist < tx_domain_dist)
3313           this_rd_stats.dist = tx_domain_dist;
3314       } else {
3315         this_rd_stats.dist += sse_diff;
3316       }
3317       this_rd_stats.sse = block_sse;
3318     }
3319
3320     this_rd_stats.rate = rate_cost;
3321
3322     const int64_t rd =
3323         RDCOST(x->rdmult, this_rd_stats.rate, this_rd_stats.dist);
3324
3325     if (rd < best_rd) {
3326       best_rd = rd;
3327       *best_rd_stats = this_rd_stats;
3328       best_tx_type = tx_type;
3329       best_txb_ctx = x->plane[plane].txb_entropy_ctx[block];
3330       best_eob = x->plane[plane].eobs[block];
3331       last_tx_type = best_tx_type;
3332
3333       // Swap dqcoeff buffers to keep the best tx_type's dequantized coeffs.
3334       tran_low_t *const tmp_dqcoeff = best_dqcoeff;
3335       best_dqcoeff = pd->dqcoeff;
3336       pd->dqcoeff = tmp_dqcoeff;
3337     }
3338
3339 #if CONFIG_COLLECT_RD_STATS == 1
3340     if (plane == 0) {
3341       PrintTransformUnitStats(cpi, x, &this_rd_stats, blk_row, blk_col,
3342                               plane_bsize, tx_size, tx_type, rd);
3343     }
3344 #endif  // CONFIG_COLLECT_RD_STATS == 1
3345
3346 #if COLLECT_TX_SIZE_DATA
3347     // Generate a small sample to restrict output size.
3348     static unsigned int seed = 21743;
3349     if (lcg_rand16(&seed) % 200 == 0) {
3350       FILE *fp = NULL;
3351
3352       if (within_border) {
3353         fp = fopen(av1_tx_size_data_output_file, "a");
3354       }
3355
3356       if (fp) {
3357         // Transform info and RD
3358         const int txb_w = tx_size_wide[tx_size];
3359         const int txb_h = tx_size_high[tx_size];
3360
3361         // Residue signal.
3362         const int diff_stride = block_size_wide[plane_bsize];
3363         struct macroblock_plane *const p = &x->plane[plane];
3364         const int16_t *src_diff =
3365             &p->src_diff[(blk_row * diff_stride + blk_col) * 4];
3366
3367         for (int r = 0; r < txb_h; ++r) {
3368           for (int c = 0; c < txb_w; ++c) {
3369             fprintf(fp, "%d,", src_diff[c]);
3370           }
3371           src_diff += diff_stride;
3372         }
3373
3374         fprintf(fp, "%d,%d,%d,%" PRId64, txb_w, txb_h, tx_type, rd);
3375         fprintf(fp, "\n");
3376         fclose(fp);
3377       }
3378     }
3379 #endif  // COLLECT_TX_SIZE_DATA
3380
3381     if (cpi->sf.adaptive_txb_search_level) {
3382       if ((best_rd - (best_rd >> cpi->sf.adaptive_txb_search_level)) >
3383           ref_best_rd) {
3384         break;
3385       }
3386     }
3387
3388     // Skip the remaining transform type search once the block has been
3389     // quantized to all zero with a better rdcost than coding the transform.
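    // (best_eob == 0 means the best candidate so far quantized every
    // coefficient to zero, so further tx_type candidates are unlikely to
    // improve on the skip decision.)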
3390     if (cpi->sf.tx_type_search.skip_tx_search && !best_eob) break;
3391   }
3392
3393   assert(best_rd != INT64_MAX);
3394
3395   best_rd_stats->skip = best_eob == 0;
3396   if (plane == 0) {
3397     update_txk_array(mbmi->txk_type, plane_bsize, blk_row, blk_col, tx_size,
3398                      best_tx_type);
3399   }
3400   x->plane[plane].txb_entropy_ctx[block] = best_txb_ctx;
3401   x->plane[plane].eobs[block] = best_eob;
3402
3403   pd->dqcoeff = best_dqcoeff;
3404
3405   if (calc_pixel_domain_distortion_final && best_eob) {
3406     best_rd_stats->dist = dist_block_px_domain(
3407         cpi, x, plane, plane_bsize, block, blk_row, blk_col, tx_size);
3408     best_rd_stats->sse = block_sse;
3409   }
3410
3411   if (intra_txb_rd_info != NULL) {
3412     intra_txb_rd_info->valid = 1;
3413     intra_txb_rd_info->entropy_context = cur_joint_ctx;
3414     intra_txb_rd_info->rate = best_rd_stats->rate;
3415     intra_txb_rd_info->dist = best_rd_stats->dist;
3416     intra_txb_rd_info->sse = best_rd_stats->sse;
3417     intra_txb_rd_info->eob = best_eob;
3418     intra_txb_rd_info->txb_entropy_ctx = best_txb_ctx;
3419     if (plane == 0) intra_txb_rd_info->tx_type = best_tx_type;
3420   }
3421
3422 RECON_INTRA:
3423   if (!is_inter && best_eob &&
3424       (blk_row + tx_size_high_unit[tx_size] < mi_size_high[plane_bsize] ||
3425        blk_col + tx_size_wide_unit[tx_size] < mi_size_wide[plane_bsize])) {
3426     // Intra mode needs the decoded result so that the next transform block
3427     // can use it for prediction.
3428     // If the last searched tx_type is the best tx_type, we don't need to
3429     // do this again.
3430     if (best_tx_type != last_tx_type) {
3431       if (skip_trellis) {
3432         av1_xform_quant(
3433             cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
3434             best_tx_type,
3435             USE_B_QUANT_NO_TRELLIS ? AV1_XFORM_QUANT_B : AV1_XFORM_QUANT_FP);
3436       } else {
3437         av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize,
3438                         tx_size, best_tx_type, AV1_XFORM_QUANT_FP);
3439         av1_optimize_b(cpi, x, plane, block, tx_size, best_tx_type, txb_ctx,
3440                        cpi->sf.trellis_eob_fast, &rate_cost);
3441       }
3442     }
3443
3444     inverse_transform_block_facade(xd, plane, block, blk_row, blk_col,
3445                                    x->plane[plane].eobs[block],
3446                                    cm->reduced_tx_set_used);
3447
3448     // This may happen because of a hash collision: the eob stored in the hash
3449     // table is non-zero, but the real eob is zero. We need to make sure
3450     // tx_type is DCT_DCT in this case.
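    // (When eob is 0, no tx_type is signaled in the bitstream and the decoder
    // assumes DCT_DCT, so the encoder-side record must be reset to match.)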
3451     if (plane == 0 && x->plane[plane].eobs[block] == 0 &&
3452         best_tx_type != DCT_DCT) {
3453       update_txk_array(mbmi->txk_type, plane_bsize, blk_row, blk_col, tx_size,
3454                        DCT_DCT);
3455     }
3456   }
3457   pd->dqcoeff = orig_dqcoeff;
3458
3459   return best_rd;
3460 }
3461
3462 static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
3463                           BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
3464   struct rdcost_block_args *args = arg;
3465   MACROBLOCK *const x = args->x;
3466   MACROBLOCKD *const xd = &x->e_mbd;
3467   const int is_inter = is_inter_block(xd->mi[0]);
3468   const AV1_COMP *cpi = args->cpi;
3469   ENTROPY_CONTEXT *a = args->t_above + blk_col;
3470   ENTROPY_CONTEXT *l = args->t_left + blk_row;
3471   const AV1_COMMON *cm = &cpi->common;
3472   RD_STATS this_rd_stats;
3473
3474   av1_init_rd_stats(&this_rd_stats);
3475
3476   if (args->exit_early) {
3477     args->incomplete_exit = 1;
3478     return;
3479   }
3480
3481   if (!is_inter) {
3482     av1_predict_intra_block_facade(cm, xd, plane, blk_col, blk_row, tx_size);
3483     av1_subtract_txb(x, plane, plane_bsize, blk_col, blk_row, tx_size);
3484   }
3485   TXB_CTX txb_ctx;
3486   get_txb_ctx(plane_bsize, tx_size, plane, a, l, &txb_ctx);
3487   search_txk_type(cpi, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
3488                   &txb_ctx, args->ftxs_mode, args->use_fast_coef_costing,
3489                   args->skip_trellis, args->best_rd - args->this_rd,
3490                   &this_rd_stats);
3491
3492   if (plane == AOM_PLANE_Y && xd->cfl.store_y) {
3493     assert(!is_inter || plane_bsize < BLOCK_8X8);
3494     cfl_store_tx(xd, blk_row, blk_col, tx_size, plane_bsize);
3495   }
3496
3497 #if CONFIG_RD_DEBUG
3498   av1_update_txb_coeff_cost(&this_rd_stats, plane, tx_size, blk_row, blk_col,
3499                             this_rd_stats.rate);
3500 #endif  // CONFIG_RD_DEBUG
3501   av1_set_txb_context(x, plane, block, tx_size, a, l);
3502
3503   const int blk_idx =
3504       blk_row * (block_size_wide[plane_bsize] >> tx_size_wide_log2[0]) +
3505       blk_col;
3506
3507   if (plane == 0)
3508     set_blk_skip(x, plane, blk_idx, x->plane[plane].eobs[block] == 0);
3509   else
3510     set_blk_skip(x, plane, blk_idx, 0);
3511
3512   const int64_t rd1 = RDCOST(x->rdmult, this_rd_stats.rate, this_rd_stats.dist);
3513   const int64_t rd2 = RDCOST(x->rdmult, 0, this_rd_stats.sse);
3514
3515   // TODO(jingning): temporarily enabled only for luma component
3516   const int64_t rd = AOMMIN(rd1, rd2);
3517
3518   this_rd_stats.skip &= !x->plane[plane].eobs[block];
3519
3520   av1_merge_rd_stats(&args->rd_stats, &this_rd_stats);
3521
3522   args->this_rd += rd;
3523
3524   if (args->this_rd > args->best_rd) args->exit_early = 1;
3525 }
3526
3527 static void txfm_rd_in_plane(MACROBLOCK *x, const AV1_COMP *cpi,
3528                              RD_STATS *rd_stats, int64_t ref_best_rd,
3529                              int64_t this_rd, int plane, BLOCK_SIZE bsize,
3530                              TX_SIZE tx_size, int use_fast_coef_costing,
3531                              FAST_TX_SEARCH_MODE ftxs_mode, int skip_trellis) {
3532   MACROBLOCKD *const xd = &x->e_mbd;
3533   const struct macroblockd_plane *const pd = &xd->plane[plane];
3534   struct rdcost_block_args args;
3535   av1_zero(args);
3536   args.x = x;
3537   args.cpi = cpi;
3538   args.best_rd = ref_best_rd;
3539   args.use_fast_coef_costing = use_fast_coef_costing;
3540   args.ftxs_mode = ftxs_mode;
3541   args.this_rd = this_rd;
3542   args.skip_trellis = skip_trellis;
3543   av1_init_rd_stats(&args.rd_stats);
3544
3545   if (!cpi->oxcf.enable_tx64 && txsize_sqr_up_map[tx_size] == TX_64X64) {
3546     av1_invalid_rd_stats(rd_stats);
3547     return;
3548   }
3549
3550   if (plane == 0) xd->mi[0]->tx_size = tx_size;
3551
3552   av1_get_entropy_contexts(bsize, pd, args.t_above,
                           args.t_left);
3553
3554   if (args.this_rd > args.best_rd) {
3555     args.exit_early = 1;
3556   }
3557
3558   av1_foreach_transformed_block_in_plane(xd, bsize, plane, block_rd_txfm,
3559                                          &args);
3560
3561   MB_MODE_INFO *const mbmi = xd->mi[0];
3562   const int is_inter = is_inter_block(mbmi);
3563   const int invalid_rd = is_inter ? args.incomplete_exit : args.exit_early;
3564
3565   if (invalid_rd) {
3566     av1_invalid_rd_stats(rd_stats);
3567   } else {
3568     *rd_stats = args.rd_stats;
3569   }
3570 }
3571
3572 static int tx_size_cost(const AV1_COMMON *const cm, const MACROBLOCK *const x,
3573                         BLOCK_SIZE bsize, TX_SIZE tx_size) {
3574   assert(bsize == x->e_mbd.mi[0]->sb_type);
3575   if (cm->tx_mode != TX_MODE_SELECT || !block_signals_txsize(bsize)) return 0;
3576
3577   const int32_t tx_size_cat = bsize_to_tx_size_cat(bsize);
3578   const int depth = tx_size_to_depth(tx_size, bsize);
3579   const MACROBLOCKD *const xd = &x->e_mbd;
3580   const int tx_size_ctx = get_tx_size_context(xd);
3581   return x->tx_size_cost[tx_size_cat][tx_size_ctx][depth];
3582 }
3583
3584 static int64_t txfm_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
3585                         RD_STATS *rd_stats, int64_t ref_best_rd, BLOCK_SIZE bs,
3586                         TX_SIZE tx_size, FAST_TX_SEARCH_MODE ftxs_mode,
3587                         int skip_trellis) {
3588   const AV1_COMMON *const cm = &cpi->common;
3589   MACROBLOCKD *const xd = &x->e_mbd;
3590   MB_MODE_INFO *const mbmi = xd->mi[0];
3591   int64_t rd = INT64_MAX;
3592   const int skip_ctx = av1_get_skip_context(xd);
3593   int s0, s1;
3594   const int is_inter = is_inter_block(mbmi);
3595   const int tx_select =
3596       cm->tx_mode == TX_MODE_SELECT && block_signals_txsize(mbmi->sb_type);
3597   int ctx = txfm_partition_context(
3598       xd->above_txfm_context, xd->left_txfm_context, mbmi->sb_type, tx_size);
3599   const int r_tx_size = is_inter ? x->txfm_partition_cost[ctx][0]
3600                                  : tx_size_cost(cm, x, bs, tx_size);
3601
3602   assert(IMPLIES(is_rect_tx(tx_size), is_rect_tx_allowed_bsize(bs)));
3603
3604   s0 = x->skip_cost[skip_ctx][0];
3605   s1 = x->skip_cost[skip_ctx][1];
3606
3607   int64_t skip_rd;
3608   int64_t this_rd;
3609
3610   if (is_inter) {
3611     skip_rd = RDCOST(x->rdmult, s1, 0);
3612     this_rd = RDCOST(x->rdmult, s0 + r_tx_size * tx_select, 0);
3613   } else {
3614     skip_rd = RDCOST(x->rdmult, s1 + r_tx_size * tx_select, 0);
3615     this_rd = RDCOST(x->rdmult, s0 + r_tx_size * tx_select, 0);
3616   }
3617
3618   mbmi->tx_size = tx_size;
3619   txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, AOMMIN(this_rd, skip_rd),
3620                    AOM_PLANE_Y, bs, tx_size, cpi->sf.use_fast_coef_costing,
3621                    ftxs_mode, skip_trellis);
3622   if (rd_stats->rate == INT_MAX) return INT64_MAX;
3623
3624   // rd_stats->rate should include all the rate except the skip/non-skip cost,
3625   // as the latter is accounted for in the caller functions after rd
3626   // evaluation of all planes. However, the decision here should be made after
3627   // considering the skip/non-skip header cost.
3628   if (rd_stats->skip) {
3629     if (is_inter) {
3630       rd = RDCOST(x->rdmult, s1, rd_stats->sse);
3631     } else {
3632       rd = RDCOST(x->rdmult, s1 + r_tx_size * tx_select, rd_stats->sse);
3633       rd_stats->rate += r_tx_size * tx_select;
3634     }
3635   } else {
3636     rd = RDCOST(x->rdmult, rd_stats->rate + s0 + r_tx_size * tx_select,
3637                 rd_stats->dist);
3638     rd_stats->rate += r_tx_size * tx_select;
3639   }
3640   if (is_inter && !xd->lossless[xd->mi[0]->segment_id]) {
3641     int64_t temp_skip_rd = RDCOST(x->rdmult, s1, rd_stats->sse);
3642     if (temp_skip_rd <= rd) {
3643       rd = temp_skip_rd;
3644       rd_stats->rate = 0;
3645       rd_stats->dist = rd_stats->sse;
3646       rd_stats->skip = 1;
3647     }
3648   }
3649
3650   return rd;
3651 }
3652
3653 static int64_t estimate_yrd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bs,
3654                                    MACROBLOCK *x, int64_t ref_best_rd,
3655                                    RD_STATS *rd_stats) {
3656   MACROBLOCKD *const xd = &x->e_mbd;
3657   av1_subtract_plane(x, bs, 0);
3658   x->rd_model = LOW_TXFM_RD;
3659   int skip_trellis = cpi->optimize_seg_arr[xd->mi[0]->segment_id] ==
3660                      NO_ESTIMATE_YRD_TRELLIS_OPT;
3661   const int64_t rd =
3662       txfm_yrd(cpi, x, rd_stats, ref_best_rd, bs, max_txsize_rect_lookup[bs],
3663                FTXS_NONE, skip_trellis);
3664   x->rd_model = FULL_TXFM_RD;
3665   if (rd != INT64_MAX) {
3666     const int skip_ctx = av1_get_skip_context(xd);
3667     if (rd_stats->skip) {
3668       const int s1 = x->skip_cost[skip_ctx][1];
3669       rd_stats->rate = s1;
3670     } else {
3671       const int s0 = x->skip_cost[skip_ctx][0];
3672       rd_stats->rate += s0;
3673     }
3674   }
3675   return rd;
3676 }
3677
3678 static void choose_largest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x,
3679                                    RD_STATS *rd_stats, int64_t ref_best_rd,
3680                                    BLOCK_SIZE bs) {
3681   const AV1_COMMON *const cm = &cpi->common;
3682   MACROBLOCKD *const xd = &x->e_mbd;
3683   MB_MODE_INFO *const mbmi = xd->mi[0];
3684   const int is_inter = is_inter_block(mbmi);
3685   mbmi->tx_size = tx_size_from_tx_mode(bs, cm->tx_mode);
3686   const TxSetType tx_set_type =
3687       av1_get_ext_tx_set_type(mbmi->tx_size, is_inter, cm->reduced_tx_set_used);
3688   prune_tx(cpi, bs, x, xd, tx_set_type);
3689   const int skip_ctx = av1_get_skip_context(xd);
3690   int s0, s1;
3691
3692   s0 = x->skip_cost[skip_ctx][0];
3693   s1 = x->skip_cost[skip_ctx][1];
3694
3695   int64_t skip_rd = RDCOST(x->rdmult, s1, 0);
3696   int64_t this_rd = RDCOST(x->rdmult, s0, 0);
3697
3698   txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, AOMMIN(this_rd, skip_rd),
3699                    AOM_PLANE_Y, bs, mbmi->tx_size,
3700                    cpi->sf.use_fast_coef_costing, FTXS_NONE, 0);
3701   // Reset the pruning flags.
3702 av1_zero(x->tx_search_prune); 3703 x->tx_split_prune_flag = 0; 3704 } 3705 3706 static void choose_smallest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x, 3707 RD_STATS *rd_stats, int64_t ref_best_rd, 3708 BLOCK_SIZE bs) { 3709 MACROBLOCKD *const xd = &x->e_mbd; 3710 MB_MODE_INFO *const mbmi = xd->mi[0]; 3711 3712 mbmi->tx_size = TX_4X4; 3713 // TODO(any) : Pass this_rd based on skip/non-skip cost 3714 txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, 0, 0, bs, mbmi->tx_size, 3715 cpi->sf.use_fast_coef_costing, FTXS_NONE, 0); 3716 } 3717 3718 static INLINE int bsize_to_num_blk(BLOCK_SIZE bsize) { 3719 int num_blk = 1 << (num_pels_log2_lookup[bsize] - 2 * tx_size_wide_log2[0]); 3720 return num_blk; 3721 } 3722 3723 static int get_search_init_depth(int mi_width, int mi_height, int is_inter, 3724 const SPEED_FEATURES *sf) { 3725 if (sf->tx_size_search_method == USE_LARGESTALL) return MAX_VARTX_DEPTH; 3726 3727 if (sf->tx_size_search_lgr_block) { 3728 if (mi_width > mi_size_wide[BLOCK_64X64] || 3729 mi_height > mi_size_high[BLOCK_64X64]) 3730 return MAX_VARTX_DEPTH; 3731 } 3732 3733 if (is_inter) { 3734 return (mi_height != mi_width) ? sf->inter_tx_size_search_init_depth_rect 3735 : sf->inter_tx_size_search_init_depth_sqr; 3736 } else { 3737 return (mi_height != mi_width) ? sf->intra_tx_size_search_init_depth_rect 3738 : sf->intra_tx_size_search_init_depth_sqr; 3739 } 3740 } 3741 3742 static void choose_tx_size_type_from_rd(const AV1_COMP *const cpi, 3743 MACROBLOCK *x, RD_STATS *rd_stats, 3744 int64_t ref_best_rd, BLOCK_SIZE bs) { 3745 av1_invalid_rd_stats(rd_stats); 3746 3747 const AV1_COMMON *const cm = &cpi->common; 3748 MACROBLOCKD *const xd = &x->e_mbd; 3749 MB_MODE_INFO *const mbmi = xd->mi[0]; 3750 const TX_SIZE max_rect_tx_size = max_txsize_rect_lookup[bs]; 3751 const int tx_select = cm->tx_mode == TX_MODE_SELECT; 3752 int start_tx; 3753 int depth, init_depth; 3754 3755 if (tx_select) { 3756 start_tx = max_rect_tx_size; 3757 init_depth = get_search_init_depth(mi_size_wide[bs], mi_size_high[bs], 3758 is_inter_block(mbmi), &cpi->sf); 3759 } else { 3760 const TX_SIZE chosen_tx_size = tx_size_from_tx_mode(bs, cm->tx_mode); 3761 start_tx = chosen_tx_size; 3762 init_depth = MAX_TX_DEPTH; 3763 } 3764 3765 prune_tx(cpi, bs, x, xd, EXT_TX_SET_ALL16); 3766 3767 TX_TYPE best_txk_type[TXK_TYPE_BUF_LEN]; 3768 uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE]; 3769 TX_SIZE best_tx_size = max_rect_tx_size; 3770 int64_t best_rd = INT64_MAX; 3771 const int n4 = bsize_to_num_blk(bs); 3772 x->rd_model = FULL_TXFM_RD; 3773 depth = init_depth; 3774 int64_t rd[MAX_TX_DEPTH + 1] = { INT64_MAX, INT64_MAX, INT64_MAX }; 3775 for (int n = start_tx; depth <= MAX_TX_DEPTH; 3776 depth++, n = sub_tx_size_map[n]) { 3777 #if CONFIG_DIST_8X8 3778 if (x->using_dist_8x8) { 3779 if (tx_size_wide[n] < 8 || tx_size_high[n] < 8) continue; 3780 } 3781 #endif 3782 if (!cpi->oxcf.enable_tx64 && txsize_sqr_up_map[n] == TX_64X64) continue; 3783 3784 RD_STATS this_rd_stats; 3785 rd[depth] = 3786 txfm_yrd(cpi, x, &this_rd_stats, ref_best_rd, bs, n, FTXS_NONE, 0); 3787 3788 if (rd[depth] < best_rd) { 3789 memcpy(best_txk_type, mbmi->txk_type, 3790 sizeof(best_txk_type[0]) * TXK_TYPE_BUF_LEN); 3791 memcpy(best_blk_skip, x->blk_skip, sizeof(best_blk_skip[0]) * n4); 3792 best_tx_size = n; 3793 best_rd = rd[depth]; 3794 *rd_stats = this_rd_stats; 3795 } 3796 if (n == TX_4X4) break; 3797 // If we are searching three depths, prune the smallest size depending 3798 // on rd results for the first two depths for low contrast blocks. 
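    // Concretely, once the rd cost stops improving from one depth to the
    // next, deeper (smaller) tx sizes are unlikely to win for these blocks,
    // so the check below breaks out of the loop early.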
3799 if (depth > init_depth && depth != MAX_TX_DEPTH && 3800 x->source_variance < 256) { 3801 if (rd[depth - 1] != INT64_MAX && rd[depth] > rd[depth - 1]) break; 3802 } 3803 } 3804 3805 if (rd_stats->rate != INT_MAX) { 3806 mbmi->tx_size = best_tx_size; 3807 memcpy(mbmi->txk_type, best_txk_type, 3808 sizeof(best_txk_type[0]) * TXK_TYPE_BUF_LEN); 3809 memcpy(x->blk_skip, best_blk_skip, sizeof(best_blk_skip[0]) * n4); 3810 } 3811 3812 // Reset the pruning flags. 3813 av1_zero(x->tx_search_prune); 3814 x->tx_split_prune_flag = 0; 3815 } 3816 3817 // origin_threshold * 128 / 100 3818 static const uint32_t skip_pred_threshold[3][BLOCK_SIZES_ALL] = { 3819 { 3820 64, 64, 64, 70, 60, 60, 68, 68, 68, 68, 68, 3821 68, 68, 68, 68, 68, 64, 64, 70, 70, 68, 68, 3822 }, 3823 { 3824 88, 88, 88, 86, 87, 87, 68, 68, 68, 68, 68, 3825 68, 68, 68, 68, 68, 88, 88, 86, 86, 68, 68, 3826 }, 3827 { 3828 90, 93, 93, 90, 93, 93, 74, 74, 74, 74, 74, 3829 74, 74, 74, 74, 74, 90, 90, 90, 90, 74, 74, 3830 }, 3831 }; 3832 3833 // lookup table for predict_skip_flag 3834 // int max_tx_size = max_txsize_rect_lookup[bsize]; 3835 // if (tx_size_high[max_tx_size] > 16 || tx_size_wide[max_tx_size] > 16) 3836 // max_tx_size = AOMMIN(max_txsize_lookup[bsize], TX_16X16); 3837 static const TX_SIZE max_predict_sf_tx_size[BLOCK_SIZES_ALL] = { 3838 TX_4X4, TX_4X8, TX_8X4, TX_8X8, TX_8X16, TX_16X8, 3839 TX_16X16, TX_16X16, TX_16X16, TX_16X16, TX_16X16, TX_16X16, 3840 TX_16X16, TX_16X16, TX_16X16, TX_16X16, TX_4X16, TX_16X4, 3841 TX_8X8, TX_8X8, TX_16X16, TX_16X16, 3842 }; 3843 3844 // Uses simple features on top of DCT coefficients to quickly predict 3845 // whether optimal RD decision is to skip encoding the residual. 3846 // The sse value is stored in dist. 3847 static int predict_skip_flag(MACROBLOCK *x, BLOCK_SIZE bsize, int64_t *dist, 3848 int reduced_tx_set) { 3849 const int bw = block_size_wide[bsize]; 3850 const int bh = block_size_high[bsize]; 3851 const MACROBLOCKD *xd = &x->e_mbd; 3852 const int16_t dc_q = av1_dc_quant_QTX(x->qindex, 0, xd->bd); 3853 3854 *dist = pixel_diff_dist(x, 0, 0, 0, bsize, bsize, NULL); 3855 3856 const int64_t mse = *dist / bw / bh; 3857 // Normalized quantizer takes the transform upscaling factor (8 for tx size 3858 // smaller than 32) into account. 3859 const int16_t normalized_dc_q = dc_q >> 3; 3860 const int64_t mse_thresh = (int64_t)normalized_dc_q * normalized_dc_q / 8; 3861 // Predict not to skip when mse is larger than threshold. 3862 if (mse > mse_thresh) return 0; 3863 3864 const int max_tx_size = max_predict_sf_tx_size[bsize]; 3865 const int tx_h = tx_size_high[max_tx_size]; 3866 const int tx_w = tx_size_wide[max_tx_size]; 3867 DECLARE_ALIGNED(32, tran_low_t, coefs[32 * 32]); 3868 TxfmParam param; 3869 param.tx_type = DCT_DCT; 3870 param.tx_size = max_tx_size; 3871 param.bd = xd->bd; 3872 param.is_hbd = is_cur_buf_hbd(xd); 3873 param.lossless = 0; 3874 param.tx_set_type = av1_get_ext_tx_set_type( 3875 param.tx_size, is_inter_block(xd->mi[0]), reduced_tx_set); 3876 const int bd_idx = (xd->bd == 8) ? 0 : ((xd->bd == 10) ? 
1 : 2);
3877   const uint32_t max_qcoef_thresh = skip_pred_threshold[bd_idx][bsize];
3878   const int16_t *src_diff = x->plane[0].src_diff;
3879   const int n_coeff = tx_w * tx_h;
3880   const int16_t ac_q = av1_ac_quant_QTX(x->qindex, 0, xd->bd);
3881   const uint32_t dc_thresh = max_qcoef_thresh * dc_q;
3882   const uint32_t ac_thresh = max_qcoef_thresh * ac_q;
3883   for (int row = 0; row < bh; row += tx_h) {
3884     for (int col = 0; col < bw; col += tx_w) {
3885       av1_fwd_txfm(src_diff + col, coefs, bw, &param);
3886       // Operating on TX domain, not pixels; we want the QTX quantizers.
3887       const uint32_t dc_coef = (((uint32_t)abs(coefs[0])) << 7);
3888       if (dc_coef >= dc_thresh) return 0;
3889       for (int i = 1; i < n_coeff; ++i) {
3890         const uint32_t ac_coef = (((uint32_t)abs(coefs[i])) << 7);
3891         if (ac_coef >= ac_thresh) return 0;
3892       }
3893     }
3894     src_diff += tx_h * bw;
3895   }
3896   return 1;
3897 }
3898
3899 // Used to set the proper context for early termination with skip = 1.
3900 static void set_skip_flag(MACROBLOCK *x, RD_STATS *rd_stats, int bsize,
3901                           int64_t dist) {
3902   MACROBLOCKD *const xd = &x->e_mbd;
3903   MB_MODE_INFO *const mbmi = xd->mi[0];
3904   const int n4 = bsize_to_num_blk(bsize);
3905   const TX_SIZE tx_size = max_txsize_rect_lookup[bsize];
3906   memset(mbmi->txk_type, DCT_DCT, sizeof(mbmi->txk_type[0]) * TXK_TYPE_BUF_LEN);
3907   memset(mbmi->inter_tx_size, tx_size, sizeof(mbmi->inter_tx_size));
3908   mbmi->tx_size = tx_size;
3909   for (int i = 0; i < n4; ++i) set_blk_skip(x, 0, i, 1);
3910   rd_stats->skip = 1;
3911   if (is_cur_buf_hbd(xd)) dist = ROUND_POWER_OF_TWO(dist, (xd->bd - 8) * 2);
3912   rd_stats->dist = rd_stats->sse = (dist << 4);
3913   // Though the decision is to mark the block as skip based on luma stats,
3914   // it is possible that the block becomes non-skip after chroma rd. Also,
3915   // the intermediate non-skip costs calculated by the caller functions will
3916   // be incorrect if the rate is set to zero (i.e., if zero_blk_rate is not
3917   // accounted for). Hence an intermediate rate that codes the luma tx blocks
3918   // as skip is populated here; the caller function then sets the final rate
3919   // based on the final rd decision (i.e., skip vs non-skip). The rate
3920   // populated here corresponds to coding all the tx blocks with
3921   // zero_blk_rate (based on the largest possible tx size) in the current
3922   // block. E.g., for a 128x128 block, the rate would be 4 * zero_blk_rate,
3923   // where zero_blk_rate is the cost of coding one 64x64 tx block as all zeros.
3924   ENTROPY_CONTEXT ctxa[MAX_MIB_SIZE];
3925   ENTROPY_CONTEXT ctxl[MAX_MIB_SIZE];
3926   av1_get_entropy_contexts(bsize, &xd->plane[0], ctxa, ctxl);
3927   ENTROPY_CONTEXT *ta = ctxa;
3928   ENTROPY_CONTEXT *tl = ctxl;
3929   const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
3930   TXB_CTX txb_ctx;
3931   get_txb_ctx(bsize, tx_size, 0, ta, tl, &txb_ctx);
3932   const int zero_blk_rate = x->coeff_costs[txs_ctx][PLANE_TYPE_Y]
3933                                 .txb_skip_cost[txb_ctx.txb_skip_ctx][1];
3934   rd_stats->rate = zero_blk_rate *
3935                    (block_size_wide[bsize] >> tx_size_wide_log2[tx_size]) *
3936                    (block_size_high[bsize] >> tx_size_high_log2[tx_size]);
3937 }
3938
3939 static INLINE uint32_t get_block_residue_hash(MACROBLOCK *x, BLOCK_SIZE bsize) {
3940   const int rows = block_size_high[bsize];
3941   const int cols = block_size_wide[bsize];
3942   const int16_t *diff = x->plane[0].src_diff;
3943   const uint32_t hash = av1_get_crc32c_value(&x->mb_rd_record.crc_calculator,
3944                                              (uint8_t *)diff, 2 * rows * cols);
3945   return (hash << 5) + bsize;
3946 }
3947
3948 static void save_tx_rd_info(int n4, uint32_t hash, const MACROBLOCK *const x,
3949                             const RD_STATS *const rd_stats,
3950                             MB_RD_RECORD *tx_rd_record) {
3951   int index;
3952   if (tx_rd_record->num < RD_RECORD_BUFFER_LEN) {
3953     index =
3954         (tx_rd_record->index_start + tx_rd_record->num) % RD_RECORD_BUFFER_LEN;
3955     ++tx_rd_record->num;
3956   } else {
3957     index = tx_rd_record->index_start;
3958     tx_rd_record->index_start =
3959         (tx_rd_record->index_start + 1) % RD_RECORD_BUFFER_LEN;
3960   }
3961   MB_RD_INFO *const tx_rd_info = &tx_rd_record->tx_rd_info[index];
3962   const MACROBLOCKD *const xd = &x->e_mbd;
3963   const MB_MODE_INFO *const mbmi = xd->mi[0];
3964   tx_rd_info->hash_value = hash;
3965   tx_rd_info->tx_size = mbmi->tx_size;
3966   memcpy(tx_rd_info->blk_skip, x->blk_skip,
3967          sizeof(tx_rd_info->blk_skip[0]) * n4);
3968   av1_copy(tx_rd_info->inter_tx_size, mbmi->inter_tx_size);
3969   av1_copy(tx_rd_info->txk_type, mbmi->txk_type);
3970   tx_rd_info->rd_stats = *rd_stats;
3971 }
3972
3973 static void fetch_tx_rd_info(int n4, const MB_RD_INFO *const tx_rd_info,
3974                              RD_STATS *const rd_stats, MACROBLOCK *const x) {
3975   MACROBLOCKD *const xd = &x->e_mbd;
3976   MB_MODE_INFO *const mbmi = xd->mi[0];
3977   mbmi->tx_size = tx_rd_info->tx_size;
3978   memcpy(x->blk_skip, tx_rd_info->blk_skip,
3979          sizeof(tx_rd_info->blk_skip[0]) * n4);
3980   av1_copy(mbmi->inter_tx_size, tx_rd_info->inter_tx_size);
3981   av1_copy(mbmi->txk_type, tx_rd_info->txk_type);
3982   *rd_stats = tx_rd_info->rd_stats;
3983 }
3984
3985 static INLINE int32_t find_mb_rd_info(const MB_RD_RECORD *const mb_rd_record,
3986                                       const int64_t ref_best_rd,
3987                                       const uint32_t hash) {
3988   int32_t match_index = -1;
3989   if (ref_best_rd != INT64_MAX) {
3990     for (int i = 0; i < mb_rd_record->num; ++i) {
3991       const int index = (mb_rd_record->index_start + i) % RD_RECORD_BUFFER_LEN;
3992       // If there is a match in the tx_rd_record, fetch the RD decision and
3993       // terminate early.
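      // (get_block_residue_hash() folds bsize into the low 5 bits of the
      // hash, so a match implies both the same residue CRC and the same
      // block size.)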
3994       if (mb_rd_record->tx_rd_info[index].hash_value == hash) {
3995         match_index = index;
3996         break;
3997       }
3998     }
3999   }
4000   return match_index;
4001 }
4002
4003 static void super_block_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
4004                             RD_STATS *rd_stats, BLOCK_SIZE bs,
4005                             int64_t ref_best_rd) {
4006   MACROBLOCKD *xd = &x->e_mbd;
4007   av1_init_rd_stats(rd_stats);
4008   int is_inter = is_inter_block(xd->mi[0]);
4009   assert(bs == xd->mi[0]->sb_type);
4010
4011   const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2);
4012   const int mi_col = -xd->mb_to_left_edge >> (3 + MI_SIZE_LOG2);
4013
4014   uint32_t hash = 0;
4015   int32_t match_index = -1;
4016   MB_RD_RECORD *mb_rd_record = NULL;
4017   const int within_border = mi_row >= xd->tile.mi_row_start &&
4018                             (mi_row + mi_size_high[bs] < xd->tile.mi_row_end) &&
4019                             mi_col >= xd->tile.mi_col_start &&
4020                             (mi_col + mi_size_wide[bs] < xd->tile.mi_col_end);
4021   const int is_mb_rd_hash_enabled =
4022       (within_border && cpi->sf.use_mb_rd_hash && is_inter);
4023   const int n4 = bsize_to_num_blk(bs);
4024   if (is_mb_rd_hash_enabled) {
4025     hash = get_block_residue_hash(x, bs);
4026     mb_rd_record = &x->mb_rd_record;
4027     match_index = find_mb_rd_info(mb_rd_record, ref_best_rd, hash);
4028     if (match_index != -1) {
4029       MB_RD_INFO *tx_rd_info = &mb_rd_record->tx_rd_info[match_index];
4030       fetch_tx_rd_info(n4, tx_rd_info, rd_stats, x);
4031       // Reset the pruning flags.
4032       av1_zero(x->tx_search_prune);
4033       x->tx_split_prune_flag = 0;
4034       return;
4035     }
4036   }
4037
4038   // If we predict that skip is the optimal RD decision, set the respective
4039   // context and terminate early.
4040   int64_t dist;
4041
4042   if (cpi->sf.tx_type_search.use_skip_flag_prediction && is_inter &&
4043       (!xd->lossless[xd->mi[0]->segment_id]) &&
4044       predict_skip_flag(x, bs, &dist, cpi->common.reduced_tx_set_used)) {
4045     // Populate rd_stats according to the skip decision.
4046     set_skip_flag(x, rd_stats, bs, dist);
4047     // Save the RD search results into tx_rd_record.
4048     if (is_mb_rd_hash_enabled)
4049       save_tx_rd_info(n4, hash, x, rd_stats, mb_rd_record);
4050     // Reset the pruning flags.
4051     av1_zero(x->tx_search_prune);
4052     x->tx_split_prune_flag = 0;
4053     return;
4054   }
4055
4056   if (xd->lossless[xd->mi[0]->segment_id]) {
4057     choose_smallest_tx_size(cpi, x, rd_stats, ref_best_rd, bs);
4058   } else if (cpi->sf.tx_size_search_method == USE_LARGESTALL) {
4059     choose_largest_tx_size(cpi, x, rd_stats, ref_best_rd, bs);
4060   } else {
4061     choose_tx_size_type_from_rd(cpi, x, rd_stats, ref_best_rd, bs);
4062   }
4063
4064   // Save the RD search results into tx_rd_record.
4065   if (is_mb_rd_hash_enabled) {
4066     assert(mb_rd_record != NULL);
4067     save_tx_rd_info(n4, hash, x, rd_stats, mb_rd_record);
4068   }
4069 }
4070
4071 // Return the rate cost for the luma prediction mode info of intra blocks.
4072 static int intra_mode_info_cost_y(const AV1_COMP *cpi, const MACROBLOCK *x,
4073                                   const MB_MODE_INFO *mbmi, BLOCK_SIZE bsize,
4074                                   int mode_cost) {
4075   int total_rate = mode_cost;
4076   const int use_palette = mbmi->palette_mode_info.palette_size[0] > 0;
4077   const int use_filter_intra = mbmi->filter_intra_mode_info.use_filter_intra;
4078   const int use_intrabc = mbmi->use_intrabc;
4079   // Can only activate one mode.
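  // The assert below encodes this: for any mode other than DC_PRED all three
  // tool flags must be off, while for DC_PRED at most one of palette, intrabc
  // and filter_intra may be enabled.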
4080   assert(((mbmi->mode != DC_PRED) + use_palette + use_intrabc +
4081           use_filter_intra) <= 1);
4082   const int try_palette =
4083       av1_allow_palette(cpi->common.allow_screen_content_tools, mbmi->sb_type);
4084   if (try_palette && mbmi->mode == DC_PRED) {
4085     const MACROBLOCKD *xd = &x->e_mbd;
4086     const int bsize_ctx = av1_get_palette_bsize_ctx(bsize);
4087     const int mode_ctx = av1_get_palette_mode_ctx(xd);
4088     total_rate += x->palette_y_mode_cost[bsize_ctx][mode_ctx][use_palette];
4089     if (use_palette) {
4090       const uint8_t *const color_map = xd->plane[0].color_index_map;
4091       int block_width, block_height, rows, cols;
4092       av1_get_block_dimensions(bsize, 0, xd, &block_width, &block_height, &rows,
4093                                &cols);
4094       const int plt_size = mbmi->palette_mode_info.palette_size[0];
4095       int palette_mode_cost =
4096           x->palette_y_size_cost[bsize_ctx][plt_size - PALETTE_MIN_SIZE] +
4097           write_uniform_cost(plt_size, color_map[0]);
4098       uint16_t color_cache[2 * PALETTE_MAX_SIZE];
4099       const int n_cache = av1_get_palette_cache(xd, 0, color_cache);
4100       palette_mode_cost +=
4101           av1_palette_color_cost_y(&mbmi->palette_mode_info, color_cache,
4102                                    n_cache, cpi->common.seq_params.bit_depth);
4103       palette_mode_cost +=
4104           av1_cost_color_map(x, 0, bsize, mbmi->tx_size, PALETTE_MAP);
4105       total_rate += palette_mode_cost;
4106     }
4107   }
4108   if (av1_filter_intra_allowed(&cpi->common, mbmi)) {
4109     total_rate += x->filter_intra_cost[mbmi->sb_type][use_filter_intra];
4110     if (use_filter_intra) {
4111       total_rate += x->filter_intra_mode_cost[mbmi->filter_intra_mode_info
4112                                                   .filter_intra_mode];
4113     }
4114   }
4115   if (av1_is_directional_mode(mbmi->mode)) {
4116     if (av1_use_angle_delta(bsize)) {
4117       total_rate += x->angle_delta_cost[mbmi->mode - V_PRED]
4118                                        [MAX_ANGLE_DELTA +
4119                                         mbmi->angle_delta[PLANE_TYPE_Y]];
4120     }
4121   }
4122   if (av1_allow_intrabc(&cpi->common))
4123     total_rate += x->intrabc_cost[use_intrabc];
4124   return total_rate;
4125 }
4126
4127 // Return the rate cost for the chroma prediction mode info of intra blocks.
4128 static int intra_mode_info_cost_uv(const AV1_COMP *cpi, const MACROBLOCK *x,
4129                                    const MB_MODE_INFO *mbmi, BLOCK_SIZE bsize,
4130                                    int mode_cost) {
4131   int total_rate = mode_cost;
4132   const int use_palette = mbmi->palette_mode_info.palette_size[1] > 0;
4133   const UV_PREDICTION_MODE mode = mbmi->uv_mode;
4134   // Can only activate one mode.
4135 assert(((mode != UV_DC_PRED) + use_palette + mbmi->use_intrabc) <= 1); 4136 4137 const int try_palette = 4138 av1_allow_palette(cpi->common.allow_screen_content_tools, mbmi->sb_type); 4139 if (try_palette && mode == UV_DC_PRED) { 4140 const PALETTE_MODE_INFO *pmi = &mbmi->palette_mode_info; 4141 total_rate += 4142 x->palette_uv_mode_cost[pmi->palette_size[0] > 0][use_palette]; 4143 if (use_palette) { 4144 const int bsize_ctx = av1_get_palette_bsize_ctx(bsize); 4145 const int plt_size = pmi->palette_size[1]; 4146 const MACROBLOCKD *xd = &x->e_mbd; 4147 const uint8_t *const color_map = xd->plane[1].color_index_map; 4148 int palette_mode_cost = 4149 x->palette_uv_size_cost[bsize_ctx][plt_size - PALETTE_MIN_SIZE] + 4150 write_uniform_cost(plt_size, color_map[0]); 4151 uint16_t color_cache[2 * PALETTE_MAX_SIZE]; 4152 const int n_cache = av1_get_palette_cache(xd, 1, color_cache); 4153 palette_mode_cost += av1_palette_color_cost_uv( 4154 pmi, color_cache, n_cache, cpi->common.seq_params.bit_depth); 4155 palette_mode_cost += 4156 av1_cost_color_map(x, 1, bsize, mbmi->tx_size, PALETTE_MAP); 4157 total_rate += palette_mode_cost; 4158 } 4159 } 4160 if (av1_is_directional_mode(get_uv_mode(mode))) { 4161 if (av1_use_angle_delta(bsize)) { 4162 total_rate += 4163 x->angle_delta_cost[mode - V_PRED][mbmi->angle_delta[PLANE_TYPE_UV] + 4164 MAX_ANGLE_DELTA]; 4165 } 4166 } 4167 return total_rate; 4168 } 4169 4170 static int conditional_skipintra(PREDICTION_MODE mode, 4171 PREDICTION_MODE best_intra_mode) { 4172 if (mode == D113_PRED && best_intra_mode != V_PRED && 4173 best_intra_mode != D135_PRED) 4174 return 1; 4175 if (mode == D67_PRED && best_intra_mode != V_PRED && 4176 best_intra_mode != D45_PRED) 4177 return 1; 4178 if (mode == D203_PRED && best_intra_mode != H_PRED && 4179 best_intra_mode != D45_PRED) 4180 return 1; 4181 if (mode == D157_PRED && best_intra_mode != H_PRED && 4182 best_intra_mode != D135_PRED) 4183 return 1; 4184 return 0; 4185 } 4186 4187 // Model based RD estimation for luma intra blocks. 4188 static int64_t intra_model_yrd(const AV1_COMP *const cpi, MACROBLOCK *const x, 4189 BLOCK_SIZE bsize, int mode_cost, int mi_row, 4190 int mi_col) { 4191 const AV1_COMMON *cm = &cpi->common; 4192 MACROBLOCKD *const xd = &x->e_mbd; 4193 MB_MODE_INFO *const mbmi = xd->mi[0]; 4194 assert(!is_inter_block(mbmi)); 4195 RD_STATS this_rd_stats; 4196 int row, col; 4197 int64_t temp_sse, this_rd; 4198 TX_SIZE tx_size = tx_size_from_tx_mode(bsize, cm->tx_mode); 4199 const int stepr = tx_size_high_unit[tx_size]; 4200 const int stepc = tx_size_wide_unit[tx_size]; 4201 const int max_blocks_wide = max_block_wide(xd, bsize, 0); 4202 const int max_blocks_high = max_block_high(xd, bsize, 0); 4203 mbmi->tx_size = tx_size; 4204 // Prediction. 4205 for (row = 0; row < max_blocks_high; row += stepr) { 4206 for (col = 0; col < max_blocks_wide; col += stepc) { 4207 av1_predict_intra_block_facade(cm, xd, 0, col, row, tx_size); 4208 } 4209 } 4210 // RD estimation. 
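  // MODELRD_TYPE_INTRA selects which model_rd_for_sb_* estimator is used;
  // only the luma plane (plane 0 to 0) is modeled here.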
4211 model_rd_sb_fn[MODELRD_TYPE_INTRA]( 4212 cpi, bsize, x, xd, 0, 0, mi_row, mi_col, &this_rd_stats.rate, 4213 &this_rd_stats.dist, &this_rd_stats.skip, &temp_sse, NULL, NULL, NULL); 4214 if (av1_is_directional_mode(mbmi->mode) && av1_use_angle_delta(bsize)) { 4215 mode_cost += 4216 x->angle_delta_cost[mbmi->mode - V_PRED] 4217 [MAX_ANGLE_DELTA + mbmi->angle_delta[PLANE_TYPE_Y]]; 4218 } 4219 if (mbmi->mode == DC_PRED && 4220 av1_filter_intra_allowed_bsize(cm, mbmi->sb_type)) { 4221 if (mbmi->filter_intra_mode_info.use_filter_intra) { 4222 const int mode = mbmi->filter_intra_mode_info.filter_intra_mode; 4223 mode_cost += x->filter_intra_cost[mbmi->sb_type][1] + 4224 x->filter_intra_mode_cost[mode]; 4225 } else { 4226 mode_cost += x->filter_intra_cost[mbmi->sb_type][0]; 4227 } 4228 } 4229 this_rd = 4230 RDCOST(x->rdmult, this_rd_stats.rate + mode_cost, this_rd_stats.dist); 4231 return this_rd; 4232 } 4233 4234 // Extends 'color_map' array from 'orig_width x orig_height' to 'new_width x 4235 // new_height'. Extra rows and columns are filled in by copying last valid 4236 // row/column. 4237 static void extend_palette_color_map(uint8_t *const color_map, int orig_width, 4238 int orig_height, int new_width, 4239 int new_height) { 4240 int j; 4241 assert(new_width >= orig_width); 4242 assert(new_height >= orig_height); 4243 if (new_width == orig_width && new_height == orig_height) return; 4244 4245 for (j = orig_height - 1; j >= 0; --j) { 4246 memmove(color_map + j * new_width, color_map + j * orig_width, orig_width); 4247 // Copy last column to extra columns. 4248 memset(color_map + j * new_width + orig_width, 4249 color_map[j * new_width + orig_width - 1], new_width - orig_width); 4250 } 4251 // Copy last row to extra rows. 4252 for (j = orig_height; j < new_height; ++j) { 4253 memcpy(color_map + j * new_width, color_map + (orig_height - 1) * new_width, 4254 new_width); 4255 } 4256 } 4257 4258 // Bias toward using colors in the cache. 4259 // TODO(huisu): Try other schemes to improve compression. 4260 static void optimize_palette_colors(uint16_t *color_cache, int n_cache, 4261 int n_colors, int stride, int *centroids) { 4262 if (n_cache <= 0) return; 4263 for (int i = 0; i < n_colors * stride; i += stride) { 4264 int min_diff = abs(centroids[i] - (int)color_cache[0]); 4265 int idx = 0; 4266 for (int j = 1; j < n_cache; ++j) { 4267 const int this_diff = abs(centroids[i] - color_cache[j]); 4268 if (this_diff < min_diff) { 4269 min_diff = this_diff; 4270 idx = j; 4271 } 4272 } 4273 if (min_diff <= 1) centroids[i] = color_cache[idx]; 4274 } 4275 } 4276 4277 // Given the base colors as specified in centroids[], calculate the RD cost 4278 // of palette mode. 4279 static void palette_rd_y(const AV1_COMP *const cpi, MACROBLOCK *x, 4280 MB_MODE_INFO *mbmi, BLOCK_SIZE bsize, int mi_row, 4281 int mi_col, int dc_mode_cost, const int *data, 4282 int *centroids, int n, uint16_t *color_cache, 4283 int n_cache, MB_MODE_INFO *best_mbmi, 4284 uint8_t *best_palette_color_map, int64_t *best_rd, 4285 int64_t *best_model_rd, int *rate, int *rate_tokenonly, 4286 int *rate_overhead, int64_t *distortion, 4287 int *skippable, PICK_MODE_CONTEXT *ctx, 4288 uint8_t *blk_skip) { 4289 optimize_palette_colors(color_cache, n_cache, n, 1, centroids); 4290 int k = av1_remove_duplicates(centroids, n); 4291 if (k < PALETTE_MIN_SIZE) { 4292 // Too few unique colors to create a palette. And DC_PRED will work 4293 // well for that case anyway. So skip. 
4294 return; 4295 } 4296 PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info; 4297 if (cpi->common.seq_params.use_highbitdepth) 4298 for (int i = 0; i < k; ++i) 4299 pmi->palette_colors[i] = clip_pixel_highbd( 4300 (int)centroids[i], cpi->common.seq_params.bit_depth); 4301 else 4302 for (int i = 0; i < k; ++i) 4303 pmi->palette_colors[i] = clip_pixel(centroids[i]); 4304 pmi->palette_size[0] = k; 4305 MACROBLOCKD *const xd = &x->e_mbd; 4306 uint8_t *const color_map = xd->plane[0].color_index_map; 4307 int block_width, block_height, rows, cols; 4308 av1_get_block_dimensions(bsize, 0, xd, &block_width, &block_height, &rows, 4309 &cols); 4310 av1_calc_indices(data, centroids, color_map, rows * cols, k, 1); 4311 extend_palette_color_map(color_map, cols, rows, block_width, block_height); 4312 const int palette_mode_cost = 4313 intra_mode_info_cost_y(cpi, x, mbmi, bsize, dc_mode_cost); 4314 int64_t this_model_rd = 4315 intra_model_yrd(cpi, x, bsize, palette_mode_cost, mi_row, mi_col); 4316 if (*best_model_rd != INT64_MAX && 4317 this_model_rd > *best_model_rd + (*best_model_rd >> 1)) 4318 return; 4319 if (this_model_rd < *best_model_rd) *best_model_rd = this_model_rd; 4320 RD_STATS tokenonly_rd_stats; 4321 super_block_yrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd); 4322 if (tokenonly_rd_stats.rate == INT_MAX) return; 4323 int this_rate = tokenonly_rd_stats.rate + palette_mode_cost; 4324 int64_t this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist); 4325 if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(mbmi->sb_type)) { 4326 tokenonly_rd_stats.rate -= 4327 tx_size_cost(&cpi->common, x, bsize, mbmi->tx_size); 4328 } 4329 if (this_rd < *best_rd) { 4330 *best_rd = this_rd; 4331 memcpy(best_palette_color_map, color_map, 4332 block_width * block_height * sizeof(color_map[0])); 4333 *best_mbmi = *mbmi; 4334 memcpy(blk_skip, x->blk_skip, sizeof(x->blk_skip[0]) * ctx->num_4x4_blk); 4335 *rate_overhead = this_rate - tokenonly_rd_stats.rate; 4336 if (rate) *rate = this_rate; 4337 if (rate_tokenonly) *rate_tokenonly = tokenonly_rd_stats.rate; 4338 if (distortion) *distortion = tokenonly_rd_stats.dist; 4339 if (skippable) *skippable = tokenonly_rd_stats.skip; 4340 } 4341 } 4342 4343 static int rd_pick_palette_intra_sby( 4344 const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, 4345 int mi_col, int dc_mode_cost, MB_MODE_INFO *best_mbmi, 4346 uint8_t *best_palette_color_map, int64_t *best_rd, int64_t *best_model_rd, 4347 int *rate, int *rate_tokenonly, int64_t *distortion, int *skippable, 4348 PICK_MODE_CONTEXT *ctx, uint8_t *best_blk_skip) { 4349 int rate_overhead = 0; 4350 MACROBLOCKD *const xd = &x->e_mbd; 4351 MB_MODE_INFO *const mbmi = xd->mi[0]; 4352 assert(!is_inter_block(mbmi)); 4353 assert(av1_allow_palette(cpi->common.allow_screen_content_tools, bsize)); 4354 const SequenceHeader *const seq_params = &cpi->common.seq_params; 4355 int colors, n; 4356 const int src_stride = x->plane[0].src.stride; 4357 const uint8_t *const src = x->plane[0].src.buf; 4358 uint8_t *const color_map = xd->plane[0].color_index_map; 4359 int block_width, block_height, rows, cols; 4360 av1_get_block_dimensions(bsize, 0, xd, &block_width, &block_height, &rows, 4361 &cols); 4362 4363 int count_buf[1 << 12]; // Maximum (1 << 12) color levels. 
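  // (1 << 12) bins are enough for one histogram bin per sample value at the
  // largest supported bit depth of 12 bits.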
4364   if (seq_params->use_highbitdepth)
4365     colors = av1_count_colors_highbd(src, src_stride, rows, cols,
4366                                      seq_params->bit_depth, count_buf);
4367   else
4368     colors = av1_count_colors(src, src_stride, rows, cols, count_buf);
4369   mbmi->filter_intra_mode_info.use_filter_intra = 0;
4370
4371   if (colors > 1 && colors <= 64) {
4372     int r, c, i;
4373     const int max_itr = 50;
4374     int *const data = x->palette_buffer->kmeans_data_buf;
4375     int centroids[PALETTE_MAX_SIZE];
4376     int lb, ub, val;
4377     uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
4378     if (seq_params->use_highbitdepth)
4379       lb = ub = src16[0];
4380     else
4381       lb = ub = src[0];
4382
4383     if (seq_params->use_highbitdepth) {
4384       for (r = 0; r < rows; ++r) {
4385         for (c = 0; c < cols; ++c) {
4386           val = src16[r * src_stride + c];
4387           data[r * cols + c] = val;
4388           if (val < lb)
4389             lb = val;
4390           else if (val > ub)
4391             ub = val;
4392         }
4393       }
4394     } else {
4395       for (r = 0; r < rows; ++r) {
4396         for (c = 0; c < cols; ++c) {
4397           val = src[r * src_stride + c];
4398           data[r * cols + c] = val;
4399           if (val < lb)
4400             lb = val;
4401           else if (val > ub)
4402             ub = val;
4403         }
4404       }
4405     }
4406
4407     mbmi->mode = DC_PRED;
4408     mbmi->filter_intra_mode_info.use_filter_intra = 0;
4409
4410     uint16_t color_cache[2 * PALETTE_MAX_SIZE];
4411     const int n_cache = av1_get_palette_cache(xd, 0, color_cache);
4412
4413     // Find the dominant colors, stored in top_colors[].
4414     int top_colors[PALETTE_MAX_SIZE] = { 0 };
4415     for (i = 0; i < AOMMIN(colors, PALETTE_MAX_SIZE); ++i) {
4416       int max_count = 0;
4417       for (int j = 0; j < (1 << seq_params->bit_depth); ++j) {
4418         if (count_buf[j] > max_count) {
4419           max_count = count_buf[j];
4420           top_colors[i] = j;
4421         }
4422       }
4423       assert(max_count > 0);
4424       count_buf[top_colors[i]] = 0;
4425     }
4426
4427     // Try the dominant colors directly.
4428     // TODO(huisu@google.com): Try to avoid duplicate computation in cases
4429     // where the dominant colors and the k-means results are similar.
4430     for (n = AOMMIN(colors, PALETTE_MAX_SIZE); n >= 2; --n) {
4431       for (i = 0; i < n; ++i) centroids[i] = top_colors[i];
4432       palette_rd_y(cpi, x, mbmi, bsize, mi_row, mi_col, dc_mode_cost, data,
4433                    centroids, n, color_cache, n_cache, best_mbmi,
4434                    best_palette_color_map, best_rd, best_model_rd, rate,
4435                    rate_tokenonly, &rate_overhead, distortion, skippable, ctx,
4436                    best_blk_skip);
4437     }
4438
4439     // K-means clustering.
4440     for (n = AOMMIN(colors, PALETTE_MAX_SIZE); n >= 2; --n) {
4441       if (colors == PALETTE_MIN_SIZE) {
4442         // Special case: These colors automatically become the centroids.
4443         assert(colors == n);
4444         assert(colors == 2);
4445         centroids[0] = lb;
4446         centroids[1] = ub;
4447       } else {
4448         for (i = 0; i < n; ++i) {
4449           centroids[i] = lb + (2 * i + 1) * (ub - lb) / n / 2;
4450         }
4451         av1_k_means(data, centroids, color_map, rows * cols, n, 1, max_itr);
4452       }
4453       palette_rd_y(cpi, x, mbmi, bsize, mi_row, mi_col, dc_mode_cost, data,
4454                    centroids, n, color_cache, n_cache, best_mbmi,
4455                    best_palette_color_map, best_rd, best_model_rd, rate,
4456                    rate_tokenonly, &rate_overhead, distortion, skippable, ctx,
4457                    best_blk_skip);
4458     }
4459   }
4460
4461   if (best_mbmi->palette_mode_info.palette_size[0] > 0) {
4462     memcpy(color_map, best_palette_color_map,
4463            block_width * block_height * sizeof(best_palette_color_map[0]));
4464   }
4465   *mbmi = *best_mbmi;
4466   return rate_overhead;
4467 }
4468
4469 // Return 1 if a filter intra mode is selected; return 0 otherwise.
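// On success, the winning filter-intra mode, tx size and txk types are left
// in mbmi, and the matching per-block skip flags have already been copied
// into ctx->blk_skip by the search loop.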
4470 static int rd_pick_filter_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
4471                                     int mi_row, int mi_col, int *rate,
4472                                     int *rate_tokenonly, int64_t *distortion,
4473                                     int *skippable, BLOCK_SIZE bsize,
4474                                     int mode_cost, int64_t *best_rd,
4475                                     int64_t *best_model_rd,
4476                                     PICK_MODE_CONTEXT *ctx) {
4477   MACROBLOCKD *const xd = &x->e_mbd;
4478   MB_MODE_INFO *mbmi = xd->mi[0];
4479   int filter_intra_selected_flag = 0;
4480   FILTER_INTRA_MODE mode;
4481   TX_SIZE best_tx_size = TX_8X8;
4482   FILTER_INTRA_MODE_INFO filter_intra_mode_info;
4483   TX_TYPE best_txk_type[TXK_TYPE_BUF_LEN];
4484   (void)ctx;
4485   av1_zero(filter_intra_mode_info);
4486   mbmi->filter_intra_mode_info.use_filter_intra = 1;
4487   mbmi->mode = DC_PRED;
4488   mbmi->palette_mode_info.palette_size[0] = 0;
4489
4490   for (mode = 0; mode < FILTER_INTRA_MODES; ++mode) {
4491     int64_t this_rd, this_model_rd;
4492     RD_STATS tokenonly_rd_stats;
4493     mbmi->filter_intra_mode_info.filter_intra_mode = mode;
4494     this_model_rd = intra_model_yrd(cpi, x, bsize, mode_cost, mi_row, mi_col);
4495     if (*best_model_rd != INT64_MAX &&
4496         this_model_rd > *best_model_rd + (*best_model_rd >> 1))
4497       continue;
4498     if (this_model_rd < *best_model_rd) *best_model_rd = this_model_rd;
4499     super_block_yrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd);
4500     if (tokenonly_rd_stats.rate == INT_MAX) continue;
4501     const int this_rate =
4502         tokenonly_rd_stats.rate +
4503         intra_mode_info_cost_y(cpi, x, mbmi, bsize, mode_cost);
4504     this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
4505
4506     if (this_rd < *best_rd) {
4507       *best_rd = this_rd;
4508       best_tx_size = mbmi->tx_size;
4509       filter_intra_mode_info = mbmi->filter_intra_mode_info;
4510       memcpy(best_txk_type, mbmi->txk_type,
4511              sizeof(best_txk_type[0]) * TXK_TYPE_BUF_LEN);
4512       memcpy(ctx->blk_skip, x->blk_skip,
4513              sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);
4514       *rate = this_rate;
4515       *rate_tokenonly = tokenonly_rd_stats.rate;
4516       *distortion = tokenonly_rd_stats.dist;
4517       *skippable = tokenonly_rd_stats.skip;
4518       filter_intra_selected_flag = 1;
4519     }
4520   }
4521
4522   if (filter_intra_selected_flag) {
4523     mbmi->mode = DC_PRED;
4524     mbmi->tx_size = best_tx_size;
4525     mbmi->filter_intra_mode_info = filter_intra_mode_info;
4526     memcpy(mbmi->txk_type, best_txk_type,
4527            sizeof(best_txk_type[0]) * TXK_TYPE_BUF_LEN);
4528     return 1;
4529   } else {
4530     return 0;
4531   }
4532 }
4533
4534 // Run the RD calculation with the given luma intra prediction angle, and
4535 // return the RD cost. Update the best mode info if the RD cost is the best so far.
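// Returns INT64_MAX when the candidate angle is pruned, i.e. when its model
// RD is already much worse than *best_model_rd or the token-only RD search
// cannot beat best_rd_in.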
4536 static int64_t calc_rd_given_intra_angle(
4537     const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row,
4538     int mi_col, int mode_cost, int64_t best_rd_in, int8_t angle_delta,
4539     int max_angle_delta, int *rate, RD_STATS *rd_stats, int *best_angle_delta,
4540     TX_SIZE *best_tx_size, int64_t *best_rd, int64_t *best_model_rd,
4541     TX_TYPE *best_txk_type, uint8_t *best_blk_skip) {
4542   RD_STATS tokenonly_rd_stats;
4543   int64_t this_rd, this_model_rd;
4544   MB_MODE_INFO *mbmi = x->e_mbd.mi[0];
4545   const int n4 = bsize_to_num_blk(bsize);
4546   assert(!is_inter_block(mbmi));
4547   mbmi->angle_delta[PLANE_TYPE_Y] = angle_delta;
4548   this_model_rd = intra_model_yrd(cpi, x, bsize, mode_cost, mi_row, mi_col);
4549   if (*best_model_rd != INT64_MAX &&
4550       this_model_rd > *best_model_rd + (*best_model_rd >> 1))
4551     return INT64_MAX;
4552   if (this_model_rd < *best_model_rd) *best_model_rd = this_model_rd;
4553   super_block_yrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd_in);
4554   if (tokenonly_rd_stats.rate == INT_MAX) return INT64_MAX;
4555
4556   int this_rate =
4557       mode_cost + tokenonly_rd_stats.rate +
4558       x->angle_delta_cost[mbmi->mode - V_PRED][max_angle_delta + angle_delta];
4559   this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
4560
4561   if (this_rd < *best_rd) {
4562     memcpy(best_txk_type, mbmi->txk_type,
4563            sizeof(*best_txk_type) * TXK_TYPE_BUF_LEN);
4564     memcpy(best_blk_skip, x->blk_skip, sizeof(best_blk_skip[0]) * n4);
4565     *best_rd = this_rd;
4566     *best_angle_delta = mbmi->angle_delta[PLANE_TYPE_Y];
4567     *best_tx_size = mbmi->tx_size;
4568     *rate = this_rate;
4569     rd_stats->rate = tokenonly_rd_stats.rate;
4570     rd_stats->dist = tokenonly_rd_stats.dist;
4571     rd_stats->skip = tokenonly_rd_stats.skip;
4572   }
4573   return this_rd;
4574 }
4575
4576 // With the given luma directional intra prediction mode, pick the best angle
4577 // delta. Return the RD cost corresponding to the best angle delta.
4578 static int64_t rd_pick_intra_angle_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
4579                                        int mi_row, int mi_col, int *rate,
4580                                        RD_STATS *rd_stats, BLOCK_SIZE bsize,
4581                                        int mode_cost, int64_t best_rd,
4582                                        int64_t *best_model_rd) {
4583   MB_MODE_INFO *mbmi = x->e_mbd.mi[0];
4584   assert(!is_inter_block(mbmi));
4585
4586   int best_angle_delta = 0;
4587   int64_t rd_cost[2 * (MAX_ANGLE_DELTA + 2)];
4588   TX_SIZE best_tx_size = mbmi->tx_size;
4589   TX_TYPE best_txk_type[TXK_TYPE_BUF_LEN];
4590   uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
4591
4592   for (int i = 0; i < 2 * (MAX_ANGLE_DELTA + 2); ++i) rd_cost[i] = INT64_MAX;
4593
4594   int first_try = 1;
4595   for (int angle_delta = 0; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) {
4596     for (int i = 0; i < 2; ++i) {
4597       const int64_t best_rd_in =
4598           (best_rd == INT64_MAX) ? INT64_MAX
4599                                  : (best_rd + (best_rd >> (first_try ?
3 : 5))); 4600 const int64_t this_rd = calc_rd_given_intra_angle( 4601 cpi, x, bsize, mi_row, mi_col, mode_cost, best_rd_in, 4602 (1 - 2 * i) * angle_delta, MAX_ANGLE_DELTA, rate, rd_stats, 4603 &best_angle_delta, &best_tx_size, &best_rd, best_model_rd, 4604 best_txk_type, best_blk_skip); 4605 rd_cost[2 * angle_delta + i] = this_rd; 4606 if (first_try && this_rd == INT64_MAX) return best_rd; 4607 first_try = 0; 4608 if (angle_delta == 0) { 4609 rd_cost[1] = this_rd; 4610 break; 4611 } 4612 } 4613 } 4614 4615 assert(best_rd != INT64_MAX); 4616 for (int angle_delta = 1; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) { 4617 for (int i = 0; i < 2; ++i) { 4618 int skip_search = 0; 4619 const int64_t rd_thresh = best_rd + (best_rd >> 5); 4620 if (rd_cost[2 * (angle_delta + 1) + i] > rd_thresh && 4621 rd_cost[2 * (angle_delta - 1) + i] > rd_thresh) 4622 skip_search = 1; 4623 if (!skip_search) { 4624 calc_rd_given_intra_angle(cpi, x, bsize, mi_row, mi_col, mode_cost, 4625 best_rd, (1 - 2 * i) * angle_delta, 4626 MAX_ANGLE_DELTA, rate, rd_stats, 4627 &best_angle_delta, &best_tx_size, &best_rd, 4628 best_model_rd, best_txk_type, best_blk_skip); 4629 } 4630 } 4631 } 4632 4633 if (rd_stats->rate != INT_MAX) { 4634 mbmi->tx_size = best_tx_size; 4635 mbmi->angle_delta[PLANE_TYPE_Y] = best_angle_delta; 4636 memcpy(mbmi->txk_type, best_txk_type, 4637 sizeof(*best_txk_type) * TXK_TYPE_BUF_LEN); 4638 memcpy(x->blk_skip, best_blk_skip, 4639 sizeof(best_blk_skip[0]) * bsize_to_num_blk(bsize)); 4640 } 4641 return best_rd; 4642 } 4643 4644 // Indices are sign, integer, and fractional part of the gradient value 4645 static const uint8_t gradient_to_angle_bin[2][7][16] = { 4646 { 4647 { 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0 }, 4648 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1 }, 4649 { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, 4650 { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, 4651 { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, 4652 { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 }, 4653 { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 }, 4654 }, 4655 { 4656 { 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4 }, 4657 { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3 }, 4658 { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 }, 4659 { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 }, 4660 { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 }, 4661 { 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2 }, 4662 { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 }, 4663 }, 4664 }; 4665 4666 /* clang-format off */ 4667 static const uint8_t mode_to_angle_bin[INTRA_MODES] = { 4668 0, 2, 6, 0, 4, 3, 5, 7, 1, 0, 4669 0, 4670 }; 4671 /* clang-format on */ 4672 4673 static void get_gradient_hist(const uint8_t *src, int src_stride, int rows, 4674 int cols, uint64_t *hist) { 4675 src += src_stride; 4676 for (int r = 1; r < rows; ++r) { 4677 for (int c = 1; c < cols; ++c) { 4678 int dx = src[c] - src[c - 1]; 4679 int dy = src[c] - src[c - src_stride]; 4680 int index; 4681 const int temp = dx * dx + dy * dy; 4682 if (dy == 0) { 4683 index = 2; 4684 } else { 4685 const int sn = (dx > 0) ^ (dy > 0); 4686 dx = abs(dx); 4687 dy = abs(dy); 4688 const int remd = (dx % dy) * 16 / dy; 4689 const int quot = dx / dy; 4690 index = gradient_to_angle_bin[sn][AOMMIN(quot, 6)][AOMMIN(remd, 15)]; 4691 } 4692 hist[index] += temp; 4693 } 4694 src += src_stride; 4695 } 4696 } 4697 4698 static void get_highbd_gradient_hist(const uint8_t *src8, int src_stride, 4699 int rows, int cols, uint64_t *hist) { 4700 uint16_t *src = 
CONVERT_TO_SHORTPTR(src8); 4701 src += src_stride; 4702 for (int r = 1; r < rows; ++r) { 4703 for (int c = 1; c < cols; ++c) { 4704 int dx = src[c] - src[c - 1]; 4705 int dy = src[c] - src[c - src_stride]; 4706 int index; 4707 const int temp = dx * dx + dy * dy; 4708 if (dy == 0) { 4709 index = 2; 4710 } else { 4711 const int sn = (dx > 0) ^ (dy > 0); 4712 dx = abs(dx); 4713 dy = abs(dy); 4714 const int remd = (dx % dy) * 16 / dy; 4715 const int quot = dx / dy; 4716 index = gradient_to_angle_bin[sn][AOMMIN(quot, 6)][AOMMIN(remd, 15)]; 4717 } 4718 hist[index] += temp; 4719 } 4720 src += src_stride; 4721 } 4722 } 4723 4724 static void angle_estimation(const uint8_t *src, int src_stride, int rows, 4725 int cols, BLOCK_SIZE bsize, int is_hbd, 4726 uint8_t *directional_mode_skip_mask) { 4727 // Check if angle_delta is used 4728 if (!av1_use_angle_delta(bsize)) return; 4729 4730 uint64_t hist[DIRECTIONAL_MODES] = { 0 }; 4731 if (is_hbd) 4732 get_highbd_gradient_hist(src, src_stride, rows, cols, hist); 4733 else 4734 get_gradient_hist(src, src_stride, rows, cols, hist); 4735 4736 int i; 4737 uint64_t hist_sum = 0; 4738 for (i = 0; i < DIRECTIONAL_MODES; ++i) hist_sum += hist[i]; 4739 for (i = 0; i < INTRA_MODES; ++i) { 4740 if (av1_is_directional_mode(i)) { 4741 const uint8_t angle_bin = mode_to_angle_bin[i]; 4742 uint64_t score = 2 * hist[angle_bin]; 4743 int weight = 2; 4744 if (angle_bin > 0) { 4745 score += hist[angle_bin - 1]; 4746 ++weight; 4747 } 4748 if (angle_bin < DIRECTIONAL_MODES - 1) { 4749 score += hist[angle_bin + 1]; 4750 ++weight; 4751 } 4752 const int thresh = 10; 4753 if (score * thresh < hist_sum * weight) directional_mode_skip_mask[i] = 1; 4754 } 4755 } 4756 } 4757 4758 // Given selected prediction mode, search for the best tx type and size. 4759 static void intra_block_yrd(const AV1_COMP *const cpi, MACROBLOCK *x, 4760 BLOCK_SIZE bsize, const int *bmode_costs, 4761 int64_t *best_rd, int *rate, int *rate_tokenonly, 4762 int64_t *distortion, int *skippable, 4763 MB_MODE_INFO *best_mbmi, PICK_MODE_CONTEXT *ctx) { 4764 MACROBLOCKD *const xd = &x->e_mbd; 4765 MB_MODE_INFO *const mbmi = xd->mi[0]; 4766 RD_STATS rd_stats; 4767 super_block_yrd(cpi, x, &rd_stats, bsize, *best_rd); 4768 if (rd_stats.rate == INT_MAX) return; 4769 int this_rate_tokenonly = rd_stats.rate; 4770 if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(mbmi->sb_type)) { 4771 // super_block_yrd above includes the cost of the tx_size in the 4772 // tokenonly rate, but for intra blocks, tx_size is always coded 4773 // (prediction granularity), so we account for it in the full rate, 4774 // not the tokenonly rate. 
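  /* Editor's illustration (not libaom code; the #if 0 fence keeps it out of
   * the build): a worked example of the rate bookkeeping described above,
   * with made-up numbers. */
#if 0
  /* Suppose super_block_yrd() reported rd_stats.rate == 1240 (cost units),
   * of which tx_size_cost() == 40 pays for signaling mbmi->tx_size, and the
   * intra mode/side info below costs 310. Then: */
  const int ex_tokenonly = 1240 - 40;  /* 1200: token cost without tx_size */
  const int ex_full_rate = 1240 + 310; /* 1550: all bits actually signaled */
  /* RDCOST() is evaluated with ex_full_rate; ex_tokenonly is only reported
   * back through *rate_tokenonly. */
#endif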
4775 this_rate_tokenonly -= tx_size_cost(&cpi->common, x, bsize, mbmi->tx_size); 4776 } 4777 const int this_rate = 4778 rd_stats.rate + 4779 intra_mode_info_cost_y(cpi, x, mbmi, bsize, bmode_costs[mbmi->mode]); 4780 const int64_t this_rd = RDCOST(x->rdmult, this_rate, rd_stats.dist); 4781 if (this_rd < *best_rd) { 4782 *best_mbmi = *mbmi; 4783 *best_rd = this_rd; 4784 *rate = this_rate; 4785 *rate_tokenonly = this_rate_tokenonly; 4786 *distortion = rd_stats.dist; 4787 *skippable = rd_stats.skip; 4788 memcpy(ctx->blk_skip, x->blk_skip, 4789 sizeof(x->blk_skip[0]) * ctx->num_4x4_blk); 4790 } 4791 } 4792 4793 // This function is used only for intra_only frames 4794 static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x, 4795 int mi_row, int mi_col, int *rate, 4796 int *rate_tokenonly, int64_t *distortion, 4797 int *skippable, BLOCK_SIZE bsize, 4798 int64_t best_rd, PICK_MODE_CONTEXT *ctx) { 4799 MACROBLOCKD *const xd = &x->e_mbd; 4800 MB_MODE_INFO *const mbmi = xd->mi[0]; 4801 assert(!is_inter_block(mbmi)); 4802 int64_t best_model_rd = INT64_MAX; 4803 const int rows = block_size_high[bsize]; 4804 const int cols = block_size_wide[bsize]; 4805 int is_directional_mode; 4806 uint8_t directional_mode_skip_mask[INTRA_MODES] = { 0 }; 4807 int beat_best_rd = 0; 4808 const int *bmode_costs; 4809 PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info; 4810 const int try_palette = 4811 cpi->oxcf.enable_palette && 4812 av1_allow_palette(cpi->common.allow_screen_content_tools, mbmi->sb_type); 4813 uint8_t *best_palette_color_map = 4814 try_palette ? x->palette_buffer->best_palette_color_map : NULL; 4815 const MB_MODE_INFO *above_mi = xd->above_mbmi; 4816 const MB_MODE_INFO *left_mi = xd->left_mbmi; 4817 const PREDICTION_MODE A = av1_above_block_mode(above_mi); 4818 const PREDICTION_MODE L = av1_left_block_mode(left_mi); 4819 const int above_ctx = intra_mode_context[A]; 4820 const int left_ctx = intra_mode_context[L]; 4821 bmode_costs = x->y_mode_costs[above_ctx][left_ctx]; 4822 4823 mbmi->angle_delta[PLANE_TYPE_Y] = 0; 4824 if (cpi->sf.intra_angle_estimation) { 4825 const int src_stride = x->plane[0].src.stride; 4826 const uint8_t *src = x->plane[0].src.buf; 4827 angle_estimation(src, src_stride, rows, cols, bsize, is_cur_buf_hbd(xd), 4828 directional_mode_skip_mask); 4829 } 4830 mbmi->filter_intra_mode_info.use_filter_intra = 0; 4831 pmi->palette_size[0] = 0; 4832 4833 if (cpi->sf.tx_type_search.fast_intra_tx_type_search) 4834 x->use_default_intra_tx_type = 1; 4835 else 4836 x->use_default_intra_tx_type = 0; 4837 4838 MB_MODE_INFO best_mbmi = *mbmi; 4839 /* Y Search for intra prediction mode */ 4840 for (int mode_idx = INTRA_MODE_START; mode_idx < INTRA_MODE_END; ++mode_idx) { 4841 RD_STATS this_rd_stats; 4842 int this_rate, this_rate_tokenonly, s; 4843 int64_t this_distortion, this_rd, this_model_rd; 4844 mbmi->mode = intra_rd_search_mode_order[mode_idx]; 4845 if ((!cpi->oxcf.enable_smooth_intra || cpi->sf.disable_smooth_intra) && 4846 (mbmi->mode == SMOOTH_PRED || mbmi->mode == SMOOTH_H_PRED || 4847 mbmi->mode == SMOOTH_V_PRED)) 4848 continue; 4849 if (!cpi->oxcf.enable_paeth_intra && mbmi->mode == PAETH_PRED) continue; 4850 mbmi->angle_delta[PLANE_TYPE_Y] = 0; 4851 this_model_rd = 4852 intra_model_yrd(cpi, x, bsize, bmode_costs[mbmi->mode], mi_row, mi_col); 4853 if (best_model_rd != INT64_MAX && 4854 this_model_rd > best_model_rd + (best_model_rd >> 1)) 4855 continue; 4856 if (this_model_rd < best_model_rd) best_model_rd = this_model_rd; 4857 is_directional_mode = 
av1_is_directional_mode(mbmi->mode); 4858 if (is_directional_mode && directional_mode_skip_mask[mbmi->mode]) continue; 4859 if (is_directional_mode && av1_use_angle_delta(bsize) && 4860 cpi->oxcf.enable_angle_delta) { 4861 this_rd_stats.rate = INT_MAX; 4862 rd_pick_intra_angle_sby(cpi, x, mi_row, mi_col, &this_rate, 4863 &this_rd_stats, bsize, bmode_costs[mbmi->mode], 4864 best_rd, &best_model_rd); 4865 } else { 4866 super_block_yrd(cpi, x, &this_rd_stats, bsize, best_rd); 4867 } 4868 this_rate_tokenonly = this_rd_stats.rate; 4869 this_distortion = this_rd_stats.dist; 4870 s = this_rd_stats.skip; 4871 4872 if (this_rate_tokenonly == INT_MAX) continue; 4873 4874 if (!xd->lossless[mbmi->segment_id] && 4875 block_signals_txsize(mbmi->sb_type)) { 4876 // super_block_yrd above includes the cost of the tx_size in the 4877 // tokenonly rate, but for intra blocks, tx_size is always coded 4878 // (prediction granularity), so we account for it in the full rate, 4879 // not the tokenonly rate. 4880 this_rate_tokenonly -= 4881 tx_size_cost(&cpi->common, x, bsize, mbmi->tx_size); 4882 } 4883 this_rate = 4884 this_rd_stats.rate + 4885 intra_mode_info_cost_y(cpi, x, mbmi, bsize, bmode_costs[mbmi->mode]); 4886 this_rd = RDCOST(x->rdmult, this_rate, this_distortion); 4887 if (this_rd < best_rd) { 4888 best_mbmi = *mbmi; 4889 best_rd = this_rd; 4890 beat_best_rd = 1; 4891 *rate = this_rate; 4892 *rate_tokenonly = this_rate_tokenonly; 4893 *distortion = this_distortion; 4894 *skippable = s; 4895 memcpy(ctx->blk_skip, x->blk_skip, 4896 sizeof(x->blk_skip[0]) * ctx->num_4x4_blk); 4897 } 4898 } 4899 4900 if (try_palette) { 4901 rd_pick_palette_intra_sby( 4902 cpi, x, bsize, mi_row, mi_col, bmode_costs[DC_PRED], &best_mbmi, 4903 best_palette_color_map, &best_rd, &best_model_rd, rate, rate_tokenonly, 4904 distortion, skippable, ctx, ctx->blk_skip); 4905 } 4906 4907 if (beat_best_rd && av1_filter_intra_allowed_bsize(&cpi->common, bsize)) { 4908 if (rd_pick_filter_intra_sby( 4909 cpi, x, mi_row, mi_col, rate, rate_tokenonly, distortion, skippable, 4910 bsize, bmode_costs[DC_PRED], &best_rd, &best_model_rd, ctx)) { 4911 best_mbmi = *mbmi; 4912 } 4913 } 4914 4915 // If previous searches use only the default tx type, do an extra search for 4916 // the best tx type. 4917 if (x->use_default_intra_tx_type) { 4918 *mbmi = best_mbmi; 4919 x->use_default_intra_tx_type = 0; 4920 intra_block_yrd(cpi, x, bsize, bmode_costs, &best_rd, rate, rate_tokenonly, 4921 distortion, skippable, &best_mbmi, ctx); 4922 } 4923 4924 *mbmi = best_mbmi; 4925 return best_rd; 4926 } 4927 4928 // Return value 0: early termination triggered, no valid rd cost available; 4929 // 1: rd cost values are valid. 
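// Editor's sketch (not libaom code; excluded from the build via #if 0): how
// a caller is expected to honor the 0/1 contract above. ex_uvrd and
// ex_rd_stats are hypothetical stand-ins for super_block_uvrd() and
// RD_STATS; the cost below is simplified (rate + dist), not the real RDCOST.
#if 0
#include <stdint.h>
typedef struct { int rate; int64_t dist; } ex_rd_stats;
static int64_t ex_try_chroma(int (*ex_uvrd)(ex_rd_stats *, int64_t),
                             int64_t ref_best_rd) {
  ex_rd_stats stats;
  /* A zero return means early termination: stats must not be read. */
  if (!ex_uvrd(&stats, ref_best_rd)) return INT64_MAX;
  return (int64_t)stats.rate + stats.dist;
}
#endif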
4930 static int super_block_uvrd(const AV1_COMP *const cpi, MACROBLOCK *x, 4931 RD_STATS *rd_stats, BLOCK_SIZE bsize, 4932 int64_t ref_best_rd) { 4933 MACROBLOCKD *const xd = &x->e_mbd; 4934 MB_MODE_INFO *const mbmi = xd->mi[0]; 4935 struct macroblockd_plane *const pd = &xd->plane[AOM_PLANE_U]; 4936 const TX_SIZE uv_tx_size = av1_get_tx_size(AOM_PLANE_U, xd); 4937 int plane; 4938 int is_cost_valid = 1; 4939 const int is_inter = is_inter_block(mbmi); 4940 int64_t this_rd = 0, skip_rd = 0; 4941 av1_init_rd_stats(rd_stats); 4942 4943 if (ref_best_rd < 0) is_cost_valid = 0; 4944 4945 if (x->skip_chroma_rd) return is_cost_valid; 4946 4947 bsize = scale_chroma_bsize(bsize, pd->subsampling_x, pd->subsampling_y); 4948 4949 if (is_inter && is_cost_valid) { 4950 for (plane = 1; plane < MAX_MB_PLANE; ++plane) 4951 av1_subtract_plane(x, bsize, plane); 4952 } 4953 4954 if (is_cost_valid) { 4955 for (plane = 1; plane < MAX_MB_PLANE; ++plane) { 4956 RD_STATS pn_rd_stats; 4957 int64_t chroma_ref_best_rd = ref_best_rd; 4958 // For inter blocks, the refined ref_best_rd is used for early exit. 4959 // For intra blocks, even though the current rd crosses ref_best_rd, 4960 // early exit is not recommended as the current rd is used for gating 4961 // subsequent modes as well (say, for angular modes). 4962 // TODO(any): Extend the early exit mechanism for intra modes as well 4963 if (cpi->sf.perform_best_rd_based_gating_for_chroma && is_inter && 4964 chroma_ref_best_rd != INT64_MAX) 4965 chroma_ref_best_rd = ref_best_rd - AOMMIN(this_rd, skip_rd); 4966 txfm_rd_in_plane(x, cpi, &pn_rd_stats, chroma_ref_best_rd, 0, plane, 4967 bsize, uv_tx_size, cpi->sf.use_fast_coef_costing, 4968 FTXS_NONE, 0); 4969 if (pn_rd_stats.rate == INT_MAX) { 4970 is_cost_valid = 0; 4971 break; 4972 } 4973 av1_merge_rd_stats(rd_stats, &pn_rd_stats); 4974 this_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist); 4975 skip_rd = RDCOST(x->rdmult, 0, rd_stats->sse); 4976 if (AOMMIN(this_rd, skip_rd) > ref_best_rd) { 4977 is_cost_valid = 0; 4978 break; 4979 } 4980 } 4981 } 4982 4983 if (!is_cost_valid) { 4984 // reset cost value 4985 av1_invalid_rd_stats(rd_stats); 4986 } 4987 4988 return is_cost_valid; 4989 } 4990 4991 // Pick transform type for a transform block of tx_size. 4992 static void tx_type_rd(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size, 4993 int blk_row, int blk_col, int plane, int block, 4994 int plane_bsize, TXB_CTX *txb_ctx, RD_STATS *rd_stats, 4995 FAST_TX_SEARCH_MODE ftxs_mode, int64_t ref_rdcost, 4996 TXB_RD_INFO *rd_info_array) { 4997 const struct macroblock_plane *const p = &x->plane[plane]; 4998 const uint16_t cur_joint_ctx = 4999 (txb_ctx->dc_sign_ctx << 8) + txb_ctx->txb_skip_ctx; 5000 const int txk_type_idx = 5001 av1_get_txk_type_index(plane_bsize, blk_row, blk_col); 5002 // Look up RD and terminate early if we've already processed exactly the 5003 // same residual with exactly the same entropy context. 
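  /* Editor's note: cur_joint_ctx above packs both entropy contexts into one
   * 16-bit key so a single compare can validate a cached entry. For example
   * (made-up values), dc_sign_ctx == 2 and txb_skip_ctx == 5 give
   * (2 << 8) + 5 == 0x0205; a cached result is reused only when this key
   * matches and the cached tx_type is still reachable for the block. */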
5004 if (rd_info_array != NULL && rd_info_array->valid && 5005 rd_info_array->entropy_context == cur_joint_ctx) { 5006 if (plane == 0) 5007 x->e_mbd.mi[0]->txk_type[txk_type_idx] = rd_info_array->tx_type; 5008 const TX_TYPE ref_tx_type = 5009 av1_get_tx_type(get_plane_type(plane), &x->e_mbd, blk_row, blk_col, 5010 tx_size, cpi->common.reduced_tx_set_used); 5011 if (ref_tx_type == rd_info_array->tx_type) { 5012 rd_stats->rate += rd_info_array->rate; 5013 rd_stats->dist += rd_info_array->dist; 5014 rd_stats->sse += rd_info_array->sse; 5015 rd_stats->skip &= rd_info_array->eob == 0; 5016 p->eobs[block] = rd_info_array->eob; 5017 p->txb_entropy_ctx[block] = rd_info_array->txb_entropy_ctx; 5018 return; 5019 } 5020 } 5021 5022 RD_STATS this_rd_stats; 5023 search_txk_type(cpi, x, plane, block, blk_row, blk_col, plane_bsize, tx_size, 5024 txb_ctx, ftxs_mode, 0, 0, ref_rdcost, &this_rd_stats); 5025 5026 av1_merge_rd_stats(rd_stats, &this_rd_stats); 5027 5028 // Save RD results for possible reuse in the future. 5029 if (rd_info_array != NULL) { 5030 rd_info_array->valid = 1; 5031 rd_info_array->entropy_context = cur_joint_ctx; 5032 rd_info_array->rate = this_rd_stats.rate; 5033 rd_info_array->dist = this_rd_stats.dist; 5034 rd_info_array->sse = this_rd_stats.sse; 5035 rd_info_array->eob = p->eobs[block]; 5036 rd_info_array->txb_entropy_ctx = p->txb_entropy_ctx[block]; 5037 if (plane == 0) { 5038 rd_info_array->tx_type = x->e_mbd.mi[0]->txk_type[txk_type_idx]; 5039 } 5040 } 5041 } 5042 5043 static void get_mean_and_dev(const int16_t *data, int stride, int bw, int bh, 5044 float *mean, float *dev) { 5045 int x_sum = 0; 5046 uint64_t x2_sum = 0; 5047 for (int i = 0; i < bh; ++i) { 5048 for (int j = 0; j < bw; ++j) { 5049 const int val = data[j]; 5050 x_sum += val; 5051 x2_sum += val * val; 5052 } 5053 data += stride; 5054 } 5055 5056 const int num = bw * bh; 5057 const float e_x = (float)x_sum / num; 5058 const float e_x2 = (float)((double)x2_sum / num); 5059 const float diff = e_x2 - e_x * e_x; 5060 *dev = (diff > 0) ? sqrtf(diff) : 0; 5061 *mean = e_x; 5062 } 5063 5064 static void get_mean_and_dev_float(const float *data, int stride, int bw, 5065 int bh, float *mean, float *dev) { 5066 float x_sum = 0; 5067 float x2_sum = 0; 5068 for (int i = 0; i < bh; ++i) { 5069 for (int j = 0; j < bw; ++j) { 5070 const float val = data[j]; 5071 x_sum += val; 5072 x2_sum += val * val; 5073 } 5074 data += stride; 5075 } 5076 5077 const int num = bw * bh; 5078 const float e_x = x_sum / num; 5079 const float e_x2 = x2_sum / num; 5080 const float diff = e_x2 - e_x * e_x; 5081 *dev = (diff > 0) ? sqrtf(diff) : 0; 5082 *mean = e_x; 5083 } 5084 5085 // Features used by the model to predict tx split: the mean and standard 5086 // deviation values of the block and its sub-blocks. 
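// Editor's check (not libaom code; fenced with #if 0): the two helpers above
// and the routine below rely on the one-pass identity
// var(x) = E[x^2] - E[x]^2. A standalone numeric example with made-up
// samples:
#if 0
#include <assert.h>
#include <math.h>
static void ex_mean_dev_check(void) {
  const int v[4] = { 2, 4, 4, 6 }; /* mean 4, variance 2 */
  int sum = 0;
  long long sum2 = 0;
  for (int i = 0; i < 4; ++i) {
    sum += v[i];
    sum2 += v[i] * v[i];
  }
  const float e_x = (float)sum / 4;          /* 4.0f  */
  const float e_x2 = (float)sum2 / 4;        /* 18.0f */
  const float dev = sqrtf(e_x2 - e_x * e_x); /* sqrtf(2.0f), ~1.4142f */
  assert(dev > 1.41f && dev < 1.42f);
}
#endif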
5087 static void get_mean_dev_features(const int16_t *data, int stride, int bw, 5088 int bh, int levels, float *feature) { 5089 int feature_idx = 0; 5090 int width = bw; 5091 int height = bh; 5092 const int16_t *const data_ptr = &data[0]; 5093 for (int lv = 0; lv < levels; ++lv) { 5094 if (width < 2 || height < 2) break; 5095 float mean_buf[16]; 5096 float dev_buf[16]; 5097 int blk_idx = 0; 5098 for (int row = 0; row < bh; row += height) { 5099 for (int col = 0; col < bw; col += width) { 5100 float mean, dev; 5101 get_mean_and_dev(data_ptr + row * stride + col, stride, width, height, 5102 &mean, &dev); 5103 feature[feature_idx++] = mean; 5104 feature[feature_idx++] = dev; 5105 mean_buf[blk_idx] = mean; 5106 dev_buf[blk_idx++] = dev; 5107 } 5108 } 5109 if (blk_idx > 1) { 5110 float mean, dev; 5111 // Deviation of means. 5112 get_mean_and_dev_float(mean_buf, 1, 1, blk_idx, &mean, &dev); 5113 feature[feature_idx++] = dev; 5114 // Mean of deviations. 5115 get_mean_and_dev_float(dev_buf, 1, 1, blk_idx, &mean, &dev); 5116 feature[feature_idx++] = mean; 5117 } 5118 // Reduce the block size when proceeding to the next level. 5119 if (height == width) { 5120 height = height >> 1; 5121 width = width >> 1; 5122 } else if (height > width) { 5123 height = height >> 1; 5124 } else { 5125 width = width >> 1; 5126 } 5127 } 5128 } 5129 5130 static int ml_predict_tx_split(MACROBLOCK *x, BLOCK_SIZE bsize, int blk_row, 5131 int blk_col, TX_SIZE tx_size) { 5132 const NN_CONFIG *nn_config = av1_tx_split_nnconfig_map[tx_size]; 5133 if (!nn_config) return -1; 5134 5135 const int diff_stride = block_size_wide[bsize]; 5136 const int16_t *diff = 5137 x->plane[0].src_diff + 4 * blk_row * diff_stride + 4 * blk_col; 5138 const int bw = tx_size_wide[tx_size]; 5139 const int bh = tx_size_high[tx_size]; 5140 aom_clear_system_state(); 5141 5142 float features[64] = { 0.0f }; 5143 get_mean_dev_features(diff, diff_stride, bw, bh, 2, features); 5144 5145 float score = 0.0f; 5146 av1_nn_predict(features, nn_config, &score); 5147 aom_clear_system_state(); 5148 if (score > 8.0f) return 100; 5149 if (score < -8.0f) return 0; 5150 score = 1.0f / (1.0f + (float)exp(-score)); 5151 return (int)(score * 100); 5152 } 5153 5154 typedef struct { 5155 int64_t rd; 5156 int txb_entropy_ctx; 5157 TX_TYPE tx_type; 5158 } TxCandidateInfo; 5159 5160 static void try_tx_block_no_split( 5161 const AV1_COMP *cpi, MACROBLOCK *x, int blk_row, int blk_col, int block, 5162 TX_SIZE tx_size, int depth, BLOCK_SIZE plane_bsize, 5163 const ENTROPY_CONTEXT *ta, const ENTROPY_CONTEXT *tl, 5164 int txfm_partition_ctx, RD_STATS *rd_stats, int64_t ref_best_rd, 5165 FAST_TX_SEARCH_MODE ftxs_mode, TXB_RD_INFO_NODE *rd_info_node, 5166 TxCandidateInfo *no_split) { 5167 MACROBLOCKD *const xd = &x->e_mbd; 5168 MB_MODE_INFO *const mbmi = xd->mi[0]; 5169 struct macroblock_plane *const p = &x->plane[0]; 5170 const int bw = block_size_wide[plane_bsize] >> tx_size_wide_log2[0]; 5171 5172 no_split->rd = INT64_MAX; 5173 no_split->txb_entropy_ctx = 0; 5174 no_split->tx_type = TX_TYPES; 5175 5176 const ENTROPY_CONTEXT *const pta = ta + blk_col; 5177 const ENTROPY_CONTEXT *const ptl = tl + blk_row; 5178 5179 const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size); 5180 TXB_CTX txb_ctx; 5181 get_txb_ctx(plane_bsize, tx_size, 0, pta, ptl, &txb_ctx); 5182 const int zero_blk_rate = x->coeff_costs[txs_ctx][PLANE_TYPE_Y] 5183 .txb_skip_cost[txb_ctx.txb_skip_ctx][1]; 5184 5185 rd_stats->ref_rdcost = ref_best_rd; 5186 rd_stats->zero_rate = zero_blk_rate; 5187 const int index = 
av1_get_txb_size_index(plane_bsize, blk_row, blk_col); 5188 mbmi->inter_tx_size[index] = tx_size; 5189 tx_type_rd(cpi, x, tx_size, blk_row, blk_col, 0, block, plane_bsize, &txb_ctx, 5190 rd_stats, ftxs_mode, ref_best_rd, 5191 rd_info_node != NULL ? rd_info_node->rd_info_array : NULL); 5192 assert(rd_stats->rate < INT_MAX); 5193 5194 if ((RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist) >= 5195 RDCOST(x->rdmult, zero_blk_rate, rd_stats->sse) || 5196 rd_stats->skip == 1) && 5197 !xd->lossless[mbmi->segment_id]) { 5198 #if CONFIG_RD_DEBUG 5199 av1_update_txb_coeff_cost(rd_stats, 0, tx_size, blk_row, blk_col, 5200 zero_blk_rate - rd_stats->rate); 5201 #endif // CONFIG_RD_DEBUG 5202 rd_stats->rate = zero_blk_rate; 5203 rd_stats->dist = rd_stats->sse; 5204 rd_stats->skip = 1; 5205 set_blk_skip(x, 0, blk_row * bw + blk_col, 1); 5206 p->eobs[block] = 0; 5207 update_txk_array(mbmi->txk_type, plane_bsize, blk_row, blk_col, tx_size, 5208 DCT_DCT); 5209 } else { 5210 set_blk_skip(x, 0, blk_row * bw + blk_col, 0); 5211 rd_stats->skip = 0; 5212 } 5213 5214 if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH) 5215 rd_stats->rate += x->txfm_partition_cost[txfm_partition_ctx][0]; 5216 5217 no_split->rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist); 5218 no_split->txb_entropy_ctx = p->txb_entropy_ctx[block]; 5219 const int txk_type_idx = 5220 av1_get_txk_type_index(plane_bsize, blk_row, blk_col); 5221 no_split->tx_type = mbmi->txk_type[txk_type_idx]; 5222 } 5223 5224 static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row, 5225 int blk_col, int block, TX_SIZE tx_size, int depth, 5226 BLOCK_SIZE plane_bsize, ENTROPY_CONTEXT *ta, 5227 ENTROPY_CONTEXT *tl, TXFM_CONTEXT *tx_above, 5228 TXFM_CONTEXT *tx_left, RD_STATS *rd_stats, 5229 int64_t prev_level_rd, int64_t ref_best_rd, 5230 int *is_cost_valid, FAST_TX_SEARCH_MODE ftxs_mode, 5231 TXB_RD_INFO_NODE *rd_info_node); 5232 5233 static void try_tx_block_split( 5234 const AV1_COMP *cpi, MACROBLOCK *x, int blk_row, int blk_col, int block, 5235 TX_SIZE tx_size, int depth, BLOCK_SIZE plane_bsize, ENTROPY_CONTEXT *ta, 5236 ENTROPY_CONTEXT *tl, TXFM_CONTEXT *tx_above, TXFM_CONTEXT *tx_left, 5237 int txfm_partition_ctx, int64_t no_split_rd, int64_t ref_best_rd, 5238 FAST_TX_SEARCH_MODE ftxs_mode, TXB_RD_INFO_NODE *rd_info_node, 5239 RD_STATS *split_rd_stats, int64_t *split_rd) { 5240 assert(tx_size < TX_SIZES_ALL); 5241 MACROBLOCKD *const xd = &x->e_mbd; 5242 const int max_blocks_high = max_block_high(xd, plane_bsize, 0); 5243 const int max_blocks_wide = max_block_wide(xd, plane_bsize, 0); 5244 const TX_SIZE sub_txs = sub_tx_size_map[tx_size]; 5245 const int bsw = tx_size_wide_unit[sub_txs]; 5246 const int bsh = tx_size_high_unit[sub_txs]; 5247 const int sub_step = bsw * bsh; 5248 const int nblks = 5249 (tx_size_high_unit[tx_size] / bsh) * (tx_size_wide_unit[tx_size] / bsw); 5250 assert(nblks > 0); 5251 int blk_idx = 0; 5252 int64_t tmp_rd = 0; 5253 *split_rd = INT64_MAX; 5254 split_rd_stats->rate = x->txfm_partition_cost[txfm_partition_ctx][1]; 5255 5256 for (int r = 0; r < tx_size_high_unit[tx_size]; r += bsh) { 5257 for (int c = 0; c < tx_size_wide_unit[tx_size]; c += bsw, ++blk_idx) { 5258 assert(blk_idx < 4); 5259 const int offsetr = blk_row + r; 5260 const int offsetc = blk_col + c; 5261 if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue; 5262 5263 RD_STATS this_rd_stats; 5264 int this_cost_valid = 1; 5265 select_tx_block( 5266 cpi, x, offsetr, offsetc, block, sub_txs, depth + 1, plane_bsize, ta, 5267 tl, tx_above, 
tx_left, &this_rd_stats, no_split_rd / nblks, 5268 ref_best_rd - tmp_rd, &this_cost_valid, ftxs_mode, 5269 (rd_info_node != NULL) ? rd_info_node->children[blk_idx] : NULL); 5270 if (!this_cost_valid) return; 5271 av1_merge_rd_stats(split_rd_stats, &this_rd_stats); 5272 tmp_rd = RDCOST(x->rdmult, split_rd_stats->rate, split_rd_stats->dist); 5273 if (no_split_rd < tmp_rd) return; 5274 block += sub_step; 5275 } 5276 } 5277 5278 *split_rd = tmp_rd; 5279 } 5280 5281 // Search for the best tx partition/type for a given luma block. 5282 static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row, 5283 int blk_col, int block, TX_SIZE tx_size, int depth, 5284 BLOCK_SIZE plane_bsize, ENTROPY_CONTEXT *ta, 5285 ENTROPY_CONTEXT *tl, TXFM_CONTEXT *tx_above, 5286 TXFM_CONTEXT *tx_left, RD_STATS *rd_stats, 5287 int64_t prev_level_rd, int64_t ref_best_rd, 5288 int *is_cost_valid, FAST_TX_SEARCH_MODE ftxs_mode, 5289 TXB_RD_INFO_NODE *rd_info_node) { 5290 assert(tx_size < TX_SIZES_ALL); 5291 av1_init_rd_stats(rd_stats); 5292 if (ref_best_rd < 0) { 5293 *is_cost_valid = 0; 5294 return; 5295 } 5296 5297 MACROBLOCKD *const xd = &x->e_mbd; 5298 const int max_blocks_high = max_block_high(xd, plane_bsize, 0); 5299 const int max_blocks_wide = max_block_wide(xd, plane_bsize, 0); 5300 if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return; 5301 5302 const int bw = block_size_wide[plane_bsize] >> tx_size_wide_log2[0]; 5303 MB_MODE_INFO *const mbmi = xd->mi[0]; 5304 const int ctx = txfm_partition_context(tx_above + blk_col, tx_left + blk_row, 5305 mbmi->sb_type, tx_size); 5306 struct macroblock_plane *const p = &x->plane[0]; 5307 5308 const int try_no_split = 5309 cpi->oxcf.enable_tx64 || txsize_sqr_up_map[tx_size] != TX_64X64; 5310 int try_split = tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH; 5311 #if CONFIG_DIST_8X8 5312 if (x->using_dist_8x8) 5313 try_split &= tx_size_wide[tx_size] >= 16 && tx_size_high[tx_size] >= 16; 5314 #endif 5315 TxCandidateInfo no_split = { INT64_MAX, 0, TX_TYPES }; 5316 5317 // TX no split 5318 if (try_no_split) { 5319 try_tx_block_no_split(cpi, x, blk_row, blk_col, block, tx_size, depth, 5320 plane_bsize, ta, tl, ctx, rd_stats, ref_best_rd, 5321 ftxs_mode, rd_info_node, &no_split); 5322 5323 if (cpi->sf.adaptive_txb_search_level && 5324 (no_split.rd - 5325 (no_split.rd >> (1 + cpi->sf.adaptive_txb_search_level))) > 5326 ref_best_rd) { 5327 *is_cost_valid = 0; 5328 return; 5329 } 5330 5331 if (cpi->sf.txb_split_cap) { 5332 if (p->eobs[block] == 0) try_split = 0; 5333 } 5334 5335 if (cpi->sf.adaptive_txb_search_level && 5336 (no_split.rd - 5337 (no_split.rd >> (2 + cpi->sf.adaptive_txb_search_level))) > 5338 prev_level_rd) { 5339 try_split = 0; 5340 } 5341 } 5342 5343 if (x->e_mbd.bd == 8 && !x->cb_partition_scan && try_split) { 5344 const int threshold = cpi->sf.tx_type_search.ml_tx_split_thresh; 5345 if (threshold >= 0) { 5346 const int split_score = 5347 ml_predict_tx_split(x, plane_bsize, blk_row, blk_col, tx_size); 5348 if (split_score >= 0 && split_score < threshold) try_split = 0; 5349 } 5350 } 5351 5352 // TX split 5353 int64_t split_rd = INT64_MAX; 5354 RD_STATS split_rd_stats; 5355 av1_init_rd_stats(&split_rd_stats); 5356 if (try_split) { 5357 try_tx_block_split(cpi, x, blk_row, blk_col, block, tx_size, depth, 5358 plane_bsize, ta, tl, tx_above, tx_left, ctx, no_split.rd, 5359 AOMMIN(no_split.rd, ref_best_rd), ftxs_mode, 5360 rd_info_node, &split_rd_stats, &split_rd); 5361 } 5362 5363 if (no_split.rd < split_rd) { 5364 ENTROPY_CONTEXT *pta = ta + 
blk_col; 5365 ENTROPY_CONTEXT *ptl = tl + blk_row; 5366 const TX_SIZE tx_size_selected = tx_size; 5367 p->txb_entropy_ctx[block] = no_split.txb_entropy_ctx; 5368 av1_set_txb_context(x, 0, block, tx_size_selected, pta, ptl); 5369 txfm_partition_update(tx_above + blk_col, tx_left + blk_row, tx_size, 5370 tx_size); 5371 for (int idy = 0; idy < tx_size_high_unit[tx_size]; ++idy) { 5372 for (int idx = 0; idx < tx_size_wide_unit[tx_size]; ++idx) { 5373 const int index = 5374 av1_get_txb_size_index(plane_bsize, blk_row + idy, blk_col + idx); 5375 mbmi->inter_tx_size[index] = tx_size_selected; 5376 } 5377 } 5378 mbmi->tx_size = tx_size_selected; 5379 update_txk_array(mbmi->txk_type, plane_bsize, blk_row, blk_col, tx_size, 5380 no_split.tx_type); 5381 set_blk_skip(x, 0, blk_row * bw + blk_col, rd_stats->skip); 5382 } else { 5383 *rd_stats = split_rd_stats; 5384 if (split_rd == INT64_MAX) *is_cost_valid = 0; 5385 } 5386 } 5387 5388 static int64_t select_tx_size_and_type(const AV1_COMP *cpi, MACROBLOCK *x, 5389 RD_STATS *rd_stats, BLOCK_SIZE bsize, 5390 int64_t ref_best_rd, 5391 TXB_RD_INFO_NODE *rd_info_tree) { 5392 MACROBLOCKD *const xd = &x->e_mbd; 5393 assert(is_inter_block(xd->mi[0])); 5394 5395 // TODO(debargha): enable this as a speed feature where the 5396 // select_inter_block_yrd() function above will use a simplified search 5397 // such as not using full optimize, but the inter_block_yrd() function 5398 // will use more complex search given that the transform partitions have 5399 // already been decided. 5400 5401 const int fast_tx_search = cpi->sf.tx_size_search_method > USE_FULL_RD; 5402 int64_t rd_thresh = ref_best_rd; 5403 if (fast_tx_search && rd_thresh < INT64_MAX) { 5404 if (INT64_MAX - rd_thresh > (rd_thresh >> 3)) rd_thresh += (rd_thresh >> 3); 5405 } 5406 assert(rd_thresh > 0); 5407 5408 const FAST_TX_SEARCH_MODE ftxs_mode = 5409 fast_tx_search ? FTXS_DCT_AND_1D_DCT_ONLY : FTXS_NONE; 5410 const struct macroblockd_plane *const pd = &xd->plane[0]; 5411 const BLOCK_SIZE plane_bsize = 5412 get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y); 5413 const int mi_width = mi_size_wide[plane_bsize]; 5414 const int mi_height = mi_size_high[plane_bsize]; 5415 ENTROPY_CONTEXT ctxa[MAX_MIB_SIZE]; 5416 ENTROPY_CONTEXT ctxl[MAX_MIB_SIZE]; 5417 TXFM_CONTEXT tx_above[MAX_MIB_SIZE]; 5418 TXFM_CONTEXT tx_left[MAX_MIB_SIZE]; 5419 av1_get_entropy_contexts(bsize, pd, ctxa, ctxl); 5420 memcpy(tx_above, xd->above_txfm_context, sizeof(TXFM_CONTEXT) * mi_width); 5421 memcpy(tx_left, xd->left_txfm_context, sizeof(TXFM_CONTEXT) * mi_height); 5422 5423 const int skip_ctx = av1_get_skip_context(xd); 5424 const int s0 = x->skip_cost[skip_ctx][0]; 5425 const int s1 = x->skip_cost[skip_ctx][1]; 5426 const int init_depth = 5427 get_search_init_depth(mi_width, mi_height, 1, &cpi->sf); 5428 const TX_SIZE max_tx_size = max_txsize_rect_lookup[plane_bsize]; 5429 const int bh = tx_size_high_unit[max_tx_size]; 5430 const int bw = tx_size_wide_unit[max_tx_size]; 5431 const int step = bw * bh; 5432 int64_t skip_rd = RDCOST(x->rdmult, s1, 0); 5433 int64_t this_rd = RDCOST(x->rdmult, s0, 0); 5434 int block = 0; 5435 5436 av1_init_rd_stats(rd_stats); 5437 for (int idy = 0; idy < mi_height; idy += bh) { 5438 for (int idx = 0; idx < mi_width; idx += bw) { 5439 const int64_t best_rd_sofar = 5440 (rd_thresh == INT64_MAX) ? 
INT64_MAX 5441 : (rd_thresh - (AOMMIN(skip_rd, this_rd))); 5442 int is_cost_valid = 1; 5443 RD_STATS pn_rd_stats; 5444 select_tx_block(cpi, x, idy, idx, block, max_tx_size, init_depth, 5445 plane_bsize, ctxa, ctxl, tx_above, tx_left, &pn_rd_stats, 5446 INT64_MAX, best_rd_sofar, &is_cost_valid, ftxs_mode, 5447 rd_info_tree); 5448 if (!is_cost_valid || pn_rd_stats.rate == INT_MAX) { 5449 av1_invalid_rd_stats(rd_stats); 5450 return INT64_MAX; 5451 } 5452 av1_merge_rd_stats(rd_stats, &pn_rd_stats); 5453 skip_rd = RDCOST(x->rdmult, s1, rd_stats->sse); 5454 this_rd = RDCOST(x->rdmult, rd_stats->rate + s0, rd_stats->dist); 5455 block += step; 5456 if (rd_info_tree != NULL) rd_info_tree += 1; 5457 } 5458 } 5459 5460 if (skip_rd <= this_rd) { 5461 rd_stats->skip = 1; 5462 } else { 5463 rd_stats->skip = 0; 5464 } 5465 5466 if (rd_stats->rate == INT_MAX) return INT64_MAX; 5467 5468 // If fast_tx_search is true, only DCT and 1D DCT were tested in the 5469 // search above. Do a better search for tx type with the tx sizes 5470 // already decided. 5471 if (fast_tx_search) { 5472 if (!inter_block_yrd(cpi, x, rd_stats, bsize, ref_best_rd, FTXS_NONE)) 5473 return INT64_MAX; 5474 } 5475 5476 int64_t rd; 5477 if (rd_stats->skip) { 5478 rd = RDCOST(x->rdmult, s1, rd_stats->sse); 5479 } else { 5480 rd = RDCOST(x->rdmult, rd_stats->rate + s0, rd_stats->dist); 5481 if (!xd->lossless[xd->mi[0]->segment_id]) 5482 rd = AOMMIN(rd, RDCOST(x->rdmult, s1, rd_stats->sse)); 5483 } 5484 5485 return rd; 5486 } 5487 5488 // Finds rd cost for a y block, given the transform size partitions 5489 static void tx_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row, 5490 int blk_col, int block, TX_SIZE tx_size, 5491 BLOCK_SIZE plane_bsize, int depth, 5492 ENTROPY_CONTEXT *above_ctx, ENTROPY_CONTEXT *left_ctx, 5493 TXFM_CONTEXT *tx_above, TXFM_CONTEXT *tx_left, 5494 int64_t ref_best_rd, RD_STATS *rd_stats, 5495 FAST_TX_SEARCH_MODE ftxs_mode) { 5496 MACROBLOCKD *const xd = &x->e_mbd; 5497 MB_MODE_INFO *const mbmi = xd->mi[0]; 5498 const int max_blocks_high = max_block_high(xd, plane_bsize, 0); 5499 const int max_blocks_wide = max_block_wide(xd, plane_bsize, 0); 5500 5501 assert(tx_size < TX_SIZES_ALL); 5502 5503 if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return; 5504 5505 const TX_SIZE plane_tx_size = mbmi->inter_tx_size[av1_get_txb_size_index( 5506 plane_bsize, blk_row, blk_col)]; 5507 5508 int ctx = txfm_partition_context(tx_above + blk_col, tx_left + blk_row, 5509 mbmi->sb_type, tx_size); 5510 5511 av1_init_rd_stats(rd_stats); 5512 if (tx_size == plane_tx_size) { 5513 ENTROPY_CONTEXT *ta = above_ctx + blk_col; 5514 ENTROPY_CONTEXT *tl = left_ctx + blk_row; 5515 const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size); 5516 TXB_CTX txb_ctx; 5517 get_txb_ctx(plane_bsize, tx_size, 0, ta, tl, &txb_ctx); 5518 5519 const int zero_blk_rate = x->coeff_costs[txs_ctx][get_plane_type(0)] 5520 .txb_skip_cost[txb_ctx.txb_skip_ctx][1]; 5521 rd_stats->zero_rate = zero_blk_rate; 5522 rd_stats->ref_rdcost = ref_best_rd; 5523 tx_type_rd(cpi, x, tx_size, blk_row, blk_col, 0, block, plane_bsize, 5524 &txb_ctx, rd_stats, ftxs_mode, ref_best_rd, NULL); 5525 const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0]; 5526 if (RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist) >= 5527 RDCOST(x->rdmult, zero_blk_rate, rd_stats->sse) || 5528 rd_stats->skip == 1) { 5529 rd_stats->rate = zero_blk_rate; 5530 rd_stats->dist = rd_stats->sse; 5531 rd_stats->skip = 1; 5532 set_blk_skip(x, 0, blk_row * mi_width + 
blk_col, 1); 5533 x->plane[0].eobs[block] = 0; 5534 x->plane[0].txb_entropy_ctx[block] = 0; 5535 update_txk_array(mbmi->txk_type, plane_bsize, blk_row, blk_col, tx_size, 5536 DCT_DCT); 5537 } else { 5538 rd_stats->skip = 0; 5539 set_blk_skip(x, 0, blk_row * mi_width + blk_col, 0); 5540 } 5541 if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH) 5542 rd_stats->rate += x->txfm_partition_cost[ctx][0]; 5543 av1_set_txb_context(x, 0, block, tx_size, ta, tl); 5544 txfm_partition_update(tx_above + blk_col, tx_left + blk_row, tx_size, 5545 tx_size); 5546 } else { 5547 const TX_SIZE sub_txs = sub_tx_size_map[tx_size]; 5548 const int bsw = tx_size_wide_unit[sub_txs]; 5549 const int bsh = tx_size_high_unit[sub_txs]; 5550 const int step = bsh * bsw; 5551 RD_STATS pn_rd_stats; 5552 int64_t this_rd = 0; 5553 assert(bsw > 0 && bsh > 0); 5554 5555 for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh) { 5556 for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) { 5557 const int offsetr = blk_row + row; 5558 const int offsetc = blk_col + col; 5559 5560 if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue; 5561 5562 av1_init_rd_stats(&pn_rd_stats); 5563 tx_block_yrd(cpi, x, offsetr, offsetc, block, sub_txs, plane_bsize, 5564 depth + 1, above_ctx, left_ctx, tx_above, tx_left, 5565 ref_best_rd - this_rd, &pn_rd_stats, ftxs_mode); 5566 if (pn_rd_stats.rate == INT_MAX) { 5567 av1_invalid_rd_stats(rd_stats); 5568 return; 5569 } 5570 av1_merge_rd_stats(rd_stats, &pn_rd_stats); 5571 this_rd += RDCOST(x->rdmult, pn_rd_stats.rate, pn_rd_stats.dist); 5572 block += step; 5573 } 5574 } 5575 5576 if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH) 5577 rd_stats->rate += x->txfm_partition_cost[ctx][1]; 5578 } 5579 } 5580 5581 // Return value 0: early termination triggered, no valid rd cost available; 5582 // 1: rd cost values are valid. 
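// Editor's sketch (not libaom code; fenced with #if 0): the function below
// finishes by weighing "code the residual" against "signal skip". With s0
// and s1 the costs of the skip flag being 0 or 1, the decision looks like
// this (simplified cost of rate + dist instead of the real RDCOST):
#if 0
#include <stdint.h>
typedef struct { int rate; int64_t dist; int64_t sse; int skip; } ex_stats;
static void ex_skip_decision(ex_stats *s, int s0, int s1) {
  const int64_t coded_rd = (int64_t)(s->rate + s0) + s->dist;
  const int64_t skip_rd = (int64_t)s1 + s->sse; /* dist == sse when skipped */
  if (skip_rd < coded_rd) { /* skip wins: rate drops to 0, dist becomes sse */
    s->rate = 0;
    s->dist = s->sse;
    s->skip = 1;
  }
}
#endif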
5583 static int inter_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x, 5584 RD_STATS *rd_stats, BLOCK_SIZE bsize, 5585 int64_t ref_best_rd, FAST_TX_SEARCH_MODE ftxs_mode) { 5586 MACROBLOCKD *const xd = &x->e_mbd; 5587 int is_cost_valid = 1; 5588 int64_t this_rd = 0; 5589 5590 if (ref_best_rd < 0) is_cost_valid = 0; 5591 5592 av1_init_rd_stats(rd_stats); 5593 5594 if (is_cost_valid) { 5595 const struct macroblockd_plane *const pd = &xd->plane[0]; 5596 const BLOCK_SIZE plane_bsize = 5597 get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y); 5598 const int mi_width = mi_size_wide[plane_bsize]; 5599 const int mi_height = mi_size_high[plane_bsize]; 5600 const TX_SIZE max_tx_size = get_vartx_max_txsize(xd, plane_bsize, 0); 5601 const int bh = tx_size_high_unit[max_tx_size]; 5602 const int bw = tx_size_wide_unit[max_tx_size]; 5603 const int init_depth = 5604 get_search_init_depth(mi_width, mi_height, 1, &cpi->sf); 5605 int idx, idy; 5606 int block = 0; 5607 int step = tx_size_wide_unit[max_tx_size] * tx_size_high_unit[max_tx_size]; 5608 ENTROPY_CONTEXT ctxa[MAX_MIB_SIZE]; 5609 ENTROPY_CONTEXT ctxl[MAX_MIB_SIZE]; 5610 TXFM_CONTEXT tx_above[MAX_MIB_SIZE]; 5611 TXFM_CONTEXT tx_left[MAX_MIB_SIZE]; 5612 RD_STATS pn_rd_stats; 5613 5614 av1_get_entropy_contexts(bsize, pd, ctxa, ctxl); 5615 memcpy(tx_above, xd->above_txfm_context, sizeof(TXFM_CONTEXT) * mi_width); 5616 memcpy(tx_left, xd->left_txfm_context, sizeof(TXFM_CONTEXT) * mi_height); 5617 5618 for (idy = 0; idy < mi_height; idy += bh) { 5619 for (idx = 0; idx < mi_width; idx += bw) { 5620 av1_init_rd_stats(&pn_rd_stats); 5621 tx_block_yrd(cpi, x, idy, idx, block, max_tx_size, plane_bsize, 5622 init_depth, ctxa, ctxl, tx_above, tx_left, 5623 ref_best_rd - this_rd, &pn_rd_stats, ftxs_mode); 5624 if (pn_rd_stats.rate == INT_MAX) { 5625 av1_invalid_rd_stats(rd_stats); 5626 return 0; 5627 } 5628 av1_merge_rd_stats(rd_stats, &pn_rd_stats); 5629 this_rd += 5630 AOMMIN(RDCOST(x->rdmult, pn_rd_stats.rate, pn_rd_stats.dist), 5631 RDCOST(x->rdmult, pn_rd_stats.zero_rate, pn_rd_stats.sse)); 5632 block += step; 5633 } 5634 } 5635 } 5636 5637 const int skip_ctx = av1_get_skip_context(xd); 5638 const int s0 = x->skip_cost[skip_ctx][0]; 5639 const int s1 = x->skip_cost[skip_ctx][1]; 5640 int64_t skip_rd = RDCOST(x->rdmult, s1, rd_stats->sse); 5641 this_rd = RDCOST(x->rdmult, rd_stats->rate + s0, rd_stats->dist); 5642 if (skip_rd < this_rd) { 5643 this_rd = skip_rd; 5644 rd_stats->rate = 0; 5645 rd_stats->dist = rd_stats->sse; 5646 rd_stats->skip = 1; 5647 } 5648 if (this_rd > ref_best_rd) is_cost_valid = 0; 5649 5650 if (!is_cost_valid) { 5651 // reset cost value 5652 av1_invalid_rd_stats(rd_stats); 5653 } 5654 return is_cost_valid; 5655 } 5656 5657 static int find_tx_size_rd_info(TXB_RD_RECORD *cur_record, 5658 const uint32_t hash) { 5659 // Linear search through the circular buffer to find matching hash. 
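  // Editor's note: entries live in a circular buffer, so the newest entry
  // sits at index_start - 1 and the oldest at index_start. The two loops
  // below therefore scan newest-to-oldest. Example (made-up) with a buffer
  // length of 8, num == 8 and index_start == 3: the search order is
  // 2, 1, 0, 7, 6, 5, 4, 3.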
5660 for (int i = cur_record->index_start - 1; i >= 0; i--) { 5661 if (cur_record->hash_vals[i] == hash) return i; 5662 } 5663 for (int i = cur_record->num - 1; i >= cur_record->index_start; i--) { 5664 if (cur_record->hash_vals[i] == hash) return i; 5665 } 5666 int index; 5667 // If not found - add new RD info into the buffer and return its index 5668 if (cur_record->num < TX_SIZE_RD_RECORD_BUFFER_LEN) { 5669 index = (cur_record->index_start + cur_record->num) % 5670 TX_SIZE_RD_RECORD_BUFFER_LEN; 5671 cur_record->num++; 5672 } else { 5673 index = cur_record->index_start; 5674 cur_record->index_start = 5675 (cur_record->index_start + 1) % TX_SIZE_RD_RECORD_BUFFER_LEN; 5676 } 5677 5678 cur_record->hash_vals[index] = hash; 5679 av1_zero(cur_record->tx_rd_info[index]); 5680 return index; 5681 } 5682 5683 typedef struct { 5684 int leaf; 5685 int8_t children[4]; 5686 } RD_RECORD_IDX_NODE; 5687 5688 static const RD_RECORD_IDX_NODE rd_record_tree_8x8[] = { 5689 { 1, { 0 } }, 5690 }; 5691 5692 static const RD_RECORD_IDX_NODE rd_record_tree_8x16[] = { 5693 { 0, { 1, 2, -1, -1 } }, 5694 { 1, { 0, 0, 0, 0 } }, 5695 { 1, { 0, 0, 0, 0 } }, 5696 }; 5697 5698 static const RD_RECORD_IDX_NODE rd_record_tree_16x8[] = { 5699 { 0, { 1, 2, -1, -1 } }, 5700 { 1, { 0 } }, 5701 { 1, { 0 } }, 5702 }; 5703 5704 static const RD_RECORD_IDX_NODE rd_record_tree_16x16[] = { 5705 { 0, { 1, 2, 3, 4 } }, { 1, { 0 } }, { 1, { 0 } }, { 1, { 0 } }, { 1, { 0 } }, 5706 }; 5707 5708 static const RD_RECORD_IDX_NODE rd_record_tree_1_2[] = { 5709 { 0, { 1, 2, -1, -1 } }, 5710 { 0, { 3, 4, 5, 6 } }, 5711 { 0, { 7, 8, 9, 10 } }, 5712 }; 5713 5714 static const RD_RECORD_IDX_NODE rd_record_tree_2_1[] = { 5715 { 0, { 1, 2, -1, -1 } }, 5716 { 0, { 3, 4, 7, 8 } }, 5717 { 0, { 5, 6, 9, 10 } }, 5718 }; 5719 5720 static const RD_RECORD_IDX_NODE rd_record_tree_sqr[] = { 5721 { 0, { 1, 2, 3, 4 } }, { 0, { 5, 6, 9, 10 } }, { 0, { 7, 8, 11, 12 } }, 5722 { 0, { 13, 14, 17, 18 } }, { 0, { 15, 16, 19, 20 } }, 5723 }; 5724 5725 static const RD_RECORD_IDX_NODE rd_record_tree_64x128[] = { 5726 { 0, { 2, 3, 4, 5 } }, { 0, { 6, 7, 8, 9 } }, 5727 { 0, { 10, 11, 14, 15 } }, { 0, { 12, 13, 16, 17 } }, 5728 { 0, { 18, 19, 22, 23 } }, { 0, { 20, 21, 24, 25 } }, 5729 { 0, { 26, 27, 30, 31 } }, { 0, { 28, 29, 32, 33 } }, 5730 { 0, { 34, 35, 38, 39 } }, { 0, { 36, 37, 40, 41 } }, 5731 }; 5732 5733 static const RD_RECORD_IDX_NODE rd_record_tree_128x64[] = { 5734 { 0, { 2, 3, 6, 7 } }, { 0, { 4, 5, 8, 9 } }, 5735 { 0, { 10, 11, 18, 19 } }, { 0, { 12, 13, 20, 21 } }, 5736 { 0, { 14, 15, 22, 23 } }, { 0, { 16, 17, 24, 25 } }, 5737 { 0, { 26, 27, 34, 35 } }, { 0, { 28, 29, 36, 37 } }, 5738 { 0, { 30, 31, 38, 39 } }, { 0, { 32, 33, 40, 41 } }, 5739 }; 5740 5741 static const RD_RECORD_IDX_NODE rd_record_tree_128x128[] = { 5742 { 0, { 4, 5, 8, 9 } }, { 0, { 6, 7, 10, 11 } }, 5743 { 0, { 12, 13, 16, 17 } }, { 0, { 14, 15, 18, 19 } }, 5744 { 0, { 20, 21, 28, 29 } }, { 0, { 22, 23, 30, 31 } }, 5745 { 0, { 24, 25, 32, 33 } }, { 0, { 26, 27, 34, 35 } }, 5746 { 0, { 36, 37, 44, 45 } }, { 0, { 38, 39, 46, 47 } }, 5747 { 0, { 40, 41, 48, 49 } }, { 0, { 42, 43, 50, 51 } }, 5748 { 0, { 52, 53, 60, 61 } }, { 0, { 54, 55, 62, 63 } }, 5749 { 0, { 56, 57, 64, 65 } }, { 0, { 58, 59, 66, 67 } }, 5750 { 0, { 68, 69, 76, 77 } }, { 0, { 70, 71, 78, 79 } }, 5751 { 0, { 72, 73, 80, 81 } }, { 0, { 74, 75, 82, 83 } }, 5752 }; 5753 5754 static const RD_RECORD_IDX_NODE rd_record_tree_1_4[] = { 5755 { 0, { 1, -1, 2, -1 } }, 5756 { 0, { 3, 4, -1, -1 } }, 5757 { 0, { 5, 6, -1, -1 } }, 5758 }; 
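// Editor's note: each table entry above and below is one node of a flat
// quadtree (leaf == 1 marks leaves); children[] holds indices into the same
// flat array of TXB_RD_INFO_NODEs. init_rd_record_tree() below links only
// strictly positive indices, so both 0 and -1 mean "no child" (rectangular
// splits yield two sub-blocks and pad the remaining slots with -1).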
5759 5760 static const RD_RECORD_IDX_NODE rd_record_tree_4_1[] = { 5761 { 0, { 1, 2, -1, -1 } }, 5762 { 0, { 3, 4, -1, -1 } }, 5763 { 0, { 5, 6, -1, -1 } }, 5764 }; 5765 5766 static const RD_RECORD_IDX_NODE *rd_record_tree[BLOCK_SIZES_ALL] = { 5767 NULL, // BLOCK_4X4 5768 NULL, // BLOCK_4X8 5769 NULL, // BLOCK_8X4 5770 rd_record_tree_8x8, // BLOCK_8X8 5771 rd_record_tree_8x16, // BLOCK_8X16 5772 rd_record_tree_16x8, // BLOCK_16X8 5773 rd_record_tree_16x16, // BLOCK_16X16 5774 rd_record_tree_1_2, // BLOCK_16X32 5775 rd_record_tree_2_1, // BLOCK_32X16 5776 rd_record_tree_sqr, // BLOCK_32X32 5777 rd_record_tree_1_2, // BLOCK_32X64 5778 rd_record_tree_2_1, // BLOCK_64X32 5779 rd_record_tree_sqr, // BLOCK_64X64 5780 rd_record_tree_64x128, // BLOCK_64X128 5781 rd_record_tree_128x64, // BLOCK_128X64 5782 rd_record_tree_128x128, // BLOCK_128X128 5783 NULL, // BLOCK_4X16 5784 NULL, // BLOCK_16X4 5785 rd_record_tree_1_4, // BLOCK_8X32 5786 rd_record_tree_4_1, // BLOCK_32X8 5787 rd_record_tree_1_4, // BLOCK_16X64 5788 rd_record_tree_4_1, // BLOCK_64X16 5789 }; 5790 5791 static const int rd_record_tree_size[BLOCK_SIZES_ALL] = { 5792 0, // BLOCK_4X4 5793 0, // BLOCK_4X8 5794 0, // BLOCK_8X4 5795 sizeof(rd_record_tree_8x8) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_8X8 5796 sizeof(rd_record_tree_8x16) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_8X16 5797 sizeof(rd_record_tree_16x8) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_16X8 5798 sizeof(rd_record_tree_16x16) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_16X16 5799 sizeof(rd_record_tree_1_2) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_16X32 5800 sizeof(rd_record_tree_2_1) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_32X16 5801 sizeof(rd_record_tree_sqr) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_32X32 5802 sizeof(rd_record_tree_1_2) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_32X64 5803 sizeof(rd_record_tree_2_1) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_64X32 5804 sizeof(rd_record_tree_sqr) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_64X64 5805 sizeof(rd_record_tree_64x128) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_64X128 5806 sizeof(rd_record_tree_128x64) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_128X64 5807 sizeof(rd_record_tree_128x128) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_128X128 5808 0, // BLOCK_4X16 5809 0, // BLOCK_16X4 5810 sizeof(rd_record_tree_1_4) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_8X32 5811 sizeof(rd_record_tree_4_1) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_32X8 5812 sizeof(rd_record_tree_1_4) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_16X64 5813 sizeof(rd_record_tree_4_1) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_64X16 5814 }; 5815 5816 static INLINE void init_rd_record_tree(TXB_RD_INFO_NODE *tree, 5817 BLOCK_SIZE bsize) { 5818 const RD_RECORD_IDX_NODE *rd_record = rd_record_tree[bsize]; 5819 const int size = rd_record_tree_size[bsize]; 5820 for (int i = 0; i < size; ++i) { 5821 if (rd_record[i].leaf) { 5822 av1_zero(tree[i].children); 5823 } else { 5824 for (int j = 0; j < 4; ++j) { 5825 const int8_t idx = rd_record[i].children[j]; 5826 tree[i].children[j] = idx > 0 ? &tree[idx] : NULL; 5827 } 5828 } 5829 } 5830 } 5831 5832 // Go through all TX blocks that could be used in TX size search, compute 5833 // residual hash values for them and find matching RD info that stores previous 5834 // RD search results for these TX blocks. The idea is to prevent repeated 5835 // rate/distortion computations that happen because of the combination of 5836 // partition and TX size search. The resulting RD info records are returned in 5837 // the form of a quadtree for easier access in actual TX size search. 
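// Editor's sketch (not libaom code; fenced with #if 0): the core step of the
// scheme described above is "copy one TX block of residuals into a
// contiguous buffer, hash it, and use the hash as a cache key". ex_crc32 is
// a hypothetical stand-in for av1_get_crc32c_value().
#if 0
#include <stdint.h>
#include <string.h>
static uint32_t ex_hash_tx_block(uint32_t (*ex_crc32)(const uint8_t *, int),
                                 const int16_t *diff, int diff_stride,
                                 int tx_w, int tx_h) {
  int16_t buf[64 * 64]; /* big enough for the largest hashed TX block */
  for (int i = 0; i < tx_h; ++i)
    memcpy(buf + i * tx_w, diff + i * diff_stride, sizeof(*buf) * tx_w);
  /* 2 bytes per int16_t sample, matching the real call site. */
  return ex_crc32((const uint8_t *)buf, 2 * tx_w * tx_h);
}
#endif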
5838 static int find_tx_size_rd_records(MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, 5839 int mi_col, TXB_RD_INFO_NODE *dst_rd_info) { 5840 TXB_RD_RECORD *rd_records_table[4] = { x->txb_rd_record_8X8, 5841 x->txb_rd_record_16X16, 5842 x->txb_rd_record_32X32, 5843 x->txb_rd_record_64X64 }; 5844 const TX_SIZE max_square_tx_size = max_txsize_lookup[bsize]; 5845 const int bw = block_size_wide[bsize]; 5846 const int bh = block_size_high[bsize]; 5847 5848 // Hashing is performed only for square TX sizes larger than TX_4X4 5849 if (max_square_tx_size < TX_8X8) return 0; 5850 const int diff_stride = bw; 5851 const struct macroblock_plane *const p = &x->plane[0]; 5852 const int16_t *diff = &p->src_diff[0]; 5853 init_rd_record_tree(dst_rd_info, bsize); 5854 // Coordinates of the top-left corner of current block within the superblock 5855 // measured in pixels: 5856 const int mi_row_in_sb = (mi_row % MAX_MIB_SIZE) << MI_SIZE_LOG2; 5857 const int mi_col_in_sb = (mi_col % MAX_MIB_SIZE) << MI_SIZE_LOG2; 5858 int cur_rd_info_idx = 0; 5859 int cur_tx_depth = 0; 5860 TX_SIZE cur_tx_size = max_txsize_rect_lookup[bsize]; 5861 while (cur_tx_depth <= MAX_VARTX_DEPTH) { 5862 const int cur_tx_bw = tx_size_wide[cur_tx_size]; 5863 const int cur_tx_bh = tx_size_high[cur_tx_size]; 5864 if (cur_tx_bw < 8 || cur_tx_bh < 8) break; 5865 const TX_SIZE next_tx_size = sub_tx_size_map[cur_tx_size]; 5866 const int tx_size_idx = cur_tx_size - TX_8X8; 5867 for (int row = 0; row < bh; row += cur_tx_bh) { 5868 for (int col = 0; col < bw; col += cur_tx_bw) { 5869 if (cur_tx_bw != cur_tx_bh) { 5870 // Use dummy nodes for all rectangular transforms within the 5871 // TX size search tree. 5872 dst_rd_info[cur_rd_info_idx].rd_info_array = NULL; 5873 } else { 5874 // Get spatial location of this TX block within the superblock 5875 // (measured in cur_tx_bsize units). 5876 const int row_in_sb = (mi_row_in_sb + row) / cur_tx_bh; 5877 const int col_in_sb = (mi_col_in_sb + col) / cur_tx_bw; 5878 5879 int16_t hash_data[MAX_SB_SQUARE]; 5880 int16_t *cur_hash_row = hash_data; 5881 const int16_t *cur_diff_row = diff + row * diff_stride + col; 5882 for (int i = 0; i < cur_tx_bh; i++) { 5883 memcpy(cur_hash_row, cur_diff_row, sizeof(*hash_data) * cur_tx_bw); 5884 cur_hash_row += cur_tx_bw; 5885 cur_diff_row += diff_stride; 5886 } 5887 const int hash = av1_get_crc32c_value(&x->mb_rd_record.crc_calculator, 5888 (uint8_t *)hash_data, 5889 2 * cur_tx_bw * cur_tx_bh); 5890 // Find corresponding RD info based on the hash value. 5891 const int record_idx = 5892 row_in_sb * (MAX_MIB_SIZE >> (tx_size_idx + 1)) + col_in_sb; 5893 TXB_RD_RECORD *records = &rd_records_table[tx_size_idx][record_idx]; 5894 int idx = find_tx_size_rd_info(records, hash); 5895 dst_rd_info[cur_rd_info_idx].rd_info_array = 5896 &records->tx_rd_info[idx]; 5897 } 5898 ++cur_rd_info_idx; 5899 } 5900 } 5901 cur_tx_size = next_tx_size; 5902 ++cur_tx_depth; 5903 } 5904 return 1; 5905 } 5906 5907 // Search for best transform size and type for luma inter blocks. 
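// Editor's note: the first gate inside the function below scales a modeled
// RD estimate by prune_factor_by8[level - 1] / 8 and skips the whole tx
// search if even that scaled-down value exceeds ref_best_rd. Worked example
// (made-up numbers): at prune level 2 the factor is 5/8, so model_rd == 4000
// gives (4000 * 5) >> 3 == 2500, and the search is pruned iff
// 2500 > ref_best_rd.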
5908 static void pick_tx_size_type_yrd(const AV1_COMP *cpi, MACROBLOCK *x, 5909 RD_STATS *rd_stats, BLOCK_SIZE bsize, 5910 int mi_row, int mi_col, int64_t ref_best_rd) { 5911 const AV1_COMMON *cm = &cpi->common; 5912 MACROBLOCKD *const xd = &x->e_mbd; 5913 assert(is_inter_block(xd->mi[0])); 5914 5915 av1_invalid_rd_stats(rd_stats); 5916 5917 if (cpi->sf.model_based_prune_tx_search_level && ref_best_rd != INT64_MAX) { 5918 int model_rate; 5919 int64_t model_dist; 5920 int model_skip; 5921 model_rd_sb_fn[MODELRD_TYPE_TX_SEARCH_PRUNE]( 5922 cpi, bsize, x, xd, 0, 0, mi_row, mi_col, &model_rate, &model_dist, 5923 &model_skip, NULL, NULL, NULL, NULL); 5924 const int64_t model_rd = RDCOST(x->rdmult, model_rate, model_dist); 5925 // If the modeled rd is a lot worse than the best so far, break out. 5926 // TODO(debargha, urvang): Improve the model and make the check below 5927 // tighter. 5928 assert(cpi->sf.model_based_prune_tx_search_level >= 0 && 5929 cpi->sf.model_based_prune_tx_search_level <= 2); 5930 static const int prune_factor_by8[] = { 3, 5 }; 5931 if (!model_skip && 5932 ((model_rd * 5933 prune_factor_by8[cpi->sf.model_based_prune_tx_search_level - 1]) >> 5934 3) > ref_best_rd) 5935 return; 5936 } 5937 5938 uint32_t hash = 0; 5939 int32_t match_index = -1; 5940 MB_RD_RECORD *mb_rd_record = NULL; 5941 const int within_border = 5942 mi_row >= xd->tile.mi_row_start && 5943 (mi_row + mi_size_high[bsize] < xd->tile.mi_row_end) && 5944 mi_col >= xd->tile.mi_col_start && 5945 (mi_col + mi_size_wide[bsize] < xd->tile.mi_col_end); 5946 const int is_mb_rd_hash_enabled = (within_border && cpi->sf.use_mb_rd_hash); 5947 const int n4 = bsize_to_num_blk(bsize); 5948 if (is_mb_rd_hash_enabled) { 5949 hash = get_block_residue_hash(x, bsize); 5950 mb_rd_record = &x->mb_rd_record; 5951 match_index = find_mb_rd_info(mb_rd_record, ref_best_rd, hash); 5952 if (match_index != -1) { 5953 MB_RD_INFO *tx_rd_info = &mb_rd_record->tx_rd_info[match_index]; 5954 fetch_tx_rd_info(n4, tx_rd_info, rd_stats, x); 5955 return; 5956 } 5957 } 5958 5959 // If we predict that skip is the optimal RD decision, set the respective 5960 // context and terminate early. 5961 int64_t dist; 5962 if (cpi->sf.tx_type_search.use_skip_flag_prediction && 5963 predict_skip_flag(x, bsize, &dist, cm->reduced_tx_set_used)) { 5964 set_skip_flag(x, rd_stats, bsize, dist); 5965 // Save the RD search results into tx_rd_record. 5966 if (is_mb_rd_hash_enabled) 5967 save_tx_rd_info(n4, hash, x, rd_stats, mb_rd_record); 5968 return; 5969 } 5970 #if CONFIG_SPEED_STATS 5971 ++x->tx_search_count; 5972 #endif // CONFIG_SPEED_STATS 5973 5974 // Precompute residual hashes and find existing or add new RD records to 5975 // store and reuse rate and distortion values to speed up TX size search. 5976 TXB_RD_INFO_NODE matched_rd_info[4 + 16 + 64]; 5977 int found_rd_info = 0; 5978 if (ref_best_rd != INT64_MAX && within_border && cpi->sf.use_inter_txb_hash) { 5979 found_rd_info = 5980 find_tx_size_rd_records(x, bsize, mi_row, mi_col, matched_rd_info); 5981 } 5982 5983 // Get the tx_size 1 level down 5984 const TX_SIZE min_tx_size = sub_tx_size_map[max_txsize_rect_lookup[bsize]]; 5985 const TxSetType tx_set_type = 5986 av1_get_ext_tx_set_type(min_tx_size, 1, cm->reduced_tx_set_used); 5987 prune_tx(cpi, bsize, x, xd, tx_set_type); 5988 5989 int found = 0; 5990 RD_STATS this_rd_stats; 5991 av1_init_rd_stats(&this_rd_stats); 5992 const int64_t rd = 5993 select_tx_size_and_type(cpi, x, &this_rd_stats, bsize, ref_best_rd, 5994 found_rd_info ? 
matched_rd_info : NULL); 5995 5996 if (rd < INT64_MAX) { 5997 *rd_stats = this_rd_stats; 5998 found = 1; 5999 } 6000 6001 // Reset the pruning flags. 6002 av1_zero(x->tx_search_prune); 6003 x->tx_split_prune_flag = 0; 6004 6005 // We should always find at least one candidate unless ref_best_rd is less 6006 // than INT64_MAX (in which case, all the calls to select_tx_size_and_type() 6007 // might have failed to find something better). 6008 assert(IMPLIES(!found, ref_best_rd != INT64_MAX)); 6009 if (!found) return; 6010 6011 // Save the RD search results into tx_rd_record. 6012 if (is_mb_rd_hash_enabled) { 6013 assert(mb_rd_record != NULL); 6014 save_tx_rd_info(n4, hash, x, rd_stats, mb_rd_record); 6015 } 6016 } 6017 6018 static void model_rd_for_sb_with_fullrdy( 6019 const AV1_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd, 6020 int plane_from, int plane_to, int mi_row, int mi_col, int *out_rate_sum, 6021 int64_t *out_dist_sum, int *skip_txfm_sb, int64_t *skip_sse_sb, 6022 int *plane_rate, int64_t *plane_sse, int64_t *plane_dist) { 6023 const int ref = xd->mi[0]->ref_frame[0]; 6024 6025 int64_t rate_sum = 0; 6026 int64_t dist_sum = 0; 6027 int64_t total_sse = 0; 6028 6029 for (int plane = plane_from; plane <= plane_to; ++plane) { 6030 struct macroblock_plane *const p = &x->plane[plane]; 6031 struct macroblockd_plane *const pd = &xd->plane[plane]; 6032 const BLOCK_SIZE plane_bsize = 6033 get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y); 6034 const int bw = block_size_wide[plane_bsize]; 6035 const int bh = block_size_high[plane_bsize]; 6036 int64_t sse; 6037 int rate; 6038 int64_t dist; 6039 6040 if (x->skip_chroma_rd && plane) continue; 6041 6042 if (is_cur_buf_hbd(xd)) { 6043 sse = aom_highbd_sse(p->src.buf, p->src.stride, pd->dst.buf, 6044 pd->dst.stride, bw, bh); 6045 } else { 6046 sse = aom_sse(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, bw, 6047 bh); 6048 } 6049 sse = ROUND_POWER_OF_TWO(sse, (xd->bd - 8) * 2); 6050 6051 RD_STATS rd_stats; 6052 if (plane == 0) { 6053 pick_tx_size_type_yrd(cpi, x, &rd_stats, bsize, mi_row, mi_col, 6054 INT64_MAX); 6055 if (rd_stats.invalid_rate) { 6056 rate = 0; 6057 dist = sse << 4; 6058 } else { 6059 rate = rd_stats.rate; 6060 dist = rd_stats.dist; 6061 } 6062 } else { 6063 model_rd_with_curvfit(cpi, x, plane_bsize, plane, sse, bw * bh, &rate, 6064 &dist); 6065 } 6066 6067 if (plane == 0) x->pred_sse[ref] = (unsigned int)AOMMIN(sse, UINT_MAX); 6068 6069 total_sse += sse; 6070 rate_sum += rate; 6071 dist_sum += dist; 6072 6073 if (plane_rate) plane_rate[plane] = rate; 6074 if (plane_sse) plane_sse[plane] = sse; 6075 if (plane_dist) plane_dist[plane] = dist; 6076 } 6077 6078 if (skip_txfm_sb) *skip_txfm_sb = total_sse == 0; 6079 if (skip_sse_sb) *skip_sse_sb = total_sse << 4; 6080 *out_rate_sum = (int)rate_sum; 6081 *out_dist_sum = dist_sum; 6082 } 6083 6084 static void rd_pick_palette_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x, 6085 int dc_mode_cost, 6086 uint8_t *best_palette_color_map, 6087 MB_MODE_INFO *const best_mbmi, 6088 int64_t *best_rd, int *rate, 6089 int *rate_tokenonly, int64_t *distortion, 6090 int *skippable) { 6091 MACROBLOCKD *const xd = &x->e_mbd; 6092 MB_MODE_INFO *const mbmi = xd->mi[0]; 6093 assert(!is_inter_block(mbmi)); 6094 assert( 6095 av1_allow_palette(cpi->common.allow_screen_content_tools, mbmi->sb_type)); 6096 PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info; 6097 const BLOCK_SIZE bsize = mbmi->sb_type; 6098 const SequenceHeader *const seq_params = 
&cpi->common.seq_params; 6099 int this_rate; 6100 int64_t this_rd; 6101 int colors_u, colors_v, colors; 6102 const int src_stride = x->plane[1].src.stride; 6103 const uint8_t *const src_u = x->plane[1].src.buf; 6104 const uint8_t *const src_v = x->plane[2].src.buf; 6105 uint8_t *const color_map = xd->plane[1].color_index_map; 6106 RD_STATS tokenonly_rd_stats; 6107 int plane_block_width, plane_block_height, rows, cols; 6108 av1_get_block_dimensions(bsize, 1, xd, &plane_block_width, 6109 &plane_block_height, &rows, &cols); 6110 6111 mbmi->uv_mode = UV_DC_PRED; 6112 6113 int count_buf[1 << 12]; // Maximum (1 << 12) color levels. 6114 if (seq_params->use_highbitdepth) { 6115 colors_u = av1_count_colors_highbd(src_u, src_stride, rows, cols, 6116 seq_params->bit_depth, count_buf); 6117 colors_v = av1_count_colors_highbd(src_v, src_stride, rows, cols, 6118 seq_params->bit_depth, count_buf); 6119 } else { 6120 colors_u = av1_count_colors(src_u, src_stride, rows, cols, count_buf); 6121 colors_v = av1_count_colors(src_v, src_stride, rows, cols, count_buf); 6122 } 6123 6124 uint16_t color_cache[2 * PALETTE_MAX_SIZE]; 6125 const int n_cache = av1_get_palette_cache(xd, 1, color_cache); 6126 6127 colors = colors_u > colors_v ? colors_u : colors_v; 6128 if (colors > 1 && colors <= 64) { 6129 int r, c, n, i, j; 6130 const int max_itr = 50; 6131 int lb_u, ub_u, val_u; 6132 int lb_v, ub_v, val_v; 6133 int *const data = x->palette_buffer->kmeans_data_buf; 6134 int centroids[2 * PALETTE_MAX_SIZE]; 6135 6136 uint16_t *src_u16 = CONVERT_TO_SHORTPTR(src_u); 6137 uint16_t *src_v16 = CONVERT_TO_SHORTPTR(src_v); 6138 if (seq_params->use_highbitdepth) { 6139 lb_u = src_u16[0]; 6140 ub_u = src_u16[0]; 6141 lb_v = src_v16[0]; 6142 ub_v = src_v16[0]; 6143 } else { 6144 lb_u = src_u[0]; 6145 ub_u = src_u[0]; 6146 lb_v = src_v[0]; 6147 ub_v = src_v[0]; 6148 } 6149 6150 for (r = 0; r < rows; ++r) { 6151 for (c = 0; c < cols; ++c) { 6152 if (seq_params->use_highbitdepth) { 6153 val_u = src_u16[r * src_stride + c]; 6154 val_v = src_v16[r * src_stride + c]; 6155 data[(r * cols + c) * 2] = val_u; 6156 data[(r * cols + c) * 2 + 1] = val_v; 6157 } else { 6158 val_u = src_u[r * src_stride + c]; 6159 val_v = src_v[r * src_stride + c]; 6160 data[(r * cols + c) * 2] = val_u; 6161 data[(r * cols + c) * 2 + 1] = val_v; 6162 } 6163 if (val_u < lb_u) 6164 lb_u = val_u; 6165 else if (val_u > ub_u) 6166 ub_u = val_u; 6167 if (val_v < lb_v) 6168 lb_v = val_v; 6169 else if (val_v > ub_v) 6170 ub_v = val_v; 6171 } 6172 } 6173 6174 for (n = colors > PALETTE_MAX_SIZE ? PALETTE_MAX_SIZE : colors; n >= 2; 6175 --n) { 6176 for (i = 0; i < n; ++i) { 6177 centroids[i * 2] = lb_u + (2 * i + 1) * (ub_u - lb_u) / n / 2; 6178 centroids[i * 2 + 1] = lb_v + (2 * i + 1) * (ub_v - lb_v) / n / 2; 6179 } 6180 av1_k_means(data, centroids, color_map, rows * cols, n, 2, max_itr); 6181 optimize_palette_colors(color_cache, n_cache, n, 2, centroids); 6182 // Sort the U channel colors in ascending order. 
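      /* Editor's note: centroids[] interleaves the channels as
       * {u0, v0, u1, v1, ...}, so the selection sort below steps by 2 and
       * swaps (U, V) pairs together to keep them matched. E.g. (made-up)
       * {9,1, 4,7, 6,2} sorts on U to {4,7, 6,2, 9,1}. */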
6183       for (i = 0; i < 2 * (n - 1); i += 2) {
6184         int min_idx = i;
6185         int min_val = centroids[i];
6186         for (j = i + 2; j < 2 * n; j += 2)
6187           if (centroids[j] < min_val) min_val = centroids[j], min_idx = j;
6188         if (min_idx != i) {
6189           int temp_u = centroids[i], temp_v = centroids[i + 1];
6190           centroids[i] = centroids[min_idx];
6191           centroids[i + 1] = centroids[min_idx + 1];
6192           centroids[min_idx] = temp_u, centroids[min_idx + 1] = temp_v;
6193         }
6194       }
6195       av1_calc_indices(data, centroids, color_map, rows * cols, n, 2);
6196       extend_palette_color_map(color_map, cols, rows, plane_block_width,
6197                                plane_block_height);
6198       pmi->palette_size[1] = n;
6199       for (i = 1; i < 3; ++i) {
6200         for (j = 0; j < n; ++j) {
6201           if (seq_params->use_highbitdepth)
6202             pmi->palette_colors[i * PALETTE_MAX_SIZE + j] = clip_pixel_highbd(
6203                 (int)centroids[j * 2 + i - 1], seq_params->bit_depth);
6204           else
6205             pmi->palette_colors[i * PALETTE_MAX_SIZE + j] =
6206                 clip_pixel((int)centroids[j * 2 + i - 1]);
6207         }
6208       }
6209
6210       super_block_uvrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd);
6211       if (tokenonly_rd_stats.rate == INT_MAX) continue;
6212       this_rate = tokenonly_rd_stats.rate +
6213                   intra_mode_info_cost_uv(cpi, x, mbmi, bsize, dc_mode_cost);
6214       this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
6215       if (this_rd < *best_rd) {
6216         *best_rd = this_rd;
6217         *best_mbmi = *mbmi;
6218         memcpy(best_palette_color_map, color_map,
6219                plane_block_width * plane_block_height *
6220                    sizeof(best_palette_color_map[0]));
6221         *rate = this_rate;
6222         *distortion = tokenonly_rd_stats.dist;
6223         *rate_tokenonly = tokenonly_rd_stats.rate;
6224         *skippable = tokenonly_rd_stats.skip;
6225       }
6226     }
6227   }
6228   if (best_mbmi->palette_mode_info.palette_size[1] > 0) {
6229     memcpy(color_map, best_palette_color_map,
6230            plane_block_width * plane_block_height *
6231                sizeof(best_palette_color_map[0]));
6232   }
6233 }
6234
6235 // Run RD calculation with the given chroma intra prediction angle, and return
6236 // the RD cost. Update the best mode info if the RD cost is the best so far.
6237 static int64_t pick_intra_angle_routine_sbuv(
6238     const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
6239     int rate_overhead, int64_t best_rd_in, int *rate, RD_STATS *rd_stats,
6240     int *best_angle_delta, int64_t *best_rd) {
6241   MB_MODE_INFO *mbmi = x->e_mbd.mi[0];
6242   assert(!is_inter_block(mbmi));
6243   int this_rate;
6244   int64_t this_rd;
6245   RD_STATS tokenonly_rd_stats;
6246
6247   if (!super_block_uvrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd_in))
6248     return INT64_MAX;
6249   this_rate = tokenonly_rd_stats.rate +
6250               intra_mode_info_cost_uv(cpi, x, mbmi, bsize, rate_overhead);
6251   this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
6252   if (this_rd < *best_rd) {
6253     *best_rd = this_rd;
6254     *best_angle_delta = mbmi->angle_delta[PLANE_TYPE_UV];
6255     *rate = this_rate;
6256     rd_stats->rate = tokenonly_rd_stats.rate;
6257     rd_stats->dist = tokenonly_rd_stats.dist;
6258     rd_stats->skip = tokenonly_rd_stats.skip;
6259   }
6260   return this_rd;
6261 }
6262
6263 // With a given chroma directional intra prediction mode, pick the best angle
6264 // delta. Return true if an RD cost that is smaller than the input one is found.
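// The search below runs in two passes: the first pass evaluates the even angle
// deltas (0, +/-2, ...) and records their RD costs in rd_cost[]; the second
// pass then tries an odd delta only if at least one of its even neighbours
// came within best_rd + (best_rd >> 5), skipping clearly hopeless deltas.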
6265 static int rd_pick_intra_angle_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x, 6266 BLOCK_SIZE bsize, int rate_overhead, 6267 int64_t best_rd, int *rate, 6268 RD_STATS *rd_stats) { 6269 MACROBLOCKD *const xd = &x->e_mbd; 6270 MB_MODE_INFO *mbmi = xd->mi[0]; 6271 assert(!is_inter_block(mbmi)); 6272 int i, angle_delta, best_angle_delta = 0; 6273 int64_t this_rd, best_rd_in, rd_cost[2 * (MAX_ANGLE_DELTA + 2)]; 6274 6275 rd_stats->rate = INT_MAX; 6276 rd_stats->skip = 0; 6277 rd_stats->dist = INT64_MAX; 6278 for (i = 0; i < 2 * (MAX_ANGLE_DELTA + 2); ++i) rd_cost[i] = INT64_MAX; 6279 6280 for (angle_delta = 0; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) { 6281 for (i = 0; i < 2; ++i) { 6282 best_rd_in = (best_rd == INT64_MAX) 6283 ? INT64_MAX 6284 : (best_rd + (best_rd >> ((angle_delta == 0) ? 3 : 5))); 6285 mbmi->angle_delta[PLANE_TYPE_UV] = (1 - 2 * i) * angle_delta; 6286 this_rd = pick_intra_angle_routine_sbuv(cpi, x, bsize, rate_overhead, 6287 best_rd_in, rate, rd_stats, 6288 &best_angle_delta, &best_rd); 6289 rd_cost[2 * angle_delta + i] = this_rd; 6290 if (angle_delta == 0) { 6291 if (this_rd == INT64_MAX) return 0; 6292 rd_cost[1] = this_rd; 6293 break; 6294 } 6295 } 6296 } 6297 6298 assert(best_rd != INT64_MAX); 6299 for (angle_delta = 1; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) { 6300 int64_t rd_thresh; 6301 for (i = 0; i < 2; ++i) { 6302 int skip_search = 0; 6303 rd_thresh = best_rd + (best_rd >> 5); 6304 if (rd_cost[2 * (angle_delta + 1) + i] > rd_thresh && 6305 rd_cost[2 * (angle_delta - 1) + i] > rd_thresh) 6306 skip_search = 1; 6307 if (!skip_search) { 6308 mbmi->angle_delta[PLANE_TYPE_UV] = (1 - 2 * i) * angle_delta; 6309 pick_intra_angle_routine_sbuv(cpi, x, bsize, rate_overhead, best_rd, 6310 rate, rd_stats, &best_angle_delta, 6311 &best_rd); 6312 } 6313 } 6314 } 6315 6316 mbmi->angle_delta[PLANE_TYPE_UV] = best_angle_delta; 6317 return rd_stats->rate != INT_MAX; 6318 } 6319 6320 #define PLANE_SIGN_TO_JOINT_SIGN(plane, a, b) \ 6321 (plane == CFL_PRED_U ? 
a * CFL_SIGNS + b - 1 : b * CFL_SIGNS + a - 1) 6322 static int cfl_rd_pick_alpha(MACROBLOCK *const x, const AV1_COMP *const cpi, 6323 TX_SIZE tx_size, int64_t best_rd) { 6324 MACROBLOCKD *const xd = &x->e_mbd; 6325 MB_MODE_INFO *const mbmi = xd->mi[0]; 6326 6327 const BLOCK_SIZE bsize = mbmi->sb_type; 6328 #if CONFIG_DEBUG 6329 assert(is_cfl_allowed(xd) && cpi->oxcf.enable_cfl_intra); 6330 const int ssx = xd->plane[AOM_PLANE_U].subsampling_x; 6331 const int ssy = xd->plane[AOM_PLANE_U].subsampling_y; 6332 const BLOCK_SIZE plane_bsize = get_plane_block_size(mbmi->sb_type, ssx, ssy); 6333 (void)plane_bsize; 6334 assert(plane_bsize < BLOCK_SIZES_ALL); 6335 if (!xd->lossless[mbmi->segment_id]) { 6336 assert(block_size_wide[plane_bsize] == tx_size_wide[tx_size]); 6337 assert(block_size_high[plane_bsize] == tx_size_high[tx_size]); 6338 } 6339 #endif // CONFIG_DEBUG 6340 6341 xd->cfl.use_dc_pred_cache = 1; 6342 const int64_t mode_rd = 6343 RDCOST(x->rdmult, 6344 x->intra_uv_mode_cost[CFL_ALLOWED][mbmi->mode][UV_CFL_PRED], 0); 6345 int64_t best_rd_uv[CFL_JOINT_SIGNS][CFL_PRED_PLANES]; 6346 int best_c[CFL_JOINT_SIGNS][CFL_PRED_PLANES]; 6347 #if CONFIG_DEBUG 6348 int best_rate_uv[CFL_JOINT_SIGNS][CFL_PRED_PLANES]; 6349 #endif // CONFIG_DEBUG 6350 6351 for (int plane = 0; plane < CFL_PRED_PLANES; plane++) { 6352 RD_STATS rd_stats; 6353 av1_init_rd_stats(&rd_stats); 6354 for (int joint_sign = 0; joint_sign < CFL_JOINT_SIGNS; joint_sign++) { 6355 best_rd_uv[joint_sign][plane] = INT64_MAX; 6356 best_c[joint_sign][plane] = 0; 6357 } 6358 // Collect RD stats for an alpha value of zero in this plane. 6359 // Skip i == CFL_SIGN_ZERO as (0, 0) is invalid. 6360 for (int i = CFL_SIGN_NEG; i < CFL_SIGNS; i++) { 6361 const int joint_sign = PLANE_SIGN_TO_JOINT_SIGN(plane, CFL_SIGN_ZERO, i); 6362 if (i == CFL_SIGN_NEG) { 6363 mbmi->cfl_alpha_idx = 0; 6364 mbmi->cfl_alpha_signs = joint_sign; 6365 txfm_rd_in_plane(x, cpi, &rd_stats, best_rd, 0, plane + 1, bsize, 6366 tx_size, cpi->sf.use_fast_coef_costing, FTXS_NONE, 0); 6367 if (rd_stats.rate == INT_MAX) break; 6368 } 6369 const int alpha_rate = x->cfl_cost[joint_sign][plane][0]; 6370 best_rd_uv[joint_sign][plane] = 6371 RDCOST(x->rdmult, rd_stats.rate + alpha_rate, rd_stats.dist); 6372 #if CONFIG_DEBUG 6373 best_rate_uv[joint_sign][plane] = rd_stats.rate; 6374 #endif // CONFIG_DEBUG 6375 } 6376 } 6377 6378 int best_joint_sign = -1; 6379 6380 for (int plane = 0; plane < CFL_PRED_PLANES; plane++) { 6381 for (int pn_sign = CFL_SIGN_NEG; pn_sign < CFL_SIGNS; pn_sign++) { 6382 int progress = 0; 6383 for (int c = 0; c < CFL_ALPHABET_SIZE; c++) { 6384 int flag = 0; 6385 RD_STATS rd_stats; 6386 if (c > 2 && progress < c) break; 6387 av1_init_rd_stats(&rd_stats); 6388 for (int i = 0; i < CFL_SIGNS; i++) { 6389 const int joint_sign = PLANE_SIGN_TO_JOINT_SIGN(plane, pn_sign, i); 6390 if (i == 0) { 6391 mbmi->cfl_alpha_idx = (c << CFL_ALPHABET_SIZE_LOG2) + c; 6392 mbmi->cfl_alpha_signs = joint_sign; 6393 txfm_rd_in_plane(x, cpi, &rd_stats, best_rd, 0, plane + 1, bsize, 6394 tx_size, cpi->sf.use_fast_coef_costing, FTXS_NONE, 6395 0); 6396 if (rd_stats.rate == INT_MAX) break; 6397 } 6398 const int alpha_rate = x->cfl_cost[joint_sign][plane][c]; 6399 int64_t this_rd = 6400 RDCOST(x->rdmult, rd_stats.rate + alpha_rate, rd_stats.dist); 6401 if (this_rd >= best_rd_uv[joint_sign][plane]) continue; 6402 best_rd_uv[joint_sign][plane] = this_rd; 6403 best_c[joint_sign][plane] = c; 6404 #if CONFIG_DEBUG 6405 best_rate_uv[joint_sign][plane] = rd_stats.rate; 6406 #endif // CONFIG_DEBUG 
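// A better alpha magnitude was just accepted for this joint sign; 'flag'
// feeds 'progress' below, which the early-exit test at the top of the
// c-loop uses to keep admitting larger values of c.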
6407 flag = 2; 6408 if (best_rd_uv[joint_sign][!plane] == INT64_MAX) continue; 6409 this_rd += mode_rd + best_rd_uv[joint_sign][!plane]; 6410 if (this_rd >= best_rd) continue; 6411 best_rd = this_rd; 6412 best_joint_sign = joint_sign; 6413 } 6414 progress += flag; 6415 } 6416 } 6417 } 6418 6419 int best_rate_overhead = INT_MAX; 6420 int ind = 0; 6421 if (best_joint_sign >= 0) { 6422 const int u = best_c[best_joint_sign][CFL_PRED_U]; 6423 const int v = best_c[best_joint_sign][CFL_PRED_V]; 6424 ind = (u << CFL_ALPHABET_SIZE_LOG2) + v; 6425 best_rate_overhead = x->cfl_cost[best_joint_sign][CFL_PRED_U][u] + 6426 x->cfl_cost[best_joint_sign][CFL_PRED_V][v]; 6427 #if CONFIG_DEBUG 6428 xd->cfl.rate = x->intra_uv_mode_cost[CFL_ALLOWED][mbmi->mode][UV_CFL_PRED] + 6429 best_rate_overhead + 6430 best_rate_uv[best_joint_sign][CFL_PRED_U] + 6431 best_rate_uv[best_joint_sign][CFL_PRED_V]; 6432 #endif // CONFIG_DEBUG 6433 } else { 6434 best_joint_sign = 0; 6435 } 6436 6437 mbmi->cfl_alpha_idx = ind; 6438 mbmi->cfl_alpha_signs = best_joint_sign; 6439 xd->cfl.use_dc_pred_cache = 0; 6440 xd->cfl.dc_pred_is_cached[0] = 0; 6441 xd->cfl.dc_pred_is_cached[1] = 0; 6442 return best_rate_overhead; 6443 } 6444 6445 static void init_sbuv_mode(MB_MODE_INFO *const mbmi) { 6446 mbmi->uv_mode = UV_DC_PRED; 6447 mbmi->palette_mode_info.palette_size[1] = 0; 6448 } 6449 6450 static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x, 6451 int *rate, int *rate_tokenonly, 6452 int64_t *distortion, int *skippable, 6453 BLOCK_SIZE bsize, TX_SIZE max_tx_size) { 6454 MACROBLOCKD *xd = &x->e_mbd; 6455 MB_MODE_INFO *mbmi = xd->mi[0]; 6456 assert(!is_inter_block(mbmi)); 6457 MB_MODE_INFO best_mbmi = *mbmi; 6458 int64_t best_rd = INT64_MAX, this_rd; 6459 6460 for (int mode_idx = 0; mode_idx < UV_INTRA_MODES; ++mode_idx) { 6461 int this_rate; 6462 RD_STATS tokenonly_rd_stats; 6463 UV_PREDICTION_MODE mode = uv_rd_search_mode_order[mode_idx]; 6464 const int is_directional_mode = av1_is_directional_mode(get_uv_mode(mode)); 6465 if (!(cpi->sf.intra_uv_mode_mask[txsize_sqr_up_map[max_tx_size]] & 6466 (1 << mode))) 6467 continue; 6468 if (!cpi->oxcf.enable_smooth_intra && mode >= UV_SMOOTH_PRED && 6469 mode <= UV_SMOOTH_H_PRED) 6470 continue; 6471 6472 if (!cpi->oxcf.enable_paeth_intra && mode == UV_PAETH_PRED) continue; 6473 6474 mbmi->uv_mode = mode; 6475 int cfl_alpha_rate = 0; 6476 if (mode == UV_CFL_PRED) { 6477 if (!is_cfl_allowed(xd) || !cpi->oxcf.enable_cfl_intra) continue; 6478 assert(!is_directional_mode); 6479 const TX_SIZE uv_tx_size = av1_get_tx_size(AOM_PLANE_U, xd); 6480 cfl_alpha_rate = cfl_rd_pick_alpha(x, cpi, uv_tx_size, best_rd); 6481 if (cfl_alpha_rate == INT_MAX) continue; 6482 } 6483 mbmi->angle_delta[PLANE_TYPE_UV] = 0; 6484 if (is_directional_mode && av1_use_angle_delta(mbmi->sb_type) && 6485 cpi->oxcf.enable_angle_delta) { 6486 const int rate_overhead = 6487 x->intra_uv_mode_cost[is_cfl_allowed(xd)][mbmi->mode][mode]; 6488 if (!rd_pick_intra_angle_sbuv(cpi, x, bsize, rate_overhead, best_rd, 6489 &this_rate, &tokenonly_rd_stats)) 6490 continue; 6491 } else { 6492 if (!super_block_uvrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd)) { 6493 continue; 6494 } 6495 } 6496 const int mode_cost = 6497 x->intra_uv_mode_cost[is_cfl_allowed(xd)][mbmi->mode][mode] + 6498 cfl_alpha_rate; 6499 this_rate = tokenonly_rd_stats.rate + 6500 intra_mode_info_cost_uv(cpi, x, mbmi, bsize, mode_cost); 6501 if (mode == UV_CFL_PRED) { 6502 assert(is_cfl_allowed(xd) && cpi->oxcf.enable_cfl_intra); 6503 #if CONFIG_DEBUG 
6504 if (!xd->lossless[mbmi->segment_id]) 6505 assert(xd->cfl.rate == tokenonly_rd_stats.rate + mode_cost); 6506 #endif // CONFIG_DEBUG 6507 } 6508 this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist); 6509 6510 if (this_rd < best_rd) { 6511 best_mbmi = *mbmi; 6512 best_rd = this_rd; 6513 *rate = this_rate; 6514 *rate_tokenonly = tokenonly_rd_stats.rate; 6515 *distortion = tokenonly_rd_stats.dist; 6516 *skippable = tokenonly_rd_stats.skip; 6517 } 6518 } 6519 6520 const int try_palette = 6521 cpi->oxcf.enable_palette && 6522 av1_allow_palette(cpi->common.allow_screen_content_tools, mbmi->sb_type); 6523 if (try_palette) { 6524 uint8_t *best_palette_color_map = x->palette_buffer->best_palette_color_map; 6525 rd_pick_palette_intra_sbuv( 6526 cpi, x, 6527 x->intra_uv_mode_cost[is_cfl_allowed(xd)][mbmi->mode][UV_DC_PRED], 6528 best_palette_color_map, &best_mbmi, &best_rd, rate, rate_tokenonly, 6529 distortion, skippable); 6530 } 6531 6532 *mbmi = best_mbmi; 6533 // Make sure we actually chose a mode 6534 assert(best_rd < INT64_MAX); 6535 return best_rd; 6536 } 6537 6538 static void choose_intra_uv_mode(const AV1_COMP *const cpi, MACROBLOCK *const x, 6539 BLOCK_SIZE bsize, TX_SIZE max_tx_size, 6540 int *rate_uv, int *rate_uv_tokenonly, 6541 int64_t *dist_uv, int *skip_uv, 6542 UV_PREDICTION_MODE *mode_uv) { 6543 const AV1_COMMON *const cm = &cpi->common; 6544 MACROBLOCKD *xd = &x->e_mbd; 6545 MB_MODE_INFO *mbmi = xd->mi[0]; 6546 const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2); 6547 const int mi_col = -xd->mb_to_left_edge >> (3 + MI_SIZE_LOG2); 6548 // Use an estimated rd for uv_intra based on DC_PRED if the 6549 // appropriate speed flag is set. 6550 init_sbuv_mode(mbmi); 6551 if (x->skip_chroma_rd) { 6552 *rate_uv = 0; 6553 *rate_uv_tokenonly = 0; 6554 *dist_uv = 0; 6555 *skip_uv = 1; 6556 *mode_uv = UV_DC_PRED; 6557 return; 6558 } 6559 xd->cfl.is_chroma_reference = 6560 is_chroma_reference(mi_row, mi_col, bsize, cm->seq_params.subsampling_x, 6561 cm->seq_params.subsampling_y); 6562 bsize = scale_chroma_bsize(bsize, xd->plane[AOM_PLANE_U].subsampling_x, 6563 xd->plane[AOM_PLANE_U].subsampling_y); 6564 // Only store reconstructed luma when there's chroma RDO. When there's no 6565 // chroma RDO, the reconstructed luma will be stored in encode_superblock(). 6566 xd->cfl.store_y = store_cfl_required_rdo(cm, x); 6567 if (xd->cfl.store_y) { 6568 // Restore reconstructed luma values. 
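// (Re-encoding the luma plane regenerates the reconstructed luma pixels
// and, because cfl.store_y is set above, caches them for the
// chroma-from-luma predictor before chroma RDO runs.)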
6569 av1_encode_intra_block_plane(cpi, x, mbmi->sb_type, AOM_PLANE_Y, 6570 cpi->optimize_seg_arr[mbmi->segment_id], 6571 mi_row, mi_col); 6572 xd->cfl.store_y = 0; 6573 } 6574 rd_pick_intra_sbuv_mode(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv, 6575 bsize, max_tx_size); 6576 *mode_uv = mbmi->uv_mode; 6577 } 6578 6579 static int cost_mv_ref(const MACROBLOCK *const x, PREDICTION_MODE mode, 6580 int16_t mode_context) { 6581 if (is_inter_compound_mode(mode)) { 6582 return x 6583 ->inter_compound_mode_cost[mode_context][INTER_COMPOUND_OFFSET(mode)]; 6584 } 6585 6586 int mode_cost = 0; 6587 int16_t mode_ctx = mode_context & NEWMV_CTX_MASK; 6588 6589 assert(is_inter_mode(mode)); 6590 6591 if (mode == NEWMV) { 6592 mode_cost = x->newmv_mode_cost[mode_ctx][0]; 6593 return mode_cost; 6594 } else { 6595 mode_cost = x->newmv_mode_cost[mode_ctx][1]; 6596 mode_ctx = (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK; 6597 6598 if (mode == GLOBALMV) { 6599 mode_cost += x->zeromv_mode_cost[mode_ctx][0]; 6600 return mode_cost; 6601 } else { 6602 mode_cost += x->zeromv_mode_cost[mode_ctx][1]; 6603 mode_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK; 6604 mode_cost += x->refmv_mode_cost[mode_ctx][mode != NEARESTMV]; 6605 return mode_cost; 6606 } 6607 } 6608 } 6609 6610 static int get_interinter_compound_mask_rate(const MACROBLOCK *const x, 6611 const MB_MODE_INFO *const mbmi) { 6612 switch (mbmi->interinter_comp.type) { 6613 case COMPOUND_AVERAGE: return 0; 6614 case COMPOUND_WEDGE: 6615 return get_interinter_wedge_bits(mbmi->sb_type) > 0 6616 ? av1_cost_literal(1) + 6617 x->wedge_idx_cost[mbmi->sb_type] 6618 [mbmi->interinter_comp.wedge_index] 6619 : 0; 6620 case COMPOUND_DIFFWTD: return av1_cost_literal(1); 6621 default: assert(0); return 0; 6622 } 6623 } 6624 6625 static INLINE int mv_check_bounds(const MvLimits *mv_limits, const MV *mv) { 6626 return (mv->row >> 3) < mv_limits->row_min || 6627 (mv->row >> 3) > mv_limits->row_max || 6628 (mv->col >> 3) < mv_limits->col_min || 6629 (mv->col >> 3) > mv_limits->col_max; 6630 } 6631 6632 static INLINE PREDICTION_MODE get_single_mode(PREDICTION_MODE this_mode, 6633 int ref_idx, int is_comp_pred) { 6634 PREDICTION_MODE single_mode; 6635 if (is_comp_pred) { 6636 single_mode = 6637 ref_idx ? 
compound_ref1_mode(this_mode) : compound_ref0_mode(this_mode); 6638 } else { 6639 single_mode = this_mode; 6640 } 6641 return single_mode; 6642 } 6643 6644 static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x, 6645 BLOCK_SIZE bsize, int_mv *cur_mv, int mi_row, 6646 int mi_col, int_mv *ref_mv_sub8x8[2], 6647 const uint8_t *mask, int mask_stride, 6648 int *rate_mv, const int block) { 6649 const AV1_COMMON *const cm = &cpi->common; 6650 const int num_planes = av1_num_planes(cm); 6651 const int pw = block_size_wide[bsize]; 6652 const int ph = block_size_high[bsize]; 6653 const int plane = 0; 6654 MACROBLOCKD *xd = &x->e_mbd; 6655 MB_MODE_INFO *mbmi = xd->mi[0]; 6656 // This function should only ever be called for compound modes 6657 assert(has_second_ref(mbmi)); 6658 const int_mv init_mv[2] = { cur_mv[0], cur_mv[1] }; 6659 const int refs[2] = { mbmi->ref_frame[0], mbmi->ref_frame[1] }; 6660 int_mv ref_mv[2]; 6661 int ite, ref; 6662 // ic and ir are the 4x4 coordinates of the sub8x8 at index "block" 6663 const int ic = block & 1; 6664 const int ir = (block - ic) >> 1; 6665 struct macroblockd_plane *const pd = &xd->plane[0]; 6666 const int p_col = ((mi_col * MI_SIZE) >> pd->subsampling_x) + 4 * ic; 6667 const int p_row = ((mi_row * MI_SIZE) >> pd->subsampling_y) + 4 * ir; 6668 6669 ConvolveParams conv_params = get_conv_params(0, plane, xd->bd); 6670 conv_params.use_dist_wtd_comp_avg = 0; 6671 WarpTypesAllowed warp_types[2]; 6672 for (ref = 0; ref < 2; ++ref) { 6673 const WarpedMotionParams *const wm = 6674 &xd->global_motion[xd->mi[0]->ref_frame[ref]]; 6675 const int is_global = is_global_mv_block(xd->mi[0], wm->wmtype); 6676 warp_types[ref].global_warp_allowed = is_global; 6677 warp_types[ref].local_warp_allowed = mbmi->motion_mode == WARPED_CAUSAL; 6678 } 6679 6680 // Do joint motion search in compound mode to get more accurate mv. 6681 struct buf_2d backup_yv12[2][MAX_MB_PLANE]; 6682 int last_besterr[2] = { INT_MAX, INT_MAX }; 6683 const YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = { 6684 av1_get_scaled_ref_frame(cpi, refs[0]), 6685 av1_get_scaled_ref_frame(cpi, refs[1]) 6686 }; 6687 6688 // Prediction buffer from second frame. 6689 DECLARE_ALIGNED(16, uint8_t, second_pred16[MAX_SB_SQUARE * sizeof(uint16_t)]); 6690 uint8_t *second_pred = get_buf_by_bd(xd, second_pred16); 6691 (void)ref_mv_sub8x8; 6692 6693 MV *const best_mv = &x->best_mv.as_mv; 6694 const int search_range = SEARCH_RANGE_8P; 6695 const int sadpb = x->sadperbit16; 6696 // Allow joint search multiple times iteratively for each reference frame 6697 // and break out of the search loop if it couldn't find a better mv. 6698 for (ite = 0; ite < 4; ite++) { 6699 struct buf_2d ref_yv12[2]; 6700 int bestsme = INT_MAX; 6701 MvLimits tmp_mv_limits = x->mv_limits; 6702 int id = ite % 2; // Even iterations search in the first reference frame, 6703 // odd iterations search in the second. The predictor 6704 // found for the 'other' reference frame is factored in. 
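// From the third iteration on, stop early if the other frame's mv still
// equals its initial value and this frame's mv matches its own initial
// value at full-pel precision; further rounds are unlikely to improve.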
6705 if (ite >= 2 && cur_mv[!id].as_int == init_mv[!id].as_int) { 6706 if (cur_mv[id].as_int == init_mv[id].as_int) { 6707 break; 6708 } else { 6709 int_mv cur_int_mv, init_int_mv; 6710 cur_int_mv.as_mv.col = cur_mv[id].as_mv.col >> 3; 6711 cur_int_mv.as_mv.row = cur_mv[id].as_mv.row >> 3; 6712 init_int_mv.as_mv.row = init_mv[id].as_mv.row >> 3; 6713 init_int_mv.as_mv.col = init_mv[id].as_mv.col >> 3; 6714 if (cur_int_mv.as_int == init_int_mv.as_int) { 6715 break; 6716 } 6717 } 6718 } 6719 for (ref = 0; ref < 2; ++ref) { 6720 ref_mv[ref] = av1_get_ref_mv(x, ref); 6721 // Swap out the reference frame for a version that's been scaled to 6722 // match the resolution of the current frame, allowing the existing 6723 // motion search code to be used without additional modifications. 6724 if (scaled_ref_frame[ref]) { 6725 int i; 6726 for (i = 0; i < num_planes; i++) 6727 backup_yv12[ref][i] = xd->plane[i].pre[ref]; 6728 av1_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col, 6729 NULL, num_planes); 6730 } 6731 } 6732 6733 assert(IMPLIES(scaled_ref_frame[0] != NULL, 6734 cm->width == scaled_ref_frame[0]->y_crop_width && 6735 cm->height == scaled_ref_frame[0]->y_crop_height)); 6736 assert(IMPLIES(scaled_ref_frame[1] != NULL, 6737 cm->width == scaled_ref_frame[1]->y_crop_width && 6738 cm->height == scaled_ref_frame[1]->y_crop_height)); 6739 6740 // Initialize based on (possibly scaled) prediction buffers. 6741 ref_yv12[0] = xd->plane[plane].pre[0]; 6742 ref_yv12[1] = xd->plane[plane].pre[1]; 6743 6744 // Get the prediction block from the 'other' reference frame. 6745 const InterpFilters interp_filters = EIGHTTAP_REGULAR; 6746 6747 // Since we have scaled the reference frames to match the size of the 6748 // current frame we must use a unit scaling factor during mode selection. 6749 av1_build_inter_predictor(ref_yv12[!id].buf, ref_yv12[!id].stride, 6750 second_pred, pw, &cur_mv[!id].as_mv, 6751 &cm->sf_identity, pw, ph, &conv_params, 6752 interp_filters, &warp_types[!id], p_col, p_row, 6753 plane, !id, MV_PRECISION_Q3, mi_col * MI_SIZE, 6754 mi_row * MI_SIZE, xd, cm->allow_warped_motion); 6755 6756 const int order_idx = id != 0; 6757 av1_dist_wtd_comp_weight_assign( 6758 cm, mbmi, order_idx, &xd->jcp_param.fwd_offset, 6759 &xd->jcp_param.bck_offset, &xd->jcp_param.use_dist_wtd_comp_avg, 1); 6760 6761 // Do full-pixel compound motion search on the current reference frame. 6762 if (id) xd->plane[plane].pre[0] = ref_yv12[id]; 6763 av1_set_mv_search_range(&x->mv_limits, &ref_mv[id].as_mv); 6764 6765 // Use the mv result from the single mode as mv predictor. 6766 *best_mv = cur_mv[id].as_mv; 6767 6768 best_mv->col >>= 3; 6769 best_mv->row >>= 3; 6770 6771 // Small-range full-pixel motion search. 6772 bestsme = av1_refining_search_8p_c(x, sadpb, search_range, 6773 &cpi->fn_ptr[bsize], mask, mask_stride, 6774 id, &ref_mv[id].as_mv, second_pred); 6775 if (bestsme < INT_MAX) { 6776 if (mask) 6777 bestsme = av1_get_mvpred_mask_var(x, best_mv, &ref_mv[id].as_mv, 6778 second_pred, mask, mask_stride, id, 6779 &cpi->fn_ptr[bsize], 1); 6780 else 6781 bestsme = av1_get_mvpred_av_var(x, best_mv, &ref_mv[id].as_mv, 6782 second_pred, &cpi->fn_ptr[bsize], 1); 6783 } 6784 6785 x->mv_limits = tmp_mv_limits; 6786 6787 // Restore the pointer to the first (possibly scaled) prediction buffer. 6788 if (id) xd->plane[plane].pre[0] = ref_yv12[0]; 6789 6790 for (ref = 0; ref < 2; ++ref) { 6791 if (scaled_ref_frame[ref]) { 6792 // Swap back the original buffers for subpel motion search. 
6793 for (int i = 0; i < num_planes; i++) { 6794 xd->plane[i].pre[ref] = backup_yv12[ref][i]; 6795 } 6796 // Re-initialize based on unscaled prediction buffers. 6797 ref_yv12[ref] = xd->plane[plane].pre[ref]; 6798 } 6799 } 6800 6801 // Do sub-pixel compound motion search on the current reference frame. 6802 if (id) xd->plane[plane].pre[0] = ref_yv12[id]; 6803 6804 if (cpi->common.cur_frame_force_integer_mv) { 6805 x->best_mv.as_mv.row *= 8; 6806 x->best_mv.as_mv.col *= 8; 6807 } 6808 if (bestsme < INT_MAX && cpi->common.cur_frame_force_integer_mv == 0) { 6809 int dis; /* TODO: use dis in distortion calculation later. */ 6810 unsigned int sse; 6811 bestsme = cpi->find_fractional_mv_step( 6812 x, cm, mi_row, mi_col, &ref_mv[id].as_mv, 6813 cpi->common.allow_high_precision_mv, x->errorperbit, 6814 &cpi->fn_ptr[bsize], 0, cpi->sf.mv.subpel_iters_per_step, NULL, 6815 x->nmv_vec_cost, x->mv_cost_stack, &dis, &sse, second_pred, mask, 6816 mask_stride, id, pw, ph, cpi->sf.use_accurate_subpel_search, 1); 6817 } 6818 6819 // Restore the pointer to the first prediction buffer. 6820 if (id) xd->plane[plane].pre[0] = ref_yv12[0]; 6821 if (bestsme < last_besterr[id]) { 6822 cur_mv[id].as_mv = *best_mv; 6823 last_besterr[id] = bestsme; 6824 } else { 6825 break; 6826 } 6827 } 6828 6829 *rate_mv = 0; 6830 6831 for (ref = 0; ref < 2; ++ref) { 6832 const int_mv curr_ref_mv = av1_get_ref_mv(x, ref); 6833 *rate_mv += 6834 av1_mv_bit_cost(&cur_mv[ref].as_mv, &curr_ref_mv.as_mv, x->nmv_vec_cost, 6835 x->mv_cost_stack, MV_COST_WEIGHT); 6836 } 6837 } 6838 6839 static void estimate_ref_frame_costs( 6840 const AV1_COMMON *cm, const MACROBLOCKD *xd, const MACROBLOCK *x, 6841 int segment_id, unsigned int *ref_costs_single, 6842 unsigned int (*ref_costs_comp)[REF_FRAMES]) { 6843 int seg_ref_active = 6844 segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME); 6845 if (seg_ref_active) { 6846 memset(ref_costs_single, 0, REF_FRAMES * sizeof(*ref_costs_single)); 6847 int ref_frame; 6848 for (ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) 6849 memset(ref_costs_comp[ref_frame], 0, 6850 REF_FRAMES * sizeof((*ref_costs_comp)[0])); 6851 } else { 6852 int intra_inter_ctx = av1_get_intra_inter_context(xd); 6853 ref_costs_single[INTRA_FRAME] = x->intra_inter_cost[intra_inter_ctx][0]; 6854 unsigned int base_cost = x->intra_inter_cost[intra_inter_ctx][1]; 6855 6856 for (int i = LAST_FRAME; i <= ALTREF_FRAME; ++i) 6857 ref_costs_single[i] = base_cost; 6858 6859 const int ctx_p1 = av1_get_pred_context_single_ref_p1(xd); 6860 const int ctx_p2 = av1_get_pred_context_single_ref_p2(xd); 6861 const int ctx_p3 = av1_get_pred_context_single_ref_p3(xd); 6862 const int ctx_p4 = av1_get_pred_context_single_ref_p4(xd); 6863 const int ctx_p5 = av1_get_pred_context_single_ref_p5(xd); 6864 const int ctx_p6 = av1_get_pred_context_single_ref_p6(xd); 6865 6866 // Determine cost of a single ref frame, where frame types are represented 6867 // by a tree: 6868 // Level 0: add cost whether this ref is a forward or backward ref 6869 ref_costs_single[LAST_FRAME] += x->single_ref_cost[ctx_p1][0][0]; 6870 ref_costs_single[LAST2_FRAME] += x->single_ref_cost[ctx_p1][0][0]; 6871 ref_costs_single[LAST3_FRAME] += x->single_ref_cost[ctx_p1][0][0]; 6872 ref_costs_single[GOLDEN_FRAME] += x->single_ref_cost[ctx_p1][0][0]; 6873 ref_costs_single[BWDREF_FRAME] += x->single_ref_cost[ctx_p1][0][1]; 6874 ref_costs_single[ALTREF2_FRAME] += x->single_ref_cost[ctx_p1][0][1]; 6875 ref_costs_single[ALTREF_FRAME] += x->single_ref_cost[ctx_p1][0][1]; 6876 6877 // Level 1: 
if this ref is forward ref, 6878 // add cost whether it is last/last2 or last3/golden 6879 ref_costs_single[LAST_FRAME] += x->single_ref_cost[ctx_p3][2][0]; 6880 ref_costs_single[LAST2_FRAME] += x->single_ref_cost[ctx_p3][2][0]; 6881 ref_costs_single[LAST3_FRAME] += x->single_ref_cost[ctx_p3][2][1]; 6882 ref_costs_single[GOLDEN_FRAME] += x->single_ref_cost[ctx_p3][2][1]; 6883 6884 // Level 1: if this ref is backward ref 6885 // then add cost whether this ref is altref or backward ref 6886 ref_costs_single[BWDREF_FRAME] += x->single_ref_cost[ctx_p2][1][0]; 6887 ref_costs_single[ALTREF2_FRAME] += x->single_ref_cost[ctx_p2][1][0]; 6888 ref_costs_single[ALTREF_FRAME] += x->single_ref_cost[ctx_p2][1][1]; 6889 6890 // Level 2: further add cost whether this ref is last or last2 6891 ref_costs_single[LAST_FRAME] += x->single_ref_cost[ctx_p4][3][0]; 6892 ref_costs_single[LAST2_FRAME] += x->single_ref_cost[ctx_p4][3][1]; 6893 6894 // Level 2: last3 or golden 6895 ref_costs_single[LAST3_FRAME] += x->single_ref_cost[ctx_p5][4][0]; 6896 ref_costs_single[GOLDEN_FRAME] += x->single_ref_cost[ctx_p5][4][1]; 6897 6898 // Level 2: bwdref or altref2 6899 ref_costs_single[BWDREF_FRAME] += x->single_ref_cost[ctx_p6][5][0]; 6900 ref_costs_single[ALTREF2_FRAME] += x->single_ref_cost[ctx_p6][5][1]; 6901 6902 if (cm->current_frame.reference_mode != SINGLE_REFERENCE) { 6903 // Similar to single ref, determine cost of compound ref frames. 6904 // cost_compound_refs = cost_first_ref + cost_second_ref 6905 const int bwdref_comp_ctx_p = av1_get_pred_context_comp_bwdref_p(xd); 6906 const int bwdref_comp_ctx_p1 = av1_get_pred_context_comp_bwdref_p1(xd); 6907 const int ref_comp_ctx_p = av1_get_pred_context_comp_ref_p(xd); 6908 const int ref_comp_ctx_p1 = av1_get_pred_context_comp_ref_p1(xd); 6909 const int ref_comp_ctx_p2 = av1_get_pred_context_comp_ref_p2(xd); 6910 6911 const int comp_ref_type_ctx = av1_get_comp_reference_type_context(xd); 6912 unsigned int ref_bicomp_costs[REF_FRAMES] = { 0 }; 6913 6914 ref_bicomp_costs[LAST_FRAME] = ref_bicomp_costs[LAST2_FRAME] = 6915 ref_bicomp_costs[LAST3_FRAME] = ref_bicomp_costs[GOLDEN_FRAME] = 6916 base_cost + x->comp_ref_type_cost[comp_ref_type_ctx][1]; 6917 ref_bicomp_costs[BWDREF_FRAME] = ref_bicomp_costs[ALTREF2_FRAME] = 0; 6918 ref_bicomp_costs[ALTREF_FRAME] = 0; 6919 6920 // cost of first ref frame 6921 ref_bicomp_costs[LAST_FRAME] += x->comp_ref_cost[ref_comp_ctx_p][0][0]; 6922 ref_bicomp_costs[LAST2_FRAME] += x->comp_ref_cost[ref_comp_ctx_p][0][0]; 6923 ref_bicomp_costs[LAST3_FRAME] += x->comp_ref_cost[ref_comp_ctx_p][0][1]; 6924 ref_bicomp_costs[GOLDEN_FRAME] += x->comp_ref_cost[ref_comp_ctx_p][0][1]; 6925 6926 ref_bicomp_costs[LAST_FRAME] += x->comp_ref_cost[ref_comp_ctx_p1][1][0]; 6927 ref_bicomp_costs[LAST2_FRAME] += x->comp_ref_cost[ref_comp_ctx_p1][1][1]; 6928 6929 ref_bicomp_costs[LAST3_FRAME] += x->comp_ref_cost[ref_comp_ctx_p2][2][0]; 6930 ref_bicomp_costs[GOLDEN_FRAME] += x->comp_ref_cost[ref_comp_ctx_p2][2][1]; 6931 6932 // cost of second ref frame 6933 ref_bicomp_costs[BWDREF_FRAME] += 6934 x->comp_bwdref_cost[bwdref_comp_ctx_p][0][0]; 6935 ref_bicomp_costs[ALTREF2_FRAME] += 6936 x->comp_bwdref_cost[bwdref_comp_ctx_p][0][0]; 6937 ref_bicomp_costs[ALTREF_FRAME] += 6938 x->comp_bwdref_cost[bwdref_comp_ctx_p][0][1]; 6939 6940 ref_bicomp_costs[BWDREF_FRAME] += 6941 x->comp_bwdref_cost[bwdref_comp_ctx_p1][1][0]; 6942 ref_bicomp_costs[ALTREF2_FRAME] += 6943 x->comp_bwdref_cost[bwdref_comp_ctx_p1][1][1]; 6944 6945 // cost: if one ref frame is forward ref, the other 
ref is backward ref 6946 int ref0, ref1; 6947 for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) { 6948 for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1) { 6949 ref_costs_comp[ref0][ref1] = 6950 ref_bicomp_costs[ref0] + ref_bicomp_costs[ref1]; 6951 } 6952 } 6953 6954 // cost: if both ref frames are the same side. 6955 const int uni_comp_ref_ctx_p = av1_get_pred_context_uni_comp_ref_p(xd); 6956 const int uni_comp_ref_ctx_p1 = av1_get_pred_context_uni_comp_ref_p1(xd); 6957 const int uni_comp_ref_ctx_p2 = av1_get_pred_context_uni_comp_ref_p2(xd); 6958 ref_costs_comp[LAST_FRAME][LAST2_FRAME] = 6959 base_cost + x->comp_ref_type_cost[comp_ref_type_ctx][0] + 6960 x->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] + 6961 x->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][0]; 6962 ref_costs_comp[LAST_FRAME][LAST3_FRAME] = 6963 base_cost + x->comp_ref_type_cost[comp_ref_type_ctx][0] + 6964 x->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] + 6965 x->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] + 6966 x->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][0]; 6967 ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] = 6968 base_cost + x->comp_ref_type_cost[comp_ref_type_ctx][0] + 6969 x->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] + 6970 x->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] + 6971 x->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][1]; 6972 ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] = 6973 base_cost + x->comp_ref_type_cost[comp_ref_type_ctx][0] + 6974 x->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][1]; 6975 } else { 6976 int ref0, ref1; 6977 for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) { 6978 for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1) 6979 ref_costs_comp[ref0][ref1] = 512; 6980 } 6981 ref_costs_comp[LAST_FRAME][LAST2_FRAME] = 512; 6982 ref_costs_comp[LAST_FRAME][LAST3_FRAME] = 512; 6983 ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] = 512; 6984 ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] = 512; 6985 } 6986 } 6987 } 6988 6989 static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, 6990 int mode_index, 6991 int64_t comp_pred_diff[REFERENCE_MODES], 6992 int skippable) { 6993 MACROBLOCKD *const xd = &x->e_mbd; 6994 6995 // Take a snapshot of the coding context so it can be 6996 // restored if we decide to encode this way 6997 ctx->skip = x->skip; 6998 ctx->skippable = skippable; 6999 ctx->best_mode_index = mode_index; 7000 ctx->mic = *xd->mi[0]; 7001 ctx->mbmi_ext = *x->mbmi_ext; 7002 ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE]; 7003 ctx->comp_pred_diff = (int)comp_pred_diff[COMPOUND_REFERENCE]; 7004 ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT]; 7005 } 7006 7007 static void setup_buffer_ref_mvs_inter( 7008 const AV1_COMP *const cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame, 7009 BLOCK_SIZE block_size, int mi_row, int mi_col, 7010 struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) { 7011 const AV1_COMMON *cm = &cpi->common; 7012 const int num_planes = av1_num_planes(cm); 7013 const YV12_BUFFER_CONFIG *scaled_ref_frame = 7014 av1_get_scaled_ref_frame(cpi, ref_frame); 7015 MACROBLOCKD *const xd = &x->e_mbd; 7016 MB_MODE_INFO *const mbmi = xd->mi[0]; 7017 MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; 7018 const struct scale_factors *const sf = 7019 get_ref_scale_factors_const(cm, ref_frame); 7020 const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, ref_frame); 7021 assert(yv12 != NULL); 7022 7023 if (scaled_ref_frame) { 7024 // Setup pred block based on scaled reference, because av1_mv_pred() doesn't 7025 // support scaling. 
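// (The unscaled buffers are restored near the end of this function, once
// av1_mv_pred() has consumed the scaled ones.)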
7026 av1_setup_pred_block(xd, yv12_mb[ref_frame], scaled_ref_frame, mi_row, 7027 mi_col, NULL, NULL, num_planes); 7028 } else { 7029 av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf, 7030 num_planes); 7031 } 7032 7033 // Gets an initial list of candidate vectors from neighbours and orders them 7034 av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count, 7035 mbmi_ext->ref_mv_stack, NULL, mbmi_ext->global_mvs, mi_row, 7036 mi_col, mbmi_ext->mode_context); 7037 7038 // Further refinement that is encode side only to test the top few candidates 7039 // in full and choose the best as the center point for subsequent searches. 7040 // The current implementation doesn't support scaling. 7041 av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12_mb[ref_frame][0].stride, 7042 ref_frame, block_size); 7043 7044 // Go back to unscaled reference. 7045 if (scaled_ref_frame) { 7046 // We had temporarily setup pred block based on scaled reference above. Go 7047 // back to unscaled reference now, for subsequent use. 7048 av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf, 7049 num_planes); 7050 } 7051 } 7052 7053 static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x, 7054 BLOCK_SIZE bsize, int mi_row, int mi_col, 7055 int ref_idx, int *rate_mv) { 7056 MACROBLOCKD *xd = &x->e_mbd; 7057 const AV1_COMMON *cm = &cpi->common; 7058 const int num_planes = av1_num_planes(cm); 7059 MB_MODE_INFO *mbmi = xd->mi[0]; 7060 struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0, 0, 0, 0 } }; 7061 int bestsme = INT_MAX; 7062 int step_param; 7063 int sadpb = x->sadperbit16; 7064 MV mvp_full; 7065 int ref = mbmi->ref_frame[ref_idx]; 7066 MV ref_mv = av1_get_ref_mv(x, ref_idx).as_mv; 7067 7068 MvLimits tmp_mv_limits = x->mv_limits; 7069 int cost_list[5]; 7070 7071 const YV12_BUFFER_CONFIG *scaled_ref_frame = 7072 av1_get_scaled_ref_frame(cpi, ref); 7073 7074 if (scaled_ref_frame) { 7075 // Swap out the reference frame for a version that's been scaled to 7076 // match the resolution of the current frame, allowing the existing 7077 // full-pixel motion search code to be used without additional 7078 // modifications. 7079 for (int i = 0; i < num_planes; i++) { 7080 backup_yv12[i] = xd->plane[i].pre[ref_idx]; 7081 } 7082 av1_setup_pre_planes(xd, ref_idx, scaled_ref_frame, mi_row, mi_col, NULL, 7083 num_planes); 7084 } 7085 7086 // Work out the size of the first step in the mv step search. 7087 // 0 here is maximum length first step. 1 is AOMMAX >> 1 etc. 7088 if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) { 7089 // Take the weighted average of the step_params based on the last frame's 7090 // max mv magnitude and that based on the best ref mvs of the current 7091 // block for the given reference. 
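// For example, a frame-level step size of 6 averaged with a block-level
// estimate of 2 starts the full-pixel search with step_param (6 + 2) / 2 = 4.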
7092 step_param = 7093 (av1_init_search_range(x->max_mv_context[ref]) + cpi->mv_step_param) / 7094 2; 7095 } else { 7096 step_param = cpi->mv_step_param; 7097 } 7098 7099 if (cpi->sf.adaptive_motion_search && bsize < cm->seq_params.sb_size) { 7100 int boffset = 7101 2 * (mi_size_wide_log2[cm->seq_params.sb_size] - 7102 AOMMIN(mi_size_high_log2[bsize], mi_size_wide_log2[bsize])); 7103 step_param = AOMMAX(step_param, boffset); 7104 } 7105 7106 if (cpi->sf.adaptive_motion_search) { 7107 int bwl = mi_size_wide_log2[bsize]; 7108 int bhl = mi_size_high_log2[bsize]; 7109 int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4); 7110 7111 if (tlevel < 5) { 7112 step_param += 2; 7113 step_param = AOMMIN(step_param, MAX_MVSEARCH_STEPS - 1); 7114 } 7115 7116 // prev_mv_sad is not setup for dynamically scaled frames. 7117 if (cpi->oxcf.resize_mode != RESIZE_RANDOM) { 7118 int i; 7119 for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) { 7120 if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) { 7121 x->pred_mv[ref].row = 0; 7122 x->pred_mv[ref].col = 0; 7123 x->best_mv.as_int = INVALID_MV; 7124 7125 if (scaled_ref_frame) { 7126 // Swap back the original buffers before returning. 7127 for (int j = 0; j < num_planes; ++j) 7128 xd->plane[j].pre[ref_idx] = backup_yv12[j]; 7129 } 7130 return; 7131 } 7132 } 7133 } 7134 } 7135 7136 // Note: MV limits are modified here. Always restore the original values 7137 // after full-pixel motion search. 7138 av1_set_mv_search_range(&x->mv_limits, &ref_mv); 7139 7140 if (mbmi->motion_mode != SIMPLE_TRANSLATION) 7141 mvp_full = mbmi->mv[0].as_mv; 7142 else 7143 mvp_full = ref_mv; 7144 7145 mvp_full.col >>= 3; 7146 mvp_full.row >>= 3; 7147 7148 x->best_mv.as_int = x->second_best_mv.as_int = INVALID_MV; 7149 7150 switch (mbmi->motion_mode) { 7151 case SIMPLE_TRANSLATION: 7152 bestsme = av1_full_pixel_search( 7153 cpi, x, bsize, &mvp_full, step_param, cpi->sf.mv.search_method, 0, 7154 sadpb, cond_cost_list(cpi, cost_list), &ref_mv, INT_MAX, 1, 7155 (MI_SIZE * mi_col), (MI_SIZE * mi_row), 0, &cpi->ss_cfg[SS_CFG_SRC]); 7156 break; 7157 case OBMC_CAUSAL: 7158 bestsme = av1_obmc_full_pixel_search( 7159 cpi, x, &mvp_full, step_param, sadpb, 7160 MAX_MVSEARCH_STEPS - 1 - step_param, 1, &cpi->fn_ptr[bsize], &ref_mv, 7161 &(x->best_mv.as_mv), 0, &cpi->ss_cfg[SS_CFG_SRC]); 7162 break; 7163 default: assert(0 && "Invalid motion mode!\n"); 7164 } 7165 7166 if (scaled_ref_frame) { 7167 // Swap back the original buffers for subpel motion search. 7168 for (int i = 0; i < num_planes; i++) { 7169 xd->plane[i].pre[ref_idx] = backup_yv12[i]; 7170 } 7171 } 7172 7173 x->mv_limits = tmp_mv_limits; 7174 7175 if (cpi->common.cur_frame_force_integer_mv) { 7176 x->best_mv.as_mv.row *= 8; 7177 x->best_mv.as_mv.col *= 8; 7178 } 7179 const int use_fractional_mv = 7180 bestsme < INT_MAX && cpi->common.cur_frame_force_integer_mv == 0; 7181 if (use_fractional_mv) { 7182 int dis; /* TODO: use dis in distortion calculation later. 
*/ 7183 switch (mbmi->motion_mode) { 7184 case SIMPLE_TRANSLATION: 7185 if (cpi->sf.use_accurate_subpel_search) { 7186 int best_mv_var; 7187 const int try_second = x->second_best_mv.as_int != INVALID_MV && 7188 x->second_best_mv.as_int != x->best_mv.as_int; 7189 const int pw = block_size_wide[bsize]; 7190 const int ph = block_size_high[bsize]; 7191 best_mv_var = cpi->find_fractional_mv_step( 7192 x, cm, mi_row, mi_col, &ref_mv, cm->allow_high_precision_mv, 7193 x->errorperbit, &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop, 7194 cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list), 7195 x->nmv_vec_cost, x->mv_cost_stack, &dis, &x->pred_sse[ref], NULL, 7196 NULL, 0, 0, pw, ph, cpi->sf.use_accurate_subpel_search, 1); 7197 7198 if (try_second) { 7199 const int minc = 7200 AOMMAX(x->mv_limits.col_min * 8, ref_mv.col - MV_MAX); 7201 const int maxc = 7202 AOMMIN(x->mv_limits.col_max * 8, ref_mv.col + MV_MAX); 7203 const int minr = 7204 AOMMAX(x->mv_limits.row_min * 8, ref_mv.row - MV_MAX); 7205 const int maxr = 7206 AOMMIN(x->mv_limits.row_max * 8, ref_mv.row + MV_MAX); 7207 int this_var; 7208 MV best_mv = x->best_mv.as_mv; 7209 7210 x->best_mv = x->second_best_mv; 7211 if (x->best_mv.as_mv.row * 8 <= maxr && 7212 x->best_mv.as_mv.row * 8 >= minr && 7213 x->best_mv.as_mv.col * 8 <= maxc && 7214 x->best_mv.as_mv.col * 8 >= minc) { 7215 this_var = cpi->find_fractional_mv_step( 7216 x, cm, mi_row, mi_col, &ref_mv, cm->allow_high_precision_mv, 7217 x->errorperbit, &cpi->fn_ptr[bsize], 7218 cpi->sf.mv.subpel_force_stop, 7219 cpi->sf.mv.subpel_iters_per_step, 7220 cond_cost_list(cpi, cost_list), x->nmv_vec_cost, 7221 x->mv_cost_stack, &dis, &x->pred_sse[ref], NULL, NULL, 0, 0, 7222 pw, ph, cpi->sf.use_accurate_subpel_search, 0); 7223 if (this_var < best_mv_var) best_mv = x->best_mv.as_mv; 7224 x->best_mv.as_mv = best_mv; 7225 } 7226 } 7227 } else { 7228 cpi->find_fractional_mv_step( 7229 x, cm, mi_row, mi_col, &ref_mv, cm->allow_high_precision_mv, 7230 x->errorperbit, &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop, 7231 cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list), 7232 x->nmv_vec_cost, x->mv_cost_stack, &dis, &x->pred_sse[ref], NULL, 7233 NULL, 0, 0, 0, 0, 0, 1); 7234 } 7235 break; 7236 case OBMC_CAUSAL: 7237 av1_find_best_obmc_sub_pixel_tree_up( 7238 x, cm, mi_row, mi_col, &x->best_mv.as_mv, &ref_mv, 7239 cm->allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize], 7240 cpi->sf.mv.subpel_force_stop, cpi->sf.mv.subpel_iters_per_step, 7241 x->nmv_vec_cost, x->mv_cost_stack, &dis, &x->pred_sse[ref], 0, 7242 cpi->sf.use_accurate_subpel_search); 7243 break; 7244 default: assert(0 && "Invalid motion mode!\n"); 7245 } 7246 } 7247 *rate_mv = av1_mv_bit_cost(&x->best_mv.as_mv, &ref_mv, x->nmv_vec_cost, 7248 x->mv_cost_stack, MV_COST_WEIGHT); 7249 7250 if (cpi->sf.adaptive_motion_search && mbmi->motion_mode == SIMPLE_TRANSLATION) 7251 x->pred_mv[ref] = x->best_mv.as_mv; 7252 } 7253 7254 static INLINE void restore_dst_buf(MACROBLOCKD *xd, const BUFFER_SET dst, 7255 const int num_planes) { 7256 for (int i = 0; i < num_planes; i++) { 7257 xd->plane[i].dst.buf = dst.plane[i]; 7258 xd->plane[i].dst.stride = dst.stride[i]; 7259 } 7260 } 7261 7262 static void build_second_inter_pred(const AV1_COMP *cpi, MACROBLOCK *x, 7263 BLOCK_SIZE bsize, const MV *other_mv, 7264 int mi_row, int mi_col, const int block, 7265 int ref_idx, uint8_t *second_pred) { 7266 const AV1_COMMON *const cm = &cpi->common; 7267 const int pw = block_size_wide[bsize]; 7268 const int ph = 
block_size_high[bsize]; 7269 MACROBLOCKD *xd = &x->e_mbd; 7270 MB_MODE_INFO *mbmi = xd->mi[0]; 7271 const int other_ref = mbmi->ref_frame[!ref_idx]; 7272 struct macroblockd_plane *const pd = &xd->plane[0]; 7273 // ic and ir are the 4x4 coordinates of the sub8x8 at index "block" 7274 const int ic = block & 1; 7275 const int ir = (block - ic) >> 1; 7276 const int p_col = ((mi_col * MI_SIZE) >> pd->subsampling_x) + 4 * ic; 7277 const int p_row = ((mi_row * MI_SIZE) >> pd->subsampling_y) + 4 * ir; 7278 const WarpedMotionParams *const wm = &xd->global_motion[other_ref]; 7279 int is_global = is_global_mv_block(xd->mi[0], wm->wmtype); 7280 7281 // This function should only ever be called for compound modes 7282 assert(has_second_ref(mbmi)); 7283 7284 const int plane = 0; 7285 struct buf_2d ref_yv12 = xd->plane[plane].pre[!ref_idx]; 7286 7287 struct scale_factors sf; 7288 av1_setup_scale_factors_for_frame(&sf, ref_yv12.width, ref_yv12.height, 7289 cm->width, cm->height); 7290 7291 ConvolveParams conv_params = get_conv_params(0, plane, xd->bd); 7292 WarpTypesAllowed warp_types; 7293 warp_types.global_warp_allowed = is_global; 7294 warp_types.local_warp_allowed = mbmi->motion_mode == WARPED_CAUSAL; 7295 7296 // Get the prediction block from the 'other' reference frame. 7297 av1_build_inter_predictor(ref_yv12.buf, ref_yv12.stride, second_pred, pw, 7298 other_mv, &sf, pw, ph, &conv_params, 7299 mbmi->interp_filters, &warp_types, p_col, p_row, 7300 plane, !ref_idx, MV_PRECISION_Q3, mi_col * MI_SIZE, 7301 mi_row * MI_SIZE, xd, cm->allow_warped_motion); 7302 7303 av1_dist_wtd_comp_weight_assign(cm, mbmi, 0, &xd->jcp_param.fwd_offset, 7304 &xd->jcp_param.bck_offset, 7305 &xd->jcp_param.use_dist_wtd_comp_avg, 1); 7306 } 7307 7308 // Search for the best mv for one component of a compound, 7309 // given that the other component is fixed. 7310 static void compound_single_motion_search(const AV1_COMP *cpi, MACROBLOCK *x, 7311 BLOCK_SIZE bsize, MV *this_mv, 7312 int mi_row, int mi_col, 7313 const uint8_t *second_pred, 7314 const uint8_t *mask, int mask_stride, 7315 int *rate_mv, int ref_idx) { 7316 const AV1_COMMON *const cm = &cpi->common; 7317 const int num_planes = av1_num_planes(cm); 7318 const int pw = block_size_wide[bsize]; 7319 const int ph = block_size_high[bsize]; 7320 MACROBLOCKD *xd = &x->e_mbd; 7321 MB_MODE_INFO *mbmi = xd->mi[0]; 7322 const int ref = mbmi->ref_frame[ref_idx]; 7323 const int_mv ref_mv = av1_get_ref_mv(x, ref_idx); 7324 struct macroblockd_plane *const pd = &xd->plane[0]; 7325 7326 struct buf_2d backup_yv12[MAX_MB_PLANE]; 7327 const YV12_BUFFER_CONFIG *const scaled_ref_frame = 7328 av1_get_scaled_ref_frame(cpi, ref); 7329 7330 // Check that this is either an interinter or an interintra block 7331 assert(has_second_ref(mbmi) || (ref_idx == 0 && is_interintra_mode(mbmi))); 7332 7333 // Store the first prediction buffer. 7334 struct buf_2d orig_yv12; 7335 if (ref_idx) { 7336 orig_yv12 = pd->pre[0]; 7337 pd->pre[0] = pd->pre[ref_idx]; 7338 } 7339 7340 if (scaled_ref_frame) { 7341 int i; 7342 // Swap out the reference frame for a version that's been scaled to 7343 // match the resolution of the current frame, allowing the existing 7344 // full-pixel motion search code to be used without additional 7345 // modifications. 
7346 for (i = 0; i < num_planes; i++) backup_yv12[i] = xd->plane[i].pre[ref_idx]; 7347 av1_setup_pre_planes(xd, ref_idx, scaled_ref_frame, mi_row, mi_col, NULL, 7348 num_planes); 7349 } 7350 7351 int bestsme = INT_MAX; 7352 int sadpb = x->sadperbit16; 7353 MV *const best_mv = &x->best_mv.as_mv; 7354 int search_range = SEARCH_RANGE_8P; 7355 7356 MvLimits tmp_mv_limits = x->mv_limits; 7357 7358 // Do compound motion search on the current reference frame. 7359 av1_set_mv_search_range(&x->mv_limits, &ref_mv.as_mv); 7360 7361 // Use the mv result from the single mode as mv predictor. 7362 *best_mv = *this_mv; 7363 7364 best_mv->col >>= 3; 7365 best_mv->row >>= 3; 7366 7367 // Small-range full-pixel motion search. 7368 bestsme = av1_refining_search_8p_c(x, sadpb, search_range, 7369 &cpi->fn_ptr[bsize], mask, mask_stride, 7370 ref_idx, &ref_mv.as_mv, second_pred); 7371 if (bestsme < INT_MAX) { 7372 if (mask) 7373 bestsme = 7374 av1_get_mvpred_mask_var(x, best_mv, &ref_mv.as_mv, second_pred, mask, 7375 mask_stride, ref_idx, &cpi->fn_ptr[bsize], 1); 7376 else 7377 bestsme = av1_get_mvpred_av_var(x, best_mv, &ref_mv.as_mv, second_pred, 7378 &cpi->fn_ptr[bsize], 1); 7379 } 7380 7381 x->mv_limits = tmp_mv_limits; 7382 7383 if (scaled_ref_frame) { 7384 // Swap back the original buffers for subpel motion search. 7385 for (int i = 0; i < num_planes; i++) { 7386 xd->plane[i].pre[ref_idx] = backup_yv12[i]; 7387 } 7388 } 7389 7390 if (cpi->common.cur_frame_force_integer_mv) { 7391 x->best_mv.as_mv.row *= 8; 7392 x->best_mv.as_mv.col *= 8; 7393 } 7394 const int use_fractional_mv = 7395 bestsme < INT_MAX && cpi->common.cur_frame_force_integer_mv == 0; 7396 if (use_fractional_mv) { 7397 int dis; /* TODO: use dis in distortion calculation later. */ 7398 unsigned int sse; 7399 bestsme = cpi->find_fractional_mv_step( 7400 x, cm, mi_row, mi_col, &ref_mv.as_mv, 7401 cpi->common.allow_high_precision_mv, x->errorperbit, 7402 &cpi->fn_ptr[bsize], 0, cpi->sf.mv.subpel_iters_per_step, NULL, 7403 x->nmv_vec_cost, x->mv_cost_stack, &dis, &sse, second_pred, mask, 7404 mask_stride, ref_idx, pw, ph, cpi->sf.use_accurate_subpel_search, 1); 7405 } 7406 7407 // Restore the pointer to the first unscaled prediction buffer. 7408 if (ref_idx) pd->pre[0] = orig_yv12; 7409 7410 if (bestsme < INT_MAX) *this_mv = *best_mv; 7411 7412 *rate_mv = 0; 7413 7414 *rate_mv += av1_mv_bit_cost(this_mv, &ref_mv.as_mv, x->nmv_vec_cost, 7415 x->mv_cost_stack, MV_COST_WEIGHT); 7416 } 7417 7418 // Wrapper for compound_single_motion_search, for the common case 7419 // where the second prediction is also an inter mode. 7420 static void compound_single_motion_search_interinter( 7421 const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int_mv *cur_mv, 7422 int mi_row, int mi_col, const uint8_t *mask, int mask_stride, int *rate_mv, 7423 const int block, int ref_idx) { 7424 MACROBLOCKD *xd = &x->e_mbd; 7425 // This function should only ever be called for compound modes 7426 assert(has_second_ref(xd->mi[0])); 7427 7428 // Prediction buffer from second frame. 
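// (Declared as uint16_t so the same storage can hold high-bitdepth samples;
// for 8-bit encodes the buffer is simply reinterpreted as bytes below.)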
7429   DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[MAX_SB_SQUARE]);
7430   uint8_t *second_pred;
7431   if (is_cur_buf_hbd(xd))
7432     second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc_16);
7433   else
7434     second_pred = (uint8_t *)second_pred_alloc_16;
7435
7436   MV *this_mv = &cur_mv[ref_idx].as_mv;
7437   const MV *other_mv = &cur_mv[!ref_idx].as_mv;
7438
7439   build_second_inter_pred(cpi, x, bsize, other_mv, mi_row, mi_col, block,
7440                           ref_idx, second_pred);
7441
7442   compound_single_motion_search(cpi, x, bsize, this_mv, mi_row, mi_col,
7443                                 second_pred, mask, mask_stride, rate_mv,
7444                                 ref_idx);
7445 }
7446
7447 static void do_masked_motion_search_indexed(
7448     const AV1_COMP *const cpi, MACROBLOCK *x, const int_mv *const cur_mv,
7449     const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE bsize,
7450     int mi_row, int mi_col, int_mv *tmp_mv, int *rate_mv, int which) {
7451   // NOTE: which = 0: refine mv 0 only; 1: refine mv 1 only; 2: joint search.
7452   MACROBLOCKD *xd = &x->e_mbd;
7453   MB_MODE_INFO *mbmi = xd->mi[0];
7454   BLOCK_SIZE sb_type = mbmi->sb_type;
7455   const uint8_t *mask;
7456   const int mask_stride = block_size_wide[bsize];
7457
7458   mask = av1_get_compound_type_mask(comp_data, sb_type);
7459
7460   tmp_mv[0].as_int = cur_mv[0].as_int;
7461   tmp_mv[1].as_int = cur_mv[1].as_int;
7462   if (which == 0 || which == 1) {
7463     compound_single_motion_search_interinter(cpi, x, bsize, tmp_mv, mi_row,
7464                                              mi_col, mask, mask_stride,
7465                                              rate_mv, 0, which);
7466   } else if (which == 2) {
7467     joint_motion_search(cpi, x, bsize, tmp_mv, mi_row, mi_col, NULL, mask,
7468                         mask_stride, rate_mv, 0);
7469   }
7470 }
7471
7472 #define USE_DISCOUNT_NEWMV_TEST 0
7473 #if USE_DISCOUNT_NEWMV_TEST
7474 // In some situations we want to discount the apparent cost of a new motion
7475 // vector. Where there is a subtle motion field, and especially where there is
7476 // low spatial complexity, it can be hard to cover the cost of a new motion
7477 // vector in a single block, even if that motion vector reduces distortion.
7478 // However, once established, that vector may be usable through the nearest
7479 // and near mv modes to reduce distortion in subsequent blocks and also
7480 // improve visual quality.
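// NEW_MV_DISCOUNT_FACTOR controls the strength of that discount. A sketch of
// the intended use at a call site (hypothetical -- the real call sites live
// outside this excerpt, and the feature is compiled out while
// USE_DISCOUNT_NEWMV_TEST is 0):
//
//   if (discount_newmv_test(cpi, x, this_mode, this_mv)) {
//     // Charge only 7/8 of the nominal mv rate, e.g. 400 -> 350 bits.
//     rate_mv = (rate_mv * (NEW_MV_DISCOUNT_FACTOR - 1)) /
//               NEW_MV_DISCOUNT_FACTOR;
//   }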
7481 #define NEW_MV_DISCOUNT_FACTOR 8
7482 static INLINE void get_this_mv(int_mv *this_mv, PREDICTION_MODE this_mode,
7483                                int ref_idx, int ref_mv_idx,
7484                                const MV_REFERENCE_FRAME *ref_frame,
7485                                const MB_MODE_INFO_EXT *mbmi_ext);
7486 static int discount_newmv_test(const AV1_COMP *const cpi, const MACROBLOCK *x,
7487                                PREDICTION_MODE this_mode, int_mv this_mv) {
7488   if (this_mode == NEWMV && this_mv.as_int != 0 &&
7489       !cpi->rc.is_src_frame_alt_ref) {
7490     // Only discount new_mv when nearest_mv and all near_mv are zero, and the
7491     // new_mv is not equal to global_mv.
7492     const AV1_COMMON *const cm = &cpi->common;
7493     const MACROBLOCKD *const xd = &x->e_mbd;
7494     const MB_MODE_INFO *const mbmi = xd->mi[0];
7495     const MV_REFERENCE_FRAME tmp_ref_frames[2] = { mbmi->ref_frame[0],
7496                                                    NONE_FRAME };
7497     const uint8_t ref_frame_type = av1_ref_frame_type(tmp_ref_frames);
7498     int_mv nearest_mv;
7499     get_this_mv(&nearest_mv, NEARESTMV, 0, 0, tmp_ref_frames, x->mbmi_ext);
7500     int ret = nearest_mv.as_int == 0;
7501     for (int ref_mv_idx = 0;
7502          ref_mv_idx < x->mbmi_ext->ref_mv_count[ref_frame_type];
         ++ref_mv_idx) {
7503       int_mv near_mv;
7504       get_this_mv(&near_mv, NEARMV, 0, ref_mv_idx, tmp_ref_frames,
                  x->mbmi_ext);
7505       ret &= near_mv.as_int == 0;
7506     }
7507     if (cm->global_motion[tmp_ref_frames[0]].wmtype <= TRANSLATION) {
7508       int_mv global_mv;
7509       get_this_mv(&global_mv, GLOBALMV, 0, 0, tmp_ref_frames, x->mbmi_ext);
7510       ret &= global_mv.as_int != this_mv.as_int;
7511     }
7512     return ret;
7513   }
7514   return 0;
7515 }
7516 #endif  // USE_DISCOUNT_NEWMV_TEST
7517
7518 #define LEFT_TOP_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
7519 #define RIGHT_BOTTOM_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
7520
7521 // TODO(jingning): this mv clamping function should be block size dependent.
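// The margins above are in 1/8-pel units ((pixels) << 3): the frame border
// minus the interpolation filter extension. The clamp below therefore keeps
// the mv far enough inside the padded reference for subpel interpolation.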
7522 static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) { 7523 clamp_mv(mv, xd->mb_to_left_edge - LEFT_TOP_MARGIN, 7524 xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN, 7525 xd->mb_to_top_edge - LEFT_TOP_MARGIN, 7526 xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN); 7527 } 7528 7529 static int estimate_wedge_sign(const AV1_COMP *cpi, const MACROBLOCK *x, 7530 const BLOCK_SIZE bsize, const uint8_t *pred0, 7531 int stride0, const uint8_t *pred1, int stride1) { 7532 static const BLOCK_SIZE split_qtr[BLOCK_SIZES_ALL] = { 7533 // 4X4 7534 BLOCK_INVALID, 7535 // 4X8, 8X4, 8X8 7536 BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X4, 7537 // 8X16, 16X8, 16X16 7538 BLOCK_4X8, BLOCK_8X4, BLOCK_8X8, 7539 // 16X32, 32X16, 32X32 7540 BLOCK_8X16, BLOCK_16X8, BLOCK_16X16, 7541 // 32X64, 64X32, 64X64 7542 BLOCK_16X32, BLOCK_32X16, BLOCK_32X32, 7543 // 64x128, 128x64, 128x128 7544 BLOCK_32X64, BLOCK_64X32, BLOCK_64X64, 7545 // 4X16, 16X4, 8X32 7546 BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X16, 7547 // 32X8, 16X64, 64X16 7548 BLOCK_16X4, BLOCK_8X32, BLOCK_32X8 7549 }; 7550 const struct macroblock_plane *const p = &x->plane[0]; 7551 const uint8_t *src = p->src.buf; 7552 int src_stride = p->src.stride; 7553 const int bw = block_size_wide[bsize]; 7554 const int bh = block_size_high[bsize]; 7555 uint32_t esq[2][4]; 7556 int64_t tl, br; 7557 7558 const BLOCK_SIZE f_index = split_qtr[bsize]; 7559 assert(f_index != BLOCK_INVALID); 7560 7561 if (is_cur_buf_hbd(&x->e_mbd)) { 7562 pred0 = CONVERT_TO_BYTEPTR(pred0); 7563 pred1 = CONVERT_TO_BYTEPTR(pred1); 7564 } 7565 7566 cpi->fn_ptr[f_index].vf(src, src_stride, pred0, stride0, &esq[0][0]); 7567 cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, pred0 + bw / 2, stride0, 7568 &esq[0][1]); 7569 cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride, src_stride, 7570 pred0 + bh / 2 * stride0, stride0, &esq[0][2]); 7571 cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride + bw / 2, src_stride, 7572 pred0 + bh / 2 * stride0 + bw / 2, stride0, 7573 &esq[0][3]); 7574 cpi->fn_ptr[f_index].vf(src, src_stride, pred1, stride1, &esq[1][0]); 7575 cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, pred1 + bw / 2, stride1, 7576 &esq[1][1]); 7577 cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride, src_stride, 7578 pred1 + bh / 2 * stride1, stride0, &esq[1][2]); 7579 cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride + bw / 2, src_stride, 7580 pred1 + bh / 2 * stride1 + bw / 2, stride0, 7581 &esq[1][3]); 7582 7583 tl = ((int64_t)esq[0][0] + esq[0][1] + esq[0][2]) - 7584 ((int64_t)esq[1][0] + esq[1][1] + esq[1][2]); 7585 br = ((int64_t)esq[1][3] + esq[1][1] + esq[1][2]) - 7586 ((int64_t)esq[0][3] + esq[0][1] + esq[0][2]); 7587 return (tl + br > 0); 7588 } 7589 7590 // Choose the best wedge index and sign 7591 static int64_t pick_wedge(const AV1_COMP *const cpi, const MACROBLOCK *const x, 7592 const BLOCK_SIZE bsize, const uint8_t *const p0, 7593 const int16_t *const residual1, 7594 const int16_t *const diff10, 7595 int *const best_wedge_sign, 7596 int *const best_wedge_index) { 7597 const MACROBLOCKD *const xd = &x->e_mbd; 7598 const struct buf_2d *const src = &x->plane[0].src; 7599 const int bw = block_size_wide[bsize]; 7600 const int bh = block_size_high[bsize]; 7601 const int N = bw * bh; 7602 assert(N >= 64); 7603 int rate; 7604 int64_t dist; 7605 int64_t rd, best_rd = INT64_MAX; 7606 int wedge_index; 7607 int wedge_sign; 7608 int wedge_types = (1 << get_wedge_bits_lookup(bsize)); 7609 const uint8_t *mask; 7610 uint64_t sse; 7611 const int hbd = is_cur_buf_hbd(xd); 7612 const int bd_round = hbd ? 
// Choose the best wedge index and sign
static int64_t pick_wedge(const AV1_COMP *const cpi, const MACROBLOCK *const x,
                          const BLOCK_SIZE bsize, const uint8_t *const p0,
                          const int16_t *const residual1,
                          const int16_t *const diff10,
                          int *const best_wedge_sign,
                          int *const best_wedge_index) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const struct buf_2d *const src = &x->plane[0].src;
  const int bw = block_size_wide[bsize];
  const int bh = block_size_high[bsize];
  const int N = bw * bh;
  assert(N >= 64);
  int rate;
  int64_t dist;
  int64_t rd, best_rd = INT64_MAX;
  int wedge_index;
  int wedge_sign;
  int wedge_types = (1 << get_wedge_bits_lookup(bsize));
  const uint8_t *mask;
  uint64_t sse;
  const int hbd = is_cur_buf_hbd(xd);
  const int bd_round = hbd ? (xd->bd - 8) * 2 : 0;

  DECLARE_ALIGNED(32, int16_t, residual0[MAX_SB_SQUARE]);  // src - pred0
  if (hbd) {
    aom_highbd_subtract_block(bh, bw, residual0, bw, src->buf, src->stride,
                              CONVERT_TO_BYTEPTR(p0), bw, xd->bd);
  } else {
    aom_subtract_block(bh, bw, residual0, bw, src->buf, src->stride, p0, bw);
  }

  int64_t sign_limit = ((int64_t)aom_sum_squares_i16(residual0, N) -
                        (int64_t)aom_sum_squares_i16(residual1, N)) *
                       (1 << WEDGE_WEIGHT_BITS) / 2;
  int16_t *ds = residual0;

  av1_wedge_compute_delta_squares(ds, residual0, residual1, N);

  for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
    mask = av1_get_contiguous_soft_mask(wedge_index, 0, bsize);

    wedge_sign = av1_wedge_sign_from_residuals(ds, mask, N, sign_limit);

    mask = av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
    sse = av1_wedge_sse_from_residuals(residual1, diff10, mask, N);
    sse = ROUND_POWER_OF_TWO(sse, bd_round);

    model_rd_sse_fn[MODELRD_TYPE_MASKED_COMPOUND](cpi, x, bsize, 0, sse, N,
                                                  &rate, &dist);

    rate += x->wedge_idx_cost[bsize][wedge_index];
    rd = RDCOST(x->rdmult, rate, dist);

    if (rd < best_rd) {
      *best_wedge_index = wedge_index;
      *best_wedge_sign = wedge_sign;
      best_rd = rd;
    }
  }

  return best_rd -
         RDCOST(x->rdmult, x->wedge_idx_cost[bsize][*best_wedge_index], 0);
}
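// A minimal sketch of the delta-squares trick used above (illustrative, with
// a toy 2-sample "block"; the real kernels are SIMD): for each sample i,
// ds[i] = r0[i]^2 - r1[i]^2, where r0 = src - pred0 and r1 = src - pred1.
// The mask-weighted sum of ds indicates which predictor fits better under a
// given wedge mask, so the sign can be picked without rebuilding the
// compound prediction for both signs:
//
//   int16_t r0[2] = { 4, 1 }, r1[2] = { 1, 4 };
//   int16_t ds[2] = { 4 * 4 - 1 * 1, 1 * 1 - 4 * 4 };  // { 15, -15 }
//   // A mask weighted toward sample 0 yields a positive masked sum,
//   // i.e. pred1 fits sample 0 better, which determines wedge_sign.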
// Choose the best wedge index for the specified sign
static int64_t pick_wedge_fixed_sign(const AV1_COMP *const cpi,
                                     const MACROBLOCK *const x,
                                     const BLOCK_SIZE bsize,
                                     const int16_t *const residual1,
                                     const int16_t *const diff10,
                                     const int wedge_sign,
                                     int *const best_wedge_index) {
  const MACROBLOCKD *const xd = &x->e_mbd;

  const int bw = block_size_wide[bsize];
  const int bh = block_size_high[bsize];
  const int N = bw * bh;
  assert(N >= 64);
  int rate;
  int64_t dist;
  int64_t rd, best_rd = INT64_MAX;
  int wedge_index;
  int wedge_types = (1 << get_wedge_bits_lookup(bsize));
  const uint8_t *mask;
  uint64_t sse;
  const int hbd = is_cur_buf_hbd(xd);
  const int bd_round = hbd ? (xd->bd - 8) * 2 : 0;
  for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
    mask = av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
    sse = av1_wedge_sse_from_residuals(residual1, diff10, mask, N);
    sse = ROUND_POWER_OF_TWO(sse, bd_round);

    model_rd_sse_fn[MODELRD_TYPE_MASKED_COMPOUND](cpi, x, bsize, 0, sse, N,
                                                  &rate, &dist);
    rate += x->wedge_idx_cost[bsize][wedge_index];
    rd = RDCOST(x->rdmult, rate, dist);

    if (rd < best_rd) {
      *best_wedge_index = wedge_index;
      best_rd = rd;
    }
  }
  return best_rd -
         RDCOST(x->rdmult, x->wedge_idx_cost[bsize][*best_wedge_index], 0);
}

static int64_t pick_interinter_wedge(
    const AV1_COMP *const cpi, MACROBLOCK *const x, const BLOCK_SIZE bsize,
    const uint8_t *const p0, const uint8_t *const p1,
    const int16_t *const residual1, const int16_t *const diff10) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  const int bw = block_size_wide[bsize];

  int64_t rd;
  int wedge_index = -1;
  int wedge_sign = 0;

  assert(is_interinter_compound_used(COMPOUND_WEDGE, bsize));
  assert(cpi->common.seq_params.enable_masked_compound);

  if (cpi->sf.fast_wedge_sign_estimate) {
    wedge_sign = estimate_wedge_sign(cpi, x, bsize, p0, bw, p1, bw);
    rd = pick_wedge_fixed_sign(cpi, x, bsize, residual1, diff10, wedge_sign,
                               &wedge_index);
  } else {
    rd = pick_wedge(cpi, x, bsize, p0, residual1, diff10, &wedge_sign,
                    &wedge_index);
  }

  mbmi->interinter_comp.wedge_sign = wedge_sign;
  mbmi->interinter_comp.wedge_index = wedge_index;
  return rd;
}
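// Worked example of the RDCOST trade-off used throughout these searches
// (illustrative; assumes the usual libaom definition
// RDCOST(RM, R, D) ~= ((int64_t)R * RM >> 9) + (D << 7), up to rounding):
//
//   rdmult = 512:
//   candidate A: rate = 100, dist = 50 -> (100 * 512 >> 9) + (50 << 7)
//                                       = 100 + 6400 = 6500
//   candidate B: rate = 500, dist = 40 -> 500 + 5120 = 5620
//
// B wins despite the higher rate because distortion dominates at this
// rdmult.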
static int64_t pick_interinter_seg(const AV1_COMP *const cpi,
                                   MACROBLOCK *const x, const BLOCK_SIZE bsize,
                                   const uint8_t *const p0,
                                   const uint8_t *const p1,
                                   const int16_t *const residual1,
                                   const int16_t *const diff10) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  const int bw = block_size_wide[bsize];
  const int bh = block_size_high[bsize];
  const int N = 1 << num_pels_log2_lookup[bsize];
  int rate;
  int64_t dist;
  DIFFWTD_MASK_TYPE cur_mask_type;
  int64_t best_rd = INT64_MAX;
  DIFFWTD_MASK_TYPE best_mask_type = 0;
  const int hbd = is_cur_buf_hbd(xd);
  const int bd_round = hbd ? (xd->bd - 8) * 2 : 0;
  DECLARE_ALIGNED(16, uint8_t, seg_mask[2 * MAX_SB_SQUARE]);
  uint8_t *tmp_mask[2] = { xd->seg_mask, seg_mask };
  // try each mask type and its inverse
  for (cur_mask_type = 0; cur_mask_type < DIFFWTD_MASK_TYPES;
       cur_mask_type++) {
    // build mask and inverse
    if (hbd)
      av1_build_compound_diffwtd_mask_highbd(
          tmp_mask[cur_mask_type], cur_mask_type, CONVERT_TO_BYTEPTR(p0), bw,
          CONVERT_TO_BYTEPTR(p1), bw, bh, bw, xd->bd);
    else
      av1_build_compound_diffwtd_mask(tmp_mask[cur_mask_type], cur_mask_type,
                                      p0, bw, p1, bw, bh, bw);

    // compute rd for mask
    uint64_t sse = av1_wedge_sse_from_residuals(residual1, diff10,
                                                tmp_mask[cur_mask_type], N);
    sse = ROUND_POWER_OF_TWO(sse, bd_round);

    model_rd_sse_fn[MODELRD_TYPE_MASKED_COMPOUND](cpi, x, bsize, 0, sse, N,
                                                  &rate, &dist);
    const int64_t rd0 = RDCOST(x->rdmult, rate, dist);

    if (rd0 < best_rd) {
      best_mask_type = cur_mask_type;
      best_rd = rd0;
    }
  }
  mbmi->interinter_comp.mask_type = best_mask_type;
  if (best_mask_type == DIFFWTD_38_INV) {
    memcpy(xd->seg_mask, seg_mask, N * 2);
  }
  return best_rd;
}
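// Buffer-handling sketch for the loop above (illustrative): the two DIFFWTD
// candidates write to different buffers -- DIFFWTD_38 builds its mask
// directly in xd->seg_mask, while DIFFWTD_38_INV builds into the local
// seg_mask scratch. Only if the inverse mask wins does the final memcpy
// promote it into xd->seg_mask, so the winning mask is always left in place
// for the later prediction build:
//
//   tmp_mask[DIFFWTD_38]     == xd->seg_mask   // written in-place
//   tmp_mask[DIFFWTD_38_INV] == seg_mask       // scratch, copied on win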
static int64_t pick_interintra_wedge(const AV1_COMP *const cpi,
                                     const MACROBLOCK *const x,
                                     const BLOCK_SIZE bsize,
                                     const uint8_t *const p0,
                                     const uint8_t *const p1) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  assert(is_interintra_wedge_used(bsize));
  assert(cpi->common.seq_params.enable_interintra_compound);

  const struct buf_2d *const src = &x->plane[0].src;
  const int bw = block_size_wide[bsize];
  const int bh = block_size_high[bsize];
  DECLARE_ALIGNED(32, int16_t, residual1[MAX_SB_SQUARE]);  // src - pred1
  DECLARE_ALIGNED(32, int16_t, diff10[MAX_SB_SQUARE]);     // pred1 - pred0
  if (is_cur_buf_hbd(xd)) {
    aom_highbd_subtract_block(bh, bw, residual1, bw, src->buf, src->stride,
                              CONVERT_TO_BYTEPTR(p1), bw, xd->bd);
    aom_highbd_subtract_block(bh, bw, diff10, bw, CONVERT_TO_BYTEPTR(p1), bw,
                              CONVERT_TO_BYTEPTR(p0), bw, xd->bd);
  } else {
    aom_subtract_block(bh, bw, residual1, bw, src->buf, src->stride, p1, bw);
    aom_subtract_block(bh, bw, diff10, bw, p1, bw, p0, bw);
  }
  int wedge_index = -1;
  int64_t rd =
      pick_wedge_fixed_sign(cpi, x, bsize, residual1, diff10, 0, &wedge_index);

  mbmi->interintra_wedge_sign = 0;
  mbmi->interintra_wedge_index = wedge_index;
  return rd;
}

static int64_t pick_interinter_mask(const AV1_COMP *const cpi, MACROBLOCK *x,
                                    const BLOCK_SIZE bsize,
                                    const uint8_t *const p0,
                                    const uint8_t *const p1,
                                    const int16_t *const residual1,
                                    const int16_t *const diff10) {
  const COMPOUND_TYPE compound_type = x->e_mbd.mi[0]->interinter_comp.type;
  switch (compound_type) {
    case COMPOUND_WEDGE:
      return pick_interinter_wedge(cpi, x, bsize, p0, p1, residual1, diff10);
    case COMPOUND_DIFFWTD:
      return pick_interinter_seg(cpi, x, bsize, p0, p1, residual1, diff10);
    default: assert(0); return 0;
  }
}

static int interinter_compound_motion_search(const AV1_COMP *const cpi,
                                             MACROBLOCK *x,
                                             const int_mv *const cur_mv,
                                             const BLOCK_SIZE bsize,
                                             const PREDICTION_MODE this_mode,
                                             int mi_row, int mi_col) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  int_mv tmp_mv[2];
  int tmp_rate_mv = 0;
  mbmi->interinter_comp.seg_mask = xd->seg_mask;
  const INTERINTER_COMPOUND_DATA *compound_data = &mbmi->interinter_comp;

  if (this_mode == NEW_NEWMV) {
    do_masked_motion_search_indexed(cpi, x, cur_mv, compound_data, bsize,
                                    mi_row, mi_col, tmp_mv, &tmp_rate_mv, 2);
    mbmi->mv[0].as_int = tmp_mv[0].as_int;
    mbmi->mv[1].as_int = tmp_mv[1].as_int;
  } else if (this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV) {
    do_masked_motion_search_indexed(cpi, x, cur_mv, compound_data, bsize,
                                    mi_row, mi_col, tmp_mv, &tmp_rate_mv, 0);
    mbmi->mv[0].as_int = tmp_mv[0].as_int;
  } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
    do_masked_motion_search_indexed(cpi, x, cur_mv, compound_data, bsize,
                                    mi_row, mi_col, tmp_mv, &tmp_rate_mv, 1);
    mbmi->mv[1].as_int = tmp_mv[1].as_int;
  }
  return tmp_rate_mv;
}
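// Dispatch summary for the function above (illustrative): the last argument
// of do_masked_motion_search_indexed() selects which MV(s) are refined, and
// only those are written back:
//
//   NEW_NEWMV                  -> which = 2, refine both mv[0] and mv[1]
//   NEW_NEARESTMV / NEW_NEARMV -> which = 0, refine mv[0] only
//   NEAREST_NEWMV / NEAR_NEWMV -> which = 1, refine mv[1] only
//
// Modes without a NEWMV component never reach this search.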
static void get_inter_predictors_masked_compound(
    const AV1_COMP *const cpi, MACROBLOCK *x, const BLOCK_SIZE bsize,
    int mi_row, int mi_col, uint8_t **preds0, uint8_t **preds1,
    int16_t *residual1, int16_t *diff10, int *strides) {
  const AV1_COMMON *cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  const int bw = block_size_wide[bsize];
  const int bh = block_size_high[bsize];
  int can_use_previous = cm->allow_warped_motion;
  // get inter predictors to use for masked compound modes
  av1_build_inter_predictors_for_planes_single_buf(
      xd, bsize, 0, 0, mi_row, mi_col, 0, preds0, strides, can_use_previous);
  av1_build_inter_predictors_for_planes_single_buf(
      xd, bsize, 0, 0, mi_row, mi_col, 1, preds1, strides, can_use_previous);
  const struct buf_2d *const src = &x->plane[0].src;
  if (is_cur_buf_hbd(xd)) {
    aom_highbd_subtract_block(bh, bw, residual1, bw, src->buf, src->stride,
                              CONVERT_TO_BYTEPTR(*preds1), bw, xd->bd);
    aom_highbd_subtract_block(bh, bw, diff10, bw, CONVERT_TO_BYTEPTR(*preds1),
                              bw, CONVERT_TO_BYTEPTR(*preds0), bw, xd->bd);
  } else {
    aom_subtract_block(bh, bw, residual1, bw, src->buf, src->stride, *preds1,
                       bw);
    aom_subtract_block(bh, bw, diff10, bw, *preds1, bw, *preds0, bw);
  }
}

static int64_t build_and_cost_compound_type(
    const AV1_COMP *const cpi, MACROBLOCK *x, const int_mv *const cur_mv,
    const BLOCK_SIZE bsize, const PREDICTION_MODE this_mode, int *rs2,
    int rate_mv, const BUFFER_SET *ctx, int *out_rate_mv, uint8_t **preds0,
    uint8_t **preds1, int16_t *residual1, int16_t *diff10, int *strides,
    int mi_row, int mi_col, int mode_rate, int64_t ref_best_rd,
    int *calc_pred_masked_compound, int32_t *comp_rate, int64_t *comp_dist,
    int64_t *const comp_model_rd, const int64_t comp_best_model_rd,
    int64_t *const comp_model_rd_cur) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  int64_t best_rd_cur = INT64_MAX;
  int64_t rd = INT64_MAX;
  const COMPOUND_TYPE compound_type = mbmi->interinter_comp.type;
  int rate_sum, tmp_skip_txfm_sb;
  int64_t dist_sum, tmp_skip_sse_sb;

  // TODO(any): Save pred and mask calculation as well into records. However
  // this may increase memory requirements as the compound segment mask needs
  // to be stored in each record.
  if (*calc_pred_masked_compound) {
    get_inter_predictors_masked_compound(cpi, x, bsize, mi_row, mi_col, preds0,
                                         preds1, residual1, diff10, strides);
    *calc_pred_masked_compound = 0;
  }
  if (cpi->sf.prune_wedge_pred_diff_based && compound_type == COMPOUND_WEDGE) {
    unsigned int sse;
    if (is_cur_buf_hbd(xd))
      (void)cpi->fn_ptr[bsize].vf(CONVERT_TO_BYTEPTR(*preds0), *strides,
                                  CONVERT_TO_BYTEPTR(*preds1), *strides, &sse);
    else
      (void)cpi->fn_ptr[bsize].vf(*preds0, *strides, *preds1, *strides, &sse);
    const unsigned int mse =
        ROUND_POWER_OF_TWO(sse, num_pels_log2_lookup[bsize]);
    // If the two predictors are very similar, skip wedge compound mode search
    if (mse < 8 || (!have_newmv_in_inter_mode(this_mode) && mse < 64)) {
      *comp_model_rd_cur = INT64_MAX;
      return INT64_MAX;
    }
  }

  best_rd_cur =
      pick_interinter_mask(cpi, x, bsize, *preds0, *preds1, residual1, diff10);
  *rs2 += get_interinter_compound_mask_rate(x, mbmi);
  best_rd_cur += RDCOST(x->rdmult, *rs2 + rate_mv, 0);

  // Although the true rate_mv might be different after motion search, it is
  // unlikely to be the best mode considering the transform rd cost and other
  // mode overhead cost
  int64_t mode_rd = RDCOST(x->rdmult, *rs2 + mode_rate, 0);
  if (mode_rd > ref_best_rd) {
    *comp_model_rd_cur = INT64_MAX;
    return INT64_MAX;
  }

  // If a matching record is found, the stored stats are reused below;
  // otherwise do the full search
  if (comp_rate[compound_type] == INT_MAX) {
    if (have_newmv_in_inter_mode(this_mode) &&
        compound_type == COMPOUND_WEDGE &&
        !cpi->sf.disable_interinter_wedge_newmv_search) {
      *out_rate_mv = interinter_compound_motion_search(
          cpi, x, cur_mv, bsize, this_mode, mi_row, mi_col);
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, ctx, bsize,
                                    AOM_PLANE_Y, AOM_PLANE_Y);

      model_rd_sb_fn[MODELRD_TYPE_MASKED_COMPOUND](
          cpi, bsize, x, xd, 0, 0, mi_row, mi_col, &rate_sum, &dist_sum,
          &tmp_skip_txfm_sb, &tmp_skip_sse_sb, NULL, NULL, NULL);
      rd = RDCOST(x->rdmult, *rs2 + *out_rate_mv + rate_sum, dist_sum);
      *comp_model_rd_cur = rd;
      if (rd >= best_rd_cur) {
        mbmi->mv[0].as_int = cur_mv[0].as_int;
        mbmi->mv[1].as_int = cur_mv[1].as_int;
        *out_rate_mv = rate_mv;
        av1_build_wedge_inter_predictor_from_buf(xd, bsize, 0, 0, preds0,
                                                 strides, preds1, strides);
        *comp_model_rd_cur = best_rd_cur;
      }
    } else {
      *out_rate_mv = rate_mv;
      av1_build_wedge_inter_predictor_from_buf(xd, bsize, 0, 0, preds0,
                                               strides, preds1, strides);
      model_rd_sb_fn[MODELRD_TYPE_MASKED_COMPOUND](
          cpi, bsize, x, xd, 0, 0, mi_row, mi_col, &rate_sum, &dist_sum,
          &tmp_skip_txfm_sb, &tmp_skip_sse_sb, NULL, NULL, NULL);
      *comp_model_rd_cur =
          RDCOST(x->rdmult, *rs2 + *out_rate_mv + rate_sum, dist_sum);
    }

    RD_STATS rd_stats;

    if (cpi->sf.prune_comp_type_by_model_rd &&
        (*comp_model_rd_cur > comp_best_model_rd) &&
        comp_best_model_rd != INT64_MAX) {
      *comp_model_rd_cur = INT64_MAX;
      return INT64_MAX;
    }
    rd = estimate_yrd_for_sb(cpi, bsize, x, ref_best_rd, &rd_stats);
    if (rd != INT64_MAX) {
      rd = RDCOST(x->rdmult, *rs2 + *out_rate_mv + rd_stats.rate,
                  rd_stats.dist);
      // Back up rate and distortion for future reuse
      comp_rate[compound_type] = rd_stats.rate;
      comp_dist[compound_type] = rd_stats.dist;
      comp_model_rd[compound_type] = *comp_model_rd_cur;
    }
  } else {
    assert(comp_dist[compound_type] != INT64_MAX);
    // When disable_interinter_wedge_newmv_search is set, motion refinement is
    // disabled. Hence rate and distortion can be reused in this case as well
    assert(IMPLIES(have_newmv_in_inter_mode(this_mode),
                   cpi->sf.disable_interinter_wedge_newmv_search));
    assert(mbmi->mv[0].as_int == cur_mv[0].as_int);
    assert(mbmi->mv[1].as_int == cur_mv[1].as_int);
    *out_rate_mv = rate_mv;
    // Calculate RD cost based on stored stats
    rd = RDCOST(x->rdmult, *rs2 + *out_rate_mv + comp_rate[compound_type],
                comp_dist[compound_type]);
    *comp_model_rd_cur = comp_model_rd[compound_type];
  }
  return rd;
}
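// Reuse pattern sketch for comp_rate/comp_dist/comp_model_rd (illustrative):
// the arrays are indexed by COMPOUND_TYPE and initialized to INT_MAX /
// INT64_MAX by the caller. The first evaluation of a compound type fills the
// slot; later evaluations of the same type with matching MVs and refs skip
// the transform search entirely:
//
//   if (comp_rate[type] == INT_MAX) {
//     /* ...full search..., then cache rate/dist/model_rd */
//   } else {
//     rd = RDCOST(rdmult, rs2 + rate_mv + comp_rate[type], comp_dist[type]);
//   }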
typedef struct {
  // OBMC secondary prediction buffers and respective strides
  uint8_t *above_pred_buf[MAX_MB_PLANE];
  int above_pred_stride[MAX_MB_PLANE];
  uint8_t *left_pred_buf[MAX_MB_PLANE];
  int left_pred_stride[MAX_MB_PLANE];
  int_mv (*single_newmv)[REF_FRAMES];
  // Pointer to array of motion vectors to use for each ref and their rates
  // Should point to first of 2 arrays in 2D array
  int (*single_newmv_rate)[REF_FRAMES];
  int (*single_newmv_valid)[REF_FRAMES];
  // Pointer to array of predicted rate-distortion
  // Should point to first of 2 arrays in 2D array
  int64_t (*modelled_rd)[MAX_REF_MV_SERCH][REF_FRAMES];
  InterpFilter single_filter[MB_MODE_COUNT][REF_FRAMES];
  int ref_frame_cost;
  int single_comp_cost;
  int64_t (*simple_rd)[MAX_REF_MV_SERCH][REF_FRAMES];
  int skip_motion_mode;
  INTERINTRA_MODE *inter_intra_mode;
  int single_ref_first_pass;
  SimpleRDState *simple_rd_state;
} HandleInterModeArgs;

/* If the current mode produces the same motion vector as a mode that was
 * already searched at a lower cost, skip this mode. */
static int skip_repeated_mv(const AV1_COMMON *const cm,
                            const MACROBLOCK *const x,
                            PREDICTION_MODE this_mode,
                            const MV_REFERENCE_FRAME ref_frames[2],
                            InterModeSearchState *search_state) {
  const int is_comp_pred = ref_frames[1] > INTRA_FRAME;
  const uint8_t ref_frame_type = av1_ref_frame_type(ref_frames);
  const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
  const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
  PREDICTION_MODE compare_mode = MB_MODE_COUNT;
  if (!is_comp_pred) {
    if (this_mode == NEARMV) {
      if (ref_mv_count == 0) {
        // NEARMV has the same motion vector as NEARESTMV
        compare_mode = NEARESTMV;
      }
      if (ref_mv_count == 1 &&
          cm->global_motion[ref_frames[0]].wmtype <= TRANSLATION) {
        // NEARMV has the same motion vector as GLOBALMV
        compare_mode = GLOBALMV;
      }
    }
    if (this_mode == GLOBALMV) {
      if (ref_mv_count == 0 &&
          cm->global_motion[ref_frames[0]].wmtype <= TRANSLATION) {
        // GLOBALMV has the same motion vector as NEARESTMV
        compare_mode = NEARESTMV;
      }
      if (ref_mv_count == 1) {
        // GLOBALMV has the same motion vector as NEARMV
        compare_mode = NEARMV;
      }
    }

    if (compare_mode != MB_MODE_COUNT) {
      // Use modelled_rd to check whether the compare mode was searched
      if (search_state->modelled_rd[compare_mode][0][ref_frames[0]] !=
          INT64_MAX) {
        const int16_t mode_ctx =
            av1_mode_context_analyzer(mbmi_ext->mode_context, ref_frames);
        const int compare_cost = cost_mv_ref(x, compare_mode, mode_ctx);
        const int this_cost = cost_mv_ref(x, this_mode, mode_ctx);

        // Only skip if this mode's cost is larger than the compare mode cost
        if (this_cost > compare_cost) {
          search_state->modelled_rd[this_mode][0][ref_frames[0]] =
              search_state->modelled_rd[compare_mode][0][ref_frames[0]];
          return 1;
        }
      }
    }
  }
  return 0;
}
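// Equivalence table behind skip_repeated_mv() (illustrative summary of the
// branches above; "translation-only GM" means the global motion type for
// the ref frame is TRANSLATION or simpler):
//
//   this_mode  ref_mv_count  extra condition        duplicate of
//   ---------  ------------  ---------------------  ------------
//   NEARMV     0             -                      NEARESTMV
//   NEARMV     1             translation-only GM    GLOBALMV
//   GLOBALMV   0             translation-only GM    NEARESTMV
//   GLOBALMV   1             -                      NEARMV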
static INLINE int clamp_and_check_mv(int_mv *out_mv, int_mv in_mv,
                                     const AV1_COMMON *cm,
                                     const MACROBLOCK *x) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  *out_mv = in_mv;
  lower_mv_precision(&out_mv->as_mv, cm->allow_high_precision_mv,
                     cm->cur_frame_force_integer_mv);
  clamp_mv2(&out_mv->as_mv, xd);
  return !mv_check_bounds(&x->mv_limits, &out_mv->as_mv);
}
static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
                            const BLOCK_SIZE bsize, int_mv *cur_mv,
                            const int mi_row, const int mi_col,
                            int *const rate_mv,
                            HandleInterModeArgs *const args) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const int is_comp_pred = has_second_ref(mbmi);
  const PREDICTION_MODE this_mode = mbmi->mode;
  const int refs[2] = { mbmi->ref_frame[0],
                        mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
  const int ref_mv_idx = mbmi->ref_mv_idx;
  int i;

  (void)args;

  if (is_comp_pred) {
    if (this_mode == NEW_NEWMV) {
      cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
      cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;

      if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
        joint_motion_search(cpi, x, bsize, cur_mv, mi_row, mi_col, NULL, NULL,
                            0, rate_mv, 0);
      } else {
        *rate_mv = 0;
        for (i = 0; i < 2; ++i) {
          const int_mv ref_mv = av1_get_ref_mv(x, i);
          *rate_mv +=
              av1_mv_bit_cost(&cur_mv[i].as_mv, &ref_mv.as_mv, x->nmv_vec_cost,
                              x->mv_cost_stack, MV_COST_WEIGHT);
        }
      }
    } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
      cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
      if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
        compound_single_motion_search_interinter(
            cpi, x, bsize, cur_mv, mi_row, mi_col, NULL, 0, rate_mv, 0, 1);
      } else {
        const int_mv ref_mv = av1_get_ref_mv(x, 1);
        *rate_mv =
            av1_mv_bit_cost(&cur_mv[1].as_mv, &ref_mv.as_mv, x->nmv_vec_cost,
                            x->mv_cost_stack, MV_COST_WEIGHT);
      }
    } else {
      assert(this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV);
      cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
      if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
        compound_single_motion_search_interinter(
            cpi, x, bsize, cur_mv, mi_row, mi_col, NULL, 0, rate_mv, 0, 0);
      } else {
        const int_mv ref_mv = av1_get_ref_mv(x, 0);
        *rate_mv =
            av1_mv_bit_cost(&cur_mv[0].as_mv, &ref_mv.as_mv, x->nmv_vec_cost,
                            x->mv_cost_stack, MV_COST_WEIGHT);
      }
    }
  } else {
    single_motion_search(cpi, x, bsize, mi_row, mi_col, 0, rate_mv);
    if (x->best_mv.as_int == INVALID_MV) return INT64_MAX;

    args->single_newmv[ref_mv_idx][refs[0]] = x->best_mv;
    args->single_newmv_rate[ref_mv_idx][refs[0]] = *rate_mv;
    args->single_newmv_valid[ref_mv_idx][refs[0]] = 1;

    cur_mv[0].as_int = x->best_mv.as_int;

#if USE_DISCOUNT_NEWMV_TEST
    // Estimate the rate implications of a new mv but discount this
    // under certain circumstances where we want to help initiate a weak
    // motion field, where the distortion gain for a single block may not
    // be enough to overcome the cost of a new mv.
    if (discount_newmv_test(cpi, x, this_mode, x->best_mv)) {
      *rate_mv = AOMMAX(*rate_mv / NEW_MV_DISCOUNT_FACTOR, 1);
    }
#endif
  }

  return 0;
}
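// Worked example of the discount above (illustrative): with
// NEW_MV_DISCOUNT_FACTOR == 8, a NEWMV whose signalling would cost
// rate_mv = 600 is charged only max(600 / 8, 1) = 75 when the nearest/near
// candidates are all zero and the new MV differs from the global MV, making
// it easier for a genuine motion field to get started.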
static INLINE void swap_dst_buf(MACROBLOCKD *xd, const BUFFER_SET *dst_bufs[2],
                                int num_planes) {
  const BUFFER_SET *buf0 = dst_bufs[0];
  dst_bufs[0] = dst_bufs[1];
  dst_bufs[1] = buf0;
  restore_dst_buf(xd, *dst_bufs[0], num_planes);
}

static INLINE int get_switchable_rate(MACROBLOCK *const x,
                                      const InterpFilters filters,
                                      const int ctx[2]) {
  int inter_filter_cost;
  const InterpFilter filter0 = av1_extract_interp_filter(filters, 0);
  const InterpFilter filter1 = av1_extract_interp_filter(filters, 1);
  inter_filter_cost = x->switchable_interp_costs[ctx[0]][filter0];
  inter_filter_cost += x->switchable_interp_costs[ctx[1]][filter1];
  return SWITCHABLE_INTERP_RATE_FACTOR * inter_filter_cost;
}
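// Filter-set indexing sketch for the searches below (illustrative; assumes
// SWITCHABLE_FILTERS == 3 and DUAL_FILTER_SET_SIZE == 9, the usual libaom
// values): filter_sets[] enumerates all horizontal/vertical filter pairs,
// so index = horiz + vert * SWITCHABLE_FILTERS, e.g.
//
//   0: (REGULAR, REGULAR)  1: (SMOOTH, REGULAR)  2: (SHARP, REGULAR)
//   3: (REGULAR, SMOOTH)   4: (SMOOTH, SMOOTH)   5: (SHARP, SMOOTH)
//   6: (REGULAR, SHARP)    7: (SMOOTH, SHARP)    8: (SHARP, SHARP)
//
// The "diagonal" indices 0, 4, 8 are the non-dual (x == y) combinations.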
// Calculate the rdcost of the given interpolation filter
static INLINE int64_t interpolation_filter_rd(
    MACROBLOCK *const x, const AV1_COMP *const cpi,
    const TileDataEnc *tile_data, BLOCK_SIZE bsize, int mi_row, int mi_col,
    const BUFFER_SET *const orig_dst, int64_t *const rd,
    int *const switchable_rate, int *const skip_txfm_sb,
    int64_t *const skip_sse_sb, const BUFFER_SET *dst_bufs[2], int filter_idx,
    const int switchable_ctx[2], const int skip_pred, int *rate,
    int64_t *dist) {
  const AV1_COMMON *cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  int tmp_rate[2], tmp_skip_sb[2] = { 1, 1 };
  int64_t tmp_dist[2], tmp_skip_sse[2] = { 0, 0 };

  const InterpFilters last_best = mbmi->interp_filters;
  mbmi->interp_filters = filter_sets[filter_idx];
  const int tmp_rs =
      get_switchable_rate(x, mbmi->interp_filters, switchable_ctx);

  int64_t min_rd = RDCOST(x->rdmult, tmp_rs, 0);
  if (min_rd > *rd) {
    mbmi->interp_filters = last_best;
    return 0;
  }

  (void)tile_data;

  assert(skip_pred != 2);
  assert((skip_pred >= 0) && (skip_pred <= cpi->default_interp_skip_flags));
  assert(rate[0] >= 0);
  assert(dist[0] >= 0);
  assert((skip_txfm_sb[0] == 0) || (skip_txfm_sb[0] == 1));
  assert(skip_sse_sb[0] >= 0);
  assert(rate[1] >= 0);
  assert(dist[1] >= 0);
  assert((skip_txfm_sb[1] == 0) || (skip_txfm_sb[1] == 1));
  assert(skip_sse_sb[1] >= 0);

  if (skip_pred != cpi->default_interp_skip_flags) {
    if (skip_pred != DEFAULT_LUMA_INTERP_SKIP_FLAG) {
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
                                    AOM_PLANE_Y, AOM_PLANE_Y);
#if CONFIG_COLLECT_RD_STATS == 3
      RD_STATS rd_stats_y;
      pick_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize, mi_row, mi_col,
                            INT64_MAX);
      PrintPredictionUnitStats(cpi, tile_data, x, &rd_stats_y, bsize);
#endif  // CONFIG_COLLECT_RD_STATS == 3
      model_rd_sb_fn[MODELRD_TYPE_INTERP_FILTER](
          cpi, bsize, x, xd, 0, 0, mi_row, mi_col, &tmp_rate[0], &tmp_dist[0],
          &tmp_skip_sb[0], &tmp_skip_sse[0], NULL, NULL, NULL);
      tmp_rate[1] = tmp_rate[0];
      tmp_dist[1] = tmp_dist[0];
    } else {
      // only luma MC is skipped
      tmp_rate[1] = rate[0];
      tmp_dist[1] = dist[0];
    }
    if (num_planes > 1) {
      for (int plane = 1; plane < num_planes; ++plane) {
        int tmp_rate_uv, tmp_skip_sb_uv;
        int64_t tmp_dist_uv, tmp_skip_sse_uv;
        int64_t tmp_rd = RDCOST(x->rdmult, tmp_rs + tmp_rate[1], tmp_dist[1]);
        if (tmp_rd >= *rd) {
          mbmi->interp_filters = last_best;
          return 0;
        }
        av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
                                      plane, plane);
        model_rd_sb_fn[MODELRD_TYPE_INTERP_FILTER](
            cpi, bsize, x, xd, plane, plane, mi_row, mi_col, &tmp_rate_uv,
            &tmp_dist_uv, &tmp_skip_sb_uv, &tmp_skip_sse_uv, NULL, NULL, NULL);
        tmp_rate[1] = (int)AOMMIN(
            ((int64_t)tmp_rate[1] + (int64_t)tmp_rate_uv), INT_MAX);
        tmp_dist[1] += tmp_dist_uv;
        tmp_skip_sb[1] &= tmp_skip_sb_uv;
        tmp_skip_sse[1] += tmp_skip_sse_uv;
      }
    }
  } else {
    // both luma and chroma MC are skipped
    tmp_rate[1] = rate[1];
    tmp_dist[1] = dist[1];
  }
  int64_t tmp_rd = RDCOST(x->rdmult, tmp_rs + tmp_rate[1], tmp_dist[1]);

  if (tmp_rd < *rd) {
    *rd = tmp_rd;
    *switchable_rate = tmp_rs;
    if (skip_pred != cpi->default_interp_skip_flags) {
      if (skip_pred == 0) {
        // Overwrite the data as the current filter is the best one
        tmp_skip_sb[1] = tmp_skip_sb[0] & tmp_skip_sb[1];
        tmp_skip_sse[1] = tmp_skip_sse[0] + tmp_skip_sse[1];
        memcpy(rate, tmp_rate, sizeof(*rate) * 2);
        memcpy(dist, tmp_dist, sizeof(*dist) * 2);
        memcpy(skip_txfm_sb, tmp_skip_sb, sizeof(*skip_txfm_sb) * 2);
        memcpy(skip_sse_sb, tmp_skip_sse, sizeof(*skip_sse_sb) * 2);
        // As luma MC data is computed, no need to recompute after the search
        x->recalc_luma_mc_data = 0;
      } else if (skip_pred == DEFAULT_LUMA_INTERP_SKIP_FLAG) {
        // As luma MC data is not computed, the update of luma data can be
        // skipped
        rate[1] = tmp_rate[1];
        dist[1] = tmp_dist[1];
        skip_txfm_sb[1] = skip_txfm_sb[0] & tmp_skip_sb[1];
        skip_sse_sb[1] = skip_sse_sb[0] + tmp_skip_sse[1];
        // As luma MC data is not recomputed and the current filter is the
        // best, indicate the possibility of recomputing MC data. If the
        // current buffer contains valid MC data, toggle to indicate that
        // luma MC data needs to be recomputed
        x->recalc_luma_mc_data ^= 1;
      }
      swap_dst_buf(xd, dst_bufs, num_planes);
    }
    return 1;
  }
  mbmi->interp_filters = last_best;
  return 0;
}
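// Early-exit sketch for the gate at the top of the function above
// (illustrative): even a zero-distortion prediction cannot beat the current
// best if the filter signalling alone already costs too much. E.g. with
// rdmult = 512 and tmp_rs = 4000,
//   min_rd = RDCOST(512, 4000, 0) = (4000 * 512) >> 9 = 4000,
// so if *rd is already below 4000 the candidate filter is rejected before
// any motion compensation or model evaluation is done.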
static INLINE void pred_dual_interp_filter_rd(
    MACROBLOCK *const x, const AV1_COMP *const cpi,
    const TileDataEnc *tile_data, BLOCK_SIZE bsize, int mi_row, int mi_col,
    const BUFFER_SET *const orig_dst, int64_t *const rd,
    int *const switchable_rate, int *const skip_txfm_sb,
    int64_t *const skip_sse_sb, const BUFFER_SET *dst_bufs[2],
    InterpFilters filter_idx, const int switchable_ctx[2], const int skip_pred,
    int *rate, int64_t *dist, InterpFilters af_horiz, InterpFilters af_vert,
    InterpFilters lf_horiz, InterpFilters lf_vert) {
  if ((af_horiz == lf_horiz) && (af_horiz != SWITCHABLE)) {
    if (((af_vert == lf_vert) && (af_vert != SWITCHABLE))) {
      filter_idx = af_horiz + (af_vert * SWITCHABLE_FILTERS);
      if (filter_idx) {
        interpolation_filter_rd(x, cpi, tile_data, bsize, mi_row, mi_col,
                                orig_dst, rd, switchable_rate, skip_txfm_sb,
                                skip_sse_sb, dst_bufs, filter_idx,
                                switchable_ctx, skip_pred, rate, dist);
      }
    } else {
      for (filter_idx = af_horiz; filter_idx < (DUAL_FILTER_SET_SIZE);
           filter_idx += SWITCHABLE_FILTERS) {
        if (filter_idx) {
          interpolation_filter_rd(x, cpi, tile_data, bsize, mi_row, mi_col,
                                  orig_dst, rd, switchable_rate, skip_txfm_sb,
                                  skip_sse_sb, dst_bufs, filter_idx,
                                  switchable_ctx, skip_pred, rate, dist);
        }
      }
    }
  } else if ((af_vert == lf_vert) && (af_vert != SWITCHABLE)) {
    for (filter_idx = (af_vert * SWITCHABLE_FILTERS);
         filter_idx <= ((af_vert * SWITCHABLE_FILTERS) + 2); filter_idx += 1) {
      if (filter_idx) {
        interpolation_filter_rd(x, cpi, tile_data, bsize, mi_row, mi_col,
                                orig_dst, rd, switchable_rate, skip_txfm_sb,
                                skip_sse_sb, dst_bufs, filter_idx,
                                switchable_ctx, skip_pred, rate, dist);
      }
    }
  }
}

// Find the best interp filter if dual_interp_filter = 0
static INLINE void find_best_non_dual_interp_filter(
    MACROBLOCK *const x, const AV1_COMP *const cpi,
    const TileDataEnc *tile_data, BLOCK_SIZE bsize, int mi_row, int mi_col,
    const BUFFER_SET *const orig_dst, int64_t *const rd,
    int *const switchable_rate, int *const skip_txfm_sb,
    int64_t *const skip_sse_sb, const BUFFER_SET *dst_bufs[2],
    const int switchable_ctx[2], const int skip_ver, const int skip_hor,
    int *rate, int64_t *dist, int filter_set_size) {
  int16_t i;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];

  // Regular filter evaluation should have been done, and hence the same
  // should be the winner so far
  assert(x->e_mbd.mi[0]->interp_filters == filter_sets[0]);
  assert(filter_set_size == DUAL_FILTER_SET_SIZE);
  if ((skip_hor & skip_ver) != cpi->default_interp_skip_flags) {
    const AV1_COMMON *cm = &cpi->common;
    int bsl, pred_filter_search;
    InterpFilters af = SWITCHABLE, lf = SWITCHABLE, filter_idx = 0;
    const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
    const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
    bsl = mi_size_wide_log2[bsize];
    pred_filter_search =
        cpi->sf.cb_pred_filter_search
            ? (((mi_row + mi_col) >> bsl) +
               get_chessboard_index(cm->current_frame.frame_number)) &
                  0x1
            : 0;
    if (above_mbmi && is_inter_block(above_mbmi)) {
      af = above_mbmi->interp_filters;
    }
    if (left_mbmi && is_inter_block(left_mbmi)) {
      lf = left_mbmi->interp_filters;
    }
    pred_filter_search &= ((af == lf) && (af != SWITCHABLE));
    if (pred_filter_search) {
      filter_idx = SWITCHABLE * (af & 0xf);
      // This assert ensures that filter_x == filter_y in the non-dual filter
      // case
      assert((filter_sets[filter_idx] & 0xffff) ==
             (filter_sets[filter_idx] >> 16));
      if (cpi->sf.adaptive_interp_filter_search &&
          (cpi->sf.interp_filter_search_mask & (1 << (filter_idx >> 2)))) {
        return;
      }
      if (filter_idx) {
        interpolation_filter_rd(
            x, cpi, tile_data, bsize, mi_row, mi_col, orig_dst, rd,
            switchable_rate, skip_txfm_sb, skip_sse_sb, dst_bufs, filter_idx,
            switchable_ctx, (skip_hor & skip_ver), rate, dist);
      }
      return;
    }
  }
  // Reuse the regular filter's modeled rd data for the sharp filter in the
  // following cases:
  // 1) When bsize is 4x4
  // 2) When block width is 4 (i.e. 4x8/4x16 blocks) and the MV in the
  //    vertical direction is full-pel
  // 3) When block height is 4 (i.e. 8x4/16x4 blocks) and the MV in the
  //    horizontal direction is full-pel
  // TODO(any): Optimize cases 2 and 3 further if the luma MV in the relevant
  // direction alone is full-pel

  if ((bsize == BLOCK_4X4) ||
      (block_size_wide[bsize] == 4 &&
       skip_ver == cpi->default_interp_skip_flags) ||
      (block_size_high[bsize] == 4 &&
       skip_hor == cpi->default_interp_skip_flags)) {
    int skip_pred = cpi->default_interp_skip_flags;
    for (i = filter_set_size - 1; i > 0; i -= (SWITCHABLE_FILTERS + 1)) {
      // This assert ensures that filter_x == filter_y in the non-dual filter
      // case
      assert((filter_sets[i] & 0xffff) == (filter_sets[i] >> 16));
      if (cpi->sf.adaptive_interp_filter_search &&
          (cpi->sf.interp_filter_search_mask & (1 << (i >> 2)))) {
        continue;
      }
      interpolation_filter_rd(x, cpi, tile_data, bsize, mi_row, mi_col,
                              orig_dst, rd, switchable_rate, skip_txfm_sb,
                              skip_sse_sb, dst_bufs, i, switchable_ctx,
                              skip_pred, rate, dist);
      skip_pred = (skip_hor & skip_ver);
    }
  } else {
    int skip_pred = (skip_hor & skip_ver);
    for (i = (SWITCHABLE_FILTERS + 1); i < filter_set_size;
         i += (SWITCHABLE_FILTERS + 1)) {
      // This assert ensures that filter_x == filter_y in the non-dual filter
      // case
      assert((filter_sets[i] & 0xffff) == (filter_sets[i] >> 16));
      if (cpi->sf.adaptive_interp_filter_search &&
          (cpi->sf.interp_filter_search_mask & (1 << (i >> 2)))) {
        continue;
      }
      interpolation_filter_rd(x, cpi, tile_data, bsize, mi_row, mi_col,
                              orig_dst, rd, switchable_rate, skip_txfm_sb,
                              skip_sse_sb, dst_bufs, i, switchable_ctx,
                              skip_pred, rate, dist);
      // In the first iteration, the smooth filter is evaluated. If the smooth
      // filter (which is less sharp) is the winner among the regular and
      // smooth filters, sharp filter evaluation is skipped
      // TODO(any): Refine this gating based on modelled rd alone (i.e., by
      // not accounting for the switchable filter rate)
      if (cpi->sf.skip_sharp_interp_filter_search &&
          skip_pred != cpi->default_interp_skip_flags) {
        if (mbmi->interp_filters == filter_sets[(SWITCHABLE_FILTERS + 1)])
          break;
      }
    }
  }
}
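// Index walk of the two non-dual loops above (illustrative; assumes
// SWITCHABLE_FILTERS == 3, so the step SWITCHABLE_FILTERS + 1 == 4 visits
// the diagonal of the 3x3 dual-filter grid):
//
//   small-block path: i = 8, 4   (sharp first, then smooth)
//   regular path:     i = 4, 8   (smooth first, then sharp)
//
// Index 0 (regular/regular) is never revisited because it was evaluated
// before this function is called.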
// Check if a saved result matches this search
static INLINE int is_interp_filter_match(const INTERPOLATION_FILTER_STATS *st,
                                         MB_MODE_INFO *const mi) {
  for (int i = 0; i < 2; ++i) {
    if ((st->ref_frames[i] != mi->ref_frame[i]) ||
        (st->mv[i].as_int != mi->mv[i].as_int)) {
      return 0;
    }
  }
  if (has_second_ref(mi) && st->comp_type != mi->interinter_comp.type)
    return 0;
  return 1;
}

// Checks if the characteristics of the search match
static INLINE int is_comp_rd_match(const AV1_COMP *const cpi,
                                   const MACROBLOCK *const x,
                                   const COMP_RD_STATS *st,
                                   const MB_MODE_INFO *const mi,
                                   int32_t *comp_rate, int64_t *comp_dist,
                                   int64_t *comp_model_rd) {
  // TODO(ranjit): Ensure that compound type search always uses the regular
  // filter and check if the following check can be removed
  // Check if the interp filter matches with the previous case
  if (st->filter != mi->interp_filters) return 0;

  const MACROBLOCKD *const xd = &x->e_mbd;
  // Match MV and reference indices
  for (int i = 0; i < 2; ++i) {
    if ((st->ref_frames[i] != mi->ref_frame[i]) ||
        (st->mv[i].as_int != mi->mv[i].as_int)) {
      return 0;
    }
    const WarpedMotionParams *const wm = &xd->global_motion[mi->ref_frame[i]];
    if (is_global_mv_block(mi, wm->wmtype) != st->is_global[i]) return 0;
  }

  // Store the stats for compound average
  comp_rate[COMPOUND_AVERAGE] = st->rate[COMPOUND_AVERAGE];
  comp_dist[COMPOUND_AVERAGE] = st->dist[COMPOUND_AVERAGE];
  comp_model_rd[COMPOUND_AVERAGE] = st->comp_model_rd[COMPOUND_AVERAGE];
  comp_rate[COMPOUND_DISTWTD] = st->rate[COMPOUND_DISTWTD];
  comp_dist[COMPOUND_DISTWTD] = st->dist[COMPOUND_DISTWTD];
  comp_model_rd[COMPOUND_DISTWTD] = st->comp_model_rd[COMPOUND_DISTWTD];

  // For compound wedge/segment, reuse data only if NEWMV is not present in
  // either of the directions
  if ((!have_newmv_in_inter_mode(mi->mode) &&
       !have_newmv_in_inter_mode(st->mode)) ||
      (cpi->sf.disable_interinter_wedge_newmv_search)) {
    memcpy(&comp_rate[COMPOUND_WEDGE], &st->rate[COMPOUND_WEDGE],
           sizeof(comp_rate[COMPOUND_WEDGE]) * 2);
    memcpy(&comp_dist[COMPOUND_WEDGE], &st->dist[COMPOUND_WEDGE],
           sizeof(comp_dist[COMPOUND_WEDGE]) * 2);
    memcpy(&comp_model_rd[COMPOUND_WEDGE], &st->comp_model_rd[COMPOUND_WEDGE],
           sizeof(comp_model_rd[COMPOUND_WEDGE]) * 2);
  }
  return 1;
}
static INLINE int find_interp_filter_in_stats(MACROBLOCK *x,
                                              MB_MODE_INFO *const mbmi) {
  const int comp_idx = mbmi->compound_idx;
  const int offset = x->interp_filter_stats_idx[comp_idx];
  for (int j = 0; j < offset; ++j) {
    const INTERPOLATION_FILTER_STATS *st =
        &x->interp_filter_stats[comp_idx][j];
    if (is_interp_filter_match(st, mbmi)) {
      mbmi->interp_filters = st->filters;
      return j;
    }
  }
  return -1;  // no matching result found
}
// Checks if a similar compound type search case was accounted for earlier.
// If so, returns the relevant rd data
static INLINE int find_comp_rd_in_stats(const AV1_COMP *const cpi,
                                        const MACROBLOCK *x,
                                        const MB_MODE_INFO *const mbmi,
                                        int32_t *comp_rate, int64_t *comp_dist,
                                        int64_t *comp_model_rd) {
  for (int j = 0; j < x->comp_rd_stats_idx; ++j) {
    if (is_comp_rd_match(cpi, x, &x->comp_rd_stats[j], mbmi, comp_rate,
                         comp_dist, comp_model_rd)) {
      return 1;
    }
  }
  return 0;  // no matching result found
}

static INLINE void save_interp_filter_search_stat(MACROBLOCK *x,
                                                  MB_MODE_INFO *const mbmi,
                                                  int64_t rd, int skip_txfm_sb,
                                                  int64_t skip_sse_sb,
                                                  unsigned int pred_sse) {
  const int comp_idx = mbmi->compound_idx;
  const int offset = x->interp_filter_stats_idx[comp_idx];
  if (offset < MAX_INTERP_FILTER_STATS) {
    INTERPOLATION_FILTER_STATS stat = { mbmi->interp_filters,
                                        { mbmi->mv[0], mbmi->mv[1] },
                                        { mbmi->ref_frame[0],
                                          mbmi->ref_frame[1] },
                                        mbmi->interinter_comp.type,
                                        rd,
                                        skip_txfm_sb,
                                        skip_sse_sb,
                                        pred_sse };
    x->interp_filter_stats[comp_idx][offset] = stat;
    x->interp_filter_stats_idx[comp_idx]++;
  }
}

static INLINE void save_comp_rd_search_stat(MACROBLOCK *x,
                                            const MB_MODE_INFO *const mbmi,
                                            const int32_t *comp_rate,
                                            const int64_t *comp_dist,
                                            const int64_t *comp_model_rd,
                                            const int_mv *cur_mv) {
  const int offset = x->comp_rd_stats_idx;
  if (offset < MAX_COMP_RD_STATS) {
    COMP_RD_STATS *const rd_stats = x->comp_rd_stats + offset;
    memcpy(rd_stats->rate, comp_rate, sizeof(rd_stats->rate));
    memcpy(rd_stats->dist, comp_dist, sizeof(rd_stats->dist));
    memcpy(rd_stats->comp_model_rd, comp_model_rd,
           sizeof(rd_stats->comp_model_rd));
    memcpy(rd_stats->mv, cur_mv, sizeof(rd_stats->mv));
    memcpy(rd_stats->ref_frames, mbmi->ref_frame,
           sizeof(rd_stats->ref_frames));
    rd_stats->mode = mbmi->mode;
    rd_stats->filter = mbmi->interp_filters;
    rd_stats->ref_mv_idx = mbmi->ref_mv_idx;
    const MACROBLOCKD *const xd = &x->e_mbd;
    for (int i = 0; i < 2; ++i) {
      const WarpedMotionParams *const wm =
          &xd->global_motion[mbmi->ref_frame[i]];
      rd_stats->is_global[i] = is_global_mv_block(mbmi, wm->wmtype);
    }
    ++x->comp_rd_stats_idx;
  }
}
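// Usage sketch of the two per-block stat caches above (illustrative): both
// follow the same lookup-then-save protocol inside the mode loop, bounded by
// MAX_INTERP_FILTER_STATS / MAX_COMP_RD_STATS entries:
//
//   int j = find_interp_filter_in_stats(x, mbmi);
//   if (j < 0) {
//     /* ...run the filter search... */
//     save_interp_filter_search_stat(x, mbmi, rd, skip_txfm_sb,
//                                    skip_sse_sb, pred_sse);
//   }  // else: mbmi->interp_filters was restored from the cache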
static int64_t interpolation_filter_search(
    MACROBLOCK *const x, const AV1_COMP *const cpi,
    const TileDataEnc *tile_data, BLOCK_SIZE bsize, int mi_row, int mi_col,
    const BUFFER_SET *const tmp_dst, const BUFFER_SET *const orig_dst,
    InterpFilter (*const single_filter)[REF_FRAMES], int64_t *const rd,
    int *const switchable_rate, int *const skip_txfm_sb,
    int64_t *const skip_sse_sb, int *skip_build_pred, HandleInterModeArgs *args,
    int64_t ref_best_rd) {
  const AV1_COMMON *cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  const int need_search =
      av1_is_interp_needed(xd) && av1_is_interp_search_needed(xd);
  int i;
  // Index 0 corresponds to luma rd data and index 1 corresponds to the
  // cumulative data of all planes
  int tmp_rate[2] = { 0, 0 };
  int64_t tmp_dist[2] = { 0, 0 };
  int best_skip_txfm_sb[2] = { 1, 1 };
  int64_t best_skip_sse_sb[2] = { 0, 0 };
  const int ref_frame = xd->mi[0]->ref_frame[0];

  (void)single_filter;
  int match_found_idx = -1;
  const InterpFilter assign_filter = cm->interp_filter;
  if (cpi->sf.skip_repeat_interpolation_filter_search && need_search) {
    match_found_idx = find_interp_filter_in_stats(x, mbmi);
  }
  if (match_found_idx != -1) {
    const int comp_idx = mbmi->compound_idx;
    *rd = x->interp_filter_stats[comp_idx][match_found_idx].rd;
    *skip_txfm_sb =
        x->interp_filter_stats[comp_idx][match_found_idx].skip_txfm_sb;
    *skip_sse_sb =
        x->interp_filter_stats[comp_idx][match_found_idx].skip_sse_sb;
    x->pred_sse[ref_frame] =
        x->interp_filter_stats[comp_idx][match_found_idx].pred_sse;
    return 0;
  }
  if (!need_search || match_found_idx == -1) {
    set_default_interp_filters(mbmi, assign_filter);
  }
  int switchable_ctx[2];
  switchable_ctx[0] = av1_get_pred_context_switchable_interp(xd, 0);
  switchable_ctx[1] = av1_get_pred_context_switchable_interp(xd, 1);
  *switchable_rate =
      get_switchable_rate(x, mbmi->interp_filters, switchable_ctx);
  if (!(*skip_build_pred)) {
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize, 0,
                                  av1_num_planes(cm) - 1);
    *skip_build_pred = 1;
  }

#if CONFIG_COLLECT_RD_STATS == 3
  RD_STATS rd_stats_y;
  pick_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize, mi_row, mi_col, INT64_MAX);
  PrintPredictionUnitStats(cpi, tile_data, x, &rd_stats_y, bsize);
#endif  // CONFIG_COLLECT_RD_STATS == 3
  model_rd_sb_fn[MODELRD_TYPE_INTERP_FILTER](
      cpi, bsize, x, xd, 0, 0, mi_row, mi_col, &tmp_rate[0], &tmp_dist[0],
      &best_skip_txfm_sb[0], &best_skip_sse_sb[0], NULL, NULL, NULL);
  if (num_planes > 1)
    model_rd_sb_fn[MODELRD_TYPE_INTERP_FILTER](
        cpi, bsize, x, xd, 1, num_planes - 1, mi_row, mi_col, &tmp_rate[1],
        &tmp_dist[1], &best_skip_txfm_sb[1], &best_skip_sse_sb[1], NULL, NULL,
        NULL);
  tmp_rate[1] =
      (int)AOMMIN((int64_t)tmp_rate[0] + (int64_t)tmp_rate[1], INT_MAX);
  assert(tmp_rate[1] >= 0);
  tmp_dist[1] = tmp_dist[0] + tmp_dist[1];
  best_skip_txfm_sb[1] = best_skip_txfm_sb[0] & best_skip_txfm_sb[1];
  best_skip_sse_sb[1] = best_skip_sse_sb[0] + best_skip_sse_sb[1];
  *rd = RDCOST(x->rdmult, (*switchable_rate + tmp_rate[1]), tmp_dist[1]);
  *skip_txfm_sb = best_skip_txfm_sb[1];
  *skip_sse_sb = best_skip_sse_sb[1];
  x->pred_sse[ref_frame] = (unsigned int)(best_skip_sse_sb[0] >> 4);

  if (assign_filter != SWITCHABLE || match_found_idx != -1) {
    return 0;
  }
  if (!need_search) {
    assert(mbmi->interp_filters ==
           av1_broadcast_interp_filter(EIGHTTAP_REGULAR));
    return 0;
  }
  if (args->modelled_rd != NULL) {
    if (has_second_ref(mbmi)) {
      const int ref_mv_idx = mbmi->ref_mv_idx;
      int refs[2] = { mbmi->ref_frame[0],
                      (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
      const int mode0 = compound_ref0_mode(mbmi->mode);
      const int mode1 = compound_ref1_mode(mbmi->mode);
      const int64_t mrd =
          AOMMIN(args->modelled_rd[mode0][ref_mv_idx][refs[0]],
                 args->modelled_rd[mode1][ref_mv_idx][refs[1]]);
      if ((*rd >> 1) > mrd && ref_best_rd < INT64_MAX) {
        return INT64_MAX;
      }
    }
  }
  x->recalc_luma_mc_data = 0;
  // skip_flag=xx (in binary form):
  // Setting the 0th bit corresponds to skipping luma MC and setting the 1st
  // bit corresponds to skipping chroma MC.
  // skip_flag=0 corresponds to "don't skip luma or chroma MC"
  // skip_flag=1 corresponds to "skip luma MC only"
  // skip_flag=2 is not a valid case
  // skip_flag=3 corresponds to "skip both luma and chroma MC"
  int skip_hor = cpi->default_interp_skip_flags;
  int skip_ver = cpi->default_interp_skip_flags;
  const int is_compound = has_second_ref(mbmi);
  assert(is_intrabc_block(mbmi) == 0);
  for (int j = 0; j < 1 + is_compound; ++j) {
    const struct scale_factors *const sf =
        get_ref_scale_factors_const(cm, mbmi->ref_frame[j]);
    // TODO(any): Refine the skip flag calculation considering scaling
    if (av1_is_scaled(sf)) {
      skip_hor = 0;
      skip_ver = 0;
      break;
    }
    const MV mv = mbmi->mv[j].as_mv;
    int skip_hor_plane = 0;
    int skip_ver_plane = 0;
    for (int k = 0; k < AOMMAX(1, (num_planes - 1)); ++k) {
      struct macroblockd_plane *const pd = &xd->plane[k];
      const int bw = pd->width;
      const int bh = pd->height;
      const MV mv_q4 = clamp_mv_to_umv_border_sb(
          xd, &mv, bw, bh, pd->subsampling_x, pd->subsampling_y);
      const int sub_x = (mv_q4.col & SUBPEL_MASK) << SCALE_EXTRA_BITS;
      const int sub_y = (mv_q4.row & SUBPEL_MASK) << SCALE_EXTRA_BITS;
      skip_hor_plane |= ((sub_x == 0) << k);
      skip_ver_plane |= ((sub_y == 0) << k);
    }
    skip_hor = skip_hor & skip_hor_plane;
    skip_ver = skip_ver & skip_ver_plane;
    // It is not valid that "luma MV is sub-pel, whereas chroma MV is not"
    assert(skip_hor != 2);
    assert(skip_ver != 2);
  }
  // When the compound prediction type is compound segment wedge, luma MC and
  // chroma MC need to go hand in hand as the mask generated during luma MC is
  // required for chroma MC. If skip_hor = 0 and skip_ver = 1, the mask used
  // for chroma MC during the vertical filter decision may be incorrect as the
  // temporary MC evaluation overwrites the mask. Set skip_ver to 0 for this
  // case so that the mask is populated during luma MC
  if (is_compound && mbmi->compound_idx == 1 &&
      mbmi->interinter_comp.type == COMPOUND_DIFFWTD) {
    assert(mbmi->comp_group_idx == 1);
    if (skip_hor == 0 && skip_ver == 1) skip_ver = 0;
  }
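  // Worked example for the flags above (illustrative; assumes 4:2:0 with the
  // chroma planes represented by plane index 1, and SUBPEL_MASK covering the
  // sub-pel bits of the q4 MV): a luma MV with a 2.5-pel horizontal
  // component has a non-zero horizontal sub-pel phase, so bit 0 stays clear
  // in skip_hor; its vertical component is full-pel, so bit 0 is set in
  // skip_ver. If the derived chroma MV is also full-pel vertically, skip_ver
  // becomes 3 (skip both luma and chroma vertical re-filtering) while
  // skip_hor stays 0.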
  // do interp_filter search
  const int filter_set_size = DUAL_FILTER_SET_SIZE;
  restore_dst_buf(xd, *tmp_dst, num_planes);
  const BUFFER_SET *dst_bufs[2] = { tmp_dst, orig_dst };
  if (cpi->sf.use_fast_interpolation_filter_search &&
      cm->seq_params.enable_dual_filter) {
    // default to (R,R): EIGHTTAP_REGULARxEIGHTTAP_REGULAR
    int best_dual_mode = 0;
    // Find best of {R}x{R,Sm,Sh}
    const int bw = block_size_wide[bsize];
    const int bh = block_size_high[bsize];
    int skip_pred;
    int bsl, pred_filter_search;
    InterpFilters af_horiz = SWITCHABLE, af_vert = SWITCHABLE,
                  lf_horiz = SWITCHABLE, lf_vert = SWITCHABLE, filter_idx = 0;
    const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
    const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
    bsl = mi_size_wide_log2[bsize];
    pred_filter_search =
        cpi->sf.cb_pred_filter_search
            ? (((mi_row + mi_col) >> bsl) +
               get_chessboard_index(cm->current_frame.frame_number)) &
                  0x1
            : 0;
    if (above_mbmi && is_inter_block(above_mbmi)) {
      af_horiz = av1_extract_interp_filter(above_mbmi->interp_filters, 1);
      af_vert = av1_extract_interp_filter(above_mbmi->interp_filters, 0);
    }
    if (left_mbmi && is_inter_block(left_mbmi)) {
      lf_horiz = av1_extract_interp_filter(left_mbmi->interp_filters, 1);
      lf_vert = av1_extract_interp_filter(left_mbmi->interp_filters, 0);
    }
    pred_filter_search &= !have_newmv_in_inter_mode(mbmi->mode);
    pred_filter_search &=
        ((af_horiz == lf_horiz) && (af_horiz != SWITCHABLE)) ||
        ((af_vert == lf_vert) && (af_vert != SWITCHABLE));
    if (pred_filter_search) {
      pred_dual_interp_filter_rd(
          x, cpi, tile_data, bsize, mi_row, mi_col, orig_dst, rd,
          switchable_rate, best_skip_txfm_sb, best_skip_sse_sb, dst_bufs,
          filter_idx, switchable_ctx, (skip_hor & skip_ver), tmp_rate,
          tmp_dist, af_horiz, af_vert, lf_horiz, lf_vert);
    } else {
      skip_pred = bw <= 4 ? cpi->default_interp_skip_flags : skip_hor;
      for (i = (SWITCHABLE_FILTERS - 1); i >= 1; --i) {
        if (interpolation_filter_rd(
                x, cpi, tile_data, bsize, mi_row, mi_col, orig_dst, rd,
                switchable_rate, best_skip_txfm_sb, best_skip_sse_sb,
                dst_bufs, i, switchable_ctx, skip_pred, tmp_rate, tmp_dist)) {
          best_dual_mode = i;
        }
        skip_pred = skip_hor;
      }
      // From best of horizontal EIGHTTAP_REGULAR modes, check vertical modes
      skip_pred = bh <= 4 ? cpi->default_interp_skip_flags : skip_ver;
      assert(filter_set_size == DUAL_FILTER_SET_SIZE);
      for (i = (best_dual_mode + (SWITCHABLE_FILTERS * 2));
           i >= (best_dual_mode + SWITCHABLE_FILTERS);
           i -= SWITCHABLE_FILTERS) {
        interpolation_filter_rd(
            x, cpi, tile_data, bsize, mi_row, mi_col, orig_dst, rd,
            switchable_rate, best_skip_txfm_sb, best_skip_sse_sb, dst_bufs,
            i, switchable_ctx, skip_pred, tmp_rate, tmp_dist);
        skip_pred = skip_ver;
      }
    }
  } else if (cm->seq_params.enable_dual_filter == 0) {
    find_best_non_dual_interp_filter(
        x, cpi, tile_data, bsize, mi_row, mi_col, orig_dst, rd,
        switchable_rate, best_skip_txfm_sb, best_skip_sse_sb, dst_bufs,
        switchable_ctx, skip_ver, skip_hor, tmp_rate, tmp_dist,
        filter_set_size);
  } else {
    // EIGHTTAP_REGULAR mode is calculated beforehand
    for (i = 1; i < filter_set_size; ++i) {
      interpolation_filter_rd(x, cpi, tile_data, bsize, mi_row, mi_col,
                              orig_dst, rd, switchable_rate, best_skip_txfm_sb,
                              best_skip_sse_sb, dst_bufs, i, switchable_ctx,
                              (skip_hor & skip_ver), tmp_rate, tmp_dist);
    }
  }
  swap_dst_buf(xd, dst_bufs, num_planes);
  // Recompute final MC data if required
  if (x->recalc_luma_mc_data == 1) {
    // Recomputing final luma MC data is required only if the same was skipped
    // in either of the directions. The condition below is necessary, but not
    // sufficient
    assert((skip_hor == 1) || (skip_ver == 1));
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
                                  AOM_PLANE_Y, AOM_PLANE_Y);
  }
  *skip_txfm_sb = best_skip_txfm_sb[1];
  *skip_sse_sb = best_skip_sse_sb[1];
  x->pred_sse[ref_frame] = (unsigned int)(best_skip_sse_sb[0] >> 4);

  // save search results
  if (cpi->sf.skip_repeat_interpolation_filter_search) {
    assert(match_found_idx == -1);
    save_interp_filter_search_stat(x, mbmi, *rd, *skip_txfm_sb, *skip_sse_sb,
                                   x->pred_sse[ref_frame]);
  }
  return 0;
}
static int txfm_search(const AV1_COMP *cpi, const TileDataEnc *tile_data,
                       MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, int mi_col,
                       RD_STATS *rd_stats, RD_STATS *rd_stats_y,
                       RD_STATS *rd_stats_uv, int mode_rate,
                       int64_t ref_best_rd) {
  /*
   * This function combines y and uv planes' transform search processes
   * together, once the prediction is generated. It first does subtraction to
   * obtain the prediction error. Then it calls
   * pick_tx_size_type_yrd/super_block_yrd and super_block_uvrd sequentially
   * and handles the early terminations happening in those functions. At the
   * end, it computes the rd_stats/_y/_uv accordingly.
   */
  const AV1_COMMON *cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  const int ref_frame_1 = mbmi->ref_frame[1];
  const int64_t mode_rd = RDCOST(x->rdmult, mode_rate, 0);
  const int64_t rd_thresh =
      ref_best_rd == INT64_MAX ? INT64_MAX : ref_best_rd - mode_rd;
  const int skip_ctx = av1_get_skip_context(xd);
  const int skip_flag_cost[2] = { x->skip_cost[skip_ctx][0],
                                  x->skip_cost[skip_ctx][1] };
  const int64_t min_header_rate =
      mode_rate + AOMMIN(skip_flag_cost[0], skip_flag_cost[1]);
  // Account for the minimum skip and non_skip rd.
  // Eventually one of the two skip flag costs will be added to mode_rate
  const int64_t min_header_rd_possible = RDCOST(x->rdmult, min_header_rate, 0);
  (void)tile_data;

  if (min_header_rd_possible > ref_best_rd) {
    av1_invalid_rd_stats(rd_stats_y);
    return 0;
  }

  av1_init_rd_stats(rd_stats);
  av1_init_rd_stats(rd_stats_y);
  rd_stats->rate = mode_rate;

  // cost and distortion
  av1_subtract_plane(x, bsize, 0);
  if (cm->tx_mode == TX_MODE_SELECT && !xd->lossless[mbmi->segment_id]) {
    pick_tx_size_type_yrd(cpi, x, rd_stats_y, bsize, mi_row, mi_col,
                          rd_thresh);
#if CONFIG_COLLECT_RD_STATS == 2
    PrintPredictionUnitStats(cpi, tile_data, x, rd_stats_y, bsize);
#endif  // CONFIG_COLLECT_RD_STATS == 2
  } else {
    super_block_yrd(cpi, x, rd_stats_y, bsize, rd_thresh);
    memset(mbmi->inter_tx_size, mbmi->tx_size, sizeof(mbmi->inter_tx_size));
    for (int i = 0; i < xd->n4_h * xd->n4_w; ++i)
      set_blk_skip(x, 0, i, rd_stats_y->skip);
  }

  if (rd_stats_y->rate == INT_MAX) {
    // TODO(angiebird): check if we need this
    // restore_dst_buf(xd, *orig_dst, num_planes);
    mbmi->ref_frame[1] = ref_frame_1;
    return 0;
  }

  av1_merge_rd_stats(rd_stats, rd_stats_y);

  const int64_t non_skip_rdcosty =
      RDCOST(x->rdmult, rd_stats->rate + skip_flag_cost[0], rd_stats->dist);
  const int64_t skip_rdcosty =
      RDCOST(x->rdmult, mode_rate + skip_flag_cost[1], rd_stats->sse);
  const int64_t min_rdcosty = AOMMIN(non_skip_rdcosty, skip_rdcosty);
  if (min_rdcosty > ref_best_rd) {
    const int64_t tokenonly_rdy =
        AOMMIN(RDCOST(x->rdmult, rd_stats_y->rate, rd_stats_y->dist),
               RDCOST(x->rdmult, 0, rd_stats_y->sse));
    // Invalidate rd_stats_y to skip the rest of the motion modes search
    if (tokenonly_rdy - (tokenonly_rdy >> cpi->sf.prune_motion_mode_level) >
        rd_thresh)
      av1_invalid_rd_stats(rd_stats_y);
    mbmi->ref_frame[1] = ref_frame_1;
    return 0;
  }

  av1_init_rd_stats(rd_stats_uv);
  const int num_planes = av1_num_planes(cm);
  if (num_planes > 1) {
    int64_t ref_best_chroma_rd = ref_best_rd;
    // Calculate the best rd cost possible for chroma
    if (cpi->sf.perform_best_rd_based_gating_for_chroma &&
        (ref_best_chroma_rd != INT64_MAX)) {
      ref_best_chroma_rd =
          (ref_best_chroma_rd - AOMMIN(non_skip_rdcosty, skip_rdcosty));
    }
    const int is_cost_valid_uv =
        super_block_uvrd(cpi, x, rd_stats_uv, bsize, ref_best_chroma_rd);
    if (!is_cost_valid_uv) {
      mbmi->ref_frame[1] = ref_frame_1;
      return 0;
    }
    av1_merge_rd_stats(rd_stats, rd_stats_uv);
  }

  if (rd_stats->skip) {
    rd_stats->rate -= rd_stats_uv->rate + rd_stats_y->rate;
    rd_stats_y->rate = 0;
    rd_stats_uv->rate = 0;
    rd_stats->dist = rd_stats->sse;
    rd_stats_y->dist = rd_stats_y->sse;
    rd_stats_uv->dist = rd_stats_uv->sse;
    rd_stats->rate += skip_flag_cost[1];
    mbmi->skip = 1;
    // Here mbmi->skip temporarily plays the role of this_skip2

    const int64_t tmprd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
    if (tmprd > ref_best_rd) {
      mbmi->ref_frame[1] = ref_frame_1;
      return 0;
    }
  } else if (!xd->lossless[mbmi->segment_id] &&
             (RDCOST(x->rdmult,
                     rd_stats_y->rate + rd_stats_uv->rate + skip_flag_cost[0],
                     rd_stats->dist) >=
              RDCOST(x->rdmult, skip_flag_cost[1], rd_stats->sse))) {
    rd_stats->rate -= rd_stats_uv->rate + rd_stats_y->rate;
    rd_stats->rate += skip_flag_cost[1];
    rd_stats->dist = rd_stats->sse;
    rd_stats_y->dist = rd_stats_y->sse;
    rd_stats_uv->dist = rd_stats_uv->sse;
    rd_stats_y->rate = 0;
    rd_stats_uv->rate = 0;
    mbmi->skip = 1;
  } else {
    rd_stats->rate += skip_flag_cost[0];
    mbmi->skip = 0;
  }

  return 1;
}
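// Worked example of the skip decision near the end of txfm_search()
// (illustrative; RDCOST as sketched earlier, rdmult = 512): with coefficient
// rate rate_y + rate_uv = 3000, dist = 1000, sse = 1400 and skip flag costs
// {50, 20},
//   non-skip: RDCOST(512, 3000 + 50, 1000) = 3050 + 128000 = 131050
//   skip:     RDCOST(512, 20, 1400)        = 20 + 179200   = 179220
// so the coefficients are kept. Had sse been close to dist, the skip path
// (dropping all coefficient rate) would have won instead.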
static INLINE bool enable_wedge_search(MACROBLOCK *const x,
                                       const AV1_COMP *const cpi) {
  // Enable wedge search if source variance and edge strength are above
  // the thresholds.
  return x->source_variance > cpi->sf.disable_wedge_search_var_thresh &&
         x->edge_strength > cpi->sf.disable_wedge_search_edge_thresh;
}

static INLINE bool enable_wedge_interinter_search(MACROBLOCK *const x,
                                                  const AV1_COMP *const cpi) {
  return enable_wedge_search(x, cpi) && cpi->oxcf.enable_interinter_wedge;
}

static INLINE bool enable_wedge_interintra_search(MACROBLOCK *const x,
                                                  const AV1_COMP *const cpi) {
  return enable_wedge_search(x, cpi) && cpi->oxcf.enable_interintra_wedge &&
         !cpi->sf.disable_wedge_interintra_search;
}
x->wedge_interintra_cost[bsize][0] : 0; 9057 mbmi->ref_frame[1] = NONE_FRAME; 9058 xd->plane[0].dst.buf = tmp_buf; 9059 xd->plane[0].dst.stride = bw; 9060 av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 9061 AOM_PLANE_Y, AOM_PLANE_Y); 9062 9063 restore_dst_buf(xd, *orig_dst, num_planes); 9064 mbmi->ref_frame[1] = INTRA_FRAME; 9065 best_interintra_mode = args->inter_intra_mode[mbmi->ref_frame[0]]; 9066 9067 if (cpi->oxcf.enable_smooth_interintra && 9068 !cpi->sf.disable_smooth_interintra) { 9069 mbmi->use_wedge_interintra = 0; 9070 int j = 0; 9071 if (cpi->sf.reuse_inter_intra_mode == 0 || 9072 best_interintra_mode == INTERINTRA_MODES) { 9073 for (j = 0; j < INTERINTRA_MODES; ++j) { 9074 if ((!cpi->oxcf.enable_smooth_intra || cpi->sf.disable_smooth_intra) && 9075 (INTERINTRA_MODE)j == II_SMOOTH_PRED) 9076 continue; 9077 mbmi->interintra_mode = (INTERINTRA_MODE)j; 9078 rmode = interintra_mode_cost[mbmi->interintra_mode]; 9079 av1_build_intra_predictors_for_interintra(cm, xd, bsize, 0, orig_dst, 9080 intrapred, bw); 9081 av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw); 9082 model_rd_sb_fn[MODELRD_TYPE_INTERINTRA]( 9083 cpi, bsize, x, xd, 0, 0, mi_row, mi_col, &rate_sum, &dist_sum, 9084 &tmp_skip_txfm_sb, &tmp_skip_sse_sb, NULL, NULL, NULL); 9085 rd = RDCOST(x->rdmult, tmp_rate_mv + rate_sum + rmode, dist_sum); 9086 if (rd < best_interintra_rd) { 9087 best_interintra_rd = rd; 9088 best_interintra_mode = mbmi->interintra_mode; 9089 } 9090 } 9091 args->inter_intra_mode[mbmi->ref_frame[0]] = best_interintra_mode; 9092 } 9093 assert(IMPLIES(!cpi->oxcf.enable_smooth_interintra || 9094 cpi->sf.disable_smooth_interintra, 9095 best_interintra_mode != II_SMOOTH_PRED)); 9096 rmode = interintra_mode_cost[best_interintra_mode]; 9097 if (j == 0 || best_interintra_mode != II_SMOOTH_PRED) { 9098 mbmi->interintra_mode = best_interintra_mode; 9099 av1_build_intra_predictors_for_interintra(cm, xd, bsize, 0, orig_dst, 9100 intrapred, bw); 9101 av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw); 9102 } 9103 9104 RD_STATS rd_stats; 9105 rd = estimate_yrd_for_sb(cpi, bsize, x, ref_best_rd, &rd_stats); 9106 if (rd != INT64_MAX) { 9107 rd = RDCOST(x->rdmult, *rate_mv + rmode + rd_stats.rate + rwedge, 9108 rd_stats.dist); 9109 } 9110 best_interintra_rd = rd; 9111 if (ref_best_rd < INT64_MAX && 9112 ((best_interintra_rd >> 4) * 9) > ref_best_rd) { 9113 return -1; 9114 } 9115 } 9116 if (is_wedge_used) { 9117 int64_t best_interintra_rd_nowedge = rd; 9118 int64_t best_interintra_rd_wedge = INT64_MAX; 9119 int_mv tmp_mv; 9120 if (enable_wedge_interintra_search(x, cpi)) { 9121 mbmi->use_wedge_interintra = 1; 9122 9123 rwedge = av1_cost_literal(get_interintra_wedge_bits(bsize)) + 9124 x->wedge_interintra_cost[bsize][1]; 9125 9126 if (!cpi->oxcf.enable_smooth_interintra || 9127 cpi->sf.disable_smooth_interintra) { 9128 if (best_interintra_mode == INTERINTRA_MODES) { 9129 mbmi->interintra_mode = II_SMOOTH_PRED; 9130 best_interintra_mode = II_SMOOTH_PRED; 9131 av1_build_intra_predictors_for_interintra(cm, xd, bsize, 0, orig_dst, 9132 intrapred, bw); 9133 best_interintra_rd_wedge = 9134 pick_interintra_wedge(cpi, x, bsize, intrapred_, tmp_buf_); 9135 9136 int j = 0; 9137 for (j = 0; j < INTERINTRA_MODES; ++j) { 9138 mbmi->interintra_mode = (INTERINTRA_MODE)j; 9139 rmode = interintra_mode_cost[mbmi->interintra_mode]; 9140 av1_build_intra_predictors_for_interintra(cm, xd, bsize, 0, 9141 orig_dst, intrapred, bw); 9142 av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw); 
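            // Model-based rate/distortion estimate of the blended
            // interintra prediction (a fast proxy; no transform search is
            // performed at this stage).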
            model_rd_sb_fn[MODELRD_TYPE_INTERINTRA](
                cpi, bsize, x, xd, 0, 0, mi_row, mi_col, &rate_sum, &dist_sum,
                &tmp_skip_txfm_sb, &tmp_skip_sse_sb, NULL, NULL, NULL);
            rd = RDCOST(x->rdmult, tmp_rate_mv + rate_sum + rmode, dist_sum);
            if (rd < best_interintra_rd) {
              best_interintra_rd = rd;
              best_interintra_rd_wedge = rd;
              best_interintra_mode = mbmi->interintra_mode;
            }
          }
          args->inter_intra_mode[mbmi->ref_frame[0]] = best_interintra_mode;
          mbmi->interintra_mode = best_interintra_mode;

          if (best_interintra_mode != II_SMOOTH_PRED) {
            av1_build_intra_predictors_for_interintra(cm, xd, bsize, 0,
                                                      orig_dst, intrapred, bw);
          }
        } else {
          mbmi->interintra_mode = best_interintra_mode;
          av1_build_intra_predictors_for_interintra(cm, xd, bsize, 0, orig_dst,
                                                    intrapred, bw);
          best_interintra_rd_wedge =
              pick_interintra_wedge(cpi, x, bsize, intrapred_, tmp_buf_);
        }
      } else {
        best_interintra_rd_wedge =
            pick_interintra_wedge(cpi, x, bsize, intrapred_, tmp_buf_);
      }

      rmode = interintra_mode_cost[mbmi->interintra_mode];
      best_interintra_rd_wedge +=
          RDCOST(x->rdmult, rmode + *rate_mv + rwedge, 0);
      rd = INT64_MAX;
      // Refine the motion vector.
      if (have_newmv_in_inter_mode(mbmi->mode)) {
        // Get the negated wedge mask (wedge sign inverted).
        const uint8_t *mask = av1_get_contiguous_soft_mask(
            mbmi->interintra_wedge_index, 1, bsize);
        tmp_mv = mbmi->mv[0];
        compound_single_motion_search(cpi, x, bsize, &tmp_mv.as_mv, mi_row,
                                      mi_col, intrapred, mask, bw, &tmp_rate_mv,
                                      0);
        if (mbmi->mv[0].as_int != tmp_mv.as_int) {
          mbmi->mv[0].as_int = tmp_mv.as_int;
          av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
                                        AOM_PLANE_Y, AOM_PLANE_Y);
          model_rd_sb_fn[MODELRD_TYPE_MASKED_COMPOUND](
              cpi, bsize, x, xd, 0, 0, mi_row, mi_col, &rate_sum, &dist_sum,
              &tmp_skip_txfm_sb, &tmp_skip_sse_sb, NULL, NULL, NULL);
          rd = RDCOST(x->rdmult, tmp_rate_mv + rmode + rate_sum + rwedge,
                      dist_sum);
        }
      }
      if (rd >= best_interintra_rd_wedge) {
        tmp_mv.as_int = mv0.as_int;
        tmp_rate_mv = *rate_mv;
        av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw);
      }
      // Evaluate an rd cost that is closer to the true value.
      RD_STATS rd_stats;
      rd = estimate_yrd_for_sb(cpi, bsize, x, ref_best_rd, &rd_stats);
      if (rd != INT64_MAX) {
        rd = RDCOST(x->rdmult, rmode + tmp_rate_mv + rwedge + rd_stats.rate,
                    rd_stats.dist);
      }
      best_interintra_rd_wedge = rd;
      if ((!cpi->oxcf.enable_smooth_interintra ||
           cpi->sf.disable_smooth_interintra) &&
          best_interintra_rd_wedge == INT64_MAX)
        return -1;
      if (best_interintra_rd_wedge < best_interintra_rd_nowedge) {
        mbmi->use_wedge_interintra = 1;
        mbmi->mv[0].as_int = tmp_mv.as_int;
        *tmp_rate2 += tmp_rate_mv - *rate_mv;
        *rate_mv = tmp_rate_mv;
      } else {
        mbmi->use_wedge_interintra = 0;
        mbmi->mv[0].as_int = mv0.as_int;
        av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
                                      AOM_PLANE_Y, AOM_PLANE_Y);
      }
    } else {
      if (!cpi->oxcf.enable_smooth_interintra ||
          cpi->sf.disable_smooth_interintra)
        return -1;
      mbmi->use_wedge_interintra = 0;
    }
  } else {
    if (best_interintra_rd == INT64_MAX) return -1;
  }
  if (num_planes > 1) {
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
                                  AOM_PLANE_U, num_planes - 1);
  }
  return 0;
}

// Decide whether the warp evaluation should be skipped, based on the type
// of the fitted warp model:
// 1) If the number of valid neighbours is 1, the ROTZOOM parameters can be
//    obtained reliably (2 parameters from one neighbouring MV), so any other
//    model type is rejected.
// 2) For the IDENTITY/TRANSLATION cases, warp can perform better only due to
//    a different interpolation filter being used; the quality gains from
//    that alone may not be much, so these model types are rejected as well.

static int check_if_optimal_warp(const AV1_COMP *cpi,
                                 WarpedMotionParams *wm_params,
                                 int num_proj_ref) {
  int is_valid_warp = 1;
  if (cpi->sf.prune_warp_using_wmtype) {
    TransformationType wmtype = get_wmtype(wm_params);
    if (num_proj_ref == 1) {
      if (wmtype != ROTZOOM) is_valid_warp = 0;
    } else {
      if (wmtype < ROTZOOM) is_valid_warp = 0;
    }
  }
  return is_valid_warp;
}

struct obmc_check_mv_field_ctxt {
  MB_MODE_INFO *current_mi;
  int mv_field_check_result;
};

static INLINE void obmc_check_identical_mv(MACROBLOCKD *xd, int rel_mi_col,
                                           uint8_t nb_mi_width,
                                           MB_MODE_INFO *nb_mi, void *fun_ctxt,
                                           const int num_planes) {
  (void)xd;
  (void)rel_mi_col;
  (void)nb_mi_width;
  (void)num_planes;
  struct obmc_check_mv_field_ctxt *ctxt =
      (struct obmc_check_mv_field_ctxt *)fun_ctxt;
  const MB_MODE_INFO *current_mi = ctxt->current_mi;

  if (ctxt->mv_field_check_result == 0) return;

  if (nb_mi->ref_frame[0] != current_mi->ref_frame[0] ||
      nb_mi->mv[0].as_int != current_mi->mv[0].as_int ||
      nb_mi->interp_filters != current_mi->interp_filters) {
    ctxt->mv_field_check_result = 0;
  }
}

// Check if the neighbors' motions used by obmc have the same parameters as
// the current block's. If all the parameters are identical, obmc will produce
// the same prediction as regular bmc, so the overlapping operations can be
// skipped to save complexity. The parameters checked include the reference
// frame, motion vector, and interpolation filter.
int check_identical_obmc_mv_field(const AV1_COMMON *cm, MACROBLOCKD *xd,
                                  int mi_row, int mi_col) {
  const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
  struct obmc_check_mv_field_ctxt mv_field_check_ctxt = { xd->mi[0], 1 };

  foreach_overlappable_nb_above(cm, xd, mi_col,
                                max_neighbor_obmc[mi_size_wide_log2[bsize]],
                                obmc_check_identical_mv, &mv_field_check_ctxt);
  foreach_overlappable_nb_left(cm, xd, mi_row,
                               max_neighbor_obmc[mi_size_high_log2[bsize]],
                               obmc_check_identical_mv, &mv_field_check_ctxt);

  return mv_field_check_ctxt.mv_field_check_result;
}

static int skip_interintra_based_on_first_pass_stats(const AV1_COMP *const cpi,
                                                     MACROBLOCK *const x,
                                                     BLOCK_SIZE bsize,
                                                     int mi_row, int mi_col) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  if (cpi->two_pass_partition_search &&
      cpi->sf.use_first_partition_pass_interintra_stats &&
      !x->cb_partition_scan) {
    const int mi_width = mi_size_wide[bsize];
    const int mi_height = mi_size_high[bsize];
    // Search the stats table to see if an interintra mode was used in the
    // first pass of the partition search.
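    // The block is sampled on a grid with stride
    // FIRST_PARTITION_PASS_SAMPLE_REGION (in mi units); interintra is kept
    // (return 0) as soon as any sampled position recorded an interintra
    // mode for this reference frame.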
    for (int row = mi_row; row < mi_row + mi_height;
         row += FIRST_PARTITION_PASS_SAMPLE_REGION) {
      for (int col = mi_col; col < mi_col + mi_width;
           col += FIRST_PARTITION_PASS_SAMPLE_REGION) {
        const int index = av1_first_partition_pass_stats_index(row, col);
        const FIRST_PARTITION_PASS_STATS *const stats =
            &x->first_partition_pass_stats[index];
        if (stats->interintra_motion_mode_count[mbmi->ref_frame[0]]) {
          return 0;
        }
      }
    }
    return 1;
  }
  return 0;
}

// TODO(afergs): Refactor the MBMI references in here - there are four
// TODO(afergs): Refactor optional args - add them to a struct or remove
static int64_t motion_mode_rd(
    const AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *const x,
    BLOCK_SIZE bsize, RD_STATS *rd_stats, RD_STATS *rd_stats_y,
    RD_STATS *rd_stats_uv, int *disable_skip, int mi_row, int mi_col,
    HandleInterModeArgs *const args, int64_t ref_best_rd, const int *refs,
    int *rate_mv, const BUFFER_SET *orig_dst, int64_t *best_est_rd,
    int do_tx_search, InterModesInfo *inter_modes_info) {
  const AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  const int is_comp_pred = has_second_ref(mbmi);
  const PREDICTION_MODE this_mode = mbmi->mode;
  const int rate2_nocoeff = rd_stats->rate;
  int best_xskip = 0, best_disable_skip = 0;
  RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
  const int rate_mv0 = *rate_mv;
  int skip_interintra_mode = 0;
  const int interintra_allowed = cm->seq_params.enable_interintra_compound &&
                                 is_interintra_allowed(mbmi) &&
                                 mbmi->compound_idx;
  int pts0[SAMPLES_ARRAY_SIZE], pts_inref0[SAMPLES_ARRAY_SIZE];

  assert(mbmi->ref_frame[1] != INTRA_FRAME);
  const MV_REFERENCE_FRAME ref_frame_1 = mbmi->ref_frame[1];
  (void)tile_data;
  av1_invalid_rd_stats(&best_rd_stats);
  aom_clear_system_state();
  mbmi->num_proj_ref = 1;  // Assume num_proj_ref >= 1.
  MOTION_MODE last_motion_mode_allowed = SIMPLE_TRANSLATION;
  if (cm->switchable_motion_mode) {
    last_motion_mode_allowed = motion_mode_allowed(xd->global_motion, xd, mbmi,
                                                   cm->allow_warped_motion);
  }
  if (last_motion_mode_allowed == WARPED_CAUSAL) {
    mbmi->num_proj_ref = findSamples(cm, xd, mi_row, mi_col, pts0, pts_inref0);
  }
  const int total_samples = mbmi->num_proj_ref;
  if (total_samples == 0) {
    last_motion_mode_allowed = OBMC_CAUSAL;
  }

  const MB_MODE_INFO base_mbmi = *mbmi;
  MB_MODE_INFO best_mbmi;
  SimpleRDState *const simple_states = &args->simple_rd_state[mbmi->ref_mv_idx];
  const int switchable_rate =
      av1_is_interp_needed(xd) ? av1_get_switchable_rate(cm, x, xd) : 0;
  int64_t best_rd = INT64_MAX;
  int best_rate_mv = rate_mv0;
  const int identical_obmc_mv_field_detected =
      (cpi->sf.skip_obmc_in_uniform_mv_field ||
       cpi->sf.skip_wm_in_uniform_mv_field)
          ?
check_identical_obmc_mv_field(cm, xd, mi_row, mi_col) 9394 : 0; 9395 for (int mode_index = (int)SIMPLE_TRANSLATION; 9396 mode_index <= (int)last_motion_mode_allowed + interintra_allowed; 9397 mode_index++) { 9398 if (args->skip_motion_mode && mode_index) continue; 9399 if (cpi->sf.prune_single_motion_modes_by_simple_trans && 9400 args->single_ref_first_pass && mode_index) 9401 break; 9402 int tmp_rate2 = rate2_nocoeff; 9403 const int is_interintra_mode = mode_index > (int)last_motion_mode_allowed; 9404 int tmp_rate_mv = rate_mv0; 9405 9406 *mbmi = base_mbmi; 9407 if (is_interintra_mode) { 9408 mbmi->motion_mode = SIMPLE_TRANSLATION; 9409 } else { 9410 mbmi->motion_mode = (MOTION_MODE)mode_index; 9411 assert(mbmi->ref_frame[1] != INTRA_FRAME); 9412 } 9413 9414 if (cpi->oxcf.enable_obmc == 0 && mbmi->motion_mode == OBMC_CAUSAL) 9415 continue; 9416 9417 if (identical_obmc_mv_field_detected) { 9418 if (cpi->sf.skip_obmc_in_uniform_mv_field && 9419 mbmi->motion_mode == OBMC_CAUSAL) 9420 continue; 9421 if (cpi->sf.skip_wm_in_uniform_mv_field && 9422 mbmi->motion_mode == WARPED_CAUSAL) 9423 continue; 9424 } 9425 9426 if (mbmi->motion_mode == SIMPLE_TRANSLATION && !is_interintra_mode) { 9427 // SIMPLE_TRANSLATION mode: no need to recalculate. 9428 // The prediction is calculated before motion_mode_rd() is called in 9429 // handle_inter_mode() 9430 if (cpi->sf.prune_single_motion_modes_by_simple_trans && !is_comp_pred) { 9431 if (args->single_ref_first_pass == 0) { 9432 if (simple_states->early_skipped) { 9433 assert(simple_states->rd_stats.rdcost == INT64_MAX); 9434 return INT64_MAX; 9435 } 9436 if (simple_states->rd_stats.rdcost != INT64_MAX) { 9437 best_rd = simple_states->rd_stats.rdcost; 9438 best_rd_stats = simple_states->rd_stats; 9439 best_rd_stats_y = simple_states->rd_stats_y; 9440 best_rd_stats_uv = simple_states->rd_stats_uv; 9441 memcpy(best_blk_skip, simple_states->blk_skip, 9442 sizeof(x->blk_skip[0]) * xd->n4_h * xd->n4_w); 9443 best_xskip = simple_states->skip; 9444 best_disable_skip = simple_states->disable_skip; 9445 best_mbmi = *mbmi; 9446 } 9447 continue; 9448 } 9449 simple_states->early_skipped = 0; 9450 } 9451 } else if (mbmi->motion_mode == OBMC_CAUSAL) { 9452 const uint32_t cur_mv = mbmi->mv[0].as_int; 9453 assert(!is_comp_pred); 9454 if (have_newmv_in_inter_mode(this_mode)) { 9455 single_motion_search(cpi, x, bsize, mi_row, mi_col, 0, &tmp_rate_mv); 9456 mbmi->mv[0].as_int = x->best_mv.as_int; 9457 #if USE_DISCOUNT_NEWMV_TEST 9458 if (discount_newmv_test(cpi, x, this_mode, mbmi->mv[0])) { 9459 tmp_rate_mv = AOMMAX((tmp_rate_mv / NEW_MV_DISCOUNT_FACTOR), 1); 9460 } 9461 #endif 9462 tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv; 9463 } 9464 if (mbmi->mv[0].as_int != cur_mv) { 9465 av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize, 9466 0, av1_num_planes(cm) - 1); 9467 } 9468 av1_build_obmc_inter_prediction( 9469 cm, xd, mi_row, mi_col, args->above_pred_buf, args->above_pred_stride, 9470 args->left_pred_buf, args->left_pred_stride); 9471 } else if (mbmi->motion_mode == WARPED_CAUSAL) { 9472 int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE]; 9473 mbmi->motion_mode = WARPED_CAUSAL; 9474 mbmi->wm_params.wmtype = DEFAULT_WMTYPE; 9475 mbmi->interp_filters = av1_broadcast_interp_filter( 9476 av1_unswitchable_filter(cm->interp_filter)); 9477 9478 memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0)); 9479 memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0)); 9480 // Select the samples according to motion vector difference 9481 if 
(mbmi->num_proj_ref > 1) { 9482 mbmi->num_proj_ref = selectSamples(&mbmi->mv[0].as_mv, pts, pts_inref, 9483 mbmi->num_proj_ref, bsize); 9484 } 9485 9486 if (!find_projection(mbmi->num_proj_ref, pts, pts_inref, bsize, 9487 mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col, 9488 &mbmi->wm_params, mi_row, mi_col)) { 9489 // Refine MV for NEWMV mode 9490 assert(!is_comp_pred); 9491 if (have_newmv_in_inter_mode(this_mode)) { 9492 const int_mv mv0 = mbmi->mv[0]; 9493 const WarpedMotionParams wm_params0 = mbmi->wm_params; 9494 const int num_proj_ref0 = mbmi->num_proj_ref; 9495 9496 if (cpi->sf.prune_warp_using_wmtype) { 9497 TransformationType wmtype = get_wmtype(&mbmi->wm_params); 9498 if (wmtype < ROTZOOM) continue; 9499 } 9500 9501 // Refine MV in a small range. 9502 av1_refine_warped_mv(cpi, x, bsize, mi_row, mi_col, pts0, pts_inref0, 9503 total_samples); 9504 9505 // Keep the refined MV and WM parameters. 9506 if (mv0.as_int != mbmi->mv[0].as_int) { 9507 const int ref = refs[0]; 9508 const int_mv ref_mv = av1_get_ref_mv(x, 0); 9509 tmp_rate_mv = av1_mv_bit_cost(&mbmi->mv[0].as_mv, &ref_mv.as_mv, 9510 x->nmv_vec_cost, x->mv_cost_stack, 9511 MV_COST_WEIGHT); 9512 9513 if (cpi->sf.adaptive_motion_search) 9514 x->pred_mv[ref] = mbmi->mv[0].as_mv; 9515 9516 #if USE_DISCOUNT_NEWMV_TEST 9517 if (discount_newmv_test(cpi, x, this_mode, mbmi->mv[0])) { 9518 tmp_rate_mv = AOMMAX((tmp_rate_mv / NEW_MV_DISCOUNT_FACTOR), 1); 9519 } 9520 #endif 9521 tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv; 9522 } else { 9523 // Restore the old MV and WM parameters. 9524 mbmi->mv[0] = mv0; 9525 mbmi->wm_params = wm_params0; 9526 mbmi->num_proj_ref = num_proj_ref0; 9527 } 9528 } else { 9529 if (!check_if_optimal_warp(cpi, &mbmi->wm_params, mbmi->num_proj_ref)) 9530 continue; 9531 } 9532 9533 av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0, 9534 av1_num_planes(cm) - 1); 9535 } else { 9536 continue; 9537 } 9538 } else if (is_interintra_mode) { 9539 skip_interintra_mode = skip_interintra_based_on_first_pass_stats( 9540 cpi, x, bsize, mi_row, mi_col); 9541 if (skip_interintra_mode) continue; 9542 const int ret = handle_inter_intra_mode( 9543 cpi, x, bsize, mi_row, mi_col, mbmi, args, ref_best_rd, &tmp_rate_mv, 9544 &tmp_rate2, orig_dst); 9545 if (ret < 0) continue; 9546 } 9547 9548 x->skip = 0; 9549 rd_stats->dist = 0; 9550 rd_stats->sse = 0; 9551 rd_stats->skip = 1; 9552 rd_stats->rate = tmp_rate2; 9553 if (mbmi->motion_mode != WARPED_CAUSAL) rd_stats->rate += switchable_rate; 9554 if (interintra_allowed) { 9555 rd_stats->rate += x->interintra_cost[size_group_lookup[bsize]] 9556 [mbmi->ref_frame[1] == INTRA_FRAME]; 9557 if (mbmi->ref_frame[1] == INTRA_FRAME) { 9558 rd_stats->rate += x->interintra_mode_cost[size_group_lookup[bsize]] 9559 [mbmi->interintra_mode]; 9560 if (is_interintra_wedge_used(bsize)) { 9561 rd_stats->rate += 9562 x->wedge_interintra_cost[bsize][mbmi->use_wedge_interintra]; 9563 if (mbmi->use_wedge_interintra) { 9564 rd_stats->rate += 9565 av1_cost_literal(get_interintra_wedge_bits(bsize)); 9566 } 9567 } 9568 } 9569 } 9570 if ((last_motion_mode_allowed > SIMPLE_TRANSLATION) && 9571 (mbmi->ref_frame[1] != INTRA_FRAME)) { 9572 if (last_motion_mode_allowed == WARPED_CAUSAL) { 9573 rd_stats->rate += x->motion_mode_cost[bsize][mbmi->motion_mode]; 9574 } else { 9575 rd_stats->rate += x->motion_mode_cost1[bsize][mbmi->motion_mode]; 9576 } 9577 } 9578 9579 if (cpi->sf.model_based_motion_mode_rd_breakout && do_tx_search) { 9580 int model_rate; 9581 int64_t model_dist; 9582 
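      // Model-based breakout (a fast proxy, not the final rd): if even a
      // discounted estimate (6/8 of the modeled cost, per the check below)
      // exceeds the best rd seen so far, this motion mode cannot win and
      // the full transform search is skipped.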
model_rd_sb_fn[MODELRD_TYPE_MOTION_MODE_RD]( 9583 cpi, mbmi->sb_type, x, xd, 0, num_planes - 1, mi_row, mi_col, 9584 &model_rate, &model_dist, NULL, NULL, NULL, NULL, NULL); 9585 const int64_t est_rd = 9586 RDCOST(x->rdmult, rd_stats->rate + model_rate, model_dist); 9587 if ((est_rd >> 3) * 6 > ref_best_rd) { 9588 mbmi->ref_frame[1] = ref_frame_1; 9589 continue; 9590 } 9591 } 9592 9593 if (!do_tx_search) { 9594 int64_t curr_sse = -1; 9595 int est_residue_cost = 0; 9596 int64_t est_dist = 0; 9597 int64_t est_rd = 0; 9598 if (cpi->sf.inter_mode_rd_model_estimation == 1) { 9599 curr_sse = get_sse(cpi, x); 9600 const int has_est_rd = get_est_rate_dist(tile_data, bsize, curr_sse, 9601 &est_residue_cost, &est_dist); 9602 (void)has_est_rd; 9603 assert(has_est_rd); 9604 } else if (cpi->sf.inter_mode_rd_model_estimation == 2 || 9605 cpi->sf.use_nonrd_pick_mode) { 9606 model_rd_sb_fn[MODELRD_TYPE_MOTION_MODE_RD]( 9607 cpi, bsize, x, xd, 0, num_planes - 1, mi_row, mi_col, 9608 &est_residue_cost, &est_dist, NULL, &curr_sse, NULL, NULL, NULL); 9609 } 9610 est_rd = RDCOST(x->rdmult, rd_stats->rate + est_residue_cost, est_dist); 9611 if (est_rd * 0.8 > *best_est_rd) { 9612 mbmi->ref_frame[1] = ref_frame_1; 9613 continue; 9614 } 9615 const int mode_rate = rd_stats->rate; 9616 rd_stats->rate += est_residue_cost; 9617 rd_stats->dist = est_dist; 9618 rd_stats->rdcost = est_rd; 9619 *best_est_rd = AOMMIN(*best_est_rd, rd_stats->rdcost); 9620 if (cm->current_frame.reference_mode == SINGLE_REFERENCE) { 9621 if (!is_comp_pred) { 9622 assert(curr_sse >= 0); 9623 inter_modes_info_push(inter_modes_info, mode_rate, curr_sse, 9624 rd_stats->rdcost, false, NULL, rd_stats, 9625 rd_stats_y, rd_stats_uv, mbmi); 9626 } 9627 } else { 9628 assert(curr_sse >= 0); 9629 inter_modes_info_push(inter_modes_info, mode_rate, curr_sse, 9630 rd_stats->rdcost, false, NULL, rd_stats, 9631 rd_stats_y, rd_stats_uv, mbmi); 9632 } 9633 } else { 9634 if (!txfm_search(cpi, tile_data, x, bsize, mi_row, mi_col, rd_stats, 9635 rd_stats_y, rd_stats_uv, rd_stats->rate, ref_best_rd)) { 9636 if (rd_stats_y->rate == INT_MAX && mode_index == 0) { 9637 if (cpi->sf.prune_single_motion_modes_by_simple_trans && 9638 !is_comp_pred) { 9639 simple_states->early_skipped = 1; 9640 } 9641 return INT64_MAX; 9642 } 9643 continue; 9644 } 9645 9646 const int64_t curr_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist); 9647 ref_best_rd = AOMMIN(ref_best_rd, curr_rd); 9648 *disable_skip = 0; 9649 if (cpi->sf.inter_mode_rd_model_estimation == 1) { 9650 const int skip_ctx = av1_get_skip_context(xd); 9651 inter_mode_data_push(tile_data, mbmi->sb_type, rd_stats->sse, 9652 rd_stats->dist, 9653 rd_stats_y->rate + rd_stats_uv->rate + 9654 x->skip_cost[skip_ctx][mbmi->skip]); 9655 } 9656 9657 // 2 means to both do the tx search and also update the inter_modes_info 9658 // structure, since some modes will be conditionally TX searched. 
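      // (For reference: do_tx_search == 0 takes the model-estimate branch
      // above; any non-zero value runs the full transform search; 2
      // additionally records the result in inter_modes_info.)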
9659 if (do_tx_search == 2) { 9660 rd_stats->rdcost = curr_rd; 9661 inter_modes_info_push(inter_modes_info, rd_stats->rate, rd_stats->sse, 9662 curr_rd, true, x->blk_skip, rd_stats, rd_stats_y, 9663 rd_stats_uv, mbmi); 9664 } 9665 } 9666 9667 if (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV) { 9668 if (is_nontrans_global_motion(xd, xd->mi[0])) { 9669 mbmi->interp_filters = av1_broadcast_interp_filter( 9670 av1_unswitchable_filter(cm->interp_filter)); 9671 } 9672 } 9673 9674 const int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist); 9675 if (mode_index == 0) { 9676 args->simple_rd[this_mode][mbmi->ref_mv_idx][mbmi->ref_frame[0]] = tmp_rd; 9677 if (!is_comp_pred) { 9678 simple_states->rd_stats = *rd_stats; 9679 simple_states->rd_stats.rdcost = tmp_rd; 9680 simple_states->rd_stats_y = *rd_stats_y; 9681 simple_states->rd_stats_uv = *rd_stats_uv; 9682 memcpy(simple_states->blk_skip, x->blk_skip, 9683 sizeof(x->blk_skip[0]) * xd->n4_h * xd->n4_w); 9684 simple_states->skip = x->skip; 9685 simple_states->disable_skip = *disable_skip; 9686 } 9687 } 9688 if (mode_index == 0 || tmp_rd < best_rd) { 9689 best_mbmi = *mbmi; 9690 best_rd = tmp_rd; 9691 best_rd_stats = *rd_stats; 9692 best_rd_stats_y = *rd_stats_y; 9693 best_rate_mv = tmp_rate_mv; 9694 if (num_planes > 1) best_rd_stats_uv = *rd_stats_uv; 9695 memcpy(best_blk_skip, x->blk_skip, 9696 sizeof(x->blk_skip[0]) * xd->n4_h * xd->n4_w); 9697 best_xskip = x->skip; 9698 best_disable_skip = *disable_skip; 9699 if (best_xskip) break; 9700 } 9701 } 9702 mbmi->ref_frame[1] = ref_frame_1; 9703 *rate_mv = best_rate_mv; 9704 if (best_rd == INT64_MAX) { 9705 av1_invalid_rd_stats(rd_stats); 9706 restore_dst_buf(xd, *orig_dst, num_planes); 9707 return INT64_MAX; 9708 } 9709 *mbmi = best_mbmi; 9710 *rd_stats = best_rd_stats; 9711 *rd_stats_y = best_rd_stats_y; 9712 if (num_planes > 1) *rd_stats_uv = best_rd_stats_uv; 9713 memcpy(x->blk_skip, best_blk_skip, 9714 sizeof(x->blk_skip[0]) * xd->n4_h * xd->n4_w); 9715 x->skip = best_xskip; 9716 *disable_skip = best_disable_skip; 9717 9718 restore_dst_buf(xd, *orig_dst, num_planes); 9719 return 0; 9720 } 9721 9722 static int64_t skip_mode_rd(RD_STATS *rd_stats, const AV1_COMP *const cpi, 9723 MACROBLOCK *const x, BLOCK_SIZE bsize, int mi_row, 9724 int mi_col, const BUFFER_SET *const orig_dst) { 9725 const AV1_COMMON *cm = &cpi->common; 9726 const int num_planes = av1_num_planes(cm); 9727 MACROBLOCKD *const xd = &x->e_mbd; 9728 av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize, 0, 9729 av1_num_planes(cm) - 1); 9730 9731 int64_t total_sse = 0; 9732 for (int plane = 0; plane < num_planes; ++plane) { 9733 const struct macroblock_plane *const p = &x->plane[plane]; 9734 const struct macroblockd_plane *const pd = &xd->plane[plane]; 9735 const BLOCK_SIZE plane_bsize = 9736 get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y); 9737 const int bw = block_size_wide[plane_bsize]; 9738 const int bh = block_size_high[plane_bsize]; 9739 9740 av1_subtract_plane(x, bsize, plane); 9741 int64_t sse = aom_sum_squares_2d_i16(p->src_diff, bw, bw, bh) << 4; 9742 total_sse += sse; 9743 } 9744 const int skip_mode_ctx = av1_get_skip_mode_context(xd); 9745 rd_stats->dist = rd_stats->sse = total_sse; 9746 rd_stats->rate = x->skip_mode_cost[skip_mode_ctx][1]; 9747 rd_stats->rdcost = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist); 9748 9749 restore_dst_buf(xd, *orig_dst, num_planes); 9750 return 0; 9751 } 9752 9753 static INLINE int get_ref_mv_offset(PREDICTION_MODE single_mode, 9754 
                                    uint8_t ref_mv_idx) {
  assert(is_inter_singleref_mode(single_mode));
  int ref_mv_offset;
  if (single_mode == NEARESTMV) {
    ref_mv_offset = 0;
  } else if (single_mode == NEARMV) {
    ref_mv_offset = ref_mv_idx + 1;
  } else {
    ref_mv_offset = -1;
  }
  return ref_mv_offset;
}

static INLINE void get_this_mv(int_mv *this_mv, PREDICTION_MODE this_mode,
                               int ref_idx, int ref_mv_idx,
                               const MV_REFERENCE_FRAME *ref_frame,
                               const MB_MODE_INFO_EXT *mbmi_ext) {
  const uint8_t ref_frame_type = av1_ref_frame_type(ref_frame);
  const int is_comp_pred = ref_frame[1] > INTRA_FRAME;
  const PREDICTION_MODE single_mode =
      get_single_mode(this_mode, ref_idx, is_comp_pred);
  assert(is_inter_singleref_mode(single_mode));
  if (single_mode == NEWMV) {
    this_mv->as_int = INVALID_MV;
  } else if (single_mode == GLOBALMV) {
    *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]];
  } else {
    assert(single_mode == NEARMV || single_mode == NEARESTMV);
    const int ref_mv_offset = get_ref_mv_offset(single_mode, ref_mv_idx);
    if (ref_mv_offset < mbmi_ext->ref_mv_count[ref_frame_type]) {
      assert(ref_mv_offset >= 0);
      if (ref_idx == 0) {
        *this_mv =
            mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].this_mv;
      } else {
        *this_mv =
            mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].comp_mv;
      }
    } else {
      *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]];
    }
  }
}

// This function updates the non-NEWMV MVs for the current prediction mode.
static INLINE int build_cur_mv(int_mv *cur_mv, PREDICTION_MODE this_mode,
                               const AV1_COMMON *cm, const MACROBLOCK *x) {
  const MACROBLOCKD *xd = &x->e_mbd;
  const MB_MODE_INFO *mbmi = xd->mi[0];
  const int is_comp_pred = has_second_ref(mbmi);
  int ret = 1;
  for (int i = 0; i < is_comp_pred + 1; ++i) {
    int_mv this_mv;
    get_this_mv(&this_mv, this_mode, i, mbmi->ref_mv_idx, mbmi->ref_frame,
                x->mbmi_ext);
    const PREDICTION_MODE single_mode =
        get_single_mode(this_mode, i, is_comp_pred);
    if (single_mode == NEWMV) {
      cur_mv[i] = this_mv;
    } else {
      ret &= clamp_and_check_mv(cur_mv + i, this_mv, cm, x);
    }
  }
  return ret;
}

static INLINE int get_drl_cost(const MB_MODE_INFO *mbmi,
                               const MB_MODE_INFO_EXT *mbmi_ext,
                               int (*drl_mode_cost0)[2],
                               int8_t ref_frame_type) {
  int cost = 0;
  if (mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) {
    for (int idx = 0; idx < 2; ++idx) {
      if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
        uint8_t drl_ctx =
            av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx);
        cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != idx];
        if (mbmi->ref_mv_idx == idx) return cost;
      }
    }
    return cost;
  }

  if (have_nearmv_in_inter_mode(mbmi->mode)) {
    for (int idx = 1; idx < 3; ++idx) {
      if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
        uint8_t drl_ctx =
            av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx);
        cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != (idx - 1)];
        if (mbmi->ref_mv_idx == (idx - 1)) return cost;
      }
    }
    return cost;
  }
  return cost;
}

// Struct for buffers used by the compound_type_rd() function.
// For sizes and alignment of these arrays, refer to the
// alloc_compound_type_rd_buffers() function.
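// pred0/pred1 hold the two single-reference predictions that the masked
// compound (wedge/diffwtd) search blends; residual1 and diff10 are the
// intermediate signals it uses to pick a mask.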
typedef struct {
  uint8_t *pred0;
  uint8_t *pred1;
  int16_t *residual1;          // src - pred1
  int16_t *diff10;             // pred1 - pred0
  uint8_t *tmp_best_mask_buf;  // backup of the best segmentation mask
} CompoundTypeRdBuffers;

static int compound_type_rd(
    const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_col,
    int mi_row, int_mv *cur_mv, int mode_search_mask, int masked_compound_used,
    const BUFFER_SET *orig_dst, const BUFFER_SET *tmp_dst,
    CompoundTypeRdBuffers *buffers, int *rate_mv, int64_t *rd,
    RD_STATS *rd_stats, int64_t ref_best_rd, int *is_luma_interp_done) {
  const AV1_COMMON *cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  const PREDICTION_MODE this_mode = mbmi->mode;
  const int bw = block_size_wide[bsize];
  int rs2;
  int_mv best_mv[2];
  int best_tmp_rate_mv = *rate_mv;
  INTERINTER_COMPOUND_DATA best_compound_data;
  best_compound_data.type = COMPOUND_AVERAGE;
  uint8_t *preds0[1] = { buffers->pred0 };
  uint8_t *preds1[1] = { buffers->pred1 };
  int strides[1] = { bw };
  int tmp_rate_mv;
  const int num_pix = 1 << num_pels_log2_lookup[bsize];
  const int mask_len = 2 * num_pix * sizeof(uint8_t);
  COMPOUND_TYPE cur_type;
  int best_compmode_interinter_cost = 0;
  int calc_pred_masked_compound = 1;
  int64_t comp_dist[COMPOUND_TYPES] = { INT64_MAX, INT64_MAX, INT64_MAX,
                                        INT64_MAX };
  int32_t comp_rate[COMPOUND_TYPES] = { INT_MAX, INT_MAX, INT_MAX, INT_MAX };
  int64_t comp_model_rd[COMPOUND_TYPES] = { INT64_MAX, INT64_MAX, INT64_MAX,
                                            INT64_MAX };
  const int match_found =
      find_comp_rd_in_stats(cpi, x, mbmi, comp_rate, comp_dist, comp_model_rd);

  best_mv[0].as_int = cur_mv[0].as_int;
  best_mv[1].as_int = cur_mv[1].as_int;
  *rd = INT64_MAX;
  int rate_sum, tmp_skip_txfm_sb;
  int64_t dist_sum, tmp_skip_sse_sb;
  int64_t comp_best_model_rd = INT64_MAX;
  // Special handling if both compound_average and compound_distwtd
  // are to be searched: first compare the two modes using their modeled
  // rd costs, then call estimate_yrd_for_sb() only for the better of
  // the two.
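  // Sketch of that flow:
  //   modeled rd(COMPOUND_AVERAGE) vs modeled rd(COMPOUND_DISTWTD)
  //   -> run estimate_yrd_for_sb() only for the cheaper one and cache its
  //      rate/dist in comp_rate[]/comp_dist[] for reuse.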
9905 const int try_average_comp = (mode_search_mask & (1 << COMPOUND_AVERAGE)); 9906 const int try_distwtd_comp = 9907 ((mode_search_mask & (1 << COMPOUND_DISTWTD)) && 9908 cm->seq_params.order_hint_info.enable_dist_wtd_comp == 1 && 9909 cpi->sf.use_dist_wtd_comp_flag != DIST_WTD_COMP_DISABLED); 9910 const int try_average_and_distwtd_comp = 9911 try_average_comp && try_distwtd_comp && 9912 comp_rate[COMPOUND_AVERAGE] == INT_MAX && 9913 comp_rate[COMPOUND_DISTWTD] == INT_MAX; 9914 for (cur_type = COMPOUND_AVERAGE; cur_type < COMPOUND_TYPES; cur_type++) { 9915 if (((1 << cur_type) & mode_search_mask) == 0) { 9916 if (cur_type == COMPOUND_AVERAGE) restore_dst_buf(xd, *tmp_dst, 1); 9917 continue; 9918 } 9919 if (!is_interinter_compound_used(cur_type, bsize)) continue; 9920 if (cur_type >= COMPOUND_WEDGE && !masked_compound_used) break; 9921 if (cur_type == COMPOUND_DISTWTD && !try_distwtd_comp) continue; 9922 if (cur_type == COMPOUND_AVERAGE && try_average_and_distwtd_comp) continue; 9923 9924 int64_t comp_model_rd_cur = INT64_MAX; 9925 tmp_rate_mv = *rate_mv; 9926 int64_t best_rd_cur = INT64_MAX; 9927 const int comp_group_idx_ctx = get_comp_group_idx_context(xd); 9928 const int comp_index_ctx = get_comp_index_context(cm, xd); 9929 9930 if (cur_type == COMPOUND_DISTWTD && try_average_and_distwtd_comp) { 9931 int est_rate[2]; 9932 int64_t est_dist[2], est_rd[2]; 9933 9934 int masked_type_cost[2] = { 0, 0 }; 9935 mbmi->comp_group_idx = 0; 9936 9937 // First find the modeled rd cost for COMPOUND_AVERAGE 9938 mbmi->interinter_comp.type = COMPOUND_AVERAGE; 9939 mbmi->compound_idx = 1; 9940 if (masked_compound_used) { 9941 masked_type_cost[COMPOUND_AVERAGE] += 9942 x->comp_group_idx_cost[comp_group_idx_ctx][mbmi->comp_group_idx]; 9943 } 9944 masked_type_cost[COMPOUND_AVERAGE] += 9945 x->comp_idx_cost[comp_index_ctx][mbmi->compound_idx]; 9946 av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize, 9947 AOM_PLANE_Y, AOM_PLANE_Y); 9948 *is_luma_interp_done = 1; 9949 model_rd_sb_fn[MODELRD_CURVFIT]( 9950 cpi, bsize, x, xd, 0, 0, mi_row, mi_col, &est_rate[COMPOUND_AVERAGE], 9951 &est_dist[COMPOUND_AVERAGE], NULL, NULL, NULL, NULL, NULL); 9952 est_rate[COMPOUND_AVERAGE] += masked_type_cost[COMPOUND_AVERAGE]; 9953 est_rd[COMPOUND_AVERAGE] = 9954 RDCOST(x->rdmult, est_rate[COMPOUND_AVERAGE] + *rate_mv, 9955 est_dist[COMPOUND_AVERAGE]); 9956 restore_dst_buf(xd, *tmp_dst, 1); 9957 9958 // Next find the modeled rd cost for COMPOUND_DISTWTD 9959 mbmi->interinter_comp.type = COMPOUND_DISTWTD; 9960 mbmi->compound_idx = 0; 9961 if (masked_compound_used) { 9962 masked_type_cost[COMPOUND_DISTWTD] += 9963 x->comp_group_idx_cost[comp_group_idx_ctx][mbmi->comp_group_idx]; 9964 } 9965 masked_type_cost[COMPOUND_DISTWTD] += 9966 x->comp_idx_cost[comp_index_ctx][mbmi->compound_idx]; 9967 av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize, 9968 AOM_PLANE_Y, AOM_PLANE_Y); 9969 model_rd_sb_fn[MODELRD_CURVFIT]( 9970 cpi, bsize, x, xd, 0, 0, mi_row, mi_col, &est_rate[COMPOUND_DISTWTD], 9971 &est_dist[COMPOUND_DISTWTD], NULL, NULL, NULL, NULL, NULL); 9972 est_rate[COMPOUND_DISTWTD] += masked_type_cost[COMPOUND_DISTWTD]; 9973 est_rd[COMPOUND_DISTWTD] = 9974 RDCOST(x->rdmult, est_rate[COMPOUND_DISTWTD] + *rate_mv, 9975 est_dist[COMPOUND_DISTWTD]); 9976 9977 // Choose the better of the two based on modeled cost and call 9978 // estimate_yrd_for_sb() for that one. 
9979 if (est_rd[COMPOUND_AVERAGE] <= est_rd[COMPOUND_DISTWTD]) { 9980 mbmi->interinter_comp.type = COMPOUND_AVERAGE; 9981 mbmi->compound_idx = 1; 9982 restore_dst_buf(xd, *orig_dst, 1); 9983 RD_STATS est_rd_stats; 9984 const int64_t est_rd_ = 9985 estimate_yrd_for_sb(cpi, bsize, x, ref_best_rd, &est_rd_stats); 9986 rs2 = masked_type_cost[COMPOUND_AVERAGE]; 9987 if (est_rd_ != INT64_MAX) { 9988 best_rd_cur = RDCOST(x->rdmult, rs2 + *rate_mv + est_rd_stats.rate, 9989 est_rd_stats.dist); 9990 restore_dst_buf(xd, *tmp_dst, 1); 9991 comp_rate[COMPOUND_AVERAGE] = est_rd_stats.rate; 9992 comp_dist[COMPOUND_AVERAGE] = est_rd_stats.dist; 9993 comp_model_rd[COMPOUND_AVERAGE] = est_rd[COMPOUND_AVERAGE]; 9994 comp_model_rd_cur = est_rd[COMPOUND_AVERAGE]; 9995 } 9996 restore_dst_buf(xd, *tmp_dst, 1); 9997 } else { 9998 RD_STATS est_rd_stats; 9999 const int64_t est_rd_ = 10000 estimate_yrd_for_sb(cpi, bsize, x, ref_best_rd, &est_rd_stats); 10001 rs2 = masked_type_cost[COMPOUND_DISTWTD]; 10002 if (est_rd_ != INT64_MAX) { 10003 best_rd_cur = RDCOST(x->rdmult, rs2 + *rate_mv + est_rd_stats.rate, 10004 est_rd_stats.dist); 10005 comp_rate[COMPOUND_DISTWTD] = est_rd_stats.rate; 10006 comp_dist[COMPOUND_DISTWTD] = est_rd_stats.dist; 10007 comp_model_rd[COMPOUND_DISTWTD] = est_rd[COMPOUND_DISTWTD]; 10008 comp_model_rd_cur = est_rd[COMPOUND_DISTWTD]; 10009 } 10010 } 10011 } else { 10012 mbmi->interinter_comp.type = cur_type; 10013 int masked_type_cost = 0; 10014 if (cur_type == COMPOUND_AVERAGE || cur_type == COMPOUND_DISTWTD) { 10015 mbmi->comp_group_idx = 0; 10016 mbmi->compound_idx = (cur_type == COMPOUND_AVERAGE); 10017 if (masked_compound_used) { 10018 masked_type_cost += 10019 x->comp_group_idx_cost[comp_group_idx_ctx][mbmi->comp_group_idx]; 10020 } 10021 masked_type_cost += 10022 x->comp_idx_cost[comp_index_ctx][mbmi->compound_idx]; 10023 rs2 = masked_type_cost; 10024 const int64_t mode_rd = RDCOST(x->rdmult, rs2 + rd_stats->rate, 0); 10025 if (mode_rd < ref_best_rd) { 10026 // Reuse data if matching record is found 10027 if (comp_rate[cur_type] == INT_MAX) { 10028 av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, 10029 bsize, AOM_PLANE_Y, AOM_PLANE_Y); 10030 if (cur_type == COMPOUND_AVERAGE) *is_luma_interp_done = 1; 10031 RD_STATS est_rd_stats; 10032 const int64_t est_rd = 10033 estimate_yrd_for_sb(cpi, bsize, x, ref_best_rd, &est_rd_stats); 10034 if (comp_rate[cur_type] != INT_MAX) { 10035 assert(comp_rate[cur_type] == est_rd_stats.rate); 10036 assert(comp_dist[cur_type] == est_rd_stats.dist); 10037 } 10038 if (est_rd != INT64_MAX) { 10039 best_rd_cur = 10040 RDCOST(x->rdmult, rs2 + *rate_mv + est_rd_stats.rate, 10041 est_rd_stats.dist); 10042 model_rd_sb_fn[MODELRD_TYPE_MASKED_COMPOUND]( 10043 cpi, bsize, x, xd, 0, 0, mi_row, mi_col, &rate_sum, &dist_sum, 10044 &tmp_skip_txfm_sb, &tmp_skip_sse_sb, NULL, NULL, NULL); 10045 comp_model_rd_cur = 10046 RDCOST(x->rdmult, rs2 + *rate_mv + rate_sum, dist_sum); 10047 10048 // Backup rate and distortion for future reuse 10049 comp_rate[cur_type] = est_rd_stats.rate; 10050 comp_dist[cur_type] = est_rd_stats.dist; 10051 comp_model_rd[cur_type] = comp_model_rd_cur; 10052 } 10053 } else { 10054 // Calculate RD cost based on stored stats 10055 assert(comp_dist[cur_type] != INT64_MAX); 10056 best_rd_cur = 10057 RDCOST(x->rdmult, rs2 + *rate_mv + comp_rate[cur_type], 10058 comp_dist[cur_type]); 10059 comp_model_rd_cur = comp_model_rd[cur_type]; 10060 } 10061 } 10062 // use spare buffer for following compound type try 10063 if (cur_type == 
COMPOUND_AVERAGE) restore_dst_buf(xd, *tmp_dst, 1); 10064 } else { 10065 mbmi->comp_group_idx = 1; 10066 mbmi->compound_idx = 1; 10067 masked_type_cost += 10068 x->comp_group_idx_cost[comp_group_idx_ctx][mbmi->comp_group_idx]; 10069 masked_type_cost += 10070 x->compound_type_cost[bsize][cur_type - COMPOUND_WEDGE]; 10071 rs2 = masked_type_cost; 10072 10073 if (((*rd / cpi->max_comp_type_rd_threshold_div) * 10074 cpi->max_comp_type_rd_threshold_mul) < ref_best_rd) { 10075 const COMPOUND_TYPE compound_type = mbmi->interinter_comp.type; 10076 10077 if (!((compound_type == COMPOUND_WEDGE && 10078 !enable_wedge_interinter_search(x, cpi)) || 10079 (compound_type == COMPOUND_DIFFWTD && 10080 !cpi->oxcf.enable_diff_wtd_comp))) 10081 best_rd_cur = build_and_cost_compound_type( 10082 cpi, x, cur_mv, bsize, this_mode, &rs2, *rate_mv, orig_dst, 10083 &tmp_rate_mv, preds0, preds1, buffers->residual1, 10084 buffers->diff10, strides, mi_row, mi_col, rd_stats->rate, 10085 ref_best_rd, &calc_pred_masked_compound, comp_rate, comp_dist, 10086 comp_model_rd, comp_best_model_rd, &comp_model_rd_cur); 10087 } 10088 } 10089 } 10090 if (best_rd_cur < *rd) { 10091 *rd = best_rd_cur; 10092 comp_best_model_rd = comp_model_rd_cur; 10093 best_compound_data = mbmi->interinter_comp; 10094 if (masked_compound_used && cur_type >= COMPOUND_WEDGE) { 10095 memcpy(buffers->tmp_best_mask_buf, xd->seg_mask, mask_len); 10096 } 10097 best_compmode_interinter_cost = rs2; 10098 if (have_newmv_in_inter_mode(this_mode)) { 10099 if (cur_type == COMPOUND_WEDGE) { 10100 best_tmp_rate_mv = tmp_rate_mv; 10101 best_mv[0].as_int = mbmi->mv[0].as_int; 10102 best_mv[1].as_int = mbmi->mv[1].as_int; 10103 } else { 10104 best_mv[0].as_int = cur_mv[0].as_int; 10105 best_mv[1].as_int = cur_mv[1].as_int; 10106 } 10107 } 10108 } 10109 // reset to original mvs for next iteration 10110 mbmi->mv[0].as_int = cur_mv[0].as_int; 10111 mbmi->mv[1].as_int = cur_mv[1].as_int; 10112 } 10113 if (mbmi->interinter_comp.type != best_compound_data.type) { 10114 mbmi->comp_group_idx = (best_compound_data.type < COMPOUND_WEDGE) ? 
0 : 1; 10115 mbmi->compound_idx = !(best_compound_data.type == COMPOUND_DISTWTD); 10116 mbmi->interinter_comp = best_compound_data; 10117 memcpy(xd->seg_mask, buffers->tmp_best_mask_buf, mask_len); 10118 } 10119 if (have_newmv_in_inter_mode(this_mode)) { 10120 mbmi->mv[0].as_int = best_mv[0].as_int; 10121 mbmi->mv[1].as_int = best_mv[1].as_int; 10122 if (mbmi->interinter_comp.type == COMPOUND_WEDGE) { 10123 rd_stats->rate += best_tmp_rate_mv - *rate_mv; 10124 *rate_mv = best_tmp_rate_mv; 10125 } 10126 } 10127 restore_dst_buf(xd, *orig_dst, 1); 10128 if (!match_found) 10129 save_comp_rd_search_stat(x, mbmi, comp_rate, comp_dist, comp_model_rd, 10130 cur_mv); 10131 return best_compmode_interinter_cost; 10132 } 10133 10134 static INLINE int is_single_newmv_valid(HandleInterModeArgs *args, 10135 MB_MODE_INFO *mbmi, 10136 PREDICTION_MODE this_mode) { 10137 for (int ref_idx = 0; ref_idx < 2; ++ref_idx) { 10138 const PREDICTION_MODE single_mode = get_single_mode(this_mode, ref_idx, 1); 10139 const MV_REFERENCE_FRAME ref = mbmi->ref_frame[ref_idx]; 10140 if (single_mode == NEWMV && 10141 args->single_newmv_valid[mbmi->ref_mv_idx][ref] == 0) { 10142 return 0; 10143 } 10144 } 10145 return 1; 10146 } 10147 10148 static int get_drl_refmv_count(const MACROBLOCK *const x, 10149 const MV_REFERENCE_FRAME *ref_frame, 10150 PREDICTION_MODE mode) { 10151 MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; 10152 const int8_t ref_frame_type = av1_ref_frame_type(ref_frame); 10153 const int has_nearmv = have_nearmv_in_inter_mode(mode) ? 1 : 0; 10154 const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type]; 10155 const int only_newmv = (mode == NEWMV || mode == NEW_NEWMV); 10156 const int has_drl = 10157 (has_nearmv && ref_mv_count > 2) || (only_newmv && ref_mv_count > 1); 10158 const int ref_set = 10159 has_drl ? AOMMIN(MAX_REF_MV_SERCH, ref_mv_count - has_nearmv) : 1; 10160 10161 return ref_set; 10162 } 10163 10164 typedef struct { 10165 int64_t rd; 10166 int drl_cost; 10167 int rate_mv; 10168 int_mv mv; 10169 } inter_mode_info; 10170 10171 static int64_t handle_inter_mode( 10172 AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *x, 10173 BLOCK_SIZE bsize, RD_STATS *rd_stats, RD_STATS *rd_stats_y, 10174 RD_STATS *rd_stats_uv, int *disable_skip, int mi_row, int mi_col, 10175 HandleInterModeArgs *args, int64_t ref_best_rd, uint8_t *const tmp_buf, 10176 CompoundTypeRdBuffers *rd_buffers, int64_t *best_est_rd, 10177 const int do_tx_search, InterModesInfo *inter_modes_info) { 10178 const AV1_COMMON *cm = &cpi->common; 10179 const int num_planes = av1_num_planes(cm); 10180 MACROBLOCKD *xd = &x->e_mbd; 10181 MB_MODE_INFO *mbmi = xd->mi[0]; 10182 MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; 10183 const int is_comp_pred = has_second_ref(mbmi); 10184 const PREDICTION_MODE this_mode = mbmi->mode; 10185 int i; 10186 int refs[2] = { mbmi->ref_frame[0], 10187 (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) }; 10188 int rate_mv = 0; 10189 int64_t rd = INT64_MAX; 10190 10191 // do first prediction into the destination buffer. Do the next 10192 // prediction into a temporary buffer. Then keep track of which one 10193 // of these currently holds the best predictor, and use the other 10194 // one for future predictions. In the end, copy from tmp_buf to 10195 // dst if necessary. 
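  // orig_dst aliases the frame's current destination planes; tmp_dst carves
  // three MAX_SB_SQUARE-sized planes out of the caller-provided tmp_buf (see
  // the initializers below).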
10196 struct macroblockd_plane *p = xd->plane; 10197 const BUFFER_SET orig_dst = { 10198 { p[0].dst.buf, p[1].dst.buf, p[2].dst.buf }, 10199 { p[0].dst.stride, p[1].dst.stride, p[2].dst.stride }, 10200 }; 10201 const BUFFER_SET tmp_dst = { { tmp_buf, tmp_buf + 1 * MAX_SB_SQUARE, 10202 tmp_buf + 2 * MAX_SB_SQUARE }, 10203 { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE } }; 10204 10205 int skip_txfm_sb = 0; 10206 int64_t skip_sse_sb = INT64_MAX; 10207 int16_t mode_ctx; 10208 const int masked_compound_used = is_any_masked_compound_used(bsize) && 10209 cm->seq_params.enable_masked_compound; 10210 int64_t ret_val = INT64_MAX; 10211 const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame); 10212 RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv; 10213 int64_t best_rd = INT64_MAX; 10214 uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE]; 10215 MB_MODE_INFO best_mbmi = *mbmi; 10216 int best_disable_skip; 10217 int best_xskip; 10218 int64_t newmv_ret_val = INT64_MAX; 10219 int_mv backup_mv[2] = { { 0 } }; 10220 int backup_rate_mv = 0; 10221 inter_mode_info mode_info[MAX_REF_MV_SERCH]; 10222 10223 int mode_search_mask[2]; 10224 const int do_two_loop_comp_search = 10225 is_comp_pred && cpi->sf.two_loop_comp_search; 10226 if (do_two_loop_comp_search) { 10227 // TODO(debargha): Change this to try alternate ways of splitting 10228 // modes while doing two pass compound_mode search. 10229 mode_search_mask[0] = (1 << COMPOUND_AVERAGE); 10230 } else { 10231 mode_search_mask[0] = (1 << COMPOUND_AVERAGE) | (1 << COMPOUND_DISTWTD) | 10232 (1 << COMPOUND_WEDGE) | (1 << COMPOUND_DIFFWTD); 10233 } 10234 mode_search_mask[1] = ((1 << COMPOUND_AVERAGE) | (1 << COMPOUND_DISTWTD) | 10235 (1 << COMPOUND_WEDGE) | (1 << COMPOUND_DIFFWTD)) - 10236 mode_search_mask[0]; 10237 10238 // TODO(jingning): This should be deprecated shortly. 10239 const int has_nearmv = have_nearmv_in_inter_mode(mbmi->mode) ? 
1 : 0; 10240 const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode); 10241 10242 for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) { 10243 mode_info[ref_mv_idx].mv.as_int = INVALID_MV; 10244 mode_info[ref_mv_idx].rd = INT64_MAX; 10245 10246 if (cpi->sf.reduce_inter_modes && ref_mv_idx > 0) { 10247 if (mbmi->ref_frame[0] == LAST2_FRAME || 10248 mbmi->ref_frame[0] == LAST3_FRAME || 10249 mbmi->ref_frame[1] == LAST2_FRAME || 10250 mbmi->ref_frame[1] == LAST3_FRAME) { 10251 if (mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx + has_nearmv] 10252 .weight < REF_CAT_LEVEL) { 10253 continue; 10254 } 10255 } 10256 } 10257 if (cpi->sf.prune_single_motion_modes_by_simple_trans && !is_comp_pred && 10258 args->single_ref_first_pass == 0) { 10259 if (args->simple_rd_state[ref_mv_idx].early_skipped) { 10260 continue; 10261 } 10262 } 10263 av1_init_rd_stats(rd_stats); 10264 10265 mbmi->interinter_comp.type = COMPOUND_AVERAGE; 10266 mbmi->comp_group_idx = 0; 10267 mbmi->compound_idx = 1; 10268 if (mbmi->ref_frame[1] == INTRA_FRAME) mbmi->ref_frame[1] = NONE_FRAME; 10269 10270 mode_ctx = 10271 av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame); 10272 10273 mbmi->num_proj_ref = 0; 10274 mbmi->motion_mode = SIMPLE_TRANSLATION; 10275 mbmi->ref_mv_idx = ref_mv_idx; 10276 10277 if (is_comp_pred && (!is_single_newmv_valid(args, mbmi, this_mode))) { 10278 continue; 10279 } 10280 10281 rd_stats->rate += args->ref_frame_cost + args->single_comp_cost; 10282 const int drl_cost = 10283 get_drl_cost(mbmi, mbmi_ext, x->drl_mode_cost0, ref_frame_type); 10284 rd_stats->rate += drl_cost; 10285 mode_info[ref_mv_idx].drl_cost = drl_cost; 10286 10287 if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd && 10288 mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) { 10289 continue; 10290 } 10291 10292 const RD_STATS backup_rd_stats = *rd_stats; 10293 10294 for (int comp_loop_idx = 0; comp_loop_idx <= do_two_loop_comp_search; 10295 ++comp_loop_idx) { 10296 int rs = 0; 10297 int compmode_interinter_cost = 0; 10298 10299 if (is_comp_pred && comp_loop_idx == 1) *rd_stats = backup_rd_stats; 10300 10301 int_mv cur_mv[2]; 10302 if (!build_cur_mv(cur_mv, this_mode, cm, x)) { 10303 continue; 10304 } 10305 if (have_newmv_in_inter_mode(this_mode)) { 10306 if (comp_loop_idx == 1) { 10307 cur_mv[0] = backup_mv[0]; 10308 cur_mv[1] = backup_mv[1]; 10309 rate_mv = backup_rate_mv; 10310 } 10311 10312 #if CONFIG_COLLECT_COMPONENT_TIMING 10313 start_timing(cpi, handle_newmv_time); 10314 #endif 10315 if (cpi->sf.prune_single_motion_modes_by_simple_trans && 10316 args->single_ref_first_pass == 0 && !is_comp_pred) { 10317 const int ref0 = mbmi->ref_frame[0]; 10318 newmv_ret_val = args->single_newmv_valid[ref_mv_idx][ref0] ? 
              0 : 1;
          cur_mv[0] = args->single_newmv[ref_mv_idx][ref0];
          rate_mv = args->single_newmv_rate[ref_mv_idx][ref0];
        } else if (comp_loop_idx == 0) {
          newmv_ret_val = handle_newmv(cpi, x, bsize, cur_mv, mi_row, mi_col,
                                       &rate_mv, args);

          // Store cur_mv and rate_mv so that they can be restored in the
          // next iteration of the loop.
          backup_mv[0] = cur_mv[0];
          backup_mv[1] = cur_mv[1];
          backup_rate_mv = rate_mv;
        }
#if CONFIG_COLLECT_COMPONENT_TIMING
        end_timing(cpi, handle_newmv_time);
#endif

        if (newmv_ret_val != 0) {
          continue;
        } else {
          rd_stats->rate += rate_mv;
        }

        if (cpi->sf.skip_repeated_newmv) {
          if (!is_comp_pred && this_mode == NEWMV && ref_mv_idx > 0) {
            int skip = 0;
            int this_rate_mv = 0;
            for (i = 0; i < ref_mv_idx; ++i) {
              // Check if the motion search result is the same as a previous
              // result.
              if (cur_mv[0].as_int == args->single_newmv[i][refs[0]].as_int) {
                // If the compared mode has no valid rd, it is unlikely that
                // this mode will be the best mode.
                if (mode_info[i].rd == INT64_MAX) {
                  skip = 1;
                  break;
                }
                // Compare the costs, including the drl cost and mv cost.
                if (mode_info[i].mv.as_int != INVALID_MV) {
                  const int compare_cost =
                      mode_info[i].rate_mv + mode_info[i].drl_cost;
                  const int_mv ref_mv = av1_get_ref_mv(x, 0);
                  this_rate_mv = av1_mv_bit_cost(
                      &mode_info[i].mv.as_mv, &ref_mv.as_mv, x->nmv_vec_cost,
                      x->mv_cost_stack, MV_COST_WEIGHT);
                  const int this_cost = this_rate_mv + drl_cost;

                  if (compare_cost < this_cost) {
                    skip = 1;
                    break;
                  } else {
                    // If the cost is less than the current best result, make
                    // this the best and update the corresponding variables.
                    if (best_mbmi.ref_mv_idx == i) {
                      assert(best_rd != INT64_MAX);
                      best_mbmi.ref_mv_idx = ref_mv_idx;
                      best_rd_stats.rate += this_cost - compare_cost;
                      best_rd = RDCOST(x->rdmult, best_rd_stats.rate,
                                       best_rd_stats.dist);
                      if (best_rd < ref_best_rd) ref_best_rd = best_rd;
                      skip = 1;
                      break;
                    }
                  }
                }
              }
            }
            if (skip) {
              args->modelled_rd[this_mode][ref_mv_idx][refs[0]] =
                  args->modelled_rd[this_mode][i][refs[0]];
              args->simple_rd[this_mode][ref_mv_idx][refs[0]] =
                  args->simple_rd[this_mode][i][refs[0]];
              mode_info[ref_mv_idx].rd = mode_info[i].rd;
              mode_info[ref_mv_idx].rate_mv = this_rate_mv;
              mode_info[ref_mv_idx].mv.as_int = mode_info[i].mv.as_int;

              restore_dst_buf(xd, orig_dst, num_planes);
              continue;
            }
          }
        }
      }
      for (i = 0; i < is_comp_pred + 1; ++i) {
        mbmi->mv[i].as_int = cur_mv[i].as_int;
      }
      const int ref_mv_cost = cost_mv_ref(x, this_mode, mode_ctx);
#if USE_DISCOUNT_NEWMV_TEST
      // We don't include the cost of the second reference here, because there
      // are only three options: Last/Golden, ARF/Last or Golden/ARF, or in
      // other words if you present them in that order, the second one is
      // always known if the first is known.
      //
      // Under some circumstances we discount the cost of new mv mode to
      // encourage initiation of a motion field.
      if (discount_newmv_test(cpi, x, this_mode, mbmi->mv[0])) {
        // discount_newmv_test only applies the discount to NEWMV mode.
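        // The discounted rate below is the cheaper of signalling NEWMV or
        // NEARESTMV, which caps the penalty for starting a new motion field.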
        assert(this_mode == NEWMV);
        rd_stats->rate += AOMMIN(cost_mv_ref(x, this_mode, mode_ctx),
                                 cost_mv_ref(x, NEARESTMV, mode_ctx));
      } else {
        rd_stats->rate += ref_mv_cost;
      }
#else
      rd_stats->rate += ref_mv_cost;
#endif

      if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd &&
          mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) {
        continue;
      }

#if CONFIG_COLLECT_COMPONENT_TIMING
      start_timing(cpi, compound_type_rd_time);
#endif
      int skip_build_pred = 0;
      if (is_comp_pred) {
        if (mode_search_mask[comp_loop_idx] == (1 << COMPOUND_AVERAGE)) {
          // Only compound_average.
          mbmi->interinter_comp.type = COMPOUND_AVERAGE;
          mbmi->num_proj_ref = 0;
          mbmi->motion_mode = SIMPLE_TRANSLATION;
          mbmi->comp_group_idx = 0;
          mbmi->compound_idx = 1;
          const int comp_index_ctx = get_comp_index_context(cm, xd);
          compmode_interinter_cost +=
              x->comp_idx_cost[comp_index_ctx][mbmi->compound_idx];
        } else if (mode_search_mask[comp_loop_idx] == (1 << COMPOUND_DISTWTD)) {
          // Only compound_distwtd.
          if (!cm->seq_params.order_hint_info.enable_dist_wtd_comp ||
              cpi->sf.use_dist_wtd_comp_flag == DIST_WTD_COMP_DISABLED ||
              (do_two_loop_comp_search && mbmi->mode == GLOBAL_GLOBALMV))
            continue;
          mbmi->interinter_comp.type = COMPOUND_DISTWTD;
          mbmi->num_proj_ref = 0;
          mbmi->motion_mode = SIMPLE_TRANSLATION;
          mbmi->comp_group_idx = 0;
          mbmi->compound_idx = 0;
          const int comp_index_ctx = get_comp_index_context(cm, xd);
          compmode_interinter_cost +=
              x->comp_idx_cost[comp_index_ctx][mbmi->compound_idx];
        } else {
          // Find a matching interp filter or set the default interp filter.
          const int need_search =
              av1_is_interp_needed(xd) && av1_is_interp_search_needed(xd);
          int match_found = -1;
          const InterpFilter assign_filter = cm->interp_filter;
          int is_luma_interp_done = 0;
          if (cpi->sf.skip_repeat_interpolation_filter_search && need_search) {
            match_found = find_interp_filter_in_stats(x, mbmi);
          }
          if (!need_search || match_found == -1) {
            set_default_interp_filters(mbmi, assign_filter);
          }

          int64_t best_rd_compound;
          compmode_interinter_cost = compound_type_rd(
              cpi, x, bsize, mi_col, mi_row, cur_mv,
              mode_search_mask[comp_loop_idx], masked_compound_used, &orig_dst,
              &tmp_dst, rd_buffers, &rate_mv, &best_rd_compound, rd_stats,
              ref_best_rd, &is_luma_interp_done);
          if (ref_best_rd < INT64_MAX &&
              (best_rd_compound >> 4) * (11 + 2 * do_two_loop_comp_search) >
                  ref_best_rd) {
            restore_dst_buf(xd, orig_dst, num_planes);
            continue;
          }
          // No need to call av1_enc_build_inter_predictor() for luma if
          // COMPOUND_AVERAGE is selected, because it is the first candidate
          // in compound_type_rd() and the subsequent compound type searches
          // use the tmp_dst buffer.
          if (mbmi->interinter_comp.type == COMPOUND_AVERAGE &&
              is_luma_interp_done) {
            if (num_planes > 1) {
              av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst,
                                            bsize, AOM_PLANE_U, num_planes - 1);
            }
            skip_build_pred = 1;
          }
        }
      }
#if CONFIG_COLLECT_COMPONENT_TIMING
      end_timing(cpi, compound_type_rd_time);
#endif

#if CONFIG_COLLECT_COMPONENT_TIMING
      start_timing(cpi, interpolation_filter_search_time);
#endif
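      // Search the switchable interpolation filters for this prediction; rs
      // receives the filter signalling rate and rd a model-based estimate
      // for the chosen filters.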
10505 ret_val = interpolation_filter_search( 10506 x, cpi, tile_data, bsize, mi_row, mi_col, &tmp_dst, &orig_dst, 10507 args->single_filter, &rd, &rs, &skip_txfm_sb, &skip_sse_sb, 10508 &skip_build_pred, args, ref_best_rd); 10509 #if CONFIG_COLLECT_COMPONENT_TIMING 10510 end_timing(cpi, interpolation_filter_search_time); 10511 #endif 10512 if (args->modelled_rd != NULL && !is_comp_pred) { 10513 args->modelled_rd[this_mode][ref_mv_idx][refs[0]] = rd; 10514 } 10515 if (ret_val != 0) { 10516 restore_dst_buf(xd, orig_dst, num_planes); 10517 continue; 10518 } else if (cpi->sf.model_based_post_interp_filter_breakout && 10519 ref_best_rd != INT64_MAX && (rd >> 3) * 3 > ref_best_rd) { 10520 restore_dst_buf(xd, orig_dst, num_planes); 10521 break; 10522 } 10523 10524 if (!is_comp_pred) 10525 args->single_filter[this_mode][refs[0]] = 10526 av1_extract_interp_filter(mbmi->interp_filters, 0); 10527 10528 if (args->modelled_rd != NULL) { 10529 if (is_comp_pred) { 10530 const int mode0 = compound_ref0_mode(this_mode); 10531 const int mode1 = compound_ref1_mode(this_mode); 10532 const int64_t mrd = 10533 AOMMIN(args->modelled_rd[mode0][ref_mv_idx][refs[0]], 10534 args->modelled_rd[mode1][ref_mv_idx][refs[1]]); 10535 if ((rd >> 3) * 6 > mrd && ref_best_rd < INT64_MAX) { 10536 restore_dst_buf(xd, orig_dst, num_planes); 10537 continue; 10538 } 10539 } 10540 } 10541 rd_stats->rate += compmode_interinter_cost; 10542 if (skip_build_pred != 1) { 10543 av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst, bsize, 10544 0, av1_num_planes(cm) - 1); 10545 } 10546 10547 if (cpi->sf.second_loop_comp_fast_tx_search && comp_loop_idx == 1) { 10548 // TODO(chengchen): this speed feature introduces big loss. 10549 // Need better estimation of rate distortion. 10550 int dummy_rate; 10551 int64_t dummy_dist; 10552 int plane_rate[MAX_MB_PLANE] = { 0 }; 10553 int64_t plane_sse[MAX_MB_PLANE] = { 0 }; 10554 int64_t plane_dist[MAX_MB_PLANE] = { 0 }; 10555 10556 model_rd_sb_fn[MODELRD_TYPE_DIST_WTD_COMPOUND]( 10557 cpi, bsize, x, xd, 0, num_planes - 1, mi_row, mi_col, &dummy_rate, 10558 &dummy_dist, &skip_txfm_sb, &skip_sse_sb, plane_rate, plane_sse, 10559 plane_dist); 10560 10561 rd_stats->rate += rs; 10562 rd_stats->rate += plane_rate[0] + plane_rate[1] + plane_rate[2]; 10563 rd_stats_y->rate = plane_rate[0]; 10564 rd_stats_uv->rate = plane_rate[1] + plane_rate[2]; 10565 rd_stats->sse = plane_sse[0] + plane_sse[1] + plane_sse[2]; 10566 rd_stats_y->sse = plane_sse[0]; 10567 rd_stats_uv->sse = plane_sse[1] + plane_sse[2]; 10568 rd_stats->dist = plane_dist[0] + plane_dist[1] + plane_dist[2]; 10569 rd_stats_y->dist = plane_dist[0]; 10570 rd_stats_uv->dist = plane_dist[1] + plane_dist[2]; 10571 } else { 10572 #if CONFIG_COLLECT_COMPONENT_TIMING 10573 start_timing(cpi, motion_mode_rd_time); 10574 #endif 10575 ret_val = motion_mode_rd(cpi, tile_data, x, bsize, rd_stats, rd_stats_y, 10576 rd_stats_uv, disable_skip, mi_row, mi_col, 10577 args, ref_best_rd, refs, &rate_mv, &orig_dst, 10578 best_est_rd, do_tx_search, inter_modes_info); 10579 #if CONFIG_COLLECT_COMPONENT_TIMING 10580 end_timing(cpi, motion_mode_rd_time); 10581 #endif 10582 } 10583 mode_info[ref_mv_idx].mv.as_int = mbmi->mv[0].as_int; 10584 mode_info[ref_mv_idx].rate_mv = rate_mv; 10585 if (ret_val != INT64_MAX) { 10586 int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist); 10587 mode_info[ref_mv_idx].rd = tmp_rd; 10588 if (tmp_rd < best_rd) { 10589 best_rd_stats = *rd_stats; 10590 best_rd_stats_y = *rd_stats_y; 10591 best_rd_stats_uv = *rd_stats_uv; 
          best_rd = tmp_rd;
          best_mbmi = *mbmi;
          best_disable_skip = *disable_skip;
          best_xskip = x->skip;
          memcpy(best_blk_skip, x->blk_skip,
                 sizeof(best_blk_skip[0]) * xd->n4_h * xd->n4_w);
        }

        if (tmp_rd < ref_best_rd) {
          ref_best_rd = tmp_rd;
        }
      }
      restore_dst_buf(xd, orig_dst, num_planes);
    }
  }

  if (best_rd == INT64_MAX) return INT64_MAX;

  // Re-instate the status of the best choice.
  *rd_stats = best_rd_stats;
  *rd_stats_y = best_rd_stats_y;
  *rd_stats_uv = best_rd_stats_uv;
  *mbmi = best_mbmi;
  *disable_skip = best_disable_skip;
  x->skip = best_xskip;
  assert(IMPLIES(mbmi->comp_group_idx == 1,
                 mbmi->interinter_comp.type != COMPOUND_AVERAGE));
  memcpy(x->blk_skip, best_blk_skip,
         sizeof(best_blk_skip[0]) * xd->n4_h * xd->n4_w);

  return RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
}

static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
                                       RD_STATS *rd_stats, BLOCK_SIZE bsize,
                                       int64_t best_rd) {
  const AV1_COMMON *const cm = &cpi->common;
  if (!av1_allow_intrabc(cm) || !cpi->oxcf.enable_intrabc) return INT64_MAX;
  const int num_planes = av1_num_planes(cm);

  MACROBLOCKD *const xd = &x->e_mbd;
  const TileInfo *tile = &xd->tile;
  MB_MODE_INFO *mbmi = xd->mi[0];
  const int mi_row = -xd->mb_to_top_edge / (8 * MI_SIZE);
  const int mi_col = -xd->mb_to_left_edge / (8 * MI_SIZE);
  const int w = block_size_wide[bsize];
  const int h = block_size_high[bsize];
  const int sb_row = mi_row >> cm->seq_params.mib_size_log2;
  const int sb_col = mi_col >> cm->seq_params.mib_size_log2;

  MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
  MV_REFERENCE_FRAME ref_frame = INTRA_FRAME;
  av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
                   mbmi_ext->ref_mv_stack, NULL, mbmi_ext->global_mvs, mi_row,
                   mi_col, mbmi_ext->mode_context);

  int_mv nearestmv, nearmv;
  av1_find_best_ref_mvs_from_stack(0, mbmi_ext, ref_frame, &nearestmv, &nearmv,
                                   0);

  if (nearestmv.as_int == INVALID_MV) {
    nearestmv.as_int = 0;
  }
  if (nearmv.as_int == INVALID_MV) {
    nearmv.as_int = 0;
  }

  int_mv dv_ref = nearestmv.as_int == 0 ? nearmv : nearestmv;
  if (dv_ref.as_int == 0)
    av1_find_ref_dv(&dv_ref, tile, cm->seq_params.mib_size, mi_row, mi_col);
  // The ref DV should not have sub-pel precision.
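  // MVs (and DVs) are stored in units of 1/8 pel, so the low three bits of
  // each component hold the sub-pel fraction: a displacement of N full pels
  // is stored as N * 8. For example, a column offset of 24 means 3 pels, and
  // (24 & 7) == 0, which is exactly what the asserts below check.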
  assert((dv_ref.as_mv.col & 7) == 0);
  assert((dv_ref.as_mv.row & 7) == 0);
  mbmi_ext->ref_mv_stack[INTRA_FRAME][0].this_mv = dv_ref;

  struct buf_2d yv12_mb[MAX_MB_PLANE];
  av1_setup_pred_block(xd, yv12_mb, xd->cur_buf, mi_row, mi_col, NULL, NULL,
                       num_planes);
  for (int i = 0; i < num_planes; ++i) {
    xd->plane[i].pre[0] = yv12_mb[i];
  }

  enum IntrabcMotionDirection {
    IBC_MOTION_ABOVE,
    IBC_MOTION_LEFT,
    IBC_MOTION_DIRECTIONS
  };

  MB_MODE_INFO best_mbmi = *mbmi;
  RD_STATS best_rdstats = *rd_stats;
  int best_skip = x->skip;

  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE] = { 0 };
  for (enum IntrabcMotionDirection dir = IBC_MOTION_ABOVE;
       dir < IBC_MOTION_DIRECTIONS; ++dir) {
    const MvLimits tmp_mv_limits = x->mv_limits;
    switch (dir) {
      case IBC_MOTION_ABOVE:
        x->mv_limits.col_min = (tile->mi_col_start - mi_col) * MI_SIZE;
        x->mv_limits.col_max = (tile->mi_col_end - mi_col) * MI_SIZE - w;
        x->mv_limits.row_min = (tile->mi_row_start - mi_row) * MI_SIZE;
        x->mv_limits.row_max =
            (sb_row * cm->seq_params.mib_size - mi_row) * MI_SIZE - h;
        break;
      case IBC_MOTION_LEFT:
        x->mv_limits.col_min = (tile->mi_col_start - mi_col) * MI_SIZE;
        x->mv_limits.col_max =
            (sb_col * cm->seq_params.mib_size - mi_col) * MI_SIZE - w;
        // TODO(aconverse@google.com): Minimize the overlap between above and
        // left areas.
        x->mv_limits.row_min = (tile->mi_row_start - mi_row) * MI_SIZE;
        int bottom_coded_mi_edge =
            AOMMIN((sb_row + 1) * cm->seq_params.mib_size, tile->mi_row_end);
        x->mv_limits.row_max = (bottom_coded_mi_edge - mi_row) * MI_SIZE - h;
        break;
      default: assert(0);
    }
    assert(x->mv_limits.col_min >= tmp_mv_limits.col_min);
    assert(x->mv_limits.col_max <= tmp_mv_limits.col_max);
    assert(x->mv_limits.row_min >= tmp_mv_limits.row_min);
    assert(x->mv_limits.row_max <= tmp_mv_limits.row_max);
    av1_set_mv_search_range(&x->mv_limits, &dv_ref.as_mv);

    if (x->mv_limits.col_max < x->mv_limits.col_min ||
        x->mv_limits.row_max < x->mv_limits.row_min) {
      x->mv_limits = tmp_mv_limits;
      continue;
    }

    int step_param = cpi->mv_step_param;
    MV mvp_full = dv_ref.as_mv;
    mvp_full.col >>= 3;
    mvp_full.row >>= 3;
    const int sadpb = x->sadperbit16;
    int cost_list[5];
    const int bestsme = av1_full_pixel_search(
        cpi, x, bsize, &mvp_full, step_param, cpi->sf.mv.search_method, 0,
        sadpb, cond_cost_list(cpi, cost_list), &dv_ref.as_mv, INT_MAX, 1,
        (MI_SIZE * mi_col), (MI_SIZE * mi_row), 1,
        &cpi->ss_cfg[SS_CFG_LOOKAHEAD]);

    x->mv_limits = tmp_mv_limits;
    if (bestsme == INT_MAX) continue;
    mvp_full = x->best_mv.as_mv;
    const MV dv = { .row = mvp_full.row * 8, .col = mvp_full.col * 8 };
    if (mv_check_bounds(&x->mv_limits, &dv)) continue;
    if (!av1_is_dv_valid(dv, cm, xd, mi_row, mi_col, bsize,
                         cm->seq_params.mib_size_log2))
      continue;

    // The DV should not have sub-pel precision.
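    // IntraBC blocks are coded with inter-style syntax inside an intra frame:
    // the winning block vector is carried in mv[0] at full-pel precision, the
    // mode is reported as DC_PRED with use_intrabc set, and the interp
    // filters are pinned to BILINEAR, as the setup below shows.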
    assert((dv.col & 7) == 0);
    assert((dv.row & 7) == 0);
    memset(&mbmi->palette_mode_info, 0, sizeof(mbmi->palette_mode_info));
    mbmi->filter_intra_mode_info.use_filter_intra = 0;
    mbmi->use_intrabc = 1;
    mbmi->mode = DC_PRED;
    mbmi->uv_mode = UV_DC_PRED;
    mbmi->motion_mode = SIMPLE_TRANSLATION;
    mbmi->mv[0].as_mv = dv;
    mbmi->interp_filters = av1_broadcast_interp_filter(BILINEAR);
    mbmi->skip = 0;
    x->skip = 0;
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                  av1_num_planes(cm) - 1);

    int *dvcost[2] = { (int *)&cpi->dv_cost[0][MV_MAX],
                       (int *)&cpi->dv_cost[1][MV_MAX] };
    // TODO(aconverse@google.com): The full motion field defining discount
    // in MV_COST_WEIGHT is too large. Explore other values.
    const int rate_mv = av1_mv_bit_cost(&dv, &dv_ref.as_mv, cpi->dv_joint_cost,
                                        dvcost, MV_COST_WEIGHT_SUB);
    const int rate_mode = x->intrabc_cost[1];
    RD_STATS rd_stats_yuv, rd_stats_y, rd_stats_uv;
    if (!txfm_search(cpi, NULL, x, bsize, mi_row, mi_col, &rd_stats_yuv,
                     &rd_stats_y, &rd_stats_uv, rate_mode + rate_mv,
                     INT64_MAX))
      continue;
    rd_stats_yuv.rdcost =
        RDCOST(x->rdmult, rd_stats_yuv.rate, rd_stats_yuv.dist);
    if (rd_stats_yuv.rdcost < best_rd) {
      best_rd = rd_stats_yuv.rdcost;
      best_mbmi = *mbmi;
      best_skip = mbmi->skip;
      best_rdstats = rd_stats_yuv;
      memcpy(best_blk_skip, x->blk_skip,
             sizeof(x->blk_skip[0]) * xd->n4_h * xd->n4_w);
    }
  }
  *mbmi = best_mbmi;
  *rd_stats = best_rdstats;
  x->skip = best_skip;
  memcpy(x->blk_skip, best_blk_skip,
         sizeof(x->blk_skip[0]) * xd->n4_h * xd->n4_w);
#if CONFIG_RD_DEBUG
  mbmi->rd_stats = *rd_stats;
#endif
  return best_rd;
}

void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x, int mi_row,
                               int mi_col, RD_STATS *rd_cost, BLOCK_SIZE bsize,
                               PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  const int num_planes = av1_num_planes(cm);
  int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
  int y_skip = 0, uv_skip = 0;
  int64_t dist_y = 0, dist_uv = 0;
  TX_SIZE max_uv_tx_size;

  ctx->skip = 0;
  mbmi->ref_frame[0] = INTRA_FRAME;
  mbmi->ref_frame[1] = NONE_FRAME;
  mbmi->use_intrabc = 0;
  mbmi->mv[0].as_int = 0;

  const int64_t intra_yrd =
      rd_pick_intra_sby_mode(cpi, x, mi_row, mi_col, &rate_y, &rate_y_tokenonly,
                             &dist_y, &y_skip, bsize, best_rd, ctx);

  if (intra_yrd < best_rd) {
    // Only store the reconstructed luma when there is chroma RDO. When there
    // is no chroma RDO, the reconstructed luma will be stored in
    // encode_superblock().
    xd->cfl.is_chroma_reference =
        is_chroma_reference(mi_row, mi_col, bsize,
                            cm->seq_params.subsampling_x,
                            cm->seq_params.subsampling_y);
    xd->cfl.store_y = store_cfl_required_rdo(cm, x);
    if (xd->cfl.store_y) {
      // Restore the reconstructed luma values.
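      // (CfL derives the chroma predictor from the reconstructed luma pixels,
      // so the winning luma mode is re-encoded here, with its stored blk_skip
      // decisions, before the chroma RD search below can evaluate CfL.)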
      memcpy(x->blk_skip, ctx->blk_skip,
             sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);
      av1_encode_intra_block_plane(cpi, x, bsize, AOM_PLANE_Y,
                                   cpi->optimize_seg_arr[mbmi->segment_id],
                                   mi_row, mi_col);
      xd->cfl.store_y = 0;
    }
    if (num_planes > 1) {
      max_uv_tx_size = av1_get_tx_size(AOM_PLANE_U, xd);
      init_sbuv_mode(mbmi);
      if (!x->skip_chroma_rd)
        rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, &dist_uv,
                                &uv_skip, bsize, max_uv_tx_size);
    }

    if (y_skip && (uv_skip || x->skip_chroma_rd)) {
      rd_cost->rate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
                      x->skip_cost[av1_get_skip_context(xd)][1];
      rd_cost->dist = dist_y + dist_uv;
    } else {
      rd_cost->rate =
          rate_y + rate_uv + x->skip_cost[av1_get_skip_context(xd)][0];
      rd_cost->dist = dist_y + dist_uv;
    }
    rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
  } else {
    rd_cost->rate = INT_MAX;
  }

  if (rd_cost->rate != INT_MAX && rd_cost->rdcost < best_rd)
    best_rd = rd_cost->rdcost;
  if (rd_pick_intrabc_mode_sb(cpi, x, rd_cost, bsize, best_rd) < best_rd) {
    ctx->skip = x->skip;
    memcpy(ctx->blk_skip, x->blk_skip,
           sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);
    assert(rd_cost->rate != INT_MAX);
  }
  if (rd_cost->rate == INT_MAX) return;

  ctx->mic = *xd->mi[0];
  ctx->mbmi_ext = *x->mbmi_ext;
}

static void restore_uv_color_map(const AV1_COMP *const cpi, MACROBLOCK *x) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
  const BLOCK_SIZE bsize = mbmi->sb_type;
  int src_stride = x->plane[1].src.stride;
  const uint8_t *const src_u = x->plane[1].src.buf;
  const uint8_t *const src_v = x->plane[2].src.buf;
  int *const data = x->palette_buffer->kmeans_data_buf;
  int centroids[2 * PALETTE_MAX_SIZE];
  uint8_t *const color_map = xd->plane[1].color_index_map;
  int r, c;
  const uint16_t *const src_u16 = CONVERT_TO_SHORTPTR(src_u);
  const uint16_t *const src_v16 = CONVERT_TO_SHORTPTR(src_v);
  int plane_block_width, plane_block_height, rows, cols;
  av1_get_block_dimensions(bsize, 1, xd, &plane_block_width,
                           &plane_block_height, &rows, &cols);

  // Pack co-located U and V samples into interleaved (u, v) pairs so that
  // av1_calc_indices() can treat each chroma pixel as one 2-D point.
  for (r = 0; r < rows; ++r) {
    for (c = 0; c < cols; ++c) {
      if (cpi->common.seq_params.use_highbitdepth) {
        data[(r * cols + c) * 2] = src_u16[r * src_stride + c];
        data[(r * cols + c) * 2 + 1] = src_v16[r * src_stride + c];
      } else {
        data[(r * cols + c) * 2] = src_u[r * src_stride + c];
        data[(r * cols + c) * 2 + 1] = src_v[r * src_stride + c];
      }
    }
  }

  for (r = 1; r < 3; ++r) {
    for (c = 0; c < pmi->palette_size[1]; ++c) {
      centroids[c * 2 + r - 1] = pmi->palette_colors[r * PALETTE_MAX_SIZE + c];
    }
  }

  av1_calc_indices(data, centroids, color_map, rows * cols,
                   pmi->palette_size[1], 2);
  extend_palette_color_map(color_map, cols, rows, plane_block_width,
                           plane_block_height);
}

static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x,
                                      const MACROBLOCKD *xd, int mi_row,
                                      int mi_col, const uint8_t *above,
                                      int above_stride, const uint8_t *left,
                                      int left_stride);

static void rd_pick_skip_mode(RD_STATS *rd_cost,
                              InterModeSearchState *search_state,
                              const AV1_COMP *const cpi, MACROBLOCK *const x,
                              BLOCK_SIZE bsize, int mi_row, int mi_col,
                              struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
  const AV1_COMMON *const cm = &cpi->common;
  const SkipModeInfo *const skip_mode_info = &cm->current_frame.skip_mode_info;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];

  x->compound_idx = 1;  // COMPOUND_AVERAGE
  RD_STATS skip_mode_rd_stats;
  av1_invalid_rd_stats(&skip_mode_rd_stats);

  if (skip_mode_info->ref_frame_idx_0 == INVALID_IDX ||
      skip_mode_info->ref_frame_idx_1 == INVALID_IDX) {
    return;
  }

  const MV_REFERENCE_FRAME ref_frame =
      LAST_FRAME + skip_mode_info->ref_frame_idx_0;
  const MV_REFERENCE_FRAME second_ref_frame =
      LAST_FRAME + skip_mode_info->ref_frame_idx_1;
  const PREDICTION_MODE this_mode = NEAREST_NEARESTMV;
  const int mode_index =
      get_prediction_mode_idx(this_mode, ref_frame, second_ref_frame);

  if (mode_index == -1) {
    return;
  }

  if (!cpi->oxcf.enable_onesided_comp && cpi->all_one_sided_refs) {
    return;
  }

  mbmi->mode = this_mode;
  mbmi->uv_mode = UV_DC_PRED;
  mbmi->ref_frame[0] = ref_frame;
  mbmi->ref_frame[1] = second_ref_frame;
  const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
  if (x->mbmi_ext->ref_mv_count[ref_frame_type] == UINT8_MAX) {
    if (x->mbmi_ext->ref_mv_count[ref_frame] == UINT8_MAX ||
        x->mbmi_ext->ref_mv_count[second_ref_frame] == UINT8_MAX) {
      return;
    }
    MB_MODE_INFO_EXT *mbmi_ext = x->mbmi_ext;
    av1_find_mv_refs(cm, xd, mbmi, ref_frame_type, mbmi_ext->ref_mv_count,
                     mbmi_ext->ref_mv_stack, NULL, mbmi_ext->global_mvs, mi_row,
                     mi_col, mbmi_ext->mode_context);
  }

  assert(this_mode == NEAREST_NEARESTMV);
  if (!build_cur_mv(mbmi->mv, this_mode, cm, x)) {
    return;
  }

  mbmi->filter_intra_mode_info.use_filter_intra = 0;
  mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
  mbmi->comp_group_idx = 0;
  mbmi->compound_idx = x->compound_idx;
  mbmi->interinter_comp.type = COMPOUND_AVERAGE;
  mbmi->motion_mode = SIMPLE_TRANSLATION;
  mbmi->ref_mv_idx = 0;
  mbmi->skip_mode = mbmi->skip = 1;

  set_default_interp_filters(mbmi, cm->interp_filter);

  set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
  for (int i = 0; i < num_planes; i++) {
    xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
    xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
  }

  BUFFER_SET orig_dst;
  for (int i = 0; i < num_planes; i++) {
    orig_dst.plane[i] = xd->plane[i].dst.buf;
    orig_dst.stride[i] = xd->plane[i].dst.stride;
  }

  // Obtain the rdcost for skip_mode.
  skip_mode_rd(&skip_mode_rd_stats, cpi, x, bsize, mi_row, mi_col, &orig_dst);

  // Compare the use of skip_mode with the best intra/inter mode obtained.
  const int skip_mode_ctx = av1_get_skip_mode_context(xd);
  const int64_t best_intra_inter_mode_cost =
      (rd_cost->dist < INT64_MAX && rd_cost->rate < INT32_MAX)
          ? RDCOST(x->rdmult,
                   rd_cost->rate + x->skip_mode_cost[skip_mode_ctx][0],
                   rd_cost->dist)
          : INT64_MAX;

  if (skip_mode_rd_stats.rdcost <= best_intra_inter_mode_cost &&
      (!xd->lossless[mbmi->segment_id] || skip_mode_rd_stats.dist == 0)) {
    assert(mode_index != -1);
    search_state->best_mbmode = *mbmi;

    search_state->best_mbmode.skip_mode = search_state->best_mbmode.skip = 1;
    search_state->best_mbmode.mode = NEAREST_NEARESTMV;
    search_state->best_mbmode.ref_frame[0] = mbmi->ref_frame[0];
    search_state->best_mbmode.ref_frame[1] = mbmi->ref_frame[1];
    search_state->best_mbmode.mv[0].as_int = mbmi->mv[0].as_int;
    search_state->best_mbmode.mv[1].as_int = mbmi->mv[1].as_int;
    search_state->best_mbmode.ref_mv_idx = 0;

    // Set up tx_size related variables for skip-specific loop filtering.
    search_state->best_mbmode.tx_size =
        block_signals_txsize(bsize) ? tx_size_from_tx_mode(bsize, cm->tx_mode)
                                    : max_txsize_rect_lookup[bsize];
    memset(search_state->best_mbmode.inter_tx_size,
           search_state->best_mbmode.tx_size,
           sizeof(search_state->best_mbmode.inter_tx_size));
    set_txfm_ctxs(search_state->best_mbmode.tx_size, xd->n4_w, xd->n4_h,
                  search_state->best_mbmode.skip && is_inter_block(mbmi), xd);

    // Set up color-related variables for skip mode.
    search_state->best_mbmode.uv_mode = UV_DC_PRED;
    search_state->best_mbmode.palette_mode_info.palette_size[0] = 0;
    search_state->best_mbmode.palette_mode_info.palette_size[1] = 0;

    search_state->best_mbmode.comp_group_idx = 0;
    search_state->best_mbmode.compound_idx = x->compound_idx;
    search_state->best_mbmode.interinter_comp.type = COMPOUND_AVERAGE;
    search_state->best_mbmode.motion_mode = SIMPLE_TRANSLATION;

    search_state->best_mbmode.interintra_mode =
        (INTERINTRA_MODE)(II_DC_PRED - 1);
    search_state->best_mbmode.filter_intra_mode_info.use_filter_intra = 0;

    set_default_interp_filters(&search_state->best_mbmode, cm->interp_filter);

    search_state->best_mode_index = mode_index;

    // Update rd_cost.
    rd_cost->rate = skip_mode_rd_stats.rate;
    rd_cost->dist = rd_cost->sse = skip_mode_rd_stats.dist;
    rd_cost->rdcost = skip_mode_rd_stats.rdcost;

    search_state->best_rd = rd_cost->rdcost;
    search_state->best_skip2 = 1;
    search_state->best_mode_skippable = 1;

    x->skip = 1;
  }
}

// Speed feature: fast intra/inter transform type search, used for speed >= 2.
// When this speed feature is on, only the default transform type (DCT) is
// evaluated during the rd mode search. After the best mode is determined,
// this function is called to select the transform types and obtain an
// accurate rdcost.
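// A sketch of the resulting two-stage flow (illustrative; the flags are set
// in set_params_rd_pick_inter_mode() further below):
//   Stage 1, mode search: x->use_default_inter_tx_type == 1, so candidate
//     modes are scored with the default transform type only (cheap, slightly
//     inaccurate).
//   Stage 2, this refinement: clear the default-type flags, redo the
//     transform search for the single winning mode with all transform types
//     allowed, and keep the refined result only if its RD cost is lower.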
static void sf_refine_fast_tx_type_search(
    const AV1_COMP *cpi, MACROBLOCK *x, int mi_row, int mi_col,
    RD_STATS *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
    int best_mode_index, MB_MODE_INFO *best_mbmode,
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE], int best_rate_y,
    int best_rate_uv, int *best_skip2) {
  const AV1_COMMON *const cm = &cpi->common;
  const SPEED_FEATURES *const sf = &cpi->sf;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  const int num_planes = av1_num_planes(cm);

  if (xd->lossless[mbmi->segment_id] == 0 && best_mode_index >= 0 &&
      ((sf->tx_type_search.fast_inter_tx_type_search == 1 &&
        is_inter_mode(best_mbmode->mode)) ||
       (sf->tx_type_search.fast_intra_tx_type_search == 1 &&
        !is_inter_mode(best_mbmode->mode)))) {
    int skip_blk = 0;
    RD_STATS rd_stats_y, rd_stats_uv;
    const int skip_ctx = av1_get_skip_context(xd);

    x->use_default_inter_tx_type = 0;
    x->use_default_intra_tx_type = 0;

    *mbmi = *best_mbmode;

    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);

    // Select the prediction reference frames.
    for (int i = 0; i < num_planes; i++) {
      xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
      if (has_second_ref(mbmi))
        xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
    }

    if (is_inter_mode(mbmi->mode)) {
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                    av1_num_planes(cm) - 1);
      if (mbmi->motion_mode == OBMC_CAUSAL)
        av1_build_obmc_inter_predictors_sb(cm, xd, mi_row, mi_col);

      av1_subtract_plane(x, bsize, 0);
      if (cm->tx_mode == TX_MODE_SELECT && !xd->lossless[mbmi->segment_id]) {
        pick_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize, mi_row, mi_col,
                              INT64_MAX);
        assert(rd_stats_y.rate != INT_MAX);
      } else {
        super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
        memset(mbmi->inter_tx_size, mbmi->tx_size,
               sizeof(mbmi->inter_tx_size));
        for (int i = 0; i < xd->n4_h * xd->n4_w; ++i)
          set_blk_skip(x, 0, i, rd_stats_y.skip);
      }
    } else {
      super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
    }

    if (num_planes > 1) {
      super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
    } else {
      av1_init_rd_stats(&rd_stats_uv);
    }

    if (RDCOST(x->rdmult,
               x->skip_cost[skip_ctx][0] + rd_stats_y.rate + rd_stats_uv.rate,
               (rd_stats_y.dist + rd_stats_uv.dist)) >
        RDCOST(x->rdmult, x->skip_cost[skip_ctx][1],
               (rd_stats_y.sse + rd_stats_uv.sse))) {
      skip_blk = 1;
      rd_stats_y.rate = x->skip_cost[skip_ctx][1];
      rd_stats_uv.rate = 0;
      rd_stats_y.dist = rd_stats_y.sse;
      rd_stats_uv.dist = rd_stats_uv.sse;
    } else {
      skip_blk = 0;
      rd_stats_y.rate += x->skip_cost[skip_ctx][0];
    }

    if (RDCOST(x->rdmult, best_rate_y + best_rate_uv, rd_cost->dist) >
        RDCOST(x->rdmult, rd_stats_y.rate + rd_stats_uv.rate,
               (rd_stats_y.dist + rd_stats_uv.dist))) {
      best_mbmode->tx_size = mbmi->tx_size;
      av1_copy(best_mbmode->inter_tx_size, mbmi->inter_tx_size);
      memcpy(ctx->blk_skip, x->blk_skip,
             sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);
      av1_copy(best_mbmode->txk_type, mbmi->txk_type);
      rd_cost->rate +=
          (rd_stats_y.rate + rd_stats_uv.rate - best_rate_y - best_rate_uv);
      rd_cost->dist = rd_stats_y.dist + rd_stats_uv.dist;
      rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
      *best_skip2 = skip_blk;
    }
  }
}

typedef struct {
  // Mask for each reference frame, specifying which prediction modes NOT to
  // try during the search.
  uint32_t pred_modes[REF_FRAMES];
  // If ref_combo[i][j + 1] is true, do NOT try prediction using the
  // combination of reference frames (i, j).
  // Note: indexing with 'j + 1' is due to the fact that the 2nd reference can
  // be -1 (NONE_FRAME).
  bool ref_combo[REF_FRAMES][REF_FRAMES + 1];
} mode_skip_mask_t;

// Update the 'ref_combo' mask to disable the given 'ref' in both single and
// compound reference modes.
static void disable_reference(MV_REFERENCE_FRAME ref,
                              bool ref_combo[REF_FRAMES][REF_FRAMES + 1]) {
  for (MV_REFERENCE_FRAME ref2 = NONE_FRAME; ref2 < REF_FRAMES; ++ref2) {
    ref_combo[ref][ref2 + 1] = true;
  }
}

// Update the 'ref_combo' mask to disable all inter references except ALTREF.
static void disable_inter_references_except_altref(
    bool ref_combo[REF_FRAMES][REF_FRAMES + 1]) {
  disable_reference(LAST_FRAME, ref_combo);
  disable_reference(LAST2_FRAME, ref_combo);
  disable_reference(LAST3_FRAME, ref_combo);
  disable_reference(GOLDEN_FRAME, ref_combo);
  disable_reference(BWDREF_FRAME, ref_combo);
  disable_reference(ALTREF2_FRAME, ref_combo);
}

static const MV_REFERENCE_FRAME reduced_ref_combos[][2] = {
  { LAST_FRAME, NONE_FRAME },     { ALTREF_FRAME, NONE_FRAME },
  { LAST_FRAME, ALTREF_FRAME },   { GOLDEN_FRAME, NONE_FRAME },
  { INTRA_FRAME, NONE_FRAME },    { GOLDEN_FRAME, ALTREF_FRAME },
  { LAST_FRAME, GOLDEN_FRAME },   { LAST_FRAME, INTRA_FRAME },
  { LAST_FRAME, BWDREF_FRAME },   { LAST_FRAME, LAST3_FRAME },
  { GOLDEN_FRAME, BWDREF_FRAME }, { GOLDEN_FRAME, INTRA_FRAME },
  { BWDREF_FRAME, NONE_FRAME },   { BWDREF_FRAME, ALTREF_FRAME },
  { ALTREF_FRAME, INTRA_FRAME },  { BWDREF_FRAME, INTRA_FRAME },
};

static const MV_REFERENCE_FRAME real_time_ref_combos[][2] = {
  { LAST_FRAME, NONE_FRAME },
  { ALTREF_FRAME, NONE_FRAME },
  { GOLDEN_FRAME, NONE_FRAME },
  { INTRA_FRAME, NONE_FRAME }
};

typedef enum { REF_SET_FULL, REF_SET_REDUCED, REF_SET_REALTIME } REF_SET;

static void default_skip_mask(mode_skip_mask_t *mask, REF_SET ref_set) {
  if (ref_set == REF_SET_FULL) {
    // Everything is available by default.
    memset(mask, 0, sizeof(*mask));
  } else {
    // All modes available by default.
    memset(mask->pred_modes, 0, sizeof(mask->pred_modes));
    // All references disabled first.
    for (MV_REFERENCE_FRAME ref1 = INTRA_FRAME; ref1 < REF_FRAMES; ++ref1) {
      for (MV_REFERENCE_FRAME ref2 = NONE_FRAME; ref2 < REF_FRAMES; ++ref2) {
        mask->ref_combo[ref1][ref2 + 1] = true;
      }
    }
    const MV_REFERENCE_FRAME(*ref_set_combos)[2];
    int num_ref_combos;

    // Then enable the reduced set of references explicitly.
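    // Example of the 'j + 1' indexing convention used by ref_combo: single
    // prediction from LAST_FRAME is the pair (LAST_FRAME, NONE_FRAME), and
    // NONE_FRAME == -1, so it lives at ref_combo[LAST_FRAME][0], while the
    // compound pair (LAST_FRAME, ALTREF_FRAME) lives at
    // ref_combo[LAST_FRAME][ALTREF_FRAME + 1].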
    switch (ref_set) {
      case REF_SET_REDUCED:
        ref_set_combos = reduced_ref_combos;
        num_ref_combos =
            (int)sizeof(reduced_ref_combos) / sizeof(reduced_ref_combos[0]);
        break;
      case REF_SET_REALTIME:
        ref_set_combos = real_time_ref_combos;
        num_ref_combos =
            (int)sizeof(real_time_ref_combos) /
            sizeof(real_time_ref_combos[0]);
        break;
      default: assert(0); num_ref_combos = 0;
    }

    for (int i = 0; i < num_ref_combos; ++i) {
      const MV_REFERENCE_FRAME *const this_combo = ref_set_combos[i];
      mask->ref_combo[this_combo[0]][this_combo[1] + 1] = false;
    }
  }
}

static void init_mode_skip_mask(mode_skip_mask_t *mask, const AV1_COMP *cpi,
                                MACROBLOCK *x, BLOCK_SIZE bsize) {
  const AV1_COMMON *const cm = &cpi->common;
  const struct segmentation *const seg = &cm->seg;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  unsigned char segment_id = mbmi->segment_id;
  const SPEED_FEATURES *const sf = &cpi->sf;
  REF_SET ref_set = REF_SET_FULL;

  if (sf->use_real_time_ref_set)
    ref_set = REF_SET_REALTIME;
  else if (cpi->oxcf.enable_reduced_reference_set)
    ref_set = REF_SET_REDUCED;

  default_skip_mask(mask, ref_set);

  int min_pred_mv_sad = INT_MAX;
  MV_REFERENCE_FRAME ref_frame;
  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame)
    min_pred_mv_sad = AOMMIN(min_pred_mv_sad, x->pred_mv_sad[ref_frame]);

  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
    if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame])) {
      // Skip checking missing references in both single and compound
      // reference modes.
      disable_reference(ref_frame, mask->ref_combo);
    } else {
      // Skip fixed-mv modes for poor references.
      if ((x->pred_mv_sad[ref_frame] >> 2) > min_pred_mv_sad) {
        mask->pred_modes[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
      }
    }
    if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
        get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
      // The reference is not used for the segment.
      disable_reference(ref_frame, mask->ref_combo);
    }
  }
  // Note: We use the following drop-out only if the SEG_LVL_REF_FRAME feature
  // is disabled for this segment. This is to prevent the possibility that we
  // end up unable to pick any mode.
  if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
    // Only consider GLOBALMV/ALTREF_FRAME for the alt ref frame, unless ARNR
    // filtering is enabled, in which case we want an unfiltered alternative.
    // We allow near/nearest as well because they may result in zero-zero MVs
    // but be cheaper.
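    // pred_modes[] is a skip bitmask: a set bit (1 << mode) means 'mode' is
    // not tried for that reference. For example, assigning
    // ~INTER_NEAREST_NEAR_ZERO below skips everything except the
    // NEAREST/NEAR/GLOBAL group, and the subsequent '|= (1 << NEARMV)' /
    // '|= (1 << NEARESTMV)' lines put those modes back into the skip set
    // when their MVs differ from the global MV.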
    if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
      disable_inter_references_except_altref(mask->ref_combo);

      mask->pred_modes[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
      const MV_REFERENCE_FRAME tmp_ref_frames[2] = { ALTREF_FRAME,
                                                     NONE_FRAME };
      int_mv near_mv, nearest_mv, global_mv;
      get_this_mv(&nearest_mv, NEARESTMV, 0, 0, tmp_ref_frames, x->mbmi_ext);
      get_this_mv(&near_mv, NEARMV, 0, 0, tmp_ref_frames, x->mbmi_ext);
      get_this_mv(&global_mv, GLOBALMV, 0, 0, tmp_ref_frames, x->mbmi_ext);

      if (near_mv.as_int != global_mv.as_int)
        mask->pred_modes[ALTREF_FRAME] |= (1 << NEARMV);
      if (nearest_mv.as_int != global_mv.as_int)
        mask->pred_modes[ALTREF_FRAME] |= (1 << NEARESTMV);
    }
  }

  if (cpi->rc.is_src_frame_alt_ref) {
    if (sf->alt_ref_search_fp) {
      assert(cpi->ref_frame_flags & av1_ref_frame_flag_list[ALTREF_FRAME]);
      mask->pred_modes[ALTREF_FRAME] = 0;
      disable_inter_references_except_altref(mask->ref_combo);
      disable_reference(INTRA_FRAME, mask->ref_combo);
    }
  }

  if (sf->alt_ref_search_fp)
    if (!cm->show_frame && x->pred_mv_sad[GOLDEN_FRAME] < INT_MAX)
      if (x->pred_mv_sad[ALTREF_FRAME] > (x->pred_mv_sad[GOLDEN_FRAME] << 1))
        mask->pred_modes[ALTREF_FRAME] |= INTER_ALL;

  if (sf->adaptive_mode_search) {
    if (cm->show_frame && !cpi->rc.is_src_frame_alt_ref &&
        cpi->rc.frames_since_golden >= 3)
      if ((x->pred_mv_sad[GOLDEN_FRAME] >> 1) > x->pred_mv_sad[LAST_FRAME])
        mask->pred_modes[GOLDEN_FRAME] |= INTER_ALL;
  }

  if (bsize > sf->max_intra_bsize) {
    disable_reference(INTRA_FRAME, mask->ref_combo);
  }

  mask->pred_modes[INTRA_FRAME] |=
      ~(sf->intra_y_mode_mask[max_txsize_lookup[bsize]]);
}

// Please add or modify parameter settings in this function so that it stays
// consistent and easy to read and maintain.
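// The above/left prediction buffers configured at the top of the next
// function are single contiguous allocations per side, carved into three
// plane pointers: Y at offset 0, U at MAX_SB_SQUARE / 2 samples, and V at
// MAX_SB_SQUARE samples, with byte offsets doubled via sizeof(uint16_t) when
// the current buffer is high bit depth.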
static void set_params_rd_pick_inter_mode(
    const AV1_COMP *cpi, MACROBLOCK *x, HandleInterModeArgs *args,
    BLOCK_SIZE bsize, int mi_row, int mi_col, mode_skip_mask_t *mode_skip_mask,
    int skip_ref_frame_mask, unsigned int ref_costs_single[REF_FRAMES],
    unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES],
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
  const AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
  unsigned char segment_id = mbmi->segment_id;
  int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
  int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
                                   MAX_SB_SIZE >> 1 };
  int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
                                    MAX_SB_SIZE >> 1 };
  int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };

  for (int i = 0; i < MB_MODE_COUNT; ++i)
    for (int k = 0; k < REF_FRAMES; ++k) args->single_filter[i][k] = SWITCHABLE;

  if (is_cur_buf_hbd(xd)) {
    int len = sizeof(uint16_t);
    args->above_pred_buf[0] = CONVERT_TO_BYTEPTR(x->above_pred_buf);
    args->above_pred_buf[1] =
        CONVERT_TO_BYTEPTR(x->above_pred_buf + (MAX_SB_SQUARE >> 1) * len);
    args->above_pred_buf[2] =
        CONVERT_TO_BYTEPTR(x->above_pred_buf + MAX_SB_SQUARE * len);
    args->left_pred_buf[0] = CONVERT_TO_BYTEPTR(x->left_pred_buf);
    args->left_pred_buf[1] =
        CONVERT_TO_BYTEPTR(x->left_pred_buf + (MAX_SB_SQUARE >> 1) * len);
    args->left_pred_buf[2] =
        CONVERT_TO_BYTEPTR(x->left_pred_buf + MAX_SB_SQUARE * len);
  } else {
    args->above_pred_buf[0] = x->above_pred_buf;
    args->above_pred_buf[1] = x->above_pred_buf + (MAX_SB_SQUARE >> 1);
    args->above_pred_buf[2] = x->above_pred_buf + MAX_SB_SQUARE;
    args->left_pred_buf[0] = x->left_pred_buf;
    args->left_pred_buf[1] = x->left_pred_buf + (MAX_SB_SQUARE >> 1);
    args->left_pred_buf[2] = x->left_pred_buf + MAX_SB_SQUARE;
  }

  av1_collect_neighbors_ref_counts(xd);

  estimate_ref_frame_costs(cm, xd, x, segment_id, ref_costs_single,
                           ref_costs_comp);

  MV_REFERENCE_FRAME ref_frame;
  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
    x->pred_mv_sad[ref_frame] = INT_MAX;
    x->mbmi_ext->mode_context[ref_frame] = 0;
    mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
    if (cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame]) {
      if (mbmi->partition != PARTITION_NONE &&
          mbmi->partition != PARTITION_SPLIT) {
        if (skip_ref_frame_mask & (1 << ref_frame)) {
          int skip = 1;
          for (int r = ALTREF_FRAME + 1; r < MODE_CTX_REF_FRAMES; ++r) {
            if (!(skip_ref_frame_mask & (1 << r))) {
              const MV_REFERENCE_FRAME *rf = ref_frame_map[r - REF_FRAMES];
              if (rf[0] == ref_frame || rf[1] == ref_frame) {
                skip = 0;
                break;
              }
            }
          }
          if (skip) continue;
        }
      }
      assert(get_ref_frame_yv12_buf(cm, ref_frame) != NULL);
      setup_buffer_ref_mvs_inter(cpi, x, ref_frame, bsize, mi_row, mi_col,
                                 yv12_mb);
    }
  }
  // Continue with the compound reference types (ref_frame > ALTREF_FRAME).
  for (; ref_frame < MODE_CTX_REF_FRAMES; ++ref_frame) {
    x->mbmi_ext->mode_context[ref_frame] = 0;
    mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
    const MV_REFERENCE_FRAME *rf = ref_frame_map[ref_frame - REF_FRAMES];
    if (!((cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[0]]) &&
          (cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[1]]))) {
      continue;
    }

    if (mbmi->partition != PARTITION_NONE &&
        mbmi->partition != PARTITION_SPLIT) {
      if (skip_ref_frame_mask & (1 << ref_frame)) {
        continue;
      }
    }
    av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
                     mbmi_ext->ref_mv_stack, NULL, mbmi_ext->global_mvs, mi_row,
                     mi_col, mbmi_ext->mode_context);
  }

  av1_count_overlappable_neighbors(cm, xd, mi_row, mi_col);

  if (check_num_overlappable_neighbors(mbmi) &&
      is_motion_variation_allowed_bsize(bsize)) {
    av1_build_prediction_by_above_preds(cm, xd, mi_row, mi_col,
                                        args->above_pred_buf, dst_width1,
                                        dst_height1, args->above_pred_stride);
    av1_build_prediction_by_left_preds(cm, xd, mi_row, mi_col,
                                       args->left_pred_buf, dst_width2,
                                       dst_height2, args->left_pred_stride);
    av1_setup_dst_planes(xd->plane, bsize, &cm->cur_frame->buf, mi_row, mi_col,
                         0, num_planes);
    calc_target_weighted_pred(
        cm, x, xd, mi_row, mi_col, args->above_pred_buf[0],
        args->above_pred_stride[0], args->left_pred_buf[0],
        args->left_pred_stride[0]);
  }

  init_mode_skip_mask(mode_skip_mask, cpi, x, bsize);

  if (cpi->sf.tx_type_search.fast_intra_tx_type_search ||
      cpi->oxcf.use_intra_default_tx_only)
    x->use_default_intra_tx_type = 1;
  else
    x->use_default_intra_tx_type = 0;

  if (cpi->sf.tx_type_search.fast_inter_tx_type_search)
    x->use_default_inter_tx_type = 1;
  else
    x->use_default_inter_tx_type = 0;
  if (cpi->sf.skip_repeat_interpolation_filter_search) {
    x->interp_filter_stats_idx[0] = 0;
    x->interp_filter_stats_idx[1] = 0;
  }
  x->comp_rd_stats_idx = 0;
}

// TODO(kyslov): This is now very similar to set_params_rd_pick_inter_mode
// (except that it doesn't set up the ALTREF/compound parameters); consider
// passing a flag to select the non-rd path (similar to encode_sb_row).
static void set_params_nonrd_pick_inter_mode(
    const AV1_COMP *cpi, MACROBLOCK *x, HandleInterModeArgs *args,
    BLOCK_SIZE bsize, int mi_row, int mi_col, mode_skip_mask_t *mode_skip_mask,
    int skip_ref_frame_mask, unsigned int ref_costs_single[REF_FRAMES],
    unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES],
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
  const AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
  unsigned char segment_id = mbmi->segment_id;
  int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
  int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
                                   MAX_SB_SIZE >> 1 };
  int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
                                    MAX_SB_SIZE >> 1 };
  int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };

  for (int i = 0; i < MB_MODE_COUNT; ++i)
    for (int k = 0; k < REF_FRAMES; ++k) args->single_filter[i][k] = SWITCHABLE;

  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    int len = sizeof(uint16_t);
    args->above_pred_buf[0] = CONVERT_TO_BYTEPTR(x->above_pred_buf);
    args->above_pred_buf[1] =
        CONVERT_TO_BYTEPTR(x->above_pred_buf + (MAX_SB_SQUARE >> 1) * len);
    args->above_pred_buf[2] =
        CONVERT_TO_BYTEPTR(x->above_pred_buf + MAX_SB_SQUARE * len);
    args->left_pred_buf[0] = CONVERT_TO_BYTEPTR(x->left_pred_buf);
    args->left_pred_buf[1] =
        CONVERT_TO_BYTEPTR(x->left_pred_buf + (MAX_SB_SQUARE >> 1) * len);
    args->left_pred_buf[2] =
        CONVERT_TO_BYTEPTR(x->left_pred_buf + MAX_SB_SQUARE * len);
  } else {
    args->above_pred_buf[0] = x->above_pred_buf;
    args->above_pred_buf[1] = x->above_pred_buf + (MAX_SB_SQUARE >> 1);
    args->above_pred_buf[2] = x->above_pred_buf + MAX_SB_SQUARE;
    args->left_pred_buf[0] = x->left_pred_buf;
    args->left_pred_buf[1] = x->left_pred_buf + (MAX_SB_SQUARE >> 1);
    args->left_pred_buf[2] = x->left_pred_buf + MAX_SB_SQUARE;
  }

  av1_collect_neighbors_ref_counts(xd);

  estimate_ref_frame_costs(cm, xd, x, segment_id, ref_costs_single,
                           ref_costs_comp);

  MV_REFERENCE_FRAME ref_frame;
  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
    x->pred_mv_sad[ref_frame] = INT_MAX;
    x->mbmi_ext->mode_context[ref_frame] = 0;
    mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
    if (cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame]) {
      if (mbmi->partition != PARTITION_NONE &&
          mbmi->partition != PARTITION_SPLIT) {
        if (skip_ref_frame_mask & (1 << ref_frame)) {
          int skip = 1;
          for (int r = ALTREF_FRAME + 1; r < MODE_CTX_REF_FRAMES; ++r) {
            if (!(skip_ref_frame_mask & (1 << r))) {
              const MV_REFERENCE_FRAME *rf = ref_frame_map[r - REF_FRAMES];
              if (rf[0] == ref_frame || rf[1] == ref_frame) {
                skip = 0;
                break;
              }
            }
          }
          if (skip) continue;
        }
      }
      assert(get_ref_frame_yv12_buf(cm, ref_frame) != NULL);
      setup_buffer_ref_mvs_inter(cpi, x, ref_frame, bsize, mi_row, mi_col,
                                 yv12_mb);
    }
  }
  av1_count_overlappable_neighbors(cm, xd, mi_row, mi_col);

  if (check_num_overlappable_neighbors(mbmi) &&
      is_motion_variation_allowed_bsize(bsize)) {
    av1_build_prediction_by_above_preds(cm, xd, mi_row, mi_col,
                                        args->above_pred_buf, dst_width1,
                                        dst_height1, args->above_pred_stride);
    av1_build_prediction_by_left_preds(cm, xd, mi_row, mi_col,
                                       args->left_pred_buf, dst_width2,
                                       dst_height2, args->left_pred_stride);
    av1_setup_dst_planes(xd->plane, bsize, &cm->cur_frame->buf, mi_row, mi_col,
                         0, num_planes);
    calc_target_weighted_pred(
        cm, x, xd, mi_row, mi_col, args->above_pred_buf[0],
        args->above_pred_stride[0], args->left_pred_buf[0],
        args->left_pred_stride[0]);
  }
  init_mode_skip_mask(mode_skip_mask, cpi, x, bsize);

  if (cpi->sf.tx_type_search.fast_intra_tx_type_search)
    x->use_default_intra_tx_type = 1;
  else
    x->use_default_intra_tx_type = 0;

  if (cpi->sf.tx_type_search.fast_inter_tx_type_search)
    x->use_default_inter_tx_type = 1;
  else
    x->use_default_inter_tx_type = 0;
  if (cpi->sf.skip_repeat_interpolation_filter_search) {
    x->interp_filter_stats_idx[0] = 0;
    x->interp_filter_stats_idx[1] = 0;
  }
}

static void search_palette_mode(const AV1_COMP *cpi, MACROBLOCK *x, int mi_row,
                                int mi_col, RD_STATS *rd_cost,
                                PICK_MODE_CONTEXT *ctx, BLOCK_SIZE bsize,
                                MB_MODE_INFO *const mbmi,
                                PALETTE_MODE_INFO *const pmi,
                                unsigned int *ref_costs_single,
                                InterModeSearchState *search_state) {
  const AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *const xd = &x->e_mbd;
  int rate2 = 0;
  int64_t distortion2 = 0, best_rd_palette = search_state->best_rd, this_rd,
          best_model_rd_palette = INT64_MAX;
  int skippable = 0, rate_overhead_palette = 0;
  RD_STATS rd_stats_y;
  TX_SIZE uv_tx = TX_4X4;
  uint8_t *const best_palette_color_map =
      x->palette_buffer->best_palette_color_map;
  uint8_t *const color_map = xd->plane[0].color_index_map;
  MB_MODE_INFO best_mbmi_palette = *mbmi;
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
  const int *const intra_mode_cost = x->mbmode_cost[size_group_lookup[bsize]];
  const int rows = block_size_high[bsize];
  const int cols = block_size_wide[bsize];

  mbmi->mode = DC_PRED;
  mbmi->uv_mode = UV_DC_PRED;
  mbmi->ref_frame[0] = INTRA_FRAME;
  mbmi->ref_frame[1] = NONE_FRAME;
  rate_overhead_palette = rd_pick_palette_intra_sby(
      cpi, x, bsize, mi_row, mi_col, intra_mode_cost[DC_PRED],
      &best_mbmi_palette, best_palette_color_map, &best_rd_palette,
      &best_model_rd_palette, NULL, NULL, NULL, NULL, ctx, best_blk_skip);
  if (pmi->palette_size[0] == 0) return;

  memcpy(x->blk_skip, best_blk_skip,
         sizeof(best_blk_skip[0]) * bsize_to_num_blk(bsize));

  memcpy(color_map, best_palette_color_map,
         rows * cols * sizeof(best_palette_color_map[0]));
  super_block_yrd(cpi, x, &rd_stats_y, bsize, search_state->best_rd);
  if (rd_stats_y.rate == INT_MAX) return;

  skippable = rd_stats_y.skip;
  distortion2 = rd_stats_y.dist;
  rate2 = rd_stats_y.rate + rate_overhead_palette;
  rate2 += ref_costs_single[INTRA_FRAME];
  if (num_planes > 1) {
    uv_tx = av1_get_tx_size(AOM_PLANE_U, xd);
    if (search_state->rate_uv_intra[uv_tx] == INT_MAX) {
      choose_intra_uv_mode(
          cpi, x, bsize, uv_tx, &search_state->rate_uv_intra[uv_tx],
          &search_state->rate_uv_tokenonly[uv_tx],
          &search_state->dist_uvs[uv_tx], &search_state->skip_uvs[uv_tx],
          &search_state->mode_uv[uv_tx]);
      search_state->pmi_uv[uv_tx] = *pmi;
      search_state->uv_angle_delta[uv_tx] = mbmi->angle_delta[PLANE_TYPE_UV];
    }
    mbmi->uv_mode = search_state->mode_uv[uv_tx];
    pmi->palette_size[1] = search_state->pmi_uv[uv_tx].palette_size[1];
    if (pmi->palette_size[1] > 0) {
      memcpy(pmi->palette_colors + PALETTE_MAX_SIZE,
             search_state->pmi_uv[uv_tx].palette_colors + PALETTE_MAX_SIZE,
             2 * PALETTE_MAX_SIZE * sizeof(pmi->palette_colors[0]));
    }
    mbmi->angle_delta[PLANE_TYPE_UV] = search_state->uv_angle_delta[uv_tx];
    skippable = skippable && search_state->skip_uvs[uv_tx];
    distortion2 += search_state->dist_uvs[uv_tx];
    rate2 += search_state->rate_uv_intra[uv_tx];
  }

  if (skippable) {
    rate2 -= rd_stats_y.rate;
    if (num_planes > 1) rate2 -= search_state->rate_uv_tokenonly[uv_tx];
    rate2 += x->skip_cost[av1_get_skip_context(xd)][1];
  } else {
    rate2 += x->skip_cost[av1_get_skip_context(xd)][0];
  }
  this_rd = RDCOST(x->rdmult, rate2, distortion2);
  if (this_rd < search_state->best_rd) {
    search_state->best_mode_index = 3;
    mbmi->mv[0].as_int = 0;
    rd_cost->rate = rate2;
    rd_cost->dist = distortion2;
    rd_cost->rdcost = this_rd;
    search_state->best_rd = this_rd;
    search_state->best_mbmode = *mbmi;
    search_state->best_skip2 = 0;
    search_state->best_mode_skippable = skippable;
    memcpy(ctx->blk_skip, x->blk_skip,
           sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);
  }
}

static void init_inter_mode_search_state(InterModeSearchState *search_state,
                                         const AV1_COMP *cpi,
                                         const TileDataEnc *tile_data,
                                         const MACROBLOCK *x, BLOCK_SIZE bsize,
                                         int64_t best_rd_so_far) {
  search_state->best_rd = best_rd_so_far;

  av1_zero(search_state->best_mbmode);

  search_state->best_rate_y = INT_MAX;
  search_state->best_rate_uv = INT_MAX;
  search_state->best_mode_skippable = 0;
  search_state->best_skip2 = 0;
  search_state->best_mode_index = -1;

  const MACROBLOCKD *const xd = &x->e_mbd;
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const unsigned char segment_id = mbmi->segment_id;

  search_state->skip_intra_modes = 0;

  search_state->num_available_refs = 0;
  memset(search_state->dist_refs, -1, sizeof(search_state->dist_refs));
  memset(search_state->dist_order_refs, -1,
         sizeof(search_state->dist_order_refs));

  for (int i = 0; i <= LAST_NEW_MV_INDEX; ++i)
    search_state->mode_threshold[i] = 0;
  const int *const rd_threshes = cpi->rd.threshes[segment_id][bsize];
  for (int i = LAST_NEW_MV_INDEX + 1; i < MAX_MODES; ++i)
    search_state->mode_threshold[i] =
        ((int64_t)rd_threshes[i] * tile_data->thresh_freq_fact[bsize][i]) >> 5;

  search_state->best_intra_mode = DC_PRED;
  search_state->best_intra_rd = INT64_MAX;

  search_state->angle_stats_ready = 0;
  av1_zero(search_state->directional_mode_skip_mask);

  search_state->best_pred_sse = UINT_MAX;

  for (int i = 0; i < TX_SIZES_ALL; i++)
    search_state->rate_uv_intra[i] = INT_MAX;

  av1_zero(search_state->pmi_uv);

  for (int i = 0; i < REFERENCE_MODES; ++i)
    search_state->best_pred_rd[i] = INT64_MAX;

  av1_zero(search_state->single_newmv);
  av1_zero(search_state->single_newmv_rate);
  av1_zero(search_state->single_newmv_valid);
  for (int i = 0; i < MB_MODE_COUNT; ++i) {
    for (int j = 0; j < MAX_REF_MV_SERCH; ++j) {
      for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
        search_state->modelled_rd[i][j][ref_frame] = INT64_MAX;
        search_state->simple_rd[i][j][ref_frame] = INT64_MAX;
      }
    }
  }

  for (int dir = 0; dir < 2; ++dir) {
    for (int mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
      for (int ref_frame = 0; ref_frame < FWD_REFS; ++ref_frame) {
        SingleInterModeState *state;

        state = &search_state->single_state[dir][mode][ref_frame];
        state->ref_frame = NONE_FRAME;
        state->rd = INT64_MAX;

        state = &search_state->single_state_modelled[dir][mode][ref_frame];
        state->ref_frame = NONE_FRAME;
        state->rd = INT64_MAX;
      }
    }
  }
  for (int dir = 0; dir < 2; ++dir) {
    for (int mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
      for (int ref_frame = 0; ref_frame < FWD_REFS; ++ref_frame) {
        search_state->single_rd_order[dir][mode][ref_frame] = NONE_FRAME;
      }
    }
  }
  av1_zero(search_state->single_state_cnt);
  av1_zero(search_state->single_state_modelled_cnt);
}

static bool mask_says_skip(const mode_skip_mask_t *mode_skip_mask,
                           const MV_REFERENCE_FRAME *ref_frame,
                           const PREDICTION_MODE this_mode) {
  if (mode_skip_mask->pred_modes[ref_frame[0]] & (1 << this_mode)) {
    return true;
  }

  return mode_skip_mask->ref_combo[ref_frame[0]][ref_frame[1] + 1];
}

static int inter_mode_compatible_skip(const AV1_COMP *cpi, const MACROBLOCK *x,
                                      BLOCK_SIZE bsize, int mode_index) {
  const AV1_COMMON *const cm = &cpi->common;
  const struct segmentation *const seg = &cm->seg;
  const MV_REFERENCE_FRAME *ref_frame = av1_mode_order[mode_index].ref_frame;
  const PREDICTION_MODE this_mode = av1_mode_order[mode_index].mode;
  const CurrentFrame *const current_frame = &cm->current_frame;
  const MACROBLOCKD *const xd = &x->e_mbd;
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const unsigned char segment_id = mbmi->segment_id;
  const int comp_pred = ref_frame[1] > INTRA_FRAME;

  if (comp_pred) {
    if (frame_is_intra_only(cm)) return 1;

    if (current_frame->reference_mode == SINGLE_REFERENCE) return 1;

    // Skip compound inter modes if ARF is not available.
    if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame[1]]))
      return 1;

    // Do not allow compound prediction if the segment level reference frame
    // feature is in use, as in this case there can only be one reference.
    if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) return 1;

    if (!is_comp_ref_allowed(bsize)) return 1;
  }

  if (ref_frame[0] > INTRA_FRAME && ref_frame[1] == INTRA_FRAME) {
    // The mode must be compatible with inter-intra prediction.
    if (!is_interintra_allowed_mode(this_mode)) return 1;
    if (!is_interintra_allowed_bsize(bsize)) return 1;
  }

  return 0;
}

static int fetch_picked_ref_frames_mask(const MACROBLOCK *const x,
                                        BLOCK_SIZE bsize, int mib_size,
                                        int mi_row, int mi_col) {
  const int sb_size_mask = mib_size - 1;
  const int mi_row_in_sb = mi_row & sb_size_mask;
  const int mi_col_in_sb = mi_col & sb_size_mask;
  const int mi_w = mi_size_wide[bsize];
  const int mi_h = mi_size_high[bsize];
  int picked_ref_frames_mask = 0;
  for (int i = mi_row_in_sb; i < mi_row_in_sb + mi_h; ++i) {
    for (int j = mi_col_in_sb; j < mi_col_in_sb + mi_w; ++j) {
      picked_ref_frames_mask |= x->picked_ref_frames_mask[i * 32 + j];
    }
  }
  return picked_ref_frames_mask;
}

// Case 1: return 0, meaning do not skip this mode.
// Case 2: return 1, meaning skip this mode completely.
// Case 3: return 2, meaning skip compound only, but still try single motion
// modes.
static int inter_mode_search_order_independent_skip(
    const AV1_COMP *cpi, const MACROBLOCK *x, BLOCK_SIZE bsize, int mode_index,
    int mi_row, int mi_col, mode_skip_mask_t *mode_skip_mask,
    InterModeSearchState *search_state, int skip_ref_frame_mask) {
  const SPEED_FEATURES *const sf = &cpi->sf;
  const AV1_COMMON *const cm = &cpi->common;
  const OrderHintInfo *const order_hint_info = &cm->seq_params.order_hint_info;
  const CurrentFrame *const current_frame = &cm->current_frame;
  const MACROBLOCKD *const xd = &x->e_mbd;
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const MV_REFERENCE_FRAME *ref_frame = av1_mode_order[mode_index].ref_frame;
  const PREDICTION_MODE this_mode = av1_mode_order[mode_index].mode;
  const int comp_pred = ref_frame[1] > INTRA_FRAME;
  int skip_motion_mode = 0;

  if (mask_says_skip(mode_skip_mask, ref_frame, this_mode)) {
    return 1;
  }

  // If no valid mode has been found so far in PARTITION_NONE while finding a
  // valid partition is required, do not skip this mode.
  if (search_state->best_rd == INT64_MAX &&
      mbmi->partition == PARTITION_NONE && x->must_find_valid_partition)
    return 0;

  if (mbmi->partition != PARTITION_NONE &&
      mbmi->partition != PARTITION_SPLIT) {
    const int ref_type = av1_ref_frame_type(ref_frame);
    int skip_ref = skip_ref_frame_mask & (1 << ref_type);
    if (ref_type <= ALTREF_FRAME && skip_ref) {
      // The compound ref modes depend on the motion estimation results of the
      // two single ref modes (the best single-ref MVs are used as the
      // starting point), so if the current single ref mode is marked skip,
      // check whether it is still needed by a compound ref mode.
      for (int r = ALTREF_FRAME + 1; r < MODE_CTX_REF_FRAMES; ++r) {
        if (!(skip_ref_frame_mask & (1 << r))) {
          const MV_REFERENCE_FRAME *rf = ref_frame_map[r - REF_FRAMES];
          if (rf[0] == ref_type || rf[1] == ref_type) {
            // Found a non-skipped compound ref mode which contains the
            // current single ref, so this single ref can't be skipped
            // completely. Just skip its motion mode search and still try its
            // simple translation mode.
            skip_motion_mode = 1;
            skip_ref = 0;
            break;
          }
        }
      }
    }
    if (skip_ref) return 1;
  }

  if (cpi->two_pass_partition_search && !x->cb_partition_scan) {
    const int mi_width = mi_size_wide[bsize];
    const int mi_height = mi_size_high[bsize];
    int found = 0;
    // Search the stats table to see if the ref frames have been used in the
    // first pass of the partition search.
    for (int row = mi_row; row < mi_row + mi_height && !found;
         row += FIRST_PARTITION_PASS_SAMPLE_REGION) {
      for (int col = mi_col; col < mi_col + mi_width && !found;
           col += FIRST_PARTITION_PASS_SAMPLE_REGION) {
        const int index = av1_first_partition_pass_stats_index(row, col);
        const FIRST_PARTITION_PASS_STATS *const stats =
            &x->first_partition_pass_stats[index];
        if (stats->ref0_counts[ref_frame[0]] &&
            (ref_frame[1] < 0 || stats->ref1_counts[ref_frame[1]])) {
          found = 1;
          break;
        }
      }
    }
    if (!found) return 1;
  }

  // This is only used in the motion vector unit test.
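  // (Skipping all intra candidates here keeps the motion vector unit test on
  // the inter/MV code path.)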
  if (cpi->oxcf.motion_vector_unit_test && ref_frame[0] == INTRA_FRAME)
    return 1;

  if (ref_frame[0] == INTRA_FRAME) {
    if (this_mode != DC_PRED) {
      // Disable intra modes other than DC_PRED for blocks with low variance.
      // The threshold for skipping intra is based on the source variance.
      // TODO(debargha): Specialize the threshold for super block sizes.
      const unsigned int skip_intra_var_thresh = 64;
      if ((sf->mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
          x->source_variance < skip_intra_var_thresh)
        return 1;
    }
  }

  if (sf->selective_ref_frame) {
    if (sf->selective_ref_frame >= 3 || x->cb_partition_scan) {
      if (ref_frame[0] == ALTREF2_FRAME || ref_frame[1] == ALTREF2_FRAME)
        if (get_relative_dist(
                order_hint_info,
                cm->cur_frame->ref_order_hints[ALTREF2_FRAME - LAST_FRAME],
                current_frame->order_hint) < 0)
          return 1;
      if (ref_frame[0] == BWDREF_FRAME || ref_frame[1] == BWDREF_FRAME)
        if (get_relative_dist(
                order_hint_info,
                cm->cur_frame->ref_order_hints[BWDREF_FRAME - LAST_FRAME],
                current_frame->order_hint) < 0)
          return 1;
    }

    if (sf->selective_ref_frame >= 2 ||
        (sf->selective_ref_frame == 1 && comp_pred)) {
      if (ref_frame[0] == LAST3_FRAME || ref_frame[1] == LAST3_FRAME)
        if (get_relative_dist(
                order_hint_info,
                cm->cur_frame->ref_order_hints[LAST3_FRAME - LAST_FRAME],
                cm->cur_frame->ref_order_hints[GOLDEN_FRAME - LAST_FRAME]) <=
            0)
          return 1;
      if (ref_frame[0] == LAST2_FRAME || ref_frame[1] == LAST2_FRAME)
        if (get_relative_dist(
                order_hint_info,
                cm->cur_frame->ref_order_hints[LAST2_FRAME - LAST_FRAME],
                cm->cur_frame->ref_order_hints[GOLDEN_FRAME - LAST_FRAME]) <=
            0)
          return 1;
    }
  }

  // One-sided compound is used only when all reference frames are one-sided.
  if ((sf->selective_ref_frame >= 2) && comp_pred &&
      !cpi->all_one_sided_refs) {
    unsigned int ref_offsets[2];
    for (int i = 0; i < 2; ++i) {
      const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref_frame[i]);
      assert(buf != NULL);
      ref_offsets[i] = buf->order_hint;
    }
    if ((get_relative_dist(order_hint_info, ref_offsets[0],
                           current_frame->order_hint) <= 0 &&
         get_relative_dist(order_hint_info, ref_offsets[1],
                           current_frame->order_hint) <= 0) ||
        (get_relative_dist(order_hint_info, ref_offsets[0],
                           current_frame->order_hint) > 0 &&
         get_relative_dist(order_hint_info, ref_offsets[1],
                           current_frame->order_hint) > 0))
      return 1;
  }

  if (sf->selective_ref_frame >= 4 && comp_pred) {
    // Check if one of the references is ALTREF2_FRAME and BWDREF_FRAME is a
    // valid reference.
    if ((ref_frame[0] == ALTREF2_FRAME || ref_frame[1] == ALTREF2_FRAME) &&
        (cpi->ref_frame_flags & av1_ref_frame_flag_list[BWDREF_FRAME])) {
      // Check if both ALTREF2_FRAME and BWDREF_FRAME are future references.
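      // Worked example with order hints: if the current frame is at hint 10,
      // BWDREF_FRAME at 12, and ALTREF2_FRAME at 16, both are future
      // references and ALTREF2 is no closer than BWDREF, so the ALTREF2-based
      // compound pair is pruned below in favor of BWDREF.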
      if ((get_relative_dist(
               order_hint_info,
               cm->cur_frame->ref_order_hints[ALTREF2_FRAME - LAST_FRAME],
               current_frame->order_hint) > 0) &&
          (get_relative_dist(
               order_hint_info,
               cm->cur_frame->ref_order_hints[BWDREF_FRAME - LAST_FRAME],
               current_frame->order_hint) > 0)) {
        // Drop ALTREF2_FRAME as a reference if BWDREF_FRAME is a closer
        // reference to the current frame than ALTREF2_FRAME.
        if (get_relative_dist(
                order_hint_info,
                cm->cur_frame->ref_order_hints[ALTREF2_FRAME - LAST_FRAME],
                cm->cur_frame->ref_order_hints[BWDREF_FRAME - LAST_FRAME]) >=
            0) {
          const RefCntBuffer *const buf_arf2 =
              get_ref_frame_buf(cm, ALTREF2_FRAME);
          assert(buf_arf2 != NULL);
          const RefCntBuffer *const buf_bwd =
              get_ref_frame_buf(cm, BWDREF_FRAME);
          assert(buf_bwd != NULL);
          (void)buf_arf2;
          (void)buf_bwd;
          return 1;
        }
      }
    }
  }

  if (skip_repeated_mv(cm, x, this_mode, ref_frame, search_state)) {
    return 1;
  }
  if (skip_motion_mode) {
    return 2;
  }

  if (!cpi->oxcf.enable_global_motion &&
      (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV)) {
    return 1;
  }

  if (!cpi->oxcf.enable_onesided_comp && comp_pred && cpi->all_one_sided_refs) {
    return 1;
  }

  return 0;
}

static INLINE void init_mbmi(MB_MODE_INFO *mbmi, int mode_index,
                             const AV1_COMMON *cm) {
  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
  PREDICTION_MODE this_mode = av1_mode_order[mode_index].mode;
  mbmi->ref_mv_idx = 0;
  mbmi->mode = this_mode;
  mbmi->uv_mode = UV_DC_PRED;
  mbmi->ref_frame[0] = av1_mode_order[mode_index].ref_frame[0];
  mbmi->ref_frame[1] = av1_mode_order[mode_index].ref_frame[1];
  pmi->palette_size[0] = 0;
  pmi->palette_size[1] = 0;
  mbmi->filter_intra_mode_info.use_filter_intra = 0;
  mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
  mbmi->motion_mode = SIMPLE_TRANSLATION;
  mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
  set_default_interp_filters(mbmi, cm->interp_filter);
}
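/* handle_intra_mode() below opens with a cheap admission test: every term of
 * known_rate is a provable lower bound on the final rate, and distortion can
 * only add to the cost, so RDCOST(rdmult, known_rate, 0) lower-bounds the true
 * rd of the mode. A minimal, hypothetical sketch of that pruning idea, kept
 * compiled out (not part of the encoder):
 */
#if 0
static int prune_by_rate_lower_bound(const MACROBLOCK *x, int min_rate,
                                     int64_t best_rd_so_far) {
  // With zero distortion, this is the cheapest the mode could possibly be.
  const int64_t rd_lower_bound = RDCOST(x->rdmult, min_rate, 0);
  return rd_lower_bound > best_rd_so_far;  // Nonzero: safe to skip the mode.
}
#endif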
static int64_t handle_intra_mode(InterModeSearchState *search_state,
                                 const AV1_COMP *cpi, MACROBLOCK *x,
                                 BLOCK_SIZE bsize, int mi_row, int mi_col,
                                 int ref_frame_cost,
                                 const PICK_MODE_CONTEXT *ctx, int disable_skip,
                                 RD_STATS *rd_stats, RD_STATS *rd_stats_y,
                                 RD_STATS *rd_stats_uv) {
  const AV1_COMMON *cm = &cpi->common;
  const SPEED_FEATURES *const sf = &cpi->sf;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  assert(mbmi->ref_frame[0] == INTRA_FRAME);
  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
  const int try_palette =
      cpi->oxcf.enable_palette &&
      av1_allow_palette(cm->allow_screen_content_tools, mbmi->sb_type);
  const int *const intra_mode_cost = x->mbmode_cost[size_group_lookup[bsize]];
  const int intra_cost_penalty = av1_get_intra_cost_penalty(
      cm->base_qindex, cm->y_dc_delta_q, cm->seq_params.bit_depth);
  const int rows = block_size_high[bsize];
  const int cols = block_size_wide[bsize];
  const int num_planes = av1_num_planes(cm);
  const int skip_ctx = av1_get_skip_context(xd);

  int known_rate = intra_mode_cost[mbmi->mode];
  known_rate += ref_frame_cost;
  if (mbmi->mode != DC_PRED && mbmi->mode != PAETH_PRED)
    known_rate += intra_cost_penalty;
  known_rate += AOMMIN(x->skip_cost[skip_ctx][0], x->skip_cost[skip_ctx][1]);
  const int64_t known_rd = RDCOST(x->rdmult, known_rate, 0);
  if (known_rd > search_state->best_rd) {
    search_state->skip_intra_modes = 1;
    return INT64_MAX;
  }

  TX_SIZE uv_tx;
  int is_directional_mode = av1_is_directional_mode(mbmi->mode);
  if (is_directional_mode && av1_use_angle_delta(bsize) &&
      cpi->oxcf.enable_angle_delta) {
    int rate_dummy;
    int64_t model_rd = INT64_MAX;
    if (sf->intra_angle_estimation && !search_state->angle_stats_ready) {
      const int src_stride = x->plane[0].src.stride;
      const uint8_t *src = x->plane[0].src.buf;
      angle_estimation(src, src_stride, rows, cols, bsize, is_cur_buf_hbd(xd),
                       search_state->directional_mode_skip_mask);
      search_state->angle_stats_ready = 1;
    }
    if (search_state->directional_mode_skip_mask[mbmi->mode]) return INT64_MAX;
    av1_init_rd_stats(rd_stats_y);
    rd_stats_y->rate = INT_MAX;
    rd_pick_intra_angle_sby(cpi, x, mi_row, mi_col, &rate_dummy, rd_stats_y,
                            bsize, intra_mode_cost[mbmi->mode],
                            search_state->best_rd, &model_rd);
  } else {
    av1_init_rd_stats(rd_stats_y);
    mbmi->angle_delta[PLANE_TYPE_Y] = 0;
    super_block_yrd(cpi, x, rd_stats_y, bsize, search_state->best_rd);
  }
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
  memcpy(best_blk_skip, x->blk_skip,
         sizeof(best_blk_skip[0]) * ctx->num_4x4_blk);
  int try_filter_intra = 0;
  int64_t best_rd_tmp = INT64_MAX;
  if (mbmi->mode == DC_PRED && av1_filter_intra_allowed_bsize(cm, bsize)) {
    if (rd_stats_y->rate != INT_MAX) {
      const int tmp_rate = rd_stats_y->rate + x->filter_intra_cost[bsize][0] +
                           intra_mode_cost[mbmi->mode];
      best_rd_tmp = RDCOST(x->rdmult, tmp_rate, rd_stats_y->dist);
      try_filter_intra = !((best_rd_tmp / 2) > search_state->best_rd);
    } else {
      try_filter_intra = !(search_state->best_mbmode.skip);
    }
  }
  if (try_filter_intra) {
    RD_STATS rd_stats_y_fi;
    int filter_intra_selected_flag = 0;
    TX_SIZE best_tx_size = mbmi->tx_size;
    TX_TYPE best_txk_type[TXK_TYPE_BUF_LEN];
    memcpy(best_txk_type, mbmi->txk_type,
           sizeof(*best_txk_type) * TXK_TYPE_BUF_LEN);
    FILTER_INTRA_MODE best_fi_mode = FILTER_DC_PRED;

    mbmi->filter_intra_mode_info.use_filter_intra = 1;
    for (FILTER_INTRA_MODE fi_mode = FILTER_DC_PRED;
         fi_mode < FILTER_INTRA_MODES; ++fi_mode) {
      int64_t this_rd_tmp;
      mbmi->filter_intra_mode_info.filter_intra_mode = fi_mode;
      super_block_yrd(cpi, x, &rd_stats_y_fi, bsize, search_state->best_rd);
      if (rd_stats_y_fi.rate == INT_MAX) {
        continue;
      }
      const int this_rate_tmp =
          rd_stats_y_fi.rate +
          intra_mode_info_cost_y(cpi, x, mbmi, bsize,
                                 intra_mode_cost[mbmi->mode]);
      this_rd_tmp = RDCOST(x->rdmult, this_rate_tmp, rd_stats_y_fi.dist);

      if (this_rd_tmp != INT64_MAX && this_rd_tmp / 2 > search_state->best_rd) {
        break;
      }
      if (this_rd_tmp < best_rd_tmp) {
        best_tx_size = mbmi->tx_size;
        memcpy(best_txk_type, mbmi->txk_type,
               sizeof(*best_txk_type) * TXK_TYPE_BUF_LEN);
        memcpy(best_blk_skip, x->blk_skip,
               sizeof(best_blk_skip[0]) * ctx->num_4x4_blk);
        best_fi_mode = fi_mode;
        *rd_stats_y = rd_stats_y_fi;
        filter_intra_selected_flag = 1;
        best_rd_tmp = this_rd_tmp;
      }
    }
  }

    mbmi->tx_size = best_tx_size;
    memcpy(mbmi->txk_type, best_txk_type,
           sizeof(*best_txk_type) * TXK_TYPE_BUF_LEN);
    memcpy(x->blk_skip, best_blk_skip,
           sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);

    if (filter_intra_selected_flag) {
      mbmi->filter_intra_mode_info.use_filter_intra = 1;
      mbmi->filter_intra_mode_info.filter_intra_mode = best_fi_mode;
    } else {
      mbmi->filter_intra_mode_info.use_filter_intra = 0;
    }
  }
  if (rd_stats_y->rate == INT_MAX) return INT64_MAX;
  const int mode_cost_y =
      intra_mode_info_cost_y(cpi, x, mbmi, bsize, intra_mode_cost[mbmi->mode]);
  av1_init_rd_stats(rd_stats);
  av1_init_rd_stats(rd_stats_uv);
  if (num_planes > 1) {
    uv_tx = av1_get_tx_size(AOM_PLANE_U, xd);
    if (search_state->rate_uv_intra[uv_tx] == INT_MAX) {
      int rate_y =
          rd_stats_y->skip ? x->skip_cost[skip_ctx][1] : rd_stats_y->rate;
      const int64_t rdy =
          RDCOST(x->rdmult, rate_y + mode_cost_y, rd_stats_y->dist);
      if (search_state->best_rd < (INT64_MAX / 2) &&
          rdy > (search_state->best_rd + (search_state->best_rd >> 2))) {
        search_state->skip_intra_modes = 1;
        return INT64_MAX;
      }
      choose_intra_uv_mode(
          cpi, x, bsize, uv_tx, &search_state->rate_uv_intra[uv_tx],
          &search_state->rate_uv_tokenonly[uv_tx],
          &search_state->dist_uvs[uv_tx], &search_state->skip_uvs[uv_tx],
          &search_state->mode_uv[uv_tx]);
      if (try_palette) search_state->pmi_uv[uv_tx] = *pmi;
      search_state->uv_angle_delta[uv_tx] = mbmi->angle_delta[PLANE_TYPE_UV];

      const int uv_rate = search_state->rate_uv_tokenonly[uv_tx];
      const int64_t uv_dist = search_state->dist_uvs[uv_tx];
      const int64_t uv_rd = RDCOST(x->rdmult, uv_rate, uv_dist);
      if (uv_rd > search_state->best_rd) {
        search_state->skip_intra_modes = 1;
        return INT64_MAX;
      }
    }

    rd_stats_uv->rate = search_state->rate_uv_tokenonly[uv_tx];
    rd_stats_uv->dist = search_state->dist_uvs[uv_tx];
    rd_stats_uv->skip = search_state->skip_uvs[uv_tx];
    rd_stats->skip = rd_stats_y->skip && rd_stats_uv->skip;
    mbmi->uv_mode = search_state->mode_uv[uv_tx];
    if (try_palette) {
      pmi->palette_size[1] = search_state->pmi_uv[uv_tx].palette_size[1];
      memcpy(pmi->palette_colors + PALETTE_MAX_SIZE,
             search_state->pmi_uv[uv_tx].palette_colors + PALETTE_MAX_SIZE,
             2 * PALETTE_MAX_SIZE * sizeof(pmi->palette_colors[0]));
    }
    mbmi->angle_delta[PLANE_TYPE_UV] = search_state->uv_angle_delta[uv_tx];
  }
  rd_stats->rate = rd_stats_y->rate + mode_cost_y;
  if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(bsize)) {
    // super_block_yrd above includes the cost of the tx_size in the
    // tokenonly rate, but for intra blocks, tx_size is always coded
    // (prediction granularity), so we account for it in the full rate,
    // not the tokenonly rate.
    rd_stats_y->rate -= tx_size_cost(cm, x, bsize, mbmi->tx_size);
  }
  if (num_planes > 1 && !x->skip_chroma_rd) {
    const int uv_mode_cost =
        x->intra_uv_mode_cost[is_cfl_allowed(xd)][mbmi->mode][mbmi->uv_mode];
    rd_stats->rate +=
        rd_stats_uv->rate +
        intra_mode_info_cost_uv(cpi, x, mbmi, bsize, uv_mode_cost);
  }
  if (mbmi->mode != DC_PRED && mbmi->mode != PAETH_PRED)
    rd_stats->rate += intra_cost_penalty;
  rd_stats->dist = rd_stats_y->dist + rd_stats_uv->dist;

  // Estimate the reference frame signaling cost and add it
  // to the rolling cost variable.
  rd_stats->rate += ref_frame_cost;
  if (rd_stats->skip) {
    // Back out the coefficient coding costs.
    rd_stats->rate -= (rd_stats_y->rate + rd_stats_uv->rate);
    rd_stats_y->rate = 0;
    rd_stats_uv->rate = 0;
    // Cost the skip mb case.
    rd_stats->rate += x->skip_cost[skip_ctx][1];
  } else {
    // Add in the cost of the no skip flag.
    rd_stats->rate += x->skip_cost[skip_ctx][0];
  }
  // Calculate the final RD estimate for this mode.
  const int64_t this_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
  // Keep a record of the best intra rd.
  if (this_rd < search_state->best_intra_rd) {
    search_state->best_intra_rd = this_rd;
    search_state->best_intra_mode = mbmi->mode;
  }

  if (sf->skip_intra_in_interframe) {
    if (search_state->best_rd < (INT64_MAX / 2) &&
        this_rd > (search_state->best_rd + (search_state->best_rd >> 1)))
      search_state->skip_intra_modes = 1;
  }

  if (!disable_skip) {
    for (int i = 0; i < REFERENCE_MODES; ++i)
      search_state->best_pred_rd[i] =
          AOMMIN(search_state->best_pred_rd[i], this_rd);
  }
  return this_rd;
}
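/* collect_single_states() below maintains per-(direction, mode) candidate
 * lists sorted by ascending rd using a textbook insertion step. A standalone
 * sketch of that step (hypothetical helper, kept compiled out):
 */
#if 0
static void insert_state_sorted_by_rd(SingleInterModeState *arr, int *cnt,
                                      SingleInterModeState new_state) {
  int j = *cnt;
  // Shift entries with a larger rd one slot to the right.
  while (j > 0 && arr[j - 1].rd > new_state.rd) {
    arr[j] = arr[j - 1];
    --j;
  }
  arr[j] = new_state;
  ++(*cnt);
}
#endif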
static void collect_single_states(MACROBLOCK *x,
                                  InterModeSearchState *search_state,
                                  const MB_MODE_INFO *const mbmi) {
  int i, j;
  const MV_REFERENCE_FRAME ref_frame = mbmi->ref_frame[0];
  const PREDICTION_MODE this_mode = mbmi->mode;
  const int dir = ref_frame <= GOLDEN_FRAME ? 0 : 1;
  const int mode_offset = INTER_OFFSET(this_mode);
  const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode);

  // Simple rd
  int64_t simple_rd = search_state->simple_rd[this_mode][0][ref_frame];
  for (int ref_mv_idx = 1; ref_mv_idx < ref_set; ++ref_mv_idx) {
    int64_t rd = search_state->simple_rd[this_mode][ref_mv_idx][ref_frame];
    if (rd < simple_rd) simple_rd = rd;
  }

  // Insertion sort of single_state
  SingleInterModeState this_state_s = { simple_rd, ref_frame, 1 };
  SingleInterModeState *state_s = search_state->single_state[dir][mode_offset];
  i = search_state->single_state_cnt[dir][mode_offset];
  for (j = i; j > 0 && state_s[j - 1].rd > this_state_s.rd; --j)
    state_s[j] = state_s[j - 1];
  state_s[j] = this_state_s;
  search_state->single_state_cnt[dir][mode_offset]++;

  // Modelled rd
  int64_t modelled_rd = search_state->modelled_rd[this_mode][0][ref_frame];
  for (int ref_mv_idx = 1; ref_mv_idx < ref_set; ++ref_mv_idx) {
    int64_t rd = search_state->modelled_rd[this_mode][ref_mv_idx][ref_frame];
    if (rd < modelled_rd) modelled_rd = rd;
  }

  // Insertion sort of single_state_modelled
  SingleInterModeState this_state_m = { modelled_rd, ref_frame, 1 };
  SingleInterModeState *state_m =
      search_state->single_state_modelled[dir][mode_offset];
  i = search_state->single_state_modelled_cnt[dir][mode_offset];
  for (j = i; j > 0 && state_m[j - 1].rd > this_state_m.rd; --j)
    state_m[j] = state_m[j - 1];
  state_m[j] = this_state_m;
  search_state->single_state_modelled_cnt[dir][mode_offset]++;
}
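/* analyze_single_states() below prunes with the integer test
 * (rd >> 3) * prune_factor > best_rd, which is roughly
 * rd > best_rd * 8 / prune_factor: a factor of 5 keeps states within about
 * 1.6x of the best rd, a factor of 6 within about 1.33x. A compiled-out
 * restatement of that test:
 */
#if 0
static int rd_state_is_prunable(int64_t rd, int64_t best_rd,
                                int prune_factor) {
  // E.g. best_rd = 8000, prune_factor = 5: prune once rd exceeds ~12800.
  return rd != INT64_MAX && (rd >> 3) * prune_factor > best_rd;
}
#endif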
static void analyze_single_states(const AV1_COMP *cpi,
                                  InterModeSearchState *search_state) {
  int i, j, dir, mode;
  if (cpi->sf.prune_comp_search_by_single_result >= 1) {
    for (dir = 0; dir < 2; ++dir) {
      int64_t best_rd;
      SingleInterModeState(*state)[FWD_REFS];
      const int prune_factor =
          cpi->sf.prune_comp_search_by_single_result >= 2 ? 6 : 5;

      // Use the best rd of GLOBALMV or NEWMV to prune the unlikely
      // reference frames for all the modes (NEARESTMV and NEARMV may not
      // have the same motion vectors). Always keep the best of each mode
      // because it might form the best possible combination with another mode.
      state = search_state->single_state[dir];
      best_rd = AOMMIN(state[INTER_OFFSET(NEWMV)][0].rd,
                       state[INTER_OFFSET(GLOBALMV)][0].rd);
      for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
        for (i = 1; i < search_state->single_state_cnt[dir][mode]; ++i) {
          if (state[mode][i].rd != INT64_MAX &&
              (state[mode][i].rd >> 3) * prune_factor > best_rd) {
            state[mode][i].valid = 0;
          }
        }
      }

      state = search_state->single_state_modelled[dir];
      best_rd = AOMMIN(state[INTER_OFFSET(NEWMV)][0].rd,
                       state[INTER_OFFSET(GLOBALMV)][0].rd);
      for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
        for (i = 1; i < search_state->single_state_modelled_cnt[dir][mode];
             ++i) {
          if (state[mode][i].rd != INT64_MAX &&
              (state[mode][i].rd >> 3) * prune_factor > best_rd) {
            state[mode][i].valid = 0;
          }
        }
      }
    }
  }

  // Order by simple rd first, then by modelled rd.
  for (dir = 0; dir < 2; ++dir) {
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
      const int state_cnt_s = search_state->single_state_cnt[dir][mode];
      const int state_cnt_m =
          search_state->single_state_modelled_cnt[dir][mode];
      SingleInterModeState *state_s = search_state->single_state[dir][mode];
      SingleInterModeState *state_m =
          search_state->single_state_modelled[dir][mode];
      int count = 0;
      const int max_candidates = AOMMAX(state_cnt_s, state_cnt_m);
      for (i = 0; i < state_cnt_s; ++i) {
        if (state_s[i].rd == INT64_MAX) break;
        if (state_s[i].valid)
          search_state->single_rd_order[dir][mode][count++] =
              state_s[i].ref_frame;
      }
      if (count < max_candidates) {
        for (i = 0; i < state_cnt_m; ++i) {
          if (state_m[i].rd == INT64_MAX) break;
          if (state_m[i].valid) {
            int ref_frame = state_m[i].ref_frame;
            int match = 0;
            // Check if this ref frame is already listed.
            for (j = 0; j < count; ++j) {
              if (search_state->single_rd_order[dir][mode][j] == ref_frame) {
                match = 1;
                break;
              }
            }
            if (!match) {
              // Check if this ref_frame was removed in the simple rd pass.
              int valid = 1;
              for (j = 0; j < state_cnt_s; j++) {
                if (ref_frame == state_s[j].ref_frame && !state_s[j].valid) {
                  valid = 0;
                  break;
                }
              }
              if (valid)
                search_state->single_rd_order[dir][mode][count++] = ref_frame;
            }
            if (count >= max_candidates) break;
          }
        }
      }
    }
  }
}
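/* The ordering pass above merges two preference lists: refs ranked by simple
 * rd first, then refs ranked by modelled rd, skipping duplicates and refs
 * already invalidated in the simple-rd list. A compiled-out sketch of the
 * dedup check (hypothetical helper mirroring the inner loop):
 */
#if 0
static int already_listed(const MV_REFERENCE_FRAME *order, int count,
                          MV_REFERENCE_FRAME ref_frame) {
  for (int j = 0; j < count; ++j)
    if (order[j] == ref_frame) return 1;
  return 0;
}
#endif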
static int compound_skip_get_candidates(
    const AV1_COMP *cpi, const InterModeSearchState *search_state,
    const int dir, const PREDICTION_MODE mode) {
  const int mode_offset = INTER_OFFSET(mode);
  const SingleInterModeState *state =
      search_state->single_state[dir][mode_offset];
  const SingleInterModeState *state_modelled =
      search_state->single_state_modelled[dir][mode_offset];
  int max_candidates = 0;
  int candidates;

  for (int i = 0; i < FWD_REFS; ++i) {
    if (search_state->single_rd_order[dir][mode_offset][i] == NONE_FRAME) break;
    max_candidates++;
  }

  candidates = max_candidates;
  if (cpi->sf.prune_comp_search_by_single_result >= 2) {
    candidates = AOMMIN(2, max_candidates);
  }
  if (cpi->sf.prune_comp_search_by_single_result >= 3) {
    if (state[0].rd != INT64_MAX && state_modelled[0].rd != INT64_MAX &&
        state[0].ref_frame == state_modelled[0].ref_frame)
      candidates = 1;
    if (mode == NEARMV || mode == GLOBALMV) candidates = 1;
  }
  return candidates;
}

static int compound_skip_by_single_states(
    const AV1_COMP *cpi, const InterModeSearchState *search_state,
    const PREDICTION_MODE this_mode, const MV_REFERENCE_FRAME ref_frame,
    const MV_REFERENCE_FRAME second_ref_frame, const MACROBLOCK *x) {
  const MV_REFERENCE_FRAME refs[2] = { ref_frame, second_ref_frame };
  const int mode[2] = { compound_ref0_mode(this_mode),
                        compound_ref1_mode(this_mode) };
  const int mode_offset[2] = { INTER_OFFSET(mode[0]), INTER_OFFSET(mode[1]) };
  const int mode_dir[2] = { refs[0] <= GOLDEN_FRAME ? 0 : 1,
                            refs[1] <= GOLDEN_FRAME ? 0 : 1 };
  int ref_searched[2] = { 0, 0 };
  int ref_mv_match[2] = { 1, 1 };
  int i, j;

  for (i = 0; i < 2; ++i) {
    const SingleInterModeState *state =
        search_state->single_state[mode_dir[i]][mode_offset[i]];
    const int state_cnt =
        search_state->single_state_cnt[mode_dir[i]][mode_offset[i]];
    for (j = 0; j < state_cnt; ++j) {
      if (state[j].ref_frame == refs[i]) {
        ref_searched[i] = 1;
        break;
      }
    }
  }

  const int ref_set = get_drl_refmv_count(x, refs, this_mode);
  for (i = 0; i < 2; ++i) {
    if (mode[i] == NEARESTMV || mode[i] == NEARMV) {
      const MV_REFERENCE_FRAME single_refs[2] = { refs[i], NONE_FRAME };
      int identical = 1;
      for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ref_mv_idx++) {
        int_mv single_mv;
        int_mv comp_mv;
        get_this_mv(&single_mv, mode[i], 0, ref_mv_idx, single_refs,
                    x->mbmi_ext);
        get_this_mv(&comp_mv, this_mode, i, ref_mv_idx, refs, x->mbmi_ext);

        identical &= (single_mv.as_int == comp_mv.as_int);
        if (!identical) {
          ref_mv_match[i] = 0;
          break;
        }
      }
    }
  }

  for (i = 0; i < 2; ++i) {
    if (ref_searched[i] && ref_mv_match[i]) {
      const int candidates =
          compound_skip_get_candidates(cpi, search_state, mode_dir[i], mode[i]);
      const MV_REFERENCE_FRAME *ref_order =
          search_state->single_rd_order[mode_dir[i]][mode_offset[i]];
      int match = 0;
      for (j = 0; j < candidates; ++j) {
        if (refs[i] == ref_order[j]) {
          match = 1;
          break;
        }
      }
      if (!match) return 1;
    }
  }

  return 0;
}

static INLINE int sf_check_is_drop_ref(const MODE_DEFINITION *mode,
                                       InterModeSearchState *search_state) {
  const MV_REFERENCE_FRAME ref_frame = mode->ref_frame[0];
  const MV_REFERENCE_FRAME second_ref_frame = mode->ref_frame[1];
  if (search_state->num_available_refs > 2) {
    if ((ref_frame == search_state->dist_order_refs[0] &&
         second_ref_frame == search_state->dist_order_refs[1]) ||
        (ref_frame == search_state->dist_order_refs[1] &&
         second_ref_frame == search_state->dist_order_refs[0]))
      return 1;  // drop this pair of refs
  }
  return 0;
}

static INLINE void sf_drop_ref_analyze(InterModeSearchState *search_state,
                                       const MODE_DEFINITION *mode,
                                       int64_t distortion2) {
  const PREDICTION_MODE this_mode = mode->mode;
  MV_REFERENCE_FRAME ref_frame = mode->ref_frame[0];
  const int idx = ref_frame - LAST_FRAME;
  if (idx && distortion2 > search_state->dist_refs[idx]) {
    search_state->dist_refs[idx] = distortion2;
    search_state->dist_order_refs[idx] = ref_frame;
  }

  // Reached the last single ref prediction mode.
  if (ref_frame == ALTREF_FRAME && this_mode == GLOBALMV) {
    // Bubble sort dist_refs and the order index.
    for (int i = 0; i < REF_FRAMES; ++i) {
      for (int k = i + 1; k < REF_FRAMES; ++k) {
        if (search_state->dist_refs[i] < search_state->dist_refs[k]) {
          int64_t tmp_dist = search_state->dist_refs[i];
          search_state->dist_refs[i] = search_state->dist_refs[k];
          search_state->dist_refs[k] = tmp_dist;

          int tmp_idx = search_state->dist_order_refs[i];
          search_state->dist_order_refs[i] = search_state->dist_order_refs[k];
          search_state->dist_order_refs[k] = tmp_idx;
        }
      }
    }
    for (int i = 0; i < REF_FRAMES; ++i) {
      if (search_state->dist_refs[i] == -1) break;
      search_state->num_available_refs = i;
    }
    search_state->num_available_refs++;
  }
}

// sf->prune_single_motion_modes_by_simple_trans
static int analyze_simple_trans_states(const AV1_COMP *cpi, MACROBLOCK *x) {
  (void)cpi;
  int64_t rdcosts[REF_FRAMES] = { INT64_MAX, INT64_MAX, INT64_MAX, INT64_MAX,
                                  INT64_MAX, INT64_MAX, INT64_MAX, INT64_MAX };
  int skip_ref = 0;
  int64_t min_rd = INT64_MAX;
  for (int i = 0; i < SINGLE_REF_MODES; ++i) {
    const MODE_DEFINITION *mode_order = &av1_mode_order[i];
    const MV_REFERENCE_FRAME ref_frame = mode_order->ref_frame[0];
    for (int k = 0; k < MAX_REF_MV_SERCH; ++k) {
      const int64_t rd = x->simple_rd_state[i][k].rd_stats.rdcost;
      rdcosts[ref_frame] = AOMMIN(rdcosts[ref_frame], rd);
      min_rd = AOMMIN(min_rd, rd);
    }
  }
  int valid_cnt = 0;
  for (int i = 1; i < REF_FRAMES; ++i) {
    if (rdcosts[i] == INT64_MAX) {
      skip_ref |= (1 << i);
    } else {
      valid_cnt++;
    }
  }
  if (valid_cnt < 2) {
    return 0;
  }
  // Raise the bar to 1.5x the best rd seen across all references.
  min_rd += (min_rd >> 1);
  if (valid_cnt > 2) {
    for (int i = 1; i < REF_FRAMES; ++i) {
      if (rdcosts[i] > min_rd) {
        skip_ref |= (1 << i);
      }
    }
  }
  return skip_ref;
}
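/* A compiled-out sketch of how the mask returned by
 * analyze_simple_trans_states() above is consumed in the second pass of the
 * mode loops below: bit i set means single-ref modes using reference frame i
 * may skip their motion mode search.
 */
#if 0
static int motion_mode_search_skipped(uint8_t motion_mode_skip_mask,
                                      MV_REFERENCE_FRAME ref_frame) {
  return (motion_mode_skip_mask & (1 << ref_frame)) != 0;
}
#endif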
static void alloc_compound_type_rd_buffers(AV1_COMMON *const cm,
                                           CompoundTypeRdBuffers *const bufs) {
  CHECK_MEM_ERROR(
      cm, bufs->pred0,
      (uint8_t *)aom_memalign(16, 2 * MAX_SB_SQUARE * sizeof(*bufs->pred0)));
  CHECK_MEM_ERROR(
      cm, bufs->pred1,
      (uint8_t *)aom_memalign(16, 2 * MAX_SB_SQUARE * sizeof(*bufs->pred1)));
  CHECK_MEM_ERROR(
      cm, bufs->residual1,
      (int16_t *)aom_memalign(32, MAX_SB_SQUARE * sizeof(*bufs->residual1)));
  CHECK_MEM_ERROR(
      cm, bufs->diff10,
      (int16_t *)aom_memalign(32, MAX_SB_SQUARE * sizeof(*bufs->diff10)));
  CHECK_MEM_ERROR(cm, bufs->tmp_best_mask_buf,
                  (uint8_t *)aom_malloc(2 * MAX_SB_SQUARE *
                                        sizeof(*bufs->tmp_best_mask_buf)));
}

static void release_compound_type_rd_buffers(
    CompoundTypeRdBuffers *const bufs) {
  aom_free(bufs->pred0);
  aom_free(bufs->pred1);
  aom_free(bufs->residual1);
  aom_free(bufs->diff10);
  aom_free(bufs->tmp_best_mask_buf);
  av1_zero(*bufs);  // Set all pointers to NULL for safety.
}
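/* The two helpers above are used strictly as an alloc/use/release pair around
 * a mode-search loop, as in this compiled-out sketch (do_mode_search is a
 * hypothetical placeholder for the loops below):
 */
#if 0
static void compound_buffers_usage_sketch(AV1_COMMON *cm) {
  CompoundTypeRdBuffers rd_buffers;
  alloc_compound_type_rd_buffers(cm, &rd_buffers);  // Aborts on failure.
  do_mode_search(&rd_buffers);
  release_compound_type_rd_buffers(&rd_buffers);  // Frees and NULLs pointers.
}
#endif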
// Enables do_tx_search on a per-mode basis.
int do_tx_search_mode(int do_tx_search_global, int midx, int adaptive) {
  if (!adaptive || do_tx_search_global) {
    return do_tx_search_global;
  }
  // A value of 2 indicates it is being turned on conditionally for the mode.
  // Turn it on for the first 7 modes.
  return midx < 7 ? 2 : 0;
}
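/* Compiled-out illustration of do_tx_search_mode(): with adaptive estimation
 * enabled and the global flag at 0, only the first 7 modes get conditional
 * (value 2) TX search; a nonzero global flag always wins.
 */
#if 0
static void do_tx_search_mode_examples(void) {
  assert(do_tx_search_mode(0, 3, 1) == 2);   // early mode: conditional search
  assert(do_tx_search_mode(0, 10, 1) == 0);  // later mode: estimate rd only
  assert(do_tx_search_mode(1, 10, 1) == 1);  // global flag overrides
}
#endif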
void av1_rd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
                               MACROBLOCK *x, int mi_row, int mi_col,
                               RD_STATS *rd_cost, BLOCK_SIZE bsize,
                               PICK_MODE_CONTEXT *ctx, int64_t best_rd_so_far) {
  AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  const SPEED_FEATURES *const sf = &cpi->sf;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  const int try_palette =
      cpi->oxcf.enable_palette &&
      av1_allow_palette(cm->allow_screen_content_tools, mbmi->sb_type);
  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
  const struct segmentation *const seg = &cm->seg;
  PREDICTION_MODE this_mode;
  unsigned char segment_id = mbmi->segment_id;
  int i;
  struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE];
  unsigned int ref_costs_single[REF_FRAMES];
  unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES];
  int *comp_inter_cost = x->comp_inter_cost[av1_get_reference_mode_context(xd)];
  mode_skip_mask_t mode_skip_mask;
  uint8_t motion_mode_skip_mask = 0;  // second pass of single ref modes

  InterModeSearchState search_state;
  init_inter_mode_search_state(&search_state, cpi, tile_data, x, bsize,
                               best_rd_so_far);
  INTERINTRA_MODE interintra_modes[REF_FRAMES] = {
    INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES,
    INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES
  };
  HandleInterModeArgs args = {
    { NULL },  { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE },
    { NULL },  { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1 },
    NULL,      NULL,
    NULL,      search_state.modelled_rd,
    { { 0 } }, INT_MAX,
    INT_MAX,   search_state.simple_rd,
    0,         interintra_modes,
    1,         NULL
  };
  for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;

  av1_invalid_rd_stats(rd_cost);

  // Ref frames that are selected by square partition blocks.
  int picked_ref_frames_mask = 0;
  if (cpi->sf.prune_ref_frame_for_rect_partitions &&
      mbmi->partition != PARTITION_NONE && mbmi->partition != PARTITION_SPLIT) {
    // prune_ref_frame_for_rect_partitions == 1 prunes only extended
    // partition blocks; >= 2 also prunes vertical and horizontal
    // partition blocks.
    if ((mbmi->partition != PARTITION_VERT &&
         mbmi->partition != PARTITION_HORZ) ||
        cpi->sf.prune_ref_frame_for_rect_partitions >= 2) {
      picked_ref_frames_mask = fetch_picked_ref_frames_mask(
          x, bsize, cm->seq_params.mib_size, mi_row, mi_col);
    }
  }

  // Skip ref frames that were never selected by square blocks.
  const int skip_ref_frame_mask =
      picked_ref_frames_mask ? ~picked_ref_frames_mask : 0;

  // Init params, set frame modes, speed features.
  set_params_rd_pick_inter_mode(cpi, x, &args, bsize, mi_row, mi_col,
                                &mode_skip_mask, skip_ref_frame_mask,
                                ref_costs_single, ref_costs_comp, yv12_mb);

  int64_t best_est_rd = INT64_MAX;
  // TODO(angiebird): Turn this on when this speed feature is well tested
  const InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
  // If do_tx_search_global is 0, only estimated RD should be computed.
  // If do_tx_search_global is 1, all modes have TX search performed.
  // If do_tx_search_global is 2, some modes will have TX search performed.
  const int do_tx_search_global =
      !((cpi->sf.inter_mode_rd_model_estimation == 1 && md->ready) ||
        (cpi->sf.inter_mode_rd_model_estimation == 2 &&
         x->source_variance < 512));
  InterModesInfo *inter_modes_info = x->inter_modes_info;
  inter_modes_info->num = 0;

  int intra_mode_num = 0;
  int intra_mode_idx_ls[MAX_MODES];
  int reach_first_comp_mode = 0;

  // Temporary buffers used by handle_inter_mode().
  uint8_t *const tmp_buf = get_buf_by_bd(xd, x->tmp_obmc_bufs[0]);

  CompoundTypeRdBuffers rd_buffers;
  alloc_compound_type_rd_buffers(cm, &rd_buffers);

  for (int midx = 0; midx < MAX_MODES; ++midx) {
    const int do_tx_search = do_tx_search_mode(
        do_tx_search_global, midx, sf->inter_mode_rd_model_estimation_adaptive);
    const MODE_DEFINITION *mode_order = &av1_mode_order[midx];
    this_mode = mode_order->mode;
    const MV_REFERENCE_FRAME ref_frame = mode_order->ref_frame[0];
    const MV_REFERENCE_FRAME second_ref_frame = mode_order->ref_frame[1];
    const int comp_pred = second_ref_frame > INTRA_FRAME;

    // When single ref motion search ends:
    // 1st pass: evaluate single ref RD results and rewind to the beginning;
    // 2nd pass: continue with compound ref search.
    if (sf->prune_single_motion_modes_by_simple_trans) {
      if (comp_pred && args.single_ref_first_pass) {
        args.single_ref_first_pass = 0;
        // Reached the first compound ref mode.
        // Reset midx to start the 2nd pass for single ref motion search.
        midx = -1;
        motion_mode_skip_mask = analyze_simple_trans_states(cpi, x);
        continue;
      }
      if (!comp_pred) {  // single ref mode
        if (args.single_ref_first_pass) {
          // Clear the stats.
          for (int k = 0; k < MAX_REF_MV_SERCH; ++k) {
            x->simple_rd_state[midx][k].rd_stats.rdcost = INT64_MAX;
            x->simple_rd_state[midx][k].early_skipped = 0;
          }
        } else {
          if (motion_mode_skip_mask & (1 << ref_frame)) {
            continue;
          }
        }
      }
    }

    // Reached the first compound prediction mode.
    if (sf->prune_comp_search_by_single_result > 0 && comp_pred &&
        reach_first_comp_mode == 0) {
      analyze_single_states(cpi, &search_state);
      reach_first_comp_mode = 1;
    }
    int64_t this_rd = INT64_MAX;
    int disable_skip = 0;
    int rate2 = 0, rate_y = 0, rate_uv = 0;
    int64_t distortion2 = 0;
    int skippable = 0;
    int this_skip2 = 0;

    init_mbmi(mbmi, midx, cm);

    x->skip = 0;
    set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);

    if (inter_mode_compatible_skip(cpi, x, bsize, midx)) continue;

    const int ret = inter_mode_search_order_independent_skip(
        cpi, x, bsize, midx, mi_row, mi_col, &mode_skip_mask, &search_state,
        skip_ref_frame_mask);
    if (ret == 1) continue;
    args.skip_motion_mode = (ret == 2);

    if (sf->drop_ref && comp_pred) {
      if (sf_check_is_drop_ref(mode_order, &search_state)) {
        continue;
      }
    }

    if (search_state.best_rd < search_state.mode_threshold[midx]) continue;

    if (sf->prune_comp_search_by_single_result > 0 && comp_pred) {
      if (compound_skip_by_single_states(cpi, &search_state, this_mode,
                                         ref_frame, second_ref_frame, x))
        continue;
    }
    const int ref_frame_cost = comp_pred
                                   ? ref_costs_comp[ref_frame][second_ref_frame]
                                   : ref_costs_single[ref_frame];
    const int compmode_cost =
        is_comp_ref_allowed(mbmi->sb_type) ? comp_inter_cost[comp_pred] : 0;
    const int real_compmode_cost =
        cm->current_frame.reference_mode == REFERENCE_MODE_SELECT
            ? compmode_cost
            : 0;

    if (comp_pred) {
      if ((sf->mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
          search_state.best_mode_index >= 0 &&
          search_state.best_mbmode.ref_frame[0] == INTRA_FRAME)
        continue;
    }

    if (ref_frame == INTRA_FRAME) {
      if ((!cpi->oxcf.enable_smooth_intra || sf->disable_smooth_intra) &&
          (mbmi->mode == SMOOTH_PRED || mbmi->mode == SMOOTH_H_PRED ||
           mbmi->mode == SMOOTH_V_PRED))
        continue;
      if (!cpi->oxcf.enable_paeth_intra && mbmi->mode == PAETH_PRED) continue;
      if (sf->adaptive_mode_search > 1)
        if ((x->source_variance << num_pels_log2_lookup[bsize]) >
            search_state.best_pred_sse)
          continue;

      if (this_mode != DC_PRED) {
        // Only search the oblique modes if the best so far is
        // one of the neighboring directional modes.
        if ((sf->mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
            (this_mode >= D45_PRED && this_mode <= PAETH_PRED)) {
          if (search_state.best_mode_index >= 0 &&
              search_state.best_mbmode.ref_frame[0] > INTRA_FRAME)
            continue;
        }
        if (sf->mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
          if (conditional_skipintra(this_mode, search_state.best_intra_mode))
            continue;
        }
      }
    }

    // Select prediction reference frames.
    for (i = 0; i < num_planes; i++) {
      xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
      if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
    }

    if (ref_frame == INTRA_FRAME) {
      intra_mode_idx_ls[intra_mode_num++] = midx;
      continue;
    } else {
      mbmi->angle_delta[PLANE_TYPE_Y] = 0;
      mbmi->angle_delta[PLANE_TYPE_UV] = 0;
      mbmi->filter_intra_mode_info.use_filter_intra = 0;
      mbmi->ref_mv_idx = 0;
      int64_t ref_best_rd = search_state.best_rd;
      {
        RD_STATS rd_stats, rd_stats_y, rd_stats_uv;
        av1_init_rd_stats(&rd_stats);
        rd_stats.rate = rate2;

        // Point to variables that are maintained between loop iterations.
        args.single_newmv = search_state.single_newmv;
        args.single_newmv_rate = search_state.single_newmv_rate;
        args.single_newmv_valid = search_state.single_newmv_valid;
        args.single_comp_cost = real_compmode_cost;
        args.ref_frame_cost = ref_frame_cost;
        if (midx < MAX_SINGLE_REF_MODES) {
          args.simple_rd_state = x->simple_rd_state[midx];
        }

#if CONFIG_COLLECT_COMPONENT_TIMING
        start_timing(cpi, handle_inter_mode_time);
#endif
        this_rd = handle_inter_mode(
            cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv,
            &disable_skip, mi_row, mi_col, &args, ref_best_rd, tmp_buf,
            &rd_buffers, &best_est_rd, do_tx_search, inter_modes_info);
#if CONFIG_COLLECT_COMPONENT_TIMING
        end_timing(cpi, handle_inter_mode_time);
#endif
        rate2 = rd_stats.rate;
        skippable = rd_stats.skip;
        distortion2 = rd_stats.dist;
        rate_y = rd_stats_y.rate;
        rate_uv = rd_stats_uv.rate;
      }

      if (sf->prune_comp_search_by_single_result > 0 &&
          is_inter_singleref_mode(this_mode) && args.single_ref_first_pass) {
        collect_single_states(x, &search_state, mbmi);
      }
      if (this_rd == INT64_MAX) continue;

      this_skip2 = mbmi->skip;
      this_rd = RDCOST(x->rdmult, rate2, distortion2);
      if (this_skip2) {
        rate_y = 0;
        rate_uv = 0;
      }
    }

    // Did this mode help, i.e. is it the new best mode?
    if (this_rd < search_state.best_rd || x->skip) {
      int mode_excluded = 0;
      if (comp_pred) {
        mode_excluded = cm->current_frame.reference_mode == SINGLE_REFERENCE;
      }
      if (!mode_excluded) {
        // Note index of best mode so far.
        search_state.best_mode_index = midx;

        if (ref_frame == INTRA_FRAME) {
          /* required for left and above block mv */
          mbmi->mv[0].as_int = 0;
        } else {
          search_state.best_pred_sse = x->pred_sse[ref_frame];
        }

        rd_cost->rate = rate2;
        rd_cost->dist = distortion2;
        rd_cost->rdcost = this_rd;
        search_state.best_rd = this_rd;
        search_state.best_mbmode = *mbmi;
        search_state.best_skip2 = this_skip2;
        search_state.best_mode_skippable = skippable;
        if (do_tx_search) {
          // When do_tx_search == 0, handle_inter_mode won't provide correct
          // rate_y and rate_uv because the txfm_search process is replaced by
          // rd estimation.
          // Therefore, we should avoid updating best_rate_y and best_rate_uv
          // here. These two values will be updated when txfm_search is called.
          search_state.best_rate_y =
              rate_y +
              x->skip_cost[av1_get_skip_context(xd)][this_skip2 || skippable];
          search_state.best_rate_uv = rate_uv;
        }
        memcpy(ctx->blk_skip, x->blk_skip,
               sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);
      }
    }

    /* keep record of best compound/single-only prediction */
    if (!disable_skip && ref_frame != INTRA_FRAME) {
      int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;

      if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT) {
        single_rate = rate2 - compmode_cost;
        hybrid_rate = rate2;
      } else {
        single_rate = rate2;
        hybrid_rate = rate2 + compmode_cost;
      }

      single_rd = RDCOST(x->rdmult, single_rate, distortion2);
      hybrid_rd = RDCOST(x->rdmult, hybrid_rate, distortion2);

      if (!comp_pred) {
        if (single_rd < search_state.best_pred_rd[SINGLE_REFERENCE])
          search_state.best_pred_rd[SINGLE_REFERENCE] = single_rd;
      } else {
        if (single_rd < search_state.best_pred_rd[COMPOUND_REFERENCE])
          search_state.best_pred_rd[COMPOUND_REFERENCE] = single_rd;
      }
      if (hybrid_rd < search_state.best_pred_rd[REFERENCE_MODE_SELECT])
        search_state.best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
    }
    if (sf->drop_ref && second_ref_frame == NONE_FRAME) {
      // Collect data from single ref mode, and analyze the data.
      sf_drop_ref_analyze(&search_state, mode_order, distortion2);
    }

    if (x->skip && !comp_pred) break;
  }

  release_compound_type_rd_buffers(&rd_buffers);

#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, do_tx_search_time);
#endif
  if (do_tx_search_global != 1) {
    inter_modes_info_sort(inter_modes_info, inter_modes_info->rd_idx_pair_arr);
    search_state.best_rd = INT64_MAX;
    int64_t top_est_rd =
        inter_modes_info->num > 0
            ? inter_modes_info
                  ->est_rd_arr[inter_modes_info->rd_idx_pair_arr[0].idx]
            : INT64_MAX;
    for (int j = 0; j < inter_modes_info->num; ++j) {
      const int data_idx = inter_modes_info->rd_idx_pair_arr[j].idx;
      *mbmi = inter_modes_info->mbmi_arr[data_idx];
      int64_t curr_est_rd = inter_modes_info->est_rd_arr[data_idx];
      // Stop once the estimated rd exceeds the best estimate by 25%.
      if (curr_est_rd * 0.80 > top_est_rd) break;

      RD_STATS rd_stats;
      RD_STATS rd_stats_y;
      RD_STATS rd_stats_uv;

      bool true_rd = inter_modes_info->true_rd_arr[data_idx];
      if (true_rd) {
        rd_stats = inter_modes_info->rd_cost_arr[data_idx];
        rd_stats_y = inter_modes_info->rd_cost_y_arr[data_idx];
        rd_stats_uv = inter_modes_info->rd_cost_uv_arr[data_idx];
        memcpy(x->blk_skip, inter_modes_info->blk_skip_arr[data_idx],
               sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);
      } else {
        const int mode_rate = inter_modes_info->mode_rate_arr[data_idx];

        x->skip = 0;
        set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);

        // Select prediction reference frames.
        const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
        for (i = 0; i < num_planes; i++) {
          xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
          if (is_comp_pred)
            xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
        }

        av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                      av1_num_planes(cm) - 1);
        if (mbmi->motion_mode == OBMC_CAUSAL)
          av1_build_obmc_inter_predictors_sb(cm, xd, mi_row, mi_col);

        if (!txfm_search(cpi, tile_data, x, bsize, mi_row, mi_col, &rd_stats,
                         &rd_stats_y, &rd_stats_uv, mode_rate,
                         search_state.best_rd)) {
          continue;
        } else if (cpi->sf.inter_mode_rd_model_estimation == 1) {
          const int skip_ctx = av1_get_skip_context(xd);
          inter_mode_data_push(tile_data, mbmi->sb_type, rd_stats.sse,
                               rd_stats.dist,
                               rd_stats_y.rate + rd_stats_uv.rate +
                                   x->skip_cost[skip_ctx][mbmi->skip]);
        }
        rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist);
      }

      if (rd_stats.rdcost < search_state.best_rd) {
        search_state.best_rd = rd_stats.rdcost;
        // Note index of best mode so far.
        const int mode_index = get_prediction_mode_idx(
            mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
        search_state.best_mode_index = mode_index;
        *rd_cost = rd_stats;
        search_state.best_rd = rd_stats.rdcost;
        search_state.best_mbmode = *mbmi;
        search_state.best_skip2 = mbmi->skip;
        search_state.best_mode_skippable = rd_stats.skip;
        search_state.best_rate_y =
            rd_stats_y.rate +
            x->skip_cost[av1_get_skip_context(xd)][rd_stats.skip || mbmi->skip];
        search_state.best_rate_uv = rd_stats_uv.rate;
        memcpy(ctx->blk_skip, x->blk_skip,
               sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);
      }
    }
  }
#if CONFIG_COLLECT_COMPONENT_TIMING
  end_timing(cpi, do_tx_search_time);
#endif

#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, handle_intra_mode_time);
#endif
  for (int j = 0; j < intra_mode_num; ++j) {
    const int mode_index = intra_mode_idx_ls[j];
    const MV_REFERENCE_FRAME ref_frame =
        av1_mode_order[mode_index].ref_frame[0];
    assert(av1_mode_order[mode_index].ref_frame[1] == NONE_FRAME);
    assert(ref_frame == INTRA_FRAME);
    if (sf->skip_intra_in_interframe && search_state.skip_intra_modes) break;
    init_mbmi(mbmi, mode_index, cm);
    x->skip = 0;
    set_ref_ptrs(cm, xd, INTRA_FRAME, NONE_FRAME);

    // Select prediction reference frames.
    for (i = 0; i < num_planes; i++) {
      xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
    }

    RD_STATS intra_rd_stats, intra_rd_stats_y, intra_rd_stats_uv;

    const int ref_frame_cost = ref_costs_single[ref_frame];
    intra_rd_stats.rdcost = handle_intra_mode(
        &search_state, cpi, x, bsize, mi_row, mi_col, ref_frame_cost, ctx, 0,
        &intra_rd_stats, &intra_rd_stats_y, &intra_rd_stats_uv);
    if (intra_rd_stats.rdcost < search_state.best_rd) {
      search_state.best_rd = intra_rd_stats.rdcost;
      // Note index of best mode so far.
      search_state.best_mode_index = mode_index;
      *rd_cost = intra_rd_stats;
      search_state.best_rd = intra_rd_stats.rdcost;
      search_state.best_mbmode = *mbmi;
      search_state.best_skip2 = 0;
      search_state.best_mode_skippable = intra_rd_stats.skip;
      search_state.best_rate_y =
          intra_rd_stats_y.rate +
          x->skip_cost[av1_get_skip_context(xd)][intra_rd_stats.skip];
      search_state.best_rate_uv = intra_rd_stats_uv.rate;
      memcpy(ctx->blk_skip, x->blk_skip,
             sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);
    }
  }
#if CONFIG_COLLECT_COMPONENT_TIMING
  end_timing(cpi, handle_intra_mode_time);
#endif

  // In effect only when speed >= 2.
  sf_refine_fast_tx_type_search(
      cpi, x, mi_row, mi_col, rd_cost, bsize, ctx, search_state.best_mode_index,
      &search_state.best_mbmode, yv12_mb, search_state.best_rate_y,
      search_state.best_rate_uv, &search_state.best_skip2);

  // Only try palette mode when the best mode so far is an intra mode.
  if (try_palette && !is_inter_mode(search_state.best_mbmode.mode)) {
    search_palette_mode(cpi, x, mi_row, mi_col, rd_cost, ctx, bsize, mbmi, pmi,
                        ref_costs_single, &search_state);
  }
  search_state.best_mbmode.skip_mode = 0;
  if (cm->current_frame.skip_mode_info.skip_mode_flag &&
      !segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
      is_comp_ref_allowed(bsize)) {
    rd_pick_skip_mode(rd_cost, &search_state, cpi, x, bsize, mi_row, mi_col,
                      yv12_mb);
  }

  // Make sure that the ref_mv_idx is only nonzero when we're
  // using a mode which can support ref_mv_idx.
  if (search_state.best_mbmode.ref_mv_idx != 0 &&
      !(search_state.best_mbmode.mode == NEWMV ||
        search_state.best_mbmode.mode == NEW_NEWMV ||
        have_nearmv_in_inter_mode(search_state.best_mbmode.mode))) {
    search_state.best_mbmode.ref_mv_idx = 0;
  }

  if (search_state.best_mode_index < 0 ||
      search_state.best_rd >= best_rd_so_far) {
    rd_cost->rate = INT_MAX;
    rd_cost->rdcost = INT64_MAX;
    return;
  }

  assert(
      (cm->interp_filter == SWITCHABLE) ||
      (cm->interp_filter ==
       av1_extract_interp_filter(search_state.best_mbmode.interp_filters, 0)) ||
      !is_inter_block(&search_state.best_mbmode));
  assert(
      (cm->interp_filter == SWITCHABLE) ||
      (cm->interp_filter ==
       av1_extract_interp_filter(search_state.best_mbmode.interp_filters, 1)) ||
      !is_inter_block(&search_state.best_mbmode));

  if (!cpi->rc.is_src_frame_alt_ref)
    av1_update_rd_thresh_fact(cm, tile_data->thresh_freq_fact,
                              sf->adaptive_rd_thresh, bsize,
                              search_state.best_mode_index);

  // macroblock modes
  *mbmi = search_state.best_mbmode;
  x->skip |= search_state.best_skip2;

  // Note: this section is needed since the mode may have been forced to
  // GLOBALMV by the all-zero mode handling of ref-mv.
  if (mbmi->mode == GLOBALMV || mbmi->mode == GLOBAL_GLOBALMV) {
    // Correct the interp filters for GLOBALMV.
    if (is_nontrans_global_motion(xd, xd->mi[0])) {
      assert(mbmi->interp_filters ==
             av1_broadcast_interp_filter(
                 av1_unswitchable_filter(cm->interp_filter)));
    }
  }

  for (i = 0; i < REFERENCE_MODES; ++i) {
    if (search_state.best_pred_rd[i] == INT64_MAX)
      search_state.best_pred_diff[i] = INT_MIN;
    else
      search_state.best_pred_diff[i] =
          search_state.best_rd - search_state.best_pred_rd[i];
  }

  x->skip |= search_state.best_mode_skippable;

  assert(search_state.best_mode_index >= 0);

  store_coding_context(x, ctx, search_state.best_mode_index,
                       search_state.best_pred_diff,
                       search_state.best_mode_skippable);

  if (pmi->palette_size[1] > 0) {
    assert(try_palette);
    restore_uv_color_map(cpi, x);
  }
}
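/* Both pickers gate references with the same bit trick: if square blocks
 * recorded a nonzero picked_ref_frames_mask, its complement marks every
 * never-picked reference. Worked example with hypothetical values: picking
 * LAST_FRAME (bit 1) and ALTREF_FRAME (bit 7) gives mask 0x82, so
 * skip_ref_frame_mask = ~0x82 flags all other references. Compiled-out
 * restatement:
 */
#if 0
static int ref_is_skippable(int picked_ref_frames_mask, int ref_type) {
  const int skip_mask = picked_ref_frames_mask ? ~picked_ref_frames_mask : 0;
  return (skip_mask & (1 << ref_type)) != 0;
}
#endif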
// TODO(kyslov): now this is very similar to av1_rd_pick_inter_mode_sb except:
//               it only checks non-compound mode and
//               it doesn't check palette mode
//               it doesn't refine tx search
//               this function is likely to be heavily modified with nonrd
//               mode decision
void av1_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
                                  MACROBLOCK *x, int mi_row, int mi_col,
                                  RD_STATS *rd_cost, BLOCK_SIZE bsize,
                                  PICK_MODE_CONTEXT *ctx,
                                  int64_t best_rd_so_far) {
  AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  const SPEED_FEATURES *const sf = &cpi->sf;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  const struct segmentation *const seg = &cm->seg;
  PREDICTION_MODE this_mode;
  unsigned char segment_id = mbmi->segment_id;
  int i;
  struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE];
  unsigned int ref_costs_single[REF_FRAMES];
  unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES];
  int *comp_inter_cost = x->comp_inter_cost[av1_get_reference_mode_context(xd)];
  mode_skip_mask_t mode_skip_mask;
  uint8_t motion_mode_skip_mask = 0;  // second pass of single ref modes

  InterModeSearchState search_state;
  init_inter_mode_search_state(&search_state, cpi, tile_data, x, bsize,
                               best_rd_so_far);
  INTERINTRA_MODE interintra_modes[REF_FRAMES] = {
    INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES,
    INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES
  };
  HandleInterModeArgs args = {
    { NULL },  { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE },
    { NULL },  { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1 },
    NULL,      NULL,
    NULL,      search_state.modelled_rd,
    { { 0 } }, INT_MAX,
    INT_MAX,   search_state.simple_rd,
    0,         interintra_modes,
    1,         NULL
  };
  for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;

  av1_invalid_rd_stats(rd_cost);

  // Ref frames that are selected by square partition blocks.
  int picked_ref_frames_mask = 0;
  if (cpi->sf.prune_ref_frame_for_rect_partitions &&
      mbmi->partition != PARTITION_NONE && mbmi->partition != PARTITION_SPLIT) {
    // Don't enable for vert and horz partition blocks if the current frame
    // will be used as a bwd or arf2 reference.
    if ((!cpi->refresh_bwd_ref_frame && !cpi->refresh_alt2_ref_frame) ||
        (mbmi->partition != PARTITION_VERT &&
         mbmi->partition != PARTITION_HORZ)) {
      picked_ref_frames_mask = fetch_picked_ref_frames_mask(
          x, bsize, cm->seq_params.mib_size, mi_row, mi_col);
    }
  }

  // Skip ref frames that were never selected by square blocks.
  const int skip_ref_frame_mask =
      picked_ref_frames_mask ? ~picked_ref_frames_mask : 0;

  // Init params, set frame modes, speed features.
  set_params_nonrd_pick_inter_mode(cpi, x, &args, bsize, mi_row, mi_col,
                                   &mode_skip_mask, skip_ref_frame_mask,
                                   ref_costs_single, ref_costs_comp, yv12_mb);

  int64_t best_est_rd = INT64_MAX;
  InterModesInfo *inter_modes_info = x->inter_modes_info;
  inter_modes_info->num = 0;

  int intra_mode_num = 0;
  int intra_mode_idx_ls[MAX_MODES];
  int reach_first_comp_mode = 0;

  // Temporary buffers used by handle_inter_mode().
  uint8_t *const tmp_buf = get_buf_by_bd(xd, x->tmp_obmc_bufs[0]);

  CompoundTypeRdBuffers rd_buffers;
  alloc_compound_type_rd_buffers(cm, &rd_buffers);

  for (int midx = 0; midx < MAX_MODES; ++midx) {
    const MODE_DEFINITION *mode_order = &av1_mode_order[midx];
    this_mode = mode_order->mode;
    const MV_REFERENCE_FRAME ref_frame = mode_order->ref_frame[0];
    const MV_REFERENCE_FRAME second_ref_frame = mode_order->ref_frame[1];
    const int comp_pred = second_ref_frame > INTRA_FRAME;

    if (second_ref_frame != NONE_FRAME) continue;

    // When single ref motion search ends:
    // 1st pass: evaluate single ref RD results and rewind to the beginning;
    // 2nd pass: continue with compound ref search.
    if (sf->prune_single_motion_modes_by_simple_trans) {
      if (comp_pred && args.single_ref_first_pass) {
        args.single_ref_first_pass = 0;
        // Reached the first compound ref mode.
        // Reset midx to start the 2nd pass for single ref motion search.
        midx = -1;
        motion_mode_skip_mask = analyze_simple_trans_states(cpi, x);
        continue;
      }
      if (!comp_pred && ref_frame != INTRA_FRAME) {  // single ref mode
        if (args.single_ref_first_pass) {
          // Clear the stats.
          for (int k = 0; k < MAX_REF_MV_SERCH; ++k) {
            x->simple_rd_state[midx][k].rd_stats.rdcost = INT64_MAX;
            x->simple_rd_state[midx][k].early_skipped = 0;
          }
        } else {
          if (motion_mode_skip_mask & (1 << ref_frame)) {
            continue;
          }
        }
      }
    }

    // Reached the first compound prediction mode.
    if (sf->prune_comp_search_by_single_result > 0 && comp_pred &&
        reach_first_comp_mode == 0) {
      analyze_single_states(cpi, &search_state);
      reach_first_comp_mode = 1;
    }
    int64_t this_rd = INT64_MAX;
    int disable_skip = 0;
    int rate2 = 0;
    int64_t distortion2 = 0;
    int skippable = 0;
    int this_skip2 = 0;

    init_mbmi(mbmi, midx, cm);

    x->skip = 0;
    set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);

    if (inter_mode_compatible_skip(cpi, x, bsize, midx)) continue;

    const int ret = inter_mode_search_order_independent_skip(
        cpi, x, bsize, midx, mi_row, mi_col, &mode_skip_mask, &search_state,
        skip_ref_frame_mask);
    if (ret == 1) continue;
    args.skip_motion_mode = (ret == 2);

    if (sf->drop_ref && comp_pred) {
      if (sf_check_is_drop_ref(mode_order, &search_state)) {
        continue;
      }
    }

    if (search_state.best_rd < search_state.mode_threshold[midx]) continue;

    if (sf->prune_comp_search_by_single_result > 0 && comp_pred) {
      if (compound_skip_by_single_states(cpi, &search_state, this_mode,
                                         ref_frame, second_ref_frame, x))
        continue;
    }

    const int ref_frame_cost = comp_pred
                                   ? ref_costs_comp[ref_frame][second_ref_frame]
                                   : ref_costs_single[ref_frame];
    const int compmode_cost =
        is_comp_ref_allowed(mbmi->sb_type) ? comp_inter_cost[comp_pred] : 0;
    const int real_compmode_cost =
        cm->current_frame.reference_mode == REFERENCE_MODE_SELECT
            ? compmode_cost
            : 0;

    if (comp_pred) {
      if ((sf->mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
          search_state.best_mode_index >= 0 &&
          search_state.best_mbmode.ref_frame[0] == INTRA_FRAME)
        continue;
    }

    if (ref_frame == INTRA_FRAME) {
      if (!cpi->oxcf.enable_smooth_intra &&
          (mbmi->mode == SMOOTH_PRED || mbmi->mode == SMOOTH_H_PRED ||
           mbmi->mode == SMOOTH_V_PRED))
        continue;
      if (!cpi->oxcf.enable_paeth_intra && mbmi->mode == PAETH_PRED) continue;
      if (sf->adaptive_mode_search > 1)
        if ((x->source_variance << num_pels_log2_lookup[bsize]) >
            search_state.best_pred_sse)
          continue;

      if (this_mode != DC_PRED) {
        // Only search the oblique modes if the best so far is
        // one of the neighboring directional modes.
        if ((sf->mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
            (this_mode >= D45_PRED && this_mode <= PAETH_PRED)) {
          if (search_state.best_mode_index >= 0 &&
              search_state.best_mbmode.ref_frame[0] > INTRA_FRAME)
            continue;
        }
        if (sf->mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
          if (conditional_skipintra(this_mode, search_state.best_intra_mode))
            continue;
        }
      }
    }

    // Select prediction reference frames.
    for (i = 0; i < num_planes; i++) {
      xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
      if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
    }

    if (ref_frame == INTRA_FRAME) {
      intra_mode_idx_ls[intra_mode_num++] = midx;
      continue;
    } else {
      mbmi->angle_delta[PLANE_TYPE_Y] = 0;
      mbmi->angle_delta[PLANE_TYPE_UV] = 0;
      mbmi->filter_intra_mode_info.use_filter_intra = 0;
      mbmi->ref_mv_idx = 0;
      int64_t ref_best_rd = search_state.best_rd;
      {
        RD_STATS rd_stats, rd_stats_y, rd_stats_uv;
        av1_init_rd_stats(&rd_stats);
        rd_stats.rate = rate2;

        // Point to variables that are maintained between loop iterations.
        args.single_newmv = search_state.single_newmv;
        args.single_newmv_rate = search_state.single_newmv_rate;
        args.single_newmv_valid = search_state.single_newmv_valid;
        args.single_comp_cost = real_compmode_cost;
        args.ref_frame_cost = ref_frame_cost;
        if (midx < MAX_SINGLE_REF_MODES) {
          args.simple_rd_state = x->simple_rd_state[midx];
        }
        this_rd = handle_inter_mode(
            cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv,
            &disable_skip, mi_row, mi_col, &args, ref_best_rd, tmp_buf,
            &rd_buffers, &best_est_rd, 0, inter_modes_info);
        rate2 = rd_stats.rate;
        skippable = rd_stats.skip;
        distortion2 = rd_stats.dist;
      }

      if (sf->prune_comp_search_by_single_result > 0 &&
          is_inter_singleref_mode(this_mode) && args.single_ref_first_pass) {
        collect_single_states(x, &search_state, mbmi);
      }

      if (this_rd == INT64_MAX) continue;

      this_skip2 = mbmi->skip;
      this_rd = RDCOST(x->rdmult, rate2, distortion2);
    }

    // Did this mode help, i.e. is it the new best mode?

    // Did this mode help, i.e., is it the new best mode so far?
    if (this_rd < search_state.best_rd || x->skip) {
      int mode_excluded = 0;
      if (comp_pred) {
        mode_excluded = cm->current_frame.reference_mode == SINGLE_REFERENCE;
      }
      if (!mode_excluded) {
        // Note index of best mode so far.
        search_state.best_mode_index = midx;

        if (ref_frame == INTRA_FRAME) {
          /* required for left and above block mv */
          mbmi->mv[0].as_int = 0;
        } else {
          search_state.best_pred_sse = x->pred_sse[ref_frame];
        }

        rd_cost->rate = rate2;
        rd_cost->dist = distortion2;
        rd_cost->rdcost = this_rd;
        search_state.best_rd = this_rd;
        search_state.best_mbmode = *mbmi;
        search_state.best_skip2 = this_skip2;
        search_state.best_mode_skippable = skippable;
        memcpy(ctx->blk_skip, x->blk_skip,
               sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);
      }
    }

    /* keep record of best compound/single-only prediction */
    if (!disable_skip && ref_frame != INTRA_FRAME) {
      int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;

      if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT) {
        single_rate = rate2 - compmode_cost;
        hybrid_rate = rate2;
      } else {
        single_rate = rate2;
        hybrid_rate = rate2 + compmode_cost;
      }

      single_rd = RDCOST(x->rdmult, single_rate, distortion2);
      hybrid_rd = RDCOST(x->rdmult, hybrid_rate, distortion2);

      if (!comp_pred) {
        if (single_rd < search_state.best_pred_rd[SINGLE_REFERENCE])
          search_state.best_pred_rd[SINGLE_REFERENCE] = single_rd;
      } else {
        if (single_rd < search_state.best_pred_rd[COMPOUND_REFERENCE])
          search_state.best_pred_rd[COMPOUND_REFERENCE] = single_rd;
      }
      if (hybrid_rd < search_state.best_pred_rd[REFERENCE_MODE_SELECT])
        search_state.best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
    }
    if (sf->drop_ref && second_ref_frame == NONE_FRAME) {
      // Collect data from single ref mode, and analyze data.
      sf_drop_ref_analyze(&search_state, mode_order, distortion2);
    }

    if (x->skip && !comp_pred) break;
  }

  release_compound_type_rd_buffers(&rd_buffers);

  inter_modes_info_sort(inter_modes_info, inter_modes_info->rd_idx_pair_arr);
  search_state.best_rd = INT64_MAX;
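
  // Second stage of the inter search: candidates whose transform search was
  // deferred were pushed into inter_modes_info together with a modeled RD
  // estimate; now that the list is sorted by that estimate, the most
  // promising candidate gets a full transform search below.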

  if (inter_modes_info->num > 0) {
    const int data_idx = inter_modes_info->rd_idx_pair_arr[0].idx;
    *mbmi = inter_modes_info->mbmi_arr[data_idx];
    const int mode_rate = inter_modes_info->mode_rate_arr[data_idx];

    x->skip = 0;
    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);

    // Select prediction reference frames.
    const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
    for (i = 0; i < num_planes; i++) {
      xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
      if (is_comp_pred) xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
    }

    RD_STATS rd_stats;
    RD_STATS rd_stats_y;
    RD_STATS rd_stats_uv;

    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                  av1_num_planes(cm) - 1);
    if (mbmi->motion_mode == OBMC_CAUSAL)
      av1_build_obmc_inter_predictors_sb(cm, xd, mi_row, mi_col);

    if (txfm_search(cpi, tile_data, x, bsize, mi_row, mi_col, &rd_stats,
                    &rd_stats_y, &rd_stats_uv, mode_rate,
                    search_state.best_rd)) {
      if (cpi->sf.inter_mode_rd_model_estimation == 1) {
        const int skip_ctx = av1_get_skip_context(xd);
        inter_mode_data_push(tile_data, mbmi->sb_type, rd_stats.sse,
                             rd_stats.dist,
                             rd_stats_y.rate + rd_stats_uv.rate +
                                 x->skip_cost[skip_ctx][mbmi->skip]);
      }
      rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist);

      if (rd_stats.rdcost < search_state.best_rd) {
        search_state.best_rd = rd_stats.rdcost;
        // Note index of best mode so far.
        const int mode_index = get_prediction_mode_idx(
            mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
        search_state.best_mode_index = mode_index;
        *rd_cost = rd_stats;
        search_state.best_mbmode = *mbmi;
        search_state.best_skip2 = mbmi->skip;
        search_state.best_mode_skippable = rd_stats.skip;
        search_state.best_rate_y =
            rd_stats_y.rate +
            x->skip_cost[av1_get_skip_context(xd)][rd_stats.skip || mbmi->skip];
        search_state.best_rate_uv = rd_stats_uv.rate;
        memcpy(ctx->blk_skip, x->blk_skip,
               sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);
      }
    }
  }
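
  // Intra candidates were only recorded (in intra_mode_idx_ls) during the
  // main mode loop above; they are evaluated here so that handle_intra_mode()
  // can prune against the best inter RD cost found so far.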

  for (int j = 0; j < intra_mode_num; ++j) {
    const int mode_index = intra_mode_idx_ls[j];
    const MV_REFERENCE_FRAME ref_frame =
        av1_mode_order[mode_index].ref_frame[0];
    assert(av1_mode_order[mode_index].ref_frame[1] == NONE_FRAME);
    assert(ref_frame == INTRA_FRAME);
    if (sf->skip_intra_in_interframe && search_state.skip_intra_modes) break;
    init_mbmi(mbmi, mode_index, cm);
    x->skip = 0;
    set_ref_ptrs(cm, xd, INTRA_FRAME, NONE_FRAME);

    // Select prediction reference frames.
    for (i = 0; i < num_planes; i++) {
      xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
    }

    RD_STATS intra_rd_stats, intra_rd_stats_y, intra_rd_stats_uv;

    const int ref_frame_cost = ref_costs_single[ref_frame];
    intra_rd_stats.rdcost = handle_intra_mode(
        &search_state, cpi, x, bsize, mi_row, mi_col, ref_frame_cost, ctx, 0,
        &intra_rd_stats, &intra_rd_stats_y, &intra_rd_stats_uv);
    if (intra_rd_stats.rdcost < search_state.best_rd) {
      search_state.best_rd = intra_rd_stats.rdcost;
      // Note index of best mode so far.
      search_state.best_mode_index = mode_index;
      *rd_cost = intra_rd_stats;
      search_state.best_mbmode = *mbmi;
      search_state.best_skip2 = 0;
      search_state.best_mode_skippable = intra_rd_stats.skip;
      search_state.best_rate_y =
          intra_rd_stats_y.rate +
          x->skip_cost[av1_get_skip_context(xd)][intra_rd_stats.skip];
      search_state.best_rate_uv = intra_rd_stats_uv.rate;
      memcpy(ctx->blk_skip, x->blk_skip,
             sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);
    }
  }
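
  // Hedged background note: skip_mode is the AV1 syntax in which a block
  // reuses a derived compound prediction from the reference-frame pair
  // signaled in skip_mode_info and codes no residual at all;
  // rd_pick_skip_mode() below weighs that option against the best mode found
  // by the searches above.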

  search_state.best_mbmode.skip_mode = 0;
  if (cm->current_frame.skip_mode_info.skip_mode_flag &&
      !segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
      is_comp_ref_allowed(bsize)) {
    rd_pick_skip_mode(rd_cost, &search_state, cpi, x, bsize, mi_row, mi_col,
                      yv12_mb);
  }

  // Make sure that the ref_mv_idx is only nonzero when we're
  // using a mode which can support ref_mv_idx.
  if (search_state.best_mbmode.ref_mv_idx != 0 &&
      !(search_state.best_mbmode.mode == NEWMV ||
        search_state.best_mbmode.mode == NEW_NEWMV ||
        have_nearmv_in_inter_mode(search_state.best_mbmode.mode))) {
    search_state.best_mbmode.ref_mv_idx = 0;
  }

  if (search_state.best_mode_index < 0 ||
      search_state.best_rd >= best_rd_so_far) {
    rd_cost->rate = INT_MAX;
    rd_cost->rdcost = INT64_MAX;
    return;
  }

  assert(
      (cm->interp_filter == SWITCHABLE) ||
      (cm->interp_filter ==
       av1_extract_interp_filter(search_state.best_mbmode.interp_filters, 0)) ||
      !is_inter_block(&search_state.best_mbmode));
  assert(
      (cm->interp_filter == SWITCHABLE) ||
      (cm->interp_filter ==
       av1_extract_interp_filter(search_state.best_mbmode.interp_filters, 1)) ||
      !is_inter_block(&search_state.best_mbmode));

  if (!cpi->rc.is_src_frame_alt_ref)
    av1_update_rd_thresh_fact(cm, tile_data->thresh_freq_fact,
                              sf->adaptive_rd_thresh, bsize,
                              search_state.best_mode_index);

  // macroblock modes
  *mbmi = search_state.best_mbmode;
  x->skip |= search_state.best_skip2;

  // Note: this section is needed since the mode may have been forced to
  // GLOBALMV by the all-zero mode handling of ref-mv.
  if (mbmi->mode == GLOBALMV || mbmi->mode == GLOBAL_GLOBALMV) {
    // Correct the interp filters for GLOBALMV.
    if (is_nontrans_global_motion(xd, xd->mi[0])) {
      assert(mbmi->interp_filters ==
             av1_broadcast_interp_filter(
                 av1_unswitchable_filter(cm->interp_filter)));
    }
  }

  for (i = 0; i < REFERENCE_MODES; ++i) {
    if (search_state.best_pred_rd[i] == INT64_MAX)
      search_state.best_pred_diff[i] = INT_MIN;
    else
      search_state.best_pred_diff[i] =
          search_state.best_rd - search_state.best_pred_rd[i];
  }

  x->skip |= search_state.best_mode_skippable;

  assert(search_state.best_mode_index >= 0);

  store_coding_context(x, ctx, search_state.best_mode_index,
                       search_state.best_pred_diff,
                       search_state.best_mode_skippable);
}

void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
                                        TileDataEnc *tile_data, MACROBLOCK *x,
                                        int mi_row, int mi_col,
                                        RD_STATS *rd_cost, BLOCK_SIZE bsize,
                                        PICK_MODE_CONTEXT *ctx,
                                        int64_t best_rd_so_far) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  unsigned char segment_id = mbmi->segment_id;
  const int comp_pred = 0;
  int i;
  int64_t best_pred_diff[REFERENCE_MODES];
  unsigned int ref_costs_single[REF_FRAMES];
  unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES];
  int *comp_inter_cost =
      x->comp_inter_cost[av1_get_reference_mode_context(xd)];
  InterpFilter best_filter = SWITCHABLE;
  int64_t this_rd = INT64_MAX;
  int rate2 = 0;
  const int64_t distortion2 = 0;
  (void)mi_row;
  (void)mi_col;

  av1_collect_neighbors_ref_counts(xd);

  estimate_ref_frame_costs(cm, xd, x, segment_id, ref_costs_single,
                           ref_costs_comp);

  for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
  for (i = LAST_FRAME; i < REF_FRAMES; ++i) x->pred_mv_sad[i] = INT_MAX;

  rd_cost->rate = INT_MAX;

  assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));

  mbmi->palette_mode_info.palette_size[0] = 0;
  mbmi->palette_mode_info.palette_size[1] = 0;
  mbmi->filter_intra_mode_info.use_filter_intra = 0;
  mbmi->mode = GLOBALMV;
  mbmi->motion_mode = SIMPLE_TRANSLATION;
  mbmi->uv_mode = UV_DC_PRED;
  if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME))
    mbmi->ref_frame[0] = get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
  else
    mbmi->ref_frame[0] = LAST_FRAME;
  mbmi->ref_frame[1] = NONE_FRAME;
  mbmi->mv[0].as_int =
      gm_get_motion_vector(&cm->global_motion[mbmi->ref_frame[0]],
                           cm->allow_high_precision_mv, bsize, mi_col, mi_row,
                           cm->cur_frame_force_integer_mv)
          .as_int;
  mbmi->tx_size = max_txsize_lookup[bsize];
  x->skip = 1;

  mbmi->ref_mv_idx = 0;

  mbmi->motion_mode = SIMPLE_TRANSLATION;
  av1_count_overlappable_neighbors(cm, xd, mi_row, mi_col);
  if (is_motion_variation_allowed_bsize(bsize) && !has_second_ref(mbmi)) {
    int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
    mbmi->num_proj_ref = findSamples(cm, xd, mi_row, mi_col, pts, pts_inref);
    // Select the samples according to motion vector difference.
    if (mbmi->num_proj_ref > 1)
      mbmi->num_proj_ref = selectSamples(&mbmi->mv[0].as_mv, pts, pts_inref,
                                         mbmi->num_proj_ref, bsize);
  }
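
  // Note on the filter choice below: this path codes the block as fully
  // skipped (x->skip = 1, distortion2 = 0), so there is no distortion term
  // to trade off and the switchable filter can be selected purely by its
  // signaling rate from av1_get_switchable_rate().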

  set_default_interp_filters(mbmi, cm->interp_filter);

  if (cm->interp_filter != SWITCHABLE) {
    best_filter = cm->interp_filter;
  } else {
    best_filter = EIGHTTAP_REGULAR;
    if (av1_is_interp_needed(xd) && av1_is_interp_search_needed(xd) &&
        x->source_variance >= cpi->sf.disable_filter_search_var_thresh) {
      int rs;
      int best_rs = INT_MAX;
      for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
        mbmi->interp_filters = av1_broadcast_interp_filter(i);
        rs = av1_get_switchable_rate(cm, x, xd);
        if (rs < best_rs) {
          best_rs = rs;
          best_filter = av1_extract_interp_filter(mbmi->interp_filters, 0);
        }
      }
    }
  }
  // Set the appropriate filter.
  mbmi->interp_filters = av1_broadcast_interp_filter(best_filter);
  rate2 += av1_get_switchable_rate(cm, x, xd);

  if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT)
    rate2 += comp_inter_cost[comp_pred];

  // Estimate the reference frame signaling cost and add it
  // to the rolling cost variable.
  rate2 += ref_costs_single[LAST_FRAME];
  this_rd = RDCOST(x->rdmult, rate2, distortion2);

  rd_cost->rate = rate2;
  rd_cost->dist = distortion2;
  rd_cost->rdcost = this_rd;

  if (this_rd >= best_rd_so_far) {
    rd_cost->rate = INT_MAX;
    rd_cost->rdcost = INT64_MAX;
    return;
  }

  assert((cm->interp_filter == SWITCHABLE) ||
         (cm->interp_filter ==
          av1_extract_interp_filter(mbmi->interp_filters, 0)));

  av1_update_rd_thresh_fact(cm, tile_data->thresh_freq_fact,
                            cpi->sf.adaptive_rd_thresh, bsize, THR_GLOBALMV);

  av1_zero(best_pred_diff);

  store_coding_context(x, ctx, THR_GLOBALMV, best_pred_diff, 0);
}

struct calc_target_weighted_pred_ctxt {
  const MACROBLOCK *x;
  const uint8_t *tmp;
  int tmp_stride;
  int overlap;
};

static INLINE void calc_target_weighted_pred_above(
    MACROBLOCKD *xd, int rel_mi_col, uint8_t nb_mi_width, MB_MODE_INFO *nb_mi,
    void *fun_ctxt, const int num_planes) {
  (void)nb_mi;
  (void)num_planes;

  struct calc_target_weighted_pred_ctxt *ctxt =
      (struct calc_target_weighted_pred_ctxt *)fun_ctxt;

  const int bw = xd->n4_w << MI_SIZE_LOG2;
  const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);

  int32_t *wsrc = ctxt->x->wsrc_buf + (rel_mi_col * MI_SIZE);
  int32_t *mask = ctxt->x->mask_buf + (rel_mi_col * MI_SIZE);
  const uint8_t *tmp = ctxt->tmp + rel_mi_col * MI_SIZE;
  const int is_hbd = is_cur_buf_hbd(xd);

  if (!is_hbd) {
    for (int row = 0; row < ctxt->overlap; ++row) {
      const uint8_t m0 = mask1d[row];
      const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
      for (int col = 0; col < nb_mi_width * MI_SIZE; ++col) {
        wsrc[col] = m1 * tmp[col];
        mask[col] = m0;
      }
      wsrc += bw;
      mask += bw;
      tmp += ctxt->tmp_stride;
    }
  } else {
    const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);

    for (int row = 0; row < ctxt->overlap; ++row) {
      const uint8_t m0 = mask1d[row];
      const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
      for (int col = 0; col < nb_mi_width * MI_SIZE; ++col) {
        wsrc[col] = m1 * tmp16[col];
        mask[col] = m0;
      }
      wsrc += bw;
      mask += bw;
      tmp16 += ctxt->tmp_stride;
    }
  }
}
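
// Hedged reading of the two OBMC helpers: the "above" pass just filled wsrc
// with Cv(y) * PAbove and mask with Mv(y); calc_target_weighted_pred()
// (further below) then scales both buffers by AOM_BLEND_A64_MAX_ALPHA before
// the "left" pass below folds in the horizontal mask using
// AOM_BLEND_A64_ROUND_BITS shifts, leaving both buffers at the
// AOM_BLEND_A64_MAX_ALPHA ** 2 scale documented ahead of that function.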

static INLINE void calc_target_weighted_pred_left(
    MACROBLOCKD *xd, int rel_mi_row, uint8_t nb_mi_height, MB_MODE_INFO *nb_mi,
    void *fun_ctxt, const int num_planes) {
  (void)nb_mi;
  (void)num_planes;

  struct calc_target_weighted_pred_ctxt *ctxt =
      (struct calc_target_weighted_pred_ctxt *)fun_ctxt;

  const int bw = xd->n4_w << MI_SIZE_LOG2;
  const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);

  int32_t *wsrc = ctxt->x->wsrc_buf + (rel_mi_row * MI_SIZE * bw);
  int32_t *mask = ctxt->x->mask_buf + (rel_mi_row * MI_SIZE * bw);
  const uint8_t *tmp = ctxt->tmp + (rel_mi_row * MI_SIZE * ctxt->tmp_stride);
  const int is_hbd = is_cur_buf_hbd(xd);

  if (!is_hbd) {
    for (int row = 0; row < nb_mi_height * MI_SIZE; ++row) {
      for (int col = 0; col < ctxt->overlap; ++col) {
        const uint8_t m0 = mask1d[col];
        const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
        wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
                    (tmp[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
        mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
      }
      wsrc += bw;
      mask += bw;
      tmp += ctxt->tmp_stride;
    }
  } else {
    const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);

    for (int row = 0; row < nb_mi_height * MI_SIZE; ++row) {
      for (int col = 0; col < ctxt->overlap; ++col) {
        const uint8_t m0 = mask1d[col];
        const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
        wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
                    (tmp16[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
        mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
      }
      wsrc += bw;
      mask += bw;
      tmp16 += ctxt->tmp_stride;
    }
  }
}
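
// A minimal illustrative sketch (the helper name and this plain-C loop are
// assumptions for the example; the encoder uses optimized
// aom_obmc_sad/variance kernels instead) of how the 'wsrc' and 'mask'
// buffers built by calc_target_weighted_pred() below are consumed: the
// per-pixel residual is (wsrc - mask * pred), rounded back down by
// 2 * AOM_BLEND_A64_ROUND_BITS to undo the AOM_BLEND_A64_MAX_ALPHA ** 2
// scaling described in the comment below.
static INLINE int64_t obmc_sse_sketch(const int32_t *wsrc, const int32_t *mask,
                                      const uint8_t *pred, int n) {
  int64_t sse = 0;
  for (int i = 0; i < n; ++i) {
    const int diff = ROUND_POWER_OF_TWO_SIGNED(
        wsrc[i] - mask[i] * (int32_t)pred[i], AOM_BLEND_A64_ROUND_BITS * 2);
    sse += (int64_t)diff * diff;
  }
  return sse;
}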

// This function has a structure similar to av1_build_obmc_inter_prediction.
//
// The OBMC predictor is computed as:
//
//  PObmc(x,y) =
//    AOM_BLEND_A64(Mh(x),
//                  AOM_BLEND_A64(Mv(y), P(x,y), PAbove(x,y)),
//                  PLeft(x, y))
//
// Scaling up by AOM_BLEND_A64_MAX_ALPHA ** 2 and omitting the intermediate
// rounding, this can be written as:
//
//  AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * PObmc(x,y) =
//    Mh(x) * Mv(y) * P(x,y) +
//    Mh(x) * Cv(y) * PAbove(x,y) +
//    AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
//
// Where:
//
//  Cv(y) = AOM_BLEND_A64_MAX_ALPHA - Mv(y)
//  Ch(x) = AOM_BLEND_A64_MAX_ALPHA - Mh(x)
//
// This function computes 'wsrc' and 'mask' as:
//
//  wsrc(x, y) =
//    AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * src(x, y) -
//    Mh(x) * Cv(y) * PAbove(x,y) -
//    AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
//
//  mask(x, y) = Mh(x) * Mv(y)
//
// These can then be used to efficiently approximate the error for any
// predictor P in the context of the provided neighbouring predictors by
// computing:
//
//  error(x, y) =
//    wsrc(x, y) - mask(x, y) * P(x, y) / (AOM_BLEND_A64_MAX_ALPHA ** 2)
//
static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x,
                                      const MACROBLOCKD *xd, int mi_row,
                                      int mi_col, const uint8_t *above,
                                      int above_stride, const uint8_t *left,
                                      int left_stride) {
  const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
  const int bw = xd->n4_w << MI_SIZE_LOG2;
  const int bh = xd->n4_h << MI_SIZE_LOG2;
  int32_t *mask_buf = x->mask_buf;
  int32_t *wsrc_buf = x->wsrc_buf;

  const int is_hbd = is_cur_buf_hbd(xd);
  const int src_scale = AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA;

  // Plane 0 should not be subsampled.
  assert(xd->plane[0].subsampling_x == 0);
  assert(xd->plane[0].subsampling_y == 0);

  av1_zero_array(wsrc_buf, bw * bh);
  for (int i = 0; i < bw * bh; ++i) mask_buf[i] = AOM_BLEND_A64_MAX_ALPHA;

  // Handle the above row.
  if (xd->up_available) {
    const int overlap =
        AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
    struct calc_target_weighted_pred_ctxt ctxt = { x, above, above_stride,
                                                   overlap };
    foreach_overlappable_nb_above(cm, (MACROBLOCKD *)xd, mi_col,
                                  max_neighbor_obmc[mi_size_wide_log2[bsize]],
                                  calc_target_weighted_pred_above, &ctxt);
  }

  for (int i = 0; i < bw * bh; ++i) {
    wsrc_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
    mask_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
  }

  // Handle the left column.
  if (xd->left_available) {
    const int overlap =
        AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
    struct calc_target_weighted_pred_ctxt ctxt = { x, left, left_stride,
                                                   overlap };
    foreach_overlappable_nb_left(cm, (MACROBLOCKD *)xd, mi_row,
                                 max_neighbor_obmc[mi_size_high_log2[bsize]],
                                 calc_target_weighted_pred_left, &ctxt);
  }

  if (!is_hbd) {
    const uint8_t *src = x->plane[0].src.buf;

    for (int row = 0; row < bh; ++row) {
      for (int col = 0; col < bw; ++col) {
        wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
      }
      wsrc_buf += bw;
      src += x->plane[0].src.stride;
    }
  } else {
    const uint16_t *src = CONVERT_TO_SHORTPTR(x->plane[0].src.buf);

    for (int row = 0; row < bh; ++row) {
      for (int col = 0; col < bw; ++col) {
        wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
      }
      wsrc_buf += bw;
      src += x->plane[0].src.stride;
    }
  }
}
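
/* For reference, the kernels implemented by the SOBEL_X and SOBEL_Y macros
 * below are the standard 3x3 Sobel operators
 *
 *        [ 1  0 -1 ]          [  1  2  1 ]
 *   Gx = [ 2  0 -2 ]     Gy = [  0  0  0 ]
 *        [ 1  0 -1 ]          [ -1 -2 -1 ]
 *
 * applied with x increasing to the right and y increasing downwards.
 */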

/* Use the standard 3x3 Sobel matrix. Macro so it can be used for either high
 * or low bit-depth arrays.
 */
#define SOBEL_X(src, stride, i, j)                        \
  ((src)[((i)-1) + (stride) * ((j)-1)] -                  \
   (src)[((i) + 1) + (stride) * ((j)-1)] +  /* NOLINT */  \
   2 * (src)[((i)-1) + (stride) * (j)] -    /* NOLINT */  \
   2 * (src)[((i) + 1) + (stride) * (j)] +  /* NOLINT */  \
   (src)[((i)-1) + (stride) * ((j) + 1)] -  /* NOLINT */  \
   (src)[((i) + 1) + (stride) * ((j) + 1)]) /* NOLINT */
#define SOBEL_Y(src, stride, i, j)                        \
  ((src)[((i)-1) + (stride) * ((j)-1)] +                  \
   2 * (src)[(i) + (stride) * ((j)-1)] +    /* NOLINT */  \
   (src)[((i) + 1) + (stride) * ((j)-1)] -  /* NOLINT */  \
   (src)[((i)-1) + (stride) * ((j) + 1)] -  /* NOLINT */  \
   2 * (src)[(i) + (stride) * ((j) + 1)] -  /* NOLINT */  \
   (src)[((i) + 1) + (stride) * ((j) + 1)]) /* NOLINT */

sobel_xy sobel(const uint8_t *input, int stride, int i, int j, bool high_bd) {
  int16_t s_x;
  int16_t s_y;
  if (high_bd) {
    const uint16_t *src = CONVERT_TO_SHORTPTR(input);
    s_x = SOBEL_X(src, stride, i, j);
    s_y = SOBEL_Y(src, stride, i, j);
  } else {
    s_x = SOBEL_X(input, stride, i, j);
    s_y = SOBEL_Y(input, stride, i, j);
  }
  sobel_xy r = { .x = s_x, .y = s_y };
  return r;
}

// 8-tap Gaussian convolution filter with sigma = 1.3; the taps sum to 128
// and all coefficients must be even.
DECLARE_ALIGNED(16, static const int16_t, gauss_filter[8]) = { 2, 12, 30, 40,
                                                               30, 12, 2, 0 };

void gaussian_blur(const uint8_t *src, int src_stride, int w, int h,
                   uint8_t *dst, bool high_bd, int bd) {
  ConvolveParams conv_params = get_conv_params(0, 0, bd);
  InterpFilterParams filter = { .filter_ptr = gauss_filter,
                                .taps = 8,
                                .subpel_shifts = 0,
                                .interp_filter = EIGHTTAP_REGULAR };
  // Requirements from the vector-optimized implementations.
  assert(h % 4 == 0);
  assert(w % 8 == 0);
  // Because we use an eight-tap filter, the stride should be at least w + 7.
  assert(src_stride >= w + 7);
  if (high_bd) {
    av1_highbd_convolve_2d_sr(CONVERT_TO_SHORTPTR(src), src_stride,
                              CONVERT_TO_SHORTPTR(dst), w, w, h, &filter,
                              &filter, 0, 0, &conv_params, bd);
  } else {
    av1_convolve_2d_sr(src, src_stride, dst, w, w, h, &filter, &filter, 0, 0,
                       &conv_params);
  }
}

static EdgeInfo edge_probability(const uint8_t *input, int w, int h,
                                 bool high_bd, int bd) {
  // The probability of an edge in the whole image is the same as the highest
  // probability of an edge for any individual pixel. Use Sobel as the metric
  // for finding an edge.
  uint16_t highest = 0;
  uint16_t highest_x = 0;
  uint16_t highest_y = 0;
  // Ignore the 1 pixel border around the image for the computation.
  for (int j = 1; j < h - 1; ++j) {
    for (int i = 1; i < w - 1; ++i) {
      sobel_xy g = sobel(input, w, i, j, high_bd);
      // Scale down to 8-bit to get the same output regardless of bit depth.
      int16_t g_x = g.x >> (bd - 8);
      int16_t g_y = g.y >> (bd - 8);
      uint16_t magnitude = (uint16_t)sqrt(g_x * g_x + g_y * g_y);
      highest = AOMMAX(highest, magnitude);
      highest_x = AOMMAX(highest_x, g_x);
      highest_y = AOMMAX(highest_y, g_y);
    }
  }
  EdgeInfo ei = { .magnitude = highest, .x = highest_x, .y = highest_y };
  return ei;
}
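
// Illustrative usage sketch for the edge detector defined below (the caller,
// buffer, block size, and threshold here are assumptions, not code from this
// file):
//
//   EdgeInfo ei = av1_edge_exists(x->plane[0].src.buf, x->plane[0].src.stride,
//                                 /*w=*/32, /*h=*/32, is_cur_buf_hbd(xd),
//                                 xd->bd);
//   const bool is_edgy = ei.magnitude > 40;  // hypothetical threshold
//
// Blurring inside av1_edge_exists() keeps isolated noisy pixels from
// registering as strong edges.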

/* Uses most of the Canny edge detection algorithm to find if there are any
 * edges in the image.
 */
EdgeInfo av1_edge_exists(const uint8_t *src, int src_stride, int w, int h,
                         bool high_bd, int bd) {
  if (w < 3 || h < 3) {
    EdgeInfo n = { .magnitude = 0, .x = 0, .y = 0 };
    return n;
  }
  uint8_t *blurred;
  if (high_bd) {
    blurred = CONVERT_TO_BYTEPTR(aom_memalign(32, sizeof(uint16_t) * w * h));
  } else {
    blurred = (uint8_t *)aom_memalign(32, sizeof(uint8_t) * w * h);
  }
  gaussian_blur(src, src_stride, w, h, blurred, high_bd, bd);
  // Skip the non-maximum suppression step in Canny edge detection. We just
  // want a probability of an edge existing in the buffer, which is determined
  // by the strongest edge in it -- we don't need to eliminate the weaker
  // edges. Use Sobel for the edge detection.
  EdgeInfo prob = edge_probability(blurred, w, h, high_bd, bd);
  if (high_bd) {
    aom_free(CONVERT_TO_SHORTPTR(blurred));
  } else {
    aom_free(blurred);
  }
  return prob;
}