/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#ifndef AOM_AV1_ENCODER_RD_H_
#define AOM_AV1_ENCODER_RD_H_

#include <limits.h>

#include "av1/common/blockd.h"

#include "av1/encoder/block.h"
#include "av1/encoder/context_tree.h"
#include "av1/encoder/cost.h"

#ifdef __cplusplus
extern "C" {
#endif

// Distortion is scaled by (1 << RDDIV_BITS) in RDCOST and RDCOST_DBL.
#define RDDIV_BITS 7
// Right shift applied to the rd multiplier in set_error_per_bit().
#define RD_EPB_SHIFT 6

// Integer rate-distortion cost: the rate R is widened to int64_t, multiplied
// by the multiplier RM and reduced by AV1_PROB_COST_SHIFT bits through
// ROUND_POWER_OF_TWO (defined elsewhere); the distortion D is then added after
// being scaled by (1 << RDDIV_BITS). D is not cast, so the arithmetic on the
// distortion term is carried out in the type of the caller's argument.
#define RDCOST(RM, R, D)                                            \
  (ROUND_POWER_OF_TWO(((int64_t)(R)) * (RM), AV1_PROB_COST_SHIFT) + \
   ((D) * (1 << RDDIV_BITS)))

// Floating-point counterpart of RDCOST: the rate term is divided by
// (1 << AV1_PROB_COST_SHIFT) instead of being shifted, and both terms are
// evaluated as double.
#define RDCOST_DBL(RM, R, D)                                       \
  (((((double)(R)) * (RM)) / (double)(1 << AV1_PROB_COST_SHIFT)) + \
   ((double)(D) * (1 << RDDIV_BITS)))

#define QIDX_SKIP_THRESH 115

#define MV_COST_WEIGHT 108
#define MV_COST_WEIGHT_SUB 120

#define RD_THRESH_MAX_FACT 64
#define RD_THRESH_INC 1

// Factor to weigh the rate for switchable interp filters.
#define SWITCHABLE_INTERP_RATE_FACTOR 1

// This enumerator type needs to be kept aligned with the mode order in
// const MODE_DEFINITION av1_mode_order[MAX_MODES] used in the rd code.
51 enum { 52 THR_NEARESTMV, 53 THR_NEARESTL2, 54 THR_NEARESTL3, 55 THR_NEARESTB, 56 THR_NEARESTA2, 57 THR_NEARESTA, 58 THR_NEARESTG, 59 60 THR_NEWMV, 61 THR_NEWL2, 62 THR_NEWL3, 63 THR_NEWB, 64 THR_NEWA2, 65 THR_NEWA, 66 THR_NEWG, 67 68 THR_NEARMV, 69 THR_NEARL2, 70 THR_NEARL3, 71 THR_NEARB, 72 THR_NEARA2, 73 THR_NEARA, 74 THR_NEARG, 75 76 THR_GLOBALMV, 77 THR_GLOBALL2, 78 THR_GLOBALL3, 79 THR_GLOBALB, 80 THR_GLOBALA2, 81 THR_GLOBALA, 82 THR_GLOBALG, 83 84 THR_COMP_NEAREST_NEARESTLA, 85 THR_COMP_NEAREST_NEARESTL2A, 86 THR_COMP_NEAREST_NEARESTL3A, 87 THR_COMP_NEAREST_NEARESTGA, 88 THR_COMP_NEAREST_NEARESTLB, 89 THR_COMP_NEAREST_NEARESTL2B, 90 THR_COMP_NEAREST_NEARESTL3B, 91 THR_COMP_NEAREST_NEARESTGB, 92 THR_COMP_NEAREST_NEARESTLA2, 93 THR_COMP_NEAREST_NEARESTL2A2, 94 THR_COMP_NEAREST_NEARESTL3A2, 95 THR_COMP_NEAREST_NEARESTGA2, 96 THR_COMP_NEAREST_NEARESTLL2, 97 THR_COMP_NEAREST_NEARESTLL3, 98 THR_COMP_NEAREST_NEARESTLG, 99 THR_COMP_NEAREST_NEARESTBA, 100 101 THR_COMP_NEAR_NEARLA, 102 THR_COMP_NEW_NEARESTLA, 103 THR_COMP_NEAREST_NEWLA, 104 THR_COMP_NEW_NEARLA, 105 THR_COMP_NEAR_NEWLA, 106 THR_COMP_NEW_NEWLA, 107 THR_COMP_GLOBAL_GLOBALLA, 108 109 THR_COMP_NEAR_NEARL2A, 110 THR_COMP_NEW_NEARESTL2A, 111 THR_COMP_NEAREST_NEWL2A, 112 THR_COMP_NEW_NEARL2A, 113 THR_COMP_NEAR_NEWL2A, 114 THR_COMP_NEW_NEWL2A, 115 THR_COMP_GLOBAL_GLOBALL2A, 116 117 THR_COMP_NEAR_NEARL3A, 118 THR_COMP_NEW_NEARESTL3A, 119 THR_COMP_NEAREST_NEWL3A, 120 THR_COMP_NEW_NEARL3A, 121 THR_COMP_NEAR_NEWL3A, 122 THR_COMP_NEW_NEWL3A, 123 THR_COMP_GLOBAL_GLOBALL3A, 124 125 THR_COMP_NEAR_NEARGA, 126 THR_COMP_NEW_NEARESTGA, 127 THR_COMP_NEAREST_NEWGA, 128 THR_COMP_NEW_NEARGA, 129 THR_COMP_NEAR_NEWGA, 130 THR_COMP_NEW_NEWGA, 131 THR_COMP_GLOBAL_GLOBALGA, 132 133 THR_COMP_NEAR_NEARLB, 134 THR_COMP_NEW_NEARESTLB, 135 THR_COMP_NEAREST_NEWLB, 136 THR_COMP_NEW_NEARLB, 137 THR_COMP_NEAR_NEWLB, 138 THR_COMP_NEW_NEWLB, 139 THR_COMP_GLOBAL_GLOBALLB, 140 141 THR_COMP_NEAR_NEARL2B, 142 THR_COMP_NEW_NEARESTL2B, 143 
THR_COMP_NEAREST_NEWL2B, 144 THR_COMP_NEW_NEARL2B, 145 THR_COMP_NEAR_NEWL2B, 146 THR_COMP_NEW_NEWL2B, 147 THR_COMP_GLOBAL_GLOBALL2B, 148 149 THR_COMP_NEAR_NEARL3B, 150 THR_COMP_NEW_NEARESTL3B, 151 THR_COMP_NEAREST_NEWL3B, 152 THR_COMP_NEW_NEARL3B, 153 THR_COMP_NEAR_NEWL3B, 154 THR_COMP_NEW_NEWL3B, 155 THR_COMP_GLOBAL_GLOBALL3B, 156 157 THR_COMP_NEAR_NEARGB, 158 THR_COMP_NEW_NEARESTGB, 159 THR_COMP_NEAREST_NEWGB, 160 THR_COMP_NEW_NEARGB, 161 THR_COMP_NEAR_NEWGB, 162 THR_COMP_NEW_NEWGB, 163 THR_COMP_GLOBAL_GLOBALGB, 164 165 THR_COMP_NEAR_NEARLA2, 166 THR_COMP_NEW_NEARESTLA2, 167 THR_COMP_NEAREST_NEWLA2, 168 THR_COMP_NEW_NEARLA2, 169 THR_COMP_NEAR_NEWLA2, 170 THR_COMP_NEW_NEWLA2, 171 THR_COMP_GLOBAL_GLOBALLA2, 172 173 THR_COMP_NEAR_NEARL2A2, 174 THR_COMP_NEW_NEARESTL2A2, 175 THR_COMP_NEAREST_NEWL2A2, 176 THR_COMP_NEW_NEARL2A2, 177 THR_COMP_NEAR_NEWL2A2, 178 THR_COMP_NEW_NEWL2A2, 179 THR_COMP_GLOBAL_GLOBALL2A2, 180 181 THR_COMP_NEAR_NEARL3A2, 182 THR_COMP_NEW_NEARESTL3A2, 183 THR_COMP_NEAREST_NEWL3A2, 184 THR_COMP_NEW_NEARL3A2, 185 THR_COMP_NEAR_NEWL3A2, 186 THR_COMP_NEW_NEWL3A2, 187 THR_COMP_GLOBAL_GLOBALL3A2, 188 189 THR_COMP_NEAR_NEARGA2, 190 THR_COMP_NEW_NEARESTGA2, 191 THR_COMP_NEAREST_NEWGA2, 192 THR_COMP_NEW_NEARGA2, 193 THR_COMP_NEAR_NEWGA2, 194 THR_COMP_NEW_NEWGA2, 195 THR_COMP_GLOBAL_GLOBALGA2, 196 197 THR_COMP_NEAR_NEARLL2, 198 THR_COMP_NEW_NEARESTLL2, 199 THR_COMP_NEAREST_NEWLL2, 200 THR_COMP_NEW_NEARLL2, 201 THR_COMP_NEAR_NEWLL2, 202 THR_COMP_NEW_NEWLL2, 203 THR_COMP_GLOBAL_GLOBALLL2, 204 205 THR_COMP_NEAR_NEARLL3, 206 THR_COMP_NEW_NEARESTLL3, 207 THR_COMP_NEAREST_NEWLL3, 208 THR_COMP_NEW_NEARLL3, 209 THR_COMP_NEAR_NEWLL3, 210 THR_COMP_NEW_NEWLL3, 211 THR_COMP_GLOBAL_GLOBALLL3, 212 213 THR_COMP_NEAR_NEARLG, 214 THR_COMP_NEW_NEARESTLG, 215 THR_COMP_NEAREST_NEWLG, 216 THR_COMP_NEW_NEARLG, 217 THR_COMP_NEAR_NEWLG, 218 THR_COMP_NEW_NEWLG, 219 THR_COMP_GLOBAL_GLOBALLG, 220 221 THR_COMP_NEAR_NEARBA, 222 THR_COMP_NEW_NEARESTBA, 223 THR_COMP_NEAREST_NEWBA, 224 
THR_COMP_NEW_NEARBA, 225 THR_COMP_NEAR_NEWBA, 226 THR_COMP_NEW_NEWBA, 227 THR_COMP_GLOBAL_GLOBALBA, 228 229 THR_DC, 230 THR_PAETH, 231 THR_SMOOTH, 232 THR_SMOOTH_V, 233 THR_SMOOTH_H, 234 THR_H_PRED, 235 THR_V_PRED, 236 THR_D135_PRED, 237 THR_D203_PRED, 238 THR_D157_PRED, 239 THR_D67_PRED, 240 THR_D113_PRED, 241 THR_D45_PRED, 242 243 MAX_MODES, 244 245 LAST_SINGLE_REF_MODES = THR_GLOBALG, 246 MAX_SINGLE_REF_MODES = LAST_SINGLE_REF_MODES + 1, 247 LAST_COMP_REF_MODES = THR_COMP_GLOBAL_GLOBALBA, 248 MAX_COMP_REF_MODES = LAST_COMP_REF_MODES + 1 249 } UENUM1BYTE(THR_MODES); 250 251 enum { 252 THR_LAST, 253 THR_LAST2, 254 THR_LAST3, 255 THR_BWDR, 256 THR_ALTR2, 257 THR_GOLD, 258 THR_ALTR, 259 260 THR_COMP_LA, 261 THR_COMP_L2A, 262 THR_COMP_L3A, 263 THR_COMP_GA, 264 265 THR_COMP_LB, 266 THR_COMP_L2B, 267 THR_COMP_L3B, 268 THR_COMP_GB, 269 270 THR_COMP_LA2, 271 THR_COMP_L2A2, 272 THR_COMP_L3A2, 273 THR_COMP_GA2, 274 275 THR_INTRA, 276 277 MAX_REFS 278 } UENUM1BYTE(THR_MODES_SUB8X8); 279 280 typedef struct RD_OPT { 281 // Thresh_mult is used to set a threshold for the rd score. A higher value 282 // means that we will accept the best mode so far more often. This number 283 // is used in combination with the current block size, and thresh_freq_fact 284 // to pick a threshold. 
285 int thresh_mult[MAX_MODES]; 286 287 int threshes[MAX_SEGMENTS][BLOCK_SIZES_ALL][MAX_MODES]; 288 289 int64_t prediction_type_threshes[REF_FRAMES][REFERENCE_MODES]; 290 291 int RDMULT; 292 293 double r0; 294 } RD_OPT; 295 296 static INLINE void av1_init_rd_stats(RD_STATS *rd_stats) { 297 #if CONFIG_RD_DEBUG 298 int plane; 299 #endif 300 rd_stats->rate = 0; 301 rd_stats->dist = 0; 302 rd_stats->rdcost = 0; 303 rd_stats->sse = 0; 304 rd_stats->skip = 1; 305 rd_stats->zero_rate = 0; 306 rd_stats->invalid_rate = 0; 307 rd_stats->ref_rdcost = INT64_MAX; 308 #if CONFIG_RD_DEBUG 309 // This may run into problems when monochrome video is 310 // encoded, as there will only be 1 plane 311 for (plane = 0; plane < MAX_MB_PLANE; ++plane) { 312 rd_stats->txb_coeff_cost[plane] = 0; 313 { 314 int r, c; 315 for (r = 0; r < TXB_COEFF_COST_MAP_SIZE; ++r) 316 for (c = 0; c < TXB_COEFF_COST_MAP_SIZE; ++c) 317 rd_stats->txb_coeff_cost_map[plane][r][c] = 0; 318 } 319 } 320 #endif 321 } 322 323 static INLINE void av1_invalid_rd_stats(RD_STATS *rd_stats) { 324 #if CONFIG_RD_DEBUG 325 int plane; 326 #endif 327 rd_stats->rate = INT_MAX; 328 rd_stats->dist = INT64_MAX; 329 rd_stats->rdcost = INT64_MAX; 330 rd_stats->sse = INT64_MAX; 331 rd_stats->skip = 0; 332 rd_stats->zero_rate = 0; 333 rd_stats->invalid_rate = 1; 334 rd_stats->ref_rdcost = INT64_MAX; 335 #if CONFIG_RD_DEBUG 336 // This may run into problems when monochrome video is 337 // encoded, as there will only be 1 plane 338 for (plane = 0; plane < MAX_MB_PLANE; ++plane) { 339 rd_stats->txb_coeff_cost[plane] = INT_MAX; 340 { 341 int r, c; 342 for (r = 0; r < TXB_COEFF_COST_MAP_SIZE; ++r) 343 for (c = 0; c < TXB_COEFF_COST_MAP_SIZE; ++c) 344 rd_stats->txb_coeff_cost_map[plane][r][c] = INT_MAX; 345 } 346 } 347 #endif 348 } 349 350 static INLINE void av1_merge_rd_stats(RD_STATS *rd_stats_dst, 351 const RD_STATS *rd_stats_src) { 352 #if CONFIG_RD_DEBUG 353 int plane; 354 #endif 355 rd_stats_dst->rate += rd_stats_src->rate; 356 if 
(!rd_stats_dst->zero_rate) 357 rd_stats_dst->zero_rate = rd_stats_src->zero_rate; 358 rd_stats_dst->dist += rd_stats_src->dist; 359 rd_stats_dst->sse += rd_stats_src->sse; 360 rd_stats_dst->skip &= rd_stats_src->skip; 361 rd_stats_dst->invalid_rate &= rd_stats_src->invalid_rate; 362 #if CONFIG_RD_DEBUG 363 // This may run into problems when monochrome video is 364 // encoded, as there will only be 1 plane 365 for (plane = 0; plane < MAX_MB_PLANE; ++plane) { 366 rd_stats_dst->txb_coeff_cost[plane] += rd_stats_src->txb_coeff_cost[plane]; 367 { 368 // TODO(angiebird): optimize this part 369 int r, c; 370 int ref_txb_coeff_cost = 0; 371 for (r = 0; r < TXB_COEFF_COST_MAP_SIZE; ++r) 372 for (c = 0; c < TXB_COEFF_COST_MAP_SIZE; ++c) { 373 rd_stats_dst->txb_coeff_cost_map[plane][r][c] += 374 rd_stats_src->txb_coeff_cost_map[plane][r][c]; 375 ref_txb_coeff_cost += rd_stats_dst->txb_coeff_cost_map[plane][r][c]; 376 } 377 assert(ref_txb_coeff_cost == rd_stats_dst->txb_coeff_cost[plane]); 378 } 379 } 380 #endif 381 } 382 383 struct TileInfo; 384 struct TileDataEnc; 385 struct AV1_COMP; 386 struct macroblock; 387 388 int av1_compute_rd_mult_based_on_qindex(const struct AV1_COMP *cpi, int qindex); 389 390 int av1_compute_rd_mult(const struct AV1_COMP *cpi, int qindex); 391 392 void av1_initialize_rd_consts(struct AV1_COMP *cpi); 393 394 void av1_initialize_cost_tables(const AV1_COMMON *const cm, MACROBLOCK *x); 395 396 void av1_initialize_me_consts(const struct AV1_COMP *cpi, MACROBLOCK *x, 397 int qindex); 398 399 void av1_model_rd_from_var_lapndz(int64_t var, unsigned int n, 400 unsigned int qstep, int *rate, int64_t *dist); 401 402 void av1_model_rd_curvfit(BLOCK_SIZE bsize, double sse_norm, double xqr, 403 double *rate_f, double *distbysse_f); 404 void av1_model_rd_surffit(BLOCK_SIZE bsize, double sse_norm, double xm, 405 double yl, double *rate_f, double *distbysse_f); 406 407 int av1_get_switchable_rate(const AV1_COMMON *const cm, MACROBLOCK *x, 408 const MACROBLOCKD 
*xd); 409 410 int av1_raster_block_offset(BLOCK_SIZE plane_bsize, int raster_block, 411 int stride); 412 413 int16_t *av1_raster_block_offset_int16(BLOCK_SIZE plane_bsize, int raster_block, 414 int16_t *base); 415 416 YV12_BUFFER_CONFIG *av1_get_scaled_ref_frame(const struct AV1_COMP *cpi, 417 int ref_frame); 418 419 void av1_init_me_luts(void); 420 421 void av1_set_mvcost(MACROBLOCK *x, int ref, int ref_mv_idx); 422 423 void av1_get_entropy_contexts(BLOCK_SIZE bsize, 424 const struct macroblockd_plane *pd, 425 ENTROPY_CONTEXT t_above[MAX_MIB_SIZE], 426 ENTROPY_CONTEXT t_left[MAX_MIB_SIZE]); 427 428 void av1_set_rd_speed_thresholds(struct AV1_COMP *cpi); 429 430 void av1_update_rd_thresh_fact(const AV1_COMMON *const cm, 431 int (*fact)[MAX_MODES], int rd_thresh, int bsize, 432 int best_mode_index); 433 434 static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh, 435 int thresh_fact) { 436 return best_rd < ((int64_t)thresh * thresh_fact >> 5) || thresh == INT_MAX; 437 } 438 439 void av1_mv_pred(const struct AV1_COMP *cpi, MACROBLOCK *x, 440 uint8_t *ref_y_buffer, int ref_y_stride, int ref_frame, 441 BLOCK_SIZE block_size); 442 443 static INLINE void set_error_per_bit(MACROBLOCK *x, int rdmult) { 444 x->errorperbit = rdmult >> RD_EPB_SHIFT; 445 x->errorperbit += (x->errorperbit == 0); 446 } 447 448 void av1_setup_pred_block(const MACROBLOCKD *xd, 449 struct buf_2d dst[MAX_MB_PLANE], 450 const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col, 451 const struct scale_factors *scale, 452 const struct scale_factors *scale_uv, 453 const int num_planes); 454 455 int av1_get_intra_cost_penalty(int qindex, int qdelta, 456 aom_bit_depth_t bit_depth); 457 458 void av1_fill_mode_rates(AV1_COMMON *const cm, MACROBLOCK *x, 459 FRAME_CONTEXT *fc); 460 461 void av1_fill_coeff_costs(MACROBLOCK *x, FRAME_CONTEXT *fc, 462 const int num_planes); 463 464 int av1_get_adaptive_rdmult(const struct AV1_COMP *cpi, double beta); 465 466 #ifdef __cplusplus 467 } // extern "C" 468 
#endif 469 470 #endif // AOM_AV1_ENCODER_RD_H_ 471