1 /* 2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #ifndef AOM_AV1_ENCODER_BLOCK_H_ 13 #define AOM_AV1_ENCODER_BLOCK_H_ 14 15 #include "av1/common/entropymv.h" 16 #include "av1/common/entropy.h" 17 #include "av1/common/mvref_common.h" 18 #include "av1/encoder/hash.h" 19 #if CONFIG_DIST_8X8 20 #include "aom/aomcx.h" 21 #endif 22 23 #ifdef __cplusplus 24 extern "C" { 25 #endif 26 27 typedef struct { 28 unsigned int sse; 29 int sum; 30 unsigned int var; 31 } DIFF; 32 33 typedef struct macroblock_plane { 34 DECLARE_ALIGNED(16, int16_t, src_diff[MAX_SB_SQUARE]); 35 tran_low_t *qcoeff; 36 tran_low_t *coeff; 37 uint16_t *eobs; 38 uint8_t *txb_entropy_ctx; 39 struct buf_2d src; 40 41 // Quantizer setings 42 // These are used/accessed only in the quantization process 43 // RDO does not / must not depend on any of these values 44 // All values below share the coefficient scale/shift used in TX 45 const int16_t *quant_fp_QTX; 46 const int16_t *round_fp_QTX; 47 const int16_t *quant_QTX; 48 const int16_t *quant_shift_QTX; 49 const int16_t *zbin_QTX; 50 const int16_t *round_QTX; 51 const int16_t *dequant_QTX; 52 } MACROBLOCK_PLANE; 53 54 typedef struct { 55 int txb_skip_cost[TXB_SKIP_CONTEXTS][2]; 56 int base_eob_cost[SIG_COEF_CONTEXTS_EOB][3]; 57 int base_cost[SIG_COEF_CONTEXTS][8]; 58 int eob_extra_cost[EOB_COEF_CONTEXTS][2]; 59 int dc_sign_cost[DC_SIGN_CONTEXTS][2]; 60 int lps_cost[LEVEL_CONTEXTS][COEFF_BASE_RANGE + 1 + COEFF_BASE_RANGE + 1]; 61 } LV_MAP_COEFF_COST; 62 63 typedef struct { 64 int eob_cost[2][11]; 65 } LV_MAP_EOB_COST; 66 67 typedef struct { 68 tran_low_t tcoeff[MAX_MB_PLANE][MAX_SB_SQUARE]; 69 uint16_t eobs[MAX_MB_PLANE][MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)]; 70 uint8_t txb_skip_ctx[MAX_MB_PLANE] 71 [MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)]; 72 int dc_sign_ctx[MAX_MB_PLANE] 73 [MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)]; 74 } CB_COEFF_BUFFER; 75 76 typedef struct { 77 // TODO(angiebird): Reduce the buffer size according to sb_type 78 CB_COEFF_BUFFER *cb_coef_buff; 79 CANDIDATE_MV ref_mv_stack[MODE_CTX_REF_FRAMES][MAX_REF_MV_STACK_SIZE]; 80 int_mv global_mvs[REF_FRAMES]; 81 int cb_offset; 82 int16_t mode_context[MODE_CTX_REF_FRAMES]; 83 uint8_t ref_mv_count[MODE_CTX_REF_FRAMES]; 84 } MB_MODE_INFO_EXT; 85 86 typedef struct { 87 int col_min; 88 int col_max; 89 int row_min; 90 int row_max; 91 } MvLimits; 92 93 typedef struct { 94 uint8_t best_palette_color_map[MAX_PALETTE_SQUARE]; 95 int kmeans_data_buf[2 * MAX_PALETTE_SQUARE]; 96 } PALETTE_BUFFER; 97 98 typedef struct { 99 TX_SIZE tx_size; 100 TX_SIZE inter_tx_size[INTER_TX_SIZE_BUF_LEN]; 101 uint8_t blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE]; 102 TX_TYPE txk_type[TXK_TYPE_BUF_LEN]; 103 RD_STATS rd_stats; 104 uint32_t hash_value; 105 } MB_RD_INFO; 106 107 #define RD_RECORD_BUFFER_LEN 8 108 typedef struct { 109 MB_RD_INFO tx_rd_info[RD_RECORD_BUFFER_LEN]; // Circular buffer. 110 int index_start; 111 int num; 112 CRC32C crc_calculator; // Hash function. 113 } MB_RD_RECORD; 114 115 typedef struct { 116 int64_t dist; 117 int64_t sse; 118 int rate; 119 uint16_t eob; 120 TX_TYPE tx_type; 121 uint16_t entropy_context; 122 uint8_t txb_entropy_ctx; 123 uint8_t valid; 124 uint8_t fast; // This is not being used now. 125 } TXB_RD_INFO; 126 127 #define TX_SIZE_RD_RECORD_BUFFER_LEN 256 128 typedef struct { 129 uint32_t hash_vals[TX_SIZE_RD_RECORD_BUFFER_LEN]; 130 TXB_RD_INFO tx_rd_info[TX_SIZE_RD_RECORD_BUFFER_LEN]; 131 int index_start; 132 int num; 133 } TXB_RD_RECORD; 134 135 typedef struct tx_size_rd_info_node { 136 TXB_RD_INFO *rd_info_array; // Points to array of size TX_TYPES. 137 struct tx_size_rd_info_node *children[4]; 138 } TXB_RD_INFO_NODE; 139 140 // Simple translation rd state for prune_comp_search_by_single_result 141 typedef struct { 142 RD_STATS rd_stats; 143 RD_STATS rd_stats_y; 144 RD_STATS rd_stats_uv; 145 uint8_t blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE]; 146 uint8_t skip; 147 uint8_t disable_skip; 148 uint8_t early_skipped; 149 } SimpleRDState; 150 151 // 4: NEAREST, NEW, NEAR, GLOBAL 152 #define SINGLE_REF_MODES ((REF_FRAMES - 1) * 4) 153 154 // Region size for mode decision sampling in the first pass of partition 155 // search(two_pass_partition_search speed feature), in units of mi size(4). 156 // Used by the mode pruning in two_pass_partition_search feature. 157 #define FIRST_PARTITION_PASS_SAMPLE_REGION 8 158 #define FIRST_PARTITION_PASS_SAMPLE_REGION_LOG2 3 159 #define FIRST_PARTITION_PASS_STATS_TABLES \ 160 (MAX_MIB_SIZE >> FIRST_PARTITION_PASS_SAMPLE_REGION_LOG2) * \ 161 (MAX_MIB_SIZE >> FIRST_PARTITION_PASS_SAMPLE_REGION_LOG2) 162 #define FIRST_PARTITION_PASS_STATS_STRIDE \ 163 (MAX_MIB_SIZE_LOG2 - FIRST_PARTITION_PASS_SAMPLE_REGION_LOG2) 164 165 static INLINE int av1_first_partition_pass_stats_index(int mi_row, int mi_col) { 166 const int row = 167 (mi_row & MAX_MIB_MASK) >> FIRST_PARTITION_PASS_SAMPLE_REGION_LOG2; 168 const int col = 169 (mi_col & MAX_MIB_MASK) >> FIRST_PARTITION_PASS_SAMPLE_REGION_LOG2; 170 return (row << FIRST_PARTITION_PASS_STATS_STRIDE) + col; 171 } 172 173 typedef struct { 174 uint8_t ref0_counts[REF_FRAMES]; // Counters for ref_frame[0]. 175 uint8_t ref1_counts[REF_FRAMES]; // Counters for ref_frame[1]. 176 int sample_counts; // Number of samples collected. 177 uint8_t interintra_motion_mode_count[REF_FRAMES]; // Counter for interintra 178 // motion mode 179 } FIRST_PARTITION_PASS_STATS; 180 181 #define MAX_INTERP_FILTER_STATS 64 182 typedef struct { 183 InterpFilters filters; 184 int_mv mv[2]; 185 int8_t ref_frames[2]; 186 COMPOUND_TYPE comp_type; 187 int64_t rd; 188 int skip_txfm_sb; 189 int64_t skip_sse_sb; 190 unsigned int pred_sse; 191 } INTERPOLATION_FILTER_STATS; 192 193 #define MAX_COMP_RD_STATS 64 194 typedef struct { 195 int32_t rate[COMPOUND_TYPES]; 196 int64_t dist[COMPOUND_TYPES]; 197 int64_t comp_model_rd[COMPOUND_TYPES]; 198 int_mv mv[2]; 199 MV_REFERENCE_FRAME ref_frames[2]; 200 PREDICTION_MODE mode; 201 InterpFilters filter; 202 int ref_mv_idx; 203 int is_global[2]; 204 } COMP_RD_STATS; 205 206 struct inter_modes_info; 207 typedef struct macroblock MACROBLOCK; 208 struct macroblock { 209 struct macroblock_plane plane[MAX_MB_PLANE]; 210 211 // Determine if one would go with reduced complexity transform block 212 // search model to select prediction modes, or full complexity model 213 // to select transform kernel. 214 int rd_model; 215 216 // Indicate if the encoder is running in the first pass partition search. 217 // In that case, apply certain speed features therein to reduce the overhead 218 // cost in the first pass search. 219 int cb_partition_scan; 220 221 FIRST_PARTITION_PASS_STATS 222 first_partition_pass_stats[FIRST_PARTITION_PASS_STATS_TABLES]; 223 224 // [comp_idx][saved stat_idx] 225 INTERPOLATION_FILTER_STATS interp_filter_stats[2][MAX_INTERP_FILTER_STATS]; 226 int interp_filter_stats_idx[2]; 227 228 // prune_comp_search_by_single_result (3:MAX_REF_MV_SERCH) 229 SimpleRDState simple_rd_state[SINGLE_REF_MODES][3]; 230 231 // Activate constrained coding block partition search range. 232 int use_cb_search_range; 233 234 // Inter macroblock RD search info. 235 MB_RD_RECORD mb_rd_record; 236 237 // Inter transform block RD search info. for square TX sizes. 238 TXB_RD_RECORD txb_rd_record_8X8[(MAX_MIB_SIZE >> 1) * (MAX_MIB_SIZE >> 1)]; 239 TXB_RD_RECORD txb_rd_record_16X16[(MAX_MIB_SIZE >> 2) * (MAX_MIB_SIZE >> 2)]; 240 TXB_RD_RECORD txb_rd_record_32X32[(MAX_MIB_SIZE >> 3) * (MAX_MIB_SIZE >> 3)]; 241 TXB_RD_RECORD txb_rd_record_64X64[(MAX_MIB_SIZE >> 4) * (MAX_MIB_SIZE >> 4)]; 242 243 // Intra transform block RD search info. for square TX sizes. 244 TXB_RD_RECORD txb_rd_record_intra; 245 246 MACROBLOCKD e_mbd; 247 MB_MODE_INFO_EXT *mbmi_ext; 248 int skip_block; 249 int qindex; 250 251 // The equivalent error at the current rdmult of one whole bit (not one 252 // bitcost unit). 253 int errorperbit; 254 // The equivalend SAD error of one (whole) bit at the current quantizer 255 // for large blocks. 256 int sadperbit16; 257 // The equivalend SAD error of one (whole) bit at the current quantizer 258 // for sub-8x8 blocks. 259 int sadperbit4; 260 int rdmult; 261 int cb_rdmult; 262 int mb_energy; 263 int sb_energy_level; 264 int *m_search_count_ptr; 265 int *ex_search_count_ptr; 266 267 unsigned int txb_split_count; 268 #if CONFIG_SPEED_STATS 269 unsigned int tx_search_count; 270 #endif // CONFIG_SPEED_STATS 271 272 // These are set to their default values at the beginning, and then adjusted 273 // further in the encoding process. 274 BLOCK_SIZE min_partition_size; 275 BLOCK_SIZE max_partition_size; 276 277 unsigned int max_mv_context[REF_FRAMES]; 278 unsigned int source_variance; 279 unsigned int simple_motion_pred_sse; 280 unsigned int pred_sse[REF_FRAMES]; 281 int pred_mv_sad[REF_FRAMES]; 282 283 int nmv_vec_cost[MV_JOINTS]; 284 int *nmvcost[2]; 285 int *nmvcost_hp[2]; 286 int **mv_cost_stack; 287 288 int32_t *wsrc_buf; 289 int32_t *mask_buf; 290 uint8_t *above_pred_buf; 291 uint8_t *left_pred_buf; 292 293 PALETTE_BUFFER *palette_buffer; 294 295 CONV_BUF_TYPE *tmp_conv_dst; 296 uint8_t *tmp_obmc_bufs[2]; 297 298 FRAME_CONTEXT *row_ctx; 299 // This context will be used to update color_map_cdf pointer which would be 300 // used during pack bitstream. For single thread and tile-multithreading case 301 // this ponter will be same as xd->tile_ctx, but for the case of row-mt: 302 // xd->tile_ctx will point to a temporary context while tile_pb_ctx will point 303 // to the accurate tile context. 304 FRAME_CONTEXT *tile_pb_ctx; 305 306 struct inter_modes_info *inter_modes_info; 307 308 // buffer for hash value calculation of a block 309 // used only in av1_get_block_hash_value() 310 // [first hash/second hash] 311 // [two buffers used ping-pong] 312 uint32_t *hash_value_buffer[2][2]; 313 314 CRC_CALCULATOR crc_calculator1; 315 CRC_CALCULATOR crc_calculator2; 316 int g_crc_initialized; 317 318 // These define limits to motion vector components to prevent them 319 // from extending outside the UMV borders 320 MvLimits mv_limits; 321 322 uint8_t blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE]; 323 324 int skip; 325 int skip_chroma_rd; 326 int skip_cost[SKIP_CONTEXTS][2]; 327 328 int skip_mode; // 0: off; 1: on 329 int skip_mode_cost[SKIP_CONTEXTS][2]; 330 331 int compound_idx; 332 333 LV_MAP_COEFF_COST coeff_costs[TX_SIZES][PLANE_TYPES]; 334 LV_MAP_EOB_COST eob_costs[7][2]; 335 uint16_t cb_offset; 336 337 // mode costs 338 int intra_inter_cost[INTRA_INTER_CONTEXTS][2]; 339 340 int mbmode_cost[BLOCK_SIZE_GROUPS][INTRA_MODES]; 341 int newmv_mode_cost[NEWMV_MODE_CONTEXTS][2]; 342 int zeromv_mode_cost[GLOBALMV_MODE_CONTEXTS][2]; 343 int refmv_mode_cost[REFMV_MODE_CONTEXTS][2]; 344 int drl_mode_cost0[DRL_MODE_CONTEXTS][2]; 345 346 int comp_inter_cost[COMP_INTER_CONTEXTS][2]; 347 int single_ref_cost[REF_CONTEXTS][SINGLE_REFS - 1][2]; 348 int comp_ref_type_cost[COMP_REF_TYPE_CONTEXTS] 349 [CDF_SIZE(COMP_REFERENCE_TYPES)]; 350 int uni_comp_ref_cost[UNI_COMP_REF_CONTEXTS][UNIDIR_COMP_REFS - 1] 351 [CDF_SIZE(2)]; 352 // Cost for signaling ref_frame[0] (LAST_FRAME, LAST2_FRAME, LAST3_FRAME or 353 // GOLDEN_FRAME) in bidir-comp mode. 354 int comp_ref_cost[REF_CONTEXTS][FWD_REFS - 1][2]; 355 // Cost for signaling ref_frame[1] (ALTREF_FRAME, ALTREF2_FRAME, or 356 // BWDREF_FRAME) in bidir-comp mode. 357 int comp_bwdref_cost[REF_CONTEXTS][BWD_REFS - 1][2]; 358 int inter_compound_mode_cost[INTER_MODE_CONTEXTS][INTER_COMPOUND_MODES]; 359 int compound_type_cost[BLOCK_SIZES_ALL][MASKED_COMPOUND_TYPES]; 360 int wedge_idx_cost[BLOCK_SIZES_ALL][16]; 361 int interintra_cost[BLOCK_SIZE_GROUPS][2]; 362 int wedge_interintra_cost[BLOCK_SIZES_ALL][2]; 363 int interintra_mode_cost[BLOCK_SIZE_GROUPS][INTERINTRA_MODES]; 364 int motion_mode_cost[BLOCK_SIZES_ALL][MOTION_MODES]; 365 int motion_mode_cost1[BLOCK_SIZES_ALL][2]; 366 int intra_uv_mode_cost[CFL_ALLOWED_TYPES][INTRA_MODES][UV_INTRA_MODES]; 367 int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES]; 368 int filter_intra_cost[BLOCK_SIZES_ALL][2]; 369 int filter_intra_mode_cost[FILTER_INTRA_MODES]; 370 int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS]; 371 int partition_cost[PARTITION_CONTEXTS][EXT_PARTITION_TYPES]; 372 int palette_y_size_cost[PALATTE_BSIZE_CTXS][PALETTE_SIZES]; 373 int palette_uv_size_cost[PALATTE_BSIZE_CTXS][PALETTE_SIZES]; 374 int palette_y_color_cost[PALETTE_SIZES][PALETTE_COLOR_INDEX_CONTEXTS] 375 [PALETTE_COLORS]; 376 int palette_uv_color_cost[PALETTE_SIZES][PALETTE_COLOR_INDEX_CONTEXTS] 377 [PALETTE_COLORS]; 378 int palette_y_mode_cost[PALATTE_BSIZE_CTXS][PALETTE_Y_MODE_CONTEXTS][2]; 379 int palette_uv_mode_cost[PALETTE_UV_MODE_CONTEXTS][2]; 380 // The rate associated with each alpha codeword 381 int cfl_cost[CFL_JOINT_SIGNS][CFL_PRED_PLANES][CFL_ALPHABET_SIZE]; 382 int tx_size_cost[TX_SIZES - 1][TX_SIZE_CONTEXTS][TX_SIZES]; 383 int txfm_partition_cost[TXFM_PARTITION_CONTEXTS][2]; 384 int inter_tx_type_costs[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES]; 385 int intra_tx_type_costs[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES] 386 [TX_TYPES]; 387 int angle_delta_cost[DIRECTIONAL_MODES][2 * MAX_ANGLE_DELTA + 1]; 388 int switchable_restore_cost[RESTORE_SWITCHABLE_TYPES]; 389 int wiener_restore_cost[2]; 390 int sgrproj_restore_cost[2]; 391 int intrabc_cost[2]; 392 393 // Used to store sub partition's choices. 394 MV pred_mv[REF_FRAMES]; 395 396 // Store the best motion vector during motion search 397 int_mv best_mv; 398 // Store the second best motion vector during full-pixel motion search 399 int_mv second_best_mv; 400 401 // Store the fractional best motion vector during sub/Qpel-pixel motion search 402 int_mv fractional_best_mv[3]; 403 404 // Ref frames that are selected by square partition blocks within a super- 405 // block, in MI resolution. They can be used to prune ref frames for 406 // rectangular blocks. 407 int picked_ref_frames_mask[32 * 32]; 408 409 // use default transform and skip transform type search for intra modes 410 int use_default_intra_tx_type; 411 // use default transform and skip transform type search for inter modes 412 int use_default_inter_tx_type; 413 #if CONFIG_DIST_8X8 414 int using_dist_8x8; 415 aom_tune_metric tune_metric; 416 #endif // CONFIG_DIST_8X8 417 int comp_idx_cost[COMP_INDEX_CONTEXTS][2]; 418 int comp_group_idx_cost[COMP_GROUP_IDX_CONTEXTS][2]; 419 // Bit flags for pruning tx type search, tx split, etc. 420 int tx_search_prune[EXT_TX_SET_TYPES]; 421 int must_find_valid_partition; 422 int tx_split_prune_flag; // Flag to skip tx split RD search. 423 int recalc_luma_mc_data; // Flag to indicate recalculation of MC data during 424 // interpolation filter search 425 // The likelihood of an edge existing in the block (using partial Canny edge 426 // detection). For reference, 556 is the value returned for a solid 427 // vertical black/white edge. 428 uint16_t edge_strength; 429 // The strongest edge strength seen along the x/y axis. 430 uint16_t edge_strength_x; 431 uint16_t edge_strength_y; 432 433 // [Saved stat index] 434 COMP_RD_STATS comp_rd_stats[MAX_COMP_RD_STATS]; 435 int comp_rd_stats_idx; 436 }; 437 438 static INLINE int is_rect_tx_allowed_bsize(BLOCK_SIZE bsize) { 439 static const char LUT[BLOCK_SIZES_ALL] = { 440 0, // BLOCK_4X4 441 1, // BLOCK_4X8 442 1, // BLOCK_8X4 443 0, // BLOCK_8X8 444 1, // BLOCK_8X16 445 1, // BLOCK_16X8 446 0, // BLOCK_16X16 447 1, // BLOCK_16X32 448 1, // BLOCK_32X16 449 0, // BLOCK_32X32 450 1, // BLOCK_32X64 451 1, // BLOCK_64X32 452 0, // BLOCK_64X64 453 0, // BLOCK_64X128 454 0, // BLOCK_128X64 455 0, // BLOCK_128X128 456 1, // BLOCK_4X16 457 1, // BLOCK_16X4 458 1, // BLOCK_8X32 459 1, // BLOCK_32X8 460 1, // BLOCK_16X64 461 1, // BLOCK_64X16 462 }; 463 464 return LUT[bsize]; 465 } 466 467 static INLINE int is_rect_tx_allowed(const MACROBLOCKD *xd, 468 const MB_MODE_INFO *mbmi) { 469 return is_rect_tx_allowed_bsize(mbmi->sb_type) && 470 !xd->lossless[mbmi->segment_id]; 471 } 472 473 static INLINE int tx_size_to_depth(TX_SIZE tx_size, BLOCK_SIZE bsize) { 474 TX_SIZE ctx_size = max_txsize_rect_lookup[bsize]; 475 int depth = 0; 476 while (tx_size != ctx_size) { 477 depth++; 478 ctx_size = sub_tx_size_map[ctx_size]; 479 assert(depth <= MAX_TX_DEPTH); 480 } 481 return depth; 482 } 483 484 static INLINE void set_blk_skip(MACROBLOCK *x, int plane, int blk_idx, 485 int skip) { 486 if (skip) 487 x->blk_skip[blk_idx] |= 1UL << plane; 488 else 489 x->blk_skip[blk_idx] &= ~(1UL << plane); 490 #ifndef NDEBUG 491 // Set chroma planes to uninitialized states when luma is set to check if 492 // it will be set later 493 if (plane == 0) { 494 x->blk_skip[blk_idx] |= 1UL << (1 + 4); 495 x->blk_skip[blk_idx] |= 1UL << (2 + 4); 496 } 497 498 // Clear the initialization checking bit 499 x->blk_skip[blk_idx] &= ~(1UL << (plane + 4)); 500 #endif 501 } 502 503 static INLINE int is_blk_skip(MACROBLOCK *x, int plane, int blk_idx) { 504 #ifndef NDEBUG 505 // Check if this is initialized 506 assert(!(x->blk_skip[blk_idx] & (1UL << (plane + 4)))); 507 508 // The magic number is 0x77, this is to test if there is garbage data 509 assert((x->blk_skip[blk_idx] & 0x88) == 0); 510 #endif 511 return (x->blk_skip[blk_idx] >> plane) & 1; 512 } 513 514 #ifdef __cplusplus 515 } // extern "C" 516 #endif 517 518 #endif // AOM_AV1_ENCODER_BLOCK_H_ 519