/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <limits.h>
#include <float.h>
#include <math.h>
#include <stdbool.h>
#include <stdio.h>

#include "config/aom_config.h"
#include "config/aom_dsp_rtcd.h"
#include "config/av1_rtcd.h"

#include "aom_dsp/aom_dsp_common.h"
#include "aom_dsp/binary_codes_writer.h"
#include "aom_ports/mem.h"
#include "aom_ports/aom_timer.h"
#include "aom_ports/system_state.h"

#if CONFIG_MISMATCH_DEBUG
#include "aom_util/debug_util.h"
#endif  // CONFIG_MISMATCH_DEBUG

#include "av1/common/cfl.h"
#include "av1/common/common.h"
#include "av1/common/entropy.h"
#include "av1/common/entropymode.h"
#include "av1/common/idct.h"
#include "av1/common/mv.h"
#include "av1/common/mvref_common.h"
#include "av1/common/pred_common.h"
#include "av1/common/quant_common.h"
#include "av1/common/reconintra.h"
#include "av1/common/reconinter.h"
#include "av1/common/seg_common.h"
#include "av1/common/tile_common.h"
#include "av1/common/warped_motion.h"

#include "av1/encoder/aq_complexity.h"
#include "av1/encoder/aq_cyclicrefresh.h"
#include "av1/encoder/aq_variance.h"
#include "av1/encoder/global_motion.h"
#include "av1/encoder/encodeframe.h"
#include "av1/encoder/encodemb.h"
#include "av1/encoder/encodemv.h"
#include "av1/encoder/encodetxb.h"
#include "av1/encoder/ethread.h"
#include "av1/encoder/extend.h"
#include "av1/encoder/ml.h"
#include "av1/encoder/partition_strategy.h"
#include "av1/encoder/partition_model_weights.h"
#include "av1/encoder/rd.h"
#include "av1/encoder/rdopt.h"
#include "av1/encoder/reconinter_enc.h"
#include "av1/encoder/segmentation.h"
#include "av1/encoder/tokenize.h"
#include "av1/encoder/var_based_part.h"

static void encode_superblock(const AV1_COMP *const cpi, TileDataEnc *tile_data,
                              ThreadData *td, TOKENEXTRA **t, RUN_TYPE dry_run,
                              int mi_row, int mi_col, BLOCK_SIZE bsize,
                              int *rate);
static int ml_predict_breakout(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
                               const MACROBLOCK *const x,
                               const RD_STATS *const rd_stats,
                               unsigned int pb_source_variance);

// This is used as a reference when computing the source variance for the
// purposes of activity masking.
// Eventually this should be replaced by custom no-reference routines,
// which will be faster.
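// Note: pairing this flat all-128 reference with a zero reference stride
// means the fn_ptr[bs].vf() variance kernels below effectively return the
// variance of the source block itself.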
const uint8_t AV1_VAR_OFFS[MAX_SB_SIZE] = {
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128
};

static const uint16_t AV1_HIGH_VAR_OFFS_8[MAX_SB_SIZE] = {
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128
};

static const uint16_t AV1_HIGH_VAR_OFFS_10[MAX_SB_SIZE] = {
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4
};

static const uint16_t AV1_HIGH_VAR_OFFS_12[MAX_SB_SIZE] = {
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16
};

unsigned int av1_get_sby_perpixel_variance(const AV1_COMP *cpi,
                                           const struct buf_2d *ref,
                                           BLOCK_SIZE bs) {
  unsigned int sse;
  const unsigned int var =
      cpi->fn_ptr[bs].vf(ref->buf, ref->stride, AV1_VAR_OFFS, 0, &sse);
  return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
}

unsigned int av1_high_get_sby_perpixel_variance(const AV1_COMP *cpi,
                                                const struct buf_2d *ref,
                                                BLOCK_SIZE bs, int bd) {
  unsigned int var, sse;
  switch (bd) {
    case 10:
      var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
                               CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_10), 0,
                               &sse);
      break;
    case 12:
      var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
                               CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_12), 0,
                               &sse);
      break;
    case 8:
    default:
      var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
                               CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_8), 0,
                               &sse);
      break;
  }
  return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
}

static unsigned int get_sby_perpixel_diff_variance(const AV1_COMP *const cpi,
                                                   const struct buf_2d *ref,
                                                   int mi_row, int mi_col,
                                                   BLOCK_SIZE bs) {
  unsigned int sse, var;
  uint8_t *last_y;
  const YV12_BUFFER_CONFIG *last =
      get_ref_frame_yv12_buf(&cpi->common, LAST_FRAME);

  assert(last != NULL);
  last_y =
      &last->y_buffer[mi_row * MI_SIZE * last->y_stride + mi_col * MI_SIZE];
  var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, last_y, last->y_stride, &sse);
  return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
}

static BLOCK_SIZE get_rd_var_based_fixed_partition(AV1_COMP *cpi, MACROBLOCK *x,
                                                   int mi_row, int mi_col) {
  unsigned int var = get_sby_perpixel_diff_variance(
      cpi, &x->plane[0].src, mi_row, mi_col, BLOCK_64X64);
  if (var < 8)
    return BLOCK_64X64;
  else if (var < 128)
    return BLOCK_32X32;
  else if (var < 2048)
    return BLOCK_16X16;
  else
    return BLOCK_8X8;
}

static void set_offsets_without_segment_id(const AV1_COMP *const cpi,
                                           const TileInfo *const tile,
                                           MACROBLOCK *const x, int mi_row,
                                           int mi_col, BLOCK_SIZE bsize) {
  const AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *const xd = &x->e_mbd;
  const int mi_width = mi_size_wide[bsize];
  const int mi_height = mi_size_high[bsize];

  set_mode_info_offsets(cpi, x, xd, mi_row, mi_col);

  set_skip_context(xd, mi_row, mi_col, num_planes);
  xd->above_txfm_context = cm->above_txfm_context[tile->tile_row] + mi_col;
  xd->left_txfm_context =
      xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);

  // Set up destination pointers.
  av1_setup_dst_planes(xd->plane, bsize, &cm->cur_frame->buf, mi_row, mi_col, 0,
                       num_planes);

  // Set up limit values for MV components.
  // MVs beyond this range do not produce a new or different prediction block.
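  // A vector at these bounds moves the block entirely outside the visible
  // frame plus the interpolation filter border (AOM_INTERP_EXTEND); past that
  // point only replicated edge pixels are read, so the predictor cannot
  // change.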
  x->mv_limits.row_min =
      -(((mi_row + mi_height) * MI_SIZE) + AOM_INTERP_EXTEND);
  x->mv_limits.col_min = -(((mi_col + mi_width) * MI_SIZE) + AOM_INTERP_EXTEND);
  x->mv_limits.row_max = (cm->mi_rows - mi_row) * MI_SIZE + AOM_INTERP_EXTEND;
  x->mv_limits.col_max = (cm->mi_cols - mi_col) * MI_SIZE + AOM_INTERP_EXTEND;

  set_plane_n4(xd, mi_width, mi_height, num_planes);

  // Set up distance of MB to edge of frame in 1/8th pel units.
  assert(!(mi_col & (mi_width - 1)) && !(mi_row & (mi_height - 1)));
  set_mi_row_col(xd, tile, mi_row, mi_height, mi_col, mi_width, cm->mi_rows,
                 cm->mi_cols);

  // Set up source buffers.
  av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, bsize);

  // R/D setup.
  x->rdmult = cpi->rd.RDMULT;

  // Required by av1_append_sub8x8_mvs_for_idx() and av1_find_best_ref_mvs().
  xd->tile = *tile;

  xd->cfl.mi_row = mi_row;
  xd->cfl.mi_col = mi_col;
}

static void set_offsets(const AV1_COMP *const cpi, const TileInfo *const tile,
                        MACROBLOCK *const x, int mi_row, int mi_col,
                        BLOCK_SIZE bsize) {
  const AV1_COMMON *const cm = &cpi->common;
  const struct segmentation *const seg = &cm->seg;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi;

  set_offsets_without_segment_id(cpi, tile, x, mi_row, mi_col, bsize);

  // Set up the segment ID.
  mbmi = xd->mi[0];
  mbmi->segment_id = 0;
  if (seg->enabled) {
    if (seg->enabled && !cpi->vaq_refresh) {
      const uint8_t *const map =
          seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
      mbmi->segment_id =
          map ? get_segment_id(cm, map, bsize, mi_row, mi_col) : 0;
    }
    av1_init_plane_quantizers(cpi, x, mbmi->segment_id);
  }
}

static void update_filter_type_count(uint8_t allow_update_cdf,
                                     FRAME_COUNTS *counts,
                                     const MACROBLOCKD *xd,
                                     const MB_MODE_INFO *mbmi) {
  int dir;
  for (dir = 0; dir < 2; ++dir) {
    const int ctx = av1_get_pred_context_switchable_interp(xd, dir);
    InterpFilter filter = av1_extract_interp_filter(mbmi->interp_filters, dir);
    ++counts->switchable_interp[ctx][filter];
    if (allow_update_cdf) {
      update_cdf(xd->tile_ctx->switchable_interp_cdf[ctx], filter,
                 SWITCHABLE_FILTERS);
    }
  }
}

static void update_global_motion_used(PREDICTION_MODE mode, BLOCK_SIZE bsize,
                                      const MB_MODE_INFO *mbmi,
                                      RD_COUNTS *rdc) {
  if (mode == GLOBALMV || mode == GLOBAL_GLOBALMV) {
    const int num_4x4s = mi_size_wide[bsize] * mi_size_high[bsize];
    int ref;
    for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) {
      rdc->global_motion_used[mbmi->ref_frame[ref]] += num_4x4s;
    }
  }
}

static void reset_tx_size(MACROBLOCK *x, MB_MODE_INFO *mbmi,
                          const TX_MODE tx_mode) {
  MACROBLOCKD *const xd = &x->e_mbd;
  if (xd->lossless[mbmi->segment_id]) {
    mbmi->tx_size = TX_4X4;
  } else if (tx_mode != TX_MODE_SELECT) {
    mbmi->tx_size = tx_size_from_tx_mode(mbmi->sb_type, tx_mode);
  } else {
    BLOCK_SIZE bsize = mbmi->sb_type;
    TX_SIZE min_tx_size = depth_to_tx_size(MAX_TX_DEPTH, bsize);
    mbmi->tx_size = (TX_SIZE)TXSIZEMAX(mbmi->tx_size, min_tx_size);
  }
  if (is_inter_block(mbmi)) {
    memset(mbmi->inter_tx_size, mbmi->tx_size, sizeof(mbmi->inter_tx_size));
  }
  memset(mbmi->txk_type, DCT_DCT, sizeof(mbmi->txk_type[0]) * TXK_TYPE_BUF_LEN);
  av1_zero(x->blk_skip);
  x->skip = 0;
}

static void update_state(const AV1_COMP *const cpi,
                         const TileDataEnc *const tile_data, ThreadData *td,
                         const PICK_MODE_CONTEXT *const ctx, int mi_row,
                         int mi_col, BLOCK_SIZE bsize, RUN_TYPE dry_run) {
  int i, x_idx, y;
  const AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  RD_COUNTS *const rdc = &td->rd_counts;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const p = x->plane;
  struct macroblockd_plane *const pd = xd->plane;
  const MB_MODE_INFO *const mi = &ctx->mic;
  MB_MODE_INFO *const mi_addr = xd->mi[0];
  const struct segmentation *const seg = &cm->seg;
  const int bw = mi_size_wide[mi->sb_type];
  const int bh = mi_size_high[mi->sb_type];
  const int mis = cm->mi_stride;
  const int mi_width = mi_size_wide[bsize];
  const int mi_height = mi_size_high[bsize];

  assert(mi->sb_type == bsize);

  *mi_addr = *mi;
  *x->mbmi_ext = ctx->mbmi_ext;

  memcpy(x->blk_skip, ctx->blk_skip, sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);

  x->skip = ctx->skip;

  // If segmentation is in use:
  if (seg->enabled) {
    // For in-frame complexity AQ, copy the segment id from the segment map.
    if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) {
      const uint8_t *const map =
          seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
      mi_addr->segment_id =
          map ? get_segment_id(cm, map, bsize, mi_row, mi_col) : 0;
      reset_tx_size(x, mi_addr, cm->tx_mode);
    }
    // Else, for cyclic refresh mode, update the segment map, set the segment
    // id and then update the quantizer.
    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
      av1_cyclic_refresh_update_segment(cpi, mi_addr, mi_row, mi_col, bsize,
                                        ctx->rate, ctx->dist, x->skip);
    }
    if (mi_addr->uv_mode == UV_CFL_PRED && !is_cfl_allowed(xd))
      mi_addr->uv_mode = UV_DC_PRED;
  }

  for (i = 0; i < num_planes; ++i) {
    p[i].coeff = ctx->coeff[i];
    p[i].qcoeff = ctx->qcoeff[i];
    pd[i].dqcoeff = ctx->dqcoeff[i];
    p[i].eobs = ctx->eobs[i];
    p[i].txb_entropy_ctx = ctx->txb_entropy_ctx[i];
  }
  for (i = 0; i < 2; ++i) pd[i].color_index_map = ctx->color_index_map[i];
  // Restore the coding context of the MB to what was in place when the mode
  // was picked for it.
  for (y = 0; y < mi_height; y++)
    for (x_idx = 0; x_idx < mi_width; x_idx++)
      if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > x_idx &&
          (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height > y) {
        xd->mi[x_idx + y * mis] = mi_addr;
      }

  if (cpi->oxcf.aq_mode) av1_init_plane_quantizers(cpi, x, mi_addr->segment_id);

  if (dry_run) return;

#if CONFIG_INTERNAL_STATS
  {
    unsigned int *const mode_chosen_counts =
        (unsigned int *)cpi->mode_chosen_counts;  // Cast const away.
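    // For keyframes, map the chosen intra prediction mode to its THR_* mode
    // index before tallying; inter frames record ctx->best_mode_index
    // directly.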
    if (frame_is_intra_only(cm)) {
      static const int kf_mode_index[] = {
        THR_DC /*DC_PRED*/,
        THR_V_PRED /*V_PRED*/,
        THR_H_PRED /*H_PRED*/,
        THR_D45_PRED /*D45_PRED*/,
        THR_D135_PRED /*D135_PRED*/,
        THR_D113_PRED /*D113_PRED*/,
        THR_D157_PRED /*D157_PRED*/,
        THR_D203_PRED /*D203_PRED*/,
        THR_D67_PRED /*D67_PRED*/,
        THR_SMOOTH /*SMOOTH_PRED*/,
        THR_SMOOTH_V /*SMOOTH_V_PRED*/,
        THR_SMOOTH_H /*SMOOTH_H_PRED*/,
        THR_PAETH /*PAETH_PRED*/,
      };
      ++mode_chosen_counts[kf_mode_index[mi_addr->mode]];
    } else {
      // Note how often each mode is chosen as the best.
      ++mode_chosen_counts[ctx->best_mode_index];
    }
  }
#endif
  if (!frame_is_intra_only(cm)) {
    if (is_inter_block(mi_addr)) {
      // TODO(sarahparker): global motion stats need to be handled per-tile
      // to be compatible with tile-based threading.
      update_global_motion_used(mi_addr->mode, bsize, mi_addr, rdc);
    }

    if (cm->interp_filter == SWITCHABLE &&
        mi_addr->motion_mode != WARPED_CAUSAL &&
        !is_nontrans_global_motion(xd, xd->mi[0])) {
      update_filter_type_count(tile_data->allow_update_cdf, td->counts, xd,
                               mi_addr);
    }

    rdc->comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff;
    rdc->comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff;
    rdc->comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff;
  }

  const int x_mis = AOMMIN(bw, cm->mi_cols - mi_col);
  const int y_mis = AOMMIN(bh, cm->mi_rows - mi_row);
  av1_copy_frame_mvs(cm, mi, mi_row, mi_col, x_mis, y_mis);
}

void av1_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src,
                          int mi_row, int mi_col, const int num_planes,
                          BLOCK_SIZE bsize) {
  // Set current frame pointer.
  x->e_mbd.cur_buf = src;

  // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
  // the static analysis warnings.
  for (int i = 0; i < AOMMIN(num_planes, MAX_MB_PLANE); i++) {
    const int is_uv = i > 0;
    setup_pred_plane(
        &x->plane[i].src, bsize, src->buffers[i], src->crop_widths[is_uv],
        src->crop_heights[is_uv], src->strides[is_uv], mi_row, mi_col, NULL,
        x->e_mbd.plane[i].subsampling_x, x->e_mbd.plane[i].subsampling_y);
  }
}

static int set_segment_rdmult(const AV1_COMP *const cpi, MACROBLOCK *const x,
                              int8_t segment_id) {
  const AV1_COMMON *const cm = &cpi->common;
  av1_init_plane_quantizers(cpi, x, segment_id);
  aom_clear_system_state();
  int segment_qindex = av1_get_qindex(&cm->seg, segment_id, cm->base_qindex);
  return av1_compute_rd_mult(cpi, segment_qindex + cm->y_dc_delta_q);
}

static int set_deltaq_rdmult(const AV1_COMP *const cpi, MACROBLOCKD *const xd) {
  const AV1_COMMON *const cm = &cpi->common;

  return av1_compute_rd_mult(
      cpi, cm->base_qindex + xd->delta_qindex + cm->y_dc_delta_q);
}

static EdgeInfo edge_info(const struct buf_2d *ref, const BLOCK_SIZE bsize,
                          const bool high_bd, const int bd) {
  const int width = block_size_wide[bsize];
  const int height = block_size_high[bsize];
  // The implementation requires width to be a multiple of 8. It also requires
  // height to be a multiple of 4, but this is always the case.
  assert(height % 4 == 0);
  if (width % 8 != 0) {
    EdgeInfo ei = { .magnitude = 0, .x = 0, .y = 0 };
    return ei;
  }
  return av1_edge_exists(ref->buf, ref->stride, width, height, high_bd, bd);
}

static int use_pb_simple_motion_pred_sse(const AV1_COMP *const cpi) {
  // TODO(debargha, yuec): Not in use, need to implement a speed feature
  // utilizing this data point, and replace '0' by the corresponding speed
  // feature flag.
  return 0 && !frame_is_intra_only(&cpi->common);
}

static void pick_sb_modes(AV1_COMP *const cpi, TileDataEnc *tile_data,
                          MACROBLOCK *const x, int mi_row, int mi_col,
                          RD_STATS *rd_cost, PARTITION_TYPE partition,
                          BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
                          int64_t best_rd, int use_nonrd_pick_mode) {
  AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  TileInfo *const tile_info = &tile_data->tile_info;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi;
  MB_MODE_INFO *ctx_mbmi = &ctx->mic;
  struct macroblock_plane *const p = x->plane;
  struct macroblockd_plane *const pd = xd->plane;
  const AQ_MODE aq_mode = cpi->oxcf.aq_mode;
  const DELTAQ_MODE deltaq_mode = cpi->oxcf.deltaq_mode;
  int i, orig_rdmult;

#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, rd_pick_sb_modes_time);
#endif

  if (best_rd < 0) {
    ctx->rdcost = INT64_MAX;
    ctx->skip = 0;
    av1_invalid_rd_stats(rd_cost);
    return;
  }

  aom_clear_system_state();

  set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);

  mbmi = xd->mi[0];

  if (ctx->rd_mode_is_ready) {
    assert(ctx_mbmi->sb_type == bsize);
    assert(ctx_mbmi->partition == partition);
    *mbmi = *ctx_mbmi;
    rd_cost->rate = ctx->rate;
    rd_cost->dist = ctx->dist;
    rd_cost->rdcost = ctx->rdcost;
  } else {
    mbmi->sb_type = bsize;
    mbmi->partition = partition;
  }

#if CONFIG_RD_DEBUG
  mbmi->mi_row = mi_row;
  mbmi->mi_col = mi_col;
#endif

  for (i = 0; i < num_planes; ++i) {
    p[i].coeff = ctx->coeff[i];
    p[i].qcoeff = ctx->qcoeff[i];
    pd[i].dqcoeff = ctx->dqcoeff[i];
    p[i].eobs = ctx->eobs[i];
    p[i].txb_entropy_ctx = ctx->txb_entropy_ctx[i];
  }

  for (i = 0; i < 2; ++i) pd[i].color_index_map = ctx->color_index_map[i];

  if (!ctx->rd_mode_is_ready) {
    ctx->skippable = 0;

    // Set to zero to make sure we do not use the previously encoded frame
    // stats.
    mbmi->skip = 0;

    // Reset the skip mode flag.
    mbmi->skip_mode = 0;
  }

  x->skip_chroma_rd =
      !is_chroma_reference(mi_row, mi_col, bsize, xd->plane[1].subsampling_x,
                           xd->plane[1].subsampling_y);

  if (ctx->rd_mode_is_ready) {
    x->skip = ctx->skip;
    *x->mbmi_ext = ctx->mbmi_ext;
    return;
  }

  if (is_cur_buf_hbd(xd)) {
    x->source_variance = av1_high_get_sby_perpixel_variance(
        cpi, &x->plane[0].src, bsize, xd->bd);
  } else {
    x->source_variance =
        av1_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
  }
  if (use_pb_simple_motion_pred_sse(cpi)) {
    const MV ref_mv_full = { .row = 0, .col = 0 };
    unsigned int var = 0;
    av1_simple_motion_sse_var(cpi, x, mi_row, mi_col, bsize, ref_mv_full, 0,
                              &x->simple_motion_pred_sse, &var);
  }

  // If the threshold for disabling wedge search is zero, it means the feature
  // should not be used. Use a value that will always succeed in the check.
  if (cpi->sf.disable_wedge_search_edge_thresh == 0) {
    x->edge_strength = UINT16_MAX;
    x->edge_strength_x = UINT16_MAX;
    x->edge_strength_y = UINT16_MAX;
  } else {
    EdgeInfo ei =
        edge_info(&x->plane[0].src, bsize, is_cur_buf_hbd(xd), xd->bd);
    x->edge_strength = ei.magnitude;
    x->edge_strength_x = ei.x;
    x->edge_strength_y = ei.y;
  }
  // Save rdmult before it might be changed, so it can be restored later.
  orig_rdmult = x->rdmult;

  if (aq_mode == VARIANCE_AQ) {
    if (cpi->vaq_refresh) {
      const int energy = bsize <= BLOCK_16X16
                             ? x->mb_energy
                             : av1_log_block_var(cpi, x, bsize);
      mbmi->segment_id = energy;
    }
    x->rdmult = set_segment_rdmult(cpi, x, mbmi->segment_id);
  } else if (aq_mode == COMPLEXITY_AQ) {
    x->rdmult = set_segment_rdmult(cpi, x, mbmi->segment_id);
  } else if (aq_mode == CYCLIC_REFRESH_AQ) {
    // If the segment is boosted, use the rdmult for that segment.
    if (cyclic_refresh_segment_id_boosted(mbmi->segment_id))
      x->rdmult = av1_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);
  } else if (cpi->oxcf.enable_tpl_model) {
    x->rdmult = x->cb_rdmult;
  }

  if (deltaq_mode > 0) x->rdmult = set_deltaq_rdmult(cpi, xd);

  // Find the best coding mode & reconstruct the MB so it is available
  // as a predictor for MBs that follow in the SB.
  if (frame_is_intra_only(cm)) {
#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, av1_rd_pick_intra_mode_sb_time);
#endif
    av1_rd_pick_intra_mode_sb(cpi, x, mi_row, mi_col, rd_cost, bsize, ctx,
                              best_rd);
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, av1_rd_pick_intra_mode_sb_time);
#endif
  } else {
#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, av1_rd_pick_inter_mode_sb_time);
#endif
    if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
      av1_rd_pick_inter_mode_sb_seg_skip(cpi, tile_data, x, mi_row, mi_col,
                                         rd_cost, bsize, ctx, best_rd);
    } else {
      // TODO(kyslov): do the same for pick_intra_mode and
      // pick_inter_mode_sb_seg_skip
      if (use_nonrd_pick_mode) {
        av1_nonrd_pick_inter_mode_sb(cpi, tile_data, x, mi_row, mi_col, rd_cost,
                                     bsize, ctx, best_rd);
      } else {
        av1_rd_pick_inter_mode_sb(cpi, tile_data, x, mi_row, mi_col, rd_cost,
                                  bsize, ctx, best_rd);
      }
    }
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, av1_rd_pick_inter_mode_sb_time);
#endif
  }

  // Examine the resulting rate and, for AQ mode 2, make a segment choice.
  if ((rd_cost->rate != INT_MAX) && (aq_mode == COMPLEXITY_AQ) &&
      (bsize >= BLOCK_16X16) &&
      (cm->current_frame.frame_type == KEY_FRAME ||
       cpi->refresh_alt_ref_frame || cpi->refresh_alt2_ref_frame ||
       (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref))) {
    av1_caq_select_segment(cpi, x, bsize, mi_row, mi_col, rd_cost->rate);
  }

  x->rdmult = orig_rdmult;

  // TODO(jingning) The rate-distortion optimization flow needs to be
  // refactored to provide proper exit/return handling.
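  // A rate of INT_MAX means no mode beat best_rd; force the RD cost to
  // INT64_MAX so callers discard this candidate.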
  if (rd_cost->rate == INT_MAX) rd_cost->rdcost = INT64_MAX;

  ctx->rate = rd_cost->rate;
  ctx->dist = rd_cost->dist;
  ctx->rdcost = rd_cost->rdcost;

#if CONFIG_COLLECT_COMPONENT_TIMING
  end_timing(cpi, rd_pick_sb_modes_time);
#endif
}

static void update_inter_mode_stats(FRAME_CONTEXT *fc, FRAME_COUNTS *counts,
                                    PREDICTION_MODE mode, int16_t mode_context,
                                    uint8_t allow_update_cdf) {
  (void)counts;

  int16_t mode_ctx = mode_context & NEWMV_CTX_MASK;
  if (mode == NEWMV) {
#if CONFIG_ENTROPY_STATS
    ++counts->newmv_mode[mode_ctx][0];
#endif
    if (allow_update_cdf) update_cdf(fc->newmv_cdf[mode_ctx], 0, 2);
    return;
  } else {
#if CONFIG_ENTROPY_STATS
    ++counts->newmv_mode[mode_ctx][1];
#endif
    if (allow_update_cdf) update_cdf(fc->newmv_cdf[mode_ctx], 1, 2);

    mode_ctx = (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
    if (mode == GLOBALMV) {
#if CONFIG_ENTROPY_STATS
      ++counts->zeromv_mode[mode_ctx][0];
#endif
      if (allow_update_cdf) update_cdf(fc->zeromv_cdf[mode_ctx], 0, 2);
      return;
    } else {
#if CONFIG_ENTROPY_STATS
      ++counts->zeromv_mode[mode_ctx][1];
#endif
      if (allow_update_cdf) update_cdf(fc->zeromv_cdf[mode_ctx], 1, 2);
      mode_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
#if CONFIG_ENTROPY_STATS
      ++counts->refmv_mode[mode_ctx][mode != NEARESTMV];
#endif
      if (allow_update_cdf)
        update_cdf(fc->refmv_cdf[mode_ctx], mode != NEARESTMV, 2);
    }
  }
}

static void update_palette_cdf(MACROBLOCKD *xd, const MB_MODE_INFO *const mbmi,
                               FRAME_COUNTS *counts, uint8_t allow_update_cdf) {
  FRAME_CONTEXT *fc = xd->tile_ctx;
  const BLOCK_SIZE bsize = mbmi->sb_type;
  const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
  const int palette_bsize_ctx = av1_get_palette_bsize_ctx(bsize);

  (void)counts;

  if (mbmi->mode == DC_PRED) {
    const int n = pmi->palette_size[0];
    const int palette_mode_ctx = av1_get_palette_mode_ctx(xd);

#if CONFIG_ENTROPY_STATS
    ++counts->palette_y_mode[palette_bsize_ctx][palette_mode_ctx][n > 0];
#endif
    if (allow_update_cdf)
      update_cdf(fc->palette_y_mode_cdf[palette_bsize_ctx][palette_mode_ctx],
                 n > 0, 2);
    if (n > 0) {
#if CONFIG_ENTROPY_STATS
      ++counts->palette_y_size[palette_bsize_ctx][n - PALETTE_MIN_SIZE];
#endif
      if (allow_update_cdf) {
        update_cdf(fc->palette_y_size_cdf[palette_bsize_ctx],
                   n - PALETTE_MIN_SIZE, PALETTE_SIZES);
      }
    }
  }

  if (mbmi->uv_mode == UV_DC_PRED) {
    const int n = pmi->palette_size[1];
    const int palette_uv_mode_ctx = (pmi->palette_size[0] > 0);

#if CONFIG_ENTROPY_STATS
    ++counts->palette_uv_mode[palette_uv_mode_ctx][n > 0];
#endif
    if (allow_update_cdf)
      update_cdf(fc->palette_uv_mode_cdf[palette_uv_mode_ctx], n > 0, 2);

    if (n > 0) {
#if CONFIG_ENTROPY_STATS
      ++counts->palette_uv_size[palette_bsize_ctx][n - PALETTE_MIN_SIZE];
#endif
      if (allow_update_cdf) {
        update_cdf(fc->palette_uv_size_cdf[palette_bsize_ctx],
                   n - PALETTE_MIN_SIZE, PALETTE_SIZES);
      }
    }
  }
}

static void sum_intra_stats(const AV1_COMMON *const cm, FRAME_COUNTS *counts,
                            MACROBLOCKD *xd, const MB_MODE_INFO *const mbmi,
                            const MB_MODE_INFO *above_mi,
                            const MB_MODE_INFO *left_mi, const int intraonly,
                            const int mi_row, const int mi_col,
                            uint8_t allow_update_cdf) {
  FRAME_CONTEXT *fc = xd->tile_ctx;
  const PREDICTION_MODE y_mode = mbmi->mode;
  const UV_PREDICTION_MODE uv_mode = mbmi->uv_mode;
  (void)counts;
  const BLOCK_SIZE bsize = mbmi->sb_type;

  if (intraonly) {
#if CONFIG_ENTROPY_STATS
    const PREDICTION_MODE above = av1_above_block_mode(above_mi);
    const PREDICTION_MODE left = av1_left_block_mode(left_mi);
    const int above_ctx = intra_mode_context[above];
    const int left_ctx = intra_mode_context[left];
    ++counts->kf_y_mode[above_ctx][left_ctx][y_mode];
#endif  // CONFIG_ENTROPY_STATS
    if (allow_update_cdf)
      update_cdf(get_y_mode_cdf(fc, above_mi, left_mi), y_mode, INTRA_MODES);
  } else {
#if CONFIG_ENTROPY_STATS
    ++counts->y_mode[size_group_lookup[bsize]][y_mode];
#endif  // CONFIG_ENTROPY_STATS
    if (allow_update_cdf)
      update_cdf(fc->y_mode_cdf[size_group_lookup[bsize]], y_mode, INTRA_MODES);
  }

  if (av1_filter_intra_allowed(cm, mbmi)) {
    const int use_filter_intra_mode =
        mbmi->filter_intra_mode_info.use_filter_intra;
#if CONFIG_ENTROPY_STATS
    ++counts->filter_intra[mbmi->sb_type][use_filter_intra_mode];
    if (use_filter_intra_mode) {
      ++counts
            ->filter_intra_mode[mbmi->filter_intra_mode_info.filter_intra_mode];
    }
#endif  // CONFIG_ENTROPY_STATS
    if (allow_update_cdf) {
      update_cdf(fc->filter_intra_cdfs[mbmi->sb_type], use_filter_intra_mode,
                 2);
      if (use_filter_intra_mode) {
        update_cdf(fc->filter_intra_mode_cdf,
                   mbmi->filter_intra_mode_info.filter_intra_mode,
                   FILTER_INTRA_MODES);
      }
    }
  }
  if (av1_is_directional_mode(mbmi->mode) && av1_use_angle_delta(bsize)) {
#if CONFIG_ENTROPY_STATS
    ++counts->angle_delta[mbmi->mode - V_PRED]
                         [mbmi->angle_delta[PLANE_TYPE_Y] + MAX_ANGLE_DELTA];
#endif
    if (allow_update_cdf) {
      update_cdf(fc->angle_delta_cdf[mbmi->mode - V_PRED],
                 mbmi->angle_delta[PLANE_TYPE_Y] + MAX_ANGLE_DELTA,
                 2 * MAX_ANGLE_DELTA + 1);
    }
  }

  if (!is_chroma_reference(mi_row, mi_col, bsize,
                           xd->plane[AOM_PLANE_U].subsampling_x,
                           xd->plane[AOM_PLANE_U].subsampling_y))
    return;

#if CONFIG_ENTROPY_STATS
  ++counts->uv_mode[is_cfl_allowed(xd)][y_mode][uv_mode];
#endif  // CONFIG_ENTROPY_STATS
  if (allow_update_cdf) {
    const CFL_ALLOWED_TYPE cfl_allowed = is_cfl_allowed(xd);
    update_cdf(fc->uv_mode_cdf[cfl_allowed][y_mode], uv_mode,
               UV_INTRA_MODES - !cfl_allowed);
  }
  if (uv_mode == UV_CFL_PRED) {
    const int joint_sign = mbmi->cfl_alpha_signs;
    const int idx = mbmi->cfl_alpha_idx;

#if CONFIG_ENTROPY_STATS
    ++counts->cfl_sign[joint_sign];
#endif
    if (allow_update_cdf)
      update_cdf(fc->cfl_sign_cdf, joint_sign, CFL_JOINT_SIGNS);
    if (CFL_SIGN_U(joint_sign) != CFL_SIGN_ZERO) {
      aom_cdf_prob *cdf_u = fc->cfl_alpha_cdf[CFL_CONTEXT_U(joint_sign)];

#if CONFIG_ENTROPY_STATS
      ++counts->cfl_alpha[CFL_CONTEXT_U(joint_sign)][CFL_IDX_U(idx)];
#endif
      if (allow_update_cdf)
        update_cdf(cdf_u, CFL_IDX_U(idx), CFL_ALPHABET_SIZE);
    }
    if (CFL_SIGN_V(joint_sign) != CFL_SIGN_ZERO) {
      aom_cdf_prob *cdf_v = fc->cfl_alpha_cdf[CFL_CONTEXT_V(joint_sign)];

#if CONFIG_ENTROPY_STATS
      ++counts->cfl_alpha[CFL_CONTEXT_V(joint_sign)][CFL_IDX_V(idx)];
#endif
      if (allow_update_cdf)
        update_cdf(cdf_v, CFL_IDX_V(idx), CFL_ALPHABET_SIZE);
    }
  }
  if (av1_is_directional_mode(get_uv_mode(uv_mode)) &&
      av1_use_angle_delta(bsize)) {
#if CONFIG_ENTROPY_STATS
    ++counts->angle_delta[uv_mode - UV_V_PRED]
                         [mbmi->angle_delta[PLANE_TYPE_UV] + MAX_ANGLE_DELTA];
#endif
    if (allow_update_cdf) {
      update_cdf(fc->angle_delta_cdf[uv_mode - UV_V_PRED],
                 mbmi->angle_delta[PLANE_TYPE_UV] + MAX_ANGLE_DELTA,
                 2 * MAX_ANGLE_DELTA + 1);
    }
  }
  if (av1_allow_palette(cm->allow_screen_content_tools, bsize))
    update_palette_cdf(xd, mbmi, counts, allow_update_cdf);
}

static void update_stats(const AV1_COMMON *const cm, TileDataEnc *tile_data,
                         ThreadData *td, int mi_row, int mi_col) {
  MACROBLOCK *x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
  const CurrentFrame *const current_frame = &cm->current_frame;
  const BLOCK_SIZE bsize = mbmi->sb_type;
  FRAME_CONTEXT *fc = xd->tile_ctx;
  const uint8_t allow_update_cdf = tile_data->allow_update_cdf;

  // Delta quant applies to both intra and inter blocks.
  const int super_block_upper_left =
      ((mi_row & (cm->seq_params.mib_size - 1)) == 0) &&
      ((mi_col & (cm->seq_params.mib_size - 1)) == 0);

  const int seg_ref_active =
      segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_REF_FRAME);

  if (current_frame->skip_mode_info.skip_mode_flag && !seg_ref_active &&
      is_comp_ref_allowed(bsize)) {
    const int skip_mode_ctx = av1_get_skip_mode_context(xd);
#if CONFIG_ENTROPY_STATS
    td->counts->skip_mode[skip_mode_ctx][mbmi->skip_mode]++;
#endif
    if (allow_update_cdf)
      update_cdf(fc->skip_mode_cdfs[skip_mode_ctx], mbmi->skip_mode, 2);
  }

  if (!mbmi->skip_mode) {
    if (!seg_ref_active) {
      const int skip_ctx = av1_get_skip_context(xd);
#if CONFIG_ENTROPY_STATS
      td->counts->skip[skip_ctx][mbmi->skip]++;
#endif
      if (allow_update_cdf) update_cdf(fc->skip_cdfs[skip_ctx], mbmi->skip, 2);
    }
  }

  const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
  if (delta_q_info->delta_q_present_flag &&
      (bsize != cm->seq_params.sb_size || !mbmi->skip) &&
      super_block_upper_left) {
#if CONFIG_ENTROPY_STATS
    const int dq =
        (mbmi->current_qindex - xd->current_qindex) / delta_q_info->delta_q_res;
    const int absdq = abs(dq);
    for (int i = 0; i < AOMMIN(absdq, DELTA_Q_SMALL); ++i) {
      td->counts->delta_q[i][1]++;
    }
    if (absdq < DELTA_Q_SMALL) td->counts->delta_q[absdq][0]++;
#endif
    xd->current_qindex = mbmi->current_qindex;
    if (delta_q_info->delta_lf_present_flag) {
      if (delta_q_info->delta_lf_multi) {
        const int frame_lf_count =
            av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
        for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
#if CONFIG_ENTROPY_STATS
          const int delta_lf = (mbmi->delta_lf[lf_id] - xd->delta_lf[lf_id]) /
                               delta_q_info->delta_lf_res;
          const int abs_delta_lf = abs(delta_lf);
          for (int i = 0; i < AOMMIN(abs_delta_lf, DELTA_LF_SMALL); ++i) {
            td->counts->delta_lf_multi[lf_id][i][1]++;
          }
          if (abs_delta_lf < DELTA_LF_SMALL)
            td->counts->delta_lf_multi[lf_id][abs_delta_lf][0]++;
#endif
          xd->delta_lf[lf_id] = mbmi->delta_lf[lf_id];
        }
      } else {
#if CONFIG_ENTROPY_STATS
        const int delta_lf =
            (mbmi->delta_lf_from_base - xd->delta_lf_from_base) /
            delta_q_info->delta_lf_res;
        const int abs_delta_lf = abs(delta_lf);
        for (int i = 0; i < AOMMIN(abs_delta_lf, DELTA_LF_SMALL); ++i) {
          td->counts->delta_lf[i][1]++;
        }
        if (abs_delta_lf < DELTA_LF_SMALL)
          td->counts->delta_lf[abs_delta_lf][0]++;
#endif
        xd->delta_lf_from_base = mbmi->delta_lf_from_base;
      }
    }
  }

  if (!is_inter_block(mbmi)) {
    sum_intra_stats(cm, td->counts, xd, mbmi, xd->above_mbmi, xd->left_mbmi,
                    frame_is_intra_only(cm), mi_row, mi_col,
                    tile_data->allow_update_cdf);
  }

  if (av1_allow_intrabc(cm)) {
    if (allow_update_cdf)
      update_cdf(fc->intrabc_cdf, is_intrabc_block(mbmi), 2);
#if CONFIG_ENTROPY_STATS
    ++td->counts->intrabc[is_intrabc_block(mbmi)];
#endif  // CONFIG_ENTROPY_STATS
  }

  if (!frame_is_intra_only(cm)) {
    RD_COUNTS *rdc = &td->rd_counts;

    FRAME_COUNTS *const counts = td->counts;

    if (mbmi->skip_mode) {
      rdc->skip_mode_used_flag = 1;
      if (current_frame->reference_mode == REFERENCE_MODE_SELECT) {
        assert(has_second_ref(mbmi));
        rdc->compound_ref_used_flag = 1;
      }
      set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
      return;
    }

    const int inter_block = is_inter_block(mbmi);

    if (!seg_ref_active) {
#if CONFIG_ENTROPY_STATS
      counts->intra_inter[av1_get_intra_inter_context(xd)][inter_block]++;
#endif
      if (allow_update_cdf) {
        update_cdf(fc->intra_inter_cdf[av1_get_intra_inter_context(xd)],
                   inter_block, 2);
      }
      // If the segment reference feature is enabled we have only a single
      // reference frame allowed for the segment so exclude it from
      // the reference frame counts used to work out probabilities.
      if (inter_block) {
        const MV_REFERENCE_FRAME ref0 = mbmi->ref_frame[0];
        const MV_REFERENCE_FRAME ref1 = mbmi->ref_frame[1];

        av1_collect_neighbors_ref_counts(xd);

        if (current_frame->reference_mode == REFERENCE_MODE_SELECT) {
          if (has_second_ref(mbmi))
            // This flag is also updated for 4x4 blocks.
            rdc->compound_ref_used_flag = 1;
          if (is_comp_ref_allowed(bsize)) {
#if CONFIG_ENTROPY_STATS
            counts->comp_inter[av1_get_reference_mode_context(xd)]
                              [has_second_ref(mbmi)]++;
#endif  // CONFIG_ENTROPY_STATS
            if (allow_update_cdf) {
              update_cdf(av1_get_reference_mode_cdf(xd), has_second_ref(mbmi),
                         2);
            }
          }
        }

        if (has_second_ref(mbmi)) {
          const COMP_REFERENCE_TYPE comp_ref_type =
              has_uni_comp_refs(mbmi) ? UNIDIR_COMP_REFERENCE
                                      : BIDIR_COMP_REFERENCE;
          if (allow_update_cdf) {
            update_cdf(av1_get_comp_reference_type_cdf(xd), comp_ref_type,
                       COMP_REFERENCE_TYPES);
          }
#if CONFIG_ENTROPY_STATS
          counts->comp_ref_type[av1_get_comp_reference_type_context(xd)]
                               [comp_ref_type]++;
#endif  // CONFIG_ENTROPY_STATS

          if (comp_ref_type == UNIDIR_COMP_REFERENCE) {
            const int bit = (ref0 == BWDREF_FRAME);
            if (allow_update_cdf)
              update_cdf(av1_get_pred_cdf_uni_comp_ref_p(xd), bit, 2);
#if CONFIG_ENTROPY_STATS
            counts->uni_comp_ref[av1_get_pred_context_uni_comp_ref_p(xd)][0]
                                [bit]++;
#endif  // CONFIG_ENTROPY_STATS
            if (!bit) {
              const int bit1 = (ref1 == LAST3_FRAME || ref1 == GOLDEN_FRAME);
              if (allow_update_cdf)
                update_cdf(av1_get_pred_cdf_uni_comp_ref_p1(xd), bit1, 2);
#if CONFIG_ENTROPY_STATS
              counts->uni_comp_ref[av1_get_pred_context_uni_comp_ref_p1(xd)][1]
                                  [bit1]++;
#endif  // CONFIG_ENTROPY_STATS
              if (bit1) {
                if (allow_update_cdf) {
                  update_cdf(av1_get_pred_cdf_uni_comp_ref_p2(xd),
                             ref1 == GOLDEN_FRAME, 2);
                }
#if CONFIG_ENTROPY_STATS
                counts->uni_comp_ref[av1_get_pred_context_uni_comp_ref_p2(xd)]
                                    [2][ref1 == GOLDEN_FRAME]++;
#endif  // CONFIG_ENTROPY_STATS
              }
            }
          } else {
            const int bit = (ref0 == GOLDEN_FRAME || ref0 == LAST3_FRAME);
            if (allow_update_cdf)
              update_cdf(av1_get_pred_cdf_comp_ref_p(xd), bit, 2);
#if CONFIG_ENTROPY_STATS
            counts->comp_ref[av1_get_pred_context_comp_ref_p(xd)][0][bit]++;
#endif  // CONFIG_ENTROPY_STATS
            if (!bit) {
              if (allow_update_cdf) {
                update_cdf(av1_get_pred_cdf_comp_ref_p1(xd),
                           ref0 == LAST2_FRAME, 2);
              }
#if CONFIG_ENTROPY_STATS
              counts->comp_ref[av1_get_pred_context_comp_ref_p1(xd)][1]
                              [ref0 == LAST2_FRAME]++;
#endif  // CONFIG_ENTROPY_STATS
            } else {
              if (allow_update_cdf) {
                update_cdf(av1_get_pred_cdf_comp_ref_p2(xd),
                           ref0 == GOLDEN_FRAME, 2);
              }
#if CONFIG_ENTROPY_STATS
              counts->comp_ref[av1_get_pred_context_comp_ref_p2(xd)][2]
                              [ref0 == GOLDEN_FRAME]++;
#endif  // CONFIG_ENTROPY_STATS
            }
            if (allow_update_cdf) {
              update_cdf(av1_get_pred_cdf_comp_bwdref_p(xd),
                         ref1 == ALTREF_FRAME, 2);
            }
#if CONFIG_ENTROPY_STATS
            counts->comp_bwdref[av1_get_pred_context_comp_bwdref_p(xd)][0]
                               [ref1 == ALTREF_FRAME]++;
#endif  // CONFIG_ENTROPY_STATS
            if (ref1 != ALTREF_FRAME) {
              if (allow_update_cdf) {
                update_cdf(av1_get_pred_cdf_comp_bwdref_p1(xd),
                           ref1 == ALTREF2_FRAME, 2);
              }
#if CONFIG_ENTROPY_STATS
              counts->comp_bwdref[av1_get_pred_context_comp_bwdref_p1(xd)][1]
                                 [ref1 == ALTREF2_FRAME]++;
#endif  // CONFIG_ENTROPY_STATS
            }
          }
        } else {
          const int bit = (ref0 >= BWDREF_FRAME);
          if (allow_update_cdf)
            update_cdf(av1_get_pred_cdf_single_ref_p1(xd), bit, 2);
#if CONFIG_ENTROPY_STATS
          counts->single_ref[av1_get_pred_context_single_ref_p1(xd)][0][bit]++;
#endif  // CONFIG_ENTROPY_STATS
          if (bit) {
            assert(ref0 <= ALTREF_FRAME);
            if (allow_update_cdf) {
              update_cdf(av1_get_pred_cdf_single_ref_p2(xd),
                         ref0 == ALTREF_FRAME, 2);
            }
#if CONFIG_ENTROPY_STATS
            counts->single_ref[av1_get_pred_context_single_ref_p2(xd)][1]
                              [ref0 == ALTREF_FRAME]++;
#endif  // CONFIG_ENTROPY_STATS
            if (ref0 != ALTREF_FRAME) {
              if (allow_update_cdf) {
                update_cdf(av1_get_pred_cdf_single_ref_p6(xd),
                           ref0 == ALTREF2_FRAME, 2);
              }
#if CONFIG_ENTROPY_STATS
              counts->single_ref[av1_get_pred_context_single_ref_p6(xd)][5]
                                [ref0 == ALTREF2_FRAME]++;
#endif  // CONFIG_ENTROPY_STATS
            }
          } else {
            const int bit1 = !(ref0 == LAST2_FRAME || ref0 == LAST_FRAME);
            if (allow_update_cdf)
              update_cdf(av1_get_pred_cdf_single_ref_p3(xd), bit1, 2);
#if CONFIG_ENTROPY_STATS
            counts
                ->single_ref[av1_get_pred_context_single_ref_p3(xd)][2][bit1]++;
#endif  // CONFIG_ENTROPY_STATS
            if (!bit1) {
              if (allow_update_cdf) {
                update_cdf(av1_get_pred_cdf_single_ref_p4(xd),
                           ref0 != LAST_FRAME, 2);
              }
#if CONFIG_ENTROPY_STATS
              counts->single_ref[av1_get_pred_context_single_ref_p4(xd)][3]
                                [ref0 != LAST_FRAME]++;
#endif  // CONFIG_ENTROPY_STATS
            } else {
              if (allow_update_cdf) {
                update_cdf(av1_get_pred_cdf_single_ref_p5(xd),
                           ref0 != LAST3_FRAME, 2);
              }
#if CONFIG_ENTROPY_STATS
              counts->single_ref[av1_get_pred_context_single_ref_p5(xd)][4]
                                [ref0 != LAST3_FRAME]++;
#endif  // CONFIG_ENTROPY_STATS
            }
          }
        }

        if (cm->seq_params.enable_interintra_compound &&
            is_interintra_allowed(mbmi)) {
          const int bsize_group = size_group_lookup[bsize];
          if (mbmi->ref_frame[1] == INTRA_FRAME) {
#if CONFIG_ENTROPY_STATS
            counts->interintra[bsize_group][1]++;
#endif
            if (allow_update_cdf)
              update_cdf(fc->interintra_cdf[bsize_group], 1, 2);
#if CONFIG_ENTROPY_STATS
            counts->interintra_mode[bsize_group][mbmi->interintra_mode]++;
#endif
            if (allow_update_cdf) {
              update_cdf(fc->interintra_mode_cdf[bsize_group],
                         mbmi->interintra_mode, INTERINTRA_MODES);
            }
            if (is_interintra_wedge_used(bsize)) {
#if CONFIG_ENTROPY_STATS
              counts->wedge_interintra[bsize][mbmi->use_wedge_interintra]++;
#endif
              if (allow_update_cdf) {
                update_cdf(fc->wedge_interintra_cdf[bsize],
                           mbmi->use_wedge_interintra, 2);
              }
              if (mbmi->use_wedge_interintra) {
#if CONFIG_ENTROPY_STATS
                counts->wedge_idx[bsize][mbmi->interintra_wedge_index]++;
#endif
                if (allow_update_cdf) {
                  update_cdf(fc->wedge_idx_cdf[bsize],
                             mbmi->interintra_wedge_index, 16);
                }
              }
            }
          } else {
#if CONFIG_ENTROPY_STATS
            counts->interintra[bsize_group][0]++;
#endif
            if (allow_update_cdf)
              update_cdf(fc->interintra_cdf[bsize_group], 0, 2);
          }
        }

        set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
        const MOTION_MODE motion_allowed =
            cm->switchable_motion_mode
                ? motion_mode_allowed(xd->global_motion, xd, mbmi,
                                      cm->allow_warped_motion)
                : SIMPLE_TRANSLATION;
        if (mbmi->ref_frame[1] != INTRA_FRAME) {
          if (motion_allowed == WARPED_CAUSAL) {
#if CONFIG_ENTROPY_STATS
            counts->motion_mode[bsize][mbmi->motion_mode]++;
#endif
            if (allow_update_cdf) {
              update_cdf(fc->motion_mode_cdf[bsize], mbmi->motion_mode,
                         MOTION_MODES);
            }
          } else if (motion_allowed == OBMC_CAUSAL) {
#if CONFIG_ENTROPY_STATS
            counts->obmc[bsize][mbmi->motion_mode == OBMC_CAUSAL]++;
#endif
            if (allow_update_cdf) {
              update_cdf(fc->obmc_cdf[bsize], mbmi->motion_mode == OBMC_CAUSAL,
                         2);
            }
          }
        }

        if (has_second_ref(mbmi)) {
          assert(current_frame->reference_mode != SINGLE_REFERENCE &&
                 is_inter_compound_mode(mbmi->mode) &&
                 mbmi->motion_mode == SIMPLE_TRANSLATION);

          const int masked_compound_used =
              is_any_masked_compound_used(bsize) &&
              cm->seq_params.enable_masked_compound;
          if (masked_compound_used) {
            const int comp_group_idx_ctx = get_comp_group_idx_context(xd);
#if CONFIG_ENTROPY_STATS
            ++counts->comp_group_idx[comp_group_idx_ctx][mbmi->comp_group_idx];
#endif
            if (allow_update_cdf) {
              update_cdf(fc->comp_group_idx_cdf[comp_group_idx_ctx],
                         mbmi->comp_group_idx, 2);
            }
          }

          if (mbmi->comp_group_idx == 0) {
            const int comp_index_ctx = get_comp_index_context(cm, xd);
#if CONFIG_ENTROPY_STATS
            ++counts->compound_index[comp_index_ctx][mbmi->compound_idx];
#endif
            if (allow_update_cdf) {
              update_cdf(fc->compound_index_cdf[comp_index_ctx],
                         mbmi->compound_idx, 2);
            }
          } else {
            assert(masked_compound_used);
            if (is_interinter_compound_used(COMPOUND_WEDGE, bsize)) {
#if CONFIG_ENTROPY_STATS
              ++counts->compound_type[bsize][mbmi->interinter_comp.type -
                                             COMPOUND_WEDGE];
#endif
              if (allow_update_cdf) {
                update_cdf(fc->compound_type_cdf[bsize],
                           mbmi->interinter_comp.type - COMPOUND_WEDGE,
                           MASKED_COMPOUND_TYPES);
              }
            }
          }
        }
        if (mbmi->interinter_comp.type == COMPOUND_WEDGE) {
          if (is_interinter_compound_used(COMPOUND_WEDGE, bsize)) {
#if CONFIG_ENTROPY_STATS
            counts->wedge_idx[bsize][mbmi->interinter_comp.wedge_index]++;
#endif
            if (allow_update_cdf) {
              update_cdf(fc->wedge_idx_cdf[bsize],
                         mbmi->interinter_comp.wedge_index, 16);
            }
          }
        }
      }
    }

    if (inter_block &&
        !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
      int16_t mode_ctx;
      const PREDICTION_MODE mode = mbmi->mode;

      mode_ctx =
          av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
      if (has_second_ref(mbmi)) {
#if CONFIG_ENTROPY_STATS
        ++counts->inter_compound_mode[mode_ctx][INTER_COMPOUND_OFFSET(mode)];
#endif
        if (allow_update_cdf)
          update_cdf(fc->inter_compound_mode_cdf[mode_ctx],
                     INTER_COMPOUND_OFFSET(mode), INTER_COMPOUND_MODES);
      } else {
        update_inter_mode_stats(fc, counts, mode, mode_ctx, allow_update_cdf);
      }

      int mode_allowed = (mbmi->mode == NEWMV);
      mode_allowed |= (mbmi->mode == NEW_NEWMV);
      if (mode_allowed) {
        uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
        int idx;

        for (idx = 0; idx < 2; ++idx) {
          if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
#if CONFIG_ENTROPY_STATS
            uint8_t drl_ctx =
                av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx);
            ++counts->drl_mode[drl_ctx][mbmi->ref_mv_idx != idx];
#endif

            if (mbmi->ref_mv_idx == idx) break;
          }
        }
      }

      if (have_nearmv_in_inter_mode(mbmi->mode)) {
        uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
        int idx;

        for (idx = 1; idx < 3; ++idx) {
          if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
#if CONFIG_ENTROPY_STATS
            uint8_t drl_ctx =
                av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx);
            ++counts->drl_mode[drl_ctx][mbmi->ref_mv_idx != idx - 1];
#endif

            if (mbmi->ref_mv_idx == idx - 1) break;
          }
        }
      }
    }
  }
}

typedef struct {
  ENTROPY_CONTEXT a[MAX_MIB_SIZE * MAX_MB_PLANE];
  ENTROPY_CONTEXT l[MAX_MIB_SIZE * MAX_MB_PLANE];
  PARTITION_CONTEXT sa[MAX_MIB_SIZE];
  PARTITION_CONTEXT sl[MAX_MIB_SIZE];
  TXFM_CONTEXT *p_ta;
  TXFM_CONTEXT *p_tl;
  TXFM_CONTEXT ta[MAX_MIB_SIZE];
  TXFM_CONTEXT tl[MAX_MIB_SIZE];
} RD_SEARCH_MACROBLOCK_CONTEXT;

static void restore_context(MACROBLOCK *x,
                            const RD_SEARCH_MACROBLOCK_CONTEXT *ctx, int mi_row,
                            int mi_col, BLOCK_SIZE bsize,
                            const int num_planes) {
  MACROBLOCKD *xd = &x->e_mbd;
  int p;
  const int num_4x4_blocks_wide =
      block_size_wide[bsize] >> tx_size_wide_log2[0];
  const int num_4x4_blocks_high =
      block_size_high[bsize] >> tx_size_high_log2[0];
  int mi_width = mi_size_wide[bsize];
  int mi_height = mi_size_high[bsize];
  for (p = 0; p < num_planes; p++) {
    int tx_col = mi_col;
    int tx_row = mi_row & MAX_MIB_MASK;
    memcpy(xd->above_context[p] + (tx_col >> xd->plane[p].subsampling_x),
           ctx->a + num_4x4_blocks_wide * p,
           (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >>
               xd->plane[p].subsampling_x);
    memcpy(xd->left_context[p] + (tx_row >> xd->plane[p].subsampling_y),
           ctx->l + num_4x4_blocks_high * p,
           (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >>
               xd->plane[p].subsampling_y);
  }
  memcpy(xd->above_seg_context + mi_col, ctx->sa,
         sizeof(*xd->above_seg_context) * mi_width);
  memcpy(xd->left_seg_context + (mi_row & MAX_MIB_MASK), ctx->sl,
         sizeof(xd->left_seg_context[0]) * mi_height);
  xd->above_txfm_context = ctx->p_ta;
  xd->left_txfm_context = ctx->p_tl;
  memcpy(xd->above_txfm_context, ctx->ta,
         sizeof(*xd->above_txfm_context) * mi_width);
  memcpy(xd->left_txfm_context, ctx->tl,
         sizeof(*xd->left_txfm_context) * mi_height);
}

static void save_context(const MACROBLOCK *x, RD_SEARCH_MACROBLOCK_CONTEXT *ctx,
                         int mi_row, int mi_col, BLOCK_SIZE bsize,
                         const int num_planes) {
  const MACROBLOCKD *xd = &x->e_mbd;
  int p;
  const int num_4x4_blocks_wide =
      block_size_wide[bsize] >> tx_size_wide_log2[0];
  const int num_4x4_blocks_high =
      block_size_high[bsize] >> tx_size_high_log2[0];
  int mi_width = mi_size_wide[bsize];
  int mi_height = mi_size_high[bsize];

  // Buffer the above/left context information of the block being searched.
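  // Entropy contexts are tracked per 4x4 unit; the copies below shift the
  // offsets and byte counts by each plane's chroma subsampling so only the
  // units actually covered by this block are saved.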
  for (p = 0; p < num_planes; ++p) {
    int tx_col = mi_col;
    int tx_row = mi_row & MAX_MIB_MASK;
    memcpy(ctx->a + num_4x4_blocks_wide * p,
           xd->above_context[p] + (tx_col >> xd->plane[p].subsampling_x),
           (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >>
               xd->plane[p].subsampling_x);
    memcpy(ctx->l + num_4x4_blocks_high * p,
           xd->left_context[p] + (tx_row >> xd->plane[p].subsampling_y),
           (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >>
               xd->plane[p].subsampling_y);
  }
  memcpy(ctx->sa, xd->above_seg_context + mi_col,
         sizeof(*xd->above_seg_context) * mi_width);
  memcpy(ctx->sl, xd->left_seg_context + (mi_row & MAX_MIB_MASK),
         sizeof(xd->left_seg_context[0]) * mi_height);
  memcpy(ctx->ta, xd->above_txfm_context,
         sizeof(*xd->above_txfm_context) * mi_width);
  memcpy(ctx->tl, xd->left_txfm_context,
         sizeof(*xd->left_txfm_context) * mi_height);
  ctx->p_ta = xd->above_txfm_context;
  ctx->p_tl = xd->left_txfm_context;
}

static void encode_b(const AV1_COMP *const cpi, TileDataEnc *tile_data,
                     ThreadData *td, TOKENEXTRA **tp, int mi_row, int mi_col,
                     RUN_TYPE dry_run, BLOCK_SIZE bsize,
                     PARTITION_TYPE partition,
                     const PICK_MODE_CONTEXT *const ctx, int *rate) {
  TileInfo *const tile = &tile_data->tile_info;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *xd = &x->e_mbd;

  set_offsets(cpi, tile, x, mi_row, mi_col, bsize);
  MB_MODE_INFO *mbmi = xd->mi[0];
  mbmi->partition = partition;
  update_state(cpi, tile_data, td, ctx, mi_row, mi_col, bsize, dry_run);
  if (cpi->oxcf.enable_tpl_model && cpi->oxcf.aq_mode == NO_AQ &&
      cpi->oxcf.deltaq_mode == 0) {
    x->rdmult = x->cb_rdmult;
  }

  if (!dry_run) av1_set_coeff_buffer(cpi, x, mi_row, mi_col);

  encode_superblock(cpi, tile_data, td, tp, dry_run, mi_row, mi_col, bsize,
                    rate);

  if (!dry_run) {
    x->cb_offset += block_size_wide[bsize] * block_size_high[bsize];
    if (bsize == cpi->common.seq_params.sb_size && mbmi->skip == 1 &&
        cpi->common.delta_q_info.delta_lf_present_flag) {
      const int frame_lf_count = av1_num_planes(&cpi->common) > 1
                                     ? FRAME_LF_COUNT
                                     : FRAME_LF_COUNT - 2;
      for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id)
        mbmi->delta_lf[lf_id] = xd->delta_lf[lf_id];
      mbmi->delta_lf_from_base = xd->delta_lf_from_base;
    }
    if (has_second_ref(mbmi)) {
      if (mbmi->compound_idx == 0 ||
          mbmi->interinter_comp.type == COMPOUND_AVERAGE)
        mbmi->comp_group_idx = 0;
      else
        mbmi->comp_group_idx = 1;
    }
    update_stats(&cpi->common, tile_data, td, mi_row, mi_col);
  }
}

static void encode_sb(const AV1_COMP *const cpi, ThreadData *td,
                      TileDataEnc *tile_data, TOKENEXTRA **tp, int mi_row,
                      int mi_col, RUN_TYPE dry_run, BLOCK_SIZE bsize,
                      PC_TREE *pc_tree, int *rate) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int hbs = mi_size_wide[bsize] / 2;
  const int is_partition_root = bsize >= BLOCK_8X8;
  const int ctx = is_partition_root
                      ? partition_plane_context(xd, mi_row, mi_col, bsize)
                      : -1;
  const PARTITION_TYPE partition = pc_tree->partitioning;
  const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition);
  int quarter_step = mi_size_wide[bsize] / 4;
  int i;
  BLOCK_SIZE bsize2 = get_partition_subsize(bsize, PARTITION_SPLIT);

  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;

  if (!dry_run && ctx >= 0) {
    const int has_rows = (mi_row + hbs) < cm->mi_rows;
    const int has_cols = (mi_col + hbs) < cm->mi_cols;

    if (has_rows && has_cols) {
#if CONFIG_ENTROPY_STATS
      td->counts->partition[ctx][partition]++;
#endif

      if (tile_data->allow_update_cdf) {
        FRAME_CONTEXT *fc = xd->tile_ctx;
        update_cdf(fc->partition_cdf[ctx], partition,
                   partition_cdf_length(bsize));
      }
    }
  }

  switch (partition) {
    case PARTITION_NONE:
      encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
               partition, &pc_tree->none, rate);
      break;
    case PARTITION_VERT:
      encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
               partition, &pc_tree->vertical[0], rate);
      if (mi_col + hbs < cm->mi_cols) {
        encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs, dry_run, subsize,
                 partition, &pc_tree->vertical[1], rate);
      }
      break;
    case PARTITION_HORZ:
      encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
               partition, &pc_tree->horizontal[0], rate);
      if (mi_row + hbs < cm->mi_rows) {
        encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col, dry_run, subsize,
                 partition, &pc_tree->horizontal[1], rate);
      }
      break;
    case PARTITION_SPLIT:
      encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, dry_run, subsize,
                pc_tree->split[0], rate);
      encode_sb(cpi, td, tile_data, tp, mi_row, mi_col + hbs, dry_run, subsize,
                pc_tree->split[1], rate);
      encode_sb(cpi, td, tile_data, tp, mi_row + hbs, mi_col, dry_run, subsize,
                pc_tree->split[2], rate);
      encode_sb(cpi, td, tile_data, tp, mi_row + hbs, mi_col + hbs, dry_run,
                subsize, pc_tree->split[3], rate);
      break;

    case PARTITION_HORZ_A:
      encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, bsize2,
               partition, &pc_tree->horizontala[0], rate);
      encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs, dry_run, bsize2,
               partition, &pc_tree->horizontala[1], rate);
      encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col, dry_run, subsize,
               partition, &pc_tree->horizontala[2], rate);
      break;
    case PARTITION_HORZ_B:
      encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
               partition, &pc_tree->horizontalb[0], rate);
      encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col, dry_run, bsize2,
               partition, &pc_tree->horizontalb[1], rate);
      encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col + hbs, dry_run,
               bsize2, partition, &pc_tree->horizontalb[2], rate);
      break;
    case PARTITION_VERT_A:
      encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, bsize2,
               partition, &pc_tree->verticala[0], rate);
      encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col, dry_run, bsize2,
               partition, &pc_tree->verticala[1], rate);
      encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs, dry_run, subsize,
               partition, &pc_tree->verticala[2], rate);

      break;
    case PARTITION_VERT_B:
      encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
&pc_tree->verticalb[0], rate); 1608 encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs, dry_run, bsize2, 1609 partition, &pc_tree->verticalb[1], rate); 1610 encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col + hbs, dry_run, 1611 bsize2, partition, &pc_tree->verticalb[2], rate); 1612 break; 1613 case PARTITION_HORZ_4: 1614 for (i = 0; i < 4; ++i) { 1615 int this_mi_row = mi_row + i * quarter_step; 1616 if (i > 0 && this_mi_row >= cm->mi_rows) break; 1617 1618 encode_b(cpi, tile_data, td, tp, this_mi_row, mi_col, dry_run, subsize, 1619 partition, &pc_tree->horizontal4[i], rate); 1620 } 1621 break; 1622 case PARTITION_VERT_4: 1623 for (i = 0; i < 4; ++i) { 1624 int this_mi_col = mi_col + i * quarter_step; 1625 if (i > 0 && this_mi_col >= cm->mi_cols) break; 1626 1627 encode_b(cpi, tile_data, td, tp, mi_row, this_mi_col, dry_run, subsize, 1628 partition, &pc_tree->vertical4[i], rate); 1629 } 1630 break; 1631 default: assert(0 && "Invalid partition type."); break; 1632 } 1633 1634 update_ext_partition_context(xd, mi_row, mi_col, subsize, bsize, partition); 1635 } 1636 1637 static void set_partial_sb_partition(const AV1_COMMON *const cm, 1638 MB_MODE_INFO *mi, int bh_in, int bw_in, 1639 int mi_rows_remaining, 1640 int mi_cols_remaining, BLOCK_SIZE bsize, 1641 MB_MODE_INFO **mib) { 1642 int bh = bh_in; 1643 int r, c; 1644 for (r = 0; r < cm->seq_params.mib_size; r += bh) { 1645 int bw = bw_in; 1646 for (c = 0; c < cm->seq_params.mib_size; c += bw) { 1647 const int index = r * cm->mi_stride + c; 1648 mib[index] = mi + index; 1649 mib[index]->sb_type = find_partition_size( 1650 bsize, mi_rows_remaining - r, mi_cols_remaining - c, &bh, &bw); 1651 } 1652 } 1653 } 1654 1655 // This function attempts to set all mode info entries in a given superblock 1656 // to the same block partition size. 1657 // However, at the bottom and right borders of the image the requested size 1658 // may not be allowed in which case this code attempts to choose the largest 1659 // allowable partition. 1660 static void set_fixed_partitioning(AV1_COMP *cpi, const TileInfo *const tile, 1661 MB_MODE_INFO **mib, int mi_row, int mi_col, 1662 BLOCK_SIZE bsize) { 1663 AV1_COMMON *const cm = &cpi->common; 1664 const int mi_rows_remaining = tile->mi_row_end - mi_row; 1665 const int mi_cols_remaining = tile->mi_col_end - mi_col; 1666 int block_row, block_col; 1667 MB_MODE_INFO *const mi_upper_left = cm->mi + mi_row * cm->mi_stride + mi_col; 1668 int bh = mi_size_high[bsize]; 1669 int bw = mi_size_wide[bsize]; 1670 1671 assert((mi_rows_remaining > 0) && (mi_cols_remaining > 0)); 1672 1673 // Apply the requested partition size to the SB if it is all "in image" 1674 if ((mi_cols_remaining >= cm->seq_params.mib_size) && 1675 (mi_rows_remaining >= cm->seq_params.mib_size)) { 1676 for (block_row = 0; block_row < cm->seq_params.mib_size; block_row += bh) { 1677 for (block_col = 0; block_col < cm->seq_params.mib_size; 1678 block_col += bw) { 1679 int index = block_row * cm->mi_stride + block_col; 1680 mib[index] = mi_upper_left + index; 1681 mib[index]->sb_type = bsize; 1682 } 1683 } 1684 } else { 1685 // Else this is a partial SB. 
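// A partial SB is one that overlaps the bottom and/or right border of the
// frame, so the requested bsize cannot be applied everywhere. As a sketch of
// what happens below: for each (r, c) position inside the SB,
// set_partial_sb_partition() asks find_partition_size() for the largest size
// that still fits in the remaining area, e.g.
//   find_partition_size(bsize, mi_rows_remaining - r,
//                       mi_cols_remaining - c, &bh, &bw);
// so the chosen blocks shrink progressively toward the frame boundary.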
1686 set_partial_sb_partition(cm, mi_upper_left, bh, bw, mi_rows_remaining,
1687 mi_cols_remaining, bsize, mib);
1688 }
1689 }
1690
1691 static void rd_use_partition(AV1_COMP *cpi, ThreadData *td,
1692 TileDataEnc *tile_data, MB_MODE_INFO **mib,
1693 TOKENEXTRA **tp, int mi_row, int mi_col,
1694 BLOCK_SIZE bsize, int *rate, int64_t *dist,
1695 int do_recon, PC_TREE *pc_tree) {
1696 AV1_COMMON *const cm = &cpi->common;
1697 const int num_planes = av1_num_planes(cm);
1698 TileInfo *const tile_info = &tile_data->tile_info;
1699 MACROBLOCK *const x = &td->mb;
1700 MACROBLOCKD *const xd = &x->e_mbd;
1701 const int bs = mi_size_wide[bsize];
1702 const int hbs = bs / 2;
1703 int i;
1704 const int pl = (bsize >= BLOCK_8X8)
1705 ? partition_plane_context(xd, mi_row, mi_col, bsize)
1706 : 0;
1707 const PARTITION_TYPE partition =
1708 (bsize >= BLOCK_8X8) ? get_partition(cm, mi_row, mi_col, bsize)
1709 : PARTITION_NONE;
1710 const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition);
1711 RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
1712 RD_STATS last_part_rdc, none_rdc, chosen_rdc;
1713 BLOCK_SIZE sub_subsize = BLOCK_4X4;
1714 int splits_below = 0;
1715 BLOCK_SIZE bs_type = mib[0]->sb_type;
1716 int do_partition_search = 1;
1717 PICK_MODE_CONTEXT *ctx_none = &pc_tree->none;
1718
1719 if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
1720
1721 assert(mi_size_wide[bsize] == mi_size_high[bsize]);
1722
1723 av1_invalid_rd_stats(&last_part_rdc);
1724 av1_invalid_rd_stats(&none_rdc);
1725 av1_invalid_rd_stats(&chosen_rdc);
1726
1727 pc_tree->partitioning = partition;
1728
1729 xd->above_txfm_context = cm->above_txfm_context[tile_info->tile_row] + mi_col;
1730 xd->left_txfm_context =
1731 xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
1732 save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
1733
1734 if (bsize == BLOCK_16X16 && cpi->vaq_refresh) {
1735 set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
1736 x->mb_energy = av1_log_block_var(cpi, x, bsize);
1737 }
1738
1739 if (do_partition_search &&
1740 cpi->sf.partition_search_type == SEARCH_PARTITION &&
1741 cpi->sf.adjust_partitioning_from_last_frame) {
1742 // Check if any of the sub blocks are further split.
1743 if (partition == PARTITION_SPLIT && subsize > BLOCK_8X8) {
1744 sub_subsize = get_partition_subsize(subsize, PARTITION_SPLIT);
1745 splits_below = 1;
1746 for (i = 0; i < 4; i++) {
1747 int jj = i >> 1, ii = i & 0x01;
1748 MB_MODE_INFO *this_mi = mib[jj * hbs * cm->mi_stride + ii * hbs];
1749 if (this_mi && this_mi->sb_type >= sub_subsize) {
1750 splits_below = 0;
1751 }
1752 }
1753 }
1754
1755 // If the partition is not none, try none unless each of the 4 splits is
1756 // split even further.
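// That is, PARTITION_NONE is only re-tested when the whole block lies inside
// the frame (mi_row + hbs and mi_col + hbs are in range) and the previous
// frame's partitioning was not already split in all four quadrants
// (splits_below == 0). none_rdc then picks up the partition-signalling cost
// below, so it can be compared against the inherited partitioning on equal
// RDCOST terms.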
1757 if (partition != PARTITION_NONE && !splits_below && 1758 mi_row + hbs < cm->mi_rows && mi_col + hbs < cm->mi_cols) { 1759 pc_tree->partitioning = PARTITION_NONE; 1760 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &none_rdc, 1761 PARTITION_NONE, bsize, ctx_none, INT64_MAX, 0); 1762 1763 if (none_rdc.rate < INT_MAX) { 1764 none_rdc.rate += x->partition_cost[pl][PARTITION_NONE]; 1765 none_rdc.rdcost = RDCOST(x->rdmult, none_rdc.rate, none_rdc.dist); 1766 } 1767 1768 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); 1769 mib[0]->sb_type = bs_type; 1770 pc_tree->partitioning = partition; 1771 } 1772 } 1773 1774 switch (partition) { 1775 case PARTITION_NONE: 1776 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, 1777 PARTITION_NONE, bsize, ctx_none, INT64_MAX, 0); 1778 break; 1779 case PARTITION_HORZ: 1780 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, 1781 PARTITION_HORZ, subsize, &pc_tree->horizontal[0], INT64_MAX, 1782 0); 1783 if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 && 1784 mi_row + hbs < cm->mi_rows) { 1785 RD_STATS tmp_rdc; 1786 const PICK_MODE_CONTEXT *const ctx_h = &pc_tree->horizontal[0]; 1787 av1_init_rd_stats(&tmp_rdc); 1788 update_state(cpi, tile_data, td, ctx_h, mi_row, mi_col, subsize, 1); 1789 encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, mi_row, 1790 mi_col, subsize, NULL); 1791 pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, &tmp_rdc, 1792 PARTITION_HORZ, subsize, &pc_tree->horizontal[1], 1793 INT64_MAX, 0); 1794 if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { 1795 av1_invalid_rd_stats(&last_part_rdc); 1796 break; 1797 } 1798 last_part_rdc.rate += tmp_rdc.rate; 1799 last_part_rdc.dist += tmp_rdc.dist; 1800 last_part_rdc.rdcost += tmp_rdc.rdcost; 1801 } 1802 break; 1803 case PARTITION_VERT: 1804 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, 1805 PARTITION_VERT, subsize, &pc_tree->vertical[0], INT64_MAX, 1806 0); 1807 if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 && 1808 mi_col + hbs < cm->mi_cols) { 1809 RD_STATS tmp_rdc; 1810 const PICK_MODE_CONTEXT *const ctx_v = &pc_tree->vertical[0]; 1811 av1_init_rd_stats(&tmp_rdc); 1812 update_state(cpi, tile_data, td, ctx_v, mi_row, mi_col, subsize, 1); 1813 encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, mi_row, 1814 mi_col, subsize, NULL); 1815 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, &tmp_rdc, 1816 PARTITION_VERT, subsize, 1817 &pc_tree->vertical[bsize > BLOCK_8X8], INT64_MAX, 0); 1818 if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { 1819 av1_invalid_rd_stats(&last_part_rdc); 1820 break; 1821 } 1822 last_part_rdc.rate += tmp_rdc.rate; 1823 last_part_rdc.dist += tmp_rdc.dist; 1824 last_part_rdc.rdcost += tmp_rdc.rdcost; 1825 } 1826 break; 1827 case PARTITION_SPLIT: 1828 last_part_rdc.rate = 0; 1829 last_part_rdc.dist = 0; 1830 last_part_rdc.rdcost = 0; 1831 for (i = 0; i < 4; i++) { 1832 int x_idx = (i & 1) * hbs; 1833 int y_idx = (i >> 1) * hbs; 1834 int jj = i >> 1, ii = i & 0x01; 1835 RD_STATS tmp_rdc; 1836 if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols)) 1837 continue; 1838 1839 av1_init_rd_stats(&tmp_rdc); 1840 rd_use_partition(cpi, td, tile_data, 1841 mib + jj * hbs * cm->mi_stride + ii * hbs, tp, 1842 mi_row + y_idx, mi_col + x_idx, subsize, &tmp_rdc.rate, 1843 &tmp_rdc.dist, i != 3, pc_tree->split[i]); 1844 if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { 1845 av1_invalid_rd_stats(&last_part_rdc); 1846 break; 1847 } 1848 
last_part_rdc.rate += tmp_rdc.rate; 1849 last_part_rdc.dist += tmp_rdc.dist; 1850 } 1851 break; 1852 case PARTITION_VERT_A: 1853 case PARTITION_VERT_B: 1854 case PARTITION_HORZ_A: 1855 case PARTITION_HORZ_B: 1856 case PARTITION_HORZ_4: 1857 case PARTITION_VERT_4: 1858 assert(0 && "Cannot handle extended partition types"); 1859 default: assert(0); break; 1860 } 1861 1862 if (last_part_rdc.rate < INT_MAX) { 1863 last_part_rdc.rate += x->partition_cost[pl][partition]; 1864 last_part_rdc.rdcost = 1865 RDCOST(x->rdmult, last_part_rdc.rate, last_part_rdc.dist); 1866 } 1867 1868 if (do_partition_search && cpi->sf.adjust_partitioning_from_last_frame && 1869 cpi->sf.partition_search_type == SEARCH_PARTITION && 1870 partition != PARTITION_SPLIT && bsize > BLOCK_8X8 && 1871 (mi_row + bs < cm->mi_rows || mi_row + hbs == cm->mi_rows) && 1872 (mi_col + bs < cm->mi_cols || mi_col + hbs == cm->mi_cols)) { 1873 BLOCK_SIZE split_subsize = get_partition_subsize(bsize, PARTITION_SPLIT); 1874 chosen_rdc.rate = 0; 1875 chosen_rdc.dist = 0; 1876 1877 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); 1878 pc_tree->partitioning = PARTITION_SPLIT; 1879 1880 // Split partition. 1881 for (i = 0; i < 4; i++) { 1882 int x_idx = (i & 1) * hbs; 1883 int y_idx = (i >> 1) * hbs; 1884 RD_STATS tmp_rdc; 1885 1886 if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols)) 1887 continue; 1888 1889 save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); 1890 pc_tree->split[i]->partitioning = PARTITION_NONE; 1891 pick_sb_modes(cpi, tile_data, x, mi_row + y_idx, mi_col + x_idx, &tmp_rdc, 1892 PARTITION_SPLIT, split_subsize, &pc_tree->split[i]->none, 1893 INT64_MAX, 0); 1894 1895 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); 1896 if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { 1897 av1_invalid_rd_stats(&chosen_rdc); 1898 break; 1899 } 1900 1901 chosen_rdc.rate += tmp_rdc.rate; 1902 chosen_rdc.dist += tmp_rdc.dist; 1903 1904 if (i != 3) 1905 encode_sb(cpi, td, tile_data, tp, mi_row + y_idx, mi_col + x_idx, 1906 OUTPUT_ENABLED, split_subsize, pc_tree->split[i], NULL); 1907 1908 chosen_rdc.rate += x->partition_cost[pl][PARTITION_NONE]; 1909 } 1910 if (chosen_rdc.rate < INT_MAX) { 1911 chosen_rdc.rate += x->partition_cost[pl][PARTITION_SPLIT]; 1912 chosen_rdc.rdcost = RDCOST(x->rdmult, chosen_rdc.rate, chosen_rdc.dist); 1913 } 1914 } 1915 1916 // If last_part is better set the partitioning to that. 1917 if (last_part_rdc.rdcost < chosen_rdc.rdcost) { 1918 mib[0]->sb_type = bsize; 1919 if (bsize >= BLOCK_8X8) pc_tree->partitioning = partition; 1920 chosen_rdc = last_part_rdc; 1921 } 1922 // If none was better set the partitioning to that. 1923 if (none_rdc.rdcost < chosen_rdc.rdcost) { 1924 if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE; 1925 chosen_rdc = none_rdc; 1926 } 1927 1928 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); 1929 1930 // We must have chosen a partitioning and encoding or we'll fail later on. 1931 // No other opportunities for success. 
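// At this point chosen_rdc holds the cheapest of the candidates evaluated
// above -- the inherited (last-frame) partitioning, PARTITION_NONE, and the
// forced four-way split -- each including its partition signalling cost via
// RDCOST(). For a full superblock at least one of them must have produced a
// finite rate/distortion pair, which is what the assert below checks.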
1932 if (bsize == cm->seq_params.sb_size)
1933 assert(chosen_rdc.rate < INT_MAX && chosen_rdc.dist < INT64_MAX);
1934
1935 if (do_recon) {
1936 if (bsize == cm->seq_params.sb_size) {
1937 // NOTE: To get an estimate for the rate due to the tokens, use:
1938 // int rate_coeffs = 0;
1939 // encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_COSTCOEFFS,
1940 // bsize, pc_tree, &rate_coeffs);
1941 x->cb_offset = 0;
1942 encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, OUTPUT_ENABLED, bsize,
1943 pc_tree, NULL);
1944 } else {
1945 encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize,
1946 pc_tree, NULL);
1947 }
1948 }
1949
1950 *rate = chosen_rdc.rate;
1951 *dist = chosen_rdc.dist;
1952 }
1953
1954 // TODO(kyslov): now this is very similar to rd_use_partition (except that it
1955 // doesn't do the extra search around the suggested partitioning);
1956 // consider passing a flag to select the non-rd path (similar to
1957 // encode_sb_row).
1958 static void nonrd_use_partition(AV1_COMP *cpi, ThreadData *td,
1959 TileDataEnc *tile_data, MB_MODE_INFO **mib,
1960 TOKENEXTRA **tp, int mi_row, int mi_col,
1961 BLOCK_SIZE bsize, int *rate, int64_t *dist,
1962 int do_recon, PC_TREE *pc_tree) {
1963 AV1_COMMON *const cm = &cpi->common;
1964 const int num_planes = av1_num_planes(cm);
1965 TileInfo *const tile_info = &tile_data->tile_info;
1966 MACROBLOCK *const x = &td->mb;
1967 MACROBLOCKD *const xd = &x->e_mbd;
1968 const int bs = mi_size_wide[bsize];
1969 const int hbs = bs / 2;
1970 int i;
1971 const int pl = (bsize >= BLOCK_8X8)
1972 ? partition_plane_context(xd, mi_row, mi_col, bsize)
1973 : 0;
1974 const PARTITION_TYPE partition =
1975 (bsize >= BLOCK_8X8) ? get_partition(cm, mi_row, mi_col, bsize)
1976 : PARTITION_NONE;
1977 const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition);
1978 RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
1979 RD_STATS last_part_rdc;
1980 PICK_MODE_CONTEXT *ctx_none = &pc_tree->none;
1981
1982 if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
1983
1984 assert(mi_size_wide[bsize] == mi_size_high[bsize]);
1985
1986 av1_invalid_rd_stats(&last_part_rdc);
1987
1988 pc_tree->partitioning = partition;
1989
1990 xd->above_txfm_context = cm->above_txfm_context[tile_info->tile_row] + mi_col;
1991 xd->left_txfm_context =
1992 xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
1993 save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
1994
1995 if (bsize == BLOCK_16X16 && cpi->vaq_refresh) {
1996 set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
1997 x->mb_energy = av1_log_block_var(cpi, x, bsize);
1998 }
1999
2000 switch (partition) {
2001 case PARTITION_NONE:
2002 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
2003 PARTITION_NONE, bsize, ctx_none, INT64_MAX, 1);
2004 break;
2005 case PARTITION_HORZ:
2006 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
2007 PARTITION_HORZ, subsize, &pc_tree->horizontal[0], INT64_MAX,
2008 1);
2009 if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
2010 mi_row + hbs < cm->mi_rows) {
2011 RD_STATS tmp_rdc;
2012 const PICK_MODE_CONTEXT *const ctx_h = &pc_tree->horizontal[0];
2013 av1_init_rd_stats(&tmp_rdc);
2014 update_state(cpi, tile_data, td, ctx_h, mi_row, mi_col, subsize, 1);
2015 encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, mi_row,
2016 mi_col, subsize, NULL);
2017 pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, &tmp_rdc,
2018 PARTITION_HORZ, subsize, &pc_tree->horizontal[1],
2019 INT64_MAX, 1);
2020 if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist ==
INT64_MAX) { 2021 av1_invalid_rd_stats(&last_part_rdc); 2022 break; 2023 } 2024 last_part_rdc.rate += tmp_rdc.rate; 2025 last_part_rdc.dist += tmp_rdc.dist; 2026 last_part_rdc.rdcost += tmp_rdc.rdcost; 2027 } 2028 break; 2029 case PARTITION_VERT: 2030 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, 2031 PARTITION_VERT, subsize, &pc_tree->vertical[0], INT64_MAX, 2032 1); 2033 if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 && 2034 mi_col + hbs < cm->mi_cols) { 2035 RD_STATS tmp_rdc; 2036 const PICK_MODE_CONTEXT *const ctx_v = &pc_tree->vertical[0]; 2037 av1_init_rd_stats(&tmp_rdc); 2038 update_state(cpi, tile_data, td, ctx_v, mi_row, mi_col, subsize, 1); 2039 encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, mi_row, 2040 mi_col, subsize, NULL); 2041 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, &tmp_rdc, 2042 PARTITION_VERT, subsize, 2043 &pc_tree->vertical[bsize > BLOCK_8X8], INT64_MAX, 1); 2044 if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { 2045 av1_invalid_rd_stats(&last_part_rdc); 2046 break; 2047 } 2048 last_part_rdc.rate += tmp_rdc.rate; 2049 last_part_rdc.dist += tmp_rdc.dist; 2050 last_part_rdc.rdcost += tmp_rdc.rdcost; 2051 } 2052 break; 2053 case PARTITION_SPLIT: 2054 last_part_rdc.rate = 0; 2055 last_part_rdc.dist = 0; 2056 last_part_rdc.rdcost = 0; 2057 for (i = 0; i < 4; i++) { 2058 int x_idx = (i & 1) * hbs; 2059 int y_idx = (i >> 1) * hbs; 2060 int jj = i >> 1, ii = i & 0x01; 2061 RD_STATS tmp_rdc; 2062 if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols)) 2063 continue; 2064 2065 av1_init_rd_stats(&tmp_rdc); 2066 nonrd_use_partition( 2067 cpi, td, tile_data, mib + jj * hbs * cm->mi_stride + ii * hbs, tp, 2068 mi_row + y_idx, mi_col + x_idx, subsize, &tmp_rdc.rate, 2069 &tmp_rdc.dist, i != 3, pc_tree->split[i]); 2070 if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { 2071 av1_invalid_rd_stats(&last_part_rdc); 2072 break; 2073 } 2074 last_part_rdc.rate += tmp_rdc.rate; 2075 last_part_rdc.dist += tmp_rdc.dist; 2076 } 2077 break; 2078 case PARTITION_VERT_A: 2079 case PARTITION_VERT_B: 2080 case PARTITION_HORZ_A: 2081 case PARTITION_HORZ_B: 2082 case PARTITION_HORZ_4: 2083 case PARTITION_VERT_4: 2084 assert(0 && "Cannot handle extended partition types"); 2085 default: assert(0); break; 2086 } 2087 2088 if (last_part_rdc.rate < INT_MAX) { 2089 last_part_rdc.rate += x->partition_cost[pl][partition]; 2090 last_part_rdc.rdcost = 2091 RDCOST(x->rdmult, last_part_rdc.rate, last_part_rdc.dist); 2092 } 2093 2094 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); 2095 2096 // We must have chosen a partitioning and encoding or we'll fail later on. 2097 // No other opportunities for success. 2098 if (bsize == cm->seq_params.sb_size) 2099 assert(last_part_rdc.rate < INT_MAX && last_part_rdc.dist < INT64_MAX); 2100 2101 if (do_recon) { 2102 if (bsize == cm->seq_params.sb_size) { 2103 // NOTE: To get estimate for rate due to the tokens, use: 2104 // int rate_coeffs = 0; 2105 // encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_COSTCOEFFS, 2106 // bsize, pc_tree, &rate_coeffs); 2107 x->cb_offset = 0; 2108 encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, OUTPUT_ENABLED, bsize, 2109 pc_tree, NULL); 2110 } else { 2111 encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize, 2112 pc_tree, NULL); 2113 } 2114 } 2115 2116 *rate = last_part_rdc.rate; 2117 *dist = last_part_rdc.dist; 2118 } 2119 2120 // Checks to see if a super block is on a horizontal image edge. 
2121 // In most cases this is the "real" edge unless there are formatting
2122 // bars embedded in the stream.
2123 static int active_h_edge(const AV1_COMP *cpi, int mi_row, int mi_step) {
2124 int top_edge = 0;
2125 int bottom_edge = cpi->common.mi_rows;
2126 int is_active_h_edge = 0;
2127
2128 // For two-pass encoding, account for any formatting bars detected.
2129 if (cpi->oxcf.pass == 2) {
2130 const TWO_PASS *const twopass = &cpi->twopass;
2131
2132 // The inactive region is specified in MBs, not mi units.
2133 // The image edge is in the following MB row.
2134 top_edge += (int)(twopass->this_frame_stats.inactive_zone_rows * 2);
2135
2136 bottom_edge -= (int)(twopass->this_frame_stats.inactive_zone_rows * 2);
2137 bottom_edge = AOMMAX(top_edge, bottom_edge);
2138 }
2139
2140 if (((top_edge >= mi_row) && (top_edge < (mi_row + mi_step))) ||
2141 ((bottom_edge >= mi_row) && (bottom_edge < (mi_row + mi_step)))) {
2142 is_active_h_edge = 1;
2143 }
2144 return is_active_h_edge;
2145 }
2146
2147 // Checks to see if a super block is on a vertical image edge.
2148 // In most cases this is the "real" edge unless there are formatting
2149 // bars embedded in the stream.
2150 static int active_v_edge(const AV1_COMP *cpi, int mi_col, int mi_step) {
2151 int left_edge = 0;
2152 int right_edge = cpi->common.mi_cols;
2153 int is_active_v_edge = 0;
2154
2155 // For two-pass encoding, account for any formatting bars detected.
2156 if (cpi->oxcf.pass == 2) {
2157 const TWO_PASS *const twopass = &cpi->twopass;
2158
2159 // The inactive region is specified in MBs, not mi units.
2160 // The image edge is in the following MB column.
2161 left_edge += (int)(twopass->this_frame_stats.inactive_zone_cols * 2);
2162
2163 right_edge -= (int)(twopass->this_frame_stats.inactive_zone_cols * 2);
2164 right_edge = AOMMAX(left_edge, right_edge);
2165 }
2166
2167 if (((left_edge >= mi_col) && (left_edge < (mi_col + mi_step))) ||
2168 ((right_edge >= mi_col) && (right_edge < (mi_col + mi_step)))) {
2169 is_active_v_edge = 1;
2170 }
2171 return is_active_v_edge;
2172 }
2173
2174 static INLINE void store_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
2175 memcpy(ctx->pred_mv, x->pred_mv, sizeof(x->pred_mv));
2176 }
2177
2178 static INLINE void load_pred_mv(MACROBLOCK *x,
2179 const PICK_MODE_CONTEXT *const ctx) {
2180 memcpy(x->pred_mv, ctx->pred_mv, sizeof(x->pred_mv));
2181 }
2182
2183 // Try searching for an encoding for the given subblock. Returns zero if the
2184 // rdcost is already too high (to tell the caller not to bother searching for
2185 // encodings of further subblocks).
2186 static int rd_try_subblock(AV1_COMP *const cpi, ThreadData *td,
2187 TileDataEnc *tile_data, TOKENEXTRA **tp, int is_last,
2188 int mi_row, int mi_col, BLOCK_SIZE subsize,
2189 RD_STATS *best_rdc, RD_STATS *sum_rdc,
2190 RD_STATS *this_rdc, PARTITION_TYPE partition,
2191 PICK_MODE_CONTEXT *prev_ctx,
2192 PICK_MODE_CONTEXT *this_ctx) {
2193 #define RTS_X_RATE_NOCOEF_ARG
2194 #define RTS_MAX_RDCOST best_rdc->rdcost
2195
2196 MACROBLOCK *const x = &td->mb;
2197
2198 if (cpi->sf.adaptive_motion_search) load_pred_mv(x, prev_ctx);
2199
2200 const int64_t rdcost_remaining = best_rdc->rdcost == INT64_MAX
2201 ?
INT64_MAX 2202 : (best_rdc->rdcost - sum_rdc->rdcost); 2203 2204 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, this_rdc, 2205 RTS_X_RATE_NOCOEF_ARG partition, subsize, this_ctx, 2206 rdcost_remaining, 0); 2207 2208 if (this_rdc->rate == INT_MAX) { 2209 sum_rdc->rdcost = INT64_MAX; 2210 } else { 2211 sum_rdc->rate += this_rdc->rate; 2212 sum_rdc->dist += this_rdc->dist; 2213 sum_rdc->rdcost += this_rdc->rdcost; 2214 } 2215 2216 if (sum_rdc->rdcost >= RTS_MAX_RDCOST) return 0; 2217 2218 if (!is_last) { 2219 update_state(cpi, tile_data, td, this_ctx, mi_row, mi_col, subsize, 1); 2220 encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, mi_row, mi_col, 2221 subsize, NULL); 2222 } 2223 2224 return 1; 2225 2226 #undef RTS_X_RATE_NOCOEF_ARG 2227 #undef RTS_MAX_RDCOST 2228 } 2229 2230 static void rd_test_partition3(AV1_COMP *const cpi, ThreadData *td, 2231 TileDataEnc *tile_data, TOKENEXTRA **tp, 2232 PC_TREE *pc_tree, RD_STATS *best_rdc, 2233 PICK_MODE_CONTEXT ctxs[3], 2234 PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col, 2235 BLOCK_SIZE bsize, PARTITION_TYPE partition, 2236 int mi_row0, int mi_col0, BLOCK_SIZE subsize0, 2237 int mi_row1, int mi_col1, BLOCK_SIZE subsize1, 2238 int mi_row2, int mi_col2, BLOCK_SIZE subsize2) { 2239 MACROBLOCK *const x = &td->mb; 2240 MACROBLOCKD *const xd = &x->e_mbd; 2241 RD_STATS sum_rdc, this_rdc; 2242 #define RTP_STX_TRY_ARGS 2243 int pl = partition_plane_context(xd, mi_row, mi_col, bsize); 2244 av1_init_rd_stats(&sum_rdc); 2245 sum_rdc.rate = x->partition_cost[pl][partition]; 2246 sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0); 2247 if (!rd_try_subblock(cpi, td, tile_data, tp, 0, mi_row0, mi_col0, subsize0, 2248 best_rdc, &sum_rdc, &this_rdc, 2249 RTP_STX_TRY_ARGS partition, ctx, &ctxs[0])) 2250 return; 2251 2252 if (!rd_try_subblock(cpi, td, tile_data, tp, 0, mi_row1, mi_col1, subsize1, 2253 best_rdc, &sum_rdc, &this_rdc, 2254 RTP_STX_TRY_ARGS partition, &ctxs[0], &ctxs[1])) 2255 return; 2256 2257 // With the new layout of mixed partitions for PARTITION_HORZ_B and 2258 // PARTITION_VERT_B, the last subblock might start past halfway through the 2259 // main block, so we might signal it even though the subblock lies strictly 2260 // outside the image. In that case, we won't spend any bits coding it and the 2261 // difference (obviously) doesn't contribute to the error. 
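// As a rough sketch, the three sub-blocks tried below use the same ordering
// that encode_sb() wires up for these partitions:
//
//   HORZ_A: +---+---+   HORZ_B: +-------+
//           | 0 | 1 |           |   0   |
//           +---+---+           +---+---+
//           |   2   |           | 1 | 2 |
//           +-------+           +---+---+
//
// (VERT_A/VERT_B are the transposed layouts, with sub-block 2 on the right
// for VERT_A, and sub-blocks 1 and 2 on the right for VERT_B.)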
2262 const int try_block2 = 1; 2263 if (try_block2 && 2264 !rd_try_subblock(cpi, td, tile_data, tp, 1, mi_row2, mi_col2, subsize2, 2265 best_rdc, &sum_rdc, &this_rdc, 2266 RTP_STX_TRY_ARGS partition, &ctxs[1], &ctxs[2])) 2267 return; 2268 2269 if (sum_rdc.rdcost >= best_rdc->rdcost) return; 2270 2271 sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist); 2272 2273 if (sum_rdc.rdcost >= best_rdc->rdcost) return; 2274 2275 *best_rdc = sum_rdc; 2276 pc_tree->partitioning = partition; 2277 2278 #undef RTP_STX_TRY_ARGS 2279 } 2280 2281 static void reset_partition(PC_TREE *pc_tree, BLOCK_SIZE bsize) { 2282 pc_tree->partitioning = PARTITION_NONE; 2283 pc_tree->cb_search_range = SEARCH_FULL_PLANE; 2284 pc_tree->none.skip = 0; 2285 2286 pc_tree->pc_tree_stats.valid = 0; 2287 pc_tree->pc_tree_stats.split = 0; 2288 pc_tree->pc_tree_stats.skip = 0; 2289 pc_tree->pc_tree_stats.rdcost = INT64_MAX; 2290 2291 for (int i = 0; i < 4; i++) { 2292 pc_tree->pc_tree_stats.sub_block_split[i] = 0; 2293 pc_tree->pc_tree_stats.sub_block_skip[i] = 0; 2294 pc_tree->pc_tree_stats.sub_block_rdcost[i] = INT64_MAX; 2295 } 2296 2297 if (bsize >= BLOCK_8X8) { 2298 BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT); 2299 for (int idx = 0; idx < 4; ++idx) 2300 reset_partition(pc_tree->split[idx], subsize); 2301 } 2302 } 2303 2304 static void rd_pick_sqr_partition(AV1_COMP *const cpi, ThreadData *td, 2305 TileDataEnc *tile_data, TOKENEXTRA **tp, 2306 int mi_row, int mi_col, BLOCK_SIZE bsize, 2307 RD_STATS *rd_cost, int64_t best_rd, 2308 PC_TREE *pc_tree, int64_t *none_rd) { 2309 const AV1_COMMON *const cm = &cpi->common; 2310 TileInfo *const tile_info = &tile_data->tile_info; 2311 MACROBLOCK *const x = &td->mb; 2312 MACROBLOCKD *const xd = &x->e_mbd; 2313 const int mi_step = mi_size_wide[bsize] / 2; 2314 RD_SEARCH_MACROBLOCK_CONTEXT x_ctx; 2315 const TOKENEXTRA *const tp_orig = *tp; 2316 PICK_MODE_CONTEXT *ctx_none = &pc_tree->none; 2317 int tmp_partition_cost[PARTITION_TYPES]; 2318 BLOCK_SIZE subsize; 2319 RD_STATS this_rdc, sum_rdc, best_rdc, pn_rdc; 2320 const int bsize_at_least_8x8 = (bsize >= BLOCK_8X8); 2321 int do_square_split = bsize_at_least_8x8; 2322 const int pl = bsize_at_least_8x8 2323 ? partition_plane_context(xd, mi_row, mi_col, bsize) 2324 : 0; 2325 const int *partition_cost = 2326 pl >= 0 ? 
x->partition_cost[pl] : x->partition_cost[0];
2327 const int num_planes = av1_num_planes(cm);
2328
2329 int64_t split_rd[4] = { 0, 0, 0, 0 };
2330
2331 // Override skipping rectangular partition operations for edge blocks
2332 const int has_rows = (mi_row + mi_step < cm->mi_rows);
2333 const int has_cols = (mi_col + mi_step < cm->mi_cols);
2334
2335 if (none_rd) *none_rd = 0;
2336
2337 int partition_none_allowed = has_rows && has_cols;
2338
2339 (void)*tp_orig;
2340 (void)split_rd;
2341
2342 if (best_rd < 0) {
2343 pc_tree->none.rdcost = INT64_MAX;
2344 pc_tree->none.skip = 0;
2345 av1_invalid_rd_stats(rd_cost);
2346 return;
2347 }
2348 pc_tree->pc_tree_stats.valid = 1;
2349
2350 // Override partition costs at the edges of the frame in the same
2351 // way as in read_partition (see decodeframe.c)
2352 if (!(has_rows && has_cols)) {
2353 assert(bsize_at_least_8x8 && pl >= 0);
2354 const aom_cdf_prob *partition_cdf = cm->fc->partition_cdf[pl];
2355 for (int i = 0; i < PARTITION_TYPES; ++i) tmp_partition_cost[i] = INT_MAX;
2356 if (has_cols) {
2357 // At the bottom, the two possibilities are HORZ and SPLIT
2358 aom_cdf_prob bot_cdf[2];
2359 partition_gather_vert_alike(bot_cdf, partition_cdf, bsize);
2360 static const int bot_inv_map[2] = { PARTITION_HORZ, PARTITION_SPLIT };
2361 av1_cost_tokens_from_cdf(tmp_partition_cost, bot_cdf, bot_inv_map);
2362 } else if (has_rows) {
2363 // At the right, the two possibilities are VERT and SPLIT
2364 aom_cdf_prob rhs_cdf[2];
2365 partition_gather_horz_alike(rhs_cdf, partition_cdf, bsize);
2366 static const int rhs_inv_map[2] = { PARTITION_VERT, PARTITION_SPLIT };
2367 av1_cost_tokens_from_cdf(tmp_partition_cost, rhs_cdf, rhs_inv_map);
2368 } else {
2369 // At the bottom right, we always split
2370 tmp_partition_cost[PARTITION_SPLIT] = 0;
2371 }
2372
2373 partition_cost = tmp_partition_cost;
2374 }
2375
2376 #ifndef NDEBUG
2377 // Nothing should rely on the default value of this array (which is just
2378 // leftover from encoding the previous block). Set it to a fixed pattern
2379 // when debugging.
2380 // bit 0, 1, 2 are blk_skip of each plane
2381 // bit 4, 5, 6 are initialization checking of each plane
2382 memset(x->blk_skip, 0x77, sizeof(x->blk_skip));
2383 #endif  // NDEBUG
2384
2385 assert(mi_size_wide[bsize] == mi_size_high[bsize]);
2386
2387 av1_init_rd_stats(&this_rdc);
2388 av1_init_rd_stats(&sum_rdc);
2389 av1_invalid_rd_stats(&best_rdc);
2390 best_rdc.rdcost = best_rd;
2391
2392 set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
2393
2394 if (bsize == BLOCK_16X16 && cpi->vaq_refresh)
2395 x->mb_energy = av1_log_block_var(cpi, x, bsize);
2396
2397 xd->above_txfm_context = cm->above_txfm_context[tile_info->tile_row] + mi_col;
2398 xd->left_txfm_context =
2399 xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
2400 save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
2401
2402 #if CONFIG_DIST_8X8
2403 if (x->using_dist_8x8) {
2404 if (block_size_high[bsize] <= 8 || block_size_wide[bsize] <= 8)
2405 do_square_split = 0;
2406 }
2407 #endif
2408
2409 // PARTITION_NONE
2410 if (partition_none_allowed) {
2411 int pt_cost = 0;
2412 if (bsize_at_least_8x8) {
2413 pc_tree->partitioning = PARTITION_NONE;
2414 pt_cost = partition_cost[PARTITION_NONE] < INT_MAX
2415 ? partition_cost[PARTITION_NONE]
2416 : 0;
2417 }
2418 const int64_t partition_rd_cost = RDCOST(x->rdmult, pt_cost, 0);
2419 const int64_t best_remain_rdcost =
2420 best_rdc.rdcost == INT64_MAX ? INT64_MAX
2421 : (best_rdc.rdcost - partition_rd_cost);
2422 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, PARTITION_NONE,
2423 bsize, ctx_none, best_remain_rdcost, 0);
2424
2425 pc_tree->pc_tree_stats.rdcost = ctx_none->rdcost;
2426 pc_tree->pc_tree_stats.skip = ctx_none->skip;
2427
2428 if (none_rd) *none_rd = this_rdc.rdcost;
2429 if (this_rdc.rate != INT_MAX) {
2430 if (bsize_at_least_8x8) {
2431 this_rdc.rate += pt_cost;
2432 this_rdc.rdcost = RDCOST(x->rdmult, this_rdc.rate, this_rdc.dist);
2433 }
2434
2435 if (this_rdc.rdcost < best_rdc.rdcost) {
2436 // Adjust dist breakout threshold according to the partition size.
2437 const int64_t dist_breakout_thr =
2438 cpi->sf.partition_search_breakout_dist_thr >>
2439 ((2 * (MAX_SB_SIZE_LOG2 - 2)) -
2440 (mi_size_wide_log2[bsize] + mi_size_high_log2[bsize]));
2441 const int rate_breakout_thr =
2442 cpi->sf.partition_search_breakout_rate_thr *
2443 num_pels_log2_lookup[bsize];
2444
2445 best_rdc = this_rdc;
2446 if (bsize_at_least_8x8) pc_tree->partitioning = PARTITION_NONE;
2447
2448 pc_tree->cb_search_range = SEARCH_FULL_PLANE;
2449
2450 if (!x->e_mbd.lossless[xd->mi[0]->segment_id] && ctx_none->skippable) {
2451 const int use_ml_based_breakout =
2452 bsize <= cpi->sf.use_square_partition_only_threshold &&
2453 bsize > BLOCK_4X4 && xd->bd == 8;
2454
2455 // TODO(anyone): Currently this is using the same model and threshold
2456 // values as in rd_pick_partition. Retraining the model and tuning the
2457 // threshold values might be helpful to improve the speed.
2458 if (use_ml_based_breakout) {
2459 if (ml_predict_breakout(cpi, bsize, x, &this_rdc,
2460 x->source_variance)) {
2461 do_square_split = 0;
2462 }
2463 }
2464
2465 // If all y, u, v transform blocks in this partition are skippable,
2466 // and the dist & rate are within the thresholds, the partition search
2467 // is terminated for the current branch of the partition search tree.
2468 // The dist & rate thresholds are set to 0 at speed 0 to disable the
2469 // early termination at that speed.
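// For example (assuming 128x128 superblocks, i.e. MAX_SB_SIZE_LOG2 == 7):
// for a 16x16 block, mi_size_wide_log2 + mi_size_high_log2 == 4, so
// dist_breakout_thr is shifted right by (2 * 5) - 4 == 6, i.e. divided by
// 64 relative to a full superblock, while rate_breakout_thr scales linearly
// with num_pels_log2_lookup[bsize]. Smaller partitions therefore need
// proportionally smaller rate/distortion to trigger the breakout.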
2470 if (best_rdc.dist < dist_breakout_thr &&
2471 best_rdc.rate < rate_breakout_thr) {
2472 do_square_split = 0;
2473 }
2474 }
2475
2476 if (cpi->sf.firstpass_simple_motion_search_early_term &&
2477 cm->show_frame && bsize <= BLOCK_32X32 && bsize >= BLOCK_8X8 &&
2478 !frame_is_intra_only(cm) && mi_row + mi_step < cm->mi_rows &&
2479 mi_col + mi_step < cm->mi_cols && this_rdc.rdcost < INT64_MAX &&
2480 this_rdc.rdcost >= 0 && this_rdc.rate < INT_MAX &&
2481 this_rdc.rate >= 0 && do_square_split) {
2482 av1_firstpass_simple_motion_search_early_term(
2483 cpi, x, pc_tree, mi_row, mi_col, bsize, &this_rdc,
2484 &do_square_split);
2485 }
2486 }
2487 }
2488
2489 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
2490 }
2491
2492 // Store the estimated motion vector.
2493 if (cpi->sf.adaptive_motion_search) store_pred_mv(x, ctx_none);
2494
2495 int64_t temp_best_rdcost = best_rdc.rdcost;
2496 pn_rdc = best_rdc;
2497
2498 // PARTITION_SPLIT
2499 if (do_square_split) {
2500 int reached_last_index = 0;
2501 subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
2502 int idx;
2503
2504 sum_rdc.rate = partition_cost[PARTITION_SPLIT];
2505 sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
2506
2507 for (idx = 0; idx < 4 && sum_rdc.rdcost < temp_best_rdcost; ++idx) {
2508 const int x_idx = (idx & 1) * mi_step;
2509 const int y_idx = (idx >> 1) * mi_step;
2510
2511 if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
2512 continue;
2513
2514 if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none);
2515
2516 pc_tree->split[idx]->index = idx;
2517 int64_t *p_split_rd = &split_rd[idx];
2518 const int64_t best_remain_rdcost =
2519 (temp_best_rdcost == INT64_MAX) ? INT64_MAX
2520 : (temp_best_rdcost - sum_rdc.rdcost);
2521 rd_pick_sqr_partition(
2522 cpi, td, tile_data, tp, mi_row + y_idx, mi_col + x_idx, subsize,
2523 &this_rdc, best_remain_rdcost, pc_tree->split[idx], p_split_rd);
2524
2525 pc_tree->pc_tree_stats.sub_block_rdcost[idx] = this_rdc.rdcost;
2526 pc_tree->pc_tree_stats.sub_block_skip[idx] =
2527 pc_tree->split[idx]->none.skip;
2528
2529 if (this_rdc.rate == INT_MAX) {
2530 sum_rdc.rdcost = INT64_MAX;
2531 break;
2532 } else {
2533 sum_rdc.rate += this_rdc.rate;
2534 sum_rdc.dist += this_rdc.dist;
2535 sum_rdc.rdcost += this_rdc.rdcost;
2536 }
2537 }
2538 reached_last_index = (idx == 4);
2539
2540 if (reached_last_index && sum_rdc.rdcost < best_rdc.rdcost) {
2541 sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
2542
2543 if (sum_rdc.rdcost < best_rdc.rdcost) {
2544 best_rdc = sum_rdc;
2545 pc_tree->partitioning = PARTITION_SPLIT;
2546 }
2547 }
2548
2549 int has_split = 0;
2550 if (pc_tree->partitioning == PARTITION_SPLIT) {
2551 for (int cb_idx = 0; cb_idx <= AOMMIN(idx, 3); ++cb_idx) {
2552 if (pc_tree->split[cb_idx]->partitioning == PARTITION_SPLIT)
2553 ++has_split;
2554 }
2555
2556 if (has_split >= 3 || sum_rdc.rdcost < (pn_rdc.rdcost >> 1)) {
2557 pc_tree->cb_search_range = SPLIT_PLANE;
2558 }
2559 }
2560
2561 if (pc_tree->partitioning == PARTITION_NONE) {
2562 pc_tree->cb_search_range = SEARCH_SAME_PLANE;
2563 if (pn_rdc.dist <= sum_rdc.dist)
2564 pc_tree->cb_search_range = NONE_PARTITION_PLANE;
2565 }
2566
2567 if (pn_rdc.rate == INT_MAX) pc_tree->cb_search_range = NONE_PARTITION_PLANE;
2568
2569 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
2570 }  // if (do_square_split)
2571
2572 pc_tree->pc_tree_stats.split = pc_tree->partitioning == PARTITION_SPLIT;
2573 if (do_square_split) {
2574 for (int i = 0; i < 4; ++i) {
2575 pc_tree->pc_tree_stats.sub_block_split[i] =
2576 pc_tree->split[i]->partitioning == PARTITION_SPLIT;
2577 }
2578 }
2579
2580 // TODO(jbb): This code was added so that we avoid a static analysis
2581 // warning related to the fact that best_rd isn't used after this
2582 // point. This code should be refactored so that the duplicate
2583 // checks occur in some sub function and thus are used...
2584 (void)best_rd;
2585 *rd_cost = best_rdc;
2586
2587 if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX &&
2588 pc_tree->index != 3) {
2589 if (bsize == cm->seq_params.sb_size) {
2590 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
2591 } else {
2592 encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize,
2593 pc_tree, NULL);
2594 }
2595 }
2596
2597 if (bsize == cm->seq_params.sb_size) {
2598 assert(best_rdc.rate < INT_MAX);
2599 assert(best_rdc.dist < INT64_MAX);
2600 } else {
2601 assert(tp_orig == *tp);
2602 }
2603 }
2604
2605 // split_score indicates confidence of picking split partition;
2606 // none_score indicates confidence of picking none partition.
2607 #define FEATURE_SIZE 19
2608 static int ml_prune_2pass_split_partition(const PC_TREE_STATS *pc_tree_stats,
2609 BLOCK_SIZE bsize, int *split_score,
2610 int *none_score) {
2611 if (!pc_tree_stats->valid) return 0;
2612 const float *split_weights = NULL;
2613 const float *none_weights = NULL;
2614 switch (bsize) {
2615 case BLOCK_4X4: break;
2616 case BLOCK_8X8:
2617 split_weights = av1_2pass_split_partition_weights_8;
2618 none_weights = av1_2pass_none_partition_weights_8;
2619 break;
2620 case BLOCK_16X16:
2621 split_weights = av1_2pass_split_partition_weights_16;
2622 none_weights = av1_2pass_none_partition_weights_16;
2623 break;
2624 case BLOCK_32X32:
2625 split_weights = av1_2pass_split_partition_weights_32;
2626 none_weights = av1_2pass_none_partition_weights_32;
2627 break;
2628 case BLOCK_64X64:
2629 split_weights = av1_2pass_split_partition_weights_64;
2630 none_weights = av1_2pass_none_partition_weights_64;
2631 break;
2632 case BLOCK_128X128:
2633 split_weights = av1_2pass_split_partition_weights_128;
2634 none_weights = av1_2pass_none_partition_weights_128;
2635 break;
2636 default: assert(0 && "Unexpected bsize.");
2637 }
2638 if (!split_weights || !none_weights) return 0;
2639
2640 aom_clear_system_state();
2641
2642 float features[FEATURE_SIZE];
2643 int feature_index = 0;
2644 features[feature_index++] = (float)pc_tree_stats->split;
2645 features[feature_index++] = (float)pc_tree_stats->skip;
2646 const int rdcost = (int)AOMMIN(INT_MAX, pc_tree_stats->rdcost);
2647 const int rd_valid = rdcost > 0 && rdcost < 1000000000;
2648 features[feature_index++] = (float)rd_valid;
2649 for (int i = 0; i < 4; ++i) {
2650 features[feature_index++] = (float)pc_tree_stats->sub_block_split[i];
2651 features[feature_index++] = (float)pc_tree_stats->sub_block_skip[i];
2652 const int sub_rdcost =
2653 (int)AOMMIN(INT_MAX, pc_tree_stats->sub_block_rdcost[i]);
2654 const int sub_rd_valid = sub_rdcost > 0 && sub_rdcost < 1000000000;
2655 features[feature_index++] = (float)sub_rd_valid;
2656 // Ratio between the sub-block RD and the whole-block RD.
2657 float rd_ratio = 1.0f; 2658 if (rd_valid && sub_rd_valid && sub_rdcost < rdcost) 2659 rd_ratio = (float)sub_rdcost / (float)rdcost; 2660 features[feature_index++] = rd_ratio; 2661 } 2662 assert(feature_index == FEATURE_SIZE); 2663 2664 float score_1 = split_weights[FEATURE_SIZE]; 2665 float score_2 = none_weights[FEATURE_SIZE]; 2666 for (int i = 0; i < FEATURE_SIZE; ++i) { 2667 score_1 += features[i] * split_weights[i]; 2668 score_2 += features[i] * none_weights[i]; 2669 } 2670 *split_score = (int)(score_1 * 100); 2671 *none_score = (int)(score_2 * 100); 2672 return 1; 2673 } 2674 #undef FEATURE_SIZE 2675 2676 static void ml_prune_rect_partition(const AV1_COMP *const cpi, 2677 const MACROBLOCK *const x, BLOCK_SIZE bsize, 2678 int64_t best_rd, int64_t none_rd, 2679 int64_t *split_rd, 2680 int *const dst_prune_horz, 2681 int *const dst_prune_vert) { 2682 if (bsize < BLOCK_8X8 || best_rd >= 1000000000) return; 2683 best_rd = AOMMAX(best_rd, 1); 2684 const NN_CONFIG *nn_config = NULL; 2685 const float prob_thresholds[5] = { 0.01f, 0.01f, 0.004f, 0.002f, 0.002f }; 2686 float cur_thresh = 0.0f; 2687 switch (bsize) { 2688 case BLOCK_8X8: 2689 nn_config = &av1_rect_partition_nnconfig_8; 2690 cur_thresh = prob_thresholds[0]; 2691 break; 2692 case BLOCK_16X16: 2693 nn_config = &av1_rect_partition_nnconfig_16; 2694 cur_thresh = prob_thresholds[1]; 2695 break; 2696 case BLOCK_32X32: 2697 nn_config = &av1_rect_partition_nnconfig_32; 2698 cur_thresh = prob_thresholds[2]; 2699 break; 2700 case BLOCK_64X64: 2701 nn_config = &av1_rect_partition_nnconfig_64; 2702 cur_thresh = prob_thresholds[3]; 2703 break; 2704 case BLOCK_128X128: 2705 nn_config = &av1_rect_partition_nnconfig_128; 2706 cur_thresh = prob_thresholds[4]; 2707 break; 2708 default: assert(0 && "Unexpected bsize."); 2709 } 2710 if (!nn_config) return; 2711 aom_clear_system_state(); 2712 2713 // 1. Compute input features 2714 float features[9]; 2715 2716 // RD cost ratios 2717 for (int i = 0; i < 5; i++) features[i] = 1.0f; 2718 if (none_rd > 0 && none_rd < 1000000000) 2719 features[0] = (float)none_rd / (float)best_rd; 2720 for (int i = 0; i < 4; i++) { 2721 if (split_rd[i] > 0 && split_rd[i] < 1000000000) 2722 features[1 + i] = (float)split_rd[i] / (float)best_rd; 2723 } 2724 2725 // Variance ratios 2726 const MACROBLOCKD *const xd = &x->e_mbd; 2727 int whole_block_variance; 2728 if (is_cur_buf_hbd(xd)) { 2729 whole_block_variance = av1_high_get_sby_perpixel_variance( 2730 cpi, &x->plane[0].src, bsize, xd->bd); 2731 } else { 2732 whole_block_variance = 2733 av1_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize); 2734 } 2735 whole_block_variance = AOMMAX(whole_block_variance, 1); 2736 2737 int split_variance[4]; 2738 const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT); 2739 struct buf_2d buf; 2740 buf.stride = x->plane[0].src.stride; 2741 const int bw = block_size_wide[bsize]; 2742 for (int i = 0; i < 4; ++i) { 2743 const int x_idx = (i & 1) * bw / 2; 2744 const int y_idx = (i >> 1) * bw / 2; 2745 buf.buf = x->plane[0].src.buf + x_idx + y_idx * buf.stride; 2746 if (is_cur_buf_hbd(xd)) { 2747 split_variance[i] = 2748 av1_high_get_sby_perpixel_variance(cpi, &buf, subsize, xd->bd); 2749 } else { 2750 split_variance[i] = av1_get_sby_perpixel_variance(cpi, &buf, subsize); 2751 } 2752 } 2753 2754 for (int i = 0; i < 4; i++) 2755 features[5 + i] = (float)split_variance[i] / (float)whole_block_variance; 2756 2757 // 2. 
Do the prediction and prune 0-2 partitions based on their probabilities 2758 float raw_scores[3] = { 0.0f }; 2759 av1_nn_predict(features, nn_config, raw_scores); 2760 aom_clear_system_state(); 2761 float probs[3] = { 0.0f }; 2762 av1_nn_softmax(raw_scores, probs, 3); 2763 2764 // probs[0] is the probability of the fact that both rectangular partitions 2765 // are worse than current best_rd 2766 if (probs[1] <= cur_thresh) (*dst_prune_horz) = 1; 2767 if (probs[2] <= cur_thresh) (*dst_prune_vert) = 1; 2768 } 2769 2770 // Use a ML model to predict if horz_a, horz_b, vert_a, and vert_b should be 2771 // considered. 2772 static void ml_prune_ab_partition(BLOCK_SIZE bsize, int part_ctx, int var_ctx, 2773 int64_t best_rd, int64_t horz_rd[2], 2774 int64_t vert_rd[2], int64_t split_rd[4], 2775 int *const horza_partition_allowed, 2776 int *const horzb_partition_allowed, 2777 int *const verta_partition_allowed, 2778 int *const vertb_partition_allowed) { 2779 if (bsize < BLOCK_8X8 || best_rd >= 1000000000) return; 2780 const NN_CONFIG *nn_config = NULL; 2781 switch (bsize) { 2782 case BLOCK_8X8: nn_config = NULL; break; 2783 case BLOCK_16X16: nn_config = &av1_ab_partition_nnconfig_16; break; 2784 case BLOCK_32X32: nn_config = &av1_ab_partition_nnconfig_32; break; 2785 case BLOCK_64X64: nn_config = &av1_ab_partition_nnconfig_64; break; 2786 case BLOCK_128X128: nn_config = &av1_ab_partition_nnconfig_128; break; 2787 default: assert(0 && "Unexpected bsize."); 2788 } 2789 if (!nn_config) return; 2790 2791 aom_clear_system_state(); 2792 2793 // Generate features. 2794 float features[10]; 2795 int feature_index = 0; 2796 features[feature_index++] = (float)part_ctx; 2797 features[feature_index++] = (float)var_ctx; 2798 const int rdcost = (int)AOMMIN(INT_MAX, best_rd); 2799 int sub_block_rdcost[8] = { 0 }; 2800 int rd_index = 0; 2801 for (int i = 0; i < 2; ++i) { 2802 if (horz_rd[i] > 0 && horz_rd[i] < 1000000000) 2803 sub_block_rdcost[rd_index] = (int)horz_rd[i]; 2804 ++rd_index; 2805 } 2806 for (int i = 0; i < 2; ++i) { 2807 if (vert_rd[i] > 0 && vert_rd[i] < 1000000000) 2808 sub_block_rdcost[rd_index] = (int)vert_rd[i]; 2809 ++rd_index; 2810 } 2811 for (int i = 0; i < 4; ++i) { 2812 if (split_rd[i] > 0 && split_rd[i] < 1000000000) 2813 sub_block_rdcost[rd_index] = (int)split_rd[i]; 2814 ++rd_index; 2815 } 2816 for (int i = 0; i < 8; ++i) { 2817 // Ratio between the sub-block RD and the whole-block RD. 2818 float rd_ratio = 1.0f; 2819 if (sub_block_rdcost[i] > 0 && sub_block_rdcost[i] < rdcost) 2820 rd_ratio = (float)sub_block_rdcost[i] / (float)rdcost; 2821 features[feature_index++] = rd_ratio; 2822 } 2823 assert(feature_index == 10); 2824 2825 // Calculate scores using the NN model. 2826 float score[16] = { 0.0f }; 2827 av1_nn_predict(features, nn_config, score); 2828 aom_clear_system_state(); 2829 int int_score[16]; 2830 int max_score = -1000; 2831 for (int i = 0; i < 16; ++i) { 2832 int_score[i] = (int)(100 * score[i]); 2833 max_score = AOMMAX(int_score[i], max_score); 2834 } 2835 2836 // Make decisions based on the model scores. 
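// Each of the 16 NN outputs corresponds to a 4-bit mask over the four AB
// partitions, decoded by the loop below: bit 0 enables HORZ_A, bit 1 HORZ_B,
// bit 2 VERT_A, and bit 3 VERT_B. For example, if label i == 5 (binary 0101)
// scores within the threshold, HORZ_A and VERT_A are kept. A partition is
// searched if any sufficiently high-scoring label has its bit set.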
2837 int thresh = max_score; 2838 switch (bsize) { 2839 case BLOCK_16X16: thresh -= 150; break; 2840 case BLOCK_32X32: thresh -= 100; break; 2841 default: break; 2842 } 2843 *horza_partition_allowed = 0; 2844 *horzb_partition_allowed = 0; 2845 *verta_partition_allowed = 0; 2846 *vertb_partition_allowed = 0; 2847 for (int i = 0; i < 16; ++i) { 2848 if (int_score[i] >= thresh) { 2849 if ((i >> 0) & 1) *horza_partition_allowed = 1; 2850 if ((i >> 1) & 1) *horzb_partition_allowed = 1; 2851 if ((i >> 2) & 1) *verta_partition_allowed = 1; 2852 if ((i >> 3) & 1) *vertb_partition_allowed = 1; 2853 } 2854 } 2855 } 2856 2857 #define FEATURES 18 2858 #define LABELS 4 2859 // Use a ML model to predict if horz4 and vert4 should be considered. 2860 static void ml_prune_4_partition(const AV1_COMP *const cpi, MACROBLOCK *const x, 2861 BLOCK_SIZE bsize, int part_ctx, 2862 int64_t best_rd, int64_t horz_rd[2], 2863 int64_t vert_rd[2], int64_t split_rd[4], 2864 int *const partition_horz4_allowed, 2865 int *const partition_vert4_allowed, 2866 unsigned int pb_source_variance, int mi_row, 2867 int mi_col) { 2868 if (best_rd >= 1000000000) return; 2869 const NN_CONFIG *nn_config = NULL; 2870 switch (bsize) { 2871 case BLOCK_16X16: nn_config = &av1_4_partition_nnconfig_16; break; 2872 case BLOCK_32X32: nn_config = &av1_4_partition_nnconfig_32; break; 2873 case BLOCK_64X64: nn_config = &av1_4_partition_nnconfig_64; break; 2874 default: assert(0 && "Unexpected bsize."); 2875 } 2876 if (!nn_config) return; 2877 2878 aom_clear_system_state(); 2879 2880 // Generate features. 2881 float features[FEATURES]; 2882 int feature_index = 0; 2883 features[feature_index++] = (float)part_ctx; 2884 features[feature_index++] = (float)get_unsigned_bits(pb_source_variance); 2885 2886 const int rdcost = (int)AOMMIN(INT_MAX, best_rd); 2887 int sub_block_rdcost[8] = { 0 }; 2888 int rd_index = 0; 2889 for (int i = 0; i < 2; ++i) { 2890 if (horz_rd[i] > 0 && horz_rd[i] < 1000000000) 2891 sub_block_rdcost[rd_index] = (int)horz_rd[i]; 2892 ++rd_index; 2893 } 2894 for (int i = 0; i < 2; ++i) { 2895 if (vert_rd[i] > 0 && vert_rd[i] < 1000000000) 2896 sub_block_rdcost[rd_index] = (int)vert_rd[i]; 2897 ++rd_index; 2898 } 2899 for (int i = 0; i < 4; ++i) { 2900 if (split_rd[i] > 0 && split_rd[i] < 1000000000) 2901 sub_block_rdcost[rd_index] = (int)split_rd[i]; 2902 ++rd_index; 2903 } 2904 for (int i = 0; i < 8; ++i) { 2905 // Ratio between the sub-block RD and the whole-block RD. 2906 float rd_ratio = 1.0f; 2907 if (sub_block_rdcost[i] > 0 && sub_block_rdcost[i] < rdcost) 2908 rd_ratio = (float)sub_block_rdcost[i] / (float)rdcost; 2909 features[feature_index++] = rd_ratio; 2910 } 2911 2912 // Get variance of the 1:4 and 4:1 sub-blocks. 
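// Each 4:1 (wide) and 1:4 (tall) sub-block is measured with the same
// variance helpers used elsewhere in this file: fn_ptr[bs].vf() against the
// flat VAR_OFFS reference (stride 0) yields the sub-block's source variance,
// which ROUND_POWER_OF_TWO() then normalizes by the pixel count to a
// per-pixel value comparable across block sizes.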
2913 unsigned int horz_4_source_var[4] = { 0 };
2914 unsigned int vert_4_source_var[4] = { 0 };
2915 {
2916 BLOCK_SIZE horz_4_bs = get_partition_subsize(bsize, PARTITION_HORZ_4);
2917 BLOCK_SIZE vert_4_bs = get_partition_subsize(bsize, PARTITION_VERT_4);
2918 av1_setup_src_planes(x, cpi->source, mi_row, mi_col,
2919 av1_num_planes(&cpi->common), bsize);
2920 const int src_stride = x->plane[0].src.stride;
2921 const uint8_t *src = x->plane[0].src.buf;
2922 const MACROBLOCKD *const xd = &x->e_mbd;
2923 for (int i = 0; i < 4; ++i) {
2924 const uint8_t *horz_src =
2925 src + i * block_size_high[horz_4_bs] * src_stride;
2926 const uint8_t *vert_src = src + i * block_size_wide[vert_4_bs];
2927 unsigned int horz_var, vert_var, sse;
2928 if (is_cur_buf_hbd(xd)) {
2929 switch (xd->bd) {
2930 case 10:
2931 horz_var = cpi->fn_ptr[horz_4_bs].vf(
2932 horz_src, src_stride, CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_10),
2933 0, &sse);
2934 vert_var = cpi->fn_ptr[vert_4_bs].vf(
2935 vert_src, src_stride, CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_10),
2936 0, &sse);
2937 break;
2938 case 12:
2939 horz_var = cpi->fn_ptr[horz_4_bs].vf(
2940 horz_src, src_stride, CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_12),
2941 0, &sse);
2942 vert_var = cpi->fn_ptr[vert_4_bs].vf(
2943 vert_src, src_stride, CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_12),
2944 0, &sse);
2945 break;
2946 case 8:
2947 default:
2948 horz_var = cpi->fn_ptr[horz_4_bs].vf(
2949 horz_src, src_stride, CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_8),
2950 0, &sse);
2951 vert_var = cpi->fn_ptr[vert_4_bs].vf(
2952 vert_src, src_stride, CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_8),
2953 0, &sse);
2954 break;
2955 }
2956 horz_4_source_var[i] =
2957 ROUND_POWER_OF_TWO(horz_var, num_pels_log2_lookup[horz_4_bs]);
2958 vert_4_source_var[i] =
2959 ROUND_POWER_OF_TWO(vert_var, num_pels_log2_lookup[vert_4_bs]);
2960 } else {
2961 horz_var = cpi->fn_ptr[horz_4_bs].vf(horz_src, src_stride, AV1_VAR_OFFS,
2962 0, &sse);
2963 vert_var = cpi->fn_ptr[vert_4_bs].vf(vert_src, src_stride, AV1_VAR_OFFS,
2964 0, &sse);
2965 horz_4_source_var[i] =
2966 ROUND_POWER_OF_TWO(horz_var, num_pels_log2_lookup[horz_4_bs]);
2967 vert_4_source_var[i] =
2968 ROUND_POWER_OF_TWO(vert_var, num_pels_log2_lookup[vert_4_bs]);
2969 }
2970 }
2971 }
2972
2973 const float denom = (float)(pb_source_variance + 1);
2974 const float low_b = 0.1f;
2975 const float high_b = 10.0f;
2976 for (int i = 0; i < 4; ++i) {
2977 // Ratio between the 4:1 sub-block variance and the whole-block variance.
2978 float var_ratio = (float)(horz_4_source_var[i] + 1) / denom;
2979 if (var_ratio < low_b) var_ratio = low_b;
2980 if (var_ratio > high_b) var_ratio = high_b;
2981 features[feature_index++] = var_ratio;
2982 }
2983 for (int i = 0; i < 4; ++i) {
2984 // Ratio between the 1:4 sub-block variance and the whole-block variance.
2985 float var_ratio = (float)(vert_4_source_var[i] + 1) / denom;
2986 if (var_ratio < low_b) var_ratio = low_b;
2987 if (var_ratio > high_b) var_ratio = high_b;
2988 features[feature_index++] = var_ratio;
2989 }
2990 assert(feature_index == FEATURES);
2991
2992 // Calculate scores using the NN model.
2993 float score[LABELS] = { 0.0f };
2994 av1_nn_predict(features, nn_config, score);
2995 aom_clear_system_state();
2996 int int_score[LABELS];
2997 int max_score = -1000;
2998 for (int i = 0; i < LABELS; ++i) {
2999 int_score[i] = (int)(100 * score[i]);
3000 max_score = AOMMAX(int_score[i], max_score);
3001 }
3002
3003 // Make decisions based on the model scores.
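// The LABELS == 4 outputs form a 2-bit mask decoded below: bit 0 enables
// PARTITION_HORZ_4 and bit 1 PARTITION_VERT_4, so e.g. label 3 (binary 11)
// votes for trying both. Any score within a bsize-dependent margin of
// max_score contributes its bits; the margin is wider (500) for 16x16 and
// 32x32 than for 64x64 (200), making the pruning more conservative at the
// smaller sizes.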
3004 int thresh = max_score; 3005 switch (bsize) { 3006 case BLOCK_16X16: thresh -= 500; break; 3007 case BLOCK_32X32: thresh -= 500; break; 3008 case BLOCK_64X64: thresh -= 200; break; 3009 default: break; 3010 } 3011 *partition_horz4_allowed = 0; 3012 *partition_vert4_allowed = 0; 3013 for (int i = 0; i < LABELS; ++i) { 3014 if (int_score[i] >= thresh) { 3015 if ((i >> 0) & 1) *partition_horz4_allowed = 1; 3016 if ((i >> 1) & 1) *partition_vert4_allowed = 1; 3017 } 3018 } 3019 } 3020 #undef FEATURES 3021 #undef LABELS 3022 3023 #define FEATURES 4 3024 // ML-based partition search breakout. 3025 static int ml_predict_breakout(const AV1_COMP *const cpi, BLOCK_SIZE bsize, 3026 const MACROBLOCK *const x, 3027 const RD_STATS *const rd_stats, 3028 unsigned int pb_source_variance) { 3029 const NN_CONFIG *nn_config = NULL; 3030 int thresh = 0; 3031 switch (bsize) { 3032 case BLOCK_8X8: 3033 nn_config = &av1_partition_breakout_nnconfig_8; 3034 thresh = cpi->sf.ml_partition_search_breakout_thresh[0]; 3035 break; 3036 case BLOCK_16X16: 3037 nn_config = &av1_partition_breakout_nnconfig_16; 3038 thresh = cpi->sf.ml_partition_search_breakout_thresh[1]; 3039 break; 3040 case BLOCK_32X32: 3041 nn_config = &av1_partition_breakout_nnconfig_32; 3042 thresh = cpi->sf.ml_partition_search_breakout_thresh[2]; 3043 break; 3044 case BLOCK_64X64: 3045 nn_config = &av1_partition_breakout_nnconfig_64; 3046 thresh = cpi->sf.ml_partition_search_breakout_thresh[3]; 3047 break; 3048 case BLOCK_128X128: 3049 nn_config = &av1_partition_breakout_nnconfig_128; 3050 thresh = cpi->sf.ml_partition_search_breakout_thresh[4]; 3051 break; 3052 default: assert(0 && "Unexpected bsize."); 3053 } 3054 if (!nn_config || thresh < 0) return 0; 3055 3056 // Generate feature values. 3057 float features[FEATURES]; 3058 int feature_index = 0; 3059 aom_clear_system_state(); 3060 3061 const int num_pels_log2 = num_pels_log2_lookup[bsize]; 3062 float rate_f = (float)AOMMIN(rd_stats->rate, INT_MAX); 3063 rate_f = ((float)x->rdmult / 128.0f / 512.0f / (float)(1 << num_pels_log2)) * 3064 rate_f; 3065 features[feature_index++] = rate_f; 3066 3067 const float dist_f = 3068 (float)(AOMMIN(rd_stats->dist, INT_MAX) >> num_pels_log2); 3069 features[feature_index++] = dist_f; 3070 3071 features[feature_index++] = (float)pb_source_variance; 3072 3073 const int dc_q = (int)x->plane[0].dequant_QTX[0]; 3074 features[feature_index++] = (float)(dc_q * dc_q) / 256.0f; 3075 assert(feature_index == FEATURES); 3076 3077 // Calculate score using the NN model. 3078 float score = 0.0f; 3079 av1_nn_predict(features, nn_config, &score); 3080 aom_clear_system_state(); 3081 3082 // Make decision. 3083 return (int)(score * 100) >= thresh; 3084 } 3085 #undef FEATURES 3086 3087 // Record the ref frames that have been selected by square partition blocks. 
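// The mask is kept per mi position inside the superblock: entry (i * 32 + j)
// covers the 4x4 mi unit at row i, column j (32 being the widest possible
// superblock in mi units), and each entry accumulates one bit per ref_type.
// For example, a 16x16 block (4x4 mi units) at mi offset (2, 4) inside its
// SB sets bit ref_type in entries [2..5] * 32 + [4..7].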
3088 static void update_picked_ref_frames_mask(MACROBLOCK *const x, int ref_type,
3089 BLOCK_SIZE bsize, int mib_size,
3090 int mi_row, int mi_col) {
3091 assert(mi_size_wide[bsize] == mi_size_high[bsize]);
3092 const int sb_size_mask = mib_size - 1;
3093 const int mi_row_in_sb = mi_row & sb_size_mask;
3094 const int mi_col_in_sb = mi_col & sb_size_mask;
3095 const int mi_size = mi_size_wide[bsize];
3096 for (int i = mi_row_in_sb; i < mi_row_in_sb + mi_size; ++i) {
3097 for (int j = mi_col_in_sb; j < mi_col_in_sb + mi_size; ++j) {
3098 x->picked_ref_frames_mask[i * 32 + j] |= 1 << ref_type;
3099 }
3100 }
3101 }
3102
3103 // TODO(jinging,jimbankoski,rbultje): properly skip partition types that are
3104 // unlikely to be selected depending on previous rate-distortion optimization
3105 // results, for encoding speed-up.
3106 // TODO(chiyotsai@google.com): Move these ML-related variables to a separate
3107 // file to separate low-level ML logic from partition logic.
3108 #define NUM_SIMPLE_MOTION_FEATURES 28
3109 static void rd_pick_partition(AV1_COMP *const cpi, ThreadData *td,
3110 TileDataEnc *tile_data, TOKENEXTRA **tp,
3111 int mi_row, int mi_col, BLOCK_SIZE bsize,
3112 BLOCK_SIZE max_sq_part, BLOCK_SIZE min_sq_part,
3113 RD_STATS *rd_cost, int64_t best_rd,
3114 PC_TREE *pc_tree, int64_t *none_rd) {
3115 const AV1_COMMON *const cm = &cpi->common;
3116 const int num_planes = av1_num_planes(cm);
3117 TileInfo *const tile_info = &tile_data->tile_info;
3118 MACROBLOCK *const x = &td->mb;
3119 MACROBLOCKD *const xd = &x->e_mbd;
3120 const int mi_step = mi_size_wide[bsize] / 2;
3121 RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
3122 const TOKENEXTRA *const tp_orig = *tp;
3123 PICK_MODE_CONTEXT *ctx_none = &pc_tree->none;
3124 int tmp_partition_cost[PARTITION_TYPES];
3125 BLOCK_SIZE subsize;
3126 RD_STATS this_rdc, sum_rdc, best_rdc;
3127 const int bsize_at_least_8x8 = (bsize >= BLOCK_8X8);
3128 int do_square_split = bsize_at_least_8x8;
3129 const int pl = bsize_at_least_8x8
3130 ? partition_plane_context(xd, mi_row, mi_col, bsize)
3131 : 0;
3132 const int *partition_cost =
3133 pl >= 0 ?
3133 pl >= 0 ? x->partition_cost[pl] : x->partition_cost[0];
3134
3135 int do_rectangular_split = cpi->oxcf.enable_rect_partitions;
3136 int64_t cur_none_rd = 0;
3137 int64_t split_rd[4] = { 0, 0, 0, 0 };
3138 int64_t horz_rd[2] = { 0, 0 };
3139 int64_t vert_rd[2] = { 0, 0 };
3140 int prune_horz = 0;
3141 int prune_vert = 0;
3142 int terminate_partition_search = 0;
3143
3144 int split_ctx_is_ready[2] = { 0, 0 };
3145 int horz_ctx_is_ready = 0;
3146 int vert_ctx_is_ready = 0;
3147 BLOCK_SIZE bsize2 = get_partition_subsize(bsize, PARTITION_SPLIT);
3148
3149 if (best_rd < 0) {
3150 pc_tree->none.rdcost = INT64_MAX;
3151 pc_tree->none.skip = 0;
3152 av1_invalid_rd_stats(rd_cost);
3153 return;
3154 }
3155 if (bsize == cm->seq_params.sb_size) x->must_find_valid_partition = 0;
3156
3157 // Override skipping rectangular partition operations for edge blocks.
3158 const int has_rows = (mi_row + mi_step < cm->mi_rows);
3159 const int has_cols = (mi_col + mi_step < cm->mi_cols);
3160 const int xss = x->e_mbd.plane[1].subsampling_x;
3161 const int yss = x->e_mbd.plane[1].subsampling_y;
3162
3163 if (none_rd) *none_rd = 0;
3164 int partition_none_allowed = has_rows && has_cols;
3165 int partition_horz_allowed = has_cols && yss <= xss && bsize_at_least_8x8 &&
3166 cpi->oxcf.enable_rect_partitions;
3167 int partition_vert_allowed = has_rows && xss <= yss && bsize_at_least_8x8 &&
3168 cpi->oxcf.enable_rect_partitions;
3169
3170 (void)*tp_orig;
3171
3172 #if CONFIG_COLLECT_PARTITION_STATS
3173 int partition_decisions[EXT_PARTITION_TYPES] = { 0 };
3174 int partition_attempts[EXT_PARTITION_TYPES] = { 0 };
3175 int64_t partition_times[EXT_PARTITION_TYPES] = { 0 };
3176 struct aom_usec_timer partition_timer = { 0 };
3177 int partition_timer_on = 0;
3178 #if CONFIG_COLLECT_PARTITION_STATS == 2
3179 PartitionStats *part_stats = &cpi->partition_stats;
3180 #endif
3181 #endif
3182
3183 // Override partition costs at the edges of the frame in the same
3184 // way as in read_partition (see decodeframe.c).
3185 if (!(has_rows && has_cols)) {
3186 assert(bsize_at_least_8x8 && pl >= 0);
3187 const aom_cdf_prob *partition_cdf = cm->fc->partition_cdf[pl];
3188 for (int i = 0; i < PARTITION_TYPES; ++i) tmp_partition_cost[i] = INT_MAX;
3189 if (has_cols) {
3190 // At the bottom, the two possibilities are HORZ and SPLIT.
3191 aom_cdf_prob bot_cdf[2];
3192 partition_gather_vert_alike(bot_cdf, partition_cdf, bsize);
3193 static const int bot_inv_map[2] = { PARTITION_HORZ, PARTITION_SPLIT };
3194 av1_cost_tokens_from_cdf(tmp_partition_cost, bot_cdf, bot_inv_map);
3195 } else if (has_rows) {
3196 // At the right, the two possibilities are VERT and SPLIT.
3197 aom_cdf_prob rhs_cdf[2];
3198 partition_gather_horz_alike(rhs_cdf, partition_cdf, bsize);
3199 static const int rhs_inv_map[2] = { PARTITION_VERT, PARTITION_SPLIT };
3200 av1_cost_tokens_from_cdf(tmp_partition_cost, rhs_cdf, rhs_inv_map);
3201 } else {
3202 // At the bottom right, we always split.
3203 tmp_partition_cost[PARTITION_SPLIT] = 0;
3204 }
3205
3206 partition_cost = tmp_partition_cost;
3207 do_square_split &= partition_cost[PARTITION_SPLIT] != INT_MAX;
3208 }
3209
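// (Illustrative note, not in the original: at a frame edge only a subset of
// partition types is codable, so the full partition CDF above is collapsed to
// a 2-symbol CDF before costing. For example, at the bottom edge the gathered
// binary CDF covers { HORZ, SPLIT }; if the collapsed probability of HORZ
// were 0.75, av1_cost_tokens_from_cdf would assign about -log2(0.75) ~= 0.42
// bits to HORZ and -log2(0.25) = 2 bits to SPLIT, roughly 213 and 1024 in the
// encoder's fixed-point cost units of 1/512 bit.)
3210 #ifndef NDEBUG
3211 // Nothing should rely on the default value of this array (which is just
3212 // leftover from encoding the previous block). Set it to a fixed pattern
3213 // when debugging.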
3214 // bit 0, 1, 2 are blk_skip of each plane 3215 // bit 4, 5, 6 are initialization checking of each plane 3216 memset(x->blk_skip, 0x77, sizeof(x->blk_skip)); 3217 #endif // NDEBUG 3218 3219 assert(mi_size_wide[bsize] == mi_size_high[bsize]); 3220 3221 av1_init_rd_stats(&this_rdc); 3222 av1_invalid_rd_stats(&best_rdc); 3223 best_rdc.rdcost = best_rd; 3224 3225 set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); 3226 3227 if (bsize == BLOCK_16X16 && cpi->vaq_refresh) 3228 x->mb_energy = av1_log_block_var(cpi, x, bsize); 3229 3230 if (bsize > cpi->sf.use_square_partition_only_threshold) { 3231 partition_horz_allowed &= !has_rows; 3232 partition_vert_allowed &= !has_cols; 3233 } 3234 3235 if (bsize > BLOCK_4X4 && x->use_cb_search_range) { 3236 int split_score = 0; 3237 int none_score = 0; 3238 const int score_valid = ml_prune_2pass_split_partition( 3239 &pc_tree->pc_tree_stats, bsize, &split_score, &none_score); 3240 if (score_valid) { 3241 { 3242 const int only_split_thresh = 300; 3243 const int no_none_thresh = 250; 3244 const int no_split_thresh = 0; 3245 if (split_score > only_split_thresh) { 3246 partition_none_allowed = 0; 3247 partition_horz_allowed = 0; 3248 partition_vert_allowed = 0; 3249 } else if (split_score > no_none_thresh) { 3250 partition_none_allowed = 0; 3251 } 3252 if (split_score < no_split_thresh) do_square_split = 0; 3253 } 3254 { 3255 const int no_split_thresh = 120; 3256 const int no_none_thresh = -120; 3257 if (none_score > no_split_thresh && partition_none_allowed) 3258 do_square_split = 0; 3259 if (none_score < no_none_thresh) partition_none_allowed = 0; 3260 } 3261 } else { 3262 if (pc_tree->cb_search_range == SPLIT_PLANE) { 3263 partition_none_allowed = 0; 3264 partition_horz_allowed = 0; 3265 partition_vert_allowed = 0; 3266 } 3267 if (pc_tree->cb_search_range == SEARCH_SAME_PLANE) do_square_split = 0; 3268 if (pc_tree->cb_search_range == NONE_PARTITION_PLANE) { 3269 do_square_split = 0; 3270 partition_horz_allowed = 0; 3271 partition_vert_allowed = 0; 3272 } 3273 } 3274 3275 // Fall back to default values in case all partition modes are rejected. 3276 if (partition_none_allowed == 0 && do_square_split == 0 && 3277 partition_horz_allowed == 0 && partition_vert_allowed == 0) { 3278 do_square_split = bsize_at_least_8x8; 3279 partition_none_allowed = has_rows && has_cols; 3280 partition_horz_allowed = has_cols && yss <= xss && bsize_at_least_8x8 && 3281 cpi->oxcf.enable_rect_partitions; 3282 partition_vert_allowed = has_rows && xss <= yss && bsize_at_least_8x8 && 3283 cpi->oxcf.enable_rect_partitions; 3284 } 3285 } 3286 3287 xd->above_txfm_context = cm->above_txfm_context[tile_info->tile_row] + mi_col; 3288 xd->left_txfm_context = 3289 xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK); 3290 save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); 3291 3292 // Use simple_motion_search to prune partitions. This must be done prior to 3293 // PARTITION_SPLIT to propagate the initial mvs to a smaller blocksize. 
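// (Illustrative note, not in the original: av1_simple_motion_search_based_split
// runs a cheap motion search on the sub-blocks and feeds the resulting
// features to a small neural net; depending on the prediction it can force
// split-only by clearing the NONE/HORZ/VERT flags passed below.)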
3294 const int try_split_only =
3295 cpi->sf.simple_motion_search_split_only && bsize >= BLOCK_8X8 &&
3296 do_square_split && mi_row + mi_size_high[bsize] <= cm->mi_rows &&
3297 mi_col + mi_size_wide[bsize] <= cm->mi_cols && !frame_is_intra_only(cm) &&
3298 !av1_superres_scaled(cm);
3299
3300 if (try_split_only) {
3301 av1_simple_motion_search_based_split(
3302 cpi, x, mi_row, mi_col, bsize, &partition_none_allowed,
3303 &partition_horz_allowed, &partition_vert_allowed, &do_rectangular_split,
3304 &do_square_split);
3305 }
3306
3307 const int try_prune_rect =
3308 cpi->sf.simple_motion_search_prune_rect && !frame_is_intra_only(cm) &&
3309 do_rectangular_split &&
3310 (do_square_split || partition_none_allowed ||
3311 (prune_horz && prune_vert)) &&
3312 (partition_horz_allowed || partition_vert_allowed) && bsize >= BLOCK_8X8;
3313
3314 float simple_motion_features[NUM_SIMPLE_MOTION_FEATURES] = { 0.0f };
3315 int simple_motion_features_are_valid = 0;
3316
3317 if (try_prune_rect) {
3318 av1_simple_motion_search_prune_part(
3319 cpi, x, pc_tree, mi_row, mi_col, bsize, &partition_none_allowed,
3320 &partition_horz_allowed, &partition_vert_allowed, &do_square_split,
3321 &do_rectangular_split, &prune_horz, &prune_vert, simple_motion_features,
3322 &simple_motion_features_are_valid);
3323 }
3324
3325 // Max and min square partition levels are defined as the partition nodes that
3326 // the recursive function rd_pick_partition() can reach. To implement this:
3327 // only PARTITION_NONE is allowed if the current node equals min_sq_part,
3328 // only PARTITION_SPLIT is allowed if the current node exceeds max_sq_part.
3329 assert(block_size_wide[min_sq_part] == block_size_high[min_sq_part]);
3330 assert(block_size_wide[max_sq_part] == block_size_high[max_sq_part]);
3331 assert(min_sq_part <= max_sq_part);
3332 assert(block_size_wide[bsize] == block_size_high[bsize]);
3333 const int max_partition_size = block_size_wide[max_sq_part];
3334 const int min_partition_size = block_size_wide[min_sq_part];
3335 const int blksize = block_size_wide[bsize];
3336 assert(min_partition_size <= max_partition_size);
3337 const int is_le_min_sq_part = blksize <= min_partition_size;
3338 const int is_gt_max_sq_part = blksize > max_partition_size;
3339 if (is_gt_max_sq_part) {
3340 // If the current block size is larger than the max, only allow split.
3341 partition_none_allowed = 0;
3342 partition_horz_allowed = 0;
3343 partition_vert_allowed = 0;
3344 do_square_split = 1;
3345 } else if (is_le_min_sq_part) {
3346 // If the current block size is less than or equal to the min, allow only
3347 // PARTITION_NONE when the block is valid (large enough); otherwise allow only split.
3348 partition_horz_allowed = 0;
3349 partition_vert_allowed = 0;
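// (Illustrative note, not in the original: with, say, min_sq_part == BLOCK_8X8
// and max_sq_part == BLOCK_64X64, a 128x128 node is forced to split, nodes
// from 64x64 down to 16x16 are searched normally, and an 8x8 node is forced to
// PARTITION_NONE whenever it lies fully inside the frame.)
3350 // Only disable square split when the current block is not at the picture
3351 // boundary; otherwise, inherit the square split flag from the previous logic.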
3352 if (has_rows && has_cols) do_square_split = 0;
3353 partition_none_allowed = !do_square_split;
3354 }
3355 do_square_split &= partition_cost[PARTITION_SPLIT] != INT_MAX;
3356
3357 BEGIN_PARTITION_SEARCH:
3358 if (x->must_find_valid_partition) {
3359 do_square_split =
3360 bsize_at_least_8x8 && partition_cost[PARTITION_SPLIT] != INT_MAX;
3361 partition_none_allowed = has_rows && has_cols;
3362 partition_horz_allowed = has_cols && yss <= xss && bsize_at_least_8x8 &&
3363 cpi->oxcf.enable_rect_partitions;
3364 partition_vert_allowed = has_rows && xss <= yss && bsize_at_least_8x8 &&
3365 cpi->oxcf.enable_rect_partitions;
3366 terminate_partition_search = 0;
3367 }
3368
3369 // Partition block source pixel variance.
3370 unsigned int pb_source_variance = UINT_MAX;
3371
3372 // Partition block SSE after simple motion compensation; not in use now,
3373 // but will be used by upcoming speed features.
3374 unsigned int pb_simple_motion_pred_sse = UINT_MAX;
3375 (void)pb_simple_motion_pred_sse;
3376
3377 #if CONFIG_DIST_8X8
3378 if (x->using_dist_8x8) {
3379 if (block_size_high[bsize] <= 8) partition_horz_allowed = 0;
3380 if (block_size_wide[bsize] <= 8) partition_vert_allowed = 0;
3381 if (block_size_high[bsize] <= 8 || block_size_wide[bsize] <= 8)
3382 do_square_split = 0;
3383 }
3384 #endif
3385
3386 // PARTITION_NONE
3387 if (is_le_min_sq_part && has_rows && has_cols) partition_none_allowed = 1;
3388 if (!terminate_partition_search && partition_none_allowed &&
3389 !is_gt_max_sq_part) {
3390 int pt_cost = 0;
3391 if (bsize_at_least_8x8) {
3392 pt_cost = partition_cost[PARTITION_NONE] < INT_MAX
3393 ? partition_cost[PARTITION_NONE]
3394 : 0;
3395 }
3396 const int64_t partition_rd_cost = RDCOST(x->rdmult, pt_cost, 0);
3397 const int64_t best_remain_rdcost =
3398 (best_rdc.rdcost == INT64_MAX) ? INT64_MAX
3399 : (best_rdc.rdcost - partition_rd_cost);
3400 #if CONFIG_COLLECT_PARTITION_STATS
3401 if (best_remain_rdcost >= 0) {
3402 partition_attempts[PARTITION_NONE] += 1;
3403 aom_usec_timer_start(&partition_timer);
3404 partition_timer_on = 1;
3405 }
3406 #endif
3407 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, PARTITION_NONE,
3408 bsize, ctx_none, best_remain_rdcost, 0);
3409 #if CONFIG_COLLECT_PARTITION_STATS
3410 if (partition_timer_on) {
3411 aom_usec_timer_mark(&partition_timer);
3412 int64_t time = aom_usec_timer_elapsed(&partition_timer);
3413 partition_times[PARTITION_NONE] += time;
3414 partition_timer_on = 0;
3415 }
3416 #endif
3417 pb_source_variance = x->source_variance;
3418 pb_simple_motion_pred_sse = x->simple_motion_pred_sse;
3419 if (none_rd) *none_rd = this_rdc.rdcost;
3420 cur_none_rd = this_rdc.rdcost;
3421 if (this_rdc.rate != INT_MAX) {
3422 if (cpi->sf.prune_ref_frame_for_rect_partitions) {
3423 const int ref_type = av1_ref_frame_type(ctx_none->mic.ref_frame);
3424 update_picked_ref_frames_mask(x, ref_type, bsize,
3425 cm->seq_params.mib_size, mi_row, mi_col);
3426 }
3427 if (bsize_at_least_8x8) {
3428 this_rdc.rate += pt_cost;
3429 this_rdc.rdcost = RDCOST(x->rdmult, this_rdc.rate, this_rdc.dist);
3430 }
3431
3432 if (this_rdc.rdcost < best_rdc.rdcost) {
3433 // Adjust dist breakout threshold according to the partition size.
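// (Illustrative note, not in the original: with MAX_SB_SIZE_LOG2 == 7 the
// shift below is 10 - (mi_size_wide_log2[bsize] + mi_size_high_log2[bsize]),
// e.g. 0 for a 128x128 block (5 + 5) but 8 for an 8x8 block (1 + 1), so the
// distortion threshold scales down with the block's area, 256x smaller for
// 8x8.)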
3434 const int64_t dist_breakout_thr = 3435 cpi->sf.partition_search_breakout_dist_thr >> 3436 ((2 * (MAX_SB_SIZE_LOG2 - 2)) - 3437 (mi_size_wide_log2[bsize] + mi_size_high_log2[bsize])); 3438 const int rate_breakout_thr = 3439 cpi->sf.partition_search_breakout_rate_thr * 3440 num_pels_log2_lookup[bsize]; 3441 3442 best_rdc = this_rdc; 3443 if (bsize_at_least_8x8) pc_tree->partitioning = PARTITION_NONE; 3444 3445 if ((do_square_split || do_rectangular_split) && 3446 !x->e_mbd.lossless[xd->mi[0]->segment_id] && ctx_none->skippable) { 3447 const int use_ml_based_breakout = 3448 bsize <= cpi->sf.use_square_partition_only_threshold && 3449 bsize > BLOCK_4X4 && xd->bd == 8; 3450 if (use_ml_based_breakout) { 3451 if (ml_predict_breakout(cpi, bsize, x, &this_rdc, 3452 pb_source_variance)) { 3453 do_square_split = 0; 3454 do_rectangular_split = 0; 3455 } 3456 } 3457 3458 // If all y, u, v transform blocks in this partition are skippable, 3459 // and the dist & rate are within the thresholds, the partition 3460 // search is terminated for current branch of the partition search 3461 // tree. The dist & rate thresholds are set to 0 at speed 0 to 3462 // disable the early termination at that speed. 3463 if (best_rdc.dist < dist_breakout_thr && 3464 best_rdc.rate < rate_breakout_thr) { 3465 do_square_split = 0; 3466 do_rectangular_split = 0; 3467 } 3468 } 3469 3470 if (cpi->sf.simple_motion_search_early_term_none && cm->show_frame && 3471 !frame_is_intra_only(cm) && bsize >= BLOCK_16X16 && 3472 mi_row + mi_step < cm->mi_rows && mi_col + mi_step < cm->mi_cols && 3473 this_rdc.rdcost < INT64_MAX && this_rdc.rdcost >= 0 && 3474 this_rdc.rate < INT_MAX && this_rdc.rate >= 0 && 3475 (do_square_split || do_rectangular_split)) { 3476 av1_simple_motion_search_early_term_none( 3477 cpi, x, pc_tree, mi_row, mi_col, bsize, &this_rdc, 3478 &terminate_partition_search, simple_motion_features, 3479 &simple_motion_features_are_valid); 3480 } 3481 } 3482 } 3483 3484 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); 3485 } 3486 3487 // store estimated motion vector 3488 if (cpi->sf.adaptive_motion_search) store_pred_mv(x, ctx_none); 3489 3490 // PARTITION_SPLIT 3491 if ((!terminate_partition_search && do_square_split) || is_gt_max_sq_part) { 3492 av1_init_rd_stats(&sum_rdc); 3493 subsize = get_partition_subsize(bsize, PARTITION_SPLIT); 3494 sum_rdc.rate = partition_cost[PARTITION_SPLIT]; 3495 sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0); 3496 3497 int idx; 3498 #if CONFIG_COLLECT_PARTITION_STATS 3499 if (best_rdc.rdcost - sum_rdc.rdcost >= 0) { 3500 partition_attempts[PARTITION_SPLIT] += 1; 3501 aom_usec_timer_start(&partition_timer); 3502 partition_timer_on = 1; 3503 } 3504 #endif 3505 for (idx = 0; idx < 4 && sum_rdc.rdcost < best_rdc.rdcost; ++idx) { 3506 const int x_idx = (idx & 1) * mi_step; 3507 const int y_idx = (idx >> 1) * mi_step; 3508 3509 if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols) 3510 continue; 3511 3512 if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none); 3513 3514 pc_tree->split[idx]->index = idx; 3515 int64_t *p_split_rd = &split_rd[idx]; 3516 const int64_t best_remain_rdcost = 3517 best_rdc.rdcost == INT64_MAX ? 
INT64_MAX 3518 : (best_rdc.rdcost - sum_rdc.rdcost); 3519 rd_pick_partition(cpi, td, tile_data, tp, mi_row + y_idx, mi_col + x_idx, 3520 subsize, max_sq_part, min_sq_part, &this_rdc, 3521 best_remain_rdcost, pc_tree->split[idx], p_split_rd); 3522 3523 if (this_rdc.rate == INT_MAX) { 3524 sum_rdc.rdcost = INT64_MAX; 3525 break; 3526 } else { 3527 sum_rdc.rate += this_rdc.rate; 3528 sum_rdc.dist += this_rdc.dist; 3529 sum_rdc.rdcost += this_rdc.rdcost; 3530 if (idx <= 1 && (bsize <= BLOCK_8X8 || 3531 pc_tree->split[idx]->partitioning == PARTITION_NONE)) { 3532 const MB_MODE_INFO *const mbmi = &pc_tree->split[idx]->none.mic; 3533 const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info; 3534 // Neither palette mode nor cfl predicted 3535 if (pmi->palette_size[0] == 0 && pmi->palette_size[1] == 0) { 3536 if (mbmi->uv_mode != UV_CFL_PRED) split_ctx_is_ready[idx] = 1; 3537 } 3538 } 3539 } 3540 } 3541 #if CONFIG_COLLECT_PARTITION_STATS 3542 if (partition_timer_on) { 3543 aom_usec_timer_mark(&partition_timer); 3544 int64_t time = aom_usec_timer_elapsed(&partition_timer); 3545 partition_times[PARTITION_SPLIT] += time; 3546 partition_timer_on = 0; 3547 } 3548 #endif 3549 const int reached_last_index = (idx == 4); 3550 3551 if (reached_last_index && sum_rdc.rdcost < best_rdc.rdcost) { 3552 sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist); 3553 3554 if (sum_rdc.rdcost < best_rdc.rdcost) { 3555 best_rdc = sum_rdc; 3556 pc_tree->partitioning = PARTITION_SPLIT; 3557 } 3558 } else if (cpi->sf.less_rectangular_check_level > 0) { 3559 // skip rectangular partition test when larger block size 3560 // gives better rd cost 3561 if (cpi->sf.less_rectangular_check_level == 2 || idx <= 2) 3562 do_rectangular_split &= !partition_none_allowed; 3563 } 3564 3565 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); 3566 } // if (do_split) 3567 3568 if (cpi->sf.ml_prune_rect_partition && !frame_is_intra_only(cm) && 3569 (partition_horz_allowed || partition_vert_allowed) && 3570 !(prune_horz || prune_vert)) { 3571 av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, bsize); 3572 ml_prune_rect_partition(cpi, x, bsize, best_rdc.rdcost, cur_none_rd, 3573 split_rd, &prune_horz, &prune_vert); 3574 } 3575 3576 // PARTITION_HORZ 3577 assert(IMPLIES(!cpi->oxcf.enable_rect_partitions, !partition_horz_allowed)); 3578 if (!terminate_partition_search && partition_horz_allowed && !prune_horz && 3579 (do_rectangular_split || active_h_edge(cpi, mi_row, mi_step)) && 3580 !is_gt_max_sq_part) { 3581 av1_init_rd_stats(&sum_rdc); 3582 subsize = get_partition_subsize(bsize, PARTITION_HORZ); 3583 if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none); 3584 if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && 3585 partition_none_allowed) { 3586 pc_tree->horizontal[0].pred_interp_filter = 3587 av1_extract_interp_filter(ctx_none->mic.interp_filters, 0); 3588 } 3589 sum_rdc.rate = partition_cost[PARTITION_HORZ]; 3590 sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0); 3591 const int64_t best_remain_rdcost = best_rdc.rdcost == INT64_MAX 3592 ? 
INT64_MAX 3593 : (best_rdc.rdcost - sum_rdc.rdcost); 3594 #if CONFIG_COLLECT_PARTITION_STATS 3595 if (best_remain_rdcost >= 0) { 3596 partition_attempts[PARTITION_HORZ] += 1; 3597 aom_usec_timer_start(&partition_timer); 3598 partition_timer_on = 1; 3599 } 3600 #endif 3601 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, PARTITION_HORZ, 3602 subsize, &pc_tree->horizontal[0], best_remain_rdcost, 0); 3603 3604 if (this_rdc.rate == INT_MAX) { 3605 sum_rdc.rdcost = INT64_MAX; 3606 } else { 3607 sum_rdc.rate += this_rdc.rate; 3608 sum_rdc.dist += this_rdc.dist; 3609 sum_rdc.rdcost += this_rdc.rdcost; 3610 } 3611 horz_rd[0] = this_rdc.rdcost; 3612 3613 if (sum_rdc.rdcost < best_rdc.rdcost && has_rows) { 3614 const PICK_MODE_CONTEXT *const ctx_h = &pc_tree->horizontal[0]; 3615 const MB_MODE_INFO *const mbmi = &pc_tree->horizontal[0].mic; 3616 const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info; 3617 // Neither palette mode nor cfl predicted 3618 if (pmi->palette_size[0] == 0 && pmi->palette_size[1] == 0) { 3619 if (mbmi->uv_mode != UV_CFL_PRED) horz_ctx_is_ready = 1; 3620 } 3621 update_state(cpi, tile_data, td, ctx_h, mi_row, mi_col, subsize, 1); 3622 encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, mi_row, mi_col, 3623 subsize, NULL); 3624 3625 if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_h); 3626 3627 if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && 3628 partition_none_allowed) { 3629 pc_tree->horizontal[1].pred_interp_filter = 3630 av1_extract_interp_filter(ctx_h->mic.interp_filters, 0); 3631 } 3632 pick_sb_modes(cpi, tile_data, x, mi_row + mi_step, mi_col, &this_rdc, 3633 PARTITION_HORZ, subsize, &pc_tree->horizontal[1], 3634 best_rdc.rdcost - sum_rdc.rdcost, 0); 3635 horz_rd[1] = this_rdc.rdcost; 3636 3637 if (this_rdc.rate == INT_MAX) { 3638 sum_rdc.rdcost = INT64_MAX; 3639 } else { 3640 sum_rdc.rate += this_rdc.rate; 3641 sum_rdc.dist += this_rdc.dist; 3642 sum_rdc.rdcost += this_rdc.rdcost; 3643 } 3644 } 3645 #if CONFIG_COLLECT_PARTITION_STATS 3646 if (partition_timer_on) { 3647 aom_usec_timer_mark(&partition_timer); 3648 int64_t time = aom_usec_timer_elapsed(&partition_timer); 3649 partition_times[PARTITION_HORZ] += time; 3650 partition_timer_on = 0; 3651 } 3652 #endif 3653 3654 if (sum_rdc.rdcost < best_rdc.rdcost) { 3655 sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist); 3656 if (sum_rdc.rdcost < best_rdc.rdcost) { 3657 best_rdc = sum_rdc; 3658 pc_tree->partitioning = PARTITION_HORZ; 3659 } 3660 } 3661 3662 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); 3663 } 3664 3665 // PARTITION_VERT 3666 assert(IMPLIES(!cpi->oxcf.enable_rect_partitions, !partition_vert_allowed)); 3667 if (!terminate_partition_search && partition_vert_allowed && !prune_vert && 3668 (do_rectangular_split || active_v_edge(cpi, mi_col, mi_step)) && 3669 !is_gt_max_sq_part) { 3670 av1_init_rd_stats(&sum_rdc); 3671 subsize = get_partition_subsize(bsize, PARTITION_VERT); 3672 3673 if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none); 3674 3675 if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && 3676 partition_none_allowed) { 3677 pc_tree->vertical[0].pred_interp_filter = 3678 av1_extract_interp_filter(ctx_none->mic.interp_filters, 0); 3679 } 3680 sum_rdc.rate = partition_cost[PARTITION_VERT]; 3681 sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0); 3682 const int64_t best_remain_rdcost = best_rdc.rdcost == INT64_MAX 3683 ? 
INT64_MAX 3684 : (best_rdc.rdcost - sum_rdc.rdcost); 3685 #if CONFIG_COLLECT_PARTITION_STATS 3686 if (best_remain_rdcost >= 0) { 3687 partition_attempts[PARTITION_VERT] += 1; 3688 aom_usec_timer_start(&partition_timer); 3689 partition_timer_on = 1; 3690 } 3691 #endif 3692 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, PARTITION_VERT, 3693 subsize, &pc_tree->vertical[0], best_remain_rdcost, 0); 3694 3695 if (this_rdc.rate == INT_MAX) { 3696 sum_rdc.rdcost = INT64_MAX; 3697 } else { 3698 sum_rdc.rate += this_rdc.rate; 3699 sum_rdc.dist += this_rdc.dist; 3700 sum_rdc.rdcost += this_rdc.rdcost; 3701 } 3702 vert_rd[0] = this_rdc.rdcost; 3703 if (sum_rdc.rdcost < best_rdc.rdcost && has_cols) { 3704 const MB_MODE_INFO *const mbmi = &pc_tree->vertical[0].mic; 3705 const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info; 3706 // Neither palette mode nor cfl predicted 3707 if (pmi->palette_size[0] == 0 && pmi->palette_size[1] == 0) { 3708 if (mbmi->uv_mode != UV_CFL_PRED) vert_ctx_is_ready = 1; 3709 } 3710 update_state(cpi, tile_data, td, &pc_tree->vertical[0], mi_row, mi_col, 3711 subsize, 1); 3712 encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, mi_row, mi_col, 3713 subsize, NULL); 3714 3715 if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none); 3716 3717 if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && 3718 partition_none_allowed) { 3719 pc_tree->vertical[1].pred_interp_filter = 3720 av1_extract_interp_filter(ctx_none->mic.interp_filters, 0); 3721 } 3722 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + mi_step, &this_rdc, 3723 PARTITION_VERT, subsize, &pc_tree->vertical[1], 3724 best_rdc.rdcost - sum_rdc.rdcost, 0); 3725 vert_rd[1] = this_rdc.rdcost; 3726 3727 if (this_rdc.rate == INT_MAX) { 3728 sum_rdc.rdcost = INT64_MAX; 3729 } else { 3730 sum_rdc.rate += this_rdc.rate; 3731 sum_rdc.dist += this_rdc.dist; 3732 sum_rdc.rdcost += this_rdc.rdcost; 3733 } 3734 } 3735 #if CONFIG_COLLECT_PARTITION_STATS 3736 if (partition_timer_on) { 3737 aom_usec_timer_mark(&partition_timer); 3738 int64_t time = aom_usec_timer_elapsed(&partition_timer); 3739 partition_times[PARTITION_VERT] += time; 3740 partition_timer_on = 0; 3741 } 3742 #endif 3743 3744 if (sum_rdc.rdcost < best_rdc.rdcost) { 3745 sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist); 3746 if (sum_rdc.rdcost < best_rdc.rdcost) { 3747 best_rdc = sum_rdc; 3748 pc_tree->partitioning = PARTITION_VERT; 3749 } 3750 } 3751 3752 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); 3753 } 3754 3755 if (pb_source_variance == UINT_MAX) { 3756 av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, bsize); 3757 if (is_cur_buf_hbd(xd)) { 3758 pb_source_variance = av1_high_get_sby_perpixel_variance( 3759 cpi, &x->plane[0].src, bsize, xd->bd); 3760 } else { 3761 pb_source_variance = 3762 av1_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize); 3763 } 3764 } 3765 3766 if (use_pb_simple_motion_pred_sse(cpi) && 3767 pb_simple_motion_pred_sse == UINT_MAX) { 3768 const MV ref_mv_full = { .row = 0, .col = 0 }; 3769 unsigned int var = 0; 3770 3771 av1_simple_motion_sse_var(cpi, x, mi_row, mi_col, bsize, ref_mv_full, 0, 3772 &pb_simple_motion_pred_sse, &var); 3773 } 3774 3775 assert(IMPLIES(!cpi->oxcf.enable_rect_partitions, !do_rectangular_split)); 3776 3777 const int ext_partition_allowed = 3778 do_rectangular_split && bsize > BLOCK_8X8 && partition_none_allowed; 3779 3780 // The standard AB partitions are allowed whenever ext-partition-types are 3781 // allowed 3782 int 
horzab_partition_allowed =
3783 ext_partition_allowed & cpi->oxcf.enable_ab_partitions;
3784 int vertab_partition_allowed =
3785 ext_partition_allowed & cpi->oxcf.enable_ab_partitions;
3786
3787 #if CONFIG_DIST_8X8
3788 if (x->using_dist_8x8) {
3789 if (block_size_high[bsize] <= 8 || block_size_wide[bsize] <= 8) {
3790 horzab_partition_allowed = 0;
3791 vertab_partition_allowed = 0;
3792 }
3793 }
3794 #endif
3795
3796 if (cpi->sf.prune_ext_partition_types_search_level) {
3797 if (cpi->sf.prune_ext_partition_types_search_level == 1) {
3798 // TODO(debargha,huisu@google.com): may need to tune the threshold for
3799 // pb_source_variance.
3800 horzab_partition_allowed &= (pc_tree->partitioning == PARTITION_HORZ ||
3801 (pc_tree->partitioning == PARTITION_NONE &&
3802 pb_source_variance < 32) ||
3803 pc_tree->partitioning == PARTITION_SPLIT);
3804 vertab_partition_allowed &= (pc_tree->partitioning == PARTITION_VERT ||
3805 (pc_tree->partitioning == PARTITION_NONE &&
3806 pb_source_variance < 32) ||
3807 pc_tree->partitioning == PARTITION_SPLIT);
3808 } else {
3809 horzab_partition_allowed &= (pc_tree->partitioning == PARTITION_HORZ ||
3810 pc_tree->partitioning == PARTITION_SPLIT);
3811 vertab_partition_allowed &= (pc_tree->partitioning == PARTITION_VERT ||
3812 pc_tree->partitioning == PARTITION_SPLIT);
3813 }
3814 horz_rd[0] = (horz_rd[0] < INT64_MAX ? horz_rd[0] : 0);
3815 horz_rd[1] = (horz_rd[1] < INT64_MAX ? horz_rd[1] : 0);
3816 vert_rd[0] = (vert_rd[0] < INT64_MAX ? vert_rd[0] : 0);
3817 vert_rd[1] = (vert_rd[1] < INT64_MAX ? vert_rd[1] : 0);
3818 split_rd[0] = (split_rd[0] < INT64_MAX ? split_rd[0] : 0);
3819 split_rd[1] = (split_rd[1] < INT64_MAX ? split_rd[1] : 0);
3820 split_rd[2] = (split_rd[2] < INT64_MAX ? split_rd[2] : 0);
3821 split_rd[3] = (split_rd[3] < INT64_MAX ? split_rd[3] : 0);
3822 }
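// (Illustrative sketch, not part of the original: the integer predicates used
// below, e.g. `horz_a_rd / 16 * 14 < best_rdc.rdcost`, discount the estimated
// cost of an AB shape -- the sum of the RD costs of its constituent
// sub-blocks from earlier searches -- before comparing against the current
// best. A standalone equivalent, with a hypothetical name:
#if 0
static INLINE int ab_partition_kept(int64_t estimated_rd, int64_t best_rd,
                                    int frac) {
  // estimated_rd * frac/16 < best_rd  <=>  estimated_rd < best_rd * 16/frac,
  // so frac = 14 (level 1) tolerates estimates up to ~114.3% of the best and
  // frac = 15 (level 2) only up to ~106.7%, pruning more aggressively.
  // Dividing before multiplying avoids int64_t overflow for large costs.
  return estimated_rd / 16 * frac < best_rd;
}
#endif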
3823 int horza_partition_allowed = horzab_partition_allowed;
3824 int horzb_partition_allowed = horzab_partition_allowed;
3825 if (cpi->sf.prune_ext_partition_types_search_level) {
3826 const int64_t horz_a_rd = horz_rd[1] + split_rd[0] + split_rd[1];
3827 const int64_t horz_b_rd = horz_rd[0] + split_rd[2] + split_rd[3];
3828 switch (cpi->sf.prune_ext_partition_types_search_level) {
3829 case 1:
3830 horza_partition_allowed &= (horz_a_rd / 16 * 14 < best_rdc.rdcost);
3831 horzb_partition_allowed &= (horz_b_rd / 16 * 14 < best_rdc.rdcost);
3832 break;
3833 case 2:
3834 default:
3835 horza_partition_allowed &= (horz_a_rd / 16 * 15 < best_rdc.rdcost);
3836 horzb_partition_allowed &= (horz_b_rd / 16 * 15 < best_rdc.rdcost);
3837 break;
3838 }
3839 }
3840
3841 int verta_partition_allowed = vertab_partition_allowed;
3842 int vertb_partition_allowed = vertab_partition_allowed;
3843 if (cpi->sf.prune_ext_partition_types_search_level) {
3844 const int64_t vert_a_rd = vert_rd[1] + split_rd[0] + split_rd[2];
3845 const int64_t vert_b_rd = vert_rd[0] + split_rd[1] + split_rd[3];
3846 switch (cpi->sf.prune_ext_partition_types_search_level) {
3847 case 1:
3848 verta_partition_allowed &= (vert_a_rd / 16 * 14 < best_rdc.rdcost);
3849 vertb_partition_allowed &= (vert_b_rd / 16 * 14 < best_rdc.rdcost);
3850 break;
3851 case 2:
3852 default:
3853 verta_partition_allowed &= (vert_a_rd / 16 * 15 < best_rdc.rdcost);
3854 vertb_partition_allowed &= (vert_b_rd / 16 * 15 < best_rdc.rdcost);
3855 break;
3856 }
3857 }
3858
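// (Illustrative diagram, not in the original: geometry of the four AB shapes
// tried below. The two small squares are bsize2 (a quarter of the block) and
// the remaining rectangle is the AB subsize, numbered in search order:
//   HORZ_A: +---+---+  HORZ_B: +-------+  VERT_A: +---+---+  VERT_B: +---+---+
//           | 0 | 1 |          |   0   |          | 0 |   |          |   | 1 |
//           +---+---+          +---+---+          +---+ 2 |          | 0 +---+
//           |   2   |          | 1 | 2 |          | 1 |   |          |   | 2 |
//           +-------+          +---+---+          +---+---+          +---+---+
// )
3859 if (cpi->sf.ml_prune_ab_partition && ext_partition_allowed &&
3860 partition_horz_allowed && partition_vert_allowed) {
3861 // TODO(huisu@google.com): x->source_variance may not be the current
3862 // block's variance. The correct one to use is pb_source_variance. Need to
3863 // re-train the model to fix it.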
3864 ml_prune_ab_partition(bsize, pc_tree->partitioning, 3865 get_unsigned_bits(x->source_variance), 3866 best_rdc.rdcost, horz_rd, vert_rd, split_rd, 3867 &horza_partition_allowed, &horzb_partition_allowed, 3868 &verta_partition_allowed, &vertb_partition_allowed); 3869 } 3870 3871 horza_partition_allowed &= cpi->oxcf.enable_ab_partitions; 3872 horzb_partition_allowed &= cpi->oxcf.enable_ab_partitions; 3873 verta_partition_allowed &= cpi->oxcf.enable_ab_partitions; 3874 vertb_partition_allowed &= cpi->oxcf.enable_ab_partitions; 3875 3876 // PARTITION_HORZ_A 3877 if (!terminate_partition_search && partition_horz_allowed && 3878 horza_partition_allowed && !is_gt_max_sq_part) { 3879 subsize = get_partition_subsize(bsize, PARTITION_HORZ_A); 3880 pc_tree->horizontala[0].rd_mode_is_ready = 0; 3881 pc_tree->horizontala[1].rd_mode_is_ready = 0; 3882 pc_tree->horizontala[2].rd_mode_is_ready = 0; 3883 if (split_ctx_is_ready[0]) { 3884 av1_copy_tree_context(&pc_tree->horizontala[0], &pc_tree->split[0]->none); 3885 pc_tree->horizontala[0].mic.partition = PARTITION_HORZ_A; 3886 pc_tree->horizontala[0].rd_mode_is_ready = 1; 3887 if (split_ctx_is_ready[1]) { 3888 av1_copy_tree_context(&pc_tree->horizontala[1], 3889 &pc_tree->split[1]->none); 3890 pc_tree->horizontala[1].mic.partition = PARTITION_HORZ_A; 3891 pc_tree->horizontala[1].rd_mode_is_ready = 1; 3892 } 3893 } 3894 #if CONFIG_COLLECT_PARTITION_STATS 3895 { 3896 RD_STATS tmp_sum_rdc; 3897 av1_init_rd_stats(&tmp_sum_rdc); 3898 tmp_sum_rdc.rate = x->partition_cost[pl][PARTITION_HORZ_A]; 3899 tmp_sum_rdc.rdcost = RDCOST(x->rdmult, tmp_sum_rdc.rate, 0); 3900 if (best_rdc.rdcost - tmp_sum_rdc.rdcost >= 0) { 3901 partition_attempts[PARTITION_HORZ_A] += 1; 3902 aom_usec_timer_start(&partition_timer); 3903 partition_timer_on = 1; 3904 } 3905 } 3906 #endif 3907 rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc, 3908 pc_tree->horizontala, ctx_none, mi_row, mi_col, bsize, 3909 PARTITION_HORZ_A, mi_row, mi_col, bsize2, mi_row, 3910 mi_col + mi_step, bsize2, mi_row + mi_step, mi_col, 3911 subsize); 3912 #if CONFIG_COLLECT_PARTITION_STATS 3913 if (partition_timer_on) { 3914 aom_usec_timer_mark(&partition_timer); 3915 int64_t time = aom_usec_timer_elapsed(&partition_timer); 3916 partition_times[PARTITION_HORZ_A] += time; 3917 partition_timer_on = 0; 3918 } 3919 #endif 3920 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); 3921 } 3922 // PARTITION_HORZ_B 3923 if (!terminate_partition_search && partition_horz_allowed && 3924 horzb_partition_allowed && !is_gt_max_sq_part) { 3925 subsize = get_partition_subsize(bsize, PARTITION_HORZ_B); 3926 pc_tree->horizontalb[0].rd_mode_is_ready = 0; 3927 pc_tree->horizontalb[1].rd_mode_is_ready = 0; 3928 pc_tree->horizontalb[2].rd_mode_is_ready = 0; 3929 if (horz_ctx_is_ready) { 3930 av1_copy_tree_context(&pc_tree->horizontalb[0], &pc_tree->horizontal[0]); 3931 pc_tree->horizontalb[0].mic.partition = PARTITION_HORZ_B; 3932 pc_tree->horizontalb[0].rd_mode_is_ready = 1; 3933 } 3934 #if CONFIG_COLLECT_PARTITION_STATS 3935 { 3936 RD_STATS tmp_sum_rdc; 3937 av1_init_rd_stats(&tmp_sum_rdc); 3938 tmp_sum_rdc.rate = x->partition_cost[pl][PARTITION_HORZ_B]; 3939 tmp_sum_rdc.rdcost = RDCOST(x->rdmult, tmp_sum_rdc.rate, 0); 3940 if (best_rdc.rdcost - tmp_sum_rdc.rdcost >= 0) { 3941 partition_attempts[PARTITION_HORZ_B] += 1; 3942 aom_usec_timer_start(&partition_timer); 3943 partition_timer_on = 1; 3944 } 3945 } 3946 #endif 3947 rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc, 3948 pc_tree->horizontalb, 
ctx_none, mi_row, mi_col, bsize, 3949 PARTITION_HORZ_B, mi_row, mi_col, subsize, 3950 mi_row + mi_step, mi_col, bsize2, mi_row + mi_step, 3951 mi_col + mi_step, bsize2); 3952 3953 #if CONFIG_COLLECT_PARTITION_STATS 3954 if (partition_timer_on) { 3955 aom_usec_timer_mark(&partition_timer); 3956 int64_t time = aom_usec_timer_elapsed(&partition_timer); 3957 partition_times[PARTITION_HORZ_B] += time; 3958 partition_timer_on = 0; 3959 } 3960 #endif 3961 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); 3962 } 3963 3964 // PARTITION_VERT_A 3965 if (!terminate_partition_search && partition_vert_allowed && 3966 verta_partition_allowed && !is_gt_max_sq_part) { 3967 subsize = get_partition_subsize(bsize, PARTITION_VERT_A); 3968 pc_tree->verticala[0].rd_mode_is_ready = 0; 3969 pc_tree->verticala[1].rd_mode_is_ready = 0; 3970 pc_tree->verticala[2].rd_mode_is_ready = 0; 3971 if (split_ctx_is_ready[0]) { 3972 av1_copy_tree_context(&pc_tree->verticala[0], &pc_tree->split[0]->none); 3973 pc_tree->verticala[0].mic.partition = PARTITION_VERT_A; 3974 pc_tree->verticala[0].rd_mode_is_ready = 1; 3975 } 3976 #if CONFIG_COLLECT_PARTITION_STATS 3977 { 3978 RD_STATS tmp_sum_rdc; 3979 av1_init_rd_stats(&tmp_sum_rdc); 3980 tmp_sum_rdc.rate = x->partition_cost[pl][PARTITION_VERT_A]; 3981 tmp_sum_rdc.rdcost = RDCOST(x->rdmult, tmp_sum_rdc.rate, 0); 3982 if (best_rdc.rdcost - tmp_sum_rdc.rdcost >= 0) { 3983 partition_attempts[PARTITION_VERT_A] += 1; 3984 aom_usec_timer_start(&partition_timer); 3985 partition_timer_on = 1; 3986 } 3987 } 3988 #endif 3989 rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc, 3990 pc_tree->verticala, ctx_none, mi_row, mi_col, bsize, 3991 PARTITION_VERT_A, mi_row, mi_col, bsize2, 3992 mi_row + mi_step, mi_col, bsize2, mi_row, 3993 mi_col + mi_step, subsize); 3994 #if CONFIG_COLLECT_PARTITION_STATS 3995 if (partition_timer_on) { 3996 aom_usec_timer_mark(&partition_timer); 3997 int64_t time = aom_usec_timer_elapsed(&partition_timer); 3998 partition_times[PARTITION_VERT_A] += time; 3999 partition_timer_on = 0; 4000 } 4001 #endif 4002 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); 4003 } 4004 // PARTITION_VERT_B 4005 if (!terminate_partition_search && partition_vert_allowed && 4006 vertb_partition_allowed && !is_gt_max_sq_part) { 4007 subsize = get_partition_subsize(bsize, PARTITION_VERT_B); 4008 pc_tree->verticalb[0].rd_mode_is_ready = 0; 4009 pc_tree->verticalb[1].rd_mode_is_ready = 0; 4010 pc_tree->verticalb[2].rd_mode_is_ready = 0; 4011 if (vert_ctx_is_ready) { 4012 av1_copy_tree_context(&pc_tree->verticalb[0], &pc_tree->vertical[0]); 4013 pc_tree->verticalb[0].mic.partition = PARTITION_VERT_B; 4014 pc_tree->verticalb[0].rd_mode_is_ready = 1; 4015 } 4016 #if CONFIG_COLLECT_PARTITION_STATS 4017 { 4018 RD_STATS tmp_sum_rdc; 4019 av1_init_rd_stats(&tmp_sum_rdc); 4020 tmp_sum_rdc.rate = x->partition_cost[pl][PARTITION_VERT_B]; 4021 tmp_sum_rdc.rdcost = RDCOST(x->rdmult, tmp_sum_rdc.rate, 0); 4022 if (!frame_is_intra_only(cm) && 4023 best_rdc.rdcost - tmp_sum_rdc.rdcost >= 0) { 4024 partition_attempts[PARTITION_VERT_B] += 1; 4025 aom_usec_timer_start(&partition_timer); 4026 partition_timer_on = 1; 4027 } 4028 } 4029 #endif 4030 rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc, 4031 pc_tree->verticalb, ctx_none, mi_row, mi_col, bsize, 4032 PARTITION_VERT_B, mi_row, mi_col, subsize, mi_row, 4033 mi_col + mi_step, bsize2, mi_row + mi_step, 4034 mi_col + mi_step, bsize2); 4035 #if CONFIG_COLLECT_PARTITION_STATS 4036 if (partition_timer_on) { 4037 
aom_usec_timer_mark(&partition_timer); 4038 int64_t time = aom_usec_timer_elapsed(&partition_timer); 4039 partition_times[PARTITION_VERT_B] += time; 4040 partition_timer_on = 0; 4041 } 4042 #endif 4043 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); 4044 } 4045 4046 // partition4_allowed is 1 if we can use a PARTITION_HORZ_4 or 4047 // PARTITION_VERT_4 for this block. This is almost the same as 4048 // ext_partition_allowed, except that we don't allow 128x32 or 32x128 4049 // blocks, so we require that bsize is not BLOCK_128X128. 4050 const int partition4_allowed = cpi->oxcf.enable_1to4_partitions && 4051 ext_partition_allowed && 4052 bsize != BLOCK_128X128; 4053 4054 int partition_horz4_allowed = partition4_allowed && partition_horz_allowed; 4055 int partition_vert4_allowed = partition4_allowed && partition_vert_allowed; 4056 if (cpi->sf.prune_ext_partition_types_search_level == 2) { 4057 partition_horz4_allowed &= (pc_tree->partitioning == PARTITION_HORZ || 4058 pc_tree->partitioning == PARTITION_HORZ_A || 4059 pc_tree->partitioning == PARTITION_HORZ_B || 4060 pc_tree->partitioning == PARTITION_SPLIT || 4061 pc_tree->partitioning == PARTITION_NONE); 4062 partition_vert4_allowed &= (pc_tree->partitioning == PARTITION_VERT || 4063 pc_tree->partitioning == PARTITION_VERT_A || 4064 pc_tree->partitioning == PARTITION_VERT_B || 4065 pc_tree->partitioning == PARTITION_SPLIT || 4066 pc_tree->partitioning == PARTITION_NONE); 4067 } 4068 if (cpi->sf.ml_prune_4_partition && partition4_allowed && 4069 partition_horz_allowed && partition_vert_allowed) { 4070 ml_prune_4_partition(cpi, x, bsize, pc_tree->partitioning, best_rdc.rdcost, 4071 horz_rd, vert_rd, split_rd, &partition_horz4_allowed, 4072 &partition_vert4_allowed, pb_source_variance, mi_row, 4073 mi_col); 4074 } 4075 4076 #if CONFIG_DIST_8X8 4077 if (x->using_dist_8x8) { 4078 if (block_size_high[bsize] <= 16 || block_size_wide[bsize] <= 16) { 4079 partition_horz4_allowed = 0; 4080 partition_vert4_allowed = 0; 4081 } 4082 } 4083 #endif 4084 4085 if (blksize < (min_partition_size << 2)) { 4086 partition_horz4_allowed = 0; 4087 partition_vert4_allowed = 0; 4088 } 4089 4090 // PARTITION_HORZ_4 4091 assert(IMPLIES(!cpi->oxcf.enable_rect_partitions, !partition_horz4_allowed)); 4092 if (!terminate_partition_search && partition_horz4_allowed && has_rows && 4093 (do_rectangular_split || active_h_edge(cpi, mi_row, mi_step)) && 4094 !is_gt_max_sq_part) { 4095 av1_init_rd_stats(&sum_rdc); 4096 const int quarter_step = mi_size_high[bsize] / 4; 4097 PICK_MODE_CONTEXT *ctx_prev = ctx_none; 4098 4099 subsize = get_partition_subsize(bsize, PARTITION_HORZ_4); 4100 sum_rdc.rate = partition_cost[PARTITION_HORZ_4]; 4101 sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0); 4102 4103 #if CONFIG_COLLECT_PARTITION_STATS 4104 if (best_rdc.rdcost - sum_rdc.rdcost >= 0) { 4105 partition_attempts[PARTITION_HORZ_4] += 1; 4106 aom_usec_timer_start(&partition_timer); 4107 partition_timer_on = 1; 4108 } 4109 #endif 4110 for (int i = 0; i < 4; ++i) { 4111 const int this_mi_row = mi_row + i * quarter_step; 4112 4113 if (i > 0 && this_mi_row >= cm->mi_rows) break; 4114 4115 PICK_MODE_CONTEXT *ctx_this = &pc_tree->horizontal4[i]; 4116 4117 ctx_this->rd_mode_is_ready = 0; 4118 if (!rd_try_subblock(cpi, td, tile_data, tp, (i == 3), this_mi_row, 4119 mi_col, subsize, &best_rdc, &sum_rdc, &this_rdc, 4120 PARTITION_HORZ_4, ctx_prev, ctx_this)) 4121 break; 4122 4123 ctx_prev = ctx_this; 4124 } 4125 4126 if (sum_rdc.rdcost < best_rdc.rdcost) { 4127 sum_rdc.rdcost = 
RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
4128 if (sum_rdc.rdcost < best_rdc.rdcost) {
4129 best_rdc = sum_rdc;
4130 pc_tree->partitioning = PARTITION_HORZ_4;
4131 }
4132 }
4133
4134 #if CONFIG_COLLECT_PARTITION_STATS
4135 if (partition_timer_on) {
4136 aom_usec_timer_mark(&partition_timer);
4137 int64_t time = aom_usec_timer_elapsed(&partition_timer);
4138 partition_times[PARTITION_HORZ_4] += time;
4139 partition_timer_on = 0;
4140 }
4141 #endif
4142 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
4143 }
4144
4145 // PARTITION_VERT_4
4146 assert(IMPLIES(!cpi->oxcf.enable_rect_partitions, !partition_vert4_allowed));
4147 if (!terminate_partition_search && partition_vert4_allowed && has_cols &&
4148 (do_rectangular_split || active_v_edge(cpi, mi_row, mi_step)) &&
4149 !is_gt_max_sq_part) {
4150 av1_init_rd_stats(&sum_rdc);
4151 const int quarter_step = mi_size_wide[bsize] / 4;
4152 PICK_MODE_CONTEXT *ctx_prev = ctx_none;
4153
4154 subsize = get_partition_subsize(bsize, PARTITION_VERT_4);
4155 sum_rdc.rate = partition_cost[PARTITION_VERT_4];
4156 sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
4157
4158 #if CONFIG_COLLECT_PARTITION_STATS
4159 if (best_rdc.rdcost - sum_rdc.rdcost >= 0) {
4160 partition_attempts[PARTITION_VERT_4] += 1;
4161 aom_usec_timer_start(&partition_timer);
4162 partition_timer_on = 1;
4163 }
4164 #endif
4165 for (int i = 0; i < 4; ++i) {
4166 const int this_mi_col = mi_col + i * quarter_step;
4167
4168 if (i > 0 && this_mi_col >= cm->mi_cols) break;
4169
4170 PICK_MODE_CONTEXT *ctx_this = &pc_tree->vertical4[i];
4171
4172 ctx_this->rd_mode_is_ready = 0;
4173 if (!rd_try_subblock(cpi, td, tile_data, tp, (i == 3), mi_row,
4174 this_mi_col, subsize, &best_rdc, &sum_rdc, &this_rdc,
4175 PARTITION_VERT_4, ctx_prev, ctx_this))
4176 break;
4177
4178 ctx_prev = ctx_this;
4179 }
4180
4181 if (sum_rdc.rdcost < best_rdc.rdcost) {
4182 sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
4183 if (sum_rdc.rdcost < best_rdc.rdcost) {
4184 best_rdc = sum_rdc;
4185 pc_tree->partitioning = PARTITION_VERT_4;
4186 }
4187 }
4188 #if CONFIG_COLLECT_PARTITION_STATS
4189 if (partition_timer_on) {
4190 aom_usec_timer_mark(&partition_timer);
4191 int64_t time = aom_usec_timer_elapsed(&partition_timer);
4192 partition_times[PARTITION_VERT_4] += time;
4193 partition_timer_on = 0;
4194 }
4195 #endif
4196 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
4197 }
4198
4199 if (bsize == cm->seq_params.sb_size && best_rdc.rate == INT_MAX) {
4200 // Did not find a valid partition; go back and search again, with fewer
4201 // constraints on which partition types to search.
4202 x->must_find_valid_partition = 1;
4203 #if CONFIG_COLLECT_PARTITION_STATS == 2
4204 part_stats->partition_redo += 1;
4205 #endif
4206 goto BEGIN_PARTITION_SEARCH;
4207 }
4208
4209 // TODO(jbb): This code was added so that we avoid static analysis
4210 // warnings related to the fact that best_rd isn't used after this
4211 // point. This code should be refactored so that the duplicate
4212 // checks occur in some sub-function and thus are used...
4213 (void)best_rd;
4214 *rd_cost = best_rdc;
4215
4216 #if CONFIG_COLLECT_PARTITION_STATS
4217 if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX) {
4218 partition_decisions[pc_tree->partitioning] += 1;
4219 }
4220 #endif
4221
4222 #if CONFIG_COLLECT_PARTITION_STATS == 1
4223 // If CONFIG_COLLECT_PARTITION_STATS is 1, then print out the stats for each
4224 // prediction block.
4225 FILE *f = fopen("data.csv", "a");
4226 fprintf(f, "%d,%d,%d,", bsize, cm->show_frame, frame_is_intra_only(cm));
4227 for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) {
4228 fprintf(f, "%d,", partition_decisions[idx]);
4229 }
4230 for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) {
4231 fprintf(f, "%d,", partition_attempts[idx]);
4232 }
4233 for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) {
4234 fprintf(f, "%lld,", (long long)partition_times[idx]);
4235 }
4236 fprintf(f, "\n");
4237 fclose(f);
4238 #endif
4239
4240 #if CONFIG_COLLECT_PARTITION_STATS == 2
4241 // If CONFIG_COLLECT_PARTITION_STATS is 2, then we print out the stats for
4242 // the whole clip, so we need to pass the information upstream to the encoder.
4243 const int bsize_idx = av1_get_bsize_idx_for_part_stats(bsize);
4244 int *agg_attempts = part_stats->partition_attempts[bsize_idx];
4245 int *agg_decisions = part_stats->partition_decisions[bsize_idx];
4246 int64_t *agg_times = part_stats->partition_times[bsize_idx];
4247 for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) {
4248 agg_attempts[idx] += partition_attempts[idx];
4249 agg_decisions[idx] += partition_decisions[idx];
4250 agg_times[idx] += partition_times[idx];
4251 }
4252 #endif
4253
4254 if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX &&
4255 pc_tree->index != 3) {
4256 if (bsize == cm->seq_params.sb_size) {
4257 x->cb_offset = 0;
4258 encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, OUTPUT_ENABLED, bsize,
4259 pc_tree, NULL);
4260 } else {
4261 encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize,
4262 pc_tree, NULL);
4263 }
4264 }
4265
4266 if (bsize == cm->seq_params.sb_size) {
4267 assert(best_rdc.rate < INT_MAX);
4268 assert(best_rdc.dist < INT64_MAX);
4269 } else {
4270 assert(tp_orig == *tp);
4271 }
4272 }
4273 #undef NUM_SIMPLE_MOTION_FEATURES
4274
4275 // Set all the counters to their max.
4276 static void init_first_partition_pass_stats_tables(
4277 AV1_COMP *cpi, FIRST_PARTITION_PASS_STATS *stats) {
4278 for (int i = 0; i < FIRST_PARTITION_PASS_STATS_TABLES; ++i) {
4279 memset(stats[i].ref0_counts, 0xff, sizeof(stats[i].ref0_counts));
4280 memset(stats[i].ref1_counts, 0xff, sizeof(stats[i].ref1_counts));
4281 stats[i].sample_counts = INT_MAX;
4282 if (cpi->sf.use_first_partition_pass_interintra_stats)
4283 memset(stats[i].interintra_motion_mode_count, 0xff,
4284 sizeof(stats[i].interintra_motion_mode_count));
4285 }
4286 }
4287
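// (Illustrative note, not in the original: the 0xff bytes and INT_MAX act as
// "no data yet" sentinels, so every reference frame and mode stays enabled
// until the first partition pass has gathered real statistics. A hypothetical
// consumer of these tables would prune only once enough samples exist:
#if 0
  // Sketch only: skip a reference in the second pass when real stats exist
  // and the first pass never selected it.
  if (stat->sample_counts >= FIRST_PARTITION_PASS_MIN_SAMPLES &&
      stat->ref0_counts[ref_frame] == 0) {
    continue;
  }
#endif
4288 // Minimum number of samples to trigger the mode pruning in the
4289 // two_pass_partition_search feature.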
4290 #define FIRST_PARTITION_PASS_MIN_SAMPLES 16
4291
4292 static int get_rdmult_delta(AV1_COMP *cpi, BLOCK_SIZE bsize, int mi_row,
4293 int mi_col, int orig_rdmult) {
4294 TplDepFrame *tpl_frame = &cpi->tpl_stats[cpi->twopass.gf_group.index];
4295 TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
4296 int tpl_stride = tpl_frame->stride;
4297 int64_t intra_cost = 0;
4298 int64_t mc_dep_cost = 0;
4299 int mi_wide = mi_size_wide[bsize];
4300 int mi_high = mi_size_high[bsize];
4301 int row, col;
4302
4303 int dr = 0;
4304 double r0, rk, beta;
4305
4306 if (tpl_frame->is_valid == 0) return orig_rdmult;
4307
4308 if (cpi->common.show_frame) return orig_rdmult;
4309
4310 if (cpi->twopass.gf_group.index >= MAX_LAG_BUFFERS) return orig_rdmult;
4311
4312 for (row = mi_row; row < mi_row + mi_high; ++row) {
4313 for (col = mi_col; col < mi_col + mi_wide; ++col) {
4314 TplDepStats *this_stats = &tpl_stats[row * tpl_stride + col];
4315
4316 if (row >= cpi->common.mi_rows || col >= cpi->common.mi_cols) continue;
4317
4318 intra_cost += this_stats->intra_cost;
4319 mc_dep_cost += this_stats->mc_dep_cost;
4320 }
4321 }
4322
4323 aom_clear_system_state();
4324
4325 r0 = cpi->rd.r0;
4326 rk = (double)intra_cost / mc_dep_cost;
4327 beta = r0 / rk;
4328 dr = av1_get_adaptive_rdmult(cpi, beta);
4329
4330 dr = AOMMIN(dr, orig_rdmult * 3 / 2);
4331 dr = AOMMAX(dr, orig_rdmult * 1 / 2);
4332
4333 dr = AOMMAX(1, dr);
4334
4335 return dr;
4336 }
4337
4338 static void setup_delta_q(AV1_COMP *const cpi, MACROBLOCK *const x,
4339 const TileInfo *const tile_info, int mi_row,
4340 int mi_col, int num_planes) {
4341 AV1_COMMON *const cm = &cpi->common;
4342 MACROBLOCKD *const xd = &x->e_mbd;
4343 const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
4344 const BLOCK_SIZE sb_size = cm->seq_params.sb_size;
4345 const int mib_size = cm->seq_params.mib_size;
4346
4347 // Delta-q modulation based on variance.
4348 av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, sb_size);
4349
4350 int offset_qindex;
4351 if (DELTAQ_MODULATION == 1) {
4352 const int block_wavelet_energy_level =
4353 av1_block_wavelet_energy_level(cpi, x, sb_size);
4354 x->sb_energy_level = block_wavelet_energy_level;
4355 offset_qindex =
4356 av1_compute_deltaq_from_energy_level(cpi, block_wavelet_energy_level);
4357 } else {
4358 const int block_var_level = av1_log_block_var(cpi, x, sb_size);
4359 x->sb_energy_level = block_var_level;
4360 offset_qindex = av1_compute_deltaq_from_energy_level(cpi, block_var_level);
4361 }
4362 const int qmask = ~(delta_q_info->delta_q_res - 1);
4363 int current_qindex =
4364 clamp(cm->base_qindex + offset_qindex, delta_q_info->delta_q_res,
4365 256 - delta_q_info->delta_q_res);
4366 current_qindex =
4367 ((current_qindex - cm->base_qindex + delta_q_info->delta_q_res / 2) &
4368 qmask) +
4369 cm->base_qindex;
4370 assert(current_qindex > 0);
4371
4372 xd->delta_qindex = current_qindex - cm->base_qindex;
4373 set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
4374 xd->mi[0]->current_qindex = current_qindex;
4375 av1_init_plane_quantizers(cpi, x, xd->mi[0]->segment_id);
4376 if (cpi->oxcf.deltaq_mode == DELTA_Q_LF) {
4377 const int lfmask = ~(delta_q_info->delta_lf_res - 1);
4378 const int delta_lf_from_base =
4379 ((offset_qindex / 2 + delta_q_info->delta_lf_res / 2) & lfmask);
4380
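// (Worked example of the masking above, not in the original: assuming
// delta_lf_res == 2 so lfmask == ~1, an offset_qindex of +13 gives
// delta_lf_from_base = ((13 / 2) + 1) & ~1 = 6, i.e. half the qindex offset
// rounded to the loop-filter resolution, clamped to +/-MAX_LOOP_FILTER below.)
4381 // Pre-set the delta lf for the loop filter. Note that this value is set
4382 // before mi is assigned for each block in the current superblock.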
4383 for (int j = 0; j < AOMMIN(mib_size, cm->mi_rows - mi_row); j++) {
4384 for (int k = 0; k < AOMMIN(mib_size, cm->mi_cols - mi_col); k++) {
4385 cm->mi[(mi_row + j) * cm->mi_stride + (mi_col + k)].delta_lf_from_base =
4386 clamp(delta_lf_from_base, -MAX_LOOP_FILTER, MAX_LOOP_FILTER);
4387 const int frame_lf_count =
4388 av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
4389 for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
4390 cm->mi[(mi_row + j) * cm->mi_stride + (mi_col + k)].delta_lf[lf_id] =
4391 clamp(delta_lf_from_base, -MAX_LOOP_FILTER, MAX_LOOP_FILTER);
4392 }
4393 }
4394 }
4395 }
4396 }
4397
4398 // The first pass of the partition search considers only square partition block sizes.
4399 // The results will be used in the second partition search pass to prune
4400 // unlikely partition candidates.
4401 static void first_partition_search_pass(AV1_COMP *cpi, ThreadData *td,
4402 TileDataEnc *tile_data, int mi_row,
4403 int mi_col, TOKENEXTRA **tp) {
4404 MACROBLOCK *const x = &td->mb;
4405 x->cb_partition_scan = 1;
4406
4407 const SPEED_FEATURES *const sf = &cpi->sf;
4408 // Reset the stats tables.
4409 av1_zero(x->first_partition_pass_stats);
4410
4411 AV1_COMMON *const cm = &cpi->common;
4412 const BLOCK_SIZE sb_size = cm->seq_params.sb_size;
4413 const int mib_size_log2 = cm->seq_params.mib_size_log2;
4414 PC_TREE *const pc_root = td->pc_root[mib_size_log2 - MIN_MIB_SIZE_LOG2];
4415 RD_STATS dummy_rdc;
4416 rd_pick_sqr_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
4417 &dummy_rdc, INT64_MAX, pc_root, NULL);
4418 x->cb_partition_scan = 0;
4419
4420 x->source_variance = UINT_MAX;
4421 x->simple_motion_pred_sse = UINT_MAX;
4422 if (sf->adaptive_pred_interp_filter) {
4423 const int leaf_nodes = 256;
4424 for (int i = 0; i < leaf_nodes; ++i) {
4425 td->pc_tree[i].vertical[0].pred_interp_filter = SWITCHABLE;
4426 td->pc_tree[i].vertical[1].pred_interp_filter = SWITCHABLE;
4427 td->pc_tree[i].horizontal[0].pred_interp_filter = SWITCHABLE;
4428 td->pc_tree[i].horizontal[1].pred_interp_filter = SWITCHABLE;
4429 }
4430 }
4431
4432 x->mb_rd_record.num = x->mb_rd_record.index_start = 0;
4433 av1_zero(x->txb_rd_record_8X8);
4434 av1_zero(x->txb_rd_record_16X16);
4435 av1_zero(x->txb_rd_record_32X32);
4436 av1_zero(x->txb_rd_record_64X64);
4437 av1_zero(x->txb_rd_record_intra);
4438 av1_zero(x->pred_mv);
4439 pc_root->index = 0;
4440
4441 for (int idy = 0; idy < mi_size_high[sb_size]; ++idy) {
4442 for (int idx = 0; idx < mi_size_wide[sb_size]; ++idx) {
4443 const int offset = cm->mi_stride * (mi_row + idy) + (mi_col + idx);
4444 cm->mi_grid_visible[offset] = 0;
4445 }
4446 }
4447
4448 x->use_cb_search_range = 1;
4449
4450 for (int i = 0; i < FIRST_PARTITION_PASS_STATS_TABLES; ++i) {
4451 FIRST_PARTITION_PASS_STATS *const stat = &x->first_partition_pass_stats[i];
4452 if (stat->sample_counts < FIRST_PARTITION_PASS_MIN_SAMPLES) {
4453 // If there are not enough samples collected, make them all available.
4454 memset(stat->ref0_counts, 0xff, sizeof(stat->ref0_counts));
4455 memset(stat->ref1_counts, 0xff, sizeof(stat->ref1_counts));
4456 if (cpi->sf.use_first_partition_pass_interintra_stats)
4457 memset(stat->interintra_motion_mode_count, 0xff,
4458 sizeof(stat->interintra_motion_mode_count));
4459 } else if (sf->selective_ref_frame < 3) {
4460 // ALTREF2_FRAME and BWDREF_FRAME may be skipped during the
4461 // initial partition scan, so we don't eliminate them.
4462 stat->ref0_counts[ALTREF2_FRAME] = 0xff;
4463 stat->ref1_counts[ALTREF2_FRAME] = 0xff;
4464 stat->ref0_counts[BWDREF_FRAME] = 0xff;
4465 stat->ref1_counts[BWDREF_FRAME] = 0xff;
4466 if (cpi->sf.use_first_partition_pass_interintra_stats) {
4467 stat->interintra_motion_mode_count[ALTREF2_FRAME] = 0xff;
4468 stat->interintra_motion_mode_count[BWDREF_FRAME] = 0xff;
4469 }
4470 }
4471 }
4472 }
4473
4474 #define AVG_CDF_WEIGHT_LEFT 3
4475 #define AVG_CDF_WEIGHT_TOP_RIGHT 1
4476
4477 static void avg_cdf_symbol(aom_cdf_prob *cdf_ptr_left, aom_cdf_prob *cdf_ptr_tr,
4478 int num_cdfs, int cdf_stride, int nsymbs,
4479 int wt_left, int wt_tr) {
4480 for (int i = 0; i < num_cdfs; i++) {
4481 for (int j = 0; j <= nsymbs; j++) {
4482 cdf_ptr_left[i * cdf_stride + j] =
4483 (aom_cdf_prob)(((int)cdf_ptr_left[i * cdf_stride + j] * wt_left +
4484 (int)cdf_ptr_tr[i * cdf_stride + j] * wt_tr +
4485 ((wt_left + wt_tr) / 2)) /
4486 (wt_left + wt_tr));
4487 assert(cdf_ptr_left[i * cdf_stride + j] >= 0 &&
4488 cdf_ptr_left[i * cdf_stride + j] < CDF_PROB_TOP);
4489 }
4490 }
4491 }
4492
4493 #define AVERAGE_CDF(cname_left, cname_tr, nsymbs) \
4494 AVG_CDF_STRIDE(cname_left, cname_tr, nsymbs, CDF_SIZE(nsymbs))
4495
4496 #define AVG_CDF_STRIDE(cname_left, cname_tr, nsymbs, cdf_stride) \
4497 do { \
4498 aom_cdf_prob *cdf_ptr_left = (aom_cdf_prob *)cname_left; \
4499 aom_cdf_prob *cdf_ptr_tr = (aom_cdf_prob *)cname_tr; \
4500 int array_size = (int)sizeof(cname_left) / sizeof(aom_cdf_prob); \
4501 int num_cdfs = array_size / cdf_stride; \
4502 avg_cdf_symbol(cdf_ptr_left, cdf_ptr_tr, num_cdfs, cdf_stride, nsymbs, \
4503 wt_left, wt_tr); \
4504 } while (0)
4505
4506 static void avg_nmv(nmv_context *nmv_left, nmv_context *nmv_tr, int wt_left,
4507 int wt_tr) {
4508 AVERAGE_CDF(nmv_left->joints_cdf, nmv_tr->joints_cdf, 4);
4509 for (int i = 0; i < 2; i++) {
4510 AVERAGE_CDF(nmv_left->comps[i].classes_cdf, nmv_tr->comps[i].classes_cdf,
4511 MV_CLASSES);
4512 AVERAGE_CDF(nmv_left->comps[i].class0_fp_cdf,
4513 nmv_tr->comps[i].class0_fp_cdf, MV_FP_SIZE);
4514 AVERAGE_CDF(nmv_left->comps[i].fp_cdf, nmv_tr->comps[i].fp_cdf, MV_FP_SIZE);
4515 AVERAGE_CDF(nmv_left->comps[i].sign_cdf, nmv_tr->comps[i].sign_cdf, 2);
4516 AVERAGE_CDF(nmv_left->comps[i].class0_hp_cdf,
4517 nmv_tr->comps[i].class0_hp_cdf, 2);
4518 AVERAGE_CDF(nmv_left->comps[i].hp_cdf, nmv_tr->comps[i].hp_cdf, 2);
4519 AVERAGE_CDF(nmv_left->comps[i].class0_cdf, nmv_tr->comps[i].class0_cdf,
4520 CLASS0_SIZE);
4521 AVERAGE_CDF(nmv_left->comps[i].bits_cdf, nmv_tr->comps[i].bits_cdf, 2);
4522 }
4523 }
4524
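// (Worked example of avg_cdf_symbol above with the weights used here,
// AVG_CDF_WEIGHT_LEFT == 3 and AVG_CDF_WEIGHT_TOP_RIGHT == 1, not in the
// original: left = 16384 and top-right = 8192 give
// (16384 * 3 + 8192 * 1 + 2) / 4 = 57346 / 4 = 14336 in integer division,
// a rounded weighted mean biased toward the left superblock's statistics.)
4525 // In the case of row-based multi-threading of the encoder, since we always
4526 // keep a top-right sync, we can average the top-right SB's CDFs and the
4527 // left SB's CDFs and use the result for the current SB's encoding to
4528 // improve performance. This function facilitates the averaging of CDFs and
4529 // is used only when row-mt is enabled in the encoder.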
4530 static void avg_cdf_symbols(FRAME_CONTEXT *ctx_left, FRAME_CONTEXT *ctx_tr, 4531 int wt_left, int wt_tr) { 4532 AVERAGE_CDF(ctx_left->txb_skip_cdf, ctx_tr->txb_skip_cdf, 2); 4533 AVERAGE_CDF(ctx_left->eob_extra_cdf, ctx_tr->eob_extra_cdf, 2); 4534 AVERAGE_CDF(ctx_left->dc_sign_cdf, ctx_tr->dc_sign_cdf, 2); 4535 AVERAGE_CDF(ctx_left->eob_flag_cdf16, ctx_tr->eob_flag_cdf16, 5); 4536 AVERAGE_CDF(ctx_left->eob_flag_cdf32, ctx_tr->eob_flag_cdf32, 6); 4537 AVERAGE_CDF(ctx_left->eob_flag_cdf64, ctx_tr->eob_flag_cdf64, 7); 4538 AVERAGE_CDF(ctx_left->eob_flag_cdf128, ctx_tr->eob_flag_cdf128, 8); 4539 AVERAGE_CDF(ctx_left->eob_flag_cdf256, ctx_tr->eob_flag_cdf256, 9); 4540 AVERAGE_CDF(ctx_left->eob_flag_cdf512, ctx_tr->eob_flag_cdf512, 10); 4541 AVERAGE_CDF(ctx_left->eob_flag_cdf1024, ctx_tr->eob_flag_cdf1024, 11); 4542 AVERAGE_CDF(ctx_left->coeff_base_eob_cdf, ctx_tr->coeff_base_eob_cdf, 3); 4543 AVERAGE_CDF(ctx_left->coeff_base_cdf, ctx_tr->coeff_base_cdf, 4); 4544 AVERAGE_CDF(ctx_left->coeff_br_cdf, ctx_tr->coeff_br_cdf, BR_CDF_SIZE); 4545 AVERAGE_CDF(ctx_left->newmv_cdf, ctx_tr->newmv_cdf, 2); 4546 AVERAGE_CDF(ctx_left->zeromv_cdf, ctx_tr->zeromv_cdf, 2); 4547 AVERAGE_CDF(ctx_left->refmv_cdf, ctx_tr->refmv_cdf, 2); 4548 AVERAGE_CDF(ctx_left->drl_cdf, ctx_tr->drl_cdf, 2); 4549 AVERAGE_CDF(ctx_left->inter_compound_mode_cdf, 4550 ctx_tr->inter_compound_mode_cdf, INTER_COMPOUND_MODES); 4551 AVERAGE_CDF(ctx_left->compound_type_cdf, ctx_tr->compound_type_cdf, 4552 MASKED_COMPOUND_TYPES); 4553 AVERAGE_CDF(ctx_left->wedge_idx_cdf, ctx_tr->wedge_idx_cdf, 16); 4554 AVERAGE_CDF(ctx_left->interintra_cdf, ctx_tr->interintra_cdf, 2); 4555 AVERAGE_CDF(ctx_left->wedge_interintra_cdf, ctx_tr->wedge_interintra_cdf, 2); 4556 AVERAGE_CDF(ctx_left->interintra_mode_cdf, ctx_tr->interintra_mode_cdf, 4557 INTERINTRA_MODES); 4558 AVERAGE_CDF(ctx_left->motion_mode_cdf, ctx_tr->motion_mode_cdf, MOTION_MODES); 4559 AVERAGE_CDF(ctx_left->obmc_cdf, ctx_tr->obmc_cdf, 2); 4560 AVERAGE_CDF(ctx_left->palette_y_size_cdf, ctx_tr->palette_y_size_cdf, 4561 PALETTE_SIZES); 4562 AVERAGE_CDF(ctx_left->palette_uv_size_cdf, ctx_tr->palette_uv_size_cdf, 4563 PALETTE_SIZES); 4564 for (int j = 0; j < PALETTE_SIZES; j++) { 4565 int nsymbs = j + PALETTE_MIN_SIZE; 4566 AVG_CDF_STRIDE(ctx_left->palette_y_color_index_cdf[j], 4567 ctx_tr->palette_y_color_index_cdf[j], nsymbs, 4568 CDF_SIZE(PALETTE_COLORS)); 4569 AVG_CDF_STRIDE(ctx_left->palette_uv_color_index_cdf[j], 4570 ctx_tr->palette_uv_color_index_cdf[j], nsymbs, 4571 CDF_SIZE(PALETTE_COLORS)); 4572 } 4573 AVERAGE_CDF(ctx_left->palette_y_mode_cdf, ctx_tr->palette_y_mode_cdf, 2); 4574 AVERAGE_CDF(ctx_left->palette_uv_mode_cdf, ctx_tr->palette_uv_mode_cdf, 2); 4575 AVERAGE_CDF(ctx_left->comp_inter_cdf, ctx_tr->comp_inter_cdf, 2); 4576 AVERAGE_CDF(ctx_left->single_ref_cdf, ctx_tr->single_ref_cdf, 2); 4577 AVERAGE_CDF(ctx_left->comp_ref_type_cdf, ctx_tr->comp_ref_type_cdf, 2); 4578 AVERAGE_CDF(ctx_left->uni_comp_ref_cdf, ctx_tr->uni_comp_ref_cdf, 2); 4579 AVERAGE_CDF(ctx_left->comp_ref_cdf, ctx_tr->comp_ref_cdf, 2); 4580 AVERAGE_CDF(ctx_left->comp_bwdref_cdf, ctx_tr->comp_bwdref_cdf, 2); 4581 AVERAGE_CDF(ctx_left->txfm_partition_cdf, ctx_tr->txfm_partition_cdf, 2); 4582 AVERAGE_CDF(ctx_left->compound_index_cdf, ctx_tr->compound_index_cdf, 2); 4583 AVERAGE_CDF(ctx_left->comp_group_idx_cdf, ctx_tr->comp_group_idx_cdf, 2); 4584 AVERAGE_CDF(ctx_left->skip_mode_cdfs, ctx_tr->skip_mode_cdfs, 2); 4585 AVERAGE_CDF(ctx_left->skip_cdfs, ctx_tr->skip_cdfs, 2); 4586 
AVERAGE_CDF(ctx_left->intra_inter_cdf, ctx_tr->intra_inter_cdf, 2); 4587 avg_nmv(&ctx_left->nmvc, &ctx_tr->nmvc, wt_left, wt_tr); 4588 avg_nmv(&ctx_left->ndvc, &ctx_tr->ndvc, wt_left, wt_tr); 4589 AVERAGE_CDF(ctx_left->intrabc_cdf, ctx_tr->intrabc_cdf, 2); 4590 AVERAGE_CDF(ctx_left->seg.tree_cdf, ctx_tr->seg.tree_cdf, MAX_SEGMENTS); 4591 AVERAGE_CDF(ctx_left->seg.pred_cdf, ctx_tr->seg.pred_cdf, 2); 4592 AVERAGE_CDF(ctx_left->seg.spatial_pred_seg_cdf, 4593 ctx_tr->seg.spatial_pred_seg_cdf, MAX_SEGMENTS); 4594 AVERAGE_CDF(ctx_left->filter_intra_cdfs, ctx_tr->filter_intra_cdfs, 2); 4595 AVERAGE_CDF(ctx_left->filter_intra_mode_cdf, ctx_tr->filter_intra_mode_cdf, 4596 FILTER_INTRA_MODES); 4597 AVERAGE_CDF(ctx_left->switchable_restore_cdf, ctx_tr->switchable_restore_cdf, 4598 RESTORE_SWITCHABLE_TYPES); 4599 AVERAGE_CDF(ctx_left->wiener_restore_cdf, ctx_tr->wiener_restore_cdf, 2); 4600 AVERAGE_CDF(ctx_left->sgrproj_restore_cdf, ctx_tr->sgrproj_restore_cdf, 2); 4601 AVERAGE_CDF(ctx_left->y_mode_cdf, ctx_tr->y_mode_cdf, INTRA_MODES); 4602 AVG_CDF_STRIDE(ctx_left->uv_mode_cdf[0], ctx_tr->uv_mode_cdf[0], 4603 UV_INTRA_MODES - 1, CDF_SIZE(UV_INTRA_MODES)); 4604 AVERAGE_CDF(ctx_left->uv_mode_cdf[1], ctx_tr->uv_mode_cdf[1], UV_INTRA_MODES); 4605 for (int i = 0; i < PARTITION_CONTEXTS; i++) { 4606 if (i < 4) { 4607 AVG_CDF_STRIDE(ctx_left->partition_cdf[i], ctx_tr->partition_cdf[i], 4, 4608 CDF_SIZE(10)); 4609 } else if (i < 16) { 4610 AVERAGE_CDF(ctx_left->partition_cdf[i], ctx_tr->partition_cdf[i], 10); 4611 } else { 4612 AVG_CDF_STRIDE(ctx_left->partition_cdf[i], ctx_tr->partition_cdf[i], 8, 4613 CDF_SIZE(10)); 4614 } 4615 } 4616 AVERAGE_CDF(ctx_left->switchable_interp_cdf, ctx_tr->switchable_interp_cdf, 4617 SWITCHABLE_FILTERS); 4618 AVERAGE_CDF(ctx_left->kf_y_cdf, ctx_tr->kf_y_cdf, INTRA_MODES); 4619 AVERAGE_CDF(ctx_left->angle_delta_cdf, ctx_tr->angle_delta_cdf, 4620 2 * MAX_ANGLE_DELTA + 1); 4621 AVG_CDF_STRIDE(ctx_left->tx_size_cdf[0], ctx_tr->tx_size_cdf[0], MAX_TX_DEPTH, 4622 CDF_SIZE(MAX_TX_DEPTH + 1)); 4623 AVERAGE_CDF(ctx_left->tx_size_cdf[1], ctx_tr->tx_size_cdf[1], 4624 MAX_TX_DEPTH + 1); 4625 AVERAGE_CDF(ctx_left->tx_size_cdf[2], ctx_tr->tx_size_cdf[2], 4626 MAX_TX_DEPTH + 1); 4627 AVERAGE_CDF(ctx_left->tx_size_cdf[3], ctx_tr->tx_size_cdf[3], 4628 MAX_TX_DEPTH + 1); 4629 AVERAGE_CDF(ctx_left->delta_q_cdf, ctx_tr->delta_q_cdf, DELTA_Q_PROBS + 1); 4630 AVERAGE_CDF(ctx_left->delta_lf_cdf, ctx_tr->delta_lf_cdf, DELTA_LF_PROBS + 1); 4631 for (int i = 0; i < FRAME_LF_COUNT; i++) { 4632 AVERAGE_CDF(ctx_left->delta_lf_multi_cdf[i], ctx_tr->delta_lf_multi_cdf[i], 4633 DELTA_LF_PROBS + 1); 4634 } 4635 AVG_CDF_STRIDE(ctx_left->intra_ext_tx_cdf[1], ctx_tr->intra_ext_tx_cdf[1], 7, 4636 CDF_SIZE(TX_TYPES)); 4637 AVG_CDF_STRIDE(ctx_left->intra_ext_tx_cdf[2], ctx_tr->intra_ext_tx_cdf[2], 5, 4638 CDF_SIZE(TX_TYPES)); 4639 AVG_CDF_STRIDE(ctx_left->inter_ext_tx_cdf[1], ctx_tr->inter_ext_tx_cdf[1], 16, 4640 CDF_SIZE(TX_TYPES)); 4641 AVG_CDF_STRIDE(ctx_left->inter_ext_tx_cdf[2], ctx_tr->inter_ext_tx_cdf[2], 12, 4642 CDF_SIZE(TX_TYPES)); 4643 AVG_CDF_STRIDE(ctx_left->inter_ext_tx_cdf[3], ctx_tr->inter_ext_tx_cdf[3], 2, 4644 CDF_SIZE(TX_TYPES)); 4645 AVERAGE_CDF(ctx_left->cfl_sign_cdf, ctx_tr->cfl_sign_cdf, CFL_JOINT_SIGNS); 4646 AVERAGE_CDF(ctx_left->cfl_alpha_cdf, ctx_tr->cfl_alpha_cdf, 4647 CFL_ALPHABET_SIZE); 4648 } 4649 4650 static void encode_sb_row(AV1_COMP *cpi, ThreadData *td, TileDataEnc *tile_data, 4651 int mi_row, TOKENEXTRA **tp, int use_nonrd_mode) { 4652 AV1_COMMON *const cm = &cpi->common; 4653 
const int num_planes = av1_num_planes(cm); 4654 const TileInfo *const tile_info = &tile_data->tile_info; 4655 MACROBLOCK *const x = &td->mb; 4656 MACROBLOCKD *const xd = &x->e_mbd; 4657 const SPEED_FEATURES *const sf = &cpi->sf; 4658 const int leaf_nodes = 256; 4659 const int sb_cols_in_tile = av1_get_sb_cols_in_tile(cm, tile_data->tile_info); 4660 const BLOCK_SIZE sb_size = cm->seq_params.sb_size; 4661 const int mib_size = cm->seq_params.mib_size; 4662 const int mib_size_log2 = cm->seq_params.mib_size_log2; 4663 const int sb_row = (mi_row - tile_info->mi_row_start) >> mib_size_log2; 4664 4665 #if CONFIG_COLLECT_COMPONENT_TIMING 4666 start_timing(cpi, encode_sb_time); 4667 #endif 4668 4669 // Initialize the left context for the new SB row 4670 av1_zero_left_context(xd); 4671 4672 // Reset delta for every tile 4673 if (mi_row == tile_info->mi_row_start) { 4674 if (cm->delta_q_info.delta_q_present_flag) 4675 xd->current_qindex = cm->base_qindex; 4676 if (cm->delta_q_info.delta_lf_present_flag) { 4677 av1_reset_loop_filter_delta(xd, av1_num_planes(cm)); 4678 } 4679 } 4680 4681 // Code each SB in the row 4682 for (int mi_col = tile_info->mi_col_start, sb_col_in_tile = 0; 4683 mi_col < tile_info->mi_col_end; mi_col += mib_size, sb_col_in_tile++) { 4684 (*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, sb_row, 4685 sb_col_in_tile); 4686 if (tile_data->allow_update_cdf && (cpi->row_mt == 1) && 4687 (tile_info->mi_row_start != mi_row)) { 4688 if ((tile_info->mi_col_start == mi_col)) { 4689 // restore frame context of 1st column sb 4690 memcpy(xd->tile_ctx, x->row_ctx, sizeof(*xd->tile_ctx)); 4691 } else { 4692 int wt_left = AVG_CDF_WEIGHT_LEFT; 4693 int wt_tr = AVG_CDF_WEIGHT_TOP_RIGHT; 4694 if (tile_info->mi_col_end > (mi_col + mib_size)) 4695 avg_cdf_symbols(xd->tile_ctx, x->row_ctx + sb_col_in_tile, wt_left, 4696 wt_tr); 4697 else 4698 avg_cdf_symbols(xd->tile_ctx, x->row_ctx + sb_col_in_tile - 1, 4699 wt_left, wt_tr); 4700 } 4701 } 4702 4703 switch (cpi->oxcf.coeff_cost_upd_freq) { 4704 case COST_UPD_TILE: // Tile level 4705 if (mi_row != tile_info->mi_row_start) break; 4706 AOM_FALLTHROUGH_INTENDED; 4707 case COST_UPD_SBROW: // SB row level in tile 4708 if (mi_col != tile_info->mi_col_start) break; 4709 AOM_FALLTHROUGH_INTENDED; 4710 case COST_UPD_SB: // SB level 4711 av1_fill_coeff_costs(&td->mb, xd->tile_ctx, num_planes); 4712 break; 4713 default: assert(0); 4714 } 4715 4716 switch (cpi->oxcf.mode_cost_upd_freq) { 4717 case COST_UPD_TILE: // Tile level 4718 if (mi_row != tile_info->mi_row_start) break; 4719 AOM_FALLTHROUGH_INTENDED; 4720 case COST_UPD_SBROW: // SB row level in tile 4721 if (mi_col != tile_info->mi_col_start) break; 4722 AOM_FALLTHROUGH_INTENDED; 4723 case COST_UPD_SB: // SB level 4724 av1_fill_mode_rates(cm, x, xd->tile_ctx); 4725 break; 4726 default: assert(0); 4727 } 4728 4729 if (sf->adaptive_pred_interp_filter) { 4730 for (int i = 0; i < leaf_nodes; ++i) { 4731 td->pc_tree[i].vertical[0].pred_interp_filter = SWITCHABLE; 4732 td->pc_tree[i].vertical[1].pred_interp_filter = SWITCHABLE; 4733 td->pc_tree[i].horizontal[0].pred_interp_filter = SWITCHABLE; 4734 td->pc_tree[i].horizontal[1].pred_interp_filter = SWITCHABLE; 4735 } 4736 } 4737 4738 x->mb_rd_record.num = x->mb_rd_record.index_start = 0; 4739 4740 if (!use_nonrd_mode) { 4741 av1_zero(x->txb_rd_record_8X8); 4742 av1_zero(x->txb_rd_record_16X16); 4743 av1_zero(x->txb_rd_record_32X32); 4744 av1_zero(x->txb_rd_record_64X64); 4745 av1_zero(x->txb_rd_record_intra); 4746 } 4747 4748 
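// (Editorial aside, not part of libaom.) The two cost-update switches above
// rely on intentional fall-through: COST_UPD_TILE refreshes costs only at the
// tile's first SB, COST_UPD_SBROW at each SB row's first SB, and COST_UPD_SB
// at every SB. A minimal standalone model of that gating, with illustrative
// names, compiled out deliberately:
#if 0
#include <stdbool.h>
#include <stdio.h>

enum { UPD_TILE, UPD_SBROW, UPD_SB };

static bool should_update(int freq, bool first_sb_row, bool first_sb_col) {
  switch (freq) {
    case UPD_TILE:  // Update only at the tile's first SB.
      if (!first_sb_row) return false;
      // fall through
    case UPD_SBROW:  // Update only at each SB row's first SB.
      if (!first_sb_col) return false;
      // fall through
    case UPD_SB:  // Update at every SB.
      return true;
    default: return false;
  }
}

int main(void) {
  printf("%d %d\n", should_update(UPD_TILE, true, true),
         should_update(UPD_TILE, true, false));  // prints "1 0"
  return 0;
}
#endif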
av1_zero(x->picked_ref_frames_mask); 4749 4750 av1_zero(x->pred_mv); 4751 PC_TREE *const pc_root = td->pc_root[mib_size_log2 - MIN_MIB_SIZE_LOG2]; 4752 pc_root->index = 0; 4753 4754 if ((sf->simple_motion_search_prune_rect || 4755 sf->simple_motion_search_early_term_none || 4756 sf->firstpass_simple_motion_search_early_term) && 4757 !frame_is_intra_only(cm)) { 4758 init_simple_motion_search_mvs(pc_root); 4759 } 4760 4761 const struct segmentation *const seg = &cm->seg; 4762 int seg_skip = 0; 4763 if (seg->enabled) { 4764 const uint8_t *const map = 4765 seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map; 4766 const int segment_id = 4767 map ? get_segment_id(cm, map, sb_size, mi_row, mi_col) : 0; 4768 seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP); 4769 } 4770 xd->cur_frame_force_integer_mv = cm->cur_frame_force_integer_mv; 4771 4772 x->sb_energy_level = 0; 4773 if (cm->delta_q_info.delta_q_present_flag) 4774 setup_delta_q(cpi, x, tile_info, mi_row, mi_col, num_planes); 4775 4776 int dummy_rate; 4777 int64_t dummy_dist; 4778 RD_STATS dummy_rdc; 4779 const int idx_str = cm->mi_stride * mi_row + mi_col; 4780 MB_MODE_INFO **mi = cm->mi_grid_visible + idx_str; 4781 x->source_variance = UINT_MAX; 4782 x->simple_motion_pred_sse = UINT_MAX; 4783 if (sf->partition_search_type == FIXED_PARTITION || seg_skip) { 4784 set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size); 4785 const BLOCK_SIZE bsize = seg_skip ? sb_size : sf->always_this_block_size; 4786 set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize); 4787 rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size, 4788 &dummy_rate, &dummy_dist, 1, pc_root); 4789 } else if (cpi->partition_search_skippable_frame) { 4790 set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size); 4791 const BLOCK_SIZE bsize = 4792 get_rd_var_based_fixed_partition(cpi, x, mi_row, mi_col); 4793 set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize); 4794 rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size, 4795 &dummy_rate, &dummy_dist, 1, pc_root); 4796 } else if (sf->partition_search_type == VAR_BASED_PARTITION && 4797 use_nonrd_mode) { 4798 set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size); 4799 av1_choose_var_based_partitioning(cpi, tile_info, x, mi_row, mi_col); 4800 nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size, 4801 &dummy_rate, &dummy_dist, 1, pc_root); 4802 4803 } else { 4804 const int orig_rdmult = cpi->rd.RDMULT; 4805 x->cb_rdmult = orig_rdmult; 4806 if (cpi->twopass.gf_group.index > 0 && cpi->oxcf.enable_tpl_model && 4807 cpi->oxcf.aq_mode == NO_AQ && cpi->oxcf.deltaq_mode == 0) { 4808 const int dr = 4809 get_rdmult_delta(cpi, BLOCK_128X128, mi_row, mi_col, orig_rdmult); 4810 4811 x->cb_rdmult = dr; 4812 x->rdmult = x->cb_rdmult; 4813 } 4814 4815 reset_partition(pc_root, sb_size); 4816 x->use_cb_search_range = 0; 4817 #if CONFIG_COLLECT_COMPONENT_TIMING 4818 start_timing(cpi, first_partition_search_pass_time); 4819 #endif 4820 init_first_partition_pass_stats_tables(cpi, 4821 x->first_partition_pass_stats); 4822 // Do the first pass if we need two pass partition search 4823 if (cpi->two_pass_partition_search && 4824 cpi->sf.use_square_partition_only_threshold > BLOCK_4X4 && 4825 mi_row + mi_size_high[sb_size] <= cm->mi_rows && 4826 mi_col + mi_size_wide[sb_size] <= cm->mi_cols && 4827 cm->current_frame.frame_type != KEY_FRAME) { 4828 first_partition_search_pass(cpi, td, tile_data, mi_row, mi_col, tp); 4829 } 4830 #if CONFIG_COLLECT_COMPONENT_TIMING 
4831 end_timing(cpi, first_partition_search_pass_time); 4832 #endif 4833 4834 #if CONFIG_COLLECT_COMPONENT_TIMING 4835 start_timing(cpi, rd_pick_partition_time); 4836 #endif 4837 BLOCK_SIZE max_sq_size = BLOCK_128X128; 4838 switch (cpi->oxcf.max_partition_size) { 4839 case 4: max_sq_size = BLOCK_4X4; break; 4840 case 8: max_sq_size = BLOCK_8X8; break; 4841 case 16: max_sq_size = BLOCK_16X16; break; 4842 case 32: max_sq_size = BLOCK_32X32; break; 4843 case 64: max_sq_size = BLOCK_64X64; break; 4844 case 128: max_sq_size = BLOCK_128X128; break; 4845 default: assert(0); break; 4846 } 4847 max_sq_size = AOMMIN(max_sq_size, sb_size); 4848 4849 BLOCK_SIZE min_sq_size = BLOCK_4X4; 4850 switch (cpi->oxcf.min_partition_size) { 4851 case 4: min_sq_size = BLOCK_4X4; break; 4852 case 8: min_sq_size = BLOCK_8X8; break; 4853 case 16: min_sq_size = BLOCK_16X16; break; 4854 case 32: min_sq_size = BLOCK_32X32; break; 4855 case 64: min_sq_size = BLOCK_64X64; break; 4856 case 128: min_sq_size = BLOCK_128X128; break; 4857 default: assert(0); break; 4858 } 4859 4860 if (use_auto_max_partition(cpi, sb_size, mi_row, mi_col)) { 4861 float features[FEATURE_SIZE_MAX_MIN_PART_PRED] = { 0.0f }; 4862 4863 av1_get_max_min_partition_features(cpi, x, mi_row, mi_col, features); 4864 max_sq_size = 4865 AOMMIN(av1_predict_max_partition(cpi, x, features), max_sq_size); 4866 } 4867 4868 min_sq_size = AOMMIN(min_sq_size, max_sq_size); 4869 4870 rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size, 4871 max_sq_size, min_sq_size, &dummy_rdc, INT64_MAX, 4872 pc_root, NULL); 4873 #if CONFIG_COLLECT_COMPONENT_TIMING 4874 end_timing(cpi, rd_pick_partition_time); 4875 #endif 4876 } 4877 // TODO(angiebird): Let inter_mode_rd_model_estimation support multi-tile. 4878 if (cpi->sf.inter_mode_rd_model_estimation == 1 && cm->tile_cols == 1 && 4879 cm->tile_rows == 1) { 4880 av1_inter_mode_data_fit(tile_data, x->rdmult); 4881 } 4882 if (tile_data->allow_update_cdf && (cpi->row_mt == 1) && 4883 (tile_info->mi_row_end > (mi_row + mib_size))) { 4884 if (sb_cols_in_tile == 1) 4885 memcpy(x->row_ctx, xd->tile_ctx, sizeof(*xd->tile_ctx)); 4886 else if (sb_col_in_tile >= 1) 4887 memcpy(x->row_ctx + sb_col_in_tile - 1, xd->tile_ctx, 4888 sizeof(*xd->tile_ctx)); 4889 } 4890 (*(cpi->row_mt_sync_write_ptr))(&tile_data->row_mt_sync, sb_row, 4891 sb_col_in_tile, sb_cols_in_tile); 4892 } 4893 #if CONFIG_COLLECT_COMPONENT_TIMING 4894 end_timing(cpi, encode_sb_time); 4895 #endif 4896 } 4897 4898 static void init_encode_frame_mb_context(AV1_COMP *cpi) { 4899 AV1_COMMON *const cm = &cpi->common; 4900 const int num_planes = av1_num_planes(cm); 4901 MACROBLOCK *const x = &cpi->td.mb; 4902 MACROBLOCKD *const xd = &x->e_mbd; 4903 4904 // Copy data over into macro block data structures. 
4905 av1_setup_src_planes(x, cpi->source, 0, 0, num_planes, 4906 cm->seq_params.sb_size); 4907 4908 av1_setup_block_planes(xd, cm->seq_params.subsampling_x, 4909 cm->seq_params.subsampling_y, num_planes); 4910 } 4911 4912 static MV_REFERENCE_FRAME get_frame_type(const AV1_COMP *cpi) { 4913 if (frame_is_intra_only(&cpi->common)) { 4914 return INTRA_FRAME; 4915 } else if ((cpi->rc.is_src_frame_alt_ref && cpi->refresh_golden_frame) || 4916 cpi->rc.is_src_frame_internal_arf) { 4917 // We will not update the golden frame with an internal overlay frame 4918 return ALTREF_FRAME; 4919 } else if (cpi->refresh_golden_frame || cpi->refresh_alt2_ref_frame || 4920 cpi->refresh_alt_ref_frame) { 4921 return GOLDEN_FRAME; 4922 } else { 4923 return LAST_FRAME; 4924 } 4925 } 4926 4927 static TX_MODE select_tx_mode(const AV1_COMP *cpi) { 4928 if (cpi->common.coded_lossless) return ONLY_4X4; 4929 if (cpi->sf.tx_size_search_method == USE_LARGESTALL) 4930 return TX_MODE_LARGEST; 4931 else if (cpi->sf.tx_size_search_method == USE_FULL_RD || 4932 cpi->sf.tx_size_search_method == USE_FAST_RD) 4933 return TX_MODE_SELECT; 4934 else 4935 return cpi->common.tx_mode; 4936 } 4937 4938 void av1_alloc_tile_data(AV1_COMP *cpi) { 4939 AV1_COMMON *const cm = &cpi->common; 4940 const int tile_cols = cm->tile_cols; 4941 const int tile_rows = cm->tile_rows; 4942 int tile_col, tile_row; 4943 4944 if (cpi->tile_data != NULL) aom_free(cpi->tile_data); 4945 CHECK_MEM_ERROR( 4946 cm, cpi->tile_data, 4947 aom_memalign(32, tile_cols * tile_rows * sizeof(*cpi->tile_data))); 4948 cpi->allocated_tiles = tile_cols * tile_rows; 4949 4950 for (tile_row = 0; tile_row < tile_rows; ++tile_row) 4951 for (tile_col = 0; tile_col < tile_cols; ++tile_col) { 4952 TileDataEnc *const tile_data = 4953 &cpi->tile_data[tile_row * tile_cols + tile_col]; 4954 int i, j; 4955 for (i = 0; i < BLOCK_SIZES_ALL; ++i) { 4956 for (j = 0; j < MAX_MODES; ++j) { 4957 tile_data->thresh_freq_fact[i][j] = 32; 4958 } 4959 } 4960 } 4961 } 4962 4963 void av1_init_tile_data(AV1_COMP *cpi) { 4964 AV1_COMMON *const cm = &cpi->common; 4965 const int num_planes = av1_num_planes(cm); 4966 const int tile_cols = cm->tile_cols; 4967 const int tile_rows = cm->tile_rows; 4968 int tile_col, tile_row; 4969 TOKENEXTRA *pre_tok = cpi->tile_tok[0][0]; 4970 TOKENLIST *tplist = cpi->tplist[0][0]; 4971 unsigned int tile_tok = 0; 4972 int tplist_count = 0; 4973 4974 for (tile_row = 0; tile_row < tile_rows; ++tile_row) { 4975 for (tile_col = 0; tile_col < tile_cols; ++tile_col) { 4976 TileDataEnc *const tile_data = 4977 &cpi->tile_data[tile_row * tile_cols + tile_col]; 4978 TileInfo *const tile_info = &tile_data->tile_info; 4979 av1_tile_init(tile_info, cm, tile_row, tile_col); 4980 4981 cpi->tile_tok[tile_row][tile_col] = pre_tok + tile_tok; 4982 pre_tok = cpi->tile_tok[tile_row][tile_col]; 4983 tile_tok = allocated_tokens( 4984 *tile_info, cm->seq_params.mib_size_log2 + MI_SIZE_LOG2, num_planes); 4985 cpi->tplist[tile_row][tile_col] = tplist + tplist_count; 4986 tplist = cpi->tplist[tile_row][tile_col]; 4987 tplist_count = av1_get_sb_rows_in_tile(cm, tile_data->tile_info); 4988 tile_data->allow_update_cdf = !cm->large_scale_tile; 4989 tile_data->allow_update_cdf = 4990 tile_data->allow_update_cdf && !cm->disable_cdf_update; 4991 tile_data->tctx = *cm->fc; 4992 } 4993 } 4994 } 4995 4996 void av1_encode_sb_row(AV1_COMP *cpi, ThreadData *td, int tile_row, 4997 int tile_col, int mi_row) { 4998 AV1_COMMON *const cm = &cpi->common; 4999 const int num_planes = av1_num_planes(cm); 5000 const int 
tile_cols = cm->tile_cols; 5001 TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col]; 5002 const TileInfo *const tile_info = &this_tile->tile_info; 5003 TOKENEXTRA *tok = NULL; 5004 const int sb_row_in_tile = 5005 (mi_row - tile_info->mi_row_start) >> cm->seq_params.mib_size_log2; 5006 const int tile_mb_cols = 5007 (tile_info->mi_col_end - tile_info->mi_col_start + 2) >> 2; 5008 const int num_mb_rows_in_sb = 5009 ((1 << (cm->seq_params.mib_size_log2 + MI_SIZE_LOG2)) + 8) >> 4; 5010 5011 get_start_tok(cpi, tile_row, tile_col, mi_row, &tok, 5012 cm->seq_params.mib_size_log2 + MI_SIZE_LOG2, num_planes); 5013 cpi->tplist[tile_row][tile_col][sb_row_in_tile].start = tok; 5014 5015 encode_sb_row(cpi, td, this_tile, mi_row, &tok, cpi->sf.use_nonrd_pick_mode); 5016 5017 cpi->tplist[tile_row][tile_col][sb_row_in_tile].stop = tok; 5018 cpi->tplist[tile_row][tile_col][sb_row_in_tile].count = 5019 (unsigned int)(cpi->tplist[tile_row][tile_col][sb_row_in_tile].stop - 5020 cpi->tplist[tile_row][tile_col][sb_row_in_tile].start); 5021 5022 assert( 5023 (unsigned int)(tok - 5024 cpi->tplist[tile_row][tile_col][sb_row_in_tile].start) <= 5025 get_token_alloc(num_mb_rows_in_sb, tile_mb_cols, 5026 cm->seq_params.mib_size_log2 + MI_SIZE_LOG2, num_planes)); 5027 5028 (void)tile_mb_cols; 5029 (void)num_mb_rows_in_sb; 5030 } 5031 5032 void av1_encode_tile(AV1_COMP *cpi, ThreadData *td, int tile_row, 5033 int tile_col) { 5034 AV1_COMMON *const cm = &cpi->common; 5035 TileDataEnc *const this_tile = 5036 &cpi->tile_data[tile_row * cm->tile_cols + tile_col]; 5037 const TileInfo *const tile_info = &this_tile->tile_info; 5038 int mi_row; 5039 5040 av1_inter_mode_data_init(this_tile); 5041 5042 av1_zero_above_context(cm, &td->mb.e_mbd, tile_info->mi_col_start, 5043 tile_info->mi_col_end, tile_row); 5044 av1_init_above_context(cm, &td->mb.e_mbd, tile_row); 5045 5046 // Set up pointers to per thread motion search counters. 5047 this_tile->m_search_count = 0; // Count of motion search hits. 5048 this_tile->ex_search_count = 0; // Exhaustive mesh search hits. 
5049 td->mb.m_search_count_ptr = &this_tile->m_search_count; 5050 td->mb.ex_search_count_ptr = &this_tile->ex_search_count; 5051 5052 cfl_init(&td->mb.e_mbd.cfl, &cm->seq_params); 5053 5054 av1_crc32c_calculator_init(&td->mb.mb_rd_record.crc_calculator); 5055 5056 for (mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end; 5057 mi_row += cm->seq_params.mib_size) { 5058 av1_encode_sb_row(cpi, td, tile_row, tile_col, mi_row); 5059 } 5060 } 5061 5062 static void encode_tiles(AV1_COMP *cpi) { 5063 AV1_COMMON *const cm = &cpi->common; 5064 const int tile_cols = cm->tile_cols; 5065 const int tile_rows = cm->tile_rows; 5066 int tile_col, tile_row; 5067 5068 if (cpi->tile_data == NULL || cpi->allocated_tiles < tile_cols * tile_rows) 5069 av1_alloc_tile_data(cpi); 5070 5071 av1_init_tile_data(cpi); 5072 5073 for (tile_row = 0; tile_row < tile_rows; ++tile_row) { 5074 for (tile_col = 0; tile_col < tile_cols; ++tile_col) { 5075 TileDataEnc *const this_tile = 5076 &cpi->tile_data[tile_row * cm->tile_cols + tile_col]; 5077 cpi->td.intrabc_used = 0; 5078 cpi->td.mb.e_mbd.tile_ctx = &this_tile->tctx; 5079 cpi->td.mb.tile_pb_ctx = &this_tile->tctx; 5080 av1_encode_tile(cpi, &cpi->td, tile_row, tile_col); 5081 cpi->intrabc_used |= cpi->td.intrabc_used; 5082 } 5083 } 5084 } 5085 5086 #define GLOBAL_TRANS_TYPES_ENC 3 // highest motion model to search 5087 static int gm_get_params_cost(const WarpedMotionParams *gm, 5088 const WarpedMotionParams *ref_gm, int allow_hp) { 5089 int params_cost = 0; 5090 int trans_bits, trans_prec_diff; 5091 switch (gm->wmtype) { 5092 case AFFINE: 5093 case ROTZOOM: 5094 params_cost += aom_count_signed_primitive_refsubexpfin( 5095 GM_ALPHA_MAX + 1, SUBEXPFIN_K, 5096 (ref_gm->wmmat[2] >> GM_ALPHA_PREC_DIFF) - (1 << GM_ALPHA_PREC_BITS), 5097 (gm->wmmat[2] >> GM_ALPHA_PREC_DIFF) - (1 << GM_ALPHA_PREC_BITS)); 5098 params_cost += aom_count_signed_primitive_refsubexpfin( 5099 GM_ALPHA_MAX + 1, SUBEXPFIN_K, 5100 (ref_gm->wmmat[3] >> GM_ALPHA_PREC_DIFF), 5101 (gm->wmmat[3] >> GM_ALPHA_PREC_DIFF)); 5102 if (gm->wmtype >= AFFINE) { 5103 params_cost += aom_count_signed_primitive_refsubexpfin( 5104 GM_ALPHA_MAX + 1, SUBEXPFIN_K, 5105 (ref_gm->wmmat[4] >> GM_ALPHA_PREC_DIFF), 5106 (gm->wmmat[4] >> GM_ALPHA_PREC_DIFF)); 5107 params_cost += aom_count_signed_primitive_refsubexpfin( 5108 GM_ALPHA_MAX + 1, SUBEXPFIN_K, 5109 (ref_gm->wmmat[5] >> GM_ALPHA_PREC_DIFF) - 5110 (1 << GM_ALPHA_PREC_BITS), 5111 (gm->wmmat[5] >> GM_ALPHA_PREC_DIFF) - (1 << GM_ALPHA_PREC_BITS)); 5112 } 5113 AOM_FALLTHROUGH_INTENDED; 5114 case TRANSLATION: 5115 trans_bits = (gm->wmtype == TRANSLATION) 5116 ? GM_ABS_TRANS_ONLY_BITS - !allow_hp 5117 : GM_ABS_TRANS_BITS; 5118 trans_prec_diff = (gm->wmtype == TRANSLATION) 5119 ? 
GM_TRANS_ONLY_PREC_DIFF + !allow_hp 5120 : GM_TRANS_PREC_DIFF; 5121 params_cost += aom_count_signed_primitive_refsubexpfin( 5122 (1 << trans_bits) + 1, SUBEXPFIN_K, 5123 (ref_gm->wmmat[0] >> trans_prec_diff), 5124 (gm->wmmat[0] >> trans_prec_diff)); 5125 params_cost += aom_count_signed_primitive_refsubexpfin( 5126 (1 << trans_bits) + 1, SUBEXPFIN_K, 5127 (ref_gm->wmmat[1] >> trans_prec_diff), 5128 (gm->wmmat[1] >> trans_prec_diff)); 5129 AOM_FALLTHROUGH_INTENDED; 5130 case IDENTITY: break; 5131 default: assert(0); 5132 } 5133 return (params_cost << AV1_PROB_COST_SHIFT); 5134 } 5135 5136 static int do_gm_search_logic(SPEED_FEATURES *const sf, int num_refs_using_gm, 5137 int frame) { 5138 (void)num_refs_using_gm; 5139 (void)frame; 5140 switch (sf->gm_search_type) { 5141 case GM_FULL_SEARCH: return 1; 5142 case GM_REDUCED_REF_SEARCH_SKIP_L2_L3: 5143 return !(frame == LAST2_FRAME || frame == LAST3_FRAME); 5144 case GM_REDUCED_REF_SEARCH_SKIP_L2_L3_ARF2: 5145 return !(frame == LAST2_FRAME || frame == LAST3_FRAME || 5146 (frame == ALTREF2_FRAME)); 5147 case GM_DISABLE_SEARCH: return 0; 5148 default: assert(0); 5149 } 5150 return 1; 5151 } 5152 5153 static int get_max_allowed_ref_frames(const AV1_COMP *cpi) { 5154 const unsigned int max_allowed_refs_for_given_speed = 5155 (cpi->sf.selective_ref_frame >= 3) ? INTER_REFS_PER_FRAME - 1 5156 : INTER_REFS_PER_FRAME; 5157 return AOMMIN(max_allowed_refs_for_given_speed, 5158 cpi->oxcf.max_reference_frames); 5159 } 5160 5161 // Enforce the number of references for each arbitrary frame based on user 5162 // options and speed. 5163 static void enforce_max_ref_frames(AV1_COMP *cpi) { 5164 MV_REFERENCE_FRAME ref_frame; 5165 int total_valid_refs = 0; 5166 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { 5167 if (cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame]) { 5168 total_valid_refs++; 5169 } 5170 } 5171 5172 const int max_allowed_refs = get_max_allowed_ref_frames(cpi); 5173 5174 // When more than 'max_allowed_refs' are available, we reduce the number of 5175 // reference frames one at a time based on this order. 
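// (Editorial aside, not part of libaom.) Disabling a reference amounts to
// clearing its bit in the ref-frame flags, walking the disable order until
// the count fits. A standalone model of that pruning loop; the flag values
// and the max_refs limit here are made up for the example:
#if 0
#include <stdio.h>

#define FLAG_LAST3 (1 << 0)
#define FLAG_LAST2 (1 << 1)
#define FLAG_ALT2 (1 << 2)
#define FLAG_GOLD (1 << 3)

static int count_bits(unsigned v) {
  int n = 0;
  for (; v; v >>= 1) n += (int)(v & 1);
  return n;
}

int main(void) {
  unsigned flags = FLAG_LAST3 | FLAG_LAST2 | FLAG_ALT2 | FLAG_GOLD;
  const unsigned disable_order[] = { FLAG_LAST3, FLAG_LAST2, FLAG_ALT2,
                                     FLAG_GOLD };
  const int max_refs = 2;
  for (int i = 0; i < 4 && count_bits(flags) > max_refs; ++i) {
    if (!(flags & disable_order[i])) continue;
    flags &= ~disable_order[i];  // Drop the next reference in the order.
  }
  printf("%d refs left\n", count_bits(flags));  // prints "2 refs left"
  return 0;
}
#endif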
5176 const MV_REFERENCE_FRAME disable_order[] = { 5177 LAST3_FRAME, 5178 LAST2_FRAME, 5179 ALTREF2_FRAME, 5180 GOLDEN_FRAME, 5181 }; 5182 5183 for (int i = 0; i < 4 && total_valid_refs > max_allowed_refs; ++i) { 5184 const MV_REFERENCE_FRAME ref_frame_to_disable = disable_order[i]; 5185 5186 if (!(cpi->ref_frame_flags & 5187 av1_ref_frame_flag_list[ref_frame_to_disable])) { 5188 continue; 5189 } 5190 5191 switch (ref_frame_to_disable) { 5192 case LAST3_FRAME: cpi->ref_frame_flags &= ~AOM_LAST3_FLAG; break; 5193 case LAST2_FRAME: cpi->ref_frame_flags &= ~AOM_LAST2_FLAG; break; 5194 case ALTREF2_FRAME: cpi->ref_frame_flags &= ~AOM_ALT2_FLAG; break; 5195 case GOLDEN_FRAME: cpi->ref_frame_flags &= ~AOM_GOLD_FLAG; break; 5196 default: assert(0); 5197 } 5198 --total_valid_refs; 5199 } 5200 assert(total_valid_refs <= max_allowed_refs); 5201 } 5202 5203 static INLINE int av1_refs_are_one_sided(const AV1_COMMON *cm) { 5204 assert(!frame_is_intra_only(cm)); 5205 5206 int one_sided_refs = 1; 5207 for (int ref = LAST_FRAME; ref <= ALTREF_FRAME; ++ref) { 5208 const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref); 5209 if (buf == NULL) continue; 5210 5211 const int ref_order_hint = buf->order_hint; 5212 if (get_relative_dist(&cm->seq_params.order_hint_info, ref_order_hint, 5213 (int)cm->current_frame.order_hint) > 0) { 5214 one_sided_refs = 0; // bwd reference 5215 break; 5216 } 5217 } 5218 return one_sided_refs; 5219 } 5220 5221 static INLINE void get_skip_mode_ref_offsets(const AV1_COMMON *cm, 5222 int ref_order_hint[2]) { 5223 const SkipModeInfo *const skip_mode_info = &cm->current_frame.skip_mode_info; 5224 ref_order_hint[0] = ref_order_hint[1] = 0; 5225 if (!skip_mode_info->skip_mode_allowed) return; 5226 5227 const RefCntBuffer *const buf_0 = 5228 get_ref_frame_buf(cm, LAST_FRAME + skip_mode_info->ref_frame_idx_0); 5229 const RefCntBuffer *const buf_1 = 5230 get_ref_frame_buf(cm, LAST_FRAME + skip_mode_info->ref_frame_idx_1); 5231 assert(buf_0 != NULL && buf_1 != NULL); 5232 5233 ref_order_hint[0] = buf_0->order_hint; 5234 ref_order_hint[1] = buf_1->order_hint; 5235 } 5236 5237 static int check_skip_mode_enabled(AV1_COMP *const cpi) { 5238 AV1_COMMON *const cm = &cpi->common; 5239 5240 av1_setup_skip_mode_allowed(cm); 5241 if (!cm->current_frame.skip_mode_info.skip_mode_allowed) return 0; 5242 5243 // Turn off skip mode if the temporal distances of the reference pair to the 5244 // current frame are different by more than 1 frame. 5245 const int cur_offset = (int)cm->current_frame.order_hint; 5246 int ref_offset[2]; 5247 get_skip_mode_ref_offsets(cm, ref_offset); 5248 const int cur_to_ref0 = get_relative_dist(&cm->seq_params.order_hint_info, 5249 cur_offset, ref_offset[0]); 5250 const int cur_to_ref1 = abs(get_relative_dist(&cm->seq_params.order_hint_info, 5251 cur_offset, ref_offset[1])); 5252 if (abs(cur_to_ref0 - cur_to_ref1) > 1) return 0; 5253 5254 // High Latency: Turn off skip mode if all refs are fwd. 
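// (Editorial aside, not part of libaom.) The forward/backward decisions above
// rest on signed modular differences between order hints, which wrap around.
// A standalone model of that computation, assuming the two's-complement
// unwrap used for order hints; the bit count below is illustrative:
#if 0
#include <stdio.h>

// Signed distance a - b under order hints with `bits` significant bits.
static int relative_dist(int a, int b, int bits) {
  int diff = a - b;
  const int m = 1 << (bits - 1);
  diff = (diff & (m - 1)) - (diff & m);  // sign-extend modulo 2^bits
  return diff;
}

int main(void) {
  // With 5-bit hints, frame 1 is 3 frames after frame 30 (wraparound),
  // not 29 frames before it.
  printf("%d\n", relative_dist(1, 30, 5));  // prints 3
  return 0;
}
#endif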
5255 if (cpi->all_one_sided_refs && cpi->oxcf.lag_in_frames > 0) return 0; 5256 5257 static const int flag_list[REF_FRAMES] = { 0, 5258 AOM_LAST_FLAG, 5259 AOM_LAST2_FLAG, 5260 AOM_LAST3_FLAG, 5261 AOM_GOLD_FLAG, 5262 AOM_BWD_FLAG, 5263 AOM_ALT2_FLAG, 5264 AOM_ALT_FLAG }; 5265 const int ref_frame[2] = { 5266 cm->current_frame.skip_mode_info.ref_frame_idx_0 + LAST_FRAME, 5267 cm->current_frame.skip_mode_info.ref_frame_idx_1 + LAST_FRAME 5268 }; 5269 if (!(cpi->ref_frame_flags & flag_list[ref_frame[0]]) || 5270 !(cpi->ref_frame_flags & flag_list[ref_frame[1]])) 5271 return 0; 5272 5273 return 1; 5274 } 5275 5276 // Function to decide if we can skip the global motion parameter computation 5277 // for a particular ref frame 5278 static INLINE int skip_gm_frame(AV1_COMMON *const cm, int ref_frame) { 5279 if ((ref_frame == LAST3_FRAME || ref_frame == LAST2_FRAME) && 5280 cm->global_motion[GOLDEN_FRAME].wmtype != IDENTITY) { 5281 return get_relative_dist( 5282 &cm->seq_params.order_hint_info, 5283 cm->cur_frame->ref_order_hints[ref_frame - LAST_FRAME], 5284 cm->cur_frame->ref_order_hints[GOLDEN_FRAME - LAST_FRAME]) <= 0; 5285 } 5286 return 0; 5287 } 5288 5289 static void set_default_interp_skip_flags(AV1_COMP *cpi) { 5290 const int num_planes = av1_num_planes(&cpi->common); 5291 cpi->default_interp_skip_flags = (num_planes == 1) 5292 ? DEFAULT_LUMA_INTERP_SKIP_FLAG 5293 : DEFAULT_INTERP_SKIP_FLAG; 5294 } 5295 5296 static void encode_frame_internal(AV1_COMP *cpi) { 5297 ThreadData *const td = &cpi->td; 5298 MACROBLOCK *const x = &td->mb; 5299 AV1_COMMON *const cm = &cpi->common; 5300 MACROBLOCKD *const xd = &x->e_mbd; 5301 RD_COUNTS *const rdc = &cpi->td.rd_counts; 5302 int i; 5303 5304 x->min_partition_size = AOMMIN(x->min_partition_size, cm->seq_params.sb_size); 5305 x->max_partition_size = AOMMIN(x->max_partition_size, cm->seq_params.sb_size); 5306 #if CONFIG_DIST_8X8 5307 x->using_dist_8x8 = cpi->oxcf.using_dist_8x8; 5308 x->tune_metric = cpi->oxcf.tuning; 5309 #endif 5310 cm->setup_mi(cm); 5311 5312 xd->mi = cm->mi_grid_visible; 5313 xd->mi[0] = cm->mi; 5314 5315 av1_zero(*td->counts); 5316 av1_zero(rdc->comp_pred_diff); 5317 // Two pass partition search can be enabled/disabled for different frames. 5318 // Reset this data at frame level to avoid any incorrect usage. 5319 init_first_partition_pass_stats_tables(cpi, x->first_partition_pass_stats); 5320 5321 // Reset the flag. 
5322 cpi->intrabc_used = 0; 5323 // Need to disable intrabc when superres is selected 5324 if (av1_superres_scaled(cm)) { 5325 cm->allow_intrabc = 0; 5326 } 5327 5328 cm->allow_intrabc &= (cpi->oxcf.enable_intrabc); 5329 5330 if (cpi->oxcf.pass != 1 && av1_use_hash_me(cm)) { 5331 // add to hash table 5332 const int pic_width = cpi->source->y_crop_width; 5333 const int pic_height = cpi->source->y_crop_height; 5334 uint32_t *block_hash_values[2][2]; 5335 int8_t *is_block_same[2][3]; 5336 int k, j; 5337 5338 for (k = 0; k < 2; k++) { 5339 for (j = 0; j < 2; j++) { 5340 CHECK_MEM_ERROR(cm, block_hash_values[k][j], 5341 aom_malloc(sizeof(uint32_t) * pic_width * pic_height)); 5342 } 5343 5344 for (j = 0; j < 3; j++) { 5345 CHECK_MEM_ERROR(cm, is_block_same[k][j], 5346 aom_malloc(sizeof(int8_t) * pic_width * pic_height)); 5347 } 5348 } 5349 5350 av1_hash_table_create(&cm->cur_frame->hash_table); 5351 av1_generate_block_2x2_hash_value(cpi->source, block_hash_values[0], 5352 is_block_same[0], &cpi->td.mb); 5353 av1_generate_block_hash_value(cpi->source, 4, block_hash_values[0], 5354 block_hash_values[1], is_block_same[0], 5355 is_block_same[1], &cpi->td.mb); 5356 av1_add_to_hash_map_by_row_with_precal_data( 5357 &cm->cur_frame->hash_table, block_hash_values[1], is_block_same[1][2], 5358 pic_width, pic_height, 4); 5359 av1_generate_block_hash_value(cpi->source, 8, block_hash_values[1], 5360 block_hash_values[0], is_block_same[1], 5361 is_block_same[0], &cpi->td.mb); 5362 av1_add_to_hash_map_by_row_with_precal_data( 5363 &cm->cur_frame->hash_table, block_hash_values[0], is_block_same[0][2], 5364 pic_width, pic_height, 8); 5365 av1_generate_block_hash_value(cpi->source, 16, block_hash_values[0], 5366 block_hash_values[1], is_block_same[0], 5367 is_block_same[1], &cpi->td.mb); 5368 av1_add_to_hash_map_by_row_with_precal_data( 5369 &cm->cur_frame->hash_table, block_hash_values[1], is_block_same[1][2], 5370 pic_width, pic_height, 16); 5371 av1_generate_block_hash_value(cpi->source, 32, block_hash_values[1], 5372 block_hash_values[0], is_block_same[1], 5373 is_block_same[0], &cpi->td.mb); 5374 av1_add_to_hash_map_by_row_with_precal_data( 5375 &cm->cur_frame->hash_table, block_hash_values[0], is_block_same[0][2], 5376 pic_width, pic_height, 32); 5377 av1_generate_block_hash_value(cpi->source, 64, block_hash_values[0], 5378 block_hash_values[1], is_block_same[0], 5379 is_block_same[1], &cpi->td.mb); 5380 av1_add_to_hash_map_by_row_with_precal_data( 5381 &cm->cur_frame->hash_table, block_hash_values[1], is_block_same[1][2], 5382 pic_width, pic_height, 64); 5383 5384 av1_generate_block_hash_value(cpi->source, 128, block_hash_values[1], 5385 block_hash_values[0], is_block_same[1], 5386 is_block_same[0], &cpi->td.mb); 5387 av1_add_to_hash_map_by_row_with_precal_data( 5388 &cm->cur_frame->hash_table, block_hash_values[0], is_block_same[0][2], 5389 pic_width, pic_height, 128); 5390 5391 for (k = 0; k < 2; k++) { 5392 for (j = 0; j < 2; j++) { 5393 aom_free(block_hash_values[k][j]); 5394 } 5395 5396 for (j = 0; j < 3; j++) { 5397 aom_free(is_block_same[k][j]); 5398 } 5399 } 5400 } 5401 5402 for (i = 0; i < MAX_SEGMENTS; ++i) { 5403 const int qindex = cm->seg.enabled 5404 ? 
av1_get_qindex(&cm->seg, i, cm->base_qindex) 5405 : cm->base_qindex; 5406 xd->lossless[i] = qindex == 0 && cm->y_dc_delta_q == 0 && 5407 cm->u_dc_delta_q == 0 && cm->u_ac_delta_q == 0 && 5408 cm->v_dc_delta_q == 0 && cm->v_ac_delta_q == 0; 5409 if (xd->lossless[i]) cpi->has_lossless_segment = 1; 5410 xd->qindex[i] = qindex; 5411 if (xd->lossless[i]) { 5412 cpi->optimize_seg_arr[i] = 0; 5413 } else { 5414 cpi->optimize_seg_arr[i] = cpi->sf.optimize_coefficients; 5415 } 5416 } 5417 cm->coded_lossless = is_coded_lossless(cm, xd); 5418 cm->all_lossless = cm->coded_lossless && !av1_superres_scaled(cm); 5419 5420 cm->tx_mode = select_tx_mode(cpi); 5421 5422 // Fix delta q resolution for the moment 5423 cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES; 5424 // Set delta_q_present_flag before it is used for the first time 5425 cm->delta_q_info.delta_lf_res = DEFAULT_DELTA_LF_RES; 5426 cm->delta_q_info.delta_q_present_flag = cpi->oxcf.deltaq_mode != NO_DELTA_Q; 5427 cm->delta_q_info.delta_lf_present_flag = cpi->oxcf.deltaq_mode == DELTA_Q_LF; 5428 cm->delta_q_info.delta_lf_multi = DEFAULT_DELTA_LF_MULTI; 5429 // update delta_q_present_flag and delta_lf_present_flag based on 5430 // base_qindex 5431 cm->delta_q_info.delta_q_present_flag &= cm->base_qindex > 0; 5432 cm->delta_q_info.delta_lf_present_flag &= cm->base_qindex > 0; 5433 5434 if (cpi->twopass.gf_group.index && 5435 cpi->twopass.gf_group.index < MAX_LAG_BUFFERS && 5436 cpi->oxcf.enable_tpl_model) { 5437 TplDepFrame *tpl_frame = &cpi->tpl_stats[cpi->twopass.gf_group.index]; 5438 TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr; 5439 5440 int tpl_stride = tpl_frame->stride; 5441 int64_t intra_cost_base = 0; 5442 int64_t mc_dep_cost_base = 0; 5443 int row, col; 5444 5445 for (row = 0; row < cm->mi_rows; ++row) { 5446 for (col = 0; col < cm->mi_cols; ++col) { 5447 TplDepStats *this_stats = &tpl_stats[row * tpl_stride + col]; 5448 intra_cost_base += this_stats->intra_cost; 5449 mc_dep_cost_base += this_stats->mc_dep_cost; 5450 } 5451 } 5452 5453 aom_clear_system_state(); 5454 5455 if (tpl_frame->is_valid) 5456 cpi->rd.r0 = (double)intra_cost_base / mc_dep_cost_base; 5457 } 5458 5459 av1_frame_init_quantizer(cpi); 5460 5461 av1_initialize_rd_consts(cpi); 5462 av1_initialize_me_consts(cpi, x, cm->base_qindex); 5463 init_encode_frame_mb_context(cpi); 5464 set_default_interp_skip_flags(cpi); 5465 if (cm->prev_frame) 5466 cm->last_frame_seg_map = cm->prev_frame->seg_map; 5467 else 5468 cm->last_frame_seg_map = NULL; 5469 if (cm->allow_intrabc || cm->coded_lossless) { 5470 av1_set_default_ref_deltas(cm->lf.ref_deltas); 5471 av1_set_default_mode_deltas(cm->lf.mode_deltas); 5472 } else if (cm->prev_frame) { 5473 memcpy(cm->lf.ref_deltas, cm->prev_frame->ref_deltas, REF_FRAMES); 5474 memcpy(cm->lf.mode_deltas, cm->prev_frame->mode_deltas, MAX_MODE_LF_DELTAS); 5475 } 5476 memcpy(cm->cur_frame->ref_deltas, cm->lf.ref_deltas, REF_FRAMES); 5477 memcpy(cm->cur_frame->mode_deltas, cm->lf.mode_deltas, MAX_MODE_LF_DELTAS); 5478 5479 // Special case: set prev_mi to NULL when the previous mode info 5480 // context cannot be used. 5481 cm->prev_mi = cm->allow_ref_frame_mvs ? 
cm->prev_mip : NULL; 5482 5483 x->txb_split_count = 0; 5484 #if CONFIG_SPEED_STATS 5485 x->tx_search_count = 0; 5486 #endif // CONFIG_SPEED_STATS 5487 5488 #if CONFIG_COLLECT_COMPONENT_TIMING 5489 start_timing(cpi, av1_compute_global_motion_time); 5490 #endif 5491 av1_zero(rdc->global_motion_used); 5492 av1_zero(cpi->gmparams_cost); 5493 if (cpi->common.current_frame.frame_type == INTER_FRAME && cpi->source && 5494 cpi->oxcf.enable_global_motion && !cpi->global_motion_search_done) { 5495 YV12_BUFFER_CONFIG *ref_buf[REF_FRAMES]; 5496 int frame; 5497 double params_by_motion[RANSAC_NUM_MOTIONS * (MAX_PARAMDIM - 1)]; 5498 const double *params_this_motion; 5499 int inliers_by_motion[RANSAC_NUM_MOTIONS]; 5500 WarpedMotionParams tmp_wm_params; 5501 // clang-format off 5502 static const double kIdentityParams[MAX_PARAMDIM - 1] = { 5503 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0 5504 }; 5505 // clang-format on 5506 int num_refs_using_gm = 0; 5507 5508 for (frame = ALTREF_FRAME; frame >= LAST_FRAME; --frame) { 5509 ref_buf[frame] = NULL; 5510 RefCntBuffer *buf = get_ref_frame_buf(cm, frame); 5511 if (buf != NULL) ref_buf[frame] = &buf->buf; 5512 int pframe; 5513 cm->global_motion[frame] = default_warp_params; 5514 const WarpedMotionParams *ref_params = 5515 cm->prev_frame ? &cm->prev_frame->global_motion[frame] 5516 : &default_warp_params; 5517 // check for duplicate buffer 5518 for (pframe = ALTREF_FRAME; pframe > frame; --pframe) { 5519 if (ref_buf[frame] == ref_buf[pframe]) break; 5520 } 5521 if (pframe > frame) { 5522 memcpy(&cm->global_motion[frame], &cm->global_motion[pframe], 5523 sizeof(WarpedMotionParams)); 5524 } else if (ref_buf[frame] && 5525 ref_buf[frame]->y_crop_width == cpi->source->y_crop_width && 5526 ref_buf[frame]->y_crop_height == cpi->source->y_crop_height && 5527 do_gm_search_logic(&cpi->sf, num_refs_using_gm, frame) && 5528 !(cpi->sf.selective_ref_gm && skip_gm_frame(cm, frame))) { 5529 TransformationType model; 5530 const int64_t ref_frame_error = av1_frame_error( 5531 is_cur_buf_hbd(xd), xd->bd, ref_buf[frame]->y_buffer, 5532 ref_buf[frame]->y_stride, cpi->source->y_buffer, 5533 cpi->source->y_width, cpi->source->y_height, cpi->source->y_stride); 5534 5535 if (ref_frame_error == 0) continue; 5536 5537 aom_clear_system_state(); 5538 5539 // TODO(sarahparker, debargha): Explore do_adaptive_gm_estimation = 1 5540 const int do_adaptive_gm_estimation = 0; 5541 5542 const int ref_frame_dist = get_relative_dist( 5543 &cm->seq_params.order_hint_info, cm->current_frame.order_hint, 5544 cm->cur_frame->ref_order_hints[frame - LAST_FRAME]); 5545 const GlobalMotionEstimationType gm_estimation_type = 5546 cm->seq_params.order_hint_info.enable_order_hint && 5547 abs(ref_frame_dist) <= 2 && do_adaptive_gm_estimation 5548 ? GLOBAL_MOTION_DISFLOW_BASED 5549 : GLOBAL_MOTION_FEATURE_BASED; 5550 for (model = ROTZOOM; model < GLOBAL_TRANS_TYPES_ENC; ++model) { 5551 int64_t best_warp_error = INT64_MAX; 5552 // Initially set all params to identity. 
5553 for (i = 0; i < RANSAC_NUM_MOTIONS; ++i) { 5554 memcpy(params_by_motion + (MAX_PARAMDIM - 1) * i, kIdentityParams, 5555 (MAX_PARAMDIM - 1) * sizeof(*params_by_motion)); 5556 } 5557 5558 av1_compute_global_motion(model, cpi->source, ref_buf[frame], 5559 cpi->common.seq_params.bit_depth, 5560 gm_estimation_type, inliers_by_motion, 5561 params_by_motion, RANSAC_NUM_MOTIONS); 5562 5563 for (i = 0; i < RANSAC_NUM_MOTIONS; ++i) { 5564 if (inliers_by_motion[i] == 0) continue; 5565 5566 params_this_motion = params_by_motion + (MAX_PARAMDIM - 1) * i; 5567 av1_convert_model_to_params(params_this_motion, &tmp_wm_params); 5568 5569 if (tmp_wm_params.wmtype != IDENTITY) { 5570 const int64_t warp_error = av1_refine_integerized_param( 5571 &tmp_wm_params, tmp_wm_params.wmtype, is_cur_buf_hbd(xd), 5572 xd->bd, ref_buf[frame]->y_buffer, ref_buf[frame]->y_width, 5573 ref_buf[frame]->y_height, ref_buf[frame]->y_stride, 5574 cpi->source->y_buffer, cpi->source->y_width, 5575 cpi->source->y_height, cpi->source->y_stride, 5, 5576 best_warp_error); 5577 if (warp_error < best_warp_error) { 5578 best_warp_error = warp_error; 5579 // Save the wm_params modified by 5580 // av1_refine_integerized_param() rather than motion index to 5581 // avoid rerunning refine() below. 5582 memcpy(&(cm->global_motion[frame]), &tmp_wm_params, 5583 sizeof(WarpedMotionParams)); 5584 } 5585 } 5586 } 5587 if (cm->global_motion[frame].wmtype <= AFFINE) 5588 if (!get_shear_params(&cm->global_motion[frame])) 5589 cm->global_motion[frame] = default_warp_params; 5590 5591 if (cm->global_motion[frame].wmtype == TRANSLATION) { 5592 cm->global_motion[frame].wmmat[0] = 5593 convert_to_trans_prec(cm->allow_high_precision_mv, 5594 cm->global_motion[frame].wmmat[0]) * 5595 GM_TRANS_ONLY_DECODE_FACTOR; 5596 cm->global_motion[frame].wmmat[1] = 5597 convert_to_trans_prec(cm->allow_high_precision_mv, 5598 cm->global_motion[frame].wmmat[1]) * 5599 GM_TRANS_ONLY_DECODE_FACTOR; 5600 } 5601 5602 // If the best error advantage found doesn't meet the threshold for 5603 // this motion type, revert to IDENTITY. 
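// (Editorial aside, not part of libaom.) The gist of the gate below is a
// ratio test: accept a global-motion model only when the warped prediction
// error is a small enough fraction of the unwarped reference error, given
// the bits spent on the parameters. A simplified standalone sketch; the
// 0.75 threshold is made up, not libaom's tuned value:
#if 0
#include <stdbool.h>
#include <stdio.h>

static bool erroradv_ok(long long warp_error, long long ref_error) {
  if (ref_error <= 0) return false;
  return (double)warp_error / (double)ref_error < 0.75;
}

int main(void) {
  printf("%d %d\n", erroradv_ok(60, 100), erroradv_ok(90, 100));  // "1 0"
  return 0;
}
#endif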
5604 if (!av1_is_enough_erroradvantage( 5605 (double)best_warp_error / ref_frame_error, 5606 gm_get_params_cost(&cm->global_motion[frame], ref_params, 5607 cm->allow_high_precision_mv), 5608 cpi->sf.gm_erroradv_type)) { 5609 cm->global_motion[frame] = default_warp_params; 5610 } 5611 if (cm->global_motion[frame].wmtype != IDENTITY) break; 5612 } 5613 aom_clear_system_state(); 5614 } 5615 if (cm->global_motion[frame].wmtype != IDENTITY) num_refs_using_gm++; 5616 cpi->gmparams_cost[frame] = 5617 gm_get_params_cost(&cm->global_motion[frame], ref_params, 5618 cm->allow_high_precision_mv) + 5619 cpi->gmtype_cost[cm->global_motion[frame].wmtype] - 5620 cpi->gmtype_cost[IDENTITY]; 5621 } 5622 // clear disabled ref_frames 5623 for (frame = LAST_FRAME; frame <= ALTREF_FRAME; ++frame) { 5624 const int ref_disabled = 5625 !(cpi->ref_frame_flags & av1_ref_frame_flag_list[frame]); 5626 if (ref_disabled && cpi->sf.recode_loop != DISALLOW_RECODE) { 5627 cpi->gmparams_cost[frame] = 0; 5628 cm->global_motion[frame] = default_warp_params; 5629 } 5630 } 5631 cpi->global_motion_search_done = 1; 5632 } 5633 memcpy(cm->cur_frame->global_motion, cm->global_motion, 5634 REF_FRAMES * sizeof(WarpedMotionParams)); 5635 #if CONFIG_COLLECT_COMPONENT_TIMING 5636 end_timing(cpi, av1_compute_global_motion_time); 5637 #endif 5638 5639 #if CONFIG_COLLECT_COMPONENT_TIMING 5640 start_timing(cpi, av1_setup_motion_field_time); 5641 #endif 5642 av1_setup_motion_field(cm); 5643 #if CONFIG_COLLECT_COMPONENT_TIMING 5644 end_timing(cpi, av1_setup_motion_field_time); 5645 #endif 5646 5647 cpi->all_one_sided_refs = 5648 frame_is_intra_only(cm) ? 0 : av1_refs_are_one_sided(cm); 5649 5650 cm->current_frame.skip_mode_info.skip_mode_flag = 5651 check_skip_mode_enabled(cpi); 5652 5653 { 5654 cpi->row_mt_sync_read_ptr = av1_row_mt_sync_read_dummy; 5655 cpi->row_mt_sync_write_ptr = av1_row_mt_sync_write_dummy; 5656 cpi->row_mt = 0; 5657 if (cpi->oxcf.row_mt && (cpi->oxcf.max_threads > 1)) { 5658 cpi->row_mt = 1; 5659 cpi->row_mt_sync_read_ptr = av1_row_mt_sync_read; 5660 cpi->row_mt_sync_write_ptr = av1_row_mt_sync_write; 5661 av1_encode_tiles_row_mt(cpi); 5662 } else { 5663 if (AOMMIN(cpi->oxcf.max_threads, cm->tile_cols * cm->tile_rows) > 1) 5664 av1_encode_tiles_mt(cpi); 5665 else 5666 encode_tiles(cpi); 5667 } 5668 } 5669 5670 // If intrabc is allowed but never selected, reset the allow_intrabc flag. 5671 if (cm->allow_intrabc && !cpi->intrabc_used) cm->allow_intrabc = 0; 5672 if (cm->allow_intrabc) cm->delta_q_info.delta_lf_present_flag = 0; 5673 } 5674 5675 void av1_encode_frame(AV1_COMP *cpi) { 5676 AV1_COMMON *const cm = &cpi->common; 5677 CurrentFrame *const current_frame = &cm->current_frame; 5678 const int num_planes = av1_num_planes(cm); 5679 // Indicates whether or not to use a default reduced set for ext-tx 5680 // rather than the potential full set of 16 transforms 5681 cm->reduced_tx_set_used = cpi->oxcf.reduced_tx_type_set; 5682 5683 // Make sure segment_id is no larger than last_active_segid. 
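// (Editorial aside, not part of libaom.) A standalone model of the clamping
// loop below, which walks the segmentation map row by row and caps each
// mi-unit's segment id at the highest active segment. Sizes and values here
// are illustrative only:
#if 0
#include <stdint.h>
#include <stdio.h>

static void clamp_seg_map(uint8_t *map, int rows, int cols, uint8_t max_id) {
  for (int r = 0; r < rows; ++r) {
    for (int c = 0; c < cols; ++c) {
      if (map[c] > max_id) map[c] = max_id;
    }
    map += cols;  // Advance to the next row of the map.
  }
}

int main(void) {
  uint8_t map[6] = { 0, 5, 2, 7, 1, 3 };
  clamp_seg_map(map, 2, 3, 3);
  for (int i = 0; i < 6; ++i) printf("%u ", map[i]);  // 0 3 2 3 1 3
  printf("\n");
  return 0;
}
#endif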
  if (cm->seg.enabled && cm->seg.update_map) {
    const int mi_rows = cm->mi_rows;
    const int mi_cols = cm->mi_cols;
    const int last_active_segid = cm->seg.last_active_segid;
    uint8_t *map = cpi->segmentation_map;
    for (int mi_row = 0; mi_row < mi_rows; ++mi_row) {
      for (int mi_col = 0; mi_col < mi_cols; ++mi_col) {
        map[mi_col] = AOMMIN(map[mi_col], last_active_segid);
      }
      map += mi_cols;
    }
  }

  av1_setup_frame_buf_refs(cm);
  enforce_max_ref_frames(cpi);
  av1_setup_frame_sign_bias(cm);

#if CONFIG_MISMATCH_DEBUG
  mismatch_reset_frame(num_planes);
#else
  (void)num_planes;
#endif

  if (cpi->sf.frame_parameter_update) {
    int i;
    RD_OPT *const rd_opt = &cpi->rd;
    RD_COUNTS *const rdc = &cpi->td.rd_counts;

    // This code does a single RD pass over the whole frame assuming
    // either compound, single or hybrid prediction as per whatever has
    // worked best for that type of frame in the past.
    // It also predicts whether another coding mode would have worked
    // better than this coding mode. If that is the case, it remembers
    // that for subsequent frames.
    // It does the same analysis for transform size selection also.
    //
    // TODO(zoeliu): To investigate whether a frame_type other than
    // INTRA/ALTREF/GOLDEN/LAST needs to be specified separately.
    const MV_REFERENCE_FRAME frame_type = get_frame_type(cpi);
    int64_t *const mode_thrs = rd_opt->prediction_type_threshes[frame_type];
    const int is_alt_ref = frame_type == ALTREF_FRAME;

    /* prediction (compound, single or hybrid) mode selection */
    // NOTE: "is_alt_ref" is true only for OVERLAY/INTNL_OVERLAY frames
    if (is_alt_ref || frame_is_intra_only(cm))
      current_frame->reference_mode = SINGLE_REFERENCE;
    else
      current_frame->reference_mode = REFERENCE_MODE_SELECT;

    cm->interp_filter = SWITCHABLE;
    if (cm->large_scale_tile) cm->interp_filter = EIGHTTAP_REGULAR;

    cm->switchable_motion_mode = 1;

    rdc->compound_ref_used_flag = 0;
    rdc->skip_mode_used_flag = 0;

    encode_frame_internal(cpi);

    for (i = 0; i < REFERENCE_MODES; ++i)
      mode_thrs[i] = (mode_thrs[i] + rdc->comp_pred_diff[i] / cm->MBs) / 2;

    if (current_frame->reference_mode == REFERENCE_MODE_SELECT) {
      // Use a flag that includes 4x4 blocks
      if (rdc->compound_ref_used_flag == 0) {
        current_frame->reference_mode = SINGLE_REFERENCE;
#if CONFIG_ENTROPY_STATS
        av1_zero(cpi->td.counts->comp_inter);
#endif  // CONFIG_ENTROPY_STATS
      }
    }
    // Re-check the skip mode status, as the reference mode may have been
    // changed.
    SkipModeInfo *const skip_mode_info = &current_frame->skip_mode_info;
    if (frame_is_intra_only(cm) ||
        current_frame->reference_mode == SINGLE_REFERENCE) {
      skip_mode_info->skip_mode_allowed = 0;
      skip_mode_info->skip_mode_flag = 0;
    }
    if (skip_mode_info->skip_mode_flag && rdc->skip_mode_used_flag == 0)
      skip_mode_info->skip_mode_flag = 0;

    if (!cm->large_scale_tile) {
      if (cm->tx_mode == TX_MODE_SELECT && cpi->td.mb.txb_split_count == 0)
        cm->tx_mode = TX_MODE_LARGEST;
    }
  } else {
    encode_frame_internal(cpi);
  }
}

static void update_txfm_count(MACROBLOCK *x, MACROBLOCKD *xd,
                              FRAME_COUNTS *counts, TX_SIZE tx_size, int depth,
                              int blk_row, int blk_col,
                              uint8_t allow_update_cdf) {
  MB_MODE_INFO *mbmi = xd->mi[0];
  const BLOCK_SIZE bsize = mbmi->sb_type;
  const int max_blocks_high = max_block_high(xd, bsize, 0);
  const int max_blocks_wide = max_block_wide(xd, bsize, 0);
  int ctx = txfm_partition_context(xd->above_txfm_context + blk_col,
                                   xd->left_txfm_context + blk_row,
                                   mbmi->sb_type, tx_size);
  const int txb_size_index = av1_get_txb_size_index(bsize, blk_row, blk_col);
  const TX_SIZE plane_tx_size = mbmi->inter_tx_size[txb_size_index];

  if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
  assert(tx_size > TX_4X4);

  if (depth == MAX_VARTX_DEPTH) {
    // Don't add to counts in this case
    mbmi->tx_size = tx_size;
    txfm_partition_update(xd->above_txfm_context + blk_col,
                          xd->left_txfm_context + blk_row, tx_size, tx_size);
    return;
  }

  if (tx_size == plane_tx_size) {
#if CONFIG_ENTROPY_STATS
    ++counts->txfm_partition[ctx][0];
#endif
    if (allow_update_cdf)
      update_cdf(xd->tile_ctx->txfm_partition_cdf[ctx], 0, 2);
    mbmi->tx_size = tx_size;
    txfm_partition_update(xd->above_txfm_context + blk_col,
                          xd->left_txfm_context + blk_row, tx_size, tx_size);
  } else {
    const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
    const int bsw = tx_size_wide_unit[sub_txs];
    const int bsh = tx_size_high_unit[sub_txs];

#if CONFIG_ENTROPY_STATS
    ++counts->txfm_partition[ctx][1];
#endif
    if (allow_update_cdf)
      update_cdf(xd->tile_ctx->txfm_partition_cdf[ctx], 1, 2);
    ++x->txb_split_count;

    if (sub_txs == TX_4X4) {
      mbmi->inter_tx_size[txb_size_index] = TX_4X4;
      mbmi->tx_size = TX_4X4;
      txfm_partition_update(xd->above_txfm_context + blk_col,
                            xd->left_txfm_context + blk_row, TX_4X4, tx_size);
      return;
    }

    for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh) {
      for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) {
        int offsetr = row;
        int offsetc = col;

        update_txfm_count(x, xd, counts, sub_txs, depth + 1, blk_row + offsetr,
                          blk_col + offsetc, allow_update_cdf);
      }
    }
  }
}

static void tx_partition_count_update(const AV1_COMMON *const cm, MACROBLOCK *x,
                                      BLOCK_SIZE plane_bsize, int mi_row,
                                      int mi_col, FRAME_COUNTS *td_counts,
                                      uint8_t allow_update_cdf) {
  MACROBLOCKD *xd = &x->e_mbd;
  const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
  const int mi_height = block_size_high[plane_bsize] >> tx_size_high_log2[0];
  const TX_SIZE max_tx_size = get_vartx_max_txsize(xd, plane_bsize, 0);
  const int bh = tx_size_high_unit[max_tx_size];
  const int bw = tx_size_wide_unit[max_tx_size];
5851 int idx, idy; 5852 5853 xd->above_txfm_context = cm->above_txfm_context[xd->tile.tile_row] + mi_col; 5854 xd->left_txfm_context = 5855 xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK); 5856 5857 for (idy = 0; idy < mi_height; idy += bh) 5858 for (idx = 0; idx < mi_width; idx += bw) 5859 update_txfm_count(x, xd, td_counts, max_tx_size, 0, idy, idx, 5860 allow_update_cdf); 5861 } 5862 5863 static void set_txfm_context(MACROBLOCKD *xd, TX_SIZE tx_size, int blk_row, 5864 int blk_col) { 5865 MB_MODE_INFO *mbmi = xd->mi[0]; 5866 const BLOCK_SIZE bsize = mbmi->sb_type; 5867 const int max_blocks_high = max_block_high(xd, bsize, 0); 5868 const int max_blocks_wide = max_block_wide(xd, bsize, 0); 5869 const int txb_size_index = av1_get_txb_size_index(bsize, blk_row, blk_col); 5870 const TX_SIZE plane_tx_size = mbmi->inter_tx_size[txb_size_index]; 5871 5872 if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return; 5873 5874 if (tx_size == plane_tx_size) { 5875 mbmi->tx_size = tx_size; 5876 txfm_partition_update(xd->above_txfm_context + blk_col, 5877 xd->left_txfm_context + blk_row, tx_size, tx_size); 5878 5879 } else { 5880 if (tx_size == TX_8X8) { 5881 mbmi->inter_tx_size[txb_size_index] = TX_4X4; 5882 mbmi->tx_size = TX_4X4; 5883 txfm_partition_update(xd->above_txfm_context + blk_col, 5884 xd->left_txfm_context + blk_row, TX_4X4, tx_size); 5885 return; 5886 } 5887 const TX_SIZE sub_txs = sub_tx_size_map[tx_size]; 5888 const int bsw = tx_size_wide_unit[sub_txs]; 5889 const int bsh = tx_size_high_unit[sub_txs]; 5890 for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh) { 5891 for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) { 5892 const int offsetr = blk_row + row; 5893 const int offsetc = blk_col + col; 5894 if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue; 5895 set_txfm_context(xd, sub_txs, offsetr, offsetc); 5896 } 5897 } 5898 } 5899 } 5900 5901 static void tx_partition_set_contexts(const AV1_COMMON *const cm, 5902 MACROBLOCKD *xd, BLOCK_SIZE plane_bsize, 5903 int mi_row, int mi_col) { 5904 const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0]; 5905 const int mi_height = block_size_high[plane_bsize] >> tx_size_high_log2[0]; 5906 const TX_SIZE max_tx_size = get_vartx_max_txsize(xd, plane_bsize, 0); 5907 const int bh = tx_size_high_unit[max_tx_size]; 5908 const int bw = tx_size_wide_unit[max_tx_size]; 5909 int idx, idy; 5910 5911 xd->above_txfm_context = cm->above_txfm_context[xd->tile.tile_row] + mi_col; 5912 xd->left_txfm_context = 5913 xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK); 5914 5915 for (idy = 0; idy < mi_height; idy += bh) 5916 for (idx = 0; idx < mi_width; idx += bw) 5917 set_txfm_context(xd, max_tx_size, idy, idx); 5918 } 5919 5920 static void encode_superblock(const AV1_COMP *const cpi, TileDataEnc *tile_data, 5921 ThreadData *td, TOKENEXTRA **t, RUN_TYPE dry_run, 5922 int mi_row, int mi_col, BLOCK_SIZE bsize, 5923 int *rate) { 5924 const AV1_COMMON *const cm = &cpi->common; 5925 const int num_planes = av1_num_planes(cm); 5926 MACROBLOCK *const x = &td->mb; 5927 MACROBLOCKD *const xd = &x->e_mbd; 5928 MB_MODE_INFO **mi_4x4 = xd->mi; 5929 MB_MODE_INFO *mbmi = mi_4x4[0]; 5930 const int seg_skip = 5931 segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP); 5932 const int mis = cm->mi_stride; 5933 const int mi_width = mi_size_wide[bsize]; 5934 const int mi_height = mi_size_high[bsize]; 5935 const int is_inter = is_inter_block(mbmi); 5936 5937 if (cpi->two_pass_partition_search && 
      x->cb_partition_scan) {
    for (int row = mi_row; row < mi_row + mi_width;
         row += FIRST_PARTITION_PASS_SAMPLE_REGION) {
      for (int col = mi_col; col < mi_col + mi_height;
           col += FIRST_PARTITION_PASS_SAMPLE_REGION) {
        const int index = av1_first_partition_pass_stats_index(row, col);
        FIRST_PARTITION_PASS_STATS *const stats =
            &x->first_partition_pass_stats[index];
        // Increase the counter of data samples.
        ++stats->sample_counts;
        // Increase the counter for ref_frame[0] and ref_frame[1].
        if (stats->ref0_counts[mbmi->ref_frame[0]] < 255)
          ++stats->ref0_counts[mbmi->ref_frame[0]];
        if (mbmi->ref_frame[1] >= 0 &&
            stats->ref1_counts[mbmi->ref_frame[1]] < 255)
          ++stats->ref1_counts[mbmi->ref_frame[1]];
        if (cpi->sf.use_first_partition_pass_interintra_stats) {
          // Increase the counter for interintra_motion_mode_count
          if (mbmi->motion_mode == 0 && mbmi->ref_frame[1] == INTRA_FRAME &&
              stats->interintra_motion_mode_count[mbmi->ref_frame[0]] < 255) {
            ++stats->interintra_motion_mode_count[mbmi->ref_frame[0]];
          }
        }
      }
    }
  }

  if (!is_inter) {
    xd->cfl.is_chroma_reference =
        is_chroma_reference(mi_row, mi_col, bsize, cm->seq_params.subsampling_x,
                            cm->seq_params.subsampling_y);
    xd->cfl.store_y = store_cfl_required(cm, xd);
    mbmi->skip = 1;
    for (int plane = 0; plane < num_planes; ++plane) {
      av1_encode_intra_block_plane(cpi, x, bsize, plane,
                                   cpi->optimize_seg_arr[mbmi->segment_id],
                                   mi_row, mi_col);
    }

    // If there is at least one lossless segment, force the skip flag for
    // intra blocks to 0, in order to avoid the segment_id being changed in
    // write_segment_id().
    if (!cpi->common.seg.segid_preskip && cpi->common.seg.update_map &&
        cpi->has_lossless_segment)
      mbmi->skip = 0;

    xd->cfl.store_y = 0;
    if (av1_allow_palette(cm->allow_screen_content_tools, bsize)) {
      for (int plane = 0; plane < AOMMIN(2, num_planes); ++plane) {
        if (mbmi->palette_mode_info.palette_size[plane] > 0) {
          if (!dry_run) {
            av1_tokenize_color_map(x, plane, t, bsize, mbmi->tx_size,
                                   PALETTE_MAP, tile_data->allow_update_cdf,
                                   td->counts);
          } else if (dry_run == DRY_RUN_COSTCOEFFS) {
            // Accumulate the palette color-map rate into the output.
            *rate +=
                av1_cost_color_map(x, plane, bsize, mbmi->tx_size, PALETTE_MAP);
          }
        }
      }
    }

    av1_update_txb_context(cpi, td, dry_run, bsize, rate, mi_row, mi_col,
                           tile_data->allow_update_cdf);
  } else {
    int ref;
    const int is_compound = has_second_ref(mbmi);

    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
    for (ref = 0; ref < 1 + is_compound; ++ref) {
      const YV12_BUFFER_CONFIG *cfg =
          get_ref_frame_yv12_buf(cm, mbmi->ref_frame[ref]);
      assert(IMPLIES(!is_intrabc_block(mbmi), cfg));
      av1_setup_pre_planes(xd, ref, cfg, mi_row, mi_col,
                           xd->block_ref_scale_factors[ref], num_planes);
    }

    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                  av1_num_planes(cm) - 1);
    if (mbmi->motion_mode == OBMC_CAUSAL) {
      assert(cpi->oxcf.enable_obmc == 1);
      av1_build_obmc_inter_predictors_sb(cm, xd, mi_row, mi_col);
    }

#if CONFIG_MISMATCH_DEBUG
    if (dry_run == OUTPUT_ENABLED) {
      for (int plane = 0; plane < num_planes; ++plane) {
        const struct macroblockd_plane *pd = &xd->plane[plane];
        int pixel_c, pixel_r;
        mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, 0, 0,
pd->subsampling_x, pd->subsampling_y); 6028 if (!is_chroma_reference(mi_row, mi_col, bsize, pd->subsampling_x, 6029 pd->subsampling_y)) 6030 continue; 6031 mismatch_record_block_pre(pd->dst.buf, pd->dst.stride, 6032 cm->current_frame.order_hint, plane, pixel_c, 6033 pixel_r, pd->width, pd->height, 6034 xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH); 6035 } 6036 } 6037 #else 6038 (void)num_planes; 6039 #endif 6040 6041 av1_encode_sb(cpi, x, bsize, mi_row, mi_col, dry_run); 6042 av1_tokenize_sb_vartx(cpi, td, t, dry_run, mi_row, mi_col, bsize, rate, 6043 tile_data->allow_update_cdf); 6044 } 6045 6046 if (!dry_run) { 6047 if (av1_allow_intrabc(cm) && is_intrabc_block(mbmi)) td->intrabc_used = 1; 6048 if (cm->tx_mode == TX_MODE_SELECT && !xd->lossless[mbmi->segment_id] && 6049 mbmi->sb_type > BLOCK_4X4 && !(is_inter && (mbmi->skip || seg_skip))) { 6050 if (is_inter) { 6051 tx_partition_count_update(cm, x, bsize, mi_row, mi_col, td->counts, 6052 tile_data->allow_update_cdf); 6053 } else { 6054 if (mbmi->tx_size != max_txsize_rect_lookup[bsize]) 6055 ++x->txb_split_count; 6056 if (block_signals_txsize(bsize)) { 6057 const int tx_size_ctx = get_tx_size_context(xd); 6058 const int32_t tx_size_cat = bsize_to_tx_size_cat(bsize); 6059 const int depth = tx_size_to_depth(mbmi->tx_size, bsize); 6060 const int max_depths = bsize_to_max_depth(bsize); 6061 6062 if (tile_data->allow_update_cdf) 6063 update_cdf(xd->tile_ctx->tx_size_cdf[tx_size_cat][tx_size_ctx], 6064 depth, max_depths + 1); 6065 #if CONFIG_ENTROPY_STATS 6066 ++td->counts->intra_tx_size[tx_size_cat][tx_size_ctx][depth]; 6067 #endif 6068 } 6069 } 6070 assert(IMPLIES(is_rect_tx(mbmi->tx_size), is_rect_tx_allowed(xd, mbmi))); 6071 } else { 6072 int i, j; 6073 TX_SIZE intra_tx_size; 6074 // The new intra coding scheme requires no change of transform size 6075 if (is_inter) { 6076 if (xd->lossless[mbmi->segment_id]) { 6077 intra_tx_size = TX_4X4; 6078 } else { 6079 intra_tx_size = tx_size_from_tx_mode(bsize, cm->tx_mode); 6080 } 6081 } else { 6082 intra_tx_size = mbmi->tx_size; 6083 } 6084 6085 for (j = 0; j < mi_height; j++) 6086 for (i = 0; i < mi_width; i++) 6087 if (mi_col + i < cm->mi_cols && mi_row + j < cm->mi_rows) 6088 mi_4x4[mis * j + i]->tx_size = intra_tx_size; 6089 6090 if (intra_tx_size != max_txsize_rect_lookup[bsize]) ++x->txb_split_count; 6091 } 6092 } 6093 6094 if (cm->tx_mode == TX_MODE_SELECT && block_signals_txsize(mbmi->sb_type) && 6095 is_inter && !(mbmi->skip || seg_skip) && 6096 !xd->lossless[mbmi->segment_id]) { 6097 if (dry_run) tx_partition_set_contexts(cm, xd, bsize, mi_row, mi_col); 6098 } else { 6099 TX_SIZE tx_size = mbmi->tx_size; 6100 // The new intra coding scheme requires no change of transform size 6101 if (is_inter) { 6102 if (xd->lossless[mbmi->segment_id]) { 6103 tx_size = TX_4X4; 6104 } else { 6105 tx_size = tx_size_from_tx_mode(bsize, cm->tx_mode); 6106 } 6107 } else { 6108 tx_size = (bsize > BLOCK_4X4) ? tx_size : TX_4X4; 6109 } 6110 mbmi->tx_size = tx_size; 6111 set_txfm_ctxs(tx_size, xd->n4_w, xd->n4_h, 6112 (mbmi->skip || seg_skip) && is_inter_block(mbmi), xd); 6113 } 6114 CFL_CTX *const cfl = &xd->cfl; 6115 if (is_inter_block(mbmi) && 6116 !is_chroma_reference(mi_row, mi_col, bsize, cfl->subsampling_x, 6117 cfl->subsampling_y) && 6118 is_cfl_allowed(xd)) { 6119 cfl_store_block(xd, mbmi->sb_type, mbmi->tx_size); 6120 } 6121 } 6122
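// (Editorial aside, not part of libaom.) The first-partition-pass statistics
// in encode_superblock() use 8-bit saturating counters: increments stop at
// 255 instead of wrapping back to 0. A standalone model of that pattern:
#if 0
#include <stdint.h>
#include <stdio.h>

static void saturating_inc(uint8_t *counter) {
  if (*counter < 255) ++*counter;  // Saturate instead of wrapping to 0.
}

int main(void) {
  uint8_t c = 254;
  saturating_inc(&c);
  saturating_inc(&c);  // Second increment has no effect.
  printf("%u\n", c);   // prints 255
  return 0;
}
#endif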