/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>
#include <math.h>

#include "./vp9_rtcd.h"
#include "./vpx_dsp_rtcd.h"

#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/system_state.h"

#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_idct.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_quant_common.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_scan.h"
#include "vp9/common/vp9_seg_common.h"

#include "vp9/encoder/vp9_cost.h"
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/encoder/vp9_encoder.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/encoder/vp9_rd.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_aq_variance.h"

#define LAST_FRAME_MODE_MASK \
  ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | (1 << INTRA_FRAME))
#define GOLDEN_FRAME_MODE_MASK \
  ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | (1 << INTRA_FRAME))
#define ALT_REF_MODE_MASK \
  ((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | (1 << INTRA_FRAME))

#define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | 0x01)

#define MIN_EARLY_TERM_INDEX 3
#define NEW_MV_DISCOUNT_FACTOR 8

typedef struct {
  PREDICTION_MODE mode;
  MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;

typedef struct {
  MV_REFERENCE_FRAME ref_frame[2];
} REF_DEFINITION;

struct rdcost_block_args {
  const VP9_COMP *cpi;
  MACROBLOCK *x;
  ENTROPY_CONTEXT t_above[16];
  ENTROPY_CONTEXT t_left[16];
  int this_rate;
  int64_t this_dist;
  int64_t this_sse;
  int64_t this_rd;
  int64_t best_rd;
  int exit_early;
  int use_fast_coef_costing;
  const scan_order *so;
  uint8_t skippable;
};

#define LAST_NEW_MV_INDEX 6
static const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
  { NEARESTMV, { LAST_FRAME, NONE } },
  { NEARESTMV, { ALTREF_FRAME, NONE } },
  { NEARESTMV, { GOLDEN_FRAME, NONE } },

  { DC_PRED, { INTRA_FRAME, NONE } },

  { NEWMV, { LAST_FRAME, NONE } },
  { NEWMV, { ALTREF_FRAME, NONE } },
  { NEWMV, { GOLDEN_FRAME, NONE } },

  { NEARMV, { LAST_FRAME, NONE } },
  { NEARMV, { ALTREF_FRAME, NONE } },
  { NEARMV, { GOLDEN_FRAME, NONE } },

  { ZEROMV, { LAST_FRAME, NONE } },
  { ZEROMV, { GOLDEN_FRAME, NONE } },
  { ZEROMV, { ALTREF_FRAME, NONE } },

  { NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },

  { TM_PRED, { INTRA_FRAME, NONE } },

  { NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEWMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },

  { ZEROMV, { LAST_FRAME, ALTREF_FRAME } },
  { ZEROMV, { GOLDEN_FRAME, ALTREF_FRAME } },

  { H_PRED, { INTRA_FRAME, NONE } },
  { V_PRED, { INTRA_FRAME, NONE } },
  { D135_PRED, { INTRA_FRAME, NONE } },
  { D207_PRED, { INTRA_FRAME, NONE } },
  { D153_PRED, { INTRA_FRAME, NONE } },
  { D63_PRED, { INTRA_FRAME, NONE } },
  { D117_PRED, { INTRA_FRAME, NONE } },
  { D45_PRED, { INTRA_FRAME, NONE } },
};

static const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
  { { LAST_FRAME, NONE } },           { { GOLDEN_FRAME, NONE } },
  { { ALTREF_FRAME, NONE } },         { { LAST_FRAME, ALTREF_FRAME } },
  { { GOLDEN_FRAME, ALTREF_FRAME } }, { { INTRA_FRAME, NONE } },
};
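
// Swap the coefficient, quantized-coefficient, dequantized-coefficient and
// eob buffer pointers between slots m and n of the pick-mode context, and
// point the macroblock planes at the buffers now held in slot m.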
static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int m, int n,
                           int min_plane, int max_plane) {
  int i;

  for (i = min_plane; i < max_plane; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &x->e_mbd.plane[i];

    p->coeff = ctx->coeff_pbuf[i][m];
    p->qcoeff = ctx->qcoeff_pbuf[i][m];
    pd->dqcoeff = ctx->dqcoeff_pbuf[i][m];
    p->eobs = ctx->eobs_pbuf[i][m];

    ctx->coeff_pbuf[i][m] = ctx->coeff_pbuf[i][n];
    ctx->qcoeff_pbuf[i][m] = ctx->qcoeff_pbuf[i][n];
    ctx->dqcoeff_pbuf[i][m] = ctx->dqcoeff_pbuf[i][n];
    ctx->eobs_pbuf[i][m] = ctx->eobs_pbuf[i][n];

    ctx->coeff_pbuf[i][n] = p->coeff;
    ctx->qcoeff_pbuf[i][n] = p->qcoeff;
    ctx->dqcoeff_pbuf[i][n] = pd->dqcoeff;
    ctx->eobs_pbuf[i][n] = p->eobs;
  }
}
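
// Model the rate and distortion of coding the residual for each plane of the
// block directly from the prediction error (variance / SSE), without running
// the forward transform, and record per-4x4 skip-transform hints.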
static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x,
                            MACROBLOCKD *xd, int *out_rate_sum,
                            int64_t *out_dist_sum, int *skip_txfm_sb,
                            int64_t *skip_sse_sb) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  int i;
  int64_t rate_sum = 0;
  int64_t dist_sum = 0;
  const int ref = xd->mi[0]->ref_frame[0];
  unsigned int sse;
  unsigned int var = 0;
  int64_t total_sse = 0;
  int skip_flag = 1;
  const int shift = 6;
  int64_t dist;
  const int dequant_shift =
#if CONFIG_VP9_HIGHBITDEPTH
      (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd - 5 :
#endif  // CONFIG_VP9_HIGHBITDEPTH
                                                    3;
  unsigned int qstep_vec[MAX_MB_PLANE];
  unsigned int nlog2_vec[MAX_MB_PLANE];
  unsigned int sum_sse_vec[MAX_MB_PLANE];
  int any_zero_sum_sse = 0;

  x->pred_sse[ref] = 0;

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
    const TX_SIZE max_tx_size = max_txsize_lookup[bs];
    const BLOCK_SIZE unit_size = txsize_to_bsize[max_tx_size];
    const int64_t dc_thr = p->quant_thred[0] >> shift;
    const int64_t ac_thr = p->quant_thred[1] >> shift;
    unsigned int sum_sse = 0;
    // The low thresholds are used to measure if the prediction errors are
    // low enough so that we can skip the mode search.
    const int64_t low_dc_thr = VPXMIN(50, dc_thr >> 2);
    const int64_t low_ac_thr = VPXMIN(80, ac_thr >> 2);
    int bw = 1 << (b_width_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
    int bh = 1 << (b_height_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
    int idx, idy;
    int lw = b_width_log2_lookup[unit_size] + 2;
    int lh = b_height_log2_lookup[unit_size] + 2;

    for (idy = 0; idy < bh; ++idy) {
      for (idx = 0; idx < bw; ++idx) {
        uint8_t *src = p->src.buf + (idy * p->src.stride << lh) + (idx << lw);
        uint8_t *dst = pd->dst.buf + (idy * pd->dst.stride << lh) + (idx << lh);
        int block_idx = (idy << 1) + idx;
        int low_err_skip = 0;

        var = cpi->fn_ptr[unit_size].vf(src, p->src.stride, dst,
                                        pd->dst.stride, &sse);
        x->bsse[(i << 2) + block_idx] = sse;
        sum_sse += sse;

        x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_NONE;
        if (!x->select_tx_size) {
          // Check if all ac coefficients can be quantized to zero.
          if (var < ac_thr || var == 0) {
            x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_AC_ONLY;

            // Check if dc coefficient can be quantized to zero.
            if (sse - var < dc_thr || sse == var) {
              x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_AC_DC;

              if (!sse || (var < low_ac_thr && sse - var < low_dc_thr))
                low_err_skip = 1;
            }
          }
        }

        if (skip_flag && !low_err_skip) skip_flag = 0;

        if (i == 0) x->pred_sse[ref] += sse;
      }
    }

    total_sse += sum_sse;
    sum_sse_vec[i] = sum_sse;
    any_zero_sum_sse = any_zero_sum_sse || (sum_sse == 0);
    qstep_vec[i] = pd->dequant[1] >> dequant_shift;
    nlog2_vec[i] = num_pels_log2_lookup[bs];
  }

  // Fast approximate the modelling function.
  if (cpi->sf.simple_model_rd_from_var) {
    for (i = 0; i < MAX_MB_PLANE; ++i) {
      int64_t rate;
      const int64_t square_error = sum_sse_vec[i];
      int quantizer = qstep_vec[i];

      if (quantizer < 120)
        rate = (square_error * (280 - quantizer)) >> (16 - VP9_PROB_COST_SHIFT);
      else
        rate = 0;
      dist = (square_error * quantizer) >> 8;
      rate_sum += rate;
      dist_sum += dist;
    }
  } else {
    if (any_zero_sum_sse) {
      for (i = 0; i < MAX_MB_PLANE; ++i) {
        int rate;
        vp9_model_rd_from_var_lapndz(sum_sse_vec[i], nlog2_vec[i], qstep_vec[i],
                                     &rate, &dist);
        rate_sum += rate;
        dist_sum += dist;
      }
    } else {
      vp9_model_rd_from_var_lapndz_vec(sum_sse_vec, nlog2_vec, qstep_vec,
                                       &rate_sum, &dist_sum);
    }
  }

  *skip_txfm_sb = skip_flag;
  *skip_sse_sb = total_sse << 4;
  *out_rate_sum = (int)rate_sum;
  *out_dist_sum = dist_sum << 4;
}
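
// Coefficient-domain block error helpers: return the sum of squared error
// between original and dequantized transform coefficients, and report the sum
// of squared original coefficients through *ssz. The high-bitdepth variant
// rescales both values back to 8-bit precision.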
#if CONFIG_VP9_HIGHBITDEPTH
int64_t vp9_highbd_block_error_c(const tran_low_t *coeff,
                                 const tran_low_t *dqcoeff, intptr_t block_size,
                                 int64_t *ssz, int bd) {
  int i;
  int64_t error = 0, sqcoeff = 0;
  int shift = 2 * (bd - 8);
  int rounding = shift > 0 ? 1 << (shift - 1) : 0;

  for (i = 0; i < block_size; i++) {
    const int64_t diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
  }
  assert(error >= 0 && sqcoeff >= 0);
  error = (error + rounding) >> shift;
  sqcoeff = (sqcoeff + rounding) >> shift;

  *ssz = sqcoeff;
  return error;
}

static int64_t vp9_highbd_block_error_dispatch(const tran_low_t *coeff,
                                               const tran_low_t *dqcoeff,
                                               intptr_t block_size,
                                               int64_t *ssz, int bd) {
  if (bd == 8) {
    return vp9_block_error(coeff, dqcoeff, block_size, ssz);
  } else {
    return vp9_highbd_block_error(coeff, dqcoeff, block_size, ssz, bd);
  }
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    const int diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += coeff[i] * coeff[i];
  }

  *ssz = sqcoeff;
  return error;
}

int64_t vp9_block_error_fp_c(const tran_low_t *coeff,
                             const tran_low_t *dqcoeff, int block_size) {
  int i;
  int64_t error = 0;

  for (i = 0; i < block_size; i++) {
    const int diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
  }

  return error;
}

/* The trailing '0' is a terminator which is used inside cost_coeffs() to
 * decide whether to include cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in an 8x8 block,
 * were non-zero). */
static const int16_t band_counts[TX_SIZES][8] = {
  { 1, 2, 3, 4, 3, 16 - 13, 0 },
  { 1, 2, 3, 4, 11, 64 - 21, 0 },
  { 1, 2, 3, 4, 11, 256 - 21, 0 },
  { 1, 2, 3, 4, 11, 1024 - 21, 0 },
};
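
// Estimate the rate (in scaled bit-cost units) of coding the quantized
// coefficients of one transform block by walking the scan order and summing
// per-token costs. The fast path approximates the coefficient context from
// the previous token instead of recomputing it exactly.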
static int cost_coeffs(MACROBLOCK *x, int plane, int block, TX_SIZE tx_size,
                       int pt, const int16_t *scan, const int16_t *nb,
                       int use_fast_coef_costing) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *mi = xd->mi[0];
  const struct macroblock_plane *p = &x->plane[plane];
  const PLANE_TYPE type = get_plane_type(plane);
  const int16_t *band_count = &band_counts[tx_size][1];
  const int eob = p->eobs[block];
  const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  unsigned int(*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
      x->token_costs[tx_size][type][is_inter_block(mi)];
  uint8_t token_cache[32 * 32];
  int cost;
#if CONFIG_VP9_HIGHBITDEPTH
  const uint16_t *cat6_high_cost = vp9_get_high_cost_table(xd->bd);
#else
  const uint16_t *cat6_high_cost = vp9_get_high_cost_table(8);
#endif

  // Check for consistency of tx_size with mode info
  assert(type == PLANE_TYPE_Y
             ? mi->tx_size == tx_size
             : get_uv_tx_size(mi, &xd->plane[plane]) == tx_size);

  if (eob == 0) {
    // single eob token
    cost = token_costs[0][0][pt][EOB_TOKEN];
  } else {
    if (use_fast_coef_costing) {
      int band_left = *band_count++;
      int c;

      // dc token
      int v = qcoeff[0];
      int16_t prev_t;
      cost = vp9_get_token_cost(v, &prev_t, cat6_high_cost);
      cost += (*token_costs)[0][pt][prev_t];

      token_cache[0] = vp9_pt_energy_class[prev_t];
      ++token_costs;

      // ac tokens
      for (c = 1; c < eob; c++) {
        const int rc = scan[c];
        int16_t t;

        v = qcoeff[rc];
        cost += vp9_get_token_cost(v, &t, cat6_high_cost);
        cost += (*token_costs)[!prev_t][!prev_t][t];
        prev_t = t;
        if (!--band_left) {
          band_left = *band_count++;
          ++token_costs;
        }
      }

      // eob token
      if (band_left) cost += (*token_costs)[0][!prev_t][EOB_TOKEN];

    } else {  // !use_fast_coef_costing
      int band_left = *band_count++;
      int c;

      // dc token
      int v = qcoeff[0];
      int16_t tok;
      unsigned int(*tok_cost_ptr)[COEFF_CONTEXTS][ENTROPY_TOKENS];
      cost = vp9_get_token_cost(v, &tok, cat6_high_cost);
      cost += (*token_costs)[0][pt][tok];

      token_cache[0] = vp9_pt_energy_class[tok];
      ++token_costs;

      tok_cost_ptr = &((*token_costs)[!tok]);

      // ac tokens
      for (c = 1; c < eob; c++) {
        const int rc = scan[c];

        v = qcoeff[rc];
        cost += vp9_get_token_cost(v, &tok, cat6_high_cost);
        pt = get_coef_context(nb, token_cache, c);
        cost += (*tok_cost_ptr)[pt][tok];
        token_cache[rc] = vp9_pt_energy_class[tok];
        if (!--band_left) {
          band_left = *band_count++;
          ++token_costs;
        }
        tok_cost_ptr = &((*token_costs)[!tok]);
      }

      // eob token
      if (band_left) {
        pt = get_coef_context(nb, token_cache, c);
        cost += (*token_costs)[0][pt][EOB_TOKEN];
      }
    }
  }

  return cost;
}

static INLINE int num_4x4_to_edge(int plane_4x4_dim, int mb_to_edge_dim,
                                  int subsampling_dim, int blk_dim) {
  return plane_4x4_dim + (mb_to_edge_dim >> (5 + subsampling_dim)) - blk_dim;
}

// Compute the pixel domain sum square error on all visible 4x4s in the
// transform block.
static unsigned pixel_sse(const VP9_COMP *const cpi, const MACROBLOCKD *xd,
                          const struct macroblockd_plane *const pd,
                          const uint8_t *src, const int src_stride,
                          const uint8_t *dst, const int dst_stride, int blk_row,
                          int blk_col, const BLOCK_SIZE plane_bsize,
                          const BLOCK_SIZE tx_bsize) {
  unsigned int sse = 0;
  const int plane_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
  const int plane_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
  const int tx_4x4_w = num_4x4_blocks_wide_lookup[tx_bsize];
  const int tx_4x4_h = num_4x4_blocks_high_lookup[tx_bsize];
  int b4x4s_to_right_edge = num_4x4_to_edge(plane_4x4_w, xd->mb_to_right_edge,
                                            pd->subsampling_x, blk_col);
  int b4x4s_to_bottom_edge = num_4x4_to_edge(plane_4x4_h, xd->mb_to_bottom_edge,
                                             pd->subsampling_y, blk_row);
  if (tx_bsize == BLOCK_4X4 ||
      (b4x4s_to_right_edge >= tx_4x4_w && b4x4s_to_bottom_edge >= tx_4x4_h)) {
    cpi->fn_ptr[tx_bsize].vf(src, src_stride, dst, dst_stride, &sse);
  } else {
    const vpx_variance_fn_t vf_4x4 = cpi->fn_ptr[BLOCK_4X4].vf;
    int r, c;
    unsigned this_sse = 0;
    int max_r = VPXMIN(b4x4s_to_bottom_edge, tx_4x4_h);
    int max_c = VPXMIN(b4x4s_to_right_edge, tx_4x4_w);
    sse = 0;
    // if we are in the unrestricted motion border.
    for (r = 0; r < max_r; ++r) {
      // Skip visiting the sub blocks that are wholly within the UMV.
      for (c = 0; c < max_c; ++c) {
        vf_4x4(src + r * src_stride * 4 + c * 4, src_stride,
               dst + r * dst_stride * 4 + c * 4, dst_stride, &this_sse);
        sse += this_sse;
      }
    }
  }
  return sse;
}

// Compute the sum of squares on all visible 4x4s in the transform block.
static int64_t sum_squares_visible(const MACROBLOCKD *xd,
                                   const struct macroblockd_plane *const pd,
                                   const int16_t *diff, const int diff_stride,
                                   int blk_row, int blk_col,
                                   const BLOCK_SIZE plane_bsize,
                                   const BLOCK_SIZE tx_bsize) {
  int64_t sse;
  const int plane_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
  const int plane_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
  const int tx_4x4_w = num_4x4_blocks_wide_lookup[tx_bsize];
  const int tx_4x4_h = num_4x4_blocks_high_lookup[tx_bsize];
  int b4x4s_to_right_edge = num_4x4_to_edge(plane_4x4_w, xd->mb_to_right_edge,
                                            pd->subsampling_x, blk_col);
  int b4x4s_to_bottom_edge = num_4x4_to_edge(plane_4x4_h, xd->mb_to_bottom_edge,
                                             pd->subsampling_y, blk_row);
  if (tx_bsize == BLOCK_4X4 ||
      (b4x4s_to_right_edge >= tx_4x4_w && b4x4s_to_bottom_edge >= tx_4x4_h)) {
    assert(tx_4x4_w == tx_4x4_h);
    sse = (int64_t)vpx_sum_squares_2d_i16(diff, diff_stride, tx_4x4_w << 2);
  } else {
    int r, c;
    int max_r = VPXMIN(b4x4s_to_bottom_edge, tx_4x4_h);
    int max_c = VPXMIN(b4x4s_to_right_edge, tx_4x4_w);
    sse = 0;
    // if we are in the unrestricted motion border.
    for (r = 0; r < max_r; ++r) {
      // Skip visiting the sub blocks that are wholly within the UMV.
      for (c = 0; c < max_c; ++c) {
        sse += (int64_t)vpx_sum_squares_2d_i16(
            diff + r * diff_stride * 4 + c * 4, diff_stride, 4);
      }
    }
  }
  return sse;
}
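
// Compute the distortion and SSE of one transform block, either in the
// transform domain (from the coefficient error) or in the pixel domain by
// reconstructing the block and comparing against the source.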
static void dist_block(const VP9_COMP *cpi, MACROBLOCK *x, int plane,
                       BLOCK_SIZE plane_bsize, int block, int blk_row,
                       int blk_col, TX_SIZE tx_size, int64_t *out_dist,
                       int64_t *out_sse) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const int eob = p->eobs[block];

  if (x->block_tx_domain && eob) {
    const int ss_txfrm_size = tx_size << 1;
    int64_t this_sse;
    const int shift = tx_size == TX_32X32 ? 0 : 2;
    const tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
    const tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
#if CONFIG_VP9_HIGHBITDEPTH
    const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8;
    *out_dist = vp9_highbd_block_error_dispatch(
                    coeff, dqcoeff, 16 << ss_txfrm_size, &this_sse, bd) >>
                shift;
#else
    *out_dist =
        vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size, &this_sse) >>
        shift;
#endif  // CONFIG_VP9_HIGHBITDEPTH
    *out_sse = this_sse >> shift;

    if (x->skip_encode && !is_inter_block(xd->mi[0])) {
      // TODO(jingning): tune the model to better capture the distortion.
      const int64_t p =
          (pd->dequant[1] * pd->dequant[1] * (1 << ss_txfrm_size)) >>
#if CONFIG_VP9_HIGHBITDEPTH
          (shift + 2 + (bd - 8) * 2);
#else
          (shift + 2);
#endif  // CONFIG_VP9_HIGHBITDEPTH
      *out_dist += (p >> 4);
      *out_sse += p;
    }
  } else {
    const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
    const int bs = 4 * num_4x4_blocks_wide_lookup[tx_bsize];
    const int src_stride = p->src.stride;
    const int dst_stride = pd->dst.stride;
    const int src_idx = 4 * (blk_row * src_stride + blk_col);
    const int dst_idx = 4 * (blk_row * dst_stride + blk_col);
    const uint8_t *src = &p->src.buf[src_idx];
    const uint8_t *dst = &pd->dst.buf[dst_idx];
    const tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
    unsigned int tmp;

    tmp = pixel_sse(cpi, xd, pd, src, src_stride, dst, dst_stride, blk_row,
                    blk_col, plane_bsize, tx_bsize);
    *out_sse = (int64_t)tmp * 16;

    if (eob) {
#if CONFIG_VP9_HIGHBITDEPTH
      DECLARE_ALIGNED(16, uint16_t, recon16[1024]);
      uint8_t *recon = (uint8_t *)recon16;
#else
      DECLARE_ALIGNED(16, uint8_t, recon[1024]);
#endif  // CONFIG_VP9_HIGHBITDEPTH

#if CONFIG_VP9_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        vpx_highbd_convolve_copy(CONVERT_TO_SHORTPTR(dst), dst_stride, recon16,
                                 32, NULL, 0, 0, 0, 0, bs, bs, xd->bd);
        if (xd->lossless) {
          vp9_highbd_iwht4x4_add(dqcoeff, recon16, 32, eob, xd->bd);
        } else {
          switch (tx_size) {
            case TX_4X4:
              vp9_highbd_idct4x4_add(dqcoeff, recon16, 32, eob, xd->bd);
              break;
            case TX_8X8:
              vp9_highbd_idct8x8_add(dqcoeff, recon16, 32, eob, xd->bd);
              break;
            case TX_16X16:
              vp9_highbd_idct16x16_add(dqcoeff, recon16, 32, eob, xd->bd);
              break;
            default:
              assert(tx_size == TX_32X32);
              vp9_highbd_idct32x32_add(dqcoeff, recon16, 32, eob, xd->bd);
              break;
          }
        }
        recon = CONVERT_TO_BYTEPTR(recon16);
      } else {
#endif  // CONFIG_VP9_HIGHBITDEPTH
        vpx_convolve_copy(dst, dst_stride, recon, 32, NULL, 0, 0, 0, 0, bs, bs);
        switch (tx_size) {
          case TX_32X32: vp9_idct32x32_add(dqcoeff, recon, 32, eob); break;
          case TX_16X16: vp9_idct16x16_add(dqcoeff, recon, 32, eob); break;
          case TX_8X8: vp9_idct8x8_add(dqcoeff, recon, 32, eob); break;
          default:
            assert(tx_size == TX_4X4);
            // this is like vp9_short_idct4x4 but has a special case around
            // eob<=1, which is significant (not just an optimization) for
            // the lossless case.
            x->inv_txfm_add(dqcoeff, recon, 32, eob);
            break;
        }
#if CONFIG_VP9_HIGHBITDEPTH
      }
#endif  // CONFIG_VP9_HIGHBITDEPTH

      tmp = pixel_sse(cpi, xd, pd, src, src_stride, recon, 32, blk_row, blk_col,
                      plane_bsize, tx_bsize);
    }

    *out_dist = (int64_t)tmp * 16;
  }
}

static int rate_block(int plane, int block, TX_SIZE tx_size, int coeff_ctx,
                      struct rdcost_block_args *args) {
  return cost_coeffs(args->x, plane, block, tx_size, coeff_ctx, args->so->scan,
                     args->so->neighbors, args->use_fast_coef_costing);
}
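
// Per-transform-block callback for the RD search: encodes (or models) one
// block, accumulates rate/distortion/SSE into the rdcost_block_args, and sets
// exit_early once the running RD cost exceeds the best RD seen so far.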
static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
                          BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
  struct rdcost_block_args *args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];
  int64_t rd1, rd2, rd;
  int rate;
  int64_t dist;
  int64_t sse;
  const int coeff_ctx =
      combine_entropy_contexts(args->t_left[blk_row], args->t_above[blk_col]);

  if (args->exit_early) return;

  if (!is_inter_block(mi)) {
    struct encode_b_args intra_arg = { x, x->block_qcoeff_opt, args->t_above,
                                       args->t_left, &mi->skip };
    vp9_encode_block_intra(plane, block, blk_row, blk_col, plane_bsize, tx_size,
                           &intra_arg);
    if (x->block_tx_domain) {
      dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col,
                 tx_size, &dist, &sse);
    } else {
      const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
      const struct macroblock_plane *const p = &x->plane[plane];
      const struct macroblockd_plane *const pd = &xd->plane[plane];
      const int src_stride = p->src.stride;
      const int dst_stride = pd->dst.stride;
      const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
      const uint8_t *src = &p->src.buf[4 * (blk_row * src_stride + blk_col)];
      const uint8_t *dst = &pd->dst.buf[4 * (blk_row * dst_stride + blk_col)];
      const int16_t *diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
      unsigned int tmp;
      sse = sum_squares_visible(xd, pd, diff, diff_stride, blk_row, blk_col,
                                plane_bsize, tx_bsize);
#if CONFIG_VP9_HIGHBITDEPTH
      if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) && (xd->bd > 8))
        sse = ROUND64_POWER_OF_TWO(sse, (xd->bd - 8) * 2);
#endif  // CONFIG_VP9_HIGHBITDEPTH
      sse = sse * 16;
      tmp = pixel_sse(args->cpi, xd, pd, src, src_stride, dst, dst_stride,
                      blk_row, blk_col, plane_bsize, tx_bsize);
      dist = (int64_t)tmp * 16;
    }
  } else {
    int skip_txfm_flag = SKIP_TXFM_NONE;
    if (max_txsize_lookup[plane_bsize] == tx_size)
      skip_txfm_flag = x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))];
    if (skip_txfm_flag == SKIP_TXFM_NONE) {
      // full forward transform and quantization
      vp9_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size);
      if (x->block_qcoeff_opt)
        vp9_optimize_b(x, plane, block, tx_size, coeff_ctx);
      dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col,
                 tx_size, &dist, &sse);
    } else if (skip_txfm_flag == SKIP_TXFM_AC_ONLY) {
      // compute DC coefficient
      tran_low_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block);
      tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
      vp9_xform_quant_dc(x, plane, block, blk_row, blk_col, plane_bsize,
                         tx_size);
      sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
      dist = sse;
      if (x->plane[plane].eobs[block]) {
        const int64_t orig_sse = (int64_t)coeff[0] * coeff[0];
        const int64_t resd_sse = coeff[0] - dqcoeff[0];
        int64_t dc_correct = orig_sse - resd_sse * resd_sse;
#if CONFIG_VP9_HIGHBITDEPTH
        dc_correct >>= ((xd->bd - 8) * 2);
#endif
        if (tx_size != TX_32X32) dc_correct >>= 2;

        dist = VPXMAX(0, sse - dc_correct);
      }
    } else {
      // SKIP_TXFM_AC_DC
      // skip forward transform. Because this is handled here, the quantization
      // does not need to do it.
      x->plane[plane].eobs[block] = 0;
      sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
      dist = sse;
    }
  }

  rd = RDCOST(x->rdmult, x->rddiv, 0, dist);
  if (args->this_rd + rd > args->best_rd) {
    args->exit_early = 1;
    return;
  }

  rate = rate_block(plane, block, tx_size, coeff_ctx, args);
  args->t_above[blk_col] = (x->plane[plane].eobs[block] > 0) ? 1 : 0;
  args->t_left[blk_row] = (x->plane[plane].eobs[block] > 0) ? 1 : 0;
  rd1 = RDCOST(x->rdmult, x->rddiv, rate, dist);
  rd2 = RDCOST(x->rdmult, x->rddiv, 0, sse);

  // TODO(jingning): temporarily enabled only for luma component
  rd = VPXMIN(rd1, rd2);
  if (plane == 0) {
    x->zcoeff_blk[tx_size][block] =
        !x->plane[plane].eobs[block] ||
        (x->sharpness == 0 && rd1 > rd2 && !xd->lossless);
    x->sum_y_eobs[tx_size] += x->plane[plane].eobs[block];
  }

  args->this_rate += rate;
  args->this_dist += dist;
  args->this_sse += sse;
  args->this_rd += rd;

  if (args->this_rd > args->best_rd) {
    args->exit_early = 1;
    return;
  }

  args->skippable &= !x->plane[plane].eobs[block];
}
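
// Run the per-block RD callback over every transform block of one plane and
// return the accumulated rate, distortion, SSE and skippable flag, or INT_MAX
// sentinels if the search terminated early.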
static void txfm_rd_in_plane(const VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                             int64_t *distortion, int *skippable, int64_t *sse,
                             int64_t ref_best_rd, int plane, BLOCK_SIZE bsize,
                             TX_SIZE tx_size, int use_fast_coef_costing) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  struct rdcost_block_args args;
  vp9_zero(args);
  args.cpi = cpi;
  args.x = x;
  args.best_rd = ref_best_rd;
  args.use_fast_coef_costing = use_fast_coef_costing;
  args.skippable = 1;

  if (plane == 0) xd->mi[0]->tx_size = tx_size;

  vp9_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);

  args.so = get_scan(xd, tx_size, get_plane_type(plane), 0);

  vp9_foreach_transformed_block_in_plane(xd, bsize, plane, block_rd_txfm,
                                         &args);
  if (args.exit_early) {
    *rate = INT_MAX;
    *distortion = INT64_MAX;
    *sse = INT64_MAX;
    *skippable = 0;
  } else {
    *distortion = args.this_dist;
    *rate = args.this_rate;
    *sse = args.this_sse;
    *skippable = args.skippable;
  }
}

static void choose_largest_tx_size(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                                   int64_t *distortion, int *skip, int64_t *sse,
                                   int64_t ref_best_rd, BLOCK_SIZE bs) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  VP9_COMMON *const cm = &cpi->common;
  const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];

  mi->tx_size = VPXMIN(max_tx_size, largest_tx_size);

  txfm_rd_in_plane(cpi, x, rate, distortion, skip, sse, ref_best_rd, 0, bs,
                   mi->tx_size, cpi->sf.use_fast_coef_costing);
}

static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                                   int64_t *distortion, int *skip,
                                   int64_t *psse, int64_t ref_best_rd,
                                   BLOCK_SIZE bs) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];
  vpx_prob skip_prob = vp9_get_skip_prob(cm, xd);
  int r[TX_SIZES][2], s[TX_SIZES];
  int64_t d[TX_SIZES], sse[TX_SIZES];
  int64_t rd[TX_SIZES][2] = { { INT64_MAX, INT64_MAX },
                              { INT64_MAX, INT64_MAX },
                              { INT64_MAX, INT64_MAX },
                              { INT64_MAX, INT64_MAX } };
  int n;
  int s0, s1;
  int64_t best_rd = ref_best_rd;
  TX_SIZE best_tx = max_tx_size;
  int start_tx, end_tx;
  const int tx_size_ctx = get_tx_size_context(xd);
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);

  if (cm->tx_mode == TX_MODE_SELECT) {
    start_tx = max_tx_size;
    end_tx = VPXMAX(start_tx - cpi->sf.tx_size_search_depth, 0);
    if (bs > BLOCK_32X32) end_tx = VPXMIN(end_tx + 1, start_tx);
  } else {
    TX_SIZE chosen_tx_size =
        VPXMIN(max_tx_size, tx_mode_to_biggest_tx_size[cm->tx_mode]);
    start_tx = chosen_tx_size;
    end_tx = chosen_tx_size;
  }

  for (n = start_tx; n >= end_tx; n--) {
    const int r_tx_size = cpi->tx_size_cost[max_tx_size - 1][tx_size_ctx][n];
    txfm_rd_in_plane(cpi, x, &r[n][0], &d[n], &s[n], &sse[n], best_rd, 0, bs, n,
                     cpi->sf.use_fast_coef_costing);
    r[n][1] = r[n][0];
    if (r[n][0] < INT_MAX) {
      r[n][1] += r_tx_size;
    }
    if (d[n] == INT64_MAX || r[n][0] == INT_MAX) {
      rd[n][0] = rd[n][1] = INT64_MAX;
    } else if (s[n]) {
      if (is_inter_block(mi)) {
        rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, sse[n]);
        r[n][1] -= r_tx_size;
      } else {
        rd[n][0] = RDCOST(x->rdmult, x->rddiv, s1, sse[n]);
        rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1 + r_tx_size, sse[n]);
      }
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
    }

    if (is_inter_block(mi) && !xd->lossless && !s[n] && sse[n] != INT64_MAX) {
      rd[n][0] = VPXMIN(rd[n][0], RDCOST(x->rdmult, x->rddiv, s1, sse[n]));
      rd[n][1] = VPXMIN(rd[n][1], RDCOST(x->rdmult, x->rddiv, s1, sse[n]));
    }

    // Early termination in transform size search.
    if (cpi->sf.tx_size_search_breakout &&
        (rd[n][1] == INT64_MAX ||
         (n < (int)max_tx_size && rd[n][1] > rd[n + 1][1]) || s[n] == 1))
      break;

    if (rd[n][1] < best_rd) {
      best_tx = n;
      best_rd = rd[n][1];
    }
  }
  mi->tx_size = best_tx;

  *distortion = d[mi->tx_size];
  *rate = r[mi->tx_size][cm->tx_mode == TX_MODE_SELECT];
  *skip = s[mi->tx_size];
  *psse = sse[mi->tx_size];
}
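
// Rate-distortion for the luma plane of a whole block: either force the
// largest allowed transform size or search over transform sizes, depending on
// the speed features and lossless mode.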
static void super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                            int64_t *distortion, int *skip, int64_t *psse,
                            BLOCK_SIZE bs, int64_t ref_best_rd) {
  MACROBLOCKD *xd = &x->e_mbd;
  int64_t sse;
  int64_t *ret_sse = psse ? psse : &sse;

  assert(bs == xd->mi[0]->sb_type);

  if (cpi->sf.tx_size_search_method == USE_LARGESTALL || xd->lossless) {
    choose_largest_tx_size(cpi, x, rate, distortion, skip, ret_sse, ref_best_rd,
                           bs);
  } else {
    choose_tx_size_from_rd(cpi, x, rate, distortion, skip, ret_sse, ref_best_rd,
                           bs);
  }
}

static int conditional_skipintra(PREDICTION_MODE mode,
                                 PREDICTION_MODE best_intra_mode) {
  if (mode == D117_PRED && best_intra_mode != V_PRED &&
      best_intra_mode != D135_PRED)
    return 1;
  if (mode == D63_PRED && best_intra_mode != V_PRED &&
      best_intra_mode != D45_PRED)
    return 1;
  if (mode == D207_PRED && best_intra_mode != H_PRED &&
      best_intra_mode != D45_PRED)
    return 1;
  if (mode == D153_PRED && best_intra_mode != H_PRED &&
      best_intra_mode != D135_PRED)
    return 1;
  return 0;
}

static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int row,
                                     int col, PREDICTION_MODE *best_mode,
                                     const int *bmode_costs, ENTROPY_CONTEXT *a,
                                     ENTROPY_CONTEXT *l, int *bestrate,
                                     int *bestratey, int64_t *bestdistortion,
                                     BLOCK_SIZE bsize, int64_t rd_thresh) {
  PREDICTION_MODE mode;
  MACROBLOCKD *const xd = &x->e_mbd;
  int64_t best_rd = rd_thresh;
  struct macroblock_plane *p = &x->plane[0];
  struct macroblockd_plane *pd = &xd->plane[0];
  const int src_stride = p->src.stride;
  const int dst_stride = pd->dst.stride;
  const uint8_t *src_init = &p->src.buf[row * 4 * src_stride + col * 4];
  uint8_t *dst_init = &pd->dst.buf[row * 4 * src_stride + col * 4];
  ENTROPY_CONTEXT ta[2], tempa[2];
  ENTROPY_CONTEXT tl[2], templ[2];
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  int idx, idy;
  uint8_t best_dst[8 * 8];
#if CONFIG_VP9_HIGHBITDEPTH
  uint16_t best_dst16[8 * 8];
#endif
  memcpy(ta, a, num_4x4_blocks_wide * sizeof(a[0]));
  memcpy(tl, l, num_4x4_blocks_high * sizeof(l[0]));

  xd->mi[0]->tx_size = TX_4X4;

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
      int64_t this_rd;
      int ratey = 0;
      int64_t distortion = 0;
      int rate = bmode_costs[mode];

      if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode))) continue;

      // Only do the oblique modes if the best so far is
      // one of the neighboring directional modes
      if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
        if (conditional_skipintra(mode, *best_mode)) continue;
      }

      memcpy(tempa, ta, num_4x4_blocks_wide * sizeof(ta[0]));
      memcpy(templ, tl, num_4x4_blocks_high * sizeof(tl[0]));

      for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
        for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
          const int block = (row + idy) * 2 + (col + idx);
          const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
          uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
          uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst);
          int16_t *const src_diff =
              vp9_raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
          tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
          xd->mi[0]->bmi[block].as_mode = mode;
          vp9_predict_intra_block(xd, 1, TX_4X4, mode,
                                  x->skip_encode ? src : dst,
                                  x->skip_encode ? src_stride : dst_stride, dst,
                                  dst_stride, col + idx, row + idy, 0);
          vpx_highbd_subtract_block(4, 4, src_diff, 8, src, src_stride, dst,
                                    dst_stride, xd->bd);
          if (xd->lossless) {
            const scan_order *so = &vp9_default_scan_orders[TX_4X4];
            const int coeff_ctx =
                combine_entropy_contexts(tempa[idx], templ[idy]);
            vp9_highbd_fwht4x4(src_diff, coeff, 8);
            vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
            ratey += cost_coeffs(x, 0, block, TX_4X4, coeff_ctx, so->scan,
                                 so->neighbors, cpi->sf.use_fast_coef_costing);
            tempa[idx] = templ[idy] = (x->plane[0].eobs[block] > 0 ? 1 : 0);
            if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
              goto next_highbd;
            vp9_highbd_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block), dst16,
                                   dst_stride, p->eobs[block], xd->bd);
          } else {
            int64_t unused;
            const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block);
            const scan_order *so = &vp9_scan_orders[TX_4X4][tx_type];
            const int coeff_ctx =
                combine_entropy_contexts(tempa[idx], templ[idy]);
            if (tx_type == DCT_DCT)
              vpx_highbd_fdct4x4(src_diff, coeff, 8);
            else
              vp9_highbd_fht4x4(src_diff, coeff, 8, tx_type);
            vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
            ratey += cost_coeffs(x, 0, block, TX_4X4, coeff_ctx, so->scan,
                                 so->neighbors, cpi->sf.use_fast_coef_costing);
            distortion += vp9_highbd_block_error_dispatch(
                              coeff, BLOCK_OFFSET(pd->dqcoeff, block), 16,
                              &unused, xd->bd) >>
                          2;
            tempa[idx] = templ[idy] = (x->plane[0].eobs[block] > 0 ? 1 : 0);
            if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
              goto next_highbd;
            vp9_highbd_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block),
                                  dst16, dst_stride, p->eobs[block], xd->bd);
          }
        }
      }

      rate += ratey;
      this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

      if (this_rd < best_rd) {
        *bestrate = rate;
        *bestratey = ratey;
        *bestdistortion = distortion;
        best_rd = this_rd;
        *best_mode = mode;
        memcpy(a, tempa, num_4x4_blocks_wide * sizeof(tempa[0]));
        memcpy(l, templ, num_4x4_blocks_high * sizeof(templ[0]));
        for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
          memcpy(best_dst16 + idy * 8,
                 CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
                 num_4x4_blocks_wide * 4 * sizeof(uint16_t));
        }
      }
    next_highbd : {}
    }
    if (best_rd >= rd_thresh || x->skip_encode) return best_rd;

    for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
      memcpy(CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
             best_dst16 + idy * 8, num_4x4_blocks_wide * 4 * sizeof(uint16_t));
    }

    return best_rd;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    int64_t this_rd;
    int ratey = 0;
    int64_t distortion = 0;
    int rate = bmode_costs[mode];

    if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode))) continue;

    // Only do the oblique modes if the best so far is
    // one of the neighboring directional modes
    if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
      if (conditional_skipintra(mode, *best_mode)) continue;
    }

    memcpy(tempa, ta, num_4x4_blocks_wide * sizeof(ta[0]));
    memcpy(templ, tl, num_4x4_blocks_high * sizeof(tl[0]));

    for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
      for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
        const int block = (row + idy) * 2 + (col + idx);
        const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
        uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
        int16_t *const src_diff =
            vp9_raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
        tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
        xd->mi[0]->bmi[block].as_mode = mode;
        vp9_predict_intra_block(xd, 1, TX_4X4, mode, x->skip_encode ? src : dst,
                                x->skip_encode ? src_stride : dst_stride, dst,
                                dst_stride, col + idx, row + idy, 0);
        vpx_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride);

        if (xd->lossless) {
          const scan_order *so = &vp9_default_scan_orders[TX_4X4];
          const int coeff_ctx =
              combine_entropy_contexts(tempa[idx], templ[idy]);
          vp9_fwht4x4(src_diff, coeff, 8);
          vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
          ratey += cost_coeffs(x, 0, block, TX_4X4, coeff_ctx, so->scan,
                               so->neighbors, cpi->sf.use_fast_coef_costing);
          tempa[idx] = templ[idy] = (x->plane[0].eobs[block] > 0) ? 1 : 0;
          if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
            goto next;
          vp9_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block), dst, dst_stride,
                          p->eobs[block]);
        } else {
          int64_t unused;
          const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block);
          const scan_order *so = &vp9_scan_orders[TX_4X4][tx_type];
          const int coeff_ctx =
              combine_entropy_contexts(tempa[idx], templ[idy]);
          vp9_fht4x4(src_diff, coeff, 8, tx_type);
          vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
          ratey += cost_coeffs(x, 0, block, TX_4X4, coeff_ctx, so->scan,
                               so->neighbors, cpi->sf.use_fast_coef_costing);
          tempa[idx] = templ[idy] = (x->plane[0].eobs[block] > 0) ? 1 : 0;
          distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block),
                                        16, &unused) >>
                        2;
          if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
            goto next;
          vp9_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block), dst,
                         dst_stride, p->eobs[block]);
        }
      }
    }

    rate += ratey;
    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

    if (this_rd < best_rd) {
      *bestrate = rate;
      *bestratey = ratey;
      *bestdistortion = distortion;
      best_rd = this_rd;
      *best_mode = mode;
      memcpy(a, tempa, num_4x4_blocks_wide * sizeof(tempa[0]));
      memcpy(l, templ, num_4x4_blocks_high * sizeof(templ[0]));
      for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
        memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
               num_4x4_blocks_wide * 4);
    }
  next : {}
  }

  if (best_rd >= rd_thresh || x->skip_encode) return best_rd;

  for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
    memcpy(dst_init + idy * dst_stride, best_dst + idy * 8,
           num_4x4_blocks_wide * 4);

  return best_rd;
}
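
// Returns the total RD cost of the chosen sub-block intra modes, or INT64_MAX
// if the running cost exceeds best_rd (early termination).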
static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP *cpi, MACROBLOCK *mb,
                                            int *rate, int *rate_y,
                                            int64_t *distortion,
                                            int64_t best_rd) {
  int i, j;
  const MACROBLOCKD *const xd = &mb->e_mbd;
  MODE_INFO *const mic = xd->mi[0];
  const MODE_INFO *above_mi = xd->above_mi;
  const MODE_INFO *left_mi = xd->left_mi;
  const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  int idx, idy;
  int cost = 0;
  int64_t total_distortion = 0;
  int tot_rate_y = 0;
  int64_t total_rd = 0;
  const int *bmode_costs = cpi->mbmode_cost;

  // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
  for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
    for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
      PREDICTION_MODE best_mode = DC_PRED;
      int r = INT_MAX, ry = INT_MAX;
      int64_t d = INT64_MAX, this_rd = INT64_MAX;
      i = idy * 2 + idx;
      if (cpi->common.frame_type == KEY_FRAME) {
        const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, i);
        const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, i);

        bmode_costs = cpi->y_mode_costs[A][L];
      }

      this_rd = rd_pick_intra4x4block(
          cpi, mb, idy, idx, &best_mode, bmode_costs,
          xd->plane[0].above_context + idx, xd->plane[0].left_context + idy, &r,
          &ry, &d, bsize, best_rd - total_rd);

      if (this_rd >= best_rd - total_rd) return INT64_MAX;

      total_rd += this_rd;
      cost += r;
      total_distortion += d;
      tot_rate_y += ry;

      mic->bmi[i].as_mode = best_mode;
      for (j = 1; j < num_4x4_blocks_high; ++j)
        mic->bmi[i + j * 2].as_mode = best_mode;
      for (j = 1; j < num_4x4_blocks_wide; ++j)
        mic->bmi[i + j].as_mode = best_mode;

      if (total_rd >= best_rd) return INT64_MAX;
    }
  }

  *rate = cost;
  *rate_y = tot_rate_y;
  *distortion = total_distortion;
  mic->mode = mic->bmi[3].as_mode;

  return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion);
}

// This function is used only for intra_only frames
static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                                      int *rate_tokenonly, int64_t *distortion,
                                      int *skippable, BLOCK_SIZE bsize,
                                      int64_t best_rd) {
  PREDICTION_MODE mode;
  PREDICTION_MODE mode_selected = DC_PRED;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mic = xd->mi[0];
  int this_rate, this_rate_tokenonly, s;
  int64_t this_distortion, this_rd;
  TX_SIZE best_tx = TX_4X4;
  int *bmode_costs;
  const MODE_INFO *above_mi = xd->above_mi;
  const MODE_INFO *left_mi = xd->left_mi;
  const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0);
  const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0);
  bmode_costs = cpi->y_mode_costs[A][L];

  memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
  /* Y Search for intra prediction mode */
  for (mode = DC_PRED; mode <= TM_PRED; mode++) {
    if (cpi->sf.use_nonrd_pick_mode) {
      // These speed features are turned on in hybrid non-RD and RD mode
      // for key frame coding in the context of real-time setting.
      if (conditional_skipintra(mode, mode_selected)) continue;
      if (*skippable) break;
    }

    mic->mode = mode;

    super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, NULL,
                    bsize, best_rd);

    if (this_rate_tokenonly == INT_MAX) continue;

    this_rate = this_rate_tokenonly + bmode_costs[mode];
    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

    if (this_rd < best_rd) {
      mode_selected = mode;
      best_rd = this_rd;
      best_tx = mic->tx_size;
      *rate = this_rate;
      *rate_tokenonly = this_rate_tokenonly;
      *distortion = this_distortion;
      *skippable = s;
    }
  }

  mic->mode = mode_selected;
  mic->tx_size = best_tx;

  return best_rd;
}

// Return value 0: early termination triggered, no valid rd cost available;
//              1: rd cost values are valid.
static int super_block_uvrd(const VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                            int64_t *distortion, int *skippable, int64_t *sse,
                            BLOCK_SIZE bsize, int64_t ref_best_rd) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];
  const TX_SIZE uv_tx_size = get_uv_tx_size(mi, &xd->plane[1]);
  int plane;
  int pnrate = 0, pnskip = 1;
  int64_t pndist = 0, pnsse = 0;
  int is_cost_valid = 1;

  if (ref_best_rd < 0) is_cost_valid = 0;

  if (is_inter_block(mi) && is_cost_valid) {
    int plane;
    for (plane = 1; plane < MAX_MB_PLANE; ++plane)
      vp9_subtract_plane(x, bsize, plane);
  }

  *rate = 0;
  *distortion = 0;
  *sse = 0;
  *skippable = 1;

  for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
    txfm_rd_in_plane(cpi, x, &pnrate, &pndist, &pnskip, &pnsse, ref_best_rd,
                     plane, bsize, uv_tx_size, cpi->sf.use_fast_coef_costing);
    if (pnrate == INT_MAX) {
      is_cost_valid = 0;
      break;
    }
    *rate += pnrate;
    *distortion += pndist;
    *sse += pnsse;
    *skippable &= pnskip;
  }

  if (!is_cost_valid) {
    // reset cost value
    *rate = INT_MAX;
    *distortion = INT64_MAX;
    *sse = INT64_MAX;
    *skippable = 0;
  }

  return is_cost_valid;
}

static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                       PICK_MODE_CONTEXT *ctx, int *rate,
                                       int *rate_tokenonly, int64_t *distortion,
                                       int *skippable, BLOCK_SIZE bsize,
                                       TX_SIZE max_tx_size) {
  MACROBLOCKD *xd = &x->e_mbd;
  PREDICTION_MODE mode;
  PREDICTION_MODE mode_selected = DC_PRED;
  int64_t best_rd = INT64_MAX, this_rd;
  int this_rate_tokenonly, this_rate, s;
  int64_t this_distortion, this_sse;

  memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode))) continue;
#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
    if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) &&
        (xd->above_mi == NULL || xd->left_mi == NULL) && need_top_left[mode])
      continue;
#endif  // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH

    xd->mi[0]->uv_mode = mode;

    if (!super_block_uvrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s,
                          &this_sse, bsize, best_rd))
      continue;
    this_rate =
        this_rate_tokenonly +
        cpi->intra_uv_mode_cost[cpi->common.frame_type][xd->mi[0]->mode][mode];
    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

    if (this_rd < best_rd) {
      mode_selected = mode;
      best_rd = this_rd;
      *rate = this_rate;
      *rate_tokenonly = this_rate_tokenonly;
      *distortion = this_distortion;
      *skippable = s;
      if (!x->select_tx_size) swap_block_ptr(x, ctx, 2, 0, 1, MAX_MB_PLANE);
    }
  }

  xd->mi[0]->uv_mode = mode_selected;
  return best_rd;
}

static int64_t rd_sbuv_dcpred(const VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                              int *rate_tokenonly, int64_t *distortion,
                              int *skippable, BLOCK_SIZE bsize) {
  const VP9_COMMON *cm = &cpi->common;
  int64_t unused;

  x->e_mbd.mi[0]->uv_mode = DC_PRED;
  memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
  super_block_uvrd(cpi, x, rate_tokenonly, distortion, skippable, &unused,
                   bsize, INT64_MAX);
  *rate =
      *rate_tokenonly +
      cpi->intra_uv_mode_cost[cm->frame_type][x->e_mbd.mi[0]->mode][DC_PRED];
  return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
}

static void choose_intra_uv_mode(VP9_COMP *cpi, MACROBLOCK *const x,
                                 PICK_MODE_CONTEXT *ctx, BLOCK_SIZE bsize,
                                 TX_SIZE max_tx_size, int *rate_uv,
                                 int *rate_uv_tokenonly, int64_t *dist_uv,
                                 int *skip_uv, PREDICTION_MODE *mode_uv) {
  // Use an estimated rd for uv_intra based on DC_PRED if the
  // appropriate speed flag is set.
  if (cpi->sf.use_uv_intra_rd_estimate) {
    rd_sbuv_dcpred(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
                   bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
    // Else do a proper rd search for each possible transform size that may
    // be considered in the main rd loop.
  } else {
    rd_pick_intra_sbuv_mode(cpi, x, ctx, rate_uv, rate_uv_tokenonly, dist_uv,
                            skip_uv, bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize,
                            max_tx_size);
  }
  *mode_uv = x->e_mbd.mi[0]->uv_mode;
}

static int cost_mv_ref(const VP9_COMP *cpi, PREDICTION_MODE mode,
                       int mode_context) {
  assert(is_inter_mode(mode));
  return cpi->inter_mode_cost[mode_context][INTER_OFFSET(mode)];
}

static int set_and_cost_bmi_mvs(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
                                int i, PREDICTION_MODE mode, int_mv this_mv[2],
                                int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
                                int_mv seg_mvs[MAX_REF_FRAMES],
                                int_mv *best_ref_mv[2], const int *mvjcost,
                                int *mvcost[2]) {
  MODE_INFO *const mi = xd->mi[0];
  const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
  int thismvcost = 0;
  int idx, idy;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mi->sb_type];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mi->sb_type];
  const int is_compound = has_second_ref(mi);

  switch (mode) {
    case NEWMV:
      this_mv[0].as_int = seg_mvs[mi->ref_frame[0]].as_int;
      thismvcost += vp9_mv_bit_cost(&this_mv[0].as_mv, &best_ref_mv[0]->as_mv,
                                    mvjcost, mvcost, MV_COST_WEIGHT_SUB);
      if (is_compound) {
        this_mv[1].as_int = seg_mvs[mi->ref_frame[1]].as_int;
        thismvcost += vp9_mv_bit_cost(&this_mv[1].as_mv, &best_ref_mv[1]->as_mv,
                                      mvjcost, mvcost, MV_COST_WEIGHT_SUB);
      }
      break;
    case NEARMV:
    case NEARESTMV:
      this_mv[0].as_int = frame_mv[mode][mi->ref_frame[0]].as_int;
      if (is_compound)
        this_mv[1].as_int = frame_mv[mode][mi->ref_frame[1]].as_int;
      break;
    default:
      assert(mode == ZEROMV);
      this_mv[0].as_int = 0;
      if (is_compound) this_mv[1].as_int = 0;
      break;
  }

  mi->bmi[i].as_mv[0].as_int = this_mv[0].as_int;
  if (is_compound) mi->bmi[i].as_mv[1].as_int = this_mv[1].as_int;

  mi->bmi[i].as_mode = mode;

  for (idy = 0; idy < num_4x4_blocks_high; ++idy)
    for (idx = 0; idx < num_4x4_blocks_wide; ++idx)
      memmove(&mi->bmi[i + idy * 2 + idx], &mi->bmi[i], sizeof(mi->bmi[i]));

  return cost_mv_ref(cpi, mode, mbmi_ext->mode_context[mi->ref_frame[0]]) +
         thismvcost;
}
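
// Encode one sub-8x8 inter segment: build the (possibly compound) prediction,
// transform/quantize the 4x4 residuals, and return the RD cost, or INT64_MAX
// if it already exceeds best_yrd.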
static int64_t encode_inter_mb_segment(VP9_COMP *cpi, MACROBLOCK *x,
                                       int64_t best_yrd, int i, int *labelyrate,
                                       int64_t *distortion, int64_t *sse,
                                       ENTROPY_CONTEXT *ta, ENTROPY_CONTEXT *tl,
                                       int mi_row, int mi_col) {
  int k;
  MACROBLOCKD *xd = &x->e_mbd;
  struct macroblockd_plane *const pd = &xd->plane[0];
  struct macroblock_plane *const p = &x->plane[0];
  MODE_INFO *const mi = xd->mi[0];
  const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->sb_type, pd);
  const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize];
  int idx, idy;

  const uint8_t *const src =
      &p->src.buf[vp9_raster_block_offset(BLOCK_8X8, i, p->src.stride)];
  uint8_t *const dst =
      &pd->dst.buf[vp9_raster_block_offset(BLOCK_8X8, i, pd->dst.stride)];
  int64_t thisdistortion = 0, thissse = 0;
  int thisrate = 0, ref;
  const scan_order *so = &vp9_default_scan_orders[TX_4X4];
  const int is_compound = has_second_ref(mi);
  const InterpKernel *kernel = vp9_filter_kernels[mi->interp_filter];

  for (ref = 0; ref < 1 + is_compound; ++ref) {
    const int bw = b_width_log2_lookup[BLOCK_8X8];
    const int h = 4 * (i >> bw);
    const int w = 4 * (i & ((1 << bw) - 1));
    const struct scale_factors *sf = &xd->block_refs[ref]->sf;
    int y_stride = pd->pre[ref].stride;
    uint8_t *pre = pd->pre[ref].buf + (h * pd->pre[ref].stride + w);

    if (vp9_is_scaled(sf)) {
      const int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x));
      const int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y));

      y_stride = xd->block_refs[ref]->buf->y_stride;
      pre = xd->block_refs[ref]->buf->y_buffer;
      pre += scaled_buffer_offset(x_start + w, y_start + h, y_stride, sf);
    }
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      vp9_highbd_build_inter_predictor(
          CONVERT_TO_SHORTPTR(pre), y_stride, CONVERT_TO_SHORTPTR(dst),
          pd->dst.stride, &mi->bmi[i].as_mv[ref].as_mv,
          &xd->block_refs[ref]->sf, width, height, ref, kernel, MV_PRECISION_Q3,
          mi_col * MI_SIZE + 4 * (i % 2), mi_row * MI_SIZE + 4 * (i / 2),
          xd->bd);
    } else {
      vp9_build_inter_predictor(
          pre, y_stride, dst, pd->dst.stride, &mi->bmi[i].as_mv[ref].as_mv,
          &xd->block_refs[ref]->sf, width, height, ref, kernel, MV_PRECISION_Q3,
          mi_col * MI_SIZE + 4 * (i % 2), mi_row * MI_SIZE + 4 * (i / 2));
    }
#else
    vp9_build_inter_predictor(
        pre, y_stride, dst, pd->dst.stride, &mi->bmi[i].as_mv[ref].as_mv,
        &xd->block_refs[ref]->sf, width, height, ref, kernel, MV_PRECISION_Q3,
        mi_col * MI_SIZE + 4 * (i % 2), mi_row * MI_SIZE + 4 * (i / 2));
#endif  // CONFIG_VP9_HIGHBITDEPTH
  }

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    vpx_highbd_subtract_block(
        height, width, vp9_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
        8, src, p->src.stride, dst, pd->dst.stride, xd->bd);
  } else {
    vpx_subtract_block(height, width,
                       vp9_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
                       8, src, p->src.stride, dst, pd->dst.stride);
  }
#else
  vpx_subtract_block(height, width,
                     vp9_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
                     8, src, p->src.stride, dst, pd->dst.stride);
#endif  // CONFIG_VP9_HIGHBITDEPTH

  k = i;
  for (idy = 0; idy < height / 4; ++idy) {
    for (idx = 0; idx < width / 4; ++idx) {
#if CONFIG_VP9_HIGHBITDEPTH
      const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8;
#endif
      int64_t ssz, rd, rd1, rd2;
      tran_low_t *coeff;
      int coeff_ctx;
      k += (idy * 2 + idx);
      coeff_ctx = combine_entropy_contexts(ta[k & 1], tl[k >> 1]);
      coeff = BLOCK_OFFSET(p->coeff, k);
      x->fwd_txfm4x4(vp9_raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
                     coeff, 8);
      vp9_regular_quantize_b_4x4(x, 0, k, so->scan, so->iscan);
#if CONFIG_VP9_HIGHBITDEPTH
      thisdistortion += vp9_highbd_block_error_dispatch(
          coeff, BLOCK_OFFSET(pd->dqcoeff, k), 16, &ssz, bd);
#else
      thisdistortion +=
          vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k), 16, &ssz);
#endif  // CONFIG_VP9_HIGHBITDEPTH
      thissse += ssz;
      thisrate += cost_coeffs(x, 0, k, TX_4X4, coeff_ctx, so->scan,
                              so->neighbors, cpi->sf.use_fast_coef_costing);
      ta[k & 1] = tl[k >> 1] = (x->plane[0].eobs[k] > 0) ? 1 : 0;
      rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2);
      rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2);
      rd = VPXMIN(rd1, rd2);
      if (rd >= best_yrd) return INT64_MAX;
    }
  }

  *distortion = thisdistortion >> 2;
  *labelyrate = thisrate;
  *sse = thissse >> 2;

  return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
}

typedef struct {
  int eobs;
  int brate;
  int byrate;
  int64_t bdist;
  int64_t bsse;
  int64_t brdcost;
  int_mv mvs[2];
  ENTROPY_CONTEXT ta[2];
  ENTROPY_CONTEXT tl[2];
} SEG_RDSTAT;

typedef struct {
  int_mv *ref_mv[2];
  int_mv mvp;

  int64_t segment_rd;
  int r;
  int64_t d;
  int64_t sse;
  int segment_yrate;
  PREDICTION_MODE modes[4];
  SEG_RDSTAT rdstat[4][INTER_MODES];
  int mvthresh;
} BEST_SEG_INFO;

static INLINE int mv_check_bounds(const MvLimits *mv_limits, const MV *mv) {
  return (mv->row >> 3) < mv_limits->row_min ||
         (mv->row >> 3) > mv_limits->row_max ||
         (mv->col >> 3) < mv_limits->col_min ||
         (mv->col >> 3) > mv_limits->col_max;
}

static INLINE void mi_buf_shift(MACROBLOCK *x, int i) {
  MODE_INFO *const mi = x->e_mbd.mi[0];
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &x->e_mbd.plane[0];

  p->src.buf =
      &p->src.buf[vp9_raster_block_offset(BLOCK_8X8, i, p->src.stride)];
  assert(((intptr_t)pd->pre[0].buf & 0x7) == 0);
  pd->pre[0].buf =
      &pd->pre[0].buf[vp9_raster_block_offset(BLOCK_8X8, i, pd->pre[0].stride)];
  if (has_second_ref(mi))
    pd->pre[1].buf =
        &pd->pre[1]
             .buf[vp9_raster_block_offset(BLOCK_8X8, i, pd->pre[1].stride)];
}

static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src,
                                  struct buf_2d orig_pre[2]) {
  MODE_INFO *mi = x->e_mbd.mi[0];
  x->plane[0].src = orig_src;
  x->e_mbd.plane[0].pre[0] = orig_pre[0];
  if (has_second_ref(mi)) x->e_mbd.plane[0].pre[1] = orig_pre[1];
}

static INLINE int mv_has_subpel(const MV *mv) {
  return (mv->row & 0x0F) || (mv->col & 0x0F);
}

// Check if NEARESTMV/NEARMV/ZEROMV is the cheapest way to encode zero motion.
1661 // TODO(aconverse): Find out if this is still productive then clean up or remove 1662 static int check_best_zero_mv(const VP9_COMP *cpi, 1663 const uint8_t mode_context[MAX_REF_FRAMES], 1664 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES], 1665 int this_mode, 1666 const MV_REFERENCE_FRAME ref_frames[2]) { 1667 if ((this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) && 1668 frame_mv[this_mode][ref_frames[0]].as_int == 0 && 1669 (ref_frames[1] == NONE || 1670 frame_mv[this_mode][ref_frames[1]].as_int == 0)) { 1671 int rfc = mode_context[ref_frames[0]]; 1672 int c1 = cost_mv_ref(cpi, NEARMV, rfc); 1673 int c2 = cost_mv_ref(cpi, NEARESTMV, rfc); 1674 int c3 = cost_mv_ref(cpi, ZEROMV, rfc); 1675 1676 if (this_mode == NEARMV) { 1677 if (c1 > c3) return 0; 1678 } else if (this_mode == NEARESTMV) { 1679 if (c2 > c3) return 0; 1680 } else { 1681 assert(this_mode == ZEROMV); 1682 if (ref_frames[1] == NONE) { 1683 if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0) || 1684 (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0)) 1685 return 0; 1686 } else { 1687 if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0 && 1688 frame_mv[NEARESTMV][ref_frames[1]].as_int == 0) || 1689 (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0 && 1690 frame_mv[NEARMV][ref_frames[1]].as_int == 0)) 1691 return 0; 1692 } 1693 } 1694 } 1695 return 1; 1696 } 1697 1698 static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, 1699 int_mv *frame_mv, int mi_row, int mi_col, 1700 int_mv single_newmv[MAX_REF_FRAMES], 1701 int *rate_mv) { 1702 const VP9_COMMON *const cm = &cpi->common; 1703 const int pw = 4 * num_4x4_blocks_wide_lookup[bsize]; 1704 const int ph = 4 * num_4x4_blocks_high_lookup[bsize]; 1705 MACROBLOCKD *xd = &x->e_mbd; 1706 MODE_INFO *mi = xd->mi[0]; 1707 const int refs[2] = { mi->ref_frame[0], 1708 mi->ref_frame[1] < 0 ? 0 : mi->ref_frame[1] }; 1709 int_mv ref_mv[2]; 1710 int ite, ref; 1711 const InterpKernel *kernel = vp9_filter_kernels[mi->interp_filter]; 1712 struct scale_factors sf; 1713 1714 // Do joint motion search in compound mode to get more accurate mv. 1715 struct buf_2d backup_yv12[2][MAX_MB_PLANE]; 1716 uint32_t last_besterr[2] = { UINT_MAX, UINT_MAX }; 1717 const YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = { 1718 vp9_get_scaled_ref_frame(cpi, mi->ref_frame[0]), 1719 vp9_get_scaled_ref_frame(cpi, mi->ref_frame[1]) 1720 }; 1721 1722 // Prediction buffer from second frame. 1723 #if CONFIG_VP9_HIGHBITDEPTH 1724 DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[64 * 64]); 1725 uint8_t *second_pred; 1726 #else 1727 DECLARE_ALIGNED(16, uint8_t, second_pred[64 * 64]); 1728 #endif // CONFIG_VP9_HIGHBITDEPTH 1729 1730 for (ref = 0; ref < 2; ++ref) { 1731 ref_mv[ref] = x->mbmi_ext->ref_mvs[refs[ref]][0]; 1732 1733 if (scaled_ref_frame[ref]) { 1734 int i; 1735 // Swap out the reference frame for a version that's been scaled to 1736 // match the resolution of the current frame, allowing the existing 1737 // motion search code to be used without additional modifications. 1738 for (i = 0; i < MAX_MB_PLANE; i++) 1739 backup_yv12[ref][i] = xd->plane[i].pre[ref]; 1740 vp9_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col, 1741 NULL); 1742 } 1743 1744 frame_mv[refs[ref]].as_int = single_newmv[refs[ref]].as_int; 1745 } 1746 1747 // Since we have scaled the reference frames to match the size of the current 1748 // frame we must use a unit scaling factor during mode selection. 
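// Note: passing identical source and destination dimensions below yields a // 1:1 (identity) scale factor, so the compound predictor built during this // joint search samples the pre-scaled reference planes without resampling.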
1749 #if CONFIG_VP9_HIGHBITDEPTH 1750 vp9_setup_scale_factors_for_frame(&sf, cm->width, cm->height, cm->width, 1751 cm->height, cm->use_highbitdepth); 1752 #else 1753 vp9_setup_scale_factors_for_frame(&sf, cm->width, cm->height, cm->width, 1754 cm->height); 1755 #endif // CONFIG_VP9_HIGHBITDEPTH 1756 1757 // Allow joint search multiple times iteratively for each reference frame 1758 // and break out of the search loop if it couldn't find a better mv. 1759 for (ite = 0; ite < 4; ite++) { 1760 struct buf_2d ref_yv12[2]; 1761 uint32_t bestsme = UINT_MAX; 1762 int sadpb = x->sadperbit16; 1763 MV tmp_mv; 1764 int search_range = 3; 1765 1766 const MvLimits tmp_mv_limits = x->mv_limits; 1767 int id = ite % 2; // Even iterations search in the first reference frame, 1768 // odd iterations search in the second. The predictor 1769 // found for the 'other' reference frame is factored in. 1770 1771 // Initialized here because of compiler problem in Visual Studio. 1772 ref_yv12[0] = xd->plane[0].pre[0]; 1773 ref_yv12[1] = xd->plane[0].pre[1]; 1774 1775 // Get the prediction block from the 'other' reference frame. 1776 #if CONFIG_VP9_HIGHBITDEPTH 1777 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { 1778 second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc_16); 1779 vp9_highbd_build_inter_predictor( 1780 CONVERT_TO_SHORTPTR(ref_yv12[!id].buf), ref_yv12[!id].stride, 1781 second_pred_alloc_16, pw, &frame_mv[refs[!id]].as_mv, &sf, pw, ph, 0, 1782 kernel, MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE, xd->bd); 1783 } else { 1784 second_pred = (uint8_t *)second_pred_alloc_16; 1785 vp9_build_inter_predictor(ref_yv12[!id].buf, ref_yv12[!id].stride, 1786 second_pred, pw, &frame_mv[refs[!id]].as_mv, 1787 &sf, pw, ph, 0, kernel, MV_PRECISION_Q3, 1788 mi_col * MI_SIZE, mi_row * MI_SIZE); 1789 } 1790 #else 1791 vp9_build_inter_predictor(ref_yv12[!id].buf, ref_yv12[!id].stride, 1792 second_pred, pw, &frame_mv[refs[!id]].as_mv, &sf, 1793 pw, ph, 0, kernel, MV_PRECISION_Q3, 1794 mi_col * MI_SIZE, mi_row * MI_SIZE); 1795 #endif // CONFIG_VP9_HIGHBITDEPTH 1796 1797 // Do compound motion search on the current reference frame. 1798 if (id) xd->plane[0].pre[0] = ref_yv12[id]; 1799 vp9_set_mv_search_range(&x->mv_limits, &ref_mv[id].as_mv); 1800 1801 // Use the mv result from the single mode as mv predictor. 1802 tmp_mv = frame_mv[refs[id]].as_mv; 1803 1804 tmp_mv.col >>= 3; 1805 tmp_mv.row >>= 3; 1806 1807 // Small-range full-pixel motion search. 1808 bestsme = vp9_refining_search_8p_c(x, &tmp_mv, sadpb, search_range, 1809 &cpi->fn_ptr[bsize], &ref_mv[id].as_mv, 1810 second_pred); 1811 if (bestsme < UINT_MAX) 1812 bestsme = vp9_get_mvpred_av_var(x, &tmp_mv, &ref_mv[id].as_mv, 1813 second_pred, &cpi->fn_ptr[bsize], 1); 1814 1815 x->mv_limits = tmp_mv_limits; 1816 1817 if (bestsme < UINT_MAX) { 1818 uint32_t dis; /* TODO: use dis in distortion calculation later. */ 1819 uint32_t sse; 1820 bestsme = cpi->find_fractional_mv_step( 1821 x, &tmp_mv, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv, 1822 x->errorperbit, &cpi->fn_ptr[bsize], 0, 1823 cpi->sf.mv.subpel_search_level, NULL, x->nmvjointcost, x->mvcost, 1824 &dis, &sse, second_pred, pw, ph, cpi->sf.use_accurate_subpel_search); 1825 } 1826 1827 // Restore the pointer to the first (possibly scaled) prediction buffer. 
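// Only pre[0] needs restoring here: when searching the second reference // (id == 1) it was temporarily pointed at ref_yv12[1] above, while pre[1] // is never modified inside the joint search loop.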
1828 if (id) xd->plane[0].pre[0] = ref_yv12[0]; 1829 1830 if (bestsme < last_besterr[id]) { 1831 frame_mv[refs[id]].as_mv = tmp_mv; 1832 last_besterr[id] = bestsme; 1833 } else { 1834 break; 1835 } 1836 } 1837 1838 *rate_mv = 0; 1839 1840 for (ref = 0; ref < 2; ++ref) { 1841 if (scaled_ref_frame[ref]) { 1842 // Restore the prediction frame pointers to their unscaled versions. 1843 int i; 1844 for (i = 0; i < MAX_MB_PLANE; i++) 1845 xd->plane[i].pre[ref] = backup_yv12[ref][i]; 1846 } 1847 1848 *rate_mv += vp9_mv_bit_cost(&frame_mv[refs[ref]].as_mv, 1849 &x->mbmi_ext->ref_mvs[refs[ref]][0].as_mv, 1850 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); 1851 } 1852 } 1853 1854 static int64_t rd_pick_best_sub8x8_mode( 1855 VP9_COMP *cpi, MACROBLOCK *x, int_mv *best_ref_mv, 1856 int_mv *second_best_ref_mv, int64_t best_rd, int *returntotrate, 1857 int *returnyrate, int64_t *returndistortion, int *skippable, int64_t *psse, 1858 int mvthresh, int_mv seg_mvs[4][MAX_REF_FRAMES], BEST_SEG_INFO *bsi_buf, 1859 int filter_idx, int mi_row, int mi_col) { 1860 int i; 1861 BEST_SEG_INFO *bsi = bsi_buf + filter_idx; 1862 MACROBLOCKD *xd = &x->e_mbd; 1863 MODE_INFO *mi = xd->mi[0]; 1864 int mode_idx; 1865 int k, br = 0, idx, idy; 1866 int64_t bd = 0, block_sse = 0; 1867 PREDICTION_MODE this_mode; 1868 VP9_COMMON *cm = &cpi->common; 1869 struct macroblock_plane *const p = &x->plane[0]; 1870 struct macroblockd_plane *const pd = &xd->plane[0]; 1871 const int label_count = 4; 1872 int64_t this_segment_rd = 0; 1873 int label_mv_thresh; 1874 int segmentyrate = 0; 1875 const BLOCK_SIZE bsize = mi->sb_type; 1876 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; 1877 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; 1878 const int pw = num_4x4_blocks_wide << 2; 1879 const int ph = num_4x4_blocks_high << 2; 1880 ENTROPY_CONTEXT t_above[2], t_left[2]; 1881 int subpelmv = 1, have_ref = 0; 1882 SPEED_FEATURES *const sf = &cpi->sf; 1883 const int has_second_rf = has_second_ref(mi); 1884 const int inter_mode_mask = sf->inter_mode_mask[bsize]; 1885 MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; 1886 1887 vp9_zero(*bsi); 1888 1889 bsi->segment_rd = best_rd; 1890 bsi->ref_mv[0] = best_ref_mv; 1891 bsi->ref_mv[1] = second_best_ref_mv; 1892 bsi->mvp.as_int = best_ref_mv->as_int; 1893 bsi->mvthresh = mvthresh; 1894 1895 for (i = 0; i < 4; i++) bsi->modes[i] = ZEROMV; 1896 1897 memcpy(t_above, pd->above_context, sizeof(t_above)); 1898 memcpy(t_left, pd->left_context, sizeof(t_left)); 1899 1900 // 64 makes this threshold really big effectively 1901 // making it so that we very rarely check mvs on 1902 // segments. setting this to 1 would make mv thresh 1903 // roughly equal to what it is for macroblocks 1904 label_mv_thresh = 1 * bsi->mvthresh / label_count; 1905 1906 // Segmentation method overheads 1907 for (idy = 0; idy < 2; idy += num_4x4_blocks_high) { 1908 for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) { 1909 // TODO(jingning,rbultje): rewrite the rate-distortion optimization 1910 // loop for 4x4/4x8/8x4 block coding. 
to be replaced with new rd loop 1911 int_mv mode_mv[MB_MODE_COUNT][2]; 1912 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; 1913 PREDICTION_MODE mode_selected = ZEROMV; 1914 int64_t best_rd = INT64_MAX; 1915 const int i = idy * 2 + idx; 1916 int ref; 1917 1918 for (ref = 0; ref < 1 + has_second_rf; ++ref) { 1919 const MV_REFERENCE_FRAME frame = mi->ref_frame[ref]; 1920 frame_mv[ZEROMV][frame].as_int = 0; 1921 vp9_append_sub8x8_mvs_for_idx( 1922 cm, xd, i, ref, mi_row, mi_col, &frame_mv[NEARESTMV][frame], 1923 &frame_mv[NEARMV][frame], mbmi_ext->mode_context); 1924 } 1925 1926 // search for the best motion vector on this segment 1927 for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) { 1928 const struct buf_2d orig_src = x->plane[0].src; 1929 struct buf_2d orig_pre[2]; 1930 1931 mode_idx = INTER_OFFSET(this_mode); 1932 bsi->rdstat[i][mode_idx].brdcost = INT64_MAX; 1933 if (!(inter_mode_mask & (1 << this_mode))) continue; 1934 1935 if (!check_best_zero_mv(cpi, mbmi_ext->mode_context, frame_mv, 1936 this_mode, mi->ref_frame)) 1937 continue; 1938 1939 memcpy(orig_pre, pd->pre, sizeof(orig_pre)); 1940 memcpy(bsi->rdstat[i][mode_idx].ta, t_above, 1941 sizeof(bsi->rdstat[i][mode_idx].ta)); 1942 memcpy(bsi->rdstat[i][mode_idx].tl, t_left, 1943 sizeof(bsi->rdstat[i][mode_idx].tl)); 1944 1945 // motion search for newmv (single predictor case only) 1946 if (!has_second_rf && this_mode == NEWMV && 1947 seg_mvs[i][mi->ref_frame[0]].as_int == INVALID_MV) { 1948 MV *const new_mv = &mode_mv[NEWMV][0].as_mv; 1949 int step_param = 0; 1950 uint32_t bestsme = UINT_MAX; 1951 int sadpb = x->sadperbit4; 1952 MV mvp_full; 1953 int max_mv; 1954 int cost_list[5]; 1955 const MvLimits tmp_mv_limits = x->mv_limits; 1956 1957 /* Is the best so far sufficiently good that we can't justify doing 1958 * a new motion search? */ 1959 if (best_rd < label_mv_thresh) break; 1960 1961 if (cpi->oxcf.mode != BEST) { 1962 // use previous block's result as next block's MV predictor. 1963 if (i > 0) { 1964 bsi->mvp.as_int = mi->bmi[i - 1].as_mv[0].as_int; 1965 if (i == 2) bsi->mvp.as_int = mi->bmi[i - 2].as_mv[0].as_int; 1966 } 1967 } 1968 if (i == 0) 1969 max_mv = x->max_mv_context[mi->ref_frame[0]]; 1970 else 1971 max_mv = 1972 VPXMAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3; 1973 1974 if (sf->mv.auto_mv_step_size && cm->show_frame) { 1975 // Take wtd average of the step_params based on the last frame's 1976 // max mv magnitude and the best ref mvs of the current block for 1977 // the given reference. 1978 step_param = 1979 (vp9_init_search_range(max_mv) + cpi->mv_step_param) / 2; 1980 } else { 1981 step_param = cpi->mv_step_param; 1982 } 1983 1984 mvp_full.row = bsi->mvp.as_mv.row >> 3; 1985 mvp_full.col = bsi->mvp.as_mv.col >> 3; 1986 1987 if (sf->adaptive_motion_search) { 1988 if (x->pred_mv[mi->ref_frame[0]].row != INT16_MAX && 1989 x->pred_mv[mi->ref_frame[0]].col != INT16_MAX) { 1990 mvp_full.row = x->pred_mv[mi->ref_frame[0]].row >> 3; 1991 mvp_full.col = x->pred_mv[mi->ref_frame[0]].col >> 3; 1992 } 1993 step_param = VPXMAX(step_param, 8); 1994 } 1995 1996 // adjust src pointer for this block 1997 mi_buf_shift(x, i); 1998 1999 vp9_set_mv_search_range(&x->mv_limits, &bsi->ref_mv[0]->as_mv); 2000 2001 bestsme = vp9_full_pixel_search( 2002 cpi, x, bsize, &mvp_full, step_param, cpi->sf.mv.search_method, 2003 sadpb, 2004 sf->mv.subpel_search_method != SUBPEL_TREE ?
cost_list : NULL, 2005 &bsi->ref_mv[0]->as_mv, new_mv, INT_MAX, 1); 2006 2007 x->mv_limits = tmp_mv_limits; 2008 2009 if (bestsme < UINT_MAX) { 2010 uint32_t distortion; 2011 cpi->find_fractional_mv_step( 2012 x, new_mv, &bsi->ref_mv[0]->as_mv, cm->allow_high_precision_mv, 2013 x->errorperbit, &cpi->fn_ptr[bsize], sf->mv.subpel_force_stop, 2014 sf->mv.subpel_search_level, cond_cost_list(cpi, cost_list), 2015 x->nmvjointcost, x->mvcost, &distortion, 2016 &x->pred_sse[mi->ref_frame[0]], NULL, pw, ph, 2017 cpi->sf.use_accurate_subpel_search); 2018 2019 // save motion search result for use in compound prediction 2020 seg_mvs[i][mi->ref_frame[0]].as_mv = *new_mv; 2021 } 2022 2023 x->pred_mv[mi->ref_frame[0]] = *new_mv; 2024 2025 // restore src pointers 2026 mi_buf_restore(x, orig_src, orig_pre); 2027 } 2028 2029 if (has_second_rf) { 2030 if (seg_mvs[i][mi->ref_frame[1]].as_int == INVALID_MV || 2031 seg_mvs[i][mi->ref_frame[0]].as_int == INVALID_MV) 2032 continue; 2033 } 2034 2035 if (has_second_rf && this_mode == NEWMV && 2036 mi->interp_filter == EIGHTTAP) { 2037 // adjust src pointers 2038 mi_buf_shift(x, i); 2039 if (sf->comp_inter_joint_search_thresh <= bsize) { 2040 int rate_mv; 2041 joint_motion_search(cpi, x, bsize, frame_mv[this_mode], mi_row, 2042 mi_col, seg_mvs[i], &rate_mv); 2043 seg_mvs[i][mi->ref_frame[0]].as_int = 2044 frame_mv[this_mode][mi->ref_frame[0]].as_int; 2045 seg_mvs[i][mi->ref_frame[1]].as_int = 2046 frame_mv[this_mode][mi->ref_frame[1]].as_int; 2047 } 2048 // restore src pointers 2049 mi_buf_restore(x, orig_src, orig_pre); 2050 } 2051 2052 bsi->rdstat[i][mode_idx].brate = set_and_cost_bmi_mvs( 2053 cpi, x, xd, i, this_mode, mode_mv[this_mode], frame_mv, seg_mvs[i], 2054 bsi->ref_mv, x->nmvjointcost, x->mvcost); 2055 2056 for (ref = 0; ref < 1 + has_second_rf; ++ref) { 2057 bsi->rdstat[i][mode_idx].mvs[ref].as_int = 2058 mode_mv[this_mode][ref].as_int; 2059 if (num_4x4_blocks_wide > 1) 2060 bsi->rdstat[i + 1][mode_idx].mvs[ref].as_int = 2061 mode_mv[this_mode][ref].as_int; 2062 if (num_4x4_blocks_high > 1) 2063 bsi->rdstat[i + 2][mode_idx].mvs[ref].as_int = 2064 mode_mv[this_mode][ref].as_int; 2065 } 2066 2067 // Trap vectors that reach beyond the UMV borders 2068 if (mv_check_bounds(&x->mv_limits, &mode_mv[this_mode][0].as_mv) || 2069 (has_second_rf && 2070 mv_check_bounds(&x->mv_limits, &mode_mv[this_mode][1].as_mv))) 2071 continue; 2072 2073 if (filter_idx > 0) { 2074 BEST_SEG_INFO *ref_bsi = bsi_buf; 2075 subpelmv = 0; 2076 have_ref = 1; 2077 2078 for (ref = 0; ref < 1 + has_second_rf; ++ref) { 2079 subpelmv |= mv_has_subpel(&mode_mv[this_mode][ref].as_mv); 2080 have_ref &= mode_mv[this_mode][ref].as_int == 2081 ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int; 2082 } 2083 2084 if (filter_idx > 1 && !subpelmv && !have_ref) { 2085 ref_bsi = bsi_buf + 1; 2086 have_ref = 1; 2087 for (ref = 0; ref < 1 + has_second_rf; ++ref) 2088 have_ref &= mode_mv[this_mode][ref].as_int == 2089 ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int; 2090 } 2091 2092 if (!subpelmv && have_ref && 2093 ref_bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) { 2094 memcpy(&bsi->rdstat[i][mode_idx], &ref_bsi->rdstat[i][mode_idx], 2095 sizeof(SEG_RDSTAT)); 2096 if (num_4x4_blocks_wide > 1) 2097 bsi->rdstat[i + 1][mode_idx].eobs = 2098 ref_bsi->rdstat[i + 1][mode_idx].eobs; 2099 if (num_4x4_blocks_high > 1) 2100 bsi->rdstat[i + 2][mode_idx].eobs = 2101 ref_bsi->rdstat[i + 2][mode_idx].eobs; 2102 2103 if (bsi->rdstat[i][mode_idx].brdcost < best_rd) { 2104 mode_selected = this_mode; 2105 best_rd = 
bsi->rdstat[i][mode_idx].brdcost; 2106 } 2107 continue; 2108 } 2109 } 2110 2111 bsi->rdstat[i][mode_idx].brdcost = encode_inter_mb_segment( 2112 cpi, x, bsi->segment_rd - this_segment_rd, i, 2113 &bsi->rdstat[i][mode_idx].byrate, &bsi->rdstat[i][mode_idx].bdist, 2114 &bsi->rdstat[i][mode_idx].bsse, bsi->rdstat[i][mode_idx].ta, 2115 bsi->rdstat[i][mode_idx].tl, mi_row, mi_col); 2116 if (bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) { 2117 bsi->rdstat[i][mode_idx].brdcost += 2118 RDCOST(x->rdmult, x->rddiv, bsi->rdstat[i][mode_idx].brate, 0); 2119 bsi->rdstat[i][mode_idx].brate += bsi->rdstat[i][mode_idx].byrate; 2120 bsi->rdstat[i][mode_idx].eobs = p->eobs[i]; 2121 if (num_4x4_blocks_wide > 1) 2122 bsi->rdstat[i + 1][mode_idx].eobs = p->eobs[i + 1]; 2123 if (num_4x4_blocks_high > 1) 2124 bsi->rdstat[i + 2][mode_idx].eobs = p->eobs[i + 2]; 2125 } 2126 2127 if (bsi->rdstat[i][mode_idx].brdcost < best_rd) { 2128 mode_selected = this_mode; 2129 best_rd = bsi->rdstat[i][mode_idx].brdcost; 2130 } 2131 } /*for each 4x4 mode*/ 2132 2133 if (best_rd == INT64_MAX) { 2134 int iy, midx; 2135 for (iy = i + 1; iy < 4; ++iy) 2136 for (midx = 0; midx < INTER_MODES; ++midx) 2137 bsi->rdstat[iy][midx].brdcost = INT64_MAX; 2138 bsi->segment_rd = INT64_MAX; 2139 return INT64_MAX; 2140 } 2141 2142 mode_idx = INTER_OFFSET(mode_selected); 2143 memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above)); 2144 memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left)); 2145 2146 set_and_cost_bmi_mvs(cpi, x, xd, i, mode_selected, mode_mv[mode_selected], 2147 frame_mv, seg_mvs[i], bsi->ref_mv, x->nmvjointcost, 2148 x->mvcost); 2149 2150 br += bsi->rdstat[i][mode_idx].brate; 2151 bd += bsi->rdstat[i][mode_idx].bdist; 2152 block_sse += bsi->rdstat[i][mode_idx].bsse; 2153 segmentyrate += bsi->rdstat[i][mode_idx].byrate; 2154 this_segment_rd += bsi->rdstat[i][mode_idx].brdcost; 2155 2156 if (this_segment_rd > bsi->segment_rd) { 2157 int iy, midx; 2158 for (iy = i + 1; iy < 4; ++iy) 2159 for (midx = 0; midx < INTER_MODES; ++midx) 2160 bsi->rdstat[iy][midx].brdcost = INT64_MAX; 2161 bsi->segment_rd = INT64_MAX; 2162 return INT64_MAX; 2163 } 2164 } 2165 } /* for each label */ 2166 2167 bsi->r = br; 2168 bsi->d = bd; 2169 bsi->segment_yrate = segmentyrate; 2170 bsi->segment_rd = this_segment_rd; 2171 bsi->sse = block_sse; 2172 2173 // update the coding decisions 2174 for (k = 0; k < 4; ++k) bsi->modes[k] = mi->bmi[k].as_mode; 2175 2176 if (bsi->segment_rd > best_rd) return INT64_MAX; 2177 /* set it to the best */ 2178 for (i = 0; i < 4; i++) { 2179 mode_idx = INTER_OFFSET(bsi->modes[i]); 2180 mi->bmi[i].as_mv[0].as_int = bsi->rdstat[i][mode_idx].mvs[0].as_int; 2181 if (has_second_ref(mi)) 2182 mi->bmi[i].as_mv[1].as_int = bsi->rdstat[i][mode_idx].mvs[1].as_int; 2183 x->plane[0].eobs[i] = bsi->rdstat[i][mode_idx].eobs; 2184 mi->bmi[i].as_mode = bsi->modes[i]; 2185 } 2186 2187 /* 2188 * used to set mbmi->mv.as_int 2189 */ 2190 *returntotrate = bsi->r; 2191 *returndistortion = bsi->d; 2192 *returnyrate = bsi->segment_yrate; 2193 *skippable = vp9_is_skippable_in_plane(x, BLOCK_8X8, 0); 2194 *psse = bsi->sse; 2195 mi->mode = bsi->modes[3]; 2196 2197 return bsi->segment_rd; 2198 } 2199 2200 static void estimate_ref_frame_costs(const VP9_COMMON *cm, 2201 const MACROBLOCKD *xd, int segment_id, 2202 unsigned int *ref_costs_single, 2203 unsigned int *ref_costs_comp, 2204 vpx_prob *comp_mode_p) { 2205 int seg_ref_active = 2206 segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME); 2207 if (seg_ref_active) { 2208 
memset(ref_costs_single, 0, MAX_REF_FRAMES * sizeof(*ref_costs_single)); 2209 memset(ref_costs_comp, 0, MAX_REF_FRAMES * sizeof(*ref_costs_comp)); 2210 *comp_mode_p = 128; 2211 } else { 2212 vpx_prob intra_inter_p = vp9_get_intra_inter_prob(cm, xd); 2213 vpx_prob comp_inter_p = 128; 2214 2215 if (cm->reference_mode == REFERENCE_MODE_SELECT) { 2216 comp_inter_p = vp9_get_reference_mode_prob(cm, xd); 2217 *comp_mode_p = comp_inter_p; 2218 } else { 2219 *comp_mode_p = 128; 2220 } 2221 2222 ref_costs_single[INTRA_FRAME] = vp9_cost_bit(intra_inter_p, 0); 2223 2224 if (cm->reference_mode != COMPOUND_REFERENCE) { 2225 vpx_prob ref_single_p1 = vp9_get_pred_prob_single_ref_p1(cm, xd); 2226 vpx_prob ref_single_p2 = vp9_get_pred_prob_single_ref_p2(cm, xd); 2227 unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1); 2228 2229 if (cm->reference_mode == REFERENCE_MODE_SELECT) 2230 base_cost += vp9_cost_bit(comp_inter_p, 0); 2231 2232 ref_costs_single[LAST_FRAME] = ref_costs_single[GOLDEN_FRAME] = 2233 ref_costs_single[ALTREF_FRAME] = base_cost; 2234 ref_costs_single[LAST_FRAME] += vp9_cost_bit(ref_single_p1, 0); 2235 ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p1, 1); 2236 ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p1, 1); 2237 ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p2, 0); 2238 ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p2, 1); 2239 } else { 2240 ref_costs_single[LAST_FRAME] = 512; 2241 ref_costs_single[GOLDEN_FRAME] = 512; 2242 ref_costs_single[ALTREF_FRAME] = 512; 2243 } 2244 if (cm->reference_mode != SINGLE_REFERENCE) { 2245 vpx_prob ref_comp_p = vp9_get_pred_prob_comp_ref_p(cm, xd); 2246 unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1); 2247 2248 if (cm->reference_mode == REFERENCE_MODE_SELECT) 2249 base_cost += vp9_cost_bit(comp_inter_p, 1); 2250 2251 ref_costs_comp[LAST_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 0); 2252 ref_costs_comp[GOLDEN_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 1); 2253 } else { 2254 ref_costs_comp[LAST_FRAME] = 512; 2255 ref_costs_comp[GOLDEN_FRAME] = 512; 2256 } 2257 } 2258 } 2259 2260 static void store_coding_context( 2261 MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int mode_index, 2262 int64_t comp_pred_diff[REFERENCE_MODES], 2263 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS], int skippable) { 2264 MACROBLOCKD *const xd = &x->e_mbd; 2265 2266 // Take a snapshot of the coding context so it can be 2267 // restored if we decide to encode this way 2268 ctx->skip = x->skip; 2269 ctx->skippable = skippable; 2270 ctx->best_mode_index = mode_index; 2271 ctx->mic = *xd->mi[0]; 2272 ctx->mbmi_ext = *x->mbmi_ext; 2273 ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE]; 2274 ctx->comp_pred_diff = (int)comp_pred_diff[COMPOUND_REFERENCE]; 2275 ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT]; 2276 2277 memcpy(ctx->best_filter_diff, best_filter_diff, 2278 sizeof(*best_filter_diff) * SWITCHABLE_FILTER_CONTEXTS); 2279 } 2280 2281 static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, 2282 MV_REFERENCE_FRAME ref_frame, 2283 BLOCK_SIZE block_size, int mi_row, int mi_col, 2284 int_mv frame_nearest_mv[MAX_REF_FRAMES], 2285 int_mv frame_near_mv[MAX_REF_FRAMES], 2286 struct buf_2d yv12_mb[4][MAX_MB_PLANE]) { 2287 const VP9_COMMON *cm = &cpi->common; 2288 const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame); 2289 MACROBLOCKD *const xd = &x->e_mbd; 2290 MODE_INFO *const mi = xd->mi[0]; 2291 int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame]; 2292 const 
struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf; 2293 MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; 2294 2295 assert(yv12 != NULL); 2296 2297 // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this 2298 // use the UV scaling factors. 2299 vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf); 2300 2301 // Gets an initial list of candidate vectors from neighbours and orders them 2302 vp9_find_mv_refs(cm, xd, mi, ref_frame, candidates, mi_row, mi_col, 2303 mbmi_ext->mode_context); 2304 2305 // Candidate refinement carried out at encoder and decoder 2306 vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates, 2307 &frame_nearest_mv[ref_frame], 2308 &frame_near_mv[ref_frame]); 2309 2310 // Further refinement that is encode side only to test the top few candidates 2311 // in full and choose the best as the centre point for subsequent searches. 2312 // The current implementation doesn't support scaling. 2313 if (!vp9_is_scaled(sf) && block_size >= BLOCK_8X8) 2314 vp9_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride, ref_frame, 2315 block_size); 2316 } 2317 2318 #if CONFIG_NON_GREEDY_MV 2319 #define MAX_PREV_NB_FULL_MV_NUM 8 2320 static int find_prev_nb_full_mvs(const VP9_COMMON *cm, const MACROBLOCKD *xd, 2321 int ref_frame, BLOCK_SIZE bsize, int mi_row, 2322 int mi_col, int_mv *nb_full_mvs) { 2323 int i; 2324 const TileInfo *tile = &xd->tile; 2325 int full_mv_num = 0; 2326 assert(bsize >= BLOCK_8X8); 2327 for (i = 0; i < MVREF_NEIGHBOURS; ++i) { 2328 const POSITION *mv_ref = &mv_ref_blocks[bsize][i]; 2329 if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { 2330 const MODE_INFO *nb_mi = 2331 xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; 2332 if (nb_mi->sb_type >= BLOCK_8X8) { 2333 if (nb_mi->ref_frame[0] == ref_frame) { 2334 nb_full_mvs[full_mv_num].as_mv = get_full_mv(&nb_mi->mv[0].as_mv); 2335 ++full_mv_num; 2336 if (full_mv_num >= MAX_PREV_NB_FULL_MV_NUM) { 2337 return full_mv_num; 2338 } 2339 } else if (nb_mi->ref_frame[1] == ref_frame) { 2340 nb_full_mvs[full_mv_num].as_mv = get_full_mv(&nb_mi->mv[1].as_mv); 2341 ++full_mv_num; 2342 if (full_mv_num >= MAX_PREV_NB_FULL_MV_NUM) { 2343 return full_mv_num; 2344 } 2345 } 2346 } else { 2347 int j; 2348 for (j = 0; j < 4; ++j) { 2349 // TODO(angiebird): avoid using duplicated mvs 2350 if (nb_mi->ref_frame[0] == ref_frame) { 2351 nb_full_mvs[full_mv_num].as_mv = 2352 get_full_mv(&nb_mi->bmi[j].as_mv[0].as_mv); 2353 ++full_mv_num; 2354 if (full_mv_num >= MAX_PREV_NB_FULL_MV_NUM) { 2355 return full_mv_num; 2356 } 2357 } else if (nb_mi->ref_frame[1] == ref_frame) { 2358 nb_full_mvs[full_mv_num].as_mv = 2359 get_full_mv(&nb_mi->bmi[j].as_mv[1].as_mv); 2360 ++full_mv_num; 2361 if (full_mv_num >= MAX_PREV_NB_FULL_MV_NUM) { 2362 return full_mv_num; 2363 } 2364 } 2365 } 2366 } 2367 } 2368 } 2369 return full_mv_num; 2370 } 2371 #endif // CONFIG_NON_GREEDY_MV 2372 2373 static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, 2374 int mi_row, int mi_col, int_mv *tmp_mv, 2375 int *rate_mv) { 2376 MACROBLOCKD *xd = &x->e_mbd; 2377 const VP9_COMMON *cm = &cpi->common; 2378 MODE_INFO *mi = xd->mi[0]; 2379 struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0 } }; 2380 int step_param; 2381 MV mvp_full; 2382 int ref = mi->ref_frame[0]; 2383 MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv; 2384 const MvLimits tmp_mv_limits = x->mv_limits; 2385 int cost_list[5]; 2386 const int best_predmv_idx = x->mv_best_ref_index[ref]; 2387 const YV12_BUFFER_CONFIG 
*scaled_ref_frame = 2388 vp9_get_scaled_ref_frame(cpi, ref); 2389 const int pw = num_4x4_blocks_wide_lookup[bsize] << 2; 2390 const int ph = num_4x4_blocks_high_lookup[bsize] << 2; 2391 MV pred_mv[3]; 2392 2393 #if CONFIG_NON_GREEDY_MV 2394 double mv_dist = 0; 2395 double mv_cost = 0; 2396 double lambda = (pw * ph) / 4; 2397 double bestsme; 2398 int_mv nb_full_mvs[MAX_PREV_NB_FULL_MV_NUM]; 2399 2400 const int nb_full_mv_num = 2401 find_prev_nb_full_mvs(cm, xd, ref, bsize, mi_row, mi_col, nb_full_mvs); 2402 #else // CONFIG_NON_GREEDY_MV 2403 int bestsme = INT_MAX; 2404 int sadpb = x->sadperbit16; 2405 #endif // CONFIG_NON_GREEDY_MV 2406 2407 pred_mv[0] = x->mbmi_ext->ref_mvs[ref][0].as_mv; 2408 pred_mv[1] = x->mbmi_ext->ref_mvs[ref][1].as_mv; 2409 pred_mv[2] = x->pred_mv[ref]; 2410 2411 if (scaled_ref_frame) { 2412 int i; 2413 // Swap out the reference frame for a version that's been scaled to 2414 // match the resolution of the current frame, allowing the existing 2415 // motion search code to be used without additional modifications. 2416 for (i = 0; i < MAX_MB_PLANE; i++) backup_yv12[i] = xd->plane[i].pre[0]; 2417 2418 vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL); 2419 } 2420 2421 // Work out the size of the first step in the mv step search. 2422 // 0 here is maximum length first step. 1 is VPXMAX >> 1 etc. 2423 if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) { 2424 // Take wtd average of the step_params based on the last frame's 2425 // max mv magnitude and that based on the best ref mvs of the current 2426 // block for the given reference. 2427 step_param = 2428 (vp9_init_search_range(x->max_mv_context[ref]) + cpi->mv_step_param) / 2429 2; 2430 } else { 2431 step_param = cpi->mv_step_param; 2432 } 2433 2434 if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64) { 2435 const int boffset = 2436 2 * (b_width_log2_lookup[BLOCK_64X64] - 2437 VPXMIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize])); 2438 step_param = VPXMAX(step_param, boffset); 2439 } 2440 2441 if (cpi->sf.adaptive_motion_search) { 2442 int bwl = b_width_log2_lookup[bsize]; 2443 int bhl = b_height_log2_lookup[bsize]; 2444 int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4); 2445 2446 if (tlevel < 5) step_param += 2; 2447 2448 // prev_mv_sad is not setup for dynamically scaled frames. 2449 if (cpi->oxcf.resize_mode != RESIZE_DYNAMIC) { 2450 int i; 2451 for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) { 2452 if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) { 2453 x->pred_mv[ref].row = INT16_MAX; 2454 x->pred_mv[ref].col = INT16_MAX; 2455 tmp_mv->as_int = INVALID_MV; 2456 2457 if (scaled_ref_frame) { 2458 int i; 2459 for (i = 0; i < MAX_MB_PLANE; ++i) 2460 xd->plane[i].pre[0] = backup_yv12[i]; 2461 } 2462 return; 2463 } 2464 } 2465 } 2466 } 2467 2468 // Note: MV limits are modified here. Always restore the original values 2469 // after full-pixel motion search. 
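// vp9_set_mv_search_range() narrows x->mv_limits to a window around ref_mv so // the full-pixel search below cannot pick vectors outside the valid border; // tmp_mv_limits holds the original limits for that later restore.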
2470 vp9_set_mv_search_range(&x->mv_limits, &ref_mv); 2471 2472 mvp_full = pred_mv[best_predmv_idx]; 2473 mvp_full.col >>= 3; 2474 mvp_full.row >>= 3; 2475 2476 #if CONFIG_NON_GREEDY_MV 2477 bestsme = vp9_full_pixel_diamond_new( 2478 cpi, x, &mvp_full, step_param, lambda, 1, &cpi->fn_ptr[bsize], 2479 nb_full_mvs, nb_full_mv_num, &tmp_mv->as_mv, &mv_dist, &mv_cost); 2480 #else // CONFIG_NON_GREEDY_MV 2481 bestsme = vp9_full_pixel_search( 2482 cpi, x, bsize, &mvp_full, step_param, cpi->sf.mv.search_method, sadpb, 2483 cond_cost_list(cpi, cost_list), &ref_mv, &tmp_mv->as_mv, INT_MAX, 1); 2484 #endif // CONFIG_NON_GREEDY_MV 2485 2486 if (cpi->sf.enhanced_full_pixel_motion_search) { 2487 int i; 2488 for (i = 0; i < 3; ++i) { 2489 #if CONFIG_NON_GREEDY_MV 2490 double this_me; 2491 #else // CONFIG_NON_GREEDY_MV 2492 int this_me; 2493 #endif // CONFIG_NON_GREEDY_MV 2494 MV this_mv; 2495 int diff_row; 2496 int diff_col; 2497 int step; 2498 2499 if (pred_mv[i].row == INT16_MAX || pred_mv[i].col == INT16_MAX) continue; 2500 if (i == best_predmv_idx) continue; 2501 2502 diff_row = ((int)pred_mv[i].row - 2503 pred_mv[i > 0 ? (i - 1) : best_predmv_idx].row) >> 2504 3; 2505 diff_col = ((int)pred_mv[i].col - 2506 pred_mv[i > 0 ? (i - 1) : best_predmv_idx].col) >> 2507 3; 2508 if (diff_row == 0 && diff_col == 0) continue; 2509 if (diff_row < 0) diff_row = -diff_row; 2510 if (diff_col < 0) diff_col = -diff_col; 2511 step = get_msb((diff_row + diff_col + 1) >> 1); 2512 if (step <= 0) continue; 2513 2514 mvp_full = pred_mv[i]; 2515 mvp_full.col >>= 3; 2516 mvp_full.row >>= 3; 2517 #if CONFIG_NON_GREEDY_MV 2518 this_me = vp9_full_pixel_diamond_new( 2519 cpi, x, &mvp_full, VPXMAX(step_param, MAX_MVSEARCH_STEPS - step), 2520 lambda, 1, &cpi->fn_ptr[bsize], nb_full_mvs, nb_full_mv_num, &this_mv, 2521 &mv_dist, &mv_cost); 2522 #else // CONFIG_NON_GREEDY_MV 2523 this_me = vp9_full_pixel_search( 2524 cpi, x, bsize, &mvp_full, 2525 VPXMAX(step_param, MAX_MVSEARCH_STEPS - step), 2526 cpi->sf.mv.search_method, sadpb, cond_cost_list(cpi, cost_list), 2527 &ref_mv, &this_mv, INT_MAX, 1); 2528 #endif // CONFIG_NON_GREEDY_MV 2529 if (this_me < bestsme) { 2530 tmp_mv->as_mv = this_mv; 2531 bestsme = this_me; 2532 } 2533 } 2534 } 2535 2536 x->mv_limits = tmp_mv_limits; 2537 2538 if (bestsme < INT_MAX) { 2539 uint32_t dis; /* TODO: use dis in distortion calculation later. */ 2540 cpi->find_fractional_mv_step( 2541 x, &tmp_mv->as_mv, &ref_mv, cm->allow_high_precision_mv, x->errorperbit, 2542 &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop, 2543 cpi->sf.mv.subpel_search_level, cond_cost_list(cpi, cost_list), 2544 x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, pw, ph, 2545 cpi->sf.use_accurate_subpel_search); 2546 } 2547 *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv, x->nmvjointcost, 2548 x->mvcost, MV_COST_WEIGHT); 2549 2550 x->pred_mv[ref] = tmp_mv->as_mv; 2551 2552 if (scaled_ref_frame) { 2553 int i; 2554 for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i]; 2555 } 2556 } 2557 2558 static INLINE void restore_dst_buf(MACROBLOCKD *xd, 2559 uint8_t *orig_dst[MAX_MB_PLANE], 2560 int orig_dst_stride[MAX_MB_PLANE]) { 2561 int i; 2562 for (i = 0; i < MAX_MB_PLANE; i++) { 2563 xd->plane[i].dst.buf = orig_dst[i]; 2564 xd->plane[i].dst.stride = orig_dst_stride[i]; 2565 } 2566 } 2567 2568 // In some situations we want to discount the apparent cost of a new motion 2569 // vector.
Where there is a subtle motion field and especially where there is 2570 // low spatial complexity then it can be hard to cover the cost of a new motion 2571 // vector in a single block, even if that motion vector reduces distortion. 2572 // However, once established that vector may be usable through the nearest and 2573 // near mv modes to reduce distortion in subsequent blocks and also improve 2574 // visual quality. 2575 static int discount_newmv_test(const VP9_COMP *cpi, int this_mode, 2576 int_mv this_mv, 2577 int_mv (*mode_mv)[MAX_REF_FRAMES], 2578 int ref_frame) { 2579 return (!cpi->rc.is_src_frame_alt_ref && (this_mode == NEWMV) && 2580 (this_mv.as_int != 0) && 2581 ((mode_mv[NEARESTMV][ref_frame].as_int == 0) || 2582 (mode_mv[NEARESTMV][ref_frame].as_int == INVALID_MV)) && 2583 ((mode_mv[NEARMV][ref_frame].as_int == 0) || 2584 (mode_mv[NEARMV][ref_frame].as_int == INVALID_MV))); 2585 } 2586 2587 static int64_t handle_inter_mode( 2588 VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int *rate2, 2589 int64_t *distortion, int *skippable, int *rate_y, int *rate_uv, 2590 int *disable_skip, int_mv (*mode_mv)[MAX_REF_FRAMES], int mi_row, 2591 int mi_col, int_mv single_newmv[MAX_REF_FRAMES], 2592 INTERP_FILTER (*single_filter)[MAX_REF_FRAMES], 2593 int (*single_skippable)[MAX_REF_FRAMES], int64_t *psse, 2594 const int64_t ref_best_rd, int64_t *mask_filter, int64_t filter_cache[]) { 2595 VP9_COMMON *cm = &cpi->common; 2596 MACROBLOCKD *xd = &x->e_mbd; 2597 MODE_INFO *mi = xd->mi[0]; 2598 MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; 2599 const int is_comp_pred = has_second_ref(mi); 2600 const int this_mode = mi->mode; 2601 int_mv *frame_mv = mode_mv[this_mode]; 2602 int i; 2603 int refs[2] = { mi->ref_frame[0], 2604 (mi->ref_frame[1] < 0 ? 0 : mi->ref_frame[1]) }; 2605 int_mv cur_mv[2]; 2606 #if CONFIG_VP9_HIGHBITDEPTH 2607 DECLARE_ALIGNED(16, uint16_t, tmp_buf16[MAX_MB_PLANE * 64 * 64]); 2608 uint8_t *tmp_buf; 2609 #else 2610 DECLARE_ALIGNED(16, uint8_t, tmp_buf[MAX_MB_PLANE * 64 * 64]); 2611 #endif // CONFIG_VP9_HIGHBITDEPTH 2612 int pred_exists = 0; 2613 int intpel_mv; 2614 int64_t rd, tmp_rd, best_rd = INT64_MAX; 2615 int best_needs_copy = 0; 2616 uint8_t *orig_dst[MAX_MB_PLANE]; 2617 int orig_dst_stride[MAX_MB_PLANE]; 2618 int rs = 0; 2619 INTERP_FILTER best_filter = SWITCHABLE; 2620 uint8_t skip_txfm[MAX_MB_PLANE << 2] = { 0 }; 2621 int64_t bsse[MAX_MB_PLANE << 2] = { 0 }; 2622 2623 int bsl = mi_width_log2_lookup[bsize]; 2624 int pred_filter_search = 2625 cpi->sf.cb_pred_filter_search 2626 ? 
(((mi_row + mi_col) >> bsl) + 2627 get_chessboard_index(cm->current_video_frame)) & 2628 0x1 2629 : 0; 2630 2631 int skip_txfm_sb = 0; 2632 int64_t skip_sse_sb = INT64_MAX; 2633 int64_t distortion_y = 0, distortion_uv = 0; 2634 2635 #if CONFIG_VP9_HIGHBITDEPTH 2636 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { 2637 tmp_buf = CONVERT_TO_BYTEPTR(tmp_buf16); 2638 } else { 2639 tmp_buf = (uint8_t *)tmp_buf16; 2640 } 2641 #endif // CONFIG_VP9_HIGHBITDEPTH 2642 2643 if (pred_filter_search) { 2644 INTERP_FILTER af = SWITCHABLE, lf = SWITCHABLE; 2645 if (xd->above_mi && is_inter_block(xd->above_mi)) 2646 af = xd->above_mi->interp_filter; 2647 if (xd->left_mi && is_inter_block(xd->left_mi)) 2648 lf = xd->left_mi->interp_filter; 2649 2650 if ((this_mode != NEWMV) || (af == lf)) best_filter = af; 2651 } 2652 2653 if (is_comp_pred) { 2654 if (frame_mv[refs[0]].as_int == INVALID_MV || 2655 frame_mv[refs[1]].as_int == INVALID_MV) 2656 return INT64_MAX; 2657 2658 if (cpi->sf.adaptive_mode_search) { 2659 if (single_filter[this_mode][refs[0]] == 2660 single_filter[this_mode][refs[1]]) 2661 best_filter = single_filter[this_mode][refs[0]]; 2662 } 2663 } 2664 2665 if (this_mode == NEWMV) { 2666 int rate_mv; 2667 if (is_comp_pred) { 2668 // Initialize mv using single prediction mode result. 2669 frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int; 2670 frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int; 2671 2672 if (cpi->sf.comp_inter_joint_search_thresh <= bsize) { 2673 joint_motion_search(cpi, x, bsize, frame_mv, mi_row, mi_col, 2674 single_newmv, &rate_mv); 2675 } else { 2676 rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]].as_mv, 2677 &x->mbmi_ext->ref_mvs[refs[0]][0].as_mv, 2678 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); 2679 rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]].as_mv, 2680 &x->mbmi_ext->ref_mvs[refs[1]][0].as_mv, 2681 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); 2682 } 2683 *rate2 += rate_mv; 2684 } else { 2685 int_mv tmp_mv; 2686 single_motion_search(cpi, x, bsize, mi_row, mi_col, &tmp_mv, &rate_mv); 2687 if (tmp_mv.as_int == INVALID_MV) return INT64_MAX; 2688 2689 frame_mv[refs[0]].as_int = xd->mi[0]->bmi[0].as_mv[0].as_int = 2690 tmp_mv.as_int; 2691 single_newmv[refs[0]].as_int = tmp_mv.as_int; 2692 2693 // Estimate the rate implications of a new mv but discount this 2694 // under certain circumstances where we want to help initiate a weak 2695 // motion field, where the distortion gain for a single block may not 2696 // be enough to overcome the cost of a new mv. 2697 if (discount_newmv_test(cpi, this_mode, tmp_mv, mode_mv, refs[0])) { 2698 *rate2 += VPXMAX((rate_mv / NEW_MV_DISCOUNT_FACTOR), 1); 2699 } else { 2700 *rate2 += rate_mv; 2701 } 2702 } 2703 } 2704 2705 for (i = 0; i < is_comp_pred + 1; ++i) { 2706 cur_mv[i] = frame_mv[refs[i]]; 2707 // Clip "next_nearest" so that it does not extend too far out of the image 2708 if (this_mode != NEWMV) clamp_mv2(&cur_mv[i].as_mv, xd); 2709 2710 if (mv_check_bounds(&x->mv_limits, &cur_mv[i].as_mv)) return INT64_MAX; 2711 mi->mv[i].as_int = cur_mv[i].as_int; 2712 } 2713 2714 // Do the first prediction into the destination buffer. Do the next 2715 // prediction into a temporary buffer. Then keep track of which one 2716 // of these currently holds the best predictor, and use the other 2717 // one for future predictions. In the end, copy from tmp_buf to 2718 // dst if necessary.
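// Save the real destination pointers first; the filter search below then // alternates between writing predictions here and into tmp_buf, with // best_needs_copy recording which of the two holds the best predictor so far.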
2719 for (i = 0; i < MAX_MB_PLANE; i++) { 2720 orig_dst[i] = xd->plane[i].dst.buf; 2721 orig_dst_stride[i] = xd->plane[i].dst.stride; 2722 } 2723 2724 // We don't include the cost of the second reference here, because there 2725 // are only two options: Last/ARF or Golden/ARF; The second one is always 2726 // known, which is ARF. 2727 // 2728 // Under some circumstances we discount the cost of new mv mode to encourage 2729 // initiation of a motion field. 2730 if (discount_newmv_test(cpi, this_mode, frame_mv[refs[0]], mode_mv, 2731 refs[0])) { 2732 *rate2 += 2733 VPXMIN(cost_mv_ref(cpi, this_mode, mbmi_ext->mode_context[refs[0]]), 2734 cost_mv_ref(cpi, NEARESTMV, mbmi_ext->mode_context[refs[0]])); 2735 } else { 2736 *rate2 += cost_mv_ref(cpi, this_mode, mbmi_ext->mode_context[refs[0]]); 2737 } 2738 2739 if (RDCOST(x->rdmult, x->rddiv, *rate2, 0) > ref_best_rd && 2740 mi->mode != NEARESTMV) 2741 return INT64_MAX; 2742 2743 pred_exists = 0; 2744 // Are all MVs integer pel for Y and UV 2745 intpel_mv = !mv_has_subpel(&mi->mv[0].as_mv); 2746 if (is_comp_pred) intpel_mv &= !mv_has_subpel(&mi->mv[1].as_mv); 2747 2748 // Search for best switchable filter by checking the variance of 2749 // pred error irrespective of whether the filter will be used 2750 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) filter_cache[i] = INT64_MAX; 2751 2752 if (cm->interp_filter != BILINEAR) { 2753 if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) { 2754 best_filter = EIGHTTAP; 2755 } else if (best_filter == SWITCHABLE) { 2756 int newbest; 2757 int tmp_rate_sum = 0; 2758 int64_t tmp_dist_sum = 0; 2759 2760 for (i = 0; i < SWITCHABLE_FILTERS; ++i) { 2761 int j; 2762 int64_t rs_rd; 2763 int tmp_skip_sb = 0; 2764 int64_t tmp_skip_sse = INT64_MAX; 2765 2766 mi->interp_filter = i; 2767 rs = vp9_get_switchable_rate(cpi, xd); 2768 rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0); 2769 2770 if (i > 0 && intpel_mv) { 2771 rd = RDCOST(x->rdmult, x->rddiv, tmp_rate_sum, tmp_dist_sum); 2772 filter_cache[i] = rd; 2773 filter_cache[SWITCHABLE_FILTERS] = 2774 VPXMIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd); 2775 if (cm->interp_filter == SWITCHABLE) rd += rs_rd; 2776 *mask_filter = VPXMAX(*mask_filter, rd); 2777 } else { 2778 int rate_sum = 0; 2779 int64_t dist_sum = 0; 2780 if (i > 0 && cpi->sf.adaptive_interp_filter_search && 2781 (cpi->sf.interp_filter_search_mask & (1 << i))) { 2782 rate_sum = INT_MAX; 2783 dist_sum = INT64_MAX; 2784 continue; 2785 } 2786 2787 if ((cm->interp_filter == SWITCHABLE && (!i || best_needs_copy)) || 2788 (cm->interp_filter != SWITCHABLE && 2789 (cm->interp_filter == mi->interp_filter || 2790 (i == 0 && intpel_mv)))) { 2791 restore_dst_buf(xd, orig_dst, orig_dst_stride); 2792 } else { 2793 for (j = 0; j < MAX_MB_PLANE; j++) { 2794 xd->plane[j].dst.buf = tmp_buf + j * 64 * 64; 2795 xd->plane[j].dst.stride = 64; 2796 } 2797 } 2798 vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); 2799 model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum, &tmp_skip_sb, 2800 &tmp_skip_sse); 2801 2802 rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum); 2803 filter_cache[i] = rd; 2804 filter_cache[SWITCHABLE_FILTERS] = 2805 VPXMIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd); 2806 if (cm->interp_filter == SWITCHABLE) rd += rs_rd; 2807 *mask_filter = VPXMAX(*mask_filter, rd); 2808 2809 if (i == 0 && intpel_mv) { 2810 tmp_rate_sum = rate_sum; 2811 tmp_dist_sum = dist_sum; 2812 } 2813 } 2814 2815 if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) { 2816 if (rd / 2 > ref_best_rd) { 
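// Even the modeled rd is already more than double the best rd seen so far, // so the rd-breakout speed feature gives up on this mode without a full // rate-distortion search.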
2817 restore_dst_buf(xd, orig_dst, orig_dst_stride); 2818 return INT64_MAX; 2819 } 2820 } 2821 newbest = i == 0 || rd < best_rd; 2822 2823 if (newbest) { 2824 best_rd = rd; 2825 best_filter = mi->interp_filter; 2826 if (cm->interp_filter == SWITCHABLE && i && !intpel_mv) 2827 best_needs_copy = !best_needs_copy; 2828 } 2829 2830 if ((cm->interp_filter == SWITCHABLE && newbest) || 2831 (cm->interp_filter != SWITCHABLE && 2832 cm->interp_filter == mi->interp_filter)) { 2833 pred_exists = 1; 2834 tmp_rd = best_rd; 2835 2836 skip_txfm_sb = tmp_skip_sb; 2837 skip_sse_sb = tmp_skip_sse; 2838 memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm)); 2839 memcpy(bsse, x->bsse, sizeof(bsse)); 2840 } 2841 } 2842 restore_dst_buf(xd, orig_dst, orig_dst_stride); 2843 } 2844 } 2845 // Set the appropriate filter 2846 mi->interp_filter = 2847 cm->interp_filter != SWITCHABLE ? cm->interp_filter : best_filter; 2848 rs = cm->interp_filter == SWITCHABLE ? vp9_get_switchable_rate(cpi, xd) : 0; 2849 2850 if (pred_exists) { 2851 if (best_needs_copy) { 2852 // again temporarily set the buffers to local memory to prevent a memcpy 2853 for (i = 0; i < MAX_MB_PLANE; i++) { 2854 xd->plane[i].dst.buf = tmp_buf + i * 64 * 64; 2855 xd->plane[i].dst.stride = 64; 2856 } 2857 } 2858 rd = tmp_rd + RDCOST(x->rdmult, x->rddiv, rs, 0); 2859 } else { 2860 int tmp_rate; 2861 int64_t tmp_dist; 2862 // Handles the special case when a filter that is not in the 2863 // switchable list (ex. bilinear) is indicated at the frame level, or 2864 // skip condition holds. 2865 vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); 2866 model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist, &skip_txfm_sb, 2867 &skip_sse_sb); 2868 rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist); 2869 memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm)); 2870 memcpy(bsse, x->bsse, sizeof(bsse)); 2871 } 2872 2873 if (!is_comp_pred) single_filter[this_mode][refs[0]] = mi->interp_filter; 2874 2875 if (cpi->sf.adaptive_mode_search) 2876 if (is_comp_pred) 2877 if (single_skippable[this_mode][refs[0]] && 2878 single_skippable[this_mode][refs[1]]) 2879 memset(skip_txfm, SKIP_TXFM_AC_DC, sizeof(skip_txfm)); 2880 2881 if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) { 2882 // if current pred_error modeled rd is substantially more than the best 2883 // so far, do not bother doing full rd 2884 if (rd / 2 > ref_best_rd) { 2885 restore_dst_buf(xd, orig_dst, orig_dst_stride); 2886 return INT64_MAX; 2887 } 2888 } 2889 2890 if (cm->interp_filter == SWITCHABLE) *rate2 += rs; 2891 2892 memcpy(x->skip_txfm, skip_txfm, sizeof(skip_txfm)); 2893 memcpy(x->bsse, bsse, sizeof(bsse)); 2894 2895 if (!skip_txfm_sb || xd->lossless) { 2896 int skippable_y, skippable_uv; 2897 int64_t sseuv = INT64_MAX; 2898 int64_t rdcosty = INT64_MAX; 2899 2900 // Y cost and distortion 2901 vp9_subtract_plane(x, bsize, 0); 2902 super_block_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse, bsize, 2903 ref_best_rd); 2904 2905 if (*rate_y == INT_MAX) { 2906 *rate2 = INT_MAX; 2907 *distortion = INT64_MAX; 2908 restore_dst_buf(xd, orig_dst, orig_dst_stride); 2909 return INT64_MAX; 2910 } 2911 2912 *rate2 += *rate_y; 2913 *distortion += distortion_y; 2914 2915 rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion); 2916 rdcosty = VPXMIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse)); 2917 2918 if (!super_block_uvrd(cpi, x, rate_uv, &distortion_uv, &skippable_uv, 2919 &sseuv, bsize, ref_best_rd - rdcosty)) { 2920 *rate2 = INT_MAX; 2921 *distortion = INT64_MAX; 2922 restore_dst_buf(xd, 
orig_dst, orig_dst_stride); 2923 return INT64_MAX; 2924 } 2925 2926 *psse += sseuv; 2927 *rate2 += *rate_uv; 2928 *distortion += distortion_uv; 2929 *skippable = skippable_y && skippable_uv; 2930 } else { 2931 x->skip = 1; 2932 *disable_skip = 1; 2933 2934 // The cost of skip bit needs to be added. 2935 *rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1); 2936 2937 *distortion = skip_sse_sb; 2938 } 2939 2940 if (!is_comp_pred) single_skippable[this_mode][refs[0]] = *skippable; 2941 2942 restore_dst_buf(xd, orig_dst, orig_dst_stride); 2943 return 0; // The rate-distortion cost will be re-calculated by caller. 2944 } 2945 2946 void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *rd_cost, 2947 BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, 2948 int64_t best_rd) { 2949 VP9_COMMON *const cm = &cpi->common; 2950 MACROBLOCKD *const xd = &x->e_mbd; 2951 struct macroblockd_plane *const pd = xd->plane; 2952 int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0; 2953 int y_skip = 0, uv_skip = 0; 2954 int64_t dist_y = 0, dist_uv = 0; 2955 TX_SIZE max_uv_tx_size; 2956 x->skip_encode = 0; 2957 ctx->skip = 0; 2958 xd->mi[0]->ref_frame[0] = INTRA_FRAME; 2959 xd->mi[0]->ref_frame[1] = NONE; 2960 // Initialize interp_filter here so we do not have to check for inter block 2961 // modes in get_pred_context_switchable_interp() 2962 xd->mi[0]->interp_filter = SWITCHABLE_FILTERS; 2963 2964 if (bsize >= BLOCK_8X8) { 2965 if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, &dist_y, 2966 &y_skip, bsize, best_rd) >= best_rd) { 2967 rd_cost->rate = INT_MAX; 2968 return; 2969 } 2970 } else { 2971 y_skip = 0; 2972 if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly, 2973 &dist_y, best_rd) >= best_rd) { 2974 rd_cost->rate = INT_MAX; 2975 return; 2976 } 2977 } 2978 max_uv_tx_size = uv_txsize_lookup[bsize][xd->mi[0]->tx_size] 2979 [pd[1].subsampling_x][pd[1].subsampling_y]; 2980 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly, &dist_uv, 2981 &uv_skip, VPXMAX(BLOCK_8X8, bsize), max_uv_tx_size); 2982 2983 if (y_skip && uv_skip) { 2984 rd_cost->rate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly + 2985 vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1); 2986 rd_cost->dist = dist_y + dist_uv; 2987 } else { 2988 rd_cost->rate = 2989 rate_y + rate_uv + vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0); 2990 rd_cost->dist = dist_y + dist_uv; 2991 } 2992 2993 ctx->mic = *xd->mi[0]; 2994 ctx->mbmi_ext = *x->mbmi_ext; 2995 rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist); 2996 } 2997 2998 // This function is designed to apply a bias or adjustment to an rd value based 2999 // on the relative variance of the source and reconstruction. 
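// Nothing is adjusted unless the lower of the source and recon variances is // at or below LOW_VAR_THRESH. The rd cost is then inflated by var_factor // percent, where var_factor = min(max_var_adjust[content], // VAR_MULT * |src_var - rec_var| / max(1, src_var)); e.g. src_var = 4 and // rec_var = 2 give 100 * 2 / 4 = 50 before the cap. For film content, // near-flat blocks additionally double the cost of intra modes and of large // partitions.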
3000 #define VERY_LOW_VAR_THRESH 2 3001 #define LOW_VAR_THRESH 5 3002 #define VAR_MULT 100 3003 static unsigned int max_var_adjust[VP9E_CONTENT_INVALID] = { 16, 16, 100 }; 3004 3005 static void rd_variance_adjustment(VP9_COMP *cpi, MACROBLOCK *x, 3006 BLOCK_SIZE bsize, int64_t *this_rd, 3007 MV_REFERENCE_FRAME ref_frame, 3008 unsigned int source_variance) { 3009 MACROBLOCKD *const xd = &x->e_mbd; 3010 unsigned int rec_variance; 3011 unsigned int src_variance; 3012 unsigned int src_rec_min; 3013 unsigned int absvar_diff = 0; 3014 unsigned int var_factor = 0; 3015 unsigned int adj_max; 3016 vp9e_tune_content content_type = cpi->oxcf.content; 3017 3018 if (*this_rd == INT64_MAX) return; 3019 3020 #if CONFIG_VP9_HIGHBITDEPTH 3021 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { 3022 if (source_variance > 100) { 3023 rec_variance = vp9_high_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, 3024 bsize, xd->bd); 3025 src_variance = source_variance; 3026 } else { 3027 rec_variance = 3028 vp9_high_get_sby_variance(cpi, &xd->plane[0].dst, bsize, xd->bd); 3029 src_variance = 3030 vp9_high_get_sby_variance(cpi, &x->plane[0].src, bsize, xd->bd); 3031 } 3032 } else { 3033 if (source_variance > 100) { 3034 rec_variance = 3035 vp9_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, bsize); 3036 src_variance = source_variance; 3037 } else { 3038 rec_variance = vp9_get_sby_variance(cpi, &xd->plane[0].dst, bsize); 3039 src_variance = vp9_get_sby_variance(cpi, &x->plane[0].src, bsize); 3040 } 3041 } 3042 #else 3043 if (source_variance > 100) { 3044 rec_variance = vp9_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, bsize); 3045 src_variance = source_variance; 3046 } else { 3047 rec_variance = vp9_get_sby_variance(cpi, &xd->plane[0].dst, bsize); 3048 src_variance = vp9_get_sby_variance(cpi, &x->plane[0].src, bsize); 3049 } 3050 #endif // CONFIG_VP9_HIGHBITDEPTH 3051 3052 // Lower of source (raw per pixel value) and recon variance. Note that 3053 // if the source per pixel is 0 then the recon value here will not be per 3054 // pixel (see above) so will likely be much larger. 3055 src_rec_min = VPXMIN(source_variance, rec_variance); 3056 3057 if (src_rec_min > LOW_VAR_THRESH) return; 3058 3059 absvar_diff = (src_variance > rec_variance) ? (src_variance - rec_variance) 3060 : (rec_variance - src_variance); 3061 3062 adj_max = max_var_adjust[content_type]; 3063 3064 var_factor = 3065 (unsigned int)((int64_t)VAR_MULT * absvar_diff) / VPXMAX(1, src_variance); 3066 var_factor = VPXMIN(adj_max, var_factor); 3067 3068 *this_rd += (*this_rd * var_factor) / 100; 3069 3070 if (content_type == VP9E_CONTENT_FILM) { 3071 if (src_rec_min <= VERY_LOW_VAR_THRESH) { 3072 if (ref_frame == INTRA_FRAME) *this_rd *= 2; 3073 if (bsize > 6) *this_rd *= 2; 3074 } 3075 } 3076 } 3077 3078 // Do we have an internal image edge (e.g. formatting bars). 3079 int vp9_internal_image_edge(VP9_COMP *cpi) { 3080 return (cpi->oxcf.pass == 2) && 3081 ((cpi->twopass.this_frame_stats.inactive_zone_rows > 0) || 3082 (cpi->twopass.this_frame_stats.inactive_zone_cols > 0)); 3083 } 3084 3085 // Checks to see if a super block is on a horizontal image edge. 3086 // In most cases this is the "real" edge unless there are formatting 3087 // bars embedded in the stream. 3088 int vp9_active_h_edge(VP9_COMP *cpi, int mi_row, int mi_step) { 3089 int top_edge = 0; 3090 int bottom_edge = cpi->common.mi_rows; 3091 int is_active_h_edge = 0; 3092 3093 // For two pass account for any formatting bars detected. 
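// One inactive MB spans two mi units (a macroblock is 16 pixels, an mi unit // is 8), which is why inactive_zone_rows is scaled by two below.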
3094 if (cpi->oxcf.pass == 2) { 3095 TWO_PASS *twopass = &cpi->twopass; 3096 3097 // The inactive region is specified in MBs not mi units. 3098 // The image edge is in the following MB row. 3099 top_edge += (int)(twopass->this_frame_stats.inactive_zone_rows * 2); 3100 3101 bottom_edge -= (int)(twopass->this_frame_stats.inactive_zone_rows * 2); 3102 bottom_edge = VPXMAX(top_edge, bottom_edge); 3103 } 3104 3105 if (((top_edge >= mi_row) && (top_edge < (mi_row + mi_step))) || 3106 ((bottom_edge >= mi_row) && (bottom_edge < (mi_row + mi_step)))) { 3107 is_active_h_edge = 1; 3108 } 3109 return is_active_h_edge; 3110 } 3111 3112 // Checks to see if a super block is on a vertical image edge. 3113 // In most cases this is the "real" edge unless there are formatting 3114 // bars embedded in the stream. 3115 int vp9_active_v_edge(VP9_COMP *cpi, int mi_col, int mi_step) { 3116 int left_edge = 0; 3117 int right_edge = cpi->common.mi_cols; 3118 int is_active_v_edge = 0; 3119 3120 // For two pass account for any formatting bars detected. 3121 if (cpi->oxcf.pass == 2) { 3122 TWO_PASS *twopass = &cpi->twopass; 3123 3124 // The inactive region is specified in MBs not mi units. 3125 // The image edge is in the following MB column. 3126 left_edge += (int)(twopass->this_frame_stats.inactive_zone_cols * 2); 3127 3128 right_edge -= (int)(twopass->this_frame_stats.inactive_zone_cols * 2); 3129 right_edge = VPXMAX(left_edge, right_edge); 3130 } 3131 3132 if (((left_edge >= mi_col) && (left_edge < (mi_col + mi_step))) || 3133 ((right_edge >= mi_col) && (right_edge < (mi_col + mi_step)))) { 3134 is_active_v_edge = 1; 3135 } 3136 return is_active_v_edge; 3137 } 3138 3139 // Checks to see if a super block is at the edge of the active image. 3140 // In most cases this is the "real" edge unless there are formatting 3141 // bars embedded in the stream.
3142 int vp9_active_edge_sb(VP9_COMP *cpi, int mi_row, int mi_col) { 3143 return vp9_active_h_edge(cpi, mi_row, MI_BLOCK_SIZE) || 3144 vp9_active_v_edge(cpi, mi_col, MI_BLOCK_SIZE); 3145 } 3146 3147 void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data, 3148 MACROBLOCK *x, int mi_row, int mi_col, 3149 RD_COST *rd_cost, BLOCK_SIZE bsize, 3150 PICK_MODE_CONTEXT *ctx, int64_t best_rd_so_far) { 3151 VP9_COMMON *const cm = &cpi->common; 3152 TileInfo *const tile_info = &tile_data->tile_info; 3153 RD_OPT *const rd_opt = &cpi->rd; 3154 SPEED_FEATURES *const sf = &cpi->sf; 3155 MACROBLOCKD *const xd = &x->e_mbd; 3156 MODE_INFO *const mi = xd->mi[0]; 3157 MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; 3158 const struct segmentation *const seg = &cm->seg; 3159 PREDICTION_MODE this_mode; 3160 MV_REFERENCE_FRAME ref_frame, second_ref_frame; 3161 unsigned char segment_id = mi->segment_id; 3162 int comp_pred, i, k; 3163 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; 3164 struct buf_2d yv12_mb[4][MAX_MB_PLANE]; 3165 int_mv single_newmv[MAX_REF_FRAMES] = { { 0 } }; 3166 INTERP_FILTER single_inter_filter[MB_MODE_COUNT][MAX_REF_FRAMES]; 3167 int single_skippable[MB_MODE_COUNT][MAX_REF_FRAMES]; 3168 static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG, 3169 VP9_ALT_FLAG }; 3170 int64_t best_rd = best_rd_so_far; 3171 int64_t best_pred_diff[REFERENCE_MODES]; 3172 int64_t best_pred_rd[REFERENCE_MODES]; 3173 int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS]; 3174 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]; 3175 MODE_INFO best_mbmode; 3176 int best_mode_skippable = 0; 3177 int midx, best_mode_index = -1; 3178 unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES]; 3179 vpx_prob comp_mode_p; 3180 int64_t best_intra_rd = INT64_MAX; 3181 unsigned int best_pred_sse = UINT_MAX; 3182 PREDICTION_MODE best_intra_mode = DC_PRED; 3183 int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES]; 3184 int64_t dist_uv[TX_SIZES]; 3185 int skip_uv[TX_SIZES]; 3186 PREDICTION_MODE mode_uv[TX_SIZES]; 3187 const int intra_cost_penalty = 3188 vp9_get_intra_cost_penalty(cpi, bsize, cm->base_qindex, cm->y_dc_delta_q); 3189 int best_skip2 = 0; 3190 uint8_t ref_frame_skip_mask[2] = { 0, 1 }; 3191 uint16_t mode_skip_mask[MAX_REF_FRAMES] = { 0 }; 3192 int mode_skip_start = sf->mode_skip_start + 1; 3193 const int *const rd_threshes = rd_opt->threshes[segment_id][bsize]; 3194 const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize]; 3195 int64_t mode_threshold[MAX_MODES]; 3196 int8_t *tile_mode_map = tile_data->mode_map[bsize]; 3197 int8_t mode_map[MAX_MODES]; // Maintain mode_map information locally to avoid 3198 // lock mechanism involved with reads from 3199 // tile_mode_map 3200 const int mode_search_skip_flags = sf->mode_search_skip_flags; 3201 const int is_rect_partition = 3202 num_4x4_blocks_wide_lookup[bsize] != num_4x4_blocks_high_lookup[bsize]; 3203 int64_t mask_filter = 0; 3204 int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS]; 3205 3206 vp9_zero(best_mbmode); 3207 3208 x->skip_encode = sf->skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; 3209 3210 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) filter_cache[i] = INT64_MAX; 3211 3212 estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp, 3213 &comp_mode_p); 3214 3215 for (i = 0; i < REFERENCE_MODES; ++i) best_pred_rd[i] = INT64_MAX; 3216 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) 3217 best_filter_rd[i] = INT64_MAX; 3218 for (i = 0; i < TX_SIZES; i++) rate_uv_intra[i] = 
      INT_MAX;
  for (i = 0; i < MAX_REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
  for (i = 0; i < MB_MODE_COUNT; ++i) {
    for (k = 0; k < MAX_REF_FRAMES; ++k) {
      single_inter_filter[i][k] = SWITCHABLE;
      single_skippable[i][k] = 0;
    }
  }

  rd_cost->rate = INT_MAX;

  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
    x->pred_mv_sad[ref_frame] = INT_MAX;
    if ((cpi->ref_frame_flags & flag_list[ref_frame]) &&
        !(is_rect_partition && (ctx->skip_ref_frame_mask & (1 << ref_frame)))) {
      assert(get_ref_frame_buffer(cpi, ref_frame) != NULL);
      setup_buffer_inter(cpi, x, ref_frame, bsize, mi_row, mi_col,
                         frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb);
    }
    frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
    frame_mv[ZEROMV][ref_frame].as_int = 0;
  }

  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
    if (!(cpi->ref_frame_flags & flag_list[ref_frame])) {
      // Skip checking missing references in both single and compound reference
      // modes. Note that a mode will be skipped if both reference frames
      // are masked out.
      ref_frame_skip_mask[0] |= (1 << ref_frame);
      ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
    } else if (sf->reference_masking) {
      for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
        // Skip fixed mv modes for poor references.
        if ((x->pred_mv_sad[ref_frame] >> 2) > x->pred_mv_sad[i]) {
          mode_skip_mask[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
          break;
        }
      }
    }
    // If the segment reference frame feature is enabled, then do nothing if
    // the current ref frame is not allowed.
    if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
        get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
      ref_frame_skip_mask[0] |= (1 << ref_frame);
      ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
    }
  }

  // Disable this drop out case if the ref frame
  // segment level feature is enabled for this segment. This is to
  // prevent the possibility that we end up unable to pick any mode.
  if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
    // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
    // unless ARNR filtering is enabled in which case we want
    // an unfiltered alternative. We allow near/nearest as well
    // because they may result in zero-zero MVs but be cheaper.
3274 if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) { 3275 ref_frame_skip_mask[0] = (1 << LAST_FRAME) | (1 << GOLDEN_FRAME); 3276 ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK; 3277 mode_skip_mask[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO; 3278 if (frame_mv[NEARMV][ALTREF_FRAME].as_int != 0) 3279 mode_skip_mask[ALTREF_FRAME] |= (1 << NEARMV); 3280 if (frame_mv[NEARESTMV][ALTREF_FRAME].as_int != 0) 3281 mode_skip_mask[ALTREF_FRAME] |= (1 << NEARESTMV); 3282 } 3283 } 3284 3285 if (cpi->rc.is_src_frame_alt_ref) { 3286 if (sf->alt_ref_search_fp) { 3287 mode_skip_mask[ALTREF_FRAME] = 0; 3288 ref_frame_skip_mask[0] = ~(1 << ALTREF_FRAME); 3289 ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK; 3290 } 3291 } 3292 3293 if (sf->alt_ref_search_fp) 3294 if (!cm->show_frame && x->pred_mv_sad[GOLDEN_FRAME] < INT_MAX) 3295 if (x->pred_mv_sad[ALTREF_FRAME] > (x->pred_mv_sad[GOLDEN_FRAME] << 1)) 3296 mode_skip_mask[ALTREF_FRAME] |= INTER_ALL; 3297 3298 if (sf->adaptive_mode_search) { 3299 if (cm->show_frame && !cpi->rc.is_src_frame_alt_ref && 3300 cpi->rc.frames_since_golden >= 3) 3301 if (x->pred_mv_sad[GOLDEN_FRAME] > (x->pred_mv_sad[LAST_FRAME] << 1)) 3302 mode_skip_mask[GOLDEN_FRAME] |= INTER_ALL; 3303 } 3304 3305 if (bsize > sf->max_intra_bsize) { 3306 ref_frame_skip_mask[0] |= (1 << INTRA_FRAME); 3307 ref_frame_skip_mask[1] |= (1 << INTRA_FRAME); 3308 } 3309 3310 mode_skip_mask[INTRA_FRAME] |= 3311 ~(sf->intra_y_mode_mask[max_txsize_lookup[bsize]]); 3312 3313 for (i = 0; i <= LAST_NEW_MV_INDEX; ++i) mode_threshold[i] = 0; 3314 3315 for (i = LAST_NEW_MV_INDEX + 1; i < MAX_MODES; ++i) 3316 mode_threshold[i] = ((int64_t)rd_threshes[i] * rd_thresh_freq_fact[i]) >> 5; 3317 3318 midx = sf->schedule_mode_search ? mode_skip_start : 0; 3319 3320 while (midx > 4) { 3321 uint8_t end_pos = 0; 3322 for (i = 5; i < midx; ++i) { 3323 if (mode_threshold[tile_mode_map[i - 1]] > 3324 mode_threshold[tile_mode_map[i]]) { 3325 uint8_t tmp = tile_mode_map[i]; 3326 tile_mode_map[i] = tile_mode_map[i - 1]; 3327 tile_mode_map[i - 1] = tmp; 3328 end_pos = i; 3329 } 3330 } 3331 midx = end_pos; 3332 } 3333 3334 memcpy(mode_map, tile_mode_map, sizeof(mode_map)); 3335 3336 for (midx = 0; midx < MAX_MODES; ++midx) { 3337 int mode_index = mode_map[midx]; 3338 int mode_excluded = 0; 3339 int64_t this_rd = INT64_MAX; 3340 int disable_skip = 0; 3341 int compmode_cost = 0; 3342 int rate2 = 0, rate_y = 0, rate_uv = 0; 3343 int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0; 3344 int skippable = 0; 3345 int this_skip2 = 0; 3346 int64_t total_sse = INT64_MAX; 3347 int early_term = 0; 3348 3349 this_mode = vp9_mode_order[mode_index].mode; 3350 ref_frame = vp9_mode_order[mode_index].ref_frame[0]; 3351 second_ref_frame = vp9_mode_order[mode_index].ref_frame[1]; 3352 3353 vp9_zero(x->sum_y_eobs); 3354 3355 if (is_rect_partition) { 3356 if (ctx->skip_ref_frame_mask & (1 << ref_frame)) continue; 3357 if (second_ref_frame > 0 && 3358 (ctx->skip_ref_frame_mask & (1 << second_ref_frame))) 3359 continue; 3360 } 3361 3362 // Look at the reference frame of the best mode so far and set the 3363 // skip mask to look at a subset of the remaining modes. 
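// (For orientation: ref_frame_skip_mask[0] carries one bit per possible first
// reference frame and ref_frame_skip_mask[1] one bit per possible second
// reference frame, while mode_skip_mask[ref] carries one bit per prediction
// mode for that reference. A candidate is dropped further below when both of
// its reference-frame bits are set, or when its mode bit is set for its first
// reference.)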
3364 if (midx == mode_skip_start && best_mode_index >= 0) { 3365 switch (best_mbmode.ref_frame[0]) { 3366 case INTRA_FRAME: break; 3367 case LAST_FRAME: ref_frame_skip_mask[0] |= LAST_FRAME_MODE_MASK; break; 3368 case GOLDEN_FRAME: 3369 ref_frame_skip_mask[0] |= GOLDEN_FRAME_MODE_MASK; 3370 break; 3371 case ALTREF_FRAME: ref_frame_skip_mask[0] |= ALT_REF_MODE_MASK; break; 3372 case NONE: 3373 case MAX_REF_FRAMES: assert(0 && "Invalid Reference frame"); break; 3374 } 3375 } 3376 3377 if ((ref_frame_skip_mask[0] & (1 << ref_frame)) && 3378 (ref_frame_skip_mask[1] & (1 << VPXMAX(0, second_ref_frame)))) 3379 continue; 3380 3381 if (mode_skip_mask[ref_frame] & (1 << this_mode)) continue; 3382 3383 // Test best rd so far against threshold for trying this mode. 3384 if (best_mode_skippable && sf->schedule_mode_search) 3385 mode_threshold[mode_index] <<= 1; 3386 3387 if (best_rd < mode_threshold[mode_index]) continue; 3388 3389 // This is only used in motion vector unit test. 3390 if (cpi->oxcf.motion_vector_unit_test && ref_frame == INTRA_FRAME) continue; 3391 3392 if (sf->motion_field_mode_search) { 3393 const int mi_width = VPXMIN(num_8x8_blocks_wide_lookup[bsize], 3394 tile_info->mi_col_end - mi_col); 3395 const int mi_height = VPXMIN(num_8x8_blocks_high_lookup[bsize], 3396 tile_info->mi_row_end - mi_row); 3397 const int bsl = mi_width_log2_lookup[bsize]; 3398 int cb_partition_search_ctrl = 3399 (((mi_row + mi_col) >> bsl) + 3400 get_chessboard_index(cm->current_video_frame)) & 3401 0x1; 3402 MODE_INFO *ref_mi; 3403 int const_motion = 1; 3404 int skip_ref_frame = !cb_partition_search_ctrl; 3405 MV_REFERENCE_FRAME rf = NONE; 3406 int_mv ref_mv; 3407 ref_mv.as_int = INVALID_MV; 3408 3409 if ((mi_row - 1) >= tile_info->mi_row_start) { 3410 ref_mv = xd->mi[-xd->mi_stride]->mv[0]; 3411 rf = xd->mi[-xd->mi_stride]->ref_frame[0]; 3412 for (i = 0; i < mi_width; ++i) { 3413 ref_mi = xd->mi[-xd->mi_stride + i]; 3414 const_motion &= (ref_mv.as_int == ref_mi->mv[0].as_int) && 3415 (ref_frame == ref_mi->ref_frame[0]); 3416 skip_ref_frame &= (rf == ref_mi->ref_frame[0]); 3417 } 3418 } 3419 3420 if ((mi_col - 1) >= tile_info->mi_col_start) { 3421 if (ref_mv.as_int == INVALID_MV) ref_mv = xd->mi[-1]->mv[0]; 3422 if (rf == NONE) rf = xd->mi[-1]->ref_frame[0]; 3423 for (i = 0; i < mi_height; ++i) { 3424 ref_mi = xd->mi[i * xd->mi_stride - 1]; 3425 const_motion &= (ref_mv.as_int == ref_mi->mv[0].as_int) && 3426 (ref_frame == ref_mi->ref_frame[0]); 3427 skip_ref_frame &= (rf == ref_mi->ref_frame[0]); 3428 } 3429 } 3430 3431 if (skip_ref_frame && this_mode != NEARESTMV && this_mode != NEWMV) 3432 if (rf > INTRA_FRAME) 3433 if (ref_frame != rf) continue; 3434 3435 if (const_motion) 3436 if (this_mode == NEARMV || this_mode == ZEROMV) continue; 3437 } 3438 3439 comp_pred = second_ref_frame > INTRA_FRAME; 3440 if (comp_pred) { 3441 if (!cpi->allow_comp_inter_inter) continue; 3442 3443 if (cm->ref_frame_sign_bias[ref_frame] == 3444 cm->ref_frame_sign_bias[second_ref_frame]) 3445 continue; 3446 3447 // Skip compound inter modes if ARF is not available. 3448 if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) continue; 3449 3450 // Do not allow compound prediction if the segment level reference frame 3451 // feature is in use as in this case there can only be one reference. 
3452 if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) continue; 3453 3454 if ((mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) && 3455 best_mode_index >= 0 && best_mbmode.ref_frame[0] == INTRA_FRAME) 3456 continue; 3457 3458 mode_excluded = cm->reference_mode == SINGLE_REFERENCE; 3459 } else { 3460 if (ref_frame != INTRA_FRAME) 3461 mode_excluded = cm->reference_mode == COMPOUND_REFERENCE; 3462 } 3463 3464 if (ref_frame == INTRA_FRAME) { 3465 if (sf->adaptive_mode_search) 3466 if ((x->source_variance << num_pels_log2_lookup[bsize]) > best_pred_sse) 3467 continue; 3468 3469 if (this_mode != DC_PRED) { 3470 // Disable intra modes other than DC_PRED for blocks with low variance 3471 // Threshold for intra skipping based on source variance 3472 // TODO(debargha): Specialize the threshold for super block sizes 3473 const unsigned int skip_intra_var_thresh = 64; 3474 if ((mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) && 3475 x->source_variance < skip_intra_var_thresh) 3476 continue; 3477 // Only search the oblique modes if the best so far is 3478 // one of the neighboring directional modes 3479 if ((mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) && 3480 (this_mode >= D45_PRED && this_mode <= TM_PRED)) { 3481 if (best_mode_index >= 0 && best_mbmode.ref_frame[0] > INTRA_FRAME) 3482 continue; 3483 } 3484 if (mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) { 3485 if (conditional_skipintra(this_mode, best_intra_mode)) continue; 3486 } 3487 } 3488 } else { 3489 const MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, second_ref_frame }; 3490 if (!check_best_zero_mv(cpi, mbmi_ext->mode_context, frame_mv, this_mode, 3491 ref_frames)) 3492 continue; 3493 } 3494 3495 mi->mode = this_mode; 3496 mi->uv_mode = DC_PRED; 3497 mi->ref_frame[0] = ref_frame; 3498 mi->ref_frame[1] = second_ref_frame; 3499 // Evaluate all sub-pel filters irrespective of whether we can use 3500 // them for this frame. 3501 mi->interp_filter = 3502 cm->interp_filter == SWITCHABLE ? EIGHTTAP : cm->interp_filter; 3503 mi->mv[0].as_int = mi->mv[1].as_int = 0; 3504 3505 x->skip = 0; 3506 set_ref_ptrs(cm, xd, ref_frame, second_ref_frame); 3507 3508 // Select prediction reference frames. 
3509 for (i = 0; i < MAX_MB_PLANE; i++) { 3510 xd->plane[i].pre[0] = yv12_mb[ref_frame][i]; 3511 if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i]; 3512 } 3513 3514 if (ref_frame == INTRA_FRAME) { 3515 TX_SIZE uv_tx; 3516 struct macroblockd_plane *const pd = &xd->plane[1]; 3517 memset(x->skip_txfm, 0, sizeof(x->skip_txfm)); 3518 super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL, bsize, 3519 best_rd); 3520 if (rate_y == INT_MAX) continue; 3521 3522 uv_tx = uv_txsize_lookup[bsize][mi->tx_size][pd->subsampling_x] 3523 [pd->subsampling_y]; 3524 if (rate_uv_intra[uv_tx] == INT_MAX) { 3525 choose_intra_uv_mode(cpi, x, ctx, bsize, uv_tx, &rate_uv_intra[uv_tx], 3526 &rate_uv_tokenonly[uv_tx], &dist_uv[uv_tx], 3527 &skip_uv[uv_tx], &mode_uv[uv_tx]); 3528 } 3529 3530 rate_uv = rate_uv_tokenonly[uv_tx]; 3531 distortion_uv = dist_uv[uv_tx]; 3532 skippable = skippable && skip_uv[uv_tx]; 3533 mi->uv_mode = mode_uv[uv_tx]; 3534 3535 rate2 = rate_y + cpi->mbmode_cost[mi->mode] + rate_uv_intra[uv_tx]; 3536 if (this_mode != DC_PRED && this_mode != TM_PRED) 3537 rate2 += intra_cost_penalty; 3538 distortion2 = distortion_y + distortion_uv; 3539 } else { 3540 this_rd = handle_inter_mode( 3541 cpi, x, bsize, &rate2, &distortion2, &skippable, &rate_y, &rate_uv, 3542 &disable_skip, frame_mv, mi_row, mi_col, single_newmv, 3543 single_inter_filter, single_skippable, &total_sse, best_rd, 3544 &mask_filter, filter_cache); 3545 if (this_rd == INT64_MAX) continue; 3546 3547 compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred); 3548 3549 if (cm->reference_mode == REFERENCE_MODE_SELECT) rate2 += compmode_cost; 3550 } 3551 3552 // Estimate the reference frame signaling cost and add it 3553 // to the rolling cost variable. 3554 if (comp_pred) { 3555 rate2 += ref_costs_comp[ref_frame]; 3556 } else { 3557 rate2 += ref_costs_single[ref_frame]; 3558 } 3559 3560 if (!disable_skip) { 3561 const vpx_prob skip_prob = vp9_get_skip_prob(cm, xd); 3562 const int skip_cost0 = vp9_cost_bit(skip_prob, 0); 3563 const int skip_cost1 = vp9_cost_bit(skip_prob, 1); 3564 3565 if (skippable) { 3566 // Back out the coefficient coding costs 3567 rate2 -= (rate_y + rate_uv); 3568 3569 // Cost the skip mb case 3570 rate2 += skip_cost1; 3571 } else if (ref_frame != INTRA_FRAME && !xd->lossless) { 3572 if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv + skip_cost0, 3573 distortion2) < 3574 RDCOST(x->rdmult, x->rddiv, skip_cost1, total_sse)) { 3575 // Add in the cost of the no skip flag. 3576 rate2 += skip_cost0; 3577 } else { 3578 // FIXME(rbultje) make this work for splitmv also 3579 assert(total_sse >= 0); 3580 3581 rate2 += skip_cost1; 3582 distortion2 = total_sse; 3583 rate2 -= (rate_y + rate_uv); 3584 this_skip2 = 1; 3585 } 3586 } else { 3587 // Add in the cost of the no skip flag. 3588 rate2 += skip_cost0; 3589 } 3590 3591 // Calculate the final RD estimate for this mode. 3592 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); 3593 } 3594 3595 // Apply an adjustment to the rd value based on the similarity of the 3596 // source variance and reconstructed variance. 
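// (For context: RDCOST() folds rate and distortion into a single fixed-point
// cost, conceptually lambda * rate + distortion with lambda derived from
// x->rdmult and x->rddiv; see the macro in vp9_rd.h. The skip branch above
// therefore chooses whichever of "code the residual" and "signal skip and
// accept total_sse as the distortion" gives the smaller combined cost.)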
3597 rd_variance_adjustment(cpi, x, bsize, &this_rd, ref_frame, 3598 x->source_variance); 3599 3600 if (ref_frame == INTRA_FRAME) { 3601 // Keep record of best intra rd 3602 if (this_rd < best_intra_rd) { 3603 best_intra_rd = this_rd; 3604 best_intra_mode = mi->mode; 3605 } 3606 } 3607 3608 if (!disable_skip && ref_frame == INTRA_FRAME) { 3609 for (i = 0; i < REFERENCE_MODES; ++i) 3610 best_pred_rd[i] = VPXMIN(best_pred_rd[i], this_rd); 3611 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) 3612 best_filter_rd[i] = VPXMIN(best_filter_rd[i], this_rd); 3613 } 3614 3615 // Did this mode help.. i.e. is it the new best mode 3616 if (this_rd < best_rd || x->skip) { 3617 int max_plane = MAX_MB_PLANE; 3618 if (!mode_excluded) { 3619 // Note index of best mode so far 3620 best_mode_index = mode_index; 3621 3622 if (ref_frame == INTRA_FRAME) { 3623 /* required for left and above block mv */ 3624 mi->mv[0].as_int = 0; 3625 max_plane = 1; 3626 // Initialize interp_filter here so we do not have to check for 3627 // inter block modes in get_pred_context_switchable_interp() 3628 mi->interp_filter = SWITCHABLE_FILTERS; 3629 } else { 3630 best_pred_sse = x->pred_sse[ref_frame]; 3631 } 3632 3633 rd_cost->rate = rate2; 3634 rd_cost->dist = distortion2; 3635 rd_cost->rdcost = this_rd; 3636 best_rd = this_rd; 3637 best_mbmode = *mi; 3638 best_skip2 = this_skip2; 3639 best_mode_skippable = skippable; 3640 3641 if (!x->select_tx_size) swap_block_ptr(x, ctx, 1, 0, 0, max_plane); 3642 memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mi->tx_size], 3643 sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk); 3644 ctx->sum_y_eobs = x->sum_y_eobs[mi->tx_size]; 3645 3646 // TODO(debargha): enhance this test with a better distortion prediction 3647 // based on qp, activity mask and history 3648 if ((mode_search_skip_flags & FLAG_EARLY_TERMINATE) && 3649 (mode_index > MIN_EARLY_TERM_INDEX)) { 3650 int qstep = xd->plane[0].dequant[1]; 3651 // TODO(debargha): Enhance this by specializing for each mode_index 3652 int scale = 4; 3653 #if CONFIG_VP9_HIGHBITDEPTH 3654 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { 3655 qstep >>= (xd->bd - 8); 3656 } 3657 #endif // CONFIG_VP9_HIGHBITDEPTH 3658 if (x->source_variance < UINT_MAX) { 3659 const int var_adjust = (x->source_variance < 16); 3660 scale -= var_adjust; 3661 } 3662 if (ref_frame > INTRA_FRAME && distortion2 * scale < qstep * qstep) { 3663 early_term = 1; 3664 } 3665 } 3666 } 3667 } 3668 3669 /* keep record of best compound/single-only prediction */ 3670 if (!disable_skip && ref_frame != INTRA_FRAME) { 3671 int64_t single_rd, hybrid_rd, single_rate, hybrid_rate; 3672 3673 if (cm->reference_mode == REFERENCE_MODE_SELECT) { 3674 single_rate = rate2 - compmode_cost; 3675 hybrid_rate = rate2; 3676 } else { 3677 single_rate = rate2; 3678 hybrid_rate = rate2 + compmode_cost; 3679 } 3680 3681 single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2); 3682 hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2); 3683 3684 if (!comp_pred) { 3685 if (single_rd < best_pred_rd[SINGLE_REFERENCE]) 3686 best_pred_rd[SINGLE_REFERENCE] = single_rd; 3687 } else { 3688 if (single_rd < best_pred_rd[COMPOUND_REFERENCE]) 3689 best_pred_rd[COMPOUND_REFERENCE] = single_rd; 3690 } 3691 if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT]) 3692 best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd; 3693 3694 /* keep record of best filter type */ 3695 if (!mode_excluded && cm->interp_filter != BILINEAR) { 3696 int64_t ref = 3697 filter_cache[cm->interp_filter == SWITCHABLE ? 
SWITCHABLE_FILTERS 3698 : cm->interp_filter]; 3699 3700 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) { 3701 int64_t adj_rd; 3702 if (ref == INT64_MAX) 3703 adj_rd = 0; 3704 else if (filter_cache[i] == INT64_MAX) 3705 // when early termination is triggered, the encoder does not have 3706 // access to the rate-distortion cost. it only knows that the cost 3707 // should be above the maximum valid value. hence it takes the known 3708 // maximum plus an arbitrary constant as the rate-distortion cost. 3709 adj_rd = mask_filter - ref + 10; 3710 else 3711 adj_rd = filter_cache[i] - ref; 3712 3713 adj_rd += this_rd; 3714 best_filter_rd[i] = VPXMIN(best_filter_rd[i], adj_rd); 3715 } 3716 } 3717 } 3718 3719 if (early_term) break; 3720 3721 if (x->skip && !comp_pred) break; 3722 } 3723 3724 // The inter modes' rate costs are not calculated precisely in some cases. 3725 // Therefore, sometimes, NEWMV is chosen instead of NEARESTMV, NEARMV, and 3726 // ZEROMV. Here, checks are added for those cases, and the mode decisions 3727 // are corrected. 3728 if (best_mbmode.mode == NEWMV) { 3729 const MV_REFERENCE_FRAME refs[2] = { best_mbmode.ref_frame[0], 3730 best_mbmode.ref_frame[1] }; 3731 int comp_pred_mode = refs[1] > INTRA_FRAME; 3732 3733 if (frame_mv[NEARESTMV][refs[0]].as_int == best_mbmode.mv[0].as_int && 3734 ((comp_pred_mode && 3735 frame_mv[NEARESTMV][refs[1]].as_int == best_mbmode.mv[1].as_int) || 3736 !comp_pred_mode)) 3737 best_mbmode.mode = NEARESTMV; 3738 else if (frame_mv[NEARMV][refs[0]].as_int == best_mbmode.mv[0].as_int && 3739 ((comp_pred_mode && 3740 frame_mv[NEARMV][refs[1]].as_int == best_mbmode.mv[1].as_int) || 3741 !comp_pred_mode)) 3742 best_mbmode.mode = NEARMV; 3743 else if (best_mbmode.mv[0].as_int == 0 && 3744 ((comp_pred_mode && best_mbmode.mv[1].as_int == 0) || 3745 !comp_pred_mode)) 3746 best_mbmode.mode = ZEROMV; 3747 } 3748 3749 if (best_mode_index < 0 || best_rd >= best_rd_so_far) { 3750 // If adaptive interp filter is enabled, then the current leaf node of 8x8 3751 // data is needed for sub8x8. Hence preserve the context. 3752 #if CONFIG_CONSISTENT_RECODE 3753 if (bsize == BLOCK_8X8) ctx->mic = *xd->mi[0]; 3754 #else 3755 if (cpi->row_mt && bsize == BLOCK_8X8) ctx->mic = *xd->mi[0]; 3756 #endif 3757 rd_cost->rate = INT_MAX; 3758 rd_cost->rdcost = INT64_MAX; 3759 return; 3760 } 3761 3762 // If we used an estimate for the uv intra rd in the loop above... 3763 if (sf->use_uv_intra_rd_estimate) { 3764 // Do Intra UV best rd mode selection if best mode choice above was intra. 3765 if (best_mbmode.ref_frame[0] == INTRA_FRAME) { 3766 TX_SIZE uv_tx_size; 3767 *mi = best_mbmode; 3768 uv_tx_size = get_uv_tx_size(mi, &xd->plane[1]); 3769 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size], 3770 &rate_uv_tokenonly[uv_tx_size], 3771 &dist_uv[uv_tx_size], &skip_uv[uv_tx_size], 3772 bsize < BLOCK_8X8 ? 
                                  BLOCK_8X8 : bsize,
                              uv_tx_size);
    }
  }

  assert((cm->interp_filter == SWITCHABLE) ||
         (cm->interp_filter == best_mbmode.interp_filter) ||
         !is_inter_block(&best_mbmode));

  if (!cpi->rc.is_src_frame_alt_ref)
    vp9_update_rd_thresh_fact(tile_data->thresh_freq_fact,
                              sf->adaptive_rd_thresh, bsize, best_mode_index);

  // macroblock modes
  *mi = best_mbmode;
  x->skip |= best_skip2;

  for (i = 0; i < REFERENCE_MODES; ++i) {
    if (best_pred_rd[i] == INT64_MAX)
      best_pred_diff[i] = INT_MIN;
    else
      best_pred_diff[i] = best_rd - best_pred_rd[i];
  }

  if (!x->skip) {
    for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
      if (best_filter_rd[i] == INT64_MAX)
        best_filter_diff[i] = 0;
      else
        best_filter_diff[i] = best_rd - best_filter_rd[i];
    }
    if (cm->interp_filter == SWITCHABLE)
      assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
  } else {
    vp9_zero(best_filter_diff);
  }

  // TODO(yunqingwang): Moving this line in front of the above best_filter_diff
  // updating code causes PSNR loss. Need to figure out the conflict.
  x->skip |= best_mode_skippable;

  if (!x->skip && !x->select_tx_size) {
    int has_high_freq_coeff = 0;
    int plane;
    int max_plane = is_inter_block(xd->mi[0]) ? MAX_MB_PLANE : 1;
    for (plane = 0; plane < max_plane; ++plane) {
      x->plane[plane].eobs = ctx->eobs_pbuf[plane][1];
      has_high_freq_coeff |= vp9_has_high_freq_in_plane(x, bsize, plane);
    }

    for (plane = max_plane; plane < MAX_MB_PLANE; ++plane) {
      x->plane[plane].eobs = ctx->eobs_pbuf[plane][2];
      has_high_freq_coeff |= vp9_has_high_freq_in_plane(x, bsize, plane);
    }

    best_mode_skippable |= !has_high_freq_coeff;
  }

  assert(best_mode_index >= 0);

  store_coding_context(x, ctx, best_mode_index, best_pred_diff,
                       best_filter_diff, best_mode_skippable);
}

void vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi, TileDataEnc *tile_data,
                                        MACROBLOCK *x, RD_COST *rd_cost,
                                        BLOCK_SIZE bsize,
                                        PICK_MODE_CONTEXT *ctx,
                                        int64_t best_rd_so_far) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];
  unsigned char segment_id = mi->segment_id;
  const int comp_pred = 0;
  int i;
  int64_t best_pred_diff[REFERENCE_MODES];
  int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
  unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
  vpx_prob comp_mode_p;
  INTERP_FILTER best_filter = SWITCHABLE;
  int64_t this_rd = INT64_MAX;
  int rate2 = 0;
  const int64_t distortion2 = 0;

  x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;

  estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
                           &comp_mode_p);

  for (i = 0; i < MAX_REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
  for (i = LAST_FRAME; i < MAX_REF_FRAMES; ++i) x->pred_mv_sad[i] = INT_MAX;

  rd_cost->rate = INT_MAX;

  assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));

  mi->mode = ZEROMV;
  mi->uv_mode = DC_PRED;
  mi->ref_frame[0] = LAST_FRAME;
  mi->ref_frame[1] = NONE;
  mi->mv[0].as_int = 0;
  x->skip = 1;

  ctx->sum_y_eobs = 0;

  if (cm->interp_filter != BILINEAR) {
    best_filter = EIGHTTAP;
    if (cm->interp_filter == SWITCHABLE &&
x->source_variance >= cpi->sf.disable_filter_search_var_thresh) { 3881 int rs; 3882 int best_rs = INT_MAX; 3883 for (i = 0; i < SWITCHABLE_FILTERS; ++i) { 3884 mi->interp_filter = i; 3885 rs = vp9_get_switchable_rate(cpi, xd); 3886 if (rs < best_rs) { 3887 best_rs = rs; 3888 best_filter = mi->interp_filter; 3889 } 3890 } 3891 } 3892 } 3893 // Set the appropriate filter 3894 if (cm->interp_filter == SWITCHABLE) { 3895 mi->interp_filter = best_filter; 3896 rate2 += vp9_get_switchable_rate(cpi, xd); 3897 } else { 3898 mi->interp_filter = cm->interp_filter; 3899 } 3900 3901 if (cm->reference_mode == REFERENCE_MODE_SELECT) 3902 rate2 += vp9_cost_bit(comp_mode_p, comp_pred); 3903 3904 // Estimate the reference frame signaling cost and add it 3905 // to the rolling cost variable. 3906 rate2 += ref_costs_single[LAST_FRAME]; 3907 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); 3908 3909 rd_cost->rate = rate2; 3910 rd_cost->dist = distortion2; 3911 rd_cost->rdcost = this_rd; 3912 3913 if (this_rd >= best_rd_so_far) { 3914 rd_cost->rate = INT_MAX; 3915 rd_cost->rdcost = INT64_MAX; 3916 return; 3917 } 3918 3919 assert((cm->interp_filter == SWITCHABLE) || 3920 (cm->interp_filter == mi->interp_filter)); 3921 3922 vp9_update_rd_thresh_fact(tile_data->thresh_freq_fact, 3923 cpi->sf.adaptive_rd_thresh, bsize, THR_ZEROMV); 3924 3925 vp9_zero(best_pred_diff); 3926 vp9_zero(best_filter_diff); 3927 3928 if (!x->select_tx_size) swap_block_ptr(x, ctx, 1, 0, 0, MAX_MB_PLANE); 3929 store_coding_context(x, ctx, THR_ZEROMV, best_pred_diff, best_filter_diff, 0); 3930 } 3931 3932 void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, TileDataEnc *tile_data, 3933 MACROBLOCK *x, int mi_row, int mi_col, 3934 RD_COST *rd_cost, BLOCK_SIZE bsize, 3935 PICK_MODE_CONTEXT *ctx, 3936 int64_t best_rd_so_far) { 3937 VP9_COMMON *const cm = &cpi->common; 3938 RD_OPT *const rd_opt = &cpi->rd; 3939 SPEED_FEATURES *const sf = &cpi->sf; 3940 MACROBLOCKD *const xd = &x->e_mbd; 3941 MODE_INFO *const mi = xd->mi[0]; 3942 const struct segmentation *const seg = &cm->seg; 3943 MV_REFERENCE_FRAME ref_frame, second_ref_frame; 3944 unsigned char segment_id = mi->segment_id; 3945 int comp_pred, i; 3946 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; 3947 struct buf_2d yv12_mb[4][MAX_MB_PLANE]; 3948 static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG, 3949 VP9_ALT_FLAG }; 3950 int64_t best_rd = best_rd_so_far; 3951 int64_t best_yrd = best_rd_so_far; // FIXME(rbultje) more precise 3952 int64_t best_pred_diff[REFERENCE_MODES]; 3953 int64_t best_pred_rd[REFERENCE_MODES]; 3954 int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS]; 3955 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]; 3956 MODE_INFO best_mbmode; 3957 int ref_index, best_ref_index = 0; 3958 unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES]; 3959 vpx_prob comp_mode_p; 3960 INTERP_FILTER tmp_best_filter = SWITCHABLE; 3961 int rate_uv_intra, rate_uv_tokenonly; 3962 int64_t dist_uv; 3963 int skip_uv; 3964 PREDICTION_MODE mode_uv = DC_PRED; 3965 const int intra_cost_penalty = 3966 vp9_get_intra_cost_penalty(cpi, bsize, cm->base_qindex, cm->y_dc_delta_q); 3967 int_mv seg_mvs[4][MAX_REF_FRAMES]; 3968 b_mode_info best_bmodes[4]; 3969 int best_skip2 = 0; 3970 int ref_frame_skip_mask[2] = { 0 }; 3971 int64_t mask_filter = 0; 3972 int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS]; 3973 int internal_active_edge = 3974 vp9_active_edge_sb(cpi, mi_row, mi_col) && vp9_internal_image_edge(cpi); 3975 const int *const rd_thresh_freq_fact = 
tile_data->thresh_freq_fact[bsize]; 3976 3977 x->skip_encode = sf->skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; 3978 memset(x->zcoeff_blk[TX_4X4], 0, 4); 3979 vp9_zero(best_mbmode); 3980 3981 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) filter_cache[i] = INT64_MAX; 3982 3983 for (i = 0; i < 4; i++) { 3984 int j; 3985 for (j = 0; j < MAX_REF_FRAMES; j++) seg_mvs[i][j].as_int = INVALID_MV; 3986 } 3987 3988 estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp, 3989 &comp_mode_p); 3990 3991 for (i = 0; i < REFERENCE_MODES; ++i) best_pred_rd[i] = INT64_MAX; 3992 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) 3993 best_filter_rd[i] = INT64_MAX; 3994 rate_uv_intra = INT_MAX; 3995 3996 rd_cost->rate = INT_MAX; 3997 3998 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) { 3999 if (cpi->ref_frame_flags & flag_list[ref_frame]) { 4000 setup_buffer_inter(cpi, x, ref_frame, bsize, mi_row, mi_col, 4001 frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb); 4002 } else { 4003 ref_frame_skip_mask[0] |= (1 << ref_frame); 4004 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK; 4005 } 4006 frame_mv[NEWMV][ref_frame].as_int = INVALID_MV; 4007 frame_mv[ZEROMV][ref_frame].as_int = 0; 4008 } 4009 4010 for (ref_index = 0; ref_index < MAX_REFS; ++ref_index) { 4011 int mode_excluded = 0; 4012 int64_t this_rd = INT64_MAX; 4013 int disable_skip = 0; 4014 int compmode_cost = 0; 4015 int rate2 = 0, rate_y = 0, rate_uv = 0; 4016 int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0; 4017 int skippable = 0; 4018 int i; 4019 int this_skip2 = 0; 4020 int64_t total_sse = INT_MAX; 4021 int early_term = 0; 4022 struct buf_2d backup_yv12[2][MAX_MB_PLANE]; 4023 4024 ref_frame = vp9_ref_order[ref_index].ref_frame[0]; 4025 second_ref_frame = vp9_ref_order[ref_index].ref_frame[1]; 4026 4027 vp9_zero(x->sum_y_eobs); 4028 4029 #if CONFIG_BETTER_HW_COMPATIBILITY 4030 // forbid 8X4 and 4X8 partitions if any reference frame is scaled. 4031 if (bsize == BLOCK_8X4 || bsize == BLOCK_4X8) { 4032 int ref_scaled = ref_frame > INTRA_FRAME && 4033 vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf); 4034 if (second_ref_frame > INTRA_FRAME) 4035 ref_scaled += vp9_is_scaled(&cm->frame_refs[second_ref_frame - 1].sf); 4036 if (ref_scaled) continue; 4037 } 4038 #endif 4039 // Look at the reference frame of the best mode so far and set the 4040 // skip mask to look at a subset of the remaining modes. 4041 if (ref_index > 2 && sf->mode_skip_start < MAX_MODES) { 4042 if (ref_index == 3) { 4043 switch (best_mbmode.ref_frame[0]) { 4044 case INTRA_FRAME: break; 4045 case LAST_FRAME: 4046 ref_frame_skip_mask[0] |= (1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME); 4047 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK; 4048 break; 4049 case GOLDEN_FRAME: 4050 ref_frame_skip_mask[0] |= (1 << LAST_FRAME) | (1 << ALTREF_FRAME); 4051 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK; 4052 break; 4053 case ALTREF_FRAME: 4054 ref_frame_skip_mask[0] |= (1 << GOLDEN_FRAME) | (1 << LAST_FRAME); 4055 break; 4056 case NONE: 4057 case MAX_REF_FRAMES: assert(0 && "Invalid Reference frame"); break; 4058 } 4059 } 4060 } 4061 4062 if ((ref_frame_skip_mask[0] & (1 << ref_frame)) && 4063 (ref_frame_skip_mask[1] & (1 << VPXMAX(0, second_ref_frame)))) 4064 continue; 4065 4066 // Test best rd so far against threshold for trying this mode. 
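// (The check below prunes a reference early when the running best_rd is
// already under an adaptive threshold, roughly
// (rd_opt->threshes[segment_id][bsize][ref_index] * freq_fact) >> 5 as in
// rd_less_than_thresh(); the per-entry factor is adapted by
// vp9_update_rd_thresh_fact() after each block's decision.)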
4067 if (!internal_active_edge && 4068 rd_less_than_thresh(best_rd, 4069 rd_opt->threshes[segment_id][bsize][ref_index], 4070 &rd_thresh_freq_fact[ref_index])) 4071 continue; 4072 4073 // This is only used in motion vector unit test. 4074 if (cpi->oxcf.motion_vector_unit_test && ref_frame == INTRA_FRAME) continue; 4075 4076 comp_pred = second_ref_frame > INTRA_FRAME; 4077 if (comp_pred) { 4078 if (!cpi->allow_comp_inter_inter) continue; 4079 4080 if (cm->ref_frame_sign_bias[ref_frame] == 4081 cm->ref_frame_sign_bias[second_ref_frame]) 4082 continue; 4083 4084 if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) continue; 4085 // Do not allow compound prediction if the segment level reference frame 4086 // feature is in use as in this case there can only be one reference. 4087 if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) continue; 4088 4089 if ((sf->mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) && 4090 best_mbmode.ref_frame[0] == INTRA_FRAME) 4091 continue; 4092 } 4093 4094 if (comp_pred) 4095 mode_excluded = cm->reference_mode == SINGLE_REFERENCE; 4096 else if (ref_frame != INTRA_FRAME) 4097 mode_excluded = cm->reference_mode == COMPOUND_REFERENCE; 4098 4099 // If the segment reference frame feature is enabled.... 4100 // then do nothing if the current ref frame is not allowed.. 4101 if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) && 4102 get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) { 4103 continue; 4104 // Disable this drop out case if the ref frame 4105 // segment level feature is enabled for this segment. This is to 4106 // prevent the possibility that we end up unable to pick any mode. 4107 } else if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) { 4108 // Only consider ZEROMV/ALTREF_FRAME for alt ref frame, 4109 // unless ARNR filtering is enabled in which case we want 4110 // an unfiltered alternative. We allow near/nearest as well 4111 // because they may result in zero-zero MVs but be cheaper. 4112 if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) 4113 continue; 4114 } 4115 4116 mi->tx_size = TX_4X4; 4117 mi->uv_mode = DC_PRED; 4118 mi->ref_frame[0] = ref_frame; 4119 mi->ref_frame[1] = second_ref_frame; 4120 // Evaluate all sub-pel filters irrespective of whether we can use 4121 // them for this frame. 4122 mi->interp_filter = 4123 cm->interp_filter == SWITCHABLE ? EIGHTTAP : cm->interp_filter; 4124 x->skip = 0; 4125 set_ref_ptrs(cm, xd, ref_frame, second_ref_frame); 4126 4127 // Select prediction reference frames. 
4128 for (i = 0; i < MAX_MB_PLANE; i++) { 4129 xd->plane[i].pre[0] = yv12_mb[ref_frame][i]; 4130 if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i]; 4131 } 4132 4133 if (ref_frame == INTRA_FRAME) { 4134 int rate; 4135 if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y, &distortion_y, 4136 best_rd) >= best_rd) 4137 continue; 4138 rate2 += rate; 4139 rate2 += intra_cost_penalty; 4140 distortion2 += distortion_y; 4141 4142 if (rate_uv_intra == INT_MAX) { 4143 choose_intra_uv_mode(cpi, x, ctx, bsize, TX_4X4, &rate_uv_intra, 4144 &rate_uv_tokenonly, &dist_uv, &skip_uv, &mode_uv); 4145 } 4146 rate2 += rate_uv_intra; 4147 rate_uv = rate_uv_tokenonly; 4148 distortion2 += dist_uv; 4149 distortion_uv = dist_uv; 4150 mi->uv_mode = mode_uv; 4151 } else { 4152 int rate; 4153 int64_t distortion; 4154 int64_t this_rd_thresh; 4155 int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX; 4156 int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX; 4157 int64_t tmp_best_distortion = INT_MAX, tmp_best_sse, uv_sse; 4158 int tmp_best_skippable = 0; 4159 int switchable_filter_index; 4160 int_mv *second_ref = 4161 comp_pred ? &x->mbmi_ext->ref_mvs[second_ref_frame][0] : NULL; 4162 b_mode_info tmp_best_bmodes[16]; 4163 MODE_INFO tmp_best_mbmode; 4164 BEST_SEG_INFO bsi[SWITCHABLE_FILTERS]; 4165 int pred_exists = 0; 4166 int uv_skippable; 4167 4168 YV12_BUFFER_CONFIG *scaled_ref_frame[2] = { NULL, NULL }; 4169 int ref; 4170 4171 for (ref = 0; ref < 2; ++ref) { 4172 scaled_ref_frame[ref] = 4173 mi->ref_frame[ref] > INTRA_FRAME 4174 ? vp9_get_scaled_ref_frame(cpi, mi->ref_frame[ref]) 4175 : NULL; 4176 4177 if (scaled_ref_frame[ref]) { 4178 int i; 4179 // Swap out the reference frame for a version that's been scaled to 4180 // match the resolution of the current frame, allowing the existing 4181 // motion search code to be used without additional modifications. 4182 for (i = 0; i < MAX_MB_PLANE; i++) 4183 backup_yv12[ref][i] = xd->plane[i].pre[ref]; 4184 vp9_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col, 4185 NULL); 4186 } 4187 } 4188 4189 this_rd_thresh = (ref_frame == LAST_FRAME) 4190 ? rd_opt->threshes[segment_id][bsize][THR_LAST] 4191 : rd_opt->threshes[segment_id][bsize][THR_ALTR]; 4192 this_rd_thresh = (ref_frame == GOLDEN_FRAME) 4193 ? rd_opt->threshes[segment_id][bsize][THR_GOLD] 4194 : this_rd_thresh; 4195 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) 4196 filter_cache[i] = INT64_MAX; 4197 4198 if (cm->interp_filter != BILINEAR) { 4199 tmp_best_filter = EIGHTTAP; 4200 if (x->source_variance < sf->disable_filter_search_var_thresh) { 4201 tmp_best_filter = EIGHTTAP; 4202 } else if (sf->adaptive_pred_interp_filter == 1 && 4203 ctx->pred_interp_filter < SWITCHABLE) { 4204 tmp_best_filter = ctx->pred_interp_filter; 4205 } else if (sf->adaptive_pred_interp_filter == 2) { 4206 tmp_best_filter = ctx->pred_interp_filter < SWITCHABLE 4207 ? 
ctx->pred_interp_filter 4208 : 0; 4209 } else { 4210 for (switchable_filter_index = 0; 4211 switchable_filter_index < SWITCHABLE_FILTERS; 4212 ++switchable_filter_index) { 4213 int newbest, rs; 4214 int64_t rs_rd; 4215 MB_MODE_INFO_EXT *mbmi_ext = x->mbmi_ext; 4216 mi->interp_filter = switchable_filter_index; 4217 tmp_rd = rd_pick_best_sub8x8_mode( 4218 cpi, x, &mbmi_ext->ref_mvs[ref_frame][0], second_ref, best_yrd, 4219 &rate, &rate_y, &distortion, &skippable, &total_sse, 4220 (int)this_rd_thresh, seg_mvs, bsi, switchable_filter_index, 4221 mi_row, mi_col); 4222 4223 if (tmp_rd == INT64_MAX) continue; 4224 rs = vp9_get_switchable_rate(cpi, xd); 4225 rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0); 4226 filter_cache[switchable_filter_index] = tmp_rd; 4227 filter_cache[SWITCHABLE_FILTERS] = 4228 VPXMIN(filter_cache[SWITCHABLE_FILTERS], tmp_rd + rs_rd); 4229 if (cm->interp_filter == SWITCHABLE) tmp_rd += rs_rd; 4230 4231 mask_filter = VPXMAX(mask_filter, tmp_rd); 4232 4233 newbest = (tmp_rd < tmp_best_rd); 4234 if (newbest) { 4235 tmp_best_filter = mi->interp_filter; 4236 tmp_best_rd = tmp_rd; 4237 } 4238 if ((newbest && cm->interp_filter == SWITCHABLE) || 4239 (mi->interp_filter == cm->interp_filter && 4240 cm->interp_filter != SWITCHABLE)) { 4241 tmp_best_rdu = tmp_rd; 4242 tmp_best_rate = rate; 4243 tmp_best_ratey = rate_y; 4244 tmp_best_distortion = distortion; 4245 tmp_best_sse = total_sse; 4246 tmp_best_skippable = skippable; 4247 tmp_best_mbmode = *mi; 4248 for (i = 0; i < 4; i++) { 4249 tmp_best_bmodes[i] = xd->mi[0]->bmi[i]; 4250 x->zcoeff_blk[TX_4X4][i] = !x->plane[0].eobs[i]; 4251 x->sum_y_eobs[TX_4X4] += x->plane[0].eobs[i]; 4252 } 4253 pred_exists = 1; 4254 if (switchable_filter_index == 0 && sf->use_rd_breakout && 4255 best_rd < INT64_MAX) { 4256 if (tmp_best_rdu / 2 > best_rd) { 4257 // skip searching the other filters if the first is 4258 // already substantially larger than the best so far 4259 tmp_best_filter = mi->interp_filter; 4260 tmp_best_rdu = INT64_MAX; 4261 break; 4262 } 4263 } 4264 } 4265 } // switchable_filter_index loop 4266 } 4267 } 4268 4269 if (tmp_best_rdu == INT64_MAX && pred_exists) continue; 4270 4271 mi->interp_filter = (cm->interp_filter == SWITCHABLE ? tmp_best_filter 4272 : cm->interp_filter); 4273 if (!pred_exists) { 4274 // Handles the special case when a filter that is not in the 4275 // switchable list (bilinear, 6-tap) is indicated at the frame level 4276 tmp_rd = rd_pick_best_sub8x8_mode( 4277 cpi, x, &x->mbmi_ext->ref_mvs[ref_frame][0], second_ref, best_yrd, 4278 &rate, &rate_y, &distortion, &skippable, &total_sse, 4279 (int)this_rd_thresh, seg_mvs, bsi, 0, mi_row, mi_col); 4280 if (tmp_rd == INT64_MAX) continue; 4281 } else { 4282 total_sse = tmp_best_sse; 4283 rate = tmp_best_rate; 4284 rate_y = tmp_best_ratey; 4285 distortion = tmp_best_distortion; 4286 skippable = tmp_best_skippable; 4287 *mi = tmp_best_mbmode; 4288 for (i = 0; i < 4; i++) xd->mi[0]->bmi[i] = tmp_best_bmodes[i]; 4289 } 4290 4291 rate2 += rate; 4292 distortion2 += distortion; 4293 4294 if (cm->interp_filter == SWITCHABLE) 4295 rate2 += vp9_get_switchable_rate(cpi, xd); 4296 4297 if (!mode_excluded) 4298 mode_excluded = comp_pred ? 
cm->reference_mode == SINGLE_REFERENCE 4299 : cm->reference_mode == COMPOUND_REFERENCE; 4300 4301 compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred); 4302 4303 tmp_best_rdu = 4304 best_rd - VPXMIN(RDCOST(x->rdmult, x->rddiv, rate2, distortion2), 4305 RDCOST(x->rdmult, x->rddiv, 0, total_sse)); 4306 4307 if (tmp_best_rdu > 0) { 4308 // If even the 'Y' rd value of split is higher than best so far 4309 // then dont bother looking at UV 4310 vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col, BLOCK_8X8); 4311 memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm)); 4312 if (!super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable, 4313 &uv_sse, BLOCK_8X8, tmp_best_rdu)) { 4314 for (ref = 0; ref < 2; ++ref) { 4315 if (scaled_ref_frame[ref]) { 4316 int i; 4317 for (i = 0; i < MAX_MB_PLANE; ++i) 4318 xd->plane[i].pre[ref] = backup_yv12[ref][i]; 4319 } 4320 } 4321 continue; 4322 } 4323 4324 rate2 += rate_uv; 4325 distortion2 += distortion_uv; 4326 skippable = skippable && uv_skippable; 4327 total_sse += uv_sse; 4328 } 4329 4330 for (ref = 0; ref < 2; ++ref) { 4331 if (scaled_ref_frame[ref]) { 4332 // Restore the prediction frame pointers to their unscaled versions. 4333 int i; 4334 for (i = 0; i < MAX_MB_PLANE; ++i) 4335 xd->plane[i].pre[ref] = backup_yv12[ref][i]; 4336 } 4337 } 4338 } 4339 4340 if (cm->reference_mode == REFERENCE_MODE_SELECT) rate2 += compmode_cost; 4341 4342 // Estimate the reference frame signaling cost and add it 4343 // to the rolling cost variable. 4344 if (second_ref_frame > INTRA_FRAME) { 4345 rate2 += ref_costs_comp[ref_frame]; 4346 } else { 4347 rate2 += ref_costs_single[ref_frame]; 4348 } 4349 4350 if (!disable_skip) { 4351 const vpx_prob skip_prob = vp9_get_skip_prob(cm, xd); 4352 const int skip_cost0 = vp9_cost_bit(skip_prob, 0); 4353 const int skip_cost1 = vp9_cost_bit(skip_prob, 1); 4354 4355 // Skip is never coded at the segment level for sub8x8 blocks and instead 4356 // always coded in the bitstream at the mode info level. 4357 if (ref_frame != INTRA_FRAME && !xd->lossless) { 4358 if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv + skip_cost0, 4359 distortion2) < 4360 RDCOST(x->rdmult, x->rddiv, skip_cost1, total_sse)) { 4361 // Add in the cost of the no skip flag. 4362 rate2 += skip_cost0; 4363 } else { 4364 // FIXME(rbultje) make this work for splitmv also 4365 rate2 += skip_cost1; 4366 distortion2 = total_sse; 4367 assert(total_sse >= 0); 4368 rate2 -= (rate_y + rate_uv); 4369 rate_y = 0; 4370 rate_uv = 0; 4371 this_skip2 = 1; 4372 } 4373 } else { 4374 // Add in the cost of the no skip flag. 4375 rate2 += skip_cost0; 4376 } 4377 4378 // Calculate the final RD estimate for this mode. 4379 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); 4380 } 4381 4382 if (!disable_skip && ref_frame == INTRA_FRAME) { 4383 for (i = 0; i < REFERENCE_MODES; ++i) 4384 best_pred_rd[i] = VPXMIN(best_pred_rd[i], this_rd); 4385 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) 4386 best_filter_rd[i] = VPXMIN(best_filter_rd[i], this_rd); 4387 } 4388 4389 // Did this mode help.. i.e. 
is it the new best mode 4390 if (this_rd < best_rd || x->skip) { 4391 if (!mode_excluded) { 4392 int max_plane = MAX_MB_PLANE; 4393 // Note index of best mode so far 4394 best_ref_index = ref_index; 4395 4396 if (ref_frame == INTRA_FRAME) { 4397 /* required for left and above block mv */ 4398 mi->mv[0].as_int = 0; 4399 max_plane = 1; 4400 // Initialize interp_filter here so we do not have to check for 4401 // inter block modes in get_pred_context_switchable_interp() 4402 mi->interp_filter = SWITCHABLE_FILTERS; 4403 } 4404 4405 rd_cost->rate = rate2; 4406 rd_cost->dist = distortion2; 4407 rd_cost->rdcost = this_rd; 4408 best_rd = this_rd; 4409 best_yrd = 4410 best_rd - RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv); 4411 best_mbmode = *mi; 4412 best_skip2 = this_skip2; 4413 if (!x->select_tx_size) swap_block_ptr(x, ctx, 1, 0, 0, max_plane); 4414 memcpy(ctx->zcoeff_blk, x->zcoeff_blk[TX_4X4], 4415 sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk); 4416 ctx->sum_y_eobs = x->sum_y_eobs[TX_4X4]; 4417 4418 for (i = 0; i < 4; i++) best_bmodes[i] = xd->mi[0]->bmi[i]; 4419 4420 // TODO(debargha): enhance this test with a better distortion prediction 4421 // based on qp, activity mask and history 4422 if ((sf->mode_search_skip_flags & FLAG_EARLY_TERMINATE) && 4423 (ref_index > MIN_EARLY_TERM_INDEX)) { 4424 int qstep = xd->plane[0].dequant[1]; 4425 // TODO(debargha): Enhance this by specializing for each mode_index 4426 int scale = 4; 4427 #if CONFIG_VP9_HIGHBITDEPTH 4428 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { 4429 qstep >>= (xd->bd - 8); 4430 } 4431 #endif // CONFIG_VP9_HIGHBITDEPTH 4432 if (x->source_variance < UINT_MAX) { 4433 const int var_adjust = (x->source_variance < 16); 4434 scale -= var_adjust; 4435 } 4436 if (ref_frame > INTRA_FRAME && distortion2 * scale < qstep * qstep) { 4437 early_term = 1; 4438 } 4439 } 4440 } 4441 } 4442 4443 /* keep record of best compound/single-only prediction */ 4444 if (!disable_skip && ref_frame != INTRA_FRAME) { 4445 int64_t single_rd, hybrid_rd, single_rate, hybrid_rate; 4446 4447 if (cm->reference_mode == REFERENCE_MODE_SELECT) { 4448 single_rate = rate2 - compmode_cost; 4449 hybrid_rate = rate2; 4450 } else { 4451 single_rate = rate2; 4452 hybrid_rate = rate2 + compmode_cost; 4453 } 4454 4455 single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2); 4456 hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2); 4457 4458 if (!comp_pred && single_rd < best_pred_rd[SINGLE_REFERENCE]) 4459 best_pred_rd[SINGLE_REFERENCE] = single_rd; 4460 else if (comp_pred && single_rd < best_pred_rd[COMPOUND_REFERENCE]) 4461 best_pred_rd[COMPOUND_REFERENCE] = single_rd; 4462 4463 if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT]) 4464 best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd; 4465 } 4466 4467 /* keep record of best filter type */ 4468 if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME && 4469 cm->interp_filter != BILINEAR) { 4470 int64_t ref = 4471 filter_cache[cm->interp_filter == SWITCHABLE ? SWITCHABLE_FILTERS 4472 : cm->interp_filter]; 4473 int64_t adj_rd; 4474 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) { 4475 if (ref == INT64_MAX) 4476 adj_rd = 0; 4477 else if (filter_cache[i] == INT64_MAX) 4478 // when early termination is triggered, the encoder does not have 4479 // access to the rate-distortion cost. it only knows that the cost 4480 // should be above the maximum valid value. hence it takes the known 4481 // maximum plus an arbitrary constant as the rate-distortion cost. 
4482 adj_rd = mask_filter - ref + 10; 4483 else 4484 adj_rd = filter_cache[i] - ref; 4485 4486 adj_rd += this_rd; 4487 best_filter_rd[i] = VPXMIN(best_filter_rd[i], adj_rd); 4488 } 4489 } 4490 4491 if (early_term) break; 4492 4493 if (x->skip && !comp_pred) break; 4494 } 4495 4496 if (best_rd >= best_rd_so_far) { 4497 rd_cost->rate = INT_MAX; 4498 rd_cost->rdcost = INT64_MAX; 4499 return; 4500 } 4501 4502 // If we used an estimate for the uv intra rd in the loop above... 4503 if (sf->use_uv_intra_rd_estimate) { 4504 // Do Intra UV best rd mode selection if best mode choice above was intra. 4505 if (best_mbmode.ref_frame[0] == INTRA_FRAME) { 4506 *mi = best_mbmode; 4507 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra, &rate_uv_tokenonly, 4508 &dist_uv, &skip_uv, BLOCK_8X8, TX_4X4); 4509 } 4510 } 4511 4512 if (best_rd == INT64_MAX) { 4513 rd_cost->rate = INT_MAX; 4514 rd_cost->dist = INT64_MAX; 4515 rd_cost->rdcost = INT64_MAX; 4516 return; 4517 } 4518 4519 assert((cm->interp_filter == SWITCHABLE) || 4520 (cm->interp_filter == best_mbmode.interp_filter) || 4521 !is_inter_block(&best_mbmode)); 4522 4523 vp9_update_rd_thresh_fact(tile_data->thresh_freq_fact, sf->adaptive_rd_thresh, 4524 bsize, best_ref_index); 4525 4526 // macroblock modes 4527 *mi = best_mbmode; 4528 x->skip |= best_skip2; 4529 if (!is_inter_block(&best_mbmode)) { 4530 for (i = 0; i < 4; i++) xd->mi[0]->bmi[i].as_mode = best_bmodes[i].as_mode; 4531 } else { 4532 for (i = 0; i < 4; ++i) 4533 memcpy(&xd->mi[0]->bmi[i], &best_bmodes[i], sizeof(b_mode_info)); 4534 4535 mi->mv[0].as_int = xd->mi[0]->bmi[3].as_mv[0].as_int; 4536 mi->mv[1].as_int = xd->mi[0]->bmi[3].as_mv[1].as_int; 4537 } 4538 4539 for (i = 0; i < REFERENCE_MODES; ++i) { 4540 if (best_pred_rd[i] == INT64_MAX) 4541 best_pred_diff[i] = INT_MIN; 4542 else 4543 best_pred_diff[i] = best_rd - best_pred_rd[i]; 4544 } 4545 4546 if (!x->skip) { 4547 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) { 4548 if (best_filter_rd[i] == INT64_MAX) 4549 best_filter_diff[i] = 0; 4550 else 4551 best_filter_diff[i] = best_rd - best_filter_rd[i]; 4552 } 4553 if (cm->interp_filter == SWITCHABLE) 4554 assert(best_filter_diff[SWITCHABLE_FILTERS] == 0); 4555 } else { 4556 vp9_zero(best_filter_diff); 4557 } 4558 4559 store_coding_context(x, ctx, best_ref_index, best_pred_diff, best_filter_diff, 4560 0); 4561 } 4562
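// Illustrative sketch (kept out of the build with #if 0): the skip-flag
// decision that both vp9_rd_pick_inter_mode_sb() and
// vp9_rd_pick_inter_mode_sub8x8() apply inline above. The helper name and its
// standalone form are hypothetical; it simply shows the two RDCOST()
// comparisons side by side.
#if 0
static int prefer_skip_mb(const MACROBLOCK *x, int rate_coeffs, int skip_cost0,
                          int skip_cost1, int64_t distortion,
                          int64_t total_sse) {
  // Cost of coding the residual plus the "no skip" flag.
  const int64_t rd_code =
      RDCOST(x->rdmult, x->rddiv, rate_coeffs + skip_cost0, distortion);
  // Cost of signaling "skip" and accepting the prediction error as distortion.
  const int64_t rd_skip = RDCOST(x->rdmult, x->rddiv, skip_cost1, total_sse);
  return rd_skip <= rd_code;
}
#endif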