/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <assert.h>

#include "vp9/common/vp9_pragmas.h"
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_modecosts.h"
#include "vp9/encoder/vp9_encodeintra.h"
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_quant_common.h"
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vpx_mem/vpx_mem.h"
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9_rtcd.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_common.h"

#define INVALID_MV 0x80008000

/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

DECLARE_ALIGNED(16, extern const uint8_t,
                vp9_pt_energy_class[MAX_ENTROPY_TOKENS]);

#define LAST_FRAME_MODE_MASK    0xFFDADCD60
#define GOLDEN_FRAME_MODE_MASK  0xFFB5A3BB0
#define ALT_REF_MODE_MASK       0xFF8C648D0

const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
  {RD_NEARESTMV, LAST_FRAME,   NONE},
  {RD_NEARESTMV, ALTREF_FRAME, NONE},
  {RD_NEARESTMV, GOLDEN_FRAME, NONE},

  {RD_DC_PRED,   INTRA_FRAME,  NONE},

  {RD_NEWMV,     LAST_FRAME,   NONE},
  {RD_NEWMV,     ALTREF_FRAME, NONE},
  {RD_NEWMV,     GOLDEN_FRAME, NONE},

  {RD_NEARMV,    LAST_FRAME,   NONE},
  {RD_NEARMV,    ALTREF_FRAME, NONE},
  {RD_NEARESTMV, LAST_FRAME,   ALTREF_FRAME},
  {RD_NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},

  {RD_TM_PRED,   INTRA_FRAME,  NONE},

  {RD_NEARMV,    LAST_FRAME,   ALTREF_FRAME},
  {RD_NEWMV,     LAST_FRAME,   ALTREF_FRAME},
  {RD_NEARMV,    GOLDEN_FRAME, NONE},
  {RD_NEARMV,    GOLDEN_FRAME, ALTREF_FRAME},
  {RD_NEWMV,     GOLDEN_FRAME, ALTREF_FRAME},

  {RD_SPLITMV,   LAST_FRAME,   NONE},
  {RD_SPLITMV,   GOLDEN_FRAME, NONE},
  {RD_SPLITMV,   ALTREF_FRAME, NONE},
  {RD_SPLITMV,   LAST_FRAME,   ALTREF_FRAME},
  {RD_SPLITMV,   GOLDEN_FRAME, ALTREF_FRAME},

  {RD_ZEROMV,    LAST_FRAME,   NONE},
  {RD_ZEROMV,    GOLDEN_FRAME, NONE},
  {RD_ZEROMV,    ALTREF_FRAME, NONE},
  {RD_ZEROMV,    LAST_FRAME,   ALTREF_FRAME},
  {RD_ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME},

  {RD_I4X4_PRED, INTRA_FRAME,  NONE},
  {RD_H_PRED,    INTRA_FRAME,  NONE},
  {RD_V_PRED,    INTRA_FRAME,  NONE},
  {RD_D135_PRED, INTRA_FRAME,  NONE},
  {RD_D207_PRED, INTRA_FRAME,  NONE},
  {RD_D153_PRED, INTRA_FRAME,  NONE},
  {RD_D63_PRED,  INTRA_FRAME,  NONE},
  {RD_D117_PRED, INTRA_FRAME,  NONE},
  {RD_D45_PRED,  INTRA_FRAME,  NONE},
};

// The baseline rd thresholds for breaking out of the rd loop for
// certain modes are assumed to be based on 8x8 blocks.
// This table is used to correct for block sizes.
// The factors here are << 2 (2 = x0.5, 32 = x8 etc).
static int rd_thresh_block_size_factor[BLOCK_SIZES] =
  {2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32};

#define MAX_RD_THRESH_FACT 64
#define RD_THRESH_INC 1
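// Worked example of the scaling above (a sketch; the per-index meaning
// assumes the BLOCK_SIZES enum ordering, with BLOCK_4X4 first and
// BLOCK_64X64 last): factors are stored scaled by 4, and the "/ 4" in
// vp9_initialize_rd_consts() undoes that, so a factor of 4 (BLOCK_8X8)
// means x1.0 relative to the 8x8 baseline, 2 (BLOCK_4X4) means x0.5,
// and 32 (BLOCK_64X64) means x8.0.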
static void fill_token_costs(vp9_coeff_cost *c,
                             vp9_coeff_probs_model (*p)[BLOCK_TYPES]) {
  int i, j, k, l;
  TX_SIZE t;
  for (t = TX_4X4; t <= TX_32X32; t++)
    for (i = 0; i < BLOCK_TYPES; i++)
      for (j = 0; j < REF_TYPES; j++)
        for (k = 0; k < COEF_BANDS; k++)
          for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
            vp9_prob probs[ENTROPY_NODES];
            vp9_model_to_full_probs(p[t][i][j][k][l], probs);
            vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs,
                            vp9_coef_tree);
            vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
                                 vp9_coef_tree);
            assert(c[t][i][j][k][0][l][DCT_EOB_TOKEN] ==
                   c[t][i][j][k][1][l][DCT_EOB_TOKEN]);
          }
}

static const int rd_iifactor[32] = {
  4, 4, 3, 2, 1, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
};

// 3* dc_qlookup[Q]*dc_qlookup[Q];

/* values are now correlated to quantizer */
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];

void vp9_init_me_luts() {
  int i;

  // Initialize the sad lut tables using a formulaic calculation for now.
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  for (i = 0; i < QINDEX_RANGE; i++) {
    sad_per_bit16lut[i] =
        (int)((0.0418 * vp9_convert_qindex_to_q(i)) + 2.4107);
    sad_per_bit4lut[i] = (int)(0.063 * vp9_convert_qindex_to_q(i) + 2.742);
  }
}

static int compute_rd_mult(int qindex) {
  const int q = vp9_dc_quant(qindex, 0);
  return (11 * q * q) >> 2;
}
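// Example arithmetic (a sketch, not normative): with a DC quantizer value
// of q = 32, compute_rd_mult() returns (11 * 32 * 32) >> 2 = 2816.
// vp9_initialize_rd_consts() below then derives
// errorperbit = 2816 >> 6 = 44, i.e. roughly 44 units of squared error
// are traded against one unit of rate in the motion search.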
static MB_PREDICTION_MODE rd_mode_to_mode(RD_PREDICTION_MODE rd_mode) {
  if (rd_mode == RD_SPLITMV || rd_mode == RD_I4X4_PRED) {
    assert(!"Invalid rd_mode");
    return MB_MODE_COUNT;
  }
  assert((int)rd_mode < (int)MB_MODE_COUNT);
  return (MB_PREDICTION_MODE)rd_mode;
}

void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
  cpi->mb.sadperbit16 = sad_per_bit16lut[qindex];
  cpi->mb.sadperbit4 = sad_per_bit4lut[qindex];
}

void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
  int q, i, bsize;

  vp9_clear_system_state();  // __asm emms;

  // Further tests required to see if optimum is different
  // for key frames, golden frames and arf frames.
  // if (cpi->common.refresh_golden_frame ||
  //     cpi->common.refresh_alt_ref_frame)
  qindex = clamp(qindex, 0, MAXQ);

  cpi->RDMULT = compute_rd_mult(qindex);
  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    if (cpi->twopass.next_iiratio > 31)
      cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
    else
      cpi->RDMULT +=
          (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
  }
  cpi->mb.errorperbit = cpi->RDMULT >> 6;
  cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);

  vp9_set_speed_features(cpi);

  q = (int)pow(vp9_dc_quant(qindex, 0) >> 2, 1.25);
  q <<= 2;
  if (q < 8)
    q = 8;

  if (cpi->RDMULT > 1000) {
    cpi->RDDIV = 1;
    cpi->RDMULT /= 100;

    for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
      for (i = 0; i < MAX_MODES; ++i) {
        // Thresholds here seem unnecessarily harsh but fine given the actual
        // range of values used for cpi->sf.thresh_mult[].
        int thresh_max = INT_MAX / (q * rd_thresh_block_size_factor[bsize]);

        // *4 relates to the scaling of rd_thresh_block_size_factor[]
        if ((int64_t)cpi->sf.thresh_mult[i] < thresh_max) {
          cpi->rd_threshes[bsize][i] =
              cpi->sf.thresh_mult[i] * q *
              rd_thresh_block_size_factor[bsize] / (4 * 100);
        } else {
          cpi->rd_threshes[bsize][i] = INT_MAX;
        }
      }
    }
  } else {
    cpi->RDDIV = 100;

    for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
      for (i = 0; i < MAX_MODES; i++) {
        // Thresholds here seem unnecessarily harsh but fine given the actual
        // range of values used for cpi->sf.thresh_mult[].
        int thresh_max = INT_MAX / (q * rd_thresh_block_size_factor[bsize]);

        if (cpi->sf.thresh_mult[i] < thresh_max) {
          cpi->rd_threshes[bsize][i] =
              cpi->sf.thresh_mult[i] * q *
              rd_thresh_block_size_factor[bsize] / 4;
        } else {
          cpi->rd_threshes[bsize][i] = INT_MAX;
        }
      }
    }
  }

  fill_token_costs(cpi->mb.token_costs, cpi->common.fc.coef_probs);

  for (i = 0; i < NUM_PARTITION_CONTEXTS; i++)
    vp9_cost_tokens(cpi->mb.partition_cost[i],
                    cpi->common.fc.partition_prob[cpi->common.frame_type][i],
                    vp9_partition_tree);

  /* rough estimate for costing */
  vp9_init_mode_costs(cpi);

  if (cpi->common.frame_type != KEY_FRAME) {
    vp9_build_nmv_cost_table(
        cpi->mb.nmvjointcost,
        cpi->mb.e_mbd.allow_high_precision_mv ?
            cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
        &cpi->common.fc.nmvc,
        cpi->mb.e_mbd.allow_high_precision_mv, 1, 1);

    for (i = 0; i < INTER_MODE_CONTEXTS; i++) {
      MB_PREDICTION_MODE m;

      for (m = NEARESTMV; m < MB_MODE_COUNT; m++)
        cpi->mb.inter_mode_cost[i][m - NEARESTMV] =
            cost_token(vp9_inter_mode_tree,
                       cpi->common.fc.inter_mode_probs[i],
                       vp9_inter_mode_encodings - NEARESTMV + m);
    }
  }
}

static INLINE void linear_interpolate2(double x, int ntab, int inv_step,
                                       const double *tab1, const double *tab2,
                                       double *v1, double *v2) {
  double y = x * inv_step;
  int d = (int) y;
  if (d >= ntab - 1) {
    *v1 = tab1[ntab - 1];
    *v2 = tab2[ntab - 1];
  } else {
    double a = y - d;
    *v1 = tab1[d] * (1 - a) + tab1[d + 1] * a;
    *v2 = tab2[d] * (1 - a) + tab2[d + 1] * a;
  }
}
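// Lookup sketch for linear_interpolate2(): with inv_step = 8, x = 0.4375
// maps to y = 3.5, so d = 3, a = 0.5 and each result is the midpoint of
// tab[3] and tab[4]; any x at or beyond (ntab - 1) / inv_step clamps to
// the last table entry.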
static void model_rd_norm(double x, double *R, double *D) {
  static const int inv_tab_step = 8;
  static const int tab_size = 120;
  // NOTE: The tables below must be of the same size
  //
  // Normalized rate
  // This table models the rate for a Laplacian source with given variance
  // when quantized with a uniform quantizer with given stepsize. The
  // closed form expression is:
  // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
  // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
  // and H(x) is the binary entropy function.
  static const double rate_tab[] = {
    64.00, 4.944, 3.949, 3.372, 2.966, 2.655, 2.403, 2.194,
    2.014, 1.858, 1.720, 1.596, 1.485, 1.384, 1.291, 1.206,
    1.127, 1.054, 0.986, 0.923, 0.863, 0.808, 0.756, 0.708,
    0.662, 0.619, 0.579, 0.541, 0.506, 0.473, 0.442, 0.412,
    0.385, 0.359, 0.335, 0.313, 0.291, 0.272, 0.253, 0.236,
    0.220, 0.204, 0.190, 0.177, 0.165, 0.153, 0.142, 0.132,
    0.123, 0.114, 0.106, 0.099, 0.091, 0.085, 0.079, 0.073,
    0.068, 0.063, 0.058, 0.054, 0.050, 0.047, 0.043, 0.040,
    0.037, 0.034, 0.032, 0.029, 0.027, 0.025, 0.023, 0.022,
    0.020, 0.019, 0.017, 0.016, 0.015, 0.014, 0.013, 0.012,
    0.011, 0.010, 0.009, 0.008, 0.008, 0.007, 0.007, 0.006,
    0.006, 0.005, 0.005, 0.005, 0.004, 0.004, 0.004, 0.003,
    0.003, 0.003, 0.003, 0.002, 0.002, 0.002, 0.002, 0.002,
    0.002, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001,
    0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.000,
  };
  // Normalized distortion
  // This table models the normalized distortion for a Laplacian source
  // with given variance when quantized with a uniform quantizer with
  // given stepsize. The closed form expression is:
  // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
  // where x = qpstep / sqrt(variance)
  // Note the actual distortion is Dn * variance.
  static const double dist_tab[] = {
    0.000, 0.001, 0.005, 0.012, 0.021, 0.032, 0.045, 0.061,
    0.079, 0.098, 0.119, 0.142, 0.166, 0.190, 0.216, 0.242,
    0.269, 0.296, 0.324, 0.351, 0.378, 0.405, 0.432, 0.458,
    0.484, 0.509, 0.534, 0.557, 0.580, 0.603, 0.624, 0.645,
    0.664, 0.683, 0.702, 0.719, 0.735, 0.751, 0.766, 0.780,
    0.794, 0.807, 0.819, 0.830, 0.841, 0.851, 0.861, 0.870,
    0.878, 0.886, 0.894, 0.901, 0.907, 0.913, 0.919, 0.925,
    0.930, 0.935, 0.939, 0.943, 0.947, 0.951, 0.954, 0.957,
    0.960, 0.963, 0.966, 0.968, 0.971, 0.973, 0.975, 0.976,
    0.978, 0.980, 0.981, 0.982, 0.984, 0.985, 0.986, 0.987,
    0.988, 0.989, 0.990, 0.990, 0.991, 0.992, 0.992, 0.993,
    0.993, 0.994, 0.994, 0.995, 0.995, 0.996, 0.996, 0.996,
    0.996, 0.997, 0.997, 0.997, 0.997, 0.998, 0.998, 0.998,
    0.998, 0.998, 0.998, 0.999, 0.999, 0.999, 0.999, 0.999,
    0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 1.000,
  };
  /*
  assert(sizeof(rate_tab) == tab_size * sizeof(rate_tab[0]));
  assert(sizeof(dist_tab) == tab_size * sizeof(dist_tab[0]));
  assert(sizeof(rate_tab) == sizeof(dist_tab));
  */
  assert(x >= 0.0);
  linear_interpolate2(x, tab_size, inv_tab_step,
                      rate_tab, dist_tab, R, D);
}
static void model_rd_from_var_lapndz(int var, int n, int qstep,
                                     int *rate, int64_t *dist) {
  // This function models the rate and distortion for a Laplacian
  // source with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expressions are in:
  // Hang and Chen, "Source Model for transform video coder and its
  // application - Part I: Fundamental Theory", IEEE Trans. Circ.
  // Sys. for Video Tech., April 1997.
  vp9_clear_system_state();
  if (var == 0 || n == 0) {
    *rate = 0;
    *dist = 0;
  } else {
    double D, R;
    double s2 = (double) var / n;
    double x = qstep / sqrt(s2);
    model_rd_norm(x, &R, &D);
    *rate = ((n << 8) * R + 0.5);
    *dist = (var * D + 0.5);
  }
  vp9_clear_system_state();
}
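// Worked example (a sketch using the tables above): var = 256, n = 16,
// qstep = 4 gives s2 = 16 and x = 4 / 4 = 1.0, so y = 8.0 indexes the
// tables exactly: R = rate_tab[8] = 2.014, D = dist_tab[8] = 0.079.
// Hence *rate = (16 << 8) * 2.014 + 0.5 -> 8249 (the << 8 appears to put
// rate in 1/256-bit units) and *dist = 256 * 0.079 + 0.5 -> 20.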
static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
                            MACROBLOCK *x, MACROBLOCKD *xd,
                            int *out_rate_sum, int64_t *out_dist_sum) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  int i, rate_sum = 0;
  int64_t dist_sum = 0;  // 64-bit: per-plane dist is int64_t and can overflow int.

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
    unsigned int sse;
    int rate;
    int64_t dist;
    (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
                              pd->dst.buf, pd->dst.stride, &sse);
    // sse works better than var, since there is no dc prediction used
    model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
                             pd->dequant[1] >> 3, &rate, &dist);

    rate_sum += rate;
    dist_sum += dist;
  }

  *out_rate_sum = rate_sum;
  *out_dist_sum = dist_sum << 4;
}

static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE bsize,
                                 TX_SIZE tx_size,
                                 MACROBLOCK *x, MACROBLOCKD *xd,
                                 int *out_rate_sum, int64_t *out_dist_sum,
                                 int *out_skip) {
  int j, k;
  BLOCK_SIZE bs;
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
  const int width = 4 << num_4x4_blocks_wide_lookup[bsize];
  const int height = 4 << num_4x4_blocks_high_lookup[bsize];
  int rate_sum = 0;
  int64_t dist_sum = 0;
  const int t = 4 << tx_size;

  if (tx_size == TX_4X4) {
    bs = BLOCK_4X4;
  } else if (tx_size == TX_8X8) {
    bs = BLOCK_8X8;
  } else if (tx_size == TX_16X16) {
    bs = BLOCK_16X16;
  } else if (tx_size == TX_32X32) {
    bs = BLOCK_32X32;
  } else {
    assert(0);
  }

  *out_skip = 1;
  for (j = 0; j < height; j += t) {
    for (k = 0; k < width; k += t) {
      int rate;
      int64_t dist;
      unsigned int sse;
      cpi->fn_ptr[bs].vf(&p->src.buf[j * p->src.stride + k], p->src.stride,
                         &pd->dst.buf[j * pd->dst.stride + k], pd->dst.stride,
                         &sse);
      // sse works better than var, since there is no dc prediction used
      model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3, &rate, &dist);
      rate_sum += rate;
      dist_sum += dist;
      *out_skip &= (rate < 1024);
    }
  }

  *out_rate_sum = rate_sum;
  *out_dist_sum = dist_sum << 4;
}

int64_t vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    int this_diff = coeff[i] - dqcoeff[i];
    error += (unsigned)this_diff * this_diff;
    sqcoeff += (unsigned) coeff[i] * coeff[i];
  }

  *ssz = sqcoeff;
  return error;
}

/* The trailing '0' is a terminator which is used inside cost_coeffs() to
 * decide whether to include cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in an 8x8 block,
 * were non-zero). */
static const int16_t band_counts[TX_SIZES][8] = {
  { 1, 2, 3, 4,  3,   16 - 13, 0 },
  { 1, 2, 3, 4, 11,   64 - 21, 0 },
  { 1, 2, 3, 4, 11,  256 - 21, 0 },
  { 1, 2, 3, 4, 11, 1024 - 21, 0 },
};
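// Sanity check on the band layout (a sketch): each row sums to the
// coefficient count of its transform size, e.g.
//   TX_4X4:  1 + 2 + 3 + 4 +  3 +   3 = 16
//   TX_8X8:  1 + 2 + 3 + 4 + 11 +  43 = 64
//   TX_16X16: 1 + 2 + 3 + 4 + 11 + 235 = 256
// with the final 0 acting as the terminator described above.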
static INLINE int cost_coeffs(MACROBLOCK *mb,
                              int plane, int block,
                              ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
                              TX_SIZE tx_size,
                              const int16_t *scan, const int16_t *nb) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  MB_MODE_INFO *mbmi = &xd->this_mi->mbmi;
  struct macroblockd_plane *pd = &xd->plane[plane];
  const PLANE_TYPE type = pd->plane_type;
  const int16_t *band_count = &band_counts[tx_size][1];
  const int eob = pd->eobs[block];
  const int16_t *const qcoeff_ptr = BLOCK_OFFSET(pd->qcoeff, block);
  const int ref = mbmi->ref_frame[0] != INTRA_FRAME;
  unsigned int (*token_costs)[2][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
      mb->token_costs[tx_size][type][ref];
  const ENTROPY_CONTEXT above_ec = !!*A, left_ec = !!*L;
  uint8_t token_cache[1024];
  int pt = combine_entropy_contexts(above_ec, left_ec);
  int c, cost;

  // Check for consistency of tx_size with mode info
  assert(type == PLANE_TYPE_Y_WITH_DC ? mbmi->tx_size == tx_size
                                      : get_uv_tx_size(mbmi) == tx_size);

  if (eob == 0) {
    // single eob token
    cost = token_costs[0][0][pt][DCT_EOB_TOKEN];
    c = 0;
  } else {
    int band_left = *band_count++;

    // dc token
    int v = qcoeff_ptr[0];
    int prev_t = vp9_dct_value_tokens_ptr[v].token;
    cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
    token_cache[0] = vp9_pt_energy_class[prev_t];
    ++token_costs;

    // ac tokens
    for (c = 1; c < eob; c++) {
      const int rc = scan[c];
      int t;

      v = qcoeff_ptr[rc];
      t = vp9_dct_value_tokens_ptr[v].token;
      pt = get_coef_context(nb, token_cache, c);
      cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
      token_cache[rc] = vp9_pt_energy_class[t];
      prev_t = t;
      if (!--band_left) {
        band_left = *band_count++;
        ++token_costs;
      }
    }

    // eob token
    if (band_left) {
      pt = get_coef_context(nb, token_cache, c);
      cost += (*token_costs)[0][pt][DCT_EOB_TOKEN];
    }
  }

  // Did this block contain any non-zero coefficients? (c > 0 iff eob > 0.)
  *A = *L = (c > 0);

  return cost;
}
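// Note on the EOB charge in cost_coeffs() above: when eob equals the full
// block size, the final --band_left reload pulls the terminating 0 from
// band_counts[], so "if (band_left)" fails and no explicit EOB token is
// costed -- exactly the case described in the comment above band_counts[].
// For any shorter eob, band_left stays positive and the EOB cost is added.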
struct rdcost_block_args {
  MACROBLOCK *x;
  ENTROPY_CONTEXT t_above[16];
  ENTROPY_CONTEXT t_left[16];
  TX_SIZE tx_size;
  int bw;
  int bh;
  int rate;
  int64_t dist;
  int64_t sse;
  int64_t best_rd;
  int skip;
  const int16_t *scan, *nb;
};

static void dist_block(int plane, int block, TX_SIZE tx_size, void *arg) {
  const int ss_txfrm_size = tx_size << 1;
  struct rdcost_block_args* args = arg;
  MACROBLOCK* const x = args->x;
  MACROBLOCKD* const xd = &x->e_mbd;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  int64_t this_sse;
  int shift = args->tx_size == TX_32X32 ? 0 : 2;
  int16_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  args->dist += vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                                &this_sse) >> shift;
  args->sse += this_sse >> shift;

  if (x->skip_encode &&
      xd->this_mi->mbmi.ref_frame[0] == INTRA_FRAME) {
    // TODO(jingning): tune the model to better capture the distortion.
    int64_t p = (pd->dequant[1] * pd->dequant[1] *
                 (1 << ss_txfrm_size)) >> shift;
    args->dist += p;
    args->sse += p;
  }
}

static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
                       TX_SIZE tx_size, void *arg) {
  struct rdcost_block_args* args = arg;

  int x_idx, y_idx;
  txfrm_block_to_raster_xy(plane_bsize, args->tx_size, block, &x_idx, &y_idx);

  args->rate += cost_coeffs(args->x, plane, block,
                            args->t_above + x_idx,
                            args->t_left + y_idx, args->tx_size,
                            args->scan, args->nb);
}

static void block_yrd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
                           TX_SIZE tx_size, void *arg) {
  struct rdcost_block_args *args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct encode_b_args encode_args = {x, NULL};
  int64_t rd1, rd2, rd;

  if (args->skip)
    return;
  rd1 = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist);
  rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse);
  rd = MIN(rd1, rd2);
  if (rd > args->best_rd) {
    args->skip = 1;
    args->rate = INT_MAX;
    args->dist = INT64_MAX;
    args->sse = INT64_MAX;
    return;
  }

  if (!is_inter_block(&xd->this_mi->mbmi))
    vp9_encode_block_intra(plane, block, plane_bsize, tx_size, &encode_args);
  else
    vp9_xform_quant(plane, block, plane_bsize, tx_size, &encode_args);

  dist_block(plane, block, tx_size, args);
  rate_block(plane, block, plane_bsize, tx_size, args);
}
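// The pruning in block_yrd_txfm() weighs two running hypotheses before
// coding each block:
//   rd1 = RDCOST(rate, dist)  -- keep coding coefficients as accumulated
//   rd2 = RDCOST(0, sse)      -- skip the whole unit: no bits, full SSE
// Once min(rd1, rd2) exceeds best_rd this transform size cannot win, so
// rate/dist/sse are poisoned to *_MAX and the remaining blocks are skipped.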
static void txfm_rd_in_plane(MACROBLOCK *x,
                             int *rate, int64_t *distortion,
                             int *skippable, int64_t *sse,
                             int64_t ref_best_rd, int plane,
                             BLOCK_SIZE bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bs];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bs];
  int i;
  struct rdcost_block_args args = { x, { 0 }, { 0 }, tx_size,
                                    num_4x4_blocks_wide, num_4x4_blocks_high,
                                    0, 0, 0, ref_best_rd, 0 };
  if (plane == 0)
    xd->this_mi->mbmi.tx_size = tx_size;

  switch (tx_size) {
    case TX_4X4:
      vpx_memcpy(&args.t_above, pd->above_context,
                 sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide);
      vpx_memcpy(&args.t_left, pd->left_context,
                 sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high);
      get_scan_nb_4x4(get_tx_type_4x4(pd->plane_type, xd, 0),
                      &args.scan, &args.nb);
      break;
    case TX_8X8:
      for (i = 0; i < num_4x4_blocks_wide; i += 2)
        args.t_above[i] = !!*(uint16_t *)&pd->above_context[i];
      for (i = 0; i < num_4x4_blocks_high; i += 2)
        args.t_left[i] = !!*(uint16_t *)&pd->left_context[i];
      get_scan_nb_8x8(get_tx_type_8x8(pd->plane_type, xd),
                      &args.scan, &args.nb);
      break;
    case TX_16X16:
      for (i = 0; i < num_4x4_blocks_wide; i += 4)
        args.t_above[i] = !!*(uint32_t *)&pd->above_context[i];
      for (i = 0; i < num_4x4_blocks_high; i += 4)
        args.t_left[i] = !!*(uint32_t *)&pd->left_context[i];
      get_scan_nb_16x16(get_tx_type_16x16(pd->plane_type, xd),
                        &args.scan, &args.nb);
      break;
    case TX_32X32:
      for (i = 0; i < num_4x4_blocks_wide; i += 8)
        args.t_above[i] = !!*(uint64_t *)&pd->above_context[i];
      for (i = 0; i < num_4x4_blocks_high; i += 8)
        args.t_left[i] = !!*(uint64_t *)&pd->left_context[i];
      args.scan = vp9_default_scan_32x32;
      args.nb = vp9_default_scan_32x32_neighbors;
      break;
    default:
      assert(0);
  }

  foreach_transformed_block_in_plane(xd, bsize, plane, block_yrd_txfm, &args);
  *distortion = args.dist;
  *rate = args.rate;
  *sse = args.sse;
  *skippable = vp9_is_skippable_in_plane(xd, bsize, plane) && (!args.skip);
}
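// Context seeding trick above (a sketch, assuming ENTROPY_CONTEXT is a
// single byte): for TX_8X8 the uint16_t load tests two neighboring 4x4
// contexts at once, so
//   args.t_above[i] = !!*(uint16_t *)&pd->above_context[i];
// behaves like (pd->above_context[i] || pd->above_context[i + 1]); the
// uint32_t / uint64_t loads do the same over 4 and 8 contexts for
// TX_16X16 / TX_32X32.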
static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x,
                                     int *rate, int64_t *distortion,
                                     int *skip, int64_t *sse,
                                     int64_t ref_best_rd,
                                     BLOCK_SIZE bs) {
  const TX_SIZE max_txfm_size = max_txsize_lookup[bs];
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->this_mi->mbmi;
  if (max_txfm_size == TX_32X32 &&
      (cm->tx_mode == ALLOW_32X32 ||
       cm->tx_mode == TX_MODE_SELECT)) {
    mbmi->tx_size = TX_32X32;
  } else if (max_txfm_size >= TX_16X16 &&
             (cm->tx_mode == ALLOW_16X16 ||
              cm->tx_mode == ALLOW_32X32 ||
              cm->tx_mode == TX_MODE_SELECT)) {
    mbmi->tx_size = TX_16X16;
  } else if (cm->tx_mode != ONLY_4X4) {
    mbmi->tx_size = TX_8X8;
  } else {
    mbmi->tx_size = TX_4X4;
  }
  txfm_rd_in_plane(x, rate, distortion, skip,
                   &sse[mbmi->tx_size], ref_best_rd, 0, bs,
                   mbmi->tx_size);
  cpi->txfm_stepdown_count[0]++;
}

static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
                                     int (*r)[2], int *rate,
                                     int64_t *d, int64_t *distortion,
                                     int *s, int *skip,
                                     int64_t tx_cache[TX_MODES],
                                     BLOCK_SIZE bs) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->this_mi->mbmi;
  vp9_prob skip_prob = vp9_get_pred_prob_mbskip(cm, xd);
  int64_t rd[TX_SIZES][2];
  int n, m;
  int s0, s1;

  const vp9_prob *tx_probs = get_tx_probs2(xd, &cm->fc.tx_probs, xd->this_mi);

  for (n = TX_4X4; n <= max_tx_size; n++) {
    r[n][1] = r[n][0];
    if (r[n][0] == INT_MAX)
      continue;
    for (m = 0; m <= n - (n == max_tx_size); m++) {
      if (m == n)
        r[n][1] += vp9_cost_zero(tx_probs[m]);
      else
        r[n][1] += vp9_cost_one(tx_probs[m]);
    }
  }

  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);

  for (n = TX_4X4; n <= max_tx_size; n++) {
    if (d[n] == INT64_MAX) {
      rd[n][0] = rd[n][1] = INT64_MAX;
      continue;
    }
    if (s[n]) {
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
    }
  }

  if (max_tx_size == TX_32X32 &&
      (cm->tx_mode == ALLOW_32X32 ||
       (cm->tx_mode == TX_MODE_SELECT &&
        rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
        rd[TX_32X32][1] < rd[TX_4X4][1]))) {
    mbmi->tx_size = TX_32X32;
  } else if (max_tx_size >= TX_16X16 &&
             (cm->tx_mode == ALLOW_16X16 ||
              cm->tx_mode == ALLOW_32X32 ||
              (cm->tx_mode == TX_MODE_SELECT &&
               rd[TX_16X16][1] < rd[TX_8X8][1] &&
               rd[TX_16X16][1] < rd[TX_4X4][1]))) {
    mbmi->tx_size = TX_16X16;
  } else if (cm->tx_mode == ALLOW_8X8 ||
             cm->tx_mode == ALLOW_16X16 ||
             cm->tx_mode == ALLOW_32X32 ||
             (cm->tx_mode == TX_MODE_SELECT &&
              rd[TX_8X8][1] < rd[TX_4X4][1])) {
    mbmi->tx_size = TX_8X8;
  } else {
    mbmi->tx_size = TX_4X4;
  }

  *distortion = d[mbmi->tx_size];
  *rate = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT];
  *skip = s[mbmi->tx_size];

  tx_cache[ONLY_4X4] = rd[TX_4X4][0];
  tx_cache[ALLOW_8X8] = rd[TX_8X8][0];
  tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0];
  tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0];
  if (max_tx_size == TX_32X32 &&
      rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
      rd[TX_32X32][1] < rd[TX_4X4][1])
    tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
  else if (max_tx_size >= TX_16X16 &&
           rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1])
    tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
  else
    tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ?
        rd[TX_4X4][1] : rd[TX_8X8][1];

  if (max_tx_size == TX_32X32 &&
      rd[TX_32X32][1] < rd[TX_16X16][1] &&
      rd[TX_32X32][1] < rd[TX_8X8][1] &&
      rd[TX_32X32][1] < rd[TX_4X4][1]) {
    cpi->txfm_stepdown_count[0]++;
  } else if (max_tx_size >= TX_16X16 &&
             rd[TX_16X16][1] < rd[TX_8X8][1] &&
             rd[TX_16X16][1] < rd[TX_4X4][1]) {
    cpi->txfm_stepdown_count[max_tx_size - TX_16X16]++;
  } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
    cpi->txfm_stepdown_count[max_tx_size - TX_8X8]++;
  } else {
    cpi->txfm_stepdown_count[max_tx_size - TX_4X4]++;
  }
}
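// r[n][1] above is r[n][0] plus the cost of signalling tx_size n with the
// tx_probs tree: one vp9_cost_one() per size skipped over and a
// vp9_cost_zero() terminator, the latter omitted for the largest allowed
// size (the "m <= n - (n == max_tx_size)" bound) since that branch is
// implicit and needs no bit.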
static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
                                          int (*r)[2], int *rate,
                                          int64_t *d, int64_t *distortion,
                                          int *s, int *skip, int64_t *sse,
                                          int64_t ref_best_rd,
                                          BLOCK_SIZE bs) {
  const TX_SIZE max_txfm_size = max_txsize_lookup[bs];
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->this_mi->mbmi;
  vp9_prob skip_prob = vp9_get_pred_prob_mbskip(cm, xd);
  int64_t rd[TX_SIZES][2];
  int n, m;
  int s0, s1;
  double scale_rd[TX_SIZES] = {1.73, 1.44, 1.20, 1.00};
  // double scale_r[TX_SIZES] = {2.82, 2.00, 1.41, 1.00};

  const vp9_prob *tx_probs = get_tx_probs2(xd, &cm->fc.tx_probs, xd->this_mi);

  // for (n = TX_4X4; n <= max_txfm_size; n++)
  //   r[n][0] = (r[n][0] * scale_r[n]);

  for (n = TX_4X4; n <= max_txfm_size; n++) {
    r[n][1] = r[n][0];
    for (m = 0; m <= n - (n == max_txfm_size); m++) {
      if (m == n)
        r[n][1] += vp9_cost_zero(tx_probs[m]);
      else
        r[n][1] += vp9_cost_one(tx_probs[m]);
    }
  }

  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);

  for (n = TX_4X4; n <= max_txfm_size; n++) {
    if (s[n]) {
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
    }
  }
  for (n = TX_4X4; n <= max_txfm_size; n++) {
    rd[n][0] = (scale_rd[n] * rd[n][0]);
    rd[n][1] = (scale_rd[n] * rd[n][1]);
  }

  if (max_txfm_size == TX_32X32 &&
      (cm->tx_mode == ALLOW_32X32 ||
       (cm->tx_mode == TX_MODE_SELECT &&
        rd[TX_32X32][1] <= rd[TX_16X16][1] &&
        rd[TX_32X32][1] <= rd[TX_8X8][1] &&
        rd[TX_32X32][1] <= rd[TX_4X4][1]))) {
    mbmi->tx_size = TX_32X32;
  } else if (max_txfm_size >= TX_16X16 &&
             (cm->tx_mode == ALLOW_16X16 ||
              cm->tx_mode == ALLOW_32X32 ||
              (cm->tx_mode == TX_MODE_SELECT &&
               rd[TX_16X16][1] <= rd[TX_8X8][1] &&
               rd[TX_16X16][1] <= rd[TX_4X4][1]))) {
    mbmi->tx_size = TX_16X16;
  } else if (cm->tx_mode == ALLOW_8X8 ||
             cm->tx_mode == ALLOW_16X16 ||
             cm->tx_mode == ALLOW_32X32 ||
             (cm->tx_mode == TX_MODE_SELECT &&
              rd[TX_8X8][1] <= rd[TX_4X4][1])) {
    mbmi->tx_size = TX_8X8;
  } else {
    mbmi->tx_size = TX_4X4;
  }

  // Actually encode using the chosen mode if a model was used, but do not
  // update the r, d costs
  txfm_rd_in_plane(x, rate, distortion, skip, &sse[mbmi->tx_size],
                   ref_best_rd, 0, bs, mbmi->tx_size);

  if (max_txfm_size == TX_32X32 &&
      rd[TX_32X32][1] <= rd[TX_16X16][1] &&
      rd[TX_32X32][1] <= rd[TX_8X8][1] &&
      rd[TX_32X32][1] <= rd[TX_4X4][1]) {
    cpi->txfm_stepdown_count[0]++;
  } else if (max_txfm_size >= TX_16X16 &&
             rd[TX_16X16][1] <= rd[TX_8X8][1] &&
             rd[TX_16X16][1] <= rd[TX_4X4][1]) {
    cpi->txfm_stepdown_count[max_txfm_size - TX_16X16]++;
  } else if (rd[TX_8X8][1] <= rd[TX_4X4][1]) {
    cpi->txfm_stepdown_count[max_txfm_size - TX_8X8]++;
  } else {
    cpi->txfm_stepdown_count[max_txfm_size - TX_4X4]++;
  }
}

static void super_block_yrd(VP9_COMP *cpi,
                            MACROBLOCK *x, int *rate, int64_t *distortion,
                            int *skip, int64_t *psse, BLOCK_SIZE bs,
                            int64_t txfm_cache[TX_MODES],
                            int64_t ref_best_rd) {
  int r[TX_SIZES][2], s[TX_SIZES];
  int64_t d[TX_SIZES], sse[TX_SIZES];
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->this_mi->mbmi;

  assert(bs == mbmi->sb_type);
  if (mbmi->ref_frame[0] > INTRA_FRAME)
    vp9_subtract_sby(x, bs);

  if (cpi->sf.tx_size_search_method == USE_LARGESTALL ||
      (cpi->sf.tx_size_search_method != USE_FULL_RD &&
       mbmi->ref_frame[0] == INTRA_FRAME)) {
    vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
    choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse,
                             ref_best_rd, bs);
    if (psse)
      *psse = sse[mbmi->tx_size];
    return;
  }

  if (cpi->sf.tx_size_search_method == USE_LARGESTINTRA_MODELINTER &&
      mbmi->ref_frame[0] > INTRA_FRAME) {
    if (bs >= BLOCK_32X32)
      model_rd_for_sb_y_tx(cpi, bs, TX_32X32, x, xd,
                           &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32]);
    if (bs >= BLOCK_16X16)
      model_rd_for_sb_y_tx(cpi, bs, TX_16X16, x, xd,
                           &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16]);

    model_rd_for_sb_y_tx(cpi, bs, TX_8X8, x, xd,
                         &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8]);

    model_rd_for_sb_y_tx(cpi, bs, TX_4X4, x, xd,
                         &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4]);

    choose_txfm_size_from_modelrd(cpi, x, r, rate, d, distortion, s,
                                  skip, sse, ref_best_rd, bs);
  } else {
    if (bs >= BLOCK_32X32)
      txfm_rd_in_plane(x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32],
                       &sse[TX_32X32], ref_best_rd, 0, bs, TX_32X32);
    if (bs >= BLOCK_16X16)
      txfm_rd_in_plane(x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16],
                       &sse[TX_16X16], ref_best_rd, 0, bs, TX_16X16);
    txfm_rd_in_plane(x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8],
                     &sse[TX_8X8], ref_best_rd, 0, bs, TX_8X8);
    txfm_rd_in_plane(x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4],
                     &sse[TX_4X4], ref_best_rd, 0, bs, TX_4X4);
    choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s,
                             skip, txfm_cache, bs);
  }
  if (psse)
    *psse = sse[mbmi->tx_size];
}
static int conditional_skipintra(MB_PREDICTION_MODE mode,
                                 MB_PREDICTION_MODE best_intra_mode) {
  if (mode == D117_PRED &&
      best_intra_mode != V_PRED &&
      best_intra_mode != D135_PRED)
    return 1;
  if (mode == D63_PRED &&
      best_intra_mode != V_PRED &&
      best_intra_mode != D45_PRED)
    return 1;
  if (mode == D207_PRED &&
      best_intra_mode != H_PRED &&
      best_intra_mode != D45_PRED)
    return 1;
  if (mode == D153_PRED &&
      best_intra_mode != H_PRED &&
      best_intra_mode != D135_PRED)
    return 1;
  return 0;
}
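// Example of the pruning above: D117 lies angularly between V and D135,
// so when FLAG_SKIP_INTRA_DIRMISMATCH is set, D117_PRED is only evaluated
// if the best mode so far is already V_PRED or D135_PRED; each other
// oblique mode is gated by its two angular neighbours in the same way.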
static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
                                     MB_PREDICTION_MODE *best_mode,
                                     int *bmode_costs,
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                     int *bestrate, int *bestratey,
                                     int64_t *bestdistortion,
                                     BLOCK_SIZE bsize, int64_t rd_thresh) {
  MB_PREDICTION_MODE mode;
  MACROBLOCKD *xd = &x->e_mbd;
  int64_t best_rd = rd_thresh;
  int rate = 0;
  int64_t distortion;
  struct macroblock_plane *p = &x->plane[0];
  struct macroblockd_plane *pd = &xd->plane[0];
  const int src_stride = p->src.stride;
  const int dst_stride = pd->dst.stride;
  uint8_t *src_init = raster_block_offset_uint8(BLOCK_8X8, ib,
                                                p->src.buf, src_stride);
  uint8_t *dst_init = raster_block_offset_uint8(BLOCK_8X8, ib,
                                                pd->dst.buf, dst_stride);
  int16_t *src_diff, *coeff;

  ENTROPY_CONTEXT ta[2], tempa[2];
  ENTROPY_CONTEXT tl[2], templ[2];
  TX_TYPE tx_type = DCT_DCT;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  int idx, idy, block;
  uint8_t best_dst[8 * 8];

  assert(ib < 4);

  vpx_memcpy(ta, a, sizeof(ta));
  vpx_memcpy(tl, l, sizeof(tl));
  xd->this_mi->mbmi.tx_size = TX_4X4;

  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    int64_t this_rd;
    int ratey = 0;

    if (!(cpi->sf.intra_y_mode_mask & (1 << mode)))
      continue;

    // Only do the oblique modes if the best so far is
    // one of the neighboring directional modes
    if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
      if (conditional_skipintra(mode, *best_mode))
        continue;
    }

    rate = bmode_costs[mode];
    distortion = 0;

    vpx_memcpy(tempa, ta, sizeof(ta));
    vpx_memcpy(templ, tl, sizeof(tl));

    for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
      for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
        int64_t ssz;
        const int16_t *scan;
        uint8_t *src = src_init + idx * 4 + idy * 4 * src_stride;
        uint8_t *dst = dst_init + idx * 4 + idy * 4 * dst_stride;

        block = ib + idy * 2 + idx;
        xd->this_mi->bmi[block].as_mode = mode;
        src_diff = raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
        coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
        vp9_predict_intra_block(xd, block, 1,
                                TX_4X4, mode,
                                x->skip_encode ? src : dst,
                                x->skip_encode ? src_stride : dst_stride,
                                dst, dst_stride);
        vp9_subtract_block(4, 4, src_diff, 8,
                           src, src_stride,
                           dst, dst_stride);

        tx_type = get_tx_type_4x4(PLANE_TYPE_Y_WITH_DC, xd, block);
        if (tx_type != DCT_DCT) {
          vp9_short_fht4x4(src_diff, coeff, 8, tx_type);
          x->quantize_b_4x4(x, block, tx_type, 16);
        } else {
          x->fwd_txm4x4(src_diff, coeff, 16);
          x->quantize_b_4x4(x, block, tx_type, 16);
        }

        scan = get_scan_4x4(get_tx_type_4x4(PLANE_TYPE_Y_WITH_DC, xd, block));
        ratey += cost_coeffs(x, 0, block,
                             tempa + idx, templ + idy, TX_4X4, scan,
                             vp9_get_coef_neighbors_handle(scan));
        distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block),
                                      16, &ssz) >> 2;
        if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
          goto next;

        if (tx_type != DCT_DCT)
          vp9_short_iht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block),
                               dst, pd->dst.stride, tx_type);
        else
          xd->inv_txm4x4_add(BLOCK_OFFSET(pd->dqcoeff, block),
                             dst, pd->dst.stride);
      }
    }

    rate += ratey;
    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

    if (this_rd < best_rd) {
      *bestrate = rate;
      *bestratey = ratey;
      *bestdistortion = distortion;
      best_rd = this_rd;
      *best_mode = mode;
      vpx_memcpy(a, tempa, sizeof(tempa));
      vpx_memcpy(l, templ, sizeof(templ));
      for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
        vpx_memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
                   num_4x4_blocks_wide * 4);
    }
  next:
    {}
  }

  if (best_rd >= rd_thresh || x->skip_encode)
    return best_rd;

  for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
    vpx_memcpy(dst_init + idy * dst_stride, best_dst + idy * 8,
               num_4x4_blocks_wide * 4);

  return best_rd;
}
static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP * const cpi,
                                            MACROBLOCK * const mb,
                                            int * const rate,
                                            int * const rate_y,
                                            int64_t * const distortion,
                                            int64_t best_rd) {
  int i, j;
  MACROBLOCKD *const xd = &mb->e_mbd;
  MODE_INFO *const mic = xd->this_mi;
  const MODE_INFO *above_mi = xd->mi_8x8[-xd->mode_info_stride];
  const MODE_INFO *left_mi = xd->mi_8x8[-1];
  const BLOCK_SIZE bsize = xd->this_mi->mbmi.sb_type;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  int idx, idy;
  int cost = 0;
  int64_t total_distortion = 0;
  int tot_rate_y = 0;
  int64_t total_rd = 0;
  ENTROPY_CONTEXT t_above[4], t_left[4];
  int *bmode_costs;

  vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
  vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));

  bmode_costs = mb->mbmode_cost;

  // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
  for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
    for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
      MB_PREDICTION_MODE best_mode = DC_PRED;
      int r = INT_MAX, ry = INT_MAX;
      int64_t d = INT64_MAX, this_rd = INT64_MAX;
      i = idy * 2 + idx;
      if (cpi->common.frame_type == KEY_FRAME) {
        const MB_PREDICTION_MODE A = above_block_mode(mic, above_mi, i);
        const MB_PREDICTION_MODE L = (xd->left_available || idx) ?
                                     left_block_mode(mic, left_mi, i) :
                                     DC_PRED;

        bmode_costs = mb->y_mode_costs[A][L];
      }

      this_rd = rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs,
                                      t_above + idx, t_left + idy, &r, &ry, &d,
                                      bsize, best_rd - total_rd);
      if (this_rd >= best_rd - total_rd)
        return INT64_MAX;

      total_rd += this_rd;
      cost += r;
      total_distortion += d;
      tot_rate_y += ry;

      mic->bmi[i].as_mode = best_mode;
      for (j = 1; j < num_4x4_blocks_high; ++j)
        mic->bmi[i + j * 2].as_mode = best_mode;
      for (j = 1; j < num_4x4_blocks_wide; ++j)
        mic->bmi[i + j].as_mode = best_mode;

      if (total_rd >= best_rd)
        return INT64_MAX;
    }
  }

  *rate = cost;
  *rate_y = tot_rate_y;
  *distortion = total_distortion;
  mic->mbmi.mode = mic->bmi[3].as_mode;

  return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion);
}
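// Note: for 4x8 and 8x4 partitions the chosen mode is replicated into
// every 4x4 unit it covers (the two j-loops above), so subsequent
// above_block_mode() / left_block_mode() lookups always see a fully
// populated 4x4 mode map.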
static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                      int *rate, int *rate_tokenonly,
                                      int64_t *distortion, int *skippable,
                                      BLOCK_SIZE bsize,
                                      int64_t tx_cache[TX_MODES],
                                      int64_t best_rd) {
  MB_PREDICTION_MODE mode;
  MB_PREDICTION_MODE mode_selected = DC_PRED;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mic = xd->this_mi;
  int this_rate, this_rate_tokenonly, s;
  int64_t this_distortion, this_rd;
  TX_SIZE best_tx = TX_4X4;
  int i;
  int *bmode_costs = x->mbmode_cost;

  if (cpi->sf.tx_size_search_method == USE_FULL_RD)
    for (i = 0; i < TX_MODES; i++)
      tx_cache[i] = INT64_MAX;

  /* Y Search for intra prediction mode */
  for (mode = DC_PRED; mode <= TM_PRED; mode++) {
    int64_t local_tx_cache[TX_MODES];
    MODE_INFO *above_mi = xd->mi_8x8[-xd->mode_info_stride];
    MODE_INFO *left_mi = xd->mi_8x8[-1];

    if (!(cpi->sf.intra_y_mode_mask & (1 << mode)))
      continue;

    if (cpi->common.frame_type == KEY_FRAME) {
      const MB_PREDICTION_MODE A = above_block_mode(mic, above_mi, 0);
      const MB_PREDICTION_MODE L = xd->left_available ?
                                   left_block_mode(mic, left_mi, 0) : DC_PRED;

      bmode_costs = x->y_mode_costs[A][L];
    }
    mic->mbmi.mode = mode;

    super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, NULL,
                    bsize, local_tx_cache, best_rd);

    if (this_rate_tokenonly == INT_MAX)
      continue;

    this_rate = this_rate_tokenonly + bmode_costs[mode];
    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

    if (this_rd < best_rd) {
      mode_selected = mode;
      best_rd = this_rd;
      best_tx = mic->mbmi.tx_size;
      *rate = this_rate;
      *rate_tokenonly = this_rate_tokenonly;
      *distortion = this_distortion;
      *skippable = s;
    }

    if (cpi->sf.tx_size_search_method == USE_FULL_RD && this_rd < INT64_MAX) {
      for (i = 0; i < TX_MODES && local_tx_cache[i] < INT64_MAX; i++) {
        const int64_t adj_rd = this_rd + local_tx_cache[i] -
            local_tx_cache[cpi->common.tx_mode];
        if (adj_rd < tx_cache[i]) {
          tx_cache[i] = adj_rd;
        }
      }
    }
  }

  mic->mbmi.mode = mode_selected;
  mic->mbmi.tx_size = best_tx;

  return best_rd;
}
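// The tx_cache update above re-bases each mode's rd onto every tx_mode
// hypothesis:
//   adj_rd = this_rd + local_tx_cache[i] - local_tx_cache[cm->tx_mode]
// swaps the tx cost actually paid under the current tx_mode for the cost
// tx_mode i would pay, letting the caller compare tx modes without
// re-running the mode search (a sketch of the apparent intent).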
static void super_block_uvrd(VP9_COMMON *const cm, MACROBLOCK *x,
                             int *rate, int64_t *distortion, int *skippable,
                             int64_t *sse, BLOCK_SIZE bsize,
                             int64_t ref_best_rd) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->this_mi->mbmi;
  TX_SIZE uv_txfm_size = get_uv_tx_size(mbmi);
  int plane;
  int pnrate = 0, pnskip = 1;
  int64_t pndist = 0, pnsse = 0;

  if (ref_best_rd < 0)
    goto term;

  if (is_inter_block(mbmi))
    vp9_subtract_sbuv(x, bsize);

  *rate = 0;
  *distortion = 0;
  *sse = 0;
  *skippable = 1;

  for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
    txfm_rd_in_plane(x, &pnrate, &pndist, &pnskip, &pnsse,
                     ref_best_rd, plane, bsize, uv_txfm_size);
    if (pnrate == INT_MAX)
      goto term;
    *rate += pnrate;
    *distortion += pndist;
    *sse += pnsse;
    *skippable &= pnskip;
  }
  return;

 term:
  *rate = INT_MAX;
  *distortion = INT64_MAX;
  *sse = INT64_MAX;
  *skippable = 0;
  return;
}

static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                       int *rate, int *rate_tokenonly,
                                       int64_t *distortion, int *skippable,
                                       BLOCK_SIZE bsize) {
  MB_PREDICTION_MODE mode;
  MB_PREDICTION_MODE mode_selected = DC_PRED;
  int64_t best_rd = INT64_MAX, this_rd;
  int this_rate_tokenonly, this_rate, s;
  int64_t this_distortion, this_sse;

  // int mode_mask = (bsize <= BLOCK_8X8)
  //                     ? ALL_INTRA_MODES : cpi->sf.intra_uv_mode_mask;

  for (mode = DC_PRED; mode <= TM_PRED; mode++) {
    // if (!(mode_mask & (1 << mode)))
    if (!(cpi->sf.intra_uv_mode_mask & (1 << mode)))
      continue;

    x->e_mbd.mi_8x8[0]->mbmi.uv_mode = mode;

    super_block_uvrd(&cpi->common, x, &this_rate_tokenonly,
                     &this_distortion, &s, &this_sse, bsize, best_rd);
    if (this_rate_tokenonly == INT_MAX)
      continue;
    this_rate = this_rate_tokenonly +
        x->intra_uv_mode_cost[cpi->common.frame_type][mode];
    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

    if (this_rd < best_rd) {
      mode_selected = mode;
      best_rd = this_rd;
      *rate = this_rate;
      *rate_tokenonly = this_rate_tokenonly;
      *distortion = this_distortion;
      *skippable = s;
    }
  }

  x->e_mbd.mi_8x8[0]->mbmi.uv_mode = mode_selected;

  return best_rd;
}

static int64_t rd_sbuv_dcpred(VP9_COMP *cpi, MACROBLOCK *x,
                              int *rate, int *rate_tokenonly,
                              int64_t *distortion, int *skippable,
                              BLOCK_SIZE bsize) {
  int64_t this_rd;
  int64_t this_sse;

  x->e_mbd.mi_8x8[0]->mbmi.uv_mode = DC_PRED;
  super_block_uvrd(&cpi->common, x, rate_tokenonly,
                   distortion, skippable, &this_sse, bsize, INT64_MAX);
  *rate = *rate_tokenonly +
      x->intra_uv_mode_cost[cpi->common.frame_type][DC_PRED];
  this_rd = RDCOST(x->rdmult, x->rddiv, *rate, *distortion);

  return this_rd;
}

static void choose_intra_uv_mode(VP9_COMP *cpi, BLOCK_SIZE bsize,
                                 int *rate_uv, int *rate_uv_tokenonly,
                                 int64_t *dist_uv, int *skip_uv,
                                 MB_PREDICTION_MODE *mode_uv) {
  MACROBLOCK *const x = &cpi->mb;

  // Use an estimated rd for uv_intra based on DC_PRED if the
  // appropriate speed flag is set.
  if (cpi->sf.use_uv_intra_rd_estimate) {
    rd_sbuv_dcpred(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
                   bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
  // Else do a proper rd search for each possible transform size that may
  // be considered in the main rd loop.
  } else {
    rd_pick_intra_sbuv_mode(cpi, x,
                            rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
                            bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
  }
  *mode_uv = x->e_mbd.mi_8x8[0]->mbmi.uv_mode;
}
static int cost_mv_ref(VP9_COMP *cpi, MB_PREDICTION_MODE mode,
                       int mode_context) {
  MACROBLOCK *const x = &cpi->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int segment_id = xd->this_mi->mbmi.segment_id;

  // Don't account for mode here if segment skip is enabled.
  if (!vp9_segfeature_active(&cpi->common.seg, segment_id, SEG_LVL_SKIP)) {
    assert(is_inter_mode(mode));
    return x->inter_mode_cost[mode_context][mode - NEARESTMV];
  } else {
    return 0;
  }
}

void vp9_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv) {
  x->e_mbd.mi_8x8[0]->mbmi.mode = mb;
  x->e_mbd.mi_8x8[0]->mbmi.mv[0].as_int = mv->as_int;
}

static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
                                BLOCK_SIZE bsize,
                                int_mv *frame_mv,
                                int mi_row, int mi_col,
                                int_mv single_newmv[MAX_REF_FRAMES],
                                int *rate_mv);
static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
                                 BLOCK_SIZE bsize,
                                 int mi_row, int mi_col,
                                 int_mv *tmp_mv, int *rate_mv);

static int labels2mode(MACROBLOCK *x, int i,
                       MB_PREDICTION_MODE this_mode,
                       int_mv *this_mv, int_mv *this_second_mv,
                       int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
                       int_mv seg_mvs[MAX_REF_FRAMES],
                       int_mv *best_ref_mv,
                       int_mv *second_best_ref_mv,
                       int *mvjcost, int *mvcost[2], VP9_COMP *cpi) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mic = xd->this_mi;
  MB_MODE_INFO *mbmi = &mic->mbmi;
  int cost = 0, thismvcost = 0;
  int idx, idy;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];

  /* We have to be careful retrieving previously-encoded motion vectors.
     Ones from this macroblock have to be pulled from the BLOCKD array
     as they have not yet made it to the bmi array in our MB_MODE_INFO. */
  MB_PREDICTION_MODE m;

  // the only time we should do costing for new motion vector or mode
  // is when we are on a new label (jbb May 08, 2007)
  switch (m = this_mode) {
    case NEWMV:
      this_mv->as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
      thismvcost = vp9_mv_bit_cost(this_mv, best_ref_mv, mvjcost, mvcost,
                                   102);
      if (mbmi->ref_frame[1] > 0) {
        this_second_mv->as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
        thismvcost += vp9_mv_bit_cost(this_second_mv, second_best_ref_mv,
                                      mvjcost, mvcost, 102);
      }
      break;
    case NEARESTMV:
      this_mv->as_int = frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int;
      if (mbmi->ref_frame[1] > 0)
        this_second_mv->as_int =
            frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int;
      break;
    case NEARMV:
      this_mv->as_int = frame_mv[NEARMV][mbmi->ref_frame[0]].as_int;
      if (mbmi->ref_frame[1] > 0)
        this_second_mv->as_int =
            frame_mv[NEARMV][mbmi->ref_frame[1]].as_int;
      break;
    case ZEROMV:
      this_mv->as_int = 0;
      if (mbmi->ref_frame[1] > 0)
        this_second_mv->as_int = 0;
      break;
    default:
      break;
  }

  cost = cost_mv_ref(cpi, this_mode,
                     mbmi->mode_context[mbmi->ref_frame[0]]);

  mic->bmi[i].as_mv[0].as_int = this_mv->as_int;
  if (mbmi->ref_frame[1] > 0)
    mic->bmi[i].as_mv[1].as_int = this_second_mv->as_int;

  x->partition_info->bmi[i].mode = m;
  for (idy = 0; idy < num_4x4_blocks_high; ++idy)
    for (idx = 0; idx < num_4x4_blocks_wide; ++idx)
      vpx_memcpy(&mic->bmi[i + idy * 2 + idx],
                 &mic->bmi[i], sizeof(mic->bmi[i]));

  cost += thismvcost;
  return cost;
}
static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
                                       MACROBLOCK *x,
                                       int64_t best_yrd,
                                       int i,
                                       int *labelyrate,
                                       int64_t *distortion, int64_t *sse,
                                       ENTROPY_CONTEXT *ta,
                                       ENTROPY_CONTEXT *tl) {
  int k;
  MACROBLOCKD *xd = &x->e_mbd;
  struct macroblockd_plane *const pd = &xd->plane[0];
  MODE_INFO *const mi = xd->this_mi;
  const BLOCK_SIZE bsize = mi->mbmi.sb_type;
  const int width = plane_block_width(bsize, pd);
  const int height = plane_block_height(bsize, pd);
  int idx, idy;
  const int src_stride = x->plane[0].src.stride;
  uint8_t* const src = raster_block_offset_uint8(BLOCK_8X8, i,
                                                 x->plane[0].src.buf,
                                                 src_stride);
  int16_t* src_diff = raster_block_offset_int16(BLOCK_8X8, i,
                                                x->plane[0].src_diff);
  int16_t* coeff = BLOCK_OFFSET(x->plane[0].coeff, i);
  uint8_t* const dst = raster_block_offset_uint8(BLOCK_8X8, i,
                                                 pd->dst.buf, pd->dst.stride);
  int64_t thisdistortion = 0, thissse = 0;
  int thisrate = 0;
  int ref, second_ref = has_second_ref(&mi->mbmi);

  for (ref = 0; ref < 1 + second_ref; ++ref) {
    const uint8_t *pre = raster_block_offset_uint8(BLOCK_8X8, i,
                                                   pd->pre[ref].buf,
                                                   pd->pre[ref].stride);
    vp9_build_inter_predictor(pre, pd->pre[ref].stride,
                              dst, pd->dst.stride,
                              &mi->bmi[i].as_mv[ref].as_mv,
                              &xd->scale_factor[ref],
                              width, height, ref, &xd->subpix, MV_PRECISION_Q3);
  }

  vp9_subtract_block(height, width, src_diff, 8, src, src_stride,
                     dst, pd->dst.stride);

  k = i;
  for (idy = 0; idy < height / 4; ++idy) {
    for (idx = 0; idx < width / 4; ++idx) {
      int64_t ssz, rd, rd1, rd2;

      k += (idy * 2 + idx);
      src_diff = raster_block_offset_int16(BLOCK_8X8, k,
                                           x->plane[0].src_diff);
      coeff = BLOCK_OFFSET(x->plane[0].coeff, k);
      x->fwd_txm4x4(src_diff, coeff, 16);
      x->quantize_b_4x4(x, k, DCT_DCT, 16);
      thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
                                        16, &ssz);
      thissse += ssz;
      thisrate += cost_coeffs(x, 0, k,
                              ta + (k & 1),
                              tl + (k >> 1), TX_4X4,
                              vp9_default_scan_4x4,
                              vp9_default_scan_4x4_neighbors);
      rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2);
      rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2);
      rd = MIN(rd1, rd2);
      if (rd >= best_yrd)
        return INT64_MAX;
    }
  }
  *distortion = thisdistortion >> 2;
  *labelyrate = thisrate;
  *sse = thissse >> 2;

  return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
}

typedef struct {
  int eobs;
  int brate;
  int byrate;
  int64_t bdist;
  int64_t bsse;
  int64_t brdcost;
  int_mv mvs[2];
  ENTROPY_CONTEXT ta[2];
  ENTROPY_CONTEXT tl[2];
} SEG_RDSTAT;

typedef struct {
  int_mv *ref_mv, *second_ref_mv;
  int_mv mvp;

  int64_t segment_rd;
  int r;
  int64_t d;
  int64_t sse;
  int segment_yrate;
  MB_PREDICTION_MODE modes[4];
  SEG_RDSTAT rdstat[4][INTER_MODES];
  int mvthresh;
} BEST_SEG_INFO;

static INLINE int mv_check_bounds(MACROBLOCK *x, int_mv *mv) {
  int r = 0;
  r |= (mv->as_mv.row >> 3) < x->mv_row_min;
  r |= (mv->as_mv.row >> 3) > x->mv_row_max;
  r |= (mv->as_mv.col >> 3) < x->mv_col_min;
  r |= (mv->as_mv.col >> 3) > x->mv_col_max;
  return r;
}
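// Motion vectors here are stored in 1/8-pel units; the >> 3 in
// mv_check_bounds() converts them to full-pel before comparing against
// the allowed x->mv_row/col_min/max window (e.g. as_mv.row = 64 is
// full-pel row 8). A nonzero return means the vector is out of bounds.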
static INLINE void mi_buf_shift(MACROBLOCK *x, int i) {
  MB_MODE_INFO *const mbmi = &x->e_mbd.mi_8x8[0]->mbmi;
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &x->e_mbd.plane[0];

  p->src.buf = raster_block_offset_uint8(BLOCK_8X8, i, p->src.buf,
                                         p->src.stride);
  assert(((intptr_t)pd->pre[0].buf & 0x7) == 0);
  pd->pre[0].buf = raster_block_offset_uint8(BLOCK_8X8, i, pd->pre[0].buf,
                                             pd->pre[0].stride);
  if (mbmi->ref_frame[1])
    pd->pre[1].buf = raster_block_offset_uint8(BLOCK_8X8, i, pd->pre[1].buf,
                                               pd->pre[1].stride);
}

static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src,
                                  struct buf_2d orig_pre[2]) {
  MB_MODE_INFO *mbmi = &x->e_mbd.mi_8x8[0]->mbmi;
  x->plane[0].src = orig_src;
  x->e_mbd.plane[0].pre[0] = orig_pre[0];
  if (mbmi->ref_frame[1])
    x->e_mbd.plane[0].pre[1] = orig_pre[1];
}

static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
                                    BEST_SEG_INFO *bsi_buf, int filter_idx,
                                    int_mv seg_mvs[4][MAX_REF_FRAMES],
                                    int mi_row, int mi_col) {
  int i, j, br = 0, idx, idy;
  int64_t bd = 0, block_sse = 0;
  MB_PREDICTION_MODE this_mode;
  MODE_INFO *mi = x->e_mbd.mi_8x8[0];
  MB_MODE_INFO *const mbmi = &mi->mbmi;
  const int label_count = 4;
  int64_t this_segment_rd = 0;
  int label_mv_thresh;
  int segmentyrate = 0;
  const BLOCK_SIZE bsize = mbmi->sb_type;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  vp9_variance_fn_ptr_t *v_fn_ptr;
  ENTROPY_CONTEXT t_above[2], t_left[2];
  BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
  int mode_idx;
  int subpelmv = 1, have_ref = 0;

  vpx_memcpy(t_above, x->e_mbd.plane[0].above_context, sizeof(t_above));
  vpx_memcpy(t_left, x->e_mbd.plane[0].left_context, sizeof(t_left));

  v_fn_ptr = &cpi->fn_ptr[bsize];

  // A multiplier of 64 makes this threshold really big, effectively making
  // it so that we very rarely check mvs on segments. Setting it to 1 makes
  // the mv threshold roughly equal to what it is for macroblocks.
  label_mv_thresh = 1 * bsi->mvthresh / label_count;
To be replaced with a new rd loop. 1678 int_mv mode_mv[MB_MODE_COUNT], second_mode_mv[MB_MODE_COUNT]; 1679 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; 1680 MB_PREDICTION_MODE mode_selected = ZEROMV; 1681 int64_t best_rd = INT64_MAX; 1682 i = idy * 2 + idx; 1683 1684 frame_mv[ZEROMV][mbmi->ref_frame[0]].as_int = 0; 1685 frame_mv[ZEROMV][mbmi->ref_frame[1]].as_int = 0; 1686 vp9_append_sub8x8_mvs_for_idx(&cpi->common, &x->e_mbd, 1687 &frame_mv[NEARESTMV][mbmi->ref_frame[0]], 1688 &frame_mv[NEARMV][mbmi->ref_frame[0]], 1689 i, 0, mi_row, mi_col); 1690 if (mbmi->ref_frame[1] > 0) 1691 vp9_append_sub8x8_mvs_for_idx(&cpi->common, &x->e_mbd, 1692 &frame_mv[NEARESTMV][mbmi->ref_frame[1]], 1693 &frame_mv[NEARMV][mbmi->ref_frame[1]], 1694 i, 1, mi_row, mi_col); 1695 1696 // search for the best motion vector on this segment 1697 for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) { 1698 const struct buf_2d orig_src = x->plane[0].src; 1699 struct buf_2d orig_pre[2]; 1700 1701 mode_idx = inter_mode_offset(this_mode); 1702 bsi->rdstat[i][mode_idx].brdcost = INT64_MAX; 1703 1704 // if we're near/nearest and mv == 0,0, compare to zeromv 1705 if ((this_mode == NEARMV || this_mode == NEARESTMV || 1706 this_mode == ZEROMV) && 1707 frame_mv[this_mode][mbmi->ref_frame[0]].as_int == 0 && 1708 (mbmi->ref_frame[1] <= 0 || 1709 frame_mv[this_mode][mbmi->ref_frame[1]].as_int == 0)) { 1710 int rfc = mbmi->mode_context[mbmi->ref_frame[0]]; 1711 int c1 = cost_mv_ref(cpi, NEARMV, rfc); 1712 int c2 = cost_mv_ref(cpi, NEARESTMV, rfc); 1713 int c3 = cost_mv_ref(cpi, ZEROMV, rfc); 1714 1715 if (this_mode == NEARMV) { 1716 if (c1 > c3) 1717 continue; 1718 } else if (this_mode == NEARESTMV) { 1719 if (c2 > c3) 1720 continue; 1721 } else { 1722 assert(this_mode == ZEROMV); 1723 if (mbmi->ref_frame[1] <= 0) { 1724 if ((c3 >= c2 && 1725 frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0) || 1726 (c3 >= c1 && 1727 frame_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0)) 1728 continue; 1729 } else { 1730 if ((c3 >= c2 && 1731 frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0 && 1732 frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int == 0) || 1733 (c3 >= c1 && 1734 frame_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0 && 1735 frame_mv[NEARMV][mbmi->ref_frame[1]].as_int == 0)) 1736 continue; 1737 } 1738 } 1739 } 1740 1741 vpx_memcpy(orig_pre, x->e_mbd.plane[0].pre, sizeof(orig_pre)); 1742 vpx_memcpy(bsi->rdstat[i][mode_idx].ta, t_above, 1743 sizeof(bsi->rdstat[i][mode_idx].ta)); 1744 vpx_memcpy(bsi->rdstat[i][mode_idx].tl, t_left, 1745 sizeof(bsi->rdstat[i][mode_idx].tl)); 1746 1747 // motion search for newmv (single predictor case only) 1748 if (mbmi->ref_frame[1] <= 0 && this_mode == NEWMV && 1749 seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) { 1750 int step_param = 0; 1751 int further_steps; 1752 int thissme, bestsme = INT_MAX; 1753 int sadpb = x->sadperbit4; 1754 int_mv mvp_full; 1755 int max_mv; 1756 1757 /* Is the best so far sufficiently good that we can't justify doing 1758 * a new motion search? */ 1759 if (best_rd < label_mv_thresh) 1760 break; 1761 1762 if (cpi->compressor_speed) { 1763 // use previous block's result as next block's MV predictor.
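// The sub-blocks of the 8x8 are raster ordered (0 1 / 2 3), so i - 1 is
// the left neighbour for blocks 1 and 3, while the i == 2 case overrides
// that with block 0, the block directly above.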
1764 if (i > 0) { 1765 bsi->mvp.as_int = mi->bmi[i - 1].as_mv[0].as_int; 1766 if (i == 2) 1767 bsi->mvp.as_int = mi->bmi[i - 2].as_mv[0].as_int; 1768 } 1769 } 1770 if (i == 0) 1771 max_mv = x->max_mv_context[mbmi->ref_frame[0]]; 1772 else 1773 max_mv = MAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3; 1774 1775 if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) { 1776 // Take wtd average of the step_params based on the last frame's 1777 // max mv magnitude and the best ref mvs of the current block for 1778 // the given reference. 1779 step_param = (vp9_init_search_range(cpi, max_mv) + 1780 cpi->mv_step_param) >> 1; 1781 } else { 1782 step_param = cpi->mv_step_param; 1783 } 1784 1785 mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3; 1786 mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3; 1787 1788 if (cpi->sf.adaptive_motion_search && cpi->common.show_frame) { 1789 mvp_full.as_mv.row = x->pred_mv[mbmi->ref_frame[0]].as_mv.row >> 3; 1790 mvp_full.as_mv.col = x->pred_mv[mbmi->ref_frame[0]].as_mv.col >> 3; 1791 step_param = MAX(step_param, 8); 1792 } 1793 1794 further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param; 1795 // adjust src pointer for this block 1796 mi_buf_shift(x, i); 1797 if (cpi->sf.search_method == HEX) { 1798 bestsme = vp9_hex_search(x, &mvp_full, 1799 step_param, 1800 sadpb, 1, v_fn_ptr, 1, 1801 bsi->ref_mv, &mode_mv[NEWMV]); 1802 } else if (cpi->sf.search_method == SQUARE) { 1803 bestsme = vp9_square_search(x, &mvp_full, 1804 step_param, 1805 sadpb, 1, v_fn_ptr, 1, 1806 bsi->ref_mv, &mode_mv[NEWMV]); 1807 } else if (cpi->sf.search_method == BIGDIA) { 1808 bestsme = vp9_bigdia_search(x, &mvp_full, 1809 step_param, 1810 sadpb, 1, v_fn_ptr, 1, 1811 bsi->ref_mv, &mode_mv[NEWMV]); 1812 } else { 1813 bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param, 1814 sadpb, further_steps, 0, v_fn_ptr, 1815 bsi->ref_mv, &mode_mv[NEWMV]); 1816 } 1817 1818 // Should we do a full search (best quality only) 1819 if (cpi->compressor_speed == 0) { 1820 /* Check if mvp_full is within the range. 
*/ 1821 clamp_mv(&mvp_full.as_mv, x->mv_col_min, x->mv_col_max, 1822 x->mv_row_min, x->mv_row_max); 1823 1824 thissme = cpi->full_search_sad(x, &mvp_full, 1825 sadpb, 16, v_fn_ptr, 1826 x->nmvjointcost, x->mvcost, 1827 bsi->ref_mv, i); 1828 1829 if (thissme < bestsme) { 1830 bestsme = thissme; 1831 mode_mv[NEWMV].as_int = mi->bmi[i].as_mv[0].as_int; 1832 } else { 1833 /* The full search result is actually worse so re-instate the 1834 * previous best vector */ 1835 mi->bmi[i].as_mv[0].as_int = mode_mv[NEWMV].as_int; 1836 } 1837 } 1838 1839 if (bestsme < INT_MAX) { 1840 int distortion; 1841 unsigned int sse; 1842 cpi->find_fractional_mv_step(x, &mode_mv[NEWMV], 1843 bsi->ref_mv, x->errorperbit, v_fn_ptr, 1844 0, cpi->sf.subpel_iters_per_step, 1845 x->nmvjointcost, x->mvcost, 1846 &distortion, &sse); 1847 1848 // save motion search result for use in compound prediction 1849 seg_mvs[i][mbmi->ref_frame[0]].as_int = mode_mv[NEWMV].as_int; 1850 } 1851 1852 if (cpi->sf.adaptive_motion_search) 1853 x->pred_mv[mbmi->ref_frame[0]].as_int = mode_mv[NEWMV].as_int; 1854 1855 // restore src pointers 1856 mi_buf_restore(x, orig_src, orig_pre); 1857 } 1858 1859 if (mbmi->ref_frame[1] > 0 && this_mode == NEWMV && 1860 mbmi->interp_filter == EIGHTTAP) { 1861 if (seg_mvs[i][mbmi->ref_frame[1]].as_int == INVALID_MV || 1862 seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) 1863 continue; 1864 1865 // adjust src pointers 1866 mi_buf_shift(x, i); 1867 if (cpi->sf.comp_inter_joint_search_thresh <= bsize) { 1868 int rate_mv; 1869 joint_motion_search(cpi, x, bsize, frame_mv[this_mode], 1870 mi_row, mi_col, seg_mvs[i], 1871 &rate_mv); 1872 seg_mvs[i][mbmi->ref_frame[0]].as_int = 1873 frame_mv[this_mode][mbmi->ref_frame[0]].as_int; 1874 seg_mvs[i][mbmi->ref_frame[1]].as_int = 1875 frame_mv[this_mode][mbmi->ref_frame[1]].as_int; 1876 } 1877 // restore src pointers 1878 mi_buf_restore(x, orig_src, orig_pre); 1879 } 1880 1881 bsi->rdstat[i][mode_idx].brate = 1882 labels2mode(x, i, this_mode, &mode_mv[this_mode], 1883 &second_mode_mv[this_mode], frame_mv, seg_mvs[i], 1884 bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost, 1885 x->mvcost, cpi); 1886 1887 bsi->rdstat[i][mode_idx].mvs[0].as_int = mode_mv[this_mode].as_int; 1888 if (num_4x4_blocks_wide > 1) 1889 bsi->rdstat[i + 1][mode_idx].mvs[0].as_int = 1890 mode_mv[this_mode].as_int; 1891 if (num_4x4_blocks_high > 1) 1892 bsi->rdstat[i + 2][mode_idx].mvs[0].as_int = 1893 mode_mv[this_mode].as_int; 1894 if (mbmi->ref_frame[1] > 0) { 1895 bsi->rdstat[i][mode_idx].mvs[1].as_int = 1896 second_mode_mv[this_mode].as_int; 1897 if (num_4x4_blocks_wide > 1) 1898 bsi->rdstat[i + 1][mode_idx].mvs[1].as_int = 1899 second_mode_mv[this_mode].as_int; 1900 if (num_4x4_blocks_high > 1) 1901 bsi->rdstat[i + 2][mode_idx].mvs[1].as_int = 1902 second_mode_mv[this_mode].as_int; 1903 } 1904 1905 // Trap vectors that reach beyond the UMV borders 1906 if (mv_check_bounds(x, &mode_mv[this_mode])) 1907 continue; 1908 if (mbmi->ref_frame[1] > 0 && 1909 mv_check_bounds(x, &second_mode_mv[this_mode])) 1910 continue; 1911 1912 if (filter_idx > 0) { 1913 BEST_SEG_INFO *ref_bsi = bsi_buf; 1914 subpelmv = (mode_mv[this_mode].as_mv.row & 0x0f) || 1915 (mode_mv[this_mode].as_mv.col & 0x0f); 1916 have_ref = mode_mv[this_mode].as_int == 1917 ref_bsi->rdstat[i][mode_idx].mvs[0].as_int; 1918 if (mbmi->ref_frame[1] > 0) { 1919 subpelmv |= (second_mode_mv[this_mode].as_mv.row & 0x0f) || 1920 (second_mode_mv[this_mode].as_mv.col & 0x0f); 1921 have_ref &= second_mode_mv[this_mode].as_int == 1922 
ref_bsi->rdstat[i][mode_idx].mvs[1].as_int; 1923 } 1924 1925 if (filter_idx > 1 && !subpelmv && !have_ref) { 1926 ref_bsi = bsi_buf + 1; 1927 have_ref = mode_mv[this_mode].as_int == 1928 ref_bsi->rdstat[i][mode_idx].mvs[0].as_int; 1929 if (mbmi->ref_frame[1] > 0) { 1930 have_ref &= second_mode_mv[this_mode].as_int == 1931 ref_bsi->rdstat[i][mode_idx].mvs[1].as_int; 1932 } 1933 } 1934 1935 if (!subpelmv && have_ref && 1936 ref_bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) { 1937 vpx_memcpy(&bsi->rdstat[i][mode_idx], &ref_bsi->rdstat[i][mode_idx], 1938 sizeof(SEG_RDSTAT)); 1939 if (bsi->rdstat[i][mode_idx].brdcost < best_rd) { 1940 mode_selected = this_mode; 1941 best_rd = bsi->rdstat[i][mode_idx].brdcost; 1942 } 1943 continue; 1944 } 1945 } 1946 1947 bsi->rdstat[i][mode_idx].brdcost = 1948 encode_inter_mb_segment(cpi, x, 1949 bsi->segment_rd - this_segment_rd, i, 1950 &bsi->rdstat[i][mode_idx].byrate, 1951 &bsi->rdstat[i][mode_idx].bdist, 1952 &bsi->rdstat[i][mode_idx].bsse, 1953 bsi->rdstat[i][mode_idx].ta, 1954 bsi->rdstat[i][mode_idx].tl); 1955 if (bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) { 1956 bsi->rdstat[i][mode_idx].brdcost += RDCOST(x->rdmult, x->rddiv, 1957 bsi->rdstat[i][mode_idx].brate, 0); 1958 bsi->rdstat[i][mode_idx].brate += bsi->rdstat[i][mode_idx].byrate; 1959 bsi->rdstat[i][mode_idx].eobs = x->e_mbd.plane[0].eobs[i]; 1960 } 1961 1962 if (bsi->rdstat[i][mode_idx].brdcost < best_rd) { 1963 mode_selected = this_mode; 1964 best_rd = bsi->rdstat[i][mode_idx].brdcost; 1965 } 1966 } /*for each 4x4 mode*/ 1967 1968 if (best_rd == INT64_MAX) { 1969 int iy, midx; 1970 for (iy = i + 1; iy < 4; ++iy) 1971 for (midx = 0; midx < INTER_MODES; ++midx) 1972 bsi->rdstat[iy][midx].brdcost = INT64_MAX; 1973 bsi->segment_rd = INT64_MAX; 1974 return; 1975 } 1976 1977 mode_idx = inter_mode_offset(mode_selected); 1978 vpx_memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above)); 1979 vpx_memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left)); 1980 1981 labels2mode(x, i, mode_selected, &mode_mv[mode_selected], 1982 &second_mode_mv[mode_selected], frame_mv, seg_mvs[i], 1983 bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost, 1984 x->mvcost, cpi); 1985 1986 br += bsi->rdstat[i][mode_idx].brate; 1987 bd += bsi->rdstat[i][mode_idx].bdist; 1988 block_sse += bsi->rdstat[i][mode_idx].bsse; 1989 segmentyrate += bsi->rdstat[i][mode_idx].byrate; 1990 this_segment_rd += bsi->rdstat[i][mode_idx].brdcost; 1991 1992 if (this_segment_rd > bsi->segment_rd) { 1993 int iy, midx; 1994 for (iy = i + 1; iy < 4; ++iy) 1995 for (midx = 0; midx < INTER_MODES; ++midx) 1996 bsi->rdstat[iy][midx].brdcost = INT64_MAX; 1997 bsi->segment_rd = INT64_MAX; 1998 return; 1999 } 2000 2001 for (j = 1; j < num_4x4_blocks_high; ++j) 2002 vpx_memcpy(&x->partition_info->bmi[i + j * 2], 2003 &x->partition_info->bmi[i], 2004 sizeof(x->partition_info->bmi[i])); 2005 for (j = 1; j < num_4x4_blocks_wide; ++j) 2006 vpx_memcpy(&x->partition_info->bmi[i + j], 2007 &x->partition_info->bmi[i], 2008 sizeof(x->partition_info->bmi[i])); 2009 } 2010 } /* for each label */ 2011 2012 bsi->r = br; 2013 bsi->d = bd; 2014 bsi->segment_yrate = segmentyrate; 2015 bsi->segment_rd = this_segment_rd; 2016 bsi->sse = block_sse; 2017 2018 // update the coding decisions 2019 for (i = 0; i < 4; ++i) 2020 bsi->modes[i] = x->partition_info->bmi[i].mode; 2021 } 2022 2023 static int64_t rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x, 2024 int_mv *best_ref_mv, 2025 int_mv *second_best_ref_mv, 2026 int64_t best_rd, 2027 int *returntotrate, 2028 int 
*returnyrate, 2029 int64_t *returndistortion, 2030 int *skippable, int64_t *psse, 2031 int mvthresh, 2032 int_mv seg_mvs[4][MAX_REF_FRAMES], 2033 BEST_SEG_INFO *bsi_buf, 2034 int filter_idx, 2035 int mi_row, int mi_col) { 2036 int i; 2037 BEST_SEG_INFO *bsi = bsi_buf + filter_idx; 2038 MACROBLOCKD *xd = &x->e_mbd; 2039 MODE_INFO *mi = xd->this_mi; 2040 MB_MODE_INFO *mbmi = &mi->mbmi; 2041 int mode_idx; 2042 2043 vp9_zero(*bsi); 2044 2045 bsi->segment_rd = best_rd; 2046 bsi->ref_mv = best_ref_mv; 2047 bsi->second_ref_mv = second_best_ref_mv; 2048 bsi->mvp.as_int = best_ref_mv->as_int; 2049 bsi->mvthresh = mvthresh; 2050 2051 for (i = 0; i < 4; i++) 2052 bsi->modes[i] = ZEROMV; 2053 2054 rd_check_segment_txsize(cpi, x, bsi_buf, filter_idx, seg_mvs, mi_row, mi_col); 2055 2056 if (bsi->segment_rd > best_rd) 2057 return INT64_MAX; 2058 /* set it to the best */ 2059 for (i = 0; i < 4; i++) { 2060 mode_idx = inter_mode_offset(bsi->modes[i]); 2061 mi->bmi[i].as_mv[0].as_int = bsi->rdstat[i][mode_idx].mvs[0].as_int; 2062 if (mbmi->ref_frame[1] > 0) 2063 mi->bmi[i].as_mv[1].as_int = bsi->rdstat[i][mode_idx].mvs[1].as_int; 2064 xd->plane[0].eobs[i] = bsi->rdstat[i][mode_idx].eobs; 2065 x->partition_info->bmi[i].mode = bsi->modes[i]; 2066 } 2067 2068 /* 2069 * used to set mbmi->mv.as_int 2070 */ 2071 *returntotrate = bsi->r; 2072 *returndistortion = bsi->d; 2073 *returnyrate = bsi->segment_yrate; 2074 *skippable = vp9_is_skippable_in_plane(&x->e_mbd, BLOCK_8X8, 0); 2075 *psse = bsi->sse; 2076 mbmi->mode = bsi->modes[3]; 2077 2078 return bsi->segment_rd; 2079 } 2080 2081 static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x, 2082 uint8_t *ref_y_buffer, int ref_y_stride, 2083 int ref_frame, BLOCK_SIZE block_size ) { 2084 MACROBLOCKD *xd = &x->e_mbd; 2085 MB_MODE_INFO *mbmi = &xd->this_mi->mbmi; 2086 int_mv this_mv; 2087 int i; 2088 int zero_seen = 0; 2089 int best_index = 0; 2090 int best_sad = INT_MAX; 2091 int this_sad = INT_MAX; 2092 unsigned int max_mv = 0; 2093 2094 uint8_t *src_y_ptr = x->plane[0].src.buf; 2095 uint8_t *ref_y_ptr; 2096 int row_offset, col_offset; 2097 int num_mv_refs = MAX_MV_REF_CANDIDATES + 2098 (cpi->sf.adaptive_motion_search && 2099 cpi->common.show_frame && 2100 block_size < cpi->sf.max_partition_size); 2101 2102 // Get the sad for each candidate reference mv 2103 for (i = 0; i < num_mv_refs; i++) { 2104 this_mv.as_int = (i < MAX_MV_REF_CANDIDATES) ? 2105 mbmi->ref_mvs[ref_frame][i].as_int : x->pred_mv[ref_frame].as_int; 2106 2107 max_mv = MAX(max_mv, 2108 MAX(abs(this_mv.as_mv.row), abs(this_mv.as_mv.col)) >> 3); 2109 // The list is at an end if we see 0 for a second time. 2110 if (!this_mv.as_int && zero_seen) 2111 break; 2112 zero_seen = zero_seen || !this_mv.as_int; 2113 2114 row_offset = this_mv.as_mv.row >> 3; 2115 col_offset = this_mv.as_mv.col >> 3; 2116 ref_y_ptr = ref_y_buffer + (ref_y_stride * row_offset) + col_offset; 2117 2118 // Find sad for current vector. 2119 this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride, 2120 ref_y_ptr, ref_y_stride, 2121 0x7fffffff); 2122 2123 // Note if it is the best so far. 2124 if (this_sad < best_sad) { 2125 best_sad = this_sad; 2126 best_index = i; 2127 } 2128 } 2129 2130 // Note the index of the mv that worked best in the reference list. 
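// Both values noted here are consumed later: mv_best_ref_index seeds
// mvp_full in single_motion_search(), and max_mv_context feeds
// vp9_init_search_range() when sf.auto_mv_step_size is enabled.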
2131 x->mv_best_ref_index[ref_frame] = best_index; 2132 x->max_mv_context[ref_frame] = max_mv; 2133 } 2134 2135 static void estimate_ref_frame_costs(VP9_COMP *cpi, int segment_id, 2136 unsigned int *ref_costs_single, 2137 unsigned int *ref_costs_comp, 2138 vp9_prob *comp_mode_p) { 2139 VP9_COMMON *const cm = &cpi->common; 2140 MACROBLOCKD *const xd = &cpi->mb.e_mbd; 2141 int seg_ref_active = vp9_segfeature_active(&cm->seg, segment_id, 2142 SEG_LVL_REF_FRAME); 2143 if (seg_ref_active) { 2144 vpx_memset(ref_costs_single, 0, MAX_REF_FRAMES * sizeof(*ref_costs_single)); 2145 vpx_memset(ref_costs_comp, 0, MAX_REF_FRAMES * sizeof(*ref_costs_comp)); 2146 *comp_mode_p = 128; 2147 } else { 2148 vp9_prob intra_inter_p = vp9_get_pred_prob_intra_inter(cm, xd); 2149 vp9_prob comp_inter_p = 128; 2150 2151 if (cm->comp_pred_mode == HYBRID_PREDICTION) { 2152 comp_inter_p = vp9_get_pred_prob_comp_inter_inter(cm, xd); 2153 *comp_mode_p = comp_inter_p; 2154 } else { 2155 *comp_mode_p = 128; 2156 } 2157 2158 ref_costs_single[INTRA_FRAME] = vp9_cost_bit(intra_inter_p, 0); 2159 2160 if (cm->comp_pred_mode != COMP_PREDICTION_ONLY) { 2161 vp9_prob ref_single_p1 = vp9_get_pred_prob_single_ref_p1(cm, xd); 2162 vp9_prob ref_single_p2 = vp9_get_pred_prob_single_ref_p2(cm, xd); 2163 unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1); 2164 2165 if (cm->comp_pred_mode == HYBRID_PREDICTION) 2166 base_cost += vp9_cost_bit(comp_inter_p, 0); 2167 2168 ref_costs_single[LAST_FRAME] = ref_costs_single[GOLDEN_FRAME] = 2169 ref_costs_single[ALTREF_FRAME] = base_cost; 2170 ref_costs_single[LAST_FRAME] += vp9_cost_bit(ref_single_p1, 0); 2171 ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p1, 1); 2172 ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p1, 1); 2173 ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p2, 0); 2174 ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p2, 1); 2175 } else { 2176 ref_costs_single[LAST_FRAME] = 512; 2177 ref_costs_single[GOLDEN_FRAME] = 512; 2178 ref_costs_single[ALTREF_FRAME] = 512; 2179 } 2180 if (cm->comp_pred_mode != SINGLE_PREDICTION_ONLY) { 2181 vp9_prob ref_comp_p = vp9_get_pred_prob_comp_ref_p(cm, xd); 2182 unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1); 2183 2184 if (cm->comp_pred_mode == HYBRID_PREDICTION) 2185 base_cost += vp9_cost_bit(comp_inter_p, 1); 2186 2187 ref_costs_comp[LAST_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 0); 2188 ref_costs_comp[GOLDEN_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 1); 2189 } else { 2190 ref_costs_comp[LAST_FRAME] = 512; 2191 ref_costs_comp[GOLDEN_FRAME] = 512; 2192 } 2193 } 2194 } 2195 2196 static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, 2197 int mode_index, 2198 PARTITION_INFO *partition, 2199 int_mv *ref_mv, 2200 int_mv *second_ref_mv, 2201 int64_t comp_pred_diff[NB_PREDICTION_TYPES], 2202 int64_t tx_size_diff[TX_MODES], 2203 int64_t best_filter_diff[SWITCHABLE_FILTERS + 1]) { 2204 MACROBLOCKD *const xd = &x->e_mbd; 2205 2206 // Take a snapshot of the coding context so it can be 2207 // restored if we decide to encode this way 2208 ctx->skip = x->skip; 2209 ctx->best_mode_index = mode_index; 2210 ctx->mic = *xd->this_mi; 2211 2212 if (partition) 2213 ctx->partition_info = *partition; 2214 2215 ctx->best_ref_mv.as_int = ref_mv->as_int; 2216 ctx->second_best_ref_mv.as_int = second_ref_mv->as_int; 2217 2218 ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_PREDICTION_ONLY]; 2219 ctx->comp_pred_diff = (int)comp_pred_diff[COMP_PREDICTION_ONLY]; 2220 ctx->hybrid_pred_diff = 
(int)comp_pred_diff[HYBRID_PREDICTION]; 2221 2222 // Note: sizeof(ctx->tx_rd_diff) gives the size of the whole array here, 2223 // because tx_rd_diff is an array member rather than a pointer. 2224 memcpy(ctx->tx_rd_diff, tx_size_diff, sizeof(ctx->tx_rd_diff)); 2225 memcpy(ctx->best_filter_diff, best_filter_diff, 2226 sizeof(*best_filter_diff) * (SWITCHABLE_FILTERS + 1)); 2227 } 2228 2229 static void setup_pred_block(const MACROBLOCKD *xd, 2230 struct buf_2d dst[MAX_MB_PLANE], 2231 const YV12_BUFFER_CONFIG *src, 2232 int mi_row, int mi_col, 2233 const struct scale_factors *scale, 2234 const struct scale_factors *scale_uv) { 2235 int i; 2236 2237 dst[0].buf = src->y_buffer; 2238 dst[0].stride = src->y_stride; 2239 dst[1].buf = src->u_buffer; 2240 dst[2].buf = src->v_buffer; 2241 dst[1].stride = dst[2].stride = src->uv_stride; 2242 #if CONFIG_ALPHA 2243 dst[3].buf = src->alpha_buffer; 2244 dst[3].stride = src->alpha_stride; 2245 #endif 2246 2247 // TODO(jkoleszar): Make scale factors per-plane data 2248 for (i = 0; i < MAX_MB_PLANE; i++) { 2249 setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col, 2250 i ? scale_uv : scale, 2251 xd->plane[i].subsampling_x, xd->plane[i].subsampling_y); 2252 } 2253 } 2254 2255 static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, 2256 int idx, MV_REFERENCE_FRAME frame_type, 2257 BLOCK_SIZE block_size, 2258 int mi_row, int mi_col, 2259 int_mv frame_nearest_mv[MAX_REF_FRAMES], 2260 int_mv frame_near_mv[MAX_REF_FRAMES], 2261 struct buf_2d yv12_mb[4][MAX_MB_PLANE], 2262 struct scale_factors scale[MAX_REF_FRAMES]) { 2263 VP9_COMMON *cm = &cpi->common; 2264 YV12_BUFFER_CONFIG *yv12 = &cm->yv12_fb[cpi->common.ref_frame_map[idx]]; 2265 MACROBLOCKD *const xd = &x->e_mbd; 2266 MB_MODE_INFO *const mbmi = &xd->this_mi->mbmi; 2267 2268 // set up scaling factors 2269 scale[frame_type] = cpi->common.active_ref_scale[frame_type - 1]; 2270 2271 scale[frame_type].x_offset_q4 = 2272 ROUND_POWER_OF_TWO(mi_col * MI_SIZE * scale[frame_type].x_scale_fp, 2273 REF_SCALE_SHIFT) & 0xf; 2274 scale[frame_type].y_offset_q4 = 2275 ROUND_POWER_OF_TWO(mi_row * MI_SIZE * scale[frame_type].y_scale_fp, 2276 REF_SCALE_SHIFT) & 0xf; 2277 2278 // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this 2279 // use the UV scaling factors. 2280 setup_pred_block(xd, yv12_mb[frame_type], yv12, mi_row, mi_col, 2281 &scale[frame_type], &scale[frame_type]); 2282 2283 // Gets an initial list of candidate vectors from neighbours and orders them 2284 vp9_find_mv_refs(&cpi->common, xd, xd->this_mi, 2285 xd->last_mi, 2286 frame_type, 2287 mbmi->ref_mvs[frame_type], mi_row, mi_col); 2288 2289 // Candidate refinement carried out at encoder and decoder 2290 vp9_find_best_ref_mvs(xd, 2291 mbmi->ref_mvs[frame_type], 2292 &frame_nearest_mv[frame_type], 2293 &frame_near_mv[frame_type]); 2294 2295 // Further refinement that is encode side only to test the top few candidates 2296 // in full and choose the best as the centre point for subsequent searches. 2297 // The current implementation doesn't support scaling.
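// mv_pred() indexes the reference buffer directly with full-pel offsets
// derived from each candidate MV, which is only valid when the reference
// is at the same resolution as the current frame; hence the guard below.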
2298 if (!vp9_is_scaled(&scale[frame_type])) 2299 mv_pred(cpi, x, yv12_mb[frame_type][0].buf, yv12->y_stride, 2300 frame_type, block_size); 2301 } 2302 2303 static YV12_BUFFER_CONFIG *get_scaled_ref_frame(VP9_COMP *cpi, int ref_frame) { 2304 YV12_BUFFER_CONFIG *scaled_ref_frame = NULL; 2305 int fb = get_ref_frame_idx(cpi, ref_frame); 2306 int fb_scale = get_scale_ref_frame_idx(cpi, ref_frame); 2307 if (cpi->scaled_ref_idx[fb_scale] != cpi->common.ref_frame_map[fb]) 2308 scaled_ref_frame = &cpi->common.yv12_fb[cpi->scaled_ref_idx[fb_scale]]; 2309 return scaled_ref_frame; 2310 } 2311 2312 static INLINE int get_switchable_rate(const MACROBLOCK *x) { 2313 const MACROBLOCKD *const xd = &x->e_mbd; 2314 const MB_MODE_INFO *const mbmi = &xd->this_mi->mbmi; 2315 const int ctx = vp9_get_pred_context_switchable_interp(xd); 2316 return SWITCHABLE_INTERP_RATE_FACTOR * 2317 x->switchable_interp_costs[ctx][mbmi->interp_filter]; 2318 } 2319 2320 static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, 2321 BLOCK_SIZE bsize, 2322 int mi_row, int mi_col, 2323 int_mv *tmp_mv, int *rate_mv) { 2324 MACROBLOCKD *xd = &x->e_mbd; 2325 VP9_COMMON *cm = &cpi->common; 2326 MB_MODE_INFO *mbmi = &xd->this_mi->mbmi; 2327 struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}}; 2328 int bestsme = INT_MAX; 2329 int further_steps, step_param; 2330 int sadpb = x->sadperbit16; 2331 int_mv mvp_full; 2332 int ref = mbmi->ref_frame[0]; 2333 int_mv ref_mv = mbmi->ref_mvs[ref][0]; 2334 const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]); 2335 2336 int tmp_col_min = x->mv_col_min; 2337 int tmp_col_max = x->mv_col_max; 2338 int tmp_row_min = x->mv_row_min; 2339 int tmp_row_max = x->mv_row_max; 2340 2341 YV12_BUFFER_CONFIG *scaled_ref_frame = get_scaled_ref_frame(cpi, ref); 2342 2343 if (scaled_ref_frame) { 2344 int i; 2345 // Swap out the reference frame for a version that's been scaled to 2346 // match the resolution of the current frame, allowing the existing 2347 // motion search code to be used without additional modifications. 2348 for (i = 0; i < MAX_MB_PLANE; i++) 2349 backup_yv12[i] = xd->plane[i].pre[0]; 2350 2351 setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL); 2352 } 2353 2354 vp9_clamp_mv_min_max(x, &ref_mv.as_mv); 2355 2356 // Adjust search parameters based on small partitions' result. 2357 if (x->fast_ms) { 2358 // && abs(mvp_full.as_mv.row - x->pred_mv.as_mv.row) < 24 && 2359 // abs(mvp_full.as_mv.col - x->pred_mv.as_mv.col) < 24) { 2360 // adjust search range 2361 step_param = 6; 2362 if (x->fast_ms > 1) 2363 step_param = 8; 2364 2365 // Get prediction MV. 2366 mvp_full.as_int = x->pred_mv[ref].as_int; 2367 2368 // Adjust MV sign if needed. 2369 if (cm->ref_frame_sign_bias[ref]) { 2370 mvp_full.as_mv.col *= -1; 2371 mvp_full.as_mv.row *= -1; 2372 } 2373 } else { 2374 // Work out the size of the first step in the mv step search. 2375 // 0 here is maximum length first step. 1 is MAX >> 1 etc. 2376 if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) { 2377 // Take wtd average of the step_params based on the last frame's 2378 // max mv magnitude and that based on the best ref mvs of the current 2379 // block for the given reference. 
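// Illustrative numbers: if vp9_init_search_range() maps the last frame's
// max MV magnitude to 3 and cpi->mv_step_param is 7, the blended first
// step below is (3 + 7) >> 1 = 5.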
2380 step_param = (vp9_init_search_range(cpi, x->max_mv_context[ref]) + 2381 cpi->mv_step_param) >> 1; 2382 } else { 2383 step_param = cpi->mv_step_param; 2384 } 2385 } 2386 2387 if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64 && 2388 cpi->common.show_frame) { 2389 int boffset = 2 * (b_width_log2(BLOCK_64X64) - MIN(b_height_log2(bsize), 2390 b_width_log2(bsize))); 2391 step_param = MAX(step_param, boffset); 2392 } 2393 2394 mvp_full.as_int = x->mv_best_ref_index[ref] < MAX_MV_REF_CANDIDATES ? 2395 mbmi->ref_mvs[ref][x->mv_best_ref_index[ref]].as_int : 2396 x->pred_mv[ref].as_int; 2397 2398 mvp_full.as_mv.col >>= 3; 2399 mvp_full.as_mv.row >>= 3; 2400 2401 // Further step/diamond searches as necessary 2402 further_steps = (cpi->sf.max_step_search_steps - 1) - step_param; 2403 2404 if (cpi->sf.search_method == HEX) { 2405 bestsme = vp9_hex_search(x, &mvp_full, 2406 step_param, 2407 sadpb, 1, 2408 &cpi->fn_ptr[block_size], 1, 2409 &ref_mv, tmp_mv); 2410 } else if (cpi->sf.search_method == SQUARE) { 2411 bestsme = vp9_square_search(x, &mvp_full, 2412 step_param, 2413 sadpb, 1, 2414 &cpi->fn_ptr[block_size], 1, 2415 &ref_mv, tmp_mv); 2416 } else if (cpi->sf.search_method == BIGDIA) { 2417 bestsme = vp9_bigdia_search(x, &mvp_full, 2418 step_param, 2419 sadpb, 1, 2420 &cpi->fn_ptr[block_size], 1, 2421 &ref_mv, tmp_mv); 2422 } else { 2423 bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param, 2424 sadpb, further_steps, 1, 2425 &cpi->fn_ptr[block_size], 2426 &ref_mv, tmp_mv); 2427 } 2428 2429 x->mv_col_min = tmp_col_min; 2430 x->mv_col_max = tmp_col_max; 2431 x->mv_row_min = tmp_row_min; 2432 x->mv_row_max = tmp_row_max; 2433 2434 if (bestsme < INT_MAX) { 2435 int dis; /* TODO: use dis in distortion calculation later. */ 2436 unsigned int sse; 2437 cpi->find_fractional_mv_step(x, tmp_mv, &ref_mv, 2438 x->errorperbit, 2439 &cpi->fn_ptr[block_size], 2440 0, cpi->sf.subpel_iters_per_step, 2441 x->nmvjointcost, x->mvcost, 2442 &dis, &sse); 2443 } 2444 *rate_mv = vp9_mv_bit_cost(tmp_mv, &ref_mv, 2445 x->nmvjointcost, x->mvcost, 2446 96); 2447 2448 if (cpi->sf.adaptive_motion_search && cpi->common.show_frame) 2449 x->pred_mv[ref].as_int = tmp_mv->as_int; 2450 2451 if (scaled_ref_frame) { 2452 int i; 2453 for (i = 0; i < MAX_MB_PLANE; i++) 2454 xd->plane[i].pre[0] = backup_yv12[i]; 2455 } 2456 } 2457 2458 static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, 2459 BLOCK_SIZE bsize, 2460 int_mv *frame_mv, 2461 int mi_row, int mi_col, 2462 int_mv single_newmv[MAX_REF_FRAMES], 2463 int *rate_mv) { 2464 int pw = 4 << b_width_log2(bsize), ph = 4 << b_height_log2(bsize); 2465 MACROBLOCKD *xd = &x->e_mbd; 2466 MB_MODE_INFO *mbmi = &xd->this_mi->mbmi; 2467 int refs[2] = { mbmi->ref_frame[0], 2468 (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) }; 2469 int_mv ref_mv[2]; 2470 const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]); 2471 int ite; 2472 // Prediction buffer from second frame. 2473 uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t)); 2474 2475 // Do joint motion search in compound mode to get more accurate mv. 
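// The loop below alternates between the two references (id = ite % 2):
// each iteration holds the other reference's prediction fixed in
// second_pred while refining the MV for reference id, and the search
// stops early once an iteration fails to improve last_besterr[id].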
2476 struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}}; 2477 struct buf_2d backup_second_yv12[MAX_MB_PLANE] = {{0}}; 2478 struct buf_2d scaled_first_yv12; 2479 int last_besterr[2] = {INT_MAX, INT_MAX}; 2480 YV12_BUFFER_CONFIG *scaled_ref_frame[2] = {NULL, NULL}; 2481 scaled_ref_frame[0] = get_scaled_ref_frame(cpi, mbmi->ref_frame[0]); 2482 scaled_ref_frame[1] = get_scaled_ref_frame(cpi, mbmi->ref_frame[1]); 2483 2484 ref_mv[0] = mbmi->ref_mvs[refs[0]][0]; 2485 ref_mv[1] = mbmi->ref_mvs[refs[1]][0]; 2486 2487 if (scaled_ref_frame[0]) { 2488 int i; 2489 // Swap out the reference frame for a version that's been scaled to 2490 // match the resolution of the current frame, allowing the existing 2491 // motion search code to be used without additional modifications. 2492 for (i = 0; i < MAX_MB_PLANE; i++) 2493 backup_yv12[i] = xd->plane[i].pre[0]; 2494 setup_pre_planes(xd, 0, scaled_ref_frame[0], mi_row, mi_col, NULL); 2495 } 2496 2497 if (scaled_ref_frame[1]) { 2498 int i; 2499 for (i = 0; i < MAX_MB_PLANE; i++) 2500 backup_second_yv12[i] = xd->plane[i].pre[1]; 2501 2502 setup_pre_planes(xd, 0, scaled_ref_frame[1], mi_row, mi_col, NULL); 2503 } 2504 2505 xd->scale_factor[0].set_scaled_offsets(&xd->scale_factor[0], 2506 mi_row, mi_col); 2507 xd->scale_factor[1].set_scaled_offsets(&xd->scale_factor[1], 2508 mi_row, mi_col); 2509 scaled_first_yv12 = xd->plane[0].pre[0]; 2510 2511 // Initialize mv using single prediction mode result. 2512 frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int; 2513 frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int; 2514 2515 // Allow joint search multiple times iteratively for each ref frame 2516 // and break out the search loop if it couldn't find better mv. 2517 for (ite = 0; ite < 4; ite++) { 2518 struct buf_2d ref_yv12[2]; 2519 int bestsme = INT_MAX; 2520 int sadpb = x->sadperbit16; 2521 int_mv tmp_mv; 2522 int search_range = 3; 2523 2524 int tmp_col_min = x->mv_col_min; 2525 int tmp_col_max = x->mv_col_max; 2526 int tmp_row_min = x->mv_row_min; 2527 int tmp_row_max = x->mv_row_max; 2528 int id = ite % 2; 2529 2530 // Initialized here because of compiler problem in Visual Studio. 2531 ref_yv12[0] = xd->plane[0].pre[0]; 2532 ref_yv12[1] = xd->plane[0].pre[1]; 2533 2534 // Get pred block from second frame. 2535 vp9_build_inter_predictor(ref_yv12[!id].buf, 2536 ref_yv12[!id].stride, 2537 second_pred, pw, 2538 &frame_mv[refs[!id]].as_mv, 2539 &xd->scale_factor[!id], 2540 pw, ph, 0, 2541 &xd->subpix, MV_PRECISION_Q3); 2542 2543 // Compound motion search on first ref frame. 2544 if (id) 2545 xd->plane[0].pre[0] = ref_yv12[id]; 2546 vp9_clamp_mv_min_max(x, &ref_mv[id].as_mv); 2547 2548 // Use mv result from single mode as mvp. 2549 tmp_mv.as_int = frame_mv[refs[id]].as_int; 2550 2551 tmp_mv.as_mv.col >>= 3; 2552 tmp_mv.as_mv.row >>= 3; 2553 2554 // Small-range full-pixel motion search 2555 bestsme = vp9_refining_search_8p_c(x, &tmp_mv, sadpb, 2556 search_range, 2557 &cpi->fn_ptr[block_size], 2558 x->nmvjointcost, x->mvcost, 2559 &ref_mv[id], second_pred, 2560 pw, ph); 2561 2562 x->mv_col_min = tmp_col_min; 2563 x->mv_col_max = tmp_col_max; 2564 x->mv_row_min = tmp_row_min; 2565 x->mv_row_max = tmp_row_max; 2566 2567 if (bestsme < INT_MAX) { 2568 int dis; /* TODO: use dis in distortion calculation later. 
*/ 2569 unsigned int sse; 2570 2571 bestsme = cpi->find_fractional_mv_step_comp( 2572 x, &tmp_mv, 2573 &ref_mv[id], 2574 x->errorperbit, 2575 &cpi->fn_ptr[block_size], 2576 0, cpi->sf.subpel_iters_per_step, 2577 x->nmvjointcost, x->mvcost, 2578 &dis, &sse, second_pred, 2579 pw, ph); 2580 } 2581 2582 if (id) 2583 xd->plane[0].pre[0] = scaled_first_yv12; 2584 2585 if (bestsme < last_besterr[id]) { 2586 frame_mv[refs[id]].as_int = tmp_mv.as_int; 2587 last_besterr[id] = bestsme; 2588 } else { 2589 break; 2590 } 2591 } 2592 2593 // restore the predictor 2594 if (scaled_ref_frame[0]) { 2595 int i; 2596 for (i = 0; i < MAX_MB_PLANE; i++) 2597 xd->plane[i].pre[0] = backup_yv12[i]; 2598 } 2599 2600 if (scaled_ref_frame[1]) { 2601 int i; 2602 for (i = 0; i < MAX_MB_PLANE; i++) 2603 xd->plane[i].pre[1] = backup_second_yv12[i]; 2604 } 2605 *rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]], 2606 &mbmi->ref_mvs[refs[0]][0], 2607 x->nmvjointcost, x->mvcost, 96); 2608 *rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]], 2609 &mbmi->ref_mvs[refs[1]][0], 2610 x->nmvjointcost, x->mvcost, 96); 2611 2612 vpx_free(second_pred); 2613 } 2614 2615 static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, 2616 BLOCK_SIZE bsize, 2617 int64_t txfm_cache[], 2618 int *rate2, int64_t *distortion, 2619 int *skippable, 2620 int *rate_y, int64_t *distortion_y, 2621 int *rate_uv, int64_t *distortion_uv, 2622 int *mode_excluded, int *disable_skip, 2623 INTERPOLATIONFILTERTYPE *best_filter, 2624 int_mv (*mode_mv)[MAX_REF_FRAMES], 2625 int mi_row, int mi_col, 2626 int_mv single_newmv[MAX_REF_FRAMES], 2627 int64_t *psse, 2628 const int64_t ref_best_rd) { 2629 VP9_COMMON *cm = &cpi->common; 2630 MACROBLOCKD *xd = &x->e_mbd; 2631 MB_MODE_INFO *mbmi = &xd->this_mi->mbmi; 2632 const int is_comp_pred = (mbmi->ref_frame[1] > 0); 2633 const int num_refs = is_comp_pred ? 2 : 1; 2634 const int this_mode = mbmi->mode; 2635 int_mv *frame_mv = mode_mv[this_mode]; 2636 int i; 2637 int refs[2] = { mbmi->ref_frame[0], 2638 (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) }; 2639 int_mv cur_mv[2]; 2640 int64_t this_rd = 0; 2641 DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, MAX_MB_PLANE * 64 * 64); 2642 int pred_exists = 0; 2643 int intpel_mv; 2644 int64_t rd, best_rd = INT64_MAX; 2645 int best_needs_copy = 0; 2646 uint8_t *orig_dst[MAX_MB_PLANE]; 2647 int orig_dst_stride[MAX_MB_PLANE]; 2648 int rs = 0; 2649 2650 if (this_mode == NEWMV) { 2651 int rate_mv; 2652 if (is_comp_pred) { 2653 // Initialize mv using single prediction mode result. 
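// The compound search is seeded with the per-reference single-prediction
// MVs; if either reference never produced a valid single-mode MV
// (INVALID_MV), this mode is abandoned below with INT64_MAX.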
frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int; 2655 frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int; 2656 2657 if (cpi->sf.comp_inter_joint_search_thresh <= bsize) { 2658 joint_motion_search(cpi, x, bsize, frame_mv, 2659 mi_row, mi_col, single_newmv, &rate_mv); 2660 } else { 2661 rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]], 2662 &mbmi->ref_mvs[refs[0]][0], 2663 x->nmvjointcost, x->mvcost, 96); 2664 rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]], 2665 &mbmi->ref_mvs[refs[1]][0], 2666 x->nmvjointcost, x->mvcost, 96); 2667 } 2668 if (frame_mv[refs[0]].as_int == INVALID_MV || 2669 frame_mv[refs[1]].as_int == INVALID_MV) 2670 return INT64_MAX; 2671 *rate2 += rate_mv; 2672 } else { 2673 int_mv tmp_mv; 2674 single_motion_search(cpi, x, bsize, mi_row, mi_col, &tmp_mv, &rate_mv); 2675 *rate2 += rate_mv; 2676 frame_mv[refs[0]].as_int = 2677 xd->this_mi->bmi[0].as_mv[0].as_int = tmp_mv.as_int; 2678 single_newmv[refs[0]].as_int = tmp_mv.as_int; 2679 } 2680 } 2681 2682 // if we're near/nearest and mv == 0,0, compare to zeromv 2683 if ((this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) && 2684 frame_mv[refs[0]].as_int == 0 && 2685 !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) && 2686 (num_refs == 1 || frame_mv[refs[1]].as_int == 0)) { 2687 int rfc = mbmi->mode_context[mbmi->ref_frame[0]]; 2688 int c1 = cost_mv_ref(cpi, NEARMV, rfc); 2689 int c2 = cost_mv_ref(cpi, NEARESTMV, rfc); 2690 int c3 = cost_mv_ref(cpi, ZEROMV, rfc); 2691 2692 if (this_mode == NEARMV) { 2693 if (c1 > c3) 2694 return INT64_MAX; 2695 } else if (this_mode == NEARESTMV) { 2696 if (c2 > c3) 2697 return INT64_MAX; 2698 } else { 2699 assert(this_mode == ZEROMV); 2700 if (num_refs == 1) { 2701 if ((c3 >= c2 && 2702 mode_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0) || 2703 (c3 >= c1 && 2704 mode_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0)) 2705 return INT64_MAX; 2706 } else { 2707 if ((c3 >= c2 && 2708 mode_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0 && 2709 mode_mv[NEARESTMV][mbmi->ref_frame[1]].as_int == 0) || 2710 (c3 >= c1 && 2711 mode_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0 && 2712 mode_mv[NEARMV][mbmi->ref_frame[1]].as_int == 0)) 2713 return INT64_MAX; 2714 } 2715 } 2716 } 2717 2718 for (i = 0; i < num_refs; ++i) { 2719 cur_mv[i] = frame_mv[refs[i]]; 2720 // Clip "next_nearest" so that it does not extend too far out of the image 2721 if (this_mode != NEWMV) 2722 clamp_mv2(&cur_mv[i].as_mv, xd); 2723 2724 if (mv_check_bounds(x, &cur_mv[i])) 2725 return INT64_MAX; 2726 mbmi->mv[i].as_int = cur_mv[i].as_int; 2727 } 2728 2729 // Do the first prediction into the destination buffer, and the next 2730 // prediction into a temporary buffer. Then keep track of which one 2731 // of these currently holds the best predictor, and use the other 2732 // one for future predictions. In the end, copy from tmp_buf to 2733 // dst if necessary.
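// Sketch of the ping-pong scheme: orig_dst[] snapshots the real
// destination pointers, tmp_buf provides 64x64 per-plane scratch with a
// stride of 64, and best_needs_copy tracks which of the two buffers
// currently holds the best predictor, so at most one final copy is needed.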
2734 for (i = 0; i < MAX_MB_PLANE; i++) { 2735 orig_dst[i] = xd->plane[i].dst.buf; 2736 orig_dst_stride[i] = xd->plane[i].dst.stride; 2737 } 2738 2739 /* We don't include the cost of the second reference here, because there 2740 * are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other 2741 * words if you present them in that order, the second one is always known 2742 * if the first is known */ 2743 *rate2 += cost_mv_ref(cpi, this_mode, 2744 mbmi->mode_context[mbmi->ref_frame[0]]); 2745 2746 if (!(*mode_excluded)) { 2747 if (is_comp_pred) { 2748 *mode_excluded = (cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY); 2749 } else { 2750 *mode_excluded = (cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY); 2751 } 2752 } 2753 2754 pred_exists = 0; 2755 // Are all MVs integer pel for Y and UV 2756 intpel_mv = (mbmi->mv[0].as_mv.row & 15) == 0 && 2757 (mbmi->mv[0].as_mv.col & 15) == 0; 2758 if (is_comp_pred) 2759 intpel_mv &= (mbmi->mv[1].as_mv.row & 15) == 0 && 2760 (mbmi->mv[1].as_mv.col & 15) == 0; 2761 // Search for best switchable filter by checking the variance of 2762 // pred error irrespective of whether the filter will be used 2763 if (cm->mcomp_filter_type != BILINEAR) { 2764 *best_filter = EIGHTTAP; 2765 if (x->source_variance < 2766 cpi->sf.disable_filter_search_var_thresh) { 2767 *best_filter = EIGHTTAP; 2768 vp9_zero(cpi->rd_filter_cache); 2769 } else { 2770 int i, newbest; 2771 int tmp_rate_sum = 0; 2772 int64_t tmp_dist_sum = 0; 2773 2774 cpi->rd_filter_cache[SWITCHABLE_FILTERS] = INT64_MAX; 2775 for (i = 0; i < SWITCHABLE_FILTERS; ++i) { 2776 int j; 2777 int64_t rs_rd; 2778 mbmi->interp_filter = i; 2779 vp9_setup_interp_filters(xd, mbmi->interp_filter, cm); 2780 rs = get_switchable_rate(x); 2781 rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0); 2782 2783 if (i > 0 && intpel_mv) { 2784 cpi->rd_filter_cache[i] = RDCOST(x->rdmult, x->rddiv, 2785 tmp_rate_sum, tmp_dist_sum); 2786 cpi->rd_filter_cache[SWITCHABLE_FILTERS] = 2787 MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS], 2788 cpi->rd_filter_cache[i] + rs_rd); 2789 rd = cpi->rd_filter_cache[i]; 2790 if (cm->mcomp_filter_type == SWITCHABLE) 2791 rd += rs_rd; 2792 } else { 2793 int rate_sum = 0; 2794 int64_t dist_sum = 0; 2795 if ((cm->mcomp_filter_type == SWITCHABLE && 2796 (!i || best_needs_copy)) || 2797 (cm->mcomp_filter_type != SWITCHABLE && 2798 (cm->mcomp_filter_type == mbmi->interp_filter || 2799 (i == 0 && intpel_mv)))) { 2800 for (j = 0; j < MAX_MB_PLANE; j++) { 2801 xd->plane[j].dst.buf = orig_dst[j]; 2802 xd->plane[j].dst.stride = orig_dst_stride[j]; 2803 } 2804 } else { 2805 for (j = 0; j < MAX_MB_PLANE; j++) { 2806 xd->plane[j].dst.buf = tmp_buf + j * 64 * 64; 2807 xd->plane[j].dst.stride = 64; 2808 } 2809 } 2810 vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); 2811 model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum); 2812 cpi->rd_filter_cache[i] = RDCOST(x->rdmult, x->rddiv, 2813 rate_sum, dist_sum); 2814 cpi->rd_filter_cache[SWITCHABLE_FILTERS] = 2815 MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS], 2816 cpi->rd_filter_cache[i] + rs_rd); 2817 rd = cpi->rd_filter_cache[i]; 2818 if (cm->mcomp_filter_type == SWITCHABLE) 2819 rd += rs_rd; 2820 if (i == 0 && intpel_mv) { 2821 tmp_rate_sum = rate_sum; 2822 tmp_dist_sum = dist_sum; 2823 } 2824 } 2825 if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) { 2826 if (rd / 2 > ref_best_rd) { 2827 for (i = 0; i < MAX_MB_PLANE; i++) { 2828 xd->plane[i].dst.buf = orig_dst[i]; 2829 xd->plane[i].dst.stride = orig_dst_stride[i]; 2830 } 2831 return 
INT64_MAX; 2832 } 2833 } 2834 newbest = i == 0 || rd < best_rd; 2835 2836 if (newbest) { 2837 best_rd = rd; 2838 *best_filter = mbmi->interp_filter; 2839 if (cm->mcomp_filter_type == SWITCHABLE && i && !intpel_mv) 2840 best_needs_copy = !best_needs_copy; 2841 } 2842 2843 if ((cm->mcomp_filter_type == SWITCHABLE && newbest) || 2844 (cm->mcomp_filter_type != SWITCHABLE && 2845 cm->mcomp_filter_type == mbmi->interp_filter)) { 2846 pred_exists = 1; 2847 } 2848 } 2849 2850 for (i = 0; i < MAX_MB_PLANE; i++) { 2851 xd->plane[i].dst.buf = orig_dst[i]; 2852 xd->plane[i].dst.stride = orig_dst_stride[i]; 2853 } 2854 } 2855 } 2856 // Set the appropriate filter 2857 mbmi->interp_filter = cm->mcomp_filter_type != SWITCHABLE ? 2858 cm->mcomp_filter_type : *best_filter; 2859 vp9_setup_interp_filters(xd, mbmi->interp_filter, cm); 2860 rs = cm->mcomp_filter_type == SWITCHABLE ? get_switchable_rate(x) : 0; 2861 2862 if (pred_exists) { 2863 if (best_needs_copy) { 2864 // again temporarily set the buffers to local memory to prevent a memcpy 2865 for (i = 0; i < MAX_MB_PLANE; i++) { 2866 xd->plane[i].dst.buf = tmp_buf + i * 64 * 64; 2867 xd->plane[i].dst.stride = 64; 2868 } 2869 } 2870 } else { 2871 // Handles the special case when a filter that is not in the 2872 // switchable list (ex. bilinear, 6-tap) is indicated at the frame level 2873 vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); 2874 } 2875 2876 2877 if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) { 2878 int tmp_rate; 2879 int64_t tmp_dist; 2880 model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist); 2881 rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist); 2882 // if current pred_error modeled rd is substantially more than the best 2883 // so far, do not bother doing full rd 2884 if (rd / 2 > ref_best_rd) { 2885 for (i = 0; i < MAX_MB_PLANE; i++) { 2886 xd->plane[i].dst.buf = orig_dst[i]; 2887 xd->plane[i].dst.stride = orig_dst_stride[i]; 2888 } 2889 return INT64_MAX; 2890 } 2891 } 2892 2893 if (cpi->common.mcomp_filter_type == SWITCHABLE) 2894 *rate2 += get_switchable_rate(x); 2895 2896 if (!is_comp_pred && cpi->enable_encode_breakout) { 2897 if (cpi->active_map_enabled && x->active_ptr[0] == 0) 2898 x->skip = 1; 2899 else if (x->encode_breakout) { 2900 const BLOCK_SIZE y_size = get_plane_block_size(bsize, &xd->plane[0]); 2901 const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]); 2902 unsigned int var, sse; 2903 // Skipping threshold for ac. 2904 unsigned int thresh_ac; 2905 // The encode_breakout input 2906 unsigned int encode_breakout = x->encode_breakout << 4; 2907 int max_thresh = 36000; 2908 2909 // Use extreme low threshold for static frames to limit skipping. 2910 if (cpi->enable_encode_breakout == 2) 2911 max_thresh = 128; 2912 2913 // Calculate threshold according to dequant value. 2914 thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9; 2915 2916 // Use encode_breakout input if it is bigger than internal threshold. 2917 if (thresh_ac < encode_breakout) 2918 thresh_ac = encode_breakout; 2919 2920 // Set a maximum for threshold to avoid big PSNR loss in low bitrate case. 2921 if (thresh_ac > max_thresh) 2922 thresh_ac = max_thresh; 2923 2924 var = cpi->fn_ptr[y_size].vf(x->plane[0].src.buf, x->plane[0].src.stride, 2925 xd->plane[0].dst.buf, 2926 xd->plane[0].dst.stride, &sse); 2927 2928 // Adjust threshold according to partition size. 
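// The shift scales the threshold with block area (assuming the b_*_log2
// lookups count 4-pel units): BLOCK_64X64 shifts by 8 - (4 + 4) = 0 and
// keeps the full threshold, while BLOCK_16X16 shifts by 8 - (2 + 2) = 4,
// i.e. uses 1/16th of it.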
2929 thresh_ac >>= 8 - (b_width_log2_lookup[bsize] + 2930 b_height_log2_lookup[bsize]); 2931 2932 // Y skipping condition checking 2933 if (sse < thresh_ac || sse == 0) { 2934 // Skipping threshold for dc 2935 unsigned int thresh_dc; 2936 2937 thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6); 2938 2939 // dc skipping checking 2940 if ((sse - var) < thresh_dc || sse == var) { 2941 unsigned int sse_u, sse_v; 2942 unsigned int var_u, var_v; 2943 2944 var_u = cpi->fn_ptr[uv_size].vf(x->plane[1].src.buf, 2945 x->plane[1].src.stride, 2946 xd->plane[1].dst.buf, 2947 xd->plane[1].dst.stride, &sse_u); 2948 2949 // U skipping condition checking 2950 if ((sse_u * 4 < thresh_ac || sse_u == 0) && 2951 (sse_u - var_u < thresh_dc || sse_u == var_u)) { 2952 var_v = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf, 2953 x->plane[2].src.stride, 2954 xd->plane[2].dst.buf, 2955 xd->plane[2].dst.stride, &sse_v); 2956 2957 // V skipping condition checking 2958 if ((sse_v * 4 < thresh_ac || sse_v == 0) && 2959 (sse_v - var_v < thresh_dc || sse_v == var_v)) { 2960 x->skip = 1; 2961 2962 // The cost of skip bit needs to be added. 2963 *rate2 += vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd), 1); 2964 2965 // Scaling factor for SSE from spatial domain to frequency domain 2966 // is 16. Adjust distortion accordingly. 2967 *distortion_uv = (sse_u + sse_v) << 4; 2968 *distortion = (sse << 4) + *distortion_uv; 2969 2970 *disable_skip = 1; 2971 this_rd = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion); 2972 } 2973 } 2974 } 2975 } 2976 } 2977 } 2978 2979 if (!x->skip) { 2980 int skippable_y, skippable_uv; 2981 int64_t sseuv = INT64_MAX; 2982 int64_t rdcosty = INT64_MAX; 2983 2984 // Y cost and distortion 2985 super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y, psse, 2986 bsize, txfm_cache, ref_best_rd); 2987 2988 if (*rate_y == INT_MAX) { 2989 *rate2 = INT_MAX; 2990 *distortion = INT64_MAX; 2991 for (i = 0; i < MAX_MB_PLANE; i++) { 2992 xd->plane[i].dst.buf = orig_dst[i]; 2993 xd->plane[i].dst.stride = orig_dst_stride[i]; 2994 } 2995 return INT64_MAX; 2996 } 2997 2998 *rate2 += *rate_y; 2999 *distortion += *distortion_y; 3000 3001 rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion); 3002 rdcosty = MIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse)); 3003 3004 super_block_uvrd(cm, x, rate_uv, distortion_uv, &skippable_uv, &sseuv, 3005 bsize, ref_best_rd - rdcosty); 3006 if (*rate_uv == INT_MAX) { 3007 *rate2 = INT_MAX; 3008 *distortion = INT64_MAX; 3009 for (i = 0; i < MAX_MB_PLANE; i++) { 3010 xd->plane[i].dst.buf = orig_dst[i]; 3011 xd->plane[i].dst.stride = orig_dst_stride[i]; 3012 } 3013 return INT64_MAX; 3014 } 3015 3016 *psse += sseuv; 3017 *rate2 += *rate_uv; 3018 *distortion += *distortion_uv; 3019 *skippable = skippable_y && skippable_uv; 3020 } 3021 3022 for (i = 0; i < MAX_MB_PLANE; i++) { 3023 xd->plane[i].dst.buf = orig_dst[i]; 3024 xd->plane[i].dst.stride = orig_dst_stride[i]; 3025 } 3026 3027 return this_rd; // if 0, this will be re-calculated by caller 3028 } 3029 3030 void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, 3031 int *returnrate, int64_t *returndist, 3032 BLOCK_SIZE bsize, 3033 PICK_MODE_CONTEXT *ctx, int64_t best_rd) { 3034 VP9_COMMON *const cm = &cpi->common; 3035 MACROBLOCKD *const xd = &x->e_mbd; 3036 int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0; 3037 int y_skip = 0, uv_skip = 0; 3038 int64_t dist_y = 0, dist_uv = 0, tx_cache[TX_MODES] = { 0 }; 3039 x->skip_encode = 0; 3040 ctx->skip = 0; 3041 xd->this_mi->mbmi.ref_frame[0] 
= INTRA_FRAME; 3042 if (bsize >= BLOCK_8X8) { 3043 if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, 3044 &dist_y, &y_skip, bsize, tx_cache, 3045 best_rd) >= best_rd) { 3046 *returnrate = INT_MAX; 3047 return; 3048 } 3049 rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, 3050 &dist_uv, &uv_skip, bsize); 3051 } else { 3052 y_skip = 0; 3053 if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly, 3054 &dist_y, best_rd) >= best_rd) { 3055 *returnrate = INT_MAX; 3056 return; 3057 } 3058 rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, 3059 &dist_uv, &uv_skip, BLOCK_8X8); 3060 } 3061 3062 if (y_skip && uv_skip) { 3063 *returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly + 3064 vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd), 1); 3065 *returndist = dist_y + dist_uv; 3066 vp9_zero(ctx->tx_rd_diff); 3067 } else { 3068 int i; 3069 *returnrate = rate_y + rate_uv + 3070 vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd), 0); 3071 *returndist = dist_y + dist_uv; 3072 if (cpi->sf.tx_size_search_method == USE_FULL_RD) 3073 for (i = 0; i < TX_MODES; i++) 3074 ctx->tx_rd_diff[i] = tx_cache[i] - tx_cache[cm->tx_mode]; 3075 } 3076 3077 ctx->mic = *xd->this_mi; 3078 } 3079 3080 int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, 3081 int mi_row, int mi_col, 3082 int *returnrate, 3083 int64_t *returndistortion, 3084 BLOCK_SIZE bsize, 3085 PICK_MODE_CONTEXT *ctx, 3086 int64_t best_rd_so_far) { 3087 VP9_COMMON *cm = &cpi->common; 3088 MACROBLOCKD *xd = &x->e_mbd; 3089 MB_MODE_INFO *mbmi = &xd->this_mi->mbmi; 3090 const struct segmentation *seg = &cm->seg; 3091 const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]); 3092 RD_PREDICTION_MODE this_mode; 3093 MV_REFERENCE_FRAME ref_frame, second_ref_frame; 3094 unsigned char segment_id = mbmi->segment_id; 3095 int comp_pred, i; 3096 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; 3097 struct buf_2d yv12_mb[4][MAX_MB_PLANE]; 3098 int_mv single_newmv[MAX_REF_FRAMES] = { { 0 } }; 3099 static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG, 3100 VP9_ALT_FLAG }; 3101 int idx_list[4] = {0, 3102 cpi->lst_fb_idx, 3103 cpi->gld_fb_idx, 3104 cpi->alt_fb_idx}; 3105 int64_t best_rd = best_rd_so_far; 3106 int64_t best_yrd = best_rd_so_far; // FIXME(rbultje) more precise 3107 int64_t best_tx_rd[TX_MODES]; 3108 int64_t best_tx_diff[TX_MODES]; 3109 int64_t best_pred_diff[NB_PREDICTION_TYPES]; 3110 int64_t best_pred_rd[NB_PREDICTION_TYPES]; 3111 int64_t best_filter_rd[SWITCHABLE_FILTERS + 1]; 3112 int64_t best_filter_diff[SWITCHABLE_FILTERS + 1]; 3113 MB_MODE_INFO best_mbmode = { 0 }; 3114 int j; 3115 int mode_index, best_mode_index = 0; 3116 unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES]; 3117 vp9_prob comp_mode_p; 3118 int64_t best_intra_rd = INT64_MAX; 3119 int64_t best_inter_rd = INT64_MAX; 3120 MB_PREDICTION_MODE best_intra_mode = DC_PRED; 3121 // MB_PREDICTION_MODE best_inter_mode = ZEROMV; 3122 MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME; 3123 INTERPOLATIONFILTERTYPE tmp_best_filter = SWITCHABLE; 3124 int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES]; 3125 int64_t dist_uv[TX_SIZES]; 3126 int skip_uv[TX_SIZES]; 3127 MB_PREDICTION_MODE mode_uv[TX_SIZES]; 3128 struct scale_factors scale_factor[4]; 3129 unsigned int ref_frame_mask = 0; 3130 unsigned int mode_mask = 0; 3131 int64_t mode_distortions[MB_MODE_COUNT] = {-1}; 3132 int64_t frame_distortions[MAX_REF_FRAMES] = {-1}; 3133 int intra_cost_penalty = 20 * 
vp9_dc_quant(cpi->common.base_qindex, 3134 cpi->common.y_dc_delta_q); 3135 int_mv seg_mvs[4][MAX_REF_FRAMES]; 3136 union b_mode_info best_bmodes[4]; 3137 PARTITION_INFO best_partition; 3138 const int bws = num_8x8_blocks_wide_lookup[bsize] / 2; 3139 const int bhs = num_8x8_blocks_high_lookup[bsize] / 2; 3140 int best_skip2 = 0; 3141 3142 x->skip_encode = cpi->sf.skip_encode_frame && xd->q_index < QIDX_SKIP_THRESH; 3143 3144 for (i = 0; i < 4; i++) { 3145 int j; 3146 for (j = 0; j < MAX_REF_FRAMES; j++) 3147 seg_mvs[i][j].as_int = INVALID_MV; 3148 } 3149 // Each set bit in these masks marks a frame/mode whose error was much higher than its neighbors'. 3150 ctx->frames_with_high_error = 0; 3151 ctx->modes_with_high_error = 0; 3152 3153 estimate_ref_frame_costs(cpi, segment_id, ref_costs_single, ref_costs_comp, 3154 &comp_mode_p); 3155 3156 for (i = 0; i < NB_PREDICTION_TYPES; ++i) 3157 best_pred_rd[i] = INT64_MAX; 3158 for (i = 0; i < TX_MODES; i++) 3159 best_tx_rd[i] = INT64_MAX; 3160 for (i = 0; i <= SWITCHABLE_FILTERS; i++) 3161 best_filter_rd[i] = INT64_MAX; 3162 for (i = 0; i < TX_SIZES; i++) 3163 rate_uv_intra[i] = INT_MAX; 3164 3165 *returnrate = INT_MAX; 3166 3167 // Create a mask with a bit set for each reference frame used by the 3168 // smaller block sizes already searched. 3169 if (cpi->sf.use_avoid_tested_higherror) { 3170 switch (block_size) { 3171 case BLOCK_64X64: 3172 for (i = 0; i < 4; i++) { 3173 for (j = 0; j < 4; j++) { 3174 ref_frame_mask |= x->mb_context[i][j].frames_with_high_error; 3175 mode_mask |= x->mb_context[i][j].modes_with_high_error; 3176 } 3177 } 3178 for (i = 0; i < 4; i++) { 3179 ref_frame_mask |= x->sb32_context[i].frames_with_high_error; 3180 mode_mask |= x->sb32_context[i].modes_with_high_error; 3181 } 3182 break; 3183 case BLOCK_32X32: 3184 for (i = 0; i < 4; i++) { 3185 ref_frame_mask |= 3186 x->mb_context[xd->sb_index][i].frames_with_high_error; 3187 mode_mask |= x->mb_context[xd->sb_index][i].modes_with_high_error; 3188 } 3189 break; 3190 default: 3191 // Until all block sizes are handled, leave the masks clear so that 3192 // everything stays available after the inversion below. 3192 ref_frame_mask = 0; 3193 mode_mask = 0; 3194 break; 3195 } 3196 ref_frame_mask = ~ref_frame_mask; 3197 mode_mask = ~mode_mask; 3198 } 3199 3200 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) { 3201 if (cpi->ref_frame_flags & flag_list[ref_frame]) { 3202 setup_buffer_inter(cpi, x, idx_list[ref_frame], ref_frame, block_size, 3203 mi_row, mi_col, frame_mv[NEARESTMV], frame_mv[NEARMV], 3204 yv12_mb, scale_factor); 3205 } 3206 frame_mv[NEWMV][ref_frame].as_int = INVALID_MV; 3207 frame_mv[ZEROMV][ref_frame].as_int = 0; 3208 } 3209 3210 for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) { 3211 int mode_excluded = 0; 3212 int64_t this_rd = INT64_MAX; 3213 int disable_skip = 0; 3214 int compmode_cost = 0; 3215 int rate2 = 0, rate_y = 0, rate_uv = 0; 3216 int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0; 3217 int skippable = 0; 3218 int64_t tx_cache[TX_MODES]; 3219 int i; 3220 int this_skip2 = 0; 3221 int64_t total_sse = INT_MAX; 3222 int early_term = 0; 3223 3224 for (i = 0; i < TX_MODES; ++i) 3225 tx_cache[i] = INT64_MAX; 3226 3227 x->skip = 0; 3228 this_mode = vp9_mode_order[mode_index].mode; 3229 ref_frame = vp9_mode_order[mode_index].ref_frame; 3230 second_ref_frame = vp9_mode_order[mode_index].second_ref_frame; 3231 3232 // Look at the reference frame of the best mode so far and set the 3233 // skip mask to look at a subset of the remaining modes.
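// Sketch of the pruning: once mode_skip_start modes have been tested, a
// bitmask keyed on the best mode's reference frame so far (the
// *_FRAME_MODE_MASK constants) is consulted by mode_index to skip
// entries of vp9_mode_order.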
3234 if (mode_index > cpi->sf.mode_skip_start) { 3235 if (mode_index == (cpi->sf.mode_skip_start + 1)) { 3236 switch (vp9_mode_order[best_mode_index].ref_frame) { 3237 case INTRA_FRAME: 3238 cpi->mode_skip_mask = 0; 3239 break; 3240 case LAST_FRAME: 3241 cpi->mode_skip_mask = LAST_FRAME_MODE_MASK; 3242 break; 3243 case GOLDEN_FRAME: 3244 cpi->mode_skip_mask = GOLDEN_FRAME_MODE_MASK; 3245 break; 3246 case ALTREF_FRAME: 3247 cpi->mode_skip_mask = ALT_REF_MODE_MASK; 3248 break; 3249 case NONE: 3250 case MAX_REF_FRAMES: 3251 assert(!"Invalid Reference frame"); 3252 } 3253 } 3254 if (cpi->mode_skip_mask & (1 << mode_index)) 3255 continue; 3256 } 3257 3258 // Skip if the current reference frame has been masked off 3259 if (cpi->sf.reference_masking && !cpi->set_ref_frame_mask && 3260 (cpi->ref_frame_mask & (1 << ref_frame))) 3261 continue; 3262 3263 // Test best rd so far against threshold for trying this mode. 3264 if ((best_rd < ((cpi->rd_threshes[bsize][mode_index] * 3265 cpi->rd_thresh_freq_fact[bsize][mode_index]) >> 5)) || 3266 cpi->rd_threshes[bsize][mode_index] == INT_MAX) 3267 continue; 3268 3269 // Do not allow compound prediction if the segment level reference 3270 // frame feature is in use as in this case there can only be one reference. 3271 if ((second_ref_frame > INTRA_FRAME) && 3272 vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) 3273 continue; 3274 3275 // Skip some checking based on small partitions' result. 3276 if (x->fast_ms > 1 && !ref_frame) 3277 continue; 3278 if (x->fast_ms > 2 && ref_frame != x->subblock_ref) 3279 continue; 3280 3281 if (cpi->sf.use_avoid_tested_higherror && bsize >= BLOCK_8X8) { 3282 if (!(ref_frame_mask & (1 << ref_frame))) { 3283 continue; 3284 } 3285 if (!(mode_mask & (1 << this_mode))) { 3286 continue; 3287 } 3288 if (second_ref_frame != NONE 3289 && !(ref_frame_mask & (1 << second_ref_frame))) { 3290 continue; 3291 } 3292 } 3293 3294 mbmi->ref_frame[0] = ref_frame; 3295 mbmi->ref_frame[1] = second_ref_frame; 3296 3297 if (!(ref_frame == INTRA_FRAME 3298 || (cpi->ref_frame_flags & flag_list[ref_frame]))) { 3299 continue; 3300 } 3301 if (!(second_ref_frame == NONE 3302 || (cpi->ref_frame_flags & flag_list[second_ref_frame]))) { 3303 continue; 3304 } 3305 3306 comp_pred = second_ref_frame > INTRA_FRAME; 3307 if (comp_pred) { 3308 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) 3309 if (vp9_mode_order[best_mode_index].ref_frame == INTRA_FRAME) 3310 continue; 3311 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH) 3312 if (ref_frame != best_inter_ref_frame && 3313 second_ref_frame != best_inter_ref_frame) 3314 continue; 3315 } 3316 // TODO(jingning, jkoleszar): scaling reference frame not supported for 3317 // SPLITMV. 3318 if (ref_frame > 0 && 3319 vp9_is_scaled(&scale_factor[ref_frame]) && 3320 this_mode == RD_SPLITMV) 3321 continue; 3322 3323 if (second_ref_frame > 0 && 3324 vp9_is_scaled(&scale_factor[second_ref_frame]) && 3325 this_mode == RD_SPLITMV) 3326 continue; 3327 3328 if (bsize >= BLOCK_8X8 && 3329 (this_mode == RD_I4X4_PRED || this_mode == RD_SPLITMV)) 3330 continue; 3331 3332 if (bsize < BLOCK_8X8 && 3333 !(this_mode == RD_I4X4_PRED || this_mode == RD_SPLITMV)) 3334 continue; 3335 3336 set_scale_factors(xd, ref_frame, second_ref_frame, scale_factor); 3337 mbmi->uv_mode = DC_PRED; 3338 3339 // Evaluate all sub-pel filters irrespective of whether we can use 3340 // them for this frame. 
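// Start from the frame-level filter type so the checks below run with a
// valid filter in place; for SWITCHABLE frames, handle_inter_mode()
// re-evaluates the individual filters and overwrites this choice.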
    mbmi->interp_filter = cm->mcomp_filter_type;
    vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);

    if (comp_pred) {
      if (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))
        continue;
      set_scale_factors(xd, ref_frame, second_ref_frame, scale_factor);

      mode_excluded = mode_excluded
                      ? mode_excluded
                      : cm->comp_pred_mode == SINGLE_PREDICTION_ONLY;
    } else {
      if (ref_frame != INTRA_FRAME && second_ref_frame != INTRA_FRAME) {
        mode_excluded =
            mode_excluded ?
                mode_excluded : cm->comp_pred_mode == COMP_PREDICTION_ONLY;
      }
    }

    // Select prediction reference frames.
    for (i = 0; i < MAX_MB_PLANE; i++) {
      xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
      if (comp_pred)
        xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
    }

    // If the segment reference frame feature is enabled,
    // do nothing if the current ref frame is not allowed.
    if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
        vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) !=
            (int)ref_frame) {
      continue;
    // If the segment skip feature is enabled,
    // do nothing if the current mode is not allowed.
    } else if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP) &&
               (this_mode != RD_ZEROMV && ref_frame != INTRA_FRAME)) {
      continue;
    // Disable this drop-out case if the ref frame segment-level feature is
    // enabled for this segment. This is to prevent the possibility that we
    // end up unable to pick any mode.
    } else if (!vp9_segfeature_active(seg, segment_id,
                                      SEG_LVL_REF_FRAME)) {
      // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
      // unless ARNR filtering is enabled in which case we want
      // an unfiltered alternative. We allow near/nearest as well
      // because they may result in zero-zero MVs but be cheaper.
      if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
        if ((this_mode != RD_ZEROMV &&
             !(this_mode == RD_NEARMV &&
               frame_mv[RD_NEARMV][ALTREF_FRAME].as_int == 0) &&
             !(this_mode == RD_NEARESTMV &&
               frame_mv[RD_NEARESTMV][ALTREF_FRAME].as_int == 0)) ||
            ref_frame != ALTREF_FRAME) {
          continue;
        }
      }
    }

    // TODO(JBB): This is to make up for the fact that we don't have sad
    // functions that work when the block size reads outside the umv. We
    // should fix this either by making the motion search just work on a
    // representative block in the boundary (first), and then implementing a
    // function that does sads when inside the border.
    if (((mi_row + bhs) > cm->mi_rows || (mi_col + bws) > cm->mi_cols) &&
        this_mode == RD_NEWMV) {
      continue;
    }

#ifdef MODE_TEST_HIT_STATS
    // TEST/DEBUG CODE
    // Keep a record of the number of test hits at each size.
    cpi->mode_test_hits[bsize]++;
#endif

    if (this_mode == RD_I4X4_PRED) {
      int rate;

      /*
      if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
          (vp9_mode_order[best_mode_index].ref_frame > INTRA_FRAME))
        continue;
      */

      // RD_I4X4_PRED is only considered for block sizes less than 8x8.
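      // Everything below therefore runs with a fixed 4x4 transform: the
      // rate/distortion totals accumulate into rate2/distortion2, and every
      // tx_cache[] entry is seeded with the ONLY_4X4 cost, since no larger
      // transform size applies at this block size.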
      mbmi->tx_size = TX_4X4;
      if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y,
                                       &distortion_y, best_rd) >= best_rd)
        continue;
      rate2 += rate;
      rate2 += intra_cost_penalty;
      distortion2 += distortion_y;

      if (rate_uv_intra[TX_4X4] == INT_MAX) {
        choose_intra_uv_mode(cpi, bsize, &rate_uv_intra[TX_4X4],
                             &rate_uv_tokenonly[TX_4X4],
                             &dist_uv[TX_4X4], &skip_uv[TX_4X4],
                             &mode_uv[TX_4X4]);
      }
      rate2 += rate_uv_intra[TX_4X4];
      rate_uv = rate_uv_tokenonly[TX_4X4];
      distortion2 += dist_uv[TX_4X4];
      distortion_uv = dist_uv[TX_4X4];
      mbmi->uv_mode = mode_uv[TX_4X4];
      tx_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
      for (i = 0; i < TX_MODES; ++i)
        tx_cache[i] = tx_cache[ONLY_4X4];
    } else if (ref_frame == INTRA_FRAME) {
      TX_SIZE uv_tx;
      // Disable intra modes other than DC_PRED for blocks with low variance.
      // Threshold for intra skipping based on source variance.
      // TODO(debargha): Specialize the threshold for super block sizes.
      static const int skip_intra_var_thresh[BLOCK_SIZES] = {
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
      };
      if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
          this_mode != RD_DC_PRED &&
          x->source_variance < skip_intra_var_thresh[mbmi->sb_type])
        continue;
      // Only search the oblique modes if the best so far is
      // one of the neighboring directional modes.
      if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
          (this_mode >= RD_D45_PRED && this_mode <= RD_TM_PRED)) {
        if (vp9_mode_order[best_mode_index].ref_frame > INTRA_FRAME)
          continue;
      }
      mbmi->mode = rd_mode_to_mode(this_mode);
      if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
        if (conditional_skipintra(mbmi->mode, best_intra_mode))
          continue;
      }

      super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL,
                      bsize, tx_cache, best_rd);

      if (rate_y == INT_MAX)
        continue;

      uv_tx = MIN(mbmi->tx_size, max_uv_txsize_lookup[bsize]);
      if (rate_uv_intra[uv_tx] == INT_MAX) {
        choose_intra_uv_mode(cpi, bsize, &rate_uv_intra[uv_tx],
                             &rate_uv_tokenonly[uv_tx],
                             &dist_uv[uv_tx], &skip_uv[uv_tx],
                             &mode_uv[uv_tx]);
      }

      rate_uv = rate_uv_tokenonly[uv_tx];
      distortion_uv = dist_uv[uv_tx];
      skippable = skippable && skip_uv[uv_tx];
      mbmi->uv_mode = mode_uv[uv_tx];

      rate2 = rate_y + x->mbmode_cost[mbmi->mode] + rate_uv_intra[uv_tx];
      if (this_mode != RD_DC_PRED && this_mode != RD_TM_PRED)
        rate2 += intra_cost_penalty;
      distortion2 = distortion_y + distortion_uv;
    } else if (this_mode == RD_SPLITMV) {
      const int is_comp_pred = second_ref_frame > 0;
      int rate;
      int64_t distortion;
      int64_t this_rd_thresh;
      int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX;
      int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX;
      int64_t tmp_best_distortion = INT_MAX, tmp_best_sse, uv_sse;
      int tmp_best_skippable = 0;
      int switchable_filter_index;
      int_mv *second_ref = is_comp_pred ?
          &mbmi->ref_mvs[second_ref_frame][0] : NULL;
      union b_mode_info tmp_best_bmodes[16];
      MB_MODE_INFO tmp_best_mbmode;
      PARTITION_INFO tmp_best_partition;
      BEST_SEG_INFO bsi[SWITCHABLE_FILTERS];
      int pred_exists = 0;
      int uv_skippable;

      if (is_comp_pred) {
        if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA)
          if (vp9_mode_order[best_mode_index].ref_frame == INTRA_FRAME)
            continue;
        if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH)
          if (ref_frame != best_inter_ref_frame &&
              second_ref_frame != best_inter_ref_frame)
            continue;
      }

      this_rd_thresh = (ref_frame == LAST_FRAME) ?
          cpi->rd_threshes[bsize][THR_NEWMV] :
          cpi->rd_threshes[bsize][THR_NEWA];
      this_rd_thresh = (ref_frame == GOLDEN_FRAME) ?
          cpi->rd_threshes[bsize][THR_NEWG] : this_rd_thresh;
      xd->this_mi->mbmi.tx_size = TX_4X4;

      cpi->rd_filter_cache[SWITCHABLE_FILTERS] = INT64_MAX;
      if (cm->mcomp_filter_type != BILINEAR) {
        tmp_best_filter = EIGHTTAP;
        if (x->source_variance <
            cpi->sf.disable_filter_search_var_thresh) {
          tmp_best_filter = EIGHTTAP;
          vp9_zero(cpi->rd_filter_cache);
        } else {
          for (switchable_filter_index = 0;
               switchable_filter_index < SWITCHABLE_FILTERS;
               ++switchable_filter_index) {
            int newbest, rs;
            int64_t rs_rd;
            mbmi->interp_filter = switchable_filter_index;
            vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);

            tmp_rd = rd_pick_best_mbsegmentation(cpi, x,
                                                 &mbmi->ref_mvs[ref_frame][0],
                                                 second_ref,
                                                 best_yrd,
                                                 &rate, &rate_y, &distortion,
                                                 &skippable, &total_sse,
                                                 (int)this_rd_thresh, seg_mvs,
                                                 bsi, switchable_filter_index,
                                                 mi_row, mi_col);

            if (tmp_rd == INT64_MAX)
              continue;
            cpi->rd_filter_cache[switchable_filter_index] = tmp_rd;
            rs = get_switchable_rate(x);
            rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
            cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
                MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS],
                    tmp_rd + rs_rd);
            if (cm->mcomp_filter_type == SWITCHABLE)
              tmp_rd += rs_rd;

            newbest = (tmp_rd < tmp_best_rd);
            if (newbest) {
              tmp_best_filter = mbmi->interp_filter;
              tmp_best_rd = tmp_rd;
            }
            if ((newbest && cm->mcomp_filter_type == SWITCHABLE) ||
                (mbmi->interp_filter == cm->mcomp_filter_type &&
                 cm->mcomp_filter_type != SWITCHABLE)) {
              tmp_best_rdu = tmp_rd;
              tmp_best_rate = rate;
              tmp_best_ratey = rate_y;
              tmp_best_distortion = distortion;
              tmp_best_sse = total_sse;
              tmp_best_skippable = skippable;
              tmp_best_mbmode = *mbmi;
              tmp_best_partition = *x->partition_info;
              for (i = 0; i < 4; i++)
                tmp_best_bmodes[i] = xd->this_mi->bmi[i];
              pred_exists = 1;
              if (switchable_filter_index == 0 &&
                  cpi->sf.use_rd_breakout &&
                  best_rd < INT64_MAX) {
                if (tmp_best_rdu / 2 > best_rd) {
                  // Skip searching the other filters if the first is
                  // already substantially larger than the best so far.
                  tmp_best_filter = mbmi->interp_filter;
                  tmp_best_rdu = INT64_MAX;
                  break;
                }
              }
            }
          }  // switchable_filter_index loop
        }
      }

      if (tmp_best_rdu == INT64_MAX)
        continue;

      mbmi->interp_filter = (cm->mcomp_filter_type == SWITCHABLE ?
          tmp_best_filter : cm->mcomp_filter_type);
      vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
      if (!pred_exists) {
        // Handles the special case when a filter that is not in the
        // switchable list (bilinear, 6-tap) is indicated at the frame level.
        tmp_rd = rd_pick_best_mbsegmentation(cpi, x,
                                             &mbmi->ref_mvs[ref_frame][0],
                                             second_ref,
                                             best_yrd,
                                             &rate, &rate_y, &distortion,
                                             &skippable, &total_sse,
                                             (int)this_rd_thresh, seg_mvs,
                                             bsi, 0,
                                             mi_row, mi_col);
        if (tmp_rd == INT64_MAX)
          continue;
      } else {
        if (cpi->common.mcomp_filter_type == SWITCHABLE) {
          int rs = get_switchable_rate(x);
          tmp_best_rdu -= RDCOST(x->rdmult, x->rddiv, rs, 0);
        }
        tmp_rd = tmp_best_rdu;
        total_sse = tmp_best_sse;
        rate = tmp_best_rate;
        rate_y = tmp_best_ratey;
        distortion = tmp_best_distortion;
        skippable = tmp_best_skippable;
        *mbmi = tmp_best_mbmode;
        *x->partition_info = tmp_best_partition;
        for (i = 0; i < 4; i++)
          xd->this_mi->bmi[i] = tmp_best_bmodes[i];
      }

      rate2 += rate;
      distortion2 += distortion;

      if (cpi->common.mcomp_filter_type == SWITCHABLE)
        rate2 += get_switchable_rate(x);

      if (!mode_excluded) {
        if (is_comp_pred)
          mode_excluded = cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY;
        else
          mode_excluded = cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY;
      }
      compmode_cost = vp9_cost_bit(comp_mode_p, is_comp_pred);

      tmp_best_rdu = best_rd -
          MIN(RDCOST(x->rdmult, x->rddiv, rate2, distortion2),
              RDCOST(x->rdmult, x->rddiv, 0, total_sse));

      if (tmp_best_rdu > 0) {
        // If even the 'Y' rd value of split is higher than the best so far,
        // don't bother looking at UV.
        vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
                                        BLOCK_8X8);
        super_block_uvrd(cm, x, &rate_uv, &distortion_uv, &uv_skippable,
                         &uv_sse, BLOCK_8X8, tmp_best_rdu);
        if (rate_uv == INT_MAX)
          continue;
        rate2 += rate_uv;
        distortion2 += distortion_uv;
        skippable = skippable && uv_skippable;
        total_sse += uv_sse;

        tx_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
        for (i = 0; i < TX_MODES; ++i)
          tx_cache[i] = tx_cache[ONLY_4X4];
      }
    } else {
      mbmi->mode = rd_mode_to_mode(this_mode);
      compmode_cost = vp9_cost_bit(comp_mode_p,
                                   second_ref_frame > INTRA_FRAME);
      this_rd = handle_inter_mode(cpi, x, bsize,
                                  tx_cache,
                                  &rate2, &distortion2, &skippable,
                                  &rate_y, &distortion_y,
                                  &rate_uv, &distortion_uv,
                                  &mode_excluded, &disable_skip,
                                  &tmp_best_filter, frame_mv,
                                  mi_row, mi_col,
                                  single_newmv, &total_sse, best_rd);
      if (this_rd == INT64_MAX)
        continue;
    }

    if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) {
      rate2 += compmode_cost;
    }

    // Estimate the reference frame signaling cost and add it
    // to the rolling cost variable.
    if (second_ref_frame > INTRA_FRAME) {
      rate2 += ref_costs_comp[ref_frame];
    } else {
      rate2 += ref_costs_single[ref_frame];
    }

    if (!disable_skip) {
      // Test for the condition where the skip block will be activated
      // because there are no non-zero coefficients, and make any necessary
      // adjustment for rate. Ignore if skip is coded at the segment level,
      // as in that case the cost won't have been added in.
      // Is MB-level skip allowed (i.e. not coded at segment level)?
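      // The branches below trade the coefficient cost (rate_y + rate_uv)
      // against signaling a skip: roughly, when
      // RDCOST(rate_y + rate_uv, distortion2) exceeds RDCOST(0, total_sse),
      // it is cheaper to code the block as skipped and accept the full SSE
      // as its distortion (see the RDCOST macro for the exact weighting).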
      const int mb_skip_allowed = !vp9_segfeature_active(seg, segment_id,
                                                         SEG_LVL_SKIP);

      if (skippable && bsize >= BLOCK_8X8) {
        // Back out the coefficient coding costs.
        rate2 -= (rate_y + rate_uv);
        // Zero rate_uv for the best_yrd calculation below.
        rate_uv = 0;

        if (mb_skip_allowed) {
          int prob_skip_cost;

          // Cost the skip mb case.
          vp9_prob skip_prob = vp9_get_pred_prob_mbskip(cm, xd);

          if (skip_prob) {
            prob_skip_cost = vp9_cost_bit(skip_prob, 1);
            rate2 += prob_skip_cost;
          }
        }
      } else if (mb_skip_allowed && ref_frame != INTRA_FRAME &&
                 !xd->lossless) {
        if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
            RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
          // Add in the cost of the no-skip flag.
          int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd),
                                            0);
          rate2 += prob_skip_cost;
        } else {
          // FIXME(rbultje): make this work for splitmv also.
          int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd),
                                            1);
          rate2 += prob_skip_cost;
          distortion2 = total_sse;
          assert(total_sse >= 0);
          rate2 -= (rate_y + rate_uv);
          rate_y = 0;
          rate_uv = 0;
          this_skip2 = 1;
        }
      } else if (mb_skip_allowed) {
        // Add in the cost of the no-skip flag.
        int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd),
                                          0);
        rate2 += prob_skip_cost;
      }

      // Calculate the final RD estimate for this mode.
      this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
    }

    // Keep a record of the best intra rd.
    if (xd->this_mi->mbmi.ref_frame[0] == INTRA_FRAME &&
        is_intra_mode(xd->this_mi->mbmi.mode) &&
        this_rd < best_intra_rd) {
      best_intra_rd = this_rd;
      best_intra_mode = xd->this_mi->mbmi.mode;
    }
    // Keep a record of the best inter rd with a single reference.
    if (xd->this_mi->mbmi.ref_frame[0] > INTRA_FRAME &&
        xd->this_mi->mbmi.ref_frame[1] == NONE &&
        !mode_excluded &&
        this_rd < best_inter_rd) {
      best_inter_rd = this_rd;
      best_inter_ref_frame = ref_frame;
      // best_inter_mode = xd->this_mi->mbmi.mode;
    }

    if (!disable_skip && ref_frame == INTRA_FRAME) {
      for (i = 0; i < NB_PREDICTION_TYPES; ++i)
        best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
      for (i = 0; i <= SWITCHABLE_FILTERS; i++)
        best_filter_rd[i] = MIN(best_filter_rd[i], this_rd);
    }

    if (this_mode != RD_I4X4_PRED && this_mode != RD_SPLITMV) {
      // Store the respective mode distortions for later use.
      if (mode_distortions[this_mode] == -1
          || distortion2 < mode_distortions[this_mode]) {
        mode_distortions[this_mode] = distortion2;
      }
      if (frame_distortions[ref_frame] == -1
          || distortion2 < frame_distortions[ref_frame]) {
        frame_distortions[ref_frame] = distortion2;
      }
    }

    // Did this mode help, i.e.
    // is it the new best mode?
    if (this_rd < best_rd || x->skip) {
      if (!mode_excluded) {
        // Note the index of the best mode so far.
        best_mode_index = mode_index;

        if (ref_frame == INTRA_FRAME) {
          /* required for left and above block mv */
          mbmi->mv[0].as_int = 0;
        }

        *returnrate = rate2;
        *returndistortion = distortion2;
        best_rd = this_rd;
        best_yrd = best_rd -
                   RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv);
        best_mbmode = *mbmi;
        best_skip2 = this_skip2;
        best_partition = *x->partition_info;

        if (this_mode == RD_I4X4_PRED || this_mode == RD_SPLITMV)
          for (i = 0; i < 4; i++)
            best_bmodes[i] = xd->this_mi->bmi[i];

        // TODO(debargha): enhance this test with a better distortion
        // prediction based on qp, activity mask and history.
        if (cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE) {
          const int qstep = xd->plane[0].dequant[1];
          // TODO(debargha): Enhance this by specializing for each mode_index.
          int scale = 4;
          if (x->source_variance < UINT_MAX) {
            const int var_adjust = (x->source_variance < 16);
            scale -= var_adjust;
          }
          if (ref_frame > INTRA_FRAME &&
              distortion2 * scale < qstep * qstep) {
            early_term = 1;
          }
        }
      }
    }

    /* keep record of best compound/single-only prediction */
    if (!disable_skip && ref_frame != INTRA_FRAME) {
      int64_t single_rd, hybrid_rd;
      int single_rate, hybrid_rate;

      if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) {
        single_rate = rate2 - compmode_cost;
        hybrid_rate = rate2;
      } else {
        single_rate = rate2;
        hybrid_rate = rate2 + compmode_cost;
      }

      single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
      hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);

      if (second_ref_frame <= INTRA_FRAME &&
          single_rd < best_pred_rd[SINGLE_PREDICTION_ONLY]) {
        best_pred_rd[SINGLE_PREDICTION_ONLY] = single_rd;
      } else if (second_ref_frame > INTRA_FRAME &&
                 single_rd < best_pred_rd[COMP_PREDICTION_ONLY]) {
        best_pred_rd[COMP_PREDICTION_ONLY] = single_rd;
      }
      if (hybrid_rd < best_pred_rd[HYBRID_PREDICTION])
        best_pred_rd[HYBRID_PREDICTION] = hybrid_rd;
    }

    /* keep record of best filter type */
    if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME &&
        cm->mcomp_filter_type != BILINEAR) {
      int64_t ref = cpi->rd_filter_cache[cm->mcomp_filter_type == SWITCHABLE ?
                        SWITCHABLE_FILTERS : cm->mcomp_filter_type];
      for (i = 0; i <= SWITCHABLE_FILTERS; i++) {
        int64_t adj_rd;
        // In cases of poor prediction, filter_cache[] can contain really big
        // values, which actually are bigger than this_rd itself. This can
        // cause negative best_filter_rd[] values, which is obviously silly.
        // Therefore, if filter_cache < ref, we do an adjusted calculation.
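        // Illustrative numbers: with this_rd = 100, ref = 80 and
        // filter_cache[i] = 60, the adjusted cost is
        // 100 - (80 - 60) * 100 / 80 = 75; the deficit is scaled by
        // this_rd / ref instead of being subtracted directly, which keeps
        // adj_rd non-negative.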
        if (cpi->rd_filter_cache[i] >= ref)
          adj_rd = this_rd + cpi->rd_filter_cache[i] - ref;
        else  // FIXME(rbultje): do this for comppred also.
          adj_rd = this_rd - (ref - cpi->rd_filter_cache[i]) * this_rd / ref;
        best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
      }
    }

    /* keep record of best txfm size */
    if (bsize < BLOCK_32X32) {
      if (bsize < BLOCK_16X16) {
        if (this_mode == RD_SPLITMV || this_mode == RD_I4X4_PRED)
          tx_cache[ALLOW_8X8] = tx_cache[ONLY_4X4];
        tx_cache[ALLOW_16X16] = tx_cache[ALLOW_8X8];
      }
      tx_cache[ALLOW_32X32] = tx_cache[ALLOW_16X16];
    }
    if (!mode_excluded && this_rd != INT64_MAX) {
      for (i = 0; i < TX_MODES && tx_cache[i] < INT64_MAX; i++) {
        int64_t adj_rd = INT64_MAX;
        if (this_mode != RD_I4X4_PRED) {
          adj_rd = this_rd + tx_cache[i] - tx_cache[cm->tx_mode];
        } else {
          adj_rd = this_rd;
        }

        if (adj_rd < best_tx_rd[i])
          best_tx_rd[i] = adj_rd;
      }
    }

    if (early_term)
      break;

    if (x->skip && !comp_pred)
      break;
  }

  if (best_rd >= best_rd_so_far)
    return INT64_MAX;

  // If we used an estimate for the UV intra rd in the loop above...
  if (cpi->sf.use_uv_intra_rd_estimate) {
    // ...redo intra UV best-rd mode selection if the best mode choice above
    // was intra.
    if (vp9_mode_order[best_mode_index].ref_frame == INTRA_FRAME) {
      TX_SIZE uv_tx_size = get_uv_tx_size(mbmi);
      rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_intra[uv_tx_size],
                              &rate_uv_tokenonly[uv_tx_size],
                              &dist_uv[uv_tx_size],
                              &skip_uv[uv_tx_size],
                              bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
    }
  }

  // If we are using reference masking and the set-mask flag is set, then
  // create the reference frame mask.
  if (cpi->sf.reference_masking && cpi->set_ref_frame_mask)
    cpi->ref_frame_mask = ~(1 << vp9_mode_order[best_mode_index].ref_frame);

  // Flag all modes whose distortion is more than twice the best we found at
  // this level.
  for (mode_index = 0; mode_index < MB_MODE_COUNT; ++mode_index) {
    if (mode_index == NEARESTMV || mode_index == NEARMV || mode_index == NEWMV)
      continue;

    if (mode_distortions[mode_index] > 2 * *returndistortion) {
      ctx->modes_with_high_error |= (1 << mode_index);
    }
  }

  // Flag all ref frames whose distortion is more than twice the best we
  // found at this level.
  for (ref_frame = INTRA_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
    if (frame_distortions[ref_frame] > 2 * *returndistortion) {
      ctx->frames_with_high_error |= (1 << ref_frame);
    }
  }

  if (best_rd == INT64_MAX && bsize < BLOCK_8X8) {
    *returnrate = INT_MAX;
    *returndistortion = INT_MAX;
    return best_rd;
  }

  assert((cm->mcomp_filter_type == SWITCHABLE) ||
         (cm->mcomp_filter_type == best_mbmode.interp_filter) ||
         (best_mbmode.ref_frame[0] == INTRA_FRAME));

  // Updating rd_thresh_freq_fact[] here means that the different
  // partition/block sizes are handled independently, based on the best
  // choice for the current partition. It may well be better to keep a scaled
  // best-rd-so-far value and update rd_thresh_freq_fact based on the
  // mode/size combination that wins out.
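  // A sketch of the adaptation that follows: the winning mode's frequency
  // factor decays geometrically (x -= x >> 3, roughly x *= 7/8), making that
  // mode cheaper to re-try, while every losing mode's factor grows by
  // RD_THRESH_INC until it saturates at
  // adaptive_rd_thresh * MAX_RD_THRESH_FACT, raising the rd threshold at
  // which it is considered again.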
  if (cpi->sf.adaptive_rd_thresh) {
    for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
      if (mode_index == best_mode_index) {
        cpi->rd_thresh_freq_fact[bsize][mode_index] -=
            (cpi->rd_thresh_freq_fact[bsize][mode_index] >> 3);
      } else {
        cpi->rd_thresh_freq_fact[bsize][mode_index] += RD_THRESH_INC;
        if (cpi->rd_thresh_freq_fact[bsize][mode_index] >
            (cpi->sf.adaptive_rd_thresh * MAX_RD_THRESH_FACT)) {
          cpi->rd_thresh_freq_fact[bsize][mode_index] =
              cpi->sf.adaptive_rd_thresh * MAX_RD_THRESH_FACT;
        }
      }
    }
  }

  // macroblock modes
  *mbmi = best_mbmode;
  x->skip |= best_skip2;
  if (best_mbmode.ref_frame[0] == INTRA_FRAME &&
      best_mbmode.sb_type < BLOCK_8X8) {
    for (i = 0; i < 4; i++)
      xd->this_mi->bmi[i].as_mode = best_bmodes[i].as_mode;
  }

  if (best_mbmode.ref_frame[0] != INTRA_FRAME &&
      best_mbmode.sb_type < BLOCK_8X8) {
    for (i = 0; i < 4; i++)
      xd->this_mi->bmi[i].as_mv[0].as_int =
          best_bmodes[i].as_mv[0].as_int;

    if (mbmi->ref_frame[1] > 0)
      for (i = 0; i < 4; i++)
        xd->this_mi->bmi[i].as_mv[1].as_int =
            best_bmodes[i].as_mv[1].as_int;

    *x->partition_info = best_partition;

    mbmi->mv[0].as_int = xd->this_mi->bmi[3].as_mv[0].as_int;
    mbmi->mv[1].as_int = xd->this_mi->bmi[3].as_mv[1].as_int;
  }

  for (i = 0; i < NB_PREDICTION_TYPES; ++i) {
    if (best_pred_rd[i] == INT64_MAX)
      best_pred_diff[i] = INT_MIN;
    else
      best_pred_diff[i] = best_rd - best_pred_rd[i];
  }

  if (!x->skip) {
    for (i = 0; i <= SWITCHABLE_FILTERS; i++) {
      if (best_filter_rd[i] == INT64_MAX)
        best_filter_diff[i] = 0;
      else
        best_filter_diff[i] = best_rd - best_filter_rd[i];
    }
    if (cm->mcomp_filter_type == SWITCHABLE)
      assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
  } else {
    vpx_memset(best_filter_diff, 0, sizeof(best_filter_diff));
  }

  if (!x->skip) {
    for (i = 0; i < TX_MODES; i++) {
      if (best_tx_rd[i] == INT64_MAX)
        best_tx_diff[i] = 0;
      else
        best_tx_diff[i] = best_rd - best_tx_rd[i];
    }
  } else {
    vpx_memset(best_tx_diff, 0, sizeof(best_tx_diff));
  }

  set_scale_factors(xd, mbmi->ref_frame[0], mbmi->ref_frame[1],
                    scale_factor);
  store_coding_context(x, ctx, best_mode_index,
                       &best_partition,
                       &mbmi->ref_mvs[mbmi->ref_frame[0]][0],
                       &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 :
                                      mbmi->ref_frame[1]][0],
                       best_pred_diff, best_tx_diff, best_filter_diff);

  return best_rd;
}