/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <assert.h>

#include "vp9/common/vp9_pragmas.h"
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_modecosts.h"
#include "vp9/encoder/vp9_encodeintra.h"
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_quant_common.h"
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vpx_mem/vpx_mem.h"
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
#include "./vp9_rtcd.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_common.h"

#define INVALID_MV 0x80008000

/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

#define LAST_FRAME_MODE_MASK    0xFFEDCD60
#define GOLDEN_FRAME_MODE_MASK  0xFFDA3BB0
#define ALT_REF_MODE_MASK       0xFFC648D0

#define MIN_EARLY_TERM_INDEX    3

const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
  {NEARESTMV, LAST_FRAME,   NONE},
  {NEARESTMV, ALTREF_FRAME, NONE},
  {NEARESTMV, GOLDEN_FRAME, NONE},

  {DC_PRED,   INTRA_FRAME,  NONE},

  {NEWMV,     LAST_FRAME,   NONE},
  {NEWMV,     ALTREF_FRAME, NONE},
  {NEWMV,     GOLDEN_FRAME, NONE},

  {NEARMV,    LAST_FRAME,   NONE},
  {NEARMV,    ALTREF_FRAME, NONE},
  {NEARESTMV, LAST_FRAME,   ALTREF_FRAME},
  {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},

  {TM_PRED,   INTRA_FRAME,  NONE},

  {NEARMV,    LAST_FRAME,   ALTREF_FRAME},
  {NEWMV,     LAST_FRAME,   ALTREF_FRAME},
  {NEARMV,    GOLDEN_FRAME, NONE},
  {NEARMV,    GOLDEN_FRAME, ALTREF_FRAME},
  {NEWMV,     GOLDEN_FRAME, ALTREF_FRAME},

  {ZEROMV,    LAST_FRAME,   NONE},
  {ZEROMV,    GOLDEN_FRAME, NONE},
  {ZEROMV,    ALTREF_FRAME, NONE},
  {ZEROMV,    LAST_FRAME,   ALTREF_FRAME},
  {ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME},

  {H_PRED,    INTRA_FRAME,  NONE},
  {V_PRED,    INTRA_FRAME,  NONE},
  {D135_PRED, INTRA_FRAME,  NONE},
  {D207_PRED, INTRA_FRAME,  NONE},
  {D153_PRED, INTRA_FRAME,  NONE},
  {D63_PRED,  INTRA_FRAME,  NONE},
  {D117_PRED, INTRA_FRAME,  NONE},
  {D45_PRED,  INTRA_FRAME,  NONE},
};

const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
  {LAST_FRAME,   NONE},
  {GOLDEN_FRAME, NONE},
  {ALTREF_FRAME, NONE},
  {LAST_FRAME,   ALTREF_FRAME},
  {GOLDEN_FRAME, ALTREF_FRAME},
  {INTRA_FRAME,  NONE},
};

// The baseline rd thresholds for breaking out of the rd loop for
// certain modes are assumed to be based on 8x8 blocks.
// This table is used to correct for block size.
// The factors here are << 2 (2 = x0.5, 32 = x8 etc.).
static int rd_thresh_block_size_factor[BLOCK_SIZES] =
  {2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32};
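
// Worked example (illustrative): the 8x8 baseline corresponds to the
// factor 4 (x1.0). A 32x32 block uses the factor 16 (x4.0), so in
// set_block_thresholds() below its rd threshold comes out as
//   thresh_mult[i] * q * 16 / 4  ==  4 * thresh_mult[i] * q,
// i.e. four times the 8x8 baseline threshold.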

#define RD_THRESH_MAX_FACT 64
#define RD_THRESH_INC      1
#define RD_THRESH_POW      1.25
#define RD_MULT_EPB_RATIO  64

#define MV_COST_WEIGHT      108
#define MV_COST_WEIGHT_SUB  120

static void fill_token_costs(vp9_coeff_cost *c,
                             vp9_coeff_probs_model (*p)[BLOCK_TYPES]) {
  int i, j, k, l;
  TX_SIZE t;
  for (t = TX_4X4; t <= TX_32X32; t++)
    for (i = 0; i < BLOCK_TYPES; i++)
      for (j = 0; j < REF_TYPES; j++)
        for (k = 0; k < COEF_BANDS; k++)
          for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
            vp9_prob probs[ENTROPY_NODES];
            vp9_model_to_full_probs(p[t][i][j][k][l], probs);
            vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs,
                            vp9_coef_tree);
            vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
                                 vp9_coef_tree);
            assert(c[t][i][j][k][0][l][DCT_EOB_TOKEN] ==
                   c[t][i][j][k][1][l][DCT_EOB_TOKEN]);
          }
}

static const int rd_iifactor[32] = {
  4, 4, 3, 2, 1, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
};

// 3* dc_qlookup[Q]*dc_qlookup[Q];

/* values are now correlated to quantizer */
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];

void vp9_init_me_luts() {
  int i;

  // Initialize the sad lut tables using a formulaic calculation for now.
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  for (i = 0; i < QINDEX_RANGE; i++) {
    sad_per_bit16lut[i] =
        (int)((0.0418 * vp9_convert_qindex_to_q(i)) + 2.4107);
    sad_per_bit4lut[i] = (int)(0.063 * vp9_convert_qindex_to_q(i) + 2.742);
  }
}

int vp9_compute_rd_mult(VP9_COMP *cpi, int qindex) {
  const int q = vp9_dc_quant(qindex, 0);
  // TODO(debargha): Adjust the function below
  int rdmult = 88 * q * q / 25;
  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    if (cpi->twopass.next_iiratio > 31)
      rdmult += (rdmult * rd_iifactor[31]) >> 4;
    else
      rdmult += (rdmult * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
  }
  return rdmult;
}

static int compute_rd_thresh_factor(int qindex) {
  int q;
  // TODO(debargha): Adjust the function below
  q = (int)(pow(vp9_dc_quant(qindex, 0) / 4.0, RD_THRESH_POW) * 5.12);
  if (q < 8)
    q = 8;
  return q;
}

void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
  cpi->mb.sadperbit16 = sad_per_bit16lut[qindex];
  cpi->mb.sadperbit4 = sad_per_bit4lut[qindex];
}

static void set_block_thresholds(VP9_COMP *cpi) {
  int i, bsize, segment_id;
  VP9_COMMON *cm = &cpi->common;

  for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
    int q;
    int segment_qindex = vp9_get_qindex(&cm->seg, segment_id,
                                        cm->base_qindex);
    segment_qindex = clamp(segment_qindex + cm->y_dc_delta_q, 0, MAXQ);
    q = compute_rd_thresh_factor(segment_qindex);

    for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
      // The thresholds here seem unnecessarily harsh, but are fine given
      // the actual range of values used for cpi->sf.thresh_mult[].
      int thresh_max = INT_MAX / (q * rd_thresh_block_size_factor[bsize]);

      for (i = 0; i < MAX_MODES; ++i) {
        if (cpi->sf.thresh_mult[i] < thresh_max) {
          cpi->rd_threshes[segment_id][bsize][i] =
              cpi->sf.thresh_mult[i] * q *
              rd_thresh_block_size_factor[bsize] / 4;
        } else {
          cpi->rd_threshes[segment_id][bsize][i] = INT_MAX;
        }
      }

      for (i = 0; i < MAX_REFS; ++i) {
        if (cpi->sf.thresh_mult_sub8x8[i] < thresh_max) {
          cpi->rd_thresh_sub8x8[segment_id][bsize][i] =
              cpi->sf.thresh_mult_sub8x8[i] * q *
              rd_thresh_block_size_factor[bsize] / 4;
        } else {
          cpi->rd_thresh_sub8x8[segment_id][bsize][i] = INT_MAX;
        }
      }
    }
  }
}

void vp9_initialize_rd_consts(VP9_COMP *cpi) {
  VP9_COMMON *cm = &cpi->common;
  int qindex, i;

  vp9_clear_system_state();  // __asm emms;

  // Further tests required to see if optimum is different
  // for key frames, golden frames and arf frames.
  // if (cpi->common.refresh_golden_frame ||
  //     cpi->common.refresh_alt_ref_frame)
  qindex = clamp(cm->base_qindex + cm->y_dc_delta_q, 0, MAXQ);

  cpi->RDDIV = RDDIV_BITS;  // in bits (to multiply D by 128)
  cpi->RDMULT = vp9_compute_rd_mult(cpi, qindex);

  cpi->mb.errorperbit = cpi->RDMULT / RD_MULT_EPB_RATIO;
  cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);

  vp9_set_speed_features(cpi);

  cpi->mb.select_txfm_size =
      (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
       cm->frame_type != KEY_FRAME) ? 0 : 1;

  set_block_thresholds(cpi);

  fill_token_costs(cpi->mb.token_costs, cm->fc.coef_probs);

  for (i = 0; i < PARTITION_CONTEXTS; i++)
    vp9_cost_tokens(cpi->mb.partition_cost[i], get_partition_probs(cm, i),
                    vp9_partition_tree);

  /* rough estimate for costing */
  vp9_init_mode_costs(cpi);

  if (!frame_is_intra_only(cm)) {
    vp9_build_nmv_cost_table(
        cpi->mb.nmvjointcost,
        cm->allow_high_precision_mv ? cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
        &cm->fc.nmvc,
        cm->allow_high_precision_mv, 1, 1);

    for (i = 0; i < INTER_MODE_CONTEXTS; i++) {
      MB_PREDICTION_MODE m;

      for (m = NEARESTMV; m < MB_MODE_COUNT; m++)
        cpi->mb.inter_mode_cost[i][INTER_OFFSET(m)] =
            cost_token(vp9_inter_mode_tree,
                       cm->fc.inter_mode_probs[i],
                       &vp9_inter_mode_encodings[INTER_OFFSET(m)]);
    }
  }
}
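
// A note on units (a sketch of the convention, as the author reads it):
// rates are accumulated in 1/256-bit fixed point, and the RDCOST() macro
// (defined in vp9_rdopt.h) combines them roughly as
//   J = (rate * RDMULT) >> 8  +  distortion << RDDIV,
// so with cpi->RDDIV = RDDIV_BITS the distortion term is effectively
// multiplied by 128, matching the comment in vp9_initialize_rd_consts()
// above. errorperbit is the same lambda expressed per bit, used by the
// motion search.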

static INLINE void linear_interpolate2(double x, int ntab, int inv_step,
                                       const double *tab1, const double *tab2,
                                       double *v1, double *v2) {
  double y = x * inv_step;
  int d = (int) y;
  if (d >= ntab - 1) {
    *v1 = tab1[ntab - 1];
    *v2 = tab2[ntab - 1];
  } else {
    double a = y - d;
    *v1 = tab1[d] * (1 - a) + tab1[d + 1] * a;
    *v2 = tab2[d] * (1 - a) + tab2[d + 1] * a;
  }
}

static void model_rd_norm(double x, double *R, double *D) {
  static const int inv_tab_step = 8;
  static const int tab_size = 120;
  // NOTE: The tables below must be of the same size.
  //
  // Normalized rate
  // This table models the rate for a Laplacian source with given variance
  // when quantized with a uniform quantizer with given stepsize. The
  // closed form expression is:
  //   Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
  // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
  // and H(x) is the binary entropy function.
  static const double rate_tab[] = {
    64.00, 4.944, 3.949, 3.372, 2.966, 2.655, 2.403, 2.194,
    2.014, 1.858, 1.720, 1.596, 1.485, 1.384, 1.291, 1.206,
    1.127, 1.054, 0.986, 0.923, 0.863, 0.808, 0.756, 0.708,
    0.662, 0.619, 0.579, 0.541, 0.506, 0.473, 0.442, 0.412,
    0.385, 0.359, 0.335, 0.313, 0.291, 0.272, 0.253, 0.236,
    0.220, 0.204, 0.190, 0.177, 0.165, 0.153, 0.142, 0.132,
    0.123, 0.114, 0.106, 0.099, 0.091, 0.085, 0.079, 0.073,
    0.068, 0.063, 0.058, 0.054, 0.050, 0.047, 0.043, 0.040,
    0.037, 0.034, 0.032, 0.029, 0.027, 0.025, 0.023, 0.022,
    0.020, 0.019, 0.017, 0.016, 0.015, 0.014, 0.013, 0.012,
    0.011, 0.010, 0.009, 0.008, 0.008, 0.007, 0.007, 0.006,
    0.006, 0.005, 0.005, 0.005, 0.004, 0.004, 0.004, 0.003,
    0.003, 0.003, 0.003, 0.002, 0.002, 0.002, 0.002, 0.002,
    0.002, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001,
    0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.000,
  };
  // Normalized distortion
  // This table models the normalized distortion for a Laplacian source
  // with given variance when quantized with a uniform quantizer with
  // given stepsize. The closed form expression is:
  //   Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2)),
  // where x = qpstep / sqrt(variance).
  // Note the actual distortion is Dn * variance.
  static const double dist_tab[] = {
    0.000, 0.001, 0.005, 0.012, 0.021, 0.032, 0.045, 0.061,
    0.079, 0.098, 0.119, 0.142, 0.166, 0.190, 0.216, 0.242,
    0.269, 0.296, 0.324, 0.351, 0.378, 0.405, 0.432, 0.458,
    0.484, 0.509, 0.534, 0.557, 0.580, 0.603, 0.624, 0.645,
    0.664, 0.683, 0.702, 0.719, 0.735, 0.751, 0.766, 0.780,
    0.794, 0.807, 0.819, 0.830, 0.841, 0.851, 0.861, 0.870,
    0.878, 0.886, 0.894, 0.901, 0.907, 0.913, 0.919, 0.925,
    0.930, 0.935, 0.939, 0.943, 0.947, 0.951, 0.954, 0.957,
    0.960, 0.963, 0.966, 0.968, 0.971, 0.973, 0.975, 0.976,
    0.978, 0.980, 0.981, 0.982, 0.984, 0.985, 0.986, 0.987,
    0.988, 0.989, 0.990, 0.990, 0.991, 0.992, 0.992, 0.993,
    0.993, 0.994, 0.994, 0.995, 0.995, 0.996, 0.996, 0.996,
    0.996, 0.997, 0.997, 0.997, 0.997, 0.998, 0.998, 0.998,
    0.998, 0.998, 0.998, 0.999, 0.999, 0.999, 0.999, 0.999,
    0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 1.000,
  };
  /*
  assert(sizeof(rate_tab) == tab_size * sizeof(rate_tab[0]));
  assert(sizeof(dist_tab) == tab_size * sizeof(dist_tab[0]));
  assert(sizeof(rate_tab) == sizeof(dist_tab));
  */
  assert(x >= 0.0);
  linear_interpolate2(x, tab_size, inv_tab_step,
                      rate_tab, dist_tab, R, D);
}
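
#if 0
// Illustrative sketch (not compiled into the encoder): direct evaluation
// of the closed forms the tables above were sampled from, e.g. handy when
// regenerating them with a different tab_size or inv_tab_step. Spot check:
// laplace_norm_rate(1.0) ~= 2.014 and laplace_norm_dist(1.0) ~= 0.079,
// matching rate_tab[8] and dist_tab[8] (x = index / inv_tab_step).
static double binary_entropy(double p) {
  if (p <= 0.0 || p >= 1.0)
    return 0.0;
  return (-p * log(p) - (1.0 - p) * log(1.0 - p)) / log(2.0);
}

// Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)], r = exp(-sqrt(2)*x).
static double laplace_norm_rate(double x) {
  const double r = exp(-sqrt(2.0) * x);
  if (x <= 0.0)
    return 64.00;  // cap used by the x == 0 entry of rate_tab above
  return binary_entropy(sqrt(r)) +
         sqrt(r) * (1.0 + binary_entropy(r) / (1.0 - r));
}

// Dn(x) = 1 - (x/sqrt(2)) / sinh(x/sqrt(2)); actual distortion is
// Dn * variance.
static double laplace_norm_dist(double x) {
  const double y = x / sqrt(2.0);
  return y > 0.0 ? 1.0 - y / sinh(y) : 0.0;
}
#endif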

static void model_rd_from_var_lapndz(int var, int n, int qstep,
                                     int *rate, int64_t *dist) {
  // This function models the rate and distortion for a Laplacian
  // source with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expressions are in:
  // Hang and Chen, "Source Model for transform video coder and its
  // application - Part I: Fundamental Theory", IEEE Trans. Circ.
  // Sys. for Video Tech., April 1997.
  vp9_clear_system_state();
  if (var == 0 || n == 0) {
    *rate = 0;
    *dist = 0;
  } else {
    double D, R;
    double s2 = (double) var / n;
    double x = qstep / sqrt(s2);
    model_rd_norm(x, &R, &D);
    *rate = (int)((n << 8) * R + 0.5);  // R is bits/pixel; 1/256-bit units
    *dist = (int)(var * D + 0.5);
  }
  vp9_clear_system_state();
}

static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
                            MACROBLOCK *x, MACROBLOCKD *xd,
                            int *out_rate_sum, int64_t *out_dist_sum) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence the quantizer step is also 8 times. To get the effective
  // quantizer we need to divide by 8 before sending to the modeling
  // function.
  int i, rate_sum = 0, dist_sum = 0;

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
    unsigned int sse;
    int rate;
    int64_t dist;
    (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
                              pd->dst.buf, pd->dst.stride, &sse);
    // sse works better than var, since there is no dc prediction used
    model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
                             pd->dequant[1] >> 3, &rate, &dist);

    rate_sum += rate;
    dist_sum += (int)dist;
  }

  *out_rate_sum = rate_sum;
  *out_dist_sum = dist_sum << 4;
}

static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE bsize,
                                 TX_SIZE tx_size,
                                 MACROBLOCK *x, MACROBLOCKD *xd,
                                 int *out_rate_sum, int64_t *out_dist_sum,
                                 int *out_skip) {
  int j, k;
  BLOCK_SIZE bs;
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
  const int width = 4 << num_4x4_blocks_wide_lookup[bsize];
  const int height = 4 << num_4x4_blocks_high_lookup[bsize];
  int rate_sum = 0;
  int64_t dist_sum = 0;
  const int t = 4 << tx_size;

  if (tx_size == TX_4X4) {
    bs = BLOCK_4X4;
  } else if (tx_size == TX_8X8) {
    bs = BLOCK_8X8;
  } else if (tx_size == TX_16X16) {
    bs = BLOCK_16X16;
  } else if (tx_size == TX_32X32) {
    bs = BLOCK_32X32;
  } else {
    assert(0);
  }

  *out_skip = 1;
  for (j = 0; j < height; j += t) {
    for (k = 0; k < width; k += t) {
      int rate;
      int64_t dist;
      unsigned int sse;
      cpi->fn_ptr[bs].vf(&p->src.buf[j * p->src.stride + k], p->src.stride,
                         &pd->dst.buf[j * pd->dst.stride + k], pd->dst.stride,
                         &sse);
      // sse works better than var, since there is no dc prediction used
      model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3, &rate, &dist);
      rate_sum += rate;
      dist_sum += dist;
      *out_skip &= (rate < 1024);
    }
  }

  *out_rate_sum = rate_sum;
  *out_dist_sum = dist_sum << 4;
}

int64_t vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    int this_diff = coeff[i] - dqcoeff[i];
    error += (unsigned)this_diff * this_diff;
    sqcoeff += (unsigned) coeff[i] * coeff[i];
  }

  *ssz = sqcoeff;
  return error;
}

/* The trailing '0' is a terminator which is used inside cost_coeffs() to
 * decide whether to include the cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in an 8x8 block,
 * is non-zero).
 */
static const int16_t band_counts[TX_SIZES][8] = {
  { 1, 2, 3, 4,  3,   16 - 13, 0 },
  { 1, 2, 3, 4, 11,   64 - 21, 0 },
  { 1, 2, 3, 4, 11,  256 - 21, 0 },
  { 1, 2, 3, 4, 11, 1024 - 21, 0 },
};
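
// Worked trace (illustrative) of the terminator for a fully-coded 4x4
// block: cost_coeffs() starts reading at band_counts[TX_4X4][1], so after
// the dc token the counts 2 + 3 + 4 + 3 + 3 cover the 15 ac positions.
// When the 16th coefficient is non-zero, the final decrement refills
// band_left from the trailing 0, the post-loop `if (band_left)` test
// fails, and no EOB token is costed -- exactly as described above.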

static INLINE int cost_coeffs(MACROBLOCK *x,
                              int plane, int block,
                              ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
                              TX_SIZE tx_size,
                              const int16_t *scan, const int16_t *nb) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
  struct macroblockd_plane *pd = &xd->plane[plane];
  const PLANE_TYPE type = pd->plane_type;
  const int16_t *band_count = &band_counts[tx_size][1];
  const int eob = pd->eobs[block];
  const int16_t *const qcoeff_ptr = BLOCK_OFFSET(pd->qcoeff, block);
  const int ref = mbmi->ref_frame[0] != INTRA_FRAME;
  unsigned int (*token_costs)[2][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
      x->token_costs[tx_size][type][ref];
  const ENTROPY_CONTEXT above_ec = !!*A, left_ec = !!*L;
  uint8_t *p_tok = x->token_cache;
  int pt = combine_entropy_contexts(above_ec, left_ec);
  int c, cost;

  // Check for consistency of tx_size with mode info
  assert(type == PLANE_TYPE_Y_WITH_DC ? mbmi->tx_size == tx_size
                                      : get_uv_tx_size(mbmi) == tx_size);

  if (eob == 0) {
    // single eob token
    cost = token_costs[0][0][pt][DCT_EOB_TOKEN];
    c = 0;
  } else {
    int band_left = *band_count++;

    // dc token
    int v = qcoeff_ptr[0];
    int prev_t = vp9_dct_value_tokens_ptr[v].token;
    cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
    p_tok[0] = vp9_pt_energy_class[prev_t];
    ++token_costs;

    // ac tokens
    for (c = 1; c < eob; c++) {
      const int rc = scan[c];
      int t;

      v = qcoeff_ptr[rc];
      t = vp9_dct_value_tokens_ptr[v].token;
      pt = get_coef_context(nb, p_tok, c);
      cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
      p_tok[rc] = vp9_pt_energy_class[t];
      prev_t = t;
      if (!--band_left) {
        band_left = *band_count++;
        ++token_costs;
      }
    }

    // eob token
    if (band_left) {
      pt = get_coef_context(nb, p_tok, c);
      cost += (*token_costs)[0][pt][DCT_EOB_TOKEN];
    }
  }

  // Update the entropy contexts: non-zero iff this block coded at least
  // one coefficient (i.e. eob was not zero).
  *A = *L = (c > 0);

  return cost;
}

static void dist_block(int plane, int block, TX_SIZE tx_size, void *arg) {
  const int ss_txfrm_size = tx_size << 1;
  struct rdcost_block_args* args = arg;
  MACROBLOCK* const x = args->x;
  MACROBLOCKD* const xd = &x->e_mbd;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  int64_t this_sse;
  int shift = args->tx_size == TX_32X32 ? 0 : 2;
  int16_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  args->dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                               &this_sse) >> shift;
  args->sse = this_sse >> shift;

  if (x->skip_encode &&
      xd->mi_8x8[0]->mbmi.ref_frame[0] == INTRA_FRAME) {
    // TODO(jingning): tune the model to better capture the distortion.
    int64_t p = (pd->dequant[1] * pd->dequant[1] *
                 (1 << ss_txfrm_size)) >> (shift + 2);
    args->dist += (p >> 4);
    args->sse += p;
  }
}

static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
                       TX_SIZE tx_size, void *arg) {
  struct rdcost_block_args* args = arg;

  int x_idx, y_idx;
  txfrm_block_to_raster_xy(plane_bsize, args->tx_size, block, &x_idx, &y_idx);

  args->rate = cost_coeffs(args->x, plane, block, args->t_above + x_idx,
                           args->t_left + y_idx, args->tx_size,
                           args->scan, args->nb);
}

static void block_yrd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
                           TX_SIZE tx_size, void *arg) {
  struct rdcost_block_args *args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct encode_b_args encode_args = {x, NULL};
  int64_t rd1, rd2, rd;

  if (args->skip)
    return;

  if (!is_inter_block(&xd->mi_8x8[0]->mbmi))
    vp9_encode_block_intra(plane, block, plane_bsize, tx_size, &encode_args);
  else
    vp9_xform_quant(plane, block, plane_bsize, tx_size, &encode_args);

  dist_block(plane, block, tx_size, args);
  rate_block(plane, block, plane_bsize, tx_size, args);
  rd1 = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist);
  rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse);

  // TODO(jingning): temporarily enabled only for luma component
  rd = MIN(rd1, rd2);
  if (!xd->lossless && plane == 0)
    x->zcoeff_blk[tx_size][block] = rd1 > rd2 || !xd->plane[plane].eobs[block];

  args->this_rate += args->rate;
  args->this_dist += args->dist;
  args->this_sse += args->sse;
  args->this_rd += rd;

  if (args->this_rd > args->best_rd) {
    args->skip = 1;
    return;
  }
}

void vp9_get_entropy_contexts(TX_SIZE tx_size,
                              ENTROPY_CONTEXT t_above[16],
                              ENTROPY_CONTEXT t_left[16],
                              const ENTROPY_CONTEXT *above,
                              const ENTROPY_CONTEXT *left,
                              int num_4x4_w, int num_4x4_h) {
  int i;
  switch (tx_size) {
    case TX_4X4:
      vpx_memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
      vpx_memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
      break;
    case TX_8X8:
      for (i = 0; i < num_4x4_w; i += 2)
        t_above[i] = !!*(const uint16_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 2)
        t_left[i] = !!*(const uint16_t *)&left[i];
      break;
    case TX_16X16:
      for (i = 0; i < num_4x4_w; i += 4)
        t_above[i] = !!*(const uint32_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 4)
        t_left[i] = !!*(const uint32_t *)&left[i];
      break;
    case TX_32X32:
      for (i = 0; i < num_4x4_w; i += 8)
        t_above[i] = !!*(const uint64_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 8)
        t_left[i] = !!*(const uint64_t *)&left[i];
      break;
    default:
      assert(!"Invalid transform size.");
  }
}
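
// A note on the wide loads above: ENTROPY_CONTEXT is a single byte, so
// e.g. !!*(const uint16_t *)&above[i] collapses the two 4x4 contexts
// covered by one 8x8 block into "any non-zero", equivalent on any
// endianness to (above[i] != 0 || above[i + 1] != 0); the 32- and 64-bit
// loads do the same for four and eight contexts respectively.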

static void init_rdcost_stack(MACROBLOCK *x, TX_SIZE tx_size,
                              const int num_4x4_w, const int num_4x4_h,
                              const int64_t ref_rdcost,
                              struct rdcost_block_args *arg) {
  vpx_memset(arg, 0, sizeof(struct rdcost_block_args));
  arg->x = x;
  arg->tx_size = tx_size;
  arg->bw = num_4x4_w;
  arg->bh = num_4x4_h;
  arg->best_rd = ref_rdcost;
}

static void txfm_rd_in_plane(MACROBLOCK *x,
                             struct rdcost_block_args *rd_stack,
                             int *rate, int64_t *distortion,
                             int *skippable, int64_t *sse,
                             int64_t ref_best_rd, int plane,
                             BLOCK_SIZE bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
  const int num_4x4_w = num_4x4_blocks_wide_lookup[bs];
  const int num_4x4_h = num_4x4_blocks_high_lookup[bs];

  init_rdcost_stack(x, tx_size, num_4x4_w, num_4x4_h,
                    ref_best_rd, rd_stack);
  if (plane == 0)
    xd->mi_8x8[0]->mbmi.tx_size = tx_size;

  vp9_get_entropy_contexts(tx_size, rd_stack->t_above, rd_stack->t_left,
                           pd->above_context, pd->left_context,
                           num_4x4_w, num_4x4_h);

  get_scan(xd, tx_size, pd->plane_type, 0, &rd_stack->scan, &rd_stack->nb);

  foreach_transformed_block_in_plane(xd, bsize, plane,
                                     block_yrd_txfm, rd_stack);
  if (rd_stack->skip) {
    *rate = INT_MAX;
    *distortion = INT64_MAX;
    *sse = INT64_MAX;
    *skippable = 0;
  } else {
    *distortion = rd_stack->this_dist;
    *rate = rd_stack->this_rate;
    *sse = rd_stack->this_sse;
    *skippable = vp9_is_skippable_in_plane(xd, bsize, plane);
  }
}

static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x,
                                     int *rate, int64_t *distortion,
                                     int *skip, int64_t *sse,
                                     int64_t ref_best_rd,
                                     BLOCK_SIZE bs) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  VP9_COMMON *const cm = &cpi->common;
  const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;

  mbmi->tx_size = MIN(max_tx_size, largest_tx_size);

  txfm_rd_in_plane(x, &cpi->rdcost_stack, rate, distortion, skip,
                   &sse[mbmi->tx_size], ref_best_rd, 0, bs,
                   mbmi->tx_size);
  cpi->tx_stepdown_count[0]++;
}

static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
                                     int (*r)[2], int *rate,
                                     int64_t *d, int64_t *distortion,
                                     int *s, int *skip,
                                     int64_t tx_cache[TX_MODES],
                                     BLOCK_SIZE bs) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
  vp9_prob skip_prob = vp9_get_pred_prob_mbskip(cm, xd);
  int64_t rd[TX_SIZES][2];
  int n, m;
  int s0, s1;

  const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);

  // r[n][1] adds the cost of signalling tx_size n with the unary tree
  // code: a "one" bit per smaller size, then a terminating "zero"
  // (omitted when n is the largest allowed size).
  for (n = TX_4X4; n <= max_tx_size; n++) {
    r[n][1] = r[n][0];
    if (r[n][0] == INT_MAX)
      continue;
    for (m = 0; m <= n - (n == max_tx_size); m++) {
      if (m == n)
        r[n][1] += vp9_cost_zero(tx_probs[m]);
      else
        r[n][1] += vp9_cost_one(tx_probs[m]);
    }
  }

  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);

  for (n = TX_4X4; n <= max_tx_size; n++) {
    if (d[n] == INT64_MAX) {
      rd[n][0] = rd[n][1] = INT64_MAX;
      continue;
    }
    if (s[n]) {
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
    }
  }

  if (max_tx_size == TX_32X32 &&
      (cm->tx_mode == ALLOW_32X32 ||
       (cm->tx_mode == TX_MODE_SELECT &&
        rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
        rd[TX_32X32][1] < rd[TX_4X4][1]))) {
    mbmi->tx_size = TX_32X32;
  } else if (max_tx_size >= TX_16X16 &&
             (cm->tx_mode == ALLOW_16X16 ||
              cm->tx_mode == ALLOW_32X32 ||
              (cm->tx_mode == TX_MODE_SELECT &&
               rd[TX_16X16][1] < rd[TX_8X8][1] &&
               rd[TX_16X16][1] < rd[TX_4X4][1]))) {
    mbmi->tx_size = TX_16X16;
  } else if (cm->tx_mode == ALLOW_8X8 ||
             cm->tx_mode == ALLOW_16X16 ||
             cm->tx_mode == ALLOW_32X32 ||
             (cm->tx_mode == TX_MODE_SELECT &&
              rd[TX_8X8][1] < rd[TX_4X4][1])) {
    mbmi->tx_size = TX_8X8;
  } else {
    mbmi->tx_size = TX_4X4;
  }

  *distortion = d[mbmi->tx_size];
  *rate = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT];
  *skip = s[mbmi->tx_size];

  tx_cache[ONLY_4X4] = rd[TX_4X4][0];
  tx_cache[ALLOW_8X8] = rd[TX_8X8][0];
  tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0];
  tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0];
  if (max_tx_size == TX_32X32 &&
      rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
      rd[TX_32X32][1] < rd[TX_4X4][1])
    tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
  else if (max_tx_size >= TX_16X16 &&
           rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1])
    tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
  else
    tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ?
                               rd[TX_4X4][1] : rd[TX_8X8][1];

  if (max_tx_size == TX_32X32 &&
      rd[TX_32X32][1] < rd[TX_16X16][1] &&
      rd[TX_32X32][1] < rd[TX_8X8][1] &&
      rd[TX_32X32][1] < rd[TX_4X4][1]) {
    cpi->tx_stepdown_count[0]++;
  } else if (max_tx_size >= TX_16X16 &&
             rd[TX_16X16][1] < rd[TX_8X8][1] &&
             rd[TX_16X16][1] < rd[TX_4X4][1]) {
    cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
  } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
    cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
  } else {
    cpi->tx_stepdown_count[max_tx_size - TX_4X4]++;
  }
}
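
// Reading the two rd columns above: rd[n][0] excludes the tx_size
// signalling cost and feeds the fixed tx-mode entries of tx_cache[],
// while rd[n][1] includes it and drives both the TX_MODE_SELECT entry
// and the actual tx_size decision.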

static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
                                          int (*r)[2], int *rate,
                                          int64_t *d, int64_t *distortion,
                                          int *s, int *skip, int64_t *sse,
                                          int64_t ref_best_rd,
                                          BLOCK_SIZE bs) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
  vp9_prob skip_prob = vp9_get_pred_prob_mbskip(cm, xd);
  int64_t rd[TX_SIZES][2];
  int n, m;
  int s0, s1;
  double scale_rd[TX_SIZES] = {1.73, 1.44, 1.20, 1.00};
  // double scale_r[TX_SIZES] = {2.82, 2.00, 1.41, 1.00};

  const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);

  // for (n = TX_4X4; n <= max_txfm_size; n++)
  //   r[n][0] = (r[n][0] * scale_r[n]);

  for (n = TX_4X4; n <= max_tx_size; n++) {
    r[n][1] = r[n][0];
    for (m = 0; m <= n - (n == max_tx_size); m++) {
      if (m == n)
        r[n][1] += vp9_cost_zero(tx_probs[m]);
      else
        r[n][1] += vp9_cost_one(tx_probs[m]);
    }
  }

  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);

  for (n = TX_4X4; n <= max_tx_size; n++) {
    if (s[n]) {
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
    }
  }
  for (n = TX_4X4; n <= max_tx_size; n++) {
    rd[n][0] = (int64_t)(scale_rd[n] * rd[n][0]);
    rd[n][1] = (int64_t)(scale_rd[n] * rd[n][1]);
  }

  if (max_tx_size == TX_32X32 &&
      (cm->tx_mode == ALLOW_32X32 ||
       (cm->tx_mode == TX_MODE_SELECT &&
        rd[TX_32X32][1] <= rd[TX_16X16][1] &&
        rd[TX_32X32][1] <= rd[TX_8X8][1] &&
        rd[TX_32X32][1] <= rd[TX_4X4][1]))) {
    mbmi->tx_size = TX_32X32;
  } else if (max_tx_size >= TX_16X16 &&
             (cm->tx_mode == ALLOW_16X16 ||
              cm->tx_mode == ALLOW_32X32 ||
              (cm->tx_mode == TX_MODE_SELECT &&
               rd[TX_16X16][1] <= rd[TX_8X8][1] &&
               rd[TX_16X16][1] <= rd[TX_4X4][1]))) {
    mbmi->tx_size = TX_16X16;
  } else if (cm->tx_mode == ALLOW_8X8 ||
             cm->tx_mode == ALLOW_16X16 ||
             cm->tx_mode == ALLOW_32X32 ||
             (cm->tx_mode == TX_MODE_SELECT &&
              rd[TX_8X8][1] <= rd[TX_4X4][1])) {
    mbmi->tx_size = TX_8X8;
  } else {
    mbmi->tx_size = TX_4X4;
  }

  // Actually encode using the chosen mode if a model was used, but do not
  // update the r, d costs
  txfm_rd_in_plane(x, &cpi->rdcost_stack, rate, distortion, skip,
                   &sse[mbmi->tx_size], ref_best_rd, 0, bs, mbmi->tx_size);

  if (max_tx_size == TX_32X32 &&
      rd[TX_32X32][1] <= rd[TX_16X16][1] &&
      rd[TX_32X32][1] <= rd[TX_8X8][1] &&
      rd[TX_32X32][1] <= rd[TX_4X4][1]) {
    cpi->tx_stepdown_count[0]++;
  } else if (max_tx_size >= TX_16X16 &&
             rd[TX_16X16][1] <= rd[TX_8X8][1] &&
             rd[TX_16X16][1] <= rd[TX_4X4][1]) {
    cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
  } else if (rd[TX_8X8][1] <= rd[TX_4X4][1]) {
    cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
  } else {
    cpi->tx_stepdown_count[max_tx_size - TX_4X4]++;
  }
}

static void super_block_yrd(VP9_COMP *cpi,
                            MACROBLOCK *x, int *rate, int64_t *distortion,
                            int *skip, int64_t *psse, BLOCK_SIZE bs,
                            int64_t txfm_cache[TX_MODES],
                            int64_t ref_best_rd) {
  int r[TX_SIZES][2], s[TX_SIZES];
  int64_t d[TX_SIZES], sse[TX_SIZES];
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
  struct rdcost_block_args *rdcost_stack = &cpi->rdcost_stack;
  const int b_inter_mode = is_inter_block(mbmi);

  assert(bs == mbmi->sb_type);
  if (b_inter_mode)
    vp9_subtract_sby(x, bs);

  if (cpi->sf.tx_size_search_method == USE_LARGESTALL ||
      (cpi->sf.tx_size_search_method != USE_FULL_RD &&
       !b_inter_mode)) {
    vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
    choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse,
                             ref_best_rd, bs);
    if (psse)
      *psse = sse[mbmi->tx_size];
    return;
  }

  if (cpi->sf.tx_size_search_method == USE_LARGESTINTRA_MODELINTER &&
      b_inter_mode) {
    if (bs >= BLOCK_32X32)
      model_rd_for_sb_y_tx(cpi, bs, TX_32X32, x, xd,
                           &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32]);
    if (bs >= BLOCK_16X16)
      model_rd_for_sb_y_tx(cpi, bs, TX_16X16, x, xd,
                           &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16]);

    model_rd_for_sb_y_tx(cpi, bs, TX_8X8, x, xd,
                         &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8]);

    model_rd_for_sb_y_tx(cpi, bs, TX_4X4, x, xd,
                         &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4]);

    choose_txfm_size_from_modelrd(cpi, x, r, rate, d, distortion, s,
                                  skip, sse, ref_best_rd, bs);
  } else {
    if (bs >= BLOCK_32X32)
      txfm_rd_in_plane(x, rdcost_stack, &r[TX_32X32][0], &d[TX_32X32],
                       &s[TX_32X32], &sse[TX_32X32],
                       ref_best_rd, 0, bs, TX_32X32);
    if (bs >= BLOCK_16X16)
      txfm_rd_in_plane(x, rdcost_stack, &r[TX_16X16][0], &d[TX_16X16],
                       &s[TX_16X16], &sse[TX_16X16],
                       ref_best_rd, 0, bs, TX_16X16);
    txfm_rd_in_plane(x, rdcost_stack, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8],
                     &sse[TX_8X8], ref_best_rd, 0, bs, TX_8X8);
    txfm_rd_in_plane(x, rdcost_stack, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4],
                     &sse[TX_4X4], ref_best_rd, 0, bs, TX_4X4);
    choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s,
                             skip, txfm_cache, bs);
  }
  if (psse)
    *psse = sse[mbmi->tx_size];
}

static int conditional_skipintra(MB_PREDICTION_MODE mode,
                                 MB_PREDICTION_MODE best_intra_mode) {
  if (mode == D117_PRED &&
      best_intra_mode != V_PRED &&
      best_intra_mode != D135_PRED)
    return 1;
  if (mode == D63_PRED &&
      best_intra_mode != V_PRED &&
      best_intra_mode != D45_PRED)
    return 1;
  if (mode == D207_PRED &&
      best_intra_mode != H_PRED &&
      best_intra_mode != D45_PRED)
    return 1;
  if (mode == D153_PRED &&
      best_intra_mode != H_PRED &&
      best_intra_mode != D135_PRED)
    return 1;
  return 0;
}

static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
                                     MB_PREDICTION_MODE *best_mode,
                                     int *bmode_costs,
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                     int *bestrate, int *bestratey,
                                     int64_t *bestdistortion,
                                     BLOCK_SIZE bsize, int64_t rd_thresh) {
  MB_PREDICTION_MODE mode;
  MACROBLOCKD *xd = &x->e_mbd;
  int64_t best_rd = rd_thresh;
  int rate = 0;
  int64_t distortion;
  struct macroblock_plane *p = &x->plane[0];
  struct macroblockd_plane *pd = &xd->plane[0];
  const int src_stride = p->src.stride;
  const int dst_stride = pd->dst.stride;
  uint8_t *src_init = raster_block_offset_uint8(BLOCK_8X8, ib,
                                                p->src.buf, src_stride);
  uint8_t *dst_init = raster_block_offset_uint8(BLOCK_8X8, ib,
                                                pd->dst.buf, dst_stride);
  int16_t *src_diff, *coeff;

  ENTROPY_CONTEXT ta[2], tempa[2];
  ENTROPY_CONTEXT tl[2], templ[2];

  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  int idx, idy;
  uint8_t best_dst[8 * 8];

  assert(ib < 4);

  vpx_memcpy(ta, a, sizeof(ta));
  vpx_memcpy(tl, l, sizeof(tl));
  xd->mi_8x8[0]->mbmi.tx_size = TX_4X4;

  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    int64_t this_rd;
    int ratey = 0;

    if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
      continue;

    // Only do the oblique modes if the best so far is
    // one of the neighboring directional modes
    if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
      if (conditional_skipintra(mode, *best_mode))
        continue;
    }

    rate = bmode_costs[mode];
    distortion = 0;

    vpx_memcpy(tempa, ta, sizeof(ta));
    vpx_memcpy(templ, tl, sizeof(tl));

    for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
      for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
        int64_t ssz;
        const int16_t *scan;
        const int16_t *nb;
        uint8_t *src = src_init + idx * 4 + idy * 4 * src_stride;
        uint8_t *dst = dst_init + idx * 4 + idy * 4 * dst_stride;
        const int block = ib + idy * 2 + idx;
        TX_TYPE tx_type;
        xd->mi_8x8[0]->bmi[block].as_mode = mode;
        src_diff = raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
        coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
        vp9_predict_intra_block(xd, block, 1,
                                TX_4X4, mode,
                                x->skip_encode ? src : dst,
                                x->skip_encode ? src_stride : dst_stride,
                                dst, dst_stride);
        vp9_subtract_block(4, 4, src_diff, 8,
                           src, src_stride,
                           dst, dst_stride);

        tx_type = get_tx_type_4x4(PLANE_TYPE_Y_WITH_DC, xd, block);
        get_scan_nb_4x4(tx_type, &scan, &nb);

        if (tx_type != DCT_DCT)
          vp9_short_fht4x4(src_diff, coeff, 8, tx_type);
        else
          x->fwd_txm4x4(src_diff, coeff, 8);

        vp9_regular_quantize_b_4x4(x, 4, block, scan, get_iscan_4x4(tx_type));

        ratey += cost_coeffs(x, 0, block,
                             tempa + idx, templ + idy, TX_4X4, scan, nb);
        distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block),
                                      16, &ssz) >> 2;
        if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
          goto next;

        if (tx_type != DCT_DCT)
          vp9_iht4x4_16_add(BLOCK_OFFSET(pd->dqcoeff, block),
                            dst, pd->dst.stride, tx_type);
        else
          xd->itxm_add(BLOCK_OFFSET(pd->dqcoeff, block), dst, pd->dst.stride,
                       16);
      }
    }

    rate += ratey;
    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

    if (this_rd < best_rd) {
      *bestrate = rate;
      *bestratey = ratey;
      *bestdistortion = distortion;
      best_rd = this_rd;
      *best_mode = mode;
      vpx_memcpy(a, tempa, sizeof(tempa));
      vpx_memcpy(l, templ, sizeof(templ));
      for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
        vpx_memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
                   num_4x4_blocks_wide * 4);
    }
  next:
    {}
  }

  if (best_rd >= rd_thresh || x->skip_encode)
    return best_rd;

  for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
    vpx_memcpy(dst_init + idy * dst_stride, best_dst + idy * 8,
               num_4x4_blocks_wide * 4);

  return best_rd;
}

static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP * const cpi,
                                            MACROBLOCK * const mb,
                                            int * const rate,
                                            int * const rate_y,
                                            int64_t * const distortion,
                                            int64_t best_rd) {
  int i, j;
  MACROBLOCKD *const xd = &mb->e_mbd;
  MODE_INFO *const mic = xd->mi_8x8[0];
  const MODE_INFO *above_mi = xd->mi_8x8[-xd->mode_info_stride];
  const MODE_INFO *left_mi = xd->left_available ? xd->mi_8x8[-1] : NULL;
  const BLOCK_SIZE bsize = xd->mi_8x8[0]->mbmi.sb_type;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  int idx, idy;
  int cost = 0;
  int64_t total_distortion = 0;
  int tot_rate_y = 0;
  int64_t total_rd = 0;
  ENTROPY_CONTEXT t_above[4], t_left[4];
  int *bmode_costs;

  vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
  vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));

  bmode_costs = mb->mbmode_cost;

  // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8
  // block. The loops step by the sub-block size in 4x4 units, e.g. for
  // 4x8 sub-blocks (num_4x4_blocks_wide == 1, num_4x4_blocks_high == 2)
  // idy advances by 2 and idx by 1, visiting the two 4x8 halves.
  for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
    for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
      MB_PREDICTION_MODE best_mode = DC_PRED;
      int r = INT_MAX, ry = INT_MAX;
      int64_t d = INT64_MAX, this_rd = INT64_MAX;
      i = idy * 2 + idx;
      if (cpi->common.frame_type == KEY_FRAME) {
        const MB_PREDICTION_MODE A = above_block_mode(mic, above_mi, i);
        const MB_PREDICTION_MODE L = left_block_mode(mic, left_mi, i);

        bmode_costs = mb->y_mode_costs[A][L];
      }

      this_rd = rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs,
                                      t_above + idx, t_left + idy, &r, &ry, &d,
                                      bsize, best_rd - total_rd);
      if (this_rd >= best_rd - total_rd)
        return INT64_MAX;

      total_rd += this_rd;
      cost += r;
      total_distortion += d;
      tot_rate_y += ry;

      mic->bmi[i].as_mode = best_mode;
      for (j = 1; j < num_4x4_blocks_high; ++j)
        mic->bmi[i + j * 2].as_mode = best_mode;
      for (j = 1; j < num_4x4_blocks_wide; ++j)
        mic->bmi[i + j].as_mode = best_mode;

      if (total_rd >= best_rd)
        return INT64_MAX;
    }
  }

  *rate = cost;
  *rate_y = tot_rate_y;
  *distortion = total_distortion;
  mic->mbmi.mode = mic->bmi[3].as_mode;

  return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion);
}

static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                      int *rate, int *rate_tokenonly,
                                      int64_t *distortion, int *skippable,
                                      BLOCK_SIZE bsize,
                                      int64_t tx_cache[TX_MODES],
                                      int64_t best_rd) {
  MB_PREDICTION_MODE mode;
  MB_PREDICTION_MODE mode_selected = DC_PRED;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mic = xd->mi_8x8[0];
  int this_rate, this_rate_tokenonly, s;
  int64_t this_distortion, this_rd;
  TX_SIZE best_tx = TX_4X4;
  int i;
  int *bmode_costs = x->mbmode_cost;

  if (cpi->sf.tx_size_search_method == USE_FULL_RD)
    for (i = 0; i < TX_MODES; i++)
      tx_cache[i] = INT64_MAX;

  /* Y Search for intra prediction mode */
  for (mode = DC_PRED; mode <= TM_PRED; mode++) {
    int64_t local_tx_cache[TX_MODES];
    MODE_INFO *above_mi = xd->mi_8x8[-xd->mode_info_stride];
    MODE_INFO *left_mi = xd->left_available ? xd->mi_8x8[-1] : NULL;

    if (!(cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]] & (1 << mode)))
      continue;

    if (cpi->common.frame_type == KEY_FRAME) {
      const MB_PREDICTION_MODE A = above_block_mode(mic, above_mi, 0);
      const MB_PREDICTION_MODE L = left_block_mode(mic, left_mi, 0);

      bmode_costs = x->y_mode_costs[A][L];
    }
    mic->mbmi.mode = mode;

    super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, NULL,
                    bsize, local_tx_cache, best_rd);

    if (this_rate_tokenonly == INT_MAX)
      continue;

    this_rate = this_rate_tokenonly + bmode_costs[mode];
    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

    if (this_rd < best_rd) {
      mode_selected = mode;
      best_rd = this_rd;
      best_tx = mic->mbmi.tx_size;
      *rate = this_rate;
      *rate_tokenonly = this_rate_tokenonly;
      *distortion = this_distortion;
      *skippable = s;
    }

    if (cpi->sf.tx_size_search_method == USE_FULL_RD && this_rd < INT64_MAX) {
      for (i = 0; i < TX_MODES && local_tx_cache[i] < INT64_MAX; i++) {
        const int64_t adj_rd = this_rd + local_tx_cache[i] -
                               local_tx_cache[cpi->common.tx_mode];
        if (adj_rd < tx_cache[i]) {
          tx_cache[i] = adj_rd;
        }
      }
    }
  }

  mic->mbmi.mode = mode_selected;
  mic->mbmi.tx_size = best_tx;

  return best_rd;
}

static void super_block_uvrd(VP9_COMP *const cpi, MACROBLOCK *x,
                             int *rate, int64_t *distortion, int *skippable,
                             int64_t *sse, BLOCK_SIZE bsize,
                             int64_t ref_best_rd) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
  TX_SIZE uv_txfm_size = get_uv_tx_size(mbmi);
  int plane;
  int pnrate = 0, pnskip = 1;
  int64_t pndist = 0, pnsse = 0;

  if (ref_best_rd < 0)
    goto term;

  if (is_inter_block(mbmi))
    vp9_subtract_sbuv(x, bsize);

  *rate = 0;
  *distortion = 0;
  *sse = 0;
  *skippable = 1;

  for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
    txfm_rd_in_plane(x, &cpi->rdcost_stack, &pnrate, &pndist, &pnskip, &pnsse,
                     ref_best_rd, plane, bsize, uv_txfm_size);
    if (pnrate == INT_MAX)
      goto term;
    *rate += pnrate;
    *distortion += pndist;
    *sse += pnsse;
    *skippable &= pnskip;
  }
  return;

 term:
  *rate = INT_MAX;
  *distortion = INT64_MAX;
  *sse = INT64_MAX;
  *skippable = 0;
  return;
}

static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                       PICK_MODE_CONTEXT *ctx,
                                       int *rate, int *rate_tokenonly,
                                       int64_t *distortion, int *skippable,
                                       BLOCK_SIZE bsize) {
  MB_PREDICTION_MODE mode;
  MB_PREDICTION_MODE mode_selected = DC_PRED;
  int64_t best_rd = INT64_MAX, this_rd;
  int this_rate_tokenonly, this_rate, s;
  int64_t this_distortion, this_sse;

  // int mode_mask = (bsize <= BLOCK_8X8)
  //                 ? ALL_INTRA_MODES : cpi->sf.intra_uv_mode_mask;

  for (mode = DC_PRED; mode <= TM_PRED; mode++) {
    // if (!(mode_mask & (1 << mode)))
    if (!(cpi->sf.intra_uv_mode_mask[max_uv_txsize_lookup[bsize]]
          & (1 << mode)))
      continue;

    x->e_mbd.mi_8x8[0]->mbmi.uv_mode = mode;

    super_block_uvrd(cpi, x, &this_rate_tokenonly,
                     &this_distortion, &s, &this_sse, bsize, best_rd);
    if (this_rate_tokenonly == INT_MAX)
      continue;
    this_rate = this_rate_tokenonly +
                x->intra_uv_mode_cost[cpi->common.frame_type][mode];
    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

    if (this_rd < best_rd) {
      mode_selected = mode;
      best_rd = this_rd;
      *rate = this_rate;
      *rate_tokenonly = this_rate_tokenonly;
      *distortion = this_distortion;
      *skippable = s;
      if (!x->select_txfm_size) {
        int i;
        struct macroblock_plane *const p = x->plane;
        struct macroblockd_plane *const pd = x->e_mbd.plane;
        for (i = 1; i < MAX_MB_PLANE; ++i) {
          p[i].coeff = ctx->coeff_pbuf[i][2];
          pd[i].qcoeff = ctx->qcoeff_pbuf[i][2];
          pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2];
          pd[i].eobs = ctx->eobs_pbuf[i][2];

          ctx->coeff_pbuf[i][2] = ctx->coeff_pbuf[i][0];
          ctx->qcoeff_pbuf[i][2] = ctx->qcoeff_pbuf[i][0];
          ctx->dqcoeff_pbuf[i][2] = ctx->dqcoeff_pbuf[i][0];
          ctx->eobs_pbuf[i][2] = ctx->eobs_pbuf[i][0];

          ctx->coeff_pbuf[i][0] = p[i].coeff;
          ctx->qcoeff_pbuf[i][0] = pd[i].qcoeff;
          ctx->dqcoeff_pbuf[i][0] = pd[i].dqcoeff;
          ctx->eobs_pbuf[i][0] = pd[i].eobs;
        }
      }
    }
  }

  x->e_mbd.mi_8x8[0]->mbmi.uv_mode = mode_selected;

  return best_rd;
}

static int64_t rd_sbuv_dcpred(VP9_COMP *cpi, MACROBLOCK *x,
                              int *rate, int *rate_tokenonly,
                              int64_t *distortion, int *skippable,
                              BLOCK_SIZE bsize) {
  int64_t this_rd;
  int64_t this_sse;

  x->e_mbd.mi_8x8[0]->mbmi.uv_mode = DC_PRED;
  super_block_uvrd(cpi, x, rate_tokenonly, distortion,
                   skippable, &this_sse, bsize, INT64_MAX);
  *rate = *rate_tokenonly +
          x->intra_uv_mode_cost[cpi->common.frame_type][DC_PRED];
  this_rd = RDCOST(x->rdmult, x->rddiv, *rate, *distortion);

  return this_rd;
}

static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
                                 BLOCK_SIZE bsize, int *rate_uv,
                                 int *rate_uv_tokenonly,
                                 int64_t *dist_uv, int *skip_uv,
                                 MB_PREDICTION_MODE *mode_uv) {
  MACROBLOCK *const x = &cpi->mb;

  // Use an estimated rd for uv_intra based on DC_PRED if the
  // appropriate speed flag is set.
  if (cpi->sf.use_uv_intra_rd_estimate) {
    rd_sbuv_dcpred(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
                   bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
  // Else do a proper rd search for each possible transform size that may
  // be considered in the main rd loop.
  } else {
    rd_pick_intra_sbuv_mode(cpi, x, ctx,
                            rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
                            bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
  }
  *mode_uv = x->e_mbd.mi_8x8[0]->mbmi.uv_mode;
}

static int cost_mv_ref(VP9_COMP *cpi, MB_PREDICTION_MODE mode,
                       int mode_context) {
  MACROBLOCK *const x = &cpi->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int segment_id = xd->mi_8x8[0]->mbmi.segment_id;

  // Don't account for mode here if segment skip is enabled.
  if (!vp9_segfeature_active(&cpi->common.seg, segment_id, SEG_LVL_SKIP)) {
    assert(is_inter_mode(mode));
    return x->inter_mode_cost[mode_context][INTER_OFFSET(mode)];
  } else {
    return 0;
  }
}

void vp9_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv) {
  x->e_mbd.mi_8x8[0]->mbmi.mode = mb;
  x->e_mbd.mi_8x8[0]->mbmi.mv[0].as_int = mv->as_int;
}

static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
                                BLOCK_SIZE bsize,
                                int_mv *frame_mv,
                                int mi_row, int mi_col,
                                int_mv single_newmv[MAX_REF_FRAMES],
                                int *rate_mv);

static int labels2mode(MACROBLOCK *x, int i,
                       MB_PREDICTION_MODE this_mode,
                       int_mv *this_mv, int_mv *this_second_mv,
                       int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
                       int_mv seg_mvs[MAX_REF_FRAMES],
                       int_mv *best_ref_mv,
                       int_mv *second_best_ref_mv,
                       int *mvjcost, int *mvcost[2], VP9_COMP *cpi) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mic = xd->mi_8x8[0];
  MB_MODE_INFO *mbmi = &mic->mbmi;
  int cost = 0, thismvcost = 0;
  int idx, idy;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
  const int has_second_rf = has_second_ref(mbmi);

  /* We have to be careful retrieving previously-encoded motion vectors.
     Ones from this macroblock have to be pulled from the BLOCKD array
     as they have not yet made it to the bmi array in our MB_MODE_INFO. */
  MB_PREDICTION_MODE m;

  // The only time we should do costing for a new motion vector or mode
  // is when we are on a new label (jbb May 08, 2007).
  switch (m = this_mode) {
    case NEWMV:
      this_mv->as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
      thismvcost = vp9_mv_bit_cost(&this_mv->as_mv, &best_ref_mv->as_mv,
                                   mvjcost, mvcost, MV_COST_WEIGHT_SUB);
      if (has_second_rf) {
        this_second_mv->as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
        thismvcost += vp9_mv_bit_cost(&this_second_mv->as_mv,
                                      &second_best_ref_mv->as_mv,
                                      mvjcost, mvcost, MV_COST_WEIGHT_SUB);
      }
      break;
    case NEARESTMV:
      this_mv->as_int = frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int;
      if (has_second_rf)
        this_second_mv->as_int =
            frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int;
      break;
    case NEARMV:
      this_mv->as_int = frame_mv[NEARMV][mbmi->ref_frame[0]].as_int;
      if (has_second_rf)
        this_second_mv->as_int =
            frame_mv[NEARMV][mbmi->ref_frame[1]].as_int;
      break;
    case ZEROMV:
      this_mv->as_int = 0;
      if (has_second_rf)
        this_second_mv->as_int = 0;
      break;
    default:
      break;
  }

  cost = cost_mv_ref(cpi, this_mode,
                     mbmi->mode_context[mbmi->ref_frame[0]]);

  mic->bmi[i].as_mv[0].as_int = this_mv->as_int;
  if (has_second_rf)
    mic->bmi[i].as_mv[1].as_int = this_second_mv->as_int;

  mic->bmi[i].as_mode = m;

  for (idy = 0; idy < num_4x4_blocks_high; ++idy)
    for (idx = 0; idx < num_4x4_blocks_wide; ++idx)
      vpx_memcpy(&mic->bmi[i + idy * 2 + idx],
                 &mic->bmi[i], sizeof(mic->bmi[i]));

  cost += thismvcost;
  return cost;
}

static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
                                       MACROBLOCK *x,
                                       int64_t best_yrd,
                                       int i,
                                       int *labelyrate,
                                       int64_t *distortion, int64_t *sse,
                                       ENTROPY_CONTEXT *ta,
                                       ENTROPY_CONTEXT *tl) {
  int k;
  MACROBLOCKD *xd = &x->e_mbd;
  struct macroblockd_plane *const pd = &xd->plane[0];
  struct macroblock_plane *const p = &x->plane[0];
  MODE_INFO *const mi = xd->mi_8x8[0];
  const BLOCK_SIZE bsize = mi->mbmi.sb_type;
  const int width = plane_block_width(bsize, pd);
  const int height = plane_block_height(bsize, pd);
  int idx, idy;

  uint8_t *const src = raster_block_offset_uint8(BLOCK_8X8, i,
                                                 p->src.buf, p->src.stride);
  uint8_t *const dst = raster_block_offset_uint8(BLOCK_8X8, i,
                                                 pd->dst.buf, pd->dst.stride);
  int64_t thisdistortion = 0, thissse = 0;
  int thisrate = 0, ref;
  const int is_compound = has_second_ref(&mi->mbmi);
  for (ref = 0; ref < 1 + is_compound; ++ref) {
    const uint8_t *pre = raster_block_offset_uint8(BLOCK_8X8, i,
                                                   pd->pre[ref].buf,
                                                   pd->pre[ref].stride);
    vp9_build_inter_predictor(pre, pd->pre[ref].stride,
                              dst, pd->dst.stride,
                              &mi->bmi[i].as_mv[ref].as_mv,
                              &xd->scale_factor[ref],
                              width, height, ref, &xd->subpix,
                              MV_PRECISION_Q3);
  }

  vp9_subtract_block(height, width,
                     raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8,
                     src, p->src.stride,
                     dst, pd->dst.stride);

  k = i;
  for (idy = 0; idy < height / 4; ++idy) {
    for (idx = 0; idx < width / 4; ++idx) {
      int64_t ssz, rd, rd1, rd2;
      int16_t* coeff;

      k += (idy * 2 + idx);
      coeff = BLOCK_OFFSET(p->coeff, k);
      x->fwd_txm4x4(raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
                    coeff, 8);
      vp9_regular_quantize_b_4x4(x, 4, k, get_scan_4x4(DCT_DCT),
                                 get_iscan_4x4(DCT_DCT));
      thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
                                        16, &ssz);
      thissse += ssz;
      thisrate += cost_coeffs(x, 0, k,
                              ta + (k & 1),
                              tl + (k >> 1), TX_4X4,
                              vp9_default_scan_4x4,
                              vp9_default_scan_4x4_neighbors);
      rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2);
      rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2);
      rd = MIN(rd1, rd2);
      if (rd >= best_yrd)
        return INT64_MAX;
    }
  }

  *distortion = thisdistortion >> 2;
  *labelyrate = thisrate;
  *sse = thissse >> 2;

  return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
}

typedef struct {
  int eobs;
  int brate;
  int byrate;
  int64_t bdist;
  int64_t bsse;
  int64_t brdcost;
  int_mv mvs[2];
  ENTROPY_CONTEXT ta[2];
  ENTROPY_CONTEXT tl[2];
} SEG_RDSTAT;

typedef struct {
  int_mv *ref_mv, *second_ref_mv;
  int_mv mvp;

  int64_t segment_rd;
  int r;
  int64_t d;
  int64_t sse;
  int segment_yrate;
  MB_PREDICTION_MODE modes[4];
  SEG_RDSTAT rdstat[4][INTER_MODES];
  int mvthresh;
} BEST_SEG_INFO;

static INLINE int mv_check_bounds(MACROBLOCK *x, int_mv *mv) {
  int r = 0;
  // The >> 3 converts the eighth-pel motion vector to full-pel units.
  r |= (mv->as_mv.row >> 3) < x->mv_row_min;
  r |= (mv->as_mv.row >> 3) > x->mv_row_max;
  r |= (mv->as_mv.col >> 3) < x->mv_col_min;
  r |= (mv->as_mv.col >> 3) > x->mv_col_max;
  return r;
}

static INLINE void mi_buf_shift(MACROBLOCK *x, int i) {
  MB_MODE_INFO *const mbmi = &x->e_mbd.mi_8x8[0]->mbmi;
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &x->e_mbd.plane[0];

  p->src.buf = raster_block_offset_uint8(BLOCK_8X8, i, p->src.buf,
                                         p->src.stride);
  assert(((intptr_t)pd->pre[0].buf & 0x7) == 0);
  pd->pre[0].buf = raster_block_offset_uint8(BLOCK_8X8, i, pd->pre[0].buf,
                                             pd->pre[0].stride);
  if (has_second_ref(mbmi))
    pd->pre[1].buf = raster_block_offset_uint8(BLOCK_8X8, i, pd->pre[1].buf,
                                               pd->pre[1].stride);
}

static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src,
                                  struct buf_2d orig_pre[2]) {
  MB_MODE_INFO *mbmi = &x->e_mbd.mi_8x8[0]->mbmi;
  x->plane[0].src = orig_src;
  x->e_mbd.plane[0].pre[0] = orig_pre[0];
  if (has_second_ref(mbmi))
    x->e_mbd.plane[0].pre[1] = orig_pre[1];
}

static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
                                    const TileInfo *const tile,
                                    BEST_SEG_INFO *bsi_buf, int filter_idx,
                                    int_mv seg_mvs[4][MAX_REF_FRAMES],
                                    int mi_row, int mi_col) {
  int i, br = 0, idx, idy;
  int64_t bd = 0, block_sse = 0;
  MB_PREDICTION_MODE this_mode;
  MODE_INFO *mi = x->e_mbd.mi_8x8[0];
  MB_MODE_INFO *const mbmi = &mi->mbmi;
  struct macroblockd_plane *const pd = &x->e_mbd.plane[0];
  const int label_count = 4;
  int64_t this_segment_rd = 0;
  int label_mv_thresh;
  int segmentyrate = 0;
  const BLOCK_SIZE bsize = mbmi->sb_type;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  vp9_variance_fn_ptr_t *v_fn_ptr;
  ENTROPY_CONTEXT t_above[2], t_left[2];
  BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
  int mode_idx;
  int subpelmv = 1, have_ref = 0;
  const int has_second_rf = has_second_ref(mbmi);

  vpx_memcpy(t_above, pd->above_context, sizeof(t_above));
  vpx_memcpy(t_left, pd->left_context, sizeof(t_left));

  v_fn_ptr = &cpi->fn_ptr[bsize];

  // 64 makes this threshold really big, effectively making it so that we
  // very rarely check mvs on segments. Setting this to 1 would make the
  // mv threshold roughly equal to what it is for macroblocks.
  label_mv_thresh = 1 * bsi->mvthresh / label_count;

  // Segmentation method overheads
  for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
    for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
      // TODO(jingning,rbultje): rewrite the rate-distortion optimization
      // loop for 4x4/4x8/8x4 block coding; to be replaced with a new rd
      // loop.
to be replaced with new rd loop
      int_mv mode_mv[MB_MODE_COUNT], second_mode_mv[MB_MODE_COUNT];
      int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
      MB_PREDICTION_MODE mode_selected = ZEROMV;
      int64_t best_rd = INT64_MAX;
      i = idy * 2 + idx;

      frame_mv[ZEROMV][mbmi->ref_frame[0]].as_int = 0;
      vp9_append_sub8x8_mvs_for_idx(&cpi->common, &x->e_mbd, tile,
                                    &frame_mv[NEARESTMV][mbmi->ref_frame[0]],
                                    &frame_mv[NEARMV][mbmi->ref_frame[0]],
                                    i, 0, mi_row, mi_col);
      if (has_second_rf) {
        frame_mv[ZEROMV][mbmi->ref_frame[1]].as_int = 0;
        vp9_append_sub8x8_mvs_for_idx(&cpi->common, &x->e_mbd, tile,
                                      &frame_mv[NEARESTMV][mbmi->ref_frame[1]],
                                      &frame_mv[NEARMV][mbmi->ref_frame[1]],
                                      i, 1, mi_row, mi_col);
      }
      // search for the best motion vector on this segment
      for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
        const struct buf_2d orig_src = x->plane[0].src;
        struct buf_2d orig_pre[2];

        mode_idx = INTER_OFFSET(this_mode);
        bsi->rdstat[i][mode_idx].brdcost = INT64_MAX;

        // if we're near/nearest and mv == 0,0, compare to zeromv
        if ((this_mode == NEARMV || this_mode == NEARESTMV ||
             this_mode == ZEROMV) &&
            frame_mv[this_mode][mbmi->ref_frame[0]].as_int == 0 &&
            (!has_second_rf ||
             frame_mv[this_mode][mbmi->ref_frame[1]].as_int == 0)) {
          int rfc = mbmi->mode_context[mbmi->ref_frame[0]];
          int c1 = cost_mv_ref(cpi, NEARMV, rfc);
          int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
          int c3 = cost_mv_ref(cpi, ZEROMV, rfc);

          if (this_mode == NEARMV) {
            if (c1 > c3)
              continue;
          } else if (this_mode == NEARESTMV) {
            if (c2 > c3)
              continue;
          } else {
            assert(this_mode == ZEROMV);
            if (!has_second_rf) {
              if ((c3 >= c2 &&
                   frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0) ||
                  (c3 >= c1 &&
                   frame_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0))
                continue;
            } else {
              if ((c3 >= c2 &&
                   frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0 &&
                   frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int == 0) ||
                  (c3 >= c1 &&
                   frame_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0 &&
                   frame_mv[NEARMV][mbmi->ref_frame[1]].as_int == 0))
                continue;
            }
          }
        }

        vpx_memcpy(orig_pre, pd->pre, sizeof(orig_pre));
        vpx_memcpy(bsi->rdstat[i][mode_idx].ta, t_above,
                   sizeof(bsi->rdstat[i][mode_idx].ta));
        vpx_memcpy(bsi->rdstat[i][mode_idx].tl, t_left,
                   sizeof(bsi->rdstat[i][mode_idx].tl));

        // motion search for newmv (single predictor case only)
        if (!has_second_rf && this_mode == NEWMV &&
            seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) {
          int step_param = 0;
          int further_steps;
          int thissme, bestsme = INT_MAX;
          int sadpb = x->sadperbit4;
          int_mv mvp_full;
          int max_mv;

          /* Is the best so far sufficiently good that we can't justify
           * doing a new motion search? */
          if (best_rd < label_mv_thresh)
            break;

          if (cpi->compressor_speed) {
            // use previous block's result as next block's MV predictor.
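            // (Hedged note on what "previous block" means: the 4x4/4x8/8x4
            // labels within the 8x8 are visited in raster order,
            // i = idy * 2 + idx, so the code below reuses the sub-block to
            // the left for i == 1 and i == 3, while the i == 2 case reaches
            // back two labels to the sub-block directly above. Roughly:
            //   i == 1 or 3:  mvp = mi->bmi[i - 1].as_mv[0]   (left)
            //   i == 2:       mvp = mi->bmi[i - 2].as_mv[0]   (above)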
1800 if (i > 0) { 1801 bsi->mvp.as_int = mi->bmi[i - 1].as_mv[0].as_int; 1802 if (i == 2) 1803 bsi->mvp.as_int = mi->bmi[i - 2].as_mv[0].as_int; 1804 } 1805 } 1806 if (i == 0) 1807 max_mv = x->max_mv_context[mbmi->ref_frame[0]]; 1808 else 1809 max_mv = MAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3; 1810 1811 if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) { 1812 // Take wtd average of the step_params based on the last frame's 1813 // max mv magnitude and the best ref mvs of the current block for 1814 // the given reference. 1815 step_param = (vp9_init_search_range(cpi, max_mv) + 1816 cpi->mv_step_param) >> 1; 1817 } else { 1818 step_param = cpi->mv_step_param; 1819 } 1820 1821 mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3; 1822 mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3; 1823 1824 if (cpi->sf.adaptive_motion_search && cpi->common.show_frame) { 1825 mvp_full.as_mv.row = x->pred_mv[mbmi->ref_frame[0]].as_mv.row >> 3; 1826 mvp_full.as_mv.col = x->pred_mv[mbmi->ref_frame[0]].as_mv.col >> 3; 1827 step_param = MAX(step_param, 8); 1828 } 1829 1830 further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param; 1831 // adjust src pointer for this block 1832 mi_buf_shift(x, i); 1833 if (cpi->sf.search_method == HEX) { 1834 bestsme = vp9_hex_search(x, &mvp_full.as_mv, 1835 step_param, 1836 sadpb, 1, v_fn_ptr, 1, 1837 &bsi->ref_mv->as_mv, 1838 &mode_mv[NEWMV].as_mv); 1839 } else if (cpi->sf.search_method == SQUARE) { 1840 bestsme = vp9_square_search(x, &mvp_full.as_mv, 1841 step_param, 1842 sadpb, 1, v_fn_ptr, 1, 1843 &bsi->ref_mv->as_mv, 1844 &mode_mv[NEWMV].as_mv); 1845 } else if (cpi->sf.search_method == BIGDIA) { 1846 bestsme = vp9_bigdia_search(x, &mvp_full.as_mv, 1847 step_param, 1848 sadpb, 1, v_fn_ptr, 1, 1849 &bsi->ref_mv->as_mv, 1850 &mode_mv[NEWMV].as_mv); 1851 } else { 1852 bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param, 1853 sadpb, further_steps, 0, v_fn_ptr, 1854 bsi->ref_mv, &mode_mv[NEWMV]); 1855 } 1856 1857 // Should we do a full search (best quality only) 1858 if (cpi->compressor_speed == 0) { 1859 /* Check if mvp_full is within the range. 
*/ 1860 clamp_mv(&mvp_full.as_mv, x->mv_col_min, x->mv_col_max, 1861 x->mv_row_min, x->mv_row_max); 1862 1863 thissme = cpi->full_search_sad(x, &mvp_full, 1864 sadpb, 16, v_fn_ptr, 1865 x->nmvjointcost, x->mvcost, 1866 bsi->ref_mv, i); 1867 1868 if (thissme < bestsme) { 1869 bestsme = thissme; 1870 mode_mv[NEWMV].as_int = mi->bmi[i].as_mv[0].as_int; 1871 } else { 1872 /* The full search result is actually worse so re-instate the 1873 * previous best vector */ 1874 mi->bmi[i].as_mv[0].as_int = mode_mv[NEWMV].as_int; 1875 } 1876 } 1877 1878 if (bestsme < INT_MAX) { 1879 int distortion; 1880 unsigned int sse; 1881 cpi->find_fractional_mv_step(x, 1882 &mode_mv[NEWMV].as_mv, 1883 &bsi->ref_mv->as_mv, 1884 cpi->common.allow_high_precision_mv, 1885 x->errorperbit, v_fn_ptr, 1886 0, cpi->sf.subpel_iters_per_step, 1887 x->nmvjointcost, x->mvcost, 1888 &distortion, &sse); 1889 1890 // save motion search result for use in compound prediction 1891 seg_mvs[i][mbmi->ref_frame[0]].as_int = mode_mv[NEWMV].as_int; 1892 } 1893 1894 if (cpi->sf.adaptive_motion_search) 1895 x->pred_mv[mbmi->ref_frame[0]].as_int = mode_mv[NEWMV].as_int; 1896 1897 // restore src pointers 1898 mi_buf_restore(x, orig_src, orig_pre); 1899 } 1900 1901 if (has_second_rf) { 1902 if (seg_mvs[i][mbmi->ref_frame[1]].as_int == INVALID_MV || 1903 seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) 1904 continue; 1905 } 1906 1907 if (has_second_rf && this_mode == NEWMV && 1908 mbmi->interp_filter == EIGHTTAP) { 1909 // adjust src pointers 1910 mi_buf_shift(x, i); 1911 if (cpi->sf.comp_inter_joint_search_thresh <= bsize) { 1912 int rate_mv; 1913 joint_motion_search(cpi, x, bsize, frame_mv[this_mode], 1914 mi_row, mi_col, seg_mvs[i], 1915 &rate_mv); 1916 seg_mvs[i][mbmi->ref_frame[0]].as_int = 1917 frame_mv[this_mode][mbmi->ref_frame[0]].as_int; 1918 seg_mvs[i][mbmi->ref_frame[1]].as_int = 1919 frame_mv[this_mode][mbmi->ref_frame[1]].as_int; 1920 } 1921 // restore src pointers 1922 mi_buf_restore(x, orig_src, orig_pre); 1923 } 1924 1925 bsi->rdstat[i][mode_idx].brate = 1926 labels2mode(x, i, this_mode, &mode_mv[this_mode], 1927 &second_mode_mv[this_mode], frame_mv, seg_mvs[i], 1928 bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost, 1929 x->mvcost, cpi); 1930 1931 1932 bsi->rdstat[i][mode_idx].mvs[0].as_int = mode_mv[this_mode].as_int; 1933 if (num_4x4_blocks_wide > 1) 1934 bsi->rdstat[i + 1][mode_idx].mvs[0].as_int = 1935 mode_mv[this_mode].as_int; 1936 if (num_4x4_blocks_high > 1) 1937 bsi->rdstat[i + 2][mode_idx].mvs[0].as_int = 1938 mode_mv[this_mode].as_int; 1939 if (has_second_rf) { 1940 bsi->rdstat[i][mode_idx].mvs[1].as_int = 1941 second_mode_mv[this_mode].as_int; 1942 if (num_4x4_blocks_wide > 1) 1943 bsi->rdstat[i + 1][mode_idx].mvs[1].as_int = 1944 second_mode_mv[this_mode].as_int; 1945 if (num_4x4_blocks_high > 1) 1946 bsi->rdstat[i + 2][mode_idx].mvs[1].as_int = 1947 second_mode_mv[this_mode].as_int; 1948 } 1949 1950 // Trap vectors that reach beyond the UMV borders 1951 if (mv_check_bounds(x, &mode_mv[this_mode])) 1952 continue; 1953 if (has_second_rf && 1954 mv_check_bounds(x, &second_mode_mv[this_mode])) 1955 continue; 1956 1957 if (filter_idx > 0) { 1958 BEST_SEG_INFO *ref_bsi = bsi_buf; 1959 subpelmv = (mode_mv[this_mode].as_mv.row & 0x0f) || 1960 (mode_mv[this_mode].as_mv.col & 0x0f); 1961 have_ref = mode_mv[this_mode].as_int == 1962 ref_bsi->rdstat[i][mode_idx].mvs[0].as_int; 1963 if (has_second_rf) { 1964 subpelmv |= (second_mode_mv[this_mode].as_mv.row & 0x0f) || 1965 (second_mode_mv[this_mode].as_mv.col & 0x0f); 1966 
have_ref &= second_mode_mv[this_mode].as_int == 1967 ref_bsi->rdstat[i][mode_idx].mvs[1].as_int; 1968 } 1969 1970 if (filter_idx > 1 && !subpelmv && !have_ref) { 1971 ref_bsi = bsi_buf + 1; 1972 have_ref = mode_mv[this_mode].as_int == 1973 ref_bsi->rdstat[i][mode_idx].mvs[0].as_int; 1974 if (has_second_rf) { 1975 have_ref &= second_mode_mv[this_mode].as_int == 1976 ref_bsi->rdstat[i][mode_idx].mvs[1].as_int; 1977 } 1978 } 1979 1980 if (!subpelmv && have_ref && 1981 ref_bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) { 1982 vpx_memcpy(&bsi->rdstat[i][mode_idx], &ref_bsi->rdstat[i][mode_idx], 1983 sizeof(SEG_RDSTAT)); 1984 if (num_4x4_blocks_wide > 1) 1985 bsi->rdstat[i + 1][mode_idx].eobs = 1986 ref_bsi->rdstat[i + 1][mode_idx].eobs; 1987 if (num_4x4_blocks_high > 1) 1988 bsi->rdstat[i + 2][mode_idx].eobs = 1989 ref_bsi->rdstat[i + 2][mode_idx].eobs; 1990 1991 if (bsi->rdstat[i][mode_idx].brdcost < best_rd) { 1992 mode_selected = this_mode; 1993 best_rd = bsi->rdstat[i][mode_idx].brdcost; 1994 } 1995 continue; 1996 } 1997 } 1998 1999 bsi->rdstat[i][mode_idx].brdcost = 2000 encode_inter_mb_segment(cpi, x, 2001 bsi->segment_rd - this_segment_rd, i, 2002 &bsi->rdstat[i][mode_idx].byrate, 2003 &bsi->rdstat[i][mode_idx].bdist, 2004 &bsi->rdstat[i][mode_idx].bsse, 2005 bsi->rdstat[i][mode_idx].ta, 2006 bsi->rdstat[i][mode_idx].tl); 2007 if (bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) { 2008 bsi->rdstat[i][mode_idx].brdcost += RDCOST(x->rdmult, x->rddiv, 2009 bsi->rdstat[i][mode_idx].brate, 0); 2010 bsi->rdstat[i][mode_idx].brate += bsi->rdstat[i][mode_idx].byrate; 2011 bsi->rdstat[i][mode_idx].eobs = pd->eobs[i]; 2012 if (num_4x4_blocks_wide > 1) 2013 bsi->rdstat[i + 1][mode_idx].eobs = pd->eobs[i + 1]; 2014 if (num_4x4_blocks_high > 1) 2015 bsi->rdstat[i + 2][mode_idx].eobs = pd->eobs[i + 2]; 2016 } 2017 2018 if (bsi->rdstat[i][mode_idx].brdcost < best_rd) { 2019 mode_selected = this_mode; 2020 best_rd = bsi->rdstat[i][mode_idx].brdcost; 2021 } 2022 } /*for each 4x4 mode*/ 2023 2024 if (best_rd == INT64_MAX) { 2025 int iy, midx; 2026 for (iy = i + 1; iy < 4; ++iy) 2027 for (midx = 0; midx < INTER_MODES; ++midx) 2028 bsi->rdstat[iy][midx].brdcost = INT64_MAX; 2029 bsi->segment_rd = INT64_MAX; 2030 return; 2031 } 2032 2033 mode_idx = INTER_OFFSET(mode_selected); 2034 vpx_memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above)); 2035 vpx_memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left)); 2036 2037 labels2mode(x, i, mode_selected, &mode_mv[mode_selected], 2038 &second_mode_mv[mode_selected], frame_mv, seg_mvs[i], 2039 bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost, 2040 x->mvcost, cpi); 2041 2042 br += bsi->rdstat[i][mode_idx].brate; 2043 bd += bsi->rdstat[i][mode_idx].bdist; 2044 block_sse += bsi->rdstat[i][mode_idx].bsse; 2045 segmentyrate += bsi->rdstat[i][mode_idx].byrate; 2046 this_segment_rd += bsi->rdstat[i][mode_idx].brdcost; 2047 2048 if (this_segment_rd > bsi->segment_rd) { 2049 int iy, midx; 2050 for (iy = i + 1; iy < 4; ++iy) 2051 for (midx = 0; midx < INTER_MODES; ++midx) 2052 bsi->rdstat[iy][midx].brdcost = INT64_MAX; 2053 bsi->segment_rd = INT64_MAX; 2054 return; 2055 } 2056 } 2057 } /* for each label */ 2058 2059 bsi->r = br; 2060 bsi->d = bd; 2061 bsi->segment_yrate = segmentyrate; 2062 bsi->segment_rd = this_segment_rd; 2063 bsi->sse = block_sse; 2064 2065 // update the coding decisions 2066 for (i = 0; i < 4; ++i) 2067 bsi->modes[i] = mi->bmi[i].as_mode; 2068 } 2069 2070 static int64_t rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x, 2071 const TileInfo 
*const tile, 2072 int_mv *best_ref_mv, 2073 int_mv *second_best_ref_mv, 2074 int64_t best_rd, 2075 int *returntotrate, 2076 int *returnyrate, 2077 int64_t *returndistortion, 2078 int *skippable, int64_t *psse, 2079 int mvthresh, 2080 int_mv seg_mvs[4][MAX_REF_FRAMES], 2081 BEST_SEG_INFO *bsi_buf, 2082 int filter_idx, 2083 int mi_row, int mi_col) { 2084 int i; 2085 BEST_SEG_INFO *bsi = bsi_buf + filter_idx; 2086 MACROBLOCKD *xd = &x->e_mbd; 2087 MODE_INFO *mi = xd->mi_8x8[0]; 2088 MB_MODE_INFO *mbmi = &mi->mbmi; 2089 int mode_idx; 2090 2091 vp9_zero(*bsi); 2092 2093 bsi->segment_rd = best_rd; 2094 bsi->ref_mv = best_ref_mv; 2095 bsi->second_ref_mv = second_best_ref_mv; 2096 bsi->mvp.as_int = best_ref_mv->as_int; 2097 bsi->mvthresh = mvthresh; 2098 2099 for (i = 0; i < 4; i++) 2100 bsi->modes[i] = ZEROMV; 2101 2102 rd_check_segment_txsize(cpi, x, tile, bsi_buf, filter_idx, seg_mvs, 2103 mi_row, mi_col); 2104 2105 if (bsi->segment_rd > best_rd) 2106 return INT64_MAX; 2107 /* set it to the best */ 2108 for (i = 0; i < 4; i++) { 2109 mode_idx = INTER_OFFSET(bsi->modes[i]); 2110 mi->bmi[i].as_mv[0].as_int = bsi->rdstat[i][mode_idx].mvs[0].as_int; 2111 if (has_second_ref(mbmi)) 2112 mi->bmi[i].as_mv[1].as_int = bsi->rdstat[i][mode_idx].mvs[1].as_int; 2113 xd->plane[0].eobs[i] = bsi->rdstat[i][mode_idx].eobs; 2114 mi->bmi[i].as_mode = bsi->modes[i]; 2115 } 2116 2117 /* 2118 * used to set mbmi->mv.as_int 2119 */ 2120 *returntotrate = bsi->r; 2121 *returndistortion = bsi->d; 2122 *returnyrate = bsi->segment_yrate; 2123 *skippable = vp9_is_skippable_in_plane(&x->e_mbd, BLOCK_8X8, 0); 2124 *psse = bsi->sse; 2125 mbmi->mode = bsi->modes[3]; 2126 2127 return bsi->segment_rd; 2128 } 2129 2130 static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x, 2131 uint8_t *ref_y_buffer, int ref_y_stride, 2132 int ref_frame, BLOCK_SIZE block_size ) { 2133 MACROBLOCKD *xd = &x->e_mbd; 2134 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; 2135 int_mv this_mv; 2136 int i; 2137 int zero_seen = 0; 2138 int best_index = 0; 2139 int best_sad = INT_MAX; 2140 int this_sad = INT_MAX; 2141 unsigned int max_mv = 0; 2142 2143 uint8_t *src_y_ptr = x->plane[0].src.buf; 2144 uint8_t *ref_y_ptr; 2145 int row_offset, col_offset; 2146 int num_mv_refs = MAX_MV_REF_CANDIDATES + 2147 (cpi->sf.adaptive_motion_search && 2148 cpi->common.show_frame && 2149 block_size < cpi->sf.max_partition_size); 2150 2151 // Get the sad for each candidate reference mv 2152 for (i = 0; i < num_mv_refs; i++) { 2153 this_mv.as_int = (i < MAX_MV_REF_CANDIDATES) ? 2154 mbmi->ref_mvs[ref_frame][i].as_int : x->pred_mv[ref_frame].as_int; 2155 2156 max_mv = MAX(max_mv, 2157 MAX(abs(this_mv.as_mv.row), abs(this_mv.as_mv.col)) >> 3); 2158 // The list is at an end if we see 0 for a second time. 2159 if (!this_mv.as_int && zero_seen) 2160 break; 2161 zero_seen = zero_seen || !this_mv.as_int; 2162 2163 row_offset = this_mv.as_mv.row >> 3; 2164 col_offset = this_mv.as_mv.col >> 3; 2165 ref_y_ptr = ref_y_buffer + (ref_y_stride * row_offset) + col_offset; 2166 2167 // Find sad for current vector. 2168 this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride, 2169 ref_y_ptr, ref_y_stride, 2170 0x7fffffff); 2171 2172 // Note if it is the best so far. 2173 if (this_sad < best_sad) { 2174 best_sad = this_sad; 2175 best_index = i; 2176 } 2177 } 2178 2179 // Note the index of the mv that worked best in the reference list. 
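  // A hedged summary of the loop above: each candidate MV is truncated
  // from 1/8-pel to full-pel units (row >> 3, col >> 3) and the SAD of the
  // reference block at that offset is measured. For example, a candidate
  // of (-16, 24) in 1/8-pel units tests the block at
  //   ref_y_buffer + ref_y_stride * (-2) + 3
  // The cheapest candidate's index is recorded below to seed later
  // motion searches.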
2180 x->mv_best_ref_index[ref_frame] = best_index; 2181 x->max_mv_context[ref_frame] = max_mv; 2182 } 2183 2184 static void estimate_ref_frame_costs(VP9_COMP *cpi, int segment_id, 2185 unsigned int *ref_costs_single, 2186 unsigned int *ref_costs_comp, 2187 vp9_prob *comp_mode_p) { 2188 VP9_COMMON *const cm = &cpi->common; 2189 MACROBLOCKD *const xd = &cpi->mb.e_mbd; 2190 int seg_ref_active = vp9_segfeature_active(&cm->seg, segment_id, 2191 SEG_LVL_REF_FRAME); 2192 if (seg_ref_active) { 2193 vpx_memset(ref_costs_single, 0, MAX_REF_FRAMES * sizeof(*ref_costs_single)); 2194 vpx_memset(ref_costs_comp, 0, MAX_REF_FRAMES * sizeof(*ref_costs_comp)); 2195 *comp_mode_p = 128; 2196 } else { 2197 vp9_prob intra_inter_p = vp9_get_pred_prob_intra_inter(cm, xd); 2198 vp9_prob comp_inter_p = 128; 2199 2200 if (cm->comp_pred_mode == HYBRID_PREDICTION) { 2201 comp_inter_p = vp9_get_pred_prob_comp_inter_inter(cm, xd); 2202 *comp_mode_p = comp_inter_p; 2203 } else { 2204 *comp_mode_p = 128; 2205 } 2206 2207 ref_costs_single[INTRA_FRAME] = vp9_cost_bit(intra_inter_p, 0); 2208 2209 if (cm->comp_pred_mode != COMP_PREDICTION_ONLY) { 2210 vp9_prob ref_single_p1 = vp9_get_pred_prob_single_ref_p1(cm, xd); 2211 vp9_prob ref_single_p2 = vp9_get_pred_prob_single_ref_p2(cm, xd); 2212 unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1); 2213 2214 if (cm->comp_pred_mode == HYBRID_PREDICTION) 2215 base_cost += vp9_cost_bit(comp_inter_p, 0); 2216 2217 ref_costs_single[LAST_FRAME] = ref_costs_single[GOLDEN_FRAME] = 2218 ref_costs_single[ALTREF_FRAME] = base_cost; 2219 ref_costs_single[LAST_FRAME] += vp9_cost_bit(ref_single_p1, 0); 2220 ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p1, 1); 2221 ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p1, 1); 2222 ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p2, 0); 2223 ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p2, 1); 2224 } else { 2225 ref_costs_single[LAST_FRAME] = 512; 2226 ref_costs_single[GOLDEN_FRAME] = 512; 2227 ref_costs_single[ALTREF_FRAME] = 512; 2228 } 2229 if (cm->comp_pred_mode != SINGLE_PREDICTION_ONLY) { 2230 vp9_prob ref_comp_p = vp9_get_pred_prob_comp_ref_p(cm, xd); 2231 unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1); 2232 2233 if (cm->comp_pred_mode == HYBRID_PREDICTION) 2234 base_cost += vp9_cost_bit(comp_inter_p, 1); 2235 2236 ref_costs_comp[LAST_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 0); 2237 ref_costs_comp[GOLDEN_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 1); 2238 } else { 2239 ref_costs_comp[LAST_FRAME] = 512; 2240 ref_costs_comp[GOLDEN_FRAME] = 512; 2241 } 2242 } 2243 } 2244 2245 static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, 2246 int mode_index, 2247 int_mv *ref_mv, 2248 int_mv *second_ref_mv, 2249 int64_t comp_pred_diff[NB_PREDICTION_TYPES], 2250 int64_t tx_size_diff[TX_MODES], 2251 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]) { 2252 MACROBLOCKD *const xd = &x->e_mbd; 2253 2254 // Take a snapshot of the coding context so it can be 2255 // restored if we decide to encode this way 2256 ctx->skip = x->skip; 2257 ctx->best_mode_index = mode_index; 2258 ctx->mic = *xd->mi_8x8[0]; 2259 2260 ctx->best_ref_mv.as_int = ref_mv->as_int; 2261 ctx->second_best_ref_mv.as_int = second_ref_mv->as_int; 2262 2263 ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_PREDICTION_ONLY]; 2264 ctx->comp_pred_diff = (int)comp_pred_diff[COMP_PREDICTION_ONLY]; 2265 ctx->hybrid_pred_diff = (int)comp_pred_diff[HYBRID_PREDICTION]; 2266 2267 vpx_memcpy(ctx->tx_rd_diff, tx_size_diff, 
sizeof(ctx->tx_rd_diff)); 2268 vpx_memcpy(ctx->best_filter_diff, best_filter_diff, 2269 sizeof(*best_filter_diff) * SWITCHABLE_FILTER_CONTEXTS); 2270 } 2271 2272 static void setup_pred_block(const MACROBLOCKD *xd, 2273 struct buf_2d dst[MAX_MB_PLANE], 2274 const YV12_BUFFER_CONFIG *src, 2275 int mi_row, int mi_col, 2276 const struct scale_factors *scale, 2277 const struct scale_factors *scale_uv) { 2278 int i; 2279 2280 dst[0].buf = src->y_buffer; 2281 dst[0].stride = src->y_stride; 2282 dst[1].buf = src->u_buffer; 2283 dst[2].buf = src->v_buffer; 2284 dst[1].stride = dst[2].stride = src->uv_stride; 2285 #if CONFIG_ALPHA 2286 dst[3].buf = src->alpha_buffer; 2287 dst[3].stride = src->alpha_stride; 2288 #endif 2289 2290 // TODO(jkoleszar): Make scale factors per-plane data 2291 for (i = 0; i < MAX_MB_PLANE; i++) { 2292 setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col, 2293 i ? scale_uv : scale, 2294 xd->plane[i].subsampling_x, xd->plane[i].subsampling_y); 2295 } 2296 } 2297 2298 static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, 2299 const TileInfo *const tile, 2300 int idx, MV_REFERENCE_FRAME frame_type, 2301 BLOCK_SIZE block_size, 2302 int mi_row, int mi_col, 2303 int_mv frame_nearest_mv[MAX_REF_FRAMES], 2304 int_mv frame_near_mv[MAX_REF_FRAMES], 2305 struct buf_2d yv12_mb[4][MAX_MB_PLANE], 2306 struct scale_factors scale[MAX_REF_FRAMES]) { 2307 VP9_COMMON *cm = &cpi->common; 2308 YV12_BUFFER_CONFIG *yv12 = &cm->yv12_fb[cpi->common.ref_frame_map[idx]]; 2309 MACROBLOCKD *const xd = &x->e_mbd; 2310 MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; 2311 2312 // set up scaling factors 2313 scale[frame_type] = cpi->common.active_ref_scale[frame_type - 1]; 2314 2315 scale[frame_type].sfc->set_scaled_offsets(&scale[frame_type], 2316 mi_row * MI_SIZE, mi_col * MI_SIZE); 2317 2318 // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this 2319 // use the UV scaling factors. 2320 setup_pred_block(xd, yv12_mb[frame_type], yv12, mi_row, mi_col, 2321 &scale[frame_type], &scale[frame_type]); 2322 2323 // Gets an initial list of candidate vectors from neighbours and orders them 2324 vp9_find_mv_refs(cm, xd, tile, xd->mi_8x8[0], 2325 xd->last_mi, 2326 frame_type, 2327 mbmi->ref_mvs[frame_type], mi_row, mi_col); 2328 2329 // Candidate refinement carried out at encoder and decoder 2330 vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, 2331 mbmi->ref_mvs[frame_type], 2332 &frame_nearest_mv[frame_type], 2333 &frame_near_mv[frame_type]); 2334 2335 // Further refinement that is encode side only to test the top few candidates 2336 // in full and choose the best as the centre point for subsequent searches. 2337 // The current implementation doesn't support scaling. 
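  // (One reading of the guard below, offered as a hedged note rather than
  // a statement from the original authors: mv_pred() measures raw SADs
  // directly against the reference buffer at full-pel offsets, which is
  // only meaningful when the reference has the same resolution as the
  // current frame; vp9_is_scaled() is what gates that fast path.)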
2338 if (!vp9_is_scaled(scale[frame_type].sfc) && block_size >= BLOCK_8X8) 2339 mv_pred(cpi, x, yv12_mb[frame_type][0].buf, yv12->y_stride, 2340 frame_type, block_size); 2341 } 2342 2343 static YV12_BUFFER_CONFIG *get_scaled_ref_frame(VP9_COMP *cpi, int ref_frame) { 2344 YV12_BUFFER_CONFIG *scaled_ref_frame = NULL; 2345 int fb = get_ref_frame_idx(cpi, ref_frame); 2346 int fb_scale = get_scale_ref_frame_idx(cpi, ref_frame); 2347 if (cpi->scaled_ref_idx[fb_scale] != cpi->common.ref_frame_map[fb]) 2348 scaled_ref_frame = &cpi->common.yv12_fb[cpi->scaled_ref_idx[fb_scale]]; 2349 return scaled_ref_frame; 2350 } 2351 2352 static INLINE int get_switchable_rate(const MACROBLOCK *x) { 2353 const MACROBLOCKD *const xd = &x->e_mbd; 2354 const MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; 2355 const int ctx = vp9_get_pred_context_switchable_interp(xd); 2356 return SWITCHABLE_INTERP_RATE_FACTOR * 2357 x->switchable_interp_costs[ctx][mbmi->interp_filter]; 2358 } 2359 2360 static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, 2361 const TileInfo *const tile, 2362 BLOCK_SIZE bsize, 2363 int mi_row, int mi_col, 2364 int_mv *tmp_mv, int *rate_mv) { 2365 MACROBLOCKD *xd = &x->e_mbd; 2366 VP9_COMMON *cm = &cpi->common; 2367 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; 2368 struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}}; 2369 int bestsme = INT_MAX; 2370 int further_steps, step_param; 2371 int sadpb = x->sadperbit16; 2372 int_mv mvp_full; 2373 int ref = mbmi->ref_frame[0]; 2374 int_mv ref_mv = mbmi->ref_mvs[ref][0]; 2375 const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]); 2376 2377 int tmp_col_min = x->mv_col_min; 2378 int tmp_col_max = x->mv_col_max; 2379 int tmp_row_min = x->mv_row_min; 2380 int tmp_row_max = x->mv_row_max; 2381 2382 YV12_BUFFER_CONFIG *scaled_ref_frame = get_scaled_ref_frame(cpi, ref); 2383 2384 if (scaled_ref_frame) { 2385 int i; 2386 // Swap out the reference frame for a version that's been scaled to 2387 // match the resolution of the current frame, allowing the existing 2388 // motion search code to be used without additional modifications. 2389 for (i = 0; i < MAX_MB_PLANE; i++) 2390 backup_yv12[i] = xd->plane[i].pre[0]; 2391 2392 setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL); 2393 } 2394 2395 vp9_clamp_mv_min_max(x, &ref_mv.as_mv); 2396 2397 // Adjust search parameters based on small partitions' result. 2398 if (x->fast_ms) { 2399 // && abs(mvp_full.as_mv.row - x->pred_mv.as_mv.row) < 24 && 2400 // abs(mvp_full.as_mv.col - x->pred_mv.as_mv.col) < 24) { 2401 // adjust search range 2402 step_param = 6; 2403 if (x->fast_ms > 1) 2404 step_param = 8; 2405 2406 // Get prediction MV. 2407 mvp_full.as_int = x->pred_mv[ref].as_int; 2408 2409 // Adjust MV sign if needed. 2410 if (cm->ref_frame_sign_bias[ref]) { 2411 mvp_full.as_mv.col *= -1; 2412 mvp_full.as_mv.row *= -1; 2413 } 2414 } else { 2415 // Work out the size of the first step in the mv step search. 2416 // 0 here is maximum length first step. 1 is MAX >> 1 etc. 2417 if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) { 2418 // Take wtd average of the step_params based on the last frame's 2419 // max mv magnitude and that based on the best ref mvs of the current 2420 // block for the given reference. 
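    // Illustratively, the blend below is a plain average of the two
    // estimates:
    //   step_param = (range_from_best_ref_mvs + cpi->mv_step_param) >> 1
    // so, for example, a per-block estimate of 3 and a frame-level
    // estimate of 7 would start the search at step_param 5. (Recall from
    // the comment above that 0 means the largest first step, so a larger
    // step_param means a narrower initial search.)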
2421 step_param = (vp9_init_search_range(cpi, x->max_mv_context[ref]) + 2422 cpi->mv_step_param) >> 1; 2423 } else { 2424 step_param = cpi->mv_step_param; 2425 } 2426 } 2427 2428 if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64 && 2429 cpi->common.show_frame) { 2430 int boffset = 2 * (b_width_log2(BLOCK_64X64) - MIN(b_height_log2(bsize), 2431 b_width_log2(bsize))); 2432 step_param = MAX(step_param, boffset); 2433 } 2434 2435 mvp_full.as_int = x->mv_best_ref_index[ref] < MAX_MV_REF_CANDIDATES ? 2436 mbmi->ref_mvs[ref][x->mv_best_ref_index[ref]].as_int : 2437 x->pred_mv[ref].as_int; 2438 2439 mvp_full.as_mv.col >>= 3; 2440 mvp_full.as_mv.row >>= 3; 2441 2442 // Further step/diamond searches as necessary 2443 further_steps = (cpi->sf.max_step_search_steps - 1) - step_param; 2444 2445 if (cpi->sf.search_method == HEX) { 2446 bestsme = vp9_hex_search(x, &mvp_full.as_mv, 2447 step_param, 2448 sadpb, 1, 2449 &cpi->fn_ptr[block_size], 1, 2450 &ref_mv.as_mv, &tmp_mv->as_mv); 2451 } else if (cpi->sf.search_method == SQUARE) { 2452 bestsme = vp9_square_search(x, &mvp_full.as_mv, 2453 step_param, 2454 sadpb, 1, 2455 &cpi->fn_ptr[block_size], 1, 2456 &ref_mv.as_mv, &tmp_mv->as_mv); 2457 } else if (cpi->sf.search_method == BIGDIA) { 2458 bestsme = vp9_bigdia_search(x, &mvp_full.as_mv, 2459 step_param, 2460 sadpb, 1, 2461 &cpi->fn_ptr[block_size], 1, 2462 &ref_mv.as_mv, &tmp_mv->as_mv); 2463 } else { 2464 bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param, 2465 sadpb, further_steps, 1, 2466 &cpi->fn_ptr[block_size], 2467 &ref_mv, tmp_mv); 2468 } 2469 2470 x->mv_col_min = tmp_col_min; 2471 x->mv_col_max = tmp_col_max; 2472 x->mv_row_min = tmp_row_min; 2473 x->mv_row_max = tmp_row_max; 2474 2475 if (bestsme < INT_MAX) { 2476 int dis; /* TODO: use dis in distortion calculation later. */ 2477 unsigned int sse; 2478 cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv.as_mv, 2479 cm->allow_high_precision_mv, 2480 x->errorperbit, 2481 &cpi->fn_ptr[block_size], 2482 0, cpi->sf.subpel_iters_per_step, 2483 x->nmvjointcost, x->mvcost, 2484 &dis, &sse); 2485 } 2486 *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv.as_mv, 2487 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); 2488 2489 if (cpi->sf.adaptive_motion_search && cpi->common.show_frame) 2490 x->pred_mv[ref].as_int = tmp_mv->as_int; 2491 2492 if (scaled_ref_frame) { 2493 int i; 2494 for (i = 0; i < MAX_MB_PLANE; i++) 2495 xd->plane[i].pre[0] = backup_yv12[i]; 2496 } 2497 } 2498 2499 static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, 2500 BLOCK_SIZE bsize, 2501 int_mv *frame_mv, 2502 int mi_row, int mi_col, 2503 int_mv single_newmv[MAX_REF_FRAMES], 2504 int *rate_mv) { 2505 int pw = 4 << b_width_log2(bsize), ph = 4 << b_height_log2(bsize); 2506 MACROBLOCKD *xd = &x->e_mbd; 2507 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; 2508 const int refs[2] = { mbmi->ref_frame[0], 2509 mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] }; 2510 int_mv ref_mv[2]; 2511 const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]); 2512 int ite, ref; 2513 // Prediction buffer from second frame. 2514 uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t)); 2515 2516 // Do joint motion search in compound mode to get more accurate mv. 
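// A hedged sketch of the iteration implemented below: the two reference
// MVs are refined alternately, holding one fixed while re-searching the
// other against the compound (averaged) prediction:
//   for (ite = 0; ite < 4; ++ite) {
//     id = ite % 2;          // which reference is refined this pass
//     // build second_pred from the other reference's current MV, then
//     // run a small (search_range 3) full-pel refining search plus a
//     // sub-pel step for reference `id`;
//     // keep the new MV only if the error improved, otherwise stop.
//   }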
2517 struct buf_2d backup_yv12[2][MAX_MB_PLANE]; 2518 struct buf_2d scaled_first_yv12 = xd->plane[0].pre[0]; 2519 int last_besterr[2] = {INT_MAX, INT_MAX}; 2520 YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = { 2521 get_scaled_ref_frame(cpi, mbmi->ref_frame[0]), 2522 get_scaled_ref_frame(cpi, mbmi->ref_frame[1]) 2523 }; 2524 2525 for (ref = 0; ref < 2; ++ref) { 2526 ref_mv[ref] = mbmi->ref_mvs[refs[ref]][0]; 2527 2528 if (scaled_ref_frame[ref]) { 2529 int i; 2530 // Swap out the reference frame for a version that's been scaled to 2531 // match the resolution of the current frame, allowing the existing 2532 // motion search code to be used without additional modifications. 2533 for (i = 0; i < MAX_MB_PLANE; i++) 2534 backup_yv12[ref][i] = xd->plane[i].pre[ref]; 2535 setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col, NULL); 2536 } 2537 2538 xd->scale_factor[ref].sfc->set_scaled_offsets(&xd->scale_factor[ref], 2539 mi_row, mi_col); 2540 frame_mv[refs[ref]].as_int = single_newmv[refs[ref]].as_int; 2541 } 2542 2543 // Allow joint search multiple times iteratively for each ref frame 2544 // and break out the search loop if it couldn't find better mv. 2545 for (ite = 0; ite < 4; ite++) { 2546 struct buf_2d ref_yv12[2]; 2547 int bestsme = INT_MAX; 2548 int sadpb = x->sadperbit16; 2549 int_mv tmp_mv; 2550 int search_range = 3; 2551 2552 int tmp_col_min = x->mv_col_min; 2553 int tmp_col_max = x->mv_col_max; 2554 int tmp_row_min = x->mv_row_min; 2555 int tmp_row_max = x->mv_row_max; 2556 int id = ite % 2; 2557 2558 // Initialized here because of compiler problem in Visual Studio. 2559 ref_yv12[0] = xd->plane[0].pre[0]; 2560 ref_yv12[1] = xd->plane[0].pre[1]; 2561 2562 // Get pred block from second frame. 2563 vp9_build_inter_predictor(ref_yv12[!id].buf, 2564 ref_yv12[!id].stride, 2565 second_pred, pw, 2566 &frame_mv[refs[!id]].as_mv, 2567 &xd->scale_factor[!id], 2568 pw, ph, 0, 2569 &xd->subpix, MV_PRECISION_Q3); 2570 2571 // Compound motion search on first ref frame. 2572 if (id) 2573 xd->plane[0].pre[0] = ref_yv12[id]; 2574 vp9_clamp_mv_min_max(x, &ref_mv[id].as_mv); 2575 2576 // Use mv result from single mode as mvp. 2577 tmp_mv.as_int = frame_mv[refs[id]].as_int; 2578 2579 tmp_mv.as_mv.col >>= 3; 2580 tmp_mv.as_mv.row >>= 3; 2581 2582 // Small-range full-pixel motion search 2583 bestsme = vp9_refining_search_8p_c(x, &tmp_mv, sadpb, 2584 search_range, 2585 &cpi->fn_ptr[block_size], 2586 x->nmvjointcost, x->mvcost, 2587 &ref_mv[id], second_pred, 2588 pw, ph); 2589 2590 x->mv_col_min = tmp_col_min; 2591 x->mv_col_max = tmp_col_max; 2592 x->mv_row_min = tmp_row_min; 2593 x->mv_row_max = tmp_row_max; 2594 2595 if (bestsme < INT_MAX) { 2596 int dis; /* TODO: use dis in distortion calculation later. 
*/ 2597 unsigned int sse; 2598 2599 bestsme = cpi->find_fractional_mv_step_comp( 2600 x, &tmp_mv.as_mv, 2601 &ref_mv[id].as_mv, 2602 cpi->common.allow_high_precision_mv, 2603 x->errorperbit, 2604 &cpi->fn_ptr[block_size], 2605 0, cpi->sf.subpel_iters_per_step, 2606 x->nmvjointcost, x->mvcost, 2607 &dis, &sse, second_pred, 2608 pw, ph); 2609 } 2610 2611 if (id) 2612 xd->plane[0].pre[0] = scaled_first_yv12; 2613 2614 if (bestsme < last_besterr[id]) { 2615 frame_mv[refs[id]].as_int = tmp_mv.as_int; 2616 last_besterr[id] = bestsme; 2617 } else { 2618 break; 2619 } 2620 } 2621 2622 *rate_mv = 0; 2623 2624 for (ref = 0; ref < 2; ++ref) { 2625 if (scaled_ref_frame[ref]) { 2626 // restore the predictor 2627 int i; 2628 for (i = 0; i < MAX_MB_PLANE; i++) 2629 xd->plane[i].pre[ref] = backup_yv12[ref][i]; 2630 } 2631 2632 *rate_mv += vp9_mv_bit_cost(&frame_mv[refs[ref]].as_mv, 2633 &mbmi->ref_mvs[refs[ref]][0].as_mv, 2634 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); 2635 } 2636 2637 vpx_free(second_pred); 2638 } 2639 2640 static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, 2641 const TileInfo *const tile, 2642 BLOCK_SIZE bsize, 2643 int64_t txfm_cache[], 2644 int *rate2, int64_t *distortion, 2645 int *skippable, 2646 int *rate_y, int64_t *distortion_y, 2647 int *rate_uv, int64_t *distortion_uv, 2648 int *mode_excluded, int *disable_skip, 2649 INTERPOLATION_TYPE *best_filter, 2650 int_mv (*mode_mv)[MAX_REF_FRAMES], 2651 int mi_row, int mi_col, 2652 int_mv single_newmv[MAX_REF_FRAMES], 2653 int64_t *psse, 2654 const int64_t ref_best_rd) { 2655 VP9_COMMON *cm = &cpi->common; 2656 MACROBLOCKD *xd = &x->e_mbd; 2657 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; 2658 const int is_comp_pred = has_second_ref(mbmi); 2659 const int num_refs = is_comp_pred ? 2 : 1; 2660 const int this_mode = mbmi->mode; 2661 int_mv *frame_mv = mode_mv[this_mode]; 2662 int i; 2663 int refs[2] = { mbmi->ref_frame[0], 2664 (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) }; 2665 int_mv cur_mv[2]; 2666 int64_t this_rd = 0; 2667 DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, MAX_MB_PLANE * 64 * 64); 2668 int pred_exists = 0; 2669 int intpel_mv; 2670 int64_t rd, best_rd = INT64_MAX; 2671 int best_needs_copy = 0; 2672 uint8_t *orig_dst[MAX_MB_PLANE]; 2673 int orig_dst_stride[MAX_MB_PLANE]; 2674 int rs = 0; 2675 2676 if (is_comp_pred) { 2677 if (frame_mv[refs[0]].as_int == INVALID_MV || 2678 frame_mv[refs[1]].as_int == INVALID_MV) 2679 return INT64_MAX; 2680 } 2681 2682 if (this_mode == NEWMV) { 2683 int rate_mv; 2684 if (is_comp_pred) { 2685 // Initialize mv using single prediction mode result. 
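    // (Hedged note: the per-reference MVs cached in single_newmv[] by the
    // earlier single_motion_search() calls seed the compound predictor
    // here; joint_motion_search() below only runs once bsize reaches
    // cpi->sf.comp_inter_joint_search_thresh, otherwise the seeds are kept
    // as-is and only their signaling cost is computed.)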
      frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
      frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;

      if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
        joint_motion_search(cpi, x, bsize, frame_mv,
                            mi_row, mi_col, single_newmv, &rate_mv);
      } else {
        rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]].as_mv,
                                  &mbmi->ref_mvs[refs[0]][0].as_mv,
                                  x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
        rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]].as_mv,
                                   &mbmi->ref_mvs[refs[1]][0].as_mv,
                                   x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
      }
      *rate2 += rate_mv;
    } else {
      int_mv tmp_mv;
      single_motion_search(cpi, x, tile, bsize, mi_row, mi_col,
                           &tmp_mv, &rate_mv);
      *rate2 += rate_mv;
      frame_mv[refs[0]].as_int =
          xd->mi_8x8[0]->bmi[0].as_mv[0].as_int = tmp_mv.as_int;
      single_newmv[refs[0]].as_int = tmp_mv.as_int;
    }
  }

  // if we're near/nearest and mv == 0,0, compare to zeromv
  if ((this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) &&
      frame_mv[refs[0]].as_int == 0 &&
      !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) &&
      (num_refs == 1 || frame_mv[refs[1]].as_int == 0)) {
    int rfc = mbmi->mode_context[mbmi->ref_frame[0]];
    int c1 = cost_mv_ref(cpi, NEARMV, rfc);
    int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
    int c3 = cost_mv_ref(cpi, ZEROMV, rfc);

    if (this_mode == NEARMV) {
      if (c1 > c3)
        return INT64_MAX;
    } else if (this_mode == NEARESTMV) {
      if (c2 > c3)
        return INT64_MAX;
    } else {
      assert(this_mode == ZEROMV);
      if (num_refs == 1) {
        if ((c3 >= c2 &&
             mode_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0) ||
            (c3 >= c1 &&
             mode_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0))
          return INT64_MAX;
      } else {
        if ((c3 >= c2 &&
             mode_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0 &&
             mode_mv[NEARESTMV][mbmi->ref_frame[1]].as_int == 0) ||
            (c3 >= c1 &&
             mode_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0 &&
             mode_mv[NEARMV][mbmi->ref_frame[1]].as_int == 0))
          return INT64_MAX;
      }
    }
  }

  for (i = 0; i < num_refs; ++i) {
    cur_mv[i] = frame_mv[refs[i]];
    // Clip "next_nearest" so that it does not extend too far out of the image
    if (this_mode != NEWMV)
      clamp_mv2(&cur_mv[i].as_mv, xd);

    if (mv_check_bounds(x, &cur_mv[i]))
      return INT64_MAX;
    mbmi->mv[i].as_int = cur_mv[i].as_int;
  }

  // do first prediction into the destination buffer. Do the next
  // prediction into a temporary buffer. Then keep track of which one
  // of these currently holds the best predictor, and use the other
  // one for future predictions. In the end, copy from tmp_buf to
  // dst if necessary.
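  // A hedged reading of that scheme: best_needs_copy tracks *where* the
  // best prediction currently lives, and each losing buffer is reused for
  // the next filter's prediction, e.g.
  //   dst     <- filter 0            (best_needs_copy == 0)
  //   tmp_buf <- filter 1; if it wins, best_needs_copy flips to 1
  //   dst     <- filter 2; and so on
  // Rather than a final memcpy, the dst pointers are simply re-aimed at
  // tmp_buf when the winner ended up there (see the pred_exists path
  // further below).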
2764 for (i = 0; i < MAX_MB_PLANE; i++) { 2765 orig_dst[i] = xd->plane[i].dst.buf; 2766 orig_dst_stride[i] = xd->plane[i].dst.stride; 2767 } 2768 2769 /* We don't include the cost of the second reference here, because there 2770 * are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other 2771 * words if you present them in that order, the second one is always known 2772 * if the first is known */ 2773 *rate2 += cost_mv_ref(cpi, this_mode, 2774 mbmi->mode_context[mbmi->ref_frame[0]]); 2775 2776 if (!(*mode_excluded)) { 2777 if (is_comp_pred) { 2778 *mode_excluded = (cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY); 2779 } else { 2780 *mode_excluded = (cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY); 2781 } 2782 } 2783 2784 pred_exists = 0; 2785 // Are all MVs integer pel for Y and UV 2786 intpel_mv = (mbmi->mv[0].as_mv.row & 15) == 0 && 2787 (mbmi->mv[0].as_mv.col & 15) == 0; 2788 if (is_comp_pred) 2789 intpel_mv &= (mbmi->mv[1].as_mv.row & 15) == 0 && 2790 (mbmi->mv[1].as_mv.col & 15) == 0; 2791 // Search for best switchable filter by checking the variance of 2792 // pred error irrespective of whether the filter will be used 2793 if (cm->mcomp_filter_type != BILINEAR) { 2794 *best_filter = EIGHTTAP; 2795 if (x->source_variance < 2796 cpi->sf.disable_filter_search_var_thresh) { 2797 *best_filter = EIGHTTAP; 2798 vp9_zero(cpi->rd_filter_cache); 2799 } else { 2800 int i, newbest; 2801 int tmp_rate_sum = 0; 2802 int64_t tmp_dist_sum = 0; 2803 2804 cpi->rd_filter_cache[SWITCHABLE_FILTERS] = INT64_MAX; 2805 for (i = 0; i < SWITCHABLE_FILTERS; ++i) { 2806 int j; 2807 int64_t rs_rd; 2808 mbmi->interp_filter = i; 2809 vp9_setup_interp_filters(xd, mbmi->interp_filter, cm); 2810 rs = get_switchable_rate(x); 2811 rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0); 2812 2813 if (i > 0 && intpel_mv) { 2814 cpi->rd_filter_cache[i] = RDCOST(x->rdmult, x->rddiv, 2815 tmp_rate_sum, tmp_dist_sum); 2816 cpi->rd_filter_cache[SWITCHABLE_FILTERS] = 2817 MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS], 2818 cpi->rd_filter_cache[i] + rs_rd); 2819 rd = cpi->rd_filter_cache[i]; 2820 if (cm->mcomp_filter_type == SWITCHABLE) 2821 rd += rs_rd; 2822 } else { 2823 int rate_sum = 0; 2824 int64_t dist_sum = 0; 2825 if ((cm->mcomp_filter_type == SWITCHABLE && 2826 (!i || best_needs_copy)) || 2827 (cm->mcomp_filter_type != SWITCHABLE && 2828 (cm->mcomp_filter_type == mbmi->interp_filter || 2829 (i == 0 && intpel_mv)))) { 2830 for (j = 0; j < MAX_MB_PLANE; j++) { 2831 xd->plane[j].dst.buf = orig_dst[j]; 2832 xd->plane[j].dst.stride = orig_dst_stride[j]; 2833 } 2834 } else { 2835 for (j = 0; j < MAX_MB_PLANE; j++) { 2836 xd->plane[j].dst.buf = tmp_buf + j * 64 * 64; 2837 xd->plane[j].dst.stride = 64; 2838 } 2839 } 2840 vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); 2841 model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum); 2842 cpi->rd_filter_cache[i] = RDCOST(x->rdmult, x->rddiv, 2843 rate_sum, dist_sum); 2844 cpi->rd_filter_cache[SWITCHABLE_FILTERS] = 2845 MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS], 2846 cpi->rd_filter_cache[i] + rs_rd); 2847 rd = cpi->rd_filter_cache[i]; 2848 if (cm->mcomp_filter_type == SWITCHABLE) 2849 rd += rs_rd; 2850 if (i == 0 && intpel_mv) { 2851 tmp_rate_sum = rate_sum; 2852 tmp_dist_sum = dist_sum; 2853 } 2854 } 2855 if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) { 2856 if (rd / 2 > ref_best_rd) { 2857 for (i = 0; i < MAX_MB_PLANE; i++) { 2858 xd->plane[i].dst.buf = orig_dst[i]; 2859 xd->plane[i].dst.stride = orig_dst_stride[i]; 2860 } 2861 return 
INT64_MAX;
          }
        }
        newbest = i == 0 || rd < best_rd;

        if (newbest) {
          best_rd = rd;
          *best_filter = mbmi->interp_filter;
          if (cm->mcomp_filter_type == SWITCHABLE && i && !intpel_mv)
            best_needs_copy = !best_needs_copy;
        }

        if ((cm->mcomp_filter_type == SWITCHABLE && newbest) ||
            (cm->mcomp_filter_type != SWITCHABLE &&
             cm->mcomp_filter_type == mbmi->interp_filter)) {
          pred_exists = 1;
        }
      }

      for (i = 0; i < MAX_MB_PLANE; i++) {
        xd->plane[i].dst.buf = orig_dst[i];
        xd->plane[i].dst.stride = orig_dst_stride[i];
      }
    }
  }
  // Set the appropriate filter
  mbmi->interp_filter = cm->mcomp_filter_type != SWITCHABLE ?
      cm->mcomp_filter_type : *best_filter;
  vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
  rs = cm->mcomp_filter_type == SWITCHABLE ? get_switchable_rate(x) : 0;

  if (pred_exists) {
    if (best_needs_copy) {
      // again temporarily set the buffers to local memory to prevent a memcpy
      for (i = 0; i < MAX_MB_PLANE; i++) {
        xd->plane[i].dst.buf = tmp_buf + i * 64 * 64;
        xd->plane[i].dst.stride = 64;
      }
    }
  } else {
    // Handles the special case when a filter that is not in the
    // switchable list (e.g. bilinear, 6-tap) is indicated at the frame level
    vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
  }

  if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
    int tmp_rate;
    int64_t tmp_dist;
    model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist);
    rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
    // if current pred_error modeled rd is substantially more than the best
    // so far, do not bother doing full rd
    if (rd / 2 > ref_best_rd) {
      for (i = 0; i < MAX_MB_PLANE; i++) {
        xd->plane[i].dst.buf = orig_dst[i];
        xd->plane[i].dst.stride = orig_dst_stride[i];
      }
      return INT64_MAX;
    }
  }

  if (cpi->common.mcomp_filter_type == SWITCHABLE)
    *rate2 += get_switchable_rate(x);

  if (!is_comp_pred && cpi->enable_encode_breakout) {
    if (cpi->active_map_enabled && x->active_ptr[0] == 0)
      x->skip = 1;
    else if (x->encode_breakout) {
      const BLOCK_SIZE y_size = get_plane_block_size(bsize, &xd->plane[0]);
      const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]);
      unsigned int var, sse;
      // Skipping threshold for ac.
      unsigned int thresh_ac;
      // The encode_breakout input
      unsigned int encode_breakout = x->encode_breakout << 4;
      unsigned int max_thresh = 36000;

      // Use an extremely low threshold for static frames to limit skipping.
      if (cpi->enable_encode_breakout == 2)
        max_thresh = 128;

      // Calculate threshold according to dequant value.
      thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9;

      // Use encode_breakout input if it is bigger than internal threshold.
      if (thresh_ac < encode_breakout)
        thresh_ac = encode_breakout;

      // Set a maximum for threshold to avoid big PSNR loss in low bitrate
      // case.
      if (thresh_ac > max_thresh)
        thresh_ac = max_thresh;

      var = cpi->fn_ptr[y_size].vf(x->plane[0].src.buf, x->plane[0].src.stride,
                                   xd->plane[0].dst.buf,
                                   xd->plane[0].dst.stride, &sse);

      // Adjust threshold according to partition size.
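      // Worked example (using the lookup values assumed from the common
      // block-size tables, where b_width_log2 counts 4-pel units): an 8x8
      // block gives a shift of 8 - (1 + 1) = 6, i.e. thresh_ac / 64, while
      // a 64x64 block gives 8 - (4 + 4) = 0, the full threshold. That 2^6
      // gap matches the 64x difference in pixel count between the two
      // sizes, since sse grows with block area.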
2959 thresh_ac >>= 8 - (b_width_log2_lookup[bsize] + 2960 b_height_log2_lookup[bsize]); 2961 2962 // Y skipping condition checking 2963 if (sse < thresh_ac || sse == 0) { 2964 // Skipping threshold for dc 2965 unsigned int thresh_dc; 2966 2967 thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6); 2968 2969 // dc skipping checking 2970 if ((sse - var) < thresh_dc || sse == var) { 2971 unsigned int sse_u, sse_v; 2972 unsigned int var_u, var_v; 2973 2974 var_u = cpi->fn_ptr[uv_size].vf(x->plane[1].src.buf, 2975 x->plane[1].src.stride, 2976 xd->plane[1].dst.buf, 2977 xd->plane[1].dst.stride, &sse_u); 2978 2979 // U skipping condition checking 2980 if ((sse_u * 4 < thresh_ac || sse_u == 0) && 2981 (sse_u - var_u < thresh_dc || sse_u == var_u)) { 2982 var_v = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf, 2983 x->plane[2].src.stride, 2984 xd->plane[2].dst.buf, 2985 xd->plane[2].dst.stride, &sse_v); 2986 2987 // V skipping condition checking 2988 if ((sse_v * 4 < thresh_ac || sse_v == 0) && 2989 (sse_v - var_v < thresh_dc || sse_v == var_v)) { 2990 x->skip = 1; 2991 2992 // The cost of skip bit needs to be added. 2993 *rate2 += vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd), 1); 2994 2995 // Scaling factor for SSE from spatial domain to frequency domain 2996 // is 16. Adjust distortion accordingly. 2997 *distortion_uv = (sse_u + sse_v) << 4; 2998 *distortion = (sse << 4) + *distortion_uv; 2999 3000 *disable_skip = 1; 3001 this_rd = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion); 3002 } 3003 } 3004 } 3005 } 3006 } 3007 } 3008 3009 if (!x->skip) { 3010 int skippable_y, skippable_uv; 3011 int64_t sseuv = INT64_MAX; 3012 int64_t rdcosty = INT64_MAX; 3013 3014 // Y cost and distortion 3015 super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y, psse, 3016 bsize, txfm_cache, ref_best_rd); 3017 3018 if (*rate_y == INT_MAX) { 3019 *rate2 = INT_MAX; 3020 *distortion = INT64_MAX; 3021 for (i = 0; i < MAX_MB_PLANE; i++) { 3022 xd->plane[i].dst.buf = orig_dst[i]; 3023 xd->plane[i].dst.stride = orig_dst_stride[i]; 3024 } 3025 return INT64_MAX; 3026 } 3027 3028 *rate2 += *rate_y; 3029 *distortion += *distortion_y; 3030 3031 rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion); 3032 rdcosty = MIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse)); 3033 3034 super_block_uvrd(cpi, x, rate_uv, distortion_uv, &skippable_uv, &sseuv, 3035 bsize, ref_best_rd - rdcosty); 3036 if (*rate_uv == INT_MAX) { 3037 *rate2 = INT_MAX; 3038 *distortion = INT64_MAX; 3039 for (i = 0; i < MAX_MB_PLANE; i++) { 3040 xd->plane[i].dst.buf = orig_dst[i]; 3041 xd->plane[i].dst.stride = orig_dst_stride[i]; 3042 } 3043 return INT64_MAX; 3044 } 3045 3046 *psse += sseuv; 3047 *rate2 += *rate_uv; 3048 *distortion += *distortion_uv; 3049 *skippable = skippable_y && skippable_uv; 3050 } 3051 3052 for (i = 0; i < MAX_MB_PLANE; i++) { 3053 xd->plane[i].dst.buf = orig_dst[i]; 3054 xd->plane[i].dst.stride = orig_dst_stride[i]; 3055 } 3056 3057 return this_rd; // if 0, this will be re-calculated by caller 3058 } 3059 3060 static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, 3061 int max_plane) { 3062 struct macroblock_plane *const p = x->plane; 3063 struct macroblockd_plane *const pd = x->e_mbd.plane; 3064 int i; 3065 3066 for (i = 0; i < max_plane; ++i) { 3067 p[i].coeff = ctx->coeff_pbuf[i][1]; 3068 pd[i].qcoeff = ctx->qcoeff_pbuf[i][1]; 3069 pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1]; 3070 pd[i].eobs = ctx->eobs_pbuf[i][1]; 3071 3072 ctx->coeff_pbuf[i][1] = ctx->coeff_pbuf[i][0]; 3073 ctx->qcoeff_pbuf[i][1] = 
ctx->qcoeff_pbuf[i][0]; 3074 ctx->dqcoeff_pbuf[i][1] = ctx->dqcoeff_pbuf[i][0]; 3075 ctx->eobs_pbuf[i][1] = ctx->eobs_pbuf[i][0]; 3076 3077 ctx->coeff_pbuf[i][0] = p[i].coeff; 3078 ctx->qcoeff_pbuf[i][0] = pd[i].qcoeff; 3079 ctx->dqcoeff_pbuf[i][0] = pd[i].dqcoeff; 3080 ctx->eobs_pbuf[i][0] = pd[i].eobs; 3081 } 3082 } 3083 3084 void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, 3085 int *returnrate, int64_t *returndist, 3086 BLOCK_SIZE bsize, 3087 PICK_MODE_CONTEXT *ctx, int64_t best_rd) { 3088 VP9_COMMON *const cm = &cpi->common; 3089 MACROBLOCKD *const xd = &x->e_mbd; 3090 int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0; 3091 int y_skip = 0, uv_skip = 0; 3092 int64_t dist_y = 0, dist_uv = 0, tx_cache[TX_MODES] = { 0 }; 3093 x->skip_encode = 0; 3094 ctx->skip = 0; 3095 xd->mi_8x8[0]->mbmi.ref_frame[0] = INTRA_FRAME; 3096 if (bsize >= BLOCK_8X8) { 3097 if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, 3098 &dist_y, &y_skip, bsize, tx_cache, 3099 best_rd) >= best_rd) { 3100 *returnrate = INT_MAX; 3101 return; 3102 } 3103 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly, 3104 &dist_uv, &uv_skip, bsize); 3105 } else { 3106 y_skip = 0; 3107 if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly, 3108 &dist_y, best_rd) >= best_rd) { 3109 *returnrate = INT_MAX; 3110 return; 3111 } 3112 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly, 3113 &dist_uv, &uv_skip, BLOCK_8X8); 3114 } 3115 3116 if (y_skip && uv_skip) { 3117 *returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly + 3118 vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd), 1); 3119 *returndist = dist_y + dist_uv; 3120 vp9_zero(ctx->tx_rd_diff); 3121 } else { 3122 int i; 3123 *returnrate = rate_y + rate_uv + 3124 vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd), 0); 3125 *returndist = dist_y + dist_uv; 3126 if (cpi->sf.tx_size_search_method == USE_FULL_RD) 3127 for (i = 0; i < TX_MODES; i++) { 3128 if (tx_cache[i] < INT64_MAX && tx_cache[cm->tx_mode] < INT64_MAX) 3129 ctx->tx_rd_diff[i] = tx_cache[i] - tx_cache[cm->tx_mode]; 3130 else 3131 ctx->tx_rd_diff[i] = 0; 3132 } 3133 } 3134 3135 ctx->mic = *xd->mi_8x8[0]; 3136 } 3137 3138 int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, 3139 const TileInfo *const tile, 3140 int mi_row, int mi_col, 3141 int *returnrate, 3142 int64_t *returndistortion, 3143 BLOCK_SIZE bsize, 3144 PICK_MODE_CONTEXT *ctx, 3145 int64_t best_rd_so_far) { 3146 VP9_COMMON *cm = &cpi->common; 3147 MACROBLOCKD *xd = &x->e_mbd; 3148 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; 3149 const struct segmentation *seg = &cm->seg; 3150 const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]); 3151 MB_PREDICTION_MODE this_mode; 3152 MV_REFERENCE_FRAME ref_frame, second_ref_frame; 3153 unsigned char segment_id = mbmi->segment_id; 3154 int comp_pred, i; 3155 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; 3156 struct buf_2d yv12_mb[4][MAX_MB_PLANE]; 3157 int_mv single_newmv[MAX_REF_FRAMES] = { { 0 } }; 3158 static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG, 3159 VP9_ALT_FLAG }; 3160 int idx_list[4] = {0, 3161 cpi->lst_fb_idx, 3162 cpi->gld_fb_idx, 3163 cpi->alt_fb_idx}; 3164 int64_t best_rd = best_rd_so_far; 3165 int64_t best_tx_rd[TX_MODES]; 3166 int64_t best_tx_diff[TX_MODES]; 3167 int64_t best_pred_diff[NB_PREDICTION_TYPES]; 3168 int64_t best_pred_rd[NB_PREDICTION_TYPES]; 3169 int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS]; 3170 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]; 
3171 MB_MODE_INFO best_mbmode = { 0 }; 3172 int j; 3173 int mode_index, best_mode_index = 0; 3174 unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES]; 3175 vp9_prob comp_mode_p; 3176 int64_t best_intra_rd = INT64_MAX; 3177 int64_t best_inter_rd = INT64_MAX; 3178 MB_PREDICTION_MODE best_intra_mode = DC_PRED; 3179 MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME; 3180 INTERPOLATION_TYPE tmp_best_filter = SWITCHABLE; 3181 int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES]; 3182 int64_t dist_uv[TX_SIZES]; 3183 int skip_uv[TX_SIZES]; 3184 MB_PREDICTION_MODE mode_uv[TX_SIZES]; 3185 struct scale_factors scale_factor[4]; 3186 unsigned int ref_frame_mask = 0; 3187 unsigned int mode_mask = 0; 3188 int64_t mode_distortions[MB_MODE_COUNT] = {-1}; 3189 int64_t frame_distortions[MAX_REF_FRAMES] = {-1}; 3190 int intra_cost_penalty = 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q); 3191 const int bws = num_8x8_blocks_wide_lookup[bsize] / 2; 3192 const int bhs = num_8x8_blocks_high_lookup[bsize] / 2; 3193 int best_skip2 = 0; 3194 3195 x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; 3196 3197 // Everywhere the flag is set the error is much higher than its neighbors. 3198 ctx->frames_with_high_error = 0; 3199 ctx->modes_with_high_error = 0; 3200 3201 estimate_ref_frame_costs(cpi, segment_id, ref_costs_single, ref_costs_comp, 3202 &comp_mode_p); 3203 3204 for (i = 0; i < NB_PREDICTION_TYPES; ++i) 3205 best_pred_rd[i] = INT64_MAX; 3206 for (i = 0; i < TX_MODES; i++) 3207 best_tx_rd[i] = INT64_MAX; 3208 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) 3209 best_filter_rd[i] = INT64_MAX; 3210 for (i = 0; i < TX_SIZES; i++) 3211 rate_uv_intra[i] = INT_MAX; 3212 3213 *returnrate = INT_MAX; 3214 3215 // Create a mask set to 1 for each reference frame used by a smaller 3216 // resolution. 
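  // (How the masks are consumed, as a hedged note: the bits accumulated
  // below start out as "this ref/mode had high error at a smaller size"
  // flags gathered from the child contexts, and are then inverted, so in
  // the mode loop a set bit means the candidate is still worth testing at
  // this size and a cleared bit prunes it.)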
3217 if (cpi->sf.use_avoid_tested_higherror) { 3218 switch (block_size) { 3219 case BLOCK_64X64: 3220 for (i = 0; i < 4; i++) { 3221 for (j = 0; j < 4; j++) { 3222 ref_frame_mask |= x->mb_context[i][j].frames_with_high_error; 3223 mode_mask |= x->mb_context[i][j].modes_with_high_error; 3224 } 3225 } 3226 for (i = 0; i < 4; i++) { 3227 ref_frame_mask |= x->sb32_context[i].frames_with_high_error; 3228 mode_mask |= x->sb32_context[i].modes_with_high_error; 3229 } 3230 break; 3231 case BLOCK_32X32: 3232 for (i = 0; i < 4; i++) { 3233 ref_frame_mask |= 3234 x->mb_context[x->sb_index][i].frames_with_high_error; 3235 mode_mask |= x->mb_context[x->sb_index][i].modes_with_high_error; 3236 } 3237 break; 3238 default: 3239 // Until we handle all block sizes set it to present; 3240 ref_frame_mask = 0; 3241 mode_mask = 0; 3242 break; 3243 } 3244 ref_frame_mask = ~ref_frame_mask; 3245 mode_mask = ~mode_mask; 3246 } 3247 3248 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) { 3249 if (cpi->ref_frame_flags & flag_list[ref_frame]) { 3250 setup_buffer_inter(cpi, x, tile, idx_list[ref_frame], ref_frame, 3251 block_size, mi_row, mi_col, 3252 frame_mv[NEARESTMV], frame_mv[NEARMV], 3253 yv12_mb, scale_factor); 3254 } 3255 frame_mv[NEWMV][ref_frame].as_int = INVALID_MV; 3256 frame_mv[ZEROMV][ref_frame].as_int = 0; 3257 } 3258 3259 for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) { 3260 int mode_excluded = 0; 3261 int64_t this_rd = INT64_MAX; 3262 int disable_skip = 0; 3263 int compmode_cost = 0; 3264 int rate2 = 0, rate_y = 0, rate_uv = 0; 3265 int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0; 3266 int skippable = 0; 3267 int64_t tx_cache[TX_MODES]; 3268 int i; 3269 int this_skip2 = 0; 3270 int64_t total_sse = INT_MAX; 3271 int early_term = 0; 3272 3273 for (i = 0; i < TX_MODES; ++i) 3274 tx_cache[i] = INT64_MAX; 3275 3276 x->skip = 0; 3277 this_mode = vp9_mode_order[mode_index].mode; 3278 ref_frame = vp9_mode_order[mode_index].ref_frame; 3279 second_ref_frame = vp9_mode_order[mode_index].second_ref_frame; 3280 3281 // Look at the reference frame of the best mode so far and set the 3282 // skip mask to look at a subset of the remaining modes. 3283 if (mode_index > cpi->sf.mode_skip_start) { 3284 if (mode_index == (cpi->sf.mode_skip_start + 1)) { 3285 switch (vp9_mode_order[best_mode_index].ref_frame) { 3286 case INTRA_FRAME: 3287 cpi->mode_skip_mask = 0; 3288 break; 3289 case LAST_FRAME: 3290 cpi->mode_skip_mask = LAST_FRAME_MODE_MASK; 3291 break; 3292 case GOLDEN_FRAME: 3293 cpi->mode_skip_mask = GOLDEN_FRAME_MODE_MASK; 3294 break; 3295 case ALTREF_FRAME: 3296 cpi->mode_skip_mask = ALT_REF_MODE_MASK; 3297 break; 3298 case NONE: 3299 case MAX_REF_FRAMES: 3300 assert(!"Invalid Reference frame"); 3301 } 3302 } 3303 if (cpi->mode_skip_mask & ((int64_t)1 << mode_index)) 3304 continue; 3305 } 3306 3307 // Skip if the current reference frame has been masked off 3308 if (cpi->sf.reference_masking && !cpi->set_ref_frame_mask && 3309 (cpi->ref_frame_mask & (1 << ref_frame))) 3310 continue; 3311 3312 // Test best rd so far against threshold for trying this mode. 3313 if ((best_rd < ((int64_t)cpi->rd_threshes[segment_id][bsize][mode_index] * 3314 cpi->rd_thresh_freq_fact[bsize][mode_index] >> 5)) || 3315 cpi->rd_threshes[segment_id][bsize][mode_index] == INT_MAX) 3316 continue; 3317 3318 // Do not allow compound prediction if the segment level reference 3319 // frame feature is in use as in this case there can only be one reference. 
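    // (Hedged example: a segment pinned to LAST_FRAME via SEG_LVL_REF_FRAME
    // codes exactly one reference, so a LAST+ALTREF compound candidate has
    // no way to signal its second reference and is dropped here.)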
3320 if ((second_ref_frame > INTRA_FRAME) && 3321 vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) 3322 continue; 3323 3324 // Skip some checking based on small partitions' result. 3325 if (x->fast_ms > 1 && !ref_frame) 3326 continue; 3327 if (x->fast_ms > 2 && ref_frame != x->subblock_ref) 3328 continue; 3329 3330 if (cpi->sf.use_avoid_tested_higherror && bsize >= BLOCK_8X8) { 3331 if (!(ref_frame_mask & (1 << ref_frame))) { 3332 continue; 3333 } 3334 if (!(mode_mask & (1 << this_mode))) { 3335 continue; 3336 } 3337 if (second_ref_frame != NONE 3338 && !(ref_frame_mask & (1 << second_ref_frame))) { 3339 continue; 3340 } 3341 } 3342 3343 mbmi->ref_frame[0] = ref_frame; 3344 mbmi->ref_frame[1] = second_ref_frame; 3345 3346 if (!(ref_frame == INTRA_FRAME 3347 || (cpi->ref_frame_flags & flag_list[ref_frame]))) { 3348 continue; 3349 } 3350 if (!(second_ref_frame == NONE 3351 || (cpi->ref_frame_flags & flag_list[second_ref_frame]))) { 3352 continue; 3353 } 3354 3355 comp_pred = second_ref_frame > INTRA_FRAME; 3356 if (comp_pred) { 3357 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) 3358 if (vp9_mode_order[best_mode_index].ref_frame == INTRA_FRAME) 3359 continue; 3360 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH) 3361 if (ref_frame != best_inter_ref_frame && 3362 second_ref_frame != best_inter_ref_frame) 3363 continue; 3364 } 3365 3366 set_scale_factors(xd, ref_frame, second_ref_frame, scale_factor); 3367 mbmi->uv_mode = DC_PRED; 3368 3369 // Evaluate all sub-pel filters irrespective of whether we can use 3370 // them for this frame. 3371 mbmi->interp_filter = cm->mcomp_filter_type; 3372 vp9_setup_interp_filters(xd, mbmi->interp_filter, cm); 3373 3374 if (comp_pred) { 3375 if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) 3376 continue; 3377 set_scale_factors(xd, ref_frame, second_ref_frame, scale_factor); 3378 3379 mode_excluded = mode_excluded 3380 ? mode_excluded 3381 : cm->comp_pred_mode == SINGLE_PREDICTION_ONLY; 3382 } else { 3383 if (ref_frame != INTRA_FRAME && second_ref_frame != INTRA_FRAME) { 3384 mode_excluded = 3385 mode_excluded ? 3386 mode_excluded : cm->comp_pred_mode == COMP_PREDICTION_ONLY; 3387 } 3388 } 3389 3390 // Select prediction reference frames. 3391 for (i = 0; i < MAX_MB_PLANE; i++) { 3392 xd->plane[i].pre[0] = yv12_mb[ref_frame][i]; 3393 if (comp_pred) 3394 xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i]; 3395 } 3396 3397 // If the segment reference frame feature is enabled.... 3398 // then do nothing if the current ref frame is not allowed.. 3399 if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) && 3400 vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != 3401 (int)ref_frame) { 3402 continue; 3403 // If the segment skip feature is enabled.... 3404 // then do nothing if the current mode is not allowed.. 3405 } else if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP) && 3406 (this_mode != ZEROMV && ref_frame != INTRA_FRAME)) { 3407 continue; 3408 // Disable this drop out case if the ref frame 3409 // segment level feature is enabled for this segment. This is to 3410 // prevent the possibility that we end up unable to pick any mode. 3411 } else if (!vp9_segfeature_active(seg, segment_id, 3412 SEG_LVL_REF_FRAME)) { 3413 // Only consider ZEROMV/ALTREF_FRAME for alt ref frame, 3414 // unless ARNR filtering is enabled in which case we want 3415 // an unfiltered alternative. We allow near/nearest as well 3416 // because they may result in zero-zero MVs but be cheaper. 
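      // (Hedged clarification of the check below: when the source frame is
      // the ALTREF itself, i.e. an overlay frame, and ARNR filtering is off
      // (arnr_max_frames == 0), the ALTREF buffer already matches the
      // source, so everything except ALTREF ZEROMV, or NEAR/NEAREST
      // candidates that happen to be (0,0), is skipped as pointless.)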
    // If the segment reference frame feature is enabled,
    // then do nothing if the current ref frame is not allowed.
    if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
        vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) !=
            (int)ref_frame) {
      continue;
    // If the segment skip feature is enabled,
    // then do nothing if the current mode is not allowed.
    } else if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP) &&
               (this_mode != ZEROMV && ref_frame != INTRA_FRAME)) {
      continue;
    // Disable this drop out case if the ref frame
    // segment level feature is enabled for this segment. This is to
    // prevent the possibility that we end up unable to pick any mode.
    } else if (!vp9_segfeature_active(seg, segment_id,
                                      SEG_LVL_REF_FRAME)) {
      // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
      // unless ARNR filtering is enabled in which case we want
      // an unfiltered alternative. We allow near/nearest as well
      // because they may result in zero-zero MVs but be cheaper.
      if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
        if ((this_mode != ZEROMV &&
             !(this_mode == NEARMV &&
               frame_mv[NEARMV][ALTREF_FRAME].as_int == 0) &&
             !(this_mode == NEARESTMV &&
               frame_mv[NEARESTMV][ALTREF_FRAME].as_int == 0)) ||
            ref_frame != ALTREF_FRAME) {
          continue;
        }
      }
    }
    // TODO(JBB): This is to make up for the fact that we don't have sad
    // functions that work when the block size reads outside the umv. We
    // should fix this either by making the motion search just work on
    // a representative block in the boundary (first) and then implement a
    // function that does sads when inside the border.
    if (((mi_row + bhs) > cm->mi_rows || (mi_col + bws) > cm->mi_cols) &&
        this_mode == NEWMV) {
      continue;
    }

#ifdef MODE_TEST_HIT_STATS
    // TEST/DEBUG CODE
    // Keep a record of the number of test hits at each size.
    cpi->mode_test_hits[bsize]++;
#endif

    if (ref_frame == INTRA_FRAME) {
      TX_SIZE uv_tx;
      // Disable intra modes other than DC_PRED for blocks with low variance.
      // Threshold for intra skipping based on source variance.
      // TODO(debargha): Specialize the threshold for super block sizes.
      static const unsigned int skip_intra_var_thresh[BLOCK_SIZES] = {
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
      };
      if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
          this_mode != DC_PRED &&
          x->source_variance < skip_intra_var_thresh[mbmi->sb_type])
        continue;
      // Only search the oblique modes if the best so far is
      // one of the neighboring directional modes.
      if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
          (this_mode >= D45_PRED && this_mode <= TM_PRED)) {
        if (vp9_mode_order[best_mode_index].ref_frame > INTRA_FRAME)
          continue;
      }
      mbmi->mode = this_mode;
      if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
        if (conditional_skipintra(mbmi->mode, best_intra_mode))
          continue;
      }

      super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL,
                      bsize, tx_cache, best_rd);

      if (rate_y == INT_MAX)
        continue;

      uv_tx = MIN(mbmi->tx_size, max_uv_txsize_lookup[bsize]);
      if (rate_uv_intra[uv_tx] == INT_MAX) {
        choose_intra_uv_mode(cpi, ctx, bsize, &rate_uv_intra[uv_tx],
                             &rate_uv_tokenonly[uv_tx],
                             &dist_uv[uv_tx], &skip_uv[uv_tx],
                             &mode_uv[uv_tx]);
      }

      rate_uv = rate_uv_tokenonly[uv_tx];
      distortion_uv = dist_uv[uv_tx];
      skippable = skippable && skip_uv[uv_tx];
      mbmi->uv_mode = mode_uv[uv_tx];

      rate2 = rate_y + x->mbmode_cost[mbmi->mode] + rate_uv_intra[uv_tx];
      if (this_mode != DC_PRED && this_mode != TM_PRED)
        rate2 += intra_cost_penalty;
      distortion2 = distortion_y + distortion_uv;
    } else {
      mbmi->mode = this_mode;
      compmode_cost = vp9_cost_bit(comp_mode_p, second_ref_frame > INTRA_FRAME);
      this_rd = handle_inter_mode(cpi, x, tile, bsize,
                                  tx_cache,
                                  &rate2, &distortion2, &skippable,
                                  &rate_y, &distortion_y,
                                  &rate_uv, &distortion_uv,
                                  &mode_excluded, &disable_skip,
                                  &tmp_best_filter, frame_mv,
                                  mi_row, mi_col,
                                  single_newmv, &total_sse, best_rd);
      if (this_rd == INT64_MAX)
        continue;
    }

    if (cm->comp_pred_mode == HYBRID_PREDICTION) {
      rate2 += compmode_cost;
    }
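    // Illustrative note (added commentary): the rolling rate2/distortion2
    // pair is folded into a single score via RDCOST.  Assuming the usual
    // vp9 form RDCOST(RM, DM, R, D) = ((128 + R * RM) >> 8) + (D << 3)
    // (an assumption here; see vp9_rdopt.h for the actual macro), a
    // hypothetical rdmult = 256, rate = 100, distortion = 10 gives
    //   ((128 + 100 * 256) >> 8) + (10 << 3) = 100 + 80 = 180,
    // and the candidate with the smallest such score wins.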
    // Estimate the reference frame signaling cost and add it
    // to the rolling cost variable.
    if (second_ref_frame > INTRA_FRAME) {
      rate2 += ref_costs_comp[ref_frame];
    } else {
      rate2 += ref_costs_single[ref_frame];
    }

    if (!disable_skip) {
      // Test for the condition where skip block will be activated
      // because there are no non-zero coefficients and make any
      // necessary adjustment for rate. Ignore if skip is coded at
      // segment level as the cost won't have been added in.
      // Is MB level skip allowed (i.e. not coded at segment level)?
      const int mb_skip_allowed = !vp9_segfeature_active(seg, segment_id,
                                                         SEG_LVL_SKIP);

      if (skippable) {
        // Back out the coefficient coding costs.
        rate2 -= (rate_y + rate_uv);
        // for best yrd calculation
        rate_uv = 0;

        if (mb_skip_allowed) {
          int prob_skip_cost;

          // Cost the skip mb case.
          vp9_prob skip_prob =
            vp9_get_pred_prob_mbskip(cm, xd);

          if (skip_prob) {
            prob_skip_cost = vp9_cost_bit(skip_prob, 1);
            rate2 += prob_skip_cost;
          }
        }
      } else if (mb_skip_allowed && ref_frame != INTRA_FRAME && !xd->lossless) {
        if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
            RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
          // Add in the cost of the no skip flag.
          int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd),
                                            0);
          rate2 += prob_skip_cost;
        } else {
          // FIXME(rbultje) make this work for splitmv also
          int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd),
                                            1);
          rate2 += prob_skip_cost;
          distortion2 = total_sse;
          assert(total_sse >= 0);
          rate2 -= (rate_y + rate_uv);
          rate_y = 0;
          rate_uv = 0;
          this_skip2 = 1;
        }
      } else if (mb_skip_allowed) {
        // Add in the cost of the no skip flag.
        int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd),
                                          0);
        rate2 += prob_skip_cost;
      }

      // Calculate the final RD estimate for this mode.
      this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
    }

    // Keep record of best intra rd.
    if (!is_inter_block(&xd->mi_8x8[0]->mbmi) &&
        this_rd < best_intra_rd) {
      best_intra_rd = this_rd;
      best_intra_mode = xd->mi_8x8[0]->mbmi.mode;
    }

    // Keep record of best inter rd with single reference.
    if (is_inter_block(&xd->mi_8x8[0]->mbmi) &&
        !has_second_ref(&xd->mi_8x8[0]->mbmi) &&
        !mode_excluded && this_rd < best_inter_rd) {
      best_inter_rd = this_rd;
      best_inter_ref_frame = ref_frame;
    }

    if (!disable_skip && ref_frame == INTRA_FRAME) {
      for (i = 0; i < NB_PREDICTION_TYPES; ++i)
        best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
      for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
        best_filter_rd[i] = MIN(best_filter_rd[i], this_rd);
    }

    // Store the respective mode distortions for later use.
    if (mode_distortions[this_mode] == -1
        || distortion2 < mode_distortions[this_mode]) {
      mode_distortions[this_mode] = distortion2;
    }
    if (frame_distortions[ref_frame] == -1
        || distortion2 < frame_distortions[ref_frame]) {
      frame_distortions[ref_frame] = distortion2;
    }
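    // Illustrative note (added commentary): the minima recorded above feed
    // the modes_with_high_error / frames_with_high_error masks built after
    // the mode loop.  Larger block sizes then OR together the masks of
    // their constituent sub-blocks (see the use_avoid_tested_higherror
    // switch at the top of this function) to prune candidates that were
    // clearly poor at the finer partitioning.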
    // Did this mode help, i.e. is it the new best mode?
    if (this_rd < best_rd || x->skip) {
      int max_plane = MAX_MB_PLANE;
      if (!mode_excluded) {
        // Note index of best mode so far.
        best_mode_index = mode_index;

        if (ref_frame == INTRA_FRAME) {
          /* required for left and above block mv */
          mbmi->mv[0].as_int = 0;
          max_plane = 1;
        }

        *returnrate = rate2;
        *returndistortion = distortion2;
        best_rd = this_rd;
        best_mbmode = *mbmi;
        best_skip2 = this_skip2;
        if (!x->select_txfm_size)
          swap_block_ptr(x, ctx, max_plane);
        vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
                   sizeof(uint8_t) * ctx->num_4x4_blk);

        // TODO(debargha): enhance this test with a better distortion
        // prediction based on qp, activity mask and history.
        if ((cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
            (mode_index > MIN_EARLY_TERM_INDEX)) {
          const int qstep = xd->plane[0].dequant[1];
          // TODO(debargha): Enhance this by specializing for each mode_index.
          int scale = 4;
          if (x->source_variance < UINT_MAX) {
            const int var_adjust = (x->source_variance < 16);
            scale -= var_adjust;
          }
          if (ref_frame > INTRA_FRAME &&
              distortion2 * scale < qstep * qstep) {
            early_term = 1;
          }
        }
      }
    }

    /* keep record of best compound/single-only prediction */
    if (!disable_skip && ref_frame != INTRA_FRAME) {
      int single_rd, hybrid_rd, single_rate, hybrid_rate;

      if (cm->comp_pred_mode == HYBRID_PREDICTION) {
        single_rate = rate2 - compmode_cost;
        hybrid_rate = rate2;
      } else {
        single_rate = rate2;
        hybrid_rate = rate2 + compmode_cost;
      }

      single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
      hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);

      if (second_ref_frame <= INTRA_FRAME &&
          single_rd < best_pred_rd[SINGLE_PREDICTION_ONLY]) {
        best_pred_rd[SINGLE_PREDICTION_ONLY] = single_rd;
      } else if (second_ref_frame > INTRA_FRAME &&
                 single_rd < best_pred_rd[COMP_PREDICTION_ONLY]) {
        best_pred_rd[COMP_PREDICTION_ONLY] = single_rd;
      }
      if (hybrid_rd < best_pred_rd[HYBRID_PREDICTION])
        best_pred_rd[HYBRID_PREDICTION] = hybrid_rd;
    }

    /* keep record of best filter type */
    if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME &&
        cm->mcomp_filter_type != BILINEAR) {
      int64_t ref = cpi->rd_filter_cache[cm->mcomp_filter_type == SWITCHABLE ?
                        SWITCHABLE_FILTERS : cm->mcomp_filter_type];
      for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
        int64_t adj_rd;
        // In cases of poor prediction, filter_cache[] can contain really big
        // values, which actually are bigger than this_rd itself. This can
        // cause negative best_filter_rd[] values, which is obviously silly.
        // Therefore, if filter_cache < ref, we do an adjusted calculation.
        if (cpi->rd_filter_cache[i] >= ref) {
          adj_rd = this_rd + cpi->rd_filter_cache[i] - ref;
        } else {
          // FIXME(rbultje) do this for comppred also
          //
          // To prevent out-of-range computation in
          //    adj_rd = cpi->rd_filter_cache[i] * this_rd / ref
          // cpi->rd_filter_cache[i] / ref is converted to a 256-based ratio.
          int tmp = cpi->rd_filter_cache[i] * 256 / ref;
          adj_rd = (this_rd * tmp) >> 8;
        }
        best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
      }
    }
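    // Illustrative note (added commentary): the 256-based ratio above is a
    // fixed-point trick to evaluate filter_cache[i] * this_rd / ref without
    // overflowing the intermediate product.  A hypothetical example, with
    // filter_cache[i] = 900, ref = 1000 and this_rd = 400000:
    //   tmp    = 900 * 256 / 1000 = 230
    //   adj_rd = (400000 * 230) >> 8 = 359375
    // which approximates the exact 900 * 400000 / 1000 = 360000 to within
    // the precision of the 8-bit ratio.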
    /* keep record of best txfm size */
    if (bsize < BLOCK_32X32) {
      if (bsize < BLOCK_16X16)
        tx_cache[ALLOW_16X16] = tx_cache[ALLOW_8X8];

      tx_cache[ALLOW_32X32] = tx_cache[ALLOW_16X16];
    }
    if (!mode_excluded && this_rd != INT64_MAX) {
      for (i = 0; i < TX_MODES && tx_cache[i] < INT64_MAX; i++) {
        int64_t adj_rd = INT64_MAX;
        adj_rd = this_rd + tx_cache[i] - tx_cache[cm->tx_mode];

        if (adj_rd < best_tx_rd[i])
          best_tx_rd[i] = adj_rd;
      }
    }

    if (early_term)
      break;

    if (x->skip && !comp_pred)
      break;
  }

  if (best_rd >= best_rd_so_far)
    return INT64_MAX;

  // If we used an estimate for the uv intra rd in the loop above...
  if (cpi->sf.use_uv_intra_rd_estimate) {
    // Do intra UV best rd mode selection if the best mode choice above
    // was intra.
    if (vp9_mode_order[best_mode_index].ref_frame == INTRA_FRAME) {
      TX_SIZE uv_tx_size = get_uv_tx_size(mbmi);
      rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size],
                              &rate_uv_tokenonly[uv_tx_size],
                              &dist_uv[uv_tx_size],
                              &skip_uv[uv_tx_size],
                              bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
    }
  }

  // If we are using reference masking and the set mask flag is set then
  // create the reference frame mask.
  if (cpi->sf.reference_masking && cpi->set_ref_frame_mask)
    cpi->ref_frame_mask = ~(1 << vp9_mode_order[best_mode_index].ref_frame);

  // Flag all modes that have a distortion that is more than 2x the best
  // we found at this level.
  for (mode_index = 0; mode_index < MB_MODE_COUNT; ++mode_index) {
    if (mode_index == NEARESTMV || mode_index == NEARMV || mode_index == NEWMV)
      continue;

    if (mode_distortions[mode_index] > 2 * *returndistortion) {
      ctx->modes_with_high_error |= (1 << mode_index);
    }
  }

  // Flag all ref frames that have a distortion that is more than 2x the
  // best we found at this level.
  for (ref_frame = INTRA_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
    if (frame_distortions[ref_frame] > 2 * *returndistortion) {
      ctx->frames_with_high_error |= (1 << ref_frame);
    }
  }

  assert((cm->mcomp_filter_type == SWITCHABLE) ||
         (cm->mcomp_filter_type == best_mbmode.interp_filter) ||
         (best_mbmode.ref_frame[0] == INTRA_FRAME));
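  // Illustrative note (added commentary): the update below implements a
  // simple decay-and-bump scheme.  With a hypothetical starting factor of
  // 64, the winning mode drops to 64 - (64 >> 3) = 56, lowering its skip
  // threshold in the gate near the top of the mode loop, while every other
  // mode creeps up by RD_THRESH_INC until it hits the
  // adaptive_rd_thresh * RD_THRESH_MAX_FACT cap and is pruned earlier.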
  // Updating rd_thresh_freq_fact[] here means that the different
  // partition/block sizes are handled independently based on the best
  // choice for the current partition. It may well be better to keep a scaled
  // best rd so far value and update rd_thresh_freq_fact based on the
  // mode/size combination that wins out.
  if (cpi->sf.adaptive_rd_thresh) {
    for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
      if (mode_index == best_mode_index) {
        cpi->rd_thresh_freq_fact[bsize][mode_index] -=
            (cpi->rd_thresh_freq_fact[bsize][mode_index] >> 3);
      } else {
        cpi->rd_thresh_freq_fact[bsize][mode_index] += RD_THRESH_INC;
        if (cpi->rd_thresh_freq_fact[bsize][mode_index] >
            (cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT)) {
          cpi->rd_thresh_freq_fact[bsize][mode_index] =
              cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT;
        }
      }
    }
  }

  // macroblock modes
  *mbmi = best_mbmode;
  x->skip |= best_skip2;

  for (i = 0; i < NB_PREDICTION_TYPES; ++i) {
    if (best_pred_rd[i] == INT64_MAX)
      best_pred_diff[i] = INT_MIN;
    else
      best_pred_diff[i] = best_rd - best_pred_rd[i];
  }

  if (!x->skip) {
    for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
      if (best_filter_rd[i] == INT64_MAX)
        best_filter_diff[i] = 0;
      else
        best_filter_diff[i] = best_rd - best_filter_rd[i];
    }
    if (cm->mcomp_filter_type == SWITCHABLE)
      assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
  } else {
    vp9_zero(best_filter_diff);
  }

  if (!x->skip) {
    for (i = 0; i < TX_MODES; i++) {
      if (best_tx_rd[i] == INT64_MAX)
        best_tx_diff[i] = 0;
      else
        best_tx_diff[i] = best_rd - best_tx_rd[i];
    }
  } else {
    vp9_zero(best_tx_diff);
  }

  set_scale_factors(xd, mbmi->ref_frame[0], mbmi->ref_frame[1],
                    scale_factor);
  store_coding_context(x, ctx, best_mode_index,
                       &mbmi->ref_mvs[mbmi->ref_frame[0]][0],
                       &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 :
                                      mbmi->ref_frame[1]][0],
                       best_pred_diff, best_tx_diff, best_filter_diff);

  return best_rd;
}
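// Illustrative sketch (added commentary, not part of the library): how a
// partition search loop might invoke the sub-8x8 picker defined below.
// The cpi, x, tile, ctx, mi_row, mi_col and rd_so_far variables are
// hypothetical stand-ins for state a real caller would already hold.
#if 0
{
  int rate;
  int64_t dist;
  int64_t rd = vp9_rd_pick_inter_mode_sub8x8(cpi, x, tile, mi_row, mi_col,
                                             &rate, &dist, BLOCK_8X8, ctx,
                                             rd_so_far);
  if (rd < rd_so_far) {
    // Accept the sub-8x8 partitioning for this block.
    rd_so_far = rd;
  }
}
#endif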
int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
                                      const TileInfo *const tile,
                                      int mi_row, int mi_col,
                                      int *returnrate,
                                      int64_t *returndistortion,
                                      BLOCK_SIZE bsize,
                                      PICK_MODE_CONTEXT *ctx,
                                      int64_t best_rd_so_far) {
  VP9_COMMON *cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
  const struct segmentation *seg = &cm->seg;
  const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]);
  MV_REFERENCE_FRAME ref_frame, second_ref_frame;
  unsigned char segment_id = mbmi->segment_id;
  int comp_pred, i;
  int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
  struct buf_2d yv12_mb[4][MAX_MB_PLANE];
  static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
                                    VP9_ALT_FLAG };
  int idx_list[4] = {0,
                     cpi->lst_fb_idx,
                     cpi->gld_fb_idx,
                     cpi->alt_fb_idx};
  int64_t best_rd = best_rd_so_far;
  int64_t best_yrd = best_rd_so_far;  // FIXME(rbultje) more precise
  int64_t best_tx_rd[TX_MODES];
  int64_t best_tx_diff[TX_MODES];
  int64_t best_pred_diff[NB_PREDICTION_TYPES];
  int64_t best_pred_rd[NB_PREDICTION_TYPES];
  int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
  int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
  MB_MODE_INFO best_mbmode = { 0 };
  int mode_index, best_mode_index = 0;
  unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
  vp9_prob comp_mode_p;
  int64_t best_inter_rd = INT64_MAX;
  MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME;
  INTERPOLATION_TYPE tmp_best_filter = SWITCHABLE;
  int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
  int64_t dist_uv[TX_SIZES];
  int skip_uv[TX_SIZES];
  MB_PREDICTION_MODE mode_uv[TX_SIZES] = { 0 };
  struct scale_factors scale_factor[4];
  unsigned int ref_frame_mask = 0;
  unsigned int mode_mask = 0;
  int intra_cost_penalty = 20 * vp9_dc_quant(cpi->common.base_qindex,
                                             cpi->common.y_dc_delta_q);
  int_mv seg_mvs[4][MAX_REF_FRAMES];
  b_mode_info best_bmodes[4];
  int best_skip2 = 0;

  x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
  vpx_memset(x->zcoeff_blk[TX_4X4], 0, 4);

  for (i = 0; i < 4; i++) {
    int j;
    for (j = 0; j < MAX_REF_FRAMES; j++)
      seg_mvs[i][j].as_int = INVALID_MV;
  }

  estimate_ref_frame_costs(cpi, segment_id, ref_costs_single, ref_costs_comp,
                           &comp_mode_p);

  for (i = 0; i < NB_PREDICTION_TYPES; ++i)
    best_pred_rd[i] = INT64_MAX;
  for (i = 0; i < TX_MODES; i++)
    best_tx_rd[i] = INT64_MAX;
  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
    best_filter_rd[i] = INT64_MAX;
  for (i = 0; i < TX_SIZES; i++)
    rate_uv_intra[i] = INT_MAX;

  *returnrate = INT_MAX;

  // Create a mask set to 1 for each reference frame used by a smaller
  // resolution.
  if (cpi->sf.use_avoid_tested_higherror) {
    ref_frame_mask = 0;
    mode_mask = 0;
    ref_frame_mask = ~ref_frame_mask;
    mode_mask = ~mode_mask;
  }

  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
    if (cpi->ref_frame_flags & flag_list[ref_frame]) {
      setup_buffer_inter(cpi, x, tile, idx_list[ref_frame], ref_frame,
                         block_size, mi_row, mi_col,
                         frame_mv[NEARESTMV], frame_mv[NEARMV],
                         yv12_mb, scale_factor);
    }
    frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
    frame_mv[ZEROMV][ref_frame].as_int = 0;
  }

  for (mode_index = 0; mode_index < MAX_REFS; ++mode_index) {
    int mode_excluded = 0;
    int64_t this_rd = INT64_MAX;
    int disable_skip = 0;
    int compmode_cost = 0;
    int rate2 = 0, rate_y = 0, rate_uv = 0;
    int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
    int skippable = 0;
    int64_t tx_cache[TX_MODES];
    int i;
    int this_skip2 = 0;
    int64_t total_sse = INT_MAX;
    int early_term = 0;

    for (i = 0; i < TX_MODES; ++i)
      tx_cache[i] = INT64_MAX;

    x->skip = 0;
    ref_frame = vp9_ref_order[mode_index].ref_frame;
    second_ref_frame = vp9_ref_order[mode_index].second_ref_frame;

    // Look at the reference frame of the best mode so far and set the
    // skip mask to look at a subset of the remaining modes.
    if (mode_index > 2 && cpi->sf.mode_skip_start < MAX_MODES) {
      if (mode_index == 3) {
        switch (vp9_ref_order[best_mode_index].ref_frame) {
          case INTRA_FRAME:
            cpi->mode_skip_mask = 0;
            break;
          case LAST_FRAME:
            cpi->mode_skip_mask = 0x0010;
            break;
          case GOLDEN_FRAME:
            cpi->mode_skip_mask = 0x0008;
            break;
          case ALTREF_FRAME:
            cpi->mode_skip_mask = 0x0000;
            break;
          case NONE:
          case MAX_REF_FRAMES:
            assert(!"Invalid Reference frame");
        }
      }
      if (cpi->mode_skip_mask & ((int64_t)1 << mode_index))
        continue;
    }

    // Skip if the current reference frame has been masked off.
    if (cpi->sf.reference_masking && !cpi->set_ref_frame_mask &&
        (cpi->ref_frame_mask & (1 << ref_frame)))
      continue;
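    // Illustrative note (added commentary): each bit of mode_skip_mask
    // indexes into vp9_ref_order, so the constants above prune reference
    // combinations that do not involve the current best reference.  For
    // example 0x0010 (bit 4) masks off the {GOLDEN_FRAME, ALTREF_FRAME}
    // compound entry while LAST_FRAME is winning, and 0x0008 (bit 3) masks
    // off {LAST_FRAME, ALTREF_FRAME} while GOLDEN_FRAME is winning.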
    // Test best rd so far against threshold for trying this mode.
    if ((best_rd <
         ((int64_t)cpi->rd_thresh_sub8x8[segment_id][bsize][mode_index] *
          cpi->rd_thresh_freq_sub8x8[bsize][mode_index] >> 5)) ||
        cpi->rd_thresh_sub8x8[segment_id][bsize][mode_index] == INT_MAX)
      continue;

    // Do not allow compound prediction if the segment level reference
    // frame feature is in use, as in this case there can only be one
    // reference.
    if ((second_ref_frame > INTRA_FRAME) &&
        vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
      continue;

    mbmi->ref_frame[0] = ref_frame;
    mbmi->ref_frame[1] = second_ref_frame;

    if (!(ref_frame == INTRA_FRAME
        || (cpi->ref_frame_flags & flag_list[ref_frame]))) {
      continue;
    }
    if (!(second_ref_frame == NONE
        || (cpi->ref_frame_flags & flag_list[second_ref_frame]))) {
      continue;
    }

    comp_pred = second_ref_frame > INTRA_FRAME;
    if (comp_pred) {
      if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA)
        if (vp9_ref_order[best_mode_index].ref_frame == INTRA_FRAME)
          continue;
      if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH)
        if (ref_frame != best_inter_ref_frame &&
            second_ref_frame != best_inter_ref_frame)
          continue;
    }

    // TODO(jingning, jkoleszar): scaling reference frame not supported for
    // sub8x8 blocks.
    if (ref_frame > 0 &&
        vp9_is_scaled(scale_factor[ref_frame].sfc))
      continue;

    if (second_ref_frame > 0 &&
        vp9_is_scaled(scale_factor[second_ref_frame].sfc))
      continue;

    set_scale_factors(xd, ref_frame, second_ref_frame, scale_factor);
    mbmi->uv_mode = DC_PRED;

    // Evaluate all sub-pel filters irrespective of whether we can use
    // them for this frame.
    mbmi->interp_filter = cm->mcomp_filter_type;
    vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);

    if (comp_pred) {
      if (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))
        continue;
      set_scale_factors(xd, ref_frame, second_ref_frame, scale_factor);

      mode_excluded = mode_excluded
                      ? mode_excluded
                      : cm->comp_pred_mode == SINGLE_PREDICTION_ONLY;
    } else {
      if (ref_frame != INTRA_FRAME && second_ref_frame != INTRA_FRAME) {
        mode_excluded =
            mode_excluded ?
                mode_excluded : cm->comp_pred_mode == COMP_PREDICTION_ONLY;
      }
    }

    // Select prediction reference frames.
    for (i = 0; i < MAX_MB_PLANE; i++) {
      xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
      if (comp_pred)
        xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
    }
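    // Illustrative note (added commentary): SEG_LVL_REF_FRAME pins a
    // segment to a single reference.  A hypothetical configuration made
    // elsewhere through the segmentation API, e.g.
    //   vp9_enable_segfeature(seg, 1, SEG_LVL_REF_FRAME);
    //   vp9_set_segdata(seg, 1, SEG_LVL_REF_FRAME, LAST_FRAME);
    // would make the check below reject every candidate for segment 1
    // whose first reference is not LAST_FRAME.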
    // If the segment reference frame feature is enabled,
    // then do nothing if the current ref frame is not allowed.
    if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
        vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) !=
            (int)ref_frame) {
      continue;
    // If the segment skip feature is enabled,
    // then do nothing if the current mode is not allowed.
    } else if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP) &&
               ref_frame != INTRA_FRAME) {
      continue;
    // Disable this drop out case if the ref frame
    // segment level feature is enabled for this segment. This is to
    // prevent the possibility that we end up unable to pick any mode.
    } else if (!vp9_segfeature_active(seg, segment_id,
                                      SEG_LVL_REF_FRAME)) {
      // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
      // unless ARNR filtering is enabled in which case we want
      // an unfiltered alternative. We allow near/nearest as well
      // because they may result in zero-zero MVs but be cheaper.
      if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0))
        continue;
    }

#ifdef MODE_TEST_HIT_STATS
    // TEST/DEBUG CODE
    // Keep a record of the number of test hits at each size.
    cpi->mode_test_hits[bsize]++;
#endif

    if (ref_frame == INTRA_FRAME) {
      int rate;
      mbmi->tx_size = TX_4X4;
      if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y,
                                       &distortion_y, best_rd) >= best_rd)
        continue;
      rate2 += rate;
      rate2 += intra_cost_penalty;
      distortion2 += distortion_y;

      if (rate_uv_intra[TX_4X4] == INT_MAX) {
        choose_intra_uv_mode(cpi, ctx, bsize, &rate_uv_intra[TX_4X4],
                             &rate_uv_tokenonly[TX_4X4],
                             &dist_uv[TX_4X4], &skip_uv[TX_4X4],
                             &mode_uv[TX_4X4]);
      }
      rate2 += rate_uv_intra[TX_4X4];
      rate_uv = rate_uv_tokenonly[TX_4X4];
      distortion2 += dist_uv[TX_4X4];
      distortion_uv = dist_uv[TX_4X4];
      mbmi->uv_mode = mode_uv[TX_4X4];
      tx_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
      for (i = 0; i < TX_MODES; ++i)
        tx_cache[i] = tx_cache[ONLY_4X4];
    } else {
      int rate;
      int64_t distortion;
      int64_t this_rd_thresh;
      int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX;
      int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX;
      int64_t tmp_best_distortion = INT_MAX, tmp_best_sse, uv_sse;
      int tmp_best_skippable = 0;
      int switchable_filter_index;
      int_mv *second_ref = comp_pred ?
          &mbmi->ref_mvs[second_ref_frame][0] : NULL;
      b_mode_info tmp_best_bmodes[16];
      MB_MODE_INFO tmp_best_mbmode;
      BEST_SEG_INFO bsi[SWITCHABLE_FILTERS];
      int pred_exists = 0;
      int uv_skippable;

      this_rd_thresh = (ref_frame == LAST_FRAME) ?
          cpi->rd_thresh_sub8x8[segment_id][bsize][THR_LAST] :
          cpi->rd_thresh_sub8x8[segment_id][bsize][THR_ALTR];
      this_rd_thresh = (ref_frame == GOLDEN_FRAME) ?
          cpi->rd_thresh_sub8x8[segment_id][bsize][THR_GOLD] : this_rd_thresh;
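      // Illustrative note (added commentary): the filter search below adds
      // the cost of signalling each switchable filter to its measured RD
      // score before comparing candidates:
      //   rs    = get_switchable_rate(x);             // signalling rate
      //   rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
      //   score = tmp_rd + rs_rd;                     // when SWITCHABLE
      // so a filter only wins if its prediction gain outweighs the extra
      // bits needed to signal it.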
      xd->mi_8x8[0]->mbmi.tx_size = TX_4X4;

      cpi->rd_filter_cache[SWITCHABLE_FILTERS] = INT64_MAX;
      if (cm->mcomp_filter_type != BILINEAR) {
        tmp_best_filter = EIGHTTAP;
        if (x->source_variance <
            cpi->sf.disable_filter_search_var_thresh) {
          tmp_best_filter = EIGHTTAP;
          vp9_zero(cpi->rd_filter_cache);
        } else {
          for (switchable_filter_index = 0;
               switchable_filter_index < SWITCHABLE_FILTERS;
               ++switchable_filter_index) {
            int newbest, rs;
            int64_t rs_rd;
            mbmi->interp_filter = switchable_filter_index;
            vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);

            tmp_rd = rd_pick_best_mbsegmentation(cpi, x, tile,
                                                 &mbmi->ref_mvs[ref_frame][0],
                                                 second_ref,
                                                 best_yrd,
                                                 &rate, &rate_y, &distortion,
                                                 &skippable, &total_sse,
                                                 (int)this_rd_thresh, seg_mvs,
                                                 bsi, switchable_filter_index,
                                                 mi_row, mi_col);

            if (tmp_rd == INT64_MAX)
              continue;
            cpi->rd_filter_cache[switchable_filter_index] = tmp_rd;
            rs = get_switchable_rate(x);
            rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
            cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
                MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS],
                    tmp_rd + rs_rd);
            if (cm->mcomp_filter_type == SWITCHABLE)
              tmp_rd += rs_rd;

            newbest = (tmp_rd < tmp_best_rd);
            if (newbest) {
              tmp_best_filter = mbmi->interp_filter;
              tmp_best_rd = tmp_rd;
            }
            if ((newbest && cm->mcomp_filter_type == SWITCHABLE) ||
                (mbmi->interp_filter == cm->mcomp_filter_type &&
                 cm->mcomp_filter_type != SWITCHABLE)) {
              tmp_best_rdu = tmp_rd;
              tmp_best_rate = rate;
              tmp_best_ratey = rate_y;
              tmp_best_distortion = distortion;
              tmp_best_sse = total_sse;
              tmp_best_skippable = skippable;
              tmp_best_mbmode = *mbmi;
              for (i = 0; i < 4; i++) {
                tmp_best_bmodes[i] = xd->mi_8x8[0]->bmi[i];
                x->zcoeff_blk[TX_4X4][i] = !xd->plane[0].eobs[i];
              }
              pred_exists = 1;
              if (switchable_filter_index == 0 &&
                  cpi->sf.use_rd_breakout &&
                  best_rd < INT64_MAX) {
                if (tmp_best_rdu / 2 > best_rd) {
                  // Skip searching the other filters if the first is
                  // already substantially larger than the best so far.
                  tmp_best_filter = mbmi->interp_filter;
                  tmp_best_rdu = INT64_MAX;
                  break;
                }
              }
            }
          }  // switchable_filter_index loop
        }
      }

      if (tmp_best_rdu == INT64_MAX)
        continue;

      mbmi->interp_filter = (cm->mcomp_filter_type == SWITCHABLE ?
                             tmp_best_filter : cm->mcomp_filter_type);
      vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
      if (!pred_exists) {
        // Handles the special case when a filter that is not in the
        // switchable list (bilinear, 6-tap) is indicated at the frame level.
        tmp_rd = rd_pick_best_mbsegmentation(cpi, x, tile,
                                             &mbmi->ref_mvs[ref_frame][0],
                                             second_ref,
                                             best_yrd,
                                             &rate, &rate_y, &distortion,
                                             &skippable, &total_sse,
                                             (int)this_rd_thresh, seg_mvs,
                                             bsi, 0,
                                             mi_row, mi_col);
        if (tmp_rd == INT64_MAX)
          continue;
      } else {
        if (cpi->common.mcomp_filter_type == SWITCHABLE) {
          int rs = get_switchable_rate(x);
          tmp_best_rdu -= RDCOST(x->rdmult, x->rddiv, rs, 0);
        }
        tmp_rd = tmp_best_rdu;
        total_sse = tmp_best_sse;
        rate = tmp_best_rate;
        rate_y = tmp_best_ratey;
        distortion = tmp_best_distortion;
        skippable = tmp_best_skippable;
        *mbmi = tmp_best_mbmode;
        for (i = 0; i < 4; i++)
          xd->mi_8x8[0]->bmi[i] = tmp_best_bmodes[i];
      }

      rate2 += rate;
      distortion2 += distortion;

      if (cpi->common.mcomp_filter_type == SWITCHABLE)
        rate2 += get_switchable_rate(x);

      if (!mode_excluded) {
        if (comp_pred)
          mode_excluded = cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY;
        else
          mode_excluded = cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY;
      }
      compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred);

      tmp_best_rdu = best_rd -
          MIN(RDCOST(x->rdmult, x->rddiv, rate2, distortion2),
              RDCOST(x->rdmult, x->rddiv, 0, total_sse));

      if (tmp_best_rdu > 0) {
        // If even the 'Y' rd value of split is higher than best so far
        // then don't bother looking at UV.
        vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
                                        BLOCK_8X8);
        super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
                         &uv_sse, BLOCK_8X8, tmp_best_rdu);
        if (rate_uv == INT_MAX)
          continue;
        rate2 += rate_uv;
        distortion2 += distortion_uv;
        skippable = skippable && uv_skippable;
        total_sse += uv_sse;

        tx_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
        for (i = 0; i < TX_MODES; ++i)
          tx_cache[i] = tx_cache[ONLY_4X4];
      }
    }

    if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) {
      rate2 += compmode_cost;
    }

    // Estimate the reference frame signaling cost and add it
    // to the rolling cost variable.
    if (second_ref_frame > INTRA_FRAME) {
      rate2 += ref_costs_comp[ref_frame];
    } else {
      rate2 += ref_costs_single[ref_frame];
    }

    if (!disable_skip) {
      // Test for the condition where skip block will be activated
      // because there are no non-zero coefficients and make any
      // necessary adjustment for rate. Ignore if skip is coded at
      // segment level as the cost won't have been added in.
      // Is MB level skip allowed (i.e. not coded at segment level)?
      const int mb_skip_allowed = !vp9_segfeature_active(seg, segment_id,
                                                         SEG_LVL_SKIP);
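      // Illustrative note (added commentary): the branch below decides
      // between coding the residual and forcing a skip by comparing
      //   RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2)
      // against
      //   RDCOST(x->rdmult, x->rddiv, 0, total_sse),
      // i.e. the cost of sending the coefficients versus living with the
      // prediction error alone; the cheaper option determines whether the
      // coefficient rate is kept or backed out and this_skip2 is set.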
      if (mb_skip_allowed && ref_frame != INTRA_FRAME && !xd->lossless) {
        if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
            RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
          // Add in the cost of the no skip flag.
          int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd),
                                            0);
          rate2 += prob_skip_cost;
        } else {
          // FIXME(rbultje) make this work for splitmv also
          int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd),
                                            1);
          rate2 += prob_skip_cost;
          distortion2 = total_sse;
          assert(total_sse >= 0);
          rate2 -= (rate_y + rate_uv);
          rate_y = 0;
          rate_uv = 0;
          this_skip2 = 1;
        }
      } else if (mb_skip_allowed) {
        // Add in the cost of the no skip flag.
        int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd),
                                          0);
        rate2 += prob_skip_cost;
      }

      // Calculate the final RD estimate for this mode.
      this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
    }

    // Keep record of best inter rd with single reference.
    if (xd->mi_8x8[0]->mbmi.ref_frame[0] > INTRA_FRAME &&
        xd->mi_8x8[0]->mbmi.ref_frame[1] == NONE &&
        !mode_excluded &&
        this_rd < best_inter_rd) {
      best_inter_rd = this_rd;
      best_inter_ref_frame = ref_frame;
    }

    if (!disable_skip && ref_frame == INTRA_FRAME) {
      for (i = 0; i < NB_PREDICTION_TYPES; ++i)
        best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
      for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
        best_filter_rd[i] = MIN(best_filter_rd[i], this_rd);
    }

    // Did this mode help, i.e. is it the new best mode?
    if (this_rd < best_rd || x->skip) {
      if (!mode_excluded) {
        int max_plane = MAX_MB_PLANE;
        // Note index of best mode so far.
        best_mode_index = mode_index;

        if (ref_frame == INTRA_FRAME) {
          /* required for left and above block mv */
          mbmi->mv[0].as_int = 0;
          max_plane = 1;
        }

        *returnrate = rate2;
        *returndistortion = distortion2;
        best_rd = this_rd;
        best_yrd = best_rd -
                   RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv);
        best_mbmode = *mbmi;
        best_skip2 = this_skip2;
        if (!x->select_txfm_size)
          swap_block_ptr(x, ctx, max_plane);
        vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
                   sizeof(uint8_t) * ctx->num_4x4_blk);

        for (i = 0; i < 4; i++)
          best_bmodes[i] = xd->mi_8x8[0]->bmi[i];

        // TODO(debargha): enhance this test with a better distortion
        // prediction based on qp, activity mask and history.
        if ((cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
            (mode_index > MIN_EARLY_TERM_INDEX)) {
          const int qstep = xd->plane[0].dequant[1];
          // TODO(debargha): Enhance this by specializing for each mode_index.
          int scale = 4;
          if (x->source_variance < UINT_MAX) {
            const int var_adjust = (x->source_variance < 16);
            scale -= var_adjust;
          }
          if (ref_frame > INTRA_FRAME &&
              distortion2 * scale < qstep * qstep) {
            early_term = 1;
          }
        }
      }
    }

    /* keep record of best compound/single-only prediction */
    if (!disable_skip && ref_frame != INTRA_FRAME) {
      int single_rd, hybrid_rd, single_rate, hybrid_rate;

      if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) {
        single_rate = rate2 - compmode_cost;
        hybrid_rate = rate2;
      } else {
        single_rate = rate2;
        hybrid_rate = rate2 + compmode_cost;
      }

      single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
      hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);

      if (second_ref_frame <= INTRA_FRAME &&
          single_rd < best_pred_rd[SINGLE_PREDICTION_ONLY]) {
        best_pred_rd[SINGLE_PREDICTION_ONLY] = single_rd;
      } else if (second_ref_frame > INTRA_FRAME &&
                 single_rd < best_pred_rd[COMP_PREDICTION_ONLY]) {
        best_pred_rd[COMP_PREDICTION_ONLY] = single_rd;
      }
      if (hybrid_rd < best_pred_rd[HYBRID_PREDICTION])
        best_pred_rd[HYBRID_PREDICTION] = hybrid_rd;
    }
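    // Illustrative note (added commentary): the FLAG_EARLY_TERMINATE test a
    // few lines up treats qstep * qstep as a proxy for the distortion the
    // quantizer would introduce anyway.  A hypothetical example: with
    // qstep = 40 and scale = 4, an inter candidate whose distortion2 is
    // below 40 * 40 / 4 = 400 is judged good enough that the remaining,
    // lower-priority modes are unlikely to beat it, and the loop exits.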
    /* keep record of best filter type */
    if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME &&
        cm->mcomp_filter_type != BILINEAR) {
      int64_t ref = cpi->rd_filter_cache[cm->mcomp_filter_type == SWITCHABLE ?
                        SWITCHABLE_FILTERS : cm->mcomp_filter_type];
      for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
        int64_t adj_rd;
        // In cases of poor prediction, filter_cache[] can contain really big
        // values, which actually are bigger than this_rd itself. This can
        // cause negative best_filter_rd[] values, which is obviously silly.
        // Therefore, if filter_cache < ref, we do an adjusted calculation.
        if (cpi->rd_filter_cache[i] >= ref)
          adj_rd = this_rd + cpi->rd_filter_cache[i] - ref;
        else  // FIXME(rbultje) do this for comppred also
          adj_rd = this_rd - (ref - cpi->rd_filter_cache[i]) * this_rd / ref;
        best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
      }
    }

    /* keep record of best txfm size */
    if (bsize < BLOCK_32X32) {
      if (bsize < BLOCK_16X16) {
        tx_cache[ALLOW_8X8] = tx_cache[ONLY_4X4];
        tx_cache[ALLOW_16X16] = tx_cache[ALLOW_8X8];
      }
      tx_cache[ALLOW_32X32] = tx_cache[ALLOW_16X16];
    }
    if (!mode_excluded && this_rd != INT64_MAX) {
      for (i = 0; i < TX_MODES && tx_cache[i] < INT64_MAX; i++) {
        int64_t adj_rd = INT64_MAX;
        if (ref_frame > INTRA_FRAME)
          adj_rd = this_rd + tx_cache[i] - tx_cache[cm->tx_mode];
        else
          adj_rd = this_rd;

        if (adj_rd < best_tx_rd[i])
          best_tx_rd[i] = adj_rd;
      }
    }

    if (early_term)
      break;

    if (x->skip && !comp_pred)
      break;
  }

  if (best_rd >= best_rd_so_far)
    return INT64_MAX;

  // If we used an estimate for the uv intra rd in the loop above...
  if (cpi->sf.use_uv_intra_rd_estimate) {
    // Do intra UV best rd mode selection if the best mode choice above
    // was intra.
    if (vp9_ref_order[best_mode_index].ref_frame == INTRA_FRAME) {
      TX_SIZE uv_tx_size = get_uv_tx_size(mbmi);
      rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size],
                              &rate_uv_tokenonly[uv_tx_size],
                              &dist_uv[uv_tx_size],
                              &skip_uv[uv_tx_size],
                              BLOCK_8X8);
    }
  }

  // If we are using reference masking and the set mask flag is set then
  // create the reference frame mask.
  if (cpi->sf.reference_masking && cpi->set_ref_frame_mask)
    cpi->ref_frame_mask = ~(1 << vp9_ref_order[best_mode_index].ref_frame);

  if (best_rd == INT64_MAX && bsize < BLOCK_8X8) {
    *returnrate = INT_MAX;
    *returndistortion = INT_MAX;
    return best_rd;
  }

  assert((cm->mcomp_filter_type == SWITCHABLE) ||
         (cm->mcomp_filter_type == best_mbmode.interp_filter) ||
         (best_mbmode.ref_frame[0] == INTRA_FRAME));
  // Updating rd_thresh_freq_fact[] here means that the different
  // partition/block sizes are handled independently based on the best
  // choice for the current partition. It may well be better to keep a scaled
  // best rd so far value and update rd_thresh_freq_fact based on the
  // mode/size combination that wins out.
  if (cpi->sf.adaptive_rd_thresh) {
    for (mode_index = 0; mode_index < MAX_REFS; ++mode_index) {
      if (mode_index == best_mode_index) {
        cpi->rd_thresh_freq_sub8x8[bsize][mode_index] -=
            (cpi->rd_thresh_freq_sub8x8[bsize][mode_index] >> 3);
      } else {
        cpi->rd_thresh_freq_sub8x8[bsize][mode_index] += RD_THRESH_INC;
        if (cpi->rd_thresh_freq_sub8x8[bsize][mode_index] >
            (cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT)) {
          cpi->rd_thresh_freq_sub8x8[bsize][mode_index] =
              cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT;
        }
      }
    }
  }

  // macroblock modes
  *mbmi = best_mbmode;
  x->skip |= best_skip2;
  if (best_mbmode.ref_frame[0] == INTRA_FRAME) {
    for (i = 0; i < 4; i++)
      xd->mi_8x8[0]->bmi[i].as_mode = best_bmodes[i].as_mode;
  } else {
    for (i = 0; i < 4; ++i)
      vpx_memcpy(&xd->mi_8x8[0]->bmi[i], &best_bmodes[i], sizeof(b_mode_info));

    mbmi->mv[0].as_int = xd->mi_8x8[0]->bmi[3].as_mv[0].as_int;
    mbmi->mv[1].as_int = xd->mi_8x8[0]->bmi[3].as_mv[1].as_int;
  }

  for (i = 0; i < NB_PREDICTION_TYPES; ++i) {
    if (best_pred_rd[i] == INT64_MAX)
      best_pred_diff[i] = INT_MIN;
    else
      best_pred_diff[i] = best_rd - best_pred_rd[i];
  }

  if (!x->skip) {
    for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
      if (best_filter_rd[i] == INT64_MAX)
        best_filter_diff[i] = 0;
      else
        best_filter_diff[i] = best_rd - best_filter_rd[i];
    }
    if (cm->mcomp_filter_type == SWITCHABLE)
      assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
  } else {
    vp9_zero(best_filter_diff);
  }

  if (!x->skip) {
    for (i = 0; i < TX_MODES; i++) {
      if (best_tx_rd[i] == INT64_MAX)
        best_tx_diff[i] = 0;
      else
        best_tx_diff[i] = best_rd - best_tx_rd[i];
    }
  } else {
    vp9_zero(best_tx_diff);
  }

  set_scale_factors(xd, mbmi->ref_frame[0], mbmi->ref_frame[1],
                    scale_factor);
  store_coding_context(x, ctx, best_mode_index,
                       &mbmi->ref_mvs[mbmi->ref_frame[0]][0],
                       &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 :
                                      mbmi->ref_frame[1]][0],
                       best_pred_diff, best_tx_diff, best_filter_diff);

  return best_rd;
}