/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>

#include "./vp9_rtcd.h"

#include "vpx_mem/vpx_mem.h"

#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_idct.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_pragmas.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_quant_common.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_systemdependent.h"

#include "vp9/encoder/vp9_cost.h"
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_variance.h"

#define RD_THRESH_MAX_FACT 64
#define RD_THRESH_INC 1
#define RD_THRESH_POW 1.25
#define RD_MULT_EPB_RATIO 64

/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

#define LAST_FRAME_MODE_MASK 0xFFEDCD60
#define GOLDEN_FRAME_MODE_MASK 0xFFDA3BB0
#define ALT_REF_MODE_MASK 0xFFC648D0

#define MIN_EARLY_TERM_INDEX 3

typedef struct {
  MB_PREDICTION_MODE mode;
  MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;

typedef struct {
  MV_REFERENCE_FRAME ref_frame[2];
} REF_DEFINITION;

struct rdcost_block_args {
  MACROBLOCK *x;
  ENTROPY_CONTEXT t_above[16];
  ENTROPY_CONTEXT t_left[16];
  int rate;
  int64_t dist;
  int64_t sse;
  int this_rate;
  int64_t this_dist;
  int64_t this_sse;
  int64_t this_rd;
  int64_t best_rd;
  int skip;
  int use_fast_coef_costing;
  const scan_order *so;
};

const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
  {NEARESTMV, {LAST_FRAME, NONE}},
  {NEARESTMV, {ALTREF_FRAME, NONE}},
  {NEARESTMV, {GOLDEN_FRAME, NONE}},

  {DC_PRED, {INTRA_FRAME, NONE}},

  {NEWMV, {LAST_FRAME, NONE}},
  {NEWMV, {ALTREF_FRAME, NONE}},
  {NEWMV, {GOLDEN_FRAME, NONE}},

  {NEARMV, {LAST_FRAME, NONE}},
  {NEARMV, {ALTREF_FRAME, NONE}},
  {NEARESTMV, {LAST_FRAME, ALTREF_FRAME}},
  {NEARESTMV, {GOLDEN_FRAME, ALTREF_FRAME}},

  {TM_PRED, {INTRA_FRAME, NONE}},

  {NEARMV, {LAST_FRAME, ALTREF_FRAME}},
  {NEWMV, {LAST_FRAME, ALTREF_FRAME}},
  {NEARMV, {GOLDEN_FRAME, NONE}},
  {NEARMV, {GOLDEN_FRAME, ALTREF_FRAME}},
  {NEWMV, {GOLDEN_FRAME, ALTREF_FRAME}},

  {ZEROMV, {LAST_FRAME, NONE}},
  {ZEROMV, {GOLDEN_FRAME, NONE}},
  {ZEROMV, {ALTREF_FRAME, NONE}},
  {ZEROMV, {LAST_FRAME, ALTREF_FRAME}},
  {ZEROMV, {GOLDEN_FRAME, ALTREF_FRAME}},

  {H_PRED, {INTRA_FRAME, NONE}},
  {V_PRED, {INTRA_FRAME, NONE}},
  {D135_PRED, {INTRA_FRAME, NONE}},
  {D207_PRED, {INTRA_FRAME, NONE}},
  {D153_PRED, {INTRA_FRAME, NONE}},
  {D63_PRED, {INTRA_FRAME, NONE}},
  {D117_PRED, {INTRA_FRAME, NONE}},
  {D45_PRED, {INTRA_FRAME, NONE}},
};

const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
  {{LAST_FRAME, NONE}},
  {{GOLDEN_FRAME, NONE}},
  {{ALTREF_FRAME, NONE}},
  {{LAST_FRAME, ALTREF_FRAME}},
  {{GOLDEN_FRAME, ALTREF_FRAME}},
  {{INTRA_FRAME, NONE}},
};

// The baseline rd thresholds for breaking out of the rd loop for
// certain modes are assumed to be based on 8x8 blocks.
// This table is used to correct for block size.
// The factors here are << 2 (2 = x0.5, 32 = x8 etc).
static int rd_thresh_block_size_factor[BLOCK_SIZES] =
    {2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32};

static int raster_block_offset(BLOCK_SIZE plane_bsize,
                               int raster_block, int stride) {
  const int bw = b_width_log2(plane_bsize);
  const int y = 4 * (raster_block >> bw);
  const int x = 4 * (raster_block & ((1 << bw) - 1));
  return y * stride + x;
}

static int16_t* raster_block_offset_int16(BLOCK_SIZE plane_bsize,
                                          int raster_block, int16_t *base) {
  const int stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  return base + raster_block_offset(plane_bsize, raster_block, stride);
}

static void fill_mode_costs(VP9_COMP *cpi) {
  MACROBLOCK *const x = &cpi->mb;
  const FRAME_CONTEXT *const fc = &cpi->common.fc;
  int i, j;

  for (i = 0; i < INTRA_MODES; i++)
    for (j = 0; j < INTRA_MODES; j++)
      vp9_cost_tokens((int *)x->y_mode_costs[i][j], vp9_kf_y_mode_prob[i][j],
                      vp9_intra_mode_tree);

  // TODO(rbultje) separate tables for superblock costing?
  vp9_cost_tokens(x->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree);
  vp9_cost_tokens(x->intra_uv_mode_cost[KEY_FRAME],
                  vp9_kf_uv_mode_prob[TM_PRED], vp9_intra_mode_tree);
  vp9_cost_tokens(x->intra_uv_mode_cost[INTER_FRAME],
                  fc->uv_mode_prob[TM_PRED], vp9_intra_mode_tree);

  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
    vp9_cost_tokens((int *)x->switchable_interp_costs[i],
                    fc->switchable_interp_prob[i], vp9_switchable_interp_tree);
}

static void fill_token_costs(vp9_coeff_cost *c,
                             vp9_coeff_probs_model (*p)[PLANE_TYPES]) {
  int i, j, k, l;
  TX_SIZE t;
  for (t = TX_4X4; t <= TX_32X32; ++t)
    for (i = 0; i < PLANE_TYPES; ++i)
      for (j = 0; j < REF_TYPES; ++j)
        for (k = 0; k < COEF_BANDS; ++k)
          for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
            vp9_prob probs[ENTROPY_NODES];
            vp9_model_to_full_probs(p[t][i][j][k][l], probs);
            vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs,
                            vp9_coef_tree);
            vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
                                 vp9_coef_tree);
            assert(c[t][i][j][k][0][l][EOB_TOKEN] ==
                   c[t][i][j][k][1][l][EOB_TOKEN]);
          }
}

static const int rd_iifactor[32] = {
  4, 4, 3, 2, 1, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
};

// 3* dc_qlookup[Q]*dc_qlookup[Q];

/* values are now correlated to quantizer */
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];

void vp9_init_me_luts() {
  int i;

  // Initialize the sad lut tables using a formulaic calculation for now.
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
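  // Illustrative example (assuming vp9_convert_qindex_to_q() returns roughly
  // 100.0 for a mid-range qindex): sad_per_bit16 would then be
  // (int)(0.0418 * 100.0 + 2.4107) = 6.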
  for (i = 0; i < QINDEX_RANGE; i++) {
    const double q = vp9_convert_qindex_to_q(i);
    sad_per_bit16lut[i] = (int)(0.0418 * q + 2.4107);
    sad_per_bit4lut[i] = (int)(0.063 * q + 2.742);
  }
}

int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) {
  const int q = vp9_dc_quant(qindex, 0);
  // TODO(debargha): Adjust the function below
  int rdmult = 88 * q * q / 25;
  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    if (cpi->twopass.next_iiratio > 31)
      rdmult += (rdmult * rd_iifactor[31]) >> 4;
    else
      rdmult += (rdmult * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
  }
  return rdmult;
}

static int compute_rd_thresh_factor(int qindex) {
  // TODO(debargha): Adjust the function below
  const int q = (int)(pow(vp9_dc_quant(qindex, 0) / 4.0, RD_THRESH_POW) * 5.12);
  return MAX(q, 8);
}

void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
  cpi->mb.sadperbit16 = sad_per_bit16lut[qindex];
  cpi->mb.sadperbit4 = sad_per_bit4lut[qindex];
}

static void set_block_thresholds(VP9_COMP *cpi) {
  const VP9_COMMON *const cm = &cpi->common;
  int i, bsize, segment_id;

  for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
    const int qindex = clamp(vp9_get_qindex(&cm->seg, segment_id,
                                            cm->base_qindex) +
                                 cm->y_dc_delta_q,
                             0, MAXQ);
    const int q = compute_rd_thresh_factor(qindex);

    for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
      // Threshold here seems unnecessarily harsh but fine given actual
      // range of values used for cpi->sf.thresh_mult[].
      const int t = q * rd_thresh_block_size_factor[bsize];
      const int thresh_max = INT_MAX / t;

      for (i = 0; i < MAX_MODES; ++i)
        cpi->rd_threshes[segment_id][bsize][i] =
            cpi->rd_thresh_mult[i] < thresh_max ? cpi->rd_thresh_mult[i] * t / 4
                                                : INT_MAX;

      for (i = 0; i < MAX_REFS; ++i) {
        cpi->rd_thresh_sub8x8[segment_id][bsize][i] =
            cpi->rd_thresh_mult_sub8x8[i] < thresh_max
                ? cpi->rd_thresh_mult_sub8x8[i] * t / 4
                : INT_MAX;
      }
    }
  }
}

void vp9_initialize_rd_consts(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &cpi->mb;
  int i;

  vp9_clear_system_state();

  cpi->RDDIV = RDDIV_BITS;  // in bits (to multiply D by 128)
  cpi->RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);

  x->errorperbit = cpi->RDMULT / RD_MULT_EPB_RATIO;
  x->errorperbit += (x->errorperbit == 0);

  x->select_txfm_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
                         cm->frame_type != KEY_FRAME) ? 0 : 1;

  set_block_thresholds(cpi);

  if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME) {
    fill_token_costs(x->token_costs, cm->fc.coef_probs);

    for (i = 0; i < PARTITION_CONTEXTS; i++)
      vp9_cost_tokens(x->partition_cost[i], get_partition_probs(cm, i),
                      vp9_partition_tree);
  }

  if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1 ||
      cm->frame_type == KEY_FRAME) {
    fill_mode_costs(cpi);

    if (!frame_is_intra_only(cm)) {
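      // Build the nmv cost tables from the frame's MV probability context;
      // the high-precision tables are used only when allow_high_precision_mv
      // is set.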
      vp9_build_nmv_cost_table(x->nmvjointcost,
                               cm->allow_high_precision_mv ? x->nmvcost_hp
                                                           : x->nmvcost,
                               &cm->fc.nmvc, cm->allow_high_precision_mv);

      for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
        vp9_cost_tokens((int *)x->inter_mode_cost[i],
                        cm->fc.inter_mode_probs[i], vp9_inter_mode_tree);
    }
  }
}

static const int MAX_XSQ_Q10 = 245727;

static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
  // NOTE: The tables below must be of the same size.

  // The functions described below are sampled at the four most significant
  // bits of x^2 + 8 / 256.

  // Normalized rate:
  // This table models the rate for a Laplacian source with given variance
  // when quantized with a uniform quantizer with given stepsize.
  // The closed form expression is:
  // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
  // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
  // and H(x) is the binary entropy function.
  static const int rate_tab_q10[] = {
    65536,  6086,  5574,  5275,  5063,  4899,  4764,  4651,
     4553,  4389,  4255,  4142,  4044,  3958,  3881,  3811,
     3748,  3635,  3538,  3453,  3376,  3307,  3244,  3186,
     3133,  3037,  2952,  2877,  2809,  2747,  2690,  2638,
     2589,  2501,  2423,  2353,  2290,  2232,  2179,  2130,
     2084,  2001,  1928,  1862,  1802,  1748,  1698,  1651,
     1608,  1530,  1460,  1398,  1342,  1290,  1243,  1199,
     1159,  1086,  1021,   963,   911,   864,   821,   781,
      745,   680,   623,   574,   530,   490,   455,   424,
      395,   345,   304,   269,   239,   213,   190,   171,
      154,   126,   104,    87,    73,    61,    52,    44,
       38,    28,    21,    16,    12,    10,     8,     6,
        5,     3,     2,     1,     1,     1,     0,     0,
  };
  // Normalized distortion:
  // This table models the normalized distortion for a Laplacian source
  // with given variance when quantized with a uniform quantizer with given
  // stepsize. The closed form expression is:
  // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
  // where x = qpstep / sqrt(variance).
  // Note the actual distortion is Dn * variance.
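  // Consistent with that expression, dist_tab_q10[] below runs from 0
  // (quantizer step negligible next to the source variance) up to 1024,
  // i.e. 1.0 in Q10 (everything quantizes to zero, distortion = variance).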
  static const int dist_tab_q10[] = {
       0,    0,    1,    1,    1,    2,    2,    2,
       3,    3,    4,    5,    5,    6,    7,    7,
       8,    9,   11,   12,   13,   15,   16,   17,
      18,   21,   24,   26,   29,   31,   34,   36,
      39,   44,   49,   54,   59,   64,   69,   73,
      78,   88,   97,  106,  115,  124,  133,  142,
     151,  167,  184,  200,  215,  231,  245,  260,
     274,  301,  327,  351,  375,  397,  418,  439,
     458,  495,  528,  559,  587,  613,  637,  659,
     680,  717,  749,  777,  801,  823,  842,  859,
     874,  899,  919,  936,  949,  960,  969,  977,
     983,  994, 1001, 1006, 1010, 1013, 1015, 1017,
    1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024,
  };
  static const int xsq_iq_q10[] = {
         0,      4,      8,     12,     16,     20,     24,     28,
        32,     40,     48,     56,     64,     72,     80,     88,
        96,    112,    128,    144,    160,    176,    192,    208,
       224,    256,    288,    320,    352,    384,    416,    448,
       480,    544,    608,    672,    736,    800,    864,    928,
       992,   1120,   1248,   1376,   1504,   1632,   1760,   1888,
      2016,   2272,   2528,   2784,   3040,   3296,   3552,   3808,
      4064,   4576,   5088,   5600,   6112,   6624,   7136,   7648,
      8160,   9184,  10208,  11232,  12256,  13280,  14304,  15328,
     16352,  18400,  20448,  22496,  24544,  26592,  28640,  30688,
     32736,  36832,  40928,  45024,  49120,  53216,  57312,  61408,
     65504,  73696,  81888,  90080,  98272, 106464, 114656, 122848,
    131040, 147424, 163808, 180192, 196576, 212960, 229344, 245728,
  };
  /*
  static const int tab_size = sizeof(rate_tab_q10) / sizeof(rate_tab_q10[0]);
  assert(sizeof(dist_tab_q10) / sizeof(dist_tab_q10[0]) == tab_size);
  assert(sizeof(xsq_iq_q10) / sizeof(xsq_iq_q10[0]) == tab_size);
  assert(MAX_XSQ_Q10 + 1 == xsq_iq_q10[tab_size - 1]);
  */
  // Locate the table segment containing xsq_q10 (the tables are sampled on
  // the non-uniform grid xsq_iq_q10[]) and interpolate linearly in Q10.
  int tmp = (xsq_q10 >> 2) + 8;
  int k = get_msb(tmp) - 3;
  int xq = (k << 3) + ((tmp >> k) & 0x7);
  const int one_q10 = 1 << 10;
  const int a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k);
  const int b_q10 = one_q10 - a_q10;
  *r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
  *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
}

void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n,
                                  unsigned int qstep, int *rate,
                                  int64_t *dist) {
  // This function models the rate and distortion for a Laplacian
  // source with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expressions are in:
  // Hang and Chen, "Source Model for transform video coder and its
  // application - Part I: Fundamental Theory", IEEE Trans. Circ.
  // Sys. for Video Tech., April 1997.
  if (var == 0) {
    *rate = 0;
    *dist = 0;
  } else {
    int d_q10, r_q10;
    const uint64_t xsq_q10_64 =
        ((((uint64_t)qstep * qstep * n) << 10) + (var >> 1)) / var;
    const int xsq_q10 = xsq_q10_64 > MAX_XSQ_Q10 ?
        MAX_XSQ_Q10 : (int)xsq_q10_64;
    model_rd_norm(xsq_q10, &r_q10, &d_q10);
    *rate = (n * r_q10 + 2) >> 2;
    *dist = (var * (int64_t)d_q10 + 512) >> 10;
  }
}

static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
                            MACROBLOCK *x, MACROBLOCKD *xd,
                            int *out_rate_sum, int64_t *out_dist_sum) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
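  // (This is why pd->dequant[1] >> 3 is passed to the model here and in
  // similar call sites below: the dequant step carries the same 8x scaling.)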
  int i;
  int64_t rate_sum = 0;
  int64_t dist_sum = 0;
  const int ref = xd->mi[0]->mbmi.ref_frame[0];
  unsigned int sse;

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);

    (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
                              pd->dst.buf, pd->dst.stride, &sse);

    if (i == 0)
      x->pred_sse[ref] = sse;

    // Fast approximation of the modelling function.
    if (cpi->speed > 4) {
      int64_t rate;
      int64_t dist;
      int64_t square_error = sse;
      int quantizer = (pd->dequant[1] >> 3);

      if (quantizer < 120)
        rate = (square_error * (280 - quantizer)) >> 8;
      else
        rate = 0;
      dist = (square_error * quantizer) >> 8;
      rate_sum += rate;
      dist_sum += dist;
    } else {
      int rate;
      int64_t dist;
      vp9_model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
                                   pd->dequant[1] >> 3, &rate, &dist);
      rate_sum += rate;
      dist_sum += dist;
    }
  }

  *out_rate_sum = (int)rate_sum;
  *out_dist_sum = dist_sum << 4;
}

static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE bsize,
                                 TX_SIZE tx_size,
                                 MACROBLOCK *x, MACROBLOCKD *xd,
                                 int *out_rate_sum, int64_t *out_dist_sum,
                                 int *out_skip) {
  int j, k;
  BLOCK_SIZE bs;
  const struct macroblock_plane *const p = &x->plane[0];
  const struct macroblockd_plane *const pd = &xd->plane[0];
  const int width = 4 * num_4x4_blocks_wide_lookup[bsize];
  const int height = 4 * num_4x4_blocks_high_lookup[bsize];
  int rate_sum = 0;
  int64_t dist_sum = 0;
  const int t = 4 << tx_size;

  if (tx_size == TX_4X4) {
    bs = BLOCK_4X4;
  } else if (tx_size == TX_8X8) {
    bs = BLOCK_8X8;
  } else if (tx_size == TX_16X16) {
    bs = BLOCK_16X16;
  } else if (tx_size == TX_32X32) {
    bs = BLOCK_32X32;
  } else {
    assert(0);
  }

  *out_skip = 1;
  for (j = 0; j < height; j += t) {
    for (k = 0; k < width; k += t) {
      int rate;
      int64_t dist;
      unsigned int sse;
      cpi->fn_ptr[bs].vf(&p->src.buf[j * p->src.stride + k], p->src.stride,
                         &pd->dst.buf[j * pd->dst.stride + k], pd->dst.stride,
                         &sse);
      // sse works better than var, since there is no dc prediction used
      vp9_model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3,
                                   &rate, &dist);
      rate_sum += rate;
      dist_sum += dist;
      *out_skip &= (rate < 1024);
    }
  }

  *out_rate_sum = rate_sum;
  *out_dist_sum = dist_sum << 4;
}

int64_t vp9_block_error_c(const int16_t *coeff, const int16_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    const int diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += coeff[i] * coeff[i];
  }

  *ssz = sqcoeff;
  return error;
}

/* The trailing '0' is a terminator which is used inside cost_coeffs() to
 * decide whether to include cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in an 8x8 block,
 * were non-zero).
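 * For example, with eob equal to the full block size the band counts sum
 * to exactly that size, so the final band pull reads the trailing 0 and
 * the EOB cost is skipped.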
 */
static const int16_t band_counts[TX_SIZES][8] = {
  { 1, 2, 3, 4,  3,   16 - 13, 0 },
  { 1, 2, 3, 4, 11,   64 - 21, 0 },
  { 1, 2, 3, 4, 11,  256 - 21, 0 },
  { 1, 2, 3, 4, 11, 1024 - 21, 0 },
};

static INLINE int cost_coeffs(MACROBLOCK *x,
                              int plane, int block,
                              ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
                              TX_SIZE tx_size,
                              const int16_t *scan, const int16_t *nb,
                              int use_fast_coef_costing) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const struct macroblock_plane *p = &x->plane[plane];
  const struct macroblockd_plane *pd = &xd->plane[plane];
  const PLANE_TYPE type = pd->plane_type;
  const int16_t *band_count = &band_counts[tx_size][1];
  const int eob = p->eobs[block];
  const int16_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
      x->token_costs[tx_size][type][is_inter_block(mbmi)];
  uint8_t token_cache[32 * 32];
  int pt = combine_entropy_contexts(*A, *L);
  int c, cost;
  // Check for consistency of tx_size with mode info
  assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
                              : get_uv_tx_size(mbmi) == tx_size);

  if (eob == 0) {
    // single eob token
    cost = token_costs[0][0][pt][EOB_TOKEN];
    c = 0;
  } else {
    int band_left = *band_count++;

    // dc token
    int v = qcoeff[0];
    int prev_t = vp9_dct_value_tokens_ptr[v].token;
    cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
    token_cache[0] = vp9_pt_energy_class[prev_t];
    ++token_costs;

    // ac tokens
    for (c = 1; c < eob; c++) {
      const int rc = scan[c];
      int t;

      v = qcoeff[rc];
      t = vp9_dct_value_tokens_ptr[v].token;
      if (use_fast_coef_costing) {
        cost += (*token_costs)[!prev_t][!prev_t][t] + vp9_dct_value_cost_ptr[v];
      } else {
        pt = get_coef_context(nb, token_cache, c);
        cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
        token_cache[rc] = vp9_pt_energy_class[t];
      }
      prev_t = t;
      if (!--band_left) {
        band_left = *band_count++;
        ++token_costs;
      }
    }

    // eob token
    if (band_left) {
      if (use_fast_coef_costing) {
        cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
      } else {
        pt = get_coef_context(nb, token_cache, c);
        cost += (*token_costs)[0][pt][EOB_TOKEN];
      }
    }
  }

  // Record whether this block contained any coded coefficients.
  *A = *L = (c > 0);

  return cost;
}

static void dist_block(int plane, int block, TX_SIZE tx_size,
                       struct rdcost_block_args* args) {
  const int ss_txfrm_size = tx_size << 1;
  MACROBLOCK* const x = args->x;
  MACROBLOCKD* const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  int64_t this_sse;
  int shift = tx_size == TX_32X32 ? 0 : 2;
  int16_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  args->dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                               &this_sse) >> shift;
  args->sse = this_sse >> shift;

  if (x->skip_encode && !is_inter_block(&xd->mi[0]->mbmi)) {
    // TODO(jingning): tune the model to better capture the distortion.
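    // When the intra block is not actually encoded (skip_encode), add a
    // quantizer-step based estimate of the distortion it would incur.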
    int64_t p = (pd->dequant[1] * pd->dequant[1] *
                 (1 << ss_txfrm_size)) >> (shift + 2);
    args->dist += (p >> 4);
    args->sse += p;
  }
}

static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
                       TX_SIZE tx_size, struct rdcost_block_args* args) {
  int x_idx, y_idx;
  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x_idx, &y_idx);

  args->rate = cost_coeffs(args->x, plane, block, args->t_above + x_idx,
                           args->t_left + y_idx, tx_size,
                           args->so->scan, args->so->neighbors,
                           args->use_fast_coef_costing);
}

static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
                          TX_SIZE tx_size, void *arg) {
  struct rdcost_block_args *args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  int64_t rd1, rd2, rd;

  if (args->skip)
    return;

  if (!is_inter_block(mbmi))
    vp9_encode_block_intra(x, plane, block, plane_bsize, tx_size, &mbmi->skip);
  else
    vp9_xform_quant(x, plane, block, plane_bsize, tx_size);

  dist_block(plane, block, tx_size, args);
  rate_block(plane, block, plane_bsize, tx_size, args);
  // rd1: cost of coding this block's residual; rd2: cost of skipping it
  // (the distortion then equals the full SSE).
  rd1 = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist);
  rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse);

  // TODO(jingning): temporarily enabled only for luma component
  rd = MIN(rd1, rd2);
  if (plane == 0)
    x->zcoeff_blk[tx_size][block] = !x->plane[plane].eobs[block] ||
                                    (rd1 > rd2 && !xd->lossless);

  args->this_rate += args->rate;
  args->this_dist += args->dist;
  args->this_sse += args->sse;
  args->this_rd += rd;

  if (args->this_rd > args->best_rd) {
    args->skip = 1;
    return;
  }
}

void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
                              const struct macroblockd_plane *pd,
                              ENTROPY_CONTEXT t_above[16],
                              ENTROPY_CONTEXT t_left[16]) {
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
  const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
  const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
  const ENTROPY_CONTEXT *const above = pd->above_context;
  const ENTROPY_CONTEXT *const left = pd->left_context;

  int i;
  switch (tx_size) {
    case TX_4X4:
      vpx_memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
      vpx_memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
      break;
    case TX_8X8:
      for (i = 0; i < num_4x4_w; i += 2)
        t_above[i] = !!*(const uint16_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 2)
        t_left[i] = !!*(const uint16_t *)&left[i];
      break;
    case TX_16X16:
      for (i = 0; i < num_4x4_w; i += 4)
        t_above[i] = !!*(const uint32_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 4)
        t_left[i] = !!*(const uint32_t *)&left[i];
      break;
    case TX_32X32:
      for (i = 0; i < num_4x4_w; i += 8)
        t_above[i] = !!*(const uint64_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 8)
        t_left[i] = !!*(const uint64_t *)&left[i];
      break;
    default:
      assert(0 && "Invalid transform size.");
  }
}

static void txfm_rd_in_plane(MACROBLOCK *x,
                             int *rate, int64_t *distortion,
                             int *skippable, int64_t *sse,
                             int64_t ref_best_rd, int plane,
                             BLOCK_SIZE bsize, TX_SIZE tx_size,
                             int use_fast_coef_costing) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  struct rdcost_block_args args = { 0 };
  args.x = x;
  args.best_rd = ref_best_rd;
  args.use_fast_coef_costing = use_fast_coef_costing;

  if (plane == 0)
    xd->mi[0]->mbmi.tx_size = tx_size;

  vp9_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);

  args.so = get_scan(xd, tx_size, pd->plane_type, 0);

  vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
                                         block_rd_txfm, &args);
  if (args.skip) {
    *rate = INT_MAX;
    *distortion = INT64_MAX;
    *sse = INT64_MAX;
    *skippable = 0;
  } else {
    *distortion = args.this_dist;
    *rate = args.this_rate;
    *sse = args.this_sse;
    *skippable = vp9_is_skippable_in_plane(x, bsize, plane);
  }
}

static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x,
                                     int *rate, int64_t *distortion,
                                     int *skip, int64_t *sse,
                                     int64_t ref_best_rd,
                                     BLOCK_SIZE bs) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  VP9_COMMON *const cm = &cpi->common;
  const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;

  mbmi->tx_size = MIN(max_tx_size, largest_tx_size);

  txfm_rd_in_plane(x, rate, distortion, skip,
                   &sse[mbmi->tx_size], ref_best_rd, 0, bs,
                   mbmi->tx_size, cpi->sf.use_fast_coef_costing);
  cpi->tx_stepdown_count[0]++;
}

static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
                                     int (*r)[2], int *rate,
                                     int64_t *d, int64_t *distortion,
                                     int *s, int *skip,
                                     int64_t tx_cache[TX_MODES],
                                     BLOCK_SIZE bs) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
  int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX}};
  int n, m;
  int s0, s1;
  const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  int64_t best_rd = INT64_MAX;
  TX_SIZE best_tx = TX_4X4;

  const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);

  for (n = TX_4X4; n <= max_tx_size; n++) {
    // r[n][1] additionally includes the rate of signaling tx_size n itself.
    r[n][1] = r[n][0];
    if (r[n][0] < INT_MAX) {
      for (m = 0; m <= n - (n == max_tx_size); m++) {
        if (m == n)
          r[n][1] += vp9_cost_zero(tx_probs[m]);
        else
          r[n][1] += vp9_cost_one(tx_probs[m]);
      }
    }
    if (d[n] == INT64_MAX) {
      rd[n][0] = rd[n][1] = INT64_MAX;
    } else if (s[n]) {
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
    }

    if (rd[n][1] < best_rd) {
      best_tx = n;
      best_rd = rd[n][1];
    }
  }
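  // Under TX_MODE_SELECT the size found by the search above is used;
  // otherwise the frame's tx mode pins the size to the largest allowed.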
  mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
      best_tx : MIN(max_tx_size, max_mode_tx_size);

  *distortion = d[mbmi->tx_size];
  *rate = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT];
  *skip = s[mbmi->tx_size];

  tx_cache[ONLY_4X4] = rd[TX_4X4][0];
  tx_cache[ALLOW_8X8] = rd[TX_8X8][0];
  tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0];
  tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0];

  if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
    tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
    cpi->tx_stepdown_count[0]++;
  } else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) {
    tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
    cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
  } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
    tx_cache[TX_MODE_SELECT] = rd[TX_8X8][1];
    cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
  } else {
    tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1];
    cpi->tx_stepdown_count[max_tx_size - TX_4X4]++;
  }
}

static int64_t scaled_rd_cost(int rdmult, int rddiv,
                              int rate, int64_t dist, double scale) {
  return (int64_t) (RDCOST(rdmult, rddiv, rate, dist) * scale);
}

static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
                                          int (*r)[2], int *rate,
                                          int64_t *d, int64_t *distortion,
                                          int *s, int *skip, int64_t *sse,
                                          int64_t ref_best_rd,
                                          BLOCK_SIZE bs) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
  int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX}};
  int n, m;
  int s0, s1;
  double scale_rd[TX_SIZES] = {1.73, 1.44, 1.20, 1.00};
  const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  int64_t best_rd = INT64_MAX;
  TX_SIZE best_tx = TX_4X4;

  const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);

  for (n = TX_4X4; n <= max_tx_size; n++) {
    double scale = scale_rd[n];
    r[n][1] = r[n][0];
    for (m = 0; m <= n - (n == max_tx_size); m++) {
      if (m == n)
        r[n][1] += vp9_cost_zero(tx_probs[m]);
      else
        r[n][1] += vp9_cost_one(tx_probs[m]);
    }
    if (s[n]) {
      rd[n][0] = rd[n][1] = scaled_rd_cost(x->rdmult, x->rddiv, s1, d[n],
                                           scale);
    } else {
      rd[n][0] = scaled_rd_cost(x->rdmult, x->rddiv, r[n][0] + s0, d[n],
                                scale);
      rd[n][1] = scaled_rd_cost(x->rdmult, x->rddiv, r[n][1] + s0, d[n],
                                scale);
    }
    if (rd[n][1] < best_rd) {
      best_rd = rd[n][1];
      best_tx = n;
    }
  }
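  // scale_rd[] above biases the model-based comparison toward larger
  // transform sizes by inflating the rd cost of the smaller ones.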
  mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
      best_tx : MIN(max_tx_size, max_mode_tx_size);

  // Actually encode using the chosen mode if a model was used, but do not
  // update the r, d costs
  txfm_rd_in_plane(x, rate, distortion, skip,
                   &sse[mbmi->tx_size], ref_best_rd, 0, bs, mbmi->tx_size,
                   cpi->sf.use_fast_coef_costing);

  if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
    cpi->tx_stepdown_count[0]++;
  } else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) {
    cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
  } else if (rd[TX_8X8][1] <= rd[TX_4X4][1]) {
    cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
  } else {
    cpi->tx_stepdown_count[max_tx_size - TX_4X4]++;
  }
}

static void inter_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                                  int64_t *distortion, int *skip,
                                  int64_t *psse, BLOCK_SIZE bs,
                                  int64_t txfm_cache[TX_MODES],
                                  int64_t ref_best_rd) {
  int r[TX_SIZES][2], s[TX_SIZES];
  int64_t d[TX_SIZES], sse[TX_SIZES];
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  TX_SIZE tx_size;

  assert(bs == mbmi->sb_type);

  vp9_subtract_plane(x, bs, 0);

  if (cpi->sf.tx_size_search_method == USE_LARGESTALL) {
    vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
    choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse,
                             ref_best_rd, bs);
    if (psse)
      *psse = sse[mbmi->tx_size];
    return;
  }

  if (cpi->sf.tx_size_search_method == USE_LARGESTINTRA_MODELINTER) {
    for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size)
      model_rd_for_sb_y_tx(cpi, bs, tx_size, x, xd,
                           &r[tx_size][0], &d[tx_size], &s[tx_size]);
    choose_txfm_size_from_modelrd(cpi, x, r, rate, d, distortion, s,
                                  skip, sse, ref_best_rd, bs);
  } else {
    for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size)
      txfm_rd_in_plane(x, &r[tx_size][0], &d[tx_size],
                       &s[tx_size], &sse[tx_size],
                       ref_best_rd, 0, bs, tx_size,
                       cpi->sf.use_fast_coef_costing);
    choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s,
                             skip, txfm_cache, bs);
  }
  if (psse)
    *psse = sse[mbmi->tx_size];
}

static void intra_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                                  int64_t *distortion, int *skip,
                                  int64_t *psse, BLOCK_SIZE bs,
                                  int64_t txfm_cache[TX_MODES],
                                  int64_t ref_best_rd) {
  int64_t sse[TX_SIZES];
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;

  assert(bs == mbmi->sb_type);
  if (cpi->sf.tx_size_search_method != USE_FULL_RD) {
    vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
    choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse,
                             ref_best_rd, bs);
  } else {
    int r[TX_SIZES][2], s[TX_SIZES];
    int64_t d[TX_SIZES];
    TX_SIZE tx_size;
    for (tx_size = TX_4X4; tx_size <= max_txsize_lookup[bs]; ++tx_size)
      txfm_rd_in_plane(x, &r[tx_size][0], &d[tx_size],
                       &s[tx_size], &sse[tx_size],
                       ref_best_rd, 0, bs, tx_size,
                       cpi->sf.use_fast_coef_costing);
    choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s,
                             skip, txfm_cache, bs);
  }
  if (psse)
    *psse = sse[mbmi->tx_size];
}

static int conditional_skipintra(MB_PREDICTION_MODE mode,
                                 MB_PREDICTION_MODE best_intra_mode) {
  if (mode == D117_PRED &&
      best_intra_mode != V_PRED &&
      best_intra_mode != D135_PRED)
    return 1;
  if (mode == D63_PRED &&
      best_intra_mode != V_PRED &&
      best_intra_mode != D45_PRED)
    return 1;
  if (mode == D207_PRED &&
      best_intra_mode != H_PRED &&
      best_intra_mode != D45_PRED)
    return 1;
  if (mode == D153_PRED &&
      best_intra_mode != H_PRED &&
      best_intra_mode != D135_PRED)
    return 1;
  return 0;
}

static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
                                     MB_PREDICTION_MODE *best_mode,
                                     const int *bmode_costs,
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                     int *bestrate, int *bestratey,
                                     int64_t *bestdistortion,
                                     BLOCK_SIZE bsize, int64_t rd_thresh) {
  MB_PREDICTION_MODE mode;
  MACROBLOCKD *const xd = &x->e_mbd;
  int64_t best_rd = rd_thresh;

  struct macroblock_plane *p = &x->plane[0];
  struct macroblockd_plane *pd = &xd->plane[0];
  const int src_stride = p->src.stride;
  const int dst_stride = pd->dst.stride;
  const uint8_t *src_init = &p->src.buf[raster_block_offset(BLOCK_8X8, ib,
                                                            src_stride)];
  uint8_t *dst_init = &pd->dst.buf[raster_block_offset(BLOCK_8X8, ib,
                                                       dst_stride)];
  ENTROPY_CONTEXT ta[2], tempa[2];
  ENTROPY_CONTEXT tl[2], templ[2];

  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  int idx, idy;
  uint8_t best_dst[8 * 8];

  assert(ib < 4);

  vpx_memcpy(ta, a, sizeof(ta));
  vpx_memcpy(tl, l, sizeof(tl));
  xd->mi[0]->mbmi.tx_size = TX_4X4;

  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    int64_t this_rd;
    int ratey = 0;
    int64_t distortion = 0;
    int rate = bmode_costs[mode];

    if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
      continue;

    // Only do the oblique modes if the best so far is
    // one of the neighboring directional modes
    if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
      if (conditional_skipintra(mode, *best_mode))
        continue;
    }

    vpx_memcpy(tempa, ta, sizeof(ta));
    vpx_memcpy(templ, tl, sizeof(tl));

    for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
      for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
        const int block = ib + idy * 2 + idx;
        const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
        uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
        int16_t *const src_diff = raster_block_offset_int16(BLOCK_8X8, block,
                                                            p->src_diff);
        int16_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
        xd->mi[0]->bmi[block].as_mode = mode;
        vp9_predict_intra_block(xd, block, 1,
                                TX_4X4, mode,
                                x->skip_encode ? src : dst,
                                x->skip_encode ? src_stride : dst_stride,
                                dst, dst_stride, idx, idy, 0);
        vp9_subtract_block(4, 4, src_diff, 8, src, src_stride,
                           dst, dst_stride);

        if (xd->lossless) {
          const scan_order *so = &vp9_default_scan_orders[TX_4X4];
          vp9_fwht4x4(src_diff, coeff, 8);
          vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
          ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                               so->scan, so->neighbors,
                               cpi->sf.use_fast_coef_costing);
          if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
            goto next;
          vp9_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block), dst, dst_stride,
                          p->eobs[block]);
        } else {
          int64_t unused;
          const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block);
          const scan_order *so = &vp9_scan_orders[TX_4X4][tx_type];
          vp9_fht4x4(src_diff, coeff, 8, tx_type);
          vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
          ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                               so->scan, so->neighbors,
                               cpi->sf.use_fast_coef_costing);
          distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block),
                                        16, &unused) >> 2;
          if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
            goto next;
          vp9_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block),
                         dst, dst_stride, p->eobs[block]);
        }
      }
    }

    rate += ratey;
    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

    if (this_rd < best_rd) {
      *bestrate = rate;
      *bestratey = ratey;
      *bestdistortion = distortion;
      best_rd = this_rd;
      *best_mode = mode;
      vpx_memcpy(a, tempa, sizeof(tempa));
      vpx_memcpy(l, templ, sizeof(templ));
      for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
        vpx_memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
                   num_4x4_blocks_wide * 4);
    }
  next:
    {}
  }

  if (best_rd >= rd_thresh || x->skip_encode)
    return best_rd;

  for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
    vpx_memcpy(dst_init + idy * dst_stride, best_dst + idy * 8,
               num_4x4_blocks_wide * 4);

  return best_rd;
}

static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP *cpi, MACROBLOCK *mb,
                                            int *rate, int *rate_y,
                                            int64_t *distortion,
                                            int64_t best_rd) {
  int i, j;
  const MACROBLOCKD *const xd = &mb->e_mbd;
  MODE_INFO *const mic = xd->mi[0];
  const MODE_INFO *above_mi = xd->mi[-xd->mi_stride];
  const MODE_INFO *left_mi = xd->left_available ? xd->mi[-1] : NULL;
  const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  int idx, idy;
  int cost = 0;
  int64_t total_distortion = 0;
  int tot_rate_y = 0;
  int64_t total_rd = 0;
  ENTROPY_CONTEXT t_above[4], t_left[4];
  const int *bmode_costs = mb->mbmode_cost;

  vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
  vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));

  // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
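  // num_4x4_blocks_{wide,high} is 1 or 2 here, so the loops visit four,
  // two, or one sub-block position(s) depending on the partition.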
  for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
    for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
      MB_PREDICTION_MODE best_mode = DC_PRED;
      int r = INT_MAX, ry = INT_MAX;
      int64_t d = INT64_MAX, this_rd = INT64_MAX;
      i = idy * 2 + idx;
      if (cpi->common.frame_type == KEY_FRAME) {
        const MB_PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, i);
        const MB_PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, i);

        bmode_costs = mb->y_mode_costs[A][L];
      }

      this_rd = rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs,
                                      t_above + idx, t_left + idy, &r, &ry, &d,
                                      bsize, best_rd - total_rd);
      if (this_rd >= best_rd - total_rd)
        return INT64_MAX;

      total_rd += this_rd;
      cost += r;
      total_distortion += d;
      tot_rate_y += ry;

      mic->bmi[i].as_mode = best_mode;
      for (j = 1; j < num_4x4_blocks_high; ++j)
        mic->bmi[i + j * 2].as_mode = best_mode;
      for (j = 1; j < num_4x4_blocks_wide; ++j)
        mic->bmi[i + j].as_mode = best_mode;

      if (total_rd >= best_rd)
        return INT64_MAX;
    }
  }

  *rate = cost;
  *rate_y = tot_rate_y;
  *distortion = total_distortion;
  mic->mbmi.mode = mic->bmi[3].as_mode;

  return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion);
}

static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                      int *rate, int *rate_tokenonly,
                                      int64_t *distortion, int *skippable,
                                      BLOCK_SIZE bsize,
                                      int64_t tx_cache[TX_MODES],
                                      int64_t best_rd) {
  MB_PREDICTION_MODE mode;
  MB_PREDICTION_MODE mode_selected = DC_PRED;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mic = xd->mi[0];
  int this_rate, this_rate_tokenonly, s;
  int64_t this_distortion, this_rd;
  TX_SIZE best_tx = TX_4X4;
  int i;
  int *bmode_costs = x->mbmode_cost;

  if (cpi->sf.tx_size_search_method == USE_FULL_RD)
    for (i = 0; i < TX_MODES; i++)
      tx_cache[i] = INT64_MAX;

  /* Y Search for intra prediction mode */
  for (mode = DC_PRED; mode <= TM_PRED; mode++) {
    int64_t local_tx_cache[TX_MODES];
    MODE_INFO *above_mi = xd->mi[-xd->mi_stride];
    MODE_INFO *left_mi = xd->left_available ? xd->mi[-1] : NULL;
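    // On key frames the y-mode cost is conditioned on the above/left block
    // modes; on inter frames the unconditional mbmode_cost table set up
    // above is used.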

    if (!(cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]] & (1 << mode)))
      continue;

    if (cpi->common.frame_type == KEY_FRAME) {
      const MB_PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0);
      const MB_PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0);

      bmode_costs = x->y_mode_costs[A][L];
    }
    mic->mbmi.mode = mode;

    intra_super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
                          &s, NULL, bsize, local_tx_cache, best_rd);

    if (this_rate_tokenonly == INT_MAX)
      continue;

    this_rate = this_rate_tokenonly + bmode_costs[mode];
    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

    if (this_rd < best_rd) {
      mode_selected = mode;
      best_rd = this_rd;
      best_tx = mic->mbmi.tx_size;
      *rate = this_rate;
      *rate_tokenonly = this_rate_tokenonly;
      *distortion = this_distortion;
      *skippable = s;
    }

    if (cpi->sf.tx_size_search_method == USE_FULL_RD && this_rd < INT64_MAX) {
      for (i = 0; i < TX_MODES && local_tx_cache[i] < INT64_MAX; i++) {
        const int64_t adj_rd = this_rd + local_tx_cache[i] -
                               local_tx_cache[cpi->common.tx_mode];
        if (adj_rd < tx_cache[i]) {
          tx_cache[i] = adj_rd;
        }
      }
    }
  }

  mic->mbmi.mode = mode_selected;
  mic->mbmi.tx_size = best_tx;

  return best_rd;
}

static void super_block_uvrd(const VP9_COMP *cpi, MACROBLOCK *x,
                             int *rate, int64_t *distortion, int *skippable,
                             int64_t *sse, BLOCK_SIZE bsize,
                             int64_t ref_best_rd) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  TX_SIZE uv_txfm_size = get_uv_tx_size(mbmi);
  int plane;
  int pnrate = 0, pnskip = 1;
  int64_t pndist = 0, pnsse = 0;

  if (ref_best_rd < 0)
    goto term;

  if (is_inter_block(mbmi)) {
    int plane;
    for (plane = 1; plane < MAX_MB_PLANE; ++plane)
      vp9_subtract_plane(x, bsize, plane);
  }

  *rate = 0;
  *distortion = 0;
  *sse = 0;
  *skippable = 1;

  for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
    txfm_rd_in_plane(x, &pnrate, &pndist, &pnskip, &pnsse,
                     ref_best_rd, plane, bsize, uv_txfm_size,
                     cpi->sf.use_fast_coef_costing);
    if (pnrate == INT_MAX)
      goto term;
    *rate += pnrate;
    *distortion += pndist;
    *sse += pnsse;
    *skippable &= pnskip;
  }
  return;

 term:
  *rate = INT_MAX;
  *distortion = INT64_MAX;
  *sse = INT64_MAX;
  *skippable = 0;
  return;
}

static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                       PICK_MODE_CONTEXT *ctx,
                                       int *rate, int *rate_tokenonly,
                                       int64_t *distortion, int *skippable,
                                       BLOCK_SIZE bsize, TX_SIZE max_tx_size) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_PREDICTION_MODE mode;
  MB_PREDICTION_MODE mode_selected = DC_PRED;
  int64_t best_rd = INT64_MAX, this_rd;
  int this_rate_tokenonly, this_rate, s;
  int64_t this_distortion, this_sse;

  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode)))
      continue;

    xd->mi[0]->mbmi.uv_mode = mode;

    super_block_uvrd(cpi, x, &this_rate_tokenonly,
                     &this_distortion, &s, &this_sse, bsize, best_rd);
    if (this_rate_tokenonly == INT_MAX)
      continue;
    this_rate = this_rate_tokenonly +
                x->intra_uv_mode_cost[cpi->common.frame_type][mode];
    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

    if (this_rd < best_rd) {
      mode_selected = mode;
      best_rd = this_rd;
      *rate = this_rate;
      *rate_tokenonly = this_rate_tokenonly;
      *distortion = this_distortion;
      *skippable = s;
      if (!x->select_txfm_size) {
        // Swap the best mode's coefficient buffers into slot 2 of the
        // pick-mode context so later searches do not overwrite them.
        int i;
        struct macroblock_plane *const p = x->plane;
        struct macroblockd_plane *const pd = xd->plane;
        for (i = 1; i < MAX_MB_PLANE; ++i) {
          p[i].coeff = ctx->coeff_pbuf[i][2];
          p[i].qcoeff = ctx->qcoeff_pbuf[i][2];
          pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2];
          p[i].eobs = ctx->eobs_pbuf[i][2];

          ctx->coeff_pbuf[i][2] = ctx->coeff_pbuf[i][0];
          ctx->qcoeff_pbuf[i][2] = ctx->qcoeff_pbuf[i][0];
          ctx->dqcoeff_pbuf[i][2] = ctx->dqcoeff_pbuf[i][0];
          ctx->eobs_pbuf[i][2] = ctx->eobs_pbuf[i][0];

          ctx->coeff_pbuf[i][0] = p[i].coeff;
          ctx->qcoeff_pbuf[i][0] = p[i].qcoeff;
          ctx->dqcoeff_pbuf[i][0] = pd[i].dqcoeff;
          ctx->eobs_pbuf[i][0] = p[i].eobs;
        }
      }
    }
  }

  xd->mi[0]->mbmi.uv_mode = mode_selected;
  return best_rd;
}

static int64_t rd_sbuv_dcpred(const VP9_COMP *cpi, MACROBLOCK *x,
                              int *rate, int *rate_tokenonly,
                              int64_t *distortion, int *skippable,
                              BLOCK_SIZE bsize) {
  const VP9_COMMON *cm = &cpi->common;
  int64_t unused;

  x->e_mbd.mi[0]->mbmi.uv_mode = DC_PRED;
  super_block_uvrd(cpi, x, rate_tokenonly, distortion,
                   skippable, &unused, bsize, INT64_MAX);
  *rate = *rate_tokenonly + x->intra_uv_mode_cost[cm->frame_type][DC_PRED];
  return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
}

static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
                                 BLOCK_SIZE bsize, TX_SIZE max_tx_size,
                                 int *rate_uv, int *rate_uv_tokenonly,
                                 int64_t *dist_uv, int *skip_uv,
                                 MB_PREDICTION_MODE *mode_uv) {
  MACROBLOCK *const x = &cpi->mb;

  // Use an estimated rd for uv_intra based on DC_PRED if the
  // appropriate speed flag is set.
  if (cpi->sf.use_uv_intra_rd_estimate) {
    rd_sbuv_dcpred(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv,
                   skip_uv, bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
  // Else do a proper rd search for each possible transform size that may
  // be considered in the main rd loop.
  } else {
    rd_pick_intra_sbuv_mode(cpi, x, ctx,
                            rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
                            bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize, max_tx_size);
  }
  *mode_uv = x->e_mbd.mi[0]->mbmi.uv_mode;
}

static int cost_mv_ref(const VP9_COMP *cpi, MB_PREDICTION_MODE mode,
                       int mode_context) {
  const MACROBLOCK *const x = &cpi->mb;
  const int segment_id = x->e_mbd.mi[0]->mbmi.segment_id;

  // Don't account for mode here if segment skip is enabled.
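  // (When SEG_LVL_SKIP is active the mode is implied rather than coded,
  // so it contributes no rate.)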
  if (!vp9_segfeature_active(&cpi->common.seg, segment_id, SEG_LVL_SKIP)) {
    assert(is_inter_mode(mode));
    return x->inter_mode_cost[mode_context][INTER_OFFSET(mode)];
  } else {
    return 0;
  }
}

static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
                                BLOCK_SIZE bsize,
                                int_mv *frame_mv,
                                int mi_row, int mi_col,
                                int_mv single_newmv[MAX_REF_FRAMES],
                                int *rate_mv);

static int labels2mode(VP9_COMP *cpi, MACROBLOCKD *xd, int i,
                       MB_PREDICTION_MODE mode,
                       int_mv this_mv[2],
                       int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
                       int_mv seg_mvs[MAX_REF_FRAMES],
                       int_mv *best_ref_mv[2],
                       const int *mvjcost, int *mvcost[2]) {
  MODE_INFO *const mic = xd->mi[0];
  const MB_MODE_INFO *const mbmi = &mic->mbmi;
  int thismvcost = 0;
  int idx, idy;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
  const int is_compound = has_second_ref(mbmi);

  // The only time we should do costing for a new motion vector or mode
  // is when we are on a new label (jbb May 08, 2007).
  switch (mode) {
    case NEWMV:
      this_mv[0].as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
      thismvcost += vp9_mv_bit_cost(&this_mv[0].as_mv, &best_ref_mv[0]->as_mv,
                                    mvjcost, mvcost, MV_COST_WEIGHT_SUB);
      if (is_compound) {
        this_mv[1].as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
        thismvcost += vp9_mv_bit_cost(&this_mv[1].as_mv, &best_ref_mv[1]->as_mv,
                                      mvjcost, mvcost, MV_COST_WEIGHT_SUB);
      }
      break;
    case NEARESTMV:
      this_mv[0].as_int = frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int;
      if (is_compound)
        this_mv[1].as_int = frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int;
      break;
    case NEARMV:
      this_mv[0].as_int = frame_mv[NEARMV][mbmi->ref_frame[0]].as_int;
      if (is_compound)
        this_mv[1].as_int = frame_mv[NEARMV][mbmi->ref_frame[1]].as_int;
      break;
    case ZEROMV:
      this_mv[0].as_int = 0;
      if (is_compound)
        this_mv[1].as_int = 0;
      break;
    default:
      break;
  }

  mic->bmi[i].as_mv[0].as_int = this_mv[0].as_int;
  if (is_compound)
    mic->bmi[i].as_mv[1].as_int = this_mv[1].as_int;

  mic->bmi[i].as_mode = mode;

  for (idy = 0; idy < num_4x4_blocks_high; ++idy)
    for (idx = 0; idx < num_4x4_blocks_wide; ++idx)
      vpx_memcpy(&mic->bmi[i + idy * 2 + idx],
                 &mic->bmi[i], sizeof(mic->bmi[i]));

  return cost_mv_ref(cpi, mode, mbmi->mode_context[mbmi->ref_frame[0]]) +
         thismvcost;
}

static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
                                       MACROBLOCK *x,
                                       int64_t best_yrd,
                                       int i,
                                       int *labelyrate,
                                       int64_t *distortion, int64_t *sse,
                                       ENTROPY_CONTEXT *ta,
                                       ENTROPY_CONTEXT *tl,
                                       int mi_row, int mi_col) {
  int k;
  MACROBLOCKD *xd = &x->e_mbd;
  struct macroblockd_plane *const pd = &xd->plane[0];
  struct macroblock_plane *const p = &x->plane[0];
  MODE_INFO *const mi = xd->mi[0];
  const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->mbmi.sb_type, pd);
  const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize];
  int idx, idy;

  const uint8_t *const src = &p->src.buf[raster_block_offset(BLOCK_8X8, i,
                                                             p->src.stride)];
  uint8_t *const dst = &pd->dst.buf[raster_block_offset(BLOCK_8X8, i,
                                                        pd->dst.stride)];
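  // Note: distortion and sse are accumulated in the 8x-scaled transform
  // domain and shifted down by 2 when reported, matching dist_block().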
  int64_t thisdistortion = 0, thissse = 0;
  int thisrate = 0, ref;
  const scan_order *so = &vp9_default_scan_orders[TX_4X4];
  const int is_compound = has_second_ref(&mi->mbmi);
  const InterpKernel *kernel = vp9_get_interp_kernel(mi->mbmi.interp_filter);

  for (ref = 0; ref < 1 + is_compound; ++ref) {
    const uint8_t *pre = &pd->pre[ref].buf[raster_block_offset(BLOCK_8X8, i,
                                               pd->pre[ref].stride)];
    vp9_build_inter_predictor(pre, pd->pre[ref].stride,
                              dst, pd->dst.stride,
                              &mi->bmi[i].as_mv[ref].as_mv,
                              &xd->block_refs[ref]->sf, width, height, ref,
                              kernel, MV_PRECISION_Q3,
                              mi_col * MI_SIZE + 4 * (i % 2),
                              mi_row * MI_SIZE + 4 * (i / 2));
  }

  vp9_subtract_block(height, width,
                     raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8,
                     src, p->src.stride,
                     dst, pd->dst.stride);

  k = i;
  for (idy = 0; idy < height / 4; ++idy) {
    for (idx = 0; idx < width / 4; ++idx) {
      int64_t ssz, rd, rd1, rd2;
      int16_t* coeff;

      k += (idy * 2 + idx);
      coeff = BLOCK_OFFSET(p->coeff, k);
      x->fwd_txm4x4(raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
                    coeff, 8);
      vp9_regular_quantize_b_4x4(x, 0, k, so->scan, so->iscan);
      thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
                                        16, &ssz);
      thissse += ssz;
      thisrate += cost_coeffs(x, 0, k, ta + (k & 1), tl + (k >> 1), TX_4X4,
                              so->scan, so->neighbors,
                              cpi->sf.use_fast_coef_costing);
      rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2);
      rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2);
      rd = MIN(rd1, rd2);
      if (rd >= best_yrd)
        return INT64_MAX;
    }
  }

  *distortion = thisdistortion >> 2;
  *labelyrate = thisrate;
  *sse = thissse >> 2;

  return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
}

typedef struct {
  int eobs;
  int brate;
  int byrate;
  int64_t bdist;
  int64_t bsse;
  int64_t brdcost;
  int_mv mvs[2];
  ENTROPY_CONTEXT ta[2];
  ENTROPY_CONTEXT tl[2];
} SEG_RDSTAT;

typedef struct {
  int_mv *ref_mv[2];
  int_mv mvp;

  int64_t segment_rd;
  int r;
  int64_t d;
  int64_t sse;
  int segment_yrate;
  MB_PREDICTION_MODE modes[4];
  SEG_RDSTAT rdstat[4][INTER_MODES];
  int mvthresh;
} BEST_SEG_INFO;

static INLINE int mv_check_bounds(const MACROBLOCK *x, const MV *mv) {
  return (mv->row >> 3) < x->mv_row_min ||
         (mv->row >> 3) > x->mv_row_max ||
         (mv->col >> 3) < x->mv_col_min ||
         (mv->col >> 3) > x->mv_col_max;
}

static INLINE void mi_buf_shift(MACROBLOCK *x, int i) {
  MB_MODE_INFO *const mbmi = &x->e_mbd.mi[0]->mbmi;
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &x->e_mbd.plane[0];

  p->src.buf = &p->src.buf[raster_block_offset(BLOCK_8X8, i, p->src.stride)];
  assert(((intptr_t)pd->pre[0].buf & 0x7) == 0);
  pd->pre[0].buf = &pd->pre[0].buf[raster_block_offset(BLOCK_8X8, i,
                                                       pd->pre[0].stride)];
  if (has_second_ref(mbmi))
    pd->pre[1].buf = &pd->pre[1].buf[raster_block_offset(BLOCK_8X8, i,
                                                         pd->pre[1].stride)];
}

static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src,
                                  struct buf_2d orig_pre[2]) {
  MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi;
  x->plane[0].src = orig_src;
  x->e_mbd.plane[0].pre[0] = orig_pre[0];
  if (has_second_ref(mbmi))
    x->e_mbd.plane[0].pre[1] = orig_pre[1];
}

static INLINE int mv_has_subpel(const MV *mv) {
  return (mv->row & 0x0F) || (mv->col & 0x0F);
}

// Check if NEARESTMV/NEARMV/ZEROMV is the cheapest way to encode zero motion.
// TODO(aconverse): Find out if this is still productive then clean up or remove
static int check_best_zero_mv(
    const VP9_COMP *cpi, const uint8_t mode_context[MAX_REF_FRAMES],
    int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
    int disable_inter_mode_mask, int this_mode, int ref_frame,
    int second_ref_frame) {
  if (!(disable_inter_mode_mask & (1 << INTER_OFFSET(ZEROMV))) &&
      (this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) &&
      frame_mv[this_mode][ref_frame].as_int == 0 &&
      (second_ref_frame == NONE ||
       frame_mv[this_mode][second_ref_frame].as_int == 0)) {
    int rfc = mode_context[ref_frame];
    int c1 = cost_mv_ref(cpi, NEARMV, rfc);
    int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
    int c3 = cost_mv_ref(cpi, ZEROMV, rfc);

    if (this_mode == NEARMV) {
      if (c1 > c3) return 0;
    } else if (this_mode == NEARESTMV) {
      if (c2 > c3) return 0;
    } else {
      assert(this_mode == ZEROMV);
      if (second_ref_frame == NONE) {
        if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frame].as_int == 0) ||
            (c3 >= c1 && frame_mv[NEARMV][ref_frame].as_int == 0))
          return 0;
      } else {
        if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frame].as_int == 0 &&
             frame_mv[NEARESTMV][second_ref_frame].as_int == 0) ||
            (c3 >= c1 && frame_mv[NEARMV][ref_frame].as_int == 0 &&
             frame_mv[NEARMV][second_ref_frame].as_int == 0))
          return 0;
      }
    }
  }
  return 1;
}

static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
                                    const TileInfo *const tile,
                                    BEST_SEG_INFO *bsi_buf, int filter_idx,
                                    int_mv seg_mvs[4][MAX_REF_FRAMES],
                                    int mi_row, int mi_col) {
  int k, br = 0, idx, idy;
  int64_t bd = 0, block_sse = 0;
  MB_PREDICTION_MODE this_mode;
  MACROBLOCKD *xd = &x->e_mbd;
  VP9_COMMON *cm = &cpi->common;
  MODE_INFO *mi = xd->mi[0];
  MB_MODE_INFO *const mbmi = &mi->mbmi;
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
  const int label_count = 4;
  int64_t this_segment_rd = 0;
  int label_mv_thresh;
  int segmentyrate = 0;
  const BLOCK_SIZE bsize = mbmi->sb_type;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  vp9_variance_fn_ptr_t *v_fn_ptr = &cpi->fn_ptr[bsize];
  ENTROPY_CONTEXT t_above[2], t_left[2];
  BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
  int mode_idx;
  int subpelmv = 1, have_ref = 0;
  const int has_second_rf = has_second_ref(mbmi);
  const int disable_inter_mode_mask = cpi->sf.disable_inter_mode_mask[bsize];

  vpx_memcpy(t_above, pd->above_context, sizeof(t_above));
  vpx_memcpy(t_left, pd->left_context, sizeof(t_left));

  // A factor of 64 makes this threshold effectively so large that we very
  // rarely check mvs on segments; setting it to 1 would make the mv
  // threshold roughly equal to what it is for macroblocks.
static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
                                    const TileInfo *const tile,
                                    BEST_SEG_INFO *bsi_buf, int filter_idx,
                                    int_mv seg_mvs[4][MAX_REF_FRAMES],
                                    int mi_row, int mi_col) {
  int k, br = 0, idx, idy;
  int64_t bd = 0, block_sse = 0;
  MB_PREDICTION_MODE this_mode;
  MACROBLOCKD *xd = &x->e_mbd;
  VP9_COMMON *cm = &cpi->common;
  MODE_INFO *mi = xd->mi[0];
  MB_MODE_INFO *const mbmi = &mi->mbmi;
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
  const int label_count = 4;
  int64_t this_segment_rd = 0;
  int label_mv_thresh;
  int segmentyrate = 0;
  const BLOCK_SIZE bsize = mbmi->sb_type;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  vp9_variance_fn_ptr_t *v_fn_ptr = &cpi->fn_ptr[bsize];
  ENTROPY_CONTEXT t_above[2], t_left[2];
  BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
  int mode_idx;
  int subpelmv = 1, have_ref = 0;
  const int has_second_rf = has_second_ref(mbmi);
  const int disable_inter_mode_mask = cpi->sf.disable_inter_mode_mask[bsize];

  vpx_memcpy(t_above, pd->above_context, sizeof(t_above));
  vpx_memcpy(t_left, pd->left_context, sizeof(t_left));

  // A multiplier of 64 makes this threshold really big, effectively making
  // it so that we very rarely check mvs on segments. Setting it to 1 makes
  // the mv threshold roughly equal to what it is for macroblocks.
  label_mv_thresh = 1 * bsi->mvthresh / label_count;

  // Segmentation method overheads.
  for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
    for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
      // TODO(jingning,rbultje): rewrite the rate-distortion optimization
      // loop for 4x4/4x8/8x4 block coding. To be replaced with a new rd loop.
      int_mv mode_mv[MB_MODE_COUNT][2];
      int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
      MB_PREDICTION_MODE mode_selected = ZEROMV;
      int64_t best_rd = INT64_MAX;
      const int i = idy * 2 + idx;
      int ref;

      for (ref = 0; ref < 1 + has_second_rf; ++ref) {
        const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
        frame_mv[ZEROMV][frame].as_int = 0;
        vp9_append_sub8x8_mvs_for_idx(cm, xd, tile, i, ref, mi_row, mi_col,
                                      &frame_mv[NEARESTMV][frame],
                                      &frame_mv[NEARMV][frame]);
      }

      // Search for the best motion vector on this segment.
      for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
        const struct buf_2d orig_src = x->plane[0].src;
        struct buf_2d orig_pre[2];

        mode_idx = INTER_OFFSET(this_mode);
        bsi->rdstat[i][mode_idx].brdcost = INT64_MAX;
        if (disable_inter_mode_mask & (1 << mode_idx))
          continue;

        if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv,
                                disable_inter_mode_mask,
                                this_mode, mbmi->ref_frame[0],
                                mbmi->ref_frame[1]))
          continue;

        vpx_memcpy(orig_pre, pd->pre, sizeof(orig_pre));
        vpx_memcpy(bsi->rdstat[i][mode_idx].ta, t_above,
                   sizeof(bsi->rdstat[i][mode_idx].ta));
        vpx_memcpy(bsi->rdstat[i][mode_idx].tl, t_left,
                   sizeof(bsi->rdstat[i][mode_idx].tl));

        // Motion search for newmv (single predictor case only).
        if (!has_second_rf && this_mode == NEWMV &&
            seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) {
          int_mv *const new_mv = &mode_mv[NEWMV][0];
          int step_param = 0;
          int further_steps;
          int thissme, bestsme = INT_MAX;
          int sadpb = x->sadperbit4;
          MV mvp_full;
          int max_mv;

          /* Is the best so far sufficiently good that we can't justify
           * doing a new motion search. */
          if (best_rd < label_mv_thresh)
            break;

          if (cpi->oxcf.mode != MODE_SECONDPASS_BEST &&
              cpi->oxcf.mode != MODE_BESTQUALITY) {
            // Use the previous block's result as the next block's MV
            // predictor.
            if (i > 0) {
              bsi->mvp.as_int = mi->bmi[i - 1].as_mv[0].as_int;
              if (i == 2)
                bsi->mvp.as_int = mi->bmi[i - 2].as_mv[0].as_int;
            }
          }
          if (i == 0)
            max_mv = x->max_mv_context[mbmi->ref_frame[0]];
          else
            max_mv = MAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3;

          if (cpi->sf.auto_mv_step_size && cm->show_frame) {
            // Take a weighted average of the step_params based on the last
            // frame's max mv magnitude and the best ref mvs of the current
            // block for the given reference.
            step_param = (vp9_init_search_range(cpi, max_mv) +
                          cpi->mv_step_param) >> 1;
          } else {
            step_param = cpi->mv_step_param;
          }

          mvp_full.row = bsi->mvp.as_mv.row >> 3;
          mvp_full.col = bsi->mvp.as_mv.col >> 3;

          if (cpi->sf.adaptive_motion_search && cm->show_frame) {
            mvp_full.row = x->pred_mv[mbmi->ref_frame[0]].as_mv.row >> 3;
            mvp_full.col = x->pred_mv[mbmi->ref_frame[0]].as_mv.col >> 3;
            step_param = MAX(step_param, 8);
          }

          further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
          // Adjust the src pointer for this block.
          mi_buf_shift(x, i);

          vp9_set_mv_search_range(x, &bsi->ref_mv[0]->as_mv);

          if (cpi->sf.search_method == HEX) {
            bestsme = vp9_hex_search(x, &mvp_full,
                                     step_param,
                                     sadpb, 1, v_fn_ptr, 1,
                                     &bsi->ref_mv[0]->as_mv,
                                     &new_mv->as_mv);
            if (bestsme < INT_MAX)
              bestsme = vp9_get_mvpred_var(x, &new_mv->as_mv,
                                           &bsi->ref_mv[0]->as_mv,
                                           v_fn_ptr, 1);
          } else if (cpi->sf.search_method == SQUARE) {
            bestsme = vp9_square_search(x, &mvp_full,
                                        step_param,
                                        sadpb, 1, v_fn_ptr, 1,
                                        &bsi->ref_mv[0]->as_mv,
                                        &new_mv->as_mv);
            if (bestsme < INT_MAX)
              bestsme = vp9_get_mvpred_var(x, &new_mv->as_mv,
                                           &bsi->ref_mv[0]->as_mv,
                                           v_fn_ptr, 1);
          } else if (cpi->sf.search_method == BIGDIA) {
            bestsme = vp9_bigdia_search(x, &mvp_full,
                                        step_param,
                                        sadpb, 1, v_fn_ptr, 1,
                                        &bsi->ref_mv[0]->as_mv,
                                        &new_mv->as_mv);
            if (bestsme < INT_MAX)
              bestsme = vp9_get_mvpred_var(x, &new_mv->as_mv,
                                           &bsi->ref_mv[0]->as_mv,
                                           v_fn_ptr, 1);
          } else {
            bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
                                             sadpb, further_steps, 0, v_fn_ptr,
                                             &bsi->ref_mv[0]->as_mv,
                                             &new_mv->as_mv);
          }

          // Should we do a full search (best quality only)?
          if (cpi->oxcf.mode == MODE_BESTQUALITY ||
              cpi->oxcf.mode == MODE_SECONDPASS_BEST) {
            int_mv *const best_mv = &mi->bmi[i].as_mv[0];
            /* Check if mvp_full is within the range. */
            clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max,
                     x->mv_row_min, x->mv_row_max);
            thissme = cpi->full_search_sad(x, &mvp_full,
                                           sadpb, 16, v_fn_ptr,
                                           x->nmvjointcost, x->mvcost,
                                           &bsi->ref_mv[0]->as_mv,
                                           &best_mv->as_mv);
            if (thissme < bestsme) {
              bestsme = thissme;
              new_mv->as_int = best_mv->as_int;
            } else {
              // The full search result is actually worse so reinstate the
              // previous best vector.
              best_mv->as_int = new_mv->as_int;
            }
          }

          if (bestsme < INT_MAX) {
            int distortion;
            cpi->find_fractional_mv_step(x,
                                         &new_mv->as_mv,
                                         &bsi->ref_mv[0]->as_mv,
                                         cm->allow_high_precision_mv,
                                         x->errorperbit, v_fn_ptr,
                                         cpi->sf.subpel_force_stop,
                                         cpi->sf.subpel_iters_per_step,
                                         x->nmvjointcost, x->mvcost,
                                         &distortion,
                                         &x->pred_sse[mbmi->ref_frame[0]]);

            // Save the motion search result for use in compound prediction.
            seg_mvs[i][mbmi->ref_frame[0]].as_int = new_mv->as_int;
          }

          if (cpi->sf.adaptive_motion_search)
            x->pred_mv[mbmi->ref_frame[0]].as_int = new_mv->as_int;

          // Restore src pointers.
          mi_buf_restore(x, orig_src, orig_pre);
        }

        if (has_second_rf) {
          if (seg_mvs[i][mbmi->ref_frame[1]].as_int == INVALID_MV ||
              seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV)
            continue;
        }

        if (has_second_rf && this_mode == NEWMV &&
            mbmi->interp_filter == EIGHTTAP) {
          // Adjust src pointers.
          mi_buf_shift(x, i);
          if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
            int rate_mv;
            joint_motion_search(cpi, x, bsize, frame_mv[this_mode],
                                mi_row, mi_col, seg_mvs[i],
                                &rate_mv);
            seg_mvs[i][mbmi->ref_frame[0]].as_int =
                frame_mv[this_mode][mbmi->ref_frame[0]].as_int;
            seg_mvs[i][mbmi->ref_frame[1]].as_int =
                frame_mv[this_mode][mbmi->ref_frame[1]].as_int;
          }
          // Restore src pointers.
          mi_buf_restore(x, orig_src, orig_pre);
        }

        bsi->rdstat[i][mode_idx].brate =
            labels2mode(cpi, xd, i, this_mode, mode_mv[this_mode], frame_mv,
                        seg_mvs[i], bsi->ref_mv, x->nmvjointcost, x->mvcost);

        for (ref = 0; ref < 1 + has_second_rf; ++ref) {
          bsi->rdstat[i][mode_idx].mvs[ref].as_int =
              mode_mv[this_mode][ref].as_int;
          if (num_4x4_blocks_wide > 1)
            bsi->rdstat[i + 1][mode_idx].mvs[ref].as_int =
                mode_mv[this_mode][ref].as_int;
          if (num_4x4_blocks_high > 1)
            bsi->rdstat[i + 2][mode_idx].mvs[ref].as_int =
                mode_mv[this_mode][ref].as_int;
        }

        // Trap vectors that reach beyond the UMV borders.
        if (mv_check_bounds(x, &mode_mv[this_mode][0].as_mv) ||
            (has_second_rf &&
             mv_check_bounds(x, &mode_mv[this_mode][1].as_mv)))
          continue;

        if (filter_idx > 0) {
          BEST_SEG_INFO *ref_bsi = bsi_buf;
          subpelmv = 0;
          have_ref = 1;

          for (ref = 0; ref < 1 + has_second_rf; ++ref) {
            subpelmv |= mv_has_subpel(&mode_mv[this_mode][ref].as_mv);
            have_ref &= mode_mv[this_mode][ref].as_int ==
                ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
          }

          if (filter_idx > 1 && !subpelmv && !have_ref) {
            ref_bsi = bsi_buf + 1;
            have_ref = 1;
            for (ref = 0; ref < 1 + has_second_rf; ++ref)
              have_ref &= mode_mv[this_mode][ref].as_int ==
                  ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
          }

          if (!subpelmv && have_ref &&
              ref_bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
            vpx_memcpy(&bsi->rdstat[i][mode_idx],
                       &ref_bsi->rdstat[i][mode_idx],
                       sizeof(SEG_RDSTAT));
            if (num_4x4_blocks_wide > 1)
              bsi->rdstat[i + 1][mode_idx].eobs =
                  ref_bsi->rdstat[i + 1][mode_idx].eobs;
            if (num_4x4_blocks_high > 1)
              bsi->rdstat[i + 2][mode_idx].eobs =
                  ref_bsi->rdstat[i + 2][mode_idx].eobs;

            if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
              mode_selected = this_mode;
              best_rd = bsi->rdstat[i][mode_idx].brdcost;
            }
            continue;
          }
        }

        bsi->rdstat[i][mode_idx].brdcost =
            encode_inter_mb_segment(cpi, x,
                                    bsi->segment_rd - this_segment_rd, i,
                                    &bsi->rdstat[i][mode_idx].byrate,
                                    &bsi->rdstat[i][mode_idx].bdist,
                                    &bsi->rdstat[i][mode_idx].bsse,
                                    bsi->rdstat[i][mode_idx].ta,
                                    bsi->rdstat[i][mode_idx].tl,
                                    mi_row, mi_col);
        if (bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
          bsi->rdstat[i][mode_idx].brdcost +=
              RDCOST(x->rdmult, x->rddiv, bsi->rdstat[i][mode_idx].brate, 0);
          bsi->rdstat[i][mode_idx].brate += bsi->rdstat[i][mode_idx].byrate;
          bsi->rdstat[i][mode_idx].eobs = p->eobs[i];
          if (num_4x4_blocks_wide > 1)
            bsi->rdstat[i + 1][mode_idx].eobs = p->eobs[i + 1];
          if (num_4x4_blocks_high > 1)
            bsi->rdstat[i + 2][mode_idx].eobs = p->eobs[i + 2];
        }

        if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
          mode_selected = this_mode;
          best_rd = bsi->rdstat[i][mode_idx].brdcost;
        }
      } /* for each 4x4 mode */

      if (best_rd == INT64_MAX) {
        int iy, midx;
        for (iy = i + 1; iy < 4; ++iy)
          for (midx = 0; midx < INTER_MODES; ++midx)
            bsi->rdstat[iy][midx].brdcost = INT64_MAX;
        bsi->segment_rd = INT64_MAX;
        return;
      }

      mode_idx = INTER_OFFSET(mode_selected);
      vpx_memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above));
      vpx_memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left));

      labels2mode(cpi, xd, i, mode_selected, mode_mv[mode_selected],
                  frame_mv, seg_mvs[i], bsi->ref_mv, x->nmvjointcost,
                  x->mvcost);

      br += bsi->rdstat[i][mode_idx].brate;
      bd += bsi->rdstat[i][mode_idx].bdist;
      block_sse += bsi->rdstat[i][mode_idx].bsse;
      segmentyrate += bsi->rdstat[i][mode_idx].byrate;
      this_segment_rd += bsi->rdstat[i][mode_idx].brdcost;

      if (this_segment_rd > bsi->segment_rd) {
        int iy, midx;
        for (iy = i + 1; iy < 4; ++iy)
          for (midx = 0; midx < INTER_MODES; ++midx)
            bsi->rdstat[iy][midx].brdcost = INT64_MAX;
        bsi->segment_rd = INT64_MAX;
        return;
      }
    }
  } /* for each label */

  bsi->r = br;
  bsi->d = bd;
  bsi->segment_yrate = segmentyrate;
  bsi->segment_rd = this_segment_rd;
  bsi->sse = block_sse;

  // Update the coding decisions.
  for (k = 0; k < 4; ++k)
    bsi->modes[k] = mi->bmi[k].as_mode;
}
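/* Note: rd_check_segment_txsize() reports failure by storing INT64_MAX in
 * the remaining rdstat[].brdcost entries and in bsi->segment_rd; the caller
 * below treats INT64_MAX as "no usable segmentation was found". */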
static int64_t rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,
                                           const TileInfo *const tile,
                                           int_mv *best_ref_mv,
                                           int_mv *second_best_ref_mv,
                                           int64_t best_rd,
                                           int *returntotrate,
                                           int *returnyrate,
                                           int64_t *returndistortion,
                                           int *skippable, int64_t *psse,
                                           int mvthresh,
                                           int_mv seg_mvs[4][MAX_REF_FRAMES],
                                           BEST_SEG_INFO *bsi_buf,
                                           int filter_idx,
                                           int mi_row, int mi_col) {
  int i;
  BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
  MACROBLOCKD *xd = &x->e_mbd;
  MODE_INFO *mi = xd->mi[0];
  MB_MODE_INFO *mbmi = &mi->mbmi;
  int mode_idx;

  vp9_zero(*bsi);

  bsi->segment_rd = best_rd;
  bsi->ref_mv[0] = best_ref_mv;
  bsi->ref_mv[1] = second_best_ref_mv;
  bsi->mvp.as_int = best_ref_mv->as_int;
  bsi->mvthresh = mvthresh;

  for (i = 0; i < 4; i++)
    bsi->modes[i] = ZEROMV;

  rd_check_segment_txsize(cpi, x, tile, bsi_buf, filter_idx, seg_mvs,
                          mi_row, mi_col);

  if (bsi->segment_rd > best_rd)
    return INT64_MAX;
  /* Set it to the best. */
  for (i = 0; i < 4; i++) {
    mode_idx = INTER_OFFSET(bsi->modes[i]);
    mi->bmi[i].as_mv[0].as_int = bsi->rdstat[i][mode_idx].mvs[0].as_int;
    if (has_second_ref(mbmi))
      mi->bmi[i].as_mv[1].as_int = bsi->rdstat[i][mode_idx].mvs[1].as_int;
    x->plane[0].eobs[i] = bsi->rdstat[i][mode_idx].eobs;
    mi->bmi[i].as_mode = bsi->modes[i];
  }

  /*
   * Used to set mbmi->mv.as_int
   */
  *returntotrate = bsi->r;
  *returndistortion = bsi->d;
  *returnyrate = bsi->segment_yrate;
  *skippable = vp9_is_skippable_in_plane(x, BLOCK_8X8, 0);
  *psse = bsi->sse;
  mbmi->mode = bsi->modes[3];

  return bsi->segment_rd;
}
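/* mv_pred() below ranks the candidate reference MVs for a block by full-pel
 * SAD against the source and records the winner in x->mv_best_ref_index, so
 * later searches can start from the most promising predictor. The row/col
 * offsets are the MV components >> 3, converting 1/8-pel units to pels. */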
static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
                    uint8_t *ref_y_buffer, int ref_y_stride,
                    int ref_frame, BLOCK_SIZE block_size) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  int_mv this_mv;
  int i;
  int zero_seen = 0;
  int best_index = 0;
  int best_sad = INT_MAX;
  int this_sad = INT_MAX;
  int max_mv = 0;

  uint8_t *src_y_ptr = x->plane[0].src.buf;
  uint8_t *ref_y_ptr;
  int row_offset, col_offset;
  int num_mv_refs = MAX_MV_REF_CANDIDATES +
                    (cpi->sf.adaptive_motion_search &&
                     cpi->common.show_frame &&
                     block_size < cpi->sf.max_partition_size);

  int_mv pred_mv[3];
  pred_mv[0] = mbmi->ref_mvs[ref_frame][0];
  pred_mv[1] = mbmi->ref_mvs[ref_frame][1];
  pred_mv[2] = x->pred_mv[ref_frame];

  // Get the sad for each candidate reference mv.
  for (i = 0; i < num_mv_refs; i++) {
    this_mv.as_int = pred_mv[i].as_int;

    max_mv = MAX(max_mv,
                 MAX(abs(this_mv.as_mv.row), abs(this_mv.as_mv.col)) >> 3);
    // Only need to check the zero mv once.
    if (!this_mv.as_int && zero_seen)
      continue;

    zero_seen = zero_seen || !this_mv.as_int;

    row_offset = this_mv.as_mv.row >> 3;
    col_offset = this_mv.as_mv.col >> 3;
    ref_y_ptr = ref_y_buffer + (ref_y_stride * row_offset) + col_offset;

    // Find the sad for the current vector.
    this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
                                           ref_y_ptr, ref_y_stride,
                                           0x7fffffff);

    // Note if it is the best so far.
    if (this_sad < best_sad) {
      best_sad = this_sad;
      best_index = i;
    }
  }

  // Note the index of the mv that worked best in the reference list.
  x->mv_best_ref_index[ref_frame] = best_index;
  x->max_mv_context[ref_frame] = max_mv;
  x->pred_mv_sad[ref_frame] = best_sad;
}

static void estimate_ref_frame_costs(VP9_COMP *cpi, int segment_id,
                                     unsigned int *ref_costs_single,
                                     unsigned int *ref_costs_comp,
                                     vp9_prob *comp_mode_p) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &cpi->mb.e_mbd;
  int seg_ref_active = vp9_segfeature_active(&cm->seg, segment_id,
                                             SEG_LVL_REF_FRAME);
  if (seg_ref_active) {
    vpx_memset(ref_costs_single, 0,
               MAX_REF_FRAMES * sizeof(*ref_costs_single));
    vpx_memset(ref_costs_comp, 0, MAX_REF_FRAMES * sizeof(*ref_costs_comp));
    *comp_mode_p = 128;
  } else {
    vp9_prob intra_inter_p = vp9_get_intra_inter_prob(cm, xd);
    vp9_prob comp_inter_p = 128;

    if (cm->reference_mode == REFERENCE_MODE_SELECT) {
      comp_inter_p = vp9_get_reference_mode_prob(cm, xd);
      *comp_mode_p = comp_inter_p;
    } else {
      *comp_mode_p = 128;
    }

    ref_costs_single[INTRA_FRAME] = vp9_cost_bit(intra_inter_p, 0);

    if (cm->reference_mode != COMPOUND_REFERENCE) {
      vp9_prob ref_single_p1 = vp9_get_pred_prob_single_ref_p1(cm, xd);
      vp9_prob ref_single_p2 = vp9_get_pred_prob_single_ref_p2(cm, xd);
      unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);

      if (cm->reference_mode == REFERENCE_MODE_SELECT)
        base_cost += vp9_cost_bit(comp_inter_p, 0);

      ref_costs_single[LAST_FRAME] = ref_costs_single[GOLDEN_FRAME] =
          ref_costs_single[ALTREF_FRAME] = base_cost;
      ref_costs_single[LAST_FRAME] += vp9_cost_bit(ref_single_p1, 0);
      ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p1, 1);
      ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p1, 1);
      ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p2, 0);
      ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p2, 1);
    } else {
      ref_costs_single[LAST_FRAME] = 512;
      ref_costs_single[GOLDEN_FRAME] = 512;
      ref_costs_single[ALTREF_FRAME] = 512;
    }
    if (cm->reference_mode != SINGLE_REFERENCE) {
      vp9_prob ref_comp_p = vp9_get_pred_prob_comp_ref_p(cm, xd);
      unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);

      if (cm->reference_mode == REFERENCE_MODE_SELECT)
        base_cost += vp9_cost_bit(comp_inter_p, 1);

      ref_costs_comp[LAST_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 0);
      ref_costs_comp[GOLDEN_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 1);
    } else {
      ref_costs_comp[LAST_FRAME] = 512;
      ref_costs_comp[GOLDEN_FRAME] = 512;
    }
  }
}
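/* Note: in estimate_ref_frame_costs() above, reference types that cannot
 * occur under the current cm->reference_mode are given a nominal cost of
 * 512 instead of a real entropy cost; those entries should never be chosen,
 * so the exact value only has to be harmless. */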
static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
                                 int mode_index,
                                 int_mv *ref_mv,
                                 int_mv *second_ref_mv,
                                 int64_t comp_pred_diff[REFERENCE_MODES],
                                 int64_t tx_size_diff[TX_MODES],
                                 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]) {
  MACROBLOCKD *const xd = &x->e_mbd;

  // Take a snapshot of the coding context so it can be
  // restored if we decide to encode this way.
  ctx->skip = x->skip;
  ctx->best_mode_index = mode_index;
  ctx->mic = *xd->mi[0];

  ctx->best_ref_mv[0].as_int = ref_mv->as_int;
  ctx->best_ref_mv[1].as_int = second_ref_mv->as_int;

  ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE];
  ctx->comp_pred_diff = (int)comp_pred_diff[COMPOUND_REFERENCE];
  ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT];

  vpx_memcpy(ctx->tx_rd_diff, tx_size_diff, sizeof(ctx->tx_rd_diff));
  vpx_memcpy(ctx->best_filter_diff, best_filter_diff,
             sizeof(*best_filter_diff) * SWITCHABLE_FILTER_CONTEXTS);
}

static void setup_pred_block(const MACROBLOCKD *xd,
                             struct buf_2d dst[MAX_MB_PLANE],
                             const YV12_BUFFER_CONFIG *src,
                             int mi_row, int mi_col,
                             const struct scale_factors *scale,
                             const struct scale_factors *scale_uv) {
  int i;

  dst[0].buf = src->y_buffer;
  dst[0].stride = src->y_stride;
  dst[1].buf = src->u_buffer;
  dst[2].buf = src->v_buffer;
  dst[1].stride = dst[2].stride = src->uv_stride;
#if CONFIG_ALPHA
  dst[3].buf = src->alpha_buffer;
  dst[3].stride = src->alpha_stride;
#endif

  // TODO(jkoleszar): Make scale factors per-plane data.
  for (i = 0; i < MAX_MB_PLANE; i++) {
    setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col,
                     i ? scale_uv : scale,
                     xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
  }
}

void vp9_setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
                            const TileInfo *const tile,
                            MV_REFERENCE_FRAME ref_frame,
                            BLOCK_SIZE block_size,
                            int mi_row, int mi_col,
                            int_mv frame_nearest_mv[MAX_REF_FRAMES],
                            int_mv frame_near_mv[MAX_REF_FRAMES],
                            struct buf_2d yv12_mb[4][MAX_MB_PLANE]) {
  const VP9_COMMON *cm = &cpi->common;
  const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];
  int_mv *const candidates = mi->mbmi.ref_mvs[ref_frame];
  const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;

  // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
  // use the UV scaling factors.
  setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf);

  // Gets an initial list of candidate vectors from neighbours and orders them.
  vp9_find_mv_refs(cm, xd, tile, mi, ref_frame, candidates, mi_row, mi_col);

  // Candidate refinement carried out at encoder and decoder.
  vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates,
                        &frame_nearest_mv[ref_frame],
                        &frame_near_mv[ref_frame]);

  // Further refinement that is encode side only to test the top few candidates
  // in full and choose the best as the centre point for subsequent searches.
  // The current implementation doesn't support scaling.
  if (!vp9_is_scaled(sf) && block_size >= BLOCK_8X8)
    mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride,
            ref_frame, block_size);
}
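/* vp9_get_scaled_ref_frame() below returns NULL when the reference does not
 * need rescaling (scaled_idx == ref_idx), so callers use a non-NULL return
 * as the trigger to swap in the pre-scaled buffers around a motion search. */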
const YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi,
                                                   int ref_frame) {
  const VP9_COMMON *const cm = &cpi->common;
  const int ref_idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)];
  const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1];
  return (scaled_idx != ref_idx) ? &cm->frame_bufs[scaled_idx].buf : NULL;
}

int vp9_get_switchable_rate(const MACROBLOCK *x) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  const int ctx = vp9_get_pred_context_switchable_interp(xd);
  return SWITCHABLE_INTERP_RATE_FACTOR *
         x->switchable_interp_costs[ctx][mbmi->interp_filter];
}
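/* single_motion_search() below: full-pel search for one reference starting
 * from the predictor chosen via x->mv_best_ref_index (see mv_pred() above),
 * followed by fractional-pel refinement when a finite error was found. The
 * mv_row/col limits are saved and restored around the search because
 * vp9_set_mv_search_range() narrows them relative to ref_mv. */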
static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
                                 const TileInfo *const tile,
                                 BLOCK_SIZE bsize,
                                 int mi_row, int mi_col,
                                 int_mv *tmp_mv, int *rate_mv) {
  MACROBLOCKD *xd = &x->e_mbd;
  VP9_COMMON *cm = &cpi->common;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
  int bestsme = INT_MAX;
  int further_steps, step_param;
  int sadpb = x->sadperbit16;
  MV mvp_full;
  int ref = mbmi->ref_frame[0];
  MV ref_mv = mbmi->ref_mvs[ref][0].as_mv;

  int tmp_col_min = x->mv_col_min;
  int tmp_col_max = x->mv_col_max;
  int tmp_row_min = x->mv_row_min;
  int tmp_row_max = x->mv_row_max;

  const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi,
                                                                        ref);

  MV pred_mv[3];
  pred_mv[0] = mbmi->ref_mvs[ref][0].as_mv;
  pred_mv[1] = mbmi->ref_mvs[ref][1].as_mv;
  pred_mv[2] = x->pred_mv[ref].as_mv;

  if (scaled_ref_frame) {
    int i;
    // Swap out the reference frame for a version that's been scaled to
    // match the resolution of the current frame, allowing the existing
    // motion search code to be used without additional modifications.
    for (i = 0; i < MAX_MB_PLANE; i++)
      backup_yv12[i] = xd->plane[i].pre[0];

    vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
  }

  vp9_set_mv_search_range(x, &ref_mv);

  // Work out the size of the first step in the mv step search.
  // 0 here is maximum length first step. 1 is MAX >> 1 etc.
  if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) {
    // Take a weighted average of the step_params based on the last frame's
    // max mv magnitude and that based on the best ref mvs of the current
    // block for the given reference.
    step_param = (vp9_init_search_range(cpi, x->max_mv_context[ref]) +
                  cpi->mv_step_param) >> 1;
  } else {
    step_param = cpi->mv_step_param;
  }

  if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64 &&
      cpi->common.show_frame) {
    int boffset = 2 * (b_width_log2(BLOCK_64X64) - MIN(b_height_log2(bsize),
                                                       b_width_log2(bsize)));
    step_param = MAX(step_param, boffset);
  }

  if (cpi->sf.adaptive_motion_search) {
    int bwl = b_width_log2_lookup[bsize];
    int bhl = b_height_log2_lookup[bsize];
    int i;
    int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4);

    if (tlevel < 5)
      step_param += 2;

    for (i = LAST_FRAME; i <= ALTREF_FRAME && cpi->common.show_frame; ++i) {
      if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
        x->pred_mv[ref].as_int = 0;
        tmp_mv->as_int = INVALID_MV;

        if (scaled_ref_frame) {
          int i;
          for (i = 0; i < MAX_MB_PLANE; i++)
            xd->plane[i].pre[0] = backup_yv12[i];
        }
        return;
      }
    }
  }

  mvp_full = pred_mv[x->mv_best_ref_index[ref]];

  mvp_full.col >>= 3;
  mvp_full.row >>= 3;

  // Further step/diamond searches as necessary.
  further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;

  if (cpi->sf.search_method == FAST_DIAMOND) {
    bestsme = vp9_fast_dia_search(x, &mvp_full, step_param, sadpb, 0,
                                  &cpi->fn_ptr[bsize], 1,
                                  &ref_mv, &tmp_mv->as_mv);
    if (bestsme < INT_MAX)
      bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv,
                                   &cpi->fn_ptr[bsize], 1);
  } else if (cpi->sf.search_method == FAST_HEX) {
    bestsme = vp9_fast_hex_search(x, &mvp_full, step_param, sadpb, 0,
                                  &cpi->fn_ptr[bsize], 1,
                                  &ref_mv, &tmp_mv->as_mv);
    if (bestsme < INT_MAX)
      bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv,
                                   &cpi->fn_ptr[bsize], 1);
  } else if (cpi->sf.search_method == HEX) {
    bestsme = vp9_hex_search(x, &mvp_full, step_param, sadpb, 1,
                             &cpi->fn_ptr[bsize], 1,
                             &ref_mv, &tmp_mv->as_mv);
    if (bestsme < INT_MAX)
      bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv,
                                   &cpi->fn_ptr[bsize], 1);
  } else if (cpi->sf.search_method == SQUARE) {
    bestsme = vp9_square_search(x, &mvp_full, step_param, sadpb, 1,
                                &cpi->fn_ptr[bsize], 1,
                                &ref_mv, &tmp_mv->as_mv);
    if (bestsme < INT_MAX)
      bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv,
                                   &cpi->fn_ptr[bsize], 1);
  } else if (cpi->sf.search_method == BIGDIA) {
    bestsme = vp9_bigdia_search(x, &mvp_full, step_param, sadpb, 1,
                                &cpi->fn_ptr[bsize], 1,
                                &ref_mv, &tmp_mv->as_mv);
    if (bestsme < INT_MAX)
      bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv,
                                   &cpi->fn_ptr[bsize], 1);
  } else {
    bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
                                     sadpb, further_steps, 1,
                                     &cpi->fn_ptr[bsize],
                                     &ref_mv, &tmp_mv->as_mv);
  }

  x->mv_col_min = tmp_col_min;
  x->mv_col_max = tmp_col_max;
  x->mv_row_min = tmp_row_min;
  x->mv_row_max = tmp_row_max;

  if (bestsme < INT_MAX) {
    int dis;  /* TODO: use dis in distortion calculation later. */
    cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv,
                                 cm->allow_high_precision_mv,
                                 x->errorperbit,
                                 &cpi->fn_ptr[bsize],
                                 cpi->sf.subpel_force_stop,
                                 cpi->sf.subpel_iters_per_step,
                                 x->nmvjointcost, x->mvcost,
                                 &dis, &x->pred_sse[ref]);
  }
  *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv,
                             x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);

  if (cpi->sf.adaptive_motion_search && cpi->common.show_frame)
    x->pred_mv[ref].as_int = tmp_mv->as_int;

  if (scaled_ref_frame) {
    int i;
    for (i = 0; i < MAX_MB_PLANE; i++)
      xd->plane[i].pre[0] = backup_yv12[i];
  }
}
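/* joint_motion_search() below: for compound prediction the two MVs are
 * refined alternately. Iteration ite refines reference (ite % 2) while
 * holding the other reference's prediction fixed as second_pred, and the
 * loop exits as soon as an iteration fails to improve last_besterr for
 * that reference. */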
static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
                                BLOCK_SIZE bsize,
                                int_mv *frame_mv,
                                int mi_row, int mi_col,
                                int_mv single_newmv[MAX_REF_FRAMES],
                                int *rate_mv) {
  const int pw = 4 * num_4x4_blocks_wide_lookup[bsize];
  const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const int refs[2] = { mbmi->ref_frame[0],
                        mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
  int_mv ref_mv[2];
  int ite, ref;
  // Prediction buffer from the second frame.
  uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t));
  const InterpKernel *kernel = vp9_get_interp_kernel(mbmi->interp_filter);

  // Do joint motion search in compound mode to get more accurate mvs.
  struct buf_2d backup_yv12[2][MAX_MB_PLANE];
  struct buf_2d scaled_first_yv12 = xd->plane[0].pre[0];
  int last_besterr[2] = {INT_MAX, INT_MAX};
  const YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = {
    vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[0]),
    vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[1])
  };

  for (ref = 0; ref < 2; ++ref) {
    ref_mv[ref] = mbmi->ref_mvs[refs[ref]][0];

    if (scaled_ref_frame[ref]) {
      int i;
      // Swap out the reference frame for a version that's been scaled to
      // match the resolution of the current frame, allowing the existing
      // motion search code to be used without additional modifications.
      for (i = 0; i < MAX_MB_PLANE; i++)
        backup_yv12[ref][i] = xd->plane[i].pre[ref];
      vp9_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col,
                           NULL);
    }

    frame_mv[refs[ref]].as_int = single_newmv[refs[ref]].as_int;
  }

  // Allow joint search multiple times iteratively for each ref frame
  // and break out of the search loop if it couldn't find a better mv.
  for (ite = 0; ite < 4; ite++) {
    struct buf_2d ref_yv12[2];
    int bestsme = INT_MAX;
    int sadpb = x->sadperbit16;
    int_mv tmp_mv;
    int search_range = 3;

    int tmp_col_min = x->mv_col_min;
    int tmp_col_max = x->mv_col_max;
    int tmp_row_min = x->mv_row_min;
    int tmp_row_max = x->mv_row_max;
    int id = ite % 2;

    // Initialized here because of compiler problem in Visual Studio.
    ref_yv12[0] = xd->plane[0].pre[0];
    ref_yv12[1] = xd->plane[0].pre[1];

    // Get the prediction block from the second frame.
    vp9_build_inter_predictor(ref_yv12[!id].buf,
                              ref_yv12[!id].stride,
                              second_pred, pw,
                              &frame_mv[refs[!id]].as_mv,
                              &xd->block_refs[!id]->sf,
                              pw, ph, 0,
                              kernel, MV_PRECISION_Q3,
                              mi_col * MI_SIZE, mi_row * MI_SIZE);

    // Compound motion search on the first ref frame.
    if (id)
      xd->plane[0].pre[0] = ref_yv12[id];
    vp9_set_mv_search_range(x, &ref_mv[id].as_mv);

    // Use the mv result from single prediction mode as the mvp.
    tmp_mv.as_int = frame_mv[refs[id]].as_int;

    tmp_mv.as_mv.col >>= 3;
    tmp_mv.as_mv.row >>= 3;

    // Small-range full-pixel motion search.
    bestsme = vp9_refining_search_8p_c(x, &tmp_mv.as_mv, sadpb,
                                       search_range,
                                       &cpi->fn_ptr[bsize],
                                       x->nmvjointcost, x->mvcost,
                                       &ref_mv[id].as_mv, second_pred,
                                       pw, ph);
    if (bestsme < INT_MAX)
      bestsme = vp9_get_mvpred_av_var(x, &tmp_mv.as_mv, &ref_mv[id].as_mv,
                                      second_pred, &cpi->fn_ptr[bsize], 1);

    x->mv_col_min = tmp_col_min;
    x->mv_col_max = tmp_col_max;
    x->mv_row_min = tmp_row_min;
    x->mv_row_max = tmp_row_max;

    if (bestsme < INT_MAX) {
      int dis;  /* TODO: use dis in distortion calculation later. */
      unsigned int sse;
      bestsme = cpi->find_fractional_mv_step_comp(
          x, &tmp_mv.as_mv,
          &ref_mv[id].as_mv,
          cpi->common.allow_high_precision_mv,
          x->errorperbit,
          &cpi->fn_ptr[bsize],
          0, cpi->sf.subpel_iters_per_step,
          x->nmvjointcost, x->mvcost,
          &dis, &sse, second_pred,
          pw, ph);
    }

    if (id)
      xd->plane[0].pre[0] = scaled_first_yv12;

    if (bestsme < last_besterr[id]) {
      frame_mv[refs[id]].as_int = tmp_mv.as_int;
      last_besterr[id] = bestsme;
    } else {
      break;
    }
  }

  *rate_mv = 0;

  for (ref = 0; ref < 2; ++ref) {
    if (scaled_ref_frame[ref]) {
      // Restore the predictor.
      int i;
      for (i = 0; i < MAX_MB_PLANE; i++)
        xd->plane[i].pre[ref] = backup_yv12[ref][i];
    }

    *rate_mv += vp9_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
                                &mbmi->ref_mvs[refs[ref]][0].as_mv,
                                x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
  }

  vpx_free(second_pred);
}

static INLINE void restore_dst_buf(MACROBLOCKD *xd,
                                   uint8_t *orig_dst[MAX_MB_PLANE],
                                   int orig_dst_stride[MAX_MB_PLANE]) {
  int i;
  for (i = 0; i < MAX_MB_PLANE; i++) {
    xd->plane[i].dst.buf = orig_dst[i];
    xd->plane[i].dst.stride = orig_dst_stride[i];
  }
}
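/* handle_inter_mode() below evaluates one inter mode end to end: motion
 * search (or joint search for compound NEWMV), interpolation filter
 * selection, the optional encode breakout, then full Y and UV
 * rate-distortion. A return value of INT64_MAX tells the caller to prune
 * this mode. */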
static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                 const TileInfo *const tile,
                                 BLOCK_SIZE bsize,
                                 int64_t txfm_cache[],
                                 int *rate2, int64_t *distortion,
                                 int *skippable,
                                 int *rate_y, int64_t *distortion_y,
                                 int *rate_uv, int64_t *distortion_uv,
                                 int *mode_excluded, int *disable_skip,
                                 INTERP_FILTER *best_filter,
                                 int_mv (*mode_mv)[MAX_REF_FRAMES],
                                 int mi_row, int mi_col,
                                 int_mv single_newmv[MAX_REF_FRAMES],
                                 int64_t *psse,
                                 const int64_t ref_best_rd) {
  VP9_COMMON *cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const int is_comp_pred = has_second_ref(mbmi);
  const int num_refs = is_comp_pred ? 2 : 1;
  const int this_mode = mbmi->mode;
  int_mv *frame_mv = mode_mv[this_mode];
  int i;
  int refs[2] = { mbmi->ref_frame[0],
                  (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
  int_mv cur_mv[2];
  int64_t this_rd = 0;
  DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, MAX_MB_PLANE * 64 * 64);
  int pred_exists = 0;
  int intpel_mv;
  int64_t rd, best_rd = INT64_MAX;
  int best_needs_copy = 0;
  uint8_t *orig_dst[MAX_MB_PLANE];
  int orig_dst_stride[MAX_MB_PLANE];
  int rs = 0;

  if (is_comp_pred) {
    if (frame_mv[refs[0]].as_int == INVALID_MV ||
        frame_mv[refs[1]].as_int == INVALID_MV)
      return INT64_MAX;
  }

  if (this_mode == NEWMV) {
    int rate_mv;
    if (is_comp_pred) {
      // Initialize the mvs using the single prediction mode results.
      frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
      frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;

      if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
        joint_motion_search(cpi, x, bsize, frame_mv,
                            mi_row, mi_col, single_newmv, &rate_mv);
      } else {
        rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]].as_mv,
                                  &mbmi->ref_mvs[refs[0]][0].as_mv,
                                  x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
        rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]].as_mv,
                                   &mbmi->ref_mvs[refs[1]][0].as_mv,
                                   x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
      }
      *rate2 += rate_mv;
    } else {
      int_mv tmp_mv;
      single_motion_search(cpi, x, tile, bsize, mi_row, mi_col,
                           &tmp_mv, &rate_mv);
      if (tmp_mv.as_int == INVALID_MV)
        return INT64_MAX;
      *rate2 += rate_mv;
      frame_mv[refs[0]].as_int =
          xd->mi[0]->bmi[0].as_mv[0].as_int = tmp_mv.as_int;
      single_newmv[refs[0]].as_int = tmp_mv.as_int;
    }
  }

  for (i = 0; i < num_refs; ++i) {
    cur_mv[i] = frame_mv[refs[i]];
    // Clip "next_nearest" so that it does not extend too far out of the image.
    if (this_mode != NEWMV)
      clamp_mv2(&cur_mv[i].as_mv, xd);

    if (mv_check_bounds(x, &cur_mv[i].as_mv))
      return INT64_MAX;
    mbmi->mv[i].as_int = cur_mv[i].as_int;
  }

  // Do the first prediction into the destination buffer. Do the next
  // prediction into a temporary buffer. Then keep track of which one
  // of these currently holds the best predictor, and use the other
  // one for future predictions. In the end, copy from tmp_buf to
  // dst if necessary.
  for (i = 0; i < MAX_MB_PLANE; i++) {
    orig_dst[i] = xd->plane[i].dst.buf;
    orig_dst_stride[i] = xd->plane[i].dst.stride;
  }

  /* We don't include the cost of the second reference here, because there
   * are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other
   * words if you present them in that order, the second one is always known
   * if the first is known. */
  *rate2 += cost_mv_ref(cpi, this_mode, mbmi->mode_context[refs[0]]);

  if (!(*mode_excluded))
    *mode_excluded = is_comp_pred ? cm->reference_mode == SINGLE_REFERENCE
                                  : cm->reference_mode == COMPOUND_REFERENCE;
  pred_exists = 0;
  // Are all MVs integer pel for Y and UV?
  intpel_mv = !mv_has_subpel(&mbmi->mv[0].as_mv);
  if (is_comp_pred)
    intpel_mv &= !mv_has_subpel(&mbmi->mv[1].as_mv);

  // Search for the best switchable filter by checking the variance of
  // pred error irrespective of whether the filter will be used.
  cpi->mask_filter_rd = 0;
  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
    cpi->rd_filter_cache[i] = INT64_MAX;

  if (cm->interp_filter != BILINEAR) {
    *best_filter = EIGHTTAP;
    if (x->source_variance <
        cpi->sf.disable_filter_search_var_thresh) {
      *best_filter = EIGHTTAP;
    } else {
      int newbest;
      int tmp_rate_sum = 0;
      int64_t tmp_dist_sum = 0;

      for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
        int j;
        int64_t rs_rd;
        mbmi->interp_filter = i;
        rs = vp9_get_switchable_rate(x);
        rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);

        if (i > 0 && intpel_mv) {
          rd = RDCOST(x->rdmult, x->rddiv, tmp_rate_sum, tmp_dist_sum);
          cpi->rd_filter_cache[i] = rd;
          cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
              MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
          if (cm->interp_filter == SWITCHABLE)
            rd += rs_rd;
          cpi->mask_filter_rd = MAX(cpi->mask_filter_rd, rd);
        } else {
          int rate_sum = 0;
          int64_t dist_sum = 0;
          if ((cm->interp_filter == SWITCHABLE &&
               (!i || best_needs_copy)) ||
              (cm->interp_filter != SWITCHABLE &&
               (cm->interp_filter == mbmi->interp_filter ||
                (i == 0 && intpel_mv)))) {
            restore_dst_buf(xd, orig_dst, orig_dst_stride);
          } else {
            for (j = 0; j < MAX_MB_PLANE; j++) {
              xd->plane[j].dst.buf = tmp_buf + j * 64 * 64;
              xd->plane[j].dst.stride = 64;
            }
          }
          vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
          model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum);

          rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum);
          cpi->rd_filter_cache[i] = rd;
          cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
              MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
          if (cm->interp_filter == SWITCHABLE)
            rd += rs_rd;
          cpi->mask_filter_rd = MAX(cpi->mask_filter_rd, rd);

          if (i == 0 && intpel_mv) {
            tmp_rate_sum = rate_sum;
            tmp_dist_sum = dist_sum;
          }
        }

        if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
          if (rd / 2 > ref_best_rd) {
            restore_dst_buf(xd, orig_dst, orig_dst_stride);
            return INT64_MAX;
          }
        }
        newbest = i == 0 || rd < best_rd;

        if (newbest) {
          best_rd = rd;
          *best_filter = mbmi->interp_filter;
          if (cm->interp_filter == SWITCHABLE && i && !intpel_mv)
            best_needs_copy = !best_needs_copy;
        }

        if ((cm->interp_filter == SWITCHABLE && newbest) ||
            (cm->interp_filter != SWITCHABLE &&
             cm->interp_filter == mbmi->interp_filter)) {
          pred_exists = 1;
        }
      }
      restore_dst_buf(xd, orig_dst, orig_dst_stride);
    }
  }
  // Set the appropriate filter.
  mbmi->interp_filter = cm->interp_filter != SWITCHABLE ?
      cm->interp_filter : *best_filter;
  rs = cm->interp_filter == SWITCHABLE ? vp9_get_switchable_rate(x) : 0;

  if (pred_exists) {
    if (best_needs_copy) {
      // Again temporarily set the buffers to local memory to prevent a memcpy.
      for (i = 0; i < MAX_MB_PLANE; i++) {
        xd->plane[i].dst.buf = tmp_buf + i * 64 * 64;
        xd->plane[i].dst.stride = 64;
      }
    }
  } else {
    // Handles the special case when a filter that is not in the
    // switchable list (e.g. bilinear, 6-tap) is indicated at the frame level.
    vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
  }

  if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
    int tmp_rate;
    int64_t tmp_dist;
    model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist);
    rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
    // If the rd modeled from the current pred error is substantially more
    // than the best so far, do not bother doing the full rd.
    if (rd / 2 > ref_best_rd) {
      restore_dst_buf(xd, orig_dst, orig_dst_stride);
      return INT64_MAX;
    }
  }

  if (cm->interp_filter == SWITCHABLE)
    *rate2 += vp9_get_switchable_rate(x);

  if (!is_comp_pred) {
    if (!x->in_active_map) {
      if (psse)
        *psse = 0;
      *distortion = 0;
      x->skip = 1;
    } else if (cpi->allow_encode_breakout && x->encode_breakout) {
      const BLOCK_SIZE y_size = get_plane_block_size(bsize, &xd->plane[0]);
      const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]);
      unsigned int var, sse;
      // Skipping threshold for ac.
      unsigned int thresh_ac;
      // Set a maximum for the threshold to avoid a big PSNR loss in low
      // bit rate cases. Use an extremely low threshold for static frames
      // to limit skipping.
      const unsigned int max_thresh = (cpi->allow_encode_breakout ==
                                       ENCODE_BREAKOUT_LIMITED) ? 128 : 36000;
      // The encode_breakout input.
      const unsigned int min_thresh =
          MIN(((unsigned int)x->encode_breakout << 4), max_thresh);

      // Calculate the threshold according to the dequant value.
      thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9;
      thresh_ac = clamp(thresh_ac, min_thresh, max_thresh);

      var = cpi->fn_ptr[y_size].vf(x->plane[0].src.buf, x->plane[0].src.stride,
                                   xd->plane[0].dst.buf,
                                   xd->plane[0].dst.stride, &sse);

      // Adjust the threshold according to the partition size.
      thresh_ac >>= 8 - (b_width_log2_lookup[bsize] +
                         b_height_log2_lookup[bsize]);

      // Y skipping condition checking.
      if (sse < thresh_ac || sse == 0) {
        // Skipping threshold for dc.
        unsigned int thresh_dc;

        thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6);

        // dc skipping condition checking.
        if ((sse - var) < thresh_dc || sse == var) {
          unsigned int sse_u, sse_v;
          unsigned int var_u, var_v;

          var_u = cpi->fn_ptr[uv_size].vf(x->plane[1].src.buf,
                                          x->plane[1].src.stride,
                                          xd->plane[1].dst.buf,
                                          xd->plane[1].dst.stride, &sse_u);

          // U skipping condition checking.
          if ((sse_u * 4 < thresh_ac || sse_u == 0) &&
              (sse_u - var_u < thresh_dc || sse_u == var_u)) {
            var_v = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf,
                                            x->plane[2].src.stride,
                                            xd->plane[2].dst.buf,
                                            xd->plane[2].dst.stride, &sse_v);

            // V skipping condition checking.
            if ((sse_v * 4 < thresh_ac || sse_v == 0) &&
                (sse_v - var_v < thresh_dc || sse_v == var_v)) {
              x->skip = 1;
              // The cost of the skip bit needs to be added.
              *rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);

              // The scaling factor for SSE from the spatial domain to the
              // frequency domain is 16. Adjust distortion accordingly.
              *distortion_uv = (sse_u + sse_v) << 4;
              *distortion = (sse << 4) + *distortion_uv;

              *disable_skip = 1;
              this_rd = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
            }
          }
        }
      }
    }
  }

  if (!x->skip) {
    int skippable_y, skippable_uv;
    int64_t sseuv = INT64_MAX;
    int64_t rdcosty = INT64_MAX;

    // Y cost and distortion.
    inter_super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y, psse,
                          bsize, txfm_cache, ref_best_rd);

    if (*rate_y == INT_MAX) {
      *rate2 = INT_MAX;
      *distortion = INT64_MAX;
      restore_dst_buf(xd, orig_dst, orig_dst_stride);
      return INT64_MAX;
    }

    *rate2 += *rate_y;
    *distortion += *distortion_y;

    rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
    rdcosty = MIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse));

    super_block_uvrd(cpi, x, rate_uv, distortion_uv, &skippable_uv, &sseuv,
                     bsize, ref_best_rd - rdcosty);
    if (*rate_uv == INT_MAX) {
      *rate2 = INT_MAX;
      *distortion = INT64_MAX;
      restore_dst_buf(xd, orig_dst, orig_dst_stride);
      return INT64_MAX;
    }

    *psse += sseuv;
    *rate2 += *rate_uv;
    *distortion += *distortion_uv;
    *skippable = skippable_y && skippable_uv;
  }

  restore_dst_buf(xd, orig_dst, orig_dst_stride);
  return this_rd;  // if 0, this will be re-calculated by the caller
}

static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
                           int max_plane) {
  struct macroblock_plane *const p = x->plane;
  struct macroblockd_plane *const pd = x->e_mbd.plane;
  int i;

  for (i = 0; i < max_plane; ++i) {
    p[i].coeff = ctx->coeff_pbuf[i][1];
    p[i].qcoeff = ctx->qcoeff_pbuf[i][1];
    pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1];
    p[i].eobs = ctx->eobs_pbuf[i][1];

    ctx->coeff_pbuf[i][1] = ctx->coeff_pbuf[i][0];
    ctx->qcoeff_pbuf[i][1] = ctx->qcoeff_pbuf[i][0];
    ctx->dqcoeff_pbuf[i][1] = ctx->dqcoeff_pbuf[i][0];
    ctx->eobs_pbuf[i][1] = ctx->eobs_pbuf[i][0];

    ctx->coeff_pbuf[i][0] = p[i].coeff;
    ctx->qcoeff_pbuf[i][0] = p[i].qcoeff;
    ctx->dqcoeff_pbuf[i][0] = pd[i].dqcoeff;
    ctx->eobs_pbuf[i][0] = p[i].eobs;
  }
}
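/* Note on swap_block_ptr() above: slot [i][0] of each ctx->*_pbuf pair ends
 * up holding the just-found best mode's coefficient buffers while the block
 * continues writing into the spare set from slot [i][1], so adopting a new
 * best mode costs a pointer rotation instead of a coefficient memcpy. */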
void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
                               int *returnrate, int64_t *returndist,
                               BLOCK_SIZE bsize,
                               PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
  int y_skip = 0, uv_skip = 0;
  int64_t dist_y = 0, dist_uv = 0, tx_cache[TX_MODES] = { 0 };
  TX_SIZE max_uv_tx_size;
  x->skip_encode = 0;
  ctx->skip = 0;
  xd->mi[0]->mbmi.ref_frame[0] = INTRA_FRAME;

  if (bsize >= BLOCK_8X8) {
    if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
                               &dist_y, &y_skip, bsize, tx_cache,
                               best_rd) >= best_rd) {
      *returnrate = INT_MAX;
      return;
    }
    max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0]->mbmi.tx_size, bsize);
    rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
                            &dist_uv, &uv_skip, bsize, max_uv_tx_size);
  } else {
    y_skip = 0;
    if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly,
                                     &dist_y, best_rd) >= best_rd) {
      *returnrate = INT_MAX;
      return;
    }
    max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0]->mbmi.tx_size, bsize);
    rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
                            &dist_uv, &uv_skip, BLOCK_8X8, max_uv_tx_size);
  }

  if (y_skip && uv_skip) {
    *returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
                  vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
    *returndist = dist_y + dist_uv;
    vp9_zero(ctx->tx_rd_diff);
  } else {
    int i;
    *returnrate = rate_y + rate_uv +
                  vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
    *returndist = dist_y + dist_uv;
    if (cpi->sf.tx_size_search_method == USE_FULL_RD)
      for (i = 0; i < TX_MODES; i++) {
        if (tx_cache[i] < INT64_MAX && tx_cache[cm->tx_mode] < INT64_MAX)
          ctx->tx_rd_diff[i] = tx_cache[i] - tx_cache[cm->tx_mode];
        else
          ctx->tx_rd_diff[i] = 0;
      }
  }

  ctx->mic = *xd->mi[0];
}
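/* vp9_rd_pick_inter_mode_sb() below is the main mode decision loop: it walks
 * vp9_mode_order[], prunes entries via mode_skip_mask and the per-mode rd
 * thresholds, and tracks the best mode together with the side records
 * (reference-mode, tx-size and filter rd deltas) stored for the caller. */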
int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
                                  const TileInfo *const tile,
                                  int mi_row, int mi_col,
                                  int *returnrate,
                                  int64_t *returndistortion,
                                  BLOCK_SIZE bsize,
                                  PICK_MODE_CONTEXT *ctx,
                                  int64_t best_rd_so_far) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  const struct segmentation *const seg = &cm->seg;
  MB_PREDICTION_MODE this_mode;
  MV_REFERENCE_FRAME ref_frame, second_ref_frame;
  unsigned char segment_id = mbmi->segment_id;
  int comp_pred, i;
  int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
  struct buf_2d yv12_mb[4][MAX_MB_PLANE];
  int_mv single_newmv[MAX_REF_FRAMES] = { { 0 } };
  static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
                                    VP9_ALT_FLAG };
  int64_t best_rd = best_rd_so_far;
  int64_t best_tx_rd[TX_MODES];
  int64_t best_tx_diff[TX_MODES];
  int64_t best_pred_diff[REFERENCE_MODES];
  int64_t best_pred_rd[REFERENCE_MODES];
  int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
  int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
  MB_MODE_INFO best_mbmode = { 0 };
  int mode_index, best_mode_index = 0;
  unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
  vp9_prob comp_mode_p;
  int64_t best_intra_rd = INT64_MAX;
  int64_t best_inter_rd = INT64_MAX;
  MB_PREDICTION_MODE best_intra_mode = DC_PRED;
  MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME;
  INTERP_FILTER tmp_best_filter = SWITCHABLE;
  int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
  int64_t dist_uv[TX_SIZES];
  int skip_uv[TX_SIZES];
  MB_PREDICTION_MODE mode_uv[TX_SIZES];
  int64_t mode_distortions[MB_MODE_COUNT] = {-1};
  int intra_cost_penalty = 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q);
  const int bws = num_8x8_blocks_wide_lookup[bsize] / 2;
  const int bhs = num_8x8_blocks_high_lookup[bsize] / 2;
  int best_skip2 = 0;
  int mode_skip_mask = 0;
  int mode_skip_start = cpi->sf.mode_skip_start + 1;
  const int *const rd_threshes = cpi->rd_threshes[segment_id][bsize];
  const int *const rd_thresh_freq_fact = cpi->rd_thresh_freq_fact[bsize];
  const int mode_search_skip_flags = cpi->sf.mode_search_skip_flags;
  const int intra_y_mode_mask =
      cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]];
  int disable_inter_mode_mask = cpi->sf.disable_inter_mode_mask[bsize];

  x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;

  estimate_ref_frame_costs(cpi, segment_id, ref_costs_single, ref_costs_comp,
                           &comp_mode_p);

  for (i = 0; i < REFERENCE_MODES; ++i)
    best_pred_rd[i] = INT64_MAX;
  for (i = 0; i < TX_MODES; i++)
    best_tx_rd[i] = INT64_MAX;
  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
    best_filter_rd[i] = INT64_MAX;
  for (i = 0; i < TX_SIZES; i++)
    rate_uv_intra[i] = INT_MAX;
  for (i = 0; i < MAX_REF_FRAMES; ++i)
    x->pred_sse[i] = INT_MAX;

  *returnrate = INT_MAX;

  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
    x->pred_mv_sad[ref_frame] = INT_MAX;
    if (cpi->ref_frame_flags & flag_list[ref_frame]) {
      vp9_setup_buffer_inter(cpi, x, tile,
                             ref_frame, bsize, mi_row, mi_col,
                             frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb);
    }
    frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
    frame_mv[ZEROMV][ref_frame].as_int = 0;
  }

  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
    // All modes from vp9_mode_order that use this frame as any ref.
    static const int ref_frame_mask_all[] = {
      0x0, 0x123291, 0x25c444, 0x39b722
    };
    // Fixed mv modes (NEARESTMV, NEARMV, ZEROMV) from vp9_mode_order that
    // use this frame as their primary ref.
    static const int ref_frame_mask_fixedmv[] = {
      0x0, 0x121281, 0x24c404, 0x080102
    };
    if (!(cpi->ref_frame_flags & flag_list[ref_frame])) {
      // Skip modes for missing references.
      mode_skip_mask |= ref_frame_mask_all[ref_frame];
    } else if (cpi->sf.reference_masking) {
      for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
        // Skip fixed mv modes for poor references.
        if ((x->pred_mv_sad[ref_frame] >> 2) > x->pred_mv_sad[i]) {
          mode_skip_mask |= ref_frame_mask_fixedmv[ref_frame];
          break;
        }
      }
    }
    // If the segment reference frame feature is enabled, then skip all the
    // modes that use this ref frame when it is not the one allowed for the
    // segment.
    if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
        vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
      mode_skip_mask |= ref_frame_mask_all[ref_frame];
    }
  }

  // If the segment skip feature is enabled, mask out all the inter modes
  // with non-zero motion, leaving only ZEROMV and the intra modes.
  if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP)) {
    const int inter_non_zero_mode_mask = 0x1F7F7;
    mode_skip_mask |= inter_non_zero_mode_mask;
  }

  // Disable this drop-out case if the ref frame
  // segment level feature is enabled for this segment. This is to
  // prevent the possibility that we end up unable to pick any mode.
  if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
    // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
    // unless ARNR filtering is enabled in which case we want
    // an unfiltered alternative. We allow near/nearest as well
    // because they may result in zero-zero MVs but be cheaper.
    if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
      const int altref_zero_mask =
          ~((1 << THR_NEARESTA) | (1 << THR_NEARA) | (1 << THR_ZEROA));
      mode_skip_mask |= altref_zero_mask;
      if (frame_mv[NEARMV][ALTREF_FRAME].as_int != 0)
        mode_skip_mask |= (1 << THR_NEARA);
      if (frame_mv[NEARESTMV][ALTREF_FRAME].as_int != 0)
        mode_skip_mask |= (1 << THR_NEARESTA);
    }
  }

  // TODO(JBB): This is to make up for the fact that we don't have sad
  // functions that work when the block size reads outside the umv. We
  // should fix this either by making the motion search just work on
  // a representative block in the boundary (first) and then implement a
  // function that does sads when inside the border.
  if ((mi_row + bhs) > cm->mi_rows || (mi_col + bws) > cm->mi_cols) {
    const int new_modes_mask =
        (1 << THR_NEWMV) | (1 << THR_NEWG) | (1 << THR_NEWA) |
        (1 << THR_COMP_NEWLA) | (1 << THR_COMP_NEWGA);
    mode_skip_mask |= new_modes_mask;
  }

  if (bsize > cpi->sf.max_intra_bsize) {
    mode_skip_mask |= 0xFF30808;
  }

  if (!x->in_active_map) {
    int mode_index;
    assert(cpi->ref_frame_flags & VP9_LAST_FLAG);
    if (frame_mv[NEARESTMV][LAST_FRAME].as_int == 0)
      mode_index = THR_NEARESTMV;
    else if (frame_mv[NEARMV][LAST_FRAME].as_int == 0)
      mode_index = THR_NEARMV;
    else
      mode_index = THR_ZEROMV;
    mode_skip_mask = ~(1 << mode_index);
    mode_skip_start = MAX_MODES;
    disable_inter_mode_mask = 0;
  }
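  /* Worked example for the per-mode threshold test in the loop below
   * (hypothetical numbers): with rd_threshes[mode_index] = 2000 and
   * rd_thresh_freq_fact[mode_index] = 64, a mode is skipped once
   * best_rd < (2000 * 64) >> 5 == 4000; a factor of 32 leaves the base
   * threshold unchanged, and the factor is adapted elsewhere in the encoder
   * so that modes which rarely win are pruned earlier. */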
  for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
    int mode_excluded = 0;
    int64_t this_rd = INT64_MAX;
    int disable_skip = 0;
    int compmode_cost = 0;
    int rate2 = 0, rate_y = 0, rate_uv = 0;
    int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
    int skippable = 0;
    int64_t tx_cache[TX_MODES];
    int i;
    int this_skip2 = 0;
    int64_t total_sse = INT64_MAX;
    int early_term = 0;

    // Look at the reference frame of the best mode so far and set the
    // skip mask to look at a subset of the remaining modes.
    if (mode_index == mode_skip_start) {
      switch (vp9_mode_order[best_mode_index].ref_frame[0]) {
        case INTRA_FRAME:
          break;
        case LAST_FRAME:
          mode_skip_mask |= LAST_FRAME_MODE_MASK;
          break;
        case GOLDEN_FRAME:
          mode_skip_mask |= GOLDEN_FRAME_MODE_MASK;
          break;
        case ALTREF_FRAME:
          mode_skip_mask |= ALT_REF_MODE_MASK;
          break;
        case NONE:
        case MAX_REF_FRAMES:
          assert(0 && "Invalid Reference frame");
      }
    }
    if (mode_skip_mask & (1 << mode_index))
      continue;

    // Test the best rd so far against the threshold for trying this mode.
    if (best_rd < ((int64_t)rd_threshes[mode_index] *
                   rd_thresh_freq_fact[mode_index] >> 5) ||
        rd_threshes[mode_index] == INT_MAX)
      continue;

    this_mode = vp9_mode_order[mode_index].mode;
    ref_frame = vp9_mode_order[mode_index].ref_frame[0];
    if (ref_frame != INTRA_FRAME &&
        disable_inter_mode_mask & (1 << INTER_OFFSET(this_mode)))
      continue;
    second_ref_frame = vp9_mode_order[mode_index].ref_frame[1];

    comp_pred = second_ref_frame > INTRA_FRAME;
    if (comp_pred) {
      if ((mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
          vp9_mode_order[best_mode_index].ref_frame[0] == INTRA_FRAME)
        continue;
      if ((mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH) &&
          ref_frame != best_inter_ref_frame &&
          second_ref_frame != best_inter_ref_frame)
        continue;
      mode_excluded = cm->reference_mode == SINGLE_REFERENCE;
    } else {
      if (ref_frame != INTRA_FRAME)
        mode_excluded = cm->reference_mode == COMPOUND_REFERENCE;
    }

    if (ref_frame == INTRA_FRAME) {
      if (!(intra_y_mode_mask & (1 << this_mode)))
        continue;
      if (this_mode != DC_PRED) {
        // Disable intra modes other than DC_PRED for blocks with low variance.
        // Threshold for intra skipping based on source variance.
        // TODO(debargha): Specialize the threshold for super block sizes.
        const unsigned int skip_intra_var_thresh = 64;
        if ((mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
            x->source_variance < skip_intra_var_thresh)
          continue;
        // Only search the oblique modes if the best so far is
        // one of the neighboring directional modes.
        if ((mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
            (this_mode >= D45_PRED && this_mode <= TM_PRED)) {
          if (vp9_mode_order[best_mode_index].ref_frame[0] > INTRA_FRAME)
            continue;
        }
        if (mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
          if (conditional_skipintra(this_mode, best_intra_mode))
            continue;
        }
      }
    } else {
      if (x->in_active_map &&
          !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP))
        if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv,
                                disable_inter_mode_mask, this_mode, ref_frame,
                                second_ref_frame))
          continue;
    }

    mbmi->mode = this_mode;
    mbmi->uv_mode = x->in_active_map ? DC_PRED : this_mode;
    mbmi->ref_frame[0] = ref_frame;
    mbmi->ref_frame[1] = second_ref_frame;
    // Evaluate all sub-pel filters irrespective of whether we can use
    // them for this frame.
    mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
                                                          : cm->interp_filter;
    x->skip = 0;
    set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);

    // Select the prediction reference frames.
    for (i = 0; i < MAX_MB_PLANE; i++) {
      xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
      if (comp_pred)
        xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
    }

    for (i = 0; i < TX_MODES; ++i)
      tx_cache[i] = INT64_MAX;

#ifdef MODE_TEST_HIT_STATS
    // TEST/DEBUG CODE
    // Keep a record of the number of test hits at each size
    cpi->mode_test_hits[bsize]++;
#endif

    if (ref_frame == INTRA_FRAME) {
      TX_SIZE uv_tx;
      intra_super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL,
                            bsize, tx_cache, best_rd);

      if (rate_y == INT_MAX)
        continue;

      uv_tx = get_uv_tx_size_impl(mbmi->tx_size, bsize);
      if (rate_uv_intra[uv_tx] == INT_MAX) {
        choose_intra_uv_mode(cpi, ctx, bsize, uv_tx,
                             &rate_uv_intra[uv_tx], &rate_uv_tokenonly[uv_tx],
                             &dist_uv[uv_tx], &skip_uv[uv_tx],
                             &mode_uv[uv_tx]);
      }

      rate_uv = rate_uv_tokenonly[uv_tx];
      distortion_uv = dist_uv[uv_tx];
      skippable = skippable && skip_uv[uv_tx];
      mbmi->uv_mode = mode_uv[uv_tx];

      rate2 = rate_y + x->mbmode_cost[mbmi->mode] + rate_uv_intra[uv_tx];
      if (this_mode != DC_PRED && this_mode != TM_PRED)
        rate2 += intra_cost_penalty;
      distortion2 = distortion_y + distortion_uv;
    } else {
      this_rd = handle_inter_mode(cpi, x, tile, bsize,
                                  tx_cache,
                                  &rate2, &distortion2, &skippable,
                                  &rate_y, &distortion_y,
                                  &rate_uv, &distortion_uv,
                                  &mode_excluded, &disable_skip,
                                  &tmp_best_filter, frame_mv,
                                  mi_row, mi_col,
                                  single_newmv, &total_sse, best_rd);
      if (this_rd == INT64_MAX)
        continue;

      compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred);

      if (cm->reference_mode == REFERENCE_MODE_SELECT)
        rate2 += compmode_cost;
    }

    // Estimate the reference frame signaling cost and add it
    // to the rolling cost variable.
    if (comp_pred) {
      rate2 += ref_costs_comp[ref_frame];
    } else {
      rate2 += ref_costs_single[ref_frame];
    }

    if (!disable_skip) {
      // Test for the condition where the skip block will be activated
      // because there are no non-zero coefficients, and make any
      // necessary adjustment for rate. Ignore if skip is coded at
      // segment level as the cost won't have been added in.
      // Is Mb level skip allowed (i.e. not coded at segment level).
      const int mb_skip_allowed = !vp9_segfeature_active(seg, segment_id,
                                                         SEG_LVL_SKIP);

      if (skippable) {
        // Back out the coefficient coding costs
        rate2 -= (rate_y + rate_uv);
        // for best yrd calculation
        rate_uv = 0;

        if (mb_skip_allowed) {
          int prob_skip_cost;

          // Cost the skip mb case
          vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
          if (skip_prob) {
            prob_skip_cost = vp9_cost_bit(skip_prob, 1);
            rate2 += prob_skip_cost;
          }
        }
      } else if (mb_skip_allowed && ref_frame != INTRA_FRAME &&
                 !xd->lossless) {
        if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
            RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
          // Add in the cost of the no skip flag.
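          // Coding the residual is cheaper here than forcing skip (the
          // RDCOST comparison above), so pay for a skip flag of 0 and keep
          // the coefficient rate.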
          rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
        } else {
          // FIXME(rbultje) make this work for splitmv also
          rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
          distortion2 = total_sse;
          assert(total_sse >= 0);
          rate2 -= (rate_y + rate_uv);
          rate_y = 0;
          rate_uv = 0;
          this_skip2 = 1;
        }
      } else if (mb_skip_allowed) {
        // Add in the cost of the no skip flag.
        rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
      }

      // Calculate the final RD estimate for this mode.
      this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
    }

    if (ref_frame == INTRA_FRAME) {
      // Keep record of best intra rd
      if (this_rd < best_intra_rd) {
        best_intra_rd = this_rd;
        best_intra_mode = mbmi->mode;
      }
    } else {
      // Keep record of best inter rd with single reference
      if (!comp_pred && !mode_excluded && this_rd < best_inter_rd) {
        best_inter_rd = this_rd;
        best_inter_ref_frame = ref_frame;
      }
    }

    if (!disable_skip && ref_frame == INTRA_FRAME) {
      for (i = 0; i < REFERENCE_MODES; ++i)
        best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
      for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
        best_filter_rd[i] = MIN(best_filter_rd[i], this_rd);
    }

    // Store the respective mode distortions for later use.
    if (mode_distortions[this_mode] == -1
        || distortion2 < mode_distortions[this_mode]) {
      mode_distortions[this_mode] = distortion2;
    }

    // Did this mode help, i.e. is it the new best mode?
    if (this_rd < best_rd || x->skip) {
      int max_plane = MAX_MB_PLANE;
      if (!mode_excluded) {
        // Note index of best mode so far
        best_mode_index = mode_index;

        if (ref_frame == INTRA_FRAME) {
          /* required for left and above block mv */
          mbmi->mv[0].as_int = 0;
          max_plane = 1;
        }

        *returnrate = rate2;
        *returndistortion = distortion2;
        best_rd = this_rd;
        best_mbmode = *mbmi;
        best_skip2 = this_skip2;
        if (!x->select_txfm_size)
          swap_block_ptr(x, ctx, max_plane);
        vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
                   sizeof(uint8_t) * ctx->num_4x4_blk);

        // TODO(debargha): enhance this test with a better distortion
        // prediction based on qp, activity mask and history
        if ((mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
            (mode_index > MIN_EARLY_TERM_INDEX)) {
          const int qstep = xd->plane[0].dequant[1];
          // TODO(debargha): Enhance this by specializing for each mode_index
          int scale = 4;
          if (x->source_variance < UINT_MAX) {
            const int var_adjust = (x->source_variance < 16);
            scale -= var_adjust;
          }
          if (ref_frame > INTRA_FRAME &&
              distortion2 * scale < qstep * qstep) {
            early_term = 1;
          }
        }
      }
    }

    /* keep record of best compound/single-only prediction */
    if (!disable_skip && ref_frame != INTRA_FRAME) {
      int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;

      if (cm->reference_mode == REFERENCE_MODE_SELECT) {
        single_rate = rate2 - compmode_cost;
        hybrid_rate = rate2;
      } else {
        single_rate = rate2;
        hybrid_rate = rate2 + compmode_cost;
      }

      single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
      hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);

      if (!comp_pred) {
        if (single_rd < best_pred_rd[SINGLE_REFERENCE]) {
          best_pred_rd[SINGLE_REFERENCE] = single_rd;
        }
      } else {
        if (single_rd < best_pred_rd[COMPOUND_REFERENCE]) {
          best_pred_rd[COMPOUND_REFERENCE] = single_rd;
        }
      }
      if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
        best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;

      /* keep record of best filter type */
      if (!mode_excluded && cm->interp_filter != BILINEAR) {
        int64_t ref = cpi->rd_filter_cache[cm->interp_filter == SWITCHABLE ?
                                           SWITCHABLE_FILTERS :
                                           cm->interp_filter];

        for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
          int64_t adj_rd;
          if (ref == INT64_MAX)
            adj_rd = 0;
          else if (cpi->rd_filter_cache[i] == INT64_MAX)
            // When early termination is triggered, the encoder does not have
            // access to the rate-distortion cost. It only knows that the
            // cost should be above the maximum valid value. Hence it takes
            // the known maximum plus an arbitrary constant as the
            // rate-distortion cost.
            adj_rd = cpi->mask_filter_rd - ref + 10;
          else
            adj_rd = cpi->rd_filter_cache[i] - ref;

          adj_rd += this_rd;
          best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
        }
      }
    }

    /* keep record of best txfm size */
    if (bsize < BLOCK_32X32) {
      if (bsize < BLOCK_16X16)
        tx_cache[ALLOW_16X16] = tx_cache[ALLOW_8X8];

      tx_cache[ALLOW_32X32] = tx_cache[ALLOW_16X16];
    }
    if (!mode_excluded && this_rd != INT64_MAX) {
      for (i = 0; i < TX_MODES && tx_cache[i] < INT64_MAX; i++) {
        int64_t adj_rd = INT64_MAX;
        adj_rd = this_rd + tx_cache[i] - tx_cache[cm->tx_mode];

        if (adj_rd < best_tx_rd[i])
          best_tx_rd[i] = adj_rd;
      }
    }

    if (early_term)
      break;

    if (x->skip && !comp_pred)
      break;
  }

  if (best_rd >= best_rd_so_far)
    return INT64_MAX;

  // If we used an estimate for the uv intra rd in the loop above...
  if (cpi->sf.use_uv_intra_rd_estimate) {
    // Do Intra UV best rd mode selection if best mode choice above was intra.
    if (vp9_mode_order[best_mode_index].ref_frame[0] == INTRA_FRAME) {
      TX_SIZE uv_tx_size;
      *mbmi = best_mbmode;
      uv_tx_size = get_uv_tx_size(mbmi);
      rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size],
                              &rate_uv_tokenonly[uv_tx_size],
                              &dist_uv[uv_tx_size],
                              &skip_uv[uv_tx_size],
                              bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize,
                              uv_tx_size);
    }
  }

  assert((cm->interp_filter == SWITCHABLE) ||
         (cm->interp_filter == best_mbmode.interp_filter) ||
         !is_inter_block(&best_mbmode));

  // Updating rd_thresh_freq_fact[] here means that the different
  // partition/block sizes are handled independently based on the best
  // choice for the current partition. It may well be better to keep a scaled
  // best rd so far value and update rd_thresh_freq_fact based on the
  // mode/size combination that wins out.
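  // Sketch of the update below: the winning mode's factor decays
  // geometrically (fact -= fact >> 3, e.g. 64 -> 56 -> 49 -> ...), while
  // every other mode creeps up by RD_THRESH_INC until it saturates at
  // adaptive_rd_thresh * RD_THRESH_MAX_FACT, pricing rarely-used modes out
  // of the search over time.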
  if (cpi->sf.adaptive_rd_thresh) {
    for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
      int *const fact = &cpi->rd_thresh_freq_fact[bsize][mode_index];

      if (mode_index == best_mode_index) {
        *fact -= (*fact >> 3);
      } else {
        *fact = MIN(*fact + RD_THRESH_INC,
                    cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT);
      }
    }
  }

  // macroblock modes
  *mbmi = best_mbmode;
  x->skip |= best_skip2;

  for (i = 0; i < REFERENCE_MODES; ++i) {
    if (best_pred_rd[i] == INT64_MAX)
      best_pred_diff[i] = INT_MIN;
    else
      best_pred_diff[i] = best_rd - best_pred_rd[i];
  }

  if (!x->skip) {
    for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
      if (best_filter_rd[i] == INT64_MAX)
        best_filter_diff[i] = 0;
      else
        best_filter_diff[i] = best_rd - best_filter_rd[i];
    }
    if (cm->interp_filter == SWITCHABLE)
      assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
    for (i = 0; i < TX_MODES; i++) {
      if (best_tx_rd[i] == INT64_MAX)
        best_tx_diff[i] = 0;
      else
        best_tx_diff[i] = best_rd - best_tx_rd[i];
    }
  } else {
    vp9_zero(best_filter_diff);
    vp9_zero(best_tx_diff);
  }

  if (!x->in_active_map) {
    assert(mbmi->ref_frame[0] == LAST_FRAME);
    assert(mbmi->ref_frame[1] == NONE);
    assert(mbmi->mode == NEARESTMV ||
           mbmi->mode == NEARMV ||
           mbmi->mode == ZEROMV);
    assert(frame_mv[mbmi->mode][LAST_FRAME].as_int == 0);
    assert(mbmi->mode == mbmi->uv_mode);
  }

  set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
  store_coding_context(x, ctx, best_mode_index,
                       &mbmi->ref_mvs[mbmi->ref_frame[0]][0],
                       &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 :
                                      mbmi->ref_frame[1]][0],
                       best_pred_diff, best_tx_diff, best_filter_diff);

  return best_rd;
}


int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
                                      const TileInfo *const tile,
                                      int mi_row, int mi_col,
                                      int *returnrate,
                                      int64_t *returndistortion,
                                      BLOCK_SIZE bsize,
                                      PICK_MODE_CONTEXT *ctx,
                                      int64_t best_rd_so_far) {
  VP9_COMMON *cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const struct segmentation *seg = &cm->seg;
  MV_REFERENCE_FRAME ref_frame, second_ref_frame;
  unsigned char segment_id = mbmi->segment_id;
  int comp_pred, i;
  int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
  struct buf_2d yv12_mb[4][MAX_MB_PLANE];
  static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
                                    VP9_ALT_FLAG };
  int64_t best_rd = best_rd_so_far;
  int64_t best_yrd = best_rd_so_far;  // FIXME(rbultje) more precise
  int64_t best_tx_rd[TX_MODES];
  int64_t best_tx_diff[TX_MODES];
  int64_t best_pred_diff[REFERENCE_MODES];
  int64_t best_pred_rd[REFERENCE_MODES];
  int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
  int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
  MB_MODE_INFO best_mbmode = { 0 };
  int mode_index, best_mode_index = 0;
  unsigned int ref_costs_single[MAX_REF_FRAMES];
  unsigned int ref_costs_comp[MAX_REF_FRAMES];
  vp9_prob comp_mode_p;
  int64_t best_inter_rd = INT64_MAX;
  MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME;
  INTERP_FILTER tmp_best_filter = SWITCHABLE;
  int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
  int64_t dist_uv[TX_SIZES];
  int skip_uv[TX_SIZES];
  MB_PREDICTION_MODE mode_uv[TX_SIZES] = { 0 };
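  // intra_cost_penalty below scales with the DC quantizer step so that
  // intra modes are penalized more heavily as Q rises.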
  int intra_cost_penalty = 20 * vp9_dc_quant(cm->base_qindex,
                                             cm->y_dc_delta_q);
  int_mv seg_mvs[4][MAX_REF_FRAMES];
  b_mode_info best_bmodes[4];
  int best_skip2 = 0;
  int ref_frame_mask = 0;
  int mode_skip_mask = 0;

  x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
  vpx_memset(x->zcoeff_blk[TX_4X4], 0, 4);

  for (i = 0; i < 4; i++) {
    int j;
    for (j = 0; j < MAX_REF_FRAMES; j++)
      seg_mvs[i][j].as_int = INVALID_MV;
  }

  estimate_ref_frame_costs(cpi, segment_id, ref_costs_single, ref_costs_comp,
                           &comp_mode_p);

  for (i = 0; i < REFERENCE_MODES; ++i)
    best_pred_rd[i] = INT64_MAX;
  for (i = 0; i < TX_MODES; i++)
    best_tx_rd[i] = INT64_MAX;
  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
    best_filter_rd[i] = INT64_MAX;
  for (i = 0; i < TX_SIZES; i++)
    rate_uv_intra[i] = INT_MAX;

  *returnrate = INT_MAX;

  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
    if (cpi->ref_frame_flags & flag_list[ref_frame]) {
      vp9_setup_buffer_inter(cpi, x, tile,
                             ref_frame, bsize, mi_row, mi_col,
                             frame_mv[NEARESTMV], frame_mv[NEARMV],
                             yv12_mb);
    }
    frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
    frame_mv[ZEROMV][ref_frame].as_int = 0;
  }

  for (ref_frame = LAST_FRAME;
       ref_frame <= ALTREF_FRAME && cpi->sf.reference_masking; ++ref_frame) {
    int i;
    for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
      if ((x->pred_mv_sad[ref_frame] >> 1) > x->pred_mv_sad[i]) {
        ref_frame_mask |= (1 << ref_frame);
        break;
      }
    }
  }

  for (mode_index = 0; mode_index < MAX_REFS; ++mode_index) {
    int mode_excluded = 0;
    int64_t this_rd = INT64_MAX;
    int disable_skip = 0;
    int compmode_cost = 0;
    int rate2 = 0, rate_y = 0, rate_uv = 0;
    int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
    int skippable = 0;
    int64_t tx_cache[TX_MODES];
    int i;
    int this_skip2 = 0;
    int64_t total_sse = INT_MAX;
    int early_term = 0;

    for (i = 0; i < TX_MODES; ++i)
      tx_cache[i] = INT64_MAX;

    x->skip = 0;
    ref_frame = vp9_ref_order[mode_index].ref_frame[0];
    second_ref_frame = vp9_ref_order[mode_index].ref_frame[1];

    // Look at the reference frame of the best mode so far and set the
    // skip mask to look at a subset of the remaining modes.
    if (mode_index > 2 && cpi->sf.mode_skip_start < MAX_MODES) {
      if (mode_index == 3) {
        switch (vp9_ref_order[best_mode_index].ref_frame[0]) {
          case INTRA_FRAME:
            mode_skip_mask = 0;
            break;
          case LAST_FRAME:
            mode_skip_mask = 0x0010;
            break;
          case GOLDEN_FRAME:
            mode_skip_mask = 0x0008;
            break;
          case ALTREF_FRAME:
            mode_skip_mask = 0x0000;
            break;
          case NONE:
          case MAX_REF_FRAMES:
            assert(0 && "Invalid Reference frame");
        }
      }
      if (mode_skip_mask & (1 << mode_index))
        continue;
    }

    // Test best rd so far against threshold for trying this mode.
    if ((best_rd <
         ((int64_t)cpi->rd_thresh_sub8x8[segment_id][bsize][mode_index] *
          cpi->rd_thresh_freq_sub8x8[bsize][mode_index] >> 5)) ||
        cpi->rd_thresh_sub8x8[segment_id][bsize][mode_index] == INT_MAX)
      continue;

    // Do not allow compound prediction if the segment level reference
    // frame feature is in use as in this case there can only be one
    // reference.
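    // SEG_LVL_REF_FRAME pins this segment to a single reference frame, so
    // any mode carrying a valid second reference is redundant here.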
    if ((second_ref_frame > INTRA_FRAME) &&
        vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
      continue;

    mbmi->ref_frame[0] = ref_frame;
    mbmi->ref_frame[1] = second_ref_frame;

    if (!(ref_frame == INTRA_FRAME
          || (cpi->ref_frame_flags & flag_list[ref_frame]))) {
      continue;
    }
    if (!(second_ref_frame == NONE
          || (cpi->ref_frame_flags & flag_list[second_ref_frame]))) {
      continue;
    }

    comp_pred = second_ref_frame > INTRA_FRAME;
    if (comp_pred) {
      if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA)
        if (vp9_ref_order[best_mode_index].ref_frame[0] == INTRA_FRAME)
          continue;
      if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH)
        if (ref_frame != best_inter_ref_frame &&
            second_ref_frame != best_inter_ref_frame)
          continue;
    }

    // TODO(jingning, jkoleszar): scaling reference frame not supported for
    // sub8x8 blocks.
    if (ref_frame > 0 && vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf))
      continue;

    if (second_ref_frame > 0 &&
        vp9_is_scaled(&cm->frame_refs[second_ref_frame - 1].sf))
      continue;

    set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
    mbmi->uv_mode = DC_PRED;

    // Evaluate all sub-pel filters irrespective of whether we can use
    // them for this frame.
    mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
                                                          : cm->interp_filter;

    if (comp_pred) {
      if (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))
        continue;

      mode_excluded = mode_excluded ? mode_excluded
                                    : cm->reference_mode == SINGLE_REFERENCE;
    } else {
      if (ref_frame != INTRA_FRAME && second_ref_frame != INTRA_FRAME) {
        mode_excluded = mode_excluded ?
            mode_excluded : cm->reference_mode == COMPOUND_REFERENCE;
      }
    }

    // Select prediction reference frames.
    for (i = 0; i < MAX_MB_PLANE; i++) {
      xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
      if (comp_pred)
        xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
    }

    // If the segment reference frame feature is enabled,
    // then do nothing if the current ref frame is not allowed.
    if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
        vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) !=
            (int)ref_frame) {
      continue;
    // If the segment skip feature is enabled,
    // then do nothing if the current mode is not allowed.
    } else if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP) &&
               ref_frame != INTRA_FRAME) {
      continue;
    // Disable this drop-out case if the ref frame
    // segment level feature is enabled for this segment. This is to
    // prevent the possibility that we end up unable to pick any mode.
    } else if (!vp9_segfeature_active(seg, segment_id,
                                      SEG_LVL_REF_FRAME)) {
      // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
      // unless ARNR filtering is enabled in which case we want
      // an unfiltered alternative. We allow near/nearest as well
      // because they may result in zero-zero MVs but be cheaper.
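      // Note: when the source frame doubles as the (unfiltered) ALTREF,
      // sub-8x8 search is skipped outright here, presumably because the
      // whole-block ALTREF modes retained in vp9_rd_pick_inter_mode_sb are
      // expected to win.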
      if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0))
        continue;
    }

#ifdef MODE_TEST_HIT_STATS
    // TEST/DEBUG CODE
    // Keep a record of the number of test hits at each size
    cpi->mode_test_hits[bsize]++;
#endif

    if (ref_frame == INTRA_FRAME) {
      int rate;
      mbmi->tx_size = TX_4X4;
      if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y,
                                       &distortion_y, best_rd) >= best_rd)
        continue;
      rate2 += rate;
      rate2 += intra_cost_penalty;
      distortion2 += distortion_y;

      if (rate_uv_intra[TX_4X4] == INT_MAX) {
        choose_intra_uv_mode(cpi, ctx, bsize, TX_4X4,
                             &rate_uv_intra[TX_4X4],
                             &rate_uv_tokenonly[TX_4X4],
                             &dist_uv[TX_4X4], &skip_uv[TX_4X4],
                             &mode_uv[TX_4X4]);
      }
      rate2 += rate_uv_intra[TX_4X4];
      rate_uv = rate_uv_tokenonly[TX_4X4];
      distortion2 += dist_uv[TX_4X4];
      distortion_uv = dist_uv[TX_4X4];
      mbmi->uv_mode = mode_uv[TX_4X4];
      tx_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
      for (i = 0; i < TX_MODES; ++i)
        tx_cache[i] = tx_cache[ONLY_4X4];
    } else {
      int rate;
      int64_t distortion;
      int64_t this_rd_thresh;
      int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX;
      int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX;
      int64_t tmp_best_distortion = INT_MAX, tmp_best_sse, uv_sse;
      int tmp_best_skippable = 0;
      int switchable_filter_index;
      int_mv *second_ref = comp_pred ?
          &mbmi->ref_mvs[second_ref_frame][0] : NULL;
      b_mode_info tmp_best_bmodes[16];
      MB_MODE_INFO tmp_best_mbmode;
      BEST_SEG_INFO bsi[SWITCHABLE_FILTERS];
      int pred_exists = 0;
      int uv_skippable;

      this_rd_thresh = (ref_frame == LAST_FRAME) ?
          cpi->rd_thresh_sub8x8[segment_id][bsize][THR_LAST] :
          cpi->rd_thresh_sub8x8[segment_id][bsize][THR_ALTR];
      this_rd_thresh = (ref_frame == GOLDEN_FRAME) ?
          cpi->rd_thresh_sub8x8[segment_id][bsize][THR_GOLD] : this_rd_thresh;
      xd->mi[0]->mbmi.tx_size = TX_4X4;

      cpi->mask_filter_rd = 0;
      for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
        cpi->rd_filter_cache[i] = INT64_MAX;

      if (cm->interp_filter != BILINEAR) {
        tmp_best_filter = EIGHTTAP;
        if (x->source_variance <
            cpi->sf.disable_filter_search_var_thresh) {
          tmp_best_filter = EIGHTTAP;
        } else if (cpi->sf.adaptive_pred_interp_filter == 1 &&
                   ctx->pred_interp_filter < SWITCHABLE) {
          tmp_best_filter = ctx->pred_interp_filter;
        } else if (cpi->sf.adaptive_pred_interp_filter == 2) {
          tmp_best_filter = ctx->pred_interp_filter < SWITCHABLE ?
                                ctx->pred_interp_filter : 0;
        } else {
          for (switchable_filter_index = 0;
               switchable_filter_index < SWITCHABLE_FILTERS;
               ++switchable_filter_index) {
            int newbest, rs;
            int64_t rs_rd;
            mbmi->interp_filter = switchable_filter_index;
            tmp_rd = rd_pick_best_mbsegmentation(cpi, x, tile,
                                                 &mbmi->ref_mvs[ref_frame][0],
                                                 second_ref,
                                                 best_yrd,
                                                 &rate, &rate_y, &distortion,
                                                 &skippable, &total_sse,
                                                 (int)this_rd_thresh, seg_mvs,
                                                 bsi, switchable_filter_index,
                                                 mi_row, mi_col);

            if (tmp_rd == INT64_MAX)
              continue;
            rs = vp9_get_switchable_rate(x);
            rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
            cpi->rd_filter_cache[switchable_filter_index] = tmp_rd;
            cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
                MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS],
                    tmp_rd + rs_rd);
            if (cm->interp_filter == SWITCHABLE)
              tmp_rd += rs_rd;

            cpi->mask_filter_rd = MAX(cpi->mask_filter_rd, tmp_rd);

            newbest = (tmp_rd < tmp_best_rd);
            if (newbest) {
              tmp_best_filter = mbmi->interp_filter;
              tmp_best_rd = tmp_rd;
            }
            if ((newbest && cm->interp_filter == SWITCHABLE) ||
                (mbmi->interp_filter == cm->interp_filter &&
                 cm->interp_filter != SWITCHABLE)) {
              tmp_best_rdu = tmp_rd;
              tmp_best_rate = rate;
              tmp_best_ratey = rate_y;
              tmp_best_distortion = distortion;
              tmp_best_sse = total_sse;
              tmp_best_skippable = skippable;
              tmp_best_mbmode = *mbmi;
              for (i = 0; i < 4; i++) {
                tmp_best_bmodes[i] = xd->mi[0]->bmi[i];
                x->zcoeff_blk[TX_4X4][i] = !x->plane[0].eobs[i];
              }
              pred_exists = 1;
              if (switchable_filter_index == 0 &&
                  cpi->sf.use_rd_breakout &&
                  best_rd < INT64_MAX) {
                if (tmp_best_rdu / 2 > best_rd) {
                  // Skip searching the other filters if the first is
                  // already substantially larger than the best so far.
                  tmp_best_filter = mbmi->interp_filter;
                  tmp_best_rdu = INT64_MAX;
                  break;
                }
              }
            }
          }  // switchable_filter_index loop
        }
      }

      if (tmp_best_rdu == INT64_MAX && pred_exists)
        continue;

      mbmi->interp_filter = (cm->interp_filter == SWITCHABLE ?
                             tmp_best_filter : cm->interp_filter);
      if (!pred_exists) {
        // Handles the special case when a filter that is not in the
        // switchable list (bilinear, 6-tap) is indicated at the frame level
        tmp_rd = rd_pick_best_mbsegmentation(cpi, x, tile,
                                             &mbmi->ref_mvs[ref_frame][0],
                                             second_ref,
                                             best_yrd,
                                             &rate, &rate_y, &distortion,
                                             &skippable, &total_sse,
                                             (int)this_rd_thresh, seg_mvs,
                                             bsi, 0,
                                             mi_row, mi_col);
        if (tmp_rd == INT64_MAX)
          continue;
      } else {
        total_sse = tmp_best_sse;
        rate = tmp_best_rate;
        rate_y = tmp_best_ratey;
        distortion = tmp_best_distortion;
        skippable = tmp_best_skippable;
        *mbmi = tmp_best_mbmode;
        for (i = 0; i < 4; i++)
          xd->mi[0]->bmi[i] = tmp_best_bmodes[i];
      }

      rate2 += rate;
      distortion2 += distortion;

      if (cm->interp_filter == SWITCHABLE)
        rate2 += vp9_get_switchable_rate(x);

      if (!mode_excluded)
        mode_excluded = comp_pred ? cm->reference_mode == SINGLE_REFERENCE
                                  : cm->reference_mode == COMPOUND_REFERENCE;

      compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred);

      tmp_best_rdu = best_rd -
          MIN(RDCOST(x->rdmult, x->rddiv, rate2, distortion2),
              RDCOST(x->rdmult, x->rddiv, 0, total_sse));

      if (tmp_best_rdu > 0) {
        // If even the 'Y' rd value of split is higher than best so far
        // then don't bother looking at UV.
        vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
                                        BLOCK_8X8);
        super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
                         &uv_sse, BLOCK_8X8, tmp_best_rdu);
        if (rate_uv == INT_MAX)
          continue;
        rate2 += rate_uv;
        distortion2 += distortion_uv;
        skippable = skippable && uv_skippable;
        total_sse += uv_sse;

        tx_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
        for (i = 0; i < TX_MODES; ++i)
          tx_cache[i] = tx_cache[ONLY_4X4];
      }
    }

    if (cm->reference_mode == REFERENCE_MODE_SELECT)
      rate2 += compmode_cost;

    // Estimate the reference frame signaling cost and add it
    // to the rolling cost variable.
    if (second_ref_frame > INTRA_FRAME) {
      rate2 += ref_costs_comp[ref_frame];
    } else {
      rate2 += ref_costs_single[ref_frame];
    }

    if (!disable_skip) {
      // Test for the condition where the skip block will be activated
      // because there are no non-zero coefficients, and make any
      // necessary adjustment for rate. Ignore if skip is coded at
      // segment level as the cost won't have been added in.
      // Is Mb level skip allowed (i.e. not coded at segment level).
      const int mb_skip_allowed = !vp9_segfeature_active(seg, segment_id,
                                                         SEG_LVL_SKIP);

      if (mb_skip_allowed && ref_frame != INTRA_FRAME && !xd->lossless) {
        if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
            RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
          // Add in the cost of the no skip flag.
          rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
        } else {
          // FIXME(rbultje) make this work for splitmv also
          rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
          distortion2 = total_sse;
          assert(total_sse >= 0);
          rate2 -= (rate_y + rate_uv);
          rate_y = 0;
          rate_uv = 0;
          this_skip2 = 1;
        }
      } else if (mb_skip_allowed) {
        // Add in the cost of the no skip flag.
        rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
      }

      // Calculate the final RD estimate for this mode.
      this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
    }

    // Keep record of best inter rd with single reference
    if (is_inter_block(&xd->mi[0]->mbmi) &&
        !has_second_ref(&xd->mi[0]->mbmi) &&
        !mode_excluded &&
        this_rd < best_inter_rd) {
      best_inter_rd = this_rd;
      best_inter_ref_frame = ref_frame;
    }

    if (!disable_skip && ref_frame == INTRA_FRAME) {
      for (i = 0; i < REFERENCE_MODES; ++i)
        best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
      for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
        best_filter_rd[i] = MIN(best_filter_rd[i], this_rd);
    }

    // Did this mode help, i.e. is it the new best mode?
    if (this_rd < best_rd || x->skip) {
      if (!mode_excluded) {
        int max_plane = MAX_MB_PLANE;
        // Note index of best mode so far
        best_mode_index = mode_index;

        if (ref_frame == INTRA_FRAME) {
          /* required for left and above block mv */
          mbmi->mv[0].as_int = 0;
          max_plane = 1;
        }

        *returnrate = rate2;
        *returndistortion = distortion2;
        best_rd = this_rd;
        best_yrd = best_rd -
            RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv);
        best_mbmode = *mbmi;
        best_skip2 = this_skip2;
        if (!x->select_txfm_size)
          swap_block_ptr(x, ctx, max_plane);
        vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
                   sizeof(uint8_t) * ctx->num_4x4_blk);

        for (i = 0; i < 4; i++)
          best_bmodes[i] = xd->mi[0]->bmi[i];

        // TODO(debargha): enhance this test with a better distortion
        // prediction based on qp, activity mask and history
        if ((cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
            (mode_index > MIN_EARLY_TERM_INDEX)) {
          const int qstep = xd->plane[0].dequant[1];
          // TODO(debargha): Enhance this by specializing for each mode_index
          int scale = 4;
          if (x->source_variance < UINT_MAX) {
            const int var_adjust = (x->source_variance < 16);
            scale -= var_adjust;
          }
          if (ref_frame > INTRA_FRAME &&
              distortion2 * scale < qstep * qstep) {
            early_term = 1;
          }
        }
      }
    }

    /* keep record of best compound/single-only prediction */
    if (!disable_skip && ref_frame != INTRA_FRAME) {
      int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;

      if (cm->reference_mode == REFERENCE_MODE_SELECT) {
        single_rate = rate2 - compmode_cost;
        hybrid_rate = rate2;
      } else {
        single_rate = rate2;
        hybrid_rate = rate2 + compmode_cost;
      }

      single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
      hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);

      if (second_ref_frame <= INTRA_FRAME &&
          single_rd < best_pred_rd[SINGLE_REFERENCE]) {
        best_pred_rd[SINGLE_REFERENCE] = single_rd;
      } else if (second_ref_frame > INTRA_FRAME &&
                 single_rd < best_pred_rd[COMPOUND_REFERENCE]) {
        best_pred_rd[COMPOUND_REFERENCE] = single_rd;
      }
      if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
        best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
    }

    /* keep record of best filter type */
    if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME &&
        cm->interp_filter != BILINEAR) {
      int64_t ref = cpi->rd_filter_cache[cm->interp_filter == SWITCHABLE ?
                                         SWITCHABLE_FILTERS :
                                         cm->interp_filter];
      int64_t adj_rd;
      for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
        if (ref == INT64_MAX)
          adj_rd = 0;
        else if (cpi->rd_filter_cache[i] == INT64_MAX)
          // When early termination is triggered, the encoder does not have
          // access to the rate-distortion cost. It only knows that the
          // cost should be above the maximum valid value. Hence it takes
          // the known maximum plus an arbitrary constant as the
          // rate-distortion cost.
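          // (cpi->mask_filter_rd tracks the largest filter RD seen for this
          // block, so mask_filter_rd - ref + 10 is intended to exceed every
          // valid cached difference.)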
          adj_rd = cpi->mask_filter_rd - ref + 10;
        else
          adj_rd = cpi->rd_filter_cache[i] - ref;

        adj_rd += this_rd;
        best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
      }
    }

    /* keep record of best txfm size */
    if (bsize < BLOCK_32X32) {
      if (bsize < BLOCK_16X16) {
        tx_cache[ALLOW_8X8] = tx_cache[ONLY_4X4];
        tx_cache[ALLOW_16X16] = tx_cache[ALLOW_8X8];
      }
      tx_cache[ALLOW_32X32] = tx_cache[ALLOW_16X16];
    }
    if (!mode_excluded && this_rd != INT64_MAX) {
      for (i = 0; i < TX_MODES && tx_cache[i] < INT64_MAX; i++) {
        int64_t adj_rd = INT64_MAX;
        if (ref_frame > INTRA_FRAME)
          adj_rd = this_rd + tx_cache[i] - tx_cache[cm->tx_mode];
        else
          adj_rd = this_rd;

        if (adj_rd < best_tx_rd[i])
          best_tx_rd[i] = adj_rd;
      }
    }

    if (early_term)
      break;

    if (x->skip && !comp_pred)
      break;
  }

  if (best_rd >= best_rd_so_far)
    return INT64_MAX;

  // If we used an estimate for the uv intra rd in the loop above...
  if (cpi->sf.use_uv_intra_rd_estimate) {
    // Do Intra UV best rd mode selection if best mode choice above was intra.
    if (vp9_ref_order[best_mode_index].ref_frame[0] == INTRA_FRAME) {
      TX_SIZE uv_tx_size;
      *mbmi = best_mbmode;
      uv_tx_size = get_uv_tx_size(mbmi);
      rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size],
                              &rate_uv_tokenonly[uv_tx_size],
                              &dist_uv[uv_tx_size],
                              &skip_uv[uv_tx_size],
                              BLOCK_8X8, uv_tx_size);
    }
  }

  if (best_rd == INT64_MAX && bsize < BLOCK_8X8) {
    *returnrate = INT_MAX;
    *returndistortion = INT64_MAX;
    return best_rd;
  }

  assert((cm->interp_filter == SWITCHABLE) ||
         (cm->interp_filter == best_mbmode.interp_filter) ||
         !is_inter_block(&best_mbmode));

  // Updating rd_thresh_freq_fact[] here means that the different
  // partition/block sizes are handled independently based on the best
  // choice for the current partition. It may well be better to keep a scaled
  // best rd so far value and update rd_thresh_freq_fact based on the
  // mode/size combination that wins out.
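  // Same decay/ramp scheme as at the end of vp9_rd_pick_inter_mode_sb, but
  // applied to the MAX_REFS sub-8x8 reference combinations via
  // rd_thresh_freq_sub8x8.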
  if (cpi->sf.adaptive_rd_thresh) {
    for (mode_index = 0; mode_index < MAX_REFS; ++mode_index) {
      int *const fact = &cpi->rd_thresh_freq_sub8x8[bsize][mode_index];

      if (mode_index == best_mode_index) {
        *fact -= (*fact >> 3);
      } else {
        *fact = MIN(*fact + RD_THRESH_INC,
                    cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT);
      }
    }
  }

  // macroblock modes
  *mbmi = best_mbmode;
  x->skip |= best_skip2;
  if (!is_inter_block(&best_mbmode)) {
    for (i = 0; i < 4; i++)
      xd->mi[0]->bmi[i].as_mode = best_bmodes[i].as_mode;
  } else {
    for (i = 0; i < 4; ++i)
      vpx_memcpy(&xd->mi[0]->bmi[i], &best_bmodes[i], sizeof(b_mode_info));

    mbmi->mv[0].as_int = xd->mi[0]->bmi[3].as_mv[0].as_int;
    mbmi->mv[1].as_int = xd->mi[0]->bmi[3].as_mv[1].as_int;
  }

  for (i = 0; i < REFERENCE_MODES; ++i) {
    if (best_pred_rd[i] == INT64_MAX)
      best_pred_diff[i] = INT_MIN;
    else
      best_pred_diff[i] = best_rd - best_pred_rd[i];
  }

  if (!x->skip) {
    for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
      if (best_filter_rd[i] == INT64_MAX)
        best_filter_diff[i] = 0;
      else
        best_filter_diff[i] = best_rd - best_filter_rd[i];
    }
    if (cm->interp_filter == SWITCHABLE)
      assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
  } else {
    vp9_zero(best_filter_diff);
  }

  if (!x->skip) {
    for (i = 0; i < TX_MODES; i++) {
      if (best_tx_rd[i] == INT64_MAX)
        best_tx_diff[i] = 0;
      else
        best_tx_diff[i] = best_rd - best_tx_rd[i];
    }
  } else {
    vp9_zero(best_tx_diff);
  }

  set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
  store_coding_context(x, ctx, best_mode_index,
                       &mbmi->ref_mvs[mbmi->ref_frame[0]][0],
                       &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 :
                                      mbmi->ref_frame[1]][0],
                       best_pred_diff, best_tx_diff, best_filter_diff);

  return best_rd;
}
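
// Note on the return convention shared by both mode pickers above: a return
// value of INT64_MAX signals that no mode improved on best_rd_so_far, so a
// caller can compare the return value directly against its running best rd.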