1 /* 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include <assert.h> 12 #include <math.h> 13 #include <stdio.h> 14 15 #include "./vp9_rtcd.h" 16 17 #include "vpx_dsp/vpx_dsp_common.h" 18 #include "vpx_mem/vpx_mem.h" 19 #include "vpx_ports/bitops.h" 20 #include "vpx_ports/mem.h" 21 #include "vpx_ports/system_state.h" 22 23 #include "vp9/common/vp9_common.h" 24 #include "vp9/common/vp9_entropy.h" 25 #include "vp9/common/vp9_entropymode.h" 26 #include "vp9/common/vp9_mvref_common.h" 27 #include "vp9/common/vp9_pred_common.h" 28 #include "vp9/common/vp9_quant_common.h" 29 #include "vp9/common/vp9_reconinter.h" 30 #include "vp9/common/vp9_reconintra.h" 31 #include "vp9/common/vp9_seg_common.h" 32 33 #include "vp9/encoder/vp9_cost.h" 34 #include "vp9/encoder/vp9_encodemb.h" 35 #include "vp9/encoder/vp9_encodemv.h" 36 #include "vp9/encoder/vp9_encoder.h" 37 #include "vp9/encoder/vp9_mcomp.h" 38 #include "vp9/encoder/vp9_quantize.h" 39 #include "vp9/encoder/vp9_ratectrl.h" 40 #include "vp9/encoder/vp9_rd.h" 41 #include "vp9/encoder/vp9_tokenize.h" 42 43 #define RD_THRESH_POW 1.25 44 45 // Factor to weigh the rate for switchable interp filters. 46 #define SWITCHABLE_INTERP_RATE_FACTOR 1 47 48 void vp9_rd_cost_reset(RD_COST *rd_cost) { 49 rd_cost->rate = INT_MAX; 50 rd_cost->dist = INT64_MAX; 51 rd_cost->rdcost = INT64_MAX; 52 } 53 54 void vp9_rd_cost_init(RD_COST *rd_cost) { 55 rd_cost->rate = 0; 56 rd_cost->dist = 0; 57 rd_cost->rdcost = 0; 58 } 59 60 // The baseline rd thresholds for breaking out of the rd loop for 61 // certain modes are assumed to be based on 8x8 blocks. 
// This table is used to correct for block size.
// The factors here are << 2 (2 = x0.5, 32 = x8 etc).
static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES] = {
  2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32
};

// Populate the encoder's per-frame mode cost tables from the current frame
// context probabilities: intra y/uv mode costs, switchable interpolation
// filter costs and transform-size selection costs.
static void fill_mode_costs(VP9_COMP *cpi) {
  const FRAME_CONTEXT *const fc = cpi->common.fc;
  int i, j;

  // Key-frame y mode costs are conditioned on the (above, left) mode pair.
  for (i = 0; i < INTRA_MODES; ++i) {
    for (j = 0; j < INTRA_MODES; ++j) {
      vp9_cost_tokens(cpi->y_mode_costs[i][j], vp9_kf_y_mode_prob[i][j],
                      vp9_intra_mode_tree);
    }
  }

  vp9_cost_tokens(cpi->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree);
  // uv mode costs are conditioned on the co-located y mode; key frames use
  // the fixed key-frame tables, inter frames the adaptive frame context.
  for (i = 0; i < INTRA_MODES; ++i) {
    vp9_cost_tokens(cpi->intra_uv_mode_cost[KEY_FRAME][i],
                    vp9_kf_uv_mode_prob[i], vp9_intra_mode_tree);
    vp9_cost_tokens(cpi->intra_uv_mode_cost[INTER_FRAME][i],
                    fc->uv_mode_prob[i], vp9_intra_mode_tree);
  }

  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) {
    vp9_cost_tokens(cpi->switchable_interp_costs[i],
                    fc->switchable_interp_prob[i], vp9_switchable_interp_tree);
  }

  // Tx-size selection is coded as a sequence of binary decisions; the cost
  // of choosing size k under max size i is the sum of the zero/one bit costs
  // along that path (the final decision is implicit when k == i).
  for (i = TX_8X8; i < TX_SIZES; ++i) {
    for (j = 0; j < TX_SIZE_CONTEXTS; ++j) {
      const vpx_prob *tx_probs = get_tx_probs(i, j, &fc->tx_probs);
      int k;
      for (k = 0; k <= i; ++k) {
        int cost = 0;
        int m;
        for (m = 0; m <= k - (k == i); ++m) {
          if (m == k)
            cost += vp9_cost_zero(tx_probs[m]);
          else
            cost += vp9_cost_one(tx_probs[m]);
        }
        cpi->tx_size_cost[i - 1][j][k] = cost;
      }
    }
  }
}

// Build coefficient token cost tables for every transform size, plane type,
// reference type, band and context, expanding the compact model
// probabilities to the full entropy-node set first. Index [0] holds costs
// including the EOB decision, index [1] the "skip EOB" variant; both must
// agree on the EOB token cost itself (checked by the assert).
static void fill_token_costs(vp9_coeff_cost *c,
                             vp9_coeff_probs_model (*p)[PLANE_TYPES]) {
  int i, j, k, l;
  TX_SIZE t;
  for (t = TX_4X4; t <= TX_32X32; ++t)
    for (i = 0; i < PLANE_TYPES; ++i)
      for (j = 0; j < REF_TYPES; ++j)
        for (k = 0; k < COEF_BANDS; ++k)
          for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
            vpx_prob probs[ENTROPY_NODES];
            vp9_model_to_full_probs(p[t][i][j][k][l], probs);
            vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs, vp9_coef_tree);
            vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
                                 vp9_coef_tree);
            assert(c[t][i][j][k][0][l][EOB_TOKEN] ==
                   c[t][i][j][k][1][l][EOB_TOKEN]);
          }
}

// Values are now correlated to quantizer.
// Per-qindex SAD-per-bit lookup tables, one pair per supported bit depth.
static int sad_per_bit16lut_8[QINDEX_RANGE];
static int sad_per_bit4lut_8[QINDEX_RANGE];

#if CONFIG_VP9_HIGHBITDEPTH
static int sad_per_bit16lut_10[QINDEX_RANGE];
static int sad_per_bit4lut_10[QINDEX_RANGE];
static int sad_per_bit16lut_12[QINDEX_RANGE];
static int sad_per_bit4lut_12[QINDEX_RANGE];
#endif

// Fill one pair of SAD-per-bit tables for the given bit depth using linear
// functions of the real quantizer value.
static void init_me_luts_bd(int *bit16lut, int *bit4lut, int range,
                            vpx_bit_depth_t bit_depth) {
  int i;
  // Initialize the sad lut tables using a formulaic calculation for now.
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  for (i = 0; i < range; i++) {
    const double q = vp9_convert_qindex_to_q(i, bit_depth);
    bit16lut[i] = (int)(0.0418 * q + 2.4107);
    bit4lut[i] = (int)(0.063 * q + 2.742);
  }
}

// One-time initialization of all motion-estimation SAD-per-bit tables.
void vp9_init_me_luts(void) {
  init_me_luts_bd(sad_per_bit16lut_8, sad_per_bit4lut_8, QINDEX_RANGE,
                  VPX_BITS_8);
#if CONFIG_VP9_HIGHBITDEPTH
  init_me_luts_bd(sad_per_bit16lut_10, sad_per_bit4lut_10, QINDEX_RANGE,
                  VPX_BITS_10);
  init_me_luts_bd(sad_per_bit16lut_12, sad_per_bit4lut_12, QINDEX_RANGE,
                  VPX_BITS_12);
#endif
}

// Q7 boost factors indexed by gfu_boost / 100 (clamped to 15); higher boost
// indexes contribute a smaller additional rdmult term in modulate_rdmult().
static const int rd_boost_factor[16] = { 64, 32, 32, 32, 24, 16, 12, 12,
                                         8,  8,  4,  4,  2,  2,  1,  0 };

// Note that the element below for frame type "USE_BUF_FRAME", which indicates
// that the show frame flag is set, should not be used as no real frame
// is encoded so we should not reach here. However, a dummy value
// is inserted here to make sure the data structure has the right number
// of values assigned.
// Q7 rdmult scale factors indexed by FRAME_UPDATE_TYPE.
static const int rd_frame_type_factor[FRAME_UPDATE_TYPES] = { 128, 144, 128,
                                                              128, 144, 144 };

// Compute the base Lagrangian rate-distortion multiplier for a quantizer
// index. The multiplier grows with the square of the dc quantizer step and
// is further shaped per frame type and qindex band.
int vp9_compute_rd_mult_based_on_qindex(const VP9_COMP *cpi, int qindex) {
  // largest dc_quant is 21387, therefore rdmult should always fit in int32_t
  const int q = vp9_dc_quant(qindex, 0, cpi->common.bit_depth);
  uint32_t rdmult = q * q;

  if (cpi->common.frame_type != KEY_FRAME) {
    if (qindex < 128)
      rdmult = rdmult * 4;
    else if (qindex < 190)
      rdmult = rdmult * 4 + rdmult / 2;
    else
      rdmult = rdmult * 3;
  } else {
    if (qindex < 64)
      rdmult = rdmult * 4;
    else if (qindex <= 128)
      rdmult = rdmult * 3 + rdmult / 2;
    else if (qindex < 190)
      rdmult = rdmult * 4 + rdmult / 2;
    else
      rdmult = rdmult * 7 + rdmult / 2;
  }
#if CONFIG_VP9_HIGHBITDEPTH
  // Higher bit depths use larger quantizer step values; scale the squared
  // term back to an 8-bit-equivalent range.
  switch (cpi->common.bit_depth) {
    case VPX_BITS_10: rdmult = ROUND_POWER_OF_TWO(rdmult, 4); break;
    case VPX_BITS_12: rdmult = ROUND_POWER_OF_TWO(rdmult, 8); break;
    default: break;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH
  // Clamp to a minimum of 1.
  return rdmult > 0 ? rdmult : 1;
}

// In two-pass encoding of non-key frames, scale rdmult by the GF group
// frame-type factor and add a boost-dependent term (both factors are Q7).
// Intermediate math is done in 64 bits to avoid overflow.
static int modulate_rdmult(const VP9_COMP *cpi, int rdmult) {
  int64_t rdmult_64 = rdmult;
  if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
    const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index];
    const int gfu_boost = cpi->multi_layer_arf
                              ? gf_group->gfu_boost[gf_group->index]
                              : cpi->rc.gfu_boost;
    const int boost_index = VPXMIN(15, (gfu_boost / 100));

    rdmult_64 = (rdmult_64 * rd_frame_type_factor[frame_type]) >> 7;
    rdmult_64 += ((rdmult_64 * rd_boost_factor[boost_index]) >> 7);
  }
  return (int)rdmult_64;
}

// Full rdmult for a qindex: base value plus two-pass GF-group modulation.
int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) {
  int rdmult = vp9_compute_rd_mult_based_on_qindex(cpi, qindex);
  return modulate_rdmult(cpi, rdmult);
}

// Adaptive variant: divide the base multiplier by |beta| (result clamped to
// at least 1) before applying the usual two-pass modulation.
int vp9_get_adaptive_rdmult(const VP9_COMP *cpi, double beta) {
  int rdmult =
      vp9_compute_rd_mult_based_on_qindex(cpi, cpi->common.base_qindex);
  rdmult = (int)((double)rdmult / beta);
  rdmult = rdmult > 0 ? rdmult : 1;
  return modulate_rdmult(cpi, rdmult);
}

// Map a quantizer index to a mode-pruning threshold factor proportional to
// q^RD_THRESH_POW. The dc quant is first normalized to an 8-bit-equivalent
// scale (divide by 4/16/64 for 8/10/12 bits).
static int compute_rd_thresh_factor(int qindex, vpx_bit_depth_t bit_depth) {
  double q;
#if CONFIG_VP9_HIGHBITDEPTH
  switch (bit_depth) {
    case VPX_BITS_8: q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0; break;
    case VPX_BITS_10: q = vp9_dc_quant(qindex, 0, VPX_BITS_10) / 16.0; break;
    default:
      assert(bit_depth == VPX_BITS_12);
      q = vp9_dc_quant(qindex, 0, VPX_BITS_12) / 64.0;
      break;
  }
#else
  (void)bit_depth;
  q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0;
#endif  // CONFIG_VP9_HIGHBITDEPTH
  // TODO(debargha): Adjust the function below.
  return VPXMAX((int)(pow(q, RD_THRESH_POW) * 5.12), 8);
}

// Load the SAD-per-bit constants for the current quantizer (and bit depth)
// into the macroblock context used by motion estimation.
void vp9_initialize_me_consts(VP9_COMP *cpi, MACROBLOCK *x, int qindex) {
#if CONFIG_VP9_HIGHBITDEPTH
  switch (cpi->common.bit_depth) {
    case VPX_BITS_8:
      x->sadperbit16 = sad_per_bit16lut_8[qindex];
      x->sadperbit4 = sad_per_bit4lut_8[qindex];
      break;
    case VPX_BITS_10:
      x->sadperbit16 = sad_per_bit16lut_10[qindex];
      x->sadperbit4 = sad_per_bit4lut_10[qindex];
      break;
    default:
      assert(cpi->common.bit_depth == VPX_BITS_12);
      x->sadperbit16 = sad_per_bit16lut_12[qindex];
      x->sadperbit4 = sad_per_bit4lut_12[qindex];
      break;
  }
#else
  (void)cpi;
  x->sadperbit16 = sad_per_bit16lut_8[qindex];
  x->sadperbit4 = sad_per_bit4lut_8[qindex];
#endif  // CONFIG_VP9_HIGHBITDEPTH
}

// Fill rd->threshes for every segment and block size: each mode's baseline
// thresh_mult is scaled by the segment quantizer factor and the block-size
// factor, saturating to INT_MAX when the product would overflow.
static void set_block_thresholds(const VP9_COMMON *cm, RD_OPT *rd) {
  int i, bsize, segment_id;

  for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
    const int qindex =
        clamp(vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex) +
                  cm->y_dc_delta_q,
              0, MAXQ);
    const int q = compute_rd_thresh_factor(qindex, cm->bit_depth);

    for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
      // Threshold here seems unnecessarily harsh but fine given actual
      // range of values used for cpi->sf.thresh_mult[].
      const int t = q * rd_thresh_block_size_factor[bsize];
      const int thresh_max = INT_MAX / t;

      // 8x8 and above use the full inter/intra mode list; sub-8x8 blocks
      // are thresholded per reference combination instead.
      if (bsize >= BLOCK_8X8) {
        for (i = 0; i < MAX_MODES; ++i)
          rd->threshes[segment_id][bsize][i] = rd->thresh_mult[i] < thresh_max
                                                   ? rd->thresh_mult[i] * t / 4
                                                   : INT_MAX;
      } else {
        for (i = 0; i < MAX_REFS; ++i)
          rd->threshes[segment_id][bsize][i] =
              rd->thresh_mult_sub8x8[i] < thresh_max
                  ? rd->thresh_mult_sub8x8[i] * t / 4
                  : INT_MAX;
      }
    }
  }
}

// Per-frame RD setup: compute RDMULT/RDDIV, refresh block thresholds,
// partition probabilities, and (depending on pass and speed features) the
// token, partition, mode and motion-vector cost tables.
void vp9_initialize_rd_consts(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &cpi->td.mb;
  MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
  RD_OPT *const rd = &cpi->rd;
  int i;

  vpx_clear_system_state();

  rd->RDDIV = RDDIV_BITS;  // In bits (to multiply D by 128).
  rd->RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);

  set_error_per_bit(x, rd->RDMULT);

  x->select_tx_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
                       cm->frame_type != KEY_FRAME)
                          ? 0
                          : 1;

  set_block_thresholds(cm, rd);
  set_partition_probs(cm, xd);

  if (cpi->oxcf.pass == 1) {
    // First pass only needs MV costs (for inter frames).
    if (!frame_is_intra_only(cm))
      vp9_build_nmv_cost_table(
          x->nmvjointcost,
          cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost,
          &cm->fc->nmvc, cm->allow_high_precision_mv);
  } else {
    if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME)
      fill_token_costs(x->token_costs, cm->fc->coef_probs);

    if (cpi->sf.partition_search_type != VAR_BASED_PARTITION ||
        cm->frame_type == KEY_FRAME) {
      for (i = 0; i < PARTITION_CONTEXTS; ++i)
        vp9_cost_tokens(cpi->partition_cost[i], get_partition_probs(xd, i),
                        vp9_partition_tree);
    }

    // In non-RD mode the mode/MV cost tables are refreshed only
    // periodically (every 8th frame) and on key frames.
    if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1 ||
        cm->frame_type == KEY_FRAME) {
      fill_mode_costs(cpi);

      if (!frame_is_intra_only(cm)) {
        vp9_build_nmv_cost_table(
            x->nmvjointcost,
            cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost,
            &cm->fc->nmvc, cm->allow_high_precision_mv);

        for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
          vp9_cost_tokens((int *)cpi->inter_mode_cost[i],
                          cm->fc->inter_mode_probs[i], vp9_inter_mode_tree);
      }
    }
  }
}

// NOTE: The tables below must be of the same size.

// The functions described below are sampled at the four most significant
// bits of x^2 + 8 / 256.

// Normalized rate:
// This table models the rate for a Laplacian source with given variance
// when quantized with a uniform quantizer with given stepsize. The
// closed form expression is:
// Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
// where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
// and H(x) is the binary entropy function.
static const int rate_tab_q10[] = {
  65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651, 4553, 4389, 4255, 4142,
  4044,  3958, 3881, 3811, 3748, 3635, 3538, 3453, 3376, 3307, 3244, 3186,
  3133,  3037, 2952, 2877, 2809, 2747, 2690, 2638, 2589, 2501, 2423, 2353,
  2290,  2232, 2179, 2130, 2084, 2001, 1928, 1862, 1802, 1748, 1698, 1651,
  1608,  1530, 1460, 1398, 1342, 1290, 1243, 1199, 1159, 1086, 1021, 963,
  911,   864,  821,  781,  745,  680,  623,  574,  530,  490,  455,  424,
  395,   345,  304,  269,  239,  213,  190,  171,  154,  126,  104,  87,
  73,    61,   52,   44,   38,   28,   21,   16,   12,   10,   8,    6,
  5,     3,    2,    1,    1,    1,    0,    0,
};

// Normalized distortion:
// This table models the normalized distortion for a Laplacian source
// with given variance when quantized with a uniform quantizer
// with given stepsize. The closed form expression is:
// Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
// where x = qpstep / sqrt(variance).
// Note the actual distortion is Dn * variance.
static const int dist_tab_q10[] = {
  0,    0,    1,    1,    1,    2,    2,    2,    3,    3,    4,    5,
  5,    6,    7,    7,    8,    9,    11,   12,   13,   15,   16,   17,
  18,   21,   24,   26,   29,   31,   34,   36,   39,   44,   49,   54,
  59,   64,   69,   73,   78,   88,   97,   106,  115,  124,  133,  142,
  151,  167,  184,  200,  215,  231,  245,  260,  274,  301,  327,  351,
  375,  397,  418,  439,  458,  495,  528,  559,  587,  613,  637,  659,
  680,  717,  749,  777,  801,  823,  842,  859,  874,  899,  919,  936,
  949,  960,  969,  977,  983,  994,  1001, 1006, 1010, 1013, 1015, 1017,
  1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024,
};
// Q10 sample points x^2 at which the rate/distortion tables above are
// tabulated; spacing doubles every 8 entries (see model_rd_norm()).
static const int xsq_iq_q10[] = {
  0,      4,      8,      12,     16,     20,     24,     28,     32,
  40,     48,     56,     64,     72,     80,     88,     96,     112,
  128,    144,    160,    176,    192,    208,    224,    256,    288,
  320,    352,    384,    416,    448,    480,    544,    608,    672,
  736,    800,    864,    928,    992,    1120,   1248,   1376,   1504,
  1632,   1760,   1888,   2016,   2272,   2528,   2784,   3040,   3296,
  3552,   3808,   4064,   4576,   5088,   5600,   6112,   6624,   7136,
  7648,   8160,   9184,   10208,  11232,  12256,  13280,  14304,  15328,
  16352,  18400,  20448,  22496,  24544,  26592,  28640,  30688,  32736,
  36832,  40928,  45024,  49120,  53216,  57312,  61408,  65504,  73696,
  81888,  90080,  98272,  106464, 114656, 122848, 131040, 147424, 163808,
  180192, 196576, 212960, 229344, 245728,
};

// Look up normalized rate and distortion (both Q10) for a given x^2 (Q10)
// by piecewise-linear interpolation between adjacent table samples. The
// sample index is derived from the top bits of (xsq_q10 >> 2) + 8.
static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
  const int tmp = (xsq_q10 >> 2) + 8;
  const int k = get_msb(tmp) - 3;
  const int xq = (k << 3) + ((tmp >> k) & 0x7);
  const int one_q10 = 1 << 10;
  // a_q10/b_q10 are the Q10 interpolation weights between sample xq and
  // xq + 1.
  const int a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k);
  const int b_q10 = one_q10 - a_q10;
  *r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
  *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
}

// Vector form of model_rd_norm(): same interpolation applied independently
// to each of the MAX_MB_PLANE inputs.
static void model_rd_norm_vec(int xsq_q10[MAX_MB_PLANE],
                              int r_q10[MAX_MB_PLANE],
                              int d_q10[MAX_MB_PLANE]) {
  int i;
  const int one_q10 = 1 << 10;
  for (i = 0; i < MAX_MB_PLANE; ++i) {
    const int tmp = (xsq_q10[i] >> 2) + 8;
    const int k = get_msb(tmp) - 3;
    const int xq = (k << 3) + ((tmp >> k) & 0x7);
    const int a_q10 = ((xsq_q10[i] - xsq_iq_q10[xq]) << 10) >> (2 + k);
    const int b_q10 = one_q10 - a_q10;
    r_q10[i] = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
    d_q10[i] = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
  }
}

// Largest tabulated x^2 value (last xsq_iq_q10 entry minus 1); inputs are
// clamped to this before table lookup.
static const uint32_t MAX_XSQ_Q10 = 245727;

void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n_log2,
                                  unsigned int qstep, int *rate,
                                  int64_t *dist) {
  // This function models the rate and distortion for a Laplacian
  // source with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expressions are in:
  // Hang and Chen, "Source Model for transform video coder and its
  // application - Part I: Fundamental Theory", IEEE Trans. Circ.
  // Sys. for Video Tech., April 1997.
  if (var == 0) {
    *rate = 0;
    *dist = 0;
  } else {
    int d_q10, r_q10;
    // x^2 in Q10 = (qstep^2 << n_log2) / var, rounded to nearest.
    const uint64_t xsq_q10_64 =
        (((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var;
    const int xsq_q10 = (int)VPXMIN(xsq_q10_64, MAX_XSQ_Q10);
    model_rd_norm(xsq_q10, &r_q10, &d_q10);
    // Scale normalized rate by block size (2^n_log2) and convert to the
    // probability-cost domain; distortion is Dn * variance, rounded.
    *rate = ROUND_POWER_OF_TWO(r_q10 << n_log2, 10 - VP9_PROB_COST_SHIFT);
    *dist = (var * (int64_t)d_q10 + 512) >> 10;
  }
}

// Implements a fixed length vector form of vp9_model_rd_from_var_lapndz where
// vectors are of length MAX_MB_PLANE and all elements of var are non-zero.
void vp9_model_rd_from_var_lapndz_vec(unsigned int var[MAX_MB_PLANE],
                                      unsigned int n_log2[MAX_MB_PLANE],
                                      unsigned int qstep[MAX_MB_PLANE],
                                      int64_t *rate_sum, int64_t *dist_sum) {
  int i;
  int xsq_q10[MAX_MB_PLANE], d_q10[MAX_MB_PLANE], r_q10[MAX_MB_PLANE];
  for (i = 0; i < MAX_MB_PLANE; ++i) {
    // x^2 in Q10 = (qstep^2 << n_log2) / var, rounded to nearest.
    // Caller guarantees var[i] != 0 (see comment above).
    const uint64_t xsq_q10_64 =
        (((uint64_t)qstep[i] * qstep[i] << (n_log2[i] + 10)) + (var[i] >> 1)) /
        var[i];
    xsq_q10[i] = (int)VPXMIN(xsq_q10_64, MAX_XSQ_Q10);
  }
  model_rd_norm_vec(xsq_q10, r_q10, d_q10);
  // Accumulate (not overwrite) the per-plane rate and distortion.
  for (i = 0; i < MAX_MB_PLANE; ++i) {
    int rate =
        ROUND_POWER_OF_TWO(r_q10[i] << n_log2[i], 10 - VP9_PROB_COST_SHIFT);
    int64_t dist = (var[i] * (int64_t)d_q10[i] + 512) >> 10;
    *rate_sum += rate;
    *dist_sum += dist;
  }
}

// Copy the above/left entropy contexts of a plane into local arrays,
// collapsing groups of 2/4/8 4x4 contexts into a single flag per transform
// block for tx sizes larger than 4x4.
void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
                              const struct macroblockd_plane *pd,
                              ENTROPY_CONTEXT t_above[16],
                              ENTROPY_CONTEXT t_left[16]) {
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
  const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
  const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
  const ENTROPY_CONTEXT *const above = pd->above_context;
  const ENTROPY_CONTEXT *const left = pd->left_context;

  int i;
  // NOTE(review): the wide loads below type-pun the ENTROPY_CONTEXT arrays
  // to test several contexts at once; this presumes suitable size/alignment
  // of ENTROPY_CONTEXT -- confirm against its definition if changing.
  switch (tx_size) {
    case TX_4X4:
      memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
      memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
      break;
    case TX_8X8:
      for (i = 0; i < num_4x4_w; i += 2)
        t_above[i] = !!*(const uint16_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 2)
        t_left[i] = !!*(const uint16_t *)&left[i];
      break;
    case TX_16X16:
      for (i = 0; i < num_4x4_w; i += 4)
        t_above[i] = !!*(const uint32_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 4)
        t_left[i] = !!*(const uint32_t *)&left[i];
      break;
    default:
      assert(tx_size == TX_32X32);
      for (i = 0; i < num_4x4_w; i += 8)
        t_above[i] = !!*(const uint64_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 8)
        t_left[i] = !!*(const uint64_t *)&left[i];
      break;
  }
}

// Score each candidate predicted MV for |ref_frame| by full-pel SAD against
// the source block, then record in |x| the index of the best candidate, the
// largest full-pel MV magnitude seen, and the best SAD.
void vp9_mv_pred(VP9_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
                 int ref_y_stride, int ref_frame, BLOCK_SIZE block_size) {
  int i;
  int zero_seen = 0;
  int best_index = 0;
  int best_sad = INT_MAX;
  int this_sad = INT_MAX;
  int max_mv = 0;
  int near_same_nearest;
  uint8_t *src_y_ptr = x->plane[0].src.buf;
  uint8_t *ref_y_ptr;
  // A third candidate (the stored pred_mv) is considered only for blocks
  // below the maximum partition size.
  const int num_mv_refs =
      MAX_MV_REF_CANDIDATES + (block_size < x->max_partition_size);

  MV pred_mv[3];
  pred_mv[0] = x->mbmi_ext->ref_mvs[ref_frame][0].as_mv;
  pred_mv[1] = x->mbmi_ext->ref_mvs[ref_frame][1].as_mv;
  pred_mv[2] = x->pred_mv[ref_frame];
  assert(num_mv_refs <= (int)(sizeof(pred_mv) / sizeof(pred_mv[0])));

  near_same_nearest = x->mbmi_ext->ref_mvs[ref_frame][0].as_int ==
                      x->mbmi_ext->ref_mvs[ref_frame][1].as_int;

  // Get the sad for each candidate reference mv.
  for (i = 0; i < num_mv_refs; ++i) {
    const MV *this_mv = &pred_mv[i];
    int fp_row, fp_col;
    // Skip invalid candidates and a NEAR mv identical to NEAREST.
    if (this_mv->row == INT16_MAX || this_mv->col == INT16_MAX) continue;
    if (i == 1 && near_same_nearest) continue;
    // Convert the 1/8-pel mv to full-pel units with rounding.
    fp_row = (this_mv->row + 3 + (this_mv->row >= 0)) >> 3;
    fp_col = (this_mv->col + 3 + (this_mv->col >= 0)) >> 3;
    max_mv = VPXMAX(max_mv, VPXMAX(abs(this_mv->row), abs(this_mv->col)) >> 3);

    // Only evaluate the zero vector once.
    if (fp_row == 0 && fp_col == 0 && zero_seen) continue;
    zero_seen |= (fp_row == 0 && fp_col == 0);

    ref_y_ptr = &ref_y_buffer[ref_y_stride * fp_row + fp_col];
    // Find sad for current vector.
    this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
                                           ref_y_ptr, ref_y_stride);
    // Note if it is the best so far.
    if (this_sad < best_sad) {
      best_sad = this_sad;
      best_index = i;
    }
  }

  // Note the index of the mv that worked best in the reference list.
  x->mv_best_ref_index[ref_frame] = best_index;
  x->max_mv_context[ref_frame] = max_mv;
  x->pred_mv_sad[ref_frame] = best_sad;
}

// Point the destination buffer descriptors at the planes of |src| and set up
// each plane's prediction parameters (scaling and subsampling).
void vp9_setup_pred_block(const MACROBLOCKD *xd,
                          struct buf_2d dst[MAX_MB_PLANE],
                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
                          const struct scale_factors *scale,
                          const struct scale_factors *scale_uv) {
  int i;

  dst[0].buf = src->y_buffer;
  dst[0].stride = src->y_stride;
  dst[1].buf = src->u_buffer;
  dst[2].buf = src->v_buffer;
  dst[1].stride = dst[2].stride = src->uv_stride;

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    // Plane 0 (luma) uses |scale|; chroma planes use |scale_uv|.
    setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col,
                     i ? scale_uv : scale, xd->plane[i].subsampling_x,
                     xd->plane[i].subsampling_y);
  }
}

// Pixel offset of a raster-ordered 4x4 block within a plane of the given
// block size, for a buffer with the given stride.
int vp9_raster_block_offset(BLOCK_SIZE plane_bsize, int raster_block,
                            int stride) {
  const int bw = b_width_log2_lookup[plane_bsize];
  const int y = 4 * (raster_block >> bw);
  const int x = 4 * (raster_block & ((1 << bw) - 1));
  return y * stride + x;
}

// As vp9_raster_block_offset(), but returns a pointer into an int16_t plane
// whose stride equals the plane width in pixels.
int16_t *vp9_raster_block_offset_int16(BLOCK_SIZE plane_bsize, int raster_block,
                                       int16_t *base) {
  const int stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  return base + vp9_raster_block_offset(plane_bsize, raster_block, stride);
}

// Return the pre-scaled version of |ref_frame|'s buffer if one exists and is
// distinct from the unscaled reference, otherwise NULL.
YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi,
                                             int ref_frame) {
  const VP9_COMMON *const cm = &cpi->common;
  const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1];
  const int ref_idx = get_ref_frame_buf_idx(cpi, ref_frame);
  assert(ref_frame >= LAST_FRAME && ref_frame <= ALTREF_FRAME);
  return (scaled_idx != ref_idx && scaled_idx != INVALID_IDX)
             ? &cm->buffer_pool->frame_bufs[scaled_idx].buf
             : NULL;
}

// Rate cost of signalling the current block's interpolation filter, weighted
// by SWITCHABLE_INTERP_RATE_FACTOR.
int vp9_get_switchable_rate(const VP9_COMP *cpi, const MACROBLOCKD *const xd) {
  const MODE_INFO *const mi = xd->mi[0];
  const int ctx = get_pred_context_switchable_interp(xd);
  return SWITCHABLE_INTERP_RATE_FACTOR *
         cpi->switchable_interp_costs[ctx][mi->interp_filter];
}

// Set the baseline per-mode RD pruning multipliers. Larger values make a
// mode more likely to be skipped; BEST mode starts from a negative base so
// more modes are searched.
void vp9_set_rd_speed_thresholds(VP9_COMP *cpi) {
  int i;
  RD_OPT *const rd = &cpi->rd;
  SPEED_FEATURES *const sf = &cpi->sf;

  // Set baseline threshold values.
  for (i = 0; i < MAX_MODES; ++i)
    rd->thresh_mult[i] = cpi->oxcf.mode == BEST ? -500 : 0;

  if (sf->adaptive_rd_thresh) {
    rd->thresh_mult[THR_NEARESTMV] = 300;
    rd->thresh_mult[THR_NEARESTG] = 300;
    rd->thresh_mult[THR_NEARESTA] = 300;
  } else {
    rd->thresh_mult[THR_NEARESTMV] = 0;
    rd->thresh_mult[THR_NEARESTG] = 0;
    rd->thresh_mult[THR_NEARESTA] = 0;
  }

  rd->thresh_mult[THR_DC] += 1000;

  rd->thresh_mult[THR_NEWMV] += 1000;
  rd->thresh_mult[THR_NEWA] += 1000;
  rd->thresh_mult[THR_NEWG] += 1000;

  rd->thresh_mult[THR_NEARMV] += 1000;
  rd->thresh_mult[THR_NEARA] += 1000;
  rd->thresh_mult[THR_COMP_NEARESTLA] += 1000;
  rd->thresh_mult[THR_COMP_NEARESTGA] += 1000;

  rd->thresh_mult[THR_TM] += 1000;

  rd->thresh_mult[THR_COMP_NEARLA] += 1500;
  rd->thresh_mult[THR_COMP_NEWLA] += 2000;
  rd->thresh_mult[THR_NEARG] += 1000;
  rd->thresh_mult[THR_COMP_NEARGA] += 1500;
  rd->thresh_mult[THR_COMP_NEWGA] += 2000;

  rd->thresh_mult[THR_ZEROMV] += 2000;
  rd->thresh_mult[THR_ZEROG] += 2000;
  rd->thresh_mult[THR_ZEROA] += 2000;
  rd->thresh_mult[THR_COMP_ZEROLA] += 2500;
  rd->thresh_mult[THR_COMP_ZEROGA] += 2500;

  rd->thresh_mult[THR_H_PRED] += 2000;
  rd->thresh_mult[THR_V_PRED] += 2000;
  rd->thresh_mult[THR_D45_PRED] += 2500;
  rd->thresh_mult[THR_D135_PRED] += 2500;
  rd->thresh_mult[THR_D117_PRED] += 2500;
  rd->thresh_mult[THR_D153_PRED] += 2500;
  rd->thresh_mult[THR_D207_PRED] += 2500;
  rd->thresh_mult[THR_D63_PRED] += 2500;
}

// Baseline per-reference thresholds for sub-8x8 partitions; row 0 is the
// default and row 1 the (lower) BEST-mode values.
void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) {
  static const int thresh_mult[2][MAX_REFS] = {
    { 2500, 2500, 2500, 4500, 4500, 2500 },
    { 2000, 2000, 2000, 4000, 4000, 2000 }
  };
  RD_OPT *const rd = &cpi->rd;
  const int idx = cpi->oxcf.mode == BEST;
  memcpy(rd->thresh_mult_sub8x8, thresh_mult[idx], sizeof(thresh_mult[idx]));
}

// Adapt the RD threshold factors after a mode decision: the winning mode's
// factor decays (making it cheaper to try next time) while all other modes'
// factors grow, capped at rd_thresh * RD_THRESH_MAX_FACT. The update is
// applied across the neighboring block sizes [bsize - 1, bsize + 2].
void vp9_update_rd_thresh_fact(int (*factor_buf)[MAX_MODES], int rd_thresh,
                               int bsize, int best_mode_index) {
  if (rd_thresh > 0) {
    // Sub-8x8 blocks index by reference set, larger blocks by mode.
    const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES;
    int mode;
    for (mode = 0; mode < top_mode; ++mode) {
      const BLOCK_SIZE min_size = VPXMAX(bsize - 1, BLOCK_4X4);
      const BLOCK_SIZE max_size = VPXMIN(bsize + 2, BLOCK_64X64);
      BLOCK_SIZE bs;
      for (bs = min_size; bs <= max_size; ++bs) {
        int *const fact = &factor_buf[bs][mode];
        if (mode == best_mode_index) {
          *fact -= (*fact >> 4);
        } else {
          *fact = VPXMIN(*fact + RD_THRESH_INC, rd_thresh * RD_THRESH_MAX_FACT);
        }
      }
    }
  }
}

// Rate penalty (in the probability-cost domain) applied to intra modes,
// scaled down for small blocks unless the estimated noise level is high.
int vp9_get_intra_cost_penalty(const VP9_COMP *const cpi, BLOCK_SIZE bsize,
                               int qindex, int qdelta) {
  // Reduce the intra cost penalty for small blocks (<=16x16).
  int reduction_fac =
      (bsize <= BLOCK_16X16) ? ((bsize <= BLOCK_8X8) ? 4 : 2) : 0;

  if (cpi->noise_estimate.enabled && cpi->noise_estimate.level == kHigh)
    // Don't reduce intra cost penalty if estimated noise level is high.
    reduction_fac = 0;

  // Always use VPX_BITS_8 as input here because the penalty is applied
  // to rate not distortion so we want a consistent penalty for all bit
  // depths. If the actual bit depth were passed in here then the value
  // returned by vp9_dc_quant() would scale with the bit depth and we would
  // then need to apply inverse scaling to correct back to a bit depth
  // independent rate penalty.
  return (20 * vp9_dc_quant(qindex, qdelta, VPX_BITS_8)) >> reduction_fac;
}