/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>
#include <math.h>
#include <stdio.h>

#include "./vp9_rtcd.h"

#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/bitops.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/system_state.h"

#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_quant_common.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_seg_common.h"

#include "vp9/encoder/vp9_cost.h"
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/encoder/vp9_encoder.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/encoder/vp9_rd.h"
#include "vp9/encoder/vp9_tokenize.h"

#define RD_THRESH_POW 1.25

// Factor to weigh the rate for switchable interp filters.
#define SWITCHABLE_INTERP_RATE_FACTOR 1

void vp9_rd_cost_reset(RD_COST *rd_cost) {
  rd_cost->rate = INT_MAX;
  rd_cost->dist = INT64_MAX;
  rd_cost->rdcost = INT64_MAX;
}

void vp9_rd_cost_init(RD_COST *rd_cost) {
  rd_cost->rate = 0;
  rd_cost->dist = 0;
  rd_cost->rdcost = 0;
}

// The baseline rd thresholds for breaking out of the rd loop for
// certain modes are assumed to be based on 8x8 blocks.
// This table is used to correct for block size.
// The factors here are << 2 (2 = x0.5, 32 = x8 etc).
static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES] = {
  2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32
};
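// For example, BLOCK_8X8 (the fourth entry) carries factor 4, i.e. x1.0, so
// 8x8 thresholds pass through unchanged, while BLOCK_64X64 (the last entry)
// carries factor 32, i.e. x8.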

static void fill_mode_costs(VP9_COMP *cpi) {
  const FRAME_CONTEXT *const fc = cpi->common.fc;
  int i, j;

  for (i = 0; i < INTRA_MODES; ++i)
    for (j = 0; j < INTRA_MODES; ++j)
      vp9_cost_tokens(cpi->y_mode_costs[i][j], vp9_kf_y_mode_prob[i][j],
                      vp9_intra_mode_tree);

  vp9_cost_tokens(cpi->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree);
  for (i = 0; i < INTRA_MODES; ++i) {
    vp9_cost_tokens(cpi->intra_uv_mode_cost[KEY_FRAME][i],
                    vp9_kf_uv_mode_prob[i], vp9_intra_mode_tree);
    vp9_cost_tokens(cpi->intra_uv_mode_cost[INTER_FRAME][i],
                    fc->uv_mode_prob[i], vp9_intra_mode_tree);
  }

  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
    vp9_cost_tokens(cpi->switchable_interp_costs[i],
                    fc->switchable_interp_prob[i], vp9_switchable_interp_tree);
}

static void fill_token_costs(vp9_coeff_cost *c,
                             vp9_coeff_probs_model (*p)[PLANE_TYPES]) {
  int i, j, k, l;
  TX_SIZE t;
  for (t = TX_4X4; t <= TX_32X32; ++t)
    for (i = 0; i < PLANE_TYPES; ++i)
      for (j = 0; j < REF_TYPES; ++j)
        for (k = 0; k < COEF_BANDS; ++k)
          for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
            vpx_prob probs[ENTROPY_NODES];
            vp9_model_to_full_probs(p[t][i][j][k][l], probs);
            vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs, vp9_coef_tree);
            vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
                                 vp9_coef_tree);
            assert(c[t][i][j][k][0][l][EOB_TOKEN] ==
                   c[t][i][j][k][1][l][EOB_TOKEN]);
          }
}

// Values are now correlated to quantizer.
static int sad_per_bit16lut_8[QINDEX_RANGE];
static int sad_per_bit4lut_8[QINDEX_RANGE];

#if CONFIG_VP9_HIGHBITDEPTH
static int sad_per_bit16lut_10[QINDEX_RANGE];
static int sad_per_bit4lut_10[QINDEX_RANGE];
static int sad_per_bit16lut_12[QINDEX_RANGE];
static int sad_per_bit4lut_12[QINDEX_RANGE];
#endif

static void init_me_luts_bd(int *bit16lut, int *bit4lut, int range,
                            vpx_bit_depth_t bit_depth) {
  int i;
  // Initialize the sad lut tables using a formulaic calculation for now.
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  for (i = 0; i < range; i++) {
    const double q = vp9_convert_qindex_to_q(i, bit_depth);
    bit16lut[i] = (int)(0.0418 * q + 2.4107);
    bit4lut[i] = (int)(0.063 * q + 2.742);
  }
}
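// Example of the formulas above: a quantizer step of q = 40.0 gives
// bit16lut = (int)(0.0418 * 40 + 2.4107) = 4 and
// bit4lut = (int)(0.063 * 40 + 2.742) = 5.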

void vp9_init_me_luts(void) {
  init_me_luts_bd(sad_per_bit16lut_8, sad_per_bit4lut_8, QINDEX_RANGE,
                  VPX_BITS_8);
#if CONFIG_VP9_HIGHBITDEPTH
  init_me_luts_bd(sad_per_bit16lut_10, sad_per_bit4lut_10, QINDEX_RANGE,
                  VPX_BITS_10);
  init_me_luts_bd(sad_per_bit16lut_12, sad_per_bit4lut_12, QINDEX_RANGE,
                  VPX_BITS_12);
#endif
}

static const int rd_boost_factor[16] = { 64, 32, 32, 32, 24, 16, 12, 12,
                                         8,  8,  4,  4,  2,  2,  1,  0 };
static const int rd_frame_type_factor[FRAME_UPDATE_TYPES] = { 128, 144, 128,
                                                              128, 144 };

int64_t vp9_compute_rd_mult_based_on_qindex(const VP9_COMP *cpi, int qindex) {
  const int64_t q = vp9_dc_quant(qindex, 0, cpi->common.bit_depth);
#if CONFIG_VP9_HIGHBITDEPTH
  int64_t rdmult = 0;
  switch (cpi->common.bit_depth) {
    case VPX_BITS_8: rdmult = 88 * q * q / 24; break;
    case VPX_BITS_10: rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 4); break;
    case VPX_BITS_12: rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 8); break;
    default:
      assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
      return -1;
  }
#else
  int64_t rdmult = 88 * q * q / 24;
#endif  // CONFIG_VP9_HIGHBITDEPTH
  return rdmult;
}

int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) {
  int64_t rdmult = vp9_compute_rd_mult_based_on_qindex(cpi, qindex);

  if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
    const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index];
    const int boost_index = VPXMIN(15, (cpi->rc.gfu_boost / 100));

    rdmult = (rdmult * rd_frame_type_factor[frame_type]) >> 7;
    rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7);
  }
  if (rdmult < 1) rdmult = 1;
  return (int)rdmult;
}
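// Worked example for the base multiplier above (8-bit): a DC quantizer step
// of q = 32 gives rdmult = 88 * 32 * 32 / 24 = 3754 (integer division). In
// two-pass inter frames this is then rescaled by rd_frame_type_factor[] / 128
// and grown by rd_boost_factor[boost_index] / 128, where boost_index is the
// gfu_boost bucket. The result is the Lagrange multiplier used by the RDCOST
// macro in vp9_rd.h.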

static int compute_rd_thresh_factor(int qindex, vpx_bit_depth_t bit_depth) {
  double q;
#if CONFIG_VP9_HIGHBITDEPTH
  switch (bit_depth) {
    case VPX_BITS_8: q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0; break;
    case VPX_BITS_10: q = vp9_dc_quant(qindex, 0, VPX_BITS_10) / 16.0; break;
    case VPX_BITS_12: q = vp9_dc_quant(qindex, 0, VPX_BITS_12) / 64.0; break;
    default:
      assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
      return -1;
  }
#else
  (void)bit_depth;
  q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0;
#endif  // CONFIG_VP9_HIGHBITDEPTH
  // TODO(debargha): Adjust the function below.
  return VPXMAX((int)(pow(q, RD_THRESH_POW) * 5.12), 8);
}
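// For instance, q = 16.0 yields pow(16, 1.25) * 5.12 = 32 * 5.12 = 163.84,
// so the function returns 163; the VPXMAX() floor of 8 only matters for very
// small quantizer steps.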

void vp9_initialize_me_consts(VP9_COMP *cpi, MACROBLOCK *x, int qindex) {
#if CONFIG_VP9_HIGHBITDEPTH
  switch (cpi->common.bit_depth) {
    case VPX_BITS_8:
      x->sadperbit16 = sad_per_bit16lut_8[qindex];
      x->sadperbit4 = sad_per_bit4lut_8[qindex];
      break;
    case VPX_BITS_10:
      x->sadperbit16 = sad_per_bit16lut_10[qindex];
      x->sadperbit4 = sad_per_bit4lut_10[qindex];
      break;
    case VPX_BITS_12:
      x->sadperbit16 = sad_per_bit16lut_12[qindex];
      x->sadperbit4 = sad_per_bit4lut_12[qindex];
      break;
    default:
      assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
  }
#else
  (void)cpi;
  x->sadperbit16 = sad_per_bit16lut_8[qindex];
  x->sadperbit4 = sad_per_bit4lut_8[qindex];
#endif  // CONFIG_VP9_HIGHBITDEPTH
}

static void set_block_thresholds(const VP9_COMMON *cm, RD_OPT *rd) {
  int i, bsize, segment_id;

  for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
    const int qindex =
        clamp(vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex) +
                  cm->y_dc_delta_q,
              0, MAXQ);
    const int q = compute_rd_thresh_factor(qindex, cm->bit_depth);

    for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
      // Threshold here seems unnecessarily harsh but fine given actual
      // range of values used for cpi->sf.thresh_mult[].
      const int t = q * rd_thresh_block_size_factor[bsize];
      const int thresh_max = INT_MAX / t;

      if (bsize >= BLOCK_8X8) {
        for (i = 0; i < MAX_MODES; ++i)
          rd->threshes[segment_id][bsize][i] = rd->thresh_mult[i] < thresh_max
                                                   ? rd->thresh_mult[i] * t / 4
                                                   : INT_MAX;
      } else {
        for (i = 0; i < MAX_REFS; ++i)
          rd->threshes[segment_id][bsize][i] =
              rd->thresh_mult_sub8x8[i] < thresh_max
                  ? rd->thresh_mult_sub8x8[i] * t / 4
                  : INT_MAX;
      }
    }
  }
}
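// Each per-mode threshold above is thresh_mult * q * size_factor / 4; with
// the x1.0 (factor 4) entry for 8x8 blocks this reduces to thresh_mult * q.
// The thresh_max check keeps the product from overflowing int and pins such
// entries to INT_MAX instead.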

void vp9_initialize_rd_consts(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &cpi->td.mb;
  MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
  RD_OPT *const rd = &cpi->rd;
  int i;

  vpx_clear_system_state();

  rd->RDDIV = RDDIV_BITS;  // In bits (to multiply D by 128).
  rd->RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);

  set_error_per_bit(x, rd->RDMULT);

  x->select_tx_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
                       cm->frame_type != KEY_FRAME)
                          ? 0
                          : 1;

  set_block_thresholds(cm, rd);
  set_partition_probs(cm, xd);

  if (cpi->oxcf.pass == 1) {
    if (!frame_is_intra_only(cm))
      vp9_build_nmv_cost_table(
          x->nmvjointcost,
          cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost,
          &cm->fc->nmvc, cm->allow_high_precision_mv);
  } else {
    if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME)
      fill_token_costs(x->token_costs, cm->fc->coef_probs);

    if (cpi->sf.partition_search_type != VAR_BASED_PARTITION ||
        cm->frame_type == KEY_FRAME) {
      for (i = 0; i < PARTITION_CONTEXTS; ++i)
        vp9_cost_tokens(cpi->partition_cost[i], get_partition_probs(xd, i),
                        vp9_partition_tree);
    }

    if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1 ||
        cm->frame_type == KEY_FRAME) {
      fill_mode_costs(cpi);

      if (!frame_is_intra_only(cm)) {
        vp9_build_nmv_cost_table(
            x->nmvjointcost,
            cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost,
            &cm->fc->nmvc, cm->allow_high_precision_mv);

        for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
          vp9_cost_tokens((int *)cpi->inter_mode_cost[i],
                          cm->fc->inter_mode_probs[i], vp9_inter_mode_tree);
      }
    }
  }
}

// NOTE: The tables below must be of the same size.

// The functions described below are sampled at the four most significant
// bits of x^2 + 8 / 256.

// Normalized rate:
// This table models the rate for a Laplacian source with given variance
// when quantized with a uniform quantizer with given stepsize. The
// closed form expression is:
// Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
// where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
// and H(x) is the binary entropy function.
static const int rate_tab_q10[] = {
  65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651, 4553, 4389, 4255, 4142, 4044,
  3958,  3881, 3811, 3748, 3635, 3538, 3453, 3376, 3307, 3244, 3186, 3133, 3037,
  2952,  2877, 2809, 2747, 2690, 2638, 2589, 2501, 2423, 2353, 2290, 2232, 2179,
  2130,  2084, 2001, 1928, 1862, 1802, 1748, 1698, 1651, 1608, 1530, 1460, 1398,
  1342,  1290, 1243, 1199, 1159, 1086, 1021, 963,  911,  864,  821,  781,  745,
  680,   623,  574,  530,  490,  455,  424,  395,  345,  304,  269,  239,  213,
  190,   171,  154,  126,  104,  87,   73,   61,   52,   44,   38,   28,   21,
  16,    12,   10,   8,    6,    5,    3,    2,    1,    1,    1,    0,    0,
};

// Normalized distortion:
// This table models the normalized distortion for a Laplacian source
// with given variance when quantized with a uniform quantizer
// with given stepsize. The closed form expression is:
// Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
// where x = qpstep / sqrt(variance).
// Note the actual distortion is Dn * variance.
static const int dist_tab_q10[] = {
  0,    0,    1,    1,    1,    2,    2,    2,    3,    3,    4,    5,    5,
  6,    7,    7,    8,    9,    11,   12,   13,   15,   16,   17,   18,   21,
  24,   26,   29,   31,   34,   36,   39,   44,   49,   54,   59,   64,   69,
  73,   78,   88,   97,   106,  115,  124,  133,  142,  151,  167,  184,  200,
  215,  231,  245,  260,  274,  301,  327,  351,  375,  397,  418,  439,  458,
  495,  528,  559,  587,  613,  637,  659,  680,  717,  749,  777,  801,  823,
  842,  859,  874,  899,  919,  936,  949,  960,  969,  977,  983,  994,  1001,
  1006, 1010, 1013, 1015, 1017, 1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024,
};
static const int xsq_iq_q10[] = {
  0,      4,      8,      12,     16,     20,     24,     28,     32,
  40,     48,     56,     64,     72,     80,     88,     96,     112,
  128,    144,    160,    176,    192,    208,    224,    256,    288,
  320,    352,    384,    416,    448,    480,    544,    608,    672,
  736,    800,    864,    928,    992,    1120,   1248,   1376,   1504,
  1632,   1760,   1888,   2016,   2272,   2528,   2784,   3040,   3296,
  3552,   3808,   4064,   4576,   5088,   5600,   6112,   6624,   7136,
  7648,   8160,   9184,   10208,  11232,  12256,  13280,  14304,  15328,
  16352,  18400,  20448,  22496,  24544,  26592,  28640,  30688,  32736,
  36832,  40928,  45024,  49120,  53216,  57312,  61408,  65504,  73696,
  81888,  90080,  98272,  106464, 114656, 122848, 131040, 147424, 163808,
  180192, 196576, 212960, 229344, 245728,
};

static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
  const int tmp = (xsq_q10 >> 2) + 8;
  const int k = get_msb(tmp) - 3;
  const int xq = (k << 3) + ((tmp >> k) & 0x7);
  const int one_q10 = 1 << 10;
  const int a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k);
  const int b_q10 = one_q10 - a_q10;
  *r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
  *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
}
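// model_rd_norm() does a piecewise-linear lookup into the Q10 tables above:
// xsq_iq_q10[] is a non-uniform grid whose spacing doubles every eight
// entries, get_msb() picks the segment (k) and the table index (xq), and
// a_q10 / b_q10 are the Q10 interpolation weights applied to entries xq and
// xq + 1 of rate_tab_q10[] and dist_tab_q10[].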

static void model_rd_norm_vec(int xsq_q10[MAX_MB_PLANE],
                              int r_q10[MAX_MB_PLANE],
                              int d_q10[MAX_MB_PLANE]) {
  int i;
  const int one_q10 = 1 << 10;
  for (i = 0; i < MAX_MB_PLANE; ++i) {
    const int tmp = (xsq_q10[i] >> 2) + 8;
    const int k = get_msb(tmp) - 3;
    const int xq = (k << 3) + ((tmp >> k) & 0x7);
    const int a_q10 = ((xsq_q10[i] - xsq_iq_q10[xq]) << 10) >> (2 + k);
    const int b_q10 = one_q10 - a_q10;
    r_q10[i] = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
    d_q10[i] = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
  }
}

static const uint32_t MAX_XSQ_Q10 = 245727;

void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n_log2,
                                  unsigned int qstep, int *rate,
                                  int64_t *dist) {
  // This function models the rate and distortion for a Laplacian
  // source with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expressions are in:
  // Hang and Chen, "Source Model for transform video coder and its
  // application - Part I: Fundamental Theory", IEEE Trans. Circ.
  // Sys. for Video Tech., April 1997.
  if (var == 0) {
    *rate = 0;
    *dist = 0;
  } else {
    int d_q10, r_q10;
    const uint64_t xsq_q10_64 =
        (((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var;
    const int xsq_q10 = (int)VPXMIN(xsq_q10_64, MAX_XSQ_Q10);
    model_rd_norm(xsq_q10, &r_q10, &d_q10);
    *rate = ROUND_POWER_OF_TWO(r_q10 << n_log2, 10 - VP9_PROB_COST_SHIFT);
    *dist = (var * (int64_t)d_q10 + 512) >> 10;
  }
}
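// The intermediate value xsq_q10 above is round(2^n_log2 * qstep^2 * 1024 /
// var), i.e. x^2 in Q10 if var is taken to be accumulated over the 2^n_log2
// samples of the block. It is clamped to MAX_XSQ_Q10 = 245727 so that xq + 1
// in model_rd_norm() never indexes past the last table entry (245728). The
// rate is then scaled back up by the block size (r_q10 << n_log2) while the
// distortion is rescaled by var, as noted for dist_tab_q10[].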

// Implements a fixed length vector form of vp9_model_rd_from_var_lapndz where
// vectors are of length MAX_MB_PLANE and all elements of var are non-zero.
void vp9_model_rd_from_var_lapndz_vec(unsigned int var[MAX_MB_PLANE],
                                      unsigned int n_log2[MAX_MB_PLANE],
                                      unsigned int qstep[MAX_MB_PLANE],
                                      int64_t *rate_sum, int64_t *dist_sum) {
  int i;
  int xsq_q10[MAX_MB_PLANE], d_q10[MAX_MB_PLANE], r_q10[MAX_MB_PLANE];
  for (i = 0; i < MAX_MB_PLANE; ++i) {
    const uint64_t xsq_q10_64 =
        (((uint64_t)qstep[i] * qstep[i] << (n_log2[i] + 10)) + (var[i] >> 1)) /
        var[i];
    xsq_q10[i] = (int)VPXMIN(xsq_q10_64, MAX_XSQ_Q10);
  }
  model_rd_norm_vec(xsq_q10, r_q10, d_q10);
  for (i = 0; i < MAX_MB_PLANE; ++i) {
    int rate =
        ROUND_POWER_OF_TWO(r_q10[i] << n_log2[i], 10 - VP9_PROB_COST_SHIFT);
    int64_t dist = (var[i] * (int64_t)d_q10[i] + 512) >> 10;
    *rate_sum += rate;
    *dist_sum += dist;
  }
}

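// vp9_get_entropy_contexts() snapshots the per-4x4 above/left coefficient
// contexts for one plane at the given transform size. For TX_8X8 and larger
// it collapses each group of 2, 4 or 8 neighboring 4x4 contexts into a single
// nonzero flag by reading the group as one uint16_t/uint32_t/uint64_t and
// applying !!, storing the result at the first position of the group.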
void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
                              const struct macroblockd_plane *pd,
                              ENTROPY_CONTEXT t_above[16],
                              ENTROPY_CONTEXT t_left[16]) {
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
  const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
  const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
  const ENTROPY_CONTEXT *const above = pd->above_context;
  const ENTROPY_CONTEXT *const left = pd->left_context;

  int i;
  switch (tx_size) {
    case TX_4X4:
      memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
      memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
      break;
    case TX_8X8:
      for (i = 0; i < num_4x4_w; i += 2)
        t_above[i] = !!*(const uint16_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 2)
        t_left[i] = !!*(const uint16_t *)&left[i];
      break;
    case TX_16X16:
      for (i = 0; i < num_4x4_w; i += 4)
        t_above[i] = !!*(const uint32_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 4)
        t_left[i] = !!*(const uint32_t *)&left[i];
      break;
    case TX_32X32:
      for (i = 0; i < num_4x4_w; i += 8)
        t_above[i] = !!*(const uint64_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 8)
        t_left[i] = !!*(const uint64_t *)&left[i];
      break;
    default: assert(0 && "Invalid transform size."); break;
  }
}

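// vp9_mv_pred() evaluates the candidate reference MVs (nearest, near and,
// when adaptive motion search allows it, the stored pred_mv) by computing
// the SAD at each candidate's full-pel position; the rounding
// (mv + 3 + (mv >= 0)) >> 3 converts a 1/8-pel component to the nearest
// full-pel offset. The best candidate index, its SAD and the largest MV
// magnitude seen are cached on the MACROBLOCK for the later mode search.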
void vp9_mv_pred(VP9_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
                 int ref_y_stride, int ref_frame, BLOCK_SIZE block_size) {
  int i;
  int zero_seen = 0;
  int best_index = 0;
  int best_sad = INT_MAX;
  int this_sad = INT_MAX;
  int max_mv = 0;
  int near_same_nearest;
  uint8_t *src_y_ptr = x->plane[0].src.buf;
  uint8_t *ref_y_ptr;
  const int num_mv_refs =
      MAX_MV_REF_CANDIDATES +
      (cpi->sf.adaptive_motion_search && block_size < x->max_partition_size);

  MV pred_mv[3];
  pred_mv[0] = x->mbmi_ext->ref_mvs[ref_frame][0].as_mv;
  pred_mv[1] = x->mbmi_ext->ref_mvs[ref_frame][1].as_mv;
  pred_mv[2] = x->pred_mv[ref_frame];
  assert(num_mv_refs <= (int)(sizeof(pred_mv) / sizeof(pred_mv[0])));

  near_same_nearest = x->mbmi_ext->ref_mvs[ref_frame][0].as_int ==
                      x->mbmi_ext->ref_mvs[ref_frame][1].as_int;
  // Get the sad for each candidate reference mv.
  for (i = 0; i < num_mv_refs; ++i) {
    const MV *this_mv = &pred_mv[i];
    int fp_row, fp_col;

    if (i == 1 && near_same_nearest) continue;
    fp_row = (this_mv->row + 3 + (this_mv->row >= 0)) >> 3;
    fp_col = (this_mv->col + 3 + (this_mv->col >= 0)) >> 3;
    max_mv = VPXMAX(max_mv, VPXMAX(abs(this_mv->row), abs(this_mv->col)) >> 3);

    if (fp_row == 0 && fp_col == 0 && zero_seen) continue;
    zero_seen |= (fp_row == 0 && fp_col == 0);

    ref_y_ptr = &ref_y_buffer[ref_y_stride * fp_row + fp_col];
    // Find sad for current vector.
    this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
                                           ref_y_ptr, ref_y_stride);
    // Note if it is the best so far.
    if (this_sad < best_sad) {
      best_sad = this_sad;
      best_index = i;
    }
  }

  // Note the index of the mv that worked best in the reference list.
  x->mv_best_ref_index[ref_frame] = best_index;
  x->max_mv_context[ref_frame] = max_mv;
  x->pred_mv_sad[ref_frame] = best_sad;
}

void vp9_setup_pred_block(const MACROBLOCKD *xd,
                          struct buf_2d dst[MAX_MB_PLANE],
                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
                          const struct scale_factors *scale,
                          const struct scale_factors *scale_uv) {
  int i;

  dst[0].buf = src->y_buffer;
  dst[0].stride = src->y_stride;
  dst[1].buf = src->u_buffer;
  dst[2].buf = src->v_buffer;
  dst[1].stride = dst[2].stride = src->uv_stride;

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col,
                     i ? scale_uv : scale, xd->plane[i].subsampling_x,
                     xd->plane[i].subsampling_y);
  }
}

int vp9_raster_block_offset(BLOCK_SIZE plane_bsize, int raster_block,
                            int stride) {
  const int bw = b_width_log2_lookup[plane_bsize];
  const int y = 4 * (raster_block >> bw);
  const int x = 4 * (raster_block & ((1 << bw) - 1));
  return y * stride + x;
}
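// Example: for a plane block size that is four 4x4 blocks wide (bw == 2),
// raster_block 5 sits in row 1, column 1 of the 4x4 grid, so the pixel
// offset is y * stride + x = 4 * stride + 4.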

int16_t *vp9_raster_block_offset_int16(BLOCK_SIZE plane_bsize, int raster_block,
                                       int16_t *base) {
  const int stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  return base + vp9_raster_block_offset(plane_bsize, raster_block, stride);
}

YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi,
                                             int ref_frame) {
  const VP9_COMMON *const cm = &cpi->common;
  const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1];
  const int ref_idx = get_ref_frame_buf_idx(cpi, ref_frame);
  return (scaled_idx != ref_idx && scaled_idx != INVALID_IDX)
             ? &cm->buffer_pool->frame_bufs[scaled_idx].buf
             : NULL;
}

int vp9_get_switchable_rate(const VP9_COMP *cpi, const MACROBLOCKD *const xd) {
  const MODE_INFO *const mi = xd->mi[0];
  const int ctx = get_pred_context_switchable_interp(xd);
  return SWITCHABLE_INTERP_RATE_FACTOR *
         cpi->switchable_interp_costs[ctx][mi->interp_filter];
}

void vp9_set_rd_speed_thresholds(VP9_COMP *cpi) {
  int i;
  RD_OPT *const rd = &cpi->rd;
  SPEED_FEATURES *const sf = &cpi->sf;

  // Set baseline threshold values.
  for (i = 0; i < MAX_MODES; ++i)
    rd->thresh_mult[i] = cpi->oxcf.mode == BEST ? -500 : 0;

  if (sf->adaptive_rd_thresh) {
    rd->thresh_mult[THR_NEARESTMV] = 300;
    rd->thresh_mult[THR_NEARESTG] = 300;
    rd->thresh_mult[THR_NEARESTA] = 300;
  } else {
    rd->thresh_mult[THR_NEARESTMV] = 0;
    rd->thresh_mult[THR_NEARESTG] = 0;
    rd->thresh_mult[THR_NEARESTA] = 0;
  }

  rd->thresh_mult[THR_DC] += 1000;

  rd->thresh_mult[THR_NEWMV] += 1000;
  rd->thresh_mult[THR_NEWA] += 1000;
  rd->thresh_mult[THR_NEWG] += 1000;

  rd->thresh_mult[THR_NEARMV] += 1000;
  rd->thresh_mult[THR_NEARA] += 1000;
  rd->thresh_mult[THR_COMP_NEARESTLA] += 1000;
  rd->thresh_mult[THR_COMP_NEARESTGA] += 1000;

  rd->thresh_mult[THR_TM] += 1000;

  rd->thresh_mult[THR_COMP_NEARLA] += 1500;
  rd->thresh_mult[THR_COMP_NEWLA] += 2000;
  rd->thresh_mult[THR_NEARG] += 1000;
  rd->thresh_mult[THR_COMP_NEARGA] += 1500;
  rd->thresh_mult[THR_COMP_NEWGA] += 2000;

  rd->thresh_mult[THR_ZEROMV] += 2000;
  rd->thresh_mult[THR_ZEROG] += 2000;
  rd->thresh_mult[THR_ZEROA] += 2000;
  rd->thresh_mult[THR_COMP_ZEROLA] += 2500;
  rd->thresh_mult[THR_COMP_ZEROGA] += 2500;

  rd->thresh_mult[THR_H_PRED] += 2000;
  rd->thresh_mult[THR_V_PRED] += 2000;
  rd->thresh_mult[THR_D45_PRED] += 2500;
  rd->thresh_mult[THR_D135_PRED] += 2500;
  rd->thresh_mult[THR_D117_PRED] += 2500;
  rd->thresh_mult[THR_D153_PRED] += 2500;
  rd->thresh_mult[THR_D207_PRED] += 2500;
  rd->thresh_mult[THR_D63_PRED] += 2500;
}

void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) {
  static const int thresh_mult[2][MAX_REFS] = {
    { 2500, 2500, 2500, 4500, 4500, 2500 },
    { 2000, 2000, 2000, 4000, 4000, 2000 }
  };
  RD_OPT *const rd = &cpi->rd;
  const int idx = cpi->oxcf.mode == BEST;
  memcpy(rd->thresh_mult_sub8x8, thresh_mult[idx], sizeof(thresh_mult[idx]));
}

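// vp9_update_rd_thresh_fact() adapts the per-mode threshold factors after a
// block has been coded: the winning mode's factor decays by 1/16 of its
// current value, while every other mode's factor grows by RD_THRESH_INC,
// capped at rd_thresh * RD_THRESH_MAX_FACT. The update is applied to block
// sizes from one below to two above the current one.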
void vp9_update_rd_thresh_fact(int (*factor_buf)[MAX_MODES], int rd_thresh,
                               int bsize, int best_mode_index) {
  if (rd_thresh > 0) {
    const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES;
    int mode;
    for (mode = 0; mode < top_mode; ++mode) {
      const BLOCK_SIZE min_size = VPXMAX(bsize - 1, BLOCK_4X4);
      const BLOCK_SIZE max_size = VPXMIN(bsize + 2, BLOCK_64X64);
      BLOCK_SIZE bs;
      for (bs = min_size; bs <= max_size; ++bs) {
        int *const fact = &factor_buf[bs][mode];
        if (mode == best_mode_index) {
          *fact -= (*fact >> 4);
        } else {
          *fact = VPXMIN(*fact + RD_THRESH_INC, rd_thresh * RD_THRESH_MAX_FACT);
        }
      }
    }
  }
}

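// Worked example for the penalty below: a DC quantizer step of 8 gives a base
// penalty of 20 * 8 = 160, reduced to 40 for 16x16 blocks and 10 for blocks
// of 8x8 and below unless high noise has been detected.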
int vp9_get_intra_cost_penalty(const VP9_COMP *const cpi, BLOCK_SIZE bsize,
                               int qindex, int qdelta) {
  // Reduce the intra cost penalty for small blocks (<=16x16).
  int reduction_fac =
      (bsize <= BLOCK_16X16) ? ((bsize <= BLOCK_8X8) ? 4 : 2) : 0;

  if (cpi->noise_estimate.enabled && cpi->noise_estimate.level == kHigh)
    // Don't reduce intra cost penalty if estimated noise level is high.
    reduction_fac = 0;

  // Always use VPX_BITS_8 as input here because the penalty is applied
  // to rate not distortion so we want a consistent penalty for all bit
  // depths. If the actual bit depth were passed in here then the value
  // returned by vp9_dc_quant() would scale with the bit depth and we would
  // then need to apply inverse scaling to correct back to a bit depth
  // independent rate penalty.
  return (20 * vp9_dc_quant(qindex, qdelta, VPX_BITS_8)) >> reduction_fac;
}