Home | History | Annotate | Download | only in encoder
      1 /*
      2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include "./vpx_dsp_rtcd.h"
     12 
     13 #include "vpx_config.h"
     14 #include "vp8_rtcd.h"
     15 #include "encodemb.h"
     16 #include "vp8/common/reconinter.h"
     17 #include "vp8/encoder/quantize.h"
     18 #include "tokenize.h"
     19 #include "vp8/common/invtrans.h"
     20 #include "vpx_mem/vpx_mem.h"
     21 #include "rdopt.h"
     22 
     23 void vp8_subtract_b(BLOCK *be, BLOCKD *bd, int pitch) {
     24   unsigned char *src_ptr = (*(be->base_src) + be->src);
     25   short *diff_ptr = be->src_diff;
     26   unsigned char *pred_ptr = bd->predictor;
     27   int src_stride = be->src_stride;
     28 
     29   vpx_subtract_block(4, 4, diff_ptr, pitch, src_ptr, src_stride, pred_ptr,
     30                      pitch);
     31 }
     32 
     33 void vp8_subtract_mbuv(short *diff, unsigned char *usrc, unsigned char *vsrc,
     34                        int src_stride, unsigned char *upred,
     35                        unsigned char *vpred, int pred_stride) {
     36   short *udiff = diff + 256;
     37   short *vdiff = diff + 320;
     38 
     39   vpx_subtract_block(8, 8, udiff, 8, usrc, src_stride, upred, pred_stride);
     40   vpx_subtract_block(8, 8, vdiff, 8, vsrc, src_stride, vpred, pred_stride);
     41 }
     42 
     43 void vp8_subtract_mby(short *diff, unsigned char *src, int src_stride,
     44                       unsigned char *pred, int pred_stride) {
     45   vpx_subtract_block(16, 16, diff, 16, src, src_stride, pred, pred_stride);
     46 }
     47 
     48 static void vp8_subtract_mb(MACROBLOCK *x) {
     49   BLOCK *b = &x->block[0];
     50 
     51   vp8_subtract_mby(x->src_diff, *(b->base_src), b->src_stride,
     52                    x->e_mbd.dst.y_buffer, x->e_mbd.dst.y_stride);
     53   vp8_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
     54                     x->src.uv_stride, x->e_mbd.dst.u_buffer,
     55                     x->e_mbd.dst.v_buffer, x->e_mbd.dst.uv_stride);
     56 }
     57 
     58 static void build_dcblock(MACROBLOCK *x) {
     59   short *src_diff_ptr = &x->src_diff[384];
     60   int i;
     61 
     62   for (i = 0; i < 16; ++i) {
     63     src_diff_ptr[i] = x->coeff[i * 16];
     64   }
     65 }
     66 
     67 void vp8_transform_mbuv(MACROBLOCK *x) {
     68   int i;
     69 
     70   for (i = 16; i < 24; i += 2) {
     71     x->short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 16);
     72   }
     73 }
     74 
     75 void vp8_transform_intra_mby(MACROBLOCK *x) {
     76   int i;
     77 
     78   for (i = 0; i < 16; i += 2) {
     79     x->short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32);
     80   }
     81 
     82   /* build dc block from 16 y dc values */
     83   build_dcblock(x);
     84 
     85   /* do 2nd order transform on the dc block */
     86   x->short_walsh4x4(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8);
     87 }
     88 
     89 static void transform_mb(MACROBLOCK *x) {
     90   int i;
     91 
     92   for (i = 0; i < 16; i += 2) {
     93     x->short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32);
     94   }
     95 
     96   /* build dc block from 16 y dc values */
     97   if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) build_dcblock(x);
     98 
     99   for (i = 16; i < 24; i += 2) {
    100     x->short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 16);
    101   }
    102 
    103   /* do 2nd order transform on the dc block */
    104   if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) {
    105     x->short_walsh4x4(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8);
    106   }
    107 }
    108 
    109 static void transform_mby(MACROBLOCK *x) {
    110   int i;
    111 
    112   for (i = 0; i < 16; i += 2) {
    113     x->short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32);
    114   }
    115 
    116   /* build dc block from 16 y dc values */
    117   if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) {
    118     build_dcblock(x);
    119     x->short_walsh4x4(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8);
    120   }
    121 }
    122 
    123 #define RDTRUNC(RM, DM, R, D) ((128 + (R) * (RM)) & 0xFF)
    124 
    125 typedef struct vp8_token_state vp8_token_state;
    126 
    127 struct vp8_token_state {
    128   int rate;
    129   int error;
    130   signed char next;
    131   signed char token;
    132   short qc;
    133 };
    134 
    135 /* TODO: experiments to find optimal multiple numbers */
    136 #define Y1_RD_MULT 4
    137 #define UV_RD_MULT 2
    138 #define Y2_RD_MULT 16
    139 
    140 static const int plane_rd_mult[4] = { Y1_RD_MULT, Y2_RD_MULT, UV_RD_MULT,
    141                                       Y1_RD_MULT };
    142 
    143 static void optimize_b(MACROBLOCK *mb, int ib, int type, ENTROPY_CONTEXT *a,
    144                        ENTROPY_CONTEXT *l) {
    145   BLOCK *b;
    146   BLOCKD *d;
    147   vp8_token_state tokens[17][2];
    148   unsigned best_mask[2];
    149   const short *dequant_ptr;
    150   const short *coeff_ptr;
    151   short *qcoeff_ptr;
    152   short *dqcoeff_ptr;
    153   int eob;
    154   int i0;
    155   int rc;
    156   int x;
    157   int sz = 0;
    158   int next;
    159   int rdmult;
    160   int rddiv;
    161   int final_eob;
    162   int rd_cost0;
    163   int rd_cost1;
    164   int rate0;
    165   int rate1;
    166   int error0;
    167   int error1;
    168   int t0;
    169   int t1;
    170   int best;
    171   int band;
    172   int pt;
    173   int i;
    174   int err_mult = plane_rd_mult[type];
    175 
    176   b = &mb->block[ib];
    177   d = &mb->e_mbd.block[ib];
    178 
    179   dequant_ptr = d->dequant;
    180   coeff_ptr = b->coeff;
    181   qcoeff_ptr = d->qcoeff;
    182   dqcoeff_ptr = d->dqcoeff;
    183   i0 = !type;
    184   eob = *d->eob;
    185 
    186   /* Now set up a Viterbi trellis to evaluate alternative roundings. */
    187   rdmult = mb->rdmult * err_mult;
    188   if (mb->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
    189     rdmult = (rdmult * 9) >> 4;
    190   }
    191 
    192   rddiv = mb->rddiv;
    193   best_mask[0] = best_mask[1] = 0;
    194   /* Initialize the sentinel node of the trellis. */
    195   tokens[eob][0].rate = 0;
    196   tokens[eob][0].error = 0;
    197   tokens[eob][0].next = 16;
    198   tokens[eob][0].token = DCT_EOB_TOKEN;
    199   tokens[eob][0].qc = 0;
    200   *(tokens[eob] + 1) = *(tokens[eob] + 0);
    201   next = eob;
    202   for (i = eob; i-- > i0;) {
    203     int base_bits;
    204     int d2;
    205     int dx;
    206 
    207     rc = vp8_default_zig_zag1d[i];
    208     x = qcoeff_ptr[rc];
    209     /* Only add a trellis state for non-zero coefficients. */
    210     if (x) {
    211       int shortcut = 0;
    212       error0 = tokens[next][0].error;
    213       error1 = tokens[next][1].error;
    214       /* Evaluate the first possibility for this state. */
    215       rate0 = tokens[next][0].rate;
    216       rate1 = tokens[next][1].rate;
    217       t0 = (vp8_dct_value_tokens_ptr + x)->Token;
    218       /* Consider both possible successor states. */
    219       if (next < 16) {
    220         band = vp8_coef_bands[i + 1];
    221         pt = vp8_prev_token_class[t0];
    222         rate0 += mb->token_costs[type][band][pt][tokens[next][0].token];
    223         rate1 += mb->token_costs[type][band][pt][tokens[next][1].token];
    224       }
    225       rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);
    226       rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);
    227       if (rd_cost0 == rd_cost1) {
    228         rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);
    229         rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);
    230       }
    231       /* And pick the best. */
    232       best = rd_cost1 < rd_cost0;
    233       base_bits = *(vp8_dct_value_cost_ptr + x);
    234       dx = dqcoeff_ptr[rc] - coeff_ptr[rc];
    235       d2 = dx * dx;
    236       tokens[i][0].rate = base_bits + (best ? rate1 : rate0);
    237       tokens[i][0].error = d2 + (best ? error1 : error0);
    238       tokens[i][0].next = next;
    239       tokens[i][0].token = t0;
    240       tokens[i][0].qc = x;
    241       best_mask[0] |= best << i;
    242       /* Evaluate the second possibility for this state. */
    243       rate0 = tokens[next][0].rate;
    244       rate1 = tokens[next][1].rate;
    245 
    246       if ((abs(x) * dequant_ptr[rc] > abs(coeff_ptr[rc])) &&
    247           (abs(x) * dequant_ptr[rc] < abs(coeff_ptr[rc]) + dequant_ptr[rc])) {
    248         shortcut = 1;
    249       } else {
    250         shortcut = 0;
    251       }
    252 
    253       if (shortcut) {
    254         sz = -(x < 0);
    255         x -= 2 * sz + 1;
    256       }
    257 
    258       /* Consider both possible successor states. */
    259       if (!x) {
    260         /* If we reduced this coefficient to zero, check to see if
    261          *  we need to move the EOB back here.
    262          */
    263         t0 =
    264             tokens[next][0].token == DCT_EOB_TOKEN ? DCT_EOB_TOKEN : ZERO_TOKEN;
    265         t1 =
    266             tokens[next][1].token == DCT_EOB_TOKEN ? DCT_EOB_TOKEN : ZERO_TOKEN;
    267       } else {
    268         t0 = t1 = (vp8_dct_value_tokens_ptr + x)->Token;
    269       }
    270       if (next < 16) {
    271         band = vp8_coef_bands[i + 1];
    272         if (t0 != DCT_EOB_TOKEN) {
    273           pt = vp8_prev_token_class[t0];
    274           rate0 += mb->token_costs[type][band][pt][tokens[next][0].token];
    275         }
    276         if (t1 != DCT_EOB_TOKEN) {
    277           pt = vp8_prev_token_class[t1];
    278           rate1 += mb->token_costs[type][band][pt][tokens[next][1].token];
    279         }
    280       }
    281 
    282       rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);
    283       rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);
    284       if (rd_cost0 == rd_cost1) {
    285         rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);
    286         rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);
    287       }
    288       /* And pick the best. */
    289       best = rd_cost1 < rd_cost0;
    290       base_bits = *(vp8_dct_value_cost_ptr + x);
    291 
    292       if (shortcut) {
    293         dx -= (dequant_ptr[rc] + sz) ^ sz;
    294         d2 = dx * dx;
    295       }
    296       tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
    297       tokens[i][1].error = d2 + (best ? error1 : error0);
    298       tokens[i][1].next = next;
    299       tokens[i][1].token = best ? t1 : t0;
    300       tokens[i][1].qc = x;
    301       best_mask[1] |= best << i;
    302       /* Finally, make this the new head of the trellis. */
    303       next = i;
    304     }
    305     /* There's no choice to make for a zero coefficient, so we don't
    306      *  add a new trellis node, but we do need to update the costs.
    307      */
    308     else {
    309       band = vp8_coef_bands[i + 1];
    310       t0 = tokens[next][0].token;
    311       t1 = tokens[next][1].token;
    312       /* Update the cost of each path if we're past the EOB token. */
    313       if (t0 != DCT_EOB_TOKEN) {
    314         tokens[next][0].rate += mb->token_costs[type][band][0][t0];
    315         tokens[next][0].token = ZERO_TOKEN;
    316       }
    317       if (t1 != DCT_EOB_TOKEN) {
    318         tokens[next][1].rate += mb->token_costs[type][band][0][t1];
    319         tokens[next][1].token = ZERO_TOKEN;
    320       }
    321       /* Don't update next, because we didn't add a new node. */
    322     }
    323   }
    324 
    325   /* Now pick the best path through the whole trellis. */
    326   band = vp8_coef_bands[i + 1];
    327   VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
    328   rate0 = tokens[next][0].rate;
    329   rate1 = tokens[next][1].rate;
    330   error0 = tokens[next][0].error;
    331   error1 = tokens[next][1].error;
    332   t0 = tokens[next][0].token;
    333   t1 = tokens[next][1].token;
    334   rate0 += mb->token_costs[type][band][pt][t0];
    335   rate1 += mb->token_costs[type][band][pt][t1];
    336   rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);
    337   rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);
    338   if (rd_cost0 == rd_cost1) {
    339     rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);
    340     rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);
    341   }
    342   best = rd_cost1 < rd_cost0;
    343   final_eob = i0 - 1;
    344   for (i = next; i < eob; i = next) {
    345     x = tokens[i][best].qc;
    346     if (x) final_eob = i;
    347     rc = vp8_default_zig_zag1d[i];
    348     qcoeff_ptr[rc] = x;
    349     dqcoeff_ptr[rc] = x * dequant_ptr[rc];
    350     next = tokens[i][best].next;
    351     best = (best_mask[best] >> i) & 1;
    352   }
    353   final_eob++;
    354 
    355   *a = *l = (final_eob != !type);
    356   *d->eob = (char)final_eob;
    357 }
    358 static void check_reset_2nd_coeffs(MACROBLOCKD *x, int type, ENTROPY_CONTEXT *a,
    359                                    ENTROPY_CONTEXT *l) {
    360   int sum = 0;
    361   int i;
    362   BLOCKD *bd = &x->block[24];
    363 
    364   if (bd->dequant[0] >= 35 && bd->dequant[1] >= 35) return;
    365 
    366   for (i = 0; i < (*bd->eob); ++i) {
    367     int coef = bd->dqcoeff[vp8_default_zig_zag1d[i]];
    368     sum += (coef >= 0) ? coef : -coef;
    369     if (sum >= 35) return;
    370   }
    371   /**************************************************************************
    372   our inverse hadamard transform effectively is weighted sum of all 16 inputs
    373   with weight either 1 or -1. It has a last stage scaling of (sum+3)>>3. And
    374   dc only idct is (dc+4)>>3. So if all the sums are between -35 and 29, the
    375   output after inverse wht and idct will be all zero. A sum of absolute value
    376   smaller than 35 guarantees all 16 different (+1/-1) weighted sums in wht
    377   fall between -35 and +35.
    378   **************************************************************************/
    379   if (sum < 35) {
    380     for (i = 0; i < (*bd->eob); ++i) {
    381       int rc = vp8_default_zig_zag1d[i];
    382       bd->qcoeff[rc] = 0;
    383       bd->dqcoeff[rc] = 0;
    384     }
    385     *bd->eob = 0;
    386     *a = *l = (*bd->eob != !type);
    387   }
    388 }
    389 
    390 static void optimize_mb(MACROBLOCK *x) {
    391   int b;
    392   int type;
    393   int has_2nd_order;
    394 
    395   ENTROPY_CONTEXT_PLANES t_above, t_left;
    396   ENTROPY_CONTEXT *ta;
    397   ENTROPY_CONTEXT *tl;
    398 
    399   memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    400   memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
    401 
    402   ta = (ENTROPY_CONTEXT *)&t_above;
    403   tl = (ENTROPY_CONTEXT *)&t_left;
    404 
    405   has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED &&
    406                    x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
    407   type = has_2nd_order ? PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC;
    408 
    409   for (b = 0; b < 16; ++b) {
    410     optimize_b(x, b, type, ta + vp8_block2above[b], tl + vp8_block2left[b]);
    411   }
    412 
    413   for (b = 16; b < 24; ++b) {
    414     optimize_b(x, b, PLANE_TYPE_UV, ta + vp8_block2above[b],
    415                tl + vp8_block2left[b]);
    416   }
    417 
    418   if (has_2nd_order) {
    419     b = 24;
    420     optimize_b(x, b, PLANE_TYPE_Y2, ta + vp8_block2above[b],
    421                tl + vp8_block2left[b]);
    422     check_reset_2nd_coeffs(&x->e_mbd, PLANE_TYPE_Y2, ta + vp8_block2above[b],
    423                            tl + vp8_block2left[b]);
    424   }
    425 }
    426 
    427 void vp8_optimize_mby(MACROBLOCK *x) {
    428   int b;
    429   int type;
    430   int has_2nd_order;
    431 
    432   ENTROPY_CONTEXT_PLANES t_above, t_left;
    433   ENTROPY_CONTEXT *ta;
    434   ENTROPY_CONTEXT *tl;
    435 
    436   if (!x->e_mbd.above_context) return;
    437 
    438   if (!x->e_mbd.left_context) return;
    439 
    440   memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    441   memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
    442 
    443   ta = (ENTROPY_CONTEXT *)&t_above;
    444   tl = (ENTROPY_CONTEXT *)&t_left;
    445 
    446   has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED &&
    447                    x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
    448   type = has_2nd_order ? PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC;
    449 
    450   for (b = 0; b < 16; ++b) {
    451     optimize_b(x, b, type, ta + vp8_block2above[b], tl + vp8_block2left[b]);
    452   }
    453 
    454   if (has_2nd_order) {
    455     b = 24;
    456     optimize_b(x, b, PLANE_TYPE_Y2, ta + vp8_block2above[b],
    457                tl + vp8_block2left[b]);
    458     check_reset_2nd_coeffs(&x->e_mbd, PLANE_TYPE_Y2, ta + vp8_block2above[b],
    459                            tl + vp8_block2left[b]);
    460   }
    461 }
    462 
    463 void vp8_optimize_mbuv(MACROBLOCK *x) {
    464   int b;
    465   ENTROPY_CONTEXT_PLANES t_above, t_left;
    466   ENTROPY_CONTEXT *ta;
    467   ENTROPY_CONTEXT *tl;
    468 
    469   if (!x->e_mbd.above_context) return;
    470 
    471   if (!x->e_mbd.left_context) return;
    472 
    473   memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    474   memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
    475 
    476   ta = (ENTROPY_CONTEXT *)&t_above;
    477   tl = (ENTROPY_CONTEXT *)&t_left;
    478 
    479   for (b = 16; b < 24; ++b) {
    480     optimize_b(x, b, PLANE_TYPE_UV, ta + vp8_block2above[b],
    481                tl + vp8_block2left[b]);
    482   }
    483 }
    484 
    485 void vp8_encode_inter16x16(MACROBLOCK *x) {
    486   vp8_build_inter_predictors_mb(&x->e_mbd);
    487 
    488   vp8_subtract_mb(x);
    489 
    490   transform_mb(x);
    491 
    492   vp8_quantize_mb(x);
    493 
    494   if (x->optimize) optimize_mb(x);
    495 }
    496 
    497 /* this funciton is used by first pass only */
    498 void vp8_encode_inter16x16y(MACROBLOCK *x) {
    499   BLOCK *b = &x->block[0];
    500 
    501   vp8_build_inter16x16_predictors_mby(&x->e_mbd, x->e_mbd.dst.y_buffer,
    502                                       x->e_mbd.dst.y_stride);
    503 
    504   vp8_subtract_mby(x->src_diff, *(b->base_src), b->src_stride,
    505                    x->e_mbd.dst.y_buffer, x->e_mbd.dst.y_stride);
    506 
    507   transform_mby(x);
    508 
    509   vp8_quantize_mby(x);
    510 
    511   vp8_inverse_transform_mby(&x->e_mbd);
    512 }
    513