Home | History | Annotate | Download | only in encoder
      1 /*
      2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 
     12 #include "vpx_config.h"
     13 #include "vpx_rtcd.h"
     14 #include "encodemb.h"
     15 #include "vp8/common/reconinter.h"
     16 #include "quantize.h"
     17 #include "tokenize.h"
     18 #include "vp8/common/invtrans.h"
     19 #include "vpx_mem/vpx_mem.h"
     20 #include "rdopt.h"
     21 
     22 void vp8_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch)
     23 {
     24     unsigned char *src_ptr = (*(be->base_src) + be->src);
     25     short *diff_ptr = be->src_diff;
     26     unsigned char *pred_ptr = bd->predictor;
     27     int src_stride = be->src_stride;
     28 
     29     int r, c;
     30 
     31     for (r = 0; r < 4; r++)
     32     {
     33         for (c = 0; c < 4; c++)
     34         {
     35             diff_ptr[c] = src_ptr[c] - pred_ptr[c];
     36         }
     37 
     38         diff_ptr += pitch;
     39         pred_ptr += pitch;
     40         src_ptr  += src_stride;
     41     }
     42 }
     43 
     44 void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc,
     45                          int src_stride, unsigned char *upred,
     46                          unsigned char *vpred, int pred_stride)
     47 {
     48     short *udiff = diff + 256;
     49     short *vdiff = diff + 320;
     50 
     51     int r, c;
     52 
     53     for (r = 0; r < 8; r++)
     54     {
     55         for (c = 0; c < 8; c++)
     56         {
     57             udiff[c] = usrc[c] - upred[c];
     58         }
     59 
     60         udiff += 8;
     61         upred += pred_stride;
     62         usrc  += src_stride;
     63     }
     64 
     65     for (r = 0; r < 8; r++)
     66     {
     67         for (c = 0; c < 8; c++)
     68         {
     69             vdiff[c] = vsrc[c] - vpred[c];
     70         }
     71 
     72         vdiff += 8;
     73         vpred += pred_stride;
     74         vsrc  += src_stride;
     75     }
     76 }
     77 
     78 void vp8_subtract_mby_c(short *diff, unsigned char *src, int src_stride,
     79                         unsigned char *pred, int pred_stride)
     80 {
     81     int r, c;
     82 
     83     for (r = 0; r < 16; r++)
     84     {
     85         for (c = 0; c < 16; c++)
     86         {
     87             diff[c] = src[c] - pred[c];
     88         }
     89 
     90         diff += 16;
     91         pred += pred_stride;
     92         src  += src_stride;
     93     }
     94 }
     95 
     96 static void vp8_subtract_mb(MACROBLOCK *x)
     97 {
     98     BLOCK *b = &x->block[0];
     99 
    100     vp8_subtract_mby(x->src_diff, *(b->base_src),
    101         b->src_stride, x->e_mbd.dst.y_buffer, x->e_mbd.dst.y_stride);
    102     vp8_subtract_mbuv(x->src_diff, x->src.u_buffer,
    103         x->src.v_buffer, x->src.uv_stride, x->e_mbd.dst.u_buffer,
    104         x->e_mbd.dst.v_buffer, x->e_mbd.dst.uv_stride);
    105 }
    106 
    107 static void build_dcblock(MACROBLOCK *x)
    108 {
    109     short *src_diff_ptr = &x->src_diff[384];
    110     int i;
    111 
    112     for (i = 0; i < 16; i++)
    113     {
    114         src_diff_ptr[i] = x->coeff[i * 16];
    115     }
    116 }
    117 
    118 void vp8_transform_mbuv(MACROBLOCK *x)
    119 {
    120     int i;
    121 
    122     for (i = 16; i < 24; i += 2)
    123     {
    124         x->short_fdct8x4(&x->block[i].src_diff[0],
    125             &x->block[i].coeff[0], 16);
    126     }
    127 }
    128 
    129 
    130 void vp8_transform_intra_mby(MACROBLOCK *x)
    131 {
    132     int i;
    133 
    134     for (i = 0; i < 16; i += 2)
    135     {
    136         x->short_fdct8x4(&x->block[i].src_diff[0],
    137             &x->block[i].coeff[0], 32);
    138     }
    139 
    140     /* build dc block from 16 y dc values */
    141     build_dcblock(x);
    142 
    143     /* do 2nd order transform on the dc block */
    144     x->short_walsh4x4(&x->block[24].src_diff[0],
    145         &x->block[24].coeff[0], 8);
    146 
    147 }
    148 
    149 
    150 static void transform_mb(MACROBLOCK *x)
    151 {
    152     int i;
    153 
    154     for (i = 0; i < 16; i += 2)
    155     {
    156         x->short_fdct8x4(&x->block[i].src_diff[0],
    157             &x->block[i].coeff[0], 32);
    158     }
    159 
    160     /* build dc block from 16 y dc values */
    161     if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV)
    162         build_dcblock(x);
    163 
    164     for (i = 16; i < 24; i += 2)
    165     {
    166         x->short_fdct8x4(&x->block[i].src_diff[0],
    167             &x->block[i].coeff[0], 16);
    168     }
    169 
    170     /* do 2nd order transform on the dc block */
    171     if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV)
    172         x->short_walsh4x4(&x->block[24].src_diff[0],
    173         &x->block[24].coeff[0], 8);
    174 
    175 }
    176 
    177 
    178 static void transform_mby(MACROBLOCK *x)
    179 {
    180     int i;
    181 
    182     for (i = 0; i < 16; i += 2)
    183     {
    184         x->short_fdct8x4(&x->block[i].src_diff[0],
    185             &x->block[i].coeff[0], 32);
    186     }
    187 
    188     /* build dc block from 16 y dc values */
    189     if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV)
    190     {
    191         build_dcblock(x);
    192         x->short_walsh4x4(&x->block[24].src_diff[0],
    193             &x->block[24].coeff[0], 8);
    194     }
    195 }
    196 
    197 
    198 
    199 #define RDTRUNC(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF )
    200 
    201 typedef struct vp8_token_state vp8_token_state;
    202 
    203 struct vp8_token_state{
    204   int           rate;
    205   int           error;
    206   signed char   next;
    207   signed char   token;
    208   short         qc;
    209 };
    210 
    211 /* TODO: experiments to find optimal multiple numbers */
    212 #define Y1_RD_MULT 4
    213 #define UV_RD_MULT 2
    214 #define Y2_RD_MULT 16
    215 
    216 static const int plane_rd_mult[4]=
    217 {
    218     Y1_RD_MULT,
    219     Y2_RD_MULT,
    220     UV_RD_MULT,
    221     Y1_RD_MULT
    222 };
    223 
    224 static void optimize_b(MACROBLOCK *mb, int ib, int type,
    225                        ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l)
    226 {
    227     BLOCK *b;
    228     BLOCKD *d;
    229     vp8_token_state tokens[17][2];
    230     unsigned best_mask[2];
    231     const short *dequant_ptr;
    232     const short *coeff_ptr;
    233     short *qcoeff_ptr;
    234     short *dqcoeff_ptr;
    235     int eob;
    236     int i0;
    237     int rc;
    238     int x;
    239     int sz = 0;
    240     int next;
    241     int rdmult;
    242     int rddiv;
    243     int final_eob;
    244     int rd_cost0;
    245     int rd_cost1;
    246     int rate0;
    247     int rate1;
    248     int error0;
    249     int error1;
    250     int t0;
    251     int t1;
    252     int best;
    253     int band;
    254     int pt;
    255     int i;
    256     int err_mult = plane_rd_mult[type];
    257 
    258     b = &mb->block[ib];
    259     d = &mb->e_mbd.block[ib];
    260 
    261     /* Enable this to test the effect of RDO as a replacement for the dynamic
    262      *  zero bin instead of an augmentation of it.
    263      */
    264 #if 0
    265     vp8_strict_quantize_b(b, d);
    266 #endif
    267 
    268     dequant_ptr = d->dequant;
    269     coeff_ptr = b->coeff;
    270     qcoeff_ptr = d->qcoeff;
    271     dqcoeff_ptr = d->dqcoeff;
    272     i0 = !type;
    273     eob = *d->eob;
    274 
    275     /* Now set up a Viterbi trellis to evaluate alternative roundings. */
    276     rdmult = mb->rdmult * err_mult;
    277     if(mb->e_mbd.mode_info_context->mbmi.ref_frame==INTRA_FRAME)
    278         rdmult = (rdmult * 9)>>4;
    279 
    280     rddiv = mb->rddiv;
    281     best_mask[0] = best_mask[1] = 0;
    282     /* Initialize the sentinel node of the trellis. */
    283     tokens[eob][0].rate = 0;
    284     tokens[eob][0].error = 0;
    285     tokens[eob][0].next = 16;
    286     tokens[eob][0].token = DCT_EOB_TOKEN;
    287     tokens[eob][0].qc = 0;
    288     *(tokens[eob] + 1) = *(tokens[eob] + 0);
    289     next = eob;
    290     for (i = eob; i-- > i0;)
    291     {
    292         int base_bits;
    293         int d2;
    294         int dx;
    295 
    296         rc = vp8_default_zig_zag1d[i];
    297         x = qcoeff_ptr[rc];
    298         /* Only add a trellis state for non-zero coefficients. */
    299         if (x)
    300         {
    301             int shortcut=0;
    302             error0 = tokens[next][0].error;
    303             error1 = tokens[next][1].error;
    304             /* Evaluate the first possibility for this state. */
    305             rate0 = tokens[next][0].rate;
    306             rate1 = tokens[next][1].rate;
    307             t0 = (vp8_dct_value_tokens_ptr + x)->Token;
    308             /* Consider both possible successor states. */
    309             if (next < 16)
    310             {
    311                 band = vp8_coef_bands[i + 1];
    312                 pt = vp8_prev_token_class[t0];
    313                 rate0 +=
    314                     mb->token_costs[type][band][pt][tokens[next][0].token];
    315                 rate1 +=
    316                     mb->token_costs[type][band][pt][tokens[next][1].token];
    317             }
    318             rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);
    319             rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);
    320             if (rd_cost0 == rd_cost1)
    321             {
    322                 rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);
    323                 rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);
    324             }
    325             /* And pick the best. */
    326             best = rd_cost1 < rd_cost0;
    327             base_bits = *(vp8_dct_value_cost_ptr + x);
    328             dx = dqcoeff_ptr[rc] - coeff_ptr[rc];
    329             d2 = dx*dx;
    330             tokens[i][0].rate = base_bits + (best ? rate1 : rate0);
    331             tokens[i][0].error = d2 + (best ? error1 : error0);
    332             tokens[i][0].next = next;
    333             tokens[i][0].token = t0;
    334             tokens[i][0].qc = x;
    335             best_mask[0] |= best << i;
    336             /* Evaluate the second possibility for this state. */
    337             rate0 = tokens[next][0].rate;
    338             rate1 = tokens[next][1].rate;
    339 
    340             if((abs(x)*dequant_ptr[rc]>abs(coeff_ptr[rc])) &&
    341                (abs(x)*dequant_ptr[rc]<abs(coeff_ptr[rc])+dequant_ptr[rc]))
    342                 shortcut = 1;
    343             else
    344                 shortcut = 0;
    345 
    346             if(shortcut)
    347             {
    348                 sz = -(x < 0);
    349                 x -= 2*sz + 1;
    350             }
    351 
    352             /* Consider both possible successor states. */
    353             if (!x)
    354             {
    355                 /* If we reduced this coefficient to zero, check to see if
    356                  *  we need to move the EOB back here.
    357                  */
    358                 t0 = tokens[next][0].token == DCT_EOB_TOKEN ?
    359                     DCT_EOB_TOKEN : ZERO_TOKEN;
    360                 t1 = tokens[next][1].token == DCT_EOB_TOKEN ?
    361                     DCT_EOB_TOKEN : ZERO_TOKEN;
    362             }
    363             else
    364             {
    365                 t0=t1 = (vp8_dct_value_tokens_ptr + x)->Token;
    366             }
    367             if (next < 16)
    368             {
    369                 band = vp8_coef_bands[i + 1];
    370                 if(t0!=DCT_EOB_TOKEN)
    371                 {
    372                     pt = vp8_prev_token_class[t0];
    373                     rate0 += mb->token_costs[type][band][pt][
    374                         tokens[next][0].token];
    375                 }
    376                 if(t1!=DCT_EOB_TOKEN)
    377                 {
    378                     pt = vp8_prev_token_class[t1];
    379                     rate1 += mb->token_costs[type][band][pt][
    380                         tokens[next][1].token];
    381                 }
    382             }
    383 
    384             rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);
    385             rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);
    386             if (rd_cost0 == rd_cost1)
    387             {
    388                 rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);
    389                 rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);
    390             }
    391             /* And pick the best. */
    392             best = rd_cost1 < rd_cost0;
    393             base_bits = *(vp8_dct_value_cost_ptr + x);
    394 
    395             if(shortcut)
    396             {
    397                 dx -= (dequant_ptr[rc] + sz) ^ sz;
    398                 d2 = dx*dx;
    399             }
    400             tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
    401             tokens[i][1].error = d2 + (best ? error1 : error0);
    402             tokens[i][1].next = next;
    403             tokens[i][1].token =best?t1:t0;
    404             tokens[i][1].qc = x;
    405             best_mask[1] |= best << i;
    406             /* Finally, make this the new head of the trellis. */
    407             next = i;
    408         }
    409         /* There's no choice to make for a zero coefficient, so we don't
    410          *  add a new trellis node, but we do need to update the costs.
    411          */
    412         else
    413         {
    414             band = vp8_coef_bands[i + 1];
    415             t0 = tokens[next][0].token;
    416             t1 = tokens[next][1].token;
    417             /* Update the cost of each path if we're past the EOB token. */
    418             if (t0 != DCT_EOB_TOKEN)
    419             {
    420                 tokens[next][0].rate += mb->token_costs[type][band][0][t0];
    421                 tokens[next][0].token = ZERO_TOKEN;
    422             }
    423             if (t1 != DCT_EOB_TOKEN)
    424             {
    425                 tokens[next][1].rate += mb->token_costs[type][band][0][t1];
    426                 tokens[next][1].token = ZERO_TOKEN;
    427             }
    428             /* Don't update next, because we didn't add a new node. */
    429         }
    430     }
    431 
    432     /* Now pick the best path through the whole trellis. */
    433     band = vp8_coef_bands[i + 1];
    434     VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
    435     rate0 = tokens[next][0].rate;
    436     rate1 = tokens[next][1].rate;
    437     error0 = tokens[next][0].error;
    438     error1 = tokens[next][1].error;
    439     t0 = tokens[next][0].token;
    440     t1 = tokens[next][1].token;
    441     rate0 += mb->token_costs[type][band][pt][t0];
    442     rate1 += mb->token_costs[type][band][pt][t1];
    443     rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);
    444     rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);
    445     if (rd_cost0 == rd_cost1)
    446     {
    447         rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);
    448         rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);
    449     }
    450     best = rd_cost1 < rd_cost0;
    451     final_eob = i0 - 1;
    452     for (i = next; i < eob; i = next)
    453     {
    454         x = tokens[i][best].qc;
    455         if (x)
    456             final_eob = i;
    457         rc = vp8_default_zig_zag1d[i];
    458         qcoeff_ptr[rc] = x;
    459         dqcoeff_ptr[rc] = x * dequant_ptr[rc];
    460         next = tokens[i][best].next;
    461         best = (best_mask[best] >> i) & 1;
    462     }
    463     final_eob++;
    464 
    465     *a = *l = (final_eob != !type);
    466     *d->eob = (char)final_eob;
    467 }
    468 static void check_reset_2nd_coeffs(MACROBLOCKD *x, int type,
    469                                    ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l)
    470 {
    471     int sum=0;
    472     int i;
    473     BLOCKD *bd = &x->block[24];
    474 
    475     if(bd->dequant[0]>=35 && bd->dequant[1]>=35)
    476         return;
    477 
    478     for(i=0;i<(*bd->eob);i++)
    479     {
    480         int coef = bd->dqcoeff[vp8_default_zig_zag1d[i]];
    481         sum+= (coef>=0)?coef:-coef;
    482         if(sum>=35)
    483             return;
    484     }
    485     /**************************************************************************
    486     our inverse hadamard transform effectively is weighted sum of all 16 inputs
    487     with weight either 1 or -1. It has a last stage scaling of (sum+3)>>3. And
    488     dc only idct is (dc+4)>>3. So if all the sums are between -35 and 29, the
    489     output after inverse wht and idct will be all zero. A sum of absolute value
    490     smaller than 35 guarantees all 16 different (+1/-1) weighted sums in wht
    491     fall between -35 and +35.
    492     **************************************************************************/
    493     if(sum < 35)
    494     {
    495         for(i=0;i<(*bd->eob);i++)
    496         {
    497             int rc = vp8_default_zig_zag1d[i];
    498             bd->qcoeff[rc]=0;
    499             bd->dqcoeff[rc]=0;
    500         }
    501         *bd->eob = 0;
    502         *a = *l = (*bd->eob != !type);
    503     }
    504 }
    505 
    506 static void optimize_mb(MACROBLOCK *x)
    507 {
    508     int b;
    509     int type;
    510     int has_2nd_order;
    511 
    512     ENTROPY_CONTEXT_PLANES t_above, t_left;
    513     ENTROPY_CONTEXT *ta;
    514     ENTROPY_CONTEXT *tl;
    515 
    516     vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    517     vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
    518 
    519     ta = (ENTROPY_CONTEXT *)&t_above;
    520     tl = (ENTROPY_CONTEXT *)&t_left;
    521 
    522     has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED
    523         && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
    524     type = has_2nd_order ? PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC;
    525 
    526     for (b = 0; b < 16; b++)
    527     {
    528         optimize_b(x, b, type,
    529             ta + vp8_block2above[b], tl + vp8_block2left[b]);
    530     }
    531 
    532     for (b = 16; b < 24; b++)
    533     {
    534         optimize_b(x, b, PLANE_TYPE_UV,
    535             ta + vp8_block2above[b], tl + vp8_block2left[b]);
    536     }
    537 
    538     if (has_2nd_order)
    539     {
    540         b=24;
    541         optimize_b(x, b, PLANE_TYPE_Y2,
    542             ta + vp8_block2above[b], tl + vp8_block2left[b]);
    543         check_reset_2nd_coeffs(&x->e_mbd, PLANE_TYPE_Y2,
    544             ta + vp8_block2above[b], tl + vp8_block2left[b]);
    545     }
    546 }
    547 
    548 
    549 void vp8_optimize_mby(MACROBLOCK *x)
    550 {
    551     int b;
    552     int type;
    553     int has_2nd_order;
    554 
    555     ENTROPY_CONTEXT_PLANES t_above, t_left;
    556     ENTROPY_CONTEXT *ta;
    557     ENTROPY_CONTEXT *tl;
    558 
    559     if (!x->e_mbd.above_context)
    560         return;
    561 
    562     if (!x->e_mbd.left_context)
    563         return;
    564 
    565     vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    566     vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
    567 
    568     ta = (ENTROPY_CONTEXT *)&t_above;
    569     tl = (ENTROPY_CONTEXT *)&t_left;
    570 
    571     has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED
    572         && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
    573     type = has_2nd_order ? PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC;
    574 
    575     for (b = 0; b < 16; b++)
    576     {
    577         optimize_b(x, b, type,
    578             ta + vp8_block2above[b], tl + vp8_block2left[b]);
    579     }
    580 
    581 
    582     if (has_2nd_order)
    583     {
    584         b=24;
    585         optimize_b(x, b, PLANE_TYPE_Y2,
    586             ta + vp8_block2above[b], tl + vp8_block2left[b]);
    587         check_reset_2nd_coeffs(&x->e_mbd, PLANE_TYPE_Y2,
    588             ta + vp8_block2above[b], tl + vp8_block2left[b]);
    589     }
    590 }
    591 
    592 void vp8_optimize_mbuv(MACROBLOCK *x)
    593 {
    594     int b;
    595     ENTROPY_CONTEXT_PLANES t_above, t_left;
    596     ENTROPY_CONTEXT *ta;
    597     ENTROPY_CONTEXT *tl;
    598 
    599     if (!x->e_mbd.above_context)
    600         return;
    601 
    602     if (!x->e_mbd.left_context)
    603         return;
    604 
    605     vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    606     vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
    607 
    608     ta = (ENTROPY_CONTEXT *)&t_above;
    609     tl = (ENTROPY_CONTEXT *)&t_left;
    610 
    611     for (b = 16; b < 24; b++)
    612     {
    613         optimize_b(x, b, PLANE_TYPE_UV,
    614             ta + vp8_block2above[b], tl + vp8_block2left[b]);
    615     }
    616 }
    617 
    618 void vp8_encode_inter16x16(MACROBLOCK *x)
    619 {
    620     vp8_build_inter_predictors_mb(&x->e_mbd);
    621 
    622     vp8_subtract_mb(x);
    623 
    624     transform_mb(x);
    625 
    626     vp8_quantize_mb(x);
    627 
    628     if (x->optimize)
    629         optimize_mb(x);
    630 }
    631 
    632 /* this funciton is used by first pass only */
    633 void vp8_encode_inter16x16y(MACROBLOCK *x)
    634 {
    635     BLOCK *b = &x->block[0];
    636 
    637     vp8_build_inter16x16_predictors_mby(&x->e_mbd, x->e_mbd.dst.y_buffer,
    638                                         x->e_mbd.dst.y_stride);
    639 
    640     vp8_subtract_mby(x->src_diff, *(b->base_src),
    641         b->src_stride, x->e_mbd.dst.y_buffer, x->e_mbd.dst.y_stride);
    642 
    643     transform_mby(x);
    644 
    645     vp8_quantize_mby(x);
    646 
    647     vp8_inverse_transform_mby(&x->e_mbd);
    648 }
    649