Home | History | Annotate | Download | only in encoder
      1 /*
      2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 
     12 #include "onyxc_int.h"
     13 #include "onyx_int.h"
     14 #include "systemdependent.h"
     15 #include "quantize.h"
     16 #include "alloccommon.h"
     17 #include "mcomp.h"
     18 #include "firstpass.h"
     19 #include "psnr.h"
     20 #include "vpx_scale/vpxscale.h"
     21 #include "extend.h"
     22 #include "ratectrl.h"
     23 #include "quant_common.h"
     24 #include "segmentation.h"
     25 #include "g_common.h"
     26 #include "vpx_scale/yv12extend.h"
     27 #include "postproc.h"
     28 #include "vpx_mem/vpx_mem.h"
     29 #include "swapyv12buffer.h"
     30 #include "threading.h"
     31 #include "vpx_ports/vpx_timer.h"
     32 #include "vpxerrors.h"
     33 
     34 #include <math.h>
     35 #include <limits.h>
     36 
     37 #define ALT_REF_MC_ENABLED 1    // dis/enable MC in AltRef filtering
     38 #define ALT_REF_SUBPEL_ENABLED 1 // dis/enable subpel in MC AltRef filtering
     39 
     40 #define USE_FILTER_LUT 1
     41 #if VP8_TEMPORAL_ALT_REF
     42 
     43 #if USE_FILTER_LUT
     44 static int modifier_lut[7][19] =
     45 {
     46     // Strength=0
     47     {16, 13, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
     48     // Strength=1
     49     {16, 15, 10, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
     50     // Strength=2
     51     {16, 15, 13, 9, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
     52     // Strength=3
     53     {16, 16, 15, 13, 10, 7, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
     54     // Strength=4
     55     {16, 16, 15, 14, 13, 11, 9, 7, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
     56     // Strength=5
     57     {16, 16, 16, 15, 15, 14, 13, 11, 10, 8, 7, 5, 3, 0, 0, 0, 0, 0, 0},
     58     // Strength=6
     59     {16, 16, 16, 16, 15, 15, 14, 14, 13, 12, 11, 10, 9, 8, 7, 5, 4, 2, 1}
     60 };
     61 #endif
     62 static void build_predictors_mb
     63 (
     64     MACROBLOCKD *x,
     65     unsigned char *y_mb_ptr,
     66     unsigned char *u_mb_ptr,
     67     unsigned char *v_mb_ptr,
     68     int stride,
     69     int mv_row,
     70     int mv_col,
     71     unsigned char *pred
     72 )
     73 {
     74     int offset;
     75     unsigned char *yptr, *uptr, *vptr;
     76 
     77     // Y
     78     yptr = y_mb_ptr + (mv_row >> 3) * stride + (mv_col >> 3);
     79 
     80     if ((mv_row | mv_col) & 7)
     81     {
     82 //        vp8_sixtap_predict16x16_c(yptr, stride,
     83 //                                    mv_col & 7, mv_row & 7, &pred[0], 16);
     84         x->subpixel_predict16x16(yptr, stride,
     85                                     mv_col & 7, mv_row & 7, &pred[0], 16);
     86     }
     87     else
     88     {
     89         //vp8_copy_mem16x16_c (yptr, stride, &pred[0], 16);
     90         RECON_INVOKE(&x->rtcd->recon, copy16x16)(yptr, stride, &pred[0], 16);
     91     }
     92 
     93     // U & V
     94     mv_row >>= 1;
     95     mv_col >>= 1;
     96     stride >>= 1;
     97     offset = (mv_row >> 3) * stride + (mv_col >> 3);
     98     uptr = u_mb_ptr + offset;
     99     vptr = v_mb_ptr + offset;
    100 
    101     if ((mv_row | mv_col) & 7)
    102     {
    103         x->subpixel_predict8x8(uptr, stride,
    104                             mv_col & 7, mv_row & 7, &pred[256], 8);
    105         x->subpixel_predict8x8(vptr, stride,
    106                             mv_col & 7, mv_row & 7, &pred[320], 8);
    107     }
    108     else
    109     {
    110         RECON_INVOKE(&x->rtcd->recon, copy8x8)(uptr, stride, &pred[256], 8);
    111         RECON_INVOKE(&x->rtcd->recon, copy8x8)(vptr, stride, &pred[320], 8);
    112     }
    113 }
    114 static void apply_temporal_filter
    115 (
    116     unsigned char *frame1,
    117     unsigned int stride,
    118     unsigned char *frame2,
    119     unsigned int block_size,
    120     int strength,
    121     int filter_weight,
    122     unsigned int *accumulator,
    123     unsigned int *count
    124 )
    125 {
    126     int i, j, k;
    127     int modifier;
    128     int byte = 0;
    129 
    130 #if USE_FILTER_LUT
    131     int *lut = modifier_lut[strength];
    132 #endif
    133 
    134     for (i = 0,k = 0; i < block_size; i++)
    135     {
    136         for (j = 0; j < block_size; j++, k++)
    137         {
    138 
    139             int src_byte = frame1[byte];
    140             int pixel_value = *frame2++;
    141 
    142 #if USE_FILTER_LUT
    143             // LUT implementation --
    144             // improves precision of filter
    145             modifier = abs(src_byte-pixel_value);
    146             modifier = modifier>18 ? 0 : lut[modifier];
    147 #else
    148             modifier   = src_byte;
    149             modifier  -= pixel_value;
    150             modifier  *= modifier;
    151             modifier >>= strength;
    152             modifier  *= 3;
    153 
    154             if (modifier > 16)
    155                 modifier = 16;
    156 
    157             modifier = 16 - modifier;
    158 #endif
    159             modifier *= filter_weight;
    160 
    161             count[k] += modifier;
    162             accumulator[k] += modifier * pixel_value;
    163 
    164             byte++;
    165         }
    166 
    167         byte += stride - block_size;
    168     }
    169 }
    170 
    171 #if ALT_REF_MC_ENABLED
    172 static int dummy_cost[2*mv_max+1];
    173 
    174 static int find_matching_mb
    175 (
    176     VP8_COMP *cpi,
    177     YV12_BUFFER_CONFIG *arf_frame,
    178     YV12_BUFFER_CONFIG *frame_ptr,
    179     int mb_offset,
    180     int error_thresh
    181 )
    182 {
    183     MACROBLOCK *x = &cpi->mb;
    184     int thissme;
    185     int step_param;
    186     int further_steps;
    187     int n = 0;
    188     int sadpb = x->sadperbit16;
    189     int bestsme = INT_MAX;
    190     int num00 = 0;
    191 
    192     BLOCK *b = &x->block[0];
    193     BLOCKD *d = &x->e_mbd.block[0];
    194     MV best_ref_mv1 = {0,0};
    195 
    196     int *mvcost[2]    = { &dummy_cost[mv_max+1], &dummy_cost[mv_max+1] };
    197     int *mvsadcost[2] = { &dummy_cost[mv_max+1], &dummy_cost[mv_max+1] };
    198 
    199     // Save input state
    200     unsigned char **base_src = b->base_src;
    201     int src = b->src;
    202     int src_stride = b->src_stride;
    203     unsigned char **base_pre = d->base_pre;
    204     int pre = d->pre;
    205     int pre_stride = d->pre_stride;
    206 
    207     // Setup frame pointers
    208     b->base_src = &arf_frame->y_buffer;
    209     b->src_stride = arf_frame->y_stride;
    210     b->src = mb_offset;
    211 
    212     d->base_pre = &frame_ptr->y_buffer;
    213     d->pre_stride = frame_ptr->y_stride;
    214     d->pre = mb_offset;
    215 
    216     // Further step/diamond searches as necessary
    217     if (cpi->Speed < 8)
    218     {
    219         step_param = cpi->sf.first_step +
    220                     ((cpi->Speed > 5) ? 1 : 0);
    221         further_steps =
    222             (cpi->sf.max_step_search_steps - 1)-step_param;
    223     }
    224     else
    225     {
    226         step_param = cpi->sf.first_step + 2;
    227         further_steps = 0;
    228     }
    229 
    230     if (1/*cpi->sf.search_method == HEX*/)
    231     {
    232         // TODO Check that the 16x16 vf & sdf are selected here
    233         bestsme = vp8_hex_search(x, b, d,
    234             &best_ref_mv1, &d->bmi.mv.as_mv,
    235             step_param,
    236             sadpb/*x->errorperbit*/,
    237             &num00, &cpi->fn_ptr[BLOCK_16X16],
    238             mvsadcost, mvcost);
    239     }
    240     else
    241     {
    242         int mv_x, mv_y;
    243 
    244         bestsme = cpi->diamond_search_sad(x, b, d,
    245             &best_ref_mv1, &d->bmi.mv.as_mv,
    246             step_param,
    247             sadpb / 2/*x->errorperbit*/,
    248             &num00, &cpi->fn_ptr[BLOCK_16X16],
    249             mvsadcost, mvcost); //sadpb < 9
    250 
    251         // Further step/diamond searches as necessary
    252         n = 0;
    253         //further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
    254 
    255         n = num00;
    256         num00 = 0;
    257 
    258         while (n < further_steps)
    259         {
    260             n++;
    261 
    262             if (num00)
    263                 num00--;
    264             else
    265             {
    266                 thissme = cpi->diamond_search_sad(x, b, d,
    267                     &best_ref_mv1, &d->bmi.mv.as_mv,
    268                     step_param + n,
    269                     sadpb / 4/*x->errorperbit*/,
    270                     &num00, &cpi->fn_ptr[BLOCK_16X16],
    271                     mvsadcost, mvcost); //sadpb = 9
    272 
    273                 if (thissme < bestsme)
    274                 {
    275                     bestsme = thissme;
    276                     mv_y = d->bmi.mv.as_mv.row;
    277                     mv_x = d->bmi.mv.as_mv.col;
    278                 }
    279                 else
    280                 {
    281                     d->bmi.mv.as_mv.row = mv_y;
    282                     d->bmi.mv.as_mv.col = mv_x;
    283                 }
    284             }
    285         }
    286     }
    287 
    288 #if ALT_REF_SUBPEL_ENABLED
    289     // Try sub-pixel MC?
    290     //if (bestsme > error_thresh && bestsme < INT_MAX)
    291     {
    292         bestsme = cpi->find_fractional_mv_step(x, b, d,
    293                     &d->bmi.mv.as_mv, &best_ref_mv1,
    294                     x->errorperbit, &cpi->fn_ptr[BLOCK_16X16],
    295                     cpi->mb.mvcost);
    296     }
    297 #endif
    298 
    299     // Save input state
    300     b->base_src = base_src;
    301     b->src = src;
    302     b->src_stride = src_stride;
    303     d->base_pre = base_pre;
    304     d->pre = pre;
    305     d->pre_stride = pre_stride;
    306 
    307     return bestsme;
    308 }
    309 #endif
    310 
    311 static void vp8cx_temp_blur1_c
    312 (
    313     VP8_COMP *cpi,
    314     int frame_count,
    315     int alt_ref_index,
    316     int strength
    317 )
    318 {
    319     int byte;
    320     int frame;
    321     int mb_col, mb_row;
    322     unsigned int filter_weight[MAX_LAG_BUFFERS];
    323     unsigned char *mm_ptr = cpi->fp_motion_map;
    324     int cols = cpi->common.mb_cols;
    325     int rows = cpi->common.mb_rows;
    326     int MBs  = cpi->common.MBs;
    327     int mb_y_offset = 0;
    328     int mb_uv_offset = 0;
    329     unsigned int accumulator[384];
    330     unsigned int count[384];
    331     MACROBLOCKD *mbd = &cpi->mb.e_mbd;
    332     YV12_BUFFER_CONFIG *f = cpi->frames[alt_ref_index];
    333     unsigned char *dst1, *dst2;
    334     DECLARE_ALIGNED(16, unsigned char,  predictor[384]);
    335 
    336     // Save input state
    337     unsigned char *y_buffer = mbd->pre.y_buffer;
    338     unsigned char *u_buffer = mbd->pre.u_buffer;
    339     unsigned char *v_buffer = mbd->pre.v_buffer;
    340 
    341     if (!cpi->use_weighted_temporal_filter)
    342     {
    343         // Temporal filtering is unweighted
    344         for (frame = 0; frame < frame_count; frame++)
    345             filter_weight[frame] = 1;
    346     }
    347 
    348     for (mb_row = 0; mb_row < rows; mb_row++)
    349     {
    350 #if ALT_REF_MC_ENABLED
    351         // Reduced search extent by 3 for 6-tap filter & smaller UMV border
    352         cpi->mb.mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 19));
    353         cpi->mb.mv_row_max = ((cpi->common.mb_rows - 1 - mb_row) * 16)
    354                                 + (VP8BORDERINPIXELS - 19);
    355 #endif
    356 
    357         for (mb_col = 0; mb_col < cols; mb_col++)
    358         {
    359             int i, j, k, w;
    360             int weight_cap;
    361             int stride;
    362 
    363             vpx_memset(accumulator, 0, 384*sizeof(unsigned int));
    364             vpx_memset(count, 0, 384*sizeof(unsigned int));
    365 
    366 #if ALT_REF_MC_ENABLED
    367             // Reduced search extent by 3 for 6-tap filter & smaller UMV border
    368             cpi->mb.mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 19));
    369             cpi->mb.mv_col_max = ((cpi->common.mb_cols - 1 - mb_col) * 16)
    370                                     + (VP8BORDERINPIXELS - 19);
    371 #endif
    372 
    373             // Read & process macroblock weights from motion map
    374             if (cpi->use_weighted_temporal_filter)
    375             {
    376                 weight_cap = 2;
    377 
    378                 for (frame = alt_ref_index-1; frame >= 0; frame--)
    379                 {
    380                     w = *(mm_ptr + (frame+1)*MBs);
    381                     filter_weight[frame] = w < weight_cap ? w : weight_cap;
    382                     weight_cap = w;
    383                 }
    384 
    385                 filter_weight[alt_ref_index] = 2;
    386 
    387                 weight_cap = 2;
    388 
    389                 for (frame = alt_ref_index+1; frame < frame_count; frame++)
    390                 {
    391                     w = *(mm_ptr + frame*MBs);
    392                     filter_weight[frame] = w < weight_cap ? w : weight_cap;
    393                     weight_cap = w;
    394                 }
    395 
    396             }
    397 
    398             for (frame = 0; frame < frame_count; frame++)
    399             {
    400                 int err;
    401 
    402                 if (cpi->frames[frame] == NULL)
    403                     continue;
    404 
    405                 mbd->block[0].bmi.mv.as_mv.row = 0;
    406                 mbd->block[0].bmi.mv.as_mv.col = 0;
    407 
    408 #if ALT_REF_MC_ENABLED
    409                 //if (filter_weight[frame] == 0)
    410                 {
    411 #define THRESH_LOW   10000
    412 #define THRESH_HIGH  20000
    413 
    414                     // Correlation has been lost try MC
    415                     err = find_matching_mb ( cpi,
    416                                              cpi->frames[alt_ref_index],
    417                                              cpi->frames[frame],
    418                                              mb_y_offset,
    419                                              THRESH_LOW );
    420 
    421                     if (filter_weight[frame] < 2)
    422                     {
    423                         // Set weight depending on error
    424                         filter_weight[frame] = err<THRESH_LOW
    425                                                 ? 2 : err<THRESH_HIGH ? 1 : 0;
    426                     }
    427                 }
    428 #endif
    429                 if (filter_weight[frame] != 0)
    430                 {
    431                     // Construct the predictors
    432                     build_predictors_mb (
    433                               mbd,
    434                               cpi->frames[frame]->y_buffer + mb_y_offset,
    435                               cpi->frames[frame]->u_buffer + mb_uv_offset,
    436                               cpi->frames[frame]->v_buffer + mb_uv_offset,
    437                               cpi->frames[frame]->y_stride,
    438                               mbd->block[0].bmi.mv.as_mv.row,
    439                               mbd->block[0].bmi.mv.as_mv.col,
    440                               predictor );
    441 
    442                     // Apply the filter (YUV)
    443                     apply_temporal_filter ( f->y_buffer + mb_y_offset,
    444                                             f->y_stride,
    445                                             predictor,
    446                                             16,
    447                                             strength,
    448                                             filter_weight[frame],
    449                                             accumulator,
    450                                             count );
    451 
    452                     apply_temporal_filter ( f->u_buffer + mb_uv_offset,
    453                                             f->uv_stride,
    454                                             predictor + 256,
    455                                             8,
    456                                             strength,
    457                                             filter_weight[frame],
    458                                             accumulator + 256,
    459                                             count + 256 );
    460 
    461                     apply_temporal_filter ( f->v_buffer + mb_uv_offset,
    462                                             f->uv_stride,
    463                                             predictor + 320,
    464                                             8,
    465                                             strength,
    466                                             filter_weight[frame],
    467                                             accumulator + 320,
    468                                             count + 320 );
    469                 }
    470             }
    471 
    472             // Normalize filter output to produce AltRef frame
    473             dst1 = cpi->alt_ref_buffer.source_buffer.y_buffer;
    474             stride = cpi->alt_ref_buffer.source_buffer.y_stride;
    475             byte = mb_y_offset;
    476             for (i = 0,k = 0; i < 16; i++)
    477             {
    478                 for (j = 0; j < 16; j++, k++)
    479                 {
    480                     unsigned int pval = accumulator[k] + (count[k] >> 1);
    481                     pval *= cpi->fixed_divide[count[k]];
    482                     pval >>= 19;
    483 
    484                     dst1[byte] = (unsigned char)pval;
    485 
    486                     // move to next pixel
    487                     byte++;
    488                 }
    489 
    490                 byte += stride - 16;
    491             }
    492 
    493             dst1 = cpi->alt_ref_buffer.source_buffer.u_buffer;
    494             dst2 = cpi->alt_ref_buffer.source_buffer.v_buffer;
    495             stride = cpi->alt_ref_buffer.source_buffer.uv_stride;
    496             byte = mb_uv_offset;
    497             for (i = 0,k = 256; i < 8; i++)
    498             {
    499                 for (j = 0; j < 8; j++, k++)
    500                 {
    501                     int m=k+64;
    502 
    503                     // U
    504                     unsigned int pval = accumulator[k] + (count[k] >> 1);
    505                     pval *= cpi->fixed_divide[count[k]];
    506                     pval >>= 19;
    507                     dst1[byte] = (unsigned char)pval;
    508 
    509                     // V
    510                     pval = accumulator[m] + (count[m] >> 1);
    511                     pval *= cpi->fixed_divide[count[m]];
    512                     pval >>= 19;
    513                     dst2[byte] = (unsigned char)pval;
    514 
    515                     // move to next pixel
    516                     byte++;
    517                 }
    518 
    519                 byte += stride - 8;
    520             }
    521 
    522             mm_ptr++;
    523             mb_y_offset += 16;
    524             mb_uv_offset += 8;
    525         }
    526 
    527         mb_y_offset += 16*f->y_stride-f->y_width;
    528         mb_uv_offset += 8*f->uv_stride-f->uv_width;
    529     }
    530 
    531     // Restore input state
    532     mbd->pre.y_buffer = y_buffer;
    533     mbd->pre.u_buffer = u_buffer;
    534     mbd->pre.v_buffer = v_buffer;
    535 }
    536 
    537 void vp8cx_temp_filter_c
    538 (
    539     VP8_COMP *cpi
    540 )
    541 {
    542     int frame = 0;
    543 
    544     int num_frames_backward = 0;
    545     int num_frames_forward = 0;
    546     int frames_to_blur_backward = 0;
    547     int frames_to_blur_forward = 0;
    548     int frames_to_blur = 0;
    549     int start_frame = 0;
    550     unsigned int filtered = 0;
    551 
    552     int strength = cpi->oxcf.arnr_strength;
    553 
    554     int blur_type = cpi->oxcf.arnr_type;
    555 
    556     int max_frames = cpi->active_arnr_frames;
    557 
    558     num_frames_backward = cpi->last_alt_ref_sei - cpi->source_encode_index;
    559 
    560     if (num_frames_backward < 0)
    561         num_frames_backward += cpi->oxcf.lag_in_frames;
    562 
    563     num_frames_forward = cpi->oxcf.lag_in_frames - (num_frames_backward + 1);
    564 
    565     switch (blur_type)
    566     {
    567     case 1:
    568         /////////////////////////////////////////
    569         // Backward Blur
    570 
    571         frames_to_blur_backward = num_frames_backward;
    572 
    573         if (frames_to_blur_backward >= max_frames)
    574             frames_to_blur_backward = max_frames - 1;
    575 
    576         frames_to_blur = frames_to_blur_backward + 1;
    577         break;
    578 
    579     case 2:
    580         /////////////////////////////////////////
    581         // Forward Blur
    582 
    583         frames_to_blur_forward = num_frames_forward;
    584 
    585         if (frames_to_blur_forward >= max_frames)
    586             frames_to_blur_forward = max_frames - 1;
    587 
    588         frames_to_blur = frames_to_blur_forward + 1;
    589         break;
    590 
    591     case 3:
    592     default:
    593         /////////////////////////////////////////
    594         // Center Blur
    595         frames_to_blur_forward = num_frames_forward;
    596         frames_to_blur_backward = num_frames_backward;
    597 
    598         if (frames_to_blur_forward > frames_to_blur_backward)
    599             frames_to_blur_forward = frames_to_blur_backward;
    600 
    601         if (frames_to_blur_backward > frames_to_blur_forward)
    602             frames_to_blur_backward = frames_to_blur_forward;
    603 
    604         // When max_frames is even we have 1 more frame backward than forward
    605         if (frames_to_blur_forward > (max_frames - 1) / 2)
    606             frames_to_blur_forward = ((max_frames - 1) / 2);
    607 
    608         if (frames_to_blur_backward > (max_frames / 2))
    609             frames_to_blur_backward = (max_frames / 2);
    610 
    611         frames_to_blur = frames_to_blur_backward + frames_to_blur_forward + 1;
    612         break;
    613     }
    614 
    615     start_frame = (cpi->last_alt_ref_sei
    616                     + frames_to_blur_forward) % cpi->oxcf.lag_in_frames;
    617 
    618 #ifdef DEBUGFWG
    619     // DEBUG FWG
    620     printf("max:%d FBCK:%d FFWD:%d ftb:%d ftbbck:%d ftbfwd:%d sei:%d lasei:%d start:%d"
    621            , max_frames
    622            , num_frames_backward
    623            , num_frames_forward
    624            , frames_to_blur
    625            , frames_to_blur_backward
    626            , frames_to_blur_forward
    627            , cpi->source_encode_index
    628            , cpi->last_alt_ref_sei
    629            , start_frame);
    630 #endif
    631 
    632     // Setup frame pointers, NULL indicates frame not included in filter
    633     vpx_memset(cpi->frames, 0, max_frames*sizeof(YV12_BUFFER_CONFIG *));
    634     for (frame = 0; frame < frames_to_blur; frame++)
    635     {
    636         int which_buffer =  start_frame - frame;
    637 
    638         if (which_buffer < 0)
    639             which_buffer += cpi->oxcf.lag_in_frames;
    640 
    641         cpi->frames[frames_to_blur-1-frame]
    642                 = &cpi->src_buffer[which_buffer].source_buffer;
    643     }
    644 
    645     vp8cx_temp_blur1_c (
    646         cpi,
    647         frames_to_blur,
    648         frames_to_blur_backward,
    649         strength );
    650 }
    651 #endif
    652