Home | History | Annotate | Download | only in encoder
      1 /*
      2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 
     12 #include "vp8/common/onyxc_int.h"
     13 #include "onyx_int.h"
     14 #include "vp8/common/systemdependent.h"
     15 #include "quantize.h"
     16 #include "vp8/common/alloccommon.h"
     17 #include "mcomp.h"
     18 #include "firstpass.h"
     19 #include "psnr.h"
     20 #include "vpx_scale/vpxscale.h"
     21 #include "vp8/common/extend.h"
     22 #include "ratectrl.h"
     23 #include "vp8/common/quant_common.h"
     24 #include "segmentation.h"
     25 #include "vp8/common/g_common.h"
     26 #include "vpx_scale/yv12extend.h"
     27 #include "vp8/common/postproc.h"
     28 #include "vpx_mem/vpx_mem.h"
     29 #include "vp8/common/swapyv12buffer.h"
     30 #include "vp8/common/threading.h"
     31 #include "vpx_ports/vpx_timer.h"
     32 
     33 #include <math.h>
     34 #include <limits.h>
     35 
     36 #define ALT_REF_MC_ENABLED 1    // dis/enable MC in AltRef filtering
     37 #define ALT_REF_SUBPEL_ENABLED 1 // dis/enable subpel in MC AltRef filtering
     38 
     39 #if VP8_TEMPORAL_ALT_REF
     40 
     41 static void vp8_temporal_filter_predictors_mb_c
     42 (
     43     MACROBLOCKD *x,
     44     unsigned char *y_mb_ptr,
     45     unsigned char *u_mb_ptr,
     46     unsigned char *v_mb_ptr,
     47     int stride,
     48     int mv_row,
     49     int mv_col,
     50     unsigned char *pred
     51 )
     52 {
     53     int offset;
     54     unsigned char *yptr, *uptr, *vptr;
     55 
     56     // Y
     57     yptr = y_mb_ptr + (mv_row >> 3) * stride + (mv_col >> 3);
     58 
     59     if ((mv_row | mv_col) & 7)
     60     {
     61         x->subpixel_predict16x16(yptr, stride,
     62                                     mv_col & 7, mv_row & 7, &pred[0], 16);
     63     }
     64     else
     65     {
     66         RECON_INVOKE(&x->rtcd->recon, copy16x16)(yptr, stride, &pred[0], 16);
     67     }
     68 
     69     // U & V
     70     mv_row >>= 1;
     71     mv_col >>= 1;
     72     stride = (stride + 1) >> 1;
     73     offset = (mv_row >> 3) * stride + (mv_col >> 3);
     74     uptr = u_mb_ptr + offset;
     75     vptr = v_mb_ptr + offset;
     76 
     77     if ((mv_row | mv_col) & 7)
     78     {
     79         x->subpixel_predict8x8(uptr, stride,
     80                             mv_col & 7, mv_row & 7, &pred[256], 8);
     81         x->subpixel_predict8x8(vptr, stride,
     82                             mv_col & 7, mv_row & 7, &pred[320], 8);
     83     }
     84     else
     85     {
     86         RECON_INVOKE(&x->rtcd->recon, copy8x8)(uptr, stride, &pred[256], 8);
     87         RECON_INVOKE(&x->rtcd->recon, copy8x8)(vptr, stride, &pred[320], 8);
     88     }
     89 }
     90 void vp8_temporal_filter_apply_c
     91 (
     92     unsigned char *frame1,
     93     unsigned int stride,
     94     unsigned char *frame2,
     95     unsigned int block_size,
     96     int strength,
     97     int filter_weight,
     98     unsigned int *accumulator,
     99     unsigned short *count
    100 )
    101 {
    102     int i, j, k;
    103     int modifier;
    104     int byte = 0;
    105 
    106     for (i = 0,k = 0; i < block_size; i++)
    107     {
    108         for (j = 0; j < block_size; j++, k++)
    109         {
    110 
    111             int src_byte = frame1[byte];
    112             int pixel_value = *frame2++;
    113 
    114             modifier   = src_byte - pixel_value;
    115             // This is an integer approximation of:
    116             // float coeff = (3.0 * modifer * modifier) / pow(2, strength);
    117             // modifier =  (int)roundf(coeff > 16 ? 0 : 16-coeff);
    118             modifier  *= modifier;
    119             modifier  *= 3;
    120             modifier  += 1 << (strength - 1);
    121             modifier >>= strength;
    122 
    123             if (modifier > 16)
    124                 modifier = 16;
    125 
    126             modifier = 16 - modifier;
    127             modifier *= filter_weight;
    128 
    129             count[k] += modifier;
    130             accumulator[k] += modifier * pixel_value;
    131 
    132             byte++;
    133         }
    134 
    135         byte += stride - block_size;
    136     }
    137 }
    138 
#if ALT_REF_MC_ENABLED
// Zero-filled (static storage) cost table; the motion searches below point
// their mvcost/mvsadcost tables at its centre so MV rate never biases the
// temporal-filter match search.
static int dummy_cost[2*mv_max+1];
    141 
    142 static int vp8_temporal_filter_find_matching_mb_c
    143 (
    144     VP8_COMP *cpi,
    145     YV12_BUFFER_CONFIG *arf_frame,
    146     YV12_BUFFER_CONFIG *frame_ptr,
    147     int mb_offset,
    148     int error_thresh
    149 )
    150 {
    151     MACROBLOCK *x = &cpi->mb;
    152     int thissme;
    153     int step_param;
    154     int further_steps;
    155     int n = 0;
    156     int sadpb = x->sadperbit16;
    157     int bestsme = INT_MAX;
    158     int num00 = 0;
    159 
    160     BLOCK *b = &x->block[0];
    161     BLOCKD *d = &x->e_mbd.block[0];
    162     MV best_ref_mv1 = {0,0};
    163 
    164     int *mvcost[2]    = { &dummy_cost[mv_max+1], &dummy_cost[mv_max+1] };
    165     int *mvsadcost[2] = { &dummy_cost[mv_max+1], &dummy_cost[mv_max+1] };
    166 
    167     // Save input state
    168     unsigned char **base_src = b->base_src;
    169     int src = b->src;
    170     int src_stride = b->src_stride;
    171     unsigned char **base_pre = d->base_pre;
    172     int pre = d->pre;
    173     int pre_stride = d->pre_stride;
    174 
    175     // Setup frame pointers
    176     b->base_src = &arf_frame->y_buffer;
    177     b->src_stride = arf_frame->y_stride;
    178     b->src = mb_offset;
    179 
    180     d->base_pre = &frame_ptr->y_buffer;
    181     d->pre_stride = frame_ptr->y_stride;
    182     d->pre = mb_offset;
    183 
    184     // Further step/diamond searches as necessary
    185     if (cpi->Speed < 8)
    186     {
    187         step_param = cpi->sf.first_step +
    188                     ((cpi->Speed > 5) ? 1 : 0);
    189         further_steps =
    190             (cpi->sf.max_step_search_steps - 1)-step_param;
    191     }
    192     else
    193     {
    194         step_param = cpi->sf.first_step + 2;
    195         further_steps = 0;
    196     }
    197 
    198     if (1/*cpi->sf.search_method == HEX*/)
    199     {
    200         // TODO Check that the 16x16 vf & sdf are selected here
    201         bestsme = vp8_hex_search(x, b, d,
    202             &best_ref_mv1, &d->bmi.mv.as_mv,
    203             step_param,
    204             sadpb/*x->errorperbit*/,
    205             &num00, &cpi->fn_ptr[BLOCK_16X16],
    206             mvsadcost, mvcost, &best_ref_mv1);
    207     }
    208     else
    209     {
    210         int mv_x, mv_y;
    211 
    212         bestsme = cpi->diamond_search_sad(x, b, d,
    213             &best_ref_mv1, &d->bmi.mv.as_mv,
    214             step_param,
    215             sadpb / 2/*x->errorperbit*/,
    216             &num00, &cpi->fn_ptr[BLOCK_16X16],
    217             mvsadcost, mvcost, &best_ref_mv1); //sadpb < 9
    218 
    219         // Further step/diamond searches as necessary
    220         n = 0;
    221         //further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
    222 
    223         n = num00;
    224         num00 = 0;
    225 
    226         while (n < further_steps)
    227         {
    228             n++;
    229 
    230             if (num00)
    231                 num00--;
    232             else
    233             {
    234                 thissme = cpi->diamond_search_sad(x, b, d,
    235                     &best_ref_mv1, &d->bmi.mv.as_mv,
    236                     step_param + n,
    237                     sadpb / 4/*x->errorperbit*/,
    238                     &num00, &cpi->fn_ptr[BLOCK_16X16],
    239                     mvsadcost, mvcost, &best_ref_mv1); //sadpb = 9
    240 
    241                 if (thissme < bestsme)
    242                 {
    243                     bestsme = thissme;
    244                     mv_y = d->bmi.mv.as_mv.row;
    245                     mv_x = d->bmi.mv.as_mv.col;
    246                 }
    247                 else
    248                 {
    249                     d->bmi.mv.as_mv.row = mv_y;
    250                     d->bmi.mv.as_mv.col = mv_x;
    251                 }
    252             }
    253         }
    254     }
    255 
    256 #if ALT_REF_SUBPEL_ENABLED
    257     // Try sub-pixel MC?
    258     //if (bestsme > error_thresh && bestsme < INT_MAX)
    259     {
    260         bestsme = cpi->find_fractional_mv_step(x, b, d,
    261                     &d->bmi.mv.as_mv, &best_ref_mv1,
    262                     x->errorperbit, &cpi->fn_ptr[BLOCK_16X16],
    263                     mvcost);
    264     }
    265 #endif
    266 
    267     // Save input state
    268     b->base_src = base_src;
    269     b->src = src;
    270     b->src_stride = src_stride;
    271     d->base_pre = base_pre;
    272     d->pre = pre;
    273     d->pre_stride = pre_stride;
    274 
    275     return bestsme;
    276 }
    277 #endif
    278 
// Run the temporal filter over every macroblock: for each MB, motion-search
// each candidate frame against the altref frame, blend the (weighted)
// motion-compensated predictors into per-pixel accumulators, then normalize
// the sums into the altref output buffer.
//   frame_count   - number of entries of cpi->frames[] to blend
//   alt_ref_index - index in cpi->frames[] of the frame being filtered
//   strength      - passed through to the per-block apply function
static void vp8_temporal_filter_iterate_c
(
    VP8_COMP *cpi,
    int frame_count,
    int alt_ref_index,
    int strength
)
{
    int byte;
    int frame;
    int mb_col, mb_row;
    unsigned int filter_weight;
    int mb_cols = cpi->common.mb_cols;
    int mb_rows = cpi->common.mb_rows;
    int MBs  = cpi->common.MBs;   // NOTE(review): appears unused here
    int mb_y_offset = 0;
    int mb_uv_offset = 0;
    // Per-MB running sums: 16x16 luma + 8x8 U + 8x8 V = 384 entries
    DECLARE_ALIGNED_ARRAY(16, unsigned int, accumulator, 16*16 + 8*8 + 8*8);
    DECLARE_ALIGNED_ARRAY(16, unsigned short, count, 16*16 + 8*8 + 8*8);
    MACROBLOCKD *mbd = &cpi->mb.e_mbd;
    YV12_BUFFER_CONFIG *f = cpi->frames[alt_ref_index];
    unsigned char *dst1, *dst2;
    // Motion-compensated predictor block, same Y/U/V layout as accumulator
    DECLARE_ALIGNED_ARRAY(16, unsigned char,  predictor, 16*16 + 8*8 + 8*8);

    // Save input state
    unsigned char *y_buffer = mbd->pre.y_buffer;
    unsigned char *u_buffer = mbd->pre.u_buffer;
    unsigned char *v_buffer = mbd->pre.v_buffer;

    for (mb_row = 0; mb_row < mb_rows; mb_row++)
    {
#if ALT_REF_MC_ENABLED
        // Reduced search extent by 3 for 6-tap filter & smaller UMV border
        cpi->mb.mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 19));
        cpi->mb.mv_row_max = ((cpi->common.mb_rows - 1 - mb_row) * 16)
                                + (VP8BORDERINPIXELS - 19);
#endif

        for (mb_col = 0; mb_col < mb_cols; mb_col++)
        {
            int i, j, k, w;       // NOTE(review): w and weight_cap unused
            int weight_cap;
            int stride;

            // Reset the per-pixel sums for this macroblock
            vpx_memset(accumulator, 0, 384*sizeof(unsigned int));
            vpx_memset(count, 0, 384*sizeof(unsigned short));

#if ALT_REF_MC_ENABLED
            // Reduced search extent by 3 for 6-tap filter & smaller UMV border
            cpi->mb.mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 19));
            cpi->mb.mv_col_max = ((cpi->common.mb_cols - 1 - mb_col) * 16)
                                    + (VP8BORDERINPIXELS - 19);
#endif

            for (frame = 0; frame < frame_count; frame++)
            {
                int err = 0;

                // NULL marks a frame excluded from the filter
                if (cpi->frames[frame] == NULL)
                    continue;

                mbd->block[0].bmi.mv.as_mv.row = 0;
                mbd->block[0].bmi.mv.as_mv.col = 0;

#if ALT_REF_MC_ENABLED
#define THRESH_LOW   10000
#define THRESH_HIGH  20000

                // Find best match in this frame by MC
                err = vp8_temporal_filter_find_matching_mb_c
                      (cpi,
                       cpi->frames[alt_ref_index],
                       cpi->frames[frame],
                       mb_y_offset,
                       THRESH_LOW);

#endif
                // Assign higher weight to matching MB if it's error
                // score is lower. If not applying MC default behavior
                // is to weight all MBs equal.
                // NOTE(review): THRESH_LOW/THRESH_HIGH are only defined when
                // ALT_REF_MC_ENABLED is set (it is 1 in this file).
                filter_weight = err<THRESH_LOW
                                  ? 2 : err<THRESH_HIGH ? 1 : 0;

                if (filter_weight != 0)
                {
                    // Construct the predictors
                    vp8_temporal_filter_predictors_mb_c
                        (mbd,
                         cpi->frames[frame]->y_buffer + mb_y_offset,
                         cpi->frames[frame]->u_buffer + mb_uv_offset,
                         cpi->frames[frame]->v_buffer + mb_uv_offset,
                         cpi->frames[frame]->y_stride,
                         mbd->block[0].bmi.mv.as_mv.row,
                         mbd->block[0].bmi.mv.as_mv.col,
                         predictor);

                    // Apply the filter (YUV)
                    TEMPORAL_INVOKE(&cpi->rtcd.temporal, apply)
                        (f->y_buffer + mb_y_offset,
                         f->y_stride,
                         predictor,
                         16,
                         strength,
                         filter_weight,
                         accumulator,
                         count);

                    TEMPORAL_INVOKE(&cpi->rtcd.temporal, apply)
                        (f->u_buffer + mb_uv_offset,
                         f->uv_stride,
                         predictor + 256,
                         8,
                         strength,
                         filter_weight,
                         accumulator + 256,
                         count + 256);

                    TEMPORAL_INVOKE(&cpi->rtcd.temporal, apply)
                        (f->v_buffer + mb_uv_offset,
                         f->uv_stride,
                         predictor + 320,
                         8,
                         strength,
                         filter_weight,
                         accumulator + 320,
                         count + 320);
                }
            }

            // Normalize filter output to produce AltRef frame
            dst1 = cpi->alt_ref_buffer.source_buffer.y_buffer;
            stride = cpi->alt_ref_buffer.source_buffer.y_stride;
            byte = mb_y_offset;
            for (i = 0,k = 0; i < 16; i++)
            {
                for (j = 0; j < 16; j++, k++)
                {
                    // Rounded divide: (sum + count/2) / count, via the
                    // fixed_divide reciprocal table (Q19)
                    unsigned int pval = accumulator[k] + (count[k] >> 1);
                    pval *= cpi->fixed_divide[count[k]];
                    pval >>= 19;

                    dst1[byte] = (unsigned char)pval;

                    // move to next pixel
                    byte++;
                }

                byte += stride - 16;
            }

            dst1 = cpi->alt_ref_buffer.source_buffer.u_buffer;
            dst2 = cpi->alt_ref_buffer.source_buffer.v_buffer;
            stride = cpi->alt_ref_buffer.source_buffer.uv_stride;
            byte = mb_uv_offset;
            for (i = 0,k = 256; i < 8; i++)
            {
                for (j = 0; j < 8; j++, k++)
                {
                    // V-plane entries live 64 slots (8x8) past the U entries
                    int m=k+64;

                    // U
                    unsigned int pval = accumulator[k] + (count[k] >> 1);
                    pval *= cpi->fixed_divide[count[k]];
                    pval >>= 19;
                    dst1[byte] = (unsigned char)pval;

                    // V
                    pval = accumulator[m] + (count[m] >> 1);
                    pval *= cpi->fixed_divide[count[m]];
                    pval >>= 19;
                    dst2[byte] = (unsigned char)pval;

                    // move to next pixel
                    byte++;
                }

                byte += stride - 8;
            }

            mb_y_offset += 16;
            mb_uv_offset += 8;
        }

        // Advance offsets from the end of this MB row to the start of the next
        mb_y_offset += 16*(f->y_stride-mb_cols);
        mb_uv_offset += 8*(f->uv_stride-mb_cols);
    }

    // Restore input state
    mbd->pre.y_buffer = y_buffer;
    mbd->pre.u_buffer = u_buffer;
    mbd->pre.v_buffer = v_buffer;
}
    471 
    472 void vp8_temporal_filter_prepare_c
    473 (
    474     VP8_COMP *cpi
    475 )
    476 {
    477     int frame = 0;
    478 
    479     int num_frames_backward = 0;
    480     int num_frames_forward = 0;
    481     int frames_to_blur_backward = 0;
    482     int frames_to_blur_forward = 0;
    483     int frames_to_blur = 0;
    484     int start_frame = 0;
    485     unsigned int filtered = 0;
    486 
    487     int strength = cpi->oxcf.arnr_strength;
    488 
    489     int blur_type = cpi->oxcf.arnr_type;
    490 
    491     int max_frames = cpi->active_arnr_frames;
    492 
    493     num_frames_backward = cpi->last_alt_ref_sei - cpi->source_encode_index;
    494 
    495     if (num_frames_backward < 0)
    496         num_frames_backward += cpi->oxcf.lag_in_frames;
    497 
    498     num_frames_forward = cpi->oxcf.lag_in_frames - (num_frames_backward + 1);
    499 
    500     switch (blur_type)
    501     {
    502     case 1:
    503         /////////////////////////////////////////
    504         // Backward Blur
    505 
    506         frames_to_blur_backward = num_frames_backward;
    507 
    508         if (frames_to_blur_backward >= max_frames)
    509             frames_to_blur_backward = max_frames - 1;
    510 
    511         frames_to_blur = frames_to_blur_backward + 1;
    512         break;
    513 
    514     case 2:
    515         /////////////////////////////////////////
    516         // Forward Blur
    517 
    518         frames_to_blur_forward = num_frames_forward;
    519 
    520         if (frames_to_blur_forward >= max_frames)
    521             frames_to_blur_forward = max_frames - 1;
    522 
    523         frames_to_blur = frames_to_blur_forward + 1;
    524         break;
    525 
    526     case 3:
    527     default:
    528         /////////////////////////////////////////
    529         // Center Blur
    530         frames_to_blur_forward = num_frames_forward;
    531         frames_to_blur_backward = num_frames_backward;
    532 
    533         if (frames_to_blur_forward > frames_to_blur_backward)
    534             frames_to_blur_forward = frames_to_blur_backward;
    535 
    536         if (frames_to_blur_backward > frames_to_blur_forward)
    537             frames_to_blur_backward = frames_to_blur_forward;
    538 
    539         // When max_frames is even we have 1 more frame backward than forward
    540         if (frames_to_blur_forward > (max_frames - 1) / 2)
    541             frames_to_blur_forward = ((max_frames - 1) / 2);
    542 
    543         if (frames_to_blur_backward > (max_frames / 2))
    544             frames_to_blur_backward = (max_frames / 2);
    545 
    546         frames_to_blur = frames_to_blur_backward + frames_to_blur_forward + 1;
    547         break;
    548     }
    549 
    550     start_frame = (cpi->last_alt_ref_sei
    551                     + frames_to_blur_forward) % cpi->oxcf.lag_in_frames;
    552 
    553 #ifdef DEBUGFWG
    554     // DEBUG FWG
    555     printf("max:%d FBCK:%d FFWD:%d ftb:%d ftbbck:%d ftbfwd:%d sei:%d lasei:%d start:%d"
    556            , max_frames
    557            , num_frames_backward
    558            , num_frames_forward
    559            , frames_to_blur
    560            , frames_to_blur_backward
    561            , frames_to_blur_forward
    562            , cpi->source_encode_index
    563            , cpi->last_alt_ref_sei
    564            , start_frame);
    565 #endif
    566 
    567     // Setup frame pointers, NULL indicates frame not included in filter
    568     vpx_memset(cpi->frames, 0, max_frames*sizeof(YV12_BUFFER_CONFIG *));
    569     for (frame = 0; frame < frames_to_blur; frame++)
    570     {
    571         int which_buffer =  start_frame - frame;
    572 
    573         if (which_buffer < 0)
    574             which_buffer += cpi->oxcf.lag_in_frames;
    575 
    576         cpi->frames[frames_to_blur-1-frame]
    577                 = &cpi->src_buffer[which_buffer].source_buffer;
    578     }
    579 
    580     vp8_temporal_filter_iterate_c (
    581         cpi,
    582         frames_to_blur,
    583         frames_to_blur_backward,
    584         strength );
    585 }
    586 #endif
    587