Home | History | Annotate | Download | only in encoder
      1 /*
      2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 
     12 #include "variance.h"
     13 
     14 const int vp8_six_tap[8][6] =
     15 {
     16     { 0,  0,  128,    0,   0,  0 },         // note that 1/8 pel positions are just as per alpha -0.5 bicubic
     17     { 0, -6,  123,   12,  -1,  0 },
     18     { 2, -11, 108,   36,  -8,  1 },         // New 1/4 pel 6 tap filter
     19     { 0, -9,   93,   50,  -6,  0 },
     20     { 3, -16,  77,   77, -16,  3 },         // New 1/2 pel 6 tap filter
     21     { 0, -6,   50,   93,  -9,  0 },
     22     { 1, -8,   36,  108, -11,  2 },         // New 1/4 pel 6 tap filter
     23     { 0, -1,   12,  123,  -6,  0 }
     24 };
     25 
     26 
     27 #ifdef USEBILINEAR
     28 const int VP8_FILTER_WEIGHT = 128;
     29 const int VP8_FILTER_SHIFT  =   7;
     30 const int vp8_bilinear_taps[8][2] =
     31 {
     32     { 128,   0 },
     33     { 112,  16 },
     34     {  96,  32 },
     35     {  80,  48 },
     36     {  64,  64 },
     37     {  48,  80 },
     38     {  32,  96 },
     39     {  16, 112 }
     40 };
     41 
     42 unsigned int vp8_get_mb_ss_c
     43 (
     44     short *src_ptr
     45 )
     46 {
     47     unsigned int i = 0, sum = 0;
     48 
     49     do
     50     {
     51         sum += (src_ptr[i] * src_ptr[i]);
     52         i++;
     53     }
     54     while (i < 256);
     55 
     56     return sum;
     57 }
     58 
     59 
     60 void  vp8_variance(
     61     unsigned char *src_ptr,
     62     int  source_stride,
     63     unsigned char *ref_ptr,
     64     int  recon_stride,
     65     int  w,
     66     int  h,
     67     unsigned int *sse,
     68     int *sum)
     69 {
     70     int i, j;
     71     int diff;
     72 
     73     *sum = 0;
     74     *sse = 0;
     75 
     76     for (i = 0; i < h; i++)
     77     {
     78         for (j = 0; j < w; j++)
     79         {
     80             diff = src_ptr[j] - ref_ptr[j];
     81             *sum += diff;
     82             *sse += diff * diff;
     83         }
     84 
     85         src_ptr += source_stride;
     86         ref_ptr += recon_stride;
     87     }
     88 }
     89 
     90 unsigned int
     91 vp8_get8x8var_c
     92 (
     93     unsigned char *src_ptr,
     94     int  source_stride,
     95     unsigned char *ref_ptr,
     96     int  recon_stride,
     97     unsigned int *SSE,
     98     int *Sum
     99 )
    100 {
    101 
    102     vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, SSE, Sum);
    103     return (*SSE - (((*Sum) * (*Sum)) >> 6));
    104 }
    105 
    106 unsigned int
    107 vp8_get16x16var_c
    108 (
    109     unsigned char *src_ptr,
    110     int  source_stride,
    111     unsigned char *ref_ptr,
    112     int  recon_stride,
    113     unsigned int *SSE,
    114     int *Sum
    115 )
    116 {
    117 
    118     vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, SSE, Sum);
    119     return (*SSE - (((*Sum) * (*Sum)) >> 8));
    120 
    121 }
    122 
    123 
    124 
    125 unsigned int vp8_variance16x16_c(
    126     unsigned char *src_ptr,
    127     int  source_stride,
    128     unsigned char *ref_ptr,
    129     int  recon_stride,
    130     unsigned int *sse)
    131 {
    132     unsigned int var;
    133     int avg;
    134 
    135 
    136     vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg);
    137     *sse = var;
    138     return (var - ((avg * avg) >> 8));
    139 }
    140 
    141 unsigned int vp8_variance8x16_c(
    142     unsigned char *src_ptr,
    143     int  source_stride,
    144     unsigned char *ref_ptr,
    145     int  recon_stride,
    146     unsigned int *sse)
    147 {
    148     unsigned int var;
    149     int avg;
    150 
    151 
    152     vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &var, &avg);
    153     *sse = var;
    154     return (var - ((avg * avg) >> 7));
    155 }
    156 
    157 unsigned int vp8_variance16x8_c(
    158     unsigned char *src_ptr,
    159     int  source_stride,
    160     unsigned char *ref_ptr,
    161     int  recon_stride,
    162     unsigned int *sse)
    163 {
    164     unsigned int var;
    165     int avg;
    166 
    167 
    168     vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &var, &avg);
    169     *sse = var;
    170     return (var - ((avg * avg) >> 7));
    171 }
    172 
    173 
    174 unsigned int vp8_variance8x8_c(
    175     unsigned char *src_ptr,
    176     int  source_stride,
    177     unsigned char *ref_ptr,
    178     int  recon_stride,
    179     unsigned int *sse)
    180 {
    181     unsigned int var;
    182     int avg;
    183 
    184 
    185     vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &var, &avg);
    186     *sse = var;
    187     return (var - ((avg * avg) >> 6));
    188 }
    189 
    190 unsigned int vp8_variance4x4_c(
    191     unsigned char *src_ptr,
    192     int  source_stride,
    193     unsigned char *ref_ptr,
    194     int  recon_stride,
    195     unsigned int *sse)
    196 {
    197     unsigned int var;
    198     int avg;
    199 
    200 
    201     vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4, &var, &avg);
    202     *sse = var;
    203     return (var - ((avg * avg) >> 4));
    204 }
    205 
    206 
    207 unsigned int vp8_mse16x16_c(
    208     unsigned char *src_ptr,
    209     int  source_stride,
    210     unsigned char *ref_ptr,
    211     int  recon_stride,
    212     unsigned int *sse)
    213 {
    214     unsigned int var;
    215     int avg;
    216 
    217     vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg);
    218     *sse = var;
    219     return var;
    220 }
    221 
    222 
    223 /****************************************************************************
    224  *
    225  *  ROUTINE       : filter_block2d_bil_first_pass
    226  *
    227  *  INPUTS        : UINT8  *src_ptr          : Pointer to source block.
    228  *                  UINT32 src_pixels_per_line : Stride of input block.
    229  *                  UINT32 pixel_step        : Offset between filter input samples (see notes).
    230  *                  UINT32 output_height     : Input block height.
    231  *                  UINT32 output_width      : Input block width.
    232  *                  INT32  *vp8_filter          : Array of 2 bi-linear filter taps.
    233  *
    234  *  OUTPUTS       : INT32 *output_ptr        : Pointer to filtered block.
    235  *
    236  *  RETURNS       : void
    237  *
    238  *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block in
    239  *                  either horizontal or vertical direction to produce the
    240  *                  filtered output block. Used to implement first-pass
    241  *                  of 2-D separable filter.
    242  *
    243  *  SPECIAL NOTES : Produces INT32 output to retain precision for next pass.
    244  *                  Two filter taps should sum to VP8_FILTER_WEIGHT.
    245  *                  pixel_step defines whether the filter is applied
    246  *                  horizontally (pixel_step=1) or vertically (pixel_step=stride).
    247  *                  It defines the offset required to move from one input
    248  *                  to the next.
    249  *
    250  ****************************************************************************/
    251 void vp8e_filter_block2d_bil_first_pass
    252 (
    253     unsigned char *src_ptr,
    254     unsigned short *output_ptr,
    255     unsigned int src_pixels_per_line,
    256     int pixel_step,
    257     unsigned int output_height,
    258     unsigned int output_width,
    259     const int *vp8_filter
    260 )
    261 {
    262     unsigned int i, j;
    263 
    264     for (i = 0; i < output_height; i++)
    265     {
    266         for (j = 0; j < output_width; j++)
    267         {
    268             // Apply bilinear filter
    269             output_ptr[j] = (((int)src_ptr[0]          * vp8_filter[0]) +
    270                              ((int)src_ptr[pixel_step] * vp8_filter[1]) +
    271                              (VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT;
    272             src_ptr++;
    273         }
    274 
    275         // Next row...
    276         src_ptr    += src_pixels_per_line - output_width;
    277         output_ptr += output_width;
    278     }
    279 }
    280 
    281 /****************************************************************************
    282  *
    283  *  ROUTINE       : filter_block2d_bil_second_pass
    284  *
    285  *  INPUTS        : INT32  *src_ptr          : Pointer to source block.
    286  *                  UINT32 src_pixels_per_line : Stride of input block.
    287  *                  UINT32 pixel_step        : Offset between filter input samples (see notes).
    288  *                  UINT32 output_height     : Input block height.
    289  *                  UINT32 output_width      : Input block width.
    290  *                  INT32  *vp8_filter          : Array of 2 bi-linear filter taps.
    291  *
    292  *  OUTPUTS       : UINT16 *output_ptr       : Pointer to filtered block.
    293  *
    294  *  RETURNS       : void
    295  *
    296  *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block in
    297  *                  either horizontal or vertical direction to produce the
    298  *                  filtered output block. Used to implement second-pass
    299  *                  of 2-D separable filter.
    300  *
    301  *  SPECIAL NOTES : Requires 32-bit input as produced by filter_block2d_bil_first_pass.
    302  *                  Two filter taps should sum to VP8_FILTER_WEIGHT.
    303  *                  pixel_step defines whether the filter is applied
    304  *                  horizontally (pixel_step=1) or vertically (pixel_step=stride).
    305  *                  It defines the offset required to move from one input
    306  *                  to the next.
    307  *
    308  ****************************************************************************/
    309 void vp8e_filter_block2d_bil_second_pass
    310 (
    311     unsigned short *src_ptr,
    312     unsigned char  *output_ptr,
    313     unsigned int  src_pixels_per_line,
    314     unsigned int  pixel_step,
    315     unsigned int  output_height,
    316     unsigned int  output_width,
    317     const int *vp8_filter
    318 )
    319 {
    320     unsigned int  i, j;
    321     int  Temp;
    322 
    323     for (i = 0; i < output_height; i++)
    324     {
    325         for (j = 0; j < output_width; j++)
    326         {
    327             // Apply filter
    328             Temp = ((int)src_ptr[0]         * vp8_filter[0]) +
    329                    ((int)src_ptr[pixel_step] * vp8_filter[1]) +
    330                    (VP8_FILTER_WEIGHT / 2);
    331             output_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT);
    332             src_ptr++;
    333         }
    334 
    335         // Next row...
    336         src_ptr    += src_pixels_per_line - output_width;
    337         output_ptr += output_width;
    338     }
    339 }
    340 
    341 
    342 /****************************************************************************
    343  *
    344  *  ROUTINE       : filter_block2d_bil
    345  *
    346  *  INPUTS        : UINT8  *src_ptr          : Pointer to source block.
    347  *                  UINT32 src_pixels_per_line : Stride of input block.
    348  *                  INT32  *HFilter         : Array of 2 horizontal filter taps.
    349  *                  INT32  *VFilter         : Array of 2 vertical filter taps.
    350  *
    351  *  OUTPUTS       : UINT16 *output_ptr       : Pointer to filtered block.
    352  *
    353  *  RETURNS       : void
    354  *
    355  *  FUNCTION      : 2-D filters an 8x8 input block by applying a 2-tap
    356  *                  bi-linear filter horizontally followed by a 2-tap
    357  *                  bi-linear filter vertically on the result.
    358  *
    359  *  SPECIAL NOTES : The intermediate horizontally filtered block must produce
    360  *                  1 more point than the input block in each column. This
    361  *                  is to ensure that the 2-tap filter has one extra data-point
    362  *                  at the top of each column so filter taps do not extend
    363  *                  beyond data. Thus the output of the first stage filter
    364  *                  is an 8x9 (hx_v) block.
    365  *
    366  ****************************************************************************/
    367 void vp8e_filter_block2d_bil
    368 (
    369     unsigned char  *src_ptr,
    370     unsigned char *output_ptr,
    371     unsigned int src_pixels_per_line,
    372     int  *HFilter,
    373     int  *VFilter
    374 )
    375 {
    376 
    377     unsigned short FData[20*16];    // Temp data bufffer used in filtering
    378 
    379     // First filter 1-D horizontally...
    380     vp8e_filter_block2d_bil_first_pass(src_ptr, FData, src_pixels_per_line, 1, 9, 8, HFilter);
    381 
    382     // then 1-D vertically...
    383     vp8e_filter_block2d_bil_second_pass(FData, output_ptr, 8, 8, 8, 8, VFilter);
    384 }
    385 
    386 
    387 
    388 unsigned int vp8_sub_pixel_variance4x4_c
    389 (
    390     unsigned char  *src_ptr,
    391     int  src_pixels_per_line,
    392     int  xoffset,
    393     int  yoffset,
    394     unsigned char *dst_ptr,
    395     int dst_pixels_per_line,
    396     unsigned int *sse
    397 )
    398 {
    399     unsigned char  temp2[20*16];
    400     const int *HFilter, *VFilter;
    401     unsigned short FData3[5*4]; // Temp data bufffer used in filtering
    402 
    403     HFilter = vp8_bilinear_taps[xoffset];
    404     VFilter = vp8_bilinear_taps[yoffset];
    405 
    406     // First filter 1d Horizontal
    407     vp8e_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 5, 4, HFilter);
    408 
    409     // Now filter Verticaly
    410     vp8e_filter_block2d_bil_second_pass(FData3, temp2, 4,  4,  4,  4, VFilter);
    411 
    412     return vp8_variance4x4_c(temp2, 4, dst_ptr, dst_pixels_per_line, sse);
    413 }
    414 
    415 
    416 unsigned int vp8_sub_pixel_variance8x8_c
    417 (
    418     unsigned char  *src_ptr,
    419     int  src_pixels_per_line,
    420     int  xoffset,
    421     int  yoffset,
    422     unsigned char *dst_ptr,
    423     int dst_pixels_per_line,
    424     unsigned int *sse
    425 )
    426 {
    427     unsigned short FData3[9*8]; // Temp data bufffer used in filtering
    428     unsigned char  temp2[20*16];
    429     const int *HFilter, *VFilter;
    430 
    431     HFilter = vp8_bilinear_taps[xoffset];
    432     VFilter = vp8_bilinear_taps[yoffset];
    433 
    434     vp8e_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 8, HFilter);
    435     vp8e_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 8, 8, VFilter);
    436 
    437     return vp8_variance8x8_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
    438 }
    439 
    440 unsigned int vp8_sub_pixel_variance16x16_c
    441 (
    442     unsigned char  *src_ptr,
    443     int  src_pixels_per_line,
    444     int  xoffset,
    445     int  yoffset,
    446     unsigned char *dst_ptr,
    447     int dst_pixels_per_line,
    448     unsigned int *sse
    449 )
    450 {
    451     unsigned short FData3[17*16];   // Temp data bufffer used in filtering
    452     unsigned char  temp2[20*16];
    453     const int *HFilter, *VFilter;
    454 
    455     HFilter = vp8_bilinear_taps[xoffset];
    456     VFilter = vp8_bilinear_taps[yoffset];
    457 
    458     vp8e_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 16, HFilter);
    459     vp8e_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 16, 16, VFilter);
    460 
    461     return vp8_variance16x16_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
    462 }
    463 
    464 unsigned int vp8_sub_pixel_mse16x16_c
    465 (
    466     unsigned char  *src_ptr,
    467     int  src_pixels_per_line,
    468     int  xoffset,
    469     int  yoffset,
    470     unsigned char *dst_ptr,
    471     int dst_pixels_per_line,
    472     unsigned int *sse
    473 )
    474 {
    475     vp8_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse);
    476     return *sse;
    477 }
    478 
    479 unsigned int vp8_sub_pixel_variance16x8_c
    480 (
    481     unsigned char  *src_ptr,
    482     int  src_pixels_per_line,
    483     int  xoffset,
    484     int  yoffset,
    485     unsigned char *dst_ptr,
    486     int dst_pixels_per_line,
    487     unsigned int *sse
    488 )
    489 {
    490     unsigned short FData3[16*9];    // Temp data bufffer used in filtering
    491     unsigned char  temp2[20*16];
    492     const int *HFilter, *VFilter;
    493 
    494     HFilter = vp8_bilinear_taps[xoffset];
    495     VFilter = vp8_bilinear_taps[yoffset];
    496 
    497     vp8e_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 16, HFilter);
    498     vp8e_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 8, 16, VFilter);
    499 
    500     return vp8_variance16x8_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
    501 }
    502 
    503 unsigned int vp8_sub_pixel_variance8x16_c
    504 (
    505     unsigned char  *src_ptr,
    506     int  src_pixels_per_line,
    507     int  xoffset,
    508     int  yoffset,
    509     unsigned char *dst_ptr,
    510     int dst_pixels_per_line,
    511     unsigned int *sse
    512 )
    513 {
    514     unsigned short FData3[9*16];    // Temp data bufffer used in filtering
    515     unsigned char  temp2[20*16];
    516     const int *HFilter, *VFilter;
    517 
    518 
    519     HFilter = vp8_bilinear_taps[xoffset];
    520     VFilter = vp8_bilinear_taps[yoffset];
    521 
    522 
    523     vp8e_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 8, HFilter);
    524     vp8e_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 16, 8, VFilter);
    525 
    526     return vp8_variance8x16_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
    527 }
    528 #endif
    529