Home | History | Annotate | Download | only in common
      1 /*
      2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 
     12 #include "filter.h"
     13 
     14 DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) =
     15 {
     16     { 128,   0 },
     17     { 112,  16 },
     18     {  96,  32 },
     19     {  80,  48 },
     20     {  64,  64 },
     21     {  48,  80 },
     22     {  32,  96 },
     23     {  16, 112 }
     24 };
     25 
     26 DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]) =
     27 {
     28 
     29     { 0,  0,  128,    0,   0,  0 },         /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */
     30     { 0, -6,  123,   12,  -1,  0 },
     31     { 2, -11, 108,   36,  -8,  1 },         /* New 1/4 pel 6 tap filter */
     32     { 0, -9,   93,   50,  -6,  0 },
     33     { 3, -16,  77,   77, -16,  3 },         /* New 1/2 pel 6 tap filter */
     34     { 0, -6,   50,   93,  -9,  0 },
     35     { 1, -8,   36,  108, -11,  2 },         /* New 1/4 pel 6 tap filter */
     36     { 0, -1,   12,  123,  -6,  0 },
     37 };
     38 
     39 static void filter_block2d_first_pass
     40 (
     41     unsigned char *src_ptr,
     42     int *output_ptr,
     43     unsigned int src_pixels_per_line,
     44     unsigned int pixel_step,
     45     unsigned int output_height,
     46     unsigned int output_width,
     47     const short *vp8_filter
     48 )
     49 {
     50     unsigned int i, j;
     51     int  Temp;
     52 
     53     for (i = 0; i < output_height; i++)
     54     {
     55         for (j = 0; j < output_width; j++)
     56         {
     57             Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
     58                    ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
     59                    ((int)src_ptr[0]                 * vp8_filter[2]) +
     60                    ((int)src_ptr[pixel_step]         * vp8_filter[3]) +
     61                    ((int)src_ptr[2*pixel_step]       * vp8_filter[4]) +
     62                    ((int)src_ptr[3*pixel_step]       * vp8_filter[5]) +
     63                    (VP8_FILTER_WEIGHT >> 1);      /* Rounding */
     64 
     65             /* Normalize back to 0-255 */
     66             Temp = Temp >> VP8_FILTER_SHIFT;
     67 
     68             if (Temp < 0)
     69                 Temp = 0;
     70             else if (Temp > 255)
     71                 Temp = 255;
     72 
     73             output_ptr[j] = Temp;
     74             src_ptr++;
     75         }
     76 
     77         /* Next row... */
     78         src_ptr    += src_pixels_per_line - output_width;
     79         output_ptr += output_width;
     80     }
     81 }
     82 
     83 static void filter_block2d_second_pass
     84 (
     85     int *src_ptr,
     86     unsigned char *output_ptr,
     87     int output_pitch,
     88     unsigned int src_pixels_per_line,
     89     unsigned int pixel_step,
     90     unsigned int output_height,
     91     unsigned int output_width,
     92     const short *vp8_filter
     93 )
     94 {
     95     unsigned int i, j;
     96     int  Temp;
     97 
     98     for (i = 0; i < output_height; i++)
     99     {
    100         for (j = 0; j < output_width; j++)
    101         {
    102             /* Apply filter */
    103             Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
    104                    ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
    105                    ((int)src_ptr[0]                 * vp8_filter[2]) +
    106                    ((int)src_ptr[pixel_step]         * vp8_filter[3]) +
    107                    ((int)src_ptr[2*pixel_step]       * vp8_filter[4]) +
    108                    ((int)src_ptr[3*pixel_step]       * vp8_filter[5]) +
    109                    (VP8_FILTER_WEIGHT >> 1);   /* Rounding */
    110 
    111             /* Normalize back to 0-255 */
    112             Temp = Temp >> VP8_FILTER_SHIFT;
    113 
    114             if (Temp < 0)
    115                 Temp = 0;
    116             else if (Temp > 255)
    117                 Temp = 255;
    118 
    119             output_ptr[j] = (unsigned char)Temp;
    120             src_ptr++;
    121         }
    122 
    123         /* Start next row */
    124         src_ptr    += src_pixels_per_line - output_width;
    125         output_ptr += output_pitch;
    126     }
    127 }
    128 
    129 
    130 static void filter_block2d
    131 (
    132     unsigned char  *src_ptr,
    133     unsigned char  *output_ptr,
    134     unsigned int src_pixels_per_line,
    135     int output_pitch,
    136     const short  *HFilter,
    137     const short  *VFilter
    138 )
    139 {
    140     int FData[9*4]; /* Temp data buffer used in filtering */
    141 
    142     /* First filter 1-D horizontally... */
    143     filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 4, HFilter);
    144 
    145     /* then filter verticaly... */
    146     filter_block2d_second_pass(FData + 8, output_ptr, output_pitch, 4, 4, 4, 4, VFilter);
    147 }
    148 
    149 
    150 void vp8_sixtap_predict4x4_c
    151 (
    152     unsigned char  *src_ptr,
    153     int   src_pixels_per_line,
    154     int  xoffset,
    155     int  yoffset,
    156     unsigned char *dst_ptr,
    157     int dst_pitch
    158 )
    159 {
    160     const short  *HFilter;
    161     const short  *VFilter;
    162 
    163     HFilter = vp8_sub_pel_filters[xoffset];   /* 6 tap */
    164     VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */
    165 
    166     filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter);
    167 }
    168 void vp8_sixtap_predict8x8_c
    169 (
    170     unsigned char  *src_ptr,
    171     int  src_pixels_per_line,
    172     int  xoffset,
    173     int  yoffset,
    174     unsigned char *dst_ptr,
    175     int  dst_pitch
    176 )
    177 {
    178     const short  *HFilter;
    179     const short  *VFilter;
    180     int FData[13*16];   /* Temp data buffer used in filtering */
    181 
    182     HFilter = vp8_sub_pel_filters[xoffset];   /* 6 tap */
    183     VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */
    184 
    185     /* First filter 1-D horizontally... */
    186     filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter);
    187 
    188 
    189     /* then filter verticaly... */
    190     filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter);
    191 
    192 }
    193 
    194 void vp8_sixtap_predict8x4_c
    195 (
    196     unsigned char  *src_ptr,
    197     int  src_pixels_per_line,
    198     int  xoffset,
    199     int  yoffset,
    200     unsigned char *dst_ptr,
    201     int  dst_pitch
    202 )
    203 {
    204     const short  *HFilter;
    205     const short  *VFilter;
    206     int FData[13*16];   /* Temp data buffer used in filtering */
    207 
    208     HFilter = vp8_sub_pel_filters[xoffset];   /* 6 tap */
    209     VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */
    210 
    211     /* First filter 1-D horizontally... */
    212     filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter);
    213 
    214 
    215     /* then filter verticaly... */
    216     filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter);
    217 
    218 }
    219 
    220 void vp8_sixtap_predict16x16_c
    221 (
    222     unsigned char  *src_ptr,
    223     int  src_pixels_per_line,
    224     int  xoffset,
    225     int  yoffset,
    226     unsigned char *dst_ptr,
    227     int  dst_pitch
    228 )
    229 {
    230     const short  *HFilter;
    231     const short  *VFilter;
    232     int FData[21*24];   /* Temp data buffer used in filtering */
    233 
    234 
    235     HFilter = vp8_sub_pel_filters[xoffset];   /* 6 tap */
    236     VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */
    237 
    238     /* First filter 1-D horizontally... */
    239     filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter);
    240 
    241     /* then filter verticaly... */
    242     filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter);
    243 
    244 }
    245 
    246 
    247 /****************************************************************************
    248  *
    249  *  ROUTINE       : filter_block2d_bil_first_pass
    250  *
    251  *  INPUTS        : UINT8  *src_ptr    : Pointer to source block.
    252  *                  UINT32  src_stride : Stride of source block.
    253  *                  UINT32  height     : Block height.
    254  *                  UINT32  width      : Block width.
    255  *                  INT32  *vp8_filter : Array of 2 bi-linear filter taps.
    256  *
    257  *  OUTPUTS       : INT32  *dst_ptr    : Pointer to filtered block.
    258  *
    259  *  RETURNS       : void
    260  *
    261  *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block
    262  *                  in the horizontal direction to produce the filtered output
    263  *                  block. Used to implement first-pass of 2-D separable filter.
    264  *
    265  *  SPECIAL NOTES : Produces INT32 output to retain precision for next pass.
    266  *                  Two filter taps should sum to VP8_FILTER_WEIGHT.
    267  *
    268  ****************************************************************************/
    269 static void filter_block2d_bil_first_pass
    270 (
    271     unsigned char  *src_ptr,
    272     unsigned short *dst_ptr,
    273     unsigned int    src_stride,
    274     unsigned int    height,
    275     unsigned int    width,
    276     const short    *vp8_filter
    277 )
    278 {
    279     unsigned int i, j;
    280 
    281     for (i = 0; i < height; i++)
    282     {
    283         for (j = 0; j < width; j++)
    284         {
    285             /* Apply bilinear filter */
    286             dst_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) +
    287                           ((int)src_ptr[1] * vp8_filter[1]) +
    288                           (VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT;
    289             src_ptr++;
    290         }
    291 
    292         /* Next row... */
    293         src_ptr += src_stride - width;
    294         dst_ptr += width;
    295     }
    296 }
    297 
    298 /****************************************************************************
    299  *
    300  *  ROUTINE       : filter_block2d_bil_second_pass
    301  *
    302  *  INPUTS        : INT32  *src_ptr    : Pointer to source block.
    303  *                  UINT32  dst_pitch  : Destination block pitch.
    304  *                  UINT32  height     : Block height.
    305  *                  UINT32  width      : Block width.
    306  *                  INT32  *vp8_filter : Array of 2 bi-linear filter taps.
    307  *
    308  *  OUTPUTS       : UINT16 *dst_ptr    : Pointer to filtered block.
    309  *
    310  *  RETURNS       : void
    311  *
    312  *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block
    313  *                  in the vertical direction to produce the filtered output
    314  *                  block. Used to implement second-pass of 2-D separable filter.
    315  *
    316  *  SPECIAL NOTES : Requires 32-bit input as produced by filter_block2d_bil_first_pass.
    317  *                  Two filter taps should sum to VP8_FILTER_WEIGHT.
    318  *
    319  ****************************************************************************/
    320 static void filter_block2d_bil_second_pass
    321 (
    322     unsigned short *src_ptr,
    323     unsigned char  *dst_ptr,
    324     int             dst_pitch,
    325     unsigned int    height,
    326     unsigned int    width,
    327     const short    *vp8_filter
    328 )
    329 {
    330     unsigned int  i, j;
    331     int  Temp;
    332 
    333     for (i = 0; i < height; i++)
    334     {
    335         for (j = 0; j < width; j++)
    336         {
    337             /* Apply filter */
    338             Temp = ((int)src_ptr[0]     * vp8_filter[0]) +
    339                    ((int)src_ptr[width] * vp8_filter[1]) +
    340                    (VP8_FILTER_WEIGHT / 2);
    341             dst_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT);
    342             src_ptr++;
    343         }
    344 
    345         /* Next row... */
    346         dst_ptr += dst_pitch;
    347     }
    348 }
    349 
    350 
    351 /****************************************************************************
    352  *
    353  *  ROUTINE       : filter_block2d_bil
    354  *
    355  *  INPUTS        : UINT8  *src_ptr          : Pointer to source block.
    356  *                  UINT32  src_pitch        : Stride of source block.
    357  *                  UINT32  dst_pitch        : Stride of destination block.
    358  *                  INT32  *HFilter          : Array of 2 horizontal filter taps.
    359  *                  INT32  *VFilter          : Array of 2 vertical filter taps.
    360  *                  INT32  Width             : Block width
    361  *                  INT32  Height            : Block height
    362  *
    363  *  OUTPUTS       : UINT16 *dst_ptr       : Pointer to filtered block.
    364  *
    365  *  RETURNS       : void
    366  *
    367  *  FUNCTION      : 2-D filters an input block by applying a 2-tap
    368  *                  bi-linear filter horizontally followed by a 2-tap
    369  *                  bi-linear filter vertically on the result.
    370  *
    371  *  SPECIAL NOTES : The largest block size can be handled here is 16x16
    372  *
    373  ****************************************************************************/
    374 static void filter_block2d_bil
    375 (
    376     unsigned char *src_ptr,
    377     unsigned char *dst_ptr,
    378     unsigned int   src_pitch,
    379     unsigned int   dst_pitch,
    380     const short   *HFilter,
    381     const short   *VFilter,
    382     int            Width,
    383     int            Height
    384 )
    385 {
    386 
    387     unsigned short FData[17*16];    /* Temp data buffer used in filtering */
    388 
    389     /* First filter 1-D horizontally... */
    390     filter_block2d_bil_first_pass(src_ptr, FData, src_pitch, Height + 1, Width, HFilter);
    391 
    392     /* then 1-D vertically... */
    393     filter_block2d_bil_second_pass(FData, dst_ptr, dst_pitch, Height, Width, VFilter);
    394 }
    395 
    396 
    397 void vp8_bilinear_predict4x4_c
    398 (
    399     unsigned char  *src_ptr,
    400     int   src_pixels_per_line,
    401     int  xoffset,
    402     int  yoffset,
    403     unsigned char *dst_ptr,
    404     int dst_pitch
    405 )
    406 {
    407     const short *HFilter;
    408     const short *VFilter;
    409 
    410     HFilter = vp8_bilinear_filters[xoffset];
    411     VFilter = vp8_bilinear_filters[yoffset];
    412 #if 0
    413     {
    414         int i;
    415         unsigned char temp1[16];
    416         unsigned char temp2[16];
    417 
    418         bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4);
    419         filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
    420 
    421         for (i = 0; i < 16; i++)
    422         {
    423             if (temp1[i] != temp2[i])
    424             {
    425                 bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4);
    426                 filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
    427             }
    428         }
    429     }
    430 #endif
    431     filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4);
    432 
    433 }
    434 
    435 void vp8_bilinear_predict8x8_c
    436 (
    437     unsigned char  *src_ptr,
    438     int  src_pixels_per_line,
    439     int  xoffset,
    440     int  yoffset,
    441     unsigned char *dst_ptr,
    442     int  dst_pitch
    443 )
    444 {
    445     const short *HFilter;
    446     const short *VFilter;
    447 
    448     HFilter = vp8_bilinear_filters[xoffset];
    449     VFilter = vp8_bilinear_filters[yoffset];
    450 
    451     filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8);
    452 
    453 }
    454 
    455 void vp8_bilinear_predict8x4_c
    456 (
    457     unsigned char  *src_ptr,
    458     int  src_pixels_per_line,
    459     int  xoffset,
    460     int  yoffset,
    461     unsigned char *dst_ptr,
    462     int  dst_pitch
    463 )
    464 {
    465     const short *HFilter;
    466     const short *VFilter;
    467 
    468     HFilter = vp8_bilinear_filters[xoffset];
    469     VFilter = vp8_bilinear_filters[yoffset];
    470 
    471     filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4);
    472 
    473 }
    474 
    475 void vp8_bilinear_predict16x16_c
    476 (
    477     unsigned char  *src_ptr,
    478     int  src_pixels_per_line,
    479     int  xoffset,
    480     int  yoffset,
    481     unsigned char *dst_ptr,
    482     int  dst_pitch
    483 )
    484 {
    485     const short *HFilter;
    486     const short *VFilter;
    487 
    488     HFilter = vp8_bilinear_filters[xoffset];
    489     VFilter = vp8_bilinear_filters[yoffset];
    490 
    491     filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16);
    492 }
    493