Home | History | Annotate | Download | only in common
      1 /*
      2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 
     12 #include <stdlib.h>
     13 #include "filter.h"
     14 #include "vpx_ports/mem.h"
     15 
/* 2-tap bilinear interpolation kernels, one row per sub-pixel offset.
 *
 * Row k holds { 128 - 16*k, 16*k }, so the two taps of every row sum to 128.
 * The vp8_bilinear_predict*_c functions in this file pick a row with
 * xoffset (horizontal pass) and yoffset (vertical pass), so both offsets
 * must lie in 0..7.
 *
 * NOTE(review): DECLARE_ALIGNED comes from vpx_ports/mem.h; presumably the
 * first argument requests 16-byte alignment - confirm against that header.
 */
DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) =
{
    { 128,   0 },
    { 112,  16 },
    {  96,  32 },
    {  80,  48 },
    {  64,  64 },
    {  48,  80 },
    {  32,  96 },
    {  16, 112 }
};
     27 
/* 6-tap sub-pixel interpolation kernels, one row per sub-pixel offset.
 *
 * The vp8_sixtap_predict*_c functions in this file pick a row with xoffset
 * (horizontal pass) and yoffset (vertical pass), so both offsets must lie in
 * 0..7.  The six taps of a row are applied, in order, to the samples at
 * positions -2, -1, 0, +1, +2, +3 relative to the sample being produced.
 * Every row sums to 128.  Rows 5, 6 and 7 are rows 3, 2 and 1 with the taps
 * in reverse order.
 */
DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]) =
{

    { 0,  0,  128,    0,   0,  0 },         /* offset 0: only the centre tap is non-zero. Note that the 1/8 pel positions are just as per alpha -0.5 bicubic */
    { 0, -6,  123,   12,  -1,  0 },
    { 2, -11, 108,   36,  -8,  1 },         /* New 1/4 pel 6 tap filter */
    { 0, -9,   93,   50,  -6,  0 },
    { 3, -16,  77,   77, -16,  3 },         /* New 1/2 pel 6 tap filter (symmetric) */
    { 0, -6,   50,   93,  -9,  0 },
    { 1, -8,   36,  108, -11,  2 },         /* New 1/4 pel 6 tap filter with the taps reversed (mirror of row 2) */
    { 0, -1,   12,  123,  -6,  0 },
};
     40 
     41 static void filter_block2d_first_pass
     42 (
     43     unsigned char *src_ptr,
     44     int *output_ptr,
     45     unsigned int src_pixels_per_line,
     46     unsigned int pixel_step,
     47     unsigned int output_height,
     48     unsigned int output_width,
     49     const short *vp8_filter
     50 )
     51 {
     52     unsigned int i, j;
     53     int  Temp;
     54 
     55     for (i = 0; i < output_height; i++)
     56     {
     57         for (j = 0; j < output_width; j++)
     58         {
     59             Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
     60                    ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
     61                    ((int)src_ptr[0]                 * vp8_filter[2]) +
     62                    ((int)src_ptr[pixel_step]         * vp8_filter[3]) +
     63                    ((int)src_ptr[2*pixel_step]       * vp8_filter[4]) +
     64                    ((int)src_ptr[3*pixel_step]       * vp8_filter[5]) +
     65                    (VP8_FILTER_WEIGHT >> 1);      /* Rounding */
     66 
     67             /* Normalize back to 0-255 */
     68             Temp = Temp >> VP8_FILTER_SHIFT;
     69 
     70             if (Temp < 0)
     71                 Temp = 0;
     72             else if (Temp > 255)
     73                 Temp = 255;
     74 
     75             output_ptr[j] = Temp;
     76             src_ptr++;
     77         }
     78 
     79         /* Next row... */
     80         src_ptr    += src_pixels_per_line - output_width;
     81         output_ptr += output_width;
     82     }
     83 }
     84 
     85 static void filter_block2d_second_pass
     86 (
     87     int *src_ptr,
     88     unsigned char *output_ptr,
     89     int output_pitch,
     90     unsigned int src_pixels_per_line,
     91     unsigned int pixel_step,
     92     unsigned int output_height,
     93     unsigned int output_width,
     94     const short *vp8_filter
     95 )
     96 {
     97     unsigned int i, j;
     98     int  Temp;
     99 
    100     for (i = 0; i < output_height; i++)
    101     {
    102         for (j = 0; j < output_width; j++)
    103         {
    104             /* Apply filter */
    105             Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
    106                    ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
    107                    ((int)src_ptr[0]                 * vp8_filter[2]) +
    108                    ((int)src_ptr[pixel_step]         * vp8_filter[3]) +
    109                    ((int)src_ptr[2*pixel_step]       * vp8_filter[4]) +
    110                    ((int)src_ptr[3*pixel_step]       * vp8_filter[5]) +
    111                    (VP8_FILTER_WEIGHT >> 1);   /* Rounding */
    112 
    113             /* Normalize back to 0-255 */
    114             Temp = Temp >> VP8_FILTER_SHIFT;
    115 
    116             if (Temp < 0)
    117                 Temp = 0;
    118             else if (Temp > 255)
    119                 Temp = 255;
    120 
    121             output_ptr[j] = (unsigned char)Temp;
    122             src_ptr++;
    123         }
    124 
    125         /* Start next row */
    126         src_ptr    += src_pixels_per_line - output_width;
    127         output_ptr += output_pitch;
    128     }
    129 }
    130 
    131 
    132 static void filter_block2d
    133 (
    134     unsigned char  *src_ptr,
    135     unsigned char  *output_ptr,
    136     unsigned int src_pixels_per_line,
    137     int output_pitch,
    138     const short  *HFilter,
    139     const short  *VFilter
    140 )
    141 {
    142     int FData[9*4]; /* Temp data buffer used in filtering */
    143 
    144     /* First filter 1-D horizontally... */
    145     filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 4, HFilter);
    146 
    147     /* then filter verticaly... */
    148     filter_block2d_second_pass(FData + 8, output_ptr, output_pitch, 4, 4, 4, 4, VFilter);
    149 }
    150 
    151 
    152 void vp8_sixtap_predict_c
    153 (
    154     unsigned char  *src_ptr,
    155     int   src_pixels_per_line,
    156     int  xoffset,
    157     int  yoffset,
    158     unsigned char *dst_ptr,
    159     int dst_pitch
    160 )
    161 {
    162     const short  *HFilter;
    163     const short  *VFilter;
    164 
    165     HFilter = vp8_sub_pel_filters[xoffset];   /* 6 tap */
    166     VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */
    167 
    168     filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter);
    169 }
    170 void vp8_sixtap_predict8x8_c
    171 (
    172     unsigned char  *src_ptr,
    173     int  src_pixels_per_line,
    174     int  xoffset,
    175     int  yoffset,
    176     unsigned char *dst_ptr,
    177     int  dst_pitch
    178 )
    179 {
    180     const short  *HFilter;
    181     const short  *VFilter;
    182     int FData[13*16];   /* Temp data buffer used in filtering */
    183 
    184     HFilter = vp8_sub_pel_filters[xoffset];   /* 6 tap */
    185     VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */
    186 
    187     /* First filter 1-D horizontally... */
    188     filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter);
    189 
    190 
    191     /* then filter verticaly... */
    192     filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter);
    193 
    194 }
    195 
    196 void vp8_sixtap_predict8x4_c
    197 (
    198     unsigned char  *src_ptr,
    199     int  src_pixels_per_line,
    200     int  xoffset,
    201     int  yoffset,
    202     unsigned char *dst_ptr,
    203     int  dst_pitch
    204 )
    205 {
    206     const short  *HFilter;
    207     const short  *VFilter;
    208     int FData[13*16];   /* Temp data buffer used in filtering */
    209 
    210     HFilter = vp8_sub_pel_filters[xoffset];   /* 6 tap */
    211     VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */
    212 
    213     /* First filter 1-D horizontally... */
    214     filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter);
    215 
    216 
    217     /* then filter verticaly... */
    218     filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter);
    219 
    220 }
    221 
    222 void vp8_sixtap_predict16x16_c
    223 (
    224     unsigned char  *src_ptr,
    225     int  src_pixels_per_line,
    226     int  xoffset,
    227     int  yoffset,
    228     unsigned char *dst_ptr,
    229     int  dst_pitch
    230 )
    231 {
    232     const short  *HFilter;
    233     const short  *VFilter;
    234     int FData[21*24];   /* Temp data buffer used in filtering */
    235 
    236 
    237     HFilter = vp8_sub_pel_filters[xoffset];   /* 6 tap */
    238     VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */
    239 
    240     /* First filter 1-D horizontally... */
    241     filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter);
    242 
    243     /* then filter verticaly... */
    244     filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter);
    245 
    246 }
    247 
    248 
    249 /****************************************************************************
    250  *
    251  *  ROUTINE       : filter_block2d_bil_first_pass
    252  *
    253  *  INPUTS        : UINT8  *src_ptr    : Pointer to source block.
    254  *                  UINT32  src_stride : Stride of source block.
    255  *                  UINT32  height     : Block height.
    256  *                  UINT32  width      : Block width.
    257  *                  INT32  *vp8_filter : Array of 2 bi-linear filter taps.
    258  *
    259  *  OUTPUTS       : INT32  *dst_ptr    : Pointer to filtered block.
    260  *
    261  *  RETURNS       : void
    262  *
    263  *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block
    264  *                  in the horizontal direction to produce the filtered output
    265  *                  block. Used to implement first-pass of 2-D separable filter.
    266  *
    267  *  SPECIAL NOTES : Produces INT32 output to retain precision for next pass.
    268  *                  Two filter taps should sum to VP8_FILTER_WEIGHT.
    269  *
    270  ****************************************************************************/
    271 static void filter_block2d_bil_first_pass
    272 (
    273     unsigned char  *src_ptr,
    274     unsigned short *dst_ptr,
    275     unsigned int    src_stride,
    276     unsigned int    height,
    277     unsigned int    width,
    278     const short    *vp8_filter
    279 )
    280 {
    281     unsigned int i, j;
    282 
    283     for (i = 0; i < height; i++)
    284     {
    285         for (j = 0; j < width; j++)
    286         {
    287             /* Apply bilinear filter */
    288             dst_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) +
    289                           ((int)src_ptr[1] * vp8_filter[1]) +
    290                           (VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT;
    291             src_ptr++;
    292         }
    293 
    294         /* Next row... */
    295         src_ptr += src_stride - width;
    296         dst_ptr += width;
    297     }
    298 }
    299 
    300 /****************************************************************************
    301  *
    302  *  ROUTINE       : filter_block2d_bil_second_pass
    303  *
    304  *  INPUTS        : INT32  *src_ptr    : Pointer to source block.
    305  *                  UINT32  dst_pitch  : Destination block pitch.
    306  *                  UINT32  height     : Block height.
    307  *                  UINT32  width      : Block width.
    308  *                  INT32  *vp8_filter : Array of 2 bi-linear filter taps.
    309  *
    310  *  OUTPUTS       : UINT16 *dst_ptr    : Pointer to filtered block.
    311  *
    312  *  RETURNS       : void
    313  *
    314  *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block
    315  *                  in the vertical direction to produce the filtered output
    316  *                  block. Used to implement second-pass of 2-D separable filter.
    317  *
    318  *  SPECIAL NOTES : Requires 32-bit input as produced by filter_block2d_bil_first_pass.
    319  *                  Two filter taps should sum to VP8_FILTER_WEIGHT.
    320  *
    321  ****************************************************************************/
    322 static void filter_block2d_bil_second_pass
    323 (
    324     unsigned short *src_ptr,
    325     unsigned char  *dst_ptr,
    326     int             dst_pitch,
    327     unsigned int    height,
    328     unsigned int    width,
    329     const short    *vp8_filter
    330 )
    331 {
    332     unsigned int  i, j;
    333     int  Temp;
    334 
    335     for (i = 0; i < height; i++)
    336     {
    337         for (j = 0; j < width; j++)
    338         {
    339             /* Apply filter */
    340             Temp = ((int)src_ptr[0]     * vp8_filter[0]) +
    341                    ((int)src_ptr[width] * vp8_filter[1]) +
    342                    (VP8_FILTER_WEIGHT / 2);
    343             dst_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT);
    344             src_ptr++;
    345         }
    346 
    347         /* Next row... */
    348         dst_ptr += dst_pitch;
    349     }
    350 }
    351 
    352 
    353 /****************************************************************************
    354  *
    355  *  ROUTINE       : filter_block2d_bil
    356  *
    357  *  INPUTS        : UINT8  *src_ptr          : Pointer to source block.
    358  *                  UINT32  src_pitch        : Stride of source block.
    359  *                  UINT32  dst_pitch        : Stride of destination block.
    360  *                  INT32  *HFilter          : Array of 2 horizontal filter taps.
    361  *                  INT32  *VFilter          : Array of 2 vertical filter taps.
    362  *                  INT32  Width             : Block width
    363  *                  INT32  Height            : Block height
    364  *
    365  *  OUTPUTS       : UINT16 *dst_ptr       : Pointer to filtered block.
    366  *
    367  *  RETURNS       : void
    368  *
    369  *  FUNCTION      : 2-D filters an input block by applying a 2-tap
    370  *                  bi-linear filter horizontally followed by a 2-tap
    371  *                  bi-linear filter vertically on the result.
    372  *
    373  *  SPECIAL NOTES : The largest block size can be handled here is 16x16
    374  *
    375  ****************************************************************************/
    376 static void filter_block2d_bil
    377 (
    378     unsigned char *src_ptr,
    379     unsigned char *dst_ptr,
    380     unsigned int   src_pitch,
    381     unsigned int   dst_pitch,
    382     const short   *HFilter,
    383     const short   *VFilter,
    384     int            Width,
    385     int            Height
    386 )
    387 {
    388 
    389     unsigned short FData[17*16];    /* Temp data buffer used in filtering */
    390 
    391     /* First filter 1-D horizontally... */
    392     filter_block2d_bil_first_pass(src_ptr, FData, src_pitch, Height + 1, Width, HFilter);
    393 
    394     /* then 1-D vertically... */
    395     filter_block2d_bil_second_pass(FData, dst_ptr, dst_pitch, Height, Width, VFilter);
    396 }
    397 
    398 
    399 void vp8_bilinear_predict4x4_c
    400 (
    401     unsigned char  *src_ptr,
    402     int   src_pixels_per_line,
    403     int  xoffset,
    404     int  yoffset,
    405     unsigned char *dst_ptr,
    406     int dst_pitch
    407 )
    408 {
    409     const short *HFilter;
    410     const short *VFilter;
    411 
    412     HFilter = vp8_bilinear_filters[xoffset];
    413     VFilter = vp8_bilinear_filters[yoffset];
    414 #if 0
    415     {
    416         int i;
    417         unsigned char temp1[16];
    418         unsigned char temp2[16];
    419 
    420         bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4);
    421         filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
    422 
    423         for (i = 0; i < 16; i++)
    424         {
    425             if (temp1[i] != temp2[i])
    426             {
    427                 bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4);
    428                 filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
    429             }
    430         }
    431     }
    432 #endif
    433     filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4);
    434 
    435 }
    436 
    437 void vp8_bilinear_predict8x8_c
    438 (
    439     unsigned char  *src_ptr,
    440     int  src_pixels_per_line,
    441     int  xoffset,
    442     int  yoffset,
    443     unsigned char *dst_ptr,
    444     int  dst_pitch
    445 )
    446 {
    447     const short *HFilter;
    448     const short *VFilter;
    449 
    450     HFilter = vp8_bilinear_filters[xoffset];
    451     VFilter = vp8_bilinear_filters[yoffset];
    452 
    453     filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8);
    454 
    455 }
    456 
    457 void vp8_bilinear_predict8x4_c
    458 (
    459     unsigned char  *src_ptr,
    460     int  src_pixels_per_line,
    461     int  xoffset,
    462     int  yoffset,
    463     unsigned char *dst_ptr,
    464     int  dst_pitch
    465 )
    466 {
    467     const short *HFilter;
    468     const short *VFilter;
    469 
    470     HFilter = vp8_bilinear_filters[xoffset];
    471     VFilter = vp8_bilinear_filters[yoffset];
    472 
    473     filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4);
    474 
    475 }
    476 
    477 void vp8_bilinear_predict16x16_c
    478 (
    479     unsigned char  *src_ptr,
    480     int  src_pixels_per_line,
    481     int  xoffset,
    482     int  yoffset,
    483     unsigned char *dst_ptr,
    484     int  dst_pitch
    485 )
    486 {
    487     const short *HFilter;
    488     const short *VFilter;
    489 
    490     HFilter = vp8_bilinear_filters[xoffset];
    491     VFilter = vp8_bilinear_filters[yoffset];
    492 
    493     filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16);
    494 }
    495