Home | History | Annotate | Download | only in common
      1 /*
      2  *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include <assert.h>
     12 
     13 #include "./vpx_config.h"
     14 #include "./vp9_rtcd.h"
     15 #include "vp9/common/vp9_common.h"
     16 #include "vp9/common/vp9_convolve.h"
     17 #include "vp9/common/vp9_filter.h"
     18 #include "vpx/vpx_integer.h"
     19 #include "vpx_ports/mem.h"
     20 
     21 static void convolve_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
     22                              uint8_t *dst, ptrdiff_t dst_stride,
     23                              const int16_t *filter_x0, int x_step_q4,
     24                              const int16_t *filter_y, int y_step_q4,
     25                              int w, int h, int taps) {
     26   int x, y, k;
     27 
     28   /* NOTE: This assumes that the filter table is 256-byte aligned. */
     29   /* TODO(agrange) Modify to make independent of table alignment. */
     30   const int16_t *const filter_x_base =
     31       (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff);
     32 
     33   /* Adjust base pointer address for this source line */
     34   src -= taps / 2 - 1;
     35 
     36   for (y = 0; y < h; ++y) {
     37     /* Initial phase offset */
     38     int x_q4 = (int)(filter_x0 - filter_x_base) / taps;
     39 
     40     for (x = 0; x < w; ++x) {
     41       /* Per-pixel src offset */
     42       const int src_x = x_q4 >> SUBPEL_BITS;
     43       int sum = 0;
     44 
     45       /* Pointer to filter to use */
     46       const int16_t *const filter_x = filter_x_base +
     47           (x_q4 & SUBPEL_MASK) * taps;
     48 
     49       for (k = 0; k < taps; ++k)
     50         sum += src[src_x + k] * filter_x[k];
     51 
     52       dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
     53 
     54       /* Move to the next source pixel */
     55       x_q4 += x_step_q4;
     56     }
     57     src += src_stride;
     58     dst += dst_stride;
     59   }
     60 }
     61 
     62 static void convolve_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
     63                                  uint8_t *dst, ptrdiff_t dst_stride,
     64                                  const int16_t *filter_x0, int x_step_q4,
     65                                  const int16_t *filter_y, int y_step_q4,
     66                                  int w, int h, int taps) {
     67   int x, y, k;
     68 
     69   /* NOTE: This assumes that the filter table is 256-byte aligned. */
     70   /* TODO(agrange) Modify to make independent of table alignment. */
     71   const int16_t *const filter_x_base =
     72       (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff);
     73 
     74   /* Adjust base pointer address for this source line */
     75   src -= taps / 2 - 1;
     76 
     77   for (y = 0; y < h; ++y) {
     78     /* Initial phase offset */
     79     int x_q4 = (int)(filter_x0 - filter_x_base) / taps;
     80 
     81     for (x = 0; x < w; ++x) {
     82       /* Per-pixel src offset */
     83       const int src_x = x_q4 >> SUBPEL_BITS;
     84       int sum = 0;
     85 
     86       /* Pointer to filter to use */
     87       const int16_t *const filter_x = filter_x_base +
     88           (x_q4 & SUBPEL_MASK) * taps;
     89 
     90       for (k = 0; k < taps; ++k)
     91         sum += src[src_x + k] * filter_x[k];
     92 
     93       dst[x] = ROUND_POWER_OF_TWO(dst[x] +
     94                    clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
     95 
     96       /* Move to the next source pixel */
     97       x_q4 += x_step_q4;
     98     }
     99     src += src_stride;
    100     dst += dst_stride;
    101   }
    102 }
    103 
    104 static void convolve_vert_c(const uint8_t *src, ptrdiff_t src_stride,
    105                             uint8_t *dst, ptrdiff_t dst_stride,
    106                             const int16_t *filter_x, int x_step_q4,
    107                             const int16_t *filter_y0, int y_step_q4,
    108                             int w, int h, int taps) {
    109   int x, y, k;
    110 
    111   /* NOTE: This assumes that the filter table is 256-byte aligned. */
    112   /* TODO(agrange) Modify to make independent of table alignment. */
    113   const int16_t *const filter_y_base =
    114       (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff);
    115 
    116   /* Adjust base pointer address for this source column */
    117   src -= src_stride * (taps / 2 - 1);
    118 
    119   for (x = 0; x < w; ++x) {
    120     /* Initial phase offset */
    121     int y_q4 = (int)(filter_y0 - filter_y_base) / taps;
    122 
    123     for (y = 0; y < h; ++y) {
    124       /* Per-pixel src offset */
    125       const int src_y = y_q4 >> SUBPEL_BITS;
    126       int sum = 0;
    127 
    128       /* Pointer to filter to use */
    129       const int16_t *const filter_y = filter_y_base +
    130           (y_q4 & SUBPEL_MASK) * taps;
    131 
    132       for (k = 0; k < taps; ++k)
    133         sum += src[(src_y + k) * src_stride] * filter_y[k];
    134 
    135       dst[y * dst_stride] =
    136           clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
    137 
    138       /* Move to the next source pixel */
    139       y_q4 += y_step_q4;
    140     }
    141     ++src;
    142     ++dst;
    143   }
    144 }
    145 
    146 static void convolve_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
    147                                 uint8_t *dst, ptrdiff_t dst_stride,
    148                                 const int16_t *filter_x, int x_step_q4,
    149                                 const int16_t *filter_y0, int y_step_q4,
    150                                 int w, int h, int taps) {
    151   int x, y, k;
    152 
    153   /* NOTE: This assumes that the filter table is 256-byte aligned. */
    154   /* TODO(agrange) Modify to make independent of table alignment. */
    155   const int16_t *const filter_y_base =
    156       (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff);
    157 
    158   /* Adjust base pointer address for this source column */
    159   src -= src_stride * (taps / 2 - 1);
    160 
    161   for (x = 0; x < w; ++x) {
    162     /* Initial phase offset */
    163     int y_q4 = (int)(filter_y0 - filter_y_base) / taps;
    164 
    165     for (y = 0; y < h; ++y) {
    166       /* Per-pixel src offset */
    167       const int src_y = y_q4 >> SUBPEL_BITS;
    168       int sum = 0;
    169 
    170       /* Pointer to filter to use */
    171       const int16_t *const filter_y = filter_y_base +
    172           (y_q4 & SUBPEL_MASK) * taps;
    173 
    174       for (k = 0; k < taps; ++k)
    175         sum += src[(src_y + k) * src_stride] * filter_y[k];
    176 
    177       dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] +
    178            clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
    179 
    180       /* Move to the next source pixel */
    181       y_q4 += y_step_q4;
    182     }
    183     ++src;
    184     ++dst;
    185   }
    186 }
    187 
    188 static void convolve_c(const uint8_t *src, ptrdiff_t src_stride,
    189                        uint8_t *dst, ptrdiff_t dst_stride,
    190                        const int16_t *filter_x, int x_step_q4,
    191                        const int16_t *filter_y, int y_step_q4,
    192                        int w, int h, int taps) {
    193   /* Fixed size intermediate buffer places limits on parameters.
    194    * Maximum intermediate_height is 324, for y_step_q4 == 80,
    195    * h == 64, taps == 8.
    196    * y_step_q4 of 80 allows for 1/10 scale for 5 layer svc
    197    */
    198   uint8_t temp[64 * 324];
    199   int intermediate_height = (((h - 1) * y_step_q4 + 15) >> 4) + taps;
    200 
    201   assert(w <= 64);
    202   assert(h <= 64);
    203   assert(taps <= 8);
    204   assert(y_step_q4 <= 80);
    205   assert(x_step_q4 <= 80);
    206 
    207   if (intermediate_height < h)
    208     intermediate_height = h;
    209 
    210   convolve_horiz_c(src - src_stride * (taps / 2 - 1), src_stride, temp, 64,
    211                    filter_x, x_step_q4, filter_y, y_step_q4, w,
    212                    intermediate_height, taps);
    213   convolve_vert_c(temp + 64 * (taps / 2 - 1), 64, dst, dst_stride, filter_x,
    214                   x_step_q4, filter_y, y_step_q4, w, h, taps);
    215 }
    216 
    217 void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
    218                            uint8_t *dst, ptrdiff_t dst_stride,
    219                            const int16_t *filter_x, int x_step_q4,
    220                            const int16_t *filter_y, int y_step_q4,
    221                            int w, int h) {
    222   convolve_horiz_c(src, src_stride, dst, dst_stride,
    223                    filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);
    224 }
    225 
    226 void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
    227                                uint8_t *dst, ptrdiff_t dst_stride,
    228                                const int16_t *filter_x, int x_step_q4,
    229                                const int16_t *filter_y, int y_step_q4,
    230                                int w, int h) {
    231   convolve_avg_horiz_c(src, src_stride, dst, dst_stride,
    232                        filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);
    233 }
    234 
    235 void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
    236                           uint8_t *dst, ptrdiff_t dst_stride,
    237                           const int16_t *filter_x, int x_step_q4,
    238                           const int16_t *filter_y, int y_step_q4,
    239                           int w, int h) {
    240   convolve_vert_c(src, src_stride, dst, dst_stride,
    241                   filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);
    242 }
    243 
    244 void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
    245                               uint8_t *dst, ptrdiff_t dst_stride,
    246                               const int16_t *filter_x, int x_step_q4,
    247                               const int16_t *filter_y, int y_step_q4,
    248                               int w, int h) {
    249   convolve_avg_vert_c(src, src_stride, dst, dst_stride,
    250                       filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);
    251 }
    252 
    253 void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
    254                      uint8_t *dst, ptrdiff_t dst_stride,
    255                      const int16_t *filter_x, int x_step_q4,
    256                      const int16_t *filter_y, int y_step_q4,
    257                      int w, int h) {
    258   convolve_c(src, src_stride, dst, dst_stride,
    259              filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);
    260 }
    261 
    262 void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
    263                          uint8_t *dst, ptrdiff_t dst_stride,
    264                          const int16_t *filter_x, int x_step_q4,
    265                          const int16_t *filter_y, int y_step_q4,
    266                          int w, int h) {
    267   /* Fixed size intermediate buffer places limits on parameters. */
    268   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 64 * 64);
    269   assert(w <= 64);
    270   assert(h <= 64);
    271 
    272   vp9_convolve8(src, src_stride, temp, 64,
    273                filter_x, x_step_q4, filter_y, y_step_q4, w, h);
    274   vp9_convolve_avg(temp, 64, dst, dst_stride, NULL, 0, NULL, 0, w, h);
    275 }
    276 
    277 void vp9_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride,
    278                          uint8_t *dst, ptrdiff_t dst_stride,
    279                          const int16_t *filter_x, int filter_x_stride,
    280                          const int16_t *filter_y, int filter_y_stride,
    281                          int w, int h) {
    282   int r;
    283 
    284   for (r = h; r > 0; --r) {
    285     vpx_memcpy(dst, src, w);
    286     src += src_stride;
    287     dst += dst_stride;
    288   }
    289 }
    290 
    291 void vp9_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride,
    292                         uint8_t *dst, ptrdiff_t dst_stride,
    293                         const int16_t *filter_x, int filter_x_stride,
    294                         const int16_t *filter_y, int filter_y_stride,
    295                         int w, int h) {
    296   int x, y;
    297 
    298   for (y = 0; y < h; ++y) {
    299     for (x = 0; x < w; ++x)
    300       dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1);
    301 
    302     src += src_stride;
    303     dst += dst_stride;
    304   }
    305 }
    306