Home | History | Annotate | Download | only in vpx_dsp
      1 /*
      2  *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include <assert.h>
     12 #include <string.h>
     13 
     14 #include "./vpx_config.h"
     15 #include "./vpx_dsp_rtcd.h"
     16 #include "vpx/vpx_integer.h"
     17 #include "vpx_dsp/vpx_convolve.h"
     18 #include "vpx_dsp/vpx_dsp_common.h"
     19 #include "vpx_dsp/vpx_filter.h"
     20 #include "vpx_ports/mem.h"
     21 
     22 static void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride,
     23                            uint8_t *dst, ptrdiff_t dst_stride,
     24                            const InterpKernel *x_filters,
     25                            int x0_q4, int x_step_q4, int w, int h) {
     26   int x, y;
     27   src -= SUBPEL_TAPS / 2 - 1;
     28   for (y = 0; y < h; ++y) {
     29     int x_q4 = x0_q4;
     30     for (x = 0; x < w; ++x) {
     31       const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
     32       const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
     33       int k, sum = 0;
     34       for (k = 0; k < SUBPEL_TAPS; ++k)
     35         sum += src_x[k] * x_filter[k];
     36       dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
     37       x_q4 += x_step_q4;
     38     }
     39     src += src_stride;
     40     dst += dst_stride;
     41   }
     42 }
     43 
     44 static void convolve_avg_horiz(const uint8_t *src, ptrdiff_t src_stride,
     45                                uint8_t *dst, ptrdiff_t dst_stride,
     46                                const InterpKernel *x_filters,
     47                                int x0_q4, int x_step_q4, int w, int h) {
     48   int x, y;
     49   src -= SUBPEL_TAPS / 2 - 1;
     50   for (y = 0; y < h; ++y) {
     51     int x_q4 = x0_q4;
     52     for (x = 0; x < w; ++x) {
     53       const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
     54       const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
     55       int k, sum = 0;
     56       for (k = 0; k < SUBPEL_TAPS; ++k)
     57         sum += src_x[k] * x_filter[k];
     58       dst[x] = ROUND_POWER_OF_TWO(dst[x] +
     59           clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
     60       x_q4 += x_step_q4;
     61     }
     62     src += src_stride;
     63     dst += dst_stride;
     64   }
     65 }
     66 
     67 static void convolve_vert(const uint8_t *src, ptrdiff_t src_stride,
     68                           uint8_t *dst, ptrdiff_t dst_stride,
     69                           const InterpKernel *y_filters,
     70                           int y0_q4, int y_step_q4, int w, int h) {
     71   int x, y;
     72   src -= src_stride * (SUBPEL_TAPS / 2 - 1);
     73 
     74   for (x = 0; x < w; ++x) {
     75     int y_q4 = y0_q4;
     76     for (y = 0; y < h; ++y) {
     77       const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
     78       const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
     79       int k, sum = 0;
     80       for (k = 0; k < SUBPEL_TAPS; ++k)
     81         sum += src_y[k * src_stride] * y_filter[k];
     82       dst[y * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
     83       y_q4 += y_step_q4;
     84     }
     85     ++src;
     86     ++dst;
     87   }
     88 }
     89 
     90 static void convolve_avg_vert(const uint8_t *src, ptrdiff_t src_stride,
     91                               uint8_t *dst, ptrdiff_t dst_stride,
     92                               const InterpKernel *y_filters,
     93                               int y0_q4, int y_step_q4, int w, int h) {
     94   int x, y;
     95   src -= src_stride * (SUBPEL_TAPS / 2 - 1);
     96 
     97   for (x = 0; x < w; ++x) {
     98     int y_q4 = y0_q4;
     99     for (y = 0; y < h; ++y) {
    100       const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
    101       const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
    102       int k, sum = 0;
    103       for (k = 0; k < SUBPEL_TAPS; ++k)
    104         sum += src_y[k * src_stride] * y_filter[k];
    105       dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] +
    106           clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
    107       y_q4 += y_step_q4;
    108     }
    109     ++src;
    110     ++dst;
    111   }
    112 }
    113 
    114 static void convolve(const uint8_t *src, ptrdiff_t src_stride,
    115                      uint8_t *dst, ptrdiff_t dst_stride,
    116                      const InterpKernel *const x_filters,
    117                      int x0_q4, int x_step_q4,
    118                      const InterpKernel *const y_filters,
    119                      int y0_q4, int y_step_q4,
    120                      int w, int h) {
    121   // Note: Fixed size intermediate buffer, temp, places limits on parameters.
    122   // 2d filtering proceeds in 2 steps:
    123   //   (1) Interpolate horizontally into an intermediate buffer, temp.
    124   //   (2) Interpolate temp vertically to derive the sub-pixel result.
    125   // Deriving the maximum number of rows in the temp buffer (135):
    126   // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative).
    127   // --Largest block size is 64x64 pixels.
    128   // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the
    129   //   original frame (in 1/16th pixel units).
    130   // --Must round-up because block may be located at sub-pixel position.
    131   // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
    132   // --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
    133   uint8_t temp[135 * 64];
    134   int intermediate_height =
    135           (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;
    136 
    137   assert(w <= 64);
    138   assert(h <= 64);
    139   assert(y_step_q4 <= 32);
    140   assert(x_step_q4 <= 32);
    141 
    142   convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64,
    143                  x_filters, x0_q4, x_step_q4, w, intermediate_height);
    144   convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride,
    145                 y_filters, y0_q4, y_step_q4, w, h);
    146 }
    147 
    148 static const InterpKernel *get_filter_base(const int16_t *filter) {
    149   // NOTE: This assumes that the filter table is 256-byte aligned.
    150   // TODO(agrange) Modify to make independent of table alignment.
    151   return (const InterpKernel *)(((intptr_t)filter) & ~((intptr_t)0xFF));
    152 }
    153 
    154 static int get_filter_offset(const int16_t *f, const InterpKernel *base) {
    155   return (int)((const InterpKernel *)(intptr_t)f - base);
    156 }
    157 
    158 void vpx_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
    159                            uint8_t *dst, ptrdiff_t dst_stride,
    160                            const int16_t *filter_x, int x_step_q4,
    161                            const int16_t *filter_y, int y_step_q4,
    162                            int w, int h) {
    163   const InterpKernel *const filters_x = get_filter_base(filter_x);
    164   const int x0_q4 = get_filter_offset(filter_x, filters_x);
    165 
    166   (void)filter_y;
    167   (void)y_step_q4;
    168 
    169   convolve_horiz(src, src_stride, dst, dst_stride, filters_x,
    170                  x0_q4, x_step_q4, w, h);
    171 }
    172 
    173 void vpx_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
    174                                uint8_t *dst, ptrdiff_t dst_stride,
    175                                const int16_t *filter_x, int x_step_q4,
    176                                const int16_t *filter_y, int y_step_q4,
    177                                int w, int h) {
    178   const InterpKernel *const filters_x = get_filter_base(filter_x);
    179   const int x0_q4 = get_filter_offset(filter_x, filters_x);
    180 
    181   (void)filter_y;
    182   (void)y_step_q4;
    183 
    184   convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x,
    185                      x0_q4, x_step_q4, w, h);
    186 }
    187 
    188 void vpx_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
    189                           uint8_t *dst, ptrdiff_t dst_stride,
    190                           const int16_t *filter_x, int x_step_q4,
    191                           const int16_t *filter_y, int y_step_q4,
    192                           int w, int h) {
    193   const InterpKernel *const filters_y = get_filter_base(filter_y);
    194   const int y0_q4 = get_filter_offset(filter_y, filters_y);
    195 
    196   (void)filter_x;
    197   (void)x_step_q4;
    198 
    199   convolve_vert(src, src_stride, dst, dst_stride, filters_y,
    200                 y0_q4, y_step_q4, w, h);
    201 }
    202 
    203 void vpx_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
    204                               uint8_t *dst, ptrdiff_t dst_stride,
    205                               const int16_t *filter_x, int x_step_q4,
    206                               const int16_t *filter_y, int y_step_q4,
    207                               int w, int h) {
    208   const InterpKernel *const filters_y = get_filter_base(filter_y);
    209   const int y0_q4 = get_filter_offset(filter_y, filters_y);
    210 
    211   (void)filter_x;
    212   (void)x_step_q4;
    213 
    214   convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y,
    215                     y0_q4, y_step_q4, w, h);
    216 }
    217 
    218 void vpx_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
    219                      uint8_t *dst, ptrdiff_t dst_stride,
    220                      const int16_t *filter_x, int x_step_q4,
    221                      const int16_t *filter_y, int y_step_q4,
    222                      int w, int h) {
    223   const InterpKernel *const filters_x = get_filter_base(filter_x);
    224   const int x0_q4 = get_filter_offset(filter_x, filters_x);
    225 
    226   const InterpKernel *const filters_y = get_filter_base(filter_y);
    227   const int y0_q4 = get_filter_offset(filter_y, filters_y);
    228 
    229   convolve(src, src_stride, dst, dst_stride,
    230            filters_x, x0_q4, x_step_q4,
    231            filters_y, y0_q4, y_step_q4, w, h);
    232 }
    233 
    234 void vpx_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
    235                          uint8_t *dst, ptrdiff_t dst_stride,
    236                          const int16_t *filter_x, int x_step_q4,
    237                          const int16_t *filter_y, int y_step_q4,
    238                          int w, int h) {
    239   /* Fixed size intermediate buffer places limits on parameters. */
    240   DECLARE_ALIGNED(16, uint8_t, temp[64 * 64]);
    241   assert(w <= 64);
    242   assert(h <= 64);
    243 
    244   vpx_convolve8_c(src, src_stride, temp, 64,
    245                   filter_x, x_step_q4, filter_y, y_step_q4, w, h);
    246   vpx_convolve_avg_c(temp, 64, dst, dst_stride, NULL, 0, NULL, 0, w, h);
    247 }
    248 
    249 void vpx_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride,
    250                          uint8_t *dst, ptrdiff_t dst_stride,
    251                          const int16_t *filter_x, int filter_x_stride,
    252                          const int16_t *filter_y, int filter_y_stride,
    253                          int w, int h) {
    254   int r;
    255 
    256   (void)filter_x;  (void)filter_x_stride;
    257   (void)filter_y;  (void)filter_y_stride;
    258 
    259   for (r = h; r > 0; --r) {
    260     memcpy(dst, src, w);
    261     src += src_stride;
    262     dst += dst_stride;
    263   }
    264 }
    265 
    266 void vpx_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride,
    267                         uint8_t *dst, ptrdiff_t dst_stride,
    268                         const int16_t *filter_x, int filter_x_stride,
    269                         const int16_t *filter_y, int filter_y_stride,
    270                         int w, int h) {
    271   int x, y;
    272 
    273   (void)filter_x;  (void)filter_x_stride;
    274   (void)filter_y;  (void)filter_y_stride;
    275 
    276   for (y = 0; y < h; ++y) {
    277     for (x = 0; x < w; ++x)
    278       dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1);
    279 
    280     src += src_stride;
    281     dst += dst_stride;
    282   }
    283 }
    284 
    285 void vpx_scaled_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
    286                         uint8_t *dst, ptrdiff_t dst_stride,
    287                         const int16_t *filter_x, int x_step_q4,
    288                         const int16_t *filter_y, int y_step_q4,
    289                         int w, int h) {
    290   vpx_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
    291                         filter_y, y_step_q4, w, h);
    292 }
    293 
    294 void vpx_scaled_vert_c(const uint8_t *src, ptrdiff_t src_stride,
    295                        uint8_t *dst, ptrdiff_t dst_stride,
    296                        const int16_t *filter_x, int x_step_q4,
    297                        const int16_t *filter_y, int y_step_q4,
    298                        int w, int h) {
    299   vpx_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
    300                        filter_y, y_step_q4, w, h);
    301 }
    302 
    303 void vpx_scaled_2d_c(const uint8_t *src, ptrdiff_t src_stride,
    304                      uint8_t *dst, ptrdiff_t dst_stride,
    305                      const int16_t *filter_x, int x_step_q4,
    306                      const int16_t *filter_y, int y_step_q4,
    307                      int w, int h) {
    308   vpx_convolve8_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
    309                   filter_y, y_step_q4, w, h);
    310 }
    311 
    312 void vpx_scaled_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
    313                             uint8_t *dst, ptrdiff_t dst_stride,
    314                             const int16_t *filter_x, int x_step_q4,
    315                             const int16_t *filter_y, int y_step_q4,
    316                             int w, int h) {
    317   vpx_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter_x,
    318                             x_step_q4, filter_y, y_step_q4, w, h);
    319 }
    320 
    321 void vpx_scaled_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
    322                            uint8_t *dst, ptrdiff_t dst_stride,
    323                            const int16_t *filter_x, int x_step_q4,
    324                            const int16_t *filter_y, int y_step_q4,
    325                            int w, int h) {
    326   vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter_x,
    327                            x_step_q4, filter_y, y_step_q4, w, h);
    328 }
    329 
    330 void vpx_scaled_avg_2d_c(const uint8_t *src, ptrdiff_t src_stride,
    331                      uint8_t *dst, ptrdiff_t dst_stride,
    332                      const int16_t *filter_x, int x_step_q4,
    333                      const int16_t *filter_y, int y_step_q4,
    334                      int w, int h) {
    335   vpx_convolve8_avg_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
    336                       filter_y, y_step_q4, w, h);
    337 }
    338 
    339 #if CONFIG_VP9_HIGHBITDEPTH
    340 static void highbd_convolve_horiz(const uint8_t *src8, ptrdiff_t src_stride,
    341                                   uint8_t *dst8, ptrdiff_t dst_stride,
    342                                   const InterpKernel *x_filters,
    343                                   int x0_q4, int x_step_q4,
    344                                   int w, int h, int bd) {
    345   int x, y;
    346   uint16_t *src = CONVERT_TO_SHORTPTR(src8);
    347   uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
    348   src -= SUBPEL_TAPS / 2 - 1;
    349   for (y = 0; y < h; ++y) {
    350     int x_q4 = x0_q4;
    351     for (x = 0; x < w; ++x) {
    352       const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
    353       const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
    354       int k, sum = 0;
    355       for (k = 0; k < SUBPEL_TAPS; ++k)
    356         sum += src_x[k] * x_filter[k];
    357       dst[x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
    358       x_q4 += x_step_q4;
    359     }
    360     src += src_stride;
    361     dst += dst_stride;
    362   }
    363 }
    364 
    365 static void highbd_convolve_avg_horiz(const uint8_t *src8, ptrdiff_t src_stride,
    366                                       uint8_t *dst8, ptrdiff_t dst_stride,
    367                                       const InterpKernel *x_filters,
    368                                       int x0_q4, int x_step_q4,
    369                                       int w, int h, int bd) {
    370   int x, y;
    371   uint16_t *src = CONVERT_TO_SHORTPTR(src8);
    372   uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
    373   src -= SUBPEL_TAPS / 2 - 1;
    374   for (y = 0; y < h; ++y) {
    375     int x_q4 = x0_q4;
    376     for (x = 0; x < w; ++x) {
    377       const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
    378       const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
    379       int k, sum = 0;
    380       for (k = 0; k < SUBPEL_TAPS; ++k)
    381         sum += src_x[k] * x_filter[k];
    382       dst[x] = ROUND_POWER_OF_TWO(dst[x] +
    383           clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd), 1);
    384       x_q4 += x_step_q4;
    385     }
    386     src += src_stride;
    387     dst += dst_stride;
    388   }
    389 }
    390 
    391 static void highbd_convolve_vert(const uint8_t *src8, ptrdiff_t src_stride,
    392                                  uint8_t *dst8, ptrdiff_t dst_stride,
    393                                  const InterpKernel *y_filters,
    394                                  int y0_q4, int y_step_q4, int w, int h,
    395                                  int bd) {
    396   int x, y;
    397   uint16_t *src = CONVERT_TO_SHORTPTR(src8);
    398   uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
    399   src -= src_stride * (SUBPEL_TAPS / 2 - 1);
    400   for (x = 0; x < w; ++x) {
    401     int y_q4 = y0_q4;
    402     for (y = 0; y < h; ++y) {
    403       const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
    404       const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
    405       int k, sum = 0;
    406       for (k = 0; k < SUBPEL_TAPS; ++k)
    407         sum += src_y[k * src_stride] * y_filter[k];
    408       dst[y * dst_stride] = clip_pixel_highbd(
    409           ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
    410       y_q4 += y_step_q4;
    411     }
    412     ++src;
    413     ++dst;
    414   }
    415 }
    416 
    417 static void highbd_convolve_avg_vert(const uint8_t *src8, ptrdiff_t src_stride,
    418                                      uint8_t *dst8, ptrdiff_t dst_stride,
    419                                      const InterpKernel *y_filters,
    420                                      int y0_q4, int y_step_q4, int w, int h,
    421                                      int bd) {
    422   int x, y;
    423   uint16_t *src = CONVERT_TO_SHORTPTR(src8);
    424   uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
    425   src -= src_stride * (SUBPEL_TAPS / 2 - 1);
    426   for (x = 0; x < w; ++x) {
    427     int y_q4 = y0_q4;
    428     for (y = 0; y < h; ++y) {
    429       const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
    430       const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
    431       int k, sum = 0;
    432       for (k = 0; k < SUBPEL_TAPS; ++k)
    433         sum += src_y[k * src_stride] * y_filter[k];
    434       dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] +
    435           clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd), 1);
    436       y_q4 += y_step_q4;
    437     }
    438     ++src;
    439     ++dst;
    440   }
    441 }
    442 
    443 static void highbd_convolve(const uint8_t *src, ptrdiff_t src_stride,
    444                             uint8_t *dst, ptrdiff_t dst_stride,
    445                             const InterpKernel *const x_filters,
    446                             int x0_q4, int x_step_q4,
    447                             const InterpKernel *const y_filters,
    448                             int y0_q4, int y_step_q4,
    449                             int w, int h, int bd) {
    450   // Note: Fixed size intermediate buffer, temp, places limits on parameters.
    451   // 2d filtering proceeds in 2 steps:
    452   //   (1) Interpolate horizontally into an intermediate buffer, temp.
    453   //   (2) Interpolate temp vertically to derive the sub-pixel result.
    454   // Deriving the maximum number of rows in the temp buffer (135):
    455   // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative).
    456   // --Largest block size is 64x64 pixels.
    457   // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the
    458   //   original frame (in 1/16th pixel units).
    459   // --Must round-up because block may be located at sub-pixel position.
    460   // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
    461   // --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
    462   uint16_t temp[64 * 135];
    463   int intermediate_height =
    464           (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;
    465 
    466   assert(w <= 64);
    467   assert(h <= 64);
    468   assert(y_step_q4 <= 32);
    469   assert(x_step_q4 <= 32);
    470 
    471   highbd_convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1),
    472                         src_stride, CONVERT_TO_BYTEPTR(temp), 64,
    473                         x_filters, x0_q4, x_step_q4, w,
    474                         intermediate_height, bd);
    475   highbd_convolve_vert(CONVERT_TO_BYTEPTR(temp) + 64 * (SUBPEL_TAPS / 2 - 1),
    476                        64, dst, dst_stride, y_filters, y0_q4, y_step_q4,
    477                        w, h, bd);
    478 }
    479 
    480 
    481 void vpx_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
    482                                   uint8_t *dst, ptrdiff_t dst_stride,
    483                                   const int16_t *filter_x, int x_step_q4,
    484                                   const int16_t *filter_y, int y_step_q4,
    485                                   int w, int h, int bd) {
    486   const InterpKernel *const filters_x = get_filter_base(filter_x);
    487   const int x0_q4 = get_filter_offset(filter_x, filters_x);
    488   (void)filter_y;
    489   (void)y_step_q4;
    490 
    491   highbd_convolve_horiz(src, src_stride, dst, dst_stride, filters_x,
    492                         x0_q4, x_step_q4, w, h, bd);
    493 }
    494 
    495 void vpx_highbd_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
    496                                       uint8_t *dst, ptrdiff_t dst_stride,
    497                                       const int16_t *filter_x, int x_step_q4,
    498                                       const int16_t *filter_y, int y_step_q4,
    499                                       int w, int h, int bd) {
    500   const InterpKernel *const filters_x = get_filter_base(filter_x);
    501   const int x0_q4 = get_filter_offset(filter_x, filters_x);
    502   (void)filter_y;
    503   (void)y_step_q4;
    504 
    505   highbd_convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x,
    506                             x0_q4, x_step_q4, w, h, bd);
    507 }
    508 
    509 void vpx_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
    510                                  uint8_t *dst, ptrdiff_t dst_stride,
    511                                  const int16_t *filter_x, int x_step_q4,
    512                                  const int16_t *filter_y, int y_step_q4,
    513                                  int w, int h, int bd) {
    514   const InterpKernel *const filters_y = get_filter_base(filter_y);
    515   const int y0_q4 = get_filter_offset(filter_y, filters_y);
    516   (void)filter_x;
    517   (void)x_step_q4;
    518 
    519   highbd_convolve_vert(src, src_stride, dst, dst_stride, filters_y,
    520                        y0_q4, y_step_q4, w, h, bd);
    521 }
    522 
    523 void vpx_highbd_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
    524                                      uint8_t *dst, ptrdiff_t dst_stride,
    525                                      const int16_t *filter_x, int x_step_q4,
    526                                      const int16_t *filter_y, int y_step_q4,
    527                                      int w, int h, int bd) {
    528   const InterpKernel *const filters_y = get_filter_base(filter_y);
    529   const int y0_q4 = get_filter_offset(filter_y, filters_y);
    530   (void)filter_x;
    531   (void)x_step_q4;
    532 
    533   highbd_convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y,
    534                            y0_q4, y_step_q4, w, h, bd);
    535 }
    536 
    537 void vpx_highbd_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
    538                             uint8_t *dst, ptrdiff_t dst_stride,
    539                             const int16_t *filter_x, int x_step_q4,
    540                             const int16_t *filter_y, int y_step_q4,
    541                             int w, int h, int bd) {
    542   const InterpKernel *const filters_x = get_filter_base(filter_x);
    543   const int x0_q4 = get_filter_offset(filter_x, filters_x);
    544 
    545   const InterpKernel *const filters_y = get_filter_base(filter_y);
    546   const int y0_q4 = get_filter_offset(filter_y, filters_y);
    547 
    548   highbd_convolve(src, src_stride, dst, dst_stride,
    549                   filters_x, x0_q4, x_step_q4,
    550                   filters_y, y0_q4, y_step_q4, w, h, bd);
    551 }
    552 
    553 void vpx_highbd_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
    554                                 uint8_t *dst, ptrdiff_t dst_stride,
    555                                 const int16_t *filter_x, int x_step_q4,
    556                                 const int16_t *filter_y, int y_step_q4,
    557                                 int w, int h, int bd) {
    558   // Fixed size intermediate buffer places limits on parameters.
    559   DECLARE_ALIGNED(16, uint16_t, temp[64 * 64]);
    560   assert(w <= 64);
    561   assert(h <= 64);
    562 
    563   vpx_highbd_convolve8_c(src, src_stride, CONVERT_TO_BYTEPTR(temp), 64,
    564                          filter_x, x_step_q4, filter_y, y_step_q4, w, h, bd);
    565   vpx_highbd_convolve_avg_c(CONVERT_TO_BYTEPTR(temp), 64, dst, dst_stride,
    566                             NULL, 0, NULL, 0, w, h, bd);
    567 }
    568 
    569 void vpx_highbd_convolve_copy_c(const uint8_t *src8, ptrdiff_t src_stride,
    570                                 uint8_t *dst8, ptrdiff_t dst_stride,
    571                                 const int16_t *filter_x, int filter_x_stride,
    572                                 const int16_t *filter_y, int filter_y_stride,
    573                                 int w, int h, int bd) {
    574   int r;
    575   uint16_t *src = CONVERT_TO_SHORTPTR(src8);
    576   uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
    577   (void)filter_x;
    578   (void)filter_y;
    579   (void)filter_x_stride;
    580   (void)filter_y_stride;
    581   (void)bd;
    582 
    583   for (r = h; r > 0; --r) {
    584     memcpy(dst, src, w * sizeof(uint16_t));
    585     src += src_stride;
    586     dst += dst_stride;
    587   }
    588 }
    589 
    590 void vpx_highbd_convolve_avg_c(const uint8_t *src8, ptrdiff_t src_stride,
    591                                uint8_t *dst8, ptrdiff_t dst_stride,
    592                                const int16_t *filter_x, int filter_x_stride,
    593                                const int16_t *filter_y, int filter_y_stride,
    594                                int w, int h, int bd) {
    595   int x, y;
    596   uint16_t *src = CONVERT_TO_SHORTPTR(src8);
    597   uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
    598   (void)filter_x;
    599   (void)filter_y;
    600   (void)filter_x_stride;
    601   (void)filter_y_stride;
    602   (void)bd;
    603 
    604   for (y = 0; y < h; ++y) {
    605     for (x = 0; x < w; ++x) {
    606       dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1);
    607     }
    608     src += src_stride;
    609     dst += dst_stride;
    610   }
    611 }
    612 #endif
    613