Home | History | Annotate | Download | only in vpx_dsp
      1 /*
      2  *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include <assert.h>
     12 #include <string.h>
     13 
     14 #include "./vpx_config.h"
     15 #include "./vpx_dsp_rtcd.h"
     16 #include "vpx/vpx_integer.h"
     17 #include "vpx_dsp/vpx_convolve.h"
     18 #include "vpx_dsp/vpx_dsp_common.h"
     19 #include "vpx_dsp/vpx_filter.h"
     20 #include "vpx_ports/mem.h"
     21 
     22 static void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride,
     23                            uint8_t *dst, ptrdiff_t dst_stride,
     24                            const InterpKernel *x_filters, int x0_q4,
     25                            int x_step_q4, int w, int h) {
     26   int x, y;
     27   src -= SUBPEL_TAPS / 2 - 1;
     28 
     29   for (y = 0; y < h; ++y) {
     30     int x_q4 = x0_q4;
     31     for (x = 0; x < w; ++x) {
     32       const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
     33       const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
     34       int k, sum = 0;
     35       for (k = 0; k < SUBPEL_TAPS; ++k) sum += src_x[k] * x_filter[k];
     36       dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
     37       x_q4 += x_step_q4;
     38     }
     39     src += src_stride;
     40     dst += dst_stride;
     41   }
     42 }
     43 
     44 static void convolve_avg_horiz(const uint8_t *src, ptrdiff_t src_stride,
     45                                uint8_t *dst, ptrdiff_t dst_stride,
     46                                const InterpKernel *x_filters, int x0_q4,
     47                                int x_step_q4, int w, int h) {
     48   int x, y;
     49   src -= SUBPEL_TAPS / 2 - 1;
     50 
     51   for (y = 0; y < h; ++y) {
     52     int x_q4 = x0_q4;
     53     for (x = 0; x < w; ++x) {
     54       const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
     55       const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
     56       int k, sum = 0;
     57       for (k = 0; k < SUBPEL_TAPS; ++k) sum += src_x[k] * x_filter[k];
     58       dst[x] = ROUND_POWER_OF_TWO(
     59           dst[x] + clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
     60       x_q4 += x_step_q4;
     61     }
     62     src += src_stride;
     63     dst += dst_stride;
     64   }
     65 }
     66 
     67 static void convolve_vert(const uint8_t *src, ptrdiff_t src_stride,
     68                           uint8_t *dst, ptrdiff_t dst_stride,
     69                           const InterpKernel *y_filters, int y0_q4,
     70                           int y_step_q4, int w, int h) {
     71   int x, y;
     72   src -= src_stride * (SUBPEL_TAPS / 2 - 1);
     73 
     74   for (x = 0; x < w; ++x) {
     75     int y_q4 = y0_q4;
     76     for (y = 0; y < h; ++y) {
     77       const uint8_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
     78       const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
     79       int k, sum = 0;
     80       for (k = 0; k < SUBPEL_TAPS; ++k)
     81         sum += src_y[k * src_stride] * y_filter[k];
     82       dst[y * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
     83       y_q4 += y_step_q4;
     84     }
     85     ++src;
     86     ++dst;
     87   }
     88 }
     89 
     90 static void convolve_avg_vert(const uint8_t *src, ptrdiff_t src_stride,
     91                               uint8_t *dst, ptrdiff_t dst_stride,
     92                               const InterpKernel *y_filters, int y0_q4,
     93                               int y_step_q4, int w, int h) {
     94   int x, y;
     95   src -= src_stride * (SUBPEL_TAPS / 2 - 1);
     96 
     97   for (x = 0; x < w; ++x) {
     98     int y_q4 = y0_q4;
     99     for (y = 0; y < h; ++y) {
    100       const uint8_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
    101       const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
    102       int k, sum = 0;
    103       for (k = 0; k < SUBPEL_TAPS; ++k)
    104         sum += src_y[k * src_stride] * y_filter[k];
    105       dst[y * dst_stride] = ROUND_POWER_OF_TWO(
    106           dst[y * dst_stride] +
    107               clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)),
    108           1);
    109       y_q4 += y_step_q4;
    110     }
    111     ++src;
    112     ++dst;
    113   }
    114 }
    115 
    116 void vpx_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
    117                            uint8_t *dst, ptrdiff_t dst_stride,
    118                            const InterpKernel *filter, int x0_q4, int x_step_q4,
    119                            int y0_q4, int y_step_q4, int w, int h) {
    120   (void)y0_q4;
    121   (void)y_step_q4;
    122   convolve_horiz(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, w,
    123                  h);
    124 }
    125 
    126 void vpx_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
    127                                uint8_t *dst, ptrdiff_t dst_stride,
    128                                const InterpKernel *filter, int x0_q4,
    129                                int x_step_q4, int y0_q4, int y_step_q4, int w,
    130                                int h) {
    131   (void)y0_q4;
    132   (void)y_step_q4;
    133   convolve_avg_horiz(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4,
    134                      w, h);
    135 }
    136 
    137 void vpx_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
    138                           uint8_t *dst, ptrdiff_t dst_stride,
    139                           const InterpKernel *filter, int x0_q4, int x_step_q4,
    140                           int y0_q4, int y_step_q4, int w, int h) {
    141   (void)x0_q4;
    142   (void)x_step_q4;
    143   convolve_vert(src, src_stride, dst, dst_stride, filter, y0_q4, y_step_q4, w,
    144                 h);
    145 }
    146 
    147 void vpx_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
    148                               uint8_t *dst, ptrdiff_t dst_stride,
    149                               const InterpKernel *filter, int x0_q4,
    150                               int x_step_q4, int y0_q4, int y_step_q4, int w,
    151                               int h) {
    152   (void)x0_q4;
    153   (void)x_step_q4;
    154   convolve_avg_vert(src, src_stride, dst, dst_stride, filter, y0_q4, y_step_q4,
    155                     w, h);
    156 }
    157 
    158 void vpx_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
    159                      ptrdiff_t dst_stride, const InterpKernel *filter,
    160                      int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w,
    161                      int h) {
    162   // Note: Fixed size intermediate buffer, temp, places limits on parameters.
    163   // 2d filtering proceeds in 2 steps:
    164   //   (1) Interpolate horizontally into an intermediate buffer, temp.
    165   //   (2) Interpolate temp vertically to derive the sub-pixel result.
    166   // Deriving the maximum number of rows in the temp buffer (135):
    167   // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative).
    168   // --Largest block size is 64x64 pixels.
    169   // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the
    170   //   original frame (in 1/16th pixel units).
    171   // --Must round-up because block may be located at sub-pixel position.
    172   // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
    173   // --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
    174   // When calling in frame scaling function, the smallest scaling factor is x1/4
    175   // ==> y_step_q4 = 64. Since w and h are at most 16, the temp buffer is still
    176   // big enough.
    177   uint8_t temp[64 * 135];
    178   const int intermediate_height =
    179       (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;
    180 
    181   assert(w <= 64);
    182   assert(h <= 64);
    183   assert(y_step_q4 <= 32 || (y_step_q4 <= 64 && h <= 32));
    184   assert(x_step_q4 <= 64);
    185 
    186   convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64,
    187                  filter, x0_q4, x_step_q4, w, intermediate_height);
    188   convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride, filter,
    189                 y0_q4, y_step_q4, w, h);
    190 }
    191 
    192 void vpx_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
    193                          ptrdiff_t dst_stride, const InterpKernel *filter,
    194                          int x0_q4, int x_step_q4, int y0_q4, int y_step_q4,
    195                          int w, int h) {
    196   // Fixed size intermediate buffer places limits on parameters.
    197   DECLARE_ALIGNED(16, uint8_t, temp[64 * 64]);
    198   assert(w <= 64);
    199   assert(h <= 64);
    200 
    201   vpx_convolve8_c(src, src_stride, temp, 64, filter, x0_q4, x_step_q4, y0_q4,
    202                   y_step_q4, w, h);
    203   vpx_convolve_avg_c(temp, 64, dst, dst_stride, NULL, 0, 0, 0, 0, w, h);
    204 }
    205 
    206 void vpx_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
    207                          ptrdiff_t dst_stride, const InterpKernel *filter,
    208                          int x0_q4, int x_step_q4, int y0_q4, int y_step_q4,
    209                          int w, int h) {
    210   int r;
    211 
    212   (void)filter;
    213   (void)x0_q4;
    214   (void)x_step_q4;
    215   (void)y0_q4;
    216   (void)y_step_q4;
    217 
    218   for (r = h; r > 0; --r) {
    219     memcpy(dst, src, w);
    220     src += src_stride;
    221     dst += dst_stride;
    222   }
    223 }
    224 
    225 void vpx_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
    226                         ptrdiff_t dst_stride, const InterpKernel *filter,
    227                         int x0_q4, int x_step_q4, int y0_q4, int y_step_q4,
    228                         int w, int h) {
    229   int x, y;
    230 
    231   (void)filter;
    232   (void)x0_q4;
    233   (void)x_step_q4;
    234   (void)y0_q4;
    235   (void)y_step_q4;
    236 
    237   for (y = 0; y < h; ++y) {
    238     for (x = 0; x < w; ++x) dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1);
    239     src += src_stride;
    240     dst += dst_stride;
    241   }
    242 }
    243 
    244 void vpx_scaled_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
    245                         ptrdiff_t dst_stride, const InterpKernel *filter,
    246                         int x0_q4, int x_step_q4, int y0_q4, int y_step_q4,
    247                         int w, int h) {
    248   vpx_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter, x0_q4,
    249                         x_step_q4, y0_q4, y_step_q4, w, h);
    250 }
    251 
    252 void vpx_scaled_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
    253                        ptrdiff_t dst_stride, const InterpKernel *filter,
    254                        int x0_q4, int x_step_q4, int y0_q4, int y_step_q4,
    255                        int w, int h) {
    256   vpx_convolve8_vert_c(src, src_stride, dst, dst_stride, filter, x0_q4,
    257                        x_step_q4, y0_q4, y_step_q4, w, h);
    258 }
    259 
    260 void vpx_scaled_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
    261                      ptrdiff_t dst_stride, const InterpKernel *filter,
    262                      int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w,
    263                      int h) {
    264   vpx_convolve8_c(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4,
    265                   y0_q4, y_step_q4, w, h);
    266 }
    267 
    268 void vpx_scaled_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
    269                             uint8_t *dst, ptrdiff_t dst_stride,
    270                             const InterpKernel *filter, int x0_q4,
    271                             int x_step_q4, int y0_q4, int y_step_q4, int w,
    272                             int h) {
    273   vpx_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter, x0_q4,
    274                             x_step_q4, y0_q4, y_step_q4, w, h);
    275 }
    276 
    277 void vpx_scaled_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
    278                            uint8_t *dst, ptrdiff_t dst_stride,
    279                            const InterpKernel *filter, int x0_q4, int x_step_q4,
    280                            int y0_q4, int y_step_q4, int w, int h) {
    281   vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter, x0_q4,
    282                            x_step_q4, y0_q4, y_step_q4, w, h);
    283 }
    284 
    285 void vpx_scaled_avg_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
    286                          ptrdiff_t dst_stride, const InterpKernel *filter,
    287                          int x0_q4, int x_step_q4, int y0_q4, int y_step_q4,
    288                          int w, int h) {
    289   vpx_convolve8_avg_c(src, src_stride, dst, dst_stride, filter, x0_q4,
    290                       x_step_q4, y0_q4, y_step_q4, w, h);
    291 }
    292 
    293 #if CONFIG_VP9_HIGHBITDEPTH
    294 static void highbd_convolve_horiz(const uint16_t *src, ptrdiff_t src_stride,
    295                                   uint16_t *dst, ptrdiff_t dst_stride,
    296                                   const InterpKernel *x_filters, int x0_q4,
    297                                   int x_step_q4, int w, int h, int bd) {
    298   int x, y;
    299   src -= SUBPEL_TAPS / 2 - 1;
    300 
    301   for (y = 0; y < h; ++y) {
    302     int x_q4 = x0_q4;
    303     for (x = 0; x < w; ++x) {
    304       const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
    305       const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
    306       int k, sum = 0;
    307       for (k = 0; k < SUBPEL_TAPS; ++k) sum += src_x[k] * x_filter[k];
    308       dst[x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
    309       x_q4 += x_step_q4;
    310     }
    311     src += src_stride;
    312     dst += dst_stride;
    313   }
    314 }
    315 
    316 static void highbd_convolve_avg_horiz(const uint16_t *src, ptrdiff_t src_stride,
    317                                       uint16_t *dst, ptrdiff_t dst_stride,
    318                                       const InterpKernel *x_filters, int x0_q4,
    319                                       int x_step_q4, int w, int h, int bd) {
    320   int x, y;
    321   src -= SUBPEL_TAPS / 2 - 1;
    322 
    323   for (y = 0; y < h; ++y) {
    324     int x_q4 = x0_q4;
    325     for (x = 0; x < w; ++x) {
    326       const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
    327       const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
    328       int k, sum = 0;
    329       for (k = 0; k < SUBPEL_TAPS; ++k) sum += src_x[k] * x_filter[k];
    330       dst[x] = ROUND_POWER_OF_TWO(
    331           dst[x] + clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd),
    332           1);
    333       x_q4 += x_step_q4;
    334     }
    335     src += src_stride;
    336     dst += dst_stride;
    337   }
    338 }
    339 
    340 static void highbd_convolve_vert(const uint16_t *src, ptrdiff_t src_stride,
    341                                  uint16_t *dst, ptrdiff_t dst_stride,
    342                                  const InterpKernel *y_filters, int y0_q4,
    343                                  int y_step_q4, int w, int h, int bd) {
    344   int x, y;
    345   src -= src_stride * (SUBPEL_TAPS / 2 - 1);
    346 
    347   for (x = 0; x < w; ++x) {
    348     int y_q4 = y0_q4;
    349     for (y = 0; y < h; ++y) {
    350       const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
    351       const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
    352       int k, sum = 0;
    353       for (k = 0; k < SUBPEL_TAPS; ++k)
    354         sum += src_y[k * src_stride] * y_filter[k];
    355       dst[y * dst_stride] =
    356           clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
    357       y_q4 += y_step_q4;
    358     }
    359     ++src;
    360     ++dst;
    361   }
    362 }
    363 
    364 static void highbd_convolve_avg_vert(const uint16_t *src, ptrdiff_t src_stride,
    365                                      uint16_t *dst, ptrdiff_t dst_stride,
    366                                      const InterpKernel *y_filters, int y0_q4,
    367                                      int y_step_q4, int w, int h, int bd) {
    368   int x, y;
    369   src -= src_stride * (SUBPEL_TAPS / 2 - 1);
    370 
    371   for (x = 0; x < w; ++x) {
    372     int y_q4 = y0_q4;
    373     for (y = 0; y < h; ++y) {
    374       const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
    375       const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
    376       int k, sum = 0;
    377       for (k = 0; k < SUBPEL_TAPS; ++k)
    378         sum += src_y[k * src_stride] * y_filter[k];
    379       dst[y * dst_stride] = ROUND_POWER_OF_TWO(
    380           dst[y * dst_stride] +
    381               clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd),
    382           1);
    383       y_q4 += y_step_q4;
    384     }
    385     ++src;
    386     ++dst;
    387   }
    388 }
    389 
    390 static void highbd_convolve(const uint16_t *src, ptrdiff_t src_stride,
    391                             uint16_t *dst, ptrdiff_t dst_stride,
    392                             const InterpKernel *filter, int x0_q4,
    393                             int x_step_q4, int y0_q4, int y_step_q4, int w,
    394                             int h, int bd) {
    395   // Note: Fixed size intermediate buffer, temp, places limits on parameters.
    396   // 2d filtering proceeds in 2 steps:
    397   //   (1) Interpolate horizontally into an intermediate buffer, temp.
    398   //   (2) Interpolate temp vertically to derive the sub-pixel result.
    399   // Deriving the maximum number of rows in the temp buffer (135):
    400   // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative).
    401   // --Largest block size is 64x64 pixels.
    402   // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the
    403   //   original frame (in 1/16th pixel units).
    404   // --Must round-up because block may be located at sub-pixel position.
    405   // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
    406   // --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
    407   uint16_t temp[64 * 135];
    408   const int intermediate_height =
    409       (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;
    410 
    411   assert(w <= 64);
    412   assert(h <= 64);
    413   assert(y_step_q4 <= 32);
    414   assert(x_step_q4 <= 32);
    415 
    416   highbd_convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride,
    417                         temp, 64, filter, x0_q4, x_step_q4, w,
    418                         intermediate_height, bd);
    419   highbd_convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride,
    420                        filter, y0_q4, y_step_q4, w, h, bd);
    421 }
    422 
    423 void vpx_highbd_convolve8_horiz_c(const uint16_t *src, ptrdiff_t src_stride,
    424                                   uint16_t *dst, ptrdiff_t dst_stride,
    425                                   const InterpKernel *filter, int x0_q4,
    426                                   int x_step_q4, int y0_q4, int y_step_q4,
    427                                   int w, int h, int bd) {
    428   (void)y0_q4;
    429   (void)y_step_q4;
    430 
    431   highbd_convolve_horiz(src, src_stride, dst, dst_stride, filter, x0_q4,
    432                         x_step_q4, w, h, bd);
    433 }
    434 
    435 void vpx_highbd_convolve8_avg_horiz_c(const uint16_t *src, ptrdiff_t src_stride,
    436                                       uint16_t *dst, ptrdiff_t dst_stride,
    437                                       const InterpKernel *filter, int x0_q4,
    438                                       int x_step_q4, int y0_q4, int y_step_q4,
    439                                       int w, int h, int bd) {
    440   (void)y0_q4;
    441   (void)y_step_q4;
    442 
    443   highbd_convolve_avg_horiz(src, src_stride, dst, dst_stride, filter, x0_q4,
    444                             x_step_q4, w, h, bd);
    445 }
    446 
    447 void vpx_highbd_convolve8_vert_c(const uint16_t *src, ptrdiff_t src_stride,
    448                                  uint16_t *dst, ptrdiff_t dst_stride,
    449                                  const InterpKernel *filter, int x0_q4,
    450                                  int x_step_q4, int y0_q4, int y_step_q4, int w,
    451                                  int h, int bd) {
    452   (void)x0_q4;
    453   (void)x_step_q4;
    454 
    455   highbd_convolve_vert(src, src_stride, dst, dst_stride, filter, y0_q4,
    456                        y_step_q4, w, h, bd);
    457 }
    458 
    459 void vpx_highbd_convolve8_avg_vert_c(const uint16_t *src, ptrdiff_t src_stride,
    460                                      uint16_t *dst, ptrdiff_t dst_stride,
    461                                      const InterpKernel *filter, int x0_q4,
    462                                      int x_step_q4, int y0_q4, int y_step_q4,
    463                                      int w, int h, int bd) {
    464   (void)x0_q4;
    465   (void)x_step_q4;
    466 
    467   highbd_convolve_avg_vert(src, src_stride, dst, dst_stride, filter, y0_q4,
    468                            y_step_q4, w, h, bd);
    469 }
    470 
    471 void vpx_highbd_convolve8_c(const uint16_t *src, ptrdiff_t src_stride,
    472                             uint16_t *dst, ptrdiff_t dst_stride,
    473                             const InterpKernel *filter, int x0_q4,
    474                             int x_step_q4, int y0_q4, int y_step_q4, int w,
    475                             int h, int bd) {
    476   highbd_convolve(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4,
    477                   y0_q4, y_step_q4, w, h, bd);
    478 }
    479 
    480 void vpx_highbd_convolve8_avg_c(const uint16_t *src, ptrdiff_t src_stride,
    481                                 uint16_t *dst, ptrdiff_t dst_stride,
    482                                 const InterpKernel *filter, int x0_q4,
    483                                 int x_step_q4, int y0_q4, int y_step_q4, int w,
    484                                 int h, int bd) {
    485   // Fixed size intermediate buffer places limits on parameters.
    486   DECLARE_ALIGNED(16, uint16_t, temp[64 * 64]);
    487   assert(w <= 64);
    488   assert(h <= 64);
    489 
    490   vpx_highbd_convolve8_c(src, src_stride, temp, 64, filter, x0_q4, x_step_q4,
    491                          y0_q4, y_step_q4, w, h, bd);
    492   vpx_highbd_convolve_avg_c(temp, 64, dst, dst_stride, NULL, 0, 0, 0, 0, w, h,
    493                             bd);
    494 }
    495 
    496 void vpx_highbd_convolve_copy_c(const uint16_t *src, ptrdiff_t src_stride,
    497                                 uint16_t *dst, ptrdiff_t dst_stride,
    498                                 const InterpKernel *filter, int x0_q4,
    499                                 int x_step_q4, int y0_q4, int y_step_q4, int w,
    500                                 int h, int bd) {
    501   int r;
    502 
    503   (void)filter;
    504   (void)x0_q4;
    505   (void)x_step_q4;
    506   (void)y0_q4;
    507   (void)y_step_q4;
    508   (void)bd;
    509 
    510   for (r = h; r > 0; --r) {
    511     memcpy(dst, src, w * sizeof(uint16_t));
    512     src += src_stride;
    513     dst += dst_stride;
    514   }
    515 }
    516 
    517 void vpx_highbd_convolve_avg_c(const uint16_t *src, ptrdiff_t src_stride,
    518                                uint16_t *dst, ptrdiff_t dst_stride,
    519                                const InterpKernel *filter, int x0_q4,
    520                                int x_step_q4, int y0_q4, int y_step_q4, int w,
    521                                int h, int bd) {
    522   int x, y;
    523 
    524   (void)filter;
    525   (void)x0_q4;
    526   (void)x_step_q4;
    527   (void)y0_q4;
    528   (void)y_step_q4;
    529   (void)bd;
    530 
    531   for (y = 0; y < h; ++y) {
    532     for (x = 0; x < w; ++x) dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1);
    533     src += src_stride;
    534     dst += dst_stride;
    535   }
    536 }
    537 #endif
    538