Home | History | Annotate | Download | only in x86
      1 /*
      2  *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 #ifndef VPX_DSP_X86_CONVOLVE_H_
     11 #define VPX_DSP_X86_CONVOLVE_H_
     12 
     13 #include <assert.h>
     14 
     15 #include "./vpx_config.h"
     16 #include "vpx/vpx_integer.h"
     17 #include "vpx_ports/mem.h"
     18 
     19 typedef void filter8_1dfunction(const uint8_t *src_ptr, ptrdiff_t src_pitch,
     20                                 uint8_t *output_ptr, ptrdiff_t out_pitch,
     21                                 uint32_t output_height, const int16_t *filter);
     22 
     23 #define FUN_CONV_1D(name, offset, step_q4, dir, src_start, avg, opt)         \
     24   void vpx_convolve8_##name##_##opt(                                         \
     25       const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,                \
     26       ptrdiff_t dst_stride, const InterpKernel *filter_kernel, int x0_q4,    \
     27       int x_step_q4, int y0_q4, int y_step_q4, int w, int h) {               \
     28     const int16_t *filter = filter_kernel[offset];                           \
     29     (void)x0_q4;                                                             \
     30     (void)x_step_q4;                                                         \
     31     (void)y0_q4;                                                             \
     32     (void)y_step_q4;                                                         \
     33     assert(filter[3] != 128);                                                \
     34     assert(step_q4 == 16);                                                   \
     35     if (filter[0] | filter[1] | filter[2]) {                                 \
     36       while (w >= 16) {                                                      \
     37         vpx_filter_block1d16_##dir##8_##avg##opt(src_start, src_stride, dst, \
     38                                                  dst_stride, h, filter);     \
     39         src += 16;                                                           \
     40         dst += 16;                                                           \
     41         w -= 16;                                                             \
     42       }                                                                      \
     43       if (w == 8) {                                                          \
     44         vpx_filter_block1d8_##dir##8_##avg##opt(src_start, src_stride, dst,  \
     45                                                 dst_stride, h, filter);      \
     46       } else if (w == 4) {                                                   \
     47         vpx_filter_block1d4_##dir##8_##avg##opt(src_start, src_stride, dst,  \
     48                                                 dst_stride, h, filter);      \
     49       }                                                                      \
     50     } else {                                                                 \
     51       while (w >= 16) {                                                      \
     52         vpx_filter_block1d16_##dir##2_##avg##opt(src, src_stride, dst,       \
     53                                                  dst_stride, h, filter);     \
     54         src += 16;                                                           \
     55         dst += 16;                                                           \
     56         w -= 16;                                                             \
     57       }                                                                      \
     58       if (w == 8) {                                                          \
     59         vpx_filter_block1d8_##dir##2_##avg##opt(src, src_stride, dst,        \
     60                                                 dst_stride, h, filter);      \
     61       } else if (w == 4) {                                                   \
     62         vpx_filter_block1d4_##dir##2_##avg##opt(src, src_stride, dst,        \
     63                                                 dst_stride, h, filter);      \
     64       }                                                                      \
     65     }                                                                        \
     66   }
     67 
     68 #define FUN_CONV_2D(avg, opt)                                                  \
     69   void vpx_convolve8_##avg##opt(                                               \
     70       const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,                  \
     71       ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4,             \
     72       int x_step_q4, int y0_q4, int y_step_q4, int w, int h) {                 \
     73     const int16_t *filter_x = filter[x0_q4];                                   \
     74     const int16_t *filter_y = filter[y0_q4];                                   \
     75     (void)filter_y;                                                            \
     76     assert(filter_x[3] != 128);                                                \
     77     assert(filter_y[3] != 128);                                                \
     78     assert(w <= 64);                                                           \
     79     assert(h <= 64);                                                           \
     80     assert(x_step_q4 == 16);                                                   \
     81     assert(y_step_q4 == 16);                                                   \
     82     if (filter_x[0] | filter_x[1] | filter_x[2]) {                             \
     83       DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71]);                           \
     84       vpx_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64,  \
     85                                 filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, \
     86                                 h + 7);                                        \
     87       vpx_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, dst, dst_stride,    \
     88                                       filter, x0_q4, x_step_q4, y0_q4,         \
     89                                       y_step_q4, w, h);                        \
     90     } else {                                                                   \
     91       DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 65]);                           \
     92       vpx_convolve8_horiz_##opt(src, src_stride, fdata2, 64, filter, x0_q4,    \
     93                                 x_step_q4, y0_q4, y_step_q4, w, h + 1);        \
     94       vpx_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, filter,     \
     95                                       x0_q4, x_step_q4, y0_q4, y_step_q4, w,   \
     96                                       h);                                      \
     97     }                                                                          \
     98   }
     99 
    100 #if CONFIG_VP9_HIGHBITDEPTH
    101 
    102 typedef void highbd_filter8_1dfunction(const uint16_t *src_ptr,
    103                                        const ptrdiff_t src_pitch,
    104                                        uint16_t *output_ptr,
    105                                        ptrdiff_t out_pitch,
    106                                        unsigned int output_height,
    107                                        const int16_t *filter, int bd);
    108 
    109 #define HIGH_FUN_CONV_1D(name, offset, step_q4, dir, src_start, avg, opt)     \
    110   void vpx_highbd_convolve8_##name##_##opt(                                   \
    111       const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst,               \
    112       ptrdiff_t dst_stride, const InterpKernel *filter_kernel, int x0_q4,     \
    113       int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd) {        \
    114     const int16_t *filter = filter_kernel[offset];                            \
    115     if (step_q4 == 16 && filter[3] != 128) {                                  \
    116       if (filter[0] | filter[1] | filter[2]) {                                \
    117         while (w >= 16) {                                                     \
    118           vpx_highbd_filter_block1d16_##dir##8_##avg##opt(                    \
    119               src_start, src_stride, dst, dst_stride, h, filter, bd);         \
    120           src += 16;                                                          \
    121           dst += 16;                                                          \
    122           w -= 16;                                                            \
    123         }                                                                     \
    124         while (w >= 8) {                                                      \
    125           vpx_highbd_filter_block1d8_##dir##8_##avg##opt(                     \
    126               src_start, src_stride, dst, dst_stride, h, filter, bd);         \
    127           src += 8;                                                           \
    128           dst += 8;                                                           \
    129           w -= 8;                                                             \
    130         }                                                                     \
    131         while (w >= 4) {                                                      \
    132           vpx_highbd_filter_block1d4_##dir##8_##avg##opt(                     \
    133               src_start, src_stride, dst, dst_stride, h, filter, bd);         \
    134           src += 4;                                                           \
    135           dst += 4;                                                           \
    136           w -= 4;                                                             \
    137         }                                                                     \
    138       } else {                                                                \
    139         while (w >= 16) {                                                     \
    140           vpx_highbd_filter_block1d16_##dir##2_##avg##opt(                    \
    141               src, src_stride, dst, dst_stride, h, filter, bd);               \
    142           src += 16;                                                          \
    143           dst += 16;                                                          \
    144           w -= 16;                                                            \
    145         }                                                                     \
    146         while (w >= 8) {                                                      \
    147           vpx_highbd_filter_block1d8_##dir##2_##avg##opt(                     \
    148               src, src_stride, dst, dst_stride, h, filter, bd);               \
    149           src += 8;                                                           \
    150           dst += 8;                                                           \
    151           w -= 8;                                                             \
    152         }                                                                     \
    153         while (w >= 4) {                                                      \
    154           vpx_highbd_filter_block1d4_##dir##2_##avg##opt(                     \
    155               src, src_stride, dst, dst_stride, h, filter, bd);               \
    156           src += 4;                                                           \
    157           dst += 4;                                                           \
    158           w -= 4;                                                             \
    159         }                                                                     \
    160       }                                                                       \
    161     }                                                                         \
    162     if (w) {                                                                  \
    163       vpx_highbd_convolve8_##name##_c(src, src_stride, dst, dst_stride,       \
    164                                       filter_kernel, x0_q4, x_step_q4, y0_q4, \
    165                                       y_step_q4, w, h, bd);                   \
    166     }                                                                         \
    167   }
    168 
    169 #define HIGH_FUN_CONV_2D(avg, opt)                                             \
    170   void vpx_highbd_convolve8_##avg##opt(                                        \
    171       const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst,                \
    172       ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4,             \
    173       int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd) {         \
    174     const int16_t *filter_x = filter[x0_q4];                                   \
    175     assert(w <= 64);                                                           \
    176     assert(h <= 64);                                                           \
    177     if (x_step_q4 == 16 && y_step_q4 == 16) {                                  \
    178       if ((filter_x[0] | filter_x[1] | filter_x[2]) || filter_x[3] == 128) {   \
    179         DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71]);                        \
    180         vpx_highbd_convolve8_horiz_##opt(src - 3 * src_stride, src_stride,     \
    181                                          fdata2, 64, filter, x0_q4, x_step_q4, \
    182                                          y0_q4, y_step_q4, w, h + 7, bd);      \
    183         vpx_highbd_convolve8_##avg##vert_##opt(                                \
    184             fdata2 + 192, 64, dst, dst_stride, filter, x0_q4, x_step_q4,       \
    185             y0_q4, y_step_q4, w, h, bd);                                       \
    186       } else {                                                                 \
    187         DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 65]);                        \
    188         vpx_highbd_convolve8_horiz_##opt(src, src_stride, fdata2, 64, filter,  \
    189                                          x0_q4, x_step_q4, y0_q4, y_step_q4,   \
    190                                          w, h + 1, bd);                        \
    191         vpx_highbd_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride,    \
    192                                                filter, x0_q4, x_step_q4,       \
    193                                                y0_q4, y_step_q4, w, h, bd);    \
    194       }                                                                        \
    195     } else {                                                                   \
    196       vpx_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride, filter,  \
    197                                     x0_q4, x_step_q4, y0_q4, y_step_q4, w, h,  \
    198                                     bd);                                       \
    199     }                                                                          \
    200   }
    201 #endif  // CONFIG_VP9_HIGHBITDEPTH
    202 
    203 #endif  // VPX_DSP_X86_CONVOLVE_H_
    204