Home | History | Annotate | Download | only in x86
      1 /*
      2  *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include "./vpx_config.h"
     12 #include "./vpx_dsp_rtcd.h"
     13 #include "vpx_dsp/x86/convolve.h"
     14 
     15 #if HAVE_SSE2
        // Forward declarations only: each line below declares one function of
        // type filter8_1dfunction; none of them is defined in this file.
        // Names follow vpx_filter_block1d{16,8,4}_{v,h}{8,2}[_avg]_sse2.
        // NOTE(review): presumably 16/8/4 is the block width, v/h the filter
        // direction, 8/2 the tap count and "_avg" the variant that averages
        // into dst, with the bodies supplied by SSE2 assembly -- confirm
        // against the filter8_1dfunction typedef and the build files.
        //
        // 8-tap group.
     16 filter8_1dfunction vpx_filter_block1d16_v8_sse2;
     17 filter8_1dfunction vpx_filter_block1d16_h8_sse2;
     18 filter8_1dfunction vpx_filter_block1d8_v8_sse2;
     19 filter8_1dfunction vpx_filter_block1d8_h8_sse2;
     20 filter8_1dfunction vpx_filter_block1d4_v8_sse2;
     21 filter8_1dfunction vpx_filter_block1d4_h8_sse2;
     22 filter8_1dfunction vpx_filter_block1d16_v8_avg_sse2;
     23 filter8_1dfunction vpx_filter_block1d16_h8_avg_sse2;
     24 filter8_1dfunction vpx_filter_block1d8_v8_avg_sse2;
     25 filter8_1dfunction vpx_filter_block1d8_h8_avg_sse2;
     26 filter8_1dfunction vpx_filter_block1d4_v8_avg_sse2;
     27 filter8_1dfunction vpx_filter_block1d4_h8_avg_sse2;
     28 
        // 2-tap group: the same twelve names with "2" in place of "8".
     29 filter8_1dfunction vpx_filter_block1d16_v2_sse2;
     30 filter8_1dfunction vpx_filter_block1d16_h2_sse2;
     31 filter8_1dfunction vpx_filter_block1d8_v2_sse2;
     32 filter8_1dfunction vpx_filter_block1d8_h2_sse2;
     33 filter8_1dfunction vpx_filter_block1d4_v2_sse2;
     34 filter8_1dfunction vpx_filter_block1d4_h2_sse2;
     35 filter8_1dfunction vpx_filter_block1d16_v2_avg_sse2;
     36 filter8_1dfunction vpx_filter_block1d16_h2_avg_sse2;
     37 filter8_1dfunction vpx_filter_block1d8_v2_avg_sse2;
     38 filter8_1dfunction vpx_filter_block1d8_h2_avg_sse2;
     39 filter8_1dfunction vpx_filter_block1d4_v2_avg_sse2;
     40 filter8_1dfunction vpx_filter_block1d4_h2_avg_sse2;
     41 
     42 // void vpx_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride,
     43 //                               uint8_t *dst, ptrdiff_t dst_stride,
     44 //                               const InterpKernel *filter, int x0_q4,
     45 //                               int32_t x_step_q4, int y0_q4, int y_step_q4,
     46 //                               int w, int h);
     47 // void vpx_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride,
     48 //                              uint8_t *dst, ptrdiff_t dst_stride,
     49 //                              const InterpKernel *filter, int x0_q4,
     50 //                              int32_t x_step_q4, int y0_q4, int y_step_q4,
     51 //                              int w, int h);
     52 // void vpx_convolve8_avg_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride,
     53 //                                   uint8_t *dst, ptrdiff_t dst_stride,
     54 //                                   const InterpKernel *filter, int x0_q4,
     55 //                                   int32_t x_step_q4, int y0_q4,
     56 //                                   int y_step_q4, int w, int h);
     57 // void vpx_convolve8_avg_vert_sse2(const uint8_t *src, ptrdiff_t src_stride,
     58 //                                  uint8_t *dst, ptrdiff_t dst_stride,
     59 //                                  const InterpKernel *filter, int x0_q4,
     60 //                                  int32_t x_step_q4, int y0_q4, int y_step_q4,
     61 //                                  int w, int h);
        //
        // The four prototypes above are what the FUN_CONV_1D lines below are
        // expected to define, one function per line. FUN_CONV_1D is not defined
        // in this file (presumably it comes from vpx_dsp/x86/convolve.h).
        // Arguments, in order: function-name suffix, the offset and step
        // parameter names to use (x* for horiz, y* for vert), the h/v letter
        // used in the block1d kernel names, the source start expression, the
        // kernel-name prefix (empty or avg_), and the ISA suffix.
        // The vert variants start three rows above src (src - src_stride * 3).
        // NOTE(review): presumably this supplies the rows an 8-tap vertical
        // filter reads above the output row -- confirm against the macro body.
     62 FUN_CONV_1D(horiz, x0_q4, x_step_q4, h, src, , sse2);
     63 FUN_CONV_1D(vert, y0_q4, y_step_q4, v, src - src_stride * 3, , sse2);
     64 FUN_CONV_1D(avg_horiz, x0_q4, x_step_q4, h, src, avg_, sse2);
     65 FUN_CONV_1D(avg_vert, y0_q4, y_step_q4, v, src - src_stride * 3, avg_, sse2);
     66 
     67 // void vpx_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride,
     68 //                         uint8_t *dst, ptrdiff_t dst_stride,
     69 //                         const InterpKernel *filter, int x0_q4,
     70 //                         int32_t x_step_q4, int y0_q4, int y_step_q4,
     71 //                         int w, int h);
     72 // void vpx_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride,
     73 //                             uint8_t *dst, ptrdiff_t dst_stride,
     74 //                             const InterpKernel *filter, int x0_q4,
     75 //                             int32_t x_step_q4, int y0_q4, int y_step_q4,
     76 //                             int w, int h);
        //
        // The two prototypes above are what the FUN_CONV_2D lines below are
        // expected to define. The first macro argument is the name prefix
        // (empty for the plain variant, avg_ for the averaging one); the second
        // is the ISA suffix. FUN_CONV_2D is not defined in this file.
        // NOTE(review): presumably the 2-D functions are built on the 1-D
        // functions generated by FUN_CONV_1D -- confirm against the macro body.
     77 FUN_CONV_2D(, sse2);
     78 FUN_CONV_2D(avg_, sse2);
     79 
     80 #if CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64
        // High-bitdepth counterparts of the filter kernels, compiled only when
        // both CONFIG_VP9_HIGHBITDEPTH and ARCH_X86_64 are non-zero.
        // Forward declarations only: each line declares one function of type
        // highbd_filter8_1dfunction; none of them is defined in this file.
        // Names follow
        // vpx_highbd_filter_block1d{16,8,4}_{v,h}{8,2}[_avg]_sse2.
        // NOTE(review): presumably the bodies are SSE2 assembly that is only
        // built for x86-64 -- confirm against the build files.
        //
        // 8-tap group.
     81 highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v8_sse2;
     82 highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h8_sse2;
     83 highbd_filter8_1dfunction vpx_highbd_filter_block1d8_v8_sse2;
     84 highbd_filter8_1dfunction vpx_highbd_filter_block1d8_h8_sse2;
     85 highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v8_sse2;
     86 highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h8_sse2;
     87 highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v8_avg_sse2;
     88 highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h8_avg_sse2;
     89 highbd_filter8_1dfunction vpx_highbd_filter_block1d8_v8_avg_sse2;
     90 highbd_filter8_1dfunction vpx_highbd_filter_block1d8_h8_avg_sse2;
     91 highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v8_avg_sse2;
     92 highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h8_avg_sse2;
     93 
        // 2-tap group: the same twelve names with "2" in place of "8".
     94 highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v2_sse2;
     95 highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h2_sse2;
     96 highbd_filter8_1dfunction vpx_highbd_filter_block1d8_v2_sse2;
     97 highbd_filter8_1dfunction vpx_highbd_filter_block1d8_h2_sse2;
     98 highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v2_sse2;
     99 highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h2_sse2;
    100 highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v2_avg_sse2;
    101 highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h2_avg_sse2;
    102 highbd_filter8_1dfunction vpx_highbd_filter_block1d8_v2_avg_sse2;
    103 highbd_filter8_1dfunction vpx_highbd_filter_block1d8_h2_avg_sse2;
    104 highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v2_avg_sse2;
    105 highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h2_avg_sse2;
    106 
    107 // void vpx_highbd_convolve8_horiz_sse2(const uint8_t *src,
    108 //                                      ptrdiff_t src_stride,
    109 //                                      uint8_t *dst,
    110 //                                      ptrdiff_t dst_stride,
    111 //                                      const InterpKernel *filter,
    112 //                                      int x0_q4, int x_step_q4,
    113 //                                      int y0_q4, int y_step_q4,
    114 //                                      int w, int h,
    115 //                                      int bd);
    116 // void vpx_highbd_convolve8_vert_sse2(const uint8_t *src,
    117 //                                     ptrdiff_t src_stride,
    118 //                                     uint8_t *dst,
    119 //                                     ptrdiff_t dst_stride,
    120 //                                     const InterpKernel *filter,
    121 //                                     int x0_q4, int x_step_q4,
    122 //                                     int y0_q4, int y_step_q4,
    123 //                                     int w, int h,
    124 //                                     int bd);
    125 // void vpx_highbd_convolve8_avg_horiz_sse2(const uint8_t *src,
    126 //                                          ptrdiff_t src_stride,
    127 //                                          uint8_t *dst,
    128 //                                          ptrdiff_t dst_stride,
    129 //                                          const InterpKernel *filter,
    130 //                                          int x0_q4, int x_step_q4,
    131 //                                          int y0_q4, int y_step_q4,
    132 //                                          int w, int h,
    133 //                                          int bd);
    134 // void vpx_highbd_convolve8_avg_vert_sse2(const uint8_t *src,
    135 //                                         ptrdiff_t src_stride,
    136 //                                         uint8_t *dst,
    137 //                                         ptrdiff_t dst_stride,
    138 //                                         const InterpKernel *filter,
    139 //                                         int x0_q4, int x_step_q4,
    140 //                                         int y0_q4, int y_step_q4,
    141 //                                         int w, int h,
    142 //                                         int bd);
        //
        // The four prototypes above are what the HIGH_FUN_CONV_1D lines below
        // are expected to define, one function per line. The macro is not
        // defined in this file. Its arguments mirror FUN_CONV_1D: name suffix,
        // offset and step parameter names, h/v kernel letter, source start
        // expression, kernel-name prefix (empty or avg_), and ISA suffix.
        // The parameter list is written with filter/x0_q4/y0_q4 because the
        // invocations pass x0_q4 and y0_q4 by name, matching the 2-D
        // high-bitdepth prototypes in this file.
        // NOTE(review): the src/dst pointer types are copied from the older
        // comment and may really be uint16_t -- confirm against the macro.
    143 HIGH_FUN_CONV_1D(horiz, x0_q4, x_step_q4, h, src, , sse2);
    144 HIGH_FUN_CONV_1D(vert, y0_q4, y_step_q4, v, src - src_stride * 3, , sse2);
    145 HIGH_FUN_CONV_1D(avg_horiz, x0_q4, x_step_q4, h, src, avg_, sse2);
    146 HIGH_FUN_CONV_1D(avg_vert, y0_q4, y_step_q4, v, src - src_stride * 3, avg_,
    147                  sse2);
    148 
    149 // void vpx_highbd_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride,
    150 //                                uint8_t *dst, ptrdiff_t dst_stride,
    151 //                                const InterpKernel *filter, int x0_q4,
    152 //                                int32_t x_step_q4, int y0_q4, int y_step_q4,
    153 //                                int w, int h, int bd);
    154 // void vpx_highbd_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride,
    155 //                                    uint8_t *dst, ptrdiff_t dst_stride,
    156 //                                    const InterpKernel *filter, int x0_q4,
    157 //                                    int32_t x_step_q4, int y0_q4,
    158 //                                    int y_step_q4, int w, int h, int bd);
        //
        // The two prototypes above are what the HIGH_FUN_CONV_2D lines below
        // are expected to define. The first macro argument is the name prefix
        // (empty for the plain variant, avg_ for the averaging one); the second
        // is the ISA suffix. HIGH_FUN_CONV_2D is not defined in this file.
        // NOTE(review): the src/dst pointer types shown above may really be
        // uint16_t for high bitdepth -- confirm against the macro definition.
    159 HIGH_FUN_CONV_2D(, sse2);
    160 HIGH_FUN_CONV_2D(avg_, sse2);
    161 #endif  // CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64
    162 #endif  // HAVE_SSE2
    163