Home | History | Annotate | Download | only in vpx_dsp
      1 ##
      2 ##  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
      3 ##
      4 ##  Use of this source code is governed by a BSD-style license
      5 ##  that can be found in the LICENSE file in the root of the source
      6 ##  tree. An additional intellectual property rights grant can be found
      7 ##  in the file PATENTS.  All contributing project authors may
      8 ##  be found in the AUTHORS file in the root of the source tree.
      9 ##
     10 
     11 sub vpx_dsp_forward_decls() {
     12 print <<EOF
     13 /*
     14  * DSP
     15  */
     16 
     17 #include "vpx/vpx_integer.h"
     18 #include "vpx_dsp/vpx_dsp_common.h"
     19 #include "vpx_dsp/vpx_filter.h"
     20 
     21 EOF
     22 }
     23 forward_decls qw/vpx_dsp_forward_decls/;
     24 
     25 # functions that are 64 bit only.
     26 $mmx_x86_64 = $sse2_x86_64 = $ssse3_x86_64 = $avx_x86_64 = $avx2_x86_64 = '';
     27 if ($opts{arch} eq "x86_64") {
     28   $mmx_x86_64 = 'mmx';
     29   $sse2_x86_64 = 'sse2';
     30   $ssse3_x86_64 = 'ssse3';
     31   $avx_x86_64 = 'avx';
     32   $avx2_x86_64 = 'avx2';
     33   $avx512_x86_64 = 'avx512';
     34 }
     35 
     36 #
     37 # Intra prediction
     38 #
     39 
     40 add_proto qw/void vpx_d207_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
     41 specialize qw/vpx_d207_predictor_4x4 sse2/;
     42 
     43 add_proto qw/void vpx_d45_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
     44 specialize qw/vpx_d45_predictor_4x4 neon sse2/;
     45 
     46 add_proto qw/void vpx_d45e_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
     47 
     48 add_proto qw/void vpx_d63_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
     49 specialize qw/vpx_d63_predictor_4x4 ssse3/;
     50 
     51 add_proto qw/void vpx_d63e_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
     52 
     53 add_proto qw/void vpx_h_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
     54 specialize qw/vpx_h_predictor_4x4 neon dspr2 msa sse2 vsx/;
     55 
     56 add_proto qw/void vpx_he_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
     57 
     58 add_proto qw/void vpx_d117_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
     59 
     60 add_proto qw/void vpx_d135_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
     61 specialize qw/vpx_d135_predictor_4x4 neon/;
     62 
     63 add_proto qw/void vpx_d153_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
     64 specialize qw/vpx_d153_predictor_4x4 ssse3/;
     65 
     66 add_proto qw/void vpx_v_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
     67 specialize qw/vpx_v_predictor_4x4 neon msa sse2/;
     68 
     69 add_proto qw/void vpx_ve_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
     70 
     71 add_proto qw/void vpx_tm_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
     72 specialize qw/vpx_tm_predictor_4x4 neon dspr2 msa sse2 vsx/;
     73 
     74 add_proto qw/void vpx_dc_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
     75 specialize qw/vpx_dc_predictor_4x4 dspr2 msa neon sse2/;
     76 
     77 add_proto qw/void vpx_dc_top_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
     78 specialize qw/vpx_dc_top_predictor_4x4 msa neon sse2/;
     79 
     80 add_proto qw/void vpx_dc_left_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
     81 specialize qw/vpx_dc_left_predictor_4x4 msa neon sse2/;
     82 
     83 add_proto qw/void vpx_dc_128_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
     84 specialize qw/vpx_dc_128_predictor_4x4 msa neon sse2/;
     85 
     86 add_proto qw/void vpx_d207_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
     87 specialize qw/vpx_d207_predictor_8x8 ssse3/;
     88 
     89 add_proto qw/void vpx_d45_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
     90 specialize qw/vpx_d45_predictor_8x8 neon sse2 vsx/;
     91 
     92 add_proto qw/void vpx_d63_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
     93 specialize qw/vpx_d63_predictor_8x8 ssse3 vsx/;
     94 
     95 add_proto qw/void vpx_h_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
     96 specialize qw/vpx_h_predictor_8x8 neon dspr2 msa sse2 vsx/;
     97 
     98 add_proto qw/void vpx_d117_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
     99 
    100 add_proto qw/void vpx_d135_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
    101 specialize qw/vpx_d135_predictor_8x8 neon/;
    102 
    103 add_proto qw/void vpx_d153_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
    104 specialize qw/vpx_d153_predictor_8x8 ssse3/;
    105 
    106 add_proto qw/void vpx_v_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
    107 specialize qw/vpx_v_predictor_8x8 neon msa sse2/;
    108 
    109 add_proto qw/void vpx_tm_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
    110 specialize qw/vpx_tm_predictor_8x8 neon dspr2 msa sse2 vsx/;
    111 
    112 add_proto qw/void vpx_dc_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
    113 specialize qw/vpx_dc_predictor_8x8 dspr2 neon msa sse2 vsx/;
    114 
    115 add_proto qw/void vpx_dc_top_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
    116 specialize qw/vpx_dc_top_predictor_8x8 neon msa sse2/;
    117 
    118 add_proto qw/void vpx_dc_left_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
    119 specialize qw/vpx_dc_left_predictor_8x8 neon msa sse2/;
    120 
    121 add_proto qw/void vpx_dc_128_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
    122 specialize qw/vpx_dc_128_predictor_8x8 neon msa sse2/;
    123 
    124 add_proto qw/void vpx_d207_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
    125 specialize qw/vpx_d207_predictor_16x16 ssse3/;
    126 
    127 add_proto qw/void vpx_d45_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
    128 specialize qw/vpx_d45_predictor_16x16 neon ssse3 vsx/;
    129 
    130 add_proto qw/void vpx_d63_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
    131 specialize qw/vpx_d63_predictor_16x16 ssse3 vsx/;
    132 
    133 add_proto qw/void vpx_h_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
    134 specialize qw/vpx_h_predictor_16x16 neon dspr2 msa sse2 vsx/;
    135 
    136 add_proto qw/void vpx_d117_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
    137 
    138 add_proto qw/void vpx_d135_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
    139 specialize qw/vpx_d135_predictor_16x16 neon/;
    140 
    141 add_proto qw/void vpx_d153_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
    142 specialize qw/vpx_d153_predictor_16x16 ssse3/;
    143 
    144 add_proto qw/void vpx_v_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
    145 specialize qw/vpx_v_predictor_16x16 neon msa sse2 vsx/;
    146 
    147 add_proto qw/void vpx_tm_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
    148 specialize qw/vpx_tm_predictor_16x16 neon msa sse2 vsx/;
    149 
    150 add_proto qw/void vpx_dc_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
    151 specialize qw/vpx_dc_predictor_16x16 dspr2 neon msa sse2 vsx/;
    152 
    153 add_proto qw/void vpx_dc_top_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
    154 specialize qw/vpx_dc_top_predictor_16x16 neon msa sse2 vsx/;
    155 
    156 add_proto qw/void vpx_dc_left_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
    157 specialize qw/vpx_dc_left_predictor_16x16 neon msa sse2 vsx/;
    158 
    159 add_proto qw/void vpx_dc_128_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
    160 specialize qw/vpx_dc_128_predictor_16x16 neon msa sse2 vsx/;
    161 
    162 add_proto qw/void vpx_d207_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
    163 specialize qw/vpx_d207_predictor_32x32 ssse3/;
    164 
    165 add_proto qw/void vpx_d45_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
    166 specialize qw/vpx_d45_predictor_32x32 neon ssse3 vsx/;
    167 
    168 add_proto qw/void vpx_d63_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
    169 specialize qw/vpx_d63_predictor_32x32 ssse3 vsx/;
    170 
    171 add_proto qw/void vpx_h_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
    172 specialize qw/vpx_h_predictor_32x32 neon msa sse2 vsx/;
    173 
    174 add_proto qw/void vpx_d117_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
    175 
    176 add_proto qw/void vpx_d135_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
    177 specialize qw/vpx_d135_predictor_32x32 neon/;
    178 
    179 add_proto qw/void vpx_d153_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
    180 specialize qw/vpx_d153_predictor_32x32 ssse3/;
    181 
    182 add_proto qw/void vpx_v_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
    183 specialize qw/vpx_v_predictor_32x32 neon msa sse2 vsx/;
    184 
    185 add_proto qw/void vpx_tm_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
    186 specialize qw/vpx_tm_predictor_32x32 neon msa sse2 vsx/;
    187 
    188 add_proto qw/void vpx_dc_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
    189 specialize qw/vpx_dc_predictor_32x32 msa neon sse2 vsx/;
    190 
    191 add_proto qw/void vpx_dc_top_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
    192 specialize qw/vpx_dc_top_predictor_32x32 msa neon sse2 vsx/;
    193 
    194 add_proto qw/void vpx_dc_left_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
    195 specialize qw/vpx_dc_left_predictor_32x32 msa neon sse2 vsx/;
    196 
    197 add_proto qw/void vpx_dc_128_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
    198 specialize qw/vpx_dc_128_predictor_32x32 msa neon sse2 vsx/;
    199 
    200 # High bitdepth functions
    201 if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
    202   add_proto qw/void vpx_highbd_d207_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    203   specialize qw/vpx_highbd_d207_predictor_4x4 sse2/;
    204 
    205   add_proto qw/void vpx_highbd_d45_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    206   specialize qw/vpx_highbd_d45_predictor_4x4 neon ssse3/;
    207 
    208   add_proto qw/void vpx_highbd_d63_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    209   specialize qw/vpx_highbd_d63_predictor_4x4 sse2/;
    210 
    211   add_proto qw/void vpx_highbd_h_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    212   specialize qw/vpx_highbd_h_predictor_4x4 neon sse2/;
    213 
    214   add_proto qw/void vpx_highbd_d117_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    215   specialize qw/vpx_highbd_d117_predictor_4x4 sse2/;
    216 
    217   add_proto qw/void vpx_highbd_d135_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    218   specialize qw/vpx_highbd_d135_predictor_4x4 neon sse2/;
    219 
    220   add_proto qw/void vpx_highbd_d153_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    221   specialize qw/vpx_highbd_d153_predictor_4x4 sse2/;
    222 
    223   add_proto qw/void vpx_highbd_v_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    224   specialize qw/vpx_highbd_v_predictor_4x4 neon sse2/;
    225 
    226   add_proto qw/void vpx_highbd_tm_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    227   specialize qw/vpx_highbd_tm_predictor_4x4 neon sse2/;
    228 
    229   add_proto qw/void vpx_highbd_dc_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    230   specialize qw/vpx_highbd_dc_predictor_4x4 neon sse2/;
    231 
    232   add_proto qw/void vpx_highbd_dc_top_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    233   specialize qw/vpx_highbd_dc_top_predictor_4x4 neon sse2/;
    234 
    235   add_proto qw/void vpx_highbd_dc_left_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    236   specialize qw/vpx_highbd_dc_left_predictor_4x4 neon sse2/;
    237 
    238   add_proto qw/void vpx_highbd_dc_128_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    239   specialize qw/vpx_highbd_dc_128_predictor_4x4 neon sse2/;
    240 
    241   add_proto qw/void vpx_highbd_d207_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    242   specialize qw/vpx_highbd_d207_predictor_8x8 ssse3/;
    243 
    244   add_proto qw/void vpx_highbd_d45_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    245   specialize qw/vpx_highbd_d45_predictor_8x8 neon ssse3/;
    246 
    247   add_proto qw/void vpx_highbd_d63_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    248   specialize qw/vpx_highbd_d63_predictor_8x8 ssse3/;
    249 
    250   add_proto qw/void vpx_highbd_h_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    251   specialize qw/vpx_highbd_h_predictor_8x8 neon sse2/;
    252 
    253   add_proto qw/void vpx_highbd_d117_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    254   specialize qw/vpx_highbd_d117_predictor_8x8 ssse3/;
    255 
    256   add_proto qw/void vpx_highbd_d135_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    257   specialize qw/vpx_highbd_d135_predictor_8x8 neon ssse3/;
    258 
    259   add_proto qw/void vpx_highbd_d153_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    260   specialize qw/vpx_highbd_d153_predictor_8x8 ssse3/;
    261 
    262   add_proto qw/void vpx_highbd_v_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    263   specialize qw/vpx_highbd_v_predictor_8x8 neon sse2/;
    264 
    265   add_proto qw/void vpx_highbd_tm_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    266   specialize qw/vpx_highbd_tm_predictor_8x8 neon sse2/;
    267 
    268   add_proto qw/void vpx_highbd_dc_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    269   specialize qw/vpx_highbd_dc_predictor_8x8 neon sse2/;
    270 
    271   add_proto qw/void vpx_highbd_dc_top_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    272   specialize qw/vpx_highbd_dc_top_predictor_8x8 neon sse2/;
    273 
    274   add_proto qw/void vpx_highbd_dc_left_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    275   specialize qw/vpx_highbd_dc_left_predictor_8x8 neon sse2/;
    276 
    277   add_proto qw/void vpx_highbd_dc_128_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    278   specialize qw/vpx_highbd_dc_128_predictor_8x8 neon sse2/;
    279 
    280   add_proto qw/void vpx_highbd_d207_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    281   specialize qw/vpx_highbd_d207_predictor_16x16 ssse3/;
    282 
    283   add_proto qw/void vpx_highbd_d45_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    284   specialize qw/vpx_highbd_d45_predictor_16x16 neon ssse3/;
    285 
    286   add_proto qw/void vpx_highbd_d63_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    287   specialize qw/vpx_highbd_d63_predictor_16x16 ssse3/;
    288 
    289   add_proto qw/void vpx_highbd_h_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    290   specialize qw/vpx_highbd_h_predictor_16x16 neon sse2/;
    291 
    292   add_proto qw/void vpx_highbd_d117_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    293   specialize qw/vpx_highbd_d117_predictor_16x16 ssse3/;
    294 
    295   add_proto qw/void vpx_highbd_d135_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    296   specialize qw/vpx_highbd_d135_predictor_16x16 neon ssse3/;
    297 
    298   add_proto qw/void vpx_highbd_d153_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    299   specialize qw/vpx_highbd_d153_predictor_16x16 ssse3/;
    300 
    301   add_proto qw/void vpx_highbd_v_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    302   specialize qw/vpx_highbd_v_predictor_16x16 neon sse2/;
    303 
    304   add_proto qw/void vpx_highbd_tm_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    305   specialize qw/vpx_highbd_tm_predictor_16x16 neon sse2/;
    306 
    307   add_proto qw/void vpx_highbd_dc_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    308   specialize qw/vpx_highbd_dc_predictor_16x16 neon sse2/;
    309 
    310   add_proto qw/void vpx_highbd_dc_top_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    311   specialize qw/vpx_highbd_dc_top_predictor_16x16 neon sse2/;
    312 
    313   add_proto qw/void vpx_highbd_dc_left_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    314   specialize qw/vpx_highbd_dc_left_predictor_16x16 neon sse2/;
    315 
    316   add_proto qw/void vpx_highbd_dc_128_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    317   specialize qw/vpx_highbd_dc_128_predictor_16x16 neon sse2/;
    318 
    319   add_proto qw/void vpx_highbd_d207_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    320   specialize qw/vpx_highbd_d207_predictor_32x32 ssse3/;
    321 
    322   add_proto qw/void vpx_highbd_d45_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    323   specialize qw/vpx_highbd_d45_predictor_32x32 neon ssse3/;
    324 
    325   add_proto qw/void vpx_highbd_d63_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    326   specialize qw/vpx_highbd_d63_predictor_32x32 ssse3/;
    327 
    328   add_proto qw/void vpx_highbd_h_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    329   specialize qw/vpx_highbd_h_predictor_32x32 neon sse2/;
    330 
    331   add_proto qw/void vpx_highbd_d117_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    332   specialize qw/vpx_highbd_d117_predictor_32x32 ssse3/;
    333 
    334   add_proto qw/void vpx_highbd_d135_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    335   specialize qw/vpx_highbd_d135_predictor_32x32 neon ssse3/;
    336 
    337   add_proto qw/void vpx_highbd_d153_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    338   specialize qw/vpx_highbd_d153_predictor_32x32 ssse3/;
    339 
    340   add_proto qw/void vpx_highbd_v_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    341   specialize qw/vpx_highbd_v_predictor_32x32 neon sse2/;
    342 
    343   add_proto qw/void vpx_highbd_tm_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    344   specialize qw/vpx_highbd_tm_predictor_32x32 neon sse2/;
    345 
    346   add_proto qw/void vpx_highbd_dc_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    347   specialize qw/vpx_highbd_dc_predictor_32x32 neon sse2/;
    348 
    349   add_proto qw/void vpx_highbd_dc_top_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    350   specialize qw/vpx_highbd_dc_top_predictor_32x32 neon sse2/;
    351 
    352   add_proto qw/void vpx_highbd_dc_left_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    353   specialize qw/vpx_highbd_dc_left_predictor_32x32 neon sse2/;
    354 
    355   add_proto qw/void vpx_highbd_dc_128_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    356   specialize qw/vpx_highbd_dc_128_predictor_32x32 neon sse2/;
    357 }  # CONFIG_VP9_HIGHBITDEPTH
    358 
    359 #
    360 # Sub Pixel Filters
    361 #
    362 add_proto qw/void vpx_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
    363 specialize qw/vpx_convolve_copy neon dspr2 msa sse2 vsx/;
    364 
    365 add_proto qw/void vpx_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
    366 specialize qw/vpx_convolve_avg neon dspr2 msa sse2 vsx/;
    367 
    368 add_proto qw/void vpx_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
    369 specialize qw/vpx_convolve8 sse2 ssse3 avx2 neon dspr2 msa vsx/;
    370 
    371 add_proto qw/void vpx_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
    372 specialize qw/vpx_convolve8_horiz sse2 ssse3 avx2 neon dspr2 msa vsx/;
    373 
    374 add_proto qw/void vpx_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
    375 specialize qw/vpx_convolve8_vert sse2 ssse3 avx2 neon dspr2 msa vsx/;
    376 
    377 add_proto qw/void vpx_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
    378 specialize qw/vpx_convolve8_avg sse2 ssse3 avx2 neon dspr2 msa vsx/;
    379 
    380 add_proto qw/void vpx_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
    381 specialize qw/vpx_convolve8_avg_horiz sse2 ssse3 avx2 neon dspr2 msa vsx/;
    382 
    383 add_proto qw/void vpx_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
    384 specialize qw/vpx_convolve8_avg_vert sse2 ssse3 avx2 neon dspr2 msa vsx/;
    385 
    386 add_proto qw/void vpx_scaled_2d/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
    387 specialize qw/vpx_scaled_2d ssse3 neon msa/;
    388 
    389 add_proto qw/void vpx_scaled_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
    390 
    391 add_proto qw/void vpx_scaled_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
    392 
    393 add_proto qw/void vpx_scaled_avg_2d/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
    394 
    395 add_proto qw/void vpx_scaled_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
    396 
    397 add_proto qw/void vpx_scaled_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
    398 
    399 if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
    400   #
    401   # Sub Pixel Filters
    402   #
    403   add_proto qw/void vpx_highbd_convolve_copy/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bps";
    404   specialize qw/vpx_highbd_convolve_copy sse2 avx2 neon/;
    405 
    406   add_proto qw/void vpx_highbd_convolve_avg/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bps";
    407   specialize qw/vpx_highbd_convolve_avg sse2 avx2 neon/;
    408 
    409   add_proto qw/void vpx_highbd_convolve8/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bps";
    410   specialize qw/vpx_highbd_convolve8 avx2 neon/, "$sse2_x86_64";
    411 
    412   add_proto qw/void vpx_highbd_convolve8_horiz/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bps";
    413   specialize qw/vpx_highbd_convolve8_horiz avx2 neon/, "$sse2_x86_64";
    414 
    415   add_proto qw/void vpx_highbd_convolve8_vert/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bps";
    416   specialize qw/vpx_highbd_convolve8_vert avx2 neon/, "$sse2_x86_64";
    417 
    418   add_proto qw/void vpx_highbd_convolve8_avg/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bps";
    419   specialize qw/vpx_highbd_convolve8_avg avx2 neon/, "$sse2_x86_64";
    420 
    421   add_proto qw/void vpx_highbd_convolve8_avg_horiz/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bps";
    422   specialize qw/vpx_highbd_convolve8_avg_horiz avx2 neon/, "$sse2_x86_64";
    423 
    424   add_proto qw/void vpx_highbd_convolve8_avg_vert/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bps";
    425   specialize qw/vpx_highbd_convolve8_avg_vert avx2 neon/, "$sse2_x86_64";
    426 }  # CONFIG_VP9_HIGHBITDEPTH
    427 
    428 #
    429 # Loopfilter
    430 #
        # 8-bit loop filter prototypes (uint8_t *s). Each add_proto call is given
        # the return type and function name as a qw// list plus the C argument
        # list as a string; the specialize call that follows repeats the function
        # name and lists architecture suffixes (sse2, avx2, neon, dspr2, msa).
        # NOTE(review): add_proto/specialize are defined outside this file --
        # presumably the suffixes select optimized variants; confirm in the
        # generator script.
        #
        # The 4- and 8-wide *_dual prototypes take two blimit/limit/thresh
        # triples (suffix 0 and 1); the 16-wide *_dual prototypes take one.
    431 add_proto qw/void vpx_lpf_vertical_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
    432 specialize qw/vpx_lpf_vertical_16 sse2 neon dspr2 msa/;
    433 
    434 add_proto qw/void vpx_lpf_vertical_16_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
    435 specialize qw/vpx_lpf_vertical_16_dual sse2 neon dspr2 msa/;
    436 
    437 add_proto qw/void vpx_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
    438 specialize qw/vpx_lpf_vertical_8 sse2 neon dspr2 msa/;
    439 
    440 add_proto qw/void vpx_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
    441 specialize qw/vpx_lpf_vertical_8_dual sse2 neon dspr2 msa/;
    442 
    443 add_proto qw/void vpx_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
    444 specialize qw/vpx_lpf_vertical_4 sse2 neon dspr2 msa/;
    445 
    446 add_proto qw/void vpx_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
    447 specialize qw/vpx_lpf_vertical_4_dual sse2 neon dspr2 msa/;
    448 
        # The two horizontal 16-wide filters are the only entries in this group
        # that also list avx2.
    449 add_proto qw/void vpx_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
    450 specialize qw/vpx_lpf_horizontal_16 sse2 avx2 neon dspr2 msa/;
    451 
    452 add_proto qw/void vpx_lpf_horizontal_16_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
    453 specialize qw/vpx_lpf_horizontal_16_dual sse2 avx2 neon dspr2 msa/;
    454 
    455 add_proto qw/void vpx_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
    456 specialize qw/vpx_lpf_horizontal_8 sse2 neon dspr2 msa/;
    457 
    458 add_proto qw/void vpx_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
    459 specialize qw/vpx_lpf_horizontal_8_dual sse2 neon dspr2 msa/;
    460 
    461 add_proto qw/void vpx_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
    462 specialize qw/vpx_lpf_horizontal_4 sse2 neon dspr2 msa/;
    463 
    464 add_proto qw/void vpx_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
    465 specialize qw/vpx_lpf_horizontal_4_dual sse2 neon dspr2 msa/;
    466 
        # High-bitdepth loop filter prototypes, declared only when
        # vpx_config("CONFIG_VP9_HIGHBITDEPTH") returns "yes". They mirror the
        # 8-bit vpx_lpf_* argument lists but take "uint16_t *s" and a trailing
        # "int bd"; every entry lists only sse2 and neon.
    467 if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
    468   add_proto qw/void vpx_highbd_lpf_vertical_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
    469   specialize qw/vpx_highbd_lpf_vertical_16 sse2 neon/;
    470 
    471   add_proto qw/void vpx_highbd_lpf_vertical_16_dual/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
    472   specialize qw/vpx_highbd_lpf_vertical_16_dual sse2 neon/;
    473 
    474   add_proto qw/void vpx_highbd_lpf_vertical_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
    475   specialize qw/vpx_highbd_lpf_vertical_8 sse2 neon/;
    476 
    477   add_proto qw/void vpx_highbd_lpf_vertical_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
    478   specialize qw/vpx_highbd_lpf_vertical_8_dual sse2 neon/;
    479 
    480   add_proto qw/void vpx_highbd_lpf_vertical_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
    481   specialize qw/vpx_highbd_lpf_vertical_4 sse2 neon/;
    482 
    483   add_proto qw/void vpx_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
    484   specialize qw/vpx_highbd_lpf_vertical_4_dual sse2 neon/;
    485 
    486   add_proto qw/void vpx_highbd_lpf_horizontal_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
    487   specialize qw/vpx_highbd_lpf_horizontal_16 sse2 neon/;
    488 
    489   add_proto qw/void vpx_highbd_lpf_horizontal_16_dual/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
    490   specialize qw/vpx_highbd_lpf_horizontal_16_dual sse2 neon/;
    491 
    492   add_proto qw/void vpx_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
    493   specialize qw/vpx_highbd_lpf_horizontal_8 sse2 neon/;
    494 
    495   add_proto qw/void vpx_highbd_lpf_horizontal_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
    496   specialize qw/vpx_highbd_lpf_horizontal_8_dual sse2 neon/;
    497 
    498   add_proto qw/void vpx_highbd_lpf_horizontal_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
    499   specialize qw/vpx_highbd_lpf_horizontal_4 sse2 neon/;
    500 
    501   add_proto qw/void vpx_highbd_lpf_horizontal_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
    502   specialize qw/vpx_highbd_lpf_horizontal_4_dual sse2 neon/;
    503 }  # CONFIG_VP9_HIGHBITDEPTH
    504 
    505 #
    506 # Encoder functions.
    507 #
    508 
    509 #
    510 # Forward transform
    511 #
        # Forward DCT prototypes, declared only when CONFIG_VP9_ENCODER is "yes".
        # The two inner branches are mutually exclusive: both declare the same
        # vpx_fdct* prototypes, but with different specialize lists, and only the
        # high-bitdepth branch also declares the vpx_highbd_fdct* prototypes.
    512 if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") {
    513 if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
    514   add_proto qw/void vpx_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
    515   specialize qw/vpx_fdct4x4 neon sse2/;
    516 
    517   add_proto qw/void vpx_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
    518   specialize qw/vpx_fdct4x4_1 sse2 neon/;
    519 
    520   add_proto qw/void vpx_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
    521   specialize qw/vpx_fdct8x8 neon sse2/;
    522 
          # vpx_fdct8x8_1 is the only vpx_fdct* entry in this branch that lists msa.
    523   add_proto qw/void vpx_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
    524   specialize qw/vpx_fdct8x8_1 neon sse2 msa/;
    525 
    526   add_proto qw/void vpx_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
    527   specialize qw/vpx_fdct16x16 neon sse2/;
    528 
    529   add_proto qw/void vpx_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
    530   specialize qw/vpx_fdct16x16_1 sse2 neon/;
    531 
    532   add_proto qw/void vpx_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
    533   specialize qw/vpx_fdct32x32 neon sse2/;
    534 
    535   add_proto qw/void vpx_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
    536   specialize qw/vpx_fdct32x32_rd neon sse2/;
    537 
    538   add_proto qw/void vpx_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
    539   specialize qw/vpx_fdct32x32_1 sse2 neon/;
    540 
    541   add_proto qw/void vpx_highbd_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
    542   specialize qw/vpx_highbd_fdct4x4 sse2/;
    543 
    544   add_proto qw/void vpx_highbd_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
    545   specialize qw/vpx_highbd_fdct8x8 sse2/;
    546 
    547   add_proto qw/void vpx_highbd_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
    548   specialize qw/vpx_highbd_fdct8x8_1 neon/;
          # Assigns the bareword vpx_fdct8x8_1_neon to a variable named after the
          # high-bitdepth neon variant.
          # NOTE(review): presumably this makes the generator reuse the 8-bit neon
          # implementation for the highbd entry -- confirm in the generator script.
    549   $vpx_highbd_fdct8x8_1_neon=vpx_fdct8x8_1_neon;
    550 
    551   add_proto qw/void vpx_highbd_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
    552   specialize qw/vpx_highbd_fdct16x16 sse2/;
    553 
          # No specialize call follows this prototype.
    554   add_proto qw/void vpx_highbd_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
    555 
    556   add_proto qw/void vpx_highbd_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
    557   specialize qw/vpx_highbd_fdct32x32 sse2/;
    558 
    559   add_proto qw/void vpx_highbd_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
    560   specialize qw/vpx_highbd_fdct32x32_rd sse2/;
    561 
          # No specialize call follows this prototype.
    562   add_proto qw/void vpx_highbd_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
    563 } else {
          # Non-high-bitdepth branch: same vpx_fdct* prototypes, with msa (and for
          # the 32x32 transforms avx2) added to the specialize lists.
    564   add_proto qw/void vpx_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
    565   specialize qw/vpx_fdct4x4 neon sse2 msa/;
    566 
    567   add_proto qw/void vpx_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
    568   specialize qw/vpx_fdct4x4_1 sse2 neon/;
    569 
          # $ssse3_x86_64 is 'ssse3' only when $opts{arch} is "x86_64" and is the
          # empty string otherwise (set near the top of this file).
    570   add_proto qw/void vpx_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
    571   specialize qw/vpx_fdct8x8 sse2 neon msa/, "$ssse3_x86_64";
    572 
    573   add_proto qw/void vpx_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
    574   specialize qw/vpx_fdct8x8_1 sse2 neon msa/;
    575 
    576   add_proto qw/void vpx_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
    577   specialize qw/vpx_fdct16x16 neon sse2 msa/;
    578 
    579   add_proto qw/void vpx_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
    580   specialize qw/vpx_fdct16x16_1 sse2 neon msa/;
    581 
    582   add_proto qw/void vpx_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
    583   specialize qw/vpx_fdct32x32 neon sse2 avx2 msa/;
    584 
    585   add_proto qw/void vpx_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
    586   specialize qw/vpx_fdct32x32_rd sse2 avx2 neon msa/;
    587 
    588   add_proto qw/void vpx_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
    589   specialize qw/vpx_fdct32x32_1 sse2 neon msa/;
    590 }  # CONFIG_VP9_HIGHBITDEPTH
    591 }  # CONFIG_VP9_ENCODER
    592 
    593 #
    594 # Inverse transform
        #
        # Inverse transform prototypes, declared only when CONFIG_VP9 is "yes".
        # Layout of this group:
        #   1. the 8-bit idct/iwht prototypes, always declared;
        #   2. their specialize lists, skipped when CONFIG_EMULATE_HARDWARE is
        #      "yes", with extra dspr2/msa lists when CONFIG_VP9_HIGHBITDEPTH is
        #      not "yes";
        #   3. the vpx_highbd_* prototypes and specialize lists when
        #      CONFIG_VP9_HIGHBITDEPTH is "yes".
    595 if (vpx_config("CONFIG_VP9") eq "yes") {
    596 
    597 add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride";
    598 add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
    599 add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride";
    600 add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int stride";
    601 add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
    602 add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride";
    603 add_proto qw/void vpx_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int stride";
    604 add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride";
    605 add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
    606 add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int stride";
    607 add_proto qw/void vpx_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int stride";
    608 add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int stride";
    609 add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
    610 add_proto qw/void vpx_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride";
    611 add_proto qw/void vpx_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
    612 
    613 if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") {
    614   # Note that there are more specializations appended when
    615   # CONFIG_VP9_HIGHBITDEPTH is off.
    616   specialize qw/vpx_idct4x4_16_add neon sse2 vsx/;
    617   specialize qw/vpx_idct4x4_1_add neon sse2/;
    618   specialize qw/vpx_idct8x8_64_add neon sse2 vsx/;
    619   specialize qw/vpx_idct8x8_12_add neon sse2 ssse3/;
    620   specialize qw/vpx_idct8x8_1_add neon sse2/;
    621   specialize qw/vpx_idct16x16_256_add neon sse2 vsx/;
    622   specialize qw/vpx_idct16x16_38_add neon sse2/;
    623   specialize qw/vpx_idct16x16_10_add neon sse2/;
    624   specialize qw/vpx_idct16x16_1_add neon sse2/;
    625   specialize qw/vpx_idct32x32_1024_add neon sse2 vsx/;
    626   specialize qw/vpx_idct32x32_135_add neon sse2 ssse3/;
    627   specialize qw/vpx_idct32x32_34_add neon sse2 ssse3/;
    628   specialize qw/vpx_idct32x32_1_add neon sse2/;
    629 
    630   if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") ne "yes") {
    631     # Note that these specializations are appended to the above ones.
    632     specialize qw/vpx_idct4x4_16_add dspr2 msa/;
    633     specialize qw/vpx_idct4x4_1_add dspr2 msa/;
    634     specialize qw/vpx_idct8x8_64_add dspr2 msa/;
    635     specialize qw/vpx_idct8x8_12_add dspr2 msa/;
    636     specialize qw/vpx_idct8x8_1_add dspr2 msa/;
    637     specialize qw/vpx_idct16x16_256_add dspr2 msa/;
    638     specialize qw/vpx_idct16x16_38_add dspr2 msa/;
            # The dspr2 and msa variables for the 38-coefficient entry are set to
            # the names of the 256-coefficient variants.
            # NOTE(review): presumably the generator then reuses the full 16x16
            # implementation for these two targets -- confirm in the generator.
    639     $vpx_idct16x16_38_add_dspr2=vpx_idct16x16_256_add_dspr2;
    640     $vpx_idct16x16_38_add_msa=vpx_idct16x16_256_add_msa;
    641     specialize qw/vpx_idct16x16_10_add dspr2 msa/;
    642     specialize qw/vpx_idct16x16_1_add dspr2 msa/;
    643     specialize qw/vpx_idct32x32_1024_add dspr2 msa/;
    644     specialize qw/vpx_idct32x32_135_add dspr2 msa/;
            # Same pattern: the 135-coefficient dspr2/msa variables are set to the
            # names of the 1024-coefficient variants.
    645     $vpx_idct32x32_135_add_dspr2=vpx_idct32x32_1024_add_dspr2;
    646     $vpx_idct32x32_135_add_msa=vpx_idct32x32_1024_add_msa;
    647     specialize qw/vpx_idct32x32_34_add dspr2 msa/;
    648     specialize qw/vpx_idct32x32_1_add dspr2 msa/;
    649     specialize qw/vpx_iwht4x4_16_add msa sse2/;
    650     specialize qw/vpx_iwht4x4_1_add msa/;
    651   } # !CONFIG_VP9_HIGHBITDEPTH
    652 }  # !CONFIG_EMULATE_HARDWARE
    653 
    654 if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
    655   # Note as optimized versions of these functions are added we need to add a check to ensure
    656   # that when CONFIG_EMULATE_HARDWARE is on, it defaults to the C versions only.
          # NOTE(review): the vpx_iwht4x4_16_add specialize below and the four
          # vpx_highbd_idct*_1_add specialize calls further down sit outside the
          # CONFIG_EMULATE_HARDWARE check at the end of this branch -- confirm
          # that is intended.
    657   specialize qw/vpx_iwht4x4_16_add sse2/;
    658 
    659   add_proto qw/void vpx_highbd_idct4x4_16_add/, "const tran_low_t *input, uint16_t *dest, int stride, int bd";
    660   add_proto qw/void vpx_highbd_idct4x4_1_add/, "const tran_low_t *input, uint16_t *dest, int stride, int bd";
    661   specialize qw/vpx_highbd_idct4x4_1_add neon sse2/;
    662 
    663   add_proto qw/void vpx_highbd_idct8x8_64_add/, "const tran_low_t *input, uint16_t *dest, int stride, int bd";
    664   add_proto qw/void vpx_highbd_idct8x8_12_add/, "const tran_low_t *input, uint16_t *dest, int stride, int bd";
    665   add_proto qw/void vpx_highbd_idct8x8_1_add/, "const tran_low_t *input, uint16_t *dest, int stride, int bd";
    666   specialize qw/vpx_highbd_idct8x8_1_add neon sse2/;
    667 
    668   add_proto qw/void vpx_highbd_idct16x16_256_add/, "const tran_low_t *input, uint16_t *dest, int stride, int bd";
    669   add_proto qw/void vpx_highbd_idct16x16_38_add/, "const tran_low_t *input, uint16_t *dest, int stride, int bd";
    670   add_proto qw/void vpx_highbd_idct16x16_10_add/, "const tran_low_t *input, uint16_t *dest, int stride, int bd";
    671   add_proto qw/void vpx_highbd_idct16x16_1_add/, "const tran_low_t *input, uint16_t *dest, int stride, int bd";
    672   specialize qw/vpx_highbd_idct16x16_1_add neon sse2/;
    673 
    674   add_proto qw/void vpx_highbd_idct32x32_1024_add/, "const tran_low_t *input, uint16_t *dest, int stride, int bd";
    675   add_proto qw/void vpx_highbd_idct32x32_135_add/, "const tran_low_t *input, uint16_t *dest, int stride, int bd";
    676   add_proto qw/void vpx_highbd_idct32x32_34_add/, "const tran_low_t *input, uint16_t *dest, int stride, int bd";
    677   add_proto qw/void vpx_highbd_idct32x32_1_add/, "const tran_low_t *input, uint16_t *dest, int stride, int bd";
    678   specialize qw/vpx_highbd_idct32x32_1_add neon sse2/;
    679 
          # The two highbd iwht prototypes have no specialize call in this group.
    680   add_proto qw/void vpx_highbd_iwht4x4_16_add/, "const tran_low_t *input, uint16_t *dest, int stride, int bd";
    681   add_proto qw/void vpx_highbd_iwht4x4_1_add/, "const tran_low_t *input, uint16_t *dest, int stride, int bd";
    682 
    683   if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") {
    684     specialize qw/vpx_highbd_idct4x4_16_add neon sse2 sse4_1/;
    685     specialize qw/vpx_highbd_idct8x8_64_add neon sse2 sse4_1/;
    686     specialize qw/vpx_highbd_idct8x8_12_add neon sse2 sse4_1/;
    687     specialize qw/vpx_highbd_idct16x16_256_add neon sse2 sse4_1/;
    688     specialize qw/vpx_highbd_idct16x16_38_add neon sse2 sse4_1/;
    689     specialize qw/vpx_highbd_idct16x16_10_add neon sse2 sse4_1/;
    690     specialize qw/vpx_highbd_idct32x32_1024_add neon sse2 sse4_1/;
    691     specialize qw/vpx_highbd_idct32x32_135_add neon sse2 sse4_1/;
    692     specialize qw/vpx_highbd_idct32x32_34_add neon sse2 sse4_1/;
    693   }  # !CONFIG_EMULATE_HARDWARE
    694 }  # CONFIG_VP9_HIGHBITDEPTH
    695 }  # CONFIG_VP9
    696 
    697 #
    698 # Quantization
    699 #
        # Quantizer prototypes, declared only when CONFIG_VP9_ENCODER is "yes".
        # All four prototypes share one argument list; the vpx_highbd_* pair is
        # added only when CONFIG_VP9_HIGHBITDEPTH is "yes" and lists sse2 alone.
    700 if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") {
    701   add_proto qw/void vpx_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
    702   specialize qw/vpx_quantize_b neon sse2 ssse3 avx/;
    703 
          # Unlike vpx_quantize_b, the 32x32 entry does not list sse2.
    704   add_proto qw/void vpx_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
    705   specialize qw/vpx_quantize_b_32x32 neon ssse3 avx/;
    706 
    707   if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
    708     add_proto qw/void vpx_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
    709     specialize qw/vpx_highbd_quantize_b sse2/;
    710 
    711     add_proto qw/void vpx_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
    712     specialize qw/vpx_highbd_quantize_b_32x32 sse2/;
    713   }  # CONFIG_VP9_HIGHBITDEPTH
    714 }  # CONFIG_VP9_ENCODER
    715 
    716 if (vpx_config("CONFIG_ENCODERS") eq "yes") {
    717 #
    718 # Block subtraction
    719 #
    720 add_proto qw/void vpx_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
    721 specialize qw/vpx_subtract_block neon msa mmi sse2/;
    722 
    723 #
    724 # Single block SAD
    725 #
    726 add_proto qw/unsigned int vpx_sad64x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
    727 specialize qw/vpx_sad64x64 neon avx2 msa sse2 vsx mmi/;
    728 
    729 add_proto qw/unsigned int vpx_sad64x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
    730 specialize qw/vpx_sad64x32 neon avx2 msa sse2 vsx mmi/;
    731 
    732 add_proto qw/unsigned int vpx_sad32x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
    733 specialize qw/vpx_sad32x64 neon avx2 msa sse2 vsx mmi/;
    734 
    735 add_proto qw/unsigned int vpx_sad32x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
    736 specialize qw/vpx_sad32x32 neon avx2 msa sse2 vsx mmi/;
    737 
    738 add_proto qw/unsigned int vpx_sad32x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
    739 specialize qw/vpx_sad32x16 neon avx2 msa sse2 vsx mmi/;
    740 
    741 add_proto qw/unsigned int vpx_sad16x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
    742 specialize qw/vpx_sad16x32 neon msa sse2 vsx mmi/;
    743 
    744 add_proto qw/unsigned int vpx_sad16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
    745 specialize qw/vpx_sad16x16 neon msa sse2 vsx mmi/;
    746 
    747 add_proto qw/unsigned int vpx_sad16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
    748 specialize qw/vpx_sad16x8 neon msa sse2 vsx mmi/;
    749 
    750 add_proto qw/unsigned int vpx_sad8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
    751 specialize qw/vpx_sad8x16 neon msa sse2 mmi/;
    752 
    753 add_proto qw/unsigned int vpx_sad8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
    754 specialize qw/vpx_sad8x8 neon msa sse2 mmi/;
    755 
    756 add_proto qw/unsigned int vpx_sad8x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
    757 specialize qw/vpx_sad8x4 neon msa sse2 mmi/;
    758 
    759 add_proto qw/unsigned int vpx_sad4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
    760 specialize qw/vpx_sad4x8 neon msa sse2 mmi/;
    761 
    762 add_proto qw/unsigned int vpx_sad4x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
    763 specialize qw/vpx_sad4x4 neon msa sse2 mmi/;
    764 
    765 #
    766 # Avg
    767 #
    768 if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") {
    769   add_proto qw/unsigned int vpx_avg_8x8/, "const uint8_t *, int p";
    770   specialize qw/vpx_avg_8x8 sse2 neon msa/;
    771 
    772   add_proto qw/unsigned int vpx_avg_4x4/, "const uint8_t *, int p";
    773   specialize qw/vpx_avg_4x4 sse2 neon msa/;
    774 
    775   add_proto qw/void vpx_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
    776   specialize qw/vpx_minmax_8x8 sse2 neon msa/;
    777 
    778   if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
    779     add_proto qw/void vpx_hadamard_8x8/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";
    780     specialize qw/vpx_hadamard_8x8 sse2 neon vsx/, "$ssse3_x86_64";
    781 
    782     add_proto qw/void vpx_hadamard_16x16/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";
    783     specialize qw/vpx_hadamard_16x16 avx2 sse2 neon vsx/;
    784 
    785     add_proto qw/int vpx_satd/, "const tran_low_t *coeff, int length";
    786     specialize qw/vpx_satd avx2 sse2 neon/;
    787   } else {
    788     add_proto qw/void vpx_hadamard_8x8/, "const int16_t *src_diff, ptrdiff_t src_stride, int16_t *coeff";
    789     specialize qw/vpx_hadamard_8x8 sse2 neon msa vsx/, "$ssse3_x86_64";
    790 
    791     add_proto qw/void vpx_hadamard_16x16/, "const int16_t *src_diff, ptrdiff_t src_stride, int16_t *coeff";
    792     specialize qw/vpx_hadamard_16x16 avx2 sse2 neon msa vsx/;
    793 
    794     add_proto qw/int vpx_satd/, "const int16_t *coeff, int length";
    795     specialize qw/vpx_satd avx2 sse2 neon msa/;
    796   }
    797 
    798   add_proto qw/void vpx_int_pro_row/, "int16_t *hbuf, const uint8_t *ref, const int ref_stride, const int height";
    799   specialize qw/vpx_int_pro_row sse2 neon msa/;
    800 
    801   add_proto qw/int16_t vpx_int_pro_col/, "const uint8_t *ref, const int width";
    802   specialize qw/vpx_int_pro_col sse2 neon msa/;
    803 
    804   add_proto qw/int vpx_vector_var/, "const int16_t *ref, const int16_t *src, const int bwl";
    805   specialize qw/vpx_vector_var neon sse2 msa/;
    806 }  # CONFIG_VP9_ENCODER
    807 
    808 add_proto qw/unsigned int vpx_sad64x64_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
    809 specialize qw/vpx_sad64x64_avg neon avx2 msa sse2 vsx mmi/;
    810 
    811 add_proto qw/unsigned int vpx_sad64x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
    812 specialize qw/vpx_sad64x32_avg neon avx2 msa sse2 vsx mmi/;
    813 
    814 add_proto qw/unsigned int vpx_sad32x64_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
    815 specialize qw/vpx_sad32x64_avg neon avx2 msa sse2 vsx mmi/;
    816 
    817 add_proto qw/unsigned int vpx_sad32x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
    818 specialize qw/vpx_sad32x32_avg neon avx2 msa sse2 vsx mmi/;
    819 
    820 add_proto qw/unsigned int vpx_sad32x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
    821 specialize qw/vpx_sad32x16_avg neon avx2 msa sse2 vsx mmi/;
    822 
    823 add_proto qw/unsigned int vpx_sad16x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
    824 specialize qw/vpx_sad16x32_avg neon msa sse2 vsx mmi/;
    825 
    826 add_proto qw/unsigned int vpx_sad16x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
    827 specialize qw/vpx_sad16x16_avg neon msa sse2 vsx mmi/;
    828 
    829 add_proto qw/unsigned int vpx_sad16x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
    830 specialize qw/vpx_sad16x8_avg neon msa sse2 vsx mmi/;
    831 
    832 add_proto qw/unsigned int vpx_sad8x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
    833 specialize qw/vpx_sad8x16_avg neon msa sse2 mmi/;
    834 
    835 add_proto qw/unsigned int vpx_sad8x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
    836 specialize qw/vpx_sad8x8_avg neon msa sse2 mmi/;
    837 
    838 add_proto qw/unsigned int vpx_sad8x4_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
    839 specialize qw/vpx_sad8x4_avg neon msa sse2 mmi/;
    840 
    841 add_proto qw/unsigned int vpx_sad4x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
    842 specialize qw/vpx_sad4x8_avg neon msa sse2 mmi/;
    843 
    844 add_proto qw/unsigned int vpx_sad4x4_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
    845 specialize qw/vpx_sad4x4_avg neon msa sse2 mmi/;
    846 
    847 #
    848 # Multi-block SAD, comparing a reference to N blocks 1 pixel apart horizontally
    849 #
    850 # Blocks of 3
    851 add_proto qw/void vpx_sad16x16x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
    852 specialize qw/vpx_sad16x16x3 sse3 ssse3 msa mmi/;
    853 
    854 add_proto qw/void vpx_sad16x8x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
    855 specialize qw/vpx_sad16x8x3 sse3 ssse3 msa mmi/;
    856 
    857 add_proto qw/void vpx_sad8x16x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
    858 specialize qw/vpx_sad8x16x3 sse3 msa mmi/;
    859 
    860 add_proto qw/void vpx_sad8x8x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
    861 specialize qw/vpx_sad8x8x3 sse3 msa mmi/;
    862 
    863 add_proto qw/void vpx_sad4x4x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
    864 specialize qw/vpx_sad4x4x3 sse3 msa mmi/;
    865 
    866 # Blocks of 8
    867 add_proto qw/void vpx_sad16x16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
    868 specialize qw/vpx_sad16x16x8 sse4_1 msa mmi/;
    869 
    870 add_proto qw/void vpx_sad16x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
    871 specialize qw/vpx_sad16x8x8 sse4_1 msa mmi/;
    872 
    873 add_proto qw/void vpx_sad8x16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
    874 specialize qw/vpx_sad8x16x8 sse4_1 msa mmi/;
    875 
    876 add_proto qw/void vpx_sad8x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
    877 specialize qw/vpx_sad8x8x8 sse4_1 msa mmi/;
    878 
    879 add_proto qw/void vpx_sad4x4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
    880 specialize qw/vpx_sad4x4x8 sse4_1 msa mmi/;
    881 
    882 #
    883 # Multi-block SAD, comparing a reference to N independent blocks
    884 #
    885 add_proto qw/void vpx_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
    886 specialize qw/vpx_sad64x64x4d avx512 avx2 neon msa sse2 vsx mmi/;
    887 
    888 add_proto qw/void vpx_sad64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
    889 specialize qw/vpx_sad64x32x4d neon msa sse2 vsx mmi/;
    890 
    891 add_proto qw/void vpx_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
    892 specialize qw/vpx_sad32x64x4d neon msa sse2 vsx mmi/;
    893 
    894 add_proto qw/void vpx_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
    895 specialize qw/vpx_sad32x32x4d avx2 neon msa sse2 vsx mmi/;
    896 
    897 add_proto qw/void vpx_sad32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
    898 specialize qw/vpx_sad32x16x4d neon msa sse2 vsx mmi/;
    899 
    900 add_proto qw/void vpx_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
    901 specialize qw/vpx_sad16x32x4d neon msa sse2 vsx mmi/;
    902 
    903 add_proto qw/void vpx_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
    904 specialize qw/vpx_sad16x16x4d neon msa sse2 vsx mmi/;
    905 
    906 add_proto qw/void vpx_sad16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
    907 specialize qw/vpx_sad16x8x4d neon msa sse2 vsx mmi/;
    908 
    909 add_proto qw/void vpx_sad8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
    910 specialize qw/vpx_sad8x16x4d neon msa sse2 mmi/;
    911 
    912 add_proto qw/void vpx_sad8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
    913 specialize qw/vpx_sad8x8x4d neon msa sse2 mmi/;
    914 
    915 add_proto qw/void vpx_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
    916 specialize qw/vpx_sad8x4x4d neon msa sse2 mmi/;
    917 
    918 add_proto qw/void vpx_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
    919 specialize qw/vpx_sad4x8x4d neon msa sse2 mmi/;
    920 
    921 add_proto qw/void vpx_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
    922 specialize qw/vpx_sad4x4x4d neon msa sse2 mmi/;
    923 
        # Prototype for vpx_sum_squares_2d_i16: takes an int16_t source pointer, a
        # stride and a size, and returns uint64_t. sse2 and msa variants are listed.
    924 add_proto qw/uint64_t vpx_sum_squares_2d_i16/, "const int16_t *src, int stride, int size";
    925 specialize qw/vpx_sum_squares_2d_i16 sse2 msa/;
    926 
    927 #
    928 # Structured Similarity (SSIM)
    929 #
    930 if (vpx_config("CONFIG_INTERNAL_STATS") eq "yes") {
            # 8-bit SSIM parameter prototypes, declared only when
            # CONFIG_INTERNAL_STATS is "yes". Both take a source (s, sp) and a
            # reference (r, rp) plus five uint32_t accumulator pointers.
            # $sse2_x86_64 is 'sse2' when $opts{arch} is "x86_64" and '' otherwise
            # (set near the top of this file), so the specialize lines below name
            # an SSE2 variant on 64-bit x86 only.
    931     add_proto qw/void vpx_ssim_parms_8x8/, "const uint8_t *s, int sp, const uint8_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
    932     specialize qw/vpx_ssim_parms_8x8/, "$sse2_x86_64";
    933 
    934     add_proto qw/void vpx_ssim_parms_16x16/, "const uint8_t *s, int sp, const uint8_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
    935     specialize qw/vpx_ssim_parms_16x16/, "$sse2_x86_64";
    936 }  # CONFIG_INTERNAL_STATS
    937 
    938 if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
          # High-bitdepth encoder prototypes, declared only when
          # CONFIG_VP9_HIGHBITDEPTH is "yes". Note that pixel buffers are still
          # typed const uint8_t * in these signatures (only the SSIM prototype at
          # the end of this block uses uint16_t *).
    939   #
    940   # Block subtraction
    941   #
          # No specialize line: no SIMD variant is listed for this function.
    942   add_proto qw/void vpx_highbd_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride, int bd";
    943 
    944   #
    945   # Single block SAD
    946   #
          # sse2 is listed for every size from 64x64 down to 8x4; the 4x8 and 4x4
          # sizes have no specialize line.
    947   add_proto qw/unsigned int vpx_highbd_sad64x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
    948   specialize qw/vpx_highbd_sad64x64 sse2/;
    949 
    950   add_proto qw/unsigned int vpx_highbd_sad64x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
    951   specialize qw/vpx_highbd_sad64x32 sse2/;
    952 
    953   add_proto qw/unsigned int vpx_highbd_sad32x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
    954   specialize qw/vpx_highbd_sad32x64 sse2/;
    955 
    956   add_proto qw/unsigned int vpx_highbd_sad32x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
    957   specialize qw/vpx_highbd_sad32x32 sse2/;
    958 
    959   add_proto qw/unsigned int vpx_highbd_sad32x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
    960   specialize qw/vpx_highbd_sad32x16 sse2/;
    961 
    962   add_proto qw/unsigned int vpx_highbd_sad16x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
    963   specialize qw/vpx_highbd_sad16x32 sse2/;
    964 
    965   add_proto qw/unsigned int vpx_highbd_sad16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
    966   specialize qw/vpx_highbd_sad16x16 sse2/;
    967 
    968   add_proto qw/unsigned int vpx_highbd_sad16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
    969   specialize qw/vpx_highbd_sad16x8 sse2/;
    970 
    971   add_proto qw/unsigned int vpx_highbd_sad8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
    972   specialize qw/vpx_highbd_sad8x16 sse2/;
    973 
    974   add_proto qw/unsigned int vpx_highbd_sad8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
    975   specialize qw/vpx_highbd_sad8x8 sse2/;
    976 
    977   add_proto qw/unsigned int vpx_highbd_sad8x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
    978   specialize qw/vpx_highbd_sad8x4 sse2/;
    979 
    980   add_proto qw/unsigned int vpx_highbd_sad4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
    981 
    982   add_proto qw/unsigned int vpx_highbd_sad4x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
    983 
    984   #
    985   # Avg
    986   #
          # The avg_8x8, avg_4x4 and minmax_8x8 prototypes have no specialize line.
          # The vpx_highbd_sad*_avg prototypes that follow take the single-block
          # SAD arguments plus a second_pred pointer; sse2 is listed for 64x64
          # down to 8x4, and the 4x8 and 4x4 sizes have no specialize line.
    987   add_proto qw/unsigned int vpx_highbd_avg_8x8/, "const uint8_t *, int p";
    988   add_proto qw/unsigned int vpx_highbd_avg_4x4/, "const uint8_t *, int p";
    989   add_proto qw/void vpx_highbd_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
    990 
    991   add_proto qw/unsigned int vpx_highbd_sad64x64_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
    992   specialize qw/vpx_highbd_sad64x64_avg sse2/;
    993 
    994   add_proto qw/unsigned int vpx_highbd_sad64x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
    995   specialize qw/vpx_highbd_sad64x32_avg sse2/;
    996 
    997   add_proto qw/unsigned int vpx_highbd_sad32x64_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
    998   specialize qw/vpx_highbd_sad32x64_avg sse2/;
    999 
   1000   add_proto qw/unsigned int vpx_highbd_sad32x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
   1001   specialize qw/vpx_highbd_sad32x32_avg sse2/;
   1002 
   1003   add_proto qw/unsigned int vpx_highbd_sad32x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
   1004   specialize qw/vpx_highbd_sad32x16_avg sse2/;
   1005 
   1006   add_proto qw/unsigned int vpx_highbd_sad16x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
   1007   specialize qw/vpx_highbd_sad16x32_avg sse2/;
   1008 
   1009   add_proto qw/unsigned int vpx_highbd_sad16x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
   1010   specialize qw/vpx_highbd_sad16x16_avg sse2/;
   1011 
   1012   add_proto qw/unsigned int vpx_highbd_sad16x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
   1013   specialize qw/vpx_highbd_sad16x8_avg sse2/;
   1014 
   1015   add_proto qw/unsigned int vpx_highbd_sad8x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
   1016   specialize qw/vpx_highbd_sad8x16_avg sse2/;
   1017 
   1018   add_proto qw/unsigned int vpx_highbd_sad8x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
   1019   specialize qw/vpx_highbd_sad8x8_avg sse2/;
   1020 
   1021   add_proto qw/unsigned int vpx_highbd_sad8x4_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
   1022   specialize qw/vpx_highbd_sad8x4_avg sse2/;
   1023 
   1024   add_proto qw/unsigned int vpx_highbd_sad4x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
   1025 
   1026   add_proto qw/unsigned int vpx_highbd_sad4x4_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
   1027 
   1028   #
   1029   # Multi-block SAD, comparing a reference to N independent blocks
   1030   #
          # Each x4d prototype takes one src_ptr, an array of reference pointers
          # (ref_ptr[]) and a uint32_t *sad_array (presumably one result per
          # reference -- confirm against the implementations). Unlike the
          # single-block SADs above, sse2 is listed for every size here,
          # including 4x8 and 4x4.
   1031   add_proto qw/void vpx_highbd_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
   1032   specialize qw/vpx_highbd_sad64x64x4d sse2/;
   1033 
   1034   add_proto qw/void vpx_highbd_sad64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
   1035   specialize qw/vpx_highbd_sad64x32x4d sse2/;
   1036 
   1037   add_proto qw/void vpx_highbd_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
   1038   specialize qw/vpx_highbd_sad32x64x4d sse2/;
   1039 
   1040   add_proto qw/void vpx_highbd_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
   1041   specialize qw/vpx_highbd_sad32x32x4d sse2/;
   1042 
   1043   add_proto qw/void vpx_highbd_sad32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
   1044   specialize qw/vpx_highbd_sad32x16x4d sse2/;
   1045 
   1046   add_proto qw/void vpx_highbd_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
   1047   specialize qw/vpx_highbd_sad16x32x4d sse2/;
   1048 
   1049   add_proto qw/void vpx_highbd_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
   1050   specialize qw/vpx_highbd_sad16x16x4d sse2/;
   1051 
   1052   add_proto qw/void vpx_highbd_sad16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
   1053   specialize qw/vpx_highbd_sad16x8x4d sse2/;
   1054 
   1055   add_proto qw/void vpx_highbd_sad8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
   1056   specialize qw/vpx_highbd_sad8x16x4d sse2/;
   1057 
   1058   add_proto qw/void vpx_highbd_sad8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
   1059   specialize qw/vpx_highbd_sad8x8x4d sse2/;
   1060 
   1061   add_proto qw/void vpx_highbd_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
   1062   specialize qw/vpx_highbd_sad8x4x4d sse2/;
   1063 
   1064   add_proto qw/void vpx_highbd_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
   1065   specialize qw/vpx_highbd_sad4x8x4d sse2/;
   1066 
   1067   add_proto qw/void vpx_highbd_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
   1068   specialize qw/vpx_highbd_sad4x4x4d sse2/;
   1069 
   1070   #
   1071   # Structured Similarity (SSIM)
   1072   #
          # Declared only when CONFIG_INTERNAL_STATS is also "yes". Takes
          # uint16_t source/reference pointers; no specialize line is given.
   1073   if (vpx_config("CONFIG_INTERNAL_STATS") eq "yes") {
   1074     add_proto qw/void vpx_highbd_ssim_parms_8x8/, "const uint16_t *s, int sp, const uint16_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
   1075   }  # CONFIG_INTERNAL_STATS
   1076 }  # CONFIG_VP9_HIGHBITDEPTH
   1077 }  # CONFIG_ENCODERS
   1078 
   1079 if (vpx_config("CONFIG_ENCODERS") eq "yes" || vpx_config("CONFIG_POSTPROC") eq "yes" || vpx_config("CONFIG_VP9_POSTPROC") eq "yes") {
   1080 
   1081 #
   1082 # Variance
   1083 #
        # 8-bit variance prototypes, sizes 64x64 down to 4x4. Each takes a source
        # and a reference pointer with strides plus an unsigned int *sse, and
        # returns unsigned int. All sizes list sse2, neon, msa and mmi; avx2 is
        # listed only for 64x64, 64x32, 32x32, 32x16 and 16x16.
   1084 add_proto qw/unsigned int vpx_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1085   specialize qw/vpx_variance64x64 sse2 avx2 neon msa mmi/;
   1086 
   1087 add_proto qw/unsigned int vpx_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1088   specialize qw/vpx_variance64x32 sse2 avx2 neon msa mmi/;
   1089 
   1090 add_proto qw/unsigned int vpx_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1091   specialize qw/vpx_variance32x64 sse2 neon msa mmi/;
   1092 
   1093 add_proto qw/unsigned int vpx_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1094   specialize qw/vpx_variance32x32 sse2 avx2 neon msa mmi/;
   1095 
   1096 add_proto qw/unsigned int vpx_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1097   specialize qw/vpx_variance32x16 sse2 avx2 neon msa mmi/;
   1098 
   1099 add_proto qw/unsigned int vpx_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1100   specialize qw/vpx_variance16x32 sse2 neon msa mmi/;
   1101 
   1102 add_proto qw/unsigned int vpx_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1103   specialize qw/vpx_variance16x16 sse2 avx2 neon msa mmi/;
   1104 
   1105 add_proto qw/unsigned int vpx_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1106   specialize qw/vpx_variance16x8 sse2 neon msa mmi/;
   1107 
   1108 add_proto qw/unsigned int vpx_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1109   specialize qw/vpx_variance8x16 sse2 neon msa mmi/;
   1110 
   1111 add_proto qw/unsigned int vpx_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1112   specialize qw/vpx_variance8x8 sse2 neon msa mmi/;
   1113 
   1114 add_proto qw/unsigned int vpx_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1115   specialize qw/vpx_variance8x4 sse2 neon msa mmi/;
   1116 
   1117 add_proto qw/unsigned int vpx_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1118   specialize qw/vpx_variance4x8 sse2 neon msa mmi/;
   1119 
   1120 add_proto qw/unsigned int vpx_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1121   specialize qw/vpx_variance4x4 sse2 neon msa mmi/;
   1122 
   1123 #
   1124 # Specialty Variance
   1125 #
        # vpx_get16x16var / vpx_get8x8var return void and take both an sse and a
        # sum pointer. The vpx_mse* prototypes return unsigned int and take only
        # sse; neon and avx2 are listed for vpx_mse16x16 only. vpx_get_mb_ss takes a
        # single int16_t pointer, vpx_get4x4sse_cs takes unsigned char buffers, and
        # vpx_comp_avg_pred takes pred and ref inputs and a comp_pred destination
        # pointer.
   1126 add_proto qw/void vpx_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
   1127   specialize qw/vpx_get16x16var sse2 avx2 neon msa/;
   1128 
   1129 add_proto qw/void vpx_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
   1130   specialize qw/vpx_get8x8var sse2 neon msa/;
   1131 
   1132 add_proto qw/unsigned int vpx_mse16x16/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
   1133   specialize qw/vpx_mse16x16 sse2 avx2 neon msa mmi/;
   1134 
   1135 add_proto qw/unsigned int vpx_mse16x8/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
   1136   specialize qw/vpx_mse16x8 sse2 msa mmi/;
   1137 
   1138 add_proto qw/unsigned int vpx_mse8x16/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
   1139   specialize qw/vpx_mse8x16 sse2 msa mmi/;
   1140 
   1141 add_proto qw/unsigned int vpx_mse8x8/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
   1142   specialize qw/vpx_mse8x8 sse2 msa mmi/;
   1143 
   1144 add_proto qw/unsigned int vpx_get_mb_ss/, "const int16_t *";
   1145   specialize qw/vpx_get_mb_ss sse2 msa vsx/;
   1146 
   1147 add_proto qw/unsigned int vpx_get4x4sse_cs/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride";
   1148   specialize qw/vpx_get4x4sse_cs neon msa vsx/;
   1149 
   1150 add_proto qw/void vpx_comp_avg_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride";
   1151   specialize qw/vpx_comp_avg_pred neon sse2 vsx/;
   1152 
   1153 #
   1154 # Subpixel Variance
   1155 #
        # Two families, each covering 64x64 down to 4x4 and returning uint32_t:
        #   vpx_sub_pixel_varianceNxM      - variance arguments plus xoffset/yoffset.
        #   vpx_sub_pixel_avg_varianceNxM  - the same plus a trailing second_pred.
        # Every size lists neon, msa, mmi, sse2 and ssse3; avx2 is listed only for
        # the 64x64 and 32x32 sizes of both families.
   1156 add_proto qw/uint32_t vpx_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1157   specialize qw/vpx_sub_pixel_variance64x64 avx2 neon msa mmi sse2 ssse3/;
   1158 
   1159 add_proto qw/uint32_t vpx_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1160   specialize qw/vpx_sub_pixel_variance64x32 neon msa mmi sse2 ssse3/;
   1161 
   1162 add_proto qw/uint32_t vpx_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1163   specialize qw/vpx_sub_pixel_variance32x64 neon msa mmi sse2 ssse3/;
   1164 
   1165 add_proto qw/uint32_t vpx_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1166   specialize qw/vpx_sub_pixel_variance32x32 avx2 neon msa mmi sse2 ssse3/;
   1167 
   1168 add_proto qw/uint32_t vpx_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1169   specialize qw/vpx_sub_pixel_variance32x16 neon msa mmi sse2 ssse3/;
   1170 
   1171 add_proto qw/uint32_t vpx_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1172   specialize qw/vpx_sub_pixel_variance16x32 neon msa mmi sse2 ssse3/;
   1173 
   1174 add_proto qw/uint32_t vpx_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1175   specialize qw/vpx_sub_pixel_variance16x16 neon msa mmi sse2 ssse3/;
   1176 
   1177 add_proto qw/uint32_t vpx_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1178   specialize qw/vpx_sub_pixel_variance16x8 neon msa mmi sse2 ssse3/;
   1179 
   1180 add_proto qw/uint32_t vpx_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1181   specialize qw/vpx_sub_pixel_variance8x16 neon msa mmi sse2 ssse3/;
   1182 
   1183 add_proto qw/uint32_t vpx_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1184   specialize qw/vpx_sub_pixel_variance8x8 neon msa mmi sse2 ssse3/;
   1185 
   1186 add_proto qw/uint32_t vpx_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1187   specialize qw/vpx_sub_pixel_variance8x4 neon msa mmi sse2 ssse3/;
   1188 
   1189 add_proto qw/uint32_t vpx_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1190   specialize qw/vpx_sub_pixel_variance4x8 neon msa mmi sse2 ssse3/;
   1191 
   1192 add_proto qw/uint32_t vpx_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1193   specialize qw/vpx_sub_pixel_variance4x4 neon msa mmi sse2 ssse3/;
   1194 
   1195 add_proto qw/uint32_t vpx_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1196   specialize qw/vpx_sub_pixel_avg_variance64x64 neon avx2 msa mmi sse2 ssse3/;
   1197 
   1198 add_proto qw/uint32_t vpx_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1199   specialize qw/vpx_sub_pixel_avg_variance64x32 neon msa mmi sse2 ssse3/;
   1200 
   1201 add_proto qw/uint32_t vpx_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1202   specialize qw/vpx_sub_pixel_avg_variance32x64 neon msa mmi sse2 ssse3/;
   1203 
   1204 add_proto qw/uint32_t vpx_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1205   specialize qw/vpx_sub_pixel_avg_variance32x32 neon avx2 msa mmi sse2 ssse3/;
   1206 
   1207 add_proto qw/uint32_t vpx_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1208   specialize qw/vpx_sub_pixel_avg_variance32x16 neon msa mmi sse2 ssse3/;
   1209 
   1210 add_proto qw/uint32_t vpx_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1211   specialize qw/vpx_sub_pixel_avg_variance16x32 neon msa mmi sse2 ssse3/;
   1212 
   1213 add_proto qw/uint32_t vpx_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1214   specialize qw/vpx_sub_pixel_avg_variance16x16 neon msa mmi sse2 ssse3/;
   1215 
   1216 add_proto qw/uint32_t vpx_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1217   specialize qw/vpx_sub_pixel_avg_variance16x8 neon msa mmi sse2 ssse3/;
   1218 
   1219 add_proto qw/uint32_t vpx_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1220   specialize qw/vpx_sub_pixel_avg_variance8x16 neon msa mmi sse2 ssse3/;
   1221 
   1222 add_proto qw/uint32_t vpx_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1223   specialize qw/vpx_sub_pixel_avg_variance8x8 neon msa mmi sse2 ssse3/;
   1224 
   1225 add_proto qw/uint32_t vpx_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1226   specialize qw/vpx_sub_pixel_avg_variance8x4 neon msa mmi sse2 ssse3/;
   1227 
   1228 add_proto qw/uint32_t vpx_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1229   specialize qw/vpx_sub_pixel_avg_variance4x8 neon msa mmi sse2 ssse3/;
   1230 
   1231 add_proto qw/uint32_t vpx_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1232   specialize qw/vpx_sub_pixel_avg_variance4x4 neon msa mmi sse2 ssse3/;
   1233 
   1234 if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
   1235   add_proto qw/unsigned int vpx_highbd_12_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1236   specialize qw/vpx_highbd_12_variance64x64 sse2/;
   1237 
   1238   add_proto qw/unsigned int vpx_highbd_12_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1239   specialize qw/vpx_highbd_12_variance64x32 sse2/;
   1240 
   1241   add_proto qw/unsigned int vpx_highbd_12_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1242   specialize qw/vpx_highbd_12_variance32x64 sse2/;
   1243 
   1244   add_proto qw/unsigned int vpx_highbd_12_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1245   specialize qw/vpx_highbd_12_variance32x32 sse2/;
   1246 
   1247   add_proto qw/unsigned int vpx_highbd_12_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1248   specialize qw/vpx_highbd_12_variance32x16 sse2/;
   1249 
   1250   add_proto qw/unsigned int vpx_highbd_12_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1251   specialize qw/vpx_highbd_12_variance16x32 sse2/;
   1252 
   1253   add_proto qw/unsigned int vpx_highbd_12_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1254   specialize qw/vpx_highbd_12_variance16x16 sse2/;
   1255 
   1256   add_proto qw/unsigned int vpx_highbd_12_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1257   specialize qw/vpx_highbd_12_variance16x8 sse2/;
   1258 
   1259   add_proto qw/unsigned int vpx_highbd_12_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1260   specialize qw/vpx_highbd_12_variance8x16 sse2/;
   1261 
   1262   add_proto qw/unsigned int vpx_highbd_12_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1263   specialize qw/vpx_highbd_12_variance8x8 sse2/;
   1264 
          # The vpx_highbd_12_variance prototypes above list sse2 for every size
          # from 64x64 down to 8x8. The 8x4, 4x8 and 4x4 sizes below are declared
          # without a specialize line, i.e. no SIMD variant is listed for them.
   1265   add_proto qw/unsigned int vpx_highbd_12_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1266   add_proto qw/unsigned int vpx_highbd_12_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1267   add_proto qw/unsigned int vpx_highbd_12_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1268 
          # vpx_highbd_10_variance prototypes, same argument list as the 12-variant
          # family. sse2 is listed for 64x64 down to 8x8; the 8x4, 4x8 and 4x4 sizes
          # are declared without a specialize line.
   1269   add_proto qw/unsigned int vpx_highbd_10_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1270   specialize qw/vpx_highbd_10_variance64x64 sse2/;
   1271 
   1272   add_proto qw/unsigned int vpx_highbd_10_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1273   specialize qw/vpx_highbd_10_variance64x32 sse2/;
   1274 
   1275   add_proto qw/unsigned int vpx_highbd_10_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1276   specialize qw/vpx_highbd_10_variance32x64 sse2/;
   1277 
   1278   add_proto qw/unsigned int vpx_highbd_10_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1279   specialize qw/vpx_highbd_10_variance32x32 sse2/;
   1280 
   1281   add_proto qw/unsigned int vpx_highbd_10_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1282   specialize qw/vpx_highbd_10_variance32x16 sse2/;
   1283 
   1284   add_proto qw/unsigned int vpx_highbd_10_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1285   specialize qw/vpx_highbd_10_variance16x32 sse2/;
   1286 
   1287   add_proto qw/unsigned int vpx_highbd_10_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1288   specialize qw/vpx_highbd_10_variance16x16 sse2/;
   1289 
   1290   add_proto qw/unsigned int vpx_highbd_10_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1291   specialize qw/vpx_highbd_10_variance16x8 sse2/;
   1292 
   1293   add_proto qw/unsigned int vpx_highbd_10_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1294   specialize qw/vpx_highbd_10_variance8x16 sse2/;
   1295 
   1296   add_proto qw/unsigned int vpx_highbd_10_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1297   specialize qw/vpx_highbd_10_variance8x8 sse2/;
   1298 
   1299   add_proto qw/unsigned int vpx_highbd_10_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1300   add_proto qw/unsigned int vpx_highbd_10_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1301   add_proto qw/unsigned int vpx_highbd_10_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1302 
          # vpx_highbd_8_variance prototypes, same argument list as the 10- and
          # 12-variant families. sse2 is listed for 64x64 down to 8x8; the 8x4, 4x8
          # and 4x4 sizes are declared without a specialize line.
   1303   add_proto qw/unsigned int vpx_highbd_8_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1304   specialize qw/vpx_highbd_8_variance64x64 sse2/;
   1305 
   1306   add_proto qw/unsigned int vpx_highbd_8_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1307   specialize qw/vpx_highbd_8_variance64x32 sse2/;
   1308 
   1309   add_proto qw/unsigned int vpx_highbd_8_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1310   specialize qw/vpx_highbd_8_variance32x64 sse2/;
   1311 
   1312   add_proto qw/unsigned int vpx_highbd_8_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1313   specialize qw/vpx_highbd_8_variance32x32 sse2/;
   1314 
   1315   add_proto qw/unsigned int vpx_highbd_8_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1316   specialize qw/vpx_highbd_8_variance32x16 sse2/;
   1317 
   1318   add_proto qw/unsigned int vpx_highbd_8_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1319   specialize qw/vpx_highbd_8_variance16x32 sse2/;
   1320 
   1321   add_proto qw/unsigned int vpx_highbd_8_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1322   specialize qw/vpx_highbd_8_variance16x16 sse2/;
   1323 
   1324   add_proto qw/unsigned int vpx_highbd_8_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1325   specialize qw/vpx_highbd_8_variance16x8 sse2/;
   1326 
   1327   add_proto qw/unsigned int vpx_highbd_8_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1328   specialize qw/vpx_highbd_8_variance8x16 sse2/;
   1329 
   1330   add_proto qw/unsigned int vpx_highbd_8_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1331   specialize qw/vpx_highbd_8_variance8x8 sse2/;
   1332 
   1333   add_proto qw/unsigned int vpx_highbd_8_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1334   add_proto qw/unsigned int vpx_highbd_8_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1335   add_proto qw/unsigned int vpx_highbd_8_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1336 
          # High-bitdepth get16x16var / get8x8var prototypes for the 8, 10 and 12
          # variants. Each returns void and takes both an sse and a sum pointer.
          # None of them has a specialize line.
   1337   add_proto qw/void vpx_highbd_8_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
   1338   add_proto qw/void vpx_highbd_8_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
   1339 
   1340   add_proto qw/void vpx_highbd_10_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
   1341   add_proto qw/void vpx_highbd_10_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
   1342 
   1343   add_proto qw/void vpx_highbd_12_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
   1344   add_proto qw/void vpx_highbd_12_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
   1345   # High-bitdepth MSE for 8-, 10- and 12-bit: only the square sizes (16x16, 8x8) list sse2; 16x8 and 8x16 are prototype-only.
   1346   add_proto qw/unsigned int vpx_highbd_8_mse16x16/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
   1347   specialize qw/vpx_highbd_8_mse16x16 sse2/;
   1348 
   1349   add_proto qw/unsigned int vpx_highbd_8_mse16x8/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
   1350   add_proto qw/unsigned int vpx_highbd_8_mse8x16/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
   1351   add_proto qw/unsigned int vpx_highbd_8_mse8x8/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
   1352   specialize qw/vpx_highbd_8_mse8x8 sse2/;
   1353 
   1354   add_proto qw/unsigned int vpx_highbd_10_mse16x16/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
   1355   specialize qw/vpx_highbd_10_mse16x16 sse2/;
   1356 
   1357   add_proto qw/unsigned int vpx_highbd_10_mse16x8/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
   1358   add_proto qw/unsigned int vpx_highbd_10_mse8x16/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
   1359   add_proto qw/unsigned int vpx_highbd_10_mse8x8/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
   1360   specialize qw/vpx_highbd_10_mse8x8 sse2/;
   1361 
   1362   add_proto qw/unsigned int vpx_highbd_12_mse16x16/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
   1363   specialize qw/vpx_highbd_12_mse16x16 sse2/;
   1364 
   1365   add_proto qw/unsigned int vpx_highbd_12_mse16x8/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
   1366   add_proto qw/unsigned int vpx_highbd_12_mse8x16/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
   1367   add_proto qw/unsigned int vpx_highbd_12_mse8x8/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
   1368   specialize qw/vpx_highbd_12_mse8x8 sse2/;
   1369   # vpx_highbd_comp_avg_pred: non-const uint16_t *comp_pred plus const uint8_t *pred8 / *ref8 inputs; prototype-only (no specialize line).
   1370   add_proto qw/void vpx_highbd_comp_avg_pred/, "uint16_t *comp_pred, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride";
   1371 
   1372   #
   1373   # Subpixel Variance
   1374   # 12-bit sub_pixel_variance: 64x64 through 8x4 each list sse2; 4x8 and 4x4 are prototype-only (no specialize line).
   1375   add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1376   specialize qw/vpx_highbd_12_sub_pixel_variance64x64 sse2/;
   1377 
   1378   add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1379   specialize qw/vpx_highbd_12_sub_pixel_variance64x32 sse2/;
   1380 
   1381   add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1382   specialize qw/vpx_highbd_12_sub_pixel_variance32x64 sse2/;
   1383 
   1384   add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1385   specialize qw/vpx_highbd_12_sub_pixel_variance32x32 sse2/;
   1386 
   1387   add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1388   specialize qw/vpx_highbd_12_sub_pixel_variance32x16 sse2/;
   1389 
   1390   add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1391   specialize qw/vpx_highbd_12_sub_pixel_variance16x32 sse2/;
   1392 
   1393   add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1394   specialize qw/vpx_highbd_12_sub_pixel_variance16x16 sse2/;
   1395 
   1396   add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1397   specialize qw/vpx_highbd_12_sub_pixel_variance16x8 sse2/;
   1398 
   1399   add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1400   specialize qw/vpx_highbd_12_sub_pixel_variance8x16 sse2/;
   1401 
   1402   add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1403   specialize qw/vpx_highbd_12_sub_pixel_variance8x8 sse2/;
   1404 
   1405   add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1406   specialize qw/vpx_highbd_12_sub_pixel_variance8x4 sse2/;
   1407 
   1408   add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1409   add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1410   # 10-bit sub_pixel_variance: 64x64 through 8x4 each list sse2; 4x8 and 4x4 are prototype-only (no specialize line).
   1411   add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1412   specialize qw/vpx_highbd_10_sub_pixel_variance64x64 sse2/;
   1413 
   1414   add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1415   specialize qw/vpx_highbd_10_sub_pixel_variance64x32 sse2/;
   1416 
   1417   add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1418   specialize qw/vpx_highbd_10_sub_pixel_variance32x64 sse2/;
   1419 
   1420   add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1421   specialize qw/vpx_highbd_10_sub_pixel_variance32x32 sse2/;
   1422 
   1423   add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1424   specialize qw/vpx_highbd_10_sub_pixel_variance32x16 sse2/;
   1425 
   1426   add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1427   specialize qw/vpx_highbd_10_sub_pixel_variance16x32 sse2/;
   1428 
   1429   add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1430   specialize qw/vpx_highbd_10_sub_pixel_variance16x16 sse2/;
   1431 
   1432   add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1433   specialize qw/vpx_highbd_10_sub_pixel_variance16x8 sse2/;
   1434 
   1435   add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1436   specialize qw/vpx_highbd_10_sub_pixel_variance8x16 sse2/;
   1437 
   1438   add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1439   specialize qw/vpx_highbd_10_sub_pixel_variance8x8 sse2/;
   1440 
   1441   add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1442   specialize qw/vpx_highbd_10_sub_pixel_variance8x4 sse2/;
   1443 
   1444   add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1445   add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1446   # 8-bit sub_pixel_variance: 64x64 through 8x4 each list sse2; 4x8 and 4x4 are prototype-only (no specialize line).
   1447   add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1448   specialize qw/vpx_highbd_8_sub_pixel_variance64x64 sse2/;
   1449 
   1450   add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1451   specialize qw/vpx_highbd_8_sub_pixel_variance64x32 sse2/;
   1452 
   1453   add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1454   specialize qw/vpx_highbd_8_sub_pixel_variance32x64 sse2/;
   1455 
   1456   add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1457   specialize qw/vpx_highbd_8_sub_pixel_variance32x32 sse2/;
   1458 
   1459   add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1460   specialize qw/vpx_highbd_8_sub_pixel_variance32x16 sse2/;
   1461 
   1462   add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1463   specialize qw/vpx_highbd_8_sub_pixel_variance16x32 sse2/;
   1464 
   1465   add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1466   specialize qw/vpx_highbd_8_sub_pixel_variance16x16 sse2/;
   1467 
   1468   add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1469   specialize qw/vpx_highbd_8_sub_pixel_variance16x8 sse2/;
   1470 
   1471   add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1472   specialize qw/vpx_highbd_8_sub_pixel_variance8x16 sse2/;
   1473 
   1474   add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1475   specialize qw/vpx_highbd_8_sub_pixel_variance8x8 sse2/;
   1476 
   1477   add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1478   specialize qw/vpx_highbd_8_sub_pixel_variance8x4 sse2/;
   1479 
   1480   add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1481   add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1482   # 12-bit sub_pixel_avg_variance: same parameters as sub_pixel_variance plus a trailing const uint8_t *second_pred; sse2 for 64x64 through 8x4, 4x8 and 4x4 prototype-only.
   1483   add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1484   specialize qw/vpx_highbd_12_sub_pixel_avg_variance64x64 sse2/;
   1485 
   1486   add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1487   specialize qw/vpx_highbd_12_sub_pixel_avg_variance64x32 sse2/;
   1488 
   1489   add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1490   specialize qw/vpx_highbd_12_sub_pixel_avg_variance32x64 sse2/;
   1491 
   1492   add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1493   specialize qw/vpx_highbd_12_sub_pixel_avg_variance32x32 sse2/;
   1494 
   1495   add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1496   specialize qw/vpx_highbd_12_sub_pixel_avg_variance32x16 sse2/;
   1497 
   1498   add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1499   specialize qw/vpx_highbd_12_sub_pixel_avg_variance16x32 sse2/;
   1500 
   1501   add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1502   specialize qw/vpx_highbd_12_sub_pixel_avg_variance16x16 sse2/;
   1503 
   1504   add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1505   specialize qw/vpx_highbd_12_sub_pixel_avg_variance16x8 sse2/;
   1506 
   1507   add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1508   specialize qw/vpx_highbd_12_sub_pixel_avg_variance8x16 sse2/;
   1509 
   1510   add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1511   specialize qw/vpx_highbd_12_sub_pixel_avg_variance8x8 sse2/;
   1512 
   1513   add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1514   specialize qw/vpx_highbd_12_sub_pixel_avg_variance8x4 sse2/;
   1515 
   1516   add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1517   add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1518   # 10-bit sub_pixel_avg_variance: same parameters as sub_pixel_variance plus a trailing const uint8_t *second_pred; sse2 for 64x64 through 8x4, 4x8 and 4x4 prototype-only.
   1519   add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1520   specialize qw/vpx_highbd_10_sub_pixel_avg_variance64x64 sse2/;
   1521 
   1522   add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1523   specialize qw/vpx_highbd_10_sub_pixel_avg_variance64x32 sse2/;
   1524 
   1525   add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1526   specialize qw/vpx_highbd_10_sub_pixel_avg_variance32x64 sse2/;
   1527 
   1528   add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1529   specialize qw/vpx_highbd_10_sub_pixel_avg_variance32x32 sse2/;
   1530 
   1531   add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1532   specialize qw/vpx_highbd_10_sub_pixel_avg_variance32x16 sse2/;
   1533 
   1534   add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1535   specialize qw/vpx_highbd_10_sub_pixel_avg_variance16x32 sse2/;
   1536 
   1537   add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1538   specialize qw/vpx_highbd_10_sub_pixel_avg_variance16x16 sse2/;
   1539 
   1540   add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1541   specialize qw/vpx_highbd_10_sub_pixel_avg_variance16x8 sse2/;
   1542 
   1543   add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1544   specialize qw/vpx_highbd_10_sub_pixel_avg_variance8x16 sse2/;
   1545 
   1546   add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1547   specialize qw/vpx_highbd_10_sub_pixel_avg_variance8x8 sse2/;
   1548 
   1549   add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1550   specialize qw/vpx_highbd_10_sub_pixel_avg_variance8x4 sse2/;
   1551 
   1552   add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1553   add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1554   # 8-bit sub_pixel_avg_variance: same parameters as sub_pixel_variance plus a trailing const uint8_t *second_pred; sse2 for 64x64 through 8x4, 4x8 and 4x4 prototype-only.
   1555   add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1556   specialize qw/vpx_highbd_8_sub_pixel_avg_variance64x64 sse2/;
   1557 
   1558   add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1559   specialize qw/vpx_highbd_8_sub_pixel_avg_variance64x32 sse2/;
   1560 
   1561   add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1562   specialize qw/vpx_highbd_8_sub_pixel_avg_variance32x64 sse2/;
   1563 
   1564   add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1565   specialize qw/vpx_highbd_8_sub_pixel_avg_variance32x32 sse2/;
   1566 
   1567   add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1568   specialize qw/vpx_highbd_8_sub_pixel_avg_variance32x16 sse2/;
   1569 
   1570   add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1571   specialize qw/vpx_highbd_8_sub_pixel_avg_variance16x32 sse2/;
   1572 
   1573   add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1574   specialize qw/vpx_highbd_8_sub_pixel_avg_variance16x16 sse2/;
   1575 
   1576   add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1577   specialize qw/vpx_highbd_8_sub_pixel_avg_variance16x8 sse2/;
   1578 
   1579   add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1580   specialize qw/vpx_highbd_8_sub_pixel_avg_variance8x16 sse2/;
   1581 
   1582   add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1583   specialize qw/vpx_highbd_8_sub_pixel_avg_variance8x8 sse2/;
   1584 
   1585   add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1586   specialize qw/vpx_highbd_8_sub_pixel_avg_variance8x4 sse2/;
   1587 
   1588   add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1589   add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
   1590 
   1591 }  # CONFIG_VP9_HIGHBITDEPTH
   1592 
   1593 #
   1594 # Post Processing
   1595 # Registered only when CONFIG_POSTPROC or CONFIG_VP9_POSTPROC is "yes". Every function lists sse2 and msa; all except vpx_plane_add_noise also list neon.
   1596 if (vpx_config("CONFIG_POSTPROC") eq "yes" || vpx_config("CONFIG_VP9_POSTPROC") eq "yes") {
   1597     add_proto qw/void vpx_plane_add_noise/, "uint8_t *start, const int8_t *noise, int blackclamp, int whiteclamp, int width, int height, int pitch";
   1598     specialize qw/vpx_plane_add_noise sse2 msa/;
   1599   # vpx_mbpost_proc_down and vpx_mbpost_proc_across_ip share one parameter list (dst, pitch, rows, cols, flimit).
   1600     add_proto qw/void vpx_mbpost_proc_down/, "unsigned char *dst, int pitch, int rows, int cols,int flimit";
   1601     specialize qw/vpx_mbpost_proc_down sse2 neon msa/;
   1602 
   1603     add_proto qw/void vpx_mbpost_proc_across_ip/, "unsigned char *dst, int pitch, int rows, int cols,int flimit";
   1604     specialize qw/vpx_mbpost_proc_across_ip sse2 neon msa/;
   1605   # Takes separate src/dst buffers with their own pitches, plus a flimits pointer and a size.
   1606     add_proto qw/void vpx_post_proc_down_and_across_mb_row/, "unsigned char *src, unsigned char *dst, int src_pitch, int dst_pitch, int cols, unsigned char *flimits, int size";
   1607     specialize qw/vpx_post_proc_down_and_across_mb_row sse2 neon msa/;
   1608 
   1609 }  # CONFIG_POSTPROC || CONFIG_VP9_POSTPROC
   1610 
   1611 }  # CONFIG_ENCODERS || CONFIG_POSTPROC || CONFIG_VP9_POSTPROC
   1612 
   1613 1;
   1614