Home | History | Annotate | Download | only in common
      1 sub vp8_common_forward_decls() {
      2 print <<EOF
      3 /*
      4  * VP8
      5  */
      6 
      7 struct blockd;
      8 struct macroblockd;
      9 struct loop_filter_info;
     10 
     11 /* Encoder forward decls */
     12 struct block;
     13 struct macroblock;
     14 struct variance_vtable;
     15 union int_mv;
     16 struct yv12_buffer_config;
     17 EOF
     18 }
     19 forward_decls qw/vp8_common_forward_decls/;
     20 
     21 #
     22 # system state
     23 #
     24 add_proto qw/void vp8_clear_system_state/, "";
     25 specialize qw/vp8_clear_system_state mmx/;
     26 $vp8_clear_system_state_mmx=vpx_reset_mmx_state;
     27 
     28 #
     29 # Dequant
     30 #
     31 add_proto qw/void vp8_dequantize_b/, "struct blockd*, short *dqc";
     32 specialize qw/vp8_dequantize_b mmx media neon/;
     33 $vp8_dequantize_b_media=vp8_dequantize_b_v6;
     34 
     35 add_proto qw/void vp8_dequant_idct_add/, "short *input, short *dq, unsigned char *output, int stride";
     36 specialize qw/vp8_dequant_idct_add mmx media neon dspr2/;
     37 $vp8_dequant_idct_add_media=vp8_dequant_idct_add_v6;
     38 $vp8_dequant_idct_add_dspr2=vp8_dequant_idct_add_dspr2;
     39 
     40 add_proto qw/void vp8_dequant_idct_add_y_block/, "short *q, short *dq, unsigned char *dst, int stride, char *eobs";
     41 specialize qw/vp8_dequant_idct_add_y_block mmx sse2 media neon dspr2/;
     42 $vp8_dequant_idct_add_y_block_media=vp8_dequant_idct_add_y_block_v6;
     43 $vp8_dequant_idct_add_y_block_dspr2=vp8_dequant_idct_add_y_block_dspr2;
     44 
     45 add_proto qw/void vp8_dequant_idct_add_uv_block/, "short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs";
     46 specialize qw/vp8_dequant_idct_add_uv_block mmx sse2 media neon dspr2/;
     47 $vp8_dequant_idct_add_uv_block_media=vp8_dequant_idct_add_uv_block_v6;
     48 $vp8_dequant_idct_add_y_block_dspr2=vp8_dequant_idct_add_y_block_dspr2;
     49 
     50 #
     51 # Loopfilter
     52 #
     53 add_proto qw/void vp8_loop_filter_mbv/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
     54 specialize qw/vp8_loop_filter_mbv mmx sse2 media neon dspr2/;
     55 $vp8_loop_filter_mbv_media=vp8_loop_filter_mbv_armv6;
     56 $vp8_loop_filter_mbv_dspr2=vp8_loop_filter_mbv_dspr2;
     57 
     58 add_proto qw/void vp8_loop_filter_bv/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
     59 specialize qw/vp8_loop_filter_bv mmx sse2 media neon dspr2/;
     60 $vp8_loop_filter_bv_media=vp8_loop_filter_bv_armv6;
     61 $vp8_loop_filter_bv_dspr2=vp8_loop_filter_bv_dspr2;
     62 
     63 add_proto qw/void vp8_loop_filter_mbh/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
     64 specialize qw/vp8_loop_filter_mbh mmx sse2 media neon dspr2/;
     65 $vp8_loop_filter_mbh_media=vp8_loop_filter_mbh_armv6;
     66 $vp8_loop_filter_mbh_dspr2=vp8_loop_filter_mbh_dspr2;
     67 
     68 add_proto qw/void vp8_loop_filter_bh/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
     69 specialize qw/vp8_loop_filter_bh mmx sse2 media neon dspr2/;
     70 $vp8_loop_filter_bh_media=vp8_loop_filter_bh_armv6;
     71 $vp8_loop_filter_bh_dspr2=vp8_loop_filter_bh_dspr2;
     72 
     73 
     74 add_proto qw/void vp8_loop_filter_simple_mbv/, "unsigned char *y, int ystride, const unsigned char *blimit";
     75 specialize qw/vp8_loop_filter_simple_mbv mmx sse2 media neon/;
     76 $vp8_loop_filter_simple_mbv_c=vp8_loop_filter_simple_vertical_edge_c;
     77 $vp8_loop_filter_simple_mbv_mmx=vp8_loop_filter_simple_vertical_edge_mmx;
     78 $vp8_loop_filter_simple_mbv_sse2=vp8_loop_filter_simple_vertical_edge_sse2;
     79 $vp8_loop_filter_simple_mbv_media=vp8_loop_filter_simple_vertical_edge_armv6;
     80 $vp8_loop_filter_simple_mbv_neon=vp8_loop_filter_mbvs_neon;
     81 
     82 add_proto qw/void vp8_loop_filter_simple_mbh/, "unsigned char *y, int ystride, const unsigned char *blimit";
     83 specialize qw/vp8_loop_filter_simple_mbh mmx sse2 media neon/;
     84 $vp8_loop_filter_simple_mbh_c=vp8_loop_filter_simple_horizontal_edge_c;
     85 $vp8_loop_filter_simple_mbh_mmx=vp8_loop_filter_simple_horizontal_edge_mmx;
     86 $vp8_loop_filter_simple_mbh_sse2=vp8_loop_filter_simple_horizontal_edge_sse2;
     87 $vp8_loop_filter_simple_mbh_media=vp8_loop_filter_simple_horizontal_edge_armv6;
     88 $vp8_loop_filter_simple_mbh_neon=vp8_loop_filter_mbhs_neon;
     89 
     90 add_proto qw/void vp8_loop_filter_simple_bv/, "unsigned char *y, int ystride, const unsigned char *blimit";
     91 specialize qw/vp8_loop_filter_simple_bv mmx sse2 media neon/;
     92 $vp8_loop_filter_simple_bv_c=vp8_loop_filter_bvs_c;
     93 $vp8_loop_filter_simple_bv_mmx=vp8_loop_filter_bvs_mmx;
     94 $vp8_loop_filter_simple_bv_sse2=vp8_loop_filter_bvs_sse2;
     95 $vp8_loop_filter_simple_bv_media=vp8_loop_filter_bvs_armv6;
     96 $vp8_loop_filter_simple_bv_neon=vp8_loop_filter_bvs_neon;
     97 
     98 add_proto qw/void vp8_loop_filter_simple_bh/, "unsigned char *y, int ystride, const unsigned char *blimit";
     99 specialize qw/vp8_loop_filter_simple_bh mmx sse2 media neon/;
    100 $vp8_loop_filter_simple_bh_c=vp8_loop_filter_bhs_c;
    101 $vp8_loop_filter_simple_bh_mmx=vp8_loop_filter_bhs_mmx;
    102 $vp8_loop_filter_simple_bh_sse2=vp8_loop_filter_bhs_sse2;
    103 $vp8_loop_filter_simple_bh_media=vp8_loop_filter_bhs_armv6;
    104 $vp8_loop_filter_simple_bh_neon=vp8_loop_filter_bhs_neon;
    105 
    106 #
    107 # IDCT
    108 #
    109 #idct16
    110 add_proto qw/void vp8_short_idct4x4llm/, "short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride";
    111 specialize qw/vp8_short_idct4x4llm mmx media neon dspr2/;
    112 $vp8_short_idct4x4llm_media=vp8_short_idct4x4llm_v6_dual;
    113 $vp8_short_idct4x4llm_dspr2=vp8_short_idct4x4llm_dspr2;
    114 
    115 #iwalsh1
    116 add_proto qw/void vp8_short_inv_walsh4x4_1/, "short *input, short *output";
    117 specialize qw/vp8_short_inv_walsh4x4_1 dspr2/;
    118 $vp8_short_inv_walsh4x4_1_dspr2=vp8_short_inv_walsh4x4_1_dspr2;
    119 # no asm yet
    120 
    121 #iwalsh16
    122 add_proto qw/void vp8_short_inv_walsh4x4/, "short *input, short *output";
    123 specialize qw/vp8_short_inv_walsh4x4 mmx sse2 media neon dspr2/;
    124 $vp8_short_inv_walsh4x4_media=vp8_short_inv_walsh4x4_v6;
    125 $vp8_short_inv_walsh4x4_dspr2=vp8_short_inv_walsh4x4_dspr2;
    126 
    127 #idct1_scalar_add
    128 add_proto qw/void vp8_dc_only_idct_add/, "short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride";
    129 specialize qw/vp8_dc_only_idct_add	mmx media neon dspr2/;
    130 $vp8_dc_only_idct_add_media=vp8_dc_only_idct_add_v6;
    131 $vp8_dc_only_idct_add_dspr2=vp8_dc_only_idct_add_dspr2;
    132 
    133 #
    134 # RECON
    135 #
    136 add_proto qw/void vp8_copy_mem16x16/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch";
    137 specialize qw/vp8_copy_mem16x16 mmx sse2 media neon dspr2/;
    138 $vp8_copy_mem16x16_media=vp8_copy_mem16x16_v6;
    139 $vp8_copy_mem16x16_dspr2=vp8_copy_mem16x16_dspr2;
    140 
    141 add_proto qw/void vp8_copy_mem8x8/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch";
    142 specialize qw/vp8_copy_mem8x8 mmx media neon dspr2/;
    143 $vp8_copy_mem8x8_media=vp8_copy_mem8x8_v6;
    144 $vp8_copy_mem8x8_dspr2=vp8_copy_mem8x8_dspr2;
    145 
    146 add_proto qw/void vp8_copy_mem8x4/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch";
    147 specialize qw/vp8_copy_mem8x4 mmx media neon dspr2/;
    148 $vp8_copy_mem8x4_media=vp8_copy_mem8x4_v6;
    149 $vp8_copy_mem8x4_dspr2=vp8_copy_mem8x4_dspr2;
    150 
    151 add_proto qw/void vp8_build_intra_predictors_mby_s/, "struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride";
    152 specialize qw/vp8_build_intra_predictors_mby_s sse2 ssse3/;
    153 #TODO: fix assembly for neon
    154 
    155 add_proto qw/void vp8_build_intra_predictors_mbuv_s/, "struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row,  unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride";
    156 specialize qw/vp8_build_intra_predictors_mbuv_s sse2 ssse3/;
    157 
    158 add_proto qw/void vp8_intra4x4_predict/, "unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left";
    159 specialize qw/vp8_intra4x4_predict media/;
    160 $vp8_intra4x4_predict_media=vp8_intra4x4_predict_armv6;
    161 
    162 #
    163 # Postproc
    164 #
    165 if (vpx_config("CONFIG_POSTPROC") eq "yes") {
    166     add_proto qw/void vp8_mbpost_proc_down/, "unsigned char *dst, int pitch, int rows, int cols,int flimit";
    167     specialize qw/vp8_mbpost_proc_down mmx sse2/;
    168     $vp8_mbpost_proc_down_sse2=vp8_mbpost_proc_down_xmm;
    169 
    170     add_proto qw/void vp8_mbpost_proc_across_ip/, "unsigned char *dst, int pitch, int rows, int cols,int flimit";
    171     specialize qw/vp8_mbpost_proc_across_ip sse2/;
    172     $vp8_mbpost_proc_across_ip_sse2=vp8_mbpost_proc_across_ip_xmm;
    173 
    174     add_proto qw/void vp8_post_proc_down_and_across_mb_row/, "unsigned char *src, unsigned char *dst, int src_pitch, int dst_pitch, int cols, unsigned char *flimits, int size";
    175     specialize qw/vp8_post_proc_down_and_across_mb_row sse2/;
    176 
    177     add_proto qw/void vp8_plane_add_noise/, "unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch";
    178     specialize qw/vp8_plane_add_noise mmx sse2/;
    179     $vp8_plane_add_noise_sse2=vp8_plane_add_noise_wmt;
    180 
    181     add_proto qw/void vp8_blend_mb_inner/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride";
    182     # no asm yet
    183 
    184     add_proto qw/void vp8_blend_mb_outer/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride";
    185     # no asm yet
    186 
    187     add_proto qw/void vp8_blend_b/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride";
    188     # no asm yet
    189 
    190     add_proto qw/void vp8_filter_by_weight16x16/, "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight";
    191     specialize qw/vp8_filter_by_weight16x16 sse2/;
    192 
    193     add_proto qw/void vp8_filter_by_weight8x8/, "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight";
    194     specialize qw/vp8_filter_by_weight8x8 sse2/;
    195 
    196     add_proto qw/void vp8_filter_by_weight4x4/, "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight";
    197     # no asm yet
    198 }
    199 
    200 #
    201 # Subpixel
    202 #
    203 add_proto qw/void vp8_sixtap_predict16x16/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
    204 specialize qw/vp8_sixtap_predict16x16 mmx sse2 ssse3 media neon dspr2/;
    205 $vp8_sixtap_predict16x16_media=vp8_sixtap_predict16x16_armv6;
    206 $vp8_sixtap_predict16x16_dspr2=vp8_sixtap_predict16x16_dspr2;
    207 
    208 add_proto qw/void vp8_sixtap_predict8x8/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
    209 specialize qw/vp8_sixtap_predict8x8 mmx sse2 ssse3 media neon dspr2/;
    210 $vp8_sixtap_predict8x8_media=vp8_sixtap_predict8x8_armv6;
    211 $vp8_sixtap_predict8x8_dspr2=vp8_sixtap_predict8x8_dspr2;
    212 
    213 add_proto qw/void vp8_sixtap_predict8x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
    214 specialize qw/vp8_sixtap_predict8x4 mmx sse2 ssse3 media neon dspr2/;
    215 $vp8_sixtap_predict8x4_media=vp8_sixtap_predict8x4_armv6;
    216 $vp8_sixtap_predict8x4_dspr2=vp8_sixtap_predict8x4_dspr2;
    217 
    218 add_proto qw/void vp8_sixtap_predict4x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
    219 specialize qw/vp8_sixtap_predict4x4 mmx ssse3 media neon dspr2/;
    220 $vp8_sixtap_predict4x4_media=vp8_sixtap_predict4x4_armv6;
    221 $vp8_sixtap_predict4x4_dspr2=vp8_sixtap_predict4x4_dspr2;
    222 
    223 add_proto qw/void vp8_bilinear_predict16x16/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
    224 specialize qw/vp8_bilinear_predict16x16 mmx sse2 ssse3 media neon/;
    225 $vp8_bilinear_predict16x16_media=vp8_bilinear_predict16x16_armv6;
    226 
    227 add_proto qw/void vp8_bilinear_predict8x8/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
    228 specialize qw/vp8_bilinear_predict8x8 mmx sse2 ssse3 media neon/;
    229 $vp8_bilinear_predict8x8_media=vp8_bilinear_predict8x8_armv6;
    230 
    231 add_proto qw/void vp8_bilinear_predict8x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
    232 specialize qw/vp8_bilinear_predict8x4 mmx media neon/;
    233 $vp8_bilinear_predict8x4_media=vp8_bilinear_predict8x4_armv6;
    234 
    235 add_proto qw/void vp8_bilinear_predict4x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
    236 specialize qw/vp8_bilinear_predict4x4 mmx media neon/;
    237 $vp8_bilinear_predict4x4_media=vp8_bilinear_predict4x4_armv6;
    238 
    239 #
    240 # Whole-pixel Variance
    241 #
    242 add_proto qw/unsigned int vp8_variance4x4/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sse";
    243 specialize qw/vp8_variance4x4 mmx sse2/;
    244 $vp8_variance4x4_sse2=vp8_variance4x4_wmt;
    245 
    246 add_proto qw/unsigned int vp8_variance8x8/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sse";
    247 specialize qw/vp8_variance8x8 mmx sse2 media neon/;
    248 $vp8_variance8x8_sse2=vp8_variance8x8_wmt;
    249 $vp8_variance8x8_media=vp8_variance8x8_armv6;
    250 
    251 add_proto qw/unsigned int vp8_variance8x16/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sse";
    252 specialize qw/vp8_variance8x16 mmx sse2 neon/;
    253 $vp8_variance8x16_sse2=vp8_variance8x16_wmt;
    254 
    255 add_proto qw/unsigned int vp8_variance16x8/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sse";
    256 specialize qw/vp8_variance16x8 mmx sse2 neon/;
    257 $vp8_variance16x8_sse2=vp8_variance16x8_wmt;
    258 
    259 add_proto qw/unsigned int vp8_variance16x16/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sse";
    260 specialize qw/vp8_variance16x16 mmx sse2 media neon/;
    261 $vp8_variance16x16_sse2=vp8_variance16x16_wmt;
    262 $vp8_variance16x16_media=vp8_variance16x16_armv6;
    263 
    264 #
    265 # Sub-pixel Variance
    266 #
    267 add_proto qw/unsigned int vp8_sub_pixel_variance4x4/, "const unsigned char  *src_ptr, int  source_stride, int  xoffset, int  yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse";
    268 specialize qw/vp8_sub_pixel_variance4x4 mmx sse2/;
    269 $vp8_sub_pixel_variance4x4_sse2=vp8_sub_pixel_variance4x4_wmt;
    270 
    271 add_proto qw/unsigned int vp8_sub_pixel_variance8x8/, "const unsigned char  *src_ptr, int  source_stride, int  xoffset, int  yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse";
    272 specialize qw/vp8_sub_pixel_variance8x8 mmx sse2 media neon/;
    273 $vp8_sub_pixel_variance8x8_sse2=vp8_sub_pixel_variance8x8_wmt;
    274 $vp8_sub_pixel_variance8x8_media=vp8_sub_pixel_variance8x8_armv6;
    275 
    276 add_proto qw/unsigned int vp8_sub_pixel_variance8x16/, "const unsigned char  *src_ptr, int  source_stride, int  xoffset, int  yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse";
    277 specialize qw/vp8_sub_pixel_variance8x16 mmx sse2/;
    278 $vp8_sub_pixel_variance8x16_sse2=vp8_sub_pixel_variance8x16_wmt;
    279 
    280 add_proto qw/unsigned int vp8_sub_pixel_variance16x8/, "const unsigned char  *src_ptr, int  source_stride, int  xoffset, int  yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse";
    281 specialize qw/vp8_sub_pixel_variance16x8 mmx sse2 ssse3/;
    282 $vp8_sub_pixel_variance16x8_sse2=vp8_sub_pixel_variance16x8_wmt;
    283 
    284 add_proto qw/unsigned int vp8_sub_pixel_variance16x16/, "const unsigned char  *src_ptr, int  source_stride, int  xoffset, int  yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse";
    285 specialize qw/vp8_sub_pixel_variance16x16 mmx sse2 ssse3 media neon/;
    286 $vp8_sub_pixel_variance16x16_sse2=vp8_sub_pixel_variance16x16_wmt;
    287 $vp8_sub_pixel_variance16x16_media=vp8_sub_pixel_variance16x16_armv6;
    288 
    289 add_proto qw/unsigned int vp8_variance_halfpixvar16x16_h/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sse";
    290 specialize qw/vp8_variance_halfpixvar16x16_h mmx sse2 media neon/;
    291 $vp8_variance_halfpixvar16x16_h_sse2=vp8_variance_halfpixvar16x16_h_wmt;
    292 $vp8_variance_halfpixvar16x16_h_media=vp8_variance_halfpixvar16x16_h_armv6;
    293 
    294 add_proto qw/unsigned int vp8_variance_halfpixvar16x16_v/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sse";
    295 specialize qw/vp8_variance_halfpixvar16x16_v mmx sse2 media neon/;
    296 $vp8_variance_halfpixvar16x16_v_sse2=vp8_variance_halfpixvar16x16_v_wmt;
    297 $vp8_variance_halfpixvar16x16_v_media=vp8_variance_halfpixvar16x16_v_armv6;
    298 
    299 add_proto qw/unsigned int vp8_variance_halfpixvar16x16_hv/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sse";
    300 specialize qw/vp8_variance_halfpixvar16x16_hv mmx sse2 media neon/;
    301 $vp8_variance_halfpixvar16x16_hv_sse2=vp8_variance_halfpixvar16x16_hv_wmt;
    302 $vp8_variance_halfpixvar16x16_hv_media=vp8_variance_halfpixvar16x16_hv_armv6;
    303 
    304 #
    305 # Single block SAD
    306 #
    307 add_proto qw/unsigned int vp8_sad4x4/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad";
    308 specialize qw/vp8_sad4x4 mmx sse2 neon/;
    309 $vp8_sad4x4_sse2=vp8_sad4x4_wmt;
    310 
    311 add_proto qw/unsigned int vp8_sad8x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad";
    312 specialize qw/vp8_sad8x8 mmx sse2 neon/;
    313 $vp8_sad8x8_sse2=vp8_sad8x8_wmt;
    314 
    315 add_proto qw/unsigned int vp8_sad8x16/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad";
    316 specialize qw/vp8_sad8x16 mmx sse2 neon/;
    317 $vp8_sad8x16_sse2=vp8_sad8x16_wmt;
    318 
    319 add_proto qw/unsigned int vp8_sad16x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad";
    320 specialize qw/vp8_sad16x8 mmx sse2 neon/;
    321 $vp8_sad16x8_sse2=vp8_sad16x8_wmt;
    322 
    323 add_proto qw/unsigned int vp8_sad16x16/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad";
    324 specialize qw/vp8_sad16x16 mmx sse2 sse3 media neon/;
    325 $vp8_sad16x16_sse2=vp8_sad16x16_wmt;
    326 $vp8_sad16x16_media=vp8_sad16x16_armv6;
    327 
    328 #
    329 # Multi-block SAD, comparing a reference to N blocks 1 pixel apart horizontally
    330 #
    331 add_proto qw/void vp8_sad4x4x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sad_array";
    332 specialize qw/vp8_sad4x4x3 sse3/;
    333 
    334 add_proto qw/void vp8_sad8x8x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sad_array";
    335 specialize qw/vp8_sad8x8x3 sse3/;
    336 
    337 add_proto qw/void vp8_sad8x16x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sad_array";
    338 specialize qw/vp8_sad8x16x3 sse3/;
    339 
    340 add_proto qw/void vp8_sad16x8x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sad_array";
    341 specialize qw/vp8_sad16x8x3 sse3 ssse3/;
    342 
    343 add_proto qw/void vp8_sad16x16x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sad_array";
    344 specialize qw/vp8_sad16x16x3 sse3 ssse3/;
    345 
    346 # Note the only difference in the following prototypes is that they return into
    347 # an array of short
    348 add_proto qw/void vp8_sad4x4x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned short *sad_array";
    349 specialize qw/vp8_sad4x4x8 sse4_1/;
    350 $vp8_sad4x4x8_sse4_1=vp8_sad4x4x8_sse4;
    351 
    352 add_proto qw/void vp8_sad8x8x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned short *sad_array";
    353 specialize qw/vp8_sad8x8x8 sse4_1/;
    354 $vp8_sad8x8x8_sse4_1=vp8_sad8x8x8_sse4;
    355 
    356 add_proto qw/void vp8_sad8x16x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned short *sad_array";
    357 specialize qw/vp8_sad8x16x8 sse4_1/;
    358 $vp8_sad8x16x8_sse4_1=vp8_sad8x16x8_sse4;
    359 
    360 add_proto qw/void vp8_sad16x8x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned short *sad_array";
    361 specialize qw/vp8_sad16x8x8 sse4_1/;
    362 $vp8_sad16x8x8_sse4_1=vp8_sad16x8x8_sse4;
    363 
    364 add_proto qw/void vp8_sad16x16x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned short *sad_array";
    365 specialize qw/vp8_sad16x16x8 sse4_1/;
    366 $vp8_sad16x16x8_sse4_1=vp8_sad16x16x8_sse4;
    367 
    368 #
    369 # Multi-block SAD, comparing a reference to N independent blocks
    370 #
    371 add_proto qw/void vp8_sad4x4x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int  ref_stride, unsigned int *sad_array";
    372 specialize qw/vp8_sad4x4x4d sse3/;
    373 
    374 add_proto qw/void vp8_sad8x8x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int  ref_stride, unsigned int *sad_array";
    375 specialize qw/vp8_sad8x8x4d sse3/;
    376 
    377 add_proto qw/void vp8_sad8x16x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int  ref_stride, unsigned int *sad_array";
    378 specialize qw/vp8_sad8x16x4d sse3/;
    379 
    380 add_proto qw/void vp8_sad16x8x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int  ref_stride, unsigned int *sad_array";
    381 specialize qw/vp8_sad16x8x4d sse3/;
    382 
    383 add_proto qw/void vp8_sad16x16x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int  ref_stride, unsigned int *sad_array";
    384 specialize qw/vp8_sad16x16x4d sse3/;
    385 
    386 #
    387 # Encoder functions below this point.
    388 #
    389 if (vpx_config("CONFIG_VP8_ENCODER") eq "yes") {
    390 
    391 #
    392 # Sum of squares (vector)
    393 #
    394 add_proto qw/unsigned int vp8_get_mb_ss/, "const short *";
    395 specialize qw/vp8_get_mb_ss mmx sse2/;
    396 
    397 #
    398 # SSE (Sum Squared Error)
    399 #
    400 add_proto qw/unsigned int vp8_sub_pixel_mse16x16/, "const unsigned char  *src_ptr, int  source_stride, int  xoffset, int  yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse";
    401 specialize qw/vp8_sub_pixel_mse16x16 mmx sse2/;
    402 $vp8_sub_pixel_mse16x16_sse2=vp8_sub_pixel_mse16x16_wmt;
    403 
    404 add_proto qw/unsigned int vp8_mse16x16/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sse";
    405 specialize qw/vp8_mse16x16 mmx sse2 media neon/;
    406 $vp8_mse16x16_sse2=vp8_mse16x16_wmt;
    407 $vp8_mse16x16_media=vp8_mse16x16_armv6;
    408 
    409 add_proto qw/unsigned int vp8_get4x4sse_cs/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride";
    410 specialize qw/vp8_get4x4sse_cs mmx neon/;
    411 
    412 #
    413 # Block copy
    414 #
    415 if ($opts{arch} =~ /x86/) {
    416     add_proto qw/void vp8_copy32xn/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n";
    417     specialize qw/vp8_copy32xn sse2 sse3/;
    418 }
    419 
    420 #
    421 # Structured Similarity (SSIM)
    422 #
    423 if (vpx_config("CONFIG_INTERNAL_STATS") eq "yes") {
    424     $opts{arch} eq "x86_64" and $sse2_on_x86_64 = "sse2";
    425 
    426     add_proto qw/void vp8_ssim_parms_8x8/, "unsigned char *s, int sp, unsigned char *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr";
    427     specialize qw/vp8_ssim_parms_8x8/, "$sse2_on_x86_64";
    428 
    429     add_proto qw/void vp8_ssim_parms_16x16/, "unsigned char *s, int sp, unsigned char *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr";
    430     specialize qw/vp8_ssim_parms_16x16/, "$sse2_on_x86_64";
    431 }
    432 
    433 #
    434 # Forward DCT
    435 #
    436 add_proto qw/void vp8_short_fdct4x4/, "short *input, short *output, int pitch";
    437 specialize qw/vp8_short_fdct4x4 mmx sse2 media neon/;
    438 $vp8_short_fdct4x4_media=vp8_short_fdct4x4_armv6;
    439 
    440 add_proto qw/void vp8_short_fdct8x4/, "short *input, short *output, int pitch";
    441 specialize qw/vp8_short_fdct8x4 mmx sse2 media neon/;
    442 $vp8_short_fdct8x4_media=vp8_short_fdct8x4_armv6;
    443 
    444 add_proto qw/void vp8_short_walsh4x4/, "short *input, short *output, int pitch";
    445 specialize qw/vp8_short_walsh4x4 sse2 media neon/;
    446 $vp8_short_walsh4x4_media=vp8_short_walsh4x4_armv6;
    447 
    448 #
    449 # Quantizer
    450 #
    451 add_proto qw/void vp8_regular_quantize_b/, "struct block *, struct blockd *";
    452 specialize qw/vp8_regular_quantize_b sse2/;
    453 # TODO(johann) Update sse4 implementation and re-enable
    454 #$vp8_regular_quantize_b_sse4_1=vp8_regular_quantize_b_sse4;
    455 
    456 add_proto qw/void vp8_fast_quantize_b/, "struct block *, struct blockd *";
    457 specialize qw/vp8_fast_quantize_b sse2 ssse3 media neon/;
    458 $vp8_fast_quantize_b_media=vp8_fast_quantize_b_armv6;
    459 
    460 add_proto qw/void vp8_regular_quantize_b_pair/, "struct block *b1, struct block *b2, struct blockd *d1, struct blockd *d2";
    461 # no asm yet
    462 
    463 add_proto qw/void vp8_fast_quantize_b_pair/, "struct block *b1, struct block *b2, struct blockd *d1, struct blockd *d2";
    464 specialize qw/vp8_fast_quantize_b_pair neon/;
    465 
    466 add_proto qw/void vp8_quantize_mb/, "struct macroblock *";
    467 specialize qw/vp8_quantize_mb neon/;
    468 
    469 add_proto qw/void vp8_quantize_mby/, "struct macroblock *";
    470 specialize qw/vp8_quantize_mby neon/;
    471 
    472 add_proto qw/void vp8_quantize_mbuv/, "struct macroblock *";
    473 specialize qw/vp8_quantize_mbuv neon/;
    474 
    475 #
    476 # Block subtraction
    477 #
    478 add_proto qw/int vp8_block_error/, "short *coeff, short *dqcoeff";
    479 specialize qw/vp8_block_error mmx sse2/;
    480 $vp8_block_error_sse2=vp8_block_error_xmm;
    481 
    482 add_proto qw/int vp8_mbblock_error/, "struct macroblock *mb, int dc";
    483 specialize qw/vp8_mbblock_error mmx sse2/;
    484 $vp8_mbblock_error_sse2=vp8_mbblock_error_xmm;
    485 
    486 add_proto qw/int vp8_mbuverror/, "struct macroblock *mb";
    487 specialize qw/vp8_mbuverror mmx sse2/;
    488 $vp8_mbuverror_sse2=vp8_mbuverror_xmm;
    489 
    490 add_proto qw/void vp8_subtract_b/, "struct block *be, struct blockd *bd, int pitch";
    491 specialize qw/vp8_subtract_b mmx sse2 media neon/;
    492 $vp8_subtract_b_media=vp8_subtract_b_armv6;
    493 
    494 add_proto qw/void vp8_subtract_mby/, "short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride";
    495 specialize qw/vp8_subtract_mby mmx sse2 media neon/;
    496 $vp8_subtract_mby_media=vp8_subtract_mby_armv6;
    497 
    498 add_proto qw/void vp8_subtract_mbuv/, "short *diff, unsigned char *usrc, unsigned char *vsrc, int src_stride, unsigned char *upred, unsigned char *vpred, int pred_stride";
    499 specialize qw/vp8_subtract_mbuv mmx sse2 media neon/;
    500 $vp8_subtract_mbuv_media=vp8_subtract_mbuv_armv6;
    501 
    502 #
    503 # Motion search
    504 #
    505 add_proto qw/int vp8_full_search_sad/, "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv";
    506 specialize qw/vp8_full_search_sad sse3 sse4_1/;
    507 $vp8_full_search_sad_sse3=vp8_full_search_sadx3;
    508 $vp8_full_search_sad_sse4_1=vp8_full_search_sadx8;
    509 
    510 add_proto qw/int vp8_refining_search_sad/, "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv";
    511 specialize qw/vp8_refining_search_sad sse3/;
    512 $vp8_refining_search_sad_sse3=vp8_refining_search_sadx4;
    513 
    514 add_proto qw/int vp8_diamond_search_sad/, "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, union int_mv *best_mv, int search_param, int sad_per_bit, int *num00, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv";
    515 $vp8_diamond_search_sad_sse3=vp8_diamond_search_sadx4;
    516 
    517 #
    518 # Alt-ref Noise Reduction (ARNR)
    519 #
    520 if (vpx_config("CONFIG_REALTIME_ONLY") ne "yes") {
    521     add_proto qw/void vp8_temporal_filter_apply/, "unsigned char *frame1, unsigned int stride, unsigned char *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, unsigned short *count";
    522     specialize qw/vp8_temporal_filter_apply sse2/;
    523 }
    524 
    525 #
    526 # Pick Loopfilter
    527 #
    528 add_proto qw/void vp8_yv12_copy_partial_frame/, "struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc";
    529 specialize qw/vp8_yv12_copy_partial_frame neon/;
    530 
    531 #
    532 # Denoiser filter
    533 #
    534 if (vpx_config("CONFIG_TEMPORAL_DENOISING") eq "yes") {
    535     add_proto qw/int vp8_denoiser_filter/, "struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset";
    536     specialize qw/vp8_denoiser_filter sse2 neon/;
    537 }
    538 
    539 # End of encoder only functions
    540 }
    541 1;
    542