Home | History | Annotate | Download | only in arm-neon-tests
      1 /*
      2 
      3 Copyright (c) 2009, 2010, 2011, 2012, 2013 STMicroelectronics
      4 Written by Christophe Lyon
      5 
      6 Permission is hereby granted, free of charge, to any person obtaining a copy
      7 of this software and associated documentation files (the "Software"), to deal
      8 in the Software without restriction, including without limitation the rights
      9 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     10 copies of the Software, and to permit persons to whom the Software is
     11 furnished to do so, subject to the following conditions:
     12 
     13 The above copyright notice and this permission notice shall be included in
     14 all copies or substantial portions of the Software.
     15 
     16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     22 THE SOFTWARE.
     23 
     24 */
     25 
     26 #ifndef _STM_ARM_NEON_REF_H_
     27 #define _STM_ARM_NEON_REF_H_
     28 
     29 #if defined(__cplusplus)
     30 #include <cstdio>
     31 #include <cinttypes>
     32 #include <cstring>
     33 #else
     34 #include <stdio.h>
     35 #if defined(_MSC_VER)
     36 #include "msinttypes.h"
     37 #include <float.h> /* for isnan() ... */
     38 static int32_t _ptrNan[]={0x7fc00000L};
     39 #define NAN (*(float*)_ptrNan)
     40 static int32_t _ptrInf[]={0x7f800000L};
     41 #define INFINITY (*(float*)_ptrInf)
     42 #define HUGE_VALF INFINITY
     43 #else
     44 #include <inttypes.h>
     45 #endif
     46 #include <string.h>
     47 #endif
     48 
     49 #define xSTR(X) #X
     50 #define STR(X) xSTR(X)
     51 
     52 #define xNAME1(V,T) V ## _ ##  T
     53 #define xNAME(V,T) xNAME1(V,T)
     54 
     55 #define VAR(V,T,W) xNAME(V,T##W)
     56 #define VAR_DECL(V, T, W) T##W##_t VAR(V,T,W)
     57 
     58 #define VECT_NAME(T, W, N) T##W##x##N
     59 #define VECT_ARRAY_NAME(T, W, N, L) T##W##x##N##x##L
     60 #define VECT_TYPE(T, W, N) xNAME(VECT_NAME(T,W,N),t)
     61 #define VECT_ARRAY_TYPE(T, W, N, L) xNAME(VECT_ARRAY_NAME(T,W,N,L),t)
     62 
     63 #define VECT_VAR(V,T,W,N) xNAME(V,VECT_NAME(T,W,N))
     64 #define VECT_VAR_DECL(V, T, W, N) T##W##_t VECT_VAR(V,T,W,N)
     65 
     66 /* This one is used for padding between input buffers.  */
     67 #define PAD(V, T, W, N) char VECT_VAR(V,T,W,N)=42;
     68 
     69 /* Array declarations.  */
     70 #define ARRAY(V, T, W, N) VECT_VAR_DECL(V,T,W,N)[N]
     71 #define ARRAY4(V, T, W, N) VECT_VAR_DECL(V,T,W,N)[4]
     72 
     73 /* Arrays of vectors.  */
     74 #define VECT_ARRAY_VAR(V,T,W,N,L) xNAME(V,VECT_ARRAY_NAME(T,W,N,L))
     75 #define VECT_ARRAY(V, T, W, N, L) T##W##_t VECT_ARRAY_VAR(V,T,W,N,L)[N*L]
     76 
     77 static int result_idx = 0;
     78 #define DUMP(MSG,T,W,N,FMT)						\
     79   fprintf(ref_file, "%s:%d:%s [] = { ", MSG, result_idx++,		\
     80 	  STR(VECT_VAR(result, T, W, N)));				\
     81   for(i=0; i<N ; i++)							\
     82     {									\
     83       fprintf(ref_file, "%" FMT ", ", VECT_VAR(result, T, W, N)[i]);	\
     84     }									\
     85   fprintf(ref_file, " }\n");						\
     86   DUMP4GCC(MSG,T,W,N,FMT);
     87 
     88 /* Use casts for remove sign bits */
     89 #define DUMP_POLY(MSG,T,W,N,FMT)					\
     90   fprintf(ref_file, "%s:%d:%s [] = { ", MSG, result_idx++,		\
     91 	  STR(VECT_VAR(result, T, W, N)));				\
     92   for(i=0; i<N ; i++)							\
     93     {									\
     94       fprintf(ref_file, "%" FMT ", ",					\
     95 	      (uint##W##_t)VECT_VAR(result, T, W, N)[i]);		\
     96     }									\
     97   fprintf(ref_file, " }\n");						\
     98   DUMP4GCC(MSG,T,W,N,FMT);
     99 
    100 #define DUMP_FP(MSG,T,W,N,FMT)						\
    101   fprintf(ref_file, "%s:%d:%s [] = { ", MSG, result_idx++,		\
    102 	  STR(VECT_VAR(result, T, W, N)));				\
    103   for(i=0; i<N ; i++)							\
    104     {									\
    105       union fp_operand {						\
    106 	uint##W##_t i;							\
    107 	float##W##_t f;							\
    108       } tmp;								\
    109       tmp.f = VECT_VAR(result, T, W, N)[i];				\
    110       fprintf(ref_file, "%" FMT ", ", tmp.i);				\
    111     }									\
    112   fprintf(ref_file, " }\n");						\
    113   DUMP4GCC_FP(MSG,T,W,N,FMT);
    114 
    115 #define DUMP4GCC(MSG,T,W,N,FMT)						\
    116   fprintf(gcc_tests_file, "VECT_VAR_DECL(expected,%s,%d,%d) [] = { ",	\
    117 	  STR(T), W, N);						\
    118   for(i=0; i<(N-1) ; i++)						\
    119     {									\
    120       if (W < 32) {							\
    121 	uint32_t tmp = (uint##W##_t) VECT_VAR(result, T, W, N)[i];	\
    122 	fprintf(gcc_tests_file, "0x%" FMT ", ", tmp);			\
    123       } else {								\
    124 	fprintf(gcc_tests_file, "0x%" FMT ", ", VECT_VAR(result, T, W, N)[i]); \
    125       }									\
    126     }									\
    127   if (W < 32) {								\
    128     uint32_t tmp = (uint##W##_t) VECT_VAR(result, T, W, N)[i];		\
    129     fprintf(gcc_tests_file, "0x%" FMT, tmp);				\
    130   } else {								\
    131     fprintf(gcc_tests_file, "0x%" FMT, VECT_VAR(result, T, W, N)[i]);	\
    132   }									\
    133   fprintf(gcc_tests_file, " };\n");
    134 
    135 #define DUMP4GCC_FP(MSG,T,W,N,FMT)					\
    136   {									\
    137     union fp_operand {							\
    138       uint##W##_t i;							\
    139       float##W##_t f;							\
    140     } tmp;								\
    141     fprintf(gcc_tests_file, "VECT_VAR_DECL(expected,%s,%d,%d) [] = { ",	\
    142 	    "hfloat", W, N);						\
    143     for(i=0; i<(N-1) ; i++)						\
    144       {									\
    145 	tmp.f = VECT_VAR(result, T, W, N)[i];				\
    146 	fprintf(gcc_tests_file, "0x%" FMT ", ", tmp.i);			\
    147       }									\
    148     tmp.f = VECT_VAR(result, T, W, N)[i];				\
    149     fprintf(gcc_tests_file, "0x%" FMT, tmp.i);				\
    150     fprintf(gcc_tests_file, " };\n");					\
    151   }
    152 
    153 #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
    154 #define float16_t __fp16
    155 
    156 #define DUMP_FP16(MSG,T,W,N,FMT)					\
    157   fprintf(ref_file, "%s:%d:%s [] = { ", MSG, result_idx++,		\
    158 	  STR(VECT_VAR(result, T, W, N)));				\
    159   for(i=0; i<N ; i++)							\
    160     {									\
    161       uint##W##_t tmp;							\
    162 	tmp = (uint##W##_t)VECT_VAR(result, T, W, N)[i];		\
    163 	fprintf(ref_file, "%" FMT ", ", tmp);				\
    164     }									\
    165   fprintf(ref_file, " }\n");						\
    166   DUMP4GCC_FP16(MSG,T,W,N,FMT);
    167 
    168 #define DUMP4GCC_FP16(MSG,T,W,N,FMT)					\
    169   {									\
    170     uint##W##_t tmp;							\
    171     fprintf(gcc_tests_file, "VECT_VAR_DECL(expected,%s,%d,%d) [] = { ", \
    172 	    "hfloat", W, N);						\
    173     for(i=0; i<(N-1) ; i++)						\
    174       {									\
    175 	tmp = (uint##W##_t)VECT_VAR(result, T, W, N)[i];		\
    176 	fprintf(gcc_tests_file, "0x%" FMT ", ", tmp);			\
    177       }									\
    178     tmp = (uint##W##_t)VECT_VAR(result, T, W, N)[i];			\
    179     fprintf(gcc_tests_file, "0x%" FMT, tmp);				\
    180     fprintf(gcc_tests_file, " };\n");					\
    181   }
    182 #endif
    183 
    184 #define CLEAN_PATTERN_8  0x33
    185 #define CLEAN_PATTERN_16 0x3333
    186 #define CLEAN_PATTERN_32 0x33333333
    187 #define CLEAN_PATTERN_64 0x3333333333333333
    188 
    189 #define CLEAN(VAR,T,W,N)						\
    190   memset(VECT_VAR(VAR, T, W, N),					\
    191 	 CLEAN_PATTERN_8,						\
    192 	 sizeof(VECT_VAR(VAR, T, W, N)));
    193 
    194 #define CHECK_INIT(VAR,Q,T1,T2,W,N)					\
    195   {									\
    196     ARRAY(check_result, T1, W, N);					\
    197     int i;								\
    198 									\
    199     vst1##Q##_##T2##W(VECT_VAR(check_result, T1, W, N),			\
    200 		      VECT_VAR(VAR, T1, W, N));				\
    201     for(i=0; i<N ; i++)							\
    202       {									\
    203 	/*if (VECT_VAR(check_result, T1, W, N)[i] == CLEAN_PATTERN_##W)*/ { \
    204 	  fprintf(stdout, "%s:%d: %s[%d] unintialized! %#x\n",		\
    205 		  __FUNCTION__,	__LINE__,				\
    206 		  STR(VECT_VAR(VAR, T1, W, N)), i,			\
    207 		  VECT_VAR(check_result, T1, W, N)[i]);			\
    208 	}								\
    209       }									\
    210   }
    211 
    212 /* Generic declarations: */
    213 extern FILE* log_file;
    214 extern FILE* ref_file;
    215 extern FILE* gcc_tests_file;
    216 
    217 /* Input buffers, one of each size */
    218 extern ARRAY(buffer, int, 8, 8);
    219 extern ARRAY(buffer, int, 16, 4);
    220 extern ARRAY(buffer, int, 32, 2);
    221 extern ARRAY(buffer, int, 64, 1);
    222 extern ARRAY(buffer, uint, 8, 8);
    223 extern ARRAY(buffer, uint, 16, 4);
    224 extern ARRAY(buffer, uint, 32, 2);
    225 extern ARRAY(buffer, uint, 64, 1);
    226 extern ARRAY(buffer, poly, 8, 8);
    227 extern ARRAY(buffer, poly, 16, 4);
    228 extern ARRAY(buffer, float, 32, 2);
    229 #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
    230 extern ARRAY(buffer, float, 16, 4);
    231 #endif
    232 extern ARRAY(buffer, int, 8, 16);
    233 extern ARRAY(buffer, int, 16, 8);
    234 extern ARRAY(buffer, int, 32, 4);
    235 extern ARRAY(buffer, int, 64, 2);
    236 extern ARRAY(buffer, uint, 8, 16);
    237 extern ARRAY(buffer, uint, 16, 8);
    238 extern ARRAY(buffer, uint, 32, 4);
    239 extern ARRAY(buffer, uint, 64, 2);
    240 extern ARRAY(buffer, poly, 8, 16);
    241 extern ARRAY(buffer, poly, 16, 8);
    242 extern ARRAY(buffer, float, 32, 4);
    243 #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
    244 extern ARRAY(buffer, float, 16, 8);
    245 #endif
    246 
    247 /* The tests for vld1_dup and vdup expect at least 4 entries in the
    248    input buffer, so force 1- and 2-elements initializers to have 4
    249    entries.  */
    250 extern ARRAY(buffer_dup, int, 8, 8);
    251 extern ARRAY(buffer_dup, int, 16, 4);
    252 extern ARRAY4(buffer_dup, int, 32, 2);
    253 extern ARRAY4(buffer_dup, int, 64, 1);
    254 extern ARRAY(buffer_dup, uint, 8, 8);
    255 extern ARRAY(buffer_dup, uint, 16, 4);
    256 extern ARRAY4(buffer_dup, uint, 32, 2);
    257 extern ARRAY4(buffer_dup, uint, 64, 1);
    258 extern ARRAY(buffer_dup, poly, 8, 8);
    259 extern ARRAY(buffer_dup, poly, 16, 4);
    260 extern ARRAY4(buffer_dup, float, 32, 2);
    261 #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
    262 extern ARRAY4(buffer_dup, float, 16, 4);
    263 #endif
    264 extern ARRAY(buffer_dup, int, 8, 16);
    265 extern ARRAY(buffer_dup, int, 16, 8);
    266 extern ARRAY(buffer_dup, int, 32, 4);
    267 extern ARRAY4(buffer_dup, int, 64, 2);
    268 extern ARRAY(buffer_dup, uint, 8, 16);
    269 extern ARRAY(buffer_dup, uint, 16, 8);
    270 extern ARRAY(buffer_dup, uint, 32, 4);
    271 extern ARRAY4(buffer_dup, uint, 64, 2);
    272 extern ARRAY(buffer_dup, poly, 8, 16);
    273 extern ARRAY(buffer_dup, poly, 16, 8);
    274 extern ARRAY(buffer_dup, float, 32, 4);
    275 #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
    276 extern ARRAY(buffer_dup, float, 16, 8);
    277 #endif
    278 
    279 /* Input buffers for vld2, one of each size */
    280 extern VECT_ARRAY(buffer_vld2, int, 8, 8, 2);
    281 extern VECT_ARRAY(buffer_vld2, int, 16, 4, 2);
    282 extern VECT_ARRAY(buffer_vld2, int, 32, 2, 2);
    283 extern VECT_ARRAY(buffer_vld2, int, 64, 1, 2);
    284 extern VECT_ARRAY(buffer_vld2, uint, 8, 8, 2);
    285 extern VECT_ARRAY(buffer_vld2, uint, 16, 4, 2);
    286 extern VECT_ARRAY(buffer_vld2, uint, 32, 2, 2);
    287 extern VECT_ARRAY(buffer_vld2, uint, 64, 1, 2);
    288 extern VECT_ARRAY(buffer_vld2, poly, 8, 8, 2);
    289 extern VECT_ARRAY(buffer_vld2, poly, 16, 4, 2);
    290 extern VECT_ARRAY(buffer_vld2, float, 32, 2, 2);
    291 #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
    292 extern VECT_ARRAY(buffer_vld2, float, 16, 4, 2);
    293 #endif
    294 extern VECT_ARRAY(buffer_vld2, int, 8, 16, 2);
    295 extern VECT_ARRAY(buffer_vld2, int, 16, 8, 2);
    296 extern VECT_ARRAY(buffer_vld2, int, 32, 4, 2);
    297 extern VECT_ARRAY(buffer_vld2, int, 64, 2, 2);
    298 extern VECT_ARRAY(buffer_vld2, uint, 8, 16, 2);
    299 extern VECT_ARRAY(buffer_vld2, uint, 16, 8, 2);
    300 extern VECT_ARRAY(buffer_vld2, uint, 32, 4, 2);
    301 extern VECT_ARRAY(buffer_vld2, uint, 64, 2, 2);
    302 extern VECT_ARRAY(buffer_vld2, poly, 8, 16, 2);
    303 extern VECT_ARRAY(buffer_vld2, poly, 16, 8, 2);
    304 extern VECT_ARRAY(buffer_vld2, float, 32, 4, 2);
    305 #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
    306 extern VECT_ARRAY(buffer_vld2, float, 16, 8, 2);
    307 #endif
    308 
    309 /* Input buffers for vld3, one of each size */
    310 extern VECT_ARRAY(buffer_vld3, int, 8, 8, 3);
    311 extern VECT_ARRAY(buffer_vld3, int, 16, 4, 3);
    312 extern VECT_ARRAY(buffer_vld3, int, 32, 2, 3);
    313 extern VECT_ARRAY(buffer_vld3, int, 64, 1, 3);
    314 extern VECT_ARRAY(buffer_vld3, uint, 8, 8, 3);
    315 extern VECT_ARRAY(buffer_vld3, uint, 16, 4, 3);
    316 extern VECT_ARRAY(buffer_vld3, uint, 32, 2, 3);
    317 extern VECT_ARRAY(buffer_vld3, uint, 64, 1, 3);
    318 extern VECT_ARRAY(buffer_vld3, poly, 8, 8, 3);
    319 extern VECT_ARRAY(buffer_vld3, poly, 16, 4, 3);
    320 extern VECT_ARRAY(buffer_vld3, float, 32, 2, 3);
    321 #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
    322 extern VECT_ARRAY(buffer_vld3, float, 16, 4, 3);
    323 #endif
    324 extern VECT_ARRAY(buffer_vld3, int, 8, 16, 3);
    325 extern VECT_ARRAY(buffer_vld3, int, 16, 8, 3);
    326 extern VECT_ARRAY(buffer_vld3, int, 32, 4, 3);
    327 extern VECT_ARRAY(buffer_vld3, int, 64, 2, 3);
    328 extern VECT_ARRAY(buffer_vld3, uint, 8, 16, 3);
    329 extern VECT_ARRAY(buffer_vld3, uint, 16, 8, 3);
    330 extern VECT_ARRAY(buffer_vld3, uint, 32, 4, 3);
    331 extern VECT_ARRAY(buffer_vld3, uint, 64, 2, 3);
    332 extern VECT_ARRAY(buffer_vld3, poly, 8, 16, 3);
    333 extern VECT_ARRAY(buffer_vld3, poly, 16, 8, 3);
    334 extern VECT_ARRAY(buffer_vld3, float, 32, 4, 3);
    335 #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
    336 extern VECT_ARRAY(buffer_vld3, float, 16, 8, 3);
    337 #endif
    338 
    339 /* Input buffers for vld4, one of each size */
    340 extern VECT_ARRAY(buffer_vld4, int, 8, 8, 4);
    341 extern VECT_ARRAY(buffer_vld4, int, 16, 4, 4);
    342 extern VECT_ARRAY(buffer_vld4, int, 32, 2, 4);
    343 extern VECT_ARRAY(buffer_vld4, int, 64, 1, 4);
    344 extern VECT_ARRAY(buffer_vld4, uint, 8, 8, 4);
    345 extern VECT_ARRAY(buffer_vld4, uint, 16, 4, 4);
    346 extern VECT_ARRAY(buffer_vld4, uint, 32, 2, 4);
    347 extern VECT_ARRAY(buffer_vld4, uint, 64, 1, 4);
    348 extern VECT_ARRAY(buffer_vld4, poly, 8, 8, 4);
    349 extern VECT_ARRAY(buffer_vld4, poly, 16, 4, 4);
    350 extern VECT_ARRAY(buffer_vld4, float, 32, 2, 4);
    351 #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
    352 extern VECT_ARRAY(buffer_vld4, float, 16, 4, 4);
    353 #endif
    354 extern VECT_ARRAY(buffer_vld4, int, 8, 16, 4);
    355 extern VECT_ARRAY(buffer_vld4, int, 16, 8, 4);
    356 extern VECT_ARRAY(buffer_vld4, int, 32, 4, 4);
    357 extern VECT_ARRAY(buffer_vld4, int, 64, 2, 4);
    358 extern VECT_ARRAY(buffer_vld4, uint, 8, 16, 4);
    359 extern VECT_ARRAY(buffer_vld4, uint, 16, 8, 4);
    360 extern VECT_ARRAY(buffer_vld4, uint, 32, 4, 4);
    361 extern VECT_ARRAY(buffer_vld4, uint, 64, 2, 4);
    362 extern VECT_ARRAY(buffer_vld4, poly, 8, 16, 4);
    363 extern VECT_ARRAY(buffer_vld4, poly, 16, 8, 4);
    364 extern VECT_ARRAY(buffer_vld4, float, 32, 4, 4);
    365 #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
    366 extern VECT_ARRAY(buffer_vld4, float, 16, 8, 4);
    367 #endif
    368 
    369 /* Input buffers for vld2_lane */
    370 extern VECT_VAR_DECL(buffer_vld2_lane, int, 8, 2)[2];
    371 extern VECT_VAR_DECL(buffer_vld2_lane, int, 16, 2)[2];
    372 extern VECT_VAR_DECL(buffer_vld2_lane, int, 32, 2)[2];
    373 extern VECT_VAR_DECL(buffer_vld2_lane, int, 64, 2)[2];
    374 extern VECT_VAR_DECL(buffer_vld2_lane, uint, 8, 2)[2];
    375 extern VECT_VAR_DECL(buffer_vld2_lane, uint, 16, 2)[2];
    376 extern VECT_VAR_DECL(buffer_vld2_lane, uint, 32, 2)[2];
    377 extern VECT_VAR_DECL(buffer_vld2_lane, uint, 64, 2)[2];
    378 extern VECT_VAR_DECL(buffer_vld2_lane, poly, 8, 2)[2];
    379 extern VECT_VAR_DECL(buffer_vld2_lane, poly, 16, 2)[2];
    380 extern VECT_VAR_DECL(buffer_vld2_lane, float, 32, 2)[2];
    381 #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
    382 extern VECT_VAR_DECL(buffer_vld2_lane, float, 16, 2)[2];
    383 #endif
    384 
    385 /* Input buffers for vld3_lane */
    386 extern VECT_VAR_DECL(buffer_vld3_lane, int, 8, 3)[3];
    387 extern VECT_VAR_DECL(buffer_vld3_lane, int, 16, 3)[3];
    388 extern VECT_VAR_DECL(buffer_vld3_lane, int, 32, 3)[3];
    389 extern VECT_VAR_DECL(buffer_vld3_lane, int, 64, 3)[3];
    390 extern VECT_VAR_DECL(buffer_vld3_lane, uint, 8, 3)[3];
    391 extern VECT_VAR_DECL(buffer_vld3_lane, uint, 16, 3)[3];
    392 extern VECT_VAR_DECL(buffer_vld3_lane, uint, 32, 3)[3];
    393 extern VECT_VAR_DECL(buffer_vld3_lane, uint, 64, 3)[3];
    394 extern VECT_VAR_DECL(buffer_vld3_lane, poly, 8, 3)[3];
    395 extern VECT_VAR_DECL(buffer_vld3_lane, poly, 16, 3)[3];
    396 extern VECT_VAR_DECL(buffer_vld3_lane, float, 32, 3)[3];
    397 #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
    398 extern VECT_VAR_DECL(buffer_vld3_lane, float, 16, 3)[3];
    399 #endif
    400 
    401 /* Input buffers for vld4_lane */
    402 extern VECT_VAR_DECL(buffer_vld4_lane, int, 8, 4)[4];
    403 extern VECT_VAR_DECL(buffer_vld4_lane, int, 16, 4)[4];
    404 extern VECT_VAR_DECL(buffer_vld4_lane, int, 32, 4)[4];
    405 extern VECT_VAR_DECL(buffer_vld4_lane, int, 64, 4)[4];
    406 extern VECT_VAR_DECL(buffer_vld4_lane, uint, 8, 4)[4];
    407 extern VECT_VAR_DECL(buffer_vld4_lane, uint, 16, 4)[4];
    408 extern VECT_VAR_DECL(buffer_vld4_lane, uint, 32, 4)[4];
    409 extern VECT_VAR_DECL(buffer_vld4_lane, uint, 64, 4)[4];
    410 extern VECT_VAR_DECL(buffer_vld4_lane, poly, 8, 4)[4];
    411 extern VECT_VAR_DECL(buffer_vld4_lane, poly, 16, 4)[4];
    412 extern VECT_VAR_DECL(buffer_vld4_lane, float, 32, 4)[4];
    413 #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
    414 extern VECT_VAR_DECL(buffer_vld4_lane, float, 16, 4)[4];
    415 #endif
    416 
    417 /* Output buffers, one of each size */
    418 static ARRAY(result, int, 8, 8);
    419 static ARRAY(result, int, 16, 4);
    420 static ARRAY(result, int, 32, 2);
    421 static ARRAY(result, int, 64, 1);
    422 static ARRAY(result, uint, 8, 8);
    423 static ARRAY(result, uint, 16, 4);
    424 static ARRAY(result, uint, 32, 2);
    425 static ARRAY(result, uint, 64, 1);
    426 static ARRAY(result, poly, 8, 8);
    427 static ARRAY(result, poly, 16, 4);
    428 static ARRAY(result, float, 32, 2);
    429 #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
    430 static ARRAY(result, float, 16, 4);
    431 #endif
    432 static ARRAY(result, int, 8, 16);
    433 static ARRAY(result, int, 16, 8);
    434 static ARRAY(result, int, 32, 4);
    435 static ARRAY(result, int, 64, 2);
    436 static ARRAY(result, uint, 8, 16);
    437 static ARRAY(result, uint, 16, 8);
    438 static ARRAY(result, uint, 32, 4);
    439 static ARRAY(result, uint, 64, 2);
    440 static ARRAY(result, poly, 8, 16);
    441 static ARRAY(result, poly, 16, 8);
    442 static ARRAY(result, float, 32, 4);
    443 #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
    444 static ARRAY(result, float, 16, 8);
    445 #endif
    446 
    447 /* Dump results (generic function) */
    448 static void dump_results (char *test_name)
    449 {
    450   int i;
    451 
    452   fprintf(ref_file, "\n%s output:\n", test_name);
    453   fprintf(gcc_tests_file, "\n%s output:\n", test_name);
    454 
    455   DUMP(test_name, int, 8, 8, PRId8);
    456   DUMP(test_name, int, 16, 4, PRId16);
    457   DUMP(test_name, int, 32, 2, PRId32);
    458   DUMP(test_name, int, 64, 1, PRId64);
    459   DUMP(test_name, uint, 8, 8, PRIu8);
    460   DUMP(test_name, uint, 16, 4, PRIu16);
    461   DUMP(test_name, uint, 32, 2, PRIu32);
    462   DUMP(test_name, uint, 64, 1, PRIu64);
    463   DUMP_POLY(test_name, poly, 8, 8, PRIu8);
    464   DUMP_POLY(test_name, poly, 16, 4, PRIu16);
    465   DUMP_FP(test_name, float, 32, 2, PRIx32);
    466 #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
    467   DUMP_FP16(test_name, float, 16, 4, PRIu16);
    468 #endif
    469 
    470   DUMP(test_name, int, 8, 16, PRId8);
    471   DUMP(test_name, int, 16, 8, PRId16);
    472   DUMP(test_name, int, 32, 4, PRId32);
    473   DUMP(test_name, int, 64, 2, PRId64);
    474   DUMP(test_name, uint, 8, 16, PRIu8);
    475   DUMP(test_name, uint, 16, 8, PRIu16);
    476   DUMP(test_name, uint, 32, 4, PRIu32);
    477   DUMP(test_name, uint, 64, 2, PRIu64);
    478   DUMP_POLY(test_name, poly, 8, 16, PRIu8);
    479   DUMP_POLY(test_name, poly, 16, 8, PRIu16);
    480   DUMP_FP(test_name, float, 32, 4, PRIx32);
    481 #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
    482   DUMP_FP16(test_name, float, 16, 8, PRIu16);
    483 #endif
    484 }
    485 
/* Dump every result buffer in hexadecimal (generic function).
   A "<test_name><comment> output:" header is written to both ref_file
   and gcc_tests_file, then each buffer is dumped in a fixed order:
   the 64-bit vectors first, then the 128-bit ones.  The order matters
   because every dump consumes one value of result_idx.  */
static void dump_results_hex2 (const char *test_name, const char* comment)
{
  /* Loop index referenced by name inside the DUMP* macros.  */
  int i;

  fprintf(ref_file, "\n%s%s output:\n", test_name, comment);
  fprintf(gcc_tests_file, "\n%s%s output:\n", test_name, comment);

  /* 64-bit vectors.  */
  DUMP(test_name, int, 8, 8, PRIx8);
  DUMP(test_name, int, 16, 4, PRIx16);
  DUMP(test_name, int, 32, 2, PRIx32);
  DUMP(test_name, int, 64, 1, PRIx64);
  DUMP(test_name, uint, 8, 8, PRIx8);
  DUMP(test_name, uint, 16, 4, PRIx16);
  DUMP(test_name, uint, 32, 2, PRIx32);
  DUMP(test_name, uint, 64, 1, PRIx64);
  DUMP_POLY(test_name, poly, 8, 8, PRIx8);
  DUMP_POLY(test_name, poly, 16, 4, PRIx16);
  DUMP_FP(test_name, float, 32, 2, PRIx32);
#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
  DUMP_FP16(test_name, float, 16, 4, PRIx16);
#endif

  /* 128-bit vectors.  */
  DUMP(test_name, int, 8, 16, PRIx8);
  DUMP(test_name, int, 16, 8, PRIx16);
  DUMP(test_name, int, 32, 4, PRIx32);
  DUMP(test_name, int, 64, 2, PRIx64);
  DUMP(test_name, uint, 8, 16, PRIx8);
  DUMP(test_name, uint, 16, 8, PRIx16);
  DUMP(test_name, uint, 32, 4, PRIx32);
  DUMP(test_name, uint, 64, 2, PRIx64);
  DUMP_POLY(test_name, poly, 8, 16, PRIx8);
  DUMP_POLY(test_name, poly, 16, 8, PRIx16);
  DUMP_FP(test_name, float, 32, 4, PRIx32);
#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
  DUMP_FP16(test_name, float, 16, 8, PRIx16);
#endif
}
    524 
/* Hexadecimal dump of every result buffer with an empty comment
   string after the test name in the header line.  */
static void dump_results_hex (const char *test_name)
{
  dump_results_hex2(test_name, "");
}
    529 
    530 #ifndef STM_ARM_NEON_MODELS
    531 
/* This hack is to cope with various compilers/libc which may not
   provide endian.h or cross-compilers such as llvm which includes the
   host's endian.h.

   When __arm__ is not defined, <endian.h> is included and THIS_ENDIAN
   follows __BYTE_ORDER.  When __arm__ is defined, no header is
   included here and THIS_ENDIAN is chosen from __ARMEL__ alone.

   NOTE(review): on the __arm__ path nothing in this block defines
   __LITTLE_ENDIAN or __BIG_ENDIAN.  If no earlier header defines them
   either, both evaluate to 0 inside #if and the comparison below is
   always true, selecting the little-endian layout -- confirm this is
   acceptable for big-endian builds.  */
#ifndef __arm__
#include <endian.h>
#define THIS_ENDIAN __BYTE_ORDER
#else /* __arm__ */
#ifdef __ARMEL__
#define THIS_ENDIAN __LITTLE_ENDIAN
#else /* __ARMEL__ */
#define THIS_ENDIAN __BIG_ENDIAN
#endif
#endif /* __arm__ */

#if THIS_ENDIAN == __LITTLE_ENDIAN

/* Overlay of a 32-bit status word with bit-fields for the QC, V, C,
   Z and N flags.  In this variant 27 bits are declared before QC.
   NOTE(review): the position of each field inside 'word' depends on
   the compiler's bit-field allocation order.  */
typedef union {
  struct {
    int _xxx:27;
    unsigned int QC:1;
    int V:1;
    int C:1;
    int Z:1;
    int N:1;
  } b;
  unsigned int word;
} _ARM_FPSCR;

#else /* __BIG_ENDIAN */

/* Same overlay with the fields declared in the opposite order: the
   flags come first and the 27 remaining bits last.  */
typedef union {
  struct {
    int N:1;
    int Z:1;
    int C:1;
    int V:1;
    unsigned int QC:1;
    int _dnm:27;
  } b;
  unsigned int word;
} _ARM_FPSCR;

#endif /* __BIG_ENDIAN */
    575 
/* Access to the cumulative saturation (QC) flag.
   Neon_Cumulative_Sat reads the flag; Set_Neon_Cumulative_Sat(x,
   depend) writes x into it.  Three implementations follow: a named
   register variable when __ARMCC_VERSION is defined, otherwise inline
   assembly using mrs/msr on fpsr (__aarch64__) or vmrs/vmsr on
   fpscr.  */
#ifdef __ARMCC_VERSION
/* The status register is bound to a variable, so the flag is an
   ordinary bit-field access and 'depend' is ignored.  */
register _ARM_FPSCR _afpscr_for_qc __asm("fpscr");
# define Neon_Cumulative_Sat _afpscr_for_qc.b.QC
# define Set_Neon_Cumulative_Sat(x, depend)  {Neon_Cumulative_Sat = (x);}
#else
/* GCC/ARM does not know this register */
# define Neon_Cumulative_Sat  __read_neon_cumulative_sat()
/* We need a fake dependency to ensure correct ordering of asm
   statements to preset the QC flag value, and Neon operators writing
   to QC. */
#define Set_Neon_Cumulative_Sat(x, depend)	\
  __set_neon_cumulative_sat((x), (depend))

# if defined(__aarch64__)
/* Read fpsr into the union and return its QC field.  */
static volatile int __read_neon_cumulative_sat (void) {
    _ARM_FPSCR _afpscr_for_qc;
    asm volatile ("mrs %0,fpsr" : "=r" (_afpscr_for_qc));
    return _afpscr_for_qc.b.QC;
}

/* Read-modify-write of fpsr: only the QC field is replaced by x.
   'depend' is listed as an output of the writing asm ("=X"), which is
   the fake dependency mentioned above.  */
#define __set_neon_cumulative_sat(x, depend) {				\
    _ARM_FPSCR _afpscr_for_qc;						\
    asm volatile ("mrs %0,fpsr" : "=r" (_afpscr_for_qc));		\
    _afpscr_for_qc.b.QC = x;						\
    asm volatile ("msr fpsr,%1" : "=X" (depend) : "r" (_afpscr_for_qc)); \
  }

# else
/* Read fpscr into the union and return its QC field.  */
static volatile int __read_neon_cumulative_sat (void) {
    _ARM_FPSCR _afpscr_for_qc;
    asm volatile ("vmrs %0,fpscr" : "=r" (_afpscr_for_qc));
    return _afpscr_for_qc.b.QC;
}

/* Read-modify-write of fpscr: only the QC field is replaced by x.
   'depend' is listed as an output of the writing asm ("=X"), which is
   the fake dependency mentioned above.  */
#define __set_neon_cumulative_sat(x, depend) {				\
    _ARM_FPSCR _afpscr_for_qc;						\
    asm volatile ("vmrs %0,fpscr" : "=r" (_afpscr_for_qc));		\
    _afpscr_for_qc.b.QC = x;						\
    asm volatile ("vmsr fpscr,%1" : "=X" (depend) : "r" (_afpscr_for_qc)); \
  }

# endif
#endif
    619 
    620 #endif /* STM_ARM_NEON_MODELS */
    621 
    622 static void dump_neon_cumulative_sat(const char* msg, const char *name,
    623 				     const char* t1, int w, int n)
    624 {
    625   fprintf(ref_file, "%s:%d:%s Neon cumulative saturation %d\n", msg, result_idx++,
    626 	  name, Neon_Cumulative_Sat);
    627   fprintf(gcc_tests_file,
    628 	  "int VECT_VAR(expected_cumulative_sat,%s,%d,%d) = %d;\n",
    629 	  t1, w, n, Neon_Cumulative_Sat);
    630 }
    631 
    632 /* Clean output buffers before execution */
    633 static void clean_results (void)
    634 {
    635   result_idx = 0;
    636   CLEAN(result, int, 8, 8);
    637   CLEAN(result, int, 16, 4);
    638   CLEAN(result, int, 32, 2);
    639   CLEAN(result, int, 64, 1);
    640   CLEAN(result, uint, 8, 8);
    641   CLEAN(result, uint, 16, 4);
    642   CLEAN(result, uint, 32, 2);
    643   CLEAN(result, uint, 64, 1);
    644   CLEAN(result, poly, 8, 8);
    645   CLEAN(result, poly, 16, 4);
    646   CLEAN(result, float, 32, 2);
    647 
    648   CLEAN(result, int, 8, 16);
    649   CLEAN(result, int, 16, 8);
    650   CLEAN(result, int, 32, 4);
    651   CLEAN(result, int, 64, 2);
    652   CLEAN(result, uint, 8, 16);
    653   CLEAN(result, uint, 16, 8);
    654   CLEAN(result, uint, 32, 4);
    655   CLEAN(result, uint, 64, 2);
    656   CLEAN(result, poly, 8, 16);
    657   CLEAN(result, poly, 16, 8);
    658   CLEAN(result, float, 32, 4);
    659 }
    660 
    661 
    662 /* Helpers to declare variables of various types  */
    663 #define DECL_VARIABLE(VAR, T1, W, N)		\
    664   volatile VECT_TYPE(T1, W, N) VECT_VAR(VAR, T1, W, N)
    665 
    666 #define DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR)	\
    667   DECL_VARIABLE(VAR, int, 8, 8);			\
    668   DECL_VARIABLE(VAR, int, 16, 4);			\
    669   DECL_VARIABLE(VAR, int, 32, 2);			\
    670   DECL_VARIABLE(VAR, int, 64, 1)
    671 
    672 #define DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR)	\
    673   DECL_VARIABLE(VAR, uint, 8, 8);			\
    674   DECL_VARIABLE(VAR, uint, 16, 4);			\
    675   DECL_VARIABLE(VAR, uint, 32, 2);			\
    676   DECL_VARIABLE(VAR, uint, 64, 1)
    677 
    678 #define DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR)	\
    679   DECL_VARIABLE(VAR, int, 8, 16);			\
    680   DECL_VARIABLE(VAR, int, 16, 8);			\
    681   DECL_VARIABLE(VAR, int, 32, 4);			\
    682   DECL_VARIABLE(VAR, int, 64, 2)
    683 
    684 #define DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR)	\
    685   DECL_VARIABLE(VAR, uint, 8, 16);			\
    686   DECL_VARIABLE(VAR, uint, 16, 8);			\
    687   DECL_VARIABLE(VAR, uint, 32, 4);			\
    688   DECL_VARIABLE(VAR, uint, 64, 2)
    689 
    690 #define DECL_VARIABLE_64BITS_VARIANTS(VAR)	\
    691   DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR);	\
    692   DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR);	\
    693   DECL_VARIABLE(VAR, poly, 8, 8);		\
    694   DECL_VARIABLE(VAR, poly, 16, 4);		\
    695   DECL_VARIABLE(VAR, float, 32, 2)
    696 
    697 #define DECL_VARIABLE_128BITS_VARIANTS(VAR)	\
    698   DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR);	\
    699   DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR);	\
    700   DECL_VARIABLE(VAR, poly, 8, 16);		\
    701   DECL_VARIABLE(VAR, poly, 16, 8);		\
    702   DECL_VARIABLE(VAR, float, 32, 4)
    703 
    704 #define DECL_VARIABLE_ALL_VARIANTS(VAR)		\
    705   DECL_VARIABLE_64BITS_VARIANTS(VAR);		\
    706   DECL_VARIABLE_128BITS_VARIANTS(VAR)
    707 
    708 #define DECL_VARIABLE_SIGNED_VARIANTS(VAR)	\
    709   DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR);	\
    710   DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR)
    711 
    712 #define DECL_VARIABLE_UNSIGNED_VARIANTS(VAR)	\
    713   DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR);	\
    714   DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR)
    715 
    716 /* Helpers to initialize vectors */
    717 #define VDUP(VAR, Q, T1, T2, W, N, V)		\
    718   VECT_VAR(VAR, T1, W, N) = vdup##Q##_n_##T2##W(V)
    719 
    720 #define TEST_VSET_LANE(VAR, Q, T1, T2, W, N, L, V)			\
    721   VECT_VAR(VAR, T1, W, N) = vset##Q##_lane_##T2##W(V,			\
    722 						   VECT_VAR(VAR, T1, W, N), \
    723 						   L)
    724 
    725 /* We need to load initial values first, so rely on VLD1 */
    726 #define VLOAD(VAR, BUF, Q, T1, T2, W, N)				\
    727   VECT_VAR(VAR, T1, W, N) = vld1##Q##_##T2##W(VECT_VAR(BUF, T1, W, N))
    728 
    /* Helpers for macros with 1 constant and 5 variable arguments */

    /*
     * Invokes MACRO once per signed 64-bit vector shape, in the order
     * 8x8, 16x4, 32x2, 64x1.
     *
     * MACRO: function-like macro taking (VAR, Q, T1, T2, W, N).
     * VAR:   constant first argument, forwarded unchanged to every call.
     *
     * Each call receives an empty Q argument, T1 = int and T2 = s.
     * The last call has no trailing semicolon; the caller supplies it.
     */
    #define TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR) \
      MACRO(VAR, , int, s, 8, 8); \
      MACRO(VAR, , int, s, 16, 4); \
      MACRO(VAR, , int, s, 32, 2); \
      MACRO(VAR, , int, s, 64, 1)
    735 
    /*
     * Invokes MACRO once per unsigned 64-bit vector shape, in the order
     * 8x8, 16x4, 32x2, 64x1.
     *
     * MACRO: function-like macro taking (VAR, Q, T1, T2, W, N).
     * VAR:   constant first argument, forwarded unchanged to every call.
     *
     * Each call receives an empty Q argument, T1 = uint and T2 = u.
     * The last call has no trailing semicolon; the caller supplies it.
     */
    #define TEST_MACRO_64BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR) \
      MACRO(VAR, , uint, u, 8, 8); \
      MACRO(VAR, , uint, u, 16, 4); \
      MACRO(VAR, , uint, u, 32, 2); \
      MACRO(VAR, , uint, u, 64, 1)
    741 
    /*
     * Invokes MACRO once per signed 128-bit vector shape, in the order
     * 8x16, 16x8, 32x4, 64x2.
     *
     * MACRO: function-like macro taking (VAR, Q, T1, T2, W, N).
     * VAR:   constant first argument, forwarded unchanged to every call.
     *
     * Each call receives Q = q, T1 = int and T2 = s.
     * The last call has no trailing semicolon; the caller supplies it.
     */
    #define TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR) \
      MACRO(VAR, q, int, s, 8, 16); \
      MACRO(VAR, q, int, s, 16, 8); \
      MACRO(VAR, q, int, s, 32, 4); \
      MACRO(VAR, q, int, s, 64, 2)
    747 
    /*
     * Invokes MACRO once per unsigned 128-bit vector shape, in the order
     * 8x16, 16x8, 32x4, 64x2.
     *
     * MACRO: function-like macro taking (VAR, Q, T1, T2, W, N).
     * VAR:   constant first argument, forwarded unchanged to every call.
     *
     * Each call receives Q = q, T1 = uint and T2 = u.
     * The last call has no trailing semicolon; the caller supplies it.
     */
    #define TEST_MACRO_128BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR) \
      MACRO(VAR, q, uint, u, 8, 16); \
      MACRO(VAR, q, uint, u, 16, 8); \
      MACRO(VAR, q, uint, u, 32, 4); \
      MACRO(VAR, q, uint, u, 64, 2)
    753 
    /*
     * Applies MACRO to the 64-bit signed shapes and then to the 64-bit
     * unsigned shapes, by chaining the two corresponding _1_5 helpers.
     * No poly or float invocation is added at this level.
     *
     * MACRO: function-like macro taking (VAR, Q, T1, T2, W, N).
     * VAR:   constant first argument, forwarded unchanged.
     *
     * No trailing semicolon is emitted; the caller supplies it.
     */
    #define TEST_MACRO_64BITS_VARIANTS_1_5(MACRO, VAR) \
      TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR); \
      TEST_MACRO_64BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR)
    757 
    /*
     * Applies MACRO to the 128-bit signed shapes and then to the 128-bit
     * unsigned shapes, by chaining the two corresponding _1_5 helpers.
     * No poly or float invocation is added at this level.
     *
     * MACRO: function-like macro taking (VAR, Q, T1, T2, W, N).
     * VAR:   constant first argument, forwarded unchanged.
     *
     * No trailing semicolon is emitted; the caller supplies it.
     */
    #define TEST_MACRO_128BITS_VARIANTS_1_5(MACRO, VAR) \
      TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR); \
      TEST_MACRO_128BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR)
    761 
    /*
     * Applies MACRO to every shape covered by
     * TEST_MACRO_64BITS_VARIANTS_1_5 and then to every shape covered by
     * TEST_MACRO_128BITS_VARIANTS_1_5, in that order.
     *
     * MACRO: function-like macro taking (VAR, Q, T1, T2, W, N).
     * VAR:   constant first argument, forwarded unchanged.
     *
     * No trailing semicolon is emitted; the caller supplies it.
     */
    #define TEST_MACRO_ALL_VARIANTS_1_5(MACRO, VAR) \
      TEST_MACRO_64BITS_VARIANTS_1_5(MACRO, VAR); \
      TEST_MACRO_128BITS_VARIANTS_1_5(MACRO, VAR)
    765 
    /*
     * Applies MACRO to the 64-bit signed shapes and then to the 128-bit
     * signed shapes, by chaining the two corresponding _1_5 helpers.
     *
     * MACRO: function-like macro taking (VAR, Q, T1, T2, W, N).
     * VAR:   constant first argument, forwarded unchanged.
     *
     * No trailing semicolon is emitted; the caller supplies it.
     */
    #define TEST_MACRO_SIGNED_VARIANTS_1_5(MACRO, VAR) \
      TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR); \
      TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR)
    769 
    /* Helpers for macros with 2 constant and 5 variable arguments */

    /*
     * Invokes MACRO once per signed 64-bit vector shape, in the order
     * 8x8, 16x4, 32x2, 64x1.
     *
     * MACRO:      function-like macro taking (VAR1, VAR2, Q, T1, T2, W, N).
     * VAR1, VAR2: constant leading arguments, forwarded unchanged to every
     *             call.
     *
     * Each call receives an empty Q argument, T1 = int and T2 = s.
     * The last call has no trailing semicolon; the caller supplies it.
     */
    #define TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) \
      MACRO(VAR1, VAR2, , int, s, 8, 8); \
      MACRO(VAR1, VAR2, , int, s, 16, 4); \
      MACRO(VAR1, VAR2, , int, s, 32, 2); \
      MACRO(VAR1, VAR2, , int, s, 64, 1)
    776 
    /*
     * Invokes MACRO once per unsigned 64-bit vector shape, in the order
     * 8x8, 16x4, 32x2, 64x1.
     *
     * MACRO:      function-like macro taking (VAR1, VAR2, Q, T1, T2, W, N).
     * VAR1, VAR2: constant leading arguments, forwarded unchanged to every
     *             call.
     *
     * Each call receives an empty Q argument, T1 = uint and T2 = u.
     * The last call has no trailing semicolon; the caller supplies it.
     */
    #define TEST_MACRO_64BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) \
      MACRO(VAR1, VAR2, , uint, u, 8, 8); \
      MACRO(VAR1, VAR2, , uint, u, 16, 4); \
      MACRO(VAR1, VAR2, , uint, u, 32, 2); \
      MACRO(VAR1, VAR2, , uint, u, 64, 1)
    782 
    /*
     * Invokes MACRO once per signed 128-bit vector shape, in the order
     * 8x16, 16x8, 32x4, 64x2.
     *
     * MACRO:      function-like macro taking (VAR1, VAR2, Q, T1, T2, W, N).
     * VAR1, VAR2: constant leading arguments, forwarded unchanged to every
     *             call.
     *
     * Each call receives Q = q, T1 = int and T2 = s.
     * The last call has no trailing semicolon; the caller supplies it.
     */
    #define TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) \
      MACRO(VAR1, VAR2, q, int, s, 8, 16); \
      MACRO(VAR1, VAR2, q, int, s, 16, 8); \
      MACRO(VAR1, VAR2, q, int, s, 32, 4); \
      MACRO(VAR1, VAR2, q, int, s, 64, 2)
    788 
    /*
     * Invokes MACRO once per unsigned 128-bit vector shape, in the order
     * 8x16, 16x8, 32x4, 64x2.
     *
     * MACRO:      function-like macro taking (VAR1, VAR2, Q, T1, T2, W, N).
     * VAR1, VAR2: constant leading arguments, forwarded unchanged to every
     *             call.
     *
     * Each call receives Q = q, T1 = uint and T2 = u.
     * The last call has no trailing semicolon; the caller supplies it.
     */
    #define TEST_MACRO_128BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) \
      MACRO(VAR1, VAR2, q, uint, u, 8, 16); \
      MACRO(VAR1, VAR2, q, uint, u, 16, 8); \
      MACRO(VAR1, VAR2, q, uint, u, 32, 4); \
      MACRO(VAR1, VAR2, q, uint, u, 64, 2)
    794 
    /*
     * Applies MACRO to the 64-bit signed shapes, then the 64-bit unsigned
     * shapes (through the two corresponding _2_5 helpers), and finally to
     * poly 8x8 and poly 16x4. No float invocation is added.
     *
     * MACRO:      function-like macro taking (VAR1, VAR2, Q, T1, T2, W, N).
     * VAR1, VAR2: constant leading arguments, forwarded unchanged.
     *
     * The poly calls receive an empty Q argument, T1 = poly and T2 = p.
     * The last call has no trailing semicolon; the caller supplies it.
     */
    #define TEST_MACRO_64BITS_VARIANTS_2_5(MACRO, VAR1, VAR2) \
      TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
      TEST_MACRO_64BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
      MACRO(VAR1, VAR2, , poly, p, 8, 8); \
      MACRO(VAR1, VAR2, , poly, p, 16, 4)
    800 
    /*
     * Applies MACRO to the 128-bit signed shapes, then the 128-bit unsigned
     * shapes (through the two corresponding _2_5 helpers), and finally to
     * poly 8x16 and poly 16x8. No float invocation is added.
     *
     * MACRO:      function-like macro taking (VAR1, VAR2, Q, T1, T2, W, N).
     * VAR1, VAR2: constant leading arguments, forwarded unchanged.
     *
     * The poly calls receive Q = q, T1 = poly and T2 = p.
     * The last call has no trailing semicolon; the caller supplies it.
     */
    #define TEST_MACRO_128BITS_VARIANTS_2_5(MACRO, VAR1, VAR2) \
      TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
      TEST_MACRO_128BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
      MACRO(VAR1, VAR2, q, poly, p, 8, 16); \
      MACRO(VAR1, VAR2, q, poly, p, 16, 8)
    806 
    /*
     * Applies MACRO to every shape covered by
     * TEST_MACRO_64BITS_VARIANTS_2_5 and then to every shape covered by
     * TEST_MACRO_128BITS_VARIANTS_2_5, in that order.
     *
     * MACRO:      function-like macro taking (VAR1, VAR2, Q, T1, T2, W, N).
     * VAR1, VAR2: constant leading arguments, forwarded unchanged.
     *
     * No trailing semicolon is emitted; the caller supplies it.
     */
    #define TEST_MACRO_ALL_VARIANTS_2_5(MACRO, VAR1, VAR2) \
      TEST_MACRO_64BITS_VARIANTS_2_5(MACRO, VAR1, VAR2); \
      TEST_MACRO_128BITS_VARIANTS_2_5(MACRO, VAR1, VAR2)
    810 
    /*
     * Applies MACRO to the 64-bit signed shapes and then to the 128-bit
     * signed shapes, by chaining the two corresponding _2_5 helpers.
     *
     * MACRO:      function-like macro taking (VAR1, VAR2, Q, T1, T2, W, N).
     * VAR1, VAR2: constant leading arguments, forwarded unchanged.
     *
     * No trailing semicolon is emitted; the caller supplies it.
     */
    #define TEST_MACRO_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) \
      TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
      TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)
    814 
    815 #endif /* _STM_ARM_NEON_REF_H_ */
    816