1 #include <stdio.h> 2 #include <stdlib.h> 3 #include <assert.h> 4 #include <math.h> 5 #include "tests/malloc.h" 6 7 typedef unsigned char UChar; 8 typedef unsigned int UInt; 9 typedef unsigned long int UWord; 10 typedef unsigned long long int ULong; 11 typedef double Double; 12 typedef float Float; 13 14 #define IS_32_ALIGNED(_ptr) (0 == (0x1F & (UWord)(_ptr))) 15 16 typedef union { UChar u8[16]; UInt u32[4]; Float f32[4]; Double f64[2]; } XMM; 17 typedef union { UChar u8[32]; UInt u32[8]; XMM xmm[2]; } YMM; 18 typedef struct { YMM r1; YMM r2; YMM r3; YMM r4; YMM m; } Block; 19 20 void showFloat ( XMM* vec, int idx ) 21 { 22 Float f = vec->f32[idx]; 23 int neg = signbit (f); 24 char sign = neg != 0 ? '-' : ' '; 25 switch (fpclassify (f)) { 26 case FP_NORMAL: { 27 for (int i = idx * 4 + 3; i >= idx * 4; i--) 28 printf("%02x", (UInt)vec->u8[i]); 29 break; 30 } 31 case FP_INFINITE: { 32 printf ("[ %cINF ]", sign); 33 break; 34 } 35 case FP_ZERO: { 36 printf ("[%cZERO ]", sign); 37 break; 38 } 39 case FP_NAN: { 40 printf ("[ NAN ]"); 41 break; 42 } 43 default: { 44 printf ("[%cSUBNR]", sign); 45 break; 46 } 47 } 48 } 49 50 void showDouble ( XMM* vec, int idx ) 51 { 52 Double d = vec->f64[idx]; 53 int neg = signbit (d); 54 char sign = neg != 0 ? '-' : ' '; 55 switch (fpclassify (d)) { 56 case FP_NORMAL: { 57 for (int i = idx * 8 + 7; i >= idx * 8; i--) 58 printf("%02x", (UInt)vec->u8[i]); 59 break; 60 } 61 case FP_INFINITE: { 62 printf ("[ %cINF ]", sign); 63 break; 64 } 65 case FP_ZERO: { 66 printf ("[ %cZERO ]", sign); 67 break; 68 } 69 case FP_NAN: { 70 printf ("[ NAN ]"); 71 break; 72 } 73 default: { 74 printf ("[ %cSUBNORMAL ]", sign); 75 break; 76 } 77 } 78 } 79 80 void showXMM ( XMM* vec, int isDouble ) 81 { 82 if (isDouble) { 83 showDouble ( vec, 1 ); 84 printf ("."); 85 showDouble ( vec, 0 ); 86 } else { 87 showFloat ( vec, 3 ); 88 printf ("."); 89 showFloat ( vec, 2 ); 90 printf ("."); 91 showFloat ( vec, 1 ); 92 printf ("."); 93 showFloat ( vec, 0 ); 94 } 95 } 96 97 void showYMM ( YMM* vec, int isDouble ) 98 { 99 assert(IS_32_ALIGNED(vec)); 100 showXMM ( &vec->xmm[1], isDouble ); 101 printf("."); 102 showXMM ( &vec->xmm[0], isDouble ); 103 } 104 105 void showBlock ( char* msg, Block* block, int isDouble ) 106 { 107 printf(" %s\n", msg); 108 printf("r1: "); showYMM(&block->r1, isDouble); printf("\n"); 109 printf("r2: "); showYMM(&block->r2, isDouble); printf("\n"); 110 printf("r3: "); showYMM(&block->r3, isDouble); printf("\n"); 111 printf("r4: "); showYMM(&block->r4, isDouble); printf("\n"); 112 printf(" m: "); showYMM(&block->m, isDouble); printf("\n"); 113 } 114 115 static Double special_values[10]; 116 117 static __attribute__((noinline)) 118 Double negate ( Double d ) { return -d; } 119 static __attribute__((noinline)) 120 Double divf64 ( Double x, Double y ) { return x/y; } 121 122 static __attribute__((noinline)) 123 Double plusZero ( void ) { return 0.0; } 124 static __attribute__((noinline)) 125 Double minusZero ( void ) { return negate(plusZero()); } 126 127 static __attribute__((noinline)) 128 Double plusOne ( void ) { return 1.0; } 129 static __attribute__((noinline)) 130 Double minusOne ( void ) { return negate(plusOne()); } 131 132 static __attribute__((noinline)) 133 Double plusInf ( void ) { return 1.0 / 0.0; } 134 static __attribute__((noinline)) 135 Double minusInf ( void ) { return negate(plusInf()); } 136 137 static __attribute__((noinline)) 138 Double plusNaN ( void ) { return divf64(plusInf(),plusInf()); } 139 static __attribute__((noinline)) 140 Double minusNaN ( void ) { return negate(plusNaN()); } 141 142 static __attribute__((noinline)) 143 Double plusDenorm ( void ) { return 1.23e-315 / 1e3; } 144 static __attribute__((noinline)) 145 Double minusDenorm ( void ) { return negate(plusDenorm()); } 146 147 static void init_special_values ( void ) 148 { 149 special_values[0] = plusZero(); 150 special_values[1] = minusZero(); 151 special_values[2] = plusOne(); 152 special_values[3] = minusOne(); 153 special_values[4] = plusInf(); 154 special_values[5] = minusInf(); 155 special_values[6] = plusNaN(); 156 special_values[7] = minusNaN(); 157 special_values[8] = plusDenorm(); 158 special_values[9] = minusDenorm(); 159 } 160 161 void specialFBlock ( Block* b ) 162 { 163 int i; 164 Float* p = (Float*)b; 165 for (i = 0; i < sizeof(Block) / sizeof(Float); i++) 166 p[i] = (Float) special_values[i % 10]; 167 } 168 169 void specialDBlock ( Block* b ) 170 { 171 int i; 172 Double* p = (Double*)b; 173 for (i = 0; i < sizeof(Block) / sizeof(Double); i++) 174 p[i] = special_values[i % 10]; 175 } 176 177 UChar randUChar ( void ) 178 { 179 static UInt seed = 80021; 180 seed = 1103515245 * seed + 12345; 181 return (seed >> 17) & 0xFF; 182 } 183 184 void randBlock ( Block* b ) 185 { 186 int i; 187 UChar* p = (UChar*)b; 188 for (i = 0; i < sizeof(Block); i++) 189 p[i] = randUChar(); 190 } 191 192 void oneBlock ( Block* b ) 193 { 194 int i; 195 UChar* p = (UChar*)b; 196 for (i = 0; i < sizeof(Block); i++) 197 p[i] = 1; 198 } 199 200 #define GEN_test(_name, _instr, _isD) \ 201 __attribute__ ((noinline)) void \ 202 test_##_name ( const char *n, Block* b) \ 203 { \ 204 printf("%s %s\n", #_name, n); \ 205 showBlock("before", b, _isD); \ 206 __asm__ __volatile__( \ 207 "vmovdqa 0(%0),%%ymm7" "\n\t" \ 208 "vmovdqa 32(%0),%%ymm8" "\n\t" \ 209 "vmovdqa 64(%0),%%ymm6" "\n\t" \ 210 "vmovdqa 96(%0),%%ymm9" "\n\t" \ 211 "leaq 128(%0),%%r14" "\n\t" \ 212 _instr "\n\t" \ 213 "vmovdqa %%ymm7, 0(%0)" "\n\t" \ 214 "vmovdqa %%ymm8, 32(%0)" "\n\t" \ 215 "vmovdqa %%ymm6, 64(%0)" "\n\t" \ 216 "vmovdqa %%ymm9, 96(%0)" "\n\t" \ 217 : /*OUT*/ \ 218 : /*IN*/"r"(b) \ 219 : /*TRASH*/"xmm7","xmm8","xmm6","xmm9","r14","memory","cc" \ 220 ); \ 221 showBlock("after", b, _isD); \ 222 printf("\n"); \ 223 } 224 225 /* All these defines do the same thing (and someone with stronger 226 preprocessor foo could probably express things much smaller). 227 They generate 4 different functions to test 4 variants of an 228 fma4 instruction. One with as input 4 registers, one where 229 the output register is also one of the input registers and 230 two versions where different inputs are a memory location. 231 The xmm variants create 128 versions, the ymm variants 256. */ 232 233 #define GEN_test_VFMADDPD_xmm(_name) \ 234 GEN_test(_name##_xmm, \ 235 "vfmaddpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \ 236 GEN_test(_name##_xmm_src_dst, \ 237 "vfmaddpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \ 238 GEN_test(_name##_xmm_mem1, \ 239 "vfmaddpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \ 240 GEN_test(_name##_xmm_mem2, \ 241 "vfmaddpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1); 242 GEN_test_VFMADDPD_xmm(VFMADDPD) 243 244 #define GEN_test_VFMADDPD_ymm(_name) \ 245 GEN_test(_name##_ymm, \ 246 "vfmaddpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \ 247 GEN_test(_name##_ymm_src_dst, \ 248 "vfmaddpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \ 249 GEN_test(_name##_ymm_mem1, \ 250 "vfmaddpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \ 251 GEN_test(_name##_ymm_mem2, \ 252 "vfmaddpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1); 253 GEN_test_VFMADDPD_ymm(VFMADDPD) 254 255 #define GEN_test_VFMADDPS_xmm(_name) \ 256 GEN_test(_name##_xmm, \ 257 "vfmaddps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \ 258 GEN_test(_name##_xmm_src_dst, \ 259 "vfmaddps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \ 260 GEN_test(_name##_xmm_mem1, \ 261 "vfmaddps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \ 262 GEN_test(_name##_xmm_mem2, \ 263 "vfmaddps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0); 264 GEN_test_VFMADDPS_xmm(VFMADDPS) 265 266 #define GEN_test_VFMADDPS_ymm(_name) \ 267 GEN_test(_name##_ymm, \ 268 "vfmaddps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \ 269 GEN_test(_name##_ymm_src_dst, \ 270 "vfmaddps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \ 271 GEN_test(_name##_ymm_mem1, \ 272 "vfmaddps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \ 273 GEN_test(_name##_ymm_mem2, \ 274 "vfmaddps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0); 275 GEN_test_VFMADDPS_ymm(VFMADDPS) 276 277 #define GEN_test_VFMADDSD_xmm(_name) \ 278 GEN_test(_name##_xmm, \ 279 "vfmaddsd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \ 280 GEN_test(_name##_xmm_src_dst, \ 281 "vfmaddsd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \ 282 GEN_test(_name##_xmm_mem1, \ 283 "vfmaddsd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \ 284 GEN_test(_name##_xmm_mem2, \ 285 "vfmaddsd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1); 286 GEN_test_VFMADDSD_xmm(VFMADDSD) 287 288 #define GEN_test_VFMADDSS_xmm(_name) \ 289 GEN_test(_name##_xmm, \ 290 "vfmaddss %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \ 291 GEN_test(_name##_xmm_src_dst, \ 292 "vfmaddss %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \ 293 GEN_test(_name##_xmm_mem1, \ 294 "vfmaddss (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \ 295 GEN_test(_name##_xmm_mem2, \ 296 "vfmaddss %%xmm8,(%%r14),%%xmm6,%%xmm9", 0); 297 GEN_test_VFMADDSS_xmm(VFMADDSS) 298 299 #define GEN_test_VFMADDSUBPD_xmm(_name) \ 300 GEN_test(_name##_xmm, \ 301 "vfmaddsubpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \ 302 GEN_test(_name##_xmm_src_dst, \ 303 "vfmaddsubpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \ 304 GEN_test(_name##_xmm_mem1, \ 305 "vfmaddsubpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \ 306 GEN_test(_name##_xmm_mem2, \ 307 "vfmaddsubpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1); 308 GEN_test_VFMADDSUBPD_xmm(VFMADDSUBPD) 309 310 #define GEN_test_VFMADDSUBPD_ymm(_name) \ 311 GEN_test(_name##_ymm, \ 312 "vfmaddsubpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \ 313 GEN_test(_name##_ymm_src_dst, \ 314 "vfmaddsubpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \ 315 GEN_test(_name##_ymm_mem1, \ 316 "vfmaddsubpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \ 317 GEN_test(_name##_ymm_mem2, \ 318 "vfmaddsubpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1); 319 GEN_test_VFMADDSUBPD_ymm(VFMADDSUBPD) 320 321 #define GEN_test_VFMADDSUBPS_xmm(_name) \ 322 GEN_test(_name##_xmm, \ 323 "vfmaddsubps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \ 324 GEN_test(_name##_xmm_src_dst, \ 325 "vfmaddsubps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \ 326 GEN_test(_name##_xmm_mem1, \ 327 "vfmaddsubps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \ 328 GEN_test(_name##_xmm_mem2, \ 329 "vfmaddsubps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0); 330 GEN_test_VFMADDSUBPS_xmm(VFMADDSUBPS) 331 332 #define GEN_test_VFMADDSUBPS_ymm(_name) \ 333 GEN_test(_name##_ymm, \ 334 "vfmaddsubps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \ 335 GEN_test(_name##_ymm_src_dst, \ 336 "vfmaddsubps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \ 337 GEN_test(_name##_ymm_mem1, \ 338 "vfmaddsubps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \ 339 GEN_test(_name##_ymm_mem2, \ 340 "vfmaddsubps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0); 341 GEN_test_VFMADDSUBPS_ymm(VFMADDSUBPS) 342 343 #define GEN_test_VFMSUBADDPD_xmm(_name) \ 344 GEN_test(_name##_xmm, \ 345 "vfmsubaddpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \ 346 GEN_test(_name##_xmm_src_dst, \ 347 "vfmsubaddpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \ 348 GEN_test(_name##_xmm_mem1, \ 349 "vfmsubaddpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \ 350 GEN_test(_name##_xmm_mem2, \ 351 "vfmsubaddpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1); 352 GEN_test_VFMSUBADDPD_xmm(VFMSUBADDPD) 353 354 #define GEN_test_VFMSUBADDPD_ymm(_name) \ 355 GEN_test(_name##_ymm, \ 356 "vfmsubaddpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \ 357 GEN_test(_name##_ymm_src_dst, \ 358 "vfmsubaddpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \ 359 GEN_test(_name##_ymm_mem1, \ 360 "vfmsubaddpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \ 361 GEN_test(_name##_ymm_mem2, \ 362 "vfmsubaddpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1); 363 GEN_test_VFMSUBADDPD_ymm(VFMSUBADDPD) 364 365 #define GEN_test_VFMSUBADDPS_xmm(_name) \ 366 GEN_test(_name##_xmm, \ 367 "vfmsubaddps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \ 368 GEN_test(_name##_xmm_src_dst, \ 369 "vfmsubaddps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \ 370 GEN_test(_name##_xmm_mem1, \ 371 "vfmsubaddps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \ 372 GEN_test(_name##_xmm_mem2, \ 373 "vfmsubaddps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0); 374 GEN_test_VFMSUBADDPS_xmm(VFMSUBADDPS) 375 376 #define GEN_test_VFMSUBADDPS_ymm(_name) \ 377 GEN_test(_name##_ymm, \ 378 "vfmsubaddps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \ 379 GEN_test(_name##_ymm_src_dst, \ 380 "vfmsubaddps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \ 381 GEN_test(_name##_ymm_mem1, \ 382 "vfmsubaddps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \ 383 GEN_test(_name##_ymm_mem2, \ 384 "vfmsubaddps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0); 385 GEN_test_VFMSUBADDPS_ymm(VFMSUBADDPS) 386 387 #define GEN_test_VFMSUBPD_xmm(_name) \ 388 GEN_test(_name##_xmm, \ 389 "vfmsubpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \ 390 GEN_test(_name##_xmm_src_dst, \ 391 "vfmsubpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \ 392 GEN_test(_name##_xmm_mem1, \ 393 "vfmsubpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \ 394 GEN_test(_name##_xmm_mem2, \ 395 "vfmsubpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1); 396 GEN_test_VFMSUBPD_xmm(VFMSUBPD) 397 398 #define GEN_test_VFMSUBPD_ymm(_name) \ 399 GEN_test(_name##_ymm, \ 400 "vfmsubpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \ 401 GEN_test(_name##_ymm_src_dst, \ 402 "vfmsubpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \ 403 GEN_test(_name##_ymm_mem1, \ 404 "vfmsubpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \ 405 GEN_test(_name##_ymm_mem2, \ 406 "vfmsubpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1); 407 GEN_test_VFMSUBPD_ymm(VFMSUBPD) 408 409 #define GEN_test_VFMSUBPS_xmm(_name) \ 410 GEN_test(_name##_xmm, \ 411 "vfmsubps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \ 412 GEN_test(_name##_xmm_src_dst, \ 413 "vfmsubps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \ 414 GEN_test(_name##_xmm_mem1, \ 415 "vfmsubps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \ 416 GEN_test(_name##_xmm_mem2, \ 417 "vfmsubps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0); 418 GEN_test_VFMSUBPS_xmm(VFMSUBPS) 419 420 #define GEN_test_VFMSUBPS_ymm(_name) \ 421 GEN_test(_name##_ymm, \ 422 "vfmsubps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \ 423 GEN_test(_name##_ymm_src_dst, \ 424 "vfmsubps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \ 425 GEN_test(_name##_ymm_mem1, \ 426 "vfmsubps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \ 427 GEN_test(_name##_ymm_mem2, \ 428 "vfmsubps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0); 429 GEN_test_VFMSUBPS_ymm(VFMSUBPS) 430 431 #define GEN_test_VFMSUBSD_xmm(_name) \ 432 GEN_test(_name##_xmm, \ 433 "vfmsubsd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \ 434 GEN_test(_name##_xmm_src_dst, \ 435 "vfmsubsd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \ 436 GEN_test(_name##_xmm_mem1, \ 437 "vfmsubsd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \ 438 GEN_test(_name##_xmm_mem2, \ 439 "vfmsubsd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1); 440 GEN_test_VFMSUBSD_xmm(VFMSUBSD) 441 442 #define GEN_test_VFMSUBSS_xmm(_name) \ 443 GEN_test(_name##_xmm, \ 444 "vfmsubss %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \ 445 GEN_test(_name##_xmm_src_dst, \ 446 "vfmsubss %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \ 447 GEN_test(_name##_xmm_mem1, \ 448 "vfmsubss (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \ 449 GEN_test(_name##_xmm_mem2, \ 450 "vfmsubss %%xmm8,(%%r14),%%xmm6,%%xmm9", 0); 451 GEN_test_VFMSUBSS_xmm(VFMSUBSS) 452 453 #define GEN_test_VFNMADDPD_xmm(_name) \ 454 GEN_test(_name##_xmm, \ 455 "vfnmaddpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \ 456 GEN_test(_name##_xmm_src_dst, \ 457 "vfnmaddpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \ 458 GEN_test(_name##_xmm_mem1, \ 459 "vfnmaddpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \ 460 GEN_test(_name##_xmm_mem2, \ 461 "vfnmaddpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1); 462 GEN_test_VFNMADDPD_xmm(VFNMADDPD) 463 464 #define GEN_test_VFNMADDPD_ymm(_name) \ 465 GEN_test(_name##_ymm, \ 466 "vfnmaddpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \ 467 GEN_test(_name##_ymm_src_dst, \ 468 "vfnmaddpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \ 469 GEN_test(_name##_ymm_mem1, \ 470 "vfnmaddpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \ 471 GEN_test(_name##_ymm_mem2, \ 472 "vfnmaddpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1); 473 GEN_test_VFNMADDPD_ymm(VFNMADDPD) 474 475 #define GEN_test_VFNMADDPS_xmm(_name) \ 476 GEN_test(_name##_xmm, \ 477 "vfnmaddps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \ 478 GEN_test(_name##_xmm_src_dst, \ 479 "vfnmaddps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \ 480 GEN_test(_name##_xmm_mem1, \ 481 "vfnmaddps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \ 482 GEN_test(_name##_xmm_mem2, \ 483 "vfnmaddps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0); 484 GEN_test_VFNMADDPS_xmm(VFNMADDPS) 485 486 #define GEN_test_VFNMADDPS_ymm(_name) \ 487 GEN_test(_name##_ymm, \ 488 "vfnmaddps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \ 489 GEN_test(_name##_ymm_src_dst, \ 490 "vfnmaddps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \ 491 GEN_test(_name##_ymm_mem1, \ 492 "vfnmaddps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \ 493 GEN_test(_name##_ymm_mem2, \ 494 "vfnmaddps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0); 495 GEN_test_VFNMADDPS_ymm(VFNMADDPS) 496 497 #define GEN_test_VFNMADDSD_xmm(_name) \ 498 GEN_test(_name##_xmm, \ 499 "vfnmaddsd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \ 500 GEN_test(_name##_xmm_src_dst, \ 501 "vfnmaddsd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \ 502 GEN_test(_name##_xmm_mem1, \ 503 "vfnmaddsd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \ 504 GEN_test(_name##_xmm_mem2, \ 505 "vfnmaddsd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1); 506 GEN_test_VFNMADDSD_xmm(VFNMADDSD) 507 508 #define GEN_test_VFNMADDSS_xmm(_name) \ 509 GEN_test(_name##_xmm, \ 510 "vfnmaddss %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \ 511 GEN_test(_name##_xmm_src_dst, \ 512 "vfnmaddss %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \ 513 GEN_test(_name##_xmm_mem1, \ 514 "vfnmaddss (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \ 515 GEN_test(_name##_xmm_mem2, \ 516 "vfnmaddss %%xmm8,(%%r14),%%xmm6,%%xmm9", 0); 517 GEN_test_VFNMADDSS_xmm(VFNMADDSS) 518 519 #define GEN_test_VFNMSUBPD_xmm(_name) \ 520 GEN_test(_name##_xmm, \ 521 "vfnmsubpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \ 522 GEN_test(_name##_xmm_src_dst, \ 523 "vfnmsubpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \ 524 GEN_test(_name##_xmm_mem1, \ 525 "vfnmsubpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \ 526 GEN_test(_name##_xmm_mem2, \ 527 "vfnmsubpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1); 528 GEN_test_VFNMSUBPD_xmm(VFNMSUBPD) 529 530 #define GEN_test_VFNMSUBPD_ymm(_name) \ 531 GEN_test(_name##_ymm, \ 532 "vfnmsubpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \ 533 GEN_test(_name##_ymm_src_dst, \ 534 "vfnmsubpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \ 535 GEN_test(_name##_ymm_mem1, \ 536 "vfnmsubpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \ 537 GEN_test(_name##_ymm_mem2, \ 538 "vfnmsubpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1); 539 GEN_test_VFNMSUBPD_ymm(VFNMSUBPD) 540 541 #define GEN_test_VFNMSUBPS_xmm(_name) \ 542 GEN_test(_name##_xmm, \ 543 "vfnmsubps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \ 544 GEN_test(_name##_xmm_src_dst, \ 545 "vfnmsubps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \ 546 GEN_test(_name##_xmm_mem1, \ 547 "vfnmsubps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \ 548 GEN_test(_name##_xmm_mem2, \ 549 "vfnmsubps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0); 550 GEN_test_VFNMSUBPS_xmm(VFNMSUBPS) 551 552 #define GEN_test_VFNMSUBPS_ymm(_name) \ 553 GEN_test(_name##_ymm, \ 554 "vfnmsubps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \ 555 GEN_test(_name##_ymm_src_dst, \ 556 "vfnmsubps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \ 557 GEN_test(_name##_ymm_mem1, \ 558 "vfnmsubps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \ 559 GEN_test(_name##_ymm_mem2, \ 560 "vfnmsubps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0); 561 GEN_test_VFNMSUBPS_ymm(VFNMSUBPS) 562 563 #define GEN_test_VFNMSUBSD_xmm(_name) \ 564 GEN_test(_name##_xmm, \ 565 "vfnmsubsd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \ 566 GEN_test(_name##_xmm_src_dst, \ 567 "vfnmsubsd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \ 568 GEN_test(_name##_xmm_mem1, \ 569 "vfnmsubsd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \ 570 GEN_test(_name##_xmm_mem2, \ 571 "vfnmsubsd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1); 572 GEN_test_VFNMSUBSD_xmm(VFNMSUBSD) 573 574 #define GEN_test_VFNMSUBSS_xmm(_name) \ 575 GEN_test(_name##_xmm, \ 576 "vfnmsubss %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \ 577 GEN_test(_name##_xmm_src_dst, \ 578 "vfnmsubss %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \ 579 GEN_test(_name##_xmm_mem1, \ 580 "vfnmsubss (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \ 581 GEN_test(_name##_xmm_mem2, \ 582 "vfnmsubss %%xmm8,(%%r14),%%xmm6,%%xmm9", 0); 583 GEN_test_VFNMSUBSS_xmm(VFNMSUBSS) 584 585 #define DO_test_block(_name, _sub, _bname, _block) \ 586 test_##_name##_##_sub(_bname,_block); 587 588 #define DO_test(_name, _sub, _isD) { \ 589 Block* b = memalign32(sizeof(Block)); \ 590 oneBlock(b); \ 591 DO_test_block(_name, _sub, "ones", b); \ 592 if (_isD) { \ 593 specialDBlock(b); \ 594 DO_test_block(_name, _sub, "specialD", b); \ 595 } else { \ 596 specialFBlock(b); \ 597 DO_test_block(_name, _sub, "specialF", b); \ 598 } \ 599 randBlock(b); \ 600 DO_test_block(_name, _sub, "rand", b); \ 601 free(b); \ 602 } 603 604 #define DO_tests_xmm(_name,_isD) \ 605 DO_test(_name, xmm, _isD); \ 606 DO_test(_name, xmm_src_dst, _isD); \ 607 DO_test(_name, xmm_mem1, _isD); \ 608 DO_test(_name, xmm_mem2, _isD); 609 610 #define DO_tests_ymm(_name,_isD) \ 611 DO_test(_name, ymm, _isD); \ 612 DO_test(_name, ymm_src_dst, _isD); \ 613 DO_test(_name, ymm_mem1, _isD); \ 614 DO_test(_name, ymm_mem2, _isD); 615 616 int main ( void ) 617 { 618 init_special_values(); 619 620 // 128 621 DO_tests_xmm(VFMADDPD, 1); 622 DO_tests_xmm(VFMADDPS, 0); 623 DO_tests_xmm(VFMADDSD, 1); 624 DO_tests_xmm(VFMADDSS, 0); 625 DO_tests_xmm(VFMADDSUBPD, 1); 626 DO_tests_xmm(VFMADDSUBPS, 0); 627 DO_tests_xmm(VFMSUBADDPD, 1); 628 DO_tests_xmm(VFMSUBADDPS, 0); 629 DO_tests_xmm(VFMSUBPD, 1); 630 DO_tests_xmm(VFMSUBPS, 0); 631 DO_tests_xmm(VFMSUBSD, 1); 632 DO_tests_xmm(VFMSUBSS, 0); 633 DO_tests_xmm(VFNMADDPD, 1); 634 DO_tests_xmm(VFNMADDPS, 0); 635 DO_tests_xmm(VFNMADDSD, 1); 636 DO_tests_xmm(VFNMADDSS, 0); 637 DO_tests_xmm(VFNMSUBPD, 1); 638 DO_tests_xmm(VFNMSUBPS, 0); 639 DO_tests_xmm(VFNMSUBSD, 1); 640 DO_tests_xmm(VFNMSUBSS, 0); 641 642 // 256 643 /* 644 DO_tests_ymm(VFMADDPD, 1); 645 DO_tests_ymm(VFMADDPS, 0); 646 DO_tests_ymm(VFMADDSUBPD, 1); 647 DO_tests_ymm(VFMADDSUBPS, 0); 648 DO_tests_ymm(VFMSUBADDPD, 1); 649 DO_tests_ymm(VFMSUBADDPS, 0); 650 DO_tests_ymm(VFMSUBPD, 1); 651 DO_tests_ymm(VFMSUBPS, 0); 652 DO_tests_ymm(VFNMADDPD, 1); 653 DO_tests_ymm(VFNMADDPS, 0); 654 DO_tests_ymm(VFNMSUBPD, 1); 655 DO_tests_ymm(VFNMSUBPS, 0); 656 */ 657 658 return 0; 659 } 660