1 /*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------=== 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining a copy 4 * of this software and associated documentation files (the "Software"), to deal 5 * in the Software without restriction, including without limitation the rights 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 * copies of the Software, and to permit persons to whom the Software is 8 * furnished to do so, subject to the following conditions: 9 * 10 * The above copyright notice and this permission notice shall be included in 11 * all copies or substantial portions of the Software. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 * THE SOFTWARE. 20 * 21 *===-----------------------------------------------------------------------=== 22 */ 23 #ifndef __IMMINTRIN_H 24 #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead." 
#endif

#ifndef __AVX512FINTRIN_H
#define __AVX512FINTRIN_H

/* Internal element-typed views of a 512-bit register; the suffix encodes
   lane count and element type (e.g. __v16si = 16 x signed int). */
typedef double __v8df __attribute__((__vector_size__(64)));
typedef float __v16sf __attribute__((__vector_size__(64)));
typedef long long __v8di __attribute__((__vector_size__(64)));
typedef int __v16si __attribute__((__vector_size__(64)));

/* Public 512-bit vector types: float, double, and integer lanes. */
typedef float __m512 __attribute__((__vector_size__(64)));
typedef double __m512d __attribute__((__vector_size__(64)));
typedef long long __m512i __attribute__((__vector_size__(64)));

/* Write-mask types: one bit per lane (8 x 64-bit or 16 x 32-bit lanes). */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;

/* Rounding mode macros. */
#define _MM_FROUND_TO_NEAREST_INT 0x00
#define _MM_FROUND_TO_NEG_INF 0x01
#define _MM_FROUND_TO_POS_INF 0x02
#define _MM_FROUND_TO_ZERO 0x03
#define _MM_FROUND_CUR_DIRECTION 0x04

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))

/* Create vectors with repeated elements */

/* Return a 512-bit integer vector of all zeros. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_setzero_si512(void)
{
  return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
}

/* The _mm512_undefined* family returns a vector with unspecified contents;
   used to avoid a spurious zero-fill when the value will be overwritten. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_undefined_pd()
{
  return (__m512d)__builtin_ia32_undef512();
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_undefined()
{
  return (__m512)__builtin_ia32_undef512();
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_undefined_ps()
{
  return (__m512)__builtin_ia32_undef512();
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_undefined_epi32()
{
  return (__m512i)__builtin_ia32_undef512();
}

/* Broadcast __A into the 32-bit lanes selected by __M; other lanes zero. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
{
  return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
                                                           (__v16si)
                                                           _mm512_setzero_si512 (),
                                                           __M);
}

/* Broadcast __A into the 64-bit lanes selected by __M; other lanes zero. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
{
#ifdef __x86_64__
  return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
                                                           (__v8di)
                                                           _mm512_setzero_si512 (),
                                                           __M);
#else
  /* No 64-bit GPR broadcast in 32-bit mode; use the memory-operand form. */
  return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A,
                                                           (__v8di)
                                                           _mm512_setzero_si512 (),
                                                           __M);
#endif
}

/* Return a 512-bit float vector of all zeros. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_setzero_ps(void)
{
  return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                   0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
}

/* Return a 512-bit double vector of all zeros. */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_setzero_pd(void)
{
  return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
}

/* Broadcast a scalar to every lane of the result. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_set1_ps(float __w)
{
  return (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
                   __w, __w, __w, __w, __w, __w, __w, __w };
}

static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_set1_pd(double __w)
{
  return (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
}

static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set1_epi32(int __s)
{
  return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s,
                             __s, __s, __s, __s, __s, __s, __s, __s };
}

static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set1_epi64(long long __d)
{
  return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
}

/* Broadcast element 0 of a 128-bit float vector to all 16 lanes. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_broadcastss_ps(__m128 __X)
{
  float __f = __X[0];
  return (__v16sf){ __f, __f, __f, __f,
                    __f, __f, __f, __f,
                    __f, __f, __f, __f,
                    __f, __f, __f, __f };
}

/* Broadcast element 0 of a 128-bit double vector to all 8 lanes. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_broadcastsd_pd(__m128d __X)
{
  double __d = __X[0];
  return (__v8df){ __d, __d, __d, __d,
                   __d, __d, __d, __d };
}

/* Cast between vector types */

/* Widen 256 -> 512; shuffle index -1 leaves the upper lanes undefined. */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_castpd256_pd512(__m256d __a)
{
  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
}

static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_castps256_ps512(__m256 __a)
{
  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7,
                                 -1, -1, -1, -1, -1, -1, -1, -1);
}

/* Narrow 512 -> 128: keep only the low lanes. */
static __inline __m128d __DEFAULT_FN_ATTRS
_mm512_castpd512_pd128(__m512d __a)
{
  return __builtin_shufflevector(__a, __a, 0, 1);
}

static __inline __m128 __DEFAULT_FN_ATTRS
_mm512_castps512_ps128(__m512 __a)
{
  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
}

/* Bitwise operators */

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_and_epi32(__m512i __a, __m512i __b)
{
  return __a & __b;
}

/* Masked AND: result lanes with a clear bit in __k come from __src. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i) __builtin_ia32_pandd512_mask((__v16si) __a,
                                                (__v16si) __b,
                                                (__v16si) __src,
                                                (__mmask16) __k);
}

/* Masked AND: result lanes with a clear bit in __k are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i) __builtin_ia32_pandd512_mask((__v16si) __a,
                                                (__v16si) __b,
                                                (__v16si)
                                                _mm512_setzero_si512 (),
                                                (__mmask16) __k);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_and_epi64(__m512i __a, __m512i __b)
{
  return __a & __b;
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __a,
                                                 (__v8di) __b,
                                                 (__v8di) __src,
                                                 (__mmask8) __k);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __a,
                                                 (__v8di) __b,
                                                 (__v8di)
                                                 _mm512_setzero_si512 (),
                                                 (__mmask8) __k);
}
241 static __inline__ __m512i __DEFAULT_FN_ATTRS 242 _mm512_andnot_epi32 (__m512i __A, __m512i __B) 243 { 244 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A, 245 (__v16si) __B, 246 (__v16si) 247 _mm512_setzero_si512 (), 248 (__mmask16) -1); 249 } 250 251 static __inline__ __m512i __DEFAULT_FN_ATTRS 252 _mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 253 { 254 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A, 255 (__v16si) __B, 256 (__v16si) __W, 257 (__mmask16) __U); 258 } 259 260 static __inline__ __m512i __DEFAULT_FN_ATTRS 261 _mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 262 { 263 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A, 264 (__v16si) __B, 265 (__v16si) 266 _mm512_setzero_si512 (), 267 (__mmask16) __U); 268 } 269 270 static __inline__ __m512i __DEFAULT_FN_ATTRS 271 _mm512_andnot_epi64 (__m512i __A, __m512i __B) 272 { 273 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A, 274 (__v8di) __B, 275 (__v8di) 276 _mm512_setzero_si512 (), 277 (__mmask8) -1); 278 } 279 280 static __inline__ __m512i __DEFAULT_FN_ATTRS 281 _mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 282 { 283 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A, 284 (__v8di) __B, 285 (__v8di) __W, __U); 286 } 287 288 static __inline__ __m512i __DEFAULT_FN_ATTRS 289 _mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 290 { 291 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A, 292 (__v8di) __B, 293 (__v8di) 294 _mm512_setzero_pd (), 295 __U); 296 } 297 static __inline__ __m512i __DEFAULT_FN_ATTRS 298 _mm512_or_epi32(__m512i __a, __m512i __b) 299 { 300 return __a | __b; 301 } 302 303 static __inline__ __m512i __DEFAULT_FN_ATTRS 304 _mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) 305 { 306 return (__m512i) __builtin_ia32_pord512_mask((__v16si) __a, 307 (__v16si) __b, 308 (__v16si) __src, 309 
(__mmask16) __k); 310 } 311 static __inline__ __m512i __DEFAULT_FN_ATTRS 312 _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b) 313 { 314 return (__m512i) __builtin_ia32_pord512_mask((__v16si) __a, 315 (__v16si) __b, 316 (__v16si) 317 _mm512_setzero_si512 (), 318 (__mmask16) __k); 319 } 320 321 static __inline__ __m512i __DEFAULT_FN_ATTRS 322 _mm512_or_epi64(__m512i __a, __m512i __b) 323 { 324 return __a | __b; 325 } 326 327 static __inline__ __m512i __DEFAULT_FN_ATTRS 328 _mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) 329 { 330 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __a, 331 (__v8di) __b, 332 (__v8di) __src, 333 (__mmask8) __k); 334 } 335 static __inline__ __m512i __DEFAULT_FN_ATTRS 336 _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b) 337 { 338 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __a, 339 (__v8di) __b, 340 (__v8di) 341 _mm512_setzero_si512 (), 342 (__mmask8) __k); 343 } 344 345 static __inline__ __m512i __DEFAULT_FN_ATTRS 346 _mm512_xor_epi32(__m512i __a, __m512i __b) 347 { 348 return __a ^ __b; 349 } 350 351 static __inline__ __m512i __DEFAULT_FN_ATTRS 352 _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) 353 { 354 return (__m512i) __builtin_ia32_pxord512_mask((__v16si) __a, 355 (__v16si) __b, 356 (__v16si) __src, 357 (__mmask16) __k); 358 } 359 static __inline__ __m512i __DEFAULT_FN_ATTRS 360 _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b) 361 { 362 return (__m512i) __builtin_ia32_pxord512_mask((__v16si) __a, 363 (__v16si) __b, 364 (__v16si) 365 _mm512_setzero_si512 (), 366 (__mmask16) __k); 367 } 368 369 static __inline__ __m512i __DEFAULT_FN_ATTRS 370 _mm512_xor_epi64(__m512i __a, __m512i __b) 371 { 372 return __a ^ __b; 373 } 374 375 static __inline__ __m512i __DEFAULT_FN_ATTRS 376 _mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) 377 { 378 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __a, 
                                                 (__v8di) __b,
                                                 (__v8di) __src,
                                                 (__mmask8) __k);
}

/* Masked XOR of 64-bit lanes; zeroing form. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __a,
                                                 (__v8di) __b,
                                                 (__v8di)
                                                 _mm512_setzero_si512 (),
                                                 (__mmask8) __k);
}

/* Whole-register bitwise ops (element width is irrelevant). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_and_si512(__m512i __a, __m512i __b)
{
  return __a & __b;
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_or_si512(__m512i __a, __m512i __b)
{
  return __a | __b;
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_xor_si512(__m512i __a, __m512i __b)
{
  return __a ^ __b;
}

/* Arithmetic */

static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_add_pd(__m512d __a, __m512d __b)
{
  return __a + __b;
}

static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_add_ps(__m512 __a, __m512 __b)
{
  return __a + __b;
}

static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_mul_pd(__m512d __a, __m512d __b)
{
  return __a * __b;
}

static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_mul_ps(__m512 __a, __m512 __b)
{
  return __a * __b;
}

static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_sub_pd(__m512d __a, __m512d __b)
{
  return __a - __b;
}

static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_sub_ps(__m512 __a, __m512 __b)
{
  return __a - __b;
}

/* 64-bit integer add; the __v8di casts select 8 x 64-bit lane semantics. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_add_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v8di) __A + (__v8di) __B);
}

/* Merge-masked add: lanes with a clear bit in __U come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
                                                 (__v8di) __B,
                                                 (__v8di) __W,
                                                 (__mmask8) __U);
}

/* Zero-masked add: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
                                                 (__v8di) __B,
                                                 (__v8di)
                                                 _mm512_setzero_si512 (),
                                                 (__mmask8) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sub_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v8di) __A - (__v8di) __B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
                                                 (__v8di) __B,
                                                 (__v8di) __W,
                                                 (__mmask8) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
                                                 (__v8di) __B,
                                                 (__v8di)
                                                 _mm512_setzero_si512 (),
                                                 (__mmask8) __U);
}

/* 32-bit integer add/sub; same masking pattern as the 64-bit versions. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_add_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v16si) __A + (__v16si) __B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
                                                 (__v16si) __B,
                                                 (__v16si) __W,
                                                 (__mmask16) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
                                                 (__v16si) __B,
                                                 (__v16si)
                                                 _mm512_setzero_si512 (),
                                                 (__mmask16) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sub_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v16si) __A - (__v16si) __B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
                                                 (__v16si) __B,
                                                 (__v16si) __W,
                                                 (__mmask16) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
                                                 (__v16si) __B,
                                                 (__v16si)
                                                 _mm512_setzero_si512 (),
                                                 (__mmask16) __U);
}

/* Packed double max; no exceptions suppressed, current rounding mode. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_max_pd(__m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
                                                 (__v8df) __B,
                                                 (__v8df)
                                                 _mm512_setzero_pd (),
                                                 (__mmask8) -1,
                                                 _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_max_ps(__m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
                                                (__v16sf) __B,
                                                (__v16sf)
                                                _mm512_setzero_ps (),
                                                (__mmask16) -1,
                                                _MM_FROUND_CUR_DIRECTION);
}

/* Scalar float max (low lane only; upper lanes pass through from __A). */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
                                              (__v4sf) __B,
                                              (__v4sf) __W,
                                              (__mmask8) __U,
                                              _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
                                              (__v4sf) __B,
                                              (__v4sf) _mm_setzero_ps (),
                                              (__mmask8) __U,
                                              _MM_FROUND_CUR_DIRECTION);
}

/* Round-control variants must be macros: __R is an immediate. */
#define _mm_max_round_ss(__A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_maxss_round ((__v4sf) __A, (__v4sf) __B, \
  (__v4sf) _mm_setzero_ps(), (__mmask8) -1, __R); })

#define _mm_mask_max_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_maxss_round ((__v4sf) __A, (__v4sf) __B, \
  (__v4sf) __W, (__mmask8) __U,__R); })

#define _mm_maskz_max_round_ss(__U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_maxss_round ((__v4sf) __A, (__v4sf) __B, \
  (__v4sf) _mm_setzero_ps(), (__mmask8) __U,__R); })

/* Scalar double max (low lane only; upper lane passes through from __A). */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
                                               (__v2df) __B,
                                               (__v2df) __W,
                                               (__mmask8) __U,
                                               _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
                                               (__v2df) __B,
                                               (__v2df) _mm_setzero_pd (),
                                               (__mmask8) __U,
                                               _MM_FROUND_CUR_DIRECTION);
}

/* Round-control variants must be macros: __R is an immediate. */
#define _mm_max_round_sd(__A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A, (__v2df) __B, \
  (__v2df) _mm_setzero_pd(), (__mmask8) -1, __R); })

#define _mm_mask_max_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A, (__v2df) __B, \
  (__v2df) __W, (__mmask8) __U,__R); })

#define _mm_maskz_max_round_sd(__U, __A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A, (__v2df) __B, \
  (__v2df) _mm_setzero_pd(), (__mmask8) __U,__R); })

/* Packed integer max: signed/unsigned 32- and 64-bit lanes. */
static __inline __m512i
__DEFAULT_FN_ATTRS
_mm512_max_epi32(__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
                                                  (__v16si) __B,
                                                  (__v16si)
                                                  _mm512_setzero_si512 (),
                                                  (__mmask16) -1);
}

static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_max_epu32(__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
                                                  (__v16si) __B,
                                                  (__v16si)
                                                  _mm512_setzero_si512 (),
                                                  (__mmask16) -1);
}

static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_max_epi64(__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
                                                  (__v8di) __B,
                                                  (__v8di)
                                                  _mm512_setzero_si512 (),
                                                  (__mmask8) -1);
}

static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_max_epu64(__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
                                                  (__v8di) __B,
                                                  (__v8di)
                                                  _mm512_setzero_si512 (),
                                                  (__mmask8) -1);
}

/* Packed float/double min; same structure as the max family above. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_min_pd(__m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
                                                 (__v8df) __B,
                                                 (__v8df)
                                                 _mm512_setzero_pd (),
                                                 (__mmask8) -1,
                                                 _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_min_ps(__m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
                                                (__v16sf) __B,
                                                (__v16sf)
                                                _mm512_setzero_ps (),
                                                (__mmask16) -1,
                                                _MM_FROUND_CUR_DIRECTION);
}

/* Scalar float min (low lane only). */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
                                              (__v4sf) __B,
                                              (__v4sf) __W,
                                              (__mmask8) __U,
                                              _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
                                              (__v4sf) __B,
                                              (__v4sf) _mm_setzero_ps (),
                                              (__mmask8) __U,
                                              _MM_FROUND_CUR_DIRECTION);
}

#define _mm_min_round_ss(__A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_minss_round ((__v4sf) __A, (__v4sf) __B, \
  (__v4sf) _mm_setzero_ps(), (__mmask8) -1, __R); })

#define _mm_mask_min_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_minss_round ((__v4sf) __A, (__v4sf) __B, \
  (__v4sf) __W, (__mmask8) __U,__R); })

#define _mm_maskz_min_round_ss(__U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_minss_round ((__v4sf) __A, (__v4sf) __B, \
  (__v4sf) _mm_setzero_ps(), (__mmask8) __U,__R); })

/* Scalar double min (low lane only). */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
                                               (__v2df) __B,
                                               (__v2df) __W,
                                               (__mmask8) __U,
                                               _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
                                               (__v2df) __B,
                                               (__v2df) _mm_setzero_pd (),
                                               (__mmask8) __U,
                                               _MM_FROUND_CUR_DIRECTION);
}

#define _mm_min_round_sd(__A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_minsd_round ((__v2df) __A, (__v2df) __B, \
  (__v2df) _mm_setzero_pd(), (__mmask8) -1, __R); })

#define _mm_mask_min_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_minsd_round ((__v2df) __A, (__v2df) __B, \
  (__v2df) __W, (__mmask8) __U,__R); })

#define _mm_maskz_min_round_sd(__U, __A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_minsd_round ((__v2df) __A, (__v2df) __B, \
  (__v2df) _mm_setzero_pd(), (__mmask8) __U,__R); })

/* Packed integer min: signed/unsigned 32- and 64-bit lanes. */
static __inline __m512i
__DEFAULT_FN_ATTRS
_mm512_min_epi32(__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
                                                  (__v16si) __B,
                                                  (__v16si)
                                                  _mm512_setzero_si512 (),
                                                  (__mmask16) -1);
}

static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_min_epu32(__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
                                                  (__v16si) __B,
                                                  (__v16si)
                                                  _mm512_setzero_si512 (),
                                                  (__mmask16) -1);
}

static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_min_epi64(__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
                                                  (__v8di) __B,
                                                  (__v8di)
                                                  _mm512_setzero_si512 (),
                                                  (__mmask8) -1);
}

static
__inline __m512i __DEFAULT_FN_ATTRS
_mm512_min_epu64(__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
                                                  (__v8di) __B,
                                                  (__v8di)
                                                  _mm512_setzero_si512 (),
                                                  (__mmask8) -1);
}

/* Widening multiply: signed 32-bit even lanes -> 64-bit products. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mul_epi32(__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
                                                  (__v16si) __Y,
                                                  (__v8di)
                                                  _mm512_setzero_si512 (),
                                                  (__mmask8) -1);
}

static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
                                                  (__v16si) __Y,
                                                  (__v8di) __W, __M);
}

static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
                                                  (__v16si) __Y,
                                                  (__v8di)
                                                  _mm512_setzero_si512 (),
                                                  __M);
}

/* Widening multiply: unsigned 32-bit even lanes -> 64-bit products. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mul_epu32(__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
                                                   (__v16si) __Y,
                                                   (__v8di)
                                                   _mm512_setzero_si512 (),
                                                   (__mmask8) -1);
}

static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
                                                   (__v16si) __Y,
                                                   (__v8di) __W, __M);
}

static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
                                                   (__v16si) __Y,
                                                   (__v8di)
                                                   _mm512_setzero_si512 (),
                                                   __M);
}

/* Low 32 bits of the 32x32 product in each lane. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mullo_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v16si) __A * (__v16si) __B);
}

static __inline
__m512i __DEFAULT_FN_ATTRS
_mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
                                                  (__v16si) __B,
                                                  (__v16si)
                                                  _mm512_setzero_si512 (),
                                                  __M);
}

static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
                                                  (__v16si) __B,
                                                  (__v16si) __W, __M);
}

/* Packed square root, current rounding mode. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_sqrt_pd(__m512d __a)
{
  return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)__a,
                                                (__v8df) _mm512_setzero_pd (),
                                                (__mmask8) -1,
                                                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_sqrt_ps(__m512 __a)
{
  return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__a,
                                               (__v16sf) _mm512_setzero_ps (),
                                               (__mmask16) -1,
                                               _MM_FROUND_CUR_DIRECTION);
}

/* Approximate reciprocal square root (14-bit precision). */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_rsqrt14_pd(__m512d __A)
{
  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
                                                     (__v8df)
                                                     _mm512_setzero_pd (),
                                                     (__mmask8) -1);}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_rsqrt14_ps(__m512 __A)
{
  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
                                                    (__v16sf)
                                                    _mm512_setzero_ps (),
                                                    (__mmask16) -1);
}

/* Scalar approximate reciprocal square root (low lane only). */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_rsqrt14_ss(__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __A,
                                            (__v4sf) __B,
                                            (__v4sf)
                                            _mm_setzero_ps (),
                                            (__mmask8) -1);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_rsqrt14_sd(__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __A,
                                             (__v2df) __B,
                                             (__v2df)
                                             _mm_setzero_pd (),
                                             (__mmask8) -1);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_rcp14_pd(__m512d __A)
{
  /* Approximate reciprocal (14-bit precision). */
  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
                                                   (__v8df)
                                                   _mm512_setzero_pd (),
                                                   (__mmask8) -1);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_rcp14_ps(__m512 __A)
{
  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
                                                  (__v16sf)
                                                  _mm512_setzero_ps (),
                                                  (__mmask16) -1);
}

/* Scalar approximate reciprocal (low lane only). */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_rcp14_ss(__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __A,
                                          (__v4sf) __B,
                                          (__v4sf)
                                          _mm_setzero_ps (),
                                          (__mmask8) -1);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_rcp14_sd(__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __A,
                                           (__v2df) __B,
                                           (__v2df)
                                           _mm_setzero_pd (),
                                           (__mmask8) -1);
}

/* floor/ceil implemented via VRNDSCALE with a fixed rounding immediate. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_floor_ps(__m512 __A)
{
  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
                                                  _MM_FROUND_FLOOR,
                                                  (__v16sf) __A, -1,
                                                  _MM_FROUND_CUR_DIRECTION);
}

static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_floor_pd(__m512d __A)
{
  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
                                                   _MM_FROUND_FLOOR,
                                                   (__v8df) __A, -1,
                                                   _MM_FROUND_CUR_DIRECTION);
}

static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_ceil_ps(__m512 __A)
{
  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
                                                  _MM_FROUND_CEIL,
                                                  (__v16sf) __A, -1,
                                                  _MM_FROUND_CUR_DIRECTION);
}

static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_ceil_pd(__m512d __A)
{
  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
                                                   _MM_FROUND_CEIL,
                                                   (__v8df) __A, -1,
                                                   _MM_FROUND_CUR_DIRECTION);
}

/* Per-lane absolute value of 64-bit integers. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_abs_epi64(__m512i __A)
{
  return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
                                                 (__v8di)
                                                 _mm512_setzero_si512 (),
                                                 (__mmask8) -1);
}

/* Per-lane absolute value of 32-bit integers. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_abs_epi32(__m512i __A)
{
  return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
                                                 (__v16si)
                                                 _mm512_setzero_si512 (),
                                                 (__mmask16) -1);
}

/* Masked scalar float add (low lane only). */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
                                              (__v4sf) __B,
                                              (__v4sf) __W,
                                              (__mmask8) __U,
                                              _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
                                              (__v4sf) __B,
                                              (__v4sf) _mm_setzero_ps (),
                                              (__mmask8) __U,
                                              _MM_FROUND_CUR_DIRECTION);
}

/* Round-control variants must be macros: __R is an immediate. */
#define _mm_add_round_ss(__A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_addss_round ((__v4sf) __A, (__v4sf) __B, \
  (__v4sf) _mm_setzero_ps(), (__mmask8) -1, __R); })

#define _mm_mask_add_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_addss_round ((__v4sf) __A, (__v4sf) __B, \
  (__v4sf) __W, (__mmask8) __U,__R); })

#define _mm_maskz_add_round_ss(__U, __A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_addss_round ((__v4sf) __A, (__v4sf) __B, \
  (__v4sf) _mm_setzero_ps(), (__mmask8) __U,__R); })

/* Masked scalar double add (low lane only). */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
                                               (__v2df) __B,
                                               (__v2df) __W,
                                               (__mmask8) __U,
                                               _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
                                               (__v2df) __B,
                                               (__v2df) _mm_setzero_pd (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
/* add_sd with explicit static rounding (__R must be a constant). */
#define _mm_add_round_sd(__A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_addsd_round ((__v2df) __A, (__v2df) __B, \
                (__v2df) _mm_setzero_pd(), (__mmask8) -1, __R); })

#define _mm_mask_add_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_addsd_round ((__v2df) __A, (__v2df) __B, \
                (__v2df) __W, (__mmask8) __U,__R); })

#define _mm_maskz_add_round_sd(__U, __A, __B, __R) __extension__ ({ \
  (__m128d) __builtin_ia32_addsd_round ((__v2df) __A, (__v2df) __B, \
                (__v2df) _mm_setzero_pd(), (__mmask8) __U,__R); })

/* 512-bit packed double add, merge-masked: lanes with a clear mask bit
   take the corresponding element of __W. Current rounding mode. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
             (__v8df) __B,
             (__v8df) __W,
             (__mmask8) __U,
             _MM_FROUND_CUR_DIRECTION);
}

/* 512-bit packed double add, zero-masked. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
             (__v8df) __B,
             (__v8df) _mm512_setzero_pd (),
             (__mmask8) __U,
             _MM_FROUND_CUR_DIRECTION);
}

/* 512-bit packed float add, merge-masked. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
             (__v16sf) __B,
             (__v16sf) __W,
             (__mmask16) __U,
             _MM_FROUND_CUR_DIRECTION);
}

/* 512-bit packed float add, zero-masked. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
             (__v16sf) __B,
             (__v16sf) _mm512_setzero_ps (),
             (__mmask16) __U,
             _MM_FROUND_CUR_DIRECTION);
}

/* Packed add with explicit static rounding (__R constant). */
#define _mm512_add_round_pd(__A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, \
             (__v8df) __B, \
             (__v8df) _mm512_setzero_pd(), (__mmask8) -1, __R); })

#define _mm512_mask_add_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_addpd512_mask((__v8df) __A, (__v8df) __B, \
             (__v8df) __W, (__mmask8) __U, __R); })

#define _mm512_maskz_add_round_pd(__U, __A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, (__v8df) __B, \
             (__v8df) _mm512_setzero_pd(), (__mmask8) __U, __R); })

#define _mm512_add_round_ps(__A, __B, __R) __extension__ ({ \
  (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, (__v16sf) __B, \
             (__v16sf) _mm512_setzero_ps(), (__mmask16) -1, __R); })

#define _mm512_mask_add_round_ps(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, (__v16sf) __B, \
             (__v16sf) __W, (__mmask16)__U, __R); })

#define _mm512_maskz_add_round_ps(__U, __A, __B, __R) __extension__ ({ \
  (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, (__v16sf) __B, \
             (__v16sf) _mm512_setzero_ps(), (__mmask16)__U, __R); })

/* Scalar single-precision subtract, merge-masked. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Scalar single-precision subtract, zero-masked. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf) _mm_setzero_ps (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
#define _mm_sub_round_ss(__A, __B, __R) __extension__ ({ \
  (__m128) __builtin_ia32_subss_round ((__v4sf) __A, (__v4sf) __B, \
                (__v4sf) _mm_setzero_ps(), (__mmask8) -1, __R); })

#define _mm_mask_sub_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
(__m128) __builtin_ia32_subss_round ((__v4sf) __A, (__v4sf) __B, \ 1162 (__v4sf) __W, (__mmask8) __U,__R); }) 1163 1164 #define _mm_maskz_sub_round_ss(__U, __A, __B, __R) __extension__ ({ \ 1165 (__m128) __builtin_ia32_subss_round ((__v4sf) __A, (__v4sf) __B, \ 1166 (__v4sf) _mm_setzero_ps(), (__mmask8) __U,__R); }) 1167 1168 static __inline__ __m128d __DEFAULT_FN_ATTRS 1169 _mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { 1170 return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A, 1171 (__v2df) __B, 1172 (__v2df) __W, 1173 (__mmask8) __U, 1174 _MM_FROUND_CUR_DIRECTION); 1175 } 1176 1177 static __inline__ __m128d __DEFAULT_FN_ATTRS 1178 _mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) { 1179 return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A, 1180 (__v2df) __B, 1181 (__v2df) _mm_setzero_pd (), 1182 (__mmask8) __U, 1183 _MM_FROUND_CUR_DIRECTION); 1184 } 1185 1186 #define _mm_sub_round_sd(__A, __B, __R) __extension__ ({ \ 1187 (__m128d) __builtin_ia32_subsd_round ((__v2df) __A, (__v2df) __B, \ 1188 (__v2df) _mm_setzero_pd(), (__mmask8) -1, __R); }) 1189 1190 #define _mm_mask_sub_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \ 1191 (__m128d) __builtin_ia32_subsd_round ((__v2df) __A, (__v2df) __B, \ 1192 (__v2df) __W, (__mmask8) __U,__R); }) 1193 1194 #define _mm_maskz_sub_round_sd(__U, __A, __B, __R) __extension__ ({ \ 1195 (__m128d) __builtin_ia32_subsd_round ((__v2df) __A, (__v2df) __B, \ 1196 (__v2df) _mm_setzero_pd(), (__mmask8) __U,__R); }) 1197 1198 static __inline__ __m512d __DEFAULT_FN_ATTRS 1199 _mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { 1200 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, 1201 (__v8df) __B, 1202 (__v8df) __W, 1203 (__mmask8) __U, 1204 _MM_FROUND_CUR_DIRECTION); 1205 } 1206 1207 static __inline__ __m512d __DEFAULT_FN_ATTRS 1208 _mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) { 1209 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, 
1210 (__v8df) __B, 1211 (__v8df) 1212 _mm512_setzero_pd (), 1213 (__mmask8) __U, 1214 _MM_FROUND_CUR_DIRECTION); 1215 } 1216 1217 static __inline__ __m512 __DEFAULT_FN_ATTRS 1218 _mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { 1219 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, 1220 (__v16sf) __B, 1221 (__v16sf) __W, 1222 (__mmask16) __U, 1223 _MM_FROUND_CUR_DIRECTION); 1224 } 1225 1226 static __inline__ __m512 __DEFAULT_FN_ATTRS 1227 _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) { 1228 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, 1229 (__v16sf) __B, 1230 (__v16sf) 1231 _mm512_setzero_ps (), 1232 (__mmask16) __U, 1233 _MM_FROUND_CUR_DIRECTION); 1234 } 1235 1236 #define _mm512_sub_round_pd(__A, __B, __R) __extension__ ({ \ 1237 (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, (__v8df) __B,\ 1238 (__v8df) _mm512_setzero_pd(), (__mmask8) -1, __R); }) 1239 1240 #define _mm512_mask_sub_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \ 1241 (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, (__v8df) __B, \ 1242 (__v8df) __W, (__mmask8) __U, __R); }) 1243 1244 #define _mm512_maskz_sub_round_pd(__U, __A, __B, __R) __extension__ ({ \ 1245 (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, (__v8df) __B, \ 1246 (__v8df) _mm512_setzero_pd(), (__mmask8) __U, __R);}) 1247 1248 #define _mm512_sub_round_ps(__A, __B, __R) __extension__ ({ \ 1249 (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, (__v16sf) __B, \ 1250 (__v16sf) _mm512_setzero_ps (), (__mmask16) -1, __R);}) 1251 1252 #define _mm512_mask_sub_round_ps(__W, __U, __A, __B, __R) __extension__ ({ \ 1253 (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, (__v16sf) __B, \ 1254 (__v16sf) __W, (__mmask16) __U, __R); }); 1255 1256 #define _mm512_maskz_sub_round_ps(__U, __A, __B, __R) __extension__ ({ \ 1257 (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, (__v16sf) __B, \ 1258 (__v16sf) _mm512_setzero_ps (), (__mmask16) __U, __R);}); 
1259 1260 static __inline__ __m128 __DEFAULT_FN_ATTRS 1261 _mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { 1262 return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A, 1263 (__v4sf) __B, 1264 (__v4sf) __W, 1265 (__mmask8) __U, 1266 _MM_FROUND_CUR_DIRECTION); 1267 } 1268 1269 static __inline__ __m128 __DEFAULT_FN_ATTRS 1270 _mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) { 1271 return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A, 1272 (__v4sf) __B, 1273 (__v4sf) _mm_setzero_ps (), 1274 (__mmask8) __U, 1275 _MM_FROUND_CUR_DIRECTION); 1276 } 1277 #define _mm_mul_round_ss(__A, __B, __R) __extension__ ({ \ 1278 (__m128) __builtin_ia32_mulss_round ((__v4sf) __A, (__v4sf) __B, \ 1279 (__v4sf) _mm_setzero_ps(), (__mmask8) -1, __R); }) 1280 1281 #define _mm_mask_mul_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \ 1282 (__m128) __builtin_ia32_mulss_round ((__v4sf) __A, (__v4sf) __B, \ 1283 (__v4sf) __W, (__mmask8) __U,__R); }) 1284 1285 #define _mm_maskz_mul_round_ss(__U, __A, __B, __R) __extension__ ({ \ 1286 (__m128) __builtin_ia32_mulss_round ((__v4sf) __A, (__v4sf) __B, \ 1287 (__v4sf) _mm_setzero_ps(), (__mmask8) __U,__R); }) 1288 1289 static __inline__ __m128d __DEFAULT_FN_ATTRS 1290 _mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { 1291 return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A, 1292 (__v2df) __B, 1293 (__v2df) __W, 1294 (__mmask8) __U, 1295 _MM_FROUND_CUR_DIRECTION); 1296 } 1297 1298 static __inline__ __m128d __DEFAULT_FN_ATTRS 1299 _mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) { 1300 return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A, 1301 (__v2df) __B, 1302 (__v2df) _mm_setzero_pd (), 1303 (__mmask8) __U, 1304 _MM_FROUND_CUR_DIRECTION); 1305 } 1306 1307 #define _mm_mul_round_sd(__A, __B, __R) __extension__ ({ \ 1308 (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A, (__v2df) __B, \ 1309 (__v2df) _mm_setzero_pd(), (__mmask8) -1, __R); }) 1310 1311 #define 
_mm_mask_mul_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \ 1312 (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A, (__v2df) __B, \ 1313 (__v2df) __W, (__mmask8) __U,__R); }) 1314 1315 #define _mm_maskz_mul_round_sd(__U, __A, __B, __R) __extension__ ({ \ 1316 (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A, (__v2df) __B, \ 1317 (__v2df) _mm_setzero_pd(), (__mmask8) __U,__R); }) 1318 1319 static __inline__ __m512d __DEFAULT_FN_ATTRS 1320 _mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { 1321 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, 1322 (__v8df) __B, 1323 (__v8df) __W, 1324 (__mmask8) __U, 1325 _MM_FROUND_CUR_DIRECTION); 1326 } 1327 1328 static __inline__ __m512d __DEFAULT_FN_ATTRS 1329 _mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) { 1330 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, 1331 (__v8df) __B, 1332 (__v8df) 1333 _mm512_setzero_pd (), 1334 (__mmask8) __U, 1335 _MM_FROUND_CUR_DIRECTION); 1336 } 1337 1338 static __inline__ __m512 __DEFAULT_FN_ATTRS 1339 _mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { 1340 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, 1341 (__v16sf) __B, 1342 (__v16sf) __W, 1343 (__mmask16) __U, 1344 _MM_FROUND_CUR_DIRECTION); 1345 } 1346 1347 static __inline__ __m512 __DEFAULT_FN_ATTRS 1348 _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) { 1349 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, 1350 (__v16sf) __B, 1351 (__v16sf) 1352 _mm512_setzero_ps (), 1353 (__mmask16) __U, 1354 _MM_FROUND_CUR_DIRECTION); 1355 } 1356 1357 #define _mm512_mul_round_pd(__A, __B, __R) __extension__ ({ \ 1358 (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, (__v8df) __B,\ 1359 (__v8df) _mm512_setzero_pd(), (__mmask8) -1, __R); }) 1360 1361 #define _mm512_mask_mul_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \ 1362 (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, (__v8df) __B, \ 1363 (__v8df) __W, 
(__mmask8) __U, __R); }) 1364 1365 #define _mm512_maskz_mul_round_pd(__U, __A, __B, __R) __extension__ ({ \ 1366 (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, (__v8df) __B, \ 1367 (__v8df) _mm512_setzero_pd(), (__mmask8) __U, __R);}) 1368 1369 #define _mm512_mul_round_ps(__A, __B, __R) __extension__ ({ \ 1370 (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, (__v16sf) __B, \ 1371 (__v16sf) _mm512_setzero_ps (), (__mmask16) -1, __R);}) 1372 1373 #define _mm512_mask_mul_round_ps(__W, __U, __A, __B, __R) __extension__ ({ \ 1374 (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, (__v16sf) __B, \ 1375 (__v16sf) __W, (__mmask16) __U, __R); }); 1376 1377 #define _mm512_maskz_mul_round_ps(__U, __A, __B, __R) __extension__ ({ \ 1378 (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, (__v16sf) __B, \ 1379 (__v16sf) _mm512_setzero_ps (), (__mmask16) __U, __R);}); 1380 1381 static __inline__ __m128 __DEFAULT_FN_ATTRS 1382 _mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { 1383 return (__m128) __builtin_ia32_divss_round ((__v4sf) __A, 1384 (__v4sf) __B, 1385 (__v4sf) __W, 1386 (__mmask8) __U, 1387 _MM_FROUND_CUR_DIRECTION); 1388 } 1389 1390 static __inline__ __m128 __DEFAULT_FN_ATTRS 1391 _mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) { 1392 return (__m128) __builtin_ia32_divss_round ((__v4sf) __A, 1393 (__v4sf) __B, 1394 (__v4sf) _mm_setzero_ps (), 1395 (__mmask8) __U, 1396 _MM_FROUND_CUR_DIRECTION); 1397 } 1398 1399 #define _mm_div_round_ss(__A, __B, __R) __extension__ ({ \ 1400 (__m128) __builtin_ia32_divss_round ((__v4sf) __A, (__v4sf) __B, \ 1401 (__v4sf) _mm_setzero_ps(), (__mmask8) -1, __R); }) 1402 1403 #define _mm_mask_div_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \ 1404 (__m128) __builtin_ia32_divss_round ((__v4sf) __A, (__v4sf) __B, \ 1405 (__v4sf) __W, (__mmask8) __U,__R); }) 1406 1407 #define _mm_maskz_div_round_ss(__U, __A, __B, __R) __extension__ ({ \ 1408 (__m128) __builtin_ia32_divss_round ((__v4sf) __A, 
(__v4sf) __B, \ 1409 (__v4sf) _mm_setzero_ps(), (__mmask8) __U,__R); }) 1410 1411 static __inline__ __m128d __DEFAULT_FN_ATTRS 1412 _mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { 1413 return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A, 1414 (__v2df) __B, 1415 (__v2df) __W, 1416 (__mmask8) __U, 1417 _MM_FROUND_CUR_DIRECTION); 1418 } 1419 1420 static __inline__ __m128d __DEFAULT_FN_ATTRS 1421 _mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) { 1422 return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A, 1423 (__v2df) __B, 1424 (__v2df) _mm_setzero_pd (), 1425 (__mmask8) __U, 1426 _MM_FROUND_CUR_DIRECTION); 1427 } 1428 1429 #define _mm_div_round_sd(__A, __B, __R) __extension__ ({ \ 1430 (__m128d) __builtin_ia32_divsd_round ((__v2df) __A, (__v2df) __B, \ 1431 (__v2df) _mm_setzero_pd(), (__mmask8) -1, __R); }) 1432 1433 #define _mm_mask_div_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \ 1434 (__m128d) __builtin_ia32_divsd_round ((__v2df) __A, (__v2df) __B, \ 1435 (__v2df) __W, (__mmask8) __U,__R); }) 1436 1437 #define _mm_maskz_div_round_sd(__U, __A, __B, __R) __extension__ ({ \ 1438 (__m128d) __builtin_ia32_divsd_round ((__v2df) __A, (__v2df) __B, \ 1439 (__v2df) _mm_setzero_pd(), (__mmask8) __U,__R); }) 1440 1441 static __inline__ __m512d __DEFAULT_FN_ATTRS 1442 _mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { 1443 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A, 1444 (__v8df) __B, 1445 (__v8df) __W, 1446 (__mmask8) __U, 1447 _MM_FROUND_CUR_DIRECTION); 1448 } 1449 1450 static __inline__ __m512d __DEFAULT_FN_ATTRS 1451 _mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) { 1452 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A, 1453 (__v8df) __B, 1454 (__v8df) 1455 _mm512_setzero_pd (), 1456 (__mmask8) __U, 1457 _MM_FROUND_CUR_DIRECTION); 1458 } 1459 1460 static __inline__ __m512 __DEFAULT_FN_ATTRS 1461 _mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 
{ 1462 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, 1463 (__v16sf) __B, 1464 (__v16sf) __W, 1465 (__mmask16) __U, 1466 _MM_FROUND_CUR_DIRECTION); 1467 } 1468 1469 static __inline__ __m512 __DEFAULT_FN_ATTRS 1470 _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) { 1471 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, 1472 (__v16sf) __B, 1473 (__v16sf) 1474 _mm512_setzero_ps (), 1475 (__mmask16) __U, 1476 _MM_FROUND_CUR_DIRECTION); 1477 } 1478 1479 #define _mm512_div_round_pd(__A, __B, __R) __extension__ ({ \ 1480 (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A, (__v8df) __B,\ 1481 (__v8df) _mm512_setzero_pd(), (__mmask8) -1, __R); }) 1482 1483 #define _mm512_mask_div_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \ 1484 (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A, (__v8df) __B, \ 1485 (__v8df) __W, (__mmask8) __U, __R); }) 1486 1487 #define _mm512_maskz_div_round_pd(__U, __A, __B, __R) __extension__ ({ \ 1488 (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A, (__v8df) __B, \ 1489 (__v8df) _mm512_setzero_pd(), (__mmask8) __U, __R);}) 1490 1491 #define _mm512_div_round_ps(__A, __B, __R) __extension__ ({ \ 1492 (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, (__v16sf) __B, \ 1493 (__v16sf) _mm512_setzero_ps (), (__mmask16) -1, __R);}) 1494 1495 #define _mm512_mask_div_round_ps(__W, __U, __A, __B, __R) __extension__ ({ \ 1496 (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, (__v16sf) __B, \ 1497 (__v16sf) __W, (__mmask16) __U, __R); }); 1498 1499 #define _mm512_maskz_div_round_ps(__U, __A, __B, __R) __extension__ ({ \ 1500 (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, (__v16sf) __B, \ 1501 (__v16sf) _mm512_setzero_ps (), (__mmask16) __U, __R);}); 1502 1503 #define _mm512_roundscale_ps(A, B) __extension__ ({ \ 1504 (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(A), (B), (__v16sf)(A), \ 1505 -1, _MM_FROUND_CUR_DIRECTION); }) 1506 1507 #define _mm512_roundscale_pd(A, B) __extension__ ({ \ 1508 
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(A), (B), (__v8df)(A), \
                 -1, _MM_FROUND_CUR_DIRECTION); })

/* FMA (a*b+c) on packed doubles with explicit static rounding.  Variant
   naming: mask = merge with A; mask3 = merge with C; maskz = zero lanes.
   fmsub/fnmadd/fnmsub are derived by negating the C and/or A operand. */
#define _mm512_fmadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) (A), \
             (__v8df) (B), (__v8df) (C), \
             (__mmask8) -1, (R)); })


#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) (A), \
             (__v8df) (B), (__v8df) (C), \
             (__mmask8) (U), (R)); })


#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) (A), \
             (__v8df) (B), (__v8df) (C), \
             (__mmask8) (U), (R)); })


#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) (A), \
             (__v8df) (B), (__v8df) (C), \
             (__mmask8) (U), (R)); })


#define _mm512_fmsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) (A), \
             (__v8df) (B), -(__v8df) (C), \
             (__mmask8) -1, (R)); })


#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) (A), \
             (__v8df) (B), -(__v8df) (C), \
             (__mmask8) (U), (R)); })


#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) (A), \
             (__v8df) (B), -(__v8df) (C), \
             (__mmask8) (U), (R)); })


#define _mm512_fnmadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) (A), \
             (__v8df) (B), (__v8df) (C), \
             (__mmask8) -1, (R)); })


#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) (A), \
             (__v8df) (B), (__v8df) (C), \
             (__mmask8) (U), (R)); })


#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) (A), \
             (__v8df) (B), (__v8df) (C), \
             (__mmask8) (U), (R)); })


#define _mm512_fnmsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) (A), \
             (__v8df) (B), -(__v8df) (C), \
             (__mmask8) -1, (R)); })


#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) (A), \
             (__v8df) (B), -(__v8df) (C), \
             (__mmask8) (U), (R)); })


/* FMA on packed doubles, current rounding mode; unmasked. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
             (__v8df) __B,
             (__v8df) __C,
             (__mmask8) -1,
             _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked against __A (first source). */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
             (__v8df) __B,
             (__v8df) __C,
             (__mmask8) __U,
             _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked against __C (addend), per the mask3 builtin. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
             (__v8df) __B,
             (__v8df) __C,
             (__mmask8) __U,
             _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
             (__v8df) __B,
             (__v8df) __C,
             (__mmask8) __U,
             _MM_FROUND_CUR_DIRECTION);
}

/* a*b-c, implemented by negating __C into the fmadd builtin. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
             (__v8df) __B,
             -(__v8df) __C,
             (__mmask8) -1,
             _MM_FROUND_CUR_DIRECTION);
}

/* a*b-c, merge-masked against __A. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
             (__v8df) __B,
             -(__v8df) __C,
             (__mmask8) __U,
             _MM_FROUND_CUR_DIRECTION);
}

/* a*b-c, zero-masked. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
             (__v8df) __B,
             -(__v8df) __C,
             (__mmask8) __U,
             _MM_FROUND_CUR_DIRECTION);
}

/* -(a*b)+c, via negating __A. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
             (__v8df) __B,
             (__v8df) __C,
             (__mmask8) -1,
             _MM_FROUND_CUR_DIRECTION);
}

/* -(a*b)+c, merge-masked against __C. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
             (__v8df) __B,
             (__v8df) __C,
             (__mmask8) __U,
             _MM_FROUND_CUR_DIRECTION);
}

/* -(a*b)+c, zero-masked. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
             (__v8df) __B,
             (__v8df) __C,
             (__mmask8) __U,
             _MM_FROUND_CUR_DIRECTION);
}

/* -(a*b)-c, via negating both __A and __C. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
             (__v8df) __B,
             -(__v8df) __C,
             (__mmask8) -1,
             _MM_FROUND_CUR_DIRECTION);
}

/* -(a*b)-c, zero-masked. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
             (__v8df) __B,
             -(__v8df) __C,
             (__mmask8) __U,
             _MM_FROUND_CUR_DIRECTION);
}

/* Single-precision FMA round macros; same naming scheme as the _pd set
   (mask merges with A, mask3 with C, maskz zeroes). */
#define _mm512_fmadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) (A), \
            (__v16sf) (B), (__v16sf) (C), \
            (__mmask16) -1, (R)); })


#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) (A), \
            (__v16sf) (B), (__v16sf) (C), \
            (__mmask16) (U), (R)); })


#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) (A), \
            (__v16sf) (B), (__v16sf) (C), \
            (__mmask16) (U), (R)); })


#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) (A), \
            (__v16sf) (B), (__v16sf) (C), \
            (__mmask16) (U), (R)); })


#define _mm512_fmsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) (A), \
            (__v16sf) (B), -(__v16sf) (C), \
            (__mmask16) -1, (R)); })


#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) (A), \
            (__v16sf) (B), -(__v16sf) (C), \
            (__mmask16) (U), (R)); })


#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) (A), \
            (__v16sf) (B), -(__v16sf) (C), \
            (__mmask16) (U), (R)); })


#define _mm512_fnmadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) (A), \
            (__v16sf) (B), (__v16sf) (C), \
            (__mmask16) -1, (R)); })


#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) (A), \
            (__v16sf) (B), (__v16sf) (C), \
            (__mmask16) (U), (R)); })


#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) (A), \
            (__v16sf) (B), (__v16sf) (C), \
            (__mmask16) (U), (R)); })


#define _mm512_fnmsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) (A), \
            (__v16sf) (B), -(__v16sf) (C), \
            (__mmask16) -1, (R)); })


#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) (A), \
            (__v16sf) (B), -(__v16sf) (C), \
            (__mmask16) (U), (R)); })


/* FMA on packed floats, current rounding mode; unmasked. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
             (__v16sf) __B,
             (__v16sf) __C,
             (__mmask16) -1,
             _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked against __A. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
             (__v16sf) __B,
             (__v16sf) __C,
             (__mmask16) __U,
             _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked against __C. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
             (__v16sf) __B,
             (__v16sf) __C,
             (__mmask16) __U,
             _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
             (__v16sf) __B,
             (__v16sf) __C,
             (__mmask16) __U,
             _MM_FROUND_CUR_DIRECTION);
}

/* a*b-c variants (negated __C). */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
             (__v16sf) __B,
             -(__v16sf) __C,
             (__mmask16) -1,
             _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
             (__v16sf) __B,
             -(__v16sf) __C,
             (__mmask16) __U,
             _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
             (__v16sf) __B,
             -(__v16sf) __C,
             (__mmask16) __U,
             _MM_FROUND_CUR_DIRECTION);
}

/* -(a*b)+c variants (negated __A). */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
             (__v16sf) __B,
             (__v16sf) __C,
             (__mmask16) -1,
             _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
             (__v16sf) __B,
             (__v16sf) __C,
             (__mmask16) __U,
             _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
             (__v16sf) __B,
             (__v16sf) __C,
             (__mmask16) __U,
             _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
{
  /* -(a*b)-c: both __A and __C are negated into the fmadd builtin. */
  return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
             (__v16sf) __B,
             -(__v16sf) __C,
             (__mmask16) -1,
             _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
             (__v16sf) __B,
             -(__v16sf) __C,
             (__mmask16) __U,
             _MM_FROUND_CUR_DIRECTION);
}

/* fmaddsub: alternately add/subtract c across lanes; fmsubadd derived by
   negating C. Round macros take a static rounding mode R. */
#define _mm512_fmaddsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) (A), \
             (__v8df) (B), (__v8df) (C), \
             (__mmask8) -1, (R)); })


#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) (A), \
             (__v8df) (B), (__v8df) (C), \
             (__mmask8) (U), (R)); })


#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) (A), \
             (__v8df) (B), (__v8df) (C), \
             (__mmask8) (U), (R)); })


#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) (A), \
             (__v8df) (B), (__v8df) (C), \
             (__mmask8) (U), (R)); })


#define _mm512_fmsubadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) (A), \
             (__v8df) (B), -(__v8df) (C), \
             (__mmask8) -1, (R)); })


#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) (A), \
             (__v8df) (B), -(__v8df) (C), \
             (__mmask8) (U), (R)); })


#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) (A), \
             (__v8df) (B), -(__v8df) (C), \
             (__mmask8) (U), (R)); })


/* fmaddsub on packed doubles, current rounding; unmasked. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
             (__v8df) __B,
             (__v8df) __C,
             (__mmask8) -1,
             _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked against __A. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
             (__v8df) __B,
             (__v8df) __C,
             (__mmask8) __U,
             _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked against __C. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
             (__v8df) __B,
             (__v8df) __C,
             (__mmask8) __U,
             _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
             (__v8df) __B,
             (__v8df) __C,
             (__mmask8) __U,
             _MM_FROUND_CUR_DIRECTION);
}

/* fmsubadd variants (negated __C). */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
             (__v8df) __B,
             -(__v8df) __C,
             (__mmask8) -1,
             _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
             (__v8df) __B,
             -(__v8df) __C,
             (__mmask8) __U,
             _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  /* fmsubadd zero-masked: lanes with a clear mask bit become zero. */
  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
             (__v8df) __B,
             -(__v8df) __C,
             (__mmask8) __U,
             _MM_FROUND_CUR_DIRECTION);
}

/* Single-precision fmaddsub/fmsubadd round macros; same variant scheme
   as the _pd set (mask merges with A, mask3 with C, maskz zeroes). */
#define _mm512_fmaddsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) (A), \
            (__v16sf) (B), (__v16sf) (C), \
            (__mmask16) -1, (R)); })


#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) (A), \
            (__v16sf) (B), (__v16sf) (C), \
            (__mmask16) (U), (R)); })


#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) (A), \
            (__v16sf) (B), (__v16sf) (C), \
            (__mmask16) (U), (R)); })


#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) (A), \
            (__v16sf) (B), (__v16sf) (C), \
            (__mmask16) (U), (R)); })


#define _mm512_fmsubadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) (A), \
            (__v16sf) (B), -(__v16sf) (C), \
            (__mmask16) -1, (R)); })


#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) (A), \
            (__v16sf) (B), -(__v16sf) (C), \
            (__mmask16) (U), (R)); })


#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) (A), \
            (__v16sf) (B), -(__v16sf) (C), \
            (__mmask16) (U), (R)); })


/* fmaddsub on packed floats, current rounding; unmasked. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
(__v16sf) __B, 2054 (__v16sf) __C, 2055 (__mmask16) -1, 2056 _MM_FROUND_CUR_DIRECTION); 2057 } 2058 2059 static __inline__ __m512 __DEFAULT_FN_ATTRS 2060 _mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 2061 { 2062 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 2063 (__v16sf) __B, 2064 (__v16sf) __C, 2065 (__mmask16) __U, 2066 _MM_FROUND_CUR_DIRECTION); 2067 } 2068 2069 static __inline__ __m512 __DEFAULT_FN_ATTRS 2070 _mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 2071 { 2072 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A, 2073 (__v16sf) __B, 2074 (__v16sf) __C, 2075 (__mmask16) __U, 2076 _MM_FROUND_CUR_DIRECTION); 2077 } 2078 2079 static __inline__ __m512 __DEFAULT_FN_ATTRS 2080 _mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 2081 { 2082 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, 2083 (__v16sf) __B, 2084 (__v16sf) __C, 2085 (__mmask16) __U, 2086 _MM_FROUND_CUR_DIRECTION); 2087 } 2088 2089 static __inline__ __m512 __DEFAULT_FN_ATTRS 2090 _mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C) 2091 { 2092 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 2093 (__v16sf) __B, 2094 -(__v16sf) __C, 2095 (__mmask16) -1, 2096 _MM_FROUND_CUR_DIRECTION); 2097 } 2098 2099 static __inline__ __m512 __DEFAULT_FN_ATTRS 2100 _mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 2101 { 2102 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 2103 (__v16sf) __B, 2104 -(__v16sf) __C, 2105 (__mmask16) __U, 2106 _MM_FROUND_CUR_DIRECTION); 2107 } 2108 2109 static __inline__ __m512 __DEFAULT_FN_ATTRS 2110 _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 2111 { 2112 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, 2113 (__v16sf) __B, 2114 -(__v16sf) __C, 2115 (__mmask16) __U, 2116 _MM_FROUND_CUR_DIRECTION); 2117 } 2118 2119 #define 
_mm512_mask3_fmsub_round_pd(A, B, C, U, R) __extension__ ({ \ 2120 (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) (A), \ 2121 (__v8df) (B), (__v8df) (C), \ 2122 (__mmask8) (U), (R)); }) 2123 2124 2125 static __inline__ __m512d __DEFAULT_FN_ATTRS 2126 _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 2127 { 2128 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A, 2129 (__v8df) __B, 2130 (__v8df) __C, 2131 (__mmask8) __U, 2132 _MM_FROUND_CUR_DIRECTION); 2133 } 2134 2135 #define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) __extension__ ({ \ 2136 (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) (A), \ 2137 (__v16sf) (B), (__v16sf) (C), \ 2138 (__mmask16) (U), (R)); }) 2139 2140 2141 static __inline__ __m512 __DEFAULT_FN_ATTRS 2142 _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 2143 { 2144 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A, 2145 (__v16sf) __B, 2146 (__v16sf) __C, 2147 (__mmask16) __U, 2148 _MM_FROUND_CUR_DIRECTION); 2149 } 2150 2151 #define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) __extension__ ({ \ 2152 (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) (A), \ 2153 (__v8df) (B), (__v8df) (C), \ 2154 (__mmask8) (U), (R)); }) 2155 2156 2157 static __inline__ __m512d __DEFAULT_FN_ATTRS 2158 _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 2159 { 2160 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A, 2161 (__v8df) __B, 2162 (__v8df) __C, 2163 (__mmask8) __U, 2164 _MM_FROUND_CUR_DIRECTION); 2165 } 2166 2167 #define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) __extension__ ({ \ 2168 (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) (A), \ 2169 (__v16sf) (B), (__v16sf) (C), \ 2170 (__mmask16) (U), (R)); }) 2171 2172 2173 static __inline__ __m512 __DEFAULT_FN_ATTRS 2174 _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 2175 { 2176 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 
((__v16sf) __A, 2177 (__v16sf) __B, 2178 (__v16sf) __C, 2179 (__mmask16) __U, 2180 _MM_FROUND_CUR_DIRECTION); 2181 } 2182 2183 #define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) __extension__ ({ \ 2184 (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) (A), \ 2185 (__v8df) (B), (__v8df) (C), \ 2186 (__mmask8) (U), (R)); }) 2187 2188 2189 static __inline__ __m512d __DEFAULT_FN_ATTRS 2190 _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 2191 { 2192 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A, 2193 (__v8df) __B, 2194 (__v8df) __C, 2195 (__mmask8) __U, 2196 _MM_FROUND_CUR_DIRECTION); 2197 } 2198 2199 #define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) __extension__ ({ \ 2200 (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) (A), \ 2201 (__v16sf) (B), (__v16sf) (C), \ 2202 (__mmask16) (U), (R)); }) 2203 2204 2205 static __inline__ __m512 __DEFAULT_FN_ATTRS 2206 _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 2207 { 2208 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A, 2209 (__v16sf) __B, 2210 (__v16sf) __C, 2211 (__mmask16) __U, 2212 _MM_FROUND_CUR_DIRECTION); 2213 } 2214 2215 #define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) __extension__ ({ \ 2216 (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) (A), \ 2217 (__v8df) (B), (__v8df) (C), \ 2218 (__mmask8) (U), (R)); }) 2219 2220 2221 #define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) __extension__ ({ \ 2222 (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) (A), \ 2223 (__v8df) (B), (__v8df) (C), \ 2224 (__mmask8) (U), (R)); }) 2225 2226 2227 static __inline__ __m512d __DEFAULT_FN_ATTRS 2228 _mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 2229 { 2230 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A, 2231 (__v8df) __B, 2232 (__v8df) __C, 2233 (__mmask8) __U, 2234 _MM_FROUND_CUR_DIRECTION); 2235 } 2236 2237 static __inline__ __m512d __DEFAULT_FN_ATTRS 2238 
_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 2239 { 2240 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A, 2241 (__v8df) __B, 2242 (__v8df) __C, 2243 (__mmask8) __U, 2244 _MM_FROUND_CUR_DIRECTION); 2245 } 2246 2247 #define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) __extension__ ({ \ 2248 (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) (A), \ 2249 (__v16sf) (B), (__v16sf) (C), \ 2250 (__mmask16) (U), (R)); }) 2251 2252 2253 #define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) __extension__ ({ \ 2254 (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) (A), \ 2255 (__v16sf) (B), (__v16sf) (C), \ 2256 (__mmask16) (U), (R)); }) 2257 2258 2259 static __inline__ __m512 __DEFAULT_FN_ATTRS 2260 _mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 2261 { 2262 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A, 2263 (__v16sf) __B, 2264 (__v16sf) __C, 2265 (__mmask16) __U, 2266 _MM_FROUND_CUR_DIRECTION); 2267 } 2268 2269 static __inline__ __m512 __DEFAULT_FN_ATTRS 2270 _mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 2271 { 2272 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A, 2273 (__v16sf) __B, 2274 (__v16sf) __C, 2275 (__mmask16) __U, 2276 _MM_FROUND_CUR_DIRECTION); 2277 } 2278 2279 2280 2281 /* Vector permutations */ 2282 2283 static __inline __m512i __DEFAULT_FN_ATTRS 2284 _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B) 2285 { 2286 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I 2287 /* idx */ , 2288 (__v16si) __A, 2289 (__v16si) __B, 2290 (__mmask16) -1); 2291 } 2292 static __inline __m512i __DEFAULT_FN_ATTRS 2293 _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B) 2294 { 2295 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I 2296 /* idx */ , 2297 (__v8di) __A, 2298 (__v8di) __B, 2299 (__mmask8) -1); 2300 } 2301 2302 static __inline __m512d __DEFAULT_FN_ATTRS 2303 
_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B) 2304 { 2305 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I 2306 /* idx */ , 2307 (__v8df) __A, 2308 (__v8df) __B, 2309 (__mmask8) -1); 2310 } 2311 static __inline __m512 __DEFAULT_FN_ATTRS 2312 _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B) 2313 { 2314 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I 2315 /* idx */ , 2316 (__v16sf) __A, 2317 (__v16sf) __B, 2318 (__mmask16) -1); 2319 } 2320 2321 #define _mm512_alignr_epi64(A, B, I) __extension__ ({ \ 2322 (__m512i)__builtin_ia32_alignq512_mask((__v8di)(__m512i)(A), \ 2323 (__v8di)(__m512i)(B), \ 2324 (I), (__v8di)_mm512_setzero_si512(), \ 2325 (__mmask8)-1); }) 2326 2327 #define _mm512_alignr_epi32(A, B, I) __extension__ ({ \ 2328 (__m512i)__builtin_ia32_alignd512_mask((__v16si)(__m512i)(A), \ 2329 (__v16si)(__m512i)(B), \ 2330 (I), (__v16si)_mm512_setzero_si512(), \ 2331 (__mmask16)-1); }) 2332 2333 /* Vector Extract */ 2334 2335 #define _mm512_extractf64x4_pd(A, I) __extension__ ({ \ 2336 (__m256d) \ 2337 __builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), \ 2338 (I), \ 2339 (__v4df)_mm256_setzero_si256(), \ 2340 (__mmask8) -1); }) 2341 2342 #define _mm512_extractf32x4_ps(A, I) __extension__ ({ \ 2343 (__m128) \ 2344 __builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), \ 2345 (I), \ 2346 (__v4sf)_mm_setzero_ps(), \ 2347 (__mmask8) -1); }) 2348 2349 /* Vector Blend */ 2350 2351 static __inline __m512d __DEFAULT_FN_ATTRS 2352 _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W) 2353 { 2354 return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A, 2355 (__v8df) __W, 2356 (__mmask8) __U); 2357 } 2358 2359 static __inline __m512 __DEFAULT_FN_ATTRS 2360 _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W) 2361 { 2362 return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A, 2363 (__v16sf) __W, 2364 (__mmask16) __U); 2365 } 2366 2367 static __inline __m512i __DEFAULT_FN_ATTRS 
2368 _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W) 2369 { 2370 return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A, 2371 (__v8di) __W, 2372 (__mmask8) __U); 2373 } 2374 2375 static __inline __m512i __DEFAULT_FN_ATTRS 2376 _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) 2377 { 2378 return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A, 2379 (__v16si) __W, 2380 (__mmask16) __U); 2381 } 2382 2383 /* Compare */ 2384 2385 #define _mm512_cmp_round_ps_mask(A, B, P, R) __extension__ ({ \ 2386 (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \ 2387 (__v16sf)(__m512)(B), \ 2388 (P), (__mmask16)-1, (R)); }) 2389 2390 #define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) __extension__ ({ \ 2391 (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \ 2392 (__v16sf)(__m512)(B), \ 2393 (P), (__mmask16)(U), (R)); }) 2394 2395 #define _mm512_cmp_ps_mask(A, B, P) \ 2396 _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION) 2397 2398 #define _mm512_mask_cmp_ps_mask(U, A, B, P) \ 2399 _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION) 2400 2401 #define _mm512_cmp_round_pd_mask(A, B, P, R) __extension__ ({ \ 2402 (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \ 2403 (__v8df)(__m512d)(B), \ 2404 (P), (__mmask8)-1, (R)); }) 2405 2406 #define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) __extension__ ({ \ 2407 (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \ 2408 (__v8df)(__m512d)(B), \ 2409 (P), (__mmask8)(U), (R)); }) 2410 2411 #define _mm512_cmp_pd_mask(A, B, P) \ 2412 _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION) 2413 2414 #define _mm512_mask_cmp_pd_mask(U, A, B, P) \ 2415 _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION) 2416 2417 /* Conversion */ 2418 2419 static __inline __m512i __DEFAULT_FN_ATTRS 2420 _mm512_cvttps_epu32(__m512 __A) 2421 { 2422 return (__m512i) __builtin_ia32_cvttps2udq512_mask 
((__v16sf) __A, 2423 (__v16si) 2424 _mm512_setzero_si512 (), 2425 (__mmask16) -1, 2426 _MM_FROUND_CUR_DIRECTION); 2427 } 2428 2429 #define _mm512_cvt_roundepi32_ps(A, R) __extension__ ({ \ 2430 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), \ 2431 (__v16sf)_mm512_setzero_ps(), \ 2432 (__mmask16)-1, (R)); }) 2433 2434 #define _mm512_cvt_roundepu32_ps(A, R) __extension__ ({ \ 2435 (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), \ 2436 (__v16sf)_mm512_setzero_ps(), \ 2437 (__mmask16)-1, (R)); }) 2438 2439 static __inline __m512d __DEFAULT_FN_ATTRS 2440 _mm512_cvtepi32_pd(__m256i __A) 2441 { 2442 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A, 2443 (__v8df) 2444 _mm512_setzero_pd (), 2445 (__mmask8) -1); 2446 } 2447 2448 static __inline __m512d __DEFAULT_FN_ATTRS 2449 _mm512_cvtepu32_pd(__m256i __A) 2450 { 2451 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A, 2452 (__v8df) 2453 _mm512_setzero_pd (), 2454 (__mmask8) -1); 2455 } 2456 2457 #define _mm512_cvt_roundpd_ps(A, R) __extension__ ({ \ 2458 (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(A), \ 2459 (__v8sf)_mm256_setzero_ps(), \ 2460 (__mmask8)-1, (R)); }) 2461 2462 #define _mm512_cvtps_ph(A, I) __extension__ ({ \ 2463 (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(A), (I), \ 2464 (__v16hi)_mm256_setzero_si256(), \ 2465 -1); }) 2466 2467 static __inline __m512 __DEFAULT_FN_ATTRS 2468 _mm512_cvtph_ps(__m256i __A) 2469 { 2470 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, 2471 (__v16sf) 2472 _mm512_setzero_ps (), 2473 (__mmask16) -1, 2474 _MM_FROUND_CUR_DIRECTION); 2475 } 2476 2477 static __inline __m512i __DEFAULT_FN_ATTRS 2478 _mm512_cvttps_epi32(__m512 __a) 2479 { 2480 return (__m512i) 2481 __builtin_ia32_cvttps2dq512_mask((__v16sf) __a, 2482 (__v16si) _mm512_setzero_si512 (), 2483 (__mmask16) -1, _MM_FROUND_CUR_DIRECTION); 2484 } 2485 2486 static __inline __m256i __DEFAULT_FN_ATTRS 2487 _mm512_cvttpd_epi32(__m512d __a) 2488 { 2489 return 
(__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a, 2490 (__v8si)_mm256_setzero_si256(), 2491 (__mmask8) -1, 2492 _MM_FROUND_CUR_DIRECTION); 2493 } 2494 2495 #define _mm512_cvtt_roundpd_epi32(A, R) __extension__ ({ \ 2496 (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(A), \ 2497 (__v8si)_mm256_setzero_si256(), \ 2498 (__mmask8)-1, (R)); }) 2499 2500 #define _mm512_cvtt_roundps_epi32(A, R) __extension__ ({ \ 2501 (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(A), \ 2502 (__v16si)_mm512_setzero_si512(), \ 2503 (__mmask16)-1, (R)); }) 2504 2505 #define _mm512_cvt_roundps_epi32(A, R) __extension__ ({ \ 2506 (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(A), \ 2507 (__v16si)_mm512_setzero_si512(), \ 2508 (__mmask16)-1, (R)); }) 2509 2510 #define _mm512_cvt_roundpd_epi32(A, R) __extension__ ({ \ 2511 (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(A), \ 2512 (__v8si)_mm256_setzero_si256(), \ 2513 (__mmask8)-1, (R)); }) 2514 2515 #define _mm512_cvt_roundps_epu32(A, R) __extension__ ({ \ 2516 (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(A), \ 2517 (__v16si)_mm512_setzero_si512(), \ 2518 (__mmask16)-1, (R)); }) 2519 2520 #define _mm512_cvt_roundpd_epu32(A, R) __extension__ ({ \ 2521 (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(A), \ 2522 (__v8si)_mm256_setzero_si256(), \ 2523 (__mmask8) -1, (R)); }) 2524 2525 /* Unpack and Interleave */ 2526 static __inline __m512d __DEFAULT_FN_ATTRS 2527 _mm512_unpackhi_pd(__m512d __a, __m512d __b) 2528 { 2529 return __builtin_shufflevector(__a, __b, 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6); 2530 } 2531 2532 static __inline __m512d __DEFAULT_FN_ATTRS 2533 _mm512_unpacklo_pd(__m512d __a, __m512d __b) 2534 { 2535 return __builtin_shufflevector(__a, __b, 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6); 2536 } 2537 2538 static __inline __m512 __DEFAULT_FN_ATTRS 2539 _mm512_unpackhi_ps(__m512 __a, __m512 __b) 2540 { 2541 return __builtin_shufflevector(__a, __b, 2542 2, 18, 3, 19, 2543 2+4, 18+4, 3+4, 19+4, 2544 2+8, 18+8, 3+8, 
19+8, 2545 2+12, 18+12, 3+12, 19+12); 2546 } 2547 2548 static __inline __m512 __DEFAULT_FN_ATTRS 2549 _mm512_unpacklo_ps(__m512 __a, __m512 __b) 2550 { 2551 return __builtin_shufflevector(__a, __b, 2552 0, 16, 1, 17, 2553 0+4, 16+4, 1+4, 17+4, 2554 0+8, 16+8, 1+8, 17+8, 2555 0+12, 16+12, 1+12, 17+12); 2556 } 2557 2558 /* Bit Test */ 2559 2560 static __inline __mmask16 __DEFAULT_FN_ATTRS 2561 _mm512_test_epi32_mask(__m512i __A, __m512i __B) 2562 { 2563 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A, 2564 (__v16si) __B, 2565 (__mmask16) -1); 2566 } 2567 2568 static __inline __mmask8 __DEFAULT_FN_ATTRS 2569 _mm512_test_epi64_mask(__m512i __A, __m512i __B) 2570 { 2571 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, 2572 (__v8di) __B, 2573 (__mmask8) -1); 2574 } 2575 2576 /* SIMD load ops */ 2577 2578 static __inline __m512i __DEFAULT_FN_ATTRS 2579 _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P) 2580 { 2581 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *)__P, 2582 (__v16si) 2583 _mm512_setzero_si512 (), 2584 (__mmask16) __U); 2585 } 2586 2587 static __inline __m512i __DEFAULT_FN_ATTRS 2588 _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P) 2589 { 2590 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *)__P, 2591 (__v8di) 2592 _mm512_setzero_si512 (), 2593 (__mmask8) __U); 2594 } 2595 2596 static __inline __m512 __DEFAULT_FN_ATTRS 2597 _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P) 2598 { 2599 return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *)__P, 2600 (__v16sf) 2601 _mm512_setzero_ps (), 2602 (__mmask16) __U); 2603 } 2604 2605 static __inline __m512d __DEFAULT_FN_ATTRS 2606 _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P) 2607 { 2608 return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *)__P, 2609 (__v8df) 2610 _mm512_setzero_pd (), 2611 (__mmask8) __U); 2612 } 2613 2614 static __inline __m512 __DEFAULT_FN_ATTRS 2615 _mm512_maskz_load_ps(__mmask16 __U, void const 
*__P) 2616 { 2617 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P, 2618 (__v16sf) 2619 _mm512_setzero_ps (), 2620 (__mmask16) __U); 2621 } 2622 2623 static __inline __m512d __DEFAULT_FN_ATTRS 2624 _mm512_maskz_load_pd(__mmask8 __U, void const *__P) 2625 { 2626 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P, 2627 (__v8df) 2628 _mm512_setzero_pd (), 2629 (__mmask8) __U); 2630 } 2631 2632 static __inline __m512d __DEFAULT_FN_ATTRS 2633 _mm512_loadu_pd(double const *__p) 2634 { 2635 struct __loadu_pd { 2636 __m512d __v; 2637 } __attribute__((__packed__, __may_alias__)); 2638 return ((struct __loadu_pd*)__p)->__v; 2639 } 2640 2641 static __inline __m512 __DEFAULT_FN_ATTRS 2642 _mm512_loadu_ps(float const *__p) 2643 { 2644 struct __loadu_ps { 2645 __m512 __v; 2646 } __attribute__((__packed__, __may_alias__)); 2647 return ((struct __loadu_ps*)__p)->__v; 2648 } 2649 2650 static __inline __m512 __DEFAULT_FN_ATTRS 2651 _mm512_load_ps(double const *__p) 2652 { 2653 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__p, 2654 (__v16sf) 2655 _mm512_setzero_ps (), 2656 (__mmask16) -1); 2657 } 2658 2659 static __inline __m512d __DEFAULT_FN_ATTRS 2660 _mm512_load_pd(float const *__p) 2661 { 2662 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__p, 2663 (__v8df) 2664 _mm512_setzero_pd (), 2665 (__mmask8) -1); 2666 } 2667 2668 /* SIMD store ops */ 2669 2670 static __inline void __DEFAULT_FN_ATTRS 2671 _mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A) 2672 { 2673 __builtin_ia32_storedqudi512_mask ((__v8di *)__P, (__v8di) __A, 2674 (__mmask8) __U); 2675 } 2676 2677 static __inline void __DEFAULT_FN_ATTRS 2678 _mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A) 2679 { 2680 __builtin_ia32_storedqusi512_mask ((__v16si *)__P, (__v16si) __A, 2681 (__mmask16) __U); 2682 } 2683 2684 static __inline void __DEFAULT_FN_ATTRS 2685 _mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A) 2686 { 
2687 __builtin_ia32_storeupd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U); 2688 } 2689 2690 static __inline void __DEFAULT_FN_ATTRS 2691 _mm512_storeu_pd(void *__P, __m512d __A) 2692 { 2693 __builtin_ia32_storeupd512_mask((__v8df *)__P, (__v8df)__A, (__mmask8)-1); 2694 } 2695 2696 static __inline void __DEFAULT_FN_ATTRS 2697 _mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A) 2698 { 2699 __builtin_ia32_storeups512_mask ((__v16sf *)__P, (__v16sf) __A, 2700 (__mmask16) __U); 2701 } 2702 2703 static __inline void __DEFAULT_FN_ATTRS 2704 _mm512_storeu_ps(void *__P, __m512 __A) 2705 { 2706 __builtin_ia32_storeups512_mask((__v16sf *)__P, (__v16sf)__A, (__mmask16)-1); 2707 } 2708 2709 static __inline void __DEFAULT_FN_ATTRS 2710 _mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A) 2711 { 2712 __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U); 2713 } 2714 2715 static __inline void __DEFAULT_FN_ATTRS 2716 _mm512_store_pd(void *__P, __m512d __A) 2717 { 2718 *(__m512d*)__P = __A; 2719 } 2720 2721 static __inline void __DEFAULT_FN_ATTRS 2722 _mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A) 2723 { 2724 __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A, 2725 (__mmask16) __U); 2726 } 2727 2728 static __inline void __DEFAULT_FN_ATTRS 2729 _mm512_store_ps(void *__P, __m512 __A) 2730 { 2731 *(__m512*)__P = __A; 2732 } 2733 2734 /* Mask ops */ 2735 2736 static __inline __mmask16 __DEFAULT_FN_ATTRS 2737 _mm512_knot(__mmask16 __M) 2738 { 2739 return __builtin_ia32_knothi(__M); 2740 } 2741 2742 /* Integer compare */ 2743 2744 static __inline__ __mmask16 __DEFAULT_FN_ATTRS 2745 _mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) { 2746 return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b, 2747 (__mmask16)-1); 2748 } 2749 2750 static __inline__ __mmask16 __DEFAULT_FN_ATTRS 2751 _mm512_mask_cmpeq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 2752 return 
(__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b, 2753 __u); 2754 } 2755 2756 static __inline__ __mmask16 __DEFAULT_FN_ATTRS 2757 _mm512_cmpeq_epu32_mask(__m512i __a, __m512i __b) { 2758 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0, 2759 (__mmask16)-1); 2760 } 2761 2762 static __inline__ __mmask16 __DEFAULT_FN_ATTRS 2763 _mm512_mask_cmpeq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 2764 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0, 2765 __u); 2766 } 2767 2768 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 2769 _mm512_mask_cmpeq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 2770 return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b, 2771 __u); 2772 } 2773 2774 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 2775 _mm512_cmpeq_epi64_mask(__m512i __a, __m512i __b) { 2776 return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b, 2777 (__mmask8)-1); 2778 } 2779 2780 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 2781 _mm512_cmpeq_epu64_mask(__m512i __a, __m512i __b) { 2782 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0, 2783 (__mmask8)-1); 2784 } 2785 2786 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 2787 _mm512_mask_cmpeq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 2788 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0, 2789 __u); 2790 } 2791 2792 static __inline__ __mmask16 __DEFAULT_FN_ATTRS 2793 _mm512_cmpge_epi32_mask(__m512i __a, __m512i __b) { 2794 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5, 2795 (__mmask16)-1); 2796 } 2797 2798 static __inline__ __mmask16 __DEFAULT_FN_ATTRS 2799 _mm512_mask_cmpge_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 2800 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5, 2801 __u); 2802 } 2803 2804 static __inline__ __mmask16 __DEFAULT_FN_ATTRS 2805 _mm512_cmpge_epu32_mask(__m512i 
__a, __m512i __b) { 2806 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5, 2807 (__mmask16)-1); 2808 } 2809 2810 static __inline__ __mmask16 __DEFAULT_FN_ATTRS 2811 _mm512_mask_cmpge_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 2812 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5, 2813 __u); 2814 } 2815 2816 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 2817 _mm512_cmpge_epi64_mask(__m512i __a, __m512i __b) { 2818 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5, 2819 (__mmask8)-1); 2820 } 2821 2822 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 2823 _mm512_mask_cmpge_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 2824 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5, 2825 __u); 2826 } 2827 2828 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 2829 _mm512_cmpge_epu64_mask(__m512i __a, __m512i __b) { 2830 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5, 2831 (__mmask8)-1); 2832 } 2833 2834 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 2835 _mm512_mask_cmpge_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 2836 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5, 2837 __u); 2838 } 2839 2840 static __inline__ __mmask16 __DEFAULT_FN_ATTRS 2841 _mm512_cmpgt_epi32_mask(__m512i __a, __m512i __b) { 2842 return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b, 2843 (__mmask16)-1); 2844 } 2845 2846 static __inline__ __mmask16 __DEFAULT_FN_ATTRS 2847 _mm512_mask_cmpgt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 2848 return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b, 2849 __u); 2850 } 2851 2852 static __inline__ __mmask16 __DEFAULT_FN_ATTRS 2853 _mm512_cmpgt_epu32_mask(__m512i __a, __m512i __b) { 2854 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6, 2855 (__mmask16)-1); 2856 } 2857 2858 static __inline__ __mmask16 
__DEFAULT_FN_ATTRS 2859 _mm512_mask_cmpgt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 2860 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6, 2861 __u); 2862 } 2863 2864 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 2865 _mm512_mask_cmpgt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 2866 return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b, 2867 __u); 2868 } 2869 2870 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 2871 _mm512_cmpgt_epi64_mask(__m512i __a, __m512i __b) { 2872 return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b, 2873 (__mmask8)-1); 2874 } 2875 2876 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 2877 _mm512_cmpgt_epu64_mask(__m512i __a, __m512i __b) { 2878 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6, 2879 (__mmask8)-1); 2880 } 2881 2882 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 2883 _mm512_mask_cmpgt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 2884 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6, 2885 __u); 2886 } 2887 2888 static __inline__ __mmask16 __DEFAULT_FN_ATTRS 2889 _mm512_cmple_epi32_mask(__m512i __a, __m512i __b) { 2890 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2, 2891 (__mmask16)-1); 2892 } 2893 2894 static __inline__ __mmask16 __DEFAULT_FN_ATTRS 2895 _mm512_mask_cmple_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 2896 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2, 2897 __u); 2898 } 2899 2900 static __inline__ __mmask16 __DEFAULT_FN_ATTRS 2901 _mm512_cmple_epu32_mask(__m512i __a, __m512i __b) { 2902 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2, 2903 (__mmask16)-1); 2904 } 2905 2906 static __inline__ __mmask16 __DEFAULT_FN_ATTRS 2907 _mm512_mask_cmple_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 2908 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2, 2909 
__u); 2910 } 2911 2912 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 2913 _mm512_cmple_epi64_mask(__m512i __a, __m512i __b) { 2914 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2, 2915 (__mmask8)-1); 2916 } 2917 2918 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 2919 _mm512_mask_cmple_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 2920 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2, 2921 __u); 2922 } 2923 2924 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 2925 _mm512_cmple_epu64_mask(__m512i __a, __m512i __b) { 2926 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2, 2927 (__mmask8)-1); 2928 } 2929 2930 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 2931 _mm512_mask_cmple_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 2932 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2, 2933 __u); 2934 } 2935 2936 static __inline__ __mmask16 __DEFAULT_FN_ATTRS 2937 _mm512_cmplt_epi32_mask(__m512i __a, __m512i __b) { 2938 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1, 2939 (__mmask16)-1); 2940 } 2941 2942 static __inline__ __mmask16 __DEFAULT_FN_ATTRS 2943 _mm512_mask_cmplt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 2944 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1, 2945 __u); 2946 } 2947 2948 static __inline__ __mmask16 __DEFAULT_FN_ATTRS 2949 _mm512_cmplt_epu32_mask(__m512i __a, __m512i __b) { 2950 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1, 2951 (__mmask16)-1); 2952 } 2953 2954 static __inline__ __mmask16 __DEFAULT_FN_ATTRS 2955 _mm512_mask_cmplt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 2956 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1, 2957 __u); 2958 } 2959 2960 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 2961 _mm512_cmplt_epi64_mask(__m512i __a, __m512i __b) { 2962 return 
(__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1, 2963 (__mmask8)-1); 2964 } 2965 2966 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 2967 _mm512_mask_cmplt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 2968 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1, 2969 __u); 2970 } 2971 2972 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 2973 _mm512_cmplt_epu64_mask(__m512i __a, __m512i __b) { 2974 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1, 2975 (__mmask8)-1); 2976 } 2977 2978 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 2979 _mm512_mask_cmplt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 2980 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1, 2981 __u); 2982 } 2983 2984 static __inline__ __mmask16 __DEFAULT_FN_ATTRS 2985 _mm512_cmpneq_epi32_mask(__m512i __a, __m512i __b) { 2986 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4, 2987 (__mmask16)-1); 2988 } 2989 2990 static __inline__ __mmask16 __DEFAULT_FN_ATTRS 2991 _mm512_mask_cmpneq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 2992 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4, 2993 __u); 2994 } 2995 2996 static __inline__ __mmask16 __DEFAULT_FN_ATTRS 2997 _mm512_cmpneq_epu32_mask(__m512i __a, __m512i __b) { 2998 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4, 2999 (__mmask16)-1); 3000 } 3001 3002 static __inline__ __mmask16 __DEFAULT_FN_ATTRS 3003 _mm512_mask_cmpneq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 3004 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4, 3005 __u); 3006 } 3007 3008 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 3009 _mm512_cmpneq_epi64_mask(__m512i __a, __m512i __b) { 3010 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4, 3011 (__mmask8)-1); 3012 } 3013 3014 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 3015 
_mm512_mask_cmpneq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 3016 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4, 3017 __u); 3018 } 3019 3020 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 3021 _mm512_cmpneq_epu64_mask(__m512i __a, __m512i __b) { 3022 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4, 3023 (__mmask8)-1); 3024 } 3025 3026 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 3027 _mm512_mask_cmpneq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 3028 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4, 3029 __u); 3030 } 3031 3032 #define _mm512_cmp_epi32_mask(a, b, p) __extension__ ({ \ 3033 (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \ 3034 (__v16si)(__m512i)(b), (p), \ 3035 (__mmask16)-1); }) 3036 3037 #define _mm512_cmp_epu32_mask(a, b, p) __extension__ ({ \ 3038 (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \ 3039 (__v16si)(__m512i)(b), (p), \ 3040 (__mmask16)-1); }) 3041 3042 #define _mm512_cmp_epi64_mask(a, b, p) __extension__ ({ \ 3043 (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \ 3044 (__v8di)(__m512i)(b), (p), \ 3045 (__mmask8)-1); }) 3046 3047 #define _mm512_cmp_epu64_mask(a, b, p) __extension__ ({ \ 3048 (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \ 3049 (__v8di)(__m512i)(b), (p), \ 3050 (__mmask8)-1); }) 3051 3052 #define _mm512_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \ 3053 (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \ 3054 (__v16si)(__m512i)(b), (p), \ 3055 (__mmask16)(m)); }) 3056 3057 #define _mm512_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \ 3058 (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \ 3059 (__v16si)(__m512i)(b), (p), \ 3060 (__mmask16)(m)); }) 3061 3062 #define _mm512_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \ 3063 (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \ 3064 (__v8di)(__m512i)(b), (p), \ 3065 
(__mmask8)(m)); }) 3066 3067 #define _mm512_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \ 3068 (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \ 3069 (__v8di)(__m512i)(b), (p), \ 3070 (__mmask8)(m)); }) 3071 3072 #undef __DEFAULT_FN_ATTRS 3073 3074 #endif // __AVX512FINTRIN_H 3075